PhantomJS server code:
/phantomserver/server.js (this file contains the server code)
/phantomserver/server.js (this file contains the server code)
var server = require('webserver').create(),
system = require('system'),
fs = require('fs'),
port = system.env.PORT || 8083;
/**
 * Parse the query string of a URL into a plain key/value object.
 *
 * Everything after the first "?" is split on "&"; each part is split on its
 * first "=". Keys and values are percent-decoded. A part without "=" is stored
 * with an empty-string value. When a key repeats, the last occurrence wins.
 *
 * @param {string} url - URL or path, with or without a query string.
 * @returns {Object} Map of decoded parameter names to decoded values; empty
 *     when the URL carries no query string.
 */
function parseGET(url){
    // Decode a component, falling back to the raw text when it contains a
    // malformed percent escape (decodeURIComponent throws URIError on those).
    function safeDecode(text) {
        try {
            return decodeURIComponent(text);
        } catch (err) {
            return text;
        }
    }

    var result = {};
    var queryStart = url.indexOf("?");
    // Without a "?" there is no query string; do not treat the path as one.
    if (queryStart === -1) {
        return result;
    }

    url.substr(queryStart + 1).split("&").forEach(function(part) {
        // Skip empty parts produced by "?", "&&" or a trailing "&".
        if (part === "") {
            return;
        }
        var e = part.indexOf("=");
        var key = e === -1 ? part : part.substr(0, e);
        var value = e === -1 ? "" : part.substr(e + 1);
        result[safeDecode(key)] = safeDecode(value);
    });
    return result;
}
// Origin of the site being snapshotted; the incoming request path is appended to it.
var domainName = "http://www.test.com";

/**
 * HTTP handler of the snapshot server.
 *
 * For a GET request the request path is mapped to a cache file
 * /temp/snapshot/<path>.html. If that file exists it is returned as-is;
 * otherwise the page at domainName + path is rendered through request_page,
 * stripped of <script> elements and of the "_vis_opt_path_hides" <style>
 * element, written to the cache file and returned.
 * Any other method is answered with 404. A path containing a ".." segment is
 * answered with 400 so requests cannot read or write outside /temp/snapshot.
 */
var service = server.listen(port, function(request, response) {
    var url = domainName + request.url;

    if (request.method !== 'GET') {
        response.statusCode = 404;
        response.setHeader('Content-Type', 'text/html; charset=utf-8');
        response.close();
        return;
    }

    var pathSegments = request.url.split("/");

    // The path comes straight from the client: refuse parent-directory
    // segments before any of it is used to build a filesystem path.
    if (pathSegments.indexOf("..") !== -1) {
        response.statusCode = 400;
        response.setHeader('Content-Type', 'text/html; charset=utf-8');
        response.close();
        return;
    }

    // The last segment names the file, the preceding ones the directory.
    var fileName = pathSegments.pop();
    var dirPathFinal = "/temp/snapshot";
    if (pathSegments.length > 0) {
        dirPathFinal = dirPathFinal + "/" + pathSegments.join("/");
    }
    // makeTree also creates missing parent directories (including
    // /temp/snapshot itself on a fresh machine).
    if (!fs.isDirectory(dirPathFinal)) {
        fs.makeTree(dirPathFinal);
    }

    var filePathFinal = dirPathFinal + "/" + fileName + ".html";
    console.log(filePathFinal);

    if (fs.exists(filePathFinal)) {
        // Cache hit: serve the stored snapshot.
        var cached = fs.read(filePathFinal);
        response.statusCode = 200;
        response.setHeader("content-type","text/html; charset=UTF-8");
        console.log("response sent from file");
        response.write(cached);
        response.close();
        return;
    }

    // Cache miss: render the page, clean it up, store it and serve it.
    request_page(url, function(properties, pageContents){
        response.statusCode = 200;
        response.setHeader("content-type","text/html; charset=UTF-8");
        console.log("response sent after generation");
        var content = pageContents.replace(/<script\b[^<]*(?:(?!<\/script>)<[^<]*)*<\/script>/gi,"");
        content = content.replace(/<style id="_vis_opt_path_hides" type="text\/css">([^<]*)<\/style>/gi,"");
        try {
            fs.write(filePathFinal, content);
        } catch (err) {
            // A failed cache write must not prevent the response from being sent.
            console.log("could not write snapshot " + filePathFinal + ": " + err);
        }
        response.write(content);
        response.close();
    });
});
if(service) console.log("server started - http://localhost:" + server.port);
/**
 * Load a URL in a PhantomJS page and hand the rendered HTML to a callback.
 *
 * The URL is opened with an extra "nosocial=1" query parameter. Once the load
 * finishes, the page title, the distinct http(s) link targets and the
 * on-screen area per link href are collected; 10 seconds later the callback
 * receives those properties together with the page content, and the page is
 * closed.
 *
 * @param {string} url - Absolute URL of the page to render.
 * @param {function(Object, string)} callback - Called once with
 *     (properties, pageContents). properties has `status` (the load status
 *     reported by PhantomJS), `title`, `links` (array of hrefs) and
 *     `link_areas` (array of [href, area] pairs).
 */
function request_page(url, callback){
    var page = new WebPage();
    // Guards against handling the load-finished event more than once, which
    // would otherwise invoke the callback (and close the page) repeatedly.
    var handled = false;

    page.clipRect = { top: 0, left: 0, width: 700, height: 400 };
    page.viewportSize = { width: 700, height: 400 };

    page.onLoadStarted = function () {
        console.log('loading:' + url);
    };

    page.onLoadFinished = function (status) {
        if (handled) {
            return;
        }
        handled = true;
        console.log('loaded:' + url);

        var properties = {};
        // Expose the load status so callers can tell a failed load apart.
        properties.status = status;
        properties.title = page.evaluate(function () {
            return document.title;
        });
        properties.links = page.evaluate(function () {
            // Collect hrefs as object keys to de-duplicate them.
            return Object.keys(
                [].reduce.call(
                    document.querySelectorAll('a'),
                    function(memo, a){
                        if(a.protocol.indexOf('http') === 0) memo[a.href] = true;
                        return memo;
                    },
                    {}
                )
            );
        });
        properties.link_areas = page.evaluate(function () {
            var sizes = [].reduce.call(document.querySelectorAll('a'), function(memo, a){
                var bb = a.getBoundingClientRect(),
                    area = bb.width * bb.height,
                    href = a.getAttribute('href');
                // Sum the area of all visible anchors sharing the same href.
                if(area){
                    memo[href] = (memo[href] || 0) + area;
                }
                return memo;
            },{});
            return Object.keys(sizes).map(function(url){
                return [url, sizes[url]];
            });
        });

        // Fixed 10 s delay before reading the content — presumably to let
        // client-side scripts finish rendering; confirm before shortening.
        setTimeout(function(){
            try {
                callback(properties,page.content);
            } finally {
                // Always release the page, even if the callback throws.
                page.close();
            }
        },10000);
    };

    // Append the flag with "&" when the URL already carries a query string.
    var separator = url.indexOf("?") === -1 ? "?" : "&";
    page.open(url + separator + "nosocial=1");
}
//Shell script which runs the server in the background (phserver.sh)
# Start the PhantomJS server. exec replaces this shell with the phantomjs
# process, so whatever launched the script supervises the server itself, and
# its output is streamed live instead of being held back by a command
# substitution until the server exits.
exec phantomjs /opt/deployment/popkorn/phantomserver/server.js
system = require('system'),
fs = require('fs'),
port = system.env.PORT || 8083;
/**
 * Parse the query string of a URL into a plain key/value object.
 *
 * Everything after the first "?" is split on "&"; each part is split on its
 * first "=". Keys and values are percent-decoded. A part without "=" is stored
 * with an empty-string value. When a key repeats, the last occurrence wins.
 *
 * @param {string} url - URL or path, with or without a query string.
 * @returns {Object} Map of decoded parameter names to decoded values; empty
 *     when the URL carries no query string.
 */
function parseGET(url){
    // Decode a component, falling back to the raw text when it contains a
    // malformed percent escape (decodeURIComponent throws URIError on those).
    function safeDecode(text) {
        try {
            return decodeURIComponent(text);
        } catch (err) {
            return text;
        }
    }

    var result = {};
    var queryStart = url.indexOf("?");
    // Without a "?" there is no query string; do not treat the path as one.
    if (queryStart === -1) {
        return result;
    }

    url.substr(queryStart + 1).split("&").forEach(function(part) {
        // Skip empty parts produced by "?", "&&" or a trailing "&".
        if (part === "") {
            return;
        }
        var e = part.indexOf("=");
        var key = e === -1 ? part : part.substr(0, e);
        var value = e === -1 ? "" : part.substr(e + 1);
        result[safeDecode(key)] = safeDecode(value);
    });
    return result;
}
// Origin of the site being snapshotted; the incoming request path is appended to it.
var domainName = "http://www.test.com";

/**
 * HTTP handler of the snapshot server.
 *
 * For a GET request the request path is mapped to a cache file
 * /temp/snapshot/<path>.html. If that file exists it is returned as-is;
 * otherwise the page at domainName + path is rendered through request_page,
 * stripped of <script> elements and of the "_vis_opt_path_hides" <style>
 * element, written to the cache file and returned.
 * Any other method is answered with 404. A path containing a ".." segment is
 * answered with 400 so requests cannot read or write outside /temp/snapshot.
 */
var service = server.listen(port, function(request, response) {
    var url = domainName + request.url;

    if (request.method !== 'GET') {
        response.statusCode = 404;
        response.setHeader('Content-Type', 'text/html; charset=utf-8');
        response.close();
        return;
    }

    var pathSegments = request.url.split("/");

    // The path comes straight from the client: refuse parent-directory
    // segments before any of it is used to build a filesystem path.
    if (pathSegments.indexOf("..") !== -1) {
        response.statusCode = 400;
        response.setHeader('Content-Type', 'text/html; charset=utf-8');
        response.close();
        return;
    }

    // The last segment names the file, the preceding ones the directory.
    var fileName = pathSegments.pop();
    var dirPathFinal = "/temp/snapshot";
    if (pathSegments.length > 0) {
        dirPathFinal = dirPathFinal + "/" + pathSegments.join("/");
    }
    // makeTree also creates missing parent directories (including
    // /temp/snapshot itself on a fresh machine).
    if (!fs.isDirectory(dirPathFinal)) {
        fs.makeTree(dirPathFinal);
    }

    var filePathFinal = dirPathFinal + "/" + fileName + ".html";
    console.log(filePathFinal);

    if (fs.exists(filePathFinal)) {
        // Cache hit: serve the stored snapshot.
        var cached = fs.read(filePathFinal);
        response.statusCode = 200;
        response.setHeader("content-type","text/html; charset=UTF-8");
        console.log("response sent from file");
        response.write(cached);
        response.close();
        return;
    }

    // Cache miss: render the page, clean it up, store it and serve it.
    request_page(url, function(properties, pageContents){
        response.statusCode = 200;
        response.setHeader("content-type","text/html; charset=UTF-8");
        console.log("response sent after generation");
        var content = pageContents.replace(/<script\b[^<]*(?:(?!<\/script>)<[^<]*)*<\/script>/gi,"");
        content = content.replace(/<style id="_vis_opt_path_hides" type="text\/css">([^<]*)<\/style>/gi,"");
        try {
            fs.write(filePathFinal, content);
        } catch (err) {
            // A failed cache write must not prevent the response from being sent.
            console.log("could not write snapshot " + filePathFinal + ": " + err);
        }
        response.write(content);
        response.close();
    });
});
if(service) console.log("server started - http://localhost:" + server.port);
/**
 * Load a URL in a PhantomJS page and hand the rendered HTML to a callback.
 *
 * The URL is opened with an extra "nosocial=1" query parameter. Once the load
 * finishes, the page title, the distinct http(s) link targets and the
 * on-screen area per link href are collected; 10 seconds later the callback
 * receives those properties together with the page content, and the page is
 * closed.
 *
 * @param {string} url - Absolute URL of the page to render.
 * @param {function(Object, string)} callback - Called once with
 *     (properties, pageContents). properties has `status` (the load status
 *     reported by PhantomJS), `title`, `links` (array of hrefs) and
 *     `link_areas` (array of [href, area] pairs).
 */
function request_page(url, callback){
    var page = new WebPage();
    // Guards against handling the load-finished event more than once, which
    // would otherwise invoke the callback (and close the page) repeatedly.
    var handled = false;

    page.clipRect = { top: 0, left: 0, width: 700, height: 400 };
    page.viewportSize = { width: 700, height: 400 };

    page.onLoadStarted = function () {
        console.log('loading:' + url);
    };

    page.onLoadFinished = function (status) {
        if (handled) {
            return;
        }
        handled = true;
        console.log('loaded:' + url);

        var properties = {};
        // Expose the load status so callers can tell a failed load apart.
        properties.status = status;
        properties.title = page.evaluate(function () {
            return document.title;
        });
        properties.links = page.evaluate(function () {
            // Collect hrefs as object keys to de-duplicate them.
            return Object.keys(
                [].reduce.call(
                    document.querySelectorAll('a'),
                    function(memo, a){
                        if(a.protocol.indexOf('http') === 0) memo[a.href] = true;
                        return memo;
                    },
                    {}
                )
            );
        });
        properties.link_areas = page.evaluate(function () {
            var sizes = [].reduce.call(document.querySelectorAll('a'), function(memo, a){
                var bb = a.getBoundingClientRect(),
                    area = bb.width * bb.height,
                    href = a.getAttribute('href');
                // Sum the area of all visible anchors sharing the same href.
                if(area){
                    memo[href] = (memo[href] || 0) + area;
                }
                return memo;
            },{});
            return Object.keys(sizes).map(function(url){
                return [url, sizes[url]];
            });
        });

        // Fixed 10 s delay before reading the content — presumably to let
        // client-side scripts finish rendering; confirm before shortening.
        setTimeout(function(){
            try {
                callback(properties,page.content);
            } finally {
                // Always release the page, even if the callback throws.
                page.close();
            }
        },10000);
    };

    // Append the flag with "&" when the URL already carries a query string.
    var separator = url.indexOf("?") === -1 ? "?" : "&";
    page.open(url + separator + "nosocial=1");
}
//Shell script which runs the server in the background (phserver.sh)
# Start the PhantomJS server. exec replaces this shell with the phantomjs
# process, so whatever launched the script supervises the server itself, and
# its output is streamed live instead of being held back by a command
# substitution until the server exits.
exec phantomjs /opt/deployment/popkorn/phantomserver/server.js
Files:
server.js : PhantomJS server code which generates HTML pages at runtime. It runs on port 8083.
phserver.sh : this file runs the PhantomJS server; it is called from Supervisor.
server.js : PhantomJS server code which generates HTML pages at runtime. It runs on port 8083.
phserver.sh : this file runs the PhantomJS server; it is called from Supervisor.
Supervisor configuration:
; Supervisor program entry that runs the PhantomJS server launcher script.
[program:phantom_server]
; Supervisor starts the command directly (not through a shell) and expects it
; to stay in the foreground, so no trailing "&" is used.
command=sh /phantomserver/phserver.sh
user=username
; Supervisor program entry that runs the PhantomJS server launcher script.
[program:phantom_server]
; Supervisor starts the command directly (not through a shell) and expects it
; to stay in the foreground, so no trailing "&" is used.
command=sh /phantomserver/phserver.sh
user=username
Processes which should keep running
node /usr/bin/phantomjs /phantomserver/server.js
$/usr/lib/node_modules/phantomjs/lib/phantom/bin/phantomjs /phantomserver/server.js
node /usr/bin/phantomjs /phantomserver/server.js
$/usr/lib/node_modules/phantomjs/lib/phantom/bin/phantomjs /phantomserver/server.js
Apache redirection configuration
We used ProxyPassMatch for it.
We used ProxyPassMatch for it.
Redirect to a particular folder so that the request can be matched by a pattern (ProxyPassMatch) and served by the proxy server.
# Applies when the user agent starts with facebookexternalhit or Twitterbot,
# or when the query string ends with "_escaped_fragment_=": rewrite to the
# snapshot path and drop the query string.
RewriteCond %{HTTP_USER_AGENT} ^facebookexternalhit [OR]
RewriteCond %{HTTP_USER_AGENT} ^Twitterbot [OR]
RewriteCond %{QUERY_STRING} _escaped_fragment_=$
RewriteRule (.*) /snapshot/$1.html? [END]
# P flag is used for proxy redirection.
# (Kept on its own line: Apache does not allow a comment after a directive.)
RewriteRule (.*) /escfrg/$1? [L,P]
# Applies when the user agent starts with facebookexternalhit or Twitterbot,
# or when the query string ends with "_escaped_fragment_=": rewrite to the
# snapshot path and drop the query string.
RewriteCond %{HTTP_USER_AGENT} ^facebookexternalhit [OR]
RewriteCond %{HTTP_USER_AGENT} ^Twitterbot [OR]
RewriteCond %{QUERY_STRING} _escaped_fragment_=$
RewriteRule (.*) /snapshot/$1.html? [END]
# P flag is used for proxy redirection.
# (Kept on its own line: Apache does not allow a comment after a directive.)
RewriteRule (.*) /escfrg/$1? [L,P]
Proxy server redirection
# Forward /escfrg/<path> to the server listening on localhost:8083.
ProxyPassMatch /escfrg/(.*) http://localhost:8083/$1
# ProxyPassReverse takes a plain path / URL prefix pair and does not expand
# regex back-references, so the backend root is mapped back to /escfrg/.
ProxyPassReverse /escfrg/ http://localhost:8083/
# Forward /escfrg/<path> to the server listening on localhost:8083.
ProxyPassMatch /escfrg/(.*) http://localhost:8083/$1
# ProxyPassReverse takes a plain path / URL prefix pair and does not expand
# regex back-references, so the backend root is mapped back to /escfrg/.
ProxyPassReverse /escfrg/ http://localhost:8083/