Friday, 19 June 2015

PhantomJS Server for Google Crawling and Facebook/Twitter Sharing in an AngularJS Website

PhantomJS server code:
/phantomserver/server.js (this file contains the server code)
// PhantomJS built-in modules used by the snapshot server.
var server = require('webserver').create();
var system = require('system');
var fs = require('fs');

// Listening port: the PORT environment variable when set, otherwise 8083.
var port = system.env.PORT || 8083;


/**
 * Parses the query string of a URL into a plain key/value object.
 *
 * Only the text after the first "?" is treated as the query; a URL without
 * "?" yields an empty object. Keys and values are percent-decoded. A
 * parameter without "=" (e.g. "?flag") is stored with an empty-string value,
 * and empty segments (e.g. "a=1&&b=2") are skipped. When a key occurs more
 * than once, the last occurrence wins.
 *
 * @param {string} url  Full URL or path, with or without a query string.
 * @returns {Object}    Map of decoded parameter names to decoded values.
 */
function parseGET(url) {
  var result = {};
  var queryStart = url.indexOf("?");

  // Without a "?" there is no query string at all.
  if (queryStart === -1) {
    return result;
  }

  // decodeURIComponent throws on malformed escapes such as "%E0%A4%A";
  // fall back to the raw text so one bad parameter cannot abort the parse.
  function safeDecode(text) {
    try {
      return decodeURIComponent(text);
    } catch (e) {
      return text;
    }
  }

  url.slice(queryStart + 1).split("&").forEach(function (part) {
    if (part === "") {
      return;
    }
    var separator = part.indexOf("=");
    var rawKey = separator === -1 ? part : part.slice(0, separator);
    var rawValue = separator === -1 ? "" : part.slice(separator + 1);
    result[safeDecode(rawKey)] = safeDecode(rawValue);
  });

  return result;
}

// Origin of the site being snapshotted; each request path is appended to it.
var domainName = "http://www.test.com";

// Root directory of the on-disk cache of rendered pages.
var snapshotRoot = "/temp/snapshot";

/**
 * Maps a request URL onto its location inside the snapshot cache.
 *
 * The URL is split on "/": every segment but the last becomes a directory
 * below snapshotRoot and the last one becomes the file name (plus ".html").
 * Empty and "." segments are dropped, which does not change the resulting
 * file on disk.
 *
 * @param {string} requestUrl  Request path (and query, if any) as received.
 * @returns {{dir: string, file: string}|null}  Cache directory and file
 *     path, or null when the URL contains a ".." segment and could therefore
 *     escape snapshotRoot.
 */
function snapshotPathFor(requestUrl) {
  var segments = requestUrl.split("/");
  var fileName = segments.pop();
  var dir = snapshotRoot;

  if (fileName === "..") {
    return null;
  }
  for (var i = 0; i < segments.length; i++) {
    if (segments[i] === "..") {
      return null;
    }
    if (segments[i] === "" || segments[i] === ".") {
      continue;
    }
    dir = dir + "/" + segments[i];
  }
  return { dir: dir, file: dir + "/" + fileName + ".html" };
}

/**
 * Sends an HTML body with status 200 and closes the response.
 *
 * @param {Object} response  PhantomJS webserver response object.
 * @param {string} html      Markup to send.
 */
function sendHtml(response, html) {
  response.statusCode = 200;
  response.setHeader("content-type", "text/html; charset=UTF-8");
  response.write(html);
  response.close();
}

// Serves a snapshot for every GET request: from the cache when a file for the
// URL exists, otherwise by rendering the page, caching it and returning it.
var service = server.listen(port, function (request, response) {
  if (request.method !== 'GET') {
    response.statusCode = 404;
    response.setHeader('Content-Type', 'text/html; charset=utf-8');
    response.close();
    return;
  }

  var target = snapshotPathFor(request.url);
  if (target === null) {
    // Refuse URLs that would read or write outside the cache directory.
    response.statusCode = 400;
    response.setHeader('Content-Type', 'text/html; charset=utf-8');
    response.close();
    return;
  }
  console.log(target.file);

  if (fs.exists(target.file)) {
    console.log("response sent from file");
    sendHtml(response, fs.read(target.file));
    return;
  }

  request_page(domainName + request.url, function (properties, pageContents) {
    console.log("response sent after generation");

    // Strip scripts (and the "_vis_opt_path_hides" style element) so the
    // snapshot is served as static markup.
    var content = pageContents
      .replace(/<script\b[^<]*(?:(?!<\/script>)<[^<]*)*<\/script>/gi, "")
      .replace(/<style id="_vis_opt_path_hides" type="text\/css">([^<]*)<\/style>/gi, "");

    // A caching failure must not leave the client without an answer.
    try {
      if (!fs.isDirectory(target.dir)) {
        // makeTree also creates missing parents, including snapshotRoot.
        fs.makeTree(target.dir);
      }
      fs.write(target.file, content, "w");
    } catch (e) {
      console.log("could not cache " + target.file + ": " + e);
    }

    sendHtml(response, content);
  });
});

if (service) {
  console.log("server started - http://localhost:" + server.port);
} else {
  // listen() returns false when the port cannot be bound; exit so the
  // process supervisor notices instead of idling without a server.
  console.log("could not start server on port " + port);
  phantom.exit(1);
}

/**
 * Loads a URL in a headless page and hands the rendered markup to a callback.
 *
 * The page is opened with a "nosocial=1" query parameter appended. When the
 * load finishes, the title and link information are collected; after a
 * settle delay, `callback(properties, html)` is invoked with the page's
 * current content and the page is closed.
 *
 * @param {string} url        Absolute URL to render.
 * @param {function(Object, string)} callback  Receives the collected
 *     properties ({title, links, link_areas}) and the page's HTML content.
 * @param {number} [renderDelayMs=10000]  Milliseconds to wait between the
 *     load-finished event and reading the page content.
 */
function request_page(url, callback, renderDelayMs) {
  var delay = typeof renderDelayMs === "number" ? renderDelayMs : 10000;
  var handled = false;

  var page = new WebPage();
  page.clipRect = { top: 0, left: 0, width: 700, height: 400 };
  page.viewportSize = { width: 700, height: 400 };

  page.onLoadStarted = function () {
    console.log('loading:' + url);
  };

  page.onLoadFinished = function (status) {
    // The event can fire more than once; only the first occurrence may
    // schedule the callback, otherwise the callback and page.close() would
    // run again on an already closed page.
    if (handled) {
      return;
    }
    handled = true;

    console.log('loaded:' + url);
    if (status !== 'success') {
      console.log('load status for ' + url + ': ' + status);
    }

    var properties = {};

    properties.title = page.evaluate(function () {
      return document.title;
    });

    // Unique absolute hrefs of all anchors whose protocol starts with "http".
    properties.links = page.evaluate(function () {
      return Object.keys(
        [].reduce.call(
          document.querySelectorAll('a'),
          function (memo, a) {
            if (a.protocol.indexOf('http') === 0) memo[a.href] = true;
            return memo;
          },
          {}
        )
      );
    });

    // [href, total on-screen area] pairs, summed per href attribute value.
    properties.link_areas = page.evaluate(function () {
      var sizes = [].reduce.call(document.querySelectorAll('a'), function (memo, a) {
        var bb = a.getBoundingClientRect(),
          area = bb.width * bb.height,
          href = a.getAttribute('href');

        // Anchors without an href attribute would otherwise be recorded
        // under the key "null".
        if (area && href !== null) {
          memo[href] = (memo[href] || 0) + area;
        }

        return memo;
      }, {});

      return Object.keys(sizes).map(function (url) {
        return [url, sizes[url]];
      });
    });

    setTimeout(function () {
      try {
        callback(properties, page.content);
      } finally {
        // Release the page even when the callback throws.
        page.close();
      }
    }, delay);
  };

  // Use "&" when the URL already carries a query string.
  var separator = url.indexOf("?") === -1 ? "?" : "&";
  page.open(url + separator + "nosocial=1");
}


//Shell script which runs the server in the background (phserver.sh)
# Replace this shell with PhantomJS so its output streams straight through
# (command substitution would hold it back until the server exits).
exec phantomjs /opt/deployment/popkorn/phantomserver/server.js
Files:
server.js: PhantomJS server code that generates HTML pages at runtime. It listens on port 8083.
phserver.sh: this script runs the PhantomJS server; it is called from Supervisor.
Supervisor configuration:
[program:phantom_server]
; Supervisor manages a foreground process, so the command is not backgrounded with "&".
command=sh /phantomserver/phserver.sh
user=username
Processes which should keep running
node /usr/bin/phantomjs /phantomserver/server.js
$/usr/lib/node_modules/phantomjs/lib/phantom/bin/phantomjs /phantomserver/server.js
Apache redirection configuration
We use ProxyPassMatch for this.
Requests are rewritten to a particular folder so that they can be matched by a pattern (ProxyPassMatch) and served by the proxy server.
RewriteCond %{HTTP_USER_AGENT} ^facebookexternalhit [OR]
RewriteCond %{HTTP_USER_AGENT} ^Twitterbot [OR]
RewriteCond %{QUERY_STRING} _escaped_fragment_=$
RewriteRule (.*) /snapshot/$1.html? [END]
# The P flag is used for proxy redirection.
RewriteRule (.*) /escfrg/$1? [L,P]
Proxy server redirection
ProxyPassMatch /escfrg/(.*) http://localhost:8083/$1
ProxyPassReverse /escfrg http://localhost:8083/$1

No comments:

Post a Comment