使用phantom.js生成多个HAR文件

问题描述:

我正在使用netsniff.js的代码来生成HAR文件,我想改进它,以便从数组(在下面的代码中命名为links)给出的多个链接分别生成HAR文件。

这里有另一个问题Using Multiple page.open in Single Script,可能对我有帮助,但我不知道如何把其中给出的解决方案应用到我的代码中。

下面是我的代码(如果links数组包含多个项,它会在输出文件中记录FAIL to load the address):

"use strict"; 
/**
 * Formats the Date it is invoked on as an ISO-8601 UTC timestamp of the form
 * YYYY-MM-DDTHH:mm:ss.sssZ. Used as a fallback for runtimes that lack a native
 * Date.prototype.toISOString.
 *
 * Must be called with a Date as `this` (it is installed as a prototype method).
 *
 * @returns {string} The UTC timestamp.
 */
function isoStringFromDate() {
    // Left-pads a one-digit field to two characters.
    function pad(n) { return n < 10 ? '0' + n : n; }
    // Left-pads the millisecond field to three characters.
    function ms(n) { return n < 10 ? '00' + n : n < 100 ? '0' + n : n; }

    // The trailing 'Z' marks the value as UTC, so every field has to come from
    // the UTC getters; the local-time getters would yield a timestamp that is
    // off by the machine's UTC offset.
    return this.getUTCFullYear() + '-' +
        pad(this.getUTCMonth() + 1) + '-' +
        pad(this.getUTCDate()) + 'T' +
        pad(this.getUTCHours()) + ':' +
        pad(this.getUTCMinutes()) + ':' +
        pad(this.getUTCSeconds()) + '.' +
        ms(this.getUTCMilliseconds()) + 'Z';
}

// Install the fallback only when the runtime has no native implementation.
if (!Date.prototype.toISOString) {
    Date.prototype.toISOString = isoStringFromDate;
}
// Legacy shared accumulator. createHAR no longer appends to it, because doing
// so leaked one page's entries into every HAR generated afterwards. The
// declaration is kept so that code which still assigns to `entries` keeps
// working under strict mode.
var entries = [];

/**
 * Builds a HAR log object (version '1.2') describing a single page load.
 *
 * Every call collects its entries in a fresh array, so the result contains
 * only the resources passed to this call.
 *
 * Reads the globals `phantom` (for the creator version) and `page` (for the
 * onLoad timing, computed as page.endTime - page.startTime).
 *
 * @param {string} address   Page URL; used as the page id and as each entry's pageref.
 * @param {string} title     Page title.
 * @param {Date}   startTime Time the page load started.
 * @param {Array}  resources Records of the form {request, startReply, endReply};
 *                           may be sparse. Records missing any of the three parts
 *                           are skipped.
 * @returns {Object} An object of the form { log: { version, creator, pages, entries } }.
 */
function createHAR(address, title, startTime, resources)
{
    var pageEntries = [];

    // forEach skips the holes of a sparse array.
    resources.forEach(function (resource) {
        var request = resource.request;
        var startReply = resource.startReply;
        var endReply = resource.endReply;

        // Only fully answered requests can be described.
        if (!request || !startReply || !endReply) {
            return;
        }

        // Exclude Data URI from HAR file because
        // they aren't included in specification
        if (/^data:image\//i.test(request.url)) {
            return;
        }

        pageEntries.push({
            startedDateTime: request.time.toISOString(),
            time: endReply.time - request.time,
            request: {
                method: request.method,
                url: request.url,
                httpVersion: "HTTP/1.1",
                cookies: [],
                headers: request.headers,
                queryString: [],
                headersSize: -1,
                bodySize: -1
            },
            response: {
                status: endReply.status,
                statusText: endReply.statusText,
                httpVersion: "HTTP/1.1",
                cookies: [],
                headers: endReply.headers,
                redirectURL: "",
                headersSize: -1,
                bodySize: startReply.bodySize,
                content: {
                    size: startReply.bodySize,
                    mimeType: endReply.contentType
                }
            },
            cache: {},
            timings: {
                blocked: 0,
                dns: -1,
                connect: -1,
                send: 0,
                wait: startReply.time - request.time,
                receive: endReply.time - startReply.time,
                ssl: -1
            },
            pageref: address
        });
    });

    return {
        log: {
            version: '1.2',
            creator: {
                name: "PhantomJS",
                version: phantom.version.major + '.' + phantom.version.minor +
                    '.' + phantom.version.patch
            },
            pages: [{
                startedDateTime: startTime.toISOString(),
                id: address,
                title: title,
                pageTimings: {
                    onLoad: page.endTime - page.startTime
                }
            }],
            entries: pageEntries
        }
    };
}
// The single web page object reused for every URL; processSites also stores
// per-site state on it (address, resources, startTime, endTime, title).
var page = require('webpage').create() 
// File-system module, used to write each generated HAR to disk.
var fs = require('fs'); 
// Number of sites started so far; gives every output file a distinct name.
var count = 0; 
/**
 * Loads the URLs in `links` one at a time and writes a HAR file for each.
 *
 * The next URL is opened only from inside the completion callback of the
 * current one, because page.open is asynchronous. URLs are taken with pop(),
 * so the last element is processed first and the caller's array is consumed.
 * Output files are named 'file<count>.har' using the global `count`.
 *
 * The script exits through phantom.exit(): with no argument when the queue is
 * exhausted (or was empty to begin with), with 1 as soon as a page fails to load.
 *
 * @param {Array} links URLs still to be processed; mutated by this function.
 */
function processSites(links)
{
    // Nothing (left) to do: finish cleanly instead of opening an undefined URL.
    if (!links || links.length === 0) {
        phantom.exit();
        return;
    }

    page.address = links.pop();
    var path = 'file' + count + '.har';
    page.resources = [];
    console.log("page resources:", page.resources);
    count = count + 1;

    page.onLoadStarted = function () {
        page.startTime = new Date();
    };

    page.onResourceRequested = function (req) {
        page.resources[req.id] = {
            request: req,
            startReply: null,
            endReply: null
        };
    };

    page.onResourceReceived = function (res) {
        var tracked = page.resources[res.id];

        // page.resources is reset for every site, so a response may arrive for
        // a request that is not recorded in the current list; ignore it rather
        // than throw inside the callback.
        if (!tracked) {
            return;
        }
        if (res.stage === 'start') {
            tracked.startReply = res;
        }
        if (res.stage === 'end') {
            tracked.endReply = res;
        }
    };

    page.open(page.address, function (status) {
        // Wait 10 s after the load callback before building the HAR.
        setTimeout(function () {
            var har;

            if (status !== 'success') {
                console.log('FAIL to load the address');
                phantom.exit(1);
                return;
            }

            page.endTime = new Date();
            page.title = page.evaluate(function () {
                return document.title;
            });

            // Start from an empty shared list so entries gathered for an
            // earlier site cannot end up in this site's HAR.
            entries = [];
            har = createHAR(page.address, page.title, page.startTime, page.resources);
            fs.write(path, JSON.stringify(har), 'w');

            if (links.length > 0) {
                processSites(links);
            } else {
                phantom.exit();
            }
        }, 10000);
    });
}

// URLs to capture. processSites takes them with pop(), so the last element is
// loaded first and the array is emptied as the run proceeds.
var links = ["http://*.com", "http://marvel.com"]; 

// Start the sequential run; the script exits from inside processSites.
processSites(links); 

更新:
上面的代码生成两个HAR文件file1.har和file2.har,但第二个HAR文件同时包含了两个链接产生的HAR内容,而它本应只包含第一个链接的HAR内容……

通过设置var har = " "解决了这个问题。

你不能在一个简单的循环中重复打开PhantomJS页面,因为page.open方法是异步的。它不会等待第一个站点处理完毕,而是马上打开第二个站点。

我已将您的脚本重写为使用递归:下一个站点将在当前处理后才会打开。 (注意:如果队列中的任何站点将无法加载,整个过程将停止,但您可以轻松地重写脚本以避免这种情况)。

// Fallback that is installed only when the runtime has no native
// Date.prototype.toISOString.
if (!Date.prototype.toISOString) { 
    Date.prototype.toISOString = function() { 
     // ... (body elided in this listing)
    } 
} 

// Module-level list of HAR entries.
// NOTE(review): presumably createHAR appends to it, as in the full script — confirm.
var entries = []; 

// HAR builder taking the page address, title, start time and recorded
// resources; its body is elided in this listing.
function createHAR(address, title, startTime, resources) 
{ 
    // ... 
} 

var page = require('webpage').create() 

/**
 * Loads the URLs in `links` one at a time and prints a HAR document for each.
 *
 * The next URL is opened only from inside the completion callback of the
 * current one, because page.open is asynchronous. URLs are taken with pop(),
 * so the last element is processed first and the caller's array is consumed.
 *
 * The script exits through phantom.exit(): with no argument when the queue is
 * exhausted (or was empty to begin with), with 1 as soon as a page fails to load.
 *
 * @param {Array} links URLs still to be processed; mutated by this function.
 */
function processSites(links)
{
    // Nothing (left) to do: finish cleanly instead of opening an undefined URL.
    if (!links || links.length === 0) {
        phantom.exit();
        return;
    }

    page.address = links.pop();

    console.log("PAGE ADDRESS: ", page.address);
    page.resources = [];

    page.onLoadStarted = function () {
        page.startTime = new Date();
    };

    page.onResourceRequested = function (req) {
        page.resources[req.id] = {
            request: req,
            startReply: null,
            endReply: null
        };
    };

    page.onResourceReceived = function (res) {
        var tracked = page.resources[res.id];

        // page.resources is reset for every site, so a response may arrive for
        // a request that is not recorded in the current list; ignore it rather
        // than throw inside the callback.
        if (!tracked) {
            return;
        }
        if (res.stage === 'start') {
            tracked.startReply = res;
        }
        if (res.stage === 'end') {
            tracked.endReply = res;
        }
    };

    page.open(page.address, function (status) {
        // Wait 10 s after the load callback before building the HAR.
        setTimeout(function () {
            var har;

            if (status !== 'success') {
                console.log('FAIL to load the address');
                phantom.exit(1);
                return;
            }

            page.endTime = new Date();
            page.title = page.evaluate(function () {
                return document.title;
            });

            // Start from an empty shared list so entries gathered for an
            // earlier site cannot end up in this site's HAR.
            entries = [];
            har = createHAR(page.address, page.title, page.startTime, page.resources);
            console.log(JSON.stringify(har, undefined, 4));

            if (links.length > 0) {
                processSites(links);
            } else {
                phantom.exit();
            }
        }, 10000);
    });
}

var links = ["http://edition.cnn.com", "http://*.com"]; 

processSites(links); 
+1

我发现`http://edition.cnn.com`的har文件也会包含为`http://*.com`生成的har代码……有没有一种方法可以单独输出每个har代码? – Valip

+1

Vaviloff你还能帮助我吗?我更新了这个问题。谢谢! – Valip

+1

也许'var entries = [];'应该在'createHar'函数内?没有真正读懂它,问题是关于迭代数组链接,而不是创建har文件本身。 – Vaviloff