使用phantom.js生成多个HAR文件
问题描述:
我正在使用 netsniff.js 的代码来生成 HAR 文件,并想改进它,以便根据数组(在下面的代码中名为 links)中给出的多个链接分别生成 HAR 文件。
另一个问题“Using Multiple page.open in Single Script”可能对我有帮助,但我不知道如何把其中给出的解决方案应用到我的代码中。
下面是我的代码(如果 links 数组包含多个项,它会在输出中打印 FAIL to load the address):
"use strict";
/**
 * Fallback implementation of Date.prototype.toISOString.
 *
 * Formats the receiver (`this`, a Date) as "YYYY-MM-DDTHH:mm:ss.sssZ".
 * The trailing "Z" designates UTC, so every field is read with the getUTC*
 * accessors; reading the local-time fields would describe a different
 * instant whenever the host time zone is not UTC.
 *
 * The year is emitted exactly as returned by getUTCFullYear(), without
 * zero padding.
 *
 * @returns {string} The UTC timestamp string.
 */
function dateToISOString() {
    // Left-pad a one-digit field to two characters.
    function pad(n) { return n < 10 ? '0' + n : n; }
    // Left-pad the millisecond field to three characters.
    function ms(n) { return n < 10 ? '00' + n : n < 100 ? '0' + n : n; }
    return this.getUTCFullYear() + '-' +
        pad(this.getUTCMonth() + 1) + '-' +
        pad(this.getUTCDate()) + 'T' +
        pad(this.getUTCHours()) + ':' +
        pad(this.getUTCMinutes()) + ':' +
        pad(this.getUTCSeconds()) + '.' +
        ms(this.getUTCMilliseconds()) + 'Z';
}

// Install the fallback only when the engine has no native implementation.
if (!Date.prototype.toISOString) {
    Date.prototype.toISOString = dateToISOString;
}
// Legacy script-level binding. createHAR no longer accumulates into it; it
// stays declared because other code in this strict-mode script assigns to
// `entries`, and that assignment would throw if the binding disappeared.
var entries = [];

/**
 * Builds a HAR 1.2 log object describing a single page load.
 *
 * Every call collects its entries in a fresh array, so the result only
 * contains the resources passed to this call and successive calls never
 * share or leak entries between pages.
 *
 * Besides its parameters the function reads two script-level objects:
 * `phantom.version` (for the creator record) and `page.endTime` (for the
 * onLoad timing).
 *
 * @param {string} address   Page URL; used as the page id and as each
 *                           entry's pageref.
 * @param {string} title     Page title.
 * @param {Date}   startTime Moment the page load started.
 * @param {Array}  resources Records of the form
 *                           { request, startReply, endReply }; records with
 *                           any of the three missing are skipped.
 * @returns {Object} An object of the form { log: { version, creator, pages, entries } }.
 */
function createHAR(address, title, startTime, resources)
{
    var pageEntries = [];

    resources.forEach(function (resource) {
        var request = resource.request,
            startReply = resource.startReply,
            endReply = resource.endReply;

        // Skip resources whose request/response cycle was not fully observed.
        if (!request || !startReply || !endReply) {
            return;
        }

        // Exclude Data URI from HAR file because
        // they aren't included in specification
        if (request.url.match(/(^data:image\/.*)/i)) {
            return;
        }

        pageEntries.push({
            startedDateTime: request.time.toISOString(),
            time: endReply.time - request.time,
            request: {
                method: request.method,
                url: request.url,
                httpVersion: "HTTP/1.1",
                cookies: [],
                headers: request.headers,
                queryString: [],
                headersSize: -1,
                bodySize: -1
            },
            response: {
                status: endReply.status,
                statusText: endReply.statusText,
                httpVersion: "HTTP/1.1",
                cookies: [],
                headers: endReply.headers,
                redirectURL: "",
                headersSize: -1,
                bodySize: startReply.bodySize,
                content: {
                    size: startReply.bodySize,
                    mimeType: endReply.contentType
                }
            },
            cache: {},
            timings: {
                blocked: 0,
                dns: -1,
                connect: -1,
                send: 0,
                wait: startReply.time - request.time,
                receive: endReply.time - startReply.time,
                ssl: -1
            },
            pageref: address
        });
    });

    return {
        log: {
            version: '1.2',
            creator: {
                name: "PhantomJS",
                version: phantom.version.major + '.' + phantom.version.minor +
                    '.' + phantom.version.patch
            },
            pages: [{
                startedDateTime: startTime.toISOString(),
                id: address,
                title: title,
                pageTimings: {
                    // Measured from the same startTime that is reported in
                    // startedDateTime above.
                    onLoad: page.endTime - startTime
                }
            }],
            entries: pageEntries
        }
    };
}
// Single WebPage instance that is reused for every URL in the queue.
var page = require('webpage').create();
// PhantomJS file-system module; used to write each HAR file to disk.
var fs = require('fs');
// Number of pages started so far; used to build the "file<N>.har" name.
var count = 0;
/**
 * Captures one HAR file per URL in `links`, loading one page at a time.
 *
 * Each call takes the LAST address off `links`, loads it in the shared
 * `page`, waits 10 seconds after the load callback fires, writes the HAR to
 * "file<count>.har" and then calls itself for the remaining addresses.
 * PhantomJS exits with status 0 once the queue is empty and with status 1
 * as soon as one address fails to load.
 *
 * Relies on the script-level `page`, `fs`, `count`, `entries`, `createHAR`
 * and `phantom`.
 *
 * @param {string[]} links URL queue; it is consumed (mutated) from the end.
 */
function processSites(links)
{
    // Nothing to load: finish cleanly instead of opening an undefined address.
    if (links.length === 0) {
        phantom.exit();
        return;
    }

    page.address = links.pop();
    var path = 'file' + count + '.har';
    page.resources = [];
    // Debug trace; the list has just been reset, so it is empty here.
    console.log("page resources:", page.resources);
    count = count + 1;

    page.onLoadStarted = function() {
        page.startTime = new Date();
    };

    page.onResourceRequested = function (req) {
        page.resources[req.id] = {
            request: req,
            startReply: null,
            endReply: null
        };
    };

    page.onResourceReceived = function (res) {
        var record = page.resources[res.id];
        // page.resources is reset for every address, so a response can show
        // up for a request that is not in the current list; ignore it
        // instead of throwing on an undefined record.
        if (!record) {
            return;
        }
        if (res.stage === 'start') {
            record.startReply = res;
        }
        if (res.stage === 'end') {
            record.endReply = res;
        }
    };

    page.open(page.address, function (status) {
        // Build the HAR 10 s after the load callback
        // (presumably to let late resources finish -- confirm).
        setTimeout(function() {
            var har;

            if (status !== 'success') {
                console.log('FAIL to load the address');
                phantom.exit(1);
                return;
            }

            page.endTime = new Date();
            page.title = page.evaluate(function() {
                return document.title;
            });

            // Drop whatever an earlier page left in the shared accumulator.
            entries = [];
            har = createHAR(page.address, page.title, page.startTime, page.resources);
            fs.write(path, JSON.stringify(har), 'w');

            if (links.length > 0) {
                processSites(links);
            } else {
                phantom.exit();
            }
        }, 10000);
    });
}
// URLs to capture. processSites() takes addresses off the END of this array
// with pop(), so "http://marvel.com" is opened first and the array is
// consumed as pages are processed.
// NOTE(review): "http://*.com" does not look like a loadable host --
// presumably a redacted placeholder; confirm before running.
var links = ["http://*.com", "http://marvel.com"];
// Start the capture; processSites() re-invokes itself for the remaining URLs.
processSites(links);
更新:
上面的代码会生成两个 HAR 文件 file1.har 和 file2.har,但第二个 HAR 文件还包含了两个链接产生的 HAR 内容,而它本应只包含第一个链接的 HAR 内容……
通过设置 var har = " " 解决了这个问题。
答
你不能在一个简单的循环里反复调用 page.open 来打开 PhantomJS 页面,因为 page.open 方法是异步的。它不会等待第一个站点处理完毕,而是马上打开第二个站点。
我已将您的脚本改写为使用递归:只有在当前站点处理完之后,才会打开下一个站点。(注意:如果队列中有任何站点无法加载,整个过程就会停止,但您可以很容易地改写脚本来避免这种情况。)
// Fallback for engines that have no native Date.prototype.toISOString.
if (!Date.prototype.toISOString) {
Date.prototype.toISOString = function() {
// ... (body elided in this listing -- presumably the same as in the
// question's script; as written here the function returns undefined)
}
}
// Script-level list of HAR entries.
// NOTE(review): the question's createHAR pushes into this shared array, so
// entries from earlier pages stay in it unless it is reset between pages.
var entries = [];
// Builds the HAR object for one page from its address, title, load start
// time and recorded resources.
function createHAR(address, title, startTime, resources)
{
// ... (body elided in this listing -- presumably unchanged from the
// question's script)
}
// Single WebPage instance that is reused for every URL in the queue.
var page = require('webpage').create();
/**
 * Prints one HAR document per URL in `links`, loading one page at a time.
 *
 * Each call takes the LAST address off `links`, loads it in the shared
 * `page`, waits 10 seconds after the load callback fires, prints the HAR as
 * indented JSON and then calls itself for the remaining addresses.
 * PhantomJS exits with status 0 once the queue is empty and with status 1
 * as soon as one address fails to load.
 *
 * Relies on the script-level `page`, `entries`, `createHAR` and `phantom`.
 *
 * @param {string[]} links URL queue; it is consumed (mutated) from the end.
 */
function processSites(links)
{
    // Nothing to load: finish cleanly instead of opening an undefined address.
    if (links.length === 0) {
        phantom.exit();
        return;
    }

    page.address = links.pop();
    console.log("PAGE ADDRESS: ", page.address);
    page.resources = [];

    page.onLoadStarted = function() {
        page.startTime = new Date();
    };

    page.onResourceRequested = function (req) {
        page.resources[req.id] = {
            request: req,
            startReply: null,
            endReply: null
        };
    };

    page.onResourceReceived = function (res) {
        var record = page.resources[res.id];
        // page.resources is reset for every address, so a response can show
        // up for a request that is not in the current list; ignore it
        // instead of throwing on an undefined record.
        if (!record) {
            return;
        }
        if (res.stage === 'start') {
            record.startReply = res;
        }
        if (res.stage === 'end') {
            record.endReply = res;
        }
    };

    page.open(page.address, function (status) {
        // Build the HAR 10 s after the load callback
        // (presumably to let late resources finish -- confirm).
        setTimeout(function() {
            var har;

            if (status !== 'success') {
                console.log('FAIL to load the address');
                phantom.exit(1);
                return;
            }

            page.endTime = new Date();
            page.title = page.evaluate(function() {
                return document.title;
            });

            // Start every page with an empty shared entry list; otherwise
            // the HAR of a later page also carries the entries of the pages
            // processed before it.
            entries = [];
            har = createHAR(page.address, page.title, page.startTime, page.resources);
            console.log(JSON.stringify(har, undefined, 4));

            if (links.length > 0) {
                processSites(links);
            } else {
                phantom.exit();
            }
        }, 10000);
    });
}
// URLs to capture. processSites() takes addresses off the END of this array
// with pop(), so "http://*.com" is opened first and the array is consumed
// as pages are processed.
// NOTE(review): "http://*.com" does not look like a loadable host --
// presumably a redacted placeholder; confirm before running.
var links = ["http://edition.cnn.com", "http://*.com"];
// Start the capture; processSites() re-invokes itself for the remaining URLs.
processSites(links);
我发现 'http://edition.cnn.com' 的 HAR 文件也会包含为 'http://*.com' 生成的 HAR 内容……有没有办法让每个站点的 HAR 内容单独输出? – Valip
Vaviloff你还能帮助我吗?我更新了这个问题。谢谢! – Valip
也许 'var entries = [];' 应该放在 'createHAR' 函数内?我没有仔细读那部分代码,因为原问题问的是如何遍历链接数组,而不是如何创建 HAR 文件本身。 – Vaviloff