跟师傅学习的那点事儿--爬虫JS解密--天气网站。
目标网站:
https://www.aqistudy.cn/html/city_detail.html
当打开这个网站,发现所有的数据都是以图表形式展现的,用selenium模拟浏览器很难直接提取图表里的数据,于是果断放弃这种方式。
F12分析,
点击之后发现,Network中只多了下面两个请求:
打开请求分析:
我们找到的请求中,数据是一串看不懂的密文,明显是经过加密的。
找到按钮所在的位置:
全局搜索框住的id:
发现点击按钮时会触发一个点击事件,其中调用了getData()这样一个函数,
搜索之后,找到这个函数:
/**
 * Click handler for the query button: reads the form, stores the chosen city
 * in a cookie and starts both data requests.
 *
 * Writes the shared page-level variables `state`, `city` and `type` (none of
 * them is declared locally). `state` is reset to 0 here and incremented by the
 * request callbacks of getAQIData()/getWeatherData().
 *
 * For an hourly query ("HOUR") the selected range must not exceed 30 days;
 * otherwise a message is shown and no request is sent.
 */
function getData() {
  // Upper bound for an hourly query range: 30 days expressed in milliseconds.
  const maxHourlySpanMs = 30 * 24 * 3600 * 1000;

  state = 0;
  city = $('#city').val();
  $.cookie('dcity', city, { expires: 30 });
  type = $('input:radio[name=type]:checked').val();
  getTimeSel();

  if (type == "HOUR") {
    // End time is read before start time, matching the original evaluation order.
    const endMs = converTimeFormat($('#dtbEndTime').datetimebox('getValue')).getTime();
    const startMs = converTimeFormat($('#dtbStartTime').datetimebox('getValue')).getTime();
    const spanMs = endMs - startMs;
    if (spanMs > maxHourlySpanMs) {
      showMessage(false, "按小时查询仅支持查询一个月数据,查看长时间变化趋势请选择按日查询!");
      return;
    }
  }

  getAQIData();
  getWeatherData();
}
发现其中又调用了getAQIData()和getWeatherData()这两个函数(即下图框中的部分):
继续寻找这两个函数:
/**
 * Requests the 'GETDETAIL' data set for the current query and refills the
 * pollutant chart series from the response.
 *
 * Reads the shared variables `city`, `type`, `startTime` and `endTime`, and
 * passes a cache period of 0.5 to getServerData() (the unit is decided by
 * getServerData/getDataFromLocalStorage and is not visible here).
 *
 * When the response reports `total > 0`, the callback:
 *   - empties and refills dataAQI, dataPM25, dataPM10, dataCO, dataNO2,
 *     dataO3, dataSO2 and dataRank with `{x: timestampMs, y: value}` points;
 *   - recomputes `dataPolar` from the averages of AQI, PM2.5, PM10, SO2, O3
 *     and NO2 (CO and rank are not part of it);
 *   - increments `state` and calls showCurrentTab() once `state >= 2`.
 * When `total` is not greater than 0 nothing is changed.
 */
function getAQIData() {
  const param = {
    city: city,
    type: type,
    startTime: startTime,
    endTime: endTime
  };

  getServerData('GETDETAIL', param, function (obj) {
    // `data` is deliberately left as an undeclared (page-level) assignment, as
    // in the original, in case other page code reads it.
    data = obj.data;
    if (!(data.total > 0)) {
      return;
    }

    // Clear in place so existing references to the series arrays stay valid.
    const allSeries = [dataAQI, dataPM25, dataPM10, dataCO, dataNO2, dataO3, dataSO2, dataRank];
    for (const series of allSeries) {
      series.splice(0, series.length);
    }

    const rows = data.rows;
    // The loop counter is local; the original leaked an implicit global `i`.
    for (let rowIndex = 0; rowIndex < rows.length; rowIndex++) {
      const row = rows[rowIndex];
      // Convert the row time once instead of once per series.
      const timestamp = converTimeFormat(row.time).getTime();

      dataAQI.push({ x: timestamp, y: parseInt(row.aqi, 10) });
      dataPM25.push({ x: timestamp, y: parseInt(row.pm2_5, 10) });
      dataPM10.push({ x: timestamp, y: parseInt(row.pm10, 10) });
      // CO keeps two decimals; every other series is truncated to an integer.
      dataCO.push({ x: timestamp, y: parseFloat(parseFloat(row.co).toFixed(2)) });
      dataNO2.push({ x: timestamp, y: parseInt(row.no2, 10) });
      dataO3.push({ x: timestamp, y: parseInt(row.o3, 10) });
      dataSO2.push({ x: timestamp, y: parseInt(row.so2, 10) });
      dataRank.push({ x: timestamp, y: parseInt(row.rank, 10) });
    }

    dataPolar = [
      calAvg(dataAQI),
      calAvg(dataPM25),
      calAvg(dataPM10),
      calAvg(dataSO2),
      calAvg(dataO3),
      calAvg(dataNO2)
    ];

    state++;
    if (state >= 2) {
      showCurrentTab();
    }
  }, 0.5);
}
/**
 * Requests the 'GETCITYWEATHER' data set for the current query and refills the
 * weather chart series from the response.
 *
 * Reads the shared variables `city`, `type`, `startTime` and `endTime`, and
 * passes a cache period of 0.5 to getServerData() (the unit is decided by
 * getServerData/getDataFromLocalStorage and is not visible here).
 *
 * When the response reports `total > 0`, the callback:
 *   - empties and refills dataTemp and dataHumi with `{x: timestampMs, y}`;
 *   - empties and refills dataWind with `{x, y, d, w, marker}` where `d` is the
 *     row's `wd`, `w` is the row's `tq`, and the marker symbol comes from
 *     getWindDirectionUrl(wd);
 *   - increments `state` and calls showCurrentTab() once `state >= 2`.
 * When `total` is not greater than 0 nothing is changed.
 */
function getWeatherData() {
  const param = {
    city: city,
    type: type,
    startTime: startTime,
    endTime: endTime
  };

  getServerData('GETCITYWEATHER', param, function (obj) {
    // `data` is deliberately left as an undeclared (page-level) assignment, as
    // in the original, in case other page code reads it.
    data = obj.data;
    if (!(data.total > 0)) {
      return;
    }

    // Clear in place so existing references to the series arrays stay valid.
    for (const series of [dataTemp, dataHumi, dataWind]) {
      series.splice(0, series.length);
    }

    const rows = data.rows;
    // The loop counter is local; the original leaked an implicit global `i`.
    for (let rowIndex = 0; rowIndex < rows.length; rowIndex++) {
      const row = rows[rowIndex];
      // Convert the row time once instead of once per series.
      const timestamp = converTimeFormat(row.time).getTime();

      dataTemp.push({ x: timestamp, y: parseInt(row.temp, 10) });
      dataHumi.push({ x: timestamp, y: parseInt(row.humi, 10) });
      dataWind.push({
        x: timestamp,
        y: parseInt(row.wse, 10),
        d: row.wd,
        w: row.tq,
        marker: { symbol: getWindDirectionUrl(row.wd) }
      });
    }

    state++;
    if (state >= 2) {
      showCurrentTab();
    }
  }, 0.5);
}
继续看:
这两个函数又调用了上面框中的函数:getServerData()
继续寻找getServerData()这个函数:
搜索之后找到:
全是乱七八糟的js,这是经过混淆的js代码,我们需要进行反混淆,推荐一个网站:http://www.bm8.com.cn/jsConfusion/
代码格式化之后:
/**
 * Fetches one API result, serving it from local storage when available.
 *
 * @param {string} method   API method name; also part of the cache key.
 * @param {Object} object   Query parameters; JSON-serialised into the cache key
 *                          and handed to getParam() to build the request body.
 * @param {Function} callback  Called with the `result` of a successful
 *                          response, or with the cached entry. Not called when
 *                          the server reports failure or the request fails.
 * @param {number} [period] Passed to getDataFromLocalStorage(); a fresh result
 *                          is saved to local storage only when `period > 0`.
 */
function getServerData(method, object, callback, period) {
  const key = hex_md5(method + JSON.stringify(object));
  const cached = getDataFromLocalStorage(key, period);
  if (cached) {
    callback(cached);
    return;
  }

  const param = getParam(method, object);
  $.ajax({
    url: '../apinew/aqistudyapi.php',
    data: {
      d: param
    },
    type: "post",
    success: function (raw) {
      // Kept local: the original assigned an undeclared global `obj` here.
      const response = JSON.parse(decodeData(raw));
      if (!response.success) {
        console.log(response.errcode, response.errmsg);
        return;
      }
      if (period > 0) {
        // Stamp the entry with its fetch time before caching it.
        response.result.time = new Date().getTime();
        localStorageUtil.save(key, response.result);
      }
      callback(response.result);
    },
    error: function (jqXHR, textStatus, errorThrown) {
      // Transport-level failures were silent before; report them like
      // server-side errors so a missing chart can be diagnosed.
      console.log('getServerData request failed:', method, textStatus, errorThrown);
    }
  });
}
getServerData()中又调用了getParam()这个函数来生成请求参数:
继续搜索getParam()函数:
/**
 * Builds the encrypted request body for one API call.
 *
 * getParam(method, obj) assembles
 *   { appId, method, timestamp, clienttype, object, secret }
 * where `secret` is hex_md5 over appId + method + timestamp + clienttype +
 * the JSON of `obj` with its top-level keys sorted. The JSON of that payload
 * is passed through BASE64.encrypt and then AES.encrypt with
 * aes_client_key / aes_client_iv; the AES result is returned.
 */
var getParam = (function () {
  // Shallow copy of `source` whose own keys are inserted in sorted order.
  function sortKeys(source) {
    const sorted = {};
    const keys = Object.keys(source).sort();
    for (let idx = 0; idx < keys.length; idx++) {
      sorted[keys[idx]] = source[keys[idx]];
    }
    return sorted;
  }

  return function (method, obj) {
    const appId = '1a45f75b824b2dc628d5955356b5ef18';
    const clienttype = 'WEB';
    const timestamp = Date.now();

    // The signature covers the sorted parameters; the payload itself carries
    // `obj` unchanged.
    const signatureBase = appId + method + timestamp + clienttype;
    const secret = hex_md5(signatureBase + JSON.stringify(sortKeys(obj)));

    const payload = {
      appId: appId,
      method: method,
      timestamp: timestamp,
      clienttype: clienttype,
      object: obj,
      secret: secret
    };

    const encoded = BASE64.encrypt(JSON.stringify(payload));
    return AES.encrypt(encoded, aes_client_key, aes_client_iv);
  };
})();
这里先用BASE64、再用AES对请求参数进行了加密;而服务器返回的数据也是密文,需要解密之后才能使用(即getServerData()中调用的decodeData()),那么解密方式在哪?接下来寻找解密方式。
/**
 * Decodes a raw API response body.
 *
 * Applies, in order: AES.decrypt with aes_server_key / aes_server_iv,
 * DES.decrypt with des_key / des_iv, then BASE64.decrypt.
 *
 * @param {*} data  Raw response body as received from the server.
 * @returns {*} Whatever BASE64.decrypt returns for the fully unwrapped payload.
 */
function decodeData(data) {
  const afterAes = AES.decrypt(data, aes_server_key, aes_server_iv);
  const afterDes = DES.decrypt(afterAes, des_key, des_iv);
  return BASE64.decrypt(afterDes);
}
找到了这样一个解密方式:依次进行AES解密、DES解密和BASE64解码。现在加密方式和解密方式都找到了,接下来模拟执行即可。
模拟执行代码如下:
import json

import execjs
import requests

# Query to replay against the site's API.
method = 'GETCITYWEATHER'
city = '杭州'
query_type = 'HOUR'  # renamed from `type` so the builtin is not shadowed
start_time = '2019-03-22 00:00:00'
end_time = '2019-03-22 23:00:00'

# Load the site's JavaScript so its own encrypt/decrypt routines can be reused.
# The file is read inside a `with` block so the handle is closed afterwards.
with open("./weather.js", "r", encoding='utf-8') as js_file:
    js = execjs.compile(js_file.read())

# `getEncryptedData` is expected to be defined in weather.js (it is not shown
# in this article); it must return the encrypted `d` request parameter.
params = js.call("getEncryptedData", method, city, query_type, start_time, end_time)
print(params)

# API endpoint
api = 'https://www.aqistudy.cn/apinew/aqistudyapi.php'
# A timeout keeps the script from hanging forever; fail loudly on HTTP errors
# instead of trying to decrypt an error page.
response = requests.post(api, data={'d': params}, timeout=30)
response.raise_for_status()

# Decrypt the response with the site's own decodeData() and parse the JSON.
decrypted_data = js.call('decodeData', response.text)
data = json.loads(decrypted_data)
print(data)
本文参考:https://cuiqingcai.com/5024.html?tdsourcetag=s_pctim_aiomsg