使用 Node.js 爬取网站图片
目标网站:http://m.mmjpg.com/hot/
// Image crawler: builds the list of candidate image URLs for each year and
// downloads them one at a time into <downDest>/<year>/<index>.jpg.

const http = require('http'); // required by the original listing; not referenced below
const request = require('request');
const fs = require('fs');
const Promise = require('bluebird');

// Range of year folders to crawl (inclusive) and the exclusive upper bound of
// the per-year image index (indexes 1..MAX_INDEX-1 are tried).
const FIRST_YEAR = 2015;
const LAST_YEAR = 2018;
const MAX_INDEX = 2000;

// Download root. It is created on demand when the running Node version
// supports the `recursive` option of fs.mkdirSync; change it to your own folder.
const downDest = "D:/MeiziTu";

/**
 * Downloads a single file and writes it to disk.
 *
 * The returned promise always settles. A request error, a non-200 response or
 * a failed disk write is logged and reported as `false` instead of leaving the
 * promise pending, so a serial download queue keeps moving past missing files.
 *
 * @param {string} uri - Absolute URL of the file to fetch.
 * @param {string} filename - Destination path on disk.
 * @returns {Promise<boolean>} Resolves to `true` when the file was saved,
 *   `false` when it was skipped.
 */
function downloadFile(uri, filename) {
  return new Promise((resolve) => {
    console.log("downloadURL:" + uri);
    request(
      {
        uri: uri,
        encoding: 'binary',
        method: "GET",
        // NOTE(review): the header value is the literal string "Referer", kept
        // from the original; presumably the server only checks that a referer
        // is present - confirm, or replace it with the real page URL.
        headers: { referer: "Referer" },
      },
      (error, response, body) => {
        if (error) {
          console.error(`download failed: ${uri} (${error.message})`);
          resolve(false);
          return;
        }
        if (response.statusCode !== 200) {
          console.error(`download skipped: ${uri} (HTTP ${response.statusCode})`);
          resolve(false);
          return;
        }
        try {
          // The body was requested with 'binary' encoding, so it must be
          // written back with the same encoding to keep the bytes intact.
          fs.writeFileSync(filename, body, 'binary');
        } catch (writeError) {
          console.error(`write failed: ${filename} (${writeError.message})`);
          resolve(false);
          return;
        }
        console.log("downloaded:" + filename);
        resolve(true);
      }
    );
  });
}

// Build the full list of download jobs. The index loop restarts at 1 for every
// year so that each year folder gets its own set of candidates.
const downloadArr = [];
for (let curYear = FIRST_YEAR; curYear <= LAST_YEAR; curYear += 1) {
  const yearPath = `${downDest}/${curYear}`;
  if (!fs.existsSync(yearPath)) {
    fs.mkdirSync(yearPath, { recursive: true });
  }
  for (let curIndex = 1; curIndex < MAX_INDEX; curIndex += 1) {
    const filename = `${curIndex}.jpg`;
    downloadArr.push({
      url: `http://img.mmjpg.com/large/${curYear}/${filename}`,
      path: `${yearPath}/${filename}`,
    });
  }
}

// Run the downloads serially, one request at a time.
Promise.mapSeries(downloadArr, (item) => downloadFile(item.url, item.path))
  .then((results) => {
    const savedCount = results.filter(Boolean).length;
    console.log(`saved ${savedCount} of ${results.length} files`);
    console.log("全部任务执行完成");
  })
  .catch((err) => {
    console.error(`download queue aborted: ${err.message}`);
    process.exitCode = 1;
  });

// How to run: follow the numbered steps after this listing.
1. 安装 node 运行环境,去 官网:https://nodejs.org/zh-cn/ 下载安装即可。
2. 安装依赖库 request 和 bluebird,控制台输入命令 npm install request bluebird 即可安装。
3. 新建一个 js 文件,复制上面的代码,在控制台 使用 node 你的文件名.js 即可运行,如下图。