使用 Node.js 爬取网站图片

目标网站:http://m.mmjpg.com/hot/

var http = require('http');
var request = require('request');
var fs = require('fs');
var Promise = require('bluebird');
// Crawl cursors: the image index and the year that URL generation starts from.
let curIndex = 1;
let curYear = 2015;
/**
 * Download one file with an HTTP GET request and save it to disk.
 *
 * Failures reported through the request callback (transport error, non-200
 * status) and failures while writing the file are logged and skipped, so the
 * returned promise always settles and a serial download queue can move on to
 * the next item instead of stalling.
 *
 * @param {string} uri - URL to fetch.
 * @param {string} filename - Path the response body is written to.
 * @returns {Promise<void>} Resolves once the file has been written, or once a
 *     failed download has been logged and skipped.
 */
function downloadFile(uri,filename){
    return new Promise((resolve) => {
        console.log("downloadURL:"+uri);
        const options = {
            uri: uri,
            // The body is requested as a 'binary' string and written back
            // below with the same encoding.
            encoding: 'binary',
            method: "GET",
            headers: {
                // NOTE(review): this sends the literal text "Referer"; it looks
                // like a placeholder for a real referring page URL - confirm.
                referer: "Referer"
            }
        };
        request(options, (error, response, body) => {
            if (error) {
                console.error("download failed:" + uri, error);
                resolve();
                return;
            }
            if (response.statusCode !== 200) {
                console.error("download skipped:" + uri + " status:" + response.statusCode);
                resolve();
                return;
            }
            try {
                fs.writeFileSync(filename, body, 'binary');
            } catch (writeError) {
                // A throw inside this callback would otherwise escape as an
                // uncaught exception and leave the promise pending.
                console.error("write failed:" + filename, writeError);
                resolve();
                return;
            }
            console.log("downloaded:" + filename);
            resolve();
        });
    });
}
 
// Download destination; change it to your own folder, which must already exist.
const downDest = "D:/MeiziTu";
// Queue of { url, path } pairs, one per image to fetch.
const downloadArr = [];
for (; curYear <= 2018; curYear += 1) {
    const yearPath = downDest + "/" + curYear;
    if (!fs.existsSync(yearPath)) {
        fs.mkdirSync(yearPath);
    }
    // Restart the image index for every year; without the reset only the
    // first year would get any URLs queued.
    for (curIndex = 1; curIndex < 2000; curIndex += 1) {
        const filename = curIndex + ".jpg";
        downloadArr.push({
            url: "http://img.mmjpg.com/large/" + curYear + "/" + filename,
            path: yearPath + "/" + filename
        });
    }
}
 
// Run the queued downloads one at a time, in queue order.
Promise.mapSeries(downloadArr, (item) => downloadFile(item.url, item.path))
    .then(() => {
        console.log("全部任务执行完成");
    })
    .catch((error) => {
        // Report a rejected download explicitly instead of leaving it as an
        // unhandled rejection, and signal the failure through the exit code.
        console.error("download queue aborted:", error);
        process.exitCode = 1;
    });
运行方法
1. 安装 node 运行环境,去 官网:
https://nodejs.org/zh-cn/ 下载安装即可。
2. 安装依赖库:代码同时用到了 request 和 bluebird,控制台输入命令 npm install request bluebird 即可安装。

3. 新建一个 js 文件,复制上面的代码,在控制台 使用 node 你的文件名.js 即可运行,如下图。


猜你喜欢

转载自blog.csdn.net/apple2344/article/details/80223748