用node爬点羞羞图吧~2019-01-24

npm i request request-promise cheerio
node index.js
const rp = require("request-promise")
const fs = require("fs")
const cheerio = require("cheerio")

/**
 * Crawls a paginated gallery site and saves every album's images into a
 * folder (named after the album) next to this script.
 *
 * Constructing an instance starts the crawl immediately.
 */
class Download {
  /**
   * @param {string} url - Site root URL. List pages are requested as
   *   `${url}page/<n>`, so the URL is expected to end with a slash.
   */
  constructor(url) {
    this.curPage = 1;
    // A constructor cannot await main(); attach a handler so that a network or
    // file-system failure is reported instead of becoming an unhandled rejection.
    this.main(url).catch((err) => {
      console.error('抓取失败:', err);
    });
  }

  /**
   * Walks every list page, then every album on that page, sequentially.
   *
   * @param {string} url - Site root URL.
   * @returns {Promise<void>}
   */
  async main(url) {
    const pageTotal = await this.getPageTotal(url);
    console.log(`成功获取到 ${pageTotal} 个页面,开始分页获取~`);
    for (let page = 1; page <= pageTotal; page++) {
      const pageList = await this.getPageData(url, page);
      for (let i = 0; i < pageList.length; i++) {
        const $ = await this.getPage(pageList[i].url);
        console.log(`开始下载第 ${page} 页,第${i + 1} 组的图片,共 ${this.getImageCount($)} 张......`);
        await this.mkdirFolder($, pageList[i], i);
      }
    }
  }

  /**
   * Fetches a URL and returns the body loaded into cheerio.
   *
   * @param {string} url - Page to fetch.
   * @returns {Promise<Function>} The cheerio `$` selector function for the page.
   */
  async getPage(url) {
    return rp({
      url,
      transform: (body) => cheerio.load(body),
    });
  }

  /**
   * Reads the number of list pages from the second-to-last pagination link.
   *
   * @param {string} url - Site root URL.
   * @returns {Promise<number>} Page count, or 0 when it cannot be parsed.
   */
  async getPageTotal(url) {
    const $ = await this.getPage(url);
    // Parse explicitly rather than relying on string-to-number coercion in the
    // caller's loop condition.
    const pageTotal = Number.parseInt($('.nav-links a').eq(-2).html(), 10);
    return Number.isNaN(pageTotal) ? 0 : pageTotal;
  }

  /**
   * Reads the number of images in an album from the second-to-last entry of
   * the album page's navigation.
   *
   * @param {Function} $ - cheerio `$` of an album page.
   * @returns {number} Image count, or 0 when it cannot be parsed.
   */
  getImageCount($) {
    const count = Number.parseInt($('.pagenavi a').eq(-2).find('span').html(), 10);
    return Number.isNaN(count) ? 0 : count;
  }

  /**
   * Collects the albums shown on one list page.
   *
   * @param {string} url - Site root URL.
   * @param {number} curPage - 1-based list page number.
   * @returns {Promise<Array<{name: string, url: string}>>} Album title (the
   *   thumbnail's `alt`) and album URL (the `href` of the thumbnail's parent).
   */
  async getPageData(url, curPage) {
    const pageList = [];
    const $ = await this.getPage(`${url}page/${curPage}`);
    $('#pins li img').each(function () {
      const name = $(this).attr('alt');
      const href = $(this).parent().attr('href');
      // A thumbnail whose parent carries no href cannot be followed.
      if (href) {
        pageList.push({ name, url: href });
      }
    });
    return pageList;
  }

  /**
   * Creates the album folder and downloads the album's images into it.
   * When the folder already exists the album is skipped.
   *
   * @param {Function} $ - cheerio `$` of the album's first page.
   * @param {{name: string, url: string}} pageList - The album to download.
   * @param {number} i - Index of the album on its list page (unused).
   * @returns {Promise<void>}
   */
  async mkdirFolder($, pageList, i) {
    // Query the DOM once instead of on every loop iteration.
    const imageCount = this.getImageCount($);
    const perPageUrl = [];
    for (let n = 1; n <= imageCount; n++) {
      perPageUrl.push(`${pageList.url}/${n}`);
    }
    console.log(perPageUrl.length);

    const folderName = `${__dirname}/${pageList.name}`;
    try {
      // mkdirSync takes no callback; it throws, e.g. with code EEXIST when the
      // folder is already present.
      fs.mkdirSync(folderName);
    } catch (err) {
      if (err.code !== 'EEXIST') throw err;
      console.log(folderName + '目录已存在,跳过写入图片');
      return;
    }
    console.log(`${pageList.name} 文件夹创建成功,开始写入图片~`);
    await this.touchFile(perPageUrl, pageList.name);
  }

  /**
   * Downloads one image per album page and writes it as `<index>.jpg`
   * (0-based) inside the album folder.
   *
   * @param {string[]} perPageUrl - URLs of the album's individual image pages.
   * @param {string} name - Album folder name, relative to this script's directory.
   * @returns {Promise<void>}
   */
  async touchFile(perPageUrl, name) {
    const total = perPageUrl.length;
    for (let i = 0; i < total; i++) {
      const pageUrl = perPageUrl[i];
      const $ = await this.getPage(pageUrl);
      const imgSrc = $('.main-image img').attr('src');
      if (!imgSrc) {
        console.log(`未找到图片地址,跳过 ${pageUrl}`);
        continue;
      }
      // Browser-like headers with the image page as Referer, to get past
      // hotlink protection.
      // NOTE(review): Host is hard-coded; presumably it must match the host of
      // imgSrc - confirm against the live site.
      const headers = {
        Accept: "text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8",
        "Accept-Encoding": "gzip, deflate",
        "Accept-Language": "zh-CN,zh;q=0.9,en;q=0.8",
        "Cache-Control": "no-cache",
        Host: "i.meizitu.net",
        Pragma: "no-cache",
        "Proxy-Connection": "keep-alive",
        Referer: pageUrl,
        "Upgrade-Insecure-Requests": 1,
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/65.0.3325.19 Safari/537.36"
      };
      // encoding: null makes the request resolve with the raw bytes as a
      // Buffer. Awaiting the result of .pipe() awaited a stream, not a
      // promise, so the loop never actually waited for the file to be written.
      const image = await rp({
        url: imgSrc,
        headers,
        encoding: null,
      });
      fs.writeFileSync(`${__dirname}/${name}/${i}.jpg`, image);
      console.log(`正在下载 ${name} 图组 ${i + 1}/${total}`);
    }
  }
}
// Entry point: constructing Download starts the crawl from the site root right away.
const start = new Download('https://www.mzitu.com/');

目前当图组文件夹已存在时,mkdirSync 会抛出异常导致程序中断,紧急修复中……

猜你喜欢

转载自blog.csdn.net/weixin_34216107/article/details/87578033