# 利用xpath下载图片 (Download images using XPath)

import requests
import os
import time
from urllib import request
from lxml import etree


# Crawl listing pages 1-120 of www.mzitu.com/xinggan.  For every gallery linked
# from a listing page, create a directory named after the gallery title (in the
# current working directory) and save each of the gallery's images into it.
#
# Network failures and unexpected page markup are reported and the affected
# page / gallery / image is skipped, so one bad response does not abort the
# whole crawl.

LISTING_URL = 'http://www.mzitu.com/xinggan/page/%s/'
FIRST_PAGE = 1
LAST_PAGE = 120

# Seconds to wait for a server response.  requests has no default timeout, so
# without this a single stalled connection would block the crawl forever.
REQUEST_TIMEOUT = 10

USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36'

# Headers sent with every HTML page request; built once instead of per page.
PAGE_HEADERS = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
    'Accept-Language': 'zh-CN,zh;q=0.9',
    'Connection':'keep-alive',
    'Host': 'www.mzitu.com',
    'Referer': 'http://www.mzitu.com/146445/',
    'Cookie': 'Hm_lvt_dbc355aef238b6c32b43eacbbf161c3c=1534504633; Hm_lpvt_dbc355aef238b6c32b43eacbbf161c3c=1534508588',
    'User-Agent': USER_AGENT,
}

# Gallery titles become directory names; map characters that are path
# separators or are rejected by common filesystems to '_'.
_UNSAFE_NAME_CHARS = str.maketrans({char: '_' for char in '\\/:*?"<>|'})


def _fetch(url, headers):
    """Return the response for *url*.

    Raises requests.RequestException on a network error, a timeout, or a
    non-2xx HTTP status.
    """
    response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    return response


def _fetch_tree(url):
    """Download the HTML page at *url* and return its parsed lxml tree."""
    return etree.HTML(_fetch(url, PAGE_HEADERS).text)


def _first(node, expression):
    """Return the first XPath match of *expression* under *node*, or None."""
    if node is None:
        return None
    matches = node.xpath(expression)
    return matches[0] if matches else None


def _page_count(gallery_tree):
    """Return the number of image pages of a gallery, or None if unknown.

    The count is the last path segment of the second-to-last link in the
    "pagenavi" block.
    """
    if gallery_tree is None:
        return None
    nav_links = gallery_tree.xpath('//div[@class="pagenavi"]/a/@href')
    if len(nav_links) < 2:
        return None
    # rstrip guards against an href with a trailing slash, which would
    # otherwise make the last segment an empty string.
    last_segment = nav_links[-2].rstrip('/').split('/')[-1]
    try:
        return int(last_segment)
    except ValueError:
        return None


def _download_image(page_url, gallery_url, target_dir):
    """Save the image shown on *page_url* into *target_dir*.

    Returns the path of the written file, or None when the page contains no
    main image.  Raises requests.RequestException on download failure.
    """
    img_url = _first(_fetch_tree(page_url),
                     '//div[@class="main-image"]/p/a/img/@src')
    if img_url is None:
        return None
    img_name = os.path.join(target_dir, img_url.split('/')[-1])
    # The image request carries the gallery URL as Referer (presumably the
    # image host rejects requests without it -- TODO confirm).
    image_headers = {'Referer': gallery_url, 'User-Agent': USER_AGENT}
    image = _fetch(img_url, image_headers)
    with open(img_name, 'wb') as f:
        f.write(image.content)
    return img_name


def _download_gallery(item):
    """Download every image of the gallery described by listing entry *item*."""
    title_link = _first(item, './span[1]/a')
    gallery_url = _first(item, './a/@href')
    if title_link is None or not title_link.text or gallery_url is None:
        print('skipping listing entry: title or link not found')
        return

    dirname = title_link.text.translate(_UNSAFE_NAME_CHARS).strip()
    if not dirname:
        print('skipping %s: empty gallery title' % gallery_url)
        return
    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs(dirname, exist_ok=True)

    try:
        total = _page_count(_fetch_tree(gallery_url))
    except requests.RequestException as err:
        print('skipping %s: %s' % (gallery_url, err))
        return
    if total is None:
        print('skipping %s: page count not found' % gallery_url)
        return

    # One HTML page per image: <gallery_url>/1 ... <gallery_url>/<total>.
    for i in range(1, total + 1):
        page_url = gallery_url + '/%s' % i
        try:
            img_name = _download_image(page_url, gallery_url, dirname)
        except requests.RequestException as err:
            print('skipping %s: %s' % (page_url, err))
            continue
        if img_name is None:
            print('skipping %s: image not found' % page_url)
            continue
        print('%s下载完毕'%img_name)


for k in range(FIRST_PAGE, LAST_PAGE + 1):
    try:
        listing = _fetch_tree(LISTING_URL % k)
    except requests.RequestException as err:
        print('skipping listing page %s: %s' % (k, err))
        continue
    li_list = listing.xpath('//ul[@id="pins"]/li') if listing is not None else []
    for res in li_list:
        _download_gallery(res)
    print('%s页下载完毕'%k)

# 猜你喜欢 (You may also like)
#
# 转载自 (reposted from) blog.csdn.net/weixin_38920937/article/details/81783718