import requests
import os
import time
from urllib import request
from lxml import etree
# Inclusive range of listing pages to crawl
# (http://www.mzitu.com/xinggan/page/<n>/).
FIRST_PAGE = 1
LAST_PAGE = 120

# Seconds to wait on any single HTTP request. Without a timeout a stalled
# connection would block the whole crawl indefinitely.
REQUEST_TIMEOUT = 30

USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36'

# Hard-coded browser-like headers sent with every HTML page request.
PAGE_HEADERS = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
    'Accept-Language': 'zh-CN,zh;q=0.9',
    'Connection': 'keep-alive',
    'Host': 'www.mzitu.com',
    'Referer': 'http://www.mzitu.com/146445/',
    'Cookie': 'Hm_lvt_dbc355aef238b6c32b43eacbbf161c3c=1534504633; Hm_lpvt_dbc355aef238b6c32b43eacbbf161c3c=1534508588',
    'User-Agent': USER_AGENT,
}


def safe_dirname(title):
    """Return *title* with path separators replaced by underscores.

    The gallery title is used directly as a directory name; a separator in
    it would make the directory creation target a nested (usually missing)
    path. Titles without separators are returned unchanged.
    """
    name = title
    for separator in (os.sep, os.altsep, '/'):
        if separator:
            name = name.replace(separator, '_')
    return name


def last_page_number(nav_hrefs):
    """Return the number of image pages in a gallery.

    *nav_hrefs* is the list of link targets found in the gallery's
    ``pagenavi`` block. The second-to-last link is taken to be the last
    page (presumably the final link is the "next" button -- verify against
    the site markup), and its last URL path segment is the page number.

    Raises:
        ValueError: if there are fewer than two links or the last path
            segment is not an integer.
    """
    if len(nav_hrefs) < 2:
        raise ValueError(
            'expected at least two pagination links, got %d' % len(nav_hrefs))
    # rstrip tolerates a trailing slash, which would otherwise leave an
    # empty final segment.
    return int(nav_hrefs[-2].rstrip('/').split('/')[-1])


def fetch_tree(url):
    """Download *url* with the page headers and return the parsed HTML tree."""
    response = requests.get(url, headers=PAGE_HEADERS, timeout=REQUEST_TIMEOUT)
    return etree.HTML(response.text)


def download_gallery(title, gallery_url):
    """Save every image of one gallery into a directory named after *title*.

    Each gallery page ``<gallery_url>/<n>`` carries one main image; the image
    is written to ``<directory>/<last segment of the image URL>``. An image
    whose download returns an HTTP error status is reported and skipped
    instead of having the error body written to disk.
    """
    folder = safe_dirname(title)
    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs(folder, exist_ok=True)

    gallery_tree = fetch_tree(gallery_url)
    nav_hrefs = gallery_tree.xpath('//div[@class="pagenavi"]/a/@href')

    # Image requests use the gallery page itself as the Referer.
    image_headers = {
        'Referer': gallery_url,
        'User-Agent': USER_AGENT,
    }

    # All photos of this gallery, one per page.
    for page in range(1, last_page_number(nav_hrefs) + 1):
        page_url = gallery_url + '/%s' % page
        page_tree = fetch_tree(page_url)
        img_url = page_tree.xpath('//div[@class="main-image"]/p/a/img/@src')[0]
        img_name = os.path.join(folder, img_url.split('/')[-1])

        img = requests.get(
            img_url, headers=image_headers, timeout=REQUEST_TIMEOUT)
        if not img.ok:
            print('skipped %s (HTTP %s)' % (img_url, img.status_code))
            continue
        with open(img_name, 'wb') as f:
            f.write(img.content)
        print('%s下载完毕' % img_name)


def download_listing_page(page_number):
    """Download every gallery linked from listing page *page_number*."""
    url = 'http://www.mzitu.com/xinggan/page/%s/' % page_number
    listing_tree = fetch_tree(url)
    for item in listing_tree.xpath('//ul[@id="pins"]/li'):
        title = item.xpath('./span[1]/a')[0].text
        gallery_url = item.xpath('./a/@href')[0]
        download_gallery(title, gallery_url)
    print('%s页下载完毕' % page_number)


def main(first_page=FIRST_PAGE, last_page=LAST_PAGE):
    """Crawl listing pages *first_page* through *last_page* (inclusive)."""
    for page_number in range(first_page, last_page + 1):
        download_listing_page(page_number)


if __name__ == '__main__':
    main()
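# 利用xpath下载图片 (Download images using XPath)
# 猜你喜欢 (You may also like)
# 转载自 blog.csdn.net/weixin_38920937/article/details/81783718 (Reposted from this article)
# 今日推荐 (Today's recommendations)
# 周排行 (Weekly ranking)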