import requests
import re,os
from urllib import request
# Crawl the mzitu.com listing pages, follow every album link found on them,
# and save the first matching image of each album page into DOWNLOAD_DIR.

LISTING_URL_TEMPLATE = 'http://www.mzitu.com/page/%s/'
# Referer sent with listing-page requests.
LISTING_REFERER = 'http://www.mzitu.com/146823'
# Referer sent with album-page requests.
ALBUM_REFERER = 'http://www.mzitu.com/'
# Listing page numbers to crawl.
# NOTE(review): page 0 is requested exactly as before -- confirm the site
# actually serves /page/0/.
LISTING_PAGES = range(0, 4)
# Sub-pages fetched per album (album_url/1 .. album_url/9).
ALBUM_PAGES = range(1, 10)
DOWNLOAD_DIR = 'download'
# Seconds to wait on a request before giving up; without a timeout a stalled
# connection would block the script forever.
REQUEST_TIMEOUT = 10

USER_AGENT = 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.84 Safari/537.36'

# Browser-like headers shared by listing-page and album-page requests.
# The Referer differs per request kind and is added by page_headers().
PAGE_HEADERS = {
    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/webp,image/apng,*/*;q=0.8',
    'Accept-Encoding': 'gzip, deflate',
    'Accept-Language': 'zh-CN,zh;q=0.9',
    'Cache-Control': 'max-age=0',
    'Connection': 'keep-alive',
    'Cookie': 'Hm_lvt_dbc355aef238b6c32b43eacbbf161c3c=1534477754,1534515220,1534596942; Hm_lpvt_dbc355aef238b6c32b43eacbbf161c3c=1534597512',
    'Host': 'www.mzitu.com',
    'If-Modified-Since': 'Fri, 17 Aug 2018 13:59:46 GMT',
    'Upgrade-Insecure-Requests': '1',
    'User-Agent': USER_AGENT,
}

# Compiled once instead of on every loop iteration.
ALBUM_LINK_RE = re.compile('<span><a href="(.*?)" target="_blank">')
IMAGE_SRC_RE = re.compile(r'img\ssrc="(.+?)"\sa')


def page_headers(referer):
    """Return a fresh copy of PAGE_HEADERS with the given Referer added."""
    headers = dict(PAGE_HEADERS)
    headers['Referer'] = referer
    return headers


def extract_album_urls(html):
    """Return every album link found in a listing page, in document order."""
    return ALBUM_LINK_RE.findall(html)


def extract_image_url(html):
    """Return the first image URL found in an album page, or None if absent."""
    match = IMAGE_SRC_RE.search(html)
    return match.group(1) if match else None


def image_path(image_url):
    """Return the local path for an image URL, or None if it has no file name.

    The file name is the last path segment of the URL; any query string or
    fragment is dropped so it cannot end up in the file name.
    """
    url_path = image_url.split('#', 1)[0].split('?', 1)[0]
    name = url_path.split('/')[-1]
    if not name:
        return None
    return os.path.join(DOWNLOAD_DIR, name)


def fetch(url, headers):
    """GET ``url`` and return the response, or None when the request fails.

    Connection errors, timeouts and 4xx/5xx statuses are reported on stdout
    and turned into None so that one bad URL does not abort the whole crawl.
    """
    try:
        response = requests.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
        response.raise_for_status()
    except requests.RequestException as exc:
        print('request failed: %s (%s)' % (url, exc))
        return None
    return response


def download_album(album_url):
    """Download the image shown on each sub-page of one album."""
    for page_no in ALBUM_PAGES:
        page = fetch(album_url + '/%s' % page_no, page_headers(ALBUM_REFERER))
        if page is None:
            continue
        image_url = extract_image_url(page.text)
        if image_url is None:
            # Pages without a matching <img> tag are skipped.
            continue
        target = image_path(image_url)
        if target is None:
            print('no file name in image url: %s' % image_url)
            continue
        # The image request carries the album URL as its Referer
        # (presumably required by the image host -- TODO confirm).
        image = fetch(image_url, {'Referer': album_url, 'User-Agent': USER_AGENT})
        if image is None:
            continue
        try:
            with open(target, 'wb') as pic:
                pic.write(image.content)
        except OSError as exc:
            print('could not write %s (%s)' % (target, exc))
            continue
        print('saved %s -> %s' % (image_url, target))


def main():
    """Crawl every listing page and download all albums linked from it."""
    os.makedirs(DOWNLOAD_DIR, exist_ok=True)
    for listing_no in LISTING_PAGES:
        listing = fetch(LISTING_URL_TEMPLATE % listing_no,
                        page_headers(LISTING_REFERER))
        if listing is None:
            continue
        for album_url in extract_album_urls(listing.text):
            download_album(album_url)


if __name__ == '__main__':
    main()
# Crawler: Meizitu (mzitu.com)
# You may also like
# Reposted from blog.csdn.net/cheng535/article/details/81837462
# Today's recommendations
# Weekly ranking