"""Download the images referenced on the first listing pages of mzitu.com.

For each listing page the script fetches the HTML, extracts every
``data-original`` image URL with a regular expression and stores the image
in the local ``download`` directory under the last path segment of its URL.
"""
import os
import re
from urllib import request

import requests
from lxml import etree

import mysqlhelper

# NOTE: ``request``, ``etree`` and ``mysqlhelper`` are not used by the code
# below; the imports are kept exactly as the original script had them.

# Listing page URL template; ``%s`` is replaced by the page number.
base_url = 'http://www.mzitu.com/page/%s/'

# Headers sent with every request (listing pages and images alike).
# NOTE(review): the Referer is presumably needed to get past the site's
# hotlink protection -- confirm before removing it.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.62 Safari/537.36',
    'Referer': 'http://www.mzitu.com/page/2/',
}

DOWNLOAD_DIR = 'download'
FIRST_PAGE = 1
LAST_PAGE = 3  # inclusive; the original loop was range(1, 4)
REQUEST_TIMEOUT = 10  # seconds; without a timeout a stalled server hangs the run

# Compiled once instead of on every page.  The pattern is unchanged from the
# original script.
# NOTE(review): both ``.*`` are greedy and ``.`` does not cross newlines, so
# at most one image is captured per line of HTML -- verify against the live
# markup if images appear to be missing.
IMG_SRC_RE = re.compile(r'<li>.*<img.*data-original=\'(.*?)\' />')


def fetch(session, url):
    """Return the successful response for *url*, or ``None`` on failure.

    Connection errors, timeouts and non-2xx status codes are reported on
    stdout and turned into ``None`` so that one bad URL does not abort the
    whole crawl.
    """
    try:
        response = session.get(url, headers=headers, timeout=REQUEST_TIMEOUT)
        # Reject 4xx/5xx answers; otherwise an HTML error page would be
        # saved to disk as if it were an image.
        response.raise_for_status()
    except requests.RequestException as exc:
        print('failed to fetch %s: %s' % (url, exc))
        return None
    return response


def find_image_urls(html):
    """Return the list of image URLs captured by ``IMG_SRC_RE`` in *html*."""
    return IMG_SRC_RE.findall(html)


def save_image(session, img_url):
    """Download *img_url* into ``DOWNLOAD_DIR``.

    The file is named after the last ``/``-separated segment of the URL.
    Returns ``True`` when the file was written, ``False`` when the URL has
    no usable file name or the download failed.
    """
    name = img_url.split('/')[-1]
    if not name:
        # A URL ending in '/' would make us try to open the directory itself.
        print('skipping %s: no file name in URL' % img_url)
        return False

    # img_url is the address of the image itself, not of the listing page.
    response = fetch(session, img_url)
    if response is None:
        return False

    with open(os.path.join(DOWNLOAD_DIR, name), 'wb') as f:
        f.write(response.content)
    return True


def main():
    """Crawl listing pages FIRST_PAGE..LAST_PAGE and save every image found."""
    # exist_ok avoids the check-then-create race of os.path.exists + os.mkdir.
    os.makedirs(DOWNLOAD_DIR, exist_ok=True)

    # One session for the whole run so connections to the host are reused.
    with requests.Session() as session:
        for page in range(FIRST_PAGE, LAST_PAGE + 1):
            listing = fetch(session, base_url % page)
            if listing is None:
                continue
            for img_url in find_image_urls(listing.text):
                print(img_url)
                save_image(session, img_url)


if __name__ == '__main__':
    main()
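# Title: crawl images from mzitu (爬取mzi图片)
# You may also like (猜你喜欢)
# Reposted from blog.csdn.net/weixin_42958164/article/details/81783616
# Today's recommendations (今日推荐)
# Weekly ranking (周排行)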