# 爬取mzitu图片 — scrape images from mzitu.com

import requests
import mysqlhelper
from lxml import etree
import os
import re
from urllib import request

# Listing-page URL template; ``%s`` is replaced by the 1-based page number.
base_url = 'http://www.mzitu.com/page/%s/'

# Headers sent with every request (listing pages and image files alike).
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/62.0.3202.62 Safari/537.36',
    # NOTE(review): presumably needed to get past the site's hot-link
    # protection when fetching image files -- confirm against the live site.
    'Referer': 'http://www.mzitu.com/page/2/',
}

# Directory (relative to the working directory) that receives the images.
DOWNLOAD_DIR = 'download'

# Seconds to wait for a server before giving up; without a timeout a stalled
# connection would block the script forever.
REQUEST_TIMEOUT = 10

# One match per ``<li>`` item.  The quantifiers are non-greedy so that several
# items sharing a single line of HTML each yield their own URL; a greedy
# ``.*`` would swallow the whole line and return only its last image.
IMG_PATTERN = re.compile(r'<li>.*?<img.*?data-original=\'([^\']+)\' />')


def extract_image_urls(html):
    """Return the ``data-original`` image URLs found in *html*.

    URLs are returned in document order; an empty list is returned when the
    page contains no matching ``<li>`` / ``<img>`` pair.
    """
    return IMG_PATTERN.findall(html)


def image_name(img_url):
    """Return the file name to use for *img_url*.

    This is the last path segment of the URL with any ``?query`` or
    ``#fragment`` removed.  The result is an empty string when the URL ends
    with ``/`` and therefore carries no file name.
    """
    path = img_url.split('#', 1)[0].split('?', 1)[0]
    return path.split('/')[-1]


def download_image(session, img_url, directory=DOWNLOAD_DIR):
    """Download *img_url* into *directory* using *session*.

    Returns the path of the written file, or ``None`` when the URL has no
    usable file name or the request fails.  Failures are reported on stdout
    and never raised, so one bad image does not abort the whole crawl.
    """
    name = image_name(img_url)
    if not name:
        print('skipped (no file name in URL): %s' % img_url)
        return None

    # NOTE: the image URL is a different address from the listing-page URL.
    try:
        response = session.get(img_url, timeout=REQUEST_TIMEOUT)
        # Reject 4xx/5xx answers so an error page is never saved as an image.
        response.raise_for_status()
    except requests.RequestException as exc:
        print('failed to download %s: %s' % (img_url, exc))
        return None

    filename = os.path.join(directory, name)
    with open(filename, 'wb') as f:
        f.write(response.content)
    return filename


def main(first_page=1, last_page=3):
    """Download every listed image from pages *first_page*..*last_page*.

    Both bounds are inclusive.  The defaults reproduce the original
    hard-coded crawl of pages 1, 2 and 3.
    """
    os.makedirs(DOWNLOAD_DIR, exist_ok=True)

    # A single session re-uses the underlying connection across requests.
    with requests.Session() as session:
        session.headers.update(headers)
        for page in range(first_page, last_page + 1):
            url = base_url % page
            try:
                response = session.get(url, timeout=REQUEST_TIMEOUT)
                response.raise_for_status()
            except requests.RequestException as exc:
                print('failed to fetch %s: %s' % (url, exc))
                continue

            for img_url in extract_image_urls(response.text):
                print(img_url)
                download_image(session, img_url)


if __name__ == '__main__':
    main()







# 猜你喜欢 ("You may also like" — leftover navigation text from the source web page)

# 转载自 (reposted from): https://blog.csdn.net/weixin_42958164/article/details/81783616