Python 批量爬取图片

爬取mzitu网站的girl图片

使用requests库请求链接获取响应
使用lxml库解析html获取资源链接

import os

import requests
from lxml import html

# Batch image downloader: walks a tag listing page, opens every gallery linked
# from it, and saves the single image shown on each page of every gallery.
etree = html.etree

# Browser-like headers sent with every request, including the image downloads.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) '
                  'Chrome/77.0.3865.120 Safari/537.36',
    'Referer': 'http://www.mzitu.com',
}
url = 'http://mzitu.com/tag/meitun/'

# requests never times out by default; bound every call so that one stalled
# connection cannot hang the whole run.
timeout = 10

# Output directory, relative to the current working directory. An absolute
# path such as D:/image/ works too. It is created on demand.
image_dir = 'image'
os.makedirs(image_dir, exist_ok=True)

data = requests.get(url, headers=headers, timeout=timeout).text
s = etree.HTML(data)
# One <li> per gallery on the listing page.
file = s.xpath('//*[@id="pins"]/li')

# Running counter used to name the saved files (0.png, 1.png, ...).
i = 0
for div in file:
    gallery_links = div.xpath('./a/@href')
    if not gallery_links:
        # List item without a gallery link: nothing to follow.
        continue
    url_te = gallery_links[0]

    data_te = requests.get(url_te, headers=headers, timeout=timeout).text
    s_te = etree.HTML(data_te)
    # Text of the 5th pagination link, used as the number of pages in the gallery.
    # NOTE(review): positional XPath, presumably the "last page" link - verify against the live markup.
    page_text = s_te.xpath('/html/body/div[2]/div[1]/div[4]/a[5]/span/text()')
    if not page_text:
        continue
    page = int(page_text[0])

    # Gallery pages are numbered 1..page inclusive; the upper bound must be
    # page + 1, otherwise the last image of every gallery is skipped.
    for x in range(1, page + 1):
        # rstrip avoids a double slash when the gallery link already ends with '/'.
        urls = url_te.rstrip('/') + '/' + str(x)
        data_s = requests.get(urls, headers=headers, timeout=timeout).text
        s_s = etree.HTML(data_s)

        img_urls = s_s.xpath('/html/body/div[2]/div[1]/div[3]/p/a/img/@src')
        if not img_urls:
            # Page layout differs or the image is missing: skip this page.
            continue
        img_url = img_urls[0]

        print('图片链接' + img_url)

        r = requests.get(img_url, headers=headers, timeout=timeout)
        if r.status_code != 200:
            # Do not save an error page to disk as if it were an image.
            print('skipped (HTTP ' + str(r.status_code) + '): ' + img_url)
            continue

        # Files are always named <counter>.png, whatever the source format is.
        path = os.path.join(image_dir, str(i) + '.png')
        with open(path, 'wb') as f:
            f.write(r.content)
        i += 1

原创文章 157 获赞 113 访问量 7万+

猜你喜欢

转载自blog.csdn.net/setlilei/article/details/102755066