python3实现简单循环抓取并下载图片(一)

一、项目简介

1.内容:抓取彼岸壁纸(www.netbian.com)"人物"分类下的壁纸图片,并保存到本地的"图片"文件夹

2.解析:xpath

3.涉及的类库:requests、lxml

二、代码

# python
# -*- coding:utf-8 -*-
# author:Only time:2019/8/18


import requests
from lxml import etree


def get_url():
    """Build the list of listing-page URLs to crawl.

    Returns:
        A list of 40 URL strings: the unnumbered ``index.htm`` page first,
        followed by ``index_2.htm`` through ``index_40.htm`` in order.
    """
    # The first listing page has no numeric suffix; numbering starts at 2.
    pages = ['http://www.netbian.com/renwu/index.htm']
    pages.extend(
        'http://www.netbian.com/renwu/index_' + str(n) + '.htm'
        for n in range(2, 41)
    )
    return pages


def get_html():
    """Download every listing page and return the decoded HTML sources.

    Returns:
        A list of page bodies (str), one per URL produced by ``get_url()``,
        in the same order. Each response is decoded as GBK.
    """
    # Browser-like User-Agent sent with every request.
    request_headers = {
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36'
    }
    pages = []
    for page_url in get_url():
        reply = requests.get(page_url, headers=request_headers, timeout=2)
        # Force the charset before reading .text so the body decodes as GBK.
        reply.encoding = 'gbk'
        pages.append(reply.text)
    return pages


def html_parse_save():
    """Parse each listing page and save every image found under ``图片/``.

    For every page returned by ``get_html()``, selects the anchors matching
    ``//div[@class="list"]/ul/li/a``, reads the caption from the child
    ``<b>`` text and the image URL from the child ``<img src>``, downloads
    the image and writes it to ``图片/<caption>.jpg``. A progress line is
    printed after each page.

    The output directory is created if it is missing, and anchors that lack
    a caption or an image URL are skipped instead of raising ``IndexError``
    and aborting the run.
    """
    import os  # local import: only needed to prepare the output directory

    # open() does not create parent directories, so make sure it exists.
    os.makedirs("图片", exist_ok=True)

    for number, page_source in enumerate(get_html(), start=1):
        tree = etree.HTML(page_source)
        for anchor in tree.xpath('//div[@class="list"]/ul/li/a'):
            titles = anchor.xpath('./b/text()')
            sources = anchor.xpath('./img/@src')
            if not titles or not sources:
                # Skip list entries that are not a captioned image link.
                continue
            image = requests.get(sources[0])

            with open("图片/%s.jpg" % titles[0], 'wb') as f:
                f.write(image.content)
        print('第'+str(number)+'图片保存成功')


# Start the crawl only when this file is executed as a script, not on import.
if __name__ == "__main__":
    html_parse_save()

三、运行结果

发布了22 篇原创文章 · 获赞 18 · 访问量 1万+

猜你喜欢

转载自blog.csdn.net/weixin_43930694/article/details/99710489
今日推荐