Writing a simple Qiushibaike hot-image crawler in Python

A Python crawler that scrapes the Qiushibaike hot-image pages and saves the pictures to local files.

The full implementation follows; if it helps you, please leave a like.

```python
from urllib import request
from lxml import etree
import os
import time

def page():
    # Crawl the first five pages of the hot-image section
    for num in range(1, 6):
        time.sleep(1)  # throttle requests so we don't hammer the server
        base_url = 'https://www.qiushibaike.com/imgrank/page/{0}'.format(num)
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36'}
        # Build a request that carries the User-Agent header
        req = request.Request(base_url, headers=headers)
        response = request.urlopen(req)
        html = response.read().decode('utf-8')
        htmls = etree.HTML(html)
        make_page(htmls)

# Extract the image URLs from the page and save each picture
def make_page(htmls):
    picture = htmls.xpath('//div[@class="thumb"]/a/img/@src')
    os.makedirs('D:/66', exist_ok=True)  # make sure the output folder exists
    time.sleep(1)
    for pic_url in picture:
        src = pic_url.split('/')[-1]  # use the last URL segment as the file name
        image_url = 'https:' + pic_url
        print('downloading image...')
        request.urlretrieve(image_url, os.path.join('D:/66', src))
        print('saved')

page()
```
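
The version above will still abort the whole crawl if a single download fails, and it always writes into a hard-coded folder. Below is a minimal sketch of a more defensive download loop: it wraps each download in a try/except, skips files that already exist, and takes the output directory as a parameter. The helper name `download_images` and the `save_dir` parameter are illustrative choices, not part of the original post.

```python
from urllib import request
from urllib.error import URLError
import os

def download_images(pic_urls, save_dir='D:/66'):
    """Download each image URL into save_dir, skipping failures.

    Note: download_images and save_dir are hypothetical names added
    for illustration; they are not from the original post.
    """
    os.makedirs(save_dir, exist_ok=True)  # create the folder if missing
    for pic_url in pic_urls:
        filename = pic_url.split('/')[-1]
        target = os.path.join(save_dir, filename)
        if os.path.exists(target):
            print('already downloaded, skipping:', filename)
            continue
        try:
            request.urlretrieve('https:' + pic_url, target)
            print('saved', filename)
        except (URLError, OSError) as exc:
            # One failed download should not abort the whole crawl
            print('failed to download', filename, ':', exc)
```

With this helper, `make_page` could simply call `download_images(picture)` instead of looping over the URLs itself.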


Reposted from blog.csdn.net/qq_41337034/article/details/89366330