# Python crawler: scrapes the qiushibaike.com image-ranking pages and saves
# the images to local files.
from urllib import request
from lxml import etree
from urllib import request
import time
def page():
    """Crawl pages 1-5 of the qiushibaike image ranking.

    For each page: fetch the HTML with a browser User-Agent, parse it with
    lxml, and hand the parsed tree to make_page() to download the images.
    Network I/O only; returns None.
    """
    for num in range(1, 6):
        # Throttle to one request per second so we don't hammer the server.
        time.sleep(1)
        base_url = 'https://www.qiushibaike.com/imgrank/page/{0}'.format(num)
        headers = {
            'User-Agent': ' Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/71.0.3578.98 Safari/537.36'}
        # Attach the User-Agent header so the site doesn't reject us as a bot.
        req = request.Request(base_url, headers=headers)
        # Fix: close the response instead of leaking the socket each iteration.
        with request.urlopen(req) as response:
            html = response.read().decode('utf-8')
        htmls = etree.HTML(html)
        make_page(htmls)
# 清洗数据/存储
# 清洗数据/存储
def make_page(htmls):
    """Extract image URLs from one parsed page and download each image.

    htmls: a parsed lxml HTML tree (any object exposing .xpath() works).
    Each image is saved under the target folder using its own filename.
    Returns None.

    Bug fix vs. original: the original wrote every image to the single,
    escape-mangled literal path 'D:\\66\\ssss' (note '\\66' is an octal
    escape in a non-raw string), so each download clobbered the previous
    one and the computed filename `src` was never used.
    """
    import os
    save_dir = r'D:\66'
    picture = htmls.xpath('//div[@class="thumb"]/a/img/@src')
    # Only create the directory when there is actually something to save.
    if picture:
        os.makedirs(save_dir, exist_ok=True)
    for pic_url in picture:
        # Last path component is the image's own filename, e.g. ABCDEF.jpg.
        src = pic_url.split('/')[-1]
        # Scraped src is protocol-relative (//pic.qiushibaike.com/...).
        image_url = 'https:' + pic_url
        print('图片下载中。。。。。')
        request.urlretrieve(image_url, os.path.join(save_dir, src))
        print('写入完成')
# Entry point: the original line carried a stray markdown code fence and
# bullet residue from the blog post; guard the call so importing this
# module does not immediately start the crawl.
if __name__ == '__main__':
    page()