一、项目简介
1.内容:抓取彼岸壁纸(www.netbian.com)“人物”分类的壁纸,并保存到本地的“图片”文件夹
2.解析:使用 XPath(通过 lxml 的 etree.HTML)解析页面
3.涉及的类库:requests(发送请求)、lxml(解析 HTML)
二、代码
# python
# -*- coding:utf-8 -*-
# author:Only time:2019/8/18
import os

import requests
from lxml import etree
def get_url(page_count=40):
    """Build the list-page URLs of the "renwu" wallpaper category.

    The first page is served as ``index.htm``; every later page ``n``
    (n >= 2) is served as ``index_<n>.htm``.

    Args:
        page_count: Total number of list pages to generate, counting the
            first page. Defaults to 40, the range the script originally
            hard-coded.

    Returns:
        A list of URL strings in page order. Empty when ``page_count`` is
        less than 1.
    """
    base = 'http://www.netbian.com/renwu/'
    if page_count < 1:
        return []
    # Page 1 has no numeric suffix, so it is added separately.
    urllist = [base + 'index.htm']
    urllist.extend(
        base + 'index_' + str(page) + '.htm'
        for page in range(2, page_count + 1)
    )
    return urllist
def get_html():
    """Download every list page produced by ``get_url`` and return the HTML.

    Each response is decoded as GBK before its text is collected. A page
    that cannot be fetched (timeout, connection error, HTTP error status)
    is reported and skipped so that one bad page does not discard the
    pages already downloaded.

    Returns:
        A list of HTML strings, one per successfully fetched page, in the
        order the URLs were requested.
    """
    # The header never changes, so build it once instead of per request.
    header = {
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/63.0.3239.132 Safari/537.36'
    }
    htmllist = []
    # A single session reuses the connection across all list pages and is
    # closed when the block exits.
    with requests.Session() as session:
        session.headers.update(header)
        for page in get_url():
            try:
                response = session.get(page, timeout=2)
                # Do not hand an HTTP error page to the parser as if it
                # were a wallpaper list.
                response.raise_for_status()
            except requests.RequestException as exc:
                print('skip %s: %s' % (page, exc))
                continue
            response.encoding = 'gbk'
            htmllist.append(response.text)
    return htmllist
def html_parse_save():
    """Parse each downloaded list page and save the images it links to.

    For every page returned by ``get_html`` the ``<a>`` elements under
    ``//div[@class="list"]/ul/li`` are read. The text of the ``<b>`` child
    is used as the file title and the ``src`` of the ``<img>`` child as the
    image URL. Each image is written to ``图片/<title>.jpg``; the directory
    is created if it does not exist yet.

    Entries lacking either a ``<b>`` text or an ``<img src>``, and images
    whose download fails, are skipped instead of aborting the run.
    """
    save_dir = "图片"
    # open() does not create missing directories, so make sure it exists.
    os.makedirs(save_dir, exist_ok=True)
    # Replace characters that would break or redirect the output path.
    unsafe_chars = str.maketrans({ch: '_' for ch in '\\/:*?"<>|'})

    number = 0
    for html_1 in get_html():
        number += 1
        html = etree.HTML(html_1)
        for link in html.xpath('//div[@class="list"]/ul/li/a'):
            titles = link.xpath('./b/text()')
            sources = link.xpath('./img/@src')
            if not titles or not sources:
                # Not every <li><a> carries both a title and an image;
                # indexing [0] on an empty result would raise IndexError.
                continue
            title = titles[0].translate(unsafe_chars)
            try:
                # A timeout keeps one stalled download from hanging the run.
                image = requests.get(sources[0], timeout=10)
                image.raise_for_status()
            except requests.RequestException as exc:
                print('skip %s: %s' % (sources[0], exc))
                continue
            with open(os.path.join(save_dir, "%s.jpg" % title), 'wb') as f:
                f.write(image.content)
        # NOTE(review): the pasted source lost its indentation. `number`
        # counts list pages, so the message is emitted once per page here;
        # confirm this matches the intended placement.
        print('第'+str(number)+'图片保存成功')
# Run the scraper only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    html_parse_save()