妹子图图片爬取

 1 import requests
 2 from lxml import etree
 3 import os
 4 from urllib import request
 5 import mysqlhelper
 6 
 7 
 8 myhelper = mysqlhelper.MysqlHelper()
 9 sql = 'insert into meizitu(name,pic_url) values(%s,%s)'
10 
11 base_url = 'http://www.mzitu.com/page/%s/'
12 headers = {
13 
14     'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36',
15 }
16 for i in range(1,3):
17     url = base_url % i
18 
19     response = requests.get(url,headers=headers)
20     html_ele = etree.HTML(response.text)
21 
22     a_list = html_ele.xpath('//ul[@id="pins"]/li/a/@href')
23 
24 
25     for url_list in a_list:
26         response = requests.get(url_list,headers)
27         html_ele = etree.HTML(response.text)
28 
29         page = html_ele.xpath('//div[@class="pagenavi"]/a[5]/span')[0].text
30 
31         for pg in range(1,int(page)+1):
32             image_url = str(url_list) +'/' + str(pg)
33             image_name=  'meizitu/' + image_url.split('/')[-2] + image_url.split('/')[-1] + '.jpg'
34 
35             data = (image_name,image_url)
36             myhelper.execute_modify_sql(sql, data)
37 
38 
39             # if not os.path.exists('meizitu'):
40             #     os.mkdir('meizitu')
41             #
42             #     filename = 'meizitu/' + image_url.split('/')[-2] + image_url.split('/')[-1] + '.jpg'
43             #     print(filename)
44             #     request.urlretrieve(image_url, filename)
45             #
46             # else:
47             #     print('图片接收失败')

猜你喜欢

转载自www.cnblogs.com/daihao9527/p/9503169.html