我爱我家数据爬取

 1 import requests
 2 from lxml import etree
 3 import mysqlhelper
 4 
 5 base_url = 'https://bj.5i5j.com/zufang/huilongguan/n%s/'
 6 myhelper = mysqlhelper.MysqlHelper()
 7 sql = 'insert into woaiwojiaxinxi(title, space, address, follow, price) values(%s,%s,%s,%s,%s)'
 8 
 9 for i in range(1, 4):
10     url = base_url % i
11     response = requests.get(url)
12     html_ele = etree.HTML(response.text)
13 
14     li_list = html_ele.xpath('//ul[@class="pList"]/li')
15     for li_ele in li_list:
16         title = li_ele.xpath('./div[2]/h3/a')[0].text
17         print(title)
18 
19 
20         space = li_ele.xpath('./div[2]/div[1]/p[1]/text()')[0]
21         print(space)
22         address = li_ele.xpath('./div[2]/div[1]/p[2]/a/text()')[0] + li_ele.xpath('./div[2]/div[1]/p[2]/text()')[0]
23         print(address)
24         follow = li_ele.xpath('./div[2]/div[1]/p[3]/text()')[0]
25         print(follow)
26         price = li_ele.xpath('./div[2]/div/div/p/strong')[0].text + " 元/月"
27         print(price)
28 
29         data = (title, space, address, follow, price)
30         myhelper.execute_modify_sql(sql, data)

猜你喜欢

转载自www.cnblogs.com/daihao9527/p/9503166.html
今日推荐