A simple scraper for Lianjia listing data

import requests
from lxml import etree
from day03.pymysql_text import Mysql_text

# Wrap the scraping of one page in a function
def lianjia(url):
    response = requests.get(url)

    # with open('lianjia.html','wb')as f:
    #     f.write(response.content)
    # etree.HTML() parses the response text and returns an Element object
    lj_ele = etree.HTML(response.text)
    # First locate all the listing <li> nodes under the house-lst <ul>
    lj_list = lj_ele.xpath('//ul[@id="house-lst"]/li')
    # Iterate over every listing <li>
    for li_list in lj_list:
        # Extract each desired field in turn
        li_title = li_list.xpath('./div[2]/h2/a')[0].text
        print(li_title)
        li_region = li_list.xpath('./div[2]/div[1]/div[1]/a/span')[0].text
        print(li_region)
        li_zone = li_list.xpath('./div[2]/div[1]/div[1]/span[1]/span')[0].text
        print(li_zone)
        li_dx = li_list.xpath('./div[2]/div[1]/div[1]/span[2]')[0].text
        print(li_dx)
        li_price = li_list.xpath('./div[2]/div[2]/div[1]/span[1]')[0].text
        print(li_price)
        # Pack the extracted values into a tuple
        data = (li_title,li_region,li_zone,li_dx,li_price)
        # Execute the INSERT via the pymysql wrapper instance
        m.sqlzz(sql,data)

# Instantiate the wrapper object; this class is the database helper written in an earlier post (a sketch of it follows after the script)
m = Mysql_text()
sql = 'INSERT INTO lianjia(title, region, zone, dx, price) VALUES (%s, %s, %s, %s, %s)'
# Loop over pages 1-3 of the subway-rental listings
for i in range(1,4):
    url = 'https://bj.lianjia.com/ditiezufang/pg%srp1/'%i

    lianjia(url)
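
The script imports Mysql_text from an earlier post, so the class itself is not shown here. Below is a minimal sketch of what such a wrapper might look like, just enough to support the m.sqlzz(sql, data) call used above. The connection parameters (host, user, password, database name) are placeholders, and the lianjia table is assumed to already exist with five text columns matching title, region, zone, dx, and price; adjust everything to your own environment.

import pymysql

class Mysql_text:
    # Minimal sketch of a pymysql wrapper; connection settings are assumptions
    def __init__(self):
        self.conn = pymysql.connect(host='localhost', user='root',
                                    password='123456', database='spider',
                                    charset='utf8mb4')
        self.cursor = self.conn.cursor()

    def sqlzz(self, sql, data):
        # Execute a parameterized statement and commit, rolling back on failure
        try:
            self.cursor.execute(sql, data)
            self.conn.commit()
        except Exception as e:
            self.conn.rollback()
            print(e)

    def close(self):
        self.cursor.close()
        self.conn.close()

Using parameterized execute(sql, data) lets pymysql handle escaping of the scraped strings, which is safer than building the INSERT statement by string formatting.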


Reposted from blog.csdn.net/yangbenhao/article/details/81779634