# Scrape Lianjia (lianjia) rental listings with XPath and save them to a database.

import requests
from lxml import etree
import time
import pymysql


class MyMysql(object):
    """Thin wrapper around one PyMySQL connection and one cursor.

    The connection is opened on construction and released when the object
    is finalized.
    """

    def __init__(self):
        # Pre-set both attributes so __del__ stays safe if connect() raises.
        self.db = None
        self.cursor = None
        # Keyword arguments are required: PyMySQL 1.0+ no longer accepts
        # positional parameters for connect().
        # The charset is set explicitly because the scraped text is Chinese.
        # NOTE(review): confirm the target table/columns are utf8mb4-compatible.
        self.db = pymysql.connect(
            host='127.0.0.1',
            user='root',
            password='******',
            database='wang',
            charset='utf8mb4',
        )
        self.cursor = self.db.cursor()

    def excute_sql(self, sql, data):
        """Execute one parameterized statement and commit it.

        Args:
            sql: SQL text with ``%s`` placeholders.
            data: Sequence of values bound to the placeholders.

        Raises:
            Whatever the driver raises; the transaction is rolled back first
            so a failed statement does not leave it half-applied.
        """
        try:
            self.cursor.execute(sql, data)
            self.db.commit()
        except Exception:
            self.db.rollback()
            raise

    def __del__(self):
        # getattr guards against instances whose __init__ never completed.
        cursor = getattr(self, 'cursor', None)
        db = getattr(self, 'db', None)
        # Clear the references first so a repeated call is a no-op.
        self.cursor = None
        self.db = None
        try:
            if cursor is not None:
                cursor.close()
        finally:
            # Close the connection even if closing the cursor failed.
            if db is not None:
                db.close()

def _first(matches):
    """Return the first XPath match, or ``None`` when the list is empty."""
    return matches[0] if matches else None


def _first_text(node, path):
    """Return ``.text`` of the first element matching *path* under *node*.

    Returns ``None`` when nothing matches, so one incomplete listing does
    not abort the whole run with an ``IndexError``.
    """
    match = _first(node.xpath(path))
    return None if match is None else match.text


# Scrape listing pages 1-3 and insert one row per listing into lianjia_jinan.
sql = 'insert into lianjia_jinan(title,region,zone,meters,price,date,url) values(%s,%s,%s,%s,%s,%s,%s)'
msq = MyMysql()

# The request headers never change, so build them once instead of per page.
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/68.0.3440.106 Safari/537.36'
}

for i in range(1, 4):
    page_url = 'https://jn.lianjia.com/zufang/pg%srco10/' % i

    # A timeout keeps a stalled connection from hanging the script forever;
    # raise_for_status() stops an error page from being parsed as if it
    # were a (listing-free) result page.
    response = requests.get(page_url, headers=headers, timeout=10)
    response.raise_for_status()

    html_ele = etree.HTML(response.text)

    # Every <li> under the listing <ul>.
    li_list = html_ele.xpath('//ul[@id="house-lst"]/li')
    for res in li_list:
        # Kept separate from page_url: the original reused the name `url`
        # for both, shadowing the page address inside the loop.
        house_url = _first(res.xpath('./div[2]/h2/a/@href'))
        if house_url is None:
            # No detail link: presumably an ad or placeholder <li>, not a
            # listing -- skip it instead of crashing.
            continue

        title = _first_text(res, './div[2]/h2/a')
        region = _first_text(res, './div[2]/div[1]/div[1]/a/span')
        zone = _first_text(res, './div[2]/div[1]/div[1]/span[1]/span')
        meters = _first_text(res, './div[2]/div[1]/div[1]/span[2]')
        price = _first_text(res, './div[2]/div[2]/div[1]/span')
        date = _first_text(res, './div[2]/div[2]/div[2]')

        data = (title, region, zone, meters, price, date, house_url)
        msq.excute_sql(sql, data)

    print('第{}页保存完毕'.format(i))
    # Throttle between page requests; the original slept after every DB
    # insert, which slowed the run without spacing out the HTTP requests
    # any more usefully.
    time.sleep(1)

# You may also like

# Reposted from blog.csdn.net/weixin_38920937/article/details/81783740