爬取某家房价的数据

想看看老家的房价,所以写了个脚本,分享给大家。

import requests
import time
from bs4 import BeautifulSoup
# 写入数据库
from mysql import close_db


@close_db
def write_db(con, param):
    """Insert one listing row into the ``house`` table.

    Args:
        con: Object exposing ``execute(sql, params)``. It is injected by the
            ``close_db`` decorator; ``main`` calls ``write_db(param)`` only.
        param: Sequence of six values bound, in order, to the columns
            ``adress, total_price, avg_price, area, title, url``.

    Returns:
        None. Any exception raised while executing the statement is printed
        and not propagated.
    """
    # Values are bound through placeholders rather than formatted into the SQL.
    statement = (
        "insert into house(adress, total_price, avg_price, area, title, url) "
        "VALUES (%s,%s,%s,%s,%s,%s)"
    )
    try:
        con.execute(statement, param)
    except Exception as err:
        print(err)


# Parse one listing element into the row that write_db() stores.
def _parse_listing(li):
    """Extract the database row for a single listing ``<li>`` element.

    Args:
        li: BeautifulSoup tag for one item of the result list.

    Returns:
        list: ``[address, total_price, avg_price, area, title, url]``, the
        order expected by the INSERT statement in ``write_db``.

    Raises:
        AttributeError: If an expected child element is missing
            (``find`` returned ``None``).
        KeyError: If the first link of the item has no ``href`` attribute.
    """
    # Estate name
    title = li.find('div', class_='resblock-name').find('a').text
    # Estate address, with line breaks stripped
    address = li.find('div', class_='resblock-location').text.replace('\n', '')
    # Detail page link: the href is prefixed with the site host
    title_url = 'https://ty.fang.lianjia.com' + li.find('a').attrs['href']
    # Average price and total price are both nested in the price container
    price_box = li.find('div', class_='resblock-price')
    avg_price = price_box.find('div', class_='main-price').find('span', class_='number').text
    total_price = price_box.find('div', class_='second').text
    # Floor area, with line breaks stripped
    square_metre = li.find('div', class_='resblock-area').text.replace('\n', '')
    return [address, total_price, avg_price, square_metre, title, title_url]


# Main entry point
def main(page_max=24, timeout=10):
    """Scrape the listing pages and store every listing via ``write_db``.

    Args:
        page_max: Number of result pages to fetch, starting at page 1.
        timeout: Seconds to wait for each HTTP request before giving up, so a
            stalled connection cannot hang the run.

    A page that cannot be fetched, or that lacks the listing container, is
    reported and skipped. A listing that fails to parse or store is reported
    and skipped as well.
    """
    # Send a Chrome-like User-Agent header with every request
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
                             '(KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36'}
    for i in range(1, int(page_max) + 1):
        print("第几页:" + str(i))
        # The first page uses a different URL from the numbered pages
        if i == 1:
            house = 'https://ty.lianjia.com/loupan/rs/'
        else:
            house = 'https://ty.lianjia.com/loupan/pg' + str(i)
        try:
            res = requests.get(house, headers=headers, timeout=timeout)
        except requests.RequestException as e:
            print(e)
            continue
        soup = BeautifulSoup(res.text, 'html.parser')
        wrapper = soup.find('ul', class_='resblock-list-wrapper')
        if wrapper is None:
            # Without the container there is nothing to parse on this page
            print('listing container not found, skipping page: ' + house)
            continue
        for li in wrapper.find_all('li'):
            try:
                write_db(_parse_listing(li))
                # Pause after each stored listing
                time.sleep(5)
            except Exception as e:
                print(e)


# Run the scraper only when this file is executed directly as a script.
if __name__ == '__main__':
    main()

装饰器是自己封装的,还不太成熟,没有关闭数据库连接。

import functools

import pymysql

# Open the database connection.
# NOTE(review): host and credentials are hard-coded here; consider loading
# them from the environment or a config file before sharing or deploying.
config = {
    'host': 'localhost',
    'port': 3306,
    'user': 'root',
    'passwd': 'root',
    'db': 'test',
    # The stored fields (titles, addresses) are Chinese text. Request utf8mb4
    # explicitly so the connection encoding does not depend on the driver's
    # version-specific default charset.
    'charset': 'utf8mb4',
}
# Module-level connection and cursor shared by every function wrapped with
# close_db; they are created once at import time.
db = pymysql.connect(**config)
con = db.cursor()


def close_db(func):
    """Decorator that injects the shared cursor and finalizes the transaction.

    The wrapped function is called with the module-level cursor ``con`` as its
    first positional argument, followed by the caller's own arguments. When it
    returns normally the transaction on ``db`` is committed.

    If the wrapped function raises, the transaction is rolled back before the
    exception is re-raised, so statements from the failed call cannot be
    committed by a later successful call on the same shared connection.

    Note: despite its name, this decorator closes neither the cursor nor the
    connection; both remain open for subsequent calls.

    Args:
        func: Callable whose first parameter receives the cursor.

    Returns:
        The wrapping callable, which returns whatever ``func`` returns.
    """
    @functools.wraps(func)
    def wrapper(*args, **kw):
        try:
            r = func(con, *args, **kw)
        except Exception:
            db.rollback()
            raise
        db.commit()
        return r

    return wrapper

猜你喜欢

转载自www.cnblogs.com/yuabnfa/p/10918878.html