想了解一下老家的房价,所以写了这个爬虫脚本,分享给大家参考。
import requests
import time
from bs4 import BeautifulSoup
# Database helper: the decorator that supplies the cursor to write_db
from mysql import close_db


@close_db
def write_db(con, param):
    """Insert one listing row into the ``house`` table.

    The ``close_db`` decorator is expected to supply ``con`` as the first
    argument; ``main`` calls this function with ``param`` only.

    Args:
        con: Cursor-like object exposing ``execute(sql, params)``.
        param: Six values in column order: address, total price, average
            price, area, title, url.

    Errors raised by ``execute`` are printed rather than propagated, so one
    bad row does not stop the crawl.
    """
    try:
        # NOTE(review): the column is spelled "adress"; presumably this
        # matches the existing table schema, so it is left untouched.
        sql = "insert into house(adress, total_price, avg_price, area, title, url) " \
              "VALUES (%s,%s,%s,%s,%s,%s)"
        con.execute(sql, param)
    except Exception as e:
        print(e)


def _page_url(page):
    """Return the listing URL for a 1-based page number."""
    if page == 1:
        return 'https://ty.lianjia.com/loupan/rs/'
    return 'https://ty.lianjia.com/loupan/pg' + str(page)


def _fetch_listings(url, headers):
    """Download one listing page and return its ``<li>`` entries.

    Returns an empty list when the page has no
    ``ul.resblock-list-wrapper`` container.

    Raises:
        requests.RequestException: on connection failure, timeout, or a
            non-success HTTP status.
    """
    # Without a timeout a stalled connection would block the crawl forever.
    res = requests.get(url, headers=headers, timeout=10)
    res.raise_for_status()
    soup = BeautifulSoup(res.text, 'html.parser')
    wrapper = soup.find('ul', class_='resblock-list-wrapper')
    if wrapper is None:
        return []
    return wrapper.find_all('li')


def _parse_listing(li):
    """Extract the database row for one listing ``<li>`` element.

    Returns:
        A list ``[address, total_price, avg_price, area, title, url]``.

    Raises:
        AttributeError: if an expected element is missing from the entry.
        KeyError: if the first link in the entry has no ``href``.
    """
    # Development (complex) name
    title = li.find('div', class_='resblock-name').find('a').text
    # Address, with the newlines from the markup removed
    address = li.find('div', class_='resblock-location').text.replace('\n', '')
    # Detail-page link: the first anchor's href appended to the site prefix
    title_url = 'https://ty.fang.lianjia.com' + li.find('a').attrs['href']
    price_box = li.find('div', class_='resblock-price')
    # Average price
    avg_price = price_box.find('div', class_='main-price').find('span', class_='number').text
    # Floor area, with the newlines from the markup removed
    square_metre = li.find('div', class_='resblock-area').text.replace('\n', '')
    # Total price
    total_price = price_box.find('div', class_='second').text
    return [address, total_price, avg_price, square_metre, title, title_url]


# Main entry point
def main(page_max=24):
    """Crawl the new-home listing pages and store every listing.

    Args:
        page_max: Number of listing pages to crawl, starting from page 1.

    A page that cannot be fetched, or a listing that cannot be parsed or
    stored, is reported with ``print`` and skipped.
    """
    # Send a desktop Chrome User-Agent with every request
    headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 '
                             '(KHTML, like Gecko) Chrome/54.0.2840.99 Safari/537.36'}
    for i in range(1, int(page_max) + 1):
        print("第几页:" + str(i))
        house = _page_url(i)
        try:
            listings = _fetch_listings(house, headers)
        except requests.RequestException as e:
            # One unreachable page should not abort the remaining pages.
            print(e)
            continue
        if not listings:
            print("未找到楼盘列表:" + house)
            continue
        for li in listings:
            # Best-effort per listing: report the failure and move on.
            try:
                write_db(_parse_listing(li))
                # Pause after each stored listing, as the script always did;
                # this also spaces out the page requests.
                time.sleep(5)
            except Exception as e:
                print(e)


if __name__ == '__main__':
    main()
装饰器是自己封装的,还不太成熟,目前没有关闭数据库连接。
import atexit
import functools
import pymysql

# Connection settings passed straight to pymysql.connect
config = {
    'host': 'localhost',
    'port': 3306,
    'user': 'root',
    'passwd': 'root',
    'db': 'test'
}
# Open the database connection once at import time; the cursor is shared by
# every function wrapped with close_db.
db = pymysql.connect(**config)
con = db.cursor()


def _close_connection():
    """Close the shared cursor and connection when the interpreter exits."""
    con.close()
    # Connection.close() raises if the connection is already closed.
    if db.open:
        db.close()


# Without this the connection opened above is never closed.
atexit.register(_close_connection)


def close_db(func):
    """Decorate ``func`` so it receives the shared cursor and auto-commits.

    The wrapped function is called as ``func(con, *args, **kw)``, so callers
    omit the cursor argument. Despite its name, this decorator does not close
    the connection after each call; the connection stays open and is closed
    once at interpreter exit.

    Args:
        func: Callable whose first positional parameter is the cursor.

    Returns:
        A wrapper that returns whatever ``func`` returns, after committing.

    Raises:
        Whatever ``func`` raises; the pending transaction is rolled back
        before the exception propagates.
    """
    @functools.wraps(func)
    def wrapper(*args, **kw):
        try:
            r = func(con, *args, **kw)
        except Exception:
            # Do not leave a half-finished transaction on the shared connection.
            db.rollback()
            raise
        db.commit()
        return r
    return wrapper