我爱我家房源信息爬取

我爱我家房源信息获取

  • 无特殊爬取要求：使用 requests 携带常规浏览器请求头（Cookie、User-Agent）即可直接抓取列表页
import requests
from lxml import etree
from mysql_link import mysql_connect

def _sql_escape(value):
    """Return *value* as text that is safe inside a double-quoted SQL string."""
    # Backslashes are doubled first so the quote escapes added next survive.
    return str(value).replace('\\', '\\\\').replace('"', '\\"')


def _join_location(link_texts, tail_texts):
    """Combine the location link text with the plain text that follows it.

    Returns None when neither XPath query produced any text.
    """
    if tail_texts:
        # Link text first, then the trailing text; at most two fragments.
        return ''.join((link_texts + tail_texts)[:2])
    return link_texts[0] if link_texts else None


def get_5i5j(count):
    """Scrape 5i5j rental listings (Huilongguan, Beijing) into MySQL.

    Fetches listing pages 1..count, extracts title, area, location and
    price from every list item, prints them, and inserts one row per
    listing into the ``5i5j`` table through ``mysql_connect().mysql_do``.

    List items that lack any of the four fields are skipped instead of
    aborting the whole crawl with an IndexError.

    Args:
        count: number of listing pages to fetch, starting from page 1.
    """
    mysql_ = mysql_connect()

    headers = {
        'Cookie': '_Jo0OQK=6B2EFBECBAB6D76BCDB834644B1B2D3BC2FFE7FE5ECC9F67E588A57175B2C4A553BB1B99580083D10FBE3107B2235A474021805425FF6DC8C7E536BB944BCFF6EB1DE8682CA7D10E3B498FB9E3C853EFEE298FB9E3C853EFEE215D8BEE34E43E5C0GJ1Z1Jw==; PHPSESSID=u518ep3lfv9sig9rt3jfdtrf2j; _ga=GA1.2.656332641.1534582894; _gid=GA1.2.1467514563.1534582894; yfx_c_g_u_id_10000001=_ck18081817013612438566341316835; yfx_f_l_v_t_10000001=f_t_1534582896243__r_t_1534582896243__v_t_1534582896243__r_c_0; Hm_lvt_94ed3d23572054a86ed341d64b267ec6=1534582902; Hm_lpvt_94ed3d23572054a86ed341d64b267ec6=1534583124',
        'Host': 'bj.5i5j.com',
        'Upgrade-Insecure-Requests': '1',
        'User-Agent': 'Mozilla/5.0 (Windows NT 6.1; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/67.0.3396.62 Safari/537.36',
    }
    url = 'https://bj.5i5j.com/zufang/huilongguan/n%d/'
    sql_template = 'insert into 5i5j (title,area,descr,price)values("{}","{}","{}","{}")'

    for num in range(1, count + 1):
        full_url = url % num
        # Without a timeout a stalled connection would block the crawl forever.
        response = requests.get(full_url, headers=headers, timeout=10)
        html_ele = etree.HTML(response.text)
        li_list = html_ele.xpath('//div[@class="list-con-box"]/ul/li')
        for li in li_list:
            title = li.xpath('./div[2]/h3/a/text()')
            # Area
            area = li.xpath('./div[2]/div[1]/p[1]/text()')
            # Location: link text plus the plain text that follows it
            location = _join_location(
                li.xpath('./div[2]/div[1]/p[2]/a/text()'),
                li.xpath('./div[2]/div[1]/p[2]/text()'),
            )
            # Price
            price = li.xpath('./div[2]/div[1]/div/p/strong')

            # Skip list items that do not have the full listing layout
            # rather than crashing on an empty XPath result.
            if not (title and area and price) or location is None:
                continue

            print(title[0])
            print(area[0])
            print(location)
            print(price[0].text)

            # Scraped text is untrusted: escape it before embedding it in the
            # statement. NOTE(review): if mysql_do accepts query parameters,
            # a parameterized query would be preferable - confirm its API.
            sql = sql_template.format(
                _sql_escape(title[0]),
                _sql_escape(area[0]),
                _sql_escape(location),
                _sql_escape(price[0].text),
            )
            print(sql)
            mysql_.mysql_do(sql)
# Script entry point: crawl the first three listing pages when run directly.
if __name__ == "__main__":
    get_5i5j(count=3)

猜你喜欢

转载自blog.csdn.net/qq_41847171/article/details/81865236