房天下二手房爬取

二手房数据抓取:以房天下、链家、安居客等网站为数据来源,抓取后进行数据分析与挖掘。

数据的抓取:

# Request headers sent with every page fetch; the User-Agent is a desktop
# Chrome-on-macOS browser string.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_2) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/63.0.3239.84 Safari/537.36"
    ),
}

def _first_or_none(values):
    """Return the first element of ``values``, or None when it is empty."""
    return values[0] if values else None


def detail_html(url):
    """Fetch one listing page and print one dict per listing found on it.

    The page is requested with the module-level ``headers``, decoded as GBK,
    and parsed with ``etree.HTML``. Every ``<dl>`` under the element whose
    class is ``"shop_list shop_list_4"`` is treated as one listing.

    NOTE(review): this relies on ``requests`` and ``etree`` (presumably
    ``lxml.etree``) being available in module scope; the imports are not
    visible in this file -- confirm they exist.

    Args:
        url: Address of the listing page to download.

    Returns:
        None. Each extracted item is printed, not returned.
    """
    response = requests.get(url, headers=headers)
    page_text = response.content.decode('gbk')
    tree = etree.HTML(page_text)
    listings = tree.xpath('//*[@class="shop_list shop_list_4"]/dl')
    for listing in listings:
        item = {}
        item['name'] = _first_or_none(
            listing.xpath('.//*[@class="clearfix"]/a/@title'))
        # string(...) yields a single string; drop CRLFs and spaces so the
        # description collapses onto one compact line.
        item['style'] = listing.xpath(
            'string(.//p[@class="tel_shop"])'
        ).strip().replace('\r\n', '').replace(' ', '')
        item['price'] = listing.xpath('string(.//span[@class="red"])')
        # Each field is guarded by its OWN result list. Previously these two
        # were guarded by the length of the name list, which raised
        # IndexError when the name was present but the field was missing,
        # and discarded a valid field when the name was missing.
        item['place'] = _first_or_none(
            listing.xpath('.//p[@class="add_shop"]//span/text()'))
        item['house_name'] = _first_or_none(
            listing.xpath('.//p[@class="add_shop"]/a/@title'))
        print(item)

def main():
    """Process listing pages 1 through 100 in order via ``detail_html``."""
    url_template = 'https://hz.esf.fang.com/house/i3{}/'
    first_page, last_page = 1, 100
    for page_number in range(first_page, last_page + 1):
        page_url = url_template.format(page_number)
        detail_html(page_url)

# Start the crawl only when this file is executed directly as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()

数据分析:

待续……

猜你喜欢

转载自blog.csdn.net/yitian1585531/article/details/86187923