二手房数据抓取:从房天下、链家、安居客等网站抓取二手房信息,并进行数据分析与挖掘。
数据的抓取:
# Browser-like User-Agent; detail_html passes this dict as the request headers.
headers = {
    "User-Agent": (
        "Mozilla/5.0 (Macintosh; Intel Mac OS X 10_13_2) "
        "AppleWebKit/537.36 (KHTML, like Gecko) "
        "Chrome/63.0.3239.84 Safari/537.36"
    ),
}
def _first_or_none(values):
    """Return the first element of *values*, or None when it is empty."""
    return values[0] if values else None


def detail_html(url, timeout=10):
    """Download one listing page and print a dict for every listing on it.

    Each ``dl`` element under the ``shop_list shop_list_4`` container is
    turned into a dict with the keys ``name``, ``style``, ``price``,
    ``place`` and ``house_name``, which is then printed.

    Args:
        url: Address of the page to download.
        timeout: Seconds ``requests`` waits for the server before raising
            a timeout error, so a stalled connection cannot hang the run.

    Returns:
        None. The extracted items are only written to stdout.

    Raises:
        UnicodeDecodeError: If the response body is not valid GBK.

    Note:
        Relies on the module-level names ``requests``, ``etree`` and
        ``headers``. ``etree`` is presumably ``lxml.etree`` (it must
        provide ``HTML``); confirm against the file's imports.
    """
    response = requests.get(url, headers=headers, timeout=timeout)
    page_text = response.content.decode('gbk')
    tree = etree.HTML(page_text)
    listings = tree.xpath('//*[@class="shop_list shop_list_4"]/dl')
    for listing in listings:
        item = {}
        item['name'] = _first_or_none(
            listing.xpath('.//*[@class="clearfix"]/a/@title'))
        # string(...) yields the concatenated text of the node; line breaks
        # and spaces are removed to leave a compact description.
        item['style'] = listing.xpath(
            'string(.//p[@class="tel_shop"])'
        ).strip().replace('\r\n', '').replace(' ', '')
        item['price'] = listing.xpath('string(.//span[@class="red"])')
        # Each optional field is checked against its OWN result list. The
        # previous code tested the length of the title list here, which
        # raised IndexError (or dropped valid data) whenever the title and
        # these fields were not both present.
        item['place'] = _first_or_none(
            listing.xpath('.//p[@class="add_shop"]//span/text()'))
        item['house_name'] = _first_or_none(
            listing.xpath('.//p[@class="add_shop"]/a/@title'))
        print(item)
def main():
    """Call detail_html once for each URL built from the integers 1 to 100."""
    url_template = 'https://hz.esf.fang.com/house/i3{}/'
    # The substituted number is presumably the result-page index; confirm
    # against the site. map() is lazy, so every URL is formatted right
    # before it is fetched, in ascending order.
    for page_url in map(url_template.format, range(1, 101)):
        detail_html(page_url)
# Run the crawl only when this file is executed directly, not when imported.
if __name__ == '__main__':
    main()
数据分析:
待续……