import csv

import requests
import parsel
base_url = 'https://nc.lianjia.com/ershoufang/pg1'
headers = {
'User-Agent': "User-Agent:Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0"
}
response = requests.get(base_url,headers=headers).text
htmls = parsel.Selector(response)
urls = htmls.xpath('//ul[@class="sellListContent"]/li/a/@href').extract()
for url in urls:
response = requests.get(url,headers=headers).text
html = parsel.Selector(response)
title_main = html.xpath('//div[@class="title-wrapper"]//div[@class="title"]/h1/text()').extract()
title_sub = html.xpath('//div[@class="title-wrapper"]//div[@class="title"]/div/text()').extract()
price = html.xpath('//div[@class="price "]/span/text()').extract()
label = html.xpath('//div[@class="communityName"]/a[@class="info "]/text()').extract()
areaName = html.xpath('//div[@class="areaName"]/span[@class="info"]/a/text()').extract()
print(title_main)
with open(r"C:\Users\Administrator\Desktop\03\lianjia.csv", 'a', encoding='utf-8')as f:
f.write("{},{},{},{},{}\n".format(title_main, title_sub, price, label, areaName))
# 爬虫03-爬取房源 (Crawler 03 — scrape housing listings)
# Reposted from: blog.csdn.net/qq_41458842/article/details/106223739
# (The remaining lines — "猜你喜欢" / "今日推荐" / "周排行" — were blog-page
# navigation chrome accidentally pasted in with the code.)