Crawler 03 — Scraping second-hand housing listings (Lianjia, Nanchang)

import csv

import parsel
import requests

# Scrape page 1 of Lianjia Nanchang second-hand listings: collect each
# listing's detail-page URL, then extract title/price/community/district
# from every detail page and append the rows to a CSV file.

base_url = 'https://nc.lianjia.com/ershoufang/pg1'
headers = {
    # BUG FIX: the original value started with "User-Agent:" — the header
    # name must not be repeated inside the header value — and the UA
    # string was missing its closing parenthesis.
    'User-Agent': "Mozilla/5.0 (compatible; MSIE 9.0; Windows NT 6.1; Trident/5.0)"
}


def _fetch(url):
    """Return the HTML body of *url*; raise for HTTP errors, don't hang forever."""
    resp = requests.get(url, headers=headers, timeout=10)
    resp.raise_for_status()
    return resp.text


def _first(selector, xpath):
    """Return the first xpath match as a stripped string ('' when absent).

    The original code used .extract(), which returns a *list*, so Python
    list reprs like "['…']" ended up inside the CSV cells.
    """
    return (selector.xpath(xpath).extract_first() or '').strip()


listing = parsel.Selector(_fetch(base_url))
urls = listing.xpath('//ul[@class="sellListContent"]/li/a/@href').extract()

# Open the output file ONCE (the original re-opened it on every iteration)
# and use csv.writer so commas inside titles are quoted correctly.
# newline='' is required by the csv module on Windows to avoid blank rows.
with open(r"C:\Users\Administrator\Desktop\03\lianjia.csv", 'a',
          newline='', encoding='utf-8') as f:
    writer = csv.writer(f)
    for url in urls:
        detail = parsel.Selector(_fetch(url))
        title_main = _first(detail, '//div[@class="title-wrapper"]//div[@class="title"]/h1/text()')
        title_sub = _first(detail, '//div[@class="title-wrapper"]//div[@class="title"]/div/text()')
        price = _first(detail, '//div[@class="price "]/span/text()')
        label = _first(detail, '//div[@class="communityName"]/a[@class="info "]/text()')
        # areaName has several <a> children (district + sub-district); join
        # them into one cell instead of dumping a list repr.
        area_name = ' '.join(
            s.strip()
            for s in detail.xpath('//div[@class="areaName"]/span[@class="info"]/a/text()').extract()
        )
        print(title_main)
        writer.writerow([title_main, title_sub, price, label, area_name])

You may also like

Reposted from blog.csdn.net/qq_41458842/article/details/106223739