贝壳租房 XPath 爬虫 + 数据分析实战

sadsadsadsa 

import requests
from lxml import etree

# Scrape the first page of the rental listing index and print the link found
# in each listing entry.
url = "https://xa.zu.ke.com/zufang/"
header = {"User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/78.0.3904.108 Safari/537.36"}

# Without a timeout, requests waits indefinitely on an unresponsive server.
html = requests.get(url=url, headers=header, timeout=10).text
tree = etree.HTML(html)

# Every direct child <div> of the "content__list" container is handled as one
# listing entry.
div_list = tree.xpath('//div[@class="content__list"]/div')
for div in div_list:
    # href of the link in the first <p> of the entry's first child <div>.
    # The query must be evaluated relative to the current entry: the previous
    # version ran it on an undefined name `p`, which raised NameError on the
    # first iteration.
    hrefs = div.xpath('./div[1]/p[1]/a/@href')
    if not hrefs:
        # Entries without such a link are skipped instead of failing on [0].
        continue
    target_url = hrefs[0]
    print(target_url)
# Disabled earlier draft: the code below sits inside a bare string literal, so
# it is never executed.
# NOTE(review): the draft iterates over `list`, which is never assigned in the
# visible code (it would resolve to the builtin type) - presumably it was meant
# to be the collection of listing nodes; confirm before re-enabling.
'''
print(list)
for li in list:
    #title = li.xpath('./div[1]/p[1]/a/text()')[0]
    #rurl = li.xpath('./div[1]/p[1]/a/@href')[0]
    #area = li.xpath('./div[1]/p[2]/a[1]/text()')[0]
    #fx = li.xpath('./div[1]/p[2]/a[2]/text()')[0]
    #name = li.xpath('./div[1]/p[2]/a[3]/text()')[0]
    #space = li.xpath('./div[1]/p[2]/text()')[4]
    #cx = li.xpath('./div[1]/p[2]/text()')[5]
    #gj = li.xpath('./div[1]/p[2]/span/text()')[6]
    #lc = li.xpath('./div[1]/p[2]/text()')[6]
    #uptime =li.xpath('./div[1]/p[3]/text()')[0]
    uptype = li.xpath('./div[1]/p[4]//text()')
    #uptype = li.xpath('./div[1]/p[4]/i[0]/text()')[0]
    #zx = li.xpath('./div[1]/p[4]/i[1]/text()')[0]
    #gn = li.xpath('./div[1]/p[4]/i[2]/text()')[0]
    #kf = li.xpath('./div[1]/p[4]/i[3]/text()')[0]
    #print(title)
    #print(rurl)
    #print(space)
    #print(fx)
    #print(name)
    #print(area)
    #print(cx)
    #print(gj)
    #print(lc)
    #print(uptime)
    print(uptype)
    #print(zx)
    #print(gn)
    #print(kf)
    break
'''

猜你喜欢

转载自www.cnblogs.com/Iceredtea/p/11995922.html
今日推荐