Python爬虫学习(十四):用XPath获取不同标签下的城市名

# -*- coding: utf-8 -*-
import requests
from lxml import etree
import os
if __name__ == '__main__':
    # Scrape every city name listed on the aqistudy history-data page and
    # print the list of names followed by how many were found.
    #
    # The page lists cities under two different tag layouts:
    #   div/ul/li/a          -> "hot" cities
    #   div/ul/div[2]/li/a   -> all cities
    # An earlier version of this script ran one XPath query per layout and
    # appended both result sets to the same list; the single union (`|`)
    # expression below collects both groups with one query.
    headers = {
        'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; WOW64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/76.0.3809.87 Safari/537.36'
    }
    url = 'https://www.aqistudy.cn/historydata/'

    # requests does not time out unless told to; bound the wait so the script
    # cannot hang forever on an unresponsive server.
    response = requests.get(url=url, headers=headers, timeout=10)
    # Fail loudly on an HTTP error instead of parsing an error page and
    # silently printing an empty list.
    response.raise_for_status()
    page_text = response.text

    tree = etree.HTML(page_text)
    a_list = tree.xpath(
        '//div[@class="bottom"]/ul/li/a'
        ' | //div[@class="bottom"]/ul/div[2]/li/a'
    )

    all_city_names = []
    for a in a_list:
        text_nodes = a.xpath('./text()')
        # Skip anchors with no direct text node rather than crashing with an
        # IndexError on [0].
        if text_nodes:
            all_city_names.append(text_nodes[0])
    print(all_city_names, len(all_city_names))
发布了23 篇原创文章 · 获赞 0 · 访问量 667

猜你喜欢

转载自blog.csdn.net/haimian_baba/article/details/103816978
今日推荐