中国天气网爬虫

之前没有使用过 BeautifulSoup，这次特意用它来爬取。不得不说，用它写起来确实不太方便，而且解析速度也比较慢。

import requests
from bs4 import BeautifulSoup
from pyecharts.charts import Bar

# Module-level accumulator: parse_page() appends one
# {'city': ..., 'min_temp': ...} dict per parsed table row, and main()
# sorts and reads it afterwards.
ALL_DATA = []


def parse_page(url):
    """Fetch one forecast page and record each city's minimum temperature.

    The page is downloaded, parsed with the ``html5lib`` parser, and every
    table inside the first ``div.conMidtab`` element is walked row by row.
    For each row a ``{'city': str, 'min_temp': int}`` dict is appended to
    the module-level ``ALL_DATA`` list.

    Args:
        url: Address of the forecast page to scrape.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
        AttributeError: If the page contains no ``div.conMidtab`` element.
        ValueError: If a temperature cell does not hold an integer.
    """
    headers = {
        # The key must be spelled "User-Agent"; a misspelled key is sent as
        # an unrelated header and the default requests agent is used instead.
        "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/77.0.3865.75 Safari/537.36",
    }
    # Bound the wait so an unresponsive server cannot hang the scraper.
    response = requests.get(url, headers=headers, timeout=10)
    # Fail with a clear HTTP error instead of trying to parse an error page.
    response.raise_for_status()
    text = response.content.decode("utf-8")
    soup = BeautifulSoup(text, "html5lib")
    conMidtab = soup.find('div', class_='conMidtab')
    tables = conMidtab.find_all('table')
    for table in tables:
        # The first two rows of each table are skipped
        # (presumably header rows -- confirm against the page markup).
        trs = table.find_all('tr')[2:]
        for index, tr in enumerate(trs):
            tds = tr.find_all('td')
            # In the first remaining row the city name is taken from the
            # second cell (presumably a spanning region cell comes first);
            # in every other row it is the first cell.
            city_td = tds[1] if index == 0 else tds[0]
            city = list(city_td.stripped_strings)[0]
            # The minimum temperature is read from the second-to-last cell.
            temp_td = tds[-2]
            min_temp = list(temp_td.stripped_strings)[0]
            ALL_DATA.append({'city': city, 'min_temp': int(min_temp)})


def main():
    """Scrape all regional pages and chart the ten lowest minimum temperatures.

    Every URL is passed to ``parse_page``, which fills ``ALL_DATA``. The
    collected records are then sorted in place by ascending ``min_temp``
    and the first ten are rendered as a bar chart to ``temperature.html``.
    """
    # A tuple (not a set) keeps the visiting order fixed. The sort below is
    # stable, so cities with equal temperatures keep their insertion order;
    # with a set that order, and therefore the top ten, could differ
    # between runs.
    urls = (
        "http://www.weather.com.cn/textFC/hz.shtml",
        "http://www.weather.com.cn/textFC/db.shtml",
        "http://www.weather.com.cn/textFC/hb.shtml",
        "http://www.weather.com.cn/textFC/xb.shtml",
        "http://www.weather.com.cn/textFC/gat.shtml",
        "http://www.weather.com.cn/textFC/hn.shtml",
        "http://www.weather.com.cn/textFC/xn.shtml",
        "http://www.weather.com.cn/textFC/hd.shtml",
    )
    for url in urls:
        parse_page(url)

    # Analyse the data: sort by minimum temperature, lowest first.
    ALL_DATA.sort(key=lambda data: data['min_temp'])

    # Keep only the ten coldest entries for the chart.
    data = ALL_DATA[0: 10]
    cities = [entry['city'] for entry in data]
    temps = [entry['min_temp'] for entry in data]
    chart = Bar()
    chart.add_xaxis(cities)
    chart.add_yaxis('', temps)
    chart.render('temperature.html')


# Run the scraper only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == '__main__':
    main()

发布了61 篇原创文章 · 获赞 48 · 访问量 4453

猜你喜欢

转载自blog.csdn.net/weixin_45257157/article/details/103335922
今日推荐