Obtain weather data through crawlers

With Python, a simple weather crawler can be implemented using the requests and BeautifulSoup libraries.

The specific code is as follows:

import requests
# This package must be imported
from bs4 import BeautifulSoup
import re
# Data source: the regional forecast index page (change the place name in the
# URL to fetch other cities).
# A timeout keeps the script from hanging forever on an unresponsive server.
r = requests.get("https://hubei.weather.com.cn/xiaogan/index.shtml", timeout=10)
# Fail early on an HTTP error status instead of parsing an error page.
r.raise_for_status()
# Force UTF-8 decoding of the response body.
r.encoding = "utf-8"
html_doc = r.text
soup = BeautifulSoup(html_doc, 'html.parser').find('div', 'forecastBox')
if soup is None:
    # find() returns None when no matching element exists; report that
    # clearly instead of failing later with an AttributeError on None.
    raise RuntimeError("forecastBox container not found in the index page")
# Collect the href of every <a> inside the forecast box whose link matches
# the "<word>/weather/<digits>" pattern.
city_link = [
    item.attrs['href']
    for item in soup.find_all(name='a', attrs={'href': re.compile(r'(\w+)/weather/(\d+)')})
]
# Visit every collected city link and build one {'city', 'weather'} record
# per page; 'weather' holds one text line per forecast entry.
weather_list = []
for item in city_link:
    # A timeout keeps one unresponsive page from stalling the whole run.
    r = requests.get(item, timeout=10)
    # Fail early on an HTTP error status instead of parsing an error page.
    r.raise_for_status()
    r.encoding = "utf-8"
    html_doc = r.text
    soup = BeautifulSoup(html_doc, 'html.parser')
    # The city name is read from the fourth <span> of the breadcrumb bar.
    weather_city = {'city': soup.find('div', 'crumbs fl').find_all('span')[3].string}
    lines = []
    for li in soup.find('ul', 't clearfix').find_all('li'):
        date = li.h1.string
        cloud = li.p.attrs['title']
        # Look up the temperature and wind paragraphs once and reuse them.
        tem = li.find('p', attrs={'class': 'tem'})
        win = li.find('p', attrs={'class': 'win'})
        # The high-temperature <span> may be absent; fall back to a
        # placeholder text in that case.
        high = tem.span
        high = high.string if high is not None else '暂无最高温'
        low = tem.i.string
        # Wind description: the title of the wind <span> followed by the
        # text of the <i> element.
        wind = win.em.span.attrs['title'] + win.i.string
        lines.append(f"{date}:{cloud},{high}/{low},{wind}\n")
    # Join once instead of growing a string with += on every iteration.
    weather = "".join(lines)
    weather_city['weather'] = weather
    weather_list.append(weather_city)
# Append every city's name and forecast text to the output file; each record
# is the city line, the forecast block, then two trailing newlines.
with open('soup_weather.txt', mode='a', encoding='utf-8') as out_file:
    for entry in weather_list:
        out_file.write(entry['city'] + '\n' + entry['weather'] + '\n\n')

Guess you like

Origin blog.csdn.net/Jocks5/article/details/121715941