# A simple weather crawler can be implemented in Python.
# The code is as follows:
import requests
# This package must be imported
from bs4 import BeautifulSoup
import re
# Data source (change the place name in the URL to fetch other cities).
INDEX_URL = "https://hubei.weather.com.cn/xiaogan/index.shtml"
# Seconds to wait on the server; without a timeout requests.get() may block
# forever on an unresponsive host.
REQUEST_TIMEOUT = 10
# Matches hrefs of the form "<word>/weather/<digits>".
CITY_LINK_PATTERN = re.compile(r'(\w+)/weather/(\d+)')


def _fetch_soup(url):
    """Download ``url`` and return its body parsed with BeautifulSoup.

    The response is decoded as UTF-8. Raises ``requests.HTTPError`` for a
    4xx/5xx status instead of parsing an error page, and a requests timeout
    exception if the server does not answer within ``REQUEST_TIMEOUT``.
    """
    response = requests.get(url, timeout=REQUEST_TIMEOUT)
    response.raise_for_status()
    response.encoding = "utf-8"
    return BeautifulSoup(response.text, 'html.parser')


def _format_day(li):
    """Return one report line for a forecast ``<li>`` element.

    The line has the form ``date:cloud,high/low,wind`` followed by a newline.
    """
    date = li.h1.string
    cloud = li.p.attrs['title']
    # Look each paragraph up once instead of once per field.
    tem = li.find('p', attrs={'class': 'tem'})
    win = li.find('p', attrs={'class': 'win'})
    # The high-temperature <span> can be absent; fall back to a placeholder.
    high = tem.span.string if tem.span is not None else '暂无最高温'
    low = tem.i.string
    # Wind description (title attribute) followed by the text of the <i> tag.
    wind = win.em.span.attrs['title'] + win.i.string
    return "{}:{},{}/{},{}\n".format(date, cloud, high, low, wind)


forecast_box = _fetch_soup(INDEX_URL).find('div', 'forecastBox')
if forecast_box is None:
    # Fail with a clear message rather than an AttributeError on None.
    raise RuntimeError("no <div class='forecastBox'> found at " + INDEX_URL)

# Collect the href of every <a> in the forecast box that matches the pattern.
city_link = [
    anchor.attrs['href']
    for anchor in forecast_box.find_all(name='a', attrs={'href': CITY_LINK_PATTERN})
]

weather_list = []
for link in city_link:
    page = _fetch_soup(link)
    # The city name is read from the fourth <span> of the breadcrumb div.
    city = page.find('div', 'crumbs fl').find_all('span')[3].string
    days = page.find('ul', 't clearfix').find_all('li')
    weather_list.append({
        'city': city,
        'weather': ''.join(_format_day(li) for li in days),
    })

# Append mode: repeated runs add to the existing report file.
with open('soup_weather.txt', 'a', encoding='utf-8') as fp:
    for entry in weather_list:
        fp.write(entry['city'] + '\n')
        fp.write(entry['weather'] + '\n\n')