天气情况爬虫

以网页《重庆沙坪坝区2021年02月份历史天气数据查询 - 重庆沙坪坝气温》所在的站点为数据来源,爬取重庆沙坪坝区2021年01-12月份的历史天气数据,代码如下:


import requests
import pandas as pd
from bs4 import BeautifulSoup
import csv
def isexsit(li):
    """Merge the tokens around the first '/' element into a single token.

    If ``li`` contains an element equal to ``'/'`` that has a neighbour on
    both sides, the three elements ``[prev, '/', next]`` are replaced in
    place by the single string ``'prev/next'``. Only the first ``'/'`` is
    handled. The merged token is also printed.

    Args:
        li: A list of string tokens; modified in place.

    Returns:
        The same list object ``li``.
    """
    if '/' not in li:
        return li
    index = li.index('/')
    # A '/' at either end has no neighbour on one side: ``index - 1`` would
    # wrap around to the last element and ``index + 1`` would raise
    # IndexError, so such lists are returned untouched.
    if index == 0 or index == len(li) - 1:
        return li
    zp = f"{li[index - 1]}{li[index]}{li[index + 1]}"
    print(zp)
    # Replace the three original tokens with the merged one in a single step.
    li[index - 1:index + 2] = [zp]
    return li
def not_empty(s):
    """Return ``s`` itself when it is falsy, otherwise ``s.strip()``.

    Intended as a ``filter`` predicate: the result is truthy only when ``s``
    contains at least one non-whitespace character.
    """
    if not s:
        return s
    return s.strip()
def get_data(url, i, filename, timeout=30):
    """Download one page and append the text of its ``<li>`` elements to a CSV.

    The page at ``url`` is fetched and parsed with BeautifulSoup. Every
    ``<li>`` element after a fixed number of leading ones is split on
    whitespace, passed through ``isexsit`` to merge tokens around a ``'/'``,
    printed, and written as one CSV row appended to ``filename``.

    Args:
        url: Address of the page to fetch.
        i: Month number. When ``i == 1`` the first 5 ``<li>`` elements are
            skipped; otherwise the first 6 are skipped.
            NOTE(review): presumably the extra element kept for ``i == 1`` is
            the table header, so it is written only once — confirm against
            the page markup.
        filename: Path of the CSV file; opened in append mode.
        timeout: Seconds to wait for the server before giving up.

    Returns:
        None.

    Raises:
        requests.RequestException: If the request fails, times out, or the
            server answers with an HTTP error status.
    """
    # Without a timeout the request can block forever on a stalled server.
    resp = requests.get(url, timeout=timeout)
    # Fail loudly instead of writing the contents of an error page as rows.
    resp.raise_for_status()

    html = resp.content.decode('UTF-8')
    soup = BeautifulSoup(html, 'html.parser')
    li_list = soup.find_all('li')

    # The two original branches differed only in the slice start and the
    # label printed in front of each row.
    if i == 1:
        skip, label = 5, "sub_data1:"
    else:
        skip, label = 6, "sub_data:"

    # NOTE: no explicit encoding is given, so the platform default is used
    # and errors='ignore' silently drops characters it cannot encode.
    with open(filename, 'a', errors='ignore', newline='') as f:
        f_csv = csv.writer(f)
        for data in li_list[skip:]:
            sub_data = list(filter(not_empty, data.text.split()))
            sub_data = isexsit(sub_data)
            print(label, sub_data)
            f_csv.writerow(sub_data)
# `year` is the year to scrape.
# `month` is the last month to fetch; e.g. month = 11 fetches the data for
# months 1 through 11.
year = '2021'
month = 12
for i in range(1, month + 1):
    print(i)
    # Page address for the given year and zero-padded month number.
    page_url = "https://www.tianqi24.com/shapingba/history{0}{1:0>2d}.html".format(year, i)
    a = get_data(page_url, i, filename=f"res{year}.csv")

结果:

猜你喜欢

转载自blog.csdn.net/lishijie258/article/details/127507739