June weather analysis Xi'an

June weather analysis Xi'an

1. Data crawling

1. requestsRequest page

2. Beautifulsoupparsing the page

`

def get_data(self,url):
    """
    爬取天气数据
    以列表的形式返回数据
    """
    resp = requests.get(url)
    html = resp.content.decode('gbk')
    soup = BeautifulSoup(html, 'html.parser') # 和网页源代码格式不同,即将其格式化
    tr_list = soup.find_all('tr')
    return tr_list

`

2. Data Retention

1. Save as csvfile

`

def saving_data(self,tr_list):
    """
    将出入的列表的内容
    保存数据为CSV文件
    """
    dates, conditions, h_temps, l_temps = [], [], [], []
    for data in tr_list[1:]:
        s_datas = data.text.split()
        dates.append(s_datas[0])
        conditions.append(''.join(s_datas[1:3]))
        h_temps.append(s_datas[3])
        l_temps.append(s_datas[5])
    _data=pd.DataFrame()
    _data['日期']=dates
    _data['天气状况']=conditions
    _data['最高温度']=h_temps
    _data['最低温度']=l_temps
    _data.to_csv("xi'an weather condition.csv",index=False,encoding='utf-8')#不保存索引号

`

3. Data processing

1. The data read

2. Data cleansing

(1) whether the default

(2) negative interference dirty data

3. Data processing

Processing (1) of the string

(2) change the data type

`

def data_cleaning(self,path):
    """
    清洗爬取下的数据
    将温度的格式转化为整数类型
    """
    data = pd.read_csv(path)
    # print(data.head(5))
    # print(data.isnull())
    # print((data.isnull()).sum())
    data['最高温度\摄氏度'] = data['最高温度'].map(lambda x: int(x.replace('℃', '')))
    data['最低温度\摄氏度'] = data['最低温度'].map(lambda x: int(x.replace('℃', '')))
    data.to_csv("xi'an weather condition.csv",index=False,encoding='utf-8')#不保存索引号

`

3. Data Visualization

1. Chinese coding problem solving

`

plt.rcParams['font.sans-serif'] = ['SimHei']  # 解决中文问题
plt.rcParams['axes.unicode_minus'] = False  # 解决负号问题

`

2. Style line graph showing data

`

def data_visualization(self,path):
    """
    数据可视化

    """
    plt.rcParams['font.sans-serif'] = ['SimHei']  # 解决中文问题
    plt.rcParams['axes.unicode_minus'] = False  # 解决负号问题
    data = pd.read_csv(path)
    dates = data['日期']
    highs = data['最高温度\摄氏度']
    lows = data['最低温度\摄氏度']
    # #根据数据绘制图形
    fig = plt.figure(dpi=128, figsize=(10, 6))
    plt.plot(dates, highs, c="red", alpha=0.5)
    plt.plot(dates, lows, c="green", alpha=0.5)
    # 给图表区域着色
    plt.fill_between(dates, highs, lows, facecolor='blue', alpha=0.2)  
    # fill_between(x,y.z,facecolor,alpha)
    # 设置图形格式
    plt.title("西安2019年6月份天气分析", fontsize=24)
    plt.xlabel("日期", fontsize=12)
    fig.autofmt_xdate()
    plt.ylabel("气温", fontsize=12)
    # 参数刻度线样式的设置
    plt.tick_params(axis="both", which="major", labelsize=10)
    # 修改刻度
    plt.xticks(dates[::1])#步长为1
    # 显示2019年六月份每日最高气温的折线图
    plt.show()

`

4. The main function

`

def main():
    #需要爬取的网页的URL
    url = 'http://www.tianqihoubao.com/lishi/xian/month/201906.html'
    #爬取的数据保存路径
    path=r"D:\py项目\爬虫\xi'an weather condition.csv"
    #类的实例化
    weather = Weather()
    #调用类的方法获取数据(返回的是列表)
    list=weather.get_data(url)
    #调用类的方法保存数据(csv文件)
    weather.saving_data(list)
    #调用类的方法清下数据
    weather.data_cleaning(path)
    #调用类的方法可视化
    weather.data_visualization(path)
if __name__ == '__main__':
    main()

`

Guess you like

Origin blog.csdn.net/qq_45066719/article/details/94216600