pandas实战练习09

需求描述:

用直观(可视化)的方式展示:自 COVID-19 疫情开始至今,美国人是更担心被感染,还是更担心经济受到影响
• covid_concern_toplines.csv
• subject:concern-infected 关心感染,concern-economy 关心经济
• very_estimate:非常关注
• somewhat_estimate:有些关注
• not_very_estimate:不是非常关注
• not_at_all_estimate:一点也不关注

#!/usr/bin/env python
#-*-coding: utf-8-*-
#@Time           : 2020/9/18 16:27
#@Author         : GodSpeed
#@File           : 实战练习09.py
#@Software       : PyCharm

import pandas as pd
import numpy as np
from matplotlib import pyplot as plt

plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False

import pyecharts.options as opts   # 导入配置项
from pyecharts.charts import Line  # 导入折线图类

#创建类,封装数据处理
class Covid_concern(object):
    """Load a COVID concern poll CSV and chart one subject's monthly averages.

    The CSV is expected to contain a ``subject`` column, a ``modeldate``
    column parseable by ``pandas.to_datetime``, and the four estimate
    columns listed in ``ESTIMATE_COLUMNS``.

    Args:
        file_name: Path of the CSV file to read.
        subject: Value of the ``subject`` column to keep
            (e.g. ``"concern-infected"`` or ``"concern-economy"``).
    """

    # Estimate columns that are averaged per month and plotted, in legend order.
    ESTIMATE_COLUMNS = (
        "very_estimate",
        "somewhat_estimate",
        "not_very_estimate",
        "not_at_all_estimate",
    )

    def __init__(self, file_name, subject):
        self.file_name = file_name
        self.subject = subject

    def read_csvfile(self):
        """Read the CSV at ``self.file_name`` and return it as a DataFrame."""
        return pd.read_csv(self.file_name)

    def processing_data(self):
        """Return month-start averages of the estimate columns for the subject.

        Daily rows are too dense to plot, so they are down-sampled to one
        row per calendar month (``"MS"`` = month start) using the mean.

        Returns:
            A DataFrame indexed by ``modeldate`` (month start) whose columns
            are exactly ``ESTIMATE_COLUMNS``.

        Raises:
            KeyError: If ``subject``, ``modeldate`` or an estimate column is
                missing from the CSV.
        """
        raw = self.read_csvfile()

        # .copy() gives an independent frame, so the column assignment below
        # does not write into a slice of ``raw`` (SettingWithCopyWarning).
        selected = raw[raw["subject"] == self.subject].copy()
        selected["modeldate"] = pd.to_datetime(selected["modeldate"])
        selected = selected.set_index("modeldate")

        # Pick the numeric estimate columns *before* averaging: taking the
        # mean of the whole frame would include string columns such as
        # ``subject``, which recent pandas versions refuse to average.
        estimates = selected[list(self.ESTIMATE_COLUMNS)]
        return estimates.resample("MS").mean()

    def draw_fun(self):
        """Plot the monthly averages as a matplotlib line chart.

        The figure is only created here; the caller decides when to display
        it (e.g. with ``plt.show()``).

        Returns:
            The object returned by ``DataFrame.plot`` (a matplotlib Axes).
        """
        monthly = self.processing_data()
        return monthly.plot(figsize=(12, 6))

    @staticmethod
    def _rounded_values(series):
        """Convert a numeric Series to a plain list of rounded ints.

        pyecharts needs native Python lists. Missing values (months without
        any rows) become ``None`` instead of raising during rounding.
        """
        return [None if pd.isna(value) else int(round(value)) for value in series]

    def draw_pyecharts(self, output_path="covid_concern_toplines.html"):
        """Render the monthly averages as a smoothed pyecharts line chart.

        Args:
            output_path: HTML file to write. Defaults to the file name the
                chart has always been written to.

        Returns:
            Whatever ``Line.render`` returns (in pyecharts, the path of the
            rendered HTML file).
        """
        monthly = self.processing_data()

        # Only a DatetimeIndex offers strftime; format as ISO dates for the axis.
        xaxis = [stamp.strftime("%Y-%m-%d") for stamp in monthly.index]

        chart = Line().add_xaxis(xaxis)
        for column in self.ESTIMATE_COLUMNS:
            # Rounded values keep the point labels short and readable.
            chart.add_yaxis(
                column,
                self._rounded_values(monthly[column]),
                is_smooth=True,
            )
        chart.set_global_opts(
            title_opts=opts.TitleOpts(title="covid_concern_toplines")
        )
        return chart.render(output_path)

if __name__ == '__main__':
    # Both charts are built from the same poll file; only the subject differs.
    csv_path = "covid/covid_concern_toplines.csv"

    # Concern about infection: matplotlib line chart. draw_fun() only builds
    # the figure, so it has to be shown explicitly or it is never displayed.
    Covid_concern(csv_path, "concern-infected").draw_fun()
    plt.show()

    # Concern about the economy: pyecharts chart rendered to a local HTML file.
    Covid_concern(csv_path, "concern-economy").draw_pyecharts()

在这里插入图片描述

猜你喜欢

转载自blog.csdn.net/Narutolxy/article/details/108676344