需求描述:
通过直观的可视化方式展示:自 COVID-19 疫情开始至今,美国人更担心被感染,还是更担心经济
• covid_concern_toplines.csv
• subject:concern-infected 关心感染,concern-economy 关心经济
• very_estimate:非常关注
• somewhat_estimate:有些关注
• not_very_estimate:不是非常关注
• not_at_all_estimate:一点也不关注
#!/usr/bin/env python
#-*-coding: utf-8-*-
#@Time : 2020/9/18 16:27
#@Author : GodSpeed
#@File : 实战练习09.py
#@Software : PyCharm
import pandas as pd
import numpy as np
from matplotlib import pyplot as plt
# Make SimHei the sans-serif font so Chinese text in matplotlib labels can render
# (requires the SimHei font to be installed on the machine).
plt.rcParams['font.sans-serif'] = ['SimHei']
# Draw negative numbers with the ASCII hyphen rather than the Unicode minus sign;
# presumably needed because SimHei lacks that glyph -- TODO confirm.
plt.rcParams['axes.unicode_minus'] = False
import pyecharts.options as opts # 导入配置项
from pyecharts.charts import Line # 导入折线图类
#创建类,封装数据处理
class Covid_concern(object):
    """Load the COVID-19 concern poll toplines and chart one subject by month.

    The CSV must provide a ``subject`` column, a ``modeldate`` column and the
    four answer-share columns listed in ``ESTIMATE_COLUMNS``.
    """

    # Answer-share columns that are averaged and plotted, in legend order.
    ESTIMATE_COLUMNS = [
        "very_estimate",
        "somewhat_estimate",
        "not_very_estimate",
        "not_at_all_estimate",
    ]

    def __init__(self, file_name, subject):
        """Remember which file to read and which subject to keep.

        Args:
            file_name: Path (or anything ``pandas.read_csv`` accepts) of the
                toplines CSV.
            subject: Value of the ``subject`` column to keep, e.g.
                ``"concern-infected"`` or ``"concern-economy"``.
        """
        self.file_name = file_name
        self.subject = subject

    def read_csvfile(self):
        """Read the whole CSV into a DataFrame and return it unfiltered."""
        return pd.read_csv(self.file_name)

    def processing_data(self):
        """Return the monthly mean of each estimate column for the subject.

        Returns:
            DataFrame indexed by month start (``"MS"`` resampling of
            ``modeldate``) holding only the ``ESTIMATE_COLUMNS``.

        Raises:
            KeyError: If ``subject``, ``modeldate`` or an estimate column is
                missing from the CSV.
        """
        df = self.read_csvfile()
        # .copy() yields an independent frame, so the column assignment below
        # does not write into a slice of the full table
        # (SettingWithCopyWarning).
        df = df[df["subject"] == self.subject].copy()
        # modeldate is read as text; resampling needs a real datetime index.
        df["modeldate"] = pd.to_datetime(df["modeldate"])
        # Keep only the numeric estimate columns *before* averaging: the
        # remaining text columns cannot be averaged and make mean() raise
        # TypeError on pandas >= 2.0.
        estimates = df.set_index("modeldate")[self.ESTIMATE_COLUMNS]
        # Daily rows are too dense to read, so downsample to one row per month.
        return estimates.resample("MS").mean()

    def draw_fun(self):
        """Plot the monthly estimates as a matplotlib line chart.

        The figure is only created here; the caller decides when to call
        ``plt.show()``.

        Returns:
            The matplotlib Axes produced by ``DataFrame.plot``.
        """
        monthly = self.processing_data()
        return monthly.plot(figsize=(12, 6))

    def draw_pyecharts(self, output_path="covid_concern_toplines.html"):
        """Render the monthly estimates as a pyecharts line chart.

        Args:
            output_path: Where the HTML page is written. Pass a different
                path per subject to avoid overwriting an earlier chart.

        Returns:
            The value returned by ``Line.render`` (the path of the written
            file).
        """
        monthly = self.processing_data()
        # pyecharts needs plain Python lists, not pandas/numpy containers.
        chart = Line().add_xaxis(self._month_labels(monthly.index))
        for column in self.ESTIMATE_COLUMNS:
            chart.add_yaxis(
                column,
                self._rounded_values(monthly[column]),
                is_smooth=True,
            )
        chart.set_global_opts(
            title_opts=opts.TitleOpts(title="covid_concern_toplines")
        )
        return chart.render(output_path)

    @staticmethod
    def _month_labels(index):
        """Format each timestamp of a DatetimeIndex as ``YYYY-MM-DD``."""
        return [stamp.strftime("%Y-%m-%d") for stamp in index]

    @staticmethod
    def _rounded_values(series):
        """Round a Series to a list of ints, mapping missing values to None.

        A month without any rows has a NaN mean, and ``round(NaN)`` raises
        ValueError, so such months are emitted as ``None`` instead.
        """
        return [
            None if pd.isna(value) else round(value)
            for value in series.tolist()
        ]
if __name__ == '__main__':
    # Matplotlib line chart: monthly concern about becoming infected.
    infected_concern = Covid_concern(
        "covid/covid_concern_toplines.csv", "concern-infected"
    )
    infected_concern.draw_fun()

    # pyecharts HTML chart: monthly concern about the economy.
    economy_concern = Covid_concern(
        "covid/covid_concern_toplines.csv", "concern-economy"
    )
    economy_concern.draw_pyecharts()

    # draw_fun() only builds the figure; without show() nothing is displayed
    # when this runs as a script. Called last because it blocks until the
    # window is closed.
    plt.show()