import matplotlib.pyplot as plt
import pandas as pd

# Explore the catering sales spreadsheet: print summary statistics, report
# the number of missing values, and draw a box plot whose outliers
# ("fliers") are labelled with their values.

# Raw string so the backslashes in the Windows path are taken literally
# instead of relying on '\p', '\D' and '\c' being unrecognised escapes.
catering_sale = r'B:\pycharm\DataMining\catering_sale.xls'

# Use the date column as the index.
data = pd.read_excel(catering_sale, index_col=u'日期')

describe = data.describe()
print(describe)

# 'count' is the number of non-null values per described column, so the
# difference from the row count is the number of missing values.
# describe.loc['count'] is a Series; take its first entry explicitly rather
# than calling int() on the whole Series.
# NOTE(review): assumes the first described column is the one of interest
# (the original code only worked when exactly one column was described).
count = describe.loc['count']
print("缺省值 % d" % (len(data) - int(count.iloc[0])))

# Configure a font that contains CJK glyphs so Chinese labels render, and
# keep the minus sign displayable with that font.
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False

plt.figure()
# return_type='dict' exposes the plotted artists; 'fliers' holds the outliers.
box_artists = data.boxplot(return_type='dict')
fliers = box_artists['fliers'][0]

# Sort the outliers by value into a new list, keeping each x with its y and
# leaving the arrays owned by the plotted artist untouched.
outliers = sorted(zip(fliers.get_xdata(), fliers.get_ydata()),
                  key=lambda point: point[1])

previous_value = None
for x_pos, value in outliers:
    if previous_value is None or value == previous_value:
        # First outlier, or a duplicate of the previous value: use the plain
        # offset. (A zero gap would otherwise divide by zero and push the
        # label to a non-finite position.)
        x_shift = 0.08
    else:
        # The closer this outlier is to the previous one, the further left
        # its label is moved, to reduce overlap between adjacent labels.
        x_shift = 0.05 - 0.8 / (value - previous_value)
    plt.annotate(value, xy=(x_pos, value), xytext=(x_pos + x_shift, value))
    previous_value = value

plt.show()