import numpy as np
import pandas as pd
from pandas import Series, DataFrame
# Build a DatetimeIndex running from 2016-01-01 through 2016-12-31.
# No freq is given, so pandas falls back to its default daily frequency.
t_range = pd.date_range(start='2016-01-01', end='2016-12-31')
t_range
Out[5]:
DatetimeIndex(['2016-01-01', '2016-01-02', '2016-01-03', '2016-01-04',
'2016-01-05', '2016-01-06', '2016-01-07', '2016-01-08',
'2016-01-09', '2016-01-10',
...
'2016-12-22', '2016-12-23', '2016-12-24', '2016-12-25',
'2016-12-26', '2016-12-27', '2016-12-28', '2016-12-29',
'2016-12-30', '2016-12-31'],
dtype='datetime64[ns]', length=366, freq='D')
# Create a Series holding one standard-normal random value per timestamp
# in t_range, indexed by those timestamps.
s1 = Series(data=np.random.randn(t_range.size), index=t_range)
s1
Out[9]:
2016-01-01 -1.408484
2016-01-02 -0.530784
2016-01-03 0.659089
2016-01-04 1.468151
2016-01-05 0.678399
2016-01-06 -2.446700
2016-01-07 -0.403404
2016-01-08 0.623137
2016-01-09 1.068036
2016-01-10 1.406340
2016-01-11 -0.925950
2016-01-12 0.886542
2016-01-13 0.527620
2016-01-14 0.177987
2016-01-15 -0.843907
2016-01-16 1.271302
2016-01-17 1.646341
2016-01-18 -0.420305
2016-01-19 -1.552205
2016-01-20 -0.884822
2016-01-21 0.660273
2016-01-22 0.945790
2016-01-23 1.698283
2016-01-24 0.668180
2016-01-25 1.470522
2016-01-26 0.687848
2016-01-27 0.033351
2016-01-28 -0.844644
2016-01-29 0.472518
2016-01-30 -0.920086
...
2016-12-02 0.313375
2016-12-03 0.458618
2016-12-04 0.197696
2016-12-05 1.238550
2016-12-06 2.249532
2016-12-07 1.095712
2016-12-08 0.693674
2016-12-09 -0.377020
2016-12-10 0.532677
2016-12-11 1.714745
2016-12-12 0.124774
2016-12-13 -0.372079
2016-12-14 -0.932541
2016-12-15 -0.320267
2016-12-16 -0.719403
2016-12-17 -2.012314
2016-12-18 -0.510938
2016-12-19 -0.354006
2016-12-20 -0.351626
2016-12-21 -0.653467
2016-12-22 0.169920
2016-12-23 0.588163
2016-12-24 -0.692558
2016-12-25 -0.078781
2016-12-26 -0.157261
2016-12-27 -1.480809
2016-12-28 1.586904
2016-12-29 -0.791816
2016-12-30 0.951799
2016-12-31 1.283303
Freq: D, Length: 366, dtype: float64
# Select the January 2016 data: a partial date string on the DatetimeIndex
# picks every entry that falls inside that month.
s1.loc['2016-01']
Out[10]:
2016-01-01 -1.408484
2016-01-02 -0.530784
2016-01-03 0.659089
2016-01-04 1.468151
2016-01-05 0.678399
2016-01-06 -2.446700
2016-01-07 -0.403404
2016-01-08 0.623137
2016-01-09 1.068036
2016-01-10 1.406340
2016-01-11 -0.925950
2016-01-12 0.886542
2016-01-13 0.527620
2016-01-14 0.177987
2016-01-15 -0.843907
2016-01-16 1.271302
2016-01-17 1.646341
2016-01-18 -0.420305
2016-01-19 -1.552205
2016-01-20 -0.884822
2016-01-21 0.660273
2016-01-22 0.945790
2016-01-23 1.698283
2016-01-24 0.668180
2016-01-25 1.470522
2016-01-26 0.687848
2016-01-27 0.033351
2016-01-28 -0.844644
2016-01-29 0.472518
2016-01-30 -0.920086
2016-01-31 -1.070854
Freq: D, dtype: float64
# Average of the January 2016 values
s1.loc['2016-01'].mean()
Out[11]: 0.15476017406190043
# Downsample the daily series into calendar-month bins (each labelled with
# its month-end date) and take the mean of every bin.
# The MonthEnd offset object is passed instead of the 'M' string alias:
# pandas 2.2 deprecated 'M' (renamed to 'ME'), whereas the offset object is
# accepted unchanged by both older and newer pandas versions.
s1_month = s1.resample(pd.offsets.MonthEnd()).mean()
s1_month
Out[13]:
2016-01-31 0.154760
2016-02-29 -0.171572
2016-03-31 -0.127375
2016-04-30 -0.298081
2016-05-31 0.332433
2016-06-30 -0.317580
2016-07-31 -0.239776
2016-08-31 -0.020108
2016-09-30 -0.276503
2016-10-31 -0.121938
2016-11-30 -0.025510
2016-12-31 0.088271
Freq: M, dtype: float64
# Upsample to hourly frequency with forward fill (ffill): every new hourly
# slot takes the most recent earlier observation, e.g. all hours of Jan 1
# carry the Jan 1 value.
# The Hour offset object replaces the 'H' string alias, which pandas 2.2
# deprecated in favour of 'h'; the offset object works on old and new
# pandas versions alike.
s1.resample(pd.offsets.Hour()).ffill()
Out[14]:
2016-01-01 00:00:00 -1.408484
2016-01-01 01:00:00 -1.408484
2016-01-01 02:00:00 -1.408484
2016-01-01 03:00:00 -1.408484
2016-01-01 04:00:00 -1.408484
2016-01-01 05:00:00 -1.408484
2016-01-01 06:00:00 -1.408484
2016-01-01 07:00:00 -1.408484
2016-01-01 08:00:00 -1.408484
2016-01-01 09:00:00 -1.408484
2016-01-01 10:00:00 -1.408484
2016-01-01 11:00:00 -1.408484
2016-01-01 12:00:00 -1.408484
2016-01-01 13:00:00 -1.408484
2016-01-01 14:00:00 -1.408484
2016-01-01 15:00:00 -1.408484
2016-01-01 16:00:00 -1.408484
2016-01-01 17:00:00 -1.408484
2016-01-01 18:00:00 -1.408484
2016-01-01 19:00:00 -1.408484
2016-01-01 20:00:00 -1.408484
2016-01-01 21:00:00 -1.408484
2016-01-01 22:00:00 -1.408484
2016-01-01 23:00:00 -1.408484
2016-01-02 00:00:00 -0.530784
2016-01-02 01:00:00 -0.530784
2016-01-02 02:00:00 -0.530784
2016-01-02 03:00:00 -0.530784
2016-01-02 04:00:00 -0.530784
2016-01-02 05:00:00 -0.530784
...
2016-12-29 19:00:00 -0.791816
2016-12-29 20:00:00 -0.791816
2016-12-29 21:00:00 -0.791816
2016-12-29 22:00:00 -0.791816
2016-12-29 23:00:00 -0.791816
2016-12-30 00:00:00 0.951799
2016-12-30 01:00:00 0.951799
2016-12-30 02:00:00 0.951799
2016-12-30 03:00:00 0.951799
2016-12-30 04:00:00 0.951799
2016-12-30 05:00:00 0.951799
2016-12-30 06:00:00 0.951799
2016-12-30 07:00:00 0.951799
2016-12-30 08:00:00 0.951799
2016-12-30 09:00:00 0.951799
2016-12-30 10:00:00 0.951799
2016-12-30 11:00:00 0.951799
2016-12-30 12:00:00 0.951799
2016-12-30 13:00:00 0.951799
2016-12-30 14:00:00 0.951799
2016-12-30 15:00:00 0.951799
2016-12-30 16:00:00 0.951799
2016-12-30 17:00:00 0.951799
2016-12-30 18:00:00 0.951799
2016-12-30 19:00:00 0.951799
2016-12-30 20:00:00 0.951799
2016-12-30 21:00:00 0.951799
2016-12-30 22:00:00 0.951799
2016-12-30 23:00:00 0.951799
2016-12-31 00:00:00 1.283303
Freq: H, Length: 8761, dtype: float64
# Upsample to hourly frequency with backward fill (bfill): every new hourly
# slot takes the next later observation, e.g. the hours after midnight on
# Jan 1 carry the Jan 2 value.
# The Hour offset object replaces the 'H' string alias, which pandas 2.2
# deprecated in favour of 'h'; the offset object works on old and new
# pandas versions alike.
s1.resample(pd.offsets.Hour()).bfill()
Out[15]:
2016-01-01 00:00:00 -1.408484
2016-01-01 01:00:00 -0.530784
2016-01-01 02:00:00 -0.530784
2016-01-01 03:00:00 -0.530784
2016-01-01 04:00:00 -0.530784
2016-01-01 05:00:00 -0.530784
2016-01-01 06:00:00 -0.530784
2016-01-01 07:00:00 -0.530784
2016-01-01 08:00:00 -0.530784
2016-01-01 09:00:00 -0.530784
2016-01-01 10:00:00 -0.530784
2016-01-01 11:00:00 -0.530784
2016-01-01 12:00:00 -0.530784
2016-01-01 13:00:00 -0.530784
2016-01-01 14:00:00 -0.530784
2016-01-01 15:00:00 -0.530784
2016-01-01 16:00:00 -0.530784
2016-01-01 17:00:00 -0.530784
2016-01-01 18:00:00 -0.530784
2016-01-01 19:00:00 -0.530784
2016-01-01 20:00:00 -0.530784
2016-01-01 21:00:00 -0.530784
2016-01-01 22:00:00 -0.530784
2016-01-01 23:00:00 -0.530784
2016-01-02 00:00:00 -0.530784
2016-01-02 01:00:00 0.659089
2016-01-02 02:00:00 0.659089
2016-01-02 03:00:00 0.659089
2016-01-02 04:00:00 0.659089
2016-01-02 05:00:00 0.659089
...
2016-12-29 19:00:00 0.951799
2016-12-29 20:00:00 0.951799
2016-12-29 21:00:00 0.951799
2016-12-29 22:00:00 0.951799
2016-12-29 23:00:00 0.951799
2016-12-30 00:00:00 0.951799
2016-12-30 01:00:00 1.283303
2016-12-30 02:00:00 1.283303
2016-12-30 03:00:00 1.283303
2016-12-30 04:00:00 1.283303
2016-12-30 05:00:00 1.283303
2016-12-30 06:00:00 1.283303
2016-12-30 07:00:00 1.283303
2016-12-30 08:00:00 1.283303
2016-12-30 09:00:00 1.283303
2016-12-30 10:00:00 1.283303
2016-12-30 11:00:00 1.283303
2016-12-30 12:00:00 1.283303
2016-12-30 13:00:00 1.283303
2016-12-30 14:00:00 1.283303
2016-12-30 15:00:00 1.283303
2016-12-30 16:00:00 1.283303
2016-12-30 17:00:00 1.283303
2016-12-30 18:00:00 1.283303
2016-12-30 19:00:00 1.283303
2016-12-30 20:00:00 1.283303
2016-12-30 21:00:00 1.283303
2016-12-30 22:00:00 1.283303
2016-12-30 23:00:00 1.283303
2016-12-31 00:00:00 1.283303
Freq: H, Length: 8761, dtype: float64
# Plotting
# Build a new hourly DatetimeIndex from 2016-01-01 00:00 to 2016-12-31 00:00.
# The Hour offset object is used instead of the 'H' string alias, which
# pandas 2.2 deprecated in favour of 'h'; the offset object is accepted by
# both older and newer pandas versions.
t_range = pd.date_range('2016-01-01', '2016-12-31', freq=pd.offsets.Hour())
t_range
Out[17]:
DatetimeIndex(['2016-01-01 00:00:00', '2016-01-01 01:00:00',
'2016-01-01 02:00:00', '2016-01-01 03:00:00',
'2016-01-01 04:00:00', '2016-01-01 05:00:00',
'2016-01-01 06:00:00', '2016-01-01 07:00:00',
'2016-01-01 08:00:00', '2016-01-01 09:00:00',
...
'2016-12-30 15:00:00', '2016-12-30 16:00:00',
'2016-12-30 17:00:00', '2016-12-30 18:00:00',
'2016-12-30 19:00:00', '2016-12-30 20:00:00',
'2016-12-30 21:00:00', '2016-12-30 22:00:00',
'2016-12-30 23:00:00', '2016-12-31 00:00:00'],
dtype='datetime64[ns]', length=8761, freq='H')
# Build a DataFrame indexed by t_range with two columns of random integers
# that simulate stock prices: BABA drawn from [80, 100) and TENCENT from
# [30, 50). BABA is generated first, then TENCENT.
stock_df = DataFrame(
    {
        'BABA': np.random.randint(80, 100, size=len(t_range)),
        'TENCENT': np.random.randint(30, 50, size=len(t_range)),
    },
    index=t_range,
)
# Draw both columns as a chart
stock_df.plot()
Out[22]: <matplotlib.axes._subplots.AxesSubplot at 0x2259b52fa90>
# The hourly data is too dense to read on a chart, so re-aggregate it by week:
# resample('W') groups the rows into weekly bins and mean() averages each bin
# for both stock columns at once.
weekly_df = stock_df[['BABA', 'TENCENT']].resample('W').mean()
weekly_df.head()
Out[31]:
BABA TENCENT
2016-01-03 89.250000 39.430556
2016-01-10 89.065476 38.595238
2016-01-17 89.363095 39.410714
2016-01-24 89.482143 38.857143
2016-01-31 89.869048 40.309524
# Draw the weekly averages as a line chart
weekly_df.plot(kind='line')
Out[32]: <matplotlib.axes._subplots.AxesSubplot at 0x2259bf84e48>