06时间序列笔记

版权声明:本文为博主原创文章,未经博主允许不得转载。 https://blog.csdn.net/fantacy10000/article/details/82110152
#一.日期和时间数据类型及工具
#常用模块:datetime、time以及calendar模块
#常用数据类型:datetime.datetime是用的最多的数据类型

from datetime import datetime, timedelta

# Capture the current local time, then show it and its calendar fields.
now = datetime.now()
print(now)
print(now.year, now.month, now.day)

# 1. A datetime carries both a date and a time of day; this one is built
#    from explicit year, month, day, hour, minute and second values.
create_time = datetime(2008, 8, 8, 20, 12, 21)
print(create_time)

# 2. A timedelta is the difference between two datetimes and can be added
#    to one of them; here the offset is twelve days.
new_time = create_time + timedelta(days=12)
print(new_time)

# Subtracting one datetime from another yields a timedelta.
dalta = datetime(2011, 1, 7) - datetime(2008, 6, 24, 8, 15)
print(dalta)
#附:datetime模块中的数据类型
#1.date:以公历形式存储日历日期(年月日)
#2.time:将时间存储为时、分、秒、微秒
#3.datetime:存储日期和时间
2018-08-26 20:44:37.765000
2018 8 26
2008-08-08 20:12:21
2008-08-20 20:12:21
926 days, 15:45:00
#3.字符串和datetime的相互转换
#3.1.datetime对象转为字符串(str和strftime方法)
# Year-month-day pattern shared by the formatting and parsing examples below.
DAY_FORMAT = '%Y-%m-%d'

# 3.1 datetime -> string: str() gives the default text form, strftime() a
#     caller-chosen format.
stamp = datetime(2010, 1, 2)
default_text = str(stamp)
print(default_text)
formatted_text = stamp.strftime(DAY_FORMAT)
print(formatted_text)

# 3.2 string -> datetime: strptime() parses text against an explicit format.
value = '2011-2-4'
parsed_value = datetime.strptime(value, DAY_FORMAT)
print(parsed_value)

# Batch conversion of several date strings at once.
datetstr = ['2010-3-9', '2014-9-5']
date = [datetime.strptime(text, DAY_FORMAT) for text in datetstr]
print(date)

# 3.3 pandas.to_datetime parses many different date spellings.
import pandas as pd
dateset = ['7/6/2011', '3/4/1999']
tran = pd.to_datetime(dateset)
#附:NaT是pandas中时间戳数据的NA值
2010-01-02 00:00:00
2010-01-02
2011-02-04 00:00:00
[datetime.datetime(2010, 3, 9, 0, 0), datetime.datetime(2014, 9, 5, 0, 0)]
#二.时间序列基础
#pandas最基本的时间序列类型就是以时间戳为索引的Series
from datetime import datetime
import pandas as pd
import numpy as np
# Three consecutive daily timestamps that will label the values.
dates = [datetime(2011, 1, day) for day in (1, 2, 3)]
# An index containing a repeated date could be used instead, e.g.
# pd.DatetimeIndex(['1/1/2000', '1/2/2000', '1/2/2000', '1/3/2000']).
ts = pd.Series(np.random.randn(len(dates)), index=dates)
print(ts)
2011-01-01   -1.663693
2011-01-02    0.738571
2011-01-03    0.740516
dtype: float64
#1.索引、选取、子集构造
#带DatetimeIndex的Series仍然是普通的Series(旧版pandas中的TimeSeries子类已被移除),所以两者在这些方面行为一样
# A three-day series of random values indexed by timestamp.
dates = [datetime(2011, 1, day) for day in (1, 2, 3)]
ts = pd.Series(np.random.randn(len(dates)), index=dates)
print(ts)

# Indexing: a date string that pandas can parse selects the matching entry.
first_value = ts['2011-1-1']
print(first_value)

# Selection: slicing from a datetime keeps that entry and everything after.
tail = ts[datetime(2011, 1, 2):]
print(tail)

# Generate 20 weekly timestamps anchored on Wednesdays; this rebinds `dates`.
dates = pd.date_range(start='1/1/2000', periods=20, freq='W-WED')
print(dates)
2011-01-01    1.232802
2011-01-02   -1.508113
2011-01-03   -1.987884
dtype: float64
1.23280185363
2011-01-02   -1.508113
2011-01-03   -1.987884
dtype: float64
DatetimeIndex(['2000-01-05', '2000-01-12', '2000-01-19', '2000-01-26',
               '2000-02-02', '2000-02-09', '2000-02-16', '2000-02-23',
               '2000-03-01', '2000-03-08', '2000-03-15', '2000-03-22',
               '2000-03-29', '2000-04-05', '2000-04-12', '2000-04-19',
               '2000-04-26', '2000-05-03', '2000-05-10', '2000-05-17'],
              dtype='datetime64[ns]', freq='W-WED')
#2.带有重复索引的时间序列
# A DatetimeIndex may hold the same timestamp more than once; here
# 2000-01-02 appears twice.
# The last entry used to be written '/3/2000' (its month was missing). Older
# pandas parsed that as 2000-03-01 and newer releases may reject it, so it is
# now spelled out as '1/3/2000'.
dates = pd.DatetimeIndex(['1/1/2000', '1/2/2000', '1/2/2000', '1/3/2000'])
print(dates)
tf = pd.Series(np.random.randn(4), index=dates)
print(tf)
# is_unique is False as soon as any index label occurs more than once.
print(tf.index.is_unique)
# Example output (the series values are random and differ on every run):
# DatetimeIndex(['2000-01-01', '2000-01-02', '2000-01-02', '2000-01-03'], dtype='datetime64[ns]', freq=None)
# 2000-01-01   -0.071480
# 2000-01-02   -0.020102
# 2000-01-02    0.281841
# 2000-01-03    1.472859
# dtype: float64
# False
#三.日期的范围、频率以及移动
#1.pandas中的时间序列一般是没有固定频率的,因而当需要固定频率数据时,可以用resample按该频率重采样;resample本身只返回Resampler对象,需再调用asfreq()等方法才能得到补齐后的序列,缺失处的值为NaN
# An irregular series: observations on Jan 1, Jan 3 and Jan 6 only.
dates = [datetime(2011, 1, 1), datetime(2011, 1, 3), datetime(2011, 1, 6)]
ts = pd.Series(np.random.randn(3), index=dates)
print(ts)
# resample('D') on its own only returns a Resampler object (that is what used
# to be printed here). asfreq() turns it into the daily series, leaving NaN
# on the days that have no observation.
td = ts.resample('D').asfreq()
print(td)
# Example output (values are random and differ on every run):
# 2011-01-01   -0.313916
# 2011-01-03   -0.923299
# 2011-01-06   -0.834299
# dtype: float64
# 2011-01-01   -0.313916
# 2011-01-02         NaN
# 2011-01-03   -0.923299
# 2011-01-04         NaN
# 2011-01-05         NaN
# 2011-01-06   -0.834299
# Freq: D, dtype: float64
#2.生成日期范围
#pandas.date_range可用于生成指定长度的DatetimeIndex
# An explicit start and end date give one entry per calendar day in between.
index1 = pd.date_range(start='4/1/2011', end='4/10/2011')
print(index1)

# Alternatively, give one endpoint plus the number of periods to generate.
index2 = pd.date_range('4/1/2011', periods=10)
index3 = pd.date_range(periods=5, end='4/20/2011')
print(index2)
print(index3)
#频率默认以天('D')为单位;传入freq='BM'则以每月最后一个工作日为频率
DatetimeIndex(['2011-04-01', '2011-04-02', '2011-04-03', '2011-04-04',
               '2011-04-05', '2011-04-06', '2011-04-07', '2011-04-08',
               '2011-04-09', '2011-04-10'],
              dtype='datetime64[ns]', freq='D')
DatetimeIndex(['2011-04-01', '2011-04-02', '2011-04-03', '2011-04-04',
               '2011-04-05', '2011-04-06', '2011-04-07', '2011-04-08',
               '2011-04-09', '2011-04-10'],
              dtype='datetime64[ns]', freq='D')
DatetimeIndex(['2011-04-16', '2011-04-17', '2011-04-18', '2011-04-19',
               '2011-04-20'],
              dtype='datetime64[ns]', freq='D')
#3.频率和日期偏移量
# A frequency string may carry a multiplier ('4h') or combine several units
# ('1h30min'). Written as bare expressions, both results were thrown away
# when this ran as a script, so they are now bound to names and the
# 90-minute index is printed.
every_4h = pd.date_range('1/1/2011', '1/3/2011', freq='4h')
every_90min = pd.date_range('1/1/2011', '1/3/2011', freq='1h30min')
print(every_90min)
#时间序列的基础频率表(P314-315)
DatetimeIndex(['2011-01-01 00:00:00', '2011-01-01 01:30:00',
               '2011-01-01 03:00:00', '2011-01-01 04:30:00',
               '2011-01-01 06:00:00', '2011-01-01 07:30:00',
               '2011-01-01 09:00:00', '2011-01-01 10:30:00',
               '2011-01-01 12:00:00', '2011-01-01 13:30:00',
               '2011-01-01 15:00:00', '2011-01-01 16:30:00',
               '2011-01-01 18:00:00', '2011-01-01 19:30:00',
               '2011-01-01 21:00:00', '2011-01-01 22:30:00',
               '2011-01-02 00:00:00', '2011-01-02 01:30:00',
               '2011-01-02 03:00:00', '2011-01-02 04:30:00',
               '2011-01-02 06:00:00', '2011-01-02 07:30:00',
               '2011-01-02 09:00:00', '2011-01-02 10:30:00',
               '2011-01-02 12:00:00', '2011-01-02 13:30:00',
               '2011-01-02 15:00:00', '2011-01-02 16:30:00',
               '2011-01-02 18:00:00', '2011-01-02 19:30:00',
               '2011-01-02 21:00:00', '2011-01-02 22:30:00',
               '2011-01-03 00:00:00'],
              dtype='datetime64[ns]', freq='90T')
#4.移动数据(超前和滞后)
#(1)使用Series和DataFrame都有的shift方法进行单纯的值移动(索引值保持不变)
dates = [datetime(2011, 1, 1), datetime(2011, 1, 3), datetime(2011, 1, 6)]
ts = pd.Series(np.random.randn(3), index=dates)
print(ts)
# (1) shift(n) without a frequency moves the values n positions later and
#     leaves the index untouched, so the first n entries become NaN.
lagged = ts.shift(2)
print(lagged)
# (2) shift(n, freq=...) moves the index by n steps of that frequency and
#     leaves the values untouched. A MonthEnd offset object is passed in
#     place of the 'M' alias, which recent pandas releases deprecate in
#     favour of 'ME'; the object form gives the same result on old and new
#     versions.
month_shifted = ts.shift(2, freq=pd.offsets.MonthEnd())
print(month_shifted)
2011-01-01    0.936540
2011-01-03   -0.102222
2011-01-06   -1.902179
dtype: float64
2011-01-01        NaN
2011-01-03        NaN
2011-01-06    0.93654
dtype: float64
2011-02-28    0.936540
2011-02-28   -0.102222
2011-02-28   -1.902179
dtype: float64
#四.时区处理
#五.时间序列绘图
#当对DataFrame调用plot时,所有时间序列都会被绘制在一个subplot上,并带有图例。
#对其中单列数据调用plot则生成一张简单的图表
#移动窗口函数P339

猜你喜欢

转载自blog.csdn.net/fantacy10000/article/details/82110152