53 pandas 时间序列-时区处理(tz_localize本地化、tz_convert转换)(tcy)

时区处理 2019/1/16

1.pytz和dateutil库-对时区的支持:

    1) DST 夏令时
    2) UTC 协调世界时
    3) 时区是以UTC的偏移量的形式表示的

# 1.1查看时区名称
import pytz,dateutil
pytz.common_timezones
'''[...,'Asia/Shanghai',...] '''#dateutil时区字符串以'dateutil/'开头,以区别于pytz时区字符串

# 1.2构造时区对象
tz_pytz =pytz.timezone('Asia/Shanghai') #<DstTzInfo 'Asia/Shanghai' LMT+8:06:00 STD>
tz_dateutil = dateutil.tz.gettz('Asia/Shanghai')# tzfile('PRC')
tz_utc=dateutil.tz.tzutc() #构造UTC对象 # tzutc()

# 1.3警告
# 不同版本的时区库对同一时区的定义可能不被视为相等。
# 对用某一版本本地化、再用另一版本进行操作的存储数据,可能会导致问题。

2.DatetimeIndex

# 实例2.1:设置时区
rng = pd.date_range('3/6/2019', periods=2, freq='D')
rng.tz is None#True pandas对象默认无时区

rng = pd.date_range('3/6/2019', periods=2,tz='Asia/Shanghai') #提供时区pytz
rng.tz # <DstTzInfo 'Asia/Shanghai' LMT+8:06:00 STD>
rng_dateutil = pd.date_range('3/6/2019', periods=2, freq='D',tz='Asia/Shanghai') #此字符串仍由pytz解析;要得到dateutil时区需写tz='dateutil/Asia/Shanghai'
rng_dateutil.tz#<DstTzInfo 'Asia/Shanghai' LMT+8:06:00 STD>

rng_utc = pd.date_range('3/6/2019', periods=10, freq='D', tz=dateutil.tz.tzutc()) # dateutil - utc special case
rng_utc = pd.date_range('3/6/2019', periods=10, freq='D', tz='UTC') #等价
rng_utc.tz # tzutc()

rng1= pd.date_range('3/6/2019 00:00', periods=10, freq='D',tz=tz_pytz)
rng2 = pd.date_range('3/6/2019 00:00', periods=10, freq='D',tz=tz_dateutil)
rng1==rng2 #当为上海时不等;当为'Europe/London'为[...,True,...]相等

# 2.2本地化和转换

# 2.2本地化和转换
# Series/DataFrame/DatetimeIndex的 tz_localize(),tz_convert()方法

# 实例2.21:无时区转本地化UTC时区
rng = pd.date_range('1/16/2019 15:14:30', periods=3, freq='D', )
ts = pd.Series([1,2,3],index= rng)
rng_utc = pd.date_range('1/16/2019 15:14:30', periods=3, tz='UTC')
ts1 = pd.Series([1,2,3],index= rng_utc)#显式构造时区对象
ts_utc = ts.tz_localize('UTC')

ts
''''''''' 
2019-01-16 15:14:30 1
2019-01-17 15:14:30 2
2019-01-18 15:14:30 3
Freq: D, dtype: int64
'''
ts_utc
ts1
'''''''''
2019-01-16 15:14:30+00:00 1
2019-01-17 15:14:30+00:00 2
2019-01-18 15:14:30+00:00 3
Freq: D, dtype: int64
'''
ts_utc.index
ts1.index
'''''''''
DatetimeIndex(['2019-01-16 15:14:30+00:00', '2019-01-17 15:14:30+00:00',
'2019-01-18 15:14:30+00:00'],dtype='datetime64[ns, UTC]', freq='D')
'''
# 实例2.22:将本地UTC时区转为上海时区
ts_shanghai=ts_utc.tz_convert('Asia/Shanghai')
'''
2019-01-16 23:14:30+08:00 1
2019-01-17 23:14:30+08:00 2
2019-01-18 23:14:30+08:00 3
Freq: D, dtype: int64
'''

# 实例2.23:将上海时区转UTC时区
ts_utc.tz_convert('Asia/Shanghai').tz_convert('UTC')
'''
2019-01-16 15:14:30+00:00 1
2019-01-17 15:14:30+00:00 2
2019-01-18 15:14:30+00:00 3
Freq: D, dtype: int64
'''

3.时间戳

3.1.说明
# 将时区直接传递给datetime.datetime构造函数不正确,用时区上的localize方法进行本地化。
# 时间戳在内部都以UTC格式存储
# 从带时区的DatetimeIndex中取出的标量值,其字段(日、小时、分钟)会按该时区本地化。

3.2实例

# 实例3.21:构造
d1=datetime.datetime(2019, 1, 16,16,14, tzinfo=pytz.timezone('Asia/Shanghai'))#添加时区(错误示范:得到的偏移为LMT+8:06,应改用时区对象的localize方法)
#datetime.datetime(2019, 1, 16, 16, 14, tzinfo=<DstTzInfo 'Asia/Shanghai' LMT+8:06:00 STD>)

d2=pd.Timestamp('2019-1-16 1614',tz=pytz.timezone('Asia/Shanghai'))#添加时区
d2=pd.Timestamp('2019-1-16 1614',tz='Asia/Shanghai') #添加时区 等价
#Timestamp('2019-01-16 16:14:00+0800', tz='Asia/Shanghai')

# 实例3.22:具有相同UTC值的时间戳被认为是相等的,即使它们位于不同的时区
idx1= ts1.index.tz_convert('US/Eastern')
idx2= ts1.index.tz_convert('Europe/Berlin')

idx1[2]#Timestamp('2019-01-18 10:14:30-0500', tz='US/Eastern', freq='D')
idx2[2]#Timestamp('2019-01-18 16:14:30+0100', tz='Europe/Berlin', freq='D')

idx1[2]==idx2[2]#True

# 实例3.23Timestamp本地化及转换到其他时区

# 实例3.231:本地化
d0=pd.Timestamp('2019-1-16 1614')

d0.tz_localize('UTC')
#Timestamp('2019-01-16 16:14:00+0000', tz='UTC')
d0.tz_localize('Asia/Shanghai')
#Timestamp('2019-01-16 16:14:00+0800', tz='Asia/Shanghai')

# 实例3.232:转换到其他时区
d0.tz_localize('Asia/Shanghai').tz_convert('UTC')
#Timestamp('2019-01-16 08:14:00+0000', tz='UTC')
idx1[2].tz_convert('Europe/Berlin')
# Timestamp('2019-01-18 16:14:30+0100', tz='Europe/Berlin')

#实例:3.24
d0.value#1547655240000000000 d0为无时区的时间戳(按UTC计),值为自1970-1-1算起的纳秒数

4.删除时区

4.1.DatetimeIndex删除时区tz
    tz_localize(None)删除时区,保留本地时间表示
    tz_convert(None)先转换为UTC时间,再删除时区

4.2实例

idx = pd.DatetimeIndex(start='2019-08-01 09:00', freq='H', periods=2, tz='US/Eastern')
'''''''''
DatetimeIndex(['2019-08-01 09:00:00-04:00', '2019-08-01 10:00:00-04:00'],
dtype='datetime64[ns, US/Eastern]', freq='H')
'''
idx.tz_localize(None)
'''''''''
DatetimeIndex(['2019-08-01 09:00:00', '2019-08-01 10:00:00'],
dtype='datetime64[ns]', freq='H')
'''
idx.tz_convert(None)
idx.tz_convert('UCT').tz_localize(None)#等价
'''''''''
DatetimeIndex(['2019-08-01 13:00:00', '2019-08-01 14:00:00' ],
dtype='datetime64[ns]', freq='H')
'''

5.tz_localize本地化时的模糊时间-夏令时

# 当存在重复时,本地化无法确定DST和非DST小时数。
# ambiguous='infer'到tz_localize将尝试确定正确的偏移量

实例5.

# 实例5.1:ambiguous='infer'自动推断
idx = pd.DatetimeIndex(['11/06/2019 00:00', '11/06/2019 01:00', '11/06/2019 01:00', '11/06/2019 02:00',])
idx.tz_localize('US/Eastern')#若索引含模糊时间会抛出AmbiguousTimeError;2019年美国夏令时于11月3日结束,11/06并非模糊时间,故本次能够显示
idx.tz_localize('US/Eastern', ambiguous='infer')#推断模糊时间

# 实例5.2:ambiguous=布尔数组,显式指定(非自动推断)
# True表示DST夏令时,False表示非DST
rng_hourly_dst = np.array([1, 1, 0, 0])
idx.tz_localize('US/Eastern', ambiguous=rng_hourly_dst).tolist()

idx.tz_localize('US/Eastern', ambiguous='NaT').tolist()
'''''''''
DatetimeIndex(['2019-11-06 00:00:00-05:00', '2019-11-06 01:00:00-05:00',
'2019-11-06 01:00:00-05:00', '2019-11-06 02:00:00-05:00'],
dtype='datetime64[ns, US/Eastern]', freq=None)
'''
6.转换astype(...)为时区 
# Dtypes有无时区时显示:datetime64[ns]/datetime64[ns, tz]

ts = pd.Series(pd.date_range('20190101',periods=3))
ts.astype('datetime64[ns]')# make an ts tz naive
'''''''''
0 2019-01-01
1 2019-01-02
2 2019-01-03
dtype: datetime64[ns]
'''
ts.astype('datetime64[ns, US/Eastern]')# localize and convert a naive timezone
'''''''''
0 2018-12-31 19:00:00-05:00
1 2019-01-01 19:00:00-05:00
2 2019-01-02 19:00:00-05:00
dtype: datetime64[ns, US/Eastern]
'''
# convert to a new timezone
ts.astype('datetime64[ns, CET]')
'''''''''
0 2019-01-01 01:00:00+01:00
1 2019-01-02 01:00:00+01:00
2 2019-01-03 01:00:00+01:00
dtype: datetime64[ns, CET]
'''
ts.values#一旦转换为NumPy数组,这些将失去tz时区
'''''''''
array(['2019-01-01T00:00:00.000000000', '2019-01-02T00:00:00.000000000',
'2019-01-03T00:00:00.000000000'], dtype='datetime64[ns]')
''' 

 

猜你喜欢

转载自blog.csdn.net/tcy23456/article/details/86513728
53
今日推荐