Common date handling in Python: the standard-library tools and their pandas (pd) counterparts

Date and time data types and tools

In Python, we mainly use the datetime, time and calendar modules.

datetime.datetime stores both the date and the time, down to the microsecond.

datetime.timedelta represents the time difference between two datetime objects.

import datetime

# Subtracting one datetime from another yields a datetime.timedelta.
range_end = datetime.datetime(2011, 1, 7)
range_begin = datetime.datetime(2008, 6, 24, 8, 15)
delta = range_end - range_begin

# The difference is exposed as whole days plus the leftover seconds.
delta.days
delta.seconds

strat = datetime.datetime(2011, 1, 7)
# Equivalent spellings: the first positional argument of timedelta is `days`.
strat + datetime.timedelta(12)
strat + datetime.timedelta(days=12)

Conversion between strings and datetime.datetime

stamp = datetime.datetime(year=2011, month=1, day=3)
stamp

# str() renders the datetime in its default textual form.
str(stamp)

# To control the layout of the resulting string, pass format codes to strftime.
stamp.strftime("%Y-%m-%d")

datetime.datetime.strptime is the best way to convert a string into a datetime.datetime when the date format is known in advance.

value = '2011-01-03'
value

# Parse a single string by spelling out the layout it uses.
iso_layout = "%Y-%m-%d"
datetime.datetime.strptime(value, iso_layout)

datestrs = ['7/6/2011', '8/6/2011']

# Parse every month/day/year string in the list with one shared layout.
us_layout = "%m/%d/%Y"
[datetime.datetime.strptime(text, us_layout) for text in datestrs]

Writing out a format specification every time is tedious, especially for common date formats. In that case we can use the parser.parse method from the third-party dateutil package.

dateutil can parse almost any date representation that a human can understand.

from dateutil.parser import parse

# Common layouts are recognised without supplying a format string.
for text in ("2011-01-03", "2011/1/1", "2011/01/01", "Jan 31, 1997 10:45 PM"):
    parse(text)

# In many international formats the day is written before the month;
# pass dayfirst=True so the leading number is read as the day.
parse("6/12/2011", dayfirst=True)
# Without dayfirst the leading number is read as the month.
parse("6/12/2011")

Parsing dates with pandas: pd.to_datetime

# pandas must be imported before `pd` is used; this snippet previously relied
# on an import that only appears in a later snippet.
import pandas as pd

datestrs = ["7/6/2011", "8/6/2011"]

# to_datetime can parse many different date representations.
idt = pd.to_datetime(datestrs)
print(idt)
# Example output:
# DatetimeIndex(['2011-07-06', '2011-08-06'], dtype='datetime64[ns]', freq=None)


# to_datetime also copes with missing values.
# NaT (Not a Time) is the NA value pandas uses for timestamp data.
idx = pd.to_datetime(datestrs + [None])

print(idx)

# The appended None ends up at position 2.
print(idx[2])

# isna and isnull are two spellings of the same missing-value check.
print(pd.isna(idx))
print(pd.isnull(idx))

The most basic kind of time series in pandas is a Series indexed by timestamps.

	# Walkthrough: build a Series indexed by datetimes, then demonstrate
	# alignment, scalar lookup, date-string indexing and date-range slicing,
	# first on a Series and then on a DataFrame.
	import pandas as pd
	import datetime
	import numpy as np
	
	dates = [
	    datetime.datetime(2011, 1, 2),
	    datetime.datetime(2011, 1, 5),
	    datetime.datetime(2011, 1, 7),
	    datetime.datetime(2011, 1, 8),
	    datetime.datetime(2011, 1, 10),
	    datetime.datetime(2011, 1, 12),
	
	]
	
	# Six random values, one per date above.
	ts = pd.Series(np.random.randn(6), index=dates)
	
	print(ts)
	
	print(type(ts))
	
	print(ts.index)
	
	# There is no need to construct a special time-series type explicitly:
	# when a Series is created with a DatetimeIndex, pandas knows the object is a time series.
	
	# As with any other Series, arithmetic between differently indexed series
	# is aligned automatically on the dates.
	# ts[::2] takes every second entry of ts.
	res = ts + ts[::2]
	
	print(ts[::2])
	
	print(res)
	
	# pandas stores timestamps using NumPy's datetime64 data type at nanosecond resolution.
	print(ts.index.dtype)
	
	# Each scalar in a DatetimeIndex is a pandas Timestamp object.
	# Such an object can be converted to a datetime object at any time.
	# Original note adds that it can also store frequency information.
	# NOTE(review): confirm against the pandas version in use.
	stamp = ts.index[0]
	
	print(stamp, type(stamp))
	
	# Original note: TimeSeries is a subclass of Series and behaves the same for
	# indexing and selection. NOTE(review): confirm whether a separate TimeSeries
	# class still exists in the pandas version in use; type(ts) is printed above.
	stamp = ts.index[2]
	
	print(stamp)
	
	# Look up a value using an index label taken from the series itself.
	print(ts[stamp])
	
	# More conveniently, pass a string that can be interpreted as a date.
	print(ts['1/10/2011'])
	
	print(ts['20110110'])
	
	# Important: for a longer time series, passing only the year, or the year and
	# month, is enough to select a slice (that selection is not demonstrated here;
	# the series is only built and printed).
	longer_ts = pd.Series(np.random.randn(1000), index=pd.date_range("1/1/2000", periods=1000))
	
	print(longer_ts)
	
	# Slice from a datetime object onward.
	# Original note: slicing by date only works on a "regular" Series.
	# NOTE(review): confirm what "regular" is meant to cover.
	print(ts[datetime.datetime(2011, 1, 7):])
	
	# Because most time series are ordered chronologically,
	# we can also slice with timestamps that do not occur in the series (a range query).
	print(ts)
	
	print(ts['1/6/2011': "1/11/2011"])
	# A related instance method:
	# truncate(after=...) keeps the part of the series up to the `after` date.
	print(ts.truncate(after='1/9/2011'))
	
	# The operations above also work on a DataFrame.
	# Note: `dates` is rebound here to 100 weekly (W-WED) dates starting from 1/1/2000.
	dates = pd.date_range('1/1/2000', periods=100, freq='W-WED')
	
	print(dates)
	
	# Note: `longer_ts` is rebound from the Series above to a 100x4 DataFrame.
	longer_ts = pd.DataFrame(np.random.randn(100, 4),
	                         index=dates,
	                         columns=["Colorado", 'Texas', "New York", "Ohio"]
	                         )
	
	print(longer_ts)
	
	# print(longer_ts.ix['5-2001'])  # deprecated in newer versions
	
	print(longer_ts.loc['5-2001'])  # works: select the rows for May 2001 via .loc

Time series with duplicate index

	# In some scenarios several observations fall on the same timestamp.
	# This snippet builds such a series, checks the index for duplicates,
	# indexes unique and repeated timestamps, and aggregates per timestamp.
	
	import pandas as pd
	import numpy as np
	
	
	dates = pd.DatetimeIndex(['1/1/2000', '1/2/2000', '1/2/2000', '1/2/2000', '1/3/2000'])
	
	dup_ts = pd.Series(np.arange(5), index=dates)
	
	# Check the is_unique attribute of the *index* to see whether timestamps repeat.
	# Series.is_unique inspects the values instead (0..4 here, all distinct),
	# so it would report True even though '1/2/2000' occurs three times.
	flag = dup_ts.index.is_unique
	
	# Indexing a timestamp that occurs once.
	ret1 = dup_ts['1/3/2000']
	
	# Indexing a repeated timestamp returns all matching values.
	ret2 = dup_ts['1/2/2000']
	
	# To aggregate data that has non-unique timestamps,
	# one option is groupby with level=0, which groups on the index.
	grouped = dup_ts.groupby(level=0)
	
	print(grouped)
	
	print(grouped.count())
	
	print(grouped.mean())

Guess you like

Origin blog.csdn.net/Enjolras_fuu/article/details/90708488