业务中经常需要分析与时间序列相关的数据,可能会用到年、月、日、小时、星期、节假日等维度。尤其是在零售和电商行业,常常需要分析日销售的高峰时段,因此有一张多维度、细粒度的时间维度表,用起来会非常方便。话不多说,直接贴代码:
import pandas as pd
from datetime import datetime,timedelta
from chinese_calendar import is_holiday,is_workday
# Build a calendar dimension table: one row per day for `n` days starting at
# `starttime`, then expand every day into 24 hourly rows.
starttime = '2019-01-01'  # first day: a 'YYYY-MM-DD' string or a date object
n = 730  # number of consecutive days to generate

# One list per output column, filled in lockstep by the loop below.
date_list = []
year_list = []
month_list = []
day_list = []
weeknum_list = []  # weekday name as produced by strftime("%A")
week_list = []  # "ISO year|ISO week number|ISO weekday"
is_week = []  # never filled or read in the code shown here; kept so the name still exists
quarter = []
is_work = []
holiday = []

# A string start is parsed into a date; anything else is used as given and
# must support `+ timedelta`.
if isinstance(starttime, str):
    a = datetime.strptime(starttime, '%Y-%m-%d').date()
else:
    a = starttime

for i in range(n):
    day_ = a + timedelta(days=i)
    date_list.append(day_.strftime("%Y-%m-%d"))
    year_list.append(day_.year)
    month_list.append(day_.month)
    day_list.append(day_.day)
    weeknum_list.append(day_.strftime("%A"))
    week_list.append("|".join(str(part) for part in day_.isocalendar()))
    # Months 1-3 -> 1, 4-6 -> 2, 7-9 -> 3, 10-12 -> 4.
    quarter.append((day_.month - 1) // 3 + 1)
    # Working-day / holiday flags come from the chinese_calendar package.
    is_work.append(is_workday(day_))
    holiday.append(is_holiday(day_))

result = {
    "date": date_list,
    "year": year_list,
    "month": month_list,
    "day": day_list,
    "weeknum": weeknum_list,
    "week": week_list,
    "quarter": quarter,
    "is_work": is_work,
    "is_holiday": holiday,
}
b = pd.DataFrame(result)

# Cross join days x hours: both frames get the same constant `value` key, so
# merging on it pairs every day row with every hour row.
b['value'] = 1
c = ["%02d" % hour for hour in range(24)]  # "00" .. "23"
d = pd.DataFrame(c, columns=['hour'])
d['value'] = 1
e = pd.merge(b, d, on='value')
# Three-hour buckets of zero-padded hour strings and the label each maps to.
# "21"-"23" are deliberately not listed: they take the default label in fun().
_HOUR_BUCKETS = (
    (("00", "01", "02"), "拂晓"),
    (("03", "04", "05"), "黎明"),
    (("06", "07", "08"), "清晨"),
    (("09", "10", "11"), "上午"),
    (("12", "13", "14"), "中午"),
    (("15", "16", "17"), "下午"),
    (("18", "19", "20"), "晚上"),
)


def fun(x):
    """Return the time-of-day label for a zero-padded hour string.

    Args:
        x: Hour of day as a two-character string, "00" through "23".

    Returns:
        The label of the three-hour bucket that contains ``x``. "21"-"23",
        and any value that matches no bucket (e.g. "7" without padding),
        yield "深夜".
    """
    for hours, label in _HOUR_BUCKETS:
        if x in hours:
            return label
    # Fallback shared by the late-night hours and unrecognised input.
    return "深夜"
# Label every hourly row with its time-of-day bucket, then preview the table.
ty = e['hour'].apply(fun)  # pass fun directly; a lambda wrapper adds nothing
e['type'] = ty
print(e.head())
生成的数据可以存进数据库,用起来也方便。建表语句如下:
| dim_time | CREATE TABLE `dim_time` (
`date` varchar(10) DEFAULT NULL,
`year` varchar(4) DEFAULT NULL,
`month` varchar(2) DEFAULT NULL,
`day` varchar(2) DEFAULT NULL,
`weeknum` varchar(10) DEFAULT NULL COMMENT '星期几',
`week` varchar(10) DEFAULT NULL COMMENT '年|第几周|第几天',
`quarter` int(11) DEFAULT NULL COMMENT '季度',
`is_work` int(255) DEFAULT NULL,
`is_holiday` int(255) DEFAULT NULL,
`value` int(11) DEFAULT NULL COMMENT '标识字段 无意义',
`hour` varchar(2) DEFAULT NULL,
`type` varchar(255) DEFAULT NULL
) ENGINE=InnoDB DEFAULT CHARSET=utf8 COMMENT='生成时间维度表,