资金流入流出预测-时间序列

1.基于周期因子的时间序列预测

  1. 提取时间序列的周期性特征进行预测
    • 确定周期、 计算周期因子
      方法1:除以周均值,按列取中位数
      方法2:季节指数计算方式,获得每个工作日或周末均值,再除以整体均值
    • 计算base
    • 预测=base*周期因子
      ** 观察序列,当序列呈现周期性变化的时候,可以使用周期因子法作为baseline **
  2. 如何预测下个月每天的情况
    • 获得每日(1-31号)的均值
    • 统计(周一到周日)每日的频次
    • 基于星期周期因子获取加权均值
    • 根据因子和每日均值预测

2.具体操作

1. 以星期为周期的中位数预测

  1. 选取时段

    • 训练集时段:2014-03-01~2014-08-03
    • 测试数据集:2014-08-04~2014-08-31
  2. 导入数据包

import pandas as pd
import numpy as np
import sklearn as skr
import datetime
import matplotlib as plt
import seaborn as sns
from dateutil import relativedelta
  3. 读取数据
def load_data(file_path):
    """Read the balance CSV at ``file_path`` and attach calendar columns.

    The ``report_date`` column of the file is handed to ``add_timestamp``,
    which derives the date-part columns from it.

    Returns:
        The augmented DataFrame with a fresh 0..n-1 index.
    """
    raw_frame = pd.read_csv(file_path)
    stamped_frame = add_timestamp(raw_frame, "report_date")
    stamped_frame = stamped_frame.reset_index(drop=True)
    return stamped_frame
# Add calendar feature columns derived from a date column.
def add_timestamp(data, date):
    """Return a copy of ``data`` with date-part columns added.

    Args:
        data: Input DataFrame; it is not modified.
        date: Name of the column holding the dates. Values are parsed with
            the ``%Y%m%d`` format; an already-datetime column is accepted
            as well.

    Returns:
        A re-indexed copy of ``data`` with the extra columns ``date``,
        ``day``, ``month``, ``year``, ``week`` (ISO week number) and
        ``weekday`` (Monday == 0).
    """
    data_balance = data.copy()
    data_balance["date"] = pd.to_datetime(data_balance[date], format="%Y%m%d")
    data_balance["day"] = data_balance["date"].dt.day
    data_balance["month"] = data_balance["date"].dt.month
    data_balance["year"] = data_balance["date"].dt.year
    # ``Series.dt.week`` was removed in pandas 2.0; ``isocalendar().week`` is
    # its replacement. Cast back to a plain numpy dtype (float only when
    # missing dates are present) so the column keeps the dtype the removed
    # accessor used to produce.
    iso_week = data_balance["date"].dt.isocalendar().week
    week_dtype = "float64" if iso_week.isna().any() else "int64"
    data_balance["week"] = iso_week.astype(week_dtype)
    data_balance["weekday"] = data_balance["date"].dt.weekday
    return data_balance.reset_index(drop=True)
# Total purchase / redeem amount per calendar date.
def total_amt(data, date):
    """Sum the purchase and redeem amounts per date, from ``date`` onwards.

    Args:
        data: DataFrame with the columns ``date``, ``total_purchase_amt``
            and ``total_redeem_amt``.
        date: Lower bound (inclusive) compared against the ``date`` column.

    Returns:
        A DataFrame with one row per date >= ``date`` and the columns
        ``date``, ``total_purchase_amt`` and ``total_redeem_amt``, indexed
        0..n-1.
    """
    # Column selection on a groupby must use a list: the bare-tuple form
    # ``[...]["a", "b"]`` raises in pandas 2.0+.
    daily_totals = data.groupby("date", as_index=False)[
        ["total_purchase_amt", "total_redeem_amt"]
    ].sum()
    return daily_totals[daily_totals["date"] >= date].reset_index(drop=True)
# Append empty placeholder rows for the dates that are to be predicted.
def generate_data(data, start_date, end_date):
    """Extend ``data`` with one all-NaN row per day in [start_date, end_date).

    Args:
        data: DataFrame whose FIRST column holds the dates; it is not
            modified. Any number of further columns is allowed.
        start_date: First day to append (anything ``pd.to_datetime`` parses).
        end_date: Day at which generation stops. It is exclusive: no row is
            appended for ``end_date`` itself.

    Returns:
        A new DataFrame, indexed 0..n-1, containing the rows of ``data``
        followed by the placeholder rows. The first column of each
        placeholder row is the date and every other column is NaN. When the
        range is empty (``start_date`` >= ``end_date``) a re-indexed copy of
        ``data`` is returned.
    """
    total_balance = data.copy()
    start_date = pd.to_datetime(start_date)
    end_date = pd.to_datetime(end_date)

    # A ``while start != end`` loop never terminates once start has passed
    # end; ``date_range`` yields an empty range in that case instead.
    new_dates = pd.date_range(start_date, end_date, freq="D")
    new_dates = new_dates[new_dates < end_date]
    if len(new_dates) == 0:
        return total_balance.reset_index(drop=True)

    # Build the placeholder frame from the actual column list so the helper
    # is not tied to exactly three columns; reindex fills the rest with NaN.
    columns = list(total_balance.columns)
    test_data = pd.DataFrame({columns[0]: new_dates}).reindex(columns=columns)

    total_balance = pd.concat([total_balance, test_data], axis=0)
    return total_balance.reset_index(drop=True)
    
# Input files: per-user daily balance records and user profiles.
user_balance_file_path = r"./Data/user_balance_table.csv"
user_info_file_path = r"./Data/user_profile_table.csv"
# Load the raw balance records and aggregate them to daily totals from
# 2014-03-01 onwards.
data_balance = load_data(user_balance_file_path)
total_balance = total_amt(data_balance,"2014-03-01")
# generate_data stops before its end date, so the day after the last test
# day is passed to make the placeholder rows cover 2014-08-04 .. 2014-08-31.
total_balance = generate_data(total_balance,"2014-08-04","2014-09-01")
# Recompute the calendar columns so the appended rows get them too.
total_balance = add_timestamp(total_balance,"date")

在这里插入图片描述

# Rule-based (weekday periodic factor) forecast for one month of 2014.
def generate_base(data, month_index):
    """Forecast daily purchase / redeem totals for month ``month_index`` of 2014.

    The history used is every row of ``data`` with
    2014-03-01 <= date < 2014-<month_index>-01. The method is:

    1. weekday factor = mean amount of that weekday / overall mean amount;
    2. for each day of month (1..31), average the weekday factors weighted
       by how often that day fell on each weekday in the history;
    3. base = mean amount of that day of month / its weighted factor;
    4. forecast = base * weekday factor of the target date.

    Args:
        data: DataFrame with a datetime ``date`` column plus the columns
            ``total_purchase_amt`` and ``total_redeem_amt``.
        month_index: Month (1-12) of 2014 to forecast.

    Returns:
        A DataFrame sorted by ``date`` with the columns ``date``,
        ``total_purchase_amt`` and ``total_redeem_amt``, holding one row per
        day of the target month for which the history contains that day of
        month.
    """
    import calendar  # local import: only needed for the month length

    amount_cols = ["total_purchase_amt", "total_redeem_amt"]
    factor_cols = [col + "_weekdaymean" for col in amount_cols]

    # Select the fixed training window; copy so that adding columns below
    # does not write into a slice of the caller's frame.
    history = data[["date"] + amount_cols]
    in_window = (history["date"] >= "2014-03-01") & (
        history["date"] < pd.Timestamp(2014, month_index, 1)
    )
    history = history[in_window].copy()
    history["day"] = history["date"].dt.day
    history["weekday"] = history["date"].dt.weekday

    # Weekday factor: per-weekday mean divided by the overall mean.
    weekday_factor = history.groupby("weekday", as_index=False)[amount_cols].mean()
    weekday_factor = weekday_factor.rename(columns=dict(zip(amount_cols, factor_cols)))
    for amount, factor in zip(amount_cols, factor_cols):
        weekday_factor[factor] /= history[amount].mean()

    # How often each day of month fell on each weekday.
    occurrences = (
        history.groupby(["day", "weekday"]).size().reset_index(name="n_dates")
    )
    occurrences = pd.merge(occurrences, weekday_factor, on="weekday")
    # Weight by the share within each day of month so the weights sum to 1
    # even for days 29-31, which do not occur in every month. Dividing by
    # the number of months instead would shrink their factor and inflate
    # their base.
    share = occurrences["n_dates"] / occurrences.groupby("day")["n_dates"].transform("sum")
    for factor in factor_cols:
        occurrences[factor] *= share
    day_rate = occurrences.groupby("day", as_index=False)[factor_cols].sum()

    # Base level per day of month: day mean with the weekday effect removed.
    # Each amount is divided by ITS OWN factor column.
    day_pre = history.groupby("day", as_index=False)[amount_cols].mean()
    day_pre = pd.merge(day_pre, day_rate, on="day", how="left")
    for amount, factor in zip(amount_cols, factor_cols):
        day_pre[amount] /= day_pre[factor]

    # Build the target dates, keeping only days that exist in that month.
    days_in_month = calendar.monthrange(2014, month_index)[1]
    day_pre = day_pre.loc[day_pre["day"] <= days_in_month, ["day"] + amount_cols].copy()
    day_pre["date"] = pd.to_datetime(
        pd.DataFrame({"year": 2014, "month": month_index, "day": day_pre["day"]})
    )

    # Final forecast: base multiplied by the weekday factor of each date.
    day_pre["weekday"] = day_pre["date"].dt.weekday
    day_pre = pd.merge(day_pre[["date", "weekday"] + amount_cols], weekday_factor, on="weekday")
    for amount, factor in zip(amount_cols, factor_cols):
        day_pre[amount] *= day_pre[factor]
    return day_pre.sort_values("date")[["date"] + amount_cols]

猜你喜欢

转载自blog.csdn.net/ava_zhang2017/article/details/108166568