资金流入流出预测-时间序列

1.基于周期因子的时间序列预测

提取时间序列的周期性特征进行预测
- 确定周期、计算周期因子
  方法1:除以周均值，按列取中位数
  方法2:季节指数计算方式，获得每个工作日或周末均值，再除以整体均值
- 计算base
- 预测=base*周期因子
  **观察序列，当序列呈现周期性变化的时候，可以使用周期因子法作为baseline**
如何预测下个月每天的情况
- 获得每日（1-31号）的均值
- 统计（周一到周日）每日的频次
- 基于星期周期因子获取加权均值
- 根据因子和每日均值预测

2.具体操作

1. 以星期为周期的中位数预测

选取时段
- 训练集时段：2014-03-01～2014-08-03
- 测试数据集：2014-08-04～2014-08-31
导入数据包

import pandas as pd
import numpy as np
import sklearn as skr
import datetime
import matplotlib as plt
import seaborn as sns
from dateutil import relativedelta

读取数据

def load_data(file_path):
    """Read the balance CSV at *file_path* and attach calendar columns.

    The file is parsed with ``pd.read_csv`` and passed to ``add_timestamp``
    using its ``report_date`` column; the result is returned with a fresh
    0..n-1 index.
    """
    raw_frame = pd.read_csv(file_path)
    stamped_frame = add_timestamp(raw_frame, "report_date")
    stamped_frame = stamped_frame.reset_index(drop=True)
    return stamped_frame
# Add calendar feature columns derived from a date column.
def add_timestamp(data,date):
    """Return a copy of *data* with date-derived feature columns added.

    Args:
        data: DataFrame holding the column named by *date*.
        date: Name of the column to parse. Values are parsed with the
            ``%Y%m%d`` format (e.g. ``20140301``).

    Returns:
        A new DataFrame (the input is not modified) with a 0..n-1 index and
        the columns ``date`` (parsed datetime), ``day``, ``month``, ``year``,
        ``week`` (ISO week number) and ``weekday`` (Monday=0 ... Sunday=6).
    """
    data_balance = data.copy()
    data_balance["date"] = pd.to_datetime(data_balance[date],format="%Y%m%d")
    date_parts = data_balance["date"].dt
    data_balance["day"] = date_parts.day
    data_balance["month"] = date_parts.month
    data_balance["year"] = date_parts.year
    # Series.dt.week was deprecated in pandas 1.1 and removed in 2.0;
    # isocalendar().week yields the same ISO week number.
    data_balance["week"] = date_parts.isocalendar().week
    data_balance["weekday"] = date_parts.weekday
    return data_balance.reset_index(drop=True)
# Aggregate total purchase / redeem amounts per calendar date.
def total_amt(data,date):
    """Sum purchase and redeem amounts per date, keeping dates >= *date*.

    Args:
        data: DataFrame with the columns ``date``, ``total_purchase_amt``
            and ``total_redeem_amt``.
        date: Lower bound (inclusive) compared against the ``date`` column.

    Returns:
        DataFrame with one row per date and the columns ``date``,
        ``total_purchase_amt`` and ``total_redeem_amt``, with a 0..n-1 index.
    """
    # The columns must be selected with a list: selecting groupby columns
    # with a bare tuple raises in pandas >= 2.0.
    amount_columns = ["total_purchase_amt","total_redeem_amt"]
    data_temp = data.groupby("date",as_index=False)[amount_columns].sum()
    return data_temp[data_temp["date"]>=date].reset_index(drop=True)
# Append placeholder (NaN) rows for the dates that are to be predicted.
def generate_data(data,start_date,end_date):
    """Return *data* extended with one empty row per day in [start, end).

    Args:
        data: DataFrame whose first column holds the dates; every other
            column is filled with NaN in the appended rows.
        start_date: First day to append (anything ``pd.to_datetime`` accepts).
        end_date: Day at which to stop; it is NOT appended itself
            (exclusive upper bound).

    Returns:
        A new DataFrame (the input is not modified) with the original rows
        followed by the placeholder rows, with a 0..n-1 index. If
        *start_date* is not before *end_date*, no rows are appended.
    """
    total_balance = data.copy()
    start_date = pd.to_datetime(start_date)
    end_date = pd.to_datetime(end_date)
    # A `while start != end` loop never terminates when start is after end
    # or when the two are not a whole number of days apart; a bounded date
    # range covers the same days and simply comes out empty in those cases.
    future_dates = pd.date_range(start_date,end_date - pd.Timedelta(days=1),freq="D")
    if len(future_dates) == 0:
        return total_balance.reset_index(drop=True)
    columns = total_balance.columns
    test_data = pd.DataFrame(np.nan,index=range(len(future_dates)),columns=columns)
    test_data[columns[0]] = future_dates
    total_balance = pd.concat([total_balance,test_data],axis=0)
    return total_balance.reset_index(drop=True)
    
# Input files (paths are relative to the working directory).
user_balance_file_path = r"./Data/user_balance_table.csv"
user_info_file_path = r"./Data/user_profile_table.csv"
# Load the per-user balance records and aggregate them into daily totals
# starting at 2014-03-01.
data_balance = load_data(user_balance_file_path)
total_balance = total_amt(data_balance,"2014-03-01")
# generate_data stops before its end date, so 2014-09-01 is passed to make
# the placeholder rows cover the whole test window 2014-08-04 .. 2014-08-31.
total_balance = generate_data(total_balance,"2014-08-04","2014-09-01")
# Recompute the calendar columns for the extended frame.
total_balance = add_timestamp(total_balance,"date")

在这里插入图片描述

# Rule-based (periodic factor) forecast for one month of 2014.
def generate_base(data,month_index):
    """Predict daily purchase/redeem totals for month *month_index* of 2014.

    Rows of *data* dated from 2014-03-01 up to (not including) the first day
    of the target month are used as history. The method is:

    1. weekday factor = mean amount of each weekday / overall mean amount;
    2. for every day-of-month, average the weekday factors of the weekdays
       that day fell on in the history (weighted by how often each occurred);
    3. base = mean amount of that day-of-month / averaged factor from step 2;
    4. prediction = base * weekday factor of the target date.

    Args:
        data: DataFrame with a datetime ``date`` column and the columns
            ``total_purchase_amt`` and ``total_redeem_amt``.
        month_index: Target month number (1-12) in the year 2014.

    Returns:
        DataFrame sorted by date with the columns ``date``,
        ``total_purchase_amt`` and ``total_redeem_amt``; one row per day of
        the target month whose day-of-month appears in the history.
    """
    amount_cols = ["total_purchase_amt","total_redeem_amt"]
    factor_cols = [name+"_weekdaymean" for name in amount_cols]
    month_start = pd.Timestamp(2014,month_index,1)

    # Restrict to the fixed history window.
    total_balance = data[["date"]+amount_cols]
    in_window = (total_balance["date"]>="2014-03-01")&(total_balance["date"]<month_start)
    total_balance = total_balance[in_window].copy()
    total_balance["day"] = total_balance["date"].dt.day
    total_balance["weekday"] = total_balance["date"].dt.weekday

    # Weekday factor: mean per weekday divided by the overall mean.
    mean_of_each_weekday = total_balance.groupby("weekday",as_index=False)[amount_cols].mean()
    mean_of_each_weekday = mean_of_each_weekday.rename(columns=dict(zip(amount_cols,factor_cols)))
    for amount_col,factor_col in zip(amount_cols,factor_cols):
        mean_of_each_weekday[factor_col] /= total_balance[amount_col].mean()

    # How often each day-of-month fell on each weekday in the history.
    weekday_count = total_balance.groupby(["day","weekday"],as_index=False)["date"].count()
    weekday_count = pd.merge(weekday_count,mean_of_each_weekday,on="weekday")
    # Weight by the share of that day's occurrences, so the weights of every
    # day sum to 1. Dividing by the number of months instead under-weights
    # days that do not exist in every month (e.g. the 31st) and inflates
    # their base.
    occurrences = weekday_count.groupby("day")["date"].transform("sum")
    weight = weekday_count["date"]/occurrences
    for factor_col in factor_cols:
        weekday_count[factor_col] *= weight
    day_rate = weekday_count.groupby("day",as_index=False)[factor_cols].sum()

    # Base: day-of-month mean with the weekday effect divided out. Each
    # amount column is paired with its OWN factor column.
    day_mean = total_balance.groupby("day",as_index=False)[amount_cols].mean()
    day_pre = pd.merge(day_mean,day_rate,on="day",how="left")
    for amount_col,factor_col in zip(amount_cols,factor_cols):
        day_pre[amount_col] /= day_pre[factor_col]

    # Build the target dates, skipping days the target month does not have.
    day_pre = day_pre[day_pre["day"]<=month_start.days_in_month].copy()
    day_pre["date"] = month_start+pd.to_timedelta(day_pre["day"]-1,unit="D")

    # Final prediction: base multiplied by the target date's weekday factor.
    day_pre["weekday"] = day_pre["date"].dt.weekday
    day_pre = day_pre[["date","weekday"]+amount_cols]
    day_pre = pd.merge(day_pre,mean_of_each_weekday,on="weekday")
    for amount_col,factor_col in zip(amount_cols,factor_cols):
        day_pre[amount_col] *= day_pre[factor_col]
    day_pre = day_pre.sort_values("date")[["date"]+amount_cols]
    return day_pre

资金流入流出预测-时间序列

目录

1.基于周期因子的时间序列预测

2.具体操作

1. 以星期为周期的中位数预测

猜你喜欢