尽管最优化方法给出的是理论上的最优解,但我认为蒙特卡洛方法看起来更简单,也更贴近真实一些。
另外,数据处理和清洗其实是这段代码中花费时间最长的部分。尽管数据是从相应的网站上复制下来的,但其中仍有不少异常值需要处理。如果不亲身经历,就不会知道这其中的麻烦。
需要说明的是,收益率和波动率的计算需要严格的日期对齐,并不像处理随机数那样简单,工作量不小。
# -*- coding: utf-8 -*-
"""
Created on Tue Oct 22 08:34:18 2019
@author: songroom
"""
#https://blog.csdn.net/weixin_42018258/article/details/80953809
#https://blog.csdn.net/itcastcpp/article/details/75807811
# Pyfolio
# https://www.jianshu.com/p/4cc811402477
# import pandas as pd
# https://blog.csdn.net/asialee_bird/article/details/89417750
#
import matplotlib.pyplot as plt
import pandas as pd
import os
import numpy as np
import time as t
import datetime as dt
from datetime import timedelta
# Ticker universe used throughout the script. It was narrowed down from an
# initial 15-name list: RIO, GOOG, OKE, FB, CELG and BABA were dropped.
stocks = [
    "YUM", "DIS", "COST", "FCN", "PEP",
    "TAL", "NKE", "ICE", "BAX",
]

# Directory the price CSV files ("<ticker>.csv" and "sp500.csv") are read from.
path = r"S:\data"
# Directory the result CSV files are written to.
output_path = r"C:\Users\Administrator\Desktop\output"
# Run timestamp (YYYYmmdd_HHMMSS), used as a suffix for output file names.
nowtime = dt.datetime.now().strftime("%Y%m%d_%H%M%S")
# Fixed allocation (ticker -> weight) evaluated in the "best portfolio" part
# of the script. The script consumes the values positionally, so the
# insertion order must stay the same as the order of the tickers in `stocks`.
port_best_weight = {
    "YUM": 0.023756235171897618,
    "DIS": 0.03014070796771593,
    "COST": 0.08892953965680671,
    "FCN": 0.1876235028458973,
    "PEP": 0.07380931812203975,
    "TAL": 0.20264458091189966,
    "NKE": 0.2257260573355881,
    "ICE": 0.037347140266800055,
    "BAX": 0.13002291772135482,
}
def best_portfolio():
    """Placeholder with no behaviour yet; always returns ``None``."""
    return None
def read_sp500_source():
    """Load ``sp500.csv`` from the module-level ``path`` directory.

    The first CSV column becomes the index, and every index entry is parsed
    from a ``YYYY/MM/DD`` string into a ``datetime``.

    Returns:
        pandas.DataFrame: the file contents indexed by date, in file order.
    """
    csv_file = os.path.join(path, "sp500.csv")
    frame = pd.read_csv(csv_file, sep=',', index_col=0, encoding='utf-8')
    parsed_days = []
    for raw_day in frame.index:
        parsed_days.append(dt.datetime.strptime(raw_day, '%Y/%m/%d'))
    frame.index = parsed_days
    return frame
def read_csv_stocks_source():
    """Load one price table per ticker listed in the module-level ``stocks``.

    Each ``<ticker>.csv`` is read from the ``path`` directory with its first
    column as the index; index entries are parsed from ``YYYY/MM/DD`` strings
    into ``datetime`` objects. The ticker is printed before its file is read.

    Returns:
        list[pandas.DataFrame]: one frame per ticker, in ``stocks`` order.
    """
    def to_day(raw_day):
        return dt.datetime.strptime(raw_day, '%Y/%m/%d')

    frames = []
    for ticker in stocks:
        csv_file = os.path.join(path, ticker + ".csv")
        print("stock=>", ticker)
        frame = pd.read_csv(csv_file, sep=',', index_col=0, encoding='utf-8')
        frame.index = list(map(to_day, frame.index))
        frames.append(frame)
    return frames
# Column names (presumably of each price CSV): close, open, high, low, volume, pct
def etl_index_data(sp500, common_start_dt):
    """Return the index ``close`` column from ``common_start_dt`` onwards.

    Args:
        sp500: date-indexed DataFrame that has a ``close`` column.
        common_start_dt: earliest date to keep (inclusive).

    Returns:
        pandas.Series: the ``close`` values of the kept rows, sorted by date
        in ascending order.
    """
    on_or_after_start = sp500.index >= common_start_dt
    window = sp500.loc[on_or_after_start, :]
    return window.sort_index(ascending=True).close
def etl_stock_data(data, common_start_dt):
    """Trim every stock frame to ``common_start_dt`` onwards and sort by date.

    Args:
        data: list of date-indexed DataFrames, positionally matching the
            module-level ``stocks`` list (used only for the progress print).
        common_start_dt: earliest date to keep (inclusive).

    Returns:
        list[pandas.DataFrame]: the trimmed frames, each sorted ascending by
        index, in the same order as ``data``.
    """
    trimmed_frames = []
    for position, frame in enumerate(data):
        recent = frame.loc[frame.index >= common_start_dt, :]
        ordered = recent.sort_index(ascending=True)
        trimmed_frames.append(ordered)
        print("etl=>i:", position, "stock:", stocks[position], "len:", len(ordered))
    return trimmed_frames
def get_common_dates_array(data, sp500):
    """Record, per stock, the last index date reached before a gap.

    For every frame in ``data`` except the first, the dates of ``sp500.index``
    are walked in their stored order. Each date that is also present in the
    stock's own index overwrites that stock's entry, and the walk stops at the
    first date the stock is missing. A stock that lacks the very first index
    date therefore gets no entry at all.

    NOTE(review): iteration starts at position 1, so ``data[0]`` never gets an
    entry -- confirm this is intended.

    Args:
        data: list of date-indexed DataFrames, positionally matching the
            module-level ``stocks`` list.
        sp500: date-indexed DataFrame of the index.

    Returns:
        dict: ticker (taken from ``stocks``) -> last matching date.
    """
    dates = {}
    for position, frame in enumerate(data[1:], start=1):
        for day in sp500.index:
            if day not in frame.index:
                break
            dates[stocks[position]] = day
    return dates
def put_stocks_df_data_together(sort_data, columns=None):
    """Combine the ``close`` column of every frame into one price table.

    All close series are aligned on their index in a single ``pd.concat``
    call, so the result is always a DataFrame (also for a single input frame)
    and an empty close series keeps its own column instead of being replaced
    by the next one.

    Args:
        sort_data: list of date-indexed DataFrames that each have a ``close``
            column.
        columns: labels for the resulting columns, one per input frame.
            Defaults to the module-level ``stocks`` list.

    Returns:
        pandas.DataFrame: one column of close prices per input frame.

    Raises:
        ValueError: if ``sort_data`` is empty or the number of labels does not
            match the number of frames.
    """
    closes = [frame.close for frame in sort_data]
    combined = pd.concat(closes, axis=1)
    combined.columns = stocks if columns is None else columns
    return combined
def price2ret(stocks_close_price):
    """Convert a table of prices into period-over-period returns.

    Args:
        stocks_close_price: DataFrame of prices, one column per stock.

    Returns:
        pandas.DataFrame: ``pct_change()`` of the input without its first row
        (which has no previous price to compare against).
    """
    return stocks_close_price.pct_change().iloc[1:, :]
def get_portfolio_random_weight(count):
    """Draw ``count`` random weights that add up to one.

    Args:
        count: number of weights to generate.

    Returns:
        numpy.ndarray: uniform draws from ``np.random.random`` divided by
        their total.
    """
    draws = np.random.random(count)
    total = sum(draws)
    return np.array(draws / total)
def get_portfolio_returns(stock_returns, portfolio_weights):
    """Compute the weighted portfolio return for every row.

    Args:
        stock_returns: DataFrame of returns, one column per stock.
        portfolio_weights: one weight per column, in column order.

    Returns:
        pandas.Series: row-wise sum of each return multiplied by its weight.
    """
    return stock_returns.mul(portfolio_weights, axis=1).sum(axis=1)
def get_portfolio_cumprod_return(portfolio_returns):
    """Turn per-period returns into a cumulative compounded return series.

    Args:
        portfolio_returns: Series of per-period returns.

    Returns:
        The running product of ``1 + return`` minus one, element by element.
    """
    growth = (portfolio_returns + 1).cumprod()
    return growth - 1
def get_porfolio_gain_return(portfolio_returns):
    """Annualise the total compounded return of a return series.

    The final cumulative return is divided by the number of periods and
    scaled by 252.

    Args:
        portfolio_returns: Series (or array) of per-period returns.

    Returns:
        float: ``(total cumulative return / number of periods) * 252``.

    Raises:
        IndexError: if ``portfolio_returns`` is empty.
    """
    cumulative = get_portfolio_cumprod_return(portfolio_returns)
    periods = len(cumulative)
    # Take the last element by position. A label lookup with `periods - 1`
    # depends on the index: it falls back to a deprecated positional lookup on
    # a date index and can hit the wrong row on an integer-labelled index.
    total_return = np.asarray(cumulative)[-1]
    return (total_return / periods) * 252
def get_std(portfolio_returns):
    """Return the standard deviation of the returns scaled by ``sqrt(252)``.

    Args:
        portfolio_returns: Series or array of per-period returns.

    Returns:
        float: ``np.std(portfolio_returns)`` multiplied by ``np.sqrt(252)``.
    """
    annualisation = np.sqrt(252)
    return np.std(portfolio_returns) * annualisation
def get_max_drawdown_value(portfolio_returns):
    """Placeholder: not implemented, ignores its argument and returns ``None``."""
    return None
def get_sharpe_ratio(portfolio_returns, expect_market_annual_return):
    """Compute the Sharpe ratio of a return series and print its components.

    Formula: SharpeRatio = [E(Rp) - Rf] / sigma_p
        E(Rp):   expected (average) return of the portfolio
        Rf:      risk-free rate (usually proxied by a government bond yield)
        sigma_p: standard deviation of the portfolio

    Args:
        portfolio_returns: Series of per-period portfolio returns.
        expect_market_annual_return: annual rate subtracted from the
            annualised portfolio return (the ``Rf`` term).

    Returns:
        The annualised return from ``get_porfolio_gain_return`` minus
        ``expect_market_annual_return``, divided by ``get_std``.
    """
    annual_gain = get_porfolio_gain_return(portfolio_returns)
    annual_vol = get_std(portfolio_returns)
    excess_gain = annual_gain - expect_market_annual_return
    ratio = excess_gain / annual_vol
    print("gained_return: ", annual_gain, " std: ", annual_vol, " sharp ratio: ", ratio)
    return ratio
def get_stock_beta_value(stock_returns, index_returns):
    """Compute the beta of every stock against the index and print each one.

    beta = corr(stock, index) * std(stock) / std(index)

    Args:
        stock_returns: DataFrame of returns, one column per stock.
        index_returns: Series of index returns.

    Returns:
        list: one beta per column of ``stock_returns``, in column order.
    """
    scale = np.sqrt(252)
    per_stock_std = stock_returns.std() * scale   # Series keyed by column
    market_std = index_returns.std() * scale      # scalar
    betas = []
    for code in stock_returns.columns:
        corr = stock_returns[code].corr(index_returns)
        beta = corr * per_stock_std[code] / market_std
        betas.append(beta)
        print("code: ", code, "beta:", beta, "corr:", corr)
    return betas
def get_portfolio_beta(stocks_beta, weights):
    """Return the weighted sum of the individual stock betas.

    Args:
        stocks_beta: sequence of per-stock betas.
        weights: sequence of weights, indexable at every position of
            ``stocks_beta``.

    Returns:
        Sum of ``stocks_beta[i] * weights[i]``; ``0`` for an empty input.
    """
    total = 0
    for position, stock_beta in enumerate(stocks_beta):
        total = total + stock_beta * weights[position]
    return total
#def get_sp500_close_price():
# sp500 = read_sp500_source()
# sort_index_data = etl_index_data(sp500,common_start_dt)
if __name__ == '__main__':
    # --- Data preparation ---
    t0 = t.time()
    count = len(stocks)
    print("including sp500, stock counts:", count)
    sp500 = read_sp500_source()
    stocks_data = read_csv_stocks_source()
    dates = get_common_dates_array(stocks_data, sp500)
    # The analysis window starts `add_days` calendar days after the latest
    # date recorded across the stocks.
    add_days = 1800
    common_start_dt = np.max(list(dates.values())) + timedelta(days=add_days)
    print("common_start_dt:", common_start_dt)
    sort_stock_data = etl_stock_data(stocks_data, common_start_dt)
    sort_index_data = etl_index_data(sp500, common_start_dt)
    stocks_close_price = put_stocks_df_data_together(sort_stock_data)
    stock_returns = price2ret(stocks_close_price)
    # Simple period-over-period return of the index close.
    previous_close = sort_index_data.shift(1)
    index_returns = (sort_index_data - previous_close) / previous_close
    index_returns = index_returns.dropna()
    print("=>")
    print(len(index_returns))

    # --- Monte Carlo search: keep the random weights with the best Sharpe ratio ---
    simu_counts = 100
    # Annual rate subtracted from the annualised return in the Sharpe ratio.
    expect_market_annual_return = 0.02
    best_sharp_ratio = -10000
    best_weights = []
    for i in range(simu_counts):
        portfolio_weights = get_portfolio_random_weight(count)
        portfolio_returns = get_portfolio_returns(stock_returns, portfolio_weights)
        sharp_ratio_value = get_sharpe_ratio(portfolio_returns, expect_market_annual_return)
        print("now simu : ", i, "sharpe ratio:", sharp_ratio_value)
        if sharp_ratio_value > best_sharp_ratio:
            best_sharp_ratio = sharp_ratio_value
            best_weights = portfolio_weights
            print("simu get better sharp ratio: ", sharp_ratio_value)
    t1 = t.time()
    print("simu process is over,cost time:", t1 - t0, 's')

    # --- Report for the best simulated portfolio ---
    for ticker, w in zip(stocks, best_weights):
        print("stock:", ticker, "weights:", w)
    best_simulation_returns = get_portfolio_returns(stock_returns, best_weights)
    # NOTE: the end date in this message is hard-coded, not read from the data.
    print("start_dt:", common_start_dt, "end_dt:2019-10-21")
    print("return:", get_porfolio_gain_return(best_simulation_returns))
    print("best sharpe ratio:", best_sharp_ratio)

    # Correlation matrix (also exported to CSV).
    print("stocks corr:")
    corr_df = stock_returns.corr()
    print(corr_df)
    # Covariance matrix. This section used to print the correlation matrix a
    # second time under the "cov" label.
    print("stocks cov:")
    print(stock_returns.cov())
    corr_df.to_csv(
        os.path.join(output_path, "corr_matrix_" + nowtime + ".csv"),
        index=True,
        encoding='utf_8_sig',
    )

    # Per-stock standard deviation, scaled by sqrt(252).
    print("stocks std:")
    print(stock_returns.std() * np.sqrt(252))

    # Per-stock beta and the beta of the best simulated portfolio.
    stocks_beta = get_stock_beta_value(stock_returns, index_returns)
    portfolio_beta = get_portfolio_beta(stocks_beta, best_weights)
    print("portfolio beta: ", portfolio_beta)
    best_simulation_cumprod_return = get_portfolio_cumprod_return(best_simulation_returns)
    best_simulation_cumprod_return.plot()

    # --- Report for the fixed allocation stored in `port_best_weight` ---
    print("----------best portfolio-----------")
    # The dict values are used positionally, so their order has to match the
    # column order of `stock_returns`.
    fixed_weights = list(port_best_weight.values())
    best_portfolio_returns_used = get_portfolio_returns(stock_returns, fixed_weights)
    best_portfolio_returns_used.to_frame().to_csv(
        os.path.join(output_path, "portfolio_return_" + nowtime + ".csv"))
    best_portfolio_cumprod_return_used = get_portfolio_cumprod_return(best_portfolio_returns_used)
    best_portfolio_cumprod_return_used.to_frame().to_csv(
        os.path.join(output_path, "cumprod_portfolio_return_" + nowtime + ".csv"))
    sp500_cumprod_return = get_portfolio_cumprod_return(index_returns)
    sp500_cumprod_return.to_frame().to_csv(
        os.path.join(output_path, "cumprod_sp500_return_" + nowtime + ".csv"))
    pd.DataFrame.from_dict(port_best_weight, orient='index').to_csv(
        os.path.join(output_path, "weights_" + nowtime + ".csv"))
    best_port_beta = get_portfolio_beta(stocks_beta, fixed_weights)
    print("best_risk_portfolio_beta:", best_port_beta)
    best_sharp_ratio_value_used = get_sharpe_ratio(best_portfolio_returns_used, expect_market_annual_return)
    print("best portfolio sharp-ratio value:", best_sharp_ratio_value_used)