# Python机器学习之线性回归

#线性回归最小二乘法

import sys

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import sklearn.metrics as sm
import tushare as ts
from sklearn import linear_model

# Regress a single stock's daily log return (y) on the index's daily log
# return (x) with ordinary least squares, then evaluate on a later period.

# Fetch daily history for the Shanghai Composite index ('sh') and for
# Pudong Development Bank ('600000'), sorted by the date index.
sh = ts.get_hist_data('sh').sort_index()
pf = ts.get_hist_data('600000').sort_index()

# Daily log return: ln(close_t / close_{t-1}). The first row has no previous
# close, so shift(1) yields NaN there.
sh['re'] = np.log(sh['close'] / sh['close'].shift(1))
pf['re'] = np.log(pf['close'] / pf['close'].shift(1))

# Drop rows with missing values (at least the first row of each frame).
sh = sh.dropna()
pf = pf.dropna()

# Align the two return series on their shared index labels.
data = pd.merge(sh['re'], pf['re'], left_index=True, right_index=True)
data.columns = ['x', 'y']  # x: index return, y: stock return

# Split by label slicing on the index: train on 2017-01-01..2018-04-30,
# test on everything from 2018-05-01 onwards.
testdata = data['2018-05-01':]
traindata = data['2017-01-01':'2018-04-30']

# Fail early with a clear message instead of an opaque estimator error when
# the downloaded history does not cover one of the windows.
if traindata.empty or testdata.empty:
    raise ValueError(
        'No data in the train (2017-01-01..2018-04-30) or test '
        '(from 2018-05-01) window; check the date range returned by tushare.'
    )

# scikit-learn expects 2-D arrays of shape (n_samples, n_features).
x_train = traindata['x'].values.reshape(-1, 1)
y_train = traindata['y'].values.reshape(-1, 1)
x_test = testdata['x'].values.reshape(-1, 1)
y_test = testdata['y'].values.reshape(-1, 1)

# Fit the linear regression model on the training set.
linearr = linear_model.LinearRegression()
linearr.fit(x_train, y_train)

# Fitted values on the training set, used to draw the regression line.
y_train_pred = linearr.predict(x_train)

plt.figure()
plt.scatter(x_train, y_train, color='green')  # raw training points
plt.plot(x_train, y_train_pred, color='black', linewidth=4)  # fitted line
plt.title('train')
plt.show()

# Predictions on the held-out test set.
y_test_pred = linearr.predict(x_test)

# Open a fresh figure so the test plot never lands on the train figure when
# plt.show() does not block (interactive backends).
plt.figure()
plt.scatter(x_test, y_test, color='green')  # raw test points
plt.plot(x_test, y_test_pred, color='black', linewidth=4)  # predicted line
plt.title('test')
plt.show()

# Out-of-sample error metrics: lower MSE is better, higher R2 is better.
print('MSE=', sm.mean_squared_error(y_test, y_test_pred))
print('R2=', sm.r2_score(y_test, y_test_pred))

# 猜你喜欢

# 转载自 www.cnblogs.com/thechain/p/9281496.html