线性回归小代码

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from sklearn import linear_model
from sklearn.model_selection import train_test_split,cross_val_score
from sklearn import metrics
# Load the spreadsheet into a DataFrame.
# The original author recorded its shape as (9568, 5).
data = pd.read_excel('Folds5x2_pp.xlsx')

# Target is the 'PE' column; features are every column except the last one.
target = data['PE']
features = data.iloc[:, :-1]

# Hold out 20% of the rows for testing; random_state pins the shuffle.
X_train, X_test, y_train, y_test = train_test_split(
    features,
    target,
    test_size=0.2,
    random_state=1,
)

# Fit a linear regression on the training split only.
liner = linear_model.LinearRegression()
liner.fit(X_train, y_train)
# The fitted parameters can be inspected via liner.intercept_ and liner.coef_.
# Values recorded by the original author from one run:
#   intercept:    459.9472128958727
#   coefficients: [-1.96874028 -0.23852978  0.05697873 -0.15933335]

# Score the model on the held-out rows: MSE, RMSE and R^2.
test_pred = liner.predict(X_test)
test_mse = metrics.mean_squared_error(y_test, test_pred)
print('均方误差\n', test_mse)
# RMSE is just the square root of the MSE computed above.
print('均方误差根\n', np.sqrt(test_mse))
print('r2\n', metrics.r2_score(y_test, test_pred))
# Output recorded by the original author from one run:
#   MSE:  20.329991554850057
#   RMSE: 4.508879190536164
#   R^2:  0.9321860060402446
# 10-fold cross-validation: collect one out-of-fold prediction for every row,
# then score and plot those predictions against the measured values.
from sklearn.model_selection import cross_val_predict

X = data[['AT', 'V', 'AP', 'RH']]
# Select the target as a 1-D Series (not a one-column DataFrame) so that the
# predictions come back 1-D and y.min() / y.max() below are plain scalars.
y = data['PE']
predicted = cross_val_predict(liner, X, y, cv=10)
# The original author recorded len(predicted) == 9568, i.e. one per row.

# Compute the MSE once and derive the RMSE from it.
cv_mse = metrics.mean_squared_error(y, predicted)
print("MSE:", cv_mse)
print("RMSE:", np.sqrt(cv_mse))
# NOTE: the original author observed that this MSE is larger than the
# hold-out MSE printed earlier. The two numbers are not directly comparable:
# this one covers the out-of-fold predictions for every row, whereas the
# earlier one covers only the 20% test split (test_size=0.2).

# Measured vs. predicted scatter; the dashed diagonal marks a perfect fit.
fig, ax = plt.subplots()
ax.scatter(y, predicted)
ax.plot([y.min(), y.max()], [y.min(), y.max()], 'k--', lw=4)
ax.set_xlabel('Measured')
ax.set_ylabel('Predicted')
plt.show()

猜你喜欢

转载自blog.csdn.net/zhaoqqa/article/details/82118677
今日推荐