Linear Regression and Ridge Regression

Source: https://www.cnblogs.com/pinard/p/6023000.html

Linear regression and cross-validation

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn import datasets,linear_model

Read the data (the CCPP dataset, stored as an Excel file)

data = pd.read_excel(r"F:\data\CCPP\Folds5x2_pp.xlsx")  # raw string avoids backslash-escape problems in the Windows path
x = data[['AT', 'V', 'AP', 'RH']]
y = data[['PE']]
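
A quick sanity check before modeling (a minimal sketch, assuming the file loaded correctly): the UCI CCPP data should contain 9568 rows with features AT, V, AP, RH and target PE.

print(data.shape)   # expect (9568, 5) per the UCI description
print(data.head())  # first rows of AT, V, AP, RH, PE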

Split into training and test sets

from sklearn.model_selection import train_test_split  # sklearn.cross_validation was removed in modern scikit-learn

x is the feature matrix to split and y the corresponding labels. After train_test_split, x_train and y_train hold the training features and labels, while x_test and y_test hold the test features and labels.

x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=1)  # the split ratio can be set with test_size
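
For example, as a sketch, an explicit 75/25 split looks like the following (0.25 is also scikit-learn's default test_size when none is given):

# hold out 25% of the rows for testing; random_state fixes the shuffle
x_train, x_test, y_train, y_test = train_test_split(
    x, y, test_size=0.25, random_state=1)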

Import the linear model

from sklearn.linear_model import LinearRegression
linreg = LinearRegression()  # ordinary least squares linear regression

Fit the model

linreg.fit(x_train,y_train)
print("linreg.intercept_",linreg.intercept_,"linreg.coef_",linreg.coef_)

Evaluate the model on the test set

y_pred = linreg.predict(x_test)
from sklearn import metrics

Compute the MSE with scikit-learn

print("MSE:",metrics.mean_squared_error(y_test, y_pred))

Compute the RMSE with scikit-learn

print("RMSE:",np.sqrt(metrics.mean_squared_error(y_test, y_pred)))

Use cross-validation to evaluate the model

from sklearn.model_selection import cross_val_predict

cv sets the number of folds for S-fold cross-validation; cv=100 below means 100-fold CV

predicted = cross_val_predict(linreg, x, y, cv=100)
print("predicted:",predicted.shape)

Compute the MSE with scikit-learn

print("MSE:",metrics.mean_squared_error(y, predicted))

Compute the RMSE with scikit-learn

print ("RMSE:",np.sqrt(metrics.mean_squared_error(y, predicted)))

Plot predicted against measured values

fig, ax = plt.subplots()
ax.scatter(y, predicted)
ax.plot([y.min(), y.max()], [y.min(), y.max()], 'k--', lw=4)
ax.set_xlabel('Measured')
ax.set_ylabel('Predicted')
plt.show()

Ridge regression: choosing the hyperparameter α with scikit-learn

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from sklearn import datasets,linear_model
from sklearn import metrics

Read the data (the same CCPP Excel file as above)

data = pd.read_excel(r"F:\data\CCPP\Folds5x2_pp.xlsx")  # raw string avoids backslash-escape problems in the Windows path
x = data[['AT', 'V', 'AP', 'RH']]
y = data[['PE']]

Split into training and test sets

from sklearn.model_selection import train_test_split

x is the feature matrix to split and y the corresponding labels. After train_test_split, x_train and y_train hold the training features and labels, while x_test and y_test hold the test features and labels.

x_train, x_test, y_train, y_test = train_test_split(x, y, random_state=1)  # the split ratio can be set with test_size
n_alphas = 200
alphas = np.logspace(-10, -2, n_alphas)
print("alphas:", alphas)
clf = linear_model.Ridge(fit_intercept=False)
coefs = []
for a in alphas:
    # set the hyperparameter for this iteration
    clf.set_params(alpha=a)
    # fit a ridge regression for this alpha
    clf.fit(x_train, y_train)
    y_predict = clf.predict(x_test)
    error = metrics.mean_squared_error(y_test, y_predict)  # mean squared error on the test set
    print("error:", error)
    # store the coefficient vector theta for each alpha;
    # ravel (1, 4) -> (4,) so the coefficient paths plot cleanly below
    coefs.append(clf.coef_.ravel())
print("coefs:", coefs)

ax = plt.gca()
ax.plot(alphas, coefs)
# log scale on the x-axis makes the wide alpha range readable
ax.set_xscale('log')
# reverse the x-axis so alpha runs from large to small
ax.set_xlim(ax.get_xlim()[::-1])
plt.xlabel('alpha')
plt.ylabel('weights')
plt.title('Ridge coefficients as a function of the regularization')
plt.axis('tight')
plt.show()
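
The loop above sweeps alpha by hand and inspects the error and coefficient paths. As an alternative, here is a minimal sketch (an addition, not from the original post) using scikit-learn's RidgeCV, which selects alpha from the same grid by built-in cross-validation:

from sklearn.linear_model import RidgeCV

# search the same alpha grid; RidgeCV picks the best value by
# (leave-one-out) cross-validation on the training data
ridge_cv = RidgeCV(alphas=np.logspace(-10, -2, 200), fit_intercept=False)
ridge_cv.fit(x_train, y_train)
print("best alpha:", ridge_cv.alpha_)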


Reposted from www.cnblogs.com/131415-520/p/11741303.html