线性回归python实践

#coding=utf-8
import numpy as np
from sklearn.linear_model import LinearRegression
from sklearn.metrics import mean_squared_error
from sklearn.preprocessing import PolynomialFeatures
import matplotlib.pyplot as plt
from sklearn.pipeline import Pipeline
from sklearn.linear_model import Ridge

# Print floats in plain decimal form rather than scientific notation so the
# fitted parameters printed below are easier to read.
np.set_printoptions(suppress=True)
 
def get_func(a, b, x):
    """Evaluate the fitted polynomial ``b + sum(a[i] * x**(i + 1))``.

    Args:
        a: Sequence of regression coefficients; ``a[i]`` multiplies
            ``x**(i + 1)``.
        b: Intercept (constant term) of the fitted model.
        x: Point(s) at which to evaluate; a scalar or a NumPy array.

    Returns:
        The polynomial value(s) at ``x``. When ``a`` is empty, ``b`` itself
        is returned.
    """
    y = b
    for power, coef in enumerate(a, start=1):
        # Rebind instead of using ``+=``: an in-place add would silently
        # modify the caller's ``b`` when it is a NumPy array.
        y = y + coef * x ** power
    return y
 
# Configure matplotlib so that the Chinese legend labels render correctly.
plt.rcParams['font.sans-serif'] = ['SimHei']
# Draw negative tick labels with an ASCII hyphen instead of the Unicode minus.
plt.rcParams['axes.unicode_minus'] = False

# Generate 10 evenly spaced sample points on [-5, 10].
x = np.linspace(-5,10,10)
y = x**2+2  # the underlying function the samples are drawn from

# Perturb every sample with noise of 50 * rand(). The noise vector is sized
# from y itself, so the sample count is only written once (in linspace).
y += 50 * np.random.rand(len(y))
plt.scatter(x,y,c='r')
 
# Start fitting.
# Fit a first-order (straight-line) model; reshape(-1, 1) turns the 1-D
# samples into the single-column 2-D array that fit() is given.
model = LinearRegression()
model.fit(x.reshape(-1,1),y)

# 100 evenly spaced points on [-5, 10] used to draw the fitted curves.
x1 = np.linspace(-5,10,100)
y1 = get_func(model.coef_, model.intercept_, x1)
# Plot the fitted line.
plt.plot(x1,y1,'g',label=u'1阶')
# print(...) with one parenthesized argument produces the same output on
# Python 2 and is also valid syntax on Python 3.
print("一阶参数:  "+str(model.intercept_)+str(model.coef_))
print("均方误差:  "+str(mean_squared_error(y,model.predict(x.reshape(-1,1)))))



# Chain polynomial feature expansion and linear regression in a pipeline so
# both steps can be configured and fitted through a single object.
pipe = Pipeline([
    # "poly": feature expansion, created with include_bias=False.
    ("poly",PolynomialFeatures(include_bias=False)),
    # "liner": the regression step that is fitted on the expanded features.
    ("liner",LinearRegression())])
 
# set_params(poly__degree=2) sets the `degree` parameter of the pipeline step
# named "poly" (the PolynomialFeatures transformer) to 2.
# With degree 2, PolynomialFeatures expands the single feature x into two
# features: x and x**2.

# Second-order fit.
pipe.set_params(poly__degree=2).fit(x.reshape(-1,1),y)
# Reach the fitted regressor through the public named_steps mapping rather
# than the private _final_estimator attribute.
quad_reg = pipe.named_steps["liner"]
y2 = get_func(quad_reg.coef_, quad_reg.intercept_, x1)
plt.plot(x1,y2,'b',label=u'2阶')
# print(...) with one parenthesized argument produces the same output on
# Python 2 and is also valid syntax on Python 3.
print("二阶参数:  "+str(quad_reg.intercept_)+str(quad_reg.coef_))
print("均方误差:  "+str(mean_squared_error(y,pipe.predict(x.reshape(-1,1)))))


# Ninth-order fit: refit the same pipeline with degree 9.
pipe.set_params(poly__degree=9).fit(x.reshape(-1,1),y)
# Reach the fitted regressor through the public named_steps mapping rather
# than the private _final_estimator attribute.
ninth_reg = pipe.named_steps["liner"]
y9 = get_func(ninth_reg.coef_, ninth_reg.intercept_, x1)
plt.plot(x1,y9,'black',label=u'9阶')
# print(...) with one parenthesized argument produces the same output on
# Python 2 and is also valid syntax on Python 3.
print("9阶参数"+str(ninth_reg.intercept_)+str(ninth_reg.coef_))
print("均方误差"+str(mean_squared_error(y,pipe.predict(x.reshape(-1,1)))))

# Use ridge regression to reduce the over-fitting of the 9th-order model.
# alpha is the penalty factor; its best value should be chosen by
# cross-validation, but here it is simply given directly.
ridge = Ridge(alpha=100000)
# Build the 9th-order features of x.
poly9 = PolynomialFeatures(degree=9,include_bias=False)
x9 = poly9.fit_transform(x.reshape(-1,1))
# print(...) with one parenthesized argument produces the same output on
# Python 2 and is also valid syntax on Python 3.
print("9阶特征:"+str(x9[0]))
ridge.fit(x9,y)
ridge_y9 = get_func(ridge.coef_,ridge.intercept_, x1)
plt.plot(x1,ridge_y9,'yellow',label=u'岭回归9阶')
print("岭回归参数："+str(ridge.coef_))

# Draw the legend for all fitted curves and display the figure.
# fontsize is passed as a number: the documented values are a numeric size or
# a named size such as 'small'; the numeric string '15' only worked through
# float coercion.
plt.legend(loc='upper left',fontsize=15)
plt.show()
运行结果
注意：每一次运行绘制的图形可能会不一样，因为代码中给样本点加入的扰动是随机的。
运行代码可以发现，高阶（9阶）拟合出现了过拟合现象；利用岭回归可以减轻这种过拟合。
线性回归python实践

猜你喜欢