Jupyter Notebook
多项式回归1
import numpy as np
from matplotlib import pyplot as plt
100
X = np.random.uniform(-5, 5, size = 100)
x = X.reshape(-1, 1)
0.75 * x**2 +
y = 0.75 * x**2 + x + 5 + np.random.normal(0, 1, 100).reshape(-1, 1)
x.shape
(100, 1)
y.shape
(100, 1)
plt.scatter(x, y, color="r")
![在这里插入图片描述](https://img-blog.csdnimg.cn/20190309221122873.png?x-oss-process=image/watermark,type_ZmFuZ3poZW5naGVpdGk,shadow_10,text_aHR0cHM6Ly9ibG9nLmNzZG4ubmV0L3FxXzM3OTgyMTA5,size_16,color_FFFFFF,t_70)
我们先用线性回归试一下,看看拟合效果如何
from sklearn.linear_model import LinearRegression
lin_gre = LinearRegression()
lin_gre.fit(x, y)
LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None,
normalize=False)
y_predict1 = lin_gre.predict(x)
o
plt.scatter(x, y)
plt.plot(x, y_predict1, color="r")
[<matplotlib.lines.Line2D at 0x2126a6ac5f8>]
from sklearn.metrics import mean_squared_error
, y
mean_squared_error(y_predict1, y)
27.83715552739646
此时我们可以看出拟合得并不好,均方误差(MSE)达到了约 27.8
from sklearn.preprocessing import PolynomialFeatures
ploy_reg = PolynomialFeatures(degree= 3) #有三次方
ploy_reg.fit(x)
PolynomialFeatures(degree=3, include_bias=True, interaction_only=False)
ploy_x = ploy_reg.transform(x)
为什么有4列呢?因为 degree=3 时,各列依次是 x^0, x^1, x^2, x^3
ploy_x.shape #为什么有4列呢,因为x^3, x^2, x, x^0
(100, 4)
x
ploy_x[:5, :]
array([[ 1. , 3.42924246, 11.75970384, 40.32687572],
[ 1. , -0.83580201, 0.698565 , -0.58386203],
[ 1. , 3.44774315, 11.88693284, 40.98309129],
[ 1. , -4.18679748, 17.52927313, -73.39151656],
[ 1. , 4.15454411, 17.26023675, 71.70841491]])
:5, :
x[:5, :]
array([[ 3.42924246],
[-0.83580201],
[ 3.44774315],
[-4.18679748],
[ 4.15454411]])
lin_reg_2 = LinearRegression()
, y
lin_reg_2.fit(ploy_x, y)
LinearRegression(copy_X=True, fit_intercept=True, n_jobs=None,
normalize=False)
ploy_x
poly_predict = lin_reg_2.predict(ploy_x)
ploy_x.shape
(100, 4)
, y
mean_squared_error(poly_predict, y)
1.0923225514464658
此时我们的均方误差只有 1.09 左右,已经接近生成数据时加入的噪声方差(标准正态噪声,方差为 1),说明加入多项式特征后模型很好地拟合了数据,是不是很神奇呀