import numpy as np from sklearn import datasets import matplotlib.pyplot as plt %matplotlib inline
# Load the diabetes dataset. load_diabetes must be *called*; without the
# parentheses `diabetes` would be the loader function itself and the
# attribute accesses below would fail.
diabetes = datasets.load_diabetes()
print(type(diabetes))
print(diabetes.data.shape)
print(diabetes.target.shape)
# Output:
# --------------------------
# sklearn.utils.Bunch
# (442,10)
# (442,)

# Keep only the third column (bmi). np.newaxis adds an axis so the result
# stays two-dimensional (one column) instead of collapsing to 1-D.
diabetes.data = diabetes.data[:, np.newaxis, 2]
将数据分成训练集和测试集
# Training set: every row except the last 20.
x_train = diabetes.data[:-20]
y_train = diabetes.target[:-20]
# Test set: the last 20 rows. The slice must be open-ended ([-20:]); a stop
# of 0 ([-20:0]) yields an empty array. The test labels come from
# diabetes.target, not diabetes.data.
x_test = diabetes.data[-20:]
y_test = diabetes.target[-20:]
自定义一个计算权重值和预测值的类
class LinearRegression(object):
    """Ordinary least-squares linear regression with an intercept term.

    The bias is folded into the weight vector: a constant column x0 = 1 is
    prepended to the inputs, so ``y = w.x + b`` becomes ``y = X.w`` with
    ``w[0]`` playing the role of the intercept ``b``.
    """

    def __init__(self):
        # Weight vector (intercept first); stays None until fit() is called.
        self.w = None

    def fit(self, X, y):
        """Compute the least-squares weights and store them in ``self.w``.

        Args:
            X: 2-D array-like with one row per sample and one column per
                feature.
            y: Array-like of target values, one per row of ``X``.
        """
        # Prepend the constant column x0 = 1 so that w[0] is the intercept.
        X = np.insert(X, 0, 1, axis=1)
        # Solves the same problem as the normal equation
        # w = (X^T X)^-1 X^T y, but without forming an explicit inverse, so
        # it also works when X^T X is singular (e.g. duplicated features).
        self.w = np.linalg.lstsq(X, y, rcond=None)[0]

    def predict(self, X):
        """Return the predicted targets ``X.w`` for the rows of ``X``.

        Args:
            X: 2-D array-like with the same number of columns as the data
                passed to ``fit``.

        Returns:
            Array of predictions, one per row of ``X``.
        """
        # Same intercept column as in fit(): b = w0 * x0 with x0 = 1.
        X = np.insert(X, 0, 1, axis=1)
        y_pred = X.dot(self.w)
        return y_pred
# Create a model instance, then estimate the weights w from the training set.
clf = LinearRegression()
clf.fit(x_train, y_train)

# Show the fitted weights.
print(clf.w)
--------------------------------
array([152.91886183, 938.23786125])
# Compute the predictions for the test set.
y_pred = clf.predict(x_test)
print(y_test)
print(x_test[:, 0])
print(y_pred)
# Output:
# ------------------------------------
# [233. 91. 111. 152. 120. 67. 310. 94. 183. 66. 173. 72. 49. 64. 48. 178. 104. 132. 220. 57.]
# [ 0.07786339 -0.03961813 0.01103904 -0.04069594 -0.03422907 0.00564998 0.08864151 -0.03315126 -0.05686312 -0.03099563 0.05522933 -0.06009656 0.00133873 -0.02345095 -0.07410811 0.01966154 -0.01590626 -0.01590626 0.03906215 -0.0730303 ]
# [225.9732401 , 115.74763374, 163.27610621, 114.73638965, 120.80385422, 158.21988574, 236.08568105, 121.81509832, 99.56772822, 123.83758651, 204.73711411, 96.53399594, 154.17490936, 130.91629517, 83.3878227 , 171.36605897, 137.99500384, 137.99500384, 189.56845268, 84.3990668 ]
def mean_squared_error(y_true, y_pred):
    """Return the mean of the squared errors between targets and predictions.

    Args:
        y_true: Array-like of observed target values.
        y_pred: Array-like of predicted values, same length as ``y_true``.

    Returns:
        The average of ``(y_true - y_pred) ** 2`` over all entries.
    """
    # asarray lets plain Python lists be passed as well as ndarrays.
    residuals = np.asarray(y_true) - np.asarray(y_pred)
    mse = np.mean(np.power(residuals, 2))
    return mse
# Mean squared error of the predictions on the test set.
mean_squared_error(y_test, y_pred)
# Output:
# ---------------------------------
# 2548.072398725972
# Scatter plot of the true test targets.
plt.scatter(x_test[:, 0], y_test, color='black', label='true')
# Line plot of the predicted values (the fitted trend).
plt.plot(x_test[:, 0], y_pred, color='blue', linewidth=3, label='pred')
plt.legend()
# ----------------------------------------------