本节在之前创建的 SimpleLinearRegression 模块中新增一个类,用向量化运算实现简单线性回归。
一、编写向量化线性回归的类:
import numpy as np
class SimpleLinearRegression2(object):
    """Single-feature linear regression fitted with vectorized NumPy operations.

    The model is ``y = a_ * x + b_``. Attributes computed by ``fit`` carry a
    trailing underscore.
    """

    def __init__(self):
        """Create an unfitted model; ``a_`` and ``b_`` are ``None`` until ``fit`` runs."""
        self.a_ = None  # slope of the fitted line
        self.b_ = None  # intercept of the fitted line

    def fit(self, X_train, Y_train):
        """Fit the line to the training data by least squares.

        Args:
            X_train: 1-D array-like of feature values.
            Y_train: 1-D array-like of target values, same length as ``X_train``.

        Returns:
            This instance, so calls can be chained (``Model().fit(x, y).predict(x)``).

        Note:
            If every value in ``X_train`` is identical the denominator is zero
            and the slope follows NumPy's division semantics (no exception is
            raised here).
        """
        # Coerce once so plain Python sequences behave like ndarrays below.
        X_train = np.asarray(X_train)
        Y_train = np.asarray(Y_train)

        X_mean = np.mean(X_train)
        Y_mean = np.mean(Y_train)

        # Centre x once and reuse it for both dot products.
        x_centered = X_train - X_mean
        num = x_centered.dot(Y_train - Y_mean)
        deno = x_centered.dot(x_centered)

        self.a_ = num / deno
        self.b_ = Y_mean - self.a_ * X_mean
        return self

    def _predict(self, x):
        """Return the prediction for a single value ``x``."""
        return self.a_ * x + self.b_

    def predict(self, X_test):
        """Return predictions for every value in ``X_test`` as an ndarray.

        Args:
            X_test: array-like of feature values (documented as 1-D data).

        The whole array is evaluated in one NumPy expression rather than a
        Python-level loop over ``_predict``.
        """
        return self.a_ * np.asarray(X_test) + self.b_

    def __repr__(self):
        return 'SimpleLinearRegression2(a=%s, b=%s)' % (self.a_, self.b_)
二、向量化实现线性回归:
from playML.SimpleLinearRegression import SimpleLinearRegression2
# Toy data set: five (x, y) points used to demonstrate the vectorized model.
x = np.array([1, 2, 3, 4, 5])
y = np.array([1, 3, 2, 3, 5])
# Fit the model on the toy data, then predict on the same x values.
reg2 = SimpleLinearRegression2()
reg2.fit(x, y)
y_hat2 = reg2.predict(x) # predicted values
# NOTE(review): `plt` is not imported anywhere in this snippet — presumably
# `import matplotlib.pyplot as plt` was run earlier; confirm before running.
# Scatter the raw points and overlay the fitted line in red.
plt.scatter(x, y)
plt.plot(x, y_hat2, color='r')
# Fix both axes to the range 0..6.
plt.axis([0, 6, 0, 6])
plt.show()
三、向量化性能测试:
import numpy as np
from playML.SimpleLinearRegression import SimpleLinearRegression2
# Benchmark data: one million samples.
# NOTE(review): `reg1`, used by the %timeit call that follows, is not defined in
# this snippet — presumably an instance of the earlier non-vectorized
# implementation; confirm it exists before running the timing cells.
m = 1000000
# x values come from the global NumPy RNG (uniform floats).
big_x = np.random.random(size=m)
# y follows the line 2x + 3 with additive normally distributed noise.
big_y = big_x * 2.0 + 3.0 + np.random.normal(size=m)
%timeit reg1.fit(big_x, big_y)
>>> 1.17 s ± 27.8 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)
%timeit reg2.fit(big_x, big_y)
>>> 16.1 ms ± 390 µs per loop (mean ± std. dev. of 7 runs, 100 loops each)
参考资料:bobo老师机器学习教程