SimpleLinearRegression向量化

1. 两种方式实现的简单线性回归的python代码

1. 用 for 循环逐个样本累加，求 a、b 的值（SimpleLinearRegression1）
2. 用 NumPy 向量运算（点乘）求 a、b 的值——性能提高很多（SimpleLinearRegression2）

import numpy as np

class SimpleLinearRegression1:
    """Single-feature linear regression fitted with an explicit Python loop.

    The model is ``y = a_ * x + b_``. ``a_`` (slope) and ``b_`` (intercept)
    are the least-squares estimates and stay ``None`` until ``fit`` is called.
    This is the loop-based reference implementation; the sums are accumulated
    one sample at a time on purpose.
    """

    def __init__(self):
        """Create an unfitted model (``a_`` and ``b_`` are ``None``)."""
        self.a_ = None
        self.b_ = None

    def fit(self, x_train, y_train):
        """Estimate ``a_`` and ``b_`` from the training data.

        Args:
            x_train: 1-D array-like of feature values.
            y_train: Array-like of target values, same length as ``x_train``.

        Returns:
            ``self``, so that calls can be chained.

        Raises:
            AssertionError: If ``x_train`` is not one-dimensional or the two
                inputs differ in length.
            ValueError: If ``x_train`` is empty or all of its values are
                identical; the slope is undefined in both cases.
        """
        # Accept plain lists/tuples as well as ndarrays.
        x_train = np.asarray(x_train)
        y_train = np.asarray(y_train)

        # Raised explicitly (instead of `assert`) so that the checks are not
        # stripped when Python runs with -O; type and message are unchanged.
        if x_train.ndim != 1:
            raise AssertionError("只能解决单一特征数据")
        if len(x_train) != len(y_train):
            raise AssertionError("x和y的个数必须相等")
        if x_train.size == 0:
            raise ValueError("x_train不能为空")

        x_mean = np.mean(x_train)
        y_mean = np.mean(y_train)

        num = 0.0  # sum of (x_i - x_mean) * (y_i - y_mean)
        d = 0.0    # sum of (x_i - x_mean) ** 2
        for x_i, y_i in zip(x_train, y_train):
            dx = x_i - x_mean
            num += dx * (y_i - y_mean)
            d += dx ** 2

        # Without this guard a constant x_train would silently store NaN
        # parameters and every later prediction would be NaN.
        if d == 0:
            raise ValueError("x_train的所有取值相同，无法确定斜率a")

        self.a_ = num / d
        self.b_ = y_mean - self.a_ * x_mean

        return self

    def predict(self, x_predict):
        """Return the predicted targets for every value in ``x_predict``.

        Args:
            x_predict: 1-D array-like of feature values.

        Returns:
            A NumPy array with one prediction per input value.

        Raises:
            AssertionError: If ``x_predict`` is not one-dimensional or the
                model has not been fitted yet.
        """
        x_predict = np.asarray(x_predict)
        if x_predict.ndim != 1:
            raise AssertionError("x_predict只能是一维数组")
        if self.a_ is None or self.b_ is None:
            raise AssertionError("在预测之前必须先训练得到参数a,b")
        # One broadcast expression replaces the per-element Python loop.
        return self.a_ * x_predict + self.b_

    def _predict(self, x_single):
        """Return the prediction for a single feature value ``x_single``."""
        return self.a_ * x_single + self.b_

    def __repr__(self):
        return "SimpleLinearRegression1()"

class SimpleLinearRegression2:
    """Single-feature linear regression fitted with vectorized NumPy operations.

    The model is ``y = a_ * x + b_``. ``a_`` (slope) and ``b_`` (intercept)
    are the least-squares estimates and stay ``None`` until ``fit`` is called.
    The sums needed for the slope are computed as dot products instead of a
    Python loop.
    """

    def __init__(self):
        """Create an unfitted model (``a_`` and ``b_`` are ``None``)."""
        self.a_ = None
        self.b_ = None

    def fit(self, x_train, y_train):
        """Estimate ``a_`` and ``b_`` from the training data.

        Args:
            x_train: 1-D array-like of feature values.
            y_train: Array-like of target values, same length as ``x_train``.

        Returns:
            ``self``, so that calls can be chained.

        Raises:
            AssertionError: If ``x_train`` is not one-dimensional or the two
                inputs differ in length.
            ValueError: If ``x_train`` is empty or all of its values are
                identical; the slope is undefined in both cases.
        """
        # Accept plain lists/tuples as well as ndarrays.
        x_train = np.asarray(x_train)
        y_train = np.asarray(y_train)

        # Raised explicitly (instead of `assert`) so that the checks are not
        # stripped when Python runs with -O; type and message are unchanged.
        if x_train.ndim != 1:
            raise AssertionError("只能解决单一特征数据")
        if len(x_train) != len(y_train):
            raise AssertionError("x和y的个数必须相等")
        if x_train.size == 0:
            raise ValueError("x_train不能为空")

        x_mean = np.mean(x_train)
        y_mean = np.mean(y_train)

        # Center x once and reuse it for both dot products.
        x_centered = x_train - x_mean
        num = x_centered.dot(y_train - y_mean)
        d = x_centered.dot(x_centered)

        # Without this guard a constant x_train would silently store NaN
        # parameters and every later prediction would be NaN.
        if d == 0:
            raise ValueError("x_train的所有取值相同，无法确定斜率a")

        self.a_ = num / d
        self.b_ = y_mean - self.a_ * x_mean

        return self

    def predict(self, x_predict):
        """Return the predicted targets for every value in ``x_predict``.

        Args:
            x_predict: 1-D array-like of feature values.

        Returns:
            A NumPy array with one prediction per input value.

        Raises:
            AssertionError: If ``x_predict`` is not one-dimensional or the
                model has not been fitted yet.
        """
        x_predict = np.asarray(x_predict)
        if x_predict.ndim != 1:
            raise AssertionError("x_predict只能是一维数组")
        if self.a_ is None or self.b_ is None:
            raise AssertionError("在预测之前必须先训练得到参数a,b")
        # One broadcast expression replaces the per-element Python loop.
        return self.a_ * x_predict + self.b_

    def _predict(self, x_single):
        """Return the prediction for a single feature value ``x_single``."""
        return self.a_ * x_single + self.b_

    def __repr__(self):
        return "SimpleLinearRegression2()"


2. 使用我们自己的SimpleLinearRegression1

from SimpleLinearRegression import SimpleLinearRegression1
import numpy as np
import matplotlib.pyplot as plt

x=np.array([1.,2.,3.,4.,5.])
y=np.array([1.,3.,2.,3.,5.])

reg1=SimpleLinearRegression1()
reg1.fit(x,y)
SimpleLinearRegression1()
reg1.a_
0.8
reg1.b_
0.39999999999999947
x_predict=np.array([6])
y_predict=reg1.predict(x_predict)
y_predict
array([5.2])
y_hat1=reg1.predict(x)
plt.scatter(x,y)
plt.plot(x,y_hat1,color='r')
plt.axis([0,6,0,6])
[0, 6, 0, 6]

（此处原为图片：训练数据的散点图，以及 reg1 拟合出的红色回归直线 y ≈ 0.8x + 0.4）

3. 向量化实现SimpleLinearRegression2

from SimpleLinearRegression import SimpleLinearRegression2
reg2=SimpleLinearRegression2()
reg2.fit(x,y)
SimpleLinearRegression2()
reg2.a_
0.8
reg2.b_
0.39999999999999947
reg2.predict(x_predict)
array([5.2])
y_hat2=reg2.predict(x)
plt.scatter(x,y)
plt.plot(x,y_hat2,color='r')
plt.axis([0,6,0,6])
[0, 6, 0, 6]

（此处原为图片：训练数据的散点图，以及 reg2 拟合出的红色回归直线 y ≈ 0.8x + 0.4，与 reg1 的结果一致）

4. 向量化实现的性能测试

m=10000000
big_x=np.random.random(size=m)
big_y=big_x*2.0+3.0+np.random.normal(size=m)
%timeit reg1.fit(big_x,big_y)
%timeit reg2.fit(big_x,big_y)
18.9 s ± 1.44 s per loop (mean ± std. dev. of 7 runs, 1 loop each)
342 ms ± 31.3 ms per loop (mean ± std. dev. of 7 runs, 1 loop each)

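向量化可以很好地提高性能：在 1000 万个样本上，for 循环版本（reg1）每次拟合约需 18.9 s，向量化版本（reg2）约需 342 ms，快了约 55 倍；两者求得的 a、b 在浮点误差范围内一致（见下方输出）。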


reg1.a_
2.00057885845175
reg1.b_
2.999474051528445
reg2.a_
2.000578858451468
reg2.b_
2.9994740515285856

猜你喜欢

转载自blog.csdn.net/chairon/article/details/107285192