Bobo老师机器学习笔记第五课-简单线性回归

课程地址：https://coding.imooc.com/class/169.html

最小二乘法的推导博客点击此处

代码实现（参考Bobo实现，如果要看BoBo老师源代码，请点击此处）：

# -*- encoding: utf-8 -*-
"""
实现简单的线性回归,
自己实现SimpleLineRegession1过程中的2个错误：
1、deno += (x - x_mean) ** 2 写成 deno = (x - x_mean) ** 2 这里要注意： deno是所有计算结果的累计值
2、 方程方式self.a_ * x + self.b_ 写成 self.a_ * x - self.b_。 计算b的公式b=y_mean - a * x_mean, 但是整个方程是 y = ax+b
"""
import numpy as np


class SimpleLineRegession1(object):
    """
    不使用向量化实现简单的线性回归
    """

    def __init__(self):
        """
        在过程中计算出来的变量统一命令,后缀加上_
        """
        self.a_ = None  # 表示线性的斜率
        self.b_ = None  # 表示线

    def fit(self, X_train, y_train):
        """
        训练模型
        :param X_train:
        :return:
        """
        assert X_train.ndim == 1 and y_train.ndim == 1, 'X和Y必须为1维'
        assert len(X_train) == len(y_train), 'X和Y的训练个数不相同'
        x_mean = np.mean(X_train)
        y_mean = np.mean(y_train)
        num = 0.0  # 分子  Numerator and denominator
        deno = 0.0
        for x, y in zip(X_train, y_train):
            num += (x - x_mean) * (y - y_mean)
            deno += (x - x_mean) ** 2
        self.a_ = num / deno
        self.b_ = y_mean - self.a_ * x_mean


    def _predict(self, x):
        """
        预测单个X的结果 线性方程y = a*x + b
        :param x:
        :return:
        """

        return self.a_ * x + self.b_

    def predict(self, X_test):
        """
        预测X，X是一维的数据
        :param X_test:
        :return:
        """
        assert X_test.ndim == 1, 'X_test必须是一维数组'
        assert self.a_ is not None and self.b_ is not None , '在predict之前请先fit'

        y_pridect = [self._predict(x) for x in X_test]
        return np.array(y_pridect)

    def __repr__(self):
        return ('SimpleLineRegession1(a=%s, b=%s)' %(self.a_, self.b_))


class SimpleLineRegession2(object):
    """
    不使用向量化实现简单的线性回归
    """

    def __init__(self):
        """
        在过程中计算出来的变量统一命令,后缀加上_
        """
        self.a_ = None  # 表示线性的斜率
        self.b_ = None  # 表示线

    def fit(self, X_train, y_train):
        """
        训练模型
        :param X_train:
        :return:
        """
        assert X_train.ndim == 1 and y_train.ndim == 1, 'X和Y必须为1维'
        assert len(X_train) == len(y_train), 'X和Y的训练个数不相同'
        x_mean = np.mean(X_train)
        y_mean = np.mean(y_train)
        self.a_ = (X_train - x_mean).dot(y_train - y_mean) / (X_train - x_mean).dot(X_train - x_mean)
        self.b_ = y_mean - self.a_ * x_mean


    def _predict(self, x):
        """
        预测单个X的结果 线性方程y = a*x + b
        :param x:
        :return:
        """

        return self.a_ * x + self.b_

    def predict(self, X_test):
        """
        预测X，X是一维的数据
        :param X_test:
        :return:
        """
        assert X_test.ndim == 1, 'X_test必须是一维数组'
        assert self.a_ is not None and self.b_ is not None , '在predict之前请先fit'

        y_pridect = [self._predict(x) for x in X_test]
        return np.array(y_pridect)

    def __repr__(self):
        return 'SimpleLineRegession2(a=%s, b=%s)' %(self.a_, self.b_)

测试代码：

import numpy as np
from timeit import timeit as timeit
import matplotlib.pyplot as plt
from simplelinerregression import SimpleLineRegession1, SimpleLineRegession2

x = np.random.randint(1.0, 6, 10000) + np.random.normal(size=10000)
y = 0.8 * x + 0.4 + np.random.normal(size=len(x))

def test_reg1():

    reg1 = SimpleLineRegession1()
    reg1.fit(x, y)
    reg1.predict(x)
    print reg1

def test_reg2():
    reg2 = SimpleLineRegession2()
    reg2.fit(x, y)
    reg2.predict(x)
    print reg2

def draw_graph():

    x = np.array([1., 2., 3., 4., 5.])
    y = np.array([1., 3., 2., 3.0, 5.0])
    plt.scatter(x, y)
    plt.scatter(x, y, color='green')
    plt.axis([0, 6, 0, 6])

    reg1 = SimpleLineRegession1()
    reg1.fit(x, y)
    y_predict = reg1.predict(x)

    line_mark = 'y=%sx+%s' % (np.round(reg1.a_, 2), np.round(reg1.b_, 2))
    plt.plot(x, y_predict, color='red', label=line_mark)
    plt.legend()
    plt.show()


if __name__ == '__main__':
    print timeit('test_reg1()', "from __main__ import test_reg1", number=3)
    print timeit('test_reg2()', "from __main__ import test_reg2", number=3)
    draw_graph()

运行结果：

运行结果，明显SimpleLineRegession2效率要比SimpleLineRegession1高很多
SimpleLineRegession1(a=0.8018889242367586, b=0.39478340695596614)
SimpleLineRegession1(a=0.8018889242367586, b=0.39478340695596614)
SimpleLineRegession1(a=0.8018889242367586, b=0.39478340695596614)
0.0413969199446
SimpleLineRegession2(a=0.8018889242367646, b=0.39478340695594794)
SimpleLineRegession2(a=0.8018889242367646, b=0.39478340695594794)
SimpleLineRegession2(a=0.8018889242367646, b=0.39478340695594794)
0.0128730256884

Bobo老师机器学习笔记第五课-简单线性回归

猜你喜欢