Linear regression code in Python

A linear regression model

It includes five methods; the formulas they implement are sketched after the list:

1. Least squares via the numpy package

2. Least squares via the scipy package

3. Least squares implemented by hand

4. Linear regression via the sklearn package

5. Linear regression (gradient descent) implemented by hand
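All five methods fit the same line y = a·x + b by minimizing the squared error. As a brief sketch matching the code below: methods 1–3 compute the closed-form least-squares solution (see calcAB), while method 5 steps along the negative gradient of the mean squared error loss L (see compute_gradient):

$$a = \frac{\sum_i x_i y_i - \frac{1}{n}\sum_i x_i \sum_i y_i}{\sum_i x_i^2 - \frac{1}{n}\left(\sum_i x_i\right)^2}, \qquad b = \bar{y} - a\,\bar{x}$$

$$\frac{\partial L}{\partial b} = -\frac{2}{N}\sum_i \bigl(y_i - (m x_i + b)\bigr), \qquad \frac{\partial L}{\partial m} = -\frac{2}{N}\sum_i x_i \bigl(y_i - (m x_i + b)\bigr)$$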

Sample results

[Figure: program output]

[Figure: program output]

Code

import numpy as np
import matplotlib.pyplot as plt
from scipy.optimize import leastsq  # used in method two
from sklearn import linear_model


data = np.loadtxt('challenge_dataset.txt', delimiter=',')
print('data.shape: {0}'.format(data.shape))
print('data.type : {0}'.format(type(data)))


# Linear regression model class
class Linear_regression_methods:
    def __init__(self, data):
        self.data = data
        self.x = data[:, 0]
        self.y = data[:, 1]

    def plt_method(self, title, a, b):
        # title: plot title (str); a: slope (weight), b: intercept (bias)
        plt.title(title)
        plt.title(title)
        plt.plot(self.x, self.y, 'o', label='data', markersize=10)
        plt.plot(self.x, a * self.x + b, 'r', label='line')
        plt.legend()
        plt.show()

    def print_method(self, title, a, b):
        # Print the fitted line in the form y = ax + b
        print('-'*50 + "\n{}\ny = {:.5f}x + {:.5f}".format(title, a, b))

    def compute_error(self, a, b):
        # Mean squared error of the line y = a*x + b over the data set
        x = self.data[:, 0]
        y = self.data[:, 1]
        totalError = (y - (a * x + b)) ** 2
        totalError = np.sum(totalError, axis=0)
        results = totalError / float(len(self.data))  # use self.data, not the global
        print('this model final error: {:.5f}'.format(results))

    def one_leastsq_call_numpy_package(self):
        # Method 1: np.linalg.lstsq() solves the least-squares problem directly
        A = np.vstack([self.x, np.ones(len(self.x))]).T
        a, b = np.linalg.lstsq(A, self.y, rcond=None)[0]  # least-squares solution of A @ [a, b] = y
        self.print_method('first leastsq_call_numpy_package', a, b)
        self.plt_method('first leastsq_call_numpy_package', a, b)  # draw the fit
        self.compute_error(a, b)

    def two_leastsq_call_scipy_package(self):
        # Method 2: scipy.optimize.leastsq minimizes the residual function
        def fun(p, x):  # the model to fit: y = k*x + b
            k, b = p  # unpack the fit parameters from p
            return k*x + b

        def err(p, x, y):  # residual (error) function
            return fun(p, x) - y

        # Initial guess, i.e. start from y = 1*x + 1; the starting point can be
        # almost anything, it only affects how long the solver takes to converge.
        p0 = [1, 1]  # could also be initialized randomly
        # leastsq expects numpy arrays
        xishu = leastsq(err, p0, args=(self.x, self.y))  # xishu[0] holds the fitted [k, b]
        self.print_method('second leastsq_call_scipy_package', xishu[0][0], xishu[0][1])
        self.plt_method('second leastsq_call_scipy_package', xishu[0][0], xishu[0][1])
        self.compute_error(xishu[0][0], xishu[0][1])

    def three_leastsq_function(self):
        # Method 3: least squares implemented by hand (closed-form solution)
        def calcAB(x, y):
            n = len(x)
            sumX, sumY, sumXY, sumXX = 0, 0, 0, 0
            for i in range(0, n):
                sumX += x[i]
                sumY += y[i]
                sumXX += x[i] * x[i]
                sumXY += x[i] * y[i]
            a = (sumXY - (1/n) * (sumX * sumY)) / (sumXX - (1/n) * sumX * sumX)
            b = sumY/n - a * sumX/n
            return a, b
        a, b = calcAB(self.x, self.y)
        self.print_method('third leastsq_function', a, b)
        self.plt_method('third leastsq_function', a, b)
        self.compute_error(a, b)

    def four_linear_model_call_sklearn(self):
        # Method 4: train a sklearn LinearRegression model on the data
        body_reg = linear_model.LinearRegression()
        x_values = self.x.reshape(-1, 1)  # sklearn expects 2-D feature arrays
        y_values = self.y.reshape(-1, 1)
        body_reg.fit(x_values, y_values)
        results = body_reg.predict(x_values)
        # Recover slope and intercept from two predicted points
        # (equivalently, read body_reg.coef_ and body_reg.intercept_)
        a = float((results[0] - results[1]) / (self.x[0] - self.x[1]))
        b = float(results[1] - a * self.x[1])
        self.print_method('fourth linear_model_call_sklearn', a, b)
        self.plt_method('fourth linear_model_call_sklearn', a, b)
        self.compute_error(a, b)

    def five_linear_regression(self):
        # Method 5: linear regression via batch gradient descent, by hand
        def compute_gradient(b_current, m_current, data, learning_rate):
            N = float(len(data))
            # vectorized form
            x = data[:, 0]
            y = data[:, 1]
            # Partial derivatives of the mean squared error loss with respect
            # to b and m (the goal is to minimize the squared error)
            b_gradient = -(2 / N) * (y - (m_current * x + b_current))
            b_gradient = np.sum(b_gradient, axis=0)
            m_gradient = -(2 / N) * x * (y - (m_current * x + b_current))
            m_gradient = np.sum(m_gradient, axis=0)
            # Update b and m by stepping against the gradient
            new_b = b_current - (learning_rate * b_gradient)
            new_m = m_current - (learning_rate * m_gradient)
            return [new_b, new_m]

        def optimizer(data, starting_b, starting_m, learning_rate, num_iter):
            b = starting_b
            m = starting_m
            # gradient descent
            for i in range(num_iter):
                # update b and m with the new, more accurate values from one
                # gradient step
                b, m = compute_gradient(b, m, data, learning_rate)
            return [b, m]

        def linear_regression(data):
            # Hyperparameters: the learning rate used for the gradient updates,
            # the number of iterations, and the initial b and m of y = m*x + b
            learning_rate = 0.001
            initial_b = 0.0
            initial_m = 0.0
            num_iter = 1000
            [b, m] = optimizer(data, initial_b, initial_m, learning_rate, num_iter)
            return m, b
        m, b = linear_regression(self.data)
        self.print_method('five_linear_regression', m, b)
        self.plt_method('five_linear_regression', m, b)
        self.compute_error(m, b)


model = Linear_regression_methods(data)
model.one_leastsq_call_numpy_package()
model.two_leastsq_call_scipy_package()
model.three_leastsq_function()
model.four_linear_model_call_sklearn()
model.five_linear_regression()
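
As a quick cross-check (a minimal sketch, not part of the original program): numpy's polyfit solves the same degree-1 least-squares problem in one call, so the five methods above should print roughly the same slope and intercept, with method five agreeing only approximately since 1000 gradient-descent steps may not fully converge.

# Optional sanity check: np.polyfit returns [slope, intercept] for the
# degree-1 least-squares fit of the same data loaded above
a_ref, b_ref = np.polyfit(data[:, 0], data[:, 1], 1)
print('-'*50 + '\npolyfit reference\ny = {:.5f}x + {:.5f}'.format(a_ref, b_ref))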

Reprinted from blog.csdn.net/weixin_34377065/article/details/87424198