机器学习实战笔记 --- Python实现线性回归

版权声明:学习交流为主,未经博主同意禁止转载,禁止用于商用。 https://blog.csdn.net/u012965373/article/details/88653647
# -*- coding: utf-8 -*-
__author__ = 'yangxin_ryan'
from numpy import *
import matplotlib.pylab as plt


class Regression(object):
    """Linear-regression helpers built on NumPy matrices.

    Provides ordinary least squares, locally weighted linear regression,
    ridge regression and forward stagewise regression, plus a small
    plotting demo.  All NumPy names come from the module-level
    ``from numpy import *``.
    """

    def load_data_set(self, file_name):
        """Read a tab-separated numeric file into features and labels.

        The number of feature columns is taken from the first non-blank
        line (its column count minus one); the last column of every line
        is the label.  Blank lines are skipped.

        :param file_name: path of the tab-separated text file.
        :return: ``(data_mat, label_mat)`` where ``data_mat`` is a list of
            feature rows (lists of floats) and ``label_mat`` is a list of
            floats.
        :raises ValueError: if a field cannot be parsed as a float.
        :raises IndexError: if a line has fewer columns than the first one.
        """
        data_mat = []
        label_mat = []
        num_feat = None
        # A single ``with`` block guarantees the handle is closed; the file
        # is read once instead of being opened twice.
        with open(file_name) as fr:
            for line in fr:
                stripped = line.strip()
                if not stripped:
                    continue
                cur_line = stripped.split("\t")
                if num_feat is None:
                    num_feat = len(cur_line) - 1
                data_mat.append([float(cur_line[i]) for i in range(num_feat)])
                label_mat.append(float(cur_line[-1]))
        return data_mat, label_mat

    def stand_regres(self, x_arr, y_arr):
        """Solve ordinary least squares via the normal equations.

        :param x_arr: m x n feature rows.
        :param y_arr: m target values.
        :return: n x 1 weight matrix ``(X^T X)^-1 X^T y``, or ``None``
            (after printing a message) when ``X^T X`` has a determinant of
            exactly zero.
        """
        x_mat = asmatrix(x_arr)
        y_mat = asmatrix(y_arr).T
        gram = x_mat.T * x_mat
        if linalg.det(gram) == 0.0:
            print("This matrix is singular, cannot do inverse")
            return
        return gram.I * (x_mat.T * y_mat)

    def lwlr(self, test_point, x_arr, y_arr, k=1.0):
        """Predict one point with locally weighted linear regression.

        Each training row j gets the Gaussian kernel weight
        ``exp(-|test_point - x_j|^2 / (2 k^2))``; the weighted normal
        equations are then solved and applied to ``test_point``.

        :param test_point: one feature row (length n).
        :param x_arr: m x n training feature rows.
        :param y_arr: m training targets.
        :param k: kernel bandwidth; smaller values weight nearby rows more.
        :return: 1 x 1 matrix holding the prediction, or ``None`` (after
            printing a message) when the weighted system is singular.
        """
        x_mat = asmatrix(x_arr)
        y_mat = asmatrix(y_arr).T
        point = asmatrix(test_point)
        m = shape(x_mat)[0]
        weights = asmatrix(eye(m))
        for j in range(m):
            # Row j only (``[j, :]``), and diff * diff.T so the product is
            # the squared distance rather than a shape error.
            diff_mat = point - x_mat[j, :]
            sq_dist = (diff_mat * diff_mat.T)[0, 0]
            weights[j, j] = exp(sq_dist / (-2.0 * k ** 2))
        # Weighted normal equations: X^T W X.
        gram = x_mat.T * (weights * x_mat)
        if linalg.det(gram) == 0.0:
            print("This matrix is singular, cannot do inverse")
            return
        ws = gram.I * (x_mat.T * (weights * y_mat))
        return point * ws

    def lwlr_test(self, test_arr, x_arr, y_arr, k=1.0):
        """Run :meth:`lwlr` for every row of ``test_arr``.

        :param test_arr: rows to predict.
        :param x_arr: training feature rows.
        :param y_arr: training targets.
        :param k: kernel bandwidth passed to :meth:`lwlr`.
        :return: 1-D array of predictions; an entry is NaN when the local
            system for that row was singular.
        """
        m = shape(test_arr)[0]
        y_hat = zeros(m)
        for i in range(m):
            pred = self.lwlr(test_arr[i], x_arr, y_arr, k)
            # Store a plain scalar, not a 1x1 matrix.
            y_hat[i] = nan if pred is None else pred[0, 0]
        return y_hat

    def lwlr_test_plot(self, x_arr, y_arr, k=1.0):
        """Predict with :meth:`lwlr` at the column-sorted training rows.

        :param x_arr: training feature rows.
        :param y_arr: training targets.
        :param k: kernel bandwidth passed to :meth:`lwlr`.
        :return: ``(y_hat, x_copy)``: the predictions (NaN where singular)
            and the matrix of feature rows with each column sorted
            ascending.
        """
        y_hat = zeros(shape(y_arr))
        # Explicit copy: asmatrix() does not copy an existing matrix, and
        # the in-place sort must not reorder the caller's training data.
        x_copy = asmatrix(x_arr).copy()
        # NOTE: sort(0) sorts every column independently.
        x_copy.sort(0)
        for i in range(shape(x_arr)[0]):
            pred = self.lwlr(x_copy[i], x_arr, y_arr, k)
            y_hat[i] = nan if pred is None else pred[0, 0]
        return y_hat, x_copy

    def rss_error(self, y_arr, y_hat_arr):
        """Return the residual sum of squares between two arrays."""
        return ((y_arr - y_hat_arr) ** 2).sum()

    def ridge_regres(self, x_mat, y_mat, lam=0.2):
        """Solve ridge regression ``(X^T X + lam * I)^-1 X^T y``.

        :param x_mat: m x n feature matrix.
        :param y_mat: m x 1 target matrix.
        :param lam: ridge penalty added to the diagonal.
        :return: n x 1 weight matrix, or ``None`` (after printing a
            message) when the penalised matrix has a zero determinant.
        """
        # No-op for matrix inputs; makes ``*`` a matrix product for ndarrays.
        x_mat = asmatrix(x_mat)
        y_mat = asmatrix(y_mat)
        denom = x_mat.T * x_mat + eye(shape(x_mat)[1]) * lam
        if linalg.det(denom) == 0.0:
            print("This matrix is singular, cannot do inverse")
            return
        return denom.I * (x_mat.T * y_mat)

    def ridge_test(self, x_arr, y_arr, num_test_pts=30):
        """Compute ridge weights over an exponential grid of penalties.

        The targets are mean-centred and the features are normalised with
        :meth:`regularize`; the i-th penalty is ``exp(i - 10)``.

        :param x_arr: m x n feature rows.
        :param y_arr: m targets.
        :param num_test_pts: number of penalties to try (default 30).
        :return: ``num_test_pts`` x n array, one weight row per penalty;
            a row is NaN when that system was singular.
        """
        x_mat = self.regularize(asmatrix(x_arr))
        y_mat = asmatrix(y_arr).T
        y_mat = y_mat - mean(y_mat, 0)
        w_mat = zeros((num_test_pts, shape(x_mat)[1]))
        for i in range(num_test_pts):
            # exp() returns e ** x, so penalties run from e**-10 upwards.
            ws = self.ridge_regres(x_mat, y_mat, exp(i - 10))
            w_mat[i, :] = nan if ws is None else ws.T
        return w_mat

    def regularize(self, x_mat):
        """Normalise a matrix column by column.

        Subtracts each column's mean and divides by that column's
        variance (not the standard deviation).  A zero-variance column
        therefore divides by zero.

        :param x_mat: matrix to normalise; it is copied, not modified.
        :return: the normalised copy.
        """
        in_mat = x_mat.copy()
        in_means = mean(in_mat, 0)
        in_var = var(in_mat, 0)
        return (in_mat - in_means) / in_var

    def stageWise(self, x_arr, y_arr, eps=0.01, numIt=100):
        """Run forward stagewise regression.

        On every iteration each coefficient is tried with a step of
        ``+eps`` and ``-eps``; the single change giving the lowest
        residual sum of squares is kept.  The current weights are printed
        at the start of every iteration.

        :param x_arr: m x n feature rows (normalised with
            :meth:`regularize`).
        :param y_arr: m targets (mean-centred here).
        :param eps: step size applied to one coefficient per iteration.
        :param numIt: number of iterations.
        :return: ``numIt`` x n array; row i holds the weights after
            iteration i.
        """
        x_mat = self.regularize(asmatrix(x_arr))
        y_mat = asmatrix(y_arr).T
        # Targets are only centred; scaling them too would shrink the
        # coefficients.
        y_mat = y_mat - mean(y_mat, 0)
        n = shape(x_mat)[1]
        return_mat = zeros((numIt, n))
        ws = zeros((n, 1))
        for i in range(numIt):
            print(ws.T)
            lowest_error = inf
            # Falls back to the current weights if no candidate improves
            # on ``inf`` (e.g. every error is NaN).
            ws_best = ws
            for j in range(n):
                for sign in (-1, 1):
                    ws_test = ws.copy()
                    ws_test[j] += eps * sign
                    y_test = x_mat * ws_test
                    rss_e = self.rss_error(y_mat.A, y_test.A)
                    if rss_e < lowest_error:
                        lowest_error = rss_e
                        ws_best = ws_test
            ws = ws_best.copy()
            return_mat[i, :] = ws.ravel()
        return return_mat

    def regression1(self):
        """Fit ordinary least squares on the sample file and plot it.

        Loads ``data/8.Regression/data.txt``, scatters feature column 1
        against the labels and draws the fitted line over it.  Returns
        without plotting when the fit is singular.
        """
        x_arr, y_arr = self.load_data_set("data/8.Regression/data.txt")
        ws = self.stand_regres(x_arr, y_arr)
        if ws is None:
            return
        x_mat = asmatrix(x_arr)
        y_mat = asmatrix(y_arr)
        fig = plt.figure()
        # add_subplot(111): a 1 x 1 grid, drawing in its first cell.
        ax = fig.add_subplot(111)
        # x is the second column of the features, y the labels; both are
        # flattened to 1-D arrays for matplotlib.
        # presumably column 0 is a constant bias term - verify against data.
        ax.scatter(x_mat[:, 1].A1, y_mat.A1)
        x_copy = x_mat.copy()
        # Sort so the fitted line is drawn left to right.
        x_copy.sort(0)
        y_hat = x_copy * ws
        ax.plot(x_copy[:, 1].A1, y_hat.A1)
        plt.show()


if __name__ == '__main__':
    # Script entry point: run the ordinary least-squares plotting demo.
    Regression().regression1()

猜你喜欢

转载自blog.csdn.net/u012965373/article/details/88653647