# 1. Simple linear regression algorithm

'''
    @author yokan
    @date 2018/5/15
'''
# Simple linear regression algorithm
import math
import numpy as np
import matplotlib.pyplot as pl

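# Not used yet: this is scikit-learn's linear regression. The framework is
# studied later; the algorithm itself is implemented by hand first.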
from sklearn.linear_model import LinearRegression
from mpl_toolkits.mplot3d import axes3d

# 1. Load the samples from linear_regression_data1.txt.
#    The file is comma separated, so loadtxt yields a 2-D matrix.
data = np.loadtxt("linear_regression_data1.txt", delimiter=",")
print(data)
# 2. Split the matrix into the design matrix x and the target column y.
#    x gets a leading column of ones so that the intercept can be learned
#    through the same dot product as the slope: the model is
#    h = theta0 * 1 + theta1 * x1 (+ theta2 * x2 ...), so theta0 pairs with
#    the constant 1.
x = np.column_stack((np.ones(data.shape[0]), data[:, 0]))
# Indexing with a column list keeps y two-dimensional, shape (m, 1).
y = data[:, [1]]

# 3. Cost function: J(theta) = 1/(2*m) * sum_i (h(x_i) - y_i)^2
def getlossMethod(x, y, theta = [[0],[0]]):
    """Return the squared-error cost of a linear model.

    Computes J(theta) = 1/(2*m) * sum((x.dot(theta) - y) ** 2), where m is
    the number of samples. The closer the predictions are to y, the closer
    the cost is to 0.

    Args:
        x: 2-D design matrix with one row per sample and one column per
            parameter.
        y: Targets, either shape (m, 1) or a flat sequence of length m.
        theta: Parameter column with one entry per column of x. The default
            is a zero vector for a two-column x; it is only read, never
            mutated.

    Returns:
        The cost as a Python float.

    Raises:
        ValueError: If y contains no samples.
    """
    m = len(y)              # number of training samples
    if m == 0:
        raise ValueError("cannot compute the loss of an empty data set")
    features = np.asarray(x, dtype=float)
    # Force column layout so a flat y cannot broadcast against the (m, 1)
    # prediction into an (m, m) matrix.
    targets = np.asarray(y, dtype=float).reshape(m, -1)
    params = np.asarray(theta, dtype=float).reshape(features.shape[1], -1)
    residual = features.dot(params) - targets    # h(x) - y for every sample
    return float(np.sum(residual ** 2) / (2 * m))

# 4. Solve for theta with batch gradient descent
def gettheta(x,y,theta = [[0],[0]],r_count = 1500,alpha = 0.01):
    """Fit linear-model parameters by repeated gradient-descent steps.

    Each iteration applies
        theta <- theta - alpha * (1/m) * x.T.dot(x.dot(theta) - y)
    which is the gradient of the squared-error cost scaled by the learning
    rate. A single step rarely lands on the optimum, so the update is
    repeated r_count times.

    Args:
        x: Design matrix, one row per sample.
        y: Target column, one row per sample.
        theta: Starting parameters (defaults to zeros for two parameters).
        r_count: Number of update steps to run.
        alpha: Learning rate.

    Returns:
        The parameters after r_count updates; the starting value itself
        when r_count is 0.
    """
    sample_count = len(y)
    for _ in range(r_count):
        residual = x.dot(theta) - y
        step_scale = alpha*(1/sample_count)
        theta = theta - step_scale*(x.T.dot(residual))
    return theta
# 5. Plot the samples and the fitted line
theta = gettheta(x,y)
xx = np.arange(5, 23)
# Evaluate the fitted line (theta[0] = intercept, theta[1] = slope) at each
# x position the plot needs.
yy = theta[1]*xx + theta[0]
# Scatter plot of the raw samples.
pl.scatter(x[:, 1], y, marker='x', c='r', s=30, linewidths=1)
# Line produced by the gradient-descent fit.
pl.plot(xx, yy)
# pl.show()   # left disabled; uncomment to display the figure
# 6. Sanity-check the fitted model; for learning purposes the exercise ends
#    here. Many checks are possible; the original note says this one follows
#    "July's" approach: predict for feature values 3.5 and 7, scaled by 10000.
for feature_value in (3.5, 7):
    print(theta.T.dot([1, feature_value])*10000)
# The printed values can be read as the expected outcomes.



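# The commented-out code below fits the same regression with scikit-learn;
# revisit it once the framework has been studied.
# NOTE: it refers to `X` and `plt`, while this file defines `x` and `pl`.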
# regr = LinearRegression()
# regr.fit(X[:,1].reshape(-1,1), y.ravel())
# plt.plot(xx, regr.intercept_+regr.coef_*xx, label='Linear regression (Scikit-learn GLM)')
#
# plt.xlim(4,24)
# plt.xlabel('Population of City in 10,000s')
# plt.ylabel('Profit in $10,000s')
# plt.legend(loc=4);
# 1. Simple linear regression algorithm

# You may also like