python之线性回归

什么是线性回归

通过训练来得到线性方程系数 theta(θ)的过程叫做线性回归,这样我们就可以预测某个 x 对应的 y 值

线性回归的数学表示

$h_\theta(x) = \theta^T x = \theta_0 + \theta_1 x_1 + \dots + \theta_n x_n$

线性回归中的最小二乘法

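对损失函数 $L(\theta) = \frac{1}{2}\lVert X\theta - Y \rVert^2$ 关于 theta 求导并令导数为零,即可得到使 L 最小的 theta:$\theta = (X^T X)^{-1} X^T Y$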

线性回归的梯度下降算法

每次迭代沿负梯度方向更新 theta(alpha 为学习率,m 为样本数):$\theta := \theta - \frac{\alpha}{m} X^T (X\theta - Y)$

 

 

复习numpy的向量操作 

#coding=gbk
import numpy as np
from numpy.linalg import inv
from numpy import  dot
from numpy import  mat

# inv: matrix inverse
# dot: matrix (dot) product
# np.asmatrix: builds a 2-D np.matrix; used instead of the `mat` alias,
# which NumPy 2.0 removed from the main namespace.

A = np.asmatrix([1, 1])
print('矩阵A:\n', A)
print("A的转置:\n", A.T)
# reshape changes the shape of the matrix (here 1x2 -> 2x1)
print(A.reshape(2, 1))

B = np.asmatrix([[1, 2], [2, 3]])
print(B.reshape(1, 4))
print("B的逆:\n", inv(B))
print("矩阵B:\n", B)
print("B的第一行:\n", B[0, :])
print("B的第一列:\n", B[:, 0])
# A is 1x2 and B is 2x2, so A.B is 1x2 and B.A^T is 2x1
print('A和B点乘', dot(A, B))
# The label ends with a newline like the other labels (it used to end with
# a backspace escape, which garbled the output).
print("B点乘A的转置:\n", dot(B, A.T))

 实现最小二乘法

#coding=gbk
import numpy as np
from numpy.linalg import inv
from numpy import  dot
from numpy import  mat

# Toy data generated from y = 2x, stored as 3x1 column vectors.
# A plain ndarray is used instead of the `mat` alias, which NumPy 2.0 removed.
X = np.array([1, 2, 3]).reshape(3, 1)
Y = 2 * X

# Normal equation: theta = (X'X)^-1 X'Y
theta = dot(dot(inv(dot(X.T, X)), X.T), Y)
print(theta)

 实现梯度下降

#coding=gbk
import numpy as np
from numpy.linalg import inv
from numpy import  dot
from numpy import  mat

# Toy data generated from y = 2x, stored as 3x1 column vectors.
X = np.array([1, 2, 3], dtype=float).reshape(3, 1)
Y = 2 * X

# Update rule: theta = theta - alpha * mean((theta*X - Y) * X)

theta = 1.
alpha = 0.1  # learning rate
m = len(X)   # number of samples
for _ in range(100):
    residual = Y - X * theta
    # Elementwise product of two 3x1 vectors: sum(residual_i * x_i) is the
    # (negative) gradient. Multiplying by X reshaped to 1x3 would instead
    # produce a 3x3 outer product whose sum is sum(residual) * sum(x).
    theta = theta + alpha * np.sum(residual * X) / m

print(theta)

回归分析实战

随机数据生成代码,生成data.csv文件


import random
def Y(X1, X2, X3):
    """Return the target value 0.65*X1 + 0.70*X2 - 0.55*X3 + 1.95."""
    weighted_x1 = 0.65 * X1
    weighted_x2 = 0.70 * X2
    weighted_x3 = 0.55 * X3
    # Same left-to-right evaluation order as a single expression would use.
    return weighted_x1 + weighted_x2 - weighted_x3 + 1.95
def Produce(filename='data.csv', rows=200):
    """Write a CSV file of random samples for the regression exercise.

    The file starts with the header ``X1,X2,X3,Y`` followed by ``rows``
    data lines. Each feature is ``random.random() * 10`` and the target is
    computed with ``Y(x1, x2, x3)``.

    Args:
        filename: Path of the CSV file to create; it is opened in ``'w'``
            mode, so an existing file is overwritten.
        rows: Number of data rows to write.
    """
    # The random module seeds itself on import, so no per-row
    # random.seed() call is needed.
    with open(filename, 'w') as file:
        file.write('X1,X2,X3,Y\n')
        for _ in range(rows):
            x1 = random.random() * 10
            x2 = random.random() * 10
            x3 = random.random() * 10
            y = Y(x1, x2, x3)
            row = ','.join(str(value) for value in (x1, x2, x3, y))
            try:
                file.write(row + '\n')
            except OSError as e:
                # Best effort: report the failed row and keep going.
                print('Write Error')
                print(str(e))

# Run the generator: writes data.csv with a header line and 200 random rows.
Produce()

分别用最小二乘法和梯度下降法计算theta

#coding=gbk
import numpy as np
from numpy.linalg import inv
from numpy import  dot
from numpy import  mat
import pandas as pd

# Build the design matrix X and the target vector Y from the CSV file.
dataset = pd.read_csv('data.csv')
# Copy the first three columns so that adding the intercept column does not
# write into a slice of `dataset` (avoids pandas' SettingWithCopyWarning).
temp = dataset.iloc[:, 0:3].copy()
temp['x0'] = 1  # intercept column

# Reorder so the intercept column comes first, then the three features.
X = temp.iloc[:, [3, 0, 1, 2]]
m = len(X)  # number of samples, taken from the data instead of hard-coded

Y = dataset.iloc[:, 3].values.reshape(m, 1)
features = X.values.astype(float)  # plain ndarray for the linear algebra

# Least squares via the normal equation: theta = (X'X)^-1 X'Y
theta = dot(dot(inv(dot(features.T, features)), features.T), Y)
print("最小二乘法计算theta:\n", theta.reshape(4, 1))

# Batch gradient descent
theta = np.ones((4, 1))
alpha = 0.02  # learning rate
for _ in range(10000):
    residual = Y - dot(features, theta)
    # All four coefficients are updated from the same residual, i.e. a true
    # simultaneous update. Caching via `temp = theta` would only alias the
    # same array, so each coefficient would see the previous one's update.
    theta = theta + alpha * dot(features.T, residual) / m
print("梯度下降法计算theta:", theta)

运行结果

猜你喜欢

转载自blog.csdn.net/weixin_38199770/article/details/81113993