# First, inspect the distribution of the data
import pandas as pd
# Load the dataset
df = pd.read_csv('./data.csv')
df.head(5)
 | x | y |
---|---|---|
0 | 32.502345 | 31.707006 |
1 | 53.426804 | 68.777596 |
2 | 61.530358 | 62.562382 |
3 | 47.475640 | 71.546632 |
4 | 59.813208 | 87.230925 |
import matplotlib.pyplot as plt
import numpy as np
%matplotlib inline
# Plot the points on a plane: convert the data samples into a NumPy array
points = np.array(df)
# The first column of the array holds the x values
X = points[:,0]
# The second column of the array holds the y values
Y = points[:,1]
# Draw the scatter plot
plt.scatter(X,Y)
<matplotlib.collections.PathCollection at 0x1cf2d5e2448>
The data samples are shown in the figure above. Next, we use gradient descent to fit a straight line to the distribution of these samples.
# Idea: write down the loss function  loss = sum_i (w*x_i + b - y_i)**2
# Each iteration updates the parameters w and b via:  w' = w - lr * (dloss/dw)
# One gradient-descent step updates w and b
b_start = 0
w_start = 0
iteration = 900
learning_rate = 0.0001
N = len(points)
# One gradient-descent step: update b and w and return them
def Step_GradientDescent(b, w, xs=None, ys=None, lr=None):
    """Perform one gradient-descent step on the linear model y = w*x + b.

    Minimizes the mean squared error  (1/n) * sum((w*x_i + b - y_i)**2).

    Args:
        b: current intercept.
        w: current slope.
        xs: sequence of x values; defaults to the module-level X.
        ys: sequence of y values; defaults to the module-level Y.
        lr: learning rate; defaults to the module-level learning_rate.

    Returns:
        (b_new, w_new): the parameters after one update step.
    """
    xs = X if xs is None else xs
    ys = Y if ys is None else ys
    lr = learning_rate if lr is None else lr
    n = len(xs)
    # Accumulate the gradient over ALL samples. The original code overwrote
    # dw/db on every loop iteration, so only the last data point contributed
    # to the update — a genuine bug in the gradient computation.
    dw = 0.0
    db = 0.0
    for xi, yi in zip(xs, ys):
        err = w * xi + b - yi
        dw += 2 * xi * err / n
        db += 2 * err / n
    # Apply the update once, using the full-batch gradient
    return b - lr * db, w - lr * dw
# Full gradient-descent training loop
def Total_GradientDescent(b, w):
    """Run `iteration` rounds of gradient descent starting from (b, w).

    Repeatedly applies Step_GradientDescent and returns the final (b, w).
    """
    for _ in range(iteration):
        b, w = Step_GradientDescent(b, w)
    return b, w
# Compute the total squared error of the fit
def Deviation_value(b, w, xs=None, ys=None):
    """Return the sum of squared errors of the line y = w*x + b.

    NOTE: despite the original "variance" comment, this is the total
    (unnormalized) squared error, not a variance.

    Args:
        b: intercept.
        w: slope.
        xs: sequence of x values; defaults to the module-level X.
        ys: sequence of y values; defaults to the module-level Y.

    Returns:
        The accumulated squared error sum((w*x_i + b - y_i)**2).
    """
    xs = X if xs is None else xs
    ys = Y if ys is None else ys
    return sum((w * xi + b - yi) ** 2 for xi, yi in zip(xs, ys))
def main():
    """Drive the fit: report the loss before and after gradient descent."""
    b, w = b_start, w_start
    msg = "Before__b:{0},w:{1},Deviation value:{2}"
    print(msg.format(b, w, Deviation_value(b, w)))
    b, w = Total_GradientDescent(b, w)
    msg = "After__b:{0},w:{1},Deviation value:{2}"
    print(msg.format(b, w, Deviation_value(b, w)))
    return b, w


b, w = main()
Running the script produces the following training results:
Before__b:0,w:0,Deviation value:556510.7834490552
After__b:0.0574654741275659,w:1.4440202845195163,Deviation value:11557.105373357175
# Using the results above, draw the fitted line over the earlier scatter plot
# example: axline((0, 0), (1, 1), linewidth=4, color='r')
plt.subplot(111)
plt.scatter(X,Y)
plt.axline((0,0.0574654741275659),slope=1.4440202845195163,linewidth=2, color='blue')
<matplotlib.lines._AxLine at 0x1cf2deeeb48>
This result is close to ideal. However, if the learning rate or the number of iterations is changed even slightly, the loss becomes much larger, as shown below:
learning_rate=0.0001,iteration=1100
learning_rate=0.0001,iteration=1000