If you feel stochastic gradient descent still isn't fast enough, don't worry: someone will always pop up and point you to momentum gradient descent.
With plain gradient descent, each weight update is simply the gradient scaled by the learning rate:

$$w \leftarrow w - \eta \,\nabla L(w)$$
Momentum gradient descent, by contrast, uses not only the current gradient but also an accumulated velocity that carries the previous gradients:

$$v_t = \gamma\, v_{t-1} + \nabla L(w), \qquad w \leftarrow w - \eta\, v_t$$

As the formulas show, each velocity term contains the previous one; the discount factor $\gamma$ is usually set to 0.9.
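Unrolling the recurrence shows what the discount actually does (a quick derivation using the symbols above):

$$v_t = \sum_{k=0}^{t-1} \gamma^{k}\, \nabla L(w_{t-k})$$

Each step is an exponentially weighted sum of all past gradients, so with $\gamma = 0.9$ the effective memory is roughly $1/(1-\gamma) = 10$ steps: directions that persist across iterations reinforce each other, while oscillating components largely cancel.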
In practice, momentum gradient descent really does converge noticeably faster. The code is below:
'''
Full-batch gradient descent with momentum
'''
import numpy as np

print(__doc__)
sample = 10
num_input = 5

# Generate training data
np.random.seed(0)
normalRand = np.random.normal(0, 0.1, sample)    # 10 Gaussian noise terms, mean 0, standard deviation 0.1
weight = [5, 100, -5, -400, 0.02]                # true weights, 1 x 5
x_train = np.random.random((sample, num_input))  # x data (10 x 5)
y_train = np.zeros(sample)                       # y data (10,)
for i in range(len(x_train)):
    total = 0
    for j in range(len(x_train[i])):
        total += weight[j] * x_train[i, j]
    y_train[i] = total + normalRand[i]
# Training
np.random.seed(0)
weight = np.random.random(num_input + 1)  # weights w plus bias b, initialized randomly
recordGrade = np.zeros(num_input + 1)     # momentum buffer; must start at zero, not random
discount = 0.9
rate = 0.04
for epoch in range(500):
    # Forward pass: predictions for the whole batch
    predictY = np.zeros(len(x_train))
    for i in range(len(x_train)):
        predictY[i] = np.dot(x_train[i], weight[0:num_input]) + weight[num_input]
    # Sum-of-squared-errors loss
    loss = 0
    for i in range(len(x_train)):
        loss += (predictY[i] - y_train[i]) ** 2
    print("epoch: %d - loss: %f" % (epoch, loss))  # print iteration count and loss
    # Gradient for each weight w_i, accumulated into the momentum buffer
    for i in range(len(weight) - 1):
        grade = 0
        for j in range(len(x_train)):
            grade += 2 * (predictY[j] - y_train[j]) * x_train[j, i]
        recordGrade[i] = recordGrade[i] * discount + grade
        weight[i] = weight[i] - rate * recordGrade[i]
    # Gradient for the bias b, same momentum update
    grade = 0
    for j in range(len(x_train)):
        grade += 2 * (predictY[j] - y_train[j])
    recordGrade[num_input] = recordGrade[num_input] * discount + grade
    weight[num_input] = weight[num_input] - rate * recordGrade[num_input]
print(weight)
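The per-sample Python loops above can be collapsed into matrix operations. Below is a minimal vectorized sketch of the same full-batch momentum update; the function name momentum_gd and the velocity names vw and vb are my own, and it assumes the same sum-of-squared-errors loss and hyperparameters as the listing above:

import numpy as np

def momentum_gd(X, y, lr=0.04, discount=0.9, epochs=500):
    '''Full-batch gradient descent with momentum, vectorized with NumPy.'''
    d = X.shape[1]
    w = np.zeros(d)                        # weights
    b = 0.0                                # bias
    vw = np.zeros(d)                       # velocity for the weights
    vb = 0.0                               # velocity for the bias
    for _ in range(epochs):
        err = X @ w + b - y                # prediction error, shape (n,)
        grad_w = 2 * X.T @ err             # gradient of the summed squared error w.r.t. w
        grad_b = 2 * err.sum()             # gradient w.r.t. b
        vw = discount * vw + grad_w        # momentum accumulation, as in the loop version
        vb = discount * vb + grad_b
        w -= lr * vw
        b -= lr * vb
    return w, b

# Usage on the data generated above:
# w, b = momentum_gd(x_train, y_train)

Replacing the inner loops with X @ w and X.T @ err produces exactly the same updates, but lets NumPy do the arithmetic in compiled code instead of the Python interpreter.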