使用minibatch的方式进行梯度下降
项目 | 内容 |
---|---|
课程内容 | 2019人工智能实战 |
作业要求 | 第三次作业 |
课程目标 | 使用minibatch的方式进行梯度下降 |
本次作业的帮助 | 了解了batch长度和不同的选取方式对loss值下降速率的影响 |
我的GitHub主页 | Simon.Tuan |
- 完整代码
import numpy as np
import matplotlib.pyplot as plt
from pathlib import Path
# Locations of the X and Y sample files that ReadData() loads with np.load.
x_data_name = "D:/Desktop/TemperatureControlXData.dat"
y_data_name = "D:/Desktop/TemperatureControlYData.dat"
class CData(object):
    """Snapshot of the training state recorded after one mini-batch update.

    The constructor stores its arguments unchanged as attributes:
    ``loss``, ``w``, ``b``, ``epoch`` and ``iteration``.
    """

    def __init__(self, loss, w, b, epoch, iteration):
        self.loss, self.w, self.b = loss, w, b
        self.epoch, self.iteration = epoch, iteration
def ReadData(x_path=None, y_path=None):
    """Load the X and Y sample arrays from disk as single-row matrices.

    Args:
        x_path: Path of the X file. When ``None`` the module-level
            ``x_data_name`` is used.
        y_path: Path of the Y file. When ``None`` the module-level
            ``y_data_name`` is used.

    Returns:
        A tuple ``(X, Y)`` where each array has been reshaped to
        ``(1, n)``, or ``(None, None)`` if either file does not exist.
    """
    x_file = Path(x_data_name if x_path is None else x_path)
    y_file = Path(y_data_name if y_path is None else y_path)
    # Use logical `and` (not bitwise `&`) so the second check is skipped
    # when the first file is already missing.
    if not (x_file.exists() and y_file.exists()):
        return None, None
    X = np.load(x_file)
    Y = np.load(y_file)
    return X.reshape(1, -1), Y.reshape(1, -1)
def ForwardCalculationBatch(W, B, batch_x):
    """Return the linear model output ``np.dot(W, batch_x) + B`` for a batch."""
    weighted_input = np.dot(W, batch_x)
    return weighted_input + B
def BackPropagationBatch(batch_x, batch_y, batch_z):
    """Compute the batch-averaged gradients of the weights and the bias.

    Args:
        batch_x: Batch inputs; the batch size is taken from ``shape[1]``.
        batch_y: Target values for the batch.
        batch_z: Model outputs for the batch.

    Returns:
        ``(dW, dB)``: the error ``batch_z - batch_y`` multiplied with
        ``batch_x.T`` and summed along axis 1 respectively, each divided
        by the batch size.
    """
    sample_count = batch_x.shape[1]
    error = batch_z - batch_y
    grad_w = np.dot(error, batch_x.T) / sample_count
    grad_b = error.sum(axis=1, keepdims=True) / sample_count
    return grad_w, grad_b
def UpdateWeights(w, b, dW, dB, eta):
    """Apply one gradient-descent step and return the new ``(w, b)``.

    Each parameter is moved against its gradient, scaled by the learning
    rate ``eta``. The inputs are not modified in place.
    """
    return w - eta * dW, b - eta * dB
def InitialWeights(num_input, num_output):
    """Create the starting parameters of the linear model.

    Returns:
        ``(W, B)``: ``W`` is drawn from ``np.random.normal`` with shape
        ``(num_output, num_input)``; ``B`` is all zeros with shape
        ``(num_output, 1)``.
    """
    weight_shape = (num_output, num_input)
    weights = np.random.normal(size=weight_shape)
    bias = np.zeros((num_output, 1))
    return weights, bias
def CheckLoss(W, B, X, Y):
    """Return half the mean squared error of the linear model over ``X``.

    The prediction is ``np.dot(W, X) + B``; the squared differences to
    ``Y`` are summed, divided by the sample count ``X.shape[1]`` and then
    by 2.
    """
    sample_count = X.shape[1]
    residual = np.dot(W, X) + B - Y
    squared_error = residual ** 2
    return squared_error.sum() / sample_count / 2
def GetBatchSamples(X, Y, batch_size, iteration):
    """Draw a random mini-batch of matching columns from ``X`` and ``Y``.

    A fresh random permutation of the column indices is generated on
    every call and its first ``batch_size`` entries select the samples,
    so a batch never contains the same sample twice.

    Args:
        X: Input matrix with one sample per column.
        Y: Target matrix with one sample per column; it may have a
            different number of rows than ``X``.
        batch_size: Number of samples to draw. If it exceeds the number
            of columns, every sample is returned once.
        iteration: Unused; kept so existing callers keep working.

    Returns:
        ``(batch_x, batch_y)`` holding the same randomly chosen columns
        of ``X`` and ``Y``.
    """
    num_example = X.shape[1]
    shuffled = np.random.permutation(num_example)
    chosen = shuffled[:batch_size]
    # Select every row of each matrix on its own terms: slicing Y with
    # X's row count breaks as soon as the two row counts differ.
    batch_x = X[:, chosen]
    batch_y = Y[:, chosen]
    return batch_x, batch_y
def GetMinimalLossData(dict_loss):
    """Return the parameters recorded at the smallest loss.

    Args:
        dict_loss: Mapping from a loss value to a record object that
            exposes ``w`` and ``b`` attributes.

    Returns:
        ``(w, b, record)`` for the entry whose key is the minimum.

    Raises:
        ValueError: If ``dict_loss`` is empty.
    """
    if not dict_loss:
        raise ValueError("dict_loss is empty; no loss has been recorded")
    # min() finds the smallest key in one pass; sorting every key just to
    # take the first one is unnecessary.
    best = dict_loss[min(dict_loss)]
    return best.w, best.b, best
if __name__ == '__main__':
    # Train the same single-input linear model once per batch size and
    # plot the full-data loss after every mini-batch update.
    eta = 0.05
    max_epoch = 100
    b_size = [5, 10, 15]
    color = ['red', 'black', 'blue']

    # The data set does not depend on the batch size, so load it once.
    X, Y = ReadData()
    if X is None or Y is None:
        # ReadData signals missing files with (None, None); stop with a
        # clear message instead of failing later on `X.shape`.
        raise SystemExit(
            "training data not found: %s / %s" % (x_data_name, y_data_name))
    # count of samples
    num_example = X.shape[1]

    for batch_size, line_color in zip(b_size, color):
        W, B = InitialWeights(1, 1)
        # One entry per update, in order. A list keeps repeated loss
        # values, which a dict keyed by loss would silently drop.
        loss_history = []
        max_iteration = num_example // batch_size
        for epoch in range(max_epoch):
            print("epoch=%d" % epoch)
            for iteration in range(max_iteration):
                batch_x, batch_y = GetBatchSamples(X, Y, batch_size, iteration)
                # get z from x,y
                batch_z = ForwardCalculationBatch(W, B, batch_x)
                # calculate gradient of w and b
                dW, dB = BackPropagationBatch(batch_x, batch_y, batch_z)
                # update w,b
                W, B = UpdateWeights(W, B, dW, dB, eta)
                # loss over the whole data set after this update
                loss = CheckLoss(W, B, X, Y)
                print(epoch, iteration, loss, W, B)
                loss_history.append(loss)
        plt.plot(loss_history, color=line_color,
                 label='batchsize=' + str(batch_size))

    plt.ylabel('loss')
    # Each plotted point is one mini-batch update, not one epoch.
    plt.xlabel('iteration')
    plt.xlim(0, 850)
    plt.ylim(0.0045, 0.01)
    # Without a legend the per-curve labels are never displayed.
    plt.legend()
    plt.show()
- 主要修改及注释
def GetBatchSamples(X, Y, batch_size, iteration):
    """Draw a random mini-batch of matching columns from ``X`` and ``Y``.

    Args:
        X: Input matrix with one sample per column.
        Y: Target matrix with one sample per column; it may have a
            different number of rows than ``X``.
        batch_size: Number of samples to draw. If it exceeds the number
            of columns, every sample is returned once.
        iteration: Unused; kept so existing callers keep working.

    Returns:
        ``(batch_x, batch_y)`` holding the same randomly chosen columns
        of ``X`` and ``Y``.
    """
    # Number of columns, i.e. number of samples.
    num_example = X.shape[1]
    # Randomly shuffled array of the column indices 0 .. num_example - 1.
    shuffled = np.random.permutation(num_example)
    # The first batch_size shuffled indices form the batch.
    chosen = shuffled[:batch_size]
    # Batch of the x variable: all rows of X at the chosen columns.
    batch_x = X[:, chosen]
    # Matching batch of the y variable. All rows of Y are taken; slicing
    # Y with X's row count breaks when the two row counts differ.
    batch_y = Y[:, chosen]
    return batch_x, batch_y
运行结果
*图中红、黑、蓝线分别代表单次采样数(batch size)为 5、10、15*
- 复习内容
- 为什么是椭圆而不是圆?如何把这个图变成一个圆?
因为w与b对loss的影响程度是不同的,课件中的图片相当于该曲面在两个方向上的截面,也可以很容易看出loss值在两个方向上的变化并不是对称的。
构造w与b的值对z影响相同的线性函数即可,形如 $z = wb \cdot x + wb$
- 为什么中心是个椭圆区域而不是一个点?
因为绘图时取的步长是离散的,很难保证最后所处的位置正好在曲面顶点;如果不在顶点,其截面必然不是一个点。