Deep Learning Notes 01 -- ディープニューラルネットワークモデルのコード実装
このブログシリーズは、ディープラーニングの学習過程を記録するためのものです。
1. ニューラルネットワークの基本モデル
$$A0=X=\left\{\begin{matrix} x11 & \cdots & xn1\\ x12 & \cdots & xn2\\ x13 & \cdots & xn3 \end{matrix}\right\}\quad W1=\left\{\begin{matrix} w111 & w112 & w113\\ w121 & w122 & w123\\ w131 & w132 & w133 \end{matrix}\right\}\quad B1=\left\{\begin{matrix} b11\\ b12\\ b13 \end{matrix}\right\}$$
$$A1=\text{活性化関数}(W1X+B1)=\left\{\begin{matrix} A111 & \cdots & A1n1\\ A112 & \cdots & A1n2\\ A113 & \cdots & A1n3 \end{matrix}\right\}\quad W2=\left\{\begin{matrix} w211 & w212 & w213\\ w221 & w222 & w223\\ w231 & w232 & w233 \end{matrix}\right\}\quad B2=\left\{\begin{matrix} b21\\ b22\\ b23 \end{matrix}\right\}$$
$$A2=\text{活性化関数}(W2A1+B2)=\left\{\begin{matrix} A211 & \cdots & A2n1\\ A212 & \cdots & A2n2\\ A213 & \cdots & A2n3 \end{matrix}\right\}\quad W3=\left\{\begin{matrix} w311 & w312 & w313\\ w321 & w322 & w323\\ w331 & w332 & w333 \end{matrix}\right\}\quad B3=\left\{\begin{matrix} b31\\ b32\\ b33 \end{matrix}\right\}$$
$$A3=\text{活性化関数}(W3A2+B3)=\left\{\begin{matrix} A311 & \cdots & A3n1\\ A312 & \cdots & A3n2\\ A313 & \cdots & A3n3 \end{matrix}\right\}\quad W4=\left\{\begin{matrix} w411 & w412 & w413\\ w421 & w422 & w423 \end{matrix}\right\}\quad B4=\left\{\begin{matrix} b41\\ b42 \end{matrix}\right\}$$
$$YP=A4=\text{活性化関数}(W4A3+B4)=\left\{\begin{matrix} A411 & \cdots & A4n1\\ A412 & \cdots & A4n2 \end{matrix}\right\}=\left\{\begin{matrix} YP11 & \cdots & YPn1\\ YP12 & \cdots & YPn2 \end{matrix}\right\}$$
$$J=\frac{1}{n}\sum_{i=1}^{n}\text{損失}(YP,Y)$$
$$dYP=\frac{\partial J}{\partial YP},\quad dZ4\,(Z4=W4A3+B4)=dYP*\frac{\partial\,\text{活性化関数}}{\partial Z4},\quad dW4=dZ4*A3,\quad dB4=dZ4,\quad dA3=dZ4*W4$$
$$dZ3\,(Z3=W3A2+B3)=dA3*\frac{\partial\,\text{活性化関数}}{\partial Z3},\quad dW3=dZ3*A2,\quad dB3=dZ3,\quad dA2=dZ3*W3$$
……(残りの層についても同様に計算する)
$$B=B-\alpha*dB,\quad W=W-\alpha*dW$$
2. コードの実装
import numpy as np
import random
class Neuron_Net:
    """Small fully connected neural network trained with batch gradient descent.

    Data is laid out column-wise: ``X`` is ``(features, samples)`` and every
    activation ``A[k]`` is ``(units, samples)``.

    Storage conventions:
      * ``W``, ``B``, ``Z`` and ``A`` are kept in forward layer order
        (``A[0]`` is the input, ``A[k + 1]`` the output of layer ``k``).
      * ``dW``, ``dB``, ``dZ`` and ``dA`` are kept in REVERSE layer order
        (index 0 belongs to the last layer).
    """

    def __init__(self):
        self.alfa = 0.01  # learning rate
        self.layer_shape = []  # one (input_dim, output_dim) tuple per layer
        self.W = []
        self.dW = []
        self.B = []
        self.dB = []
        self.Z = []
        self.dZ = []
        self.A = [0]  # slot 0 is reserved for the network input
        self.dA = [0]  # slot 0 is reserved for the loss gradient
        self.activity_infos = []
        self.dropout = []  # keep probability per layer
        self.loss_fuc = None
        self.debug = False
        self.print_accuracy = True
        self.accuracy_error = 0.5

    def addLayer(self, input_dim: int, output_dim: int, activity_info, dropout=1, W_init=1):
        """Append a dense layer to the network.

        :param input_dim: number of inputs of the layer
        :param output_dim: number of units (outputs) of the layer
        :param activity_info: activation description, a dict with the keys
            ``'main_func'`` (``Z -> f(Z)``) and ``'dA/dZ'`` (``Z -> f'(Z)``)
        :param dropout: keep probability used for inverted dropout while
            training; 1 disables dropout
        :param W_init: numerator of the weight variance; weights are drawn from
            a standard normal and scaled by ``sqrt(W_init / input_dim)``
        :return: None
        """
        scale = np.sqrt(W_init / input_dim)
        self.W.append(np.random.randn(output_dim, input_dim) * scale)
        self.B.append(np.zeros((output_dim, 1)))
        self.activity_infos.append(activity_info)
        self.layer_shape.append((input_dim, output_dim))
        self.dropout.append(dropout)
        # Reserve one slot per layer in every cache list.
        for cache in (self.A, self.dA, self.dW, self.dB, self.Z, self.dZ):
            cache.append(0)

    def compile(self, loss_fuc, alfa, debug=False, print_accuracy=True, accuracy_error=0.5):
        """Validate the layer dimensions and store the training settings.

        :param loss_fuc: loss callable ``(An, Y, accuracy_error) -> (dAn, loss, accuracy)``
        :param alfa: learning rate
        :param debug: debug flag; it is only stored, nothing reads it here
        :param print_accuracy: whether ``train`` prints the accuracy each epoch
        :param accuracy_error: tolerance forwarded to ``loss_fuc``
        :raises ValueError: if a weight/bias shape disagrees with its layer
            shape or consecutive layers do not chain
        """
        for index, (n_in, n_out) in enumerate(self.layer_shape):
            if self.W[index].shape[1] != n_in:
                raise ValueError('W[{}]列维度错误!'.format(index))
            if self.B[index].shape[0] != n_out:
                raise ValueError('B[{}]行维度错误!'.format(index))
            if index > 0 and self.layer_shape[index - 1][1] != n_in:
                raise ValueError('第{}层的输出不等于第{}层的输入'.format(index - 1, index))
        self.loss_fuc = loss_fuc
        self.debug = debug
        self.print_accuracy = print_accuracy
        self.accuracy_error = accuracy_error
        self.alfa = alfa

    def train(self, X, Y, epoch):
        """Train the network with full-batch gradient descent.

        :param X: training inputs, one sample per column
        :param Y: labels, passed unchanged to the loss function
        :param epoch: number of gradient-descent iterations
        """
        self.A[0] = X
        layer_count = len(self.layer_shape)
        for i in range(epoch):
            print('第', i + 1, '次')
            # Forward pass; the dropout masks are kept for the backward pass.
            masks = []
            for index in range(layer_count):
                Z = np.dot(self.W[index], self.A[index]) + self.B[index]
                self.Z[index] = Z
                activation = self.activity_infos[index]['main_func'](Z)
                # Inverted dropout: zero units at random, then rescale so the
                # expected activation is unchanged.
                keep_prob = self.dropout[index]
                mask = np.random.rand(activation.shape[0], activation.shape[1]) <= keep_prob
                masks.append(mask)
                self.A[index + 1] = np.multiply(activation, mask) / keep_prob
            dAn, Loss, accuracy = self.loss_fuc(self.A[-1], Y, self.accuracy_error)
            # Backward pass. Gradient lists are filled in reverse layer order:
            # dA = [dAn, dA(n-1), ..., dA0].
            self.dA[0] = dAn
            for index in range(layer_count):
                layer = layer_count - 1 - index  # forward-order index of this layer
                # The derivative must come from the SAME layer as Z.
                local_grad = self.activity_infos[layer]['dA/dZ'](self.Z[layer])
                # Propagate through the dropout scaling applied in the forward pass.
                upstream = np.multiply(self.dA[index], masks[layer]) / self.dropout[layer]
                self.dZ[index] = np.multiply(upstream, local_grad)
                layer_input = self.A[layer]
                sample_count = layer_input.shape[1]
                self.dW[index] = np.dot(self.dZ[index], layer_input.T) / sample_count
                self.dB[index] = self.dZ[index].sum(axis=1, keepdims=True) / sample_count
                self.dA[index + 1] = np.dot(self.W[layer].T, self.dZ[index])
            self.update_W_B()
            if self.print_accuracy:
                print('accuracy', accuracy)
            print('Loss:', Loss)

    def update_W_B(self):
        """Apply one gradient-descent step to every weight matrix and bias."""
        for index in range(len(self.W)):
            # Gradients are stored in reverse layer order, hence the mirrored index.
            self.W[index] -= self.alfa * self.dW[-index - 1]
            self.B[index] -= self.alfa * self.dB[-index - 1]

    def predict(self, X):
        """Run a forward pass without dropout and return the last activation.

        :param X: inputs, one sample per column
        :return: output of the last layer (``X`` itself if there are no layers)
        """
        activation = X
        for index in range(len(self.layer_shape)):
            Z = np.dot(self.W[index], activation) + self.B[index]
            activation = self.activity_infos[index]['main_func'](Z)
        return activation
活性化関数と損失関数
def activity_func(a):
    """Sigmoid activation: return ``1 / (1 + exp(-a))`` element-wise."""
    decay = np.exp(a * -1)
    return 1 / (decay + 1)
def dA_dZ(Z):
    """Derivative of the sigmoid with respect to ``Z``: ``s(Z) * (1 - s(Z))``."""
    sigmoid = 1 / (1 + np.exp(-1 * Z))
    complement = 1 - sigmoid
    return np.multiply(sigmoid, complement)
def loss_func(YP, Y, error):
    """Binary cross-entropy loss, its gradient, and a tolerance-based accuracy.

    :param YP: predicted probabilities, shape ``(outputs, samples)``; it is
        not modified
    :param Y: labels with the same shape as ``YP``
    :param error: a prediction counts as correct when ``|Y - YP| < error``
    :return: tuple ``(dA, loss, accuracy)`` where ``dA`` is the gradient of
        the loss with respect to ``YP``, ``loss`` is the summed cross-entropy
        divided by the number of samples (0 if it evaluates to NaN), and
        ``accuracy`` is the fraction of entries within the tolerance
    """
    eps = 1e-12
    sample_count = YP.shape[1]
    # Clip at BOTH ends so neither the division nor the logarithm can hit
    # 0 or 1 exactly; work on a copy so the caller's array is left untouched.
    clipped = np.clip(YP, eps, 1 - eps)
    dA = -(np.true_divide(Y, clipped) + np.true_divide(Y - 1, 1 - clipped))
    total = -np.sum(Y * np.log(clipped) + (1 - Y) * np.log(1 - clipped))
    loss = float(total) / sample_count
    if np.isnan(loss):
        # Kept from the original contract: a NaN loss is reported as 0.
        loss = 0
    # Accuracy is measured on the unclipped predictions.
    accuracy = float(np.mean(np.abs(Y - YP) < error))
    return dA, loss, accuracy
def relu(z):
    """ReLU activation: keep positive entries of ``z`` and zero the rest."""
    positive = z > 0
    return np.multiply(z, positive)
def rele_dA_dZ(z):
    """Derivative of ReLU: a boolean mask that is true where ``z`` is positive."""
    is_positive = z > 0
    return is_positive
テストステートメント
if __name__ == '__main__':
    # Toy, linearly separable data set with 200 two-feature samples:
    # the first 100 (label 1) have x1 in [0, 100) and x2 in [100, 200),
    # the last 100 (label 0) have x1 in [100, 200) and x2 in [0, 100).
    X_list = (
        [[random.random() * 100, 100 + random.random() * 100] for _ in range(100)]
        + [[100 + random.random() * 100, random.random() * 100] for _ in range(100)]
    )
    Y_list = [1] * 100 + [0] * 100
    # The network expects one sample per column.
    X = np.array(X_list).T
    Y = np.array(Y_list).reshape(1, 200)
    alfa = 0.01
    net = Neuron_Net()
    activity_info = {'main_func': activity_func, 'dA/dZ': dA_dZ}
    # Alternative activation bundle; not used by the demo network below.
    relu_info = {'main_func': relu, 'dA/dZ': rele_dA_dZ}
    net.addLayer(2, 3, activity_info)
    net.addLayer(3, 1, activity_info)
    net.compile(loss_func, alfa, accuracy_error=0.5)
    net.train(X, Y, 500)