[Deep Learning] Andrew Ng - Homework - Building multi-layer neural networks and their applications

  • In fact, I finished Ng's deep learning course a few months ago and did all of the programming assignments, typing out the code with the help of various references. But I barely studied during the summer vacation, and I have basically forgotten it. Over the past few weeks I have been reviewing my notes from the course, reading other bloggers' summaries, and going back over network structures such as CNNs, RNNs, LSTMs, and attention mechanisms. It feels like I am being impatient, since I am also learning algorithms related to ensemble learning and recommendation systems. So I keep reminding myself: don't bite off more than you can chew, and haste makes waste. Take it slowly; it all still takes accumulation.
  • I plan to slowly go back over all of the deep learning code and annotate it with comments, though I expect this process to be quite slow.

Table of contents

Parameter initialization

Forward propagation 

Computing the cost of forward propagation 

Backpropagation 

Parameter initialization

import numpy as np
import h5py
import matplotlib.pyplot as plt
import testCases 
from dnn_utils import sigmoid, sigmoid_backward, relu, relu_backward 
import lr_utils 

#Parameter initialization for a two-layer neural network
np.random.seed(1)#fix the random seed so results are reproducible
def initialize_parameters(n_x,n_h,n_y):
    W1 = np.random.randn(n_h, n_x) * 0.01
    b1 = np.zeros((n_h, 1))
    W2 = np.random.randn(n_y, n_h) * 0.01
    b2 = np.zeros((n_y, 1))
    
    #use assertions to make sure the shapes are correct
    assert(W1.shape == (n_h, n_x))
    assert(b1.shape == (n_h, 1))
    assert(W2.shape == (n_y, n_h))
    assert(b2.shape == (n_y, 1))
    
    parameters = {"W1": W1,
                  "b1": b1,
                  "W2": W2,
                  "b2": b2}
    
    return parameters  

#Parameter initialization for a multi-layer (L-layer) neural network
'''layers_dims - a list with the number of units in each layer of the network.
#For example, for a three-layer network with 5 units in the first layer, 3 in the second and 6 in the third,
layers_dims = [5,3,6] and len(layers_dims) = 3'''

def initialize_parameters_deep(layers_dims):
    np.random.seed(3)
    parameters = {}#dictionary that stores the parameters
    L = len(layers_dims)#number of layers in the network
    
    for l in range(1,L):#loop over the layers and randomly initialize their parameters
        #scaling by 1/sqrt(units in the previous layer) instead of 0.01 keeps activations well-behaved in deeper networks
        parameters["W" + str(l)] = np.random.randn(layers_dims[l], layers_dims[l - 1]) / np.sqrt(layers_dims[l - 1])
        parameters["b" + str(l)] = np.zeros((layers_dims[l], 1))
        
        #make sure the shapes are correct
        assert(parameters["W" + str(l)].shape == (layers_dims[l], layers_dims[l-1]))
        assert(parameters["b" + str(l)].shape == (layers_dims[l], 1))
        
    return parameters
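
As a quick sanity check (this snippet is not part of the original homework test cases), initializing the [5,3,6] network described in the comment above should produce parameter shapes like these:

#hypothetical sanity check for initialize_parameters_deep
parameters = initialize_parameters_deep([5, 3, 6])
print("W1.shape = " + str(parameters["W1"].shape))  # (3, 5)
print("b1.shape = " + str(parameters["b1"].shape))  # (3, 1)
print("W2.shape = " + str(parameters["W2"].shape))  # (6, 3)
print("b2.shape = " + str(parameters["b2"].shape))  # (6, 1)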

Forward propagation 

#Linear part of forward propagation
def linear_forward(A,W,b):
    Z = np.dot(W,A) + b
    
    assert(Z.shape == (W.shape[0],A.shape[1]))
    
    cache = (A,W,b)#cache the values from the forward pass for use in backpropagation
     
    return Z,cache

#Linear part plus activation function for one layer of the network
def linear_activation_forward(A_prev,W,b,activation):
    
    if activation == "sigmoid":
        Z, linear_cache = linear_forward(A_prev, W, b)
        A, activation_cache = sigmoid(Z)
    elif activation == "relu":
        Z, linear_cache = linear_forward(A_prev, W, b)
        A, activation_cache = relu(Z)
    
    assert(A.shape == (W.shape[0],A_prev.shape[1]))
    
    cache = (linear_cache,activation_cache)#cache the forward-pass values for the later backward pass
    
    return A,cache


#Forward propagation through the whole network
def L_model_forward(X,parameters):
    caches = []
    A = X
    #each layer has two parameters (W and b), so integer-dividing the
    #number of stored parameters by 2 gives the number of layers; "//" is floor division
    L = len(parameters) // 2

    for l in range(1,L):#forward-propagate through layers 1..L-1 with the relu activation
        A_prev = A 
        A, cache = linear_activation_forward(A_prev, parameters['W' + str(l)], parameters['b' + str(l)], "relu")
        caches.append(cache)
    
    AL, cache = linear_activation_forward(A, parameters['W' + str(L)], parameters['b' + str(L)], "sigmoid")#the output layer uses sigmoid
    caches.append(cache)
    
    return AL,caches  #return the final output AL (i.e. y_hat) and the cached A, W, b values for backpropagation
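
A minimal smoke test for L_model_forward (again, not one of the original test cases) can reuse initialize_parameters_deep on random data:

#hypothetical smoke test for L_model_forward
np.random.seed(2)
X_demo = np.random.randn(5, 4)  # 5 features, 4 examples
params_demo = initialize_parameters_deep([5, 4, 3, 1])
AL_demo, caches_demo = L_model_forward(X_demo, params_demo)
print("AL_demo.shape = " + str(AL_demo.shape))        # (1, 4)
print("number of caches = " + str(len(caches_demo)))  # 3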

Computing the cost of forward propagation 

#Compute the cost of forward propagation
def compute_cost(AL,Y):# note the argument order: AL comes first, then Y
    m = Y.shape[1]# number of examples
    #use the log loss (cross-entropy) to compute the cost of forward propagation
    cost = -np.sum(Y * np.log(AL) + (1 - Y) * np.log(1 - AL))/m
    cost = np.squeeze(cost)# make sure cost is a scalar
    return cost

#test compute_cost
Y,AL = testCases.compute_cost_test_case()
print("cost = " + str(compute_cost(AL, Y)))

Backpropagation 

#Linear part of backpropagation
def linear_backward(dZ,cache):
    A_prev, W, b = cache  # retrieve the values needed for backpropagation from the cache
    m = A_prev.shape[1]# number of examples
    dW = np.dot(dZ, A_prev.T) / m # gradient with respect to W
    db = np.sum(dZ, axis=1, keepdims=True) / m
    dA_prev = np.dot(W.T, dZ)
    
    assert (dA_prev.shape == A_prev.shape)
    assert (dW.shape == W.shape)
    assert (db.shape == b.shape)
    
    return dA_prev, dW, db
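
The three gradients returned by linear_backward follow directly from the linear step Z^{[l]} = W^{[l]} A^{[l-1]} + b^{[l]}:

dW^{[l]} = \frac{1}{m}\, dZ^{[l]} A^{[l-1]T}, \qquad db^{[l]} = \frac{1}{m}\sum_{i=1}^{m} dZ^{[l](i)}, \qquad dA^{[l-1]} = W^{[l]T} dZ^{[l]}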

#test linear_backward

dZ, linear_cache = testCases.linear_backward_test_case()

dA_prev, dW, db = linear_backward(dZ, linear_cache)
print ("dA_prev = "+ str(dA_prev))
print ("dW = " + str(dW))
print ("db = " + str(db))


#Backpropagation through the linear part plus the activation for one layer
def linear_activation_backward(dA,cache,activation="relu"):
    linear_cache, activation_cache = cache
    if activation == "relu":
        dZ = relu_backward(dA, activation_cache)# relu_backward comes from the dnn_utils import at the top
        dA_prev, dW, db = linear_backward(dZ, linear_cache)
    elif activation == "sigmoid":
        dZ = sigmoid_backward(dA, activation_cache)
        dA_prev, dW, db = linear_backward(dZ, linear_cache)
    
    return dA_prev,dW,db
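
Here relu_backward and sigmoid_backward, imported from dnn_utils, are assumed to implement the usual chain-rule step

dZ^{[l]} = dA^{[l]} * g'\left(Z^{[l]}\right), \quad\text{with } g'(z) = \sigma(z)\left(1-\sigma(z)\right) \text{ for sigmoid and } g'(z) = \mathbf{1}\{z > 0\} \text{ for ReLU.}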


#test linear_activation_backward
AL, linear_activation_cache = testCases.linear_activation_backward_test_case()
dA_prev, dW, db = linear_activation_backward(AL, linear_activation_cache, activation = "sigmoid")
print ("sigmoid:")
print ("dA_prev = "+ str(dA_prev))
print ("dW = " + str(dW))
print ("db = " + str(db) + "\n")
 
dA_prev, dW, db = linear_activation_backward(AL, linear_activation_cache, activation = "relu")
print ("relu:")
print ("dA_prev = "+ str(dA_prev))
print ("dW = " + str(dW))
print ("db = " + str(db))


#Backward propagation through the whole model
def L_model_backward(AL,Y,caches):
    grads = {}# dictionary that stores the gradients
    L = len(caches)# number of layers (one cache was stored per layer, so the number of caches equals the number of layers)
    m = AL.shape[1]# number of examples
    Y = Y.reshape(AL.shape)
    dAL = - (np.divide(Y, AL) - np.divide(1 - Y, 1 - AL))# derivative of the cost with respect to AL
    
    current_cache = caches[L-1]# cache of the output (sigmoid) layer
    #note the indexing convention used below: grads["dA" + str(l)] holds the dA of layer l-1
    grads["dA" + str(L)], grads["dW" + str(L)], grads["db" + str(L)] = linear_activation_backward(dAL, current_cache, "sigmoid")
    
    for l in reversed(range(L-1)):# loop backwards over the remaining L-1 (relu) layers
        current_cache = caches[l]
        dA_prev_temp, dW_temp, db_temp = linear_activation_backward(grads["dA" + str(l + 2)], current_cache, "relu")
        grads["dA" + str(l + 1)] = dA_prev_temp
        grads["dW" + str(l + 1)] = dW_temp
        grads["db" + str(l + 1)] = db_temp
    
    return grads

#Update every layer's parameters using the gradients obtained from backpropagation
def update_parameters(parameters,grads,alpha):
    
    L = len(parameters)//2# number of layers
    
    for l in range(L):# l runs from 0 to L-1, so the layer index below is l+1
        
        parameters['W' + str(l + 1)] = parameters['W' + str(l + 1)] - alpha * grads['dW' + str(l + 1)]
        parameters['b' + str(l + 1)] = parameters['b' + str(l + 1)] - alpha * grads['db' + str(l + 1)]
        
    return parameters
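
The update applied here is plain batch gradient descent with learning rate \alpha:

W^{[l]} := W^{[l]} - \alpha\, dW^{[l]}, \qquad b^{[l]} := b^{[l]} - \alpha\, db^{[l]}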

#Build the complete two-layer neural network
def two_layers_model(X,Y,layers_dims,learning_rate=0.0075,num_iterations=3000,print_cost=False,isPlot=True):
  
    np.random.seed(1)
    grads = {}# gradients
    costs = []
    (n_x,n_h,n_y) = layers_dims# sizes of the input, hidden, and output layers
    

#     initialize the parameters
    parameters = initialize_parameters(n_x, n_h, n_y)
    
    W1 = parameters["W1"]
    b1 = parameters["b1"]
    W2 = parameters["W2"]
    b2 = parameters["b2"]
    
#     start iterating
    for i in range(0,num_iterations):
        #forward propagation
        A1, cache1 = linear_activation_forward(X, W1, b1, "relu")
        A2, cache2 = linear_activation_forward(A1, W2, b2, "sigmoid")
        
        #compute the cost
        cost = compute_cost(A2,Y)
        
        #backward propagation
        ##initialize backpropagation with dA2
        dA2 = - (np.divide(Y, A2) - np.divide(1 - Y, 1 - A2))
        
        ##backward pass. Inputs: dA2, cache2, cache1. Outputs: dA1, dW2, db2; also dA0 (unused), dW1, db1.
        dA1, dW2, db2 = linear_activation_backward(dA2, cache2, "sigmoid")
        dA0, dW1, db1 = linear_activation_backward(dA1, cache1, "relu")
        
        ##save the gradients from backpropagation into grads
        grads["dW1"] = dW1
        grads["db1"] = db1
        grads["dW2"] = dW2
        grads["db2"] = db2
        
        #update the parameters
        parameters = update_parameters(parameters,grads,learning_rate)
        W1 = parameters["W1"]
        b1 = parameters["b1"]
        W2 = parameters["W2"]
        b2 = parameters["b2"]
        
        #record/print the cost every 100 iterations; printing is skipped if print_cost=False
        if i % 100 == 0:
            #record the cost
            costs.append(cost)
            #optionally print the cost
            if print_cost:
                print("Iteration " + str(i) + ", cost: " + str(np.squeeze(cost)))
                
    #training finished: plot the cost curve
    if isPlot:
        plt.plot(np.squeeze(costs))
        plt.ylabel('cost')
        plt.xlabel('iterations (per hundreds)')
        plt.title("Learning rate =" + str(learning_rate))
        plt.show()
    
    #return the learned parameters
    return parameters

train_set_x_orig,train_set_y,test_set_x_orig,test_set_y,classes = lr_utils.load_dataset()
train_x_flatten = train_set_x_orig.reshape(train_set_x_orig.shape[0],-1).T
test_x_flatten = test_set_x_orig.reshape(test_set_x_orig.shape[0],-1).T

train_x = train_x_flatten/255
train_y = train_set_y
test_x = test_x_flatten/255
test_y = test_set_y

n_x = 12288
n_h = 7
n_y = 1
layers_dims = (n_x,n_h,n_y)
parameters = two_layers_model(train_x,train_set_y,layers_dims = (n_x,n_h,n_y),num_iterations = 2500,print_cost = True,isPlot = True)

'''Iteration 0, cost: 0.6930497356599891
Iteration 100, cost: 0.6464320953428849
Iteration 200, cost: 0.6325140647912677
Iteration 300, cost: 0.6015024920354665
Iteration 400, cost: 0.5601966311605748
Iteration 500, cost: 0.515830477276473
Iteration 600, cost: 0.47549013139433266
Iteration 700, cost: 0.43391631512257495
Iteration 800, cost: 0.400797753620389
Iteration 900, cost: 0.3580705011323798
Iteration 1000, cost: 0.3394281538366412
Iteration 1100, cost: 0.3052753636196264
Iteration 1200, cost: 0.2749137728213017
Iteration 1300, cost: 0.2468176821061485
Iteration 1400, cost: 0.19850735037466108
Iteration 1500, cost: 0.174483181125566
Iteration 1600, cost: 0.17080762978096897
Iteration 1700, cost: 0.11306524562164709
Iteration 1800, cost: 0.09629426845937147
Iteration 1900, cost: 0.08342617959726864
Iteration 2000, cost: 0.07439078704319083
Iteration 2100, cost: 0.06630748132267932
Iteration 2200, cost: 0.05919329501038171
Iteration 2300, cost: 0.05336140348560558
Iteration 2400, cost: 0.0485547856287702'''
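
The predict helper used below is not defined anywhere in this post; a minimal sketch consistent with how it is called here (and with the accuracy printout further down), assuming a 0.5 threshold on the sigmoid output, might look like this:

#hypothetical predict helper -- a sketch, not the original implementation
def predict(X, y, parameters):
    m = X.shape[1]                                    # number of examples
    probas, caches = L_model_forward(X, parameters)   # forward pass through the trained network
    p = (probas > 0.5).astype(int)                    # threshold the sigmoid outputs at 0.5
    print("Accuracy: " + str(np.sum(p == y) / m))     # fraction of correctly classified examples
    return p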


pred_train = predict(train_x, train_y, parameters) #training set
pred_test = predict(test_x, test_y, parameters) #test set

'''Accuracy: 0.9952153110047847
Accuracy: 0.78
'''
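
The L-layer helpers built above (initialize_parameters_deep, L_model_forward, L_model_backward, update_parameters) are not assembled into a full model in this post; a sketch of how they could be combined into a deeper model using the same training loop might look like the following (the layer sizes in the commented-out call are the ones used in the original assignment).

#hypothetical L-layer model assembled from the helpers above (a sketch, not code from this post)
def L_layer_model(X, Y, layers_dims, learning_rate=0.0075, num_iterations=3000, print_cost=False):
    np.random.seed(1)
    costs = []
    parameters = initialize_parameters_deep(layers_dims)               # initialize all layers

    for i in range(0, num_iterations):
        AL, caches = L_model_forward(X, parameters)                    # forward pass
        cost = compute_cost(AL, Y)                                     # cross-entropy cost
        grads = L_model_backward(AL, Y, caches)                        # backward pass
        parameters = update_parameters(parameters, grads, learning_rate)  # gradient descent step

        if i % 100 == 0:
            costs.append(cost)
            if print_cost:
                print("Iteration " + str(i) + ", cost: " + str(np.squeeze(cost)))

    return parameters

#example call with the layer sizes from the original assignment:
#parameters = L_layer_model(train_x, train_y, [12288, 20, 7, 5, 1], num_iterations=2500, print_cost=True)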

Origin blog.csdn.net/m0_51933492/article/details/126851420