Artificial Intelligence and Deep Learning by Example: Deep Neural Networks

The training and test sets were found online; the code is as follows:

import numpy as np
import h5py
# Note: scipy.misc.imread has been removed from recent SciPy releases;
# imageio.imread is used here as a replacement (assumes the imageio package is installed)
from imageio import imread

# ------------------------ DNN framework ------------------------

# sigmoid forward activation function
def sigmoid(Z):
    
    # Z is passed in from the linear unit
    # cache stores Z for use in the backward pass
    
    A = 1.0 / (1 + np.exp(-Z))
    cache = Z
    
    return A, cache


# ReLU forward activation function
def ReLU(Z):
    A = np.maximum(0,Z)
    
    assert(A.shape == Z.shape)
    cache = Z
    
    return A, cache


# sigmoid backward activation function
def sigmoid_backward(dA, cache):
    
    # dZ[l] = dA[l] * g[l]'( Z[l] )
    
    Z = cache
    
    f = 1.0 / (1 + np.exp(-Z))
    
    dZ = dA * f * (1-f)
    
    assert(dZ.shape == Z.shape)
    
    return dZ
    

# ReLU backward activation function
def ReLU_backward(dA, cache):
    Z = cache
    
    dZ = np.array(dA,copy = True)
    dZ[Z <= 0] = 0
    
    assert(dZ.shape == Z.shape)
    
    return dZ
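
# Test code for the activation helpers (illustrative values, not from the original post)
'''
Z = np.array([[-1.0, 0.5, 2.0]])
A_sig, _ = sigmoid(Z)                            # approx. [[0.269, 0.622, 0.881]]
A_relu, _ = ReLU(Z)                              # [[0., 0.5, 2.]]
dZ_sig = sigmoid_backward(np.ones_like(Z), Z)    # sigmoid'(Z) elementwise
dZ_relu = ReLU_backward(np.ones_like(Z), Z)      # [[0., 1., 1.]]
'''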

# ------------------------ Forward propagation ------------------------

# Initialize the parameters of the hidden and output layers
def initialize_parameters(layer):
    # layer -- list of layer sizes (number of units in each layer)
    
    L = len(layer)
    parameters = {}
    
    # Shape of W for the current layer: rows -- units in the current layer, columns -- units in the previous layer
    # Shape of b for the current layer: rows -- units in the current layer, columns -- always 1
    for i in range(1, L):
        parameters["W" + str(i)] = np.random.randn(layer[i], layer[i-1])/np.sqrt(layer[i-1])
        parameters["b" + str(i)] = np.zeros((layer[i], 1))
        
        assert (parameters["W" + str(i)].shape == (layer[i], layer[i-1]))
        assert (parameters["b" + str(i)].shape == (layer[i], 1))
        
    return parameters


# Test code
'''
initialize_parameters([3,4,1])
'''


# Linear part of forward propagation
def linear_forward(A_prev, W, b):
    
    # A_prev -- activations passed from the previous layer
    # W, b -- parameters of the current layer
    
    # Z = W*A + b
    Z = np.dot(W, A_prev) + b
    assert (Z.shape == (W.shape[0], A_prev.shape[1]))
    
    # Cache A_prev, W, b
    cache = (A_prev, W, b)
    
    return Z, cache


# Test code
'''
A_prev = np.array([[1], [2], [3]])
parameters = initialize_parameters([3, 4, 1])
Z, cache = linear_forward(A_prev, parameters["W1"], parameters["b1"])
'''

# Forward propagation -- linear -> activation
def linear_forward_activation(A_prev, W, b, activation_function):
    
    # linear_cache -- stores A_prev, W, b
    # activation_cache -- stores Z
    
    if activation_function == "sigmoid":
        Z, linear_cache = linear_forward(A_prev, W, b)
        A, activation_cache = sigmoid(Z)
    elif activation_function == "relu":
        Z, linear_cache = linear_forward(A_prev, W, b)
        A, activation_cache = ReLU(Z)
    
    # Shape of A for the current layer: rows -- rows of W, columns -- columns of A_prev
    assert (A.shape == (W.shape[0], A_prev.shape[1]))
    
    cache = (linear_cache, activation_cache)
    
    return A, cache
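
# Test code (illustrative shapes, not from the original post)
'''
np.random.seed(1)
A_prev = np.random.randn(3, 2)
W = np.random.randn(4, 3)
b = np.zeros((4, 1))
A, cache = linear_forward_activation(A_prev, W, b, "relu")
print(A.shape)    # (4, 2)
'''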


# Full forward propagation
def deep_forward(X, parameters):
    
    # caches -- list of per-layer caches (A_prev, W, b, Z) for the hidden and output layers
    # AL -- activation output of the final (output) layer
    
    caches = []
    A = X
    L = len(parameters) // 2
    
    # Hidden layers: activation function -- ReLU
    for i in range(1, L):
        A_prev = A
        A, cache = linear_forward_activation(A_prev, parameters["W" + str(i)], parameters["b" + str(i)], "relu")
        caches.append(cache)
        
    # Output layer: activation function -- sigmoid
    AL, cache = linear_forward_activation(A, parameters["W" + str(L)], parameters["b" + str(L)], "sigmoid")
    caches.append(cache)
    
    assert(AL.shape == (1,X.shape[1]))
    
    return AL, caches
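
# Test code (illustrative shapes, not from the original post)
'''
np.random.seed(1)
X = np.random.randn(4, 3)                      # 4 features, 3 examples
parameters = initialize_parameters([4, 5, 1])
AL, caches = deep_forward(X, parameters)
print(AL.shape)       # (1, 3)
print(len(caches))    # 2
'''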


# Cost function (cross-entropy)
def compute_cost(AL, Y):
    
    # Y -- ground-truth labels
    # AL -- predictions Y_hat from forward propagation
    
    m = Y.shape[1]
    cost = (1./m) * (-np.dot(Y,np.log(AL).T) - np.dot(1-Y,np.log(1-AL).T))  
    cost = np.squeeze(cost)
    assert (cost.shape == ())
    
    return cost


# Test code
'''
Y = np.asarray([[1,1,1]])
AL = np.asarray([[.8,.9,0.4]])
cost = compute_cost(AL,Y)   # expected cost ≈ 0.4149
'''

# Test code
'''
X = np.array([[1],[2],[3],[4],[5]])
parameters = initialize_parameters([5,4,3,1])
AL, caches = deep_forward(X, parameters)
Y = np.array([[2]])
print(compute_cost(AL, Y))
'''

# ------------------------ Backward propagation ------------------------


# Linear part of backward propagation
def linear_backward(dZ, cache):
    
    A_prev, W, b = cache
    
    # Principle
    '''
    dW[l] = dZ[l] * A_prev.T / m
    db[l] = sum(dZ[l]) / m
    dA[l-1] = W[l].T * dZ[l]
    '''
    
    # m -- number of examples
    m = A_prev.shape[1]
    
    # dW -- gradient of the cost w.r.t. W of the current layer
    # db -- gradient of the cost w.r.t. b of the current layer
    # dA_prev -- gradient of the cost w.r.t. A of layer l-1
    
    dW = np.dot(dZ, A_prev.T) / m
    
    # keepdims=True preserves the 2-D shape of the array
    db = np.sum(dZ, axis = 1, keepdims = True) / m
    
    dA_prev = np.dot(W.T, dZ)
    
    assert (dW.shape == W.shape)
    assert (db.shape == b.shape)
    assert (dA_prev.shape == A_prev.shape)
    
    return dA_prev, dW , db


# Test code
# The current layer has 3 units; the previous layer has 1 unit
'''np.random.seed(3)
dZ = np.random.randn(3,1)
A_prev = np.random.randn(1,1)
W = np.random.randn(3,1)
b = np.random.randn(3,1)
linear_cache = (A_prev, W, b)
dA_prev, dW, db = linear_backward(dZ, linear_cache)
'''

# Backward propagation -- linear -> activation
def linear_activation_backward(dA, cache, activation):
    
    # linear_cache -- stores A_prev, W, b
    
    linear_cache, activation_cache = cache
    
    if activation == "sigmoid":
        dZ = sigmoid_backward(dA, activation_cache)
        dA_prev, dW, db = linear_backward(dZ, linear_cache)
    elif activation == "relu":
        dZ = ReLU_backward(dA, activation_cache)
        dA_prev, dW, db = linear_backward(dZ, linear_cache)
    
    return dA_prev, dW, db

# Test code
'''
np.random.seed(2)
dA = np.random.randn(1,2)
A = np.random.randn(3,2)
W = np.random.randn(1,3)
b = np.random.randn(1,1)
Z = np.random.randn(1,2)
linear_cache = (A,W,b)
activation_cache = Z
cache = (linear_cache,activation_cache)
dA_prev,dW,db = linear_activation_backward(dA, cache, "relu")
'''

# Full backward propagation
def deep_backward(AL, Y, caches):
    
    # AL -- predictions produced by forward propagation (vector)
    # Y -- ground-truth labels (vector)
    grads = {}
    
    # Number of layers (depth of the network)
    L = len(caches)
    
    # Reshape Y to match the shape of AL
    Y = Y.reshape(AL.shape)
    
    # Derivative of the cost with respect to AL
    dAL = -(np.divide(Y, AL) - np.divide(1 - Y, 1 - AL))
    
    # Output layer: backward step through the sigmoid activation
    current_cache = caches[L - 1]
    grads["dA" + str(L)], grads["dW" + str(L)], grads["db" + str(L)] = linear_activation_backward(dAL, current_cache, "sigmoid")
    
    # Backward steps through the hidden layers, storing each gradient for the parameter update
    for i in reversed(range(L - 1)):
        current_cache = caches[i]
        dA_prev, dW, db = linear_activation_backward(grads["dA" + str(i + 2)], current_cache, "relu") 
        grads["dA" + str(i+1)] = dA_prev
        grads["dW" + str(i+1)] = dW
        grads["db" + str(i+1)] = db
    
    return grads

# Test code
'''
np.random.seed(3)
AL = np.random.randn(1, 2)
Y = np.array([[1, 0]])
A1 = np.random.randn(4,2)
W1 = np.random.randn(3,4)
b1 = np.random.randn(3,1)
Z1 = np.random.randn(3,2)
linear_cache_activation_1 = ((A1, W1, b1), Z1)
A2 = np.random.randn(3,2)
W2 = np.random.randn(1,3)
b2 = np.random.randn(1,1)
Z2 = np.random.randn(1,2)
linear_cache_activation_2 = ( (A2, W2, b2), Z2)
caches = (linear_cache_activation_1, linear_cache_activation_2)
grads = deep_backward(AL, Y, caches)
print(grads)
'''

# Parameter update
def update_parameters(parameters, grads, learning_rate):
    
    # learning_rate -- gradient descent step size
    
    L = len(parameters) // 2    # number of layers in the network
    
    # Update every parameter with one gradient descent step
    for i in range(L):  
        parameters["W"+str(i + 1)] = parameters["W" + str(i + 1)] - learning_rate * grads["dW"+str(i + 1)]  
        parameters["b"+str(i + 1)] = parameters["b" + str(i + 1)] - learning_rate * grads["db"+str(i + 1)]  
  
    return parameters


# End-to-end propagation test code
'''
X = np.array([[1],[2],[3],[4],[5]])
parameters = initialize_parameters([5,4,3,1])
print("parameters:{}".format(parameters))
AL, caches = deep_forward(X, parameters)
Y = np.array([[2]])
grads = deep_backward(AL, Y, caches)
learning_rate = 0.05
parameters_update = update_parameters(parameters, grads, learning_rate)
print("parameters_update:{}".format(parameters_update))
'''

# ------------------------ Practical application ------------------------

# Load the datasets
def load_dataset():  
    train_dataset = h5py.File('train_catvnoncat.h5', "r")
    
    # Training-set image pixels
    train_set_x_orig = np.array(train_dataset["train_set_x"][:])
    
    # Training-set labels
    train_set_y_orig = np.array(train_dataset["train_set_y"][:])  
  
    test_dataset = h5py.File('test_catvnoncat.h5', "r")
    # Test-set image pixels
    test_set_x_orig = np.array(test_dataset["test_set_x"][:])
    
    # Test-set labels
    test_set_y_orig = np.array(test_dataset["test_set_y"][:]) 
  
    # List of classes
    classes = np.array(test_dataset["list_classes"][:]) 
  
    train_set_y_orig = train_set_y_orig.reshape((1, train_set_y_orig.shape[0]))  
    test_set_y_orig = test_set_y_orig.reshape((1, test_set_y_orig.shape[0]))  
  
    return train_set_x_orig, train_set_y_orig, test_set_x_orig, test_set_y_orig, classes  


# Predict with the trained network
def predict(X, y, parameters):
    
    # X -- input dataset
    # y -- ground-truth labels (vector)
    
    # Number of examples
    number = X.shape[1]
    
    p = np.zeros((1, number))
    
    AL, caches = deep_forward(X, parameters)
    
    # Threshold each prediction at 0.5:
    for i in range(0, AL.shape[1]):
        if AL[0, i] > 0.5:
            p[0, i] = 1
        else:
            p[0, i] = 0
    
    # Proportion of correct predictions
    print("Accuracy: " + str(np.sum(p == y) / number))
    
    return p

def L_layer_model(X, Y, layer, iterations = 1000, learning_rate = 0.0075):
    costs = []
    
    parameters = initialize_parameters(layer)
    
    # Training loop: one forward and one backward pass per iteration
    for i in range(0,iterations):
        AL, caches = deep_forward(X, parameters)
        
        cost = compute_cost(AL, Y)
        
        grads = deep_backward(AL, Y, caches)
        
        parameters = update_parameters(parameters, grads, learning_rate)
        
        # Record and print the cost every 100 iterations
        if i % 100 == 0:
            print("Cost after iteration %i: %f" % (i, cost))
            costs.append(cost)
    return parameters
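
# Optional: plot the recorded costs to check convergence.
# A minimal sketch (not from the original post); it assumes L_layer_model is modified
# to also return the costs list, and that matplotlib is installed.
'''
import matplotlib.pyplot as plt

plt.plot(costs)
plt.xlabel("iterations (per hundreds)")
plt.ylabel("cost")
plt.show()
'''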
    
# --------- main ---------

train_x_orig, train_y, test_x_orig, test_y, classes = load_dataset()

# Dataset dimensions
m_train = train_x_orig.shape[0]  
num_px = train_x_orig.shape[1]  
m_test = test_x_orig.shape[0]

# Print dataset information
print("Number of training examples:" + str(m_train))  
print("Number of testing examples:" + str(m_test))  
print("Each image of size:(" + str(num_px) + "," + str(num_px) + ",3)")  
print("train_x_orig shape:" + str(train_x_orig.shape))  
print("train_y shape:" + str(train_y.shape))  
print("test_x_orig shape:" + str(test_x_orig.shape))  
print("test_y shape:" + str(test_y.shape))


# Flatten the image matrices into vectors
train_x_flatten = train_x_orig.reshape(train_x_orig.shape[0], -1).T
test_x_flatten = test_x_orig.reshape(test_x_orig.shape[0], -1).T

# Normalize the datasets (scale pixel values into [0, 1])
train_x = train_x_flatten / 255
test_x = test_x_flatten / 255

print("train_x's shape:" + str(train_x.shape))
print("test_x's shape:" + str(test_x.shape)) 


# Layer sizes of the network
layer = [12288, 20, 7, 5, 1]

parameters = L_layer_model(train_x, train_y, layer , iterations = 2500)

# Training-set accuracy
print("train:")
predictions_train = predict(train_x,train_y,parameters)

# Test-set accuracy
print("test:")
predictions_test = predict(test_x,test_y,parameters)

# Try the trained model on a single image
# (image.jpg is assumed to already be num_px x num_px RGB; pixels are scaled to [0, 1]
#  to match the preprocessing applied to the training data)
image = np.array(imread("image.jpg"))
my_image = image.reshape(num_px * num_px * 3, 1) / 255

# True label of the image [0 or 1]
my_label_y = [0]

# Predict the class of the image and print the result
print("my_image:")
my_predicted_image = predict(my_image, my_label_y, parameters)
print("result:")
print ("y = " + str(np.squeeze(my_predicted_image)) + " " +classes[int(np.squeeze(my_predicted_image)),].decode("utf-8")) 

Reposted from blog.csdn.net/adorkable_thief/article/details/80333351