吴恩达作业3,利用深层神经网络实现小猫的分类

利用4层神经网络实现小猫的分类,小猫训练样本是(209,64*64*3=12288),故输入节点是12288个,隐藏层节点依次为20,7,5,输出层为1。

首先看文件结构:dnn_utils_v2.py 包含激活函数及其导数、载入数据集,以及打印预测错误图片的代码:

import numpy as np
import matplotlib.pyplot as plt
import h5py

def sigmoid(Z):
    """
    Apply the logistic sigmoid 1 / (1 + exp(-Z)) element-wise.

    Arguments:
    Z -- numpy array of pre-activation values, any shape

    Returns:
    A -- sigmoid of Z, computed element-wise
    cache -- the input Z itself, handed back unchanged for use in backpropagation
    """

    exp_neg = np.exp(-Z)
    A = 1 / (1 + exp_neg)

    return A, Z

def relu(Z):
    """
    Apply the rectified linear unit max(0, Z) element-wise.

    Arguments:
    Z -- numpy array of pre-activation values (output of the linear step)

    Returns:
    A -- element-wise maximum of 0 and Z, same shape as Z
    cache -- the input Z itself, handed back unchanged for use in backpropagation
    """

    activated = np.maximum(0, Z)

    # Sanity check: the activation must not change the shape.
    assert activated.shape == Z.shape

    return activated, Z


def relu_backward(dA, cache):
    """
    Backpropagate a gradient through a single RELU activation.

    Arguments:
    dA -- gradient of the cost with respect to the RELU output
    cache -- the pre-activation values Z saved during the forward pass

    Returns:
    dZ -- copy of dA with entries set to 0 wherever Z <= 0
    """

    Z = cache
    inactive = Z <= 0

    # Work on a copy so the caller's dA is left untouched.
    dZ = np.array(dA, copy=True)
    dZ[inactive] = 0

    assert dZ.shape == Z.shape

    return dZ

def sigmoid_backward(dA, cache):
    """
    Backpropagate a gradient through a single SIGMOID activation.

    Arguments:
    dA -- gradient of the cost with respect to the sigmoid output
    cache -- the pre-activation values Z saved during the forward pass

    Returns:
    dZ -- dA * s * (1 - s), where s is the sigmoid of Z
    """

    Z = cache

    # Recompute the sigmoid from Z; s * (1 - s) is its derivative.
    sig = 1 / (1 + np.exp(-Z))
    dZ = dA * sig * (1 - sig)

    assert dZ.shape == Z.shape

    return dZ

"""
载入数据集
"""
def load_data():
    """
    Load the cat / non-cat training and test sets from their HDF5 files.

    Reads 'datasets/train_catvnoncat.h5' and 'datasets/test_catvnoncat.h5'
    relative to the current working directory. Each file is opened in a
    `with` block so its handle is closed as soon as the arrays are copied out.

    Returns:
    train_set_x_orig -- contents of "train_set_x" as a numpy array (training features)
    train_set_y_orig -- contents of "train_set_y" reshaped to a single row (1, m_train)
    test_set_x_orig -- contents of "test_set_x" as a numpy array (test features)
    test_set_y_orig -- contents of "test_set_y" reshaped to a single row (1, m_test)
    classes -- contents of "list_classes" from the test file (the list of classes)
    """
    with h5py.File('datasets/train_catvnoncat.h5', "r") as train_dataset:
        # [:] reads the full dataset into memory, so it stays valid after close.
        train_set_x_orig = np.array(train_dataset["train_set_x"][:])
        train_set_y_orig = np.array(train_dataset["train_set_y"][:])

    with h5py.File('datasets/test_catvnoncat.h5', "r") as test_dataset:
        test_set_x_orig = np.array(test_dataset["test_set_x"][:])
        test_set_y_orig = np.array(test_dataset["test_set_y"][:])
        classes = np.array(test_dataset["list_classes"][:])

    # Turn the label vectors into row vectors of shape (1, number_of_examples).
    train_set_y_orig = train_set_y_orig.reshape((1, train_set_y_orig.shape[0]))
    test_set_y_orig = test_set_y_orig.reshape((1, test_set_y_orig.shape[0]))

    return train_set_x_orig, train_set_y_orig, test_set_x_orig, test_set_y_orig, classes


def print_mislabeled_images(classes, X, y, p):
    """
    Plot the images whose prediction differs from the true label.

    Arguments:
    classes -- class names as byte strings, indexed by label value
    X -- dataset; each column is reshaped to a 64x64x3 image for display
    y -- true labels, read as y[0, index]
    p -- predictions, read as p[0, index]
    """
    # Assuming 0/1 labels, prediction + truth equals 1 exactly when they disagree.
    label_sum = p + y
    wrong = np.asarray(np.where(label_sum == 1))
    plt.rcParams['figure.figsize'] = (40.0, 40.0)  # default size for the plots
    total = len(wrong[0])
    for slot in range(total):
        # Row 1 of `wrong` holds the column (example) index of each mismatch.
        index = wrong[1][slot]
        predicted_name = classes[int(p[0, index])].decode("utf-8")
        true_name = classes[y[0, index]].decode("utf-8")

        plt.subplot(2, total, slot + 1)
        plt.imshow(X[:, index].reshape(64, 64, 3), interpolation='nearest')
        plt.axis('off')
        plt.title("Prediction: " + predicted_name + " \n Class: " + true_name)

StartDeepNeural.py是整个模型前向传播和后向传播的代码

import numpy as np
import dnn_utils_v2
def initialize_parameters(n_x,n_h,n_y):
    """
    Create the parameters of a network with one hidden layer.

    Arguments:
    n_x -- size of the input layer
    n_h -- size of the hidden layer
    n_y -- size of the output layer

    Returns:
    parameters -- dict with 'W1' (n_h, n_x) and 'W2' (n_y, n_h) drawn from a
                  standard normal scaled by 0.01, and zero biases
                  'b1' (n_h, 1) and 'b2' (n_y, 1)
    """
    scale = 0.01  # keeps the initial random weights small
    parameters = {
        'W1': np.random.randn(n_h, n_x) * scale,
        'b1': np.zeros((n_h, 1)),
        'W2': np.random.randn(n_y, n_h) * scale,
        'b2': np.zeros((n_y, 1)),
    }
    expected_shapes = {
        'W1': (n_h, n_x),
        'b1': (n_h, 1),
        'W2': (n_y, n_h),
        'b2': (n_y, 1),
    }
    for key, shape in expected_shapes.items():
        assert parameters[key].shape == shape
    return parameters
"""
多层神经网络的参数初始化:注意权重的初始化 为防止梯度爆炸和梯度消失
"""
def initialize_parameters_deep(layer_dims):
    """
    Create the parameters for every layer of a multi-layer network.

    Weights are drawn from a standard normal and scaled by
    sqrt(2 / size_of_previous_layer); biases start at zero.

    Arguments:
    layer_dims -- sequence of layer sizes, input layer first

    Returns:
    parameters -- dict with 'W<l>' of shape (layer_dims[l], layer_dims[l-1])
                  and 'b<l>' of shape (layer_dims[l], 1) for l = 1..len(layer_dims)-1
    """
    parameters = {}
    for layer in range(1, len(layer_dims)):
        fan_in, fan_out = layer_dims[layer - 1], layer_dims[layer]
        weights = np.random.randn(fan_out, fan_in) * np.sqrt(2.0 / fan_in)
        biases = np.zeros((fan_out, 1))
        assert weights.shape == (fan_out, fan_in)
        assert biases.shape == (fan_out, 1)
        parameters['W' + str(layer)] = weights
        parameters['b' + str(layer)] = biases
    return parameters
"""
前向传播过程中某一层未加激活函数的操作
"""
def linear_forward(A,W,b):
    """
    Compute the linear part of one layer's forward step: Z = W . A + b.

    Arguments:
    A -- activations from the previous layer (or the input data)
    W -- weight matrix of the current layer
    b -- bias vector of the current layer

    Returns:
    Z -- pre-activation values, shape (W.shape[0], A.shape[1])
    cache -- the tuple (A, W, b), kept for the backward pass
    """
    Z = np.dot(W, A) + b
    expected_shape = (W.shape[0], A.shape[1])
    assert Z.shape == expected_shape
    return Z, (A, W, b)
"""
前向传播过程中 某一层通过激活函数来采取相应的操作
"""
def linear_activation_forward(A_prev,W,b,activation):
    """
    Run one full layer forward: the linear step followed by its activation.

    Arguments:
    A_prev -- activations from the previous layer (or the input data)
    W -- weight matrix of the current layer
    b -- bias vector of the current layer
    activation -- which activation to apply: 'sigmoid' or 'relu'

    Returns:
    A -- output of the activation, shape (W.shape[0], A_prev.shape[1])
    cache -- tuple (linear_cache, activation_cache), i.e. ((A_prev, W, b), Z),
             kept for the backward pass

    Raises:
    ValueError -- if activation is neither 'sigmoid' nor 'relu'
    """
    # Pick the activation first so an unsupported name fails with a clear
    # message instead of an unbound-variable error further down.
    if activation == 'sigmoid':
        activate = dnn_utils_v2.sigmoid
    elif activation == 'relu':
        activate = dnn_utils_v2.relu
    else:
        raise ValueError(
            "activation must be 'sigmoid' or 'relu', got {!r}".format(activation))

    Z, linear_cache = linear_forward(A_prev, W, b)
    A, activation_cache = activate(Z)

    assert A.shape == (W.shape[0], A_prev.shape[1])
    cache = (linear_cache, activation_cache)
    return A, cache
"""
整个模型的前向传播过程
"""
def L_model_forward(X,parameters):
    """
    Run the forward pass of the whole network.

    Every layer except the last uses RELU; the last layer uses SIGMOID.

    Arguments:
    X -- input data, one example per column
    parameters -- dict holding 'W1', 'b1', ..., 'WL', 'bL'

    Returns:
    AL -- output of the last layer, shape (WL.shape[0], X.shape[1])
    caches -- list with one cache per layer, in layer order
    """
    # Each layer contributes one W and one b entry.
    num_layers = len(parameters) // 2
    caches = []
    activation_out = X

    # Hidden layers 1 .. L-1.
    for layer in range(1, num_layers):
        activation_out, layer_cache = linear_activation_forward(
            activation_out,
            parameters['W' + str(layer)],
            parameters['b' + str(layer)],
            activation='relu')
        caches.append(layer_cache)

    # Output layer L.
    last = str(num_layers)
    AL, layer_cache = linear_activation_forward(
        activation_out, parameters['W' + last], parameters['b' + last],
        activation='sigmoid')
    caches.append(layer_cache)

    assert AL.shape == (parameters['W' + last].shape[0], X.shape[1])
    return AL, caches
"""
计算损失值
"""
def compute_cost(AL,Y):
    """
    Compute the cross-entropy cost averaged over all examples.

    Arguments:
    AL -- predicted probabilities, shape (1, m)
    Y -- true labels, shape (1, m)

    Returns:
    cost -- the cost as a 0-dimensional numpy value
    """
    m = Y.shape[1]
    log_likelihood_pos = np.dot(Y, np.log(AL).T)
    log_likelihood_neg = np.dot(1 - Y, np.log(1 - AL).T)
    cost = (1. / m) * (-log_likelihood_pos - log_likelihood_neg)
    # Collapse the (1, 1) result of the dot products to a scalar.
    cost = np.squeeze(cost)
    assert cost.shape == ()
    return cost
"""
后向传播过程中某一层未加激活函数的操作
"""
def linear_backward(dZ,cache):
    """
    Backpropagate through the linear part of one layer.

    Arguments:
    dZ -- gradient of the cost with respect to this layer's linear output
    cache -- tuple (A_prev, W, b) saved by the forward pass

    Returns:
    dA_prev -- gradient with respect to the previous layer's activations, same shape as A_prev
    dW -- gradient with respect to W, same shape as W
    db -- gradient with respect to b, same shape as b
    """
    A_prev, W, b = cache
    # Gradients are averaged over the m examples (columns of A_prev).
    inv_m = 1 / A_prev.shape[1]

    dW = inv_m * np.dot(dZ, A_prev.T)
    db = inv_m * np.sum(dZ, axis=1, keepdims=True)
    dA_prev = np.dot(W.T, dZ)

    for grad, reference in ((dW, W), (db, b), (dA_prev, A_prev)):
        assert grad.shape == reference.shape
    return dA_prev, dW, db
"""
后向传播过程中 某一层通过激活函数来采取相应的操作
"""
def linear_activation_backward(dA,cache,activation):
    """
    Run one full layer backward: through the activation, then the linear step.

    Arguments:
    dA -- gradient of the cost with respect to this layer's activation output
    cache -- tuple (linear_cache, activation_cache) saved by the forward pass
    activation -- which activation the layer used: 'sigmoid' or 'relu'

    Returns:
    dA_prev -- gradient with respect to the previous layer's activations
    dW -- gradient with respect to this layer's W
    db -- gradient with respect to this layer's b

    Raises:
    ValueError -- if activation is neither 'sigmoid' nor 'relu'
    """
    linear_cache, activation_cache = cache

    # Reject unsupported names explicitly rather than falling through to an
    # unbound-variable error at the return statement.
    if activation == 'relu':
        activation_backward = dnn_utils_v2.relu_backward
    elif activation == 'sigmoid':
        activation_backward = dnn_utils_v2.sigmoid_backward
    else:
        raise ValueError(
            "activation must be 'sigmoid' or 'relu', got {!r}".format(activation))

    dZ = activation_backward(dA, activation_cache)
    dA_prev, dW, db = linear_backward(dZ, linear_cache)
    return dA_prev, dW, db
"""
整个模型的后向传播过程
"""
def L_model_backward(AL,Y,caches):
    """
    Run the backward pass of the whole network.

    The last layer is treated as SIGMOID and every earlier layer as RELU,
    mirroring the forward pass.

    Arguments:
    AL -- output of the forward pass (predicted probabilities)
    Y -- true labels; reshaped here to the shape of AL
    caches -- list of per-layer caches from the forward pass, in layer order;
              each entry is ((A_prev, W, b), Z)

    Returns:
    grads -- dict with, for each layer l = 1..L:
             'dW<l>' and 'db<l>' -- gradients of layer l's parameters
             'dA<l>' -- the dA_prev returned by layer l's backward step,
                        i.e. the gradient with respect to layer l's input
    """
    grads = {}
    num_layers = len(caches)
    Y = Y.reshape(AL.shape)

    # Derivative of the cross-entropy cost with respect to AL.
    dAL = -np.divide(Y, AL) + np.divide(1 - Y, 1 - AL)

    # Output layer (sigmoid).
    last = str(num_layers)
    grads['dA' + last], grads['dW' + last], grads['db' + last] = linear_activation_backward(
        dAL, caches[num_layers - 1], activation='sigmoid')

    # Hidden layers L-1 .. 1 (relu); each consumes the gradient stored by the
    # layer above it.
    for layer in range(num_layers - 1, 0, -1):
        key = str(layer)
        grads['dA' + key], grads['dW' + key], grads['db' + key] = linear_activation_backward(
            grads['dA' + str(layer + 1)], caches[layer - 1], activation='relu')
    return grads
"""
更新参数
"""
def update_parameters(parameters,grads,learning_rate):
    """
    Apply one gradient-descent step to every layer's W and b.

    Arguments:
    parameters -- dict holding 'W1', 'b1', ..., 'WL', 'bL'; its entries are
                  replaced in place
    grads -- dict holding the matching 'dW<l>' and 'db<l>' gradients
    learning_rate -- step size of the update

    Returns:
    parameters -- the same dict, now holding the updated values
    """
    num_layers = len(parameters) // 2
    for layer in range(1, num_layers + 1):
        for prefix in ('W', 'b'):
            key = prefix + str(layer)
            parameters[key] = parameters[key] - learning_rate * grads['d' + key]
    return parameters

DeepNeuralCat_noCat.py就是训练的代码:首先看数据集

import numpy as np
import dnn_utils_v2
import matplotlib.pyplot as plt
import StartDeepNeural
# Load the cat / non-cat dataset and print a quick summary of what came back.
train_x_orig, train_y_orig, test_x_orig, test_y_orig, classa = dnn_utils_v2.load_data()
print('train={}'.format(train_x_orig.shape))
# One line each, in this order: training label shape, the first ten training
# labels, test image shape, test label shape, class names.
for summary in (train_y_orig.shape, train_y_orig[:, :10], test_x_orig.shape,
                test_y_orig.shape, classa):
    print(summary)
# Show the first training image.
plt.imshow(train_x_orig[0])
plt.show()

打印结果:


可知训练样本为209个,维度为(64,64,3),测试样本为50个,维度为(64,64,3),0代表不是猫

将每张图片展平成一列并转置:训练样本由(209,64,64,3)变为(64*64*3,209),测试样本由(50,64,64,3)变为(64*64*3,50),再除以255做归一化

import numpy as np
import dnn_utils_v2
import matplotlib.pyplot as plt
import StartDeepNeural
# Load the raw image arrays and labels.
train_x_orig, train_y_orig, test_x_orig, test_y_orig, classa = dnn_utils_v2.load_data()

# Training set: flatten every image into one column (hence the transpose),
# then scale the pixel values by 1/255.
train_pixels = train_x_orig.shape[1] * train_x_orig.shape[2] * 3
train_x_flatten = train_x_orig.reshape(train_x_orig.shape[0], train_pixels).T
train_x = train_x_flatten / 255.

# Test set: same flattening and scaling.
test_pixels = test_x_orig.shape[1] * test_x_orig.shape[2] * 3
test_x_flatten = test_x_orig.reshape(test_x_orig.shape[0], test_pixels).T
test_x = test_x_flatten / 255.

开始训练,下面代码也写了两层神经网络,只是没有用到而已。

import numpy as np
import dnn_utils_v2
import matplotlib.pyplot as plt
import StartDeepNeural
# Load the raw image arrays and labels.
train_x_orig, train_y_orig, test_x_orig, test_y_orig, classa = dnn_utils_v2.load_data()

# Training set: flatten every image into one column (hence the transpose),
# then scale the pixel values by 1/255.
train_pixels = train_x_orig.shape[1] * train_x_orig.shape[2] * 3
train_x_flatten = train_x_orig.reshape(train_x_orig.shape[0], train_pixels).T
train_x = train_x_flatten / 255.

# Test set: same flattening and scaling.
test_pixels = test_x_orig.shape[1] * test_x_orig.shape[2] * 3
test_x_flatten = test_x_orig.reshape(test_x_orig.shape[0], test_pixels).T
test_x = test_x_flatten / 255.

def two_layer_model(X,Y,num_iterations,learning_rate):
    """
    Train a two-layer network (one RELU hidden layer of 7 units, one SIGMOID
    output unit) with plain gradient descent.

    Arguments:
    X -- input data, one example per column; the input layer size is taken
         from X.shape[0]
    Y -- true labels, shape (1, number of examples)
    num_iterations -- number of gradient-descent steps
    learning_rate -- step size of each update

    Returns:
    costs -- list of the cost recorded every 100 iterations
    parameters -- dict of the learned 'W1', 'b1', 'W2', 'b2'
    """
    n_x = X.shape[0]  # input size follows the data instead of a fixed 12288
    n_h = 7
    n_y = 1
    layer_dims = (n_x, n_h, n_y)
    parameters = StartDeepNeural.initialize_parameters_deep(layer_dims)
    costs = []
    for i in range(0, num_iterations):
        # caches holds one ((A_prev, W, b), Z) entry per layer.
        AL, caches = StartDeepNeural.L_model_forward(X, parameters)
        cost = StartDeepNeural.compute_cost(AL, Y)
        grads = StartDeepNeural.L_model_backward(AL, Y, caches)
        parameters = StartDeepNeural.update_parameters(parameters, grads, learning_rate)
        if i % 100 == 0:
            print('iterations{}:cost {}'.format(i, cost))
            costs.append(cost)
    return costs, parameters
def predict(X,Y,parameters):
    """
    Predict 0/1 labels for X and measure the accuracy against Y.

    Arguments:
    X -- input data, one example per column
    Y -- true labels, shape (1, m)
    parameters -- dict of trained network parameters

    Returns:
    accuracy -- fraction of examples whose prediction matches Y
    p -- predictions, shape (1, m), holding 1 where the network output is
         above 0.5 and 0 otherwise
    """
    AL, _ = StartDeepNeural.L_model_forward(X, parameters)  # AL.shape = (1, m)
    m = X.shape[1]

    # Threshold the first (only) output row at 0.5.
    p = np.where(AL[0] > 0.5, 1.0, 0.0).reshape(1, m)

    # Matches are counted as (both 1) + (both 0).
    both_one = np.dot(p, Y.T)
    both_zero = np.dot(1 - p, 1 - Y.T)
    accuracy = np.squeeze(both_one + both_zero) / m
    return accuracy, p
def two_layer_model_test():
    """
    Train the two-layer model on the module-level training set, print the
    train and test accuracy, and plot the recorded costs.
    """
    costs, parameters = two_layer_model(
        train_x, train_y_orig, num_iterations=3000, learning_rate=0.0075)

    train_accuracy, _ = predict(train_x, train_y_orig, parameters)
    print('train accuracy is {}'.format(train_accuracy))
    test_accuracy, _ = predict(test_x, test_y_orig, parameters)
    print('test accuracy is {}'.format(test_accuracy))

    # Cost curve; one point per recorded cost.
    plt.plot(costs)
    plt.xlabel('iterations')
    plt.ylabel('costs')
    plt.title('learning rate is 0.0075')
    plt.show()

def L_layer_model(X, Y, layer_dims,learning_rate=0.0075,num_iterations=2000):
    """
    Train a multi-layer network with plain gradient descent.

    Arguments:
    X -- input data, one example per column
    Y -- true labels, shape (1, number of examples)
    layer_dims -- sequence of layer sizes, input layer first
    learning_rate -- step size of each update
    num_iterations -- number of gradient-descent steps

    Returns:
    costs -- list of the cost recorded every 100 iterations
    parameters -- dict of the learned weights and biases
    """
    parameters = StartDeepNeural.initialize_parameters_deep(layer_dims)
    costs = []
    for step in range(num_iterations):
        # Forward pass, cost, backward pass, then one parameter update.
        AL, caches = StartDeepNeural.L_model_forward(X, parameters)
        cost = StartDeepNeural.compute_cost(AL, Y)
        grads = StartDeepNeural.L_model_backward(AL, Y, caches)
        parameters = StartDeepNeural.update_parameters(parameters, grads, learning_rate)

        if step % 100 != 0:
            continue
        print('iterations{}:cost {}'.format(step, cost))
        costs.append(cost)
    return costs, parameters
def print_mislabel_images(classes,test_x,test_y_orig,p_test):
    """
    Draw every misclassified test image in one figure and save it to '1.jpg'.

    Arguments:
    classes -- class names as byte strings, indexed by label value
    test_x -- test data; each column is reshaped to a 64x64x3 image
    test_y_orig -- true labels, read as test_y_orig[0][index]
    p_test -- predicted labels, read as p_test[0][index]

    The figure is written once, after all subplots are drawn. Nothing is
    saved when there are no mislabeled images.
    """
    # Assuming 0/1 labels, truth + prediction equals 1 exactly when they disagree.
    label_sum = test_y_orig + p_test
    # np.where returns (row_indices, col_indices); np.asarray stacks them into
    # a 2-row array whose second row holds the example indices.
    mislabel_index = np.asarray(np.where(label_sum == 1))
    plt.figure(figsize=(40, 40))  # canvas size in inches
    num_images = len(mislabel_index[0])
    for i in range(num_images):
        index = mislabel_index[1][i]
        plt.subplot(2, num_images, i + 1)
        plt.imshow(test_x[:, index].reshape(64, 64, 3))
        plt.title('prediction '+classes[int(p_test[0][index])].decode('utf-8')+' real '+classes[int(test_y_orig[0][index])].decode('utf-8'))
    # Save once instead of rewriting the file after every subplot.
    if num_images:
        plt.savefig('1.jpg')
def L_layer_model_test():
    """
    Train the 4-layer model (12288-20-7-5-1) on the module-level training set,
    print the train and test accuracy, and show the misclassified test images.
    """
    layers_dims = [12288, 20, 7, 5, 1]
    _, parameters = L_layer_model(train_x, train_y_orig, layers_dims)

    train_accuracy, _ = predict(train_x, train_y_orig, parameters)
    print('train accuracy is {}'.format(train_accuracy))
    test_accuracy, p_test = predict(test_x, test_y_orig, parameters)
    print('test accuracy is {}'.format(test_accuracy))

    print_mislabel_images(classa, test_x, test_y_orig, p_test)
    plt.show()
# Script entry point: train and evaluate the multi-layer model.
if __name__ == "__main__":
    L_layer_model_test()

打印结果:


猜你喜欢

转载自blog.csdn.net/fanzonghao/article/details/81029763