import numpy as np

def zeros_initializations(layer_dims):
    """
    Arguments:
    layer_dims -- list containing the size of each layer

    Returns:
    parameters -- dictionary containing the parameters of each layer {'W1', 'b1', ..., 'WL', 'bL'}
    """
    parameters = {}
    L = len(layer_dims)
    for l in range(1, L):
        parameters['W' + str(l)] = np.zeros((layer_dims[l], layer_dims[l - 1]))
        parameters['b' + str(l)] = np.zeros((layer_dims[l], 1))
    return parameters
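A minimal usage sketch; the layer sizes below are only an illustrative example:

# Hypothetical network: 2 input features, one hidden layer of 4 units, 1 output unit
params = zeros_initializations([2, 4, 1])
print(params['W1'].shape)   # (4, 2)
print(params['b1'].shape)   # (4, 1)
print(params['W2'].shape)   # (1, 4)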
Random initialization
def random_initializations(layer_dims):
    """
    Arguments:
    layer_dims -- list containing the size of each layer

    Returns:
    parameters -- dictionary containing the parameters of each layer {'W1', 'b1', ..., 'WL', 'bL'}
    """
    parameters = {}
    L = len(layer_dims)
    for l in range(1, L):
        parameters['W' + str(l)] = np.random.randn(layer_dims[l], layer_dims[l - 1])
        parameters['b' + str(l)] = np.zeros((layer_dims[l], 1))
    return parameters
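A quick sketch of why random initialization matters: with all-zero weights every hidden unit computes the same thing, while random weights break that symmetry. The seed and layer sizes are assumptions for the sketch only:

np.random.seed(3)                                  # assumed seed, just to make the sketch reproducible
zero_params = zeros_initializations([2, 4, 1])
rand_params = random_initializations([2, 4, 1])
print(np.all(zero_params['W1'] == 0))                             # True: every hidden unit starts identical
print(np.allclose(rand_params['W1'][0], rand_params['W1'][1]))    # False: rows differ, symmetry is broken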
He initialization
def he_initialize(layers_dims):
    """
    Arguments:
    layers_dims -- list containing the size of each layer

    Returns:
    parameters -- dictionary containing the parameters of each layer {'W1', 'b1', ..., 'WL', 'bL'}
    """
    parameters = {}
    L = len(layers_dims)
    for l in range(1, L):
        parameters['W' + str(l)] = np.random.randn(layers_dims[l], layers_dims[l - 1]) * np.sqrt(2. / layers_dims[l - 1])
        parameters['b' + str(l)] = np.zeros((layers_dims[l], 1))
    return parameters
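A small sanity check of the He scaling: for a layer with n_prev inputs, the weights should have a standard deviation close to sqrt(2 / n_prev). The seed and layer sizes are illustrative assumptions:

np.random.seed(1)                      # assumed seed for a reproducible sketch
params = he_initialize([500, 300, 1])
print(params['W1'].std())              # roughly 0.063
print(np.sqrt(2. / 500))               # sqrt(2 / n_prev) ≈ 0.0632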
Regularization
L2 regularization
# GRADED FUNCTION: compute_cost_with_regularization
def compute_cost_with_regularization(A3, Y, parameters, lambd):
    """
    Implement the cost function with L2 regularization. See formula (2) above.

    Arguments:
    A3 -- post-activation, output of forward propagation, of shape (output size, number of examples)
    Y -- "true" labels vector, of shape (output size, number of examples)
    parameters -- python dictionary containing parameters of the model
    lambd -- regularization hyperparameter, scalar

    Returns:
    cost -- value of the regularized loss function (formula (2))
    """
    m = Y.shape[1]
    W1 = parameters["W1"]
    W2 = parameters["W2"]
    W3 = parameters["W3"]

    cross_entropy_cost = compute_cost(A3, Y)  # This gives you the cross-entropy part of the cost

    ### START CODE HERE ### (approx. 1 line)
    L2_regularization_cost = (1. / m) * (lambd / 2) * (np.sum(np.square(W1)) + np.sum(np.square(W2)) + np.sum(np.square(W3)))
    ### END CODE HERE ###

    cost = cross_entropy_cost + L2_regularization_cost
    return cost
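A quick numeric check of the penalty term alone, on toy values that are not from the assignment:

# Illustrative only: L2 penalty for one toy weight matrix, lambd = 0.7, m = 5 examples
lambd, m = 0.7, 5
W1_toy = np.array([[1., -2.], [0.5, 3.]])
l2_term = (1. / m) * (lambd / 2) * np.sum(np.square(W1_toy))
print(l2_term)   # (0.7 / 10) * (1 + 4 + 0.25 + 9) = 0.9975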
Gradient descent
# GRADED FUNCTION: backward_propagation_with_regularization
def backward_propagation_with_regularization(X, Y, cache, lambd):
    """
    Implements the backward propagation of our baseline model to which we added an L2 regularization.

    Arguments:
    X -- input dataset, of shape (input size, number of examples)
    Y -- "true" labels vector, of shape (output size, number of examples)
    cache -- cache output from forward_propagation()
    lambd -- regularization hyperparameter, scalar

    Returns:
    gradients -- A dictionary with the gradients with respect to each parameter, activation and pre-activation variables
    """
    m = X.shape[1]
    (Z1, A1, W1, b1, Z2, A2, W2, b2, Z3, A3, W3, b3) = cache

    dZ3 = A3 - Y
    ### START CODE HERE ### (approx. 1 line)
    dW3 = 1. / m * (np.dot(dZ3, A2.T) + lambd * W3)
    ### END CODE HERE ###
    db3 = 1. / m * np.sum(dZ3, axis=1, keepdims=True)

    dA2 = np.dot(W3.T, dZ3)
    dZ2 = np.multiply(dA2, np.int64(A2 > 0))
    ### START CODE HERE ### (approx. 1 line)
    dW2 = 1. / m * (np.dot(dZ2, A1.T) + lambd * W2)
    ### END CODE HERE ###
    db2 = 1. / m * np.sum(dZ2, axis=1, keepdims=True)

    dA1 = np.dot(W2.T, dZ2)
    dZ1 = np.multiply(dA1, np.int64(A1 > 0))
    ### START CODE HERE ### (approx. 1 line)
    dW1 = 1. / m * (np.dot(dZ1, X.T) + lambd * W1)
    ### END CODE HERE ###
    db1 = 1. / m * np.sum(dZ1, axis=1, keepdims=True)

    gradients = {"dZ3": dZ3, "dW3": dW3, "db3": db3, "dA2": dA2,
                 "dZ2": dZ2, "dW2": dW2, "db2": db2, "dA1": dA1,
                 "dZ1": dZ1, "dW1": dW1, "db1": db1}
    return gradients
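A minimal sketch of the gradient-descent update that would consume these gradients; update_parameters and the learning rate are assumptions for illustration, not part of the assignment code above:

def update_parameters(parameters, gradients, learning_rate=0.01):
    # Hypothetical helper: one gradient-descent step, W := W - alpha * dW and b := b - alpha * db
    L = len(parameters) // 2   # number of layers with W/b pairs
    for l in range(1, L + 1):
        parameters['W' + str(l)] -= learning_rate * gradients['dW' + str(l)]
        parameters['b' + str(l)] -= learning_rate * gradients['db' + str(l)]
    return parameters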