First, a summary map of the whole pipeline (initialize → forward → cost → backward → update):
Step 1: parameter initialization for an L-layer network: returns each layer's parameters W(1)…W(L-1) and b(1)…b(L-1)
import numpy as np
import matplotlib.pyplot as plt

def initialize_parameters_deep(layer_dims):
    parameters = {}
    L = len(layer_dims)  # number of layers, including the input layer
    for l in range(1, L):
        parameters['W' + str(l)] = np.random.randn(layer_dims[l], layer_dims[l-1]) * 0.01
        parameters['b' + str(l)] = np.zeros((layer_dims[l], 1))
    return parameters
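As a quick sanity check (the layer sizes below are made up for illustration), the shapes come out as expected:
params = initialize_parameters_deep([5, 4, 3, 1])  # hypothetical sizes: 5 inputs, two hidden layers, 1 output
for l in range(1, 4):
    print(params['W' + str(l)].shape, params['b' + str(l)].shape)
# (4, 5) (4, 1) / (3, 4) (3, 1) / (1, 3) (1, 1): each W[l] is (layer_dims[l], layer_dims[l-1])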
Step 2: forward propagation
1. The linear part of the network's forward propagation: input a[l-1], output z[l], cache (a[l-1], W[l], b[l]). (The argument A here is replaced by A_prev later.)
def linear_forward(A, W, b):
    Z = np.dot(W, A) + b
    cache = (A, W, b)
    return Z, cache
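A minimal shape check with toy dimensions (all numbers here are assumptions for illustration):
A = np.random.randn(5, 2)  # 5 features, a batch of 2 examples
W = np.random.randn(3, 5)  # a layer with 3 units
b = np.zeros((3, 1))
Z, cache = linear_forward(A, W, b)
print(Z.shape)  # (3, 2): one row per unit, one column per example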
2. Implement the relu and sigmoid activation functions: input z[l], output a[l]; each returns (a[l], z[l])
def sigmoid(Z):
    A = 1 / (1 + np.exp(-Z))
    cache = Z
    return A, cache

def relu(Z):
    A = np.maximum(0, Z)
    assert (A.shape == Z.shape)
    cache = Z
    return A, cache
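A tiny check of both activations on hand-picked values:
Z = np.array([[-1.0, 0.0, 2.0]])
print(sigmoid(Z)[0])  # approx [[0.269 0.5 0.881]]
print(relu(Z)[0])     # [[0. 0. 2.]]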
3. Combine the previous two steps into a single [LINEAR -> ACTIVATION] forward layer function
def linear_activation_forward(A_prev, W, b, activation):
    Z, linear_cache = linear_forward(A_prev, W, b)
    if activation == "sigmoid":
        A, activation_cache = sigmoid(Z)
    elif activation == "relu":
        A, activation_cache = relu(Z)
    cache = (linear_cache, activation_cache)  # activation_cache is just Z
    return A, cache  # cache stores (A_prev, W, b) and Z
4. Implement forward propagation: [LINEAR -> RELU] for the first L-1 layers, then [LINEAR -> SIGMOID] for the last layer
def L_model_forward(X, parameters):
    caches = []
    A = X
    L = len(parameters) // 2  # number of layers in the network
    for l in range(1, L):
        A_prev = A
        A, cache = linear_activation_forward(A_prev, parameters['W' + str(l)], parameters['b' + str(l)], activation="relu")
        caches.append(cache)
    AL, cache = linear_activation_forward(A, parameters['W' + str(L)], parameters['b' + str(L)], activation="sigmoid")
    caches.append(cache)  # all the intermediate values (A, W, b, Z) are now stored in caches
    return AL, caches
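To verify the full forward pass, a run on random data (the dimensions are made up):
np.random.seed(1)
params = initialize_parameters_deep([4, 3, 1])  # a 2-layer toy network
X = np.random.randn(4, 5)                       # 4 features, 5 examples
AL, caches = L_model_forward(X, params)
print(AL.shape, len(caches))                    # (1, 5) 2: one probability per example, one cache per layer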
Step 3: compute the cost. This is the cross-entropy cost J = -(1/m) * Σ [ y log a + (1-y) log(1-a) ]; its derivative with respect to AL is exactly the dAL used in Step 4.
def compute_cost(AL, Y):
    m = Y.shape[1]
    cost = -1./m * np.sum(Y * np.log(AL) + (1 - Y) * np.log(1 - AL))
    cost = np.squeeze(cost)  # turn [[cost]] into a scalar
    return cost
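A worked example on two examples (values made up): with AL = [0.8, 0.1] and Y = [1, 0], the cost is -1/2 * (log 0.8 + log 0.9) ≈ 0.164.
AL = np.array([[0.8, 0.1]])
Y = np.array([[1, 0]])
print(compute_cost(AL, Y))  # approx 0.164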
Step 4: backward propagation: input da[l], output da[l-1], dw[l], db[l]
1. The backward pass for the linear part of the network (assuming dZ[l] is already known, compute dW[l], db[l], dA[l-1]).
Input: dZ[l] and the cache (A_prev, W, b); output: dA_prev, dW, db.
dZ depends on the activation g(z), so it is computed separately (see step 2 below).
def linear_backward(dZ, cache):
    A_prev, W, b = cache
    m = A_prev.shape[1]
    dW = 1./m * np.dot(dZ, A_prev.T)
    db = 1./m * np.sum(dZ, axis=1, keepdims=True)
    dA_prev = np.dot(W.T, dZ)
    return dA_prev, dW, db
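A shape check with toy dimensions (assumed for illustration): each gradient must match the shape of the variable it belongs to.
dZ = np.random.randn(3, 2)
A_prev, W, b = np.random.randn(5, 2), np.random.randn(3, 5), np.zeros((3, 1))
dA_prev, dW, db = linear_backward(dZ, (A_prev, W, b))
print(dA_prev.shape, dW.shape, db.shape)  # (5, 2) (3, 5) (3, 1)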
2. Implement the gradients of the relu and sigmoid activations (relu_backward / sigmoid_backward).
Assuming dA[l] is already known:
dz[l] = da[l] * g[l]'(z[l])
equivalently, dz[l] = W[l+1]^T dz[l+1] * g[l]'(z[l]) (where * is element-wise multiplication)
def relu_backward(dA, cache):  # the cache here stores Z
    Z = cache
    dZ = np.array(dA, copy=True)  # just converting dA to a correct object
    dZ[Z <= 0] = 0  # when z <= 0, set dZ to 0 as well
    assert (dZ.shape == Z.shape)
    return dZ
def sigmoid_backward(dA, cache):
    Z = cache
    s = 1 / (1 + np.exp(-Z))
    dZ = dA * s * (1 - s)
    assert (dZ.shape == Z.shape)
    return dZ
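A quick numerical check that sigmoid_backward really computes dz = da * g'(z), comparing against a centered finite difference (step size chosen arbitrarily):
Z = np.array([[0.5, -1.2]])
dA = np.ones_like(Z)
eps = 1e-6
numeric = (sigmoid(Z + eps)[0] - sigmoid(Z - eps)[0]) / (2 * eps)  # approximates g'(z)
print(np.allclose(sigmoid_backward(dA, Z), numeric))  # True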
3. Combine the previous two steps into a single [LINEAR -> ACTIVATION] backward function
def linear_activation_backward(dA, cache, activation):
    linear_cache, activation_cache = cache
    if activation == "relu":
        dZ = relu_backward(dA, activation_cache)
        dA_prev, dW, db = linear_backward(dZ, linear_cache)
    elif activation == "sigmoid":
        dZ = sigmoid_backward(dA, activation_cache)
        dA_prev, dW, db = linear_backward(dZ, linear_cache)
    return dA_prev, dW, db
4. Put it together: the backward pass for the last [LINEAR -> SIGMOID] layer and the first L-1 [LINEAR -> RELU] layers.
Compute the dA of every layer; all the (A, W, b, Z) values needed are already stored in caches.
def L_model_backward(AL, Y, caches):
    grads = {}
    L = len(caches)  # the number of layers
    m = AL.shape[1]
    Y = Y.reshape(AL.shape)  # after this line, Y has the same shape as AL
    # Initializing the backpropagation
    dAL = - (np.divide(Y, AL) - np.divide(1 - Y, 1 - AL))
    # Lth layer (SIGMOID -> LINEAR) gradients. Inputs: AL, Y, caches.
    # Outputs: grads["dA" + str(L)], grads["dW" + str(L)], grads["db" + str(L)]
    current_cache = caches[L-1]
    grads["dA" + str(L)], grads["dW" + str(L)], grads["db" + str(L)] = linear_activation_backward(dAL, current_cache, activation="sigmoid")
    for l in reversed(range(L-1)):
        # lth layer: (RELU -> LINEAR) gradients
        current_cache = caches[l]
        dA_prev_temp, dW_temp, db_temp = linear_activation_backward(grads["dA" + str(l + 2)], current_cache, activation="relu")
        grads["dA" + str(l + 1)] = dA_prev_temp  # note: the dA keys are shifted by one (dA of layer l is stored under l+1)
        grads["dW" + str(l + 1)] = dW_temp
        grads["db" + str(l + 1)] = db_temp
    return grads
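A sanity check on the same toy network as above (remember the shifted dA keys):
np.random.seed(1)
params = initialize_parameters_deep([4, 3, 1])
X = np.random.randn(4, 5)
Y = np.array([[1, 0, 1, 1, 0]])
AL, caches = L_model_forward(X, params)
grads = L_model_backward(AL, Y, caches)
print(sorted(grads.keys()))                      # ['dA1', 'dA2', 'dW1', 'dW2', 'db1', 'db2']
print(grads['dW2'].shape == params['W2'].shape)  # True: every dW matches its W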
Step 5: update the parameters by one step of gradient descent: W[l] := W[l] - learning_rate * dW[l], b[l] := b[l] - learning_rate * db[l]
def update_parameters(parameters, grads, learning_rate):
    L = len(parameters) // 2  # number of layers
    for l in range(1, L + 1):
        parameters["W" + str(l)] = parameters["W" + str(l)] - learning_rate * grads["dW" + str(l)]
        parameters["b" + str(l)] = parameters["b" + str(l)] - learning_rate * grads["db" + str(l)]
    return parameters
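Continuing the toy run from the previous sketch, one update step should move W1 by exactly -learning_rate * dW1:
W1_before = params['W1'].copy()
params = update_parameters(params, grads, learning_rate=0.1)
print(np.allclose(params['W1'], W1_before - 0.1 * grads['dW1']))  # True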
Step 6: implement a predict function to measure accuracy on the test set
def predict(X, y, parameters):
    m = X.shape[1]
    n = len(parameters) // 2  # number of layers in the neural network
    p = np.zeros((1, m))
    # Forward propagation
    probas, caches = L_model_forward(X, parameters)
    # Convert probas to 0/1 predictions
    for i in range(0, probas.shape[1]):
        if probas[0, i] > 0.5:
            p[0, i] = 1
        else:
            p[0, i] = 0
    # Print results
    # print("predictions: " + str(p))
    # print("true labels: " + str(y))
    print("Accuracy: " + str(np.sum(p == y) / m))
    return p
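A toy call (labels made up): with untrained parameters the accuracy should hover around chance.
np.random.seed(1)
params = initialize_parameters_deep([4, 3, 1])
X = np.random.randn(4, 10)
y = (np.random.rand(1, 10) > 0.5).astype(int)
predict(X, y, params)  # prints an accuracy near 0.5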
def L_layer_model(X, Y, layers_dims, learning_rate=0.0075, num_iterations=3000, print_cost=False):  # lr was 0.009
    np.random.seed(1)
    costs = []  # keep track of cost
    # Parameters initialization
    parameters = initialize_parameters_deep(layers_dims)
    # Loop (gradient descent)
    for i in range(0, num_iterations):
        # Forward propagation: [LINEAR -> RELU]*(L-1) -> LINEAR -> SIGMOID
        AL, caches = L_model_forward(X, parameters)
        # Compute cost
        cost = compute_cost(AL, Y)
        # Backward propagation
        grads = L_model_backward(AL, Y, caches)
        # Update parameters
        parameters = update_parameters(parameters, grads, learning_rate)
        # Print and record the cost every 100 iterations
        if print_cost and i % 100 == 0:
            print("Cost after iteration %i: %f" % (i, cost))
            costs.append(cost)
    # Plot the cost
    plt.plot(np.squeeze(costs))
    plt.ylabel('cost')
    plt.xlabel('iterations (per hundreds)')
    plt.title("Learning rate = " + str(learning_rate))
    plt.show()
    return parameters
parameters = L_layer_model(train_x, train_y, layers_dims, num_iterations=2500, print_cost=True)
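To exercise the whole pipeline without the course dataset, a synthetic run (all data and sizes here are made up, so convergence behavior will differ from the original assignment):
np.random.seed(1)
train_x = np.random.randn(4, 300)
train_y = (train_x[0:1, :] + train_x[1:2, :] > 0).astype(int)  # a simple made-up labeling rule
layers_dims = [4, 5, 3, 1]
parameters = L_layer_model(train_x, train_y, layers_dims, num_iterations=2500, print_cost=True)
predict(train_x, train_y, parameters)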