机器学习实战（基于scikit-learn和TensorFlow）-第十一章训练深度神经网络笔记（二）

关注微信公共号：小程在线

关注CSDN博客：程志伟的博客

######################### 重用预训练图层 ########################
#在本例中，对于我们要重用的每个变量，我们找到其初始值设定项的赋值操作，并获得其第二个输入，
#它对应于初始化值。当我们运行初始值设定项时，我们使用一个feed_dict将初始化值替换为我们想要的值：

tf.compat.v1.disable_eager_execution()
reset_graph()

n_inputs = 2
n_hidden1 = 3

original_w = [[1., 2., 3.], [4., 5., 6.]] # Load the weights from the other framework
original_b = [7., 8., 9.] # Load the biases from the other framework

X = tf.placeholder(tf.float32, shape=(None, n_inputs), name="X")
hidden1 = tf.layers.dense(X, n_hidden1, activation=tf.nn.relu, name="hidden1")
# [...] Build the rest of the model

# Get a handle on the assignment nodes for the hidden1 variables
graph = tf.get_default_graph()
assign_kernel = graph.get_operation_by_name("hidden1/kernel/Assign")
assign_bias = graph.get_operation_by_name("hidden1/bias/Assign")
init_kernel = assign_kernel.inputs[1]
init_bias = assign_bias.inputs[1]

init = tf.global_variables_initializer()

with tf.Session() as sess:
sess.run(init, feed_dict={init_kernel: original_w, init_bias: original_b})
# [...] Train the model on your new task
print(hidden1.eval(feed_dict={X: [[10.0, 11.0]]})) # not shown in the book
[[ 61. 83. 105.]]

###另一种方法（最初在本书中使用）是创建专用分配节点和专用占位符。这更冗长，效率更低，但您可能会发现这更明确：
#重用其他框架的模型

reset_graph()

n_inputs = 2
n_hidden1 = 3

original_w = [[1., 2., 3.], [4., 5., 6.]] # Load the weights from the other framework
original_b = [7., 8., 9.] # Load the biases from the other framework

X = tf.placeholder(tf.float32, shape=(None, n_inputs), name="X")
hidden1 = tf.layers.dense(X, n_hidden1, activation=tf.nn.relu, name="hidden1")
# [...] Build the rest of the model

# Get a handle on the variables of layer hidden1
with tf.variable_scope("", default_name="", reuse=True): # root scope
hidden1_weights = tf.get_variable("hidden1/kernel")
hidden1_biases = tf.get_variable("hidden1/bias")

# Create dedicated placeholders and assignment nodes
original_weights = tf.placeholder(tf.float32, shape=(n_inputs, n_hidden1))
original_biases = tf.placeholder(tf.float32, shape=n_hidden1)
assign_hidden1_weights = tf.assign(hidden1_weights, original_weights)
assign_hidden1_biases = tf.assign(hidden1_biases, original_biases)

init = tf.global_variables_initializer()

with tf.Session() as sess:
sess.run(init)
sess.run(assign_hidden1_weights, feed_dict={original_weights: original_w})
sess.run(assign_hidden1_biases, feed_dict={original_biases: original_b})
# [...] Train the model on your new task
print(hidden1.eval(feed_dict={X: [[10.0, 11.0]]}))
[[ 61. 83. 105.]]

####请注意，我们还可以使用get_collection（）并指定作用域来获取变量的句柄：

tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES, scope="hidden1")
Out[57]:
[<tf.Variable 'hidden1/kernel:0' shape=(2, 3) dtype=float32>,
<tf.Variable 'hidden1/bias:0' shape=(3,) dtype=float32>]

tf.get_default_graph().get_tensor_by_name("hidden1/kernel:0")
Out[58]: <tf.Tensor 'hidden1/kernel:0' shape=() dtype=resource>

tf.get_default_graph().get_tensor_by_name("hidden1/bias:0")
Out[59]: <tf.Tensor 'hidden1/bias:0' shape=() dtype=resource>

####冻结低层

reset_graph()

n_inputs = 28 * 28 # MNIST
n_hidden1 = 300 # reused
n_hidden2 = 50 # reused
n_hidden3 = 50 # reused
n_hidden4 = 20 # new!
n_outputs = 10 # new!

X = tf.placeholder(tf.float32, shape=(None, n_inputs), name="X")
y = tf.placeholder(tf.int32, shape=(None), name="y")

with tf.name_scope("dnn"):
hidden1 = tf.layers.dense(X, n_hidden1, activation=tf.nn.relu, name="hidden1") # reused
hidden2 = tf.layers.dense(hidden1, n_hidden2, activation=tf.nn.relu, name="hidden2") # reused
hidden3 = tf.layers.dense(hidden2, n_hidden3, activation=tf.nn.relu, name="hidden3") # reused
hidden4 = tf.layers.dense(hidden3, n_hidden4, activation=tf.nn.relu, name="hidden4") # new!
logits = tf.layers.dense(hidden4, n_outputs, name="outputs") # new!

with tf.name_scope("loss"):
xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
loss = tf.reduce_mean(xentropy, name="loss")

with tf.name_scope("eval"):
correct = tf.nn.in_top_k(logits, y, 1)
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32), name="accuracy")

with tf.name_scope("train"): # not shown in the book
optimizer = tf.train.GradientDescentOptimizer(learning_rate) # not shown
train_vars = tf.get_collection(tf.GraphKeys.TRAINABLE_VARIABLES,
scope="hidden[34]|outputs")
training_op = optimizer.minimize(loss, var_list=train_vars)

reuse_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
scope="hidden[123]") # regular expression
restore_saver = tf.train.Saver(reuse_vars) # to restore layers 1-3

init = tf.global_variables_initializer()
saver = tf.train.Saver()

with tf.Session() as sess:
init.run()
restore_saver.restore(sess, "./my_model_final.ckpt")

for epoch in range(n_epochs):
for X_batch, y_batch in shuffle_batch(X_train, y_train, batch_size):
sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
accuracy_val = accuracy.eval(feed_dict={X: X_valid, y: y_valid})
print(epoch, "Validation accuracy:", accuracy_val)

save_path = saver.save(sess, "./my_new_model_final.ckpt")
INFO:tensorflow:Restoring parameters from ./my_model_final.ckpt
0 Validation accuracy: 0.8896
1 Validation accuracy: 0.926
2 Validation accuracy: 0.936
3 Validation accuracy: 0.9428
4 Validation accuracy: 0.9448
5 Validation accuracy: 0.9462
6 Validation accuracy: 0.951
7 Validation accuracy: 0.9516
8 Validation accuracy: 0.9546
9 Validation accuracy: 0.9552
10 Validation accuracy: 0.9558
11 Validation accuracy: 0.9566
12 Validation accuracy: 0.9566
13 Validation accuracy: 0.9572
14 Validation accuracy: 0.958
15 Validation accuracy: 0.9578
16 Validation accuracy: 0.959
17 Validation accuracy: 0.96
18 Validation accuracy: 0.9596
19 Validation accuracy: 0.9588

reset_graph()

n_inputs = 28 * 28 # MNIST
n_hidden1 = 300 # reused
n_hidden2 = 50 # reused
n_hidden3 = 50 # reused
n_hidden4 = 20 # new!
n_outputs = 10 # new!

X = tf.placeholder(tf.float32, shape=(None, n_inputs), name="X")
y = tf.placeholder(tf.int32, shape=(None), name="y")

with tf.name_scope("dnn"):
hidden1 = tf.layers.dense(X, n_hidden1, activation=tf.nn.relu,
name="hidden1") # reused frozen
hidden2 = tf.layers.dense(hidden1, n_hidden2, activation=tf.nn.relu,
name="hidden2") # reused frozen
hidden2_stop = tf.stop_gradient(hidden2)
hidden3 = tf.layers.dense(hidden2_stop, n_hidden3, activation=tf.nn.relu,
name="hidden3") # reused, not frozen
hidden4 = tf.layers.dense(hidden3, n_hidden4, activation=tf.nn.relu,
name="hidden4") # new!
logits = tf.layers.dense(hidden4, n_outputs, name="outputs") # new!

with tf.name_scope("loss"):
xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
loss = tf.reduce_mean(xentropy, name="loss")

with tf.name_scope("eval"):
correct = tf.nn.in_top_k(logits, y, 1)
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32), name="accuracy")

with tf.name_scope("train"):
optimizer = tf.train.GradientDescentOptimizer(learning_rate)
training_op = optimizer.minimize(loss)

reuse_vars = tf.get_collection(tf.GraphKeys.GLOBAL_VARIABLES,
scope="hidden[123]") # regular expression
restore_saver = tf.train.Saver(reuse_vars) # to restore layers 1-3

init = tf.global_variables_initializer()
saver = tf.train.Saver()

###################### 快速优化器 ############################
#Momentum优化

optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate,
momentum=0.9)

#Nesterov梯度加速

optimizer = tf.train.MomentumOptimizer(learning_rate=learning_rate,
momentum=0.9, use_nesterov=True)

#AdaGrad

optimizer = tf.train.AdagradOptimizer(learning_rate=learning_rate)

#RMSProp

optimizer = tf.train.RMSPropOptimizer(learning_rate=learning_rate,
momentum=0.9, decay=0.9, epsilon=1e-10)

#Adam Optimization

optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)

####更快的优化器学习速率调度

reset_graph()

n_inputs = 28 * 28 # MNIST
n_hidden1 = 300
n_hidden2 = 50
n_outputs = 10

X = tf.placeholder(tf.float32, shape=(None, n_inputs), name="X")
y = tf.placeholder(tf.int32, shape=(None), name="y")

with tf.name_scope("dnn"):
hidden1 = tf.layers.dense(X, n_hidden1, activation=tf.nn.relu, name="hidden1")
hidden2 = tf.layers.dense(hidden1, n_hidden2, activation=tf.nn.relu, name="hidden2")
logits = tf.layers.dense(hidden2, n_outputs, name="outputs")

with tf.name_scope("loss"):
xentropy = tf.nn.sparse_softmax_cross_entropy_with_logits(labels=y, logits=logits)
loss = tf.reduce_mean(xentropy, name="loss")

with tf.name_scope("eval"):
correct = tf.nn.in_top_k(logits, y, 1)
accuracy = tf.reduce_mean(tf.cast(correct, tf.float32), name="accuracy")

with tf.name_scope("train"): # not shown in the book
initial_learning_rate = 0.1
decay_steps = 10000
decay_rate = 1/10
global_step = tf.Variable(0, trainable=False, name="global_step")
learning_rate = tf.train.exponential_decay(initial_learning_rate, global_step,
decay_steps, decay_rate)
optimizer = tf.train.MomentumOptimizer(learning_rate, momentum=0.9)
training_op = optimizer.minimize(loss, global_step=global_step)

init = tf.global_variables_initializer()
saver = tf.train.Saver()

n_epochs = 5
batch_size = 50

with tf.Session() as sess:
init.run()
for epoch in range(n_epochs):
for X_batch, y_batch in shuffle_batch(X_train, y_train, batch_size):
sess.run(training_op, feed_dict={X: X_batch, y: y_batch})
accuracy_val = accuracy.eval(feed_dict={X: X_valid, y: y_valid})
print(epoch, "Validation accuracy:", accuracy_val)

save_path = saver.save(sess, "./my_model_final.ckpt")
0 Validation accuracy: 0.9624
1 Validation accuracy: 0.9698
2 Validation accuracy: 0.9766
3 Validation accuracy: 0.9792
4 Validation accuracy: 0.982