一、MNIST数字识别
首先加载MNIST手写数字识别训练集
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

# Load the MNIST data set with one-hot encoded labels.
mnist = input_data.read_data_sets("C:/Users/14981/Desktop/Deep Learning/", one_hot=True)

# Report the size of each split.
print("Training data size:", mnist.train.num_examples)
print("Validating data size:", mnist.validation.num_examples)
print("Test data size:", mnist.test.num_examples)

# Show the first training image and its label.
print("Example training data:", mnist.train.images[0])
print("Example training data label:", mnist.train.labels[0])
然后定义所需参数
# Network architecture.
input_node = 28 * 28  # one input node per pixel of a 28x28 MNIST image (784)
layer1_node = 500  # number of hidden-layer nodes
output_node = 10  # number of output-layer nodes

# Training schedule.
batch_size = 100  # number of samples in one training batch
training_steps = 30000  # number of training iterations

# Optimisation settings.
learning_rate_base = 0.8  # base learning rate
learning_rate_decay = 0.99  # learning-rate decay rate
regularization_rate = 0.0001  # coefficient of the regularization term
moving_average_decay = 0.99  # moving-average decay rate
之后我们创建一个函数用来实现神经网络的前向传播过程,同时加入滑动平均。
函数avg_class.average() 计算括号内变量的滑动平均值,这里的avg_class是最初我们初始化的滑动平均类
def inference(input_tensor, avg_class, weights1, biases1, weights2, biases2):
    """Build the forward pass of the one-hidden-layer network.

    Args:
        input_tensor: Input batch fed to the hidden layer.
        avg_class: Moving-average object exposing ``average(variable)``, or
            ``None`` to use the raw variables.
        weights1: Hidden-layer weights.
        biases1: Hidden-layer biases.
        weights2: Output-layer weights.
        biases2: Output-layer biases.

    Returns:
        The output-layer result. No softmax is applied here; the caller's
        loss function works on these raw values.
    """
    if avg_class is not None:
        # Swap every parameter for its moving-average (shadow) value.
        weights1 = avg_class.average(weights1)
        biases1 = avg_class.average(biases1)
        weights2 = avg_class.average(weights2)
        biases2 = avg_class.average(biases2)

    layer1 = tf.nn.relu(tf.matmul(input_tensor, weights1) + biases1)
    return tf.matmul(layer1, weights2) + biases2
训练模型过程
在之前简单的不添加任何优化算法的基础上,按照先后顺序分别使用了:
- 滑动平均模型
- L2正则化
- 学习率衰减
同时,在定义反向传播的优化算法之后,因为之前使用了滑动平均模型,需要使用tf.control_dependencies或tf.group这两种机制之一,这样每次训练迭代不仅更新了参数,也更新了参数的影子变量。
def train(mnist):
    """Train the network on MNIST and report its accuracy.

    Builds the graph (forward pass, moving averages, L2-regularized
    cross-entropy loss, exponentially decayed learning rate), runs
    ``training_steps`` gradient-descent iterations, prints the validation
    accuracy of the moving-average model every 1000 iterations and finally
    prints its accuracy on the test set.

    Args:
        mnist: Data set object exposing ``train``, ``validation`` and ``test``
            splits, as returned by ``input_data.read_data_sets``.
    """
    # Input placeholders.
    x = tf.placeholder(tf.float32, [None, input_node], name='x-input')
    y = tf.placeholder(tf.float32, [None, output_node], name='y-input')

    # Network parameters.
    weights1 = tf.Variable(tf.truncated_normal([input_node, layer1_node], stddev=0.1))
    biases1 = tf.Variable(tf.constant(0.1, dtype=tf.float32, shape=[layer1_node]))
    weights2 = tf.Variable(tf.truncated_normal([layer1_node, output_node], stddev=0.1))
    biases2 = tf.Variable(tf.constant(0.1, dtype=tf.float32, shape=[output_node]))

    # Forward pass on the raw parameters (used for the loss).
    y_hat = inference(x, None, weights1, biases1, weights2, biases2)

    # Step counter; it is passed to the moving average and to the learning
    # rate decay below. Marked non-trainable so it is not averaged itself.
    global_step = tf.Variable(0, trainable=False)

    # Maintain moving averages of every trainable variable.
    variable_averages = tf.train.ExponentialMovingAverage(moving_average_decay, global_step)
    variables_averages_op = variable_averages.apply(tf.trainable_variables())

    # Forward pass on the averaged parameters (used for evaluation).
    average_y_hat = inference(x, variable_averages, weights1, biases1, weights2, biases2)

    # Cross-entropy loss; labels are one-hot, so argmax recovers class ids.
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=y_hat, labels=tf.argmax(y, 1))
    cross_entropy_mean = tf.reduce_mean(cross_entropy)

    # L2 regularization on the weights only, not on the biases.
    regularizer = tf.contrib.layers.l2_regularizer(regularization_rate)
    regularization = regularizer(weights1) + regularizer(weights2)

    # Total loss = cross-entropy + regularization.
    loss = cross_entropy_mean + regularization

    # Exponentially decayed learning rate. decay_steps is the number of
    # iterations needed for one pass over the training data, i.e. the number
    # of samples divided by the batch size (not the raw sample count).
    learning_rate = tf.train.exponential_decay(
        learning_rate_base,
        global_step,
        mnist.train.num_examples / batch_size,
        learning_rate_decay)

    # Gradient descent; minimize() also increments global_step.
    train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(
        loss, global_step=global_step)

    # One op that updates both the parameters and their shadow variables.
    # Equivalent to: train_op = tf.group(train_step, variables_averages_op)
    with tf.control_dependencies([train_step, variables_averages_op]):
        train_op = tf.no_op(name='train')

    # Accuracy of the moving-average model.
    correction_prediction = tf.equal(tf.argmax(average_y_hat, 1), tf.argmax(y, 1))
    accuracy = tf.reduce_mean(tf.cast(correction_prediction, tf.float32))

    init = tf.global_variables_initializer()
    with tf.Session() as sess:
        sess.run(init)

        validate_feed = {x: mnist.validation.images,
                         y: mnist.validation.labels}
        test_feed = {x: mnist.test.images,
                     y: mnist.test.labels}

        for i in range(training_steps):
            xs, ys = mnist.train.next_batch(batch_size)
            sess.run(train_op, feed_dict={x: xs, y: ys})

            # Check the validation accuracy every 1000 iterations.
            if i % 1000 == 0:
                validate_acc = sess.run(accuracy, feed_dict=validate_feed)
                print("After %d training step, validation accuracy using average model is %g" % (i, validate_acc))

        # Training is finished: report the accuracy on the test set.
        test_acc = sess.run(accuracy, feed_dict=test_feed)
        print("test accuracy using average model is %g" % (test_acc))
最终程序的调用打包:
def main(argv=None):
    """Load the MNIST data set and run training; called through tf.app.run()."""
    dataset = input_data.read_data_sets(
        "C:/Users/14981/Desktop/Deep Learning/",
        one_hot=True,
    )
    train(dataset)


if __name__ == "__main__":
    tf.app.run()
二、变量管理
变量可以通过创建时赋予的名字来使用变量,这应用在网络结构复杂时候的情况。
通过使用tf.get_variable创建或获取变量
# Create a variable named "v" with get_variable, initialised to a constant.
v = tf.get_variable(
    "v",
    shape=[1],
    initializer=tf.constant_initializer(1.0),
)
# A constant-initialised variable named "v" built with tf.Variable instead.
v = tf.Variable(tf.constant(1.0, shape=[1]), name="v")
这里tf.get_variable的变量名称是必填参数,如果有重名变量,程序会报错,创建失败:
# This snippet is expected to raise: the name "v" is requested a second time.
v = tf.get_variable(
    "v", shape=[1], initializer=tf.constant_initializer(1.0))
w = tf.get_variable(
    "v", shape=[1, 2], initializer=tf.constant_initializer(2.0))
那么现在问题是我们需要获取已经创建变量,这就需要通过tf.variable_scope函数生成上下文管理器。
下述代码展示了这个过程:在tf.variable_scope中,如果reuse = False,tf.get_variable将创建新的变量;如果reuse = True,tf.get_variable将直接获取已经创建的变量。
# Create a variable named "v" inside the "foo" scope.
with tf.variable_scope("foo"):
    v = tf.get_variable("v", shape=[1], initializer=tf.constant_initializer(1.0))

# The "foo" scope already holds a variable named "v", so this is expected
# to raise.
with tf.variable_scope("foo"):
    v = tf.get_variable("v", [1])

# With reuse=True, get_variable returns the variable declared earlier.
with tf.variable_scope("foo", reuse=True):
    v1 = tf.get_variable("v", [1])
    print(v == v1)

# Expected to raise: no variable "v" has been created in the "bar" scope.
with tf.variable_scope("bar", reuse=True):
    v = tf.get_variable("v", [1])
同时tf.variable_scope可以嵌套的:
with tf.variable_scope("root"):
    # reuse flag of the "root" scope.
    print(tf.get_variable_scope().reuse)

    with tf.variable_scope("foo", reuse=True):
        # reuse flag of the "foo" scope.
        print(tf.get_variable_scope().reuse)

        with tf.variable_scope("bar"):
            # "bar" does not specify reuse, so it follows the enclosing scope.
            print(tf.get_variable_scope().reuse)

    # Back in "root" after leaving the nested scopes.
    print(tf.get_variable_scope().reuse)
通过tf.variable_scope创建命名空间,可以用来管理变量名称:
v1 = tf.get_variable("v", [1])
# Prints "v:0": "v" is the variable name, and ":0" marks the first output of
# the operation that produces the variable.
print(v1.name)

with tf.variable_scope("foo"):
    v2 = tf.get_variable("v", [1])
    # Prints "foo/v:0": the scope name is prefixed to the variable name.
    print(v2.name)

with tf.variable_scope("foo"):
    with tf.variable_scope("bar"):
        v3 = tf.get_variable("v", [1])
        # Prints "foo/bar/v:0".
        print(v3.name)

    v4 = tf.get_variable("v1", [1])
    # Prints "foo/v1:0".
    print(v4.name)

# From an empty-named scope with reuse=True, variables created in other
# scopes can be fetched through their full names.
with tf.variable_scope("", reuse=True):
    v5 = tf.get_variable("foo/bar/v", [1])
    print(v5 == v3)
    v6 = tf.get_variable("foo/v1", [1])
    print(v6 == v4)
使用tf.reset_default_graph():重置默认图
三、模型持久化
Tensorflow通过下述代码保存计算图
# Two variables and an addition make up the graph to be saved.
v1 = tf.Variable(tf.constant(1.0, shape=[1]), name="v1")
v2 = tf.Variable(tf.constant(2.0, shape=[1]), name="v2")
result = v1 + v2

init = tf.global_variables_initializer()

# tf.train.Saver is the object used to persist the model.
saver = tf.train.Saver()

with tf.Session() as sess:
    sess.run(init)
    saver.save(sess, "./model.ckpt")
此时文件目录下会出现四个文件:
model.ckpt.meta,model.ckpt.index,model.ckpt.data-00000-of-00001(此文件名不一定),checkpoint
.meta存储了网络结构,.index和.data保存了训练好的参数,checkpoint记录最新的模型。
通过下述代码恢复模型:
with tf.Session() as sess:
    # Load the persisted graph from the .meta file.
    saver = tf.train.import_meta_graph("./model.ckpt.meta")
    # Look up the most recent checkpoint in the directory and restore it.
    latest = tf.train.latest_checkpoint("./")
    saver.restore(sess, latest)
当然也可以直接通过下面代码恢复模型
# Restore directly from the checkpoint prefix; this relies on a `saver`
# object defined by an earlier snippet.
with tf.Session() as session:
    saver.restore(session, "./model.ckpt")
之前说到滑动平均模型,由于每个变量都对应存在一个影子变量,所以在保存模型的时候也要考虑。下面是保存滑动平均模型样例:
v = tf.Variable(0, dtype=tf.float32, name="v")

# No moving average has been declared yet, so only the variable v exists.
# Prints "v:0".
for var in tf.global_variables():
    print(var.name)

ema = tf.train.ExponentialMovingAverage(0.99)
maintain_averages_op = ema.apply(tf.global_variables())

# Declaring the moving average creates a shadow variable for v.
# Prints "v:0" and "v/ExponentialMovingAverage:0".
for var in tf.global_variables():
    print(var.name)

saver = tf.train.Saver()
init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)

    # Change v, then update its moving average.
    sess.run(tf.assign(v, 10))
    sess.run(maintain_averages_op)

    # The checkpoint stores both v and its shadow variable.
    saver.save(sess, "./model.ckpt")
    print(sess.run([v, ema.average(v)]))
之后读取模型参数,这里我们直接将保存的影子变量的值赋给变量v:
v = tf.Variable(0, dtype=tf.float32, name="v")

# Map the saved shadow variable of v onto v itself, so that restoring loads
# the moving-average value into v.
shadow_to_variable = {"v/ExponentialMovingAverage": v}
saver = tf.train.Saver(shadow_to_variable)

with tf.Session() as sess:
    saver.restore(sess, "./model.ckpt")
    print(sess.run(v))
也可以通过滑动平均类的variables_to_restore()函数,直接生成变量与其对应影子变量的字典:
v = tf.Variable(0, dtype=tf.float32, name="v")
ema = tf.train.ExponentialMovingAverage(0.99)

# variables_to_restore() builds the shadow-name -> variable dictionary that
# would otherwise be written by hand, e.g. {"v/ExponentialMovingAverage": v}.
print(ema.variables_to_restore())
saver = tf.train.Saver(ema.variables_to_restore())

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    saver.save(sess, "./model.ckpt")
读取模型参数
# Restore the checkpoint and print v; this relies on `saver` and `v` defined
# by the preceding snippet.
with tf.Session() as session:
    saver.restore(session, "./model.ckpt")
    print(session.run(v))
上述所使用的模型持久化,由于记录了程序运行所需要的全部信息,对于变量初始化信息,模型保存的辅助信息都有所记录,而有时实际使用的时候,只需要通过神经网络前向传播到输出层输出结果。Tensorflow提供了convert_variables_to_constants函数,该函数可以将计算图中的变量及取值通过常量方式保存。
import tensorflow as tf
from tensorflow.python.framework import graph_util
v1 = tf.Variable(tf.constant(1.0, shape=[1]), name="v1")
v2 = tf.Variable(tf.constant(2.0, shape=[1]), name="v2")
result = v1 + v2

init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)

    # Export the GraphDef part of the current graph.
    graph_def = tf.get_default_graph().as_graph_def()

    # Turn the variables and their values into constants and drop the nodes
    # that are not needed (such as variable initialisation). 'add' is the
    # name of the node to keep as output.
    output_graph_def = graph_util.convert_variables_to_constants(
        sess, graph_def, ['add'])

    # Write the exported model to a file.
    serialized = output_graph_def.SerializeToString()
    with tf.gfile.GFile("./combined_model.pb", "wb") as f:
        f.write(serialized)
读取模型
import tensorflow as tf
from tensorflow.python.platform import gfile
with tf.Session() as sess:
    model_filename = "./combined_model.pb"

    # Read the saved model file and parse it into a GraphDef protocol buffer.
    # tf.gfile.GFile is used instead of the deprecated FastGFile, matching the
    # snippet that wrote the file.
    with tf.gfile.GFile(model_filename, 'rb') as f:
        graph_def = tf.GraphDef()
        graph_def.ParseFromString(f.read())

    # Load the graph stored in graph_def into the current graph.
    # return_elements names the tensors to return: saving used the node name
    # ("add"), whereas loading uses the tensor name ("add:0").
    result = tf.import_graph_def(graph_def, return_elements=["add:0"])
    print(sess.run(result))
四、mnist最佳程序样例
最初的mnist程序样例没有涉及保存模型信息。
下面给出模型训练的过程程序:
import tensorflow as tf
import os
from tensorflow.examples.tutorials.mnist import input_data
# Network architecture: input, hidden and output layer sizes.
input_node, layer1_node, output_node = 784, 500, 10
def get_weight_variable(shape, regularizer):
    """Create or fetch the "weights" variable of the current variable scope.

    Args:
        shape: Shape of the weight variable.
        regularizer: Callable mapping the weights to a regularization loss,
            or ``None`` to skip regularization.

    Returns:
        The weight variable. When a regularizer is given, its loss term is
        also added to the "losses" collection.
    """
    weights = tf.get_variable(
        "weights", shape,
        initializer=tf.truncated_normal_initializer(stddev=0.1))
    # Identity check: only the absence of a regularizer should skip this.
    if regularizer is not None:
        tf.add_to_collection("losses", regularizer(weights))
    return weights
def inference(input_tensor, regularizer):
    """Build the forward pass of the network and return the output layer.

    Args:
        input_tensor: Input batch fed to the first layer.
        regularizer: Regularizer handed to ``get_weight_variable`` for both
            layers; may be ``None``.

    Returns:
        The result of the second layer (no activation applied).
    """
    zeros = tf.constant_initializer(0.0)

    with tf.variable_scope('layer1'):
        w1 = get_weight_variable([input_node, layer1_node], regularizer)
        b1 = tf.get_variable("biases", [layer1_node], initializer=zeros)
        hidden = tf.nn.relu(tf.matmul(input_tensor, w1) + b1)

    with tf.variable_scope('layer2'):
        w2 = get_weight_variable([layer1_node, output_node], regularizer)
        b2 = tf.get_variable(
            "biases", [output_node],
            initializer=tf.constant_initializer(0.0))
        logits = tf.matmul(hidden, w2) + b2

    return logits
# Training configuration.
batch_size = 100  # samples per training batch
training_steps = 30000  # number of training iterations
learning_rate_base = 0.8  # base learning rate
learning_rate_decay = 0.99  # learning-rate decay rate
regularaztion_rate = 0.0001  # L2 coefficient (name kept as spelt; train() reads it)
moving_average_decay = 0.99  # moving-average decay rate

# Directory and file name used when saving the model.
model_save_path, model_name = "./", "model.ckpt"
def train(mnist):
    """Train the network on MNIST and save a checkpoint every 1000 iterations.

    Args:
        mnist: Data set object whose ``train`` split provides
            ``num_examples`` and ``next_batch``.
    """
    x = tf.placeholder(tf.float32, [None, input_node], name='x-input')
    y = tf.placeholder(tf.float32, [None, output_node], name='y-input')

    # Forward pass with L2 regularization collected in the 'losses' collection.
    l2 = tf.contrib.layers.l2_regularizer(regularaztion_rate)
    logits = inference(x, l2)

    # Moving averages over all trainable variables.
    global_step = tf.Variable(0, trainable=False)
    ema = tf.train.ExponentialMovingAverage(moving_average_decay, global_step)
    ema_update = ema.apply(tf.trainable_variables())

    # Loss: mean cross-entropy plus the collected regularization terms.
    per_example_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits=logits, labels=tf.argmax(y, 1))
    mean_loss = tf.reduce_mean(per_example_loss)
    loss = mean_loss + tf.add_n(tf.get_collection('losses'))

    # Exponentially decayed learning rate.
    steps_per_epoch = mnist.train.num_examples / batch_size
    learning_rate = tf.train.exponential_decay(
        learning_rate_base, global_step, steps_per_epoch, learning_rate_decay)

    optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    gradient_step = optimizer.minimize(loss, global_step=global_step)

    # Single op that runs the gradient step and the moving-average update.
    with tf.control_dependencies([gradient_step, ema_update]):
        train_op = tf.no_op(name='train')

    init = tf.global_variables_initializer()
    saver = tf.train.Saver()
    checkpoint_prefix = os.path.join(model_save_path, model_name)

    with tf.Session() as sess:
        sess.run(init)
        for i in range(training_steps):
            xs, ys = mnist.train.next_batch(batch_size)
            fetched = sess.run([train_op, loss, global_step],
                               feed_dict={x: xs, y: ys})
            loss_value = fetched[1]

            if i % 1000 != 0:
                continue
            print("After %d training step, loss on training batch is %g" % (i, loss_value))
            # Passing global_step makes the step number part of the file name.
            saver.save(sess, checkpoint_prefix, global_step=global_step)
# Load MNIST from the current directory (one-hot labels) and start training.
mnist = input_data.read_data_sets(
    "./",
    one_hot=True,
)
train(mnist)
上述代码表示了整个训练过程,下面提供计算验证集准确率的代码,该代码每10秒加载一次最新保存的模型,并在验证集上计算正确率。
def evaluate(mnist, eval_interval_secs=10):
    """Repeatedly evaluate the newest checkpoint on the validation set.

    Rebuilds the forward pass in a fresh graph, restores the moving-average
    values of the parameters from the latest checkpoint found in
    ``model_save_path`` and prints the validation accuracy. The loop returns
    as soon as no checkpoint can be found.

    Args:
        mnist: Data set object exposing a ``validation`` split.
        eval_interval_secs: Seconds to wait between two evaluations.
    """
    # Local imports keep this function self-contained.
    import os
    import time

    with tf.Graph().as_default():
        x = tf.placeholder(tf.float32, [None, input_node], name='x-input')
        y = tf.placeholder(tf.float32, [None, output_node], name='y-input')
        validate_feed = {x: mnist.validation.images, y: mnist.validation.labels}

        # Forward pass; no regularizer is needed for evaluation.
        y_hat = inference(x, None)

        # Fraction of samples whose predicted class matches the label.
        correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_hat, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

        # Load each variable from its shadow (moving-average) value:
        # variables_to_restore() builds the shadow-name -> variable mapping.
        variable_averages = tf.train.ExponentialMovingAverage(
            moving_average_decay)
        variables_to_restore = variable_averages.variables_to_restore()
        saver = tf.train.Saver(variables_to_restore)

        while True:
            with tf.Session() as sess:
                # get_checkpoint_state reads the "checkpoint" file to find
                # the newest model in the directory.
                ckpt = tf.train.get_checkpoint_state(model_save_path)
                if not (ckpt and ckpt.model_checkpoint_path):
                    print("No checkpoint file found")
                    return

                saver.restore(sess, ckpt.model_checkpoint_path)
                # The file name ends in "-<step>"; recover the step from it.
                # basename() avoids assuming '/' as the path separator.
                global_step = os.path.basename(
                    ckpt.model_checkpoint_path).split('-')[-1]
                accuracy_score = sess.run(accuracy, feed_dict=validate_feed)
                print("After %s training step, validation accuracy = %g" % (global_step, accuracy_score))
            time.sleep(eval_interval_secs)
# Load MNIST from the current directory (one-hot labels) and start evaluating.
mnist = input_data.read_data_sets(
    "./",
    one_hot=True,
)
evaluate(mnist)