Tensorflow框架（三）

一、MNIST数字识别

首先加载MNIST手写数字识别训练集

import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data

# Load the MNIST data set from the given directory with one-hot encoded labels.
mnist = input_data.read_data_sets("C:/Users/14981/Desktop/Deep Learning/", one_hot = True)
# Number of examples in the training, validation and test splits.
print("Training data size:", mnist.train.num_examples)
print("Validating data size:", mnist.validation.num_examples)
print("Test data size:", mnist.test.num_examples)
# First training image and its label.
print("Example training data:", mnist.train.images[0])
print("Example training data label:", mnist.train.labels[0])

然后定义所需参数

input_node = 784 # MNIST images are 28*28 pixels, so the input layer has 784 nodes
output_node = 10 # number of output-layer nodes
layer1_node = 500 # number of hidden-layer nodes
batch_size = 100 # number of training examples in one batch
learning_rate_base = 0.8 # base learning rate
learning_rate_decay = 0.99 # learning-rate decay rate
regularization_rate = 0.0001 # scale passed to the L2 regularizer
training_steps = 30000 # number of training steps
moving_average_decay = 0.99 # moving-average decay rate

之后我们创建一个函数用来实现神经网络的前向传播过程，同时加入滑动平均。

函数avg_class.average() 计算括号内变量的滑动平均值，这里的avg_class是最初我们初始化的滑动平均类

def inference(input_tensor, avg_class, weights1, biases1, weights2, biases2):
    """Compute the forward pass of the one-hidden-layer ReLU network.

    Args:
        input_tensor: input batch multiplied with ``weights1``.
        avg_class: object exposing ``average(variable)`` (the moving-average
            class), or None to use the raw variable values.
        weights1: weights of the hidden layer.
        biases1: biases of the hidden layer.
        weights2: weights of the output layer.
        biases2: biases of the output layer.

    Returns:
        The output-layer result without softmax applied; softmax is left to
        the loss function.
    """
    # Identity comparison is the correct way to test for None.
    if avg_class is None:
        layer1 = tf.nn.relu(tf.matmul(input_tensor, weights1) + biases1)
        return tf.matmul(layer1, weights2) + biases2

    # Use the moving average of every parameter instead of its current value.
    layer1 = tf.nn.relu(
        tf.matmul(input_tensor, avg_class.average(weights1)) + avg_class.average(biases1))
    return tf.matmul(layer1, avg_class.average(weights2)) + avg_class.average(biases2)

训练模型过程

在之前简单的不添加任何优化算法的基础上，按照先后顺序分别使用了：

滑动平均模型
L2正则化
学习率衰减

同时，在定义反向传播的优化算法之后，由于之前使用了滑动平均模型，需要使用tf.control_dependencies或tf.group这两种机制之一，把参数更新操作和滑动平均更新操作组合在一起，这样每次训练迭代不仅会更新参数，也会同时更新参数的影子变量。

def train(mnist):
    """Build the network, train it on MNIST and print accuracy figures.

    Combines a moving-average model, L2 regularization and exponential
    learning-rate decay. Accuracy of the averaged model on the validation set
    is printed every 1000 steps, and accuracy on the test set once at the end.

    Args:
        mnist: data set object exposing ``train``, ``validation`` and ``test``
            splits (as returned by ``input_data.read_data_sets``).
    """
    # Placeholders for the input images and the one-hot labels
    x = tf.placeholder(tf.float32, [None, input_node], name = 'x-input')
    y = tf.placeholder(tf.float32, [None, output_node], name = 'y-input')

    # Network parameters
    weights1 = tf.Variable(tf.truncated_normal([input_node, layer1_node], stddev = 0.1))
    biases1 = tf.Variable(tf.constant(0.1, dtype = tf.float32, shape = [layer1_node]))
    weights2 = tf.Variable(tf.truncated_normal([layer1_node, output_node], stddev = 0.1))
    biases2 = tf.Variable(tf.constant(0.1, dtype = tf.float32, shape = [output_node]))

    # Forward pass using the current parameter values
    y_hat = inference(x, None, weights1, biases1, weights2, biases2)

    # Step counter; it plays the role of num_updates in the moving-average
    # model and is marked non-trainable so it is not averaged itself
    global_step = tf.Variable(0, trainable = False)

    # Moving-average object
    variable_averages = tf.train.ExponentialMovingAverage(moving_average_decay, global_step)

    # Maintain a moving average for every trainable variable
    # (global_step is excluded because it is not trainable)
    variables_averages_op = variable_averages.apply(tf.trainable_variables())

    # Forward pass using the moving averages of the parameters
    average_y_hat = inference(x, variable_averages, weights1, biases1, weights2, biases2)

    # Cross-entropy loss; labels are converted from one-hot to class indices
    cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits = y_hat, labels = tf.argmax(y, 1))
    cross_entropy_mean = tf.reduce_mean(cross_entropy)

    # L2 regularization on the weights only (biases are not regularized)
    regularizer = tf.contrib.layers.l2_regularizer(regularization_rate)
    regularization = regularizer(weights1) + regularizer(weights2)

    # Total loss = cross-entropy loss + regularization loss
    loss = cross_entropy_mean + regularization

    # Exponentially decaying learning rate
    learning_rate = tf.train.exponential_decay(
        learning_rate_base, # base learning rate that gets decayed
        global_step,        # current training step
        mnist.train.num_examples / batch_size, # steps needed to go through all training data once
        learning_rate_decay) # decay rate

    # Gradient descent; minimize() also increments global_step
    train_step = tf.train.GradientDescentOptimizer(learning_rate).minimize(loss, global_step = global_step)

    # Each training step must update both the parameters and their shadow
    # variables. The statement below is equivalent to
    # train_op = tf.group(train_step, variables_averages_op)
    with tf.control_dependencies([train_step, variables_averages_op]):
        train_op = tf.no_op(name = 'train')

    # Compare the averaged model's predicted class with the true class
    correction_prediction = tf.equal(tf.argmax(average_y_hat, 1), tf.argmax(y, 1))

    # Cast the booleans to float32 and average them to obtain the accuracy
    accuracy = tf.reduce_mean(tf.cast(correction_prediction, tf.float32))

    # Variable initialization
    init = tf.global_variables_initializer()
    with tf.Session() as sess:
        sess.run(init)

        # Feed dictionary for the validation set
        validate_feed = {x: mnist.validation.images,
                         y: mnist.validation.labels}
        # Feed dictionary for the test set
        test_feed = {x: mnist.test.images,
                     y: mnist.test.labels}

        # Training loop
        for i in range(training_steps):
            # Fetch the batch for the current step
            xs, ys = mnist.train.next_batch(batch_size)
            sess.run(train_op, feed_dict = {x: xs, y:ys})

            # Report validation accuracy every 1000 steps
            if i % 1000 == 0:
                validate_acc = sess.run(accuracy, feed_dict = validate_feed)
                print("After %d training step, validation accuracy using average model is %g" %(i, validate_acc))

        # Training is finished: report the accuracy on the test set
        test_acc = sess.run(accuracy, feed_dict = test_feed)
        print("test accuracy using average model is %g" % (test_acc))

最终程序的调用打包：

def main(argv = None):
    """Load the MNIST data set with one-hot labels and run training on it."""
    data_dir = "C:/Users/14981/Desktop/Deep Learning/"
    train(input_data.read_data_sets(data_dir, one_hot = True))


if __name__ == '__main__':
    # Only start the TensorFlow app runner when executed as a script.
    tf.app.run()

二、变量管理

除了直接引用变量对象外，还可以通过创建变量时赋予的名字来获取并使用变量，这在网络结构比较复杂时尤其有用。

通过tf.get_variable函数可以创建或获取变量。在创建变量时，下面两种定义方式是等价的：

# Create a variable named "v" with get_variable, initialized to the given constant
v = tf.get_variable("v", shape = [1], initializer = tf.constant_initializer(1.0))
# Create a variable with tf.Variable, passing the same name, shape and constant value
v = tf.Variable(tf.constant(1.0, shape = [1]), name = "v")

这里tf.get_variable的变量名称是必填参数，如果有重名变量，程序会报错，创建失败：

# This snippet raises an error because a variable named "v" is created twice
v = tf.get_variable("v", shape = [1], initializer = tf.constant_initializer(1.0))
w = tf.get_variable("v", shape = [1,2], initializer = tf.constant_initializer(2.0))

那么现在问题是我们需要获取已经创建变量，这就需要通过tf.variable_scope函数生成上下文管理器。

下述代码展示了这个过程：如果tf.variable_scope的参数reuse = False，tf.get_variable将创建新的变量；如果reuse = True，tf.get_variable将直接获取已经创建的变量。

# Create a variable named v inside the name space foo
with tf.variable_scope("foo"):
    v = tf.get_variable(
    "v", shape = [1], initializer = tf.constant_initializer(1.0))

# A variable named v already exists in name space foo, so the code below raises an error
with tf.variable_scope("foo"):
    v = tf.get_variable("v", [1])

# With reuse set to True, tf.get_variable returns the already declared variable
with tf.variable_scope("foo", reuse = True):
    v1 = tf.get_variable("v", [1])
    print(v == v1)

# This snippet raises an error because no variable v was created in name space bar
with tf.variable_scope("bar", reuse = True):
    v = tf.get_variable("v", [1])

同时，tf.variable_scope是可以嵌套使用的：

with tf.variable_scope("root"):
    # Print the reuse setting of name space root
    print(tf.get_variable_scope().reuse)
    with tf.variable_scope("foo", reuse = True):
        # Print the reuse setting of name space foo
        print(tf.get_variable_scope().reuse)
        with tf.variable_scope("bar"):
            # reuse is not specified for bar, so it matches the enclosing scope's reuse
            print(tf.get_variable_scope().reuse)
    # Back in root: print its reuse setting again
    print(tf.get_variable_scope().reuse)

通过tf.variable_scope创建命名空间，可以用来管理变量名称:

v1 = tf.get_variable("v", [1])
print(v1.name) # prints v:0; "v" is the variable name and ":0" marks the first output of the op that produces it

with tf.variable_scope("foo"):
    v2 = tf.get_variable("v", [1])
    print(v2.name) # prints foo/v:0; as above, but foo/v denotes variable v inside name space foo

with tf.variable_scope("foo"):
    with tf.variable_scope("bar"):
        v3 = tf.get_variable("v", [1])
        print(v3.name) # prints foo/bar/v:0
    v4 = tf.get_variable("v1",[1])
    print(v4.name) # prints foo/v1:0
# An empty scope name with reuse = True lets variables be fetched by their full scoped name
with tf.variable_scope("",reuse = True):
    v5 = tf.get_variable("foo/bar/v", [1])
    print(v5 == v3)
    v6 = tf.get_variable("foo/v1", [1])
    print(v6 == v4)

使用tf.reset_default_graph()：重置默认图

三、模型持久化

Tensorflow通过下述代码保存计算图

# Two variables and the node that adds them
v1 = tf.Variable(tf.constant(1.0, shape = [1]), name = "v1")
v2 = tf.Variable(tf.constant(2.0, shape = [1]), name = "v2")
result = v1 + v2

init = tf.global_variables_initializer()

# Declare a tf.train.Saver used to save the model
saver = tf.train.Saver()

with tf.Session() as sess:
    sess.run(init)
    # Write the model under the path prefix ./model.ckpt
    saver.save(sess, "./model.ckpt")

此时文件目录下会出现四个文件：

model.ckpt.meta，model.ckpt.index，model.ckpt.data-00000-of-00001(此文件名不一定)，checkpoint

.meta存储了网络结构，.index和.data保存了训练好的参数，checkpoint记录最新的模型。

通过下述代码恢复模型：

with tf.Session() as sess:
    # Load the persisted graph
    saver = tf.train.import_meta_graph("./model.ckpt.meta")

    # Look up the latest checkpoint in the current directory and restore it
    saver.restore(sess, tf.train.latest_checkpoint("./"))

当然也可以直接通过下面代码恢复模型

with tf.Session() as sess:
    # Restore from the checkpoint using an existing `saver`
    # (this snippet does not create one itself)
    saver.restore(sess, "./model.ckpt")

之前说到滑动平均模型，由于每个变量都对应存在一个影子变量，所以在保存模型的时候也要考虑。下面是保存滑动平均模型样例:

v = tf.Variable(0, dtype = tf.float32, name = "v")

# No moving-average model declared yet, so there is only the variable v
# Prints v:0
for variables in tf.global_variables():
    print(variables.name)
    
ema = tf.train.ExponentialMovingAverage(0.99)
maintain_averages_op = ema.apply(tf.global_variables())

# After declaring the moving-average model a shadow variable is created for v
# Prints v:0 and v/ExponentialMovingAverage:0
for variables in tf.global_variables():
    print(variables.name)
    
saver = tf.train.Saver()
init = tf.global_variables_initializer()
with tf.Session() as sess:
    sess.run(init)
    # Assign 10 to v, then update its moving average
    sess.run(tf.assign(v, 10))
    sess.run(maintain_averages_op)
    # Save both v and its shadow variable
    saver.save(sess, "./model.ckpt")
    print(sess.run([v, ema.average(v)]))

之后读取模型参数，这里我们通过变量重命名，直接将保存的影子变量v/ExponentialMovingAverage的取值加载到变量v中：

v = tf.Variable(0, dtype = tf.float32, name = "v")
# Load the saved shadow variable of v into v itself
saver = tf.train.Saver({"v/ExponentialMovingAverage": v})
with tf.Session() as sess:
    saver.restore(sess, "./model.ckpt")
    print(sess.run(v))

也可以通过滑动平均类提供的variables_to_restore()函数，自动生成影子变量名称到对应变量的字典：

v = tf.Variable(0, dtype = tf.float32, name = "v")
ema = tf.train.ExponentialMovingAverage(0.99)

# ema.variables_to_restore() builds the same kind of dictionary as the
# hand-written {"v/ExponentialMovingAverage": v} mapping
print(ema.variables_to_restore())
saver = tf.train.Saver(ema.variables_to_restore())


with tf.Session() as sess:
    # Restore rather than save: saving a freshly initialized v through this
    # mapping would overwrite ./model.ckpt with 0 stored under the shadow
    # variable's name and discard the moving average saved earlier.
    saver.restore(sess, "./model.ckpt")
    print(sess.run(v))

读取模型参数

with tf.Session() as sess:
    # Restore from the checkpoint with the saver defined above and print v
    saver.restore(sess, "./model.ckpt")
    print(sess.run(v))

上述模型持久化方式会记录程序运行所需要的全部信息，包括变量初始化信息、模型保存的辅助信息等；而在实际使用时，有时只需要知道如何从神经网络的输入层经过前向传播计算得到输出层的结果，并不需要这些额外信息。Tensorflow提供了convert_variables_to_constants函数，该函数可以将计算图中的变量及其取值以常量的方式保存。

import tensorflow as tf
from tensorflow.python.framework import graph_util

v1 = tf.Variable(tf.constant(1.0, shape = [1]), name = "v1")
v2 = tf.Variable(tf.constant(2.0, shape = [1]), name = "v2")
# The addition node is the one referred to as 'add' when exporting below
result = v1 + v2
init = tf.global_variables_initializer()

with tf.Session() as sess:
    sess.run(init)
    
    # Export the GraphDef part of the current graph
    graph_def = tf.get_default_graph().as_graph_def()

    # Convert the variables and their values into constants and drop nodes
    # that are not needed (e.g. variable initialization ops)
    output_graph_def = graph_util.convert_variables_to_constants(sess, graph_def, ['add'])
    
    # Write the exported model to a file
    with tf.gfile.GFile("./combined_model.pb", "wb") as f:
        f.write(output_graph_def.SerializeToString())

读取模型

import tensorflow as tf
from tensorflow.python.platform import gfile
with tf.Session() as sess:
    model_filename = "./combined_model.pb"

    # Read the saved model file and parse it into a GraphDef protocol buffer.
    # GFile replaces the deprecated FastGFile and matches the class used
    # when the file was written.
    with gfile.GFile(model_filename, 'rb') as f:
        graph_def = tf.GraphDef()
        graph_def.ParseFromString(f.read())

    # Load the graph stored in graph_def into the current graph.
    # return_elements = ["add:0"] names the tensor to return: when saving, the
    # name of the computation node ("add") was given, whereas loading asks for
    # the name of a tensor, hence "add:0".
    result = tf.import_graph_def(graph_def, return_elements = ["add:0"])
    print(sess.run(result))

四、mnist最佳程序样例

最初的mnist程序样例没有涉及保存模型信息。

下面给出模型训练的过程程序：

import tensorflow as tf
import os
from tensorflow.examples.tutorials.mnist import input_data

# Network structure parameters: sizes of the input, output and hidden layers
input_node = 784
output_node = 10
layer1_node = 500

# Create (or fetch) the weight variable of the current variable scope
def get_weight_variable(shape, regularizer):
    """Return the "weights" variable of the enclosing variable scope.

    Args:
        shape: shape of the weight variable.
        regularizer: callable applied to the weights whose result is added to
            the "losses" collection, or None to skip regularization.

    Returns:
        The weight variable, initialized from a truncated normal
        distribution with stddev 0.1.
    """
    weights = tf.get_variable(
        "weights", shape,
        initializer = tf.truncated_normal_initializer(stddev = 0.1))

    # Identity comparison is the correct way to test for None.
    if regularizer is not None:
        tf.add_to_collection("losses", regularizer(weights))

    return weights

# Forward pass of the network
def inference(input_tensor, regularizer):
    """Build the two fully connected layers and return the output layer.

    Args:
        input_tensor: input batch multiplied with the first layer's weights.
        regularizer: forwarded to get_weight_variable for both layers
            (may be None).

    Returns:
        The result of the second layer, without any activation applied.
    """
    # Hidden layer with ReLU activation
    with tf.variable_scope('layer1'):
        hidden_weights = get_weight_variable(
            [input_node, layer1_node], regularizer)
        hidden_biases = tf.get_variable(
            "biases", [layer1_node],
            initializer = tf.constant_initializer(0.0))
        hidden = tf.nn.relu(tf.matmul(input_tensor, hidden_weights) + hidden_biases)

    # Output layer
    with tf.variable_scope('layer2'):
        output_weights = get_weight_variable(
            [layer1_node, output_node], regularizer)
        output_biases = tf.get_variable(
            "biases", [output_node],
            initializer = tf.constant_initializer(0.0))
        return tf.matmul(hidden, output_weights) + output_biases

# Training configuration
batch_size = 100
learning_rate_base = 0.8
learning_rate_decay = 0.99
# NOTE(review): the name is misspelled ("regularaztion"), but train() refers to
# it by this exact name, so both must be renamed together.
regularaztion_rate = 0.0001
training_steps = 30000
moving_average_decay = 0.99

# Directory and file name used when saving the model
model_save_path = "./"
model_name = "model.ckpt"

def train(mnist):
    """Train the network on MNIST and save a checkpoint every 1000 steps.

    Args:
        mnist: data set object whose ``train`` split provides
            ``num_examples`` and ``next_batch``.
    """
    x = tf.placeholder(tf.float32, [None, input_node], name = 'x-input')
    y = tf.placeholder(tf.float32, [None, output_node], name = 'y-input')

    # Forward pass; the regularizer adds each layer's weight penalty to the
    # 'losses' collection
    logits = inference(x, tf.contrib.layers.l2_regularizer(regularaztion_rate))

    # Moving-average model over all trainable variables
    global_step = tf.Variable(0, trainable = False)
    ema = tf.train.ExponentialMovingAverage(moving_average_decay, global_step)
    ema_update_op = ema.apply(tf.trainable_variables())

    # Loss = mean cross entropy + everything collected under 'losses'
    per_example_loss = tf.nn.sparse_softmax_cross_entropy_with_logits(
        logits = logits, labels = tf.argmax(y, 1))
    regularization_loss = tf.add_n(tf.get_collection('losses'))
    loss = tf.reduce_mean(per_example_loss) + regularization_loss

    # Exponentially decaying learning rate
    decay_steps = mnist.train.num_examples / batch_size
    learning_rate = tf.train.exponential_decay(
        learning_rate_base, global_step, decay_steps, learning_rate_decay)
    optimizer = tf.train.GradientDescentOptimizer(learning_rate)
    train_step = optimizer.minimize(loss, global_step = global_step)

    # One op that runs both the parameter update and the moving-average update
    with tf.control_dependencies([train_step, ema_update_op]):
        train_op = tf.no_op(name = 'train')

    init = tf.global_variables_initializer()
    saver = tf.train.Saver()
    checkpoint_prefix = os.path.join(model_save_path, model_name)

    with tf.Session() as sess:
        sess.run(init)

        for i in range(training_steps):
            xs, ys = mnist.train.next_batch(batch_size)
            fetched = sess.run([train_op, loss, global_step],
                               feed_dict = {x:xs, y:ys})
            loss_value = fetched[1]

            # Only report and checkpoint on every 1000th iteration
            if i % 1000 != 0:
                continue
            print("After %d training step, loss on training batch is %g" %(i, loss_value))
            saver.save(sess, checkpoint_prefix, global_step = global_step)

# Load MNIST from the current directory with one-hot labels and start training
mnist = input_data.read_data_sets("./", one_hot = True)
train(mnist)

上述代码实现了整个训练过程。下面给出计算模型正确率的代码，该代码每隔10秒加载一次最新保存的模型，并在验证集上计算滑动平均模型的正确率。

def evaluate(mnist, eval_interval_secs = 10):
    """Periodically evaluate the newest checkpoint on the validation set.

    Rebuilds the forward pass in a fresh graph, loads the moving-average
    values of the parameters from the latest checkpoint found in
    ``model_save_path`` and prints the validation accuracy. This repeats
    until no checkpoint can be found.

    Args:
        mnist: data set object exposing the ``validation`` split.
        eval_interval_secs: seconds to sleep between two evaluations.
    """
    # Imported locally so the function is usable even though the surrounding
    # script does not import time at the top.
    import time

    with tf.Graph().as_default():
        x = tf.placeholder(tf.float32, [None, input_node], name = 'x-input')
        y = tf.placeholder(tf.float32, [None, output_node], name = 'y-input')
        validate_feed = {x:mnist.validation.images, y:mnist.validation.labels}

        # Forward pass; no regularizer is needed for evaluation
        y_hat = inference(x, None)

        # Accuracy: fraction of examples whose predicted class matches the label
        correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_hat, 1))
        accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))

        # Moving-average object, used only to build the restore mapping
        variable_averages = tf.train.ExponentialMovingAverage(
            moving_average_decay)

        # Dictionary from shadow-variable names to the variables of this graph,
        # so the saved moving averages are loaded into the parameters
        variables_to_restore = variable_averages.variables_to_restore()
        saver = tf.train.Saver(variables_to_restore)

        while True:
            with tf.Session() as sess:
                # tf.train.get_checkpoint_state locates the newest model file
                # in the directory through the checkpoint file
                ckpt = tf.train.get_checkpoint_state(
                    model_save_path)
                if ckpt and ckpt.model_checkpoint_path:
                    # Load the model
                    saver.restore(sess, ckpt.model_checkpoint_path)
                    # The step count at save time is the suffix of the file name
                    global_step = ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1]

                    accuracy_score = sess.run(accuracy, feed_dict = validate_feed)

                    print("After %s training step(s), validation accuracy = %g" % (global_step, accuracy_score))
                else:
                    print("No checkpoint file found")
                    return
            time.sleep(eval_interval_secs)

# Load MNIST from the current directory with one-hot labels and start evaluating
mnist = input_data.read_data_sets("./", one_hot = True)
evaluate(mnist)

Tensorflow框架（三）

猜你喜欢