TensorFlow in Practice, Chapter 6: Convolutional Neural Networks

Today the CSDN blog has a bug where I cannot type in Chinese (pasting works); I do not know whether the Sogou input method conflicts with CSDN.

Let's begin!

The training process is divided into two .py files:

The first one is mnist_inference.py. It defines a function that takes the input tensor and returns the output tensor of the last fully connected layer. 'train' is a boolean parameter: if set to True, dropout is applied during training. 'regularizer' here is an L2 regularizer. Both techniques help avoid overfitting.
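To make that contract concrete, the two call sites below use it like this (a sketch; the two calls live in different graphs, in mnist_train.py and mnist_test.py respectively):

y_train = mnist_inference.inference(x_reshaped, True, regularizer)  # training: dropout on, L2 loss collected
y_eval = mnist_inference.inference(x_reshaped, False, None)         # evaluation: no dropout, no regularization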

The CNN contains two conv layers, two pooling layers, and two fully connected layers. The shapes are as follows (every layer's output really has batch_size as its first dimension; I omit it except where the reshape makes it explicit):

input: 28*28*1
first conv layer: filter 5*5*1*32, output 28*28*32
first pooling layer: output 14*14*32
second conv layer: filter 5*5*32*64, output 14*14*64
second pooling layer: output batch_size*7*7*64, reshaped from 4 dimensions down to 2 as [batch_size, 7*7*64]
first fully connected layer: weights [7*7*64, node_size], output [batch_size, node_size]
second fully connected layer: weights [node_size, num_of_labels], output [batch_size, num_of_labels]
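A quick sanity check of those spatial sizes, using the SAME-padding rule output = ceil(input / stride) (my own sketch, not from the book):

def same_out_size(in_size, stride):
    # SAME padding: out = ceil(in / stride), independent of the filter size
    return (in_size + stride - 1) // stride

assert same_out_size(28, 1) == 28  # conv1 and conv2 keep the spatial size
assert same_out_size(28, 2) == 14  # pool1 halves 28 -> 14
assert same_out_size(14, 2) == 7   # pool2 halves 14 -> 7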

Now we can see that the conv layers here only change the fourth dimension (32 and 64) and do not change the image size (28*28), while the pooling layers only change the image size (here halving the width and the height). Applying softmax to the last output (batch_size*num_of_labels) gives a probability distribution; reduce_sum and reduce_mean then give the cross entropy, and the final loss is the cross entropy plus the regularization loss.
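Written out by hand, that loss pipeline is the sketch below (mnist_train.py actually uses tf.nn.sparse_softmax_cross_entropy_with_logits, which fuses the softmax and the sum; the 1e-10 is my addition for numerical stability):

probs = tf.nn.softmax(y)                                            # [batch_size, num_of_labels]
cross_entropy = -tf.reduce_sum(y_ * tf.log(probs + 1e-10), axis=1)  # per-example cross entropy
cross_entropy_mean = tf.reduce_mean(cross_entropy)                  # average over the batch
loss = cross_entropy_mean + tf.add_n(tf.get_collection('losses'))   # plus the L2 terms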

The second one is mnist_train.py, used for training.

Another part is mnist_test.py, used for testing and prediction.

It is better to use AdamOptimizer to optimize the parameters. We also use ExponentialMovingAverage to keep shadow variables of the parameters, which is good for testing and prediction, and we save the model so that we can use it again if needed.
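For reference, this is the pattern (a sketch of the rule tf.train.ExponentialMovingAverage applies; the 0.99 matches moving_average_decay in mnist_train.py):

# for every trainable variable v, apply() maintains a shadow variable:
#   shadow_v = decay * shadow_v + (1 - decay) * v
ema = tf.train.ExponentialMovingAverage(0.99)
maintain_averages_op = ema.apply(tf.trainable_variables())
# at test time, load the shadow values in place of the raw variables:
saver = tf.train.Saver(ema.variables_to_restore())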


--------------------------------------------------------------------------------------

1. mnist_inference.py

import tensorflow as tf
input_node=784
output_node=10
image_size=28
num_channels=1
num_labels=10
conv1_deep=32
conv1_size=5
conv2_deep=64
conv2_size=5
fc_size=512
# Forward propagation
# Convolutional weights are initialized with truncated_normal_initializer and conv biases start at 0,
# while fully connected biases start at 0.1. Why?
def inference(input_tensor,train,regularizer):  # 'train' distinguishes training from evaluation
    with tf.variable_scope('layer1-conv1'):
        conv1_weights=tf.get_variable('weight',[conv1_size,conv1_size,num_channels,conv1_deep],initializer=tf.truncated_normal_initializer(stddev=0.1))
        conv1_biases=tf.get_variable('bias',[conv1_deep],initializer=tf.constant_initializer(0.0))
        conv1=tf.nn.conv2d(input_tensor,conv1_weights,strides=[1,1,1,1],padding='SAME')
        relu1=tf.nn.relu(tf.nn.bias_add(conv1,conv1_biases))
    with tf.variable_scope('layer2-pool1'):
        pool1=tf.nn.max_pool(relu1,ksize=[1,2,2,1],strides=[1,2,2,1],padding='SAME')
    with tf.variable_scope('layer3-conv2'):
        conv2_weights=tf.get_variable('weight',[conv2_size,conv2_size,conv1_deep,conv2_deep],initializer=tf.truncated_normal_initializer(stddev=0.1))
        conv2_biases=tf.get_variable('bias',[conv2_deep],initializer=tf.constant_initializer(0.0))
        conv2=tf.nn.conv2d(pool1,conv2_weights,strides=[1,1,1,1],padding='SAME')
        relu2=tf.nn.relu(tf.nn.bias_add(conv2,conv2_biases))
    with tf.variable_scope('layer4-pool2'):
        pool2=tf.nn.max_pool(relu2,ksize=[1,2,2,1],strides=[1,2,2,1],padding='SAME')
        # Convert the output of the layer-4 pooling layer into the input format of the layer-5 fully connected layer
        pool_shape=pool2.get_shape().as_list()  # dropping as_list() also works
        nodes=pool_shape[1]*pool_shape[2]*pool_shape[3]  # index 0 is the batch size
        # Flatten the layer-4 output into a batch of vectors
        reshaped=tf.reshape(pool2,[-1,nodes])  # was pool_shape[0]; it runs after changing the first dimension to -1
        # print(pool_shape[0])  # planning to try this; it should print None, since the placeholder's batch dimension is None
    with tf.variable_scope('layer5-fc1'):
        fc1_weights=tf.get_variable('weight',[nodes,fc_size],initializer=tf.truncated_normal_initializer(stddev=0.1))
        if regularizer is not None:
            tf.add_to_collection('losses',regularizer(fc1_weights))
        fc1_biases=tf.get_variable('bias',[fc_size],initializer=tf.constant_initializer(0.1))
        fc1=tf.nn.relu(tf.matmul(reshaped,fc1_weights)+fc1_biases)
        if train:
            fc1=tf.nn.dropout(fc1,0.5)  # dropout is usually applied to the fully connected layers
    with tf.variable_scope('layer6-fc2'):
        fc2_weights=tf.get_variable('weight',[fc_size,num_labels],initializer=tf.truncated_normal_initializer(stddev=0.1))
        if regularizer is not None:
            tf.add_to_collection('losses',regularizer(fc2_weights))
        fc2_biases=tf.get_variable('bias',[num_labels],initializer=tf.constant_initializer(0.1))
        logit=tf.matmul(fc1,fc2_weights)+fc2_biases

    return logit
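A quick way to check the wiring above (my own snippet, run once in a fresh graph; not in the book):

x = tf.placeholder(tf.float32, [None, image_size, image_size, num_channels])
logits = inference(x, False, None)
print(logits.get_shape())  # prints (?, 10)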

-------------------------------------------------------------------------------------

2. mnist_train.py

import os
import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
import mnist_inference

batch_size=100
#learning_rate_base=0.8 #for GradientDescentOptimizer
#learning_rate_decay=0.99
regularization_rate=0.0001
training_steps=20000
moving_average_decay=0.99
model_save_path='/tmp/my_LeNet/model_1/'  #folder

model_name='model.ckpt'  #filename


def train(mnist):
    x=tf.placeholder(tf.float32,[None,mnist_inference.input_node],name='x-input')
    y_=tf.placeholder(tf.float32,[None,mnist_inference.output_node],name='y-input')
    x_reshaped=tf.reshape(x,[-1,mnist_inference.image_size,mnist_inference.image_size,mnist_inference.num_channels])
    regularizer=tf.contrib.layers.l2_regularizer(regularization_rate)
    y=mnist_inference.inference(x_reshaped,True,regularizer)
    global_step=tf.Variable(0,trainable=False)
    variable_averages=tf.train.ExponentialMovingAverage(moving_average_decay,global_step)
    variable_averages_op=variable_averages.apply(tf.trainable_variables())
    cross_entropy=tf.nn.sparse_softmax_cross_entropy_with_logits(logits=y,labels=tf.argmax(y_,1))  # tf.argmax(y_,1) returns the index of the max value in each row
    cross_entropy_mean=tf.reduce_mean(cross_entropy)
    loss=cross_entropy_mean+tf.add_n(tf.get_collection('losses'))  # sum the elements of the 'losses' collection, then add cross_entropy_mean
    train_step=tf.train.AdamOptimizer(1e-3).minimize(loss,global_step=global_step)
    with tf.control_dependencies([train_step,variable_averages_op]):
        train_op=tf.no_op(name='train')
    correct_prediction = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_prediction, tf.float32))
    saver=tf.train.Saver()
    with tf.Session() as sess:
        tf.global_variables_initializer().run()
        for i in range(training_steps):
            xs,ys=mnist.train.next_batch(batch_size)
            #reshaped_xs=np.reshape(xs,(batch_size,mnist_inference.image_size,mnist_inference.image_size,mnist_inference.num_channels))
            _,loss_value,accuracy_value,step=sess.run([train_op,loss,accuracy,global_step],feed_dict={x:xs,y_:ys})
            if i%1000==0:
                print('After %d steps,loss on training batch is %g accuracy is %g'%(step,loss_value,accuracy_value))
                saver.save(sess,os.path.join(model_save_path,model_name),global_step=global_step)
def main(argv=None):
    mnist=input_data.read_data_sets('/newSoftware/Sublime Text 3/Data/myCode/MNIST-data/',one_hot=True)  # one_hot means only one value in each label vector is 1
    train(mnist)
if __name__=='__main__':
    tf.app.run()

---------------------------------------------------------------------------------------

3. mnist_test.py

import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
import mnist_inference

import mnist_train


def evaluate(mnist):
    with tf.Graph().as_default() as g:
        x=tf.placeholder(tf.float32,[None,mnist_inference.input_node],name='x-input')
        y_=tf.placeholder(tf.float32,[None,mnist_inference.output_node],name='y-input')
        x_=tf.reshape(x,[-1,mnist_inference.image_size,mnist_inference.image_size,mnist_inference.num_channels])
        y=mnist_inference.inference(x_,False,None)
        correct_prediction=tf.equal(tf.argmax(y,1),tf.argmax(y_,1))
        accuracy=tf.reduce_mean(tf.cast(correct_prediction,tf.float32))
        variable_averages=tf.train.ExponentialMovingAverage(mnist_train.moving_average_decay)
        # The following op restores the moving-average (shadow) values of the variables
        variables_to_restore=variable_averages.variables_to_restore()
        saver=tf.train.Saver(variables_to_restore)
        with tf.Session() as sess:
            ckpt=tf.train.get_checkpoint_state(mnist_train.model_save_path)
            if ckpt and ckpt.model_checkpoint_path:
                saver.restore(sess,ckpt.model_checkpoint_path)
                global_step=ckpt.model_checkpoint_path.split('/')[-1].split('-')[-1]  # the latest model
                # Feeding in all the validation data at once momentarily freezes here -- evaluate in batches?
                accuracy_score=sess.run(accuracy,feed_dict={x:mnist.validation.images,y_:mnist.validation.labels})
                print("after %s training steps,validation accuracy=%g"%(global_step,accuracy_score))
            else:
                print('no checkpoint file found')
                return
def main(argv=None):
    mnist=input_data.read_data_sets('/newSoftware/Sublime Text 3/Data/myCode/MNIST-data/',one_hot=True)  # one_hot means only one value in each label vector is 1
    evaluate(mnist)
if __name__ == '__main__':
    tf.app.run()
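About the freeze noted in the comment above: a hedged sketch of batched evaluation that could replace the single accuracy run inside the session block (the batch size of 500 is arbitrary):

eval_batch = 500
correct = 0.0
for i in range(0, mnist.validation.num_examples, eval_batch):
    xs = mnist.validation.images[i:i+eval_batch]
    ys = mnist.validation.labels[i:i+eval_batch]
    # accuracy is a per-batch mean, so weight it by the batch length
    correct += sess.run(accuracy, feed_dict={x: xs, y_: ys}) * len(xs)
print('batched validation accuracy=%g' % (correct / mnist.validation.num_examples))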

---------------------------------------------------------------------------------------

Since the text above was copied straight from my code, it is not very convenient to read, and I cannot explain more for now. Hopefully I will be able to write in Chinese another day...


Reposted from blog.csdn.net/qq_26567507/article/details/79672741