VGGNet network structure
The code below is based on VGG-16
from datetime import datetime
import math
import time

import tensorflow as tf

# Parameter initialization: the Xavier initialization method was introduced in
# section 4.1 (Autoencoder).
# Reference: https://blog.csdn.net/li_haiyu/article/details/80009430
# The hand-written version below is kept for reference only; the layers in this
# file use the initializers shipped in tf.contrib.layers instead.
# def xavier_init(fan_in, fan_out, constant=1):
#     low = -constant * np.sqrt(6.0 / (fan_in + fan_out))
#     high = constant * np.sqrt(6.0 / (fan_in + fan_out))
#     return tf.random_uniform((fan_in, fan_out), minval=low, maxval=high,
#                              dtype=tf.float32)


def conv_op(input_op, name, kh, kw, n_out, dh, dw, p):
    """Create a convolutional layer with ReLU and record its parameters.

    Args:
        input_op: Input tensor; its last dimension is taken as the number of
            input channels.
        name: Name of the layer, used as the name scope.
        kh: Kernel height.
        kw: Kernel width.
        n_out: Number of convolution kernels (output channels).
        dh: Stride along the height.
        dw: Stride along the width.
        p: Parameter list; the layer's kernel and biases are appended to it
            in place.

    Returns:
        The ReLU activation tensor of the layer.
    """
    # get_shape() returns the tensor dimensions; [-1] picks the last one,
    # which here is the number of input channels.
    # Reference: https://blog.csdn.net/li_haiyu/article/details/80063842
    n_in = input_op.get_shape()[-1].value

    # tf.name_scope is used to manage parameter naming.
    # Reference: https://blog.csdn.net/Li_haiyu/article/details/80119340
    with tf.name_scope(name) as scope:
        # shape = [height, width, input channels, output channels]
        kernel = tf.get_variable(
            scope + "W",
            shape=[kh, kw, n_in, n_out],
            dtype=tf.float32,
            initializer=tf.contrib.layers.xavier_initializer_conv2d(),
        )
        # Convolve input_op with the kernel.
        conv = tf.nn.conv2d(input_op, kernel, (1, dh, dw, 1), padding="SAME")
        # Biases start at 0 (tf.constant) and are wrapped in a tf.Variable so
        # that they become trainable parameters.
        bias_init_val = tf.constant(0.0, shape=[n_out], dtype=tf.float32)
        biases = tf.Variable(bias_init_val, trainable=True, name='b')
        z = tf.nn.bias_add(conv, biases)
        activation = tf.nn.relu(z, name=scope)
        p += [kernel, biases]
        return activation


def fc_op(input_op, name, n_out, p):
    """Create a fully connected layer with ReLU and record its parameters.

    Args:
        input_op: Input tensor; its last dimension is taken as the number of
            input units.
        name: Name of the layer, used as the name scope.
        n_out: Number of output units.
        p: Parameter list; the layer's kernel and biases are appended to it
            in place.

    Returns:
        The ReLU activation tensor of the layer.
    """
    n_in = input_op.get_shape()[-1].value

    with tf.name_scope(name) as scope:
        # A fully connected layer has only two parameter dimensions: the
        # number of inputs n_in and the number of outputs n_out.
        kernel = tf.get_variable(
            scope + "w",
            shape=[n_in, n_out],
            dtype=tf.float32,
            initializer=tf.contrib.layers.xavier_initializer(),
        )
        # Biases are initialized to a small positive value (0.1) instead of 0
        # in order to avoid dead neurons.
        biases = tf.Variable(
            tf.constant(0.1, shape=[n_out], dtype=tf.float32), name='b')
        activation = tf.nn.relu_layer(input_op, kernel, biases, name=scope)
        p += [kernel, biases]
        return activation


def mpool_op(input_op, name, kh, kw, dh, dw):
    """Create a max pooling layer with a kh*kw window and a dh*dw stride."""
    return tf.nn.max_pool(
        input_op,
        ksize=[1, kh, kw, 1],
        strides=[1, dh, dw, 1],
        padding="SAME",
        name=name,
    )


def inference_op(input_op, keep_prob):
    """Build the VGGNet-16 network.

    The network has six parts: the first five are convolutional segments and
    the last one is the fully connected part.

    Args:
        input_op: Input image tensor. The size comments in the body assume a
            224*224*3 input, which is what run_benchmark feeds in.
        keep_prob: Retention rate of neurons used by the dropout layers.

    Returns:
        A tuple (predictions, softmax, fc8, p): the index of the most probable
        class, the class probabilities, the output of the last fully connected
        layer and the list of all layer parameters.
    """
    p = []

    # Segment 1 (input 224*224*3): two convolutional layers with 64 kernels
    # of size 3*3 and stride 1*1. Both produce 224*224*64; the standard 2*2
    # max pooling with stride 2*2 that follows reduces this to 112*112*64.
    conv1_1 = conv_op(input_op, name='conv1_1', kh=3, kw=3, n_out=64, dh=1, dw=1, p=p)
    conv1_2 = conv_op(conv1_1, name='conv1_2', kh=3, kw=3, n_out=64, dh=1, dw=1, p=p)
    pool1 = mpool_op(conv1_2, name='pool1', kh=2, kw=2, dh=2, dw=2)

    # Segment 2: two convolutional layers with 128 kernels of size 3*3 and
    # stride 1*1, followed by 2*2 max pooling. Output: 56*56*128.
    conv2_1 = conv_op(pool1, name='conv2_1', kh=3, kw=3, n_out=128, dh=1, dw=1, p=p)
    conv2_2 = conv_op(conv2_1, name='conv2_2', kh=3, kw=3, n_out=128, dh=1, dw=1, p=p)
    pool2 = mpool_op(conv2_2, name='pool2', kh=2, kw=2, dh=2, dw=2)

    # Segment 3: three convolutional layers with 256 kernels of size 3*3 and
    # stride 1*1, followed by 2*2 max pooling. Output: 28*28*256.
    conv3_1 = conv_op(pool2, name='conv3_1', kh=3, kw=3, n_out=256, dh=1, dw=1, p=p)
    conv3_2 = conv_op(conv3_1, name='conv3_2', kh=3, kw=3, n_out=256, dh=1, dw=1, p=p)
    conv3_3 = conv_op(conv3_2, name='conv3_3', kh=3, kw=3, n_out=256, dh=1, dw=1, p=p)
    pool3 = mpool_op(conv3_3, name='pool3', kh=2, kw=2, dh=2, dw=2)

    # Segment 4: three convolutional layers with 512 kernels of size 3*3 and
    # stride 1*1, followed by 2*2 max pooling. Output: 14*14*512.
    conv4_1 = conv_op(pool3, name='conv4_1', kh=3, kw=3, n_out=512, dh=1, dw=1, p=p)
    conv4_2 = conv_op(conv4_1, name='conv4_2', kh=3, kw=3, n_out=512, dh=1, dw=1, p=p)
    conv4_3 = conv_op(conv4_2, name='conv4_3', kh=3, kw=3, n_out=512, dh=1, dw=1, p=p)
    pool4 = mpool_op(conv4_3, name='pool4', kh=2, kw=2, dh=2, dw=2)

    # Segment 5: three convolutional layers, still with 512 kernels of size
    # 3*3 and stride 1*1, followed by 2*2 max pooling. Output: 7*7*512.
    conv5_1 = conv_op(pool4, name='conv5_1', kh=3, kw=3, n_out=512, dh=1, dw=1, p=p)
    conv5_2 = conv_op(conv5_1, name='conv5_2', kh=3, kw=3, n_out=512, dh=1, dw=1, p=p)
    conv5_3 = conv_op(conv5_2, name='conv5_3', kh=3, kw=3, n_out=512, dh=1, dw=1, p=p)
    pool5 = mpool_op(conv5_3, name='pool5', kh=2, kw=2, dh=2, dw=2)

    # As you may have noticed, every convolutional segment of VGG-16 halves
    # the side length of the image through max pooling (so the area shrinks
    # to 1/4), while the number of output channels doubles from segment to
    # segment until it reaches 512 (64 -> 128 -> 256 -> 512 -> 512). After
    # the fifth segment the image has been reduced to 7*7.

    # Fully connected part.
    # Flatten the output of the fifth segment: tf.reshape turns each sample
    # into a one-dimensional vector of length 7*7*512 = 25088.
    shp = pool5.get_shape()
    # Multiply the three non-batch dimensions together.
    flattened_shape = shp[1].value * shp[2].value * shp[3].value
    # One row per sample, 25088 elements each.
    resh1 = tf.reshape(pool5, [-1, flattened_shape], name='resh1')

    # Two fully connected layers with 4096 hidden units and ReLU activation,
    # each followed by a dropout layer. keep_prob should be less than 1
    # during training (0.5 here) to introduce randomness and prevent
    # overfitting, and equal to 1.0 at prediction time so that all features
    # are used to classify the sample.
    fc6 = fc_op(resh1, name='fc6', n_out=4096, p=p)
    fc6_drop = tf.nn.dropout(fc6, keep_prob, name='fc6_drop')

    fc7 = fc_op(fc6_drop, name='fc7', n_out=4096, p=p)
    fc7_drop = tf.nn.dropout(fc7, keep_prob, name='fc7_drop')

    # Final fully connected layer with 1000 output units. Softmax turns its
    # output into class probabilities and tf.argmax picks the class with the
    # highest probability.
    fc8 = fc_op(fc7_drop, name='fc8', n_out=1000, p=p)
    softmax = tf.nn.softmax(fc8)
    predictions = tf.argmax(softmax, 1)
    return predictions, softmax, fc8, p


def time_tensorflow_run(session, target, feed, info_string):
    """Time repeated executions of a graph target and print statistics.

    This is very similar to the AlexNet version, with one difference:
    feed_dict is passed to session.run() so that keep_prob can be supplied to
    control the retention ratio of the dropout layers.

    The number of timed runs is read from the module-level num_batches; the
    first num_steps_burn_in runs are executed but not counted.

    Args:
        session: Session used to run the target.
        target: Operation(s) or tensor(s) to evaluate.
        feed: Feed dictionary passed to session.run().
        info_string: Label included in the summary line.
    """
    num_steps_burn_in = 10
    total_duration = 0.0
    total_duration_squared = 0.0

    for i in range(num_batches + num_steps_burn_in):
        start_time = time.time()
        _ = session.run(target, feed_dict=feed)
        duration = time.time() - start_time
        if i >= num_steps_burn_in:
            if not i % 10:
                print('%s:step %d, duration = %.3f'
                      % (datetime.now(), i - num_steps_burn_in, duration))
            total_duration += duration
            total_duration_squared += duration * duration

    mn = total_duration / num_batches
    vr = total_duration_squared / num_batches - mn * mn
    # Floating-point rounding can push the variance slightly below zero when
    # all durations are almost equal; clamp it so that sqrt cannot fail.
    sd = math.sqrt(max(vr, 0.0))
    print('%s: %s across %d steps,%.3f +/- %.3f sec/batch'
          % (datetime.now(), info_string, num_batches, mn, sd))


def run_benchmark():
    """Benchmark the forward and forward-backward passes of VGG-16.

    Random images of size 224*224*3 are used as input; the batch size is read
    from the module-level batch_size.
    """
    with tf.Graph().as_default():
        image_size = 224
        images = tf.Variable(
            tf.truncated_normal(
                [batch_size, image_size, image_size, 3],
                dtype=tf.float32,
                stddev=1e-1,
            )
        )

        # Create the keep_prob placeholder and build the VGG-16 network to
        # obtain predictions, softmax, fc8 and the parameter list p.
        keep_prob = tf.placeholder(tf.float32)
        predictions, softmax, fc8, p = inference_op(images, keep_prob)

        # Create the session and initialize all variables. The with-block
        # makes sure the session is closed when the benchmark is done.
        init = tf.global_variables_initializer()
        with tf.Session() as sess:
            sess.run(init)

            # Forward pass: keep_prob is set to 1.0.
            time_tensorflow_run(sess, predictions, {keep_prob: 1.0}, "Forward")

            # Backward pass: take the L2 loss of fc8, the output of the last
            # fully connected layer, and use tf.gradients to obtain the
            # gradients of all model parameters with respect to this loss.
            # The timed target is the gradient operation grad, with
            # keep_prob set to 0.5.
            objective = tf.nn.l2_loss(fc8)
            grad = tf.gradients(objective, p)
            time_tensorflow_run(sess, grad, {keep_prob: 0.5}, "Forward-backward")


batch_size = 32
num_batches = 100
run_benchmark()