TensorFlow implementation of VGGNet, a classic convolutional neural network - TensorFlow in Practice

VGGNet network structure


The code below is based on VGG-16

from datetime import datetime
import math
import time
import tensorflow as tf


# Parameter initialization: the Xavier initialization method was already introduced in section 4.1 (Autoencoder).
# Reference: https://blog.csdn.net/li_haiyu/article/details/80009430
# def xavier_init( fan_in, fan_out, constant = 1 ):
#     low  = -constant * np.sqrt( 6.0 / ( fan_in + fan_out ) )
#     high =  constant * np.sqrt( 6.0 / ( fan_in + fan_out ) )
#     return tf.random_uniform((fan_in, fan_out), minval=low, maxval=high, dtype=tf.float32 )


# conv_op creates a convolutional layer and appends that layer's parameters to the parameter list.
# input_op: input tensor; kh: kernel height; kw: kernel width; n_out: number of convolution kernels (output channels); dh: stride height; dw: stride width; p: parameter list
def conv_op(input_op,name,kh,kw,n_out,dh,dw,p):
    """Build a 2-D convolution + bias + ReLU layer and record its parameters.

    Args:
        input_op: input tensor; its last static dimension is used as the
            number of input channels.
        name: name scope under which the layer's ops are created.
        kh, kw: kernel height and width.
        n_out: number of convolution kernels (output channels).
        dh, dw: stride along height and width.
        p: parameter list; the kernel and bias variables are appended to it
            in place.

    Returns:
        The ReLU activation tensor of the layer.
    """
    # The last entry of the static shape is the channel count of the input.
    in_channels = input_op.get_shape()[-1].value

    with tf.name_scope(name) as scope:
        # Kernel shape is [height, width, input channels, output channels].
        weights = tf.get_variable(
            scope + "W",
            shape=[kh, kw, in_channels, n_out],
            dtype=tf.float32,
            initializer=tf.contrib.layers.xavier_initializer_conv2d(),
        )
        convolved = tf.nn.conv2d(input_op, weights, strides=(1, dh, dw, 1), padding="SAME")
        # Biases start at zero and are wrapped in a trainable Variable.
        bias = tf.Variable(
            initial_value=tf.constant(0.0, shape=[n_out], dtype=tf.float32),
            trainable=True,
            name='b',
        )
        pre_activation = tf.nn.bias_add(convolved, bias)
        output = tf.nn.relu(pre_activation, name=scope)
        # Extend the caller's list in place so the parameters are collected.
        p += [weights, bias]
    return output

# Define the creation function fc_op of the fully connected layer
def fc_op(input_op,name,n_out,p):
    """Build a fully connected layer with ReLU and record its parameters.

    Args:
        input_op: input tensor; its last static dimension is used as the
            number of input features.
        name: name scope under which the layer's ops are created.
        n_out: number of output units.
        p: parameter list; the weight and bias variables are appended to it
            in place.

    Returns:
        The output tensor of ``tf.nn.relu_layer`` for this layer.
    """
    in_features = input_op.get_shape()[-1].value

    with tf.name_scope(name) as scope:
        # A fully connected weight matrix has just two dimensions:
        # [input features, output units].
        weights = tf.get_variable(
            scope + "w",
            shape=[in_features, n_out],
            dtype=tf.float32,
            initializer=tf.contrib.layers.xavier_initializer(),
        )
        # Biases start at a small positive value (0.1) instead of 0 to
        # avoid dead neurons.
        bias_init = tf.constant(0.1, shape=[n_out], dtype=tf.float32)
        bias = tf.Variable(bias_init, name='b')
        output = tf.nn.relu_layer(input_op, weights, bias, name=scope)
        # Extend the caller's list in place so the parameters are collected.
        p += [weights, bias]
    return output

# Define the creation function mpool_op of the maximum pooling layer, the pooling size is kh*kw and the step size is dh*dw
def mpool_op(input_op,name,kh,kw,dh,dw):
    """Build a max-pooling op with a kh x kw window and dh x dw stride.

    Args:
        input_op: input tensor to pool.
        name: name given to the pooling op.
        kh, kw: pooling window height and width.
        dh, dw: stride along height and width.

    Returns:
        The pooled tensor (``SAME`` padding).
    """
    window = [1, kh, kw, 1]
    stride = [1, dh, dw, 1]
    pooled = tf.nn.max_pool(input_op, ksize=window, strides=stride, padding="SAME", name=name)
    return pooled

# Create the network structure of VGGNet-16
"""Description: 6 parts, the first five parts are convolutional networks, and the last part is a fully linked network
The inference function we define takes two inputs, input_op and keep_prob; keep_prob controls dropout's retention rate of neurons
The input of the first convolutional layer is input_op whose size is 224*224*3, and the output size is 224*224*64
The input and output size of the second convolutional layer is 224*224*64, and the maximum pooling layer of the convolutional layer is a standard 2*2 maximum pooling, and the output is 112*112*64"""
def inference_op(input_op,keep_prob):
    """Build the VGG-16 graph: five convolutional blocks plus three FC layers.

    Args:
        input_op: input image tensor. The benchmark in this file feeds
            batches of 224*224*3 images; the sizes quoted in the comments
            below assume that input.
        keep_prob: dropout keep probability applied after fc6 and fc7
            (use < 1 while training to add randomness and limit
            overfitting, and 1.0 when predicting so all features are used).

    Returns:
        A tuple ``(predictions, softmax, fc8, p)``: the argmax class index,
        the softmax probabilities, the output of the last fully connected
        layer, and the list of all kernel/bias variables created.
    """
    p = []

    # (number of kernels, number of conv layers) for each convolutional block.
    # Every conv layer uses 3*3 kernels with stride 1*1 and every block ends
    # with a 2*2 max pooling of stride 2*2, so each block halves the side
    # length of the feature map while the channel count doubles up to 512:
    #   block 1: 224*224*3   -> 112*112*64
    #   block 2: 112*112*64  -> 56*56*128
    #   block 3: 56*56*128   -> 28*28*256
    #   block 4: 28*28*256   -> 14*14*512
    #   block 5: 14*14*512   -> 7*7*512
    conv_blocks = ((64, 2), (128, 2), (256, 3), (512, 3), (512, 3))

    net = input_op
    for block_idx, (n_kernels, n_layers) in enumerate(conv_blocks, 1):
        for layer_idx in range(1, n_layers + 1):
            net = conv_op(net, name='conv%d_%d' % (block_idx, layer_idx),
                          kh=3, kw=3, n_out=n_kernels, dh=1, dw=1, p=p)
        net = mpool_op(net, name='pool%d' % block_idx, kh=2, kw=2, dh=2, dw=2)
    pool5 = net

    # Fully connected part: flatten the output of the fifth block so that
    # each sample becomes a one-dimensional vector (7*7*512 = 25088 values
    # for a 224*224 input).
    shp = pool5.get_shape()
    flattened_shape = shp[1].value * shp[2].value * shp[3].value
    resh1 = tf.reshape(pool5, [-1, flattened_shape], name='resh1')

    # Two fully connected layers of 4096 units with ReLU, each followed by
    # dropout controlled by keep_prob.
    fc6 = fc_op(resh1, name='fc6', n_out=4096, p=p)
    fc6_drop = tf.nn.dropout(fc6, keep_prob, name='fc6_drop')

    fc7 = fc_op(fc6_drop, name='fc7', n_out=4096, p=p)
    fc7_drop = tf.nn.dropout(fc7, keep_prob, name='fc7_drop')

    # Final fully connected layer with 1000 outputs, followed by softmax to
    # get class probabilities and argmax to pick the most probable class.
    # NOTE(review): fc_op applies ReLU, so the values fed to softmax are
    # rectified rather than raw logits - confirm this is intended.
    fc8 = fc_op(fc7_drop, name='fc8', n_out=1000, p=p)
    softmax = tf.nn.softmax(fc8)
    predictions = tf.argmax(softmax, 1)
    return predictions, softmax, fc8, p

'''Our evaluation function time_tensorflow_run() is very similar to the one used earlier for AlexNet, with only one difference:
we pass feed_dict to session.run(), which makes it convenient to pass in keep_prob later to control the retention ratio of the Dropout layers'''
def time_tensorflow_run(session,target,feed,info_string,n_batches=None):
    """Time repeated runs of ``target`` and print mean/std seconds per batch.

    The first ``num_steps_burn_in`` runs are executed but excluded from the
    statistics so that warm-up cost does not skew the result.

    Args:
        session: object exposing ``run(target, feed_dict=...)``.
        target: op or tensor(s) passed to ``session.run``.
        feed: feed dictionary forwarded as ``feed_dict``.
        info_string: label printed in the summary line.
        n_batches: number of timed runs; when ``None`` the module-level
            ``num_batches`` is used.

    Raises:
        ValueError: if the number of timed runs is less than 1.
    """
    if n_batches is None:
        n_batches = num_batches
    if n_batches < 1:
        raise ValueError('n_batches must be at least 1, got %r' % (n_batches,))

    num_steps_burn_in = 10
    total_duration = 0.0
    total_duration_squared = 0.0
    for i in range(n_batches + num_steps_burn_in):
        start_time = time.time()
        session.run(target, feed_dict=feed)
        duration = time.time() - start_time
        # Only steps after the burn-in phase contribute to the statistics.
        if i >= num_steps_burn_in:
            if not i % 10:
                print('%s:step %d, duration = %.3f'%(datetime.now(),i - num_steps_burn_in,duration))
            total_duration += duration
            total_duration_squared += duration * duration
    mn = total_duration / n_batches
    vr = total_duration_squared / n_batches - mn * mn
    # Floating-point rounding can push the variance slightly below zero,
    # which would make math.sqrt raise; clamp it at zero.
    sd = math.sqrt(max(vr, 0.0))
    print('%s: %s across %d steps,%.3f +/- %.3f sec/batch'%(datetime.now(),info_string,n_batches,mn,sd))

'''The main function run_benchmark is defined below, our goal is still to evaluate the computing performance of forward and backward'''
def run_benchmark():
    """Benchmark the forward and forward-backward passes of VGG-16.

    Builds the network on randomly generated images of shape
    [batch_size, 224, 224, 3] (``batch_size`` is read from module scope),
    then times the forward pass with keep_prob 1.0 and the gradient
    computation with keep_prob 0.5 via ``time_tensorflow_run``.
    """
    with tf.Graph().as_default():
        image_size = 224
        images = tf.Variable(tf.truncated_normal([batch_size,image_size,image_size,3],dtype=tf.float32,stddev=1e-1))

        # Placeholder for the dropout keep probability, then build VGG-16 to
        # obtain the predictions, the fc8 output and the parameter list p.
        keep_prob = tf.placeholder(tf.float32)
        predictions, _, fc8, p = inference_op(images, keep_prob)

        init = tf.global_variables_initializer()
        # The context manager closes the session once the benchmark is done.
        with tf.Session() as sess:
            sess.run(init)

            # Forward pass: dropout disabled (keep_prob = 1.0).
            time_tensorflow_run(sess,predictions,{keep_prob:1.0},"Forward")

            # Backward pass: use the L2 loss of fc8 as the objective and time
            # the gradients of all parameters with respect to it
            # (keep_prob = 0.5).
            objective = tf.nn.l2_loss(fc8)
            grad = tf.gradients(objective, p)
            time_tensorflow_run(sess,grad,{keep_prob:0.5},"Forward-backward")

# Benchmark configuration read from module scope by run_benchmark() and
# time_tensorflow_run(): images per batch and number of timed batches.
batch_size, num_batches = 32, 100

# Run the benchmark as soon as the module is executed.
run_benchmark()


Guess you like

Origin http://43.154.161.224:23101/article/api/json?id=325379503&siteId=291194637