Implementing Inception V3 in TensorFlow

Architecture

Input size: 299x299x3, conv1: 3x3/2

Input size: 149x149x32, conv2: 3x3/1

Input size: 147x147x32, conv3: 3x3/1

Input size: 147x147x64, pool1: 3x3/2

Input size: 73x73x64, conv4: 3x3/1

Input size: 71x71x80, conv5: 3x3/2

Input size: 35x35x192, max pool: 3x3/1 (a stride-1 SAME pooling layer in the code below)

Inception module groups:

Input size: 35x35x192,

3 Inception modules (stride=1 and padding=SAME, so the spatial size is unchanged and only the channel count changes), output size: 35x35x288,

5 Inception modules, output size: 17x17x768

3 Inception modules, output size: 8x8x2048

pool2: 8x8, output size: 1x1x2048

softmax, output size: 1x1x1000
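These spatial sizes all follow from the standard output-size formulas: VALID gives (w - f)/s + 1 and SAME gives ceil(w/s). A quick sanity check of the stem arithmetic in plain Python (no TensorFlow required):

import math

def out_size(w, f, s, padding='VALID'):
    # VALID: (w - f)/s + 1 ;  SAME: ceil(w/s)
    if padding == 'VALID':
        return (w - f) // s + 1
    return int(math.ceil(float(w) / s))

print(out_size(299, 3, 2))  # 149 (conv1)
print(out_size(149, 3, 1))  # 147 (conv2)
print(out_size(147, 3, 2))  # 73  (pool1; conv3 uses SAME and keeps 147)
print(out_size(73, 3, 1))   # 71  (conv4)
print(out_size(71, 3, 2))   # 35  (conv5)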

# First, define a simple lambda function that produces a truncated normal initializer
import tensorflow as tf
import tensorflow.contrib.slim as slim

trunc_normal = lambda stddev: tf.truncated_normal_initializer(0.0, stddev)
# how lambda functions work:
# g = lambda x: x + 1
# calling: g(1) --> 2

Many layers in Inception share the same parameter values, so we use slim's arg_scope to set common defaults (the convolutions' activation function, weight initializer, normalizer, and so on).
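As a minimal illustration of how slim.arg_scope applies defaults (the images placeholder here is just an assumed example input, not part of the network below):

images = tf.placeholder(tf.float32, [None, 299, 299, 3])
with slim.arg_scope([slim.conv2d], padding='VALID', activation_fn=tf.nn.relu):
    # both convolutions inherit padding='VALID' and the ReLU activation ...
    net = slim.conv2d(images, 32, [3, 3], scope='demo_a')
    # ... but an explicitly passed argument still overrides the scoped default
    net = slim.conv2d(net, 64, [3, 3], padding='SAME', scope='demo_b')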

def inception_v3_arg_scope(weight_decay=0.00004, stddev=0.1, batch_norm_var_collection='moving_vars'):
    #  weight_decay: L2 regularization strength for the weights
    #  stddev: standard deviation for weight initialization
    #  batch_norm_var_collection: collection holding the batch-norm moving statistics
    #  parameters for batch normalization (BN)
    batch_norm_params = {
        'decay': 0.9997, # decay for the moving averages
        'epsilon': 0.001,
        'updates_collections': tf.GraphKeys.UPDATE_OPS,
        'variables_collections': {
            'beta': None,
            'gamma': None,
            'moving_mean': [batch_norm_var_collection],
            'moving_variance': [batch_norm_var_collection],
        }
    }

    with slim.arg_scope([slim.conv2d, slim.fully_connected], weights_regularizer=slim.l2_regularizer(weight_decay)):
        with slim.arg_scope([slim.conv2d],
                           weights_initializer=trunc_normal(stddev),
                           activation_fn=tf.nn.relu,
                           normalizer_fn=slim.batch_norm,
                           normalizer_params=batch_norm_params) as sc:
            return sc # with the conv defaults defined once here, a conv layer later takes only a single line
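Typical usage is to wrap the entire network construction in this scope; a sketch (inception_v3 is defined further below):

with slim.arg_scope(inception_v3_arg_scope()):
    logits, end_points = inception_v3(images, num_classes=1000, is_training=True)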
# Next, build the InceptionV3 network itself

def inception_v3_base(inputs, scope=None): # input: 299x299x3
    # scope: the parameter environment (name scope) this function's ops live in
    end_points = {} # save certain key nodes for later use
    # tf.variable_scope(name_or_scope, default_name=None, values=None,
    #                   initializer=None, regularizer=None, caching_device=None, partitioner=None)
    with tf.variable_scope(scope, 'InceptionV3', [inputs]):
        with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d], stride=1, padding="VALID"):
            # VALID: output size = (w - f)/s + 1
            net = slim.conv2d(inputs, 32, [3,3], stride=2, scope='conv2d_1a_3x3') # (299-3)/2+1=149  149x149x32
            net = slim.conv2d(net, 32, [3,3], scope='conv2d_2a_3x3') # (149-3)/1+1=147  147x147x32
            net = slim.conv2d(net, 64, [3,3], padding="SAME", scope='conv2d_2b_3x3') # 147/1  147x147x64
            net = slim.max_pool2d(net, [3,3], stride=2, scope='MaxPool_3a_3x3') # (147-3)/2+1=73  73x73x64
            net = slim.conv2d(net, 80, [3,3], scope='conv2d_3b_3x3') # (73-3)/1+1=71  71x71x80
            net = slim.conv2d(net, 192, [3,3], stride=2, scope='conv2d_4a_3x3') # (71-3)/2+1=35  35x35x192
            net = slim.max_pool2d(net, [3,3], padding="SAME", scope='MaxPool_5a_3x3') # 35/1=35  35x35x192
            # that completes the leading convolutional layers

        # Next, build the Inception module groups.
        with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d], stride=1, padding="SAME"):
            # SAME: output size = w/s

            # first module of the first module group
            with tf.variable_scope('Mixed_5b'): # variable scope for this module
                with tf.variable_scope('Branch_0'): # first branch
                    branch_0 = slim.conv2d(net, 64, [1, 1], scope='conv2d_0a_1x1') # input 35x35x192, output 35x35x64
                with tf.variable_scope('Branch_1'): # second branch
                    branch_1 = slim.conv2d(net, 48, [1, 1], scope='conv2d_0a_1x1')
                    branch_1 = slim.conv2d(branch_1, 64, [5, 5], scope='conv2d_0b_5x5') # 35/1, output 35x35x64
                with tf.variable_scope('Branch_2'): # third branch
                    branch_2 = slim.conv2d(net, 64, [1, 1], scope='conv2d_0a_1x1')
                    branch_2 = slim.conv2d(branch_2, 96, [3, 3], scope='conv2d_0b_3x3')
                    branch_2 = slim.conv2d(branch_2, 96, [3, 3], scope='conv2d_0c_3x3') # output 35x35x96
                with tf.variable_scope('Branch_3'): # fourth branch
                    branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3') # 35x35x192
                    branch_3 = slim.conv2d(branch_3, 32, [1, 1], scope='conv2d_0b_1x1') # 35x35x32

                # concatenate the branch outputs along the channel dimension
                # note: since TF 1.0, tf.concat takes the tensors first and the axis second
                # final output: 35x35x(64+64+96+32) = 35x35x256
                net = tf.concat([branch_0, branch_1, branch_2, branch_3], 3)
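                # Quick shape check of the concat above (axis 3 is the channel axis in NHWC;
                # TF >= 1.0 signature: tf.concat(values, axis)). For example:
                #   a = tf.zeros([1, 35, 35, 64]); b = tf.zeros([1, 35, 35, 96])
                #   tf.concat([a, b], 3).shape ==> (1, 35, 35, 160)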

            # That completes the first module of the first Inception module group; now the second.
            # It also has 4 branches; the only difference is that the last branch's 1x1 conv
            # has 64 output channels instead of 32.
            with tf.variable_scope('Mixed_5c'):
                with tf.variable_scope('Branch_0'): # first branch
                    branch_0 = slim.conv2d(net, 64, [1, 1], scope='conv2d_0a_1x1') # input 35x35x256, output 35x35x64
                with tf.variable_scope('Branch_1'): # second branch
                    branch_1 = slim.conv2d(net, 48, [1, 1], scope='conv2d_0a_1x1')
                    branch_1 = slim.conv2d(branch_1, 64, [5, 5], scope='conv2d_0b_5x5') # 35/1, output 35x35x64
                with tf.variable_scope('Branch_2'): # third branch
                    branch_2 = slim.conv2d(net, 64, [1, 1], scope='conv2d_0a_1x1')
                    branch_2 = slim.conv2d(branch_2, 96, [3, 3], scope='conv2d_0b_3x3')
                    branch_2 = slim.conv2d(branch_2, 96, [3, 3], scope='conv2d_0c_3x3') # output 35x35x96
                with tf.variable_scope('Branch_3'): # fourth branch
                    branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3') # 35x35x256
                    branch_3 = slim.conv2d(branch_3, 64, [1, 1], scope='conv2d_0b_1x1') # 35x35x64
                # final output: 35x35x(64+64+96+64) = 35x35x288
                net = tf.concat([branch_0, branch_1, branch_2, branch_3], 3)

            # The third module, identical to the second.
            with tf.variable_scope('Mixed_5d'):
                with tf.variable_scope('Branch_0'): # first branch
                    branch_0 = slim.conv2d(net, 64, [1, 1], scope='conv2d_0a_1x1') # input 35x35x288, output 35x35x64
                with tf.variable_scope('Branch_1'): # second branch
                    branch_1 = slim.conv2d(net, 48, [1, 1], scope='conv2d_0a_1x1')
                    branch_1 = slim.conv2d(branch_1, 64, [5, 5], scope='conv2d_0b_5x5') # 35/1, output 35x35x64
                with tf.variable_scope('Branch_2'): # third branch
                    branch_2 = slim.conv2d(net, 64, [1, 1], scope='conv2d_0a_1x1')
                    branch_2 = slim.conv2d(branch_2, 96, [3, 3], scope='conv2d_0b_3x3')
                    branch_2 = slim.conv2d(branch_2, 96, [3, 3], scope='conv2d_0c_3x3') # output 35x35x96
                with tf.variable_scope('Branch_3'): # fourth branch
                    branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3') # 35x35x288
                    branch_3 = slim.conv2d(branch_3, 64, [1, 1], scope='conv2d_0b_1x1') # 35x35x64
                # final output: 35x35x(64+64+96+64) = 35x35x288
                net = tf.concat([branch_0, branch_1, branch_2, branch_3], 3)

            # Now the second module group (5 modules in total; modules 2 through 5 are identical).
            # Its first module has 3 branches:
            #   Branch 0: 384@3x3 conv, stride 2, VALID padding, shrinking the output to (35-3)/2+1=17
            #   Branch 1: 3 layers: 64@1x1 and two 96@3x3 (the last with s=2, VALID, shrinking the output)
            #   Branch 2: max-pool layer (s=2, VALID)
            with tf.variable_scope('Mixed_6a'):
                with tf.variable_scope('Branch_0'):
                    branch_0 = slim.conv2d(net, 384, [3, 3], stride=2, padding="VALID", scope='conv2d_1a_1x1') # output 17x17x384
                with tf.variable_scope('Branch_1'):
                    branch_1 = slim.conv2d(net, 64, [1, 1], scope='conv2d_0a_1x1')
                    branch_1 = slim.conv2d(branch_1, 96, [3, 3], scope='conv2d_0b_3x3')
                    branch_1 = slim.conv2d(branch_1, 96, [3, 3], stride=2, padding="VALID", scope='conv2d_1a_1x1') # 17x17x96
                with tf.variable_scope('Branch_2'):
                    branch_2 = slim.max_pool2d(net, [3, 3], stride=2, padding="VALID", scope='MaxPool_1a_3x3') # output 17x17x288
                net = tf.concat([branch_0, branch_1, branch_2], 3) # 17x17x(384+96+288) = 17x17x768

            # Modules 2-5
            # Each has 4 branches and uses "asymmetric" factorized convolutions, the core idea of Inception V3.
            #   Branch 0: a plain 192@1x1 convolution
            #   Branch 1: 3 layers: 128@1x1 -- 128@1x7 -- 192@7x1 (the asymmetric factorization cuts
            #             parameters and overfitting, and the extra activation adds a nonlinear transform)
            #   Branch 2: 5 layers: 128@1x1 -- 128@7x1 -- 128@1x7 -- 128@7x1 -- 192@1x7
            #   Branch 3: 3x3 average pooling -- 192@1x1
            with tf.variable_scope('Mixed_6b'): # defaults: stride=1, SAME
                with tf.variable_scope('Branch_0'):
                    branch_0 = slim.conv2d(net, 192, [1, 1], scope='conv2d_0a_1x1') # input 17x17x768, output 17x17x192
                with tf.variable_scope('Branch_1'):
                    branch_1 = slim.conv2d(net, 128, [1, 1], scope='conv2d_0a_1x1')
                    branch_1 = slim.conv2d(branch_1, 128, [1, 7], scope='conv2d_0b_1x7')
                    branch_1 = slim.conv2d(branch_1, 192, [7, 1], scope='conv2d_0c_7x1')
                with tf.variable_scope('Branch_2'):
                    branch_2 = slim.conv2d(net, 128, [1, 1], scope='conv2d_0a_1x1')
                    branch_2 = slim.conv2d(branch_2, 128, [7, 1], scope='conv2d_0b_7x1')
                    branch_2 = slim.conv2d(branch_2, 128, [1, 7], scope='conv2d_0c_1x7')
                    branch_2 = slim.conv2d(branch_2, 128, [7, 1], scope='conv2d_0d_7x1')
                    branch_2 = slim.conv2d(branch_2, 192, [1, 7], scope='conv2d_0e_1x7')
                with tf.variable_scope('Branch_3'):
                    branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
                    branch_3 = slim.conv2d(branch_3, 192, [1, 1], scope='conv2d_0b_1x1')
                net = tf.concat([branch_0, branch_1, branch_2, branch_3], 3) # 17x17x(192+192+192+192) = 17x17x768
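            # Why the 1x7/7x1 factorization saves parameters: with 192 input and 192 output
            # channels (ignoring biases), a full 7x7 conv would have 7*7*192*192 = 1,806,336
            # weights, while the 1x7 + 7x1 pair has (7 + 7)*192*192 = 516,096, about 29% as
            # many. (The branch above uses 128 intermediate channels, so the exact saving
            # differs, but the idea is the same.)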
            # Module 3 is essentially the same as module 2; only the leading convolutions in
            # branches 1 and 2 grow from 128 to 160 output channels.
            with tf.variable_scope('Mixed_6c'): # defaults: stride=1, SAME
                with tf.variable_scope('Branch_0'):
                    branch_0 = slim.conv2d(net, 192, [1, 1], scope='conv2d_0a_1x1') # input 17x17x768, output 17x17x192
                with tf.variable_scope('Branch_1'):
                    branch_1 = slim.conv2d(net, 160, [1, 1], scope='conv2d_0a_1x1')
                    branch_1 = slim.conv2d(branch_1, 160, [1, 7], scope='conv2d_0b_1x7')
                    branch_1 = slim.conv2d(branch_1, 192, [7, 1], scope='conv2d_0c_7x1')
                with tf.variable_scope('Branch_2'):
                    branch_2 = slim.conv2d(net, 160, [1, 1], scope='conv2d_0a_1x1')
                    branch_2 = slim.conv2d(branch_2, 160, [7, 1], scope='conv2d_0b_7x1')
                    branch_2 = slim.conv2d(branch_2, 160, [1, 7], scope='conv2d_0c_1x7')
                    branch_2 = slim.conv2d(branch_2, 160, [7, 1], scope='conv2d_0d_7x1')
                    branch_2 = slim.conv2d(branch_2, 192, [1, 7], scope='conv2d_0e_1x7')
                with tf.variable_scope('Branch_3'):
                    branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
                    branch_3 = slim.conv2d(branch_3, 192, [1, 1], scope='conv2d_0b_1x1')
                net = tf.concat([branch_0, branch_1, branch_2, branch_3], 3) # 17x17x(192+192+192+192) = 17x17x768
            # Module 4, identical to module 3.
            with tf.variable_scope('Mixed_6d'): # defaults: stride=1, SAME
                with tf.variable_scope('Branch_0'):
                    branch_0 = slim.conv2d(net, 192, [1, 1], scope='conv2d_0a_1x1') # input 17x17x768, output 17x17x192
                with tf.variable_scope('Branch_1'):
                    branch_1 = slim.conv2d(net, 160, [1, 1], scope='conv2d_0a_1x1')
                    branch_1 = slim.conv2d(branch_1, 160, [1, 7], scope='conv2d_0b_1x7')
                    branch_1 = slim.conv2d(branch_1, 192, [7, 1], scope='conv2d_0c_7x1')
                with tf.variable_scope('Branch_2'):
                    branch_2 = slim.conv2d(net, 160, [1, 1], scope='conv2d_0a_1x1')
                    branch_2 = slim.conv2d(branch_2, 160, [7, 1], scope='conv2d_0b_7x1')
                    branch_2 = slim.conv2d(branch_2, 160, [1, 7], scope='conv2d_0c_1x7')
                    branch_2 = slim.conv2d(branch_2, 160, [7, 1], scope='conv2d_0d_7x1')
                    branch_2 = slim.conv2d(branch_2, 192, [1, 7], scope='conv2d_0e_1x7')
                with tf.variable_scope('Branch_3'):
                    branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
                    branch_3 = slim.conv2d(branch_3, 192, [1, 1], scope='conv2d_0b_1x1')
                net = tf.concat([branch_0, branch_1, branch_2, branch_3], 3) # 17x17x(192+192+192+192) = 17x17x768
            # Module 5, again identical.
            # Save this module's output in end_points: it feeds the Auxiliary Classifier.
            with tf.variable_scope('Mixed_6e'): # defaults: stride=1, SAME
                with tf.variable_scope('Branch_0'):
                    branch_0 = slim.conv2d(net, 192, [1, 1], scope='conv2d_0a_1x1') # input 17x17x768, output 17x17x192
                with tf.variable_scope('Branch_1'):
                    branch_1 = slim.conv2d(net, 160, [1, 1], scope='conv2d_0a_1x1')
                    branch_1 = slim.conv2d(branch_1, 160, [1, 7], scope='conv2d_0b_1x7')
                    branch_1 = slim.conv2d(branch_1, 192, [7, 1], scope='conv2d_0c_7x1')
                with tf.variable_scope('Branch_2'):
                    branch_2 = slim.conv2d(net, 160, [1, 1], scope='conv2d_0a_1x1')
                    branch_2 = slim.conv2d(branch_2, 160, [7, 1], scope='conv2d_0b_7x1')
                    branch_2 = slim.conv2d(branch_2, 160, [1, 7], scope='conv2d_0c_1x7')
                    branch_2 = slim.conv2d(branch_2, 160, [7, 1], scope='conv2d_0d_7x1')
                    branch_2 = slim.conv2d(branch_2, 192, [1, 7], scope='conv2d_0e_1x7')
                with tf.variable_scope('Branch_3'):
                    branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
                    branch_3 = slim.conv2d(branch_3, 192, [1, 1], scope='conv2d_0b_1x1')
                net = tf.concat([branch_0, branch_1, branch_2, branch_3], 3) # 17x17x(192+192+192+192) = 17x17x768
                # keep this output for the auxiliary classifier
                end_points['Mixed_6e'] = net

            # The third Inception module group contains 3 modules.
            # First module: 3 branches.
            #   Branch 0: 192@1x1 followed by 320@3x3 (s=2, VALID, shrinking the feature map)
            #   Branch 1: 4 conv layers: 192@1x1 -- 192@1x7 -- 192@7x1 -- 192@3x3 (s=2, VALID, shrinking the feature map)
            #   Branch 2: 3x3 max pooling, s=2, VALID
            # The feature map keeps shrinking while the channel count grows.
            with tf.variable_scope('Mixed_7a'):
                with tf.variable_scope('Branch_0'):
                    branch_0 = slim.conv2d(net, 192, [1, 1], scope='conv2d_0a_1x1')
                    branch_0 = slim.conv2d(branch_0, 320, [3, 3], stride=2, padding='VALID', scope='conv2d_1a_3x3') # (17-3)/2+1=8, 8x8x320
                with tf.variable_scope('Branch_1'):
                    branch_1 = slim.conv2d(net, 192, [1, 1], scope='conv2d_0a_1x1')
                    branch_1 = slim.conv2d(branch_1, 192, [1, 7], scope='conv2d_0b_1x7')
                    branch_1 = slim.conv2d(branch_1, 192, [7, 1], scope='conv2d_0c_7x1')
                    branch_1 = slim.conv2d(branch_1, 192, [3, 3], stride=2, padding='VALID', scope='conv2d_1a_3x3') # 8x8x192
                with tf.variable_scope('Branch_2'):
                    branch_2 = slim.max_pool2d(net, [3, 3], stride=2, padding="VALID", scope='MaxPool_1a_3x3') # 8x8x768
                net = tf.concat([branch_0, branch_1, branch_2], 3) # 8x8x(320+192+768) = 8x8x1280

            # Second module: 4 branches.
            #   Branch 0: a 320@1x1 convolution
            #   Branch 1: 384@1x1 conv, then split into two branches (384@1x3 and 384@3x1);
            #             their concat gives 384+384=768 channels
            #   Branch 2: 448@1x1 -- 384@3x3 -- two branches (384@1x3 and 384@3x1)
            #   Branch 3: 3x3 pooling followed by a 192@1x1 convolution
            with tf.variable_scope('Mixed_7b'): # defaults: stride=1, SAME
                with tf.variable_scope('Branch_0'):
                    branch_0 = slim.conv2d(net, 320, [1, 1], scope='conv2d_0a_1x1')
                with tf.variable_scope('Branch_1'):
                    branch_1 = slim.conv2d(net, 384, [1, 1], scope='conv2d_0a_1x1')
                    branch_1 = tf.concat([
                        slim.conv2d(branch_1, 384, [1, 3], scope='conv2d_0b_1x3'),
                        slim.conv2d(branch_1, 384, [3, 1], scope='conv2d_0b_3x1')
                    ], 3)
                with tf.variable_scope('Branch_2'):
                    branch_2 = slim.conv2d(net, 448, [1, 1], scope='conv2d_0a_1x1')
                    branch_2 = slim.conv2d(branch_2, 384, [3, 3], scope='conv2d_0b_3x3')
                    branch_2 = tf.concat([
                        slim.conv2d(branch_2, 384, [1, 3], scope='conv2d_0c_1x3'),
                        slim.conv2d(branch_2, 384, [3, 1], scope='conv2d_0d_3x1')
                    ], 3)
                with tf.variable_scope('Branch_3'):
                    branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
                    branch_3 = slim.conv2d(branch_3, 192, [1, 1], scope='conv2d_0b_1x1')
                net = tf.concat([branch_0, branch_1, branch_2, branch_3], 3) # 8x8x(320+768+768+192) = 8x8x2048

            # Third module, identical to the previous one.
            with tf.variable_scope('Mixed_7c'): # defaults: stride=1, SAME
                with tf.variable_scope('Branch_0'):
                    branch_0 = slim.conv2d(net, 320, [1, 1], scope='conv2d_0a_1x1')
                with tf.variable_scope('Branch_1'):
                    branch_1 = slim.conv2d(net, 384, [1, 1], scope='conv2d_0a_1x1')
                    branch_1 = tf.concat([
                        slim.conv2d(branch_1, 384, [1, 3], scope='conv2d_0b_1x3'),
                        slim.conv2d(branch_1, 384, [3, 1], scope='conv2d_0b_3x1')
                    ], 3)
                with tf.variable_scope('Branch_2'):
                    branch_2 = slim.conv2d(net, 448, [1, 1], scope='conv2d_0a_1x1')
                    branch_2 = slim.conv2d(branch_2, 384, [3, 3], scope='conv2d_0b_3x3')
                    branch_2 = tf.concat([
                        slim.conv2d(branch_2, 384, [1, 3], scope='conv2d_0c_1x3'),
                        slim.conv2d(branch_2, 384, [3, 1], scope='conv2d_0d_3x1')
                    ], 3)
                with tf.variable_scope('Branch_3'):
                    branch_3 = slim.avg_pool2d(net, [3, 3], scope='AvgPool_0a_3x3')
                    branch_3 = slim.conv2d(branch_3, 192, [1, 1], scope='conv2d_0b_1x1')

                net = tf.concat([branch_0, branch_1, branch_2, branch_3], 3) # 8x8x(320+768+768+192) = 8x8x2048

            return net, end_points
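A quick shape check of the base network (a sketch; the shapes should match the comments above):

inputs = tf.placeholder(tf.float32, [None, 299, 299, 3])
net, end_points = inception_v3_base(inputs)
print(net.get_shape())                     # (?, 8, 8, 2048)
print(end_points['Mixed_6e'].get_shape())  # (?, 17, 17, 768)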

Analysis:

The network starts with an ordinary stack of five convolutional layers interleaved with two pooling layers, followed by three Inception module groups, each containing several Inception modules.

Through these layers the feature map gradually shrinks while the channel count keeps growing. Each convolution, pooling layer, or Inception module simplifies the spatial structure and converts spatial information into higher-level, more abstract feature information (trading spatial dimensions for channel dimensions).

An Inception module generally has four branches:
1: usually a 1x1 convolution
2: usually a 1x1 convolution followed by a factorized 1xn and nx1 convolution
3: similar to branch 2, but with a few more layers
4: usually a max or average pooling layer

An Inception module therefore combines simple feature abstractions (branch 1), more complex ones (branches 2 and 3), and a structure-simplifying pooling branch, selectively retaining high-level features at different levels of abstraction; this enriches the network's representational power as much as possible.

Inception V3 also has a global average pooling layer, the Auxiliary Logits, and a softmax layer.

# Define a function for these final layers
def inception_v3(inputs, num_classes=1000, is_training=True, dropout_keep_prob=0.8, prediction_fn=slim.softmax,
                spatial_squeeze=True, reuse=None, scope='InceptionV3'):
    """
    inputs: the input images (299x299x3)
    num_classes: number of classes
    is_training: dropout and BN are only active during training
    dropout_keep_prob: keep probability for dropout
    prediction_fn: the function that produces the final classification
    spatial_squeeze: whether to squeeze the output (see note 1)
    reuse: whether to reuse the network and its Variables
    """

    with tf.variable_scope(scope, 'InceptionV3', [inputs, num_classes], reuse=reuse) as scope:
        with slim.arg_scope([slim.batch_norm, slim.dropout], is_training=is_training):
            # forward pass through the convolutional base
            net, end_points = inception_v3_base(inputs, scope=scope)

            # Next, the Auxiliary Logits head:
            # take the Mixed_6e output saved in end_points, apply 5x5 average pooling
            # (s=3, VALID, shrinking the feature map), then a 128@1x1 conv and a 768@5x5
            # conv (VALID), giving 1x1x768.
            # Finally a num_classes@1x1 conv produces the class scores, which are stored.
            with slim.arg_scope([slim.conv2d, slim.max_pool2d, slim.avg_pool2d], stride=1, padding="SAME"):
                aux_logits = end_points['Mixed_6e']

                with tf.variable_scope('AuxLogits'):
                    aux_logits = slim.avg_pool2d(aux_logits, [5, 5], stride=3, padding="VALID", scope='AvgPool_1a_5x5') # (17-5)/3+1=5, 5x5x768
                    aux_logits = slim.conv2d(aux_logits, 128, [1, 1], scope='conv2d_1b_1x1')

                    aux_logits = slim.conv2d(aux_logits, 768, [5, 5], weights_initializer=trunc_normal(0.01),
                                             padding="VALID", scope='conv2d_2a_5x5') # output 1x1x768
                    aux_logits = slim.conv2d(aux_logits, num_classes, [1, 1], activation_fn=None,
                                             weights_initializer=trunc_normal(0.01),
                                             padding="VALID", scope='conv2d_2b_1x1') # output 1x1xnum_classes

                    if spatial_squeeze:
                        aux_logits = tf.squeeze(aux_logits, [1, 2], name='SpatialSqueeze') # see note 2
                    end_points['AuxLogits'] = aux_logits

                # Now the regular classification logits
                with tf.variable_scope('Logits'):
                    # average-pool the output of Mixed_7c (the last layer)
                    net = slim.avg_pool2d(net, [8, 8], padding="VALID", scope='AvgPool_1a_8x8') # 1x1x2048
                    # dropout
                    net = slim.dropout(net, keep_prob=dropout_keep_prob, scope='Dropout_1b')

                    end_points['PreLogits'] = net

                    logits = slim.conv2d(net, num_classes, [1, 1], activation_fn=None, normalizer_fn=None,
                                         scope='conv2d_1c_1x1')
                    if spatial_squeeze:
                        logits = tf.squeeze(logits, [1, 2], name='SpatialSqueeze')

                    end_points['Logits'] = logits
                    end_points['Predictions'] = prediction_fn(logits, scope='Predictions') # softmax

            return logits, end_points
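Putting the pieces together, a minimal end-to-end sketch (random inputs stand in for real images):

batch_size = 2
images = tf.random_uniform((batch_size, 299, 299, 3))
with slim.arg_scope(inception_v3_arg_scope()):
    logits, end_points = inception_v3(images, num_classes=1000, is_training=False)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    preds = sess.run(end_points['Predictions'])
    print(preds.shape)  # (2, 1000)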

Note 1:

The squeeze operation removes dimensions of size 1, e.g. 5x3x1 -> 5x3

Note 2:

Given a tensor input, this op returns a tensor of the same type with all size-1 dimensions removed. If you do not want to remove all of them, you can remove specific size-1 dimensions by passing squeeze_dims.

# 't' is a tensor of shape [1, 2, 1, 3, 1, 1]
shape(squeeze(t)) ==> [2, 3]

Or, to remove specific size-1 dimensions:

# 't' is a tensor of shape [1, 2, 1, 3, 1, 1]
shape(squeeze(t, [2, 4])) ==> [1, 2, 3, 1]

At this point we have finished building the Inception V3 network. The model is very complex and embodies many lessons and tricks from designing large networks:

(1) "Convolution factorization" is very effective: it reduces parameters, mitigates overfitting, and increases the network's nonlinear expressive power.

(2) From input to output, a convolutional network should gradually shrink the feature map while increasing the number of output channels, simplifying the spatial structure and converting spatial information into higher-level abstract feature information.

(3) The Inception module's approach of using multiple branches to extract high-level features at different degrees of abstraction is very effective and enriches the network's representational power.

Parameter counts:

Inception V1: 7 million (22 layers)

Inception V3: 25 million (42 layers)

AlexNet: 60 million (8 layers)

VGGNet: 140 million (16/19 layers)
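As a cross-check of the Inception V3 figure, slim ships a small model analyzer that tallies variable sizes; a sketch, assuming the graph from the previous section has already been built:

# prints each trainable variable's shape and size, plus the total parameter count
# (for num_classes=1000 this should land near the ~25 million quoted above)
slim.model_analyzer.analyze_vars(tf.trainable_variables(), print_info=True)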

Reposted from blog.csdn.net/ifruoxi/article/details/78311595