AlexNet and Its TensorFlow Implementation

This post introduces the work that revived CNNs, the 2012 paper by Alex Krizhevsky et al., ImageNet Classification with Deep Convolutional Neural Networks [1], together with an implementation based on TensorFlow GPU 1.3.0 rc1.

[Figure: the AlexNet architecture, from the original paper]

AlexNet uses the architecture shown above. Note that, because GPU compute was limited at the time, the paper splits convolutional layers 2, 4, and 5 into two halves that are computed in parallel on two GPUs; the 2→3 connection is the only one between convolutional layers where the two streams communicate, since conv3 takes the feature maps from both GPUs as input. I have written the detailed parameters into the figure below.

[Figure: layer-by-layer parameters of AlexNet]
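
As a quick sanity check on those parameters, the spatial sizes can be reproduced with the standard VALID-padding formula floor((in - k)/s) + 1. A small sketch (assuming unpadded pooling as in the paper; the SAME-padded conv2-conv5 keep their input size):

def valid(size, k, s):
    # VALID padding: floor((size - kernel) / stride) + 1
    return (size - k) // s + 1

size = valid(227, 11, 4)    # conv1 -> 55
size = valid(size, 3, 2)    # pool1 -> 27 (conv2 keeps 27 via SAME padding)
size = valid(size, 3, 2)    # pool2 -> 13 (conv3-conv5 keep 13 via SAME padding)
size = valid(size, 3, 2)    # pool5 -> 6
print(size * size * 256)    # 9216 = 6*6*256, the fc6 input size used in the code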

Compared with prior work, AlexNet introduced the following improvements:
1. ReLU activation function.
2. Local Response Normalization (see the formula below).
3. Overlapping pooling.
4. Dropout.
5. Data augmentation.
6. Multi-GPU training.
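
Taking Local Response Normalization as an example, the paper normalizes each activation $a^{i}_{x,y}$ across $n$ adjacent channels:

$$
b^{i}_{x,y} = a^{i}_{x,y} \Big/ \Big(k + \alpha \sum_{j=\max(0,\,i-n/2)}^{\min(N-1,\,i+n/2)} \big(a^{j}_{x,y}\big)^{2}\Big)^{\beta}
$$

with the paper's constants k = 2, n = 5, α = 10⁻⁴, β = 0.75. In tf.nn.local_response_normalization, depth_radius corresponds to n/2, so the R = 2 used in the code below gives n = 5; the code keeps the default bias of 1.0 rather than the paper's k = 2, which matches the Caffe defaults used by the converted weights.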

The code comes in two parts: AlexNet.py builds the network, and train.py loads the parameters and validates the model. Once the model is built, we plug in bvlc_alexnet.npy, the weights w and biases b trained in Caffe and converted to a TensorFlow-loadable format, which spares us days of training. The parameters can be downloaded from my Baidu Cloud share (link: https://pan.baidu.com/s/1V6MPQvXv0ler5on0znlIXA, password: n5im); if that link expires, download from http://www.cs.toronto.edu/~guerzhoy/tf_alexnet/bvlc_alexnet.npy. Besides the two scripts and the pretrained parameters, you will also need the ImageNet 1000-class names (caffe_classes.py) and a few test images, which can be downloaded here.
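
To check what the converted file contains before wiring it up, here is a minimal sketch (it assumes the layer-name → [weights, biases] layout that load_weights in AlexNet.py relies on):

import numpy as np

# each key is a layer name ('conv1' ... 'fc8'); each value is a [weights, biases] pair
weights_dict = np.load('bvlc_alexnet.npy', encoding='bytes', allow_pickle=True).item()
for name, params in weights_dict.items():
    print(name, [p.shape for p in params])
# conv2, conv4, conv5 show 48/192/192 input channels: half of the previous
# layer's depth, reflecting the two-GPU grouping described above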


The Code


Part 1: AlexNet.py

#!/usr/bin/env python3
# coding=utf-8

"""
AlexNet Using TensorFlow

Author : Chai Zheng, Ph.D.@Zhejiang University, Hangzhou
Email  : [email protected]
Blog   : http://blog.csdn.net/chai_zheng/
Github : https://github.com/Chai-Zheng/
Date   : 2018.4.1
"""

import tensorflow as tf
import numpy as np


class AlexNet(object):
    def __init__(self, x, keep_prob, skip_layer, weights_path='bvlc_alexnet.npy'):
        self.x = x
        self.keep_prob = keep_prob
        self.skip_layer = skip_layer
        self.weights_path = weights_path
        self.build_AlexNet()

    def build_AlexNet(self):
        conv1 = conv_layer(self.x, 96, 11, 11, 4, 4, 'conv1', groups=1, padding='VALID')
        norm1 = LRN_layer(conv1, 2, 1e-4, 0.75, 'norm1')
        pool1 = max_pool_layer(norm1, 3, 3, 2, 2, 'pool1', padding='VALID')  # pooling in AlexNet is unpadded

        conv2 = conv_layer(pool1, 256, 5, 5, 1, 1, 'conv2', groups=2)
        norm2 = LRN_layer(conv2, 2, 1e-4, 0.75, 'norm2')
        pool2 = max_pool_layer(norm2, 3, 3, 2, 2, 'pool2', padding='VALID')

        conv3 = conv_layer(pool2, 384, 3, 3, 1, 1, 'conv3')

        conv4 = conv_layer(conv3, 384, 3, 3, 1, 1, 'conv4', groups=2)

        conv5 = conv_layer(conv4, 256, 3, 3, 1, 1, 'conv5', groups=2)
        pool5 = max_pool_layer(conv5, 3, 3, 2, 2, 'pool5', 'VALID')
        pool5_flatted = tf.reshape(pool5, [-1, 6*6*256], 'pool5_flatted')

        fc6 = fc_layer(pool5_flatted, 6*6*256, 4096, name='fc6')
        dropout6 = dropout(fc6, self.keep_prob)

        fc7 = fc_layer(dropout6, 4096, 4096, name='fc7')
        dropout7 = dropout(fc7, self.keep_prob)

        self.fc8 = fc_output_layer(dropout7, 4096, 1000, name='fc8')

    def load_weights(self, sess):
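        # bvlc_alexnet.npy maps each layer name to a [weights, biases] pair;
        # reuse the variable scopes created in build_AlexNet and copy the
        # arrays into the graph, skipping any layer listed in self.skip_layer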
        # allow_pickle is needed on NumPy >= 1.16.3 to load this pickled dict
        weights_dict = np.load(self.weights_path, encoding='bytes', allow_pickle=True).item()
        for name in weights_dict:
            if name not in self.skip_layer:
                with tf.variable_scope(name, reuse=True):
                    for p in weights_dict[name]:
                        if len(p.shape) == 1:    # bias
                            var = tf.get_variable('b', trainable=False)
                            sess.run(var.assign(p))
                        else: # weights
                            var = tf.get_variable('w', trainable=False)
                            sess.run(var.assign(p))


def weights(shape):
    return tf.get_variable('w', shape, trainable=True)


def bias(shape):
    return tf.get_variable('b', shape, trainable=True)


def conv_layer(x, filter_num, filter_height, filter_width, stride_x, stride_y, name, groups=1, padding='SAME'):
    channel = int(x.shape[-1])
    conv2d = lambda a, b: tf.nn.conv2d(input=a, filter=b, strides=[1, stride_y, stride_x, 1], padding=padding)
    with tf.variable_scope(name) as scope:
        w = weights([filter_height, filter_width, int(channel/groups), filter_num])
        b = bias([filter_num])

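        # grouped convolution reproduces the paper's two-GPU split: slice the
        # input channels and the kernels into `groups` parts, convolve each
        # pair separately, then concatenate the results along the channel axis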
        x_split = tf.split(value=x, num_or_size_splits=groups, axis=3)
        w_split = tf.split(value=w, num_or_size_splits=groups, axis=3)

        conv_split = [conv2d(m, n) for m, n in zip(x_split, w_split)]
        conv_merge = tf.concat(conv_split, axis=3)

        return tf.nn.relu(conv_merge + b, name=scope.name)


def LRN_layer(x, R, alpha, beta, name, bias=1.0):
    return tf.nn.local_response_normalization(x, depth_radius=R, alpha=alpha, beta=beta, name=name, bias=bias)


def max_pool_layer(x, filter_height, filter_width, stride_x, stride_y, name, padding='SAME'):
    return tf.nn.max_pool(x, ksize=[1, filter_height, filter_width, 1], strides=[1, stride_y, stride_x, 1],
                          padding=padding, name=name)


def dropout(x, keep_prob, name=None):
    # name must be passed as a keyword: the third positional argument of tf.nn.dropout is noise_shape
    return tf.nn.dropout(x, keep_prob, name=name)


def fc_layer(x, input_num, output_num, name):
    with tf.variable_scope(name) as scope:
        w = weights([input_num, output_num])
        b = bias([output_num])
        return tf.nn.relu(tf.matmul(x, w)+b)


def fc_output_layer(x, input_num, output_num, name):
    with tf.variable_scope(name) as scope:
        w = weights([input_num, output_num])
        b = bias([output_num])
        return tf.nn.softmax(tf.matmul(x, w)+b)
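
With the network defined, a quick graph-only shape check (a hypothetical snippet, not part of the original scripts; no weights are loaded, and keep_prob can be a plain float):

import tensorflow as tf
import AlexNet

x = tf.placeholder(tf.float32, [1, 227, 227, 3])
model = AlexNet.AlexNet(x, keep_prob=1.0, skip_layer=[])
print(model.fc8.shape)    # (1, 1000): one softmax probability per ImageNet class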

Part 2: train.py, loading the parameters and validating the model

#!/usr/bin/env python3
# coding=utf-8

"""
AlexNet Using TensorFlow

Author : Chai Zheng, Ph.D.@Zhejiang University, Hangzhou
Email  : [email protected]
Blog   : http://blog.csdn.net/chai_zheng/
Github : https://github.com/Chai-Zheng/
Date   : 2018.4.1
"""

import os
import cv2
import numpy as np
import tensorflow as tf
import matplotlib.pyplot as plt
import AlexNet
import caffe_classes

path = 'test_images'

withPath = lambda f: '{}/{}'.format(path, f)
test_images = dict((f, cv2.imread(withPath(f))) for f in os.listdir(path) if os.path.isfile(withPath(f)))

if test_images:  # proceed only if at least one image was read
    dropout_prob = 1.0
    skip_layer = []

    image_mean = np.array([104., 117., 124.], dtype=np.float32)  # per-channel (BGR) mean subtracted from the input
    x = tf.placeholder('float', [1, 227, 227, 3])

    trained_model = AlexNet.AlexNet(x, dropout_prob, skip_layer)
    y_predict = trained_model.fc8

    fig = plt.figure()

    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        trained_model.load_weights(sess)

        j = 0
        for i, img in test_images.items():
            image_resized = cv2.resize(img.astype(np.float32), (227, 227))-image_mean
            probs = sess.run(y_predict, feed_dict={x: image_resized.reshape(1, 227, 227, 3)})
            max_prob = np.max(probs)
            y_pre = caffe_classes.class_names[np.argmax(probs)]

            fig.add_subplot(1, len(test_images), j+1)  # one subplot per test image
            plt.imshow(cv2.cvtColor(img, cv2.COLOR_BGR2RGB))
            plt.title('class:{} probability:{}'.format(y_pre, max_prob))
            j += 1

        plt.show()
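
train.py reports only the top-1 class. If you prefer the customary top-5 view, here is a small variant of the loop body (a sketch, reusing probs as computed above):

# sort class indices by descending probability and keep the first five
top5 = np.argsort(probs[0])[::-1][:5]
for k in top5:
    print('{:6.2%}  {}'.format(probs[0][k], caffe_classes.class_names[k]))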

The final result is shown below:
[Figure: the test images displayed with their predicted class and probability]

References:
[1] Krizhevsky A, Sutskever I, Hinton G E. ImageNet classification with deep convolutional neural networks[C]//Advances in Neural Information Processing Systems. 2012: 1097-1105.
[2] https://kratzert.github.io/2017/02/24/finetuning-alexnet-with-tensorflow.html
[3] https://github.com/kratzert/finetune_alexnet_with_tensorflow
[4] https://blog.csdn.net/zyqdragon/article/details/72353420
