TensorFlow Learning (6. AlexNet Implementation and Cat vs. Dog Classification)

For an introduction to AlexNet, see: https://blog.csdn.net/qq_26499769/article/details/82928164

I then tried implementing it myself, following the tutorial:

from skimage import io,transform
import glob
import numpy as np
import tensorflow as tf
#from alexnet import alexnet


def alexnet(x, keep_prob, num_classes):
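    # x: batch of input images with shape [None, 227, 227, 3]
    # keep_prob: dropout keep probability (scalar placeholder)
    # num_classes: number of output classes; the raw fc8 logits are returned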
    # conv1
    with tf.name_scope('conv1') as scope:
        kernel = tf.Variable(tf.truncated_normal([11, 11, 3, 96], dtype=tf.float32,
                                                 stddev=1e-1), name='weights')
        conv = tf.nn.conv2d(x, kernel, [1, 4, 4, 1], padding='SAME')
        biases = tf.Variable(tf.constant(0.0, shape=[96], dtype=tf.float32),
                             trainable=True, name='biases')
        bias = tf.nn.bias_add(conv, biases)
        conv1 = tf.nn.relu(bias, name=scope)

    # lrn1
    with tf.name_scope('lrn1') as scope:
        lrn1 = tf.nn.local_response_normalization(conv1,
                                                  alpha=1e-4,
                                                  beta=0.75,
                                                  depth_radius=2,
                                                  bias=2.0)

    # pool1
    with tf.name_scope('pool1') as scope:
        pool1 = tf.nn.max_pool(lrn1,
                               ksize=[1, 3, 3, 1],
                               strides=[1, 2, 2, 1],
                               padding='VALID')

    # conv2
    with tf.name_scope('conv2') as scope:
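        # conv2 follows the two-group ("two GPU") structure of the original AlexNet:
        # the input and the kernels are split in half along the channel axis,
        # convolved separately, and the results are concatenated; conv4 and conv5 below do the same.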
        pool1_groups = tf.split(axis=3, value=pool1, num_or_size_splits=2)
        kernel = tf.Variable(tf.truncated_normal([5, 5, 48, 256], dtype=tf.float32,
                                                 stddev=1e-1), name='weights')
        kernel_groups = tf.split(axis=3, value=kernel, num_or_size_splits=2)
        conv_up = tf.nn.conv2d(pool1_groups[0], kernel_groups[0], [1, 1, 1, 1], padding='SAME')
        conv_down = tf.nn.conv2d(pool1_groups[1], kernel_groups[1], [1, 1, 1, 1], padding='SAME')
        biases = tf.Variable(tf.constant(0.0, shape=[256], dtype=tf.float32),
                             trainable=True, name='biases')
        biases_groups = tf.split(axis=0, value=biases, num_or_size_splits=2)
        bias_up = tf.nn.bias_add(conv_up, biases_groups[0])
        bias_down = tf.nn.bias_add(conv_down, biases_groups[1])
        bias = tf.concat(axis=3, values=[bias_up, bias_down])
        conv2 = tf.nn.relu(bias, name=scope)

    # lrn2
    with tf.name_scope('lrn2') as scope:
        lrn2 = tf.nn.local_response_normalization(conv2,
                                                  alpha=1e-4,
                                                  beta=0.75,
                                                  depth_radius=2,
                                                  bias=2.0)

    # pool2
    with tf.name_scope('pool2') as scope:
        pool2 = tf.nn.max_pool(lrn2,
                               ksize=[1, 3, 3, 1],
                               strides=[1, 2, 2, 1],
                               padding='VALID')

    # conv3
    with tf.name_scope('conv3') as scope:
        kernel = tf.Variable(tf.truncated_normal([3, 3, 256, 384],
                                                 dtype=tf.float32,
                                                 stddev=1e-1), name='weights')
        conv = tf.nn.conv2d(pool2, kernel, [1, 1, 1, 1], padding='SAME')
        biases = tf.Variable(tf.constant(0.0, shape=[384], dtype=tf.float32),
                             trainable=True, name='biases')
        bias = tf.nn.bias_add(conv, biases)
        conv3 = tf.nn.relu(bias, name=scope)

    # conv4
    with tf.name_scope('conv4') as scope:
        conv3_groups = tf.split(axis=3, value=conv3, num_or_size_splits=2)
        kernel = tf.Variable(tf.truncated_normal([3, 3, 192, 384],
                                                 dtype=tf.float32,
                                                 stddev=1e-1), name='weights')
        kernel_groups = tf.split(axis=3, value=kernel, num_or_size_splits=2)
        conv_up = tf.nn.conv2d(conv3_groups[0], kernel_groups[0], [1, 1, 1, 1], padding='SAME')
        conv_down = tf.nn.conv2d(conv3_groups[1], kernel_groups[1], [1, 1, 1, 1], padding='SAME')
        biases = tf.Variable(tf.constant(0.0, shape=[384], dtype=tf.float32),
                             trainable=True, name='biases')
        biases_groups = tf.split(axis=0, value=biases, num_or_size_splits=2)
        bias_up = tf.nn.bias_add(conv_up, biases_groups[0])
        bias_down = tf.nn.bias_add(conv_down, biases_groups[1])
        bias = tf.concat(axis=3, values=[bias_up, bias_down])
        conv4 = tf.nn.relu(bias, name=scope)

    # conv5
    with tf.name_scope('conv5') as scope:
        conv4_groups = tf.split(axis=3, value=conv4, num_or_size_splits=2)
        kernel = tf.Variable(tf.truncated_normal([3, 3, 192, 256],
                                                 dtype=tf.float32,
                                                 stddev=1e-1), name='weights')
        kernel_groups = tf.split(axis=3, value=kernel, num_or_size_splits=2)
        conv_up = tf.nn.conv2d(conv4_groups[0], kernel_groups[0], [1, 1, 1, 1], padding='SAME')
        conv_down = tf.nn.conv2d(conv4_groups[1], kernel_groups[1], [1, 1, 1, 1], padding='SAME')
        biases = tf.Variable(tf.constant(0.0, shape=[256], dtype=tf.float32),
                             trainable=True, name='biases')
        biases_groups = tf.split(axis=0, value=biases, num_or_size_splits=2)
        bias_up = tf.nn.bias_add(conv_up, biases_groups[0])
        bias_down = tf.nn.bias_add(conv_down, biases_groups[1])
        bias = tf.concat(axis=3, values=[bias_up, bias_down])
        conv5 = tf.nn.relu(bias, name=scope)

    # pool5
    with tf.name_scope('pool5') as scope:
        pool5 = tf.nn.max_pool(conv5,
                               ksize=[1, 3, 3, 1],
                               strides=[1, 2, 2, 1],
                               padding='VALID')

    # flattened6
    with tf.name_scope('flattened6') as scope:
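        # For a 227x227 input the spatial size shrinks as
        # conv1 (stride 4): 227 -> 57, pool1: -> 28, pool2: -> 13, pool5: -> 6,
        # so pool5 has shape [N, 6, 6, 256] and flattening gives 6*6*256 features.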
        flattened = tf.reshape(pool5, shape=[-1, 6 * 6 * 256])

    # fc6
    with tf.name_scope('fc6') as scope:
        weights = tf.Variable(tf.truncated_normal([6 * 6 * 256, 4096],
                                                  dtype=tf.float32,
                                                  stddev=1e-1), name='weights')
        biases = tf.Variable(tf.constant(0.0, shape=[4096], dtype=tf.float32),
                             trainable=True, name='biases')
        bias = tf.nn.xw_plus_b(flattened, weights, biases)
        fc6 = tf.nn.relu(bias)

    # dropout6
    with tf.name_scope('dropout6') as scope:
        dropout6 = tf.nn.dropout(fc6, keep_prob)

    # fc7
    with tf.name_scope('fc7') as scope:
        weights = tf.Variable(tf.truncated_normal([4096, 4096],
                                                  dtype=tf.float32,
                                                  stddev=1e-1), name='weights')
        biases = tf.Variable(tf.constant(0.0, shape=[4096], dtype=tf.float32),
                             trainable=True, name='biases')
        bias = tf.nn.xw_plus_b(dropout6, weights, biases)
        fc7 = tf.nn.relu(bias)

    # dropout7
    with tf.name_scope('dropout7') as scope:
        dropout7 = tf.nn.dropout(fc7, keep_prob)

    # fc8
    with tf.name_scope('fc8') as scope:
        weights = tf.Variable(tf.truncated_normal([4096, num_classes],
                                                  dtype=tf.float32,
                                                  stddev=1e-1), name='weights')
        biases = tf.Variable(tf.constant(0.0, shape=[num_classes], dtype=tf.float32),
                             trainable=True, name='biases')
        fc8 = tf.nn.xw_plus_b(dropout7, weights, biases)

    return fc8



#Resize all images to 227*227*3
w = 227
h = 227
c = 3
num_classes=2
learning_rate=0.5
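#Note: 0.5 is an unusually large learning rate for Adam; a value around 1e-4 is more typical
#and is probably needed before the loss will decrease at all.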

#Directories where the training and test images (Kaggle cats-vs-dogs) are stored
train_path = "train/"
test_path = "test/"

#Function that reads the images and their labels
def read_image(path,start_num,end_num):
    images = []
    labels = []
    for animal in ['cat','dog']:
        for img_num in range(start_num,end_num,1):#iterate over the image indices in [start_num, end_num)
            img=path + '/' + animal + '.'+str(img_num+1)+'.jpg'
            print("reading the image:%s"%img)
            image = io.imread(img)
            image = transform.resize(image,(w,h,c))
            images.append(image)
            if animal == 'cat':
                labels.append([0,1])#one-hot label for cat
            else:
                labels.append([1,0])#one-hot label for dog
    return np.asarray(images,dtype=np.float32),np.asarray(labels,dtype=np.int32)#both np.array and np.asarray turn the data into an ndarray; the main difference is that when the source is already an ndarray, np.array still makes a copy (using new memory) while np.asarray does not
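#A quick illustration of the array/asarray difference noted above, for an existing ndarray a:
#  a = np.zeros((2, 2), dtype=np.float32)
#  np.array(a) is a    -> False (np.array copies by default)
#  np.asarray(a) is a  -> True  (no copy when the dtype already matches)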




#Read the training data and test data
train_data,train_label = read_image(train_path,1,1+500)
test_data,test_label = read_image(train_path,1001,1001+500)
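#Note: the "test" data above is just a different slice of train/, not the Kaggle test/ folder,
#because the images in test/ come without labels; test_path is therefore never used.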

#Shuffle the training data and test data
train_image_num = len(train_data)
train_image_index = np.arange(train_image_num)#arange(start, stop, step, dtype=None) generates an ndarray covering [start, stop) with the given step
np.random.shuffle(train_image_index)#in-place shuffle; on a multi-dimensional array only the first axis is shuffled, see https://blog.csdn.net/jasonzzj/article/details/53932645
train_data = train_data[train_image_index]#data reordered by the shuffled indices
train_label = train_label[train_image_index]

test_image_num = len(test_data)
test_image_index = np.arange(test_image_num)
np.random.shuffle(test_image_index)
test_data = test_data[test_image_index]
test_label = test_label[test_image_index]

#Build the AlexNet graph

x = tf.placeholder(tf.float32,[None,w,h,c],name='x')
y_ = tf.placeholder(tf.float32,[None,num_classes],name='y_')#one-hot labels; float dtype so they can be used directly by the softmax cross-entropy op
keep_prob = tf.placeholder(tf.float32)
fc8 = alexnet(x, keep_prob, num_classes)

# loss
with tf.name_scope('loss'):
    loss_op = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits_v2(logits=fc8,
                                                                        labels=y_))
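    # softmax_cross_entropy_with_logits_v2 applies the softmax internally,
    # so fc8 has to be fed in as raw logits (no softmax layer before this).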
# optimizer
with tf.name_scope('optimizer'):
    optimizer = tf.train.AdamOptimizer(learning_rate=learning_rate)
    train_op = optimizer.minimize(loss_op)

# accuracy
with tf.name_scope("accuracy"):
    correct_pred = tf.equal(tf.argmax(fc8, 1), tf.argmax(y_, 1))
    accuracy = tf.reduce_mean(tf.cast(correct_pred, tf.float32))

init = tf.global_variables_initializer()

# Tensorboard
filewriter_path='alexnet_tmp/tensorboard'
tf.summary.scalar('loss', loss_op)
tf.summary.scalar('accuracy', accuracy)
merged_summary = tf.summary.merge_all()
writer = tf.summary.FileWriter(filewriter_path)
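#The logged scalars can then be viewed with: tensorboard --logdir alexnet_tmp/tensorboard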

# saver
saver = tf.train.Saver()




#Yield batch_size samples at a time for training or testing
def get_batch(data,label,batch_size):
    for start_index in range(0,len(data)-batch_size+1,batch_size):
        slice_index = slice(start_index,start_index+batch_size)
        yield data[slice_index],label[slice_index]
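#Note: any final remainder smaller than batch_size is simply dropped by get_batch.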

#Create a Session
with tf.Session() as sess:
    #Initialize all variables (weights, biases, etc.)
    #Go through all the training samples train_num times, in mini-batches of 64.
    #train_num can be set larger.
    train_num = 10
    batch_size = 64


    sess.run(init)

    j = 0
    for i in range(train_num):
        print("epoch:",i)
        train_loss,train_acc,batch_num = 0, 0, 0
        for train_data_batch,train_label_batch in get_batch(train_data,train_label,batch_size):
            _,err,acc = sess.run([train_op,loss_op,accuracy],feed_dict={x:train_data_batch,y_:train_label_batch,keep_prob:0.5})
            train_loss+=err;train_acc+=acc;batch_num+=1
            j=j+1
            result=sess.run(merged_summary,feed_dict={x: train_data_batch, y_: train_label_batch, keep_prob: 0.5})
            writer.add_summary(result, j)
        print("train loss:",train_loss/(batch_num))
        print("train acc:",train_acc/(batch_num))


        test_loss,test_acc,batch_num = 0, 0, 0
        for test_data_batch,test_label_batch in get_batch(test_data,test_label,batch_size):
            err,acc = sess.run([loss_op,accuracy],feed_dict={x:test_data_batch,y_:test_label_batch,keep_prob:1})
            test_loss+=err;test_acc+=acc;batch_num+=1
        print("test loss:",test_loss/(batch_num))
        print("test acc:",test_acc/(batch_num))
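One more detail: the Saver created above is never actually used, so the trained weights are lost when the session closes. A minimal way to persist them (the checkpoint path is only an example) is to add one line at the end of the session block:

    saver.save(sess, 'alexnet_tmp/model.ckpt')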

Dataset source:

https://www.kaggle.com/c/dogs-vs-cats-redux-kernels-edition/data (in my experience, putting the files on a cloud drive makes them nearly impossible to download)

The training accuracy comes out at only about 50%, probably because so little data was used (note also that a learning rate of 0.5 is far larger than is typical for Adam, which does not help).

A few quick notes on the problems here:

1. The code is not commented very well; I will add detailed comments when I have time.

2. Because of my computer's limitations I did not train with much data, so the results are actually quite poor and do not really validate anything; I will verify the code properly when I get the chance.

3. One problem with the code itself: since the dataset is fairly large, reading it all in for a single training run can exceed the available memory. The proper approach is to feed the data in chunks: after training on each chunk, save the parameters, then load them back and keep training on new data (see the sketch after this list).

4. I have not read the AlexNet paper yet; I will go through it in detail when I find the time.
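A rough sketch of the chunked-training idea from point 3. The chunk size, index ranges, and checkpoint path are only illustrative; read_image, get_batch, init, saver, train_op and the placeholders are the ones defined above:

    ckpt_path = 'alexnet_tmp/model.ckpt'        #example checkpoint location
    first_chunk = True
    for start in range(1, 10001, 500):          #load 500 cat + 500 dog images per chunk
        chunk_data, chunk_label = read_image(train_path, start, start + 500)
        with tf.Session() as sess:
            sess.run(init)
            if not first_chunk:
                saver.restore(sess, ckpt_path)  #continue from the previous chunk's weights
            for batch_x, batch_y in get_batch(chunk_data, chunk_label, 64):
                sess.run(train_op, feed_dict={x: batch_x, y_: batch_y, keep_prob: 0.5})
            saver.save(sess, ckpt_path)         #save the weights before loading the next chunk
        first_chunk = False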

Since I do not have much spare time, blog updates cover problems I happen to think about, so please bear with me if the explanations seem disorganized. When I get the chance I may write posts on the topics I care about, for example: variable-size image inputs, imbalanced samples, samples with few labels, small-object detection, network optimization and overfitting, fitting systems with varying parameters (such as PSF deconvolution where the kernel changes), improving accuracy and suppressing overfitting without large-scale data augmentation or extra computation, and the real-time and lightweight requirements of practical projects. If you have insights on any of these, I would be glad to learn together.


Reposted from blog.csdn.net/qq_26499769/article/details/82928178