完整实现利用tensorflow训练自己的图片数据集

经过差不多一个礼拜的学习，终于完整实现了一个利用自己爬取的图片做训练数据集的卷积神经网络（基于tensorflow）。

简单整理一下思路：

获取数据集（上网爬取，或者直接找公开的图片数据集）
reshape图片成相同大小（公开数据集一般都是相同shape的）
对图片集进行处理，得到网络的输入
搭建卷积神经网络框架
进行网络的训练
读取训练好的网络完成测试

第三、四部分的参考和具体实现都在前面的两篇博客里提到了

搭建一个卷积神经网络

利用tensorflow训练自己的图片数据集——数据准备

再附一下重点参考：江湖人称星爷的博客。给了我很大的指引和参考。现在附的跟前面有一点改动，基本思路是一样的。

第一、二部分还有一些小问题，暂时先不放上来。

第三部分：新建PreWork.py文件

'''
PreWork.py
功能：实现对指定大小的生成图片进行sample与label分类制作
获得神经网络输入的get_files文件，同时为了方便网络的训练，输入数据进行batch处理。

2018/7/19完成
-------copyright@GCN-------
'''

import os
import numpy as np
from PIL import Image
import tensorflow as tf
import matplotlib.pyplot as plt
from numpy import *

# Per-class image path lists and their parallel label lists.  They stay at
# module level for backward compatibility; get_file() refills them on every
# call instead of appending to whatever a previous call left behind.
angry = []
label_angry = []
disgusted = []
label_disgusted = []
fearful = []
label_fearful = []
happy = []
label_happy = []
sadness = []
label_sadness = []
surprised = []
label_surprised = []

# (sub-folder name, path list, label list).  The position of an entry in this
# tuple is the integer class label assigned to the images of that folder.
_CLASS_BUCKETS = (
    ('angry', angry, label_angry),
    ('disgusted', disgusted, label_disgusted),
    ('fearful', fearful, label_fearful),
    ('happy', happy, label_happy),
    ('sadness', sadness, label_sadness),
    ('surprised', surprised, label_surprised),
)


def get_file(file_dir):
    """Collect image paths and integer labels from the six class sub-folders.

    ``file_dir`` must contain the sub-directories ``angry``, ``disgusted``,
    ``fearful``, ``happy``, ``sadness`` and ``surprised``; every entry found
    in them is treated as an image and labelled 0..5 in that order.

    Args:
        file_dir: Root directory of the data set.

    Returns:
        A pair ``(image_list, label_list)``: a NumPy array of path strings and
        a list of Python ``int`` labels, shuffled together with
        ``np.random`` so that index ``i`` of both still refers to the same
        sample.

    Raises:
        OSError: If one of the class sub-directories cannot be listed.
    """
    # Step 1: gather the paths of every class and tag them with the class id.
    for class_id, (folder, paths, labels) in enumerate(_CLASS_BUCKETS):
        # Reset first so that repeated calls do not duplicate samples.
        del paths[:]
        del labels[:]
        for file in os.listdir(file_dir + '/' + folder):
            paths.append(file_dir + '/' + folder + '/' + file)
            labels.append(class_id)

    # Print how many images were found per class as a quick sanity check.
    print("There are %d angry\nThere are %d disgusted\nThere are %d fearful\n" %(len(angry), len(disgusted), len(fearful)),end="")
    print("There are %d happy\nThere are %d sadness\nThere are %d surprised\n" %(len(happy),len(sadness),len(surprised)))

    # Step 2: merge all classes, then shuffle paths and labels with one shared
    # permutation.  Labels stay integers throughout, so no string round-trip
    # is needed and the pairing between path and label cannot be lost.
    image_list = [path for _, paths, _ in _CLASS_BUCKETS for path in paths]
    label_list = [lab for _, _, labels in _CLASS_BUCKETS for lab in labels]

    order = np.random.permutation(len(image_list))
    all_image_list = np.array(image_list)[order]
    all_label_list = [int(label_list[i]) for i in order]

    return all_image_list, all_label_list

def get_batch(image, label, image_W, image_H, batch_size, capacity):
    """Build a queue-based input pipeline that yields image/label batches.

    Args:
        image: List (or array) of JPEG file paths.
        label: Integer class labels, parallel to ``image``.
        image_W: Target image width in pixels.
        image_H: Target image height in pixels.
        batch_size: Number of images per batch.
        capacity: Maximum number of elements held by the batching queue.

    Returns:
        ``(image_batch, label_batch)`` where ``image_batch`` is a float32
        tensor of shape ``[batch_size, image_H, image_W, 3]`` and
        ``label_batch`` is an int32 tensor of shape ``[batch_size]``.  Queue
        runners must be started before the tensors are evaluated.
    """
    # Step 1: convert the Python lists to tensors and feed them to an input
    # queue that hands out one (path, label) pair at a time.
    image = tf.cast(image, tf.string)
    label = tf.cast(label, tf.int32)
    input_queue = tf.train.slice_input_producer([image, label])
    label = input_queue[1]
    image_contents = tf.read_file(input_queue[0])   # raw bytes of the file

    # Step 2: decode the image; all files are expected to be JPEG.
    image = tf.image.decode_jpeg(image_contents, channels=3)

    # Step 3: bring every image to the same size and standardise it.
    # resize_image_with_crop_or_pad takes (target_height, target_width), so
    # the height has to be passed first.  It crops/pads centrally; it does
    # not rescale the picture.
    image = tf.image.resize_image_with_crop_or_pad(image, image_H, image_W)
    image = tf.image.per_image_standardization(image)

    # Step 4: assemble batches.
    image_batch, label_batch = tf.train.batch([image, label], batch_size=batch_size, num_threads=16, capacity=capacity)

    # Make the static shape of the label batch explicit.
    label_batch = tf.reshape(label_batch, [batch_size])
    # The network consumes float32 input (cast to uint8 instead if the batch
    # is only going to be displayed as a colour image).
    image_batch = tf.cast(image_batch, tf.float32)
    return image_batch, label_batch

# Disabled demo: the PreWork() routine below is wrapped in a string literal,
# so it is neither defined nor executed when this module is imported.  When
# enabled it builds one batch with get_file()/get_batch() and shows every
# image of that batch with matplotlib, titled with its class name.
"""
def PreWork():
    # 对预处理的数据进行可视化，查看预处理的效果
    IMG_W = 256
    IMG_H = 256
    BATCH_SIZE = 6
    CAPACITY = 64

    train_dir = 'F:/Python/PycharmProjects/DeepLearning/CK+_part'

    # image_list, label_list, val_images, val_labels = get_file(train_dir)
    image_list, label_list = get_file(train_dir)
    image_batch, label_batch = get_batch(image_list, label_list, IMG_W, IMG_H, BATCH_SIZE, CAPACITY)
    print(label_batch.shape)

    lists = ('angry', 'disgusted', 'fearful', 'happy', 'sadness', 'surprised')

    with tf.Session() as sess:
        i = 0
        coord = tf.train.Coordinator()  # 创建一个线程协调器，用来管理之后在Session中启动的所有线程
        threads = tf.train.start_queue_runners(coord=coord)
        try:
            while not coord.should_stop() and i < 1:
                # 提取出两个batch的图片并可视化。
                img, label = sess.run([image_batch, label_batch])  # 在会话中取出img和label
                # img = tf.cast(img, tf.uint8)

                '''
                1、range()返回的是range object，而np.arange()返回的是numpy.ndarray()
                range(start, end, step)，返回一个list对象，起始值为start，终止值为end，但不含终止值，步长为step。只能创建int型list。
                arange(start, end, step)，与range()类似，但是返回一个array对象。需要引入import numpy as np，并且arange可以使用float型数据。
                
                2、range()不支持步长为小数，np.arange()支持步长为小数
                
                3、两者都可用于迭代
                range尽可用于迭代，而np.nrange作用远不止于此，它是一个序列，可被当做向量使用。
                '''
                for j in np.arange(BATCH_SIZE):
                    # np.arange()函数返回一个有终点和起点的固定步长的排列
                    print('label: %d' % label[j])
                    plt.imshow(img[j, :, :, :])
                    title = lists[int(label[j])]
                    plt.title(title)
                    plt.show()
                i += 1
        except tf.errors.OutOfRangeError:
            print('done!')
        finally:
            coord.request_stop()
        coord.join(threads)

if __name__ == '__main__':
    PreWork()
"""

第四部分：新建CNNModel.py文件

补充的知识点

在TensorFlow的世界里，变量的定义和初始化是分开的，所有关于图变量的赋值和计算都要通过tf.Session的run来进行。

想要将所有图变量进行集体初始化时应该使用tf.global_variables_initializer
tf.placeholder与tf.Variable的区别：
    tf.placeholder 占位符
        主要用于传入真实输入数据和输出标签，不需要指定初始值，具体值在运行时通过feed_dict给出。
    tf.Variable 主要用于定义weights bias等可训练会改变的变量，必须指定初始值。
        通过Variable()构造函数后，此variable的类型和形状固定不能修改了，但值可以用assign方法修改。

tf.get_variable和tf.Variable函数差别
相同点：通过两函数创建变量的过程基本一样，
        tf.get_variable函数调用时提供的维度(shape)信息以及初始化方法(initializer)的参数和tf.Variable函数调用时提供的初始化过程中的参数基本类似。
不同点：两函数指定变量名称的参数不同，
        对于tf.Variable函数，变量名称是一个可选的参数，通过name="v"的形式给出
        tf.get_variable函数，变量名称是一个必填的参数，它会根据变量名称去创建或者获取变量

'''
CNNModel.py
含3层卷积池化层，2层全连接层，激活函数ReLU，采用dropout和softmax函数做分类器
2018/7/18完成搭建，2018/7/19实现文件调用，2018/7/22修改网络结构
-------copyright@GCN-------
'''
import tensorflow as tf


# 函数申明
def weight_variable(shape, n):
    """Return initial weight values drawn from a truncated normal distribution.

    Args:
        shape: Shape of the tensor to generate.
        n: Standard deviation of the distribution (the mean is left at the
            TensorFlow default).

    Returns:
        A float32 tensor of the requested shape, meant to be wrapped in a
        ``tf.Variable`` by the caller.
    """
    return tf.truncated_normal(shape, stddev=n, dtype=tf.float32)

def bias_variable(shape):
    """Return initial bias values: a float32 tensor of ``shape`` filled with 0.1."""
    fill_value = 0.1
    return tf.constant(fill_value, shape=shape, dtype=tf.float32)

def conv2d(x, W):
    """Convolve ``x`` with the kernel ``W`` using stride 1 and SAME padding.

    SAME padding zero-pads the borders, so with stride 1 the spatial size of
    the output equals that of the input.
    """
    unit_strides = [1, 1, 1, 1]
    return tf.nn.conv2d(x, W, strides=unit_strides, padding='SAME')

def max_pool_2x2(x, name):
    """Max-pool ``x`` with a 3x3 window, stride 2 and SAME padding.

    Despite the function name the pooling window is 3x3 (overlapping
    pooling); the stride of 2 is what halves height and width.

    Args:
        x: Input tensor laid out as [batch, height, width, channels].
        name: Name given to the pooling op.

    Returns:
        The pooled tensor.
    """
    window = [1, 3, 3, 1]
    step = [1, 2, 2, 1]
    return tf.nn.max_pool(x, ksize=window, strides=step, padding='SAME', name=name)


def deep_CNN(images, batch_size, n_classes, keep_prob=0.5):
    """Build the CNN graph and return the un-normalised class scores.

    Architecture: three stages of (3x3 convolution + ReLU, 3x3/stride-2
    max-pooling, local response normalisation) with 64, 32 and 16 filters,
    followed by two 128-unit fully connected ReLU layers, dropout and a final
    linear layer.  No softmax is applied here; the loss applies it.

    Args:
        images: Input batch with 3 channels, [batch_size, height, width, 3].
        batch_size: Number of images in the batch; used to flatten the
            convolutional features.
        n_classes: Number of output classes.
        keep_prob: Probability of keeping a unit in the dropout layer.
            Defaults to 0.5 (the previously hard-coded value); pass 1.0 to
            disable dropout when the graph is used for inference.

    Returns:
        A float32 tensor of shape [batch_size, n_classes] holding the logits.
    """
    # Stage 1: 64 kernels of 3x3 over the 3 input channels.  Kernel shape is
    # [height, width, in_channels, out_channels].
    with tf.variable_scope('conv1'):
        w_conv1 = tf.Variable(weight_variable([3, 3, 3, 64], 1.0), name='weights', dtype=tf.float32)
        b_conv1 = tf.Variable(bias_variable([64]), name='biases', dtype=tf.float32)
        # Spatial size is unchanged by the SAME/stride-1 convolution.
        h_conv1 = tf.nn.relu(conv2d(images, w_conv1)+b_conv1, name='conv1')
    # Pooling halves height and width; LRN follows the pooling.
    with tf.variable_scope('pooling1_lrn'):
        pool1 = max_pool_2x2(h_conv1, 'pooling1')
        norm1 = tf.nn.lrn(pool1, depth_radius=4, bias=1.0, alpha=0.001 / 9.0, beta=0.75, name='norm1')

    # Stage 2: 32 kernels of 3x3 over 64 channels.
    with tf.variable_scope('conv2'):
        w_conv2 = tf.Variable(weight_variable([3, 3, 64, 32], 0.1), name='weights', dtype=tf.float32)
        b_conv2 = tf.Variable(bias_variable([32]), name='biases', dtype=tf.float32)
        h_conv2 = tf.nn.relu(conv2d(norm1, w_conv2)+b_conv2, name='conv2')

    with tf.variable_scope('pooling2_lrn'):
        pool2 = max_pool_2x2(h_conv2, 'pooling2')
        norm2 = tf.nn.lrn(pool2, depth_radius=4, bias=1.0, alpha=0.001 / 9.0, beta=0.75, name='norm2')

    # Stage 3: 16 kernels of 3x3 over 32 channels.
    with tf.variable_scope('conv3'):
        w_conv3 = tf.Variable(weight_variable([3, 3, 32, 16], 0.1), name='weights', dtype=tf.float32)
        b_conv3 = tf.Variable(bias_variable([16]), name='biases', dtype=tf.float32)
        h_conv3 = tf.nn.relu(conv2d(norm2, w_conv3)+b_conv3, name='conv3')

    with tf.variable_scope('pooling3_lrn'):
        pool3 = max_pool_2x2(h_conv3, 'pooling3')
        norm3 = tf.nn.lrn(pool3, depth_radius=4, bias=1.0, alpha=0.001 / 9.0, beta=0.75, name='norm3')

    # First fully connected layer: flatten the feature maps to one row per
    # image and project to 128 units.
    with tf.variable_scope('local3') as scope:
        reshape = tf.reshape(norm3, shape=[batch_size, -1])
        dim = reshape.get_shape()[1].value
        w_fc1 = tf.Variable(weight_variable([dim, 128], 0.005),  name='weights', dtype=tf.float32)
        b_fc1 = tf.Variable(bias_variable([128]), name='biases', dtype=tf.float32)
        h_fc1 = tf.nn.relu(tf.matmul(reshape, w_fc1) + b_fc1, name=scope.name)

    # Second fully connected layer, 128 units.
    with tf.variable_scope('local4') as scope:
        w_fc2 = tf.Variable(weight_variable([128 ,128], 0.005),name='weights', dtype=tf.float32)
        b_fc2 = tf.Variable(bias_variable([128]), name='biases', dtype=tf.float32)
        # Use this layer's own bias (the first layer's bias was added here
        # before, which left b_fc2 unused and untrained).
        h_fc2 = tf.nn.relu(tf.matmul(h_fc1, w_fc2) + b_fc2, name=scope.name)

    # Dropout on the fully connected features.
    h_fc2_dropout = tf.nn.dropout(h_fc2, keep_prob)

    # Final linear layer: one score per class.  The variable names are kept
    # as they were so that variable names in saved checkpoints do not change.
    with tf.variable_scope('softmax_linear'):
        weights = tf.Variable(weight_variable([128, n_classes], 0.005), name='softmax_linear', dtype=tf.float32)
        biases = tf.Variable(bias_variable([n_classes]), name='biases', dtype=tf.float32)
        softmax_linear = tf.add(tf.matmul(h_fc2_dropout, weights), biases, name='softmax_linear')
    return softmax_linear


def losses(logits, labels):
    """Compute the mean softmax cross-entropy loss of a batch.

    Args:
        logits: Un-normalised class scores produced by the network.
        labels: Integer class indices (sparse labels, not one-hot).

    Returns:
        A scalar tensor with the batch-averaged loss.  A scalar summary of
        the loss is registered as a side effect.
    """
    with tf.variable_scope('loss') as loss_scope:
        per_example = tf.nn.sparse_softmax_cross_entropy_with_logits(
            labels=labels, logits=logits, name='xentropy_per_example')
        mean_loss = tf.reduce_mean(per_example, name='loss')
        summary_tag = loss_scope.name + '/loss'
        tf.summary.scalar(summary_tag, mean_loss)
    return mean_loss

def trainning(loss, learning_rate):
    """Create the optimisation op that minimises ``loss`` with Adam.

    Args:
        loss: Scalar loss tensor to minimise.
        learning_rate: Learning rate handed to the Adam optimizer.

    Returns:
        The training op; run it in a session to perform one update step.
        Every run also increments a non-trainable ``global_step`` variable.
    """
    with tf.name_scope('optimizer'):
        adam = tf.train.AdamOptimizer(learning_rate=learning_rate)
        step_counter = tf.Variable(0, name='global_step', trainable=False)
        return adam.minimize(loss, global_step=step_counter)

def evaluation(logits, labels):
    """Compute the fraction of correctly classified samples in a batch.

    Args:
        logits: Class scores produced by the network, [batch_size, n_classes].
        labels: Integer class indices of the true classes, [batch_size].

    Returns:
        A scalar float32 tensor with the batch accuracy in [0, 1].  A scalar
        summary of the accuracy is registered as a side effect.
    """
    with tf.variable_scope('accuracy') as scope:
        # A sample counts as correct when the true label has the top score.
        correct = tf.nn.in_top_k(logits, labels, 1)
        # Average in float32: float16 has too little precision for a mean
        # and cannot even count exactly beyond 2048 samples.
        accuracy = tf.reduce_mean(tf.cast(correct, tf.float32))
        tf.summary.scalar(scope.name + '/accuracy', accuracy)
    return accuracy

第五部分：新建Train.py文件

'''
Train.py
对搭建好的网络进行训练，并保存训练参数，以便下次使用
2018/7/19实现文件调用，2018/7/22实现最终网络的训练
-------copyright@GCN-------
'''
# 导入文件
import os
import numpy as np
import tensorflow as tf
from DeepLearning.PreWork import get_file, get_batch
from DeepLearning.CNNModel import deep_CNN, losses, trainning, evaluation

# Hyper-parameters
N_CLASSES = 6
IMG_W = 28  # size the images are cropped/padded to; larger means slower training
IMG_H = 28
BATCH_SIZE = 20     # images per batch
CAPACITY = 200      # maximum number of elements in the input queue
MAX_STEP = 10000    # number of training steps, one batch per step
learning_rate = 0.0001

# Input pipeline
train_dir = 'F:/Python/PycharmProjects/DeepLearning/CK+_part'  # training images
logs_train_dir = 'F:/Python/PycharmProjects/DeepLearning/CK+_part'  # logs and checkpoints
train, train_label = get_file(train_dir)
train_batch, train_label_batch = get_batch(train, train_label, IMG_W, IMG_H, BATCH_SIZE, CAPACITY)

# Graph: network, loss, optimiser and accuracy
train_logits = deep_CNN(train_batch, BATCH_SIZE, N_CLASSES)
train_loss = losses(train_logits, train_label_batch)
train_op = trainning(train_loss, learning_rate)
train_acc = evaluation(train_logits, train_label_batch)

# Merge every summary registered above into a single op
summary_op = tf.summary.merge_all()

# Session, summary writer and checkpoint saver
sess = tf.Session()
train_writer = tf.summary.FileWriter(logs_train_dir, sess.graph)
saver = tf.train.Saver()
sess.run(tf.global_variables_initializer())
# Start the threads that fill the input queues
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(sess=sess, coord=coord)

checkpoint_path = os.path.join(logs_train_dir, 'thing.ckpt')

try:
    # Run MAX_STEP training steps, one batch per step.
    for step in np.arange(MAX_STEP):
        if coord.should_stop():
            break

        log_step = step % 100 == 0
        # train_logits needs no explicit fetch: the loss and the accuracy
        # depend on it, so it is evaluated as part of this run.
        fetches = [train_op, train_loss, train_acc]
        if log_step:
            # Fetch the summary in the same run so that it describes the
            # batch that was just trained on and no extra batch is consumed.
            fetches.append(summary_op)
        results = sess.run(fetches)
        tra_loss, tra_acc = results[1], results[2]

        # Every 100 steps: print loss/accuracy and write the summary.
        if log_step:
            print('Step %d, train loss = %.2f, train accuracy = %.2f%%' % (step, tra_loss, tra_acc * 100.0))
            train_writer.add_summary(results[3], step)

        # Writing a checkpoint on every step is very slow; save with each
        # log step and once more after the final step instead.
        if log_step or (step + 1) == MAX_STEP:
            saver.save(sess, checkpoint_path)

except tf.errors.OutOfRangeError:
    print('Done training -- epoch limit reached')

finally:
    coord.request_stop()
coord.join(threads)
train_writer.close()    # flush pending summaries to disk
sess.close()

数据库用的是CK+表情数据库中的原图

放上完整的项目链接CNN(from github)

完整实现利用tensorflow训练自己的图片数据集

猜你喜欢