Study Notes on "TensorFlow Deep Learning Application Practice": AlexNet_raw

The dataset is from the Kaggle Dogs vs. Cats competition.
Below is the code for the network, training, and testing.
The dataset is laid out as follows: cat images go in a folder named 'cat', and dog images go in a folder named 'dog'.
The data_align module that the script imports is also included below.
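
For reference, the directory layout the loader expects looks roughly like this (the root ./data/catsdogs comes from the __main__ block below; the file names are only examples):

data/catsdogs/
    cat/
        cat.0.jpg
        cat.1.jpg
        ...
    dog/
        dog.0.jpg
        dog.1.jpg
        ...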

import tensorflow as tf
import data_align
import time
import matplotlib.pyplot as plt
from PIL import Image
import numpy as np

with tf.device('/cpu:0'):
    # Hyperparameter settings
    learning_rate=1e-4
    training_iters=200
    batch_size=50
    display_step=5
    n_classes=2
    n_fc1=4096
    n_fc2=2048

    # Build the model: 227x227 RGB inputs and one-hot labels
    x= tf.placeholder(tf.float32,[None,227,227,3])
    y=tf.placeholder(tf.float32,[None,n_classes])

    W_conv={
        'conv1':tf.Variable(tf.truncated_normal([11,11,3,96],
                                                stddev=0.0001)),
        'conv2':tf.Variable(tf.truncated_normal([5,5,96,256],
                                                stddev=0.01)),
        'conv3':tf.Variable(tf.truncated_normal([3,3,256,384],
                                                stddev=0.01)),
        'conv4':tf.Variable(tf.truncated_normal([3,3,384,384],
                                                stddev=0.01)),
        'conv5':tf.Variable(tf.truncated_normal([3,3,384,256],
                                                stddev=0.01)),
        'fc1':tf.Variable(tf.truncated_normal([6*6*256,n_fc1],
                                              stddev=0.1)),
        'fc2':tf.Variable(tf.truncated_normal([n_fc1,n_fc2],stddev=0.1)),
        'fc3':tf.Variable(tf.truncated_normal([n_fc2,n_classes],stddev=0.1))
    }
    b_conv={    # biases must be explicitly initialized, otherwise training may fail to converge
        'conv1':tf.Variable(tf.constant(0.0,dtype=tf.float32,shape=[96])),
        'conv2':tf.Variable(tf.constant(0.1,dtype=tf.float32,shape=[256])),
        'conv3':tf.Variable(tf.constant(0.1,dtype=tf.float32,shape=[384])),
        'conv4':tf.Variable(tf.constant(0.1,dtype=tf.float32,shape=[384])),
        'conv5':tf.Variable(tf.constant(0.1,dtype=tf.float32,shape=[256])),
        'fc1':tf.Variable(tf.constant(0.1,dtype=tf.float32,shape=[n_fc1])),
        'fc2':tf.Variable(tf.constant(0.1,dtype=tf.float32,shape=[n_fc2])),
        'fc3':tf.Variable(tf.constant(0.0,dtype=tf.float32,shape=[n_classes]))
    } 
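    # (For reference, the AlexNet paper initializes the biases of conv2, conv4,
    # conv5 and the fully connected layers to 1 and the rest to 0, so the ReLUs
    # receive positive inputs early in training; the 0.1 constants here play
    # the same role.)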

    # Layer 1: convolution
    conv1=tf.nn.conv2d(x,W_conv['conv1'],strides=[1,4,4,1],padding='VALID')
    conv1=tf.nn.bias_add(conv1,b_conv['conv1'])
    conv1=tf.nn.relu(conv1)
    # Layer 1: pooling (note: the original AlexNet uses max pooling; this code uses average pooling)
    pool1=tf.nn.avg_pool(conv1,ksize=[1,3,3,1],strides=[1,2,2,1],padding='VALID')
    # Local response normalization (LRN): each activation is divided by
    # (bias + alpha * sum of squares over 2*depth_radius+1 neighboring channels) ** beta
    norm1=tf.nn.lrn(pool1,5,bias=1.0,alpha=0.001/9.0,beta=0.75)

    # Layer 2: convolution
    conv2=tf.nn.conv2d(norm1,W_conv['conv2'],strides=[1,1,1,1],padding='SAME')
    conv2=tf.nn.bias_add(conv2,b_conv['conv2'])
    conv2=tf.nn.relu(conv2)
    # Layer 2: pooling
    pool2=tf.nn.avg_pool(conv2,ksize=[1,3,3,1],strides=[1,2,2,1],padding='VALID')
    # LRN
    norm2=tf.nn.lrn(pool2,5,bias=1.0,alpha=0.001/9.0,beta=0.75)

    # Layer 3: convolution
    conv3=tf.nn.conv2d(norm2,W_conv['conv3'],strides=[1,1,1,1],padding='SAME')
    conv3=tf.nn.bias_add(conv3,b_conv['conv3'])
    conv3=tf.nn.relu(conv3)

    # Layer 4: convolution
    conv4=tf.nn.conv2d(conv3,W_conv['conv4'],strides=[1,1,1,1],padding='SAME')
    conv4=tf.nn.bias_add(conv4,b_conv['conv4'])
    conv4=tf.nn.relu(conv4)

    # Layer 5: convolution
    conv5=tf.nn.conv2d(conv4,W_conv['conv5'],strides=[1,1,1,1],padding='SAME')
    conv5=tf.nn.bias_add(conv5,b_conv['conv5'])
    conv5=tf.nn.relu(conv5)
    # Layer 5: pooling
    pool5=tf.nn.avg_pool(conv5,ksize=[1,3,3,1],strides=[1,2,2,1],padding='VALID')

    # Layer 6: fully connected
    # print(pool5.shape)
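    # Shape check: 227 -> conv1 (11x11, stride 4, VALID): (227-11)/4+1 = 55
    # -> pool1 (3x3, stride 2, VALID): (55-3)/2+1 = 27 -> conv2 SAME: 27
    # -> pool2: (27-3)/2+1 = 13 -> conv3/4/5 SAME: 13
    # -> pool5: (13-3)/2+1 = 6, hence the 6*6*256 below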
    reshape=tf.reshape(pool5,[-1,6*6*256])
    fc1=tf.add(tf.matmul(reshape,W_conv['fc1']),b_conv['fc1'])
    fc1 = tf.nn.relu(fc1)
    fc1 = tf.nn.dropout(fc1, 0.5)  # keep_prob is hard-coded, so dropout stays active at inference too (see per_class)

    # Layer 7: fully connected
    fc2=tf.add(tf.matmul(fc1,W_conv['fc2']),b_conv['fc2'])
    fc2=tf.nn.relu(fc2)
    fc2=tf.nn.dropout(fc2,0.5)

    # Layer 8: fully connected (logits)
    fc3=tf.add(tf.matmul(fc2,W_conv['fc3']),b_conv['fc3'])

    # Define the loss
    '''
    softmax_cross_entropy_with_logits exp-normalizes fc3 (softmax), takes the
    log of the predicted probability of the true class and negates it, giving
    a positive real number; y must already be one-hot encoded, and reduce_mean
    averages the per-example losses over the batch.
    '''
    # Equivalent sparse form (labels as class indices instead of one-hot):
    # labels=tf.argmax(y,axis=1)
    # loss=tf.reduce_mean(tf.nn.sparse_softmax_cross_entropy_with_logits(labels=labels,logits=fc3))
    loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(labels=y, logits=fc3))
    optimizer=tf.train.GradientDescentOptimizer(learning_rate=learning_rate).minimize(loss)
    # Evaluate the model: fraction of examples where argmax(fc3) matches the label
    correct_pred=tf.equal(tf.argmax(fc3,1),tf.argmax(y,1))
    accuracy=tf.reduce_mean(tf.cast(correct_pred,tf.float32))

init=tf.global_variables_initializer()
save_model = r".//model\AlexNet.ckpt"
def train(epochs):
    with tf.Session() as sess:
        sess.run(init)

        train_writer=tf.summary.FileWriter('./log',sess.graph)  # directory for TensorBoard logs
        saver = tf.train.Saver()

        c=[]
        start_time=time.time()

        coord=tf.train.Coordinator()  # coordinator for the input-queue threads
        threads=tf.train.start_queue_runners(coord=coord)
        step=0
        for i in range(epochs):
            step=i
            image,label=sess.run([image_batch, label_batch])

            # image,label=data_align.read_and_decode(tfrecords_file,batch_size)  # TFRecord-based alternative
            labels=data_align.onehot(label)    # one-hot encode the labels

            # fetch the loss in the same run as the update to avoid a second forward pass
            _,loss_record=sess.run([optimizer,loss],feed_dict={x:image,y:labels})
            print('now the loss is %f'%loss_record)

            c.append(loss_record)
            end_time=time.time()
            print('time: ',end_time-start_time)
            start_time=end_time
            print('------------------ epoch %d finished ------------------'%i)
        print('Optimization Finished!')
        saver.save(sess,save_model)
        print('Model Save Finished!')

        coord.request_stop()
        coord.join(threads)
        plt.plot(c)
        plt.xlabel('iter')
        plt.ylabel('loss')
        plt.title('lr=%f, ti=%d, bs=%d' % (learning_rate, training_iters, batch_size))
        plt.tight_layout()
        plt.savefig(r'cnn-tf-AlexNet.png',dpi=200)
        plt.show()

def per_class(imagefile):

    image = Image.open(imagefile)
    image = image.resize([227, 227])
    image_array = np.array(image)

    image = tf.cast(image_array,tf.float32)
    image = tf.image.per_image_standardization(image)
    image = tf.reshape(image, [1, 227, 227, 3])

    saver = tf.train.Saver()
    with tf.Session() as sess:

        save_model = tf.train.latest_checkpoint('./model')
        saver.restore(sess, save_model)
        image = sess.run(image)  # run the preprocessing ops; the tensor is already [1, 227, 227, 3]
        prediction = sess.run(fc3, feed_dict={x: image})

        max_index = np.argmax(prediction)
        if max_index==0:
            return "cat"
        else:
            return "dog"

if __name__=='__main__':
    mode='train'
    if mode=='train':
        get_images='./data/catsdogs'

        X_train, y_train = data_align.get_file(get_images)
        image_batch, label_batch = data_align.get_batch(X_train, y_train, 227, 227, batch_size, 900)
        train(90)
    elif mode=='test':
        imagefile = './9.jpg'
        r = per_class(imagefile)
        print(r)
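
As a sanity check on the loss defined above, a minimal numpy sketch of the quantity tf.nn.softmax_cross_entropy_with_logits computes for one example (the logits here are made up for illustration):

import numpy as np

# Hypothetical logits for one example, with its one-hot label (cat=0, dog=1).
logits = np.array([2.0, 0.5])
label = np.array([1.0, 0.0])

# Exp-normalize the logits (softmax), then take the negative log-probability
# of the true class; tf.reduce_mean averages this value over the batch.
probs = np.exp(logits) / np.sum(np.exp(logits))
cross_entropy = -np.sum(label * np.log(probs))
print(cross_entropy)  # ~0.2014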

The helper module data_align.py imported by the script above:

import cv2
import os
import numpy as np
import tensorflow as tf
from skimage import io

def rebuild(dir):
    # Resize every image under dir to 227x227; files that cannot be read are deleted.
    for root, dirs, files in os.walk(dir):
        print(root,dirs,files)
        for file in files:
            filepath=os.path.join(root,file)
            try:
                image=cv2.imread(filepath)
                dim=(227,227)
                resized=cv2.resize(image,dim)
                path=r'E:\TensorFlow\AlexNet_raw\kaggledogscats\\'+file
                cv2.imwrite(path,resized)
            except Exception:
                print(filepath)
                os.remove(filepath)

def get_file(file_dir):
    images=[]
    temp=[]
    for root,sub_folders,files in os.walk(file_dir):
        # print(root,sub_folders,files)
        #image directories
        for name in files:
            images.append(os.path.join(root,name))
        #collect the class sub-folder paths (here: cat and dog)
        for name in sub_folders:
            temp.append(os.path.join(root,name))

        # print(files)
    #assign labels from the folder names: cat -> 0, dog -> 1
    labels=[]
    for one_folder in temp:
        n_img=len(os.listdir(one_folder))
        letter=os.path.basename(one_folder)   # class name of this folder

        if letter=='cat':
            labels=np.append(labels,n_img*[0])
        else:
            labels=np.append(labels,n_img*[1])

    #shuffle images and labels together
    temp=np.array([images,labels])   # note: numpy promotes everything to strings here
    # print(temp)
    temp=temp.transpose()
    np.random.shuffle(temp)
    print(temp.shape)
    image_list=list(temp[:,0])
    label_list=list(temp[:,1])
    label_list=[int(float(i)) for i in label_list]
    return image_list,label_list
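
# Example of what get_file returns (paths are illustrative):
#   image_list = ['./data/catsdogs/dog/dog.3.jpg', './data/catsdogs/cat/cat.7.jpg', ...]
#   label_list = [1, 0, ...]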

def int64_feature(value):  # the value must be wrapped in a list for Int64List
    return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))

def bytes_feature(value):
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))  # the list holds the raw byte string

def convert_to_tfrecord(images_list,labels_list,save_dir,name):
    filename=os.path.join(save_dir,name+'.tfrecords')
    n_samples=len(labels_list)
    writer=tf.python_io.TFRecordWriter(filename)  # writer that serializes examples to the output file
    print('\nTransform start......')
    for i in np.arange(0,n_samples):
        try:
            image=io.imread(images_list[i])
            image_raw=image.tostring()
            label=int(labels_list[i])
            example=tf.train.Example(features=tf.train.Features(feature={    # protocol buffer for one record
                'label':int64_feature(label),
                'image_raw':bytes_feature(image_raw),
            }))
            writer.write(example.SerializeToString())
        except IOError as e:
            print('Could not read:',images_list[i])
    writer.close()
    print('Transform done!')

def read_and_decode(tfrecords_file,batch_size):
    # string_input_producer builds a filename queue; its QueueRunner is added to the graph's QUEUE_RUNNERS collection
    filename_queue=tf.train.string_input_producer([tfrecords_file])

    reader=tf.TFRecordReader()        # reader for TFRecord files
    _,serialized_example=reader.read(filename_queue) # returns the next (key, serialized example) tensor pair from the queue

    # parse the scalar string tensor into a dict of tensors
    img_features=tf.parse_single_example(serialized_example,
                                         features={
                                             'label':tf.FixedLenFeature([],
                                                                        tf.int64),
                                             'image_raw':tf.FixedLenFeature([],
                                                                            tf.string),
                                         })
    image=tf.decode_raw(img_features['image_raw'],tf.uint8) # decode the raw byte string; all images must have the same size
    image=tf.reshape(image,[227,227,3])
    label=tf.cast(img_features['label'],tf.int32)

    # shuffle_batch pulls from the TFRecord queue and keeps images and labels in sync.
    '''
    Args:
    tensors: the list or dictionary of tensors to enqueue.
    batch_size: the batch size pulled from the queue.
    capacity: an integer; the maximum number of elements in the queue.
    min_after_dequeue: minimum number of elements in the queue after a dequeue,
        used to ensure a level of mixing of elements.
    num_threads: the number of threads enqueuing tensor_list.
    seed: seed for the random shuffling within the queue.
    enqueue_many: whether each tensor in tensor_list is a single example.
    shapes: (optional) the shapes for each example; defaults to the inferred
        shapes for tensor_list.
    allow_smaller_final_batch: (optional) boolean; if True, allow the final batch
        to be smaller if there are insufficient items left in the queue.
    shared_name: (optional) if set, this queue will be shared under the given
        name across multiple sessions.
    name: (optional) a name for the operations.
    '''
    image_batch,label_batch=tf.train.shuffle_batch([image,label],
                                                   batch_size=batch_size,
                                                   min_after_dequeue=100,
                                                   num_threads=64,
                                                   capacity=200)  # docs suggest capacity >= min_after_dequeue + (num_threads + margin) * batch_size
    return image_batch,tf.reshape(label_batch,[batch_size])

def onehot(labels):
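    # e.g. onehot([0, 1, 1]) -> [[1., 0.], [0., 1.], [0., 1.]]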
    n_sample=len(labels)
    n_class=max(labels)+1
    onehot_labels=np.zeros((n_sample,n_class))
    onehot_labels[np.arange(n_sample),labels]=1
    return onehot_labels

def get_batch(image_list,label_list,img_width,img_height,batch_size,capacity):
    image=tf.cast(image_list,tf.string)
    label=tf.cast(label_list,tf.int32)

    input_queue=tf.train.slice_input_producer([image,label])  # queue of (image path, label) pairs
    label=input_queue[1]
    image_contents=tf.read_file(input_queue[0])
    image=tf.image.decode_jpeg(image_contents,channels=3)

    image=tf.image.resize_image_with_crop_or_pad(image,img_width,img_height)  # signature is (image, target_height, target_width); equivalent here since both are 227
    image=tf.image.per_image_standardization(image)
    image_batch,label_batch=tf.train.batch([image,label],batch_size=batch_size,num_threads=64,capacity=capacity)
    label_batch=tf.reshape(label_batch,[batch_size])
    return image_batch,label_batch
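
The TFRecord helpers (convert_to_tfrecord and read_and_decode) are not called by the training script above, which reads JPEGs directly through get_batch. A minimal sketch of how they could be wired together, assuming the images were first resized to 227x227 (e.g. with rebuild), since read_and_decode reshapes the raw bytes to exactly that size (paths are placeholders):

import tensorflow as tf
import data_align

# Write the dataset to ./data/catsdogs.tfrecords, then read one shuffled batch back.
image_list, label_list = data_align.get_file('./data/catsdogs')
data_align.convert_to_tfrecord(image_list, label_list, './data', 'catsdogs')

image_batch, label_batch = data_align.read_and_decode('./data/catsdogs.tfrecords',
                                                      batch_size=50)
with tf.Session() as sess:
    coord = tf.train.Coordinator()
    threads = tf.train.start_queue_runners(coord=coord)
    images, labels = sess.run([image_batch, label_batch])
    print(images.shape, labels.shape)  # (50, 227, 227, 3) (50,)
    coord.request_stop()
    coord.join(threads)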


Reprinted from blog.csdn.net/shiheyingzhe/article/details/82057052