03_ Cats vs. Dogs Classification Based on a CNN

Here Insert Picture Description

Cats & Dogs Background

The Cats & Dogs data set comes from the Kaggle competition "Dogs vs. Cats". It can be downloaded from the competition page and contains 12,500 cat images and 12,500 dog images.

  1. http://www.kaggle.com/c/dogs-vs-cats
    Here Insert Picture Description
    We fine-tune VGGNet so that it can be trained on the Cats & Dogs training set. Create a project folder containing all of the materials shown below.
    Here Insert Picture Description

The sample code

Step 1: Modifying the model

First, modify the model (the VGG16_model.py file). The original output layer distinguishes 1000 different categories, whereas this task has only two: cat and dog. The first step is therefore to change the output of the fully connected layers.

def fc_layers(self):
    """Build the three fully connected layers on top of ``self.pool5``.

    Each layer is created through ``self.fc`` and stored on the instance as
    ``self.fc6``, ``self.fc7`` and ``self.fc8``. The last layer has 2 output
    units (cat / dog) instead of the original 1000.
    """
    # Changed statement: fc1 is created with trainable=False.
    self.fc6 = self.fc("fc1", self.pool5, 4096, trainable=False)
    # Changed statement: fc2 is created with trainable=False.
    self.fc7 = self.fc("fc2", self.fc6, 4096, trainable=False)
    # Output layer with 2 units; no trainable argument is passed, so
    # self.fc's own default applies.
    self.fc8 = self.fc("fc3", self.fc7, 2)

Here the number of output channels of the last layer is set to 2. The methods that create the convolutional layers and the fully connected layers are otherwise kept as they were, apart from the statements marked as changed in the comments.

        def conv(self, name, input_data, out_channel, trainable=False):
            """Create a 3x3, stride-1, SAME-padded convolution followed by ReLU.

            Args:
                name: variable scope for the layer; also used as the name of
                    the ReLU output op.
                input_data: input tensor; its last dimension is used as the
                    number of input channels.
                out_channel: number of output channels (filters).
                trainable: whether the kernel and bias variables are
                    trainable. Defaults to False, which is what this layer
                    previously hard-coded.

            Returns:
                The ReLU-activated output tensor.
            """
            in_channel = input_data.get_shape()[-1]
            with tf.variable_scope(name):
                # Changed statement: variables are created with the
                # requested trainable flag (False by default).
                kernel = tf.get_variable("weights", [3, 3, in_channel, out_channel], dtype=tf.float32, trainable=trainable)
                biases = tf.get_variable("biases", [out_channel], dtype=tf.float32, trainable=trainable)
                conv_res = tf.nn.conv2d(input_data, kernel, [1, 1, 1, 1], padding="SAME")
                res = tf.nn.bias_add(conv_res, biases)
                out = tf.nn.relu(res, name=name)
            # Record the layer's variables on the instance-wide parameter list.
            self.parameters += [kernel, biases]
            return out

    def fc(self, name, input_data, out_channel, trainable=True):
        """Create a fully connected layer followed by ReLU.

        Args:
            name: variable scope under which "weights" and "biases" are
                created.
            input_data: input tensor; it is flattened to 2-D before the
                matrix multiplication.
            out_channel: number of output units.
            trainable: whether the layer's variables are trainable.

        Returns:
            The ReLU-activated output tensor.
        """
        dims = input_data.get_shape().as_list()
        # Rank-4 inputs are flattened over their last three axes; for any
        # other rank the second axis is taken as the feature size.
        flat_size = dims[-1] * dims[-2] * dims[-3] if len(dims) == 4 else dims[1]
        flattened = tf.reshape(input_data, [-1, flat_size])
        with tf.variable_scope(name):
            # Changed statements: the trainable flag is forwarded to both
            # variables.
            weights = tf.get_variable(
                name="weights",
                shape=[flat_size, out_channel],
                dtype=tf.float32,
                trainable=trainable,
            )
            biases = tf.get_variable(
                name="biases",
                shape=[out_channel],
                dtype=tf.float32,
                trainable=trainable,
            )
            pre_activation = tf.nn.bias_add(tf.matmul(flattened, weights), biases)
            # NOTE(review): ReLU is applied to every layer built here,
            # including a final output layer - confirm this is intended when
            # the result is later used as classification scores.
            out = tf.nn.relu(pre_activation)
        # Record the layer's variables on the instance-wide parameter list.
        self.parameters += [weights, biases]
        return out

Step 2: Data input

The modified model needs to be re-trained, and the first prerequisite is feeding in the data. Here I use a queue-based input stream to read the data. The code is shown below.

def get_file(file_dir):
    """Collect file paths under *file_dir* and label them by parent folder.

    Every file found while walking *file_dir* is treated as an image. A file
    whose immediate parent directory is named ``cat`` gets label 0; every
    other file gets label 1. Each label is assigned together with its path,
    so the two lists cannot drift out of alignment when a folder contains
    sub-folders or when files sit outside the class folders.

    Args:
        file_dir: root directory to walk (e.g. one holding ``cat`` and
            ``dog`` sub-folders).

    Returns:
        A tuple ``(image_list, label_list)``: a list of path strings and a
        list of int labels, shuffled with the same random permutation.
        Both are empty when no files are found.
    """
    images = []
    labels = []
    for root, _sub_folders, files in os.walk(file_dir):
        # normpath + basename handles trailing separators and the platform's
        # own separator, unlike splitting on '/'.
        folder_name = os.path.basename(os.path.normpath(root))
        label = 0 if folder_name == 'cat' else 1
        for name in files:
            images.append(os.path.join(root, name))
            labels.append(label)

    # Shuffle paths and labels with one shared permutation; labels stay ints
    # instead of being round-tripped through a string array.
    order = np.random.permutation(len(images))
    image_list = [images[i] for i in order]
    label_list = [labels[i] for i in order]

    return image_list, label_list

The get_file function defined here takes the data folder as input and uses the sub-folder names as the classification criterion, dividing the pictures into two categories. Two lists are used to store the image file paths and the corresponding labels. To meet the program's requirements, the training images must be placed in the train folder under separate cat and dog sub-folders, as shown:

Here Insert Picture Description

def get_batch(image_list, label_list, img_width, img_height, batch_size, capacity):
    """Build a queue-based input pipeline that yields image/label batches.

    Args:
        image_list: list of image file paths.
        label_list: list of integer labels, aligned with *image_list*.
        img_width: target image width after cropping/padding.
        img_height: target image height after cropping/padding.
        batch_size: number of examples per batch.
        capacity: capacity of the batching queue.

    Returns:
        A tuple ``(image_batch, label_batch)`` of tensors; ``label_batch`` is
        reshaped to ``[batch_size]``.
    """
    image = tf.cast(image_list, tf.string)
    label = tf.cast(label_list, tf.int32)

    # Produces one (path, label) pair at a time from the two lists.
    input_queue = tf.train.slice_input_producer([image, label])

    label = input_queue[1]
    image_contents = tf.read_file(input_queue[0])
    image = tf.image.decode_jpeg(image_contents, channels=3)

    # The API takes (image, target_height, target_width); height must come
    # first, otherwise non-square targets come out transposed.
    image = tf.image.resize_image_with_crop_or_pad(image, img_height, img_width)
    image = tf.image.per_image_standardization(image)  # standardize the image
    image_batch, label_batch = tf.train.batch(
        [image, label], batch_size=batch_size, num_threads=64, capacity=capacity)
    label_batch = tf.reshape(label_batch, [batch_size])

    return image_batch, label_batch

The get_batch function reads the pictures from the list of file paths in a loop, batch_size images at a time, and returns the image data together with the corresponding labels for training. The complete definition is as follows:

import tensorflow as tf
import numpy as np
import os
img_width = 224
img_height = 224


def get_file(file_dir):
    """Collect file paths under *file_dir* and label them by parent folder.

    Every file found while walking *file_dir* is treated as an image. A file
    whose immediate parent directory is named ``cat`` gets label 0; every
    other file gets label 1. Each label is assigned together with its path,
    so the two lists cannot drift out of alignment when a folder contains
    sub-folders or when files sit outside the class folders.

    Args:
        file_dir: root directory to walk (e.g. one holding ``cat`` and
            ``dog`` sub-folders).

    Returns:
        A tuple ``(image_list, label_list)``: a list of path strings and a
        list of int labels, shuffled with the same random permutation.
        Both are empty when no files are found.
    """
    images = []
    labels = []
    for root, _sub_folders, files in os.walk(file_dir):
        # normpath + basename handles trailing separators and the platform's
        # own separator, unlike splitting on '/'.
        folder_name = os.path.basename(os.path.normpath(root))
        label = 0 if folder_name == 'cat' else 1
        for name in files:
            images.append(os.path.join(root, name))
            labels.append(label)

    # Shuffle paths and labels with one shared permutation; labels stay ints
    # instead of being round-tripped through a string array.
    order = np.random.permutation(len(images))
    image_list = [images[i] for i in order]
    label_list = [labels[i] for i in order]

    return image_list, label_list


def get_batch(image_list, label_list, img_width, img_height, batch_size, capacity):
    """Build a queue-based input pipeline that yields image/label batches.

    Args:
        image_list: list of image file paths.
        label_list: list of integer labels, aligned with *image_list*.
        img_width: target image width after cropping/padding.
        img_height: target image height after cropping/padding.
        batch_size: number of examples per batch.
        capacity: capacity of the batching queue.

    Returns:
        A tuple ``(image_batch, label_batch)`` of tensors; ``label_batch`` is
        reshaped to ``[batch_size]``.
    """
    image = tf.cast(image_list, tf.string)
    label = tf.cast(label_list, tf.int32)

    # Produces one (path, label) pair at a time from the two lists.
    input_queue = tf.train.slice_input_producer([image, label])

    label = input_queue[1]
    image_contents = tf.read_file(input_queue[0])
    image = tf.image.decode_jpeg(image_contents, channels=3)

    # The API takes (image, target_height, target_width); height must come
    # first, otherwise non-square targets come out transposed.
    image = tf.image.resize_image_with_crop_or_pad(image, img_height, img_width)
    image = tf.image.per_image_standardization(image)  # standardize the image
    image_batch, label_batch = tf.train.batch(
        [image, label], batch_size=batch_size, num_threads=64, capacity=capacity)
    label_batch = tf.reshape(label_batch, [batch_size])

    return image_batch, label_batch


def onehot(labels, n_class=None):
    """Convert integer class labels into a one-hot matrix.

    Args:
        labels: sequence or 1-D array of non-negative integer class ids.
        n_class: number of columns in the result. When omitted it is
            inferred as ``max(labels) + 1`` but never fewer than 2, so a
            batch that happens to contain only class 0 still yields the
            two-column (cat / dog) matrix.

    Returns:
        A float ndarray of shape ``(len(labels), n_class)`` with a single 1
        per row. Empty input yields an array with zero rows.

    Raises:
        ValueError: if a label is negative or not smaller than *n_class*.
    """
    indices = np.asarray(labels, dtype=int).ravel()
    n_sample = indices.size

    if n_class is None:
        # Inferring the width from the batch alone would shrink it to a
        # single column for an all-zero batch.
        n_class = max(int(indices.max()) + 1, 2) if n_sample else 2

    if n_sample and (indices.min() < 0 or indices.max() >= n_class):
        raise ValueError(
            "labels must lie in [0, %d), got range [%d, %d]"
            % (n_class, indices.min(), indices.max()))

    onehot_labels = np.zeros((n_sample, n_class))
    onehot_labels[np.arange(n_sample), indices] = 1
    return onehot_labels

Step 3: Re-training and saving the model

The most important step of fine-tuning is re-training and saving the model. The model's outputs are already defined in the model class, so after instantiating the class we only need to pick the specific output variable to use.

# Build the VGG16 graph on the image placeholder.
vgg = model.vgg16(x_imgs)
# NOTE(review): the attribute name suggests `probs` is already a softmax
# output, while softmax_cross_entropy_with_logits expects unscaled logits
# and applies softmax itself - confirm against VGG16_model.py.
fc3_cat_and_dog = vgg.probs
# Mean cross-entropy between the network output and the one-hot labels.
loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=fc3_cat_and_dog, labels=y_imgs))
# Plain gradient descent on that loss.
optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.001).minimize(loss)

Here we also define the loss function and the method used to minimize it. The complete code is as follows:

import numpy as np
import tensorflow as tf
import VGG16_model as model
import create_and_read_TFRecord2 as reader2

if __name__ == '__main__':
    import time

    # Input pipeline: shuffled file list -> queue-based batches of 25 images.
    X_train, y_train = reader2.get_file("./train/")
    image_batch, label_batch = reader2.get_batch(X_train, y_train, 224, 224, 25, 256)

    x_imgs = tf.placeholder(tf.float32, [None, 224, 224, 3])
    y_imgs = tf.placeholder(tf.int32, [None, 2])

    vgg = model.vgg16(x_imgs)
    # NOTE(review): the attribute name suggests `probs` is already a softmax
    # output, while softmax_cross_entropy_with_logits expects unscaled
    # logits - confirm against VGG16_model.py.
    fc3_cat_and_dog = vgg.probs
    loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=fc3_cat_and_dog, labels=y_imgs))
    optimizer = tf.train.GradientDescentOptimizer(learning_rate=0.001).minimize(loss)

    sess = tf.Session()
    coord = tf.train.Coordinator()
    threads = []
    try:
        # Initialise all variables first, then overwrite them with the
        # pre-trained VGG16 weights.
        sess.run(tf.global_variables_initializer())
        vgg.load_weights('./vgg16_weights.npz', sess)
        saver = vgg.saver()

        threads = tf.train.start_queue_runners(coord=coord, sess=sess)

        start_time = time.time()

        for i in range(200):
            image, label = sess.run([image_batch, label_batch])
            labels = reader2.onehot(label)

            # Fetch the loss in the same run as the update step rather than
            # paying for a second forward pass; the printed value is the
            # loss of this batch as evaluated in that step.
            _, loss_record = sess.run([optimizer, loss], feed_dict={x_imgs: image, y_imgs: labels})
            print("now the loss is %f " % loss_record)
            end_time = time.time()
            print('time: ', (end_time - start_time))
            start_time = end_time
            print("----------epoch %d is finished---------------" % i)

        saver.save(sess, "./model/")
        print("Optimization Finished!")
    finally:
        # Stop and join the queue-runner threads, then release the session,
        # even when training fails part-way.
        coord.request_stop()
        coord.join(threads)
        sess.close()

The training code feeds data through TensorFlow's queue-based input pipeline, and the pre-trained weights are loaded in a way similar to that used earlier in this series. After 200 iterations, the model is saved in the model folder.

Step 4: Reusing the model

import tensorflow as tf
from scipy.misc import imread, imresize
import VGG16_model as model

import os

import numpy as np

# Rebuild the graph and restore the fine-tuned weights saved by training.
imgs = tf.placeholder(tf.float32, [None, 224, 224, 3])
sess = tf.Session()
vgg = model.vgg16(imgs)
fc3_cat_and_dog = vgg.probs
saver = vgg.saver()
saver.restore(sess, './model/')

# NOTE(review): scipy.misc.imread / imresize are not available in recent
# SciPy releases - confirm the installed version still provides them.
for root, sub_folders, files in os.walk('./test/'):
    # Counters are kept per directory visited by os.walk.
    i = 0
    cat = 0
    dog = 0
    for name in files:
        filepath = os.path.join(root, name)

        try:
            img1 = imread(filepath, mode='RGB')
            img1 = imresize(img1, (224, 224))
        except Exception:
            # Unreadable file: report it and skip it, instead of falling
            # through and classifying the previous image again (or hitting
            # a NameError on the very first file).
            print("remove", filepath)
            continue

        # Count only the images that were actually classified.
        i += 1
        prob = sess.run(fc3_cat_and_dog, feed_dict={vgg.imgs: [img1]})
        max_index = np.argmax(prob)
        if max_index == 0:
            cat += 1
        else:
            dog += 1
        if i % 50 == 0:
            # Fraction of images predicted as cat so far; this equals the
            # accuracy only when the folder contains cat images exclusively.
            acc = (cat * 1.)/(dog + cat)
            print(acc)
            print("-----------img number is %d------------" % i)

sess.close()

Here Insert Picture Description

Published 128 original articles · won praise 17 · views 10000 +

Guess you like

Origin blog.csdn.net/lsqzedu/article/details/103996109