[TensorFlow series] [7] Single image multi-label classification network construction and training

This time, the following problem is solved: how do we train a classification network when a single image carries multiple labels? Here the annotations are image-level.

The problems we are facing:

1. The number of tags for pictures is not fixed, some have one tag, some have two tags, but the total number of types of tags is fixed, such as 5 types.

To solve this problem, label padding is used this time: every missing label is encoded as an all-zero row, so the target of an image is no longer a single one-hot vector.
For example, if the label is -1,1,1,-1,1 (-1 means that this type of label does not exist, 1 means that it exists), then the label encoding of this image is:

0 0 0 0 0
0 1 0 0 0
0 0 1 0 0
0 0 0 0 0
0 0 0 0 1

2. How to measure losses?

This time, the loss of each label of a picture is calculated, and then the average is taken.

3. How to calculate the accuracy?

This time, the accuracy of each label of a picture is calculated, and then the average is taken.

【data set】

This time, the dataset of Nanjing University is used. For details, see:

https://blog.csdn.net/u012936765/article/details/76944727

【Data preprocessing】

Since the dataset labels are stored in a .mat file, first convert them to a txt label file.

At the same time, the image dataset is converted into a TFRecord file. Note: the label encoding is done at this step. The details are as follows:

import tensorflow as tf
import scipy.io as sio
import numpy as np
import os
from PIL import Image


BASE_PATH=r"E:\miml-image-data\original"
BASE_LABEL_PATH = r"E:\miml-image-data\processed"

def mat2txt():
    """Convert the label matrix of the .mat file into ``label.txt``.

    Reads ``mat_data['targets']`` from ``miml data.mat`` under
    ``BASE_LABEL_PATH`` and writes one line per column in the form
    ``<n>.jpg,<l1>,<l2>,<l3>,<l4>,<l5>``, where ``n`` is the 1-based column
    index and ``l1``..``l5`` come from the first five rows of ``targets``.

    Raises:
        IndexError: If ``targets`` has fewer than five rows.
    """
    # Read the .mat file with scipy.
    mat_data = sio.loadmat(BASE_LABEL_PATH+r"\miml data.mat")
    # The label data is stored under the 'targets' key.
    # assumes targets is laid out as (class, image) — TODO confirm
    label_data = mat_data['targets']
    rows = [label_data[k].tolist() for k in range(5)]
    with open(BASE_LABEL_PATH+r"\label.txt",'w') as f:
        # zip(*rows) walks the columns, so each image gets its own five
        # labels (indexing with a stale loop variable would repeat one
        # column for every image).
        for j, column in enumerate(zip(*rows)):
            line = ','.join(str(s) for s in column)
            jpg_name = str(j+1)+".jpg"
            f.write(jpg_name + ','+line+'\n')


#mat2txt()

# Split the label file into two lists of raw lines: every 5th line
# (counting from 1) goes to the test split, all others to the training split.
train_list = []
test_list = []

with open(BASE_LABEL_PATH+"\label.txt") as f:
    for line_number, line in enumerate(f.readlines(), start=1):
        target = test_list if line_number % 5 == 0 else train_list
        target.append(line)

# Shuffle each split in place.
np.random.shuffle(train_list)
np.random.shuffle(test_list)

def int_2_one_hot(labels):
    """Encode five presence flags as five rows of length five.

    Args:
        labels: Sequence whose first five entries are -1 (class absent) or
            any other number (class present). Entries may be numbers or
            their text form such as ``'-1'`` or ``'-1.0'``.

    Returns:
        A list of five lists. Row ``k`` is all zeros when class ``k`` is
        absent, otherwise it is the one-hot vector with a 1 at position ``k``.

    Raises:
        IndexError: If fewer than five entries are given.
        ValueError: If an entry cannot be interpreted as a number.
    """
    num_classes = 5
    encoded = []
    for index in range(num_classes):
        row = [0] * num_classes
        # Compare numerically: fields read from the label file are strings,
        # and '-1' == -1 is always False, which would mark every class as
        # present.
        if float(labels[index]) != -1:
            row[index] = 1
        encoded.append(row)
    return encoded

def image_2_tfrecords(list,tf_record_path):
    """Serialize the listed images and their labels into a TFRecord file.

    Args:
        list: Lines of the label file, each of the form
            ``<image name>,<l1>,<l2>,<l3>,<l4>,<l5>``. (The parameter name
            shadows the builtin; it is kept for compatibility.)
        tf_record_path: Path of the TFRecord file to write.

    Every image found under ``BASE_PATH`` is converted to RGB, resized to
    224x224 and stored as raw bytes under ``raw_image``; the five rows
    returned by ``int_2_one_hot`` are stored under ``label_1``..``label_5``.
    Images that do not exist on disk are reported with a printed message and
    skipped.
    """
    tf_write = tf.python_io.TFRecordWriter(tf_record_path)
    try:
        for item in list:
            items = item.strip('\n').split(',')
            image_name = items[0]
            image_path = os.path.join(BASE_PATH,image_name)
            if not os.path.isfile(image_path):
                print("not")
                continue

            with Image.open(image_path) as source_image:
                # Force three channels: the reader reshapes the raw bytes to
                # [224, 224, 3], which fails for grayscale or RGBA files.
                image = source_image.convert('RGB').resize((224,224)).tobytes()

            # The fields are text; turn them into numbers before encoding so
            # that the comparison against -1 is numeric.
            label_values = [int(float(value)) for value in items[1:]]
            labels = int_2_one_hot(label_values)

            features = {
                'raw_image': tf.train.Feature(bytes_list=tf.train.BytesList(value=[image])),
            }
            for index in range(5):
                features['label_%d' % (index + 1)] = tf.train.Feature(
                    int64_list=tf.train.Int64List(value=labels[index]))

            example = tf.train.Example(features=tf.train.Features(feature=features))
            tf_write.write(example.SerializeToString())
    finally:
        # Close the writer even if an image fails to load or encode.
        tf_write.close()

# Write the training split first, then the test split, to their TFRecord files.
for split_lines, record_path in (
    (train_list, r"E:\miml-image-data\processed\train.tfrecords"),
    (test_list, r"E:\miml-image-data\processed\test.tfrecords"),
):
    image_2_tfrecords(split_lines, record_path)

[fine-tuning based on vgg16]

Here, directly use the public vgg16 model parameters and load them into this network for fine-tuning

The model file address is: http://www.cs.toronto.edu/~frossard/post/vgg16/

The code is shown below:

import tensorflow as tf
import numpy as np

# Define the function that parses one serialized example
def parse_tf(example):
    """Parse one serialized example into an image and five label tensors.

    The ``raw_image`` bytes are decoded as uint8, reshaped to
    ``[224, 224, 3]`` and passed through
    ``tf.image.per_image_standardization``. Each of ``label_1``..``label_5``
    is read as five int64 values and cast to int32.

    Returns:
        The tuple ``(image, label_1, label_2, label_3, label_4, label_5)``.
    """
    label_keys = ['label_1', 'label_2', 'label_3', 'label_4', 'label_5']

    schema = {key: tf.FixedLenFeature(shape=[5], dtype=tf.int64) for key in label_keys}
    schema['raw_image'] = tf.FixedLenFeature(shape=[], dtype=tf.string)
    record = tf.parse_single_example(example, features=schema)

    pixels = tf.decode_raw(record['raw_image'], out_type=tf.uint8)
    pixels = tf.reshape(pixels, shape=[224, 224, 3])
    image = tf.image.per_image_standardization(pixels)

    labels = tuple(tf.cast(record[key], tf.int32) for key in label_keys)
    return (image,) + labels

def Conv(x,conv_shape,bias_shape,parameters,padding="SAME",strides=[1,1,1,1]):
    """Apply a 2-D convolution, add a bias and return the ReLU activation.

    The kernel (random-normal initial value of shape ``conv_shape``) and the
    bias (zeros of shape ``bias_shape``) are created with ``trainable=False``
    and appended, kernel first, to ``parameters`` in place.
    """
    kernel = tf.Variable(
        initial_value=tf.random_normal(shape=conv_shape, dtype=tf.float32),
        trainable=False,
    )
    bias = tf.Variable(initial_value=tf.zeros(shape=bias_shape), trainable=False)
    parameters += [kernel, bias]
    convolved = tf.nn.conv2d(x, kernel, strides=strides, padding=padding)
    return tf.nn.relu(tf.nn.bias_add(convolved, bias))
def Max_Pooling(x,ksize=[1,2,2,1],strides=[1,2,2,1],padding="SAME"):
    """Max-pool ``x`` with the given window, strides and padding.

    The defaults give a 2x2 window with stride 2 and SAME padding. The
    arguments are forwarded to ``tf.nn.max_pool`` so that non-default values
    actually take effect instead of being silently ignored.
    """
    return tf.nn.max_pool(x,ksize=ksize,strides=strides,padding=padding)

def FC(x,w_shape,b_shape,parameters):
    """Fully connected layer followed by ReLU.

    Creates a random-normal weight matrix of shape ``w_shape`` and a zero
    bias of shape ``b_shape``, appends them (weights first) to ``parameters``
    in place and returns ``relu(x @ weights + bias)``.
    """
    weights = tf.Variable(initial_value=tf.random_normal(shape=w_shape, dtype=tf.float32))
    bias = tf.Variable(initial_value=tf.zeros(shape=b_shape))
    parameters += [weights, bias]
    pre_activation = tf.nn.bias_add(tf.matmul(x, weights), bias)
    return tf.nn.relu(pre_activation)
def Last_FC(x,w_shape,b_shape):
    """Final fully connected layer; returns ``x @ weights + bias`` with no activation.

    The weights start from a random-normal value of shape ``w_shape`` and the
    bias from zeros of shape ``b_shape``. Neither variable is recorded in any
    parameter list.
    """
    weights = tf.Variable(initial_value=tf.random_normal(shape=w_shape, dtype=tf.float32))
    bias = tf.Variable(initial_value=tf.zeros(shape=b_shape))
    return tf.nn.bias_add(tf.matmul(x, weights), bias)
# Variables created by Conv/FC are collected here in creation order. The
# session code below assigns the pretrained weights to this list by index,
# so the order of the layer definitions must not change.
my_parameters = []
# Input images and one label placeholder (5 values per image) for each of
# the five label slots.
x = tf.placeholder(dtype=tf.float32,shape=[None,224,224,3])
y1_ = tf.placeholder(dtype=tf.float32,shape=[None,5])
y2_ = tf.placeholder(dtype=tf.float32,shape=[None,5])
y3_ = tf.placeholder(dtype=tf.float32,shape=[None,5])
y4_ = tf.placeholder(dtype=tf.float32,shape=[None,5])
y5_ = tf.placeholder(dtype=tf.float32,shape=[None,5])

# Block 1: two 3x3 convolutions with 64 filters, then pooling.
conv1_1 = Conv(x,conv_shape=[3,3,3,64],bias_shape=[64],parameters=my_parameters)
conv1_2 = Conv(conv1_1,conv_shape=[3,3,64,64],bias_shape=[64],parameters=my_parameters)
pool1 = Max_Pooling(conv1_2)

# Block 2: two 3x3 convolutions with 128 filters, then pooling.
conv2_1 = Conv(pool1,conv_shape=[3,3,64,128],bias_shape=[128],parameters=my_parameters)
conv2_2 = Conv(conv2_1,conv_shape=[3,3,128,128],bias_shape=[128],parameters=my_parameters)
pool2 = Max_Pooling(conv2_2)

# Block 3: three 3x3 convolutions with 256 filters, then pooling.
conv3_1 = Conv(pool2,conv_shape=[3,3,128,256],bias_shape=[256],parameters=my_parameters)
conv3_2 = Conv(conv3_1,conv_shape=[3,3,256,256],bias_shape=[256],parameters=my_parameters)
conv3_3 = Conv(conv3_2,conv_shape=[3,3,256,256],bias_shape=[256],parameters=my_parameters)
pool3 = Max_Pooling(conv3_3)

# Block 4: three 3x3 convolutions with 512 filters, then pooling.
conv4_1 = Conv(pool3,conv_shape=[3,3,256,512],bias_shape=[512],parameters=my_parameters)
conv4_2 = Conv(conv4_1,conv_shape=[3,3,512,512],bias_shape=[512],parameters=my_parameters)
conv4_3 = Conv(conv4_2,conv_shape=[3,3,512,512],bias_shape=[512],parameters=my_parameters)
pool4 = Max_Pooling(conv4_3)

# Block 5: three 3x3 convolutions with 512 filters, then pooling.
conv5_1 = Conv(pool4,conv_shape=[3,3,512,512],bias_shape=[512],parameters=my_parameters)
conv5_2 = Conv(conv5_1,conv_shape=[3,3,512,512],bias_shape=[512],parameters=my_parameters)
conv5_3 = Conv(conv5_2,conv_shape=[3,3,512,512],bias_shape=[512],parameters=my_parameters)
pool5 = Max_Pooling(conv5_3)

# Flatten: five stride-2 poolings reduce 224x224 to 7x7, with 512 channels.
pool5 = tf.reshape(pool5,shape=[-1,7*7*512])

# Two 4096-unit fully connected layers; their variables are also recorded
# in my_parameters.
fc1 = FC(pool5,w_shape=[7*7*512,4096],b_shape=[4096],parameters=my_parameters)
fc2 = FC(fc1,w_shape=[4096,4096],b_shape=[4096],parameters=my_parameters)

# New 5-way output layer; it is not added to my_parameters, so it keeps its
# random initial value instead of receiving pretrained weights.
fc3 =Last_FC(fc2,w_shape=[4096,5],b_shape=[5])

# An image can carry up to 5 classes, so 5 classifiers are used.
# NOTE(review): all five outputs are a softmax over the same fc3 logits, so
# y1..y5 are identical; they differ only in the label placeholder each one
# is paired with below — confirm this is the intended design.
y1 = tf.nn.softmax(fc3)
y2 = tf.nn.softmax(fc3)
y3 = tf.nn.softmax(fc3)
y4 = tf.nn.softmax(fc3)
y5 = tf.nn.softmax(fc3)

# Clip the probabilities away from 0 so that tf.log below never receives 0.
y1_1 = tf.clip_by_value(y1,1e-8,tf.reduce_max(y1))
y2_1 = tf.clip_by_value(y2,1e-8,tf.reduce_max(y2))
y3_1 = tf.clip_by_value(y3,1e-8,tf.reduce_max(y3))
y4_1 = tf.clip_by_value(y4,1e-8,tf.reduce_max(y4))
y5_1 = tf.clip_by_value(y5,1e-8,tf.reduce_max(y5))

# Define the 5 losses.
# The cross-entropy is summed over the class axis for each image and then
# averaged over the batch. Summing without an axis would collapse the whole
# batch into one scalar, make reduce_mean a no-op and let the loss grow with
# the batch size.
loss1 = tf.reduce_mean(-tf.reduce_sum(y1_*tf.log(y1_1),axis=1))
loss2 = tf.reduce_mean(-tf.reduce_sum(y2_*tf.log(y2_1),axis=1))
loss3 = tf.reduce_mean(-tf.reduce_sum(y3_*tf.log(y3_1),axis=1))
loss4 = tf.reduce_mean(-tf.reduce_sum(y4_*tf.log(y4_1),axis=1))
loss5 = tf.reduce_mean(-tf.reduce_sum(y5_*tf.log(y5_1),axis=1))
# Take the average of the five losses.
loss = (loss1 + loss2 + loss3 + loss4 + loss5)/5

train = tf.train.AdamOptimizer(learning_rate=1e-6).minimize(loss)

# Define the accuracy of each label slot.
# NOTE(review): for an absent label the target row is all zeros, so
# tf.argmax presumably yields index 0 and the prediction is compared against
# class 0 — confirm this is the intended metric.
correct_predict1 = tf.equal(tf.argmax(y1_,1),tf.argmax(y1,1))
correct_predict2 = tf.equal(tf.argmax(y2_,1),tf.argmax(y2,1))
correct_predict3 = tf.equal(tf.argmax(y3_,1),tf.argmax(y3,1))
correct_predict4 = tf.equal(tf.argmax(y4_,1),tf.argmax(y4,1))
correct_predict5 = tf.equal(tf.argmax(y5_,1),tf.argmax(y5,1))

# Despite the "auc" names, these are the fraction of matching argmax
# predictions in the batch, not an area under a ROC curve.
auc1 = tf.reduce_mean(tf.cast(correct_predict1,dtype=tf.float32))
auc2 = tf.reduce_mean(tf.cast(correct_predict2,dtype=tf.float32))
auc3 = tf.reduce_mean(tf.cast(correct_predict3,dtype=tf.float32))
auc4 = tf.reduce_mean(tf.cast(correct_predict4,dtype=tf.float32))
auc5 = tf.reduce_mean(tf.cast(correct_predict5,dtype=tf.float32))
# Take the average over the five label slots.
auc = (auc1 + auc2 + auc3 + auc4 + auc5)/5

def _single_pass_batches(tfrecord_path):
    """Build the input pipeline for one TFRecord file.

    Records are parsed with ``parse_tf``, grouped into batches of 16 and
    repeated once. Returns the dataset, its one-shot iterator and the
    iterator's ``get_next()`` tensors.
    """
    dataset = tf.data.TFRecordDataset(tfrecord_path)
    dataset = dataset.map(parse_tf)
    dataset = dataset.batch(16).repeat(1)
    iterator = dataset.make_one_shot_iterator()
    return dataset, iterator, iterator.get_next()


train_dataset, train_iter, train_next_element = _single_pass_batches(
    r"E:\miml-image-data\processed\train.tfrecords")

test_dataset, test_iter, test_next_element = _single_pass_batches(
    r"E:\miml-image-data\processed\test.tfrecords")

init = tf.global_variables_initializer()

with tf.Session() as session:
    session.run(init)

    # Copy the pretrained VGG16 weights into the recorded variables. Keys are
    # visited in sorted order and matched to my_parameters by position; the
    # fc8 entries are skipped.
    weights = np.load(r"D:\vgg16_weight\vgg16_weights.npz")
    for position, key in enumerate(sorted(weights.keys())):
        if key in ('fc8_W', 'fc8_b'):
            continue
        session.run(my_parameters[position].assign(weights[key]))

    label_inputs = (y1_, y2_, y3_, y4_, y5_)

    def _feed_for(batch):
        """Map a fetched (image, label1..label5) batch onto the placeholders."""
        feed = {x: batch[0]}
        feed.update(zip(label_inputs, batch[1:]))
        return feed

    step = 0
    try:
        # Runs until one of the one-shot iterators is exhausted.
        while True:
            train_batch = session.run(train_next_element)
            _, train_loss = session.run(fetches=[train, loss],
                                        feed_dict=_feed_for(train_batch))
            print("loss=",train_loss)
            # Every 10th step, evaluate the accuracy on one test batch.
            if step % 10 == 0:
                test_batch = session.run(test_next_element)
                test_auc = session.run(fetches=auc,
                                       feed_dict=_feed_for(test_batch))
                print("auc=",test_auc)
            step += 1
    except tf.errors.OutOfRangeError:
        print("end!")

[TensorFlow series] [7] Single image multi-label classification network construction and training

Guess you like