[TensorFlow series] [1] Use TFRecordDataset to read image data

This article is reposted from the author's personal WeChat public account, and mainly addresses the following three questions:

  1. How to convert original image data and labels into TFRecords format data?

  2. How to use TFRecordDataset to read data in TFRecords format?

  3. How to get data from TFRecordDataset for NN training?

     

The overall idea is:

jpg---->train.tfrecords----->dataset------>NN

[1] Overview of TFRecord

[2] Generate TFRecords file

def image2tfrecord(image_list,label_list):
    """Serialize images and their labels into the file ``train.tfrecords``.

    Each image is opened with PIL, converted to 8-bit grayscale, resized to
    28x28 and stored as raw bytes under the ``image`` feature. The entry of
    ``label_list`` at the same index is stored as an int64 under ``label``.

    Args:
        image_list: Sequence of image file paths.
        label_list: Sequence of labels (anything ``int()`` accepts), indexed
            in parallel with ``image_list``.

    Raises:
        IndexError: If ``label_list`` is shorter than ``image_list``.
    """
    total = len(image_list)
    print("len=",total)
    # The context manager closes the writer even when an image fails to load,
    # so the output file handle is never leaked.
    with tf.python_io.TFRecordWriter("train.tfrecords") as writer:
        for index, image_path in enumerate(image_list):
            # Close the source image file promptly instead of relying on GC.
            with Image.open(image_path) as source:
                # Force single-channel 8-bit pixels so that every record holds
                # exactly 28*28 bytes; an RGB file would otherwise store three
                # times as many bytes and break a reshape to 28*28 on read.
                image = source.convert("L").resize((28, 28))
            # Raw pixel bytes, row by row
            image_bytes = image.tobytes()
            features = {
                # The image is stored as a single bytes value
                'image': tf.train.Feature(bytes_list=tf.train.BytesList(value=[image_bytes])),
                # The label is stored as a single int64 value
                'label': tf.train.Feature(int64_list=tf.train.Int64List(value=[int(label_list[index])])),
            }
            # Wrap the feature dict, build the Example and serialize it
            tf_features = tf.train.Features(feature=features)
            tf_example = tf.train.Example(features=tf_features)
            tf_serialized = tf_example.SerializeToString()
            # Append the serialized sample to the TFRecord file
            writer.write(tf_serialized)

[3] Parsing TFRecord files

Define the parse data function
#Input parameter example_proto is tf_serialized
def pares_tf(example_proto):
    """Decode one serialized ``tf.train.Example`` into an (image, label) pair.

    Args:
        example_proto: A serialized example carrying a bytes feature
            ``image`` and an int64 feature ``label``.

    Returns:
        A tuple ``(image, label)``. ``image`` is a float32 tensor of shape
        [784] with values scaled to [-0.5, 0.5]; ``label`` is a one-hot
        tensor of depth 10.
    """
    # Names and types here must match what was written into the record.
    feature_spec = {
        'label': tf.FixedLenFeature(shape=[],dtype=tf.int64),
        'image': tf.FixedLenFeature(shape=[],dtype=tf.string),
    }
    # Parse a single serialized sample
    parsed = tf.parse_single_example(serialized=example_proto,features=feature_spec)
    pixels = tf.decode_raw(parsed['image'],out_type=tf.uint8)
    pixels = tf.reshape(pixels,shape=[28*28])
    # Normalizing the pixel values is essential: per the author, without it
    # training does not converge and accuracy stays around 0.1, while with it
    # the accuracy matches that obtained on the original data.
    pixels = tf.cast(pixels,tf.float32)*(1./255)-0.5
    class_id = tf.cast(parsed['label'],tf.int32)
    one_hot_label = tf.one_hot(class_id, depth=10, on_value=1)
    return pixels,one_hot_label

[4] Use TFRecordDataset to read data and perform NN training

Here, LeNet is still taken as an example.

import tensorflow as tf
from PIL import Image

def paths2list(path_file_name):
    """Read a text file and return its lines, without line terminators.

    Args:
        path_file_name: Path of a text file holding one entry per line.

    Returns:
        A list of strings, one per line of the file, in file order. Blank
        lines are kept as empty strings.
    """
    # `with` closes the file deterministically.
    with open(path_file_name) as path_file:
        # Strip only the line terminator. Slicing off the last character
        # unconditionally would truncate a final line that has no newline.
        return [line.rstrip("\r\n") for line in path_file]
def pathslabel2list(path_file_name):
    """Read a text file of integer labels, one per line.

    Args:
        path_file_name: Path of a text file holding one integer per line.

    Returns:
        A list of ints, one per line of the file, in file order.

    Raises:
        ValueError: If a line (including a blank one) is not an integer.
    """
    # `with` closes the file deterministically.
    with open(path_file_name) as label_file:
        # Labels are stored as text. int() ignores surrounding whitespace,
        # including the newline, so a final line without a trailing newline
        # is parsed in full instead of losing its last digit.
        return [int(line) for line in label_file]
def one_hot_2_int(one_hot):
    """Return the index of the first entry equal to 1 in a 10-element vector.

    Only indices 0 through 9 are inspected. If none of them equals 1, the
    result is 0.
    """
    return next((idx for idx in range(10) if one_hot[idx] == 1), 0)
# Load the training image paths and their integer labels; the two files are
# read line by line into parallel lists.
train_image_list, train_image_label_list = (
    paths2list(r"E:\mnist_jpg\jpg\train\train_image_list.txt"),
    pathslabel2list(r"E:\mnist_jpg\jpg\train\train_label_list.txt"),
)

# Define the function that creates the TFRecord file

def image2tfrecord(image_list,label_list):
    """Serialize images and their labels into the file ``train.tfrecords``.

    Each image is opened with PIL, converted to 8-bit grayscale, resized to
    28x28 and stored as raw bytes under the ``image`` feature. The entry of
    ``label_list`` at the same index is stored as an int64 under ``label``.

    Args:
        image_list: Sequence of image file paths.
        label_list: Sequence of labels (anything ``int()`` accepts), indexed
            in parallel with ``image_list``.

    Raises:
        IndexError: If ``label_list`` is shorter than ``image_list``.
    """
    total = len(image_list)
    print("len=",total)
    # The context manager closes the writer even when an image fails to load,
    # so the output file handle is never leaked.
    with tf.python_io.TFRecordWriter("train.tfrecords") as writer:
        for index, image_path in enumerate(image_list):
            # Close the source image file promptly instead of relying on GC.
            with Image.open(image_path) as source:
                # Force single-channel 8-bit pixels so that every record holds
                # exactly 28*28 bytes; an RGB file would otherwise store three
                # times as many bytes and break a reshape to 28*28 on read.
                image = source.convert("L").resize((28, 28))
            # Raw pixel bytes, row by row
            image_bytes = image.tobytes()
            features = {
                # The image is stored as a single bytes value
                'image': tf.train.Feature(bytes_list=tf.train.BytesList(value=[image_bytes])),
                # The label is stored as a single int64 value
                'label': tf.train.Feature(int64_list=tf.train.Int64List(value=[int(label_list[index])])),
            }
            # Wrap the feature dict, build the Example and serialize it
            tf_features = tf.train.Features(feature=features)
            tf_example = tf.train.Example(features=tf_features)
            tf_serialized = tf_example.SerializeToString()
            # Append the serialized sample to the TFRecord file
            writer.write(tf_serialized)
# Convert the image paths and labels loaded above into TFRecord format
# (this writes train.tfrecords).
image2tfrecord(image_list=train_image_list, label_list=train_image_label_list)

#Define parsing data function
#Input parameter example_proto is tf_serialized
def pares_tf(example_proto):
    """Decode one serialized ``tf.train.Example`` into an (image, label) pair.

    Args:
        example_proto: A serialized example carrying a bytes feature
            ``image`` and an int64 feature ``label``.

    Returns:
        A tuple ``(image, label)``. ``image`` is a float32 tensor of shape
        [784] with values scaled to [-0.5, 0.5]; ``label`` is a one-hot
        tensor of depth 10.
    """
    # Names and types here must match what was written into the record.
    feature_spec = {
        'label': tf.FixedLenFeature(shape=[],dtype=tf.int64),
        'image': tf.FixedLenFeature(shape=[],dtype=tf.string),
    }
    # Parse a single serialized sample
    parsed = tf.parse_single_example(serialized=example_proto,features=feature_spec)
    pixels = tf.decode_raw(parsed['image'],out_type=tf.uint8)
    pixels = tf.reshape(pixels,shape=[28*28])
    # Normalizing the pixel values is essential: per the author, without it
    # training does not converge and accuracy stays around 0.1, while with it
    # the accuracy matches that obtained on the original data.
    pixels = tf.cast(pixels,tf.float32)*(1./255)-0.5
    class_id = tf.cast(parsed['label'],tf.int32)
    one_hot_label = tf.one_hot(class_id, depth=10, on_value=1)
    return pixels,one_hot_label

# Input pipeline: read serialized examples from train.tfrecords, decode each
# one with pares_tf, group them into batches of 32 and make a single pass.
dataset = (
    tf.data.TFRecordDataset(filenames=['train.tfrecords'])
    .map(pares_tf)
    .batch(32)
    .repeat(1)
)

# A one-shot iterator needs no explicit initialization; evaluating
# next_element in a session yields the next (image, label) batch.
iterator = dataset.make_one_shot_iterator()
next_element = iterator.get_next()

# Input images: each MNIST picture is flattened to 28*28*1 = 784 values;
# None stands for the batch size.
x = tf.placeholder(dtype=tf.float32,shape=[None,28*28],name="x")
# One-hot label input; MNIST has 10 classes in total.
y_ = tf.placeholder(dtype=tf.float32,shape=[None,10],name="y_")
# Reshape the flat input back to N x 28 x 28 x 1 (height, width, channels);
# -1 lets TensorFlow infer N.
image = tf.reshape(x,shape=[-1,28,28,1])

# First layer: convolution kernel 5x5x1x32, then 2x2 max pooling with stride 2.
# NOTE: name="w1" labels the random_normal initializer op, not the Variable.
w1 = tf.Variable(initial_value=tf.random_normal(shape=[5,5,1,32],stddev=0.1,dtype=tf.float32,name="w1"))
b1= tf.Variable(initial_value=tf.zeros(shape=[32]))
conv1 = tf.nn.conv2d(input=image,filter=w1,strides=[1,1,1,1],padding="SAME",name="conv1")
relu1 = tf.nn.relu(tf.nn.bias_add(conv1,b1),name="relu1")
pool1 = tf.nn.max_pool(value=relu1,ksize=[1,2,2,1],strides=[1,2,2,1],padding="SAME")
# shape = [None, 14, 14, 32]
# Second layer: convolution kernel 5x5x32x64, then 2x2 max pooling with stride 2.
w2 = tf.Variable(initial_value=tf.random_normal(shape=[5,5,32,64],stddev=0.1,dtype=tf.float32,name="w2"))
b2 = tf.Variable(initial_value=tf.zeros(shape=[64]))
conv2 = tf.nn.conv2d(input=pool1,filter=w2,strides=[1,1,1,1],padding="SAME")
relu2 = tf.nn.relu(tf.nn.bias_add(conv2,b2),name="relu2")
pool2 = tf.nn.max_pool(value=relu2,ksize=[1,2,2,1],strides=[1,2,2,1],padding="SAME",name="pool2")
# shape = [None, 7, 7, 64]
# FC1
w3 = tf.Variable(initial_value=tf.random_normal(shape=[7*7*64,1024],stddev=0.1,dtype=tf.float32,name="w3"))
b3 = tf.Variable(initial_value=tf.zeros(shape=[1024]))
# Key step: flatten the pooled feature maps before the matmul.
input3 = tf.reshape(pool2,shape=[-1,7*7*64],name="input3")
fc1 = tf.nn.relu(tf.nn.bias_add(value=tf.matmul(input3,w3),bias=b3))
# shape = [None, 1024]
# FC2 (produces the logits)
w4 = tf.Variable(initial_value=tf.random_normal(shape=[1024,10],stddev=0.1,dtype=tf.float32,name="w4"))
b4 = tf.Variable(initial_value=tf.zeros(shape=[10]))
fc2 = tf.nn.bias_add(value=tf.matmul(fc1,w4),bias=b4)
# shape = [None, 10]
# Softmax turns the network outputs into class probabilities
# (used below for the accuracy computation).
y = tf.nn.softmax(fc2)

# Cross-entropy loss: summed over the classes of each example (axis=1), then
# averaged over the batch. log_softmax works on the logits directly, so a
# probability that underflows to 0 cannot produce log(0) = -inf and a NaN loss.
cross_entropy = tf.reduce_mean(-tf.reduce_sum(y_ * tf.nn.log_softmax(fc2), axis=1))
# Define the optimizer
train = tf.train.AdamOptimizer(learning_rate=0.0001).minimize(loss=cross_entropy)

# A prediction is correct when the index of the largest predicted probability
# equals the index of the 1 in the one-hot label.
correct_predict = tf.equal(tf.argmax(y,1),tf.argmax(y_,1))
# Accuracy is the fraction of correct predictions in the batch
accuracy = tf.reduce_mean(tf.cast(correct_predict,dtype=tf.float32),name="accuracy")
# Op that initializes all variables
init = tf.global_variables_initializer()

with tf.Session() as sess:
    print("start")
    sess.run(fetches=init)
    step = 0
    try:
        while True:
            # Pull the next batch out of the dataset through the session;
            # once the dataset is exhausted this raises OutOfRangeError.
            batch_images, batch_labels = sess.run(fetches=next_element)
            batch_feed = {x: batch_images, y_: batch_labels}
            sess.run(fetches=train, feed_dict=batch_feed)
            # Every 100 steps, report accuracy on the batch just trained on.
            if step % 100 == 0:
                batch_accuracy = sess.run(fetches=accuracy, feed_dict=batch_feed)
                print(step, "accuracy=", batch_accuracy)
            step += 1
    except tf.errors.OutOfRangeError:
        print("end!")
  The data source is as follows:

The training results are as follows:

[5] Summary
1. When preprocessing the image data, it must be normalized, that is, the pixel values 0-255 are scaled down to 0-1 (the code above additionally subtracts 0.5, giving the range [-0.5, 0.5]).
2. Generating and reading TFRecord files is a process of serialization and deserialization; the feature names and types used when parsing must match those used when writing.
3. A TFRecord can store multiple features, and you can parse only the ones you are interested in.
4. Use TFRecordDataset to read the data; internally it encapsulates multi-threading, queues and other operations, which makes it simple and convenient.
5. Reading data with a dataset requires no change to the network: just feed the fetched data into the input and label placeholders of the network.
6. Try to convert the original data into TFRecord format and read it with a dataset. It is fast, convenient and simple.

Guess you like

Origin http://43.154.161.224:23101/article/api/json?id=324399147&siteId=291194637