This article is reposted from the author's personal WeChat public account and mainly addresses the following three questions:
-
How to convert original image data and labels into TFRecords format data?
-
How to use TFRecordDataset to read data in TFRecords format?
-
How to get data from TFRecordDataset for NN training?
The overall idea is:
jpg---->train.tfrecords----->dataset------>NN
[1] Overview of TFRecord
[2] Generate TFRecords file
def image2tfrecord(image_list, label_list):
    """Serialize images and their labels into the file "train.tfrecords".

    Each sample becomes one tf.train.Example with two features:
    'image' (raw pixel bytes of the 28x28 image) and 'label' (int64).

    Args:
        image_list: sequence of image file paths.
        label_list: sequence of labels, indexed in step with image_list;
            each entry must be convertible with int().

    Raises:
        IndexError: if label_list is shorter than image_list.
    """
    len2 = len(image_list)
    print("len=", len2)
    # The context manager closes the writer even when an image fails to load,
    # so a partially written file is still flushed and the handle is released.
    with tf.python_io.TFRecordWriter("train.tfrecords") as writer:
        for i, image_path in enumerate(image_list):
            # Read and decode the image; the file handle is released as soon
            # as the pixel data has been copied out by convert()/resize().
            with Image.open(image_path) as source:
                # Force single-channel data so the record always holds exactly
                # 28*28 bytes, which is what the parsing side reshapes to.
                # For images that are already grayscale this is a plain copy.
                image = source.convert("L").resize((28, 28))
            # Convert to raw bytes
            image_bytes = image.tobytes()
            features = {
                # Use bytes to store the image
                'image': tf.train.Feature(
                    bytes_list=tf.train.BytesList(value=[image_bytes])),
                # Use int64 to store the label
                'label': tf.train.Feature(
                    int64_list=tf.train.Int64List(value=[int(label_list[i])])),
            }
            # Combine all features, wrap them in an Example and serialize it
            tf_features = tf.train.Features(feature=features)
            tf_example = tf.train.Example(features=tf_features)
            tf_serialized = tf_example.SerializeToString()
            # Write the serialized sample to the tfrecord file
            writer.write(tf_serialized)
[3] Parsing TFRecord files
# Define the parse data function.
# The input parameter example_proto is one serialized sample (tf_serialized).
def pares_tf(example_proto):
    """Decode one serialized Example into an (image, label) tensor pair.

    Args:
        example_proto: scalar string tensor holding one serialized
            tf.train.Example with an int64 'label' and a bytes 'image'.

    Returns:
        A tuple (image, label): image is a float32 tensor of shape [784]
        with values in [-0.5, 0.5]; label is an int32 one-hot tensor of
        depth 10.
    """
    # Description of the features to extract: name -> shape and dtype.
    feature_spec = {
        'label': tf.FixedLenFeature(shape=[], dtype=tf.int64),
        'image': tf.FixedLenFeature(shape=[], dtype=tf.string),
    }
    # Parse a single sample.
    parsed = tf.parse_single_example(
        serialized=example_proto, features=feature_spec)

    pixels = tf.decode_raw(parsed['image'], out_type=tf.uint8)
    pixels = tf.reshape(pixels, shape=[28 * 28])
    # Normalizing the image data is the key step: without it the accuracy
    # does not converge and stays around 0.1. With it, the accuracy matches
    # the one obtained with the original data.
    pixels = tf.cast(pixels, tf.float32) * (1. / 255) - 0.5

    class_id = tf.cast(parsed['label'], tf.int32)
    one_hot_label = tf.one_hot(class_id, depth=10, on_value=1)
    return pixels, one_hot_label
[4] Use TFRecordDataset to read data and perform NN training
Here, LeNet is still taken as an example.
import tensorflow as tf
from PIL import Image


def paths2list(path_file_name):
    """Return the image paths stored one per line in path_file_name.

    Only the line terminator is removed, so a final line without a trailing
    newline keeps its last character. Blank lines are skipped.
    """
    with open(path_file_name) as path_file:
        stripped = (line.rstrip("\r\n") for line in path_file)
        return [path for path in stripped if path]


def pathslabel2list(path_file_name):
    """Return the labels stored one per line in path_file_name as ints.

    Labels are stored as text, so each non-blank line is converted with
    int().
    """
    with open(path_file_name) as label_file:
        return [int(line) for line in label_file if line.strip()]


def one_hot_2_int(one_hot):
    """Return the first index in 0..9 whose entry equals 1, or 0 if none."""
    for i in range(10):
        if one_hot[i] == 1:
            return i
    return 0


train_image_list = paths2list(r"E:\mnist_jpg\jpg\train\train_image_list.txt")
train_image_label_list = pathslabel2list(r"E:\mnist_jpg\jpg\train\train_label_list.txt")


# Define the function that creates the TFRecord file
def image2tfrecord(image_list, label_list):
    """Serialize images and their labels into the file "train.tfrecords".

    Each sample becomes one tf.train.Example with two features:
    'image' (raw pixel bytes of the 28x28 image) and 'label' (int64).

    Args:
        image_list: sequence of image file paths.
        label_list: sequence of labels, indexed in step with image_list.

    Raises:
        IndexError: if label_list is shorter than image_list.
    """
    len2 = len(image_list)
    print("len=", len2)
    # The context manager closes the writer even when an image fails to load.
    with tf.python_io.TFRecordWriter("train.tfrecords") as writer:
        for i, image_path in enumerate(image_list):
            # Read and decode the image, releasing the file handle afterwards.
            with Image.open(image_path) as source:
                # Force single-channel data so every record holds exactly
                # 28*28 bytes, matching the reshape done in pares_tf.
                image = source.convert("L").resize((28, 28))
            # Convert to raw bytes
            image_bytes = image.tobytes()
            features = {
                # Use bytes to store the image
                'image': tf.train.Feature(
                    bytes_list=tf.train.BytesList(value=[image_bytes])),
                # Use int64 to store the label
                'label': tf.train.Feature(
                    int64_list=tf.train.Int64List(value=[int(label_list[i])])),
            }
            # Combine all features, wrap them in an Example and serialize it
            tf_features = tf.train.Features(feature=features)
            tf_example = tf.train.Example(features=tf_features)
            tf_serialized = tf_example.SerializeToString()
            # Write the serialized sample to the tfrecord file
            writer.write(tf_serialized)


# Call the above interface to convert the image and label data into tfrecord format
image2tfrecord(train_image_list, train_image_label_list)


# Define the parsing function.
# The input parameter example_proto is one serialized sample (tf_serialized).
def pares_tf(example_proto):
    """Decode one serialized Example into an (image, label) tensor pair.

    Returns:
        A tuple (image, label): image is a float32 tensor of shape [784]
        with values in [-0.5, 0.5]; label is an int32 one-hot tensor of
        depth 10.
    """
    # Description of the features to extract: name -> shape and dtype.
    dics = {
        'label': tf.FixedLenFeature(shape=[], dtype=tf.int64),
        'image': tf.FixedLenFeature(shape=[], dtype=tf.string),
    }
    # Parse a single sample
    parsed_example = tf.parse_single_example(serialized=example_proto, features=dics)
    image = tf.decode_raw(parsed_example['image'], out_type=tf.uint8)
    image = tf.reshape(image, shape=[28 * 28])
    # Normalizing the image data is the key step: without it the accuracy
    # does not converge and stays around 0.1. With it, the accuracy matches
    # the one obtained with the original data.
    image = tf.cast(image, tf.float32) * (1. / 255) - 0.5
    label = parsed_example['label']
    label = tf.cast(label, tf.int32)
    label = tf.one_hot(label, depth=10, on_value=1)
    return image, label


dataset = tf.data.TFRecordDataset(filenames=['train.tfrecords'])
dataset = dataset.map(pares_tf)
dataset = dataset.batch(32).repeat(1)
iterator = dataset.make_one_shot_iterator()
next_element = iterator.get_next()

# Input data: an mnist picture is 28*28*1 = 784 values; None is the batch size
x = tf.placeholder(dtype=tf.float32, shape=[None, 28 * 28], name="x")
# Label data: mnist has a total of 10 categories
y_ = tf.placeholder(dtype=tf.float32, shape=[None, 10], name="y_")
# Reshape the flat input to images, W*H*C ---> 28*28*1; -1 means N images
image = tf.reshape(x, shape=[-1, 28, 28, 1])

# First layer: convolution kernel 5*5*1*32, max-pool window 2*2 with stride 2
w1 = tf.Variable(initial_value=tf.random_normal(shape=[5, 5, 1, 32], stddev=0.1, dtype=tf.float32, name="w1"))
b1 = tf.Variable(initial_value=tf.zeros(shape=[32]))
conv1 = tf.nn.conv2d(input=image, filter=w1, strides=[1, 1, 1, 1], padding="SAME", name="conv1")
relu1 = tf.nn.relu(tf.nn.bias_add(conv1, b1), name="relu1")
pool1 = tf.nn.max_pool(value=relu1, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="SAME")
# shape={None,14,14,32}

# Second layer: convolution kernel 5*5*32*64, max-pool window 2*2 with stride 2
w2 = tf.Variable(initial_value=tf.random_normal(shape=[5, 5, 32, 64], stddev=0.1, dtype=tf.float32, name="w2"))
b2 = tf.Variable(initial_value=tf.zeros(shape=[64]))
conv2 = tf.nn.conv2d(input=pool1, filter=w2, strides=[1, 1, 1, 1], padding="SAME")
relu2 = tf.nn.relu(tf.nn.bias_add(conv2, b2), name="relu2")
pool2 = tf.nn.max_pool(value=relu2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding="SAME", name="pool2")
# shape={None,7,7,64}

# FC1
w3 = tf.Variable(initial_value=tf.random_normal(shape=[7 * 7 * 64, 1024], stddev=0.1, dtype=tf.float32, name="w3"))
b3 = tf.Variable(initial_value=tf.zeros(shape=[1024]))
# Key step: flatten the feature maps before the fully connected layer
input3 = tf.reshape(pool2, shape=[-1, 7 * 7 * 64], name="input3")
fc1 = tf.nn.relu(tf.nn.bias_add(value=tf.matmul(input3, w3), bias=b3))
# shape={None,1024}

# FC2
w4 = tf.Variable(initial_value=tf.random_normal(shape=[1024, 10], stddev=0.1, dtype=tf.float32, name="w4"))
b4 = tf.Variable(initial_value=tf.zeros(shape=[10]))
fc2 = tf.nn.bias_add(value=tf.matmul(fc1, w4), bias=b4)
# shape={None,10}

# Define the cross entropy loss
# Use softmax to express the network output as probabilities
y = tf.nn.softmax(fc2)
# Sum over the class axis to get one loss value per sample, then average over
# the batch. Without axis=1 the sum collapses the whole batch to a scalar and
# the mean has nothing left to average. The clip keeps log() away from 0 so a
# saturated softmax cannot turn the loss into NaN.
cross_entropy = tf.reduce_mean(
    -tf.reduce_sum(y_ * tf.log(tf.clip_by_value(y, 1e-10, 1.0)), axis=1))
# Define the solver
train = tf.train.AdamOptimizer(learning_rate=0.0001).minimize(loss=cross_entropy)
# A prediction is correct when the index of the largest output equals the
# index of the largest label entry
correct_predict = tf.equal(tf.argmax(y, 1), tf.argmax(y_, 1))
# Define how to calculate the accuracy
accuracy = tf.reduce_mean(tf.cast(correct_predict, dtype=tf.float32), name="accuracy")
# Define the initialization op
init = tf.global_variables_initializer()

with tf.Session() as sess:
    print("start")
    sess.run(fetches=init)
    i = 0
    try:
        while True:
            # Fetch the next batch from the dataset through the session
            image, label = sess.run(fetches=next_element)
            sess.run(fetches=train, feed_dict={x: image, y_: label})
            if i % 100 == 0:
                train_accuracy = sess.run(fetches=accuracy, feed_dict={x: image, y_: label})
                print(i, "accuracy=", train_accuracy)
            i = i + 1
    except tf.errors.OutOfRangeError:
        # Raised by get_next() once the single pass over the dataset is done
        print("end!")

# The data source is as follows:
The training results are as follows:
[5] Summary
1. The image data must be normalized when it is processed, that is, rescaled from 0-255 to a small range around zero (0-1, or [-0.5, 0.5] as in the code above).
2. Generating and reading TFRecord files is a process of serialization and deserialization; the feature names and types used when parsing must match those used when writing.
3. A TFRecord can store multiple features, and you can parse only the parts you are interested in.
4. Reading data with TFRecordDataset is simple and convenient, because its lower layer encapsulates multi-threading, queues and other operations.
5. Reading data through a dataset requires no change to the network: just feed the fetched data into the input and label placeholders of the network.
6. Try to convert the original data into TFRecord format and read it with a dataset. It is fast, convenient and simple.