TensorFlow实战Google深度学习框架——TFRecord

将输入数据转化为TFRecord格式

import tensorflow as tf
from tensorflow.examples.tutorials.mnist import input_data
import numpy as np

def _int64_feature(value):
    """Build a ``tf.train.Feature`` holding a single integer value.

    ``value`` is wrapped in a one-element list and stored in the feature's
    ``int64_list`` field.
    """
    int_list = tf.train.Int64List(value=[value])
    return tf.train.Feature(int64_list=int_list)

def _bytes_feature(value):
    """Build a ``tf.train.Feature`` holding a single byte-string value.

    ``value`` is wrapped in a one-element list and stored in the feature's
    ``bytes_list`` field.
    """
    byte_list = tf.train.BytesList(value=[value])
    return tf.train.Feature(bytes_list=byte_list)

# Load MNIST with pixel values kept as uint8 and labels one-hot encoded.
mnist = input_data.read_data_sets(
    'D:/anicode/spyderworkspace/8.3mnist/MNIST_data',
    dtype=tf.uint8, one_hot=True)

images = mnist.train.images

# Ground-truth labels of the training data; stored as a feature in each record.
labels = mnist.train.labels

# Size of the second axis of the image array (described by the original
# author as the image resolution); also stored as a feature in each Example.
pixels = images.shape[1]
num_examples = mnist.train.num_examples

# Output path of the TFRecord file.
filename = 'D:/anicode/spyderworkspace/8.3mnist/output.tfrecords'

# Use the writer as a context manager so the file is flushed and closed even
# if building or serializing an Example raises part-way through the loop.
with tf.python_io.TFRecordWriter(filename) as writer:
    for index in range(num_examples):
        # Serialize the image array to raw bytes. tobytes() replaces the
        # deprecated ndarray.tostring() alias and yields the same bytes.
        image_raw = images[index].tobytes()

        # Convert one sample into an Example protocol buffer carrying all of
        # its fields.
        example = tf.train.Example(features=tf.train.Features(feature={
            'pixels': _int64_feature(pixels),
            # Labels are one-hot, so argmax recovers the class index; cast the
            # NumPy scalar to a plain int before building the Int64List.
            'label': _int64_feature(int(np.argmax(labels[index]))),
            'image_raw': _bytes_feature(image_raw),
        }))

        # Append the serialized Example to the TFRecord file.
        writer.write(example.SerializeToString())

读取TFRecord中的数据

import tensorflow as tf

# Create a reader that reads examples from a TFRecord file.
reader = tf.TFRecordReader()

# Create a queue that maintains the list of input files.
filename_queue = tf.train.string_input_producer(
    ['D:/anicode/spyderworkspace/8.3mnist/output.tfrecords'])

# Read one example from the file; read() returns a pair and only the
# serialized record (second element) is used here.
_, serialized_example = reader.read(filename_queue)

# Parse the serialized example using the same feature keys that were written.
features = tf.parse_single_example(
    serialized_example,
    features={
        'image_raw': tf.FixedLenFeature([], tf.string),
        'pixels': tf.FixedLenFeature([], tf.int64),
        'label': tf.FixedLenFeature([], tf.int64),
    })

# tf.decode_raw turns the byte string back into the image's pixel array.
images = tf.decode_raw(features['image_raw'], tf.uint8)
labels = tf.cast(features['label'], tf.int32)
pixels = tf.cast(features['pixels'], tf.int32)

sess = tf.Session()

# Start the queue-runner threads that feed the input pipeline.
coord = tf.train.Coordinator()
threads = tf.train.start_queue_runners(sess=sess, coord=coord)

try:
    # Each run reads one example from the TFRecord file. Per the original
    # author's note, once every example has been read this program starts
    # over from the beginning of the file.
    for i in range(10):
        image, label, pixel = sess.run([images, labels, pixels])
finally:
    # Ask the queue-runner threads to stop, wait for them to finish, and
    # release the session so no background threads or resources are leaked.
    coord.request_stop()
    coord.join(threads)
    sess.close()

TensorFlow实战Google深度学习框架——TFRecord

猜你喜欢