Optimizing Image-to-TFRecord Conversion

Copyright notice: This is an original article by the author and may not be reproduced without permission. https://blog.csdn.net/qian99/article/details/79939466

A couple of days ago, while using an open-source codebase, I noticed that its TFRecord conversion produced enormous files. Disk space was already tight, and this made things worse. The converted data turned out to be 9-10x the size of the original images, which is excessive. I had never paid much attention to this when using TFRecords before, so this time I decided to dig into it.

Approach 1:

This is how the original code did it; for brevity, only the important parts are shown:

import tensorflow as tf
import numpy as np
import os,sys
import PIL.Image

def _int64_feature(value):
    return tf.train.Feature(int64_list=tf.train.Int64List(value=[value]))

def _bytes_feature(value):
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=[value]))

def get_file_list(input_dir):
    # List every file in the input directory.
    return os.listdir(input_dir)

def img_to_feature_1(img_path):
    # Decode the image into a raw numpy array.
    img = PIL.Image.open(img_path)
    img = np.array(img)
    # Assumes a 3-channel image; a grayscale input would fail to unpack here.
    height, width, channel = img.shape

    # Store the raw pixel bytes plus the shape needed to rebuild the array.
    feature = tf.train.Features(feature={
        'img': _bytes_feature(img.tobytes()),
        'width': _int64_feature(width),
        'height': _int64_feature(height),
        'channel': _int64_feature(channel)
    })
    return feature

def convert_to_tfrecord():
    input_dir = './data'
    save_path = './data2.tfrecord'
    file_list = get_file_list(input_dir)

    writer_options = tf.python_io.TFRecordOptions(tf.python_io.TFRecordCompressionType.ZLIB)
    writer = tf.python_io.TFRecordWriter(path=save_path, options=writer_options)
    for file in file_list:
        img_path = os.path.join(input_dir, file)
        print(img_path)
        feature = img_to_feature_1(img_path)
        example = tf.train.Example(features=feature)
        writer.write(example.SerializeToString())

    writer.close()

convert_to_tfrecord()

As you can see, this code first converts the image into a numpy array, serializes that array to a byte string, and saves it along with the image's original dimensions so the array can be rebuilt at decode time. Storing the raw pixel matrix throws away the JPEG compression entirely, so the result is bound to be much larger than the source files. Not an elegant way to do it.
Images totaling 78 MB ballooned to 505 MB after conversion.
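For reference, here is a minimal sketch (mine, not from the original code) of the matching decode step for these raw records: parse the features, reinterpret the byte string as uint8 pixels, then reshape using the stored dimensions. It assumes the same imports as above.

def parse_raw_example(serialized_example):
    # Parse the features written by img_to_feature_1.
    features = tf.parse_single_example(serialized_example, features={
        'img': tf.FixedLenFeature((), tf.string),
        'width': tf.FixedLenFeature((), tf.int64),
        'height': tf.FixedLenFeature((), tf.int64),
        'channel': tf.FixedLenFeature((), tf.int64)
    })
    # Reinterpret the raw byte string as uint8 pixels...
    img = tf.decode_raw(features['img'], tf.uint8)
    # ...and restore the original (height, width, channel) shape.
    shape = tf.stack([tf.cast(features['height'], tf.int32),
                      tf.cast(features['width'], tf.int32),
                      tf.cast(features['channel'], tf.int32)])
    return tf.reshape(img, shape)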

Approach 2:

The next approach is nothing fancy either: write the file's raw bytes straight into the record, and only decode them back into an image when reading the TFRecord.

def img_to_feature_2(img_path):
    # Read the already-compressed JPEG bytes as-is, without decoding.
    with tf.gfile.GFile(img_path, 'rb') as fid:
        encoded_jpg = fid.read()
    feature = tf.train.Features(feature={
        'img': _bytes_feature(encoded_jpg),
        'format': _bytes_feature('jpeg'.encode('utf8'))
    })
    return feature

The code is very simple, but the effect is obvious: the same data now takes only 73 MB, and reading and writing are noticeably faster.
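To sanity-check the output, here is a small sketch (the function name is my own) that iterates over the finished file and counts its records, using the same ZLIB options it was written with:

def count_records(path):
    # The iterator must use the same compression options as the writer.
    options = tf.python_io.TFRecordOptions(tf.python_io.TFRecordCompressionType.ZLIB)
    count = 0
    for _ in tf.python_io.tf_record_iterator(path, options=options):
        count += 1
    return count

print(count_records('./data2.tfrecord'))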

Reading the data

Below is the code for reading the TFRecord. I won't include a full reader for approach 1 (its parse step was sketched above, and complete examples are easy to find online):

import tensorflow as tf
import numpy as np
import os, sys

slim_example_decoder = tf.contrib.slim.tfexample_decoder

def get_read_data(filename):
    filename_queue = tf.train.string_input_producer([filename])
    # The reader must use the same ZLIB options the file was written with.
    tfrecord_options = tf.python_io.TFRecordOptions(tf.python_io.TFRecordCompressionType.ZLIB)

    reader = tf.TFRecordReader(options=tfrecord_options)
    _, serialized_example = reader.read(filename_queue)

    keys_to_features = {
        'img': tf.FixedLenFeature((), tf.string, default_value=''),
        'format': tf.FixedLenFeature((), tf.string, default_value='jpeg')
    }
    items_to_handlers = {
        'img': slim_example_decoder.Image(image_key='img', format_key='format', channels=3),
        'format': slim_example_decoder.Tensor('format')
    }

    serialized_example = tf.reshape(serialized_example, shape=[])
    decoder = slim_example_decoder.TFExampleDecoder(keys_to_features, items_to_handlers)
    keys = decoder.list_items()
    tensors = decoder.decode(serialized_example, items=keys)
    tensor_dict = dict(zip(keys, tensors))
    return tensor_dict['img'], tensor_dict['format']

def run():
    input_path = './data2.tfrecord'
    img_tensor, img_format_tensor = get_read_data(input_path) 
    with tf.Session() as sess:
        sess.run(tf.global_variables_initializer())
        # Queue runners feed the string_input_producer queue.
        coord = tf.train.Coordinator()
        threads = tf.train.start_queue_runners(coord=coord)
        for _ in range(10):
            img, format_name = sess.run([img_tensor, img_format_tensor])
            print('img:', img)
            print('img shape:', img.shape)
            print('img format:', format_name)

        coord.request_stop()
        coord.join(threads)

run()
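If you go on to build a training pipeline, the decoded images usually need to be batched. Here is a rough sketch (the 224x224 target size, function name, and thread/capacity numbers are my own placeholders) that resizes to a fixed shape so tf.train.batch can infer static dimensions:

def get_batched_images(filename, batch_size=32):
    img, _ = get_read_data(filename)
    # tf.train.batch needs statically known shapes, so resize to a fixed size first.
    img = tf.image.resize_images(img, [224, 224])
    img.set_shape([224, 224, 3])
    return tf.train.batch([img], batch_size=batch_size,
                          num_threads=2, capacity=3 * batch_size)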
