Creating a dataset in TFRecord format with TensorFlow

The following code generates a TFRecord file in which each image is 227 × 227 × 1 and each label is the English name of its class.

The original images are 256 × 256 × 3 RGB .jpg files. Because color is not required for this dataset, the images are converted to grayscale to save space.

import tensorflow as tf
import os
import sys
from PIL import Image
import numpy as np

# Dataset paths
TRAIN_DATASET_DIR = "E:/python文件/tensorflow_learn/MyNet/images/train/"
TEST_DATASET_DIR = "E:/python文件/tensorflow_learn/MyNet/images/test/"
# TFRecord output directory. Fixed to match the style of the paths above;
# the original value contained stray spaces and a retranslated folder name,
# so the path could never exist on disk.
TFRECORD_DIR = "E:/python文件/tensorflow_learn/MyNet/images/"
# Class names. A tuple (not a set) keeps iteration order deterministic,
# so each class always maps to the same integer label across runs.
classes = ("apple_scab", "black_rot", "cedar_apple_rust", "healthy")


# Tfrecord determine whether a file exists
def _dataset_exists(tfrecord_dir):
    for split_name in ['train', 'test']:
        # Train.tfrecords file path and produce test.tfrecords
        output_filename = os.path.join(tfrecord_dir, split_name+'.tfrecords')
        if not tf.gfile.Exists(output_filename):
            return False
    return True


def int64_feature(values):
    """Wrap an int (or a list/tuple of ints) in a tf.train.Feature."""
    wrapped = values if isinstance(values, (tuple, list)) else [values]
    return tf.train.Feature(int64_list=tf.train.Int64List(value=wrapped))


def bytes_feature(values):
    """Wrap a bytes value (or a list/tuple of bytes) in a tf.train.Feature.

    Now accepts a sequence as well as a single value, mirroring
    int64_feature so the two helpers behave consistently. A single value
    is still wrapped in a one-element list, so existing callers are
    unaffected.
    """
    if not isinstance(values, (tuple, list)):
        values = [values]
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=values))


# Get all files in that category
def _get_filenames_and_classes(dataset_dir):
    photo_filename = []
    for filename in os.listdir(dataset_dir):
        # Get File Path
        path = os.path.join(dataset_dir, filename)
        photo_filename.append(path)
    return photo_filename


# Converting data into formats TFRecord
def _convert_dataset(split_name, dataset_dir):
    assert split_name in ['train', 'test']
    with tf.Session() as sess:
        output_filename = os.path.join(TFRECORD_DIR, split_name+'.tfrecords')
        with tf.python_io.TFRecordWriter(output_filename) as tfrecord_writer:
            for index, name in enumerate(classes):
                if split_name == 'train':
                    class_path = TRAIN_DATASET_DIR + name + '/'
                else:
                    class_path = TEST_DATASET_DIR + name + '/'
                filenames = _get_filenames_and_classes(class_path)
                for i, img_name in enumerate(filenames):
                    sys.stdout.write('\r>>%s %s  Convering image: %d/%d' % (split_name, name, i+1, len(filenames)))
                    print(str(img_name))
                    sys.stdout.flush()
                    image_data = Image.open(img_name)
                    image_data = image_data.resize((227, 227))
                    image_data = np.array (image_data.convert ( 'L')) # image processing gradation
                    img_raw = image_data.tobytes()
                    example = tf.train.Example(
                        features=tf.train.Features(
                            feature={
                                'img_raw': tf.train.Feature(bytes_list=tf.train.BytesList(value=[img_raw])),
                                'label': tf.train.Feature(int64_list=tf.train.Int64List(value=[index])),
                            }
                        )
                    )
                    tfrecord_writer.write(example.SerializeToString())
            # tfrecord_writer.close()


# Tfrecord determine whether a file exists
if _dataset_exists(TFRECORD_DIR):
    print ( "file already exists")
else:
    # Data Conversion
    _convert_dataset('test', TEST_DATASET_DIR)
    _convert_dataset('train', TRAIN_DATASET_DIR)
print ( 'files generated tfrecord!')

  

You may also like

Origin www.cnblogs.com/lyf98/p/11965256.html