The following code generates a TFRecord file for each dataset split, in which every image is 227 * 227 * 1 and each label is the index of the category's English name.
The original pictures are 256 * 256 * 3 RGB .jpg files. Because color is not needed for this dataset, the images are converted to grayscale to save space.
"""Convert 256x256 RGB .jpg images into train/test TFRecord files.

Each image is resized to 227x227 and converted to grayscale ('L') to save
space.  Each TFRecord example stores the raw grayscale bytes ('img_raw')
and the integer index of its class name ('label').

Written against the TensorFlow 1.x API (tf.gfile, tf.python_io).
"""
import os
import sys

import numpy as np
import tensorflow as tf
from PIL import Image

# Dataset paths.
TRAIN_DATASET_DIR = "E:/python文件/tensorflow_learn/MyNet/images/train/"
TEST_DATASET_DIR = "E:/python文件/tensorflow_learn/MyNet/images/test/"
# Output directory for the generated .tfrecords files.
# FIX: the original value "E: / python file / tensorflow_learn / ..." was
# garbled (spaces inserted); use the same base directory as the dataset paths.
TFRECORD_DIR = "E:/python文件/tensorflow_learn/MyNet/images/"

# Class names.
# FIX: this was a set literal {...}; iterating a set yields a
# hash-randomized order, so the label index assigned to each class could
# differ between runs (train written in one run, test in another would get
# mismatched labels).  A tuple makes the index<->class mapping deterministic.
classes = ("apple_scab", "black_rot", "cedar_apple_rust", "healthy")


def _dataset_exists(tfrecord_dir):
    """Return True if both train.tfrecords and test.tfrecords already exist.

    Args:
        tfrecord_dir: directory in which the .tfrecords files are stored.
    """
    for split_name in ('train', 'test'):
        output_filename = os.path.join(tfrecord_dir, split_name + '.tfrecords')
        if not tf.gfile.Exists(output_filename):
            return False
    return True


def int64_feature(values):
    """Wrap an int (or list/tuple of ints) into a tf.train.Feature."""
    if not isinstance(values, (tuple, list)):
        values = [values]
    return tf.train.Feature(int64_list=tf.train.Int64List(value=values))


def bytes_feature(values):
    """Wrap a single bytes value into a tf.train.Feature."""
    return tf.train.Feature(bytes_list=tf.train.BytesList(value=[values]))


def _get_filenames_and_classes(dataset_dir):
    """Return the full paths of every entry directly under dataset_dir."""
    return [os.path.join(dataset_dir, filename)
            for filename in os.listdir(dataset_dir)]


def _convert_dataset(split_name, dataset_dir):
    """Write all images of all classes for one split into a TFRecord file.

    Args:
        split_name: 'train' or 'test'; selects both the source directory and
            the output filename.
        dataset_dir: unused (kept for interface compatibility with existing
            callers; the source directory is derived from split_name).
    """
    assert split_name in ('train', 'test')
    # FIX: the original opened a tf.Session() that was never used; removed.
    output_filename = os.path.join(TFRECORD_DIR, split_name + '.tfrecords')
    base_dir = TRAIN_DATASET_DIR if split_name == 'train' else TEST_DATASET_DIR
    with tf.python_io.TFRecordWriter(output_filename) as tfrecord_writer:
        # The label stored for each image is the class's position in `classes`.
        for index, name in enumerate(classes):
            class_path = os.path.join(base_dir, name)
            filenames = _get_filenames_and_classes(class_path)
            for i, img_name in enumerate(filenames):
                sys.stdout.write('\r>>%s %s Convering image: %d/%d'
                                 % (split_name, name, i + 1, len(filenames)))
                print(str(img_name))
                sys.stdout.flush()
                image_data = Image.open(img_name)
                image_data = image_data.resize((227, 227))
                # Grayscale ('L') conversion: color carries no information
                # for this dataset, so dropping it saves space.
                image_data = np.array(image_data.convert('L'))
                img_raw = image_data.tobytes()
                # FIX: use the feature helpers defined above instead of
                # re-spelling tf.train.Feature(...) inline (consistency).
                example = tf.train.Example(
                    features=tf.train.Features(
                        feature={
                            'img_raw': bytes_feature(img_raw),
                            'label': int64_feature(index),
                        }))
                tfrecord_writer.write(example.SerializeToString())


# Script driver: skip conversion when the TFRecord files already exist.
if _dataset_exists(TFRECORD_DIR):
    print("file already exists")
else:
    _convert_dataset('test', TEST_DATASET_DIR)
    _convert_dataset('train', TRAIN_DATASET_DIR)
# NOTE(review): as in the original, this prints unconditionally — even when
# the files already existed and nothing was generated.
print('files generated tfrecord!')