TensorFlow 自定义TFRecord文件读写

1 TensorFlow TFRecord文件写入

def _int64_feature(value):
    """Wrap an iterable of integers in a ``tf.train.Feature``.

    Args:
        value: Integers passed straight through as the ``value`` of a
            ``tf.train.Int64List``.

    Returns:
        A ``tf.train.Feature`` whose ``int64_list`` field holds ``value``.
    """
    int64_list = tf.train.Int64List(value=value)
    return tf.train.Feature(int64_list=int64_list)
def serialize_pair_batches(input_words, output_words):
    """Serialize an input/output pair into a ``tf.train.Example`` string.

    Args:
        input_words: Integers stored under the ``'input'`` feature key.
        output_words: Integers stored under the ``'output'`` feature key.

    Returns:
        The serialized ``tf.train.Example`` as returned by
        ``SerializeToString()``.
    """
    features = tf.train.Features(
        feature={
            'input': _int64_feature(input_words),
            'output': _int64_feature(output_words),
        }
    )
    example = tf.train.Example(features=features)
    return example.SerializeToString()
def generate_tf_record():
    """Write one serialized example of zeros to ``./TFRecord.tfrecord``.

    Builds two flat int64 arrays of 30 zeros, serializes them as the
    ``'input'`` / ``'output'`` pair and writes the result as a single
    record.
    """
    input_words = np.zeros(30, dtype=np.int64)
    # The (30, 1) array is flattened because the feature is a flat int list.
    output_words = np.zeros((30, 1), dtype=np.int64).flatten()
    serialized = serialize_pair_batches(input_words, output_words)
    train_data_path = "./TFRecord.tfrecord"
    # The context manager closes the writer, so the record is flushed to
    # disk even if write() raises.
    with tf.python_io.TFRecordWriter(train_data_path) as writer:
        writer.write(serialized)

2 TensorFlow TFRecord文件加载

def parse_function(serialize_string):
    """Parse one serialized example into its ``'input'``/``'output'`` features.

    Args:
        serialize_string: A serialized ``tf.train.Example``.

    Returns:
        The result of ``tf.io.parse_single_example`` for a schema in which
        both keys are variable-length int64 features.
    """
    schema = {
        key: tf.VarLenFeature(dtype=tf.int64)
        for key in ('input', 'output')
    }
    return tf.io.parse_single_example(serialize_string, schema)
def load_tf_record():
    """Read the first record from ``./TFRecord.tfrecord`` and print it.

    The record is parsed with ``parse_function``, both features are
    converted from sparse to dense, reshaped to ``(30,)`` and ``(30, 1)``
    respectively, evaluated in a session and printed.
    """
    dataset = tf.data.TFRecordDataset(["./TFRecord.tfrecord"])
    parsed = dataset.map(parse_function)
    iterator = parsed.make_one_shot_iterator()
    batch = iterator.get_next()

    # The parsed features are converted to dense tensors before reshaping.
    input_tensor = tf.reshape(
        tf.sparse_tensor_to_dense(batch["input"]), shape=(30,))
    output_tensor = tf.reshape(
        tf.sparse_tensor_to_dense(batch["output"]), shape=(30, 1))

    # Using the session itself as the context manager makes it the default
    # session and closes it on exit, releasing its resources.
    with tf.Session() as sess:
        input_values, output_values = sess.run([input_tensor, output_tensor])
        print(input_values)
        print(output_values)
发布了15 篇原创文章 · 获赞 7 · 访问量 1万+

猜你喜欢

转载自blog.csdn.net/redhatforyou/article/details/102611147