1.对于定长的label和feature
生成tfrecord数据:
多标签样本:这里每个样本的label包含5个值,feature包含6个值
import os
import tensorflow as tf
import numpy as np
# Write four fixed-length, multi-label samples into a TFRecord file.
# The file is placed in the parent directory of the current working directory.
output_flie = os.path.join(os.path.dirname(os.getcwd()), "train.tfrecords")

# One row per sample: every label row has 5 values, every feature row has 6.
labels = np.array([[1, 0, 0, 1, 0], [0, 1, 0, 0, 1], [0, 0, 0, 0, 1], [1, 0, 0, 0, 0]])
features = np.array([[0, 0, 0, 0, 0, 0], [1, 1, 1, 1, 1, 2], [1, 1, 1, 0, 0, 2], [0, 0, 0, 0, 1, 9]])

# The context manager closes the writer even if serialization fails.
with tf.python_io.TFRecordWriter(output_flie) as writer:
    # Pair the rows with zip so the loop follows the data instead of a
    # hard-coded sample count.
    for label, feature in zip(labels, features):
        example = tf.train.Example(features=tf.train.Features(feature={
            "label": tf.train.Feature(int64_list=tf.train.Int64List(value=label)),
            "feature": tf.train.Feature(int64_list=tf.train.Int64List(value=feature)),
        }))
        writer.write(example.SerializeToString())
解析tfrecord数据:
import os
import tensorflow as tf
import numpy as np
def read_tf(output_flie, label_len=5, feature_len=6):
    """Build graph ops that read one (feature, label) pair from a TFRecord file.

    Args:
        output_flie: Path of the TFRecord file to read.
        label_len: Number of int64 values stored under the "label" key of
            every record. Defaults to 5.
        feature_len: Number of int64 values stored under the "feature" key of
            every record. Defaults to 6.

    Returns:
        A ``(feature, label)`` tuple of int64 tensors with shapes
        ``[feature_len]`` and ``[label_len]``.

    The lengths must equal what was written into the file; otherwise running
    the returned tensors fails with ``InvalidArgumentError: ... Number of
    int64 values != expected``.
    """
    filename_queue = tf.train.string_input_producer([output_flie])
    reader = tf.TFRecordReader()
    # reader.read returns (key, value); only the serialized record is needed.
    _, serialized_example = reader.read(filename_queue)
    parsed = tf.parse_single_example(
        serialized_example,
        features={
            "label": tf.FixedLenFeature([label_len], tf.int64),
            "feature": tf.FixedLenFeature([feature_len], tf.int64),
        },
    )
    return parsed["feature"], parsed["label"]
# Read the fixed-length records back and print one batch of two samples.
output_flie = os.path.join(os.path.dirname(os.getcwd()), "train.tfrecords")
feature, label = read_tf(output_flie)
imageBatch, labelBatch = tf.train.batch([feature, label], batch_size=2, capacity=3)

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    coord = tf.train.Coordinator()
    # The input pipeline is queue based; the runner threads must be started
    # before any batch can be fetched.
    threads = tf.train.start_queue_runners(sess=sess, coord=coord)
    try:
        print(1)
        images, labels = sess.run([imageBatch, labelBatch])
        print(images)
        print(labels)
    finally:
        # Always stop and join the queue-runner threads, even when fetching
        # the batch raised, so they do not outlive the session.
        coord.request_stop()
        coord.join(threads)
输出:
1
[[0 0 0 0 0 0]
 [1 1 1 1 1 2]]
[[1 0 0 1 0]
 [0 1 0 0 1]]
2.对于变长label和feature
生成tfrecord
跟定长的数据生成方式一样
import os
import tensorflow as tf
import numpy as np
# Write five variable-length samples into a TFRecord file.
# The file is placed in the parent directory of the current working directory.
train_TFfile = os.path.join(os.path.dirname(os.getcwd()), "hh.tfrecords")

# Rows deliberately differ in length: labels have 1-3 values, features 1-2.
labels = [[1, 2, 3], [3, 4], [5, 2, 6], [6, 4, 9], [9]]
features = [[2, 5], [3], [5, 8], [1, 4], [5, 9]]

# Use the writer as a context manager so the file is closed even if an
# exception is raised while building or writing an example.
with tf.python_io.TFRecordWriter(train_TFfile) as writer:
    # zip keeps the loop in step with the data instead of a fixed count.
    for label, feature in zip(labels, features):
        print(label)
        example = tf.train.Example(
            features=tf.train.Features(
                feature={
                    "label": tf.train.Feature(int64_list=tf.train.Int64List(value=label)),
                    "feature": tf.train.Feature(int64_list=tf.train.Int64List(value=feature)),
                }
            )
        )
        writer.write(example.SerializeToString())
解析tfrecord
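与定长数据相比,解析时主要的改变是:用 tf.VarLenFeature(tf.int64) 代替 tf.FixedLenFeature([n], tf.int64)。
此时解析得到的是 SparseTensor,需要时可用 tf.sparse_tensor_to_dense 转成稠密张量。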
未完待续ing
常见错误:
当定义的label维度跟解析时维度不同,会报错如下:
报错详情:
tensorflow.python.framework.errors_impl.InvalidArgumentError: Name: <unknown>, Key: label, Index: 0. Number of int64 values != expected. Values size: 1 but output shape: [3]
意思是:tfrecord中实际存储的label只有1个值(Values size: 1),而解析时 FixedLenFeature 声明的形状是[3],即期望3个值,两者不一致。
解决:生成tfrecord时,label的长度要跟解析时相同。
参考:
1.https://blog.csdn.net/weixin_42001089/article/details/90236241