tensorflow-TFRecord 文件详解
TFRecord 是 TensorFlow 内置的文件格式,它是一种二进制文件,具有以下优点:
- 统一各种输入文件的操作
- 更好地利用内存,方便复制和移动
- 将二进制数据和标签(label)存储在同一个文件中
import os
import numpy as np
import tempfile
import tensorflow as tf
# Alternative location (system temp dir):
#   os.path.join(tempfile.gettempdir(), "example.tfrecords")
example_path = './temp.tfrecords'
# Fixed seed so every run draws the same sequence of random values.
np.random.seed(0)

# Write the records to a file.
with tf.io.TFRecordWriter(example_path) as file_writer:
    for _ in range(4):
        # Draw two random floats for this record.
        x, y = np.random.random(), np.random.random()
        print(x, y, '--->')
        # Wrap each scalar in a Feature holding a one-element FloatList.
        x1 = tf.train.Feature(float_list=tf.train.FloatList(value=[x]))
        y1 = tf.train.Feature(float_list=tf.train.FloatList(value=[y]))
        print('x1=', x1)
        print('y1=', y1)
        # Map feature names to their Feature messages.
        feature0 = {
            "x": x1,
            "y": y1,
        }
        print('feature0=', feature0)
        features0 = tf.train.Features(feature=feature0)
        # Print the Features message just built; the original printed the
        # plain dict `feature0` a second time under this label.
        print('features=', features0)
        # Serialize the Example to bytes and append it to the file.
        record_bytes = tf.train.Example(features=features0).SerializeToString()
        file_writer.write(record_bytes)
# Read the data back out.
def decode_fn(record_bytes):
    """Parse one serialized Example into a dict of scalar float32 values.

    Args:
        record_bytes: One serialized record, as passed in by
            ``TFRecordDataset.map``.

    Returns:
        The result of ``tf.io.parse_single_example`` for the keys ``"x"``
        and ``"y"``, each declared as a scalar (shape ``[]``) float32
        feature.
    """
    # Schema: both features are fixed-length scalars of type float32.
    schema = {
        "x": tf.io.FixedLenFeature([], dtype=tf.float32),
        "y": tf.io.FixedLenFeature([], dtype=tf.float32),
    }
    return tf.io.parse_single_example(record_bytes, schema)
# Stream the file back, parse each record with decode_fn, and print x and y
# to four decimal places.
parsed_dataset = tf.data.TFRecordDataset([example_path]).map(decode_fn)
for batch in parsed_dataset:
    print("x = {x:.4f}, y = {y:.4f}".format(**batch))