Writing records features:
def getRecordData(fileName, outFile):
    """Build the feature dict for one image/target pair of a TFRecord example.

    Args:
        fileName: Path of the encoded image file (presumably a JPEG, going by
            the local variable name); its bytes are stored unchanged.
        outFile: Path of the file that is opened while ``y`` is populated
            (the populating code was elided in the original post).

    Returns:
        A dict with two ``tf.train.Feature`` entries: ``'image'`` (a bytes
        list holding the encoded image as a single element) and ``'output'``
        (a float list holding ``y``).
    """
    with tf.io.gfile.GFile(fileName, 'rb') as fid:
        encoded_jpg = fid.read()

    y = []
    # Use a separate name for the handle so the path argument is not shadowed.
    with open(outFile) as out_fid:
        # ...populate y....
        pass  # placeholder: the code that fills y was elided in the post

    return {
        # BytesList expects a list of bytes objects. Passing an io.BytesIO
        # wrapper hands protobuf a file object instead of the raw bytes, so
        # the image is wrapped in a one-element list here.
        'image': tf.train.Feature(bytes_list=tf.train.BytesList(value=[encoded_jpg])),
        'output': tf.train.Feature(float_list=tf.train.FloatList(value=y)),
    }
Parsing tfrecords:
def parseExample(example):
    """Decode one serialized ``tf.train.Example`` into an (image, output) pair.

    Args:
        example: A serialized Example, as read from a TFRecord file.

    Returns:
        A tuple of the decoded 3-channel image tensor and the parsed
        ``"output"`` feature.
    """
    feature_spec = {
        "image": tf.io.FixedLenFeature([], tf.string),
        # Shape [] declares exactly one float per record for "output".
        "output": tf.io.FixedLenFeature([], tf.float32),
    }
    record = tf.io.parse_single_example(example, features=feature_spec)
    decoded = tf.image.decode_png(record["image"], channels=3)
    return decoded, record["output"]
def make_dataset(dir, dtype, dataSetType, parse_fn):
dataset = tf.data.TFRecordDataset(...path...)
dataset = dataset.shuffle(buffer_size=1000)
dataset = dataset.map(parseExample)
dataset = dataset.batch(batch_size=32)
dataset.cache('E:\\trainingcache')
return dataset
When I try to verify that the images are loaded correctly:
# Build the training pipeline and try to render every element it yields.
dataset = make_dataset(args.records_dir, 'training', 'tables', parseExample)
# NOTE(review): parseExample returns an (image, output) tuple, so indexing an
# element with 'image' looks wrong -- confirm once the parsing error is fixed.
for element in dataset:
    display.display(display.Image(data=element['image'].numpy()))
I get:
example_parsing_ops.cc:240 : Invalid argument: Key: output. Can't parse serialized Example.
I was able to reproduce the error using the code below:
%tensorflow_version 2.x
import tensorflow as tf
print(tf.__version__)
def write_date_tfrecord():
    """Write one Example with 10 dummy values to ``Output.tf_record``.

    The values are stored together in a single float-list feature named
    ``'Output'``.
    """
    # Writes 10 dummy values to replicate the issue.
    Output = [20191221 + x for x in range(0, 10)]
    print("Writing Output - ", Output)
    example = tf.train.Example(
        features=tf.train.Features(
            feature={
                # NOTE: FloatList stores float32, which cannot represent every
                # integer this large exactly, so values may read back rounded.
                'Output': tf.train.Feature(float_list=tf.train.FloatList(value=Output)),
            }
        )
    )
    # The context manager closes the writer, so the record is flushed to disk
    # before the file is read back.
    with tf.io.TFRecordWriter("Output.tf_record") as writer:
        writer.write(example.SerializeToString())
def parse_function(serialized_example):
    """Parse one serialized Example and return its ``'Output'`` feature.

    Args:
        serialized_example: A serialized ``tf.train.Example``.

    Returns:
        The parsed ``'Output'`` tensor.
    """
    feature_spec = {
        # Shape [] declares a single scalar float; the record written by
        # write_date_tfrecord holds 10 values, which is the mismatch this
        # snippet reproduces.
        'Output': tf.io.FixedLenFeature([], tf.float32),
    }
    parsed = tf.io.parse_single_example(
        serialized=serialized_example,
        features=feature_spec,
    )
    return parsed['Output']
def dataset_generator():
    """Return a dataset that reads ``Output.tf_record`` and parses each record.

    Records are parsed with ``parse_function``; the degree of parallelism is
    left to tf.data via ``AUTOTUNE``.
    """
    records = tf.data.TFRecordDataset("Output.tf_record")
    return records.map(
        parse_function,
        num_parallel_calls=tf.data.experimental.AUTOTUNE,
    )
if __name__ == '__main__':
    # Write the sample record first so there is a file to read back.
    write_date_tfrecord()
    generator = dataset_generator()
    # Iterating the dataset triggers the parsing of each record.
    for Output in generator:
        print(Output)
Output -
2.2.0
Writing Output - [20191221, 20191222, 20191223, 20191224, 20191225, 20191226, 20191227, 20191228, 20191229, 20191230]
---------------------------------------------------------------------------
InvalidArgumentError Traceback (most recent call last)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/eager/context.py in execution_mode(mode)
1985 ctx.executor = executor_new
-> 1986 yield
1987 finally:
10 frames
InvalidArgumentError: Key: Output. Can't parse serialized Example.
[[{{node ParseSingleExample/ParseExample/ParseExampleV2}}]] [Op:IteratorGetNext]
During handling of the above exception, another exception occurred:
InvalidArgumentError Traceback (most recent call last)
/usr/local/lib/python3.6/dist-packages/tensorflow/python/eager/executor.py in wait(self)
65 def wait(self):
66 """Waits for ops dispatched in this executor to finish."""
---> 67 pywrap_tfe.TFE_ExecutorWaitForAllPendingNodes(self._handle)
68
69 def clear_error(self):
InvalidArgumentError: Key: Output. Can't parse serialized Example.
[[{{node ParseSingleExample/ParseExample/ParseExampleV2}}]]
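Solution - The 'Output' feature holds a list of 10 floats, but tf.io.FixedLenFeature([], tf.float32) declares a single scalar value, so the serialized Example cannot be parsed.
I was able to fix the issue and run the code successfully after modifying the feature spec inside parse_function. In your case, make the same change in the parseExample function.
Change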
'Output': tf.io.FixedLenFeature([], tf.float32)
to
'Output': tf.io.FixedLenSequenceFeature([], tf.float32,allow_missing=True)
Fixed Code -
%tensorflow_version 2.x
import tensorflow as tf
print(tf.__version__)
def write_date_tfrecord():
    """Write one Example with 10 dummy values to ``Output.tf_record``.

    The values are stored together in a single float-list feature named
    ``'Output'``.
    """
    # Writes 10 dummy values to replicate the issue.
    Output = [20191221 + x for x in range(0, 10)]
    print("Writing Output - ", Output)
    example = tf.train.Example(
        features=tf.train.Features(
            feature={
                # NOTE: FloatList stores float32, which cannot represent every
                # integer this large exactly, so values may read back rounded.
                'Output': tf.train.Feature(float_list=tf.train.FloatList(value=Output)),
            }
        )
    )
    # The context manager closes the writer, so the record is flushed to disk
    # before the file is read back.
    with tf.io.TFRecordWriter("Output.tf_record") as writer:
        writer.write(example.SerializeToString())
def parse_function(serialized_example):
    """Parse one serialized Example and return its ``'Output'`` feature.

    Args:
        serialized_example: A serialized ``tf.train.Example``.

    Returns:
        The parsed ``'Output'`` tensor, holding however many floats the
        record contains.
    """
    feature_spec = {
        # A sequence feature with allow_missing=True accepts a variable
        # number of scalar floats instead of exactly one.
        'Output': tf.io.FixedLenSequenceFeature([], tf.float32, allow_missing=True),
    }
    parsed = tf.io.parse_single_example(
        serialized=serialized_example,
        features=feature_spec,
    )
    return parsed['Output']
def dataset_generator():
    """Return a dataset that reads ``Output.tf_record`` and parses each record.

    Records are parsed with ``parse_function``; the degree of parallelism is
    left to tf.data via ``AUTOTUNE``.
    """
    records = tf.data.TFRecordDataset("Output.tf_record")
    return records.map(
        parse_function,
        num_parallel_calls=tf.data.experimental.AUTOTUNE,
    )
if __name__ == '__main__':
    # Write the sample record first so there is a file to read back.
    write_date_tfrecord()
    generator = dataset_generator()
    # Iterating the dataset triggers the parsing of each record.
    for Output in generator:
        print(Output)
Output -
2.2.0
Writing Output - [20191221, 20191222, 20191223, 20191224, 20191225, 20191226, 20191227, 20191228, 20191229, 20191230]
tf.Tensor(
[20191220. 20191222. 20191224. 20191224. 20191224. 20191226. 20191228.
20191228. 20191228. 20191230.], shape=(10,), dtype=float32)
Hope this answers your question. Happy Learning.
If you love us, you can donate via PayPal or buy us a coffee so we can maintain and grow. Thank you!
Donate Us With