Home | History | Annotate | Download | only in kernel_tests
      1 # Copyright 2015 The TensorFlow Authors. All Rights Reserved.
      2 #
      3 # Licensed under the Apache License, Version 2.0 (the "License");
      4 # you may not use this file except in compliance with the License.
      5 # You may obtain a copy of the License at
      6 #
      7 #     http://www.apache.org/licenses/LICENSE-2.0
      8 #
      9 # Unless required by applicable law or agreed to in writing, software
     10 # distributed under the License is distributed on an "AS IS" BASIS,
     11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 # See the License for the specific language governing permissions and
     13 # limitations under the License.
     14 # ==============================================================================
     15 """Tests for tensorflow.ops.parsing_ops."""
     16 
     17 from __future__ import absolute_import
     18 from __future__ import division
     19 from __future__ import print_function
     20 
     21 import copy
     22 import itertools
     23 
     24 import numpy as np
     25 
     26 from google.protobuf import json_format
     27 
     28 from tensorflow.core.example import example_pb2
     29 from tensorflow.core.example import feature_pb2
     30 from tensorflow.python.framework import constant_op
     31 from tensorflow.python.framework import dtypes
     32 from tensorflow.python.framework import errors_impl
     33 from tensorflow.python.framework import ops
     34 from tensorflow.python.framework import sparse_tensor
     35 from tensorflow.python.framework import tensor_shape
     36 from tensorflow.python.framework import tensor_util
     37 from tensorflow.python.ops import array_ops
     38 from tensorflow.python.ops import parsing_ops
     39 from tensorflow.python.platform import test
     40 from tensorflow.python.platform import tf_logging
     41 
     42 # Helpers for creating Example objects
     43 example = example_pb2.Example
     44 feature = feature_pb2.Feature
     45 features = lambda d: feature_pb2.Features(feature=d)
     46 bytes_feature = lambda v: feature(bytes_list=feature_pb2.BytesList(value=v))
     47 int64_feature = lambda v: feature(int64_list=feature_pb2.Int64List(value=v))
     48 float_feature = lambda v: feature(float_list=feature_pb2.FloatList(value=v))
     49 # Helpers for creating SequenceExample objects
     50 feature_list = lambda l: feature_pb2.FeatureList(feature=l)
     51 feature_lists = lambda d: feature_pb2.FeatureLists(feature_list=d)
     52 sequence_example = example_pb2.SequenceExample
     53 
     54 
     55 def flatten(list_of_lists):
     56   """Flatten one level of nesting."""
     57   return itertools.chain.from_iterable(list_of_lists)
     58 
     59 
     60 def flatten_values_tensors_or_sparse(tensors_list):
     61   """Flatten each SparseTensor object into 3 Tensors for session.run()."""
     62   return list(
     63       flatten([[v.indices, v.values, v.dense_shape] if isinstance(
     64           v, sparse_tensor.SparseTensor) else [v] for v in tensors_list]))
     65 
     66 
     67 def _compare_output_to_expected(tester, dict_tensors, expected_tensors,
     68                                 flat_output):
     69   tester.assertEqual(set(dict_tensors.keys()), set(expected_tensors.keys()))
     70 
     71   i = 0  # Index into the flattened output of session.run()
     72   for k, v in dict_tensors.items():
     73     expected_v = expected_tensors[k]
     74     tf_logging.info("Comparing key: %s", k)
     75     if isinstance(v, sparse_tensor.SparseTensor):
     76       # Three outputs for SparseTensor : indices, values, shape.
     77       tester.assertEqual([k, len(expected_v)], [k, 3])
     78       tester.assertAllEqual(expected_v[0], flat_output[i])
     79       tester.assertAllEqual(expected_v[1], flat_output[i + 1])
     80       tester.assertAllEqual(expected_v[2], flat_output[i + 2])
     81       i += 3
     82     else:
     83       # One output for standard Tensor.
     84       tester.assertAllEqual(expected_v, flat_output[i])
     85       i += 1
     86 
     87 
     88 class ParseExampleTest(test.TestCase):
     89 
     90   def _test(self, kwargs, expected_values=None, expected_err=None):
     91     with self.test_session() as sess:
     92       if expected_err:
     93         with self.assertRaisesWithPredicateMatch(expected_err[0],
     94                                                  expected_err[1]):
     95           out = parsing_ops.parse_example(**kwargs)
     96           sess.run(flatten_values_tensors_or_sparse(out.values()))
     97         return
     98       else:
     99         # Returns dict w/ Tensors and SparseTensors.
    100         out = parsing_ops.parse_example(**kwargs)
    101         result = flatten_values_tensors_or_sparse(out.values())
    102         # Check values.
    103         tf_result = sess.run(result)
    104         _compare_output_to_expected(self, out, expected_values, tf_result)
    105 
    106       # Check shapes; if serialized is a Tensor we need its size to
    107       # properly check.
    108       serialized = kwargs["serialized"]
    109       batch_size = (serialized.eval().size if isinstance(serialized, ops.Tensor)
    110                     else np.asarray(serialized).size)
    111       for k, f in kwargs["features"].items():
    112         if isinstance(f, parsing_ops.FixedLenFeature) and f.shape is not None:
    113           self.assertEqual(
    114               tuple(out[k].get_shape().as_list()), (batch_size,) + f.shape)
    115         elif isinstance(f, parsing_ops.VarLenFeature):
    116           self.assertEqual(
    117               tuple(out[k].indices.get_shape().as_list()), (None, 2))
    118           self.assertEqual(tuple(out[k].values.get_shape().as_list()), (None,))
    119           self.assertEqual(
    120               tuple(out[k].dense_shape.get_shape().as_list()), (2,))
    121 
    122   def testEmptySerializedWithAllDefaults(self):
    123     sparse_name = "st_a"
    124     a_name = "a"
    125     b_name = "b"
    126     c_name = "c:has_a_tricky_name"
    127     a_default = [0, 42, 0]
    128     b_default = np.random.rand(3, 3).astype(bytes)
    129     c_default = np.random.rand(2).astype(np.float32)
    130 
    131     expected_st_a = (  # indices, values, shape
    132         np.empty(
    133             (0, 2), dtype=np.int64),  # indices
    134         np.empty(
    135             (0,), dtype=np.int64),  # sp_a is DT_INT64
    136         np.array(
    137             [2, 0], dtype=np.int64))  # batch == 2, max_elems = 0
    138 
    139     expected_output = {
    140         sparse_name: expected_st_a,
    141         a_name: np.array(2 * [[a_default]]),
    142         b_name: np.array(2 * [b_default]),
    143         c_name: np.array(2 * [c_default]),
    144     }
    145 
    146     self._test(
    147         {
    148             "example_names":
    149                 np.empty(
    150                     (0,), dtype=bytes),
    151             "serialized":
    152                 ops.convert_to_tensor(["", ""]),
    153             "features": {
    154                 sparse_name:
    155                     parsing_ops.VarLenFeature(dtypes.int64),
    156                 a_name:
    157                     parsing_ops.FixedLenFeature(
    158                         (1, 3), dtypes.int64, default_value=a_default),
    159                 b_name:
    160                     parsing_ops.FixedLenFeature(
    161                         (3, 3), dtypes.string, default_value=b_default),
    162                 c_name:
    163                     parsing_ops.FixedLenFeature(
    164                         (2,), dtypes.float32, default_value=c_default),
    165             }
    166         },
    167         expected_output)
    168 
    169   def testEmptySerializedWithoutDefaultsShouldFail(self):
    170     input_features = {
    171         "st_a":
    172             parsing_ops.VarLenFeature(dtypes.int64),
    173         "a":
    174             parsing_ops.FixedLenFeature(
    175                 (1, 3), dtypes.int64, default_value=[0, 42, 0]),
    176         "b":
    177             parsing_ops.FixedLenFeature(
    178                 (3, 3),
    179                 dtypes.string,
    180                 default_value=np.random.rand(3, 3).astype(bytes)),
    181         # Feature "c" is missing a default, this gap will cause failure.
    182         "c":
    183             parsing_ops.FixedLenFeature(
    184                 (2,), dtype=dtypes.float32),
    185     }
    186 
    187     # Edge case where the key is there but the feature value is empty
    188     original = example(features=features({"c": feature()}))
    189     self._test(
    190         {
    191             "example_names": ["in1"],
    192             "serialized": [original.SerializeToString()],
    193             "features": input_features,
    194         },
    195         expected_err=(
    196             errors_impl.OpError,
    197             "Name: in1, Feature: c \\(data type: float\\) is required"))
    198 
    199     # Standard case of missing key and value.
    200     self._test(
    201         {
    202             "example_names": ["in1", "in2"],
    203             "serialized": ["", ""],
    204             "features": input_features,
    205         },
    206         expected_err=(
    207             errors_impl.OpError,
    208             "Name: in1, Feature: c \\(data type: float\\) is required"))
    209 
    210   def testDenseNotMatchingShapeShouldFail(self):
    211     original = [
    212         example(features=features({
    213             "a": float_feature([1, 1, 3]),
    214         })), example(features=features({
    215             "a": float_feature([-1, -1]),
    216         }))
    217     ]
    218 
    219     names = ["passing", "failing"]
    220     serialized = [m.SerializeToString() for m in original]
    221 
    222     self._test(
    223         {
    224             "example_names": names,
    225             "serialized": ops.convert_to_tensor(serialized),
    226             "features": {
    227                 "a": parsing_ops.FixedLenFeature((1, 3), dtypes.float32)
    228             }
    229         },
    230         expected_err=(errors_impl.OpError,
    231                       "Name: failing, Key: a, Index: 1.  Number of float val"))
    232 
    233   def testDenseDefaultNoShapeShouldFail(self):
    234     original = [example(features=features({"a": float_feature([1, 1, 3]),})),]
    235 
    236     serialized = [m.SerializeToString() for m in original]
    237 
    238     self._test(
    239         {
    240             "example_names": ["failing"],
    241             "serialized": ops.convert_to_tensor(serialized),
    242             "features": {
    243                 "a": parsing_ops.FixedLenFeature(None, dtypes.float32)
    244             }
    245         },
    246         expected_err=(ValueError, "Missing shape for feature a"))
    247 
    248   def testSerializedContainingSparse(self):
    249     original = [
    250         example(features=features({
    251             "st_c": float_feature([3, 4])
    252         })),
    253         example(features=features({
    254             "st_c": float_feature([]),  # empty float list
    255         })),
    256         example(features=features({
    257             "st_d": feature(),  # feature with nothing in it
    258         })),
    259         example(features=features({
    260             "st_c": float_feature([1, 2, -1]),
    261             "st_d": bytes_feature([b"hi"])
    262         }))
    263     ]
    264 
    265     serialized = [m.SerializeToString() for m in original]
    266 
    267     expected_st_c = (  # indices, values, shape
    268         np.array(
    269             [[0, 0], [0, 1], [3, 0], [3, 1], [3, 2]], dtype=np.int64), np.array(
    270                 [3.0, 4.0, 1.0, 2.0, -1.0], dtype=np.float32), np.array(
    271                     [4, 3], dtype=np.int64))  # batch == 2, max_elems = 3
    272 
    273     expected_st_d = (  # indices, values, shape
    274         np.array(
    275             [[3, 0]], dtype=np.int64), np.array(
    276                 ["hi"], dtype=bytes), np.array(
    277                     [4, 1], dtype=np.int64))  # batch == 2, max_elems = 1
    278 
    279     expected_output = {
    280         "st_c": expected_st_c,
    281         "st_d": expected_st_d,
    282     }
    283 
    284     self._test({
    285         "serialized": ops.convert_to_tensor(serialized),
    286         "features": {
    287             "st_c": parsing_ops.VarLenFeature(dtypes.float32),
    288             "st_d": parsing_ops.VarLenFeature(dtypes.string)
    289         }
    290     }, expected_output)
    291 
    292   def testSerializedContainingSparseFeature(self):
    293     original = [
    294         example(features=features({
    295             "val": float_feature([3, 4]),
    296             "idx": int64_feature([5, 10])
    297         })),
    298         example(features=features({
    299             "val": float_feature([]),  # empty float list
    300             "idx": int64_feature([])
    301         })),
    302         example(features=features({
    303             "val": feature(),  # feature with nothing in it
    304             # missing idx feature
    305         })),
    306         example(features=features({
    307             "val": float_feature([1, 2, -1]),
    308             "idx":
    309                 int64_feature([0, 9, 3])  # unsorted
    310         }))
    311     ]
    312 
    313     serialized = [m.SerializeToString() for m in original]
    314 
    315     expected_sp = (  # indices, values, shape
    316         np.array(
    317             [[0, 5], [0, 10], [3, 0], [3, 3], [3, 9]], dtype=np.int64),
    318         np.array(
    319             [3.0, 4.0, 1.0, -1.0, 2.0], dtype=np.float32), np.array(
    320                 [4, 13], dtype=np.int64))  # batch == 4, max_elems = 13
    321 
    322     expected_output = {"sp": expected_sp,}
    323 
    324     self._test({
    325         "serialized": ops.convert_to_tensor(serialized),
    326         "features": {
    327             "sp": parsing_ops.SparseFeature(
    328                 ["idx"], "val", dtypes.float32, [13])
    329         }
    330     }, expected_output)
    331 
    332   def testSerializedContainingSparseFeatureReuse(self):
    333     original = [
    334         example(features=features({
    335             "val1": float_feature([3, 4]),
    336             "val2": float_feature([5, 6]),
    337             "idx": int64_feature([5, 10])
    338         })),
    339         example(features=features({
    340             "val1": float_feature([]),  # empty float list
    341             "idx": int64_feature([])
    342         })),
    343     ]
    344 
    345     serialized = [m.SerializeToString() for m in original]
    346 
    347     expected_sp1 = (  # indices, values, shape
    348         np.array(
    349             [[0, 5], [0, 10]], dtype=np.int64), np.array(
    350                 [3.0, 4.0], dtype=np.float32), np.array(
    351                     [2, 13], dtype=np.int64))  # batch == 2, max_elems = 13
    352 
    353     expected_sp2 = (  # indices, values, shape
    354         np.array(
    355             [[0, 5], [0, 10]], dtype=np.int64), np.array(
    356                 [5.0, 6.0], dtype=np.float32), np.array(
    357                     [2, 7], dtype=np.int64))  # batch == 2, max_elems = 13
    358 
    359     expected_output = {
    360         "sp1": expected_sp1,
    361         "sp2": expected_sp2,
    362     }
    363 
    364     self._test({
    365         "serialized": ops.convert_to_tensor(serialized),
    366         "features": {
    367             "sp1":
    368                 parsing_ops.SparseFeature("idx", "val1", dtypes.float32, 13),
    369             "sp2":
    370                 parsing_ops.SparseFeature(
    371                     "idx", "val2", dtypes.float32, size=7, already_sorted=True)
    372         }
    373     }, expected_output)
    374 
    375   def testSerializedContaining3DSparseFeature(self):
    376     original = [
    377         example(features=features({
    378             "val": float_feature([3, 4]),
    379             "idx0": int64_feature([5, 10]),
    380             "idx1": int64_feature([0, 2]),
    381         })),
    382         example(features=features({
    383             "val": float_feature([]),  # empty float list
    384             "idx0": int64_feature([]),
    385             "idx1": int64_feature([]),
    386         })),
    387         example(features=features({
    388             "val": feature(),  # feature with nothing in it
    389             # missing idx feature
    390         })),
    391         example(features=features({
    392             "val": float_feature([1, 2, -1]),
    393             "idx0": int64_feature([0, 9, 3]),  # unsorted
    394             "idx1": int64_feature([1, 0, 2]),
    395         }))
    396     ]
    397 
    398     serialized = [m.SerializeToString() for m in original]
    399 
    400     expected_sp = (
    401         # indices
    402         np.array(
    403             [[0, 5, 0], [0, 10, 2], [3, 0, 1], [3, 3, 2], [3, 9, 0]],
    404             dtype=np.int64),
    405         # values
    406         np.array([3.0, 4.0, 1.0, -1.0, 2.0], dtype=np.float32),
    407         # shape batch == 4, max_elems = 13
    408         np.array([4, 13, 3], dtype=np.int64))
    409 
    410     expected_output = {"sp": expected_sp,}
    411 
    412     self._test({
    413         "serialized": ops.convert_to_tensor(serialized),
    414         "features": {
    415             "sp": parsing_ops.SparseFeature(
    416                 ["idx0", "idx1"], "val", dtypes.float32, [13, 3])
    417         }
    418     }, expected_output)
    419 
    420   def testSerializedContainingDense(self):
    421     aname = "a"
    422     bname = "b*has+a:tricky_name"
    423     original = [
    424         example(features=features({
    425             aname: float_feature([1, 1]),
    426             bname: bytes_feature([b"b0_str"]),
    427         })), example(features=features({
    428             aname: float_feature([-1, -1]),
    429             bname: bytes_feature([b""]),
    430         }))
    431     ]
    432 
    433     serialized = [m.SerializeToString() for m in original]
    434 
    435     expected_output = {
    436         aname:
    437             np.array(
    438                 [[1, 1], [-1, -1]], dtype=np.float32).reshape(2, 1, 2, 1),
    439         bname:
    440             np.array(
    441                 ["b0_str", ""], dtype=bytes).reshape(2, 1, 1, 1, 1),
    442     }
    443 
    444     # No defaults, values required
    445     self._test(
    446         {
    447             "serialized":
    448                 ops.convert_to_tensor(serialized),
    449             "features": {
    450                 aname:
    451                     parsing_ops.FixedLenFeature(
    452                         (1, 2, 1), dtype=dtypes.float32),
    453                 bname:
    454                     parsing_ops.FixedLenFeature(
    455                         (1, 1, 1, 1), dtype=dtypes.string),
    456             }
    457         },
    458         expected_output)
    459 
  # This test is identical to the previous one except
  # for the creation of 'serialized'.
    462   def testSerializedContainingDenseWithConcat(self):
    463     aname = "a"
    464     bname = "b*has+a:tricky_name"
    465     # TODO(lew): Feature appearing twice should be an error in future.
    466     original = [
    467         (example(features=features({
    468             aname: float_feature([10, 10]),
    469         })), example(features=features({
    470             aname: float_feature([1, 1]),
    471             bname: bytes_feature([b"b0_str"]),
    472         }))),
    473         (
    474             example(features=features({
    475                 bname: bytes_feature([b"b100"]),
    476             })),
    477             example(features=features({
    478                 aname: float_feature([-1, -1]),
    479                 bname: bytes_feature([b"b1"]),
    480             })),),
    481     ]
    482 
    483     serialized = [
    484         m.SerializeToString() + n.SerializeToString() for (m, n) in original
    485     ]
    486 
    487     expected_output = {
    488         aname:
    489             np.array(
    490                 [[1, 1], [-1, -1]], dtype=np.float32).reshape(2, 1, 2, 1),
    491         bname:
    492             np.array(
    493                 ["b0_str", "b1"], dtype=bytes).reshape(2, 1, 1, 1, 1),
    494     }
    495 
    496     # No defaults, values required
    497     self._test(
    498         {
    499             "serialized":
    500                 ops.convert_to_tensor(serialized),
    501             "features": {
    502                 aname:
    503                     parsing_ops.FixedLenFeature(
    504                         (1, 2, 1), dtype=dtypes.float32),
    505                 bname:
    506                     parsing_ops.FixedLenFeature(
    507                         (1, 1, 1, 1), dtype=dtypes.string),
    508             }
    509         },
    510         expected_output)
    511 
    512   def testSerializedContainingDenseScalar(self):
    513     original = [
    514         example(features=features({
    515             "a": float_feature([1]),
    516         })), example(features=features({}))
    517     ]
    518 
    519     serialized = [m.SerializeToString() for m in original]
    520 
    521     expected_output = {
    522         "a":
    523             np.array(
    524                 [[1], [-1]], dtype=np.float32)  # 2x1 (column vector)
    525     }
    526 
    527     self._test(
    528         {
    529             "serialized":
    530                 ops.convert_to_tensor(serialized),
    531             "features": {
    532                 "a":
    533                     parsing_ops.FixedLenFeature(
    534                         (1,), dtype=dtypes.float32, default_value=-1),
    535             }
    536         },
    537         expected_output)
    538 
    539   def testSerializedContainingDenseWithDefaults(self):
    540     original = [
    541         example(features=features({
    542             "a": float_feature([1, 1]),
    543         })),
    544         example(features=features({
    545             "b": bytes_feature([b"b1"]),
    546         })),
    547         example(features=features({
    548             "b": feature()
    549         })),
    550     ]
    551 
    552     serialized = [m.SerializeToString() for m in original]
    553 
    554     expected_output = {
    555         "a":
    556             np.array(
    557                 [[1, 1], [3, -3], [3, -3]], dtype=np.float32).reshape(3, 1, 2,
    558                                                                       1),
    559         "b":
    560             np.array(
    561                 ["tmp_str", "b1", "tmp_str"], dtype=bytes).reshape(3, 1, 1, 1,
    562                                                                    1),
    563     }
    564 
    565     self._test(
    566         {
    567             "serialized":
    568                 ops.convert_to_tensor(serialized),
    569             "features": {
    570                 "a":
    571                     parsing_ops.FixedLenFeature(
    572                         (1, 2, 1),
    573                         dtype=dtypes.float32,
    574                         default_value=[3.0, -3.0]),
    575                 "b":
    576                     parsing_ops.FixedLenFeature(
    577                         (1, 1, 1, 1),
    578                         dtype=dtypes.string,
    579                         default_value="tmp_str"),
    580             }
    581         },
    582         expected_output)
    583 
    584   def testSerializedContainingSparseAndSparseFeatureAndDenseWithNoDefault(self):
    585     expected_st_a = (  # indices, values, shape
    586         np.empty(
    587             (0, 2), dtype=np.int64),  # indices
    588         np.empty(
    589             (0,), dtype=np.int64),  # sp_a is DT_INT64
    590         np.array(
    591             [2, 0], dtype=np.int64))  # batch == 2, max_elems = 0
    592     expected_sp = (  # indices, values, shape
    593         np.array(
    594             [[0, 0], [0, 3], [1, 7]], dtype=np.int64), np.array(
    595                 ["a", "b", "c"], dtype="|S"), np.array(
    596                     [2, 13], dtype=np.int64))  # batch == 4, max_elems = 13
    597 
    598     original = [
    599         example(features=features({
    600             "c": float_feature([3, 4]),
    601             "val": bytes_feature([b"a", b"b"]),
    602             "idx": int64_feature([0, 3])
    603         })), example(features=features({
    604             "c": float_feature([1, 2]),
    605             "val": bytes_feature([b"c"]),
    606             "idx": int64_feature([7])
    607         }))
    608     ]
    609 
    610     names = ["in1", "in2"]
    611     serialized = [m.SerializeToString() for m in original]
    612 
    613     a_default = [1, 2, 3]
    614     b_default = np.random.rand(3, 3).astype(bytes)
    615     expected_output = {
    616         "st_a": expected_st_a,
    617         "sp": expected_sp,
    618         "a": np.array(2 * [[a_default]]),
    619         "b": np.array(2 * [b_default]),
    620         "c": np.array(
    621             [[3, 4], [1, 2]], dtype=np.float32),
    622     }
    623 
    624     self._test(
    625         {
    626             "example_names":
    627                 names,
    628             "serialized":
    629                 ops.convert_to_tensor(serialized),
    630             "features": {
    631                 "st_a":
    632                     parsing_ops.VarLenFeature(dtypes.int64),
    633                 "sp":
    634                     parsing_ops.SparseFeature("idx", "val", dtypes.string, 13),
    635                 "a":
    636                     parsing_ops.FixedLenFeature(
    637                         (1, 3), dtypes.int64, default_value=a_default),
    638                 "b":
    639                     parsing_ops.FixedLenFeature(
    640                         (3, 3), dtypes.string, default_value=b_default),
    641                 # Feature "c" must be provided, since it has no default_value.
    642                 "c":
    643                     parsing_ops.FixedLenFeature((2,), dtypes.float32),
    644             }
    645         },
    646         expected_output)
    647 
    648   def testSerializedContainingSparseAndSparseFeatureWithReuse(self):
    649     expected_idx = (  # indices, values, shape
    650         np.array(
    651             [[0, 0], [0, 1], [1, 0], [1, 1]], dtype=np.int64),
    652         np.array([0, 3, 7, 1]), np.array(
    653             [2, 2], dtype=np.int64))  # batch == 4, max_elems = 2
    654 
    655     expected_sp = (  # indices, values, shape
    656         np.array(
    657             [[0, 0], [0, 3], [1, 1], [1, 7]], dtype=np.int64), np.array(
    658                 ["a", "b", "d", "c"], dtype="|S"), np.array(
    659                     [2, 13], dtype=np.int64))  # batch == 4, max_elems = 13
    660 
    661     original = [
    662         example(features=features({
    663             "val": bytes_feature([b"a", b"b"]),
    664             "idx": int64_feature([0, 3])
    665         })), example(features=features({
    666             "val": bytes_feature([b"c", b"d"]),
    667             "idx": int64_feature([7, 1])
    668         }))
    669     ]
    670 
    671     names = ["in1", "in2"]
    672     serialized = [m.SerializeToString() for m in original]
    673 
    674     expected_output = {
    675         "idx": expected_idx,
    676         "sp": expected_sp,
    677     }
    678 
    679     self._test({
    680         "example_names": names,
    681         "serialized": ops.convert_to_tensor(serialized),
    682         "features": {
    683             "idx": parsing_ops.VarLenFeature(dtypes.int64),
    684             "sp": parsing_ops.SparseFeature(
    685                 ["idx"], "val", dtypes.string, [13]),
    686         }
    687     }, expected_output)
    688 
    689   def _testSerializedContainingVarLenDenseLargerBatch(self, batch_size):
    690     # During parsing, data read from the serialized proto is stored in buffers.
    691     # For small batch sizes, a buffer will contain one minibatch entry.
    692     # For larger batch sizes, a buffer may contain several minibatch
    693     # entries.  This test identified a bug where the code that copied
    694     # data out of the buffers and into the output tensors assumed each
    695     # buffer only contained one minibatch entry.  The bug has since been fixed.
    696     truth_int = [i for i in range(batch_size)]
    697     truth_str = [[("foo%d" % i).encode(), ("bar%d" % i).encode()]
    698                  for i in range(batch_size)]
    699 
    700     expected_str = copy.deepcopy(truth_str)
    701 
    702     # Delete some intermediate entries
    703     for i in range(batch_size):
    704       col = 1
    705       if np.random.rand() < 0.25:
    706         # w.p. 25%, drop out the second entry
    707         expected_str[i][col] = b"default"
    708         col -= 1
    709         truth_str[i].pop()
    710       if np.random.rand() < 0.25:
    711         # w.p. 25%, drop out the second entry (possibly again)
    712         expected_str[i][col] = b"default"
    713         truth_str[i].pop()
    714 
    715     expected_output = {
    716         # Batch size batch_size, 1 time step.
    717         "a": np.array(truth_int, dtype=np.int64).reshape(batch_size, 1),
    718         # Batch size batch_size, 2 time steps.
    719         "b": np.array(expected_str, dtype="|S").reshape(batch_size, 2),
    720     }
    721 
    722     original = [
    723         example(features=features(
    724             {"a": int64_feature([truth_int[i]]),
    725              "b": bytes_feature(truth_str[i])}))
    726         for i in range(batch_size)
    727     ]
    728 
    729     serialized = [m.SerializeToString() for m in original]
    730 
    731     self._test({
    732         "serialized": ops.convert_to_tensor(serialized, dtype=dtypes.string),
    733         "features": {
    734             "a": parsing_ops.FixedLenSequenceFeature(
    735                 shape=(), dtype=dtypes.int64, allow_missing=True,
    736                 default_value=-1),
    737             "b": parsing_ops.FixedLenSequenceFeature(
    738                 shape=[], dtype=dtypes.string, allow_missing=True,
    739                 default_value="default"),
    740         }
    741     }, expected_output)
    742 
    743   def testSerializedContainingVarLenDenseLargerBatch(self):
    744     np.random.seed(3456)
    745     for batch_size in (1, 10, 20, 100, 256):
    746       self._testSerializedContainingVarLenDenseLargerBatch(batch_size)
    747 
    748   def testSerializedContainingVarLenDense(self):
    749     aname = "a"
    750     bname = "b"
    751     cname = "c"
    752     dname = "d"
    753     example_names = ["in1", "in2", "in3", "in4"]
    754     original = [
    755         example(features=features({
    756             cname: int64_feature([2]),
    757         })),
    758         example(features=features({
    759             aname: float_feature([1, 1]),
    760             bname: bytes_feature([b"b0_str", b"b1_str"]),
    761         })),
    762         example(features=features({
    763             aname: float_feature([-1, -1, 2, 2]),
    764             bname: bytes_feature([b"b1"]),
    765         })),
    766         example(features=features({
    767             aname: float_feature([]),
    768             cname: int64_feature([3]),
    769         })),
    770     ]
    771 
    772     serialized = [m.SerializeToString() for m in original]
    773 
    774     expected_output = {
    775         aname:
    776             np.array(
    777                 [
    778                     [0, 0, 0, 0],
    779                     [1, 1, 0, 0],
    780                     [-1, -1, 2, 2],
    781                     [0, 0, 0, 0],
    782                 ],
    783                 dtype=np.float32).reshape(4, 2, 2, 1),
    784         bname:
    785             np.array(
    786                 [["", ""], ["b0_str", "b1_str"], ["b1", ""], ["", ""]],
    787                 dtype=bytes).reshape(4, 2, 1, 1, 1),
    788         cname:
    789             np.array([2, 0, 0, 3], dtype=np.int64).reshape(4, 1),
    790         dname:
    791             np.empty(shape=(4, 0), dtype=bytes),
    792     }
    793 
    794     self._test({
    795         "example_names": example_names,
    796         "serialized": ops.convert_to_tensor(serialized),
    797         "features": {
    798             aname:
    799                 parsing_ops.FixedLenSequenceFeature(
    800                     (2, 1), dtype=dtypes.float32, allow_missing=True),
    801             bname:
    802                 parsing_ops.FixedLenSequenceFeature(
    803                     (1, 1, 1), dtype=dtypes.string, allow_missing=True),
    804             cname:
    805                 parsing_ops.FixedLenSequenceFeature(
    806                     shape=[], dtype=dtypes.int64, allow_missing=True),
    807             dname:
    808                 parsing_ops.FixedLenSequenceFeature(
    809                     shape=[], dtype=dtypes.string, allow_missing=True),
    810         }
    811     }, expected_output)
    812 
    813     # Test with padding values.
    814     expected_output_custom_padding = dict(expected_output)
    815     expected_output_custom_padding[aname] = np.array(
    816         [
    817             [-2, -2, -2, -2],
    818             [1, 1, -2, -2],
    819             [-1, -1, 2, 2],
    820             [-2, -2, -2, -2],
    821         ],
    822         dtype=np.float32).reshape(4, 2, 2, 1)
    823 
    824     self._test({
    825         "example_names": example_names,
    826         "serialized": ops.convert_to_tensor(serialized),
    827         "features": {
    828             aname:
    829                 parsing_ops.FixedLenSequenceFeature(
    830                     (2, 1), dtype=dtypes.float32, allow_missing=True,
    831                     default_value=-2.0),
    832             bname:
    833                 parsing_ops.FixedLenSequenceFeature(
    834                     (1, 1, 1), dtype=dtypes.string, allow_missing=True),
    835             cname:
    836                 parsing_ops.FixedLenSequenceFeature(
    837                     shape=[], dtype=dtypes.int64, allow_missing=True),
    838             dname:
    839                 parsing_ops.FixedLenSequenceFeature(
    840                     shape=[], dtype=dtypes.string, allow_missing=True),
    841         }
    842     }, expected_output_custom_padding)
    843 
    844     # Change number of required values so the inputs are not a
    845     # multiple of this size.
    846     self._test(
    847         {
    848             "example_names": example_names,
    849             "serialized": ops.convert_to_tensor(serialized),
    850             "features": {
    851                 aname:
    852                     parsing_ops.FixedLenSequenceFeature(
    853                         (2, 1), dtype=dtypes.float32, allow_missing=True),
    854                 bname:
    855                     parsing_ops.FixedLenSequenceFeature(
    856                         (2, 1, 1), dtype=dtypes.string, allow_missing=True),
    857             }
    858         },
    859         expected_err=(
    860             errors_impl.OpError, "Name: in3, Key: b, Index: 2.  "
    861             "Number of bytes values is not a multiple of stride length."))
    862 
    863     self._test(
    864         {
    865             "example_names": example_names,
    866             "serialized": ops.convert_to_tensor(serialized),
    867             "features": {
    868                 aname:
    869                     parsing_ops.FixedLenSequenceFeature(
    870                         (2, 1), dtype=dtypes.float32, allow_missing=True,
    871                         default_value=[]),
    872                 bname:
    873                     parsing_ops.FixedLenSequenceFeature(
    874                         (2, 1, 1), dtype=dtypes.string, allow_missing=True),
    875             }
    876         },
    877         expected_err=(ValueError,
    878                       "Cannot reshape a tensor with 0 elements to shape"))
    879 
    880     self._test(
    881         {
    882             "example_names": example_names,
    883             "serialized": ops.convert_to_tensor(serialized),
    884             "features": {
    885                 aname:
    886                     parsing_ops.FixedLenFeature(
    887                         (None, 2, 1), dtype=dtypes.float32),
    888                 bname:
    889                     parsing_ops.FixedLenSequenceFeature(
    890                         (2, 1, 1), dtype=dtypes.string, allow_missing=True),
    891             }
    892         },
    893         expected_err=(ValueError,
    894                       "First dimension of shape for feature a unknown. "
    895                       "Consider using FixedLenSequenceFeature."))
    896 
    897     self._test(
    898         {
    899             "example_names": example_names,
    900             "serialized": ops.convert_to_tensor(serialized),
    901             "features": {
    902                 cname:
    903                     parsing_ops.FixedLenFeature(
    904                         (1, None), dtype=dtypes.int64, default_value=[[1]]),
    905             }
    906         },
    907         expected_err=(ValueError,
    908                       "All dimensions of shape for feature c need to be known "
    909                       r"but received \(1, None\)."))
    910 
    911     self._test({
    912         "example_names": example_names,
    913         "serialized": ops.convert_to_tensor(serialized),
    914         "features": {
    915             aname:
    916                 parsing_ops.FixedLenSequenceFeature(
    917                     (2, 1), dtype=dtypes.float32, allow_missing=True),
    918             bname:
    919                 parsing_ops.FixedLenSequenceFeature(
    920                     (1, 1, 1), dtype=dtypes.string, allow_missing=True),
    921             cname:
    922                 parsing_ops.FixedLenSequenceFeature(
    923                     shape=[], dtype=dtypes.int64, allow_missing=False),
    924             dname:
    925                 parsing_ops.FixedLenSequenceFeature(
    926                     shape=[], dtype=dtypes.string, allow_missing=True),
    927         }
    928     }, expected_err=(ValueError,
    929                      "Unsupported: FixedLenSequenceFeature requires "
    930                      "allow_missing to be True."))
    931 
    932 
    933 class ParseSingleExampleTest(test.TestCase):
    934 
    935   def _test(self, kwargs, expected_values=None, expected_err=None):
    936     with self.test_session() as sess:
    937       if expected_err:
    938         with self.assertRaisesWithPredicateMatch(expected_err[0],
    939                                                  expected_err[1]):
    940           out = parsing_ops.parse_single_example(**kwargs)
    941           sess.run(flatten_values_tensors_or_sparse(out.values()))
    942       else:
    943         # Returns dict w/ Tensors and SparseTensors.
    944         out = parsing_ops.parse_single_example(**kwargs)
    945         # Check values.
    946         tf_result = sess.run(flatten_values_tensors_or_sparse(out.values()))
    947         _compare_output_to_expected(self, out, expected_values, tf_result)
    948 
    949       # Check shapes.
    950       for k, f in kwargs["features"].items():
    951         if isinstance(f, parsing_ops.FixedLenFeature) and f.shape is not None:
    952           self.assertEqual(tuple(out[k].get_shape()),
    953                            tensor_shape.as_shape(f.shape))
    954         elif isinstance(f, parsing_ops.VarLenFeature):
    955           self.assertEqual(
    956               tuple(out[k].indices.get_shape().as_list()), (None, 1))
    957           self.assertEqual(tuple(out[k].values.get_shape().as_list()), (None,))
    958           self.assertEqual(
    959               tuple(out[k].dense_shape.get_shape().as_list()), (1,))
    960 
    961   def testSingleExampleWithSparseAndSparseFeatureAndDense(self):
    962     original = example(features=features({
    963         "c": float_feature([3, 4]),
    964         "d": float_feature([0.0, 1.0]),
    965         "val": bytes_feature([b"a", b"b"]),
    966         "idx": int64_feature([0, 3]),
    967         "st_a": float_feature([3.0, 4.0])
    968     }))
    969 
    970     serialized = original.SerializeToString()
    971 
    972     expected_st_a = (
    973         np.array(
    974             [[0], [1]], dtype=np.int64),  # indices
    975         np.array(
    976             [3.0, 4.0], dtype=np.float32),  # values
    977         np.array(
    978             [2], dtype=np.int64))  # shape: max_values = 2
    979 
    980     expected_sp = (  # indices, values, shape
    981         np.array(
    982             [[0], [3]], dtype=np.int64), np.array(
    983                 ["a", "b"], dtype="|S"), np.array(
    984                     [13], dtype=np.int64))  # max_values = 13
    985 
    986     a_default = [1, 2, 3]
    987     b_default = np.random.rand(3, 3).astype(bytes)
    988     expected_output = {
    989         "st_a": expected_st_a,
    990         "sp": expected_sp,
    991         "a": [a_default],
    992         "b": b_default,
    993         "c": np.array([3, 4], dtype=np.float32),
    994         "d": np.array([0.0, 1.0], dtype=np.float32),
    995     }
    996 
    997     self._test(
    998         {
    999             "example_names":
   1000                 ops.convert_to_tensor("in1"),
   1001             "serialized":
   1002                 ops.convert_to_tensor(serialized),
   1003             "features": {
   1004                 "st_a":
   1005                     parsing_ops.VarLenFeature(dtypes.float32),
   1006                 "sp":
   1007                     parsing_ops.SparseFeature(
   1008                         ["idx"], "val", dtypes.string, [13]),
   1009                 "a":
   1010                     parsing_ops.FixedLenFeature(
   1011                         (1, 3), dtypes.int64, default_value=a_default),
   1012                 "b":
   1013                     parsing_ops.FixedLenFeature(
   1014                         (3, 3), dtypes.string, default_value=b_default),
   1015                 # Feature "c" must be provided, since it has no default_value.
   1016                 "c":
   1017                     parsing_ops.FixedLenFeature(2, dtypes.float32),
   1018                 "d":
   1019                     parsing_ops.FixedLenSequenceFeature([],
   1020                                                         dtypes.float32,
   1021                                                         allow_missing=True)
   1022             }
   1023         },
   1024         expected_output)
   1025 
   1026 
   1027 class ParseSequenceExampleTest(test.TestCase):
   1028 
   1029   def testCreateSequenceExample(self):
   1030     value = sequence_example(
   1031         context=features({
   1032             "global_feature": float_feature([1, 2, 3]),
   1033         }),
   1034         feature_lists=feature_lists({
   1035             "repeated_feature_2_frames":
   1036                 feature_list([
   1037                     bytes_feature([b"a", b"b", b"c"]),
   1038                     bytes_feature([b"a", b"d", b"e"])
   1039                 ]),
   1040             "repeated_feature_3_frames":
   1041                 feature_list([
   1042                     int64_feature([3, 4, 5, 6, 7]),
   1043                     int64_feature([-1, 0, 0, 0, 0]),
   1044                     int64_feature([1, 2, 3, 4, 5])
   1045                 ])
   1046         }))
   1047     value.SerializeToString()  # Smoke test
   1048 
   1049   def _test(self,
   1050             kwargs,
   1051             expected_context_values=None,
   1052             expected_feat_list_values=None,
   1053             expected_err=None):
   1054     expected_context_values = expected_context_values or {}
   1055     expected_feat_list_values = expected_feat_list_values or {}
   1056 
   1057     with self.test_session() as sess:
   1058       if expected_err:
   1059         with self.assertRaisesWithPredicateMatch(expected_err[0],
   1060                                                  expected_err[1]):
   1061           c_out, fl_out = parsing_ops.parse_single_sequence_example(**kwargs)
   1062           if c_out:
   1063             sess.run(flatten_values_tensors_or_sparse(c_out.values()))
   1064           if fl_out:
   1065             sess.run(flatten_values_tensors_or_sparse(fl_out.values()))
   1066       else:
   1067         # Returns dicts w/ Tensors and SparseTensors.
   1068         context_out, feat_list_out = parsing_ops.parse_single_sequence_example(
   1069             **kwargs)
   1070         context_result = sess.run(
   1071             flatten_values_tensors_or_sparse(context_out.values(
   1072             ))) if context_out else []
   1073         feat_list_result = sess.run(
   1074             flatten_values_tensors_or_sparse(feat_list_out.values(
   1075             ))) if feat_list_out else []
   1076         # Check values.
   1077         _compare_output_to_expected(self, context_out, expected_context_values,
   1078                                     context_result)
   1079         _compare_output_to_expected(self, feat_list_out,
   1080                                     expected_feat_list_values, feat_list_result)
   1081 
   1082       # Check shapes; if serialized is a Tensor we need its size to
   1083       # properly check.
   1084       if "context_features" in kwargs:
   1085         for k, f in kwargs["context_features"].items():
   1086           if isinstance(f, parsing_ops.FixedLenFeature) and f.shape is not None:
   1087             self.assertEqual(
   1088                 tuple(context_out[k].get_shape().as_list()), f.shape)
   1089           elif isinstance(f, parsing_ops.VarLenFeature):
   1090             self.assertEqual(
   1091                 tuple(context_out[k].indices.get_shape().as_list()), (None, 1))
   1092             self.assertEqual(
   1093                 tuple(context_out[k].values.get_shape().as_list()), (None,))
   1094             self.assertEqual(
   1095                 tuple(context_out[k].dense_shape.get_shape().as_list()), (1,))
   1096 
   1097   def testSequenceExampleWithSparseAndDenseContext(self):
   1098     original = sequence_example(context=features({
   1099         "c": float_feature([3, 4]),
   1100         "st_a": float_feature([3.0, 4.0])
   1101     }))
   1102 
   1103     serialized = original.SerializeToString()
   1104 
   1105     expected_st_a = (
   1106         np.array(
   1107             [[0], [1]], dtype=np.int64),  # indices
   1108         np.array(
   1109             [3.0, 4.0], dtype=np.float32),  # values
   1110         np.array(
   1111             [2], dtype=np.int64))  # shape: num_features = 2
   1112 
   1113     a_default = [1, 2, 3]
   1114     b_default = np.random.rand(3, 3).astype(bytes)
   1115     expected_context_output = {
   1116         "st_a": expected_st_a,
   1117         "a": [a_default],
   1118         "b": b_default,
   1119         "c": np.array(
   1120             [3, 4], dtype=np.float32),
   1121     }
   1122 
   1123     self._test(
   1124         {
   1125             "example_name":
   1126                 "in1",
   1127             "serialized":
   1128                 ops.convert_to_tensor(serialized),
   1129             "context_features": {
   1130                 "st_a":
   1131                     parsing_ops.VarLenFeature(dtypes.float32),
   1132                 "a":
   1133                     parsing_ops.FixedLenFeature(
   1134                         (1, 3), dtypes.int64, default_value=a_default),
   1135                 "b":
   1136                     parsing_ops.FixedLenFeature(
   1137                         (3, 3), dtypes.string, default_value=b_default),
   1138                 # Feature "c" must be provided, since it has no default_value.
   1139                 "c":
   1140                     parsing_ops.FixedLenFeature((2,), dtypes.float32),
   1141             }
   1142         },
   1143         expected_context_values=expected_context_output)
   1144 
   1145   def testSequenceExampleWithMultipleSizeFeatureLists(self):
   1146     original = sequence_example(feature_lists=feature_lists({
   1147         "a":
   1148             feature_list([
   1149                 int64_feature([-1, 0, 1]),
   1150                 int64_feature([2, 3, 4]),
   1151                 int64_feature([5, 6, 7]),
   1152                 int64_feature([8, 9, 10]),
   1153             ]),
   1154         "b":
   1155             feature_list([bytes_feature([b"r00", b"r01", b"r10", b"r11"])]),
   1156         "c":
   1157             feature_list([float_feature([3, 4]), float_feature([-1, 2])]),
   1158     }))
   1159 
   1160     serialized = original.SerializeToString()
   1161 
   1162     expected_feature_list_output = {
   1163         "a": np.array(
   1164             [  # outer dimension is time.
   1165                 [[-1, 0, 1]],  # inside are 1x3 matrices
   1166                 [[2, 3, 4]],
   1167                 [[5, 6, 7]],
   1168                 [[8, 9, 10]]
   1169             ],
   1170             dtype=np.int64),
   1171         "b": np.array(
   1172             [  # outer dimension is time, inside are 2x2 matrices
   1173                 [[b"r00", b"r01"], [b"r10", b"r11"]]
   1174             ],
   1175             dtype=bytes),
   1176         "c": np.array(
   1177             [  # outer dimension is time, inside are 2-vectors
   1178                 [3, 4], [-1, 2]
   1179             ],
   1180             dtype=np.float32),
   1181         "d": np.empty(
   1182             shape=(0, 5), dtype=np.float32),  # empty_allowed_missing
   1183     }
   1184 
   1185     self._test(
   1186         {
   1187             "example_name":
   1188                 "in1",
   1189             "serialized":
   1190                 ops.convert_to_tensor(serialized),
   1191             "sequence_features": {
   1192                 "a":
   1193                     parsing_ops.FixedLenSequenceFeature((1, 3), dtypes.int64),
   1194                 "b":
   1195                     parsing_ops.FixedLenSequenceFeature((2, 2), dtypes.string),
   1196                 "c":
   1197                     parsing_ops.FixedLenSequenceFeature(2, dtypes.float32),
   1198                 "d":
   1199                     parsing_ops.FixedLenSequenceFeature(
   1200                         (5,), dtypes.float32, allow_missing=True),
   1201             }
   1202         },
   1203         expected_feat_list_values=expected_feature_list_output)
   1204 
   1205   def testSequenceExampleWithoutDebugName(self):
   1206     original = sequence_example(feature_lists=feature_lists({
   1207         "a":
   1208             feature_list([int64_feature([3, 4]), int64_feature([1, 0])]),
   1209         "st_a":
   1210             feature_list([
   1211                 float_feature([3.0, 4.0]), float_feature([5.0]),
   1212                 float_feature([])
   1213             ]),
   1214         "st_b":
   1215             feature_list([
   1216                 bytes_feature([b"a"]), bytes_feature([]), bytes_feature([]),
   1217                 bytes_feature([b"b", b"c"])
   1218             ])
   1219     }))
   1220 
   1221     serialized = original.SerializeToString()
   1222 
   1223     expected_st_a = (
   1224         np.array(
   1225             [[0, 0], [0, 1], [1, 0]], dtype=np.int64),  # indices
   1226         np.array(
   1227             [3.0, 4.0, 5.0], dtype=np.float32),  # values
   1228         np.array(
   1229             [3, 2], dtype=np.int64))  # shape: num_time = 3, max_feat = 2
   1230 
   1231     expected_st_b = (
   1232         np.array(
   1233             [[0, 0], [3, 0], [3, 1]], dtype=np.int64),  # indices
   1234         np.array(
   1235             ["a", "b", "c"], dtype="|S"),  # values
   1236         np.array(
   1237             [4, 2], dtype=np.int64))  # shape: num_time = 4, max_feat = 2
   1238 
   1239     expected_st_c = (
   1240         np.empty(
   1241             (0, 2), dtype=np.int64),  # indices
   1242         np.empty(
   1243             (0,), dtype=np.int64),  # values
   1244         np.array(
   1245             [0, 0], dtype=np.int64))  # shape: num_time = 0, max_feat = 0
   1246 
   1247     expected_feature_list_output = {
   1248         "a": np.array(
   1249             [[3, 4], [1, 0]], dtype=np.int64),
   1250         "st_a": expected_st_a,
   1251         "st_b": expected_st_b,
   1252         "st_c": expected_st_c,
   1253     }
   1254 
   1255     self._test(
   1256         {
   1257             "serialized": ops.convert_to_tensor(serialized),
   1258             "sequence_features": {
   1259                 "st_a": parsing_ops.VarLenFeature(dtypes.float32),
   1260                 "st_b": parsing_ops.VarLenFeature(dtypes.string),
   1261                 "st_c": parsing_ops.VarLenFeature(dtypes.int64),
   1262                 "a": parsing_ops.FixedLenSequenceFeature((2,), dtypes.int64),
   1263             }
   1264         },
   1265         expected_feat_list_values=expected_feature_list_output)
   1266 
   1267   def testSequenceExampleWithSparseAndDenseFeatureLists(self):
   1268     original = sequence_example(feature_lists=feature_lists({
   1269         "a":
   1270             feature_list([int64_feature([3, 4]), int64_feature([1, 0])]),
   1271         "st_a":
   1272             feature_list([
   1273                 float_feature([3.0, 4.0]), float_feature([5.0]),
   1274                 float_feature([])
   1275             ]),
   1276         "st_b":
   1277             feature_list([
   1278                 bytes_feature([b"a"]), bytes_feature([]), bytes_feature([]),
   1279                 bytes_feature([b"b", b"c"])
   1280             ])
   1281     }))
   1282 
   1283     serialized = original.SerializeToString()
   1284 
   1285     expected_st_a = (
   1286         np.array(
   1287             [[0, 0], [0, 1], [1, 0]], dtype=np.int64),  # indices
   1288         np.array(
   1289             [3.0, 4.0, 5.0], dtype=np.float32),  # values
   1290         np.array(
   1291             [3, 2], dtype=np.int64))  # shape: num_time = 3, max_feat = 2
   1292 
   1293     expected_st_b = (
   1294         np.array(
   1295             [[0, 0], [3, 0], [3, 1]], dtype=np.int64),  # indices
   1296         np.array(
   1297             ["a", "b", "c"], dtype="|S"),  # values
   1298         np.array(
   1299             [4, 2], dtype=np.int64))  # shape: num_time = 4, max_feat = 2
   1300 
   1301     expected_st_c = (
   1302         np.empty(
   1303             (0, 2), dtype=np.int64),  # indices
   1304         np.empty(
   1305             (0,), dtype=np.int64),  # values
   1306         np.array(
   1307             [0, 0], dtype=np.int64))  # shape: num_time = 0, max_feat = 0
   1308 
   1309     expected_feature_list_output = {
   1310         "a": np.array(
   1311             [[3, 4], [1, 0]], dtype=np.int64),
   1312         "st_a": expected_st_a,
   1313         "st_b": expected_st_b,
   1314         "st_c": expected_st_c,
   1315     }
   1316 
   1317     self._test(
   1318         {
   1319             "example_name": "in1",
   1320             "serialized": ops.convert_to_tensor(serialized),
   1321             "sequence_features": {
   1322                 "st_a": parsing_ops.VarLenFeature(dtypes.float32),
   1323                 "st_b": parsing_ops.VarLenFeature(dtypes.string),
   1324                 "st_c": parsing_ops.VarLenFeature(dtypes.int64),
   1325                 "a": parsing_ops.FixedLenSequenceFeature((2,), dtypes.int64),
   1326             }
   1327         },
   1328         expected_feat_list_values=expected_feature_list_output)
   1329 
   1330   def testSequenceExampleWithEmptyFeatureInFeatureLists(self):
   1331     original = sequence_example(feature_lists=feature_lists({
   1332         "st_a":
   1333             feature_list([
   1334                 float_feature([3.0, 4.0]),
   1335                 feature(),
   1336                 float_feature([5.0]),
   1337             ]),
   1338     }))
   1339 
   1340     serialized = original.SerializeToString()
   1341 
   1342     expected_st_a = (
   1343         np.array(
   1344             [[0, 0], [0, 1], [2, 0]], dtype=np.int64),  # indices
   1345         np.array(
   1346             [3.0, 4.0, 5.0], dtype=np.float32),  # values
   1347         np.array(
   1348             [3, 2], dtype=np.int64))  # shape: num_time = 3, max_feat = 2
   1349 
   1350     expected_feature_list_output = {
   1351         "st_a": expected_st_a,
   1352     }
   1353 
   1354     self._test(
   1355         {
   1356             "example_name": "in1",
   1357             "serialized": ops.convert_to_tensor(serialized),
   1358             "sequence_features": {
   1359                 "st_a": parsing_ops.VarLenFeature(dtypes.float32),
   1360             }
   1361         },
   1362         expected_feat_list_values=expected_feature_list_output)
   1363 
   1364   def testSequenceExampleListWithInconsistentDataFails(self):
   1365     original = sequence_example(feature_lists=feature_lists({
   1366         "a": feature_list([int64_feature([-1, 0]), float_feature([2, 3])])
   1367     }))
   1368 
   1369     serialized = original.SerializeToString()
   1370 
   1371     self._test(
   1372         {
   1373             "example_name": "in1",
   1374             "serialized": ops.convert_to_tensor(serialized),
   1375             "sequence_features": {
   1376                 "a": parsing_ops.FixedLenSequenceFeature((2,), dtypes.int64)
   1377             }
   1378         },
   1379         expected_err=(errors_impl.OpError, "Feature list: a, Index: 1."
   1380                       "  Data types don't match. Expected type: int64"))
   1381 
   1382   def testSequenceExampleListWithWrongDataTypeFails(self):
   1383     original = sequence_example(feature_lists=feature_lists({
   1384         "a": feature_list([float_feature([2, 3])])
   1385     }))
   1386 
   1387     serialized = original.SerializeToString()
   1388 
   1389     self._test(
   1390         {
   1391             "example_name": "in1",
   1392             "serialized": ops.convert_to_tensor(serialized),
   1393             "sequence_features": {
   1394                 "a": parsing_ops.FixedLenSequenceFeature((2,), dtypes.int64)
   1395             }
   1396         },
   1397         expected_err=(errors_impl.OpError,
   1398                       "Feature list: a, Index: 0.  Data types don't match."
   1399                       " Expected type: int64"))
   1400 
   1401   def testSequenceExampleListWithWrongSparseDataTypeFails(self):
   1402     original = sequence_example(feature_lists=feature_lists({
   1403         "a":
   1404             feature_list([
   1405                 int64_feature([3, 4]), int64_feature([1, 2]),
   1406                 float_feature([2.0, 3.0])
   1407             ])
   1408     }))
   1409 
   1410     serialized = original.SerializeToString()
   1411 
   1412     self._test(
   1413         {
   1414             "example_name": "in1",
   1415             "serialized": ops.convert_to_tensor(serialized),
   1416             "sequence_features": {
   1417                 "a": parsing_ops.FixedLenSequenceFeature((2,), dtypes.int64)
   1418             }
   1419         },
   1420         expected_err=(errors_impl.OpError,
   1421                       "Name: in1, Feature list: a, Index: 2."
   1422                       "  Data types don't match. Expected type: int64"
   1423                       "  Feature is: float_list"))
   1424 
   1425   def testSequenceExampleListWithWrongShapeFails(self):
   1426     original = sequence_example(feature_lists=feature_lists({
   1427         "a": feature_list([int64_feature([2, 3]), int64_feature([2, 3, 4])]),
   1428     }))
   1429 
   1430     serialized = original.SerializeToString()
   1431 
   1432     self._test(
   1433         {
   1434             "example_name": "in1",
   1435             "serialized": ops.convert_to_tensor(serialized),
   1436             "sequence_features": {
   1437                 "a": parsing_ops.FixedLenSequenceFeature((2,), dtypes.int64)
   1438             }
   1439         },
   1440         expected_err=(errors_impl.OpError, r"Name: in1, Key: a, Index: 1."
   1441                       r"  Number of int64 values != expected."
   1442                       r"  values size: 3 but output shape: \[2\]"))
   1443 
   1444   def testSequenceExampleWithMissingFeatureListFails(self):
   1445     original = sequence_example(feature_lists=feature_lists({}))
   1446 
   1447     # Test fails because we didn't add:
   1448     #  feature_list_dense_defaults = {"a": None}
   1449     self._test(
   1450         {
   1451             "example_name": "in1",
   1452             "serialized": ops.convert_to_tensor(original.SerializeToString()),
   1453             "sequence_features": {
   1454                 "a": parsing_ops.FixedLenSequenceFeature((2,), dtypes.int64)
   1455             }
   1456         },
   1457         expected_err=(
   1458             errors_impl.OpError,
   1459             "Name: in1, Feature list 'a' is required but could not be found."
   1460             "  Did you mean to include it in"
   1461             " feature_list_dense_missing_assumed_empty or"
   1462             " feature_list_dense_defaults?"))
   1463 
   1464 
   1465 class DecodeJSONExampleTest(test.TestCase):
   1466 
   1467   def _testRoundTrip(self, examples):
   1468     with self.test_session() as sess:
   1469       examples = np.array(examples, dtype=np.object)
   1470 
   1471       json_tensor = constant_op.constant(
   1472           [json_format.MessageToJson(m) for m in examples.flatten()],
   1473           shape=examples.shape,
   1474           dtype=dtypes.string)
   1475       binary_tensor = parsing_ops.decode_json_example(json_tensor)
   1476       binary_val = sess.run(binary_tensor)
   1477 
   1478       if examples.shape:
   1479         self.assertShapeEqual(binary_val, json_tensor)
   1480         for input_example, output_binary in zip(
   1481             np.array(examples).flatten(), binary_val.flatten()):
   1482           output_example = example_pb2.Example()
   1483           output_example.ParseFromString(output_binary)
   1484           self.assertProtoEquals(input_example, output_example)
   1485       else:
   1486         output_example = example_pb2.Example()
   1487         output_example.ParseFromString(binary_val)
   1488         self.assertProtoEquals(examples.item(), output_example)
   1489 
   1490   def testEmptyTensor(self):
   1491     self._testRoundTrip([])
   1492     self._testRoundTrip([[], [], []])
   1493 
   1494   def testEmptyExamples(self):
   1495     self._testRoundTrip([example(), example(), example()])
   1496 
   1497   def testDenseFeaturesScalar(self):
   1498     self._testRoundTrip(
   1499         example(features=features({
   1500             "a": float_feature([1, 1, 3])
   1501         })))
   1502 
   1503   def testDenseFeaturesVector(self):
   1504     self._testRoundTrip([
   1505         example(features=features({
   1506             "a": float_feature([1, 1, 3])
   1507         })),
   1508         example(features=features({
   1509             "a": float_feature([-1, -1, 2])
   1510         })),
   1511     ])
   1512 
   1513   def testDenseFeaturesMatrix(self):
   1514     self._testRoundTrip([
   1515         [example(features=features({
   1516             "a": float_feature([1, 1, 3])
   1517         }))],
   1518         [example(features=features({
   1519             "a": float_feature([-1, -1, 2])
   1520         }))],
   1521     ])
   1522 
   1523   def testSparseFeatures(self):
   1524     self._testRoundTrip([
   1525         example(features=features({
   1526             "st_c": float_feature([3, 4])
   1527         })),
   1528         example(features=features({
   1529             "st_c": float_feature([])
   1530         })),
   1531         example(features=features({
   1532             "st_d": feature()
   1533         })),
   1534         example(features=features({
   1535             "st_c": float_feature([1, 2, -1]),
   1536             "st_d": bytes_feature([b"hi"])
   1537         })),
   1538     ])
   1539 
   1540   def testSerializedContainingBytes(self):
   1541     aname = "a"
   1542     bname = "b*has+a:tricky_name"
   1543     self._testRoundTrip([
   1544         example(features=features({
   1545             aname: float_feature([1, 1]),
   1546             bname: bytes_feature([b"b0_str"])
   1547         })),
   1548         example(features=features({
   1549             aname: float_feature([-1, -1]),
   1550             bname: bytes_feature([b"b1"])
   1551         })),
   1552     ])
   1553 
   1554   def testInvalidSyntax(self):
   1555     with self.test_session() as sess:
   1556       json_tensor = constant_op.constant(["{]"])
   1557       binary_tensor = parsing_ops.decode_json_example(json_tensor)
   1558       with self.assertRaisesOpError("Error while parsing JSON"):
   1559         sess.run(binary_tensor)
   1560 
   1561 
   1562 class ParseTensorOpTest(test.TestCase):
   1563 
   1564   def testToFloat32(self):
   1565     with self.test_session():
   1566       expected = np.random.rand(3, 4, 5).astype(np.float32)
   1567       tensor_proto = tensor_util.make_tensor_proto(expected)
   1568 
   1569       serialized = array_ops.placeholder(dtypes.string)
   1570       tensor = parsing_ops.parse_tensor(serialized, dtypes.float32)
   1571 
   1572       result = tensor.eval(
   1573           feed_dict={serialized: tensor_proto.SerializeToString()})
   1574 
   1575       self.assertAllEqual(expected, result)
   1576 
   1577   def testToUint8(self):
   1578     with self.test_session():
   1579       expected = np.random.rand(3, 4, 5).astype(np.uint8)
   1580       tensor_proto = tensor_util.make_tensor_proto(expected)
   1581 
   1582       serialized = array_ops.placeholder(dtypes.string)
   1583       tensor = parsing_ops.parse_tensor(serialized, dtypes.uint8)
   1584 
   1585       result = tensor.eval(
   1586           feed_dict={serialized: tensor_proto.SerializeToString()})
   1587 
   1588       self.assertAllEqual(expected, result)
   1589 
   1590   def testTypeMismatch(self):
   1591     with self.test_session():
   1592       expected = np.random.rand(3, 4, 5).astype(np.uint8)
   1593       tensor_proto = tensor_util.make_tensor_proto(expected)
   1594 
   1595       serialized = array_ops.placeholder(dtypes.string)
   1596       tensor = parsing_ops.parse_tensor(serialized, dtypes.uint16)
   1597 
   1598       with self.assertRaisesOpError(
   1599           r"Type mismatch between parsed tensor \(uint8\) and dtype "
   1600           r"\(uint16\)"):
   1601         tensor.eval(feed_dict={serialized: tensor_proto.SerializeToString()})
   1602 
   1603   def testInvalidInput(self):
   1604     with self.test_session():
   1605       serialized = array_ops.placeholder(dtypes.string)
   1606       tensor = parsing_ops.parse_tensor(serialized, dtypes.uint16)
   1607 
   1608       with self.assertRaisesOpError(
   1609           "Could not parse `serialized` as TensorProto: 'bogus'"):
   1610         tensor.eval(feed_dict={serialized: "bogus"})
   1611 
   1612       with self.assertRaisesOpError(
   1613           r"Expected `serialized` to be a scalar, got shape: \[1\]"):
   1614         tensor.eval(feed_dict={serialized: ["bogus"]})
   1615 
   1616 
# Call the test module's entry point (`test.main`) only when this file is
# executed as a script, not when it is imported.
if __name__ == "__main__":
  test.main()
   1619