Home | History | Annotate | Download | only in ops
      1 # Copyright 2015 The TensorFlow Authors. All Rights Reserved.
      2 #
      3 # Licensed under the Apache License, Version 2.0 (the "License");
      4 # you may not use this file except in compliance with the License.
      5 # You may obtain a copy of the License at
      6 #
      7 #     http://www.apache.org/licenses/LICENSE-2.0
      8 #
      9 # Unless required by applicable law or agreed to in writing, software
     10 # distributed under the License is distributed on an "AS IS" BASIS,
     11 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 # See the License for the specific language governing permissions and
     13 # limitations under the License.
     14 # ==============================================================================
     15 
     16 """Parsing Ops."""
     17 from __future__ import absolute_import
     18 from __future__ import division
     19 from __future__ import print_function
     20 
     21 import collections
     22 import re
     23 
     24 from tensorflow.python.framework import constant_op
     25 from tensorflow.python.framework import dtypes
     26 from tensorflow.python.framework import ops
     27 from tensorflow.python.framework import sparse_tensor
     28 from tensorflow.python.framework import tensor_shape
     29 from tensorflow.python.ops import array_ops
     30 from tensorflow.python.ops import control_flow_ops
     31 from tensorflow.python.ops import gen_parsing_ops
     32 from tensorflow.python.ops import math_ops
     33 from tensorflow.python.ops import sparse_ops
     34 # go/tf-wildcard-import
     35 # pylint: disable=wildcard-import,undefined-variable
     36 from tensorflow.python.ops.gen_parsing_ops import *
     37 # pylint: enable=wildcard-import,undefined-variable
     38 from tensorflow.python.platform import tf_logging
     39 from tensorflow.python.util.tf_export import tf_export
     40 
     41 
# Register the parsing/serialization op types below as not differentiable, so
# no gradient function is expected for them.
ops.NotDifferentiable("DecodeRaw")
ops.NotDifferentiable("ParseTensor")
ops.NotDifferentiable("SerializeTensor")
ops.NotDifferentiable("StringToNumber")
     46 
     47 
     48 @tf_export("VarLenFeature")
     49 class VarLenFeature(collections.namedtuple("VarLenFeature", ["dtype"])):
     50   """Configuration for parsing a variable-length input feature.
     51 
     52   Fields:
     53     dtype: Data type of input.
     54   """
     55   pass
     56 
     57 
     58 @tf_export("SparseFeature")
     59 class SparseFeature(
     60     collections.namedtuple(
     61         "SparseFeature",
     62         ["index_key", "value_key", "dtype", "size", "already_sorted"])):
     63   """Configuration for parsing a sparse input feature from an `Example`.
     64 
     65   Note, preferably use `VarLenFeature` (possibly in combination with a
     66   `SequenceExample`) in order to parse out `SparseTensor`s instead of
     67   `SparseFeature` due to its simplicity.
     68 
     69   Closely mimicking the `SparseTensor` that will be obtained by parsing an
     70   `Example` with a `SparseFeature` config, a `SparseFeature` contains a
     71 
     72   * `value_key`: The name of key for a `Feature` in the `Example` whose parsed
     73     `Tensor` will be the resulting `SparseTensor.values`.
     74 
     75   * `index_key`: A list of names - one for each dimension in the resulting
     76     `SparseTensor` whose `indices[i][dim]` indicating the position of
     77     the `i`-th value in the `dim` dimension will be equal to the `i`-th value in
     78     the Feature with key named `index_key[dim]` in the `Example`.
     79 
     80   * `size`: A list of ints for the resulting `SparseTensor.dense_shape`.
     81 
     82   For example, we can represent the following 2D `SparseTensor`
     83 
     84   ```python
     85   SparseTensor(indices=[[3, 1], [20, 0]],
     86                values=[0.5, -1.0]
     87                dense_shape=[100, 3])
     88   ```
     89 
     90   with an `Example` input proto
     91 
     92   ```python
     93   features {
     94     feature { key: "val" value { float_list { value: [ 0.5, -1.0 ] } } }
     95     feature { key: "ix0" value { int64_list { value: [ 3, 20 ] } } }
     96     feature { key: "ix1" value { int64_list { value: [ 1, 0 ] } } }
     97   }
     98   ```
     99 
    100   and `SparseFeature` config with 2 `index_key`s
    101 
    102   ```python
    103   SparseFeature(index_key=["ix0", "ix1"],
    104                 value_key="val",
    105                 dtype=tf.float32,
    106                 size=[100, 3])
    107   ```
    108 
    109   Fields:
    110     index_key: A single string name or a list of string names of index features.
    111       For each key the underlying feature's type must be `int64` and its length
    112       must always match that of the `value_key` feature.
    113       To represent `SparseTensor`s with a `dense_shape` of `rank` higher than 1
    114       a list of length `rank` should be used.
    115     value_key: Name of value feature.  The underlying feature's type must
    116       be `dtype` and its length must always match that of all the `index_key`s'
    117       features.
    118     dtype: Data type of the `value_key` feature.
    119     size: A Python int or list thereof specifying the dense shape. Should be a
    120       list if and only if `index_key` is a list. In that case the list must be
    121       equal to the length of `index_key`. Each for each entry `i` all values in
    122       the `index_key`[i] feature must be in `[0, size[i])`.
    123     already_sorted: A Python boolean to specify whether the values in
    124       `value_key` are already sorted by their index position. If so skip
    125       sorting. False by default (optional).
    126   """
    127 
    128   def __new__(cls, index_key, value_key, dtype, size, already_sorted=False):
    129     return super(SparseFeature, cls).__new__(
    130         cls, index_key, value_key, dtype, size, already_sorted)
    131 
    132 
    133 @tf_export("FixedLenFeature")
    134 class FixedLenFeature(collections.namedtuple(
    135     "FixedLenFeature", ["shape", "dtype", "default_value"])):
    136   """Configuration for parsing a fixed-length input feature.
    137 
    138   To treat sparse input as dense, provide a `default_value`; otherwise,
    139   the parse functions will fail on any examples missing this feature.
    140 
    141   Fields:
    142     shape: Shape of input data.
    143     dtype: Data type of input.
    144     default_value: Value to be used if an example is missing this feature. It
    145         must be compatible with `dtype` and of the specified `shape`.
    146   """
    147 
    148   def __new__(cls, shape, dtype, default_value=None):
    149     return super(FixedLenFeature, cls).__new__(
    150         cls, shape, dtype, default_value)
    151 
    152 
    153 @tf_export("FixedLenSequenceFeature")
    154 class FixedLenSequenceFeature(collections.namedtuple(
    155     "FixedLenSequenceFeature",
    156     ["shape", "dtype", "allow_missing", "default_value"])):
    157   """Configuration for parsing a variable-length input feature into a `Tensor`.
    158 
    159   The resulting `Tensor` of parsing a single `SequenceExample` or `Example` has
    160   a static `shape` of `[None] + shape` and the specified `dtype`.
    161   The resulting `Tensor` of parsing a `batch_size` many `Example`s has
    162   a static `shape` of `[batch_size, None] + shape` and the specified `dtype`.
    163   The entries in the `batch` from different `Examples` will be padded with
    164   `default_value` to the maximum length present in the `batch`.
    165 
    166   To treat a sparse input as dense, provide `allow_missing=True`; otherwise,
    167   the parse functions will fail on any examples missing this feature.
    168 
    169   Fields:
    170     shape: Shape of input data for dimension 2 and higher. First dimension is
    171       of variable length `None`.
    172     dtype: Data type of input.
    173     allow_missing: Whether to allow this feature to be missing from a feature
    174       list item. Is available only for parsing `SequenceExample` not for
    175       parsing `Examples`.
    176     default_value: Scalar value to be used to pad multiple `Example`s to their
    177       maximum length. Irrelevant for parsing a single `Example` or
    178       `SequenceExample`. Defaults to "" for dtype string and 0 otherwise
    179       (optional).
    180   """
    181 
    182   def __new__(cls, shape, dtype, allow_missing=False, default_value=None):
    183     return super(FixedLenSequenceFeature, cls).__new__(
    184         cls, shape, dtype, allow_missing, default_value)
    185 
    186 
    187 def _features_to_raw_params(features, types):
    188   """Split feature tuples into raw params used by `gen_parsing_ops`.
    189 
    190   Args:
    191     features: A `dict` mapping feature keys to objects of a type in `types`.
    192     types: Type of features to allow, among `FixedLenFeature`, `VarLenFeature`,
    193       `SparseFeature`, and `FixedLenSequenceFeature`.
    194 
    195   Returns:
    196     Tuple of `sparse_keys`, `sparse_types`, `dense_keys`, `dense_types`,
    197       `dense_defaults`, `dense_shapes`.
    198 
    199   Raises:
    200     ValueError: if `features` contains an item not in `types`, or an invalid
    201         feature.
    202   """
    203   sparse_keys = []
    204   sparse_types = []
    205   dense_keys = []
    206   dense_types = []
    207   # When the graph is built twice, multiple dense_defaults in a normal dict
    208   # could come out in different orders. This will fail the _e2e_test which
    209   # expects exactly the same graph.
    210   # OrderedDict which preserves the order can solve the problem.
    211   dense_defaults = collections.OrderedDict()
    212   dense_shapes = []
    213   if features:
    214     # NOTE: We iterate over sorted keys to keep things deterministic.
    215     for key in sorted(features.keys()):
    216       feature = features[key]
    217       if isinstance(feature, VarLenFeature):
    218         if VarLenFeature not in types:
    219           raise ValueError("Unsupported VarLenFeature %s." % feature)
    220         if not feature.dtype:
    221           raise ValueError("Missing type for feature %s." % key)
    222         sparse_keys.append(key)
    223         sparse_types.append(feature.dtype)
    224       elif isinstance(feature, SparseFeature):
    225         if SparseFeature not in types:
    226           raise ValueError("Unsupported SparseFeature %s." % feature)
    227 
    228         if not feature.index_key:
    229           raise ValueError(
    230               "Missing index_key for SparseFeature %s." % feature)
    231         if not feature.value_key:
    232           raise ValueError(
    233               "Missing value_key for SparseFeature %s." % feature)
    234         if not feature.dtype:
    235           raise ValueError("Missing type for feature %s." % key)
    236         index_keys = feature.index_key
    237         if isinstance(index_keys, str):
    238           index_keys = [index_keys]
    239         elif len(index_keys) > 1:
    240           tf_logging.warning("SparseFeature is a complicated feature config "
    241                              "and should only be used after careful "
    242                              "consideration of VarLenFeature.")
    243         for index_key in sorted(index_keys):
    244           if index_key in sparse_keys:
    245             dtype = sparse_types[sparse_keys.index(index_key)]
    246             if dtype != dtypes.int64:
    247               raise ValueError("Conflicting type %s vs int64 for feature %s." %
    248                                (dtype, index_key))
    249           else:
    250             sparse_keys.append(index_key)
    251             sparse_types.append(dtypes.int64)
    252         if feature.value_key in sparse_keys:
    253           dtype = sparse_types[sparse_keys.index(feature.value_key)]
    254           if dtype != feature.dtype:
    255             raise ValueError("Conflicting type %s vs %s for feature %s." % (
    256                 dtype, feature.dtype, feature.value_key))
    257         else:
    258           sparse_keys.append(feature.value_key)
    259           sparse_types.append(feature.dtype)
    260       elif isinstance(feature, FixedLenFeature):
    261         if FixedLenFeature not in types:
    262           raise ValueError("Unsupported FixedLenFeature %s." % feature)
    263         if not feature.dtype:
    264           raise ValueError("Missing type for feature %s." % key)
    265         if feature.shape is None:
    266           raise ValueError("Missing shape for feature %s." % key)
    267         feature_tensor_shape = tensor_shape.as_shape(feature.shape)
    268         if (feature.shape and feature_tensor_shape.ndims and
    269             feature_tensor_shape.dims[0].value is None):
    270           raise ValueError("First dimension of shape for feature %s unknown. "
    271                            "Consider using FixedLenSequenceFeature." % key)
    272         if (feature.shape is not None and
    273             not feature_tensor_shape.is_fully_defined()):
    274           raise ValueError("All dimensions of shape for feature %s need to be "
    275                            "known but received %s." % (key, str(feature.shape)))
    276         dense_keys.append(key)
    277         dense_shapes.append(feature.shape)
    278         dense_types.append(feature.dtype)
    279         if feature.default_value is not None:
    280           dense_defaults[key] = feature.default_value
    281       elif isinstance(feature, FixedLenSequenceFeature):
    282         if FixedLenSequenceFeature not in types:
    283           raise ValueError("Unsupported FixedLenSequenceFeature %s." % feature)
    284         if not feature.dtype:
    285           raise ValueError("Missing type for feature %s." % key)
    286         if feature.shape is None:
    287           raise ValueError("Missing shape for feature %s." % key)
    288         dense_keys.append(key)
    289         dense_shapes.append(feature.shape)
    290         dense_types.append(feature.dtype)
    291         if feature.allow_missing:
    292           dense_defaults[key] = None
    293         if feature.default_value is not None:
    294           dense_defaults[key] = feature.default_value
    295       else:
    296         raise ValueError("Invalid feature %s:%s." % (key, feature))
    297   return (
    298       sparse_keys, sparse_types, dense_keys, dense_types, dense_defaults,
    299       dense_shapes)
    300 
    301 
    302 def _construct_sparse_tensors_for_sparse_features(features, tensor_dict):
    303   """Merges SparseTensors of indices and values of SparseFeatures.
    304 
    305   Constructs new dict based on `tensor_dict`. For `SparseFeatures` in the values
    306   of `features` expects their `index_key`s and `index_value`s to be present in
    307   `tensor_dict` mapping to `SparseTensor`s. Constructs a single `SparseTensor`
    308   from them, and adds it to the result with the key from `features`.
    309   Copies other keys and values from `tensor_dict` with keys present in
    310   `features`.
    311 
    312   Args:
    313     features: A `dict` mapping feature keys to `SparseFeature` values.
    314       Values of other types will be ignored.
    315     tensor_dict: A `dict` mapping feature keys to `Tensor` and `SparseTensor`
    316       values. Expected to contain keys of the `SparseFeature`s' `index_key`s and
    317       `value_key`s and mapping them to `SparseTensor`s.
    318   Returns:
    319     A `dict` mapping feature keys to `Tensor` and `SparseTensor` values. Similar
    320     to `tensor_dict` except each `SparseFeature`s in `features` results in a
    321     single `SparseTensor`.
    322   """
    323   tensor_dict = dict(tensor_dict)  # Do not modify argument passed in.
    324   # Construct SparseTensors for SparseFeatures.
    325   for key in sorted(features.keys()):
    326     feature = features[key]
    327     if isinstance(feature, SparseFeature):
    328       if isinstance(feature.index_key, str):
    329         sp_ids = tensor_dict[feature.index_key]
    330       else:
    331         sp_ids = [tensor_dict[index_key] for index_key in feature.index_key]
    332       sp_values = tensor_dict[feature.value_key]
    333       tensor_dict[key] = sparse_ops.sparse_merge(
    334           sp_ids,
    335           sp_values,
    336           vocab_size=feature.size,
    337           already_sorted=feature.already_sorted)
    338   # Remove tensors from dictionary that were only used to construct
    339   # SparseTensors for SparseFeature.
    340   for key in set(tensor_dict) - set(features):
    341     del tensor_dict[key]
    342   return tensor_dict
    343 
    344 
    345 def _prepend_none_dimension(features):
    346   if features:
    347     modified_features = dict(features)  # Create a copy to modify
    348     for key, feature in features.items():
    349       if isinstance(feature, FixedLenSequenceFeature):
    350         if not feature.allow_missing:
    351           raise ValueError("Unsupported: FixedLenSequenceFeature requires "
    352                            "allow_missing to be True.")
    353         modified_features[key] = FixedLenSequenceFeature(
    354             [None] + list(feature.shape),
    355             feature.dtype,
    356             feature.allow_missing,
    357             feature.default_value)
    358     return modified_features
    359   else:
    360     return features
    361 
    362 
    363 @tf_export("parse_example")
    364 def parse_example(serialized, features, name=None, example_names=None):
    365   # pylint: disable=line-too-long
    366   """Parses `Example` protos into a `dict` of tensors.
    367 
    368   Parses a number of serialized [`Example`](https://www.tensorflow.org/code/tensorflow/core/example/example.proto)
    369   protos given in `serialized`. We refer to `serialized` as a batch with
    370   `batch_size` many entries of individual `Example` protos.
    371 
    372   `example_names` may contain descriptive names for the corresponding serialized
    373   protos. These may be useful for debugging purposes, but they have no effect on
    374   the output. If not `None`, `example_names` must be the same length as
    375   `serialized`.
    376 
    377   This op parses serialized examples into a dictionary mapping keys to `Tensor`
    378   and `SparseTensor` objects. `features` is a dict from keys to `VarLenFeature`,
    379   `SparseFeature`, and `FixedLenFeature` objects. Each `VarLenFeature`
    380   and `SparseFeature` is mapped to a `SparseTensor`, and each
    381   `FixedLenFeature` is mapped to a `Tensor`.
    382 
    383   Each `VarLenFeature` maps to a `SparseTensor` of the specified type
    384   representing a ragged matrix. Its indices are `[batch, index]` where `batch`
    385   identifies the example in `serialized`, and `index` is the value's index in
    386   the list of values associated with that feature and example.
    387 
    388   Each `SparseFeature` maps to a `SparseTensor` of the specified type
    389   representing a Tensor of `dense_shape` `[batch_size] + SparseFeature.size`.
    390   Its `values` come from the feature in the examples with key `value_key`.
    391   A `values[i]` comes from a position `k` in the feature of an example at batch
    392   entry `batch`. This positional information is recorded in `indices[i]` as
    393   `[batch, index_0, index_1, ...]` where `index_j` is the `k-th` value of
    394   the feature in the example at with key `SparseFeature.index_key[j]`.
    395   In other words, we split the indices (except the first index indicating the
    396   batch entry) of a `SparseTensor` by dimension into different features of the
    397   `Example`. Due to its complexity a `VarLenFeature` should be preferred over a
    398   `SparseFeature` whenever possible.
    399 
    400   Each `FixedLenFeature` `df` maps to a `Tensor` of the specified type (or
    401   `tf.float32` if not specified) and shape `(serialized.size(),) + df.shape`.
    402 
    403   `FixedLenFeature` entries with a `default_value` are optional. With no default
    404   value, we will fail if that `Feature` is missing from any example in
    405   `serialized`.
    406 
    407   Each `FixedLenSequenceFeature` `df` maps to a `Tensor` of the specified type
    408   (or `tf.float32` if not specified) and shape
    409   `(serialized.size(), None) + df.shape`.
    410   All examples in `serialized` will be padded with `default_value` along the
    411   second dimension.
    412 
    413   Examples:
    414 
    415   For example, if one expects a `tf.float32` `VarLenFeature` `ft` and three
    416   serialized `Example`s are provided:
    417 
    418   ```
    419   serialized = [
    420     features
    421       { feature { key: "ft" value { float_list { value: [1.0, 2.0] } } } },
    422     features
    423       { feature []},
    424     features
    425       { feature { key: "ft" value { float_list { value: [3.0] } } }
    426   ]
    427   ```
    428 
    429   then the output will look like:
    430 
    431   ```python
    432   {"ft": SparseTensor(indices=[[0, 0], [0, 1], [2, 0]],
    433                       values=[1.0, 2.0, 3.0],
    434                       dense_shape=(3, 2)) }
    435   ```
    436 
    437   If instead a `FixedLenSequenceFeature` with `default_value = -1.0` and
    438   `shape=[]` is used then the output will look like:
    439 
    440   ```python
    441   {"ft": [[1.0, 2.0], [3.0, -1.0]]}
    442   ```
    443 
    444   Given two `Example` input protos in `serialized`:
    445 
    446   ```
    447   [
    448     features {
    449       feature { key: "kw" value { bytes_list { value: [ "knit", "big" ] } } }
    450       feature { key: "gps" value { float_list { value: [] } } }
    451     },
    452     features {
    453       feature { key: "kw" value { bytes_list { value: [ "emmy" ] } } }
    454       feature { key: "dank" value { int64_list { value: [ 42 ] } } }
    455       feature { key: "gps" value { } }
    456     }
    457   ]
    458   ```
    459 
    460   And arguments
    461 
    462   ```
    463   example_names: ["input0", "input1"],
    464   features: {
    465       "kw": VarLenFeature(tf.string),
    466       "dank": VarLenFeature(tf.int64),
    467       "gps": VarLenFeature(tf.float32),
    468   }
    469   ```
    470 
    471   Then the output is a dictionary:
    472 
    473   ```python
    474   {
    475     "kw": SparseTensor(
    476         indices=[[0, 0], [0, 1], [1, 0]],
    477         values=["knit", "big", "emmy"]
    478         dense_shape=[2, 2]),
    479     "dank": SparseTensor(
    480         indices=[[1, 0]],
    481         values=[42],
    482         dense_shape=[2, 1]),
    483     "gps": SparseTensor(
    484         indices=[],
    485         values=[],
    486         dense_shape=[2, 0]),
    487   }
    488   ```
    489 
    490   For dense results in two serialized `Example`s:
    491 
    492   ```
    493   [
    494     features {
    495       feature { key: "age" value { int64_list { value: [ 0 ] } } }
    496       feature { key: "gender" value { bytes_list { value: [ "f" ] } } }
    497      },
    498      features {
    499       feature { key: "age" value { int64_list { value: [] } } }
    500       feature { key: "gender" value { bytes_list { value: [ "f" ] } } }
    501     }
    502   ]
    503   ```
    504 
    505   We can use arguments:
    506 
    507   ```
    508   example_names: ["input0", "input1"],
    509   features: {
    510       "age": FixedLenFeature([], dtype=tf.int64, default_value=-1),
    511       "gender": FixedLenFeature([], dtype=tf.string),
    512   }
    513   ```
    514 
    515   And the expected output is:
    516 
    517   ```python
    518   {
    519     "age": [[0], [-1]],
    520     "gender": [["f"], ["f"]],
    521   }
    522   ```
    523 
    524   An alternative to `VarLenFeature` to obtain a `SparseTensor` is
    525   `SparseFeature`. For example, given two `Example` input protos in
    526   `serialized`:
    527 
    528   ```
    529   [
    530     features {
    531       feature { key: "val" value { float_list { value: [ 0.5, -1.0 ] } } }
    532       feature { key: "ix" value { int64_list { value: [ 3, 20 ] } } }
    533     },
    534     features {
    535       feature { key: "val" value { float_list { value: [ 0.0 ] } } }
    536       feature { key: "ix" value { int64_list { value: [ 42 ] } } }
    537     }
    538   ]
    539   ```
    540 
    541   And arguments
    542 
    543   ```
    544   example_names: ["input0", "input1"],
    545   features: {
    546       "sparse": SparseFeature(
    547           index_key="ix", value_key="val", dtype=tf.float32, size=100),
    548   }
    549   ```
    550 
    551   Then the output is a dictionary:
    552 
    553   ```python
    554   {
    555     "sparse": SparseTensor(
    556         indices=[[0, 3], [0, 20], [1, 42]],
    557         values=[0.5, -1.0, 0.0]
    558         dense_shape=[2, 100]),
    559   }
    560   ```
    561 
    562   Args:
    563     serialized: A vector (1-D Tensor) of strings, a batch of binary
    564       serialized `Example` protos.
    565     features: A `dict` mapping feature keys to `FixedLenFeature`,
    566       `VarLenFeature`, and `SparseFeature` values.
    567     name: A name for this operation (optional).
    568     example_names: A vector (1-D Tensor) of strings (optional), the names of
    569       the serialized protos in the batch.
    570 
    571   Returns:
    572     A `dict` mapping feature keys to `Tensor` and `SparseTensor` values.
    573 
    574   Raises:
    575     ValueError: if any feature is invalid.
    576   """
    577   if not features:
    578     raise ValueError("Missing: features was %s." % features)
    579   features = _prepend_none_dimension(features)
    580   (sparse_keys, sparse_types, dense_keys, dense_types, dense_defaults,
    581    dense_shapes) = _features_to_raw_params(
    582        features,
    583        [VarLenFeature, SparseFeature, FixedLenFeature, FixedLenSequenceFeature])
    584   outputs = _parse_example_raw(
    585       serialized, example_names, sparse_keys, sparse_types, dense_keys,
    586       dense_types, dense_defaults, dense_shapes, name)
    587   return _construct_sparse_tensors_for_sparse_features(features, outputs)
    588 
    589 
    590 def _parse_example_raw(serialized,
    591                        names=None,
    592                        sparse_keys=None,
    593                        sparse_types=None,
    594                        dense_keys=None,
    595                        dense_types=None,
    596                        dense_defaults=None,
    597                        dense_shapes=None,
    598                        name=None):
    599   """Parses `Example` protos.
    600 
    601   Args:
    602     serialized: A vector (1-D Tensor) of strings, a batch of binary
    603       serialized `Example` protos.
    604     names: A vector (1-D Tensor) of strings (optional), the names of
    605       the serialized protos.
    606     sparse_keys: A list of string keys in the examples' features.
    607       The results for these keys will be returned as `SparseTensor` objects.
    608     sparse_types: A list of `DTypes` of the same length as `sparse_keys`.
    609       Only `tf.float32` (`FloatList`), `tf.int64` (`Int64List`),
    610       and `tf.string` (`BytesList`) are supported.
    611     dense_keys: A list of string keys in the examples' features.
    612       The results for these keys will be returned as `Tensor`s
    613     dense_types: A list of DTypes of the same length as `dense_keys`.
    614       Only `tf.float32` (`FloatList`), `tf.int64` (`Int64List`),
    615       and `tf.string` (`BytesList`) are supported.
    616     dense_defaults: A dict mapping string keys to `Tensor`s.
    617       The keys of the dict must match the dense_keys of the feature.
    618     dense_shapes: A list of tuples with the same length as `dense_keys`.
    619       The shape of the data for each dense feature referenced by `dense_keys`.
    620       Required for any input tensors identified by `dense_keys`.  Must be
    621       either fully defined, or may contain an unknown first dimension.
    622       An unknown first dimension means the feature is treated as having
    623       a variable number of blocks, and the output shape along this dimension
    624       is considered unknown at graph build time.  Padding is applied for
    625       minibatch elements smaller than the maximum number of blocks for the
    626       given feature along this dimension.
    627     name: A name for this operation (optional).
    628 
    629   Returns:
    630     A `dict` mapping keys to `Tensor`s and `SparseTensor`s.
    631 
    632   Raises:
    633     ValueError: If sparse and dense key sets intersect, or input lengths do not
    634       match up.
    635   """
    636   with ops.name_scope(name, "ParseExample", [serialized, names]):
    637     names = [] if names is None else names
    638     dense_defaults = collections.OrderedDict(
    639     ) if dense_defaults is None else dense_defaults
    640     sparse_keys = [] if sparse_keys is None else sparse_keys
    641     sparse_types = [] if sparse_types is None else sparse_types
    642     dense_keys = [] if dense_keys is None else dense_keys
    643     dense_types = [] if dense_types is None else dense_types
    644     dense_shapes = (
    645         [[]] * len(dense_keys) if dense_shapes is None else dense_shapes)
    646 
    647     num_dense = len(dense_keys)
    648     num_sparse = len(sparse_keys)
    649 
    650     if len(dense_shapes) != num_dense:
    651       raise ValueError("len(dense_shapes) != len(dense_keys): %d vs. %d"
    652                        % (len(dense_shapes), num_dense))
    653     if len(dense_types) != num_dense:
    654       raise ValueError("len(dense_types) != len(num_dense): %d vs. %d"
    655                        % (len(dense_types), num_dense))
    656     if len(sparse_types) != num_sparse:
    657       raise ValueError("len(sparse_types) != len(sparse_keys): %d vs. %d"
    658                        % (len(sparse_types), num_sparse))
    659     if num_dense + num_sparse == 0:
    660       raise ValueError("Must provide at least one sparse key or dense key")
    661     if not set(dense_keys).isdisjoint(set(sparse_keys)):
    662       raise ValueError(
    663           "Dense and sparse keys must not intersect; intersection: %s" %
    664           set(dense_keys).intersection(set(sparse_keys)))
    665 
    666     # Convert dense_shapes to TensorShape object.
    667     dense_shapes = [tensor_shape.as_shape(shape) for shape in dense_shapes]
    668 
    669     dense_defaults_vec = []
    670     for i, key in enumerate(dense_keys):
    671       default_value = dense_defaults.get(key)
    672       dense_shape = dense_shapes[i]
    673       if (dense_shape.ndims is not None and dense_shape.ndims > 0 and
    674           dense_shape[0].value is None):
    675         # Variable stride dense shape, the default value should be a
    676         # scalar padding value
    677         if default_value is None:
    678           default_value = ops.convert_to_tensor(
    679               "" if dense_types[i] == dtypes.string else 0,
    680               dtype=dense_types[i])
    681         else:
    682           # Reshape to a scalar to ensure user gets an error if they
    683           # provide a tensor that's not intended to be a padding value
    684           # (0 or 2+ elements).
    685           key_name = "padding_" + re.sub("[^A-Za-z0-9_.\\-/]", "_", key)
    686           default_value = ops.convert_to_tensor(
    687               default_value, dtype=dense_types[i], name=key_name)
    688           default_value = array_ops.reshape(default_value, [])
    689       else:
    690         if default_value is None:
    691           default_value = constant_op.constant([], dtype=dense_types[i])
    692         elif not isinstance(default_value, ops.Tensor):
    693           key_name = "key_" + re.sub("[^A-Za-z0-9_.\\-/]", "_", key)
    694           default_value = ops.convert_to_tensor(
    695               default_value, dtype=dense_types[i], name=key_name)
    696           default_value = array_ops.reshape(default_value, dense_shape)
    697 
    698       dense_defaults_vec.append(default_value)
    699 
    700     # Finally, convert dense_shapes to TensorShapeProto
    701     dense_shapes = [shape.as_proto() for shape in dense_shapes]
    702 
    703     # pylint: disable=protected-access
    704     outputs = gen_parsing_ops._parse_example(
    705         serialized=serialized,
    706         names=names,
    707         dense_defaults=dense_defaults_vec,
    708         sparse_keys=sparse_keys,
    709         sparse_types=sparse_types,
    710         dense_keys=dense_keys,
    711         dense_shapes=dense_shapes,
    712         name=name)
    713     # pylint: enable=protected-access
    714 
    715     (sparse_indices, sparse_values, sparse_shapes, dense_values) = outputs
    716 
    717     sparse_tensors = [
    718         sparse_tensor.SparseTensor(ix, val, shape) for (ix, val, shape)
    719         in zip(sparse_indices, sparse_values, sparse_shapes)]
    720 
    721     return dict(zip(sparse_keys + dense_keys, sparse_tensors + dense_values))
    722 
    723 
    724 @tf_export("parse_single_example")
    725 def parse_single_example(serialized, features, name=None, example_names=None):
    726   """Parses a single `Example` proto.
    727 
    728   Similar to `parse_example`, except:
    729 
    730   For dense tensors, the returned `Tensor` is identical to the output of
    731   `parse_example`, except there is no batch dimension, the output shape is the
    732   same as the shape given in `dense_shape`.
    733 
    734   For `SparseTensor`s, the first (batch) column of the indices matrix is removed
    735   (the indices matrix is a column vector), the values vector is unchanged, and
    736   the first (`batch_size`) entry of the shape vector is removed (it is now a
    737   single element vector).
    738 
    739   One might see performance advantages by batching `Example` protos with
    740   `parse_example` instead of using this function directly.
    741 
    742   Args:
    743     serialized: A scalar string Tensor, a single serialized Example.
    744       See `_parse_single_example_raw` documentation for more details.
    745     features: A `dict` mapping feature keys to `FixedLenFeature` or
    746       `VarLenFeature` values.
    747     name: A name for this operation (optional).
    748     example_names: (Optional) A scalar string Tensor, the associated name.
    749       See `_parse_single_example_raw` documentation for more details.
    750 
    751   Returns:
    752     A `dict` mapping feature keys to `Tensor` and `SparseTensor` values.
    753 
    754   Raises:
    755     ValueError: if any feature is invalid.
    756   """
    757   if not features:
    758     raise ValueError("Missing features.")
    759   if example_names is None:
    760     return parse_single_example_v2(serialized, features, name)
    761   features = _prepend_none_dimension(features)
    762   (sparse_keys, sparse_types, dense_keys, dense_types, dense_defaults,
    763    dense_shapes) = _features_to_raw_params(
    764        features,
    765        [VarLenFeature, FixedLenFeature, FixedLenSequenceFeature, SparseFeature])
    766   outputs = _parse_single_example_raw(
    767       serialized, example_names, sparse_keys, sparse_types, dense_keys,
    768       dense_types, dense_defaults, dense_shapes, name)
    769   return _construct_sparse_tensors_for_sparse_features(features, outputs)
    770 
    771 
    772 def _parse_single_example_raw(serialized,
    773                               names=None,
    774                               sparse_keys=None,
    775                               sparse_types=None,
    776                               dense_keys=None,
    777                               dense_types=None,
    778                               dense_defaults=None,
    779                               dense_shapes=None,
    780                               name=None):
    781   """Parses a single `Example` proto.
    782 
    783   Args:
    784     serialized: A scalar string Tensor, a single serialized Example.
    785       See `_parse_example_raw` documentation for more details.
    786     names: (Optional) A scalar string Tensor, the associated name.
    787       See `_parse_example_raw` documentation for more details.
    788     sparse_keys: See `_parse_example_raw` documentation for more details.
    789     sparse_types: See `_parse_example_raw` documentation for more details.
    790     dense_keys: See `_parse_example_raw` documentation for more details.
    791     dense_types: See `_parse_example_raw` documentation for more details.
    792     dense_defaults: See `_parse_example_raw` documentation for more details.
    793     dense_shapes: See `_parse_example_raw` documentation for more details.
    794     name: A name for this operation (optional).
    795 
    796   Returns:
    797     A `dict` mapping feature keys to `Tensor` and `SparseTensor` values.
    798 
    799   Raises:
    800     ValueError: if any feature is invalid.
    801   """
    802   with ops.name_scope(name, "ParseSingleExample", [serialized, names]):
    803     serialized = ops.convert_to_tensor(serialized)
    804     serialized_shape = serialized.get_shape()
    805     if serialized_shape.ndims is not None:
    806       if serialized_shape.ndims != 0:
    807         raise ValueError("Input serialized must be a scalar")
    808     else:
    809       serialized = control_flow_ops.with_dependencies(
    810           [control_flow_ops.Assert(
    811               math_ops.equal(array_ops.rank(serialized), 0),
    812               ["Input serialized must be a scalar"],
    813               name="SerializedIsScalar")],
    814           serialized,
    815           name="SerializedDependencies")
    816     serialized = array_ops.expand_dims(serialized, 0)
    817     if names is not None:
    818       names = ops.convert_to_tensor(names)
    819       names_shape = names.get_shape()
    820       if names_shape.ndims is not None:
    821         if names_shape.ndims != 0:
    822           raise ValueError("Input names must be a scalar")
    823       else:
    824         names = control_flow_ops.with_dependencies(
    825             [control_flow_ops.Assert(
    826                 math_ops.equal(array_ops.rank(names), 0),
    827                 ["Input names must be a scalar"],
    828                 name="NamesIsScalar")],
    829             names,
    830             name="NamesDependencies")
    831       names = array_ops.expand_dims(names, 0)
    832 
    833     outputs = _parse_example_raw(
    834         serialized,
    835         names=names,
    836         sparse_keys=sparse_keys,
    837         sparse_types=sparse_types,
    838         dense_keys=dense_keys,
    839         dense_types=dense_types,
    840         dense_defaults=dense_defaults,
    841         dense_shapes=dense_shapes,
    842         name=name)
    843     if dense_keys is not None:
    844       for d in dense_keys:
    845         d_name = re.sub("[^A-Za-z0-9_.\\-/]", "_", d)
    846         outputs[d] = array_ops.squeeze(
    847             outputs[d], [0], name="Squeeze_%s" % d_name)
    848     if sparse_keys is not None:
    849       for s in sparse_keys:
    850         s_name = re.sub("[^A-Za-z0-9_.\\-/]", "_", s)
    851         outputs[s] = sparse_tensor.SparseTensor(
    852             array_ops.slice(outputs[s].indices,
    853                             [0, 1], [-1, -1], name="Slice_Indices_%s" % s_name),
    854             outputs[s].values,
    855             array_ops.slice(outputs[s].dense_shape,
    856                             [1], [-1], name="Squeeze_Shape_%s" % s_name))
    857     return outputs
    858 
    859 
    860 @tf_export("parse_single_sequence_example")
    861 def parse_single_sequence_example(
    862     serialized, context_features=None, sequence_features=None,
    863     example_name=None, name=None):
    864   # pylint: disable=line-too-long
    865   """Parses a single `SequenceExample` proto.
    866 
    867   Parses a single serialized [`SequenceExample`](https://www.tensorflow.org/code/tensorflow/core/example/example.proto)
    868   proto given in `serialized`.
    869 
    870   This op parses a serialized sequence example into a tuple of dictionaries
    871   mapping keys to `Tensor` and `SparseTensor` objects respectively.
    872   The first dictionary contains mappings for keys appearing in
    873   `context_features`, and the second dictionary contains mappings for keys
    874   appearing in `sequence_features`.
    875 
    876   At least one of `context_features` and `sequence_features` must be provided
    877   and non-empty.
    878 
    879   The `context_features` keys are associated with a `SequenceExample` as a
    880   whole, independent of time / frame.  In contrast, the `sequence_features` keys
    881   provide a way to access variable-length data within the `FeatureList` section
    882   of the `SequenceExample` proto.  While the shapes of `context_features` values
    883   are fixed with respect to frame, the frame dimension (the first dimension)
    884   of `sequence_features` values may vary between `SequenceExample` protos,
    885   and even between `feature_list` keys within the same `SequenceExample`.
    886 
    887   `context_features` contains `VarLenFeature` and `FixedLenFeature` objects.
    888   Each `VarLenFeature` is mapped to a `SparseTensor`, and each `FixedLenFeature`
    889   is mapped to a `Tensor`, of the specified type, shape, and default value.
    890 
    891   `sequence_features` contains `VarLenFeature` and `FixedLenSequenceFeature`
    892   objects. Each `VarLenFeature` is mapped to a `SparseTensor`, and each
    893   `FixedLenSequenceFeature` is mapped to a `Tensor`, each of the specified type.
    894   The shape will be `(T,) + df.dense_shape` for `FixedLenSequenceFeature` `df`, where
    895   `T` is the length of the associated `FeatureList` in the `SequenceExample`.
    896   For instance, `FixedLenSequenceFeature([])` yields a scalar 1-D `Tensor` of
    897   static shape `[None]` and dynamic shape `[T]`, while
    898   `FixedLenSequenceFeature([k])` (for `int k >= 1`) yields a 2-D matrix `Tensor`
    899   of static shape `[None, k]` and dynamic shape `[T, k]`.
    900 
    901   Each `SparseTensor` corresponding to `sequence_features` represents a ragged
    902   vector.  Its indices are `[time, index]`, where `time` is the `FeatureList`
    903   entry and `index` is the value's index in the list of values associated with
    904   that time.
    905 
    906   `FixedLenFeature` entries with a `default_value` and `FixedLenSequenceFeature`
    907   entries with `allow_missing=True` are optional; otherwise, we will fail if
    908   that `Feature` or `FeatureList` is missing from any example in `serialized`.
    909 
    910   `example_name` may contain a descriptive name for the corresponding serialized
    911   proto. This may be useful for debugging purposes, but it has no effect on the
    912   output. If not `None`, `example_name` must be a scalar.
    913 
    914   Args:
    915     serialized: A scalar (0-D Tensor) of type string, a single binary
    916       serialized `SequenceExample` proto.
    917     context_features: A `dict` mapping feature keys to `FixedLenFeature` or
    918       `VarLenFeature` values. These features are associated with a
    919       `SequenceExample` as a whole.
    920     sequence_features: A `dict` mapping feature keys to
    921       `FixedLenSequenceFeature` or `VarLenFeature` values. These features are
    922       associated with data within the `FeatureList` section of the
    923       `SequenceExample` proto.
    924     example_name: A scalar (0-D Tensor) of strings (optional), the name of
    925       the serialized proto.
    926     name: A name for this operation (optional).
    927 
    928   Returns:
    929     A tuple of two `dict`s, each mapping keys to `Tensor`s and `SparseTensor`s.
    930     The first dict contains the context key/values.
    931     The second dict contains the feature_list key/values.
    932 
    933   Raises:
    934     ValueError: if any feature is invalid.
    935   """
    936   # pylint: enable=line-too-long
    937   if not (context_features or sequence_features):
    938     raise ValueError("Missing features.")
    939   (context_sparse_keys, context_sparse_types, context_dense_keys,
    940    context_dense_types, context_dense_defaults,
    941    context_dense_shapes) = _features_to_raw_params(
    942        context_features, [VarLenFeature, FixedLenFeature])
    943   (feature_list_sparse_keys, feature_list_sparse_types,
    944    feature_list_dense_keys, feature_list_dense_types,
    945    feature_list_dense_defaults,
    946    feature_list_dense_shapes) = _features_to_raw_params(
    947        sequence_features, [VarLenFeature, FixedLenSequenceFeature])
    948   return _parse_single_sequence_example_raw(
    949       serialized, context_sparse_keys, context_sparse_types,
    950       context_dense_keys, context_dense_types, context_dense_defaults,
    951       context_dense_shapes, feature_list_sparse_keys,
    952       feature_list_sparse_types, feature_list_dense_keys,
    953       feature_list_dense_types, feature_list_dense_shapes,
    954       feature_list_dense_defaults, example_name, name)
    955 
    956 
    957 def _parse_single_sequence_example_raw(serialized,
    958                                        context_sparse_keys=None,
    959                                        context_sparse_types=None,
    960                                        context_dense_keys=None,
    961                                        context_dense_types=None,
    962                                        context_dense_defaults=None,
    963                                        context_dense_shapes=None,
    964                                        feature_list_sparse_keys=None,
    965                                        feature_list_sparse_types=None,
    966                                        feature_list_dense_keys=None,
    967                                        feature_list_dense_types=None,
    968                                        feature_list_dense_shapes=None,
    969                                        feature_list_dense_defaults=None,
    970                                        debug_name=None,
    971                                        name=None):
    972   """Parses a single `SequenceExample` proto.
    973 
    974   Args:
    975     serialized: A scalar (0-D Tensor) of type string, a single binary
    976       serialized `SequenceExample` proto.
    977     context_sparse_keys: A list of string keys in the `SequenceExample`'s
    978       features.  The results for these keys will be returned as
    979       `SparseTensor` objects.
    980     context_sparse_types: A list of `DTypes`, the same length as `sparse_keys`.
    981       Only `tf.float32` (`FloatList`), `tf.int64` (`Int64List`),
    982       and `tf.string` (`BytesList`) are supported.
    983     context_dense_keys: A list of string keys in the examples' features.
    984       The results for these keys will be returned as `Tensor`s
    985     context_dense_types: A list of DTypes, same length as `context_dense_keys`.
    986       Only `tf.float32` (`FloatList`), `tf.int64` (`Int64List`),
    987       and `tf.string` (`BytesList`) are supported.
    988     context_dense_defaults: A dict mapping string keys to `Tensor`s.
    989       The keys of the dict must match the context_dense_keys of the feature.
    990     context_dense_shapes: A list of tuples, same length as `context_dense_keys`.
    991       The shape of the data for each context_dense feature referenced by
    992       `context_dense_keys`.  Required for any input tensors identified by
    993       `context_dense_keys` whose shapes are anything other than `[]` or `[1]`.
    994     feature_list_sparse_keys: A list of string keys in the `SequenceExample`'s
    995       feature_lists.  The results for these keys will be returned as
    996       `SparseTensor` objects.
    997     feature_list_sparse_types: A list of `DTypes`, same length as `sparse_keys`.
    998       Only `tf.float32` (`FloatList`), `tf.int64` (`Int64List`),
    999       and `tf.string` (`BytesList`) are supported.
   1000     feature_list_dense_keys: A list of string keys in the `SequenceExample`'s
   1001       features_lists. The results for these keys will be returned as `Tensor`s.
   1002     feature_list_dense_types: A list of `DTypes`, same length as
   1003       `feature_list_dense_keys`.  Only `tf.float32` (`FloatList`),
   1004       `tf.int64` (`Int64List`), and `tf.string` (`BytesList`) are supported.
   1005     feature_list_dense_shapes: A list of tuples, same length as
   1006       `feature_list_dense_keys`.  The shape of the data for each
   1007       `FeatureList` feature referenced by `feature_list_dense_keys`.
   1008     feature_list_dense_defaults: A dict mapping key strings to values.
   1009       The only currently allowed value is `None`.  Any key appearing
   1010       in this dict with value `None` is allowed to be missing from the
   1011       `SequenceExample`.  If missing, the key is treated as zero-length.
   1012     debug_name: A scalar (0-D Tensor) of strings (optional), the name of
   1013       the serialized proto.
   1014     name: A name for this operation (optional).
   1015 
   1016   Returns:
   1017     A tuple of two `dict`s, each mapping keys to `Tensor`s and `SparseTensor`s.
   1018     The first dict contains the context key/values.
   1019     The second dict contains the feature_list key/values.
   1020 
   1021   Raises:
   1022     ValueError: If context_sparse and context_dense key sets intersect,
   1023       if input lengths do not match up, or if a value in
   1024       feature_list_dense_defaults is not None.
   1025     TypeError: if feature_list_dense_defaults is not either None or a dict.
   1026   """
   1027   with ops.name_scope(name, "ParseSingleSequenceExample", [serialized]):
   1028     context_dense_defaults = (
   1029         {} if context_dense_defaults is None else context_dense_defaults)
   1030     context_sparse_keys = (
   1031         [] if context_sparse_keys is None else context_sparse_keys)
   1032     context_sparse_types = (
   1033         [] if context_sparse_types is None else context_sparse_types)
   1034     context_dense_keys = (
   1035         [] if context_dense_keys is None else context_dense_keys)
   1036     context_dense_types = (
   1037         [] if context_dense_types is None else context_dense_types)
   1038     context_dense_shapes = (
   1039         [[]] * len(context_dense_keys)
   1040         if context_dense_shapes is None else context_dense_shapes)
   1041     feature_list_sparse_keys = (
   1042         [] if feature_list_sparse_keys is None else feature_list_sparse_keys)
   1043     feature_list_sparse_types = (
   1044         [] if feature_list_sparse_types is None else feature_list_sparse_types)
   1045     feature_list_dense_keys = (
   1046         [] if feature_list_dense_keys is None else feature_list_dense_keys)
   1047     feature_list_dense_types = (
   1048         [] if feature_list_dense_types is None else feature_list_dense_types)
   1049     feature_list_dense_shapes = (
   1050         [[]] * len(feature_list_dense_keys)
   1051         if feature_list_dense_shapes is None else feature_list_dense_shapes)
   1052     feature_list_dense_defaults = (
   1053         dict() if feature_list_dense_defaults is None
   1054         else feature_list_dense_defaults)
   1055     debug_name = "" if debug_name is None else debug_name
   1056 
   1057     # Internal
   1058     feature_list_dense_missing_assumed_empty = []
   1059 
   1060     num_context_dense = len(context_dense_keys)
   1061     num_feature_list_dense = len(feature_list_dense_keys)
   1062     num_context_sparse = len(context_sparse_keys)
   1063     num_feature_list_sparse = len(feature_list_sparse_keys)
   1064 
   1065     if len(context_dense_shapes) != num_context_dense:
   1066       raise ValueError(
   1067           "len(context_dense_shapes) != len(context_dense_keys): %d vs. %d"
   1068           % (len(context_dense_shapes), num_context_dense))
   1069     if len(context_dense_types) != num_context_dense:
   1070       raise ValueError(
   1071           "len(context_dense_types) != len(num_context_dense): %d vs. %d"
   1072           % (len(context_dense_types), num_context_dense))
   1073     if len(feature_list_dense_shapes) != num_feature_list_dense:
   1074       raise ValueError(
   1075           "len(feature_list_dense_shapes) != len(feature_list_dense_keys): "
   1076           "%d vs. %d" % (len(feature_list_dense_shapes),
   1077                          num_feature_list_dense))
   1078     if len(feature_list_dense_types) != num_feature_list_dense:
   1079       raise ValueError(
   1080           "len(feature_list_dense_types) != len(num_feature_list_dense):"
   1081           "%d vs. %d" % (len(feature_list_dense_types), num_feature_list_dense))
   1082     if len(context_sparse_types) != num_context_sparse:
   1083       raise ValueError(
   1084           "len(context_sparse_types) != len(context_sparse_keys): %d vs. %d"
   1085           % (len(context_sparse_types), num_context_sparse))
   1086     if len(feature_list_sparse_types) != num_feature_list_sparse:
   1087       raise ValueError(
   1088           "len(feature_list_sparse_types) != len(feature_list_sparse_keys): "
   1089           "%d vs. %d"
   1090           % (len(feature_list_sparse_types), num_feature_list_sparse))
   1091     if (num_context_dense + num_context_sparse
   1092         + num_feature_list_dense + num_feature_list_sparse) == 0:
   1093       raise ValueError(
   1094           "Must provide at least one context_sparse key, context_dense key, "
   1095           ", feature_list_sparse key, or feature_list_dense key")
   1096     if not set(context_dense_keys).isdisjoint(set(context_sparse_keys)):
   1097       raise ValueError(
   1098           "context_dense and context_sparse keys must not intersect; "
   1099           "intersection: %s" %
   1100           set(context_dense_keys).intersection(set(context_sparse_keys)))
   1101     if not set(feature_list_dense_keys).isdisjoint(
   1102         set(feature_list_sparse_keys)):
   1103       raise ValueError(
   1104           "feature_list_dense and feature_list_sparse keys must not intersect; "
   1105           "intersection: %s" %
   1106           set(feature_list_dense_keys).intersection(
   1107               set(feature_list_sparse_keys)))
   1108     if not isinstance(feature_list_dense_defaults, dict):
   1109       raise TypeError("feature_list_dense_defaults must be a dict")
   1110     for k, v in feature_list_dense_defaults.items():
   1111       if v is not None:
   1112         raise ValueError("Value feature_list_dense_defaults[%s] must be None"
   1113                          % k)
   1114       feature_list_dense_missing_assumed_empty.append(k)
   1115 
   1116     context_dense_defaults_vec = []
   1117     for i, key in enumerate(context_dense_keys):
   1118       default_value = context_dense_defaults.get(key)
   1119       if default_value is None:
   1120         default_value = constant_op.constant([], dtype=context_dense_types[i])
   1121       elif not isinstance(default_value, ops.Tensor):
   1122         key_name = "key_" + re.sub("[^A-Za-z0-9_.\\-/]", "_", key)
   1123         default_value = ops.convert_to_tensor(
   1124             default_value, dtype=context_dense_types[i], name=key_name)
   1125         default_value = array_ops.reshape(
   1126             default_value, context_dense_shapes[i])
   1127 
   1128       context_dense_defaults_vec.append(default_value)
   1129 
   1130     context_dense_shapes = [tensor_shape.as_shape(shape).as_proto()
   1131                             for shape in context_dense_shapes]
   1132     feature_list_dense_shapes = [tensor_shape.as_shape(shape).as_proto()
   1133                                  for shape in feature_list_dense_shapes]
   1134 
   1135     # pylint: disable=protected-access
   1136     outputs = gen_parsing_ops._parse_single_sequence_example(
   1137         serialized=serialized,
   1138         debug_name=debug_name,
   1139         context_dense_defaults=context_dense_defaults_vec,
   1140         context_sparse_keys=context_sparse_keys,
   1141         context_sparse_types=context_sparse_types,
   1142         context_dense_keys=context_dense_keys,
   1143         context_dense_shapes=context_dense_shapes,
   1144         feature_list_sparse_keys=feature_list_sparse_keys,
   1145         feature_list_sparse_types=feature_list_sparse_types,
   1146         feature_list_dense_keys=feature_list_dense_keys,
   1147         feature_list_dense_types=feature_list_dense_types,
   1148         feature_list_dense_shapes=feature_list_dense_shapes,
   1149         feature_list_dense_missing_assumed_empty=(
   1150             feature_list_dense_missing_assumed_empty),
   1151         name=name)
   1152     # pylint: enable=protected-access
   1153 
   1154     (context_sparse_indices, context_sparse_values,
   1155      context_sparse_shapes, context_dense_values,
   1156      feature_list_sparse_indices, feature_list_sparse_values,
   1157      feature_list_sparse_shapes, feature_list_dense_values) = outputs
   1158 
   1159     context_sparse_tensors = [
   1160         sparse_tensor.SparseTensor(ix, val, shape) for (ix, val, shape)
   1161         in zip(context_sparse_indices,
   1162                context_sparse_values,
   1163                context_sparse_shapes)]
   1164 
   1165     feature_list_sparse_tensors = [
   1166         sparse_tensor.SparseTensor(ix, val, shape) for (ix, val, shape)
   1167         in zip(feature_list_sparse_indices,
   1168                feature_list_sparse_values,
   1169                feature_list_sparse_shapes)]
   1170 
   1171     context_output = dict(
   1172         zip(context_sparse_keys + context_dense_keys,
   1173             context_sparse_tensors + context_dense_values))
   1174     feature_list_output = dict(
   1175         zip(feature_list_sparse_keys + feature_list_dense_keys,
   1176             feature_list_sparse_tensors + feature_list_dense_values))
   1177 
   1178     return (context_output, feature_list_output)
   1179 
   1180 
   1181 # Swap `name` and `na_value` for backward compatibility.
   1182 @tf_export("decode_csv")
   1183 def decode_csv(records, record_defaults, field_delim=",",
   1184                use_quote_delim=True, name=None, na_value=""):
   1185   # pylint: disable=protected-access
   1186   """Convert CSV records to tensors. Each column maps to one tensor.
   1187 
   1188   RFC 4180 format is expected for the CSV records.
   1189   (https://tools.ietf.org/html/rfc4180)
   1190   Note that we allow leading and trailing spaces with int or float field.
   1191 
   1192   Args:
   1193     records: A `Tensor` of type `string`.
   1194       Each string is a record/row in the csv and all records should have
   1195       the same format.
   1196     record_defaults: A list of `Tensor` objects with specific types.
   1197       Acceptable types are `float32`, `float64`, `int32`, `int64`, `string`.
   1198       One tensor per column of the input record, with either a
   1199       scalar default value for that column or empty if the column is required.
   1200     field_delim: An optional `string`. Defaults to `","`.
   1201       char delimiter to separate fields in a record.
   1202     use_quote_delim: An optional `bool`. Defaults to `True`.
   1203       If false, treats double quotation marks as regular
   1204       characters inside of the string fields (ignoring RFC 4180, Section 2,
   1205       Bullet 5).
   1206     name: A name for the operation (optional).
   1207     na_value: Additional string to recognize as NA/NaN.
   1208 
   1209   Returns:
   1210     A list of `Tensor` objects. Has the same type as `record_defaults`.
   1211     Each tensor will have the same shape as records.
   1212   """
   1213   # TODO(martinwicke), remove the wrapper when new Python API generator is done.
   1214   return gen_parsing_ops._decode_csv(
   1215       records=records, record_defaults=record_defaults,
   1216       field_delim=field_delim, use_quote_delim=use_quote_delim,
   1217       na_value=na_value, name=name)
   1218   # pylint: enable=protected-access
   1219 
   1220 
   1221 # TODO(b/70890287): Combine the implementation of this op and
   1222 # `parse_single_example()` after 1/10/2018.
   1223 def parse_single_example_v2(serialized, features, name=None):
   1224   # pylint: disable=line-too-long
   1225   """Parses an `Example` proto into a `dict` of tensors.
   1226 
   1227   Parses a serialized
   1228   [`Example`](https://www.tensorflow.org/code/tensorflow/core/example/example.proto)
   1229   proto given in `serialized`.
   1230 
   1231   This op parses serialized examples into a dictionary mapping keys to `Tensor`
   1232   and `SparseTensor` objects. `features` is a dict from keys to `VarLenFeature`,
   1233   `SparseFeature`, and `FixedLenFeature` objects. Each `VarLenFeature`
   1234   and `SparseFeature` is mapped to a `SparseTensor`, and each
   1235   `FixedLenFeature` is mapped to a `Tensor`.
   1236 
   1237   Each `VarLenFeature` maps to a `SparseTensor` of the specified type
   1238   representing a ragged matrix. Its indices are `[index]` where
   1239   `index` is the value's index in the list of values associated with
   1240   that feature and example.
   1241 
   1242   Each `SparseFeature` maps to a `SparseTensor` of the specified type
   1243   representing a Tensor of `dense_shape` `SparseFeature.size`.
   1244   Its `values` come from the feature in the examples with key `value_key`.
   1245   A `values[i]` comes from a position `k` in the feature of an example at batch
   1246   entry `batch`. This positional information is recorded in `indices[i]` as
   1247   `[batch, index_0, index_1, ...]` where `index_j` is the `k-th` value of
   1248   the feature in the example at with key `SparseFeature.index_key[j]`.
   1249   In other words, we split the indices (except the first index indicating the
   1250   batch entry) of a `SparseTensor` by dimension into different features of the
   1251   `Example`. Due to its complexity a `VarLenFeature` should be preferred over a
   1252   `SparseFeature` whenever possible.
   1253 
   1254   Each `FixedLenFeature` `df` maps to a `Tensor` of the specified type (or
   1255   `tf.float32` if not specified) and shape `df.shape`.
   1256 
   1257   `FixedLenFeature` entries with a `default_value` are optional. With no default
   1258   value, we will fail if that `Feature` is missing from any example in
   1259   `serialized`.
   1260 
   1261   Each `FixedLenSequenceFeature` `df` maps to a `Tensor` of the specified type
   1262   (or `tf.float32` if not specified) and shape `(None,) + df.shape`.
   1263 
   1264   Args:
   1265     serialized: A scalar (0-D Tensor) string, a serialized `Example` proto.
   1266     features: A `dict` mapping feature keys to `FixedLenFeature`,
   1267       `VarLenFeature`, and `SparseFeature` values.
   1268     name: A name for this operation (optional).
   1269 
   1270   Returns:
   1271     A `dict` mapping feature keys to `Tensor` and `SparseTensor` values.
   1272 
   1273   Raises:
   1274     ValueError: if any feature is invalid.
   1275   """
   1276   if not features:
   1277     raise ValueError("Missing: features was %s." % features)
   1278   features = _prepend_none_dimension(features)
   1279   (sparse_keys, sparse_types, dense_keys, dense_types,
   1280    dense_defaults, dense_shapes) = _features_to_raw_params(
   1281        features,
   1282        [VarLenFeature, SparseFeature, FixedLenFeature, FixedLenSequenceFeature])
   1283   outputs = _parse_single_example_v2_raw(serialized, sparse_keys, sparse_types,
   1284                                          dense_keys, dense_types,
   1285                                          dense_defaults, dense_shapes, name)
   1286   return _construct_sparse_tensors_for_sparse_features(features, outputs)
   1287 
   1288 
   1289 def _parse_single_example_v2_raw(serialized, sparse_keys, sparse_types,
   1290                                  dense_keys, dense_types, dense_defaults,
   1291                                  dense_shapes, name):
   1292   """Parses `Example` protos.
   1293 
   1294   Args:
   1295     serialized: A scalar (0-D Tensor) string, containing a binary
   1296       serialized `Example` proto.
   1297     sparse_keys: A list of string keys in the examples' features.
   1298       The results for these keys will be returned as `SparseTensor` objects.
   1299     sparse_types: A list of `DTypes` of the same length as `sparse_keys`.
   1300       Only `tf.float32` (`FloatList`), `tf.int64` (`Int64List`),
   1301       and `tf.string` (`BytesList`) are supported.
   1302     dense_keys: A list of string keys in the examples' features.
   1303       The results for these keys will be returned as `Tensor`s
   1304     dense_types: A list of DTypes of the same length as `dense_keys`.
   1305       Only `tf.float32` (`FloatList`), `tf.int64` (`Int64List`),
   1306       and `tf.string` (`BytesList`) are supported.
   1307     dense_defaults: A dict mapping string keys to `Tensor`s.
   1308       The keys of the dict must match the dense_keys of the feature.
   1309     dense_shapes: A list of tuples with the same length as `dense_keys`.
   1310       The shape of the data for each dense feature referenced by `dense_keys`.
   1311       Required for any input tensors identified by `dense_keys`.  Must be
   1312       either fully defined, or may contain an unknown first dimension.
   1313       An unknown first dimension means the feature is treated as having
   1314       a variable number of blocks, and the output shape along this dimension
   1315       is considered unknown at graph build time.  Padding is applied for
   1316       minibatch elements smaller than the maximum number of blocks for the
   1317       given feature along this dimension.
   1318     name: A name for this operation (optional).
   1319 
   1320   Returns:
   1321     A `dict` mapping keys to `Tensor`s and `SparseTensor`s.
   1322 
   1323   Raises:
   1324     ValueError: If sparse and dense key sets intersect, or input lengths do not
   1325       match up.
   1326   """
   1327   with ops.name_scope(name, "ParseSingleExample", [serialized]):
   1328     serialized = ops.convert_to_tensor(serialized, name="serialized")
   1329     dense_defaults = collections.OrderedDict(
   1330     ) if dense_defaults is None else dense_defaults
   1331     sparse_keys = [] if sparse_keys is None else sparse_keys
   1332     sparse_types = [] if sparse_types is None else sparse_types
   1333     dense_keys = [] if dense_keys is None else dense_keys
   1334     dense_types = [] if dense_types is None else dense_types
   1335     dense_shapes = ([[]] * len(dense_keys)
   1336                     if dense_shapes is None else dense_shapes)
   1337 
   1338     num_dense = len(dense_keys)
   1339     num_sparse = len(sparse_keys)
   1340 
   1341     if len(dense_shapes) != num_dense:
   1342       raise ValueError("len(dense_shapes) != len(dense_keys): %d vs. %d" %
   1343                        (len(dense_shapes), num_dense))
   1344     if len(dense_types) != num_dense:
   1345       raise ValueError("len(dense_types) != len(num_dense): %d vs. %d" %
   1346                        (len(dense_types), num_dense))
   1347     if len(sparse_types) != num_sparse:
   1348       raise ValueError("len(sparse_types) != len(sparse_keys): %d vs. %d" %
   1349                        (len(sparse_types), num_sparse))
   1350     if num_dense + num_sparse == 0:
   1351       raise ValueError("Must provide at least one sparse key or dense key")
   1352     if not set(dense_keys).isdisjoint(set(sparse_keys)):
   1353       raise ValueError(
   1354           "Dense and sparse keys must not intersect; intersection: %s" %
   1355           set(dense_keys).intersection(set(sparse_keys)))
   1356 
   1357     # Convert dense_shapes to TensorShape object.
   1358     dense_shapes = [tensor_shape.as_shape(shape) for shape in dense_shapes]
   1359 
   1360     dense_defaults_vec = []
   1361     for i, key in enumerate(dense_keys):
   1362       default_value = dense_defaults.get(key)
   1363       dense_shape = dense_shapes[i]
   1364       if (dense_shape.ndims is not None and dense_shape.ndims > 0 and
   1365           dense_shape[0].value is None):
   1366         # Variable stride dense shape, the default value should be a
   1367         # scalar padding value
   1368         if default_value is None:
   1369           default_value = ops.convert_to_tensor(
   1370               "" if dense_types[i] == dtypes.string else 0,
   1371               dtype=dense_types[i])
   1372         else:
   1373           # Reshape to a scalar to ensure user gets an error if they
   1374           # provide a tensor that's not intended to be a padding value
   1375           # (0 or 2+ elements).
   1376           key_name = "padding_" + re.sub("[^A-Za-z0-9_.\\-/]", "_", key)
   1377           default_value = ops.convert_to_tensor(
   1378               default_value, dtype=dense_types[i], name=key_name)
   1379           default_value = array_ops.reshape(default_value, [])
   1380       else:
   1381         if default_value is None:
   1382           default_value = constant_op.constant([], dtype=dense_types[i])
   1383         elif not isinstance(default_value, ops.Tensor):
   1384           key_name = "key_" + re.sub("[^A-Za-z0-9_.\\-/]", "_", key)
   1385           default_value = ops.convert_to_tensor(
   1386               default_value, dtype=dense_types[i], name=key_name)
   1387           default_value = array_ops.reshape(default_value, dense_shape)
   1388 
   1389       dense_defaults_vec.append(default_value)
   1390 
   1391     # Finally, convert dense_shapes to TensorShapeProto
   1392     dense_shapes = [shape.as_proto() for shape in dense_shapes]
   1393 
   1394     # pylint: disable=protected-access
   1395     outputs = gen_parsing_ops.parse_single_example(
   1396         serialized=serialized,
   1397         dense_defaults=dense_defaults_vec,
   1398         num_sparse=len(sparse_keys),
   1399         sparse_keys=sparse_keys,
   1400         sparse_types=sparse_types,
   1401         dense_keys=dense_keys,
   1402         dense_shapes=dense_shapes,
   1403         name=name)
   1404     # pylint: enable=protected-access
   1405 
   1406     (sparse_indices, sparse_values, sparse_shapes, dense_values) = outputs
   1407 
   1408     sparse_tensors = [
   1409         sparse_tensor.SparseTensor(ix, val, shape)
   1410         for (ix, val,
   1411              shape) in zip(sparse_indices, sparse_values, sparse_shapes)
   1412     ]
   1413 
   1414     return dict(zip(sparse_keys + dense_keys, sparse_tensors + dense_values))
   1415