# Copyright 2015 The TensorFlow Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ==============================================================================

"""Parsing Ops."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import collections
import re

from tensorflow.python.framework import constant_op
from tensorflow.python.framework import dtypes
from tensorflow.python.framework import ops
from tensorflow.python.framework import sparse_tensor
from tensorflow.python.framework import tensor_shape
from tensorflow.python.ops import array_ops
from tensorflow.python.ops import control_flow_ops
from tensorflow.python.ops import gen_parsing_ops
from tensorflow.python.ops import math_ops
from tensorflow.python.ops import sparse_ops
# go/tf-wildcard-import
# pylint: disable=wildcard-import,undefined-variable
from tensorflow.python.ops.gen_parsing_ops import *
# pylint: enable=wildcard-import,undefined-variable
from tensorflow.python.platform import tf_logging
from tensorflow.python.util.tf_export import tf_export


# Register these parsing-related ops as having no gradient, so attempting to
# differentiate through them raises an explicit error instead of silently
# producing incorrect gradients.
ops.NotDifferentiable("DecodeRaw")
ops.NotDifferentiable("ParseTensor")
ops.NotDifferentiable("SerializeTensor")
ops.NotDifferentiable("StringToNumber")


@tf_export("VarLenFeature")
class VarLenFeature(collections.namedtuple("VarLenFeature",
                                           ["dtype"])):
  """Configuration for parsing a variable-length input feature.

  Fields:
    dtype: Data type of input.
  """
  pass


@tf_export("SparseFeature")
class SparseFeature(
    collections.namedtuple(
        "SparseFeature",
        ["index_key", "value_key", "dtype", "size", "already_sorted"])):
  """Configuration for parsing a sparse input feature from an `Example`.

  Note, preferably use `VarLenFeature` (possibly in combination with a
  `SequenceExample`) in order to parse out `SparseTensor`s instead of
  `SparseFeature` due to its simplicity.

  Closely mimicking the `SparseTensor` that will be obtained by parsing an
  `Example` with a `SparseFeature` config, a `SparseFeature` contains a

  * `value_key`: The name of key for a `Feature` in the `Example` whose parsed
    `Tensor` will be the resulting `SparseTensor.values`.

  * `index_key`: A list of names - one for each dimension in the resulting
    `SparseTensor` whose `indices[i][dim]` indicating the position of
    the `i`-th value in the `dim` dimension will be equal to the `i`-th value in
    the Feature with key named `index_key[dim]` in the `Example`.

  * `size`: A list of ints for the resulting `SparseTensor.dense_shape`.

  For example, we can represent the following 2D `SparseTensor`

  ```python
  SparseTensor(indices=[[3, 1], [20, 0]],
               values=[0.5, -1.0]
               dense_shape=[100, 3])
  ```

  with an `Example` input proto

  ```python
  features {
    feature { key: "val" value { float_list { value: [ 0.5, -1.0 ] } } }
    feature { key: "ix0" value { int64_list { value: [ 3, 20 ] } } }
    feature { key: "ix1" value { int64_list { value: [ 1, 0 ] } } }
  }
  ```

  and `SparseFeature` config with 2 `index_key`s

  ```python
  SparseFeature(index_key=["ix0", "ix1"],
                value_key="val",
                dtype=tf.float32,
                size=[100, 3])
  ```

  Fields:
    index_key: A single string name or a list of string names of index features.
      For each key the underlying feature's type must be `int64` and its length
      must always match that of the `value_key` feature.
      To represent `SparseTensor`s with a `dense_shape` of `rank` higher than 1
      a list of length `rank` should be used.
    value_key: Name of value feature. The underlying feature's type must
      be `dtype` and its length must always match that of all the `index_key`s'
      features.
    dtype: Data type of the `value_key` feature.
    size: A Python int or list thereof specifying the dense shape. Should be a
      list if and only if `index_key` is a list. In that case the list must be
      equal to the length of `index_key`. Each for each entry `i` all values in
      the `index_key`[i] feature must be in `[0, size[i])`.
    already_sorted: A Python boolean to specify whether the values in
      `value_key` are already sorted by their index position. If so skip
      sorting. False by default (optional).
  """

  # __new__ exists only to give `already_sorted` a default value; namedtuple
  # itself does not support field defaults in this Python version.
  def __new__(cls, index_key, value_key, dtype, size, already_sorted=False):
    return super(SparseFeature, cls).__new__(
        cls, index_key, value_key, dtype, size, already_sorted)


@tf_export("FixedLenFeature")
class FixedLenFeature(collections.namedtuple(
    "FixedLenFeature", ["shape", "dtype", "default_value"])):
  """Configuration for parsing a fixed-length input feature.

  To treat sparse input as dense, provide a `default_value`; otherwise,
  the parse functions will fail on any examples missing this feature.

  Fields:
    shape: Shape of input data.
    dtype: Data type of input.
    default_value: Value to be used if an example is missing this feature. It
        must be compatible with `dtype` and of the specified `shape`.
  """

  # __new__ exists only to give `default_value` a default of None.
  def __new__(cls, shape, dtype, default_value=None):
    return super(FixedLenFeature, cls).__new__(
        cls, shape, dtype, default_value)


@tf_export("FixedLenSequenceFeature")
class FixedLenSequenceFeature(collections.namedtuple(
    "FixedLenSequenceFeature",
    ["shape", "dtype", "allow_missing", "default_value"])):
  """Configuration for parsing a variable-length input feature into a `Tensor`.

  The resulting `Tensor` of parsing a single `SequenceExample` or `Example` has
  a static `shape` of `[None] + shape` and the specified `dtype`.
  The resulting `Tensor` of parsing a `batch_size` many `Example`s has
  a static `shape` of `[batch_size, None] + shape` and the specified `dtype`.
  The entries in the `batch` from different `Examples` will be padded with
  `default_value` to the maximum length present in the `batch`.

  To treat a sparse input as dense, provide `allow_missing=True`; otherwise,
  the parse functions will fail on any examples missing this feature.

  Fields:
    shape: Shape of input data for dimension 2 and higher. First dimension is
      of variable length `None`.
    dtype: Data type of input.
    allow_missing: Whether to allow this feature to be missing from a feature
      list item. Is available only for parsing `SequenceExample` not for
      parsing `Examples`.
    default_value: Scalar value to be used to pad multiple `Example`s to their
      maximum length. Irrelevant for parsing a single `Example` or
      `SequenceExample`. Defaults to "" for dtype string and 0 otherwise
      (optional).
  """

  # __new__ exists only to give `allow_missing` and `default_value` defaults.
  def __new__(cls, shape, dtype, allow_missing=False, default_value=None):
    return super(FixedLenSequenceFeature, cls).__new__(
        cls, shape, dtype, allow_missing, default_value)


def _features_to_raw_params(features, types):
  """Split feature tuples into raw params used by `gen_parsing_ops`.

  Args:
    features: A `dict` mapping feature keys to objects of a type in `types`.
    types: Type of features to allow, among `FixedLenFeature`, `VarLenFeature`,
      `SparseFeature`, and `FixedLenSequenceFeature`.

  Returns:
    Tuple of `sparse_keys`, `sparse_types`, `dense_keys`, `dense_types`,
    `dense_defaults`, `dense_shapes`.

  Raises:
    ValueError: if `features` contains an item not in `types`, or an invalid
      feature.
  """
  sparse_keys = []
  sparse_types = []
  dense_keys = []
  dense_types = []
  # When the graph is built twice, multiple dense_defaults in a normal dict
  # could come out in different orders. This will fail the _e2e_test which
  # expects exactly the same graph.
  # OrderedDict which preserves the order can solve the problem.
  dense_defaults = collections.OrderedDict()
  dense_shapes = []
  if features:
    # NOTE: We iterate over sorted keys to keep things deterministic.
    for key in sorted(features.keys()):
      feature = features[key]
      if isinstance(feature, VarLenFeature):
        if VarLenFeature not in types:
          raise ValueError("Unsupported VarLenFeature %s." % feature)
        if not feature.dtype:
          raise ValueError("Missing type for feature %s." % key)
        sparse_keys.append(key)
        sparse_types.append(feature.dtype)
      elif isinstance(feature, SparseFeature):
        if SparseFeature not in types:
          raise ValueError("Unsupported SparseFeature %s." % feature)

        if not feature.index_key:
          raise ValueError(
              "Missing index_key for SparseFeature %s." % feature)
        if not feature.value_key:
          raise ValueError(
              "Missing value_key for SparseFeature %s." % feature)
        if not feature.dtype:
          raise ValueError("Missing type for feature %s." % key)
        index_keys = feature.index_key
        if isinstance(index_keys, str):
          index_keys = [index_keys]
        elif len(index_keys) > 1:
          tf_logging.warning("SparseFeature is a complicated feature config "
                             "and should only be used after careful "
                             "consideration of VarLenFeature.")
        # A SparseFeature is decomposed into plain sparse keys (its index
        # keys plus its value key); the same underlying key may be shared by
        # several SparseFeatures, so re-use an existing entry when the dtypes
        # agree and raise when they conflict.
        for index_key in sorted(index_keys):
          if index_key in sparse_keys:
            dtype = sparse_types[sparse_keys.index(index_key)]
            if dtype != dtypes.int64:
              raise ValueError("Conflicting type %s vs int64 for feature %s." %
                               (dtype, index_key))
          else:
            sparse_keys.append(index_key)
            sparse_types.append(dtypes.int64)
        if feature.value_key in sparse_keys:
          dtype = sparse_types[sparse_keys.index(feature.value_key)]
          if dtype != feature.dtype:
            raise ValueError("Conflicting type %s vs %s for feature %s." % (
                dtype, feature.dtype, feature.value_key))
        else:
          sparse_keys.append(feature.value_key)
          sparse_types.append(feature.dtype)
      elif isinstance(feature, FixedLenFeature):
        if FixedLenFeature not in types:
          raise ValueError("Unsupported FixedLenFeature %s." % feature)
        if not feature.dtype:
          raise ValueError("Missing type for feature %s." % key)
        if feature.shape is None:
          raise ValueError("Missing shape for feature %s." % key)
        feature_tensor_shape = tensor_shape.as_shape(feature.shape)
        if (feature.shape and feature_tensor_shape.ndims and
            feature_tensor_shape.dims[0].value is None):
          raise ValueError("First dimension of shape for feature %s unknown. "
                           "Consider using FixedLenSequenceFeature." % key)
        if (feature.shape is not None and
            not feature_tensor_shape.is_fully_defined()):
          raise ValueError("All dimensions of shape for feature %s need to be "
                           "known but received %s." % (key, str(feature.shape)))
        dense_keys.append(key)
        dense_shapes.append(feature.shape)
        dense_types.append(feature.dtype)
        if feature.default_value is not None:
          dense_defaults[key] = feature.default_value
      elif isinstance(feature, FixedLenSequenceFeature):
        if FixedLenSequenceFeature not in types:
          raise ValueError("Unsupported FixedLenSequenceFeature %s." % feature)
        if not feature.dtype:
          raise ValueError("Missing type for feature %s." % key)
        if feature.shape is None:
          raise ValueError("Missing shape for feature %s." % key)
        dense_keys.append(key)
        dense_shapes.append(feature.shape)
        dense_types.append(feature.dtype)
        # A None default marks the feature as allowed-to-be-missing; a real
        # default_value (set next) additionally provides the padding value.
        if feature.allow_missing:
          dense_defaults[key] = None
        if feature.default_value is not None:
          dense_defaults[key] = feature.default_value
      else:
        raise ValueError("Invalid feature %s:%s." % (key, feature))
  return (
      sparse_keys, sparse_types, dense_keys, dense_types, dense_defaults,
      dense_shapes)


def _construct_sparse_tensors_for_sparse_features(features, tensor_dict):
  """Merges SparseTensors of indices and values of SparseFeatures.

  Constructs new dict based on `tensor_dict`. For `SparseFeatures` in the values
  of `features` expects their `index_key`s and `index_value`s to be present in
  `tensor_dict` mapping to `SparseTensor`s. Constructs a single `SparseTensor`
  from them, and adds it to the result with the key from `features`.
  Copies other keys and values from `tensor_dict` with keys present in
  `features`.

  Args:
    features: A `dict` mapping feature keys to `SparseFeature` values.
      Values of other types will be ignored.
    tensor_dict: A `dict` mapping feature keys to `Tensor` and `SparseTensor`
      values. Expected to contain keys of the `SparseFeature`s' `index_key`s and
      `value_key`s and mapping them to `SparseTensor`s.
  Returns:
    A `dict` mapping feature keys to `Tensor` and `SparseTensor` values.
    Similar
    to `tensor_dict` except each `SparseFeature`s in `features` results in a
    single `SparseTensor`.
  """
  tensor_dict = dict(tensor_dict)  # Do not modify argument passed in.
  # Construct SparseTensors for SparseFeatures.
  for key in sorted(features.keys()):
    feature = features[key]
    if isinstance(feature, SparseFeature):
      if isinstance(feature.index_key, str):
        sp_ids = tensor_dict[feature.index_key]
      else:
        sp_ids = [tensor_dict[index_key] for index_key in feature.index_key]
      sp_values = tensor_dict[feature.value_key]
      tensor_dict[key] = sparse_ops.sparse_merge(
          sp_ids,
          sp_values,
          vocab_size=feature.size,
          already_sorted=feature.already_sorted)
  # Remove tensors from dictionary that were only used to construct
  # SparseTensors for SparseFeature.
  for key in set(tensor_dict) - set(features):
    del tensor_dict[key]
  return tensor_dict


def _prepend_none_dimension(features):
  """Returns a copy of `features` where each `FixedLenSequenceFeature` has its
  shape prefixed with a variable-length `None` dimension.

  Non-`FixedLenSequenceFeature` entries are passed through unchanged. Raises
  ValueError if a `FixedLenSequenceFeature` has `allow_missing=False`.
  """
  if features:
    modified_features = dict(features)  # Create a copy to modify
    for key, feature in features.items():
      if isinstance(feature, FixedLenSequenceFeature):
        if not feature.allow_missing:
          raise ValueError("Unsupported: FixedLenSequenceFeature requires "
                           "allow_missing to be True.")
        modified_features[key] = FixedLenSequenceFeature(
            [None] + list(feature.shape),
            feature.dtype,
            feature.allow_missing,
            feature.default_value)
    return modified_features
  else:
    return features


@tf_export("parse_example")
def parse_example(serialized, features, name=None, example_names=None):
  # pylint: disable=line-too-long
  """Parses `Example` protos into a `dict` of tensors.

  Parses a number of serialized [`Example`](https://www.tensorflow.org/code/tensorflow/core/example/example.proto)
  protos given in `serialized`. We refer to `serialized` as a batch with
  `batch_size` many entries of individual `Example` protos.

  `example_names` may contain descriptive names for the corresponding serialized
  protos. These may be useful for debugging purposes, but they have no effect on
  the output. If not `None`, `example_names` must be the same length as
  `serialized`.

  This op parses serialized examples into a dictionary mapping keys to `Tensor`
  and `SparseTensor` objects. `features` is a dict from keys to `VarLenFeature`,
  `SparseFeature`, and `FixedLenFeature` objects. Each `VarLenFeature`
  and `SparseFeature` is mapped to a `SparseTensor`, and each
  `FixedLenFeature` is mapped to a `Tensor`.

  Each `VarLenFeature` maps to a `SparseTensor` of the specified type
  representing a ragged matrix. Its indices are `[batch, index]` where `batch`
  identifies the example in `serialized`, and `index` is the value's index in
  the list of values associated with that feature and example.

  Each `SparseFeature` maps to a `SparseTensor` of the specified type
  representing a Tensor of `dense_shape` `[batch_size] + SparseFeature.size`.
  Its `values` come from the feature in the examples with key `value_key`.
  A `values[i]` comes from a position `k` in the feature of an example at batch
  entry `batch`. This positional information is recorded in `indices[i]` as
  `[batch, index_0, index_1, ...]` where `index_j` is the `k-th` value of
  the feature in the example at with key `SparseFeature.index_key[j]`.
  In other words, we split the indices (except the first index indicating the
  batch entry) of a `SparseTensor` by dimension into different features of the
  `Example`. Due to its complexity a `VarLenFeature` should be preferred over a
  `SparseFeature` whenever possible.

  Each `FixedLenFeature` `df` maps to a `Tensor` of the specified type (or
  `tf.float32` if not specified) and shape `(serialized.size(),) + df.shape`.

  `FixedLenFeature` entries with a `default_value` are optional. With no default
  value, we will fail if that `Feature` is missing from any example in
  `serialized`.

  Each `FixedLenSequenceFeature` `df` maps to a `Tensor` of the specified type
  (or `tf.float32` if not specified) and shape
  `(serialized.size(), None) + df.shape`.
  All examples in `serialized` will be padded with `default_value` along the
  second dimension.

  Examples:

  For example, if one expects a `tf.float32` `VarLenFeature` `ft` and three
  serialized `Example`s are provided:

  ```
  serialized = [
    features
      { feature { key: "ft" value { float_list { value: [1.0, 2.0] } } } },
    features
      { feature []},
    features
      { feature { key: "ft" value { float_list { value: [3.0] } } }
  ]
  ```

  then the output will look like:

  ```python
  {"ft": SparseTensor(indices=[[0, 0], [0, 1], [2, 0]],
                      values=[1.0, 2.0, 3.0],
                      dense_shape=(3, 2)) }
  ```

  If instead a `FixedLenSequenceFeature` with `default_value = -1.0` and
  `shape=[]` is used then the output will look like:

  ```python
  {"ft": [[1.0, 2.0], [3.0, -1.0]]}
  ```

  Given two `Example` input protos in `serialized`:

  ```
  [
    features {
      feature { key: "kw" value { bytes_list { value: [ "knit", "big" ] } } }
      feature { key: "gps" value { float_list { value: [] } } }
    },
    features {
      feature { key: "kw" value { bytes_list { value: [ "emmy" ] } } }
      feature { key: "dank" value { int64_list { value: [ 42 ] } } }
      feature { key: "gps" value { } }
    }
  ]
  ```

  And arguments

  ```
  example_names: ["input0", "input1"],
  features: {
      "kw": VarLenFeature(tf.string),
      "dank": VarLenFeature(tf.int64),
      "gps": VarLenFeature(tf.float32),
  }
  ```

  Then the output is a dictionary:

  ```python
  {
    "kw": SparseTensor(
        indices=[[0, 0], [0, 1], [1, 0]],
        values=["knit", "big", "emmy"]
        dense_shape=[2, 2]),
    "dank": SparseTensor(
        indices=[[1, 0]],
        values=[42],
        dense_shape=[2, 1]),
    "gps": SparseTensor(
        indices=[],
        values=[],
        dense_shape=[2, 0]),
  }
  ```

  For dense results in two serialized `Example`s:

  ```
  [
    features {
      feature { key: "age" value { int64_list { value: [ 0 ] } } }
      feature { key: "gender" value { bytes_list { value: [ "f" ] } } }
    },
    features {
      feature { key: "age" value { int64_list { value: [] } } }
      feature { key: "gender" value { bytes_list { value: [ "f" ] } } }
    }
  ]
  ```

  We can use arguments:

  ```
  example_names: ["input0", "input1"],
  features: {
      "age": FixedLenFeature([], dtype=tf.int64, default_value=-1),
      "gender": FixedLenFeature([], dtype=tf.string),
  }
  ```

  And the expected output is:

  ```python
  {
    "age": [[0], [-1]],
    "gender": [["f"], ["f"]],
  }
  ```

  An alternative to `VarLenFeature` to obtain a `SparseTensor` is
  `SparseFeature`. For example, given two `Example` input protos in
  `serialized`:

  ```
  [
    features {
      feature { key: "val" value { float_list { value: [ 0.5, -1.0 ] } } }
      feature { key: "ix" value { int64_list { value: [ 3, 20 ] } } }
    },
    features {
      feature { key: "val" value { float_list { value: [ 0.0 ] } } }
      feature { key: "ix" value { int64_list { value: [ 42 ] } } }
    }
  ]
  ```

  And arguments

  ```
  example_names: ["input0", "input1"],
  features: {
      "sparse": SparseFeature(
          index_key="ix", value_key="val", dtype=tf.float32, size=100),
  }
  ```

  Then the output is a dictionary:

  ```python
  {
    "sparse": SparseTensor(
        indices=[[0, 3], [0, 20], [1, 42]],
        values=[0.5, -1.0, 0.0]
        dense_shape=[2, 100]),
  }
  ```

  Args:
    serialized: A vector (1-D Tensor) of strings, a batch of binary
      serialized `Example` protos.
    features: A `dict` mapping feature keys to `FixedLenFeature`,
      `VarLenFeature`, and `SparseFeature` values.
    name: A name for this operation (optional).
    example_names: A vector (1-D Tensor) of strings (optional), the names of
      the serialized protos in the batch.

  Returns:
    A `dict` mapping feature keys to `Tensor` and `SparseTensor` values.

  Raises:
    ValueError: if any feature is invalid.
  """
  if not features:
    raise ValueError("Missing: features was %s." % features)
  features = _prepend_none_dimension(features)
  # Flatten the feature configs into the raw key/type/default/shape lists
  # expected by the generated parse op, then re-assemble SparseFeatures from
  # the raw outputs below.
  (sparse_keys, sparse_types, dense_keys, dense_types, dense_defaults,
   dense_shapes) = _features_to_raw_params(
       features,
       [VarLenFeature, SparseFeature, FixedLenFeature, FixedLenSequenceFeature])
  outputs = _parse_example_raw(
      serialized, example_names, sparse_keys, sparse_types, dense_keys,
      dense_types, dense_defaults, dense_shapes, name)
  return _construct_sparse_tensors_for_sparse_features(features, outputs)


def _parse_example_raw(serialized,
                       names=None,
                       sparse_keys=None,
                       sparse_types=None,
                       dense_keys=None,
                       dense_types=None,
                       dense_defaults=None,
                       dense_shapes=None,
                       name=None):
  """Parses `Example` protos.

  Args:
    serialized: A vector (1-D Tensor) of strings, a batch of binary
      serialized `Example` protos.
    names: A vector (1-D Tensor) of strings (optional), the names of
      the serialized protos.
    sparse_keys: A list of string keys in the examples' features.
      The results for these keys will be returned as `SparseTensor` objects.
    sparse_types: A list of `DTypes` of the same length as `sparse_keys`.
      Only `tf.float32` (`FloatList`), `tf.int64` (`Int64List`),
      and `tf.string` (`BytesList`) are supported.
    dense_keys: A list of string keys in the examples' features.
      The results for these keys will be returned as `Tensor`s
    dense_types: A list of DTypes of the same length as `dense_keys`.
614 Only `tf.float32` (`FloatList`), `tf.int64` (`Int64List`), 615 and `tf.string` (`BytesList`) are supported. 616 dense_defaults: A dict mapping string keys to `Tensor`s. 617 The keys of the dict must match the dense_keys of the feature. 618 dense_shapes: A list of tuples with the same length as `dense_keys`. 619 The shape of the data for each dense feature referenced by `dense_keys`. 620 Required for any input tensors identified by `dense_keys`. Must be 621 either fully defined, or may contain an unknown first dimension. 622 An unknown first dimension means the feature is treated as having 623 a variable number of blocks, and the output shape along this dimension 624 is considered unknown at graph build time. Padding is applied for 625 minibatch elements smaller than the maximum number of blocks for the 626 given feature along this dimension. 627 name: A name for this operation (optional). 628 629 Returns: 630 A `dict` mapping keys to `Tensor`s and `SparseTensor`s. 631 632 Raises: 633 ValueError: If sparse and dense key sets intersect, or input lengths do not 634 match up. 635 """ 636 with ops.name_scope(name, "ParseExample", [serialized, names]): 637 names = [] if names is None else names 638 dense_defaults = collections.OrderedDict( 639 ) if dense_defaults is None else dense_defaults 640 sparse_keys = [] if sparse_keys is None else sparse_keys 641 sparse_types = [] if sparse_types is None else sparse_types 642 dense_keys = [] if dense_keys is None else dense_keys 643 dense_types = [] if dense_types is None else dense_types 644 dense_shapes = ( 645 [[]] * len(dense_keys) if dense_shapes is None else dense_shapes) 646 647 num_dense = len(dense_keys) 648 num_sparse = len(sparse_keys) 649 650 if len(dense_shapes) != num_dense: 651 raise ValueError("len(dense_shapes) != len(dense_keys): %d vs. %d" 652 % (len(dense_shapes), num_dense)) 653 if len(dense_types) != num_dense: 654 raise ValueError("len(dense_types) != len(num_dense): %d vs. 
%d" 655 % (len(dense_types), num_dense)) 656 if len(sparse_types) != num_sparse: 657 raise ValueError("len(sparse_types) != len(sparse_keys): %d vs. %d" 658 % (len(sparse_types), num_sparse)) 659 if num_dense + num_sparse == 0: 660 raise ValueError("Must provide at least one sparse key or dense key") 661 if not set(dense_keys).isdisjoint(set(sparse_keys)): 662 raise ValueError( 663 "Dense and sparse keys must not intersect; intersection: %s" % 664 set(dense_keys).intersection(set(sparse_keys))) 665 666 # Convert dense_shapes to TensorShape object. 667 dense_shapes = [tensor_shape.as_shape(shape) for shape in dense_shapes] 668 669 dense_defaults_vec = [] 670 for i, key in enumerate(dense_keys): 671 default_value = dense_defaults.get(key) 672 dense_shape = dense_shapes[i] 673 if (dense_shape.ndims is not None and dense_shape.ndims > 0 and 674 dense_shape[0].value is None): 675 # Variable stride dense shape, the default value should be a 676 # scalar padding value 677 if default_value is None: 678 default_value = ops.convert_to_tensor( 679 "" if dense_types[i] == dtypes.string else 0, 680 dtype=dense_types[i]) 681 else: 682 # Reshape to a scalar to ensure user gets an error if they 683 # provide a tensor that's not intended to be a padding value 684 # (0 or 2+ elements). 
685 key_name = "padding_" + re.sub("[^A-Za-z0-9_.\\-/]", "_", key) 686 default_value = ops.convert_to_tensor( 687 default_value, dtype=dense_types[i], name=key_name) 688 default_value = array_ops.reshape(default_value, []) 689 else: 690 if default_value is None: 691 default_value = constant_op.constant([], dtype=dense_types[i]) 692 elif not isinstance(default_value, ops.Tensor): 693 key_name = "key_" + re.sub("[^A-Za-z0-9_.\\-/]", "_", key) 694 default_value = ops.convert_to_tensor( 695 default_value, dtype=dense_types[i], name=key_name) 696 default_value = array_ops.reshape(default_value, dense_shape) 697 698 dense_defaults_vec.append(default_value) 699 700 # Finally, convert dense_shapes to TensorShapeProto 701 dense_shapes = [shape.as_proto() for shape in dense_shapes] 702 703 # pylint: disable=protected-access 704 outputs = gen_parsing_ops._parse_example( 705 serialized=serialized, 706 names=names, 707 dense_defaults=dense_defaults_vec, 708 sparse_keys=sparse_keys, 709 sparse_types=sparse_types, 710 dense_keys=dense_keys, 711 dense_shapes=dense_shapes, 712 name=name) 713 # pylint: enable=protected-access 714 715 (sparse_indices, sparse_values, sparse_shapes, dense_values) = outputs 716 717 sparse_tensors = [ 718 sparse_tensor.SparseTensor(ix, val, shape) for (ix, val, shape) 719 in zip(sparse_indices, sparse_values, sparse_shapes)] 720 721 return dict(zip(sparse_keys + dense_keys, sparse_tensors + dense_values)) 722 723 724 @tf_export("parse_single_example") 725 def parse_single_example(serialized, features, name=None, example_names=None): 726 """Parses a single `Example` proto. 727 728 Similar to `parse_example`, except: 729 730 For dense tensors, the returned `Tensor` is identical to the output of 731 `parse_example`, except there is no batch dimension, the output shape is the 732 same as the shape given in `dense_shape`. 
  For `SparseTensor`s, the first (batch) column of the indices matrix is removed
  (the indices matrix is a column vector), the values vector is unchanged, and
  the first (`batch_size`) entry of the shape vector is removed (it is now a
  single element vector).

  One might see performance advantages by batching `Example` protos with
  `parse_example` instead of using this function directly.

  Args:
    serialized: A scalar string Tensor, a single serialized Example.
      See `_parse_single_example_raw` documentation for more details.
    features: A `dict` mapping feature keys to `FixedLenFeature` or
      `VarLenFeature` values.
    name: A name for this operation (optional).
    example_names: (Optional) A scalar string Tensor, the associated name.
      See `_parse_single_example_raw` documentation for more details.

  Returns:
    A `dict` mapping feature keys to `Tensor` and `SparseTensor` values.

  Raises:
    ValueError: if any feature is invalid.
  """
  if not features:
    raise ValueError("Missing features.")
  # Fast path: when no example_names are supplied, delegate to the
  # single-example implementation (backed by the ParseSingleExample op)
  # instead of the batched parse-then-unbatch path below.
  if example_names is None:
    return parse_single_example_v2(serialized, features, name)
  features = _prepend_none_dimension(features)
  # Flatten the feature-config dict into the parallel key/type/shape/default
  # lists expected by the raw parsing helper.
  (sparse_keys, sparse_types, dense_keys, dense_types, dense_defaults,
   dense_shapes) = _features_to_raw_params(
       features,
       [VarLenFeature, FixedLenFeature, FixedLenSequenceFeature, SparseFeature])
  outputs = _parse_single_example_raw(
      serialized, example_names, sparse_keys, sparse_types, dense_keys,
      dense_types, dense_defaults, dense_shapes, name)
  # Re-assemble any SparseFeature configs from the raw sparse outputs.
  return _construct_sparse_tensors_for_sparse_features(features, outputs)


def _parse_single_example_raw(serialized,
                              names=None,
                              sparse_keys=None,
                              sparse_types=None,
                              dense_keys=None,
                              dense_types=None,
                              dense_defaults=None,
                              dense_shapes=None,
                              name=None):
  """Parses a single `Example` proto.

  Implemented by reshaping the scalar input into a batch of one, calling the
  batched `_parse_example_raw`, and then stripping the leading batch dimension
  from every output.

  Args:
    serialized: A scalar string Tensor, a single serialized Example.
      See `_parse_example_raw` documentation for more details.
    names: (Optional) A scalar string Tensor, the associated name.
      See `_parse_example_raw` documentation for more details.
    sparse_keys: See `_parse_example_raw` documentation for more details.
    sparse_types: See `_parse_example_raw` documentation for more details.
    dense_keys: See `_parse_example_raw` documentation for more details.
    dense_types: See `_parse_example_raw` documentation for more details.
    dense_defaults: See `_parse_example_raw` documentation for more details.
    dense_shapes: See `_parse_example_raw` documentation for more details.
    name: A name for this operation (optional).

  Returns:
    A `dict` mapping feature keys to `Tensor` and `SparseTensor` values.

  Raises:
    ValueError: if any feature is invalid.
  """
  with ops.name_scope(name, "ParseSingleExample", [serialized, names]):
    serialized = ops.convert_to_tensor(serialized)
    serialized_shape = serialized.get_shape()
    # Statically reject non-scalar input when the rank is known; otherwise
    # fall back to a runtime Assert on the dynamic rank.
    if serialized_shape.ndims is not None:
      if serialized_shape.ndims != 0:
        raise ValueError("Input serialized must be a scalar")
    else:
      serialized = control_flow_ops.with_dependencies(
          [control_flow_ops.Assert(
              math_ops.equal(array_ops.rank(serialized), 0),
              ["Input serialized must be a scalar"],
              name="SerializedIsScalar")],
          serialized,
          name="SerializedDependencies")
    # Promote the scalar to a batch of size one so the batched parser applies.
    serialized = array_ops.expand_dims(serialized, 0)
    if names is not None:
      names = ops.convert_to_tensor(names)
      names_shape = names.get_shape()
      # Same scalar check (static when possible, dynamic otherwise) for names.
      if names_shape.ndims is not None:
        if names_shape.ndims != 0:
          raise ValueError("Input names must be a scalar")
      else:
        names = control_flow_ops.with_dependencies(
            [control_flow_ops.Assert(
                math_ops.equal(array_ops.rank(names), 0),
                ["Input names must be a scalar"],
                name="NamesIsScalar")],
            names,
            name="NamesDependencies")
      names = array_ops.expand_dims(names, 0)

    outputs = _parse_example_raw(
        serialized,
        names=names,
        sparse_keys=sparse_keys,
        sparse_types=sparse_types,
        dense_keys=dense_keys,
        dense_types=dense_types,
        dense_defaults=dense_defaults,
        dense_shapes=dense_shapes,
        name=name)
    # Strip the synthetic batch dimension from each dense output.
    if dense_keys is not None:
      for d in dense_keys:
        d_name = re.sub("[^A-Za-z0-9_.\\-/]", "_", d)
        outputs[d] = array_ops.squeeze(
            outputs[d], [0], name="Squeeze_%s" % d_name)
    # For sparse outputs: drop the first (batch) column of the indices and the
    # first entry of the dense_shape; values are unchanged.
    if sparse_keys is not None:
      for s in sparse_keys:
        s_name = re.sub("[^A-Za-z0-9_.\\-/]", "_", s)
        outputs[s] = sparse_tensor.SparseTensor(
            array_ops.slice(outputs[s].indices,
                            [0, 1], [-1, -1], name="Slice_Indices_%s" % s_name),
            outputs[s].values,
            array_ops.slice(outputs[s].dense_shape,
                            [1], [-1], name="Squeeze_Shape_%s" % s_name))
    return outputs


@tf_export("parse_single_sequence_example")
def parse_single_sequence_example(
    serialized, context_features=None, sequence_features=None,
    example_name=None, name=None):
  # pylint: disable=line-too-long
  """Parses a single `SequenceExample` proto.

  Parses a single serialized [`SequenceExample`](https://www.tensorflow.org/code/tensorflow/core/example/example.proto)
  proto given in `serialized`.

  This op parses a serialized sequence example into a tuple of dictionaries
  mapping keys to `Tensor` and `SparseTensor` objects respectively.
  The first dictionary contains mappings for keys appearing in
  `context_features`, and the second dictionary contains mappings for keys
  appearing in `sequence_features`.

  At least one of `context_features` and `sequence_features` must be provided
  and non-empty.

  The `context_features` keys are associated with a `SequenceExample` as a
  whole, independent of time / frame.
  In contrast, the `sequence_features` keys
  provide a way to access variable-length data within the `FeatureList` section
  of the `SequenceExample` proto. While the shapes of `context_features` values
  are fixed with respect to frame, the frame dimension (the first dimension)
  of `sequence_features` values may vary between `SequenceExample` protos,
  and even between `feature_list` keys within the same `SequenceExample`.

  `context_features` contains `VarLenFeature` and `FixedLenFeature` objects.
  Each `VarLenFeature` is mapped to a `SparseTensor`, and each `FixedLenFeature`
  is mapped to a `Tensor`, of the specified type, shape, and default value.

  `sequence_features` contains `VarLenFeature` and `FixedLenSequenceFeature`
  objects. Each `VarLenFeature` is mapped to a `SparseTensor`, and each
  `FixedLenSequenceFeature` is mapped to a `Tensor`, each of the specified type.
  The shape will be `(T,) + df.dense_shape` for `FixedLenSequenceFeature` `df`, where
  `T` is the length of the associated `FeatureList` in the `SequenceExample`.
  For instance, `FixedLenSequenceFeature([])` yields a scalar 1-D `Tensor` of
  static shape `[None]` and dynamic shape `[T]`, while
  `FixedLenSequenceFeature([k])` (for `int k >= 1`) yields a 2-D matrix `Tensor`
  of static shape `[None, k]` and dynamic shape `[T, k]`.

  Each `SparseTensor` corresponding to `sequence_features` represents a ragged
  vector. Its indices are `[time, index]`, where `time` is the `FeatureList`
  entry and `index` is the value's index in the list of values associated with
  that time.

  `FixedLenFeature` entries with a `default_value` and `FixedLenSequenceFeature`
  entries with `allow_missing=True` are optional; otherwise, we will fail if
  that `Feature` or `FeatureList` is missing from any example in `serialized`.

  `example_name` may contain a descriptive name for the corresponding serialized
  proto. This may be useful for debugging purposes, but it has no effect on the
  output. If not `None`, `example_name` must be a scalar.

  Args:
    serialized: A scalar (0-D Tensor) of type string, a single binary
      serialized `SequenceExample` proto.
    context_features: A `dict` mapping feature keys to `FixedLenFeature` or
      `VarLenFeature` values. These features are associated with a
      `SequenceExample` as a whole.
    sequence_features: A `dict` mapping feature keys to
      `FixedLenSequenceFeature` or `VarLenFeature` values. These features are
      associated with data within the `FeatureList` section of the
      `SequenceExample` proto.
    example_name: A scalar (0-D Tensor) of strings (optional), the name of
      the serialized proto.
    name: A name for this operation (optional).

  Returns:
    A tuple of two `dict`s, each mapping keys to `Tensor`s and `SparseTensor`s.
    The first dict contains the context key/values.
    The second dict contains the feature_list key/values.

  Raises:
    ValueError: if any feature is invalid.
  """
  # pylint: enable=line-too-long
  if not (context_features or sequence_features):
    raise ValueError("Missing features.")
  # Flatten each feature-config dict into the parallel key/type/shape/default
  # lists that the raw parsing helper below expects.
  (context_sparse_keys, context_sparse_types, context_dense_keys,
   context_dense_types, context_dense_defaults,
   context_dense_shapes) = _features_to_raw_params(
       context_features, [VarLenFeature, FixedLenFeature])
  (feature_list_sparse_keys, feature_list_sparse_types,
   feature_list_dense_keys, feature_list_dense_types,
   feature_list_dense_defaults,
   feature_list_dense_shapes) = _features_to_raw_params(
       sequence_features, [VarLenFeature, FixedLenSequenceFeature])
  return _parse_single_sequence_example_raw(
      serialized, context_sparse_keys, context_sparse_types,
      context_dense_keys, context_dense_types, context_dense_defaults,
      context_dense_shapes, feature_list_sparse_keys,
      feature_list_sparse_types, feature_list_dense_keys,
      feature_list_dense_types, feature_list_dense_shapes,
      feature_list_dense_defaults, example_name, name)


def _parse_single_sequence_example_raw(serialized,
                                       context_sparse_keys=None,
                                       context_sparse_types=None,
                                       context_dense_keys=None,
                                       context_dense_types=None,
                                       context_dense_defaults=None,
                                       context_dense_shapes=None,
                                       feature_list_sparse_keys=None,
                                       feature_list_sparse_types=None,
                                       feature_list_dense_keys=None,
                                       feature_list_dense_types=None,
                                       feature_list_dense_shapes=None,
                                       feature_list_dense_defaults=None,
                                       debug_name=None,
                                       name=None):
  """Parses a single `SequenceExample` proto.

  Args:
    serialized: A scalar (0-D Tensor) of type string, a single binary
      serialized `SequenceExample` proto.
    context_sparse_keys: A list of string keys in the `SequenceExample`'s
      features. The results for these keys will be returned as
      `SparseTensor` objects.
    context_sparse_types: A list of `DTypes`, the same length as `sparse_keys`.
981 Only `tf.float32` (`FloatList`), `tf.int64` (`Int64List`), 982 and `tf.string` (`BytesList`) are supported. 983 context_dense_keys: A list of string keys in the examples' features. 984 The results for these keys will be returned as `Tensor`s 985 context_dense_types: A list of DTypes, same length as `context_dense_keys`. 986 Only `tf.float32` (`FloatList`), `tf.int64` (`Int64List`), 987 and `tf.string` (`BytesList`) are supported. 988 context_dense_defaults: A dict mapping string keys to `Tensor`s. 989 The keys of the dict must match the context_dense_keys of the feature. 990 context_dense_shapes: A list of tuples, same length as `context_dense_keys`. 991 The shape of the data for each context_dense feature referenced by 992 `context_dense_keys`. Required for any input tensors identified by 993 `context_dense_keys` whose shapes are anything other than `[]` or `[1]`. 994 feature_list_sparse_keys: A list of string keys in the `SequenceExample`'s 995 feature_lists. The results for these keys will be returned as 996 `SparseTensor` objects. 997 feature_list_sparse_types: A list of `DTypes`, same length as `sparse_keys`. 998 Only `tf.float32` (`FloatList`), `tf.int64` (`Int64List`), 999 and `tf.string` (`BytesList`) are supported. 1000 feature_list_dense_keys: A list of string keys in the `SequenceExample`'s 1001 features_lists. The results for these keys will be returned as `Tensor`s. 1002 feature_list_dense_types: A list of `DTypes`, same length as 1003 `feature_list_dense_keys`. Only `tf.float32` (`FloatList`), 1004 `tf.int64` (`Int64List`), and `tf.string` (`BytesList`) are supported. 1005 feature_list_dense_shapes: A list of tuples, same length as 1006 `feature_list_dense_keys`. The shape of the data for each 1007 `FeatureList` feature referenced by `feature_list_dense_keys`. 1008 feature_list_dense_defaults: A dict mapping key strings to values. 1009 The only currently allowed value is `None`. 
Any key appearing 1010 in this dict with value `None` is allowed to be missing from the 1011 `SequenceExample`. If missing, the key is treated as zero-length. 1012 debug_name: A scalar (0-D Tensor) of strings (optional), the name of 1013 the serialized proto. 1014 name: A name for this operation (optional). 1015 1016 Returns: 1017 A tuple of two `dict`s, each mapping keys to `Tensor`s and `SparseTensor`s. 1018 The first dict contains the context key/values. 1019 The second dict contains the feature_list key/values. 1020 1021 Raises: 1022 ValueError: If context_sparse and context_dense key sets intersect, 1023 if input lengths do not match up, or if a value in 1024 feature_list_dense_defaults is not None. 1025 TypeError: if feature_list_dense_defaults is not either None or a dict. 1026 """ 1027 with ops.name_scope(name, "ParseSingleSequenceExample", [serialized]): 1028 context_dense_defaults = ( 1029 {} if context_dense_defaults is None else context_dense_defaults) 1030 context_sparse_keys = ( 1031 [] if context_sparse_keys is None else context_sparse_keys) 1032 context_sparse_types = ( 1033 [] if context_sparse_types is None else context_sparse_types) 1034 context_dense_keys = ( 1035 [] if context_dense_keys is None else context_dense_keys) 1036 context_dense_types = ( 1037 [] if context_dense_types is None else context_dense_types) 1038 context_dense_shapes = ( 1039 [[]] * len(context_dense_keys) 1040 if context_dense_shapes is None else context_dense_shapes) 1041 feature_list_sparse_keys = ( 1042 [] if feature_list_sparse_keys is None else feature_list_sparse_keys) 1043 feature_list_sparse_types = ( 1044 [] if feature_list_sparse_types is None else feature_list_sparse_types) 1045 feature_list_dense_keys = ( 1046 [] if feature_list_dense_keys is None else feature_list_dense_keys) 1047 feature_list_dense_types = ( 1048 [] if feature_list_dense_types is None else feature_list_dense_types) 1049 feature_list_dense_shapes = ( 1050 [[]] * len(feature_list_dense_keys) 
1051 if feature_list_dense_shapes is None else feature_list_dense_shapes) 1052 feature_list_dense_defaults = ( 1053 dict() if feature_list_dense_defaults is None 1054 else feature_list_dense_defaults) 1055 debug_name = "" if debug_name is None else debug_name 1056 1057 # Internal 1058 feature_list_dense_missing_assumed_empty = [] 1059 1060 num_context_dense = len(context_dense_keys) 1061 num_feature_list_dense = len(feature_list_dense_keys) 1062 num_context_sparse = len(context_sparse_keys) 1063 num_feature_list_sparse = len(feature_list_sparse_keys) 1064 1065 if len(context_dense_shapes) != num_context_dense: 1066 raise ValueError( 1067 "len(context_dense_shapes) != len(context_dense_keys): %d vs. %d" 1068 % (len(context_dense_shapes), num_context_dense)) 1069 if len(context_dense_types) != num_context_dense: 1070 raise ValueError( 1071 "len(context_dense_types) != len(num_context_dense): %d vs. %d" 1072 % (len(context_dense_types), num_context_dense)) 1073 if len(feature_list_dense_shapes) != num_feature_list_dense: 1074 raise ValueError( 1075 "len(feature_list_dense_shapes) != len(feature_list_dense_keys): " 1076 "%d vs. %d" % (len(feature_list_dense_shapes), 1077 num_feature_list_dense)) 1078 if len(feature_list_dense_types) != num_feature_list_dense: 1079 raise ValueError( 1080 "len(feature_list_dense_types) != len(num_feature_list_dense):" 1081 "%d vs. %d" % (len(feature_list_dense_types), num_feature_list_dense)) 1082 if len(context_sparse_types) != num_context_sparse: 1083 raise ValueError( 1084 "len(context_sparse_types) != len(context_sparse_keys): %d vs. %d" 1085 % (len(context_sparse_types), num_context_sparse)) 1086 if len(feature_list_sparse_types) != num_feature_list_sparse: 1087 raise ValueError( 1088 "len(feature_list_sparse_types) != len(feature_list_sparse_keys): " 1089 "%d vs. 
%d" 1090 % (len(feature_list_sparse_types), num_feature_list_sparse)) 1091 if (num_context_dense + num_context_sparse 1092 + num_feature_list_dense + num_feature_list_sparse) == 0: 1093 raise ValueError( 1094 "Must provide at least one context_sparse key, context_dense key, " 1095 ", feature_list_sparse key, or feature_list_dense key") 1096 if not set(context_dense_keys).isdisjoint(set(context_sparse_keys)): 1097 raise ValueError( 1098 "context_dense and context_sparse keys must not intersect; " 1099 "intersection: %s" % 1100 set(context_dense_keys).intersection(set(context_sparse_keys))) 1101 if not set(feature_list_dense_keys).isdisjoint( 1102 set(feature_list_sparse_keys)): 1103 raise ValueError( 1104 "feature_list_dense and feature_list_sparse keys must not intersect; " 1105 "intersection: %s" % 1106 set(feature_list_dense_keys).intersection( 1107 set(feature_list_sparse_keys))) 1108 if not isinstance(feature_list_dense_defaults, dict): 1109 raise TypeError("feature_list_dense_defaults must be a dict") 1110 for k, v in feature_list_dense_defaults.items(): 1111 if v is not None: 1112 raise ValueError("Value feature_list_dense_defaults[%s] must be None" 1113 % k) 1114 feature_list_dense_missing_assumed_empty.append(k) 1115 1116 context_dense_defaults_vec = [] 1117 for i, key in enumerate(context_dense_keys): 1118 default_value = context_dense_defaults.get(key) 1119 if default_value is None: 1120 default_value = constant_op.constant([], dtype=context_dense_types[i]) 1121 elif not isinstance(default_value, ops.Tensor): 1122 key_name = "key_" + re.sub("[^A-Za-z0-9_.\\-/]", "_", key) 1123 default_value = ops.convert_to_tensor( 1124 default_value, dtype=context_dense_types[i], name=key_name) 1125 default_value = array_ops.reshape( 1126 default_value, context_dense_shapes[i]) 1127 1128 context_dense_defaults_vec.append(default_value) 1129 1130 context_dense_shapes = [tensor_shape.as_shape(shape).as_proto() 1131 for shape in context_dense_shapes] 1132 
feature_list_dense_shapes = [tensor_shape.as_shape(shape).as_proto() 1133 for shape in feature_list_dense_shapes] 1134 1135 # pylint: disable=protected-access 1136 outputs = gen_parsing_ops._parse_single_sequence_example( 1137 serialized=serialized, 1138 debug_name=debug_name, 1139 context_dense_defaults=context_dense_defaults_vec, 1140 context_sparse_keys=context_sparse_keys, 1141 context_sparse_types=context_sparse_types, 1142 context_dense_keys=context_dense_keys, 1143 context_dense_shapes=context_dense_shapes, 1144 feature_list_sparse_keys=feature_list_sparse_keys, 1145 feature_list_sparse_types=feature_list_sparse_types, 1146 feature_list_dense_keys=feature_list_dense_keys, 1147 feature_list_dense_types=feature_list_dense_types, 1148 feature_list_dense_shapes=feature_list_dense_shapes, 1149 feature_list_dense_missing_assumed_empty=( 1150 feature_list_dense_missing_assumed_empty), 1151 name=name) 1152 # pylint: enable=protected-access 1153 1154 (context_sparse_indices, context_sparse_values, 1155 context_sparse_shapes, context_dense_values, 1156 feature_list_sparse_indices, feature_list_sparse_values, 1157 feature_list_sparse_shapes, feature_list_dense_values) = outputs 1158 1159 context_sparse_tensors = [ 1160 sparse_tensor.SparseTensor(ix, val, shape) for (ix, val, shape) 1161 in zip(context_sparse_indices, 1162 context_sparse_values, 1163 context_sparse_shapes)] 1164 1165 feature_list_sparse_tensors = [ 1166 sparse_tensor.SparseTensor(ix, val, shape) for (ix, val, shape) 1167 in zip(feature_list_sparse_indices, 1168 feature_list_sparse_values, 1169 feature_list_sparse_shapes)] 1170 1171 context_output = dict( 1172 zip(context_sparse_keys + context_dense_keys, 1173 context_sparse_tensors + context_dense_values)) 1174 feature_list_output = dict( 1175 zip(feature_list_sparse_keys + feature_list_dense_keys, 1176 feature_list_sparse_tensors + feature_list_dense_values)) 1177 1178 return (context_output, feature_list_output) 1179 1180 1181 # Swap `name` and 
# `na_value` for backward compatibility.
@tf_export("decode_csv")
def decode_csv(records, record_defaults, field_delim=",",
               use_quote_delim=True, name=None, na_value=""):
  # pylint: disable=protected-access
  """Convert CSV records to tensors. Each column maps to one tensor.

  Input records are expected to follow RFC 4180
  (https://tools.ietf.org/html/rfc4180); leading and trailing spaces are
  tolerated around int and float fields.

  Args:
    records: A `Tensor` of type `string`.
      Each string is a record/row in the csv and all records should have
      the same format.
    record_defaults: A list of `Tensor` objects with specific types.
      Acceptable types are `float32`, `float64`, `int32`, `int64`, `string`.
      One tensor per column of the input record, with either a
      scalar default value for that column or empty if the column is required.
    field_delim: An optional `string`. Defaults to `","`.
      char delimiter to separate fields in a record.
    use_quote_delim: An optional `bool`. Defaults to `True`.
      If false, treats double quotation marks as regular
      characters inside of the string fields (ignoring RFC 4180, Section 2,
      Bullet 5).
    name: A name for the operation (optional).
    na_value: Additional string to recognize as NA/NaN.

  Returns:
    A list of `Tensor` objects. Has the same type as `record_defaults`.
    Each tensor will have the same shape as records.
  """
  # Collect the arguments once so the delegation below stays on one line.
  op_kwargs = dict(
      records=records,
      record_defaults=record_defaults,
      field_delim=field_delim,
      use_quote_delim=use_quote_delim,
      na_value=na_value,
      name=name)
  # TODO(martinwicke), remove the wrapper when new Python API generator is done.
  return gen_parsing_ops._decode_csv(**op_kwargs)
  # pylint: enable=protected-access


# TODO(b/70890287): Combine the implementation of this op and
# `parse_single_example()` after 1/10/2018.
1223 def parse_single_example_v2(serialized, features, name=None): 1224 # pylint: disable=line-too-long 1225 """Parses an `Example` proto into a `dict` of tensors. 1226 1227 Parses a serialized 1228 [`Example`](https://www.tensorflow.org/code/tensorflow/core/example/example.proto) 1229 proto given in `serialized`. 1230 1231 This op parses serialized examples into a dictionary mapping keys to `Tensor` 1232 and `SparseTensor` objects. `features` is a dict from keys to `VarLenFeature`, 1233 `SparseFeature`, and `FixedLenFeature` objects. Each `VarLenFeature` 1234 and `SparseFeature` is mapped to a `SparseTensor`, and each 1235 `FixedLenFeature` is mapped to a `Tensor`. 1236 1237 Each `VarLenFeature` maps to a `SparseTensor` of the specified type 1238 representing a ragged matrix. Its indices are `[index]` where 1239 `index` is the value's index in the list of values associated with 1240 that feature and example. 1241 1242 Each `SparseFeature` maps to a `SparseTensor` of the specified type 1243 representing a Tensor of `dense_shape` `SparseFeature.size`. 1244 Its `values` come from the feature in the examples with key `value_key`. 1245 A `values[i]` comes from a position `k` in the feature of an example at batch 1246 entry `batch`. This positional information is recorded in `indices[i]` as 1247 `[batch, index_0, index_1, ...]` where `index_j` is the `k-th` value of 1248 the feature in the example at with key `SparseFeature.index_key[j]`. 1249 In other words, we split the indices (except the first index indicating the 1250 batch entry) of a `SparseTensor` by dimension into different features of the 1251 `Example`. Due to its complexity a `VarLenFeature` should be preferred over a 1252 `SparseFeature` whenever possible. 1253 1254 Each `FixedLenFeature` `df` maps to a `Tensor` of the specified type (or 1255 `tf.float32` if not specified) and shape `df.shape`. 1256 1257 `FixedLenFeature` entries with a `default_value` are optional. 
With no default 1258 value, we will fail if that `Feature` is missing from any example in 1259 `serialized`. 1260 1261 Each `FixedLenSequenceFeature` `df` maps to a `Tensor` of the specified type 1262 (or `tf.float32` if not specified) and shape `(None,) + df.shape`. 1263 1264 Args: 1265 serialized: A scalar (0-D Tensor) string, a serialized `Example` proto. 1266 features: A `dict` mapping feature keys to `FixedLenFeature`, 1267 `VarLenFeature`, and `SparseFeature` values. 1268 name: A name for this operation (optional). 1269 1270 Returns: 1271 A `dict` mapping feature keys to `Tensor` and `SparseTensor` values. 1272 1273 Raises: 1274 ValueError: if any feature is invalid. 1275 """ 1276 if not features: 1277 raise ValueError("Missing: features was %s." % features) 1278 features = _prepend_none_dimension(features) 1279 (sparse_keys, sparse_types, dense_keys, dense_types, 1280 dense_defaults, dense_shapes) = _features_to_raw_params( 1281 features, 1282 [VarLenFeature, SparseFeature, FixedLenFeature, FixedLenSequenceFeature]) 1283 outputs = _parse_single_example_v2_raw(serialized, sparse_keys, sparse_types, 1284 dense_keys, dense_types, 1285 dense_defaults, dense_shapes, name) 1286 return _construct_sparse_tensors_for_sparse_features(features, outputs) 1287 1288 1289 def _parse_single_example_v2_raw(serialized, sparse_keys, sparse_types, 1290 dense_keys, dense_types, dense_defaults, 1291 dense_shapes, name): 1292 """Parses `Example` protos. 1293 1294 Args: 1295 serialized: A scalar (0-D Tensor) string, containing a binary 1296 serialized `Example` proto. 1297 sparse_keys: A list of string keys in the examples' features. 1298 The results for these keys will be returned as `SparseTensor` objects. 1299 sparse_types: A list of `DTypes` of the same length as `sparse_keys`. 1300 Only `tf.float32` (`FloatList`), `tf.int64` (`Int64List`), 1301 and `tf.string` (`BytesList`) are supported. 1302 dense_keys: A list of string keys in the examples' features. 
1303 The results for these keys will be returned as `Tensor`s 1304 dense_types: A list of DTypes of the same length as `dense_keys`. 1305 Only `tf.float32` (`FloatList`), `tf.int64` (`Int64List`), 1306 and `tf.string` (`BytesList`) are supported. 1307 dense_defaults: A dict mapping string keys to `Tensor`s. 1308 The keys of the dict must match the dense_keys of the feature. 1309 dense_shapes: A list of tuples with the same length as `dense_keys`. 1310 The shape of the data for each dense feature referenced by `dense_keys`. 1311 Required for any input tensors identified by `dense_keys`. Must be 1312 either fully defined, or may contain an unknown first dimension. 1313 An unknown first dimension means the feature is treated as having 1314 a variable number of blocks, and the output shape along this dimension 1315 is considered unknown at graph build time. Padding is applied for 1316 minibatch elements smaller than the maximum number of blocks for the 1317 given feature along this dimension. 1318 name: A name for this operation (optional). 1319 1320 Returns: 1321 A `dict` mapping keys to `Tensor`s and `SparseTensor`s. 1322 1323 Raises: 1324 ValueError: If sparse and dense key sets intersect, or input lengths do not 1325 match up. 
1326 """ 1327 with ops.name_scope(name, "ParseSingleExample", [serialized]): 1328 serialized = ops.convert_to_tensor(serialized, name="serialized") 1329 dense_defaults = collections.OrderedDict( 1330 ) if dense_defaults is None else dense_defaults 1331 sparse_keys = [] if sparse_keys is None else sparse_keys 1332 sparse_types = [] if sparse_types is None else sparse_types 1333 dense_keys = [] if dense_keys is None else dense_keys 1334 dense_types = [] if dense_types is None else dense_types 1335 dense_shapes = ([[]] * len(dense_keys) 1336 if dense_shapes is None else dense_shapes) 1337 1338 num_dense = len(dense_keys) 1339 num_sparse = len(sparse_keys) 1340 1341 if len(dense_shapes) != num_dense: 1342 raise ValueError("len(dense_shapes) != len(dense_keys): %d vs. %d" % 1343 (len(dense_shapes), num_dense)) 1344 if len(dense_types) != num_dense: 1345 raise ValueError("len(dense_types) != len(num_dense): %d vs. %d" % 1346 (len(dense_types), num_dense)) 1347 if len(sparse_types) != num_sparse: 1348 raise ValueError("len(sparse_types) != len(sparse_keys): %d vs. %d" % 1349 (len(sparse_types), num_sparse)) 1350 if num_dense + num_sparse == 0: 1351 raise ValueError("Must provide at least one sparse key or dense key") 1352 if not set(dense_keys).isdisjoint(set(sparse_keys)): 1353 raise ValueError( 1354 "Dense and sparse keys must not intersect; intersection: %s" % 1355 set(dense_keys).intersection(set(sparse_keys))) 1356 1357 # Convert dense_shapes to TensorShape object. 
1358 dense_shapes = [tensor_shape.as_shape(shape) for shape in dense_shapes] 1359 1360 dense_defaults_vec = [] 1361 for i, key in enumerate(dense_keys): 1362 default_value = dense_defaults.get(key) 1363 dense_shape = dense_shapes[i] 1364 if (dense_shape.ndims is not None and dense_shape.ndims > 0 and 1365 dense_shape[0].value is None): 1366 # Variable stride dense shape, the default value should be a 1367 # scalar padding value 1368 if default_value is None: 1369 default_value = ops.convert_to_tensor( 1370 "" if dense_types[i] == dtypes.string else 0, 1371 dtype=dense_types[i]) 1372 else: 1373 # Reshape to a scalar to ensure user gets an error if they 1374 # provide a tensor that's not intended to be a padding value 1375 # (0 or 2+ elements). 1376 key_name = "padding_" + re.sub("[^A-Za-z0-9_.\\-/]", "_", key) 1377 default_value = ops.convert_to_tensor( 1378 default_value, dtype=dense_types[i], name=key_name) 1379 default_value = array_ops.reshape(default_value, []) 1380 else: 1381 if default_value is None: 1382 default_value = constant_op.constant([], dtype=dense_types[i]) 1383 elif not isinstance(default_value, ops.Tensor): 1384 key_name = "key_" + re.sub("[^A-Za-z0-9_.\\-/]", "_", key) 1385 default_value = ops.convert_to_tensor( 1386 default_value, dtype=dense_types[i], name=key_name) 1387 default_value = array_ops.reshape(default_value, dense_shape) 1388 1389 dense_defaults_vec.append(default_value) 1390 1391 # Finally, convert dense_shapes to TensorShapeProto 1392 dense_shapes = [shape.as_proto() for shape in dense_shapes] 1393 1394 # pylint: disable=protected-access 1395 outputs = gen_parsing_ops.parse_single_example( 1396 serialized=serialized, 1397 dense_defaults=dense_defaults_vec, 1398 num_sparse=len(sparse_keys), 1399 sparse_keys=sparse_keys, 1400 sparse_types=sparse_types, 1401 dense_keys=dense_keys, 1402 dense_shapes=dense_shapes, 1403 name=name) 1404 # pylint: enable=protected-access 1405 1406 (sparse_indices, sparse_values, sparse_shapes, 
dense_values) = outputs 1407 1408 sparse_tensors = [ 1409 sparse_tensor.SparseTensor(ix, val, shape) 1410 for (ix, val, 1411 shape) in zip(sparse_indices, sparse_values, sparse_shapes) 1412 ] 1413 1414 return dict(zip(sparse_keys + dense_keys, sparse_tensors + dense_values)) 1415