Home | History | Annotate | Download | only in util
      1 /* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
      2 
      3 Licensed under the Apache License, Version 2.0 (the "License");
      4 you may not use this file except in compliance with the License.
      5 You may obtain a copy of the License at
      6 
      7     http://www.apache.org/licenses/LICENSE-2.0
      8 
      9 Unless required by applicable law or agreed to in writing, software
     10 distributed under the License is distributed on an "AS IS" BASIS,
     11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 See the License for the specific language governing permissions and
     13 limitations under the License.
     14 ==============================================================================*/
     15 
     16 #ifndef TENSORFLOW_CORE_UTIL_EXAMPLE_PROTO_HELPER_H_
     17 #define TENSORFLOW_CORE_UTIL_EXAMPLE_PROTO_HELPER_H_
     18 
     19 #include <string>
     20 #include <vector>
     21 
     22 #include "tensorflow/core/example/example.pb.h"
     23 #include "tensorflow/core/example/feature.pb.h"
     24 #include "tensorflow/core/framework/allocator.h"
     25 #include "tensorflow/core/framework/graph.pb.h"
     26 #include "tensorflow/core/framework/partial_tensor_shape.h"
     27 #include "tensorflow/core/framework/tensor.h"
     28 #include "tensorflow/core/framework/types.h"
     29 #include "tensorflow/core/platform/types.h"
     30 #include "tensorflow/core/util/sparse/sparse_tensor.h"
     31 
     32 // This is a set of helper methods that will make it possible to share
     33 // tensorflow::Example proto Tensor conversion code inside the ExampleParserOp
     34 // OpKernel as well as in external code.
     35 namespace tensorflow {
     36 
// "Dense" feature configuration.
//
// NOTE(review): the per-field notes below are inferred from the field names
// and from the SingleExampleProtoToTensors documentation in this header;
// confirm them against the implementation.
struct FixedLenFeature {
  // Presumably the feature's key in the Example's feature map.
  string key;
  // Presumably the expected element type of the feature values.
  DataType dtype;
  // Presumably the shape of a single example's value for this feature.
  TensorShape shape;
  // Presumably the value used when the feature is missing -- TODO confirm.
  Tensor default_value;
  // Presumably the name given to the dense output tensor.
  string values_output_tensor_name;
};
     45 
// "Sparse" feature configuration.
//
// NOTE(review): the per-field notes below are inferred from the field names;
// confirm them against the implementation.
struct VarLenFeature {
  // Presumably the feature's key in the Example's feature map.
  string key;
  // Presumably the expected element type of the feature values.
  DataType dtype;
  // Presumably the names given to the three tensors (values, indices, shape)
  // that make up the sparse output for this feature.
  string values_output_tensor_name;
  string indices_output_tensor_name;
  string shapes_output_tensor_name;
};
     54 
// Given a single tensorflow::Example, with an optional example name
// at a particular index within a batch, and dense and sparse feature
// configurations from fixed_len_features, var_len_features, this method
// updates the dense value tensor and the sparse values temporary vector
// of tensors. The indexing of the output vectors corresponds 1:1 to the
// indexing of the feature configuration vectors.
//
// The fixed_len_features and var_len_features configurations are assumed to
// have disjoint key fields from the Feature map in the tensorflow.Example
// proto.
//
// For each sparse feature, the sparse values temporary vector holds a
// tensor for each Example. Each tensor is either empty or filled, depending
// on whether the sparse feature value is set for the Example. This
// temporary structure is needed because we need to know the total number
// of filled elements in the batch to get the proper final sparse tensor
// shapes allocated.  After the entire batch is processed,
// GetSparseTensorShapes can be used to calculate the final shapes and
// CopyIntoSparseTensor can be used to copy from the temporary vector
// into the final allocated tensors.
Status SingleExampleProtoToTensors(
    const Example& example, const string& name, const int batch_index,
    const std::vector<FixedLenFeature>& fixed_len_features,
    const std::vector<VarLenFeature>& var_len_features,
    std::vector<Tensor*>* dense_values,
    std::vector<std::vector<Tensor>>* sparse_values_temporary_vector);
     81 
// The shape of the indices and values tensors associated with a SparseTensor
// are dependent on the contents of the batch.
//
// This struct is the output type of GetSparseTensorShapes. None of its fields
// has a default initializer, so max_num_features is indeterminate until it is
// assigned.
struct VarLenFeatureBatchShapes {
  // Shape of the sparse indices tensor for the batch.
  TensorShape indices_shape;
  // Shape of the sparse values tensor for the batch.
  TensorShape values_shape;
  // NOTE(review): presumably the largest number of values any single example
  // holds for this feature -- confirm against the implementation.
  int max_num_features;
};
     89 
// Get the shape of the sparse values and indices tensors for the batch,
// given how many of the tensors in the temporary sparse values vector
// are actually filled.
//
// NOTE(review): output_shapes is the only non-const parameter, so the computed
// shapes are presumably written there -- confirm against the implementation.
Status GetSparseTensorShapes(const VarLenFeature& var_len_feature,
                             const std::vector<Tensor>& sparse_values_tmp,
                             const int batch_size,
                             VarLenFeatureBatchShapes* output_shapes);
     97 
// A method to convert a batch of tensorflow::Example protos into output
// tensors. This method is useful if there already is a batch of deserialized
// Example protos in memory (such as a serving use-case) and we do not wish
// to incur an extraneous serialize/deserialize.  It is intended as a
// replacement for the functionality of ExampleParserOp that can be used
// outside of an OpKernel. In a serving setting, this method could be used to
// produce a feed_dict of Tensors that could bypass the ExampleParserOp.
//
// Note that unlike SingleExampleProtoToTensors, output tensors are
// allocated using a provided Allocator within this method.
//
// NOTE(review): the four output_* vectors are presumably filled with one
// tensor per configured feature -- confirm against the implementation.
Status BatchExampleProtoToTensors(
    const std::vector<const Example*>& examples,
    const std::vector<string>& names,
    const std::vector<FixedLenFeature>& fixed_len_features,
    const std::vector<VarLenFeature>& var_len_features, Allocator* allocator,
    std::vector<Tensor>* output_dense_values_tensor,
    std::vector<Tensor>* output_sparse_indices_tensor,
    std::vector<Tensor>* output_sparse_values_tensor,
    std::vector<Tensor>* output_sparse_shapes_tensor);
    117 
// Check that the given dtype is one that is compatible with
// tensorflow::Example protocol buffer feature values.
// The outcome is reported through the returned Status.
Status CheckValidType(const DataType& dtype);
    121 
// Check that the provided Feature proto message's oneof value
// matches that of the provided dtype.
//
// NOTE(review): the comparison result is presumably written to *match, with
// the returned Status reserved for errors -- confirm against the
// implementation.
Status CheckTypesMatch(const Feature& feature, const DataType& dtype,
                       bool* match);
    126 
// For a single Example, copy a dense feature value into an output
// dense value tensor `out` at the provided out_index offset.
//
// NOTE(review): name and key are presumably the example name and feature key
// used for error reporting -- confirm against the implementation.
Status FeatureDenseCopy(const std::size_t out_index, const string& name,
                        const string& key, const DataType& dtype,
                        const TensorShape& shape, const Feature& feature,
                        Tensor* out);
    133 
// Copy the value of a provided Tensor `in` into an output dense value tensor
// `out` at the provided out_index offset.
//
// Unlike FeatureDenseCopy, this function returns void, so it has no way to
// report a failure to the caller. out_index is taken by const reference here,
// whereas FeatureDenseCopy takes it by value.
void RowDenseCopy(const std::size_t& out_index, const DataType& dtype,
                  const Tensor& in, Tensor* out);
    138 
// For a single Example and a given sparse feature, return a temporary output
// Tensor suitable for being collected in the temporary sparse value vector.
Tensor FeatureSparseCopy(const std::size_t batch, const string& key,
                         const DataType& dtype, const Feature& feature);
    143 
// Copy a temporary Tensor into the final sparse indices and values
// tensor at a given batch index and element offset. This method
// assumes that the indices/values Tensors have been properly allocated
// for the batch.
//
// NOTE(review): the meaning of the int64 return value is not documented in
// this header; presumably it is the number of elements copied, which the
// caller adds to `offset` -- confirm against the implementation.
int64 CopyIntoSparseTensor(const Tensor& in, const int batch,
                           const int64 offset, Tensor* indices, Tensor* values);
    150 
    151 // Parses the attributes passed to ParseExample.
    152 // REQUIRES: Init must be called after construction.
    153 class ParseExampleAttrs {
    154  public:
    155   template <typename ContextType>
    156   Status Init(ContextType* ctx) {
    157     TF_RETURN_IF_ERROR(ctx->GetAttr("sparse_types", &sparse_types));
    158     TF_RETURN_IF_ERROR(ctx->GetAttr("Ndense", &num_dense));
    159     TF_RETURN_IF_ERROR(ctx->GetAttr("Nsparse", &num_sparse));
    160     TF_RETURN_IF_ERROR(ctx->GetAttr("Tdense", &dense_types));
    161     TF_RETURN_IF_ERROR(ctx->GetAttr("dense_shapes", &dense_shapes));
    162     // Temporary check until we start allowing a variable length outer
    163     // dimension.
    164     for (int i = 0; i < dense_shapes.size(); ++i) {
    165       bool shape_ok = true;
    166       if (dense_shapes[i].dims() == -1) {
    167         shape_ok = false;
    168       } else {
    169         for (int d = 1; d < dense_shapes[i].dims(); ++d) {
    170           if (dense_shapes[i].dim_size(d) == -1) {
    171             shape_ok = false;
    172           }
    173         }
    174       }
    175       if (!shape_ok) {
    176         return errors::InvalidArgument(
    177             "dense_shapes[", i,
    178             "] has unknown rank or unknown inner dimensions: ",
    179             dense_shapes[i].DebugString());
    180       }
    181       TensorShape dense_shape;
    182       if (dense_shapes[i].dims() > 0 && dense_shapes[i].dim_size(0) == -1) {
    183         variable_length.push_back(true);
    184         for (int d = 1; d < dense_shapes[i].dims(); ++d) {
    185           dense_shape.AddDim(dense_shapes[i].dim_size(d));
    186         }
    187       } else {
    188         variable_length.push_back(false);
    189         dense_shapes[i].AsTensorShape(&dense_shape);
    190       }
    191       elements_per_stride.push_back(dense_shape.num_elements());
    192     }
    193     return FinishInit();
    194   }
    195 
    196   int64 num_sparse;
    197   int64 num_dense;
    198   std::vector<DataType> sparse_types;
    199   std::vector<DataType> dense_types;
    200   std::vector<PartialTensorShape> dense_shapes;
    201   std::vector<bool> variable_length;
    202   std::vector<std::size_t> elements_per_stride;
    203 
    204  private:
    205   Status FinishInit();  // for context-independent parts of Init.
    206 };
    207 
    208 // Parses the attributes passed to ParseSingleExample.
    209 // REQUIRES: Init must be called after construction.
    210 class ParseSingleExampleAttrs {
    211  public:
    212   template <typename ContextType>
    213   Status Init(ContextType* ctx) {
    214     TF_RETURN_IF_ERROR(ctx->GetAttr("sparse_keys", &sparse_keys));
    215     TF_RETURN_IF_ERROR(ctx->GetAttr("sparse_types", &sparse_types));
    216     TF_RETURN_IF_ERROR(ctx->GetAttr("dense_keys", &dense_keys));
    217     TF_RETURN_IF_ERROR(ctx->GetAttr("Tdense", &dense_types));
    218     TF_RETURN_IF_ERROR(ctx->GetAttr("dense_shapes", &dense_shapes));
    219 
    220     int num_sparse;
    221     TF_RETURN_IF_ERROR(ctx->GetAttr("num_sparse", &num_sparse));
    222     if (num_sparse != sparse_keys.size() || num_sparse != sparse_types.size()) {
    223       return errors::InvalidArgument(
    224           "num_sparse (", num_sparse, ") must match the size of sparse_keys (",
    225           sparse_keys.size(), ") and sparse_types (", sparse_types.size(), ")");
    226     }
    227 
    228     // Temporary check until we start allowing a variable length outer
    229     // dimension.
    230     for (int i = 0; i < dense_shapes.size(); ++i) {
    231       bool shape_ok = true;
    232       if (dense_shapes[i].dims() == -1) {
    233         shape_ok = false;
    234       } else {
    235         for (int d = 1; d < dense_shapes[i].dims(); ++d) {
    236           if (dense_shapes[i].dim_size(d) == -1) {
    237             shape_ok = false;
    238           }
    239         }
    240       }
    241       if (!shape_ok) {
    242         return errors::InvalidArgument(
    243             "dense_shapes[", i,
    244             "] has unknown rank or unknown inner dimensions: ",
    245             dense_shapes[i].DebugString());
    246       }
    247       TensorShape dense_shape;
    248       if (dense_shapes[i].dims() > 0 && dense_shapes[i].dim_size(0) == -1) {
    249         variable_length.push_back(true);
    250         for (int d = 1; d < dense_shapes[i].dims(); ++d) {
    251           dense_shape.AddDim(dense_shapes[i].dim_size(d));
    252         }
    253       } else {
    254         variable_length.push_back(false);
    255         dense_shapes[i].AsTensorShape(&dense_shape);
    256       }
    257       elements_per_stride.push_back(dense_shape.num_elements());
    258     }
    259     return FinishInit();
    260   }
    261 
    262   std::vector<string> sparse_keys;
    263   std::vector<DataType> sparse_types;
    264   std::vector<string> dense_keys;
    265   std::vector<DataType> dense_types;
    266   std::vector<PartialTensorShape> dense_shapes;
    267   std::vector<bool> variable_length;
    268   std::vector<std::size_t> elements_per_stride;
    269 
    270  private:
    271   Status FinishInit();  // for context-independent parts of Init.
    272 };
    273 
// Parses the attributes passed to ParseSingleSequenceExample.
// REQUIRES: Init must be called after construction.
class ParseSingleSequenceExampleAttrs {
 public:
  // Reads every attribute below from `ctx` through ctx->GetAttr(), in the
  // order written, then returns the result of FinishInit(). Each lookup is
  // wrapped in TF_RETURN_IF_ERROR, so the order of the calls determines which
  // error a caller sees first. No shape validation is performed in this
  // method itself.
  //
  // `ContextType` only needs a `GetAttr(name, value*)` member whose result can
  // be used with TF_RETURN_IF_ERROR.
  template <typename ContextType>
  Status Init(ContextType* ctx) {
    TF_RETURN_IF_ERROR(
        ctx->GetAttr("context_sparse_types", &context_sparse_types));
    TF_RETURN_IF_ERROR(ctx->GetAttr("Ncontext_dense", &num_context_dense));
    TF_RETURN_IF_ERROR(
        ctx->GetAttr("Nfeature_list_dense", &num_feature_list_dense));
    TF_RETURN_IF_ERROR(ctx->GetAttr("Ncontext_sparse", &num_context_sparse));
    TF_RETURN_IF_ERROR(ctx->GetAttr("Tcontext_dense", &context_dense_types));
    TF_RETURN_IF_ERROR(
        ctx->GetAttr("feature_list_sparse_types", &feature_list_sparse_types));
    TF_RETURN_IF_ERROR(
        ctx->GetAttr("feature_list_dense_types", &feature_list_dense_types));
    TF_RETURN_IF_ERROR(
        ctx->GetAttr("Nfeature_list_sparse", &num_feature_list_sparse));
    TF_RETURN_IF_ERROR(
        ctx->GetAttr("context_dense_shapes", &context_dense_shapes));
    TF_RETURN_IF_ERROR(
        ctx->GetAttr("feature_list_dense_shapes", &feature_list_dense_shapes));
    return FinishInit();
  }

  // Value of the "Ncontext_sparse" attribute.
  int64 num_context_sparse;
  // Value of the "Ncontext_dense" attribute.
  int64 num_context_dense;
  // Value of the "Nfeature_list_sparse" attribute.
  int64 num_feature_list_sparse;
  // Value of the "Nfeature_list_dense" attribute.
  int64 num_feature_list_dense;
  // Value of the "context_sparse_types" attribute.
  std::vector<DataType> context_sparse_types;
  // Value of the "Tcontext_dense" attribute.
  std::vector<DataType> context_dense_types;
  // Value of the "context_dense_shapes" attribute.
  std::vector<TensorShape> context_dense_shapes;
  // Value of the "feature_list_sparse_types" attribute.
  std::vector<DataType> feature_list_sparse_types;
  // Value of the "feature_list_dense_types" attribute.
  std::vector<DataType> feature_list_dense_types;
  // Value of the "feature_list_dense_shapes" attribute.
  std::vector<TensorShape> feature_list_dense_shapes;

 private:
  Status FinishInit();  // for context-independent parts of Init.
};
    314 
    315 }  // namespace tensorflow
    316 
    317 #endif  // TENSORFLOW_CORE_UTIL_EXAMPLE_PROTO_HELPER_H_
    318