1 /* Copyright 2016 The TensorFlow Authors. All Rights Reserved. 2 3 Licensed under the Apache License, Version 2.0 (the "License"); 4 you may not use this file except in compliance with the License. 5 You may obtain a copy of the License at 6 7 http://www.apache.org/licenses/LICENSE-2.0 8 9 Unless required by applicable law or agreed to in writing, software 10 distributed under the License is distributed on an "AS IS" BASIS, 11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 See the License for the specific language governing permissions and 13 limitations under the License. 14 ==============================================================================*/ 15 16 #ifndef TENSORFLOW_CORE_UTIL_EXAMPLE_PROTO_HELPER_H_ 17 #define TENSORFLOW_CORE_UTIL_EXAMPLE_PROTO_HELPER_H_ 18 19 #include <string> 20 #include <vector> 21 22 #include "tensorflow/core/example/example.pb.h" 23 #include "tensorflow/core/example/feature.pb.h" 24 #include "tensorflow/core/framework/allocator.h" 25 #include "tensorflow/core/framework/graph.pb.h" 26 #include "tensorflow/core/framework/partial_tensor_shape.h" 27 #include "tensorflow/core/framework/tensor.h" 28 #include "tensorflow/core/framework/types.h" 29 #include "tensorflow/core/platform/types.h" 30 #include "tensorflow/core/util/sparse/sparse_tensor.h" 31 32 // This is a set of helper methods that will make it possible to share 33 // tensorflow::Example proto Tensor conversion code inside the ExampleParserOp 34 // OpKernel as well as in external code. 35 namespace tensorflow { 36 37 // "Dense" feature configuration. 38 struct FixedLenFeature { 39 string key; 40 DataType dtype; 41 TensorShape shape; 42 Tensor default_value; 43 string values_output_tensor_name; 44 }; 45 46 // "Sparse" feature configuration. 47 struct VarLenFeature { 48 string key; 49 DataType dtype; 50 string values_output_tensor_name; 51 string indices_output_tensor_name; 52 string shapes_output_tensor_name; 53 }; 54 55 // Given a single tensorflow::Example, with an optional example name 56 // at a particular index within a batch, and dense and sparse feature 57 // configurations from fixed_len_features, var_len_features, this method 58 // updates the dense value tensor and the sparse values temporary vector 59 // of tensors. The indexing of the output vectors correspond 1:1 to the 60 // indexing of the feature configuration vectors. 61 // 62 // The fixed_len_features and var_len_features maps are assume to be 63 // have disjoint key fields from the Feature map in the tensorflow.Example 64 // proto. 65 // 66 // For each sparse feature, the sparse values temporary vector holds a 67 // tensor for each Example. Each tensor is either empty or filled, depending 68 // on if the sparse feature value is set for the Example. This 69 // temporary structure is needed because we need to know the total number 70 // of filled elements in the batch to get the proper final sparse tensor 71 // shapes allocated. After the entire batch is processed, 72 // GetSparseTensorShape can be used to calculate the final shapes and 73 // CopyIntoSparseTensor can be used to copy from the temporary vector 74 // into the final allocated tensors. 75 Status SingleExampleProtoToTensors( 76 const Example& example, const string& name, const int batch_index, 77 const std::vector<FixedLenFeature>& fixed_len_features, 78 const std::vector<VarLenFeature>& var_len_features, 79 std::vector<Tensor*>* dense_values, 80 std::vector<std::vector<Tensor>>* sparse_values_temporary_vector); 81 82 // The shape of the indices and values tensors associated with a SparseTensor 83 // are dependent on the contents of the batch. 84 struct VarLenFeatureBatchShapes { 85 TensorShape indices_shape; 86 TensorShape values_shape; 87 int max_num_features; 88 }; 89 90 // Get the shape of the sparse values and indices tensors for the batch, 91 // given how many of the tensors in the temporary sparse values vector 92 // are actually filled. 93 Status GetSparseTensorShapes(const VarLenFeature& var_len_feature, 94 const std::vector<Tensor>& sparse_values_tmp, 95 const int batch_size, 96 VarLenFeatureBatchShapes* output_shapes); 97 98 // A method to convert a batch of tensorflow::Example protos into output 99 // tensors. This method is useful if there already is a batch of deserialized 100 // Example protos in memory (such as a serving use-case) and we do not wish 101 // to incur an extraneous serialize/deserialize. It is intended 102 // as an outside of OpKernel compatible replacement for the functionality of 103 // ExampleParserOp. In a serving setting, this method could be used to produce 104 // a feed_dict of Tensors that could bypass the ExampleParserOp. 105 // 106 // Note that unlike SingleExampleProtoToTensors, output tensors are 107 // allocated using a provided Allocator within this method. 108 Status BatchExampleProtoToTensors( 109 const std::vector<const Example*>& examples, 110 const std::vector<string>& names, 111 const std::vector<FixedLenFeature>& fixed_len_features, 112 const std::vector<VarLenFeature>& var_len_features, Allocator* allocator, 113 std::vector<Tensor>* output_dense_values_tensor, 114 std::vector<Tensor>* output_sparse_indices_tensor, 115 std::vector<Tensor>* output_sparse_values_tensor, 116 std::vector<Tensor>* output_sparse_shapes_tensor); 117 118 // Check that the given dtype is one that is compatible with 119 // tensorflow::Example protocol buffer feature values. 120 Status CheckValidType(const DataType& dtype); 121 122 // Check that the provided Feature proto message's oneof value 123 // matches that of the provided dtype. 124 Status CheckTypesMatch(const Feature& feature, const DataType& dtype, 125 bool* match); 126 127 // For a single Example, copy a dense feature value into an output 128 // dense value tensor Out at the provided out_index offset. 129 Status FeatureDenseCopy(const std::size_t out_index, const string& name, 130 const string& key, const DataType& dtype, 131 const TensorShape& shape, const Feature& feature, 132 Tensor* out); 133 134 // Copy the value a provided Tensor into an output dense_value tensor Out 135 // at the provided out_index offset. 136 void RowDenseCopy(const std::size_t& out_index, const DataType& dtype, 137 const Tensor& in, Tensor* out); 138 139 // For a single Example, and given sparse feature return a temporary output 140 // Tensor suitable for being collected in the temporary sparse value vector. 141 Tensor FeatureSparseCopy(const std::size_t batch, const string& key, 142 const DataType& dtype, const Feature& feature); 143 144 // Copy a temporary Tensor into the final sparse indices and values 145 // tensor at a given batch index and element offset. This method 146 // assumes that the indices/values Tensors have been properly allocated 147 // for the batch. 148 int64 CopyIntoSparseTensor(const Tensor& in, const int batch, 149 const int64 offset, Tensor* indices, Tensor* values); 150 151 // Parses the attributes passed to ParseExample. 152 // REQUIRES: Init must be called after construction. 153 class ParseExampleAttrs { 154 public: 155 template <typename ContextType> 156 Status Init(ContextType* ctx) { 157 TF_RETURN_IF_ERROR(ctx->GetAttr("sparse_types", &sparse_types)); 158 TF_RETURN_IF_ERROR(ctx->GetAttr("Ndense", &num_dense)); 159 TF_RETURN_IF_ERROR(ctx->GetAttr("Nsparse", &num_sparse)); 160 TF_RETURN_IF_ERROR(ctx->GetAttr("Tdense", &dense_types)); 161 TF_RETURN_IF_ERROR(ctx->GetAttr("dense_shapes", &dense_shapes)); 162 // Temporary check until we start allowing a variable length outer 163 // dimension. 164 for (int i = 0; i < dense_shapes.size(); ++i) { 165 bool shape_ok = true; 166 if (dense_shapes[i].dims() == -1) { 167 shape_ok = false; 168 } else { 169 for (int d = 1; d < dense_shapes[i].dims(); ++d) { 170 if (dense_shapes[i].dim_size(d) == -1) { 171 shape_ok = false; 172 } 173 } 174 } 175 if (!shape_ok) { 176 return errors::InvalidArgument( 177 "dense_shapes[", i, 178 "] has unknown rank or unknown inner dimensions: ", 179 dense_shapes[i].DebugString()); 180 } 181 TensorShape dense_shape; 182 if (dense_shapes[i].dims() > 0 && dense_shapes[i].dim_size(0) == -1) { 183 variable_length.push_back(true); 184 for (int d = 1; d < dense_shapes[i].dims(); ++d) { 185 dense_shape.AddDim(dense_shapes[i].dim_size(d)); 186 } 187 } else { 188 variable_length.push_back(false); 189 dense_shapes[i].AsTensorShape(&dense_shape); 190 } 191 elements_per_stride.push_back(dense_shape.num_elements()); 192 } 193 return FinishInit(); 194 } 195 196 int64 num_sparse; 197 int64 num_dense; 198 std::vector<DataType> sparse_types; 199 std::vector<DataType> dense_types; 200 std::vector<PartialTensorShape> dense_shapes; 201 std::vector<bool> variable_length; 202 std::vector<std::size_t> elements_per_stride; 203 204 private: 205 Status FinishInit(); // for context-independent parts of Init. 206 }; 207 208 // Parses the attributes passed to ParseSingleExample. 209 // REQUIRES: Init must be called after construction. 210 class ParseSingleExampleAttrs { 211 public: 212 template <typename ContextType> 213 Status Init(ContextType* ctx) { 214 TF_RETURN_IF_ERROR(ctx->GetAttr("sparse_keys", &sparse_keys)); 215 TF_RETURN_IF_ERROR(ctx->GetAttr("sparse_types", &sparse_types)); 216 TF_RETURN_IF_ERROR(ctx->GetAttr("dense_keys", &dense_keys)); 217 TF_RETURN_IF_ERROR(ctx->GetAttr("Tdense", &dense_types)); 218 TF_RETURN_IF_ERROR(ctx->GetAttr("dense_shapes", &dense_shapes)); 219 220 int num_sparse; 221 TF_RETURN_IF_ERROR(ctx->GetAttr("num_sparse", &num_sparse)); 222 if (num_sparse != sparse_keys.size() || num_sparse != sparse_types.size()) { 223 return errors::InvalidArgument( 224 "num_sparse (", num_sparse, ") must match the size of sparse_keys (", 225 sparse_keys.size(), ") and sparse_types (", sparse_types.size(), ")"); 226 } 227 228 // Temporary check until we start allowing a variable length outer 229 // dimension. 230 for (int i = 0; i < dense_shapes.size(); ++i) { 231 bool shape_ok = true; 232 if (dense_shapes[i].dims() == -1) { 233 shape_ok = false; 234 } else { 235 for (int d = 1; d < dense_shapes[i].dims(); ++d) { 236 if (dense_shapes[i].dim_size(d) == -1) { 237 shape_ok = false; 238 } 239 } 240 } 241 if (!shape_ok) { 242 return errors::InvalidArgument( 243 "dense_shapes[", i, 244 "] has unknown rank or unknown inner dimensions: ", 245 dense_shapes[i].DebugString()); 246 } 247 TensorShape dense_shape; 248 if (dense_shapes[i].dims() > 0 && dense_shapes[i].dim_size(0) == -1) { 249 variable_length.push_back(true); 250 for (int d = 1; d < dense_shapes[i].dims(); ++d) { 251 dense_shape.AddDim(dense_shapes[i].dim_size(d)); 252 } 253 } else { 254 variable_length.push_back(false); 255 dense_shapes[i].AsTensorShape(&dense_shape); 256 } 257 elements_per_stride.push_back(dense_shape.num_elements()); 258 } 259 return FinishInit(); 260 } 261 262 std::vector<string> sparse_keys; 263 std::vector<DataType> sparse_types; 264 std::vector<string> dense_keys; 265 std::vector<DataType> dense_types; 266 std::vector<PartialTensorShape> dense_shapes; 267 std::vector<bool> variable_length; 268 std::vector<std::size_t> elements_per_stride; 269 270 private: 271 Status FinishInit(); // for context-independent parts of Init. 272 }; 273 274 // Parses the attributes passed to ParseSingleSequenceExample. 275 // REQUIRES: Init must be called after construction. 276 class ParseSingleSequenceExampleAttrs { 277 public: 278 template <typename ContextType> 279 Status Init(ContextType* ctx) { 280 TF_RETURN_IF_ERROR( 281 ctx->GetAttr("context_sparse_types", &context_sparse_types)); 282 TF_RETURN_IF_ERROR(ctx->GetAttr("Ncontext_dense", &num_context_dense)); 283 TF_RETURN_IF_ERROR( 284 ctx->GetAttr("Nfeature_list_dense", &num_feature_list_dense)); 285 TF_RETURN_IF_ERROR(ctx->GetAttr("Ncontext_sparse", &num_context_sparse)); 286 TF_RETURN_IF_ERROR(ctx->GetAttr("Tcontext_dense", &context_dense_types)); 287 TF_RETURN_IF_ERROR( 288 ctx->GetAttr("feature_list_sparse_types", &feature_list_sparse_types)); 289 TF_RETURN_IF_ERROR( 290 ctx->GetAttr("feature_list_dense_types", &feature_list_dense_types)); 291 TF_RETURN_IF_ERROR( 292 ctx->GetAttr("Nfeature_list_sparse", &num_feature_list_sparse)); 293 TF_RETURN_IF_ERROR( 294 ctx->GetAttr("context_dense_shapes", &context_dense_shapes)); 295 TF_RETURN_IF_ERROR( 296 ctx->GetAttr("feature_list_dense_shapes", &feature_list_dense_shapes)); 297 return FinishInit(); 298 } 299 300 int64 num_context_sparse; 301 int64 num_context_dense; 302 int64 num_feature_list_sparse; 303 int64 num_feature_list_dense; 304 std::vector<DataType> context_sparse_types; 305 std::vector<DataType> context_dense_types; 306 std::vector<TensorShape> context_dense_shapes; 307 std::vector<DataType> feature_list_sparse_types; 308 std::vector<DataType> feature_list_dense_types; 309 std::vector<TensorShape> feature_list_dense_shapes; 310 311 private: 312 Status FinishInit(); // for context-independent parts of Init. 313 }; 314 315 } // namespace tensorflow 316 317 #endif // TENSORFLOW_CORE_UTIL_EXAMPLE_PROTO_HELPER_H_ 318