Home | History | Annotate | Download | only in example
      1 /* Copyright 2016 The TensorFlow Authors. All Rights Reserved.
      2 
      3 Licensed under the Apache License, Version 2.0 (the "License");
      4 you may not use this file except in compliance with the License.
      5 You may obtain a copy of the License at
      6 
      7     http://www.apache.org/licenses/LICENSE-2.0
      8 
      9 Unless required by applicable law or agreed to in writing, software
     10 distributed under the License is distributed on an "AS IS" BASIS,
     11 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 See the License for the specific language governing permissions and
     13 limitations under the License.
     14 ==============================================================================*/
     15 
     16 // A set of lightweight wrappers which simplify access to Feature protos.
     17 //
     18 // TensorFlow Example proto uses associative maps on top of oneof fields.
     19 // SequenceExample proto uses associative map of FeatureList.
     20 // So accessing feature values is not very convenient.
     21 //
     22 // For example, to read the first value of an integer feature "tag":
     23 //   int id = example.features().feature().at("tag").int64_list().value(0);
     24 //
     25 // to add a value:
     26 //   auto features = example->mutable_features();
     27 //   (*features->mutable_feature())["tag"].mutable_int64_list()->add_value(id);
     28 //
     29 // For float features you have to use float_list, for string - bytes_list.
     30 //
     31 // To do the same with this library:
     32 //   int id = GetFeatureValues<protobuf_int64>("tag", example).Get(0);
     33 //   GetFeatureValues<protobuf_int64>("tag", &example)->Add(id);
     34 //
     35 // Modification of bytes features is slightly different:
     36 //   auto tag = GetFeatureValues<string>("tag", &example);
     37 //   *tag->Add() = "lorem ipsum";
     38 //
     39 // To copy multiple values into a feature:
     40 //   AppendFeatureValues({1,2,3}, "tag", &example);
     41 //
     42 // GetFeatureValues gives you access to the underlying data - a RepeatedField
     43 // object (RepeatedPtrField for a bytes list). Refer to the RepeatedField
     44 // documentation for the full list of supported methods.
     45 //
     46 // NOTE: Due to the nature of oneof proto fields setting a feature of one type
     47 // automatically clears all values stored as another type with the same feature
     48 // key.
     49 //
     50 // This library also has tools to work with SequenceExample protos.
     51 //
     52 // To get a value from SequenceExample.context:
     53 //   int id = GetFeatureValues<protobuf_int64>("tag", se.context()).Get(0);
     54 // To add a value to the context:
     55 //   GetFeatureValues<protobuf_int64>("tag", se.mutable_context())->Add(42);
     56 //
     57 // To add values to feature_lists:
     58 //   AppendFeatureValues({4.0},
     59 //                       GetFeatureList("images", &se)->Add());
     60 //   AppendFeatureValues({5.0, 3.0},
     61 //                       GetFeatureList("images", &se)->Add());
     62 // This will create a feature list keyed as "images" with two features:
     63 //   feature_lists {
     64 //     feature_list {
     65 //       key: "images"
     66 //       value {
     67 //         feature { float_list { value: [4.0] } }
     68 //         feature { float_list { value: [5.0, 3.0] } }
     69 //       }
     70 //     } }
     71 //
     72 // Functions exposed by this library:
     73 //   HasFeature<[FeatureType]>(key, proto) -> bool
     74 //     Returns true if a feature with the specified key, and optionally
     75 //     FeatureType, belongs to the Features or Example proto.
     76 //   HasFeatureList(key, sequence_example) -> bool
     77 //     Returns true if SequenceExample has a feature_list with the key.
     78 //   GetFeatureValues<FeatureType>(key, proto) -> RepeatedField<FeatureType>
     79 //     Returns values for the specified key and the FeatureType.
     80 //     Supported types for the proto: Example, Features.
     81 //   GetFeatureList(key, sequence_example) -> RepeatedPtrField<Feature>
     82 //     Returns Feature protos associated with a key.
     83 //   AppendFeatureValues(begin, end, feature)
     84 //   AppendFeatureValues(container or initializer_list, feature)
     85 //     Copies values into a Feature.
     86 //   AppendFeatureValues(begin, end, key, proto)
     87 //   AppendFeatureValues(container or initializer_list, key, proto)
     88 //     Copies values into Features and Example protos with the specified key.
     89 //
     90 // Auxiliary functions, it is unlikely you'll need to use them directly:
     91 //   GetFeatures(proto) -> Features
     92 //     A convenience function to get Features proto.
     93 //     Supported types for the proto: Example, Features.
     94 //   GetFeature(key, proto) -> Feature*
     95 //     Returns a Feature proto for the specified key, creating a new one if
     96 //     necessary. Supported types for the proto: Example, Features.
     97 //   GetFeatureValues<FeatureType>(feature) -> RepeatedField<FeatureType>
     98 //     Returns values of the feature for the FeatureType.
     99 
    100 #ifndef TENSORFLOW_EXAMPLE_FEATURE_H_
    101 #define TENSORFLOW_EXAMPLE_FEATURE_H_
    102 
    103 #include <iterator>
    104 #include <type_traits>
    105 
    106 #include "tensorflow/core/example/example.pb.h"
    107 #include "tensorflow/core/example/feature.pb.h"
    108 #include "tensorflow/core/lib/core/stringpiece.h"
    109 #include "tensorflow/core/platform/protobuf.h"
    110 #include "tensorflow/core/platform/types.h"
    111 
    112 namespace tensorflow {
    113 
    114 namespace internal {
    115 
    116 // DEPRECATED: Use GetFeature instead.
    117 // TODO(gorban): Update all clients in a followup CL.
    118 // Returns a mutable reference to the Feature keyed by `name` in `example`.
    119 // Note: a new Feature is created if `example` has none under that name.
    120 Feature& ExampleFeature(const string& name, Example* example);
    121 
    122 // Maps a feature element type to the protobuf repeated-field type that holds
    123 // its values, exposed as `Type`. The primary template is declared only.
    124 template <typename FeatureType>
    125 struct RepeatedFieldTrait;
    126 
    127 template <>
    128 struct RepeatedFieldTrait<protobuf_int64> {
    129   using Type = protobuf::RepeatedField<protobuf_int64>;
    130 };
    131 
    132 template <>
    133 struct RepeatedFieldTrait<float> {
    134   using Type = protobuf::RepeatedField<float>;
    135 };
    136 
    137 template <>
    138 struct RepeatedFieldTrait<string> {
    139   using Type = protobuf::RepeatedPtrField<string>;  // strings use RepeatedPtrField
    140 };
    141 
    142 // FeatureTrait<ValueType>::Type names the feature element type used to store
    143 // values of ValueType. `Enable` exists only to select a specialization.
    144 template <typename ValueType, class Enable = void>
    145 struct FeatureTrait;
    146 
    147 template <typename ValueType>
    148 struct FeatureTrait<ValueType, typename std::enable_if<
    149                                    std::is_integral<ValueType>::value>::type> {
    150   using Type = protobuf_int64;  // every integral type maps to protobuf_int64
    151 };
    152 
    153 template <typename ValueType>
    154 struct FeatureTrait<
    155     ValueType,
    156     typename std::enable_if<std::is_floating_point<ValueType>::value>::type> {
    157   using Type = float;  // every floating-point type (incl. double) maps to float
    158 };
    159 
    160 template <typename T>
    161 struct is_string
    162     : public std::integral_constant<
    163           bool,
    164           std::is_same<char*, typename std::decay<T>::type>::value ||
    165               std::is_same<const char*, typename std::decay<T>::type>::value> {
    166 };
    167 
    168 template <>
    169 struct is_string<string> : std::true_type {};  // `string` is a string type.
    170 
    171 template <>
    172 struct is_string<::tensorflow::StringPiece> : std::true_type {};  // So is StringPiece.
    173 
    174 template <typename ValueType>
    175 struct FeatureTrait<
    176     ValueType, typename std::enable_if<is_string<ValueType>::value>::type> {
    177   using Type = string;  // every type accepted by is_string maps to `string`
    178 };
    179 
    180 }  //  namespace internal
    181 
    182 // Returns true if `sequence_example` has a feature_list stored under `key`.
    183 bool HasFeatureList(const string& key, const SequenceExample& sequence_example);
    184 
    185 // A family of template functions returning the Features proto of a container
    186 // proto: mutable (pointer) and read-only (reference). ProtoTypes: Example, Features.
    187 template <typename ProtoType>
    188 Features* GetFeatures(ProtoType* proto);
    189 
    190 template <typename ProtoType>
    191 const Features& GetFeatures(const ProtoType& proto);
    192 
    193 // Base declaration of a family of template functions that return the
    194 // read-only repeated field holding the values of `feature` for FeatureType.
    195 template <typename FeatureType>
    196 const typename internal::RepeatedFieldTrait<FeatureType>::Type&
    197 GetFeatureValues(const Feature& feature);
    198 
    199 // Returns a read only repeated field corresponding to a feature with the
    200 // specified name and FeatureType. Supported ProtoTypes: Example, Features.
    201 template <typename FeatureType, typename ProtoType>
    202 const typename internal::RepeatedFieldTrait<FeatureType>::Type&
    203 GetFeatureValues(const string& key, const ProtoType& proto) {
    204   return GetFeatureValues<FeatureType>(GetFeatures(proto).feature().at(key));
    205 }
    206 
    207 // Returns a mutable repeated field holding the values of `feature`.
    208 template <typename FeatureType>
    209 typename internal::RepeatedFieldTrait<FeatureType>::Type* GetFeatureValues(
    210     Feature* feature);
    211 
    212 // Returns a mutable repeated field corresponding to a feature with the
    213 // specified name and FeatureType. Supported ProtoTypes: Example, Features.
    214 template <typename FeatureType, typename ProtoType>
    215 typename internal::RepeatedFieldTrait<FeatureType>::Type* GetFeatureValues(
    216     const string& key, ProtoType* proto) {
    217   ::tensorflow::Feature& feature =
    218       (*GetFeatures(proto)->mutable_feature())[key];
    219   return GetFeatureValues<FeatureType>(&feature);
    220 }
    221 
    222 // Returns a Feature proto for the specified key, creates a new if necessary.
    223 // Supported types for the proto: Example, Features.
    224 template <typename ProtoType>
    225 Feature* GetFeature(const string& key, ProtoType* proto) {
    226   return &(*GetFeatures(proto)->mutable_feature())[key];
    227 }
    228 
    229 // Returns the read-only repeated field of Features stored under feature_list `key`.
    230 const protobuf::RepeatedPtrField<Feature>& GetFeatureList(
    231     const string& key, const SequenceExample& sequence_example);
    232 
    233 // Returns a mutable repeated field with the features stored under
    234 // `feature_list_key`. A new FeatureList is created if necessary.
    235 protobuf::RepeatedPtrField<Feature>* GetFeatureList(
    236     const string& feature_list_key, SequenceExample* sequence_example);
    237 
    238 template <typename IteratorType>
    239 void AppendFeatureValues(IteratorType first, IteratorType last,
    240                          Feature* feature) {
    241   using FeatureType = typename internal::FeatureTrait<
    242       typename std::iterator_traits<IteratorType>::value_type>::Type;
    243   std::copy(first, last,
    244             protobuf::RepeatedFieldBackInserter(
    245                 GetFeatureValues<FeatureType>(feature)));
    246 }
    247 
    248 template <typename ValueType>
    249 void AppendFeatureValues(std::initializer_list<ValueType> container,
    250                          Feature* feature) {
    251   AppendFeatureValues(container.begin(), container.end(), feature);
    252 }
    253 
    254 template <typename ContainerType>
    255 void AppendFeatureValues(const ContainerType& container, Feature* feature) {
    256   using IteratorType = typename ContainerType::const_iterator;
    257   AppendFeatureValues<IteratorType>(container.begin(), container.end(),
    258                                     feature);
    259 }
    260 
    261 // Copies elements from the range, defined by [first, last) into the feature
    262 // obtainable from the (proto, key) combination.
    263 template <typename IteratorType, typename ProtoType>
    264 void AppendFeatureValues(IteratorType first, IteratorType last,
    265                          const string& key, ProtoType* proto) {
    266   AppendFeatureValues(first, last, GetFeature(key, GetFeatures(proto)));
    267 }
    268 
    269 // Copies all elements from the container into a feature.
    270 template <typename ContainerType, typename ProtoType>
    271 void AppendFeatureValues(const ContainerType& container, const string& key,
    272                          ProtoType* proto) {
    273   using IteratorType = typename ContainerType::const_iterator;
    274   AppendFeatureValues<IteratorType>(container.begin(), container.end(), key,
    275                                     proto);
    276 }
    277 
    278 // Copies all elements from the initializer list into a Feature contained by
    279 // Features or Example proto.
    280 template <typename ValueType, typename ProtoType>
    281 void AppendFeatureValues(std::initializer_list<ValueType> container,
    282                          const string& key, ProtoType* proto) {
    283   using IteratorType =
    284       typename std::initializer_list<ValueType>::const_iterator;
    285   AppendFeatureValues<IteratorType>(container.begin(), container.end(), key,
    286                                     proto);
    287 }
    288 
    289 // Returns true if a feature with the specified key belongs to the Features.
    290 // The template parameter pack accepts zero or one template argument, which
    291 // is the FeatureType. If FeatureType is not specified (zero template
    292 // arguments) the function does not check the feature type. Otherwise it
    293 // returns false if the feature has a different type.
    294 template <typename... FeatureType>
    295 bool HasFeature(const string& key, const Features& features);
    296 
    297 // Returns true if a feature with the specified key belongs to the Example.
    298 // Doesn't check feature type if used without FeatureType, otherwise the
    299 // specialized versions return false if the feature has a wrong type.
    300 template <typename... FeatureType>
    301 bool HasFeature(const string& key, const Example& example) {
    302   return HasFeature<FeatureType...>(key, GetFeatures(example));
    303 };
    304 
    305 // DEPRECATED: use HasFeature instead.
    306 // TODO(gorban): update all clients in a followup CL.
    307 template <typename... FeatureType>
    308 bool ExampleHasFeature(const string& key, const Example& example) {
    309   return HasFeature<FeatureType...>(key, example);
    310 }
    311 
    312 }  // namespace tensorflow
    313 #endif  // TENSORFLOW_EXAMPLE_FEATURE_H_
    314