Home | History | Annotate | Download | only in example
      1 // Protocol messages for describing input data Examples for machine learning
      2 // model training or inference.
      3 syntax = "proto3";
      4 
      5 import "tensorflow/core/example/feature.proto";
      6 option cc_enable_arenas = true;
      7 option java_outer_classname = "ExampleProtos";
      8 option java_multiple_files = true;
      9 option java_package = "org.tensorflow.example";
     10 
     11 package tensorflow;
     12 
     13 // An Example is a mostly-normalized data format for storing data for
     14 // training and inference.  It contains a key-value store (features), where
     15 // each key (string) maps to a Feature message (a oneof of packed BytesList,
     16 // FloatList, or Int64List).  This flexible and compact format allows the
     17 // storage of large amounts of typed data, but requires that the data shape
     18 // and use be determined by the configuration files and parsers that are used to
     19 // read and write this format.  That is, the Example is mostly *not* a
     20 // self-describing format.  In TensorFlow, Examples are read in row-major
     21 // format, so any configuration that describes data with rank-2 or above
     22 // should keep this in mind.  For example, to store an M x N matrix of Bytes,
     23 // the BytesList must contain M*N bytes, with M rows of N contiguous values
     24 // each.  That is, the BytesList value must store the matrix as:
     25 //     .... row 0 .... .... row 1 .... // ...........  // ... row M-1 ....
     26 //
     27 // An Example for a movie recommendation application:
     28 //   features {
     29 //     feature {
     30 //       key: "age"
     31 //       value { float_list {
     32 //         value: 29.0
     33 //       }}
     34 //     }
     35 //     feature {
     36 //       key: "movie"
     37 //       value { bytes_list {
     38 //         value: "The Shawshank Redemption"
     39 //         value: "Fight Club"
     40 //       }}
     41 //     }
     42 //     feature {
     43 //       key: "movie_ratings"
     44 //       value { float_list {
     45 //         value: 9.0
     46 //         value: 9.7
     47 //       }}
     48 //     }
     49 //     feature {
     50 //       key: "suggestion"
     51 //       value { bytes_list {
     52 //         value: "Inception"
     53 //       }}
     54 //     }
     55 //     # Note that this feature exists to be used as a label in training.
     56 //     # E.g., if training a logistic regression model to predict purchase
     57 //     # probability in our learning tool we would set the label feature to
     58 //     # "suggestion_purchased".
     59 //     feature {
     60 //       key: "suggestion_purchased"
     61 //       value { float_list {
     62 //         value: 1.0
     63 //       }}
     64 //     }
     65 //     # Similar to "suggestion_purchased" above this feature exists to be used
     66 //     # as a label in training.
     67 //     # E.g., if training a linear regression model to predict purchase
     68 //     # price in our learning tool we would set the label feature to
     69 //     # "purchase_price".
     70 //     feature {
     71 //       key: "purchase_price"
     72 //       value { float_list {
     73 //         value: 9.99
     74 //       }}
     75 //     }
     76 //   }
     77 //
     78 // A conformant Example data set obeys the following conventions:
     79 //   - If a Feature K exists in one example with data type T, it must be of
     80 //       type T in all other examples when present. It may be omitted.
     81 //   - The number of instances of Feature K list data may vary across examples,
     82 //       depending on the requirements of the model.
     83 //   - If a Feature K doesn't exist in an example, a K-specific default will be
     84 //       used, if configured.
     85 //   - If a Feature K exists in an example but contains no items, the intent
     86 //       is considered to be an empty tensor and no default will be used.
     87 
     88 message Example {
          // Key-value store of this example's features: each string key maps to
          // a Feature holding a BytesList, FloatList, or Int64List. The Features
          // type is declared outside this file (see the feature.proto import).
     89   Features features = 1;
     90 }
     91 
     92 // A SequenceExample is an Example representing one or more sequences, and
     93 // some context.  The context contains features which apply to the entire
     94 // example. The feature_lists contain a key-value map where each key is
     95 // associated with a repeated set of Feature messages (a FeatureList).
     96 // A FeatureList thus represents the values of a feature identified by its key
     97 // over time / frames.
     98 //
     99 // Below is a SequenceExample for a movie recommendation application recording a
    100 // sequence of ratings by a user. The time-independent features ("locale",
    101 // "age", "favorites") describing the user are part of the context. The sequence
    102 // of movies the user rated are part of the feature_lists. For each movie in the
    103 // sequence we have information on its name and actors and the user's rating.
    104 // This information is recorded in three separate feature_list(s).
    105 // In the example below there are only two movies. All three feature_list(s),
    106 // namely "movie_ratings", "movie_names", and "actors" have a feature value for
    107 // both movies. Note that each "actors" feature is itself a bytes_list with
    108 // multiple strings per movie.
    109 //
    110 // context: {
    111 //   feature: {
    112 //     key  : "locale"
    113 //     value: {
    114 //       bytes_list: {
    115 //         value: [ "pt_BR" ]
    116 //       }
    117 //     }
    118 //   }
    119 //   feature: {
    120 //     key  : "age"
    121 //     value: {
    122 //       float_list: {
    123 //         value: [ 19.0 ]
    124 //       }
    125 //     }
    126 //   }
    127 //   feature: {
    128 //     key  : "favorites"
    129 //     value: {
    130 //       bytes_list: {
    131 //         value: [ "Majesty Rose", "Savannah Outen", "One Direction" ]
    132 //       }
    133 //     }
    134 //   }
    135 // }
    136 // feature_lists: {
    137 //   feature_list: {
    138 //     key  : "movie_ratings"
    139 //     value: {
    140 //       feature: {
    141 //         float_list: {
    142 //           value: [ 4.5 ]
    143 //         }
    144 //       }
    145 //       feature: {
    146 //         float_list: {
    147 //           value: [ 5.0 ]
    148 //         }
    149 //       }
    150 //     }
    151 //   }
    152 //   feature_list: {
    153 //     key  : "movie_names"
    154 //     value: {
    155 //       feature: {
    156 //         bytes_list: {
    157 //           value: [ "The Shawshank Redemption" ]
    158 //         }
    159 //       }
    160 //       feature: {
    161 //         bytes_list: {
    162 //           value: [ "Fight Club" ]
    163 //         }
    164 //       }
    165 //     }
    166 //   }
    167 //   feature_list: {
    168 //     key  : "actors"
    169 //     value: {
    170 //       feature: {
    171 //         bytes_list: {
    172 //           value: [ "Tim Robbins", "Morgan Freeman" ]
    173 //         }
    174 //       }
    175 //       feature: {
    176 //         bytes_list: {
    177 //           value: [ "Brad Pitt", "Edward Norton", "Helena Bonham Carter" ]
    178 //         }
    179 //       }
    180 //     }
    181 //   }
    182 // }
    183 //
    184 // A conformant SequenceExample data set obeys the following conventions:
    185 //
    186 // Context:
    187 //   - All conformant context features K must obey the same conventions as
    188 //     a conformant Example's features (see above).
    189 // Feature lists:
    190 //   - A FeatureList L may be missing in an example; it is up to the
    191 //     parser configuration to determine if this is allowed or considered
    192 //     an empty list (zero length).
    193 //   - If a FeatureList L exists, it may be empty (zero length).
    194 //   - If a FeatureList L is non-empty, all features within the FeatureList
    195 //     must have the same data type T. Even across SequenceExamples, the type T
    196 //     of the FeatureList identified by the same key must be the same. An entry
    197 //     without any values may serve as an empty feature.
    198 //   - If a FeatureList L is non-empty, it is up to the parser configuration
    199 //     to determine if all features within the FeatureList must
    200 //     have the same size.  The same holds for this FeatureList across multiple
    201 //     examples.
    202 //
    203 // Examples of conformant and non-conformant examples' FeatureLists:
    204 //
    205 // Conformant FeatureLists:
    206 //    feature_lists: { feature_list: {
    207 //      key: "movie_ratings"
    208 //      value: { feature: { float_list: { value: [ 4.5 ] } }
    209 //               feature: { float_list: { value: [ 5.0 ] } } }
    210 //    } }
    211 //
    212 // Non-conformant FeatureLists (mismatched types):
    213 //    feature_lists: { feature_list: {
    214 //      key: "movie_ratings"
    215 //      value: { feature: { float_list: { value: [ 4.5 ] } }
    216 //               feature: { int64_list: { value: [ 5 ] } } }
    217 //    } }
    218 //
    219 // Conditionally conformant FeatureLists, the parser configuration determines
    220 // if the feature sizes must match:
    221 //    feature_lists: { feature_list: {
    222 //      key: "movie_ratings"
    223 //      value: { feature: { float_list: { value: [ 4.5 ] } }
    224 //               feature: { float_list: { value: [ 5.0, 6.0 ] } } }
    225 //    } }
    226 //
    227 // Conformant pair of SequenceExamples (FeatureLists of different lengths):
    228 //    feature_lists: { feature_list: {
    229 //      key: "movie_ratings"
    230 //      value: { feature: { float_list: { value: [ 4.5 ] } }
    231 //               feature: { float_list: { value: [ 5.0 ] } } }
    232 //    } }
    233 // and:
    234 //    feature_lists: { feature_list: {
    235 //      key: "movie_ratings"
    236 //      value: { feature: { float_list: { value: [ 4.5 ] } }
    237 //               feature: { float_list: { value: [ 5.0 ] } }
    238 //               feature: { float_list: { value: [ 2.0 ] } } }
    239 //    } }
    240 //
    241 // Conformant pair of SequenceExamples (second FeatureList is empty):
    242 //    feature_lists: { feature_list: {
    243 //      key: "movie_ratings"
    244 //      value: { feature: { float_list: { value: [ 4.5 ] } }
    245 //               feature: { float_list: { value: [ 5.0 ] } } }
    246 //    } }
    247 // and:
    248 //    feature_lists: { feature_list: {
    249 //      key: "movie_ratings"
    250 //      value: { }
    251 //    } }
    252 //
    253 // Conditionally conformant pair of SequenceExample, the parser configuration
    254 // determines if the second feature_lists is consistent (zero-length) or
    255 // invalid (missing "movie_ratings"):
    256 //    feature_lists: { feature_list: {
    257 //      key: "movie_ratings"
    258 //      value: { feature: { float_list: { value: [ 4.5 ] } }
    259 //               feature: { float_list: { value: [ 5.0 ] } } }
    260 //    } }
    261 // and:
    262 //    feature_lists: { }
    263 //
    264 // Non-conformant pair of SequenceExample (mismatched types)
    265 //    feature_lists: { feature_list: {
    266 //      key: "movie_ratings"
    267 //      value: { feature: { float_list: { value: [ 4.5 ] } }
    268 //               feature: { float_list: { value: [ 5.0 ] } } }
    269 //    } }
    270 // and:
    271 //    feature_lists: { feature_list: {
    272 //      key: "movie_ratings"
    273 //      value: { feature: { int64_list: { value: [ 4 ] } }
    274 //               feature: { int64_list: { value: [ 5 ] } }
    275 //               feature: { int64_list: { value: [ 2 ] } } }
    276 //    } }
    277 //
    278 // Conditionally conformant pair of SequenceExample; the parser configuration
    279 // determines if the feature sizes must match:
    280 //    feature_lists: { feature_list: {
    281 //      key: "movie_ratings"
    282 //      value: { feature: { float_list: { value: [ 4.5 ] } }
    283 //               feature: { float_list: { value: [ 5.0 ] } } }
    284 //    } }
    285 // and:
    286 //    feature_lists: { feature_list: {
    287 //      key: "movie_ratings"
    288 //      value: { feature: { float_list: { value: [ 4.0 ] } }
    289 //               feature: { float_list: { value: [ 5.0, 3.0 ] } } }
    290 //    } }
    291 
    292 message SequenceExample {
          // Features that apply to the entire example, i.e. the time-independent
          // part (such as "locale" or "age" in the sample above). Follows the same
          // conventions as the features of a conformant Example.
    293   Features context = 1;
          // Keyed FeatureLists: each key identifies one feature and its
          // FeatureList holds that feature's values over time / frames. A
          // FeatureList may be absent or empty; how that is treated is up to the
          // parser configuration.
    294   FeatureLists feature_lists = 2;
    295 }
    296