// Protocol messages for describing input data Examples for machine learning
// model training or inference.
syntax = "proto3";

package tensorflow;

import "tensorflow/core/example/feature.proto";

option cc_enable_arenas = true;
option java_outer_classname = "ExampleProtos";
option java_multiple_files = true;
option java_package = "org.tensorflow.example";

// An Example is a mostly-normalized data format for storing data for
// training and inference. It contains a key-value store (features), where
// each key (string) maps to a Feature message (which is oneof packed BytesList,
// FloatList, or Int64List). This flexible and compact format allows the
// storage of large amounts of typed data, but requires that the data shape
// and use be determined by the configuration files and parsers that are used to
// read and write this format. That is, the Example is mostly *not* a
// self-describing format. In TensorFlow, Examples are read in row-major
// format, so any configuration that describes data with rank-2 or above
// should keep this in mind. For example, to store an M x N matrix of Bytes,
// the BytesList must contain M*N bytes, with M rows of N contiguous values
// each. That is, the BytesList value must store the matrix as:
//     .... row 0 .... .... row 1 .... ........... ... row M-1 ....
//
// An Example for a movie recommendation application:
//   features {
//     feature {
//       key: "age"
//       value { float_list {
//         value: 29.0
//       }}
//     }
//     feature {
//       key: "movie"
//       value { bytes_list {
//         value: "The Shawshank Redemption"
//         value: "Fight Club"
//       }}
//     }
//     feature {
//       key: "movie_ratings"
//       value { float_list {
//         value: 9.0
//         value: 9.7
//       }}
//     }
//     feature {
//       key: "suggestion"
//       value { bytes_list {
//         value: "Inception"
//       }}
//     }
//     # Note that this feature exists to be used as a label in training.
//     # E.g., if training a logistic regression model to predict purchase
//     # probability in our learning tool we would set the label feature to
//     # "suggestion_purchased".
//     feature {
//       key: "suggestion_purchased"
//       value { float_list {
//         value: 1.0
//       }}
//     }
//     # Similar to "suggestion_purchased" above this feature exists to be used
//     # as a label in training.
//     # E.g., if training a linear regression model to predict purchase
//     # price in our learning tool we would set the label feature to
//     # "purchase_price".
//     feature {
//       key: "purchase_price"
//       value { float_list {
//         value: 9.99
//       }}
//     }
//   }
//
// A conformant Example data set obeys the following conventions:
//   - If a Feature K exists in one example with data type T, it must be of
//     type T in all other examples when present. It may be omitted.
//   - The number of instances of Feature K list data may vary across examples,
//     depending on the requirements of the model.
//   - If a Feature K doesn't exist in an example, a K-specific default will be
//     used, if configured.
//   - If a Feature K exists in an example but contains no items, the intent
//     is considered to be an empty tensor and no default will be used.
message Example {
  // The key-value store of features that makes up this example; each string
  // key maps to a Feature message.
  Features features = 1;
}

// A SequenceExample is an Example representing one or more sequences, and
// some context. The context contains features which apply to the entire
// example. The feature_lists contain a key, value map where each key is
// associated with a repeated set of Features (a FeatureList).
// A FeatureList thus represents the values of a feature identified by its key
// over time / frames.
//
// Below is a SequenceExample for a movie recommendation application recording a
// sequence of ratings by a user. The time-independent features ("locale",
// "age", "favorites") describing the user are part of the context. The sequence
// of movies the user rated are part of the feature_lists. For each movie in the
// sequence we have information on its name and actors and the user's rating.
// This information is recorded in three separate feature_list(s).
// In the example below there are only two movies. All three feature_list(s),
// namely "movie_ratings", "movie_names", and "actors" have a feature value for
// both movies. Note that "actors" is itself a bytes_list with multiple
// strings per movie.
//
// context: {
//   feature: {
//     key  : "locale"
//     value: {
//       bytes_list: {
//         value: [ "pt_BR" ]
//       }
//     }
//   }
//   feature: {
//     key  : "age"
//     value: {
//       float_list: {
//         value: [ 19.0 ]
//       }
//     }
//   }
//   feature: {
//     key  : "favorites"
//     value: {
//       bytes_list: {
//         value: [ "Majesty Rose", "Savannah Outen", "One Direction" ]
//       }
//     }
//   }
// }
// feature_lists: {
//   feature_list: {
//     key  : "movie_ratings"
//     value: {
//       feature: {
//         float_list: {
//           value: [ 4.5 ]
//         }
//       }
//       feature: {
//         float_list: {
//           value: [ 5.0 ]
//         }
//       }
//     }
//   }
//   feature_list: {
//     key  : "movie_names"
//     value: {
//       feature: {
//         bytes_list: {
//           value: [ "The Shawshank Redemption" ]
//         }
//       }
//       feature: {
//         bytes_list: {
//           value: [ "Fight Club" ]
//         }
//       }
//     }
//   }
//   feature_list: {
//     key  : "actors"
//     value: {
//       feature: {
//         bytes_list: {
//           value: [ "Tim Robbins", "Morgan Freeman" ]
//         }
//       }
//       feature: {
//         bytes_list: {
//           value: [ "Brad Pitt", "Edward Norton", "Helena Bonham Carter" ]
//         }
//       }
//     }
//   }
// }
//
// A conformant SequenceExample data set obeys the following conventions:
//
// Context:
//   - All conformant context features K must obey the same conventions as
//     a conformant Example's features (see above).
// Feature lists:
//   - A FeatureList L may be missing in an example; it is up to the
//     parser configuration to determine if this is allowed or considered
//     an empty list (zero length).
//   - If a FeatureList L exists, it may be empty (zero length).
//   - If a FeatureList L is non-empty, all features within the FeatureList
//     must have the same data type T. Even across SequenceExamples, the type T
//     of the FeatureList identified by the same key must be the same. An entry
//     without any values may serve as an empty feature.
//   - If a FeatureList L is non-empty, it is up to the parser configuration
//     to determine if all features within the FeatureList must
//     have the same size. The same holds for this FeatureList across multiple
//     examples.
//
// Examples of conformant and non-conformant examples' FeatureLists:
//
// Conformant FeatureLists:
//    feature_lists: { feature_list: {
//      key: "movie_ratings"
//      value: { feature: { float_list: { value: [ 4.5 ] } }
//               feature: { float_list: { value: [ 5.0 ] } } }
//    } }
//
// Non-conformant FeatureLists (mismatched types):
//    feature_lists: { feature_list: {
//      key: "movie_ratings"
//      value: { feature: { float_list: { value: [ 4.5 ] } }
//               feature: { int64_list: { value: [ 5 ] } } }
//    } }
//
// Conditionally conformant FeatureLists, the parser configuration determines
// if the feature sizes must match:
//    feature_lists: { feature_list: {
//      key: "movie_ratings"
//      value: { feature: { float_list: { value: [ 4.5 ] } }
//               feature: { float_list: { value: [ 5.0, 6.0 ] } } }
//    } }
//
// Conformant pair of SequenceExample:
//    feature_lists: { feature_list: {
//      key: "movie_ratings"
//      value: { feature: { float_list: { value: [ 4.5 ] } }
//               feature: { float_list: { value: [ 5.0 ] } } }
//    } }
// and:
//    feature_lists: { feature_list: {
//      key: "movie_ratings"
//      value: { feature: { float_list: { value: [ 4.5 ] } }
//               feature: { float_list: { value: [ 5.0 ] } }
//               feature: { float_list: { value: [ 2.0 ] } } }
//    } }
//
// Conformant pair of SequenceExample:
//    feature_lists: { feature_list: {
//      key: "movie_ratings"
//      value: { feature: { float_list: { value: [ 4.5 ] } }
//               feature: { float_list: { value: [ 5.0 ] } } }
//    } }
// and:
//    feature_lists: { feature_list: {
//      key: "movie_ratings"
//      value: { }
//    } }
//
// Conditionally conformant pair of SequenceExample, the parser configuration
// determines if the second feature_lists is consistent (zero-length) or
// invalid (missing "movie_ratings"):
//    feature_lists: { feature_list: {
//      key: "movie_ratings"
//      value: { feature: { float_list: { value: [ 4.5 ] } }
//               feature: { float_list: { value: [ 5.0 ] } } }
//    } }
// and:
//    feature_lists: { }
//
// Non-conformant pair of SequenceExample (mismatched types):
//    feature_lists: { feature_list: {
//      key: "movie_ratings"
//      value: { feature: { float_list: { value: [ 4.5 ] } }
//               feature: { float_list: { value: [ 5.0 ] } } }
//    } }
// and:
//    feature_lists: { feature_list: {
//      key: "movie_ratings"
//      value: { feature: { int64_list: { value: [ 4 ] } }
//               feature: { int64_list: { value: [ 5 ] } }
//               feature: { int64_list: { value: [ 2 ] } } }
//    } }
//
// Conditionally conformant pair of SequenceExample; the parser configuration
// determines if the feature sizes must match:
//    feature_lists: { feature_list: {
//      key: "movie_ratings"
//      value: { feature: { float_list: { value: [ 4.5 ] } }
//               feature: { float_list: { value: [ 5.0 ] } } }
//    } }
// and:
//    feature_lists: { feature_list: {
//      key: "movie_ratings"
//      value: { feature: { float_list: { value: [ 4.0 ] } }
//               feature: { float_list: { value: [ 5.0, 3.0 ] } } }
//    } }
message SequenceExample {
  // Features which apply to the entire example (the time-independent part).
  Features context = 1;

  // Key, value map where each key is associated with a repeated set of
  // Features (a FeatureList), i.e. the values of that feature over
  // time / frames.
  FeatureLists feature_lists = 2;
}