/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
// Main abstraction controlling the tflite interpreter.
// See context.h for the API for defining operations (TfLiteRegistration).
#ifndef TENSORFLOW_CONTRIB_LITE_INTERPRETER_H_
#define TENSORFLOW_CONTRIB_LITE_INTERPRETER_H_

#include <cstdio>
#include <cstdlib>
#include <vector>

#include "tensorflow/contrib/lite/allocation.h"
#include "tensorflow/contrib/lite/context.h"
#include "tensorflow/contrib/lite/error_reporter.h"
#include "tensorflow/contrib/lite/memory_planner.h"

namespace tflite {

// Map statically from a C++ type to a TfLiteType (used below for safe casts).
template <class T>
constexpr TfLiteType typeToTfLiteType() {
  return kTfLiteNoType;
}
template <>
constexpr TfLiteType typeToTfLiteType<int>() {
  return kTfLiteInt32;
}
template <>
constexpr TfLiteType typeToTfLiteType<int64_t>() {
  return kTfLiteInt64;
}
template <>
constexpr TfLiteType typeToTfLiteType<float>() {
  return kTfLiteFloat32;
}
template <>
constexpr TfLiteType typeToTfLiteType<unsigned char>() {
  return kTfLiteUInt8;
}

// Forward declare since NNAPIDelegate uses Interpreter.
class NNAPIDelegate;

// An interpreter for a graph of nodes that input and output from tensors.
// Each node of the graph processes a set of input tensors and produces a
// set of output tensors. All input/output tensors are referenced by index.
//
// Usage:
//
// -- Create basic model.
// Interpreter foo;
// foo.AddTensors(2);
// foo.SetInputs({0});
// foo.SetOutputs({1});
// foo.SetTensorParametersReadWrite(0, ...);
// foo.SetTensorParametersReadOnly(1, ...);
// foo.AddNodeWithParameters({0}, {1}, ...);
//
// -- Resize input array to length 1.
// foo.ResizeInputTensor(0, {1});
// foo.AllocateTensors();
// -- Install array data.
// foo.typed_tensor<float>(0)[0] = 3;
// foo.Invoke();
// foo.typed_tensor<float>(0)[0] = 4;
// foo.Invoke();
// -- Resize input array and set data.
// foo.ResizeInputTensor(0, {2});
// foo.AllocateTensors();
// foo.typed_tensor<float>(0)[0] = 4;
// foo.typed_tensor<float>(0)[1] = 8;
// foo.Invoke();

struct TfLiteIntArrayDeleter {
  void operator()(TfLiteIntArray* a) {
    if (a) TfLiteIntArrayFree(a);
  }
};

class Interpreter {
 public:
  // Instantiate an interpreter. All errors associated with reading and
  // processing this model will be forwarded to the error_reporter object.
  //
  // Note, if error_reporter is nullptr, then a default StderrReporter is
  // used.
  explicit Interpreter(ErrorReporter* error_reporter = DefaultErrorReporter());

  ~Interpreter();

  Interpreter(const Interpreter&) = delete;
  Interpreter& operator=(const Interpreter&) = delete;
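
  // Example: routing interpreter errors to a custom sink. A minimal sketch;
  // `MyReporter` is a hypothetical class, but ErrorReporter and its
  // Report(format, va_list) hook come from error_reporter.h above.
  //
  //   class MyReporter : public ErrorReporter {
  //     int Report(const char* format, va_list args) override {
  //       vfprintf(stderr, format, args);  // or forward to any other log
  //       return 0;
  //     }
  //   };
  //   MyReporter reporter;
  //   Interpreter interpreter(&reporter);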

  // Functions to build the interpreter.

  // Provide a list of tensor indexes that are inputs to the model.
  // Each index is bounds checked, and failures modify the consistent_ flag
  // of the interpreter.
  TfLiteStatus SetInputs(std::vector<int> inputs);

  // Provide a list of tensor indexes that are outputs of the model.
  // Each index is bounds checked, and failures modify the consistent_ flag
  // of the interpreter.
  TfLiteStatus SetOutputs(std::vector<int> outputs);

  // Adds a node with the given parameters and returns the index of the new
  // node in `node_index` (optionally). The interpreter takes ownership of
  // `builtin_data` and destroys it with `free`. Ownership of `init_data`
  // remains with the caller.
  TfLiteStatus AddNodeWithParameters(const std::vector<int>& inputs,
                                     const std::vector<int>& outputs,
                                     const char* init_data,
                                     size_t init_data_size, void* builtin_data,
                                     const TfLiteRegistration* registration,
                                     int* node_index = nullptr);

  // Adds `tensors_to_add` tensors, preserving pre-existing Tensor entries.
  // The value pointed to by `first_new_tensor_index` will be set to the
  // index of the first new tensor if `first_new_tensor_index` is non-null.
  TfLiteStatus AddTensors(int tensors_to_add,
                          int* first_new_tensor_index = nullptr);

  // Set description of inputs/outputs/data/fptrs for tensor `tensor_index`.
  // This variant assumes an external buffer of size `bytes` has already been
  // allocated. The lifetime of `buffer` must be at least as long as that of
  // the Interpreter.
  TfLiteStatus SetTensorParametersReadOnly(
      int tensor_index, TfLiteType type, const char* name,
      const std::vector<int>& dims, TfLiteQuantizationParams quantization,
      const char* buffer, size_t bytes, const Allocation* allocation = nullptr);

  // Set description of inputs/outputs/data/fptrs for tensor `tensor_index`.
  // This variant leaves allocation of the underlying buffer to the
  // interpreter (see AllocateTensors()).
  TfLiteStatus SetTensorParametersReadWrite(
      int tensor_index, TfLiteType type, const char* name,
      const std::vector<int>& dims, TfLiteQuantizationParams quantization);

  // Functions to access tensor data.

  // Read only access to list of inputs.
  const std::vector<int>& inputs() const { return inputs_; }

  // Return the name of a given input. The given index must be between 0 and
  // inputs().size().
  const char* GetInputName(int index) const {
    return context_.tensors[inputs_[index]].name;
  }

  // Read only access to list of outputs.
  const std::vector<int>& outputs() const { return outputs_; }

  // Return the name of a given output. The given index must be between 0 and
  // outputs().size().
  const char* GetOutputName(int index) const {
    return context_.tensors[outputs_[index]].name;
  }

  // Return the number of tensors in the model.
  int tensors_size() const { return context_.tensors_size; }

  // Return the number of ops in the model.
  int nodes_size() const { return nodes_and_registration_.size(); }

  // WARNING: Experimental interface, subject to change.
  const std::vector<int>& execution_plan() const { return execution_plan_; }

  // WARNING: Experimental interface, subject to change.
  // Overrides the execution plan. This bounds checks the indices sent in.
  TfLiteStatus SetExecutionPlan(const std::vector<int>& new_plan);
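
  // Example: running only a prefix of the graph by overriding the plan (an
  // illustrative sketch of the experimental interface above; the node
  // indices are made up).
  //
  //   std::vector<int> new_plan = {0, 1};  // execute just the first two ops
  //   if (interpreter.SetExecutionPlan(new_plan) != kTfLiteOk) {
  //     // one of the indices was out of bounds
  //   }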

  // Get a tensor data structure.
  // TODO(aselle): Create a safe ArrayHandle interface to avoid exposing this
  // read/write access to structure.
  TfLiteTensor* tensor(int tensor_index) {
    if (tensor_index >= context_.tensors_size || tensor_index < 0)
      return nullptr;
    return &context_.tensors[tensor_index];
  }

  // Get a pointer to an operation and registration data structure if in
  // bounds.
  // TODO(aselle): Create a safe ArrayHandle interface to avoid exposing this
  // read/write access to structure.
  const std::pair<TfLiteNode, TfLiteRegistration>* node_and_registration(
      int node_index) {
    if (node_index >= nodes_and_registration_.size() || node_index < 0)
      return nullptr;
    return &nodes_and_registration_[node_index];
  }

  // Perform a checked cast to the appropriate tensor type, returning nullptr
  // if the tensor's type does not match the template parameter T.
  template <class T>
  T* typed_tensor(int tensor_index) {
    if (TfLiteTensor* tensor_ptr = tensor(tensor_index)) {
      if (tensor_ptr->type == typeToTfLiteType<T>()) {
        return reinterpret_cast<T*>(tensor_ptr->data.raw);
      }
    }
    return nullptr;
  }

  // Return a pointer into the data of a given input tensor. The given index
  // must be between 0 and inputs().size().
  template <class T>
  T* typed_input_tensor(int index) {
    return typed_tensor<T>(inputs_[index]);
  }

  // Return a pointer into the data of a given output tensor. The given index
  // must be between 0 and outputs().size().
  template <class T>
  T* typed_output_tensor(int index) {
    return typed_tensor<T>(outputs_[index]);
  }

  // Change the dimensionality of a given tensor. Note, this is only
  // acceptable for tensor indices that are inputs.
  // Returns status of failure or success.
  // TODO(aselle): Consider implementing ArraySlice equivalent to make this
  // more adept at accepting data without an extra copy. Use absl::ArraySlice
  // if our partners determine that dependency is acceptable.
  TfLiteStatus ResizeInputTensor(int tensor_index,
                                 const std::vector<int>& dims);

  // Update allocations for all tensors. This will redim dependent tensors
  // using the input tensor dimensionality as given. This is relatively
  // expensive. If you know that your sizes are not changing, you need not
  // call this. Returns status of success or failure.
  TfLiteStatus AllocateTensors();

  // Invoke the interpreter (run the whole graph in dependency order).
  //
  // NOTE: It is possible that the interpreter is not in a ready state to
  // evaluate (e.g. if ResizeInputTensor() has been called without a
  // subsequent AllocateTensors()).
  // Returns status of success or failure.
  TfLiteStatus Invoke();

  // Enable or disable the NN API (true to enable).
  void UseNNAPI(bool enable);

  // Set the number of threads available to the interpreter.
  void SetNumThreads(int num_threads);

  // Allow a delegate to look at the graph and modify it to handle parts of
  // the graph itself. After this is called, the graph may contain new nodes
  // that replace one or more nodes.
  TfLiteStatus ModifyGraphWithDelegate(TfLiteDelegate* delegate);
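
  // Example: applying a delegate after the graph has been built. A sketch;
  // `my_delegate` stands in for any configured TfLiteDelegate instance,
  // e.g. one filled in by a vendor library.
  //
  //   TfLiteDelegate my_delegate = ...;
  //   if (interpreter.ModifyGraphWithDelegate(&my_delegate) != kTfLiteOk) {
  //     // the delegate rejected the graph; fall back to the default kernels
  //   }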

 private:
  // Give `op_reg` a chance to initialize itself using the contents of
  // `buffer`.
  void* OpInit(const TfLiteRegistration& op_reg, const char* buffer,
               size_t length) {
    if (op_reg.init == nullptr) return nullptr;
    return op_reg.init(&context_, buffer, length);
  }

  // Let `op_reg` release any memory it might have allocated via `OpInit`.
  void OpFree(const TfLiteRegistration& op_reg, void* buffer) {
    if (op_reg.free == nullptr) return;
    if (buffer) {
      op_reg.free(&context_, buffer);
    }
  }

  // Prepare the given `node` for execution.
  TfLiteStatus OpPrepare(const TfLiteRegistration& op_reg, TfLiteNode* node) {
    if (op_reg.prepare == nullptr) return kTfLiteOk;
    return op_reg.prepare(&context_, node);
  }

  // Invoke the operator represented by `node`.
  TfLiteStatus OpInvoke(const TfLiteRegistration& op_reg, TfLiteNode* node) {
    if (op_reg.invoke == nullptr) return kTfLiteError;
    return op_reg.invoke(&context_, node);
  }

  // Call OpPrepare() for as many ops as possible, allocating memory for their
  // tensors. If an op containing dynamic tensors is found, preparation will
  // be postponed until this function is called again. This allows the
  // interpreter to wait until Invoke() to resolve the sizes of dynamic
  // tensors.
  TfLiteStatus PrepareOpsAndTensors();

  // Call OpPrepare() for all ops starting at `first_execution_plan_index`.
  // Stop when a dynamic tensor is found or all ops have been prepared. Fill
  // `last_execution_plan_index_prepared` with the index of the op containing
  // dynamic tensors, or of the last op in the plan.
  TfLiteStatus PrepareOpsStartingAt(int first_execution_plan_index,
                                    int* last_execution_plan_index_prepared);

  // Tensors needed by the interpreter. Use `AddTensors` to add more blank
  // tensor entries. Note, `tensors_.data()` needs to be synchronized to the
  // `context_` whenever this std::vector is reallocated. Currently this
  // only happens in `AddTensors()`.
  std::vector<TfLiteTensor> tensors_;

  // Check if an array of tensor indices is valid with respect to the Tensor
  // array.
  // NOTE: this changes consistent_ to be false if indices are out of bounds.
  TfLiteStatus CheckTensorIndices(const char* label, const int* indices,
                                  int length);

  // Compute the number of bytes required to represent a tensor with
  // dimensions specified by the array `dims` (of length `dims_size`).
  // Returns the status code and the byte count in `bytes`.
  TfLiteStatus BytesRequired(TfLiteType type, const int* dims, int dims_size,
                             size_t* bytes);

  // Implementation of a tensor-resize request. If the given tensor is of
  // type kTfLiteDynamic it will also be allocated new memory.
  TfLiteStatus ResizeTensorImpl(TfLiteTensor* tensor, TfLiteIntArray* new_size);

  // Report a detailed error string (will be printed to stderr).
  // TODO(aselle): allow user of class to provide alternative destinations.
  void ReportErrorImpl(const char* format, va_list args);

  // Entry point for C node plugin API to request a tensor be resized.
  static TfLiteStatus ResizeTensor(TfLiteContext* context, TfLiteTensor* tensor,
                                   TfLiteIntArray* new_size);

  // Entry point for C node plugin API to report an error.
  static void ReportError(TfLiteContext* context, const char* format, ...);

  // Entry point for C node plugin API to add new tensors.
  static TfLiteStatus AddTensors(TfLiteContext* context, int tensors_to_add,
                                 int* first_new_tensor_index);
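
  // Sketch: how an op kernel reaches the entry points above through the
  // TfLiteContext it receives. Illustrative only; `input` and `output` would
  // come from the kernel's own TfLiteNode inside its prepare callback.
  //
  //   TfLiteIntArray* new_size = TfLiteIntArrayCopy(input->dims);
  //   TF_LITE_ENSURE_STATUS(context->ResizeTensor(context, output, new_size));
  //   // on failure: context->ReportError(context, "resize failed for %s",
  //   //                                  output->name);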

  // WARNING: This is an experimental API and subject to change.
  // Entry point for the C API ReplaceSubgraphsWithDelegateKernels.
  static TfLiteStatus ReplaceSubgraphsWithDelegateKernels(
      TfLiteContext* context, TfLiteRegistration registration,
      const TfLiteIntArray* nodes_to_replace);

  // Update the execution graph to replace some of the nodes with stub
  // nodes. Specifically, any node whose index appears in `nodes_to_replace`
  // will be slated for replacement with a delegate kernel specified by
  // `registration`.
  // WARNING: This is an experimental interface that is subject to change.
  TfLiteStatus ReplaceSubgraphsWithDelegateKernels(
      TfLiteRegistration registration, const TfLiteIntArray* nodes_to_replace);

  // WARNING: This is an experimental interface that is subject to change.
  // Gets the internal pointer to a TensorFlow Lite node by node_index.
  TfLiteStatus GetNodeAndRegistration(int node_index, TfLiteNode** node,
                                      TfLiteRegistration** registration);

  // WARNING: This is an experimental interface that is subject to change.
  // Entry point for C node plugin API to get a node by index.
  static TfLiteStatus GetNodeAndRegistration(struct TfLiteContext*,
                                             int node_index, TfLiteNode** node,
                                             TfLiteRegistration** registration);

  // WARNING: This is an experimental interface that is subject to change.
  // Gets a TfLiteIntArray* representing the execution plan. The caller owns
  // this memory and must free it with TfLiteIntArrayFree().
  TfLiteStatus GetExecutionPlan(TfLiteIntArray** execution_plan);

  // WARNING: This is an experimental interface that is subject to change.
  // Entry point for C node plugin API to get the execution plan.
  static TfLiteStatus GetExecutionPlan(struct TfLiteContext* context,
                                       TfLiteIntArray** execution_plan);
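
  // Sketch: how a delegate's prepare step might use the two entry points
  // above. Hedged pseudocode; it assumes GetExecutionPlan and
  // ReplaceSubgraphsWithDelegateKernels are exposed on TfLiteContext as the
  // comments above describe, and `my_registration`/`nodes_to_replace` are
  // supplied by the delegate.
  //
  //   TfLiteIntArray* plan = nullptr;
  //   TF_LITE_ENSURE_STATUS(context->GetExecutionPlan(context, &plan));
  //   // ... choose the subset of node indices to take over ...
  //   context->ReplaceSubgraphsWithDelegateKernels(context, my_registration,
  //                                                nodes_to_replace);
  //   TfLiteIntArrayFree(plan);  // this header gives the caller ownership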

  // A pure C data structure used to communicate with the pure C plugin
  // interface. To avoid copying tensor metadata, this is also the definitive
  // structure to store tensors.
  TfLiteContext context_;

  // Node inputs/outputs are stored in TfLiteNode, and TfLiteRegistration
  // stores function pointers to the actual implementation.
  std::vector<std::pair<TfLiteNode, TfLiteRegistration>>
      nodes_and_registration_;

  // Whether the model is consistent. That is to say, whether the inputs and
  // outputs of every node and the global inputs and outputs are valid
  // indexes into the tensor array.
  bool consistent_ = true;

  // Whether the model is safe to invoke (if any errors occurred this
  // will be false).
  bool invokable_ = false;

  // Array of indices representing the tensors that are inputs to the
  // interpreter.
  std::vector<int> inputs_;

  // Array of indices representing the tensors that are outputs of the
  // interpreter.
  std::vector<int> outputs_;

  // The error reporter delegate to which tflite forwards errors.
  ErrorReporter* error_reporter_;

  // Index of the next node to prepare.
  // During Invoke(), the Interpreter first allocates the input tensors, which
  // are known to be of fixed size, then allocates the outputs of as many
  // nodes as possible. When it reaches a node that produces a dynamically
  // sized tensor, the Interpreter stops allocating, records the id of the
  // next node to allocate, and executes that node to generate its output
  // tensor before continuing to allocate its successors. This process
  // repeats until all nodes have been executed.
  // NOTE: this relies on the nodes being in topological order.
  int next_execution_plan_index_to_prepare_;

  // WARNING: This is an experimental interface that is subject to change.
  // This is a list of node indices (to index into nodes_and_registration).
  // This represents a valid topological sort (dependency ordered) execution
  // plan. In particular, it is valid for this ordering to contain only a
  // subset of the node indices.
  std::vector<int> execution_plan_;

  // In the future, we'd like a TfLiteIntArray compatible representation.
  // TODO(aselle): replace execution_plan_ with this.
  std::unique_ptr<TfLiteIntArray, TfLiteIntArrayDeleter> plan_cache_;

  // Whether to delegate to NN API.
  std::unique_ptr<NNAPIDelegate> nnapi_delegate_;

  std::unique_ptr<MemoryPlanner> memory_planner_;
};

}  // namespace tflite
#endif  // TENSORFLOW_CONTRIB_LITE_INTERPRETER_H_