/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/
// Main abstraction controlling the TensorFlow Lite interpreter.
// See context.h for the API for defining operations (TfLiteRegistration).
#ifndef TENSORFLOW_CONTRIB_LITE_INTERPRETER_H_
#define TENSORFLOW_CONTRIB_LITE_INTERPRETER_H_

#include <cstdio>
#include <cstdlib>
#include <memory>
#include <utility>
#include <vector>
#include "tensorflow/contrib/lite/allocation.h"
#include "tensorflow/contrib/lite/context.h"
#include "tensorflow/contrib/lite/error_reporter.h"
#include "tensorflow/contrib/lite/memory_planner.h"

namespace tflite {

// Map statically from a C++ type to a TfLiteType (used below for safe casts).
template <class T>
constexpr TfLiteType typeToTfLiteType() {
  return kTfLiteNoType;
}
template <>
constexpr TfLiteType typeToTfLiteType<int>() {
  return kTfLiteInt32;
}
template <>
constexpr TfLiteType typeToTfLiteType<int64_t>() {
  return kTfLiteInt64;
}
template <>
constexpr TfLiteType typeToTfLiteType<float>() {
  return kTfLiteFloat32;
}
template <>
constexpr TfLiteType typeToTfLiteType<unsigned char>() {
  return kTfLiteUInt8;
}
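
// For example, typeToTfLiteType<float>() evaluates to kTfLiteFloat32 at
// compile time; Interpreter::typed_tensor<T>() below relies on this mapping
// to refuse casts whose runtime tensor type disagrees with T:
//
//   static_assert(typeToTfLiteType<float>() == kTfLiteFloat32,
//                 "float maps to kTfLiteFloat32");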

// Forward declare since NNAPIDelegate uses Interpreter.
class NNAPIDelegate;

// An interpreter for a graph of nodes that input and output from tensors.
// Each node of the graph processes a set of input tensors and produces a
// set of output tensors. All input/output tensors are referenced by index.
//
// Usage:
//
// -- Create a basic model.
// Interpreter foo;
// foo.AddTensors(3);
// foo.SetInputs({0});
// foo.SetOutputs({2});
// foo.SetTensorParametersReadWrite(0, ...);
// foo.SetTensorParametersReadOnly(1, ...);
// foo.SetTensorParametersReadWrite(2, ...);
// foo.AddNodeWithParameters({0, 1}, {2}, ...);
//
// -- Resize the input array to length 1.
// foo.ResizeInputTensor(0, {1});
// foo.AllocateTensors();
// -- Install array data.
// foo.typed_tensor<float>(0)[0] = 3;
// foo.Invoke();
// foo.typed_tensor<float>(0)[0] = 4;
// foo.Invoke();
// -- Resize the input array and set new data.
// foo.ResizeInputTensor(0, {2});
// foo.AllocateTensors();
// foo.typed_tensor<float>(0)[0] = 4;
// foo.typed_tensor<float>(0)[1] = 8;
// foo.Invoke();
//

struct TfLiteIntArrayDeleter {
  void operator()(TfLiteIntArray* a) {
    if (a) TfLiteIntArrayFree(a);
  }
};
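
// For example, this deleter lets std::unique_ptr manage a TfLiteIntArray's
// lifetime (as `plan_cache_` does below). A minimal sketch:
//
//   std::unique_ptr<TfLiteIntArray, TfLiteIntArrayDeleter> plan(
//       TfLiteIntArrayCreate(4));  // freed via TfLiteIntArrayFree()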

class Interpreter {
 public:
  // Instantiate an interpreter. All errors associated with reading and
  // processing this model will be forwarded to the error_reporter object.
  //
  // Note, if error_reporter is nullptr, then a default StderrReporter is
  // used.
  explicit Interpreter(ErrorReporter* error_reporter = DefaultErrorReporter());

  ~Interpreter();

  Interpreter(const Interpreter&) = delete;
  Interpreter& operator=(const Interpreter&) = delete;

  // Functions to build the interpreter.

  // Provide a list of tensor indexes that are inputs to the model.
  // Each index is bounds-checked; failures clear the interpreter's
  // consistent_ flag.
  TfLiteStatus SetInputs(std::vector<int> inputs);

  // Provide a list of tensor indexes that are outputs of the model.
  // Each index is bounds-checked; failures clear the interpreter's
  // consistent_ flag.
  TfLiteStatus SetOutputs(std::vector<int> outputs);

    114 
    115   // Adds a node with the given parameters and returns the index of the new
    116   // node in `node_index` (optionally). Interpreter will take ownership of
    117   // `builtin_data` and destroy it with `free`. Ownership of 'init_data'
    118   // remains with the caller.
    119   TfLiteStatus AddNodeWithParameters(const std::vector<int>& inputs,
    120                                      const std::vector<int>& outputs,
    121                                      const char* init_data,
    122                                      size_t init_data_size, void* builtin_data,
    123                                      const TfLiteRegistration* registration,
    124                                      int* node_index = nullptr);
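
  // For example, a minimal sketch (illustrative only; `reg` is assumed to be
  // a valid TfLiteRegistration and `MyOpParams` is a hypothetical params
  // struct; `params` must come from malloc() because the interpreter
  // releases it with free()):
  //
  //   auto* params = static_cast<MyOpParams*>(malloc(sizeof(MyOpParams)));
  //   foo.AddNodeWithParameters({0, 1}, {2}, nullptr, 0, params, &reg);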

  // Adds `tensors_to_add` tensors, preserving pre-existing Tensor entries.
  // The value pointed to by `first_new_tensor_index` will be set to the
  // index of the first new tensor if `first_new_tensor_index` is non-null.
  TfLiteStatus AddTensors(int tensors_to_add,
                          int* first_new_tensor_index = nullptr);

  // Set description of tensor `tensor_index`: type, name, dimensions, and
  // quantization. This variant takes an external read-only buffer of size
  // `bytes`; the lifetime of `buffer` must be at least as long as the
  // lifetime of the Interpreter.
  TfLiteStatus SetTensorParametersReadOnly(
      int tensor_index, TfLiteType type, const char* name,
      const std::vector<int>& dims, TfLiteQuantizationParams quantization,
      const char* buffer, size_t bytes, const Allocation* allocation = nullptr);

  // Set description of tensor `tensor_index`: type, name, dimensions, and
  // quantization. This variant declares a read-write tensor whose memory is
  // allocated and managed by the interpreter (see AllocateTensors()).
  TfLiteStatus SetTensorParametersReadWrite(
      int tensor_index, TfLiteType type, const char* name,
      const std::vector<int>& dims, TfLiteQuantizationParams quantization);
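
  // For example, a sketch describing tensor 0 as a 1x3 float tensor with no
  // quantization (assumes tensor 0 already exists via AddTensors()):
  //
  //   foo.SetTensorParametersReadWrite(0, kTfLiteFloat32, "input", {1, 3},
  //                                    TfLiteQuantizationParams());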

  // Functions to access tensor data.

  // Read-only access to the list of inputs.
  const std::vector<int>& inputs() const { return inputs_; }

  // Return the name of a given input. The given index must be in the range
  // [0, inputs().size()).
  const char* GetInputName(int index) const {
    return context_.tensors[inputs_[index]].name;
  }

  // Read-only access to the list of outputs.
  const std::vector<int>& outputs() const { return outputs_; }

  // Return the name of a given output. The given index must be in the range
  // [0, outputs().size()).
  const char* GetOutputName(int index) const {
    return context_.tensors[outputs_[index]].name;
  }

  // Return the number of tensors in the model.
  int tensors_size() const { return context_.tensors_size; }

  // Return the number of ops in the model.
  int nodes_size() const { return nodes_and_registration_.size(); }

  // WARNING: Experimental interface, subject to change.
  const std::vector<int>& execution_plan() const { return execution_plan_; }

  // WARNING: Experimental interface, subject to change.
  // Overrides the execution plan. The indices sent in are bounds-checked.
  TfLiteStatus SetExecutionPlan(const std::vector<int>& new_plan);

  // Get a tensor data structure, or nullptr if the index is out of bounds.
  // TODO(aselle): Create a safe ArrayHandle interface to avoid exposing this
  // read/write access to structure
  TfLiteTensor* tensor(int tensor_index) {
    if (tensor_index >= context_.tensors_size || tensor_index < 0)
      return nullptr;
    return &context_.tensors[tensor_index];
  }

  // Get a pointer to an operation and registration data structure, or
  // nullptr if the index is out of bounds.
  // TODO(aselle): Create a safe ArrayHandle interface to avoid exposing this
  // read/write access to structure
  const std::pair<TfLiteNode, TfLiteRegistration>* node_and_registration(
      int node_index) {
    if (node_index < 0 ||
        static_cast<size_t>(node_index) >= nodes_and_registration_.size())
      return nullptr;
    return &nodes_and_registration_[node_index];
  }

  // Perform a checked cast to the appropriate tensor type, returning nullptr
  // if the index is out of bounds or the tensor's type does not match T.
  template <class T>
  T* typed_tensor(int tensor_index) {
    if (TfLiteTensor* tensor_ptr = tensor(tensor_index)) {
      if (tensor_ptr->type == typeToTfLiteType<T>()) {
        return reinterpret_cast<T*>(tensor_ptr->data.raw);
      }
    }
    return nullptr;
  }

  // Return a pointer into the data of a given input tensor. The given index
  // must be in the range [0, inputs().size()).
  template <class T>
  T* typed_input_tensor(int index) {
    return typed_tensor<T>(inputs_[index]);
  }

  // Return a pointer into the data of a given output tensor. The given index
  // must be in the range [0, outputs().size()).
  template <class T>
  T* typed_output_tensor(int index) {
    return typed_tensor<T>(outputs_[index]);
  }
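
  // For example, a sketch of feeding an input and reading an output once the
  // tensors have been allocated (assumes float input/output tensors):
  //
  //   foo.typed_input_tensor<float>(0)[0] = 3.f;
  //   foo.Invoke();
  //   float result = foo.typed_output_tensor<float>(0)[0];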

  // Change the dimensionality of a given tensor. Note, this is only acceptable
  // for tensor indices that are inputs.
  // Returns status of failure or success.
  // TODO(aselle): Consider implementing ArraySlice equivalent to make this
  //   more adept at accepting data without an extra copy. Use absl::ArraySlice
  //   if our partners determine that dependency is acceptable.
  TfLiteStatus ResizeInputTensor(int tensor_index,
                                 const std::vector<int>& dims);
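
  // For example, a sketch resizing the first input to a batch of two and
  // reallocating (the shape {2, 224, 224, 3} is illustrative):
  //
  //   foo.ResizeInputTensor(foo.inputs()[0], {2, 224, 224, 3});
  //   foo.AllocateTensors();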

  // Update allocations for all tensors. This will redim dependent tensors using
  // the input tensor dimensionality as given. This is relatively expensive.
  // If you know that your sizes are not changing, you need not call this.
  // Returns status of success or failure.
  TfLiteStatus AllocateTensors();

  // Invoke the interpreter (run the whole graph in dependency order).
  //
  // NOTE: It is possible that the interpreter is not in a ready state to
  // evaluate (e.g. if a ResizeInputTensor() has been performed without a
  // subsequent call to AllocateTensors()).
  // Returns status of success or failure.
  TfLiteStatus Invoke();
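
  // For example, a typical checked invocation (a sketch):
  //
  //   if (foo.Invoke() != kTfLiteOk) {
  //     // Outputs are not valid; report the error or recover here.
  //   }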

  // Enable or disable the NN API (true to enable).
  void UseNNAPI(bool enable);

  // Set the number of threads available to the interpreter.
  void SetNumThreads(int num_threads);

  // Allow a delegate to look at the graph and modify the graph to handle
  // parts of it itself. After this is called, the graph may contain new
  // nodes that each replace one or more of the original nodes.
  TfLiteStatus ModifyGraphWithDelegate(TfLiteDelegate* delegate);
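
  // For example, a sketch (assumes `delegate` is a configured TfLiteDelegate
  // whose Prepare callback selects nodes and hands them to
  // ReplaceSubgraphsWithDelegateKernels()):
  //
  //   foo.ModifyGraphWithDelegate(&delegate);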

 private:
  // Give `op_reg` a chance to initialize itself using the contents of
  // `buffer`.
  void* OpInit(const TfLiteRegistration& op_reg, const char* buffer,
               size_t length) {
    if (op_reg.init == nullptr) return nullptr;
    return op_reg.init(&context_, buffer, length);
  }

  // Let `op_reg` release any memory it might have allocated via `OpInit`.
  void OpFree(const TfLiteRegistration& op_reg, void* buffer) {
    if (op_reg.free == nullptr) return;
    if (buffer) {
      op_reg.free(&context_, buffer);
    }
  }

  // Prepare the given `node` for execution.
  TfLiteStatus OpPrepare(const TfLiteRegistration& op_reg, TfLiteNode* node) {
    if (op_reg.prepare == nullptr) return kTfLiteOk;
    return op_reg.prepare(&context_, node);
  }

  // Invoke the operator represented by `node`.
  TfLiteStatus OpInvoke(const TfLiteRegistration& op_reg, TfLiteNode* node) {
    if (op_reg.invoke == nullptr) return kTfLiteError;
    return op_reg.invoke(&context_, node);
  }
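
  // Taken together, these helpers drive an op's lifecycle: OpInit() when the
  // node is added, OpPrepare() whenever tensor sizes may have changed,
  // OpInvoke() on each Invoke() call, and OpFree() when the node is destroyed.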

  // Call OpPrepare() for as many ops as possible, allocating memory for their
  // tensors. If an op containing dynamic tensors is found, preparation will be
  // postponed until this function is called again. This allows the interpreter
  // to wait until Invoke() to resolve the sizes of dynamic tensors.
  TfLiteStatus PrepareOpsAndTensors();

  // Call OpPrepare() for all ops starting at `first_execution_plan_index`.
  // Stop when a dynamic tensor is found or all ops have been prepared. Fill
  // `last_execution_plan_index_prepared` with the id of the op containing
  // dynamic tensors, or of the last op in the graph.
  TfLiteStatus PrepareOpsStartingAt(int first_execution_plan_index,
                                    int* last_execution_plan_index_prepared);

  // Tensors needed by the interpreter. Use `AddTensors` to add more blank
  // tensor entries. Note, `tensors_.data()` needs to be synchronized to the
  // `context_` whenever this std::vector is reallocated. Currently this
  // only happens in `AddTensors()`.
  std::vector<TfLiteTensor> tensors_;

  // Check if an array of tensor indices is valid with respect to the Tensor
  // array.
  // NOTE: this changes consistent_ to be false if indices are out of bounds.
  TfLiteStatus CheckTensorIndices(const char* label, const int* indices,
                                  int length);

  // Compute the number of bytes required to represent a tensor with dimensions
  // specified by the array dims (of length dims_size). Returns the status code
  // and bytes.
  TfLiteStatus BytesRequired(TfLiteType type, const int* dims, int dims_size,
                             size_t* bytes);

  // Implementation of a tensor resize request. If the given tensor is of type
  // kTfLiteDynamic it will also have new memory allocated.
  TfLiteStatus ResizeTensorImpl(TfLiteTensor* tensor, TfLiteIntArray* new_size);

  // Report a detailed error string (will be printed to stderr).
  // TODO(aselle): allow user of class to provide alternative destinations.
  void ReportErrorImpl(const char* format, va_list args);

  // Entry point for the C node plugin API to request that a tensor be resized.
  static TfLiteStatus ResizeTensor(TfLiteContext* context, TfLiteTensor* tensor,
                                   TfLiteIntArray* new_size);
  // Entry point for the C node plugin API to report an error.
  static void ReportError(TfLiteContext* context, const char* format, ...);

  // Entry point for the C node plugin API to add new tensors.
  static TfLiteStatus AddTensors(TfLiteContext* context, int tensors_to_add,
                                 int* first_new_tensor_index);

  // WARNING: This is an experimental API and subject to change.
  // Entry point for the C API function ReplaceSubgraphsWithDelegateKernels.
  static TfLiteStatus ReplaceSubgraphsWithDelegateKernels(
      TfLiteContext* context, TfLiteRegistration registration,
      const TfLiteIntArray* nodes_to_replace);

  // Update the execution graph to replace some of the nodes with delegate
  // kernels. Specifically, any node whose index appears in `nodes_to_replace`
  // is slated for replacement with the delegate kernel specified by
  // `registration`.
  // WARNING: This is an experimental interface that is subject to change.
  TfLiteStatus ReplaceSubgraphsWithDelegateKernels(
      TfLiteRegistration registration, const TfLiteIntArray* nodes_to_replace);

  // WARNING: This is an experimental interface that is subject to change.
  // Gets the internal pointer to a TensorFlow Lite node by node_index.
  TfLiteStatus GetNodeAndRegistration(int node_index, TfLiteNode** node,
                                      TfLiteRegistration** registration);

  // WARNING: This is an experimental interface that is subject to change.
  // Entry point for the C node plugin API to get a node by index.
  static TfLiteStatus GetNodeAndRegistration(struct TfLiteContext*,
                                             int node_index, TfLiteNode** node,
                                             TfLiteRegistration** registration);

  // WARNING: This is an experimental interface that is subject to change.
  // Gets a TfLiteIntArray* representing the execution plan. The caller owns
  // this memory and must free it with TfLiteIntArrayFree().
  TfLiteStatus GetExecutionPlan(TfLiteIntArray** execution_plan);

  // WARNING: This is an experimental interface that is subject to change.
  // Entry point for the C node plugin API to get the execution plan.
  static TfLiteStatus GetExecutionPlan(struct TfLiteContext* context,
                                       TfLiteIntArray** execution_plan);

  // A pure C data structure used to communicate with the pure C plugin
  // interface. To avoid copying tensor metadata, this is also the definitive
  // structure to store tensors.
  TfLiteContext context_;

  // Node inputs/outputs are stored in TfLiteNode, and TfLiteRegistration
  // stores function pointers to the actual implementation.
  std::vector<std::pair<TfLiteNode, TfLiteRegistration>>
      nodes_and_registration_;

  // Whether the model is consistent, i.e. whether the inputs and outputs of
  // every node and the global inputs and outputs are valid indices into the
  // tensor array.
  bool consistent_ = true;

  // Whether the model is safe to invoke (if any errors occurred this
  // will be false).
  bool invokable_ = false;

  // Array of indices representing the tensors that are inputs to the
  // interpreter.
  std::vector<int> inputs_;

  // Array of indices representing the tensors that are outputs of the
  // interpreter.
  std::vector<int> outputs_;

  // The error reporter delegate to which TF Lite forwards errors.
  ErrorReporter* error_reporter_;

  // Index of the next node to prepare.
  // During Invoke(), the interpreter first allocates the input tensors, whose
  // sizes are known, and then allocates node outputs for as many nodes as
  // possible. When it reaches a node that produces a dynamically sized tensor,
  // it stops allocating, records that node's index here, and executes the
  // graph up to that node so the output's size becomes known before allocation
  // continues with its successors. This repeats until all nodes have executed.
  // NOTE: this relies on the execution plan being in topological order.
  int next_execution_plan_index_to_prepare_;

  // WARNING: This is an experimental interface that is subject to change.
  // This is a list of node indices (to index into nodes_and_registration).
  // This represents a valid topological sort (dependency ordered) execution
  // plan. In particular, it is valid for this ordering to contain only a
  // subset of the node indices.
  std::vector<int> execution_plan_;

  // In the future, we'd like a TfLiteIntArray compatible representation.
  // TODO(aselle): replace execution_plan_ with this.
  std::unique_ptr<TfLiteIntArray, TfLiteIntArrayDeleter> plan_cache_;

  // Delegate for handing execution off to the NN API, if enabled.
  std::unique_ptr<NNAPIDelegate> nnapi_delegate_;

  // Planner that assigns memory to tensors during AllocateTensors().
  std::unique_ptr<MemoryPlanner> memory_planner_;
};

}  // namespace tflite
#endif  // TENSORFLOW_CONTRIB_LITE_INTERPRETER_H_