/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/contrib/lite/nnapi_delegate.h"
#include <fcntl.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>
#include <cstdarg>
#include <cstdio>
#include <cstdlib>
#include <vector>
#include "tensorflow/contrib/lite/builtin_op_data.h"
#include "tensorflow/contrib/lite/error_reporter.h"
#include "tensorflow/contrib/lite/model.h"
#include "tensorflow/contrib/lite/nnapi/NeuralNetworksShim.h"

namespace tflite {

// TODO(aselle): FATAL leaves resources hanging.
void FATAL(const char* format, ...) {
  va_list args;
  va_start(args, format);
  vfprintf(stderr, format, args);
  va_end(args);
  fflush(stderr);
  exit(1);
}

// TODO(aselle): Change the error model to use status codes.
#define CHECK_TFLITE_SUCCESS(x)                       \
  if (x != kTfLiteOk) {                               \
    FATAL("Aborting since tflite returned failure."); \
  }

#define CHECK_NN(x)                                  \
  if (x != ANEURALNETWORKS_NO_ERROR) {               \
    FATAL("Aborting since NNAPI returned failure."); \
  }
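
// Both macros abort the process on failure. For example,
//   CHECK_NN(ANeuralNetworksModel_create(&model));
// calls FATAL() and exits if the NNAPI call returns anything other than
// ANEURALNETWORKS_NO_ERROR.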

NNAPIAllocation::NNAPIAllocation(const char* filename,
                                 ErrorReporter* error_reporter)
    : MMAPAllocation(filename, error_reporter) {
  if (mmapped_buffer_ != MAP_FAILED)
    CHECK_NN(ANeuralNetworksMemory_createFromFd(buffer_size_bytes_, PROT_READ,
                                                mmap_fd_, 0, &handle_));
}

NNAPIAllocation::~NNAPIAllocation() {
  if (handle_) {
    ANeuralNetworksMemory_free(handle_);
  }
}

NNAPIDelegate::~NNAPIDelegate() {
  if (nn_model_) {
    ANeuralNetworksModel_free(nn_model_);
    nn_model_ = nullptr;
    // TODO(aselle): Is this thread-safe and callable multiple times?
  }
  // ANeuralNetworksShutdown();
}

// Adds the tensors of the interpreter to the NN API model.
// Returns the number of operands added.
uint32_t addTensorOperands(tflite::Interpreter* interpreter,
                           ANeuralNetworksModel* nn_model) {
  uint32_t next_id = 0;
  for (size_t i = 0; i < interpreter->tensors_size(); i++) {
    int32_t nn_type = 0;
    float scale = 1.0f;
    int32_t zeroPoint = 0;
    TfLiteTensor* tensor = interpreter->tensor(i);
    switch (tensor->type) {
      case kTfLiteNoType:
        // Tensors added during initialization of Ops don't have a type yet
        // and should not be registered with the NNAPI.
        continue;
      case kTfLiteFloat32:
        nn_type = ANEURALNETWORKS_TENSOR_FLOAT32;
        break;
      case kTfLiteUInt8:
        nn_type = ANEURALNETWORKS_TENSOR_QUANT8_ASYMM;
        scale = tensor->params.scale;
        zeroPoint = tensor->params.zero_point;
        if (scale <= 0.0f) {
          // Internal tensor; not valid for NNAPI.
          continue;
        }
        break;
      case kTfLiteInt32:
        nn_type = ANEURALNETWORKS_TENSOR_INT32;
        scale = tensor->params.scale;
        zeroPoint = tensor->params.zero_point;
        break;
      default:
        FATAL("Unsupported type.");
    }
    // Note: skipping a tensor above assumes no registered op references it.
    // NNAPI operand indices are assigned in addOperand order, so a skipped
    // tensor makes them diverge from the tflite tensor indices used below
    // and in AddOpsAndParams.
    // TODO(aselle): Note, many of these are intermediate results. Do I ever
    // need to specify these sizes? setOperandValue is currently called on
    // all of them below, but it shouldn't be in the future.
    // Answer(jeanluc): If all the operators can set the dimension correctly,
    // you won't need to.
    ANeuralNetworksOperandType operand_type{
        nn_type, static_cast<uint32_t>(tensor->dims->size),
        reinterpret_cast<uint32_t*>(tensor->dims->data), scale, zeroPoint};
    CHECK_NN(ANeuralNetworksModel_addOperand(nn_model, &operand_type));

    // TODO(aselle): Based on Michael's suggestion, limiting this to
    // read-only memory.
    if (tensor->allocation_type == kTfLiteMmapRo) {
      if (const NNAPIAllocation* alloc = dynamic_cast<const NNAPIAllocation*>(
              static_cast<const Allocation*>(tensor->allocation))) {
        CHECK_NN(ANeuralNetworksModel_setOperandValueFromMemory(
            nn_model, i, alloc->memory(), alloc->offset(tensor->data.raw),
            tensor->bytes));
      } else {
        CHECK_NN(ANeuralNetworksModel_setOperandValue(
            nn_model, i, tensor->data.raw, tensor->bytes));
      }
    }
    ++next_id;
  }
  return next_id;
}
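
// Illustrative sketch of the mapping performed above (not compiled; the
// shape here is made up for the example): a float32 tensor of shape
// {1, 224, 224, 3} becomes a rank-4 ANEURALNETWORKS_TENSOR_FLOAT32 operand,
// while a quantized uint8 tensor would instead use
// ANEURALNETWORKS_TENSOR_QUANT8_ASYMM with its scale and zero point.
#if 0
  uint32_t dims[] = {1, 224, 224, 3};
  ANeuralNetworksOperandType operand_type{ANEURALNETWORKS_TENSOR_FLOAT32,
                                          /*dimensionCount=*/4, dims,
                                          /*scale=*/1.0f, /*zeroPoint=*/0};
  CHECK_NN(ANeuralNetworksModel_addOperand(nn_model, &operand_type));
#endif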

// Adds the operations and their parameters to the NN API model.
// 'next_id' is the operand ID of the next operand of the model.
void AddOpsAndParams(tflite::Interpreter* interpreter,
                     ANeuralNetworksModel* nn_model, uint32_t next_id) {
  for (size_t i = 0; i < interpreter->nodes_size(); i++) {
    const auto* node_and_registration = interpreter->node_and_registration(i);
    const TfLiteNode& node = node_and_registration->first;
    const TfLiteRegistration& registration = node_and_registration->second;
    tflite::BuiltinOperator builtin =
        static_cast<tflite::BuiltinOperator>(registration.builtin_code);

    // Add the parameters.
    std::vector<uint32_t> augmented_inputs(
        node.inputs->data, node.inputs->data + node.inputs->size);

    auto add_scalar_int32 = [&nn_model, &augmented_inputs,
                             &next_id](int value) {
      ANeuralNetworksOperandType operand_type{.type = ANEURALNETWORKS_INT32};
      CHECK_NN(ANeuralNetworksModel_addOperand(nn_model, &operand_type))
      CHECK_NN(ANeuralNetworksModel_setOperandValue(nn_model, next_id, &value,
                                                    sizeof(int32_t)))
      augmented_inputs.push_back(next_id++);
    };

    auto add_scalar_float32 = [&nn_model, &augmented_inputs,
                               &next_id](float value) {
      ANeuralNetworksOperandType operand_type{.type = ANEURALNETWORKS_FLOAT32};
      CHECK_NN(ANeuralNetworksModel_addOperand(nn_model, &operand_type))
      CHECK_NN(ANeuralNetworksModel_setOperandValue(nn_model, next_id, &value,
                                                    sizeof(float)))
      augmented_inputs.push_back(next_id++);
    };

    auto duplicate_state_tensor_float32 =
        [interpreter, &nn_model, &augmented_inputs, &next_id](int tensor_id) {
          const TfLiteTensor* tensor = interpreter->tensor(tensor_id);
          CHECK_NN(ANeuralNetworksModel_setOperandValue(
              nn_model, tensor_id, tensor->data.raw, tensor->bytes));
          augmented_inputs.push_back(tensor_id);
        };

    auto add_add_params = [&add_scalar_int32]() { add_scalar_int32(0); };

    auto add_pooling_params = [&add_scalar_int32](void* data) {
      auto builtin = reinterpret_cast<TfLitePoolParams*>(data);
      add_scalar_int32(builtin->padding);
      add_scalar_int32(builtin->stride_width);
      add_scalar_int32(builtin->stride_height);
      add_scalar_int32(builtin->filter_width);
      add_scalar_int32(builtin->filter_height);
      add_scalar_int32(builtin->activation);
    };

    auto add_convolution_params = [&add_scalar_int32](void* data) {
      auto builtin = reinterpret_cast<TfLiteConvParams*>(data);
      add_scalar_int32(builtin->padding);
      add_scalar_int32(builtin->stride_width);
      add_scalar_int32(builtin->stride_height);
      add_scalar_int32(builtin->activation);
    };

    auto add_depthwise_conv_params = [&add_scalar_int32](void* data) {
      auto builtin = reinterpret_cast<TfLiteDepthwiseConvParams*>(data);
      add_scalar_int32(builtin->padding);
      add_scalar_int32(builtin->stride_width);
      add_scalar_int32(builtin->stride_height);
      add_scalar_int32(builtin->depth_multiplier);
      add_scalar_int32(builtin->activation);
    };

    auto add_fully_connected_params = [&add_scalar_int32](void* data) {
      auto builtin = reinterpret_cast<TfLiteFullyConnectedParams*>(data);
      add_scalar_int32(builtin->activation);
    };

    auto add_concatenation_params = [&add_scalar_int32](void* data) {
      auto builtin = reinterpret_cast<TfLiteConcatenationParams*>(data);
      add_scalar_int32(builtin->axis);
      if (builtin->activation != kTfLiteActNone) {
        FATAL("Concatenation does not support fused activation in NNAPI");
      }
    };

    auto add_softmax_params = [&add_scalar_float32](void* data) {
      auto builtin = reinterpret_cast<TfLiteSoftmaxParams*>(data);
      add_scalar_float32(builtin->beta);
    };

    auto add_space_to_depth_params = [&add_scalar_int32](void* data) {
      auto builtin = reinterpret_cast<TfLiteSpaceToDepthParams*>(data);
      add_scalar_int32(builtin->block_size);
    };

    auto add_lstm_params = [&add_scalar_int32,
                            &add_scalar_float32](void* data) {
      auto builtin = reinterpret_cast<TfLiteLSTMParams*>(data);
      add_scalar_int32(builtin->activation);
      add_scalar_float32(builtin->cell_clip);
      add_scalar_float32(builtin->proj_clip);
    };

#if 0
    auto add_reshape_params = [&](void* data) {
      auto builtin = reinterpret_cast<TfLiteReshapeParams*>(data);
      uint32_t tensor_size_shape = builtin->num_dimensions;
      ANeuralNetworksOperandType operand_type{
          ANEURALNETWORKS_TENSOR_INT32,
          /*dimensionCount=*/1,
          reinterpret_cast<uint32_t*>(&tensor_size_shape),
          /*scale=*/0.f,
          /*zeroPoint=*/0};
      CHECK_NN(ANeuralNetworksModel_addOperand(nn_model, &operand_type))
      CHECK_NN(ANeuralNetworksModel_setOperandValue(
          nn_model, next_id, builtin->shape,
          sizeof(int) * builtin->num_dimensions));
      augmented_inputs.push_back(next_id++);
    };
#endif

    ANeuralNetworksOperationType nn_op_type;
    switch (builtin) {
      case tflite::BuiltinOperator_ADD:
        nn_op_type = ANEURALNETWORKS_ADD;
        add_add_params();
        break;
      case tflite::BuiltinOperator_AVERAGE_POOL_2D:
        add_pooling_params(node.builtin_data);
        nn_op_type = ANEURALNETWORKS_AVERAGE_POOL_2D;
        break;
      case tflite::BuiltinOperator_MAX_POOL_2D:
        add_pooling_params(node.builtin_data);
        nn_op_type = ANEURALNETWORKS_MAX_POOL_2D;
        break;
      case tflite::BuiltinOperator_L2_POOL_2D:
        add_pooling_params(node.builtin_data);
        nn_op_type = ANEURALNETWORKS_L2_POOL_2D;
        break;
      case tflite::BuiltinOperator_CONV_2D:
        add_convolution_params(node.builtin_data);
        nn_op_type = ANEURALNETWORKS_CONV_2D;
        break;
      case tflite::BuiltinOperator_RELU:
        nn_op_type = ANEURALNETWORKS_RELU;
        break;
      case tflite::BuiltinOperator_RELU6:
        nn_op_type = ANEURALNETWORKS_RELU6;
        break;
      case tflite::BuiltinOperator_TANH:
        nn_op_type = ANEURALNETWORKS_TANH;
        break;
      case tflite::BuiltinOperator_LOGISTIC:
        nn_op_type = ANEURALNETWORKS_LOGISTIC;
        break;
      case tflite::BuiltinOperator_DEPTHWISE_CONV_2D:
        add_depthwise_conv_params(node.builtin_data);
        nn_op_type = ANEURALNETWORKS_DEPTHWISE_CONV_2D;
        break;
      case tflite::BuiltinOperator_CONCATENATION:
        add_concatenation_params(node.builtin_data);
        nn_op_type = ANEURALNETWORKS_CONCATENATION;
        break;
      case tflite::BuiltinOperator_SOFTMAX:
        add_softmax_params(node.builtin_data);
        nn_op_type = ANEURALNETWORKS_SOFTMAX;
        break;
      case tflite::BuiltinOperator_FULLY_CONNECTED:
        add_fully_connected_params(node.builtin_data);
        nn_op_type = ANEURALNETWORKS_FULLY_CONNECTED;
        break;
      case tflite::BuiltinOperator_RESHAPE:
        nn_op_type = ANEURALNETWORKS_RESHAPE;
        // add_reshape_params(node.builtin_data);
        break;
      case tflite::BuiltinOperator_SPACE_TO_DEPTH:
        add_space_to_depth_params(node.builtin_data);
        nn_op_type = ANEURALNETWORKS_SPACE_TO_DEPTH;
        break;
      case tflite::BuiltinOperator_LSTM: {
        duplicate_state_tensor_float32(
            node.outputs->data[/*kOutputStateTensor*/ 1]);
        duplicate_state_tensor_float32(
            node.outputs->data[/*kCellStateTensor*/ 2]);
        add_lstm_params(node.builtin_data);
        nn_op_type = ANEURALNETWORKS_LSTM;
        break;
      }
      case tflite::BuiltinOperator_CONCAT_EMBEDDINGS:
      case tflite::BuiltinOperator_LSH_PROJECTION:
      case tflite::BuiltinOperator_SVDF:
      case tflite::BuiltinOperator_HASHTABLE_LOOKUP:
      case tflite::BuiltinOperator_RNN:
      case tflite::BuiltinOperator_BIDIRECTIONAL_SEQUENCE_RNN:
      case tflite::BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_RNN:
      case tflite::BuiltinOperator_EMBEDDING_LOOKUP:
      case tflite::BuiltinOperator_EMBEDDING_LOOKUP_SPARSE:
      case tflite::BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_LSTM:
      case tflite::BuiltinOperator_L2_NORMALIZATION:
      case tflite::BuiltinOperator_LOCAL_RESPONSE_NORMALIZATION:
      case tflite::BuiltinOperator_MUL:
      case tflite::BuiltinOperator_PAD:
      case tflite::BuiltinOperator_RESIZE_BILINEAR:
      case tflite::BuiltinOperator_CALL:
      case tflite::BuiltinOperator_SKIP_GRAM:
      case tflite::BuiltinOperator_RELU_N1_TO_1:
      case tflite::BuiltinOperator_GATHER:
      case tflite::BuiltinOperator_SPACE_TO_BATCH_ND:
      case tflite::BuiltinOperator_BATCH_TO_SPACE_ND:
      case tflite::BuiltinOperator_TOPK_V2:
      case tflite::BuiltinOperator_TRANSPOSE:
      case tflite::BuiltinOperator_MEAN:
      case tflite::BuiltinOperator_DIV:
      case tflite::BuiltinOperator_SUB:
      case tflite::BuiltinOperator_SPLIT:
      case tflite::BuiltinOperator_SQUEEZE:
      case tflite::BuiltinOperator_STRIDED_SLICE:
      case tflite::BuiltinOperator_EXP:
        FATAL("Op code %d is currently not delegated to NNAPI", builtin);
        nn_op_type = -1;  // set to invalid
        break;
      case tflite::BuiltinOperator_CUSTOM:
        FATAL("Custom operations are not supported when using NNAPI.");
        nn_op_type = -1;  // set to invalid
        break;
    }

    // Add the operation.
    CHECK_NN(ANeuralNetworksModel_addOperation(
        nn_model, nn_op_type, static_cast<uint32_t>(augmented_inputs.size()),
        augmented_inputs.data(), static_cast<uint32_t>(node.outputs->size),
        reinterpret_cast<uint32_t*>(node.outputs->data)));
  }
}
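
// For example, a tflite ADD node with inputs {t0, t1} is emitted above as an
// NNAPI ADD whose inputs are {t0, t1, act}, where `act` is the scalar
// fused-activation operand appended by add_add_params(); the node's outputs
// are passed through unchanged.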

TfLiteStatus NNAPIDelegate::BuildGraph(Interpreter* interpreter) {
  // TODO(aselle): This is not correct; we need to handle resize invalidation.
  if (nn_model_ && nn_compiled_model_) return kTfLiteOk;

  if (!nn_model_) {
    CHECK_NN(ANeuralNetworksModel_create(&nn_model_));

    uint32_t next_id = addTensorOperands(interpreter, nn_model_);
    AddOpsAndParams(interpreter, nn_model_, next_id);
    CHECK_NN(ANeuralNetworksModel_identifyInputsAndOutputs(
        nn_model_, static_cast<uint32_t>(interpreter->inputs().size()),
        reinterpret_cast<const uint32_t*>(interpreter->inputs().data()),
        static_cast<uint32_t>(interpreter->outputs().size()),
        reinterpret_cast<const uint32_t*>(interpreter->outputs().data())));
    CHECK_NN(ANeuralNetworksModel_finish(nn_model_));
  }
  if (!nn_compiled_model_) {
    CHECK_NN(ANeuralNetworksCompilation_create(nn_model_, &nn_compiled_model_));
    CHECK_NN(ANeuralNetworksCompilation_finish(nn_compiled_model_));
  }
  return kTfLiteOk;
}
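
// The NNAPI lifecycle driven by BuildGraph() is, in order:
//   ANeuralNetworksModel_create -> addOperand/addOperation (per tensor/node)
//   -> identifyInputsAndOutputs -> ANeuralNetworksModel_finish
//   -> ANeuralNetworksCompilation_create -> ANeuralNetworksCompilation_finish.
// After Model_finish the model is immutable, which is why a tensor resize
// would require rebuilding it (see the TODO above).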

TfLiteStatus NNAPIDelegate::Invoke(Interpreter* interpreter) {
  if (!nn_model_ || !nn_compiled_model_) {
    TF_LITE_ENSURE_STATUS(BuildGraph(interpreter));
  }

  ANeuralNetworksExecution* execution = nullptr;
  CHECK_NN(ANeuralNetworksExecution_create(nn_compiled_model_, &execution));

  // Currently we perform a deep copy of each input buffer.
  for (size_t i = 0; i < interpreter->inputs().size(); i++) {
    int input = interpreter->inputs()[i];
    // TODO(aselle): Is this what we want, or do we want `input` instead?
    // TODO(aselle): This should maybe be called setInputValue, to be
    // consistent.
    TfLiteTensor* tensor = interpreter->tensor(input);
    CHECK_NN(ANeuralNetworksExecution_setInput(
        execution, i, nullptr, tensor->data.raw, tensor->bytes));
  }
  // Tell the NN API where to place the final data.
  for (size_t i = 0; i < interpreter->outputs().size(); i++) {
    int output = interpreter->outputs()[i];
    TfLiteTensor* tensor = interpreter->tensor(output);
    CHECK_NN(ANeuralNetworksExecution_setOutput(
        execution, i, nullptr, tensor->data.raw, tensor->bytes));
  }
  // Currently we use blocking compute.
  ANeuralNetworksEvent* event = nullptr;
  CHECK_NN(ANeuralNetworksExecution_startCompute(execution, &event));
  CHECK_NN(ANeuralNetworksEvent_wait(event));
  ANeuralNetworksEvent_free(event);
  ANeuralNetworksExecution_free(execution);

#if 0
  printf("From the NN API:\n");
  TfLiteTensor* tensor = interpreter->tensor(interpreter->outputs()[0]);
  if (float* data =
          interpreter->typed_tensor<float>(interpreter->outputs()[0])) {
    size_t num = tensor->bytes / sizeof(float);
    for (float* p = data; p < data + num; p++) {
      printf(" %f", *p);
    }
    printf("\n");
  }
#endif

  return kTfLiteOk;
}
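
// Minimal usage sketch (illustrative only; assumes `interpreter` is a fully
// built tflite::Interpreter whose tensors are allocated and whose inputs
// have been populated):
#if 0
  NNAPIDelegate delegate;
  // Translate the tflite graph into an NNAPI model and compile it.
  CHECK_TFLITE_SUCCESS(delegate.BuildGraph(&interpreter));
  // Copy inputs in, run the compiled model, and write results into the
  // interpreter's output tensors.
  CHECK_TFLITE_SUCCESS(delegate.Invoke(&interpreter));
#endif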

}  // namespace tflite