/* Copyright 2017 The TensorFlow Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
==============================================================================*/

#include "tensorflow/contrib/lite/nnapi_delegate.h"
#include <fcntl.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/types.h>
#include "tensorflow/contrib/lite/builtin_op_data.h"
#include "tensorflow/contrib/lite/error_reporter.h"
#include "tensorflow/contrib/lite/model.h"
#include "tensorflow/contrib/lite/nnapi/NeuralNetworksShim.h"

namespace tflite {

// Prints a printf-style message to stderr, flushes it, and terminates the
// process with exit code 1. This is the delegate's error-handling strategy
// for now (see the TODO below about switching to status codes); callers do
// not expect it to return.
// TODO(aselle): FATAL leaves resources hanging.
void FATAL(const char* format, ...) {
  va_list args;
  va_start(args, format);
  vfprintf(stderr, format, args);
  va_end(args);
  fflush(stderr);  // make sure the message is visible before exiting
  exit(1);
}

// TODO(aselle): Change the error model to use status codes.
39 #define CHECK_TFLITE_SUCCESS(x) \ 40 if (x != kTfLiteOk) { \ 41 FATAL("Aborting since tflite returned failure."); \ 42 } 43 44 #define CHECK_NN(x) \ 45 if (x != ANEURALNETWORKS_NO_ERROR) { \ 46 FATAL("Aborting since tflite returned failure."); \ 47 } 48 49 NNAPIAllocation::NNAPIAllocation(const char* filename, 50 ErrorReporter* error_reporter) 51 : MMAPAllocation(filename, error_reporter) { 52 if (mmapped_buffer_ != MAP_FAILED) 53 CHECK_NN(ANeuralNetworksMemory_createFromFd(buffer_size_bytes_, PROT_READ, 54 mmap_fd_, 0, &handle_)); 55 } 56 57 NNAPIAllocation::~NNAPIAllocation() { 58 if (handle_) { 59 ANeuralNetworksMemory_free(handle_); 60 } 61 } 62 63 NNAPIDelegate::~NNAPIDelegate() { 64 if (nn_model_) { 65 ANeuralNetworksModel_free(nn_model_); 66 nn_model_ = nullptr; 67 // TODO(aselle): Is this thread-safe and callable multiple times? 68 } 69 // ANeuralNetworksShutdown(); 70 } 71 72 // Adds the tensors of the interpreter to the NN API model. 73 // Returns the number of operands added. 74 uint32_t addTensorOperands(tflite::Interpreter* interpreter, 75 ANeuralNetworksModel* nn_model) { 76 uint32_t next_id = 0; 77 for (size_t i = 0; i < interpreter->tensors_size(); i++) { 78 int32_t nn_type = 0; 79 float scale = 1.0f; 80 int32_t zeroPoint = 0; 81 TfLiteTensor* tensor = interpreter->tensor(i); 82 switch (tensor->type) { 83 case kTfLiteNoType: 84 // Tensors added during initialization of Ops don't have a type yet and 85 // should not be registered with the NNAPI. 
86 continue; 87 case kTfLiteFloat32: 88 nn_type = ANEURALNETWORKS_TENSOR_FLOAT32; 89 break; 90 case kTfLiteUInt8: 91 nn_type = ANEURALNETWORKS_TENSOR_QUANT8_ASYMM; 92 scale = tensor->params.scale; 93 zeroPoint = tensor->params.zero_point; 94 if (scale <= 0.0f) { 95 // internal tensor, not valid for NNAPI 96 continue; 97 } 98 break; 99 case kTfLiteInt32: 100 nn_type = ANEURALNETWORKS_TENSOR_INT32; 101 scale = tensor->params.scale; 102 zeroPoint = tensor->params.zero_point; 103 break; 104 default: 105 FATAL("Unsupported type."); 106 } 107 // TODO(aselle): Note, many of these are intermediate results. Do I need 108 // to ever specify these sizes. I am currently below doing setValue 109 // on all of them, but I shouldn't in the future. 110 // Answer(jeanluc): If all the operators can set the dimension correctly, 111 // you won't need to. 112 ANeuralNetworksOperandType operand_type{ 113 nn_type, static_cast<uint32_t>(tensor->dims->size), 114 reinterpret_cast<uint32_t*>(tensor->dims->data), scale, zeroPoint}; 115 CHECK_NN(ANeuralNetworksModel_addOperand(nn_model, &operand_type)); 116 117 // TODO(aselle): Based on Michael's suggestion, limiting this to read 118 // only memory 119 if (tensor->allocation_type == kTfLiteMmapRo) { 120 if (const NNAPIAllocation* alloc = dynamic_cast<const NNAPIAllocation*>( 121 static_cast<const Allocation*>(tensor->allocation))) { 122 CHECK_NN(ANeuralNetworksModel_setOperandValueFromMemory( 123 nn_model, i, alloc->memory(), alloc->offset(tensor->data.raw), 124 tensor->bytes)); 125 } else { 126 CHECK_NN(ANeuralNetworksModel_setOperandValue( 127 nn_model, i, tensor->data.raw, tensor->bytes)); 128 } 129 } 130 ++next_id; 131 } 132 return next_id; 133 } 134 135 // Adds the operations and their parameters to the NN API model. 136 // 'next-id' is the operand ID of the next operand of the model. 
void AddOpsAndParams(tflite::Interpreter* interpreter,
                     ANeuralNetworksModel* nn_model, uint32_t next_id) {
  for (size_t i = 0; i < interpreter->nodes_size(); i++) {
    const auto* node_and_registration = interpreter->node_and_registration(i);
    const TfLiteNode& node = node_and_registration->first;
    const TfLiteRegistration& registration = node_and_registration->second;
    tflite::BuiltinOperator builtin =
        static_cast<tflite::BuiltinOperator>(registration.builtin_code);

    // Add the parameters.
    // NNAPI passes op parameters (padding, strides, activations, ...) as
    // extra scalar input operands appended after the tensor inputs, so we
    // start from the node's tensor inputs and append below.
    std::vector<uint32_t> augmented_inputs(
        node.inputs->data, node.inputs->data + node.inputs->size);

    // Appends a scalar int32 operand holding `value` and adds it to this
    // node's input list. Taking the address of the by-value parameter is
    // safe because NNAPI copies small operand values immediately inside
    // setOperandValue (per the NNAPI API contract — see its docs).
    auto add_scalar_int32 = [&nn_model, &augmented_inputs,
                             &next_id](int value) {
      ANeuralNetworksOperandType operand_type{.type = ANEURALNETWORKS_INT32};
      CHECK_NN(ANeuralNetworksModel_addOperand(nn_model, &operand_type))
      CHECK_NN(ANeuralNetworksModel_setOperandValue(nn_model, next_id, &value,
                                                    sizeof(int32_t)))
      augmented_inputs.push_back(next_id++);
    };

    // Same as add_scalar_int32, but for a scalar float32 operand.
    auto add_scalar_float32 = [&nn_model, &augmented_inputs,
                               &next_id](float value) {
      ANeuralNetworksOperandType operand_type{.type = ANEURALNETWORKS_FLOAT32};
      CHECK_NN(ANeuralNetworksModel_addOperand(nn_model, &operand_type))
      CHECK_NN(ANeuralNetworksModel_setOperandValue(nn_model, next_id, &value,
                                                    sizeof(float)))
      augmented_inputs.push_back(next_id++);
    };

    // Gives the operand for `tensor_id` an initial value from the tensor's
    // current buffer and also appends it as an extra input to the node.
    // Used for LSTM state tensors, which NNAPI consumes as inputs.
    auto duplicate_state_tensor_float32 =
        [interpreter, &nn_model, &augmented_inputs, &next_id](int tensor_id) {
          const TfLiteTensor* tensor = interpreter->tensor(tensor_id);
          CHECK_NN(ANeuralNetworksModel_setOperandValue(
              nn_model, tensor_id, tensor->data.raw, tensor->bytes));
          augmented_inputs.push_back(tensor_id);
        };

    // ANEURALNETWORKS_ADD takes one fused-activation scalar; 0 means none.
    auto add_add_params = [&add_scalar_int32]() { add_scalar_int32(0); };

    auto add_pooling_params = [&add_scalar_int32](void* data) {
      auto builtin = reinterpret_cast<TfLitePoolParams*>(data);
      add_scalar_int32(builtin->padding);
      add_scalar_int32(builtin->stride_width);
      add_scalar_int32(builtin->stride_height);
      add_scalar_int32(builtin->filter_width);
      add_scalar_int32(builtin->filter_height);
      add_scalar_int32(builtin->activation);
    };

    auto add_convolution_params = [&add_scalar_int32](void* data) {
      auto builtin = reinterpret_cast<TfLiteConvParams*>(data);
      add_scalar_int32(builtin->padding);
      add_scalar_int32(builtin->stride_width);
      add_scalar_int32(builtin->stride_height);
      add_scalar_int32(builtin->activation);
    };

    auto add_depthwise_conv_params = [&add_scalar_int32](void* data) {
      auto builtin = reinterpret_cast<TfLiteDepthwiseConvParams*>(data);
      add_scalar_int32(builtin->padding);
      add_scalar_int32(builtin->stride_width);
      add_scalar_int32(builtin->stride_height);
      add_scalar_int32(builtin->depth_multiplier);
      add_scalar_int32(builtin->activation);
    };

    auto add_fully_connected_params = [&add_scalar_int32](void* data) {
      auto builtin = reinterpret_cast<TfLiteFullyConnectedParams*>(data);
      add_scalar_int32(builtin->activation);
    };

    auto add_concatenation_params = [&add_scalar_int32](void* data) {
      auto builtin = reinterpret_cast<TfLiteConcatenationParams*>(data);
      add_scalar_int32(builtin->axis);
      // NNAPI's CONCATENATION has no fused-activation parameter, so any
      // fused activation here cannot be expressed.
      if (builtin->activation != kTfLiteActNone) {
        FATAL("Concatenation does not support fused activation in NNAPI");
      }
    };

    auto add_softmax_params = [&add_scalar_float32](void* data) {
      auto builtin = reinterpret_cast<TfLiteSoftmaxParams*>(data);
      add_scalar_float32(builtin->beta);
    };

    auto add_space_to_depth_params = [&add_scalar_int32](void* data) {
      auto builtin = reinterpret_cast<TfLiteSpaceToDepthParams*>(data);
      add_scalar_int32(builtin->block_size);
    };

    auto add_lstm_params = [&add_scalar_int32,
                            &add_scalar_float32](void* data) {
      auto builtin = reinterpret_cast<TfLiteLSTMParams*>(data);
      add_scalar_int32(builtin->activation);
      add_scalar_float32(builtin->cell_clip);
      add_scalar_float32(builtin->proj_clip);
    };

// Disabled: RESHAPE's target-shape operand is currently not forwarded
// (see the RESHAPE case below where the call is commented out).
#if 0
    auto add_reshape_params = [&](void* data) {
      auto builtin = reinterpret_cast<TfLiteReshapeParams*>(data);
      uint32_t tensor_size_shape = builtin->num_dimensions;
      ANeuralNetworksOperandType operand_type{
          ANEURALNETWORKS_TENSOR_INT32,
          {static_cast<uint32_t>(1),
           reinterpret_cast<uint32_t*>(&tensor_size_shape)},
          0,
          0};
      CHECK_NN(ANeuralNetworksModel_addOperand(nn_model, &operand_type))
      CHECK_NN(ANeuralNetworksModel_setOperandValue(
          nn_model, next_id, builtin->shape,
          sizeof(int) * builtin->num_dimensions));
      augmented_inputs.push_back(next_id++);
    };
#endif

    // Map the TF Lite builtin to an NNAPI operation, appending any
    // parameter operands first so they follow the tensor inputs in order.
    ANeuralNetworksOperationType nn_op_type;
    switch (builtin) {
      case tflite::BuiltinOperator_ADD:
        nn_op_type = ANEURALNETWORKS_ADD;
        add_add_params();
        break;
      case tflite::BuiltinOperator_AVERAGE_POOL_2D:
        add_pooling_params(node.builtin_data);
        nn_op_type = ANEURALNETWORKS_AVERAGE_POOL_2D;
        break;
      case tflite::BuiltinOperator_MAX_POOL_2D:
        add_pooling_params(node.builtin_data);
        nn_op_type = ANEURALNETWORKS_MAX_POOL_2D;
        break;
      case tflite::BuiltinOperator_L2_POOL_2D:
        add_pooling_params(node.builtin_data);
        nn_op_type = ANEURALNETWORKS_L2_POOL_2D;
        break;
      case tflite::BuiltinOperator_CONV_2D:
        add_convolution_params(node.builtin_data);
        nn_op_type = ANEURALNETWORKS_CONV_2D;
        break;
      case tflite::BuiltinOperator_RELU:
        nn_op_type = ANEURALNETWORKS_RELU;
        break;
      case tflite::BuiltinOperator_RELU6:
        nn_op_type = ANEURALNETWORKS_RELU6;
        break;
      case tflite::BuiltinOperator_TANH:
        nn_op_type = ANEURALNETWORKS_TANH;
        break;
      case tflite::BuiltinOperator_LOGISTIC:
        nn_op_type = ANEURALNETWORKS_LOGISTIC;
        break;
      case tflite::BuiltinOperator_DEPTHWISE_CONV_2D:
        add_depthwise_conv_params(node.builtin_data);
        nn_op_type = ANEURALNETWORKS_DEPTHWISE_CONV_2D;
        break;
      case tflite::BuiltinOperator_CONCATENATION:
        add_concatenation_params(node.builtin_data);
        nn_op_type = ANEURALNETWORKS_CONCATENATION;
        break;
      case tflite::BuiltinOperator_SOFTMAX:
        add_softmax_params(node.builtin_data);
        nn_op_type = ANEURALNETWORKS_SOFTMAX;
        break;
      case tflite::BuiltinOperator_FULLY_CONNECTED:
        add_fully_connected_params(node.builtin_data);
        nn_op_type = ANEURALNETWORKS_FULLY_CONNECTED;
        break;
      case tflite::BuiltinOperator_RESHAPE:
        nn_op_type = ANEURALNETWORKS_RESHAPE;
        // add_reshape_params(node.builtin_data);
        break;
      case tflite::BuiltinOperator_SPACE_TO_DEPTH:
        add_space_to_depth_params(node.builtin_data);
        nn_op_type = ANEURALNETWORKS_SPACE_TO_DEPTH;
        break;
      case tflite::BuiltinOperator_LSTM: {
        duplicate_state_tensor_float32(
            node.outputs->data[/*kOutputStateTensor*/ 1]);
        duplicate_state_tensor_float32(
            node.outputs->data[/*kCellStateTensor*/ 2]);
        add_lstm_params(node.builtin_data);
        nn_op_type = ANEURALNETWORKS_LSTM;
        break;
      }
      case tflite::BuiltinOperator_CONCAT_EMBEDDINGS:
      case tflite::BuiltinOperator_LSH_PROJECTION:
      case tflite::BuiltinOperator_SVDF:
      case tflite::BuiltinOperator_HASHTABLE_LOOKUP:
      case tflite::BuiltinOperator_RNN:
      case tflite::BuiltinOperator_BIDIRECTIONAL_SEQUENCE_RNN:
      case tflite::BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_RNN:
      case tflite::BuiltinOperator_EMBEDDING_LOOKUP:
      case tflite::BuiltinOperator_EMBEDDING_LOOKUP_SPARSE:
      case tflite::BuiltinOperator_UNIDIRECTIONAL_SEQUENCE_LSTM:
      case tflite::BuiltinOperator_L2_NORMALIZATION:
      case tflite::BuiltinOperator_LOCAL_RESPONSE_NORMALIZATION:
      case tflite::BuiltinOperator_MUL:
      case tflite::BuiltinOperator_PAD:
      case tflite::BuiltinOperator_RESIZE_BILINEAR:
      case tflite::BuiltinOperator_CALL:
      case tflite::BuiltinOperator_SKIP_GRAM:
      case tflite::BuiltinOperator_RELU_N1_TO_1:
      case tflite::BuiltinOperator_GATHER:
      case tflite::BuiltinOperator_SPACE_TO_BATCH_ND:
      case tflite::BuiltinOperator_BATCH_TO_SPACE_ND:
      case tflite::BuiltinOperator_TOPK_V2:
      case tflite::BuiltinOperator_TRANSPOSE:
      case tflite::BuiltinOperator_MEAN:
      case tflite::BuiltinOperator_DIV:
      case tflite::BuiltinOperator_SUB:
      case tflite::BuiltinOperator_SPLIT:
      case tflite::BuiltinOperator_SQUEEZE:
      case tflite::BuiltinOperator_STRIDED_SLICE:
      case tflite::BuiltinOperator_EXP:
        FATAL("Op code %d is currently not delegated to NNAPI", builtin);
        nn_op_type = -1;  // set to invalid
        break;
      case tflite::BuiltinOperator_CUSTOM:
        FATAL("Custom operations are not supported when using NNAPI.");
        nn_op_type = -1;  // set to invalid
        break;
    }

    // Add the operation.
    CHECK_NN(ANeuralNetworksModel_addOperation(
        nn_model, nn_op_type, static_cast<uint32_t>(augmented_inputs.size()),
        augmented_inputs.data(), static_cast<uint32_t>(node.outputs->size),
        reinterpret_cast<uint32_t*>(node.outputs->data)));
  }
}

// Builds (once) the NNAPI model and compilation for `interpreter`.
// Subsequent calls are no-ops while both cached handles exist.
TfLiteStatus NNAPIDelegate::BuildGraph(Interpreter* interpreter) {
  // TODO(aselle): This is not correct. need to handle resize invalidation.
  if (nn_model_ && nn_compiled_model_) return kTfLiteOk;

  if (!nn_model_) {
    CHECK_NN(ANeuralNetworksModel_create(&nn_model_));

    uint32_t next_id = addTensorOperands(interpreter, nn_model_);
    AddOpsAndParams(interpreter, nn_model_, next_id);
    // NNAPI operand ids here are the interpreter's tensor indices (see
    // addTensorOperands), so the input/output index vectors can be passed
    // through directly.
    CHECK_NN(ANeuralNetworksModel_identifyInputsAndOutputs(
        nn_model_, static_cast<uint32_t>(interpreter->inputs().size()),
        reinterpret_cast<const uint32_t*>(interpreter->inputs().data()),
        static_cast<uint32_t>(interpreter->outputs().size()),
        reinterpret_cast<const uint32_t*>(interpreter->outputs().data())));
    CHECK_NN(ANeuralNetworksModel_finish(nn_model_));
  }
  if (!nn_compiled_model_) {
    CHECK_NN(ANeuralNetworksCompilation_create(nn_model_, &nn_compiled_model_));
    CHECK_NN(ANeuralNetworksCompilation_finish(nn_compiled_model_));
  }
  return kTfLiteOk;
}

// Runs one inference through the compiled NNAPI model, copying interpreter
// input buffers in and receiving outputs into the interpreter's tensors.
TfLiteStatus NNAPIDelegate::Invoke(Interpreter* interpreter) {
  // NOTE(review): only nn_model_ is checked; if nn_model_ exists but
  // nn_compiled_model_ is null the Execution_create below would get a null
  // compilation — confirm BuildGraph can never leave that state.
  if (!nn_model_) {
    TF_LITE_ENSURE_STATUS(BuildGraph(interpreter));
  }

  ANeuralNetworksExecution* execution = nullptr;
  CHECK_NN(ANeuralNetworksExecution_create(nn_compiled_model_, &execution));

  // Currently perform deep copy of input buffer
  for (size_t i = 0; i < interpreter->inputs().size(); i++) {
    int input = interpreter->inputs()[i];
    // TODO(aselle): Is this what we want or do we want input instead?
    // TODO(aselle): This should be called setInputValue maybe to be cons.
    TfLiteTensor* tensor = interpreter->tensor(input);
    CHECK_NN(ANeuralNetworksExecution_setInput(
        execution, i, nullptr, tensor->data.raw, tensor->bytes));
  }
  // Tell nn api where to place final data.
  for (size_t i = 0; i < interpreter->outputs().size(); i++) {
    int output = interpreter->outputs()[i];
    TfLiteTensor* tensor = interpreter->tensor(output);
    CHECK_NN(ANeuralNetworksExecution_setOutput(
        execution, i, nullptr, tensor->data.raw, tensor->bytes));
  }
  // Currently use blocking compute.
  ANeuralNetworksEvent* event = nullptr;
  CHECK_NN(ANeuralNetworksExecution_startCompute(execution, &event));
  CHECK_NN(ANeuralNetworksEvent_wait(event));
  ANeuralNetworksEvent_free(event);
  ANeuralNetworksExecution_free(execution);

// Debug-only dump of the first output tensor; disabled in normal builds.
#if 0
  printf("From the NN API:\n");
  TfLiteTensor* tensor = interpreter->tensor(interpreter->outputs()[0]);
  if (float* data =
          interpreter->typed_tensor<float>(interpreter->outputs()[0])) {
    size_t num = tensor->bytes / sizeof(float);
    for (float* p = data; p < data + num; p++) {
      printf(" %f", *p);
    }
    printf("\n");
  }
#endif

  return kTfLiteOk;
}

}  // namespace tflite