/*
 * Copyright (C) 2017 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#define LOG_TAG "CpuExecutor"

#include "CpuExecutor.h"

#include "NeuralNetworks.h"
#include "Operations.h"

#include "Eigen/Core"
#include <omp.h>
#include <sys/mman.h>

namespace android {
namespace nn {

// TODO: short term, make shared memory mapping and updating a utility function.
// TODO: long term, implement mmap_fd as a hidl IMemory service.
RunTimePoolInfo::RunTimePoolInfo(const hidl_memory& hidlMemory, bool* fail) {
    sp<IMemory> memory;
    uint8_t* buffer = nullptr;

    auto memType = hidlMemory.name();
    if (memType == "ashmem") {
        memory = mapMemory(hidlMemory);
        if (memory == nullptr) {
            LOG(ERROR) << "Can't map shared memory.";
            if (fail) *fail = true;
            return;
        }
        memory->update();
        buffer = reinterpret_cast<uint8_t*>(static_cast<void*>(memory->getPointer()));
        if (buffer == nullptr) {
            LOG(ERROR) << "Can't access shared memory.";
            if (fail) *fail = true;
            return;
        }
    } else if (memType == "mmap_fd") {
        // The native handle carries the fd, the mmap protection flags, and a 64-bit
        // offset split across two 32-bit ints.
        size_t size = hidlMemory.size();
        int fd = hidlMemory.handle()->data[0];
        int prot = hidlMemory.handle()->data[1];
        size_t offset = getSizeFromInts(hidlMemory.handle()->data[2],
                                        hidlMemory.handle()->data[3]);
        buffer = static_cast<uint8_t*>(mmap(nullptr, size, prot, MAP_SHARED, fd, offset));
        if (buffer == MAP_FAILED) {
            LOG(ERROR) << "RunTimePoolInfo::RunTimePoolInfo(): Can't mmap the file descriptor.";
            if (fail) *fail = true;
            return;
        }
    } else {
        LOG(ERROR) << "RunTimePoolInfo::RunTimePoolInfo(): unsupported hidl_memory type";
        if (fail) *fail = true;
        return;
    }

    mHidlMemory = hidlMemory;
    mBuffer = buffer;
    mMemory = memory;
}

RunTimePoolInfo::RunTimePoolInfo(uint8_t* buffer) {
    mBuffer = buffer;
}

RunTimePoolInfo::RunTimePoolInfo(RunTimePoolInfo&& other) {
    moveFrom(std::move(other));
    other.mBuffer = nullptr;
}

RunTimePoolInfo& RunTimePoolInfo::operator=(RunTimePoolInfo&& other) {
    if (this != &other) {
        release();
        moveFrom(std::move(other));
        other.mBuffer = nullptr;
    }
    return *this;
}

void RunTimePoolInfo::moveFrom(RunTimePoolInfo&& other) {
    mHidlMemory = std::move(other.mHidlMemory);
    mBuffer = std::move(other.mBuffer);
    mMemory = std::move(other.mMemory);
}

void RunTimePoolInfo::release() {
    if (mBuffer == nullptr) {
        return;
    }

    auto memType = mHidlMemory.name();
    if (memType == "ashmem") {
        // nothing to do
    } else if (memType == "mmap_fd") {
        size_t size = mHidlMemory.size();
        if (munmap(mBuffer, size)) {
            LOG(ERROR) << "RunTimePoolInfo::release(): Can't munmap";
        }
    } else if (memType == "") {
        // Represents a POINTER argument; nothing to do
    } else {
        LOG(ERROR) << "RunTimePoolInfo::release(): unsupported hidl_memory type";
    }

    mHidlMemory = hidl_memory();
    mMemory = nullptr;
    mBuffer = nullptr;
}
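// Illustrative sketch (not part of the original file; the surrounding error handling
// is an assumption): a typical lifecycle for a RunTimePoolInfo built from a
// driver-provided hidl_memory looks roughly like this:
//
//     bool fail = false;
//     RunTimePoolInfo pool(hidlMemory, &fail);   // map the pool
//     if (fail) { /* report an error, e.g. ANEURALNETWORKS_UNMAPPABLE */ }
//     uint8_t* base = pool.getBuffer();          // operand data lives at base + offset
//     // ... run the model ...
//     pool.update();                             // flush outputs (commit/msync) afterwards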
// Makes sure the output data is correctly updated after execution.
bool RunTimePoolInfo::update() const {
    auto memType = mHidlMemory.name();
    if (memType == "ashmem") {
        mMemory->commit();
        return true;
    } else if (memType == "mmap_fd") {
        int prot = mHidlMemory.handle()->data[1];
        if (prot & PROT_WRITE) {
            size_t size = mHidlMemory.size();
            return msync(mBuffer, size, MS_SYNC) == 0;
        }
    }
    // No-op for other types of memory.
    return true;
}

bool setRunTimePoolInfosFromHidlMemories(std::vector<RunTimePoolInfo>* poolInfos,
                                         const hidl_vec<hidl_memory>& pools) {
    poolInfos->clear();
    poolInfos->reserve(pools.size());
    bool fail = false;
    for (const auto& pool : pools) {
        poolInfos->emplace_back(pool, &fail);
    }
    if (fail) {
        LOG(ERROR) << "Could not map pools";
        poolInfos->clear();
        return false;
    }
    return true;
}

// Updates the RunTimeOperandInfo with the newly calculated shape.
// Allocates the buffer if needed.
static bool setInfoAndAllocateIfNeeded(RunTimeOperandInfo* info, const Shape& shape) {
    // For user-provided model output operands, the parameters must match the Shape
    // calculated from the preparation step.
    if (info->lifetime == OperandLifeTime::MODEL_OUTPUT) {
        if (info->type != shape.type ||
            info->dimensions != shape.dimensions) {
            LOG(ERROR) << "Invalid type or dimensions for model output";
            return false;
        }
        if (info->type == OperandType::TENSOR_QUANT8_ASYMM &&
            (info->scale != shape.scale || info->zeroPoint != shape.offset)) {
            LOG(ERROR) << "Invalid scale or zeroPoint for model output";
            return false;
        }
    }
    info->type = shape.type;
    info->dimensions = shape.dimensions;
    info->scale = shape.scale;
    info->zeroPoint = shape.offset;
    if (info->lifetime == OperandLifeTime::TEMPORARY_VARIABLE && info->buffer == nullptr) {
        uint32_t length = sizeOfData(info->type, info->dimensions);
        // Use the non-throwing form of new so that the nullptr check below is meaningful.
        info->buffer = new (std::nothrow) uint8_t[length];
        if (info->buffer == nullptr) {
            return false;
        }
    }
    return true;
}

// Ignore the .pools entry in model and request. This will have been taken care of
// by the caller.
int CpuExecutor::run(const V1_0::Model& model, const Request& request,
                     const std::vector<RunTimePoolInfo>& modelPoolInfos,
                     const std::vector<RunTimePoolInfo>& requestPoolInfos) {
    return run(convertToV1_1(model), request, modelPoolInfos, requestPoolInfos);
}

int CpuExecutor::run(const V1_1::Model& model, const Request& request,
                     const std::vector<RunTimePoolInfo>& modelPoolInfos,
                     const std::vector<RunTimePoolInfo>& requestPoolInfos) {
    VLOG(CPUEXE) << "CpuExecutor::run() with request("
                 << SHOW_IF_DEBUG(toString(request)) << ")";

    ScopedOpenmpSettings openMpSettings;

    mModel = &model;
    mRequest = &request;  // TODO check if mRequest is needed
    initializeRunTimeInfo(modelPoolInfos, requestPoolInfos);
    // The model has serialized the operations in execution order.
    for (const auto& operation : model.operations) {
        int n = executeOperation(operation);
        if (n != ANEURALNETWORKS_NO_ERROR) {
            return n;
        }
    }
    for (auto& runtimeInfo : modelPoolInfos) {
        runtimeInfo.update();
    }
    for (auto& runtimeInfo : requestPoolInfos) {
        runtimeInfo.update();
    }
    mModel = nullptr;
    mRequest = nullptr;
    VLOG(CPUEXE) << "Completed run normally";
    return ANEURALNETWORKS_NO_ERROR;
}
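// Illustrative sketch (an assumption, not part of the original file): a driver-side
// caller typically maps the pools and then hands everything to run(), roughly:
//
//     std::vector<RunTimePoolInfo> modelPools, requestPools;
//     if (!setRunTimePoolInfosFromHidlMemories(&modelPools, model.pools) ||
//         !setRunTimePoolInfosFromHidlMemories(&requestPools, request.pools)) {
//         // report an error to the caller
//     }
//     CpuExecutor executor;
//     int n = executor.run(model, request, modelPools, requestPools);
//
// The exact error handling is up to the caller; see SampleDriver.cpp for the real flow.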
bool CpuExecutor::initializeRunTimeInfo(const std::vector<RunTimePoolInfo>& modelPoolInfos,
                                        const std::vector<RunTimePoolInfo>& requestPoolInfos) {
    VLOG(CPUEXE) << "CpuExecutor::initializeRunTimeInfo";
    const size_t count = mModel->operands.size();
    mOperands.resize(count);

    // Start by setting the runtime info to what's in the model.
    for (size_t i = 0; i < count; i++) {
        const Operand& from = mModel->operands[i];
        RunTimeOperandInfo& to = mOperands[i];
        to.type = from.type;
        to.dimensions = from.dimensions;
        to.scale = from.scale;
        to.zeroPoint = from.zeroPoint;
        to.length = from.location.length;
        to.lifetime = from.lifetime;
        switch (from.lifetime) {
            case OperandLifeTime::TEMPORARY_VARIABLE:
                to.buffer = nullptr;
                to.numberOfUsesLeft = from.numberOfConsumers;
                break;
            case OperandLifeTime::CONSTANT_COPY:
                to.buffer = const_cast<uint8_t*>(&mModel->operandValues[from.location.offset]);
                to.numberOfUsesLeft = 0;
                break;
            case OperandLifeTime::CONSTANT_REFERENCE: {
                auto poolIndex = from.location.poolIndex;
                nnAssert(poolIndex < modelPoolInfos.size());
                auto& r = modelPoolInfos[poolIndex];
                to.buffer = r.getBuffer() + from.location.offset;
                to.numberOfUsesLeft = 0;
                break;
            }
            case OperandLifeTime::MODEL_INPUT:
            case OperandLifeTime::MODEL_OUTPUT:
            case OperandLifeTime::NO_VALUE:
                to.buffer = nullptr;
                to.numberOfUsesLeft = 0;
                break;
            default:
                nnAssert(false);
                break;
        }
    }

    // Adjust the runtime info for the arguments passed to the model,
    // modifying the buffer location, and possibly the dimensions.
    auto updateForArguments = [this, &requestPoolInfos](const std::vector<uint32_t>& indexes,
                                                        const hidl_vec<RequestArgument>& arguments) {
        nnAssert(indexes.size() == arguments.size());
        for (size_t i = 0; i < indexes.size(); i++) {
            const uint32_t operandIndex = indexes[i];
            const RequestArgument& from = arguments[i];
            RunTimeOperandInfo& to = mOperands[operandIndex];
            if (from.dimensions.size() > 0) {
                // It's the responsibility of the caller to validate that
                // from.dimensions only modifies the dimensions that were
                // unspecified in the model. That's the case in SampleDriver.cpp
                // with the call to validateRequest().
                // TODO make sure that's the case for the default CPU path.
                to.dimensions = from.dimensions;
            }
            if (from.hasNoValue) {
                to.lifetime = OperandLifeTime::NO_VALUE;
                nnAssert(to.buffer == nullptr);
            } else {
                auto poolIndex = from.location.poolIndex;
                nnAssert(poolIndex < requestPoolInfos.size());
                auto& r = requestPoolInfos[poolIndex];
                to.buffer = r.getBuffer() + from.location.offset;
            }
        }
    };
    updateForArguments(mModel->inputIndexes, mRequest->inputs);
    updateForArguments(mModel->outputIndexes, mRequest->outputs);

    return true;
}

void CpuExecutor::freeNoLongerUsedOperands(const std::vector<uint32_t>& inputs) {
    for (uint32_t i : inputs) {
        auto& info = mOperands[i];
        // A count of 0 means the operand is a constant or a model input/output; skip those.
        if (info.numberOfUsesLeft == 0) {
            continue;
        }
        info.numberOfUsesLeft--;
        if (info.numberOfUsesLeft == 0) {
            nnAssert(info.buffer != nullptr);
            delete[] info.buffer;
            info.buffer = nullptr;
        }
    }
}
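// Note on the dispatch below (added for clarity): most tensor operations follow the
// same three-step pattern -- derive and validate the output Shape with an <op>Prepare()
// helper, resize or allocate the output operand with setInfoAndAllocateIfNeeded(),
// then run the typed kernel. Schematically ("foo" is a placeholder; the real helpers
// such as addMulPrepare and convFloat32 come from Operations.h):
//
//     success = fooPrepare(input.shape(), ..., &outShape) &&
//               setInfoAndAllocateIfNeeded(&output, outShape) &&
//               fooFloat32(inPtr, input.shape(), ..., outPtr, outShape);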
int CpuExecutor::executeOperation(const Operation& operation) {
    // VLOG(CPUEXE) << "CpuExecutor::executeOperation(" << toString(operation) << ")";
    const hidl_vec<uint32_t>& ins = operation.inputs;
    const hidl_vec<uint32_t>& outs = operation.outputs;
    bool success = false;

    // Function to verify that the number of input and output parameters
    // matches what is expected. Also checks that all the parameters have
    // values. This function is to be used only for operations that do not
    // accept optional arguments.
    // TODO Have a version that works for optional arguments.
    auto allParametersPresent = [&operation, &ins, &outs, this](size_t requiredIns,
                                                                size_t requiredOuts) -> bool {
        auto verify = [&operation, this](size_t requiredCount, const hidl_vec<uint32_t>& indexes,
                                         const char* type) -> bool {
            size_t actualCount = indexes.size();
            if (actualCount != requiredCount) {
                LOG(ERROR) << getOperationName(operation.type)
                           << ": Invalid number of " << type << " operands. Got " << actualCount
                           << " of " << requiredCount;
                return false;
            }
            for (size_t i = 0; i < actualCount; i++) {
                if (mOperands[indexes[i]].lifetime == OperandLifeTime::NO_VALUE) {
                    LOG(ERROR) << getOperationName(operation.type) << " " << type
                               << " operand " << i << " is required but missing.";
                    return false;
                }
            }
            return true;
        };
        return verify(requiredIns, ins, "in") && verify(requiredOuts, outs, "out");
    };

    switch (operation.type) {
        case OperationType::OEM_OPERATION: {
            LOG(ERROR) << "OEM operation not supported for CPU execution";
            success = false;
        } break;
        case OperationType::ADD: {
            if (!allParametersPresent(3, 1)) {
                return ANEURALNETWORKS_BAD_DATA;
            }
            const RunTimeOperandInfo& in1 = mOperands[ins[0]];
            const RunTimeOperandInfo& in2 = mOperands[ins[1]];
            int32_t activation = getScalarData<int32_t>(mOperands[ins[2]]);

            RunTimeOperandInfo& out = mOperands[outs[0]];
            Shape outShape = out.shape();

            if (in1.type == OperandType::TENSOR_FLOAT32) {
                success = addMulPrepare(in1.shape(), in2.shape(), &outShape) &&
                          setInfoAndAllocateIfNeeded(&out, outShape) &&
                          addFloat32(reinterpret_cast<const float*>(in1.buffer), in1.shape(),
                                     reinterpret_cast<const float*>(in2.buffer), in2.shape(),
                                     activation,
                                     reinterpret_cast<float*>(out.buffer), outShape);
            } else if (in1.type == OperandType::TENSOR_QUANT8_ASYMM) {
                success = addMulPrepare(in1.shape(), in2.shape(), &outShape) &&
                          setInfoAndAllocateIfNeeded(&out, outShape) &&
                          addQuant8(reinterpret_cast<const uint8_t*>(in1.buffer), in1.shape(),
                                    reinterpret_cast<const uint8_t*>(in2.buffer), in2.shape(),
                                    activation,
                                    reinterpret_cast<uint8_t*>(out.buffer), outShape);
            }
        } break;
        case OperationType::MUL: {
            if (!allParametersPresent(3, 1)) {
                return ANEURALNETWORKS_BAD_DATA;
            }
            const RunTimeOperandInfo& in1 = mOperands[ins[0]];
            const RunTimeOperandInfo& in2 = mOperands[ins[1]];
            int32_t activation = getScalarData<int32_t>(mOperands[ins[2]]);

            RunTimeOperandInfo& out = mOperands[outs[0]];
            Shape outShape = out.shape();

            if (in1.type == OperandType::TENSOR_FLOAT32) {
                success = addMulPrepare(in1.shape(), in2.shape(), &outShape) &&
                          setInfoAndAllocateIfNeeded(&out, outShape) &&
                          mulFloat32(reinterpret_cast<const float*>(in1.buffer), in1.shape(),
                                     reinterpret_cast<const float*>(in2.buffer), in2.shape(),
                                     activation,
                                     reinterpret_cast<float*>(out.buffer), outShape);
            } else if (in1.type == OperandType::TENSOR_QUANT8_ASYMM) {
                success = addMulPrepare(in1.shape(), in2.shape(), &outShape) &&
                          setInfoAndAllocateIfNeeded(&out, outShape) &&
                          mulQuant8(reinterpret_cast<const uint8_t*>(in1.buffer), in1.shape(),
                                    reinterpret_cast<const uint8_t*>(in2.buffer), in2.shape(),
                                    activation,
                                    reinterpret_cast<uint8_t*>(out.buffer), outShape);
            }
        } break;
        case OperationType::FLOOR: {
            if (!allParametersPresent(1, 1)) {
                return ANEURALNETWORKS_BAD_DATA;
            }
            const RunTimeOperandInfo& input = mOperands[ins[0]];
            RunTimeOperandInfo& output = mOperands[outs[0]];
            Shape outShape = output.shape();

            if (input.type == OperandType::TENSOR_FLOAT32) {
                success = floorPrepare(input.shape(), &outShape) &&
                          setInfoAndAllocateIfNeeded(&output, outShape) &&
                          floorFloat32(reinterpret_cast<const float*>(input.buffer),
                                       reinterpret_cast<float*>(output.buffer), outShape);
            }
        } break;
        case OperationType::DEQUANTIZE: {
            if (!allParametersPresent(1, 1)) {
                return ANEURALNETWORKS_BAD_DATA;
            }
            const RunTimeOperandInfo& input = mOperands[ins[0]];
            RunTimeOperandInfo& output = mOperands[outs[0]];
            Shape outShape = output.shape();

            if (input.type == OperandType::TENSOR_QUANT8_ASYMM) {
                success = dequantizePrepare(input.shape(), &outShape) &&
                          setInfoAndAllocateIfNeeded(&output, outShape) &&
                          dequantizeQuant8ToFloat32(reinterpret_cast<const uint8_t*>(input.buffer),
                                                    reinterpret_cast<float*>(output.buffer),
                                                    input.shape());
            }
        } break;
        case OperationType::DEPTHWISE_CONV_2D: {
            const size_t inCount = ins.size();
            if ((inCount != 11 && inCount != 8) ||
                !allParametersPresent(inCount, 1)) {
                return ANEURALNETWORKS_BAD_DATA;
            }
            const RunTimeOperandInfo& input = mOperands[ins[0]];
            const RunTimeOperandInfo& filter = mOperands[ins[1]];
            const RunTimeOperandInfo& bias = mOperands[ins[2]];

            int32_t padding_left, padding_right;
            int32_t padding_top, padding_bottom;
            int32_t stride_width, stride_height;
            int32_t depth_multiplier;
            int32_t activation;

            if (inCount == 11) {
                padding_left = getScalarData<int32_t>(mOperands[ins[3]]);
                padding_right = getScalarData<int32_t>(mOperands[ins[4]]);
                padding_top = getScalarData<int32_t>(mOperands[ins[5]]);
                padding_bottom = getScalarData<int32_t>(mOperands[ins[6]]);
                stride_width = getScalarData<int32_t>(mOperands[ins[7]]);
                stride_height = getScalarData<int32_t>(mOperands[ins[8]]);
                depth_multiplier = getScalarData<int32_t>(mOperands[ins[9]]);
                activation = getScalarData<int32_t>(mOperands[ins[10]]);
            } else {
                int32_t padding_implicit = getScalarData<int32_t>(mOperands[ins[3]]);
                stride_width = getScalarData<int32_t>(mOperands[ins[4]]);
                stride_height = getScalarData<int32_t>(mOperands[ins[5]]);
                depth_multiplier = getScalarData<int32_t>(mOperands[ins[6]]);
                activation = getScalarData<int32_t>(mOperands[ins[7]]);

                Shape inputShape = input.shape();
                Shape filterShape = filter.shape();
                int32_t input_width = getSizeOfDimension(inputShape, 2);
                int32_t input_height = getSizeOfDimension(inputShape, 1);
                int32_t filter_width = getSizeOfDimension(filterShape, 2);
                int32_t filter_height = getSizeOfDimension(filterShape, 1);
                calculateExplicitPadding(input_width, stride_width, filter_width,
                                         padding_implicit, &padding_left, &padding_right);
                calculateExplicitPadding(input_height, stride_height, filter_height,
                                         padding_implicit, &padding_top, &padding_bottom);
            }

            RunTimeOperandInfo& output = mOperands[outs[0]];
            Shape outShape = output.shape();

            if (input.type == OperandType::TENSOR_FLOAT32) {
                success = depthwiseConvPrepare(input.shape(), filter.shape(), bias.shape(),
                                               padding_left, padding_right, padding_top,
                                               padding_bottom, stride_width, stride_height,
                                               &outShape) &&
                          setInfoAndAllocateIfNeeded(&output, outShape) &&
                          depthwiseConvFloat32(
                                  reinterpret_cast<const float*>(input.buffer), input.shape(),
                                  reinterpret_cast<const float*>(filter.buffer), filter.shape(),
                                  reinterpret_cast<const float*>(bias.buffer), bias.shape(),
                                  padding_left, padding_right, padding_top, padding_bottom,
                                  stride_width, stride_height, depth_multiplier, activation,
                                  reinterpret_cast<float*>(output.buffer), outShape);
            } else if (input.type == OperandType::TENSOR_QUANT8_ASYMM) {
                success = depthwiseConvPrepare(input.shape(), filter.shape(), bias.shape(),
                                               padding_left, padding_right, padding_top,
                                               padding_bottom, stride_width, stride_height,
                                               &outShape) &&
                          setInfoAndAllocateIfNeeded(&output, outShape) &&
                          depthwiseConvQuant8(
                                  reinterpret_cast<const uint8_t*>(input.buffer), input.shape(),
                                  reinterpret_cast<const uint8_t*>(filter.buffer), filter.shape(),
                                  reinterpret_cast<const int32_t*>(bias.buffer), bias.shape(),
                                  padding_left, padding_right, padding_top, padding_bottom,
                                  stride_width, stride_height, depth_multiplier, activation,
                                  reinterpret_cast<uint8_t*>(output.buffer), outShape);
            }

        } break;
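        // Note (added for clarity): with the short input form, the padding is given as an
        // implicit scheme and converted to explicit values by calculateExplicitPadding()
        // above. Under the usual SAME/VALID convention (an assumption about that helper;
        // see its definition for the authoritative math), SAME padding works out to roughly:
        //
        //     out  = (in + stride - 1) / stride;                     // ceil division
        //     tot  = std::max(0, (out - 1) * stride + filter - in);  // total padding
        //     head = tot / 2;                                        // left / top
        //     tail = tot - head;                                     // right / bottom
        //
        // while VALID padding is simply zero on all sides. The same conversion is reused by
        // CONV_2D and the pooling cases below.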
        case OperationType::CONV_2D: {
            const size_t inCount = ins.size();
            if ((inCount != 10 && inCount != 7) ||
                !allParametersPresent(inCount, 1)) {
                return ANEURALNETWORKS_BAD_DATA;
            }
            const RunTimeOperandInfo& input = mOperands[ins[0]];
            const RunTimeOperandInfo& filter = mOperands[ins[1]];
            const RunTimeOperandInfo& bias = mOperands[ins[2]];

            int32_t padding_left, padding_right;
            int32_t padding_top, padding_bottom;
            int32_t stride_width, stride_height;
            int32_t activation;

            if (inCount == 10) {
                padding_left = getScalarData<int32_t>(mOperands[ins[3]]);
                padding_right = getScalarData<int32_t>(mOperands[ins[4]]);
                padding_top = getScalarData<int32_t>(mOperands[ins[5]]);
                padding_bottom = getScalarData<int32_t>(mOperands[ins[6]]);
                stride_width = getScalarData<int32_t>(mOperands[ins[7]]);
                stride_height = getScalarData<int32_t>(mOperands[ins[8]]);
                activation = getScalarData<int32_t>(mOperands[ins[9]]);
            } else {
                int32_t padding_implicit = getScalarData<int32_t>(mOperands[ins[3]]);
                stride_width = getScalarData<int32_t>(mOperands[ins[4]]);
                stride_height = getScalarData<int32_t>(mOperands[ins[5]]);
                activation = getScalarData<int32_t>(mOperands[ins[6]]);

                Shape inputShape = input.shape();
                Shape filterShape = filter.shape();
                int32_t input_width = getSizeOfDimension(inputShape, 2);
                int32_t input_height = getSizeOfDimension(inputShape, 1);
                int32_t filter_width = getSizeOfDimension(filterShape, 2);
                int32_t filter_height = getSizeOfDimension(filterShape, 1);
                calculateExplicitPadding(input_width, stride_width, filter_width,
                                         padding_implicit, &padding_left, &padding_right);
                calculateExplicitPadding(input_height, stride_height, filter_height,
                                         padding_implicit, &padding_top, &padding_bottom);
            }

            RunTimeOperandInfo& output = mOperands[outs[0]];
            Shape outShape = output.shape();

            if (input.type == OperandType::TENSOR_FLOAT32) {
                success = convPrepare(input.shape(), filter.shape(), bias.shape(),
                                      padding_left, padding_right, padding_top, padding_bottom,
                                      stride_width, stride_height, &outShape) &&
                          setInfoAndAllocateIfNeeded(&output, outShape) &&
                          convFloat32(reinterpret_cast<const float*>(input.buffer), input.shape(),
                                      reinterpret_cast<const float*>(filter.buffer), filter.shape(),
                                      reinterpret_cast<const float*>(bias.buffer), bias.shape(),
                                      padding_left, padding_right, padding_top, padding_bottom,
                                      stride_width, stride_height, activation,
                                      reinterpret_cast<float*>(output.buffer), outShape);
            } else if (input.type == OperandType::TENSOR_QUANT8_ASYMM) {
                success = convPrepare(input.shape(), filter.shape(), bias.shape(),
                                      padding_left, padding_right, padding_top, padding_bottom,
                                      stride_width, stride_height, &outShape) &&
                          setInfoAndAllocateIfNeeded(&output, outShape) &&
                          convQuant8(reinterpret_cast<const uint8_t*>(input.buffer), input.shape(),
                                     reinterpret_cast<const uint8_t*>(filter.buffer), filter.shape(),
                                     reinterpret_cast<const int32_t*>(bias.buffer), bias.shape(),
                                     padding_left, padding_right, padding_top, padding_bottom,
                                     stride_width, stride_height, activation,
                                     reinterpret_cast<uint8_t*>(output.buffer), outShape);
            }
        } break;
        case OperationType::AVERAGE_POOL_2D: {
            const size_t inCount = ins.size();
            if ((inCount != 10 && inCount != 7) ||
                !allParametersPresent(inCount, 1)) {
                return ANEURALNETWORKS_BAD_DATA;
            }
            const RunTimeOperandInfo& input = mOperands[ins[0]];

            int32_t padding_left, padding_right;
            int32_t padding_top, padding_bottom;
            int32_t stride_width, stride_height;
            int32_t filter_width, filter_height;
            int32_t activation;

            if (inCount == 10) {
                padding_left = getScalarData<int32_t>(mOperands[ins[1]]);
                padding_right = getScalarData<int32_t>(mOperands[ins[2]]);
                padding_top = getScalarData<int32_t>(mOperands[ins[3]]);
                padding_bottom = getScalarData<int32_t>(mOperands[ins[4]]);
                stride_width = getScalarData<int32_t>(mOperands[ins[5]]);
                stride_height = getScalarData<int32_t>(mOperands[ins[6]]);
                filter_width = getScalarData<int32_t>(mOperands[ins[7]]);
                filter_height = getScalarData<int32_t>(mOperands[ins[8]]);
                activation = getScalarData<int32_t>(mOperands[ins[9]]);
            } else {
                int32_t padding_implicit = getScalarData<int32_t>(mOperands[ins[1]]);
                stride_width = getScalarData<int32_t>(mOperands[ins[2]]);
                stride_height = getScalarData<int32_t>(mOperands[ins[3]]);
                filter_width = getScalarData<int32_t>(mOperands[ins[4]]);
                filter_height = getScalarData<int32_t>(mOperands[ins[5]]);
                activation = getScalarData<int32_t>(mOperands[ins[6]]);

                Shape inputShape = input.shape();
                int32_t input_width = getSizeOfDimension(inputShape, 2);
                int32_t input_height = getSizeOfDimension(inputShape, 1);
                calculateExplicitPadding(input_width, stride_width, filter_width,
                                         padding_implicit, &padding_left, &padding_right);
                calculateExplicitPadding(input_height, stride_height, filter_height,
                                         padding_implicit, &padding_top, &padding_bottom);
            }

            RunTimeOperandInfo& output = mOperands[outs[0]];
            Shape outShape = output.shape();

            if (input.type == OperandType::TENSOR_FLOAT32) {
                success = genericPoolingPrepare(input.shape(),
                                                padding_left, padding_right, padding_top,
                                                padding_bottom, stride_width, stride_height,
                                                filter_width, filter_height, &outShape) &&
                          setInfoAndAllocateIfNeeded(&output, outShape) &&
                          averagePoolFloat32(reinterpret_cast<const float*>(input.buffer),
                                             input.shape(),
                                             padding_left, padding_right, padding_top,
                                             padding_bottom, stride_width, stride_height,
                                             filter_width, filter_height, activation,
                                             reinterpret_cast<float*>(output.buffer), outShape);
            } else if (input.type == OperandType::TENSOR_QUANT8_ASYMM) {
                success = genericPoolingPrepare(input.shape(),
                                                padding_left, padding_right, padding_top,
                                                padding_bottom, stride_width, stride_height,
                                                filter_width, filter_height, &outShape) &&
                          setInfoAndAllocateIfNeeded(&output, outShape) &&
                          averagePoolQuant8(reinterpret_cast<const uint8_t*>(input.buffer),
                                            input.shape(),
                                            padding_left, padding_right, padding_top,
                                            padding_bottom, stride_width, stride_height,
                                            filter_width, filter_height, activation,
                                            reinterpret_cast<uint8_t*>(output.buffer), outShape);
            }
        } break;
        case OperationType::L2_POOL_2D: {
            const size_t inCount = ins.size();
            if ((inCount != 10 && inCount != 7) ||
                !allParametersPresent(inCount, 1)) {
                return ANEURALNETWORKS_BAD_DATA;
            }
            const RunTimeOperandInfo& input = mOperands[ins[0]];

            int32_t padding_left, padding_right;
            int32_t padding_top, padding_bottom;
            int32_t stride_width, stride_height;
            int32_t filter_width, filter_height;
            int32_t activation;

            if (inCount == 10) {
                padding_left = getScalarData<int32_t>(mOperands[ins[1]]);
                padding_right = getScalarData<int32_t>(mOperands[ins[2]]);
                padding_top = getScalarData<int32_t>(mOperands[ins[3]]);
                padding_bottom = getScalarData<int32_t>(mOperands[ins[4]]);
                stride_width = getScalarData<int32_t>(mOperands[ins[5]]);
                stride_height = getScalarData<int32_t>(mOperands[ins[6]]);
                filter_width = getScalarData<int32_t>(mOperands[ins[7]]);
                filter_height = getScalarData<int32_t>(mOperands[ins[8]]);
                activation = getScalarData<int32_t>(mOperands[ins[9]]);
            } else {
                int32_t padding_implicit = getScalarData<int32_t>(mOperands[ins[1]]);
                stride_width = getScalarData<int32_t>(mOperands[ins[2]]);
                stride_height = getScalarData<int32_t>(mOperands[ins[3]]);
                filter_width = getScalarData<int32_t>(mOperands[ins[4]]);
                filter_height = getScalarData<int32_t>(mOperands[ins[5]]);
                activation = getScalarData<int32_t>(mOperands[ins[6]]);

                Shape inputShape = input.shape();
                int32_t input_width = getSizeOfDimension(inputShape, 2);
                int32_t input_height = getSizeOfDimension(inputShape, 1);
                calculateExplicitPadding(input_width, stride_width, filter_width,
                                         padding_implicit, &padding_left, &padding_right);
                calculateExplicitPadding(input_height, stride_height, filter_height,
                                         padding_implicit, &padding_top, &padding_bottom);
            }

            RunTimeOperandInfo& output = mOperands[outs[0]];
            Shape outShape = output.shape();

            if (input.type == OperandType::TENSOR_FLOAT32) {
                success = genericPoolingPrepare(input.shape(),
                                                padding_left, padding_right, padding_top,
                                                padding_bottom, stride_width, stride_height,
                                                filter_width, filter_height, &outShape) &&
                          setInfoAndAllocateIfNeeded(&output, outShape) &&
                          l2PoolFloat32(reinterpret_cast<const float*>(input.buffer),
                                        input.shape(),
                                        padding_left, padding_right, padding_top, padding_bottom,
                                        stride_width, stride_height,
                                        filter_width, filter_height, activation,
                                        reinterpret_cast<float*>(output.buffer), outShape);
            }
        } break;
        case OperationType::MAX_POOL_2D: {
            const size_t inCount = ins.size();
            if ((inCount != 10 && inCount != 7) ||
                !allParametersPresent(inCount, 1)) {
                return ANEURALNETWORKS_BAD_DATA;
            }
            const RunTimeOperandInfo& input = mOperands[ins[0]];

            int32_t padding_left, padding_right;
            int32_t padding_top, padding_bottom;
            int32_t stride_width, stride_height;
            int32_t filter_width, filter_height;
            int32_t activation;

            if (inCount == 10) {
                padding_left = getScalarData<int32_t>(mOperands[ins[1]]);
                padding_right = getScalarData<int32_t>(mOperands[ins[2]]);
                padding_top = getScalarData<int32_t>(mOperands[ins[3]]);
                padding_bottom = getScalarData<int32_t>(mOperands[ins[4]]);
                stride_width = getScalarData<int32_t>(mOperands[ins[5]]);
                stride_height = getScalarData<int32_t>(mOperands[ins[6]]);
                filter_width = getScalarData<int32_t>(mOperands[ins[7]]);
                filter_height = getScalarData<int32_t>(mOperands[ins[8]]);
                activation = getScalarData<int32_t>(mOperands[ins[9]]);
            } else {
                int32_t padding_implicit = getScalarData<int32_t>(mOperands[ins[1]]);
                stride_width = getScalarData<int32_t>(mOperands[ins[2]]);
                stride_height = getScalarData<int32_t>(mOperands[ins[3]]);
                filter_width = getScalarData<int32_t>(mOperands[ins[4]]);
                filter_height = getScalarData<int32_t>(mOperands[ins[5]]);
                activation = getScalarData<int32_t>(mOperands[ins[6]]);

                Shape inputShape = input.shape();
                int32_t input_width = getSizeOfDimension(inputShape, 2);
                int32_t input_height = getSizeOfDimension(inputShape, 1);
                calculateExplicitPadding(input_width, stride_width, filter_width,
                                         padding_implicit, &padding_left, &padding_right);
                calculateExplicitPadding(input_height, stride_height, filter_height,
                                         padding_implicit, &padding_top, &padding_bottom);
            }

            RunTimeOperandInfo& output = mOperands[outs[0]];
            Shape outShape = output.shape();

            if (input.type == OperandType::TENSOR_FLOAT32) {
                success = genericPoolingPrepare(input.shape(),
                                                padding_left, padding_right, padding_top,
                                                padding_bottom, stride_width, stride_height,
                                                filter_width, filter_height, &outShape) &&
                          setInfoAndAllocateIfNeeded(&output, outShape) &&
                          maxPoolFloat32(reinterpret_cast<const float*>(input.buffer),
                                         input.shape(),
                                         padding_left, padding_right, padding_top, padding_bottom,
                                         stride_width, stride_height,
                                         filter_width, filter_height, activation,
                                         reinterpret_cast<float*>(output.buffer), outShape);
            } else if (input.type == OperandType::TENSOR_QUANT8_ASYMM) {
                success = genericPoolingPrepare(input.shape(),
                                                padding_left, padding_right, padding_top,
                                                padding_bottom, stride_width, stride_height,
                                                filter_width, filter_height, &outShape) &&
                          setInfoAndAllocateIfNeeded(&output, outShape) &&
                          maxPoolQuant8(reinterpret_cast<const uint8_t*>(input.buffer),
                                        input.shape(),
                                        padding_left, padding_right, padding_top, padding_bottom,
                                        stride_width, stride_height,
                                        filter_width, filter_height, activation,
                                        reinterpret_cast<uint8_t*>(output.buffer), outShape);
            }

        } break;
        case OperationType::RELU: {
            if (!allParametersPresent(1, 1)) {
                return ANEURALNETWORKS_BAD_DATA;
            }
            const RunTimeOperandInfo& input = mOperands[ins[0]];
            RunTimeOperandInfo& output = mOperands[outs[0]];
            Shape outShape = output.shape();

            if (input.type == OperandType::TENSOR_FLOAT32) {
                success = genericActivationPrepare(input.shape(), &outShape) &&
                          setInfoAndAllocateIfNeeded(&output, outShape) &&
                          reluFloat32(reinterpret_cast<const float*>(input.buffer), input.shape(),
                                      reinterpret_cast<float*>(output.buffer), outShape);
            } else if (input.type == OperandType::TENSOR_QUANT8_ASYMM) {
                success = genericActivationPrepare(input.shape(), &outShape) &&
                          setInfoAndAllocateIfNeeded(&output, outShape) &&
                          reluQuant8(reinterpret_cast<const uint8_t*>(input.buffer), input.shape(),
                                     reinterpret_cast<uint8_t*>(output.buffer), outShape);
            }
        } break;
        case OperationType::RELU1: {
            if (!allParametersPresent(1, 1)) {
                return ANEURALNETWORKS_BAD_DATA;
            }
            const RunTimeOperandInfo& input = mOperands[ins[0]];
            RunTimeOperandInfo& output = mOperands[outs[0]];
            Shape outShape = output.shape();

            if (input.type == OperandType::TENSOR_FLOAT32) {
                success = genericActivationPrepare(input.shape(), &outShape) &&
                          setInfoAndAllocateIfNeeded(&output, outShape) &&
                          relu1Float32(reinterpret_cast<const float*>(input.buffer), input.shape(),
                                       reinterpret_cast<float*>(output.buffer), outShape);
            } else if (input.type == OperandType::TENSOR_QUANT8_ASYMM) {
                success = genericActivationPrepare(input.shape(), &outShape) &&
                          setInfoAndAllocateIfNeeded(&output, outShape) &&
                          relu1Quant8(reinterpret_cast<const uint8_t*>(input.buffer), input.shape(),
                                      reinterpret_cast<uint8_t*>(output.buffer), outShape);
            }
        } break;
        case OperationType::RELU6: {
            if (!allParametersPresent(1, 1)) {
                return ANEURALNETWORKS_BAD_DATA;
            }
            const RunTimeOperandInfo& input = mOperands[ins[0]];
            RunTimeOperandInfo& output = mOperands[outs[0]];
            Shape outShape = output.shape();

            if (input.type == OperandType::TENSOR_FLOAT32) {
                success = genericActivationPrepare(input.shape(), &outShape) &&
                          setInfoAndAllocateIfNeeded(&output, outShape) &&
                          relu6Float32(reinterpret_cast<const float*>(input.buffer), input.shape(),
                                       reinterpret_cast<float*>(output.buffer), outShape);
            } else if (input.type == OperandType::TENSOR_QUANT8_ASYMM) {
                success = genericActivationPrepare(input.shape(), &outShape) &&
                          setInfoAndAllocateIfNeeded(&output, outShape) &&
                          relu6Quant8(reinterpret_cast<const uint8_t*>(input.buffer), input.shape(),
                                      reinterpret_cast<uint8_t*>(output.buffer), outShape);
            }
        } break;
        case OperationType::TANH: {
            if (!allParametersPresent(1, 1)) {
                return ANEURALNETWORKS_BAD_DATA;
            }
            const RunTimeOperandInfo& input = mOperands[ins[0]];
            RunTimeOperandInfo& output = mOperands[outs[0]];
            Shape outShape = output.shape();

            if (input.type == OperandType::TENSOR_FLOAT32) {
                success = genericActivationPrepare(input.shape(), &outShape) &&
                          setInfoAndAllocateIfNeeded(&output, outShape) &&
                          tanhFloat32(reinterpret_cast<const float*>(input.buffer), input.shape(),
                                      reinterpret_cast<float*>(output.buffer), outShape);
            }
        } break;
        case OperationType::LOGISTIC: {
            if (!allParametersPresent(1, 1)) {
                return ANEURALNETWORKS_BAD_DATA;
            }
            const RunTimeOperandInfo& input = mOperands[ins[0]];
            RunTimeOperandInfo& output = mOperands[outs[0]];
            Shape outShape = output.shape();

            if (input.type == OperandType::TENSOR_FLOAT32) {
                success = genericActivationPrepare(input.shape(), &outShape) &&
                          setInfoAndAllocateIfNeeded(&output, outShape) &&
                          logisticFloat32(reinterpret_cast<const float*>(input.buffer),
                                          input.shape(),
                                          reinterpret_cast<float*>(output.buffer), outShape);
            } else if (input.type == OperandType::TENSOR_QUANT8_ASYMM) {
                success = genericActivationPrepare(input.shape(), &outShape) &&
                          setInfoAndAllocateIfNeeded(&output, outShape) &&
                          logisticQuant8(reinterpret_cast<const uint8_t*>(input.buffer),
                                         input.shape(),
                                         reinterpret_cast<uint8_t*>(output.buffer), outShape);
            }
        } break;
        case OperationType::SOFTMAX: {
            if (!allParametersPresent(2, 1)) {
                return ANEURALNETWORKS_BAD_DATA;
            }
            RunTimeOperandInfo& input = mOperands[ins[0]];
            float beta = getScalarData<float>(mOperands[ins[1]]);
            if (beta <= 0.0f) {
                LOG(ERROR) << "beta must be positive for softmax";
                return ANEURALNETWORKS_BAD_DATA;
            }

            RunTimeOperandInfo& output = mOperands[outs[0]];
            Shape outShape = output.shape();

            if (input.type == OperandType::TENSOR_FLOAT32) {
                success = genericActivationPrepare(input.shape(), &outShape) &&
                          setInfoAndAllocateIfNeeded(&output, outShape) &&
                          softmaxFloat32(reinterpret_cast<const float*>(input.buffer),
                                         input.shape(), beta,
                                         reinterpret_cast<float*>(output.buffer), output.shape());
            } else if (input.type == OperandType::TENSOR_QUANT8_ASYMM) {
                success = genericActivationPrepare(input.shape(), &outShape) &&
                          setInfoAndAllocateIfNeeded(&output, outShape) &&
                          softmaxQuant8(reinterpret_cast<const uint8_t*>(input.buffer),
                                        input.shape(), beta,
                                        reinterpret_cast<uint8_t*>(output.buffer), output.shape());
            }
        } break;
        case OperationType::FULLY_CONNECTED: {
            if (!allParametersPresent(4, 1)) {
                return ANEURALNETWORKS_BAD_DATA;
            }
            RunTimeOperandInfo& input = mOperands[ins[0]];
            RunTimeOperandInfo& weights = mOperands[ins[1]];
            RunTimeOperandInfo& bias = mOperands[ins[2]];

            int32_t activation = getScalarData<int32_t>(mOperands[ins[3]]);

            RunTimeOperandInfo& output = mOperands[outs[0]];
            Shape outShape = output.shape();

            if (input.type == OperandType::TENSOR_FLOAT32) {
                success = fullyConnectedPrepare(input.shape(), weights.shape(), bias.shape(),
                                                &outShape) &&
                          setInfoAndAllocateIfNeeded(&output, outShape) &&
                          fullyConnectedFloat32(
                                  reinterpret_cast<const float*>(input.buffer), input.shape(),
                                  reinterpret_cast<const float*>(weights.buffer), weights.shape(),
                                  reinterpret_cast<const float*>(bias.buffer), bias.shape(),
                                  activation,
                                  reinterpret_cast<float*>(output.buffer), outShape);
            } else if (input.type == OperandType::TENSOR_QUANT8_ASYMM) {
                success = fullyConnectedPrepare(input.shape(), weights.shape(), bias.shape(),
                                                &outShape) &&
                          setInfoAndAllocateIfNeeded(&output, outShape) &&
                          fullyConnectedQuant8(
                                  reinterpret_cast<const uint8_t*>(input.buffer), input.shape(),
                                  reinterpret_cast<const uint8_t*>(weights.buffer), weights.shape(),
                                  reinterpret_cast<const int32_t*>(bias.buffer), bias.shape(),
                                  activation,
                                  reinterpret_cast<uint8_t*>(output.buffer), outShape);
            }
        } break;
        case OperationType::CONCATENATION: {
            if (outs.size() != 1 || ins.size() < 2) {
                return ANEURALNETWORKS_BAD_DATA;
            }
            int numInputTensors = ins.size() - 1;
            int32_t axis = getScalarData<int32_t>(mOperands[ins[numInputTensors]]);

            RunTimeOperandInfo& output = mOperands[outs[0]];
            Shape outShape = output.shape();

            const RunTimeOperandInfo& firstInput = mOperands[ins[0]];
            if (firstInput.type == OperandType::TENSOR_FLOAT32) {
                std::vector<Shape> inputShapes(numInputTensors);
                std::vector<const float*> inputDataPtrs(numInputTensors);

                for (int i = 0; i < numInputTensors; i++) {
                    RunTimeOperandInfo& input = mOperands[ins[i]];
                    inputShapes[i] = input.shape();
                    inputDataPtrs[i] = reinterpret_cast<const float*>(input.buffer);
                }
                success = concatenationPrepare(inputShapes, axis, &outShape) &&
                          setInfoAndAllocateIfNeeded(&output, outShape) &&
                          concatenationFloat32(inputDataPtrs, inputShapes, axis,
                                               reinterpret_cast<float*>(output.buffer), outShape);
            } else if (firstInput.type == OperandType::TENSOR_QUANT8_ASYMM) {
                std::vector<Shape> inputShapes(numInputTensors);
                std::vector<const uint8_t*> inputDataPtrs(numInputTensors);

                for (int i = 0; i < numInputTensors; i++) {
                    RunTimeOperandInfo& input = mOperands[ins[i]];
                    inputShapes[i] = input.shape();
                    inputDataPtrs[i] = reinterpret_cast<const uint8_t*>(input.buffer);
                }
                success = concatenationPrepare(inputShapes, axis, &outShape) &&
                          setInfoAndAllocateIfNeeded(&output, outShape) &&
                          concatenationQuant8(inputDataPtrs, inputShapes, axis,
                                              reinterpret_cast<uint8_t*>(output.buffer), outShape);
            }
        } break;
        case OperationType::L2_NORMALIZATION: {
            if (!allParametersPresent(1, 1)) {
                return ANEURALNETWORKS_BAD_DATA;
            }
            const RunTimeOperandInfo& input = mOperands[ins[0]];
            RunTimeOperandInfo& output = mOperands[outs[0]];
            Shape outShape = output.shape();

            if (input.type == OperandType::TENSOR_FLOAT32) {
                success = genericNormalizationPrepare(input.shape(), &outShape) &&
                          setInfoAndAllocateIfNeeded(&output, outShape) &&
                          l2normFloat32(reinterpret_cast<const float*>(input.buffer), input.shape(),
                                        reinterpret_cast<float*>(output.buffer), outShape);
            } else if (input.type == OperandType::TENSOR_QUANT8_ASYMM) {
                success = genericNormalizationPrepare(input.shape(), &outShape) &&
                          setInfoAndAllocateIfNeeded(&output, outShape) &&
                          l2normQuant8(reinterpret_cast<const uint8_t*>(input.buffer),
                                       input.shape(),
                                       reinterpret_cast<uint8_t*>(output.buffer), outShape);
            }
        } break;
        case OperationType::LOCAL_RESPONSE_NORMALIZATION: {
            if (!allParametersPresent(5, 1)) {
                return ANEURALNETWORKS_BAD_DATA;
            }
            const RunTimeOperandInfo& input = mOperands[ins[0]];
            int32_t radius = getScalarData<int32_t>(mOperands[ins[1]]);
            float bias = getScalarData<float>(mOperands[ins[2]]);
            float alpha = getScalarData<float>(mOperands[ins[3]]);
            float beta = getScalarData<float>(mOperands[ins[4]]);

            RunTimeOperandInfo& output = mOperands[outs[0]];
            Shape outShape = output.shape();

            if (input.type == OperandType::TENSOR_FLOAT32) {
                success = genericNormalizationPrepare(input.shape(), &outShape) &&
                          setInfoAndAllocateIfNeeded(&output, outShape) &&
                          localResponseNormFloat32(reinterpret_cast<const float*>(input.buffer),
                                                   input.shape(), radius, bias, alpha, beta,
                                                   reinterpret_cast<float*>(output.buffer),
                                                   outShape);
            }
        } break;
        case OperationType::RESHAPE: {
            if (!allParametersPresent(2, 1)) {
                return ANEURALNETWORKS_BAD_DATA;
            }
            const RunTimeOperandInfo& input = mOperands[ins[0]];
            const RunTimeOperandInfo& targetShape = mOperands[ins[1]];

            RunTimeOperandInfo& output = mOperands[outs[0]];
            Shape outShape = output.shape();

            success = reshapePrepare(input.shape(),
                                     reinterpret_cast<const int32_t*>(targetShape.buffer),
                                     getNumberOfElements(targetShape.shape()),
                                     &outShape) &&
                      setInfoAndAllocateIfNeeded(&output, outShape) &&
                      reshapeGeneric(reinterpret_cast<const void*>(input.buffer), input.shape(),
                                     reinterpret_cast<void*>(output.buffer), outShape);
        } break;
        case OperationType::RESIZE_BILINEAR: {
            if (!allParametersPresent(3, 1)) {
                return ANEURALNETWORKS_BAD_DATA;
            }
            const RunTimeOperandInfo& input = mOperands[ins[0]];
            int32_t width = getScalarData<int32_t>(mOperands[ins[1]]);
            int32_t height = getScalarData<int32_t>(mOperands[ins[2]]);

            RunTimeOperandInfo& output = mOperands[outs[0]];
            Shape outShape = output.shape();

            if (input.type == OperandType::TENSOR_FLOAT32) {
                success = resizeBilinearPrepare(input.shape(), width, height, &outShape) &&
                          setInfoAndAllocateIfNeeded(&output, outShape) &&
                          resizeBilinearFloat32(reinterpret_cast<const float*>(input.buffer),
                                                input.shape(),
                                                reinterpret_cast<float*>(output.buffer), outShape);
            }
        } break;
        case OperationType::DEPTH_TO_SPACE: {
            if (!allParametersPresent(2, 1)) {
                return ANEURALNETWORKS_BAD_DATA;
            }
            const RunTimeOperandInfo& input = mOperands[ins[0]];
            int32_t blockSize = getScalarData<int32_t>(mOperands[ins[1]]);

            RunTimeOperandInfo& output = mOperands[outs[0]];
            Shape outShape = output.shape();

            success = depthToSpacePrepare(input.shape(), blockSize, &outShape) &&
                      setInfoAndAllocateIfNeeded(&output, outShape) &&
                      depthToSpaceGeneric(input.buffer, input.shape(), blockSize,
                                          output.buffer, outShape);
        } break;
        case OperationType::SPACE_TO_DEPTH: {
            if (!allParametersPresent(2, 1)) {
                return ANEURALNETWORKS_BAD_DATA;
            }
            const RunTimeOperandInfo& input = mOperands[ins[0]];
            int32_t blockSize = getScalarData<int32_t>(mOperands[ins[1]]);

            RunTimeOperandInfo& output = mOperands[outs[0]];
            Shape outShape = output.shape();

            success = spaceToDepthPrepare(input.shape(), blockSize, &outShape) &&
                      setInfoAndAllocateIfNeeded(&output, outShape) &&
                      spaceToDepthGeneric(input.buffer, input.shape(), blockSize,
                                          output.buffer, outShape);
        } break;
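        // Note (added for clarity): the cases below switch to a second style of dispatch.
        // Operations with more internal state (EMBEDDING_LOOKUP, HASHTABLE_LOOKUP,
        // LSH_PROJECTION, LSTM, RNN, SVDF) are implemented as classes constructed from
        // the Operation plus the operand table, driven through a static Prepare() followed
        // by Eval(). Schematically ("SomeOp" is a placeholder, not a real class name):
        //
        //     SomeOp op(operation, mOperands);
        //     success = SomeOp::Prepare(operation, mOperands, &outputShape) &&
        //               setInfoAndAllocateIfNeeded(&output, outputShape) &&
        //               op.Eval();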
        case OperationType::EMBEDDING_LOOKUP: {
            const RunTimeOperandInfo &values =
                mOperands[ins[EmbeddingLookup::kValueTensor]];
            const RunTimeOperandInfo &lookups =
                mOperands[ins[EmbeddingLookup::kLookupTensor]];
            RunTimeOperandInfo &output =
                mOperands[outs[EmbeddingLookup::kOutputTensor]];

            Shape outputShape;
            EmbeddingLookup lookup(operation, mOperands);

            success = embeddingLookupPrepare(values.shape(), lookups.shape(), &outputShape) &&
                      setInfoAndAllocateIfNeeded(&output, outputShape) &&
                      lookup.Eval();
        } break;
        case OperationType::HASHTABLE_LOOKUP: {
            const RunTimeOperandInfo &lookups =
                mOperands[ins[HashtableLookup::kLookupTensor]];
            const RunTimeOperandInfo &keys =
                mOperands[ins[HashtableLookup::kKeyTensor]];
            const RunTimeOperandInfo &values =
                mOperands[ins[HashtableLookup::kValueTensor]];

            RunTimeOperandInfo &output =
                mOperands[outs[HashtableLookup::kOutputTensor]];
            RunTimeOperandInfo &hits =
                mOperands[outs[HashtableLookup::kHitsTensor]];

            Shape outputShape, hitShape;
            HashtableLookup lookup(operation, mOperands);

            success = hashtableLookupPrepare(lookups.shape(), keys.shape(), values.shape(),
                                             &outputShape, &hitShape) &&
                      setInfoAndAllocateIfNeeded(&output, outputShape) &&
                      setInfoAndAllocateIfNeeded(&hits, hitShape) &&
                      lookup.Eval();
        } break;
        case OperationType::LSH_PROJECTION: {
            RunTimeOperandInfo &output =
                mOperands[outs[LSHProjection::kOutputTensor]];

            Shape outputShape;
            LSHProjection lsh(operation, mOperands);

            success = LSHProjection::Prepare(operation, mOperands, &outputShape) &&
                      setInfoAndAllocateIfNeeded(&output, outputShape) &&
                      lsh.Eval();
        } break;
        case OperationType::LSTM: {
            RunTimeOperandInfo &scratch =
                mOperands[outs[LSTMCell::kScratchBufferTensor]];
            RunTimeOperandInfo &outputStateOut =
                mOperands[outs[LSTMCell::kOutputStateOutTensor]];
            RunTimeOperandInfo &cellStateOut =
                mOperands[outs[LSTMCell::kCellStateOutTensor]];
            RunTimeOperandInfo &output =
                mOperands[outs[LSTMCell::kOutputTensor]];

            Shape scratchShape, outputStateShape, cellStateShape, outputShape;
            LSTMCell lstm_cell(operation, mOperands);

            success = LSTMCell::Prepare(operation, mOperands,
                                        &scratchShape, &outputStateShape,
                                        &cellStateShape, &outputShape) &&
                      setInfoAndAllocateIfNeeded(&scratch, scratchShape) &&
                      setInfoAndAllocateIfNeeded(&outputStateOut, outputStateShape) &&
                      setInfoAndAllocateIfNeeded(&cellStateOut, cellStateShape) &&
                      setInfoAndAllocateIfNeeded(&output, outputShape) &&
                      lstm_cell.Eval();
        } break;
        case OperationType::RNN: {
            RunTimeOperandInfo &hiddenStateOut =
                mOperands[outs[RNN::kHiddenStateOutTensor]];
            RunTimeOperandInfo &output =
                mOperands[outs[RNN::kOutputTensor]];

            Shape hiddenStateShape, outputShape;
            RNN rnn_cell(operation, mOperands);

            success = RNN::Prepare(operation, mOperands, &hiddenStateShape, &outputShape) &&
                      setInfoAndAllocateIfNeeded(&hiddenStateOut, hiddenStateShape) &&
                      setInfoAndAllocateIfNeeded(&output, outputShape) &&
                      rnn_cell.Eval();
        } break;
        case OperationType::SVDF: {
            RunTimeOperandInfo &stateOut =
                mOperands[outs[SVDF::kStateOutTensor]];
            RunTimeOperandInfo &output =
                mOperands[outs[SVDF::kOutputTensor]];

            Shape stateShape, outputShape;
            SVDF svdf(operation, mOperands);

            success = SVDF::Prepare(operation, mOperands, &stateShape, &outputShape) &&
                      setInfoAndAllocateIfNeeded(&stateOut, stateShape) &&
                      setInfoAndAllocateIfNeeded(&output, outputShape) &&
                      svdf.Eval();
        } break;
        case OperationType::BATCH_TO_SPACE_ND: {
            if (!allParametersPresent(2, 1)) {
                return ANEURALNETWORKS_BAD_DATA;
            }
            const RunTimeOperandInfo& input = mOperands[ins[0]];
            const RunTimeOperandInfo& blockSize = mOperands[ins[1]];

            RunTimeOperandInfo& output = mOperands[outs[0]];
            Shape outShape = output.shape();

            success = batchToSpacePrepare(input.shape(),
                                          reinterpret_cast<const int32_t*>(blockSize.buffer),
                                          blockSize.shape(), &outShape) &&
                      setInfoAndAllocateIfNeeded(&output, outShape) &&
                      batchToSpaceGeneric(input.buffer, input.shape(),
                                          reinterpret_cast<const int32_t*>(blockSize.buffer),
                                          output.buffer, outShape);
        } break;
        case OperationType::SPACE_TO_BATCH_ND: {
            if (!allParametersPresent(3, 1)) {
                return ANEURALNETWORKS_BAD_DATA;
            }
            const RunTimeOperandInfo& input = mOperands[ins[0]];
            const RunTimeOperandInfo& blockSize = mOperands[ins[1]];
            const RunTimeOperandInfo& paddings = mOperands[ins[2]];

            RunTimeOperandInfo& output = mOperands[outs[0]];
            Shape outShape = output.shape();

            success = spaceToBatchPrepare(input.shape(),
                                          reinterpret_cast<const int32_t*>(blockSize.buffer),
                                          blockSize.shape(),
                                          reinterpret_cast<const int32_t*>(paddings.buffer),
                                          paddings.shape(), &outShape) &&
                      setInfoAndAllocateIfNeeded(&output, outShape) &&
                      spaceToBatchGeneric(input.buffer, input.shape(),
                                          reinterpret_cast<const int32_t*>(blockSize.buffer),
                                          reinterpret_cast<const int32_t*>(paddings.buffer),
                                          paddings.shape(),
                                          output.buffer, outShape);
        } break;
        case OperationType::PAD: {
            if (!allParametersPresent(2, 1)) {
                return ANEURALNETWORKS_BAD_DATA;
            }
            const RunTimeOperandInfo& input = mOperands[ins[0]];
            const RunTimeOperandInfo& paddings = mOperands[ins[1]];

            RunTimeOperandInfo& output = mOperands[outs[0]];
            Shape outShape = output.shape();

            success = padPrepare(input.shape(),
                                 reinterpret_cast<const int32_t*>(paddings.buffer),
                                 paddings.shape(), &outShape) &&
                      setInfoAndAllocateIfNeeded(&output, outShape) &&
                      padGeneric(input.buffer, input.shape(),
                                 reinterpret_cast<const int32_t*>(paddings.buffer),
                                 output.buffer, outShape);
        } break;
        case OperationType::SQUEEZE: {
            if (!allParametersPresent(2, 1)) {
                return ANEURALNETWORKS_BAD_DATA;
            }
            const RunTimeOperandInfo& input = mOperands[ins[0]];
            const RunTimeOperandInfo& squeezeDims = mOperands[ins[1]];

            RunTimeOperandInfo& output = mOperands[outs[0]];
            Shape outShape = output.shape();

            success = squeezePrepare(input.shape(),
                                     reinterpret_cast<const int32_t*>(squeezeDims.buffer),
                                     squeezeDims.shape(), &outShape) &&
                      setInfoAndAllocateIfNeeded(&output, outShape) &&
                      squeezeGeneric(input.buffer, input.shape(),
                                     output.buffer, outShape);
        } break;
        case OperationType::TRANSPOSE: {
            if (!allParametersPresent(2, 1)) {
                return ANEURALNETWORKS_BAD_DATA;
            }
            const RunTimeOperandInfo& input = mOperands[ins[0]];
            const RunTimeOperandInfo& perms = mOperands[ins[1]];

            RunTimeOperandInfo& output = mOperands[outs[0]];
            Shape outShape = output.shape();

            success = transposePrepare(input.shape(),
                                       reinterpret_cast<const int32_t*>(perms.buffer),
                                       perms.shape(), &outShape) &&
                      setInfoAndAllocateIfNeeded(&output, outShape) &&
                      transposeGeneric(input.buffer, input.shape(),
                                       reinterpret_cast<const int32_t*>(perms.buffer),
                                       perms.shape(),
                                       output.buffer, outShape);
        } break;
        case OperationType::STRIDED_SLICE: {
            if (!allParametersPresent(7, 1)) {
                return ANEURALNETWORKS_BAD_DATA;
            }
            const RunTimeOperandInfo& input = mOperands[ins[0]];
            const RunTimeOperandInfo& begins = mOperands[ins[1]];
            const RunTimeOperandInfo& ends = mOperands[ins[2]];
            const RunTimeOperandInfo& strides = mOperands[ins[3]];
            int32_t beginMask = getScalarData<int32_t>(mOperands[ins[4]]);
            int32_t endMask = getScalarData<int32_t>(mOperands[ins[5]]);
            int32_t shrinkAxisMask = getScalarData<int32_t>(mOperands[ins[6]]);

            RunTimeOperandInfo& output = mOperands[outs[0]];
            Shape outShape = output.shape();

            success = stridedSlicePrepare(input.shape(),
                                          reinterpret_cast<const int32_t*>(begins.buffer),
                                          begins.shape(),
                                          reinterpret_cast<const int32_t*>(ends.buffer),
                                          ends.shape(),
                                          reinterpret_cast<const int32_t*>(strides.buffer),
                                          strides.shape(),
                                          beginMask, endMask, shrinkAxisMask, &outShape) &&
                      setInfoAndAllocateIfNeeded(&output, outShape) &&
                      stridedSliceGeneric(input.buffer, input.shape(),
                                          reinterpret_cast<const int32_t*>(begins.buffer),
                                          reinterpret_cast<const int32_t*>(ends.buffer),
                                          reinterpret_cast<const int32_t*>(strides.buffer),
                                          beginMask, endMask, shrinkAxisMask,
                                          output.buffer, outShape);
        } break;
        case OperationType::DIV: {
            if (!allParametersPresent(3, 1)) {
                return ANEURALNETWORKS_BAD_DATA;
            }
            const RunTimeOperandInfo& in1 = mOperands[ins[0]];
            const RunTimeOperandInfo& in2 = mOperands[ins[1]];
            int32_t activation = getScalarData<int32_t>(mOperands[ins[2]]);

            RunTimeOperandInfo& out = mOperands[outs[0]];
            Shape outShape = out.shape();

            if (in1.type == OperandType::TENSOR_FLOAT32) {
                success = addMulPrepare(in1.shape(), in2.shape(), &outShape) &&
                          setInfoAndAllocateIfNeeded(&out, outShape) &&
                          divFloat32(reinterpret_cast<const float*>(in1.buffer), in1.shape(),
                                     reinterpret_cast<const float*>(in2.buffer), in2.shape(),
                                     activation,
                                     reinterpret_cast<float*>(out.buffer), outShape);
            }
        } break;
        case OperationType::SUB: {
            if (!allParametersPresent(3, 1)) {
                return ANEURALNETWORKS_BAD_DATA;
            }
            const RunTimeOperandInfo& in1 = mOperands[ins[0]];
            const RunTimeOperandInfo& in2 = mOperands[ins[1]];
            int32_t activation = getScalarData<int32_t>(mOperands[ins[2]]);

            RunTimeOperandInfo& out = mOperands[outs[0]];
            Shape outShape = out.shape();

            if (in1.type == OperandType::TENSOR_FLOAT32) {
                success = addMulPrepare(in1.shape(), in2.shape(), &outShape) &&
                          setInfoAndAllocateIfNeeded(&out, outShape) &&
                          subFloat32(reinterpret_cast<const float*>(in1.buffer), in1.shape(),
                                     reinterpret_cast<const float*>(in2.buffer), in2.shape(),
                                     activation,
                                     reinterpret_cast<float*>(out.buffer), outShape);
            }
        } break;
        case OperationType::MEAN: {
            if (!allParametersPresent(3, 1)) {
                return ANEURALNETWORKS_BAD_DATA;
            }
            const RunTimeOperandInfo& input = mOperands[ins[0]];
            const RunTimeOperandInfo& axis = mOperands[ins[1]];
            int32_t keepDims = getScalarData<int32_t>(mOperands[ins[2]]);

            RunTimeOperandInfo& output = mOperands[outs[0]];
            Shape outShape = output.shape();

            success = meanPrepare(input.shape(),
                                  reinterpret_cast<const int32_t*>(axis.buffer),
                                  axis.shape(), keepDims > 0, &outShape) &&
                      setInfoAndAllocateIfNeeded(&output, outShape) &&
                      meanGeneric(input.buffer, input.shape(),
                                  reinterpret_cast<const int32_t*>(axis.buffer),
                                  axis.shape(), keepDims > 0,
                                  output.buffer, outShape);
        } break;
        default:
            nnAssert(false);
            break;
    }
    if (!success) {
        LOG(ERROR) << getOperationName(operation.type) << " failed.";
        return ANEURALNETWORKS_OP_FAILED;
    }

    freeNoLongerUsedOperands(ins);
    return ANEURALNETWORKS_NO_ERROR;
}
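// Note (added for clarity): ScopedOpenmpSettings is an RAII guard. CpuExecutor::run()
// simply places one on the stack for the duration of a run, e.g.:
//
//     {
//         ScopedOpenmpSettings openMpSettings;  // shorten the OpenMP blocktime (and,
//                                               // when enabled, cap Eigen's thread count)
//         // ... execute the model ...
//     }                                         // previous settings are restored here
//
// The thread-count capping below is compiled out unless NNAPI_LIMIT_CPU_THREADS is set.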
ScopedOpenmpSettings::ScopedOpenmpSettings() {
    mBlocktimeInitial = kmp_get_blocktime();
    kmp_set_blocktime(20);  // ms, see b/109645291

#if NNAPI_LIMIT_CPU_THREADS
    // Code not yet enabled. The number of threads to use will be chosen based on
    // benchmarking. See the longer comment by the class declaration.
    mMaxThreadsInitial = Eigen::nbThreads();
    const int nProcs = omp_get_num_procs();
    int threads = nProcs;
    if (nProcs >= 8) {
        threads = nProcs - 4;
    } else if (nProcs >= 4) {
        threads = nProcs - 2;
    }
    Eigen::setNbThreads(threads);
#endif
}

ScopedOpenmpSettings::~ScopedOpenmpSettings() {
    kmp_set_blocktime(mBlocktimeInitial);
#if NNAPI_LIMIT_CPU_THREADS
    Eigen::setNbThreads(mMaxThreadsInitial);
#endif
}


}  // namespace nn
}  // namespace android