/*
 * Copyright (C) 2017 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#define LOG_TAG "CpuExecutor"

#include "CpuExecutor.h"

#include "NeuralNetworks.h"
#include "OperationResolver.h"
#include "Operations.h"
#include "OperationsUtils.h"
#include "Tracing.h"

#include "Eigen/Core"
// b/109953668, disable OpenMP
#ifdef NNAPI_OPENMP
#include <omp.h>
#endif  // NNAPI_OPENMP
#include <android/hardware_buffer.h>
#include <sys/mman.h>

namespace android {
namespace nn {

namespace {

class OperationExecutionContext : public IOperationExecutionContext {
    DISALLOW_IMPLICIT_CONSTRUCTORS(OperationExecutionContext);

   public:
    OperationExecutionContext(const Operation* operation, RunTimeOperandInfo* operands)
        : operation(operation), operands(operands) {}

    uint32_t getNumInputs() const override;
    OperandType getInputType(uint32_t index) const override;
    Shape getInputShape(uint32_t index) const override;
    const void* getInputBuffer(uint32_t index) const override;
    const Operand::ExtraParams getInputExtraParams(uint32_t index) const override;

    uint32_t getNumOutputs() const override;
    OperandType getOutputType(uint32_t index) const override;
    Shape getOutputShape(uint32_t index) const override;
    void* getOutputBuffer(uint32_t index) override;

    // Returns false on failure and stores the result code.
    // Use getResultCode() to retrieve it at the end of the operation execution.
    bool setOutputShape(uint32_t index, const Shape& shape) override;
    int getResultCode() const;

    bool isOmittedInput(uint32_t index) const override;
    bool isOmittedOutput(uint32_t index) const override;

    // Returns false if any of the inputs or outputs is omitted, i.e. has the lifetime NO_VALUE.
    bool checkNoOmittedOperand() const;
    // Returns false if any of the inputs has a dimension of size 0.
69 bool checkNoZeroSizedInput() const; 70 71 private: 72 const RunTimeOperandInfo* getInputInfo(uint32_t index) const; 73 const RunTimeOperandInfo* getOutputInfo(uint32_t index) const; 74 RunTimeOperandInfo* getOutputInfo(uint32_t index); 75 76 const Operation* operation; 77 RunTimeOperandInfo* operands; 78 79 int result = ANEURALNETWORKS_NO_ERROR; 80 }; 81 82 const RunTimeOperandInfo* OperationExecutionContext::getInputInfo(uint32_t index) const { 83 CHECK(index < operation->inputs.size()); 84 return &operands[operation->inputs[index]]; 85 } 86 87 const RunTimeOperandInfo* OperationExecutionContext::getOutputInfo(uint32_t index) const { 88 CHECK(index < operation->outputs.size()); 89 return &operands[operation->outputs[index]]; 90 } 91 92 RunTimeOperandInfo* OperationExecutionContext::getOutputInfo(uint32_t index) { 93 CHECK(index < operation->outputs.size()); 94 return &operands[operation->outputs[index]]; 95 } 96 97 OperandType OperationExecutionContext::getInputType(uint32_t index) const { 98 return getInputInfo(index)->type; 99 } 100 101 Shape OperationExecutionContext::getInputShape(uint32_t index) const { 102 return getInputInfo(index)->shape(); 103 } 104 105 const void* OperationExecutionContext::getInputBuffer(uint32_t index) const { 106 return getInputInfo(index)->buffer; 107 } 108 109 const Operand::ExtraParams OperationExecutionContext::getInputExtraParams(uint32_t index) const { 110 return getInputInfo(index)->extraParams; 111 } 112 113 OperandType OperationExecutionContext::getOutputType(uint32_t index) const { 114 return getOutputInfo(index)->type; 115 } 116 117 Shape OperationExecutionContext::getOutputShape(uint32_t index) const { 118 return getOutputInfo(index)->shape(); 119 } 120 121 void* OperationExecutionContext::getOutputBuffer(uint32_t index) { 122 return getOutputInfo(index)->buffer; 123 } 124 125 uint32_t OperationExecutionContext::getNumInputs() const { 126 return operation->inputs.size(); 127 } 128 129 uint32_t OperationExecutionContext::getNumOutputs() const { 130 return operation->outputs.size(); 131 } 132 133 int OperationExecutionContext::getResultCode() const { 134 return result; 135 } 136 137 // TODO: Return error code directly once we've fully integrated OperationResolver with all ops. 138 // Updates the RunTimeOperandInfo with the newly calculated shape. 139 // Allocate the buffer if we need to. 140 bool setInfoAndAllocateIfNeeded(RunTimeOperandInfo* info, const Shape& shape, int* result) { 141 // For user-provided model output operands, the parameters must match the Shape 142 // calculated from the preparation step. 
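    // Illustrative example (values are hypothetical, not from this file): if the model
    // declared a TENSOR_QUANT8_ASYMM output with scale 0.5 and zeroPoint 128, but the
    // operation's prepare step produced a Shape with scale 1.0, the checks below fail
    // with ANEURALNETWORKS_OP_FAILED instead of silently reinterpreting the caller's buffer.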
143 if (info->lifetime == OperandLifeTime::MODEL_OUTPUT) { 144 if (info->type != shape.type) { 145 LOG(ERROR) << "Invalid type for model output"; 146 *result = ANEURALNETWORKS_OP_FAILED; 147 return false; 148 } 149 if (info->type == OperandType::TENSOR_QUANT8_ASYMM) { 150 if (info->scale != shape.scale) { 151 LOG(ERROR) << "Invalid scale for model output"; 152 *result = ANEURALNETWORKS_OP_FAILED; 153 return false; 154 } 155 if (info->zeroPoint != shape.offset) { 156 LOG(ERROR) << "Invalid zeroPoint for model output"; 157 *result = ANEURALNETWORKS_OP_FAILED; 158 return false; 159 } 160 } 161 if (info->extraParams != shape.extraParams) { 162 LOG(ERROR) << "Invalid extraParams for model output"; 163 *result = ANEURALNETWORKS_OP_FAILED; 164 return false; 165 } 166 } 167 168 std::vector<uint32_t> combined; 169 if (!combineDimensions(shape.dimensions, info->dimensions, &combined)) { 170 LOG(ERROR) << "Invalid dimensions for model operand"; 171 *result = ANEURALNETWORKS_OP_FAILED; 172 return false; 173 } 174 info->dimensions = combined; 175 info->type = shape.type; 176 info->scale = shape.scale; 177 info->zeroPoint = shape.offset; 178 info->extraParams = shape.extraParams; 179 180 // Allocate the buffer only if the combined dimension is fully specified 181 if (info->lifetime == OperandLifeTime::TEMPORARY_VARIABLE && info->buffer == nullptr) { 182 if (isExtensionOperandType(info->type)) { 183 LOG(ERROR) << "Cannot allocate a temporary variable of an extension type"; 184 *result = ANEURALNETWORKS_OP_FAILED; 185 return false; 186 } 187 uint32_t length = nonExtensionOperandSizeOfData(info->type, info->dimensions); 188 if (length > 0) { 189 info->buffer = new uint8_t[length]; 190 if (info->buffer == nullptr) { 191 *result = ANEURALNETWORKS_OUT_OF_MEMORY; 192 return false; 193 } 194 info->length = length; 195 } 196 } 197 if (!info->isSufficient()) { 198 uint32_t length = nonExtensionOperandSizeOfData(info->type, info->dimensions); 199 LOG(ERROR) << "Insufficient size for model operand: require = " << length 200 << ", provided = " << info->length; 201 *result = ANEURALNETWORKS_OUTPUT_INSUFFICIENT_SIZE; 202 return false; 203 } 204 *result = ANEURALNETWORKS_NO_ERROR; 205 return true; 206 } 207 208 bool OperationExecutionContext::setOutputShape(uint32_t index, const Shape& shape) { 209 return setInfoAndAllocateIfNeeded(getOutputInfo(index), shape, &result); 210 } 211 212 bool OperationExecutionContext::isOmittedInput(uint32_t index) const { 213 return getInputInfo(index)->lifetime == OperandLifeTime::NO_VALUE; 214 } 215 216 bool OperationExecutionContext::isOmittedOutput(uint32_t index) const { 217 return getOutputInfo(index)->lifetime == OperandLifeTime::NO_VALUE; 218 } 219 220 bool OperationExecutionContext::checkNoOmittedOperand() const { 221 for (uint32_t i = 0; i < operation->inputs.size(); i++) { 222 NN_RET_CHECK(!isOmittedInput(i)) << getOperationName(operation->type) << " input operand " 223 << i << " is required but missing."; 224 } 225 for (uint32_t i = 0; i < operation->outputs.size(); i++) { 226 NN_RET_CHECK(!isOmittedOutput(i)) << getOperationName(operation->type) << " output operand " 227 << i << " is required but missing."; 228 } 229 return true; 230 } 231 232 bool OperationExecutionContext::checkNoZeroSizedInput() const { 233 for (uint32_t i = 0; i < operation->inputs.size(); i++) { 234 if (isOmittedInput(i)) continue; 235 for (uint32_t j = 0; j < getInputInfo(i)->dimensions.size(); j++) { 236 NN_RET_CHECK_NE(getInputInfo(i)->dimensions[j], 0) 237 << getOperationName(operation->type) 238 << 
                    " does not support zero-sized tensor, but input " << i << " dimension " << j
                    << " is 0.";
        }
    }
    return true;
}

}  // namespace

// Used to keep a pointer to a memory pool.
//
// In the case of an "mmap_fd" pool, owns the mmap region
// returned by getBuffer() -- i.e., that region goes away
// when the RunTimePoolInfo is destroyed or is assigned to.
class RunTimePoolInfo::RunTimePoolInfoImpl {
   public:
    RunTimePoolInfoImpl(const hidl_memory& hidlMemory, uint8_t* buffer, const sp<IMemory>& memory,
                        const sp<GraphicBuffer>& graphicBuffer);

    // rule of five...
    ~RunTimePoolInfoImpl();
    RunTimePoolInfoImpl(const RunTimePoolInfoImpl&) = delete;
    RunTimePoolInfoImpl(RunTimePoolInfoImpl&&) noexcept = delete;
    RunTimePoolInfoImpl& operator=(const RunTimePoolInfoImpl&) = delete;
    RunTimePoolInfoImpl& operator=(RunTimePoolInfoImpl&&) noexcept = delete;

    uint8_t* getBuffer() const { return mBuffer; }

    bool update() const;

    hidl_memory getHidlMemory() const { return mHidlMemory; }

   private:
    const hidl_memory mHidlMemory;     // always used
    uint8_t* const mBuffer = nullptr;  // always used
    const sp<IMemory> mMemory;         // only used when hidlMemory.name() == "ashmem"
    const sp<GraphicBuffer>
            mGraphicBuffer;  // only used when hidlMemory.name() == "hardware_buffer_blob"
};

RunTimePoolInfo::RunTimePoolInfoImpl::RunTimePoolInfoImpl(const hidl_memory& hidlMemory,
                                                          uint8_t* buffer,
                                                          const sp<IMemory>& memory,
                                                          const sp<GraphicBuffer>& graphicBuffer)
    : mHidlMemory(hidlMemory), mBuffer(buffer), mMemory(memory), mGraphicBuffer(graphicBuffer) {}

RunTimePoolInfo::RunTimePoolInfoImpl::~RunTimePoolInfoImpl() {
    if (mBuffer == nullptr) {
        return;
    }

    const std::string memType = mHidlMemory.name();
    if (memType == "ashmem") {
        // nothing to do
    } else if (memType == "mmap_fd") {
        const size_t size = mHidlMemory.size();
        if (munmap(mBuffer, size)) {
            LOG(ERROR) << "RunTimePoolInfoImpl::~RunTimePoolInfoImpl(): Can't munmap";
        }
    } else if (memType == "hardware_buffer_blob") {
        mGraphicBuffer->unlock();
    } else if (memType == "") {
        // Represents a POINTER argument; nothing to do.
    } else {
        LOG(ERROR) << "RunTimePoolInfoImpl::~RunTimePoolInfoImpl(): unsupported hidl_memory type";
    }
}

// Make sure the output data is correctly updated after execution.
bool RunTimePoolInfo::RunTimePoolInfoImpl::update() const {
    const std::string memType = mHidlMemory.name();
    if (memType == "ashmem") {
        mMemory->commit();
        return true;
    }
    if (memType == "mmap_fd") {
        int prot = mHidlMemory.handle()->data[1];
        if (prot & PROT_WRITE) {
            const size_t size = mHidlMemory.size();
            return msync(mBuffer, size, MS_SYNC) == 0;
        }
    }
    // No-op for other types of memory.
    return true;
}

// TODO: short term, factor shared memory mapping and updating into a utility function.
// TODO: long term, implement mmap_fd as a hidl IMemory service.
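// Illustrative use of the factory below (a sketch, not part of this build; the error
// handling shown is an assumption about the caller's policy):
//
//     std::optional<RunTimePoolInfo> poolInfo = RunTimePoolInfo::createFromHidlMemory(pool);
//     if (!poolInfo.has_value()) {
//         return ANEURALNETWORKS_UNMAPPABLE;  // 'pool' could not be mapped
//     }
//     uint8_t* base = poolInfo->getBuffer();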
326 std::optional<RunTimePoolInfo> RunTimePoolInfo::createFromHidlMemory( 327 const hidl_memory& hidlMemory) { 328 uint8_t* buffer = nullptr; 329 sp<IMemory> memory; 330 sp<GraphicBuffer> graphicBuffer; 331 332 const auto& memType = hidlMemory.name(); 333 if (memType == "ashmem") { 334 memory = mapMemory(hidlMemory); 335 if (memory == nullptr) { 336 LOG(ERROR) << "Can't map shared memory."; 337 return std::nullopt; 338 } 339 memory->update(); 340 buffer = reinterpret_cast<uint8_t*>(static_cast<void*>(memory->getPointer())); 341 if (buffer == nullptr) { 342 LOG(ERROR) << "Can't access shared memory."; 343 return std::nullopt; 344 } 345 } else if (memType == "mmap_fd") { 346 size_t size = hidlMemory.size(); 347 int fd = hidlMemory.handle()->data[0]; 348 int prot = hidlMemory.handle()->data[1]; 349 size_t offset = getSizeFromInts(hidlMemory.handle()->data[2], hidlMemory.handle()->data[3]); 350 buffer = static_cast<uint8_t*>(mmap(nullptr, size, prot, MAP_SHARED, fd, offset)); 351 if (buffer == MAP_FAILED) { 352 LOG(ERROR) << "RunTimePoolInfo::set(): Can't mmap the file descriptor."; 353 return std::nullopt; 354 } 355 } else if (memType == "hardware_buffer_blob") { 356 auto handle = hidlMemory.handle(); 357 auto format = AHARDWAREBUFFER_FORMAT_BLOB; 358 auto usage = AHARDWAREBUFFER_USAGE_CPU_READ_OFTEN | AHARDWAREBUFFER_USAGE_CPU_WRITE_OFTEN; 359 const uint32_t width = hidlMemory.size(); 360 const uint32_t height = 1; // height is always 1 for BLOB mode AHardwareBuffer. 361 const uint32_t layers = 1; // layers is always 1 for BLOB mode AHardwareBuffer. 362 const uint32_t stride = hidlMemory.size(); 363 graphicBuffer = new GraphicBuffer(handle, GraphicBuffer::HandleWrapMethod::CLONE_HANDLE, 364 width, height, format, layers, usage, stride); 365 void* gBuffer = nullptr; 366 int32_t outBytesPerPixel, outBytesPerStride; 367 status_t status = 368 graphicBuffer->lock(usage, &gBuffer, &outBytesPerPixel, &outBytesPerStride); 369 if (status != NO_ERROR) { 370 LOG(ERROR) << "RunTimePoolInfo Can't lock the AHardwareBuffer."; 371 return std::nullopt; 372 } 373 buffer = static_cast<uint8_t*>(gBuffer); 374 } else { 375 LOG(ERROR) << "RunTimePoolInfo::set(): unsupported hidl_memory type"; 376 return std::nullopt; 377 } 378 379 const auto impl = 380 std::make_shared<const RunTimePoolInfoImpl>(hidlMemory, buffer, memory, graphicBuffer); 381 return {RunTimePoolInfo(impl)}; 382 } 383 384 RunTimePoolInfo RunTimePoolInfo::createFromExistingBuffer(uint8_t* buffer) { 385 const auto impl = 386 std::make_shared<const RunTimePoolInfoImpl>(hidl_memory{}, buffer, nullptr, nullptr); 387 return {impl}; 388 } 389 390 RunTimePoolInfo::RunTimePoolInfo(const std::shared_ptr<const RunTimePoolInfoImpl>& impl) 391 : mImpl(impl) {} 392 393 uint8_t* RunTimePoolInfo::getBuffer() const { 394 return mImpl->getBuffer(); 395 } 396 397 bool RunTimePoolInfo::update() const { 398 return mImpl->update(); 399 } 400 401 hidl_memory RunTimePoolInfo::getHidlMemory() const { 402 return mImpl->getHidlMemory(); 403 } 404 405 bool setRunTimePoolInfosFromHidlMemories(std::vector<RunTimePoolInfo>* poolInfos, 406 const hidl_vec<hidl_memory>& pools) { 407 CHECK(poolInfos != nullptr); 408 poolInfos->clear(); 409 poolInfos->reserve(pools.size()); 410 for (const auto& pool : pools) { 411 if (std::optional<RunTimePoolInfo> poolInfo = RunTimePoolInfo::createFromHidlMemory(pool)) { 412 poolInfos->push_back(*poolInfo); 413 } else { 414 LOG(ERROR) << "Could not map pools"; 415 poolInfos->clear(); 416 return false; 417 } 418 } 419 return true; 420 } 421 422 
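// The two helpers below repack a 4-D tensor between NCHW (what the callers pass when
// data_layout is true) and NHWC. With fromDim = {N, C, H, W} for convertToNhwcImpl and
// fromDim = {N, H, W, C} for convertFromNhwcImpl, the index arithmetic is the usual
// row-major mapping:
//
//     NCHW offset of (n, c, h, w): ((n * C + c) * H + h) * W + w
//     NHWC offset of (n, h, w, c): ((n * H + h) * W + w) * C + c
//
// Worked example (illustrative): an NCHW tensor with dimensions {1, 2, 2, 2} stored as
// [a b c d | e f g h] (channel 0 then channel 1) is emitted as [a e b f c g d h] in NHWC.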
template <typename T> 423 inline bool convertToNhwcImpl(T* to, const T* from, const std::vector<uint32_t>& fromDim) { 424 uint32_t spatialSize = fromDim[2] * fromDim[3]; 425 for (uint32_t n = 0; n < fromDim[0]; n++) { 426 for (uint32_t hw = 0; hw < spatialSize; hw++) { 427 for (uint32_t c = 0; c < fromDim[1]; c++) { 428 uint32_t fromIndex = n * fromDim[1] * spatialSize + c * spatialSize + hw; 429 *to++ = from[fromIndex]; 430 } 431 } 432 } 433 return true; 434 } 435 436 template <typename T> 437 inline bool convertFromNhwcImpl(T* to, const T* from, const std::vector<uint32_t>& fromDim) { 438 uint32_t spatialSize = fromDim[1] * fromDim[2]; 439 for (uint32_t n = 0; n < fromDim[0]; n++) { 440 for (uint32_t c = 0; c < fromDim[3]; c++) { 441 for (uint32_t hw = 0; hw < spatialSize; hw++) { 442 uint32_t fromIndex = n * spatialSize * fromDim[3] + hw * fromDim[3] + c; 443 *to++ = from[fromIndex]; 444 } 445 } 446 } 447 return true; 448 } 449 450 static bool convertToNhwc(RunTimeOperandInfo& to, const RunTimeOperandInfo& from, 451 std::unique_ptr<uint8_t[]>& ptr_guard, bool data_layout) { 452 int result; 453 if (from.dimensions.size() != 4) { 454 LOG(ERROR) << "Error converting a non-4-D tensor to NHWC layout"; 455 return false; 456 } 457 to.lifetime = OperandLifeTime::TEMPORARY_VARIABLE; 458 if (data_layout) { 459 // convert dimensions 460 Shape inShape = from.shape(); 461 auto& fromDim = from.dimensions; 462 inShape.dimensions = {fromDim[0], fromDim[2], fromDim[3], fromDim[1]}; 463 // allocate buffer 464 to.buffer = nullptr; 465 if (!setInfoAndAllocateIfNeeded(&to, inShape, &result)) { 466 return false; 467 } 468 ptr_guard.reset(to.buffer); 469 // convert value 470 if (from.type == OperandType::TENSOR_FLOAT32) { 471 return convertToNhwcImpl<float>(reinterpret_cast<float*>(to.buffer), 472 reinterpret_cast<const float*>(from.buffer), fromDim); 473 } else if (from.type == OperandType::TENSOR_FLOAT16) { 474 return convertToNhwcImpl<_Float16>(reinterpret_cast<_Float16*>(to.buffer), 475 reinterpret_cast<const _Float16*>(from.buffer), 476 fromDim); 477 } else if (from.type == OperandType::TENSOR_QUANT8_ASYMM) { 478 return convertToNhwcImpl<uint8_t>(reinterpret_cast<uint8_t*>(to.buffer), 479 reinterpret_cast<const uint8_t*>(from.buffer), 480 fromDim); 481 } else { 482 LOG(ERROR) << "Unsupported data type"; 483 return false; 484 } 485 } else { 486 to = from; 487 } 488 return true; 489 } 490 491 static bool convertFromNhwc(RunTimeOperandInfo& to, const RunTimeOperandInfo& from, 492 bool data_layout, int* result) { 493 if (from.dimensions.size() != 4) { 494 LOG(ERROR) << "Error converting a non-4-D tensor from NHWC layout"; 495 return false; 496 } 497 if (data_layout) { 498 // convert dimensions 499 Shape outShape = from.shape(); 500 auto& fromDim = from.dimensions; 501 outShape.dimensions = {fromDim[0], fromDim[3], fromDim[1], fromDim[2]}; 502 // allocate buffer 503 if (!setInfoAndAllocateIfNeeded(&to, outShape, result)) { 504 return false; 505 } 506 // convert value 507 if (from.type == OperandType::TENSOR_FLOAT32) { 508 return convertFromNhwcImpl<float>(reinterpret_cast<float*>(to.buffer), 509 reinterpret_cast<const float*>(from.buffer), fromDim); 510 } else if (from.type == OperandType::TENSOR_FLOAT16) { 511 return convertFromNhwcImpl<_Float16>(reinterpret_cast<_Float16*>(to.buffer), 512 reinterpret_cast<const _Float16*>(from.buffer), 513 fromDim); 514 } else if (from.type == OperandType::TENSOR_QUANT8_ASYMM) { 515 return convertFromNhwcImpl<uint8_t>(reinterpret_cast<uint8_t*>(to.buffer), 516 
reinterpret_cast<const uint8_t*>(from.buffer), 517 fromDim); 518 } else { 519 LOG(ERROR) << "Unsupported data type"; 520 return false; 521 } 522 } else { 523 Shape outShape = from.shape(); 524 to.buffer = from.buffer; 525 to.length = from.length; 526 if (!setInfoAndAllocateIfNeeded(&to, outShape, result)) { 527 return false; 528 } 529 } 530 return true; 531 } 532 533 // Ignore the .pools entry in model and request. This will have been taken care of 534 // by the caller. 535 int CpuExecutor::run(const Model& model, const Request& request, 536 const std::vector<RunTimePoolInfo>& modelPoolInfos, 537 const std::vector<RunTimePoolInfo>& requestPoolInfos) { 538 NNTRACE_CPU(NNTRACE_PHASE_EXECUTION, "run"); 539 VLOG(CPUEXE) << "CpuExecutor::run() with request(" << SHOW_IF_DEBUG(toString(request)) << ")"; 540 541 // b/109953668, disable OpenMP 542 #ifdef NNAPI_OPENMP 543 ScopedOpenmpSettings openMpSettings; 544 #endif // NNAPI_OPENMP 545 546 mModel = &model; 547 mRequest = &request; // TODO check if mRequest is needed 548 initializeRunTimeInfo(modelPoolInfos, requestPoolInfos); 549 // The model has serialized the operation in execution order. 550 for (const auto& operation : model.operations) { 551 int n = executeOperation(operation); 552 if (n != ANEURALNETWORKS_NO_ERROR) { 553 finish(n); 554 return n; 555 } 556 } 557 for (auto& runtimeInfo : modelPoolInfos) { 558 runtimeInfo.update(); 559 } 560 for (auto& runtimeInfo : requestPoolInfos) { 561 runtimeInfo.update(); 562 } 563 finish(ANEURALNETWORKS_NO_ERROR); 564 VLOG(CPUEXE) << "Completed run normally"; 565 return ANEURALNETWORKS_NO_ERROR; 566 } 567 568 bool CpuExecutor::initializeRunTimeInfo(const std::vector<RunTimePoolInfo>& modelPoolInfos, 569 const std::vector<RunTimePoolInfo>& requestPoolInfos) { 570 VLOG(CPUEXE) << "CpuExecutor::initializeRunTimeInfo"; 571 const size_t count = mModel->operands.size(); 572 mOperands.resize(count); 573 574 // Start by setting the runtime info to what's in the model. 575 for (size_t i = 0; i < count; i++) { 576 const Operand& from = mModel->operands[i]; 577 RunTimeOperandInfo& to = mOperands[i]; 578 to.type = from.type; 579 to.dimensions = from.dimensions; 580 to.scale = from.scale; 581 to.zeroPoint = from.zeroPoint; 582 to.length = from.location.length; 583 to.lifetime = from.lifetime; 584 to.extraParams = from.extraParams; 585 switch (from.lifetime) { 586 case OperandLifeTime::TEMPORARY_VARIABLE: 587 to.buffer = nullptr; 588 to.numberOfUsesLeft = from.numberOfConsumers; 589 break; 590 case OperandLifeTime::CONSTANT_COPY: 591 to.buffer = const_cast<uint8_t*>(&mModel->operandValues[from.location.offset]); 592 to.numberOfUsesLeft = 0; 593 break; 594 case OperandLifeTime::CONSTANT_REFERENCE: { 595 auto poolIndex = from.location.poolIndex; 596 nnAssert(poolIndex < modelPoolInfos.size()); 597 auto& r = modelPoolInfos[poolIndex]; 598 to.buffer = r.getBuffer() + from.location.offset; 599 to.numberOfUsesLeft = 0; 600 break; 601 } 602 case OperandLifeTime::MODEL_INPUT: 603 case OperandLifeTime::MODEL_OUTPUT: 604 case OperandLifeTime::NO_VALUE: 605 to.buffer = nullptr; 606 to.numberOfUsesLeft = 0; 607 break; 608 default: 609 nnAssert(false); 610 break; 611 } 612 } 613 614 // Adjust the runtime info for the arguments passed to the model, 615 // modifying the buffer location, and possibly the dimensions. 
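    // For each argument: an omitted value (hasNoValue == true) switches the operand's
    // lifetime to NO_VALUE, while a present value points the operand's buffer into the
    // corresponding request pool. For example (hypothetical numbers), an input described
    // by {poolIndex = 1, offset = 256, length = 64} ends up with
    // buffer == requestPoolInfos[1].getBuffer() + 256 and length == 64.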
616 auto updateForArguments = [this, &requestPoolInfos]( 617 const std::vector<uint32_t>& indexes, 618 const hidl_vec<RequestArgument>& arguments) { 619 nnAssert(indexes.size() == arguments.size()); 620 for (size_t i = 0; i < indexes.size(); i++) { 621 const uint32_t operandIndex = indexes[i]; 622 const RequestArgument& from = arguments[i]; 623 RunTimeOperandInfo& to = mOperands[operandIndex]; 624 if (from.dimensions.size() > 0) { 625 // It's the responsibility of the caller to validate that 626 // from.dimensions only modifies the dimensions that were 627 // unspecified in the model. That's the case in SampleDriver.cpp 628 // with the call to validateRequest(). 629 // TODO make sure that's the case for the default CPU path. 630 to.dimensions = from.dimensions; 631 } 632 if (from.hasNoValue) { 633 to.lifetime = OperandLifeTime::NO_VALUE; 634 nnAssert(to.buffer == nullptr); 635 to.length = 0; 636 } else { 637 auto poolIndex = from.location.poolIndex; 638 nnAssert(poolIndex < requestPoolInfos.size()); 639 auto& r = requestPoolInfos[poolIndex]; 640 to.buffer = r.getBuffer() + from.location.offset; 641 to.length = from.location.length; 642 } 643 } 644 }; 645 updateForArguments(mModel->inputIndexes, mRequest->inputs); 646 updateForArguments(mModel->outputIndexes, mRequest->outputs); 647 648 return true; 649 } 650 651 void CpuExecutor::freeNoLongerUsedOperands(const std::vector<uint32_t>& inputs) { 652 for (uint32_t i : inputs) { 653 auto& info = mOperands[i]; 654 // Check if it's a static or model input/output. 655 if (info.numberOfUsesLeft == 0) { 656 continue; 657 } 658 info.numberOfUsesLeft--; 659 if (info.numberOfUsesLeft == 0 && info.buffer != nullptr) { 660 delete[] info.buffer; 661 info.buffer = nullptr; 662 } 663 } 664 } 665 666 int CpuExecutor::executeOperation(const Operation& operation) { 667 // VLOG(CPUEXE) << "CpuExecutor::executeOperation(" << toString(operation) << ")"; 668 const hidl_vec<uint32_t>& ins = operation.inputs; 669 const hidl_vec<uint32_t>& outs = operation.outputs; 670 bool success = false; 671 int result = ANEURALNETWORKS_NO_ERROR; 672 673 // Function to verify that the number of input and output parameters 674 // matches what is expected. Also checks that all the parameters have 675 // values. This function is to be used only for operations that do not 676 // accept optional arguments. 677 // TODO Have a version that works for optional arguments. 678 auto allParametersPresent = [&operation, &ins, &outs, this](size_t requiredIns, 679 size_t requiredOuts) -> bool { 680 auto verify = [&operation, this](size_t requiredCount, const hidl_vec<uint32_t>& indexes, 681 const char* type) -> bool { 682 size_t actualCount = indexes.size(); 683 if (actualCount != requiredCount) { 684 LOG(ERROR) << getOperationName(operation.type) << ": Invalid number of " << type 685 << " operands. 
Got " << actualCount << " of " << requiredCount; 686 return false; 687 } 688 for (size_t i = 0; i < actualCount; i++) { 689 if (mOperands[indexes[i]].lifetime == OperandLifeTime::NO_VALUE) { 690 LOG(ERROR) << getOperationName(operation.type) << " " << type << " operand " 691 << i << " is required but missing."; 692 return false; 693 } 694 } 695 return true; 696 }; 697 698 auto verifyNoZeroSizedInputs = [&operation, this](const hidl_vec<uint32_t>& indexes) { 699 for (size_t i = 0; i < indexes.size(); i++) { 700 for (size_t j = 0; j < mOperands[indexes[i]].dimensions.size(); j++) { 701 if (mOperands[indexes[i]].dimensions[j] == 0) { 702 LOG(ERROR) << getOperationName(operation.type) 703 << " does not support zero-sized tensor, but input " << i 704 << " dimension " << j << " is zero."; 705 return false; 706 } 707 } 708 } 709 return true; 710 }; 711 712 return verify(requiredIns, ins, "in") && verify(requiredOuts, outs, "out") && 713 verifyNoZeroSizedInputs(ins); 714 }; 715 716 switch (operation.type) { 717 case OperationType::OEM_OPERATION: { 718 LOG(ERROR) << "OEM operation not supported for CPU execution"; 719 success = false; 720 } break; 721 case OperationType::FLOOR: { 722 if (!allParametersPresent(1, 1)) { 723 return ANEURALNETWORKS_BAD_DATA; 724 } 725 const RunTimeOperandInfo& input = mOperands[ins[0]]; 726 RunTimeOperandInfo& output = mOperands[outs[0]]; 727 Shape outShape = output.shape(); 728 729 if (!floorPrepare(input.shape(), &outShape) || 730 !setInfoAndAllocateIfNeeded(&output, outShape, &result)) { 731 break; 732 } 733 if (input.type == OperandType::TENSOR_FLOAT32) { 734 success = floorFloat32(reinterpret_cast<const float*>(input.buffer), 735 reinterpret_cast<float*>(output.buffer), outShape); 736 } else if (input.type == OperandType::TENSOR_FLOAT16) { 737 success = floorFloat16(reinterpret_cast<const _Float16*>(input.buffer), 738 reinterpret_cast<_Float16*>(output.buffer), outShape); 739 } 740 } break; 741 case OperationType::DEPTHWISE_CONV_2D: { 742 const size_t inCount = ins.size(); 743 if ((inCount != 14 && inCount != 12 && inCount != 11 && inCount != 9 && inCount != 8) || 744 !allParametersPresent(inCount, 1)) { 745 return ANEURALNETWORKS_BAD_DATA; 746 } 747 const RunTimeOperandInfo& input = mOperands[ins[0]]; 748 const RunTimeOperandInfo& filter = mOperands[ins[1]]; 749 const RunTimeOperandInfo& bias = mOperands[ins[2]]; 750 751 int32_t padding_left, padding_right; 752 int32_t padding_top, padding_bottom; 753 int32_t padding_implicit = 0; 754 int32_t stride_width, stride_height; 755 int32_t dilation_width_factor = 1, dilation_height_factor = 1; 756 int32_t depth_multiplier; 757 int32_t activation; 758 bool data_layout = false; 759 bool useImplicitPadding = false; 760 761 if ((inCount >= 9 && mOperands[ins[8]].type == OperandType::BOOL) || inCount == 8) { 762 padding_implicit = getScalarData<int32_t>(mOperands[ins[3]]); 763 stride_width = getScalarData<int32_t>(mOperands[ins[4]]); 764 stride_height = getScalarData<int32_t>(mOperands[ins[5]]); 765 depth_multiplier = getScalarData<int32_t>(mOperands[ins[6]]); 766 activation = getScalarData<int32_t>(mOperands[ins[7]]); 767 if (inCount >= 9) { 768 data_layout = getScalarData<bool>(mOperands[ins[8]]); 769 } 770 if (inCount == 11) { 771 dilation_width_factor = getScalarData<int32_t>(mOperands[ins[9]]); 772 dilation_height_factor = getScalarData<int32_t>(mOperands[ins[10]]); 773 } 774 useImplicitPadding = true; 775 } else if (inCount >= 11 && mOperands[ins[8]].type == OperandType::INT32) { 776 padding_left = 
getScalarData<int32_t>(mOperands[ins[3]]); 777 padding_right = getScalarData<int32_t>(mOperands[ins[4]]); 778 padding_top = getScalarData<int32_t>(mOperands[ins[5]]); 779 padding_bottom = getScalarData<int32_t>(mOperands[ins[6]]); 780 stride_width = getScalarData<int32_t>(mOperands[ins[7]]); 781 stride_height = getScalarData<int32_t>(mOperands[ins[8]]); 782 depth_multiplier = getScalarData<int32_t>(mOperands[ins[9]]); 783 activation = getScalarData<int32_t>(mOperands[ins[10]]); 784 if (inCount >= 12) { 785 data_layout = getScalarData<bool>(mOperands[ins[11]]); 786 } 787 if (inCount == 14) { 788 dilation_width_factor = getScalarData<int32_t>(mOperands[ins[12]]); 789 dilation_height_factor = getScalarData<int32_t>(mOperands[ins[13]]); 790 } 791 } else { 792 return ANEURALNETWORKS_BAD_DATA; 793 } 794 795 RunTimeOperandInfo& output = mOperands[outs[0]]; 796 Shape outShape = output.shape(); 797 798 RunTimeOperandInfo input_tmp, output_tmp; 799 std::unique_ptr<uint8_t[]> input_tmp_guard, output_tmp_guard; 800 if (!convertToNhwc(input_tmp, input, input_tmp_guard, data_layout)) { 801 success = false; 802 break; 803 } 804 output_tmp.lifetime = OperandLifeTime::TEMPORARY_VARIABLE; 805 output_tmp.buffer = data_layout ? nullptr : output.buffer; 806 output_tmp.length = data_layout ? 0 : output.length; 807 808 if (useImplicitPadding) { 809 Shape inputShape = input_tmp.shape(); 810 Shape filterShape = filter.shape(); 811 int32_t input_width = getSizeOfDimension(inputShape, 2); 812 int32_t input_height = getSizeOfDimension(inputShape, 1); 813 int32_t filter_width = getSizeOfDimension(filterShape, 2); 814 int32_t filter_height = getSizeOfDimension(filterShape, 1); 815 calculateExplicitPadding(input_width, stride_width, dilation_width_factor, 816 filter_width, padding_implicit, &padding_left, 817 &padding_right); 818 calculateExplicitPadding(input_height, stride_height, dilation_height_factor, 819 filter_height, padding_implicit, &padding_top, 820 &padding_bottom); 821 } 822 823 if (!depthwiseConvPrepare(input_tmp.shape(), filter.shape(), bias.shape(), padding_left, 824 padding_right, padding_top, padding_bottom, stride_width, 825 stride_height, depth_multiplier, dilation_width_factor, 826 dilation_height_factor, &outShape) || 827 !setInfoAndAllocateIfNeeded(&output_tmp, outShape, &result)) { 828 if (!data_layout) output.dimensions = output_tmp.dimensions; 829 success = false; 830 break; 831 } 832 if (input_tmp.type == OperandType::TENSOR_FLOAT32) { 833 success = depthwiseConvFloat32( 834 reinterpret_cast<const float*>(input_tmp.buffer), input_tmp.shape(), 835 reinterpret_cast<const float*>(filter.buffer), filter.shape(), 836 reinterpret_cast<const float*>(bias.buffer), bias.shape(), padding_left, 837 padding_right, padding_top, padding_bottom, stride_width, stride_height, 838 dilation_width_factor, dilation_height_factor, depth_multiplier, activation, 839 reinterpret_cast<float*>(output_tmp.buffer), outShape); 840 } else if (input_tmp.type == OperandType::TENSOR_FLOAT16) { 841 success = depthwiseConvFloat16( 842 reinterpret_cast<const _Float16*>(input_tmp.buffer), input_tmp.shape(), 843 reinterpret_cast<const _Float16*>(filter.buffer), filter.shape(), 844 reinterpret_cast<const _Float16*>(bias.buffer), bias.shape(), padding_left, 845 padding_right, padding_top, padding_bottom, stride_width, stride_height, 846 dilation_width_factor, dilation_height_factor, depth_multiplier, activation, 847 reinterpret_cast<_Float16*>(output_tmp.buffer), outShape); 848 } else if (input_tmp.type == 
OperandType::TENSOR_QUANT8_ASYMM) { 849 if (filter.type == OperandType::TENSOR_QUANT8_SYMM_PER_CHANNEL) { 850 success = depthwiseConvQuant8PerChannel( 851 reinterpret_cast<const uint8_t*>(input_tmp.buffer), input_tmp.shape(), 852 reinterpret_cast<const int8_t*>(filter.buffer), filter.shape(), 853 filter.extraParams.channelQuant().scales.data(), 854 reinterpret_cast<const int32_t*>(bias.buffer), bias.shape(), 855 padding_left, padding_right, padding_top, padding_bottom, stride_width, 856 stride_height, dilation_width_factor, dilation_height_factor, 857 depth_multiplier, activation, 858 reinterpret_cast<uint8_t*>(output_tmp.buffer), outShape); 859 } else if (filter.type == OperandType::TENSOR_QUANT8_ASYMM) { 860 success = depthwiseConvQuant8( 861 reinterpret_cast<const uint8_t*>(input_tmp.buffer), input_tmp.shape(), 862 reinterpret_cast<const uint8_t*>(filter.buffer), filter.shape(), 863 reinterpret_cast<const int32_t*>(bias.buffer), bias.shape(), 864 padding_left, padding_right, padding_top, padding_bottom, stride_width, 865 stride_height, dilation_width_factor, dilation_height_factor, 866 depth_multiplier, activation, 867 reinterpret_cast<uint8_t*>(output_tmp.buffer), outShape); 868 } 869 } 870 if (data_layout) { 871 output_tmp_guard.reset(output_tmp.buffer); 872 } 873 if (!success || !convertFromNhwc(output, output_tmp, data_layout, &result)) { 874 success = false; 875 break; 876 } 877 } break; 878 case OperationType::LOCAL_RESPONSE_NORMALIZATION: { 879 const size_t inCount = ins.size(); 880 if ((inCount != 6 && inCount != 5) || !allParametersPresent(inCount, 1)) { 881 return ANEURALNETWORKS_BAD_DATA; 882 } 883 const RunTimeOperandInfo& input = mOperands[ins[0]]; 884 int32_t radius = getScalarData<int32_t>(mOperands[ins[1]]); 885 float bias = (input.type == OperandType::TENSOR_FLOAT16) 886 ? getScalarData<_Float16>(mOperands[ins[2]]) 887 : getScalarData<float>(mOperands[ins[2]]); 888 float alpha = (input.type == OperandType::TENSOR_FLOAT16) 889 ? getScalarData<_Float16>(mOperands[ins[3]]) 890 : getScalarData<float>(mOperands[ins[3]]); 891 float beta = (input.type == OperandType::TENSOR_FLOAT16) 892 ? getScalarData<_Float16>(mOperands[ins[4]]) 893 : getScalarData<float>(mOperands[ins[4]]); 894 const int32_t axis = inCount == 6 ? 
getScalarData<int32_t>(mOperands[ins[5]]) : -1; 895 896 RunTimeOperandInfo& output = mOperands[outs[0]]; 897 Shape outShape = output.shape(); 898 899 if (!genericNormalizationPrepare(input.shape(), &outShape) || 900 !setInfoAndAllocateIfNeeded(&output, outShape, &result)) { 901 success = false; 902 break; 903 } 904 if (input.type == OperandType::TENSOR_FLOAT32) { 905 success = localResponseNormFloat32( 906 reinterpret_cast<const float*>(input.buffer), input.shape(), radius, bias, 907 alpha, beta, axis, reinterpret_cast<float*>(output.buffer), outShape); 908 } else if (input.type == OperandType::TENSOR_FLOAT16) { 909 success = localResponseNormFloat16(reinterpret_cast<const _Float16*>(input.buffer), 910 input.shape(), radius, bias, alpha, beta, axis, 911 reinterpret_cast<_Float16*>(output.buffer), 912 outShape); 913 } 914 } break; 915 case OperationType::RESHAPE: { 916 if (!allParametersPresent(2, 1)) { 917 return ANEURALNETWORKS_BAD_DATA; 918 } 919 const RunTimeOperandInfo& input = mOperands[ins[0]]; 920 const RunTimeOperandInfo& targetShape = mOperands[ins[1]]; 921 922 RunTimeOperandInfo& output = mOperands[outs[0]]; 923 Shape outShape = output.shape(); 924 925 success = reshapePrepare(input.shape(), 926 reinterpret_cast<const int32_t*>(targetShape.buffer), 927 getNumberOfElements(targetShape.shape()), &outShape) && 928 setInfoAndAllocateIfNeeded(&output, outShape, &result) && 929 copyData(input.buffer, input.shape(), output.buffer, outShape); 930 } break; 931 case OperationType::DEPTH_TO_SPACE: { 932 const size_t inCount = ins.size(); 933 if ((inCount != 3 && inCount != 2) || !allParametersPresent(inCount, 1)) { 934 return ANEURALNETWORKS_BAD_DATA; 935 } 936 const RunTimeOperandInfo& input = mOperands[ins[0]]; 937 int32_t blockSize = getScalarData<int32_t>(mOperands[ins[1]]); 938 bool data_layout = inCount == 3 ? getScalarData<bool>(mOperands[ins[2]]) : false; 939 940 RunTimeOperandInfo& output = mOperands[outs[0]]; 941 Shape outShape = output.shape(); 942 943 RunTimeOperandInfo input_tmp, output_tmp; 944 std::unique_ptr<uint8_t[]> input_tmp_guard, output_tmp_guard; 945 if (!convertToNhwc(input_tmp, input, input_tmp_guard, data_layout)) { 946 success = false; 947 break; 948 } 949 output_tmp.lifetime = OperandLifeTime::TEMPORARY_VARIABLE; 950 output_tmp.buffer = data_layout ? nullptr : output.buffer; 951 output_tmp.length = data_layout ? 
0 : output.length; 952 if (!depthToSpacePrepare(input_tmp.shape(), blockSize, &outShape) || 953 !setInfoAndAllocateIfNeeded(&output_tmp, outShape, &result)) { 954 if (!data_layout) output.dimensions = output_tmp.dimensions; 955 break; 956 } 957 switch (input_tmp.type) { 958 case OperandType::TENSOR_FLOAT32: { 959 success = depthToSpaceGeneric( 960 reinterpret_cast<const float*>(input_tmp.buffer), input_tmp.shape(), 961 blockSize, reinterpret_cast<float*>(output_tmp.buffer), outShape); 962 break; 963 } 964 case OperandType::TENSOR_FLOAT16: { 965 success = depthToSpaceGeneric( 966 reinterpret_cast<const _Float16*>(input_tmp.buffer), input_tmp.shape(), 967 blockSize, reinterpret_cast<_Float16*>(output_tmp.buffer), outShape); 968 break; 969 } 970 case OperandType::TENSOR_QUANT8_ASYMM: { 971 success = depthToSpaceGeneric( 972 reinterpret_cast<const uint8_t*>(input_tmp.buffer), input_tmp.shape(), 973 blockSize, reinterpret_cast<uint8_t*>(output_tmp.buffer), outShape); 974 break; 975 } 976 default: { 977 LOG(ERROR) << "Unsupported data type"; 978 success = false; 979 } 980 } 981 if (data_layout) { 982 output_tmp_guard.reset(output_tmp.buffer); 983 } 984 if (!success || !convertFromNhwc(output, output_tmp, data_layout, &result)) { 985 success = false; 986 break; 987 } 988 } break; 989 case OperationType::SPACE_TO_DEPTH: { 990 const size_t inCount = ins.size(); 991 if ((inCount != 3 && inCount != 2) || !allParametersPresent(inCount, 1)) { 992 return ANEURALNETWORKS_BAD_DATA; 993 } 994 const RunTimeOperandInfo& input = mOperands[ins[0]]; 995 int32_t blockSize = getScalarData<int32_t>(mOperands[ins[1]]); 996 bool data_layout = inCount == 3 ? getScalarData<bool>(mOperands[ins[2]]) : false; 997 998 RunTimeOperandInfo& output = mOperands[outs[0]]; 999 Shape outShape = output.shape(); 1000 1001 RunTimeOperandInfo input_tmp, output_tmp; 1002 std::unique_ptr<uint8_t[]> input_tmp_guard, output_tmp_guard; 1003 if (!convertToNhwc(input_tmp, input, input_tmp_guard, data_layout)) { 1004 success = false; 1005 break; 1006 } 1007 output_tmp.lifetime = OperandLifeTime::TEMPORARY_VARIABLE; 1008 output_tmp.buffer = data_layout ? nullptr : output.buffer; 1009 output_tmp.length = data_layout ? 
0 : output.length; 1010 1011 if (!spaceToDepthPrepare(input_tmp.shape(), blockSize, &outShape) || 1012 !setInfoAndAllocateIfNeeded(&output_tmp, outShape, &result)) { 1013 if (!data_layout) output.dimensions = output_tmp.dimensions; 1014 break; 1015 } 1016 switch (input_tmp.type) { 1017 case OperandType::TENSOR_FLOAT32: { 1018 success = spaceToDepthGeneric( 1019 reinterpret_cast<const float*>(input_tmp.buffer), input_tmp.shape(), 1020 blockSize, reinterpret_cast<float*>(output_tmp.buffer), outShape); 1021 break; 1022 } 1023 case OperandType::TENSOR_FLOAT16: { 1024 success = spaceToDepthGeneric( 1025 reinterpret_cast<const _Float16*>(input_tmp.buffer), input_tmp.shape(), 1026 blockSize, reinterpret_cast<_Float16*>(output_tmp.buffer), outShape); 1027 break; 1028 } 1029 case OperandType::TENSOR_QUANT8_ASYMM: { 1030 success = spaceToDepthGeneric( 1031 reinterpret_cast<const uint8_t*>(input_tmp.buffer), input_tmp.shape(), 1032 blockSize, reinterpret_cast<uint8_t*>(output_tmp.buffer), outShape); 1033 break; 1034 } 1035 default: { 1036 LOG(ERROR) << "Unsupported data type"; 1037 success = false; 1038 } 1039 } 1040 if (data_layout) { 1041 output_tmp_guard.reset(output_tmp.buffer); 1042 } 1043 if (!success || !convertFromNhwc(output, output_tmp, data_layout, &result)) { 1044 success = false; 1045 break; 1046 } 1047 } break; 1048 case OperationType::EMBEDDING_LOOKUP: { 1049 const RunTimeOperandInfo& values = mOperands[ins[EmbeddingLookup::kValueTensor]]; 1050 const RunTimeOperandInfo& lookups = mOperands[ins[EmbeddingLookup::kLookupTensor]]; 1051 RunTimeOperandInfo& output = mOperands[outs[EmbeddingLookup::kOutputTensor]]; 1052 1053 Shape outputShape; 1054 EmbeddingLookup lookup(operation, mOperands); 1055 1056 success = embeddingLookupPrepare(values.shape(), lookups.shape(), &outputShape) && 1057 setInfoAndAllocateIfNeeded(&output, outputShape, &result) && lookup.Eval(); 1058 } break; 1059 case OperationType::HASHTABLE_LOOKUP: { 1060 const RunTimeOperandInfo& lookups = mOperands[ins[HashtableLookup::kLookupTensor]]; 1061 const RunTimeOperandInfo& keys = mOperands[ins[HashtableLookup::kKeyTensor]]; 1062 const RunTimeOperandInfo& values = mOperands[ins[HashtableLookup::kValueTensor]]; 1063 1064 RunTimeOperandInfo& output = mOperands[outs[HashtableLookup::kOutputTensor]]; 1065 RunTimeOperandInfo& hits = mOperands[outs[HashtableLookup::kHitsTensor]]; 1066 1067 Shape outputShape, hitShape; 1068 HashtableLookup lookup(operation, mOperands); 1069 1070 success = hashtableLookupPrepare(lookups.shape(), keys.shape(), values.shape(), 1071 &outputShape, &hitShape) && 1072 setInfoAndAllocateIfNeeded(&output, outputShape, &result) && 1073 setInfoAndAllocateIfNeeded(&hits, hitShape, &result) && lookup.Eval(); 1074 } break; 1075 case OperationType::LSH_PROJECTION: { 1076 RunTimeOperandInfo& output = mOperands[outs[LSHProjection::kOutputTensor]]; 1077 Shape outputShape; 1078 if (!LSHProjection::Prepare(operation, mOperands, &outputShape) || 1079 !setInfoAndAllocateIfNeeded(&output, outputShape, &result)) { 1080 break; 1081 } 1082 1083 LSHProjection lsh(operation, mOperands); 1084 const RunTimeOperandInfo& hash = mOperands[ins[LSHProjection::kHashTensor]]; 1085 switch (hash.type) { 1086 case OperandType::TENSOR_FLOAT32: { 1087 success = lsh.Eval<float>(); 1088 break; 1089 } 1090 case OperandType::TENSOR_FLOAT16: { 1091 success = lsh.Eval<_Float16>(); 1092 break; 1093 } 1094 default: { 1095 success = false; 1096 LOG(ERROR) << "Unsupported data type"; 1097 } 1098 } 1099 } break; 1100 case 
OperationType::BIDIRECTIONAL_SEQUENCE_LSTM: { 1101 const auto merge_outputs = getScalarData<bool>( 1102 mOperands[ins[BidirectionalSequenceLSTM::kMergeOutputsParam]]); 1103 RunTimeOperandInfo& fwOutput = 1104 mOperands[outs[BidirectionalSequenceLSTM::kFwOutputTensor]]; 1105 Shape fwOutputShape, bwOutputShape; 1106 1107 BidirectionalSequenceLSTM lstm(operation, mOperands); 1108 success = lstm.Prepare(operation, mOperands, &fwOutputShape, &bwOutputShape) && 1109 setInfoAndAllocateIfNeeded(&fwOutput, fwOutputShape, &result); 1110 if (!merge_outputs) { 1111 RunTimeOperandInfo& bwOutput = 1112 mOperands[outs[BidirectionalSequenceLSTM::kBwOutputTensor]]; 1113 success = success && setInfoAndAllocateIfNeeded(&bwOutput, bwOutputShape, &result); 1114 } 1115 success = success && lstm.Eval(); 1116 } break; 1117 case OperationType::LSTM: { 1118 RunTimeOperandInfo& scratch = mOperands[outs[LSTMCell::kScratchBufferTensor]]; 1119 RunTimeOperandInfo& outputStateOut = mOperands[outs[LSTMCell::kOutputStateOutTensor]]; 1120 RunTimeOperandInfo& cellStateOut = mOperands[outs[LSTMCell::kCellStateOutTensor]]; 1121 RunTimeOperandInfo& output = mOperands[outs[LSTMCell::kOutputTensor]]; 1122 1123 Shape scratchShape, outputStateShape, cellStateShape, outputShape; 1124 LSTMCell lstm_cell(operation, mOperands); 1125 1126 success = lstm_cell.Prepare(operation, mOperands, &scratchShape, &outputStateShape, 1127 &cellStateShape, &outputShape) && 1128 setInfoAndAllocateIfNeeded(&scratch, scratchShape, &result) && 1129 setInfoAndAllocateIfNeeded(&outputStateOut, outputStateShape, &result) && 1130 setInfoAndAllocateIfNeeded(&cellStateOut, cellStateShape, &result) && 1131 setInfoAndAllocateIfNeeded(&output, outputShape, &result) && lstm_cell.Eval(); 1132 } break; 1133 case OperationType::RANDOM_MULTINOMIAL: { 1134 const RunTimeOperandInfo& lookups = mOperands[ins[HashtableLookup::kLookupTensor]]; 1135 const RunTimeOperandInfo& keys = mOperands[ins[HashtableLookup::kKeyTensor]]; 1136 const RunTimeOperandInfo& values = mOperands[ins[HashtableLookup::kValueTensor]]; 1137 RunTimeOperandInfo& output = mOperands[outs[Multinomial::kOutputTensor]]; 1138 1139 Shape outputShape; 1140 Multinomial multinomial(operation, mOperands); 1141 1142 success = Multinomial::Prepare(operation, mOperands, &outputShape) && 1143 setInfoAndAllocateIfNeeded(&output, outputShape, &result) && 1144 multinomial.Eval(); 1145 } break; 1146 case OperationType::RNN: { 1147 RunTimeOperandInfo& hiddenStateOut = mOperands[outs[RNN::kHiddenStateOutTensor]]; 1148 RunTimeOperandInfo& output = mOperands[outs[RNN::kOutputTensor]]; 1149 1150 Shape hiddenStateShape, outputShape; 1151 RNN rnn_cell(operation, mOperands); 1152 1153 success = RNN::Prepare(operation, mOperands, &hiddenStateShape, &outputShape) && 1154 setInfoAndAllocateIfNeeded(&hiddenStateOut, hiddenStateShape, &result) && 1155 setInfoAndAllocateIfNeeded(&output, outputShape, &result) && rnn_cell.Eval(); 1156 } break; 1157 case OperationType::SVDF: { 1158 RunTimeOperandInfo& stateOut = mOperands[outs[SVDF::kStateOutTensor]]; 1159 RunTimeOperandInfo& output = mOperands[outs[SVDF::kOutputTensor]]; 1160 1161 Shape stateShape, outputShape; 1162 SVDF svdf(operation, mOperands); 1163 1164 success = SVDF::Prepare(operation, mOperands, &stateShape, &outputShape) && 1165 setInfoAndAllocateIfNeeded(&stateOut, stateShape, &result) && 1166 setInfoAndAllocateIfNeeded(&output, outputShape, &result) && svdf.Eval(); 1167 } break; 1168 case OperationType::BATCH_TO_SPACE_ND: { 1169 const size_t inCount = ins.size(); 1170 if 
((inCount != 3 && inCount != 2) || !allParametersPresent(inCount, 1)) { 1171 return ANEURALNETWORKS_BAD_DATA; 1172 } 1173 const RunTimeOperandInfo& input = mOperands[ins[0]]; 1174 const RunTimeOperandInfo& blockSize = mOperands[ins[1]]; 1175 bool data_layout = inCount == 3 ? getScalarData<bool>(mOperands[ins[2]]) : false; 1176 1177 RunTimeOperandInfo& output = mOperands[outs[0]]; 1178 Shape outShape = output.shape(); 1179 1180 RunTimeOperandInfo input_tmp, output_tmp; 1181 std::unique_ptr<uint8_t[]> input_tmp_guard, output_tmp_guard; 1182 if (!convertToNhwc(input_tmp, input, input_tmp_guard, data_layout)) { 1183 success = false; 1184 break; 1185 } 1186 output_tmp.lifetime = OperandLifeTime::TEMPORARY_VARIABLE; 1187 output_tmp.buffer = data_layout ? nullptr : output.buffer; 1188 output_tmp.length = data_layout ? 0 : output.length; 1189 1190 if (!batchToSpacePrepare(input_tmp.shape(), 1191 reinterpret_cast<const int32_t*>(blockSize.buffer), 1192 blockSize.shape(), &outShape) || 1193 !setInfoAndAllocateIfNeeded(&output_tmp, outShape, &result)) { 1194 if (!data_layout) output.dimensions = output_tmp.dimensions; 1195 break; 1196 } 1197 switch (input_tmp.type) { 1198 case OperandType::TENSOR_FLOAT32: { 1199 success = batchToSpaceGeneric( 1200 reinterpret_cast<const float*>(input_tmp.buffer), input_tmp.shape(), 1201 reinterpret_cast<const int32_t*>(blockSize.buffer), 1202 reinterpret_cast<float*>(output_tmp.buffer), outShape); 1203 break; 1204 } 1205 case OperandType::TENSOR_FLOAT16: { 1206 success = batchToSpaceGeneric( 1207 reinterpret_cast<const _Float16*>(input_tmp.buffer), input_tmp.shape(), 1208 reinterpret_cast<const int32_t*>(blockSize.buffer), 1209 reinterpret_cast<_Float16*>(output_tmp.buffer), outShape); 1210 break; 1211 } 1212 case OperandType::TENSOR_QUANT8_ASYMM: { 1213 success = batchToSpaceGeneric( 1214 reinterpret_cast<const uint8_t*>(input_tmp.buffer), input_tmp.shape(), 1215 reinterpret_cast<const int32_t*>(blockSize.buffer), 1216 reinterpret_cast<uint8_t*>(output_tmp.buffer), outShape); 1217 break; 1218 } 1219 default: { 1220 LOG(ERROR) << "Unsupported data type"; 1221 success = false; 1222 } 1223 } 1224 if (data_layout) { 1225 output_tmp_guard.reset(output_tmp.buffer); 1226 } 1227 if (!success || !convertFromNhwc(output, output_tmp, data_layout, &result)) { 1228 success = false; 1229 break; 1230 } 1231 } break; 1232 case OperationType::SPACE_TO_BATCH_ND: { 1233 const size_t inCount = ins.size(); 1234 if ((inCount != 4 && inCount != 3) || !allParametersPresent(inCount, 1)) { 1235 return ANEURALNETWORKS_BAD_DATA; 1236 } 1237 const RunTimeOperandInfo& input = mOperands[ins[0]]; 1238 const RunTimeOperandInfo& blockSize = mOperands[ins[1]]; 1239 const RunTimeOperandInfo& paddings = mOperands[ins[2]]; 1240 bool data_layout = inCount == 4 ? getScalarData<bool>(mOperands[ins[3]]) : false; 1241 1242 RunTimeOperandInfo& output = mOperands[outs[0]]; 1243 Shape outShape = output.shape(); 1244 1245 RunTimeOperandInfo input_tmp, output_tmp; 1246 std::unique_ptr<uint8_t[]> input_tmp_guard, output_tmp_guard; 1247 if (!convertToNhwc(input_tmp, input, input_tmp_guard, data_layout)) { 1248 success = false; 1249 break; 1250 } 1251 output_tmp.lifetime = OperandLifeTime::TEMPORARY_VARIABLE; 1252 output_tmp.buffer = data_layout ? nullptr : output.buffer; 1253 output_tmp.length = data_layout ? 
0 : output.length; 1254 1255 if (!spaceToBatchPrepare( 1256 input_tmp.shape(), reinterpret_cast<const int32_t*>(blockSize.buffer), 1257 blockSize.shape(), reinterpret_cast<const int32_t*>(paddings.buffer), 1258 paddings.shape(), &outShape) || 1259 !setInfoAndAllocateIfNeeded(&output_tmp, outShape, &result)) { 1260 if (!data_layout) output.dimensions = output_tmp.dimensions; 1261 break; 1262 } 1263 switch (input_tmp.type) { 1264 case OperandType::TENSOR_FLOAT32: { 1265 success = spaceToBatchGeneric( 1266 reinterpret_cast<const float*>(input_tmp.buffer), input_tmp.shape(), 1267 reinterpret_cast<const int32_t*>(blockSize.buffer), 1268 reinterpret_cast<const int32_t*>(paddings.buffer), paddings.shape(), 1269 reinterpret_cast<float*>(output_tmp.buffer), outShape); 1270 break; 1271 } 1272 case OperandType::TENSOR_FLOAT16: { 1273 success = spaceToBatchGeneric( 1274 reinterpret_cast<const _Float16*>(input_tmp.buffer), input_tmp.shape(), 1275 reinterpret_cast<const int32_t*>(blockSize.buffer), 1276 reinterpret_cast<const int32_t*>(paddings.buffer), paddings.shape(), 1277 reinterpret_cast<_Float16*>(output_tmp.buffer), outShape); 1278 break; 1279 } 1280 case OperandType::TENSOR_QUANT8_ASYMM: { 1281 success = spaceToBatchGeneric( 1282 reinterpret_cast<const uint8_t*>(input_tmp.buffer), input_tmp.shape(), 1283 reinterpret_cast<const int32_t*>(blockSize.buffer), 1284 reinterpret_cast<const int32_t*>(paddings.buffer), paddings.shape(), 1285 reinterpret_cast<uint8_t*>(output_tmp.buffer), outShape); 1286 break; 1287 } 1288 default: { 1289 LOG(ERROR) << "Unsupported data type"; 1290 success = false; 1291 } 1292 } 1293 if (data_layout) { 1294 output_tmp_guard.reset(output_tmp.buffer); 1295 } 1296 if (!success || !convertFromNhwc(output, output_tmp, data_layout, &result)) { 1297 success = false; 1298 break; 1299 } 1300 } break; 1301 case OperationType::PAD: 1302 case OperationType::PAD_V2: { 1303 const bool isV2 = operation.type == OperationType::PAD_V2; 1304 if (!allParametersPresent(isV2 ? 3 : 2, 1)) { 1305 return ANEURALNETWORKS_BAD_DATA; 1306 } 1307 const RunTimeOperandInfo& input = mOperands[ins[0]]; 1308 const RunTimeOperandInfo& paddings = mOperands[ins[1]]; 1309 1310 RunTimeOperandInfo& output = mOperands[outs[0]]; 1311 Shape outShape = output.shape(); 1312 1313 if (!padPrepare(input.shape(), reinterpret_cast<const int32_t*>(paddings.buffer), 1314 paddings.shape(), &outShape) || 1315 !setInfoAndAllocateIfNeeded(&output, outShape, &result)) { 1316 break; 1317 } 1318 if (input.type == OperandType::TENSOR_FLOAT32) { 1319 float pad_value = isV2 ? getScalarData<float>(mOperands[ins[2]]) : 0; 1320 success = padGeneric(reinterpret_cast<const float*>(input.buffer), input.shape(), 1321 reinterpret_cast<const int32_t*>(paddings.buffer), pad_value, 1322 reinterpret_cast<float*>(output.buffer), outShape); 1323 } else if (input.type == OperandType::TENSOR_FLOAT16) { 1324 _Float16 pad_value = isV2 ? getScalarData<_Float16>(mOperands[ins[2]]) : 0; 1325 success = padGeneric(reinterpret_cast<const _Float16*>(input.buffer), input.shape(), 1326 reinterpret_cast<const int32_t*>(paddings.buffer), 1327 static_cast<_Float16>(pad_value), 1328 reinterpret_cast<_Float16*>(output.buffer), outShape); 1329 } else if (input.type == OperandType::TENSOR_QUANT8_ASYMM) { 1330 uint8_t pad_value = 1331 isV2 ? 
getScalarData<uint8_t>(mOperands[ins[2]]) : outShape.offset; 1332 success = padGeneric(input.buffer, input.shape(), 1333 reinterpret_cast<const int32_t*>(paddings.buffer), pad_value, 1334 output.buffer, outShape); 1335 } 1336 } break; 1337 case OperationType::CAST: { 1338 if (!allParametersPresent(1, 1)) { 1339 return ANEURALNETWORKS_BAD_DATA; 1340 } 1341 const RunTimeOperandInfo& input = mOperands[ins[0]]; 1342 1343 RunTimeOperandInfo& output = mOperands[outs[0]]; 1344 Shape outShape = output.shape(); 1345 1346 success = cast::prepare(input.shape(), &outShape) && 1347 setInfoAndAllocateIfNeeded(&output, outShape, &result) && 1348 cast::eval(input.buffer, input.shape(), output.buffer, outShape); 1349 } break; 1350 case OperationType::SQUEEZE: { 1351 if (ins.size() != 2 || outs.size() != 1 || 1352 mOperands[ins[0]].lifetime == OperandLifeTime::NO_VALUE || 1353 mOperands[outs[0]].lifetime == OperandLifeTime::NO_VALUE) { 1354 LOG(ERROR) << "Wrong input/output count or lifetime for SQUEEZE op."; 1355 return ANEURALNETWORKS_BAD_DATA; 1356 } 1357 const RunTimeOperandInfo& input = mOperands[ins[0]]; 1358 const RunTimeOperandInfo& squeezeDims = mOperands[ins[1]]; 1359 1360 RunTimeOperandInfo& output = mOperands[outs[0]]; 1361 Shape outShape = output.shape(); 1362 1363 success = squeezePrepare(input.shape(), 1364 reinterpret_cast<const int32_t*>(squeezeDims.buffer), 1365 squeezeDims.shape(), &outShape) && 1366 setInfoAndAllocateIfNeeded(&output, outShape, &result) && 1367 copyData(input.buffer, input.shape(), output.buffer, outShape); 1368 } break; 1369 case OperationType::STRIDED_SLICE: { 1370 if (!allParametersPresent(7, 1)) { 1371 return ANEURALNETWORKS_BAD_DATA; 1372 } 1373 const RunTimeOperandInfo& input = mOperands[ins[0]]; 1374 const RunTimeOperandInfo& begins = mOperands[ins[1]]; 1375 const RunTimeOperandInfo& ends = mOperands[ins[2]]; 1376 const RunTimeOperandInfo& strides = mOperands[ins[3]]; 1377 int32_t beginMask = getScalarData<int32_t>(mOperands[ins[4]]); 1378 int32_t endMask = getScalarData<int32_t>(mOperands[ins[5]]); 1379 int32_t shrinkAxisMask = getScalarData<int32_t>(mOperands[ins[6]]); 1380 1381 RunTimeOperandInfo& output = mOperands[outs[0]]; 1382 Shape outShape = output.shape(); 1383 1384 success = 1385 stridedSlicePrepare( 1386 input.shape(), reinterpret_cast<const int32_t*>(begins.buffer), 1387 begins.shape(), reinterpret_cast<const int32_t*>(ends.buffer), 1388 ends.shape(), reinterpret_cast<const int32_t*>(strides.buffer), 1389 strides.shape(), beginMask, endMask, shrinkAxisMask, &outShape) && 1390 setInfoAndAllocateIfNeeded(&output, outShape, &result) && 1391 stridedSliceGeneric(input.buffer, input.shape(), 1392 reinterpret_cast<const int32_t*>(begins.buffer), 1393 reinterpret_cast<const int32_t*>(ends.buffer), 1394 reinterpret_cast<const int32_t*>(strides.buffer), beginMask, 1395 endMask, shrinkAxisMask, output.buffer, outShape); 1396 } break; 1397 case OperationType::MEAN: { 1398 if (!allParametersPresent(3, 1)) { 1399 return ANEURALNETWORKS_BAD_DATA; 1400 } 1401 const RunTimeOperandInfo& input = mOperands[ins[0]]; 1402 const RunTimeOperandInfo& axis = mOperands[ins[1]]; 1403 int32_t keepDims = getScalarData<int32_t>(mOperands[ins[2]]); 1404 1405 RunTimeOperandInfo& output = mOperands[outs[0]]; 1406 Shape outShape = output.shape(); 1407 1408 if (!meanPrepare(input.shape(), reinterpret_cast<const int32_t*>(axis.buffer), 1409 axis.shape(), keepDims > 0, &outShape) || 1410 !setInfoAndAllocateIfNeeded(&output, outShape, &result)) { 1411 break; 1412 } 1413 if (input.type == 
OperandType::TENSOR_FLOAT16) { 1414 success = meanFloat16(reinterpret_cast<_Float16*>(input.buffer), input.shape(), 1415 reinterpret_cast<const int32_t*>(axis.buffer), axis.shape(), 1416 keepDims > 0, reinterpret_cast<_Float16*>(output.buffer), 1417 outShape); 1418 } else if (input.type == OperandType::TENSOR_FLOAT32) { 1419 success = meanGeneric<float, float>( 1420 reinterpret_cast<float*>(input.buffer), input.shape(), 1421 reinterpret_cast<const int32_t*>(axis.buffer), axis.shape(), keepDims > 0, 1422 reinterpret_cast<float*>(output.buffer), outShape); 1423 } else if (input.type == OperandType::TENSOR_QUANT8_ASYMM) { 1424 success = meanGeneric<uint8_t, int32_t>( 1425 reinterpret_cast<uint8_t*>(input.buffer), input.shape(), 1426 reinterpret_cast<const int32_t*>(axis.buffer), axis.shape(), keepDims > 0, 1427 reinterpret_cast<uint8_t*>(output.buffer), outShape); 1428 } 1429 } break; 1430 case OperationType::ARGMAX: 1431 case OperationType::ARGMIN: { 1432 if (!allParametersPresent(2, 1)) { 1433 return ANEURALNETWORKS_BAD_DATA; 1434 } 1435 const RunTimeOperandInfo& input = mOperands[ins[0]]; 1436 int32_t axis = getScalarData<int32_t>(mOperands[ins[1]]); 1437 1438 RunTimeOperandInfo& output = mOperands[outs[0]]; 1439 Shape outShape = output.shape(); 1440 1441 const bool isArgMin = operation.type == OperationType::ARGMIN; 1442 success = argMinMaxPrepare(input.shape(), axis, &outShape) && 1443 setInfoAndAllocateIfNeeded(&output, outShape, &result) && 1444 argMinMaxGeneric(input.buffer, input.shape(), axis, isArgMin, output.buffer, 1445 outShape); 1446 } break; 1447 case OperationType::EXPAND_DIMS: { 1448 if (!allParametersPresent(2, 1)) { 1449 return ANEURALNETWORKS_BAD_DATA; 1450 } 1451 const RunTimeOperandInfo& input = mOperands[ins[0]]; 1452 int32_t axis = getScalarData<int32_t>(mOperands[ins[1]]); 1453 1454 RunTimeOperandInfo& output = mOperands[outs[0]]; 1455 Shape outShape = output.shape(); 1456 1457 success = expand_dims::prepare(input.shape(), axis, &outShape) && 1458 setInfoAndAllocateIfNeeded(&output, outShape, &result) && 1459 expand_dims::eval(input.buffer, input.shape(), axis, output.buffer, outShape); 1460 } break; 1461 case OperationType::SPLIT: { 1462 if (ins.size() != 3) { 1463 LOG(ERROR) << "Wrong input count"; 1464 return ANEURALNETWORKS_BAD_DATA; 1465 } 1466 1467 const RunTimeOperandInfo& input = mOperands[ins[0]]; 1468 const int32_t axis = getScalarData<int32_t>(mOperands[ins[1]]); 1469 const int32_t numOutputs = getScalarData<int32_t>(mOperands[ins[2]]); 1470 1471 if (numOutputs != outs.size()) { 1472 return ANEURALNETWORKS_BAD_DATA; 1473 } 1474 1475 std::vector<Shape> outputShapes(numOutputs); 1476 for (int i = 0; i < numOutputs; ++i) { 1477 outputShapes[i] = mOperands[outs[i]].shape(); 1478 } 1479 1480 success = splitPrepare(input.shape(), axis, numOutputs, &outputShapes); 1481 for (int i = 0; i < numOutputs; ++i) { 1482 success = success && setInfoAndAllocateIfNeeded(&(mOperands[outs[i]]), 1483 outputShapes[i], &result); 1484 } 1485 switch (input.type) { 1486 case OperandType::TENSOR_FLOAT16: { 1487 std::vector<_Float16*> outputDataPtrs(numOutputs); 1488 for (int i = 0; i < numOutputs; ++i) { 1489 outputDataPtrs[i] = reinterpret_cast<_Float16*>(mOperands[outs[i]].buffer); 1490 } 1491 success = success && 1492 splitFloat16(reinterpret_cast<const _Float16*>(input.buffer), 1493 input.shape(), axis, &outputDataPtrs, outputShapes); 1494 } break; 1495 case OperandType::TENSOR_FLOAT32: { 1496 std::vector<float*> outputDataPtrs(numOutputs); 1497 for (int i = 0; i < numOutputs; 
        case OperationType::SPLIT: {
            if (ins.size() != 3) {
                LOG(ERROR) << "Wrong input count";
                return ANEURALNETWORKS_BAD_DATA;
            }

            const RunTimeOperandInfo& input = mOperands[ins[0]];
            const int32_t axis = getScalarData<int32_t>(mOperands[ins[1]]);
            const int32_t numOutputs = getScalarData<int32_t>(mOperands[ins[2]]);

            if (numOutputs != outs.size()) {
                return ANEURALNETWORKS_BAD_DATA;
            }

            std::vector<Shape> outputShapes(numOutputs);
            for (int i = 0; i < numOutputs; ++i) {
                outputShapes[i] = mOperands[outs[i]].shape();
            }

            success = splitPrepare(input.shape(), axis, numOutputs, &outputShapes);
            for (int i = 0; i < numOutputs; ++i) {
                success = success && setInfoAndAllocateIfNeeded(&(mOperands[outs[i]]),
                                                                outputShapes[i], &result);
            }
            switch (input.type) {
                case OperandType::TENSOR_FLOAT16: {
                    std::vector<_Float16*> outputDataPtrs(numOutputs);
                    for (int i = 0; i < numOutputs; ++i) {
                        outputDataPtrs[i] = reinterpret_cast<_Float16*>(mOperands[outs[i]].buffer);
                    }
                    success = success &&
                              splitFloat16(reinterpret_cast<const _Float16*>(input.buffer),
                                           input.shape(), axis, &outputDataPtrs, outputShapes);
                } break;
                case OperandType::TENSOR_FLOAT32: {
                    std::vector<float*> outputDataPtrs(numOutputs);
                    for (int i = 0; i < numOutputs; ++i) {
                        outputDataPtrs[i] = reinterpret_cast<float*>(mOperands[outs[i]].buffer);
                    }
                    success = success &&
                              splitFloat32(reinterpret_cast<const float*>(input.buffer),
                                           input.shape(), axis, &outputDataPtrs, outputShapes);
                } break;
                case OperandType::TENSOR_INT32: {
                    std::vector<int32_t*> outputDataPtrs(numOutputs);
                    for (int i = 0; i < numOutputs; ++i) {
                        outputDataPtrs[i] = reinterpret_cast<int32_t*>(mOperands[outs[i]].buffer);
                    }
                    success = success &&
                              splitInt32(reinterpret_cast<const int32_t*>(input.buffer),
                                         input.shape(), axis, &outputDataPtrs, outputShapes);
                } break;
                case OperandType::TENSOR_QUANT8_ASYMM: {
                    std::vector<uint8_t*> outputDataPtrs(numOutputs);
                    for (int i = 0; i < numOutputs; ++i) {
                        outputDataPtrs[i] = reinterpret_cast<uint8_t*>(mOperands[outs[i]].buffer);
                    }
                    success = success &&
                              splitQuant8(reinterpret_cast<const uint8_t*>(input.buffer),
                                          input.shape(), axis, &outputDataPtrs, outputShapes);
                } break;
                default: {
                    return ANEURALNETWORKS_BAD_DATA;
                }
            }
        } break;
        case OperationType::MAXIMUM:
        case OperationType::MINIMUM: {
            if (!allParametersPresent(2, 1)) {
                return ANEURALNETWORKS_BAD_DATA;
            }
            const RunTimeOperandInfo& in1 = mOperands[ins[0]];
            const RunTimeOperandInfo& in2 = mOperands[ins[1]];

            RunTimeOperandInfo& output = mOperands[outs[0]];
            Shape outputShape = output.shape();

            const bool isMinimum = operation.type == OperationType::MINIMUM;
            success = maximum_minimum::prepare(in1.shape(), in2.shape(), &outputShape) &&
                      setInfoAndAllocateIfNeeded(&output, outputShape, &result) &&
                      maximum_minimum::eval(in1.buffer, in1.shape(), in2.buffer, in2.shape(),
                                            isMinimum, output.buffer, outputShape);
        } break;
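        // GROUPED_CONV_2D accepts either an explicit-padding signature (12 inputs) or an
        // implicit-padding signature (9 inputs), plus a layout flag: data_layout == true means
        // the tensors arrive in NCHW order and are converted to NHWC around the kernel call via
        // convertToNhwc()/convertFromNhwc(). With numGroups groups, the input channels are
        // divided into numGroups slices and each slice is convolved with its own subset of the
        // filters; e.g. (hypothetical shapes) an NHWC input of [1, 8, 8, 6] with numGroups == 2
        // pairs input channels 0-2 with the first half of the filters and channels 3-5 with the
        // second half.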
        case OperationType::GROUPED_CONV_2D: {
            const size_t inCount = ins.size();
            if ((inCount != 12 && inCount != 9) || !allParametersPresent(inCount, 1)) {
                return ANEURALNETWORKS_BAD_DATA;
            }
            const RunTimeOperandInfo& input = mOperands[ins[0]];
            const RunTimeOperandInfo& filter = mOperands[ins[1]];
            const RunTimeOperandInfo& bias = mOperands[ins[2]];

            int32_t padding_left, padding_right;
            int32_t padding_top, padding_bottom;
            int32_t padding_implicit = 0;
            int32_t stride_width, stride_height;
            int32_t numGroups;
            int32_t activation;
            bool data_layout = false;

            if (inCount == 12) {
                padding_left = getScalarData<int32_t>(mOperands[ins[3]]);
                padding_right = getScalarData<int32_t>(mOperands[ins[4]]);
                padding_top = getScalarData<int32_t>(mOperands[ins[5]]);
                padding_bottom = getScalarData<int32_t>(mOperands[ins[6]]);
                stride_width = getScalarData<int32_t>(mOperands[ins[7]]);
                stride_height = getScalarData<int32_t>(mOperands[ins[8]]);
                numGroups = getScalarData<int32_t>(mOperands[ins[9]]);
                activation = getScalarData<int32_t>(mOperands[ins[10]]);
                data_layout = getScalarData<bool>(mOperands[ins[11]]);
            } else {
                padding_implicit = getScalarData<int32_t>(mOperands[ins[3]]);
                stride_width = getScalarData<int32_t>(mOperands[ins[4]]);
                stride_height = getScalarData<int32_t>(mOperands[ins[5]]);
                numGroups = getScalarData<int32_t>(mOperands[ins[6]]);
                activation = getScalarData<int32_t>(mOperands[ins[7]]);
                data_layout = getScalarData<bool>(mOperands[ins[8]]);
            }

            RunTimeOperandInfo& output = mOperands[outs[0]];
            Shape outShape = output.shape();

            RunTimeOperandInfo input_tmp, output_tmp;
            std::unique_ptr<uint8_t[]> input_tmp_guard, output_tmp_guard;
            if (!convertToNhwc(input_tmp, input, input_tmp_guard, data_layout)) {
                success = false;
                break;
            }
            output_tmp.lifetime = OperandLifeTime::TEMPORARY_VARIABLE;
            output_tmp.buffer = data_layout ? nullptr : output.buffer;
            output_tmp.length = data_layout ? 0 : output.length;

            if (inCount == 9) {
                Shape inputShape = input_tmp.shape();
                Shape filterShape = filter.shape();
                int32_t input_width = getSizeOfDimension(inputShape, 2);
                int32_t input_height = getSizeOfDimension(inputShape, 1);
                int32_t filter_width = getSizeOfDimension(filterShape, 2);
                int32_t filter_height = getSizeOfDimension(filterShape, 1);
                calculateExplicitPadding(input_width, stride_width, filter_width, padding_implicit,
                                         &padding_left, &padding_right);
                calculateExplicitPadding(input_height, stride_height, filter_height,
                                         padding_implicit, &padding_top, &padding_bottom);
            }

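            // For the 9-input form, the single padding_implicit scalar is expanded above into
            // explicit left/right/top/bottom values. As a rough illustration with hypothetical
            // sizes: input_width == 8, stride_width == 2 and filter_width == 3 under the SAME
            // padding scheme give a total horizontal padding of 1, split as padding_left == 0
            // and padding_right == 1, while the VALID scheme yields zero padding on both sides.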
            if (!groupedConvPrepare(input_tmp.shape(), filter.shape(), bias.shape(), padding_left,
                                    padding_right, padding_top, padding_bottom, stride_width,
                                    stride_height, numGroups, &outShape) ||
                !setInfoAndAllocateIfNeeded(&output_tmp, outShape, &result)) {
                if (!data_layout) output.dimensions = output_tmp.dimensions;
                success = false;
                break;
            }

            if (input_tmp.type == OperandType::TENSOR_FLOAT32) {
                success = groupedConvFloat32(
                        reinterpret_cast<const float*>(input_tmp.buffer), input_tmp.shape(),
                        reinterpret_cast<const float*>(filter.buffer), filter.shape(),
                        reinterpret_cast<const float*>(bias.buffer), bias.shape(), padding_left,
                        padding_right, padding_top, padding_bottom, stride_width, stride_height,
                        numGroups, activation, reinterpret_cast<float*>(output_tmp.buffer),
                        outShape);
            } else if (input_tmp.type == OperandType::TENSOR_FLOAT16) {
                success = groupedConvFloat16(
                        reinterpret_cast<const _Float16*>(input_tmp.buffer), input_tmp.shape(),
                        reinterpret_cast<const _Float16*>(filter.buffer), filter.shape(),
                        reinterpret_cast<const _Float16*>(bias.buffer), bias.shape(), padding_left,
                        padding_right, padding_top, padding_bottom, stride_width, stride_height,
                        numGroups, activation, reinterpret_cast<_Float16*>(output_tmp.buffer),
                        outShape);
            } else if (input_tmp.type == OperandType::TENSOR_QUANT8_ASYMM) {
                if (filter.type == OperandType::TENSOR_QUANT8_SYMM_PER_CHANNEL) {
                    success = groupedConvQuant8PerChannel(
                            reinterpret_cast<const uint8_t*>(input_tmp.buffer), input_tmp.shape(),
                            reinterpret_cast<const int8_t*>(filter.buffer), filter.shape(),
                            filter.extraParams.channelQuant().scales.data(),
                            reinterpret_cast<const int32_t*>(bias.buffer), bias.shape(),
                            padding_left, padding_right, padding_top, padding_bottom, stride_width,
                            stride_height, numGroups, activation,
                            reinterpret_cast<uint8_t*>(output_tmp.buffer), outShape);
                } else if (filter.type == OperandType::TENSOR_QUANT8_ASYMM) {
                    success = groupedConvQuant8(
                            reinterpret_cast<const uint8_t*>(input_tmp.buffer), input_tmp.shape(),
                            reinterpret_cast<const uint8_t*>(filter.buffer), filter.shape(),
                            reinterpret_cast<const int32_t*>(bias.buffer), bias.shape(),
                            padding_left, padding_right, padding_top, padding_bottom, stride_width,
                            stride_height, numGroups, activation,
                            reinterpret_cast<uint8_t*>(output_tmp.buffer), outShape);
                }
            }

            if (data_layout) {
                output_tmp_guard.reset(output_tmp.buffer);
            }
            if (!success || !convertFromNhwc(output, output_tmp, data_layout, &result)) {
                success = false;
                break;
            }
        } break;
        case OperationType::TILE: {
            if (!allParametersPresent(2, 1)) {
                return ANEURALNETWORKS_BAD_DATA;
            }
            const RunTimeOperandInfo& input = mOperands[ins[0]];
            const RunTimeOperandInfo& multiples = mOperands[ins[1]];

            RunTimeOperandInfo& output = mOperands[outs[0]];
            Shape outShape = output.shape();

            success =
                    tile::prepare(input.shape(), reinterpret_cast<const int32_t*>(multiples.buffer),
                                  multiples.shape(), &outShape) &&
                    setInfoAndAllocateIfNeeded(&output, outShape, &result) &&
                    tile::eval(input.buffer, input.shape(),
                               reinterpret_cast<const int32_t*>(multiples.buffer), output.buffer,
                               outShape);
        } break;
        case OperationType::QUANTIZED_16BIT_LSTM: {
            if (!allParametersPresent(15, 2)) {
                return ANEURALNETWORKS_BAD_DATA;
            }

            RunTimeOperandInfo& cellStateOut =
                    mOperands[outs[QuantizedLSTMCell::kCellStateOutTensor]];
            RunTimeOperandInfo& output = mOperands[outs[QuantizedLSTMCell::kOutputTensor]];

            Shape cellStateOutShape, outputShape;
            QuantizedLSTMCell quantizedLSTMCell(operation, mOperands);

            success = QuantizedLSTMCell::prepare(operation, mOperands, &cellStateOutShape,
                                                 &outputShape) &&
                      setInfoAndAllocateIfNeeded(&cellStateOut, cellStateOutShape, &result) &&
                      setInfoAndAllocateIfNeeded(&output, outputShape, &result) &&
                      quantizedLSTMCell.eval();
        } break;
        case OperationType::POW: {
            if (!allParametersPresent(2, 1)) {
                return ANEURALNETWORKS_BAD_DATA;
            }
            const RunTimeOperandInfo& base = mOperands[ins[0]];
            const RunTimeOperandInfo& exponent = mOperands[ins[1]];

            RunTimeOperandInfo& output = mOperands[outs[0]];
            Shape outShape = output.shape();

            success = pow::prepare(base.shape(), exponent.shape(), &outShape) &&
                      setInfoAndAllocateIfNeeded(&output, outShape, &result) &&
                      pow::eval(base.buffer, base.shape(), exponent.buffer, exponent.shape(),
                                output.buffer, outShape);
        } break;
        case OperationType::TOPK_V2: {
            if (!allParametersPresent(2, 2)) {
                return ANEURALNETWORKS_BAD_DATA;
            }
            const RunTimeOperandInfo& input = mOperands[ins[0]];
            int32_t k = getScalarData<int32_t>(mOperands[ins[1]]);

            RunTimeOperandInfo& values = mOperands[outs[0]];
            Shape valuesShape = values.shape();
            RunTimeOperandInfo& indices = mOperands[outs[1]];
            Shape indicesShape = indices.shape();

            success = topk_v2::prepare(input.shape(), k, &valuesShape, &indicesShape) &&
                      setInfoAndAllocateIfNeeded(&values, valuesShape, &result) &&
                      setInfoAndAllocateIfNeeded(&indices, indicesShape, &result) &&
                      topk_v2::eval(input.buffer, input.shape(), k, values.buffer, valuesShape,
                                    indices.buffer, indicesShape);
        } break;
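        // Operations that have been migrated to OperationResolver fall through to the default
        // case: findOperation() looks up the registration, whose prepare()/execute() callbacks
        // run against an OperationExecutionContext wrapping the runtime operands. The flags
        // allowOmittedOperand and allowZeroSizedInput let individual operations opt out of the
        // generic NO_VALUE and zero-dimension checks. Abbreviated sketch of the dispatch
        // performed below (using `reg` as a shorthand for the registration pointer):
        //
        //   OperationExecutionContext context(&operation, mOperands.data());
        //   success = (reg->flags.allowOmittedOperand || context.checkNoOmittedOperand()) &&
        //             (reg->flags.allowZeroSizedInput || context.checkNoZeroSizedInput()) &&
        //             reg->prepare(&context) && reg->execute(&context);
        //   result = context.getResultCode();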
        default: {
            const OperationRegistration* operationRegistration =
                    mOperationResolver->findOperation(operation.type);
            if (operationRegistration == nullptr) {
                LOG(ERROR) << getOperationName(operation.type) << " not registered";
            } else if (operationRegistration->prepare == nullptr ||
                       operationRegistration->execute == nullptr) {
                LOG(ERROR) << "Incomplete operation registration: "
                           << getOperationName(operation.type);
            } else {
                OperationExecutionContext context(&operation, mOperands.data());
                success = operationRegistration->flags.allowOmittedOperand ||
                          context.checkNoOmittedOperand();
                success = success && (operationRegistration->flags.allowZeroSizedInput ||
                                      context.checkNoZeroSizedInput());
                success = success && operationRegistration->prepare(&context) &&
                          operationRegistration->execute(&context);
                result = context.getResultCode();
            }
        }
    }
    if (!success && result == ANEURALNETWORKS_NO_ERROR) {
        result = ANEURALNETWORKS_OP_FAILED;
    }
    if (result != ANEURALNETWORKS_NO_ERROR) {
        LOG(ERROR) << getOperationName(operation.type) << " failed.";
        return result;
    }

    freeNoLongerUsedOperands(ins);
    return ANEURALNETWORKS_NO_ERROR;
}

void CpuExecutor::finish(int result) {
    // Free allocated temporary operands.
    for (auto& info : mOperands) {
        if (info.lifetime == OperandLifeTime::TEMPORARY_VARIABLE && info.buffer != nullptr) {
            delete[] info.buffer;
            info.buffer = nullptr;
        }
    }

    // Only report the output shapes when the result code is NO_ERROR or
    // OUTPUT_INSUFFICIENT_SIZE.
    if (result == ANEURALNETWORKS_NO_ERROR || result == ANEURALNETWORKS_OUTPUT_INSUFFICIENT_SIZE) {
        const auto& outputs = mModel->outputIndexes;
        mOutputShapes.resize(outputs.size());
        for (uint32_t i = 0; i < outputs.size(); i++) {
            const uint32_t operandIndex = outputs[i];
            RunTimeOperandInfo& from = mOperands[operandIndex];
            mOutputShapes[i].dimensions = from.dimensions;
            mOutputShapes[i].isSufficient = from.isSufficient();
        }
    } else {
        mOutputShapes.clear();
    }

    mModel = nullptr;
    mRequest = nullptr;
    mFinished = true;
}

// b/109953668, disable OpenMP
#ifdef NNAPI_OPENMP
ScopedOpenmpSettings::ScopedOpenmpSettings() {
    mBlocktimeInitial = kmp_get_blocktime();
    kmp_set_blocktime(20);  // ms, see b/109645291

#if NNAPI_LIMIT_CPU_THREADS
    // Code not yet enabled. The number of threads used here was chosen based on
    // benchmarking; see the longer comment by the class declaration.
    mMaxThreadsInitial = Eigen::nbThreads();
    const int nProcs = omp_get_num_procs();
    int threads = nProcs;
    if (nProcs >= 8) {
        threads = nProcs - 4;
    } else if (nProcs >= 4) {
        threads = nProcs - 2;
    }
    Eigen::setNbThreads(threads);
#endif
}

ScopedOpenmpSettings::~ScopedOpenmpSettings() {
    kmp_set_blocktime(mBlocktimeInitial);
#if NNAPI_LIMIT_CPU_THREADS
    Eigen::setNbThreads(mMaxThreadsInitial);
#endif
}
#endif  // NNAPI_OPENMP

}  // namespace nn
}  // namespace android