1 /* 2 * Copyright (C) 2017 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #define LOG_TAG "ExecutionBuilder" 18 19 #include "ExecutionBuilder.h" 20 21 #include "CompilationBuilder.h" 22 #include "CpuExecutor.h" 23 #include "HalInterfaces.h" 24 #include "Manager.h" 25 #include "ModelBuilder.h" 26 #include "Utils.h" 27 28 #include <mutex> 29 #include <thread> 30 #include <vector> 31 32 namespace android { 33 namespace nn { 34 35 int ModelArgumentInfo::setFromPointer(const Operand& operand, 36 const ANeuralNetworksOperandType* type, void* data, 37 uint32_t length) { 38 if ((data == nullptr) != (length == 0)) { 39 const char* dataPtrMsg = data ? "NOT_NULLPTR" : "NULLPTR"; 40 LOG(ERROR) << "Data pointer must be nullptr if and only if length is zero (data = " 41 << dataPtrMsg << ", length = " << length << ")"; 42 return ANEURALNETWORKS_BAD_DATA; 43 } 44 if (data == nullptr) { 45 state = ModelArgumentInfo::HAS_NO_VALUE; 46 } else { 47 int n = updateDimensionInfo(operand, type); 48 if (n != ANEURALNETWORKS_NO_ERROR) { 49 return n; 50 } 51 uint32_t neededLength = sizeOfData(operand.type, dimensions); 52 if (operand.type != OperandType::OEM && neededLength != length) { 53 LOG(ERROR) << "Setting argument with invalid length: " << length 54 << ", expected length: " << neededLength; 55 return ANEURALNETWORKS_BAD_DATA; 56 } 57 state = ModelArgumentInfo::POINTER; 58 } 59 buffer = data; 60 locationAndLength = {.poolIndex = 0, .offset = 0, .length = length}; 61 return ANEURALNETWORKS_NO_ERROR; 62 } 63 64 int ModelArgumentInfo::setFromMemory(const Operand& operand, const ANeuralNetworksOperandType* type, 65 uint32_t poolIndex, uint32_t offset, uint32_t length) { 66 int n = updateDimensionInfo(operand, type); 67 if (n != ANEURALNETWORKS_NO_ERROR) { 68 return n; 69 } 70 uint32_t neededLength = sizeOfData(operand.type, dimensions); 71 if (operand.type != OperandType::OEM && neededLength != length) { 72 LOG(ERROR) << "Setting argument with invalid length: " << length 73 << ", expected length: " << neededLength; 74 return ANEURALNETWORKS_BAD_DATA; 75 } 76 77 state = ModelArgumentInfo::MEMORY; 78 locationAndLength = {.poolIndex = poolIndex, .offset = offset, .length = length}; 79 buffer = nullptr; 80 return ANEURALNETWORKS_NO_ERROR; 81 } 82 83 int ModelArgumentInfo::setFromTemporaryMemory(const Operand& operand, 84 uint32_t poolIndex, uint32_t offset) { 85 int n = updateDimensionInfo(operand, nullptr); 86 if (n != ANEURALNETWORKS_NO_ERROR) { 87 return n; 88 } 89 state = ModelArgumentInfo::MEMORY; 90 locationAndLength = 91 {.poolIndex = poolIndex, .offset = offset, .length = sizeOfData(operand)}; 92 buffer = nullptr; 93 return ANEURALNETWORKS_NO_ERROR; 94 } 95 96 int ModelArgumentInfo::updateDimensionInfo(const Operand& operand, 97 const ANeuralNetworksOperandType* newType) { 98 nnAssert(dimensions.empty()); 99 if (newType == nullptr) { 100 for (auto i : operand.dimensions) { 101 if (i == 0) { 102 LOG(ERROR) << "Setting input/output with unspecified dimensions"; 103 return ANEURALNETWORKS_BAD_DATA; 104 } 105 } 106 dimensions = operand.dimensions; 107 } else { 108 uint32_t count = newType->dimensionCount; 109 if (static_cast<OperandType>(newType->type) != operand.type || 110 count != operand.dimensions.size()) { 111 LOG(ERROR) << "Setting input/output with incompatible types"; 112 return ANEURALNETWORKS_BAD_DATA; 113 } 114 115 dimensions = hidl_vec<uint32_t>(count); 116 for (uint32_t i = 0; i < count; i++) { 117 if (operand.dimensions[i] != 0 && operand.dimensions[i] != newType->dimensions[i]) { 118 LOG(ERROR) << "Overriding a fully specified dimension is disallowed"; 119 return ANEURALNETWORKS_BAD_DATA; 120 } else { 121 dimensions[i] = newType->dimensions[i]; 122 } 123 } 124 } 125 return ANEURALNETWORKS_NO_ERROR; 126 } 127 128 ExecutionBuilder::ExecutionBuilder(const CompilationBuilder* compilation) : 129 mModel(compilation->mModel), 130 mPlan(&compilation->mPlan), 131 mPartitioning(compilation->mPartitioning), 132 mInputs(mModel->inputCount()), 133 mOutputs(mModel->outputCount()) { 134 VLOG(EXECUTION) << "ExecutionBuilder::ExecutionBuilder"; 135 } 136 137 int ExecutionBuilder::setInput(uint32_t index, const ANeuralNetworksOperandType* type, 138 const void* buffer, size_t length) { 139 uint32_t count = static_cast<uint32_t>(mInputs.size()); 140 if (index >= count) { 141 LOG(ERROR) << "ANeuralNetworksExecution_setInput bad index " << index << " " << count; 142 return ANEURALNETWORKS_BAD_DATA; 143 } 144 if (type != nullptr) { 145 int n = validateOperandType(*type, "ANeuralNetworksExecution_setInput", false); 146 if (n != ANEURALNETWORKS_NO_ERROR) { 147 return n; 148 } 149 } 150 if (length > 0xFFFFFFFF) { 151 LOG(ERROR) << "ANeuralNetworksExecution_setInput input exceeds max length " << length; 152 return ANEURALNETWORKS_BAD_DATA; 153 } 154 uint32_t l = static_cast<uint32_t>(length); 155 return mInputs[index].setFromPointer(mModel->getInputOperand(index), type, 156 const_cast<void*>(buffer), l); 157 } 158 159 int ExecutionBuilder::setInputFromMemory(uint32_t index, const ANeuralNetworksOperandType* type, 160 const Memory* memory, size_t offset, size_t length) { 161 // Should be similar to StepExecutor::setInputOrOutputFromTemporaryMemory() 162 163 uint32_t count = static_cast<uint32_t>(mInputs.size()); 164 if (index >= count) { 165 LOG(ERROR) << "ANeuralNetworksExecution_setInputFromMemory bad index " << index << " " 166 << count; 167 return ANEURALNETWORKS_BAD_DATA; 168 } 169 if (!memory->validateSize(offset, length)) { 170 return ANEURALNETWORKS_BAD_DATA; 171 } 172 // TODO validate the rest 173 uint32_t poolIndex = mMemories.add(memory); 174 return mInputs[index].setFromMemory(mModel->getInputOperand(index), type, poolIndex, offset, 175 length); 176 } 177 178 int ExecutionBuilder::setOutput(uint32_t index, const ANeuralNetworksOperandType* type, void* buffer, 179 size_t length) { 180 uint32_t count = static_cast<uint32_t>(mOutputs.size()); 181 if (index >= count) { 182 LOG(ERROR) << "ANeuralNetworksExecution_setOutput bad index " << index << " " << count; 183 return ANEURALNETWORKS_BAD_DATA; 184 } 185 if (type != nullptr) { 186 int n = validateOperandType(*type, "ANeuralNetworksExecution_setOutput", false); 187 if (n != ANEURALNETWORKS_NO_ERROR) { 188 return n; 189 } 190 } 191 if (length > 0xFFFFFFFF) { 192 LOG(ERROR) << "ANeuralNetworksExecution_setOutput input exceeds max length " << length; 193 return ANEURALNETWORKS_BAD_DATA; 194 } 195 uint32_t l = static_cast<uint32_t>(length); 196 return mOutputs[index].setFromPointer(mModel->getOutputOperand(index), type, buffer, l); 197 } 198 199 int ExecutionBuilder::setOutputFromMemory(uint32_t index, const ANeuralNetworksOperandType* type, 200 const Memory* memory, size_t offset, size_t length) { 201 // Should be similar to StepExecutor::setInputOrOutputFromTemporaryMemory() 202 203 uint32_t count = static_cast<uint32_t>(mOutputs.size()); 204 if (index >= count) { 205 LOG(ERROR) << "ANeuralNetworksExecution_setOutputFromMemory bad index " << index << " " 206 << count; 207 return ANEURALNETWORKS_BAD_DATA; 208 } 209 if (!memory->validateSize(offset, length)) { 210 return ANEURALNETWORKS_BAD_DATA; 211 } 212 // TODO validate the rest 213 uint32_t poolIndex = mMemories.add(memory); 214 return mOutputs[index].setFromMemory(mModel->getOutputOperand(index), type, poolIndex, offset, 215 length); 216 } 217 218 // Attempt synchronous execution of full model on CPU. 219 // Ensure that executionCallback->notify() is called. 220 static void cpuFallbackFull(const ExecutionBuilder* executionBuilder, 221 const sp<ExecutionCallback>& executionCallback) { 222 VLOG(EXECUTION) << "cpuFallbackFull"; 223 StepExecutor executor(executionBuilder, executionBuilder->getModel(), 224 nullptr /* no VersionedIDevice, so CPU */, 225 nullptr /* no IPreparedModel */); 226 executor.mapInputsAndOutputsTrivially(); 227 sp<ExecutionCallback> fallbackCallback; 228 int n = executor.startCompute(&fallbackCallback); 229 if (n != ANEURALNETWORKS_NO_ERROR) { 230 executionCallback->notify(convertResultCodeToErrorStatus(n)); 231 return; 232 } 233 fallbackCallback->wait(); 234 executionCallback->notify(fallbackCallback->getStatus()); 235 } 236 237 // Attempt synchronous execution on CPU. 238 // (1) First, attempt to execute this step on CPU. If successful, 239 // return true. (Do not call executionCallback->notify().) 240 // (2) If unsuccessful, attempt to execute the full model on CPU, 241 // ensure that executionCallback->notify() is called, and return 242 // false. 243 static bool cpuFallbackPartial(const ExecutionBuilder* executionBuilder, 244 const ExecutionPlan* plan, 245 std::shared_ptr<ExecutionPlan::Controller> controller, 246 const sp<ExecutionCallback>& executionCallback) { 247 VLOG(EXECUTION) << "cpuFallbackPartial"; 248 std::shared_ptr<StepExecutor> executor; 249 int n = plan->fallback(controller, &executor); 250 if (n != ANEURALNETWORKS_NO_ERROR || executor->isCpu()) { 251 cpuFallbackFull(executionBuilder, executionCallback); 252 return false; 253 } 254 sp<ExecutionCallback> fallbackCallback; 255 if (executor->startComputeOnCpu(&fallbackCallback) != ANEURALNETWORKS_NO_ERROR) { 256 cpuFallbackFull(executionBuilder, executionCallback); 257 return false; 258 } 259 fallbackCallback->wait(); 260 if (fallbackCallback->getStatus() != ErrorStatus::NONE) { 261 cpuFallbackFull(executionBuilder, executionCallback); 262 return false; 263 } 264 return true; 265 } 266 267 static void asyncStartComputePartitioned(const ExecutionBuilder* executionBuilder, 268 const ExecutionPlan* plan, 269 std::shared_ptr<ExecutionPlan::Controller> controller, 270 bool allowFallback, 271 const sp<ExecutionCallback>& executionCallback) { 272 VLOG(EXECUTION) << "ExecutionBuilder::startCompute (from plan, iteratively)"; 273 while (true) { 274 std::shared_ptr<StepExecutor> executor; 275 VLOG(EXECUTION) << "looking for next StepExecutor"; 276 int n = plan->next(controller, &executor); 277 if (n != ANEURALNETWORKS_NO_ERROR) { 278 if (allowFallback) { 279 cpuFallbackFull(executionBuilder, executionCallback); 280 } else { 281 executionCallback->notify(convertResultCodeToErrorStatus(n)); 282 } 283 return; 284 } 285 if (executor == nullptr) { 286 executionCallback->notify(ErrorStatus::NONE); 287 return; 288 } 289 290 sp<ExecutionCallback> stepCallback; 291 n = executor->startCompute(&stepCallback); 292 if (n != ANEURALNETWORKS_NO_ERROR) { 293 if (allowFallback) { 294 if (cpuFallbackPartial(executionBuilder, plan, controller, executionCallback)) { 295 // Successfully executed one step on CPU. 296 continue; 297 } else { 298 // Either successfully executed entire plan on 299 // CPU, or tried and failed to do so. 300 return; 301 } 302 } else { 303 executionCallback->notify(convertResultCodeToErrorStatus(n)); 304 return; 305 } 306 } 307 stepCallback->wait(); 308 ErrorStatus status = stepCallback->getStatus(); 309 if (status != ErrorStatus::NONE) { 310 if (allowFallback) { 311 if (cpuFallbackPartial(executionBuilder, plan, controller, executionCallback)) { 312 // Successfully executed one step on CPU. 313 continue; 314 } else { 315 // Either successfully executed entire plan on 316 // CPU, or tried and failed to do so. 317 return; 318 } 319 } else { 320 executionCallback->notify(status); 321 return; 322 } 323 } 324 } 325 } 326 327 int ExecutionBuilder::startCompute(sp<ExecutionCallback>* synchronizationCallback) { 328 *synchronizationCallback = nullptr; 329 330 // TODO validate that we have full types for all inputs and outputs, 331 // that the graph is not cyclic, 332 333 for (auto& p : mInputs) { 334 if (p.state == ModelArgumentInfo::UNSPECIFIED) { 335 LOG(ERROR) << "ANeuralNetworksExecution_startCompute not all inputs specified"; 336 return ANEURALNETWORKS_BAD_DATA; 337 } 338 } 339 for (auto& p : mOutputs) { 340 if (p.state == ModelArgumentInfo::UNSPECIFIED) { 341 LOG(ERROR) << "ANeuralNetworksExecution_startCompute not all outputs specified"; 342 return ANEURALNETWORKS_BAD_DATA; 343 } 344 } 345 346 #ifndef DISABLE_PARTITIONED_EXECUTION 347 { 348 // TODO: Remove the non-plan-based path once we've fully integrated ExecutionPlan 349 // with the compilation and execution phases of the NN API? Or retain that path 350 // as a fallback in the case of partitioning failure? 351 // 352 // TODO: Entire plan-based-path should run in an asynchronous thread -- 353 // take the asynchronous thread logic out of startComputeOnCpu() and use 354 // it to wrap the plan-based-path. 355 if (mPartitioning > 0) { 356 const bool allowFallback = DeviceManager::partitioningAllowsFallback(mPartitioning); 357 std::shared_ptr<ExecutionPlan::Controller> controller = mPlan->makeController(this); 358 if (controller == nullptr) { 359 if (!allowFallback) { 360 return ANEURALNETWORKS_OP_FAILED; 361 } 362 } else { 363 // TODO: use a thread pool 364 365 // Prepare the callback for asynchronous execution. 366 // sp<ExecutionCallback> object is returned when the 367 // execution has been successfully launched, otherwise a 368 // nullptr is returned. The executionCallback is 369 // abstracted in the NN API as an "event". 370 sp<ExecutionCallback> executionCallback = new ExecutionCallback(); 371 std::thread thread(asyncStartComputePartitioned, this, mPlan, controller, 372 allowFallback, 373 executionCallback); 374 executionCallback->bind_thread(std::move(thread)); 375 *synchronizationCallback = executionCallback; 376 return ANEURALNETWORKS_NO_ERROR; 377 } 378 } 379 } 380 #else 381 { 382 // Find a driver that can handle all the operations. 383 // TODO: Does not handle CPU fallback (which is tricky because 384 // StepExecutor::startCompute() is designed as 385 // asynchronous). 386 // TODO: Does not actually behave asynchronously (because 387 // StepExecutor::startCompute() isn't actually asynchronous 388 // on a device as opposed to a CPU). 389 Model hidlModel; 390 mModel->setHidlModel(&hidlModel); 391 const std::vector<std::shared_ptr<Device>>& devices = DeviceManager::get()->getDrivers(); 392 for (const auto& device : devices) { 393 hidl_vec<bool> supports; 394 VLOG(EXECUTION) << "Checking " << device->getName(); 395 device->getSupportedOperations(hidlModel, &supports); 396 if (std::find(supports.begin(), supports.end(), false) == supports.end()) { 397 VLOG(EXECUTION) << "ExecutionBuilder::startCompute (without plan) on " << device->getName(); 398 StepExecutor executor(this, mModel, device->getInterface(), 399 nullptr /* no IPreparedModel, so compile */); 400 executor.mapInputsAndOutputsTrivially(); 401 return executor.startCompute(synchronizationCallback); 402 } 403 } 404 } 405 #endif // DISABLE_PARTITIONED_EXECUTION 406 407 // Run on the CPU. 408 VLOG(EXECUTION) << "ExecutionBuilder::startCompute (without plan) on CPU"; 409 StepExecutor executor(this, mModel, 410 nullptr /* no VersionedIDevice, so CPU */, 411 nullptr /* no IPreparedModel */); 412 executor.mapInputsAndOutputsTrivially(); 413 return executor.startCompute(synchronizationCallback); 414 } 415 416 // Figures out how to place each of the input or outputs in a buffer. This just does the layout, 417 // it does not copy data. Aligns each input a bit. 418 int StepExecutor::allocatePointerArgumentsToPool(std::vector<ModelArgumentInfo>* args, 419 Memory* memory) { 420 uint32_t nextPoolIndex = mMemories.size(); 421 int64_t total = 0; 422 for (auto& info : *args) { 423 if (info.state == ModelArgumentInfo::POINTER) { 424 DataLocation& loc = info.locationAndLength; 425 // TODO Good enough alignment? 426 total += alignBytesNeeded(static_cast<uint32_t>(total), loc.length); 427 loc.poolIndex = nextPoolIndex; 428 loc.offset = static_cast<uint32_t>(total); 429 total += loc.length; 430 } 431 }; 432 if (total > 0xFFFFFFFF) { 433 LOG(ERROR) << "ANeuralNetworksExecution_startCompute Size of all inputs or outputs exceeds " 434 "2^32."; 435 return ANEURALNETWORKS_BAD_DATA; 436 } 437 hidl_memory hidlMemory; 438 if (total > 0) { 439 memory->create(total); // TODO check error 440 mMemories.add(memory); 441 } 442 return ANEURALNETWORKS_NO_ERROR; 443 } 444 445 static void setRequestArgumentArray(const std::vector<ModelArgumentInfo>& argumentInfos, 446 hidl_vec<RequestArgument>* ioInfos) { 447 size_t count = argumentInfos.size(); 448 ioInfos->resize(count); 449 for (size_t i = 0; i < count; i++) { 450 const auto& info = argumentInfos[i]; 451 (*ioInfos)[i] = { .hasNoValue = info.state == ModelArgumentInfo::HAS_NO_VALUE, 452 .location = info.locationAndLength, 453 .dimensions = info.dimensions, 454 }; 455 } 456 } 457 458 StepExecutor::StepExecutor(const ExecutionBuilder* executionBuilder, 459 const ModelBuilder* model, 460 VersionedIDevice* driver, sp<IPreparedModel> preparedModel) : 461 mExecutionBuilder(executionBuilder), mModel(model), 462 mDriver(driver), mPreparedModel(preparedModel), 463 mInputs(model->inputCount()), mOutputs(model->outputCount()) {} 464 465 void StepExecutor::mapInputsAndOutputsTrivially() { 466 mInputs = mExecutionBuilder->mInputs; 467 mOutputs = mExecutionBuilder->mOutputs; 468 mMemories = mExecutionBuilder->mMemories; 469 } 470 471 void StepExecutor::mapInputOrOutput(const ModelArgumentInfo& builderInputOrOutput, 472 ModelArgumentInfo* executorInputOrOutput) { 473 *executorInputOrOutput = builderInputOrOutput; 474 switch (executorInputOrOutput->state) { 475 default: 476 nnAssert(!"unexpected ModelArgumentInfo::state"); 477 case ModelArgumentInfo::POINTER: 478 case ModelArgumentInfo::UNSPECIFIED: 479 break; 480 case ModelArgumentInfo::MEMORY: { 481 const uint32_t builderPoolIndex = 482 builderInputOrOutput.locationAndLength.poolIndex; 483 const Memory* memory = mExecutionBuilder->mMemories[builderPoolIndex]; 484 const uint32_t executorPoolIndex = mMemories.add(memory); 485 executorInputOrOutput->locationAndLength.poolIndex = 486 executorPoolIndex; 487 break; 488 } 489 } 490 } 491 492 int StepExecutor::setInputOrOutputFromTemporaryMemory(const Operand& inputOrOutputOperand, 493 const Memory* memory, uint32_t offset, 494 ModelArgumentInfo* inputOrOutputInfo) { 495 // Should be similar to 496 // ExecutionBuilder::setInputFromMemory() 497 // ExecutionBuilder::setOutputFromMemory() 498 499 uint32_t poolIndex = mMemories.add(memory); 500 return inputOrOutputInfo->setFromTemporaryMemory(inputOrOutputOperand, poolIndex, offset); 501 } 502 503 static void logArguments(const char* kind, const std::vector<ModelArgumentInfo> &args) { 504 for (unsigned i = 0; i < args.size(); i++) { 505 const auto& arg = args[i]; 506 std::string prefix = kind + std::string("[") + std::to_string(i) + "] = "; 507 switch (arg.state) { 508 case ModelArgumentInfo::POINTER: 509 VLOG(EXECUTION) << prefix << "POINTER(" << SHOW_IF_DEBUG(arg.buffer) << ")"; 510 break; 511 case ModelArgumentInfo::MEMORY: 512 VLOG(EXECUTION) << prefix << "MEMORY(" 513 << "pool=" << arg.locationAndLength.poolIndex 514 << ", " 515 << "off=" << arg.locationAndLength.offset 516 << ")"; 517 break; 518 case ModelArgumentInfo::HAS_NO_VALUE: 519 VLOG(EXECUTION) << prefix << "HAS_NO_VALUE"; 520 break; 521 case ModelArgumentInfo::UNSPECIFIED: 522 VLOG(EXECUTION) << prefix << "UNSPECIFIED"; 523 break; 524 default: 525 VLOG(EXECUTION) << prefix << "state(" << arg.state << ")"; 526 break; 527 } 528 } 529 } 530 531 int StepExecutor::startCompute(sp<ExecutionCallback>* synchronizationCallback) { 532 if (VLOG_IS_ON(EXECUTION)) { 533 logArguments("input", mInputs); 534 logArguments("output", mOutputs); 535 } 536 if (mDriver == nullptr) { 537 return startComputeOnCpu(synchronizationCallback); 538 } else { 539 return startComputeOnDevice(synchronizationCallback); 540 } 541 } 542 543 int StepExecutor::startComputeOnDevice(sp<ExecutionCallback>* synchronizationCallback) { 544 nnAssert(mDriver != nullptr); 545 546 *synchronizationCallback = nullptr; 547 548 // TODO: Remove the mPreparedModel == nullptr case once we've fully integrated 549 // ExecutionPlan with the compilation and execution phases of the NN API 550 if (mPreparedModel == nullptr) { 551 Model model; 552 mModel->setHidlModel(&model); 553 554 // TODO Dangerous! In async, the model will outlive it here. Safe for now 555 sp<PreparedModelCallback> preparedModelCallback = new PreparedModelCallback(); 556 // TODO(butlermichael): Propagate user preference to this point instead of 557 // using default value of ANEURALNETWORKS_PREFER_FAST_SINGLE_ANSWER, or 558 // remove this entire block of code since it is a stale path that is only 559 // encountered on an #if-removed code. 560 ExecutionPreference preference = 561 static_cast<ExecutionPreference>(ANEURALNETWORKS_PREFER_FAST_SINGLE_ANSWER); 562 ErrorStatus prepareLaunchStatus = mDriver->prepareModel(model, preference, 563 preparedModelCallback); 564 if (prepareLaunchStatus != ErrorStatus::NONE) { 565 return convertErrorStatusToResultCode(prepareLaunchStatus); 566 } 567 568 // Immediately synchronize with callback object for now 569 // TODO: change to asynchronous later 570 preparedModelCallback->wait(); 571 ErrorStatus prepareReturnStatus = preparedModelCallback->getStatus(); 572 mPreparedModel = preparedModelCallback->getPreparedModel(); 573 if (prepareReturnStatus != ErrorStatus::NONE) { 574 return convertErrorStatusToResultCode(prepareReturnStatus); 575 } 576 if (mPreparedModel == nullptr) { 577 return ANEURALNETWORKS_OP_FAILED; 578 } 579 } 580 581 // We separate the input & output pools so that we reduce the copying done if we 582 // do an eventual remoting (hidl_memory->update()). We could also use it to set 583 // protection on read only memory but that's not currently done. 584 Memory inputPointerArguments; 585 Memory outputPointerArguments; 586 587 // Layout the input and output data 588 int n = allocatePointerArgumentsToPool(&mInputs, &inputPointerArguments); 589 if (n != ANEURALNETWORKS_NO_ERROR) { 590 return n; 591 } 592 n = allocatePointerArgumentsToPool(&mOutputs, &outputPointerArguments); 593 if (n != ANEURALNETWORKS_NO_ERROR) { 594 return n; 595 } 596 597 // Copy the input data that was specified via a pointer. 598 // inputPointerArguments.update(); 599 for (auto& info : mInputs) { 600 if (info.state == ModelArgumentInfo::POINTER) { 601 DataLocation& loc = info.locationAndLength; 602 uint8_t* data = nullptr; 603 int n = inputPointerArguments.getPointer(&data); 604 if (n != ANEURALNETWORKS_NO_ERROR) { 605 return n; 606 } 607 memcpy(data + loc.offset, info.buffer, loc.length); 608 } 609 } 610 // TODO: Add inputPointerArguments.commit() and .update() at all the right places 611 612 Request request; 613 setRequestArgumentArray(mInputs, &request.inputs); 614 setRequestArgumentArray(mOutputs, &request.outputs); 615 uint32_t count = mMemories.size(); 616 request.pools.resize(count); 617 for (uint32_t i = 0; i < count; i++) { 618 request.pools[i] = mMemories[i]->getHidlMemory(); 619 } 620 621 // Prepare the callback for asynchronous execution. sp<ExecutionCallback> 622 // object is returned when the execution has been successfully launched, 623 // otherwise a nullptr is returned. The executionCallback is abstracted in 624 // the NN API as an "event". 625 // 626 // The sp is used for ref-counting purposes. Without it, the HIDL service 627 // could attempt to communicate with a dead callback object. 628 // 629 // TODO: Explain the "dead callback" problem further, either here or 630 // in the design document. 631 sp<ExecutionCallback> executionCallback = new ExecutionCallback(); 632 633 VLOG(EXECUTION) << "Before mPreparedModel->execute() " << SHOW_IF_DEBUG(toString(request)); 634 // Execute. 635 // TODO: What happens to the Callback if the service dies abnormally 636 // -- won't that keep the Callback live forever, because the service 637 // never has the opportunity to bump the reference count down? Or 638 // maybe the HIDL infrastructure handles this magically? At worst, 639 // it seems like this is a small memory leak, if the Callback stays 640 // alive forever. 641 Return<ErrorStatus> executeStatus = mPreparedModel->execute(request, executionCallback); 642 if (!executeStatus.isOk() || executeStatus != ErrorStatus::NONE) { 643 VLOG(EXECUTION) << "**Execute failed**"; 644 return executeStatus.isOk() 645 ? convertErrorStatusToResultCode(executeStatus) 646 : ANEURALNETWORKS_OP_FAILED; 647 } 648 649 // TODO: Remove this synchronization point when the block of code below is 650 // removed. 651 executionCallback->wait(); 652 Return<ErrorStatus> callbackStatus = executionCallback->getStatus(); 653 if (!callbackStatus.isOk() || callbackStatus != ErrorStatus::NONE) { 654 VLOG(EXECUTION) << "**Execute async failed**"; 655 return callbackStatus.isOk() 656 ? convertErrorStatusToResultCode(callbackStatus) 657 : ANEURALNETWORKS_OP_FAILED; 658 } 659 660 // Copy the output data from shared memory to the output buffers. 661 // TODO: Move this block of code somewhere else. It should not be in the 662 // startCompute function. 663 // TODO: outputMemory->update(); outputMemory->commit() 664 for (auto& info : mOutputs) { 665 if (info.state == ModelArgumentInfo::POINTER) { 666 DataLocation& loc = info.locationAndLength; 667 uint8_t* data = nullptr; 668 int n = outputPointerArguments.getPointer(&data); 669 if (n != ANEURALNETWORKS_NO_ERROR) { 670 return n; 671 } 672 memcpy(info.buffer, data + loc.offset, loc.length); 673 } 674 } 675 VLOG(EXECUTION) << "StepExecutor::startComputeOnDevice completed"; 676 677 *synchronizationCallback = executionCallback; 678 return ANEURALNETWORKS_NO_ERROR; 679 } 680 681 static void asyncStartComputeOnCpu(const Model& model, const Request& request, 682 const std::vector<RunTimePoolInfo>& modelPoolInfos, 683 const std::vector<RunTimePoolInfo>& requestPoolInfos, 684 const sp<IExecutionCallback>& executionCallback) { 685 CpuExecutor executor; 686 int err = executor.run(model, request, modelPoolInfos, requestPoolInfos); 687 executionCallback->notify(convertResultCodeToErrorStatus(err)); 688 } 689 690 int StepExecutor::startComputeOnCpu(sp<ExecutionCallback>* synchronizationCallback) { 691 // TODO: use a thread pool 692 693 Model model; 694 mModel->setHidlModel(&model); 695 696 // Prepare the callback for asynchronous execution. sp<ExecutionCallback> 697 // object is returned when the execution has been successfully launched, 698 // otherwise a nullptr is returned. The executionCallback is abstracted in 699 // the NN API as an "event". 700 sp<ExecutionCallback> executionCallback = new ExecutionCallback(); 701 *synchronizationCallback = nullptr; 702 703 std::vector<RunTimePoolInfo> modelPoolInfos; 704 if (!setRunTimePoolInfosFromHidlMemories(&modelPoolInfos, model.pools)) { 705 return ANEURALNETWORKS_UNMAPPABLE; 706 } 707 708 std::vector<RunTimePoolInfo> requestPoolInfos; 709 requestPoolInfos.reserve(mMemories.size()); 710 bool fail = false; 711 for (const Memory* mem : mMemories) { 712 requestPoolInfos.emplace_back(mem->getHidlMemory(), &fail); 713 } 714 if (fail) { 715 return ANEURALNETWORKS_UNMAPPABLE; 716 } 717 // Create as many pools as there are input / output. 718 auto fixPointerArguments = [&requestPoolInfos](std::vector<ModelArgumentInfo>& argumentInfos) { 719 for (ModelArgumentInfo& argumentInfo : argumentInfos) { 720 if (argumentInfo.state == ModelArgumentInfo::POINTER) { 721 argumentInfo.locationAndLength.poolIndex = 722 static_cast<uint32_t>(requestPoolInfos.size()); 723 argumentInfo.locationAndLength.offset = 0; 724 requestPoolInfos.emplace_back(static_cast<uint8_t*>(argumentInfo.buffer)); 725 } 726 } 727 }; 728 fixPointerArguments(mInputs); 729 fixPointerArguments(mOutputs); 730 731 Request request; 732 setRequestArgumentArray(mInputs, &request.inputs); 733 setRequestArgumentArray(mOutputs, &request.outputs); 734 735 // TODO: should model be moved with a std::cref? 736 std::thread thread(asyncStartComputeOnCpu, model, std::move(request), 737 std::move(modelPoolInfos), std::move(requestPoolInfos), 738 executionCallback); 739 executionCallback->bind_thread(std::move(thread)); 740 741 *synchronizationCallback = executionCallback; 742 return ANEURALNETWORKS_NO_ERROR; 743 } 744 745 } // namespace nn 746 } // namespace android 747