// Home | History | Annotate | Download | only in runtime  (code-browser navigation header, kept as a comment)
      1 /*
      2  * Copyright (C) 2017 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #define LOG_TAG "ExecutionBuilder"
     18 
     19 #include "ExecutionBuilder.h"
     20 
     21 #include "CompilationBuilder.h"
     22 #include "CpuExecutor.h"
     23 #include "HalInterfaces.h"
     24 #include "Manager.h"
     25 #include "ModelBuilder.h"
     26 #include "Utils.h"
     27 
     28 #include <mutex>
     29 #include <thread>
     30 #include <vector>
     31 
     32 namespace android {
     33 namespace nn {
     34 
     35 int ModelArgumentInfo::setFromPointer(const Operand& operand,
     36                                       const ANeuralNetworksOperandType* type, void* data,
     37                                       uint32_t length) {
     38     if ((data == nullptr) != (length == 0)) {
     39         const char* dataPtrMsg = data ? "NOT_NULLPTR" : "NULLPTR";
     40         LOG(ERROR) << "Data pointer must be nullptr if and only if length is zero (data = "
     41                    << dataPtrMsg << ", length = " << length << ")";
     42         return ANEURALNETWORKS_BAD_DATA;
     43     }
     44     if (data == nullptr) {
     45         state = ModelArgumentInfo::HAS_NO_VALUE;
     46     } else {
     47         int n = updateDimensionInfo(operand, type);
     48         if (n != ANEURALNETWORKS_NO_ERROR) {
     49             return n;
     50         }
     51         uint32_t neededLength = sizeOfData(operand.type, dimensions);
     52         if (operand.type != OperandType::OEM && neededLength != length) {
     53             LOG(ERROR) << "Setting argument with invalid length: " << length
     54                        << ", expected length: " << neededLength;
     55             return ANEURALNETWORKS_BAD_DATA;
     56         }
     57         state = ModelArgumentInfo::POINTER;
     58     }
     59     buffer = data;
     60     locationAndLength = {.poolIndex = 0, .offset = 0, .length = length};
     61     return ANEURALNETWORKS_NO_ERROR;
     62 }
     63 
     64 int ModelArgumentInfo::setFromMemory(const Operand& operand, const ANeuralNetworksOperandType* type,
     65                                      uint32_t poolIndex, uint32_t offset, uint32_t length) {
     66     int n = updateDimensionInfo(operand, type);
     67     if (n != ANEURALNETWORKS_NO_ERROR) {
     68         return n;
     69     }
     70     uint32_t neededLength = sizeOfData(operand.type, dimensions);
     71     if (operand.type != OperandType::OEM && neededLength != length) {
     72         LOG(ERROR) << "Setting argument with invalid length: " << length
     73                    << ", expected length: " << neededLength;
     74         return ANEURALNETWORKS_BAD_DATA;
     75     }
     76 
     77     state = ModelArgumentInfo::MEMORY;
     78     locationAndLength = {.poolIndex = poolIndex, .offset = offset, .length = length};
     79     buffer = nullptr;
     80     return ANEURALNETWORKS_NO_ERROR;
     81 }
     82 
     83 int ModelArgumentInfo::setFromTemporaryMemory(const Operand& operand,
     84                                               uint32_t poolIndex, uint32_t offset) {
     85     int n = updateDimensionInfo(operand, nullptr);
     86     if (n != ANEURALNETWORKS_NO_ERROR) {
     87         return n;
     88     }
     89     state = ModelArgumentInfo::MEMORY;
     90     locationAndLength =
     91             {.poolIndex = poolIndex, .offset = offset, .length = sizeOfData(operand)};
     92     buffer = nullptr;
     93     return ANEURALNETWORKS_NO_ERROR;
     94 }
     95 
     96 int ModelArgumentInfo::updateDimensionInfo(const Operand& operand,
     97                                            const ANeuralNetworksOperandType* newType) {
     98     nnAssert(dimensions.empty());
     99     if (newType == nullptr) {
    100         for (auto i : operand.dimensions) {
    101             if (i == 0) {
    102                 LOG(ERROR) << "Setting input/output with unspecified dimensions";
    103                 return ANEURALNETWORKS_BAD_DATA;
    104             }
    105         }
    106         dimensions = operand.dimensions;
    107     } else {
    108         uint32_t count = newType->dimensionCount;
    109         if (static_cast<OperandType>(newType->type) != operand.type ||
    110             count != operand.dimensions.size()) {
    111             LOG(ERROR) << "Setting input/output with incompatible types";
    112             return ANEURALNETWORKS_BAD_DATA;
    113         }
    114 
    115         dimensions = hidl_vec<uint32_t>(count);
    116         for (uint32_t i = 0; i < count; i++) {
    117             if (operand.dimensions[i] != 0 && operand.dimensions[i] != newType->dimensions[i]) {
    118                 LOG(ERROR) << "Overriding a fully specified dimension is disallowed";
    119                 return ANEURALNETWORKS_BAD_DATA;
    120             } else {
    121                 dimensions[i] = newType->dimensions[i];
    122             }
    123         }
    124     }
    125     return ANEURALNETWORKS_NO_ERROR;
    126 }
    127 
// Constructs an execution over |compilation|'s model and execution plan.
// The input/output argument tables are sized to the model's I/O counts but
// remain UNSPECIFIED until the caller sets them via setInput*()/setOutput*().
ExecutionBuilder::ExecutionBuilder(const CompilationBuilder* compilation) :
        mModel(compilation->mModel),
        mPlan(&compilation->mPlan),
        mPartitioning(compilation->mPartitioning),
        mInputs(mModel->inputCount()),
        mOutputs(mModel->outputCount()) {
    VLOG(EXECUTION) << "ExecutionBuilder::ExecutionBuilder";
}
    136 
    137 int ExecutionBuilder::setInput(uint32_t index, const ANeuralNetworksOperandType* type,
    138                                const void* buffer, size_t length) {
    139     uint32_t count = static_cast<uint32_t>(mInputs.size());
    140     if (index >= count) {
    141         LOG(ERROR) << "ANeuralNetworksExecution_setInput bad index " << index << " " << count;
    142         return ANEURALNETWORKS_BAD_DATA;
    143     }
    144     if (type != nullptr) {
    145         int n = validateOperandType(*type, "ANeuralNetworksExecution_setInput", false);
    146         if (n != ANEURALNETWORKS_NO_ERROR) {
    147             return n;
    148         }
    149     }
    150     if (length > 0xFFFFFFFF) {
    151         LOG(ERROR) << "ANeuralNetworksExecution_setInput input exceeds max length " << length;
    152         return ANEURALNETWORKS_BAD_DATA;
    153     }
    154     uint32_t l = static_cast<uint32_t>(length);
    155     return mInputs[index].setFromPointer(mModel->getInputOperand(index), type,
    156                                          const_cast<void*>(buffer), l);
    157 }
    158 
    159 int ExecutionBuilder::setInputFromMemory(uint32_t index, const ANeuralNetworksOperandType* type,
    160                                          const Memory* memory, size_t offset, size_t length) {
    161     // Should be similar to StepExecutor::setInputOrOutputFromTemporaryMemory()
    162 
    163     uint32_t count = static_cast<uint32_t>(mInputs.size());
    164     if (index >= count) {
    165         LOG(ERROR) << "ANeuralNetworksExecution_setInputFromMemory bad index " << index << " "
    166                    << count;
    167         return ANEURALNETWORKS_BAD_DATA;
    168     }
    169     if (!memory->validateSize(offset, length)) {
    170         return ANEURALNETWORKS_BAD_DATA;
    171     }
    172     // TODO validate the rest
    173     uint32_t poolIndex = mMemories.add(memory);
    174     return mInputs[index].setFromMemory(mModel->getInputOperand(index), type, poolIndex, offset,
    175                                         length);
    176 }
    177 
    178 int ExecutionBuilder::setOutput(uint32_t index, const ANeuralNetworksOperandType* type, void* buffer,
    179                                 size_t length) {
    180     uint32_t count = static_cast<uint32_t>(mOutputs.size());
    181     if (index >= count) {
    182         LOG(ERROR) << "ANeuralNetworksExecution_setOutput bad index " << index << " " << count;
    183         return ANEURALNETWORKS_BAD_DATA;
    184     }
    185     if (type != nullptr) {
    186         int n = validateOperandType(*type, "ANeuralNetworksExecution_setOutput", false);
    187         if (n != ANEURALNETWORKS_NO_ERROR) {
    188             return n;
    189         }
    190     }
    191     if (length > 0xFFFFFFFF) {
    192         LOG(ERROR) << "ANeuralNetworksExecution_setOutput input exceeds max length " << length;
    193         return ANEURALNETWORKS_BAD_DATA;
    194     }
    195     uint32_t l = static_cast<uint32_t>(length);
    196     return mOutputs[index].setFromPointer(mModel->getOutputOperand(index), type, buffer, l);
    197 }
    198 
    199 int ExecutionBuilder::setOutputFromMemory(uint32_t index, const ANeuralNetworksOperandType* type,
    200                                           const Memory* memory, size_t offset, size_t length) {
    201     // Should be similar to StepExecutor::setInputOrOutputFromTemporaryMemory()
    202 
    203     uint32_t count = static_cast<uint32_t>(mOutputs.size());
    204     if (index >= count) {
    205         LOG(ERROR) << "ANeuralNetworksExecution_setOutputFromMemory bad index " << index << " "
    206                    << count;
    207         return ANEURALNETWORKS_BAD_DATA;
    208     }
    209     if (!memory->validateSize(offset, length)) {
    210         return ANEURALNETWORKS_BAD_DATA;
    211     }
    212     // TODO validate the rest
    213     uint32_t poolIndex = mMemories.add(memory);
    214     return mOutputs[index].setFromMemory(mModel->getOutputOperand(index), type, poolIndex, offset,
    215                                          length);
    216 }
    217 
    218 // Attempt synchronous execution of full model on CPU.
    219 // Ensure that executionCallback->notify() is called.
    220 static void cpuFallbackFull(const ExecutionBuilder* executionBuilder,
    221                             const sp<ExecutionCallback>& executionCallback) {
    222     VLOG(EXECUTION) << "cpuFallbackFull";
    223     StepExecutor executor(executionBuilder, executionBuilder->getModel(),
    224                           nullptr /* no VersionedIDevice, so CPU */,
    225                           nullptr /* no IPreparedModel */);
    226     executor.mapInputsAndOutputsTrivially();
    227     sp<ExecutionCallback> fallbackCallback;
    228     int n = executor.startCompute(&fallbackCallback);
    229     if (n != ANEURALNETWORKS_NO_ERROR) {
    230         executionCallback->notify(convertResultCodeToErrorStatus(n));
    231         return;
    232     }
    233     fallbackCallback->wait();
    234     executionCallback->notify(fallbackCallback->getStatus());
    235 }
    236 
    237 // Attempt synchronous execution on CPU.
    238 // (1) First, attempt to execute this step on CPU.  If successful,
    239 //     return true.  (Do not call executionCallback->notify().)
    240 // (2) If unsuccessful, attempt to execute the full model on CPU,
    241 //     ensure that executionCallback->notify() is called, and return
    242 //     false.
    243 static bool cpuFallbackPartial(const ExecutionBuilder* executionBuilder,
    244                                const ExecutionPlan* plan,
    245                                std::shared_ptr<ExecutionPlan::Controller> controller,
    246                                const sp<ExecutionCallback>& executionCallback) {
    247     VLOG(EXECUTION) << "cpuFallbackPartial";
    248     std::shared_ptr<StepExecutor> executor;
    249     int n = plan->fallback(controller, &executor);
    250     if (n != ANEURALNETWORKS_NO_ERROR || executor->isCpu()) {
    251         cpuFallbackFull(executionBuilder, executionCallback);
    252         return false;
    253     }
    254     sp<ExecutionCallback> fallbackCallback;
    255     if (executor->startComputeOnCpu(&fallbackCallback) != ANEURALNETWORKS_NO_ERROR) {
    256         cpuFallbackFull(executionBuilder, executionCallback);
    257         return false;
    258     }
    259     fallbackCallback->wait();
    260     if (fallbackCallback->getStatus() != ErrorStatus::NONE) {
    261         cpuFallbackFull(executionBuilder, executionCallback);
    262         return false;
    263     }
    264     return true;
    265 }
    266 
// Drives a partitioned execution to completion: repeatedly asks the plan for
// the next StepExecutor and runs it synchronously.  Exactly one notify() is
// delivered on |executionCallback| on every path out of this function —
// either directly, or indirectly via cpuFallbackFull()/cpuFallbackPartial().
// When |allowFallback| is set, any per-step failure is retried on CPU.
static void asyncStartComputePartitioned(const ExecutionBuilder* executionBuilder,
                                         const ExecutionPlan* plan,
                                         std::shared_ptr<ExecutionPlan::Controller> controller,
                                         bool allowFallback,
                                         const sp<ExecutionCallback>& executionCallback) {
    VLOG(EXECUTION) << "ExecutionBuilder::startCompute (from plan, iteratively)";
    while (true) {
        std::shared_ptr<StepExecutor> executor;
        VLOG(EXECUTION) << "looking for next StepExecutor";
        // Could not even determine the next step of the plan.
        int n = plan->next(controller, &executor);
        if (n != ANEURALNETWORKS_NO_ERROR) {
            if (allowFallback) {
                cpuFallbackFull(executionBuilder, executionCallback);
            } else {
                executionCallback->notify(convertResultCodeToErrorStatus(n));
            }
            return;
        }
        // A null executor signals that the plan has been fully executed.
        if (executor == nullptr) {
            executionCallback->notify(ErrorStatus::NONE);
            return;
        }

        // Launch failure for this step.
        sp<ExecutionCallback> stepCallback;
        n = executor->startCompute(&stepCallback);
        if (n != ANEURALNETWORKS_NO_ERROR) {
            if (allowFallback) {
                if (cpuFallbackPartial(executionBuilder, plan, controller, executionCallback)) {
                    // Successfully executed one step on CPU.
                    continue;
                } else {
                    // Either successfully executed entire plan on
                    // CPU, or tried and failed to do so.
                    return;
                }
            } else {
                executionCallback->notify(convertResultCodeToErrorStatus(n));
                return;
            }
        }
        // Runtime failure for this step (launch succeeded but execution did not).
        stepCallback->wait();
        ErrorStatus status = stepCallback->getStatus();
        if (status != ErrorStatus::NONE) {
            if (allowFallback) {
                if (cpuFallbackPartial(executionBuilder, plan, controller, executionCallback)) {
                    // Successfully executed one step on CPU.
                    continue;
                } else {
                    // Either successfully executed entire plan on
                    // CPU, or tried and failed to do so.
                    return;
                }
            } else {
                executionCallback->notify(status);
                return;
            }
        }
    }
}
    326 
// Launches this execution.  On success, returns ANEURALNETWORKS_NO_ERROR and
// stores in |*synchronizationCallback| the callback ("event") the caller can
// wait on; on failure returns an error code and leaves the callback null.
// With partitioning enabled, the plan runs on a dedicated thread; otherwise
// the whole model is executed on a single device or on the CPU.
int ExecutionBuilder::startCompute(sp<ExecutionCallback>* synchronizationCallback) {
    *synchronizationCallback = nullptr;

    // TODO validate that we have full types for all inputs and outputs,
    // that the graph is not cyclic,

    // Every input and output must have been set before launch.
    for (auto& p : mInputs) {
        if (p.state == ModelArgumentInfo::UNSPECIFIED) {
            LOG(ERROR) << "ANeuralNetworksExecution_startCompute not all inputs specified";
            return ANEURALNETWORKS_BAD_DATA;
        }
    }
    for (auto& p : mOutputs) {
        if (p.state == ModelArgumentInfo::UNSPECIFIED) {
            LOG(ERROR) << "ANeuralNetworksExecution_startCompute not all outputs specified";
            return ANEURALNETWORKS_BAD_DATA;
        }
    }

#ifndef DISABLE_PARTITIONED_EXECUTION
    {
        // TODO: Remove the non-plan-based path once we've fully integrated ExecutionPlan
        // with the compilation and execution phases of the NN API?  Or retain that path
        // as a fallback in the case of partitioning failure?
        //
        // TODO: Entire plan-based-path should run in an asynchronous thread --
        // take the asynchronous thread logic out of startComputeOnCpu() and use
        // it to wrap the plan-based-path.
        if (mPartitioning > 0) {
            const bool allowFallback = DeviceManager::partitioningAllowsFallback(mPartitioning);
            std::shared_ptr<ExecutionPlan::Controller> controller = mPlan->makeController(this);
            if (controller == nullptr) {
                // No controller and no fallback: give up.  With fallback
                // allowed, drop through to the CPU path at the bottom.
                if (!allowFallback) {
                    return ANEURALNETWORKS_OP_FAILED;
                }
            } else {
                // TODO: use a thread pool

                // Prepare the callback for asynchronous execution.
                // sp<ExecutionCallback> object is returned when the
                // execution has been successfully launched, otherwise a
                // nullptr is returned.  The executionCallback is
                // abstracted in the NN API as an "event".
                sp<ExecutionCallback> executionCallback = new ExecutionCallback();
                std::thread thread(asyncStartComputePartitioned, this, mPlan, controller,
                                   allowFallback,
                                   executionCallback);
                executionCallback->bind_thread(std::move(thread));
                *synchronizationCallback = executionCallback;
                return ANEURALNETWORKS_NO_ERROR;
            }
        }
    }
#else
    {
        // Find a driver that can handle all the operations.
        // TODO: Does not handle CPU fallback (which is tricky because
        //       StepExecutor::startCompute() is designed as
        //       asynchronous).
        // TODO: Does not actually behave asynchronously (because
        //       StepExecutor::startCompute() isn't actually asynchronous
        //       on a device as opposed to a CPU).
        Model hidlModel;
        mModel->setHidlModel(&hidlModel);
        const std::vector<std::shared_ptr<Device>>& devices = DeviceManager::get()->getDrivers();
        for (const auto& device : devices) {
            hidl_vec<bool> supports;
            VLOG(EXECUTION) << "Checking " << device->getName();
            device->getSupportedOperations(hidlModel, &supports);
            if (std::find(supports.begin(), supports.end(), false) == supports.end()) {
                VLOG(EXECUTION) << "ExecutionBuilder::startCompute (without plan) on " << device->getName();
                StepExecutor executor(this, mModel, device->getInterface(),
                                      nullptr /* no IPreparedModel, so compile */);
                executor.mapInputsAndOutputsTrivially();
                return executor.startCompute(synchronizationCallback);
            }
        }
    }
#endif  // DISABLE_PARTITIONED_EXECUTION

    // Run on the CPU.
    VLOG(EXECUTION) << "ExecutionBuilder::startCompute (without plan) on CPU";
    StepExecutor executor(this, mModel,
                          nullptr /* no VersionedIDevice, so CPU */,
                          nullptr /* no IPreparedModel */);
    executor.mapInputsAndOutputsTrivially();
    return executor.startCompute(synchronizationCallback);
}
    415 
    416 // Figures out how to place each of the input or outputs in a buffer. This just does the layout,
    417 // it does not copy data.  Aligns each input a bit.
    418 int StepExecutor::allocatePointerArgumentsToPool(std::vector<ModelArgumentInfo>* args,
    419                                                  Memory* memory) {
    420     uint32_t nextPoolIndex = mMemories.size();
    421     int64_t total = 0;
    422     for (auto& info : *args) {
    423         if (info.state == ModelArgumentInfo::POINTER) {
    424             DataLocation& loc = info.locationAndLength;
    425             // TODO Good enough alignment?
    426             total += alignBytesNeeded(static_cast<uint32_t>(total), loc.length);
    427             loc.poolIndex = nextPoolIndex;
    428             loc.offset = static_cast<uint32_t>(total);
    429             total += loc.length;
    430         }
    431     };
    432     if (total > 0xFFFFFFFF) {
    433         LOG(ERROR) << "ANeuralNetworksExecution_startCompute Size of all inputs or outputs exceeds "
    434                       "2^32.";
    435         return ANEURALNETWORKS_BAD_DATA;
    436     }
    437     hidl_memory hidlMemory;
    438     if (total > 0) {
    439         memory->create(total);  // TODO check error
    440         mMemories.add(memory);
    441     }
    442     return ANEURALNETWORKS_NO_ERROR;
    443 }
    444 
    445 static void setRequestArgumentArray(const std::vector<ModelArgumentInfo>& argumentInfos,
    446                                      hidl_vec<RequestArgument>* ioInfos) {
    447     size_t count = argumentInfos.size();
    448     ioInfos->resize(count);
    449     for (size_t i = 0; i < count; i++) {
    450         const auto& info = argumentInfos[i];
    451         (*ioInfos)[i] = { .hasNoValue = info.state == ModelArgumentInfo::HAS_NO_VALUE,
    452                           .location = info.locationAndLength,
    453                           .dimensions = info.dimensions,
    454                         };
    455     }
    456 }
    457 
// Creates an executor for one step of |executionBuilder|'s execution, running
// |model| on |driver| (nullptr selects the CPU path) with |preparedModel| if
// already compiled (nullptr means compile on demand).  Argument tables are
// sized to the model's I/O counts.
StepExecutor::StepExecutor(const ExecutionBuilder* executionBuilder,
                           const ModelBuilder* model,
                           VersionedIDevice* driver, sp<IPreparedModel> preparedModel) :
    mExecutionBuilder(executionBuilder), mModel(model),
    mDriver(driver), mPreparedModel(preparedModel),
    mInputs(model->inputCount()), mOutputs(model->outputCount()) {}
    464 
// Copies the argument and memory tables wholesale from the ExecutionBuilder,
// for the case where this single step executes the entire model.
void StepExecutor::mapInputsAndOutputsTrivially() {
    mInputs = mExecutionBuilder->mInputs;
    mOutputs = mExecutionBuilder->mOutputs;
    mMemories = mExecutionBuilder->mMemories;
}
    470 
    471 void StepExecutor::mapInputOrOutput(const ModelArgumentInfo& builderInputOrOutput,
    472                                     ModelArgumentInfo* executorInputOrOutput) {
    473     *executorInputOrOutput = builderInputOrOutput;
    474     switch (executorInputOrOutput->state) {
    475         default:
    476             nnAssert(!"unexpected ModelArgumentInfo::state");
    477         case ModelArgumentInfo::POINTER:
    478         case ModelArgumentInfo::UNSPECIFIED:
    479             break;
    480         case ModelArgumentInfo::MEMORY: {
    481             const uint32_t builderPoolIndex =
    482                     builderInputOrOutput.locationAndLength.poolIndex;
    483             const Memory* memory = mExecutionBuilder->mMemories[builderPoolIndex];
    484             const uint32_t executorPoolIndex = mMemories.add(memory);
    485             executorInputOrOutput->locationAndLength.poolIndex =
    486                     executorPoolIndex;
    487             break;
    488         }
    489     }
    490 }
    491 
    492 int StepExecutor::setInputOrOutputFromTemporaryMemory(const Operand& inputOrOutputOperand,
    493                                                       const Memory* memory, uint32_t offset,
    494                                                       ModelArgumentInfo* inputOrOutputInfo) {
    495     // Should be similar to
    496     //     ExecutionBuilder::setInputFromMemory()
    497     //     ExecutionBuilder::setOutputFromMemory()
    498 
    499     uint32_t poolIndex = mMemories.add(memory);
    500     return inputOrOutputInfo->setFromTemporaryMemory(inputOrOutputOperand, poolIndex, offset);
    501 }
    502 
    503 static void logArguments(const char* kind, const std::vector<ModelArgumentInfo> &args) {
    504     for (unsigned i = 0; i < args.size(); i++) {
    505         const auto& arg = args[i];
    506         std::string prefix = kind + std::string("[") + std::to_string(i) + "] = ";
    507         switch (arg.state) {
    508             case ModelArgumentInfo::POINTER:
    509                 VLOG(EXECUTION) << prefix << "POINTER(" << SHOW_IF_DEBUG(arg.buffer) << ")";
    510                 break;
    511             case ModelArgumentInfo::MEMORY:
    512                 VLOG(EXECUTION) << prefix << "MEMORY("
    513                                 << "pool=" << arg.locationAndLength.poolIndex
    514                                 << ", "
    515                                 << "off=" << arg.locationAndLength.offset
    516                                 << ")";
    517                 break;
    518             case ModelArgumentInfo::HAS_NO_VALUE:
    519                 VLOG(EXECUTION) << prefix << "HAS_NO_VALUE";
    520                 break;
    521             case ModelArgumentInfo::UNSPECIFIED:
    522                 VLOG(EXECUTION) << prefix << "UNSPECIFIED";
    523                 break;
    524             default:
    525                 VLOG(EXECUTION) << prefix << "state(" << arg.state << ")";
    526                 break;
    527         }
    528     }
    529 }
    530 
    531 int StepExecutor::startCompute(sp<ExecutionCallback>* synchronizationCallback) {
    532     if (VLOG_IS_ON(EXECUTION)) {
    533         logArguments("input", mInputs);
    534         logArguments("output", mOutputs);
    535     }
    536     if (mDriver == nullptr) {
    537         return startComputeOnCpu(synchronizationCallback);
    538     } else {
    539         return startComputeOnDevice(synchronizationCallback);
    540     }
    541 }
    542 
    543 int StepExecutor::startComputeOnDevice(sp<ExecutionCallback>* synchronizationCallback) {
    544     nnAssert(mDriver != nullptr);
    545 
    546     *synchronizationCallback = nullptr;
    547 
    548     // TODO: Remove the mPreparedModel == nullptr case once we've fully integrated
    549     // ExecutionPlan with the compilation and execution phases of the NN API
    550     if (mPreparedModel == nullptr) {
    551         Model model;
    552         mModel->setHidlModel(&model);
    553 
    554         // TODO Dangerous!  In async, the model will outlive it here. Safe for now
    555         sp<PreparedModelCallback> preparedModelCallback = new PreparedModelCallback();
    556         // TODO(butlermichael): Propagate user preference to this point instead of
    557         // using default value of ANEURALNETWORKS_PREFER_FAST_SINGLE_ANSWER, or
    558         // remove this entire block of code since it is a stale path that is only
    559         // encountered on an #if-removed code.
    560         ExecutionPreference preference =
    561             static_cast<ExecutionPreference>(ANEURALNETWORKS_PREFER_FAST_SINGLE_ANSWER);
    562         ErrorStatus prepareLaunchStatus = mDriver->prepareModel(model, preference,
    563                                                                 preparedModelCallback);
    564         if (prepareLaunchStatus != ErrorStatus::NONE) {
    565             return convertErrorStatusToResultCode(prepareLaunchStatus);
    566         }
    567 
    568         // Immediately synchronize with callback object for now
    569         // TODO: change to asynchronous later
    570         preparedModelCallback->wait();
    571         ErrorStatus prepareReturnStatus = preparedModelCallback->getStatus();
    572         mPreparedModel = preparedModelCallback->getPreparedModel();
    573         if (prepareReturnStatus != ErrorStatus::NONE) {
    574             return convertErrorStatusToResultCode(prepareReturnStatus);
    575         }
    576         if (mPreparedModel == nullptr) {
    577             return ANEURALNETWORKS_OP_FAILED;
    578         }
    579     }
    580 
    581     // We separate the input & output pools so that we reduce the copying done if we
    582     // do an eventual remoting (hidl_memory->update()).  We could also use it to set
    583     // protection on read only memory but that's not currently done.
    584     Memory inputPointerArguments;
    585     Memory outputPointerArguments;
    586 
    587     // Layout the input and output data
    588     int n = allocatePointerArgumentsToPool(&mInputs, &inputPointerArguments);
    589     if (n != ANEURALNETWORKS_NO_ERROR) {
    590         return n;
    591     }
    592     n = allocatePointerArgumentsToPool(&mOutputs, &outputPointerArguments);
    593     if (n != ANEURALNETWORKS_NO_ERROR) {
    594         return n;
    595     }
    596 
    597     // Copy the input data that was specified via a pointer.
    598     // inputPointerArguments.update();
    599     for (auto& info : mInputs) {
    600         if (info.state == ModelArgumentInfo::POINTER) {
    601             DataLocation& loc = info.locationAndLength;
    602             uint8_t* data = nullptr;
    603             int n = inputPointerArguments.getPointer(&data);
    604             if (n != ANEURALNETWORKS_NO_ERROR) {
    605                 return n;
    606             }
    607             memcpy(data + loc.offset, info.buffer, loc.length);
    608         }
    609     }
    610     // TODO: Add inputPointerArguments.commit() and .update() at all the right places
    611 
    612     Request request;
    613     setRequestArgumentArray(mInputs, &request.inputs);
    614     setRequestArgumentArray(mOutputs, &request.outputs);
    615     uint32_t count = mMemories.size();
    616     request.pools.resize(count);
    617     for (uint32_t i = 0; i < count; i++) {
    618         request.pools[i] = mMemories[i]->getHidlMemory();
    619     }
    620 
    621     // Prepare the callback for asynchronous execution. sp<ExecutionCallback>
    622     // object is returned when the execution has been successfully launched,
    623     // otherwise a nullptr is returned. The executionCallback is abstracted in
    624     // the NN API as an "event".
    625     //
    626     // The sp is used for ref-counting purposes. Without it, the HIDL service
    627     // could attempt to communicate with a dead callback object.
    628     //
    629     // TODO: Explain the "dead callback" problem further, either here or
    630     // in the design document.
    631     sp<ExecutionCallback> executionCallback = new ExecutionCallback();
    632 
    633     VLOG(EXECUTION) << "Before mPreparedModel->execute() " << SHOW_IF_DEBUG(toString(request));
    634     // Execute.
    635     // TODO: What happens to the Callback if the service dies abnormally
    636     // -- won't that keep the Callback live forever, because the service
    637     // never has the opportunity to bump the reference count down? Or
    638     // maybe the HIDL infrastructure handles this magically? At worst,
    639     // it seems like this is a small memory leak, if the Callback stays
    640     // alive forever.
    641     Return<ErrorStatus> executeStatus = mPreparedModel->execute(request, executionCallback);
    642     if (!executeStatus.isOk() || executeStatus != ErrorStatus::NONE) {
    643         VLOG(EXECUTION) << "**Execute failed**";
    644         return executeStatus.isOk()
    645                 ? convertErrorStatusToResultCode(executeStatus)
    646                 : ANEURALNETWORKS_OP_FAILED;
    647     }
    648 
    649     // TODO: Remove this synchronization point when the block of code below is
    650     // removed.
    651     executionCallback->wait();
    652     Return<ErrorStatus> callbackStatus = executionCallback->getStatus();
    653     if (!callbackStatus.isOk() || callbackStatus != ErrorStatus::NONE) {
    654         VLOG(EXECUTION) << "**Execute async failed**";
    655         return callbackStatus.isOk()
    656                 ? convertErrorStatusToResultCode(callbackStatus)
    657                 : ANEURALNETWORKS_OP_FAILED;
    658     }
    659 
    660     // Copy the output data from shared memory to the output buffers.
    661     // TODO: Move this block of code somewhere else. It should not be in the
    662     // startCompute function.
    663     // TODO: outputMemory->update(); outputMemory->commit()
    664     for (auto& info : mOutputs) {
    665         if (info.state == ModelArgumentInfo::POINTER) {
    666             DataLocation& loc = info.locationAndLength;
    667             uint8_t* data = nullptr;
    668             int n = outputPointerArguments.getPointer(&data);
    669             if (n != ANEURALNETWORKS_NO_ERROR) {
    670                 return n;
    671             }
    672             memcpy(info.buffer, data + loc.offset, loc.length);
    673         }
    674     }
    675     VLOG(EXECUTION) << "StepExecutor::startComputeOnDevice completed";
    676 
    677     *synchronizationCallback = executionCallback;
    678     return ANEURALNETWORKS_NO_ERROR;
    679 }
    680 
    681 static void asyncStartComputeOnCpu(const Model& model, const Request& request,
    682                                    const std::vector<RunTimePoolInfo>& modelPoolInfos,
    683                                    const std::vector<RunTimePoolInfo>& requestPoolInfos,
    684                                    const sp<IExecutionCallback>& executionCallback) {
    685     CpuExecutor executor;
    686     int err = executor.run(model, request, modelPoolInfos, requestPoolInfos);
    687     executionCallback->notify(convertResultCodeToErrorStatus(err));
    688 }
    689 
    690 int StepExecutor::startComputeOnCpu(sp<ExecutionCallback>* synchronizationCallback) {
    691     // TODO: use a thread pool
    692 
    693     Model model;
    694     mModel->setHidlModel(&model);
    695 
    696     // Prepare the callback for asynchronous execution. sp<ExecutionCallback>
    697     // object is returned when the execution has been successfully launched,
    698     // otherwise a nullptr is returned. The executionCallback is abstracted in
    699     // the NN API as an "event".
    700     sp<ExecutionCallback> executionCallback = new ExecutionCallback();
    701     *synchronizationCallback = nullptr;
    702 
    703     std::vector<RunTimePoolInfo> modelPoolInfos;
    704     if (!setRunTimePoolInfosFromHidlMemories(&modelPoolInfos, model.pools)) {
    705         return ANEURALNETWORKS_UNMAPPABLE;
    706     }
    707 
    708     std::vector<RunTimePoolInfo> requestPoolInfos;
    709     requestPoolInfos.reserve(mMemories.size());
    710     bool fail = false;
    711     for (const Memory* mem : mMemories) {
    712         requestPoolInfos.emplace_back(mem->getHidlMemory(), &fail);
    713     }
    714     if (fail) {
    715         return ANEURALNETWORKS_UNMAPPABLE;
    716     }
    717     // Create as many pools as there are input / output.
    718     auto fixPointerArguments = [&requestPoolInfos](std::vector<ModelArgumentInfo>& argumentInfos) {
    719         for (ModelArgumentInfo& argumentInfo : argumentInfos) {
    720             if (argumentInfo.state == ModelArgumentInfo::POINTER) {
    721                 argumentInfo.locationAndLength.poolIndex =
    722                             static_cast<uint32_t>(requestPoolInfos.size());
    723                 argumentInfo.locationAndLength.offset = 0;
    724                 requestPoolInfos.emplace_back(static_cast<uint8_t*>(argumentInfo.buffer));
    725             }
    726         }
    727     };
    728     fixPointerArguments(mInputs);
    729     fixPointerArguments(mOutputs);
    730 
    731     Request request;
    732     setRequestArgumentArray(mInputs, &request.inputs);
    733     setRequestArgumentArray(mOutputs, &request.outputs);
    734 
    735     // TODO: should model be moved with a std::cref?
    736     std::thread thread(asyncStartComputeOnCpu, model, std::move(request),
    737                        std::move(modelPoolInfos), std::move(requestPoolInfos),
    738                        executionCallback);
    739     executionCallback->bind_thread(std::move(thread));
    740 
    741     *synchronizationCallback = executionCallback;
    742     return ANEURALNETWORKS_NO_ERROR;
    743 }
    744 
    745 }  // namespace nn
    746 }  // namespace android
    747