/*
 * Copyright (C) 2017 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef ANDROID_ML_NN_RUNTIME_EXECUTION_BUILDER_H
#define ANDROID_ML_NN_RUNTIME_EXECUTION_BUILDER_H

#include "Callbacks.h"
#include "HalInterfaces.h"
#include "Memory.h"
#include "ModelBuilder.h"
#include "NeuralNetworks.h"

#include <unordered_map>
#include <vector>

using ::android::hardware::neuralnetworks::V1_0::implementation::ExecutionCallback;
using ::android::hardware::neuralnetworks::V1_0::implementation::PreparedModelCallback;

namespace android {
namespace nn {

class CompilationBuilder;
class ExecutionPlan;
class Memory;
class ModelBuilder;
class StepExecutor;
class VersionedIDevice;

// TODO move length out of DataLocation
struct ModelArgumentInfo {
    // Whether the argument was specified as being in a Memory, as a pointer,
    // has no value, or has not been specified.
    // If POINTER then:
    //   locationAndLength.length is valid.
    //   dimensions is valid.
    //   buffer is valid.
    // If MEMORY then:
    //   locationAndLength.{poolIndex, offset, length} is valid.
    //   dimensions is valid.
    enum { POINTER, MEMORY, HAS_NO_VALUE, UNSPECIFIED } state = UNSPECIFIED;
    DataLocation locationAndLength;
    std::vector<uint32_t> dimensions;
    void* buffer;

    int setFromPointer(const Operand& operand, const ANeuralNetworksOperandType* type, void* buffer,
                       uint32_t length);
    int setFromMemory(const Operand& operand, const ANeuralNetworksOperandType* type,
                      uint32_t poolIndex, uint32_t offset, uint32_t length);
    int setFromTemporaryMemory(const Operand& operand, uint32_t poolIndex, uint32_t offset);
    int updateDimensionInfo(const Operand& operand, const ANeuralNetworksOperandType* newType);
};
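
// Illustrative sketch (not part of the API): how the setFrom* methods drive
// the state machine above.  Here 'operand' is assumed to be a valid Operand
// describing a 4-element float32 tensor; a nullptr type means "use the
// operand's type as-is".
//
//   ModelArgumentInfo info;                 // state == UNSPECIFIED
//   float data[4];
//   // POINTER: buffer and locationAndLength.length become valid.
//   info.setFromPointer(operand, /*type=*/nullptr, data, sizeof(data));
//   // MEMORY: locationAndLength.{poolIndex, offset, length} becomes valid.
//   info.setFromMemory(operand, /*type=*/nullptr, /*poolIndex=*/0,
//                      /*offset=*/0, /*length=*/sizeof(data));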

class ExecutionBuilder {
    friend class StepExecutor;
public:
    ExecutionBuilder(const CompilationBuilder* compilation);

    int setInput(uint32_t index, const ANeuralNetworksOperandType* type, const void* buffer,
                 size_t length);
    int setInputFromMemory(uint32_t index, const ANeuralNetworksOperandType* type,
                           const Memory* memory, size_t offset, size_t length);
    int setOutput(uint32_t index, const ANeuralNetworksOperandType* type, void* buffer,
                  size_t length);
    int setOutputFromMemory(uint32_t index, const ANeuralNetworksOperandType* type,
                            const Memory* memory, size_t offset, size_t length);
    int startCompute(sp<ExecutionCallback>* synchronizationCallback);

    const ModelBuilder* getModel() const { return mModel; }

private:
    const ModelBuilder* mModel;
    const ExecutionPlan* mPlan;

    // This is a DeviceManager::kPartitioning* value captured from
    // CompilationBuilder when the ExecutionBuilder is constructed.
    uint32_t mPartitioning;

    // The information we'll send to the driver about the inputs and outputs.
    // Note that we build this in two steps:
    // 1. As the arguments are specified, set the corresponding mInputs or mOutputs element.
    //    If set from a pointer, don't set the location in the RequestArgument but store
    //    the pointer in the ModelArgumentInfo's buffer field instead.
    // 2. Once we have all the inputs and outputs, if needed, allocate shared memory for
    //    the POINTER arguments.  Copy the input values into the shared memory.
    // We do this to avoid creating a lot of shared memory objects if we have a lot of
    // parameters specified via pointers.  We also avoid copying in the case where
    // some of the nodes will be interpreted on the CPU anyway.
    std::vector<ModelArgumentInfo> mInputs;
    std::vector<ModelArgumentInfo> mOutputs;
    MemoryTracker mMemories;
};
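
// Example use of ExecutionBuilder (a minimal sketch, not part of this header's
// API surface; 'compilation' is assumed to be a finished CompilationBuilder,
// and error handling is omitted):
//
//   ExecutionBuilder execution(compilation);
//   execution.setInput(0, /*type=*/nullptr, inputData, sizeof(inputData));
//   execution.setOutput(0, /*type=*/nullptr, outputData, sizeof(outputData));
//   sp<ExecutionCallback> callback;
//   if (execution.startCompute(&callback) == ANEURALNETWORKS_NO_ERROR) {
//       callback->wait();  // execution is asynchronous; wait for completion
//   }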

// class StepExecutor is used to execute a single "step" in a
// potentially multiple-"step" execution process.  The graph associated
// with that step is executed in its entirety on a single device (or
// on the CPU).
class StepExecutor {
public:
    // executionBuilder
    //     Describes the full (possibly multiple-"step") execution.
    // model
    //     The model to be executed by the executor.  Possibly a
    //     submodel of the model from executionBuilder.
    // driver, preparedModel
    //     The device on which to execute the "step", and the prepared
    //     model to execute on that device.  (Both are nullptr in the
    //     case of CPU.)
    StepExecutor(const ExecutionBuilder* executionBuilder,
                 const ModelBuilder* model,
                 VersionedIDevice* driver, sp<IPreparedModel> preparedModel);

    // Map inputs and outputs from ExecutionBuilder to StepExecutor,
    // in the case where we have a single-"step" execution (i.e., the executor
    // is executing the entire model from the ExecutionBuilder).
    void mapInputsAndOutputsTrivially();

    // Map inputs and outputs from ExecutionBuilder to StepExecutor,
    // one at a time.  Note that these are input/output indexes, not
    // operand indexes.
    void mapInput(uint32_t builderIndex, uint32_t executorIndex) {
        mapInputOrOutput(mExecutionBuilder->mInputs[builderIndex],
                         &mInputs[executorIndex]);
    }
    void mapOutput(uint32_t builderIndex, uint32_t executorIndex) {
        mapInputOrOutput(mExecutionBuilder->mOutputs[builderIndex],
                         &mOutputs[executorIndex]);
    }
    void mapOutputToInput(uint32_t builderIndex, uint32_t executorIndex) {
        mapInputOrOutput(mExecutionBuilder->mOutputs[builderIndex],
                         &mInputs[executorIndex]);
    }

    // The input or output is assumed to have the size of the
    // corresponding operand.
    int setInputFromTemporaryMemory(uint32_t inputIndex, const Memory* memory, uint32_t offset) {
        return setInputOrOutputFromTemporaryMemory(mModel->getInputOperand(inputIndex),
                                                   memory, offset,
                                                   &mInputs.at(inputIndex));
    }
    int setOutputFromTemporaryMemory(uint32_t outputIndex, const Memory* memory, uint32_t offset) {
        return setInputOrOutputFromTemporaryMemory(mModel->getOutputOperand(outputIndex),
                                                   memory, offset,
                                                   &mOutputs.at(outputIndex));
    }
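
    // Illustrative sketch (indexes and names are hypothetical): in a two-step
    // plan, an intermediate value produced by step 1 and consumed by step 2
    // would be routed through temporary memory rather than through the
    // ExecutionBuilder's arguments:
    //
    //   step1->mapInput(0, 0);   // builder input 0 feeds step 1's input 0
    //   step1->setOutputFromTemporaryMemory(0, temp, /*offset=*/0);
    //   step2->setInputFromTemporaryMemory(0, temp, /*offset=*/0);
    //   step2->mapOutput(0, 0);  // step 2's output 0 is builder output 0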

    // Executes using the (driver, preparedModel) specified at construction time.
    int startCompute(sp<ExecutionCallback>* synchronizationCallback);

    // Executes using the CPU, regardless of the (driver,
    // preparedModel) specified at construction time.
    int startComputeOnCpu(sp<ExecutionCallback>* synchronizationCallback);

    bool isCpu() const { return mDriver == nullptr; }

private:
    int allocatePointerArgumentsToPool(std::vector<ModelArgumentInfo>* args, Memory* memory);
    int startComputeOnDevice(sp<ExecutionCallback>* synchronizationCallback);

    void mapInputOrOutput(const ModelArgumentInfo& builderInputOrOutput,
                          ModelArgumentInfo* executorInputOrOutput);

    int setInputOrOutputFromTemporaryMemory(const Operand& inputOrOutputOperand,
                                            const Memory* memory, uint32_t offset,
                                            ModelArgumentInfo* inputOrOutputInfo);

    // describes the full (possibly multiple-"step") execution
    const ExecutionBuilder* mExecutionBuilder;

    // model to be executed on the executor, in both original and
    // compiled forms; and device on which to execute it
    const ModelBuilder* mModel;
    VersionedIDevice* mDriver;          // nullptr if CPU execution
    sp<IPreparedModel> mPreparedModel;  // nullptr if CPU execution or if bypassing ExecutionPlan

    // The information we'll send to the driver about the inputs and outputs.
    // Note that we build this in two steps:
    // 1. As the arguments are specified, set the corresponding mInputs or mOutputs element.
    //    If set from a pointer, don't set the location in the RequestArgument but store
    //    the pointer in the ModelArgumentInfo's buffer field instead.
    // 2. Once we have all the inputs and outputs, if needed, allocate shared memory for
    //    the POINTER arguments.  Copy the input values into the shared memory.
    // We do this to avoid creating a lot of shared memory objects if we have a lot of
    // parameters specified via pointers.  We also avoid copying in the case where
    // some of the nodes will be interpreted on the CPU anyway.
    std::vector<ModelArgumentInfo> mInputs;
    std::vector<ModelArgumentInfo> mOutputs;
    MemoryTracker mMemories;
};
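
// Example: executing an entire (unpartitioned) model on one device (a sketch;
// 'device' and 'prepared' are assumed to come from a prior model-preparation
// step, and error handling is omitted):
//
//   StepExecutor executor(&execution, execution.getModel(), device, prepared);
//   executor.mapInputsAndOutputsTrivially();
//   sp<ExecutionCallback> callback;
//   executor.startCompute(&callback);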

} // namespace nn
} // namespace android

#endif // ANDROID_ML_NN_RUNTIME_EXECUTION_BUILDER_H