// Home | History | Annotate | Download | only in runtime
      1 /*
      2  * Copyright (C) 2017 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #ifndef ANDROID_ML_NN_RUNTIME_EXECUTION_BUILDER_H
     18 #define ANDROID_ML_NN_RUNTIME_EXECUTION_BUILDER_H
     19 
     20 #include "Callbacks.h"
     21 #include "HalInterfaces.h"
     22 #include "Memory.h"
     23 #include "ModelBuilder.h"
     24 #include "NeuralNetworks.h"
     25 #include "VersionedInterfaces.h"
     26 
     27 #include <atomic>
     28 #include <unordered_map>
     29 #include <vector>
     30 
     31 using ::android::hardware::neuralnetworks::V1_2::implementation::ExecutionCallback;
     32 using ::android::hardware::neuralnetworks::V1_2::implementation::PreparedModelCallback;
     33 
     34 namespace android {
     35 namespace nn {
     36 
     37 class BurstBuilder;
     38 class CompilationBuilder;
     39 class ExecutionPlan;
     40 class ExecutionBurstController;
     41 class ExecutionStep;
     42 class Memory;
     43 class ModelBuilder;
     44 class StepExecutor;
     45 class Device;
     46 
     47 // TODO move length out of DataLocation
     48 struct ModelArgumentInfo {
     49     // Whether the argument was specified as being in a Memory, as a pointer,
     50     // has no value, or has not been specified.
     51     // If POINTER then:
     52     //   locationAndLength.length is valid.
     53     //   dimensions is valid.
     54     //   buffer is valid
     55     // If MEMORY then:
     56     //   locationAndLength.{poolIndex, offset, length} is valid.
     57     //   dimensions is valid.
     58     enum { POINTER, MEMORY, HAS_NO_VALUE, UNSPECIFIED } state = UNSPECIFIED;
     59     DataLocation locationAndLength;
     60     std::vector<uint32_t> dimensions;
     61     void* buffer;
     62     bool isSufficient = true;
     63 
     64     int setFromPointer(const Operand& operand, const ANeuralNetworksOperandType* type, void* buffer,
     65                        uint32_t length);
     66     int setFromMemory(const Operand& operand, const ANeuralNetworksOperandType* type,
     67                       uint32_t poolIndex, uint32_t offset, uint32_t length);
     68     int setFromTemporaryMemory(const Operand& operand, uint32_t poolIndex, uint32_t offset,
     69                                uint32_t length);
     70     int updateDimensionInfo(const Operand& operand, const ANeuralNetworksOperandType* newType);
     71 };
     72 
// Implementation behind an ANeuralNetworksExecution: collects the input and
// output bindings for one run of a compiled model, then drives the
// computation synchronously, asynchronously, or through a burst object.
class ExecutionBuilder {
    // StepExecutor maps arguments by reaching directly into mInputs/mOutputs.
    friend class StepExecutor;
public:
    ExecutionBuilder(const CompilationBuilder* compilation);

    // Bind input/output 'index' (an input/output index, not an operand index)
    // to a client pointer or to a region of a Memory object.  'type' may be a
    // caller-supplied override of the operand's declared type (it is passed
    // through to ModelArgumentInfo); presumably nullptr means "use the
    // model's declared type" per the NNAPI contract — TODO confirm.
    // Return an ANEURALNETWORKS_* result code.
    int setInput(uint32_t index, const ANeuralNetworksOperandType* type, const void* buffer,
                 size_t length);
    int setInputFromMemory(uint32_t index, const ANeuralNetworksOperandType* type,
                           const Memory* memory, size_t offset, size_t length);
    int setOutput(uint32_t index, const ANeuralNetworksOperandType* type, void* buffer,
                  size_t length);
    int setOutputFromMemory(uint32_t index, const ANeuralNetworksOperandType* type,
                            const Memory* memory, size_t offset, size_t length);

    // Request (or cancel the request) that the driver measure execution time.
    int setMeasureTiming(bool measure);

    // Retrieve a duration measured during execution; only meaningful after
    // the execution has finished (see mFinished).
    int getDuration(int32_t durationCode, uint64_t* duration) const;

    // Three entry points into compute(): asynchronous (callback required),
    // synchronous, and burst (a sequence of synchronous executions).
    int computeAsynchronously(sp<ExecutionCallback>* synchronizationCallback) {
        CHECK(synchronizationCallback != nullptr);
        return compute(synchronizationCallback);
    }
    int computeSynchronously() { return compute(nullptr); }
    int burstCompute(BurstBuilder* burst) { return compute(nullptr, burst); }

    // Initialize output dimensional information from ModelArgumentInfo.
    void initializeOutputShapes(std::vector<OutputShape>* outputShapes) const;

    // Query the dimensions/rank of output 'index' after execution (may have
    // been updated by the driver for outputs with dynamic shapes).
    int getOutputOperandDimensions(uint32_t index, uint32_t* dimensions);
    int getOutputOperandRank(uint32_t index, uint32_t* rank);

    // Handshake with lower-level execution support
    bool measureTiming() const { return mMeasureTiming; }
    void reportTiming(Timing timing) { mTiming = timing; }

    const CompilationBuilder* getCompilation() const { return mCompilation; }
    const ModelBuilder* getModel() const { return mModel; }

    // Record the final status and driver-reported output shapes of the
    // execution; returns the (possibly updated) error status.
    ErrorStatus finish(ErrorStatus error, const std::vector<OutputShape>& outputShapes);

   private:
    // If a callback is provided, then this is asynchronous. If a callback is
    // not provided (i.e., is nullptr), then this is synchronous.
    //
    // If burst is provided, then the burst path will be used. If a burst is not
    // provided (i.e., is nullptr), then a synchronous execution will occur.
    //
    // Providing both synchronizationCallback and burstBuilder is an error.
    int compute(sp<ExecutionCallback>* synchronizationCallback,
                BurstBuilder* burstBuilder = nullptr);

    // The compilation this execution was created from (not owned).
    const CompilationBuilder* mCompilation;

    // Update output dimensional information from OutputShape to ModelArgumentInfo.
    bool updateOutputShapes(const std::vector<OutputShape>& outputShapes);

    // Model and execution plan captured from the compilation (not owned).
    const ModelBuilder* mModel;
    const ExecutionPlan* mPlan;

    // This is a DeviceManager::kPartitioning* value captured from
    // CompilationBuilder when the ExecutionBuilder is constructed.
    uint32_t mPartitioning;

    // The information we'll send to the driver about the inputs and outputs.
    // Note that we build this in two steps:
    // 1. As the arguments are specified, set the corresponding mInputs or mOutputs element.
    //    If set from a pointer, don't set the location in the RequestArgument but store it
    //    instead in mInputBuffers or mOutputBuffers.
    // 2. Once we have all the inputs and outputs, if needed, allocate shared memory for
    //    the m*Buffers entries.  Copy the input values into the shared memory.
    // We do this to avoid creating a lot of shared memory objects if we have a lot of
    // parameters specified via pointers.  We also avoid copying in the case where
    // some of the nodes will interpreted on the CPU anyway.
    std::vector<ModelArgumentInfo> mInputs;
    std::vector<ModelArgumentInfo> mOutputs;
    MemoryTracker mMemories;

    // Do we ask the driver to measure timing?
    bool mMeasureTiming = false;

    // Timing reported from the driver
    Timing mTiming = {};

    // Properties cannot be set once the execution has started.
    std::atomic_bool mStarted = false;

    // Timing and output shapes can only be queried after the execution is
    // finished.
    std::atomic_bool mFinished = false;
};
    163 
    164 // class StepExecutor is used to execute a single "step" in a
    165 // potentially multiple step execution process.  The graph associated
    166 // with that step is executed in its entirety on a single device (or
    167 // on the CPU).
class StepExecutor {
   public:
    // executionBuilder
    //     Describes the full (possibly multiple-"step") execution.
    // model
    //     The model to be executed by the executor.  Possibly a
    //     submodel of the model from executionBuilder.
    // driver, preparedModel
    //     The device on which to execute the "step", and the prepared
    //     model to execute on that device.  (Both are nullptr in the
    //     case of CPU.)
    StepExecutor(ExecutionBuilder* executionBuilder, const ModelBuilder* model,
                 std::shared_ptr<Device> device,
                 std::shared_ptr<VersionedIPreparedModel> preparedModel);

    // Map inputs and outputs from ExecutionBuilder to StepExecutor,
    // in the case where we have a single-"step" execution (i.e., the executor
    // is executing the entire model from the ExecutionBuilder).
    void mapInputsAndOutputsTrivially();

    // Update output shapes returned from ExecutionCallback to ExecutionBuilder.
    bool updateOutputShapes(const std::vector<OutputShape>& from, std::vector<OutputShape>* to);

    // Map inputs and outputs from ExecutionBuilder to StepExecutor,
    // one at a time.  Note that these are input/output indexes, not
    // operand indexes.
    void mapInput(uint32_t builderIndex, uint32_t executorIndex) {
        mapInputOrOutput(mExecutionBuilder->mInputs[builderIndex], &mInputs[executorIndex]);
    }
    void mapOutput(uint32_t builderIndex, uint32_t executorIndex) {
        mapInputOrOutput(mExecutionBuilder->mOutputs[builderIndex], &mOutputs[executorIndex]);
    }
    // A builder-level *output* becomes this step's *input* (used when a later
    // step consumes a value an earlier step produced).
    void mapOutputToInput(uint32_t builderIndex, uint32_t executorIndex) {
        mapInputOrOutput(mExecutionBuilder->mOutputs[builderIndex],
                         &mInputs[executorIndex]);
    }

    // The input or output is assumed to have the size of the
    // corresponding operand.
    int setInputFromTemporaryMemory(uint32_t inputIndex, const Memory* memory, uint32_t offset) {
        return setInputOrOutputFromTemporaryMemory(mModel->getInputOperand(inputIndex),
                                                   memory, offset,
                                                   &mInputs.at(inputIndex));
    }
    int setOutputFromTemporaryMemory(uint32_t outputIndex, const Memory* memory, uint32_t offset) {
        return setInputOrOutputFromTemporaryMemory(mModel->getOutputOperand(outputIndex),
                                                   memory, offset,
                                                   &mOutputs.at(outputIndex));
    }

    // Executes using the (driver, preparedModel) specified at construction time.
    int startCompute(sp<ExecutionCallback>* synchronizationCallback,
                     const std::shared_ptr<ExecutionBurstController>& burstController = nullptr);

    // Executes using the CPU, regardless of the (driver,
    // preparedModel) specified at construction time.
    int startComputeOnCpu(sp<ExecutionCallback>* synchronizationCallback);

    // Whether this step will run on the CPU fallback path.
    bool isCpu() const;

    // ExecutionStep has the index mapping between ExecutionBuilder and StepExecutor.
    void setExecutionStep(const std::shared_ptr<const ExecutionStep>& step) {
        mExecutionStep = step;
    }

   private:
    // Gather all POINTER-state arguments in 'args' into one shared memory
    // pool 'memory' (step 2 of the two-step argument build described below).
    int allocatePointerArgumentsToPool(std::vector<ModelArgumentInfo>* args, Memory* memory);
    // Driver-path implementation behind startCompute().
    int startComputeOnDevice(sp<ExecutionCallback>* synchronizationCallback,
                             const std::shared_ptr<ExecutionBurstController>& burstController);

    // Copy one argument's binding from the builder into this executor.
    void mapInputOrOutput(const ModelArgumentInfo& builderInputOrOutput,
                          ModelArgumentInfo* executorInputOrOutput);

    // Shared implementation behind set{Input,Output}FromTemporaryMemory().
    int setInputOrOutputFromTemporaryMemory(const Operand& inputOrOutputOperand,
                                            const Memory* memory, uint32_t offset,
                                            ModelArgumentInfo* inputOrOutputInfo);

    // describes the full (possibly multiple-"step") execution
    ExecutionBuilder* mExecutionBuilder;

    // describes the single execution step
    std::shared_ptr<const ExecutionStep> mExecutionStep = nullptr;

    // model to be executed on the executor, in both original and
    // compiled forms; and device on which to execute it
    const ModelBuilder* mModel;
    std::shared_ptr<Device> mDevice;
    std::shared_ptr<VersionedIPreparedModel>
            mPreparedModel;  // nullptr if CPU execution or if bypassing ExecutionPlan

    // The information we'll send to the driver about the inputs and outputs.
    // Note that we build this in two steps:
    // 1. As the arguments are specified, set the corresponding mInputs or mOutputs element.
    //    If set from a pointer, don't set the location in the RequestArgument but store it
    //    instead in mInputBuffers or mOutputBuffers.
    // 2. Once we have all the inputs and outputs, if needed, allocate shared memory for
    //    the m*Buffers entries.  Copy the input values into the shared memory.
    // We do this to avoid creating a lot of shared memory objects if we have a lot of
    // parameters specified via pointers.  We also avoid copying in the case where
    // some of the nodes will interpreted on the CPU anyway.
    std::vector<ModelArgumentInfo> mInputs;
    std::vector<ModelArgumentInfo> mOutputs;
    MemoryTracker mMemories;
};
    272 
    273 } // namespace nn
    274 } // namespace android
    275 
    276 #endif // ANDROID_ML_NN_RUNTIME_EXECUTION_BUILDER_H
    277