Home | History | Annotate | Download | only in runtime
      1 /*
      2  * Copyright (C) 2017 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 // Classes used to plan how to execute a model across multiple devices.
     18 
     19 #ifndef ANDROID_ML_NN_RUNTIME_EXECUTION_PLAN_H
     20 #define ANDROID_ML_NN_RUNTIME_EXECUTION_PLAN_H
     21 
     22 #include "HalInterfaces.h"
     23 #include "Memory.h"
     24 #include "ModelBuilder.h"
     25 #include "NeuralNetworks.h"
     26 #include "TokenHasher.h"
     27 #include "Utils.h"
     28 #include "VersionedInterfaces.h"
     29 
     30 #include <openssl/sha.h>
     31 
     32 #include <set>
     33 #include <string>
     34 
     35 namespace android {
     36 namespace nn {
     37 
     38 class BurstBuilder;
     39 class CompilationBuilder;
     40 class Device;
     41 class ExecutionBuilder;
     42 class ExecutionPlan;
     43 class ExecutionBurstController;
     44 class Memory;
     45 class StepExecutor;
     46 
     47 class ExecutionStep {
     48 public:
     49     typedef std::vector<std::pair<uint32_t, uint32_t>> RemapVectorType;
     50     typedef std::set<std::pair<uint32_t, uint32_t>> SubModelOutputSetType;
     51 
     52     enum OperandKind { INPUT, OUTPUT };
     53 
     54     ExecutionStep(ExecutionPlan* plan, uint32_t stepIndex, std::shared_ptr<Device> device);
     55     int addOperation(int operationIndex, const ModelBuilder& fromModel);
     56     int addOperand(uint32_t fromOperandIndex, uint32_t* toOperandIndex,
     57                    const ModelBuilder& fromModel, OperandKind kind);
     58 
     59     // Each container entry is of the form (fromModel index, subModel index)
     60     const RemapVectorType& getModelInputs() const {
     61         return mModelInputs;
     62     }
     63     const RemapVectorType& getModelOutputs() const {
     64         return mModelOutputs;
     65     }
     66     const RemapVectorType& getTempsAsSubModelInputs() const {
     67         return mTempsAsSubModelInputs;
     68     }
     69     const SubModelOutputSetType& getTempsAsSubModelOutputs() const {
     70         return mTempsAsSubModelOutputs;
     71     }
     72     const RemapVectorType& getOutputsAsSubModelInputs() const {
     73         return mOutputsAsSubModelInputs;
     74     }
     75     const std::vector<uint32_t>& getOutputIndexSubModelToFromModel() const {
     76         return mOutputIndexSubModelToFromModel;
     77     }
     78     const std::vector<uint32_t>& getOutputsAsSubModelInputsIndexToFromModel() const {
     79         return mOutputsAsSubModelInputsIndexToFromModel;
     80     }
     81 
     82     void recordTempAsSubModelOutput(uint32_t fromModelIndex) {
     83         const auto it = mOperandMap.find(fromModelIndex);
     84         nnAssert(it != mOperandMap.end());
     85         mTempsAsSubModelOutputs.insert(std::make_pair(fromModelIndex, it->second));
     86     }
     87 
     88     // If this step has a submodel output of unknown size, sets
     89     // *hasOutputOfUnknownSize to true; otherwise, leaves it
     90     // unchanged.
     91     int finishSubModel(const ModelBuilder* fromModel, bool* hasOutputOfUnknownSize,
     92                        int32_t executionPreference);
     93 
     94     const ModelBuilder* getSubModel() const { return &mSubModel; }
     95     std::shared_ptr<Device> getDevice() const { return mDevice; }
     96 
     97     // only available after calling finishSubModel()
     98     std::shared_ptr<VersionedIPreparedModel> getPreparedSubModel() const {
     99         return mPreparedSubModel;
    100     }
    101 
    102     // Map inputs and outputs from ExecutionBuilder to StepExecutor.
    103     void mapInputsAndOutputs(std::shared_ptr<StepExecutor> stepExecutor) const;
    104 
    105     void dump() const;
    106 
    107     // For test only, get the transformed cache token.
    108     const uint8_t* forTest_getCacheToken() const { return mToken.getCacheToken(); }
    109 
    110    private:
    111     void logSubModel() const;
    112 
    113     // TODO: Some of the data is working state information that
    114     // shouldn't be needed after we've constructed but not executed
    115     // the step.
    116 
    117     ExecutionPlan* mPlan;
    118     uint32_t mIndex;  // index of step within plan
    119     ModelBuilder mSubModel;
    120     std::shared_ptr<Device> mDevice;
    121     std::shared_ptr<VersionedIPreparedModel> mPreparedSubModel;  // not used for CPU
    122 
    123     // Inputs of original model that are also inputs of this submodel:
    124     //     (fromModel index, subModel index)
    125     RemapVectorType mModelInputs;
    126     // Outputs of original model that are also outputs of this submodel:
    127     //     (fromModel index, subModel index)
    128     RemapVectorType mModelOutputs;
    129     // Temporaries of original model that are inputs of this submodel:
    130     //     (fromModel index, subModel index)
    131     RemapVectorType mTempsAsSubModelInputs;
    132     // Temporaries of original model that are outputs of this submodel:
    133     //     (fromModel index, subModel index)
    134     SubModelOutputSetType mTempsAsSubModelOutputs;
    135     // Outputs of original model that are inputs of this submodel:
    136     //     (fromModel index, subModel index)
    137     RemapVectorType mOutputsAsSubModelInputs;
    138     // Converts operand indexes from the main model to the submodel.
    139     std::unordered_map<uint32_t, uint32_t> mOperandMap;
    140     // Converts input indexes from the submodel to the main model
    141     // (these are input indexes, not operand indexes).  This vector
    142     // only describes inputs of the submodel that are also inputs of
    143     // the main model -- that is, mModelInputs but not mTempsAsSubModelInputs.
    144     std::vector<uint32_t> mInputIndexSubModelToFromModel;
    145     // Converts output indexes from the submodel to the main model
    146     // (these are output indexes, not operand indexes).  This vector
    147     // only describes outputs of the submodel that are also outputs of
    148     // the main model -- that is, mModelOutputs but not mTempsAsSubModelOutputs.
    149     std::vector<uint32_t> mOutputIndexSubModelToFromModel;
    150     // Converts indexes into mOutputsAsSubModelInputs to indexes into
    151     // main model outputs (these are input and output indexes, not
    152     // operand indexes).  To be specific, if the main model outputs
    153     // are mainModelOutputs,
    154     //
    155     //     mOutputsAsSubModelInputsIndexToFromModel.size() ==
    156     //     mOutputsAsSubModelInputs.size()
    157     //
    158     // and when (0 <= i < mOutputsAsSubModelInputs.size()),
    159     //
    160     //     mainModelOutputs[mOutputsAsSubModelInputsIndexToFromModel[i]] ==
    161     //     mOutputsAsSubModelInputs[i].first
    162     std::vector<uint32_t> mOutputsAsSubModelInputsIndexToFromModel;
    163 
    164     // The compilation caching token.
    165     TokenHasher mToken;
    166 };
    167 
    168 class ExecutionPlan {
    169 public:
    170     ExecutionPlan(const ExecutionPlan&) = delete;
    171     ExecutionPlan& operator=(const ExecutionPlan&) = delete;
    172 
    173     ExecutionPlan() { }
    174     ~ExecutionPlan() { delete mBody; }
    175 
    176     // Controller is part of the interface to a mechanism for
    177     // performing an execution in N steps.
    178     //
    179     // Usage pattern:
    180     // - Instantiate Controller with ExecutionPlan::makeController().
    181     // - Call ExecutionPlan::next() on Controller N+1 times.  The first N times,
    182     //   *executor is set to point to a new StepExecutor corresponding
    183     //   to that step.  The N+1st time, *executor is set to nullptr,
    184     //   signifying there are no more steps.
    185     // - If ExecutionPlan::next() returns anything other than ANEURALNETWORKS_NO_ERROR,
    186     //   a problem has occurred.
    187     class Controller {
    188         friend class ExecutionPlan;
    189     private:
    190         Controller(const Controller&) = delete;
    191         Controller& operator=(const Controller&) = delete;
    192 
    193         // Map from the operand index of a TEMPORARY in the original
    194         // model to an offset into mTemporaries used to represent that
    195         // TEMPORARY as an inter-partition input or output.
    196         typedef std::map<uint32_t, uint32_t> SubModelInputsAndOutputsType;
    197 
    198         static const size_t kBadStepIndex = ~size_t(0);
    199 
    200         Controller(const ExecutionPlan* plan, ExecutionBuilder* executionBuilder,
    201                    const BurstBuilder* burstBuilder,
    202                    std::shared_ptr<const SubModelInputsAndOutputsType> subModelInputsAndOutputs,
    203                    uint32_t totalSizeOfTemporaries);
    204 
    205         const ExecutionPlan* mPlan;
    206         ExecutionBuilder* mExecutionBuilder;
    207         const BurstBuilder* mBurstBuilder;
    208         std::shared_ptr<const SubModelInputsAndOutputsType> mSubModelInputsAndOutputs;  // may be nullptr
    209         Memory mTemporaries;
    210         size_t mNextStepIndex;
    211     };
    212 
    213     std::vector<std::shared_ptr<ExecutionBurstController>> makeBursts() const;
    214 
    215     std::shared_ptr<Controller> makeController(ExecutionBuilder* executionBuilder,
    216                                                const BurstBuilder* burstBuilder) const;
    217 
    218     int next(std::shared_ptr<Controller> controller, std::shared_ptr<StepExecutor>* executor,
    219              std::shared_ptr<ExecutionBurstController>* burstController = nullptr) const;
    220 
    221     // Create the same executor as the last one created by next().
    222     int fallback(std::shared_ptr<Controller> controller, std::shared_ptr<StepExecutor>* executor) const;
    223 
    224     std::shared_ptr<ExecutionStep> createNewStep(const std::shared_ptr<Device> device);
    225 
    226     void becomeSingleStep(const std::shared_ptr<Device> device, const ModelBuilder* model);
    227 
    228     int finish(const ModelBuilder* fromModel, int32_t executionPreference);
    229 
    230     void recordTemporaryDef(uint32_t fromModelIndex, uint32_t stepIndex) {
    231         auto& temporaryToDefiningStep = compound()->mTemporaryToDefiningStep;
    232         nnAssert(temporaryToDefiningStep.count(fromModelIndex) == 0);
    233         temporaryToDefiningStep.insert(std::make_pair(fromModelIndex, stepIndex));
    234     }
    235 
    236     void dump() const;
    237 
    238     void reset();
    239 
    240     bool isValid() const { return mState != EMPTY && mBody != nullptr && mBody->mSuccessfulFinish; }
    241 
    242     void setCaching(const std::string* cacheDir, const uint8_t* token) {
    243         mCacheDir = cacheDir;
    244         mToken = token;
    245     }
    246     const std::string* getCacheDir() const { return mCacheDir; }
    247     const uint8_t* getCacheToken() const { return mToken; }
    248 
    249     // These functions are solely intended for use by unit tests of
    250     // the partitioning algorithm.
    251     enum class Kind { ERROR, EMPTY, SIMPLE, COMPOUND };
    252     Kind forTest_getKind() const;
    253     std::shared_ptr<const Device> forTest_simpleGetDevice() const;
    254     const std::vector<std::shared_ptr<ExecutionStep>>& forTest_compoundGetSteps() const;
    255     bool forTest_hasSubModelOutputsOfUnknownSize() const;
    256     const uint8_t* forTest_simpleGetCacheToken() const;
    257 
    258    private:
    259     void findTempsAsSubModelOutputs();
    260 
    261     struct Body {
    262         virtual ~Body() {}
    263         virtual void dump() const = 0;
    264         virtual int finish(const ModelBuilder* fromModel, int32_t executionPreference) = 0;
    265         virtual bool hasSubModelOutputsOfUnknownSize() const = 0;
    266         bool mSuccessfulFinish = false;
    267     };
    268 
    269     struct SimpleBody : Body {
    270         SimpleBody(std::shared_ptr<Device> device, const ModelBuilder* model,
    271                    const std::string* cacheDir, const uint8_t* token)
    272             : mDevice(device), mModel(model), mCacheDir(cacheDir), mToken(token) {}
    273 
    274         void dump() const override;
    275         int finish(const ModelBuilder* fromModel, int32_t executionPreference) override;
    276         virtual bool hasSubModelOutputsOfUnknownSize() const override { return false; }
    277 
    278         std::shared_ptr<Device> mDevice;
    279         const ModelBuilder* mModel;
    280         std::shared_ptr<VersionedIPreparedModel> mPreparedModel;  // not used for CPU
    281 
    282         const std::string* mCacheDir;
    283         TokenHasher mToken;
    284     };
    285 
    286     struct CompoundBody : Body {
    287         void dump() const override;
    288         int finish(const ModelBuilder* fromModel, int32_t executionPreference) override;
    289         virtual bool hasSubModelOutputsOfUnknownSize() const override {
    290             return mHasSubModelOutputOfUnknownSize;
    291         }
    292 
    293         // TODO: Some of the data is working state information that
    294         // shouldn't be needed after we've constructed but not
    295         // executed the plan.
    296 
    297         std::vector<std::shared_ptr<ExecutionStep>> mSteps;
    298 
    299         // Map from original operand index to defining step index.
    300         // Used for all (and only) TEMPORARY_VARIABLEs.
    301         std::unordered_map<uint32_t, uint32_t> mTemporaryToDefiningStep;
    302 
    303         bool mHasSubModelOutputOfUnknownSize = false;
    304     private:
    305         void findTempsAsSubModelOutputs();
    306     };
    307 
    308     enum { EMPTY, SIMPLE, COMPOUND } mState = EMPTY;
    309     Body* mBody = nullptr;
    310     CompoundBody* compound() {
    311         nnAssert(mState == COMPOUND);
    312         return static_cast<CompoundBody*>(mBody);
    313     }
    314     const CompoundBody* compound() const {
    315         nnAssert(mState == COMPOUND);
    316         return static_cast<const CompoundBody*>(mBody);
    317     }
    318 
    319     // Pointers to compilation caching information in CompilationBuilder.
    320     const std::string* mCacheDir = nullptr;
    321     const uint8_t* mToken = nullptr;
    322 };
    323 
    324 }  // namespace nn
    325 }  // namespace android
    326 
    327 #endif  // ANDROID_ML_NN_RUNTIME_EXECUTION_PLAN_H
    328