// (removed: code-viewer navigation line — not part of the original source file)
      1 /*
      2  * Copyright (C) 2017 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #ifndef ANDROID_ML_NN_COMMON_CPU_EXECUTOR_H
     18 #define ANDROID_ML_NN_COMMON_CPU_EXECUTOR_H
     19 
     20 #include "HalInterfaces.h"
     21 #include "OperationsUtils.h"
     22 #include "Utils.h"
     23 
     24 #include <algorithm>
     25 #include <android-base/macros.h>
     26 #include <vector>
     27 
     28 namespace android {
     29 namespace nn {
     30 
// Per-operand bookkeeping maintained during execution.  Unlike the
// static model description, the fields here (notably dimensions,
// buffer, and numberOfUsesLeft) may change while the model runs.
struct RunTimeOperandInfo {
    // TODO Storing the type here is redundant, as it won't change during execution.
    OperandType type;
    // The dimensions of the operand.  These can change at runtime
    // (e.g. when an operation computes its output shape).  The type is
    // kept alongside because the two are passed together, as a Shape,
    // to the functions implementing the operators.
    std::vector<uint32_t> dimensions;

    // Quantization parameters; presumably meaningful only for quantized
    // operand types — confirm against the operand type definitions.
    float scale;
    int32_t zeroPoint;
    // Where the operand's data is stored.  Check the corresponding
    // location information in the model to figure out if this points
    // to memory we have allocated for a temporary operand.
    uint8_t* buffer;
    // The length of the buffer.
    uint32_t length;
    // Whether this is a temporary variable, a model input, a constant, etc.
    OperandLifeTime lifetime;
    // Keeps track of how many operations have yet to make use
    // of this temporary variable.  When the count is decremented to 0,
    // we free the buffer.  For non-temporary variables, this count is
    // always 0.
    uint32_t numberOfUsesLeft;

    // Packages the type, dimensions, and quantization parameters into
    // the Shape struct expected by the shape-calculation helpers.
    Shape shape() const {
        return Shape{.type = type, .dimensions = dimensions, .scale = scale, .offset = zeroPoint};
    }
};
     62 
// Used to keep a pointer to each of the memory pools.
//
// In the case of an "mmap_fd" pool, owns the mmap region
// returned by getBuffer() -- i.e., that region goes away
// when the RunTimePoolInfo is destroyed or is assigned to.
//
// Move-only by design (copying would double-release the mapped region).
class RunTimePoolInfo {
public:
    // Maps the given hidl_memory and remembers its base address.
    // If "fail" is not nullptr, and construction fails, then set *fail = true.
    // If construction succeeds, leave *fail unchanged.
    // getBuffer() == nullptr IFF construction fails.
    explicit RunTimePoolInfo(const hidl_memory& hidlMemory, bool* fail);

    // Wraps an already-allocated buffer; no hidl_memory is mapped and
    // this object does not take ownership of "buffer".
    explicit RunTimePoolInfo(uint8_t* buffer);

    // Implement move
    RunTimePoolInfo(RunTimePoolInfo&& other);
    RunTimePoolInfo& operator=(RunTimePoolInfo&& other);

    // Forbid copy
    RunTimePoolInfo(const RunTimePoolInfo&) = delete;
    RunTimePoolInfo& operator=(const RunTimePoolInfo&) = delete;

    ~RunTimePoolInfo() { release(); }

    // Base address of the pool's data, or nullptr if construction failed.
    uint8_t* getBuffer() const { return mBuffer; }

    // NOTE(review): implementation lives in the .cpp — presumably flushes
    // writes back to the shared memory region; confirm there.
    bool update() const;

private:
    void release();
    void moveFrom(RunTimePoolInfo&& other);

    hidl_memory mHidlMemory;     // always used
    uint8_t* mBuffer = nullptr;  // always used
    sp<IMemory> mMemory;         // only used when hidlMemory.name() == "ashmem"
};
     99 
// Builds a RunTimePoolInfo for each hidl_memory in "pools" and stores
// them in *poolInfos.  Returns false if constructing any pool fails.
// (NOTE(review): the state of *poolInfos on failure is determined by the
// .cpp implementation — confirm before relying on it.)
bool setRunTimePoolInfosFromHidlMemories(std::vector<RunTimePoolInfo>* poolInfos,
                                         const hidl_vec<hidl_memory>& pools);
    102 
// This class is used to execute a model on the CPU.
class CpuExecutor {
public:
    // Executes the model. The results will be stored at the locations
    // specified in the request.
    // The model must outlive the executor.  We prevent it from being modified
    // while this is executing.
    // Returns an error code (0 on success; see the .cpp for the exact
    // error-code convention used).
    int run(const V1_0::Model& model, const Request& request,
            const std::vector<RunTimePoolInfo>& modelPoolInfos,
            const std::vector<RunTimePoolInfo>& requestPoolInfos);
    int run(const V1_1::Model& model, const Request& request,
            const std::vector<RunTimePoolInfo>& modelPoolInfos,
            const std::vector<RunTimePoolInfo>& requestPoolInfos);

private:
    // Populates mOperands from mModel/mRequest and the given pools.
    bool initializeRunTimeInfo(const std::vector<RunTimePoolInfo>& modelPoolInfos,
                               const std::vector<RunTimePoolInfo>& requestPoolInfos);
    // Runs one operation of the graph.
    int executeOperation(const Operation& entry);
    // Decrement the usage count for the operands listed.  Frees the memory
    // allocated for any temporary variable with a count of zero.
    void freeNoLongerUsedOperands(const std::vector<uint32_t>& inputs);

    // The model and the request that we'll execute. Only valid while run()
    // is being executed.
    const Model* mModel = nullptr;
    const Request* mRequest = nullptr;

    // We're copying the list of all the dimensions from the model, as
    // these may be modified when we run the operations.  Since we're
    // making a full copy, the indexes used in the operand description
    // stay valid.
    //    std::vector<uint32_t> mDimensions;
    // Runtime information about all the operands.
    std::vector<RunTimeOperandInfo> mOperands;
};
    139 
// Class for setting reasonable OpenMP threading settings. (OpenMP is used by
// the Eigen matrix library.)
//
// Currently sets a low blocktime: the time OpenMP threads busy-wait for more
// work before going to sleep. See b/79159165, https://reviews.llvm.org/D18577.
// The default is 200ms, we set to 20ms here, see b/109645291. This keeps the
// cores enabled throughout inference computation without too much extra power
// consumption afterwards.
//
// The OpenMP settings are thread-local (applying only to worker threads formed
// from that thread), see https://software.intel.com/en-us/node/522688 and
// http://lists.llvm.org/pipermail/openmp-dev/2016-July/001432.html. This class
// ensures that within the scope in which an object is instantiated we use the
// right settings (scopes may be nested), as long as no other library changes
// them.  (Note that in current NNAPI usage only one instance is used in the
// CpuExecutor thread).
//
// TODO(mikie): consider also setting the number of threads used. Using as many
// threads as there are cores results in more variable performance: if we don't
// get all cores for our threads, the latency is doubled as we wait for one core
// to do twice the amount of work. Reality is complicated though as not all
// cores are the same. Decision to be based on benchmarking against a
// representative set of workloads and devices. I'm keeping the code here for
// reference.
class ScopedOpenmpSettings {
public:
    ScopedOpenmpSettings();
    ~ScopedOpenmpSettings();
    DISALLOW_COPY_AND_ASSIGN(ScopedOpenmpSettings);
private:
    // Blocktime in effect before construction; presumably restored by the
    // destructor — confirm against the .cpp implementation.
    int mBlocktimeInitial;
#if NNAPI_LIMIT_CPU_THREADS
    // Thread-count limit in effect before construction (same RAII pattern).
    int mMaxThreadsInitial;
#endif
};
    175 
    176 
    177 namespace {
    178 
    179 template <typename T>
    180 T getScalarData(const RunTimeOperandInfo& info) {
    181   // TODO: Check buffer is at least as long as size of data.
    182   T* data = reinterpret_cast<T*>(info.buffer);
    183   return data[0];
    184 }
    185 
    186 inline bool IsNullInput(const RunTimeOperandInfo *input) {
    187     return input->lifetime == OperandLifeTime::NO_VALUE;
    188 }
    189 
    190 inline int NumInputsWithValues(const Operation &operation,
    191                                std::vector<RunTimeOperandInfo> &operands) {
    192   const std::vector<uint32_t> &inputs = operation.inputs;
    193   return std::count_if(inputs.begin(), inputs.end(),
    194                        [&operands](uint32_t i) {
    195                          return !IsNullInput(&operands[i]);
    196                        });
    197 }
    198 
    199 inline int NumOutputs(const Operation &operation) {
    200   return operation.outputs.size();
    201 }
    202 
    203 inline size_t NumDimensions(const RunTimeOperandInfo *operand) {
    204   return operand->shape().dimensions.size();
    205 }
    206 
    207 inline uint32_t SizeOfDimension(const RunTimeOperandInfo *operand, int i) {
    208   return operand->shape().dimensions[i];
    209 }
    210 
    211 inline RunTimeOperandInfo *GetInput(const Operation &operation,
    212                                     std::vector<RunTimeOperandInfo> &operands,
    213                                     int index) {
    214   return &operands[operation.inputs[index]];
    215 }
    216 
    217 inline RunTimeOperandInfo *GetOutput(const Operation &operation,
    218                                      std::vector<RunTimeOperandInfo> &operands,
    219                                      int index) {
    220   return &operands[operation.outputs[index]];
    221 }
    222 
    223 }  // anonymous namespace
    224 
    225 } // namespace nn
    226 } // namespace android
    227 
    228 #endif // ANDROID_ML_NN_COMMON_CPU_EXECUTOR_H
    229