/*
 * Copyright (C) 2017 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef ANDROID_ML_NN_COMMON_CPU_EXECUTOR_H
#define ANDROID_ML_NN_COMMON_CPU_EXECUTOR_H

#include "HalInterfaces.h"
#include "OperationResolver.h"
#include "OperationsUtils.h"
#include "Utils.h"

#include <android-base/macros.h>
#include <ui/GraphicBuffer.h>
#include <algorithm>
#include <optional>
#include <vector>

namespace android {
namespace nn {

// Information we maintain about each operand during execution;
// parts of it may change as execution proceeds.
struct RunTimeOperandInfo {
    // TODO Storing the type here is redundant, as it won't change during execution.
    OperandType type;
    // The type and dimensions of the operand.  The dimensions can
    // change at runtime.  We include the type because it's useful
    // to pass together with the dimensions to the functions implementing
    // the operators.
    //
    // A dimension of zero has different meanings for different operands at different stages:
    // - Model inputs:
    //   * Specified in the model: implies "dynamic", and must be fully specified in the request.
    //   * Specified in the request: illegal.
    // - Constant operands: illegal.
    // - Model outputs and internal operands:
    //   * Before evaluation: implies unknown, to be deduced during execution.
    //   * After evaluation:
    //     - If isSufficient() reports true: the tensor is zero-sized.
    //     - Otherwise: implies unknown.
    std::vector<uint32_t> dimensions;

    float scale;
    int32_t zeroPoint;
    // Where the operand's data is stored.  Check the corresponding
    // location information in the model to figure out if this points
    // to memory we have allocated for a temporary operand.
    uint8_t* buffer;
    // The length of the buffer in bytes.
    uint32_t length;
    // Whether this is a temporary variable, a model input, a constant, etc.
    OperandLifeTime lifetime;
    // Keeps track of how many operations have yet to make use
    // of this temporary variable.  When the count is decremented to 0,
    // we free the buffer.  For non-temporary variables, this count is
    // always 0.
    uint32_t numberOfUsesLeft;

    Operand::ExtraParams extraParams;

    Shape shape() const {
        return {
                .type = type,
                .dimensions = dimensions,
                .scale = scale,
                .offset = zeroPoint,
                .extraParams = extraParams,
        };
    }

    bool isSufficient() const {
        if (isExtensionOperandType(type)) {
            // We don't know the sizes of extension types.
            return true;
        }
        return length >= nonExtensionOperandSizeOfData(type, dimensions);
    }
};
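
// A minimal usage sketch (illustrative only; the operand values below are
// hypothetical, not taken from any real model):
//
//     RunTimeOperandInfo info;
//     info.type = OperandType::TENSOR_FLOAT32;
//     info.dimensions = {2, 3};
//     info.scale = 0.0f;
//     info.zeroPoint = 0;
//     info.length = 2 * 3 * sizeof(float);
//     Shape shape = info.shape();     // passed to operator implementations
//     bool ok = info.isSufficient();  // true iff buffer holds >= 24 bytes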

// Used to keep a pointer to each of the memory pools.
//
// RunTimePoolInfo references a region of memory. Other RunTimePoolInfo objects
// may reference the same region of memory by either:
// (1) copying an existing RunTimePoolInfo object, or
// (2) creating multiple RunTimePoolInfo objects from the same memory resource
//     (e.g., "createFromHidlMemory" or "createFromExistingBuffer")
//
// If the underlying region of memory is mapped by "createFromHidlMemory", the
// mapping will be sustained until it is no longer referenced by any
// RunTimePoolInfo objects.
class RunTimePoolInfo {
   public:
    static std::optional<RunTimePoolInfo> createFromHidlMemory(const hidl_memory& hidlMemory);
    static RunTimePoolInfo createFromExistingBuffer(uint8_t* buffer);

    uint8_t* getBuffer() const;
    bool update() const;
    hidl_memory getHidlMemory() const;

   private:
    class RunTimePoolInfoImpl;
    RunTimePoolInfo(const std::shared_ptr<const RunTimePoolInfoImpl>& impl);

    std::shared_ptr<const RunTimePoolInfoImpl> mImpl;
};

bool setRunTimePoolInfosFromHidlMemories(std::vector<RunTimePoolInfo>* poolInfos,
                                         const hidl_vec<hidl_memory>& pools);
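
// Creation sketch (illustrative; "memory" stands for a hidl_memory handle
// obtained elsewhere):
//
//     std::optional<RunTimePoolInfo> pool =
//             RunTimePoolInfo::createFromHidlMemory(memory);
//     if (!pool.has_value()) {
//         // mapping failed
//     }
//     uint8_t* base = pool->getBuffer();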

// This class is used to execute a model on the CPU.
class CpuExecutor {
   public:
    // This constructor allows clients of CpuExecutor to provide custom CPU
    // operation implementations. It is used by a sample driver to test
    // extension support.
    //
    // Note that it is not possible to provide custom CPU implementations for
    // non-OperationResolver operations (b/124041202).
    //
    // The operation resolver must outlive the executor.
    explicit CpuExecutor(const IOperationResolver* operationResolver)
        : mOperationResolver(operationResolver) {}

    CpuExecutor() : CpuExecutor(BuiltinOperationResolver::get()) {}

    // Executes the model. The results will be stored at the locations
    // specified in the request.
    // The model must outlive the executor; we prevent it from being modified
    // while run() is executing.
    int run(const Model& model, const Request& request,
            const std::vector<RunTimePoolInfo>& modelPoolInfos,
            const std::vector<RunTimePoolInfo>& requestPoolInfos);

    const std::vector<OutputShape>& getOutputShapes() const {
        CHECK(mFinished) << "getOutputShapes() called on an unfinished CpuExecutor.";
        return mOutputShapes;
    }

   private:
    bool initializeRunTimeInfo(const std::vector<RunTimePoolInfo>& modelPoolInfos,
                               const std::vector<RunTimePoolInfo>& requestPoolInfos);
    // Runs one operation of the graph.
    int executeOperation(const Operation& entry);
    // Decrements the usage count for the operands listed.  Frees the memory
    // allocated for any temporary variable whose count reaches zero.
    void freeNoLongerUsedOperands(const std::vector<uint32_t>& inputs);

    // Frees the memory allocated for any temporary variable, and records the
    // output operand shapes to be returned to the runtime.
    void finish(int result);

    // The model and the request that we'll execute. Only valid while run()
    // is being executed.
    const Model* mModel = nullptr;
    const Request* mRequest = nullptr;

    // Runtime information about all the operands. The dimensions are copied
    // from the model, as they may be modified when we run the operations.
    // Since we make a full copy, the indexes used in the operand
    // descriptions stay valid.
    std::vector<RunTimeOperandInfo> mOperands;

    // The output operand shapes to be returned to the runtime.
    std::vector<OutputShape> mOutputShapes;

    // Whether execution is finished and mOutputShapes is ready.
    bool mFinished = false;

    const IOperationResolver* mOperationResolver;
};
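
// Usage sketch (illustrative only; assumes "model", "request", and the two
// pool vectors were prepared by the caller, e.g. via
// setRunTimePoolInfosFromHidlMemories):
//
//     CpuExecutor executor;
//     int n = executor.run(model, request, modelPoolInfos, requestPoolInfos);
//     if (n == ANEURALNETWORKS_NO_ERROR) {
//         const std::vector<OutputShape>& shapes = executor.getOutputShapes();
//     }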

// Class for setting reasonable OpenMP threading settings. (OpenMP is used by
// the Eigen matrix library.)
//
// Currently sets a low blocktime: the time OpenMP threads busy-wait for more
// work before going to sleep. See b/79159165, https://reviews.llvm.org/D18577.
// The default is 200ms; we set it to 20ms here, see b/109645291. This keeps the
// cores enabled throughout inference computation without too much extra power
// consumption afterwards.
//
// The OpenMP settings are thread-local (applying only to worker threads formed
// from that thread), see https://software.intel.com/en-us/node/522688 and
// http://lists.llvm.org/pipermail/openmp-dev/2016-July/001432.html. This class
// ensures that within the scope in which an object is instantiated we use the
// right settings (scopes may be nested), as long as no other library changes
// them.  (Note that in current NNAPI usage only one instance is used in the
// CpuExecutor thread.)
//
// TODO(mikie): consider also setting the number of threads used. Using as many
// threads as there are cores results in more variable performance: if we don't
// get all cores for our threads, the latency is doubled as we wait for one core
// to do twice the amount of work. Reality is complicated though as not all
// cores are the same. Decision to be based on benchmarking against a
// representative set of workloads and devices. I'm keeping the code here for
// reference.
// b/109953668, disable OpenMP
#ifdef NNAPI_OPENMP
class ScopedOpenmpSettings {
   public:
    ScopedOpenmpSettings();
    ~ScopedOpenmpSettings();
    DISALLOW_COPY_AND_ASSIGN(ScopedOpenmpSettings);

   private:
    int mBlocktimeInitial;
#if NNAPI_LIMIT_CPU_THREADS
    int mMaxThreadsInitial;
#endif
};
#endif  // NNAPI_OPENMP
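
// Scope sketch (only meaningful in builds where NNAPI_OPENMP is defined):
//
//     {
//         ScopedOpenmpSettings openmpSettings;  // lowers the OpenMP blocktime
//         // ... run inference here ...
//     }  // destructor restores the previous settings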

namespace {

template <typename T>
T getScalarData(const RunTimeOperandInfo& info) {
    // TODO: Check buffer is at least as long as size of data.
    T* data = reinterpret_cast<T*>(info.buffer);
    return data[0];
}
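
// A bounds-checked variant might look like this (a sketch only;
// getScalarDataChecked is hypothetical and not part of this header):
//
//     template <typename T>
//     std::optional<T> getScalarDataChecked(const RunTimeOperandInfo& info) {
//         if (info.length < sizeof(T)) return std::nullopt;
//         T data;
//         std::memcpy(&data, info.buffer, sizeof(T));  // needs <cstring>
//         return data;
//     }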

inline bool IsNullInput(const RunTimeOperandInfo* input) {
    return input->lifetime == OperandLifeTime::NO_VALUE;
}

inline int NumInputsWithValues(const Operation& operation,
                               std::vector<RunTimeOperandInfo>& operands) {
    const std::vector<uint32_t>& inputs = operation.inputs;
    return std::count_if(inputs.begin(), inputs.end(),
                         [&operands](uint32_t i) { return !IsNullInput(&operands[i]); });
}

inline int NumOutputs(const Operation& operation) {
    return operation.outputs.size();
}

inline size_t NumDimensions(const RunTimeOperandInfo* operand) {
    return operand->shape().dimensions.size();
}

inline uint32_t SizeOfDimension(const RunTimeOperandInfo* operand, int i) {
    return operand->shape().dimensions[i];
}

inline RunTimeOperandInfo* GetInput(const Operation& operation,
                                    std::vector<RunTimeOperandInfo>& operands, int index) {
    return &operands[operation.inputs[index]];
}

inline RunTimeOperandInfo* GetOutput(const Operation& operation,
                                     std::vector<RunTimeOperandInfo>& operands, int index) {
    return &operands[operation.outputs[index]];
}
}  // anonymous namespace

}  // namespace nn
}  // namespace android

#endif  // ANDROID_ML_NN_COMMON_CPU_EXECUTOR_H