1 /* 2 * Copyright (C) 2017 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef ANDROID_ML_NN_COMMON_CPU_EXECUTOR_H 18 #define ANDROID_ML_NN_COMMON_CPU_EXECUTOR_H 19 20 #include "HalInterfaces.h" 21 #include "OperationsUtils.h" 22 #include "Utils.h" 23 24 #include <algorithm> 25 #include <android-base/macros.h> 26 #include <vector> 27 28 namespace android { 29 namespace nn { 30 31 // Information we maintain about each operand during execution that 32 // may change during execution. 33 struct RunTimeOperandInfo { 34 // TODO Storing the type here is redundant, as it won't change during execution. 35 OperandType type; 36 // The type and dimensions of the operand. The dimensions can 37 // change at runtime. We include the type because it's useful 38 // to pass together with the dimension to the functions implementing 39 // the operators. 40 std::vector<uint32_t> dimensions; 41 42 float scale; 43 int32_t zeroPoint; 44 // Where the operand's data is stored. Check the corresponding 45 // location information in the model to figure out if this points 46 // to memory we have allocated for an temporary operand. 47 uint8_t* buffer; 48 // The length of the buffer. 49 uint32_t length; 50 // Whether this is a temporary variable, a model input, a constant, etc. 51 OperandLifeTime lifetime; 52 // Keeps track of how many operations have yet to make use 53 // of this temporary variable. When the count is decremented to 0, 54 // we free the buffer. For non-temporary variables, this count is 55 // always 0. 56 uint32_t numberOfUsesLeft; 57 58 Shape shape() const { 59 return Shape{.type = type, .dimensions = dimensions, .scale = scale, .offset = zeroPoint}; 60 } 61 }; 62 63 // Used to keep a pointer to each of the memory pools. 64 // 65 // In the case of an "mmap_fd" pool, owns the mmap region 66 // returned by getBuffer() -- i.e., that region goes away 67 // when the RunTimePoolInfo is destroyed or is assigned to. 68 class RunTimePoolInfo { 69 public: 70 // If "fail" is not nullptr, and construction fails, then set *fail = true. 71 // If construction succeeds, leave *fail unchanged. 72 // getBuffer() == nullptr IFF construction fails. 73 explicit RunTimePoolInfo(const hidl_memory& hidlMemory, bool* fail); 74 75 explicit RunTimePoolInfo(uint8_t* buffer); 76 77 // Implement move 78 RunTimePoolInfo(RunTimePoolInfo&& other); 79 RunTimePoolInfo& operator=(RunTimePoolInfo&& other); 80 81 // Forbid copy 82 RunTimePoolInfo(const RunTimePoolInfo&) = delete; 83 RunTimePoolInfo& operator=(const RunTimePoolInfo&) = delete; 84 85 ~RunTimePoolInfo() { release(); } 86 87 uint8_t* getBuffer() const { return mBuffer; } 88 89 bool update() const; 90 91 private: 92 void release(); 93 void moveFrom(RunTimePoolInfo&& other); 94 95 hidl_memory mHidlMemory; // always used 96 uint8_t* mBuffer = nullptr; // always used 97 sp<IMemory> mMemory; // only used when hidlMemory.name() == "ashmem" 98 }; 99 100 bool setRunTimePoolInfosFromHidlMemories(std::vector<RunTimePoolInfo>* poolInfos, 101 const hidl_vec<hidl_memory>& pools); 102 103 // This class is used to execute a model on the CPU. 104 class CpuExecutor { 105 public: 106 // Executes the model. The results will be stored at the locations 107 // specified in the constructor. 108 // The model must outlive the executor. We prevent it from being modified 109 // while this is executing. 110 int run(const V1_0::Model& model, const Request& request, 111 const std::vector<RunTimePoolInfo>& modelPoolInfos, 112 const std::vector<RunTimePoolInfo>& requestPoolInfos); 113 int run(const V1_1::Model& model, const Request& request, 114 const std::vector<RunTimePoolInfo>& modelPoolInfos, 115 const std::vector<RunTimePoolInfo>& requestPoolInfos); 116 117 private: 118 bool initializeRunTimeInfo(const std::vector<RunTimePoolInfo>& modelPoolInfos, 119 const std::vector<RunTimePoolInfo>& requestPoolInfos); 120 // Runs one operation of the graph. 121 int executeOperation(const Operation& entry); 122 // Decrement the usage count for the operands listed. Frees the memory 123 // allocated for any temporary variable with a count of zero. 124 void freeNoLongerUsedOperands(const std::vector<uint32_t>& inputs); 125 126 // The model and the request that we'll execute. Only valid while run() 127 // is being executed. 128 const Model* mModel = nullptr; 129 const Request* mRequest = nullptr; 130 131 // We're copying the list of all the dimensions from the model, as 132 // these may be modified when we run the operatins. Since we're 133 // making a full copy, the indexes used in the operand description 134 // stay valid. 135 // std::vector<uint32_t> mDimensions; 136 // Runtime information about all the operands. 137 std::vector<RunTimeOperandInfo> mOperands; 138 }; 139 140 // Class for setting reasonable OpenMP threading settings. (OpenMP is used by 141 // the Eigen matrix library.) 142 // 143 // Currently sets a low blocktime: the time OpenMP threads busy-wait for more 144 // work before going to sleep. See b/79159165, https://reviews.llvm.org/D18577. 145 // The default is 200ms, we set to 20ms here, see b/109645291. This keeps the 146 // cores enabled throughout inference computation without too much extra power 147 // consumption afterwards. 148 // 149 // The OpenMP settings are thread-local (applying only to worker threads formed 150 // from that thread), see https://software.intel.com/en-us/node/522688 and 151 // http://lists.llvm.org/pipermail/openmp-dev/2016-July/001432.html. This class 152 // ensures that within the scope in which an object is instantiated we use the 153 // right settings (scopes may be nested), as long as no other library changes 154 // them. (Note that in current NNAPI usage only one instance is used in the 155 // CpuExecutor thread). 156 // 157 // TODO(mikie): consider also setting the number of threads used. Using as many 158 // threads as there are cores results in more variable performance: if we don't 159 // get all cores for our threads, the latency is doubled as we wait for one core 160 // to do twice the amount of work. Reality is complicated though as not all 161 // cores are the same. Decision to be based on benchmarking against a 162 // representative set of workloads and devices. I'm keeping the code here for 163 // reference. 164 class ScopedOpenmpSettings { 165 public: 166 ScopedOpenmpSettings(); 167 ~ScopedOpenmpSettings(); 168 DISALLOW_COPY_AND_ASSIGN(ScopedOpenmpSettings); 169 private: 170 int mBlocktimeInitial; 171 #if NNAPI_LIMIT_CPU_THREADS 172 int mMaxThreadsInitial; 173 #endif 174 }; 175 176 177 namespace { 178 179 template <typename T> 180 T getScalarData(const RunTimeOperandInfo& info) { 181 // TODO: Check buffer is at least as long as size of data. 182 T* data = reinterpret_cast<T*>(info.buffer); 183 return data[0]; 184 } 185 186 inline bool IsNullInput(const RunTimeOperandInfo *input) { 187 return input->lifetime == OperandLifeTime::NO_VALUE; 188 } 189 190 inline int NumInputsWithValues(const Operation &operation, 191 std::vector<RunTimeOperandInfo> &operands) { 192 const std::vector<uint32_t> &inputs = operation.inputs; 193 return std::count_if(inputs.begin(), inputs.end(), 194 [&operands](uint32_t i) { 195 return !IsNullInput(&operands[i]); 196 }); 197 } 198 199 inline int NumOutputs(const Operation &operation) { 200 return operation.outputs.size(); 201 } 202 203 inline size_t NumDimensions(const RunTimeOperandInfo *operand) { 204 return operand->shape().dimensions.size(); 205 } 206 207 inline uint32_t SizeOfDimension(const RunTimeOperandInfo *operand, int i) { 208 return operand->shape().dimensions[i]; 209 } 210 211 inline RunTimeOperandInfo *GetInput(const Operation &operation, 212 std::vector<RunTimeOperandInfo> &operands, 213 int index) { 214 return &operands[operation.inputs[index]]; 215 } 216 217 inline RunTimeOperandInfo *GetOutput(const Operation &operation, 218 std::vector<RunTimeOperandInfo> &operands, 219 int index) { 220 return &operands[operation.outputs[index]]; 221 } 222 223 } // anonymous namespace 224 225 } // namespace nn 226 } // namespace android 227 228 #endif // ANDROID_ML_NN_COMMON_CPU_EXECUTOR_H 229