/*
 * Copyright (C) 2017 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#ifndef ANDROID_ML_NN_COMMON_CPU_EXECUTOR_H
#define ANDROID_ML_NN_COMMON_CPU_EXECUTOR_H

#include "HalInterfaces.h"
#include "OperationResolver.h"
#include "OperationsUtils.h"
#include "Utils.h"

#include <android-base/macros.h>
#include <ui/GraphicBuffer.h>
#include <algorithm>
#include <optional>
#include <vector>

namespace android {
namespace nn {

// Information we maintain about each operand during execution that
// may change during execution.
struct RunTimeOperandInfo {
    // TODO Storing the type here is redundant, as it won't change during execution.
    OperandType type;
    // The type and dimensions of the operand. The dimensions can
    // change at runtime. We include the type because it's useful
    // to pass together with the dimension to the functions implementing
    // the operators.
    //
    // A dimension being zero has different meanings for different operands at different stages:
    // - Model inputs:
    //     * Specified in model: implies "dynamic", and must be fully-specified in request.
    //     * Specified in request: illegal.
    // - Constant operands: illegal.
    // - Model outputs and internal operands:
    //     * Before evaluation: implies unknown and to be deduced from execution.
    //     * After evaluation:
    //         - If isSufficient reports true: the tensor is zero-sized.
    //         - Otherwise: implies unknown.
    std::vector<uint32_t> dimensions;

    // Quantization parameters, forwarded verbatim into Shape by shape() below
    // (zeroPoint is surfaced there as "offset").
    // NOTE(review): presumably only meaningful for quantized operand types —
    // confirm against the HAL operand definition.
    float scale;
    int32_t zeroPoint;
    // Where the operand's data is stored. Check the corresponding
    // location information in the model to figure out if this points
    // to memory we have allocated for a temporary operand.
    uint8_t* buffer;
    // The length of the buffer.
    uint32_t length;
    // Whether this is a temporary variable, a model input, a constant, etc.
    OperandLifeTime lifetime;
    // Keeps track of how many operations have yet to make use
    // of this temporary variable. When the count is decremented to 0,
    // we free the buffer. For non-temporary variables, this count is
    // always 0.
    uint32_t numberOfUsesLeft;

    // Extra type-specific operand parameters carried over from the model operand.
    Operand::ExtraParams extraParams;

    // Packages this operand's metadata into the Shape struct consumed by the
    // operation implementations. Note that zeroPoint becomes Shape::offset.
    Shape shape() const {
        return {
                .type = type,
                .dimensions = dimensions,
                .scale = scale,
                .offset = zeroPoint,
                .extraParams = extraParams,
        };
    }

    // Returns true if "length" is big enough to hold the data described by
    // (type, dimensions). The size of an extension operand type cannot be
    // computed here, so extension operands are conservatively reported as
    // sufficient.
    bool isSufficient() const {
        if (isExtensionOperandType(type)) {
            // We don't know sizes of extension types.
            return true;
        }
        return length >= nonExtensionOperandSizeOfData(type, dimensions);
    }
};

// Used to keep a pointer to each of the memory pools.
//
// RunTimePoolInfo references a region of memory. Other RunTimePoolInfo objects
// may reference the same region of memory by either:
// (1) copying an existing RunTimePoolInfo object, or
// (2) creating multiple RunTimePoolInfo objects from the same memory resource
//     (e.g., "createFromHidlMemory" or "createFromExistingBuffer")
//
// If the underlying region of memory is mapped by "createFromHidlMemory", the
// mapping will be sustained until it is no longer referenced by any
// RunTimePoolInfo objects.
class RunTimePoolInfo {
   public:
    // Creates a pool backed by `hidlMemory`. Returns std::nullopt on failure
    // (presumably when the memory cannot be mapped — confirm in CpuExecutor.cpp).
    static std::optional<RunTimePoolInfo> createFromHidlMemory(const hidl_memory& hidlMemory);
    // Wraps an already-mapped buffer; the caller retains ownership of `buffer`.
    static RunTimePoolInfo createFromExistingBuffer(uint8_t* buffer);

    // Pointer to the start of the pool's memory region.
    uint8_t* getBuffer() const;
    // NOTE(review): presumably commits CPU writes back to the underlying
    // shared memory; returns false on failure — confirm in CpuExecutor.cpp.
    bool update() const;
    hidl_memory getHidlMemory() const;

   private:
    // Hidden implementation; shared so that copies of RunTimePoolInfo keep the
    // underlying mapping alive (see class comment above).
    class RunTimePoolInfoImpl;
    RunTimePoolInfo(const std::shared_ptr<const RunTimePoolInfoImpl>& impl);

    std::shared_ptr<const RunTimePoolInfoImpl> mImpl;
};

// Fills `poolInfos` with one RunTimePoolInfo per entry of `pools`.
// Returns false if any pool fails to be created.
bool setRunTimePoolInfosFromHidlMemories(std::vector<RunTimePoolInfo>* poolInfos,
                                         const hidl_vec<hidl_memory>& pools);

// This class is used to execute a model on the CPU.
class CpuExecutor {
   public:
    // This constructor allows clients of CpuExecutor to provide custom CPU
    // operation implementations. It is used by a sample driver to test
    // extension support.
    //
    // Note that it is not possible to provide custom CPU implementations for
    // non-OperationResolver operations (b/124041202).
    //
    // The operation resolver must outlive the executor.
    explicit CpuExecutor(const IOperationResolver* operationResolver)
        : mOperationResolver(operationResolver) {}

    CpuExecutor() : CpuExecutor(BuiltinOperationResolver::get()) {}

    // Executes the model. The results will be stored at the locations
    // specified in the constructor.
    // The model must outlive the executor. We prevent it from being modified
    // while this is executing.
    // Returns an int result code (presumably an ANEURALNETWORKS_* value —
    // confirm in CpuExecutor.cpp).
    int run(const Model& model, const Request& request,
            const std::vector<RunTimePoolInfo>& modelPoolInfos,
            const std::vector<RunTimePoolInfo>& requestPoolInfos);

    // Shapes of the output operands, valid only after run() has finished.
    const std::vector<OutputShape>& getOutputShapes() const {
        CHECK(mFinished) << "getOutputShapes() called by an unfinished CpuExecutor.";
        return mOutputShapes;
    }

   private:
    bool initializeRunTimeInfo(const std::vector<RunTimePoolInfo>& modelPoolInfos,
                               const std::vector<RunTimePoolInfo>& requestPoolInfos);
    // Runs one operation of the graph.
    int executeOperation(const Operation& entry);
    // Decrement the usage count for the operands listed. Frees the memory
    // allocated for any temporary variable with a count of zero.
    void freeNoLongerUsedOperands(const std::vector<uint32_t>& inputs);

    // Frees the memory allocated for any temporary variable, and sets the
    // output operand shapes returning to the runtime.
    void finish(int result);

    // The model and the request that we'll execute. Only valid while run()
    // is being executed.
    const Model* mModel = nullptr;
    const Request* mRequest = nullptr;

    // Runtime information about all the operands. This is a full copy of the
    // operand metadata (including dimensions, which operations may modify at
    // runtime), so the operand indexes used by the model stay valid.
    std::vector<RunTimeOperandInfo> mOperands;

    // The output operand shapes returning to the runtime.
    std::vector<OutputShape> mOutputShapes;

    // Whether execution is finished and mOutputShapes is ready
    bool mFinished = false;

    const IOperationResolver* mOperationResolver;
};

// Class for setting reasonable OpenMP threading settings. (OpenMP is used by
// the Eigen matrix library.)
189 // 190 // Currently sets a low blocktime: the time OpenMP threads busy-wait for more 191 // work before going to sleep. See b/79159165, https://reviews.llvm.org/D18577. 192 // The default is 200ms, we set to 20ms here, see b/109645291. This keeps the 193 // cores enabled throughout inference computation without too much extra power 194 // consumption afterwards. 195 // 196 // The OpenMP settings are thread-local (applying only to worker threads formed 197 // from that thread), see https://software.intel.com/en-us/node/522688 and 198 // http://lists.llvm.org/pipermail/openmp-dev/2016-July/001432.html. This class 199 // ensures that within the scope in which an object is instantiated we use the 200 // right settings (scopes may be nested), as long as no other library changes 201 // them. (Note that in current NNAPI usage only one instance is used in the 202 // CpuExecutor thread). 203 // 204 // TODO(mikie): consider also setting the number of threads used. Using as many 205 // threads as there are cores results in more variable performance: if we don't 206 // get all cores for our threads, the latency is doubled as we wait for one core 207 // to do twice the amount of work. Reality is complicated though as not all 208 // cores are the same. Decision to be based on benchmarking against a 209 // representative set of workloads and devices. I'm keeping the code here for 210 // reference. 211 // b/109953668, disable OpenMP 212 #ifdef NNAPI_OPENMP 213 class ScopedOpenmpSettings { 214 public: 215 ScopedOpenmpSettings(); 216 ~ScopedOpenmpSettings(); 217 DISALLOW_COPY_AND_ASSIGN(ScopedOpenmpSettings); 218 private: 219 int mBlocktimeInitial; 220 #if NNAPI_LIMIT_CPU_THREADS 221 int mMaxThreadsInitial; 222 #endif 223 }; 224 #endif // NNAPI_OPENMP 225 226 227 namespace { 228 229 template <typename T> 230 T getScalarData(const RunTimeOperandInfo& info) { 231 // TODO: Check buffer is at least as long as size of data. 
232 T* data = reinterpret_cast<T*>(info.buffer); 233 return data[0]; 234 } 235 236 inline bool IsNullInput(const RunTimeOperandInfo *input) { 237 return input->lifetime == OperandLifeTime::NO_VALUE; 238 } 239 240 inline int NumInputsWithValues(const Operation &operation, 241 std::vector<RunTimeOperandInfo> &operands) { 242 const std::vector<uint32_t> &inputs = operation.inputs; 243 return std::count_if(inputs.begin(), inputs.end(), 244 [&operands](uint32_t i) { 245 return !IsNullInput(&operands[i]); 246 }); 247 } 248 249 inline int NumOutputs(const Operation &operation) { 250 return operation.outputs.size(); 251 } 252 253 inline size_t NumDimensions(const RunTimeOperandInfo *operand) { 254 return operand->shape().dimensions.size(); 255 } 256 257 inline uint32_t SizeOfDimension(const RunTimeOperandInfo *operand, int i) { 258 return operand->shape().dimensions[i]; 259 } 260 261 inline RunTimeOperandInfo *GetInput(const Operation &operation, 262 std::vector<RunTimeOperandInfo> &operands, 263 int index) { 264 return &operands[operation.inputs[index]]; 265 } 266 267 inline RunTimeOperandInfo *GetOutput(const Operation &operation, 268 std::vector<RunTimeOperandInfo> &operands, 269 int index) { 270 return &operands[operation.outputs[index]]; 271 } 272 273 } // anonymous namespace 274 275 } // namespace nn 276 } // namespace android 277 278 #endif // ANDROID_ML_NN_COMMON_CPU_EXECUTOR_H 279