// NeuralNetworksWrapper.h
      1 /*
      2  * Copyright (C) 2017 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 // Provides C++ classes to more easily use the Neural Networks API.
     18 
     19 #ifndef ANDROID_ML_NN_RUNTIME_NEURAL_NETWORKS_WRAPPER_H
     20 #define ANDROID_ML_NN_RUNTIME_NEURAL_NETWORKS_WRAPPER_H
     21 
#include "NeuralNetworks.h"

#include <math.h>

#include <optional>
#include <string>
#include <utility>
#include <vector>
     28 
     29 namespace android {
     30 namespace nn {
     31 namespace wrapper {
     32 
// Operand data types. Values mirror the ANEURALNETWORKS_* OperandCode
// constants from NeuralNetworks.h so they can be cast directly when filling
// in an ANeuralNetworksOperandType. TENSOR_* entries name the element type of
// an n-dimensional tensor; the rest are scalar types.
enum class Type {
    FLOAT32 = ANEURALNETWORKS_FLOAT32,
    INT32 = ANEURALNETWORKS_INT32,
    UINT32 = ANEURALNETWORKS_UINT32,
    TENSOR_FLOAT32 = ANEURALNETWORKS_TENSOR_FLOAT32,
    TENSOR_INT32 = ANEURALNETWORKS_TENSOR_INT32,
    TENSOR_QUANT8_ASYMM = ANEURALNETWORKS_TENSOR_QUANT8_ASYMM,
    BOOL = ANEURALNETWORKS_BOOL,
    TENSOR_QUANT16_SYMM = ANEURALNETWORKS_TENSOR_QUANT16_SYMM,
    TENSOR_FLOAT16 = ANEURALNETWORKS_TENSOR_FLOAT16,
    TENSOR_BOOL8 = ANEURALNETWORKS_TENSOR_BOOL8,
    FLOAT16 = ANEURALNETWORKS_FLOAT16,
    TENSOR_QUANT8_SYMM_PER_CHANNEL = ANEURALNETWORKS_TENSOR_QUANT8_SYMM_PER_CHANNEL,
    TENSOR_QUANT16_ASYMM = ANEURALNETWORKS_TENSOR_QUANT16_ASYMM,
    TENSOR_QUANT8_SYMM = ANEURALNETWORKS_TENSOR_QUANT8_SYMM,
};
     49 
// Compilation preferences, mirroring the ANEURALNETWORKS_PREFER_*
// PreferenceCode constants. Passed to Compilation::setPreference() to hint
// the runtime's power/performance trade-off.
enum class ExecutePreference {
    PREFER_LOW_POWER = ANEURALNETWORKS_PREFER_LOW_POWER,
    PREFER_FAST_SINGLE_ANSWER = ANEURALNETWORKS_PREFER_FAST_SINGLE_ANSWER,
    PREFER_SUSTAINED_SPEED = ANEURALNETWORKS_PREFER_SUSTAINED_SPEED
};
     55 
// Result codes returned by the wrapper methods, mirroring the
// ANEURALNETWORKS_* ResultCode constants from NeuralNetworks.h. Runtime
// return values are static_cast directly to this type.
enum class Result {
    NO_ERROR = ANEURALNETWORKS_NO_ERROR,
    OUT_OF_MEMORY = ANEURALNETWORKS_OUT_OF_MEMORY,
    INCOMPLETE = ANEURALNETWORKS_INCOMPLETE,
    UNEXPECTED_NULL = ANEURALNETWORKS_UNEXPECTED_NULL,
    BAD_DATA = ANEURALNETWORKS_BAD_DATA,
    OP_FAILED = ANEURALNETWORKS_OP_FAILED,
    UNMAPPABLE = ANEURALNETWORKS_UNMAPPABLE,
    BAD_STATE = ANEURALNETWORKS_BAD_STATE,
    OUTPUT_INSUFFICIENT_SIZE = ANEURALNETWORKS_OUTPUT_INSUFFICIENT_SIZE,
    UNAVAILABLE_DEVICE = ANEURALNETWORKS_UNAVAILABLE_DEVICE,
};
     68 
     69 struct SymmPerChannelQuantParams {
     70     ANeuralNetworksSymmPerChannelQuantParams params;
     71     std::vector<float> scales;
     72 
     73     SymmPerChannelQuantParams(std::vector<float> scalesVec, uint32_t channelDim)
     74         : scales(std::move(scalesVec)) {
     75         params = {
     76                 .channelDim = channelDim,
     77                 .scaleCount = static_cast<uint32_t>(scales.size()),
     78                 .scales = scales.size() > 0 ? scales.data() : nullptr,
     79         };
     80     }
     81 
     82     SymmPerChannelQuantParams(const SymmPerChannelQuantParams& other)
     83         : params(other.params), scales(other.scales) {
     84         params.scales = scales.size() > 0 ? scales.data() : nullptr;
     85     }
     86 
     87     SymmPerChannelQuantParams& operator=(const SymmPerChannelQuantParams& other) {
     88         if (this != &other) {
     89             params = other.params;
     90             scales = other.scales;
     91             params.scales = scales.size() > 0 ? scales.data() : nullptr;
     92         }
     93         return *this;
     94     }
     95 };
     96 
     97 struct OperandType {
     98     ANeuralNetworksOperandType operandType;
     99     std::vector<uint32_t> dimensions;
    100     std::optional<SymmPerChannelQuantParams> channelQuant;
    101 
    102     OperandType(const OperandType& other)
    103         : operandType(other.operandType),
    104           dimensions(other.dimensions),
    105           channelQuant(other.channelQuant) {
    106         operandType.dimensions = dimensions.size() > 0 ? dimensions.data() : nullptr;
    107     }
    108 
    109     OperandType& operator=(const OperandType& other) {
    110         if (this != &other) {
    111             operandType = other.operandType;
    112             dimensions = other.dimensions;
    113             channelQuant = other.channelQuant;
    114             operandType.dimensions = dimensions.size() > 0 ? dimensions.data() : nullptr;
    115         }
    116         return *this;
    117     }
    118 
    119     OperandType(Type type, std::vector<uint32_t> d, float scale = 0.0f, int32_t zeroPoint = 0)
    120         : dimensions(std::move(d)), channelQuant(std::nullopt) {
    121         operandType = {
    122                 .type = static_cast<int32_t>(type),
    123                 .dimensionCount = static_cast<uint32_t>(dimensions.size()),
    124                 .dimensions = dimensions.size() > 0 ? dimensions.data() : nullptr,
    125                 .scale = scale,
    126                 .zeroPoint = zeroPoint,
    127         };
    128     }
    129 
    130     OperandType(Type type, std::vector<uint32_t> data, float scale, int32_t zeroPoint,
    131                 SymmPerChannelQuantParams&& channelQuant)
    132         : dimensions(std::move(data)), channelQuant(std::move(channelQuant)) {
    133         operandType = {
    134                 .type = static_cast<int32_t>(type),
    135                 .dimensionCount = static_cast<uint32_t>(dimensions.size()),
    136                 .dimensions = dimensions.size() > 0 ? dimensions.data() : nullptr,
    137                 .scale = scale,
    138                 .zeroPoint = zeroPoint,
    139         };
    140     }
    141 };
    142 
    143 class Memory {
    144    public:
    145     Memory(size_t size, int protect, int fd, size_t offset) {
    146         mValid = ANeuralNetworksMemory_createFromFd(size, protect, fd, offset, &mMemory) ==
    147                  ANEURALNETWORKS_NO_ERROR;
    148     }
    149 
    150     Memory(AHardwareBuffer* buffer) {
    151         mValid = ANeuralNetworksMemory_createFromAHardwareBuffer(buffer, &mMemory) ==
    152                  ANEURALNETWORKS_NO_ERROR;
    153     }
    154 
    155     ~Memory() { ANeuralNetworksMemory_free(mMemory); }
    156 
    157     // Disallow copy semantics to ensure the runtime object can only be freed
    158     // once. Copy semantics could be enabled if some sort of reference counting
    159     // or deep-copy system for runtime objects is added later.
    160     Memory(const Memory&) = delete;
    161     Memory& operator=(const Memory&) = delete;
    162 
    163     // Move semantics to remove access to the runtime object from the wrapper
    164     // object that is being moved. This ensures the runtime object will be
    165     // freed only once.
    166     Memory(Memory&& other) { *this = std::move(other); }
    167     Memory& operator=(Memory&& other) {
    168         if (this != &other) {
    169             ANeuralNetworksMemory_free(mMemory);
    170             mMemory = other.mMemory;
    171             mValid = other.mValid;
    172             other.mMemory = nullptr;
    173             other.mValid = false;
    174         }
    175         return *this;
    176     }
    177 
    178     ANeuralNetworksMemory* get() const { return mMemory; }
    179     bool isValid() const { return mValid; }
    180 
    181    private:
    182     ANeuralNetworksMemory* mMemory = nullptr;
    183     bool mValid = true;
    184 };
    185 
    186 class Model {
    187    public:
    188     Model() {
    189         // TODO handle the value returned by this call
    190         ANeuralNetworksModel_create(&mModel);
    191     }
    192     ~Model() { ANeuralNetworksModel_free(mModel); }
    193 
    194     // Disallow copy semantics to ensure the runtime object can only be freed
    195     // once. Copy semantics could be enabled if some sort of reference counting
    196     // or deep-copy system for runtime objects is added later.
    197     Model(const Model&) = delete;
    198     Model& operator=(const Model&) = delete;
    199 
    200     // Move semantics to remove access to the runtime object from the wrapper
    201     // object that is being moved. This ensures the runtime object will be
    202     // freed only once.
    203     Model(Model&& other) { *this = std::move(other); }
    204     Model& operator=(Model&& other) {
    205         if (this != &other) {
    206             ANeuralNetworksModel_free(mModel);
    207             mModel = other.mModel;
    208             mNextOperandId = other.mNextOperandId;
    209             mValid = other.mValid;
    210             other.mModel = nullptr;
    211             other.mNextOperandId = 0;
    212             other.mValid = false;
    213         }
    214         return *this;
    215     }
    216 
    217     Result finish() {
    218         if (mValid) {
    219             auto result = static_cast<Result>(ANeuralNetworksModel_finish(mModel));
    220             if (result != Result::NO_ERROR) {
    221                 mValid = false;
    222             }
    223             return result;
    224         } else {
    225             return Result::BAD_STATE;
    226         }
    227     }
    228 
    229     uint32_t addOperand(const OperandType* type) {
    230         if (ANeuralNetworksModel_addOperand(mModel, &(type->operandType)) !=
    231             ANEURALNETWORKS_NO_ERROR) {
    232             mValid = false;
    233         }
    234         if (type->channelQuant) {
    235             if (ANeuralNetworksModel_setOperandSymmPerChannelQuantParams(
    236                         mModel, mNextOperandId, &type->channelQuant.value().params) !=
    237                 ANEURALNETWORKS_NO_ERROR) {
    238                 mValid = false;
    239             }
    240         }
    241         return mNextOperandId++;
    242     }
    243 
    244     void setOperandValue(uint32_t index, const void* buffer, size_t length) {
    245         if (ANeuralNetworksModel_setOperandValue(mModel, index, buffer, length) !=
    246             ANEURALNETWORKS_NO_ERROR) {
    247             mValid = false;
    248         }
    249     }
    250 
    251     void setOperandValueFromMemory(uint32_t index, const Memory* memory, uint32_t offset,
    252                                    size_t length) {
    253         if (ANeuralNetworksModel_setOperandValueFromMemory(mModel, index, memory->get(), offset,
    254                                                            length) != ANEURALNETWORKS_NO_ERROR) {
    255             mValid = false;
    256         }
    257     }
    258 
    259     void addOperation(ANeuralNetworksOperationType type, const std::vector<uint32_t>& inputs,
    260                       const std::vector<uint32_t>& outputs) {
    261         if (ANeuralNetworksModel_addOperation(mModel, type, static_cast<uint32_t>(inputs.size()),
    262                                               inputs.data(), static_cast<uint32_t>(outputs.size()),
    263                                               outputs.data()) != ANEURALNETWORKS_NO_ERROR) {
    264             mValid = false;
    265         }
    266     }
    267     void identifyInputsAndOutputs(const std::vector<uint32_t>& inputs,
    268                                   const std::vector<uint32_t>& outputs) {
    269         if (ANeuralNetworksModel_identifyInputsAndOutputs(
    270                     mModel, static_cast<uint32_t>(inputs.size()), inputs.data(),
    271                     static_cast<uint32_t>(outputs.size()),
    272                     outputs.data()) != ANEURALNETWORKS_NO_ERROR) {
    273             mValid = false;
    274         }
    275     }
    276 
    277     void relaxComputationFloat32toFloat16(bool isRelax) {
    278         if (ANeuralNetworksModel_relaxComputationFloat32toFloat16(mModel, isRelax) ==
    279             ANEURALNETWORKS_NO_ERROR) {
    280             mRelaxed = isRelax;
    281         }
    282     }
    283 
    284     ANeuralNetworksModel* getHandle() const { return mModel; }
    285     bool isValid() const { return mValid; }
    286     bool isRelaxed() const { return mRelaxed; }
    287 
    288    protected:
    289     ANeuralNetworksModel* mModel = nullptr;
    290     // We keep track of the operand ID as a convenience to the caller.
    291     uint32_t mNextOperandId = 0;
    292     bool mValid = true;
    293     bool mRelaxed = false;
    294 };
    295 
    296 class Event {
    297    public:
    298     Event() {}
    299     ~Event() { ANeuralNetworksEvent_free(mEvent); }
    300 
    301     // Disallow copy semantics to ensure the runtime object can only be freed
    302     // once. Copy semantics could be enabled if some sort of reference counting
    303     // or deep-copy system for runtime objects is added later.
    304     Event(const Event&) = delete;
    305     Event& operator=(const Event&) = delete;
    306 
    307     // Move semantics to remove access to the runtime object from the wrapper
    308     // object that is being moved. This ensures the runtime object will be
    309     // freed only once.
    310     Event(Event&& other) { *this = std::move(other); }
    311     Event& operator=(Event&& other) {
    312         if (this != &other) {
    313             ANeuralNetworksEvent_free(mEvent);
    314             mEvent = other.mEvent;
    315             other.mEvent = nullptr;
    316         }
    317         return *this;
    318     }
    319 
    320     Result wait() { return static_cast<Result>(ANeuralNetworksEvent_wait(mEvent)); }
    321 
    322     // Only for use by Execution
    323     void set(ANeuralNetworksEvent* newEvent) {
    324         ANeuralNetworksEvent_free(mEvent);
    325         mEvent = newEvent;
    326     }
    327 
    328    private:
    329     ANeuralNetworksEvent* mEvent = nullptr;
    330 };
    331 
    332 class Compilation {
    333    public:
    334     Compilation(const Model* model) {
    335         int result = ANeuralNetworksCompilation_create(model->getHandle(), &mCompilation);
    336         if (result != 0) {
    337             // TODO Handle the error
    338         }
    339     }
    340 
    341     ~Compilation() { ANeuralNetworksCompilation_free(mCompilation); }
    342 
    343     // Disallow copy semantics to ensure the runtime object can only be freed
    344     // once. Copy semantics could be enabled if some sort of reference counting
    345     // or deep-copy system for runtime objects is added later.
    346     Compilation(const Compilation&) = delete;
    347     Compilation& operator=(const Compilation&) = delete;
    348 
    349     // Move semantics to remove access to the runtime object from the wrapper
    350     // object that is being moved. This ensures the runtime object will be
    351     // freed only once.
    352     Compilation(Compilation&& other) { *this = std::move(other); }
    353     Compilation& operator=(Compilation&& other) {
    354         if (this != &other) {
    355             ANeuralNetworksCompilation_free(mCompilation);
    356             mCompilation = other.mCompilation;
    357             other.mCompilation = nullptr;
    358         }
    359         return *this;
    360     }
    361 
    362     Result setPreference(ExecutePreference preference) {
    363         return static_cast<Result>(ANeuralNetworksCompilation_setPreference(
    364                 mCompilation, static_cast<int32_t>(preference)));
    365     }
    366 
    367     Result setCaching(const std::string& cacheDir, const std::vector<uint8_t>& token) {
    368         if (token.size() != ANEURALNETWORKS_BYTE_SIZE_OF_CACHE_TOKEN) {
    369             return Result::BAD_DATA;
    370         }
    371         return static_cast<Result>(ANeuralNetworksCompilation_setCaching(
    372                 mCompilation, cacheDir.c_str(), token.data()));
    373     }
    374 
    375     Result finish() { return static_cast<Result>(ANeuralNetworksCompilation_finish(mCompilation)); }
    376 
    377     ANeuralNetworksCompilation* getHandle() const { return mCompilation; }
    378 
    379    private:
    380     ANeuralNetworksCompilation* mCompilation = nullptr;
    381 };
    382 
    383 class Execution {
    384    public:
    385     Execution(const Compilation* compilation) {
    386         int result = ANeuralNetworksExecution_create(compilation->getHandle(), &mExecution);
    387         if (result != 0) {
    388             // TODO Handle the error
    389         }
    390     }
    391 
    392     ~Execution() { ANeuralNetworksExecution_free(mExecution); }
    393 
    394     // Disallow copy semantics to ensure the runtime object can only be freed
    395     // once. Copy semantics could be enabled if some sort of reference counting
    396     // or deep-copy system for runtime objects is added later.
    397     Execution(const Execution&) = delete;
    398     Execution& operator=(const Execution&) = delete;
    399 
    400     // Move semantics to remove access to the runtime object from the wrapper
    401     // object that is being moved. This ensures the runtime object will be
    402     // freed only once.
    403     Execution(Execution&& other) { *this = std::move(other); }
    404     Execution& operator=(Execution&& other) {
    405         if (this != &other) {
    406             ANeuralNetworksExecution_free(mExecution);
    407             mExecution = other.mExecution;
    408             other.mExecution = nullptr;
    409         }
    410         return *this;
    411     }
    412 
    413     Result setInput(uint32_t index, const void* buffer, size_t length,
    414                     const ANeuralNetworksOperandType* type = nullptr) {
    415         return static_cast<Result>(
    416                 ANeuralNetworksExecution_setInput(mExecution, index, type, buffer, length));
    417     }
    418 
    419     Result setInputFromMemory(uint32_t index, const Memory* memory, uint32_t offset,
    420                               uint32_t length, const ANeuralNetworksOperandType* type = nullptr) {
    421         return static_cast<Result>(ANeuralNetworksExecution_setInputFromMemory(
    422                 mExecution, index, type, memory->get(), offset, length));
    423     }
    424 
    425     Result setOutput(uint32_t index, void* buffer, size_t length,
    426                      const ANeuralNetworksOperandType* type = nullptr) {
    427         return static_cast<Result>(
    428                 ANeuralNetworksExecution_setOutput(mExecution, index, type, buffer, length));
    429     }
    430 
    431     Result setOutputFromMemory(uint32_t index, const Memory* memory, uint32_t offset,
    432                                uint32_t length, const ANeuralNetworksOperandType* type = nullptr) {
    433         return static_cast<Result>(ANeuralNetworksExecution_setOutputFromMemory(
    434                 mExecution, index, type, memory->get(), offset, length));
    435     }
    436 
    437     Result startCompute(Event* event) {
    438         ANeuralNetworksEvent* ev = nullptr;
    439         Result result = static_cast<Result>(ANeuralNetworksExecution_startCompute(mExecution, &ev));
    440         event->set(ev);
    441         return result;
    442     }
    443 
    444     Result compute() { return static_cast<Result>(ANeuralNetworksExecution_compute(mExecution)); }
    445 
    446     Result getOutputOperandDimensions(uint32_t index, std::vector<uint32_t>* dimensions) {
    447         uint32_t rank = 0;
    448         Result result = static_cast<Result>(
    449                 ANeuralNetworksExecution_getOutputOperandRank(mExecution, index, &rank));
    450         dimensions->resize(rank);
    451         if ((result != Result::NO_ERROR && result != Result::OUTPUT_INSUFFICIENT_SIZE) ||
    452             rank == 0) {
    453             return result;
    454         }
    455         result = static_cast<Result>(ANeuralNetworksExecution_getOutputOperandDimensions(
    456                 mExecution, index, dimensions->data()));
    457         return result;
    458     }
    459 
    460    private:
    461     ANeuralNetworksExecution* mExecution = nullptr;
    462 };
    463 
    464 }  // namespace wrapper
    465 }  // namespace nn
    466 }  // namespace android
    467 
    468 #endif  //  ANDROID_ML_NN_RUNTIME_NEURAL_NETWORKS_WRAPPER_H
    469