/*
 * Copyright (C) 2017 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#define LOG_TAG "CpuExecutor"

#include "CpuExecutor.h"

#include "NeuralNetworks.h"
#include "Operations.h"

#include "Eigen/Core"
#include <omp.h>
#include <sys/mman.h>

namespace android {
namespace nn {

// TODO: short term, turn shared memory mapping and updating into a utility function.
// TODO: long term, implement mmap_fd as a hidl IMemory service.
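// Maps the given hidl_memory into this process: "ashmem" pools go through
// IMemory::mapMemory(), while "mmap_fd" pools are mmap()ed directly from the
// file descriptor stored in the handle. On failure, *fail is set (if provided)
// and the pool is left unmapped.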
RunTimePoolInfo::RunTimePoolInfo(const hidl_memory& hidlMemory, bool* fail) {
    sp<IMemory> memory;
    uint8_t* buffer = nullptr;

    auto memType = hidlMemory.name();
    if (memType == "ashmem") {
        memory = mapMemory(hidlMemory);
        if (memory == nullptr) {
            LOG(ERROR) << "Can't map shared memory.";
            if (fail) *fail = true;
            return;
        }
        memory->update();
        buffer = reinterpret_cast<uint8_t*>(static_cast<void*>(memory->getPointer()));
        if (buffer == nullptr) {
            LOG(ERROR) << "Can't access shared memory.";
            if (fail) *fail = true;
            return;
        }
    } else if (memType == "mmap_fd") {
        size_t size = hidlMemory.size();
        int fd = hidlMemory.handle()->data[0];
        int prot = hidlMemory.handle()->data[1];
        size_t offset = getSizeFromInts(hidlMemory.handle()->data[2],
                                        hidlMemory.handle()->data[3]);
        buffer = static_cast<uint8_t*>(mmap(nullptr, size, prot, MAP_SHARED, fd, offset));
        if (buffer == MAP_FAILED) {
            LOG(ERROR) << "RunTimePoolInfo(): Can't mmap the file descriptor.";
            if (fail) *fail = true;
            return;
        }
    } else {
        LOG(ERROR) << "RunTimePoolInfo(): unsupported hidl_memory type";
        if (fail) *fail = true;
        return;
    }

    mHidlMemory = hidlMemory;
    mBuffer     = buffer;
    mMemory     = memory;
}

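// Wraps a buffer supplied directly by the caller (a POINTER argument). No
// hidl_memory is associated with the pool, so there is nothing to unmap in
// release() and nothing to sync in update().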
RunTimePoolInfo::RunTimePoolInfo(uint8_t* buffer) {
    mBuffer = buffer;
}

RunTimePoolInfo::RunTimePoolInfo(RunTimePoolInfo&& other) {
    moveFrom(std::move(other));
    other.mBuffer = nullptr;
}

RunTimePoolInfo& RunTimePoolInfo::operator=(RunTimePoolInfo&& other) {
    if (this != &other) {
        release();
        moveFrom(std::move(other));
        other.mBuffer = nullptr;
    }
    return *this;
}

void RunTimePoolInfo::moveFrom(RunTimePoolInfo &&other) {
    mHidlMemory = std::move(other.mHidlMemory);
    mBuffer     = std::move(other.mBuffer);
    mMemory     = std::move(other.mMemory);
}

void RunTimePoolInfo::release() {
    if (mBuffer == nullptr) {
        return;
    }

    auto memType = mHidlMemory.name();
    if (memType == "ashmem") {
        // nothing to do
    } else if (memType == "mmap_fd") {
        size_t size = mHidlMemory.size();
        if (munmap(mBuffer, size)) {
            LOG(ERROR) << "RunTimePoolInfo::release(): Can't munmap";
        }
    } else if (memType == "") {
        // Represents a POINTER argument; nothing to do
    } else {
        LOG(ERROR) << "RunTimePoolInfo::release(): unsupported hidl_memory type";
    }

    mHidlMemory = hidl_memory();
    mMemory     = nullptr;
    mBuffer     = nullptr;
}

// Makes sure the output data is correctly updated after execution.
bool RunTimePoolInfo::update() const {
    auto memType = mHidlMemory.name();
    if (memType == "ashmem") {
        mMemory->commit();
        return true;
    } else if (memType == "mmap_fd") {
        int prot = mHidlMemory.handle()->data[1];
        if (prot & PROT_WRITE) {
            size_t size = mHidlMemory.size();
            return msync(mBuffer, size, MS_SYNC) == 0;
        }
    }
    // No-op for other types of memory.
    return true;
}

bool setRunTimePoolInfosFromHidlMemories(std::vector<RunTimePoolInfo>* poolInfos,
                                         const hidl_vec<hidl_memory>& pools) {
    poolInfos->clear();
    poolInfos->reserve(pools.size());
    bool fail = false;
    for (const auto& pool : pools) {
        poolInfos->emplace_back(pool, &fail);
    }
    if (fail) {
        LOG(ERROR) << "Could not map pools";
        poolInfos->clear();
        return false;
    }
    return true;
}
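// A minimal usage sketch (not part of this file): a driver that has received a
// Request would typically map its pools before execution, e.g.
//
//     std::vector<RunTimePoolInfo> requestPoolInfos;
//     if (!setRunTimePoolInfosFromHidlMemories(&requestPoolInfos, request.pools)) {
//         return ErrorStatus::GENERAL_FAILURE;
//     }
//
// Here "request" and the returned error status are assumptions about the caller,
// not something this function prescribes.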

// Updates the RunTimeOperandInfo with the newly calculated shape.
// Allocates the buffer if needed.
static bool setInfoAndAllocateIfNeeded(RunTimeOperandInfo* info, const Shape& shape) {
    // For user-provided model output operands, the parameters must match the Shape
    // calculated from the preparation step.
    if (info->lifetime == OperandLifeTime::MODEL_OUTPUT) {
        if (info->type != shape.type ||
            info->dimensions != shape.dimensions) {
            LOG(ERROR) << "Invalid type or dimensions for model output";
            return false;
        }
        if (info->type == OperandType::TENSOR_QUANT8_ASYMM &&
            (info->scale != shape.scale || info->zeroPoint != shape.offset)) {
            LOG(ERROR) << "Invalid scale or zeroPoint for model output";
            return false;
        }
    }
    info->type = shape.type;
    info->dimensions = shape.dimensions;
    info->scale = shape.scale;
    info->zeroPoint = shape.offset;
    if (info->lifetime == OperandLifeTime::TEMPORARY_VARIABLE && info->buffer == nullptr) {
        uint32_t length = sizeOfData(info->type, info->dimensions);
        info->buffer = new uint8_t[length];
        if (info->buffer == nullptr) {
            return false;
        }
    }
    return true;
}

// Ignore the .pools entry in model and request.  This will have been taken care of
// by the caller.
int CpuExecutor::run(const V1_0::Model& model, const Request& request,
                     const std::vector<RunTimePoolInfo>& modelPoolInfos,
                     const std::vector<RunTimePoolInfo>& requestPoolInfos) {
    return run(convertToV1_1(model), request, modelPoolInfos, requestPoolInfos);
}

int CpuExecutor::run(const V1_1::Model& model, const Request& request,
                     const std::vector<RunTimePoolInfo>& modelPoolInfos,
                     const std::vector<RunTimePoolInfo>& requestPoolInfos) {
    VLOG(CPUEXE) << "CpuExecutor::run() with request("
                 << SHOW_IF_DEBUG(toString(request)) << ")";

    ScopedOpenmpSettings openMpSettings;

    mModel = &model;
    mRequest = &request; // TODO check if mRequest is needed
    initializeRunTimeInfo(modelPoolInfos, requestPoolInfos);
    // The model has serialized the operations in execution order.
    for (const auto& operation : model.operations) {
        int n = executeOperation(operation);
        if (n != ANEURALNETWORKS_NO_ERROR) {
            return n;
        }
    }
    for (auto& runtimeInfo : modelPoolInfos) {
        runtimeInfo.update();
    }
    for (auto& runtimeInfo : requestPoolInfos) {
        runtimeInfo.update();
    }
    mModel = nullptr;
    mRequest = nullptr;
    VLOG(CPUEXE) << "Completed run normally";
    return ANEURALNETWORKS_NO_ERROR;
}

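// A minimal usage sketch (not part of this file): a driver's execute path would
// typically do something like
//
//     CpuExecutor executor;
//     int n = executor.run(model, request, modelPoolInfos, requestPoolInfos);
//     if (n != ANEURALNETWORKS_NO_ERROR) { /* report the failure */ }
//
// where "model", "request" and the pool vectors are assumed to have already been
// validated and mapped by the caller.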
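// Sets up mOperands from the model's operand list, pointing constant operands at
// the model's operandValues blob or the mapped model pools, and then overlays the
// request's input/output arguments on top (buffer locations and any dimensions the
// model left unspecified).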
bool CpuExecutor::initializeRunTimeInfo(const std::vector<RunTimePoolInfo>& modelPoolInfos,
                                        const std::vector<RunTimePoolInfo>& requestPoolInfos) {
    VLOG(CPUEXE) << "CpuExecutor::initializeRunTimeInfo";
    const size_t count = mModel->operands.size();
    mOperands.resize(count);

    // Start by setting the runtime info to what's in the model.
    for (size_t i = 0; i < count; i++) {
        const Operand& from = mModel->operands[i];
        RunTimeOperandInfo& to = mOperands[i];
        to.type = from.type;
        to.dimensions = from.dimensions;
        to.scale = from.scale;
        to.zeroPoint = from.zeroPoint;
        to.length = from.location.length;
        to.lifetime = from.lifetime;
        switch (from.lifetime) {
            case OperandLifeTime::TEMPORARY_VARIABLE:
                to.buffer = nullptr;
                to.numberOfUsesLeft = from.numberOfConsumers;
                break;
            case OperandLifeTime::CONSTANT_COPY:
                to.buffer = const_cast<uint8_t*>(&mModel->operandValues[from.location.offset]);
                to.numberOfUsesLeft = 0;
                break;
            case OperandLifeTime::CONSTANT_REFERENCE: {
                auto poolIndex = from.location.poolIndex;
                nnAssert(poolIndex < modelPoolInfos.size());
                auto& r = modelPoolInfos[poolIndex];
                to.buffer = r.getBuffer() + from.location.offset;
                to.numberOfUsesLeft = 0;
                break;
            }
            case OperandLifeTime::MODEL_INPUT:
            case OperandLifeTime::MODEL_OUTPUT:
            case OperandLifeTime::NO_VALUE:
                to.buffer = nullptr;
                to.numberOfUsesLeft = 0;
                break;
            default:
                nnAssert(false);
                break;
        }
    }

    // Adjust the runtime info for the arguments passed to the model,
    // modifying the buffer location, and possibly the dimensions.
    auto updateForArguments = [this, &requestPoolInfos](const std::vector<uint32_t>& indexes,
                                  const hidl_vec<RequestArgument>& arguments) {
        nnAssert(indexes.size() == arguments.size());
        for (size_t i = 0; i < indexes.size(); i++) {
            const uint32_t operandIndex = indexes[i];
            const RequestArgument& from = arguments[i];
            RunTimeOperandInfo& to = mOperands[operandIndex];
            if (from.dimensions.size() > 0) {
                // It's the responsibility of the caller to validate that
                // from.dimensions only modifies the dimensions that were
                // unspecified in the model.  That's the case in SampleDriver.cpp
                // with the call to validateRequest().
                // TODO make sure that's the case for the default CPU path.
                to.dimensions = from.dimensions;
            }
            if (from.hasNoValue) {
                to.lifetime = OperandLifeTime::NO_VALUE;
                nnAssert(to.buffer == nullptr);
            } else {
                auto poolIndex = from.location.poolIndex;
                nnAssert(poolIndex < requestPoolInfos.size());
                auto& r = requestPoolInfos[poolIndex];
                to.buffer = r.getBuffer() + from.location.offset;
            }
        }
    };
    updateForArguments(mModel->inputIndexes, mRequest->inputs);
    updateForArguments(mModel->outputIndexes, mRequest->outputs);

    return true;
}

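// Decrements numberOfUsesLeft for each of the given input operands and frees the
// backing buffer of a temporary once its last consumer has executed. Constants and
// model inputs/outputs carry a use count of zero and are skipped.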
void CpuExecutor::freeNoLongerUsedOperands(const std::vector<uint32_t>& inputs) {
    for (uint32_t i : inputs) {
        auto& info = mOperands[i];
        // Check if it's a static or model input/output.
        if (info.numberOfUsesLeft == 0) {
            continue;
        }
        info.numberOfUsesLeft--;
        if (info.numberOfUsesLeft == 0) {
            nnAssert(info.buffer != nullptr);
            delete[] info.buffer;
            info.buffer = nullptr;
        }
    }
}

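// Runs a single operation: checks that the expected operands are present, computes
// the output shape, allocates any temporary output buffer, and dispatches to the
// reference kernel matching the input operand type.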
int CpuExecutor::executeOperation(const Operation& operation) {
    // VLOG(CPUEXE) << "CpuExecutor::executeOperation(" << toString(operation) << ")";
    const hidl_vec<uint32_t>& ins = operation.inputs;
    const hidl_vec<uint32_t>& outs = operation.outputs;
    bool success = false;

    // Function to verify that the number of input and output parameters
    // matches what is expected.  Also checks that all the parameters have
    // values. This function is to be used only for operations that do not
    // accept optional arguments.
    // TODO Have a version that works for optional arguments.
    auto allParametersPresent = [&operation, &ins, &outs, this](size_t requiredIns,
                                                                size_t requiredOuts) -> bool {
        auto verify = [&operation, this](size_t requiredCount, const hidl_vec<uint32_t>& indexes,
                          const char* type) -> bool {
            size_t actualCount = indexes.size();
            if (actualCount != requiredCount) {
                LOG(ERROR) << getOperationName(operation.type)
                           << ": Invalid number of " << type << " operands. Got " << actualCount
                           << " of " << requiredCount;
                return false;
            }
            for (size_t i = 0; i < actualCount; i++) {
                if (mOperands[indexes[i]].lifetime == OperandLifeTime::NO_VALUE) {
                    LOG(ERROR) << getOperationName(operation.type) << " " << type
                               << " operand " << i << " is required but missing.";
                    return false;
                }
            }
            return true;
        };
        return verify(requiredIns, ins, "in") && verify(requiredOuts, outs, "out");
    };

    switch (operation.type) {
        case OperationType::OEM_OPERATION: {
            LOG(ERROR) << "OEM operation not supported for CPU execution";
            success = false;
        } break;
        case OperationType::ADD: {
            if (!allParametersPresent(3, 1)) {
                return ANEURALNETWORKS_BAD_DATA;
            }
            const RunTimeOperandInfo& in1 = mOperands[ins[0]];
            const RunTimeOperandInfo& in2 = mOperands[ins[1]];
            int32_t activation = getScalarData<int32_t>(mOperands[ins[2]]);

            RunTimeOperandInfo& out = mOperands[outs[0]];
            Shape outShape = out.shape();

            if (in1.type == OperandType::TENSOR_FLOAT32) {
                success = addMulPrepare(in1.shape(), in2.shape(), &outShape) &&
                          setInfoAndAllocateIfNeeded(&out, outShape) &&
                          addFloat32(reinterpret_cast<const float*>(in1.buffer),
                                     in1.shape(),
                                     reinterpret_cast<const float*>(in2.buffer),
                                     in2.shape(),
                                     activation,
                                     reinterpret_cast<float*>(out.buffer),
                                     outShape);
            } else if (in1.type == OperandType::TENSOR_QUANT8_ASYMM) {
                success = addMulPrepare(in1.shape(), in2.shape(), &outShape) &&
                          setInfoAndAllocateIfNeeded(&out, outShape) &&
                          addQuant8(reinterpret_cast<const uint8_t*>(in1.buffer),
                                    in1.shape(),
                                    reinterpret_cast<const uint8_t*>(in2.buffer),
                                    in2.shape(),
                                    activation,
                                    reinterpret_cast<uint8_t*>(out.buffer),
                                    outShape);
            }
        } break;
        case OperationType::MUL: {
            if (!allParametersPresent(3, 1)) {
                return ANEURALNETWORKS_BAD_DATA;
            }
            const RunTimeOperandInfo& in1 = mOperands[ins[0]];
            const RunTimeOperandInfo& in2 = mOperands[ins[1]];
            int32_t activation = getScalarData<int32_t>(mOperands[ins[2]]);

            RunTimeOperandInfo& out = mOperands[outs[0]];
            Shape outShape = out.shape();

            if (in1.type == OperandType::TENSOR_FLOAT32) {
                success = addMulPrepare(in1.shape(), in2.shape(), &outShape) &&
                          setInfoAndAllocateIfNeeded(&out, outShape) &&
                          mulFloat32(reinterpret_cast<const float*>(in1.buffer),
                                     in1.shape(),
                                     reinterpret_cast<const float*>(in2.buffer),
                                     in2.shape(),
                                     activation,
                                     reinterpret_cast<float*>(out.buffer),
                                     outShape);
            } else if (in1.type == OperandType::TENSOR_QUANT8_ASYMM) {
                success = addMulPrepare(in1.shape(), in2.shape(), &outShape) &&
                          setInfoAndAllocateIfNeeded(&out, outShape) &&
                          mulQuant8(reinterpret_cast<const uint8_t*>(in1.buffer),
                                    in1.shape(),
                                    reinterpret_cast<const uint8_t*>(in2.buffer),
                                    in2.shape(),
                                    activation,
                                    reinterpret_cast<uint8_t*>(out.buffer),
                                    outShape);
            }
        } break;
        case OperationType::FLOOR: {
            if (!allParametersPresent(1, 1)) {
                return ANEURALNETWORKS_BAD_DATA;
            }
            const RunTimeOperandInfo& input = mOperands[ins[0]];
            RunTimeOperandInfo& output = mOperands[outs[0]];
            Shape outShape = output.shape();

            if (input.type == OperandType::TENSOR_FLOAT32) {
                success = floorPrepare(input.shape(), &outShape) &&
                          setInfoAndAllocateIfNeeded(&output, outShape) &&
                          floorFloat32(reinterpret_cast<const float*>(input.buffer),
                                       reinterpret_cast<float*>(output.buffer),
                                       outShape);
            }
        } break;
        case OperationType::DEQUANTIZE: {
            if (!allParametersPresent(1, 1)) {
                return ANEURALNETWORKS_BAD_DATA;
            }
            const RunTimeOperandInfo& input = mOperands[ins[0]];
            RunTimeOperandInfo& output = mOperands[outs[0]];
            Shape outShape = output.shape();

            if (input.type == OperandType::TENSOR_QUANT8_ASYMM) {
                success = dequantizePrepare(input.shape(), &outShape) &&
                          setInfoAndAllocateIfNeeded(&output, outShape) &&
                          dequantizeQuant8ToFloat32(
                                  reinterpret_cast<const uint8_t*>(input.buffer),
                                  reinterpret_cast<float*>(output.buffer),
                                  input.shape());
            }
        } break;
        case OperationType::DEPTHWISE_CONV_2D: {
            const size_t inCount = ins.size();
            if ((inCount != 11 && inCount != 8) ||
                    !allParametersPresent(inCount, 1)) {
                return ANEURALNETWORKS_BAD_DATA;
            }
            const RunTimeOperandInfo& input  = mOperands[ins[0]];
            const RunTimeOperandInfo& filter = mOperands[ins[1]];
            const RunTimeOperandInfo& bias   = mOperands[ins[2]];

            int32_t padding_left, padding_right;
            int32_t padding_top, padding_bottom;
            int32_t stride_width, stride_height;
            int32_t depth_multiplier;
            int32_t activation;

            if (inCount == 11) {
                padding_left     = getScalarData<int32_t>(mOperands[ins[3]]);
                padding_right    = getScalarData<int32_t>(mOperands[ins[4]]);
                padding_top      = getScalarData<int32_t>(mOperands[ins[5]]);
                padding_bottom   = getScalarData<int32_t>(mOperands[ins[6]]);
                stride_width     = getScalarData<int32_t>(mOperands[ins[7]]);
                stride_height    = getScalarData<int32_t>(mOperands[ins[8]]);
                depth_multiplier = getScalarData<int32_t>(mOperands[ins[9]]);
                activation       = getScalarData<int32_t>(mOperands[ins[10]]);
            } else {
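                // The 8-input form supplies an implicit padding scheme instead of
                // explicit per-edge values; derive the explicit padding from the
                // input and filter geometry below.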
    484                 int32_t padding_implicit = getScalarData<int32_t>(mOperands[ins[3]]);
    485                 stride_width     = getScalarData<int32_t>(mOperands[ins[4]]);
    486                 stride_height    = getScalarData<int32_t>(mOperands[ins[5]]);
    487                 depth_multiplier = getScalarData<int32_t>(mOperands[ins[6]]);
    488                 activation       = getScalarData<int32_t>(mOperands[ins[7]]);
    489 
    490                 Shape inputShape = input.shape();
    491                 Shape filterShape = filter.shape();
    492                 int32_t input_width  = getSizeOfDimension(inputShape, 2);
    493                 int32_t input_height = getSizeOfDimension(inputShape, 1);
    494                 int32_t filter_width  = getSizeOfDimension(filterShape, 2);
    495                 int32_t filter_height = getSizeOfDimension(filterShape, 1);
    496                 calculateExplicitPadding(input_width, stride_width,
    497                                          filter_width, padding_implicit,
    498                                          &padding_left, &padding_right);
    499                 calculateExplicitPadding(input_height, stride_height,
    500                                          filter_height, padding_implicit,
    501                                          &padding_top, &padding_bottom);
    502             }
    503 
    504             RunTimeOperandInfo& output = mOperands[outs[0]];
    505             Shape outShape = output.shape();
    506 
    507             if (input.type == OperandType::TENSOR_FLOAT32) {
    508                 success = depthwiseConvPrepare(input.shape(), filter.shape(), bias.shape(),
    509                                                padding_left, padding_right,
    510                                                padding_top, padding_bottom,
    511                                                stride_width, stride_height,
    512                                                &outShape) &&
    513                           setInfoAndAllocateIfNeeded(&output, outShape) &&
    514                           depthwiseConvFloat32(reinterpret_cast<const float*>(input.buffer),
    515                                                input.shape(),
    516                                                reinterpret_cast<const float*>(filter.buffer),
    517                                                filter.shape(),
    518                                                reinterpret_cast<const float*>(bias.buffer),
    519                                                bias.shape(),
    520                                                padding_left, padding_right,
    521                                                padding_top, padding_bottom,
    522                                                stride_width, stride_height,
    523                                                depth_multiplier, activation,
    524                                                reinterpret_cast<float*>(output.buffer),
    525                                                outShape);
    526             } else if (input.type == OperandType::TENSOR_QUANT8_ASYMM) {
    527                 success = depthwiseConvPrepare(input.shape(), filter.shape(), bias.shape(),
    528                                                padding_left, padding_right,
    529                                                padding_top, padding_bottom,
    530                                                stride_width, stride_height,
    531                                                &outShape) &&
    532                           setInfoAndAllocateIfNeeded(&output, outShape) &&
    533                           depthwiseConvQuant8(reinterpret_cast<const uint8_t*>(input.buffer),
    534                                               input.shape(),
    535                                               reinterpret_cast<const uint8_t*>(filter.buffer),
    536                                               filter.shape(),
    537                                               reinterpret_cast<const int32_t*>(bias.buffer),
    538                                               bias.shape(),
    539                                               padding_left, padding_right,
    540                                               padding_top, padding_bottom,
    541                                               stride_width, stride_height,
    542                                               depth_multiplier, activation,
    543                                               reinterpret_cast<uint8_t*>(output.buffer),
    544                                               outShape);
    545             }
    546 
    547         } break;
    548         case OperationType::CONV_2D: {
    549             const size_t inCount = ins.size();
    550             if ((inCount != 10 && inCount != 7) ||
    551                     !allParametersPresent(inCount, 1)) {
    552                 return ANEURALNETWORKS_BAD_DATA;
    553             }
    554             const RunTimeOperandInfo& input  = mOperands[ins[0]];
    555             const RunTimeOperandInfo& filter = mOperands[ins[1]];
    556             const RunTimeOperandInfo& bias   = mOperands[ins[2]];
    557 
    558             int32_t padding_left, padding_right;
    559             int32_t padding_top, padding_bottom;
    560             int32_t stride_width, stride_height;
    561             int32_t activation;
    562 
    563             if (inCount == 10) {
    564                 padding_left     = getScalarData<int32_t>(mOperands[ins[3]]);
    565                 padding_right    = getScalarData<int32_t>(mOperands[ins[4]]);
    566                 padding_top      = getScalarData<int32_t>(mOperands[ins[5]]);
    567                 padding_bottom   = getScalarData<int32_t>(mOperands[ins[6]]);
    568                 stride_width     = getScalarData<int32_t>(mOperands[ins[7]]);
    569                 stride_height    = getScalarData<int32_t>(mOperands[ins[8]]);
    570                 activation       = getScalarData<int32_t>(mOperands[ins[9]]);
    571             } else {
    572                 int32_t padding_implicit = getScalarData<int32_t>(mOperands[ins[3]]);
    573                 stride_width     = getScalarData<int32_t>(mOperands[ins[4]]);
    574                 stride_height    = getScalarData<int32_t>(mOperands[ins[5]]);
    575                 activation       = getScalarData<int32_t>(mOperands[ins[6]]);
    576 
    577                 Shape inputShape = input.shape();
    578                 Shape filterShape = filter.shape();
    579                 int32_t input_width  = getSizeOfDimension(inputShape, 2);
    580                 int32_t input_height = getSizeOfDimension(inputShape, 1);
    581                 int32_t filter_width  = getSizeOfDimension(filterShape, 2);
    582                 int32_t filter_height = getSizeOfDimension(filterShape, 1);
    583                 calculateExplicitPadding(input_width, stride_width,
    584                                          filter_width, padding_implicit,
    585                                          &padding_left, &padding_right);
    586                 calculateExplicitPadding(input_height, stride_height,
    587                                          filter_height, padding_implicit,
    588                                          &padding_top, &padding_bottom);
    589             }
    590 
    591             RunTimeOperandInfo& output = mOperands[outs[0]];
    592             Shape outShape = output.shape();
    593 
    594             if (input.type == OperandType::TENSOR_FLOAT32) {
    595                 success = convPrepare(input.shape(), filter.shape(), bias.shape(),
    596                                       padding_left, padding_right,
    597                                       padding_top, padding_bottom,
    598                                       stride_width, stride_height,
    599                                       &outShape) &&
    600                           setInfoAndAllocateIfNeeded(&output, outShape) &&
    601                           convFloat32(reinterpret_cast<const float*>(input.buffer), input.shape(),
    602                                       reinterpret_cast<const float*>(filter.buffer), filter.shape(),
    603                                       reinterpret_cast<const float*>(bias.buffer), bias.shape(),
    604                                       padding_left, padding_right,
    605                                       padding_top, padding_bottom,
    606                                       stride_width, stride_height, activation,
    607                                       reinterpret_cast<float*>(output.buffer), outShape);
    608             } else if (input.type == OperandType::TENSOR_QUANT8_ASYMM) {
    609                 success = convPrepare(input.shape(), filter.shape(), bias.shape(),
    610                                       padding_left, padding_right,
    611                                       padding_top, padding_bottom,
    612                                       stride_width, stride_height,
    613                                       &outShape) &&
    614                           setInfoAndAllocateIfNeeded(&output, outShape) &&
    615                           convQuant8(reinterpret_cast<const uint8_t*>(input.buffer),
    616                                      input.shape(),
    617                                      reinterpret_cast<const uint8_t*>(filter.buffer),
    618                                      filter.shape(),
    619                                      reinterpret_cast<const int32_t*>(bias.buffer),
    620                                      bias.shape(),
    621                                      padding_left, padding_right,
    622                                      padding_top, padding_bottom,
    623                                      stride_width, stride_height, activation,
    624                                      reinterpret_cast<uint8_t*>(output.buffer),
    625                                      outShape);
    626             }
    627         } break;
    628         case OperationType::AVERAGE_POOL_2D: {
    629             const size_t inCount = ins.size();
    630             if ((inCount != 10 && inCount != 7) ||
    631                     !allParametersPresent(inCount, 1)) {
    632                 return ANEURALNETWORKS_BAD_DATA;
    633             }
    634             const RunTimeOperandInfo& input = mOperands[ins[0]];
    635 
    636             int32_t padding_left, padding_right;
    637             int32_t padding_top, padding_bottom;
    638             int32_t stride_width, stride_height;
    639             int32_t filter_width, filter_height;
    640             int32_t activation;
    641 
    642             if (inCount == 10) {
    643                 padding_left     = getScalarData<int32_t>(mOperands[ins[1]]);
    644                 padding_right    = getScalarData<int32_t>(mOperands[ins[2]]);
    645                 padding_top      = getScalarData<int32_t>(mOperands[ins[3]]);
    646                 padding_bottom   = getScalarData<int32_t>(mOperands[ins[4]]);
    647                 stride_width     = getScalarData<int32_t>(mOperands[ins[5]]);
    648                 stride_height    = getScalarData<int32_t>(mOperands[ins[6]]);
    649                 filter_width     = getScalarData<int32_t>(mOperands[ins[7]]);
    650                 filter_height    = getScalarData<int32_t>(mOperands[ins[8]]);
    651                 activation       = getScalarData<int32_t>(mOperands[ins[9]]);
    652             } else {
    653                 int32_t padding_implicit = getScalarData<int32_t>(mOperands[ins[1]]);
    654                 stride_width     = getScalarData<int32_t>(mOperands[ins[2]]);
    655                 stride_height    = getScalarData<int32_t>(mOperands[ins[3]]);
    656                 filter_width     = getScalarData<int32_t>(mOperands[ins[4]]);
    657                 filter_height    = getScalarData<int32_t>(mOperands[ins[5]]);
    658                 activation       = getScalarData<int32_t>(mOperands[ins[6]]);
    659 
    660                 Shape inputShape = input.shape();
    661                 int32_t input_width  = getSizeOfDimension(inputShape, 2);
    662                 int32_t input_height = getSizeOfDimension(inputShape, 1);
    663                 calculateExplicitPadding(input_width, stride_width,
    664                                          filter_width, padding_implicit,
    665                                          &padding_left, &padding_right);
    666                 calculateExplicitPadding(input_height, stride_height,
    667                                          filter_height, padding_implicit,
    668                                          &padding_top, &padding_bottom);
    669             }
    670 
    671             RunTimeOperandInfo& output = mOperands[outs[0]];
    672             Shape outShape = output.shape();
    673 
    674             if (input.type == OperandType::TENSOR_FLOAT32) {
    675                 success = genericPoolingPrepare(input.shape(),
    676                                                 padding_left, padding_right,
    677                                                 padding_top, padding_bottom,
    678                                                 stride_width, stride_height,
    679                                                 filter_width, filter_height,
    680                                                 &outShape) &&
    681                           setInfoAndAllocateIfNeeded(&output, outShape) &&
    682                           averagePoolFloat32(reinterpret_cast<const float*>(input.buffer),
    683                                              input.shape(),
    684                                              padding_left, padding_right,
    685                                              padding_top, padding_bottom,
    686                                              stride_width, stride_height,
    687                                              filter_width, filter_height, activation,
    688                                              reinterpret_cast<float*>(output.buffer),
    689                                              outShape);
    690             } else if (input.type == OperandType::TENSOR_QUANT8_ASYMM) {
    691                 success = genericPoolingPrepare(input.shape(),
    692                                                 padding_left, padding_right,
    693                                                 padding_top, padding_bottom,
    694                                                 stride_width, stride_height,
    695                                                 filter_width, filter_height,
    696                                                 &outShape) &&
    697                           setInfoAndAllocateIfNeeded(&output, outShape) &&
    698                           averagePoolQuant8(reinterpret_cast<const uint8_t*>(input.buffer),
    699                                             input.shape(),
    700                                             padding_left, padding_right,
    701                                             padding_top, padding_bottom,
    702                                             stride_width, stride_height,
    703                                             filter_width, filter_height, activation,
    704                                             reinterpret_cast<uint8_t*>(output.buffer),
    705                                             outShape);
    706             }
    707         } break;
    708         case OperationType::L2_POOL_2D: {
    709             const size_t inCount = ins.size();
    710             if ((inCount != 10 && inCount != 7) ||
    711                     !allParametersPresent(inCount, 1)) {
    712                 return ANEURALNETWORKS_BAD_DATA;
    713             }
    714             const RunTimeOperandInfo& input = mOperands[ins[0]];
    715 
    716             int32_t padding_left, padding_right;
    717             int32_t padding_top, padding_bottom;
    718             int32_t stride_width, stride_height;
    719             int32_t filter_width, filter_height;
    720             int32_t activation;
    721 
    722             if (inCount == 10) {
    723                 padding_left     = getScalarData<int32_t>(mOperands[ins[1]]);
    724                 padding_right    = getScalarData<int32_t>(mOperands[ins[2]]);
    725                 padding_top      = getScalarData<int32_t>(mOperands[ins[3]]);
    726                 padding_bottom   = getScalarData<int32_t>(mOperands[ins[4]]);
    727                 stride_width     = getScalarData<int32_t>(mOperands[ins[5]]);
    728                 stride_height    = getScalarData<int32_t>(mOperands[ins[6]]);
    729                 filter_width     = getScalarData<int32_t>(mOperands[ins[7]]);
    730                 filter_height    = getScalarData<int32_t>(mOperands[ins[8]]);
    731                 activation       = getScalarData<int32_t>(mOperands[ins[9]]);
    732             } else {
    733                 int32_t padding_implicit = getScalarData<int32_t>(mOperands[ins[1]]);
    734                 stride_width     = getScalarData<int32_t>(mOperands[ins[2]]);
    735                 stride_height    = getScalarData<int32_t>(mOperands[ins[3]]);
    736                 filter_width     = getScalarData<int32_t>(mOperands[ins[4]]);
    737                 filter_height    = getScalarData<int32_t>(mOperands[ins[5]]);
    738                 activation       = getScalarData<int32_t>(mOperands[ins[6]]);
    739 
    740                 Shape inputShape = input.shape();
    741                 int32_t input_width  = getSizeOfDimension(inputShape, 2);
    742                 int32_t input_height = getSizeOfDimension(inputShape, 1);
    743                 calculateExplicitPadding(input_width, stride_width,
    744                                          filter_width, padding_implicit,
    745                                          &padding_left, &padding_right);
    746                 calculateExplicitPadding(input_height, stride_height,
    747                                          filter_height, padding_implicit,
    748                                          &padding_top, &padding_bottom);
    749             }
    750 
    751             RunTimeOperandInfo& output = mOperands[outs[0]];
    752             Shape outShape = output.shape();
    753 
    754             if (input.type == OperandType::TENSOR_FLOAT32) {
    755                 success = genericPoolingPrepare(input.shape(),
    756                                                 padding_left, padding_right,
    757                                                 padding_top, padding_bottom,
    758                                                 stride_width, stride_height,
    759                                                 filter_width, filter_height,
    760                                                 &outShape) &&
    761                           setInfoAndAllocateIfNeeded(&output, outShape) &&
    762                           l2PoolFloat32(reinterpret_cast<const float*>(input.buffer),
    763                                         input.shape(),
    764                                         padding_left, padding_right,
    765                                         padding_top, padding_bottom,
    766                                         stride_width, stride_height,
    767                                         filter_width, filter_height, activation,
    768                                         reinterpret_cast<float*>(output.buffer),
    769                                         outShape);
    770             }
    771         } break;
    772         case OperationType::MAX_POOL_2D: {
    773             const size_t inCount = ins.size();
    774             if ((inCount != 10 && inCount != 7) ||
    775                     !allParametersPresent(inCount, 1)) {
    776                 return ANEURALNETWORKS_BAD_DATA;
    777             }
    778             const RunTimeOperandInfo& input = mOperands[ins[0]];
    779 
    780             int32_t padding_left, padding_right;
    781             int32_t padding_top, padding_bottom;
    782             int32_t stride_width, stride_height;
    783             int32_t filter_width, filter_height;
    784             int32_t activation;
    785 
    786             if (inCount == 10) {
    787                 padding_left     = getScalarData<int32_t>(mOperands[ins[1]]);
    788                 padding_right    = getScalarData<int32_t>(mOperands[ins[2]]);
    789                 padding_top      = getScalarData<int32_t>(mOperands[ins[3]]);
    790                 padding_bottom   = getScalarData<int32_t>(mOperands[ins[4]]);
    791                 stride_width     = getScalarData<int32_t>(mOperands[ins[5]]);
    792                 stride_height    = getScalarData<int32_t>(mOperands[ins[6]]);
    793                 filter_width     = getScalarData<int32_t>(mOperands[ins[7]]);
    794                 filter_height    = getScalarData<int32_t>(mOperands[ins[8]]);
    795                 activation       = getScalarData<int32_t>(mOperands[ins[9]]);
    796             } else {
    797                 int32_t padding_implicit = getScalarData<int32_t>(mOperands[ins[1]]);
    798                 stride_width     = getScalarData<int32_t>(mOperands[ins[2]]);
    799                 stride_height    = getScalarData<int32_t>(mOperands[ins[3]]);
    800                 filter_width     = getScalarData<int32_t>(mOperands[ins[4]]);
    801                 filter_height    = getScalarData<int32_t>(mOperands[ins[5]]);
    802                 activation       = getScalarData<int32_t>(mOperands[ins[6]]);
    803 
    804                 Shape inputShape = input.shape();
    805                 int32_t input_width  = getSizeOfDimension(inputShape, 2);
    806                 int32_t input_height = getSizeOfDimension(inputShape, 1);
    807                 calculateExplicitPadding(input_width, stride_width,
    808                                          filter_width, padding_implicit,
    809                                          &padding_left, &padding_right);
    810                 calculateExplicitPadding(input_height, stride_height,
    811                                          filter_height, padding_implicit,
    812                                          &padding_top, &padding_bottom);
    813             }
    814 
    815             RunTimeOperandInfo& output = mOperands[outs[0]];
    816             Shape outShape = output.shape();
    817 
    818             if (input.type == OperandType::TENSOR_FLOAT32) {
    819                 success = genericPoolingPrepare(input.shape(),
    820                                                 padding_left, padding_right,
    821                                                 padding_top, padding_bottom,
    822                                                 stride_width, stride_height,
    823                                                 filter_width, filter_height,
    824                                                 &outShape) &&
    825                           setInfoAndAllocateIfNeeded(&output, outShape) &&
    826                           maxPoolFloat32(reinterpret_cast<const float*>(input.buffer),
    827                                          input.shape(),
    828                                          padding_left, padding_right,
    829                                          padding_top, padding_bottom,
    830                                          stride_width, stride_height,
    831                                          filter_width, filter_height, activation,
    832                                          reinterpret_cast<float*>(output.buffer),
    833                                          outShape);
    834             } else if (input.type == OperandType::TENSOR_QUANT8_ASYMM) {
    835                 success = genericPoolingPrepare(input.shape(),
    836                                                 padding_left, padding_right,
    837                                                 padding_top, padding_bottom,
    838                                                 stride_width, stride_height,
    839                                                 filter_width, filter_height,
    840                                                 &outShape) &&
    841                           setInfoAndAllocateIfNeeded(&output, outShape) &&
    842                           maxPoolQuant8(reinterpret_cast<const uint8_t*>(input.buffer),
    843                                         input.shape(),
    844                                         padding_left, padding_right,
    845                                         padding_top, padding_bottom,
    846                                         stride_width, stride_height,
    847                                         filter_width, filter_height, activation,
    848                                         reinterpret_cast<uint8_t*>(output.buffer),
    849                                         outShape);
    850             }
    851 
    852         } break;
    853         case OperationType::RELU: {
    854             if (!allParametersPresent(1, 1)) {
    855                 return ANEURALNETWORKS_BAD_DATA;
    856             }
    857             const RunTimeOperandInfo& input = mOperands[ins[0]];
    858             RunTimeOperandInfo& output = mOperands[outs[0]];
    859             Shape outShape = output.shape();
    860 
    861             if (input.type == OperandType::TENSOR_FLOAT32) {
    862                 success = genericActivationPrepare(input.shape(), &outShape) &&
    863                           setInfoAndAllocateIfNeeded(&output, outShape) &&
    864                           reluFloat32(reinterpret_cast<const float*>(input.buffer),
    865                                       input.shape(),
    866                                       reinterpret_cast<float*>(output.buffer),
    867                                       outShape);
    868             } else if (input.type == OperandType::TENSOR_QUANT8_ASYMM) {
    869                 success = genericActivationPrepare(input.shape(), &outShape) &&
    870                           setInfoAndAllocateIfNeeded(&output, outShape) &&
    871                           reluQuant8(reinterpret_cast<const uint8_t*>(input.buffer),
    872                                      input.shape(),
    873                                      reinterpret_cast<uint8_t*>(output.buffer),
    874                                      outShape);
    875             }
    876         } break;
    877         case OperationType::RELU1: {
    878             if (!allParametersPresent(1, 1)) {
    879                 return ANEURALNETWORKS_BAD_DATA;
    880             }
    881             const RunTimeOperandInfo& input = mOperands[ins[0]];
    882             RunTimeOperandInfo& output = mOperands[outs[0]];
    883             Shape outShape = output.shape();
    884 
    885             if (input.type == OperandType::TENSOR_FLOAT32) {
    886                 success = genericActivationPrepare(input.shape(), &outShape) &&
    887                           setInfoAndAllocateIfNeeded(&output, outShape) &&
    888                           relu1Float32(reinterpret_cast<const float*>(input.buffer),
    889                                        input.shape(),
    890                                        reinterpret_cast<float*>(output.buffer),
    891                                        outShape);
    892             } else if (input.type == OperandType::TENSOR_QUANT8_ASYMM) {
    893                 success = genericActivationPrepare(input.shape(), &outShape) &&
    894                           setInfoAndAllocateIfNeeded(&output, outShape) &&
    895                           relu1Quant8(reinterpret_cast<const uint8_t*>(input.buffer),
    896                                       input.shape(),
    897                                       reinterpret_cast<uint8_t*>(output.buffer),
    898                                       outShape);
    899             }
    900         } break;
    901         case OperationType::RELU6: {
    902             if (!allParametersPresent(1, 1)) {
    903                 return ANEURALNETWORKS_BAD_DATA;
    904             }
    905             const RunTimeOperandInfo& input = mOperands[ins[0]];
    906             RunTimeOperandInfo& output = mOperands[outs[0]];
    907             Shape outShape = output.shape();
    908 
    909             if (input.type == OperandType::TENSOR_FLOAT32) {
    910                 success = genericActivationPrepare(input.shape(), &outShape) &&
    911                           setInfoAndAllocateIfNeeded(&output, outShape) &&
    912                           relu6Float32(reinterpret_cast<const float*>(input.buffer),
    913                                        input.shape(),
    914                                        reinterpret_cast<float*>(output.buffer),
    915                                        outShape);
    916             } else if (input.type == OperandType::TENSOR_QUANT8_ASYMM) {
    917                 success = genericActivationPrepare(input.shape(), &outShape) &&
    918                           setInfoAndAllocateIfNeeded(&output, outShape) &&
    919                           relu6Quant8(reinterpret_cast<const uint8_t*>(input.buffer),
    920                                       input.shape(),
    921                                       reinterpret_cast<uint8_t*>(output.buffer),
    922                                       outShape);
    923             }
    924         } break;
    925         case OperationType::TANH: {
    926             if (!allParametersPresent(1, 1)) {
    927                 return ANEURALNETWORKS_BAD_DATA;
    928             }
    929             const RunTimeOperandInfo& input = mOperands[ins[0]];
    930             RunTimeOperandInfo& output = mOperands[outs[0]];
    931             Shape outShape = output.shape();
    932 
    933             if (input.type == OperandType::TENSOR_FLOAT32) {
    934                 success = genericActivationPrepare(input.shape(), &outShape) &&
    935                           setInfoAndAllocateIfNeeded(&output, outShape) &&
    936                           tanhFloat32(reinterpret_cast<const float*>(input.buffer),
    937                                       input.shape(),
    938                                       reinterpret_cast<float*>(output.buffer),
    939                                       outShape);
    940             }
    941         } break;
    942         case OperationType::LOGISTIC: {
    943             if (!allParametersPresent(1, 1)) {
    944                 return ANEURALNETWORKS_BAD_DATA;
    945             }
    946             const RunTimeOperandInfo& input = mOperands[ins[0]];
    947             RunTimeOperandInfo& output = mOperands[outs[0]];
    948             Shape outShape = output.shape();
    949 
    950             if (input.type == OperandType::TENSOR_FLOAT32) {
    951                 success = genericActivationPrepare(input.shape(), &outShape) &&
    952                           setInfoAndAllocateIfNeeded(&output, outShape) &&
    953                           logisticFloat32(reinterpret_cast<const float*>(input.buffer),
    954                                           input.shape(),
    955                                           reinterpret_cast<float*>(output.buffer),
    956                                           outShape);
    957             } else if (input.type == OperandType::TENSOR_QUANT8_ASYMM) {
    958                 success = genericActivationPrepare(input.shape(), &outShape) &&
    959                           setInfoAndAllocateIfNeeded(&output, outShape) &&
    960                           logisticQuant8(reinterpret_cast<const uint8_t*>(input.buffer),
    961                                          input.shape(),
    962                                          reinterpret_cast<uint8_t*>(output.buffer),
    963                                          outShape);
    964             }
    965         } break;
    966         case OperationType::SOFTMAX: {
    967             if (!allParametersPresent(2, 1)) {
    968                 return ANEURALNETWORKS_BAD_DATA;
    969             }
    970             RunTimeOperandInfo& input = mOperands[ins[0]];
    971             float beta = getScalarData<float>(mOperands[ins[1]]);
    972             if (beta <= 0.0f) {
    973                 LOG(ERROR) << "beta must be positive for softmax";
    974                 return ANEURALNETWORKS_BAD_DATA;
    975             }
    976 
    977             RunTimeOperandInfo& output = mOperands[outs[0]];
    978             Shape outShape = output.shape();
    979 
    980             if (input.type == OperandType::TENSOR_FLOAT32) {
    981                 success = genericActivationPrepare(input.shape(), &outShape) &&
    982                           setInfoAndAllocateIfNeeded(&output, outShape) &&
    983                           softmaxFloat32(reinterpret_cast<const float*>(input.buffer),
    984                                          input.shape(),
    985                                          beta,
    986                                          reinterpret_cast<float*>(output.buffer),
    987                                          outShape);
    988             } else if (input.type == OperandType::TENSOR_QUANT8_ASYMM) {
    989                 success = genericActivationPrepare(input.shape(), &outShape) &&
    990                           setInfoAndAllocateIfNeeded(&output, outShape) &&
    991                           softmaxQuant8(reinterpret_cast<const uint8_t*>(input.buffer),
    992                                         input.shape(),
    993                                         beta,
    994                                         reinterpret_cast<uint8_t*>(output.buffer),
    995                                         outShape);
    996             }
    997         } break;
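                // FULLY_CONNECTED: input, weights, bias, and a fused activation code.
                // Note that the quantized path reads the bias as int32 rather than uint8.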
    998         case OperationType::FULLY_CONNECTED: {
    999             if (!allParametersPresent(4, 1)) {
   1000                 return ANEURALNETWORKS_BAD_DATA;
   1001             }
   1002             RunTimeOperandInfo& input   = mOperands[ins[0]];
   1003             RunTimeOperandInfo& weights = mOperands[ins[1]];
   1004             RunTimeOperandInfo& bias    = mOperands[ins[2]];
   1005 
   1006             int32_t activation = getScalarData<int32_t>(mOperands[ins[3]]);
   1007 
   1008             RunTimeOperandInfo& output = mOperands[outs[0]];
   1009             Shape outShape = output.shape();
   1010 
   1011             if (input.type == OperandType::TENSOR_FLOAT32) {
   1012                 success = fullyConnectedPrepare(input.shape(), weights.shape(), bias.shape(),
   1013                                                 &outShape) &&
   1014                           setInfoAndAllocateIfNeeded(&output, outShape) &&
   1015                           fullyConnectedFloat32(reinterpret_cast<const float*>(input.buffer),
   1016                                                 input.shape(),
   1017                                                 reinterpret_cast<const float*>(weights.buffer),
   1018                                                 weights.shape(),
   1019                                                 reinterpret_cast<const float*>(bias.buffer),
   1020                                                 bias.shape(),
   1021                                                 activation,
   1022                                                 reinterpret_cast<float*>(output.buffer),
   1023                                                 outShape);
   1024             } else if (input.type == OperandType::TENSOR_QUANT8_ASYMM) {
   1025                 success = fullyConnectedPrepare(input.shape(), weights.shape(), bias.shape(),
   1026                                                 &outShape) &&
   1027                           setInfoAndAllocateIfNeeded(&output, outShape) &&
   1028                           fullyConnectedQuant8(reinterpret_cast<const uint8_t*>(input.buffer),
   1029                                                input.shape(),
   1030                                                reinterpret_cast<const uint8_t*>(weights.buffer),
   1031                                                weights.shape(),
   1032                                                reinterpret_cast<const int32_t*>(bias.buffer),
   1033                                                bias.shape(),
   1034                                                activation,
   1035                                                reinterpret_cast<uint8_t*>(output.buffer),
   1036                                                outShape);
   1037             }
   1038         } break;
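                 // CONCATENATION takes a variable number of input tensors; the last
                 // input is the scalar axis along which they are joined.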
   1039         case OperationType::CONCATENATION: {
   1040             if (outs.size() != 1 || ins.size() < 2) {
   1041                 return ANEURALNETWORKS_BAD_DATA;
   1042             }
   1043             int numInputTensors = ins.size() - 1;
   1044             int32_t axis = getScalarData<int32_t>(mOperands[ins[numInputTensors]]);
   1045 
   1046             RunTimeOperandInfo& output = mOperands[outs[0]];
   1047             Shape outShape = output.shape();
   1048 
   1049             const RunTimeOperandInfo& firstInput = mOperands[ins[0]];
   1050             if (firstInput.type == OperandType::TENSOR_FLOAT32) {
   1051                 std::vector<Shape> inputShapes(numInputTensors);
   1052                 std::vector<const float*> inputDataPtrs(numInputTensors);
   1053 
    1054                 for (int i = 0; i < numInputTensors; i++) {
   1055                     RunTimeOperandInfo& input = mOperands[ins[i]];
   1056                     inputShapes[i] = input.shape();
   1057                     inputDataPtrs[i] = reinterpret_cast<const float*>(input.buffer);
   1058                 }
   1059                 success = concatenationPrepare(inputShapes, axis, &outShape) &&
   1060                           setInfoAndAllocateIfNeeded(&output, outShape) &&
   1061                           concatenationFloat32(inputDataPtrs, inputShapes, axis,
   1062                                                reinterpret_cast<float*>(output.buffer), outShape);
   1063             } else if (firstInput.type == OperandType::TENSOR_QUANT8_ASYMM) {
   1064                 std::vector<Shape> inputShapes(numInputTensors);
   1065                 std::vector<const uint8_t*> inputDataPtrs(numInputTensors);
   1066 
    1067                 for (int i = 0; i < numInputTensors; i++) {
   1068                     RunTimeOperandInfo& input = mOperands[ins[i]];
   1069                     inputShapes[i] = input.shape();
   1070                     inputDataPtrs[i] = reinterpret_cast<const uint8_t*>(input.buffer);
   1071                 }
   1072                 success = concatenationPrepare(inputShapes, axis, &outShape) &&
   1073                           setInfoAndAllocateIfNeeded(&output, outShape) &&
   1074                           concatenationQuant8(inputDataPtrs, inputShapes, axis,
   1075                                               reinterpret_cast<uint8_t*>(output.buffer),
   1076                                               outShape);
   1077             }
   1078         } break;
   1079         case OperationType::L2_NORMALIZATION: {
   1080             if (!allParametersPresent(1, 1)) {
   1081                 return ANEURALNETWORKS_BAD_DATA;
   1082             }
   1083             const RunTimeOperandInfo& input = mOperands[ins[0]];
   1084             RunTimeOperandInfo& output = mOperands[outs[0]];
   1085             Shape outShape = output.shape();
   1086 
   1087             if (input.type == OperandType::TENSOR_FLOAT32) {
   1088                 success = genericNormalizationPrepare(input.shape(), &outShape) &&
   1089                           setInfoAndAllocateIfNeeded(&output, outShape) &&
   1090                           l2normFloat32(reinterpret_cast<const float*>(input.buffer),
   1091                                         input.shape(),
   1092                                         reinterpret_cast<float*>(output.buffer),
   1093                                         outShape);
   1094             } else if (input.type == OperandType::TENSOR_QUANT8_ASYMM) {
   1095                 success = genericNormalizationPrepare(input.shape(), &outShape) &&
   1096                           setInfoAndAllocateIfNeeded(&output, outShape) &&
   1097                           l2normQuant8(reinterpret_cast<const uint8_t*>(input.buffer),
   1098                                        input.shape(),
   1099                                        reinterpret_cast<uint8_t*>(output.buffer),
   1100                                        outShape);
   1101             }
   1102         } break;
   1103         case OperationType::LOCAL_RESPONSE_NORMALIZATION: {
   1104             if (!allParametersPresent(5, 1)) {
   1105                 return ANEURALNETWORKS_BAD_DATA;
   1106             }
   1107             const RunTimeOperandInfo& input = mOperands[ins[0]];
   1108             int32_t radius = getScalarData<int32_t>(mOperands[ins[1]]);
   1109             float bias = getScalarData<float>(mOperands[ins[2]]);
   1110             float alpha = getScalarData<float>(mOperands[ins[3]]);
   1111             float beta = getScalarData<float>(mOperands[ins[4]]);
   1112 
   1113             RunTimeOperandInfo& output = mOperands[outs[0]];
   1114             Shape outShape = output.shape();
   1115 
   1116             if (input.type == OperandType::TENSOR_FLOAT32) {
   1117                 success = genericNormalizationPrepare(input.shape(), &outShape) &&
   1118                           setInfoAndAllocateIfNeeded(&output, outShape) &&
   1119                           localResponseNormFloat32(reinterpret_cast<const float*>(input.buffer),
   1120                                                    input.shape(),
   1121                                                    radius, bias, alpha, beta,
   1122                                                    reinterpret_cast<float*>(output.buffer),
   1123                                                    outShape);
   1124             }
   1125         } break;
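                 // RESHAPE is type-agnostic: the target shape arrives as an int32 tensor
                 // and the data buffers are passed to reshapeGeneric as untyped pointers.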
   1126         case OperationType::RESHAPE: {
   1127             if (!allParametersPresent(2, 1)) {
   1128                 return ANEURALNETWORKS_BAD_DATA;
   1129             }
   1130             const RunTimeOperandInfo& input = mOperands[ins[0]];
   1131             const RunTimeOperandInfo& targetShape = mOperands[ins[1]];
   1132 
   1133             RunTimeOperandInfo& output = mOperands[outs[0]];
   1134             Shape outShape = output.shape();
   1135 
   1136             success = reshapePrepare(input.shape(),
   1137                                      reinterpret_cast<const int32_t*>(targetShape.buffer),
   1138                                      getNumberOfElements(targetShape.shape()),
   1139                                      &outShape) &&
   1140                       setInfoAndAllocateIfNeeded(&output, outShape) &&
   1141                       reshapeGeneric(reinterpret_cast<const void*>(input.buffer),
   1142                                      input.shape(),
   1143                                      reinterpret_cast<void*>(output.buffer),
   1144                                      outShape);
   1145         } break;
   1146         case OperationType::RESIZE_BILINEAR: {
   1147             if (!allParametersPresent(3, 1)) {
   1148                 return ANEURALNETWORKS_BAD_DATA;
   1149             }
   1150             const RunTimeOperandInfo& input = mOperands[ins[0]];
   1151             int32_t width = getScalarData<int32_t>(mOperands[ins[1]]);
   1152             int32_t height = getScalarData<int32_t>(mOperands[ins[2]]);
   1153 
   1154             RunTimeOperandInfo& output = mOperands[outs[0]];
   1155             Shape outShape = output.shape();
   1156 
   1157             if (input.type == OperandType::TENSOR_FLOAT32) {
   1158                 success = resizeBilinearPrepare(input.shape(),
   1159                                                 width, height,
   1160                                                 &outShape) &&
   1161                           setInfoAndAllocateIfNeeded(&output, outShape) &&
   1162                           resizeBilinearFloat32(reinterpret_cast<const float*>(input.buffer),
   1163                                                 input.shape(),
   1164                                                 reinterpret_cast<float*>(output.buffer),
   1165                                                 outShape);
   1166             }
   1167         } break;
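                 // DEPTH_TO_SPACE and SPACE_TO_DEPTH pass the raw operand buffers to a
                 // single generic kernel, so they are not specialized per element type.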
   1168         case OperationType::DEPTH_TO_SPACE: {
   1169             if (!allParametersPresent(2, 1)) {
   1170                 return ANEURALNETWORKS_BAD_DATA;
   1171             }
   1172             const RunTimeOperandInfo& input = mOperands[ins[0]];
   1173             int32_t blockSize = getScalarData<int32_t>(mOperands[ins[1]]);
   1174 
   1175             RunTimeOperandInfo& output = mOperands[outs[0]];
   1176             Shape outShape = output.shape();
   1177 
   1178             success = depthToSpacePrepare(input.shape(),
   1179                                           blockSize,
   1180                                           &outShape) &&
   1181                       setInfoAndAllocateIfNeeded(&output, outShape) &&
   1182                       depthToSpaceGeneric(input.buffer,
   1183                                           input.shape(),
   1184                                           blockSize,
   1185                                           output.buffer,
   1186                                           outShape);
   1187         } break;
   1188         case OperationType::SPACE_TO_DEPTH: {
   1189             if (!allParametersPresent(2, 1)) {
   1190                 return ANEURALNETWORKS_BAD_DATA;
   1191             }
   1192             const RunTimeOperandInfo& input = mOperands[ins[0]];
   1193             int32_t blockSize = getScalarData<int32_t>(mOperands[ins[1]]);
   1194 
   1195             RunTimeOperandInfo& output = mOperands[outs[0]];
   1196             Shape outShape = output.shape();
   1197 
   1198             success = spaceToDepthPrepare(input.shape(),
   1199                                           blockSize,
   1200                                           &outShape) &&
   1201                       setInfoAndAllocateIfNeeded(&output, outShape) &&
   1202                       spaceToDepthGeneric(input.buffer,
   1203                                           input.shape(),
   1204                                           blockSize,
   1205                                           output.buffer,
   1206                                           outShape);
   1207         } break;
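                 // The operations below construct helper objects (EmbeddingLookup,
                 // HashtableLookup, LSHProjection, LSTMCell, RNN, SVDF): output shapes
                 // are prepared first, outputs allocated, then Eval() runs the computation.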
   1208         case OperationType::EMBEDDING_LOOKUP: {
   1209             const RunTimeOperandInfo &values =
   1210                 mOperands[ins[EmbeddingLookup::kValueTensor]];
   1211             const RunTimeOperandInfo &lookups =
   1212                 mOperands[ins[EmbeddingLookup::kLookupTensor]];
   1213             RunTimeOperandInfo &output =
   1214                 mOperands[outs[EmbeddingLookup::kOutputTensor]];
   1215 
   1216             Shape outputShape;
   1217             EmbeddingLookup lookup(operation, mOperands);
   1218 
   1219             success = embeddingLookupPrepare(values.shape(), lookups.shape(), &outputShape) &&
   1220                 setInfoAndAllocateIfNeeded(&output, outputShape) &&
   1221                 lookup.Eval();
   1222         } break;
   1223         case OperationType::HASHTABLE_LOOKUP: {
   1224             const RunTimeOperandInfo &lookups =
   1225                 mOperands[ins[HashtableLookup::kLookupTensor]];
   1226             const RunTimeOperandInfo &keys =
   1227                 mOperands[ins[HashtableLookup::kKeyTensor]];
   1228             const RunTimeOperandInfo &values =
   1229                 mOperands[ins[HashtableLookup::kValueTensor]];
   1230 
   1231             RunTimeOperandInfo &output =
   1232                 mOperands[outs[HashtableLookup::kOutputTensor]];
   1233             RunTimeOperandInfo &hits =
   1234                 mOperands[outs[HashtableLookup::kHitsTensor]];
   1235 
   1236             Shape outputShape, hitShape;
   1237             HashtableLookup lookup(operation, mOperands);
   1238 
   1239             success = hashtableLookupPrepare(lookups.shape(), keys.shape(), values.shape(),
   1240                                              &outputShape, &hitShape) &&
   1241                 setInfoAndAllocateIfNeeded(&output, outputShape) &&
   1242                 setInfoAndAllocateIfNeeded(&hits, hitShape) &&
   1243                 lookup.Eval();
   1244         } break;
   1245         case OperationType::LSH_PROJECTION: {
   1246             RunTimeOperandInfo &output =
   1247                 mOperands[outs[LSHProjection::kOutputTensor]];
   1248 
   1249             Shape outputShape;
   1250             LSHProjection lsh(operation, mOperands);
   1251 
   1252             success = LSHProjection::Prepare(operation, mOperands,
   1253                                              &outputShape) &&
   1254                 setInfoAndAllocateIfNeeded(&output, outputShape) &&
   1255                 lsh.Eval();
   1256         } break;
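                 // LSTM produces four outputs (scratch buffer, output state, cell state,
                 // and output); all of them must be shaped and allocated before Eval().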
   1257         case OperationType::LSTM: {
   1258             RunTimeOperandInfo &scratch =
   1259                 mOperands[outs[LSTMCell::kScratchBufferTensor]];
   1260             RunTimeOperandInfo &outputStateOut =
   1261                 mOperands[outs[LSTMCell::kOutputStateOutTensor]];
   1262             RunTimeOperandInfo &cellStateOut =
   1263                 mOperands[outs[LSTMCell::kCellStateOutTensor]];
   1264             RunTimeOperandInfo &output =
   1265                 mOperands[outs[LSTMCell::kOutputTensor]];
   1266 
   1267             Shape scratchShape, outputStateShape, cellStateShape, outputShape;
   1268             LSTMCell lstm_cell(operation, mOperands);
   1269 
   1270             success = LSTMCell::Prepare(operation, mOperands,
   1271                                         &scratchShape, &outputStateShape,
   1272                                         &cellStateShape, &outputShape) &&
   1273                 setInfoAndAllocateIfNeeded(&scratch, scratchShape) &&
   1274                 setInfoAndAllocateIfNeeded(&outputStateOut, outputStateShape) &&
   1275                 setInfoAndAllocateIfNeeded(&cellStateOut, cellStateShape) &&
   1276                 setInfoAndAllocateIfNeeded(&output, outputShape) &&
   1277                 lstm_cell.Eval();
   1278         } break;
   1279         case OperationType::RNN: {
   1280             RunTimeOperandInfo &hiddenStateOut =
   1281                 mOperands[outs[RNN::kHiddenStateOutTensor]];
   1282             RunTimeOperandInfo &output =
   1283                 mOperands[outs[RNN::kOutputTensor]];
   1284 
   1285             Shape hiddenStateShape, outputShape;
   1286             RNN rnn_cell(operation, mOperands);
   1287 
   1288             success = RNN::Prepare(operation, mOperands,
   1289                                    &hiddenStateShape, &outputShape) &&
   1290                 setInfoAndAllocateIfNeeded(&hiddenStateOut, hiddenStateShape) &&
   1291                 setInfoAndAllocateIfNeeded(&output, outputShape) &&
   1292                 rnn_cell.Eval();
   1293         } break;
   1294         case OperationType::SVDF: {
   1295             RunTimeOperandInfo &stateOut =
   1296                 mOperands[outs[SVDF::kStateOutTensor]];
   1297             RunTimeOperandInfo &output =
   1298                 mOperands[outs[SVDF::kOutputTensor]];
   1299 
   1300             Shape stateShape, outputShape;
   1301             SVDF svdf(operation, mOperands);
   1302 
   1303             success = SVDF::Prepare(operation, mOperands,
   1304                                     &stateShape, &outputShape) &&
   1305                 setInfoAndAllocateIfNeeded(&stateOut, stateShape) &&
   1306                 setInfoAndAllocateIfNeeded(&output, outputShape) &&
   1307                 svdf.Eval();
   1308         } break;
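                 // BATCH_TO_SPACE_ND and SPACE_TO_BATCH_ND receive their block sizes
                 // (and, for the latter, paddings) as int32 tensors, not scalar operands.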
   1309         case OperationType::BATCH_TO_SPACE_ND: {
   1310             if (!allParametersPresent(2, 1)) {
   1311                 return ANEURALNETWORKS_BAD_DATA;
   1312             }
   1313             const RunTimeOperandInfo& input = mOperands[ins[0]];
   1314             const RunTimeOperandInfo& blockSize = mOperands[ins[1]];
   1315 
   1316             RunTimeOperandInfo& output = mOperands[outs[0]];
   1317             Shape outShape = output.shape();
   1318 
   1319             success = batchToSpacePrepare(input.shape(),
   1320                                           reinterpret_cast<const int32_t*>(blockSize.buffer),
   1321                                           blockSize.shape(),
   1322                                           &outShape) &&
   1323                       setInfoAndAllocateIfNeeded(&output, outShape) &&
   1324                       batchToSpaceGeneric(input.buffer,
   1325                                           input.shape(),
   1326                                           reinterpret_cast<const int32_t*>(blockSize.buffer),
   1327                                           output.buffer,
   1328                                           outShape);
   1329         } break;
   1330         case OperationType::SPACE_TO_BATCH_ND: {
   1331             if (!allParametersPresent(3, 1)) {
   1332                 return ANEURALNETWORKS_BAD_DATA;
   1333             }
   1334             const RunTimeOperandInfo& input = mOperands[ins[0]];
   1335             const RunTimeOperandInfo& blockSize = mOperands[ins[1]];
   1336             const RunTimeOperandInfo& paddings = mOperands[ins[2]];
   1337 
   1338             RunTimeOperandInfo& output = mOperands[outs[0]];
   1339             Shape outShape = output.shape();
   1340 
   1341             success = spaceToBatchPrepare(input.shape(),
   1342                                           reinterpret_cast<const int32_t*>(blockSize.buffer),
   1343                                           blockSize.shape(),
   1344                                           reinterpret_cast<const int32_t*>(paddings.buffer),
   1345                                           paddings.shape(),
   1346                                           &outShape) &&
   1347                       setInfoAndAllocateIfNeeded(&output, outShape) &&
   1348                       spaceToBatchGeneric(input.buffer,
   1349                                           input.shape(),
   1350                                           reinterpret_cast<const int32_t*>(blockSize.buffer),
   1351                                           reinterpret_cast<const int32_t*>(paddings.buffer),
   1352                                           paddings.shape(),
   1353                                           output.buffer,
   1354                                           outShape);
   1355         } break;
   1356         case OperationType::PAD: {
   1357             if (!allParametersPresent(2, 1)) {
   1358                 return ANEURALNETWORKS_BAD_DATA;
   1359             }
   1360             const RunTimeOperandInfo& input = mOperands[ins[0]];
   1361             const RunTimeOperandInfo& paddings = mOperands[ins[1]];
   1362 
   1363             RunTimeOperandInfo& output = mOperands[outs[0]];
   1364             Shape outShape = output.shape();
   1365 
   1366             success = padPrepare(input.shape(),
   1367                                  reinterpret_cast<const int32_t*>(paddings.buffer),
   1368                                  paddings.shape(),
   1369                                  &outShape) &&
   1370                       setInfoAndAllocateIfNeeded(&output, outShape) &&
   1371                       padGeneric(input.buffer,
   1372                                  input.shape(),
   1373                                  reinterpret_cast<const int32_t*>(paddings.buffer),
   1374                                  output.buffer,
   1375                                  outShape);
   1376         } break;
   1377         case OperationType::SQUEEZE: {
   1378             if (!allParametersPresent(2, 1)) {
   1379                 return ANEURALNETWORKS_BAD_DATA;
   1380             }
   1381             const RunTimeOperandInfo& input = mOperands[ins[0]];
   1382             const RunTimeOperandInfo& squeezeDims = mOperands[ins[1]];
   1383 
   1384             RunTimeOperandInfo& output = mOperands[outs[0]];
   1385             Shape outShape = output.shape();
   1386 
   1387             success = squeezePrepare(input.shape(),
   1388                                      reinterpret_cast<const int32_t*>(squeezeDims.buffer),
   1389                                      squeezeDims.shape(),
   1390                                      &outShape) &&
   1391                       setInfoAndAllocateIfNeeded(&output, outShape) &&
   1392                       squeezeGeneric(input.buffer,
   1393                                      input.shape(),
   1394                                      output.buffer,
   1395                                      outShape);
   1396         } break;
   1397         case OperationType::TRANSPOSE: {
   1398             if (!allParametersPresent(2, 1)) {
   1399                 return ANEURALNETWORKS_BAD_DATA;
   1400             }
   1401             const RunTimeOperandInfo& input = mOperands[ins[0]];
   1402             const RunTimeOperandInfo& perms = mOperands[ins[1]];
   1403 
   1404             RunTimeOperandInfo& output = mOperands[outs[0]];
   1405             Shape outShape = output.shape();
   1406 
   1407             success = transposePrepare(input.shape(),
   1408                                        reinterpret_cast<const int32_t*>(perms.buffer),
   1409                                        perms.shape(),
   1410                                        &outShape) &&
   1411                       setInfoAndAllocateIfNeeded(&output, outShape) &&
   1412                       transposeGeneric(input.buffer,
   1413                                        input.shape(),
   1414                                        reinterpret_cast<const int32_t*>(perms.buffer),
   1415                                        perms.shape(),
   1416                                        output.buffer,
   1417                                        outShape);
   1418         } break;
   1419         case OperationType::STRIDED_SLICE: {
   1420             if (!allParametersPresent(7, 1)) {
   1421                 return ANEURALNETWORKS_BAD_DATA;
   1422             }
   1423             const RunTimeOperandInfo& input = mOperands[ins[0]];
   1424             const RunTimeOperandInfo& begins = mOperands[ins[1]];
   1425             const RunTimeOperandInfo& ends = mOperands[ins[2]];
   1426             const RunTimeOperandInfo& strides = mOperands[ins[3]];
   1427             int32_t beginMask = getScalarData<int32_t>(mOperands[ins[4]]);
   1428             int32_t endMask = getScalarData<int32_t>(mOperands[ins[5]]);
   1429             int32_t shrinkAxisMask = getScalarData<int32_t>(mOperands[ins[6]]);
   1430 
   1431             RunTimeOperandInfo& output = mOperands[outs[0]];
   1432             Shape outShape = output.shape();
   1433 
   1434             success = stridedSlicePrepare(input.shape(),
   1435                                           reinterpret_cast<const int32_t*>(begins.buffer),
   1436                                           begins.shape(),
   1437                                           reinterpret_cast<const int32_t*>(ends.buffer),
   1438                                           ends.shape(),
   1439                                           reinterpret_cast<const int32_t*>(strides.buffer),
   1440                                           strides.shape(),
   1441                                           beginMask, endMask, shrinkAxisMask,
   1442                                           &outShape) &&
   1443                       setInfoAndAllocateIfNeeded(&output, outShape) &&
   1444                       stridedSliceGeneric(input.buffer,
   1445                                           input.shape(),
   1446                                           reinterpret_cast<const int32_t*>(begins.buffer),
   1447                                           reinterpret_cast<const int32_t*>(ends.buffer),
   1448                                           reinterpret_cast<const int32_t*>(strides.buffer),
   1449                                           beginMask, endMask, shrinkAxisMask,
   1450                                           output.buffer,
   1451                                           outShape);
   1452         } break;
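                 // DIV and SUB are only implemented for TENSOR_FLOAT32 here; both take a
                 // fused activation code as their third input.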
   1453         case OperationType::DIV: {
   1454             if (!allParametersPresent(3, 1)) {
   1455                 return ANEURALNETWORKS_BAD_DATA;
   1456             }
   1457             const RunTimeOperandInfo& in1 = mOperands[ins[0]];
   1458             const RunTimeOperandInfo& in2 = mOperands[ins[1]];
   1459             int32_t activation = getScalarData<int32_t>(mOperands[ins[2]]);
   1460 
   1461             RunTimeOperandInfo& out = mOperands[outs[0]];
   1462             Shape outShape = out.shape();
   1463 
   1464             if (in1.type == OperandType::TENSOR_FLOAT32) {
   1465                 success = addMulPrepare(in1.shape(), in2.shape(), &outShape) &&
   1466                           setInfoAndAllocateIfNeeded(&out, outShape) &&
   1467                           divFloat32(reinterpret_cast<const float*>(in1.buffer),
   1468                                      in1.shape(),
   1469                                      reinterpret_cast<const float*>(in2.buffer),
   1470                                      in2.shape(),
   1471                                      activation,
   1472                                      reinterpret_cast<float*>(out.buffer),
   1473                                      outShape);
   1474             }
   1475         } break;
   1476         case OperationType::SUB: {
   1477             if (!allParametersPresent(3, 1)) {
   1478                 return ANEURALNETWORKS_BAD_DATA;
   1479             }
   1480             const RunTimeOperandInfo& in1 = mOperands[ins[0]];
   1481             const RunTimeOperandInfo& in2 = mOperands[ins[1]];
   1482             int32_t activation = getScalarData<int32_t>(mOperands[ins[2]]);
   1483 
   1484             RunTimeOperandInfo& out = mOperands[outs[0]];
   1485             Shape outShape = out.shape();
   1486 
   1487             if (in1.type == OperandType::TENSOR_FLOAT32) {
   1488                 success = addMulPrepare(in1.shape(), in2.shape(), &outShape) &&
   1489                           setInfoAndAllocateIfNeeded(&out, outShape) &&
   1490                           subFloat32(reinterpret_cast<const float*>(in1.buffer),
   1491                                      in1.shape(),
   1492                                      reinterpret_cast<const float*>(in2.buffer),
   1493                                      in2.shape(),
   1494                                      activation,
   1495                                      reinterpret_cast<float*>(out.buffer),
   1496                                      outShape);
   1497             }
   1498         } break;
   1499         case OperationType::MEAN: {
   1500             if (!allParametersPresent(3, 1)) {
   1501                 return ANEURALNETWORKS_BAD_DATA;
   1502             }
   1503             const RunTimeOperandInfo& input = mOperands[ins[0]];
   1504             const RunTimeOperandInfo& axis = mOperands[ins[1]];
   1505             int32_t keepDims = getScalarData<int32_t>(mOperands[ins[2]]);
   1506 
   1507             RunTimeOperandInfo& output = mOperands[outs[0]];
   1508             Shape outShape = output.shape();
   1509 
   1510             success = meanPrepare(input.shape(),
   1511                                   reinterpret_cast<const int32_t*>(axis.buffer),
   1512                                   axis.shape(),
   1513                                   keepDims > 0,
   1514                                   &outShape) &&
   1515                       setInfoAndAllocateIfNeeded(&output, outShape) &&
   1516                       meanGeneric(input.buffer,
   1517                                   input.shape(),
   1518                                   reinterpret_cast<const int32_t*>(axis.buffer),
   1519                                   axis.shape(),
   1520                                   keepDims > 0,
   1521                                   output.buffer,
   1522                                   outShape);
   1523         } break;
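                 // Any operation type not handled above is unexpected at this point and
                 // trips the assertion below.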
   1524         default:
   1525             nnAssert(false);
   1526             break;
   1527     }
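             // 'success' is false if shape preparation, output allocation, or the
             // computation itself failed for the selected operand type.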
   1528     if (!success) {
   1529         LOG(ERROR) << getOperationName(operation.type) << " failed.";
   1530         return ANEURALNETWORKS_OP_FAILED;
   1531     }
   1532 
   1533     freeNoLongerUsedOperands(ins);
   1534     return ANEURALNETWORKS_NO_ERROR;
   1535 }
   1536 
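         // RAII helper: the constructor sets the OpenMP block time to 20 ms (and, when
         // NNAPI_LIMIT_CPU_THREADS is enabled, reduces the Eigen thread count on machines
         // with four or more processors); the destructor restores the saved values.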
   1537 ScopedOpenmpSettings::ScopedOpenmpSettings() {
   1538     mBlocktimeInitial = kmp_get_blocktime();
   1539     kmp_set_blocktime(20);  // ms, see b/109645291
   1540 
   1541 #if NNAPI_LIMIT_CPU_THREADS
    1542     // Code not yet enabled. The number of threads should be chosen based on
    1543     // benchmarking. See the longer comment by the class declaration.
   1544     mMaxThreadsInitial = Eigen::nbThreads();
   1545     const int nProcs = omp_get_num_procs();
   1546     int threads = nProcs;
   1547     if (nProcs >= 8) {
   1548         threads = nProcs - 4;
   1549     } else if (nProcs >= 4) {
   1550         threads = nProcs - 2;
   1551     }
   1552     Eigen::setNbThreads(threads);
   1553 #endif
   1554 }
   1555 
   1556 ScopedOpenmpSettings::~ScopedOpenmpSettings() {
   1557     kmp_set_blocktime(mBlocktimeInitial);
   1558 #if NNAPI_LIMIT_CPU_THREADS
   1559     Eigen::setNbThreads(mMaxThreadsInitial);
   1560 #endif
   1561 }
   1562 
   1563 
   1564 } // namespace nn
   1565 } // namespace android
   1566