/*
 * Copyright (C) 2017 The Android Open Source Project
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

#define LOG_TAG "CpuExecutor"

#include "CpuExecutor.h"

#include "NeuralNetworks.h"
#include "Operations.h"

#include <new>
#include <sys/mman.h>

namespace android {
namespace nn {

// TODO: short term, make shared memory mapping and updating a utility function.
// TODO: long term, implement mmap_fd as a hidl IMemory service.
bool RunTimePoolInfo::set(const hidl_memory& hidlMemory) {
    this->hidlMemory = hidlMemory;
    auto memType = hidlMemory.name();
    if (memType == "ashmem") {
        memory = mapMemory(hidlMemory);
        if (memory == nullptr) {
            LOG(ERROR) << "Can't map shared memory.";
            return false;
        }
        memory->update();
        buffer = reinterpret_cast<uint8_t*>(static_cast<void*>(memory->getPointer()));
        if (buffer == nullptr) {
            LOG(ERROR) << "Can't access shared memory.";
            return false;
        }
        return true;
    } else if (memType == "mmap_fd") {
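        // For "mmap_fd" memory the native handle carries the mapping parameters:
        // data[0] is the file descriptor, data[1] the mmap protection flags, and
        // data[2]/data[3] the two halves of the offset, recombined below by
        // getSizeFromInts().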
        size_t size = hidlMemory.size();
        int fd = hidlMemory.handle()->data[0];
        int prot = hidlMemory.handle()->data[1];
        size_t offset = getSizeFromInts(hidlMemory.handle()->data[2],
                                        hidlMemory.handle()->data[3]);
        buffer = static_cast<uint8_t*>(mmap(nullptr, size, prot, MAP_SHARED, fd, offset));
        if (buffer == MAP_FAILED) {
            LOG(ERROR) << "Can't mmap the file descriptor.";
            return false;
        }
        return true;
    } else {
        LOG(ERROR) << "unsupported hidl_memory type";
        return false;
    }
}

// Makes sure the output data is correctly updated after execution.
bool RunTimePoolInfo::update() {
    auto memType = hidlMemory.name();
    if (memType == "ashmem") {
        memory->commit();
        return true;
    } else if (memType == "mmap_fd") {
        int prot = hidlMemory.handle()->data[1];
        if (prot & PROT_WRITE) {
            size_t size = hidlMemory.size();
            return msync(buffer, size, MS_SYNC) == 0;
        }
    }
    // No-op for other types of memory.
    return true;
}

bool setRunTimePoolInfosFromHidlMemories(std::vector<RunTimePoolInfo>* poolInfos,
                                         const hidl_vec<hidl_memory>& pools) {
    poolInfos->resize(pools.size());
    for (size_t i = 0; i < pools.size(); i++) {
        auto& poolInfo = (*poolInfos)[i];
        if (!poolInfo.set(pools[i])) {
            LOG(ERROR) << "Could not map pool";
            return false;
        }
    }
    return true;
}
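
// Typical usage (sketch, not part of this file): a driver maps the request
// pools once before execution and flushes them afterwards.  The names below
// mirror CpuExecutor::run() rather than any particular caller.
//
//     std::vector<RunTimePoolInfo> requestPoolInfos;
//     if (!setRunTimePoolInfosFromHidlMemories(&requestPoolInfos, request.pools)) {
//         return ANEURALNETWORKS_BAD_DATA;
//     }
//     // ... execute the model ...
//     for (auto poolInfo : requestPoolInfos) {
//         poolInfo.update();  // commit()/msync() the outputs back to the caller
//     }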

// Updates the RunTimeOperandInfo with the newly calculated shape.
// Allocates the buffer if needed.
static bool setInfoAndAllocateIfNeeded(RunTimeOperandInfo* info, const Shape& shape) {
    // For user-provided model output operands, the parameters must match the Shape
    // calculated from the preparation step.
    if (info->lifetime == OperandLifeTime::MODEL_OUTPUT) {
        if (info->type != shape.type ||
            info->dimensions != shape.dimensions) {
            LOG(ERROR) << "Invalid type or dimensions for model output";
            return false;
        }
        if (info->type == OperandType::TENSOR_QUANT8_ASYMM &&
            (info->scale != shape.scale || info->zeroPoint != shape.offset)) {
            LOG(ERROR) << "Invalid scale or zeroPoint for model output";
            return false;
        }
    }
    info->type = shape.type;
    info->dimensions = shape.dimensions;
    info->scale = shape.scale;
    info->zeroPoint = shape.offset;
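    // Temporary variables are allocated lazily, once the prepare step for the
    // producing operation has computed their actual shape.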
    if (info->lifetime == OperandLifeTime::TEMPORARY_VARIABLE && info->buffer == nullptr) {
        uint32_t length = sizeOfData(info->type, info->dimensions);
        // Use a non-throwing allocation so that failure is reported to the caller.
        info->buffer = new (std::nothrow) uint8_t[length];
        if (info->buffer == nullptr) {
            LOG(ERROR) << "Failed to allocate " << length << " bytes for temporary operand";
            return false;
        }
    }
    return true;
}

// Ignore the .pools entry in model and request.  This will have been taken care of
// by the caller.
int CpuExecutor::run(const Model& model, const Request& request,
                     const std::vector<RunTimePoolInfo>& modelPoolInfos,
                     const std::vector<RunTimePoolInfo>& requestPoolInfos) {
    VLOG(CPUEXE) << "CpuExecutor::run()";
    // VLOG(CPUEXE) << "model: " << toString(model);
    VLOG(CPUEXE) << "request: " << toString(request);

    mModel = &model;
    mRequest = &request; // TODO check if mRequest is needed
    if (!initializeRunTimeInfo(modelPoolInfos, requestPoolInfos)) {
        return ANEURALNETWORKS_BAD_DATA;
    }
    // The model has serialized the operations in execution order.
    for (const auto& operation : model.operations) {
        int n = executeOperation(operation);
        if (n != ANEURALNETWORKS_NO_ERROR) {
            return n;
        }
    }
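    // Write the outputs back to the shared memory pools: commit() for ashmem,
    // msync() for mmap_fd (see RunTimePoolInfo::update()).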
    for (auto runtimeInfo : modelPoolInfos) {
        runtimeInfo.update();
    }
    for (auto runtimeInfo : requestPoolInfos) {
        runtimeInfo.update();
    }
    mModel = nullptr;
    mRequest = nullptr;
    VLOG(CPUEXE) << "Completed run normally";
    return ANEURALNETWORKS_NO_ERROR;
}

bool CpuExecutor::initializeRunTimeInfo(const std::vector<RunTimePoolInfo>& modelPoolInfos,
                                        const std::vector<RunTimePoolInfo>& requestPoolInfos) {
    VLOG(CPUEXE) << "CpuExecutor::initializeRunTimeInfo";
    const size_t count = mModel->operands.size();
    mOperands.resize(count);

    // Start by setting the runtime info to what's in the model.
    for (size_t i = 0; i < count; i++) {
        const Operand& from = mModel->operands[i];
        RunTimeOperandInfo& to = mOperands[i];
        to.type = from.type;
        to.dimensions = from.dimensions;
        to.scale = from.scale;
        to.zeroPoint = from.zeroPoint;
        to.length = from.location.length;
        to.lifetime = from.lifetime;
        switch (from.lifetime) {
            case OperandLifeTime::TEMPORARY_VARIABLE:
                to.buffer = nullptr;
                to.numberOfUsesLeft = from.numberOfConsumers;
                break;
            case OperandLifeTime::CONSTANT_COPY:
                to.buffer = const_cast<uint8_t*>(&mModel->operandValues[from.location.offset]);
                to.numberOfUsesLeft = 0;
                break;
            case OperandLifeTime::CONSTANT_REFERENCE: {
                auto poolIndex = from.location.poolIndex;
                nnAssert(poolIndex < modelPoolInfos.size());
                auto& r = modelPoolInfos[poolIndex];
                to.buffer = r.buffer + from.location.offset;
                to.numberOfUsesLeft = 0;
                break;
            }
            case OperandLifeTime::MODEL_INPUT:
            case OperandLifeTime::MODEL_OUTPUT:
            case OperandLifeTime::NO_VALUE:
                to.buffer = nullptr;
                to.numberOfUsesLeft = 0;
                break;
            default:
                nnAssert(false);
                break;
        }
    }

    // Adjust the runtime info for the arguments passed to the model,
    // modifying the buffer location, and possibly the dimensions.
    auto updateForArguments = [this, &requestPoolInfos](const std::vector<uint32_t>& indexes,
                                  const hidl_vec<RequestArgument>& arguments) {
        nnAssert(indexes.size() == arguments.size());
        for (size_t i = 0; i < indexes.size(); i++) {
            const uint32_t operandIndex = indexes[i];
            const RequestArgument& from = arguments[i];
            RunTimeOperandInfo& to = mOperands[operandIndex];
            if (from.dimensions.size() > 0) {
                // It's the responsibility of the caller to validate that
                // from.dimensions only modifies the dimensions that were
                // unspecified in the model.  That's the case in SampleDriver.cpp
                // with the call to validateRequest().
                // TODO make sure that's the case for the default CPU path.
                to.dimensions = from.dimensions;
            }
            if (from.hasNoValue) {
                to.lifetime = OperandLifeTime::NO_VALUE;
                nnAssert(to.buffer == nullptr);
            } else {
                auto poolIndex = from.location.poolIndex;
                nnAssert(poolIndex < requestPoolInfos.size());
                auto& r = requestPoolInfos[poolIndex];
                to.buffer = r.buffer + from.location.offset;
            }
        }
    };
    updateForArguments(mModel->inputIndexes, mRequest->inputs);
    updateForArguments(mModel->outputIndexes, mRequest->outputs);

    return true;
}

void CpuExecutor::freeNoLongerUsedOperands(const std::vector<uint32_t>& inputs) {
    for (uint32_t i : inputs) {
        auto& info = mOperands[i];
        // Operands with no uses left are constants or model inputs/outputs whose
        // buffers are not owned here; skip them.
        if (info.numberOfUsesLeft == 0) {
            continue;
        }
        info.numberOfUsesLeft--;
        if (info.numberOfUsesLeft == 0) {
            nnAssert(info.buffer != nullptr);
            delete[] info.buffer;
            info.buffer = nullptr;
        }
    }
}

int CpuExecutor::executeOperation(const Operation& operation) {
    // VLOG(CPUEXE) << "CpuExecutor::executeOperation(" << toString(operation) << ")";
    const hidl_vec<uint32_t>& ins = operation.inputs;
    const hidl_vec<uint32_t>& outs = operation.outputs;
    bool success = false;

    // Function to verify that the number of input and output parameters
    // matches what is expected.  Also checks that all the parameters have
    // values. This function is to be used only for operations that do not
    // accept optional arguments.
    // TODO Have a version that works for optional arguments.
    auto allParametersPresent = [&operation, &ins, &outs, this](size_t requiredIns,
                                                                size_t requiredOuts) -> bool {
        auto verify = [&operation, this](size_t requiredCount, const hidl_vec<uint32_t>& indexes,
                          const char* type) -> bool {
            size_t actualCount = indexes.size();
            if (actualCount != requiredCount) {
                LOG(ERROR) << getOperationName(operation.type)
                           << ": Invalid number of " << type << " operands. Got " << actualCount
                           << " of " << requiredCount;
                return false;
            }
            for (size_t i = 0; i < actualCount; i++) {
                if (mOperands[indexes[i]].lifetime == OperandLifeTime::NO_VALUE) {
                    LOG(ERROR) << getOperationName(operation.type) << " " << type
                               << " operand " << i << " is required but missing.";
                    return false;
                }
            }
            return true;
        };
        return verify(requiredIns, ins, "in") && verify(requiredOuts, outs, "out");
    };

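    // For example, allParametersPresent(3, 1) for ADD below means two input
    // tensors plus a scalar activation operand, and a single output tensor.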
    switch (operation.type) {
        case OperationType::OEM_OPERATION: {
            LOG(ERROR) << "OEM operation not supported for CPU execution";
            success = false;
        } break;
        case OperationType::ADD: {
            if (!allParametersPresent(3, 1)) {
                return ANEURALNETWORKS_BAD_DATA;
            }
            const RunTimeOperandInfo& in1 = mOperands[ins[0]];
            const RunTimeOperandInfo& in2 = mOperands[ins[1]];
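            // ins[2] is the fused activation code (one of the ANEURALNETWORKS_FUSED_*
            // values) applied to the result.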
            int32_t activation = getScalarData<int32_t>(mOperands[ins[2]]);

            RunTimeOperandInfo& out = mOperands[outs[0]];
            Shape outShape = out.shape();

            if (in1.type == OperandType::TENSOR_FLOAT32) {
                success = addMulPrepare(in1.shape(), in2.shape(), &outShape) &&
                          setInfoAndAllocateIfNeeded(&out, outShape) &&
                          addFloat32(reinterpret_cast<const float*>(in1.buffer),
                                     in1.shape(),
                                     reinterpret_cast<const float*>(in2.buffer),
                                     in2.shape(),
                                     activation,
                                     reinterpret_cast<float*>(out.buffer),
                                     outShape);
            } else if (in1.type == OperandType::TENSOR_QUANT8_ASYMM) {
                success = addMulPrepare(in1.shape(), in2.shape(), &outShape) &&
                          setInfoAndAllocateIfNeeded(&out, outShape) &&
                          addQuant8(reinterpret_cast<const uint8_t*>(in1.buffer),
                                    in1.shape(),
                                    reinterpret_cast<const uint8_t*>(in2.buffer),
                                    in2.shape(),
                                    activation,
                                    reinterpret_cast<uint8_t*>(out.buffer),
                                    outShape);
            }
        } break;
        case OperationType::MUL: {
            if (!allParametersPresent(3, 1)) {
                return ANEURALNETWORKS_BAD_DATA;
            }
            const RunTimeOperandInfo& in1 = mOperands[ins[0]];
            const RunTimeOperandInfo& in2 = mOperands[ins[1]];
            int32_t activation = getScalarData<int32_t>(mOperands[ins[2]]);

            RunTimeOperandInfo& out = mOperands[outs[0]];
            Shape outShape = out.shape();

            if (in1.type == OperandType::TENSOR_FLOAT32) {
                success = addMulPrepare(in1.shape(), in2.shape(), &outShape) &&
                          setInfoAndAllocateIfNeeded(&out, outShape) &&
                          mulFloat32(reinterpret_cast<const float*>(in1.buffer),
                                     in1.shape(),
                                     reinterpret_cast<const float*>(in2.buffer),
                                     in2.shape(),
                                     activation,
                                     reinterpret_cast<float*>(out.buffer),
                                     outShape);
            } else if (in1.type == OperandType::TENSOR_QUANT8_ASYMM) {
                success = addMulPrepare(in1.shape(), in2.shape(), &outShape) &&
                          setInfoAndAllocateIfNeeded(&out, outShape) &&
                          mulQuant8(reinterpret_cast<const uint8_t*>(in1.buffer),
                                    in1.shape(),
                                    reinterpret_cast<const uint8_t*>(in2.buffer),
                                    in2.shape(),
                                    activation,
                                    reinterpret_cast<uint8_t*>(out.buffer),
                                    outShape);
            }
        } break;
        case OperationType::FLOOR: {
            if (!allParametersPresent(1, 1)) {
                return ANEURALNETWORKS_BAD_DATA;
            }
            const RunTimeOperandInfo& input = mOperands[ins[0]];
            RunTimeOperandInfo& output = mOperands[outs[0]];
            Shape outShape = output.shape();

            if (input.type == OperandType::TENSOR_FLOAT32) {
                success = floorPrepare(input.shape(), &outShape) &&
                          setInfoAndAllocateIfNeeded(&output, outShape) &&
                          floorFloat32(reinterpret_cast<const float*>(input.buffer),
                                       reinterpret_cast<float*>(output.buffer),
                                       outShape);
            }
        } break;
        case OperationType::DEQUANTIZE: {
            if (!allParametersPresent(1, 1)) {
                return ANEURALNETWORKS_BAD_DATA;
            }
            const RunTimeOperandInfo& input = mOperands[ins[0]];
            RunTimeOperandInfo& output = mOperands[outs[0]];
            Shape outShape = output.shape();

            if (input.type == OperandType::TENSOR_QUANT8_ASYMM) {
                success = dequantizePrepare(input.shape(), &outShape) &&
                          setInfoAndAllocateIfNeeded(&output, outShape) &&
                          dequantizeQuant8ToFloat32(
                                  reinterpret_cast<const uint8_t*>(input.buffer),
                                  reinterpret_cast<float*>(output.buffer),
                                  input.shape());
            }
        } break;
        case OperationType::DEPTHWISE_CONV_2D: {
            const size_t inCount = ins.size();
            if ((inCount != 11 && inCount != 8) ||
                    !allParametersPresent(inCount, 1)) {
                return ANEURALNETWORKS_BAD_DATA;
            }
            const RunTimeOperandInfo& input  = mOperands[ins[0]];
            const RunTimeOperandInfo& filter = mOperands[ins[1]];
            const RunTimeOperandInfo& bias   = mOperands[ins[2]];

            int32_t padding_left, padding_right;
            int32_t padding_top, padding_bottom;
            int32_t stride_width, stride_height;
            int32_t depth_multiplier;
            int32_t activation;

            if (inCount == 11) {
                padding_left     = getScalarData<int32_t>(mOperands[ins[3]]);
                padding_right    = getScalarData<int32_t>(mOperands[ins[4]]);
                padding_top      = getScalarData<int32_t>(mOperands[ins[5]]);
                padding_bottom   = getScalarData<int32_t>(mOperands[ins[6]]);
                stride_width     = getScalarData<int32_t>(mOperands[ins[7]]);
                stride_height    = getScalarData<int32_t>(mOperands[ins[8]]);
                depth_multiplier = getScalarData<int32_t>(mOperands[ins[9]]);
                activation       = getScalarData<int32_t>(mOperands[ins[10]]);
            } else {
                int32_t padding_implicit = getScalarData<int32_t>(mOperands[ins[3]]);
                stride_width     = getScalarData<int32_t>(mOperands[ins[4]]);
                stride_height    = getScalarData<int32_t>(mOperands[ins[5]]);
                depth_multiplier = getScalarData<int32_t>(mOperands[ins[6]]);
                activation       = getScalarData<int32_t>(mOperands[ins[7]]);

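                // The implicit padding code is expanded into explicit
                // left/right/top/bottom padding from the input and filter sizes
                // (NHWC layout: dimension 1 is height, dimension 2 is width).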
                Shape inputShape = input.shape();
                Shape filterShape = filter.shape();
                int32_t input_width  = getSizeOfDimension(inputShape, 2);
                int32_t input_height = getSizeOfDimension(inputShape, 1);
                int32_t filter_width  = getSizeOfDimension(filterShape, 2);
                int32_t filter_height = getSizeOfDimension(filterShape, 1);
                calculateExplicitPadding(input_width, stride_width,
                                         filter_width, padding_implicit,
                                         &padding_left, &padding_right);
                calculateExplicitPadding(input_height, stride_height,
                                         filter_height, padding_implicit,
                                         &padding_top, &padding_bottom);
            }

            RunTimeOperandInfo& output = mOperands[outs[0]];
            Shape outShape = output.shape();

            if (input.type == OperandType::TENSOR_FLOAT32) {
                success = depthwiseConvPrepare(input.shape(), filter.shape(), bias.shape(),
                                               padding_left, padding_right,
                                               padding_top, padding_bottom,
                                               stride_width, stride_height,
                                               &outShape) &&
                          setInfoAndAllocateIfNeeded(&output, outShape) &&
                          depthwiseConvFloat32(reinterpret_cast<const float*>(input.buffer),
                                               input.shape(),
                                               reinterpret_cast<const float*>(filter.buffer),
                                               filter.shape(),
                                               reinterpret_cast<const float*>(bias.buffer),
                                               bias.shape(),
                                               padding_left, padding_right,
                                               padding_top, padding_bottom,
                                               stride_width, stride_height,
                                               depth_multiplier, activation,
                                               reinterpret_cast<float*>(output.buffer),
                                               outShape);
            } else if (input.type == OperandType::TENSOR_QUANT8_ASYMM) {
                success = depthwiseConvPrepare(input.shape(), filter.shape(), bias.shape(),
                                               padding_left, padding_right,
                                               padding_top, padding_bottom,
                                               stride_width, stride_height,
                                               &outShape) &&
                          setInfoAndAllocateIfNeeded(&output, outShape) &&
                          depthwiseConvQuant8(reinterpret_cast<const uint8_t*>(input.buffer),
                                              input.shape(),
                                              reinterpret_cast<const uint8_t*>(filter.buffer),
                                              filter.shape(),
                                              reinterpret_cast<const int32_t*>(bias.buffer),
                                              bias.shape(),
                                              padding_left, padding_right,
                                              padding_top, padding_bottom,
                                              stride_width, stride_height,
                                              depth_multiplier, activation,
                                              reinterpret_cast<uint8_t*>(output.buffer),
                                              outShape);
            }

        } break;
        case OperationType::CONV_2D: {
            const size_t inCount = ins.size();
            if ((inCount != 10 && inCount != 7) ||
                    !allParametersPresent(inCount, 1)) {
                return ANEURALNETWORKS_BAD_DATA;
            }
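            // CONV_2D takes either 10 inputs (explicit padding) or 7 inputs (an
            // implicit padding code expanded below), mirroring DEPTHWISE_CONV_2D
            // above minus the depth multiplier.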
            const RunTimeOperandInfo& input  = mOperands[ins[0]];
            const RunTimeOperandInfo& filter = mOperands[ins[1]];
            const RunTimeOperandInfo& bias   = mOperands[ins[2]];

            int32_t padding_left, padding_right;
            int32_t padding_top, padding_bottom;
            int32_t stride_width, stride_height;
            int32_t activation;

            if (inCount == 10) {
                padding_left     = getScalarData<int32_t>(mOperands[ins[3]]);
                padding_right    = getScalarData<int32_t>(mOperands[ins[4]]);
                padding_top      = getScalarData<int32_t>(mOperands[ins[5]]);
                padding_bottom   = getScalarData<int32_t>(mOperands[ins[6]]);
                stride_width     = getScalarData<int32_t>(mOperands[ins[7]]);
                stride_height    = getScalarData<int32_t>(mOperands[ins[8]]);
                activation       = getScalarData<int32_t>(mOperands[ins[9]]);
            } else {
                int32_t padding_implicit = getScalarData<int32_t>(mOperands[ins[3]]);
                stride_width     = getScalarData<int32_t>(mOperands[ins[4]]);
                stride_height    = getScalarData<int32_t>(mOperands[ins[5]]);
                activation       = getScalarData<int32_t>(mOperands[ins[6]]);

                Shape inputShape = input.shape();
                Shape filterShape = filter.shape();
                int32_t input_width  = getSizeOfDimension(inputShape, 2);
                int32_t input_height = getSizeOfDimension(inputShape, 1);
                int32_t filter_width  = getSizeOfDimension(filterShape, 2);
                int32_t filter_height = getSizeOfDimension(filterShape, 1);
                calculateExplicitPadding(input_width, stride_width,
                                         filter_width, padding_implicit,
                                         &padding_left, &padding_right);
                calculateExplicitPadding(input_height, stride_height,
                                         filter_height, padding_implicit,
                                         &padding_top, &padding_bottom);
            }

            RunTimeOperandInfo& output = mOperands[outs[0]];
            Shape outShape = output.shape();

            if (input.type == OperandType::TENSOR_FLOAT32) {
                success = convPrepare(input.shape(), filter.shape(), bias.shape(),
                                      padding_left, padding_right,
                                      padding_top, padding_bottom,
                                      stride_width, stride_height,
                                      &outShape) &&
                          setInfoAndAllocateIfNeeded(&output, outShape) &&
                          convFloat32(reinterpret_cast<const float*>(input.buffer), input.shape(),
                                      reinterpret_cast<const float*>(filter.buffer), filter.shape(),
                                      reinterpret_cast<const float*>(bias.buffer), bias.shape(),
                                      padding_left, padding_right,
                                      padding_top, padding_bottom,
                                      stride_width, stride_height, activation,
                                      reinterpret_cast<float*>(output.buffer), outShape);
            } else if (input.type == OperandType::TENSOR_QUANT8_ASYMM) {
                success = convPrepare(input.shape(), filter.shape(), bias.shape(),
                                      padding_left, padding_right,
                                      padding_top, padding_bottom,
                                      stride_width, stride_height,
                                      &outShape) &&
                          setInfoAndAllocateIfNeeded(&output, outShape) &&
                          convQuant8(reinterpret_cast<const uint8_t*>(input.buffer),
                                     input.shape(),
                                     reinterpret_cast<const uint8_t*>(filter.buffer),
                                     filter.shape(),
                                     reinterpret_cast<const int32_t*>(bias.buffer),
                                     bias.shape(),
                                     padding_left, padding_right,
                                     padding_top, padding_bottom,
                                     stride_width, stride_height, activation,
                                     reinterpret_cast<uint8_t*>(output.buffer),
                                     outShape);
            }
        } break;
        case OperationType::AVERAGE_POOL_2D: {
            const size_t inCount = ins.size();
            if ((inCount != 10 && inCount != 7) ||
                    !allParametersPresent(inCount, 1)) {
                return ANEURALNETWORKS_BAD_DATA;
            }
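            // The three pooling ops share one parameter layout: padding (explicit
            // or implicit), strides, filter width/height and a fused activation.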
            const RunTimeOperandInfo& input = mOperands[ins[0]];

            int32_t padding_left, padding_right;
            int32_t padding_top, padding_bottom;
            int32_t stride_width, stride_height;
            int32_t filter_width, filter_height;
            int32_t activation;

            if (inCount == 10) {
                padding_left     = getScalarData<int32_t>(mOperands[ins[1]]);
                padding_right    = getScalarData<int32_t>(mOperands[ins[2]]);
                padding_top      = getScalarData<int32_t>(mOperands[ins[3]]);
                padding_bottom   = getScalarData<int32_t>(mOperands[ins[4]]);
                stride_width     = getScalarData<int32_t>(mOperands[ins[5]]);
                stride_height    = getScalarData<int32_t>(mOperands[ins[6]]);
                filter_width     = getScalarData<int32_t>(mOperands[ins[7]]);
                filter_height    = getScalarData<int32_t>(mOperands[ins[8]]);
                activation       = getScalarData<int32_t>(mOperands[ins[9]]);
            } else {
                int32_t padding_implicit = getScalarData<int32_t>(mOperands[ins[1]]);
                stride_width     = getScalarData<int32_t>(mOperands[ins[2]]);
                stride_height    = getScalarData<int32_t>(mOperands[ins[3]]);
                filter_width     = getScalarData<int32_t>(mOperands[ins[4]]);
                filter_height    = getScalarData<int32_t>(mOperands[ins[5]]);
                activation       = getScalarData<int32_t>(mOperands[ins[6]]);

                Shape inputShape = input.shape();
                int32_t input_width  = getSizeOfDimension(inputShape, 2);
                int32_t input_height = getSizeOfDimension(inputShape, 1);
                calculateExplicitPadding(input_width, stride_width,
                                         filter_width, padding_implicit,
                                         &padding_left, &padding_right);
                calculateExplicitPadding(input_height, stride_height,
                                         filter_height, padding_implicit,
                                         &padding_top, &padding_bottom);
            }

            RunTimeOperandInfo& output = mOperands[outs[0]];
            Shape outShape = output.shape();

            if (input.type == OperandType::TENSOR_FLOAT32) {
                success = genericPoolingPrepare(input.shape(),
                                                padding_left, padding_right,
                                                padding_top, padding_bottom,
                                                stride_width, stride_height,
                                                filter_width, filter_height,
                                                &outShape) &&
                          setInfoAndAllocateIfNeeded(&output, outShape) &&
                          averagePoolFloat32(reinterpret_cast<const float*>(input.buffer),
                                             input.shape(),
                                             padding_left, padding_right,
                                             padding_top, padding_bottom,
                                             stride_width, stride_height,
                                             filter_width, filter_height, activation,
                                             reinterpret_cast<float*>(output.buffer),
                                             outShape);
            } else if (input.type == OperandType::TENSOR_QUANT8_ASYMM) {
                success = genericPoolingPrepare(input.shape(),
                                                padding_left, padding_right,
                                                padding_top, padding_bottom,
                                                stride_width, stride_height,
                                                filter_width, filter_height,
                                                &outShape) &&
                          setInfoAndAllocateIfNeeded(&output, outShape) &&
                          averagePoolQuant8(reinterpret_cast<const uint8_t*>(input.buffer),
                                            input.shape(),
                                            padding_left, padding_right,
                                            padding_top, padding_bottom,
                                            stride_width, stride_height,
                                            filter_width, filter_height, activation,
                                            reinterpret_cast<uint8_t*>(output.buffer),
                                            outShape);
            }
        } break;
        case OperationType::L2_POOL_2D: {
            const size_t inCount = ins.size();
            if ((inCount != 10 && inCount != 7) ||
                    !allParametersPresent(inCount, 1)) {
                return ANEURALNETWORKS_BAD_DATA;
            }
            const RunTimeOperandInfo& input = mOperands[ins[0]];

            int32_t padding_left, padding_right;
            int32_t padding_top, padding_bottom;
            int32_t stride_width, stride_height;
            int32_t filter_width, filter_height;
            int32_t activation;

            if (inCount == 10) {
                padding_left     = getScalarData<int32_t>(mOperands[ins[1]]);
                padding_right    = getScalarData<int32_t>(mOperands[ins[2]]);
                padding_top      = getScalarData<int32_t>(mOperands[ins[3]]);
                padding_bottom   = getScalarData<int32_t>(mOperands[ins[4]]);
                stride_width     = getScalarData<int32_t>(mOperands[ins[5]]);
                stride_height    = getScalarData<int32_t>(mOperands[ins[6]]);
                filter_width     = getScalarData<int32_t>(mOperands[ins[7]]);
                filter_height    = getScalarData<int32_t>(mOperands[ins[8]]);
                activation       = getScalarData<int32_t>(mOperands[ins[9]]);
            } else {
                int32_t padding_implicit = getScalarData<int32_t>(mOperands[ins[1]]);
                stride_width     = getScalarData<int32_t>(mOperands[ins[2]]);
                stride_height    = getScalarData<int32_t>(mOperands[ins[3]]);
                filter_width     = getScalarData<int32_t>(mOperands[ins[4]]);
                filter_height    = getScalarData<int32_t>(mOperands[ins[5]]);
                activation       = getScalarData<int32_t>(mOperands[ins[6]]);

                Shape inputShape = input.shape();
                int32_t input_width  = getSizeOfDimension(inputShape, 2);
                int32_t input_height = getSizeOfDimension(inputShape, 1);
                calculateExplicitPadding(input_width, stride_width,
                                         filter_width, padding_implicit,
                                         &padding_left, &padding_right);
                calculateExplicitPadding(input_height, stride_height,
                                         filter_height, padding_implicit,
                                         &padding_top, &padding_bottom);
            }

            RunTimeOperandInfo& output = mOperands[outs[0]];
            Shape outShape = output.shape();

            if (input.type == OperandType::TENSOR_FLOAT32) {
                success = genericPoolingPrepare(input.shape(),
                                                padding_left, padding_right,
                                                padding_top, padding_bottom,
                                                stride_width, stride_height,
                                                filter_width, filter_height,
                                                &outShape) &&
                          setInfoAndAllocateIfNeeded(&output, outShape) &&
                          l2PoolFloat32(reinterpret_cast<const float*>(input.buffer),
                                        input.shape(),
                                        padding_left, padding_right,
                                        padding_top, padding_bottom,
                                        stride_width, stride_height,
                                        filter_width, filter_height, activation,
                                        reinterpret_cast<float*>(output.buffer),
                                        outShape);
            }
        } break;
        case OperationType::MAX_POOL_2D: {
            const size_t inCount = ins.size();
            if ((inCount != 10 && inCount != 7) ||
                    !allParametersPresent(inCount, 1)) {
                return ANEURALNETWORKS_BAD_DATA;
            }
            const RunTimeOperandInfo& input = mOperands[ins[0]];

            int32_t padding_left, padding_right;
            int32_t padding_top, padding_bottom;
            int32_t stride_width, stride_height;
            int32_t filter_width, filter_height;
            int32_t activation;

            if (inCount == 10) {
                padding_left     = getScalarData<int32_t>(mOperands[ins[1]]);
                padding_right    = getScalarData<int32_t>(mOperands[ins[2]]);
                padding_top      = getScalarData<int32_t>(mOperands[ins[3]]);
                padding_bottom   = getScalarData<int32_t>(mOperands[ins[4]]);
                stride_width     = getScalarData<int32_t>(mOperands[ins[5]]);
                stride_height    = getScalarData<int32_t>(mOperands[ins[6]]);
                filter_width     = getScalarData<int32_t>(mOperands[ins[7]]);
                filter_height    = getScalarData<int32_t>(mOperands[ins[8]]);
                activation       = getScalarData<int32_t>(mOperands[ins[9]]);
            } else {
                int32_t padding_implicit = getScalarData<int32_t>(mOperands[ins[1]]);
                stride_width     = getScalarData<int32_t>(mOperands[ins[2]]);
                stride_height    = getScalarData<int32_t>(mOperands[ins[3]]);
                filter_width     = getScalarData<int32_t>(mOperands[ins[4]]);
                filter_height    = getScalarData<int32_t>(mOperands[ins[5]]);
                activation       = getScalarData<int32_t>(mOperands[ins[6]]);

                Shape inputShape = input.shape();
                int32_t input_width  = getSizeOfDimension(inputShape, 2);
                int32_t input_height = getSizeOfDimension(inputShape, 1);
                calculateExplicitPadding(input_width, stride_width,
                                         filter_width, padding_implicit,
                                         &padding_left, &padding_right);
                calculateExplicitPadding(input_height, stride_height,
                                         filter_height, padding_implicit,
                                         &padding_top, &padding_bottom);
            }

            RunTimeOperandInfo& output = mOperands[outs[0]];
            Shape outShape = output.shape();

            if (input.type == OperandType::TENSOR_FLOAT32) {
                success = genericPoolingPrepare(input.shape(),
                                                padding_left, padding_right,
                                                padding_top, padding_bottom,
                                                stride_width, stride_height,
                                                filter_width, filter_height,
                                                &outShape) &&
                          setInfoAndAllocateIfNeeded(&output, outShape) &&
                          maxPoolFloat32(reinterpret_cast<const float*>(input.buffer),
                                         input.shape(),
                                         padding_left, padding_right,
                                         padding_top, padding_bottom,
                                         stride_width, stride_height,
                                         filter_width, filter_height, activation,
                                         reinterpret_cast<float*>(output.buffer),
                                         outShape);
            } else if (input.type == OperandType::TENSOR_QUANT8_ASYMM) {
                success = genericPoolingPrepare(input.shape(),
                                                padding_left, padding_right,
                                                padding_top, padding_bottom,
                                                stride_width, stride_height,
                                                filter_width, filter_height,
                                                &outShape) &&
                          setInfoAndAllocateIfNeeded(&output, outShape) &&
                          maxPoolQuant8(reinterpret_cast<const uint8_t*>(input.buffer),
                                        input.shape(),
                                        padding_left, padding_right,
                                        padding_top, padding_bottom,
                                        stride_width, stride_height,
                                        filter_width, filter_height, activation,
                                        reinterpret_cast<uint8_t*>(output.buffer),
                                        outShape);
            }

        } break;
        case OperationType::RELU: {
            if (!allParametersPresent(1, 1)) {
                return ANEURALNETWORKS_BAD_DATA;
            }
            const RunTimeOperandInfo& input = mOperands[ins[0]];
            RunTimeOperandInfo& output = mOperands[outs[0]];
            Shape outShape = output.shape();

            if (input.type == OperandType::TENSOR_FLOAT32) {
                success = genericActivationPrepare(input.shape(), &outShape) &&
                          setInfoAndAllocateIfNeeded(&output, outShape) &&
                          reluFloat32(reinterpret_cast<const float*>(input.buffer),
                                      input.shape(),
                                      reinterpret_cast<float*>(output.buffer),
                                      outShape);
            } else if (input.type == OperandType::TENSOR_QUANT8_ASYMM) {
                success = genericActivationPrepare(input.shape(), &outShape) &&
                          setInfoAndAllocateIfNeeded(&output, outShape) &&
                          reluQuant8(reinterpret_cast<const uint8_t*>(input.buffer),
                                     input.shape(),
                                     reinterpret_cast<uint8_t*>(output.buffer),
                                     outShape);
            }
        } break;
        case OperationType::RELU1: {
            if (!allParametersPresent(1, 1)) {
                return ANEURALNETWORKS_BAD_DATA;
            }
            const RunTimeOperandInfo& input = mOperands[ins[0]];
            RunTimeOperandInfo& output = mOperands[outs[0]];
            Shape outShape = output.shape();

            if (input.type == OperandType::TENSOR_FLOAT32) {
                success = genericActivationPrepare(input.shape(), &outShape) &&
                          setInfoAndAllocateIfNeeded(&output, outShape) &&
                          relu1Float32(reinterpret_cast<const float*>(input.buffer),
                                       input.shape(),
                                       reinterpret_cast<float*>(output.buffer),
                                       outShape);
            } else if (input.type == OperandType::TENSOR_QUANT8_ASYMM) {
                success = genericActivationPrepare(input.shape(), &outShape) &&
                          setInfoAndAllocateIfNeeded(&output, outShape) &&
                          relu1Quant8(reinterpret_cast<const uint8_t*>(input.buffer),
                                      input.shape(),
                                      reinterpret_cast<uint8_t*>(output.buffer),
                                      outShape);
            }
        } break;
        case OperationType::RELU6: {
            if (!allParametersPresent(1, 1)) {
                return ANEURALNETWORKS_BAD_DATA;
            }
            const RunTimeOperandInfo& input = mOperands[ins[0]];
            RunTimeOperandInfo& output = mOperands[outs[0]];
            Shape outShape = output.shape();

            if (input.type == OperandType::TENSOR_FLOAT32) {
                success = genericActivationPrepare(input.shape(), &outShape) &&
                          setInfoAndAllocateIfNeeded(&output, outShape) &&
                          relu6Float32(reinterpret_cast<const float*>(input.buffer),
                                       input.shape(),
                                       reinterpret_cast<float*>(output.buffer),
                                       outShape);
            } else if (input.type == OperandType::TENSOR_QUANT8_ASYMM) {
                success = genericActivationPrepare(input.shape(), &outShape) &&
                          setInfoAndAllocateIfNeeded(&output, outShape) &&
                          relu6Quant8(reinterpret_cast<const uint8_t*>(input.buffer),
                                      input.shape(),
                                      reinterpret_cast<uint8_t*>(output.buffer),
                                      outShape);
            }
        } break;
        case OperationType::TANH: {
            if (!allParametersPresent(1, 1)) {
                return ANEURALNETWORKS_BAD_DATA;
            }
            const RunTimeOperandInfo& input = mOperands[ins[0]];
            RunTimeOperandInfo& output = mOperands[outs[0]];
            Shape outShape = output.shape();

            if (input.type == OperandType::TENSOR_FLOAT32) {
                success = genericActivationPrepare(input.shape(), &outShape) &&
                          setInfoAndAllocateIfNeeded(&output, outShape) &&
                          tanhFloat32(reinterpret_cast<const float*>(input.buffer),
                                      input.shape(),
                                      reinterpret_cast<float*>(output.buffer),
                                      outShape);
            }
        } break;
        case OperationType::LOGISTIC: {
            if (!allParametersPresent(1, 1)) {
                return ANEURALNETWORKS_BAD_DATA;
            }
            const RunTimeOperandInfo& input = mOperands[ins[0]];
            RunTimeOperandInfo& output = mOperands[outs[0]];
            Shape outShape = output.shape();

            if (input.type == OperandType::TENSOR_FLOAT32) {
                success = genericActivationPrepare(input.shape(), &outShape) &&
                          setInfoAndAllocateIfNeeded(&output, outShape) &&
                          logisticFloat32(reinterpret_cast<const float*>(input.buffer),
                                          input.shape(),
                                          reinterpret_cast<float*>(output.buffer),
                                          outShape);
            } else if (input.type == OperandType::TENSOR_QUANT8_ASYMM) {
                success = genericActivationPrepare(input.shape(), &outShape) &&
                          setInfoAndAllocateIfNeeded(&output, outShape) &&
                          logisticQuant8(reinterpret_cast<const uint8_t*>(input.buffer),
                                         input.shape(),
                                         reinterpret_cast<uint8_t*>(output.buffer),
                                         outShape);
            }
        } break;
        case OperationType::SOFTMAX: {
            if (!allParametersPresent(2, 1)) {
                return ANEURALNETWORKS_BAD_DATA;
            }
            RunTimeOperandInfo& input = mOperands[ins[0]];
            float beta = getScalarData<float>(mOperands[ins[1]]);
            if (beta <= 0.0f) {
                LOG(ERROR) << "beta must be positive for softmax";
                return ANEURALNETWORKS_BAD_DATA;
            }

            RunTimeOperandInfo& output = mOperands[outs[0]];
            Shape outShape = output.shape();

            if (input.type == OperandType::TENSOR_FLOAT32) {
                success = genericActivationPrepare(input.shape(), &outShape) &&
                          setInfoAndAllocateIfNeeded(&output, outShape) &&
                          softmaxFloat32(reinterpret_cast<const float*>(input.buffer),
                                         input.shape(),
                                         beta,
                                         reinterpret_cast<float*>(output.buffer),
                                         output.shape());
            } else if (input.type == OperandType::TENSOR_QUANT8_ASYMM) {
                success = genericActivationPrepare(input.shape(), &outShape) &&
                          setInfoAndAllocateIfNeeded(&output, outShape) &&
                          softmaxQuant8(reinterpret_cast<const uint8_t*>(input.buffer),
                                        input.shape(),
                                        beta,
                                        reinterpret_cast<uint8_t*>(output.buffer),
                                        output.shape());
            }
        } break;
        case OperationType::FULLY_CONNECTED: {
            if (!allParametersPresent(4, 1)) {
                return ANEURALNETWORKS_BAD_DATA;
            }
            RunTimeOperandInfo& input   = mOperands[ins[0]];
            RunTimeOperandInfo& weights = mOperands[ins[1]];
            RunTimeOperandInfo& bias    = mOperands[ins[2]];
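            // For the quantized path the bias operand is a TENSOR_INT32, which is
            // why it is read through an int32_t* below.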
    937 
    938             int32_t activation = getScalarData<int32_t>(mOperands[ins[3]]);
    939 
    940             RunTimeOperandInfo& output = mOperands[outs[0]];
    941             Shape outShape = output.shape();
    942 
    943             if (input.type == OperandType::TENSOR_FLOAT32) {
    944                 success = fullyConnectedPrepare(input.shape(), weights.shape(), bias.shape(),
    945                                                 &outShape) &&
    946                           setInfoAndAllocateIfNeeded(&output, outShape) &&
    947                           fullyConnectedFloat32(reinterpret_cast<const float*>(input.buffer),
    948                                                 input.shape(),
    949                                                 reinterpret_cast<const float*>(weights.buffer),
    950                                                 weights.shape(),
    951                                                 reinterpret_cast<const float*>(bias.buffer),
    952                                                 bias.shape(),
    953                                                 activation,
    954                                                 reinterpret_cast<float*>(output.buffer),
    955                                                 outShape);
    956             } else if (input.type == OperandType::TENSOR_QUANT8_ASYMM) {
    957                 success = fullyConnectedPrepare(input.shape(), weights.shape(), bias.shape(),
    958                                                 &outShape) &&
    959                           setInfoAndAllocateIfNeeded(&output, outShape) &&
    960                           fullyConnectedQuant8(reinterpret_cast<const uint8_t*>(input.buffer),
    961                                                input.shape(),
    962                                                reinterpret_cast<const uint8_t*>(weights.buffer),
    963                                                weights.shape(),
    964                                                reinterpret_cast<const int32_t*>(bias.buffer),
    965                                                bias.shape(),
    966                                                activation,
    967                                                reinterpret_cast<uint8_t*>(output.buffer),
    968                                                outShape);
    969             }
    970         } break;
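                // CONCATENATION: a variable number of input tensors followed by an int32 axis
                // scalar. The kernel is chosen from the first input's type; pointers and shapes
                // for all inputs are collected into vectors before the call.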
    971         case OperationType::CONCATENATION: {
    972             if (outs.size() != 1 || ins.size() < 2) {
    973                 return ANEURALNETWORKS_BAD_DATA;
    974             }
    975             int numInputTensors = ins.size() - 1;
    976             int32_t axis = getScalarData<int32_t>(mOperands[ins[numInputTensors]]);
    977 
    978             RunTimeOperandInfo& output = mOperands[outs[0]];
    979             Shape outShape = output.shape();
    980 
    981             const RunTimeOperandInfo& firstInput = mOperands[ins[0]];
    982             if (firstInput.type == OperandType::TENSOR_FLOAT32) {
    983                 std::vector<Shape> inputShapes(numInputTensors);
    984                 std::vector<const float*> inputDataPtrs(numInputTensors);
    985 
    986                 for (int i = 0; i < numInputTensors; i++) {
    987                     RunTimeOperandInfo& input = mOperands[ins[i]];
    988                     inputShapes[i] = input.shape();
    989                     inputDataPtrs[i] = reinterpret_cast<const float*>(input.buffer);
    990                 }
    991                 success = concatenationPrepare(inputShapes, axis, &outShape) &&
    992                           setInfoAndAllocateIfNeeded(&output, outShape) &&
    993                           concatenationFloat32(inputDataPtrs, inputShapes, axis,
    994                                                reinterpret_cast<float*>(output.buffer), outShape);
    995             } else if (firstInput.type == OperandType::TENSOR_QUANT8_ASYMM) {
    996                 std::vector<Shape> inputShapes(numInputTensors);
    997                 std::vector<const uint8_t*> inputDataPtrs(numInputTensors);
    998 
     999                 for (int i = 0; i < numInputTensors; i++) {
   1000                     RunTimeOperandInfo& input = mOperands[ins[i]];
   1001                     inputShapes[i] = input.shape();
   1002                     inputDataPtrs[i] = reinterpret_cast<const uint8_t*>(input.buffer);
   1003                 }
   1004                 success = concatenationPrepare(inputShapes, axis, &outShape) &&
   1005                           setInfoAndAllocateIfNeeded(&output, outShape) &&
   1006                           concatenationQuant8(inputDataPtrs, inputShapes, axis,
   1007                                               reinterpret_cast<uint8_t*>(output.buffer),
   1008                                               outShape);
   1009             }
   1010         } break;
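                // L2_NORMALIZATION: one input, one output; scales the input to unit L2 norm
                // along the depth dimension.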
   1011         case OperationType::L2_NORMALIZATION: {
   1012             if (!allParametersPresent(1, 1)) {
   1013                 return ANEURALNETWORKS_BAD_DATA;
   1014             }
   1015             const RunTimeOperandInfo& input = mOperands[ins[0]];
   1016             RunTimeOperandInfo& output = mOperands[outs[0]];
   1017             Shape outShape = output.shape();
   1018 
   1019             if (input.type == OperandType::TENSOR_FLOAT32) {
   1020                 success = genericNormalizationPrepare(input.shape(), &outShape) &&
   1021                           setInfoAndAllocateIfNeeded(&output, outShape) &&
   1022                           l2normFloat32(reinterpret_cast<const float*>(input.buffer),
   1023                                         input.shape(),
   1024                                         reinterpret_cast<float*>(output.buffer),
   1025                                         outShape);
   1026             } else if (input.type == OperandType::TENSOR_QUANT8_ASYMM) {
   1027                 success = genericNormalizationPrepare(input.shape(), &outShape) &&
   1028                           setInfoAndAllocateIfNeeded(&output, outShape) &&
   1029                           l2normQuant8(reinterpret_cast<const uint8_t*>(input.buffer),
   1030                                        input.shape(),
   1031                                        reinterpret_cast<uint8_t*>(output.buffer),
   1032                                        outShape);
   1033             }
   1034         } break;
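                // LOCAL_RESPONSE_NORMALIZATION: the tensor plus four scalar parameters
                // (radius, bias, alpha, beta). Only TENSOR_FLOAT32 is handled here.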
   1035         case OperationType::LOCAL_RESPONSE_NORMALIZATION: {
   1036             if (!allParametersPresent(5, 1)) {
   1037                 return ANEURALNETWORKS_BAD_DATA;
   1038             }
   1039             const RunTimeOperandInfo& input = mOperands[ins[0]];
   1040             int32_t radius = getScalarData<int32_t>(mOperands[ins[1]]);
   1041             float bias = getScalarData<float>(mOperands[ins[2]]);
   1042             float alpha = getScalarData<float>(mOperands[ins[3]]);
   1043             float beta = getScalarData<float>(mOperands[ins[4]]);
   1044 
   1045             RunTimeOperandInfo& output = mOperands[outs[0]];
   1046             Shape outShape = output.shape();
   1047 
   1048             if (input.type == OperandType::TENSOR_FLOAT32) {
   1049                 success = genericNormalizationPrepare(input.shape(), &outShape) &&
   1050                           setInfoAndAllocateIfNeeded(&output, outShape) &&
   1051                           localResponseNormFloat32(reinterpret_cast<const float*>(input.buffer),
   1052                                                    input.shape(),
   1053                                                    radius, bias, alpha, beta,
   1054                                                    reinterpret_cast<float*>(output.buffer),
   1055                                                    outShape);
   1056             }
   1057         } break;
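                // RESHAPE: copies the input data unchanged into an output whose dimensions are
                // given by the second (int32 target-shape) operand; reshapeGeneric takes untyped
                // buffers, so any element type is supported.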
   1058         case OperationType::RESHAPE: {
   1059             if (!allParametersPresent(2, 1)) {
   1060                 return ANEURALNETWORKS_BAD_DATA;
   1061             }
   1062             const RunTimeOperandInfo& input = mOperands[ins[0]];
   1063             const RunTimeOperandInfo& targetShape = mOperands[ins[1]];
   1064 
   1065             RunTimeOperandInfo& output = mOperands[outs[0]];
   1066             Shape outShape = output.shape();
   1067 
   1068             success = reshapePrepare(input.shape(),
   1069                                      reinterpret_cast<const int32_t*>(targetShape.buffer),
   1070                                      getNumberOfElements(targetShape.shape()),
   1071                                      &outShape) &&
   1072                       setInfoAndAllocateIfNeeded(&output, outShape) &&
   1073                       reshapeGeneric(reinterpret_cast<const void*>(input.buffer),
   1074                                      input.shape(),
   1075                                      reinterpret_cast<void*>(output.buffer),
   1076                                      outShape);
   1077         } break;
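                // RESIZE_BILINEAR: resizes a float32 tensor to the requested output width and
                // height using bilinear interpolation.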
   1078         case OperationType::RESIZE_BILINEAR: {
   1079             if (!allParametersPresent(3, 1)) {
   1080                 return ANEURALNETWORKS_BAD_DATA;
   1081             }
   1082             const RunTimeOperandInfo& input = mOperands[ins[0]];
   1083             int32_t width = getScalarData<int32_t>(mOperands[ins[1]]);
   1084             int32_t height = getScalarData<int32_t>(mOperands[ins[2]]);
   1085 
   1086             RunTimeOperandInfo& output = mOperands[outs[0]];
   1087             Shape outShape = output.shape();
   1088 
   1089             if (input.type == OperandType::TENSOR_FLOAT32) {
   1090                 success = resizeBilinearPrepare(input.shape(),
   1091                                                 width, height,
   1092                                                 &outShape) &&
   1093                           setInfoAndAllocateIfNeeded(&output, outShape) &&
   1094                           resizeBilinearFloat32(reinterpret_cast<const float*>(input.buffer),
   1095                                                 input.shape(),
   1096                                                 reinterpret_cast<float*>(output.buffer),
   1097                                                 outShape);
   1098             }
   1099         } break;
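                // DEPTH_TO_SPACE: rearranges depth (channel) values into blockSize x blockSize
                // spatial blocks. The generic kernel works on raw buffers, so one path covers
                // all supported element types.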
   1100         case OperationType::DEPTH_TO_SPACE: {
   1101             if (!allParametersPresent(2, 1)) {
   1102                 return ANEURALNETWORKS_BAD_DATA;
   1103             }
   1104             const RunTimeOperandInfo& input = mOperands[ins[0]];
   1105             int32_t blockSize = getScalarData<int32_t>(mOperands[ins[1]]);
   1106 
   1107             RunTimeOperandInfo& output = mOperands[outs[0]];
   1108             Shape outShape = output.shape();
   1109 
   1110             success = depthToSpacePrepare(input.shape(),
   1111                                           blockSize,
   1112                                           &outShape) &&
   1113                       setInfoAndAllocateIfNeeded(&output, outShape) &&
   1114                       depthToSpaceGeneric(input.buffer,
   1115                                           input.shape(),
   1116                                           blockSize,
   1117                                           output.buffer,
   1118                                           outShape);
   1119         } break;
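                // SPACE_TO_DEPTH: the inverse of DEPTH_TO_SPACE; folds blockSize x blockSize
                // spatial blocks into the depth dimension.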
   1120         case OperationType::SPACE_TO_DEPTH: {
   1121             if (!allParametersPresent(2, 1)) {
   1122                 return ANEURALNETWORKS_BAD_DATA;
   1123             }
   1124             const RunTimeOperandInfo& input = mOperands[ins[0]];
   1125             int32_t blockSize = getScalarData<int32_t>(mOperands[ins[1]]);
   1126 
   1127             RunTimeOperandInfo& output = mOperands[outs[0]];
   1128             Shape outShape = output.shape();
   1129 
   1130             success = spaceToDepthPrepare(input.shape(),
   1131                                           blockSize,
   1132                                           &outShape) &&
   1133                       setInfoAndAllocateIfNeeded(&output, outShape) &&
   1134                       spaceToDepthGeneric(input.buffer,
   1135                                           input.shape(),
   1136                                           blockSize,
   1137                                           output.buffer,
   1138                                           outShape);
   1139         } break;
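                // EMBEDDING_LOOKUP: gathers the rows of the value tensor selected by the lookup
                // indices; evaluation is delegated to the EmbeddingLookup helper object.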
   1140         case OperationType::EMBEDDING_LOOKUP: {
   1141             const RunTimeOperandInfo &values =
   1142                 mOperands[ins[EmbeddingLookup::kValueTensor]];
   1143             const RunTimeOperandInfo &lookups =
   1144                 mOperands[ins[EmbeddingLookup::kLookupTensor]];
   1145             RunTimeOperandInfo &output =
   1146                 mOperands[outs[EmbeddingLookup::kOutputTensor]];
   1147 
   1148             Shape outputShape;
   1149             EmbeddingLookup lookup(operation, mOperands);
   1150 
   1151             success = embeddingLookupPrepare(values.shape(), lookups.shape(), &outputShape) &&
   1152                 setInfoAndAllocateIfNeeded(&output, outputShape) &&
   1153                 lookup.Eval();
   1154         } break;
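                // HASHTABLE_LOOKUP: for each lookup key, copies the matching row of the value
                // tensor into the output; the hits tensor records whether each key was found.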
   1155         case OperationType::HASHTABLE_LOOKUP: {
   1156             const RunTimeOperandInfo &lookups =
   1157                 mOperands[ins[HashtableLookup::kLookupTensor]];
   1158             const RunTimeOperandInfo &keys =
   1159                 mOperands[ins[HashtableLookup::kKeyTensor]];
   1160             const RunTimeOperandInfo &values =
   1161                 mOperands[ins[HashtableLookup::kValueTensor]];
   1162 
   1163             RunTimeOperandInfo &output =
   1164                 mOperands[outs[HashtableLookup::kOutputTensor]];
   1165             RunTimeOperandInfo &hits =
   1166                 mOperands[outs[HashtableLookup::kHitsTensor]];
   1167 
   1168             Shape outputShape, hitShape;
   1169             HashtableLookup lookup(operation, mOperands);
   1170 
   1171             success = hashtableLookupPrepare(lookups.shape(), keys.shape(), values.shape(),
   1172                                              &outputShape, &hitShape) &&
   1173                 setInfoAndAllocateIfNeeded(&output, outputShape) &&
   1174                 setInfoAndAllocateIfNeeded(&hits, hitShape) &&
   1175                 lookup.Eval();
   1176         } break;
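                // LSH_PROJECTION: locality-sensitive hashing projection; both shape preparation
                // and evaluation are delegated to the LSHProjection helper class.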
   1177         case OperationType::LSH_PROJECTION: {
   1178             RunTimeOperandInfo &output =
   1179                 mOperands[outs[LSHProjection::kOutputTensor]];
   1180 
   1181             Shape outputShape;
   1182             LSHProjection lsh(operation, mOperands);
   1183 
   1184             success = LSHProjection::Prepare(operation, mOperands,
   1185                                              &outputShape) &&
   1186                 setInfoAndAllocateIfNeeded(&output, outputShape) &&
   1187                 lsh.Eval();
   1188         } break;
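                // LSTM: one step of an LSTM cell with four outputs (scratch buffer, output
                // state, cell state, and output). LSTMCell::Prepare derives all four shapes
                // before the buffers are allocated.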
   1189         case OperationType::LSTM: {
   1190             RunTimeOperandInfo &scratch =
   1191                 mOperands[outs[LSTMCell::kScratchBufferTensor]];
   1192             RunTimeOperandInfo &outputStateOut =
   1193                 mOperands[outs[LSTMCell::kOutputStateOutTensor]];
   1194             RunTimeOperandInfo &cellStateOut =
   1195                 mOperands[outs[LSTMCell::kCellStateOutTensor]];
   1196             RunTimeOperandInfo &output =
   1197                 mOperands[outs[LSTMCell::kOutputTensor]];
   1198 
   1199             Shape scratchShape, outputStateShape, cellStateShape, outputShape;
   1200             LSTMCell lstm_cell(operation, mOperands);
   1201 
   1202             success = LSTMCell::Prepare(operation, mOperands,
   1203                                         &scratchShape, &outputStateShape,
   1204                                         &cellStateShape, &outputShape) &&
   1205                 setInfoAndAllocateIfNeeded(&scratch, scratchShape) &&
   1206                 setInfoAndAllocateIfNeeded(&outputStateOut, outputStateShape) &&
   1207                 setInfoAndAllocateIfNeeded(&cellStateOut, cellStateShape) &&
   1208                 setInfoAndAllocateIfNeeded(&output, outputShape) &&
   1209                 lstm_cell.Eval();
   1210         } break;
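                // RNN: a basic recurrent cell producing the updated hidden state and the output.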
   1211         case OperationType::RNN: {
   1212             RunTimeOperandInfo &hiddenStateOut =
   1213                 mOperands[outs[RNN::kHiddenStateOutTensor]];
   1214             RunTimeOperandInfo &output =
   1215                 mOperands[outs[RNN::kOutputTensor]];
   1216 
   1217             Shape hiddenStateShape, outputShape;
   1218             RNN rnn_cell(operation, mOperands);
   1219 
   1220             success = RNN::Prepare(operation, mOperands,
   1221                                    &hiddenStateShape, &outputShape) &&
   1222                 setInfoAndAllocateIfNeeded(&hiddenStateOut, hiddenStateShape) &&
   1223                 setInfoAndAllocateIfNeeded(&output, outputShape) &&
   1224                 rnn_cell.Eval();
   1225         } break;
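                // SVDF: a Singular Value Decomposition Filter cell producing an updated internal
                // state and an output.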
   1226         case OperationType::SVDF: {
   1227             RunTimeOperandInfo &stateOut =
   1228                 mOperands[outs[SVDF::kStateOutTensor]];
   1229             RunTimeOperandInfo &output =
   1230                 mOperands[outs[SVDF::kOutputTensor]];
   1231 
   1232             Shape stateShape, outputShape;
   1233             SVDF svdf(operation, mOperands);
   1234 
   1235             success = SVDF::Prepare(operation, mOperands,
   1236                                     &stateShape, &outputShape) &&
   1237                 setInfoAndAllocateIfNeeded(&stateOut, stateShape) &&
   1238                 setInfoAndAllocateIfNeeded(&output, outputShape) &&
   1239                 svdf.Eval();
   1240         } break;
   1241         default:
   1242             nnAssert(false);
   1243             break;
   1244     }
   1245     if (!success) {
   1246         LOG(ERROR) << getOperationName(operation.type) << " failed.";
   1247         return ANEURALNETWORKS_OP_FAILED;
   1248     }
   1249 
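            // Inputs that are no longer referenced by any later operation can have their
            // temporary buffers released now.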
   1250     freeNoLongerUsedOperands(ins);
   1251     return ANEURALNETWORKS_NO_ERROR;
   1252 }
   1253 
   1254 } // namespace nn
   1255 } // namespace android
   1256