Home | History | Annotate | Download | only in operations
      1 /*
      2  * Copyright (C) 2018 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #include "CpuOperationUtils.h"
     18 #include "OperationResolver.h"
     19 #include "OperationsUtils.h"
     20 
     21 #include <cfloat>
     22 #include <cmath>
     23 #include <numeric>
     24 
     25 #include "Tracing.h"
     26 
     27 namespace android {
     28 namespace nn {
     29 namespace bbox_ops {
     30 
     31 namespace {
     32 
     33 struct BoxEncodingCorner {
     34     float x1, y1, x2, y2;
     35 };
     36 struct BoxEncodingCenter {
     37     float w, h, x, y;
     38 };
     39 BoxEncodingCorner toBoxEncodingCorner(const BoxEncodingCenter& ctr) {
     40     return {.x1 = ctr.x - ctr.w / 2,
     41             .y1 = ctr.y - ctr.h / 2,
     42             .x2 = ctr.x + ctr.w / 2,
     43             .y2 = ctr.y + ctr.h / 2};
     44 }
     45 BoxEncodingCenter toBoxEncodingCenter(const BoxEncodingCorner& cnr) {
     46     return {.w = cnr.x2 - cnr.x1,
     47             .h = cnr.y2 - cnr.y1,
     48             .x = (cnr.x1 + cnr.x2) / 2,
     49             .y = (cnr.y1 + cnr.y2) / 2};
     50 }
     51 
     52 inline bool bboxTransformFloat32(const float* roiData, const Shape& roiShape,
     53                                  const float* bboxDeltasData, const Shape& bboxDeltasShape,
     54                                  const int32_t* batchesData, const Shape& batchesShape,
     55                                  const float* imageInfoData, const Shape& imageInfoDataShape,
     56                                  float* outputData, const Shape& outputShape) {
     57     const uint32_t roiLength = 4;
     58     const uint32_t imageLength = 2;
     59 
     60     uint32_t numClasses = getSizeOfDimension(bboxDeltasShape, 1) / roiLength;
     61     uint32_t numBatches = getSizeOfDimension(imageInfoDataShape, 0);
     62 
     63     const float* roiDataEnd = roiData + getNumberOfElements(roiShape);
     64     const float* deltas = bboxDeltasData;
     65     float* outPtr = outputData;
     66     uint32_t roiIndex = 0;
     67     for (const float* roiBase = roiData; roiBase < roiDataEnd; roiBase += roiLength, roiIndex++) {
     68         uint32_t batchIndex = batchesData[roiIndex];
     69         // Check for malformed data
     70         // 1. Invalid batch id
     71         // 2. Invalid region: x2 < x1 || y2 < y1
     72         NN_RET_CHECK_GE(batchIndex, 0);
     73         NN_RET_CHECK_LT(batchIndex, numBatches);
     74         NN_RET_CHECK_LE(roiBase[0], roiBase[2]);
     75         NN_RET_CHECK_LE(roiBase[1], roiBase[3]);
     76 
     77         const float* imageInfoBase = imageInfoData + batchIndex * imageLength;
     78         float imageHeight = imageInfoBase[0];
     79         float imageWidth = imageInfoBase[1];
     80         auto roiBefore = toBoxEncodingCenter(
     81                 {.x1 = roiBase[0], .y1 = roiBase[1], .x2 = roiBase[2], .y2 = roiBase[3]});
     82         for (uint32_t i = 0; i < numClasses; i++) {
     83             auto roiAfter = toBoxEncodingCorner({.w = std::exp(deltas[2]) * roiBefore.w,
     84                                                  .h = std::exp(deltas[3]) * roiBefore.h,
     85                                                  .x = roiBefore.x + deltas[0] * roiBefore.w,
     86                                                  .y = roiBefore.y + deltas[1] * roiBefore.h});
     87             BoxEncodingCorner cliped = {.x1 = std::min(std::max(roiAfter.x1, 0.0f), imageWidth),
     88                                         .y1 = std::min(std::max(roiAfter.y1, 0.0f), imageHeight),
     89                                         .x2 = std::min(std::max(roiAfter.x2, 0.0f), imageWidth),
     90                                         .y2 = std::min(std::max(roiAfter.y2, 0.0f), imageHeight)};
     91             outPtr[0] = cliped.x1;
     92             outPtr[1] = cliped.y1;
     93             outPtr[2] = cliped.x2;
     94             outPtr[3] = cliped.y2;
     95             deltas += roiLength;
     96             outPtr += roiLength;
     97         }
     98     }
     99     return true;
    100 }
    101 
    102 inline bool bboxTransformFloat16(const _Float16* roiData, const Shape& roiShape,
    103                                  const _Float16* bboxDeltasData, const Shape& bboxDeltasShape,
    104                                  const int32_t* batchesData, const Shape& batchesShape,
    105                                  const _Float16* imageInfoData, const Shape& imageInfoDataShape,
    106                                  _Float16* outputData, const Shape& outputShape) {
    107     std::vector<float> roi_float32(getNumberOfElements(roiShape));
    108     convertFloat16ToFloat32(roiData, &roi_float32);
    109     std::vector<float> delta_float32(getNumberOfElements(bboxDeltasShape));
    110     convertFloat16ToFloat32(bboxDeltasData, &delta_float32);
    111     std::vector<float> imageInfo_float32(getNumberOfElements(imageInfoDataShape));
    112     convertFloat16ToFloat32(imageInfoData, &imageInfo_float32);
    113     std::vector<float> output_float32(getNumberOfElements(outputShape));
    114     NN_RET_CHECK(bboxTransformFloat32(roi_float32.data(), roiShape, delta_float32.data(),
    115                                       bboxDeltasShape, batchesData, batchesShape,
    116                                       imageInfo_float32.data(), imageInfoDataShape,
    117                                       output_float32.data(), outputShape));
    118     convertFloat32ToFloat16(output_float32, outputData);
    119     return true;
    120 }
    121 
    122 inline bool bboxTransformQuant(const uint16_t* roiData, const Shape& roiShape,
    123                                const uint8_t* bboxDeltasData, const Shape& bboxDeltasShape,
    124                                const int32_t* batchesData, const Shape& batchesShape,
    125                                const uint16_t* imageInfoData, const Shape& imageInfoDataShape,
    126                                uint16_t* outputData, const Shape& outputShape) {
    127     std::vector<float> roi_float32(getNumberOfElements(roiShape));
    128     convertQuantToFloat32(roiData, roiShape.scale, roiShape.offset, &roi_float32);
    129     std::vector<float> delta_float32(getNumberOfElements(bboxDeltasShape));
    130     convertQuantToFloat32(bboxDeltasData, bboxDeltasShape.scale, bboxDeltasShape.offset,
    131                           &delta_float32);
    132     std::vector<float> imageInfo_float32(getNumberOfElements(imageInfoDataShape));
    133     convertQuantToFloat32(imageInfoData, imageInfoDataShape.scale, imageInfoDataShape.offset,
    134                           &imageInfo_float32);
    135     std::vector<float> output_float32(getNumberOfElements(outputShape));
    136     NN_RET_CHECK(bboxTransformFloat32(roi_float32.data(), roiShape, delta_float32.data(),
    137                                       bboxDeltasShape, batchesData, batchesShape,
    138                                       imageInfo_float32.data(), imageInfoDataShape,
    139                                       output_float32.data(), outputShape));
    140     convertFloat32ToQuant(output_float32, outputShape.scale, outputShape.offset, outputData);
    141     return true;
    142 }
    143 
    144 // Taking two indices of bounding boxes, return the intersection-of-union.
    145 float getIoUAxisAligned(const float* roi1, const float* roi2) {
    146     const float area1 = (roi1[2] - roi1[0]) * (roi1[3] - roi1[1]);
    147     const float area2 = (roi2[2] - roi2[0]) * (roi2[3] - roi2[1]);
    148     const float x1 = std::max(roi1[0], roi2[0]);
    149     const float x2 = std::min(roi1[2], roi2[2]);
    150     const float y1 = std::max(roi1[1], roi2[1]);
    151     const float y2 = std::min(roi1[3], roi2[3]);
    152     const float w = std::max(x2 - x1, 0.0f);
    153     const float h = std::max(y2 - y1, 0.0f);
    154     const float areaIntersect = w * h;
    155     const float areaUnion = area1 + area2 - areaIntersect;
    156     return areaIntersect / areaUnion;
    157 }
    158 
    159 }  // namespace
    160 
    161 namespace axis_aligned_bbox_transform {
    162 
    163 constexpr char kOperationName[] = "AXIS_ALIGNED_BBOX_TRANSFORM";
    164 
    165 constexpr uint32_t kNumInputs = 4;
    166 constexpr uint32_t kRoiTensor = 0;
    167 constexpr uint32_t kDeltaTensor = 1;
    168 constexpr uint32_t kBatchesTensor = 2;
    169 constexpr uint32_t kImageInfoTensor = 3;
    170 
    171 constexpr uint32_t kNumOutputs = 1;
    172 constexpr uint32_t kOutputTensor = 0;
    173 
    174 bool validate(const IOperationValidationContext* context) {
    175     NN_RET_CHECK_EQ(context->getNumInputs(), kNumInputs);
    176     NN_RET_CHECK_EQ(context->getNumOutputs(), kNumOutputs);
    177     std::vector<OperandType> inExpectedTypes;
    178     auto inputType = context->getInputType(kRoiTensor);
    179     if (inputType == OperandType::TENSOR_FLOAT32 || inputType == OperandType::TENSOR_FLOAT16) {
    180         inExpectedTypes = {inputType, inputType, OperandType::TENSOR_INT32, inputType};
    181     } else if (inputType == OperandType::TENSOR_QUANT16_ASYMM) {
    182         inExpectedTypes = {OperandType::TENSOR_QUANT16_ASYMM, OperandType::TENSOR_QUANT8_ASYMM,
    183                            OperandType::TENSOR_INT32, OperandType::TENSOR_QUANT16_ASYMM};
    184     } else {
    185         LOG(ERROR) << "Unsupported input tensor type for operation " << kOperationName;
    186         return false;
    187     }
    188     NN_RET_CHECK(validateInputTypes(context, inExpectedTypes));
    189     NN_RET_CHECK(validateOutputTypes(context, {inputType}));
    190     return validateHalVersion(context, HalVersion::V1_2);
    191 }
    192 
    193 bool prepare(IOperationExecutionContext* context) {
    194     Shape roiShape = context->getInputShape(kRoiTensor);
    195     Shape bboxDeltasShape = context->getInputShape(kDeltaTensor);
    196     Shape batchesShape = context->getInputShape(kBatchesTensor);
    197     Shape imageInfoShape = context->getInputShape(kImageInfoTensor);
    198     Shape outputShape = context->getOutputShape(kOutputTensor);
    199 
    200     NN_RET_CHECK_EQ(getNumberOfDimensions(roiShape), 2);
    201     NN_RET_CHECK_EQ(getNumberOfDimensions(bboxDeltasShape), 2);
    202     NN_RET_CHECK_EQ(getNumberOfDimensions(batchesShape), 1);
    203     NN_RET_CHECK_EQ(getNumberOfDimensions(imageInfoShape), 2);
    204 
    205     // Only numRois can be zero.
    206     const uint32_t kRoiDim = 4;
    207     uint32_t numRois = getSizeOfDimension(roiShape, 0);
    208     uint32_t numClasses = getSizeOfDimension(bboxDeltasShape, 1) / kRoiDim;
    209     uint32_t numBatches = getSizeOfDimension(imageInfoShape, 0);
    210     NN_RET_CHECK_GT(numClasses, 0);
    211     NN_RET_CHECK_GT(numBatches, 0);
    212     NN_RET_CHECK_EQ(getSizeOfDimension(roiShape, 1), kRoiDim);
    213     NN_RET_CHECK_EQ(getSizeOfDimension(bboxDeltasShape, 0), numRois);
    214     NN_RET_CHECK_EQ(getSizeOfDimension(bboxDeltasShape, 1), kRoiDim * numClasses);
    215     NN_RET_CHECK_EQ(getSizeOfDimension(batchesShape, 0), numRois);
    216     NN_RET_CHECK_EQ(getSizeOfDimension(imageInfoShape, 1), 2);
    217 
    218     if (roiShape.type == OperandType::TENSOR_QUANT16_ASYMM) {
    219         NN_RET_CHECK_EQ(roiShape.scale, 0.125f);
    220         NN_RET_CHECK_EQ(roiShape.offset, 0);
    221         NN_RET_CHECK_EQ(imageInfoShape.scale, 0.125f);
    222         NN_RET_CHECK_EQ(imageInfoShape.offset, 0);
    223     }
    224 
    225     outputShape.type = roiShape.type;
    226     outputShape.dimensions = {numRois, numClasses * kRoiDim};
    227     outputShape.scale = 0.125f;
    228     outputShape.offset = 0;
    229     NN_RET_CHECK(context->setOutputShape(kOutputTensor, outputShape));
    230     return true;
    231 }
    232 
    233 bool execute(IOperationExecutionContext* context) {
    234     NNTRACE_TRANS("axisAlignedBBoxTransform");
    235     // Bypass execution in the case of zero-sized input.
    236     if (getNumberOfElements(context->getOutputShape(kOutputTensor)) == 0) return true;
    237     switch (context->getInputType(kRoiTensor)) {
    238         case OperandType::TENSOR_FLOAT16: {
    239             return bboxTransformFloat16(context->getInputBuffer<_Float16>(kRoiTensor),
    240                                         context->getInputShape(kRoiTensor),
    241                                         context->getInputBuffer<_Float16>(kDeltaTensor),
    242                                         context->getInputShape(kDeltaTensor),
    243                                         context->getInputBuffer<int32_t>(kBatchesTensor),
    244                                         context->getInputShape(kBatchesTensor),
    245                                         context->getInputBuffer<_Float16>(kImageInfoTensor),
    246                                         context->getInputShape(kImageInfoTensor),
    247                                         context->getOutputBuffer<_Float16>(kOutputTensor),
    248                                         context->getOutputShape(kOutputTensor));
    249         }
    250         case OperandType::TENSOR_FLOAT32: {
    251             return bboxTransformFloat32(context->getInputBuffer<float>(kRoiTensor),
    252                                         context->getInputShape(kRoiTensor),
    253                                         context->getInputBuffer<float>(kDeltaTensor),
    254                                         context->getInputShape(kDeltaTensor),
    255                                         context->getInputBuffer<int32_t>(kBatchesTensor),
    256                                         context->getInputShape(kBatchesTensor),
    257                                         context->getInputBuffer<float>(kImageInfoTensor),
    258                                         context->getInputShape(kImageInfoTensor),
    259                                         context->getOutputBuffer<float>(kOutputTensor),
    260                                         context->getOutputShape(kOutputTensor));
    261         }
    262         case OperandType::TENSOR_QUANT16_ASYMM: {
    263             return bboxTransformQuant(context->getInputBuffer<uint16_t>(kRoiTensor),
    264                                       context->getInputShape(kRoiTensor),
    265                                       context->getInputBuffer<uint8_t>(kDeltaTensor),
    266                                       context->getInputShape(kDeltaTensor),
    267                                       context->getInputBuffer<int32_t>(kBatchesTensor),
    268                                       context->getInputShape(kBatchesTensor),
    269                                       context->getInputBuffer<uint16_t>(kImageInfoTensor),
    270                                       context->getInputShape(kImageInfoTensor),
    271                                       context->getOutputBuffer<uint16_t>(kOutputTensor),
    272                                       context->getOutputShape(kOutputTensor));
    273         }
    274         default:
    275             NN_RET_CHECK_FAIL() << "Unsupported tensor type for operation " << kOperationName;
    276     }
    277 }
    278 
    279 }  // namespace axis_aligned_bbox_transform
    280 
    281 namespace box_with_nms_limit {
    282 
    283 constexpr char kOperationName[] = "BOX_WITH_NMS_LIMIT";
    284 
    285 constexpr uint32_t kNumInputs = 9;
    286 constexpr uint32_t kScoreTensor = 0;
    287 constexpr uint32_t kRoiTensor = 1;
    288 constexpr uint32_t kBatchesTensor = 2;
    289 constexpr uint32_t kScoreThresholdScalar = 3;
    290 constexpr uint32_t kMaxNumDetectionScalar = 4;
    291 constexpr uint32_t kNmsKernelScalar = 5;
    292 constexpr uint32_t kIoUThresholdScalar = 6;
    293 constexpr uint32_t kSigmaScalar = 7;
    294 constexpr uint32_t kNmsScoreThresholdScalar = 8;
    295 
    296 constexpr uint32_t kNumOutputs = 4;
    297 constexpr uint32_t kOutputScoreTensor = 0;
    298 constexpr uint32_t kOutputRoiTensor = 1;
    299 constexpr uint32_t kOutputClassTensor = 2;
    300 constexpr uint32_t kOutputBatchesTensor = 3;
    301 
    302 namespace {
    303 
    304 // TODO(xusongw): Reduce code duplication with hard/soft nms path.
    305 
    306 // Inplace hard NMS within range [select, select + selectLength).
    307 uint32_t* hardNmsSingleClass(const float* scoresData, float iouThreshold, int32_t maxNumDetections,
    308                              std::function<const float*(uint32_t)> getRoiBase, uint32_t* select,
    309                              uint32_t selectLength) {
    310     uint32_t *selectStart = select, *selectEnd = select + selectLength, numDetections = 0;
    311     if (maxNumDetections < 0) {
    312         maxNumDetections = selectLength;
    313     }
    314     while (selectStart < selectEnd && numDetections < maxNumDetections) {
    315         // find max score and swap to the front
    316         auto& maxScore = *std::max_element(selectStart, selectEnd,
    317                                            [&scoresData](const uint32_t& lhs, const uint32_t& rhs) {
    318                                                return scoresData[lhs] < scoresData[rhs];
    319                                            });
    320         std::swap(maxScore, *selectStart);
    321 
    322         // Calculate IoU of the rest, swap to the end (disgard) if needed.
    323         for (uint32_t* i = selectStart + 1; i < selectEnd; i++) {
    324             float iou = getIoUAxisAligned(getRoiBase(*i), getRoiBase(*selectStart));
    325             if (iou >= iouThreshold) {
    326                 std::swap(*i--, *(--selectEnd));
    327             }
    328         }
    329         selectStart++;
    330         numDetections++;
    331     }
    332     return selectStart;
    333 }
    334 
    335 void hardNmsMultiClass(const float* scoresData, uint32_t numClasses, uint32_t numRois,
    336                        float scoreThreshold, float iouThreshold, int32_t maxNumDetections,
    337                        int32_t maxNumDetectionsPerClass,
    338                        std::function<const float*(uint32_t)> getRoiBase,
    339                        std::vector<uint32_t>* select) {
    340     // Exclude class 0 (background)
    341     for (uint32_t c = 1; c < numClasses; c++) {
    342         uint32_t size = select->size();
    343         for (uint32_t b = 0; b < numRois; b++) {
    344             const uint32_t index = b * numClasses + c;
    345             const float score = scoresData[index];
    346             if (score > scoreThreshold) {
    347                 select->push_back(index);
    348             }
    349         }
    350         uint32_t* selectStart = select->data() + size;
    351         uint32_t selectLength = select->size() - size;
    352         uint32_t* selectEnd = hardNmsSingleClass(scoresData, iouThreshold, maxNumDetectionsPerClass,
    353                                                  getRoiBase, selectStart, selectLength);
    354         select->resize(selectEnd - select->data());
    355     }
    356 
    357     // Take top maxNumDetections.
    358     std::sort(select->begin(), select->end(),
    359               [&scoresData](const uint32_t& lhs, const uint32_t& rhs) {
    360                   return scoresData[lhs] > scoresData[rhs];
    361               });
    362     if (maxNumDetections < 0 || select->size() <= maxNumDetections) {
    363         return;
    364     }
    365     select->resize(maxNumDetections);
    366 }
    367 
    368 // Inplace soft NMS within range [select, select + selectLength).
    369 using SoftNmsKernel = std::function<float(float)>;
    370 uint32_t* softNmsSingleClass(float* scoresData, float scoreThreshold, int32_t maxNumDetections,
    371                              std::function<const float*(uint32_t)> getRoiBase, SoftNmsKernel kernel,
    372                              uint32_t* select, uint32_t selectLength) {
    373     uint32_t *selectStart = select, *selectEnd = select + selectLength, numDetections = 0;
    374     if (maxNumDetections < 0) {
    375         maxNumDetections = selectLength;
    376     }
    377     while (selectStart < selectEnd && numDetections < maxNumDetections) {
    378         // find max score and swap to the front
    379         auto& maxScore = *std::max_element(selectStart, selectEnd,
    380                                            [&scoresData](const uint32_t& lhs, const uint32_t& rhs) {
    381                                                return scoresData[lhs] < scoresData[rhs];
    382                                            });
    383         std::swap(maxScore, *selectStart);
    384 
    385         // Calculate IoU of the rest, swap to the end (disgard) if needed.
    386         for (uint32_t* i = selectStart + 1; i < selectEnd; i++) {
    387             float iou = getIoUAxisAligned(getRoiBase(*i), getRoiBase(*selectStart));
    388             scoresData[*i] *= kernel(iou);
    389             if (scoresData[*i] < scoreThreshold) {
    390                 std::swap(*i--, *(--selectEnd));
    391             }
    392         }
    393         selectStart++;
    394         numDetections++;
    395     }
    396     return selectStart;
    397 }
    398 
    399 void softNmsMultiClass(float* scoresData, uint32_t numClasses, uint32_t numRois,
    400                        float scoreThreshold, float nmsScoreThreshold, int32_t maxNumDetections,
    401                        int32_t maxNumDetectionsPerClass,
    402                        std::function<const float*(uint32_t)> getRoiBase, SoftNmsKernel kernel,
    403                        std::vector<uint32_t>* select) {
    404     // Exclude class 0 (background)
    405     for (uint32_t c = 1; c < numClasses; c++) {
    406         uint32_t size = select->size();
    407         for (uint32_t b = 0; b < numRois; b++) {
    408             const uint32_t index = b * numClasses + c;
    409             const float score = scoresData[index];
    410             if (score > scoreThreshold) {
    411                 select->push_back(index);
    412             }
    413         }
    414         uint32_t* selectStart = select->data() + size;
    415         uint32_t selectLength = select->size() - size;
    416         uint32_t* selectEnd =
    417                 softNmsSingleClass(scoresData, nmsScoreThreshold, maxNumDetectionsPerClass,
    418                                    getRoiBase, kernel, selectStart, selectLength);
    419         select->resize(selectEnd - select->data());
    420     }
    421 
    422     // Take top maxNumDetections.
    423     std::sort(select->begin(), select->end(),
    424               [&scoresData](const uint32_t& lhs, const uint32_t& rhs) {
    425                   return scoresData[lhs] > scoresData[rhs];
    426               });
    427     if (maxNumDetections < 0 || select->size() <= maxNumDetections) {
    428         return;
    429     }
    430     select->resize(maxNumDetections);
    431 }
    432 
    433 bool boxWithNmsLimitFloat32Compute(float* scoresData, const Shape& scoresShape,
    434                                    const float* roiData, const Shape& roiShape,
    435                                    const int32_t* batchesData, const Shape& batchesShape,
    436                                    float scoreThreshold, int32_t maxNumDetections,
    437                                    int32_t softNmsKernel, float iouThreshold, float sigma,
    438                                    float nmsScoreThreshold, std::vector<uint32_t>* batchSplitIn,
    439                                    std::vector<uint32_t>* batchSplitOut,
    440                                    std::vector<uint32_t>* selected) {
    441     SoftNmsKernel kernel = nullptr;
    442     if (softNmsKernel == 0) {
    443         kernel = [&iouThreshold](float iou) { return iou < iouThreshold ? 1.0f : 0.0f; };
    444     } else if (softNmsKernel == 1) {
    445         kernel = [&iouThreshold](float iou) { return iou < iouThreshold ? 1.0f : 1.0f - iou; };
    446     } else if (softNmsKernel == 2) {
    447         kernel = [&sigma](float iou) { return std::exp(-1.0f * iou * iou / sigma); };
    448     } else {
    449         NN_RET_CHECK_FAIL() << "Unsupported soft NMS kernel " << softNmsKernel;
    450     }
    451 
    452     const uint32_t kRoiDim = 4;
    453     uint32_t numRois = getSizeOfDimension(scoresShape, 0);
    454     uint32_t numClasses = getSizeOfDimension(scoresShape, 1);
    455 
    456     // We assume boxes of the same batch are grouped together.
    457     std::vector<uint32_t> batch;
    458     for (uint32_t i = 0, ind = -1; i < numRois; i++) {
    459         if (batchesData[i] == ind) {
    460             (batchSplitIn->back())++;
    461         } else {
    462             ind = batchesData[i];
    463             batchSplitIn->push_back(1);
    464         }
    465     }
    466 
    467     float* scoresBase = scoresData;
    468     const float* roiBase = roiData;
    469     selected->clear();
    470     for (uint32_t b = 0; b < batchSplitIn->size(); b++) {
    471         for (uint32_t i = 0; i < batchSplitIn->at(b); i++) {
    472             const float* roi = roiBase + i * kRoiDim;
    473             // Check for malformed data: invalid region: x2 < x1 || y2 < y1
    474             NN_RET_CHECK_LE(roi[0], roi[2]);
    475             NN_RET_CHECK_LE(roi[1], roi[3]);
    476         }
    477         std::vector<uint32_t> result;
    478         softNmsMultiClass(scoresBase, numClasses, batchSplitIn->at(b), scoreThreshold,
    479                           nmsScoreThreshold, maxNumDetections, maxNumDetections,
    480                           [&roiBase](uint32_t ind) { return roiBase + ind * kRoiDim; }, kernel,
    481                           &result);
    482         // Sort again by class.
    483         std::sort(result.begin(), result.end(),
    484                   [&scoresBase, numClasses](const uint32_t& lhs, const uint32_t& rhs) {
    485                       uint32_t lhsClass = lhs % numClasses, rhsClass = rhs % numClasses;
    486                       return lhsClass == rhsClass ? scoresBase[lhs] > scoresBase[rhs]
    487                                                   : lhsClass < rhsClass;
    488                   });
    489         selected->insert(selected->end(), result.begin(), result.end());
    490         batchSplitOut->push_back(result.size());
    491         scoresBase += batchSplitIn->at(b) * numClasses;
    492         roiBase += batchSplitIn->at(b) * numClasses * kRoiDim;
    493     }
    494     return true;
    495 }
    496 
    497 template <typename T>
    498 T castTo(float val, const Shape&) {
    499     return val;
    500 }
    501 template <>
    502 uint8_t castTo(float val, const Shape& shape) {
    503     int32_t intVal = std::round(val / shape.scale + shape.offset);
    504     intVal = std::min<int32_t>(std::max<int32_t>(intVal, std::numeric_limits<uint8_t>::min()),
    505                                std::numeric_limits<uint8_t>::max());
    506     return static_cast<uint8_t>(intVal);
    507 }
    508 
    509 template <typename T_Score, typename T_Roi>
    510 bool boxWithNmsLimitWriteOutput(const std::vector<uint32_t>& selected,
    511                                 const std::vector<uint32_t>& batchSplitIn,
    512                                 const std::vector<uint32_t>& batchSplitOut,
    513                                 const std::vector<float>& scores,
    514                                 IOperationExecutionContext* context) {
    515     const uint32_t kRoiDim = 4;
    516     Shape scoresShape = context->getInputShape(kScoreTensor);
    517     uint32_t numClasses = getSizeOfDimension(scoresShape, 1);
    518 
    519     // Set output dimensions.
    520     uint32_t numOutRois = selected.size();
    521     if (numOutRois == 0) return true;
    522     Shape scoresOutShape = context->getOutputShape(kOutputScoreTensor);
    523     scoresOutShape.dimensions = {numOutRois};
    524     NN_RET_CHECK(context->setOutputShape(kOutputScoreTensor, scoresOutShape));
    525 
    526     Shape roiOutShape = context->getOutputShape(kOutputRoiTensor);
    527     roiOutShape.dimensions = {numOutRois, 4};
    528     NN_RET_CHECK(context->setOutputShape(kOutputRoiTensor, roiOutShape));
    529 
    530     Shape classesOutShape = context->getOutputShape(kOutputClassTensor);
    531     classesOutShape.dimensions = {numOutRois};
    532     NN_RET_CHECK(context->setOutputShape(kOutputClassTensor, classesOutShape));
    533 
    534     Shape batchesOutShape = context->getOutputShape(kOutputBatchesTensor);
    535     batchesOutShape.dimensions = {numOutRois};
    536     NN_RET_CHECK(context->setOutputShape(kOutputBatchesTensor, batchesOutShape));
    537 
    538     // Write outputs.
    539     const float* scoresBase = scores.data();
    540     const T_Roi* roiBase = context->getInputBuffer<T_Roi>(kRoiTensor);
    541     const int32_t* batchesInPtr = context->getInputBuffer<int32_t>(kBatchesTensor);
    542     T_Score* scoresOutPtr = context->getOutputBuffer<T_Score>(kOutputScoreTensor);
    543     T_Roi* roiOutPtr = context->getOutputBuffer<T_Roi>(kOutputRoiTensor);
    544     int32_t* classesOutPtr = context->getOutputBuffer<int32_t>(kOutputClassTensor);
    545     int32_t* batchesOutPtr = context->getOutputBuffer<int32_t>(kOutputBatchesTensor);
    546     uint32_t i = 0;
    547     for (uint32_t b = 0; b < batchSplitOut.size(); b++) {
    548         for (uint32_t j = 0; j < batchSplitOut[b]; j++) {
    549             uint32_t index = selected[i++];
    550             *scoresOutPtr++ = castTo<T_Score>(scoresBase[index], scoresOutShape);
    551             memcpy(roiOutPtr, roiBase + index * kRoiDim, kRoiDim * sizeof(T_Roi));
    552             roiOutPtr += kRoiDim;
    553             *classesOutPtr++ = index % numClasses;
    554             *batchesOutPtr++ = *batchesInPtr;
    555         }
    556         scoresBase += batchSplitIn[b] * numClasses;
    557         roiBase += batchSplitIn[b] * numClasses * kRoiDim;
    558         batchesInPtr += batchSplitIn[b];
    559     }
    560     return true;
    561 }
    562 
    563 bool boxWithNmsLimitFloat32(const float* scoresData, const Shape& scoresShape, const float* roiData,
    564                             const Shape& roiShape, const int32_t* batchesData,
    565                             const Shape& batchesShape, float scoreThreshold,
    566                             int32_t maxNumDetections, int32_t softNmsKernel, float iouThreshold,
    567                             float sigma, float nmsScoreThreshold, float* scoresOutData,
    568                             Shape scoresOutShape, float* roiOutData, Shape roiOutShape,
    569                             int32_t* classesOutData, Shape classesOutShape, int32_t* batchesOutData,
    570                             const Shape& batchSplitOutShape, IOperationExecutionContext* context) {
    571     NNTRACE_TRANS("boxWithNmsLimit");
    572     std::vector<float> scores_float32(getNumberOfElements(scoresShape));
    573     for (uint32_t i = 0; i < scores_float32.size(); i++) {
    574         scores_float32[i] = scoresData[i];
    575     }
    576     std::vector<uint32_t> selected, batchSplitIn, batchSplitOut;
    577     NN_RET_CHECK(boxWithNmsLimitFloat32Compute(
    578             scores_float32.data(), scoresShape, roiData, roiShape, batchesData, batchesShape,
    579             scoreThreshold, maxNumDetections, softNmsKernel, iouThreshold, sigma, nmsScoreThreshold,
    580             &batchSplitIn, &batchSplitOut, &selected));
    581     return boxWithNmsLimitWriteOutput<float, float>(selected, batchSplitIn, batchSplitOut,
    582                                                     scores_float32, context);
    583 }
    584 
    585 bool boxWithNmsLimitFloat16(const _Float16* scoresData, const Shape& scoresShape,
    586                             const _Float16* roiData, const Shape& roiShape,
    587                             const int32_t* batchesData, const Shape& batchesShape,
    588                             _Float16 scoreThreshold, int32_t maxNumDetections,
    589                             int32_t softNmsKernel, _Float16 iouThreshold, _Float16 sigma,
    590                             _Float16 nmsScoreThreshold, _Float16* scoresOutData,
    591                             const Shape& scoresOutShape, _Float16* roiOutData,
    592                             const Shape& roiOutShape, int32_t* classesOutData,
    593                             const Shape& classesOutShape, int32_t* batchesOutData,
    594                             const Shape& batchSplitOutShape, IOperationExecutionContext* context) {
    595     std::vector<float> scores_float32(getNumberOfElements(scoresShape));
    596     convertFloat16ToFloat32(scoresData, &scores_float32);
    597     std::vector<float> roi_float32(getNumberOfElements(roiShape));
    598     convertFloat16ToFloat32(roiData, &roi_float32);
    599     std::vector<uint32_t> selected, batchSplitIn, batchSplitOut;
    600     NN_RET_CHECK(boxWithNmsLimitFloat32Compute(
    601             scores_float32.data(), scoresShape, roi_float32.data(), roiShape, batchesData,
    602             batchesShape, scoreThreshold, maxNumDetections, softNmsKernel, iouThreshold, sigma,
    603             nmsScoreThreshold, &batchSplitIn, &batchSplitOut, &selected));
    604     return boxWithNmsLimitWriteOutput<_Float16, _Float16>(selected, batchSplitIn, batchSplitOut,
    605                                                           scores_float32, context);
    606 }
    607 
    608 bool boxWithNmsLimitQuant(const uint8_t* scoresData, const Shape& scoresShape,
    609                           const uint16_t* roiData, const Shape& roiShape,
    610                           const int32_t* batchesData, const Shape& batchesShape,
    611                           float scoreThreshold, int32_t maxNumDetections, int32_t softNmsKernel,
    612                           float iouThreshold, float sigma, float nmsScoreThreshold,
    613                           uint8_t* scoresOutData, const Shape& scoresOutShape, uint16_t* roiOutData,
    614                           const Shape& roiOutShape, int32_t* classesOutData,
    615                           const Shape& classesOutShape, int32_t* batchesOutData,
    616                           const Shape& batchSplitOutShape, IOperationExecutionContext* context) {
    617     std::vector<float> scores_float32(getNumberOfElements(scoresShape));
    618     convertQuantToFloat32(scoresData, scoresShape.scale, scoresShape.offset, &scores_float32);
    619     std::vector<float> roi_float32(getNumberOfElements(roiShape));
    620     convertQuantToFloat32(roiData, roiShape.scale, roiShape.offset, &roi_float32);
    621     std::vector<uint32_t> selected, batchSplitIn, batchSplitOut;
    622     NN_RET_CHECK(boxWithNmsLimitFloat32Compute(
    623             scores_float32.data(), scoresShape, roi_float32.data(), roiShape, batchesData,
    624             batchesShape, scoreThreshold, maxNumDetections, softNmsKernel, iouThreshold, sigma,
    625             nmsScoreThreshold, &batchSplitIn, &batchSplitOut, &selected));
    626     return boxWithNmsLimitWriteOutput<uint8_t, uint16_t>(selected, batchSplitIn, batchSplitOut,
    627                                                          scores_float32, context);
    628 }
    629 
    630 }  // namespace
    631 
    632 bool validate(const IOperationValidationContext* context) {
    633     NN_RET_CHECK_EQ(context->getNumInputs(), kNumInputs);
    634     NN_RET_CHECK_EQ(context->getNumOutputs(), kNumOutputs);
    635     std::vector<OperandType> inExpectedTypes;
    636     std::vector<OperandType> outExpectedTypes;
    637     auto inputType = context->getInputType(kScoreTensor);
    638     if (inputType == OperandType::TENSOR_FLOAT16) {
    639         inExpectedTypes = {
    640                 OperandType::TENSOR_FLOAT16, OperandType::TENSOR_FLOAT16, OperandType::TENSOR_INT32,
    641                 OperandType::FLOAT16,        OperandType::INT32,          OperandType::INT32,
    642                 OperandType::FLOAT16,        OperandType::FLOAT16,        OperandType::FLOAT16};
    643         outExpectedTypes = {OperandType::TENSOR_FLOAT16, OperandType::TENSOR_FLOAT16,
    644                             OperandType::TENSOR_INT32, OperandType::TENSOR_INT32};
    645     } else if (inputType == OperandType::TENSOR_FLOAT32) {
    646         inExpectedTypes = {
    647                 OperandType::TENSOR_FLOAT32, OperandType::TENSOR_FLOAT32, OperandType::TENSOR_INT32,
    648                 OperandType::FLOAT32,        OperandType::INT32,          OperandType::INT32,
    649                 OperandType::FLOAT32,        OperandType::FLOAT32,        OperandType::FLOAT32};
    650         outExpectedTypes = {OperandType::TENSOR_FLOAT32, OperandType::TENSOR_FLOAT32,
    651                             OperandType::TENSOR_INT32, OperandType::TENSOR_INT32};
    652     } else if (inputType == OperandType::TENSOR_QUANT8_ASYMM) {
    653         inExpectedTypes = {OperandType::TENSOR_QUANT8_ASYMM,
    654                            OperandType::TENSOR_QUANT16_ASYMM,
    655                            OperandType::TENSOR_INT32,
    656                            OperandType::FLOAT32,
    657                            OperandType::INT32,
    658                            OperandType::INT32,
    659                            OperandType::FLOAT32,
    660                            OperandType::FLOAT32,
    661                            OperandType::FLOAT32};
    662         outExpectedTypes = {OperandType::TENSOR_QUANT8_ASYMM, OperandType::TENSOR_QUANT16_ASYMM,
    663                             OperandType::TENSOR_INT32, OperandType::TENSOR_INT32};
    664     } else {
    665         NN_RET_CHECK_FAIL() << "Unsupported tensor type for operation " << kOperationName;
    666     }
    667     NN_RET_CHECK(validateInputTypes(context, inExpectedTypes));
    668     NN_RET_CHECK(validateOutputTypes(context, outExpectedTypes));
    669     return validateHalVersion(context, HalVersion::V1_2);
    670 }
    671 
    672 bool prepare(IOperationExecutionContext* context) {
    673     Shape scoreShape = context->getInputShape(kScoreTensor);
    674     Shape roiShape = context->getInputShape(kRoiTensor);
    675     Shape batchesShape = context->getInputShape(kBatchesTensor);
    676     Shape outputScoreShape = context->getOutputShape(kOutputScoreTensor);
    677     Shape outputRoiShape = context->getOutputShape(kOutputRoiTensor);
    678     Shape outputClassShape = context->getOutputShape(kOutputClassTensor);
    679     Shape outputBatchSplitShape = context->getOutputShape(kOutputBatchesTensor);
    680 
    681     NN_RET_CHECK(getNumberOfDimensions(scoreShape) == 2);
    682     NN_RET_CHECK(getNumberOfDimensions(roiShape) == 2);
    683     NN_RET_CHECK(getNumberOfDimensions(batchesShape) == 1);
    684 
    685     // Only numRois can be zero.
    686     const uint32_t kRoiDim = 4;
    687     uint32_t numRois = getSizeOfDimension(scoreShape, 0);
    688     uint32_t numClasses = getSizeOfDimension(scoreShape, 1);
    689     NN_RET_CHECK(getSizeOfDimension(roiShape, 0) == numRois);
    690     NN_RET_CHECK(getSizeOfDimension(roiShape, 1) == kRoiDim * numClasses);
    691     NN_RET_CHECK(getSizeOfDimension(batchesShape, 0) == numRois);
    692     NN_RET_CHECK_GT(numClasses, 1);
    693 
    694     if (scoreShape.type == OperandType::TENSOR_QUANT8_ASYMM) {
    695         NN_RET_CHECK_EQ(roiShape.scale, 0.125f);
    696         NN_RET_CHECK_EQ(roiShape.offset, 0);
    697     }
    698 
    699     outputScoreShape.type = scoreShape.type;
    700     outputScoreShape.dimensions = {0};
    701     outputScoreShape.scale = scoreShape.scale;
    702     outputScoreShape.offset = scoreShape.offset;
    703     NN_RET_CHECK(context->setOutputShape(kOutputScoreTensor, outputScoreShape));
    704 
    705     outputRoiShape.type = roiShape.type;
    706     outputRoiShape.dimensions = {0, 4};
    707     outputRoiShape.scale = 0.125f;
    708     outputRoiShape.offset = 0;
    709     NN_RET_CHECK(context->setOutputShape(kOutputRoiTensor, outputRoiShape));
    710 
    711     outputClassShape.type = OperandType::TENSOR_INT32;
    712     outputClassShape.dimensions = {0};
    713     NN_RET_CHECK(context->setOutputShape(kOutputClassTensor, outputClassShape));
    714 
    715     outputBatchSplitShape.type = batchesShape.type;
    716     outputBatchSplitShape.dimensions = {0};
    717     NN_RET_CHECK(context->setOutputShape(kOutputBatchesTensor, outputBatchSplitShape));
    718     return true;
    719 }
    720 
    721 bool execute(IOperationExecutionContext* context) {
    722     NNTRACE_TRANS("boxWithNMSLimit");
    723     // Bypass execution in the case of zero numRois.
    724     if (getSizeOfDimension(context->getInputShape(kScoreTensor), 0) == 0) return true;
    725     switch (context->getInputType(kScoreTensor)) {
    726         case OperandType::TENSOR_FLOAT16: {
    727             return boxWithNmsLimitFloat16(
    728                     context->getInputBuffer<_Float16>(kScoreTensor),
    729                     context->getInputShape(kScoreTensor),
    730                     context->getInputBuffer<_Float16>(kRoiTensor),
    731                     context->getInputShape(kRoiTensor),
    732                     context->getInputBuffer<int32_t>(kBatchesTensor),
    733                     context->getInputShape(kBatchesTensor),
    734                     context->getInputValue<_Float16>(kScoreThresholdScalar),
    735                     context->getInputValue<int32_t>(kMaxNumDetectionScalar),
    736                     context->getInputValue<int32_t>(kNmsKernelScalar),
    737                     context->getInputValue<_Float16>(kIoUThresholdScalar),
    738                     context->getInputValue<_Float16>(kSigmaScalar),
    739                     context->getInputValue<_Float16>(kNmsScoreThresholdScalar),
    740                     context->getOutputBuffer<_Float16>(kOutputScoreTensor),
    741                     context->getOutputShape(kOutputScoreTensor),
    742                     context->getOutputBuffer<_Float16>(kOutputRoiTensor),
    743                     context->getOutputShape(kOutputRoiTensor),
    744                     context->getOutputBuffer<int32_t>(kOutputClassTensor),
    745                     context->getOutputShape(kOutputClassTensor),
    746                     context->getOutputBuffer<int32_t>(kOutputBatchesTensor),
    747                     context->getOutputShape(kOutputBatchesTensor), context);
    748         }
    749         case OperandType::TENSOR_FLOAT32: {
    750             return boxWithNmsLimitFloat32(context->getInputBuffer<float>(kScoreTensor),
    751                                           context->getInputShape(kScoreTensor),
    752                                           context->getInputBuffer<float>(kRoiTensor),
    753                                           context->getInputShape(kRoiTensor),
    754                                           context->getInputBuffer<int32_t>(kBatchesTensor),
    755                                           context->getInputShape(kBatchesTensor),
    756                                           context->getInputValue<float>(kScoreThresholdScalar),
    757                                           context->getInputValue<int32_t>(kMaxNumDetectionScalar),
    758                                           context->getInputValue<int32_t>(kNmsKernelScalar),
    759                                           context->getInputValue<float>(kIoUThresholdScalar),
    760                                           context->getInputValue<float>(kSigmaScalar),
    761                                           context->getInputValue<float>(kNmsScoreThresholdScalar),
    762                                           context->getOutputBuffer<float>(kOutputScoreTensor),
    763                                           context->getOutputShape(kOutputScoreTensor),
    764                                           context->getOutputBuffer<float>(kOutputRoiTensor),
    765                                           context->getOutputShape(kOutputRoiTensor),
    766                                           context->getOutputBuffer<int32_t>(kOutputClassTensor),
    767                                           context->getOutputShape(kOutputClassTensor),
    768                                           context->getOutputBuffer<int32_t>(kOutputBatchesTensor),
    769                                           context->getOutputShape(kOutputBatchesTensor), context);
    770         }
    771         case OperandType::TENSOR_QUANT8_ASYMM: {
    772             return boxWithNmsLimitQuant(context->getInputBuffer<uint8_t>(kScoreTensor),
    773                                         context->getInputShape(kScoreTensor),
    774                                         context->getInputBuffer<uint16_t>(kRoiTensor),
    775                                         context->getInputShape(kRoiTensor),
    776                                         context->getInputBuffer<int32_t>(kBatchesTensor),
    777                                         context->getInputShape(kBatchesTensor),
    778                                         context->getInputValue<float>(kScoreThresholdScalar),
    779                                         context->getInputValue<int32_t>(kMaxNumDetectionScalar),
    780                                         context->getInputValue<int32_t>(kNmsKernelScalar),
    781                                         context->getInputValue<float>(kIoUThresholdScalar),
    782                                         context->getInputValue<float>(kSigmaScalar),
    783                                         context->getInputValue<float>(kNmsScoreThresholdScalar),
    784                                         context->getOutputBuffer<uint8_t>(kOutputScoreTensor),
    785                                         context->getOutputShape(kOutputScoreTensor),
    786                                         context->getOutputBuffer<uint16_t>(kOutputRoiTensor),
    787                                         context->getOutputShape(kOutputRoiTensor),
    788                                         context->getOutputBuffer<int32_t>(kOutputClassTensor),
    789                                         context->getOutputShape(kOutputClassTensor),
    790                                         context->getOutputBuffer<int32_t>(kOutputBatchesTensor),
    791                                         context->getOutputShape(kOutputBatchesTensor), context);
    792         }
    793         default:
    794             NN_RET_CHECK_FAIL() << "Unsupported tensor type for operation " << kOperationName;
    795     }
    796 }
    797 
    798 }  // namespace box_with_nms_limit
    799 
    800 namespace generate_proposals {
    801 
    802 constexpr char kOperationName[] = "GENERATE_PROPOSALS";
    803 
    804 constexpr uint32_t kNumInputs = 11;
    805 constexpr uint32_t kScoreTensor = 0;
    806 constexpr uint32_t kDeltaTensor = 1;
    807 constexpr uint32_t kAnchorTensor = 2;
    808 constexpr uint32_t kImageInfoTensor = 3;
    809 constexpr uint32_t kHeightStrideSalar = 4;
    810 constexpr uint32_t kWidthStrideScalar = 5;
    811 constexpr uint32_t kPreNmsMaxScalar = 6;
    812 constexpr uint32_t kPostNmsMaxScalar = 7;
    813 constexpr uint32_t kIoUThresholdScalar = 8;
    814 constexpr uint32_t kMinSizeScalar = 9;
    815 constexpr uint32_t kLayoutScalar = 10;
    816 
    817 constexpr uint32_t kNumOutputs = 3;
    818 constexpr uint32_t kOutputScoreTensor = 0;
    819 constexpr uint32_t kOutputRoiTensor = 1;
    820 constexpr uint32_t kOutputBatchesTensor = 2;
    821 
    822 namespace {
    823 
    824 void filterBoxes(const float* roiBase, const float* imageInfoBase, float minSize,
    825                  std::vector<uint32_t>* select) {
    826     const uint32_t kRoiDim = 4;
    827     uint32_t i = 0;
    828     for (uint32_t j = 0; j < select->size(); j++) {
    829         const float* roiInfo = roiBase + (*select)[j] * kRoiDim;
    830         float roiWidth, roiHeight, xRoiCenter, yRoiCenter;
    831         roiWidth = roiInfo[2] - roiInfo[0];
    832         roiHeight = roiInfo[3] - roiInfo[1];
    833         xRoiCenter = roiInfo[0] + roiWidth / 2.0f;
    834         yRoiCenter = roiInfo[1] + roiHeight / 2.0f;
    835         if (roiWidth > minSize && roiHeight > minSize && xRoiCenter < imageInfoBase[1] &&
    836             yRoiCenter < imageInfoBase[0]) {
    837             (*select)[i++] = (*select)[j];
    838         }
    839     }
    840     select->resize(i);
    841 }
    842 
    843 bool generateProposalsNhwcFloat32Compute(const float* scoresData, const Shape& scoresShape,
    844                                          const float* bboxDeltasData, const Shape& bboxDeltasShape,
    845                                          const float* anchorsData, const Shape& anchorsShape,
    846                                          const float* imageInfoData, const Shape& imageInfoShape,
    847                                          float heightStride, float widthStride, int32_t preNmsTopN,
    848                                          int32_t postNmsTopN, float iouThreshold, float minSize,
    849                                          std::vector<float>* scoresOutData,
    850                                          std::vector<float>* roiOutData,
    851                                          std::vector<int32_t>* batchesOutData) {
    852     const uint32_t kRoiDim = 4;
    853     uint32_t numBatches = getSizeOfDimension(scoresShape, 0);
    854     uint32_t height = getSizeOfDimension(scoresShape, 1);
    855     uint32_t width = getSizeOfDimension(scoresShape, 2);
    856     uint32_t numAnchors = getSizeOfDimension(scoresShape, 3);
    857     uint32_t imageInfoLength = getSizeOfDimension(imageInfoShape, 1);
    858 
    859     uint32_t batchSize = height * width * numAnchors;
    860     uint32_t roiBufferSize = batchSize * kRoiDim;
    861     std::vector<float> roiBuffer(roiBufferSize);
    862     std::vector<float> roiTransformedBuffer(roiBufferSize);
    863     scoresOutData->clear();
    864     roiOutData->clear();
    865     batchesOutData->clear();
    866 
    867     // Compute the roi region for each anchor.
    868     float* roiBase = roiBuffer.data();
    869     for (uint32_t h = 0; h < height; h++) {
    870         float hShift = h * heightStride;
    871         for (uint32_t w = 0; w < width; w++) {
    872             const float* anchorsBase = anchorsData;
    873             float wShift = w * widthStride;
    874             for (uint32_t a = 0; a < numAnchors; a++, roiBase += kRoiDim, anchorsBase += kRoiDim) {
    875                 roiBase[0] = anchorsBase[0] + wShift;
    876                 roiBase[1] = anchorsBase[1] + hShift;
    877                 roiBase[2] = anchorsBase[2] + wShift;
    878                 roiBase[3] = anchorsBase[3] + hShift;
    879             }
    880         }
    881     }
    882 
    883     const float* scoresBase = scoresData;
    884     const float* bboxDeltasBase = bboxDeltasData;
    885     const float* imageInfoBase = imageInfoData;
    886     // Need to fake some data to satisfy bboxTransform.
    887     Shape tempRoiShape = anchorsShape;
    888     tempRoiShape.dimensions = {batchSize, kRoiDim};
    889     Shape tempBBoxDeltasShape = bboxDeltasShape;
    890     tempBBoxDeltasShape.dimensions = {batchSize, kRoiDim};
    891     std::vector<int32_t> tempBatchSplitData(batchSize, 0);
    892     Shape tempbatchSplitShape = {.dimensions = {batchSize}};
    893     Shape tempImageInfoShape = imageInfoShape;
    894     tempImageInfoShape.dimensions = {1, imageInfoLength};
    895 
    896     for (uint32_t b = 0; b < numBatches; b++) {
    897         // Apply bboxDeltas to anchor locations.
    898         float tempImageInfo[] = {imageInfoBase[0], imageInfoBase[1]};
    899         if (!bboxTransformFloat32(roiBuffer.data(), tempRoiShape, bboxDeltasBase,
    900                                   tempBBoxDeltasShape, tempBatchSplitData.data(),
    901                                   tempbatchSplitShape, tempImageInfo, tempImageInfoShape,
    902                                   roiTransformedBuffer.data(), tempRoiShape)) {
    903             LOG(ERROR) << "BBoxTransform step failed in GENERATE_PROPOSALS op.";
    904             return false;
    905         }
    906 
    907         // Find the top preNmsTopN scores.
    908         std::vector<uint32_t> select(batchSize);
    909         std::iota(select.begin(), select.end(), 0);
    910         if (preNmsTopN > 0 && preNmsTopN < select.size()) {
    911             std::sort(select.begin(), select.end(),
    912                       [&scoresBase](const uint32_t lhs, const uint32_t rhs) {
    913                           return scoresBase[lhs] > scoresBase[rhs];
    914                       });
    915             select.resize(preNmsTopN);
    916         }
    917 
    918         // Filter boxes, disgard regions with height or width < minSize.
    919         filterBoxes(roiTransformedBuffer.data(), imageInfoBase, minSize, &select);
    920 
    921         // Apply hard NMS.
    922         uint32_t* selectEnd = box_with_nms_limit::hardNmsSingleClass(
    923                 scoresBase, iouThreshold, postNmsTopN,
    924                 [&roiTransformedBuffer](uint32_t ind) {
    925                     return roiTransformedBuffer.data() + ind * kRoiDim;
    926                 },
    927                 select.data(), select.size());
    928         uint32_t selectSize = selectEnd - select.data();
    929         select.resize(selectSize);
    930 
    931         // Write output.
    932         for (auto i : select) {
    933             roiOutData->insert(roiOutData->end(), roiTransformedBuffer.begin() + i * kRoiDim,
    934                                roiTransformedBuffer.begin() + (i + 1) * kRoiDim);
    935             scoresOutData->push_back(scoresBase[i]);
    936             batchesOutData->push_back(b);
    937         }
    938         scoresBase += batchSize;
    939         bboxDeltasBase += roiBufferSize;
    940         imageInfoBase += imageInfoLength;
    941     }
    942     return true;
    943 }
    944 
    945 bool generateProposalsFloat32Compute(const float* scoresData, const Shape& scoresShape,
    946                                      const float* bboxDeltasData, const Shape& bboxDeltasShape,
    947                                      const float* anchorsData, const Shape& anchorsShape,
    948                                      const float* imageInfoData, const Shape& imageInfoShape,
    949                                      float heightStride, float widthStride, int32_t preNmsTopN,
    950                                      int32_t postNmsTopN, float iouThreshold, float minSize,
    951                                      bool useNchw, std::vector<float>* scoresOutData,
    952                                      std::vector<float>* roiOutData,
    953                                      std::vector<int32_t>* batchesOutData) {
    954     InputWithLayout<float> score_nhwc(useNchw), delta_nhwc(useNchw);
    955     NN_RET_CHECK(score_nhwc.initialize(scoresData, scoresShape));
    956     NN_RET_CHECK(delta_nhwc.initialize(bboxDeltasData, bboxDeltasShape));
    957     return generateProposalsNhwcFloat32Compute(
    958             score_nhwc.getNhwcBuffer(), score_nhwc.getNhwcShape(), delta_nhwc.getNhwcBuffer(),
    959             delta_nhwc.getNhwcShape(), anchorsData, anchorsShape, imageInfoData, imageInfoShape,
    960             heightStride, widthStride, preNmsTopN, postNmsTopN, iouThreshold, minSize,
    961             scoresOutData, roiOutData, batchesOutData);
    962 }
    963 
    964 bool generateProposalsFloat32(const float* scoresData, const Shape& scoresShape,
    965                               const float* bboxDeltasData, const Shape& bboxDeltasShape,
    966                               const float* anchorsData, const Shape& anchorsShape,
    967                               const float* imageInfoData, const Shape& imageInfoShape,
    968                               float heightStride, float widthStride, int32_t preNmsTopN,
    969                               int32_t postNmsTopN, float iouThreshold, float minSize, bool useNchw,
    970                               IOperationExecutionContext* context) {
    971     std::vector<float> scoresOut_float32, roiOut_float32;
    972     std::vector<int32_t> batchesOut;
    973     NN_RET_CHECK(generateProposalsFloat32Compute(
    974             scoresData, scoresShape, bboxDeltasData, bboxDeltasShape, anchorsData, anchorsShape,
    975             imageInfoData, imageInfoShape, heightStride, widthStride, preNmsTopN, postNmsTopN,
    976             iouThreshold, minSize, useNchw, &scoresOut_float32, &roiOut_float32, &batchesOut));
    977 
    978     // Set output dimensions.
    979     uint32_t numOutRois = scoresOut_float32.size();
    980     if (numOutRois == 0) return true;
    981     Shape scoresOutShape = context->getOutputShape(kOutputScoreTensor);
    982     scoresOutShape.dimensions = {numOutRois};
    983     NN_RET_CHECK(context->setOutputShape(kOutputScoreTensor, scoresOutShape));
    984     Shape roiOutShape = context->getOutputShape(kOutputRoiTensor);
    985     roiOutShape.dimensions = {numOutRois, 4};
    986     NN_RET_CHECK(context->setOutputShape(kOutputRoiTensor, roiOutShape));
    987     Shape batchesOutShape = context->getOutputShape(kOutputBatchesTensor);
    988     batchesOutShape.dimensions = {numOutRois};
    989     NN_RET_CHECK(context->setOutputShape(kOutputBatchesTensor, batchesOutShape));
    990 
    991     // Write outputs.
    992     float* scoresOutData = context->getOutputBuffer<float>(kOutputScoreTensor);
    993     for (uint32_t i = 0; i < scoresOut_float32.size(); i++) {
    994         scoresOutData[i] = scoresOut_float32[i];
    995     }
    996     float* roiOutData = context->getOutputBuffer<float>(kOutputRoiTensor);
    997     for (uint32_t i = 0; i < roiOut_float32.size(); i++) {
    998         roiOutData[i] = roiOut_float32[i];
    999     }
   1000     int32_t* batchesOutData = context->getOutputBuffer<int32_t>(kOutputBatchesTensor);
   1001     for (uint32_t i = 0; i < batchesOut.size(); i++) {
   1002         batchesOutData[i] = batchesOut[i];
   1003     }
   1004     return true;
   1005 }
   1006 
   1007 bool generateProposalsFloat16(const _Float16* scoresData, const Shape& scoresShape,
   1008                               const _Float16* bboxDeltasData, const Shape& bboxDeltasShape,
   1009                               const _Float16* anchorsData, const Shape& anchorsShape,
   1010                               const _Float16* imageInfoData, const Shape& imageInfoShape,
   1011                               float heightStride, float widthStride, int32_t preNmsTopN,
   1012                               int32_t postNmsTopN, float iouThreshold, float minSize, bool useNchw,
   1013                               IOperationExecutionContext* context) {
   1014     std::vector<float> score_float32(getNumberOfElements(scoresShape));
   1015     convertFloat16ToFloat32(scoresData, &score_float32);
   1016     std::vector<float> delta_float32(getNumberOfElements(bboxDeltasShape));
   1017     convertFloat16ToFloat32(bboxDeltasData, &delta_float32);
   1018     std::vector<float> anchors_float32(getNumberOfElements(anchorsShape));
   1019     convertFloat16ToFloat32(anchorsData, &anchors_float32);
   1020     std::vector<float> imageInfo_float32(getNumberOfElements(imageInfoShape));
   1021     convertFloat16ToFloat32(imageInfoData, &imageInfo_float32);
   1022     std::vector<float> scoresOut_float32, roiOut_float32;
   1023     std::vector<int32_t> batchesOut;
   1024     NN_RET_CHECK(generateProposalsFloat32Compute(
   1025             score_float32.data(), scoresShape, delta_float32.data(), bboxDeltasShape,
   1026             anchors_float32.data(), anchorsShape, imageInfo_float32.data(), imageInfoShape,
   1027             heightStride, widthStride, preNmsTopN, postNmsTopN, iouThreshold, minSize, useNchw,
   1028             &scoresOut_float32, &roiOut_float32, &batchesOut));
   1029 
   1030     // Set output dimensions.
   1031     uint32_t numOutRois = scoresOut_float32.size();
   1032     if (numOutRois == 0) return true;
   1033     Shape scoresOutShape = context->getOutputShape(kOutputScoreTensor);
   1034     scoresOutShape.dimensions = {numOutRois};
   1035     NN_RET_CHECK(context->setOutputShape(kOutputScoreTensor, scoresOutShape));
   1036     Shape roiOutShape = context->getOutputShape(kOutputRoiTensor);
   1037     roiOutShape.dimensions = {numOutRois, 4};
   1038     NN_RET_CHECK(context->setOutputShape(kOutputRoiTensor, roiOutShape));
   1039     Shape batchesOutShape = context->getOutputShape(kOutputBatchesTensor);
   1040     batchesOutShape.dimensions = {numOutRois};
   1041     NN_RET_CHECK(context->setOutputShape(kOutputBatchesTensor, batchesOutShape));
   1042 
   1043     // Write outputs.
   1044     _Float16* scoresOutData = context->getOutputBuffer<_Float16>(kOutputScoreTensor);
   1045     convertFloat32ToFloat16(scoresOut_float32, scoresOutData);
   1046     _Float16* roiOutData = context->getOutputBuffer<_Float16>(kOutputRoiTensor);
   1047     convertFloat32ToFloat16(roiOut_float32, roiOutData);
   1048     int32_t* batchesOutData = context->getOutputBuffer<int32_t>(kOutputBatchesTensor);
   1049     for (uint32_t i = 0; i < batchesOut.size(); i++) {
   1050         batchesOutData[i] = batchesOut[i];
   1051     }
   1052     return true;
   1053 }
   1054 
   1055 bool generateProposalsQuant(const uint8_t* scoresData, const Shape& scoresShape,
   1056                             const uint8_t* bboxDeltasData, const Shape& bboxDeltasShape,
   1057                             const int16_t* anchorsData, const Shape& anchorsShape,
   1058                             const uint16_t* imageInfoData, const Shape& imageInfoShape,
   1059                             float heightStride, float widthStride, int32_t preNmsTopN,
   1060                             int32_t postNmsTopN, float iouThreshold, float minSize, bool useNchw,
   1061                             IOperationExecutionContext* context) {
   1062     std::vector<float> score_float32(getNumberOfElements(scoresShape));
   1063     convertQuantToFloat32(scoresData, scoresShape.scale, scoresShape.offset, &score_float32);
   1064     std::vector<float> delta_float32(getNumberOfElements(bboxDeltasShape));
   1065     convertQuantToFloat32(bboxDeltasData, bboxDeltasShape.scale, bboxDeltasShape.offset,
   1066                           &delta_float32);
   1067     std::vector<float> anchors_float32(getNumberOfElements(anchorsShape));
   1068     convertQuantToFloat32(anchorsData, anchorsShape.scale, anchorsShape.offset, &anchors_float32);
   1069     std::vector<float> imageInfo_float32(getNumberOfElements(imageInfoShape));
   1070     convertQuantToFloat32(imageInfoData, imageInfoShape.scale, imageInfoShape.offset,
   1071                           &imageInfo_float32);
   1072     std::vector<float> scoresOut_float32, roiOut_float32;
   1073     std::vector<int32_t> batchesOut;
   1074     NN_RET_CHECK(generateProposalsFloat32Compute(
   1075             score_float32.data(), scoresShape, delta_float32.data(), bboxDeltasShape,
   1076             anchors_float32.data(), anchorsShape, imageInfo_float32.data(), imageInfoShape,
   1077             heightStride, widthStride, preNmsTopN, postNmsTopN, iouThreshold, minSize, useNchw,
   1078             &scoresOut_float32, &roiOut_float32, &batchesOut));
   1079 
   1080     // Set output dimensions.
   1081     uint32_t numOutRois = scoresOut_float32.size();
   1082     if (numOutRois == 0) return true;
   1083     Shape scoresOutShape = context->getOutputShape(kOutputScoreTensor);
   1084     scoresOutShape.dimensions = {numOutRois};
   1085     NN_RET_CHECK(context->setOutputShape(kOutputScoreTensor, scoresOutShape));
   1086     Shape roiOutShape = context->getOutputShape(kOutputRoiTensor);
   1087     roiOutShape.dimensions = {numOutRois, 4};
   1088     NN_RET_CHECK(context->setOutputShape(kOutputRoiTensor, roiOutShape));
   1089     Shape batchesOutShape = context->getOutputShape(kOutputBatchesTensor);
   1090     batchesOutShape.dimensions = {numOutRois};
   1091     NN_RET_CHECK(context->setOutputShape(kOutputBatchesTensor, batchesOutShape));
   1092 
   1093     // Write outputs.
   1094     uint8_t* scoresOutData = context->getOutputBuffer<uint8_t>(kOutputScoreTensor);
   1095     convertFloat32ToQuant(scoresOut_float32, scoresOutShape.scale, scoresOutShape.offset,
   1096                           scoresOutData);
   1097     uint16_t* roiOutData = context->getOutputBuffer<uint16_t>(kOutputRoiTensor);
   1098     convertFloat32ToQuant(roiOut_float32, roiOutShape.scale, roiOutShape.offset, roiOutData);
   1099     int32_t* batchesOutData = context->getOutputBuffer<int32_t>(kOutputBatchesTensor);
   1100     for (uint32_t i = 0; i < batchesOut.size(); i++) {
   1101         batchesOutData[i] = batchesOut[i];
   1102     }
   1103     return true;
   1104 }
   1105 
   1106 }  // namespace
   1107 
   1108 bool validate(const IOperationValidationContext* context) {
   1109     NN_RET_CHECK_EQ(context->getNumInputs(), kNumInputs);
   1110     NN_RET_CHECK_EQ(context->getNumOutputs(), kNumOutputs);
   1111     std::vector<OperandType> inExpectedTypes;
   1112     std::vector<OperandType> outExpectedTypes;
   1113     auto inputType = context->getInputType(kScoreTensor);
   1114     if (inputType == OperandType::TENSOR_FLOAT16) {
   1115         inExpectedTypes = {OperandType::TENSOR_FLOAT16,
   1116                            OperandType::TENSOR_FLOAT16,
   1117                            OperandType::TENSOR_FLOAT16,
   1118                            OperandType::TENSOR_FLOAT16,
   1119                            OperandType::FLOAT16,
   1120                            OperandType::FLOAT16,
   1121                            OperandType::INT32,
   1122                            OperandType::INT32,
   1123                            OperandType::FLOAT16,
   1124                            OperandType::FLOAT16,
   1125                            OperandType::BOOL};
   1126         outExpectedTypes = {OperandType::TENSOR_FLOAT16, OperandType::TENSOR_FLOAT16,
   1127                             OperandType::TENSOR_INT32};
   1128     } else if (inputType == OperandType::TENSOR_FLOAT32) {
   1129         inExpectedTypes = {OperandType::TENSOR_FLOAT32,
   1130                            OperandType::TENSOR_FLOAT32,
   1131                            OperandType::TENSOR_FLOAT32,
   1132                            OperandType::TENSOR_FLOAT32,
   1133                            OperandType::FLOAT32,
   1134                            OperandType::FLOAT32,
   1135                            OperandType::INT32,
   1136                            OperandType::INT32,
   1137                            OperandType::FLOAT32,
   1138                            OperandType::FLOAT32,
   1139                            OperandType::BOOL};
   1140         outExpectedTypes = {OperandType::TENSOR_FLOAT32, OperandType::TENSOR_FLOAT32,
   1141                             OperandType::TENSOR_INT32};
   1142     } else if (inputType == OperandType::TENSOR_QUANT8_ASYMM) {
   1143         inExpectedTypes = {OperandType::TENSOR_QUANT8_ASYMM,
   1144                            OperandType::TENSOR_QUANT8_ASYMM,
   1145                            OperandType::TENSOR_QUANT16_SYMM,
   1146                            OperandType::TENSOR_QUANT16_ASYMM,
   1147                            OperandType::FLOAT32,
   1148                            OperandType::FLOAT32,
   1149                            OperandType::INT32,
   1150                            OperandType::INT32,
   1151                            OperandType::FLOAT32,
   1152                            OperandType::FLOAT32,
   1153                            OperandType::BOOL};
   1154         outExpectedTypes = {OperandType::TENSOR_QUANT8_ASYMM, OperandType::TENSOR_QUANT16_ASYMM,
   1155                             OperandType::TENSOR_INT32};
   1156     } else {
   1157         NN_RET_CHECK_FAIL() << "Unsupported tensor type for operation " << kOperationName;
   1158     }
   1159     NN_RET_CHECK(validateInputTypes(context, inExpectedTypes));
   1160     NN_RET_CHECK(validateOutputTypes(context, outExpectedTypes));
   1161     return validateHalVersion(context, HalVersion::V1_2);
   1162 }
   1163 
   1164 bool prepare(IOperationExecutionContext* context) {
   1165     bool useNchw = context->getInputValue<bool>(kLayoutScalar);
   1166     Shape scoreShape = context->getInputShape(kScoreTensor);
   1167     Shape bboxDeltasShape = context->getInputShape(kDeltaTensor);
   1168     Shape anchorsShape = context->getInputShape(kAnchorTensor);
   1169     Shape imageInfoDataShape = context->getInputShape(kImageInfoTensor);
   1170     Shape outputScoreShape = context->getOutputShape(kOutputScoreTensor);
   1171     Shape outputRoiShape = context->getOutputShape(kOutputRoiTensor);
   1172     Shape outputBatchSplitShape = context->getOutputShape(kOutputBatchesTensor);
   1173 
   1174     NN_RET_CHECK_EQ(getNumberOfDimensions(scoreShape), 4);
   1175     NN_RET_CHECK_EQ(getNumberOfDimensions(bboxDeltasShape), 4);
   1176     NN_RET_CHECK_EQ(getNumberOfDimensions(anchorsShape), 2);
   1177     NN_RET_CHECK_EQ(getNumberOfDimensions(imageInfoDataShape), 2);
   1178 
   1179     const uint32_t kRoiDim = 4;
   1180     uint32_t numBatches = getSizeOfDimension(scoreShape, 0);
   1181     uint32_t height = getSizeOfDimension(scoreShape, useNchw ? 2 : 1);
   1182     uint32_t width = getSizeOfDimension(scoreShape, useNchw ? 3 : 2);
   1183     uint32_t numAnchors = getSizeOfDimension(scoreShape, useNchw ? 1 : 3);
   1184 
   1185     NN_RET_CHECK_EQ(getSizeOfDimension(bboxDeltasShape, 0), numBatches);
   1186     NN_RET_CHECK_EQ(getSizeOfDimension(bboxDeltasShape, useNchw ? 2 : 1), height);
   1187     NN_RET_CHECK_EQ(getSizeOfDimension(bboxDeltasShape, useNchw ? 3 : 2), width);
   1188     NN_RET_CHECK_EQ(getSizeOfDimension(bboxDeltasShape, useNchw ? 1 : 3), numAnchors * kRoiDim);
   1189     NN_RET_CHECK_EQ(getSizeOfDimension(imageInfoDataShape, 0), numBatches);
   1190     NN_RET_CHECK_EQ(getSizeOfDimension(imageInfoDataShape, 1), 2);
   1191     NN_RET_CHECK_EQ(getSizeOfDimension(anchorsShape, 0), numAnchors);
   1192     NN_RET_CHECK_EQ(getSizeOfDimension(anchorsShape, 1), kRoiDim);
   1193 
   1194     if (scoreShape.type == OperandType::TENSOR_QUANT8_ASYMM) {
   1195         NN_RET_CHECK_EQ(anchorsShape.scale, 0.125f);
   1196         NN_RET_CHECK_EQ(imageInfoDataShape.scale, 0.125f);
   1197         NN_RET_CHECK_EQ(imageInfoDataShape.offset, 0);
   1198     }
   1199 
   1200     outputScoreShape.type = scoreShape.type;
   1201     outputScoreShape.dimensions = {0};
   1202     outputScoreShape.scale = scoreShape.scale;
   1203     outputScoreShape.offset = scoreShape.offset;
   1204     NN_RET_CHECK(context->setOutputShape(kOutputScoreTensor, outputScoreShape));
   1205 
   1206     outputRoiShape.dimensions = {0, 4};
   1207     if (scoreShape.type == OperandType::TENSOR_QUANT8_ASYMM) {
   1208         outputRoiShape.scale = 0.125f;
   1209         outputRoiShape.offset = 0;
   1210     }
   1211     NN_RET_CHECK(context->setOutputShape(kOutputRoiTensor, outputRoiShape));
   1212 
   1213     outputBatchSplitShape.dimensions = {0};
   1214     NN_RET_CHECK(context->setOutputShape(kOutputBatchesTensor, outputBatchSplitShape));
   1215     return true;
   1216 }
   1217 
   1218 bool execute(IOperationExecutionContext* context) {
   1219     NNTRACE_TRANS("generateProposals");
   1220     switch (context->getInputType(kScoreTensor)) {
   1221         case OperandType::TENSOR_FLOAT16: {
   1222             return generateProposalsFloat16(context->getInputBuffer<_Float16>(kScoreTensor),
   1223                                             context->getInputShape(kScoreTensor),
   1224                                             context->getInputBuffer<_Float16>(kDeltaTensor),
   1225                                             context->getInputShape(kDeltaTensor),
   1226                                             context->getInputBuffer<_Float16>(kAnchorTensor),
   1227                                             context->getInputShape(kAnchorTensor),
   1228                                             context->getInputBuffer<_Float16>(kImageInfoTensor),
   1229                                             context->getInputShape(kImageInfoTensor),
   1230                                             context->getInputValue<_Float16>(kHeightStrideSalar),
   1231                                             context->getInputValue<_Float16>(kWidthStrideScalar),
   1232                                             context->getInputValue<int32_t>(kPreNmsMaxScalar),
   1233                                             context->getInputValue<int32_t>(kPostNmsMaxScalar),
   1234                                             context->getInputValue<_Float16>(kIoUThresholdScalar),
   1235                                             context->getInputValue<_Float16>(kMinSizeScalar),
   1236                                             context->getInputValue<bool>(kLayoutScalar), context);
   1237         }
   1238         case OperandType::TENSOR_FLOAT32: {
   1239             return generateProposalsFloat32(context->getInputBuffer<float>(kScoreTensor),
   1240                                             context->getInputShape(kScoreTensor),
   1241                                             context->getInputBuffer<float>(kDeltaTensor),
   1242                                             context->getInputShape(kDeltaTensor),
   1243                                             context->getInputBuffer<float>(kAnchorTensor),
   1244                                             context->getInputShape(kAnchorTensor),
   1245                                             context->getInputBuffer<float>(kImageInfoTensor),
   1246                                             context->getInputShape(kImageInfoTensor),
   1247                                             context->getInputValue<float>(kHeightStrideSalar),
   1248                                             context->getInputValue<float>(kWidthStrideScalar),
   1249                                             context->getInputValue<int32_t>(kPreNmsMaxScalar),
   1250                                             context->getInputValue<int32_t>(kPostNmsMaxScalar),
   1251                                             context->getInputValue<float>(kIoUThresholdScalar),
   1252                                             context->getInputValue<float>(kMinSizeScalar),
   1253                                             context->getInputValue<bool>(kLayoutScalar), context);
   1254         }
   1255         case OperandType::TENSOR_QUANT8_ASYMM: {
   1256             return generateProposalsQuant(context->getInputBuffer<uint8_t>(kScoreTensor),
   1257                                           context->getInputShape(kScoreTensor),
   1258                                           context->getInputBuffer<uint8_t>(kDeltaTensor),
   1259                                           context->getInputShape(kDeltaTensor),
   1260                                           context->getInputBuffer<int16_t>(kAnchorTensor),
   1261                                           context->getInputShape(kAnchorTensor),
   1262                                           context->getInputBuffer<uint16_t>(kImageInfoTensor),
   1263                                           context->getInputShape(kImageInfoTensor),
   1264                                           context->getInputValue<float>(kHeightStrideSalar),
   1265                                           context->getInputValue<float>(kWidthStrideScalar),
   1266                                           context->getInputValue<int32_t>(kPreNmsMaxScalar),
   1267                                           context->getInputValue<int32_t>(kPostNmsMaxScalar),
   1268                                           context->getInputValue<float>(kIoUThresholdScalar),
   1269                                           context->getInputValue<float>(kMinSizeScalar),
   1270                                           context->getInputValue<bool>(kLayoutScalar), context);
   1271         }
   1272         default:
   1273             NN_RET_CHECK_FAIL() << "Unsupported tensor type for operation " << kOperationName;
   1274     }
   1275 }
   1276 
   1277 }  // namespace generate_proposals
   1278 
   1279 namespace detection_postprocess {
   1280 
   1281 constexpr char kOperationName[] = "DETECTION_POSTPROCESS";
   1282 
   1283 constexpr uint32_t kNumInputs = 14;
   1284 constexpr uint32_t kScoreTensor = 0;
   1285 constexpr uint32_t kDeltaTensor = 1;
   1286 constexpr uint32_t kAnchorTensor = 2;
   1287 constexpr uint32_t kScaleYScalar = 3;
   1288 constexpr uint32_t kScaleXScalar = 4;
   1289 constexpr uint32_t kScaleHScalar = 5;
   1290 constexpr uint32_t kScaleWScalar = 6;
   1291 constexpr uint32_t kUseRegularNmsScalar = 7;
   1292 constexpr uint32_t kMaxNumDetectionScalar = 8;
   1293 constexpr uint32_t kMaxClassesPerDetectionScalar = 9;
   1294 constexpr uint32_t kMaxNumDetectionPerClassScalar = 10;
   1295 constexpr uint32_t kScoreThresholdScalar = 11;
   1296 constexpr uint32_t kIoUThresholdScalar = 12;
   1297 constexpr uint32_t kIsBGInLabelScalar = 13;
   1298 
   1299 constexpr uint32_t kNumOutputs = 4;
   1300 constexpr uint32_t kOutputScoreTensor = 0;
   1301 constexpr uint32_t kOutputRoiTensor = 1;
   1302 constexpr uint32_t kOutputClassTensor = 2;
   1303 constexpr uint32_t kOutputDetectionTensor = 3;
   1304 
   1305 namespace {
   1306 
   1307 bool detectionPostprocessFloat32(
   1308         const float* scoreData, const Shape& scoreShape, const float* deltaData,
   1309         const Shape& deltaShape, const float* anchorData, const Shape& anchorShape, float scaleY,
   1310         float scaleX, float scaleH, float scaleW, bool useRegularNms, int32_t maxNumDetections,
   1311         int32_t maxClassesPerDetection, int32_t maxNumDetectionsPerClass, float iouThreshold,
   1312         float scoreThreshold, bool isBGInLabel, float* scoreOutData, const Shape& scoreOutShape,
   1313         float* roiOutData, const Shape& roiOutShape, int32_t* classOutData,
   1314         const Shape& classOutShape, int32_t* detectionOutData, const Shape& detectionOutShape) {
   1315     const uint32_t kRoiDim = 4;
   1316     uint32_t numBatches = getSizeOfDimension(scoreShape, 0);
   1317     uint32_t numAnchors = getSizeOfDimension(scoreShape, 1);
   1318     uint32_t numClasses = getSizeOfDimension(scoreShape, 2);
   1319     uint32_t lengthBoxEncoding = getSizeOfDimension(deltaShape, 2);
   1320     uint32_t numOutDetection = getSizeOfDimension(scoreOutShape, 1);
   1321 
   1322     memset(scoreOutData, 0, getNumberOfElements(scoreOutShape) * sizeof(float));
   1323     memset(roiOutData, 0, getNumberOfElements(roiOutShape) * sizeof(float));
   1324     memset(classOutData, 0, getNumberOfElements(classOutShape) * sizeof(int32_t));
   1325     memset(detectionOutData, 0, getNumberOfElements(detectionOutShape) * sizeof(int32_t));
   1326 
   1327     const float* scoreBase = scoreData;
   1328     const float* deltaBase = deltaData;
   1329     float* scoreOutBase = scoreOutData;
   1330     float* roiOutBase = roiOutData;
   1331     int32_t* classOutBase = classOutData;
   1332     std::vector<float> roiBuffer(numAnchors * kRoiDim);
   1333     std::vector<float> scoreBuffer(numAnchors);
   1334     for (uint32_t b = 0; b < numBatches; b++) {
   1335         const float* anchorBase = anchorData;
   1336         for (uint32_t a = 0; a < numAnchors; a++) {
   1337             float yCtr = anchorBase[0] + anchorBase[2] * deltaBase[0] / scaleY;
   1338             float xCtr = anchorBase[1] + anchorBase[3] * deltaBase[1] / scaleX;
   1339             float hHalf = anchorBase[2] * std::exp(deltaBase[2] / scaleH) * 0.5f;
   1340             float wHalf = anchorBase[3] * std::exp(deltaBase[3] / scaleW) * 0.5f;
   1341             roiBuffer[a * kRoiDim] = yCtr - hHalf;
   1342             roiBuffer[a * kRoiDim + 1] = xCtr - wHalf;
   1343             roiBuffer[a * kRoiDim + 2] = yCtr + hHalf;
   1344             roiBuffer[a * kRoiDim + 3] = xCtr + wHalf;
   1345             anchorBase += kRoiDim;
   1346             deltaBase += lengthBoxEncoding;
   1347         }
   1348 
   1349         if (useRegularNms) {
   1350             std::vector<uint32_t> select;
   1351             box_with_nms_limit::hardNmsMultiClass(
   1352                     scoreBase, numClasses, numAnchors, scoreThreshold, iouThreshold,
   1353                     maxNumDetections, maxNumDetectionsPerClass,
   1354                     [&roiBuffer, numClasses](uint32_t ind) {
   1355                         return roiBuffer.data() + (ind / numClasses) * kRoiDim;
   1356                     },
   1357                     &select);
   1358             for (uint32_t i = 0; i < select.size(); i++) {
   1359                 uint32_t ind = select[i];
   1360                 scoreOutBase[i] = scoreBase[ind];
   1361                 memcpy(roiOutBase + i * kRoiDim, &roiBuffer[(ind / numClasses) * kRoiDim],
   1362                        kRoiDim * sizeof(float));
   1363                 classOutBase[i] = (ind % numClasses) - (isBGInLabel ? 0 : 1);
   1364             }
   1365             *detectionOutData++ = select.size();
   1366         } else {
   1367             uint32_t numOutClasses = std::min<uint32_t>(numClasses - 1, maxClassesPerDetection);
   1368             std::vector<float> maxScores(numAnchors);
   1369             for (uint32_t a = 0; a < numAnchors; a++) {
   1370                 maxScores[a] = *std::max_element(scoreBase + a * numClasses + 1,
   1371                                                  scoreBase + (a + 1) * numClasses);
   1372             }
   1373             std::vector<uint32_t> select;
   1374             for (uint32_t a = 0; a < numAnchors; a++) {
   1375                 if (maxScores[a] > scoreThreshold) {
   1376                     select.push_back(a);
   1377                 }
   1378             }
   1379             uint32_t* selectEnd = box_with_nms_limit::hardNmsSingleClass(
   1380                     maxScores.data(), iouThreshold, maxNumDetections,
   1381                     [&roiBuffer](uint32_t ind) { return roiBuffer.data() + ind * kRoiDim; },
   1382                     select.data(), select.size());
   1383             select.resize(selectEnd - select.data());
   1384             float* scoreOutPtr = scoreOutBase;
   1385             float* roiOutPtr = roiOutBase;
   1386             int32_t* classOutPtr = classOutBase;
   1387             for (auto i : select) {
   1388                 const float* score = scoreBase + i * numClasses;
   1389                 std::vector<uint32_t> scoreInds(numClasses - 1);
   1390                 std::iota(scoreInds.begin(), scoreInds.end(), 1);
   1391                 std::sort(scoreInds.begin(), scoreInds.end(),
   1392                           [&score](const uint32_t lhs, const uint32_t rhs) {
   1393                               return score[lhs] > score[rhs];
   1394                           });
   1395                 for (uint32_t c = 0; c < numOutClasses; c++) {
   1396                     *scoreOutPtr++ = score[scoreInds[c]];
   1397                     memcpy(roiOutPtr, &roiBuffer[i * kRoiDim], kRoiDim * sizeof(float));
   1398                     roiOutPtr += kRoiDim;
   1399                     *classOutPtr++ = scoreInds[c] - (isBGInLabel ? 0 : 1);
   1400                 }
   1401             }
   1402             *detectionOutData++ = select.size() * numOutClasses;
   1403         }
   1404         scoreBase += numAnchors * numClasses;
   1405         scoreOutBase += numOutDetection;
   1406         roiOutBase += numOutDetection * kRoiDim;
   1407         classOutBase += numOutDetection;
   1408     }
   1409     return true;
   1410 }
   1411 
   1412 bool detectionPostprocessFloat16(
   1413         const _Float16* scoreData, const Shape& scoreShape, const _Float16* deltaData,
   1414         const Shape& deltaShape, const _Float16* anchorData, const Shape& anchorShape, float scaleY,
   1415         float scaleX, float scaleH, float scaleW, bool useRegularNms, int32_t maxNumDetections,
   1416         int32_t maxClassesPerDetection, int32_t maxNumDetectionsPerClass, float iouThreshold,
   1417         float scoreThreshold, bool isBGInLabel, _Float16* scoreOutData, const Shape& scoreOutShape,
   1418         _Float16* roiOutData, const Shape& roiOutShape, int32_t* classOutData,
   1419         const Shape& classOutShape, int32_t* detectionOutData, const Shape& detectionOutShape) {
   1420     std::vector<float> scores_float32(getNumberOfElements(scoreShape));
   1421     convertFloat16ToFloat32(scoreData, &scores_float32);
   1422     std::vector<float> delta_float32(getNumberOfElements(deltaShape));
   1423     convertFloat16ToFloat32(deltaData, &delta_float32);
   1424     std::vector<float> anchor_float32(getNumberOfElements(anchorShape));
   1425     convertFloat16ToFloat32(anchorData, &anchor_float32);
   1426     std::vector<float> outputScore_float32(getNumberOfElements(scoreOutShape));
   1427     std::vector<float> outputRoi_float32(getNumberOfElements(roiOutShape));
   1428     NN_RET_CHECK(detectionPostprocessFloat32(
   1429             scores_float32.data(), scoreShape, delta_float32.data(), deltaShape,
   1430             anchor_float32.data(), anchorShape, scaleY, scaleX, scaleH, scaleW, useRegularNms,
   1431             maxNumDetections, maxClassesPerDetection, maxNumDetectionsPerClass, iouThreshold,
   1432             scoreThreshold, isBGInLabel, outputScore_float32.data(), scoreOutShape,
   1433             outputRoi_float32.data(), roiOutShape, classOutData, classOutShape, detectionOutData,
   1434             detectionOutShape));
   1435     convertFloat32ToFloat16(outputScore_float32, scoreOutData);
   1436     convertFloat32ToFloat16(outputRoi_float32, roiOutData);
   1437     return true;
   1438 }
   1439 
   1440 }  // namespace
   1441 
   1442 bool validate(const IOperationValidationContext* context) {
   1443     NN_RET_CHECK_EQ(context->getNumInputs(), kNumInputs);
   1444     NN_RET_CHECK_EQ(context->getNumOutputs(), kNumOutputs);
   1445     std::vector<OperandType> inExpectedTypes;
   1446     std::vector<OperandType> outExpectedTypes;
   1447     auto inputType = context->getInputType(kScoreTensor);
   1448     if (inputType == OperandType::TENSOR_FLOAT16) {
   1449         inExpectedTypes = {OperandType::TENSOR_FLOAT16, OperandType::TENSOR_FLOAT16,
   1450                            OperandType::TENSOR_FLOAT16, OperandType::FLOAT16,
   1451                            OperandType::FLOAT16,        OperandType::FLOAT16,
   1452                            OperandType::FLOAT16,        OperandType::BOOL,
   1453                            OperandType::INT32,          OperandType::INT32,
   1454                            OperandType::INT32,          OperandType::FLOAT16,
   1455                            OperandType::FLOAT16,        OperandType::BOOL};
   1456     } else if (inputType == OperandType::TENSOR_FLOAT32) {
   1457         inExpectedTypes = {OperandType::TENSOR_FLOAT32, OperandType::TENSOR_FLOAT32,
   1458                            OperandType::TENSOR_FLOAT32, OperandType::FLOAT32,
   1459                            OperandType::FLOAT32,        OperandType::FLOAT32,
   1460                            OperandType::FLOAT32,        OperandType::BOOL,
   1461                            OperandType::INT32,          OperandType::INT32,
   1462                            OperandType::INT32,          OperandType::FLOAT32,
   1463                            OperandType::FLOAT32,        OperandType::BOOL};
   1464     } else {
   1465         NN_RET_CHECK_FAIL() << "Unsupported tensor type for operation " << kOperationName;
   1466     }
   1467     NN_RET_CHECK(validateInputTypes(context, inExpectedTypes));
   1468     NN_RET_CHECK(validateOutputTypes(
   1469             context, {inputType, inputType, OperandType::TENSOR_INT32, OperandType::TENSOR_INT32}));
   1470     return validateHalVersion(context, HalVersion::V1_2);
   1471 }
   1472 
   1473 bool prepare(IOperationExecutionContext* context) {
   1474     Shape scoreShape = context->getInputShape(kScoreTensor);
   1475     Shape deltasShape = context->getInputShape(kDeltaTensor);
   1476     Shape anchorsShape = context->getInputShape(kAnchorTensor);
   1477     Shape outputScoreShape = context->getOutputShape(kOutputScoreTensor);
   1478     Shape outputRoiShape = context->getOutputShape(kOutputRoiTensor);
   1479     Shape outputClassShape = context->getOutputShape(kOutputClassTensor);
   1480     Shape outputDetectionShape = context->getOutputShape(kOutputDetectionTensor);
   1481 
   1482     NN_RET_CHECK_EQ(getNumberOfDimensions(scoreShape), 3);
   1483     NN_RET_CHECK_EQ(getNumberOfDimensions(deltasShape), 3);
   1484     NN_RET_CHECK_EQ(getNumberOfDimensions(anchorsShape), 2);
   1485 
   1486     const uint32_t kRoiDim = 4;
   1487     uint32_t numBatches = getSizeOfDimension(scoreShape, 0);
   1488     uint32_t numAnchors = getSizeOfDimension(scoreShape, 1);
   1489     uint32_t numClasses = getSizeOfDimension(scoreShape, 2);
   1490     uint32_t lengthBoxEncoding = getSizeOfDimension(deltasShape, 2);
   1491     uint32_t maxNumDetections = context->getInputValue<int32_t>(kMaxNumDetectionScalar);
   1492     uint32_t maxClassesPerDetection =
   1493             context->getInputValue<int32_t>(kMaxClassesPerDetectionScalar);
   1494     uint32_t numOutDetections = maxNumDetections;
   1495 
   1496     NN_RET_CHECK_EQ(getSizeOfDimension(deltasShape, 0), numBatches);
   1497     NN_RET_CHECK_EQ(getSizeOfDimension(deltasShape, 1), numAnchors);
   1498     NN_RET_CHECK_EQ(getSizeOfDimension(anchorsShape, 0), numAnchors);
   1499     NN_RET_CHECK_EQ(getSizeOfDimension(anchorsShape, 1), kRoiDim);
   1500 
   1501     if (scoreShape.type == OperandType::TENSOR_FLOAT32) {
   1502         NN_RET_CHECK_GT(context->getInputValue<float>(kScaleYScalar), 0);
   1503         NN_RET_CHECK_GT(context->getInputValue<float>(kScaleXScalar), 0);
   1504         NN_RET_CHECK_GT(context->getInputValue<float>(kScaleHScalar), 0);
   1505         NN_RET_CHECK_GT(context->getInputValue<float>(kScaleWScalar), 0);
   1506         NN_RET_CHECK_GE(context->getInputValue<float>(kScoreThresholdScalar), 0);
   1507         NN_RET_CHECK_GE(context->getInputValue<float>(kIoUThresholdScalar), 0);
   1508     } else if (scoreShape.type == OperandType::TENSOR_FLOAT16) {
   1509         NN_RET_CHECK(context->getInputValue<_Float16>(kScaleYScalar) > 0);
   1510         NN_RET_CHECK(context->getInputValue<_Float16>(kScaleXScalar) > 0);
   1511         NN_RET_CHECK(context->getInputValue<_Float16>(kScaleHScalar) > 0);
   1512         NN_RET_CHECK(context->getInputValue<_Float16>(kScaleWScalar) > 0);
   1513         NN_RET_CHECK(context->getInputValue<_Float16>(kScoreThresholdScalar) >= 0);
   1514         NN_RET_CHECK(context->getInputValue<_Float16>(kIoUThresholdScalar) >= 0);
   1515     } else {
   1516         NN_RET_CHECK_FAIL() << "Unsupported tensor type for operation " << kOperationName;
   1517     }
   1518     NN_RET_CHECK_GT(numClasses, 1);
   1519     NN_RET_CHECK_GE(lengthBoxEncoding, 4);
   1520     NN_RET_CHECK_GT(maxNumDetections, 0);
   1521     if (context->getInputValue<bool>(kUseRegularNmsScalar)) {
   1522         NN_RET_CHECK_GT(context->getInputValue<int32_t>(kMaxNumDetectionPerClassScalar), 0);
   1523     } else {
   1524         NN_RET_CHECK_GT(maxClassesPerDetection, 0);
   1525         numOutDetections *= maxClassesPerDetection;
   1526     }
   1527 
   1528     outputScoreShape.type = scoreShape.type;
   1529     outputScoreShape.dimensions = {numBatches, numOutDetections};
   1530     NN_RET_CHECK(context->setOutputShape(kOutputScoreTensor, outputScoreShape));
   1531 
   1532     outputRoiShape.type = anchorsShape.type;
   1533     outputRoiShape.dimensions = {numBatches, numOutDetections, 4};
   1534     NN_RET_CHECK(context->setOutputShape(kOutputRoiTensor, outputRoiShape));
   1535 
   1536     outputClassShape.type = OperandType::TENSOR_INT32;
   1537     outputClassShape.dimensions = {numBatches, numOutDetections};
   1538     NN_RET_CHECK(context->setOutputShape(kOutputClassTensor, outputClassShape));
   1539 
   1540     outputDetectionShape.type = OperandType::TENSOR_INT32;
   1541     outputDetectionShape.dimensions = {numBatches};
   1542     NN_RET_CHECK(context->setOutputShape(kOutputDetectionTensor, outputDetectionShape));
   1543     return true;
   1544 }
   1545 
   1546 bool execute(IOperationExecutionContext* context) {
   1547     NNTRACE_TRANS("detectionPostProcess");
   1548     switch (context->getInputType(kScoreTensor)) {
   1549         case OperandType::TENSOR_FLOAT16: {
   1550             return detectionPostprocessFloat16(
   1551                     context->getInputBuffer<_Float16>(kScoreTensor),
   1552                     context->getInputShape(kScoreTensor),
   1553                     context->getInputBuffer<_Float16>(kDeltaTensor),
   1554                     context->getInputShape(kDeltaTensor),
   1555                     context->getInputBuffer<_Float16>(kAnchorTensor),
   1556                     context->getInputShape(kAnchorTensor),
   1557                     context->getInputValue<_Float16>(kScaleYScalar),
   1558                     context->getInputValue<_Float16>(kScaleXScalar),
   1559                     context->getInputValue<_Float16>(kScaleHScalar),
   1560                     context->getInputValue<_Float16>(kScaleWScalar),
   1561                     context->getInputValue<bool>(kUseRegularNmsScalar),
   1562                     context->getInputValue<int32_t>(kMaxNumDetectionScalar),
   1563                     context->getInputValue<int32_t>(kMaxClassesPerDetectionScalar),
   1564                     context->getInputValue<int32_t>(kMaxNumDetectionPerClassScalar),
   1565                     context->getInputValue<_Float16>(kIoUThresholdScalar),
   1566                     context->getInputValue<_Float16>(kScoreThresholdScalar),
   1567                     context->getInputValue<bool>(kIsBGInLabelScalar),
   1568                     context->getOutputBuffer<_Float16>(kOutputScoreTensor),
   1569                     context->getOutputShape(kOutputScoreTensor),
   1570                     context->getOutputBuffer<_Float16>(kOutputRoiTensor),
   1571                     context->getOutputShape(kOutputRoiTensor),
   1572                     context->getOutputBuffer<int32_t>(kOutputClassTensor),
   1573                     context->getOutputShape(kOutputClassTensor),
   1574                     context->getOutputBuffer<int32_t>(kOutputDetectionTensor),
   1575                     context->getOutputShape(kOutputDetectionTensor));
   1576         }
   1577         case OperandType::TENSOR_FLOAT32: {
   1578             return detectionPostprocessFloat32(
   1579                     context->getInputBuffer<float>(kScoreTensor),
   1580                     context->getInputShape(kScoreTensor),
   1581                     context->getInputBuffer<float>(kDeltaTensor),
   1582                     context->getInputShape(kDeltaTensor),
   1583                     context->getInputBuffer<float>(kAnchorTensor),
   1584                     context->getInputShape(kAnchorTensor),
   1585                     context->getInputValue<float>(kScaleYScalar),
   1586                     context->getInputValue<float>(kScaleXScalar),
   1587                     context->getInputValue<float>(kScaleHScalar),
   1588                     context->getInputValue<float>(kScaleWScalar),
   1589                     context->getInputValue<bool>(kUseRegularNmsScalar),
   1590                     context->getInputValue<int32_t>(kMaxNumDetectionScalar),
   1591                     context->getInputValue<int32_t>(kMaxClassesPerDetectionScalar),
   1592                     context->getInputValue<int32_t>(kMaxNumDetectionPerClassScalar),
   1593                     context->getInputValue<float>(kIoUThresholdScalar),
   1594                     context->getInputValue<float>(kScoreThresholdScalar),
   1595                     context->getInputValue<bool>(kIsBGInLabelScalar),
   1596                     context->getOutputBuffer<float>(kOutputScoreTensor),
   1597                     context->getOutputShape(kOutputScoreTensor),
   1598                     context->getOutputBuffer<float>(kOutputRoiTensor),
   1599                     context->getOutputShape(kOutputRoiTensor),
   1600                     context->getOutputBuffer<int32_t>(kOutputClassTensor),
   1601                     context->getOutputShape(kOutputClassTensor),
   1602                     context->getOutputBuffer<int32_t>(kOutputDetectionTensor),
   1603                     context->getOutputShape(kOutputDetectionTensor));
   1604         }
   1605         default:
   1606             NN_RET_CHECK_FAIL() << "Unsupported tensor type for operation " << kOperationName;
   1607     }
   1608 }
   1609 
   1610 }  // namespace detection_postprocess
   1611 
   1612 }  // namespace bbox_ops
   1613 
   1614 NN_REGISTER_OPERATION(AXIS_ALIGNED_BBOX_TRANSFORM,
   1615                       bbox_ops::axis_aligned_bbox_transform::kOperationName,
   1616                       bbox_ops::axis_aligned_bbox_transform::validate,
   1617                       bbox_ops::axis_aligned_bbox_transform::prepare,
   1618                       bbox_ops::axis_aligned_bbox_transform::execute, .allowZeroSizedInput = true);
   1619 
   1620 NN_REGISTER_OPERATION(BOX_WITH_NMS_LIMIT, bbox_ops::box_with_nms_limit::kOperationName,
   1621                       bbox_ops::box_with_nms_limit::validate, bbox_ops::box_with_nms_limit::prepare,
   1622                       bbox_ops::box_with_nms_limit::execute, .allowZeroSizedInput = true);
   1623 
   1624 NN_REGISTER_OPERATION(GENERATE_PROPOSALS, bbox_ops::generate_proposals::kOperationName,
   1625                       bbox_ops::generate_proposals::validate, bbox_ops::generate_proposals::prepare,
   1626                       bbox_ops::generate_proposals::execute);
   1627 
   1628 NN_REGISTER_OPERATION(DETECTION_POSTPROCESSING, bbox_ops::detection_postprocess::kOperationName,
   1629                       bbox_ops::detection_postprocess::validate,
   1630                       bbox_ops::detection_postprocess::prepare,
   1631                       bbox_ops::detection_postprocess::execute);
   1632 }  // namespace nn
   1633 }  // namespace android
   1634