1 /* 2 * Copyright (C) 2018 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #include "CpuOperationUtils.h" 18 #include "OperationResolver.h" 19 #include "OperationsUtils.h" 20 21 #include <cfloat> 22 #include <cmath> 23 #include <numeric> 24 25 #include "Tracing.h" 26 27 namespace android { 28 namespace nn { 29 namespace bbox_ops { 30 31 namespace { 32 33 struct BoxEncodingCorner { 34 float x1, y1, x2, y2; 35 }; 36 struct BoxEncodingCenter { 37 float w, h, x, y; 38 }; 39 BoxEncodingCorner toBoxEncodingCorner(const BoxEncodingCenter& ctr) { 40 return {.x1 = ctr.x - ctr.w / 2, 41 .y1 = ctr.y - ctr.h / 2, 42 .x2 = ctr.x + ctr.w / 2, 43 .y2 = ctr.y + ctr.h / 2}; 44 } 45 BoxEncodingCenter toBoxEncodingCenter(const BoxEncodingCorner& cnr) { 46 return {.w = cnr.x2 - cnr.x1, 47 .h = cnr.y2 - cnr.y1, 48 .x = (cnr.x1 + cnr.x2) / 2, 49 .y = (cnr.y1 + cnr.y2) / 2}; 50 } 51 52 inline bool bboxTransformFloat32(const float* roiData, const Shape& roiShape, 53 const float* bboxDeltasData, const Shape& bboxDeltasShape, 54 const int32_t* batchesData, const Shape& batchesShape, 55 const float* imageInfoData, const Shape& imageInfoDataShape, 56 float* outputData, const Shape& outputShape) { 57 const uint32_t roiLength = 4; 58 const uint32_t imageLength = 2; 59 60 uint32_t numClasses = getSizeOfDimension(bboxDeltasShape, 1) / roiLength; 61 uint32_t numBatches = getSizeOfDimension(imageInfoDataShape, 0); 62 63 const float* 
roiDataEnd = roiData + getNumberOfElements(roiShape); 64 const float* deltas = bboxDeltasData; 65 float* outPtr = outputData; 66 uint32_t roiIndex = 0; 67 for (const float* roiBase = roiData; roiBase < roiDataEnd; roiBase += roiLength, roiIndex++) { 68 uint32_t batchIndex = batchesData[roiIndex]; 69 // Check for malformed data 70 // 1. Invalid batch id 71 // 2. Invalid region: x2 < x1 || y2 < y1 72 NN_RET_CHECK_GE(batchIndex, 0); 73 NN_RET_CHECK_LT(batchIndex, numBatches); 74 NN_RET_CHECK_LE(roiBase[0], roiBase[2]); 75 NN_RET_CHECK_LE(roiBase[1], roiBase[3]); 76 77 const float* imageInfoBase = imageInfoData + batchIndex * imageLength; 78 float imageHeight = imageInfoBase[0]; 79 float imageWidth = imageInfoBase[1]; 80 auto roiBefore = toBoxEncodingCenter( 81 {.x1 = roiBase[0], .y1 = roiBase[1], .x2 = roiBase[2], .y2 = roiBase[3]}); 82 for (uint32_t i = 0; i < numClasses; i++) { 83 auto roiAfter = toBoxEncodingCorner({.w = std::exp(deltas[2]) * roiBefore.w, 84 .h = std::exp(deltas[3]) * roiBefore.h, 85 .x = roiBefore.x + deltas[0] * roiBefore.w, 86 .y = roiBefore.y + deltas[1] * roiBefore.h}); 87 BoxEncodingCorner cliped = {.x1 = std::min(std::max(roiAfter.x1, 0.0f), imageWidth), 88 .y1 = std::min(std::max(roiAfter.y1, 0.0f), imageHeight), 89 .x2 = std::min(std::max(roiAfter.x2, 0.0f), imageWidth), 90 .y2 = std::min(std::max(roiAfter.y2, 0.0f), imageHeight)}; 91 outPtr[0] = cliped.x1; 92 outPtr[1] = cliped.y1; 93 outPtr[2] = cliped.x2; 94 outPtr[3] = cliped.y2; 95 deltas += roiLength; 96 outPtr += roiLength; 97 } 98 } 99 return true; 100 } 101 102 inline bool bboxTransformFloat16(const _Float16* roiData, const Shape& roiShape, 103 const _Float16* bboxDeltasData, const Shape& bboxDeltasShape, 104 const int32_t* batchesData, const Shape& batchesShape, 105 const _Float16* imageInfoData, const Shape& imageInfoDataShape, 106 _Float16* outputData, const Shape& outputShape) { 107 std::vector<float> roi_float32(getNumberOfElements(roiShape)); 108 
convertFloat16ToFloat32(roiData, &roi_float32); 109 std::vector<float> delta_float32(getNumberOfElements(bboxDeltasShape)); 110 convertFloat16ToFloat32(bboxDeltasData, &delta_float32); 111 std::vector<float> imageInfo_float32(getNumberOfElements(imageInfoDataShape)); 112 convertFloat16ToFloat32(imageInfoData, &imageInfo_float32); 113 std::vector<float> output_float32(getNumberOfElements(outputShape)); 114 NN_RET_CHECK(bboxTransformFloat32(roi_float32.data(), roiShape, delta_float32.data(), 115 bboxDeltasShape, batchesData, batchesShape, 116 imageInfo_float32.data(), imageInfoDataShape, 117 output_float32.data(), outputShape)); 118 convertFloat32ToFloat16(output_float32, outputData); 119 return true; 120 } 121 122 inline bool bboxTransformQuant(const uint16_t* roiData, const Shape& roiShape, 123 const uint8_t* bboxDeltasData, const Shape& bboxDeltasShape, 124 const int32_t* batchesData, const Shape& batchesShape, 125 const uint16_t* imageInfoData, const Shape& imageInfoDataShape, 126 uint16_t* outputData, const Shape& outputShape) { 127 std::vector<float> roi_float32(getNumberOfElements(roiShape)); 128 convertQuantToFloat32(roiData, roiShape.scale, roiShape.offset, &roi_float32); 129 std::vector<float> delta_float32(getNumberOfElements(bboxDeltasShape)); 130 convertQuantToFloat32(bboxDeltasData, bboxDeltasShape.scale, bboxDeltasShape.offset, 131 &delta_float32); 132 std::vector<float> imageInfo_float32(getNumberOfElements(imageInfoDataShape)); 133 convertQuantToFloat32(imageInfoData, imageInfoDataShape.scale, imageInfoDataShape.offset, 134 &imageInfo_float32); 135 std::vector<float> output_float32(getNumberOfElements(outputShape)); 136 NN_RET_CHECK(bboxTransformFloat32(roi_float32.data(), roiShape, delta_float32.data(), 137 bboxDeltasShape, batchesData, batchesShape, 138 imageInfo_float32.data(), imageInfoDataShape, 139 output_float32.data(), outputShape)); 140 convertFloat32ToQuant(output_float32, outputShape.scale, outputShape.offset, outputData); 141 return true; 
142 } 143 144 // Taking two indices of bounding boxes, return the intersection-of-union. 145 float getIoUAxisAligned(const float* roi1, const float* roi2) { 146 const float area1 = (roi1[2] - roi1[0]) * (roi1[3] - roi1[1]); 147 const float area2 = (roi2[2] - roi2[0]) * (roi2[3] - roi2[1]); 148 const float x1 = std::max(roi1[0], roi2[0]); 149 const float x2 = std::min(roi1[2], roi2[2]); 150 const float y1 = std::max(roi1[1], roi2[1]); 151 const float y2 = std::min(roi1[3], roi2[3]); 152 const float w = std::max(x2 - x1, 0.0f); 153 const float h = std::max(y2 - y1, 0.0f); 154 const float areaIntersect = w * h; 155 const float areaUnion = area1 + area2 - areaIntersect; 156 return areaIntersect / areaUnion; 157 } 158 159 } // namespace 160 161 namespace axis_aligned_bbox_transform { 162 163 constexpr char kOperationName[] = "AXIS_ALIGNED_BBOX_TRANSFORM"; 164 165 constexpr uint32_t kNumInputs = 4; 166 constexpr uint32_t kRoiTensor = 0; 167 constexpr uint32_t kDeltaTensor = 1; 168 constexpr uint32_t kBatchesTensor = 2; 169 constexpr uint32_t kImageInfoTensor = 3; 170 171 constexpr uint32_t kNumOutputs = 1; 172 constexpr uint32_t kOutputTensor = 0; 173 174 bool validate(const IOperationValidationContext* context) { 175 NN_RET_CHECK_EQ(context->getNumInputs(), kNumInputs); 176 NN_RET_CHECK_EQ(context->getNumOutputs(), kNumOutputs); 177 std::vector<OperandType> inExpectedTypes; 178 auto inputType = context->getInputType(kRoiTensor); 179 if (inputType == OperandType::TENSOR_FLOAT32 || inputType == OperandType::TENSOR_FLOAT16) { 180 inExpectedTypes = {inputType, inputType, OperandType::TENSOR_INT32, inputType}; 181 } else if (inputType == OperandType::TENSOR_QUANT16_ASYMM) { 182 inExpectedTypes = {OperandType::TENSOR_QUANT16_ASYMM, OperandType::TENSOR_QUANT8_ASYMM, 183 OperandType::TENSOR_INT32, OperandType::TENSOR_QUANT16_ASYMM}; 184 } else { 185 LOG(ERROR) << "Unsupported input tensor type for operation " << kOperationName; 186 return false; 187 } 188 
NN_RET_CHECK(validateInputTypes(context, inExpectedTypes)); 189 NN_RET_CHECK(validateOutputTypes(context, {inputType})); 190 return validateHalVersion(context, HalVersion::V1_2); 191 } 192 193 bool prepare(IOperationExecutionContext* context) { 194 Shape roiShape = context->getInputShape(kRoiTensor); 195 Shape bboxDeltasShape = context->getInputShape(kDeltaTensor); 196 Shape batchesShape = context->getInputShape(kBatchesTensor); 197 Shape imageInfoShape = context->getInputShape(kImageInfoTensor); 198 Shape outputShape = context->getOutputShape(kOutputTensor); 199 200 NN_RET_CHECK_EQ(getNumberOfDimensions(roiShape), 2); 201 NN_RET_CHECK_EQ(getNumberOfDimensions(bboxDeltasShape), 2); 202 NN_RET_CHECK_EQ(getNumberOfDimensions(batchesShape), 1); 203 NN_RET_CHECK_EQ(getNumberOfDimensions(imageInfoShape), 2); 204 205 // Only numRois can be zero. 206 const uint32_t kRoiDim = 4; 207 uint32_t numRois = getSizeOfDimension(roiShape, 0); 208 uint32_t numClasses = getSizeOfDimension(bboxDeltasShape, 1) / kRoiDim; 209 uint32_t numBatches = getSizeOfDimension(imageInfoShape, 0); 210 NN_RET_CHECK_GT(numClasses, 0); 211 NN_RET_CHECK_GT(numBatches, 0); 212 NN_RET_CHECK_EQ(getSizeOfDimension(roiShape, 1), kRoiDim); 213 NN_RET_CHECK_EQ(getSizeOfDimension(bboxDeltasShape, 0), numRois); 214 NN_RET_CHECK_EQ(getSizeOfDimension(bboxDeltasShape, 1), kRoiDim * numClasses); 215 NN_RET_CHECK_EQ(getSizeOfDimension(batchesShape, 0), numRois); 216 NN_RET_CHECK_EQ(getSizeOfDimension(imageInfoShape, 1), 2); 217 218 if (roiShape.type == OperandType::TENSOR_QUANT16_ASYMM) { 219 NN_RET_CHECK_EQ(roiShape.scale, 0.125f); 220 NN_RET_CHECK_EQ(roiShape.offset, 0); 221 NN_RET_CHECK_EQ(imageInfoShape.scale, 0.125f); 222 NN_RET_CHECK_EQ(imageInfoShape.offset, 0); 223 } 224 225 outputShape.type = roiShape.type; 226 outputShape.dimensions = {numRois, numClasses * kRoiDim}; 227 outputShape.scale = 0.125f; 228 outputShape.offset = 0; 229 NN_RET_CHECK(context->setOutputShape(kOutputTensor, outputShape)); 230 
return true; 231 } 232 233 bool execute(IOperationExecutionContext* context) { 234 NNTRACE_TRANS("axisAlignedBBoxTransform"); 235 // Bypass execution in the case of zero-sized input. 236 if (getNumberOfElements(context->getOutputShape(kOutputTensor)) == 0) return true; 237 switch (context->getInputType(kRoiTensor)) { 238 case OperandType::TENSOR_FLOAT16: { 239 return bboxTransformFloat16(context->getInputBuffer<_Float16>(kRoiTensor), 240 context->getInputShape(kRoiTensor), 241 context->getInputBuffer<_Float16>(kDeltaTensor), 242 context->getInputShape(kDeltaTensor), 243 context->getInputBuffer<int32_t>(kBatchesTensor), 244 context->getInputShape(kBatchesTensor), 245 context->getInputBuffer<_Float16>(kImageInfoTensor), 246 context->getInputShape(kImageInfoTensor), 247 context->getOutputBuffer<_Float16>(kOutputTensor), 248 context->getOutputShape(kOutputTensor)); 249 } 250 case OperandType::TENSOR_FLOAT32: { 251 return bboxTransformFloat32(context->getInputBuffer<float>(kRoiTensor), 252 context->getInputShape(kRoiTensor), 253 context->getInputBuffer<float>(kDeltaTensor), 254 context->getInputShape(kDeltaTensor), 255 context->getInputBuffer<int32_t>(kBatchesTensor), 256 context->getInputShape(kBatchesTensor), 257 context->getInputBuffer<float>(kImageInfoTensor), 258 context->getInputShape(kImageInfoTensor), 259 context->getOutputBuffer<float>(kOutputTensor), 260 context->getOutputShape(kOutputTensor)); 261 } 262 case OperandType::TENSOR_QUANT16_ASYMM: { 263 return bboxTransformQuant(context->getInputBuffer<uint16_t>(kRoiTensor), 264 context->getInputShape(kRoiTensor), 265 context->getInputBuffer<uint8_t>(kDeltaTensor), 266 context->getInputShape(kDeltaTensor), 267 context->getInputBuffer<int32_t>(kBatchesTensor), 268 context->getInputShape(kBatchesTensor), 269 context->getInputBuffer<uint16_t>(kImageInfoTensor), 270 context->getInputShape(kImageInfoTensor), 271 context->getOutputBuffer<uint16_t>(kOutputTensor), 272 context->getOutputShape(kOutputTensor)); 273 } 274 
default: 275 NN_RET_CHECK_FAIL() << "Unsupported tensor type for operation " << kOperationName; 276 } 277 } 278 279 } // namespace axis_aligned_bbox_transform 280 281 namespace box_with_nms_limit { 282 283 constexpr char kOperationName[] = "BOX_WITH_NMS_LIMIT"; 284 285 constexpr uint32_t kNumInputs = 9; 286 constexpr uint32_t kScoreTensor = 0; 287 constexpr uint32_t kRoiTensor = 1; 288 constexpr uint32_t kBatchesTensor = 2; 289 constexpr uint32_t kScoreThresholdScalar = 3; 290 constexpr uint32_t kMaxNumDetectionScalar = 4; 291 constexpr uint32_t kNmsKernelScalar = 5; 292 constexpr uint32_t kIoUThresholdScalar = 6; 293 constexpr uint32_t kSigmaScalar = 7; 294 constexpr uint32_t kNmsScoreThresholdScalar = 8; 295 296 constexpr uint32_t kNumOutputs = 4; 297 constexpr uint32_t kOutputScoreTensor = 0; 298 constexpr uint32_t kOutputRoiTensor = 1; 299 constexpr uint32_t kOutputClassTensor = 2; 300 constexpr uint32_t kOutputBatchesTensor = 3; 301 302 namespace { 303 304 // TODO(xusongw): Reduce code duplication with hard/soft nms path. 305 306 // Inplace hard NMS within range [select, select + selectLength). 307 uint32_t* hardNmsSingleClass(const float* scoresData, float iouThreshold, int32_t maxNumDetections, 308 std::function<const float*(uint32_t)> getRoiBase, uint32_t* select, 309 uint32_t selectLength) { 310 uint32_t *selectStart = select, *selectEnd = select + selectLength, numDetections = 0; 311 if (maxNumDetections < 0) { 312 maxNumDetections = selectLength; 313 } 314 while (selectStart < selectEnd && numDetections < maxNumDetections) { 315 // find max score and swap to the front 316 auto& maxScore = *std::max_element(selectStart, selectEnd, 317 [&scoresData](const uint32_t& lhs, const uint32_t& rhs) { 318 return scoresData[lhs] < scoresData[rhs]; 319 }); 320 std::swap(maxScore, *selectStart); 321 322 // Calculate IoU of the rest, swap to the end (disgard) if needed. 
323 for (uint32_t* i = selectStart + 1; i < selectEnd; i++) { 324 float iou = getIoUAxisAligned(getRoiBase(*i), getRoiBase(*selectStart)); 325 if (iou >= iouThreshold) { 326 std::swap(*i--, *(--selectEnd)); 327 } 328 } 329 selectStart++; 330 numDetections++; 331 } 332 return selectStart; 333 } 334 335 void hardNmsMultiClass(const float* scoresData, uint32_t numClasses, uint32_t numRois, 336 float scoreThreshold, float iouThreshold, int32_t maxNumDetections, 337 int32_t maxNumDetectionsPerClass, 338 std::function<const float*(uint32_t)> getRoiBase, 339 std::vector<uint32_t>* select) { 340 // Exclude class 0 (background) 341 for (uint32_t c = 1; c < numClasses; c++) { 342 uint32_t size = select->size(); 343 for (uint32_t b = 0; b < numRois; b++) { 344 const uint32_t index = b * numClasses + c; 345 const float score = scoresData[index]; 346 if (score > scoreThreshold) { 347 select->push_back(index); 348 } 349 } 350 uint32_t* selectStart = select->data() + size; 351 uint32_t selectLength = select->size() - size; 352 uint32_t* selectEnd = hardNmsSingleClass(scoresData, iouThreshold, maxNumDetectionsPerClass, 353 getRoiBase, selectStart, selectLength); 354 select->resize(selectEnd - select->data()); 355 } 356 357 // Take top maxNumDetections. 358 std::sort(select->begin(), select->end(), 359 [&scoresData](const uint32_t& lhs, const uint32_t& rhs) { 360 return scoresData[lhs] > scoresData[rhs]; 361 }); 362 if (maxNumDetections < 0 || select->size() <= maxNumDetections) { 363 return; 364 } 365 select->resize(maxNumDetections); 366 } 367 368 // Inplace soft NMS within range [select, select + selectLength). 
// A soft NMS score-decay kernel: maps an IoU overlap value to the
// multiplicative factor applied to the overlapping box's score.
using SoftNmsKernel = std::function<float(float)>;

// Soft NMS over [select, select + selectLength), in place.
// Rather than discarding overlapping boxes outright, each remaining box's score
// is decayed by kernel(iou); a box is dropped once its decayed score falls
// below scoreThreshold. Returns the new end of the kept range. A negative
// maxNumDetections means "no limit".
uint32_t* softNmsSingleClass(float* scoresData, float scoreThreshold, int32_t maxNumDetections,
                             std::function<const float*(uint32_t)> getRoiBase, SoftNmsKernel kernel,
                             uint32_t* select, uint32_t selectLength) {
    uint32_t *selectStart = select, *selectEnd = select + selectLength, numDetections = 0;
    if (maxNumDetections < 0) {
        maxNumDetections = selectLength;
    }
    while (selectStart < selectEnd && numDetections < maxNumDetections) {
        // find max score and swap to the front
        auto& maxScore = *std::max_element(selectStart, selectEnd,
                                           [&scoresData](const uint32_t& lhs, const uint32_t& rhs) {
                                               return scoresData[lhs] < scoresData[rhs];
                                           });
        std::swap(maxScore, *selectStart);

        // Calculate IoU of the rest, decay scores, swap to the end (discard) if needed.
        for (uint32_t* i = selectStart + 1; i < selectEnd; i++) {
            float iou = getIoUAxisAligned(getRoiBase(*i), getRoiBase(*selectStart));
            scoresData[*i] *= kernel(iou);
            if (scoresData[*i] < scoreThreshold) {
                std::swap(*i--, *(--selectEnd));
            }
        }
        selectStart++;
        numDetections++;
    }
    return selectStart;
}

// Runs soft NMS independently per class (class 0 is background and is skipped),
// then keeps the globally top-scoring maxNumDetections indices, sorted by
// decayed score. Indices appended to *select are flat offsets into scoresData
// (roi * numClasses + class).
void softNmsMultiClass(float* scoresData, uint32_t numClasses, uint32_t numRois,
                       float scoreThreshold, float nmsScoreThreshold, int32_t maxNumDetections,
                       int32_t maxNumDetectionsPerClass,
                       std::function<const float*(uint32_t)> getRoiBase, SoftNmsKernel kernel,
                       std::vector<uint32_t>* select) {
    // Exclude class 0 (background)
    for (uint32_t c = 1; c < numClasses; c++) {
        uint32_t size = select->size();
        // Pre-filter this class's candidates by the score threshold.
        for (uint32_t b = 0; b < numRois; b++) {
            const uint32_t index = b * numClasses + c;
            const float score = scoresData[index];
            if (score > scoreThreshold) {
                select->push_back(index);
            }
        }
        uint32_t* selectStart = select->data() + size;
        uint32_t selectLength = select->size() - size;
        uint32_t* selectEnd =
                softNmsSingleClass(scoresData, nmsScoreThreshold, maxNumDetectionsPerClass,
                                   getRoiBase, kernel, selectStart, selectLength);
        select->resize(selectEnd - select->data());
    }

    // Take top maxNumDetections.
    std::sort(select->begin(), select->end(),
              [&scoresData](const uint32_t& lhs, const uint32_t& rhs) {
                  return scoresData[lhs] > scoresData[rhs];
              });
    if (maxNumDetections < 0 || select->size() <= maxNumDetections) {
        return;
    }
    select->resize(maxNumDetections);
}

// Core BOX_WITH_NMS_LIMIT computation in float32.
// Groups the rois by batch id (boxes of the same batch must be contiguous),
// validates each region, then runs multi-class soft NMS per batch.
// On success, *batchSplitIn holds the per-batch input roi counts, *batchSplitOut
// the per-batch selected counts, and *selected the chosen flat score indices
// (sorted by class, then by score within each class).
// NOTE(review): scoresData is mutated in place by the soft-NMS score decay.
bool boxWithNmsLimitFloat32Compute(float* scoresData, const Shape& scoresShape,
                                   const float* roiData, const Shape& roiShape,
                                   const int32_t* batchesData, const Shape& batchesShape,
                                   float scoreThreshold, int32_t maxNumDetections,
                                   int32_t softNmsKernel, float iouThreshold, float sigma,
                                   float nmsScoreThreshold, std::vector<uint32_t>* batchSplitIn,
                                   std::vector<uint32_t>* batchSplitOut,
                                   std::vector<uint32_t>* selected) {
    // Select the score-decay kernel: 0 = hard (binary), 1 = linear, 2 = gaussian.
    SoftNmsKernel kernel = nullptr;
    if (softNmsKernel == 0) {
        kernel = [&iouThreshold](float iou) { return iou < iouThreshold ? 1.0f : 0.0f; };
    } else if (softNmsKernel == 1) {
        kernel = [&iouThreshold](float iou) { return iou < iouThreshold ? 1.0f : 1.0f - iou; };
    } else if (softNmsKernel == 2) {
        kernel = [&sigma](float iou) { return std::exp(-1.0f * iou * iou / sigma); };
    } else {
        NN_RET_CHECK_FAIL() << "Unsupported soft NMS kernel " << softNmsKernel;
    }

    const uint32_t kRoiDim = 4;
    uint32_t numRois = getSizeOfDimension(scoresShape, 0);
    uint32_t numClasses = getSizeOfDimension(scoresShape, 1);

    // We assume boxes of the same batch are grouped together.
    // NOTE(review): `batch` is never used. `ind` is a uint32_t initialized to -1
    // as an "impossible" sentinel — an actual batch id of -1 in the input would
    // compare equal to it on the first iteration; confirm ids are validated
    // non-negative upstream.
    std::vector<uint32_t> batch;
    for (uint32_t i = 0, ind = -1; i < numRois; i++) {
        if (batchesData[i] == ind) {
            (batchSplitIn->back())++;
        } else {
            ind = batchesData[i];
            batchSplitIn->push_back(1);
        }
    }

    float* scoresBase = scoresData;
    const float* roiBase = roiData;
    selected->clear();
    for (uint32_t b = 0; b < batchSplitIn->size(); b++) {
        for (uint32_t i = 0; i < batchSplitIn->at(b); i++) {
            const float* roi = roiBase + i * kRoiDim;
            // Check for malformed data: invalid region: x2 < x1 || y2 < y1
            NN_RET_CHECK_LE(roi[0], roi[2]);
            NN_RET_CHECK_LE(roi[1], roi[3]);
        }
        std::vector<uint32_t> result;
        softNmsMultiClass(scoresBase, numClasses, batchSplitIn->at(b), scoreThreshold,
                          nmsScoreThreshold, maxNumDetections, maxNumDetections,
                          [&roiBase](uint32_t ind) { return roiBase + ind * kRoiDim; }, kernel,
                          &result);
        // Sort again by class.
        std::sort(result.begin(), result.end(),
                  [&scoresBase, numClasses](const uint32_t& lhs, const uint32_t& rhs) {
                      uint32_t lhsClass = lhs % numClasses, rhsClass = rhs % numClasses;
                      return lhsClass == rhsClass ? scoresBase[lhs] > scoresBase[rhs]
                                                  : lhsClass < rhsClass;
                  });
        selected->insert(selected->end(), result.begin(), result.end());
        batchSplitOut->push_back(result.size());
        // Advance to the next batch's scores and rois.
        scoresBase += batchSplitIn->at(b) * numClasses;
        roiBase += batchSplitIn->at(b) * numClasses * kRoiDim;
    }
    return true;
}

// Converts a float score to the output score type. The generic version is a
// plain conversion.
template <typename T>
T castTo(float val, const Shape&) {
    return val;
}
// uint8_t specialization: quantizes with the target shape's scale/offset and
// saturates to the uint8 range.
template <>
uint8_t castTo(float val, const Shape& shape) {
    int32_t intVal = std::round(val / shape.scale + shape.offset);
    intVal = std::min<int32_t>(std::max<int32_t>(intVal, std::numeric_limits<uint8_t>::min()),
                               std::numeric_limits<uint8_t>::max());
    return static_cast<uint8_t>(intVal);
}

// Writes the selected detections to the four output tensors (scores, rois,
// classes, batch ids), first resizing each output's leading dimension to the
// number of selected boxes.
// NOTE(review): when nothing was selected the output shapes are left untouched
// and the function returns success — confirm downstream consumers handle the
// zero-detection case.
template <typename T_Score, typename T_Roi>
bool boxWithNmsLimitWriteOutput(const std::vector<uint32_t>& selected,
                                const std::vector<uint32_t>& batchSplitIn,
                                const std::vector<uint32_t>& batchSplitOut,
                                const std::vector<float>& scores,
                                IOperationExecutionContext* context) {
    const uint32_t kRoiDim = 4;
    Shape scoresShape = context->getInputShape(kScoreTensor);
    uint32_t numClasses = getSizeOfDimension(scoresShape, 1);

    // Set output dimensions.
    uint32_t numOutRois = selected.size();
    if (numOutRois == 0) return true;
    Shape scoresOutShape = context->getOutputShape(kOutputScoreTensor);
    scoresOutShape.dimensions = {numOutRois};
    NN_RET_CHECK(context->setOutputShape(kOutputScoreTensor, scoresOutShape));

    Shape roiOutShape = context->getOutputShape(kOutputRoiTensor);
    roiOutShape.dimensions = {numOutRois, 4};
    NN_RET_CHECK(context->setOutputShape(kOutputRoiTensor, roiOutShape));

    Shape classesOutShape = context->getOutputShape(kOutputClassTensor);
    classesOutShape.dimensions = {numOutRois};
    NN_RET_CHECK(context->setOutputShape(kOutputClassTensor, classesOutShape));

    Shape batchesOutShape = context->getOutputShape(kOutputBatchesTensor);
    batchesOutShape.dimensions = {numOutRois};
    NN_RET_CHECK(context->setOutputShape(kOutputBatchesTensor, batchesOutShape));

    // Write outputs.
    const float* scoresBase = scores.data();
    const T_Roi* roiBase = context->getInputBuffer<T_Roi>(kRoiTensor);
    const int32_t* batchesInPtr = context->getInputBuffer<int32_t>(kBatchesTensor);
    T_Score* scoresOutPtr = context->getOutputBuffer<T_Score>(kOutputScoreTensor);
    T_Roi* roiOutPtr = context->getOutputBuffer<T_Roi>(kOutputRoiTensor);
    int32_t* classesOutPtr = context->getOutputBuffer<int32_t>(kOutputClassTensor);
    int32_t* batchesOutPtr = context->getOutputBuffer<int32_t>(kOutputBatchesTensor);
    uint32_t i = 0;
    for (uint32_t b = 0; b < batchSplitOut.size(); b++) {
        for (uint32_t j = 0; j < batchSplitOut[b]; j++) {
            // `index` is a flat offset: roi * numClasses + class.
            uint32_t index = selected[i++];
            *scoresOutPtr++ = castTo<T_Score>(scoresBase[index], scoresOutShape);
            memcpy(roiOutPtr, roiBase + index * kRoiDim, kRoiDim * sizeof(T_Roi));
            roiOutPtr += kRoiDim;
            *classesOutPtr++ = index % numClasses;
            // All rois of this batch share one batch id; emit the batch's first.
            *batchesOutPtr++ = *batchesInPtr;
        }
        scoresBase += batchSplitIn[b] * numClasses;
        roiBase += batchSplitIn[b] * numClasses * kRoiDim;
        batchesInPtr += batchSplitIn[b];
    }
    return true;
}

// Float32 entry point: copies the scores (the NMS decays them in place) and
// delegates to the shared compute + output-writing helpers.
bool boxWithNmsLimitFloat32(const float* scoresData, const Shape& scoresShape, const float* roiData,
                            const Shape& roiShape, const int32_t* batchesData,
                            const Shape& batchesShape, float scoreThreshold,
                            int32_t maxNumDetections, int32_t softNmsKernel, float iouThreshold,
                            float sigma, float nmsScoreThreshold, float* scoresOutData,
                            Shape scoresOutShape, float* roiOutData, Shape roiOutShape,
                            int32_t* classesOutData, Shape classesOutShape, int32_t* batchesOutData,
                            const Shape& batchSplitOutShape, IOperationExecutionContext* context) {
    NNTRACE_TRANS("boxWithNmsLimit");
    // Working copy: softNmsMultiClass mutates the scores.
    std::vector<float> scores_float32(getNumberOfElements(scoresShape));
    for (uint32_t i = 0; i < scores_float32.size(); i++) {
        scores_float32[i] = scoresData[i];
    }
    std::vector<uint32_t> selected, batchSplitIn, batchSplitOut;
    NN_RET_CHECK(boxWithNmsLimitFloat32Compute(
            scores_float32.data(), scoresShape, roiData, roiShape, batchesData, batchesShape,
            scoreThreshold, maxNumDetections, softNmsKernel, iouThreshold, sigma, nmsScoreThreshold,
            &batchSplitIn, &batchSplitOut, &selected));
    return boxWithNmsLimitWriteOutput<float, float>(selected, batchSplitIn, batchSplitOut,
                                                    scores_float32, context);
}

// Float16 entry point: widens scores and rois to float32 before delegating.
bool boxWithNmsLimitFloat16(const _Float16* scoresData, const Shape& scoresShape,
                            const _Float16* roiData, const Shape& roiShape,
                            const int32_t* batchesData, const Shape& batchesShape,
                            _Float16 scoreThreshold, int32_t maxNumDetections,
                            int32_t softNmsKernel, _Float16 iouThreshold, _Float16 sigma,
                            _Float16 nmsScoreThreshold, _Float16* scoresOutData,
                            const Shape& scoresOutShape, _Float16* roiOutData,
                            const Shape& roiOutShape, int32_t* classesOutData,
                            const Shape& classesOutShape, int32_t* batchesOutData,
                            const Shape& batchSplitOutShape, IOperationExecutionContext* context) {
    std::vector<float> scores_float32(getNumberOfElements(scoresShape));
    convertFloat16ToFloat32(scoresData, &scores_float32);
    std::vector<float> roi_float32(getNumberOfElements(roiShape));
    convertFloat16ToFloat32(roiData, &roi_float32);
    std::vector<uint32_t> selected, batchSplitIn, batchSplitOut;
    NN_RET_CHECK(boxWithNmsLimitFloat32Compute(
            scores_float32.data(), scoresShape, roi_float32.data(), roiShape, batchesData,
            batchesShape, scoreThreshold, maxNumDetections, softNmsKernel, iouThreshold, sigma,
            nmsScoreThreshold, &batchSplitIn, &batchSplitOut, &selected));
    return boxWithNmsLimitWriteOutput<_Float16, _Float16>(selected, batchSplitIn, batchSplitOut,
                                                          scores_float32, context);
}

// Quantized entry point: dequantizes scores and rois to float32 before
// delegating; outputs are requantized in boxWithNmsLimitWriteOutput via castTo.
bool boxWithNmsLimitQuant(const uint8_t* scoresData, const Shape& scoresShape,
                          const uint16_t* roiData, const Shape& roiShape,
                          const int32_t* batchesData, const Shape& batchesShape,
                          float scoreThreshold, int32_t maxNumDetections, int32_t softNmsKernel,
                          float iouThreshold, float sigma, float nmsScoreThreshold,
                          uint8_t* scoresOutData, const Shape& scoresOutShape, uint16_t* roiOutData,
                          const Shape& roiOutShape, int32_t* classesOutData,
                          const Shape& classesOutShape, int32_t* batchesOutData,
                          const Shape& batchSplitOutShape, IOperationExecutionContext* context) {
    std::vector<float> scores_float32(getNumberOfElements(scoresShape));
    convertQuantToFloat32(scoresData, scoresShape.scale, scoresShape.offset, &scores_float32);
    std::vector<float> roi_float32(getNumberOfElements(roiShape));
    convertQuantToFloat32(roiData, roiShape.scale, roiShape.offset, &roi_float32);
    std::vector<uint32_t> selected, batchSplitIn, batchSplitOut;
    NN_RET_CHECK(boxWithNmsLimitFloat32Compute(
            scores_float32.data(), scoresShape, roi_float32.data(), roiShape, batchesData,
            batchesShape, scoreThreshold, maxNumDetections, softNmsKernel, iouThreshold, sigma,
            nmsScoreThreshold, &batchSplitIn, &batchSplitOut, &selected));
    return boxWithNmsLimitWriteOutput<uint8_t, uint16_t>(selected, batchSplitIn, batchSplitOut,
                                                         scores_float32, context);
}

}  // namespace

// Validates operand counts and types for BOX_WITH_NMS_LIMIT. Scalar parameters
// follow the score tensor's precision (FLOAT16 vs FLOAT32); the quantized
// variant pairs QUANT8 scores with QUANT16 rois.
bool validate(const IOperationValidationContext* context) {
    NN_RET_CHECK_EQ(context->getNumInputs(), kNumInputs);
    NN_RET_CHECK_EQ(context->getNumOutputs(), kNumOutputs);
    std::vector<OperandType> inExpectedTypes;
    std::vector<OperandType> outExpectedTypes;
    auto inputType = context->getInputType(kScoreTensor);
    if (inputType == OperandType::TENSOR_FLOAT16) {
        inExpectedTypes = {
                OperandType::TENSOR_FLOAT16, OperandType::TENSOR_FLOAT16, OperandType::TENSOR_INT32,
                OperandType::FLOAT16,        OperandType::INT32,          OperandType::INT32,
                OperandType::FLOAT16,        OperandType::FLOAT16,       OperandType::FLOAT16};
        outExpectedTypes = {OperandType::TENSOR_FLOAT16, OperandType::TENSOR_FLOAT16,
                            OperandType::TENSOR_INT32, OperandType::TENSOR_INT32};
    } else if (inputType == OperandType::TENSOR_FLOAT32) {
        inExpectedTypes = {
                OperandType::TENSOR_FLOAT32, OperandType::TENSOR_FLOAT32, OperandType::TENSOR_INT32,
                OperandType::FLOAT32,        OperandType::INT32,          OperandType::INT32,
                OperandType::FLOAT32,        OperandType::FLOAT32,       OperandType::FLOAT32};
        outExpectedTypes = {OperandType::TENSOR_FLOAT32, OperandType::TENSOR_FLOAT32,
                            OperandType::TENSOR_INT32, OperandType::TENSOR_INT32};
    } else if (inputType == OperandType::TENSOR_QUANT8_ASYMM) {
        inExpectedTypes = {OperandType::TENSOR_QUANT8_ASYMM,
                           OperandType::TENSOR_QUANT16_ASYMM,
                           OperandType::TENSOR_INT32,
                           OperandType::FLOAT32,
                           OperandType::INT32,
                           OperandType::INT32,
                           OperandType::FLOAT32,
                           OperandType::FLOAT32,
                           OperandType::FLOAT32};
        outExpectedTypes = {OperandType::TENSOR_QUANT8_ASYMM, OperandType::TENSOR_QUANT16_ASYMM,
                            OperandType::TENSOR_INT32, OperandType::TENSOR_INT32};
    } else {
        NN_RET_CHECK_FAIL() << "Unsupported tensor type for operation " << kOperationName;
    }
    NN_RET_CHECK(validateInputTypes(context, inExpectedTypes));
    NN_RET_CHECK(validateOutputTypes(context, outExpectedTypes));
    return validateHalVersion(context, HalVersion::V1_2);
}

// Checks input shape consistency and sets all four output shapes with a
// zero-length leading dimension; the real counts are only known after NMS runs
// (boxWithNmsLimitWriteOutput resizes them during execute).
bool prepare(IOperationExecutionContext* context) {
    Shape scoreShape = context->getInputShape(kScoreTensor);
    Shape roiShape = context->getInputShape(kRoiTensor);
    Shape batchesShape = context->getInputShape(kBatchesTensor);
    Shape outputScoreShape = context->getOutputShape(kOutputScoreTensor);
    Shape outputRoiShape = context->getOutputShape(kOutputRoiTensor);
    Shape outputClassShape = context->getOutputShape(kOutputClassTensor);
    Shape outputBatchSplitShape = context->getOutputShape(kOutputBatchesTensor);

    NN_RET_CHECK(getNumberOfDimensions(scoreShape) == 2);
    NN_RET_CHECK(getNumberOfDimensions(roiShape) == 2);
    NN_RET_CHECK(getNumberOfDimensions(batchesShape) == 1);

    // Only numRois can be zero.
    const uint32_t kRoiDim = 4;
    uint32_t numRois = getSizeOfDimension(scoreShape, 0);
    uint32_t numClasses = getSizeOfDimension(scoreShape, 1);
    NN_RET_CHECK(getSizeOfDimension(roiShape, 0) == numRois);
    NN_RET_CHECK(getSizeOfDimension(roiShape, 1) == kRoiDim * numClasses);
    NN_RET_CHECK(getSizeOfDimension(batchesShape, 0) == numRois);
    // At least one real class beyond the background class 0.
    NN_RET_CHECK_GT(numClasses, 1);

    // Quantized rois must use the fixed 0.125 scale, zero offset encoding.
    if (scoreShape.type == OperandType::TENSOR_QUANT8_ASYMM) {
        NN_RET_CHECK_EQ(roiShape.scale, 0.125f);
        NN_RET_CHECK_EQ(roiShape.offset, 0);
    }

    outputScoreShape.type = scoreShape.type;
    outputScoreShape.dimensions = {0};
    outputScoreShape.scale = scoreShape.scale;
    outputScoreShape.offset = scoreShape.offset;
    NN_RET_CHECK(context->setOutputShape(kOutputScoreTensor, outputScoreShape));

    outputRoiShape.type = roiShape.type;
    outputRoiShape.dimensions = {0, 4};
    outputRoiShape.scale = 0.125f;
    outputRoiShape.offset = 0;
    NN_RET_CHECK(context->setOutputShape(kOutputRoiTensor, outputRoiShape));

    outputClassShape.type = OperandType::TENSOR_INT32;
    outputClassShape.dimensions = {0};
    NN_RET_CHECK(context->setOutputShape(kOutputClassTensor, outputClassShape));

    outputBatchSplitShape.type = batchesShape.type;
    outputBatchSplitShape.dimensions = {0};
    NN_RET_CHECK(context->setOutputShape(kOutputBatchesTensor, outputBatchSplitShape));
    return true;
}

// Dispatches to the implementation matching the score tensor's operand type.
bool execute(IOperationExecutionContext* context) {
    NNTRACE_TRANS("boxWithNMSLimit");
    // Bypass execution in the case of zero numRois.
    if (getSizeOfDimension(context->getInputShape(kScoreTensor), 0) == 0) return true;
    switch (context->getInputType(kScoreTensor)) {
        case OperandType::TENSOR_FLOAT16: {
            return boxWithNmsLimitFloat16(
                    context->getInputBuffer<_Float16>(kScoreTensor),
                    context->getInputShape(kScoreTensor),
                    context->getInputBuffer<_Float16>(kRoiTensor),
                    context->getInputShape(kRoiTensor),
                    context->getInputBuffer<int32_t>(kBatchesTensor),
                    context->getInputShape(kBatchesTensor),
                    context->getInputValue<_Float16>(kScoreThresholdScalar),
                    context->getInputValue<int32_t>(kMaxNumDetectionScalar),
                    context->getInputValue<int32_t>(kNmsKernelScalar),
                    context->getInputValue<_Float16>(kIoUThresholdScalar),
                    context->getInputValue<_Float16>(kSigmaScalar),
                    context->getInputValue<_Float16>(kNmsScoreThresholdScalar),
                    context->getOutputBuffer<_Float16>(kOutputScoreTensor),
                    context->getOutputShape(kOutputScoreTensor),
                    context->getOutputBuffer<_Float16>(kOutputRoiTensor),
                    context->getOutputShape(kOutputRoiTensor),
                    context->getOutputBuffer<int32_t>(kOutputClassTensor),
                    context->getOutputShape(kOutputClassTensor),
                    context->getOutputBuffer<int32_t>(kOutputBatchesTensor),
                    context->getOutputShape(kOutputBatchesTensor), context);
        }
        case OperandType::TENSOR_FLOAT32: {
            return boxWithNmsLimitFloat32(context->getInputBuffer<float>(kScoreTensor),
                                          context->getInputShape(kScoreTensor),
                                          context->getInputBuffer<float>(kRoiTensor),
                                          context->getInputShape(kRoiTensor),
                                          context->getInputBuffer<int32_t>(kBatchesTensor),
                                          context->getInputShape(kBatchesTensor),
                                          context->getInputValue<float>(kScoreThresholdScalar),
                                          context->getInputValue<int32_t>(kMaxNumDetectionScalar),
                                          context->getInputValue<int32_t>(kNmsKernelScalar),
                                          context->getInputValue<float>(kIoUThresholdScalar),
                                          context->getInputValue<float>(kSigmaScalar),
                                          context->getInputValue<float>(kNmsScoreThresholdScalar),
                                          context->getOutputBuffer<float>(kOutputScoreTensor),
                                          context->getOutputShape(kOutputScoreTensor),
                                          context->getOutputBuffer<float>(kOutputRoiTensor),
                                          context->getOutputShape(kOutputRoiTensor),
                                          context->getOutputBuffer<int32_t>(kOutputClassTensor),
                                          context->getOutputShape(kOutputClassTensor),
                                          context->getOutputBuffer<int32_t>(kOutputBatchesTensor),
                                          context->getOutputShape(kOutputBatchesTensor), context);
        }
        case OperandType::TENSOR_QUANT8_ASYMM: {
            return boxWithNmsLimitQuant(context->getInputBuffer<uint8_t>(kScoreTensor),
                                        context->getInputShape(kScoreTensor),
                                        context->getInputBuffer<uint16_t>(kRoiTensor),
                                        context->getInputShape(kRoiTensor),
                                        context->getInputBuffer<int32_t>(kBatchesTensor),
                                        context->getInputShape(kBatchesTensor),
                                        context->getInputValue<float>(kScoreThresholdScalar),
                                        context->getInputValue<int32_t>(kMaxNumDetectionScalar),
                                        context->getInputValue<int32_t>(kNmsKernelScalar),
                                        context->getInputValue<float>(kIoUThresholdScalar),
                                        context->getInputValue<float>(kSigmaScalar),
                                        context->getInputValue<float>(kNmsScoreThresholdScalar),
                                        context->getOutputBuffer<uint8_t>(kOutputScoreTensor),
                                        context->getOutputShape(kOutputScoreTensor),
                                        context->getOutputBuffer<uint16_t>(kOutputRoiTensor),
                                        context->getOutputShape(kOutputRoiTensor),
                                        context->getOutputBuffer<int32_t>(kOutputClassTensor),
                                        context->getOutputShape(kOutputClassTensor),
                                        context->getOutputBuffer<int32_t>(kOutputBatchesTensor),
context->getOutputShape(kOutputBatchesTensor), context); 792 } 793 default: 794 NN_RET_CHECK_FAIL() << "Unsupported tensor type for operation " << kOperationName; 795 } 796 } 797 798 } // namespace box_with_nms_limit 799 800 namespace generate_proposals { 801 802 constexpr char kOperationName[] = "GENERATE_PROPOSALS"; 803 804 constexpr uint32_t kNumInputs = 11; 805 constexpr uint32_t kScoreTensor = 0; 806 constexpr uint32_t kDeltaTensor = 1; 807 constexpr uint32_t kAnchorTensor = 2; 808 constexpr uint32_t kImageInfoTensor = 3; 809 constexpr uint32_t kHeightStrideSalar = 4; 810 constexpr uint32_t kWidthStrideScalar = 5; 811 constexpr uint32_t kPreNmsMaxScalar = 6; 812 constexpr uint32_t kPostNmsMaxScalar = 7; 813 constexpr uint32_t kIoUThresholdScalar = 8; 814 constexpr uint32_t kMinSizeScalar = 9; 815 constexpr uint32_t kLayoutScalar = 10; 816 817 constexpr uint32_t kNumOutputs = 3; 818 constexpr uint32_t kOutputScoreTensor = 0; 819 constexpr uint32_t kOutputRoiTensor = 1; 820 constexpr uint32_t kOutputBatchesTensor = 2; 821 822 namespace { 823 824 void filterBoxes(const float* roiBase, const float* imageInfoBase, float minSize, 825 std::vector<uint32_t>* select) { 826 const uint32_t kRoiDim = 4; 827 uint32_t i = 0; 828 for (uint32_t j = 0; j < select->size(); j++) { 829 const float* roiInfo = roiBase + (*select)[j] * kRoiDim; 830 float roiWidth, roiHeight, xRoiCenter, yRoiCenter; 831 roiWidth = roiInfo[2] - roiInfo[0]; 832 roiHeight = roiInfo[3] - roiInfo[1]; 833 xRoiCenter = roiInfo[0] + roiWidth / 2.0f; 834 yRoiCenter = roiInfo[1] + roiHeight / 2.0f; 835 if (roiWidth > minSize && roiHeight > minSize && xRoiCenter < imageInfoBase[1] && 836 yRoiCenter < imageInfoBase[0]) { 837 (*select)[i++] = (*select)[j]; 838 } 839 } 840 select->resize(i); 841 } 842 843 bool generateProposalsNhwcFloat32Compute(const float* scoresData, const Shape& scoresShape, 844 const float* bboxDeltasData, const Shape& bboxDeltasShape, 845 const float* anchorsData, const Shape& anchorsShape, 
                                         const float* imageInfoData, const Shape& imageInfoShape,
                                         float heightStride, float widthStride, int32_t preNmsTopN,
                                         int32_t postNmsTopN, float iouThreshold, float minSize,
                                         std::vector<float>* scoresOutData,
                                         std::vector<float>* roiOutData,
                                         std::vector<int32_t>* batchesOutData) {
    const uint32_t kRoiDim = 4;
    uint32_t numBatches = getSizeOfDimension(scoresShape, 0);
    uint32_t height = getSizeOfDimension(scoresShape, 1);
    uint32_t width = getSizeOfDimension(scoresShape, 2);
    uint32_t numAnchors = getSizeOfDimension(scoresShape, 3);
    uint32_t imageInfoLength = getSizeOfDimension(imageInfoShape, 1);

    // One candidate proposal per (h, w, anchor) cell.
    uint32_t batchSize = height * width * numAnchors;
    uint32_t roiBufferSize = batchSize * kRoiDim;
    std::vector<float> roiBuffer(roiBufferSize);
    std::vector<float> roiTransformedBuffer(roiBufferSize);
    scoresOutData->clear();
    roiOutData->clear();
    batchesOutData->clear();

    // Compute the roi region for each anchor: shift every anchor box by the
    // stride-scaled cell position. The same grid is reused for every batch.
    float* roiBase = roiBuffer.data();
    for (uint32_t h = 0; h < height; h++) {
        float hShift = h * heightStride;
        for (uint32_t w = 0; w < width; w++) {
            const float* anchorsBase = anchorsData;
            float wShift = w * widthStride;
            for (uint32_t a = 0; a < numAnchors; a++, roiBase += kRoiDim, anchorsBase += kRoiDim) {
                roiBase[0] = anchorsBase[0] + wShift;
                roiBase[1] = anchorsBase[1] + hShift;
                roiBase[2] = anchorsBase[2] + wShift;
                roiBase[3] = anchorsBase[3] + hShift;
            }
        }
    }

    const float* scoresBase = scoresData;
    const float* bboxDeltasBase = bboxDeltasData;
    const float* imageInfoBase = imageInfoData;
    // Need to fake some data to satisfy bboxTransform: present each batch
    // slice as a single-batch problem with every roi mapped to batch 0.
    Shape tempRoiShape = anchorsShape;
    tempRoiShape.dimensions = {batchSize, kRoiDim};
    Shape tempBBoxDeltasShape = bboxDeltasShape;
    tempBBoxDeltasShape.dimensions = {batchSize, kRoiDim};
    std::vector<int32_t> tempBatchSplitData(batchSize, 0);
    Shape tempbatchSplitShape = {.dimensions = {batchSize}};
    Shape tempImageInfoShape = imageInfoShape;
    tempImageInfoShape.dimensions = {1, imageInfoLength};

    for (uint32_t b = 0; b < numBatches; b++) {
        // Apply bboxDeltas to anchor locations.
        float tempImageInfo[] = {imageInfoBase[0], imageInfoBase[1]};
        if (!bboxTransformFloat32(roiBuffer.data(), tempRoiShape, bboxDeltasBase,
                                  tempBBoxDeltasShape, tempBatchSplitData.data(),
                                  tempbatchSplitShape, tempImageInfo, tempImageInfoShape,
                                  roiTransformedBuffer.data(), tempRoiShape)) {
            LOG(ERROR) << "BBoxTransform step failed in GENERATE_PROPOSALS op.";
            return false;
        }

        // Find the top preNmsTopN scores.
        std::vector<uint32_t> select(batchSize);
        std::iota(select.begin(), select.end(), 0);
        if (preNmsTopN > 0 && preNmsTopN < select.size()) {
            std::sort(select.begin(), select.end(),
                      [&scoresBase](const uint32_t lhs, const uint32_t rhs) {
                          return scoresBase[lhs] > scoresBase[rhs];
                      });
            select.resize(preNmsTopN);
        }

        // Filter boxes, discard regions with height or width < minSize.
        filterBoxes(roiTransformedBuffer.data(), imageInfoBase, minSize, &select);

        // Apply hard NMS, keeping at most postNmsTopN boxes per batch.
        uint32_t* selectEnd = box_with_nms_limit::hardNmsSingleClass(
                scoresBase, iouThreshold, postNmsTopN,
                [&roiTransformedBuffer](uint32_t ind) {
                    return roiTransformedBuffer.data() + ind * kRoiDim;
                },
                select.data(), select.size());
        uint32_t selectSize = selectEnd - select.data();
        select.resize(selectSize);

        // Write output.
        for (auto i : select) {
            roiOutData->insert(roiOutData->end(), roiTransformedBuffer.begin() + i * kRoiDim,
                               roiTransformedBuffer.begin() + (i + 1) * kRoiDim);
            scoresOutData->push_back(scoresBase[i]);
            batchesOutData->push_back(b);
        }
        // Advance to the next batch's slice of each input.
        scoresBase += batchSize;
        bboxDeltasBase += roiBufferSize;
        imageInfoBase += imageInfoLength;
    }
    return true;
}

// Layout-normalizing wrapper: converts NCHW score/delta inputs to NHWC when
// requested, then runs the NHWC float32 kernel.
bool generateProposalsFloat32Compute(const float* scoresData, const Shape& scoresShape,
                                     const float* bboxDeltasData, const Shape& bboxDeltasShape,
                                     const float* anchorsData, const Shape& anchorsShape,
                                     const float* imageInfoData, const Shape& imageInfoShape,
                                     float heightStride, float widthStride, int32_t preNmsTopN,
                                     int32_t postNmsTopN, float iouThreshold, float minSize,
                                     bool useNchw, std::vector<float>* scoresOutData,
                                     std::vector<float>* roiOutData,
                                     std::vector<int32_t>* batchesOutData) {
    InputWithLayout<float> score_nhwc(useNchw), delta_nhwc(useNchw);
    NN_RET_CHECK(score_nhwc.initialize(scoresData, scoresShape));
    NN_RET_CHECK(delta_nhwc.initialize(bboxDeltasData, bboxDeltasShape));
    return generateProposalsNhwcFloat32Compute(
            score_nhwc.getNhwcBuffer(), score_nhwc.getNhwcShape(), delta_nhwc.getNhwcBuffer(),
            delta_nhwc.getNhwcShape(), anchorsData, anchorsShape, imageInfoData, imageInfoShape,
            heightStride, widthStride, preNmsTopN, postNmsTopN, iouThreshold, minSize,
            scoresOutData, roiOutData, batchesOutData);
}

// Float32 entry point: runs the computation, then sizes and fills the three
// outputs; returns early when no proposal survives.
bool generateProposalsFloat32(const float* scoresData, const Shape& scoresShape,
                              const float* bboxDeltasData, const Shape& bboxDeltasShape,
                              const float* anchorsData, const Shape& anchorsShape,
                              const float* imageInfoData, const Shape& imageInfoShape,
                              float heightStride, float widthStride, int32_t preNmsTopN,
                              int32_t postNmsTopN, float iouThreshold, float minSize, bool useNchw,
                              IOperationExecutionContext* context) {
    std::vector<float> scoresOut_float32, roiOut_float32;
    std::vector<int32_t> batchesOut;
    NN_RET_CHECK(generateProposalsFloat32Compute(
            scoresData, scoresShape, bboxDeltasData, bboxDeltasShape, anchorsData, anchorsShape,
            imageInfoData, imageInfoShape, heightStride, widthStride, preNmsTopN, postNmsTopN,
            iouThreshold, minSize, useNchw, &scoresOut_float32, &roiOut_float32, &batchesOut));

    // Set output dimensions.
    uint32_t numOutRois = scoresOut_float32.size();
    if (numOutRois == 0) return true;
    Shape scoresOutShape = context->getOutputShape(kOutputScoreTensor);
    scoresOutShape.dimensions = {numOutRois};
    NN_RET_CHECK(context->setOutputShape(kOutputScoreTensor, scoresOutShape));
    Shape roiOutShape = context->getOutputShape(kOutputRoiTensor);
    roiOutShape.dimensions = {numOutRois, 4};
    NN_RET_CHECK(context->setOutputShape(kOutputRoiTensor, roiOutShape));
    Shape batchesOutShape = context->getOutputShape(kOutputBatchesTensor);
    batchesOutShape.dimensions = {numOutRois};
    NN_RET_CHECK(context->setOutputShape(kOutputBatchesTensor, batchesOutShape));

    // Write outputs.
    float* scoresOutData = context->getOutputBuffer<float>(kOutputScoreTensor);
    for (uint32_t i = 0; i < scoresOut_float32.size(); i++) {
        scoresOutData[i] = scoresOut_float32[i];
    }
    float* roiOutData = context->getOutputBuffer<float>(kOutputRoiTensor);
    for (uint32_t i = 0; i < roiOut_float32.size(); i++) {
        roiOutData[i] = roiOut_float32[i];
    }
    int32_t* batchesOutData = context->getOutputBuffer<int32_t>(kOutputBatchesTensor);
    for (uint32_t i = 0; i < batchesOut.size(); i++) {
        batchesOutData[i] = batchesOut[i];
    }
    return true;
}

// Float16 entry point: widens all inputs to float32, runs the float32
// computation, then narrows the score/roi results back to float16.
bool generateProposalsFloat16(const _Float16* scoresData, const Shape& scoresShape,
                              const _Float16* bboxDeltasData, const Shape& bboxDeltasShape,
                              const _Float16* anchorsData, const Shape& anchorsShape,
                              const _Float16* imageInfoData, const Shape& imageInfoShape,
                              float heightStride, float widthStride, int32_t preNmsTopN,
                              int32_t postNmsTopN, float iouThreshold, float minSize, bool useNchw,
                              IOperationExecutionContext* context) {
    std::vector<float> score_float32(getNumberOfElements(scoresShape));
    convertFloat16ToFloat32(scoresData, &score_float32);
    std::vector<float> delta_float32(getNumberOfElements(bboxDeltasShape));
    convertFloat16ToFloat32(bboxDeltasData, &delta_float32);
    std::vector<float> anchors_float32(getNumberOfElements(anchorsShape));
    convertFloat16ToFloat32(anchorsData, &anchors_float32);
    std::vector<float> imageInfo_float32(getNumberOfElements(imageInfoShape));
    convertFloat16ToFloat32(imageInfoData, &imageInfo_float32);
    std::vector<float> scoresOut_float32, roiOut_float32;
    std::vector<int32_t> batchesOut;
    NN_RET_CHECK(generateProposalsFloat32Compute(
            score_float32.data(), scoresShape, delta_float32.data(), bboxDeltasShape,
            anchors_float32.data(), anchorsShape, imageInfo_float32.data(), imageInfoShape,
            heightStride, widthStride, preNmsTopN, postNmsTopN, iouThreshold, minSize, useNchw,
            &scoresOut_float32, &roiOut_float32, &batchesOut));

    // Set output dimensions.
    uint32_t numOutRois = scoresOut_float32.size();
    if (numOutRois == 0) return true;
    Shape scoresOutShape = context->getOutputShape(kOutputScoreTensor);
    scoresOutShape.dimensions = {numOutRois};
    NN_RET_CHECK(context->setOutputShape(kOutputScoreTensor, scoresOutShape));
    Shape roiOutShape = context->getOutputShape(kOutputRoiTensor);
    roiOutShape.dimensions = {numOutRois, 4};
    NN_RET_CHECK(context->setOutputShape(kOutputRoiTensor, roiOutShape));
    Shape batchesOutShape = context->getOutputShape(kOutputBatchesTensor);
    batchesOutShape.dimensions = {numOutRois};
    NN_RET_CHECK(context->setOutputShape(kOutputBatchesTensor, batchesOutShape));

    // Write outputs.
    _Float16* scoresOutData = context->getOutputBuffer<_Float16>(kOutputScoreTensor);
    convertFloat32ToFloat16(scoresOut_float32, scoresOutData);
    _Float16* roiOutData = context->getOutputBuffer<_Float16>(kOutputRoiTensor);
    convertFloat32ToFloat16(roiOut_float32, roiOutData);
    int32_t* batchesOutData = context->getOutputBuffer<int32_t>(kOutputBatchesTensor);
    for (uint32_t i = 0; i < batchesOut.size(); i++) {
        batchesOutData[i] = batchesOut[i];
    }
    return true;
}

// Quantized entry point: dequantizes all inputs to float32, runs the float32
// computation, then requantizes the results with the output scales/offsets.
bool generateProposalsQuant(const uint8_t* scoresData, const Shape& scoresShape,
                            const uint8_t* bboxDeltasData, const Shape& bboxDeltasShape,
                            const int16_t* anchorsData, const Shape& anchorsShape,
                            const uint16_t* imageInfoData, const Shape& imageInfoShape,
                            float heightStride, float widthStride, int32_t preNmsTopN,
                            int32_t postNmsTopN, float iouThreshold, float minSize, bool useNchw,
                            IOperationExecutionContext* context) {
    std::vector<float> score_float32(getNumberOfElements(scoresShape));
    convertQuantToFloat32(scoresData, scoresShape.scale, scoresShape.offset, &score_float32);
    std::vector<float> delta_float32(getNumberOfElements(bboxDeltasShape));
    convertQuantToFloat32(bboxDeltasData, bboxDeltasShape.scale, bboxDeltasShape.offset,
                          &delta_float32);
    std::vector<float> anchors_float32(getNumberOfElements(anchorsShape));
    convertQuantToFloat32(anchorsData, anchorsShape.scale, anchorsShape.offset, &anchors_float32);
    std::vector<float> imageInfo_float32(getNumberOfElements(imageInfoShape));
    convertQuantToFloat32(imageInfoData, imageInfoShape.scale, imageInfoShape.offset,
                          &imageInfo_float32);
    std::vector<float> scoresOut_float32, roiOut_float32;
    std::vector<int32_t> batchesOut;
    NN_RET_CHECK(generateProposalsFloat32Compute(
            score_float32.data(), scoresShape, delta_float32.data(), bboxDeltasShape,
            anchors_float32.data(), anchorsShape, imageInfo_float32.data(), imageInfoShape,
            heightStride, widthStride, preNmsTopN, postNmsTopN, iouThreshold, minSize, useNchw,
            &scoresOut_float32, &roiOut_float32, &batchesOut));

    // Set output dimensions.
    uint32_t numOutRois = scoresOut_float32.size();
    if (numOutRois == 0) return true;
    Shape scoresOutShape = context->getOutputShape(kOutputScoreTensor);
    scoresOutShape.dimensions = {numOutRois};
    NN_RET_CHECK(context->setOutputShape(kOutputScoreTensor, scoresOutShape));
    Shape roiOutShape = context->getOutputShape(kOutputRoiTensor);
    roiOutShape.dimensions = {numOutRois, 4};
    NN_RET_CHECK(context->setOutputShape(kOutputRoiTensor, roiOutShape));
    Shape batchesOutShape = context->getOutputShape(kOutputBatchesTensor);
    batchesOutShape.dimensions = {numOutRois};
    NN_RET_CHECK(context->setOutputShape(kOutputBatchesTensor, batchesOutShape));

    // Write outputs.
    uint8_t* scoresOutData = context->getOutputBuffer<uint8_t>(kOutputScoreTensor);
    convertFloat32ToQuant(scoresOut_float32, scoresOutShape.scale, scoresOutShape.offset,
                          scoresOutData);
    uint16_t* roiOutData = context->getOutputBuffer<uint16_t>(kOutputRoiTensor);
    convertFloat32ToQuant(roiOut_float32, roiOutShape.scale, roiOutShape.offset, roiOutData);
    int32_t* batchesOutData = context->getOutputBuffer<int32_t>(kOutputBatchesTensor);
    for (uint32_t i = 0; i < batchesOut.size(); i++) {
        batchesOutData[i] = batchesOut[i];
    }
    return true;
}

}  // namespace

// Checks operand counts and types for GENERATE_PROPOSALS against the
// supported type combinations (float16, float32, quant8 asymm).
bool validate(const IOperationValidationContext* context) {
    NN_RET_CHECK_EQ(context->getNumInputs(), kNumInputs);
    NN_RET_CHECK_EQ(context->getNumOutputs(), kNumOutputs);
    std::vector<OperandType> inExpectedTypes;
    std::vector<OperandType> outExpectedTypes;
    auto inputType = context->getInputType(kScoreTensor);
    if (inputType == OperandType::TENSOR_FLOAT16) {
        inExpectedTypes = {OperandType::TENSOR_FLOAT16,
                           OperandType::TENSOR_FLOAT16,
                           OperandType::TENSOR_FLOAT16,
                           OperandType::TENSOR_FLOAT16,
                           OperandType::FLOAT16,
                           OperandType::FLOAT16,
                           OperandType::INT32,
                           OperandType::INT32,
                           OperandType::FLOAT16,
                           OperandType::FLOAT16,
                           OperandType::BOOL};
        outExpectedTypes = {OperandType::TENSOR_FLOAT16, OperandType::TENSOR_FLOAT16,
                            OperandType::TENSOR_INT32};
    } else if (inputType == OperandType::TENSOR_FLOAT32) {
        inExpectedTypes = {OperandType::TENSOR_FLOAT32,
                           OperandType::TENSOR_FLOAT32,
                           OperandType::TENSOR_FLOAT32,
                           OperandType::TENSOR_FLOAT32,
                           OperandType::FLOAT32,
                           OperandType::FLOAT32,
                           OperandType::INT32,
                           OperandType::INT32,
                           OperandType::FLOAT32,
                           OperandType::FLOAT32,
                           OperandType::BOOL};
        outExpectedTypes = {OperandType::TENSOR_FLOAT32, OperandType::TENSOR_FLOAT32,
                            OperandType::TENSOR_INT32};
    } else if
               (inputType == OperandType::TENSOR_QUANT8_ASYMM) {
        // Quantized variant: anchors are quant16 symm, image info quant16
        // asymm; stride/threshold scalars stay float32.
        inExpectedTypes = {OperandType::TENSOR_QUANT8_ASYMM,
                           OperandType::TENSOR_QUANT8_ASYMM,
                           OperandType::TENSOR_QUANT16_SYMM,
                           OperandType::TENSOR_QUANT16_ASYMM,
                           OperandType::FLOAT32,
                           OperandType::FLOAT32,
                           OperandType::INT32,
                           OperandType::INT32,
                           OperandType::FLOAT32,
                           OperandType::FLOAT32,
                           OperandType::BOOL};
        outExpectedTypes = {OperandType::TENSOR_QUANT8_ASYMM, OperandType::TENSOR_QUANT16_ASYMM,
                            OperandType::TENSOR_INT32};
    } else {
        NN_RET_CHECK_FAIL() << "Unsupported tensor type for operation " << kOperationName;
    }
    NN_RET_CHECK(validateInputTypes(context, inExpectedTypes));
    NN_RET_CHECK(validateOutputTypes(context, outExpectedTypes));
    return validateHalVersion(context, HalVersion::V1_2);
}

// Validates input ranks/sizes for GENERATE_PROPOSALS (honoring the NCHW/NHWC
// layout flag) and registers zero-sized dynamic output shapes; execute()
// re-publishes them once the proposal count is known.
bool prepare(IOperationExecutionContext* context) {
    bool useNchw = context->getInputValue<bool>(kLayoutScalar);
    Shape scoreShape = context->getInputShape(kScoreTensor);
    Shape bboxDeltasShape = context->getInputShape(kDeltaTensor);
    Shape anchorsShape = context->getInputShape(kAnchorTensor);
    Shape imageInfoDataShape = context->getInputShape(kImageInfoTensor);
    Shape outputScoreShape = context->getOutputShape(kOutputScoreTensor);
    Shape outputRoiShape = context->getOutputShape(kOutputRoiTensor);
    Shape outputBatchSplitShape = context->getOutputShape(kOutputBatchesTensor);

    NN_RET_CHECK_EQ(getNumberOfDimensions(scoreShape), 4);
    NN_RET_CHECK_EQ(getNumberOfDimensions(bboxDeltasShape), 4);
    NN_RET_CHECK_EQ(getNumberOfDimensions(anchorsShape), 2);
    NN_RET_CHECK_EQ(getNumberOfDimensions(imageInfoDataShape), 2);

    const uint32_t kRoiDim = 4;
    uint32_t numBatches = getSizeOfDimension(scoreShape, 0);
    uint32_t height = getSizeOfDimension(scoreShape, useNchw ? 2 : 1);
    uint32_t width = getSizeOfDimension(scoreShape, useNchw ? 3 : 2);
    uint32_t numAnchors = getSizeOfDimension(scoreShape, useNchw ? 1 : 3);

    // The delta tensor must match the score grid, with kRoiDim values per
    // anchor in the channel dimension.
    NN_RET_CHECK_EQ(getSizeOfDimension(bboxDeltasShape, 0), numBatches);
    NN_RET_CHECK_EQ(getSizeOfDimension(bboxDeltasShape, useNchw ? 2 : 1), height);
    NN_RET_CHECK_EQ(getSizeOfDimension(bboxDeltasShape, useNchw ? 3 : 2), width);
    NN_RET_CHECK_EQ(getSizeOfDimension(bboxDeltasShape, useNchw ? 1 : 3), numAnchors * kRoiDim);
    NN_RET_CHECK_EQ(getSizeOfDimension(imageInfoDataShape, 0), numBatches);
    NN_RET_CHECK_EQ(getSizeOfDimension(imageInfoDataShape, 1), 2);
    NN_RET_CHECK_EQ(getSizeOfDimension(anchorsShape, 0), numAnchors);
    NN_RET_CHECK_EQ(getSizeOfDimension(anchorsShape, 1), kRoiDim);

    if (scoreShape.type == OperandType::TENSOR_QUANT8_ASYMM) {
        // Fixed quantization: scale 1/8 for anchors and image info; image
        // info additionally requires zero point 0.
        NN_RET_CHECK_EQ(anchorsShape.scale, 0.125f);
        NN_RET_CHECK_EQ(imageInfoDataShape.scale, 0.125f);
        NN_RET_CHECK_EQ(imageInfoDataShape.offset, 0);
    }

    outputScoreShape.type = scoreShape.type;
    outputScoreShape.dimensions = {0};
    outputScoreShape.scale = scoreShape.scale;
    outputScoreShape.offset = scoreShape.offset;
    NN_RET_CHECK(context->setOutputShape(kOutputScoreTensor, outputScoreShape));

    outputRoiShape.dimensions = {0, 4};
    if (scoreShape.type == OperandType::TENSOR_QUANT8_ASYMM) {
        outputRoiShape.scale = 0.125f;
        outputRoiShape.offset = 0;
    }
    NN_RET_CHECK(context->setOutputShape(kOutputRoiTensor, outputRoiShape));

    outputBatchSplitShape.dimensions = {0};
    NN_RET_CHECK(context->setOutputShape(kOutputBatchesTensor, outputBatchSplitShape));
    return true;
}

// Dispatches GENERATE_PROPOSALS to the implementation matching the score
// tensor's data type.
bool execute(IOperationExecutionContext* context) {
    NNTRACE_TRANS("generateProposals");
    switch (context->getInputType(kScoreTensor)) {
        case OperandType::TENSOR_FLOAT16: {
            return generateProposalsFloat16(context->getInputBuffer<_Float16>(kScoreTensor),
                                            context->getInputShape(kScoreTensor),
                                            context->getInputBuffer<_Float16>(kDeltaTensor),
                                            context->getInputShape(kDeltaTensor),
                                            context->getInputBuffer<_Float16>(kAnchorTensor),
                                            context->getInputShape(kAnchorTensor),
                                            context->getInputBuffer<_Float16>(kImageInfoTensor),
                                            context->getInputShape(kImageInfoTensor),
                                            context->getInputValue<_Float16>(kHeightStrideSalar),
                                            context->getInputValue<_Float16>(kWidthStrideScalar),
                                            context->getInputValue<int32_t>(kPreNmsMaxScalar),
                                            context->getInputValue<int32_t>(kPostNmsMaxScalar),
                                            context->getInputValue<_Float16>(kIoUThresholdScalar),
                                            context->getInputValue<_Float16>(kMinSizeScalar),
                                            context->getInputValue<bool>(kLayoutScalar), context);
        }
        case OperandType::TENSOR_FLOAT32: {
            return generateProposalsFloat32(context->getInputBuffer<float>(kScoreTensor),
                                            context->getInputShape(kScoreTensor),
                                            context->getInputBuffer<float>(kDeltaTensor),
                                            context->getInputShape(kDeltaTensor),
                                            context->getInputBuffer<float>(kAnchorTensor),
                                            context->getInputShape(kAnchorTensor),
                                            context->getInputBuffer<float>(kImageInfoTensor),
                                            context->getInputShape(kImageInfoTensor),
                                            context->getInputValue<float>(kHeightStrideSalar),
                                            context->getInputValue<float>(kWidthStrideScalar),
                                            context->getInputValue<int32_t>(kPreNmsMaxScalar),
                                            context->getInputValue<int32_t>(kPostNmsMaxScalar),
                                            context->getInputValue<float>(kIoUThresholdScalar),
                                            context->getInputValue<float>(kMinSizeScalar),
                                            context->getInputValue<bool>(kLayoutScalar), context);
        }
        case OperandType::TENSOR_QUANT8_ASYMM: {
            return generateProposalsQuant(context->getInputBuffer<uint8_t>(kScoreTensor),
                                          context->getInputShape(kScoreTensor),
                                          context->getInputBuffer<uint8_t>(kDeltaTensor),
                                          context->getInputShape(kDeltaTensor),
                                          context->getInputBuffer<int16_t>(kAnchorTensor),
                                          context->getInputShape(kAnchorTensor),
                                          context->getInputBuffer<uint16_t>(kImageInfoTensor),
                                          context->getInputShape(kImageInfoTensor),
                                          context->getInputValue<float>(kHeightStrideSalar),
                                          context->getInputValue<float>(kWidthStrideScalar),
                                          context->getInputValue<int32_t>(kPreNmsMaxScalar),
                                          context->getInputValue<int32_t>(kPostNmsMaxScalar),
                                          context->getInputValue<float>(kIoUThresholdScalar),
                                          context->getInputValue<float>(kMinSizeScalar),
                                          context->getInputValue<bool>(kLayoutScalar), context);
        }
        default:
            NN_RET_CHECK_FAIL() << "Unsupported tensor type for operation " << kOperationName;
    }
}

}  // namespace generate_proposals

namespace detection_postprocess {

constexpr char kOperationName[] = "DETECTION_POSTPROCESS";

// Input operand indices.
constexpr uint32_t kNumInputs = 14;
constexpr uint32_t kScoreTensor = 0;
constexpr uint32_t kDeltaTensor = 1;
constexpr uint32_t kAnchorTensor = 2;
constexpr uint32_t kScaleYScalar = 3;
constexpr uint32_t kScaleXScalar = 4;
constexpr uint32_t kScaleHScalar = 5;
constexpr uint32_t kScaleWScalar = 6;
constexpr uint32_t kUseRegularNmsScalar = 7;
constexpr uint32_t kMaxNumDetectionScalar = 8;
constexpr uint32_t kMaxClassesPerDetectionScalar = 9;
constexpr uint32_t kMaxNumDetectionPerClassScalar = 10;
constexpr uint32_t kScoreThresholdScalar = 11;
constexpr uint32_t kIoUThresholdScalar = 12;
constexpr uint32_t kIsBGInLabelScalar = 13;

// Output operand indices.
constexpr uint32_t kNumOutputs = 4;
constexpr uint32_t kOutputScoreTensor = 0;
constexpr uint32_t kOutputRoiTensor = 1;
constexpr uint32_t kOutputClassTensor = 2;
constexpr uint32_t kOutputDetectionTensor = 3;

namespace {

// Decodes anchor-relative box deltas into corner-form ROIs, then selects the
// final detections with either regular (multi-class hard NMS) or fast
// (per-anchor best-score) post-processing.
bool detectionPostprocessFloat32(
        const float* scoreData, const Shape& scoreShape, const float* deltaData,
        const Shape& deltaShape, const float* anchorData, const Shape& anchorShape, float scaleY,
        float scaleX, float scaleH, float scaleW, bool useRegularNms, int32_t
        maxNumDetections,
        int32_t maxClassesPerDetection, int32_t maxNumDetectionsPerClass, float iouThreshold,
        float scoreThreshold, bool isBGInLabel, float* scoreOutData, const Shape& scoreOutShape,
        float* roiOutData, const Shape& roiOutShape, int32_t* classOutData,
        const Shape& classOutShape, int32_t* detectionOutData, const Shape& detectionOutShape) {
    const uint32_t kRoiDim = 4;
    uint32_t numBatches = getSizeOfDimension(scoreShape, 0);
    uint32_t numAnchors = getSizeOfDimension(scoreShape, 1);
    uint32_t numClasses = getSizeOfDimension(scoreShape, 2);
    uint32_t lengthBoxEncoding = getSizeOfDimension(deltaShape, 2);
    uint32_t numOutDetection = getSizeOfDimension(scoreOutShape, 1);

    // Zero-fill all outputs up front; slots past the detection count stay 0.
    memset(scoreOutData, 0, getNumberOfElements(scoreOutShape) * sizeof(float));
    memset(roiOutData, 0, getNumberOfElements(roiOutShape) * sizeof(float));
    memset(classOutData, 0, getNumberOfElements(classOutShape) * sizeof(int32_t));
    memset(detectionOutData, 0, getNumberOfElements(detectionOutShape) * sizeof(int32_t));

    const float* scoreBase = scoreData;
    const float* deltaBase = deltaData;
    float* scoreOutBase = scoreOutData;
    float* roiOutBase = roiOutData;
    int32_t* classOutBase = classOutData;
    std::vector<float> roiBuffer(numAnchors * kRoiDim);
    // NOTE(review): scoreBuffer appears unused in this implementation.
    std::vector<float> scoreBuffer(numAnchors);
    for (uint32_t b = 0; b < numBatches; b++) {
        // Decode deltas against the anchors into corner-form ROIs; the
        // arithmetic treats each anchor as {yCtr, xCtr, h, w}.
        const float* anchorBase = anchorData;
        for (uint32_t a = 0; a < numAnchors; a++) {
            float yCtr = anchorBase[0] + anchorBase[2] * deltaBase[0] / scaleY;
            float xCtr = anchorBase[1] + anchorBase[3] * deltaBase[1] / scaleX;
            float hHalf = anchorBase[2] * std::exp(deltaBase[2] / scaleH) * 0.5f;
            float wHalf = anchorBase[3] * std::exp(deltaBase[3] / scaleW) * 0.5f;
            roiBuffer[a * kRoiDim] = yCtr - hHalf;
            roiBuffer[a * kRoiDim + 1] = xCtr - wHalf;
            roiBuffer[a * kRoiDim + 2] = yCtr + hHalf;
            roiBuffer[a * kRoiDim + 3] = xCtr + wHalf;
            anchorBase += kRoiDim;
            deltaBase += lengthBoxEncoding;
        }

        if (useRegularNms) {
            // Regular NMS: per-class hard NMS over all (anchor, class) pairs.
            std::vector<uint32_t> select;
            box_with_nms_limit::hardNmsMultiClass(
                    scoreBase, numClasses, numAnchors, scoreThreshold, iouThreshold,
                    maxNumDetections, maxNumDetectionsPerClass,
                    [&roiBuffer, numClasses](uint32_t ind) {
                        return roiBuffer.data() + (ind / numClasses) * kRoiDim;
                    },
                    &select);
            for (uint32_t i = 0; i < select.size(); i++) {
                uint32_t ind = select[i];
                scoreOutBase[i] = scoreBase[ind];
                memcpy(roiOutBase + i * kRoiDim, &roiBuffer[(ind / numClasses) * kRoiDim],
                       kRoiDim * sizeof(float));
                // Shift labels down by one when class 0 is not in the label
                // set (isBGInLabel == false).
                classOutBase[i] = (ind % numClasses) - (isBGInLabel ? 0 : 1);
            }
            *detectionOutData++ = select.size();
        } else {
            // Fast path: single-class NMS on each anchor's best score over
            // classes 1..numClasses-1 (class 0 is skipped), then emit the
            // top numOutClasses classes of each surviving anchor.
            uint32_t numOutClasses = std::min<uint32_t>(numClasses - 1, maxClassesPerDetection);
            std::vector<float> maxScores(numAnchors);
            for (uint32_t a = 0; a < numAnchors; a++) {
                maxScores[a] = *std::max_element(scoreBase + a * numClasses + 1,
                                                 scoreBase + (a + 1) * numClasses);
            }
            std::vector<uint32_t> select;
            for (uint32_t a = 0; a < numAnchors; a++) {
                if (maxScores[a] > scoreThreshold) {
                    select.push_back(a);
                }
            }
            uint32_t* selectEnd = box_with_nms_limit::hardNmsSingleClass(
                    maxScores.data(), iouThreshold, maxNumDetections,
                    [&roiBuffer](uint32_t ind) { return roiBuffer.data() + ind * kRoiDim; },
                    select.data(), select.size());
            select.resize(selectEnd - select.data());
            float* scoreOutPtr = scoreOutBase;
            float* roiOutPtr = roiOutBase;
            int32_t* classOutPtr = classOutBase;
            for (auto i : select) {
                const float* score = scoreBase + i * numClasses;
                // Rank this anchor's classes 1..numClasses-1 by score.
                std::vector<uint32_t> scoreInds(numClasses - 1);
                std::iota(scoreInds.begin(), scoreInds.end(), 1);
                std::sort(scoreInds.begin(), scoreInds.end(),
                          [&score](const uint32_t lhs, const uint32_t rhs) {
                              return score[lhs] > score[rhs];
                          });
                for (uint32_t c = 0; c < numOutClasses; c++) {
                    *scoreOutPtr++ = score[scoreInds[c]];
                    memcpy(roiOutPtr, &roiBuffer[i * kRoiDim], kRoiDim * sizeof(float));
                    roiOutPtr += kRoiDim;
                    *classOutPtr++ = scoreInds[c] - (isBGInLabel ? 0 : 1);
                }
            }
            *detectionOutData++ = select.size() * numOutClasses;
        }
        // Advance to the next batch's slices.
        scoreBase += numAnchors * numClasses;
        scoreOutBase += numOutDetection;
        roiOutBase += numOutDetection * kRoiDim;
        classOutBase += numOutDetection;
    }
    return true;
}

// Float16 entry point: widens inputs to float32, runs the float32 kernel,
// then narrows the score and roi outputs back to float16. Class and
// detection-count outputs are int32 and written directly.
bool detectionPostprocessFloat16(
        const _Float16* scoreData, const Shape& scoreShape, const _Float16* deltaData,
        const Shape& deltaShape, const _Float16* anchorData, const Shape& anchorShape, float scaleY,
        float scaleX, float scaleH, float scaleW, bool useRegularNms, int32_t maxNumDetections,
        int32_t maxClassesPerDetection, int32_t maxNumDetectionsPerClass, float iouThreshold,
        float scoreThreshold, bool isBGInLabel, _Float16* scoreOutData, const Shape& scoreOutShape,
        _Float16* roiOutData, const Shape& roiOutShape, int32_t* classOutData,
        const Shape& classOutShape, int32_t* detectionOutData, const Shape& detectionOutShape) {
    std::vector<float> scores_float32(getNumberOfElements(scoreShape));
    convertFloat16ToFloat32(scoreData, &scores_float32);
    std::vector<float> delta_float32(getNumberOfElements(deltaShape));
    convertFloat16ToFloat32(deltaData, &delta_float32);
    std::vector<float> anchor_float32(getNumberOfElements(anchorShape));
    convertFloat16ToFloat32(anchorData, &anchor_float32);
    std::vector<float> outputScore_float32(getNumberOfElements(scoreOutShape));
    std::vector<float> outputRoi_float32(getNumberOfElements(roiOutShape));
    NN_RET_CHECK(detectionPostprocessFloat32(
            scores_float32.data(), scoreShape, delta_float32.data(), deltaShape,
            anchor_float32.data(), anchorShape, scaleY, scaleX, scaleH, scaleW, useRegularNms,
            maxNumDetections, maxClassesPerDetection, maxNumDetectionsPerClass, iouThreshold,
            scoreThreshold, isBGInLabel, outputScore_float32.data(), scoreOutShape,
            outputRoi_float32.data(), roiOutShape, classOutData, classOutShape, detectionOutData,
            detectionOutShape));
    convertFloat32ToFloat16(outputScore_float32, scoreOutData);
    convertFloat32ToFloat16(outputRoi_float32, roiOutData);
    return true;
}

}  // namespace

// Checks operand counts and types for DETECTION_POSTPROCESS (float16 and
// float32 only; no quantized variant).
bool validate(const IOperationValidationContext* context) {
    NN_RET_CHECK_EQ(context->getNumInputs(), kNumInputs);
    NN_RET_CHECK_EQ(context->getNumOutputs(), kNumOutputs);
    std::vector<OperandType> inExpectedTypes;
    std::vector<OperandType> outExpectedTypes;
    auto inputType = context->getInputType(kScoreTensor);
    if (inputType == OperandType::TENSOR_FLOAT16) {
        inExpectedTypes = {OperandType::TENSOR_FLOAT16, OperandType::TENSOR_FLOAT16,
                           OperandType::TENSOR_FLOAT16, OperandType::FLOAT16,
                           OperandType::FLOAT16,        OperandType::FLOAT16,
                           OperandType::FLOAT16,        OperandType::BOOL,
                           OperandType::INT32,          OperandType::INT32,
                           OperandType::INT32,          OperandType::FLOAT16,
                           OperandType::FLOAT16,        OperandType::BOOL};
    } else if (inputType == OperandType::TENSOR_FLOAT32) {
        inExpectedTypes = {OperandType::TENSOR_FLOAT32, OperandType::TENSOR_FLOAT32,
                           OperandType::TENSOR_FLOAT32, OperandType::FLOAT32,
                           OperandType::FLOAT32,        OperandType::FLOAT32,
                           OperandType::FLOAT32,        OperandType::BOOL,
                           OperandType::INT32,          OperandType::INT32,
                           OperandType::INT32,          OperandType::FLOAT32,
                           OperandType::FLOAT32,        OperandType::BOOL};
    } else {
        NN_RET_CHECK_FAIL() << "Unsupported tensor type for operation " << kOperationName;
    }
    NN_RET_CHECK(validateInputTypes(context, inExpectedTypes));
    NN_RET_CHECK(validateOutputTypes(
            context, {inputType, inputType, OperandType::TENSOR_INT32, OperandType::TENSOR_INT32}));
    return validateHalVersion(context,
HalVersion::V1_2);
}

// Validates input shapes and scalar parameters, then computes and sets the
// four output shapes: scores [numBatches, numOutDetections],
// ROIs [numBatches, numOutDetections, 4], classes [numBatches, numOutDetections],
// and per-batch detection counts [numBatches].
bool prepare(IOperationExecutionContext* context) {
    Shape scoreShape = context->getInputShape(kScoreTensor);
    Shape deltasShape = context->getInputShape(kDeltaTensor);
    Shape anchorsShape = context->getInputShape(kAnchorTensor);
    Shape outputScoreShape = context->getOutputShape(kOutputScoreTensor);
    Shape outputRoiShape = context->getOutputShape(kOutputRoiTensor);
    Shape outputClassShape = context->getOutputShape(kOutputClassTensor);
    Shape outputDetectionShape = context->getOutputShape(kOutputDetectionTensor);

    // Expected ranks: scores [batches, anchors, classes],
    // deltas [batches, anchors, lengthBoxEncoding], anchors [anchors, 4].
    NN_RET_CHECK_EQ(getNumberOfDimensions(scoreShape), 3);
    NN_RET_CHECK_EQ(getNumberOfDimensions(deltasShape), 3);
    NN_RET_CHECK_EQ(getNumberOfDimensions(anchorsShape), 2);

    const uint32_t kRoiDim = 4;
    uint32_t numBatches = getSizeOfDimension(scoreShape, 0);
    uint32_t numAnchors = getSizeOfDimension(scoreShape, 1);
    uint32_t numClasses = getSizeOfDimension(scoreShape, 2);
    uint32_t lengthBoxEncoding = getSizeOfDimension(deltasShape, 2);
    // NOTE(review): these int32 scalars are widened into uint32 *before* the
    // positivity checks further down, so a negative input would wrap to a huge
    // unsigned value and still pass NN_RET_CHECK_GT(..., 0) — confirm intended.
    uint32_t maxNumDetections = context->getInputValue<int32_t>(kMaxNumDetectionScalar);
    uint32_t maxClassesPerDetection =
            context->getInputValue<int32_t>(kMaxClassesPerDetectionScalar);
    uint32_t numOutDetections = maxNumDetections;

    // Cross-check the tensor shapes against each other.
    NN_RET_CHECK_EQ(getSizeOfDimension(deltasShape, 0), numBatches);
    NN_RET_CHECK_EQ(getSizeOfDimension(deltasShape, 1), numAnchors);
    NN_RET_CHECK_EQ(getSizeOfDimension(anchorsShape, 0), numAnchors);
    NN_RET_CHECK_EQ(getSizeOfDimension(anchorsShape, 1), kRoiDim);

    // All four scale factors must be strictly positive.
    if (scoreShape.type == OperandType::TENSOR_FLOAT32) {
        NN_RET_CHECK_GT(context->getInputValue<float>(kScaleYScalar), 0);
        NN_RET_CHECK_GT(context->getInputValue<float>(kScaleXScalar), 0);
        NN_RET_CHECK_GT(context->getInputValue<float>(kScaleHScalar), 0);
        NN_RET_CHECK_GT(context->getInputValue<float>(kScaleWScalar), 0);
NN_RET_CHECK_GE(context->getInputValue<float>(kScoreThresholdScalar), 0);
        NN_RET_CHECK_GE(context->getInputValue<float>(kIoUThresholdScalar), 0);
    } else if (scoreShape.type == OperandType::TENSOR_FLOAT16) {
        // Same checks as the float32 branch, written with plain NN_RET_CHECK and
        // explicit comparisons for the _Float16 scalars.
        NN_RET_CHECK(context->getInputValue<_Float16>(kScaleYScalar) > 0);
        NN_RET_CHECK(context->getInputValue<_Float16>(kScaleXScalar) > 0);
        NN_RET_CHECK(context->getInputValue<_Float16>(kScaleHScalar) > 0);
        NN_RET_CHECK(context->getInputValue<_Float16>(kScaleWScalar) > 0);
        NN_RET_CHECK(context->getInputValue<_Float16>(kScoreThresholdScalar) >= 0);
        NN_RET_CHECK(context->getInputValue<_Float16>(kIoUThresholdScalar) >= 0);
    } else {
        NN_RET_CHECK_FAIL() << "Unsupported tensor type for operation " << kOperationName;
    }
    // At least two classes are required (class 0 is skipped as background in
    // the fast-NMS path), and each box encoding must carry at least 4 values.
    NN_RET_CHECK_GT(numClasses, 1);
    NN_RET_CHECK_GE(lengthBoxEncoding, 4);
    NN_RET_CHECK_GT(maxNumDetections, 0);
    if (context->getInputValue<bool>(kUseRegularNmsScalar)) {
        NN_RET_CHECK_GT(context->getInputValue<int32_t>(kMaxNumDetectionPerClassScalar), 0);
    } else {
        // Fast NMS may emit several classes per kept box, so the output
        // capacity scales accordingly.
        NN_RET_CHECK_GT(maxClassesPerDetection, 0);
        numOutDetections *= maxClassesPerDetection;
    }

    outputScoreShape.type = scoreShape.type;
    outputScoreShape.dimensions = {numBatches, numOutDetections};
    NN_RET_CHECK(context->setOutputShape(kOutputScoreTensor, outputScoreShape));

    outputRoiShape.type = anchorsShape.type;
    outputRoiShape.dimensions = {numBatches, numOutDetections, 4};
    NN_RET_CHECK(context->setOutputShape(kOutputRoiTensor, outputRoiShape));

    outputClassShape.type = OperandType::TENSOR_INT32;
    outputClassShape.dimensions = {numBatches, numOutDetections};
    NN_RET_CHECK(context->setOutputShape(kOutputClassTensor, outputClassShape));

    outputDetectionShape.type = OperandType::TENSOR_INT32;
    outputDetectionShape.dimensions = {numBatches};
    NN_RET_CHECK(context->setOutputShape(kOutputDetectionTensor, outputDetectionShape));
    return
true;
}

// Entry point: dispatches to the float16 or float32 kernel based on the score
// tensor's operand type.
bool execute(IOperationExecutionContext* context) {
    NNTRACE_TRANS("detectionPostProcess");
    switch (context->getInputType(kScoreTensor)) {
        case OperandType::TENSOR_FLOAT16: {
            return detectionPostprocessFloat16(
                    context->getInputBuffer<_Float16>(kScoreTensor),
                    context->getInputShape(kScoreTensor),
                    context->getInputBuffer<_Float16>(kDeltaTensor),
                    context->getInputShape(kDeltaTensor),
                    context->getInputBuffer<_Float16>(kAnchorTensor),
                    context->getInputShape(kAnchorTensor),
                    context->getInputValue<_Float16>(kScaleYScalar),
                    context->getInputValue<_Float16>(kScaleXScalar),
                    context->getInputValue<_Float16>(kScaleHScalar),
                    context->getInputValue<_Float16>(kScaleWScalar),
                    context->getInputValue<bool>(kUseRegularNmsScalar),
                    context->getInputValue<int32_t>(kMaxNumDetectionScalar),
                    context->getInputValue<int32_t>(kMaxClassesPerDetectionScalar),
                    context->getInputValue<int32_t>(kMaxNumDetectionPerClassScalar),
                    context->getInputValue<_Float16>(kIoUThresholdScalar),
                    context->getInputValue<_Float16>(kScoreThresholdScalar),
                    context->getInputValue<bool>(kIsBGInLabelScalar),
                    context->getOutputBuffer<_Float16>(kOutputScoreTensor),
                    context->getOutputShape(kOutputScoreTensor),
                    context->getOutputBuffer<_Float16>(kOutputRoiTensor),
                    context->getOutputShape(kOutputRoiTensor),
                    context->getOutputBuffer<int32_t>(kOutputClassTensor),
                    context->getOutputShape(kOutputClassTensor),
                    context->getOutputBuffer<int32_t>(kOutputDetectionTensor),
                    context->getOutputShape(kOutputDetectionTensor));
        }
        case OperandType::TENSOR_FLOAT32: {
            return detectionPostprocessFloat32(
                    context->getInputBuffer<float>(kScoreTensor),
                    context->getInputShape(kScoreTensor),
                    context->getInputBuffer<float>(kDeltaTensor),
                    context->getInputShape(kDeltaTensor),
                    context->getInputBuffer<float>(kAnchorTensor),
context->getInputShape(kAnchorTensor),
                    context->getInputValue<float>(kScaleYScalar),
                    context->getInputValue<float>(kScaleXScalar),
                    context->getInputValue<float>(kScaleHScalar),
                    context->getInputValue<float>(kScaleWScalar),
                    context->getInputValue<bool>(kUseRegularNmsScalar),
                    context->getInputValue<int32_t>(kMaxNumDetectionScalar),
                    context->getInputValue<int32_t>(kMaxClassesPerDetectionScalar),
                    context->getInputValue<int32_t>(kMaxNumDetectionPerClassScalar),
                    context->getInputValue<float>(kIoUThresholdScalar),
                    context->getInputValue<float>(kScoreThresholdScalar),
                    context->getInputValue<bool>(kIsBGInLabelScalar),
                    context->getOutputBuffer<float>(kOutputScoreTensor),
                    context->getOutputShape(kOutputScoreTensor),
                    context->getOutputBuffer<float>(kOutputRoiTensor),
                    context->getOutputShape(kOutputRoiTensor),
                    context->getOutputBuffer<int32_t>(kOutputClassTensor),
                    context->getOutputShape(kOutputClassTensor),
                    context->getOutputBuffer<int32_t>(kOutputDetectionTensor),
                    context->getOutputShape(kOutputDetectionTensor));
        }
        default:
            NN_RET_CHECK_FAIL() << "Unsupported tensor type for operation " << kOperationName;
    }
}

}  // namespace detection_postprocess

}  // namespace bbox_ops

// Operation registrations for the resolver. AXIS_ALIGNED_BBOX_TRANSFORM and
// BOX_WITH_NMS_LIMIT opt in to zero-sized inputs via allowZeroSizedInput.
NN_REGISTER_OPERATION(AXIS_ALIGNED_BBOX_TRANSFORM,
                      bbox_ops::axis_aligned_bbox_transform::kOperationName,
                      bbox_ops::axis_aligned_bbox_transform::validate,
                      bbox_ops::axis_aligned_bbox_transform::prepare,
                      bbox_ops::axis_aligned_bbox_transform::execute, .allowZeroSizedInput = true);

NN_REGISTER_OPERATION(BOX_WITH_NMS_LIMIT, bbox_ops::box_with_nms_limit::kOperationName,
                      bbox_ops::box_with_nms_limit::validate, bbox_ops::box_with_nms_limit::prepare,
                      bbox_ops::box_with_nms_limit::execute, .allowZeroSizedInput = true);

NN_REGISTER_OPERATION(GENERATE_PROPOSALS, bbox_ops::generate_proposals::kOperationName,
bbox_ops::generate_proposals::validate, bbox_ops::generate_proposals::prepare,
                      bbox_ops::generate_proposals::execute);

NN_REGISTER_OPERATION(DETECTION_POSTPROCESSING, bbox_ops::detection_postprocess::kOperationName,
                      bbox_ops::detection_postprocess::validate,
                      bbox_ops::detection_postprocess::prepare,
                      bbox_ops::detection_postprocess::execute);

}  // namespace nn
}  // namespace android