/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                          License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistributions of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistributions in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#include "precomp.hpp"
#include "opencv2/objdetect/objdetect_c.h"

using namespace cv;
using namespace cv::cuda;

#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)

Ptr<cuda::CascadeClassifier> cv::cuda::CascadeClassifier::create(const String&) { throw_no_cuda(); return Ptr<cuda::CascadeClassifier>(); }
Ptr<cuda::CascadeClassifier> cv::cuda::CascadeClassifier::create(const FileStorage&) { throw_no_cuda(); return Ptr<cuda::CascadeClassifier>(); }

#else

//
// CascadeClassifierBase
//

namespace
{
    class CascadeClassifierBase : public cuda::CascadeClassifier
    {
    public:
        CascadeClassifierBase();

        virtual void setMaxObjectSize(Size maxObjectSize) { maxObjectSize_ = maxObjectSize; }
        virtual Size getMaxObjectSize() const { return maxObjectSize_; }

        virtual void setMinObjectSize(Size minSize) { minObjectSize_ = minSize; }
        virtual Size getMinObjectSize() const { return minObjectSize_; }

        virtual void setScaleFactor(double scaleFactor) { scaleFactor_ = scaleFactor; }
        virtual double getScaleFactor() const { return scaleFactor_; }

        virtual void setMinNeighbors(int minNeighbors) { minNeighbors_ = minNeighbors; }
        virtual int getMinNeighbors() const { return minNeighbors_; }

        virtual void setFindLargestObject(bool findLargestObject) { findLargestObject_ = findLargestObject; }
        virtual bool getFindLargestObject() { return findLargestObject_; }

        virtual void setMaxNumObjects(int maxNumObjects) { maxNumObjects_ = maxNumObjects; }
        virtual int getMaxNumObjects() const { return maxNumObjects_; }

    protected:
        Size maxObjectSize_;
        Size minObjectSize_;
        double scaleFactor_;
        int minNeighbors_;
        bool findLargestObject_;
        int maxNumObjects_;
    };

    CascadeClassifierBase::CascadeClassifierBase() :
        maxObjectSize_(),
        minObjectSize_(),
        scaleFactor_(1.2),
        minNeighbors_(4),
        findLargestObject_(false),
        maxNumObjects_(100)
    {
    }
}
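
// A minimal tuning sketch for the setters above (illustrative only; `cascade` stands
// for any Ptr<cuda::CascadeClassifier> returned by create(), defined later in this file).
// The values shown mirror the defaults set in the constructor:
//
//     cascade->setScaleFactor(1.2);             // pyramid step; detectMultiScale asserts > 1
//     cascade->setMinNeighbors(4);              // candidate-grouping threshold
//     cascade->setMinObjectSize(Size(24, 24));  // ignore hits smaller than this
//     cascade->setMaxObjectSize(Size());        // empty Size() = no upper bound (frame size)
//     cascade->setMaxNumObjects(100);           // capacity of the internal result buffer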

//
// HaarCascade
//

#ifdef HAVE_OPENCV_CUDALEGACY

namespace
{
    class HaarCascade_Impl : public CascadeClassifierBase
    {
    public:
        explicit HaarCascade_Impl(const String& filename);

        virtual Size getClassifierSize() const;

        virtual void detectMultiScale(InputArray image,
                                      OutputArray objects,
                                      Stream& stream);

        virtual void convert(OutputArray gpu_objects,
                             std::vector<Rect>& objects);

    private:
        NCVStatus load(const String& classifierFile);
        NCVStatus calculateMemReqsAndAllocate(const Size& frameSize);
        NCVStatus process(const GpuMat& src, GpuMat& objects, cv::Size ncvMinSize, /*out*/ unsigned int& numDetections);

        Size lastAllocatedFrameSize;

        Ptr<NCVMemStackAllocator> gpuAllocator;
        Ptr<NCVMemStackAllocator> cpuAllocator;

        cudaDeviceProp devProp;
        NCVStatus ncvStat;

        Ptr<NCVMemNativeAllocator> gpuCascadeAllocator;
        Ptr<NCVMemNativeAllocator> cpuCascadeAllocator;

        Ptr<NCVVectorAlloc<HaarStage64> > h_haarStages;
        Ptr<NCVVectorAlloc<HaarClassifierNode128> > h_haarNodes;
        Ptr<NCVVectorAlloc<HaarFeature64> > h_haarFeatures;

        HaarClassifierCascadeDescriptor haar;

        Ptr<NCVVectorAlloc<HaarStage64> > d_haarStages;
        Ptr<NCVVectorAlloc<HaarClassifierNode128> > d_haarNodes;
        Ptr<NCVVectorAlloc<HaarFeature64> > d_haarFeatures;
    };

    static void NCVDebugOutputHandler(const String &msg)
    {
        CV_Error(Error::GpuApiCallError, msg.c_str());
    }

    HaarCascade_Impl::HaarCascade_Impl(const String& filename) :
        lastAllocatedFrameSize(-1, -1)
    {
        ncvSetDebugOutputHandler(NCVDebugOutputHandler);
        ncvSafeCall( load(filename) );
    }

    Size HaarCascade_Impl::getClassifierSize() const
    {
        return Size(haar.ClassifierSize.width, haar.ClassifierSize.height);
    }

    void HaarCascade_Impl::detectMultiScale(InputArray _image,
                                            OutputArray _objects,
                                            Stream& stream)
    {
        const GpuMat image = _image.getGpuMat();

        CV_Assert( image.depth() == CV_8U );
        CV_Assert( scaleFactor_ > 1 );
        CV_Assert( !stream );

        Size ncvMinSize = getClassifierSize();
        if (ncvMinSize.width < minObjectSize_.width && ncvMinSize.height < minObjectSize_.height)
        {
            ncvMinSize.width = minObjectSize_.width;
            ncvMinSize.height = minObjectSize_.height;
        }

        BufferPool pool(stream);
        GpuMat objectsBuf = pool.getBuffer(1, maxNumObjects_, DataType<Rect>::type);

        unsigned int numDetections;
        ncvSafeCall( process(image, objectsBuf, ncvMinSize, numDetections) );

        if (numDetections > 0)
        {
            objectsBuf.colRange(0, numDetections).copyTo(_objects);
        }
        else
        {
            _objects.release();
        }
    }

    void HaarCascade_Impl::convert(OutputArray _gpu_objects, std::vector<Rect>& objects)
    {
        if (_gpu_objects.empty())
        {
            objects.clear();
            return;
        }

        Mat gpu_objects;
        if (_gpu_objects.kind() == _InputArray::CUDA_GPU_MAT)
        {
            _gpu_objects.getGpuMat().download(gpu_objects);
        }
        else
        {
            gpu_objects = _gpu_objects.getMat();
        }

        CV_Assert( gpu_objects.rows == 1 );
        CV_Assert( gpu_objects.type() == DataType<Rect>::type );

        Rect* ptr = gpu_objects.ptr<Rect>();
        objects.assign(ptr, ptr + gpu_objects.cols);
    }
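
    // Typical flow through the two methods above (a sketch, not part of the library;
    // `cascade` and `d_frame` are assumed to exist, with d_frame an 8-bit grayscale GpuMat):
    //
    //     GpuMat d_objects;
    //     cascade->detectMultiScale(d_frame, d_objects);  // detections stay on the GPU
    //     std::vector<Rect> rects;
    //     cascade->convert(d_objects, rects);             // one download to host memory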

    NCVStatus HaarCascade_Impl::load(const String& classifierFile)
    {
        int devId = cv::cuda::getDevice();
        ncvAssertCUDAReturn(cudaGetDeviceProperties(&devProp, devId), NCV_CUDA_ERROR);

        // Load the classifier from file (assuming its size is about 1 mb) using a simple allocator
        gpuCascadeAllocator = makePtr<NCVMemNativeAllocator>(NCVMemoryTypeDevice, static_cast<int>(devProp.textureAlignment));
        cpuCascadeAllocator = makePtr<NCVMemNativeAllocator>(NCVMemoryTypeHostPinned, static_cast<int>(devProp.textureAlignment));

        ncvAssertPrintReturn(gpuCascadeAllocator->isInitialized(), "Error creating cascade GPU allocator", NCV_CUDA_ERROR);
        ncvAssertPrintReturn(cpuCascadeAllocator->isInitialized(), "Error creating cascade CPU allocator", NCV_CUDA_ERROR);

        Ncv32u haarNumStages, haarNumNodes, haarNumFeatures;
        ncvStat = ncvHaarGetClassifierSize(classifierFile, haarNumStages, haarNumNodes, haarNumFeatures);
        ncvAssertPrintReturn(ncvStat == NCV_SUCCESS, "Error reading classifier size (check the file)", NCV_FILE_ERROR);

        h_haarStages.reset  (new NCVVectorAlloc<HaarStage64>(*cpuCascadeAllocator, haarNumStages));
        h_haarNodes.reset   (new NCVVectorAlloc<HaarClassifierNode128>(*cpuCascadeAllocator, haarNumNodes));
        h_haarFeatures.reset(new NCVVectorAlloc<HaarFeature64>(*cpuCascadeAllocator, haarNumFeatures));

        ncvAssertPrintReturn(h_haarStages->isMemAllocated(), "Error in cascade CPU allocator", NCV_CUDA_ERROR);
        ncvAssertPrintReturn(h_haarNodes->isMemAllocated(), "Error in cascade CPU allocator", NCV_CUDA_ERROR);
        ncvAssertPrintReturn(h_haarFeatures->isMemAllocated(), "Error in cascade CPU allocator", NCV_CUDA_ERROR);

        ncvStat = ncvHaarLoadFromFile_host(classifierFile, haar, *h_haarStages, *h_haarNodes, *h_haarFeatures);
        ncvAssertPrintReturn(ncvStat == NCV_SUCCESS, "Error loading classifier", NCV_FILE_ERROR);

        d_haarStages.reset  (new NCVVectorAlloc<HaarStage64>(*gpuCascadeAllocator, haarNumStages));
        d_haarNodes.reset   (new NCVVectorAlloc<HaarClassifierNode128>(*gpuCascadeAllocator, haarNumNodes));
        d_haarFeatures.reset(new NCVVectorAlloc<HaarFeature64>(*gpuCascadeAllocator, haarNumFeatures));

        ncvAssertPrintReturn(d_haarStages->isMemAllocated(), "Error in cascade GPU allocator", NCV_CUDA_ERROR);
        ncvAssertPrintReturn(d_haarNodes->isMemAllocated(), "Error in cascade GPU allocator", NCV_CUDA_ERROR);
        ncvAssertPrintReturn(d_haarFeatures->isMemAllocated(), "Error in cascade GPU allocator", NCV_CUDA_ERROR);

        ncvStat = h_haarStages->copySolid(*d_haarStages, 0);
        ncvAssertPrintReturn(ncvStat == NCV_SUCCESS, "Error copying cascade to GPU", NCV_CUDA_ERROR);
        ncvStat = h_haarNodes->copySolid(*d_haarNodes, 0);
        ncvAssertPrintReturn(ncvStat == NCV_SUCCESS, "Error copying cascade to GPU", NCV_CUDA_ERROR);
        ncvStat = h_haarFeatures->copySolid(*d_haarFeatures, 0);
        ncvAssertPrintReturn(ncvStat == NCV_SUCCESS, "Error copying cascade to GPU", NCV_CUDA_ERROR);

        return NCV_SUCCESS;
    }

    NCVStatus HaarCascade_Impl::calculateMemReqsAndAllocate(const Size& frameSize)
    {
        if (lastAllocatedFrameSize == frameSize)
        {
            return NCV_SUCCESS;
        }

        // Calculate memory requirements and create real allocators
        NCVMemStackAllocator gpuCounter(static_cast<int>(devProp.textureAlignment));
        NCVMemStackAllocator cpuCounter(static_cast<int>(devProp.textureAlignment));

        ncvAssertPrintReturn(gpuCounter.isInitialized(), "Error creating GPU memory counter", NCV_CUDA_ERROR);
        ncvAssertPrintReturn(cpuCounter.isInitialized(), "Error creating CPU memory counter", NCV_CUDA_ERROR);

        NCVMatrixAlloc<Ncv8u> d_src(gpuCounter, frameSize.width, frameSize.height);
        NCVMatrixAlloc<Ncv8u> h_src(cpuCounter, frameSize.width, frameSize.height);

        ncvAssertReturn(d_src.isMemAllocated(), NCV_ALLOCATOR_BAD_ALLOC);
        ncvAssertReturn(h_src.isMemAllocated(), NCV_ALLOCATOR_BAD_ALLOC);

        NCVVectorAlloc<NcvRect32u> d_rects(gpuCounter, 100);
        ncvAssertReturn(d_rects.isMemAllocated(), NCV_ALLOCATOR_BAD_ALLOC);

        NcvSize32u roi;
        roi.width = d_src.width();
        roi.height = d_src.height();
        Ncv32u numDetections;
        ncvStat = ncvDetectObjectsMultiScale_device(d_src, roi, d_rects, numDetections, haar, *h_haarStages,
                                                    *d_haarStages, *d_haarNodes, *d_haarFeatures, haar.ClassifierSize,
                                                    4, 1.2f, 1, 0, gpuCounter, cpuCounter, devProp, 0);

        ncvAssertReturnNcvStat(ncvStat);
        ncvAssertCUDAReturn(cudaStreamSynchronize(0), NCV_CUDA_ERROR);

        gpuAllocator = makePtr<NCVMemStackAllocator>(NCVMemoryTypeDevice, gpuCounter.maxSize(), static_cast<int>(devProp.textureAlignment));
        cpuAllocator = makePtr<NCVMemStackAllocator>(NCVMemoryTypeHostPinned, cpuCounter.maxSize(), static_cast<int>(devProp.textureAlignment));

        ncvAssertPrintReturn(gpuAllocator->isInitialized(), "Error creating GPU memory allocator", NCV_CUDA_ERROR);
        ncvAssertPrintReturn(cpuAllocator->isInitialized(), "Error creating CPU memory allocator", NCV_CUDA_ERROR);

        lastAllocatedFrameSize = frameSize;
        return NCV_SUCCESS;
    }
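
    // The two-pass sizing idiom above in miniature: an NCVMemStackAllocator built with
    // only an alignment acts as a counting allocator, so a dry run of the detection
    // pipeline records peak memory demand, and the real stack allocators are then
    // created with exactly counter.maxSize() bytes. Schematic sketch (runPipelineOnce
    // is a hypothetical stand-in for the ncvDetectObjectsMultiScale_device call):
    //
    //     NCVMemStackAllocator counter(alignment);            // counting mode
    //     runPipelineOnce(..., counter, ...);                 // tallies sizes only
    //     NCVMemStackAllocator real(NCVMemoryTypeDevice, counter.maxSize(), alignment);
    //     runPipelineOnce(..., real, ...);                    // no mid-frame allocations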

    NCVStatus HaarCascade_Impl::process(const GpuMat& src, GpuMat& objects, cv::Size ncvMinSize, /*out*/ unsigned int& numDetections)
    {
        calculateMemReqsAndAllocate(src.size());

        NCVMemPtr src_beg;
        src_beg.ptr = (void*)src.ptr<Ncv8u>();
        src_beg.memtype = NCVMemoryTypeDevice;

        NCVMemSegment src_seg;
        src_seg.begin = src_beg;
        src_seg.size = src.step * src.rows;

        NCVMatrixReuse<Ncv8u> d_src(src_seg, static_cast<int>(devProp.textureAlignment), src.cols, src.rows, static_cast<int>(src.step), true);
        ncvAssertReturn(d_src.isMemReused(), NCV_ALLOCATOR_BAD_REUSE);

        CV_Assert(objects.rows == 1);

        NCVMemPtr objects_beg;
        objects_beg.ptr = (void*)objects.ptr<NcvRect32u>();
        objects_beg.memtype = NCVMemoryTypeDevice;

        NCVMemSegment objects_seg;
        objects_seg.begin = objects_beg;
        objects_seg.size = objects.step * objects.rows;
        NCVVectorReuse<NcvRect32u> d_rects(objects_seg, objects.cols);
        ncvAssertReturn(d_rects.isMemReused(), NCV_ALLOCATOR_BAD_REUSE);

        NcvSize32u roi;
        roi.width = d_src.width();
        roi.height = d_src.height();

        NcvSize32u winMinSize(ncvMinSize.width, ncvMinSize.height);

        Ncv32u flags = 0;
        flags |= findLargestObject_ ? NCVPipeObjDet_FindLargestObject : 0;

        ncvStat = ncvDetectObjectsMultiScale_device(
            d_src, roi, d_rects, numDetections, haar, *h_haarStages,
            *d_haarStages, *d_haarNodes, *d_haarFeatures,
            winMinSize,
            minNeighbors_,
            scaleFactor_, 1,
            flags,
            *gpuAllocator, *cpuAllocator, devProp, 0);
        ncvAssertReturnNcvStat(ncvStat);
        ncvAssertCUDAReturn(cudaStreamSynchronize(0), NCV_CUDA_ERROR);

        return NCV_SUCCESS;
    }
}

#endif

//
// LbpCascade
//

namespace cv { namespace cuda { namespace device
{
    namespace lbp
    {
        void classifyPyramid(int frameW,
                             int frameH,
                             int windowW,
                             int windowH,
                             float initialScale,
                             float factor,
                             int total,
                             const PtrStepSzb& mstages,
                             const int nstages,
                             const PtrStepSzi& mnodes,
                             const PtrStepSzf& mleaves,
                             const PtrStepSzi& msubsets,
                             const PtrStepSzb& mfeatures,
                             const int subsetSize,
                             PtrStepSz<int4> objects,
                             unsigned int* classified,
                             PtrStepSzi integral);

        // NB: the misspelling is preserved; the name must match the device-side definition.
        void connectedConmonents(PtrStepSz<int4> candidates,
                                 int ncandidates,
                                 PtrStepSz<int4> objects,
                                 int groupThreshold,
                                 float grouping_eps,
                                 unsigned int* nclasses);
    }
}}}

namespace
{
    cv::Size operator -(const cv::Size& a, const cv::Size& b)
    {
        return cv::Size(a.width - b.width, a.height - b.height);
    }

    cv::Size operator +(const cv::Size& a, const int& i)
    {
        return cv::Size(a.width + i, a.height + i);
    }

    cv::Size operator *(const cv::Size& a, const float& f)
    {
        return cv::Size(cvRound(a.width * f), cvRound(a.height * f));
    }

    cv::Size operator /(const cv::Size& a, const float& f)
    {
        return cv::Size(cvRound(a.width / f), cvRound(a.height / f));
    }

    bool operator <=(const cv::Size& a, const cv::Size& b)
    {
        return a.width <= b.width && a.height <= b.height;
    }
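
    // Worked example of the helpers above as used by the pyramid level below (numbers
    // are illustrative): with frame = 640x480, window = 24x24 and scale = 1.2^2 = 1.44,
    //
    //     sFrame   = frame / scale       -> 444x333  (down-scaled frame)
    //     workArea = sFrame - window + 1 -> 421x310  (valid window positions)
    //     sWindow  = window * scale      -> 35x35    (detection size in original pixels)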

    struct PyrLevel
    {
        PyrLevel(int _order, float _scale, cv::Size frame, cv::Size window, cv::Size minObjectSize)
        {
            do
            {
                order = _order;
                scale = pow(_scale, order);
                sFrame = frame / scale;
                workArea = sFrame - window + 1;
                sWindow = window * scale;
                _order++;
            } while (sWindow <= minObjectSize);
        }

        bool isFeasible(cv::Size maxObj)
        {
            return workArea.width > 0 && workArea.height > 0 && sWindow <= maxObj;
        }

        PyrLevel next(float factor, cv::Size frame, cv::Size window, cv::Size minObjectSize)
        {
            return PyrLevel(order + 1, factor, frame, window, minObjectSize);
        }

        int order;
        float scale;
        cv::Size sFrame;
        cv::Size workArea;
        cv::Size sWindow;
    };

    class LbpCascade_Impl : public CascadeClassifierBase
    {
    public:
        explicit LbpCascade_Impl(const FileStorage& file);

        virtual Size getClassifierSize() const { return NxM; }

        virtual void detectMultiScale(InputArray image,
                                      OutputArray objects,
                                      Stream& stream);

        virtual void convert(OutputArray gpu_objects,
                             std::vector<Rect>& objects);

    private:
        bool load(const FileNode &root);
        void allocateBuffers(cv::Size frame);

    private:
        struct Stage
        {
            int first;
            int ntrees;
            float threshold;
        };

        enum stage { BOOST = 0 };
        enum feature { LBP = 1, HAAR = 2 };

        static const stage stageType = BOOST;
        static const feature featureType = LBP;

        cv::Size NxM;
        bool isStumps;
        int ncategories;
        int subsetSize;
        int nodeStep;

        // gpu representation of classifier
        GpuMat stage_mat;
        GpuMat trees_mat;
        GpuMat nodes_mat;
        GpuMat leaves_mat;
        GpuMat subsets_mat;
        GpuMat features_mat;

        GpuMat integral;
        GpuMat integralBuffer;
        GpuMat resizeBuffer;

        GpuMat candidates;
        static const int integralFactor = 4;
    };

    LbpCascade_Impl::LbpCascade_Impl(const FileStorage& file)
    {
        load(file.getFirstTopLevelNode());
    }
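
    // A minimal construction sketch for the FileStorage-based path (illustrative;
    // "lbpcascade.xml" is an assumed file name). The public entry point
    // cuda::CascadeClassifier::create(const FileStorage&) at the end of this file
    // forwards straight to this constructor:
    //
    //     FileStorage fs("lbpcascade.xml", FileStorage::READ);
    //     CV_Assert(fs.isOpened());
    //     Ptr<cuda::CascadeClassifier> lbp = cuda::CascadeClassifier::create(fs);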

    void LbpCascade_Impl::detectMultiScale(InputArray _image,
                                           OutputArray _objects,
                                           Stream& stream)
    {
        const GpuMat image = _image.getGpuMat();

        CV_Assert( image.depth() == CV_8U );
        CV_Assert( scaleFactor_ > 1 );
        CV_Assert( !stream );

        const float grouping_eps = 0.2f;

        BufferPool pool(stream);
        GpuMat objects = pool.getBuffer(1, maxNumObjects_, DataType<Rect>::type);

        // used for debug
        // candidates.setTo(cv::Scalar::all(0));
        // objects.setTo(cv::Scalar::all(0));

        if (maxObjectSize_ == cv::Size())
            maxObjectSize_ = image.size();

        allocateBuffers(image.size());

        unsigned int classified = 0;
        GpuMat dclassified(1, 1, CV_32S);
        cudaSafeCall( cudaMemcpy(dclassified.ptr(), &classified, sizeof(int), cudaMemcpyHostToDevice) );

        PyrLevel level(0, scaleFactor_, image.size(), NxM, minObjectSize_);

        while (level.isFeasible(maxObjectSize_))
        {
            int acc = level.sFrame.width + 1;
            float iniScale = level.scale;

            cv::Size area = level.workArea;
            int step = 1 + (level.scale <= 2.f);

            int total = 0, prev = 0;

            while (acc <= integralFactor * (image.cols + 1) && level.isFeasible(maxObjectSize_))
            {
                // create suitable matrix headers
                GpuMat src = resizeBuffer(cv::Rect(0, 0, level.sFrame.width, level.sFrame.height));
                GpuMat sint = integral(cv::Rect(prev, 0, level.sFrame.width + 1, level.sFrame.height + 1));

                // generate the integral image for this scale
                cuda::resize(image, src, level.sFrame, 0, 0, cv::INTER_LINEAR);
                cuda::integral(src, sint);

                // calculate job
                int totalWidth = level.workArea.width / step;
                total += totalWidth * (level.workArea.height / step);

                // go to the next pyramid level
                level = level.next(scaleFactor_, image.size(), NxM, minObjectSize_);
                area = level.workArea;

                step = (1 + (level.scale <= 2.f));
                prev = acc;
                acc += level.sFrame.width + 1;
            }

            device::lbp::classifyPyramid(image.cols, image.rows, NxM.width - 1, NxM.height - 1, iniScale, scaleFactor_, total, stage_mat, stage_mat.cols / sizeof(Stage), nodes_mat,
                                         leaves_mat, subsets_mat, features_mat, subsetSize, candidates, dclassified.ptr<unsigned int>(), integral);
        }

        if (minNeighbors_ <= 0 || objects.empty())
            return;

        cudaSafeCall( cudaMemcpy(&classified, dclassified.ptr(), sizeof(int), cudaMemcpyDeviceToHost) );
        device::lbp::connectedConmonents(candidates, classified, objects, minNeighbors_, grouping_eps, dclassified.ptr<unsigned int>());

        cudaSafeCall( cudaMemcpy(&classified, dclassified.ptr(), sizeof(int), cudaMemcpyDeviceToHost) );
        cudaSafeCall( cudaDeviceSynchronize() );

        if (classified > 0)
        {
            objects.colRange(0, classified).copyTo(_objects);
        }
        else
        {
            _objects.release();
        }
    }

    void LbpCascade_Impl::convert(OutputArray _gpu_objects, std::vector<Rect>& objects)
    {
        if (_gpu_objects.empty())
        {
            objects.clear();
            return;
        }

        Mat gpu_objects;
        if (_gpu_objects.kind() == _InputArray::CUDA_GPU_MAT)
        {
            _gpu_objects.getGpuMat().download(gpu_objects);
        }
        else
        {
            gpu_objects = _gpu_objects.getMat();
        }

        CV_Assert( gpu_objects.rows == 1 );
        CV_Assert( gpu_objects.type() == DataType<Rect>::type );

        Rect* ptr = gpu_objects.ptr<Rect>();
        objects.assign(ptr, ptr + gpu_objects.cols);
    }
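
    // The loader below expects the layout written by opencv_traincascade. An
    // abbreviated sketch of that structure, keyed to the string constants used in
    // load() (element values are illustrative):
    //
    //     <cascade>
    //       <stageType>BOOST</stageType>
    //       <featureType>LBP</featureType>
    //       <width>24</width>  <height>24</height>
    //       <stageParams>   <maxDepth>1</maxDepth> ... </stageParams>
    //       <featureParams> <maxCatCount>256</maxCatCount> ... </featureParams>
    //       <stages>    <!-- per stage: stageThreshold, weakClassifiers with
    //                        internalNodes and leafValues -->
    //       <features>  <!-- per feature: a 4-integer rect -->
    //     </cascade>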

    bool LbpCascade_Impl::load(const FileNode &root)
    {
        const char *CUDA_CC_STAGE_TYPE       = "stageType";
        const char *CUDA_CC_FEATURE_TYPE     = "featureType";
        const char *CUDA_CC_BOOST            = "BOOST";
        const char *CUDA_CC_LBP              = "LBP";
        const char *CUDA_CC_MAX_CAT_COUNT    = "maxCatCount";
        const char *CUDA_CC_HEIGHT           = "height";
        const char *CUDA_CC_WIDTH            = "width";
        const char *CUDA_CC_STAGE_PARAMS     = "stageParams";
        const char *CUDA_CC_MAX_DEPTH        = "maxDepth";
        const char *CUDA_CC_FEATURE_PARAMS   = "featureParams";
        const char *CUDA_CC_STAGES           = "stages";
        const char *CUDA_CC_STAGE_THRESHOLD  = "stageThreshold";
        const float CUDA_THRESHOLD_EPS       = 1e-5f;
        const char *CUDA_CC_WEAK_CLASSIFIERS = "weakClassifiers";
        const char *CUDA_CC_INTERNAL_NODES   = "internalNodes";
        const char *CUDA_CC_LEAF_VALUES      = "leafValues";
        const char *CUDA_CC_FEATURES         = "features";
        const char *CUDA_CC_RECT             = "rect";

        String stageTypeStr = (String)root[CUDA_CC_STAGE_TYPE];
        CV_Assert(stageTypeStr == CUDA_CC_BOOST);

        String featureTypeStr = (String)root[CUDA_CC_FEATURE_TYPE];
        CV_Assert(featureTypeStr == CUDA_CC_LBP);

        NxM.width = (int)root[CUDA_CC_WIDTH];
        NxM.height = (int)root[CUDA_CC_HEIGHT];
        CV_Assert( NxM.height > 0 && NxM.width > 0 );

        isStumps = ((int)(root[CUDA_CC_STAGE_PARAMS][CUDA_CC_MAX_DEPTH]) == 1) ? true : false;
        CV_Assert(isStumps);

        FileNode fn = root[CUDA_CC_FEATURE_PARAMS];
        if (fn.empty())
            return false;

        ncategories = fn[CUDA_CC_MAX_CAT_COUNT];

        subsetSize = (ncategories + 31) / 32;
        nodeStep = 3 + ( ncategories > 0 ? subsetSize : 1 );

        fn = root[CUDA_CC_STAGES];
        if (fn.empty())
            return false;

        std::vector<Stage> stages;
        stages.reserve(fn.size());

        std::vector<int> cl_trees;
        std::vector<int> cl_nodes;
        std::vector<float> cl_leaves;
        std::vector<int> subsets;

        FileNodeIterator it = fn.begin(), it_end = fn.end();
        for (size_t si = 0; it != it_end; si++, ++it )
        {
            FileNode fns = *it;
            Stage st;
            st.threshold = (float)fns[CUDA_CC_STAGE_THRESHOLD] - CUDA_THRESHOLD_EPS;

            fns = fns[CUDA_CC_WEAK_CLASSIFIERS];
            if (fns.empty())
                return false;

            st.ntrees = (int)fns.size();
            st.first = (int)cl_trees.size();

            stages.push_back(st); // (int, int, float)

            cl_trees.reserve(stages[si].first + stages[si].ntrees);

            // weak trees
            FileNodeIterator it1 = fns.begin(), it1_end = fns.end();
            for ( ; it1 != it1_end; ++it1 )
            {
                FileNode fnw = *it1;

                FileNode internalNodes = fnw[CUDA_CC_INTERNAL_NODES];
                FileNode leafValues = fnw[CUDA_CC_LEAF_VALUES];
                if ( internalNodes.empty() || leafValues.empty() )
                    return false;

                int nodeCount = (int)internalNodes.size() / nodeStep;
                cl_trees.push_back(nodeCount);

                cl_nodes.reserve((cl_nodes.size() + nodeCount) * 3);
                cl_leaves.reserve(cl_leaves.size() + leafValues.size());

                if( subsetSize > 0 )
                    subsets.reserve(subsets.size() + nodeCount * subsetSize);

                // nodes
                FileNodeIterator iIt = internalNodes.begin(), iEnd = internalNodes.end();

                for( ; iIt != iEnd; )
                {
                    cl_nodes.push_back((int)*(iIt++));
                    cl_nodes.push_back((int)*(iIt++));
                    cl_nodes.push_back((int)*(iIt++));

                    if( subsetSize > 0 )
                        for( int j = 0; j < subsetSize; j++, ++iIt )
                            subsets.push_back((int)*iIt);
                }

                // leaves
                iIt = leafValues.begin(), iEnd = leafValues.end();
                for( ; iIt != iEnd; ++iIt )
                    cl_leaves.push_back((float)*iIt);
            }
        }

        fn = root[CUDA_CC_FEATURES];
        if( fn.empty() )
            return false;
        std::vector<uchar> features;
        features.reserve(fn.size() * 4);
        FileNodeIterator f_it = fn.begin(), f_end = fn.end();
        for (; f_it != f_end; ++f_it)
        {
            FileNode rect = (*f_it)[CUDA_CC_RECT];
            FileNodeIterator r_it = rect.begin();
            features.push_back(saturate_cast<uchar>((int)*(r_it++)));
            features.push_back(saturate_cast<uchar>((int)*(r_it++)));
            features.push_back(saturate_cast<uchar>((int)*(r_it++)));
            features.push_back(saturate_cast<uchar>((int)*(r_it++)));
        }

        // copy data structures to the GPU
        stage_mat.upload(cv::Mat(1, (int) (stages.size() * sizeof(Stage)), CV_8UC1, (uchar*)&(stages[0]) ));
        trees_mat.upload(cv::Mat(cl_trees).reshape(1,1));
        nodes_mat.upload(cv::Mat(cl_nodes).reshape(1,1));
        leaves_mat.upload(cv::Mat(cl_leaves).reshape(1,1));
        subsets_mat.upload(cv::Mat(subsets).reshape(1,1));
        features_mat.upload(cv::Mat(features).reshape(4,1));

        return true;
    }

    void LbpCascade_Impl::allocateBuffers(cv::Size frame)
    {
        if (frame == cv::Size())
            return;

        if (resizeBuffer.empty() || frame.width > resizeBuffer.cols || frame.height > resizeBuffer.rows)
        {
            resizeBuffer.create(frame, CV_8UC1);

            integral.create(frame.height + 1, integralFactor * (frame.width + 1), CV_32SC1);

#ifdef HAVE_OPENCV_CUDALEGACY
            NcvSize32u roiSize;
            roiSize.width = frame.width;
            roiSize.height = frame.height;

            cudaDeviceProp prop;
            cudaSafeCall( cudaGetDeviceProperties(&prop, cv::cuda::getDevice()) );

            Ncv32u bufSize;
            ncvSafeCall( nppiStIntegralGetSize_8u32u(roiSize, &bufSize, prop) );
            integralBuffer.create(1, bufSize, CV_8UC1);
#endif

            candidates.create(1, frame.width >> 1, CV_32SC4);
        }
    }
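
    // Note on the buffers above: `integral` is a single banded buffer. Each pass of
    // the inner loop in detectMultiScale() writes the integral image of one pyramid
    // scale into its own horizontal slot (x-offset `prev`, width sFrame.width + 1);
    // integralFactor == 4 bounds how many scales are batched per classifyPyramid call.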
}

//
// create
//

Ptr<cuda::CascadeClassifier> cv::cuda::CascadeClassifier::create(const String& filename)
{
    String fext = filename.substr(filename.find_last_of(".") + 1);
    fext = fext.toLowerCase();

    if (fext == "nvbin")
    {
#ifndef HAVE_OPENCV_CUDALEGACY
        CV_Error(Error::StsUnsupportedFormat, "OpenCV CUDA objdetect was built without HaarCascade support");
        return Ptr<cuda::CascadeClassifier>();
#else
        return makePtr<HaarCascade_Impl>(filename);
#endif
    }

    FileStorage fs(filename, FileStorage::READ);

    if (!fs.isOpened())
    {
#ifndef HAVE_OPENCV_CUDALEGACY
        CV_Error(Error::StsUnsupportedFormat, "OpenCV CUDA objdetect was built without HaarCascade support");
        return Ptr<cuda::CascadeClassifier>();
#else
        return makePtr<HaarCascade_Impl>(filename);
#endif
    }

    const char *CUDA_CC_LBP = "LBP";
    String featureTypeStr = (String)fs.getFirstTopLevelNode()["featureType"];
    if (featureTypeStr == CUDA_CC_LBP)
    {
        return makePtr<LbpCascade_Impl>(fs);
    }
    else
    {
#ifndef HAVE_OPENCV_CUDALEGACY
        CV_Error(Error::StsUnsupportedFormat, "OpenCV CUDA objdetect was built without HaarCascade support");
        return Ptr<cuda::CascadeClassifier>();
#else
        return makePtr<HaarCascade_Impl>(filename);
#endif
    }

    CV_Error(Error::StsUnsupportedFormat, "Unsupported format for CUDA CascadeClassifier");
    return Ptr<cuda::CascadeClassifier>();
}

Ptr<cuda::CascadeClassifier> cv::cuda::CascadeClassifier::create(const FileStorage& file)
{
    return makePtr<LbpCascade_Impl>(file);
}

#endif
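
/*
 * End-to-end usage sketch (illustrative only; "lbpcascade_frontalface.xml" is an
 * assumed LBP cascade file and grayFrame an 8-bit single-channel host image):
 *
 *     Ptr<cuda::CascadeClassifier> cascade =
 *         cuda::CascadeClassifier::create("lbpcascade_frontalface.xml");
 *
 *     GpuMat d_gray;
 *     d_gray.upload(grayFrame);
 *
 *     GpuMat d_found;
 *     cascade->detectMultiScale(d_gray, d_found);
 *
 *     std::vector<Rect> found;
 *     cascade->convert(d_found, found);
 */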