/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                           License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#include "precomp.hpp"
#include "opencv2/objdetect/objdetect_c.h"

using namespace cv;
using namespace cv::cuda;

#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)

Ptr<cuda::CascadeClassifier> cv::cuda::CascadeClassifier::create(const String&) { throw_no_cuda(); return Ptr<cuda::CascadeClassifier>(); }
Ptr<cuda::CascadeClassifier> cv::cuda::CascadeClassifier::create(const FileStorage&) { throw_no_cuda(); return Ptr<cuda::CascadeClassifier>(); }

#else

//
// CascadeClassifierBase
//

namespace
{
    class CascadeClassifierBase : public cuda::CascadeClassifier
    {
    public:
        CascadeClassifierBase();

        virtual void setMaxObjectSize(Size maxObjectSize) { maxObjectSize_ = maxObjectSize; }
        virtual Size getMaxObjectSize() const { return maxObjectSize_; }

        virtual void setMinObjectSize(Size minSize) { minObjectSize_ = minSize; }
        virtual Size getMinObjectSize() const { return minObjectSize_; }

        virtual void setScaleFactor(double scaleFactor) { scaleFactor_ = scaleFactor; }
        virtual double getScaleFactor() const { return scaleFactor_; }

        virtual void setMinNeighbors(int minNeighbors) { minNeighbors_ = minNeighbors; }
        virtual int getMinNeighbors() const { return minNeighbors_; }

        virtual void setFindLargestObject(bool findLargestObject) { findLargestObject_ = findLargestObject; }
        virtual bool getFindLargestObject() { return findLargestObject_; }

        virtual void setMaxNumObjects(int maxNumObjects) { maxNumObjects_ = maxNumObjects; }
        virtual int getMaxNumObjects() const { return maxNumObjects_; }

    protected:
        Size maxObjectSize_;
        Size minObjectSize_;
        double scaleFactor_;
        int minNeighbors_;
        bool findLargestObject_;
        int maxNumObjects_;
    };

    CascadeClassifierBase::CascadeClassifierBase() :
        maxObjectSize_(),
        minObjectSize_(),
        scaleFactor_(1.2),
        minNeighbors_(4),
        findLargestObject_(false),
        maxNumObjects_(100)
    {
    }
}

//
// HaarCascade
//

#ifdef HAVE_OPENCV_CUDALEGACY

namespace
{
    class HaarCascade_Impl : public CascadeClassifierBase
    {
    public:
        explicit HaarCascade_Impl(const String& filename);

        virtual Size getClassifierSize() const;

        virtual void detectMultiScale(InputArray image,
                                      OutputArray objects,
                                      Stream& stream);

        virtual void convert(OutputArray gpu_objects,
                             std::vector<Rect>& objects);

    private:
        NCVStatus load(const String& classifierFile);
        NCVStatus calculateMemReqsAndAllocate(const Size& frameSize);
        NCVStatus process(const GpuMat& src, GpuMat& objects, cv::Size ncvMinSize, /*out*/ unsigned int& numDetections);

        Size lastAllocatedFrameSize;

        Ptr<NCVMemStackAllocator> gpuAllocator;
        Ptr<NCVMemStackAllocator> cpuAllocator;

        cudaDeviceProp devProp;
        NCVStatus ncvStat;

        Ptr<NCVMemNativeAllocator> gpuCascadeAllocator;
        Ptr<NCVMemNativeAllocator> cpuCascadeAllocator;

        Ptr<NCVVectorAlloc<HaarStage64> >           h_haarStages;
        Ptr<NCVVectorAlloc<HaarClassifierNode128> > h_haarNodes;
        Ptr<NCVVectorAlloc<HaarFeature64> >         h_haarFeatures;

        HaarClassifierCascadeDescriptor haar;

        Ptr<NCVVectorAlloc<HaarStage64> >           d_haarStages;
        Ptr<NCVVectorAlloc<HaarClassifierNode128> > d_haarNodes;
        Ptr<NCVVectorAlloc<HaarFeature64> >         d_haarFeatures;
    };

    static void NCVDebugOutputHandler(const String &msg)
    {
        CV_Error(Error::GpuApiCallError, msg.c_str());
    }

    HaarCascade_Impl::HaarCascade_Impl(const String& filename) :
        lastAllocatedFrameSize(-1, -1)
    {
        ncvSetDebugOutputHandler(NCVDebugOutputHandler);
        ncvSafeCall( load(filename) );
    }

    Size HaarCascade_Impl::getClassifierSize() const
    {
        return Size(haar.ClassifierSize.width, haar.ClassifierSize.height);
    }

    void HaarCascade_Impl::detectMultiScale(InputArray _image,
                                            OutputArray _objects,
                                            Stream& stream)
    {
        const GpuMat image = _image.getGpuMat();

        CV_Assert( image.depth() == CV_8U);
        CV_Assert( scaleFactor_ > 1 );
        CV_Assert( !stream );

        Size ncvMinSize = getClassifierSize();
        if (ncvMinSize.width < minObjectSize_.width && ncvMinSize.height < minObjectSize_.height)
        {
            ncvMinSize.width = minObjectSize_.width;
            ncvMinSize.height = minObjectSize_.height;
        }

        BufferPool pool(stream);
        GpuMat objectsBuf = pool.getBuffer(1, maxNumObjects_, DataType<Rect>::type);

        unsigned int numDetections;
        ncvSafeCall( process(image, objectsBuf, ncvMinSize, numDetections) );

        if (numDetections > 0)
        {
            objectsBuf.colRange(0, numDetections).copyTo(_objects);
        }
        else
        {
            _objects.release();
        }
    }

    void HaarCascade_Impl::convert(OutputArray _gpu_objects, std::vector<Rect>& objects)
    {
        if (_gpu_objects.empty())
        {
            objects.clear();
            return;
        }

        Mat gpu_objects;
        if (_gpu_objects.kind() == _InputArray::CUDA_GPU_MAT)
        {
            _gpu_objects.getGpuMat().download(gpu_objects);
        }
        else
        {
            gpu_objects = _gpu_objects.getMat();
        }

        CV_Assert( gpu_objects.rows == 1 );
        CV_Assert( gpu_objects.type() == DataType<Rect>::type );

        Rect* ptr = gpu_objects.ptr<Rect>();
        objects.assign(ptr, ptr + gpu_objects.cols);
    }

    NCVStatus HaarCascade_Impl::load(const String& classifierFile)
    {
        int devId = cv::cuda::getDevice();
        ncvAssertCUDAReturn(cudaGetDeviceProperties(&devProp, devId), NCV_CUDA_ERROR);

        // Load the classifier from file (assuming its size is about 1 MB) using a simple allocator
        gpuCascadeAllocator = makePtr<NCVMemNativeAllocator>(NCVMemoryTypeDevice, static_cast<int>(devProp.textureAlignment));
        cpuCascadeAllocator = makePtr<NCVMemNativeAllocator>(NCVMemoryTypeHostPinned, static_cast<int>(devProp.textureAlignment));

        ncvAssertPrintReturn(gpuCascadeAllocator->isInitialized(), "Error creating cascade GPU allocator", NCV_CUDA_ERROR);
        ncvAssertPrintReturn(cpuCascadeAllocator->isInitialized(), "Error creating cascade CPU allocator", NCV_CUDA_ERROR);

        Ncv32u haarNumStages, haarNumNodes, haarNumFeatures;
        ncvStat = ncvHaarGetClassifierSize(classifierFile, haarNumStages, haarNumNodes, haarNumFeatures);
        ncvAssertPrintReturn(ncvStat == NCV_SUCCESS, "Error reading classifier size (check the file)", NCV_FILE_ERROR);

        h_haarStages.reset  (new NCVVectorAlloc<HaarStage64>(*cpuCascadeAllocator, haarNumStages));
        h_haarNodes.reset   (new NCVVectorAlloc<HaarClassifierNode128>(*cpuCascadeAllocator, haarNumNodes));
        h_haarFeatures.reset(new NCVVectorAlloc<HaarFeature64>(*cpuCascadeAllocator, haarNumFeatures));

        ncvAssertPrintReturn(h_haarStages->isMemAllocated(), "Error in cascade CPU allocator", NCV_CUDA_ERROR);
        ncvAssertPrintReturn(h_haarNodes->isMemAllocated(), "Error in cascade CPU allocator", NCV_CUDA_ERROR);
        ncvAssertPrintReturn(h_haarFeatures->isMemAllocated(), "Error in cascade CPU allocator", NCV_CUDA_ERROR);

        ncvStat = ncvHaarLoadFromFile_host(classifierFile, haar, *h_haarStages, *h_haarNodes, *h_haarFeatures);
        ncvAssertPrintReturn(ncvStat == NCV_SUCCESS, "Error loading classifier", NCV_FILE_ERROR);

        d_haarStages.reset  (new NCVVectorAlloc<HaarStage64>(*gpuCascadeAllocator, haarNumStages));
        d_haarNodes.reset   (new NCVVectorAlloc<HaarClassifierNode128>(*gpuCascadeAllocator, haarNumNodes));
        d_haarFeatures.reset(new NCVVectorAlloc<HaarFeature64>(*gpuCascadeAllocator, haarNumFeatures));

        ncvAssertPrintReturn(d_haarStages->isMemAllocated(), "Error in cascade GPU allocator", NCV_CUDA_ERROR);
        ncvAssertPrintReturn(d_haarNodes->isMemAllocated(), "Error in cascade GPU allocator", NCV_CUDA_ERROR);
        ncvAssertPrintReturn(d_haarFeatures->isMemAllocated(), "Error in cascade GPU allocator", NCV_CUDA_ERROR);

        ncvStat = h_haarStages->copySolid(*d_haarStages, 0);
        ncvAssertPrintReturn(ncvStat == NCV_SUCCESS, "Error copying cascade to GPU", NCV_CUDA_ERROR);
        ncvStat = h_haarNodes->copySolid(*d_haarNodes, 0);
        ncvAssertPrintReturn(ncvStat == NCV_SUCCESS, "Error copying cascade to GPU", NCV_CUDA_ERROR);
        ncvStat = h_haarFeatures->copySolid(*d_haarFeatures, 0);
        ncvAssertPrintReturn(ncvStat == NCV_SUCCESS, "Error copying cascade to GPU", NCV_CUDA_ERROR);

        return NCV_SUCCESS;
    }

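    // A descriptive note on the allocation strategy below (sketched from the code,
    // not part of the original comments): the detector is first run against
    // "counting" stack allocators that only record peak memory usage, and real
    // device/host allocators are then created with exactly that capacity. The
    // result is cached per frame size via lastAllocatedFrameSize, so repeated
    // calls with the same frame dimensions skip the dry run entirely.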
    NCVStatus HaarCascade_Impl::calculateMemReqsAndAllocate(const Size& frameSize)
    {
        if (lastAllocatedFrameSize == frameSize)
        {
            return NCV_SUCCESS;
        }

        // Calculate memory requirements and create real allocators
        NCVMemStackAllocator gpuCounter(static_cast<int>(devProp.textureAlignment));
        NCVMemStackAllocator cpuCounter(static_cast<int>(devProp.textureAlignment));

        ncvAssertPrintReturn(gpuCounter.isInitialized(), "Error creating GPU memory counter", NCV_CUDA_ERROR);
        ncvAssertPrintReturn(cpuCounter.isInitialized(), "Error creating CPU memory counter", NCV_CUDA_ERROR);

        NCVMatrixAlloc<Ncv8u> d_src(gpuCounter, frameSize.width, frameSize.height);
        NCVMatrixAlloc<Ncv8u> h_src(cpuCounter, frameSize.width, frameSize.height);

        ncvAssertReturn(d_src.isMemAllocated(), NCV_ALLOCATOR_BAD_ALLOC);
        ncvAssertReturn(h_src.isMemAllocated(), NCV_ALLOCATOR_BAD_ALLOC);

        NCVVectorAlloc<NcvRect32u> d_rects(gpuCounter, 100);
        ncvAssertReturn(d_rects.isMemAllocated(), NCV_ALLOCATOR_BAD_ALLOC);

        NcvSize32u roi;
        roi.width = d_src.width();
        roi.height = d_src.height();
        Ncv32u numDetections;
        ncvStat = ncvDetectObjectsMultiScale_device(d_src, roi, d_rects, numDetections, haar, *h_haarStages,
            *d_haarStages, *d_haarNodes, *d_haarFeatures, haar.ClassifierSize, 4, 1.2f, 1, 0, gpuCounter, cpuCounter, devProp, 0);

        ncvAssertReturnNcvStat(ncvStat);
        ncvAssertCUDAReturn(cudaStreamSynchronize(0), NCV_CUDA_ERROR);

        gpuAllocator = makePtr<NCVMemStackAllocator>(NCVMemoryTypeDevice, gpuCounter.maxSize(), static_cast<int>(devProp.textureAlignment));
        cpuAllocator = makePtr<NCVMemStackAllocator>(NCVMemoryTypeHostPinned, cpuCounter.maxSize(), static_cast<int>(devProp.textureAlignment));

        ncvAssertPrintReturn(gpuAllocator->isInitialized(), "Error creating GPU memory allocator", NCV_CUDA_ERROR);
        ncvAssertPrintReturn(cpuAllocator->isInitialized(), "Error creating CPU memory allocator", NCV_CUDA_ERROR);

        lastAllocatedFrameSize = frameSize;
        return NCV_SUCCESS;
    }

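    // process wraps the caller's GpuMat buffers as NCV matrix/vector views (no
    // copies are made) and runs the NCV multi-scale Haar detector on them,
    // reporting the number of detections written into the objects buffer.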
    NCVStatus HaarCascade_Impl::process(const GpuMat& src, GpuMat& objects, cv::Size ncvMinSize, /*out*/ unsigned int& numDetections)
    {
        calculateMemReqsAndAllocate(src.size());

        NCVMemPtr src_beg;
        src_beg.ptr = (void*)src.ptr<Ncv8u>();
        src_beg.memtype = NCVMemoryTypeDevice;

        NCVMemSegment src_seg;
        src_seg.begin = src_beg;
        src_seg.size  = src.step * src.rows;

        NCVMatrixReuse<Ncv8u> d_src(src_seg, static_cast<int>(devProp.textureAlignment), src.cols, src.rows, static_cast<int>(src.step), true);
        ncvAssertReturn(d_src.isMemReused(), NCV_ALLOCATOR_BAD_REUSE);

        CV_Assert(objects.rows == 1);

        NCVMemPtr objects_beg;
        objects_beg.ptr = (void*)objects.ptr<NcvRect32u>();
        objects_beg.memtype = NCVMemoryTypeDevice;

        NCVMemSegment objects_seg;
        objects_seg.begin = objects_beg;
        objects_seg.size = objects.step * objects.rows;
        NCVVectorReuse<NcvRect32u> d_rects(objects_seg, objects.cols);
        ncvAssertReturn(d_rects.isMemReused(), NCV_ALLOCATOR_BAD_REUSE);

        NcvSize32u roi;
        roi.width = d_src.width();
        roi.height = d_src.height();

        NcvSize32u winMinSize(ncvMinSize.width, ncvMinSize.height);

        Ncv32u flags = 0;
        flags |= findLargestObject_ ? NCVPipeObjDet_FindLargestObject : 0;

        ncvStat = ncvDetectObjectsMultiScale_device(
            d_src, roi, d_rects, numDetections, haar, *h_haarStages,
            *d_haarStages, *d_haarNodes, *d_haarFeatures,
            winMinSize,
            minNeighbors_,
            scaleFactor_, 1,
            flags,
            *gpuAllocator, *cpuAllocator, devProp, 0);
        ncvAssertReturnNcvStat(ncvStat);
        ncvAssertCUDAReturn(cudaStreamSynchronize(0), NCV_CUDA_ERROR);

        return NCV_SUCCESS;
    }
}

#endif

//
// LbpCascade
//

namespace cv { namespace cuda { namespace device
{
    namespace lbp
    {
        void classifyPyramid(int frameW,
                             int frameH,
                             int windowW,
                             int windowH,
                             float initialScale,
                             float factor,
                             int total,
                             const PtrStepSzb& mstages,
                             const int nstages,
                             const PtrStepSzi& mnodes,
                             const PtrStepSzf& mleaves,
                             const PtrStepSzi& msubsets,
                             const PtrStepSzb& mfeatures,
                             const int subsetSize,
                             PtrStepSz<int4> objects,
                             unsigned int* classified,
                             PtrStepSzi integral);

        void connectedConmonents(PtrStepSz<int4> candidates,
                                 int ncandidates,
                                 PtrStepSz<int4> objects,
                                 int groupThreshold,
                                 float grouping_eps,
                                 unsigned int* nclasses);
    }
}}}
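
// Together these two device kernels form the LBP detection pipeline:
// classifyPyramid evaluates the boosted LBP cascade over every scan window of a
// batch of scales packed into a single integral-image buffer, appending candidate
// rectangles and bumping a device-side counter; connectedConmonents then groups
// overlapping candidates (controlled by groupThreshold and grouping_eps) into the
// final object rectangles.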

namespace
{
    cv::Size operator -(const cv::Size& a, const cv::Size& b)
    {
        return cv::Size(a.width - b.width, a.height - b.height);
    }

    cv::Size operator +(const cv::Size& a, const int& i)
    {
        return cv::Size(a.width + i, a.height + i);
    }

    cv::Size operator *(const cv::Size& a, const float& f)
    {
        return cv::Size(cvRound(a.width * f), cvRound(a.height * f));
    }

    cv::Size operator /(const cv::Size& a, const float& f)
    {
        return cv::Size(cvRound(a.width / f), cvRound(a.height / f));
    }

    bool operator <=(const cv::Size& a, const cv::Size& b)
    {
        return a.width <= b.width && a.height <= b.height;
    }

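    // PyrLavel describes one level of the scale pyramid: sFrame is the downscaled
    // frame, sWindow the detection window mapped back to the original resolution,
    // and workArea the region the window can slide over. The constructor advances
    // the level order until the scaled window grows past minObjectSize.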
    struct PyrLavel
    {
        PyrLavel(int _order, float _scale, cv::Size frame, cv::Size window, cv::Size minObjectSize)
        {
            do
            {
                order = _order;
                scale = pow(_scale, order);
                sFrame = frame / scale;
                workArea = sFrame - window + 1;
                sWindow = window * scale;
                _order++;
            } while (sWindow <= minObjectSize);
        }

        bool isFeasible(cv::Size maxObj)
        {
            return workArea.width > 0 && workArea.height > 0 && sWindow <= maxObj;
        }

        PyrLavel next(float factor, cv::Size frame, cv::Size window, cv::Size minObjectSize)
        {
            return PyrLavel(order + 1, factor, frame, window, minObjectSize);
        }

        int order;
        float scale;
        cv::Size sFrame;
        cv::Size workArea;
        cv::Size sWindow;
    };

    class LbpCascade_Impl : public CascadeClassifierBase
    {
    public:
        explicit LbpCascade_Impl(const FileStorage& file);

        virtual Size getClassifierSize() const { return NxM; }

        virtual void detectMultiScale(InputArray image,
                                      OutputArray objects,
                                      Stream& stream);

        virtual void convert(OutputArray gpu_objects,
                             std::vector<Rect>& objects);

    private:
        bool load(const FileNode &root);
        void allocateBuffers(cv::Size frame);

    private:
        struct Stage
        {
            int    first;
            int    ntrees;
            float  threshold;
        };

        enum stage { BOOST = 0 };
        enum feature { LBP = 1, HAAR = 2 };

        static const stage stageType = BOOST;
        static const feature featureType = LBP;

        cv::Size NxM;
        bool isStumps;
        int ncategories;
        int subsetSize;
        int nodeStep;

        // GPU representation of the classifier
        GpuMat stage_mat;
        GpuMat trees_mat;
        GpuMat nodes_mat;
        GpuMat leaves_mat;
        GpuMat subsets_mat;
        GpuMat features_mat;

        GpuMat integral;
        GpuMat integralBuffer;
        GpuMat resuzeBuffer;

        GpuMat candidates;
        static const int integralFactor = 4;
    };

    LbpCascade_Impl::LbpCascade_Impl(const FileStorage& file)
    {
        load(file.getFirstTopLevelNode());
    }

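    // detectMultiScale packs the integral images of several consecutive pyramid
    // levels side by side into one wide buffer (at most integralFactor times the
    // frame width), launches classifyPyramid once per batch, and finally groups
    // the accumulated candidates with connectedConmonents.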
    void LbpCascade_Impl::detectMultiScale(InputArray _image,
                                           OutputArray _objects,
                                           Stream& stream)
    {
        const GpuMat image = _image.getGpuMat();

        CV_Assert( image.depth() == CV_8U);
        CV_Assert( scaleFactor_ > 1 );
        CV_Assert( !stream );

        const float grouping_eps = 0.2f;

        BufferPool pool(stream);
        GpuMat objects = pool.getBuffer(1, maxNumObjects_, DataType<Rect>::type);

        // used for debug
        // candidates.setTo(cv::Scalar::all(0));
        // objects.setTo(cv::Scalar::all(0));

        if (maxObjectSize_ == cv::Size())
            maxObjectSize_ = image.size();

        allocateBuffers(image.size());

        unsigned int classified = 0;
        GpuMat dclassified(1, 1, CV_32S);
        cudaSafeCall( cudaMemcpy(dclassified.ptr(), &classified, sizeof(int), cudaMemcpyHostToDevice) );

        PyrLavel level(0, scaleFactor_, image.size(), NxM, minObjectSize_);

        while (level.isFeasible(maxObjectSize_))
        {
            int acc = level.sFrame.width + 1;
            float iniScale = level.scale;

            cv::Size area = level.workArea;
            int step = 1 + (level.scale <= 2.f);

            int total = 0, prev  = 0;

            while (acc <= integralFactor * (image.cols + 1) && level.isFeasible(maxObjectSize_))
            {
                // create suitable matrix headers
                GpuMat src  = resuzeBuffer(cv::Rect(0, 0, level.sFrame.width, level.sFrame.height));
                GpuMat sint = integral(cv::Rect(prev, 0, level.sFrame.width + 1, level.sFrame.height + 1));

                // generate integral for scale
                cuda::resize(image, src, level.sFrame, 0, 0, cv::INTER_LINEAR);
                cuda::integral(src, sint);

                // accumulate the number of scan windows for this batch of scales
                int totalWidth = level.workArea.width / step;
                total += totalWidth * (level.workArea.height / step);

                // go to the next pyramid level
                level = level.next(scaleFactor_, image.size(), NxM, minObjectSize_);
                area = level.workArea;

                step = (1 + (level.scale <= 2.f));
                prev = acc;
                acc += level.sFrame.width + 1;
            }

            device::lbp::classifyPyramid(image.cols, image.rows, NxM.width - 1, NxM.height - 1, iniScale, scaleFactor_, total, stage_mat, stage_mat.cols / sizeof(Stage), nodes_mat,
                leaves_mat, subsets_mat, features_mat, subsetSize, candidates, dclassified.ptr<unsigned int>(), integral);
        }

        if (minNeighbors_ <= 0  || objects.empty())
            return;

        cudaSafeCall( cudaMemcpy(&classified, dclassified.ptr(), sizeof(int), cudaMemcpyDeviceToHost) );
        device::lbp::connectedConmonents(candidates, classified, objects, minNeighbors_, grouping_eps, dclassified.ptr<unsigned int>());

        cudaSafeCall( cudaMemcpy(&classified, dclassified.ptr(), sizeof(int), cudaMemcpyDeviceToHost) );
        cudaSafeCall( cudaDeviceSynchronize() );

        if (classified > 0)
        {
            objects.colRange(0, classified).copyTo(_objects);
        }
        else
        {
            _objects.release();
        }
    }

    void LbpCascade_Impl::convert(OutputArray _gpu_objects, std::vector<Rect>& objects)
    {
        if (_gpu_objects.empty())
        {
            objects.clear();
            return;
        }

        Mat gpu_objects;
        if (_gpu_objects.kind() == _InputArray::CUDA_GPU_MAT)
        {
            _gpu_objects.getGpuMat().download(gpu_objects);
        }
        else
        {
            gpu_objects = _gpu_objects.getMat();
        }

        CV_Assert( gpu_objects.rows == 1 );
        CV_Assert( gpu_objects.type() == DataType<Rect>::type );

        Rect* ptr = gpu_objects.ptr<Rect>();
        objects.assign(ptr, ptr + gpu_objects.cols);
    }

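    // load parses a trained LBP cascade from the FileStorage tree into flat host
    // arrays (stage descriptors, per-tree node counts, node triplets, leaf values,
    // categorical subsets and feature rectangles) and uploads each of them to the
    // GPU as a single-row matrix.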
    bool LbpCascade_Impl::load(const FileNode &root)
    {
        const char *CUDA_CC_STAGE_TYPE       = "stageType";
        const char *CUDA_CC_FEATURE_TYPE     = "featureType";
        const char *CUDA_CC_BOOST            = "BOOST";
        const char *CUDA_CC_LBP              = "LBP";
        const char *CUDA_CC_MAX_CAT_COUNT    = "maxCatCount";
        const char *CUDA_CC_HEIGHT           = "height";
        const char *CUDA_CC_WIDTH            = "width";
        const char *CUDA_CC_STAGE_PARAMS     = "stageParams";
        const char *CUDA_CC_MAX_DEPTH        = "maxDepth";
        const char *CUDA_CC_FEATURE_PARAMS   = "featureParams";
        const char *CUDA_CC_STAGES           = "stages";
        const char *CUDA_CC_STAGE_THRESHOLD  = "stageThreshold";
        const float CUDA_THRESHOLD_EPS       = 1e-5f;
        const char *CUDA_CC_WEAK_CLASSIFIERS = "weakClassifiers";
        const char *CUDA_CC_INTERNAL_NODES   = "internalNodes";
        const char *CUDA_CC_LEAF_VALUES      = "leafValues";
        const char *CUDA_CC_FEATURES         = "features";
        const char *CUDA_CC_RECT             = "rect";

        String stageTypeStr = (String)root[CUDA_CC_STAGE_TYPE];
        CV_Assert(stageTypeStr == CUDA_CC_BOOST);

        String featureTypeStr = (String)root[CUDA_CC_FEATURE_TYPE];
        CV_Assert(featureTypeStr == CUDA_CC_LBP);

        NxM.width =  (int)root[CUDA_CC_WIDTH];
        NxM.height = (int)root[CUDA_CC_HEIGHT];
        CV_Assert( NxM.height > 0 && NxM.width > 0 );

        isStumps = ((int)(root[CUDA_CC_STAGE_PARAMS][CUDA_CC_MAX_DEPTH]) == 1) ? true : false;
        CV_Assert(isStumps);

        FileNode fn = root[CUDA_CC_FEATURE_PARAMS];
        if (fn.empty())
            return false;

        ncategories = fn[CUDA_CC_MAX_CAT_COUNT];

        subsetSize = (ncategories + 31) / 32;
        nodeStep = 3 + ( ncategories > 0 ? subsetSize : 1 );

        fn = root[CUDA_CC_STAGES];
        if (fn.empty())
            return false;

        std::vector<Stage> stages;
        stages.reserve(fn.size());

        std::vector<int> cl_trees;
        std::vector<int> cl_nodes;
        std::vector<float> cl_leaves;
        std::vector<int> subsets;

        FileNodeIterator it = fn.begin(), it_end = fn.end();
        for (size_t si = 0; it != it_end; si++, ++it )
        {
            FileNode fns = *it;
            Stage st;
            st.threshold = (float)fns[CUDA_CC_STAGE_THRESHOLD] - CUDA_THRESHOLD_EPS;

            fns = fns[CUDA_CC_WEAK_CLASSIFIERS];
            if (fns.empty())
                return false;

            st.ntrees = (int)fns.size();
            st.first = (int)cl_trees.size();

            stages.push_back(st); // (int, int, float)

            cl_trees.reserve(stages[si].first + stages[si].ntrees);

            // weak trees
            FileNodeIterator it1 = fns.begin(), it1_end = fns.end();
            for ( ; it1 != it1_end; ++it1 )
            {
                FileNode fnw = *it1;

                FileNode internalNodes = fnw[CUDA_CC_INTERNAL_NODES];
                FileNode leafValues = fnw[CUDA_CC_LEAF_VALUES];
                if ( internalNodes.empty() || leafValues.empty() )
                    return false;

                int nodeCount = (int)internalNodes.size()/nodeStep;
                cl_trees.push_back(nodeCount);

                cl_nodes.reserve((cl_nodes.size() + nodeCount) * 3);
                cl_leaves.reserve(cl_leaves.size() + leafValues.size());

                if( subsetSize > 0 )
                    subsets.reserve(subsets.size() + nodeCount * subsetSize);

                // nodes
                FileNodeIterator iIt = internalNodes.begin(), iEnd = internalNodes.end();

                for( ; iIt != iEnd; )
                {
                    cl_nodes.push_back((int)*(iIt++));
                    cl_nodes.push_back((int)*(iIt++));
                    cl_nodes.push_back((int)*(iIt++));

                    if( subsetSize > 0 )
                        for( int j = 0; j < subsetSize; j++, ++iIt )
                            subsets.push_back((int)*iIt);
                }

                // leaves
                iIt = leafValues.begin(), iEnd = leafValues.end();
                for( ; iIt != iEnd; ++iIt )
                    cl_leaves.push_back((float)*iIt);
            }
        }

        fn = root[CUDA_CC_FEATURES];
        if( fn.empty() )
            return false;
        std::vector<uchar> features;
        features.reserve(fn.size() * 4);
        FileNodeIterator f_it = fn.begin(), f_end = fn.end();
        for (; f_it != f_end; ++f_it)
        {
            FileNode rect = (*f_it)[CUDA_CC_RECT];
            FileNodeIterator r_it = rect.begin();
            features.push_back(saturate_cast<uchar>((int)*(r_it++)));
            features.push_back(saturate_cast<uchar>((int)*(r_it++)));
            features.push_back(saturate_cast<uchar>((int)*(r_it++)));
            features.push_back(saturate_cast<uchar>((int)*(r_it++)));
        }

        // upload the classifier data structures to the GPU
        stage_mat.upload(cv::Mat(1, (int) (stages.size() * sizeof(Stage)), CV_8UC1, (uchar*)&(stages[0]) ));
        trees_mat.upload(cv::Mat(cl_trees).reshape(1,1));
        nodes_mat.upload(cv::Mat(cl_nodes).reshape(1,1));
        leaves_mat.upload(cv::Mat(cl_leaves).reshape(1,1));
        subsets_mat.upload(cv::Mat(subsets).reshape(1,1));
        features_mat.upload(cv::Mat(features).reshape(4,1));

        return true;
    }

    void LbpCascade_Impl::allocateBuffers(cv::Size frame)
    {
        if (frame == cv::Size())
            return;

        if (resuzeBuffer.empty() || frame.width > resuzeBuffer.cols || frame.height > resuzeBuffer.rows)
        {
            resuzeBuffer.create(frame, CV_8UC1);

            integral.create(frame.height + 1, integralFactor * (frame.width + 1), CV_32SC1);

        #ifdef HAVE_OPENCV_CUDALEGACY
            NcvSize32u roiSize;
            roiSize.width = frame.width;
            roiSize.height = frame.height;

            cudaDeviceProp prop;
            cudaSafeCall( cudaGetDeviceProperties(&prop, cv::cuda::getDevice()) );

            Ncv32u bufSize;
            ncvSafeCall( nppiStIntegralGetSize_8u32u(roiSize, &bufSize, prop) );
            integralBuffer.create(1, bufSize, CV_8UC1);
        #endif

            candidates.create(1, frame.width >> 1, CV_32SC4);
        }
    }

}

//
// create
//

Ptr<cuda::CascadeClassifier> cv::cuda::CascadeClassifier::create(const String& filename)
{
    String fext = filename.substr(filename.find_last_of(".") + 1);
    fext = fext.toLowerCase();

    if (fext == "nvbin")
    {
    #ifndef HAVE_OPENCV_CUDALEGACY
        CV_Error(Error::StsUnsupportedFormat, "OpenCV CUDA objdetect was built without HaarCascade");
        return Ptr<cuda::CascadeClassifier>();
    #else
        return makePtr<HaarCascade_Impl>(filename);
    #endif
    }

    FileStorage fs(filename, FileStorage::READ);

    if (!fs.isOpened())
    {
    #ifndef HAVE_OPENCV_CUDALEGACY
        CV_Error(Error::StsUnsupportedFormat, "OpenCV CUDA objdetect was built without HaarCascade");
        return Ptr<cuda::CascadeClassifier>();
    #else
        return makePtr<HaarCascade_Impl>(filename);
    #endif
    }

    const char *CUDA_CC_LBP = "LBP";
    String featureTypeStr = (String)fs.getFirstTopLevelNode()["featureType"];
    if (featureTypeStr == CUDA_CC_LBP)
    {
        return makePtr<LbpCascade_Impl>(fs);
    }
    else
    {
    #ifndef HAVE_OPENCV_CUDALEGACY
        CV_Error(Error::StsUnsupportedFormat, "OpenCV CUDA objdetect was built without HaarCascade");
        return Ptr<cuda::CascadeClassifier>();
    #else
        return makePtr<HaarCascade_Impl>(filename);
    #endif
    }

    CV_Error(Error::StsUnsupportedFormat, "Unsupported format for CUDA CascadeClassifier");
    return Ptr<cuda::CascadeClassifier>();
}

Ptr<cuda::CascadeClassifier> cv::cuda::CascadeClassifier::create(const FileStorage& file)
{
    return makePtr<LbpCascade_Impl>(file);
}

#endif

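//
// A minimal usage sketch, kept in a comment so it is not compiled into the
// library. The cascade file name and grayFrame are placeholders; it assumes a
// CUDA device is available and that grayFrame is an 8-bit, single-channel
// cv::Mat.
//
//     cv::Ptr<cv::cuda::CascadeClassifier> cascade =
//         cv::cuda::CascadeClassifier::create("lbpcascade_frontalface.xml");
//     cascade->setScaleFactor(1.2);
//     cascade->setMinNeighbors(4);
//
//     cv::cuda::GpuMat d_gray, d_objects;
//     d_gray.upload(grayFrame);                      // CV_8UC1 input
//     cascade->detectMultiScale(d_gray, d_objects);  // detections stay on the GPU
//
//     std::vector<cv::Rect> objects;
//     cascade->convert(d_objects, objects);          // download + unpack rectangles
//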