Home | History | Annotate | Download | only in gpu
      1 #if defined _MSC_VER && _MSC_VER >= 1400
      2 #pragma warning( disable : 4201 4408 4127 4100)
      3 #endif
      4 
      5 #include "cvconfig.h"
      6 #include <iostream>
      7 #include <iomanip>
      8 #include <cstdio>
      9 #include "opencv2/core/cuda.hpp"
     10 #include "opencv2/cudalegacy.hpp"
     11 #include "opencv2/highgui.hpp"
     12 #include "opencv2/imgproc.hpp"
     13 #include "opencv2/objdetect.hpp"
     14 #include "opencv2/objdetect/objdetect_c.h"
     15 
     16 using namespace std;
     17 using namespace cv;
     18 
     19 
     20 #if !defined(HAVE_CUDA) || defined(__arm__)
     21 
     // Fallback entry point compiled when HAVE_CUDA is not defined or when
     // targeting ARM (__arm__): it prints why the demo cannot run and exits
     // with status 0. Command-line arguments are ignored.
     int main( int, const char** )
     {
     #ifndef HAVE_CUDA
         // Built without CUDA: tell the user which CMake switch is needed.
         const char* const noCudaMsg = "CUDA support is required (CMake key 'WITH_CUDA' must be true).";
         std::cout << noCudaMsg << std::endl;
     #endif

     #ifdef __arm__
         // ARM builds are rejected regardless of CUDA availability.
         const char* const armMsg = "Unsupported for ARM CUDA library.";
         std::cout << armMsg << std::endl;
     #endif

         return 0;
     }
     34 
     35 #else
     36 
     37 
     38 const Size2i preferredVideoFrameSize(640, 480);
     39 const cv::String wndTitle = "NVIDIA Computer Vision :: Haar Classifiers Cascade";
     40 
     41 
     42 static void matPrint(Mat &img, int lineOffsY, Scalar fontColor, const string &ss)
     43 {
     44     int fontFace = FONT_HERSHEY_DUPLEX;
     45     double fontScale = 0.8;
     46     int fontThickness = 2;
     47     Size fontSize = cv::getTextSize("T[]", fontFace, fontScale, fontThickness, 0);
     48 
     49     Point org;
     50     org.x = 1;
     51     org.y = 3 * fontSize.height * (lineOffsY + 1) / 2;
     52     putText(img, ss, org, fontFace, fontScale, Scalar(0,0,0), 5*fontThickness/2, 16);
     53     putText(img, ss, org, fontFace, fontScale, fontColor, fontThickness, 16);
     54 }
     55 
     56 
     57 static void displayState(Mat &canvas, bool bHelp, bool bGpu, bool bLargestFace, bool bFilter, double fps)
     58 {
     59     Scalar fontColorRed(0,0,255);
     60     Scalar fontColorNV(0,185,118);
     61 
     62     ostringstream ss;
     63     ss << "FPS = " << setprecision(1) << fixed << fps;
     64     matPrint(canvas, 0, fontColorRed, ss.str());
     65     ss.str("");
     66     ss << "[" << canvas.cols << "x" << canvas.rows << "], " <<
     67         (bGpu ? "GPU, " : "CPU, ") <<
     68         (bLargestFace ? "OneFace, " : "MultiFace, ") <<
     69         (bFilter ? "Filter:ON" : "Filter:OFF");
     70     matPrint(canvas, 1, fontColorRed, ss.str());
     71 
     72     if (bHelp)
     73     {
     74         matPrint(canvas, 2, fontColorNV, "Space - switch GPU / CPU");
     75         matPrint(canvas, 3, fontColorNV, "M - switch OneFace / MultiFace");
     76         matPrint(canvas, 4, fontColorNV, "F - toggle rectangles Filter");
     77         matPrint(canvas, 5, fontColorNV, "H - toggle hotkeys help");
     78     }
     79     else
     80     {
     81         matPrint(canvas, 2, fontColorNV, "H - toggle hotkeys help");
     82     }
     83 }
     84 
     85 
     86 static NCVStatus process(Mat *srcdst,
     87                   Ncv32u width, Ncv32u height,
     88                   NcvBool bFilterRects, NcvBool bLargestFace,
     89                   HaarClassifierCascadeDescriptor &haar,
     90                   NCVVector<HaarStage64> &d_haarStages, NCVVector<HaarClassifierNode128> &d_haarNodes,
     91                   NCVVector<HaarFeature64> &d_haarFeatures, NCVVector<HaarStage64> &h_haarStages,
     92                   INCVMemAllocator &gpuAllocator,
     93                   INCVMemAllocator &cpuAllocator,
     94                   cudaDeviceProp &devProp)
     95 {
     96     ncvAssertReturn(!((srcdst == NULL) ^ gpuAllocator.isCounting()), NCV_NULL_PTR);
     97 
     98     NCVStatus ncvStat;
     99 
    100     NCV_SET_SKIP_COND(gpuAllocator.isCounting());
    101 
    102     NCVMatrixAlloc<Ncv8u> d_src(gpuAllocator, width, height);
    103     ncvAssertReturn(d_src.isMemAllocated(), NCV_ALLOCATOR_BAD_ALLOC);
    104     NCVMatrixAlloc<Ncv8u> h_src(cpuAllocator, width, height);
    105     ncvAssertReturn(h_src.isMemAllocated(), NCV_ALLOCATOR_BAD_ALLOC);
    106     NCVVectorAlloc<NcvRect32u> d_rects(gpuAllocator, 100);
    107     ncvAssertReturn(d_rects.isMemAllocated(), NCV_ALLOCATOR_BAD_ALLOC);
    108 
    109     NCV_SKIP_COND_BEGIN
    110 
    111     for (Ncv32u i=0; i<(Ncv32u)srcdst->rows; i++)
    112     {
    113         memcpy(h_src.ptr() + i * h_src.stride(), srcdst->ptr(i), srcdst->cols);
    114     }
    115 
    116     ncvStat = h_src.copySolid(d_src, 0);
    117     ncvAssertReturnNcvStat(ncvStat);
    118     ncvAssertCUDAReturn(cudaStreamSynchronize(0), NCV_CUDA_ERROR);
    119 
    120     NCV_SKIP_COND_END
    121 
    122     NcvSize32u roi;
    123     roi.width = d_src.width();
    124     roi.height = d_src.height();
    125 
    126     Ncv32u numDetections;
    127     ncvStat = ncvDetectObjectsMultiScale_device(
    128         d_src, roi, d_rects, numDetections, haar, h_haarStages,
    129         d_haarStages, d_haarNodes, d_haarFeatures,
    130         haar.ClassifierSize,
    131         (bFilterRects || bLargestFace) ? 4 : 0,
    132         1.2f, 1,
    133         (bLargestFace ? NCVPipeObjDet_FindLargestObject : 0)
    134         | NCVPipeObjDet_VisualizeInPlace,
    135         gpuAllocator, cpuAllocator, devProp, 0);
    136     ncvAssertReturnNcvStat(ncvStat);
    137     ncvAssertCUDAReturn(cudaStreamSynchronize(0), NCV_CUDA_ERROR);
    138 
    139     NCV_SKIP_COND_BEGIN
    140 
    141     ncvStat = d_src.copySolid(h_src, 0);
    142     ncvAssertReturnNcvStat(ncvStat);
    143     ncvAssertCUDAReturn(cudaStreamSynchronize(0), NCV_CUDA_ERROR);
    144 
    145     for (Ncv32u i=0; i<(Ncv32u)srcdst->rows; i++)
    146     {
    147         memcpy(srcdst->ptr(i), h_src.ptr() + i * h_src.stride(), srcdst->cols);
    148     }
    149 
    150     NCV_SKIP_COND_END
    151 
    152     return NCV_SUCCESS;
    153 }
    154 
    155 
    156 int main(int argc, const char** argv)
    157 {
    158     cout << "OpenCV / NVIDIA Computer Vision" << endl;
    159     cout << "Face Detection in video and live feed" << endl;
    160     cout << "Syntax: exename <cascade_file> <image_or_video_or_cameraid>" << endl;
    161     cout << "=========================================" << endl;
    162 
    163     ncvAssertPrintReturn(cv::cuda::getCudaEnabledDeviceCount() != 0, "No GPU found or the library is compiled without CUDA support", -1);
    164     ncvAssertPrintReturn(argc == 3, "Invalid number of arguments", -1);
    165 
    166     cv::cuda::printShortCudaDeviceInfo(cv::cuda::getDevice());
    167 
    168     string cascadeName = argv[1];
    169     string inputName = argv[2];
    170 
    171     NCVStatus ncvStat;
    172     NcvBool bQuit = false;
    173     VideoCapture capture;
    174     Size2i frameSize;
    175 
    176     //open content source
    177     Mat image = imread(inputName);
    178     Mat frame;
    179     if (!image.empty())
    180     {
    181         frameSize.width = image.cols;
    182         frameSize.height = image.rows;
    183     }
    184     else
    185     {
    186         if (!capture.open(inputName))
    187         {
    188             int camid = -1;
    189 
    190             istringstream ss(inputName);
    191             int x = 0;
    192             ss >> x;
    193 
    194             ncvAssertPrintReturn(capture.open(camid) != 0, "Can't open source", -1);
    195         }
    196 
    197         capture >> frame;
    198         ncvAssertPrintReturn(!frame.empty(), "Empty video source", -1);
    199 
    200         frameSize.width = frame.cols;
    201         frameSize.height = frame.rows;
    202     }
    203 
    204     NcvBool bUseGPU = true;
    205     NcvBool bLargestObject = false;
    206     NcvBool bFilterRects = true;
    207     NcvBool bHelpScreen = false;
    208 
    209     CascadeClassifier classifierOpenCV;
    210     ncvAssertPrintReturn(classifierOpenCV.load(cascadeName) != 0, "Error (in OpenCV) opening classifier", -1);
    211 
    212     int devId;
    213     ncvAssertCUDAReturn(cudaGetDevice(&devId), -1);
    214     cudaDeviceProp devProp;
    215     ncvAssertCUDAReturn(cudaGetDeviceProperties(&devProp, devId), -1);
    216     cout << "Using GPU: " << devId << "(" << devProp.name <<
    217             "), arch=" << devProp.major << "." << devProp.minor << endl;
    218 
    219     //==============================================================================
    220     //
    221     // Load the classifier from file (assuming its size is about 1 mb)
    222     // using a simple allocator
    223     //
    224     //==============================================================================
    225 
    226     NCVMemNativeAllocator gpuCascadeAllocator(NCVMemoryTypeDevice, static_cast<Ncv32u>(devProp.textureAlignment));
    227     ncvAssertPrintReturn(gpuCascadeAllocator.isInitialized(), "Error creating cascade GPU allocator", -1);
    228     NCVMemNativeAllocator cpuCascadeAllocator(NCVMemoryTypeHostPinned, static_cast<Ncv32u>(devProp.textureAlignment));
    229     ncvAssertPrintReturn(cpuCascadeAllocator.isInitialized(), "Error creating cascade CPU allocator", -1);
    230 
    231     Ncv32u haarNumStages, haarNumNodes, haarNumFeatures;
    232     ncvStat = ncvHaarGetClassifierSize(cascadeName, haarNumStages, haarNumNodes, haarNumFeatures);
    233     ncvAssertPrintReturn(ncvStat == NCV_SUCCESS, "Error reading classifier size (check the file)", -1);
    234 
    235     NCVVectorAlloc<HaarStage64> h_haarStages(cpuCascadeAllocator, haarNumStages);
    236     ncvAssertPrintReturn(h_haarStages.isMemAllocated(), "Error in cascade CPU allocator", -1);
    237     NCVVectorAlloc<HaarClassifierNode128> h_haarNodes(cpuCascadeAllocator, haarNumNodes);
    238     ncvAssertPrintReturn(h_haarNodes.isMemAllocated(), "Error in cascade CPU allocator", -1);
    239     NCVVectorAlloc<HaarFeature64> h_haarFeatures(cpuCascadeAllocator, haarNumFeatures);
    240 
    241     ncvAssertPrintReturn(h_haarFeatures.isMemAllocated(), "Error in cascade CPU allocator", -1);
    242 
    243     HaarClassifierCascadeDescriptor haar;
    244     ncvStat = ncvHaarLoadFromFile_host(cascadeName, haar, h_haarStages, h_haarNodes, h_haarFeatures);
    245     ncvAssertPrintReturn(ncvStat == NCV_SUCCESS, "Error loading classifier", -1);
    246 
    247     NCVVectorAlloc<HaarStage64> d_haarStages(gpuCascadeAllocator, haarNumStages);
    248     ncvAssertPrintReturn(d_haarStages.isMemAllocated(), "Error in cascade GPU allocator", -1);
    249     NCVVectorAlloc<HaarClassifierNode128> d_haarNodes(gpuCascadeAllocator, haarNumNodes);
    250     ncvAssertPrintReturn(d_haarNodes.isMemAllocated(), "Error in cascade GPU allocator", -1);
    251     NCVVectorAlloc<HaarFeature64> d_haarFeatures(gpuCascadeAllocator, haarNumFeatures);
    252     ncvAssertPrintReturn(d_haarFeatures.isMemAllocated(), "Error in cascade GPU allocator", -1);
    253 
    254     ncvStat = h_haarStages.copySolid(d_haarStages, 0);
    255     ncvAssertPrintReturn(ncvStat == NCV_SUCCESS, "Error copying cascade to GPU", -1);
    256     ncvStat = h_haarNodes.copySolid(d_haarNodes, 0);
    257     ncvAssertPrintReturn(ncvStat == NCV_SUCCESS, "Error copying cascade to GPU", -1);
    258     ncvStat = h_haarFeatures.copySolid(d_haarFeatures, 0);
    259     ncvAssertPrintReturn(ncvStat == NCV_SUCCESS, "Error copying cascade to GPU", -1);
    260 
    261     //==============================================================================
    262     //
    263     // Calculate memory requirements and create real allocators
    264     //
    265     //==============================================================================
    266 
    267     NCVMemStackAllocator gpuCounter(static_cast<Ncv32u>(devProp.textureAlignment));
    268     ncvAssertPrintReturn(gpuCounter.isInitialized(), "Error creating GPU memory counter", -1);
    269     NCVMemStackAllocator cpuCounter(static_cast<Ncv32u>(devProp.textureAlignment));
    270     ncvAssertPrintReturn(cpuCounter.isInitialized(), "Error creating CPU memory counter", -1);
    271 
    272     ncvStat = process(NULL, frameSize.width, frameSize.height,
    273                       false, false, haar,
    274                       d_haarStages, d_haarNodes,
    275                       d_haarFeatures, h_haarStages,
    276                       gpuCounter, cpuCounter, devProp);
    277     ncvAssertPrintReturn(ncvStat == NCV_SUCCESS, "Error in memory counting pass", -1);
    278 
    279     NCVMemStackAllocator gpuAllocator(NCVMemoryTypeDevice, gpuCounter.maxSize(), static_cast<Ncv32u>(devProp.textureAlignment));
    280     ncvAssertPrintReturn(gpuAllocator.isInitialized(), "Error creating GPU memory allocator", -1);
    281     NCVMemStackAllocator cpuAllocator(NCVMemoryTypeHostPinned, cpuCounter.maxSize(), static_cast<Ncv32u>(devProp.textureAlignment));
    282     ncvAssertPrintReturn(cpuAllocator.isInitialized(), "Error creating CPU memory allocator", -1);
    283 
    284     printf("Initialized for frame size [%dx%d]\n", frameSize.width, frameSize.height);
    285 
    286     //==============================================================================
    287     //
    288     // Main processing loop
    289     //
    290     //==============================================================================
    291 
    292     namedWindow(wndTitle, 1);
    293     Mat frameDisp;
    294 
    295     do
    296     {
    297         Mat gray;
    298         cvtColor((image.empty() ? frame : image), gray, cv::COLOR_BGR2GRAY);
    299 
    300         //
    301         // process
    302         //
    303 
    304         NcvSize32u minSize = haar.ClassifierSize;
    305         if (bLargestObject)
    306         {
    307             Ncv32u ratioX = preferredVideoFrameSize.width / minSize.width;
    308             Ncv32u ratioY = preferredVideoFrameSize.height / minSize.height;
    309             Ncv32u ratioSmallest = min(ratioX, ratioY);
    310             ratioSmallest = max((Ncv32u)(ratioSmallest / 2.5f), (Ncv32u)1);
    311             minSize.width *= ratioSmallest;
    312             minSize.height *= ratioSmallest;
    313         }
    314 
    315         Ncv32f avgTime;
    316         NcvTimer timer = ncvStartTimer();
    317 
    318         if (bUseGPU)
    319         {
    320             ncvStat = process(&gray, frameSize.width, frameSize.height,
    321                               bFilterRects, bLargestObject, haar,
    322                               d_haarStages, d_haarNodes,
    323                               d_haarFeatures, h_haarStages,
    324                               gpuAllocator, cpuAllocator, devProp);
    325             ncvAssertPrintReturn(ncvStat == NCV_SUCCESS, "Error in memory counting pass", -1);
    326         }
    327         else
    328         {
    329             vector<Rect> rectsOpenCV;
    330 
    331             classifierOpenCV.detectMultiScale(
    332                 gray,
    333                 rectsOpenCV,
    334                 1.2f,
    335                 bFilterRects ? 4 : 0,
    336                 (bLargestObject ? CV_HAAR_FIND_BIGGEST_OBJECT : 0)
    337                 | CV_HAAR_SCALE_IMAGE,
    338                 Size(minSize.width, minSize.height));
    339 
    340             for (size_t rt = 0; rt < rectsOpenCV.size(); ++rt)
    341                 rectangle(gray, rectsOpenCV[rt], Scalar(255));
    342         }
    343 
    344         avgTime = (Ncv32f)ncvEndQueryTimerMs(timer);
    345 
    346         cvtColor(gray, frameDisp, cv::COLOR_GRAY2BGR);
    347         displayState(frameDisp, bHelpScreen, bUseGPU, bLargestObject, bFilterRects, 1000.0f / avgTime);
    348         imshow(wndTitle, frameDisp);
    349 
    350         //handle input
    351         switch (cv::waitKey(3))
    352         {
    353         case ' ':
    354             bUseGPU = !bUseGPU;
    355             break;
    356         case 'm':
    357         case 'M':
    358             bLargestObject = !bLargestObject;
    359             break;
    360         case 'f':
    361         case 'F':
    362             bFilterRects = !bFilterRects;
    363             break;
    364         case 'h':
    365         case 'H':
    366             bHelpScreen = !bHelpScreen;
    367             break;
    368         case 27:
    369             bQuit = true;
    370             break;
    371         }
    372 
    373         // For camera and video file, capture the next image
    374         if (capture.isOpened())
    375         {
    376             capture >> frame;
    377             if (frame.empty())
    378             {
    379                 break;
    380             }
    381         }
    382     } while (!bQuit);
    383 
    384     cv::destroyWindow(wndTitle);
    385 
    386     return 0;
    387 }
    388 
    389 #endif //!defined(HAVE_CUDA)
    390