Home | History | Annotate | Download | only in performance
      1 #include <stdexcept>
      2 #include "opencv2/imgproc.hpp"
      3 #include "opencv2/highgui.hpp"
      4 #include "opencv2/calib3d.hpp"
      5 #include "opencv2/video.hpp"
      6 #include "opencv2/cudalegacy.hpp"
      7 #include "opencv2/cudaimgproc.hpp"
      8 #include "opencv2/cudaarithm.hpp"
      9 #include "opencv2/cudawarping.hpp"
     10 #include "opencv2/cudafeatures2d.hpp"
     11 #include "opencv2/cudafilters.hpp"
     12 #include "opencv2/cudaoptflow.hpp"
     13 #include "opencv2/cudabgsegm.hpp"
     14 
     15 #include "performance.h"
     16 
     17 #include "opencv2/opencv_modules.hpp"
     18 
     19 #ifdef HAVE_OPENCV_XFEATURES2D
     20 #include "opencv2/xfeatures2d/cuda.hpp"
     21 #include "opencv2/xfeatures2d/nonfree.hpp"
     22 #endif
     23 
     24 #ifdef HAVE_OPENCV_BGSEGM
     25 #include "opencv2/bgsegm.hpp"
     26 #endif
     27 
     28 using namespace std;
     29 using namespace cv;
     30 
     31 
     32 TEST(matchTemplate)
     33 {
     34     Mat src, templ, dst;
     35     gen(src, 3000, 3000, CV_32F, 0, 1);
     36 
     37     cuda::GpuMat d_src(src), d_templ, d_dst;
     38 
     39     Ptr<cuda::TemplateMatching> alg = cuda::createTemplateMatching(src.type(), TM_CCORR);
     40 
     41     for (int templ_size = 5; templ_size < 200; templ_size *= 5)
     42     {
     43         SUBTEST << src.cols << 'x' << src.rows << ", 32FC1" << ", templ " << templ_size << 'x' << templ_size << ", CCORR";
     44 
     45         gen(templ, templ_size, templ_size, CV_32F, 0, 1);
     46         matchTemplate(src, templ, dst, TM_CCORR);
     47 
     48         CPU_ON;
     49         matchTemplate(src, templ, dst, TM_CCORR);
     50         CPU_OFF;
     51 
     52         d_templ.upload(templ);
     53         alg->match(d_src, d_templ, d_dst);
     54 
     55         CUDA_ON;
     56         alg->match(d_src, d_templ, d_dst);
     57         CUDA_OFF;
     58     }
     59 }
     60 
     61 
     62 TEST(minMaxLoc)
     63 {
     64     Mat src;
     65     cuda::GpuMat d_src;
     66 
     67     double min_val, max_val;
     68     Point min_loc, max_loc;
     69 
     70     for (int size = 2000; size <= 8000; size *= 2)
     71     {
     72         SUBTEST << size << 'x' << size << ", 32F";
     73 
     74         gen(src, size, size, CV_32F, 0, 1);
     75 
     76         CPU_ON;
     77         minMaxLoc(src, &min_val, &max_val, &min_loc, &max_loc);
     78         CPU_OFF;
     79 
     80         d_src.upload(src);
     81 
     82         CUDA_ON;
     83         cuda::minMaxLoc(d_src, &min_val, &max_val, &min_loc, &max_loc);
     84         CUDA_OFF;
     85     }
     86 }
     87 
     88 
     89 TEST(remap)
     90 {
     91     Mat src, dst, xmap, ymap;
     92     cuda::GpuMat d_src, d_dst, d_xmap, d_ymap;
     93 
     94     int interpolation = INTER_LINEAR;
     95     int borderMode = BORDER_REPLICATE;
     96 
     97     for (int size = 1000; size <= 4000; size *= 2)
     98     {
     99         SUBTEST << size << 'x' << size << ", 8UC4, INTER_LINEAR, BORDER_REPLICATE";
    100 
    101         gen(src, size, size, CV_8UC4, 0, 256);
    102 
    103         xmap.create(size, size, CV_32F);
    104         ymap.create(size, size, CV_32F);
    105         for (int i = 0; i < size; ++i)
    106         {
    107             float* xmap_row = xmap.ptr<float>(i);
    108             float* ymap_row = ymap.ptr<float>(i);
    109             for (int j = 0; j < size; ++j)
    110             {
    111                 xmap_row[j] = (j - size * 0.5f) * 0.75f + size * 0.5f;
    112                 ymap_row[j] = (i - size * 0.5f) * 0.75f + size * 0.5f;
    113             }
    114         }
    115 
    116         remap(src, dst, xmap, ymap, interpolation, borderMode);
    117 
    118         CPU_ON;
    119         remap(src, dst, xmap, ymap, interpolation, borderMode);
    120         CPU_OFF;
    121 
    122         d_src.upload(src);
    123         d_xmap.upload(xmap);
    124         d_ymap.upload(ymap);
    125 
    126         cuda::remap(d_src, d_dst, d_xmap, d_ymap, interpolation, borderMode);
    127 
    128         CUDA_ON;
    129         cuda::remap(d_src, d_dst, d_xmap, d_ymap, interpolation, borderMode);
    130         CUDA_OFF;
    131     }
    132 }
    133 
    134 
    135 TEST(dft)
    136 {
    137     Mat src, dst;
    138     cuda::GpuMat d_src, d_dst;
    139 
    140     for (int size = 1000; size <= 4000; size *= 2)
    141     {
    142         SUBTEST << size << 'x' << size << ", 32FC2, complex-to-complex";
    143 
    144         gen(src, size, size, CV_32FC2, Scalar::all(0), Scalar::all(1));
    145 
    146         dft(src, dst);
    147 
    148         CPU_ON;
    149         dft(src, dst);
    150         CPU_OFF;
    151 
    152         d_src.upload(src);
    153 
    154         cuda::dft(d_src, d_dst, Size(size, size));
    155 
    156         CUDA_ON;
    157         cuda::dft(d_src, d_dst, Size(size, size));
    158         CUDA_OFF;
    159     }
    160 }
    161 
    162 
    163 TEST(cornerHarris)
    164 {
    165     Mat src, dst;
    166     cuda::GpuMat d_src, d_dst;
    167 
    168     for (int size = 1000; size <= 4000; size *= 2)
    169     {
    170         SUBTEST << size << 'x' << size << ", 32FC1, BORDER_REFLECT101";
    171 
    172         gen(src, size, size, CV_32F, 0, 1);
    173 
    174         cornerHarris(src, dst, 5, 7, 0.1, BORDER_REFLECT101);
    175 
    176         CPU_ON;
    177         cornerHarris(src, dst, 5, 7, 0.1, BORDER_REFLECT101);
    178         CPU_OFF;
    179 
    180         d_src.upload(src);
    181 
    182         Ptr<cuda::CornernessCriteria> harris = cuda::createHarrisCorner(src.type(), 5, 7, 0.1, BORDER_REFLECT101);
    183 
    184         harris->compute(d_src, d_dst);
    185 
    186         CUDA_ON;
    187         harris->compute(d_src, d_dst);
    188         CUDA_OFF;
    189     }
    190 }
    191 
    192 
    193 TEST(integral)
    194 {
    195     Mat src, sum;
    196     cuda::GpuMat d_src, d_sum;
    197 
    198     for (int size = 1000; size <= 4000; size *= 2)
    199     {
    200         SUBTEST << size << 'x' << size << ", 8UC1";
    201 
    202         gen(src, size, size, CV_8U, 0, 256);
    203 
    204         integral(src, sum);
    205 
    206         CPU_ON;
    207         integral(src, sum);
    208         CPU_OFF;
    209 
    210         d_src.upload(src);
    211 
    212         cuda::integral(d_src, d_sum);
    213 
    214         CUDA_ON;
    215         cuda::integral(d_src, d_sum);
    216         CUDA_OFF;
    217     }
    218 }
    219 
    220 
    221 TEST(norm)
    222 {
    223     Mat src;
    224     cuda::GpuMat d_src, d_buf;
    225 
    226     for (int size = 2000; size <= 4000; size += 1000)
    227     {
    228         SUBTEST << size << 'x' << size << ", 32FC4, NORM_INF";
    229 
    230         gen(src, size, size, CV_32FC4, Scalar::all(0), Scalar::all(1));
    231 
    232         norm(src, NORM_INF);
    233 
    234         CPU_ON;
    235         norm(src, NORM_INF);
    236         CPU_OFF;
    237 
    238         d_src.upload(src);
    239 
    240         cuda::norm(d_src, NORM_INF, d_buf);
    241 
    242         CUDA_ON;
    243         cuda::norm(d_src, NORM_INF, d_buf);
    244         CUDA_OFF;
    245     }
    246 }
    247 
    248 
    249 TEST(meanShift)
    250 {
    251     int sp = 10, sr = 10;
    252 
    253     Mat src, dst;
    254     cuda::GpuMat d_src, d_dst;
    255 
    256     for (int size = 400; size <= 800; size *= 2)
    257     {
    258         SUBTEST << size << 'x' << size << ", 8UC3 vs 8UC4";
    259 
    260         gen(src, size, size, CV_8UC3, Scalar::all(0), Scalar::all(256));
    261 
    262         pyrMeanShiftFiltering(src, dst, sp, sr);
    263 
    264         CPU_ON;
    265         pyrMeanShiftFiltering(src, dst, sp, sr);
    266         CPU_OFF;
    267 
    268         gen(src, size, size, CV_8UC4, Scalar::all(0), Scalar::all(256));
    269 
    270         d_src.upload(src);
    271 
    272         cuda::meanShiftFiltering(d_src, d_dst, sp, sr);
    273 
    274         CUDA_ON;
    275         cuda::meanShiftFiltering(d_src, d_dst, sp, sr);
    276         CUDA_OFF;
    277     }
    278 }
    279 
    280 #ifdef HAVE_OPENCV_XFEATURES2D
    281 
    282 TEST(SURF)
    283 {
    284     Mat src = imread(abspath("../data/aloeL.jpg"), IMREAD_GRAYSCALE);
    285     if (src.empty()) throw runtime_error("can't open ../data/aloeL.jpg");
    286 
    287     Ptr<Feature2D> surf = xfeatures2d::SURF::create();
    288     vector<KeyPoint> keypoints;
    289     Mat descriptors;
    290 
    291     surf->detectAndCompute(src, Mat(), keypoints, descriptors);
    292 
    293     CPU_ON;
    294     surf->detectAndCompute(src, Mat(), keypoints, descriptors);
    295     CPU_OFF;
    296 
    297     cuda::SURF_CUDA d_surf;
    298     cuda::GpuMat d_src(src);
    299     cuda::GpuMat d_keypoints;
    300     cuda::GpuMat d_descriptors;
    301 
    302     d_surf(d_src, cuda::GpuMat(), d_keypoints, d_descriptors);
    303 
    304     CUDA_ON;
    305     d_surf(d_src, cuda::GpuMat(), d_keypoints, d_descriptors);
    306     CUDA_OFF;
    307 }
    308 
    309 #endif
    310 
    311 
    312 TEST(FAST)
    313 {
    314     Mat src = imread(abspath("../data/aloeL.jpg"), IMREAD_GRAYSCALE);
    315     if (src.empty()) throw runtime_error("can't open ../data/aloeL.jpg");
    316 
    317     vector<KeyPoint> keypoints;
    318 
    319     FAST(src, keypoints, 20);
    320 
    321     CPU_ON;
    322     FAST(src, keypoints, 20);
    323     CPU_OFF;
    324 
    325     cv::Ptr<cv::cuda::FastFeatureDetector> d_FAST = cv::cuda::FastFeatureDetector::create(20);
    326     cuda::GpuMat d_src(src);
    327     cuda::GpuMat d_keypoints;
    328 
    329     d_FAST->detectAsync(d_src, d_keypoints);
    330 
    331     CUDA_ON;
    332     d_FAST->detectAsync(d_src, d_keypoints);
    333     CUDA_OFF;
    334 }
    335 
    336 
    337 TEST(ORB)
    338 {
    339     Mat src = imread(abspath("../data/aloeL.jpg"), IMREAD_GRAYSCALE);
    340     if (src.empty()) throw runtime_error("can't open ../data/aloeL.jpg");
    341 
    342     Ptr<ORB> orb = ORB::create(4000);
    343 
    344     vector<KeyPoint> keypoints;
    345     Mat descriptors;
    346 
    347     orb->detectAndCompute(src, Mat(), keypoints, descriptors);
    348 
    349     CPU_ON;
    350     orb->detectAndCompute(src, Mat(), keypoints, descriptors);
    351     CPU_OFF;
    352 
    353     Ptr<cuda::ORB> d_orb = cuda::ORB::create();
    354     cuda::GpuMat d_src(src);
    355     cuda::GpuMat d_keypoints;
    356     cuda::GpuMat d_descriptors;
    357 
    358     d_orb->detectAndComputeAsync(d_src, cuda::GpuMat(), d_keypoints, d_descriptors);
    359 
    360     CUDA_ON;
    361     d_orb->detectAndComputeAsync(d_src, cuda::GpuMat(), d_keypoints, d_descriptors);
    362     CUDA_OFF;
    363 }
    364 
    365 
    366 TEST(BruteForceMatcher)
    367 {
    368     // Init CPU matcher
    369 
    370     int desc_len = 64;
    371 
    372     BFMatcher matcher(NORM_L2);
    373 
    374     Mat query;
    375     gen(query, 3000, desc_len, CV_32F, 0, 1);
    376 
    377     Mat train;
    378     gen(train, 3000, desc_len, CV_32F, 0, 1);
    379 
    380     // Init CUDA matcher
    381 
    382     Ptr<cuda::DescriptorMatcher> d_matcher = cuda::DescriptorMatcher::createBFMatcher(NORM_L2);
    383 
    384     cuda::GpuMat d_query(query);
    385     cuda::GpuMat d_train(train);
    386 
    387     // Output
    388     vector< vector<DMatch> > matches(2);
    389     cuda::GpuMat d_matches;
    390 
    391     SUBTEST << "match";
    392 
    393     matcher.match(query, train, matches[0]);
    394 
    395     CPU_ON;
    396     matcher.match(query, train, matches[0]);
    397     CPU_OFF;
    398 
    399     d_matcher->matchAsync(d_query, d_train, d_matches);
    400 
    401     CUDA_ON;
    402     d_matcher->matchAsync(d_query, d_train, d_matches);
    403     CUDA_OFF;
    404 
    405     SUBTEST << "knnMatch";
    406 
    407     matcher.knnMatch(query, train, matches, 2);
    408 
    409     CPU_ON;
    410     matcher.knnMatch(query, train, matches, 2);
    411     CPU_OFF;
    412 
    413     d_matcher->knnMatchAsync(d_query, d_train, d_matches, 2);
    414 
    415     CUDA_ON;
    416     d_matcher->knnMatchAsync(d_query, d_train, d_matches, 2);
    417     CUDA_OFF;
    418 
    419     SUBTEST << "radiusMatch";
    420 
    421     float max_distance = 2.0f;
    422 
    423     matcher.radiusMatch(query, train, matches, max_distance);
    424 
    425     CPU_ON;
    426     matcher.radiusMatch(query, train, matches, max_distance);
    427     CPU_OFF;
    428 
    429     d_matcher->radiusMatchAsync(d_query, d_train, d_matches, max_distance);
    430 
    431     CUDA_ON;
    432     d_matcher->radiusMatchAsync(d_query, d_train, d_matches, max_distance);
    433     CUDA_OFF;
    434 }
    435 
    436 
    437 TEST(magnitude)
    438 {
    439     Mat x, y, mag;
    440     cuda::GpuMat d_x, d_y, d_mag;
    441 
    442     for (int size = 2000; size <= 4000; size += 1000)
    443     {
    444         SUBTEST << size << 'x' << size << ", 32FC1";
    445 
    446         gen(x, size, size, CV_32F, 0, 1);
    447         gen(y, size, size, CV_32F, 0, 1);
    448 
    449         magnitude(x, y, mag);
    450 
    451         CPU_ON;
    452         magnitude(x, y, mag);
    453         CPU_OFF;
    454 
    455         d_x.upload(x);
    456         d_y.upload(y);
    457 
    458         cuda::magnitude(d_x, d_y, d_mag);
    459 
    460         CUDA_ON;
    461         cuda::magnitude(d_x, d_y, d_mag);
    462         CUDA_OFF;
    463     }
    464 }
    465 
    466 
    467 TEST(add)
    468 {
    469     Mat src1, src2, dst;
    470     cuda::GpuMat d_src1, d_src2, d_dst;
    471 
    472     for (int size = 2000; size <= 4000; size += 1000)
    473     {
    474         SUBTEST << size << 'x' << size << ", 32FC1";
    475 
    476         gen(src1, size, size, CV_32F, 0, 1);
    477         gen(src2, size, size, CV_32F, 0, 1);
    478 
    479         add(src1, src2, dst);
    480 
    481         CPU_ON;
    482         add(src1, src2, dst);
    483         CPU_OFF;
    484 
    485         d_src1.upload(src1);
    486         d_src2.upload(src2);
    487 
    488         cuda::add(d_src1, d_src2, d_dst);
    489 
    490         CUDA_ON;
    491         cuda::add(d_src1, d_src2, d_dst);
    492         CUDA_OFF;
    493     }
    494 }
    495 
    496 
    497 TEST(log)
    498 {
    499     Mat src, dst;
    500     cuda::GpuMat d_src, d_dst;
    501 
    502     for (int size = 2000; size <= 4000; size += 1000)
    503     {
    504         SUBTEST << size << 'x' << size << ", 32F";
    505 
    506         gen(src, size, size, CV_32F, 1, 10);
    507 
    508         log(src, dst);
    509 
    510         CPU_ON;
    511         log(src, dst);
    512         CPU_OFF;
    513 
    514         d_src.upload(src);
    515 
    516         cuda::log(d_src, d_dst);
    517 
    518         CUDA_ON;
    519         cuda::log(d_src, d_dst);
    520         CUDA_OFF;
    521     }
    522 }
    523 
    524 
    525 TEST(mulSpectrums)
    526 {
    527     Mat src1, src2, dst;
    528     cuda::GpuMat d_src1, d_src2, d_dst;
    529 
    530     for (int size = 2000; size <= 4000; size += 1000)
    531     {
    532         SUBTEST << size << 'x' << size;
    533 
    534         gen(src1, size, size, CV_32FC2, Scalar::all(0), Scalar::all(1));
    535         gen(src2, size, size, CV_32FC2, Scalar::all(0), Scalar::all(1));
    536 
    537         mulSpectrums(src1, src2, dst, 0, true);
    538 
    539         CPU_ON;
    540         mulSpectrums(src1, src2, dst, 0, true);
    541         CPU_OFF;
    542 
    543         d_src1.upload(src1);
    544         d_src2.upload(src2);
    545 
    546         cuda::mulSpectrums(d_src1, d_src2, d_dst, 0, true);
    547 
    548         CUDA_ON;
    549         cuda::mulSpectrums(d_src1, d_src2, d_dst, 0, true);
    550         CUDA_OFF;
    551     }
    552 }
    553 
    554 
    555 TEST(resize)
    556 {
    557     Mat src, dst;
    558     cuda::GpuMat d_src, d_dst;
    559 
    560     for (int size = 1000; size <= 3000; size += 1000)
    561     {
    562         SUBTEST << size << 'x' << size << ", 8UC4, up";
    563 
    564         gen(src, size, size, CV_8UC4, 0, 256);
    565 
    566         resize(src, dst, Size(), 2.0, 2.0);
    567 
    568         CPU_ON;
    569         resize(src, dst, Size(), 2.0, 2.0);
    570         CPU_OFF;
    571 
    572         d_src.upload(src);
    573 
    574         cuda::resize(d_src, d_dst, Size(), 2.0, 2.0);
    575 
    576         CUDA_ON;
    577         cuda::resize(d_src, d_dst, Size(), 2.0, 2.0);
    578         CUDA_OFF;
    579     }
    580 
    581     for (int size = 1000; size <= 3000; size += 1000)
    582     {
    583         SUBTEST << size << 'x' << size << ", 8UC4, down";
    584 
    585         gen(src, size, size, CV_8UC4, 0, 256);
    586 
    587         resize(src, dst, Size(), 0.5, 0.5);
    588 
    589         CPU_ON;
    590         resize(src, dst, Size(), 0.5, 0.5);
    591         CPU_OFF;
    592 
    593         d_src.upload(src);
    594 
    595         cuda::resize(d_src, d_dst, Size(), 0.5, 0.5);
    596 
    597         CUDA_ON;
    598         cuda::resize(d_src, d_dst, Size(), 0.5, 0.5);
    599         CUDA_OFF;
    600     }
    601 }
    602 
    603 
    604 TEST(cvtColor)
    605 {
    606     Mat src, dst;
    607     cuda::GpuMat d_src, d_dst;
    608 
    609     gen(src, 4000, 4000, CV_8UC1, 0, 255);
    610     d_src.upload(src);
    611 
    612     SUBTEST << "4000x4000, 8UC1, COLOR_GRAY2BGRA";
    613 
    614     cvtColor(src, dst, COLOR_GRAY2BGRA, 4);
    615 
    616     CPU_ON;
    617     cvtColor(src, dst, COLOR_GRAY2BGRA, 4);
    618     CPU_OFF;
    619 
    620     cuda::cvtColor(d_src, d_dst, COLOR_GRAY2BGRA, 4);
    621 
    622     CUDA_ON;
    623     cuda::cvtColor(d_src, d_dst, COLOR_GRAY2BGRA, 4);
    624     CUDA_OFF;
    625 
    626     cv::swap(src, dst);
    627     d_src.swap(d_dst);
    628 
    629     SUBTEST << "4000x4000, 8UC3 vs 8UC4, COLOR_BGR2YCrCb";
    630 
    631     cvtColor(src, dst, COLOR_BGR2YCrCb);
    632 
    633     CPU_ON;
    634     cvtColor(src, dst, COLOR_BGR2YCrCb);
    635     CPU_OFF;
    636 
    637     cuda::cvtColor(d_src, d_dst, COLOR_BGR2YCrCb, 4);
    638 
    639     CUDA_ON;
    640     cuda::cvtColor(d_src, d_dst, COLOR_BGR2YCrCb, 4);
    641     CUDA_OFF;
    642 
    643     cv::swap(src, dst);
    644     d_src.swap(d_dst);
    645 
    646     SUBTEST << "4000x4000, 8UC4, COLOR_YCrCb2BGR";
    647 
    648     cvtColor(src, dst, COLOR_YCrCb2BGR, 4);
    649 
    650     CPU_ON;
    651     cvtColor(src, dst, COLOR_YCrCb2BGR, 4);
    652     CPU_OFF;
    653 
    654     cuda::cvtColor(d_src, d_dst, COLOR_YCrCb2BGR, 4);
    655 
    656     CUDA_ON;
    657     cuda::cvtColor(d_src, d_dst, COLOR_YCrCb2BGR, 4);
    658     CUDA_OFF;
    659 
    660     cv::swap(src, dst);
    661     d_src.swap(d_dst);
    662 
    663     SUBTEST << "4000x4000, 8UC3 vs 8UC4, COLOR_BGR2XYZ";
    664 
    665     cvtColor(src, dst, COLOR_BGR2XYZ);
    666 
    667     CPU_ON;
    668     cvtColor(src, dst, COLOR_BGR2XYZ);
    669     CPU_OFF;
    670 
    671     cuda::cvtColor(d_src, d_dst, COLOR_BGR2XYZ, 4);
    672 
    673     CUDA_ON;
    674     cuda::cvtColor(d_src, d_dst, COLOR_BGR2XYZ, 4);
    675     CUDA_OFF;
    676 
    677     cv::swap(src, dst);
    678     d_src.swap(d_dst);
    679 
    680     SUBTEST << "4000x4000, 8UC4, COLOR_XYZ2BGR";
    681 
    682     cvtColor(src, dst, COLOR_XYZ2BGR, 4);
    683 
    684     CPU_ON;
    685     cvtColor(src, dst, COLOR_XYZ2BGR, 4);
    686     CPU_OFF;
    687 
    688     cuda::cvtColor(d_src, d_dst, COLOR_XYZ2BGR, 4);
    689 
    690     CUDA_ON;
    691     cuda::cvtColor(d_src, d_dst, COLOR_XYZ2BGR, 4);
    692     CUDA_OFF;
    693 
    694     cv::swap(src, dst);
    695     d_src.swap(d_dst);
    696 
    697     SUBTEST << "4000x4000, 8UC3 vs 8UC4, COLOR_BGR2HSV";
    698 
    699     cvtColor(src, dst, COLOR_BGR2HSV);
    700 
    701     CPU_ON;
    702     cvtColor(src, dst, COLOR_BGR2HSV);
    703     CPU_OFF;
    704 
    705     cuda::cvtColor(d_src, d_dst, COLOR_BGR2HSV, 4);
    706 
    707     CUDA_ON;
    708     cuda::cvtColor(d_src, d_dst, COLOR_BGR2HSV, 4);
    709     CUDA_OFF;
    710 
    711     cv::swap(src, dst);
    712     d_src.swap(d_dst);
    713 
    714     SUBTEST << "4000x4000, 8UC4, COLOR_HSV2BGR";
    715 
    716     cvtColor(src, dst, COLOR_HSV2BGR, 4);
    717 
    718     CPU_ON;
    719     cvtColor(src, dst, COLOR_HSV2BGR, 4);
    720     CPU_OFF;
    721 
    722     cuda::cvtColor(d_src, d_dst, COLOR_HSV2BGR, 4);
    723 
    724     CUDA_ON;
    725     cuda::cvtColor(d_src, d_dst, COLOR_HSV2BGR, 4);
    726     CUDA_OFF;
    727 
    728     cv::swap(src, dst);
    729     d_src.swap(d_dst);
    730 }
    731 
    732 
    733 TEST(erode)
    734 {
    735     Mat src, dst, ker;
    736     cuda::GpuMat d_src, d_buf, d_dst;
    737 
    738     for (int size = 2000; size <= 4000; size += 1000)
    739     {
    740         SUBTEST << size << 'x' << size;
    741 
    742         gen(src, size, size, CV_8UC4, Scalar::all(0), Scalar::all(256));
    743         ker = getStructuringElement(MORPH_RECT, Size(3, 3));
    744 
    745         erode(src, dst, ker);
    746 
    747         CPU_ON;
    748         erode(src, dst, ker);
    749         CPU_OFF;
    750 
    751         d_src.upload(src);
    752 
    753         Ptr<cuda::Filter> erode = cuda::createMorphologyFilter(MORPH_ERODE, d_src.type(), ker);
    754 
    755         erode->apply(d_src, d_dst);
    756 
    757         CUDA_ON;
    758         erode->apply(d_src, d_dst);
    759         CUDA_OFF;
    760     }
    761 }
    762 
    763 TEST(threshold)
    764 {
    765     Mat src, dst;
    766     cuda::GpuMat d_src, d_dst;
    767 
    768     for (int size = 2000; size <= 4000; size += 1000)
    769     {
    770         SUBTEST << size << 'x' << size << ", 8UC1, THRESH_BINARY";
    771 
    772         gen(src, size, size, CV_8U, 0, 100);
    773 
    774         threshold(src, dst, 50.0, 0.0, THRESH_BINARY);
    775 
    776         CPU_ON;
    777         threshold(src, dst, 50.0, 0.0, THRESH_BINARY);
    778         CPU_OFF;
    779 
    780         d_src.upload(src);
    781 
    782         cuda::threshold(d_src, d_dst, 50.0, 0.0, THRESH_BINARY);
    783 
    784         CUDA_ON;
    785         cuda::threshold(d_src, d_dst, 50.0, 0.0, THRESH_BINARY);
    786         CUDA_OFF;
    787     }
    788 
    789     for (int size = 2000; size <= 4000; size += 1000)
    790     {
    791         SUBTEST << size << 'x' << size << ", 32FC1, THRESH_TRUNC [NPP]";
    792 
    793         gen(src, size, size, CV_32FC1, 0, 100);
    794 
    795         threshold(src, dst, 50.0, 0.0, THRESH_TRUNC);
    796 
    797         CPU_ON;
    798         threshold(src, dst, 50.0, 0.0, THRESH_TRUNC);
    799         CPU_OFF;
    800 
    801         d_src.upload(src);
    802 
    803         cuda::threshold(d_src, d_dst, 50.0, 0.0, THRESH_TRUNC);
    804 
    805         CUDA_ON;
    806         cuda::threshold(d_src, d_dst, 50.0, 0.0, THRESH_TRUNC);
    807         CUDA_OFF;
    808     }
    809 }
    810 
    811 TEST(pow)
    812 {
    813     Mat src, dst;
    814     cuda::GpuMat d_src, d_dst;
    815 
    816     for (int size = 1000; size <= 4000; size += 1000)
    817     {
    818         SUBTEST << size << 'x' << size << ", 32F";
    819 
    820         gen(src, size, size, CV_32F, 0, 100);
    821 
    822         pow(src, -2.0, dst);
    823 
    824         CPU_ON;
    825         pow(src, -2.0, dst);
    826         CPU_OFF;
    827 
    828         d_src.upload(src);
    829 
    830         cuda::pow(d_src, -2.0, d_dst);
    831 
    832         CUDA_ON;
    833         cuda::pow(d_src, -2.0, d_dst);
    834         CUDA_OFF;
    835     }
    836 }
    837 
    838 
    839 TEST(projectPoints)
    840 {
    841     Mat src;
    842     vector<Point2f> dst;
    843     cuda::GpuMat d_src, d_dst;
    844 
    845     Mat rvec; gen(rvec, 1, 3, CV_32F, 0, 1);
    846     Mat tvec; gen(tvec, 1, 3, CV_32F, 0, 1);
    847     Mat camera_mat; gen(camera_mat, 3, 3, CV_32F, 0, 1);
    848     camera_mat.at<float>(0, 1) = 0.f;
    849     camera_mat.at<float>(1, 0) = 0.f;
    850     camera_mat.at<float>(2, 0) = 0.f;
    851     camera_mat.at<float>(2, 1) = 0.f;
    852 
    853     for (int size = (int)1e6, count = 0; size >= 1e5 && count < 5; size = int(size / 1.4), count++)
    854     {
    855         SUBTEST << size;
    856 
    857         gen(src, 1, size, CV_32FC3, Scalar::all(0), Scalar::all(10));
    858 
    859         projectPoints(src, rvec, tvec, camera_mat, Mat::zeros(1, 8, CV_32F), dst);
    860 
    861         CPU_ON;
    862         projectPoints(src, rvec, tvec, camera_mat, Mat::zeros(1, 8, CV_32F), dst);
    863         CPU_OFF;
    864 
    865         d_src.upload(src);
    866 
    867         cuda::projectPoints(d_src, rvec, tvec, camera_mat, Mat(), d_dst);
    868 
    869         CUDA_ON;
    870         cuda::projectPoints(d_src, rvec, tvec, camera_mat, Mat(), d_dst);
    871         CUDA_OFF;
    872     }
    873 }
    874 
    875 
    876 static void InitSolvePnpRansac()
    877 {
    878     Mat object; gen(object, 1, 4, CV_32FC3, Scalar::all(0), Scalar::all(100));
    879     Mat image; gen(image, 1, 4, CV_32FC2, Scalar::all(0), Scalar::all(100));
    880     Mat rvec, tvec;
    881     cuda::solvePnPRansac(object, image, Mat::eye(3, 3, CV_32F), Mat(), rvec, tvec);
    882 }
    883 
    884 
    885 TEST(solvePnPRansac)
    886 {
    887     InitSolvePnpRansac();
    888 
    889     for (int num_points = 5000; num_points <= 300000; num_points = int(num_points * 3.76))
    890     {
    891         SUBTEST << num_points;
    892 
    893         Mat object; gen(object, 1, num_points, CV_32FC3, Scalar::all(10), Scalar::all(100));
    894         Mat image; gen(image, 1, num_points, CV_32FC2, Scalar::all(10), Scalar::all(100));
    895         Mat camera_mat; gen(camera_mat, 3, 3, CV_32F, 0.5, 1);
    896         camera_mat.at<float>(0, 1) = 0.f;
    897         camera_mat.at<float>(1, 0) = 0.f;
    898         camera_mat.at<float>(2, 0) = 0.f;
    899         camera_mat.at<float>(2, 1) = 0.f;
    900 
    901         Mat rvec, tvec;
    902         const int num_iters = 200;
    903         const float max_dist = 2.0f;
    904         vector<int> inliers_cpu, inliers_gpu;
    905 
    906         CPU_ON;
    907         solvePnPRansac(object, image, camera_mat, Mat::zeros(1, 8, CV_32F), rvec, tvec, false, num_iters,
    908                        max_dist, int(num_points * 0.05), inliers_cpu);
    909         CPU_OFF;
    910 
    911         CUDA_ON;
    912         cuda::solvePnPRansac(object, image, camera_mat, Mat::zeros(1, 8, CV_32F), rvec, tvec, false, num_iters,
    913                             max_dist, int(num_points * 0.05), &inliers_gpu);
    914         CUDA_OFF;
    915     }
    916 }
    917 
    918 TEST(GaussianBlur)
    919 {
    920     for (int size = 1000; size <= 4000; size += 1000)
    921     {
    922         SUBTEST << size << 'x' << size << ", 8UC4";
    923 
    924         Mat src, dst;
    925 
    926         gen(src, size, size, CV_8UC4, 0, 256);
    927 
    928         GaussianBlur(src, dst, Size(3, 3), 1);
    929 
    930         CPU_ON;
    931         GaussianBlur(src, dst, Size(3, 3), 1);
    932         CPU_OFF;
    933 
    934         cuda::GpuMat d_src(src);
    935         cuda::GpuMat d_dst(src.size(), src.type());
    936         cuda::GpuMat d_buf;
    937 
    938         cv::Ptr<cv::cuda::Filter> gauss = cv::cuda::createGaussianFilter(d_src.type(), -1, cv::Size(3, 3), 1);
    939 
    940         gauss->apply(d_src, d_dst);
    941 
    942         CUDA_ON;
    943         gauss->apply(d_src, d_dst);
    944         CUDA_OFF;
    945     }
    946 }
    947 
    948 TEST(filter2D)
    949 {
    950     for (int size = 512; size <= 2048; size *= 2)
    951     {
    952         Mat src;
    953         gen(src, size, size, CV_8UC4, 0, 256);
    954 
    955         for (int ksize = 3; ksize <= 16; ksize += 2)
    956         {
    957             SUBTEST << "ksize = " << ksize << ", " << size << 'x' << size << ", 8UC4";
    958 
    959             Mat kernel;
    960             gen(kernel, ksize, ksize, CV_32FC1, 0.0, 1.0);
    961 
    962             Mat dst;
    963             cv::filter2D(src, dst, -1, kernel);
    964 
    965             CPU_ON;
    966             cv::filter2D(src, dst, -1, kernel);
    967             CPU_OFF;
    968 
    969             cuda::GpuMat d_src(src);
    970             cuda::GpuMat d_dst;
    971 
    972             Ptr<cuda::Filter> filter2D = cuda::createLinearFilter(d_src.type(), -1, kernel);
    973             filter2D->apply(d_src, d_dst);
    974 
    975             CUDA_ON;
    976             filter2D->apply(d_src, d_dst);
    977             CUDA_OFF;
    978         }
    979     }
    980 }
    981 
    982 TEST(pyrDown)
    983 {
    984     for (int size = 4000; size >= 1000; size -= 1000)
    985     {
    986         SUBTEST << size << 'x' << size << ", 8UC4";
    987 
    988         Mat src, dst;
    989         gen(src, size, size, CV_8UC4, 0, 256);
    990 
    991         pyrDown(src, dst);
    992 
    993         CPU_ON;
    994         pyrDown(src, dst);
    995         CPU_OFF;
    996 
    997         cuda::GpuMat d_src(src);
    998         cuda::GpuMat d_dst;
    999 
   1000         cuda::pyrDown(d_src, d_dst);
   1001 
   1002         CUDA_ON;
   1003         cuda::pyrDown(d_src, d_dst);
   1004         CUDA_OFF;
   1005     }
   1006 }
   1007 
   1008 TEST(pyrUp)
   1009 {
   1010     for (int size = 2000; size >= 1000; size -= 1000)
   1011     {
   1012         SUBTEST << size << 'x' << size << ", 8UC4";
   1013 
   1014         Mat src, dst;
   1015 
   1016         gen(src, size, size, CV_8UC4, 0, 256);
   1017 
   1018         pyrUp(src, dst);
   1019 
   1020         CPU_ON;
   1021         pyrUp(src, dst);
   1022         CPU_OFF;
   1023 
   1024         cuda::GpuMat d_src(src);
   1025         cuda::GpuMat d_dst;
   1026 
   1027         cuda::pyrUp(d_src, d_dst);
   1028 
   1029         CUDA_ON;
   1030         cuda::pyrUp(d_src, d_dst);
   1031         CUDA_OFF;
   1032     }
   1033 }
   1034 
   1035 
   1036 TEST(equalizeHist)
   1037 {
   1038     for (int size = 1000; size < 4000; size += 1000)
   1039     {
   1040         SUBTEST << size << 'x' << size;
   1041 
   1042         Mat src, dst;
   1043 
   1044         gen(src, size, size, CV_8UC1, 0, 256);
   1045 
   1046         equalizeHist(src, dst);
   1047 
   1048         CPU_ON;
   1049         equalizeHist(src, dst);
   1050         CPU_OFF;
   1051 
   1052         cuda::GpuMat d_src(src);
   1053         cuda::GpuMat d_dst;
   1054 
   1055         cuda::equalizeHist(d_src, d_dst);
   1056 
   1057         CUDA_ON;
   1058         cuda::equalizeHist(d_src, d_dst);
   1059         CUDA_OFF;
   1060     }
   1061 }
   1062 
   1063 
   1064 TEST(Canny)
   1065 {
   1066     Mat img = imread(abspath("../data/aloeL.jpg"), IMREAD_GRAYSCALE);
   1067 
   1068     if (img.empty()) throw runtime_error("can't open ../data/aloeL.jpg");
   1069 
   1070     Mat edges(img.size(), CV_8UC1);
   1071 
   1072     CPU_ON;
   1073     Canny(img, edges, 50.0, 100.0);
   1074     CPU_OFF;
   1075 
   1076     cuda::GpuMat d_img(img);
   1077     cuda::GpuMat d_edges;
   1078 
   1079     Ptr<cuda::CannyEdgeDetector> canny = cuda::createCannyEdgeDetector(50.0, 100.0);
   1080 
   1081     canny->detect(d_img, d_edges);
   1082 
   1083     CUDA_ON;
   1084     canny->detect(d_img, d_edges);
   1085     CUDA_OFF;
   1086 }
   1087 
   1088 
   1089 TEST(reduce)
   1090 {
   1091     for (int size = 1000; size < 4000; size += 1000)
   1092     {
   1093         Mat src;
   1094         gen(src, size, size, CV_32F, 0, 255);
   1095 
   1096         Mat dst0;
   1097         Mat dst1;
   1098 
   1099         cuda::GpuMat d_src(src);
   1100         cuda::GpuMat d_dst0;
   1101         cuda::GpuMat d_dst1;
   1102 
   1103         SUBTEST << size << 'x' << size << ", dim = 0";
   1104 
   1105         reduce(src, dst0, 0, REDUCE_MIN);
   1106 
   1107         CPU_ON;
   1108         reduce(src, dst0, 0, REDUCE_MIN);
   1109         CPU_OFF;
   1110 
   1111         cuda::reduce(d_src, d_dst0, 0, REDUCE_MIN);
   1112 
   1113         CUDA_ON;
   1114         cuda::reduce(d_src, d_dst0, 0, REDUCE_MIN);
   1115         CUDA_OFF;
   1116 
   1117         SUBTEST << size << 'x' << size << ", dim = 1";
   1118 
   1119         reduce(src, dst1, 1, REDUCE_MIN);
   1120 
   1121         CPU_ON;
   1122         reduce(src, dst1, 1, REDUCE_MIN);
   1123         CPU_OFF;
   1124 
   1125         cuda::reduce(d_src, d_dst1, 1, REDUCE_MIN);
   1126 
   1127         CUDA_ON;
   1128         cuda::reduce(d_src, d_dst1, 1, REDUCE_MIN);
   1129         CUDA_OFF;
   1130     }
   1131 }
   1132 
   1133 
   1134 TEST(gemm)
   1135 {
   1136     Mat src1, src2, src3, dst;
   1137     cuda::GpuMat d_src1, d_src2, d_src3, d_dst;
   1138 
   1139     for (int size = 512; size <= 1024; size *= 2)
   1140     {
   1141         SUBTEST << size << 'x' << size;
   1142 
   1143         gen(src1, size, size, CV_32FC1, Scalar::all(-10), Scalar::all(10));
   1144         gen(src2, size, size, CV_32FC1, Scalar::all(-10), Scalar::all(10));
   1145         gen(src3, size, size, CV_32FC1, Scalar::all(-10), Scalar::all(10));
   1146 
   1147         gemm(src1, src2, 1.0, src3, 1.0, dst);
   1148 
   1149         CPU_ON;
   1150         gemm(src1, src2, 1.0, src3, 1.0, dst);
   1151         CPU_OFF;
   1152 
   1153         d_src1.upload(src1);
   1154         d_src2.upload(src2);
   1155         d_src3.upload(src3);
   1156 
   1157         cuda::gemm(d_src1, d_src2, 1.0, d_src3, 1.0, d_dst);
   1158 
   1159         CUDA_ON;
   1160         cuda::gemm(d_src1, d_src2, 1.0, d_src3, 1.0, d_dst);
   1161         CUDA_OFF;
   1162     }
   1163 }
   1164 
   1165 TEST(GoodFeaturesToTrack)
   1166 {
   1167     Mat src = imread(abspath("../data/aloeL.jpg"), IMREAD_GRAYSCALE);
   1168     if (src.empty()) throw runtime_error("can't open ../data/aloeL.jpg");
   1169 
   1170     vector<Point2f> pts;
   1171 
   1172     goodFeaturesToTrack(src, pts, 8000, 0.01, 0.0);
   1173 
   1174     CPU_ON;
   1175     goodFeaturesToTrack(src, pts, 8000, 0.01, 0.0);
   1176     CPU_OFF;
   1177 
   1178     Ptr<cuda::CornersDetector> detector = cuda::createGoodFeaturesToTrackDetector(src.type(), 8000, 0.01, 0.0);
   1179 
   1180     cuda::GpuMat d_src(src);
   1181     cuda::GpuMat d_pts;
   1182 
   1183     detector->detect(d_src, d_pts);
   1184 
   1185     CUDA_ON;
   1186     detector->detect(d_src, d_pts);
   1187     CUDA_OFF;
   1188 }
   1189 
   1190 #ifdef HAVE_OPENCV_BGSEGM
   1191 
   1192 TEST(MOG)
   1193 {
   1194     const std::string inputFile = abspath("../data/768x576.avi");
   1195 
   1196     cv::VideoCapture cap(inputFile);
   1197     if (!cap.isOpened()) throw runtime_error("can't open ../data/768x576.avi");
   1198 
   1199     cv::Mat frame;
   1200     cap >> frame;
   1201 
   1202     cv::Ptr<cv::BackgroundSubtractor> mog = cv::bgsegm::createBackgroundSubtractorMOG();
   1203     cv::Mat foreground;
   1204 
   1205     mog->apply(frame, foreground, 0.01);
   1206 
   1207     while (!TestSystem::instance().stop())
   1208     {
   1209         cap >> frame;
   1210 
   1211         TestSystem::instance().cpuOn();
   1212 
   1213         mog->apply(frame, foreground, 0.01);
   1214 
   1215         TestSystem::instance().cpuOff();
   1216     }
   1217     TestSystem::instance().cpuComplete();
   1218 
   1219     cap.open(inputFile);
   1220 
   1221     cap >> frame;
   1222 
   1223     cv::cuda::GpuMat d_frame(frame);
   1224     cv::Ptr<cv::BackgroundSubtractor> d_mog = cv::cuda::createBackgroundSubtractorMOG();
   1225     cv::cuda::GpuMat d_foreground;
   1226 
   1227     d_mog->apply(d_frame, d_foreground, 0.01);
   1228 
   1229     while (!TestSystem::instance().stop())
   1230     {
   1231         cap >> frame;
   1232         d_frame.upload(frame);
   1233 
   1234         TestSystem::instance().gpuOn();
   1235 
   1236         d_mog->apply(d_frame, d_foreground, 0.01);
   1237 
   1238         TestSystem::instance().gpuOff();
   1239     }
   1240     TestSystem::instance().gpuComplete();
   1241 }
   1242 
   1243 #endif
   1244 
   1245 TEST(MOG2)
   1246 {
   1247     const std::string inputFile = abspath("../data/768x576.avi");
   1248 
   1249     cv::VideoCapture cap(inputFile);
   1250     if (!cap.isOpened()) throw runtime_error("can't open ../data/768x576.avi");
   1251 
   1252     cv::Mat frame;
   1253     cap >> frame;
   1254 
   1255     cv::Ptr<cv::BackgroundSubtractor> mog2 = cv::createBackgroundSubtractorMOG2();
   1256     cv::Mat foreground;
   1257     cv::Mat background;
   1258 
   1259     mog2->apply(frame, foreground);
   1260     mog2->getBackgroundImage(background);
   1261 
   1262     while (!TestSystem::instance().stop())
   1263     {
   1264         cap >> frame;
   1265 
   1266         TestSystem::instance().cpuOn();
   1267 
   1268         mog2->apply(frame, foreground);
   1269         mog2->getBackgroundImage(background);
   1270 
   1271         TestSystem::instance().cpuOff();
   1272     }
   1273     TestSystem::instance().cpuComplete();
   1274 
   1275     cap.open(inputFile);
   1276 
   1277     cap >> frame;
   1278 
   1279     cv::Ptr<cv::BackgroundSubtractor> d_mog2 = cv::cuda::createBackgroundSubtractorMOG2();
   1280     cv::cuda::GpuMat d_frame(frame);
   1281     cv::cuda::GpuMat d_foreground;
   1282     cv::cuda::GpuMat d_background;
   1283 
   1284     d_mog2->apply(d_frame, d_foreground);
   1285     d_mog2->getBackgroundImage(d_background);
   1286 
   1287     while (!TestSystem::instance().stop())
   1288     {
   1289         cap >> frame;
   1290         d_frame.upload(frame);
   1291 
   1292         TestSystem::instance().gpuOn();
   1293 
   1294         d_mog2->apply(d_frame, d_foreground);
   1295         d_mog2->getBackgroundImage(d_background);
   1296 
   1297         TestSystem::instance().gpuOff();
   1298     }
   1299     TestSystem::instance().gpuComplete();
   1300 }
   1301