Home | History | Annotate | Download | only in test
      1 /*M///////////////////////////////////////////////////////////////////////////////////////
      2 //
      3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
      4 //
      5 //  By downloading, copying, installing or using the software you agree to this license.
      6 //  If you do not agree to this license, do not download, install,
      7 //  copy or use the software.
      8 //
      9 //
     10 //                           License Agreement
     11 //                For Open Source Computer Vision Library
     12 //
     13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
     14 // Copyright (C) 2009, Willow Garage Inc., all rights reserved.
     15 // Third party copyrights are property of their respective owners.
     16 //
     17 // Redistribution and use in source and binary forms, with or without modification,
     18 // are permitted provided that the following conditions are met:
     19 //
     20 //   * Redistribution's of source code must retain the above copyright notice,
     21 //     this list of conditions and the following disclaimer.
     22 //
     23 //   * Redistribution's in binary form must reproduce the above copyright notice,
     24 //     this list of conditions and the following disclaimer in the documentation
     25 //     and/or other materials provided with the distribution.
     26 //
     27 //   * The name of the copyright holders may not be used to endorse or promote products
     28 //     derived from this software without specific prior written permission.
     29 //
     30 // This software is provided by the copyright holders and contributors "as is" and
     31 // any express or implied warranties, including, but not limited to, the implied
     32 // warranties of merchantability and fitness for a particular purpose are disclaimed.
     33 // In no event shall the Intel Corporation or contributors be liable for any direct,
     34 // indirect, incidental, special, exemplary, or consequential damages
     35 // (including, but not limited to, procurement of substitute goods or services;
     36 // loss of use, data, or profits; or business interruption) however caused
     37 // and on any theory of liability, whether in contract, strict liability,
     38 // or tort (including negligence or otherwise) arising in any way out of
     39 // the use of this software, even if advised of the possibility of such damage.
     40 //
     41 //M*/
     42 
     43 #include "test_precomp.hpp"
     44 
     45 #ifdef HAVE_CUDA
     46 
     47 using namespace cvtest;
     48 
     49 //////////////////////////////////////////////////////////////////////////////
     50 // GEMM
     51 
     52 #ifdef HAVE_CUBLAS
     53 
// Flag type used as the GEMM test parameter, declared through CV_FLAGS from 0 and
// the three cv::GEMM_*_T transposition bits.
CV_FLAGS(GemmFlags, 0, cv::GEMM_1_T, cv::GEMM_2_T, cv::GEMM_3_T);
// Transposition-flag combinations the GEMM tests are instantiated with.
// NOTE(review): cv::GEMM_2_T | cv::GEMM_3_T is the only combination of the three
// bits that is not listed below - confirm whether the omission is intentional.
#define ALL_GEMM_FLAGS testing::Values(GemmFlags(0), GemmFlags(cv::GEMM_1_T), GemmFlags(cv::GEMM_2_T), GemmFlags(cv::GEMM_3_T), GemmFlags(cv::GEMM_1_T | cv::GEMM_2_T), GemmFlags(cv::GEMM_1_T | cv::GEMM_3_T), GemmFlags(cv::GEMM_1_T | cv::GEMM_2_T | cv::GEMM_3_T))
     56 
     57 PARAM_TEST_CASE(GEMM, cv::cuda::DeviceInfo, cv::Size, MatType, GemmFlags, UseRoi)
     58 {
     59     cv::cuda::DeviceInfo devInfo;
     60     cv::Size size;
     61     int type;
     62     int flags;
     63     bool useRoi;
     64 
     65     virtual void SetUp()
     66     {
     67         devInfo = GET_PARAM(0);
     68         size = GET_PARAM(1);
     69         type = GET_PARAM(2);
     70         flags = GET_PARAM(3);
     71         useRoi = GET_PARAM(4);
     72 
     73         cv::cuda::setDevice(devInfo.deviceID());
     74     }
     75 };
     76 
     77 CUDA_TEST_P(GEMM, Accuracy)
     78 {
     79     cv::Mat src1 = randomMat(size, type, -10.0, 10.0);
     80     cv::Mat src2 = randomMat(size, type, -10.0, 10.0);
     81     cv::Mat src3 = randomMat(size, type, -10.0, 10.0);
     82     double alpha = randomDouble(-10.0, 10.0);
     83     double beta = randomDouble(-10.0, 10.0);
     84 
     85     if (CV_MAT_DEPTH(type) == CV_64F && !supportFeature(devInfo, cv::cuda::NATIVE_DOUBLE))
     86     {
     87         try
     88         {
     89             cv::cuda::GpuMat dst;
     90             cv::cuda::gemm(loadMat(src1), loadMat(src2), alpha, loadMat(src3), beta, dst, flags);
     91         }
     92         catch (const cv::Exception& e)
     93         {
     94             ASSERT_EQ(cv::Error::StsUnsupportedFormat, e.code);
     95         }
     96     }
     97     else if (type == CV_64FC2 && flags != 0)
     98     {
     99         try
    100         {
    101             cv::cuda::GpuMat dst;
    102             cv::cuda::gemm(loadMat(src1), loadMat(src2), alpha, loadMat(src3), beta, dst, flags);
    103         }
    104         catch (const cv::Exception& e)
    105         {
    106             ASSERT_EQ(cv::Error::StsNotImplemented, e.code);
    107         }
    108     }
    109     else
    110     {
    111         cv::cuda::GpuMat dst = createMat(size, type, useRoi);
    112         cv::cuda::gemm(loadMat(src1, useRoi), loadMat(src2, useRoi), alpha, loadMat(src3, useRoi), beta, dst, flags);
    113 
    114         cv::Mat dst_gold;
    115         cv::gemm(src1, src2, alpha, src3, beta, dst_gold, flags);
    116 
    117         EXPECT_MAT_NEAR(dst_gold, dst, CV_MAT_DEPTH(type) == CV_32F ? 1e-1 : 1e-10);
    118     }
    119 }
    120 
// Instantiates the GEMM tests for every combination of: ALL_DEVICES,
// DIFFERENT_SIZES, the four float/double one- and two-channel matrix types,
// ALL_GEMM_FLAGS and WHOLE_SUBMAT.
INSTANTIATE_TEST_CASE_P(CUDA_Arithm, GEMM, testing::Combine(
    ALL_DEVICES,
    DIFFERENT_SIZES,
    testing::Values(MatType(CV_32FC1), MatType(CV_32FC2), MatType(CV_64FC1), MatType(CV_64FC2)),
    ALL_GEMM_FLAGS,
    WHOLE_SUBMAT));
    127 
    128 ////////////////////////////////////////////////////////////////////////////
    129 // MulSpectrums
    130 
// Flag type used as the MulSpectrums test parameter, declared through CV_FLAGS
// from 0 and the listed cv::DFT_* flags.
CV_FLAGS(DftFlags, 0, cv::DFT_INVERSE, cv::DFT_SCALE, cv::DFT_ROWS, cv::DFT_COMPLEX_OUTPUT, cv::DFT_REAL_OUTPUT)
    132 
    133 PARAM_TEST_CASE(MulSpectrums, cv::cuda::DeviceInfo, cv::Size, DftFlags)
    134 {
    135     cv::cuda::DeviceInfo devInfo;
    136     cv::Size size;
    137     int flag;
    138 
    139     cv::Mat a, b;
    140 
    141     virtual void SetUp()
    142     {
    143         devInfo = GET_PARAM(0);
    144         size = GET_PARAM(1);
    145         flag = GET_PARAM(2);
    146 
    147         cv::cuda::setDevice(devInfo.deviceID());
    148 
    149         a = randomMat(size, CV_32FC2);
    150         b = randomMat(size, CV_32FC2);
    151     }
    152 };
    153 
    154 CUDA_TEST_P(MulSpectrums, Simple)
    155 {
    156     cv::cuda::GpuMat c;
    157     cv::cuda::mulSpectrums(loadMat(a), loadMat(b), c, flag, false);
    158 
    159     cv::Mat c_gold;
    160     cv::mulSpectrums(a, b, c_gold, flag, false);
    161 
    162     EXPECT_MAT_NEAR(c_gold, c, 1e-2);
    163 }
    164 
    165 CUDA_TEST_P(MulSpectrums, Scaled)
    166 {
    167     float scale = 1.f / size.area();
    168 
    169     cv::cuda::GpuMat c;
    170     cv::cuda::mulAndScaleSpectrums(loadMat(a), loadMat(b), c, flag, scale, false);
    171 
    172     cv::Mat c_gold;
    173     cv::mulSpectrums(a, b, c_gold, flag, false);
    174     c_gold.convertTo(c_gold, c_gold.type(), scale);
    175 
    176     EXPECT_MAT_NEAR(c_gold, c, 1e-2);
    177 }
    178 
// Instantiates the MulSpectrums tests for every combination of ALL_DEVICES,
// DIFFERENT_SIZES and the two flag values 0 and cv::DFT_ROWS.
INSTANTIATE_TEST_CASE_P(CUDA_Arithm, MulSpectrums, testing::Combine(
    ALL_DEVICES,
    DIFFERENT_SIZES,
    testing::Values(DftFlags(0), DftFlags(cv::DFT_ROWS))));
    183 
    184 ////////////////////////////////////////////////////////////////////////////
    185 // Dft
    186 
    187 struct Dft : testing::TestWithParam<cv::cuda::DeviceInfo>
    188 {
    189     cv::cuda::DeviceInfo devInfo;
    190 
    191     virtual void SetUp()
    192     {
    193         devInfo = GetParam();
    194 
    195         cv::cuda::setDevice(devInfo.deviceID());
    196     }
    197 };
    198 
    199 namespace
    200 {
    201     void testC2C(const std::string& hint, int cols, int rows, int flags, bool inplace)
    202     {
    203         SCOPED_TRACE(hint);
    204 
    205         cv::Mat a = randomMat(cv::Size(cols, rows), CV_32FC2, 0.0, 10.0);
    206 
    207         cv::Mat b_gold;
    208         cv::dft(a, b_gold, flags);
    209 
    210         cv::cuda::GpuMat d_b;
    211         cv::cuda::GpuMat d_b_data;
    212         if (inplace)
    213         {
    214             d_b_data.create(1, a.size().area(), CV_32FC2);
    215             d_b = cv::cuda::GpuMat(a.rows, a.cols, CV_32FC2, d_b_data.ptr(), a.cols * d_b_data.elemSize());
    216         }
    217         cv::cuda::dft(loadMat(a), d_b, cv::Size(cols, rows), flags);
    218 
    219         EXPECT_TRUE(!inplace || d_b.ptr() == d_b_data.ptr());
    220         ASSERT_EQ(CV_32F, d_b.depth());
    221         ASSERT_EQ(2, d_b.channels());
    222         EXPECT_MAT_NEAR(b_gold, cv::Mat(d_b), rows * cols * 1e-4);
    223     }
    224 }
    225 
    226 CUDA_TEST_P(Dft, C2C)
    227 {
    228     int cols = randomInt(2, 100);
    229     int rows = randomInt(2, 100);
    230 
    231     for (int i = 0; i < 2; ++i)
    232     {
    233         bool inplace = i != 0;
    234 
    235         testC2C("no flags", cols, rows, 0, inplace);
    236         testC2C("no flags 0 1", cols, rows + 1, 0, inplace);
    237         testC2C("no flags 1 0", cols, rows + 1, 0, inplace);
    238         testC2C("no flags 1 1", cols + 1, rows, 0, inplace);
    239         testC2C("DFT_INVERSE", cols, rows, cv::DFT_INVERSE, inplace);
    240         testC2C("DFT_ROWS", cols, rows, cv::DFT_ROWS, inplace);
    241         testC2C("single col", 1, rows, 0, inplace);
    242         testC2C("single row", cols, 1, 0, inplace);
    243         testC2C("single col inversed", 1, rows, cv::DFT_INVERSE, inplace);
    244         testC2C("single row inversed", cols, 1, cv::DFT_INVERSE, inplace);
    245         testC2C("single row DFT_ROWS", cols, 1, cv::DFT_ROWS, inplace);
    246         testC2C("size 1 2", 1, 2, 0, inplace);
    247         testC2C("size 2 1", 2, 1, 0, inplace);
    248     }
    249 }
    250 
    251 namespace
    252 {
    253     void testR2CThenC2R(const std::string& hint, int cols, int rows, bool inplace)
    254     {
    255         SCOPED_TRACE(hint);
    256 
    257         cv::Mat a = randomMat(cv::Size(cols, rows), CV_32FC1, 0.0, 10.0);
    258 
    259         cv::cuda::GpuMat d_b, d_c;
    260         cv::cuda::GpuMat d_b_data, d_c_data;
    261         if (inplace)
    262         {
    263             if (a.cols == 1)
    264             {
    265                 d_b_data.create(1, (a.rows / 2 + 1) * a.cols, CV_32FC2);
    266                 d_b = cv::cuda::GpuMat(a.rows / 2 + 1, a.cols, CV_32FC2, d_b_data.ptr(), a.cols * d_b_data.elemSize());
    267             }
    268             else
    269             {
    270                 d_b_data.create(1, a.rows * (a.cols / 2 + 1), CV_32FC2);
    271                 d_b = cv::cuda::GpuMat(a.rows, a.cols / 2 + 1, CV_32FC2, d_b_data.ptr(), (a.cols / 2 + 1) * d_b_data.elemSize());
    272             }
    273             d_c_data.create(1, a.size().area(), CV_32F);
    274             d_c = cv::cuda::GpuMat(a.rows, a.cols, CV_32F, d_c_data.ptr(), a.cols * d_c_data.elemSize());
    275         }
    276 
    277         cv::cuda::dft(loadMat(a), d_b, cv::Size(cols, rows), 0);
    278         cv::cuda::dft(d_b, d_c, cv::Size(cols, rows), cv::DFT_REAL_OUTPUT | cv::DFT_SCALE);
    279 
    280         EXPECT_TRUE(!inplace || d_b.ptr() == d_b_data.ptr());
    281         EXPECT_TRUE(!inplace || d_c.ptr() == d_c_data.ptr());
    282         ASSERT_EQ(CV_32F, d_c.depth());
    283         ASSERT_EQ(1, d_c.channels());
    284 
    285         cv::Mat c(d_c);
    286         EXPECT_MAT_NEAR(a, c, rows * cols * 1e-5);
    287     }
    288 }
    289 
    290 CUDA_TEST_P(Dft, R2CThenC2R)
    291 {
    292     int cols = randomInt(2, 100);
    293     int rows = randomInt(2, 100);
    294 
    295     testR2CThenC2R("sanity", cols, rows, false);
    296     testR2CThenC2R("sanity 0 1", cols, rows + 1, false);
    297     testR2CThenC2R("sanity 1 0", cols + 1, rows, false);
    298     testR2CThenC2R("sanity 1 1", cols + 1, rows + 1, false);
    299     testR2CThenC2R("single col", 1, rows, false);
    300     testR2CThenC2R("single col 1", 1, rows + 1, false);
    301     testR2CThenC2R("single row", cols, 1, false);
    302     testR2CThenC2R("single row 1", cols + 1, 1, false);
    303 
    304     testR2CThenC2R("sanity", cols, rows, true);
    305     testR2CThenC2R("sanity 0 1", cols, rows + 1, true);
    306     testR2CThenC2R("sanity 1 0", cols + 1, rows, true);
    307     testR2CThenC2R("sanity 1 1", cols + 1, rows + 1, true);
    308     testR2CThenC2R("single row", cols, 1, true);
    309     testR2CThenC2R("single row 1", cols + 1, 1, true);
    310 }
    311 
// Instantiates the Dft tests once per entry of ALL_DEVICES.
INSTANTIATE_TEST_CASE_P(CUDA_Arithm, Dft, ALL_DEVICES);
    313 
    314 ////////////////////////////////////////////////////////
    315 // Convolve
    316 
    317 namespace
    318 {
    319     void convolveDFT(const cv::Mat& A, const cv::Mat& B, cv::Mat& C, bool ccorr = false)
    320     {
    321         // reallocate the output array if needed
    322         C.create(std::abs(A.rows - B.rows) + 1, std::abs(A.cols - B.cols) + 1, A.type());
    323         cv::Size dftSize;
    324 
    325         // compute the size of DFT transform
    326         dftSize.width = cv::getOptimalDFTSize(A.cols + B.cols - 1);
    327         dftSize.height = cv::getOptimalDFTSize(A.rows + B.rows - 1);
    328 
    329         // allocate temporary buffers and initialize them with 0s
    330         cv::Mat tempA(dftSize, A.type(), cv::Scalar::all(0));
    331         cv::Mat tempB(dftSize, B.type(), cv::Scalar::all(0));
    332 
    333         // copy A and B to the top-left corners of tempA and tempB, respectively
    334         cv::Mat roiA(tempA, cv::Rect(0, 0, A.cols, A.rows));
    335         A.copyTo(roiA);
    336         cv::Mat roiB(tempB, cv::Rect(0, 0, B.cols, B.rows));
    337         B.copyTo(roiB);
    338 
    339         // now transform the padded A & B in-place;
    340         // use "nonzeroRows" hint for faster processing
    341         cv::dft(tempA, tempA, 0, A.rows);
    342         cv::dft(tempB, tempB, 0, B.rows);
    343 
    344         // multiply the spectrums;
    345         // the function handles packed spectrum representations well
    346         cv::mulSpectrums(tempA, tempB, tempA, 0, ccorr);
    347 
    348         // transform the product back from the frequency domain.
    349         // Even though all the result rows will be non-zero,
    350         // you need only the first C.rows of them, and thus you
    351         // pass nonzeroRows == C.rows
    352         cv::dft(tempA, tempA, cv::DFT_INVERSE + cv::DFT_SCALE, C.rows);
    353 
    354         // now copy the result back to C.
    355         tempA(cv::Rect(0, 0, C.cols, C.rows)).copyTo(C);
    356     }
    357 
    358     IMPLEMENT_PARAM_CLASS(KSize, int)
    359     IMPLEMENT_PARAM_CLASS(Ccorr, bool)
    360 }
    361 
    362 PARAM_TEST_CASE(Convolve, cv::cuda::DeviceInfo, cv::Size, KSize, Ccorr)
    363 {
    364     cv::cuda::DeviceInfo devInfo;
    365     cv::Size size;
    366     int ksize;
    367     bool ccorr;
    368 
    369     virtual void SetUp()
    370     {
    371         devInfo = GET_PARAM(0);
    372         size = GET_PARAM(1);
    373         ksize = GET_PARAM(2);
    374         ccorr = GET_PARAM(3);
    375 
    376         cv::cuda::setDevice(devInfo.deviceID());
    377     }
    378 };
    379 
    380 CUDA_TEST_P(Convolve, Accuracy)
    381 {
    382     cv::Mat src = randomMat(size, CV_32FC1, 0.0, 100.0);
    383     cv::Mat kernel = randomMat(cv::Size(ksize, ksize), CV_32FC1, 0.0, 1.0);
    384 
    385     cv::Ptr<cv::cuda::Convolution> conv = cv::cuda::createConvolution();
    386 
    387     cv::cuda::GpuMat dst;
    388     conv->convolve(loadMat(src), loadMat(kernel), dst, ccorr);
    389 
    390     cv::Mat dst_gold;
    391     convolveDFT(src, kernel, dst_gold, ccorr);
    392 
    393     EXPECT_MAT_NEAR(dst, dst_gold, 1e-1);
    394 }
    395 
// Instantiates the Convolve tests for every combination of ALL_DEVICES,
// DIFFERENT_SIZES, the listed kernel sizes (3 to 45) and both ccorr values.
INSTANTIATE_TEST_CASE_P(CUDA_Arithm, Convolve, testing::Combine(
    ALL_DEVICES,
    DIFFERENT_SIZES,
    testing::Values(KSize(3), KSize(7), KSize(11), KSize(17), KSize(19), KSize(23), KSize(45)),
    testing::Values(Ccorr(false), Ccorr(true))));
    401 
    402 #endif // HAVE_CUBLAS
    403 
    404 #endif // HAVE_CUDA
    405