/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                           License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009, Willow Garage Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#include "precomp.hpp"

using namespace cv;
using namespace cv::cuda;

#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)

void cv::cuda::gemm(InputArray, InputArray, double, InputArray, double, OutputArray, int, Stream&) { throw_no_cuda(); }

void cv::cuda::mulSpectrums(InputArray, InputArray, OutputArray, int, bool, Stream&) { throw_no_cuda(); }
void cv::cuda::mulAndScaleSpectrums(InputArray, InputArray, OutputArray, int, float, bool, Stream&) { throw_no_cuda(); }

void cv::cuda::dft(InputArray, OutputArray, Size, int, Stream&) { throw_no_cuda(); }

Ptr<Convolution> cv::cuda::createConvolution(Size) { throw_no_cuda(); return Ptr<Convolution>(); }

#else /* !defined (HAVE_CUDA) */

namespace
{
    #define error_entry(entry)  { entry, #entry }

    struct ErrorEntry
    {
        int code;
        const char* str;
    };

    struct ErrorEntryComparer
    {
        int code;
        ErrorEntryComparer(int code_) : code(code_) {}
        bool operator()(const ErrorEntry& e) const { return e.code == code; }
    };

    String getErrorString(int code, const ErrorEntry* errors, size_t n)
    {
        size_t idx = std::find_if(errors, errors + n, ErrorEntryComparer(code)) - errors;

        const char* msg = (idx != n) ? errors[idx].str : "Unknown error code";
        String str = cv::format("%s [Code = %d]", msg, code);

        return str;
    }
}

#ifdef HAVE_CUBLAS
    namespace
    {
        const ErrorEntry cublas_errors[] =
        {
            error_entry( CUBLAS_STATUS_SUCCESS ),
            error_entry( CUBLAS_STATUS_NOT_INITIALIZED ),
            error_entry( CUBLAS_STATUS_ALLOC_FAILED ),
            error_entry( CUBLAS_STATUS_INVALID_VALUE ),
            error_entry( CUBLAS_STATUS_ARCH_MISMATCH ),
            error_entry( CUBLAS_STATUS_MAPPING_ERROR ),
            error_entry( CUBLAS_STATUS_EXECUTION_FAILED ),
            error_entry( CUBLAS_STATUS_INTERNAL_ERROR )
        };

        const size_t cublas_error_num = sizeof(cublas_errors) / sizeof(cublas_errors[0]);

        static inline void ___cublasSafeCall(cublasStatus_t err, const char* file, const int line, const char* func)
        {
            if (CUBLAS_STATUS_SUCCESS != err)
            {
                String msg = getErrorString(err, cublas_errors, cublas_error_num);
                cv::error(cv::Error::GpuApiCallError, msg, func, file, line);
            }
        }
    }

    #define cublasSafeCall(expr)  ___cublasSafeCall(expr, __FILE__, __LINE__, CV_Func)
#endif // HAVE_CUBLAS

#ifdef HAVE_CUFFT
    namespace
    {
        //////////////////////////////////////////////////////////////////////////
        // CUFFT errors

        const ErrorEntry cufft_errors[] =
        {
            error_entry( CUFFT_INVALID_PLAN ),
            error_entry( CUFFT_ALLOC_FAILED ),
            error_entry( CUFFT_INVALID_TYPE ),
            error_entry( CUFFT_INVALID_VALUE ),
            error_entry( CUFFT_INTERNAL_ERROR ),
            error_entry( CUFFT_EXEC_FAILED ),
            error_entry( CUFFT_SETUP_FAILED ),
            error_entry( CUFFT_INVALID_SIZE ),
            error_entry( CUFFT_UNALIGNED_DATA )
        };

        const int cufft_error_num = sizeof(cufft_errors) / sizeof(cufft_errors[0]);

        void ___cufftSafeCall(int err, const char* file, const int line, const char* func)
        {
            if (CUFFT_SUCCESS != err)
            {
                String msg = getErrorString(err, cufft_errors, cufft_error_num);
                cv::error(cv::Error::GpuApiCallError, msg, func, file, line);
            }
        }
    }

    #define cufftSafeCall(expr)  ___cufftSafeCall(expr, __FILE__, __LINE__, CV_Func)

#endif

////////////////////////////////////////////////////////////////////////
// gemm

void cv::cuda::gemm(InputArray _src1, InputArray _src2, double alpha, InputArray _src3, double beta, OutputArray _dst, int flags, Stream& stream)
{
#ifndef HAVE_CUBLAS
    (void) _src1;
    (void) _src2;
    (void) alpha;
    (void) _src3;
    (void) beta;
    (void) _dst;
    (void) flags;
    (void) stream;
    CV_Error(Error::StsNotImplemented, "The library was built without CUBLAS");
#else
    // CUBLAS works with column-major matrices

    GpuMat src1 = getInputMat(_src1, stream);
    GpuMat src2 = getInputMat(_src2, stream);
    GpuMat src3 = getInputMat(_src3, stream);

    CV_Assert( src1.type() == CV_32FC1 || src1.type() == CV_32FC2 || src1.type() == CV_64FC1 || src1.type() == CV_64FC2 );
    CV_Assert( src2.type() == src1.type() && (src3.empty() || src3.type() == src1.type()) );

    if (src1.depth() == CV_64F)
    {
        if (!deviceSupports(NATIVE_DOUBLE))
            CV_Error(cv::Error::StsUnsupportedFormat, "The device doesn't support double");
    }

    bool tr1 = (flags & GEMM_1_T) != 0;
    bool tr2 = (flags & GEMM_2_T) != 0;
    bool tr3 = (flags & GEMM_3_T) != 0;

    if (src1.type() == CV_64FC2)
    {
        if (tr1 || tr2 || tr3)
            CV_Error(cv::Error::StsNotImplemented, "transpose operation is not implemented for CV_64FC2 type");
    }

    Size src1Size = tr1 ? Size(src1.rows, src1.cols) : src1.size();
    Size src2Size = tr2 ? Size(src2.rows, src2.cols) : src2.size();
    Size src3Size = tr3 ? Size(src3.rows, src3.cols) : src3.size();
    Size dstSize(src2Size.width, src1Size.height);

    CV_Assert( src1Size.width == src2Size.height );
    CV_Assert( src3.empty() || src3Size == dstSize );

    GpuMat dst = getOutputMat(_dst, dstSize, src1.type(), stream);

    if (beta != 0)
    {
        if (src3.empty())
        {
            dst.setTo(Scalar::all(0), stream);
        }
        else
        {
            if (tr3)
            {
                cuda::transpose(src3, dst, stream);
            }
            else
            {
                src3.copyTo(dst, stream);
            }
        }
    }

    cublasHandle_t handle;
    cublasSafeCall( cublasCreate_v2(&handle) );

    cublasSafeCall( cublasSetStream_v2(handle, StreamAccessor::getStream(stream)) );

    cublasSafeCall( cublasSetPointerMode_v2(handle, CUBLAS_POINTER_MODE_HOST) );

    const float alphaf = static_cast<float>(alpha);
    const float betaf = static_cast<float>(beta);

    const cuComplex alphacf = make_cuComplex(alphaf, 0);
    const cuComplex betacf = make_cuComplex(betaf, 0);

    const cuDoubleComplex alphac = make_cuDoubleComplex(alpha, 0);
    const cuDoubleComplex betac = make_cuDoubleComplex(beta, 0);

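    // OpenCV stores matrices in row-major order while cuBLAS expects column-major.
    // A row-major MxN matrix reinterpreted as column-major is its transpose, so instead of
    // computing dst = src1 * src2 directly we compute dst^T = src2^T * src1^T by passing
    // src2 as the first operand and src1 as the second; the result then comes out laid out
    // row-major in dst. As a consequence the user's transpose flags swap roles below:
    // transa follows tr2 (second operand) and transb follows tr1 (first operand).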
    cublasOperation_t transa = tr2 ? CUBLAS_OP_T : CUBLAS_OP_N;
    cublasOperation_t transb = tr1 ? CUBLAS_OP_T : CUBLAS_OP_N;

    switch (src1.type())
    {
    case CV_32FC1:
        cublasSafeCall( cublasSgemm_v2(handle, transa, transb, tr2 ? src2.rows : src2.cols, tr1 ? src1.cols : src1.rows, tr2 ? src2.cols : src2.rows,
            &alphaf,
            src2.ptr<float>(), static_cast<int>(src2.step / sizeof(float)),
            src1.ptr<float>(), static_cast<int>(src1.step / sizeof(float)),
            &betaf,
            dst.ptr<float>(), static_cast<int>(dst.step / sizeof(float))) );
        break;

    case CV_64FC1:
        cublasSafeCall( cublasDgemm_v2(handle, transa, transb, tr2 ? src2.rows : src2.cols, tr1 ? src1.cols : src1.rows, tr2 ? src2.cols : src2.rows,
            &alpha,
            src2.ptr<double>(), static_cast<int>(src2.step / sizeof(double)),
            src1.ptr<double>(), static_cast<int>(src1.step / sizeof(double)),
            &beta,
            dst.ptr<double>(), static_cast<int>(dst.step / sizeof(double))) );
        break;

    case CV_32FC2:
        cublasSafeCall( cublasCgemm_v2(handle, transa, transb, tr2 ? src2.rows : src2.cols, tr1 ? src1.cols : src1.rows, tr2 ? src2.cols : src2.rows,
            &alphacf,
            src2.ptr<cuComplex>(), static_cast<int>(src2.step / sizeof(cuComplex)),
            src1.ptr<cuComplex>(), static_cast<int>(src1.step / sizeof(cuComplex)),
            &betacf,
            dst.ptr<cuComplex>(), static_cast<int>(dst.step / sizeof(cuComplex))) );
        break;

    case CV_64FC2:
        cublasSafeCall( cublasZgemm_v2(handle, transa, transb, tr2 ? src2.rows : src2.cols, tr1 ? src1.cols : src1.rows, tr2 ? src2.cols : src2.rows,
            &alphac,
            src2.ptr<cuDoubleComplex>(), static_cast<int>(src2.step / sizeof(cuDoubleComplex)),
            src1.ptr<cuDoubleComplex>(), static_cast<int>(src1.step / sizeof(cuDoubleComplex)),
            &betac,
            dst.ptr<cuDoubleComplex>(), static_cast<int>(dst.step / sizeof(cuDoubleComplex))) );
        break;
    }

    cublasSafeCall( cublasDestroy_v2(handle) );

    syncOutput(dst, _dst, stream);
#endif
}
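
// Minimal usage sketch for the function above (illustrative only, assumes a CUBLAS-enabled build):
//
//     cv::cuda::GpuMat A(64, 32, CV_32FC1), B(32, 48, CV_32FC1), C;
//     cv::cuda::gemm(A, B, 1.0, cv::noArray(), 0.0, C);   // C = 1.0 * A * B, 64x48, CV_32FC1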

//////////////////////////////////////////////////////////////////////////////
// dft

void cv::cuda::dft(InputArray _src, OutputArray _dst, Size dft_size, int flags, Stream& stream)
{
#ifndef HAVE_CUFFT
    (void) _src;
    (void) _dst;
    (void) dft_size;
    (void) flags;
    (void) stream;
    throw_no_cuda();
#else
    GpuMat src = getInputMat(_src, stream);

    CV_Assert( src.type() == CV_32FC1 || src.type() == CV_32FC2 );

    // We don't support unpacked output (in the case of real input)
    CV_Assert( !(flags & DFT_COMPLEX_OUTPUT) );

    const bool is_1d_input       = (dft_size.height == 1) || (dft_size.width == 1);
    const bool is_row_dft        = (flags & DFT_ROWS) != 0;
    const bool is_scaled_dft     = (flags & DFT_SCALE) != 0;
    const bool is_inverse        = (flags & DFT_INVERSE) != 0;
    const bool is_complex_input  = src.channels() == 2;
    const bool is_complex_output = !(flags & DFT_REAL_OUTPUT);

    // We don't support real-to-real transform
    CV_Assert( is_complex_input || is_complex_output );

    // Make sure we work with a continuous input,
    // as CUFFT can't handle gaps
    GpuMat src_cont;
    if (src.isContinuous())
    {
        src_cont = src;
    }
    else
    {
        BufferPool pool(stream);
        src_cont.allocator = pool.getAllocator();
        createContinuous(src.rows, src.cols, src.type(), src_cont);
        src.copyTo(src_cont, stream);
    }

    Size dft_size_opt = dft_size;
    if (is_1d_input && !is_row_dft)
    {
        // If the source matrix is a single column, handle it as a single row
        dft_size_opt.width = std::max(dft_size.width, dft_size.height);
        dft_size_opt.height = std::min(dft_size.width, dft_size.height);
    }

    CV_Assert( dft_size_opt.width > 1 );

    cufftType dft_type = CUFFT_R2C;
    if (is_complex_input)
        dft_type = is_complex_output ? CUFFT_C2C : CUFFT_C2R;

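    // For 1D and row-wise transforms a batched 1D plan is used: cufftPlan1d's last argument
    // is the batch count, so dft_size_opt.height independent transforms of length
    // dft_size_opt.width are executed (one per row). Otherwise a single 2D plan is created.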
    cufftHandle plan;
    if (is_1d_input || is_row_dft)
        cufftSafeCall( cufftPlan1d(&plan, dft_size_opt.width, dft_type, dft_size_opt.height) );
    else
        cufftSafeCall( cufftPlan2d(&plan, dft_size_opt.height, dft_size_opt.width, dft_type) );

    cufftSafeCall( cufftSetStream(plan, StreamAccessor::getStream(stream)) );

    if (is_complex_input)
    {
        if (is_complex_output)
        {
            createContinuous(dft_size, CV_32FC2, _dst);
            GpuMat dst = _dst.getGpuMat();

            cufftSafeCall(cufftExecC2C(
                    plan, src_cont.ptr<cufftComplex>(), dst.ptr<cufftComplex>(),
                    is_inverse ? CUFFT_INVERSE : CUFFT_FORWARD));
        }
        else
        {
            createContinuous(dft_size, CV_32F, _dst);
            GpuMat dst = _dst.getGpuMat();

            cufftSafeCall(cufftExecC2R(
                    plan, src_cont.ptr<cufftComplex>(), dst.ptr<cufftReal>()));
        }
    }
    else
    {
        // dft_size may have been swapped above for efficiency (a single column handled
        // as a single row); the packed output size must reflect that
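        // CUFFT's real-to-complex transform exploits Hermitian symmetry and stores only the
        // non-redundant half of the spectrum: N/2 + 1 complex coefficients along the
        // transformed width, hence the CV_32FC2 destination of reduced width below.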
        if (dft_size == dft_size_opt)
            createContinuous(Size(dft_size.width / 2 + 1, dft_size.height), CV_32FC2, _dst);
        else
            createContinuous(Size(dft_size.width, dft_size.height / 2 + 1), CV_32FC2, _dst);

        GpuMat dst = _dst.getGpuMat();

        cufftSafeCall(cufftExecR2C(
                plan, src_cont.ptr<cufftReal>(), dst.ptr<cufftComplex>()));
    }

    cufftSafeCall( cufftDestroy(plan) );

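    // CUFFT transforms are unnormalized, so DFT_SCALE is implemented as an explicit
    // division by the total number of elements.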
    if (is_scaled_dft)
        cuda::multiply(_dst, Scalar::all(1. / dft_size.area()), _dst, 1, -1, stream);

#endif
}
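
// Minimal usage sketch for the function above (illustrative only, assumes a CUFFT-enabled build):
//
//     cv::cuda::GpuMat img(480, 640, CV_32FC1), spectrum;
//     cv::cuda::dft(img, spectrum, img.size(), 0);   // packed CV_32FC2 output, 321x480 (width/2 + 1 columns)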

//////////////////////////////////////////////////////////////////////////////
// Convolution

#ifdef HAVE_CUFFT

namespace
{
    class ConvolutionImpl : public Convolution
    {
    public:
        explicit ConvolutionImpl(Size user_block_size_) : user_block_size(user_block_size_) {}

        void convolve(InputArray image, InputArray templ, OutputArray result, bool ccorr = false, Stream& stream = Stream::Null());

    private:
        void create(Size image_size, Size templ_size);
        static Size estimateBlockSize(Size result_size);

        Size result_size;
        Size block_size;
        Size user_block_size;
        Size dft_size;
        int spect_len;

        GpuMat image_spect, templ_spect, result_spect;
        GpuMat image_block, templ_block, result_data;
    };

    void ConvolutionImpl::create(Size image_size, Size templ_size)
    {
        result_size = Size(image_size.width - templ_size.width + 1,
                           image_size.height - templ_size.height + 1);

        block_size = user_block_size;
        if (user_block_size.width == 0 || user_block_size.height == 0)
            block_size = estimateBlockSize(result_size);

        dft_size.width = 1 << int(ceil(std::log(block_size.width + templ_size.width - 1.) / std::log(2.)));
        dft_size.height = 1 << int(ceil(std::log(block_size.height + templ_size.height - 1.) / std::log(2.)));

        // CUFFT has hard-coded kernels for power-of-2 sizes (up to 8192),
        // see CUDA Toolkit 4.1 CUFFT Library Programming Guide
        if (dft_size.width > 8192)
            dft_size.width = getOptimalDFTSize(block_size.width + templ_size.width - 1);
        if (dft_size.height > 8192)
            dft_size.height = getOptimalDFTSize(block_size.height + templ_size.height - 1);

        // To avoid wasting time doing small DFTs
        dft_size.width = std::max(dft_size.width, 512);
        dft_size.height = std::max(dft_size.height, 512);

        createContinuous(dft_size, CV_32F, image_block);
        createContinuous(dft_size, CV_32F, templ_block);
        createContinuous(dft_size, CV_32F, result_data);

        spect_len = dft_size.height * (dft_size.width / 2 + 1);
        createContinuous(1, spect_len, CV_32FC2, image_spect);
        createContinuous(1, spect_len, CV_32FC2, templ_spect);
        createContinuous(1, spect_len, CV_32FC2, result_spect);

        // Recompute the block size: use the largest result-matrix block the chosen DFT size allows
        block_size.width = std::min(dft_size.width - templ_size.width + 1, result_size.width);
        block_size.height = std::min(dft_size.height - templ_size.height + 1, result_size.height);
    }

    Size ConvolutionImpl::estimateBlockSize(Size result_size)
    {
        int width = (result_size.width + 2) / 3;
        int height = (result_size.height + 2) / 3;
        width = std::min(width, result_size.width);
        height = std::min(height, result_size.height);
        return Size(width, height);
    }

    void ConvolutionImpl::convolve(InputArray _image, InputArray _templ, OutputArray _result, bool ccorr, Stream& _stream)
    {
        GpuMat image = getInputMat(_image, _stream);
        GpuMat templ = getInputMat(_templ, _stream);

        CV_Assert( image.type() == CV_32FC1 );
        CV_Assert( templ.type() == CV_32FC1 );

        create(image.size(), templ.size());

        GpuMat result = getOutputMat(_result, result_size, CV_32FC1, _stream);

        cudaStream_t stream = StreamAccessor::getStream(_stream);

        cufftHandle planR2C, planC2R;
        cufftSafeCall( cufftPlan2d(&planC2R, dft_size.height, dft_size.width, CUFFT_C2R) );
        cufftSafeCall( cufftPlan2d(&planR2C, dft_size.height, dft_size.width, CUFFT_R2C) );

        cufftSafeCall( cufftSetStream(planR2C, stream) );
        cufftSafeCall( cufftSetStream(planC2R, stream) );

        GpuMat templ_roi(templ.size(), CV_32FC1, templ.data, templ.step);
        cuda::copyMakeBorder(templ_roi, templ_block, 0, templ_block.rows - templ_roi.rows, 0,
                            templ_block.cols - templ_roi.cols, 0, Scalar(), _stream);

        cufftSafeCall( cufftExecR2C(planR2C, templ_block.ptr<cufftReal>(), templ_spect.ptr<cufftComplex>()) );

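        // Block-wise FFT convolution: the template spectrum is computed once above; for each
        // block of the result a zero-padded image tile is transformed, multiplied with the
        // template spectrum (conjugated when ccorr is requested, i.e. cross-correlation) and
        // scaled by 1/dft_size.area() because CUFFT transforms are unnormalized, then
        // transformed back, and the valid portion is copied into the corresponding block of
        // the result matrix.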
        // Process all blocks of the result matrix
        for (int y = 0; y < result.rows; y += block_size.height)
        {
            for (int x = 0; x < result.cols; x += block_size.width)
            {
                Size image_roi_size(std::min(x + dft_size.width, image.cols) - x,
                                    std::min(y + dft_size.height, image.rows) - y);
                GpuMat image_roi(image_roi_size, CV_32F, (void*)(image.ptr<float>(y) + x),
                                 image.step);
                cuda::copyMakeBorder(image_roi, image_block, 0, image_block.rows - image_roi.rows,
                                    0, image_block.cols - image_roi.cols, 0, Scalar(), _stream);

                cufftSafeCall(cufftExecR2C(planR2C, image_block.ptr<cufftReal>(),
                                           image_spect.ptr<cufftComplex>()));
                cuda::mulAndScaleSpectrums(image_spect, templ_spect, result_spect, 0,
                                          1.f / dft_size.area(), ccorr, _stream);
                cufftSafeCall(cufftExecC2R(planC2R, result_spect.ptr<cufftComplex>(),
                                           result_data.ptr<cufftReal>()));

                Size result_roi_size(std::min(x + block_size.width, result.cols) - x,
                                     std::min(y + block_size.height, result.rows) - y);
                GpuMat result_roi(result_roi_size, result.type(),
                                  (void*)(result.ptr<float>(y) + x), result.step);
                GpuMat result_block(result_roi_size, result_data.type(),
                                    result_data.ptr(), result_data.step);

                result_block.copyTo(result_roi, _stream);
            }
        }

        cufftSafeCall( cufftDestroy(planR2C) );
        cufftSafeCall( cufftDestroy(planC2R) );

        syncOutput(result, _result, _stream);
    }
}

#endif

Ptr<Convolution> cv::cuda::createConvolution(Size user_block_size)
{
#ifndef HAVE_CUFFT
    (void) user_block_size;
    CV_Error(Error::StsNotImplemented, "The library was built without CUFFT");
    return Ptr<Convolution>();
#else
    return makePtr<ConvolutionImpl>(user_block_size);
#endif
}

#endif /* !defined (HAVE_CUDA) */