Home | History | Annotate | Download | only in src
      1 /*M///////////////////////////////////////////////////////////////////////////////////////
      2 //
      3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
      4 //
      5 //  By downloading, copying, installing or using the software you agree to this license.
      6 //  If you do not agree to this license, do not download, install,
      7 //  copy or use the software.
      8 //
      9 //
     10 //                           License Agreement
     11 //                For Open Source Computer Vision Library
     12 //
     13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
     14 // Copyright (C) 2009, Willow Garage Inc., all rights reserved.
     15 // Third party copyrights are property of their respective owners.
     16 //
     17 // Redistribution and use in source and binary forms, with or without modification,
     18 // are permitted provided that the following conditions are met:
     19 //
     20 //   * Redistribution's of source code must retain the above copyright notice,
     21 //     this list of conditions and the following disclaimer.
     22 //
     23 //   * Redistribution's in binary form must reproduce the above copyright notice,
     24 //     this list of conditions and the following disclaimer in the documentation
     25 //     and/or other materials provided with the distribution.
     26 //
     27 //   * The name of the copyright holders may not be used to endorse or promote products
     28 //     derived from this software without specific prior written permission.
     29 //
     30 // This software is provided by the copyright holders and contributors "as is" and
     31 // any express or implied warranties, including, but not limited to, the implied
     32 // warranties of merchantability and fitness for a particular purpose are disclaimed.
     33 // In no event shall the Intel Corporation or contributors be liable for any direct,
     34 // indirect, incidental, special, exemplary, or consequential damages
     35 // (including, but not limited to, procurement of substitute goods or services;
     36 // loss of use, data, or profits; or business interruption) however caused
     37 // and on any theory of liability, whether in contract, strict liability,
     38 // or tort (including negligence or otherwise) arising in any way out of
     39 // the use of this software, even if advised of the possibility of such damage.
     40 //
     41 //M*/
     42 
     43 #include "precomp.hpp"
     44 
     45 using namespace cv;
     46 using namespace cv::cuda;
     47 
     48 #if !defined (HAVE_CUDA) || !defined (HAVE_OPENCV_CUDAARITHM) || defined (CUDA_DISABLER)
     49 
     50 Ptr<cuda::TemplateMatching> cv::cuda::createTemplateMatching(int, int, Size) { throw_no_cuda(); return Ptr<cuda::TemplateMatching>(); }
     51 
     52 #else
     53 
     54 namespace cv { namespace cuda { namespace device
     55 {
     56     namespace match_template
     57     {
     58         void matchTemplateNaive_CCORR_8U(const PtrStepSzb image, const PtrStepSzb templ, PtrStepSzf result, int cn, cudaStream_t stream);
     59         void matchTemplateNaive_CCORR_32F(const PtrStepSzb image, const PtrStepSzb templ, PtrStepSzf result, int cn, cudaStream_t stream);
     60 
     61         void matchTemplateNaive_SQDIFF_8U(const PtrStepSzb image, const PtrStepSzb templ, PtrStepSzf result, int cn, cudaStream_t stream);
     62         void matchTemplateNaive_SQDIFF_32F(const PtrStepSzb image, const PtrStepSzb templ, PtrStepSzf result, int cn, cudaStream_t stream);
     63 
     64         void matchTemplatePrepared_SQDIFF_8U(int w, int h, const PtrStepSz<double> image_sqsum, double templ_sqsum, PtrStepSzf result,
     65             int cn, cudaStream_t stream);
     66 
     67         void matchTemplatePrepared_SQDIFF_NORMED_8U(int w, int h, const PtrStepSz<double> image_sqsum, double templ_sqsum, PtrStepSzf result,
     68             int cn, cudaStream_t stream);
     69 
     70         void matchTemplatePrepared_CCOFF_8U(int w, int h, const PtrStepSz<int> image_sum, int templ_sum, PtrStepSzf result, cudaStream_t stream);
     71         void matchTemplatePrepared_CCOFF_8UC2(
     72             int w, int h,
     73             const PtrStepSz<int> image_sum_r,
     74             const PtrStepSz<int> image_sum_g,
     75             int templ_sum_r,
     76             int templ_sum_g,
     77             PtrStepSzf result, cudaStream_t stream);
     78         void matchTemplatePrepared_CCOFF_8UC3(
     79                 int w, int h,
     80                 const PtrStepSz<int> image_sum_r,
     81                 const PtrStepSz<int> image_sum_g,
     82                 const PtrStepSz<int> image_sum_b,
     83                 int templ_sum_r,
     84                 int templ_sum_g,
     85                 int templ_sum_b,
     86                 PtrStepSzf result, cudaStream_t stream);
     87         void matchTemplatePrepared_CCOFF_8UC4(
     88                 int w, int h,
     89                 const PtrStepSz<int> image_sum_r,
     90                 const PtrStepSz<int> image_sum_g,
     91                 const PtrStepSz<int> image_sum_b,
     92                 const PtrStepSz<int> image_sum_a,
     93                 int templ_sum_r,
     94                 int templ_sum_g,
     95                 int templ_sum_b,
     96                 int templ_sum_a,
     97                 PtrStepSzf result, cudaStream_t stream);
     98 
     99 
    100         void matchTemplatePrepared_CCOFF_NORMED_8U(
    101                 int w, int h, const PtrStepSz<int> image_sum,
    102                 const PtrStepSz<double> image_sqsum,
    103                 int templ_sum, double templ_sqsum,
    104                 PtrStepSzf result, cudaStream_t stream);
    105         void matchTemplatePrepared_CCOFF_NORMED_8UC2(
    106                 int w, int h,
    107                 const PtrStepSz<int> image_sum_r, const PtrStepSz<double> image_sqsum_r,
    108                 const PtrStepSz<int> image_sum_g, const PtrStepSz<double> image_sqsum_g,
    109                 int templ_sum_r, double templ_sqsum_r,
    110                 int templ_sum_g, double templ_sqsum_g,
    111                 PtrStepSzf result, cudaStream_t stream);
    112         void matchTemplatePrepared_CCOFF_NORMED_8UC3(
    113                 int w, int h,
    114                 const PtrStepSz<int> image_sum_r, const PtrStepSz<double> image_sqsum_r,
    115                 const PtrStepSz<int> image_sum_g, const PtrStepSz<double> image_sqsum_g,
    116                 const PtrStepSz<int> image_sum_b, const PtrStepSz<double> image_sqsum_b,
    117                 int templ_sum_r, double templ_sqsum_r,
    118                 int templ_sum_g, double templ_sqsum_g,
    119                 int templ_sum_b, double templ_sqsum_b,
    120                 PtrStepSzf result, cudaStream_t stream);
    121         void matchTemplatePrepared_CCOFF_NORMED_8UC4(
    122                 int w, int h,
    123                 const PtrStepSz<int> image_sum_r, const PtrStepSz<double> image_sqsum_r,
    124                 const PtrStepSz<int> image_sum_g, const PtrStepSz<double> image_sqsum_g,
    125                 const PtrStepSz<int> image_sum_b, const PtrStepSz<double> image_sqsum_b,
    126                 const PtrStepSz<int> image_sum_a, const PtrStepSz<double> image_sqsum_a,
    127                 int templ_sum_r, double templ_sqsum_r,
    128                 int templ_sum_g, double templ_sqsum_g,
    129                 int templ_sum_b, double templ_sqsum_b,
    130                 int templ_sum_a, double templ_sqsum_a,
    131                 PtrStepSzf result, cudaStream_t stream);
    132 
    133         void normalize_8U(int w, int h, const PtrStepSz<double> image_sqsum,
    134                           double templ_sqsum, PtrStepSzf result, int cn, cudaStream_t stream);
    135 
    136         void extractFirstChannel_32F(const PtrStepSzb image, PtrStepSzf result, int cn, cudaStream_t stream);
    137     }
    138 }}}
    139 
    140 namespace
    141 {
    142     // Evaluates optimal template's area threshold. If
    143     // template's area is less  than the threshold, we use naive match
    144     // template version, otherwise FFT-based (if available)
    145     int getTemplateThreshold(int method, int depth)
    146     {
    147         switch (method)
    148         {
    149         case TM_CCORR:
    150             if (depth == CV_32F) return 250;
    151             if (depth == CV_8U) return 300;
    152             break;
    153 
    154         case TM_SQDIFF:
    155             if (depth == CV_8U) return 300;
    156             break;
    157         }
    158 
    159         CV_Error(Error::StsBadArg, "unsupported match template mode");
    160         return 0;
    161     }
    162 
    163     ///////////////////////////////////////////////////////////////
    164     // CCORR_32F
    165 
    166     class Match_CCORR_32F : public TemplateMatching
    167     {
    168     public:
    169         explicit Match_CCORR_32F(Size user_block_size);
    170 
    171         void match(InputArray image, InputArray templ, OutputArray result, Stream& stream = Stream::Null());
    172 
    173     private:
    174         Ptr<cuda::Convolution> conv_;
    175         GpuMat result_;
    176     };
    177 
    178     Match_CCORR_32F::Match_CCORR_32F(Size user_block_size)
    179     {
    180         conv_ = cuda::createConvolution(user_block_size);
    181     }
    182 
    183     void Match_CCORR_32F::match(InputArray _image, InputArray _templ, OutputArray _result, Stream& _stream)
    184     {
    185         using namespace cv::cuda::device::match_template;
    186 
    187         GpuMat image = _image.getGpuMat();
    188         GpuMat templ = _templ.getGpuMat();
    189 
    190         CV_Assert( image.depth() == CV_32F );
    191         CV_Assert( image.type() == templ.type() );
    192         CV_Assert( image.cols >= templ.cols && image.rows >= templ.rows );
    193 
    194         cudaStream_t stream = StreamAccessor::getStream(_stream);
    195 
    196         _result.create(image.rows - templ.rows + 1, image.cols - templ.cols + 1, CV_32FC1);
    197         GpuMat result = _result.getGpuMat();
    198 
    199         if (templ.size().area() < getTemplateThreshold(TM_CCORR, CV_32F))
    200         {
    201             matchTemplateNaive_CCORR_32F(image, templ, result, image.channels(), stream);
    202             return;
    203         }
    204 
    205         if (image.channels() == 1)
    206         {
    207             conv_->convolve(image.reshape(1), templ.reshape(1), result, true, _stream);
    208         }
    209         else
    210         {
    211             conv_->convolve(image.reshape(1), templ.reshape(1), result_, true, _stream);
    212             extractFirstChannel_32F(result_, result, image.channels(), stream);
    213         }
    214     }
    215 
    216     ///////////////////////////////////////////////////////////////
    217     // CCORR_8U
    218 
    219     class Match_CCORR_8U : public TemplateMatching
    220     {
    221     public:
    222         explicit Match_CCORR_8U(Size user_block_size) : match32F_(user_block_size)
    223         {
    224         }
    225 
    226         void match(InputArray image, InputArray templ, OutputArray result, Stream& stream = Stream::Null());
    227 
    228     private:
    229         GpuMat imagef_, templf_;
    230         Match_CCORR_32F match32F_;
    231     };
    232 
    233     void Match_CCORR_8U::match(InputArray _image, InputArray _templ, OutputArray _result, Stream& stream)
    234     {
    235         using namespace cv::cuda::device::match_template;
    236 
    237         GpuMat image = _image.getGpuMat();
    238         GpuMat templ = _templ.getGpuMat();
    239 
    240         CV_Assert( image.depth() == CV_8U );
    241         CV_Assert( image.type() == templ.type() );
    242         CV_Assert( image.cols >= templ.cols && image.rows >= templ.rows );
    243 
    244         if (templ.size().area() < getTemplateThreshold(TM_CCORR, CV_8U))
    245         {
    246             _result.create(image.rows - templ.rows + 1, image.cols - templ.cols + 1, CV_32FC1);
    247             GpuMat result = _result.getGpuMat();
    248 
    249             matchTemplateNaive_CCORR_8U(image, templ, result, image.channels(), StreamAccessor::getStream(stream));
    250             return;
    251         }
    252 
    253         image.convertTo(imagef_, CV_32F, stream);
    254         templ.convertTo(templf_, CV_32F, stream);
    255 
    256         match32F_.match(imagef_, templf_, _result, stream);
    257     }
    258 
    259     ///////////////////////////////////////////////////////////////
    260     // CCORR_NORMED_8U
    261 
    262     class Match_CCORR_NORMED_8U : public TemplateMatching
    263     {
    264     public:
    265         explicit Match_CCORR_NORMED_8U(Size user_block_size) : match_CCORR_(user_block_size)
    266         {
    267         }
    268 
    269         void match(InputArray image, InputArray templ, OutputArray result, Stream& stream = Stream::Null());
    270 
    271     private:
    272         Match_CCORR_8U match_CCORR_;
    273         GpuMat image_sqsums_;
    274     };
    275 
    276     void Match_CCORR_NORMED_8U::match(InputArray _image, InputArray _templ, OutputArray _result, Stream& stream)
    277     {
    278         using namespace cv::cuda::device::match_template;
    279 
    280         GpuMat image = _image.getGpuMat();
    281         GpuMat templ = _templ.getGpuMat();
    282 
    283         CV_Assert( image.depth() == CV_8U );
    284         CV_Assert( image.type() == templ.type() );
    285         CV_Assert( image.cols >= templ.cols && image.rows >= templ.rows );
    286 
    287         match_CCORR_.match(image, templ, _result, stream);
    288         GpuMat result = _result.getGpuMat();
    289 
    290         cuda::sqrIntegral(image.reshape(1), image_sqsums_, stream);
    291 
    292         double templ_sqsum = cuda::sqrSum(templ.reshape(1))[0];
    293 
    294         normalize_8U(templ.cols, templ.rows, image_sqsums_, templ_sqsum, result, image.channels(), StreamAccessor::getStream(stream));
    295     }
    296 
    297     ///////////////////////////////////////////////////////////////
    298     // SQDIFF_32F
    299 
    300     class Match_SQDIFF_32F : public TemplateMatching
    301     {
    302     public:
    303         void match(InputArray image, InputArray templ, OutputArray result, Stream& stream = Stream::Null());
    304     };
    305 
    306     void Match_SQDIFF_32F::match(InputArray _image, InputArray _templ, OutputArray _result, Stream& stream)
    307     {
    308         using namespace cv::cuda::device::match_template;
    309 
    310         GpuMat image = _image.getGpuMat();
    311         GpuMat templ = _templ.getGpuMat();
    312 
    313         CV_Assert( image.depth() == CV_32F );
    314         CV_Assert( image.type() == templ.type() );
    315         CV_Assert( image.cols >= templ.cols && image.rows >= templ.rows );
    316 
    317         _result.create(image.rows - templ.rows + 1, image.cols - templ.cols + 1, CV_32FC1);
    318         GpuMat result = _result.getGpuMat();
    319 
    320         matchTemplateNaive_SQDIFF_32F(image, templ, result, image.channels(), StreamAccessor::getStream(stream));
    321     }
    322 
    323     ///////////////////////////////////////////////////////////////
    324     // SQDIFF_8U
    325 
    326     class Match_SQDIFF_8U : public TemplateMatching
    327     {
    328     public:
    329         explicit Match_SQDIFF_8U(Size user_block_size) : match_CCORR_(user_block_size)
    330         {
    331         }
    332 
    333         void match(InputArray image, InputArray templ, OutputArray result, Stream& stream = Stream::Null());
    334 
    335     private:
    336         GpuMat image_sqsums_;
    337         Match_CCORR_8U match_CCORR_;
    338     };
    339 
    340     void Match_SQDIFF_8U::match(InputArray _image, InputArray _templ, OutputArray _result, Stream& stream)
    341     {
    342         using namespace cv::cuda::device::match_template;
    343 
    344         GpuMat image = _image.getGpuMat();
    345         GpuMat templ = _templ.getGpuMat();
    346 
    347         CV_Assert( image.depth() == CV_8U );
    348         CV_Assert( image.type() == templ.type() );
    349         CV_Assert( image.cols >= templ.cols && image.rows >= templ.rows );
    350 
    351         if (templ.size().area() < getTemplateThreshold(TM_SQDIFF, CV_8U))
    352         {
    353             _result.create(image.rows - templ.rows + 1, image.cols - templ.cols + 1, CV_32FC1);
    354             GpuMat result = _result.getGpuMat();
    355 
    356             matchTemplateNaive_SQDIFF_8U(image, templ, result, image.channels(), StreamAccessor::getStream(stream));
    357             return;
    358         }
    359 
    360         cuda::sqrIntegral(image.reshape(1), image_sqsums_, stream);
    361 
    362         double templ_sqsum = cuda::sqrSum(templ.reshape(1))[0];
    363 
    364         match_CCORR_.match(image, templ, _result, stream);
    365         GpuMat result = _result.getGpuMat();
    366 
    367         matchTemplatePrepared_SQDIFF_8U(templ.cols, templ.rows, image_sqsums_, templ_sqsum, result, image.channels(), StreamAccessor::getStream(stream));
    368     }
    369 
    370     ///////////////////////////////////////////////////////////////
    371     // SQDIFF_NORMED_8U
    372 
    373     class Match_SQDIFF_NORMED_8U : public TemplateMatching
    374     {
    375     public:
    376         explicit Match_SQDIFF_NORMED_8U(Size user_block_size) : match_CCORR_(user_block_size)
    377         {
    378         }
    379 
    380         void match(InputArray image, InputArray templ, OutputArray result, Stream& stream = Stream::Null());
    381 
    382     private:
    383         GpuMat image_sqsums_;
    384         Match_CCORR_8U match_CCORR_;
    385     };
    386 
    387     void Match_SQDIFF_NORMED_8U::match(InputArray _image, InputArray _templ, OutputArray _result, Stream& stream)
    388     {
    389         using namespace cv::cuda::device::match_template;
    390 
    391         GpuMat image = _image.getGpuMat();
    392         GpuMat templ = _templ.getGpuMat();
    393 
    394         CV_Assert( image.depth() == CV_8U );
    395         CV_Assert( image.type() == templ.type() );
    396         CV_Assert( image.cols >= templ.cols && image.rows >= templ.rows );
    397 
    398         cuda::sqrIntegral(image.reshape(1), image_sqsums_, stream);
    399 
    400         double templ_sqsum = cuda::sqrSum(templ.reshape(1))[0];
    401 
    402         match_CCORR_.match(image, templ, _result, stream);
    403         GpuMat result = _result.getGpuMat();
    404 
    405         matchTemplatePrepared_SQDIFF_NORMED_8U(templ.cols, templ.rows, image_sqsums_, templ_sqsum, result, image.channels(), StreamAccessor::getStream(stream));
    406     }
    407 
    408     ///////////////////////////////////////////////////////////////
    409     // CCOFF_8U
    410 
    411     class Match_CCOEFF_8U : public TemplateMatching
    412     {
    413     public:
    414         explicit Match_CCOEFF_8U(Size user_block_size) : match_CCORR_(user_block_size)
    415         {
    416         }
    417 
    418         void match(InputArray image, InputArray templ, OutputArray result, Stream& stream = Stream::Null());
    419 
    420     private:
    421         std::vector<GpuMat> images_;
    422         std::vector<GpuMat> image_sums_;
    423         Match_CCORR_8U match_CCORR_;
    424     };
    425 
    426     void Match_CCOEFF_8U::match(InputArray _image, InputArray _templ, OutputArray _result, Stream& stream)
    427     {
    428         using namespace cv::cuda::device::match_template;
    429 
    430         GpuMat image = _image.getGpuMat();
    431         GpuMat templ = _templ.getGpuMat();
    432 
    433         CV_Assert( image.depth() == CV_8U );
    434         CV_Assert( image.type() == templ.type() );
    435         CV_Assert( image.cols >= templ.cols && image.rows >= templ.rows );
    436 
    437         match_CCORR_.match(image, templ, _result, stream);
    438         GpuMat result = _result.getGpuMat();
    439 
    440         if (image.channels() == 1)
    441         {
    442             image_sums_.resize(1);
    443             cuda::integral(image, image_sums_[0], stream);
    444 
    445             int templ_sum = (int) cuda::sum(templ)[0];
    446 
    447             matchTemplatePrepared_CCOFF_8U(templ.cols, templ.rows, image_sums_[0], templ_sum, result, StreamAccessor::getStream(stream));
    448         }
    449         else
    450         {
    451             cuda::split(image, images_);
    452 
    453             image_sums_.resize(images_.size());
    454             for (int i = 0; i < image.channels(); ++i)
    455                 cuda::integral(images_[i], image_sums_[i], stream);
    456 
    457             Scalar templ_sum = cuda::sum(templ);
    458 
    459             switch (image.channels())
    460             {
    461             case 2:
    462                 matchTemplatePrepared_CCOFF_8UC2(
    463                         templ.cols, templ.rows, image_sums_[0], image_sums_[1],
    464                         (int) templ_sum[0], (int) templ_sum[1],
    465                         result, StreamAccessor::getStream(stream));
    466                 break;
    467             case 3:
    468                 matchTemplatePrepared_CCOFF_8UC3(
    469                         templ.cols, templ.rows, image_sums_[0], image_sums_[1], image_sums_[2],
    470                         (int) templ_sum[0], (int) templ_sum[1], (int) templ_sum[2],
    471                         result, StreamAccessor::getStream(stream));
    472                 break;
    473             case 4:
    474                 matchTemplatePrepared_CCOFF_8UC4(
    475                         templ.cols, templ.rows, image_sums_[0], image_sums_[1], image_sums_[2], image_sums_[3],
    476                         (int) templ_sum[0], (int) templ_sum[1], (int) templ_sum[2], (int) templ_sum[3],
    477                         result, StreamAccessor::getStream(stream));
    478                 break;
    479             default:
    480                 CV_Error(Error::StsBadArg, "unsupported number of channels");
    481             }
    482         }
    483     }
    484 
    485     ///////////////////////////////////////////////////////////////
    486     // CCOFF_NORMED_8U
    487 
    488     class Match_CCOEFF_NORMED_8U : public TemplateMatching
    489     {
    490     public:
    491         explicit Match_CCOEFF_NORMED_8U(Size user_block_size) : match_CCORR_32F_(user_block_size)
    492         {
    493         }
    494 
    495         void match(InputArray image, InputArray templ, OutputArray result, Stream& stream = Stream::Null());
    496 
    497     private:
    498         GpuMat imagef_, templf_;
    499         Match_CCORR_32F match_CCORR_32F_;
    500         std::vector<GpuMat> images_;
    501         std::vector<GpuMat> image_sums_;
    502         std::vector<GpuMat> image_sqsums_;
    503     };
    504 
    505     void Match_CCOEFF_NORMED_8U::match(InputArray _image, InputArray _templ, OutputArray _result, Stream& stream)
    506     {
    507         using namespace cv::cuda::device::match_template;
    508 
    509         GpuMat image = _image.getGpuMat();
    510         GpuMat templ = _templ.getGpuMat();
    511 
    512         CV_Assert( image.depth() == CV_8U );
    513         CV_Assert( image.type() == templ.type() );
    514         CV_Assert( image.cols >= templ.cols && image.rows >= templ.rows );
    515 
    516         image.convertTo(imagef_, CV_32F, stream);
    517         templ.convertTo(templf_, CV_32F, stream);
    518 
    519         match_CCORR_32F_.match(imagef_, templf_, _result, stream);
    520         GpuMat result = _result.getGpuMat();
    521 
    522         if (image.channels() == 1)
    523         {
    524             image_sums_.resize(1);
    525             cuda::integral(image, image_sums_[0], stream);
    526 
    527             image_sqsums_.resize(1);
    528             cuda::sqrIntegral(image, image_sqsums_[0], stream);
    529 
    530             int templ_sum = (int) cuda::sum(templ)[0];
    531             double templ_sqsum = cuda::sqrSum(templ)[0];
    532 
    533             matchTemplatePrepared_CCOFF_NORMED_8U(
    534                     templ.cols, templ.rows, image_sums_[0], image_sqsums_[0],
    535                     templ_sum, templ_sqsum, result, StreamAccessor::getStream(stream));
    536         }
    537         else
    538         {
    539             cuda::split(image, images_);
    540 
    541             image_sums_.resize(images_.size());
    542             image_sqsums_.resize(images_.size());
    543             for (int i = 0; i < image.channels(); ++i)
    544             {
    545                 cuda::integral(images_[i], image_sums_[i], stream);
    546                 cuda::sqrIntegral(images_[i], image_sqsums_[i], stream);
    547             }
    548 
    549             Scalar templ_sum = cuda::sum(templ);
    550             Scalar templ_sqsum = cuda::sqrSum(templ);
    551 
    552             switch (image.channels())
    553             {
    554             case 2:
    555                 matchTemplatePrepared_CCOFF_NORMED_8UC2(
    556                         templ.cols, templ.rows,
    557                         image_sums_[0], image_sqsums_[0],
    558                         image_sums_[1], image_sqsums_[1],
    559                         (int)templ_sum[0], templ_sqsum[0],
    560                         (int)templ_sum[1], templ_sqsum[1],
    561                         result, StreamAccessor::getStream(stream));
    562                 break;
    563             case 3:
    564                 matchTemplatePrepared_CCOFF_NORMED_8UC3(
    565                         templ.cols, templ.rows,
    566                         image_sums_[0], image_sqsums_[0],
    567                         image_sums_[1], image_sqsums_[1],
    568                         image_sums_[2], image_sqsums_[2],
    569                         (int)templ_sum[0], templ_sqsum[0],
    570                         (int)templ_sum[1], templ_sqsum[1],
    571                         (int)templ_sum[2], templ_sqsum[2],
    572                         result, StreamAccessor::getStream(stream));
    573                 break;
    574             case 4:
    575                 matchTemplatePrepared_CCOFF_NORMED_8UC4(
    576                         templ.cols, templ.rows,
    577                         image_sums_[0], image_sqsums_[0],
    578                         image_sums_[1], image_sqsums_[1],
    579                         image_sums_[2], image_sqsums_[2],
    580                         image_sums_[3], image_sqsums_[3],
    581                         (int)templ_sum[0], templ_sqsum[0],
    582                         (int)templ_sum[1], templ_sqsum[1],
    583                         (int)templ_sum[2], templ_sqsum[2],
    584                         (int)templ_sum[3], templ_sqsum[3],
    585                         result, StreamAccessor::getStream(stream));
    586                 break;
    587             default:
    588                 CV_Error(Error::StsBadArg, "unsupported number of channels");
    589             }
    590         }
    591     }
    592 }
    593 
    594 Ptr<cuda::TemplateMatching> cv::cuda::createTemplateMatching(int srcType, int method, Size user_block_size)
    595 {
    596     const int sdepth = CV_MAT_DEPTH(srcType);
    597 
    598     CV_Assert( sdepth == CV_8U || sdepth == CV_32F );
    599 
    600     if (sdepth == CV_32F)
    601     {
    602         switch (method)
    603         {
    604         case TM_SQDIFF:
    605             return makePtr<Match_SQDIFF_32F>();
    606 
    607         case TM_CCORR:
    608             return makePtr<Match_CCORR_32F>(user_block_size);
    609 
    610         default:
    611             CV_Error( Error::StsBadFlag, "Unsopported method" );
    612             return Ptr<cuda::TemplateMatching>();
    613         }
    614     }
    615     else
    616     {
    617         switch (method)
    618         {
    619         case TM_SQDIFF:
    620             return makePtr<Match_SQDIFF_8U>(user_block_size);
    621 
    622         case TM_SQDIFF_NORMED:
    623             return makePtr<Match_SQDIFF_NORMED_8U>(user_block_size);
    624 
    625         case TM_CCORR:
    626             return makePtr<Match_CCORR_8U>(user_block_size);
    627 
    628         case TM_CCORR_NORMED:
    629             return makePtr<Match_CCORR_NORMED_8U>(user_block_size);
    630 
    631         case TM_CCOEFF:
    632             return makePtr<Match_CCOEFF_8U>(user_block_size);
    633 
    634         case TM_CCOEFF_NORMED:
    635             return makePtr<Match_CCOEFF_NORMED_8U>(user_block_size);
    636 
    637         default:
    638             CV_Error( Error::StsBadFlag, "Unsopported method" );
    639             return Ptr<cuda::TemplateMatching>();
    640         }
    641     }
    642 }
    643 
    644 #endif
    645