Home | History | Annotate | Download | only in src
      1 /*M///////////////////////////////////////////////////////////////////////////////////////
      2 //
      3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
      4 //
      5 //  By downloading, copying, installing or using the software you agree to this license.
      6 //  If you do not agree to this license, do not download, install,
      7 //  copy or use the software.
      8 //
      9 //
     10 //                        Intel License Agreement
     11 //                For Open Source Computer Vision Library
     12 //
     13 // Copyright (C) 2000, Intel Corporation, all rights reserved.
     14 // Third party copyrights are property of their respective owners.
     15 //
     16 // Redistribution and use in source and binary forms, with or without modification,
     17 // are permitted provided that the following conditions are met:
     18 //
     19 //   * Redistribution's of source code must retain the above copyright notice,
     20 //     this list of conditions and the following disclaimer.
     21 //
     22 //   * Redistribution's in binary form must reproduce the above copyright notice,
     23 //     this list of conditions and the following disclaimer in the documentation
     24 //     and/or other materials provided with the distribution.
     25 //
     26 //   * The name of Intel Corporation may not be used to endorse or promote products
     27 //     derived from this software without specific prior written permission.
     28 //
     29 // This software is provided by the copyright holders and contributors "as is" and
     30 // any express or implied warranties, including, but not limited to, the implied
     31 // warranties of merchantability and fitness for a particular purpose are disclaimed.
     32 // In no event shall the Intel Corporation or contributors be liable for any direct,
     33 // indirect, incidental, special, exemplary, or consequential damages
     34 // (including, but not limited to, procurement of substitute goods or services;
     35 // loss of use, data, or profits; or business interruption) however caused
     36 // and on any theory of liability, whether in contract, strict liability,
     37 // or tort (including negligence or otherwise) arising in any way out of
     38 // the use of this software, even if advised of the possibility of such damage.
     39 //
     40 //M*/
     41 
     42 #include "precomp.hpp"
     43 #include "opencl_kernels_imgproc.hpp"
     44 
     45 ////////////////////////////////////////////////// matchTemplate //////////////////////////////////////////////////////////
     46 
     47 namespace cv
     48 {
     49 
     50 #ifdef HAVE_OPENCL
     51 
     52 /////////////////////////////////////////////////// CCORR //////////////////////////////////////////////////////////////
     53 
     54 enum
     55 {
     56     SUM_1 = 0, SUM_2 = 1
     57 };
     58 
     59 static bool extractFirstChannel_32F(InputArray _image, OutputArray _result, int cn)
     60 {
     61     int depth = _image.depth();
     62 
     63     ocl::Device dev = ocl::Device::getDefault();
     64     int pxPerWIy = (dev.isIntel() && (dev.type() & ocl::Device::TYPE_GPU)) ? 4 : 1;
     65 
     66     ocl::Kernel k("extractFirstChannel", ocl::imgproc::match_template_oclsrc, format("-D FIRST_CHANNEL -D T1=%s -D cn=%d -D PIX_PER_WI_Y=%d",
     67                                                                             ocl::typeToStr(depth), cn, pxPerWIy));
     68     if (k.empty())
     69         return false;
     70 
     71     UMat image  = _image.getUMat();
     72     UMat result = _result.getUMat();
     73 
     74 
     75     size_t globalsize[2] = {result.cols, (result.rows+pxPerWIy-1)/pxPerWIy};
     76     return k.args(ocl::KernelArg::ReadOnlyNoSize(image), ocl::KernelArg::WriteOnly(result)).run( 2, globalsize, NULL, false);
     77 }
     78 
     79 static bool sumTemplate(InputArray _src, UMat & result)
     80 {
     81     int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
     82     int wdepth = CV_32F, wtype = CV_MAKE_TYPE(wdepth, cn);
     83     size_t wgs = ocl::Device::getDefault().maxWorkGroupSize();
     84 
     85     int wgs2_aligned = 1;
     86     while (wgs2_aligned < (int)wgs)
     87         wgs2_aligned <<= 1;
     88     wgs2_aligned >>= 1;
     89 
     90     char cvt[40];
     91     ocl::Kernel k("calcSum", ocl::imgproc::match_template_oclsrc,
     92                   format("-D CALC_SUM -D T=%s -D T1=%s -D WT=%s -D cn=%d -D convertToWT=%s -D WGS=%d -D WGS2_ALIGNED=%d",
     93                          ocl::typeToStr(type), ocl::typeToStr(depth), ocl::typeToStr(wtype), cn,
     94                          ocl::convertTypeStr(depth, wdepth, cn, cvt),
     95                          (int)wgs, wgs2_aligned));
     96     if (k.empty())
     97         return false;
     98 
     99     UMat src = _src.getUMat();
    100     result.create(1, 1, CV_32FC1);
    101 
    102     ocl::KernelArg srcarg = ocl::KernelArg::ReadOnlyNoSize(src),
    103             resarg = ocl::KernelArg::PtrWriteOnly(result);
    104 
    105     k.args(srcarg, src.cols, (int)src.total(), resarg);
    106 
    107     size_t globalsize = wgs;
    108     return k.run(1, &globalsize, &wgs, false);
    109 }
    110 
    111 static bool useNaive(Size size)
    112 {
    113     int dft_size = 18;
    114     return size.height < dft_size && size.width < dft_size;
    115 }
    116 
    117 struct ConvolveBuf
    118 {
    119     Size result_size;
    120     Size block_size;
    121     Size user_block_size;
    122     Size dft_size;
    123 
    124     UMat image_spect, templ_spect, result_spect;
    125     UMat image_block, templ_block, result_data;
    126 
    127     void create(Size image_size, Size templ_size);
    128 };
    129 
    130 void ConvolveBuf::create(Size image_size, Size templ_size)
    131 {
    132     result_size = Size(image_size.width - templ_size.width + 1,
    133                        image_size.height - templ_size.height + 1);
    134 
    135     const double blockScale = 4.5;
    136     const int minBlockSize = 256;
    137 
    138     block_size.width = cvRound(result_size.width*blockScale);
    139     block_size.width = std::max( block_size.width, minBlockSize - templ_size.width + 1 );
    140     block_size.width = std::min( block_size.width, result_size.width );
    141     block_size.height = cvRound(templ_size.height*blockScale);
    142     block_size.height = std::max( block_size.height, minBlockSize - templ_size.height + 1 );
    143     block_size.height = std::min( block_size.height, result_size.height );
    144 
    145     dft_size.width = std::max(getOptimalDFTSize(block_size.width + templ_size.width - 1), 2);
    146     dft_size.height = getOptimalDFTSize(block_size.height + templ_size.height - 1);
    147     if( dft_size.width <= 0 || dft_size.height <= 0 )
    148         CV_Error( CV_StsOutOfRange, "the input arrays are too big" );
    149 
    150     // recompute block size
    151     block_size.width = dft_size.width - templ_size.width + 1;
    152     block_size.width = std::min( block_size.width, result_size.width);
    153     block_size.height = dft_size.height - templ_size.height + 1;
    154     block_size.height = std::min( block_size.height, result_size.height );
    155 
    156     image_block.create(dft_size, CV_32F);
    157     templ_block.create(dft_size, CV_32F);
    158     result_data.create(dft_size, CV_32F);
    159 
    160     image_spect.create(dft_size.height, dft_size.width / 2 + 1, CV_32FC2);
    161     templ_spect.create(dft_size.height, dft_size.width / 2 + 1, CV_32FC2);
    162     result_spect.create(dft_size.height, dft_size.width / 2 + 1, CV_32FC2);
    163 
    164     // Use maximum result matrix block size for the estimated DFT block size
    165     block_size.width = std::min(dft_size.width - templ_size.width + 1, result_size.width);
    166     block_size.height = std::min(dft_size.height - templ_size.height + 1, result_size.height);
    167 }
    168 
    169 static bool convolve_dft(InputArray _image, InputArray _templ, OutputArray _result)
    170 {
    171     ConvolveBuf buf;
    172     CV_Assert(_image.type() == CV_32F);
    173     CV_Assert(_templ.type() == CV_32F);
    174 
    175     buf.create(_image.size(), _templ.size());
    176     _result.create(buf.result_size, CV_32F);
    177 
    178     UMat image  = _image.getUMat();
    179     UMat templ  = _templ.getUMat();
    180 
    181     UMat result = _result.getUMat();
    182 
    183     Size& block_size = buf.block_size;
    184     Size& dft_size = buf.dft_size;
    185 
    186     UMat& image_block = buf.image_block;
    187     UMat& templ_block = buf.templ_block;
    188     UMat& result_data = buf.result_data;
    189 
    190     UMat& image_spect = buf.image_spect;
    191     UMat& templ_spect = buf.templ_spect;
    192     UMat& result_spect = buf.result_spect;
    193 
    194     UMat templ_roi = templ;
    195     copyMakeBorder(templ_roi, templ_block, 0, templ_block.rows - templ_roi.rows, 0,
    196                    templ_block.cols - templ_roi.cols, BORDER_ISOLATED);
    197 
    198     dft(templ_block, templ_spect, 0, templ.rows);
    199 
    200     // Process all blocks of the result matrix
    201     for (int y = 0; y < result.rows; y += block_size.height)
    202     {
    203         for (int x = 0; x < result.cols; x += block_size.width)
    204         {
    205             Size image_roi_size(std::min(x + dft_size.width, image.cols) - x,
    206                                 std::min(y + dft_size.height, image.rows) - y);
    207             Rect roi0(x, y, image_roi_size.width, image_roi_size.height);
    208 
    209             UMat image_roi(image, roi0);
    210 
    211             copyMakeBorder(image_roi, image_block, 0, image_block.rows - image_roi.rows,
    212                            0, image_block.cols - image_roi.cols, BORDER_ISOLATED);
    213 
    214             dft(image_block, image_spect, 0);
    215 
    216             mulSpectrums(image_spect, templ_spect, result_spect, 0, true);
    217 
    218             dft(result_spect, result_data, cv::DFT_INVERSE | cv::DFT_REAL_OUTPUT | cv::DFT_SCALE);
    219 
    220             Size result_roi_size(std::min(x + block_size.width, result.cols) - x,
    221                                  std::min(y + block_size.height, result.rows) - y);
    222 
    223             Rect roi1(x, y, result_roi_size.width, result_roi_size.height);
    224             Rect roi2(0, 0, result_roi_size.width, result_roi_size.height);
    225 
    226             UMat result_roi(result, roi1);
    227             UMat result_block(result_data, roi2);
    228 
    229             result_block.copyTo(result_roi);
    230         }
    231     }
    232     return true;
    233 }
    234 
    235 static bool convolve_32F(InputArray _image, InputArray _templ, OutputArray _result)
    236 {
    237     _result.create(_image.rows() - _templ.rows() + 1, _image.cols() - _templ.cols() + 1, CV_32F);
    238 
    239     if (_image.channels() == 1)
    240         return(convolve_dft(_image, _templ, _result));
    241     else
    242     {
    243         UMat image = _image.getUMat();
    244         UMat templ = _templ.getUMat();
    245         UMat result_(image.rows-templ.rows+1,(image.cols-templ.cols+1)*image.channels(), CV_32F);
    246         bool ok = convolve_dft(image.reshape(1), templ.reshape(1), result_);
    247         if (ok==false)
    248             return false;
    249         UMat result = _result.getUMat();
    250         return (extractFirstChannel_32F(result_, _result, _image.channels()));
    251     }
    252 }
    253 
    254 static bool matchTemplateNaive_CCORR(InputArray _image, InputArray _templ, OutputArray _result)
    255 {
    256     int type = _image.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
    257     int wdepth = CV_32F, wtype = CV_MAKE_TYPE(wdepth, cn);
    258 
    259     ocl::Device dev = ocl::Device::getDefault();
    260     int pxPerWIx = (cn==1 && dev.isIntel() && (dev.type() & ocl::Device::TYPE_GPU)) ? 4 : 1;
    261     int rated_cn = cn;
    262     int wtype1 = wtype;
    263 
    264     if (pxPerWIx!=1)
    265     {
    266         rated_cn = pxPerWIx;
    267         type = CV_MAKE_TYPE(depth, rated_cn);
    268         wtype1 = CV_MAKE_TYPE(wdepth, rated_cn);
    269     }
    270 
    271     char cvt[40];
    272     char cvt1[40];
    273     const char* convertToWT1 = ocl::convertTypeStr(depth, wdepth, cn, cvt);
    274     const char* convertToWT = ocl::convertTypeStr(depth, wdepth, rated_cn, cvt1);
    275 
    276     ocl::Kernel k("matchTemplate_Naive_CCORR", ocl::imgproc::match_template_oclsrc,
    277                   format("-D CCORR -D T=%s -D T1=%s -D WT=%s -D WT1=%s -D convertToWT=%s -D convertToWT1=%s -D cn=%d -D PIX_PER_WI_X=%d", ocl::typeToStr(type), ocl::typeToStr(depth), ocl::typeToStr(wtype1), ocl::typeToStr(wtype),
    278                          convertToWT, convertToWT1, cn, pxPerWIx));
    279     if (k.empty())
    280         return false;
    281 
    282     UMat image = _image.getUMat(), templ = _templ.getUMat();
    283     _result.create(image.rows - templ.rows + 1, image.cols - templ.cols + 1, CV_32FC1);
    284     UMat result = _result.getUMat();
    285 
    286     k.args(ocl::KernelArg::ReadOnlyNoSize(image), ocl::KernelArg::ReadOnly(templ),
    287            ocl::KernelArg::WriteOnly(result));
    288 
    289     size_t globalsize[2] = { (result.cols+pxPerWIx-1)/pxPerWIx, result.rows};
    290     return k.run(2, globalsize, NULL, false);
    291 }
    292 
    293 
    294 static bool matchTemplate_CCORR(InputArray _image, InputArray _templ, OutputArray _result)
    295 {
    296     if (useNaive(_templ.size()))
    297         return( matchTemplateNaive_CCORR(_image, _templ, _result));
    298     else
    299     {
    300         if(_image.depth() == CV_8U)
    301         {
    302             UMat imagef, templf;
    303             UMat image = _image.getUMat();
    304             UMat templ = _templ.getUMat();
    305             image.convertTo(imagef, CV_32F);
    306             templ.convertTo(templf, CV_32F);
    307             return(convolve_32F(imagef, templf, _result));
    308         }
    309         else
    310         {
    311             return(convolve_32F(_image, _templ, _result));
    312         }
    313     }
    314 }
    315 
    316 static bool matchTemplate_CCORR_NORMED(InputArray _image, InputArray _templ, OutputArray _result)
    317 {
    318     matchTemplate(_image, _templ, _result, CV_TM_CCORR);
    319 
    320     int type = _image.type(), cn = CV_MAT_CN(type);
    321 
    322     ocl::Kernel k("matchTemplate_CCORR_NORMED", ocl::imgproc::match_template_oclsrc,
    323                   format("-D CCORR_NORMED -D T=%s -D cn=%d", ocl::typeToStr(type), cn));
    324     if (k.empty())
    325         return false;
    326 
    327     UMat image = _image.getUMat(), templ = _templ.getUMat();
    328     _result.create(image.rows - templ.rows + 1, image.cols - templ.cols + 1, CV_32FC1);
    329     UMat result = _result.getUMat();
    330 
    331     UMat image_sums, image_sqsums;
    332     integral(image.reshape(1), image_sums, image_sqsums, CV_32F, CV_32F);
    333 
    334     UMat templ_sqsum;
    335     if (!sumTemplate(templ, templ_sqsum))
    336         return false;
    337 
    338     k.args(ocl::KernelArg::ReadOnlyNoSize(image_sqsums), ocl::KernelArg::ReadWrite(result),
    339            templ.rows, templ.cols, ocl::KernelArg::PtrReadOnly(templ_sqsum));
    340 
    341     size_t globalsize[2] = { result.cols, result.rows };
    342     return k.run(2, globalsize, NULL, false);
    343 }
    344 
    345 ////////////////////////////////////// SQDIFF //////////////////////////////////////////////////////////////
    346 
    347 static bool matchTemplateNaive_SQDIFF(InputArray _image, InputArray _templ, OutputArray _result)
    348 {
    349     int type = _image.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
    350     int wdepth = CV_32F, wtype = CV_MAKE_TYPE(wdepth, cn);
    351 
    352     char cvt[40];
    353     ocl::Kernel k("matchTemplate_Naive_SQDIFF", ocl::imgproc::match_template_oclsrc,
    354                   format("-D SQDIFF -D T=%s -D T1=%s -D WT=%s -D convertToWT=%s -D cn=%d", ocl::typeToStr(type), ocl::typeToStr(depth),
    355                          ocl::typeToStr(wtype), ocl::convertTypeStr(depth, wdepth, cn, cvt), cn));
    356     if (k.empty())
    357         return false;
    358 
    359     UMat image = _image.getUMat(), templ = _templ.getUMat();
    360     _result.create(image.rows - templ.rows + 1, image.cols - templ.cols + 1, CV_32F);
    361     UMat result = _result.getUMat();
    362 
    363     k.args(ocl::KernelArg::ReadOnlyNoSize(image), ocl::KernelArg::ReadOnly(templ),
    364            ocl::KernelArg::WriteOnly(result));
    365 
    366     size_t globalsize[2] = { result.cols, result.rows };
    367     return k.run(2, globalsize, NULL, false);
    368 }
    369 
    370 static bool matchTemplate_SQDIFF(InputArray _image, InputArray _templ, OutputArray _result)
    371 {
    372     if (useNaive(_templ.size()))
    373         return( matchTemplateNaive_SQDIFF(_image, _templ, _result));
    374     else
    375     {
    376         matchTemplate(_image, _templ, _result, CV_TM_CCORR);
    377 
    378         int type = _image.type(), cn = CV_MAT_CN(type);
    379 
    380         ocl::Kernel k("matchTemplate_Prepared_SQDIFF", ocl::imgproc::match_template_oclsrc,
    381                   format("-D SQDIFF_PREPARED -D T=%s -D cn=%d", ocl::typeToStr(type),  cn));
    382         if (k.empty())
    383             return false;
    384 
    385         UMat image = _image.getUMat(), templ = _templ.getUMat();
    386         _result.create(image.rows - templ.rows + 1, image.cols - templ.cols + 1, CV_32F);
    387         UMat result = _result.getUMat();
    388 
    389         UMat image_sums, image_sqsums;
    390         integral(image.reshape(1), image_sums, image_sqsums, CV_32F, CV_32F);
    391 
    392         UMat templ_sqsum;
    393         if (!sumTemplate(_templ, templ_sqsum))
    394             return false;
    395 
    396         k.args(ocl::KernelArg::ReadOnlyNoSize(image_sqsums), ocl::KernelArg::ReadWrite(result),
    397            templ.rows, templ.cols, ocl::KernelArg::PtrReadOnly(templ_sqsum));
    398 
    399         size_t globalsize[2] = { result.cols, result.rows };
    400 
    401         return k.run(2, globalsize, NULL, false);
    402     }
    403 }
    404 
    405 static bool matchTemplate_SQDIFF_NORMED(InputArray _image, InputArray _templ, OutputArray _result)
    406 {
    407     matchTemplate(_image, _templ, _result, CV_TM_CCORR);
    408 
    409     int type = _image.type(), cn = CV_MAT_CN(type);
    410 
    411     ocl::Kernel k("matchTemplate_SQDIFF_NORMED", ocl::imgproc::match_template_oclsrc,
    412                   format("-D SQDIFF_NORMED -D T=%s -D cn=%d", ocl::typeToStr(type),  cn));
    413     if (k.empty())
    414         return false;
    415 
    416     UMat image = _image.getUMat(), templ = _templ.getUMat();
    417     _result.create(image.rows - templ.rows + 1, image.cols - templ.cols + 1, CV_32F);
    418     UMat result = _result.getUMat();
    419 
    420     UMat image_sums, image_sqsums;
    421     integral(image.reshape(1), image_sums, image_sqsums, CV_32F, CV_32F);
    422 
    423     UMat templ_sqsum;
    424     if (!sumTemplate(_templ, templ_sqsum))
    425         return false;
    426 
    427     k.args(ocl::KernelArg::ReadOnlyNoSize(image_sqsums), ocl::KernelArg::ReadWrite(result),
    428            templ.rows, templ.cols, ocl::KernelArg::PtrReadOnly(templ_sqsum));
    429 
    430     size_t globalsize[2] = { result.cols, result.rows };
    431 
    432     return k.run(2, globalsize, NULL, false);
    433 }
    434 
    435 ///////////////////////////////////// CCOEFF /////////////////////////////////////////////////////////////////
    436 
    437 static bool matchTemplate_CCOEFF(InputArray _image, InputArray _templ, OutputArray _result)
    438 {
    439     matchTemplate(_image, _templ, _result, CV_TM_CCORR);
    440 
    441     UMat image_sums, temp;
    442     integral(_image, image_sums, CV_32F);
    443 
    444     int type = image_sums.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
    445 
    446     ocl::Kernel k("matchTemplate_Prepared_CCOEFF", ocl::imgproc::match_template_oclsrc,
    447                   format("-D CCOEFF -D T=%s -D T1=%s -D cn=%d", ocl::typeToStr(type), ocl::typeToStr(depth), cn));
    448     if (k.empty())
    449         return false;
    450 
    451     UMat templ  = _templ.getUMat();
    452     UMat result = _result.getUMat();
    453 
    454     if (cn==1)
    455     {
    456         Scalar templMean = mean(templ);
    457         float templ_sum = (float)templMean[0];
    458 
    459         k.args(ocl::KernelArg::ReadOnlyNoSize(image_sums), ocl::KernelArg::ReadWrite(result), templ.rows, templ.cols, templ_sum);
    460     }
    461     else
    462     {
    463         Vec4f templ_sum = Vec4f::all(0);
    464         templ_sum = (Vec4f)mean(templ);
    465 
    466        k.args(ocl::KernelArg::ReadOnlyNoSize(image_sums), ocl::KernelArg::ReadWrite(result), templ.rows, templ.cols, templ_sum);    }
    467 
    468     size_t globalsize[2] = { result.cols, result.rows };
    469     return k.run(2, globalsize, NULL, false);
    470 }
    471 
    472 static bool matchTemplate_CCOEFF_NORMED(InputArray _image, InputArray _templ, OutputArray _result)
    473 {
    474     matchTemplate(_image, _templ, _result, CV_TM_CCORR);
    475 
    476     UMat temp, image_sums, image_sqsums;
    477     integral(_image, image_sums, image_sqsums, CV_32F, CV_32F);
    478 
    479     int type = image_sums.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
    480 
    481     ocl::Kernel k("matchTemplate_CCOEFF_NORMED", ocl::imgproc::match_template_oclsrc,
    482         format("-D CCOEFF_NORMED -D T=%s -D T1=%s -D cn=%d", ocl::typeToStr(type), ocl::typeToStr(depth), cn));
    483     if (k.empty())
    484         return false;
    485 
    486     UMat templ = _templ.getUMat();
    487     Size size = _image.size(), tsize = templ.size();
    488     _result.create(size.height - templ.rows + 1, size.width - templ.cols + 1, CV_32F);
    489     UMat result = _result.getUMat();
    490 
    491     float scale = 1.f / tsize.area();
    492 
    493     if (cn == 1)
    494     {
    495         float templ_sum = (float)sum(templ)[0];
    496 
    497         multiply(templ, templ, temp, 1, CV_32F);
    498         float templ_sqsum = (float)sum(temp)[0];
    499 
    500         templ_sqsum -= scale * templ_sum * templ_sum;
    501         templ_sum   *= scale;
    502 
    503         if (templ_sqsum < DBL_EPSILON)
    504         {
    505             result = Scalar::all(1);
    506             return true;
    507         }
    508 
    509         k.args(ocl::KernelArg::ReadOnlyNoSize(image_sums), ocl::KernelArg::ReadOnlyNoSize(image_sqsums),
    510                       ocl::KernelArg::ReadWrite(result), templ.rows, templ.cols, scale, templ_sum, templ_sqsum);
    511     }
    512     else
    513     {
    514         Vec4f templ_sum = Vec4f::all(0), templ_sqsum = Vec4f::all(0);
    515         templ_sum = sum(templ);
    516 
    517         multiply(templ, templ, temp, 1, CV_32F);
    518         templ_sqsum = sum(temp);
    519 
    520         float templ_sqsum_sum = 0;
    521         for (int i = 0; i < cn; i ++)
    522             templ_sqsum_sum += templ_sqsum[i] - scale * templ_sum[i] * templ_sum[i];
    523 
    524         templ_sum *= scale;
    525 
    526         if (templ_sqsum_sum < DBL_EPSILON)
    527         {
    528             result = Scalar::all(1);
    529             return true;
    530         }
    531 
    532         k.args(ocl::KernelArg::ReadOnlyNoSize(image_sums), ocl::KernelArg::ReadOnlyNoSize(image_sqsums),
    533                    ocl::KernelArg::ReadWrite(result), templ.rows, templ.cols, scale,
    534                    templ_sum, templ_sqsum_sum);    }
    535 
    536     size_t globalsize[2] = { result.cols, result.rows };
    537     return k.run(2, globalsize, NULL, false);
    538 }
    539 
    540 ///////////////////////////////////////////////////////////////////////////////////////////////////////////
    541 
    542 static bool ocl_matchTemplate( InputArray _img, InputArray _templ, OutputArray _result, int method)
    543 {
    544     int cn = _img.channels();
    545 
    546     if (cn > 4)
    547         return false;
    548 
    549     typedef bool (*Caller)(InputArray _img, InputArray _templ, OutputArray _result);
    550 
    551     static const Caller callers[] =
    552     {
    553         matchTemplate_SQDIFF, matchTemplate_SQDIFF_NORMED, matchTemplate_CCORR,
    554         matchTemplate_CCORR_NORMED, matchTemplate_CCOEFF, matchTemplate_CCOEFF_NORMED
    555     };
    556     const Caller caller = callers[method];
    557 
    558     return caller(_img, _templ, _result);
    559 }
    560 
    561 #endif
    562 
    563 #if defined HAVE_IPP
    564 
    565 typedef IppStatus (CV_STDCALL * ippimatchTemplate)(const void*, int, IppiSize, const void*, int, IppiSize, Ipp32f* , int , IppEnum , Ipp8u*);
    566 
    567 static bool ipp_crossCorr(const Mat& src, const Mat& tpl, Mat& dst)
    568 {
    569     IppStatus status;
    570 
    571     IppiSize srcRoiSize = {src.cols,src.rows};
    572     IppiSize tplRoiSize = {tpl.cols,tpl.rows};
    573 
    574     Ipp8u *pBuffer;
    575     int bufSize=0;
    576 
    577     int depth = src.depth();
    578 
    579     ippimatchTemplate ippFunc =
    580             depth==CV_8U ? (ippimatchTemplate)ippiCrossCorrNorm_8u32f_C1R:
    581             depth==CV_32F? (ippimatchTemplate)ippiCrossCorrNorm_32f_C1R: 0;
    582 
    583     if (ippFunc==0)
    584         return false;
    585 
    586     IppEnum funCfg = (IppEnum)(ippAlgAuto | ippiNormNone | ippiROIValid);
    587 
    588     status = ippiCrossCorrNormGetBufferSize(srcRoiSize, tplRoiSize, funCfg, &bufSize);
    589     if ( status < 0 )
    590         return false;
    591 
    592     pBuffer = ippsMalloc_8u( bufSize );
    593 
    594     status = ippFunc(src.ptr(), (int)src.step, srcRoiSize, tpl.ptr(), (int)tpl.step, tplRoiSize, dst.ptr<Ipp32f>(), (int)dst.step, funCfg, pBuffer);
    595 
    596     ippsFree( pBuffer );
    597     return status >= 0;
    598 }
    599 
    600 static bool ipp_sqrDistance(const Mat& src, const Mat& tpl, Mat& dst)
    601 {
    602     IppStatus status;
    603 
    604     IppiSize srcRoiSize = {src.cols,src.rows};
    605     IppiSize tplRoiSize = {tpl.cols,tpl.rows};
    606 
    607     Ipp8u *pBuffer;
    608     int bufSize=0;
    609 
    610     int depth = src.depth();
    611 
    612     ippimatchTemplate ippFunc =
    613             depth==CV_8U ? (ippimatchTemplate)ippiSqrDistanceNorm_8u32f_C1R:
    614             depth==CV_32F? (ippimatchTemplate)ippiSqrDistanceNorm_32f_C1R: 0;
    615 
    616     if (ippFunc==0)
    617         return false;
    618 
    619     IppEnum funCfg = (IppEnum)(ippAlgAuto | ippiNormNone | ippiROIValid);
    620 
    621     status = ippiSqrDistanceNormGetBufferSize(srcRoiSize, tplRoiSize, funCfg, &bufSize);
    622     if ( status < 0 )
    623         return false;
    624 
    625     pBuffer = ippsMalloc_8u( bufSize );
    626 
    627     status = ippFunc(src.ptr(), (int)src.step, srcRoiSize, tpl.ptr(), (int)tpl.step, tplRoiSize, dst.ptr<Ipp32f>(), (int)dst.step, funCfg, pBuffer);
    628 
    629     ippsFree( pBuffer );
    630     return status >= 0;
    631 }
    632 
    633 #endif
    634 
    635 void crossCorr( const Mat& img, const Mat& _templ, Mat& corr,
    636                 Size corrsize, int ctype,
    637                 Point anchor, double delta, int borderType )
    638 {
    639     const double blockScale = 4.5;
    640     const int minBlockSize = 256;
    641     std::vector<uchar> buf;
    642 
    643     Mat templ = _templ;
    644     int depth = img.depth(), cn = img.channels();
    645     int tdepth = templ.depth(), tcn = templ.channels();
    646     int cdepth = CV_MAT_DEPTH(ctype), ccn = CV_MAT_CN(ctype);
    647 
    648     CV_Assert( img.dims <= 2 && templ.dims <= 2 && corr.dims <= 2 );
    649 
    650     if( depth != tdepth && tdepth != std::max(CV_32F, depth) )
    651     {
    652         _templ.convertTo(templ, std::max(CV_32F, depth));
    653         tdepth = templ.depth();
    654     }
    655 
    656     CV_Assert( depth == tdepth || tdepth == CV_32F);
    657     CV_Assert( corrsize.height <= img.rows + templ.rows - 1 &&
    658                corrsize.width <= img.cols + templ.cols - 1 );
    659 
    660     CV_Assert( ccn == 1 || delta == 0 );
    661 
    662     corr.create(corrsize, ctype);
    663 
    664     int maxDepth = depth > CV_8S ? CV_64F : std::max(std::max(CV_32F, tdepth), cdepth);
    665     Size blocksize, dftsize;
    666 
    667     blocksize.width = cvRound(templ.cols*blockScale);
    668     blocksize.width = std::max( blocksize.width, minBlockSize - templ.cols + 1 );
    669     blocksize.width = std::min( blocksize.width, corr.cols );
    670     blocksize.height = cvRound(templ.rows*blockScale);
    671     blocksize.height = std::max( blocksize.height, minBlockSize - templ.rows + 1 );
    672     blocksize.height = std::min( blocksize.height, corr.rows );
    673 
    674     dftsize.width = std::max(getOptimalDFTSize(blocksize.width + templ.cols - 1), 2);
    675     dftsize.height = getOptimalDFTSize(blocksize.height + templ.rows - 1);
    676     if( dftsize.width <= 0 || dftsize.height <= 0 )
    677         CV_Error( CV_StsOutOfRange, "the input arrays are too big" );
    678 
    679     // recompute block size
    680     blocksize.width = dftsize.width - templ.cols + 1;
    681     blocksize.width = MIN( blocksize.width, corr.cols );
    682     blocksize.height = dftsize.height - templ.rows + 1;
    683     blocksize.height = MIN( blocksize.height, corr.rows );
    684 
    685     Mat dftTempl( dftsize.height*tcn, dftsize.width, maxDepth );
    686     Mat dftImg( dftsize, maxDepth );
    687 
    688     int i, k, bufSize = 0;
    689     if( tcn > 1 && tdepth != maxDepth )
    690         bufSize = templ.cols*templ.rows*CV_ELEM_SIZE(tdepth);
    691 
    692     if( cn > 1 && depth != maxDepth )
    693         bufSize = std::max( bufSize, (blocksize.width + templ.cols - 1)*
    694             (blocksize.height + templ.rows - 1)*CV_ELEM_SIZE(depth));
    695 
    696     if( (ccn > 1 || cn > 1) && cdepth != maxDepth )
    697         bufSize = std::max( bufSize, blocksize.width*blocksize.height*CV_ELEM_SIZE(cdepth));
    698 
    699     buf.resize(bufSize);
    700 
    701     // compute DFT of each template plane
    702     for( k = 0; k < tcn; k++ )
    703     {
    704         int yofs = k*dftsize.height;
    705         Mat src = templ;
    706         Mat dst(dftTempl, Rect(0, yofs, dftsize.width, dftsize.height));
    707         Mat dst1(dftTempl, Rect(0, yofs, templ.cols, templ.rows));
    708 
    709         if( tcn > 1 )
    710         {
    711             src = tdepth == maxDepth ? dst1 : Mat(templ.size(), tdepth, &buf[0]);
    712             int pairs[] = {k, 0};
    713             mixChannels(&templ, 1, &src, 1, pairs, 1);
    714         }
    715 
    716         if( dst1.data != src.data )
    717             src.convertTo(dst1, dst1.depth());
    718 
    719         if( dst.cols > templ.cols )
    720         {
    721             Mat part(dst, Range(0, templ.rows), Range(templ.cols, dst.cols));
    722             part = Scalar::all(0);
    723         }
    724         dft(dst, dst, 0, templ.rows);
    725     }
    726 
    727     int tileCountX = (corr.cols + blocksize.width - 1)/blocksize.width;
    728     int tileCountY = (corr.rows + blocksize.height - 1)/blocksize.height;
    729     int tileCount = tileCountX * tileCountY;
    730 
    731     Size wholeSize = img.size();
    732     Point roiofs(0,0);
    733     Mat img0 = img;
    734 
    735     if( !(borderType & BORDER_ISOLATED) )
    736     {
    737         img.locateROI(wholeSize, roiofs);
    738         img0.adjustROI(roiofs.y, wholeSize.height-img.rows-roiofs.y,
    739                        roiofs.x, wholeSize.width-img.cols-roiofs.x);
    740     }
    741     borderType |= BORDER_ISOLATED;
    742 
    743     // calculate correlation by blocks
    744     for( i = 0; i < tileCount; i++ )
    745     {
    746         int x = (i%tileCountX)*blocksize.width;
    747         int y = (i/tileCountX)*blocksize.height;
    748 
    749         Size bsz(std::min(blocksize.width, corr.cols - x),
    750                  std::min(blocksize.height, corr.rows - y));
    751         Size dsz(bsz.width + templ.cols - 1, bsz.height + templ.rows - 1);
    752         int x0 = x - anchor.x + roiofs.x, y0 = y - anchor.y + roiofs.y;
    753         int x1 = std::max(0, x0), y1 = std::max(0, y0);
    754         int x2 = std::min(img0.cols, x0 + dsz.width);
    755         int y2 = std::min(img0.rows, y0 + dsz.height);
    756         Mat src0(img0, Range(y1, y2), Range(x1, x2));
    757         Mat dst(dftImg, Rect(0, 0, dsz.width, dsz.height));
    758         Mat dst1(dftImg, Rect(x1-x0, y1-y0, x2-x1, y2-y1));
    759         Mat cdst(corr, Rect(x, y, bsz.width, bsz.height));
    760 
    761         for( k = 0; k < cn; k++ )
    762         {
    763             Mat src = src0;
    764             dftImg = Scalar::all(0);
    765 
    766             if( cn > 1 )
    767             {
    768                 src = depth == maxDepth ? dst1 : Mat(y2-y1, x2-x1, depth, &buf[0]);
    769                 int pairs[] = {k, 0};
    770                 mixChannels(&src0, 1, &src, 1, pairs, 1);
    771             }
    772 
    773             if( dst1.data != src.data )
    774                 src.convertTo(dst1, dst1.depth());
    775 
    776             if( x2 - x1 < dsz.width || y2 - y1 < dsz.height )
    777                 copyMakeBorder(dst1, dst, y1-y0, dst.rows-dst1.rows-(y1-y0),
    778                                x1-x0, dst.cols-dst1.cols-(x1-x0), borderType);
    779 
    780             dft( dftImg, dftImg, 0, dsz.height );
    781             Mat dftTempl1(dftTempl, Rect(0, tcn > 1 ? k*dftsize.height : 0,
    782                                          dftsize.width, dftsize.height));
    783             mulSpectrums(dftImg, dftTempl1, dftImg, 0, true);
    784             dft( dftImg, dftImg, DFT_INVERSE + DFT_SCALE, bsz.height );
    785 
    786             src = dftImg(Rect(0, 0, bsz.width, bsz.height));
    787 
    788             if( ccn > 1 )
    789             {
    790                 if( cdepth != maxDepth )
    791                 {
    792                     Mat plane(bsz, cdepth, &buf[0]);
    793                     src.convertTo(plane, cdepth, 1, delta);
    794                     src = plane;
    795                 }
    796                 int pairs[] = {0, k};
    797                 mixChannels(&src, 1, &cdst, 1, pairs, 1);
    798             }
    799             else
    800             {
    801                 if( k == 0 )
    802                     src.convertTo(cdst, cdepth, 1, delta);
    803                 else
    804                 {
    805                     if( maxDepth != cdepth )
    806                     {
    807                         Mat plane(bsz, cdepth, &buf[0]);
    808                         src.convertTo(plane, cdepth);
    809                         src = plane;
    810                     }
    811                     add(src, cdst, cdst);
    812                 }
    813             }
    814         }
    815     }
    816 }
    817 
    818 static void matchTemplateMask( InputArray _img, InputArray _templ, OutputArray _result, int method, InputArray _mask )
    819 {
    820     int type = _img.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
    821     CV_Assert( CV_TM_SQDIFF <= method && method <= CV_TM_CCOEFF_NORMED );
    822     CV_Assert( (depth == CV_8U || depth == CV_32F) && type == _templ.type() && _img.dims() <= 2 );
    823 
    824     Mat img = _img.getMat(), templ = _templ.getMat(), mask = _mask.getMat();
    825     int ttype = templ.type(), tdepth = CV_MAT_DEPTH(ttype), tcn = CV_MAT_CN(ttype);
    826     int mtype = img.type(), mdepth = CV_MAT_DEPTH(type), mcn = CV_MAT_CN(mtype);
    827 
    828     if (depth == CV_8U)
    829     {
    830         depth = CV_32F;
    831         type = CV_MAKETYPE(CV_32F, cn);
    832         img.convertTo(img, type, 1.0 / 255);
    833     }
    834 
    835     if (tdepth == CV_8U)
    836     {
    837         tdepth = CV_32F;
    838         ttype = CV_MAKETYPE(CV_32F, tcn);
    839         templ.convertTo(templ, ttype, 1.0 / 255);
    840     }
    841 
    842     if (mdepth == CV_8U)
    843     {
    844         mdepth = CV_32F;
    845         mtype = CV_MAKETYPE(CV_32F, mcn);
    846         compare(mask, Scalar::all(0), mask, CMP_NE);
    847         mask.convertTo(mask, mtype, 1.0 / 255);
    848     }
    849 
    850     Size corrSize(img.cols - templ.cols + 1, img.rows - templ.rows + 1);
    851     _result.create(corrSize, CV_32F);
    852     Mat result = _result.getMat();
    853 
    854     Mat img2 = img.mul(img);
    855     Mat mask2 = mask.mul(mask);
    856     Mat mask_templ = templ.mul(mask);
    857     Scalar templMean, templSdv;
    858 
    859     double templSum2 = 0;
    860     meanStdDev( mask_templ, templMean, templSdv );
    861 
    862     templSum2 = templSdv[0]*templSdv[0] + templSdv[1]*templSdv[1] + templSdv[2]*templSdv[2] + templSdv[3]*templSdv[3];
    863     templSum2 += templMean[0]*templMean[0] + templMean[1]*templMean[1] + templMean[2]*templMean[2] + templMean[3]*templMean[3];
    864     templSum2 *= ((double)templ.rows * templ.cols);
    865 
    866     if (method == CV_TM_SQDIFF)
    867     {
    868         Mat mask2_templ = templ.mul(mask2);
    869 
    870         Mat corr(corrSize, CV_32F);
    871         crossCorr( img, mask2_templ, corr, corr.size(), corr.type(), Point(0,0), 0, 0 );
    872         crossCorr( img2, mask, result, result.size(), result.type(), Point(0,0), 0, 0 );
    873 
    874         result -= corr * 2;
    875         result += templSum2;
    876     }
    877     else if (method == CV_TM_CCORR_NORMED)
    878     {
    879         if (templSum2 < DBL_EPSILON)
    880         {
    881             result = Scalar::all(1);
    882             return;
    883         }
    884 
    885         Mat corr(corrSize, CV_32F);
    886         crossCorr( img2, mask2, corr, corr.size(), corr.type(), Point(0,0), 0, 0 );
    887         crossCorr( img, mask_templ, result, result.size(), result.type(), Point(0,0), 0, 0 );
    888 
    889         sqrt(corr, corr);
    890         result = result.mul(1/corr);
    891         result /= std::sqrt(templSum2);
    892     }
    893     else
    894         CV_Error(Error::StsNotImplemented, "");
    895 }
    896 }
    897 
    898 ////////////////////////////////////////////////////////////////////////////////////////////////////////
    899 
    900 void cv::matchTemplate( InputArray _img, InputArray _templ, OutputArray _result, int method, InputArray _mask )
    901 {
    902     if (!_mask.empty())
    903     {
    904         cv::matchTemplateMask(_img, _templ, _result, method, _mask);
    905         return;
    906     }
    907 
    908     int type = _img.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
    909     CV_Assert( CV_TM_SQDIFF <= method && method <= CV_TM_CCOEFF_NORMED );
    910     CV_Assert( (depth == CV_8U || depth == CV_32F) && type == _templ.type() && _img.dims() <= 2 );
    911 
    912     bool needswap = _img.size().height < _templ.size().height || _img.size().width < _templ.size().width;
    913     if (needswap)
    914     {
    915         CV_Assert(_img.size().height <= _templ.size().height && _img.size().width <= _templ.size().width);
    916     }
    917 
    918     CV_OCL_RUN(_img.dims() <= 2 && _result.isUMat(),
    919                (!needswap ? ocl_matchTemplate(_img, _templ, _result, method) : ocl_matchTemplate(_templ, _img, _result, method)))
    920 
    921     int numType = method == CV_TM_CCORR || method == CV_TM_CCORR_NORMED ? 0 :
    922                   method == CV_TM_CCOEFF || method == CV_TM_CCOEFF_NORMED ? 1 : 2;
    923     bool isNormed = method == CV_TM_CCORR_NORMED ||
    924                     method == CV_TM_SQDIFF_NORMED ||
    925                     method == CV_TM_CCOEFF_NORMED;
    926 
    927     Mat img = _img.getMat(), templ = _templ.getMat();
    928     if (needswap)
    929         std::swap(img, templ);
    930 
    931     Size corrSize(img.cols - templ.cols + 1, img.rows - templ.rows + 1);
    932     _result.create(corrSize, CV_32F);
    933     Mat result = _result.getMat();
    934 
    935 #ifdef HAVE_TEGRA_OPTIMIZATION
    936     if (tegra::useTegra() && tegra::matchTemplate(img, templ, result, method))
    937         return;
    938 #endif
    939 
    940 #if defined HAVE_IPP
    941     bool useIppMT = false;
    942     CV_IPP_CHECK()
    943     {
    944         useIppMT = (templ.rows < img.rows/2 && templ.cols < img.cols/2);
    945 
    946         if (method == CV_TM_SQDIFF && cn == 1 && useIppMT)
    947         {
    948             if (ipp_sqrDistance(img, templ, result))
    949             {
    950                 CV_IMPL_ADD(CV_IMPL_IPP);
    951                 return;
    952             }
    953             setIppErrorStatus();
    954         }
    955     }
    956 #endif
    957 
    958 #if defined HAVE_IPP
    959     if (cn == 1 && useIppMT)
    960     {
    961         if (!ipp_crossCorr(img, templ, result))
    962         {
    963             setIppErrorStatus();
    964             crossCorr( img, templ, result, result.size(), result.type(), Point(0,0), 0, 0);
    965         }
    966         else
    967         {
    968             CV_IMPL_ADD(CV_IMPL_IPP);
    969         }
    970     }
    971     else
    972 #endif
    973     crossCorr( img, templ, result, result.size(), result.type(), Point(0,0), 0, 0);
    974 
    975     if( method == CV_TM_CCORR )
    976         return;
    977 
    978     double invArea = 1./((double)templ.rows * templ.cols);
    979 
    980     Mat sum, sqsum;
    981     Scalar templMean, templSdv;
    982     double *q0 = 0, *q1 = 0, *q2 = 0, *q3 = 0;
    983     double templNorm = 0, templSum2 = 0;
    984 
    985     if( method == CV_TM_CCOEFF )
    986     {
    987         integral(img, sum, CV_64F);
    988         templMean = mean(templ);
    989     }
    990     else
    991     {
    992         integral(img, sum, sqsum, CV_64F);
    993         meanStdDev( templ, templMean, templSdv );
    994 
    995         templNorm = templSdv[0]*templSdv[0] + templSdv[1]*templSdv[1] + templSdv[2]*templSdv[2] + templSdv[3]*templSdv[3];
    996 
    997         if( templNorm < DBL_EPSILON && method == CV_TM_CCOEFF_NORMED )
    998         {
    999             result = Scalar::all(1);
   1000             return;
   1001         }
   1002 
   1003         templSum2 = templNorm + templMean[0]*templMean[0] + templMean[1]*templMean[1] + templMean[2]*templMean[2] + templMean[3]*templMean[3];
   1004 
   1005         if( numType != 1 )
   1006         {
   1007             templMean = Scalar::all(0);
   1008             templNorm = templSum2;
   1009         }
   1010 
   1011         templSum2 /= invArea;
   1012         templNorm = std::sqrt(templNorm);
   1013         templNorm /= std::sqrt(invArea); // care of accuracy here
   1014 
   1015         q0 = (double*)sqsum.data;
   1016         q1 = q0 + templ.cols*cn;
   1017         q2 = (double*)(sqsum.data + templ.rows*sqsum.step);
   1018         q3 = q2 + templ.cols*cn;
   1019     }
   1020 
   1021     double* p0 = (double*)sum.data;
   1022     double* p1 = p0 + templ.cols*cn;
   1023     double* p2 = (double*)(sum.data + templ.rows*sum.step);
   1024     double* p3 = p2 + templ.cols*cn;
   1025 
   1026     int sumstep = sum.data ? (int)(sum.step / sizeof(double)) : 0;
   1027     int sqstep = sqsum.data ? (int)(sqsum.step / sizeof(double)) : 0;
   1028 
   1029     int i, j, k;
   1030 
   1031     for( i = 0; i < result.rows; i++ )
   1032     {
   1033         float* rrow = result.ptr<float>(i);
   1034         int idx = i * sumstep;
   1035         int idx2 = i * sqstep;
   1036 
   1037         for( j = 0; j < result.cols; j++, idx += cn, idx2 += cn )
   1038         {
   1039             double num = rrow[j], t;
   1040             double wndMean2 = 0, wndSum2 = 0;
   1041 
   1042             if( numType == 1 )
   1043             {
   1044                 for( k = 0; k < cn; k++ )
   1045                 {
   1046                     t = p0[idx+k] - p1[idx+k] - p2[idx+k] + p3[idx+k];
   1047                     wndMean2 += t*t;
   1048                     num -= t*templMean[k];
   1049                 }
   1050 
   1051                 wndMean2 *= invArea;
   1052             }
   1053 
   1054             if( isNormed || numType == 2 )
   1055             {
   1056                 for( k = 0; k < cn; k++ )
   1057                 {
   1058                     t = q0[idx2+k] - q1[idx2+k] - q2[idx2+k] + q3[idx2+k];
   1059                     wndSum2 += t;
   1060                 }
   1061 
   1062                 if( numType == 2 )
   1063                 {
   1064                     num = wndSum2 - 2*num + templSum2;
   1065                     num = MAX(num, 0.);
   1066                 }
   1067             }
   1068 
   1069             if( isNormed )
   1070             {
   1071                 t = std::sqrt(MAX(wndSum2 - wndMean2,0))*templNorm;
   1072                 if( fabs(num) < t )
   1073                     num /= t;
   1074                 else if( fabs(num) < t*1.125 )
   1075                     num = num > 0 ? 1 : -1;
   1076                 else
   1077                     num = method != CV_TM_SQDIFF_NORMED ? 0 : 1;
   1078             }
   1079 
   1080             rrow[j] = (float)num;
   1081         }
   1082     }
   1083 }
   1084 
   1085 
   1086 CV_IMPL void
   1087 cvMatchTemplate( const CvArr* _img, const CvArr* _templ, CvArr* _result, int method )
   1088 {
   1089     cv::Mat img = cv::cvarrToMat(_img), templ = cv::cvarrToMat(_templ),
   1090         result = cv::cvarrToMat(_result);
   1091     CV_Assert( result.size() == cv::Size(std::abs(img.cols - templ.cols) + 1,
   1092                                          std::abs(img.rows - templ.rows) + 1) &&
   1093               result.type() == CV_32F );
   1094     matchTemplate(img, templ, result, method);
   1095 }
   1096 
   1097 /* End of file. */
   1098