1 /*M/////////////////////////////////////////////////////////////////////////////////////// 2 // 3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. 4 // 5 // By downloading, copying, installing or using the software you agree to this license. 6 // If you do not agree to this license, do not download, install, 7 // copy or use the software. 8 // 9 // 10 // Intel License Agreement 11 // For Open Source Computer Vision Library 12 // 13 // Copyright (C) 2000, Intel Corporation, all rights reserved. 14 // Third party copyrights are property of their respective owners. 15 // 16 // Redistribution and use in source and binary forms, with or without modification, 17 // are permitted provided that the following conditions are met: 18 // 19 // * Redistribution's of source code must retain the above copyright notice, 20 // this list of conditions and the following disclaimer. 21 // 22 // * Redistribution's in binary form must reproduce the above copyright notice, 23 // this list of conditions and the following disclaimer in the documentation 24 // and/or other materials provided with the distribution. 25 // 26 // * The name of Intel Corporation may not be used to endorse or promote products 27 // derived from this software without specific prior written permission. 28 // 29 // This software is provided by the copyright holders and contributors "as is" and 30 // any express or implied warranties, including, but not limited to, the implied 31 // warranties of merchantability and fitness for a particular purpose are disclaimed. 32 // In no event shall the Intel Corporation or contributors be liable for any direct, 33 // indirect, incidental, special, exemplary, or consequential damages 34 // (including, but not limited to, procurement of substitute goods or services; 35 // loss of use, data, or profits; or business interruption) however caused 36 // and on any theory of liability, whether in contract, strict liability, 37 // or tort (including negligence or otherwise) arising in any way out of 38 // the use of this software, even if advised of the possibility of such damage. 39 // 40 //M*/ 41 42 #include "precomp.hpp" 43 #include "opencl_kernels_imgproc.hpp" 44 45 ////////////////////////////////////////////////// matchTemplate ////////////////////////////////////////////////////////// 46 47 namespace cv 48 { 49 50 #ifdef HAVE_OPENCL 51 52 /////////////////////////////////////////////////// CCORR ////////////////////////////////////////////////////////////// 53 54 enum 55 { 56 SUM_1 = 0, SUM_2 = 1 57 }; 58 59 static bool extractFirstChannel_32F(InputArray _image, OutputArray _result, int cn) 60 { 61 int depth = _image.depth(); 62 63 ocl::Device dev = ocl::Device::getDefault(); 64 int pxPerWIy = (dev.isIntel() && (dev.type() & ocl::Device::TYPE_GPU)) ? 4 : 1; 65 66 ocl::Kernel k("extractFirstChannel", ocl::imgproc::match_template_oclsrc, format("-D FIRST_CHANNEL -D T1=%s -D cn=%d -D PIX_PER_WI_Y=%d", 67 ocl::typeToStr(depth), cn, pxPerWIy)); 68 if (k.empty()) 69 return false; 70 71 UMat image = _image.getUMat(); 72 UMat result = _result.getUMat(); 73 74 75 size_t globalsize[2] = {result.cols, (result.rows+pxPerWIy-1)/pxPerWIy}; 76 return k.args(ocl::KernelArg::ReadOnlyNoSize(image), ocl::KernelArg::WriteOnly(result)).run( 2, globalsize, NULL, false); 77 } 78 79 static bool sumTemplate(InputArray _src, UMat & result) 80 { 81 int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type); 82 int wdepth = CV_32F, wtype = CV_MAKE_TYPE(wdepth, cn); 83 size_t wgs = ocl::Device::getDefault().maxWorkGroupSize(); 84 85 int wgs2_aligned = 1; 86 while (wgs2_aligned < (int)wgs) 87 wgs2_aligned <<= 1; 88 wgs2_aligned >>= 1; 89 90 char cvt[40]; 91 ocl::Kernel k("calcSum", ocl::imgproc::match_template_oclsrc, 92 format("-D CALC_SUM -D T=%s -D T1=%s -D WT=%s -D cn=%d -D convertToWT=%s -D WGS=%d -D WGS2_ALIGNED=%d", 93 ocl::typeToStr(type), ocl::typeToStr(depth), ocl::typeToStr(wtype), cn, 94 ocl::convertTypeStr(depth, wdepth, cn, cvt), 95 (int)wgs, wgs2_aligned)); 96 if (k.empty()) 97 return false; 98 99 UMat src = _src.getUMat(); 100 result.create(1, 1, CV_32FC1); 101 102 ocl::KernelArg srcarg = ocl::KernelArg::ReadOnlyNoSize(src), 103 resarg = ocl::KernelArg::PtrWriteOnly(result); 104 105 k.args(srcarg, src.cols, (int)src.total(), resarg); 106 107 size_t globalsize = wgs; 108 return k.run(1, &globalsize, &wgs, false); 109 } 110 111 static bool useNaive(Size size) 112 { 113 int dft_size = 18; 114 return size.height < dft_size && size.width < dft_size; 115 } 116 117 struct ConvolveBuf 118 { 119 Size result_size; 120 Size block_size; 121 Size user_block_size; 122 Size dft_size; 123 124 UMat image_spect, templ_spect, result_spect; 125 UMat image_block, templ_block, result_data; 126 127 void create(Size image_size, Size templ_size); 128 }; 129 130 void ConvolveBuf::create(Size image_size, Size templ_size) 131 { 132 result_size = Size(image_size.width - templ_size.width + 1, 133 image_size.height - templ_size.height + 1); 134 135 const double blockScale = 4.5; 136 const int minBlockSize = 256; 137 138 block_size.width = cvRound(result_size.width*blockScale); 139 block_size.width = std::max( block_size.width, minBlockSize - templ_size.width + 1 ); 140 block_size.width = std::min( block_size.width, result_size.width ); 141 block_size.height = cvRound(templ_size.height*blockScale); 142 block_size.height = std::max( block_size.height, minBlockSize - templ_size.height + 1 ); 143 block_size.height = std::min( block_size.height, result_size.height ); 144 145 dft_size.width = std::max(getOptimalDFTSize(block_size.width + templ_size.width - 1), 2); 146 dft_size.height = getOptimalDFTSize(block_size.height + templ_size.height - 1); 147 if( dft_size.width <= 0 || dft_size.height <= 0 ) 148 CV_Error( CV_StsOutOfRange, "the input arrays are too big" ); 149 150 // recompute block size 151 block_size.width = dft_size.width - templ_size.width + 1; 152 block_size.width = std::min( block_size.width, result_size.width); 153 block_size.height = dft_size.height - templ_size.height + 1; 154 block_size.height = std::min( block_size.height, result_size.height ); 155 156 image_block.create(dft_size, CV_32F); 157 templ_block.create(dft_size, CV_32F); 158 result_data.create(dft_size, CV_32F); 159 160 image_spect.create(dft_size.height, dft_size.width / 2 + 1, CV_32FC2); 161 templ_spect.create(dft_size.height, dft_size.width / 2 + 1, CV_32FC2); 162 result_spect.create(dft_size.height, dft_size.width / 2 + 1, CV_32FC2); 163 164 // Use maximum result matrix block size for the estimated DFT block size 165 block_size.width = std::min(dft_size.width - templ_size.width + 1, result_size.width); 166 block_size.height = std::min(dft_size.height - templ_size.height + 1, result_size.height); 167 } 168 169 static bool convolve_dft(InputArray _image, InputArray _templ, OutputArray _result) 170 { 171 ConvolveBuf buf; 172 CV_Assert(_image.type() == CV_32F); 173 CV_Assert(_templ.type() == CV_32F); 174 175 buf.create(_image.size(), _templ.size()); 176 _result.create(buf.result_size, CV_32F); 177 178 UMat image = _image.getUMat(); 179 UMat templ = _templ.getUMat(); 180 181 UMat result = _result.getUMat(); 182 183 Size& block_size = buf.block_size; 184 Size& dft_size = buf.dft_size; 185 186 UMat& image_block = buf.image_block; 187 UMat& templ_block = buf.templ_block; 188 UMat& result_data = buf.result_data; 189 190 UMat& image_spect = buf.image_spect; 191 UMat& templ_spect = buf.templ_spect; 192 UMat& result_spect = buf.result_spect; 193 194 UMat templ_roi = templ; 195 copyMakeBorder(templ_roi, templ_block, 0, templ_block.rows - templ_roi.rows, 0, 196 templ_block.cols - templ_roi.cols, BORDER_ISOLATED); 197 198 dft(templ_block, templ_spect, 0, templ.rows); 199 200 // Process all blocks of the result matrix 201 for (int y = 0; y < result.rows; y += block_size.height) 202 { 203 for (int x = 0; x < result.cols; x += block_size.width) 204 { 205 Size image_roi_size(std::min(x + dft_size.width, image.cols) - x, 206 std::min(y + dft_size.height, image.rows) - y); 207 Rect roi0(x, y, image_roi_size.width, image_roi_size.height); 208 209 UMat image_roi(image, roi0); 210 211 copyMakeBorder(image_roi, image_block, 0, image_block.rows - image_roi.rows, 212 0, image_block.cols - image_roi.cols, BORDER_ISOLATED); 213 214 dft(image_block, image_spect, 0); 215 216 mulSpectrums(image_spect, templ_spect, result_spect, 0, true); 217 218 dft(result_spect, result_data, cv::DFT_INVERSE | cv::DFT_REAL_OUTPUT | cv::DFT_SCALE); 219 220 Size result_roi_size(std::min(x + block_size.width, result.cols) - x, 221 std::min(y + block_size.height, result.rows) - y); 222 223 Rect roi1(x, y, result_roi_size.width, result_roi_size.height); 224 Rect roi2(0, 0, result_roi_size.width, result_roi_size.height); 225 226 UMat result_roi(result, roi1); 227 UMat result_block(result_data, roi2); 228 229 result_block.copyTo(result_roi); 230 } 231 } 232 return true; 233 } 234 235 static bool convolve_32F(InputArray _image, InputArray _templ, OutputArray _result) 236 { 237 _result.create(_image.rows() - _templ.rows() + 1, _image.cols() - _templ.cols() + 1, CV_32F); 238 239 if (_image.channels() == 1) 240 return(convolve_dft(_image, _templ, _result)); 241 else 242 { 243 UMat image = _image.getUMat(); 244 UMat templ = _templ.getUMat(); 245 UMat result_(image.rows-templ.rows+1,(image.cols-templ.cols+1)*image.channels(), CV_32F); 246 bool ok = convolve_dft(image.reshape(1), templ.reshape(1), result_); 247 if (ok==false) 248 return false; 249 UMat result = _result.getUMat(); 250 return (extractFirstChannel_32F(result_, _result, _image.channels())); 251 } 252 } 253 254 static bool matchTemplateNaive_CCORR(InputArray _image, InputArray _templ, OutputArray _result) 255 { 256 int type = _image.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type); 257 int wdepth = CV_32F, wtype = CV_MAKE_TYPE(wdepth, cn); 258 259 ocl::Device dev = ocl::Device::getDefault(); 260 int pxPerWIx = (cn==1 && dev.isIntel() && (dev.type() & ocl::Device::TYPE_GPU)) ? 4 : 1; 261 int rated_cn = cn; 262 int wtype1 = wtype; 263 264 if (pxPerWIx!=1) 265 { 266 rated_cn = pxPerWIx; 267 type = CV_MAKE_TYPE(depth, rated_cn); 268 wtype1 = CV_MAKE_TYPE(wdepth, rated_cn); 269 } 270 271 char cvt[40]; 272 char cvt1[40]; 273 const char* convertToWT1 = ocl::convertTypeStr(depth, wdepth, cn, cvt); 274 const char* convertToWT = ocl::convertTypeStr(depth, wdepth, rated_cn, cvt1); 275 276 ocl::Kernel k("matchTemplate_Naive_CCORR", ocl::imgproc::match_template_oclsrc, 277 format("-D CCORR -D T=%s -D T1=%s -D WT=%s -D WT1=%s -D convertToWT=%s -D convertToWT1=%s -D cn=%d -D PIX_PER_WI_X=%d", ocl::typeToStr(type), ocl::typeToStr(depth), ocl::typeToStr(wtype1), ocl::typeToStr(wtype), 278 convertToWT, convertToWT1, cn, pxPerWIx)); 279 if (k.empty()) 280 return false; 281 282 UMat image = _image.getUMat(), templ = _templ.getUMat(); 283 _result.create(image.rows - templ.rows + 1, image.cols - templ.cols + 1, CV_32FC1); 284 UMat result = _result.getUMat(); 285 286 k.args(ocl::KernelArg::ReadOnlyNoSize(image), ocl::KernelArg::ReadOnly(templ), 287 ocl::KernelArg::WriteOnly(result)); 288 289 size_t globalsize[2] = { (result.cols+pxPerWIx-1)/pxPerWIx, result.rows}; 290 return k.run(2, globalsize, NULL, false); 291 } 292 293 294 static bool matchTemplate_CCORR(InputArray _image, InputArray _templ, OutputArray _result) 295 { 296 if (useNaive(_templ.size())) 297 return( matchTemplateNaive_CCORR(_image, _templ, _result)); 298 else 299 { 300 if(_image.depth() == CV_8U) 301 { 302 UMat imagef, templf; 303 UMat image = _image.getUMat(); 304 UMat templ = _templ.getUMat(); 305 image.convertTo(imagef, CV_32F); 306 templ.convertTo(templf, CV_32F); 307 return(convolve_32F(imagef, templf, _result)); 308 } 309 else 310 { 311 return(convolve_32F(_image, _templ, _result)); 312 } 313 } 314 } 315 316 static bool matchTemplate_CCORR_NORMED(InputArray _image, InputArray _templ, OutputArray _result) 317 { 318 matchTemplate(_image, _templ, _result, CV_TM_CCORR); 319 320 int type = _image.type(), cn = CV_MAT_CN(type); 321 322 ocl::Kernel k("matchTemplate_CCORR_NORMED", ocl::imgproc::match_template_oclsrc, 323 format("-D CCORR_NORMED -D T=%s -D cn=%d", ocl::typeToStr(type), cn)); 324 if (k.empty()) 325 return false; 326 327 UMat image = _image.getUMat(), templ = _templ.getUMat(); 328 _result.create(image.rows - templ.rows + 1, image.cols - templ.cols + 1, CV_32FC1); 329 UMat result = _result.getUMat(); 330 331 UMat image_sums, image_sqsums; 332 integral(image.reshape(1), image_sums, image_sqsums, CV_32F, CV_32F); 333 334 UMat templ_sqsum; 335 if (!sumTemplate(templ, templ_sqsum)) 336 return false; 337 338 k.args(ocl::KernelArg::ReadOnlyNoSize(image_sqsums), ocl::KernelArg::ReadWrite(result), 339 templ.rows, templ.cols, ocl::KernelArg::PtrReadOnly(templ_sqsum)); 340 341 size_t globalsize[2] = { result.cols, result.rows }; 342 return k.run(2, globalsize, NULL, false); 343 } 344 345 ////////////////////////////////////// SQDIFF ////////////////////////////////////////////////////////////// 346 347 static bool matchTemplateNaive_SQDIFF(InputArray _image, InputArray _templ, OutputArray _result) 348 { 349 int type = _image.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type); 350 int wdepth = CV_32F, wtype = CV_MAKE_TYPE(wdepth, cn); 351 352 char cvt[40]; 353 ocl::Kernel k("matchTemplate_Naive_SQDIFF", ocl::imgproc::match_template_oclsrc, 354 format("-D SQDIFF -D T=%s -D T1=%s -D WT=%s -D convertToWT=%s -D cn=%d", ocl::typeToStr(type), ocl::typeToStr(depth), 355 ocl::typeToStr(wtype), ocl::convertTypeStr(depth, wdepth, cn, cvt), cn)); 356 if (k.empty()) 357 return false; 358 359 UMat image = _image.getUMat(), templ = _templ.getUMat(); 360 _result.create(image.rows - templ.rows + 1, image.cols - templ.cols + 1, CV_32F); 361 UMat result = _result.getUMat(); 362 363 k.args(ocl::KernelArg::ReadOnlyNoSize(image), ocl::KernelArg::ReadOnly(templ), 364 ocl::KernelArg::WriteOnly(result)); 365 366 size_t globalsize[2] = { result.cols, result.rows }; 367 return k.run(2, globalsize, NULL, false); 368 } 369 370 static bool matchTemplate_SQDIFF(InputArray _image, InputArray _templ, OutputArray _result) 371 { 372 if (useNaive(_templ.size())) 373 return( matchTemplateNaive_SQDIFF(_image, _templ, _result)); 374 else 375 { 376 matchTemplate(_image, _templ, _result, CV_TM_CCORR); 377 378 int type = _image.type(), cn = CV_MAT_CN(type); 379 380 ocl::Kernel k("matchTemplate_Prepared_SQDIFF", ocl::imgproc::match_template_oclsrc, 381 format("-D SQDIFF_PREPARED -D T=%s -D cn=%d", ocl::typeToStr(type), cn)); 382 if (k.empty()) 383 return false; 384 385 UMat image = _image.getUMat(), templ = _templ.getUMat(); 386 _result.create(image.rows - templ.rows + 1, image.cols - templ.cols + 1, CV_32F); 387 UMat result = _result.getUMat(); 388 389 UMat image_sums, image_sqsums; 390 integral(image.reshape(1), image_sums, image_sqsums, CV_32F, CV_32F); 391 392 UMat templ_sqsum; 393 if (!sumTemplate(_templ, templ_sqsum)) 394 return false; 395 396 k.args(ocl::KernelArg::ReadOnlyNoSize(image_sqsums), ocl::KernelArg::ReadWrite(result), 397 templ.rows, templ.cols, ocl::KernelArg::PtrReadOnly(templ_sqsum)); 398 399 size_t globalsize[2] = { result.cols, result.rows }; 400 401 return k.run(2, globalsize, NULL, false); 402 } 403 } 404 405 static bool matchTemplate_SQDIFF_NORMED(InputArray _image, InputArray _templ, OutputArray _result) 406 { 407 matchTemplate(_image, _templ, _result, CV_TM_CCORR); 408 409 int type = _image.type(), cn = CV_MAT_CN(type); 410 411 ocl::Kernel k("matchTemplate_SQDIFF_NORMED", ocl::imgproc::match_template_oclsrc, 412 format("-D SQDIFF_NORMED -D T=%s -D cn=%d", ocl::typeToStr(type), cn)); 413 if (k.empty()) 414 return false; 415 416 UMat image = _image.getUMat(), templ = _templ.getUMat(); 417 _result.create(image.rows - templ.rows + 1, image.cols - templ.cols + 1, CV_32F); 418 UMat result = _result.getUMat(); 419 420 UMat image_sums, image_sqsums; 421 integral(image.reshape(1), image_sums, image_sqsums, CV_32F, CV_32F); 422 423 UMat templ_sqsum; 424 if (!sumTemplate(_templ, templ_sqsum)) 425 return false; 426 427 k.args(ocl::KernelArg::ReadOnlyNoSize(image_sqsums), ocl::KernelArg::ReadWrite(result), 428 templ.rows, templ.cols, ocl::KernelArg::PtrReadOnly(templ_sqsum)); 429 430 size_t globalsize[2] = { result.cols, result.rows }; 431 432 return k.run(2, globalsize, NULL, false); 433 } 434 435 ///////////////////////////////////// CCOEFF ///////////////////////////////////////////////////////////////// 436 437 static bool matchTemplate_CCOEFF(InputArray _image, InputArray _templ, OutputArray _result) 438 { 439 matchTemplate(_image, _templ, _result, CV_TM_CCORR); 440 441 UMat image_sums, temp; 442 integral(_image, image_sums, CV_32F); 443 444 int type = image_sums.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type); 445 446 ocl::Kernel k("matchTemplate_Prepared_CCOEFF", ocl::imgproc::match_template_oclsrc, 447 format("-D CCOEFF -D T=%s -D T1=%s -D cn=%d", ocl::typeToStr(type), ocl::typeToStr(depth), cn)); 448 if (k.empty()) 449 return false; 450 451 UMat templ = _templ.getUMat(); 452 UMat result = _result.getUMat(); 453 454 if (cn==1) 455 { 456 Scalar templMean = mean(templ); 457 float templ_sum = (float)templMean[0]; 458 459 k.args(ocl::KernelArg::ReadOnlyNoSize(image_sums), ocl::KernelArg::ReadWrite(result), templ.rows, templ.cols, templ_sum); 460 } 461 else 462 { 463 Vec4f templ_sum = Vec4f::all(0); 464 templ_sum = (Vec4f)mean(templ); 465 466 k.args(ocl::KernelArg::ReadOnlyNoSize(image_sums), ocl::KernelArg::ReadWrite(result), templ.rows, templ.cols, templ_sum); } 467 468 size_t globalsize[2] = { result.cols, result.rows }; 469 return k.run(2, globalsize, NULL, false); 470 } 471 472 static bool matchTemplate_CCOEFF_NORMED(InputArray _image, InputArray _templ, OutputArray _result) 473 { 474 matchTemplate(_image, _templ, _result, CV_TM_CCORR); 475 476 UMat temp, image_sums, image_sqsums; 477 integral(_image, image_sums, image_sqsums, CV_32F, CV_32F); 478 479 int type = image_sums.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type); 480 481 ocl::Kernel k("matchTemplate_CCOEFF_NORMED", ocl::imgproc::match_template_oclsrc, 482 format("-D CCOEFF_NORMED -D T=%s -D T1=%s -D cn=%d", ocl::typeToStr(type), ocl::typeToStr(depth), cn)); 483 if (k.empty()) 484 return false; 485 486 UMat templ = _templ.getUMat(); 487 Size size = _image.size(), tsize = templ.size(); 488 _result.create(size.height - templ.rows + 1, size.width - templ.cols + 1, CV_32F); 489 UMat result = _result.getUMat(); 490 491 float scale = 1.f / tsize.area(); 492 493 if (cn == 1) 494 { 495 float templ_sum = (float)sum(templ)[0]; 496 497 multiply(templ, templ, temp, 1, CV_32F); 498 float templ_sqsum = (float)sum(temp)[0]; 499 500 templ_sqsum -= scale * templ_sum * templ_sum; 501 templ_sum *= scale; 502 503 if (templ_sqsum < DBL_EPSILON) 504 { 505 result = Scalar::all(1); 506 return true; 507 } 508 509 k.args(ocl::KernelArg::ReadOnlyNoSize(image_sums), ocl::KernelArg::ReadOnlyNoSize(image_sqsums), 510 ocl::KernelArg::ReadWrite(result), templ.rows, templ.cols, scale, templ_sum, templ_sqsum); 511 } 512 else 513 { 514 Vec4f templ_sum = Vec4f::all(0), templ_sqsum = Vec4f::all(0); 515 templ_sum = sum(templ); 516 517 multiply(templ, templ, temp, 1, CV_32F); 518 templ_sqsum = sum(temp); 519 520 float templ_sqsum_sum = 0; 521 for (int i = 0; i < cn; i ++) 522 templ_sqsum_sum += templ_sqsum[i] - scale * templ_sum[i] * templ_sum[i]; 523 524 templ_sum *= scale; 525 526 if (templ_sqsum_sum < DBL_EPSILON) 527 { 528 result = Scalar::all(1); 529 return true; 530 } 531 532 k.args(ocl::KernelArg::ReadOnlyNoSize(image_sums), ocl::KernelArg::ReadOnlyNoSize(image_sqsums), 533 ocl::KernelArg::ReadWrite(result), templ.rows, templ.cols, scale, 534 templ_sum, templ_sqsum_sum); } 535 536 size_t globalsize[2] = { result.cols, result.rows }; 537 return k.run(2, globalsize, NULL, false); 538 } 539 540 /////////////////////////////////////////////////////////////////////////////////////////////////////////// 541 542 static bool ocl_matchTemplate( InputArray _img, InputArray _templ, OutputArray _result, int method) 543 { 544 int cn = _img.channels(); 545 546 if (cn > 4) 547 return false; 548 549 typedef bool (*Caller)(InputArray _img, InputArray _templ, OutputArray _result); 550 551 static const Caller callers[] = 552 { 553 matchTemplate_SQDIFF, matchTemplate_SQDIFF_NORMED, matchTemplate_CCORR, 554 matchTemplate_CCORR_NORMED, matchTemplate_CCOEFF, matchTemplate_CCOEFF_NORMED 555 }; 556 const Caller caller = callers[method]; 557 558 return caller(_img, _templ, _result); 559 } 560 561 #endif 562 563 #if defined HAVE_IPP 564 565 typedef IppStatus (CV_STDCALL * ippimatchTemplate)(const void*, int, IppiSize, const void*, int, IppiSize, Ipp32f* , int , IppEnum , Ipp8u*); 566 567 static bool ipp_crossCorr(const Mat& src, const Mat& tpl, Mat& dst) 568 { 569 IppStatus status; 570 571 IppiSize srcRoiSize = {src.cols,src.rows}; 572 IppiSize tplRoiSize = {tpl.cols,tpl.rows}; 573 574 Ipp8u *pBuffer; 575 int bufSize=0; 576 577 int depth = src.depth(); 578 579 ippimatchTemplate ippFunc = 580 depth==CV_8U ? (ippimatchTemplate)ippiCrossCorrNorm_8u32f_C1R: 581 depth==CV_32F? (ippimatchTemplate)ippiCrossCorrNorm_32f_C1R: 0; 582 583 if (ippFunc==0) 584 return false; 585 586 IppEnum funCfg = (IppEnum)(ippAlgAuto | ippiNormNone | ippiROIValid); 587 588 status = ippiCrossCorrNormGetBufferSize(srcRoiSize, tplRoiSize, funCfg, &bufSize); 589 if ( status < 0 ) 590 return false; 591 592 pBuffer = ippsMalloc_8u( bufSize ); 593 594 status = ippFunc(src.ptr(), (int)src.step, srcRoiSize, tpl.ptr(), (int)tpl.step, tplRoiSize, dst.ptr<Ipp32f>(), (int)dst.step, funCfg, pBuffer); 595 596 ippsFree( pBuffer ); 597 return status >= 0; 598 } 599 600 static bool ipp_sqrDistance(const Mat& src, const Mat& tpl, Mat& dst) 601 { 602 IppStatus status; 603 604 IppiSize srcRoiSize = {src.cols,src.rows}; 605 IppiSize tplRoiSize = {tpl.cols,tpl.rows}; 606 607 Ipp8u *pBuffer; 608 int bufSize=0; 609 610 int depth = src.depth(); 611 612 ippimatchTemplate ippFunc = 613 depth==CV_8U ? (ippimatchTemplate)ippiSqrDistanceNorm_8u32f_C1R: 614 depth==CV_32F? (ippimatchTemplate)ippiSqrDistanceNorm_32f_C1R: 0; 615 616 if (ippFunc==0) 617 return false; 618 619 IppEnum funCfg = (IppEnum)(ippAlgAuto | ippiNormNone | ippiROIValid); 620 621 status = ippiSqrDistanceNormGetBufferSize(srcRoiSize, tplRoiSize, funCfg, &bufSize); 622 if ( status < 0 ) 623 return false; 624 625 pBuffer = ippsMalloc_8u( bufSize ); 626 627 status = ippFunc(src.ptr(), (int)src.step, srcRoiSize, tpl.ptr(), (int)tpl.step, tplRoiSize, dst.ptr<Ipp32f>(), (int)dst.step, funCfg, pBuffer); 628 629 ippsFree( pBuffer ); 630 return status >= 0; 631 } 632 633 #endif 634 635 void crossCorr( const Mat& img, const Mat& _templ, Mat& corr, 636 Size corrsize, int ctype, 637 Point anchor, double delta, int borderType ) 638 { 639 const double blockScale = 4.5; 640 const int minBlockSize = 256; 641 std::vector<uchar> buf; 642 643 Mat templ = _templ; 644 int depth = img.depth(), cn = img.channels(); 645 int tdepth = templ.depth(), tcn = templ.channels(); 646 int cdepth = CV_MAT_DEPTH(ctype), ccn = CV_MAT_CN(ctype); 647 648 CV_Assert( img.dims <= 2 && templ.dims <= 2 && corr.dims <= 2 ); 649 650 if( depth != tdepth && tdepth != std::max(CV_32F, depth) ) 651 { 652 _templ.convertTo(templ, std::max(CV_32F, depth)); 653 tdepth = templ.depth(); 654 } 655 656 CV_Assert( depth == tdepth || tdepth == CV_32F); 657 CV_Assert( corrsize.height <= img.rows + templ.rows - 1 && 658 corrsize.width <= img.cols + templ.cols - 1 ); 659 660 CV_Assert( ccn == 1 || delta == 0 ); 661 662 corr.create(corrsize, ctype); 663 664 int maxDepth = depth > CV_8S ? CV_64F : std::max(std::max(CV_32F, tdepth), cdepth); 665 Size blocksize, dftsize; 666 667 blocksize.width = cvRound(templ.cols*blockScale); 668 blocksize.width = std::max( blocksize.width, minBlockSize - templ.cols + 1 ); 669 blocksize.width = std::min( blocksize.width, corr.cols ); 670 blocksize.height = cvRound(templ.rows*blockScale); 671 blocksize.height = std::max( blocksize.height, minBlockSize - templ.rows + 1 ); 672 blocksize.height = std::min( blocksize.height, corr.rows ); 673 674 dftsize.width = std::max(getOptimalDFTSize(blocksize.width + templ.cols - 1), 2); 675 dftsize.height = getOptimalDFTSize(blocksize.height + templ.rows - 1); 676 if( dftsize.width <= 0 || dftsize.height <= 0 ) 677 CV_Error( CV_StsOutOfRange, "the input arrays are too big" ); 678 679 // recompute block size 680 blocksize.width = dftsize.width - templ.cols + 1; 681 blocksize.width = MIN( blocksize.width, corr.cols ); 682 blocksize.height = dftsize.height - templ.rows + 1; 683 blocksize.height = MIN( blocksize.height, corr.rows ); 684 685 Mat dftTempl( dftsize.height*tcn, dftsize.width, maxDepth ); 686 Mat dftImg( dftsize, maxDepth ); 687 688 int i, k, bufSize = 0; 689 if( tcn > 1 && tdepth != maxDepth ) 690 bufSize = templ.cols*templ.rows*CV_ELEM_SIZE(tdepth); 691 692 if( cn > 1 && depth != maxDepth ) 693 bufSize = std::max( bufSize, (blocksize.width + templ.cols - 1)* 694 (blocksize.height + templ.rows - 1)*CV_ELEM_SIZE(depth)); 695 696 if( (ccn > 1 || cn > 1) && cdepth != maxDepth ) 697 bufSize = std::max( bufSize, blocksize.width*blocksize.height*CV_ELEM_SIZE(cdepth)); 698 699 buf.resize(bufSize); 700 701 // compute DFT of each template plane 702 for( k = 0; k < tcn; k++ ) 703 { 704 int yofs = k*dftsize.height; 705 Mat src = templ; 706 Mat dst(dftTempl, Rect(0, yofs, dftsize.width, dftsize.height)); 707 Mat dst1(dftTempl, Rect(0, yofs, templ.cols, templ.rows)); 708 709 if( tcn > 1 ) 710 { 711 src = tdepth == maxDepth ? dst1 : Mat(templ.size(), tdepth, &buf[0]); 712 int pairs[] = {k, 0}; 713 mixChannels(&templ, 1, &src, 1, pairs, 1); 714 } 715 716 if( dst1.data != src.data ) 717 src.convertTo(dst1, dst1.depth()); 718 719 if( dst.cols > templ.cols ) 720 { 721 Mat part(dst, Range(0, templ.rows), Range(templ.cols, dst.cols)); 722 part = Scalar::all(0); 723 } 724 dft(dst, dst, 0, templ.rows); 725 } 726 727 int tileCountX = (corr.cols + blocksize.width - 1)/blocksize.width; 728 int tileCountY = (corr.rows + blocksize.height - 1)/blocksize.height; 729 int tileCount = tileCountX * tileCountY; 730 731 Size wholeSize = img.size(); 732 Point roiofs(0,0); 733 Mat img0 = img; 734 735 if( !(borderType & BORDER_ISOLATED) ) 736 { 737 img.locateROI(wholeSize, roiofs); 738 img0.adjustROI(roiofs.y, wholeSize.height-img.rows-roiofs.y, 739 roiofs.x, wholeSize.width-img.cols-roiofs.x); 740 } 741 borderType |= BORDER_ISOLATED; 742 743 // calculate correlation by blocks 744 for( i = 0; i < tileCount; i++ ) 745 { 746 int x = (i%tileCountX)*blocksize.width; 747 int y = (i/tileCountX)*blocksize.height; 748 749 Size bsz(std::min(blocksize.width, corr.cols - x), 750 std::min(blocksize.height, corr.rows - y)); 751 Size dsz(bsz.width + templ.cols - 1, bsz.height + templ.rows - 1); 752 int x0 = x - anchor.x + roiofs.x, y0 = y - anchor.y + roiofs.y; 753 int x1 = std::max(0, x0), y1 = std::max(0, y0); 754 int x2 = std::min(img0.cols, x0 + dsz.width); 755 int y2 = std::min(img0.rows, y0 + dsz.height); 756 Mat src0(img0, Range(y1, y2), Range(x1, x2)); 757 Mat dst(dftImg, Rect(0, 0, dsz.width, dsz.height)); 758 Mat dst1(dftImg, Rect(x1-x0, y1-y0, x2-x1, y2-y1)); 759 Mat cdst(corr, Rect(x, y, bsz.width, bsz.height)); 760 761 for( k = 0; k < cn; k++ ) 762 { 763 Mat src = src0; 764 dftImg = Scalar::all(0); 765 766 if( cn > 1 ) 767 { 768 src = depth == maxDepth ? dst1 : Mat(y2-y1, x2-x1, depth, &buf[0]); 769 int pairs[] = {k, 0}; 770 mixChannels(&src0, 1, &src, 1, pairs, 1); 771 } 772 773 if( dst1.data != src.data ) 774 src.convertTo(dst1, dst1.depth()); 775 776 if( x2 - x1 < dsz.width || y2 - y1 < dsz.height ) 777 copyMakeBorder(dst1, dst, y1-y0, dst.rows-dst1.rows-(y1-y0), 778 x1-x0, dst.cols-dst1.cols-(x1-x0), borderType); 779 780 dft( dftImg, dftImg, 0, dsz.height ); 781 Mat dftTempl1(dftTempl, Rect(0, tcn > 1 ? k*dftsize.height : 0, 782 dftsize.width, dftsize.height)); 783 mulSpectrums(dftImg, dftTempl1, dftImg, 0, true); 784 dft( dftImg, dftImg, DFT_INVERSE + DFT_SCALE, bsz.height ); 785 786 src = dftImg(Rect(0, 0, bsz.width, bsz.height)); 787 788 if( ccn > 1 ) 789 { 790 if( cdepth != maxDepth ) 791 { 792 Mat plane(bsz, cdepth, &buf[0]); 793 src.convertTo(plane, cdepth, 1, delta); 794 src = plane; 795 } 796 int pairs[] = {0, k}; 797 mixChannels(&src, 1, &cdst, 1, pairs, 1); 798 } 799 else 800 { 801 if( k == 0 ) 802 src.convertTo(cdst, cdepth, 1, delta); 803 else 804 { 805 if( maxDepth != cdepth ) 806 { 807 Mat plane(bsz, cdepth, &buf[0]); 808 src.convertTo(plane, cdepth); 809 src = plane; 810 } 811 add(src, cdst, cdst); 812 } 813 } 814 } 815 } 816 } 817 818 static void matchTemplateMask( InputArray _img, InputArray _templ, OutputArray _result, int method, InputArray _mask ) 819 { 820 int type = _img.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type); 821 CV_Assert( CV_TM_SQDIFF <= method && method <= CV_TM_CCOEFF_NORMED ); 822 CV_Assert( (depth == CV_8U || depth == CV_32F) && type == _templ.type() && _img.dims() <= 2 ); 823 824 Mat img = _img.getMat(), templ = _templ.getMat(), mask = _mask.getMat(); 825 int ttype = templ.type(), tdepth = CV_MAT_DEPTH(ttype), tcn = CV_MAT_CN(ttype); 826 int mtype = img.type(), mdepth = CV_MAT_DEPTH(type), mcn = CV_MAT_CN(mtype); 827 828 if (depth == CV_8U) 829 { 830 depth = CV_32F; 831 type = CV_MAKETYPE(CV_32F, cn); 832 img.convertTo(img, type, 1.0 / 255); 833 } 834 835 if (tdepth == CV_8U) 836 { 837 tdepth = CV_32F; 838 ttype = CV_MAKETYPE(CV_32F, tcn); 839 templ.convertTo(templ, ttype, 1.0 / 255); 840 } 841 842 if (mdepth == CV_8U) 843 { 844 mdepth = CV_32F; 845 mtype = CV_MAKETYPE(CV_32F, mcn); 846 compare(mask, Scalar::all(0), mask, CMP_NE); 847 mask.convertTo(mask, mtype, 1.0 / 255); 848 } 849 850 Size corrSize(img.cols - templ.cols + 1, img.rows - templ.rows + 1); 851 _result.create(corrSize, CV_32F); 852 Mat result = _result.getMat(); 853 854 Mat img2 = img.mul(img); 855 Mat mask2 = mask.mul(mask); 856 Mat mask_templ = templ.mul(mask); 857 Scalar templMean, templSdv; 858 859 double templSum2 = 0; 860 meanStdDev( mask_templ, templMean, templSdv ); 861 862 templSum2 = templSdv[0]*templSdv[0] + templSdv[1]*templSdv[1] + templSdv[2]*templSdv[2] + templSdv[3]*templSdv[3]; 863 templSum2 += templMean[0]*templMean[0] + templMean[1]*templMean[1] + templMean[2]*templMean[2] + templMean[3]*templMean[3]; 864 templSum2 *= ((double)templ.rows * templ.cols); 865 866 if (method == CV_TM_SQDIFF) 867 { 868 Mat mask2_templ = templ.mul(mask2); 869 870 Mat corr(corrSize, CV_32F); 871 crossCorr( img, mask2_templ, corr, corr.size(), corr.type(), Point(0,0), 0, 0 ); 872 crossCorr( img2, mask, result, result.size(), result.type(), Point(0,0), 0, 0 ); 873 874 result -= corr * 2; 875 result += templSum2; 876 } 877 else if (method == CV_TM_CCORR_NORMED) 878 { 879 if (templSum2 < DBL_EPSILON) 880 { 881 result = Scalar::all(1); 882 return; 883 } 884 885 Mat corr(corrSize, CV_32F); 886 crossCorr( img2, mask2, corr, corr.size(), corr.type(), Point(0,0), 0, 0 ); 887 crossCorr( img, mask_templ, result, result.size(), result.type(), Point(0,0), 0, 0 ); 888 889 sqrt(corr, corr); 890 result = result.mul(1/corr); 891 result /= std::sqrt(templSum2); 892 } 893 else 894 CV_Error(Error::StsNotImplemented, ""); 895 } 896 } 897 898 //////////////////////////////////////////////////////////////////////////////////////////////////////// 899 900 void cv::matchTemplate( InputArray _img, InputArray _templ, OutputArray _result, int method, InputArray _mask ) 901 { 902 if (!_mask.empty()) 903 { 904 cv::matchTemplateMask(_img, _templ, _result, method, _mask); 905 return; 906 } 907 908 int type = _img.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type); 909 CV_Assert( CV_TM_SQDIFF <= method && method <= CV_TM_CCOEFF_NORMED ); 910 CV_Assert( (depth == CV_8U || depth == CV_32F) && type == _templ.type() && _img.dims() <= 2 ); 911 912 bool needswap = _img.size().height < _templ.size().height || _img.size().width < _templ.size().width; 913 if (needswap) 914 { 915 CV_Assert(_img.size().height <= _templ.size().height && _img.size().width <= _templ.size().width); 916 } 917 918 CV_OCL_RUN(_img.dims() <= 2 && _result.isUMat(), 919 (!needswap ? ocl_matchTemplate(_img, _templ, _result, method) : ocl_matchTemplate(_templ, _img, _result, method))) 920 921 int numType = method == CV_TM_CCORR || method == CV_TM_CCORR_NORMED ? 0 : 922 method == CV_TM_CCOEFF || method == CV_TM_CCOEFF_NORMED ? 1 : 2; 923 bool isNormed = method == CV_TM_CCORR_NORMED || 924 method == CV_TM_SQDIFF_NORMED || 925 method == CV_TM_CCOEFF_NORMED; 926 927 Mat img = _img.getMat(), templ = _templ.getMat(); 928 if (needswap) 929 std::swap(img, templ); 930 931 Size corrSize(img.cols - templ.cols + 1, img.rows - templ.rows + 1); 932 _result.create(corrSize, CV_32F); 933 Mat result = _result.getMat(); 934 935 #ifdef HAVE_TEGRA_OPTIMIZATION 936 if (tegra::useTegra() && tegra::matchTemplate(img, templ, result, method)) 937 return; 938 #endif 939 940 #if defined HAVE_IPP 941 bool useIppMT = false; 942 CV_IPP_CHECK() 943 { 944 useIppMT = (templ.rows < img.rows/2 && templ.cols < img.cols/2); 945 946 if (method == CV_TM_SQDIFF && cn == 1 && useIppMT) 947 { 948 if (ipp_sqrDistance(img, templ, result)) 949 { 950 CV_IMPL_ADD(CV_IMPL_IPP); 951 return; 952 } 953 setIppErrorStatus(); 954 } 955 } 956 #endif 957 958 #if defined HAVE_IPP 959 if (cn == 1 && useIppMT) 960 { 961 if (!ipp_crossCorr(img, templ, result)) 962 { 963 setIppErrorStatus(); 964 crossCorr( img, templ, result, result.size(), result.type(), Point(0,0), 0, 0); 965 } 966 else 967 { 968 CV_IMPL_ADD(CV_IMPL_IPP); 969 } 970 } 971 else 972 #endif 973 crossCorr( img, templ, result, result.size(), result.type(), Point(0,0), 0, 0); 974 975 if( method == CV_TM_CCORR ) 976 return; 977 978 double invArea = 1./((double)templ.rows * templ.cols); 979 980 Mat sum, sqsum; 981 Scalar templMean, templSdv; 982 double *q0 = 0, *q1 = 0, *q2 = 0, *q3 = 0; 983 double templNorm = 0, templSum2 = 0; 984 985 if( method == CV_TM_CCOEFF ) 986 { 987 integral(img, sum, CV_64F); 988 templMean = mean(templ); 989 } 990 else 991 { 992 integral(img, sum, sqsum, CV_64F); 993 meanStdDev( templ, templMean, templSdv ); 994 995 templNorm = templSdv[0]*templSdv[0] + templSdv[1]*templSdv[1] + templSdv[2]*templSdv[2] + templSdv[3]*templSdv[3]; 996 997 if( templNorm < DBL_EPSILON && method == CV_TM_CCOEFF_NORMED ) 998 { 999 result = Scalar::all(1); 1000 return; 1001 } 1002 1003 templSum2 = templNorm + templMean[0]*templMean[0] + templMean[1]*templMean[1] + templMean[2]*templMean[2] + templMean[3]*templMean[3]; 1004 1005 if( numType != 1 ) 1006 { 1007 templMean = Scalar::all(0); 1008 templNorm = templSum2; 1009 } 1010 1011 templSum2 /= invArea; 1012 templNorm = std::sqrt(templNorm); 1013 templNorm /= std::sqrt(invArea); // care of accuracy here 1014 1015 q0 = (double*)sqsum.data; 1016 q1 = q0 + templ.cols*cn; 1017 q2 = (double*)(sqsum.data + templ.rows*sqsum.step); 1018 q3 = q2 + templ.cols*cn; 1019 } 1020 1021 double* p0 = (double*)sum.data; 1022 double* p1 = p0 + templ.cols*cn; 1023 double* p2 = (double*)(sum.data + templ.rows*sum.step); 1024 double* p3 = p2 + templ.cols*cn; 1025 1026 int sumstep = sum.data ? (int)(sum.step / sizeof(double)) : 0; 1027 int sqstep = sqsum.data ? (int)(sqsum.step / sizeof(double)) : 0; 1028 1029 int i, j, k; 1030 1031 for( i = 0; i < result.rows; i++ ) 1032 { 1033 float* rrow = result.ptr<float>(i); 1034 int idx = i * sumstep; 1035 int idx2 = i * sqstep; 1036 1037 for( j = 0; j < result.cols; j++, idx += cn, idx2 += cn ) 1038 { 1039 double num = rrow[j], t; 1040 double wndMean2 = 0, wndSum2 = 0; 1041 1042 if( numType == 1 ) 1043 { 1044 for( k = 0; k < cn; k++ ) 1045 { 1046 t = p0[idx+k] - p1[idx+k] - p2[idx+k] + p3[idx+k]; 1047 wndMean2 += t*t; 1048 num -= t*templMean[k]; 1049 } 1050 1051 wndMean2 *= invArea; 1052 } 1053 1054 if( isNormed || numType == 2 ) 1055 { 1056 for( k = 0; k < cn; k++ ) 1057 { 1058 t = q0[idx2+k] - q1[idx2+k] - q2[idx2+k] + q3[idx2+k]; 1059 wndSum2 += t; 1060 } 1061 1062 if( numType == 2 ) 1063 { 1064 num = wndSum2 - 2*num + templSum2; 1065 num = MAX(num, 0.); 1066 } 1067 } 1068 1069 if( isNormed ) 1070 { 1071 t = std::sqrt(MAX(wndSum2 - wndMean2,0))*templNorm; 1072 if( fabs(num) < t ) 1073 num /= t; 1074 else if( fabs(num) < t*1.125 ) 1075 num = num > 0 ? 1 : -1; 1076 else 1077 num = method != CV_TM_SQDIFF_NORMED ? 0 : 1; 1078 } 1079 1080 rrow[j] = (float)num; 1081 } 1082 } 1083 } 1084 1085 1086 CV_IMPL void 1087 cvMatchTemplate( const CvArr* _img, const CvArr* _templ, CvArr* _result, int method ) 1088 { 1089 cv::Mat img = cv::cvarrToMat(_img), templ = cv::cvarrToMat(_templ), 1090 result = cv::cvarrToMat(_result); 1091 CV_Assert( result.size() == cv::Size(std::abs(img.cols - templ.cols) + 1, 1092 std::abs(img.rows - templ.rows) + 1) && 1093 result.type() == CV_32F ); 1094 matchTemplate(img, templ, result, method); 1095 } 1096 1097 /* End of file. */ 1098