/*M///////////////////////////////////////////////////////////////////////////////////////
//
// IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
// By downloading, copying, installing or using the software you agree to this license.
// If you do not agree to this license, do not download, install,
// copy or use the software.
//
//
// Intel License Agreement
// For Open Source Computer Vision Library
//
// Copyright (C) 2000, Intel Corporation, all rights reserved.
// Copyright (C) 2014, Itseez, Inc, all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
// * Redistribution's of source code must retain the above copyright notice,
// this list of conditions and the following disclaimer.
//
// * Redistribution's in binary form must reproduce the above copyright notice,
// this list of conditions and the following disclaimer in the documentation
// and/or other materials provided with the distribution.
//
// * The name of Intel Corporation may not be used to endorse or promote products
// derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
33 // In no event shall the Intel Corporation or contributors be liable for any direct, 34 // indirect, incidental, special, exemplary, or consequential damages 35 // (including, but not limited to, procurement of substitute goods or services; 36 // loss of use, data, or profits; or business interruption) however caused 37 // and on any theory of liability, whether in contract, strict liability, 38 // or tort (including negligence or otherwise) arising in any way out of 39 // the use of this software, even if advised of the possibility of such damage. 40 // 41 //M*/ 42 43 #include "precomp.hpp" 44 #include "opencl_kernels_imgproc.hpp" 45 46 #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7) 47 static IppStatus sts = ippInit(); 48 #endif 49 50 /****************************************************************************************\ 51 Sobel & Scharr Derivative Filters 52 \****************************************************************************************/ 53 54 namespace cv 55 { 56 57 static void getScharrKernels( OutputArray _kx, OutputArray _ky, 58 int dx, int dy, bool normalize, int ktype ) 59 { 60 const int ksize = 3; 61 62 CV_Assert( ktype == CV_32F || ktype == CV_64F ); 63 _kx.create(ksize, 1, ktype, -1, true); 64 _ky.create(ksize, 1, ktype, -1, true); 65 Mat kx = _kx.getMat(); 66 Mat ky = _ky.getMat(); 67 68 CV_Assert( dx >= 0 && dy >= 0 && dx+dy == 1 ); 69 70 for( int k = 0; k < 2; k++ ) 71 { 72 Mat* kernel = k == 0 ? &kx : &ky; 73 int order = k == 0 ? dx : dy; 74 int kerI[3]; 75 76 if( order == 0 ) 77 kerI[0] = 3, kerI[1] = 10, kerI[2] = 3; 78 else if( order == 1 ) 79 kerI[0] = -1, kerI[1] = 0, kerI[2] = 1; 80 81 Mat temp(kernel->rows, kernel->cols, CV_32S, &kerI[0]); 82 double scale = !normalize || order == 1 ? 1. 
: 1./32; 83 temp.convertTo(*kernel, ktype, scale); 84 } 85 } 86 87 88 static void getSobelKernels( OutputArray _kx, OutputArray _ky, 89 int dx, int dy, int _ksize, bool normalize, int ktype ) 90 { 91 int i, j, ksizeX = _ksize, ksizeY = _ksize; 92 if( ksizeX == 1 && dx > 0 ) 93 ksizeX = 3; 94 if( ksizeY == 1 && dy > 0 ) 95 ksizeY = 3; 96 97 CV_Assert( ktype == CV_32F || ktype == CV_64F ); 98 99 _kx.create(ksizeX, 1, ktype, -1, true); 100 _ky.create(ksizeY, 1, ktype, -1, true); 101 Mat kx = _kx.getMat(); 102 Mat ky = _ky.getMat(); 103 104 if( _ksize % 2 == 0 || _ksize > 31 ) 105 CV_Error( CV_StsOutOfRange, "The kernel size must be odd and not larger than 31" ); 106 std::vector<int> kerI(std::max(ksizeX, ksizeY) + 1); 107 108 CV_Assert( dx >= 0 && dy >= 0 && dx+dy > 0 ); 109 110 for( int k = 0; k < 2; k++ ) 111 { 112 Mat* kernel = k == 0 ? &kx : &ky; 113 int order = k == 0 ? dx : dy; 114 int ksize = k == 0 ? ksizeX : ksizeY; 115 116 CV_Assert( ksize > order ); 117 118 if( ksize == 1 ) 119 kerI[0] = 1; 120 else if( ksize == 3 ) 121 { 122 if( order == 0 ) 123 kerI[0] = 1, kerI[1] = 2, kerI[2] = 1; 124 else if( order == 1 ) 125 kerI[0] = -1, kerI[1] = 0, kerI[2] = 1; 126 else 127 kerI[0] = 1, kerI[1] = -2, kerI[2] = 1; 128 } 129 else 130 { 131 int oldval, newval; 132 kerI[0] = 1; 133 for( i = 0; i < ksize; i++ ) 134 kerI[i+1] = 0; 135 136 for( i = 0; i < ksize - order - 1; i++ ) 137 { 138 oldval = kerI[0]; 139 for( j = 1; j <= ksize; j++ ) 140 { 141 newval = kerI[j]+kerI[j-1]; 142 kerI[j-1] = oldval; 143 oldval = newval; 144 } 145 } 146 147 for( i = 0; i < order; i++ ) 148 { 149 oldval = -kerI[0]; 150 for( j = 1; j <= ksize; j++ ) 151 { 152 newval = kerI[j-1] - kerI[j]; 153 kerI[j-1] = oldval; 154 oldval = newval; 155 } 156 } 157 } 158 159 Mat temp(kernel->rows, kernel->cols, CV_32S, &kerI[0]); 160 double scale = !normalize ? 1. 
: 1./(1 << (ksize-order-1)); 161 temp.convertTo(*kernel, ktype, scale); 162 } 163 } 164 165 } 166 167 void cv::getDerivKernels( OutputArray kx, OutputArray ky, int dx, int dy, 168 int ksize, bool normalize, int ktype ) 169 { 170 if( ksize <= 0 ) 171 getScharrKernels( kx, ky, dx, dy, normalize, ktype ); 172 else 173 getSobelKernels( kx, ky, dx, dy, ksize, normalize, ktype ); 174 } 175 176 177 cv::Ptr<cv::FilterEngine> cv::createDerivFilter(int srcType, int dstType, 178 int dx, int dy, int ksize, int borderType ) 179 { 180 Mat kx, ky; 181 getDerivKernels( kx, ky, dx, dy, ksize, false, CV_32F ); 182 return createSeparableLinearFilter(srcType, dstType, 183 kx, ky, Point(-1,-1), 0, borderType ); 184 } 185 186 #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7) 187 188 #define IPP_RETURN_ERROR {setIppErrorStatus(); return false;} 189 190 namespace cv 191 { 192 #if IPP_VERSION_X100 >= 801 193 static bool IPPDerivScharr(InputArray _src, OutputArray _dst, int ddepth, int dx, int dy, double scale, double delta, int borderType) 194 { 195 if ((0 > dx) || (0 > dy) || (1 != dx + dy)) 196 return false; 197 if (fabs(delta) > FLT_EPSILON) 198 return false; 199 200 IppiBorderType ippiBorderType = ippiGetBorderType(borderType & (~BORDER_ISOLATED)); 201 if ((int)ippiBorderType < 0) 202 return false; 203 204 int stype = _src.type(), sdepth = CV_MAT_DEPTH(stype), cn = CV_MAT_CN(stype); 205 if (ddepth < 0) 206 ddepth = sdepth; 207 int dtype = CV_MAKETYPE(ddepth, cn); 208 209 Mat src = _src.getMat(); 210 if (0 == (BORDER_ISOLATED & borderType)) 211 { 212 Size size; Point offset; 213 src.locateROI(size, offset); 214 if (0 < offset.x) 215 ippiBorderType = (IppiBorderType)(ippiBorderType | ippBorderInMemLeft); 216 if (0 < offset.y) 217 ippiBorderType = (IppiBorderType)(ippiBorderType | ippBorderInMemTop); 218 if (offset.x + src.cols < size.width) 219 ippiBorderType = (IppiBorderType)(ippiBorderType | ippBorderInMemRight); 220 if (offset.y + src.rows < size.height) 221 ippiBorderType = 
(IppiBorderType)(ippiBorderType | ippBorderInMemBottom); 222 } 223 224 bool horz = (0 == dx) && (1 == dy); 225 IppiSize roiSize = {src.cols, src.rows}; 226 227 _dst.create( _src.size(), dtype); 228 Mat dst = _dst.getMat(); 229 IppStatus sts = ippStsErr; 230 if ((CV_8U == stype) && (CV_16S == dtype)) 231 { 232 int bufferSize = 0; Ipp8u *pBuffer; 233 if (horz) 234 { 235 if (0 > ippiFilterScharrHorizMaskBorderGetBufferSize(roiSize, ippMskSize3x3, ipp8u, ipp16s, 1, &bufferSize)) 236 IPP_RETURN_ERROR 237 pBuffer = ippsMalloc_8u(bufferSize); 238 if (NULL == pBuffer) 239 IPP_RETURN_ERROR 240 sts = ippiFilterScharrHorizMaskBorder_8u16s_C1R(src.ptr(), (int)src.step, dst.ptr<Ipp16s>(), (int)dst.step, roiSize, ippMskSize3x3, ippiBorderType, 0, pBuffer); 241 } 242 else 243 { 244 if (0 > ippiFilterScharrVertMaskBorderGetBufferSize(roiSize, ippMskSize3x3, ipp8u, ipp16s, 1, &bufferSize)) 245 IPP_RETURN_ERROR 246 pBuffer = ippsMalloc_8u(bufferSize); 247 if (NULL == pBuffer) 248 IPP_RETURN_ERROR 249 sts = ippiFilterScharrVertMaskBorder_8u16s_C1R(src.ptr(), (int)src.step, dst.ptr<Ipp16s>(), (int)dst.step, roiSize, ippMskSize3x3, ippiBorderType, 0, pBuffer); 250 } 251 ippsFree(pBuffer); 252 } 253 else if ((CV_16S == stype) && (CV_16S == dtype)) 254 { 255 int bufferSize = 0; Ipp8u *pBuffer; 256 if (horz) 257 { 258 if (0 > ippiFilterScharrHorizMaskBorderGetBufferSize(roiSize, ippMskSize3x3, ipp16s, ipp16s, 1, &bufferSize)) 259 IPP_RETURN_ERROR 260 pBuffer = ippsMalloc_8u(bufferSize); 261 if (NULL == pBuffer) 262 IPP_RETURN_ERROR 263 sts = ippiFilterScharrHorizMaskBorder_16s_C1R(src.ptr<Ipp16s>(), (int)src.step, dst.ptr<Ipp16s>(), (int)dst.step, roiSize, ippMskSize3x3, ippiBorderType, 0, pBuffer); 264 } 265 else 266 { 267 if (0 > ippiFilterScharrVertMaskBorderGetBufferSize(roiSize, ippMskSize3x3, ipp16s, ipp16s, 1, &bufferSize)) 268 IPP_RETURN_ERROR 269 pBuffer = ippsMalloc_8u(bufferSize); 270 if (NULL == pBuffer) 271 IPP_RETURN_ERROR 272 sts = 
ippiFilterScharrVertMaskBorder_16s_C1R(src.ptr<Ipp16s>(), (int)src.step, dst.ptr<Ipp16s>(), (int)dst.step, roiSize, ippMskSize3x3, ippiBorderType, 0, pBuffer); 273 } 274 ippsFree(pBuffer); 275 } 276 else if ((CV_32F == stype) && (CV_32F == dtype)) 277 { 278 int bufferSize = 0; Ipp8u *pBuffer; 279 if (horz) 280 { 281 if (0 > ippiFilterScharrHorizMaskBorderGetBufferSize(roiSize, ippMskSize3x3, ipp32f, ipp32f, 1, &bufferSize)) 282 IPP_RETURN_ERROR 283 pBuffer = ippsMalloc_8u(bufferSize); 284 if (NULL == pBuffer) 285 IPP_RETURN_ERROR 286 sts = ippiFilterScharrHorizMaskBorder_32f_C1R(src.ptr<Ipp32f>(), (int)src.step, dst.ptr<Ipp32f>(), (int)dst.step, roiSize, ippMskSize3x3, ippiBorderType, 0, pBuffer); 287 } 288 else 289 { 290 if (0 > ippiFilterScharrVertMaskBorderGetBufferSize(roiSize, ippMskSize3x3, ipp32f, ipp32f, 1, &bufferSize)) 291 IPP_RETURN_ERROR 292 pBuffer = ippsMalloc_8u(bufferSize); 293 if (NULL == pBuffer) 294 IPP_RETURN_ERROR 295 sts = ippiFilterScharrVertMaskBorder_32f_C1R(src.ptr<Ipp32f>(), (int)src.step, dst.ptr<Ipp32f>(), (int)dst.step, roiSize, ippMskSize3x3, ippiBorderType, 0, pBuffer); 296 } 297 ippsFree(pBuffer); 298 if (sts < 0) 299 IPP_RETURN_ERROR; 300 301 if (FLT_EPSILON < fabs(scale - 1.0)) 302 sts = ippiMulC_32f_C1R(dst.ptr<Ipp32f>(), (int)dst.step, (Ipp32f)scale, dst.ptr<Ipp32f>(), (int)dst.step, roiSize); 303 } 304 return (0 <= sts); 305 } 306 #elif IPP_VERSION_X100 >= 700 307 static bool IPPDerivScharr(InputArray _src, OutputArray _dst, int ddepth, int dx, int dy, double scale, double delta, int borderType) 308 { 309 if (BORDER_REPLICATE != borderType) 310 return false; 311 if ((0 > dx) || (0 > dy) || (1 != dx + dy)) 312 return false; 313 if (fabs(delta) > FLT_EPSILON) 314 return false; 315 316 Mat src = _src.getMat(), dst = _dst.getMat(); 317 318 int bufSize = 0; 319 cv::AutoBuffer<char> buffer; 320 IppiSize roi = ippiSize(src.cols, src.rows); 321 322 if( ddepth < 0 ) 323 ddepth = src.depth(); 324 325 dst.create( src.size(), 
CV_MAKETYPE(ddepth, src.channels()) ); 326 327 switch(src.type()) 328 { 329 case CV_8UC1: 330 { 331 if(scale != 1) 332 return false; 333 334 switch(dst.type()) 335 { 336 case CV_16S: 337 { 338 if ((dx == 1) && (dy == 0)) 339 { 340 if (0 > ippiFilterScharrVertGetBufferSize_8u16s_C1R(roi,&bufSize)) 341 return false; 342 buffer.allocate(bufSize); 343 return (0 <= ippiFilterScharrVertBorder_8u16s_C1R(src.ptr<Ipp8u>(), (int)src.step, 344 dst.ptr<Ipp16s>(), (int)dst.step, roi, ippBorderRepl, 0, (Ipp8u*)(char*)buffer)); 345 } 346 if ((dx == 0) && (dy == 1)) 347 { 348 if (0 > ippiFilterScharrHorizGetBufferSize_8u16s_C1R(roi,&bufSize)) 349 return false; 350 buffer.allocate(bufSize); 351 return (0 <= ippiFilterScharrHorizBorder_8u16s_C1R(src.ptr<Ipp8u>(), (int)src.step, 352 dst.ptr<Ipp16s>(), (int)dst.step, roi, ippBorderRepl, 0, (Ipp8u*)(char*)buffer)); 353 } 354 return false; 355 } 356 default: 357 return false; 358 } 359 } 360 case CV_32FC1: 361 { 362 switch(dst.type()) 363 { 364 case CV_32FC1: 365 { 366 if ((dx == 1) && (dy == 0)) 367 { 368 if (0 > ippiFilterScharrVertGetBufferSize_32f_C1R(ippiSize(src.cols, src.rows),&bufSize)) 369 return false; 370 buffer.allocate(bufSize); 371 372 if (0 > ippiFilterScharrVertBorder_32f_C1R(src.ptr<Ipp32f>(), (int)src.step, 373 dst.ptr<Ipp32f>(), (int)dst.step, ippiSize(src.cols, src.rows), 374 ippBorderRepl, 0, (Ipp8u*)(char*)buffer)) 375 { 376 return false; 377 } 378 379 if (scale != 1) 380 /* IPP is fast, so MulC produce very little perf degradation.*/ 381 //ippiMulC_32f_C1IR((Ipp32f)scale, dst.ptr<Ipp32f>(), (int)dst.step, ippiSize(dst.cols*dst.channels(), dst.rows)); 382 ippiMulC_32f_C1R(dst.ptr<Ipp32f>(), (int)dst.step, (Ipp32f)scale, dst.ptr<Ipp32f>(), (int)dst.step, ippiSize(dst.cols*dst.channels(), dst.rows)); 383 return true; 384 } 385 if ((dx == 0) && (dy == 1)) 386 { 387 if (0 > ippiFilterScharrHorizGetBufferSize_32f_C1R(ippiSize(src.cols, src.rows),&bufSize)) 388 return false; 389 buffer.allocate(bufSize); 390 391 if (0 > 
ippiFilterScharrHorizBorder_32f_C1R(src.ptr<Ipp32f>(), (int)src.step, 392 dst.ptr<Ipp32f>(), (int)dst.step, ippiSize(src.cols, src.rows), 393 ippBorderRepl, 0, (Ipp8u*)(char*)buffer)) 394 return false; 395 396 if (scale != 1) 397 ippiMulC_32f_C1R(dst.ptr<Ipp32f>(), (int)dst.step, (Ipp32f)scale, dst.ptr<Ipp32f>(), (int)dst.step, ippiSize(dst.cols*dst.channels(), dst.rows)); 398 return true; 399 } 400 } 401 default: 402 return false; 403 } 404 } 405 default: 406 return false; 407 } 408 } 409 #endif 410 411 static bool IPPDerivSobel(InputArray _src, OutputArray _dst, int ddepth, int dx, int dy, int ksize, double scale, double delta, int borderType) 412 { 413 if ((borderType != BORDER_REPLICATE) || ((3 != ksize) && (5 != ksize))) 414 return false; 415 if (fabs(delta) > FLT_EPSILON) 416 return false; 417 if (1 != _src.channels()) 418 return false; 419 420 int bufSize = 0; 421 cv::AutoBuffer<char> buffer; 422 Mat src = _src.getMat(), dst = _dst.getMat(); 423 if ( ddepth < 0 ) 424 ddepth = src.depth(); 425 426 if (src.type() == CV_8U && dst.type() == CV_16S && scale == 1) 427 { 428 if ((dx == 1) && (dy == 0)) 429 { 430 if (0 > ippiFilterSobelNegVertGetBufferSize_8u16s_C1R(ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize),&bufSize)) 431 IPP_RETURN_ERROR 432 buffer.allocate(bufSize); 433 434 if (0 > ippiFilterSobelNegVertBorder_8u16s_C1R(src.ptr<Ipp8u>(), (int)src.step, 435 dst.ptr<Ipp16s>(), (int)dst.step, ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize), 436 ippBorderRepl, 0, (Ipp8u*)(char*)buffer)) 437 IPP_RETURN_ERROR 438 return true; 439 } 440 441 if ((dx == 0) && (dy == 1)) 442 { 443 if (0 > ippiFilterSobelHorizGetBufferSize_8u16s_C1R(ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize),&bufSize)) 444 IPP_RETURN_ERROR 445 buffer.allocate(bufSize); 446 447 if (0 > ippiFilterSobelHorizBorder_8u16s_C1R(src.ptr<Ipp8u>(), (int)src.step, 448 dst.ptr<Ipp16s>(), (int)dst.step, ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize), 449 
ippBorderRepl, 0, (Ipp8u*)(char*)buffer)) 450 IPP_RETURN_ERROR 451 return true; 452 } 453 454 #if !defined(HAVE_IPP_ICV_ONLY) 455 if ((dx == 2) && (dy == 0)) 456 { 457 if (0 > ippiFilterSobelVertSecondGetBufferSize_8u16s_C1R(ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize),&bufSize)) 458 IPP_RETURN_ERROR 459 buffer.allocate(bufSize); 460 461 if (0 > ippiFilterSobelVertSecondBorder_8u16s_C1R(src.ptr<Ipp8u>(), (int)src.step, 462 dst.ptr<Ipp16s>(), (int)dst.step, ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize), 463 ippBorderRepl, 0, (Ipp8u*)(char*)buffer)) 464 IPP_RETURN_ERROR 465 return true; 466 } 467 468 if ((dx == 0) && (dy == 2)) 469 { 470 if (0 > ippiFilterSobelHorizSecondGetBufferSize_8u16s_C1R(ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize),&bufSize)) 471 IPP_RETURN_ERROR 472 buffer.allocate(bufSize); 473 474 if (0 > ippiFilterSobelHorizSecondBorder_8u16s_C1R(src.ptr<Ipp8u>(), (int)src.step, 475 dst.ptr<Ipp16s>(), (int)dst.step, ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize), 476 ippBorderRepl, 0, (Ipp8u*)(char*)buffer)) 477 IPP_RETURN_ERROR 478 return true; 479 } 480 #endif 481 } 482 483 if (src.type() == CV_32F && dst.type() == CV_32F) 484 { 485 #if 0 486 if ((dx == 1) && (dy == 0)) 487 { 488 if (0 > ippiFilterSobelNegVertGetBufferSize_32f_C1R(ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize), &bufSize)) 489 IPP_RETURN_ERROR 490 buffer.allocate(bufSize); 491 492 if (0 > ippiFilterSobelNegVertBorder_32f_C1R(src.ptr<Ipp32f>(), (int)src.step, 493 dst.ptr<Ipp32f>(), (int)dst.step, ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize), 494 ippBorderRepl, 0, (Ipp8u*)(char*)buffer)) 495 IPP_RETURN_ERROR 496 if(scale != 1) 497 ippiMulC_32f_C1R(dst.ptr<Ipp32f>(), (int)dst.step, (Ipp32f)scale, dst.ptr<Ipp32f>(), (int)dst.step, ippiSize(dst.cols*dst.channels(), dst.rows)); 498 return true; 499 } 500 501 if ((dx == 0) && (dy == 1)) 502 { 503 if (0 > 
ippiFilterSobelHorizGetBufferSize_32f_C1R(ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize),&bufSize)) 504 IPP_RETURN_ERROR 505 buffer.allocate(bufSize); 506 if (0 > ippiFilterSobelHorizBorder_32f_C1R(src.ptr<Ipp32f>(), (int)src.step, 507 dst.ptr<Ipp32f>(), (int)dst.step, ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize), 508 ippBorderRepl, 0, (Ipp8u*)(char*)buffer)) 509 IPP_RETURN_ERROR 510 if(scale != 1) 511 ippiMulC_32f_C1R(dst.ptr<Ipp32f>(), (int)dst.step, (Ipp32f)scale, dst.ptr<Ipp32f>(), (int)dst.step, ippiSize(dst.cols*dst.channels(), dst.rows)); 512 return true; 513 } 514 #endif 515 #if !defined(HAVE_IPP_ICV_ONLY) 516 if((dx == 2) && (dy == 0)) 517 { 518 if (0 > ippiFilterSobelVertSecondGetBufferSize_32f_C1R(ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize),&bufSize)) 519 IPP_RETURN_ERROR 520 buffer.allocate(bufSize); 521 522 if (0 > ippiFilterSobelVertSecondBorder_32f_C1R(src.ptr<Ipp32f>(), (int)src.step, 523 dst.ptr<Ipp32f>(), (int)dst.step, ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize), 524 ippBorderRepl, 0, (Ipp8u*)(char*)buffer)) 525 IPP_RETURN_ERROR 526 if(scale != 1) 527 ippiMulC_32f_C1R(dst.ptr<Ipp32f>(), (int)dst.step, (Ipp32f)scale, dst.ptr<Ipp32f>(), (int)dst.step, ippiSize(dst.cols*dst.channels(), dst.rows)); 528 return true; 529 } 530 531 if((dx == 0) && (dy == 2)) 532 { 533 if (0 > ippiFilterSobelHorizSecondGetBufferSize_32f_C1R(ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize),&bufSize)) 534 IPP_RETURN_ERROR 535 buffer.allocate(bufSize); 536 537 if (0 > ippiFilterSobelHorizSecondBorder_32f_C1R(src.ptr<Ipp32f>(), (int)src.step, 538 dst.ptr<Ipp32f>(), (int)dst.step, ippiSize(src.cols, src.rows), (IppiMaskSize)(ksize*10+ksize), 539 ippBorderRepl, 0, (Ipp8u*)(char*)buffer)) 540 IPP_RETURN_ERROR 541 542 if(scale != 1) 543 ippiMulC_32f_C1R(dst.ptr<Ipp32f>(), (int)dst.step, (Ipp32f)scale, dst.ptr<Ipp32f>(), (int)dst.step, ippiSize(dst.cols*dst.channels(), dst.rows)); 544 return true; 545 } 
546 #endif 547 } 548 return false; 549 } 550 551 } 552 553 #endif 554 555 void cv::Sobel( InputArray _src, OutputArray _dst, int ddepth, int dx, int dy, 556 int ksize, double scale, double delta, int borderType ) 557 { 558 int stype = _src.type(), sdepth = CV_MAT_DEPTH(stype), cn = CV_MAT_CN(stype); 559 if (ddepth < 0) 560 ddepth = sdepth; 561 int dtype = CV_MAKE_TYPE(ddepth, cn); 562 _dst.create( _src.size(), dtype ); 563 564 #ifdef HAVE_TEGRA_OPTIMIZATION 565 if (tegra::useTegra() && scale == 1.0 && delta == 0) 566 { 567 Mat src = _src.getMat(), dst = _dst.getMat(); 568 if (ksize == 3 && tegra::sobel3x3(src, dst, dx, dy, borderType)) 569 return; 570 if (ksize == -1 && tegra::scharr(src, dst, dx, dy, borderType)) 571 return; 572 } 573 #endif 574 575 #ifdef HAVE_IPP 576 CV_IPP_CHECK() 577 { 578 if (ksize < 0) 579 { 580 if (IPPDerivScharr(_src, _dst, ddepth, dx, dy, scale, delta, borderType)) 581 { 582 CV_IMPL_ADD(CV_IMPL_IPP); 583 return; 584 } 585 } 586 else if (0 < ksize) 587 { 588 if (IPPDerivSobel(_src, _dst, ddepth, dx, dy, ksize, scale, delta, borderType)) 589 { 590 CV_IMPL_ADD(CV_IMPL_IPP); 591 return; 592 } 593 } 594 } 595 #endif 596 int ktype = std::max(CV_32F, std::max(ddepth, sdepth)); 597 598 Mat kx, ky; 599 getDerivKernels( kx, ky, dx, dy, ksize, false, ktype ); 600 if( scale != 1 ) 601 { 602 // usually the smoothing part is the slowest to compute, 603 // so try to scale it instead of the faster differenciating part 604 if( dx == 0 ) 605 kx *= scale; 606 else 607 ky *= scale; 608 } 609 sepFilter2D( _src, _dst, ddepth, kx, ky, Point(-1, -1), delta, borderType ); 610 } 611 612 613 void cv::Scharr( InputArray _src, OutputArray _dst, int ddepth, int dx, int dy, 614 double scale, double delta, int borderType ) 615 { 616 int stype = _src.type(), sdepth = CV_MAT_DEPTH(stype), cn = CV_MAT_CN(stype); 617 if (ddepth < 0) 618 ddepth = sdepth; 619 int dtype = CV_MAKETYPE(ddepth, cn); 620 _dst.create( _src.size(), dtype ); 621 622 #ifdef HAVE_TEGRA_OPTIMIZATION 623 
if (tegra::useTegra() && scale == 1.0 && delta == 0) 624 { 625 Mat src = _src.getMat(), dst = _dst.getMat(); 626 if (tegra::scharr(src, dst, dx, dy, borderType)) 627 return; 628 } 629 #endif 630 631 #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7) 632 CV_IPP_CHECK() 633 { 634 if (IPPDerivScharr(_src, _dst, ddepth, dx, dy, scale, delta, borderType)) 635 { 636 CV_IMPL_ADD(CV_IMPL_IPP); 637 return; 638 } 639 } 640 #endif 641 int ktype = std::max(CV_32F, std::max(ddepth, sdepth)); 642 643 Mat kx, ky; 644 getScharrKernels( kx, ky, dx, dy, false, ktype ); 645 if( scale != 1 ) 646 { 647 // usually the smoothing part is the slowest to compute, 648 // so try to scale it instead of the faster differenciating part 649 if( dx == 0 ) 650 kx *= scale; 651 else 652 ky *= scale; 653 } 654 sepFilter2D( _src, _dst, ddepth, kx, ky, Point(-1, -1), delta, borderType ); 655 } 656 657 #ifdef HAVE_OPENCL 658 659 namespace cv { 660 661 #define LAPLACIAN_LOCAL_MEM(tileX, tileY, ksize, elsize) (((tileX) + 2 * (int)((ksize) / 2)) * (3 * (tileY) + 2 * (int)((ksize) / 2)) * elsize) 662 663 static bool ocl_Laplacian5(InputArray _src, OutputArray _dst, 664 const Mat & kd, const Mat & ks, double scale, double delta, 665 int borderType, int depth, int ddepth) 666 { 667 const size_t tileSizeX = 16; 668 const size_t tileSizeYmin = 8; 669 670 const ocl::Device dev = ocl::Device::getDefault(); 671 672 int stype = _src.type(); 673 int sdepth = CV_MAT_DEPTH(stype), cn = CV_MAT_CN(stype), esz = CV_ELEM_SIZE(stype); 674 675 bool doubleSupport = dev.doubleFPConfig() > 0; 676 if (!doubleSupport && (sdepth == CV_64F || ddepth == CV_64F)) 677 return false; 678 679 Mat kernelX = kd.reshape(1, 1); 680 if (kernelX.cols % 2 != 1) 681 return false; 682 Mat kernelY = ks.reshape(1, 1); 683 if (kernelY.cols % 2 != 1) 684 return false; 685 CV_Assert(kernelX.cols == kernelY.cols); 686 687 size_t wgs = dev.maxWorkGroupSize(); 688 size_t lmsz = dev.localMemSize(); 689 size_t src_step = _src.step(), src_offset = 
_src.offset(); 690 const size_t tileSizeYmax = wgs / tileSizeX; 691 692 // workaround for Nvidia: 3 channel vector type takes 4*elem_size in local memory 693 int loc_mem_cn = dev.vendorID() == ocl::Device::VENDOR_NVIDIA && cn == 3 ? 4 : cn; 694 695 if (((src_offset % src_step) % esz == 0) && 696 ( 697 (borderType == BORDER_CONSTANT || borderType == BORDER_REPLICATE) || 698 ((borderType == BORDER_REFLECT || borderType == BORDER_WRAP || borderType == BORDER_REFLECT_101) && 699 (_src.cols() >= (int) (kernelX.cols + tileSizeX) && _src.rows() >= (int) (kernelY.cols + tileSizeYmax))) 700 ) && 701 (tileSizeX * tileSizeYmin <= wgs) && 702 (LAPLACIAN_LOCAL_MEM(tileSizeX, tileSizeYmin, kernelX.cols, loc_mem_cn * 4) <= lmsz) 703 ) 704 { 705 Size size = _src.size(), wholeSize; 706 Point origin; 707 int dtype = CV_MAKE_TYPE(ddepth, cn); 708 int wdepth = CV_32F; 709 710 size_t tileSizeY = tileSizeYmax; 711 while ((tileSizeX * tileSizeY > wgs) || (LAPLACIAN_LOCAL_MEM(tileSizeX, tileSizeY, kernelX.cols, loc_mem_cn * 4) > lmsz)) 712 { 713 tileSizeY /= 2; 714 } 715 size_t lt2[2] = { tileSizeX, tileSizeY}; 716 size_t gt2[2] = { lt2[0] * (1 + (size.width - 1) / lt2[0]), lt2[1] }; 717 718 char cvt[2][40]; 719 const char * const borderMap[] = { "BORDER_CONSTANT", "BORDER_REPLICATE", "BORDER_REFLECT", "BORDER_WRAP", 720 "BORDER_REFLECT_101" }; 721 722 String opts = cv::format("-D BLK_X=%d -D BLK_Y=%d -D RADIUS=%d%s%s" 723 " -D convertToWT=%s -D convertToDT=%s" 724 " -D %s -D srcT1=%s -D dstT1=%s -D WT1=%s" 725 " -D srcT=%s -D dstT=%s -D WT=%s" 726 " -D CN=%d ", 727 (int)lt2[0], (int)lt2[1], kernelX.cols / 2, 728 ocl::kernelToStr(kernelX, wdepth, "KERNEL_MATRIX_X").c_str(), 729 ocl::kernelToStr(kernelY, wdepth, "KERNEL_MATRIX_Y").c_str(), 730 ocl::convertTypeStr(sdepth, wdepth, cn, cvt[0]), 731 ocl::convertTypeStr(wdepth, ddepth, cn, cvt[1]), 732 borderMap[borderType], 733 ocl::typeToStr(sdepth), ocl::typeToStr(ddepth), ocl::typeToStr(wdepth), 734 ocl::typeToStr(CV_MAKETYPE(sdepth, cn)), 
735 ocl::typeToStr(CV_MAKETYPE(ddepth, cn)), 736 ocl::typeToStr(CV_MAKETYPE(wdepth, cn)), 737 cn); 738 739 ocl::Kernel k("laplacian", ocl::imgproc::laplacian5_oclsrc, opts); 740 if (k.empty()) 741 return false; 742 UMat src = _src.getUMat(); 743 _dst.create(size, dtype); 744 UMat dst = _dst.getUMat(); 745 746 int src_offset_x = static_cast<int>((src_offset % src_step) / esz); 747 int src_offset_y = static_cast<int>(src_offset / src_step); 748 749 src.locateROI(wholeSize, origin); 750 751 k.args(ocl::KernelArg::PtrReadOnly(src), (int)src_step, src_offset_x, src_offset_y, 752 wholeSize.height, wholeSize.width, ocl::KernelArg::WriteOnly(dst), 753 static_cast<float>(scale), static_cast<float>(delta)); 754 755 return k.run(2, gt2, lt2, false); 756 } 757 int iscale = cvRound(scale), idelta = cvRound(delta); 758 bool floatCoeff = std::fabs(delta - idelta) > DBL_EPSILON || std::fabs(scale - iscale) > DBL_EPSILON; 759 int wdepth = std::max(depth, floatCoeff ? CV_32F : CV_32S), kercn = 1; 760 761 if (!doubleSupport && wdepth == CV_64F) 762 return false; 763 764 char cvt[2][40]; 765 ocl::Kernel k("sumConvert", ocl::imgproc::laplacian5_oclsrc, 766 format("-D ONLY_SUM_CONVERT " 767 "-D srcT=%s -D WT=%s -D dstT=%s -D coeffT=%s -D wdepth=%d " 768 "-D convertToWT=%s -D convertToDT=%s%s", 769 ocl::typeToStr(CV_MAKE_TYPE(depth, kercn)), 770 ocl::typeToStr(CV_MAKE_TYPE(wdepth, kercn)), 771 ocl::typeToStr(CV_MAKE_TYPE(ddepth, kercn)), 772 ocl::typeToStr(wdepth), wdepth, 773 ocl::convertTypeStr(depth, wdepth, kercn, cvt[0]), 774 ocl::convertTypeStr(wdepth, ddepth, kercn, cvt[1]), 775 doubleSupport ? 
" -D DOUBLE_SUPPORT" : "")); 776 if (k.empty()) 777 return false; 778 779 UMat d2x, d2y; 780 sepFilter2D(_src, d2x, depth, kd, ks, Point(-1, -1), 0, borderType); 781 sepFilter2D(_src, d2y, depth, ks, kd, Point(-1, -1), 0, borderType); 782 783 UMat dst = _dst.getUMat(); 784 785 ocl::KernelArg d2xarg = ocl::KernelArg::ReadOnlyNoSize(d2x), 786 d2yarg = ocl::KernelArg::ReadOnlyNoSize(d2y), 787 dstarg = ocl::KernelArg::WriteOnly(dst, cn, kercn); 788 789 if (wdepth >= CV_32F) 790 k.args(d2xarg, d2yarg, dstarg, (float)scale, (float)delta); 791 else 792 k.args(d2xarg, d2yarg, dstarg, iscale, idelta); 793 794 size_t globalsize[] = { dst.cols * cn / kercn, dst.rows }; 795 return k.run(2, globalsize, NULL, false); 796 } 797 798 } 799 800 #endif 801 802 void cv::Laplacian( InputArray _src, OutputArray _dst, int ddepth, int ksize, 803 double scale, double delta, int borderType ) 804 { 805 int stype = _src.type(), sdepth = CV_MAT_DEPTH(stype), cn = CV_MAT_CN(stype); 806 if (ddepth < 0) 807 ddepth = sdepth; 808 _dst.create( _src.size(), CV_MAKETYPE(ddepth, cn) ); 809 810 #ifdef HAVE_IPP 811 CV_IPP_CHECK() 812 { 813 if ((ksize == 3 || ksize == 5) && ((borderType & BORDER_ISOLATED) != 0 || !_src.isSubmatrix()) && 814 ((stype == CV_8UC1 && ddepth == CV_16S) || (ddepth == CV_32F && stype == CV_32FC1)) && !ocl::useOpenCL()) 815 { 816 int iscale = saturate_cast<int>(scale), idelta = saturate_cast<int>(delta); 817 bool floatScale = std::fabs(scale - iscale) > DBL_EPSILON, needScale = iscale != 1; 818 bool floatDelta = std::fabs(delta - idelta) > DBL_EPSILON, needDelta = delta != 0; 819 int borderTypeNI = borderType & ~BORDER_ISOLATED; 820 Mat src = _src.getMat(), dst = _dst.getMat(); 821 822 if (src.data != dst.data) 823 { 824 Ipp32s bufsize; 825 IppStatus status = (IppStatus)-1; 826 IppiSize roisize = { src.cols, src.rows }; 827 IppiMaskSize masksize = ksize == 3 ? 
ippMskSize3x3 : ippMskSize5x5; 828 IppiBorderType borderTypeIpp = ippiGetBorderType(borderTypeNI); 829 830 #define IPP_FILTER_LAPLACIAN(ippsrctype, ippdsttype, ippfavor) \ 831 do \ 832 { \ 833 if (borderTypeIpp >= 0 && ippiFilterLaplacianGetBufferSize_##ippfavor##_C1R(roisize, masksize, &bufsize) >= 0) \ 834 { \ 835 Ipp8u * buffer = ippsMalloc_8u(bufsize); \ 836 status = ippiFilterLaplacianBorder_##ippfavor##_C1R(src.ptr<ippsrctype>(), (int)src.step, dst.ptr<ippdsttype>(), \ 837 (int)dst.step, roisize, masksize, borderTypeIpp, 0, buffer); \ 838 ippsFree(buffer); \ 839 } \ 840 } while ((void)0, 0) 841 842 CV_SUPPRESS_DEPRECATED_START 843 if (sdepth == CV_8U && ddepth == CV_16S && !floatScale && !floatDelta) 844 { 845 IPP_FILTER_LAPLACIAN(Ipp8u, Ipp16s, 8u16s); 846 847 if (needScale && status >= 0) 848 status = ippiMulC_16s_C1IRSfs((Ipp16s)iscale, dst.ptr<Ipp16s>(), (int)dst.step, roisize, 0); 849 if (needDelta && status >= 0) 850 status = ippiAddC_16s_C1IRSfs((Ipp16s)idelta, dst.ptr<Ipp16s>(), (int)dst.step, roisize, 0); 851 } 852 else if (sdepth == CV_32F && ddepth == CV_32F) 853 { 854 IPP_FILTER_LAPLACIAN(Ipp32f, Ipp32f, 32f); 855 856 if (needScale && status >= 0) 857 status = ippiMulC_32f_C1IR((Ipp32f)scale, dst.ptr<Ipp32f>(), (int)dst.step, roisize); 858 if (needDelta && status >= 0) 859 status = ippiAddC_32f_C1IR((Ipp32f)delta, dst.ptr<Ipp32f>(), (int)dst.step, roisize); 860 } 861 CV_SUPPRESS_DEPRECATED_END 862 863 if (status >= 0) 864 { 865 CV_IMPL_ADD(CV_IMPL_IPP); 866 return; 867 } 868 setIppErrorStatus(); 869 } 870 } 871 #undef IPP_FILTER_LAPLACIAN 872 } 873 #endif 874 875 #ifdef HAVE_TEGRA_OPTIMIZATION 876 if (tegra::useTegra() && scale == 1.0 && delta == 0) 877 { 878 Mat src = _src.getMat(), dst = _dst.getMat(); 879 if (ksize == 1 && tegra::laplace1(src, dst, borderType)) 880 return; 881 if (ksize == 3 && tegra::laplace3(src, dst, borderType)) 882 return; 883 if (ksize == 5 && tegra::laplace5(src, dst, borderType)) 884 return; 885 } 886 #endif 887 888 if( 
ksize == 1 || ksize == 3 ) 889 { 890 float K[2][9] = 891 { 892 { 0, 1, 0, 1, -4, 1, 0, 1, 0 }, 893 { 2, 0, 2, 0, -8, 0, 2, 0, 2 } 894 }; 895 Mat kernel(3, 3, CV_32F, K[ksize == 3]); 896 if( scale != 1 ) 897 kernel *= scale; 898 filter2D( _src, _dst, ddepth, kernel, Point(-1, -1), delta, borderType ); 899 } 900 else 901 { 902 int ktype = std::max(CV_32F, std::max(ddepth, sdepth)); 903 int wdepth = sdepth == CV_8U && ksize <= 5 ? CV_16S : sdepth <= CV_32F ? CV_32F : CV_64F; 904 int wtype = CV_MAKETYPE(wdepth, cn); 905 Mat kd, ks; 906 getSobelKernels( kd, ks, 2, 0, ksize, false, ktype ); 907 908 CV_OCL_RUN(_dst.isUMat(), 909 ocl_Laplacian5(_src, _dst, kd, ks, scale, 910 delta, borderType, wdepth, ddepth)) 911 912 const size_t STRIPE_SIZE = 1 << 14; 913 Ptr<FilterEngine> fx = createSeparableLinearFilter(stype, 914 wtype, kd, ks, Point(-1,-1), 0, borderType, borderType, Scalar() ); 915 Ptr<FilterEngine> fy = createSeparableLinearFilter(stype, 916 wtype, ks, kd, Point(-1,-1), 0, borderType, borderType, Scalar() ); 917 918 Mat src = _src.getMat(), dst = _dst.getMat(); 919 int y = fx->start(src), dsty = 0, dy = 0; 920 fy->start(src); 921 const uchar* sptr = src.ptr(y); 922 923 int dy0 = std::min(std::max((int)(STRIPE_SIZE/(CV_ELEM_SIZE(stype)*src.cols)), 1), src.rows); 924 Mat d2x( dy0 + kd.rows - 1, src.cols, wtype ); 925 Mat d2y( dy0 + kd.rows - 1, src.cols, wtype ); 926 927 for( ; dsty < src.rows; sptr += dy0*src.step, dsty += dy ) 928 { 929 fx->proceed( sptr, (int)src.step, dy0, d2x.ptr(), (int)d2x.step ); 930 dy = fy->proceed( sptr, (int)src.step, dy0, d2y.ptr(), (int)d2y.step ); 931 if( dy > 0 ) 932 { 933 Mat dstripe = dst.rowRange(dsty, dsty + dy); 934 d2x.rows = d2y.rows = dy; // modify the headers, which should work 935 d2x += d2y; 936 d2x.convertTo( dstripe, ddepth, scale, delta ); 937 } 938 } 939 } 940 } 941 942 ///////////////////////////////////////////////////////////////////////////////////////// 943 944 CV_IMPL void 945 cvSobel( const void* srcarr, void* 
dstarr, int dx, int dy, int aperture_size ) 946 { 947 cv::Mat src = cv::cvarrToMat(srcarr), dst = cv::cvarrToMat(dstarr); 948 949 CV_Assert( src.size() == dst.size() && src.channels() == dst.channels() ); 950 951 cv::Sobel( src, dst, dst.depth(), dx, dy, aperture_size, 1, 0, cv::BORDER_REPLICATE ); 952 if( CV_IS_IMAGE(srcarr) && ((IplImage*)srcarr)->origin && dy % 2 != 0 ) 953 dst *= -1; 954 } 955 956 957 CV_IMPL void 958 cvLaplace( const void* srcarr, void* dstarr, int aperture_size ) 959 { 960 cv::Mat src = cv::cvarrToMat(srcarr), dst = cv::cvarrToMat(dstarr); 961 962 CV_Assert( src.size() == dst.size() && src.channels() == dst.channels() ); 963 964 cv::Laplacian( src, dst, dst.depth(), aperture_size, 1, 0, cv::BORDER_REPLICATE ); 965 } 966 967 /* End of file. */ 968