1 /*M/////////////////////////////////////////////////////////////////////////////////////// 2 // 3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. 4 // 5 // By downloading, copying, installing or using the software you agree to this license. 6 // If you do not agree to this license, do not download, install, 7 // copy or use the software. 8 // 9 // 10 // License Agreement 11 // For Open Source Computer Vision Library 12 // 13 // Copyright (C) 2000-2008, Intel Corporation, all rights reserved. 14 // Copyright (C) 2009, Willow Garage Inc., all rights reserved. 15 // Copyright (C) 2014-2015, Itseez Inc., all rights reserved. 16 // Third party copyrights are property of their respective owners. 17 // 18 // Redistribution and use in source and binary forms, with or without modification, 19 // are permitted provided that the following conditions are met: 20 // 21 // * Redistribution's of source code must retain the above copyright notice, 22 // this list of conditions and the following disclaimer. 23 // 24 // * Redistribution's in binary form must reproduce the above copyright notice, 25 // this list of conditions and the following disclaimer in the documentation 26 // and/or other materials provided with the distribution. 27 // 28 // * The name of the copyright holders may not be used to endorse or promote products 29 // derived from this software without specific prior written permission. 30 // 31 // This software is provided by the copyright holders and contributors "as is" and 32 // any express or implied warranties, including, but not limited to, the implied 33 // warranties of merchantability and fitness for a particular purpose are disclaimed. 34 // In no event shall the Intel Corporation or contributors be liable for any direct, 35 // indirect, incidental, special, exemplary, or consequential damages 36 // (including, but not limited to, procurement of substitute goods or services; 37 // loss of use, data, or profits; or business interruption) however caused 38 // and on any theory of liability, whether in contract, strict liability, 39 // or tort (including negligence or otherwise) arising in any way out of 40 // the use of this software, even if advised of the possibility of such damage. 41 // 42 //M*/ 43 44 #include "precomp.hpp" 45 #include "opencl_kernels_imgproc.hpp" 46 47 namespace cv 48 { 49 50 static void calcMinEigenVal( const Mat& _cov, Mat& _dst ) 51 { 52 int i, j; 53 Size size = _cov.size(); 54 #if CV_SSE 55 volatile bool simd = checkHardwareSupport(CV_CPU_SSE); 56 #endif 57 58 if( _cov.isContinuous() && _dst.isContinuous() ) 59 { 60 size.width *= size.height; 61 size.height = 1; 62 } 63 64 for( i = 0; i < size.height; i++ ) 65 { 66 const float* cov = _cov.ptr<float>(i); 67 float* dst = _dst.ptr<float>(i); 68 j = 0; 69 #if CV_SSE 70 if( simd ) 71 { 72 __m128 half = _mm_set1_ps(0.5f); 73 for( ; j <= size.width - 4; j += 4 ) 74 { 75 __m128 t0 = _mm_loadu_ps(cov + j*3); // a0 b0 c0 x 76 __m128 t1 = _mm_loadu_ps(cov + j*3 + 3); // a1 b1 c1 x 77 __m128 t2 = _mm_loadu_ps(cov + j*3 + 6); // a2 b2 c2 x 78 __m128 t3 = _mm_loadu_ps(cov + j*3 + 9); // a3 b3 c3 x 79 __m128 a, b, c, t; 80 t = _mm_unpacklo_ps(t0, t1); // a0 a1 b0 b1 81 c = _mm_unpackhi_ps(t0, t1); // c0 c1 x x 82 b = _mm_unpacklo_ps(t2, t3); // a2 a3 b2 b3 83 c = _mm_movelh_ps(c, _mm_unpackhi_ps(t2, t3)); // c0 c1 c2 c3 84 a = _mm_movelh_ps(t, b); 85 b = _mm_movehl_ps(b, t); 86 a = _mm_mul_ps(a, half); 87 c = _mm_mul_ps(c, half); 88 t = _mm_sub_ps(a, c); 89 t = _mm_add_ps(_mm_mul_ps(t, t), _mm_mul_ps(b,b)); 90 a = _mm_sub_ps(_mm_add_ps(a, c), _mm_sqrt_ps(t)); 91 _mm_storeu_ps(dst + j, a); 92 } 93 } 94 #elif CV_NEON 95 float32x4_t v_half = vdupq_n_f32(0.5f); 96 for( ; j <= size.width - 4; j += 4 ) 97 { 98 float32x4x3_t v_src = vld3q_f32(cov + j * 3); 99 float32x4_t v_a = vmulq_f32(v_src.val[0], v_half); 100 float32x4_t v_b = v_src.val[1]; 101 float32x4_t v_c = vmulq_f32(v_src.val[2], v_half); 102 103 float32x4_t v_t = vsubq_f32(v_a, v_c); 104 v_t = vmlaq_f32(vmulq_f32(v_t, v_t), v_b, v_b); 105 vst1q_f32(dst + j, vsubq_f32(vaddq_f32(v_a, v_c), cv_vsqrtq_f32(v_t))); 106 } 107 #endif 108 for( ; j < size.width; j++ ) 109 { 110 float a = cov[j*3]*0.5f; 111 float b = cov[j*3+1]; 112 float c = cov[j*3+2]*0.5f; 113 dst[j] = (float)((a + c) - std::sqrt((a - c)*(a - c) + b*b)); 114 } 115 } 116 } 117 118 119 static void calcHarris( const Mat& _cov, Mat& _dst, double k ) 120 { 121 int i, j; 122 Size size = _cov.size(); 123 #if CV_SSE 124 volatile bool simd = checkHardwareSupport(CV_CPU_SSE); 125 #endif 126 127 if( _cov.isContinuous() && _dst.isContinuous() ) 128 { 129 size.width *= size.height; 130 size.height = 1; 131 } 132 133 for( i = 0; i < size.height; i++ ) 134 { 135 const float* cov = _cov.ptr<float>(i); 136 float* dst = _dst.ptr<float>(i); 137 j = 0; 138 139 #if CV_SSE 140 if( simd ) 141 { 142 __m128 k4 = _mm_set1_ps((float)k); 143 for( ; j <= size.width - 4; j += 4 ) 144 { 145 __m128 t0 = _mm_loadu_ps(cov + j*3); // a0 b0 c0 x 146 __m128 t1 = _mm_loadu_ps(cov + j*3 + 3); // a1 b1 c1 x 147 __m128 t2 = _mm_loadu_ps(cov + j*3 + 6); // a2 b2 c2 x 148 __m128 t3 = _mm_loadu_ps(cov + j*3 + 9); // a3 b3 c3 x 149 __m128 a, b, c, t; 150 t = _mm_unpacklo_ps(t0, t1); // a0 a1 b0 b1 151 c = _mm_unpackhi_ps(t0, t1); // c0 c1 x x 152 b = _mm_unpacklo_ps(t2, t3); // a2 a3 b2 b3 153 c = _mm_movelh_ps(c, _mm_unpackhi_ps(t2, t3)); // c0 c1 c2 c3 154 a = _mm_movelh_ps(t, b); 155 b = _mm_movehl_ps(b, t); 156 t = _mm_add_ps(a, c); 157 a = _mm_sub_ps(_mm_mul_ps(a, c), _mm_mul_ps(b, b)); 158 t = _mm_mul_ps(_mm_mul_ps(k4, t), t); 159 a = _mm_sub_ps(a, t); 160 _mm_storeu_ps(dst + j, a); 161 } 162 } 163 #elif CV_NEON 164 float32x4_t v_k = vdupq_n_f32((float)k); 165 166 for( ; j <= size.width - 4; j += 4 ) 167 { 168 float32x4x3_t v_src = vld3q_f32(cov + j * 3); 169 float32x4_t v_a = v_src.val[0], v_b = v_src.val[1], v_c = v_src.val[2]; 170 float32x4_t v_ac_bb = vmlsq_f32(vmulq_f32(v_a, v_c), v_b, v_b); 171 float32x4_t v_ac = vaddq_f32(v_a, v_c); 172 vst1q_f32(dst + j, vmlsq_f32(v_ac_bb, v_k, vmulq_f32(v_ac, v_ac))); 173 } 174 #endif 175 176 for( ; j < size.width; j++ ) 177 { 178 float a = cov[j*3]; 179 float b = cov[j*3+1]; 180 float c = cov[j*3+2]; 181 dst[j] = (float)(a*c - b*b - k*(a + c)*(a + c)); 182 } 183 } 184 } 185 186 187 static void eigen2x2( const float* cov, float* dst, int n ) 188 { 189 for( int j = 0; j < n; j++ ) 190 { 191 double a = cov[j*3]; 192 double b = cov[j*3+1]; 193 double c = cov[j*3+2]; 194 195 double u = (a + c)*0.5; 196 double v = std::sqrt((a - c)*(a - c)*0.25 + b*b); 197 double l1 = u + v; 198 double l2 = u - v; 199 200 double x = b; 201 double y = l1 - a; 202 double e = fabs(x); 203 204 if( e + fabs(y) < 1e-4 ) 205 { 206 y = b; 207 x = l1 - c; 208 e = fabs(x); 209 if( e + fabs(y) < 1e-4 ) 210 { 211 e = 1./(e + fabs(y) + FLT_EPSILON); 212 x *= e, y *= e; 213 } 214 } 215 216 double d = 1./std::sqrt(x*x + y*y + DBL_EPSILON); 217 dst[6*j] = (float)l1; 218 dst[6*j + 2] = (float)(x*d); 219 dst[6*j + 3] = (float)(y*d); 220 221 x = b; 222 y = l2 - a; 223 e = fabs(x); 224 225 if( e + fabs(y) < 1e-4 ) 226 { 227 y = b; 228 x = l2 - c; 229 e = fabs(x); 230 if( e + fabs(y) < 1e-4 ) 231 { 232 e = 1./(e + fabs(y) + FLT_EPSILON); 233 x *= e, y *= e; 234 } 235 } 236 237 d = 1./std::sqrt(x*x + y*y + DBL_EPSILON); 238 dst[6*j + 1] = (float)l2; 239 dst[6*j + 4] = (float)(x*d); 240 dst[6*j + 5] = (float)(y*d); 241 } 242 } 243 244 static void calcEigenValsVecs( const Mat& _cov, Mat& _dst ) 245 { 246 Size size = _cov.size(); 247 if( _cov.isContinuous() && _dst.isContinuous() ) 248 { 249 size.width *= size.height; 250 size.height = 1; 251 } 252 253 for( int i = 0; i < size.height; i++ ) 254 { 255 const float* cov = _cov.ptr<float>(i); 256 float* dst = _dst.ptr<float>(i); 257 258 eigen2x2(cov, dst, size.width); 259 } 260 } 261 262 263 enum { MINEIGENVAL=0, HARRIS=1, EIGENVALSVECS=2 }; 264 265 266 static void 267 cornerEigenValsVecs( const Mat& src, Mat& eigenv, int block_size, 268 int aperture_size, int op_type, double k=0., 269 int borderType=BORDER_DEFAULT ) 270 { 271 #ifdef HAVE_TEGRA_OPTIMIZATION 272 if (tegra::useTegra() && tegra::cornerEigenValsVecs(src, eigenv, block_size, aperture_size, op_type, k, borderType)) 273 return; 274 #endif 275 #if CV_SSE2 276 bool haveSSE2 = checkHardwareSupport(CV_CPU_SSE2); 277 #endif 278 279 int depth = src.depth(); 280 double scale = (double)(1 << ((aperture_size > 0 ? aperture_size : 3) - 1)) * block_size; 281 if( aperture_size < 0 ) 282 scale *= 2.0; 283 if( depth == CV_8U ) 284 scale *= 255.0; 285 scale = 1.0/scale; 286 287 CV_Assert( src.type() == CV_8UC1 || src.type() == CV_32FC1 ); 288 289 Mat Dx, Dy; 290 if( aperture_size > 0 ) 291 { 292 Sobel( src, Dx, CV_32F, 1, 0, aperture_size, scale, 0, borderType ); 293 Sobel( src, Dy, CV_32F, 0, 1, aperture_size, scale, 0, borderType ); 294 } 295 else 296 { 297 Scharr( src, Dx, CV_32F, 1, 0, scale, 0, borderType ); 298 Scharr( src, Dy, CV_32F, 0, 1, scale, 0, borderType ); 299 } 300 301 Size size = src.size(); 302 Mat cov( size, CV_32FC3 ); 303 int i, j; 304 305 for( i = 0; i < size.height; i++ ) 306 { 307 float* cov_data = cov.ptr<float>(i); 308 const float* dxdata = Dx.ptr<float>(i); 309 const float* dydata = Dy.ptr<float>(i); 310 j = 0; 311 312 #if CV_NEON 313 for( ; j <= size.width - 4; j += 4 ) 314 { 315 float32x4_t v_dx = vld1q_f32(dxdata + j); 316 float32x4_t v_dy = vld1q_f32(dydata + j); 317 318 float32x4x3_t v_dst; 319 v_dst.val[0] = vmulq_f32(v_dx, v_dx); 320 v_dst.val[1] = vmulq_f32(v_dx, v_dy); 321 v_dst.val[2] = vmulq_f32(v_dy, v_dy); 322 323 vst3q_f32(cov_data + j * 3, v_dst); 324 } 325 #elif CV_SSE2 326 if (haveSSE2) 327 { 328 for( ; j <= size.width - 8; j += 8 ) 329 { 330 __m128 v_dx_0 = _mm_loadu_ps(dxdata + j); 331 __m128 v_dx_1 = _mm_loadu_ps(dxdata + j + 4); 332 __m128 v_dy_0 = _mm_loadu_ps(dydata + j); 333 __m128 v_dy_1 = _mm_loadu_ps(dydata + j + 4); 334 335 __m128 v_dx2_0 = _mm_mul_ps(v_dx_0, v_dx_0); 336 __m128 v_dxy_0 = _mm_mul_ps(v_dx_0, v_dy_0); 337 __m128 v_dy2_0 = _mm_mul_ps(v_dy_0, v_dy_0); 338 __m128 v_dx2_1 = _mm_mul_ps(v_dx_1, v_dx_1); 339 __m128 v_dxy_1 = _mm_mul_ps(v_dx_1, v_dy_1); 340 __m128 v_dy2_1 = _mm_mul_ps(v_dy_1, v_dy_1); 341 342 _mm_interleave_ps(v_dx2_0, v_dx2_1, v_dxy_0, v_dxy_1, v_dy2_0, v_dy2_1); 343 344 _mm_storeu_ps(cov_data + j * 3, v_dx2_0); 345 _mm_storeu_ps(cov_data + j * 3 + 4, v_dx2_1); 346 _mm_storeu_ps(cov_data + j * 3 + 8, v_dxy_0); 347 _mm_storeu_ps(cov_data + j * 3 + 12, v_dxy_1); 348 _mm_storeu_ps(cov_data + j * 3 + 16, v_dy2_0); 349 _mm_storeu_ps(cov_data + j * 3 + 20, v_dy2_1); 350 } 351 } 352 #endif 353 354 for( ; j < size.width; j++ ) 355 { 356 float dx = dxdata[j]; 357 float dy = dydata[j]; 358 359 cov_data[j*3] = dx*dx; 360 cov_data[j*3+1] = dx*dy; 361 cov_data[j*3+2] = dy*dy; 362 } 363 } 364 365 boxFilter(cov, cov, cov.depth(), Size(block_size, block_size), 366 Point(-1,-1), false, borderType ); 367 368 if( op_type == MINEIGENVAL ) 369 calcMinEigenVal( cov, eigenv ); 370 else if( op_type == HARRIS ) 371 calcHarris( cov, eigenv, k ); 372 else if( op_type == EIGENVALSVECS ) 373 calcEigenValsVecs( cov, eigenv ); 374 } 375 376 #ifdef HAVE_OPENCL 377 378 static bool extractCovData(InputArray _src, UMat & Dx, UMat & Dy, int depth, 379 float scale, int aperture_size, int borderType) 380 { 381 UMat src = _src.getUMat(); 382 383 Size wholeSize; 384 Point ofs; 385 src.locateROI(wholeSize, ofs); 386 387 const int sobel_lsz = 16; 388 if ((aperture_size == 3 || aperture_size == 5 || aperture_size == 7 || aperture_size == -1) && 389 wholeSize.height > sobel_lsz + (aperture_size >> 1) && 390 wholeSize.width > sobel_lsz + (aperture_size >> 1)) 391 { 392 CV_Assert(depth == CV_8U || depth == CV_32F); 393 394 Dx.create(src.size(), CV_32FC1); 395 Dy.create(src.size(), CV_32FC1); 396 397 size_t localsize[2] = { sobel_lsz, sobel_lsz }; 398 size_t globalsize[2] = { localsize[0] * (1 + (src.cols - 1) / localsize[0]), 399 localsize[1] * (1 + (src.rows - 1) / localsize[1]) }; 400 401 int src_offset_x = (int)((src.offset % src.step) / src.elemSize()); 402 int src_offset_y = (int)(src.offset / src.step); 403 404 const char * const borderTypes[] = { "BORDER_CONSTANT", "BORDER_REPLICATE", "BORDER_REFLECT", 405 "BORDER_WRAP", "BORDER_REFLECT101" }; 406 407 ocl::Kernel k(format("sobel%d", aperture_size).c_str(), ocl::imgproc::covardata_oclsrc, 408 cv::format("-D BLK_X=%d -D BLK_Y=%d -D %s -D SRCTYPE=%s%s", 409 (int)localsize[0], (int)localsize[1], borderTypes[borderType], ocl::typeToStr(depth), 410 aperture_size < 0 ? " -D SCHARR" : "")); 411 if (k.empty()) 412 return false; 413 414 k.args(ocl::KernelArg::PtrReadOnly(src), (int)src.step, src_offset_x, src_offset_y, 415 ocl::KernelArg::WriteOnlyNoSize(Dx), ocl::KernelArg::WriteOnly(Dy), 416 wholeSize.height, wholeSize.width, scale); 417 418 return k.run(2, globalsize, localsize, false); 419 } 420 else 421 { 422 if (aperture_size > 0) 423 { 424 Sobel(_src, Dx, CV_32F, 1, 0, aperture_size, scale, 0, borderType); 425 Sobel(_src, Dy, CV_32F, 0, 1, aperture_size, scale, 0, borderType); 426 } 427 else 428 { 429 Scharr(_src, Dx, CV_32F, 1, 0, scale, 0, borderType); 430 Scharr(_src, Dy, CV_32F, 0, 1, scale, 0, borderType); 431 } 432 } 433 434 return true; 435 } 436 437 static bool ocl_cornerMinEigenValVecs(InputArray _src, OutputArray _dst, int block_size, 438 int aperture_size, double k, int borderType, int op_type) 439 { 440 CV_Assert(op_type == HARRIS || op_type == MINEIGENVAL); 441 442 if ( !(borderType == BORDER_CONSTANT || borderType == BORDER_REPLICATE || 443 borderType == BORDER_REFLECT || borderType == BORDER_REFLECT_101) ) 444 return false; 445 446 int type = _src.type(), depth = CV_MAT_DEPTH(type); 447 if ( !(type == CV_8UC1 || type == CV_32FC1) ) 448 return false; 449 450 const char * const borderTypes[] = { "BORDER_CONSTANT", "BORDER_REPLICATE", "BORDER_REFLECT", 451 "BORDER_WRAP", "BORDER_REFLECT101" }; 452 const char * const cornerType[] = { "CORNER_MINEIGENVAL", "CORNER_HARRIS", 0 }; 453 454 455 double scale = (double)(1 << ((aperture_size > 0 ? aperture_size : 3) - 1)) * block_size; 456 if (aperture_size < 0) 457 scale *= 2.0; 458 if (depth == CV_8U) 459 scale *= 255.0; 460 scale = 1.0 / scale; 461 462 UMat Dx, Dy; 463 if (!extractCovData(_src, Dx, Dy, depth, (float)scale, aperture_size, borderType)) 464 return false; 465 466 ocl::Kernel cornelKernel("corner", ocl::imgproc::corner_oclsrc, 467 format("-D anX=%d -D anY=%d -D ksX=%d -D ksY=%d -D %s -D %s", 468 block_size / 2, block_size / 2, block_size, block_size, 469 borderTypes[borderType], cornerType[op_type])); 470 if (cornelKernel.empty()) 471 return false; 472 473 _dst.createSameSize(_src, CV_32FC1); 474 UMat dst = _dst.getUMat(); 475 476 cornelKernel.args(ocl::KernelArg::ReadOnly(Dx), ocl::KernelArg::ReadOnly(Dy), 477 ocl::KernelArg::WriteOnly(dst), (float)k); 478 479 size_t blockSizeX = 256, blockSizeY = 1; 480 size_t gSize = blockSizeX - block_size / 2 * 2; 481 size_t globalSizeX = (Dx.cols) % gSize == 0 ? Dx.cols / gSize * blockSizeX : (Dx.cols / gSize + 1) * blockSizeX; 482 size_t rows_per_thread = 2; 483 size_t globalSizeY = ((Dx.rows + rows_per_thread - 1) / rows_per_thread) % blockSizeY == 0 ? 484 ((Dx.rows + rows_per_thread - 1) / rows_per_thread) : 485 (((Dx.rows + rows_per_thread - 1) / rows_per_thread) / blockSizeY + 1) * blockSizeY; 486 487 size_t globalsize[2] = { globalSizeX, globalSizeY }, localsize[2] = { blockSizeX, blockSizeY }; 488 return cornelKernel.run(2, globalsize, localsize, false); 489 } 490 491 static bool ocl_preCornerDetect( InputArray _src, OutputArray _dst, int ksize, int borderType, int depth ) 492 { 493 UMat Dx, Dy, D2x, D2y, Dxy; 494 495 if (!extractCovData(_src, Dx, Dy, depth, 1, ksize, borderType)) 496 return false; 497 498 Sobel( _src, D2x, CV_32F, 2, 0, ksize, 1, 0, borderType ); 499 Sobel( _src, D2y, CV_32F, 0, 2, ksize, 1, 0, borderType ); 500 Sobel( _src, Dxy, CV_32F, 1, 1, ksize, 1, 0, borderType ); 501 502 _dst.create( _src.size(), CV_32FC1 ); 503 UMat dst = _dst.getUMat(); 504 505 double factor = 1 << (ksize - 1); 506 if( depth == CV_8U ) 507 factor *= 255; 508 factor = 1./(factor * factor * factor); 509 510 ocl::Kernel k("preCornerDetect", ocl::imgproc::precornerdetect_oclsrc); 511 if (k.empty()) 512 return false; 513 514 k.args(ocl::KernelArg::ReadOnlyNoSize(Dx), ocl::KernelArg::ReadOnlyNoSize(Dy), 515 ocl::KernelArg::ReadOnlyNoSize(D2x), ocl::KernelArg::ReadOnlyNoSize(D2y), 516 ocl::KernelArg::ReadOnlyNoSize(Dxy), ocl::KernelArg::WriteOnly(dst), (float)factor); 517 518 size_t globalsize[2] = { dst.cols, dst.rows }; 519 return k.run(2, globalsize, NULL, false); 520 } 521 522 #endif 523 524 } 525 526 void cv::cornerMinEigenVal( InputArray _src, OutputArray _dst, int blockSize, int ksize, int borderType ) 527 { 528 CV_OCL_RUN(_src.dims() <= 2 && _dst.isUMat(), 529 ocl_cornerMinEigenValVecs(_src, _dst, blockSize, ksize, 0.0, borderType, MINEIGENVAL)) 530 531 Mat src = _src.getMat(); 532 _dst.create( src.size(), CV_32FC1 ); 533 Mat dst = _dst.getMat(); 534 #if defined(HAVE_IPP) && (IPP_VERSION_MAJOR >= 8) 535 CV_IPP_CHECK() 536 { 537 typedef IppStatus (CV_STDCALL * ippiMinEigenValGetBufferSize)(IppiSize, int, int, int*); 538 typedef IppStatus (CV_STDCALL * ippiMinEigenVal)(const void*, int, Ipp32f*, int, IppiSize, IppiKernelType, int, int, Ipp8u*); 539 IppiKernelType kerType; 540 int kerSize = ksize; 541 if (ksize < 0) 542 { 543 kerType = ippKernelScharr; 544 kerSize = 3; 545 } else 546 { 547 kerType = ippKernelSobel; 548 } 549 bool isolated = (borderType & BORDER_ISOLATED) != 0; 550 int borderTypeNI = borderType & ~BORDER_ISOLATED; 551 if ((borderTypeNI == BORDER_REPLICATE && (!src.isSubmatrix() || isolated)) && 552 (kerSize == 3 || kerSize == 5) && (blockSize == 3 || blockSize == 5)) 553 { 554 ippiMinEigenValGetBufferSize getBufferSizeFunc = 0; 555 ippiMinEigenVal minEigenValFunc = 0; 556 float norm_coef = 0.f; 557 558 if (src.type() == CV_8UC1) 559 { 560 getBufferSizeFunc = (ippiMinEigenValGetBufferSize) ippiMinEigenValGetBufferSize_8u32f_C1R; 561 minEigenValFunc = (ippiMinEigenVal) ippiMinEigenVal_8u32f_C1R; 562 norm_coef = 1.f / 255.f; 563 } else if (src.type() == CV_32FC1) 564 { 565 getBufferSizeFunc = (ippiMinEigenValGetBufferSize) ippiMinEigenValGetBufferSize_32f_C1R; 566 minEigenValFunc = (ippiMinEigenVal) ippiMinEigenVal_32f_C1R; 567 norm_coef = 255.f; 568 } 569 norm_coef = kerType == ippKernelSobel ? norm_coef : norm_coef / 2.45f; 570 571 if (getBufferSizeFunc && minEigenValFunc) 572 { 573 int bufferSize; 574 IppiSize srcRoi = { src.cols, src.rows }; 575 IppStatus ok = getBufferSizeFunc(srcRoi, kerSize, blockSize, &bufferSize); 576 if (ok >= 0) 577 { 578 AutoBuffer<uchar> buffer(bufferSize); 579 ok = minEigenValFunc(src.ptr(), (int) src.step, dst.ptr<Ipp32f>(), (int) dst.step, srcRoi, kerType, kerSize, blockSize, buffer); 580 CV_SUPPRESS_DEPRECATED_START 581 if (ok >= 0) ok = ippiMulC_32f_C1IR(norm_coef, dst.ptr<Ipp32f>(), (int) dst.step, srcRoi); 582 CV_SUPPRESS_DEPRECATED_END 583 if (ok >= 0) 584 { 585 CV_IMPL_ADD(CV_IMPL_IPP); 586 return; 587 } 588 } 589 setIppErrorStatus(); 590 } 591 } 592 } 593 #endif 594 cornerEigenValsVecs( src, dst, blockSize, ksize, MINEIGENVAL, 0, borderType ); 595 } 596 597 void cv::cornerHarris( InputArray _src, OutputArray _dst, int blockSize, int ksize, double k, int borderType ) 598 { 599 CV_OCL_RUN(_src.dims() <= 2 && _dst.isUMat(), 600 ocl_cornerMinEigenValVecs(_src, _dst, blockSize, ksize, k, borderType, HARRIS)) 601 602 Mat src = _src.getMat(); 603 _dst.create( src.size(), CV_32FC1 ); 604 Mat dst = _dst.getMat(); 605 606 #if IPP_VERSION_X100 >= 801 && 0 607 CV_IPP_CHECK() 608 { 609 int type = src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type); 610 int borderTypeNI = borderType & ~BORDER_ISOLATED; 611 bool isolated = (borderType & BORDER_ISOLATED) != 0; 612 613 if ( (ksize == 3 || ksize == 5) && (type == CV_8UC1 || type == CV_32FC1) && 614 (borderTypeNI == BORDER_CONSTANT || borderTypeNI == BORDER_REPLICATE) && cn == 1 && (!src.isSubmatrix() || isolated) ) 615 { 616 IppiSize roisize = { src.cols, src.rows }; 617 IppiMaskSize masksize = ksize == 5 ? ippMskSize5x5 : ippMskSize3x3; 618 IppDataType datatype = type == CV_8UC1 ? ipp8u : ipp32f; 619 Ipp32s bufsize = 0; 620 621 double scale = (double)(1 << ((ksize > 0 ? ksize : 3) - 1)) * blockSize; 622 if (ksize < 0) 623 scale *= 2.0; 624 if (depth == CV_8U) 625 scale *= 255.0; 626 scale = std::pow(scale, -4.0); 627 628 if (ippiHarrisCornerGetBufferSize(roisize, masksize, blockSize, datatype, cn, &bufsize) >= 0) 629 { 630 Ipp8u * buffer = ippsMalloc_8u(bufsize); 631 IppiDifferentialKernel filterType = ksize > 0 ? ippFilterSobel : ippFilterScharr; 632 IppiBorderType borderTypeIpp = borderTypeNI == BORDER_CONSTANT ? ippBorderConst : ippBorderRepl; 633 IppStatus status = (IppStatus)-1; 634 635 if (depth == CV_8U) 636 status = ippiHarrisCorner_8u32f_C1R((const Ipp8u *)src.data, (int)src.step, (Ipp32f *)dst.data, (int)dst.step, roisize, 637 filterType, masksize, blockSize, (Ipp32f)k, (Ipp32f)scale, borderTypeIpp, 0, buffer); 638 else if (depth == CV_32F) 639 status = ippiHarrisCorner_32f_C1R((const Ipp32f *)src.data, (int)src.step, (Ipp32f *)dst.data, (int)dst.step, roisize, 640 filterType, masksize, blockSize, (Ipp32f)k, (Ipp32f)scale, borderTypeIpp, 0, buffer); 641 ippsFree(buffer); 642 643 if (status >= 0) 644 { 645 CV_IMPL_ADD(CV_IMPL_IPP); 646 return; 647 } 648 } 649 setIppErrorStatus(); 650 } 651 } 652 #endif 653 654 cornerEigenValsVecs( src, dst, blockSize, ksize, HARRIS, k, borderType ); 655 } 656 657 658 void cv::cornerEigenValsAndVecs( InputArray _src, OutputArray _dst, int blockSize, int ksize, int borderType ) 659 { 660 Mat src = _src.getMat(); 661 Size dsz = _dst.size(); 662 int dtype = _dst.type(); 663 664 if( dsz.height != src.rows || dsz.width*CV_MAT_CN(dtype) != src.cols*6 || CV_MAT_DEPTH(dtype) != CV_32F ) 665 _dst.create( src.size(), CV_32FC(6) ); 666 Mat dst = _dst.getMat(); 667 cornerEigenValsVecs( src, dst, blockSize, ksize, EIGENVALSVECS, 0, borderType ); 668 } 669 670 671 void cv::preCornerDetect( InputArray _src, OutputArray _dst, int ksize, int borderType ) 672 { 673 int type = _src.type(); 674 CV_Assert( type == CV_8UC1 || type == CV_32FC1 ); 675 676 CV_OCL_RUN( _src.dims() <= 2 && _dst.isUMat(), 677 ocl_preCornerDetect(_src, _dst, ksize, borderType, CV_MAT_DEPTH(type))) 678 679 Mat Dx, Dy, D2x, D2y, Dxy, src = _src.getMat(); 680 _dst.create( src.size(), CV_32FC1 ); 681 Mat dst = _dst.getMat(); 682 683 Sobel( src, Dx, CV_32F, 1, 0, ksize, 1, 0, borderType ); 684 Sobel( src, Dy, CV_32F, 0, 1, ksize, 1, 0, borderType ); 685 Sobel( src, D2x, CV_32F, 2, 0, ksize, 1, 0, borderType ); 686 Sobel( src, D2y, CV_32F, 0, 2, ksize, 1, 0, borderType ); 687 Sobel( src, Dxy, CV_32F, 1, 1, ksize, 1, 0, borderType ); 688 689 double factor = 1 << (ksize - 1); 690 if( src.depth() == CV_8U ) 691 factor *= 255; 692 factor = 1./(factor * factor * factor); 693 #if CV_NEON || CV_SSE2 694 float factor_f = (float)factor; 695 #endif 696 697 #if CV_SSE2 698 volatile bool haveSSE2 = cv::checkHardwareSupport(CV_CPU_SSE2); 699 __m128 v_factor = _mm_set1_ps(factor_f), v_m2 = _mm_set1_ps(-2.0f); 700 #endif 701 702 Size size = src.size(); 703 int i, j; 704 for( i = 0; i < size.height; i++ ) 705 { 706 float* dstdata = dst.ptr<float>(i); 707 const float* dxdata = Dx.ptr<float>(i); 708 const float* dydata = Dy.ptr<float>(i); 709 const float* d2xdata = D2x.ptr<float>(i); 710 const float* d2ydata = D2y.ptr<float>(i); 711 const float* dxydata = Dxy.ptr<float>(i); 712 713 j = 0; 714 715 #if CV_SSE2 716 if (haveSSE2) 717 { 718 for( ; j <= size.width - 4; j += 4 ) 719 { 720 __m128 v_dx = _mm_loadu_ps((const float *)(dxdata + j)); 721 __m128 v_dy = _mm_loadu_ps((const float *)(dydata + j)); 722 723 __m128 v_s1 = _mm_mul_ps(_mm_mul_ps(v_dx, v_dx), _mm_loadu_ps((const float *)(d2ydata + j))); 724 __m128 v_s2 = _mm_mul_ps(_mm_mul_ps(v_dy, v_dy), _mm_loadu_ps((const float *)(d2xdata + j))); 725 __m128 v_s3 = _mm_mul_ps(_mm_mul_ps(v_dx, v_dy), _mm_loadu_ps((const float *)(dxydata + j))); 726 v_s1 = _mm_mul_ps(v_factor, _mm_add_ps(v_s1, _mm_add_ps(v_s2, _mm_mul_ps(v_s3, v_m2)))); 727 _mm_storeu_ps(dstdata + j, v_s1); 728 } 729 } 730 #elif CV_NEON 731 for( ; j <= size.width - 4; j += 4 ) 732 { 733 float32x4_t v_dx = vld1q_f32(dxdata + j), v_dy = vld1q_f32(dydata + j); 734 float32x4_t v_s = vmulq_f32(v_dx, vmulq_f32(v_dx, vld1q_f32(d2ydata + j))); 735 v_s = vmlaq_f32(v_s, vld1q_f32(d2xdata + j), vmulq_f32(v_dy, v_dy)); 736 v_s = vmlaq_f32(v_s, vld1q_f32(dxydata + j), vmulq_n_f32(vmulq_f32(v_dy, v_dx), -2)); 737 vst1q_f32(dstdata + j, vmulq_n_f32(v_s, factor_f)); 738 } 739 #endif 740 741 for( ; j < size.width; j++ ) 742 { 743 float dx = dxdata[j]; 744 float dy = dydata[j]; 745 dstdata[j] = (float)(factor*(dx*dx*d2ydata[j] + dy*dy*d2xdata[j] - 2*dx*dy*dxydata[j])); 746 } 747 } 748 } 749 750 CV_IMPL void 751 cvCornerMinEigenVal( const CvArr* srcarr, CvArr* dstarr, 752 int block_size, int aperture_size ) 753 { 754 cv::Mat src = cv::cvarrToMat(srcarr), dst = cv::cvarrToMat(dstarr); 755 756 CV_Assert( src.size() == dst.size() && dst.type() == CV_32FC1 ); 757 cv::cornerMinEigenVal( src, dst, block_size, aperture_size, cv::BORDER_REPLICATE ); 758 } 759 760 CV_IMPL void 761 cvCornerHarris( const CvArr* srcarr, CvArr* dstarr, 762 int block_size, int aperture_size, double k ) 763 { 764 cv::Mat src = cv::cvarrToMat(srcarr), dst = cv::cvarrToMat(dstarr); 765 766 CV_Assert( src.size() == dst.size() && dst.type() == CV_32FC1 ); 767 cv::cornerHarris( src, dst, block_size, aperture_size, k, cv::BORDER_REPLICATE ); 768 } 769 770 771 CV_IMPL void 772 cvCornerEigenValsAndVecs( const void* srcarr, void* dstarr, 773 int block_size, int aperture_size ) 774 { 775 cv::Mat src = cv::cvarrToMat(srcarr), dst = cv::cvarrToMat(dstarr); 776 777 CV_Assert( src.rows == dst.rows && src.cols*6 == dst.cols*dst.channels() && dst.depth() == CV_32F ); 778 cv::cornerEigenValsAndVecs( src, dst, block_size, aperture_size, cv::BORDER_REPLICATE ); 779 } 780 781 782 CV_IMPL void 783 cvPreCornerDetect( const void* srcarr, void* dstarr, int aperture_size ) 784 { 785 cv::Mat src = cv::cvarrToMat(srcarr), dst = cv::cvarrToMat(dstarr); 786 787 CV_Assert( src.size() == dst.size() && dst.type() == CV_32FC1 ); 788 cv::preCornerDetect( src, dst, aperture_size, cv::BORDER_REPLICATE ); 789 } 790 791 /* End of file */ 792