/*M///////////////////////////////////////////////////////////////////////////////////////
//
//  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
//
//  By downloading, copying, installing or using the software you agree to this license.
//  If you do not agree to this license, do not download, install,
//  copy or use the software.
//
//
//                          License Agreement
//                For Open Source Computer Vision Library
//
// Copyright (C) 2000-2008, Intel Corporation, all rights reserved.
// Copyright (C) 2009-2011, Willow Garage Inc., all rights reserved.
// Copyright (C) 2014-2015, Itseez Inc., all rights reserved.
// Third party copyrights are property of their respective owners.
//
// Redistribution and use in source and binary forms, with or without modification,
// are permitted provided that the following conditions are met:
//
//   * Redistribution's of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//
//   * Redistribution's in binary form must reproduce the above copyright notice,
//     this list of conditions and the following disclaimer in the documentation
//     and/or other materials provided with the distribution.
//
//   * The name of the copyright holders may not be used to endorse or promote products
//     derived from this software without specific prior written permission.
//
// This software is provided by the copyright holders and contributors "as is" and
// any express or implied warranties, including, but not limited to, the implied
// warranties of merchantability and fitness for a particular purpose are disclaimed.
// In no event shall the Intel Corporation or contributors be liable for any direct,
// indirect, incidental, special, exemplary, or consequential damages
// (including, but not limited to, procurement of substitute goods or services;
// loss of use, data, or profits; or business interruption) however caused
// and on any theory of liability, whether in contract, strict liability,
// or tort (including negligence or otherwise) arising in any way out of
// the use of this software, even if advised of the possibility of such damage.
//
//M*/

#include "precomp.hpp"
#include <climits>
#include <limits>

#include "opencl_kernels_core.hpp"

namespace cv
{

template<typename T> static inline Scalar rawToScalar(const T& v)
{
    Scalar s;
    typedef typename DataType<T>::channel_type T1;
    int i, n = DataType<T>::channels;
    for( i = 0; i < n; i++ )
        s.val[i] = ((T1*)&v)[i];
    return s;
}

/****************************************************************************************\
*                                         sum                                            *
\****************************************************************************************/

template <typename T, typename ST>
struct Sum_SIMD
{
    int operator () (const T *, const uchar *, ST *, int, int) const
    {
        return 0;
    }
};

#if CV_SSE2

template <>
struct Sum_SIMD<schar, int>
{
    int operator () (const schar * src0, const uchar * mask, int * dst, int len, int cn) const
    {
        if (mask || (cn != 1 && cn != 2 && cn != 4) || !USE_SSE2)
            return 0;

        int x = 0;
        __m128i v_zero = _mm_setzero_si128(), v_sum = v_zero;

        for ( ; x <= len - 16; x += 16)
        {
            __m128i v_src = _mm_loadu_si128((const __m128i *)(src0 + x));
            __m128i v_half = _mm_srai_epi16(_mm_unpacklo_epi8(v_zero, v_src), 8);

            v_sum = _mm_add_epi32(v_sum, _mm_srai_epi32(_mm_unpacklo_epi16(v_zero, v_half), 16));
            v_sum = _mm_add_epi32(v_sum, _mm_srai_epi32(_mm_unpackhi_epi16(v_zero, v_half), 16));

            v_half = _mm_srai_epi16(_mm_unpackhi_epi8(v_zero, v_src), 8);
            v_sum = _mm_add_epi32(v_sum, _mm_srai_epi32(_mm_unpacklo_epi16(v_zero, v_half), 16));
            v_sum = _mm_add_epi32(v_sum, _mm_srai_epi32(_mm_unpackhi_epi16(v_zero, v_half), 16));
        }

        for ( ; x <= len - 8; x += 8)
        {
            __m128i v_src = _mm_srai_epi16(_mm_unpacklo_epi8(v_zero, _mm_loadl_epi64((__m128i const *)(src0 + x))), 8);

            v_sum = _mm_add_epi32(v_sum, _mm_srai_epi32(_mm_unpacklo_epi16(v_zero, v_src), 16));
            v_sum = _mm_add_epi32(v_sum, _mm_srai_epi32(_mm_unpackhi_epi16(v_zero, v_src), 16));
        }

        int CV_DECL_ALIGNED(16) ar[4];
        _mm_store_si128((__m128i*)ar, v_sum);

        for (int i = 0; i < 4; i += cn)
            for (int j = 0; j < cn; ++j)
                dst[j] += ar[j + i];

        return x / cn;
    }
};

template <>
struct Sum_SIMD<int, double>
{
    int operator () (const int * src0, const uchar * mask, double * dst, int len, int cn) const
    {
        if (mask || (cn != 1 && cn != 2 && cn != 4) || !USE_SSE2)
            return 0;

        int x = 0;
        __m128d v_zero = _mm_setzero_pd(), v_sum0 = v_zero, v_sum1 = v_zero;

        for ( ; x <= len - 4; x += 4)
        {
            __m128i v_src = _mm_loadu_si128((__m128i const *)(src0 + x));
            v_sum0 = _mm_add_pd(v_sum0, _mm_cvtepi32_pd(v_src));
            v_sum1 = _mm_add_pd(v_sum1, _mm_cvtepi32_pd(_mm_srli_si128(v_src, 8)));
        }

        double CV_DECL_ALIGNED(16) ar[4];
        _mm_store_pd(ar, v_sum0);
        _mm_store_pd(ar + 2, v_sum1);

        for (int i = 0; i < 4; i += cn)
            for (int j = 0; j < cn; ++j)
                dst[j] += ar[j + i];

        return x / cn;
    }
};

template <>
struct Sum_SIMD<float, double>
{
    int operator () (const float * src0, const uchar * mask, double * dst, int len, int cn) const
    {
        if (mask || (cn != 1 && cn != 2 && cn != 4) || !USE_SSE2)
            return 0;

        int x = 0;
        __m128d v_zero = _mm_setzero_pd(), v_sum0 = v_zero, v_sum1 = v_zero;

        for ( ; x <= len - 4; x += 4)
        {
            __m128 v_src = _mm_loadu_ps(src0 + x);
            v_sum0 = _mm_add_pd(v_sum0, _mm_cvtps_pd(v_src));
            v_src = _mm_castsi128_ps(_mm_srli_si128(_mm_castps_si128(v_src), 8));
            v_sum1 = _mm_add_pd(v_sum1, _mm_cvtps_pd(v_src));
        }

        double CV_DECL_ALIGNED(16) ar[4];
        _mm_store_pd(ar, v_sum0);
        _mm_store_pd(ar + 2, v_sum1);

        for (int i = 0; i < 4; i += cn)
            for (int j = 0; j < cn; ++j)
                dst[j] += ar[j + i];

        return x / cn;
    }
};


#elif CV_NEON

template <>
struct Sum_SIMD<uchar, int>
{
    int operator () (const uchar * src0, const uchar * mask, int * dst, int len, int cn) const
    {
        if (mask || (cn != 1 && cn != 2 && cn != 4))
            return 0;

        int x = 0;
        uint32x4_t v_sum = vdupq_n_u32(0u);

        for ( ; x <= len - 16; x += 16)
        {
            uint8x16_t v_src = vld1q_u8(src0 + x);
            uint16x8_t v_half = vmovl_u8(vget_low_u8(v_src));

            v_sum = vaddw_u16(v_sum, vget_low_u16(v_half));
            v_sum = vaddw_u16(v_sum, vget_high_u16(v_half));

            v_half = vmovl_u8(vget_high_u8(v_src));
            v_sum = vaddw_u16(v_sum, vget_low_u16(v_half));
            v_sum = vaddw_u16(v_sum, vget_high_u16(v_half));
        }

        for ( ; x <= len - 8; x += 8)
        {
            uint16x8_t v_src = vmovl_u8(vld1_u8(src0 + x));

            v_sum = vaddw_u16(v_sum, vget_low_u16(v_src));
            v_sum = vaddw_u16(v_sum, vget_high_u16(v_src));
        }

        unsigned int CV_DECL_ALIGNED(16) ar[4];
        vst1q_u32(ar, v_sum);

        for (int i = 0; i < 4; i += cn)
            for (int j = 0; j < cn; ++j)
                dst[j] += ar[j + i];

        return x / cn;
    }
};

template <>
struct Sum_SIMD<schar, int>
{
    int operator () (const schar * src0, const uchar * mask, int * dst, int len, int cn) const
    {
        if (mask || (cn != 1 && cn != 2 && cn != 4))
            return 0;

        int x = 0;
        int32x4_t v_sum = vdupq_n_s32(0);

        for ( ; x <= len - 16; x += 16)
        {
            int8x16_t v_src = vld1q_s8(src0 + x);
            int16x8_t v_half = vmovl_s8(vget_low_s8(v_src));

            v_sum = vaddw_s16(v_sum, vget_low_s16(v_half));
            v_sum = vaddw_s16(v_sum, vget_high_s16(v_half));

            v_half = vmovl_s8(vget_high_s8(v_src));
            v_sum = vaddw_s16(v_sum, vget_low_s16(v_half));
            v_sum = vaddw_s16(v_sum, vget_high_s16(v_half));
        }

        for ( ; x <= len - 8; x += 8)
        {
            int16x8_t v_src = vmovl_s8(vld1_s8(src0 + x));

            v_sum = vaddw_s16(v_sum, vget_low_s16(v_src));
            v_sum = vaddw_s16(v_sum, vget_high_s16(v_src));
        }

        int CV_DECL_ALIGNED(16) ar[4];
        vst1q_s32(ar, v_sum);

        for (int i = 0; i < 4; i += cn)
            for (int j = 0; j < cn; ++j)
                dst[j] += ar[j + i];

        return x / cn;
    }
};

template <>
struct Sum_SIMD<ushort, int>
{
    int operator () (const ushort * src0, const uchar * mask, int * dst, int len, int cn) const
    {
        if (mask || (cn != 1 && cn != 2 && cn != 4))
            return 0;

        int x = 0;
        uint32x4_t v_sum = vdupq_n_u32(0u);

        for ( ; x <= len - 8; x += 8)
        {
            uint16x8_t v_src = vld1q_u16(src0 + x);

            v_sum = vaddw_u16(v_sum, vget_low_u16(v_src));
            v_sum = vaddw_u16(v_sum, vget_high_u16(v_src));
        }

        for ( ; x <= len - 4; x += 4)
            v_sum = vaddw_u16(v_sum, vld1_u16(src0 + x));

        unsigned int CV_DECL_ALIGNED(16) ar[4];
        vst1q_u32(ar, v_sum);

        for (int i = 0; i < 4; i += cn)
            for (int j = 0; j < cn; ++j)
                dst[j] += ar[j + i];

        return x / cn;
    }
};
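// All Sum_SIMD specializations (including Sum_SIMD<short, int> below) follow the
// contract of the generic template above: accumulate partial per-channel sums into
// dst[] and return the number of *pixels* (len units, not scalars) consumed, so that
// sum_() can finish the remainder with scalar code; returning 0 selects the pure
// scalar path. A sketch of the caller's view (illustrative only, not part of the API):
//
//     int done = Sum_SIMD<T, ST>()(src, mask, dst, len, cn);  // 0 => no SIMD help
//     src += done * cn;                                       // scalar tail resumes here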
template <>
struct Sum_SIMD<short, int>
{
    int operator () (const short * src0, const uchar * mask, int * dst, int len, int cn) const
    {
        if (mask || (cn != 1 && cn != 2 && cn != 4))
            return 0;

        int x = 0;
        int32x4_t v_sum = vdupq_n_s32(0);

        for ( ; x <= len - 8; x += 8)
        {
            int16x8_t v_src = vld1q_s16(src0 + x);

            v_sum = vaddw_s16(v_sum, vget_low_s16(v_src));
            v_sum = vaddw_s16(v_sum, vget_high_s16(v_src));
        }

        for ( ; x <= len - 4; x += 4)
            v_sum = vaddw_s16(v_sum, vld1_s16(src0 + x));

        int CV_DECL_ALIGNED(16) ar[4];
        vst1q_s32(ar, v_sum);

        for (int i = 0; i < 4; i += cn)
            for (int j = 0; j < cn; ++j)
                dst[j] += ar[j + i];

        return x / cn;
    }
};

#endif

template<typename T, typename ST>
static int sum_(const T* src0, const uchar* mask, ST* dst, int len, int cn )
{
    const T* src = src0;
    if( !mask )
    {
        Sum_SIMD<T, ST> vop;
        int i = vop(src0, mask, dst, len, cn), k = cn % 4;
        src += i * cn;

        if( k == 1 )
        {
            ST s0 = dst[0];

            #if CV_ENABLE_UNROLLED
            for(; i <= len - 4; i += 4, src += cn*4 )
                s0 += src[0] + src[cn] + src[cn*2] + src[cn*3];
            #endif
            for( ; i < len; i++, src += cn )
                s0 += src[0];
            dst[0] = s0;
        }
        else if( k == 2 )
        {
            ST s0 = dst[0], s1 = dst[1];
            for( ; i < len; i++, src += cn )
            {
                s0 += src[0];
                s1 += src[1];
            }
            dst[0] = s0;
            dst[1] = s1;
        }
        else if( k == 3 )
        {
            ST s0 = dst[0], s1 = dst[1], s2 = dst[2];
            for( ; i < len; i++, src += cn )
            {
                s0 += src[0];
                s1 += src[1];
                s2 += src[2];
            }
            dst[0] = s0;
            dst[1] = s1;
            dst[2] = s2;
        }

        for( ; k < cn; k += 4 )
        {
            src = src0 + i*cn + k;
            ST s0 = dst[k], s1 = dst[k+1], s2 = dst[k+2], s3 = dst[k+3];
            for( ; i < len; i++, src += cn )
            {
                s0 += src[0]; s1 += src[1];
                s2 += src[2]; s3 += src[3];
            }
            dst[k] = s0;
            dst[k+1] = s1;
            dst[k+2] = s2;
            dst[k+3] = s3;
        }
        return len;
    }

    int i, nzm = 0;
    if( cn == 1 )
    {
        ST s = dst[0];
        for( i = 0; i < len; i++ )
            if( mask[i] )
            {
                s += src[i];
                nzm++;
            }
        dst[0] = s;
    }
    else if( cn == 3 )
    {
        ST s0 = dst[0], s1 = dst[1], s2 = dst[2];
        for( i = 0; i < len; i++, src += 3 )
            if( mask[i] )
            {
                s0 += src[0];
                s1 += src[1];
                s2 += src[2];
                nzm++;
            }
        dst[0] = s0;
        dst[1] = s1;
        dst[2] = s2;
    }
    else
    {
        for( i = 0; i < len; i++, src += cn )
            if( mask[i] )
            {
                int k = 0;
                #if CV_ENABLE_UNROLLED
                for( ; k <= cn - 4; k += 4 )
                {
                    ST s0, s1;
                    s0 = dst[k] + src[k];
                    s1 = dst[k+1] + src[k+1];
                    dst[k] = s0; dst[k+1] = s1;
                    s0 = dst[k+2] + src[k+2];
                    s1 = dst[k+3] + src[k+3];
                    dst[k+2] = s0; dst[k+3] = s1;
                }
                #endif
                for( ; k < cn; k++ )
                    dst[k] += src[k];
                nzm++;
            }
    }
    return nzm;
}
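// sum_() above works in two regimes. Without a mask it lets Sum_SIMD consume a
// prefix, then handles k = cn % 4 leftover channels with dedicated loops and the
// remaining channels in groups of four. With a mask it returns the number of
// non-zero mask pixels (nzm), which mean() later uses as the divisor. The thin
// wrappers below only fix the element/accumulator types; they all funnel into
// this one template.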
static int sum8u( const uchar* src, const uchar* mask, int* dst, int len, int cn )
{ return sum_(src, mask, dst, len, cn); }

static int sum8s( const schar* src, const uchar* mask, int* dst, int len, int cn )
{ return sum_(src, mask, dst, len, cn); }

static int sum16u( const ushort* src, const uchar* mask, int* dst, int len, int cn )
{ return sum_(src, mask, dst, len, cn); }

static int sum16s( const short* src, const uchar* mask, int* dst, int len, int cn )
{ return sum_(src, mask, dst, len, cn); }

static int sum32s( const int* src, const uchar* mask, double* dst, int len, int cn )
{ return sum_(src, mask, dst, len, cn); }

static int sum32f( const float* src, const uchar* mask, double* dst, int len, int cn )
{ return sum_(src, mask, dst, len, cn); }

static int sum64f( const double* src, const uchar* mask, double* dst, int len, int cn )
{ return sum_(src, mask, dst, len, cn); }

typedef int (*SumFunc)(const uchar*, const uchar* mask, uchar*, int, int);

static SumFunc getSumFunc(int depth)
{
    static SumFunc sumTab[] =
    {
        (SumFunc)GET_OPTIMIZED(sum8u), (SumFunc)sum8s,
        (SumFunc)sum16u, (SumFunc)sum16s,
        (SumFunc)sum32s,
        (SumFunc)GET_OPTIMIZED(sum32f), (SumFunc)sum64f,
        0
    };

    return sumTab[depth];
}

template<typename T>
static int countNonZero_(const T* src, int len )
{
    int i = 0, nz = 0;
    #if CV_ENABLE_UNROLLED
    for(; i <= len - 4; i += 4 )
        nz += (src[i] != 0) + (src[i+1] != 0) + (src[i+2] != 0) + (src[i+3] != 0);
    #endif
    for( ; i < len; i++ )
        nz += src[i] != 0;
    return nz;
}

static int countNonZero8u( const uchar* src, int len )
{
    int i = 0, nz = 0;
#if CV_SSE2
    if (USE_SSE2) // about 5x-6x faster than the scalar loop
    {
        __m128i v_zero = _mm_setzero_si128();
        __m128i sum = _mm_setzero_si128();

        for (; i <= len - 16; i += 16)
        {
            __m128i r0 = _mm_loadu_si128((const __m128i*)(src + i));
            sum = _mm_add_epi32(sum, _mm_sad_epu8(_mm_sub_epi8(v_zero, _mm_cmpeq_epi8(r0, v_zero)), v_zero));
        }
        nz = i - _mm_cvtsi128_si32(_mm_add_epi32(sum, _mm_unpackhi_epi64(sum, sum)));
    }
#elif CV_NEON
    int len0 = len & -16, blockSize1 = (1 << 8) - 16, blockSize0 = blockSize1 << 6;
    uint32x4_t v_nz = vdupq_n_u32(0u);
    uint8x16_t v_zero = vdupq_n_u8(0), v_1 = vdupq_n_u8(1);
    const uchar * src0 = src;

    while( i < len0 )
    {
        int blockSizei = std::min(len0 - i, blockSize0), j = 0;

        while (j < blockSizei)
        {
            int blockSizej = std::min(blockSizei - j, blockSize1), k = 0;
            uint8x16_t v_pz = v_zero;

            for( ; k <= blockSizej - 16; k += 16 )
                v_pz = vaddq_u8(v_pz, vandq_u8(vceqq_u8(vld1q_u8(src0 + k), v_zero), v_1));

            uint16x8_t v_p1 = vmovl_u8(vget_low_u8(v_pz)), v_p2 = vmovl_u8(vget_high_u8(v_pz));
            v_nz = vaddq_u32(vaddl_u16(vget_low_u16(v_p1), vget_high_u16(v_p1)), v_nz);
            v_nz = vaddq_u32(vaddl_u16(vget_low_u16(v_p2), vget_high_u16(v_p2)), v_nz);

            src0 += blockSizej;
            j += blockSizej;
        }

        i += blockSizei;
    }

    CV_DECL_ALIGNED(16) unsigned int buf[4];
    vst1q_u32(buf, v_nz);
    nz += i - saturate_cast<int>(buf[0] + buf[1] + buf[2] + buf[3]);
#endif
    for( ; i < len; i++ )
        nz += src[i] != 0;
    return nz;
}
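// countNonZero8u above and the 16u/32s/32f variants below count *zeros* in their
// SSE2 loops and subtract at the end: _mm_cmpeq_* yields all-ones lanes for zero
// elements, negating those lanes turns each match into 1 per byte, and _mm_sad_epu8
// horizontally adds the bytes, so nz = processed - zero_count (shifted right by the
// log2 element size for the wider types). For example, a 16-byte block containing
// three zeros adds 3 to the SAD accumulator and therefore contributes 13 non-zeros.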
static int countNonZero16u( const ushort* src, int len )
{
    int i = 0, nz = 0;
#if CV_SSE2
    if (USE_SSE2)
    {
        __m128i v_zero = _mm_setzero_si128();
        __m128i sum = _mm_setzero_si128();

        for ( ; i <= len - 8; i += 8)
        {
            __m128i r0 = _mm_loadu_si128((const __m128i*)(src + i));
            sum = _mm_add_epi32(sum, _mm_sad_epu8(_mm_sub_epi8(v_zero, _mm_cmpeq_epi16(r0, v_zero)), v_zero));
        }

        nz = i - (_mm_cvtsi128_si32(_mm_add_epi32(sum, _mm_unpackhi_epi64(sum, sum))) >> 1);
        src += i;
    }
#elif CV_NEON
    int len0 = len & -8, blockSize1 = (1 << 15), blockSize0 = blockSize1 << 6;
    uint32x4_t v_nz = vdupq_n_u32(0u);
    uint16x8_t v_zero = vdupq_n_u16(0), v_1 = vdupq_n_u16(1);

    while( i < len0 )
    {
        int blockSizei = std::min(len0 - i, blockSize0), j = 0;

        while (j < blockSizei)
        {
            int blockSizej = std::min(blockSizei - j, blockSize1), k = 0;
            uint16x8_t v_pz = v_zero;

            for( ; k <= blockSizej - 8; k += 8 )
                v_pz = vaddq_u16(v_pz, vandq_u16(vceqq_u16(vld1q_u16(src + k), v_zero), v_1));

            v_nz = vaddq_u32(vaddl_u16(vget_low_u16(v_pz), vget_high_u16(v_pz)), v_nz);

            src += blockSizej;
            j += blockSizej;
        }

        i += blockSizei;
    }

    CV_DECL_ALIGNED(16) unsigned int buf[4];
    vst1q_u32(buf, v_nz);
    nz += i - saturate_cast<int>(buf[0] + buf[1] + buf[2] + buf[3]);
#endif
    return nz + countNonZero_(src, len - i);
}

static int countNonZero32s( const int* src, int len )
{
    int i = 0, nz = 0;
#if CV_SSE2
    if (USE_SSE2)
    {
        __m128i v_zero = _mm_setzero_si128();
        __m128i sum = _mm_setzero_si128();

        for ( ; i <= len - 4; i += 4)
        {
            __m128i r0 = _mm_loadu_si128((const __m128i*)(src + i));
            sum = _mm_add_epi32(sum, _mm_sad_epu8(_mm_sub_epi8(v_zero, _mm_cmpeq_epi32(r0, v_zero)), v_zero));
        }

        nz = i - (_mm_cvtsi128_si32(_mm_add_epi32(sum, _mm_unpackhi_epi64(sum, sum))) >> 2);
        src += i;
    }
#elif CV_NEON
    int len0 = len & -8, blockSize1 = (1 << 15), blockSize0 = blockSize1 << 6;
    uint32x4_t v_nz = vdupq_n_u32(0u);
    int32x4_t v_zero = vdupq_n_s32(0);
    uint16x8_t v_1 = vdupq_n_u16(1u), v_zerou = vdupq_n_u16(0u);

    while( i < len0 )
    {
        int blockSizei = std::min(len0 - i, blockSize0), j = 0;

        while (j < blockSizei)
        {
            int blockSizej = std::min(blockSizei - j, blockSize1), k = 0;
            uint16x8_t v_pz = v_zerou;

            for( ; k <= blockSizej - 8; k += 8 )
                v_pz = vaddq_u16(v_pz, vandq_u16(vcombine_u16(vmovn_u32(vceqq_s32(vld1q_s32(src + k), v_zero)),
                                                              vmovn_u32(vceqq_s32(vld1q_s32(src + k + 4), v_zero))), v_1));

            v_nz = vaddq_u32(vaddl_u16(vget_low_u16(v_pz), vget_high_u16(v_pz)), v_nz);

            src += blockSizej;
            j += blockSizej;
        }

        i += blockSizei;
    }

    CV_DECL_ALIGNED(16) unsigned int buf[4];
    vst1q_u32(buf, v_nz);
    nz += i - saturate_cast<int>(buf[0] + buf[1] + buf[2] + buf[3]);
#endif
    return nz + countNonZero_(src, len - i);
}
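// The NEON paths above and in countNonZero32f below split the row into nested blocks
// (blockSize0 / blockSize1) so that the narrow per-lane counters are flushed into
// 32-bit lanes long before they can wrap: with the (1 << 15)-element inner block,
// each 16-bit lane counts at most 4096 matches, far below its 65535 ceiling. The
// float variant compares against 0.0f and first narrows the 32-bit comparison masks
// to 16 bits with vmovn_u32.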
static int countNonZero32f( const float* src, int len )
{
    int i = 0, nz = 0;
#if CV_SSE2
    if (USE_SSE2)
    {
        __m128 v_zero_f = _mm_setzero_ps();
        __m128i v_zero = _mm_setzero_si128();
        __m128i sum = _mm_setzero_si128();

        for ( ; i <= len - 4; i += 4)
        {
            __m128 r0 = _mm_loadu_ps(src + i);
            sum = _mm_add_epi32(sum, _mm_sad_epu8(_mm_sub_epi8(v_zero, _mm_castps_si128(_mm_cmpeq_ps(r0, v_zero_f))), v_zero));
        }

        nz = i - (_mm_cvtsi128_si32(_mm_add_epi32(sum, _mm_unpackhi_epi64(sum, sum))) >> 2);
        src += i;
    }
#elif CV_NEON
    int len0 = len & -8, blockSize1 = (1 << 15), blockSize0 = blockSize1 << 6;
    uint32x4_t v_nz = vdupq_n_u32(0u);
    float32x4_t v_zero = vdupq_n_f32(0.0f);
    uint16x8_t v_1 = vdupq_n_u16(1u), v_zerou = vdupq_n_u16(0u);

    while( i < len0 )
    {
        int blockSizei = std::min(len0 - i, blockSize0), j = 0;

        while (j < blockSizei)
        {
            int blockSizej = std::min(blockSizei - j, blockSize1), k = 0;
            uint16x8_t v_pz = v_zerou;

            for( ; k <= blockSizej - 8; k += 8 )
                v_pz = vaddq_u16(v_pz, vandq_u16(vcombine_u16(vmovn_u32(vceqq_f32(vld1q_f32(src + k), v_zero)),
                                                              vmovn_u32(vceqq_f32(vld1q_f32(src + k + 4), v_zero))), v_1));

            v_nz = vaddq_u32(vaddl_u16(vget_low_u16(v_pz), vget_high_u16(v_pz)), v_nz);

            src += blockSizej;
            j += blockSizej;
        }

        i += blockSizei;
    }

    CV_DECL_ALIGNED(16) unsigned int buf[4];
    vst1q_u32(buf, v_nz);
    nz += i - saturate_cast<int>(buf[0] + buf[1] + buf[2] + buf[3]);
#endif
    return nz + countNonZero_(src, len - i);
}

static int countNonZero64f( const double* src, int len )
{
    return countNonZero_(src, len);
}

typedef int (*CountNonZeroFunc)(const uchar*, int);

// note: CV_8S and CV_16S reuse the unsigned counters - testing for zero does not
// depend on the sign interpretation of the bits
static CountNonZeroFunc getCountNonZeroTab(int depth)
{
    static CountNonZeroFunc countNonZeroTab[] =
    {
        (CountNonZeroFunc)GET_OPTIMIZED(countNonZero8u), (CountNonZeroFunc)GET_OPTIMIZED(countNonZero8u),
        (CountNonZeroFunc)GET_OPTIMIZED(countNonZero16u), (CountNonZeroFunc)GET_OPTIMIZED(countNonZero16u),
        (CountNonZeroFunc)GET_OPTIMIZED(countNonZero32s), (CountNonZeroFunc)GET_OPTIMIZED(countNonZero32f),
        (CountNonZeroFunc)GET_OPTIMIZED(countNonZero64f), 0
    };

    return countNonZeroTab[depth];
}

template <typename T, typename ST, typename SQT>
struct SumSqr_SIMD
{
    int operator () (const T *, const uchar *, ST *, SQT *, int, int) const
    {
        return 0;
    }
};

#if CV_SSE2

template <>
struct SumSqr_SIMD<uchar, int, int>
{
    int operator () (const uchar * src0, const uchar * mask, int * sum, int * sqsum, int len, int cn) const
    {
        if (mask || (cn != 1 && cn != 2) || !USE_SSE2)
            return 0;

        int x = 0;
        __m128i v_zero = _mm_setzero_si128(), v_sum = v_zero, v_sqsum = v_zero;

        for ( ; x <= len - 16; x += 16)
        {
            __m128i v_src = _mm_loadu_si128((const __m128i *)(src0 + x));
            __m128i v_half = _mm_unpacklo_epi8(v_src, v_zero);

            __m128i v_mullo = _mm_mullo_epi16(v_half, v_half);
            __m128i v_mulhi = _mm_mulhi_epi16(v_half, v_half);
            v_sum = _mm_add_epi32(v_sum, _mm_unpacklo_epi16(v_half, v_zero));
            v_sum = _mm_add_epi32(v_sum, _mm_unpackhi_epi16(v_half, v_zero));
            v_sqsum = _mm_add_epi32(v_sqsum, _mm_unpacklo_epi16(v_mullo, v_mulhi));
            v_sqsum = _mm_add_epi32(v_sqsum, _mm_unpackhi_epi16(v_mullo, v_mulhi));

            v_half = _mm_unpackhi_epi8(v_src, v_zero);
            v_mullo = _mm_mullo_epi16(v_half, v_half);
            v_mulhi = _mm_mulhi_epi16(v_half, v_half);
            v_sum = _mm_add_epi32(v_sum, _mm_unpacklo_epi16(v_half, v_zero));
            v_sum = _mm_add_epi32(v_sum, _mm_unpackhi_epi16(v_half, v_zero));
            v_sqsum = _mm_add_epi32(v_sqsum, _mm_unpacklo_epi16(v_mullo, v_mulhi));
            v_sqsum = _mm_add_epi32(v_sqsum, _mm_unpackhi_epi16(v_mullo, v_mulhi));
        }

        for ( ; x <= len - 8; x += 8)
        {
            __m128i v_src = _mm_unpacklo_epi8(_mm_loadl_epi64((__m128i const *)(src0 + x)), v_zero);

            __m128i v_mullo = _mm_mullo_epi16(v_src, v_src);
            __m128i v_mulhi = _mm_mulhi_epi16(v_src, v_src);
            v_sum = _mm_add_epi32(v_sum, _mm_unpacklo_epi16(v_src, v_zero));
            v_sum = _mm_add_epi32(v_sum, _mm_unpackhi_epi16(v_src, v_zero));
            v_sqsum = _mm_add_epi32(v_sqsum, _mm_unpacklo_epi16(v_mullo, v_mulhi));
            v_sqsum = _mm_add_epi32(v_sqsum, _mm_unpackhi_epi16(v_mullo, v_mulhi));
        }

        int CV_DECL_ALIGNED(16) ar[8];
        _mm_store_si128((__m128i*)ar, v_sum);
        _mm_store_si128((__m128i*)(ar + 4), v_sqsum);

        for (int i = 0; i < 4; i += cn)
            for (int j = 0; j < cn; ++j)
            {
                sum[j] += ar[j + i];
                sqsum[j] += ar[4 + j + i];
            }

        return x / cn;
    }
};
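// SumSqr_SIMD<uchar> above squares in 16-bit lanes and rebuilds the exact 32-bit
// products by interleaving _mm_mullo_epi16 (low product halves) with _mm_mulhi_epi16
// (high product halves): _mm_unpacklo/hi_epi16(lo, hi) reassembles full v*v values.
// The schar specialization below does the same after sign-extending bytes with the
// unpack-then-arithmetic-shift idiom already used by Sum_SIMD<schar, int>.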
template <>
struct SumSqr_SIMD<schar, int, int>
{
    int operator () (const schar * src0, const uchar * mask, int * sum, int * sqsum, int len, int cn) const
    {
        if (mask || (cn != 1 && cn != 2) || !USE_SSE2)
            return 0;

        int x = 0;
        __m128i v_zero = _mm_setzero_si128(), v_sum = v_zero, v_sqsum = v_zero;

        for ( ; x <= len - 16; x += 16)
        {
            __m128i v_src = _mm_loadu_si128((const __m128i *)(src0 + x));
            __m128i v_half = _mm_srai_epi16(_mm_unpacklo_epi8(v_zero, v_src), 8);

            __m128i v_mullo = _mm_mullo_epi16(v_half, v_half);
            __m128i v_mulhi = _mm_mulhi_epi16(v_half, v_half);
            v_sum = _mm_add_epi32(v_sum, _mm_srai_epi32(_mm_unpacklo_epi16(v_zero, v_half), 16));
            v_sum = _mm_add_epi32(v_sum, _mm_srai_epi32(_mm_unpackhi_epi16(v_zero, v_half), 16));
            v_sqsum = _mm_add_epi32(v_sqsum, _mm_unpacklo_epi16(v_mullo, v_mulhi));
            v_sqsum = _mm_add_epi32(v_sqsum, _mm_unpackhi_epi16(v_mullo, v_mulhi));

            v_half = _mm_srai_epi16(_mm_unpackhi_epi8(v_zero, v_src), 8);
            v_mullo = _mm_mullo_epi16(v_half, v_half);
            v_mulhi = _mm_mulhi_epi16(v_half, v_half);
            v_sum = _mm_add_epi32(v_sum, _mm_srai_epi32(_mm_unpacklo_epi16(v_zero, v_half), 16));
            v_sum = _mm_add_epi32(v_sum, _mm_srai_epi32(_mm_unpackhi_epi16(v_zero, v_half), 16));
            v_sqsum = _mm_add_epi32(v_sqsum, _mm_unpacklo_epi16(v_mullo, v_mulhi));
            v_sqsum = _mm_add_epi32(v_sqsum, _mm_unpackhi_epi16(v_mullo, v_mulhi));
        }

        for ( ; x <= len - 8; x += 8)
        {
            __m128i v_src = _mm_srai_epi16(_mm_unpacklo_epi8(v_zero, _mm_loadl_epi64((__m128i const *)(src0 + x))), 8);

            __m128i v_mullo = _mm_mullo_epi16(v_src, v_src);
            __m128i v_mulhi = _mm_mulhi_epi16(v_src, v_src);
            v_sum = _mm_add_epi32(v_sum, _mm_srai_epi32(_mm_unpacklo_epi16(v_zero, v_src), 16));
            v_sum = _mm_add_epi32(v_sum, _mm_srai_epi32(_mm_unpackhi_epi16(v_zero, v_src), 16));
            v_sqsum = _mm_add_epi32(v_sqsum, _mm_unpacklo_epi16(v_mullo, v_mulhi));
            v_sqsum = _mm_add_epi32(v_sqsum, _mm_unpackhi_epi16(v_mullo, v_mulhi));
        }

        int CV_DECL_ALIGNED(16) ar[8];
        _mm_store_si128((__m128i*)ar, v_sum);
        _mm_store_si128((__m128i*)(ar + 4), v_sqsum);

        for (int i = 0; i < 4; i += cn)
            for (int j = 0; j < cn; ++j)
            {
                sum[j] += ar[j + i];
                sqsum[j] += ar[4 + j + i];
            }

        return x / cn;
    }
};

#endif

template<typename T, typename ST, typename SQT>
static int sumsqr_(const T* src0, const uchar* mask, ST* sum, SQT* sqsum, int len, int cn )
{
    const T* src = src0;

    if( !mask )
    {
        SumSqr_SIMD<T, ST, SQT> vop;
        int i = vop(src0, mask, sum, sqsum, len, cn), k = cn % 4;
        src += i * cn;

        if( k == 1 )
        {
            ST s0 = sum[0];
            SQT sq0 = sqsum[0];
            for( ; i < len; i++, src += cn )
            {
                T v = src[0];
                s0 += v; sq0 += (SQT)v*v;
            }
            sum[0] = s0;
            sqsum[0] = sq0;
        }
        else if( k == 2 )
        {
            ST s0 = sum[0], s1 = sum[1];
            SQT sq0 = sqsum[0], sq1 = sqsum[1];
            for( ; i < len; i++, src += cn )
            {
                T v0 = src[0], v1 = src[1];
                s0 += v0; sq0 += (SQT)v0*v0;
                s1 += v1; sq1 += (SQT)v1*v1;
            }
            sum[0] = s0; sum[1] = s1;
            sqsum[0] = sq0; sqsum[1] = sq1;
        }
        else if( k == 3 )
        {
            ST s0 = sum[0], s1 = sum[1], s2 = sum[2];
            SQT sq0 = sqsum[0], sq1 = sqsum[1], sq2 = sqsum[2];
            for( ; i < len; i++, src += cn )
            {
                T v0 = src[0], v1 = src[1], v2 = src[2];
                s0 += v0; sq0 += (SQT)v0*v0;
                s1 += v1; sq1 += (SQT)v1*v1;
                s2 += v2; sq2 += (SQT)v2*v2;
            }
            sum[0] = s0; sum[1] = s1; sum[2] = s2;
            sqsum[0] = sq0; sqsum[1] = sq1; sqsum[2] = sq2;
        }

        for( ; k < cn; k += 4 )
        {
            src = src0 + k;
            ST s0 = sum[k], s1 = sum[k+1], s2 = sum[k+2], s3 = sum[k+3];
            SQT sq0 = sqsum[k], sq1 = sqsum[k+1], sq2 = sqsum[k+2], sq3 = sqsum[k+3];
            for( ; i < len; i++, src += cn )
            {
                T v0, v1;
                v0 = src[0], v1 = src[1];
                s0 += v0; sq0 += (SQT)v0*v0;
                s1 += v1; sq1 += (SQT)v1*v1;
                v0 = src[2], v1 = src[3];
                s2 += v0; sq2 += (SQT)v0*v0;
                s3 += v1; sq3 += (SQT)v1*v1;
            }
            sum[k] = s0; sum[k+1] = s1;
            sum[k+2] = s2; sum[k+3] = s3;
            sqsum[k] = sq0; sqsum[k+1] = sq1;
            sqsum[k+2] = sq2; sqsum[k+3] = sq3;
        }
        return len;
    }

    int i, nzm = 0;

    if( cn == 1 )
    {
        ST s0 = sum[0];
        SQT sq0 = sqsum[0];
        for( i = 0; i < len; i++ )
            if( mask[i] )
            {
                T v = src[i];
                s0 += v; sq0 += (SQT)v*v;
                nzm++;
            }
        sum[0] = s0;
        sqsum[0] = sq0;
    }
    else if( cn == 3 )
    {
        ST s0 = sum[0], s1 = sum[1], s2 = sum[2];
        SQT sq0 = sqsum[0], sq1 = sqsum[1], sq2 = sqsum[2];
        for( i = 0; i < len; i++, src += 3 )
            if( mask[i] )
            {
                T v0 = src[0], v1 = src[1], v2 = src[2];
                s0 += v0; sq0 += (SQT)v0*v0;
                s1 += v1; sq1 += (SQT)v1*v1;
                s2 += v2; sq2 += (SQT)v2*v2;
                nzm++;
            }
        sum[0] = s0; sum[1] = s1; sum[2] = s2;
        sqsum[0] = sq0; sqsum[1] = sq1; sqsum[2] = sq2;
    }
    else
    {
        for( i = 0; i < len; i++, src += cn )
            if( mask[i] )
            {
                for( int k = 0; k < cn; k++ )
                {
                    T v = src[k];
                    ST s = sum[k] + v;
                    SQT sq = sqsum[k] + (SQT)v*v;
                    sum[k] = s; sqsum[k] = sq;
                }
                nzm++;
            }
    }
    return nzm;
}


static int sqsum8u( const uchar* src, const uchar* mask, int* sum, int* sqsum, int len, int cn )
{ return sumsqr_(src, mask, sum, sqsum, len, cn); }

static int sqsum8s( const schar* src, const uchar* mask, int* sum, int* sqsum, int len, int cn )
{ return sumsqr_(src, mask, sum, sqsum, len, cn); }

static int sqsum16u( const ushort* src, const uchar* mask, int* sum, double* sqsum, int len, int cn )
{ return sumsqr_(src, mask, sum, sqsum, len, cn); }

static int sqsum16s( const short* src, const uchar* mask, int* sum, double* sqsum, int len, int cn )
{ return sumsqr_(src, mask, sum, sqsum, len, cn); }

static int sqsum32s( const int* src, const uchar* mask, double* sum, double* sqsum, int len, int cn )
{ return sumsqr_(src, mask, sum, sqsum, len, cn); }

static int sqsum32f( const float* src, const uchar* mask, double* sum, double* sqsum, int len, int cn )
{ return sumsqr_(src, mask, sum, sqsum, len, cn); }

static int sqsum64f( const double* src, const uchar* mask, double* sum, double* sqsum, int len, int cn )
{ return sumsqr_(src, mask, sum, sqsum, len, cn); }

typedef int (*SumSqrFunc)(const uchar*, const uchar* mask, uchar*, uchar*, int, int);

static SumSqrFunc getSumSqrTab(int depth)
{
    static SumSqrFunc sumSqrTab[] =
    {
        (SumSqrFunc)GET_OPTIMIZED(sqsum8u), (SumSqrFunc)sqsum8s, (SumSqrFunc)sqsum16u, (SumSqrFunc)sqsum16s,
        (SumSqrFunc)sqsum32s, (SumSqrFunc)GET_OPTIMIZED(sqsum32f), (SumSqrFunc)sqsum64f, 0
    };

    return sumSqrTab[depth];
}

#ifdef HAVE_OPENCL

template <typename T> Scalar ocl_part_sum(Mat m)
{
    CV_Assert(m.rows == 1);

    Scalar s = Scalar::all(0);
    int cn = m.channels();
    const T * const ptr = m.ptr<T>(0);

    for (int x = 0, w = m.cols * cn; x < w; )
        for (int c = 0; c < cn; ++c, ++x)
            s[c] += ptr[x];

    return s;
}
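// ocl_part_sum() above folds the per-workgroup partial results written by the
// OpenCL "reduce" kernel into a single Scalar on the host. ocl_sum() below sizes
// that buffer as one slot per compute unit (ngroups), or two per unit when calc2
// requests a second simultaneous reduction over _src2 (the OP_CALC2 kernel mode).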
enum { OCL_OP_SUM = 0, OCL_OP_SUM_ABS = 1, OCL_OP_SUM_SQR = 2 };

static bool ocl_sum( InputArray _src, Scalar & res, int sum_op, InputArray _mask = noArray(),
                     InputArray _src2 = noArray(), bool calc2 = false, const Scalar & res2 = Scalar() )
{
    CV_Assert(sum_op == OCL_OP_SUM || sum_op == OCL_OP_SUM_ABS || sum_op == OCL_OP_SUM_SQR);

    const ocl::Device & dev = ocl::Device::getDefault();
    bool doubleSupport = dev.doubleFPConfig() > 0,
        haveMask = _mask.kind() != _InputArray::NONE,
        haveSrc2 = _src2.kind() != _InputArray::NONE;
    int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type),
            kercn = cn == 1 && !haveMask ? ocl::predictOptimalVectorWidth(_src, _src2) : 1,
            mcn = std::max(cn, kercn);
    CV_Assert(!haveSrc2 || _src2.type() == type);
    int convert_cn = haveSrc2 ? mcn : cn;

    if ( (!doubleSupport && depth == CV_64F) || cn > 4 )
        return false;

    int ngroups = dev.maxComputeUnits(), dbsize = ngroups * (calc2 ? 2 : 1);
    size_t wgs = dev.maxWorkGroupSize();

    int ddepth = std::max(sum_op == OCL_OP_SUM_SQR ? CV_32F : CV_32S, depth),
            dtype = CV_MAKE_TYPE(ddepth, cn);
    CV_Assert(!haveMask || _mask.type() == CV_8UC1);

    int wgs2_aligned = 1;
    while (wgs2_aligned < (int)wgs)
        wgs2_aligned <<= 1;
    wgs2_aligned >>= 1;

    static const char * const opMap[3] = { "OP_SUM", "OP_SUM_ABS", "OP_SUM_SQR" };
    char cvt[2][40];
    String opts = format("-D srcT=%s -D srcT1=%s -D dstT=%s -D dstTK=%s -D dstT1=%s -D ddepth=%d -D cn=%d"
                         " -D convertToDT=%s -D %s -D WGS=%d -D WGS2_ALIGNED=%d%s%s%s%s -D kercn=%d%s%s%s -D convertFromU=%s",
                         ocl::typeToStr(CV_MAKE_TYPE(depth, mcn)), ocl::typeToStr(depth),
                         ocl::typeToStr(dtype), ocl::typeToStr(CV_MAKE_TYPE(ddepth, mcn)),
                         ocl::typeToStr(ddepth), ddepth, cn,
                         ocl::convertTypeStr(depth, ddepth, mcn, cvt[0]),
                         opMap[sum_op], (int)wgs, wgs2_aligned,
                         doubleSupport ? " -D DOUBLE_SUPPORT" : "",
                         haveMask ? " -D HAVE_MASK" : "",
                         _src.isContinuous() ? " -D HAVE_SRC_CONT" : "",
                         haveMask && _mask.isContinuous() ? " -D HAVE_MASK_CONT" : "", kercn,
                         haveSrc2 ? " -D HAVE_SRC2" : "", calc2 ? " -D OP_CALC2" : "",
                         haveSrc2 && _src2.isContinuous() ? " -D HAVE_SRC2_CONT" : "",
                         depth <= CV_32S && ddepth == CV_32S ? ocl::convertTypeStr(CV_8U, ddepth, convert_cn, cvt[1]) : "noconvert");

    ocl::Kernel k("reduce", ocl::core::reduce_oclsrc, opts);
    if (k.empty())
        return false;

    UMat src = _src.getUMat(), src2 = _src2.getUMat(),
        db(1, dbsize, dtype), mask = _mask.getUMat();

    ocl::KernelArg srcarg = ocl::KernelArg::ReadOnlyNoSize(src),
            dbarg = ocl::KernelArg::PtrWriteOnly(db),
            maskarg = ocl::KernelArg::ReadOnlyNoSize(mask),
            src2arg = ocl::KernelArg::ReadOnlyNoSize(src2);

    if (haveMask)
    {
        if (haveSrc2)
            k.args(srcarg, src.cols, (int)src.total(), ngroups, dbarg, maskarg, src2arg);
        else
            k.args(srcarg, src.cols, (int)src.total(), ngroups, dbarg, maskarg);
    }
    else
    {
        if (haveSrc2)
            k.args(srcarg, src.cols, (int)src.total(), ngroups, dbarg, src2arg);
        else
            k.args(srcarg, src.cols, (int)src.total(), ngroups, dbarg);
    }

    size_t globalsize = ngroups * wgs;
    if (k.run(1, &globalsize, &wgs, false))
    {
        typedef Scalar (*part_sum)(Mat m);
        part_sum funcs[3] = { ocl_part_sum<int>, ocl_part_sum<float>, ocl_part_sum<double> },
                func = funcs[ddepth - CV_32S];

        Mat mres = db.getMat(ACCESS_READ);
        if (calc2)
            const_cast<Scalar &>(res2) = func(mres.colRange(ngroups, dbsize));

        res = func(mres.colRange(0, ngroups));
        return true;
    }
    return false;
}

#endif

}
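// cv::sum() below dispatches in layers: the OpenCL reduction when the input is a
// UMat, then IPP for common 2D types, then the SumFunc table (which may use the
// SIMD specializations above). For depths below CV_32S it accumulates into an int
// buffer and flushes into the double Scalar every intSumBlockSize elements; with
// 8-bit data, (1 << 23) * 255 < 2^31, so the int accumulator cannot overflow
// between flushes. Illustrative usage (not part of this file):
//
//     cv::Mat img(480, 640, CV_8UC3, cv::Scalar(1, 2, 3));
//     cv::Scalar s = cv::sum(img);  // s = (307200, 614400, 921600, 0)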
cv::Scalar cv::sum( InputArray _src )
{
#ifdef HAVE_OPENCL
    Scalar _res;
    CV_OCL_RUN_(OCL_PERFORMANCE_CHECK(_src.isUMat()) && _src.dims() <= 2,
                ocl_sum(_src, _res, OCL_OP_SUM),
                _res)
#endif

    Mat src = _src.getMat();
    int k, cn = src.channels(), depth = src.depth();

#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
    CV_IPP_CHECK()
    {
        size_t total_size = src.total();
        int rows = src.size[0], cols = rows ? (int)(total_size/rows) : 0;
        if( src.dims == 2 || (src.isContinuous() && cols > 0 && (size_t)rows*cols == total_size) )
        {
            IppiSize sz = { cols, rows };
            int type = src.type();
            typedef IppStatus (CV_STDCALL* ippiSumFuncHint)(const void*, int, IppiSize, double *, IppHintAlgorithm);
            typedef IppStatus (CV_STDCALL* ippiSumFuncNoHint)(const void*, int, IppiSize, double *);
            ippiSumFuncHint ippFuncHint =
                type == CV_32FC1 ? (ippiSumFuncHint)ippiSum_32f_C1R :
                type == CV_32FC3 ? (ippiSumFuncHint)ippiSum_32f_C3R :
                type == CV_32FC4 ? (ippiSumFuncHint)ippiSum_32f_C4R :
                0;
            ippiSumFuncNoHint ippFuncNoHint =
                type == CV_8UC1 ? (ippiSumFuncNoHint)ippiSum_8u_C1R :
                type == CV_8UC3 ? (ippiSumFuncNoHint)ippiSum_8u_C3R :
                type == CV_8UC4 ? (ippiSumFuncNoHint)ippiSum_8u_C4R :
                type == CV_16UC1 ? (ippiSumFuncNoHint)ippiSum_16u_C1R :
                type == CV_16UC3 ? (ippiSumFuncNoHint)ippiSum_16u_C3R :
                type == CV_16UC4 ? (ippiSumFuncNoHint)ippiSum_16u_C4R :
                type == CV_16SC1 ? (ippiSumFuncNoHint)ippiSum_16s_C1R :
                type == CV_16SC3 ? (ippiSumFuncNoHint)ippiSum_16s_C3R :
                type == CV_16SC4 ? (ippiSumFuncNoHint)ippiSum_16s_C4R :
                0;
            CV_Assert(!ippFuncHint || !ippFuncNoHint);
            if( ippFuncHint || ippFuncNoHint )
            {
                Ipp64f res[4];
                IppStatus ret = ippFuncHint ? ippFuncHint(src.ptr(), (int)src.step[0], sz, res, ippAlgHintAccurate) :
                                ippFuncNoHint(src.ptr(), (int)src.step[0], sz, res);
                if( ret >= 0 )
                {
                    Scalar sc;
                    for( int i = 0; i < cn; i++ )
                        sc[i] = res[i];
                    CV_IMPL_ADD(CV_IMPL_IPP);
                    return sc;
                }
                setIppErrorStatus();
            }
        }
    }
#endif
    SumFunc func = getSumFunc(depth);

    CV_Assert( cn <= 4 && func != 0 );

    const Mat* arrays[] = {&src, 0};
    uchar* ptrs[1];
    NAryMatIterator it(arrays, ptrs);
    Scalar s;
    int total = (int)it.size, blockSize = total, intSumBlockSize = 0;
    int j, count = 0;
    AutoBuffer<int> _buf;
    int* buf = (int*)&s[0];
    size_t esz = 0;
    bool blockSum = depth < CV_32S;

    if( blockSum )
    {
        intSumBlockSize = depth <= CV_8S ? (1 << 23) : (1 << 15);
        blockSize = std::min(blockSize, intSumBlockSize);
        _buf.allocate(cn);
        buf = _buf;

        for( k = 0; k < cn; k++ )
            buf[k] = 0;
        esz = src.elemSize();
    }

    for( size_t i = 0; i < it.nplanes; i++, ++it )
    {
        for( j = 0; j < total; j += blockSize )
        {
            int bsz = std::min(total - j, blockSize);
            func( ptrs[0], 0, (uchar*)buf, bsz, cn );
            count += bsz;
            if( blockSum && (count + blockSize >= intSumBlockSize || (i+1 >= it.nplanes && j+bsz >= total)) )
            {
                for( k = 0; k < cn; k++ )
                {
                    s[k] += buf[k];
                    buf[k] = 0;
                }
                count = 0;
            }
            ptrs[0] += bsz*esz;
        }
    }
    return s;
}
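// ocl_countNonZero() below reuses the same "reduce" OpenCL program in its
// OP_COUNT_NON_ZERO mode: each workgroup writes one partial count into db, and the
// host finishes with cv::sum() over that 1 x dbsize CV_32S row. Illustrative usage
// of the public wrapper (not part of this file):
//
//     cv::Mat m = (cv::Mat_<uchar>(1, 5) << 0, 3, 0, 7, 1);
//     int nz = cv::countNonZero(m);  // nz == 3; the input must be single-channel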
" -D HAVE_SRC_CONT" : "")); 1277 if (k.empty()) 1278 return false; 1279 1280 UMat src = _src.getUMat(), db(1, dbsize, CV_32SC1); 1281 k.args(ocl::KernelArg::ReadOnlyNoSize(src), src.cols, (int)src.total(), 1282 dbsize, ocl::KernelArg::PtrWriteOnly(db)); 1283 1284 size_t globalsize = dbsize * wgs; 1285 if (k.run(1, &globalsize, &wgs, true)) 1286 return res = saturate_cast<int>(cv::sum(db.getMat(ACCESS_READ))[0]), true; 1287 return false; 1288 } 1289 1290 } 1291 1292 #endif 1293 1294 int cv::countNonZero( InputArray _src ) 1295 { 1296 int type = _src.type(), cn = CV_MAT_CN(type); 1297 CV_Assert( cn == 1 ); 1298 1299 #ifdef HAVE_OPENCL 1300 int res = -1; 1301 CV_OCL_RUN_(OCL_PERFORMANCE_CHECK(_src.isUMat()) && _src.dims() <= 2, 1302 ocl_countNonZero(_src, res), 1303 res) 1304 #endif 1305 1306 Mat src = _src.getMat(); 1307 1308 #if defined HAVE_IPP && !defined HAVE_IPP_ICV_ONLY && 0 1309 CV_IPP_CHECK() 1310 { 1311 if (src.dims <= 2 || src.isContinuous()) 1312 { 1313 IppiSize roiSize = { src.cols, src.rows }; 1314 Ipp32s count = 0, srcstep = (Ipp32s)src.step; 1315 IppStatus status = (IppStatus)-1; 1316 1317 if (src.isContinuous()) 1318 { 1319 roiSize.width = (Ipp32s)src.total(); 1320 roiSize.height = 1; 1321 srcstep = (Ipp32s)src.total() * CV_ELEM_SIZE(type); 1322 } 1323 1324 int depth = CV_MAT_DEPTH(type); 1325 if (depth == CV_8U) 1326 status = ippiCountInRange_8u_C1R((const Ipp8u *)src.data, srcstep, roiSize, &count, 0, 0); 1327 else if (depth == CV_32F) 1328 status = ippiCountInRange_32f_C1R((const Ipp32f *)src.data, srcstep, roiSize, &count, 0, 0); 1329 1330 if (status >= 0) 1331 { 1332 CV_IMPL_ADD(CV_IMPL_IPP); 1333 return (Ipp32s)src.total() - count; 1334 } 1335 setIppErrorStatus(); 1336 } 1337 } 1338 #endif 1339 1340 CountNonZeroFunc func = getCountNonZeroTab(src.depth()); 1341 CV_Assert( func != 0 ); 1342 1343 const Mat* arrays[] = {&src, 0}; 1344 uchar* ptrs[1]; 1345 NAryMatIterator it(arrays, ptrs); 1346 int total = (int)it.size, nz = 0; 1347 1348 for( size_t i = 0; i < it.nplanes; i++, ++it ) 1349 nz += func( ptrs[0], total ); 1350 1351 return nz; 1352 } 1353 1354 cv::Scalar cv::mean( InputArray _src, InputArray _mask ) 1355 { 1356 Mat src = _src.getMat(), mask = _mask.getMat(); 1357 CV_Assert( mask.empty() || mask.type() == CV_8U ); 1358 1359 int k, cn = src.channels(), depth = src.depth(); 1360 1361 #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7) 1362 CV_IPP_CHECK() 1363 { 1364 size_t total_size = src.total(); 1365 int rows = src.size[0], cols = rows ? (int)(total_size/rows) : 0; 1366 if( src.dims == 2 || (src.isContinuous() && mask.isContinuous() && cols > 0 && (size_t)rows*cols == total_size) ) 1367 { 1368 IppiSize sz = { cols, rows }; 1369 int type = src.type(); 1370 if( !mask.empty() ) 1371 { 1372 typedef IppStatus (CV_STDCALL* ippiMaskMeanFuncC1)(const void *, int, const void *, int, IppiSize, Ipp64f *); 1373 ippiMaskMeanFuncC1 ippFuncC1 = 1374 type == CV_8UC1 ? (ippiMaskMeanFuncC1)ippiMean_8u_C1MR : 1375 type == CV_16UC1 ? (ippiMaskMeanFuncC1)ippiMean_16u_C1MR : 1376 type == CV_32FC1 ? (ippiMaskMeanFuncC1)ippiMean_32f_C1MR : 1377 0; 1378 if( ippFuncC1 ) 1379 { 1380 Ipp64f res; 1381 if( ippFuncC1(src.ptr(), (int)src.step[0], mask.ptr(), (int)mask.step[0], sz, &res) >= 0 ) 1382 { 1383 CV_IMPL_ADD(CV_IMPL_IPP); 1384 return Scalar(res); 1385 } 1386 setIppErrorStatus(); 1387 } 1388 typedef IppStatus (CV_STDCALL* ippiMaskMeanFuncC3)(const void *, int, const void *, int, IppiSize, int, Ipp64f *); 1389 ippiMaskMeanFuncC3 ippFuncC3 = 1390 type == CV_8UC3 ? 
cv::Scalar cv::mean( InputArray _src, InputArray _mask )
{
    Mat src = _src.getMat(), mask = _mask.getMat();
    CV_Assert( mask.empty() || mask.type() == CV_8U );

    int k, cn = src.channels(), depth = src.depth();

#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
    CV_IPP_CHECK()
    {
        size_t total_size = src.total();
        int rows = src.size[0], cols = rows ? (int)(total_size/rows) : 0;
        if( src.dims == 2 || (src.isContinuous() && mask.isContinuous() && cols > 0 && (size_t)rows*cols == total_size) )
        {
            IppiSize sz = { cols, rows };
            int type = src.type();
            if( !mask.empty() )
            {
                typedef IppStatus (CV_STDCALL* ippiMaskMeanFuncC1)(const void *, int, const void *, int, IppiSize, Ipp64f *);
                ippiMaskMeanFuncC1 ippFuncC1 =
                    type == CV_8UC1 ? (ippiMaskMeanFuncC1)ippiMean_8u_C1MR :
                    type == CV_16UC1 ? (ippiMaskMeanFuncC1)ippiMean_16u_C1MR :
                    type == CV_32FC1 ? (ippiMaskMeanFuncC1)ippiMean_32f_C1MR :
                    0;
                if( ippFuncC1 )
                {
                    Ipp64f res;
                    if( ippFuncC1(src.ptr(), (int)src.step[0], mask.ptr(), (int)mask.step[0], sz, &res) >= 0 )
                    {
                        CV_IMPL_ADD(CV_IMPL_IPP);
                        return Scalar(res);
                    }
                    setIppErrorStatus();
                }
                typedef IppStatus (CV_STDCALL* ippiMaskMeanFuncC3)(const void *, int, const void *, int, IppiSize, int, Ipp64f *);
                ippiMaskMeanFuncC3 ippFuncC3 =
                    type == CV_8UC3 ? (ippiMaskMeanFuncC3)ippiMean_8u_C3CMR :
                    type == CV_16UC3 ? (ippiMaskMeanFuncC3)ippiMean_16u_C3CMR :
                    type == CV_32FC3 ? (ippiMaskMeanFuncC3)ippiMean_32f_C3CMR :
                    0;
                if( ippFuncC3 )
                {
                    Ipp64f res1, res2, res3;
                    if( ippFuncC3(src.ptr(), (int)src.step[0], mask.ptr(), (int)mask.step[0], sz, 1, &res1) >= 0 &&
                        ippFuncC3(src.ptr(), (int)src.step[0], mask.ptr(), (int)mask.step[0], sz, 2, &res2) >= 0 &&
                        ippFuncC3(src.ptr(), (int)src.step[0], mask.ptr(), (int)mask.step[0], sz, 3, &res3) >= 0 )
                    {
                        CV_IMPL_ADD(CV_IMPL_IPP);
                        return Scalar(res1, res2, res3);
                    }
                    setIppErrorStatus();
                }
            }
            else
            {
                typedef IppStatus (CV_STDCALL* ippiMeanFuncHint)(const void*, int, IppiSize, double *, IppHintAlgorithm);
                typedef IppStatus (CV_STDCALL* ippiMeanFuncNoHint)(const void*, int, IppiSize, double *);
                ippiMeanFuncHint ippFuncHint =
                    type == CV_32FC1 ? (ippiMeanFuncHint)ippiMean_32f_C1R :
                    type == CV_32FC3 ? (ippiMeanFuncHint)ippiMean_32f_C3R :
                    type == CV_32FC4 ? (ippiMeanFuncHint)ippiMean_32f_C4R :
                    0;
                ippiMeanFuncNoHint ippFuncNoHint =
                    type == CV_8UC1 ? (ippiMeanFuncNoHint)ippiMean_8u_C1R :
                    type == CV_8UC3 ? (ippiMeanFuncNoHint)ippiMean_8u_C3R :
                    type == CV_8UC4 ? (ippiMeanFuncNoHint)ippiMean_8u_C4R :
                    type == CV_16UC1 ? (ippiMeanFuncNoHint)ippiMean_16u_C1R :
                    type == CV_16UC3 ? (ippiMeanFuncNoHint)ippiMean_16u_C3R :
                    type == CV_16UC4 ? (ippiMeanFuncNoHint)ippiMean_16u_C4R :
                    type == CV_16SC1 ? (ippiMeanFuncNoHint)ippiMean_16s_C1R :
                    type == CV_16SC3 ? (ippiMeanFuncNoHint)ippiMean_16s_C3R :
                    type == CV_16SC4 ? (ippiMeanFuncNoHint)ippiMean_16s_C4R :
                    0;
                // Make sure only zero or one version of the function pointer is valid
                CV_Assert(!ippFuncHint || !ippFuncNoHint);
                if( ippFuncHint || ippFuncNoHint )
                {
                    Ipp64f res[4];
                    IppStatus ret = ippFuncHint ? ippFuncHint(src.ptr(), (int)src.step[0], sz, res, ippAlgHintAccurate) :
                                    ippFuncNoHint(src.ptr(), (int)src.step[0], sz, res);
                    if( ret >= 0 )
                    {
                        Scalar sc;
                        for( int i = 0; i < cn; i++ )
                            sc[i] = res[i];
                        CV_IMPL_ADD(CV_IMPL_IPP);
                        return sc;
                    }
                    setIppErrorStatus();
                }
            }
        }
    }
#endif

    SumFunc func = getSumFunc(depth);

    CV_Assert( cn <= 4 && func != 0 );

    const Mat* arrays[] = {&src, &mask, 0};
    uchar* ptrs[2];
    NAryMatIterator it(arrays, ptrs);
    Scalar s;
    int total = (int)it.size, blockSize = total, intSumBlockSize = 0;
    int j, count = 0;
    AutoBuffer<int> _buf;
    int* buf = (int*)&s[0];
    bool blockSum = depth <= CV_16S;
    size_t esz = 0, nz0 = 0;

    if( blockSum )
    {
(1 << 23) : (1 << 15); 1467 blockSize = std::min(blockSize, intSumBlockSize); 1468 _buf.allocate(cn); 1469 buf = _buf; 1470 1471 for( k = 0; k < cn; k++ ) 1472 buf[k] = 0; 1473 esz = src.elemSize(); 1474 } 1475 1476 for( size_t i = 0; i < it.nplanes; i++, ++it ) 1477 { 1478 for( j = 0; j < total; j += blockSize ) 1479 { 1480 int bsz = std::min(total - j, blockSize); 1481 int nz = func( ptrs[0], ptrs[1], (uchar*)buf, bsz, cn ); 1482 count += nz; 1483 nz0 += nz; 1484 if( blockSum && (count + blockSize >= intSumBlockSize || (i+1 >= it.nplanes && j+bsz >= total)) ) 1485 { 1486 for( k = 0; k < cn; k++ ) 1487 { 1488 s[k] += buf[k]; 1489 buf[k] = 0; 1490 } 1491 count = 0; 1492 } 1493 ptrs[0] += bsz*esz; 1494 if( ptrs[1] ) 1495 ptrs[1] += bsz; 1496 } 1497 } 1498 return s*(nz0 ? 1./nz0 : 0); 1499 } 1500 1501 #ifdef HAVE_OPENCL 1502 1503 namespace cv { 1504 1505 static bool ocl_meanStdDev( InputArray _src, OutputArray _mean, OutputArray _sdv, InputArray _mask ) 1506 { 1507 bool haveMask = _mask.kind() != _InputArray::NONE; 1508 int nz = haveMask ? -1 : (int)_src.total(); 1509 Scalar mean, stddev; 1510 1511 { 1512 int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type); 1513 bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0, 1514 isContinuous = _src.isContinuous(), 1515 isMaskContinuous = _mask.isContinuous(); 1516 const ocl::Device &defDev = ocl::Device::getDefault(); 1517 int groups = defDev.maxComputeUnits(); 1518 if (defDev.isIntel()) 1519 { 1520 static const int subSliceEUCount = 10; 1521 groups = (groups / subSliceEUCount) * 2; 1522 } 1523 size_t wgs = defDev.maxWorkGroupSize(); 1524 1525 int ddepth = std::max(CV_32S, depth), sqddepth = std::max(CV_32F, depth), 1526 dtype = CV_MAKE_TYPE(ddepth, cn), 1527 sqdtype = CV_MAKETYPE(sqddepth, cn); 1528 CV_Assert(!haveMask || _mask.type() == CV_8UC1); 1529 1530 int wgs2_aligned = 1; 1531 while (wgs2_aligned < (int)wgs) 1532 wgs2_aligned <<= 1; 1533 wgs2_aligned >>= 1; 1534 1535 if ( (!doubleSupport && depth == CV_64F) || cn > 4 ) 1536 return false; 1537 1538 char cvt[2][40]; 1539 String opts = format("-D srcT=%s -D srcT1=%s -D dstT=%s -D dstT1=%s -D sqddepth=%d" 1540 " -D sqdstT=%s -D sqdstT1=%s -D convertToSDT=%s -D cn=%d%s%s" 1541 " -D convertToDT=%s -D WGS=%d -D WGS2_ALIGNED=%d%s%s", 1542 ocl::typeToStr(type), ocl::typeToStr(depth), 1543 ocl::typeToStr(dtype), ocl::typeToStr(ddepth), sqddepth, 1544 ocl::typeToStr(sqdtype), ocl::typeToStr(sqddepth), 1545 ocl::convertTypeStr(depth, sqddepth, cn, cvt[0]), 1546 cn, isContinuous ? " -D HAVE_SRC_CONT" : "", 1547 isMaskContinuous ? " -D HAVE_MASK_CONT" : "", 1548 ocl::convertTypeStr(depth, ddepth, cn, cvt[1]), 1549 (int)wgs, wgs2_aligned, haveMask ? " -D HAVE_MASK" : "", 1550 doubleSupport ? " -D DOUBLE_SUPPORT" : ""); 1551 1552 ocl::Kernel k("meanStdDev", ocl::core::meanstddev_oclsrc, opts); 1553 if (k.empty()) 1554 return false; 1555 1556 int dbsize = groups * ((haveMask ? 
#ifdef HAVE_OPENCL

namespace cv {

static bool ocl_meanStdDev( InputArray _src, OutputArray _mean, OutputArray _sdv, InputArray _mask )
{
    bool haveMask = _mask.kind() != _InputArray::NONE;
    int nz = haveMask ? -1 : (int)_src.total();
    Scalar mean, stddev;

    {
        int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
        bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0,
                isContinuous = _src.isContinuous(),
                isMaskContinuous = _mask.isContinuous();
        const ocl::Device &defDev = ocl::Device::getDefault();
        int groups = defDev.maxComputeUnits();
        if (defDev.isIntel())
        {
            static const int subSliceEUCount = 10;
            groups = (groups / subSliceEUCount) * 2;
        }
        size_t wgs = defDev.maxWorkGroupSize();

        int ddepth = std::max(CV_32S, depth), sqddepth = std::max(CV_32F, depth),
                dtype = CV_MAKE_TYPE(ddepth, cn),
                sqdtype = CV_MAKETYPE(sqddepth, cn);
        CV_Assert(!haveMask || _mask.type() == CV_8UC1);

        int wgs2_aligned = 1;
        while (wgs2_aligned < (int)wgs)
            wgs2_aligned <<= 1;
        wgs2_aligned >>= 1;

        if ( (!doubleSupport && depth == CV_64F) || cn > 4 )
            return false;

        char cvt[2][40];
        String opts = format("-D srcT=%s -D srcT1=%s -D dstT=%s -D dstT1=%s -D sqddepth=%d"
                             " -D sqdstT=%s -D sqdstT1=%s -D convertToSDT=%s -D cn=%d%s%s"
                             " -D convertToDT=%s -D WGS=%d -D WGS2_ALIGNED=%d%s%s",
                             ocl::typeToStr(type), ocl::typeToStr(depth),
                             ocl::typeToStr(dtype), ocl::typeToStr(ddepth), sqddepth,
                             ocl::typeToStr(sqdtype), ocl::typeToStr(sqddepth),
                             ocl::convertTypeStr(depth, sqddepth, cn, cvt[0]),
                             cn, isContinuous ? " -D HAVE_SRC_CONT" : "",
                             isMaskContinuous ? " -D HAVE_MASK_CONT" : "",
                             ocl::convertTypeStr(depth, ddepth, cn, cvt[1]),
                             (int)wgs, wgs2_aligned, haveMask ? " -D HAVE_MASK" : "",
                             doubleSupport ? " -D DOUBLE_SUPPORT" : "");

        ocl::Kernel k("meanStdDev", ocl::core::meanstddev_oclsrc, opts);
        if (k.empty())
            return false;

        int dbsize = groups * ((haveMask ? CV_ELEM_SIZE1(CV_32S) : 0) +
                               CV_ELEM_SIZE(sqdtype) + CV_ELEM_SIZE(dtype));
        UMat src = _src.getUMat(), db(1, dbsize, CV_8UC1), mask = _mask.getUMat();

        ocl::KernelArg srcarg = ocl::KernelArg::ReadOnlyNoSize(src),
                dbarg = ocl::KernelArg::PtrWriteOnly(db),
                maskarg = ocl::KernelArg::ReadOnlyNoSize(mask);

        if (haveMask)
            k.args(srcarg, src.cols, (int)src.total(), groups, dbarg, maskarg);
        else
            k.args(srcarg, src.cols, (int)src.total(), groups, dbarg);

        size_t globalsize = groups * wgs;
        if (!k.run(1, &globalsize, &wgs, false))
            return false;

        typedef Scalar (* part_sum)(Mat m);
        part_sum funcs[3] = { ocl_part_sum<int>, ocl_part_sum<float>, ocl_part_sum<double> };
        Mat dbm = db.getMat(ACCESS_READ);

        mean = funcs[ddepth - CV_32S](Mat(1, groups, dtype, dbm.ptr()));
        stddev = funcs[sqddepth - CV_32S](Mat(1, groups, sqdtype, dbm.ptr() + groups * CV_ELEM_SIZE(dtype)));

        if (haveMask)
            nz = saturate_cast<int>(funcs[0](Mat(1, groups, CV_32SC1, dbm.ptr() +
                                                 groups * (CV_ELEM_SIZE(dtype) +
                                                           CV_ELEM_SIZE(sqdtype))))[0]);
    }

    double total = nz != 0 ? 1.0 / nz : 0;
    int k, j, cn = _src.channels();
    for (int i = 0; i < cn; ++i)
    {
        mean[i] *= total;
        stddev[i] = std::sqrt(std::max(stddev[i] * total - mean[i] * mean[i] , 0.));
    }

    for( j = 0; j < 2; j++ )
    {
        const double * const sptr = j == 0 ? &mean[0] : &stddev[0];
        _OutputArray _dst = j == 0 ? _mean : _sdv;
        if( !_dst.needed() )
            continue;

        if( !_dst.fixedSize() )
            _dst.create(cn, 1, CV_64F, -1, true);
        Mat dst = _dst.getMat();
        int dcn = (int)dst.total();
        CV_Assert( dst.type() == CV_64F && dst.isContinuous() &&
                   (dst.cols == 1 || dst.rows == 1) && dcn >= cn );
        double* dptr = dst.ptr<double>();
        for( k = 0; k < cn; k++ )
            dptr[k] = sptr[k];
        for( ; k < dcn; k++ )
            dptr[k] = 0;
    }

    return true;
}

}

#endif
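// Illustrative usage of cv::meanStdDev() below (not part of this file). Note that
// it computes the population (1/N) standard deviation, not the 1/(N-1) sample
// estimate:
//
//     cv::Mat img(8, 8, CV_32FC1, cv::Scalar(2.0f));
//     cv::Scalar mu, sigma;
//     cv::meanStdDev(img, mu, sigma);  // mu[0] == 2.0, sigma[0] == 0.0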
void cv::meanStdDev( InputArray _src, OutputArray _mean, OutputArray _sdv, InputArray _mask )
{
    CV_OCL_RUN(OCL_PERFORMANCE_CHECK(_src.isUMat()) && _src.dims() <= 2,
               ocl_meanStdDev(_src, _mean, _sdv, _mask))

    Mat src = _src.getMat(), mask = _mask.getMat();
    CV_Assert( mask.empty() || mask.type() == CV_8UC1 );

    int k, cn = src.channels(), depth = src.depth();

#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
    CV_IPP_CHECK()
    {
        size_t total_size = src.total();
        int rows = src.size[0], cols = rows ? (int)(total_size/rows) : 0;
        if( src.dims == 2 || (src.isContinuous() && mask.isContinuous() && cols > 0 && (size_t)rows*cols == total_size) )
        {
            Ipp64f mean_temp[3];
            Ipp64f stddev_temp[3];
            Ipp64f *pmean = &mean_temp[0];
            Ipp64f *pstddev = &stddev_temp[0];
            Mat mean, stddev;
            int dcn_mean = -1;
            if( _mean.needed() )
            {
                if( !_mean.fixedSize() )
                    _mean.create(cn, 1, CV_64F, -1, true);
                mean = _mean.getMat();
                dcn_mean = (int)mean.total();
                pmean = mean.ptr<Ipp64f>();
            }
            int dcn_stddev = -1;
            if( _sdv.needed() )
            {
                if( !_sdv.fixedSize() )
                    _sdv.create(cn, 1, CV_64F, -1, true);
                stddev = _sdv.getMat();
                dcn_stddev = (int)stddev.total();
                pstddev = stddev.ptr<Ipp64f>();
            }
            for( int c = cn; c < dcn_mean; c++ )
                pmean[c] = 0;
            for( int c = cn; c < dcn_stddev; c++ )
                pstddev[c] = 0;
            IppiSize sz = { cols, rows };
            int type = src.type();
            if( !mask.empty() )
            {
                typedef IppStatus (CV_STDCALL* ippiMaskMeanStdDevFuncC1)(const void *, int, const void *, int, IppiSize, Ipp64f *, Ipp64f *);
                ippiMaskMeanStdDevFuncC1 ippFuncC1 =
                    type == CV_8UC1 ? (ippiMaskMeanStdDevFuncC1)ippiMean_StdDev_8u_C1MR :
                    type == CV_16UC1 ? (ippiMaskMeanStdDevFuncC1)ippiMean_StdDev_16u_C1MR :
                    type == CV_32FC1 ? (ippiMaskMeanStdDevFuncC1)ippiMean_StdDev_32f_C1MR :
                    0;
                if( ippFuncC1 )
                {
                    if( ippFuncC1(src.ptr(), (int)src.step[0], mask.ptr(), (int)mask.step[0], sz, pmean, pstddev) >= 0 )
                    {
                        CV_IMPL_ADD(CV_IMPL_IPP);
                        return;
                    }
                    setIppErrorStatus();
                }
                typedef IppStatus (CV_STDCALL* ippiMaskMeanStdDevFuncC3)(const void *, int, const void *, int, IppiSize, int, Ipp64f *, Ipp64f *);
                ippiMaskMeanStdDevFuncC3 ippFuncC3 =
                    type == CV_8UC3 ? (ippiMaskMeanStdDevFuncC3)ippiMean_StdDev_8u_C3CMR :
                    type == CV_16UC3 ? (ippiMaskMeanStdDevFuncC3)ippiMean_StdDev_16u_C3CMR :
                    type == CV_32FC3 ? (ippiMaskMeanStdDevFuncC3)ippiMean_StdDev_32f_C3CMR :
                    0;
                if( ippFuncC3 )
                {
                    if( ippFuncC3(src.ptr(), (int)src.step[0], mask.ptr(), (int)mask.step[0], sz, 1, &pmean[0], &pstddev[0]) >= 0 &&
                        ippFuncC3(src.ptr(), (int)src.step[0], mask.ptr(), (int)mask.step[0], sz, 2, &pmean[1], &pstddev[1]) >= 0 &&
                        ippFuncC3(src.ptr(), (int)src.step[0], mask.ptr(), (int)mask.step[0], sz, 3, &pmean[2], &pstddev[2]) >= 0 )
                    {
                        CV_IMPL_ADD(CV_IMPL_IPP);
                        return;
                    }
                    setIppErrorStatus();
                }
            }
            else
            {
                typedef IppStatus (CV_STDCALL* ippiMeanStdDevFuncC1)(const void *, int, IppiSize, Ipp64f *, Ipp64f *);
                ippiMeanStdDevFuncC1 ippFuncC1 =
                    type == CV_8UC1 ? (ippiMeanStdDevFuncC1)ippiMean_StdDev_8u_C1R :
                    type == CV_16UC1 ? (ippiMeanStdDevFuncC1)ippiMean_StdDev_16u_C1R :
#if (IPP_VERSION_X100 >= 801)
                    type == CV_32FC1 ? (ippiMeanStdDevFuncC1)ippiMean_StdDev_32f_C1R : // Aug 2013: bug in IPP 7.1, 8.0
#endif
                    0;
                if( ippFuncC1 )
                {
                    if( ippFuncC1(src.ptr(), (int)src.step[0], sz, pmean, pstddev) >= 0 )
                    {
                        CV_IMPL_ADD(CV_IMPL_IPP);
                        return;
                    }
                    setIppErrorStatus();
                }
                typedef IppStatus (CV_STDCALL* ippiMeanStdDevFuncC3)(const void *, int, IppiSize, int, Ipp64f *, Ipp64f *);
                ippiMeanStdDevFuncC3 ippFuncC3 =
                    type == CV_8UC3 ? (ippiMeanStdDevFuncC3)ippiMean_StdDev_8u_C3CR :
                    type == CV_16UC3 ? (ippiMeanStdDevFuncC3)ippiMean_StdDev_16u_C3CR :
                    type == CV_32FC3 ? (ippiMeanStdDevFuncC3)ippiMean_StdDev_32f_C3CR :
                    0;
                if( ippFuncC3 )
                {
                    if( ippFuncC3(src.ptr(), (int)src.step[0], sz, 1, &pmean[0], &pstddev[0]) >= 0 &&
                        ippFuncC3(src.ptr(), (int)src.step[0], sz, 2, &pmean[1], &pstddev[1]) >= 0 &&
                        ippFuncC3(src.ptr(), (int)src.step[0], sz, 3, &pmean[2], &pstddev[2]) >= 0 )
                    {
                        CV_IMPL_ADD(CV_IMPL_IPP);
                        return;
                    }
                    setIppErrorStatus();
                }
            }
        }
    }
#endif


    SumSqrFunc func = getSumSqrTab(depth);

    CV_Assert( func != 0 );

    const Mat* arrays[] = {&src, &mask, 0};
    uchar* ptrs[2];
    NAryMatIterator it(arrays, ptrs);
    int total = (int)it.size, blockSize = total, intSumBlockSize = 0;
    int j, count = 0, nz0 = 0;
    AutoBuffer<double> _buf(cn*4);
    double *s = (double*)_buf, *sq = s + cn;
    int *sbuf = (int*)s, *sqbuf = (int*)sq;
    bool blockSum = depth <= CV_16S, blockSqSum = depth <= CV_8S;
    size_t esz = 0;

    for( k = 0; k < cn; k++ )
        s[k] = sq[k] = 0;

    if( blockSum )
    {
        intSumBlockSize = 1 << 15;
        blockSize = std::min(blockSize, intSumBlockSize);
        sbuf = (int*)(sq + cn);
        if( blockSqSum )
            sqbuf = sbuf + cn;
        for( k = 0; k < cn; k++ )
            sbuf[k] = sqbuf[k] = 0;
        esz = src.elemSize();
    }

    for( size_t i = 0; i < it.nplanes; i++, ++it )
    {
        for( j = 0; j < total; j += blockSize )
        {
            int bsz = std::min(total - j, blockSize);
            int nz = func( ptrs[0], ptrs[1], (uchar*)sbuf, (uchar*)sqbuf, bsz, cn );
            count += nz;
            nz0 += nz;
            if( blockSum && (count + blockSize >= intSumBlockSize || (i+1 >= it.nplanes && j+bsz >= total)) )
            {
                for( k = 0; k < cn; k++ )
                {
                    s[k] += sbuf[k];
                    sbuf[k] = 0;
                }
                if( blockSqSum )
                {
                    for( k = 0; k < cn; k++ )
                    {
                        sq[k] += sqbuf[k];
                        sqbuf[k] = 0;
                    }
                }
                count = 0;
            }
            ptrs[0] += bsz*esz;
            if( ptrs[1] )
                ptrs[1] += bsz;
        }
    }

    double scale = nz0 ? 1./nz0 : 0.;
    for( k = 0; k < cn; k++ )
    {
        s[k] *= scale;
        sq[k] = std::sqrt(std::max(sq[k]*scale - s[k]*s[k], 0.));
    }

    for( j = 0; j < 2; j++ )
    {
        const double* sptr = j == 0 ? s : sq;
        _OutputArray _dst = j == 0 ? _mean : _sdv;
        if( !_dst.needed() )
            continue;

        if( !_dst.fixedSize() )
            _dst.create(cn, 1, CV_64F, -1, true);
        Mat dst = _dst.getMat();
        int dcn = (int)dst.total();
        CV_Assert( dst.type() == CV_64F && dst.isContinuous() &&
                   (dst.cols == 1 || dst.rows == 1) && dcn >= cn );
        double* dptr = dst.ptr<double>();
        for( k = 0; k < cn; k++ )
            dptr[k] = sptr[k];
        for( ; k < dcn; k++ )
            dptr[k] = 0;
    }
}

/****************************************************************************************\
*                                       minMaxLoc                                        *
\****************************************************************************************/

namespace cv
{

template<typename T, typename WT> static void
minMaxIdx_( const T* src, const uchar* mask, WT* _minVal, WT* _maxVal,
            size_t* _minIdx, size_t* _maxIdx, int len, size_t startIdx )
{
    WT minVal = *_minVal, maxVal = *_maxVal;
    size_t minIdx = *_minIdx, maxIdx = *_maxIdx;

    if( !mask )
    {
        for( int i = 0; i < len; i++ )
        {
            T val = src[i];
            if( val < minVal )
            {
                minVal = val;
                minIdx = startIdx + i;
            }
            if( val > maxVal )
            {
                maxVal = val;
                maxIdx = startIdx + i;
            }
        }
    }
    else
    {
        for( int i = 0; i < len; i++ )
        {
            T val = src[i];
            if( mask[i] && val < minVal )
            {
                minVal = val;
                minIdx = startIdx + i;
            }
            if( mask[i] && val > maxVal )
            {
                maxVal = val;
                maxIdx = startIdx + i;
            }
        }
    }

    *_minIdx = minIdx;
    *_maxIdx = maxIdx;
    *_minVal = minVal;
    *_maxVal = maxVal;
}

static void minMaxIdx_8u(const uchar* src, const uchar* mask, int* minval, int* maxval,
                         size_t* minidx, size_t* maxidx, int len, size_t startidx )
{ minMaxIdx_(src, mask, minval, maxval, minidx, maxidx, len, startidx ); }

static void minMaxIdx_8s(const schar* src, const uchar* mask, int* minval, int* maxval,
                         size_t* minidx, size_t* maxidx, int len, size_t startidx )
{ minMaxIdx_(src, mask, minval, maxval, minidx, maxidx, len, startidx ); }

static void minMaxIdx_16u(const ushort* src, const uchar* mask, int* minval, int* maxval,
                          size_t* minidx, size_t* maxidx, int len, size_t startidx )
{ minMaxIdx_(src, mask, minval, maxval, minidx, maxidx, len, startidx ); }

static void minMaxIdx_16s(const short* src, const uchar* mask, int* minval, int* maxval,
                          size_t* minidx, size_t* maxidx, int len, size_t startidx )
{ minMaxIdx_(src, mask, minval, maxval, minidx, maxidx, len, startidx ); }

static void minMaxIdx_32s(const int* src, const uchar* mask, int* minval, int* maxval,
                          size_t* minidx, size_t* maxidx, int len, size_t startidx )
{ minMaxIdx_(src, mask, minval, maxval, minidx, maxidx, len, startidx ); }

static void minMaxIdx_32f(const float* src, const uchar* mask, float* minval, float* maxval,
                          size_t* minidx, size_t* maxidx, int len, size_t startidx )
{ minMaxIdx_(src, mask, minval, maxval, minidx, maxidx, len, startidx ); }

static void minMaxIdx_64f(const double* src, const uchar* mask, double* minval, double* maxval,
                          size_t* minidx, size_t* maxidx, int len, size_t startidx )
{ minMaxIdx_(src, mask, minval, maxval, minidx, maxidx, len, startidx ); }

typedef void (*MinMaxIdxFunc)(const uchar*, const uchar*, int*, int*, size_t*, size_t*, int, size_t);
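// getMinmaxTab() below indexes the per-depth wrappers above by CV_MAT_DEPTH. The
// pointers are cast to the common MinMaxIdxFunc signature, so the int*/float*/double*
// min/max output buffers are type-punned and the caller must pass storage that
// matches the actual depth.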
depth) 1919 { 1920 static MinMaxIdxFunc minmaxTab[] = 1921 { 1922 (MinMaxIdxFunc)GET_OPTIMIZED(minMaxIdx_8u), (MinMaxIdxFunc)GET_OPTIMIZED(minMaxIdx_8s), 1923 (MinMaxIdxFunc)GET_OPTIMIZED(minMaxIdx_16u), (MinMaxIdxFunc)GET_OPTIMIZED(minMaxIdx_16s), 1924 (MinMaxIdxFunc)GET_OPTIMIZED(minMaxIdx_32s), 1925 (MinMaxIdxFunc)GET_OPTIMIZED(minMaxIdx_32f), (MinMaxIdxFunc)GET_OPTIMIZED(minMaxIdx_64f), 1926 0 1927 }; 1928 1929 return minmaxTab[depth]; 1930 } 1931 1932 static void ofs2idx(const Mat& a, size_t ofs, int* idx) 1933 { 1934 int i, d = a.dims; 1935 if( ofs > 0 ) 1936 { 1937 ofs--; 1938 for( i = d-1; i >= 0; i-- ) 1939 { 1940 int sz = a.size[i]; 1941 idx[i] = (int)(ofs % sz); 1942 ofs /= sz; 1943 } 1944 } 1945 else 1946 { 1947 for( i = d-1; i >= 0; i-- ) 1948 idx[i] = -1; 1949 } 1950 } 1951 1952 #ifdef HAVE_OPENCL 1953 1954 template <typename T> 1955 void getMinMaxRes(const Mat & db, double * minVal, double * maxVal, 1956 int* minLoc, int* maxLoc, 1957 int groupnum, int cols, double * maxVal2) 1958 { 1959 uint index_max = std::numeric_limits<uint>::max(); 1960 T minval = std::numeric_limits<T>::max(); 1961 T maxval = std::numeric_limits<T>::min() > 0 ? -std::numeric_limits<T>::max() : std::numeric_limits<T>::min(), maxval2 = maxval; 1962 uint minloc = index_max, maxloc = index_max; 1963 1964 int index = 0; 1965 const T * minptr = NULL, * maxptr = NULL, * maxptr2 = NULL; 1966 const uint * minlocptr = NULL, * maxlocptr = NULL; 1967 if (minVal || minLoc) 1968 { 1969 minptr = db.ptr<T>(); 1970 index += sizeof(T) * groupnum; 1971 } 1972 if (maxVal || maxLoc) 1973 { 1974 maxptr = (const T *)(db.ptr() + index); 1975 index += sizeof(T) * groupnum; 1976 } 1977 if (minLoc) 1978 { 1979 minlocptr = (const uint *)(db.ptr() + index); 1980 index += sizeof(uint) * groupnum; 1981 } 1982 if (maxLoc) 1983 { 1984 maxlocptr = (const uint *)(db.ptr() + index); 1985 index += sizeof(uint) * groupnum; 1986 } 1987 if (maxVal2) 1988 maxptr2 = (const T *)(db.ptr() + index); 1989 1990 for (int i = 0; i < groupnum; i++) 1991 { 1992 if (minptr && minptr[i] <= minval) 1993 { 1994 if (minptr[i] == minval) 1995 { 1996 if (minlocptr) 1997 minloc = std::min(minlocptr[i], minloc); 1998 } 1999 else 2000 { 2001 if (minlocptr) 2002 minloc = minlocptr[i]; 2003 minval = minptr[i]; 2004 } 2005 } 2006 if (maxptr && maxptr[i] >= maxval) 2007 { 2008 if (maxptr[i] == maxval) 2009 { 2010 if (maxlocptr) 2011 maxloc = std::min(maxlocptr[i], maxloc); 2012 } 2013 else 2014 { 2015 if (maxlocptr) 2016 maxloc = maxlocptr[i]; 2017 maxval = maxptr[i]; 2018 } 2019 } 2020 if (maxptr2 && maxptr2[i] > maxval2) 2021 maxval2 = maxptr2[i]; 2022 } 2023 bool zero_mask = (minLoc && minloc == index_max) || 2024 (maxLoc && maxloc == index_max); 2025 2026 if (minVal) 2027 *minVal = zero_mask ? 0 : (double)minval; 2028 if (maxVal) 2029 *maxVal = zero_mask ? 0 : (double)maxval; 2030 if (maxVal2) 2031 *maxVal2 = zero_mask ? 0 : (double)maxval2; 2032 2033 if (minLoc) 2034 { 2035 minLoc[0] = zero_mask ? -1 : minloc / cols; 2036 minLoc[1] = zero_mask ? -1 : minloc % cols; 2037 } 2038 if (maxLoc) 2039 { 2040 maxLoc[0] = zero_mask ? -1 : maxloc / cols; 2041 maxLoc[1] = zero_mask ? 
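// minloc/maxloc are flat row-major offsets produced by the reduction kernel, decoded here as
// row = loc / cols, col = loc % cols. If the mask turned out to be all zeros, the locations
// stay at UINT_MAX (index_max) and the results degenerate to value 0 and location (-1, -1).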
-1 : maxloc % cols; 2042 } 2043 } 2044 2045 typedef void (*getMinMaxResFunc)(const Mat & db, double * minVal, double * maxVal, 2046 int * minLoc, int *maxLoc, int groupnum, int cols, double * maxVal2); 2047 2048 static bool ocl_minMaxIdx( InputArray _src, double* minVal, double* maxVal, int* minLoc, int* maxLoc, InputArray _mask, 2049 int ddepth = -1, bool absValues = false, InputArray _src2 = noArray(), double * maxVal2 = NULL) 2050 { 2051 const ocl::Device & dev = ocl::Device::getDefault(); 2052 2053 #ifdef ANDROID 2054 if (dev.isNVidia()) 2055 return false; 2056 #endif 2057 2058 bool doubleSupport = dev.doubleFPConfig() > 0, haveMask = !_mask.empty(), 2059 haveSrc2 = _src2.kind() != _InputArray::NONE; 2060 int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type), 2061 kercn = haveMask ? cn : std::min(4, ocl::predictOptimalVectorWidth(_src, _src2)); 2062 2063 // the following modes are disabled since they occasionally fail on AMD devices (e.g. A10-6800K, Sep. 2014) 2064 if ((haveMask || type == CV_32FC1) && dev.isAMD()) 2065 return false; 2066 2067 CV_Assert( (cn == 1 && (!haveMask || _mask.type() == CV_8U)) || 2068 (cn >= 1 && !minLoc && !maxLoc) ); 2069 2070 if (ddepth < 0) 2071 ddepth = depth; 2072 2073 CV_Assert(!haveSrc2 || _src2.type() == type); 2074 2075 if (depth == CV_32S) 2076 return false; 2077 2078 if ((depth == CV_64F || ddepth == CV_64F) && !doubleSupport) 2079 return false; 2080 2081 int groupnum = dev.maxComputeUnits(); 2082 size_t wgs = dev.maxWorkGroupSize(); 2083 2084 int wgs2_aligned = 1; 2085 while (wgs2_aligned < (int)wgs) 2086 wgs2_aligned <<= 1; 2087 wgs2_aligned >>= 1; 2088 2089 bool needMinVal = minVal || minLoc, needMinLoc = minLoc != NULL, 2090 needMaxVal = maxVal || maxLoc, needMaxLoc = maxLoc != NULL; 2091 2092 // when a mask is used, we must find out whether it is filled with zeros; 2093 // so compute a min or max location too: if it remains undefined, the mask is all zeros 2094 if (!(needMaxLoc || needMinLoc) && haveMask) 2095 { 2096 if (needMinVal) 2097 needMinLoc = true; 2098 else 2099 needMaxLoc = true; 2100 } 2101 2102 char cvt[2][40]; 2103 String opts = format("-D DEPTH_%d -D srcT1=%s%s -D WGS=%d -D srcT=%s" 2104 " -D WGS2_ALIGNED=%d%s%s%s -D kercn=%d%s%s%s%s" 2105 " -D dstT1=%s -D dstT=%s -D convertToDT=%s%s%s%s%s -D wdepth=%d -D convertFromU=%s", 2106 depth, ocl::typeToStr(depth), haveMask ? " -D HAVE_MASK" : "", (int)wgs, 2107 ocl::typeToStr(CV_MAKE_TYPE(depth, kercn)), wgs2_aligned, 2108 doubleSupport ? " -D DOUBLE_SUPPORT" : "", 2109 _src.isContinuous() ? " -D HAVE_SRC_CONT" : "", 2110 _mask.isContinuous() ? " -D HAVE_MASK_CONT" : "", kercn, 2111 needMinVal ? " -D NEED_MINVAL" : "", needMaxVal ? " -D NEED_MAXVAL" : "", 2112 needMinLoc ? " -D NEED_MINLOC" : "", needMaxLoc ? " -D NEED_MAXLOC" : "", 2113 ocl::typeToStr(ddepth), ocl::typeToStr(CV_MAKE_TYPE(ddepth, kercn)), 2114 ocl::convertTypeStr(depth, ddepth, kercn, cvt[0]), 2115 absValues ? " -D OP_ABS" : "", 2116 haveSrc2 ? " -D HAVE_SRC2" : "", maxVal2 ? " -D OP_CALC2" : "", 2117 haveSrc2 && _src2.isContinuous() ? " -D HAVE_SRC2_CONT" : "", ddepth, 2118 depth <= CV_32S && ddepth == CV_32S ? ocl::convertTypeStr(CV_8U, ddepth, kercn, cvt[1]) : "noconvert"); 2119 2120 ocl::Kernel k("minmaxloc", ocl::core::minmaxloc_oclsrc, opts); 2121 if (k.empty()) 2122 return false; 2123 2124 int esz = CV_ELEM_SIZE(ddepth), esz32s = CV_ELEM_SIZE1(CV_32S), 2125 dbsize = groupnum * ((needMinVal ? esz : 0) + (needMaxVal ? esz : 0) + 2126 (needMinLoc ? esz32s : 0) + (needMaxLoc ? esz32s : 0) + 2127 (maxVal2 ?
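// db below is a packed per-workgroup reduction buffer with groupnum entries per section, laid
// out as [min values][max values][min locations][max locations][optional second maxima];
// getMinMaxRes() above unpacks it with the same offsets, so the two layouts must stay in sync.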
esz : 0)); 2128 UMat src = _src.getUMat(), src2 = _src2.getUMat(), db(1, dbsize, CV_8UC1), mask = _mask.getUMat(); 2129 2130 if (cn > 1 && !haveMask) 2131 { 2132 src = src.reshape(1); 2133 src2 = src2.reshape(1); 2134 } 2135 2136 if (haveSrc2) 2137 { 2138 if (!haveMask) 2139 k.args(ocl::KernelArg::ReadOnlyNoSize(src), src.cols, (int)src.total(), 2140 groupnum, ocl::KernelArg::PtrWriteOnly(db), ocl::KernelArg::ReadOnlyNoSize(src2)); 2141 else 2142 k.args(ocl::KernelArg::ReadOnlyNoSize(src), src.cols, (int)src.total(), 2143 groupnum, ocl::KernelArg::PtrWriteOnly(db), ocl::KernelArg::ReadOnlyNoSize(mask), 2144 ocl::KernelArg::ReadOnlyNoSize(src2)); 2145 } 2146 else 2147 { 2148 if (!haveMask) 2149 k.args(ocl::KernelArg::ReadOnlyNoSize(src), src.cols, (int)src.total(), 2150 groupnum, ocl::KernelArg::PtrWriteOnly(db)); 2151 else 2152 k.args(ocl::KernelArg::ReadOnlyNoSize(src), src.cols, (int)src.total(), 2153 groupnum, ocl::KernelArg::PtrWriteOnly(db), ocl::KernelArg::ReadOnlyNoSize(mask)); 2154 } 2155 2156 size_t globalsize = groupnum * wgs; 2157 if (!k.run(1, &globalsize, &wgs, true)) 2158 return false; 2159 2160 static const getMinMaxResFunc functab[7] = 2161 { 2162 getMinMaxRes<uchar>, 2163 getMinMaxRes<char>, 2164 getMinMaxRes<ushort>, 2165 getMinMaxRes<short>, 2166 getMinMaxRes<int>, 2167 getMinMaxRes<float>, 2168 getMinMaxRes<double> 2169 }; 2170 2171 getMinMaxResFunc func = functab[ddepth]; 2172 2173 int locTemp[2]; 2174 func(db.getMat(ACCESS_READ), minVal, maxVal, 2175 needMinLoc ? minLoc ? minLoc : locTemp : minLoc, 2176 needMaxLoc ? maxLoc ? maxLoc : locTemp : maxLoc, 2177 groupnum, src.cols, maxVal2); 2178 2179 return true; 2180 } 2181 2182 #endif 2183 2184 } 2185 2186 void cv::minMaxIdx(InputArray _src, double* minVal, 2187 double* maxVal, int* minIdx, int* maxIdx, 2188 InputArray _mask) 2189 { 2190 int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type); 2191 CV_Assert( (cn == 1 && (_mask.empty() || _mask.type() == CV_8U)) || 2192 (cn > 1 && _mask.empty() && !minIdx && !maxIdx) ); 2193 2194 CV_OCL_RUN(OCL_PERFORMANCE_CHECK(_src.isUMat()) && _src.dims() <= 2 && (_mask.empty() || _src.size() == _mask.size()), 2195 ocl_minMaxIdx(_src, minVal, maxVal, minIdx, maxIdx, _mask)) 2196 2197 Mat src = _src.getMat(), mask = _mask.getMat(); 2198 2199 #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7) 2200 CV_IPP_CHECK() 2201 { 2202 size_t total_size = src.total(); 2203 int rows = src.size[0], cols = rows ? (int)(total_size/rows) : 0; 2204 if( src.dims == 2 || (src.isContinuous() && mask.isContinuous() && cols > 0 && (size_t)rows*cols == total_size) ) 2205 { 2206 IppiSize sz = { cols * cn, rows }; 2207 2208 if( !mask.empty() ) 2209 { 2210 typedef IppStatus (CV_STDCALL* ippiMaskMinMaxIndxFuncC1)(const void *, int, const void *, int, 2211 IppiSize, Ipp32f *, Ipp32f *, IppiPoint *, IppiPoint *); 2212 2213 CV_SUPPRESS_DEPRECATED_START 2214 ippiMaskMinMaxIndxFuncC1 ippFuncC1 = 2215 type == CV_8UC1 ? (ippiMaskMinMaxIndxFuncC1)ippiMinMaxIndx_8u_C1MR : 2216 type == CV_8SC1 ? (ippiMaskMinMaxIndxFuncC1)ippiMinMaxIndx_8s_C1MR : 2217 type == CV_16UC1 ? (ippiMaskMinMaxIndxFuncC1)ippiMinMaxIndx_16u_C1MR : 2218 type == CV_32FC1 ? 
(ippiMaskMinMaxIndxFuncC1)ippiMinMaxIndx_32f_C1MR : 0; 2219 CV_SUPPRESS_DEPRECATED_END 2220 2221 if( ippFuncC1 ) 2222 { 2223 Ipp32f min, max; 2224 IppiPoint minp, maxp; 2225 if( ippFuncC1(src.ptr(), (int)src.step[0], mask.ptr(), (int)mask.step[0], sz, &min, &max, &minp, &maxp) >= 0 ) 2226 { 2227 if( minVal ) 2228 *minVal = (double)min; 2229 if( maxVal ) 2230 *maxVal = (double)max; 2231 if( !minp.x && !minp.y && !maxp.x && !maxp.y && !mask.ptr()[0] ) 2232 minp.x = maxp.x = -1; 2233 if( minIdx ) 2234 { 2235 size_t minidx = minp.y * cols + minp.x + 1; 2236 ofs2idx(src, minidx, minIdx); 2237 } 2238 if( maxIdx ) 2239 { 2240 size_t maxidx = maxp.y * cols + maxp.x + 1; 2241 ofs2idx(src, maxidx, maxIdx); 2242 } 2243 CV_IMPL_ADD(CV_IMPL_IPP); 2244 return; 2245 } 2246 setIppErrorStatus(); 2247 } 2248 } 2249 else 2250 { 2251 typedef IppStatus (CV_STDCALL* ippiMinMaxIndxFuncC1)(const void *, int, IppiSize, Ipp32f *, Ipp32f *, IppiPoint *, IppiPoint *); 2252 2253 CV_SUPPRESS_DEPRECATED_START 2254 ippiMinMaxIndxFuncC1 ippFuncC1 = 2255 depth == CV_8U ? (ippiMinMaxIndxFuncC1)ippiMinMaxIndx_8u_C1R : 2256 depth == CV_8S ? (ippiMinMaxIndxFuncC1)ippiMinMaxIndx_8s_C1R : 2257 depth == CV_16U ? (ippiMinMaxIndxFuncC1)ippiMinMaxIndx_16u_C1R : 2258 #if !((defined _MSC_VER && defined _M_IX86) || defined __i386__) 2259 depth == CV_32F ? (ippiMinMaxIndxFuncC1)ippiMinMaxIndx_32f_C1R : 2260 #endif 2261 0; 2262 CV_SUPPRESS_DEPRECATED_END 2263 2264 if( ippFuncC1 ) 2265 { 2266 Ipp32f min, max; 2267 IppiPoint minp, maxp; 2268 if( ippFuncC1(src.ptr(), (int)src.step[0], sz, &min, &max, &minp, &maxp) >= 0 ) 2269 { 2270 if( minVal ) 2271 *minVal = (double)min; 2272 if( maxVal ) 2273 *maxVal = (double)max; 2274 if( minIdx ) 2275 { 2276 size_t minidx = minp.y * cols + minp.x + 1; 2277 ofs2idx(src, minidx, minIdx); 2278 } 2279 if( maxIdx ) 2280 { 2281 size_t maxidx = maxp.y * cols + maxp.x + 1; 2282 ofs2idx(src, maxidx, maxIdx); 2283 } 2284 CV_IMPL_ADD(CV_IMPL_IPP); 2285 return; 2286 } 2287 setIppErrorStatus(); 2288 } 2289 } 2290 } 2291 } 2292 #endif 2293 2294 MinMaxIdxFunc func = getMinmaxTab(depth); 2295 CV_Assert( func != 0 ); 2296 2297 const Mat* arrays[] = {&src, &mask, 0}; 2298 uchar* ptrs[2]; 2299 NAryMatIterator it(arrays, ptrs); 2300 2301 size_t minidx = 0, maxidx = 0; 2302 int iminval = INT_MAX, imaxval = INT_MIN; 2303 float fminval = FLT_MAX, fmaxval = -FLT_MAX; 2304 double dminval = DBL_MAX, dmaxval = -DBL_MAX; 2305 size_t startidx = 1; 2306 int *minval = &iminval, *maxval = &imaxval; 2307 int planeSize = (int)it.size*cn; 2308 2309 if( depth == CV_32F ) 2310 minval = (int*)&fminval, maxval = (int*)&fmaxval; 2311 else if( depth == CV_64F ) 2312 minval = (int*)&dminval, maxval = (int*)&dmaxval; 2313 2314 for( size_t i = 0; i < it.nplanes; i++, ++it, startidx += planeSize ) 2315 func( ptrs[0], ptrs[1], minval, maxval, &minidx, &maxidx, planeSize, startidx ); 2316 2317 if( minidx == 0 ) 2318 dminval = dmaxval = 0; 2319 else if( depth == CV_32F ) 2320 dminval = fminval, dmaxval = fmaxval; 2321 else if( depth <= CV_32S ) 2322 dminval = iminval, dmaxval = imaxval; 2323 2324 if( minVal ) 2325 *minVal = dminval; 2326 if( maxVal ) 2327 *maxVal = dmaxval; 2328 2329 if( minIdx ) 2330 ofs2idx(src, minidx, minIdx); 2331 if( maxIdx ) 2332 ofs2idx(src, maxidx, maxIdx); 2333 } 2334 2335 void cv::minMaxLoc( InputArray _img, double* minVal, double* maxVal, 2336 Point* minLoc, Point* maxLoc, InputArray mask ) 2337 { 2338 CV_Assert(_img.dims() <= 2); 2339 2340 minMaxIdx(_img, minVal, maxVal, (int*)minLoc, (int*)maxLoc, mask); 2341 if( 
minLoc ) 2342 std::swap(minLoc->x, minLoc->y); 2343 if( maxLoc ) 2344 std::swap(maxLoc->x, maxLoc->y); 2345 } 2346 2347 /****************************************************************************************\ 2348 * norm * 2349 \****************************************************************************************/ 2350 2351 namespace cv 2352 { 2353 2354 template<typename T, typename ST> int 2355 normInf_(const T* src, const uchar* mask, ST* _result, int len, int cn) 2356 { 2357 ST result = *_result; 2358 if( !mask ) 2359 { 2360 result = std::max(result, normInf<T, ST>(src, len*cn)); 2361 } 2362 else 2363 { 2364 for( int i = 0; i < len; i++, src += cn ) 2365 if( mask[i] ) 2366 { 2367 for( int k = 0; k < cn; k++ ) 2368 result = std::max(result, ST(cv_abs(src[k]))); 2369 } 2370 } 2371 *_result = result; 2372 return 0; 2373 } 2374 2375 template<typename T, typename ST> int 2376 normL1_(const T* src, const uchar* mask, ST* _result, int len, int cn) 2377 { 2378 ST result = *_result; 2379 if( !mask ) 2380 { 2381 result += normL1<T, ST>(src, len*cn); 2382 } 2383 else 2384 { 2385 for( int i = 0; i < len; i++, src += cn ) 2386 if( mask[i] ) 2387 { 2388 for( int k = 0; k < cn; k++ ) 2389 result += cv_abs(src[k]); 2390 } 2391 } 2392 *_result = result; 2393 return 0; 2394 } 2395 2396 template<typename T, typename ST> int 2397 normL2_(const T* src, const uchar* mask, ST* _result, int len, int cn) 2398 { 2399 ST result = *_result; 2400 if( !mask ) 2401 { 2402 result += normL2Sqr<T, ST>(src, len*cn); 2403 } 2404 else 2405 { 2406 for( int i = 0; i < len; i++, src += cn ) 2407 if( mask[i] ) 2408 { 2409 for( int k = 0; k < cn; k++ ) 2410 { 2411 T v = src[k]; 2412 result += (ST)v*v; 2413 } 2414 } 2415 } 2416 *_result = result; 2417 return 0; 2418 } 2419 2420 template<typename T, typename ST> int 2421 normDiffInf_(const T* src1, const T* src2, const uchar* mask, ST* _result, int len, int cn) 2422 { 2423 ST result = *_result; 2424 if( !mask ) 2425 { 2426 result = std::max(result, normInf<T, ST>(src1, src2, len*cn)); 2427 } 2428 else 2429 { 2430 for( int i = 0; i < len; i++, src1 += cn, src2 += cn ) 2431 if( mask[i] ) 2432 { 2433 for( int k = 0; k < cn; k++ ) 2434 result = std::max(result, (ST)std::abs(src1[k] - src2[k])); 2435 } 2436 } 2437 *_result = result; 2438 return 0; 2439 } 2440 2441 template<typename T, typename ST> int 2442 normDiffL1_(const T* src1, const T* src2, const uchar* mask, ST* _result, int len, int cn) 2443 { 2444 ST result = *_result; 2445 if( !mask ) 2446 { 2447 result += normL1<T, ST>(src1, src2, len*cn); 2448 } 2449 else 2450 { 2451 for( int i = 0; i < len; i++, src1 += cn, src2 += cn ) 2452 if( mask[i] ) 2453 { 2454 for( int k = 0; k < cn; k++ ) 2455 result += std::abs(src1[k] - src2[k]); 2456 } 2457 } 2458 *_result = result; 2459 return 0; 2460 } 2461 2462 template<typename T, typename ST> int 2463 normDiffL2_(const T* src1, const T* src2, const uchar* mask, ST* _result, int len, int cn) 2464 { 2465 ST result = *_result; 2466 if( !mask ) 2467 { 2468 result += normL2Sqr<T, ST>(src1, src2, len*cn); 2469 } 2470 else 2471 { 2472 for( int i = 0; i < len; i++, src1 += cn, src2 += cn ) 2473 if( mask[i] ) 2474 { 2475 for( int k = 0; k < cn; k++ ) 2476 { 2477 ST v = src1[k] - src2[k]; 2478 result += v*v; 2479 } 2480 } 2481 } 2482 *_result = result; 2483 return 0; 2484 } 2485 2486 Hamming::ResultType Hamming::operator()( const unsigned char* a, const unsigned char* b, int size ) const 2487 { 2488 return cv::hal::normHamming(a, b, size); 2489 } 2490 2491 #define CV_DEF_NORM_FUNC(L, suffix, 
type, ntype) \ 2492 static int norm##L##_##suffix(const type* src, const uchar* mask, ntype* r, int len, int cn) \ 2493 { return norm##L##_(src, mask, r, len, cn); } \ 2494 static int normDiff##L##_##suffix(const type* src1, const type* src2, \ 2495 const uchar* mask, ntype* r, int len, int cn) \ 2496 { return normDiff##L##_(src1, src2, mask, r, (int)len, cn); } 2497 2498 #define CV_DEF_NORM_ALL(suffix, type, inftype, l1type, l2type) \ 2499 CV_DEF_NORM_FUNC(Inf, suffix, type, inftype) \ 2500 CV_DEF_NORM_FUNC(L1, suffix, type, l1type) \ 2501 CV_DEF_NORM_FUNC(L2, suffix, type, l2type) 2502 2503 CV_DEF_NORM_ALL(8u, uchar, int, int, int) 2504 CV_DEF_NORM_ALL(8s, schar, int, int, int) 2505 CV_DEF_NORM_ALL(16u, ushort, int, int, double) 2506 CV_DEF_NORM_ALL(16s, short, int, int, double) 2507 CV_DEF_NORM_ALL(32s, int, int, double, double) 2508 CV_DEF_NORM_ALL(32f, float, float, double, double) 2509 CV_DEF_NORM_ALL(64f, double, double, double, double) 2510 2511 2512 typedef int (*NormFunc)(const uchar*, const uchar*, uchar*, int, int); 2513 typedef int (*NormDiffFunc)(const uchar*, const uchar*, const uchar*, uchar*, int, int); 2514 2515 static NormFunc getNormFunc(int normType, int depth) 2516 { 2517 static NormFunc normTab[3][8] = 2518 { 2519 { 2520 (NormFunc)GET_OPTIMIZED(normInf_8u), (NormFunc)GET_OPTIMIZED(normInf_8s), (NormFunc)GET_OPTIMIZED(normInf_16u), (NormFunc)GET_OPTIMIZED(normInf_16s), 2521 (NormFunc)GET_OPTIMIZED(normInf_32s), (NormFunc)GET_OPTIMIZED(normInf_32f), (NormFunc)normInf_64f, 0 2522 }, 2523 { 2524 (NormFunc)GET_OPTIMIZED(normL1_8u), (NormFunc)GET_OPTIMIZED(normL1_8s), (NormFunc)GET_OPTIMIZED(normL1_16u), (NormFunc)GET_OPTIMIZED(normL1_16s), 2525 (NormFunc)GET_OPTIMIZED(normL1_32s), (NormFunc)GET_OPTIMIZED(normL1_32f), (NormFunc)normL1_64f, 0 2526 }, 2527 { 2528 (NormFunc)GET_OPTIMIZED(normL2_8u), (NormFunc)GET_OPTIMIZED(normL2_8s), (NormFunc)GET_OPTIMIZED(normL2_16u), (NormFunc)GET_OPTIMIZED(normL2_16s), 2529 (NormFunc)GET_OPTIMIZED(normL2_32s), (NormFunc)GET_OPTIMIZED(normL2_32f), (NormFunc)normL2_64f, 0 2530 } 2531 }; 2532 2533 return normTab[normType][depth]; 2534 } 2535 2536 static NormDiffFunc getNormDiffFunc(int normType, int depth) 2537 { 2538 static NormDiffFunc normDiffTab[3][8] = 2539 { 2540 { 2541 (NormDiffFunc)GET_OPTIMIZED(normDiffInf_8u), (NormDiffFunc)normDiffInf_8s, 2542 (NormDiffFunc)normDiffInf_16u, (NormDiffFunc)normDiffInf_16s, 2543 (NormDiffFunc)normDiffInf_32s, (NormDiffFunc)GET_OPTIMIZED(normDiffInf_32f), 2544 (NormDiffFunc)normDiffInf_64f, 0 2545 }, 2546 { 2547 (NormDiffFunc)GET_OPTIMIZED(normDiffL1_8u), (NormDiffFunc)normDiffL1_8s, 2548 (NormDiffFunc)normDiffL1_16u, (NormDiffFunc)normDiffL1_16s, 2549 (NormDiffFunc)normDiffL1_32s, (NormDiffFunc)GET_OPTIMIZED(normDiffL1_32f), 2550 (NormDiffFunc)normDiffL1_64f, 0 2551 }, 2552 { 2553 (NormDiffFunc)GET_OPTIMIZED(normDiffL2_8u), (NormDiffFunc)normDiffL2_8s, 2554 (NormDiffFunc)normDiffL2_16u, (NormDiffFunc)normDiffL2_16s, 2555 (NormDiffFunc)normDiffL2_32s, (NormDiffFunc)GET_OPTIMIZED(normDiffL2_32f), 2556 (NormDiffFunc)normDiffL2_64f, 0 2557 } 2558 }; 2559 2560 return normDiffTab[normType][depth]; 2561 } 2562 2563 #ifdef HAVE_OPENCL 2564 2565 static bool ocl_norm( InputArray _src, int normType, InputArray _mask, double & result ) 2566 { 2567 const ocl::Device & d = ocl::Device::getDefault(); 2568 2569 #ifdef ANDROID 2570 if (d.isNVidia()) 2571 return false; 2572 #endif 2573 2574 int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type); 2575 bool doubleSupport = d.doubleFPConfig() > 0, 2576 
haveMask = _mask.kind() != _InputArray::NONE; 2577 2578 if ( !(normType == NORM_INF || normType == NORM_L1 || normType == NORM_L2 || normType == NORM_L2SQR) || 2579 (!doubleSupport && depth == CV_64F)) 2580 return false; 2581 2582 UMat src = _src.getUMat(); 2583 2584 if (normType == NORM_INF) 2585 { 2586 if (!ocl_minMaxIdx(_src, NULL, &result, NULL, NULL, _mask, 2587 std::max(depth, CV_32S), depth != CV_8U && depth != CV_16U)) 2588 return false; 2589 } 2590 else if (normType == NORM_L1 || normType == NORM_L2 || normType == NORM_L2SQR) 2591 { 2592 Scalar sc; 2593 bool unstype = depth == CV_8U || depth == CV_16U; 2594 2595 if ( !ocl_sum(haveMask ? src : src.reshape(1), sc, normType == NORM_L2 || normType == NORM_L2SQR ? 2596 OCL_OP_SUM_SQR : (unstype ? OCL_OP_SUM : OCL_OP_SUM_ABS), _mask) ) 2597 return false; 2598 2599 if (!haveMask) 2600 cn = 1; 2601 2602 double s = 0.0; 2603 for (int i = 0; i < cn; ++i) 2604 s += sc[i]; 2605 2606 result = normType == NORM_L1 || normType == NORM_L2SQR ? s : std::sqrt(s); 2607 } 2608 2609 return true; 2610 } 2611 2612 #endif 2613 2614 } 2615 2616 double cv::norm( InputArray _src, int normType, InputArray _mask ) 2617 { 2618 normType &= NORM_TYPE_MASK; 2619 CV_Assert( normType == NORM_INF || normType == NORM_L1 || 2620 normType == NORM_L2 || normType == NORM_L2SQR || 2621 ((normType == NORM_HAMMING || normType == NORM_HAMMING2) && _src.type() == CV_8U) ); 2622 2623 #ifdef HAVE_OPENCL 2624 double _result = 0; 2625 CV_OCL_RUN_(OCL_PERFORMANCE_CHECK(_src.isUMat()) && _src.dims() <= 2, 2626 ocl_norm(_src, normType, _mask, _result), 2627 _result) 2628 #endif 2629 2630 Mat src = _src.getMat(), mask = _mask.getMat(); 2631 int depth = src.depth(), cn = src.channels(); 2632 2633 #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7) 2634 CV_IPP_CHECK() 2635 { 2636 size_t total_size = src.total(); 2637 int rows = src.size[0], cols = rows ? (int)(total_size/rows) : 0; 2638 2639 if( (src.dims == 2 || (src.isContinuous() && mask.isContinuous())) 2640 && cols > 0 && (size_t)rows*cols == total_size 2641 && (normType == NORM_INF || normType == NORM_L1 || 2642 normType == NORM_L2 || normType == NORM_L2SQR) ) 2643 { 2644 IppiSize sz = { cols, rows }; 2645 int type = src.type(); 2646 if( !mask.empty() ) 2647 { 2648 typedef IppStatus (CV_STDCALL* ippiMaskNormFuncC1)(const void *, int, const void *, int, IppiSize, Ipp64f *); 2649 ippiMaskNormFuncC1 ippFuncC1 = 2650 normType == NORM_INF ? 2651 (type == CV_8UC1 ? (ippiMaskNormFuncC1)ippiNorm_Inf_8u_C1MR : 2652 type == CV_8SC1 ? (ippiMaskNormFuncC1)ippiNorm_Inf_8s_C1MR : 2653 // type == CV_16UC1 ? (ippiMaskNormFuncC1)ippiNorm_Inf_16u_C1MR : 2654 type == CV_32FC1 ? (ippiMaskNormFuncC1)ippiNorm_Inf_32f_C1MR : 2655 0) : 2656 normType == NORM_L1 ? 2657 (type == CV_8UC1 ? (ippiMaskNormFuncC1)ippiNorm_L1_8u_C1MR : 2658 type == CV_8SC1 ? (ippiMaskNormFuncC1)ippiNorm_L1_8s_C1MR : 2659 type == CV_16UC1 ? (ippiMaskNormFuncC1)ippiNorm_L1_16u_C1MR : 2660 type == CV_32FC1 ? (ippiMaskNormFuncC1)ippiNorm_L1_32f_C1MR : 2661 0) : 2662 normType == NORM_L2 || normType == NORM_L2SQR ? 2663 (type == CV_8UC1 ? (ippiMaskNormFuncC1)ippiNorm_L2_8u_C1MR : 2664 type == CV_8SC1 ? (ippiMaskNormFuncC1)ippiNorm_L2_8s_C1MR : 2665 type == CV_16UC1 ? (ippiMaskNormFuncC1)ippiNorm_L2_16u_C1MR : 2666 type == CV_32FC1 ? 
(ippiMaskNormFuncC1)ippiNorm_L2_32f_C1MR : 2667 0) : 0; 2668 if( ippFuncC1 ) 2669 { 2670 Ipp64f norm; 2671 if( ippFuncC1(src.ptr(), (int)src.step[0], mask.ptr(), (int)mask.step[0], sz, &norm) >= 0 ) 2672 { 2673 CV_IMPL_ADD(CV_IMPL_IPP); 2674 return normType == NORM_L2SQR ? (double)(norm * norm) : (double)norm; 2675 } 2676 2677 setIppErrorStatus(); 2678 } 2679 /*typedef IppStatus (CV_STDCALL* ippiMaskNormFuncC3)(const void *, int, const void *, int, IppiSize, int, Ipp64f *); 2680 ippiMaskNormFuncC3 ippFuncC3 = 2681 normType == NORM_INF ? 2682 (type == CV_8UC3 ? (ippiMaskNormFuncC3)ippiNorm_Inf_8u_C3CMR : 2683 type == CV_8SC3 ? (ippiMaskNormFuncC3)ippiNorm_Inf_8s_C3CMR : 2684 type == CV_16UC3 ? (ippiMaskNormFuncC3)ippiNorm_Inf_16u_C3CMR : 2685 type == CV_32FC3 ? (ippiMaskNormFuncC3)ippiNorm_Inf_32f_C3CMR : 2686 0) : 2687 normType == NORM_L1 ? 2688 (type == CV_8UC3 ? (ippiMaskNormFuncC3)ippiNorm_L1_8u_C3CMR : 2689 type == CV_8SC3 ? (ippiMaskNormFuncC3)ippiNorm_L1_8s_C3CMR : 2690 type == CV_16UC3 ? (ippiMaskNormFuncC3)ippiNorm_L1_16u_C3CMR : 2691 type == CV_32FC3 ? (ippiMaskNormFuncC3)ippiNorm_L1_32f_C3CMR : 2692 0) : 2693 normType == NORM_L2 || normType == NORM_L2SQR ? 2694 (type == CV_8UC3 ? (ippiMaskNormFuncC3)ippiNorm_L2_8u_C3CMR : 2695 type == CV_8SC3 ? (ippiMaskNormFuncC3)ippiNorm_L2_8s_C3CMR : 2696 type == CV_16UC3 ? (ippiMaskNormFuncC3)ippiNorm_L2_16u_C3CMR : 2697 type == CV_32FC3 ? (ippiMaskNormFuncC3)ippiNorm_L2_32f_C3CMR : 2698 0) : 0; 2699 if( ippFuncC3 ) 2700 { 2701 Ipp64f norm1, norm2, norm3; 2702 if( ippFuncC3(src.data, (int)src.step[0], mask.data, (int)mask.step[0], sz, 1, &norm1) >= 0 && 2703 ippFuncC3(src.data, (int)src.step[0], mask.data, (int)mask.step[0], sz, 2, &norm2) >= 0 && 2704 ippFuncC3(src.data, (int)src.step[0], mask.data, (int)mask.step[0], sz, 3, &norm3) >= 0) 2705 { 2706 Ipp64f norm = 2707 normType == NORM_INF ? std::max(std::max(norm1, norm2), norm3) : 2708 normType == NORM_L1 ? norm1 + norm2 + norm3 : 2709 normType == NORM_L2 || normType == NORM_L2SQR ? std::sqrt(norm1 * norm1 + norm2 * norm2 + norm3 * norm3) : 2710 0; 2711 CV_IMPL_ADD(CV_IMPL_IPP); 2712 return normType == NORM_L2SQR ? (double)(norm * norm) : (double)norm; 2713 } 2714 setIppErrorStatus(); 2715 }*/ 2716 } 2717 else 2718 { 2719 typedef IppStatus (CV_STDCALL* ippiNormFuncHint)(const void *, int, IppiSize, Ipp64f *, IppHintAlgorithm hint); 2720 typedef IppStatus (CV_STDCALL* ippiNormFuncNoHint)(const void *, int, IppiSize, Ipp64f *); 2721 ippiNormFuncHint ippFuncHint = 2722 normType == NORM_L1 ? 2723 (type == CV_32FC1 ? (ippiNormFuncHint)ippiNorm_L1_32f_C1R : 2724 type == CV_32FC3 ? (ippiNormFuncHint)ippiNorm_L1_32f_C3R : 2725 type == CV_32FC4 ? (ippiNormFuncHint)ippiNorm_L1_32f_C4R : 2726 0) : 2727 normType == NORM_L2 || normType == NORM_L2SQR ? 2728 (type == CV_32FC1 ? (ippiNormFuncHint)ippiNorm_L2_32f_C1R : 2729 type == CV_32FC3 ? (ippiNormFuncHint)ippiNorm_L2_32f_C3R : 2730 type == CV_32FC4 ? (ippiNormFuncHint)ippiNorm_L2_32f_C4R : 2731 0) : 0; 2732 ippiNormFuncNoHint ippFuncNoHint = 2733 normType == NORM_INF ? 2734 (type == CV_8UC1 ? (ippiNormFuncNoHint)ippiNorm_Inf_8u_C1R : 2735 type == CV_8UC3 ? (ippiNormFuncNoHint)ippiNorm_Inf_8u_C3R : 2736 type == CV_8UC4 ? (ippiNormFuncNoHint)ippiNorm_Inf_8u_C4R : 2737 type == CV_16UC1 ? (ippiNormFuncNoHint)ippiNorm_Inf_16u_C1R : 2738 type == CV_16UC3 ? (ippiNormFuncNoHint)ippiNorm_Inf_16u_C3R : 2739 type == CV_16UC4 ? (ippiNormFuncNoHint)ippiNorm_Inf_16u_C4R : 2740 type == CV_16SC1 ? 
(ippiNormFuncNoHint)ippiNorm_Inf_16s_C1R : 2741 #if (IPP_VERSION_X100 >= 801) 2742 type == CV_16SC3 ? (ippiNormFuncNoHint)ippiNorm_Inf_16s_C3R : //Aug 2013: problem in IPP 7.1, 8.0 : -32768 2743 type == CV_16SC4 ? (ippiNormFuncNoHint)ippiNorm_Inf_16s_C4R : //Aug 2013: problem in IPP 7.1, 8.0 : -32768 2744 #endif 2745 type == CV_32FC1 ? (ippiNormFuncNoHint)ippiNorm_Inf_32f_C1R : 2746 type == CV_32FC3 ? (ippiNormFuncNoHint)ippiNorm_Inf_32f_C3R : 2747 type == CV_32FC4 ? (ippiNormFuncNoHint)ippiNorm_Inf_32f_C4R : 2748 0) : 2749 normType == NORM_L1 ? 2750 (type == CV_8UC1 ? (ippiNormFuncNoHint)ippiNorm_L1_8u_C1R : 2751 type == CV_8UC3 ? (ippiNormFuncNoHint)ippiNorm_L1_8u_C3R : 2752 type == CV_8UC4 ? (ippiNormFuncNoHint)ippiNorm_L1_8u_C4R : 2753 type == CV_16UC1 ? (ippiNormFuncNoHint)ippiNorm_L1_16u_C1R : 2754 type == CV_16UC3 ? (ippiNormFuncNoHint)ippiNorm_L1_16u_C3R : 2755 type == CV_16UC4 ? (ippiNormFuncNoHint)ippiNorm_L1_16u_C4R : 2756 type == CV_16SC1 ? (ippiNormFuncNoHint)ippiNorm_L1_16s_C1R : 2757 type == CV_16SC3 ? (ippiNormFuncNoHint)ippiNorm_L1_16s_C3R : 2758 type == CV_16SC4 ? (ippiNormFuncNoHint)ippiNorm_L1_16s_C4R : 2759 0) : 2760 normType == NORM_L2 || normType == NORM_L2SQR ? 2761 (type == CV_8UC1 ? (ippiNormFuncNoHint)ippiNorm_L2_8u_C1R : 2762 type == CV_8UC3 ? (ippiNormFuncNoHint)ippiNorm_L2_8u_C3R : 2763 type == CV_8UC4 ? (ippiNormFuncNoHint)ippiNorm_L2_8u_C4R : 2764 type == CV_16UC1 ? (ippiNormFuncNoHint)ippiNorm_L2_16u_C1R : 2765 type == CV_16UC3 ? (ippiNormFuncNoHint)ippiNorm_L2_16u_C3R : 2766 type == CV_16UC4 ? (ippiNormFuncNoHint)ippiNorm_L2_16u_C4R : 2767 type == CV_16SC1 ? (ippiNormFuncNoHint)ippiNorm_L2_16s_C1R : 2768 type == CV_16SC3 ? (ippiNormFuncNoHint)ippiNorm_L2_16s_C3R : 2769 type == CV_16SC4 ? (ippiNormFuncNoHint)ippiNorm_L2_16s_C4R : 2770 0) : 0; 2771 // Make sure only zero or one version of the function pointer is valid 2772 CV_Assert(!ippFuncHint || !ippFuncNoHint); 2773 if( ippFuncHint || ippFuncNoHint ) 2774 { 2775 Ipp64f norm_array[4]; 2776 IppStatus ret = ippFuncHint ? ippFuncHint(src.ptr(), (int)src.step[0], sz, norm_array, ippAlgHintAccurate) : 2777 ippFuncNoHint(src.ptr(), (int)src.step[0], sz, norm_array); 2778 if( ret >= 0 ) 2779 { 2780 Ipp64f norm = (normType == NORM_L2 || normType == NORM_L2SQR) ? norm_array[0] * norm_array[0] : norm_array[0]; 2781 for( int i = 1; i < cn; i++ ) 2782 { 2783 norm = 2784 normType == NORM_INF ? std::max(norm, norm_array[i]) : 2785 normType == NORM_L1 ? norm + norm_array[i] : 2786 normType == NORM_L2 || normType == NORM_L2SQR ? norm + norm_array[i] * norm_array[i] : 2787 0; 2788 } 2789 CV_IMPL_ADD(CV_IMPL_IPP); 2790 return normType == NORM_L2 ? 
(double)std::sqrt(norm) : (double)norm; 2791 } 2792 setIppErrorStatus(); 2793 } 2794 } 2795 } 2796 } 2797 #endif 2798 2799 if( src.isContinuous() && mask.empty() ) 2800 { 2801 size_t len = src.total()*cn; 2802 if( len == (size_t)(int)len ) 2803 { 2804 if( depth == CV_32F ) 2805 { 2806 const float* data = src.ptr<float>(); 2807 2808 if( normType == NORM_L2 ) 2809 { 2810 double result = 0; 2811 GET_OPTIMIZED(normL2_32f)(data, 0, &result, (int)len, 1); 2812 return std::sqrt(result); 2813 } 2814 if( normType == NORM_L2SQR ) 2815 { 2816 double result = 0; 2817 GET_OPTIMIZED(normL2_32f)(data, 0, &result, (int)len, 1); 2818 return result; 2819 } 2820 if( normType == NORM_L1 ) 2821 { 2822 double result = 0; 2823 GET_OPTIMIZED(normL1_32f)(data, 0, &result, (int)len, 1); 2824 return result; 2825 } 2826 if( normType == NORM_INF ) 2827 { 2828 float result = 0; 2829 GET_OPTIMIZED(normInf_32f)(data, 0, &result, (int)len, 1); 2830 return result; 2831 } 2832 } 2833 if( depth == CV_8U ) 2834 { 2835 const uchar* data = src.ptr<uchar>(); 2836 2837 if( normType == NORM_HAMMING ) 2838 { 2839 return hal::normHamming(data, (int)len); 2840 } 2841 2842 if( normType == NORM_HAMMING2 ) 2843 { 2844 return hal::normHamming(data, (int)len, 2); 2845 } 2846 } 2847 } 2848 } 2849 2850 CV_Assert( mask.empty() || mask.type() == CV_8U ); 2851 2852 if( normType == NORM_HAMMING || normType == NORM_HAMMING2 ) 2853 { 2854 if( !mask.empty() ) 2855 { 2856 Mat temp; 2857 bitwise_and(src, mask, temp); 2858 return norm(temp, normType); 2859 } 2860 int cellSize = normType == NORM_HAMMING ? 1 : 2; 2861 2862 const Mat* arrays[] = {&src, 0}; 2863 uchar* ptrs[1]; 2864 NAryMatIterator it(arrays, ptrs); 2865 int total = (int)it.size; 2866 int result = 0; 2867 2868 for( size_t i = 0; i < it.nplanes; i++, ++it ) 2869 { 2870 result += hal::normHamming(ptrs[0], total, cellSize); 2871 } 2872 2873 return result; 2874 } 2875 2876 NormFunc func = getNormFunc(normType >> 1, depth); 2877 CV_Assert( func != 0 ); 2878 2879 const Mat* arrays[] = {&src, &mask, 0}; 2880 uchar* ptrs[2]; 2881 union 2882 { 2883 double d; 2884 int i; 2885 float f; 2886 } 2887 result; 2888 result.d = 0; 2889 NAryMatIterator it(arrays, ptrs); 2890 int j, total = (int)it.size, blockSize = total, intSumBlockSize = 0, count = 0; 2891 bool blockSum = (normType == NORM_L1 && depth <= CV_16S) || 2892 ((normType == NORM_L2 || normType == NORM_L2SQR) && depth <= CV_8S); 2893 int isum = 0; 2894 int *ibuf = &result.i; 2895 size_t esz = 0; 2896 2897 if( blockSum ) 2898 { 2899 intSumBlockSize = (normType == NORM_L1 && depth <= CV_8S ? 
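// The thresholds keep a block's partial sum inside a 32-bit int (the /cn accounts for cn
// values per pixel): for NORM_L1 on 8-bit data each value contributes at most 255, and
// 255 * 2^23 < 2^31; for 16-bit L1 (at most 65535 per value) and 8-bit L2/L2SQR (at most
// 255^2 per value), blocks of 2^15 values are safe since 65535 * 2^15 < 2^31.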
(1 << 23) : (1 << 15))/cn; 2900 blockSize = std::min(blockSize, intSumBlockSize); 2901 ibuf = &isum; 2902 esz = src.elemSize(); 2903 } 2904 2905 for( size_t i = 0; i < it.nplanes; i++, ++it ) 2906 { 2907 for( j = 0; j < total; j += blockSize ) 2908 { 2909 int bsz = std::min(total - j, blockSize); 2910 func( ptrs[0], ptrs[1], (uchar*)ibuf, bsz, cn ); 2911 count += bsz; 2912 if( blockSum && (count + blockSize >= intSumBlockSize || (i+1 >= it.nplanes && j+bsz >= total)) ) 2913 { 2914 result.d += isum; 2915 isum = 0; 2916 count = 0; 2917 } 2918 ptrs[0] += bsz*esz; 2919 if( ptrs[1] ) 2920 ptrs[1] += bsz; 2921 } 2922 } 2923 2924 if( normType == NORM_INF ) 2925 { 2926 if( depth == CV_64F ) 2927 ; 2928 else if( depth == CV_32F ) 2929 result.d = result.f; 2930 else 2931 result.d = result.i; 2932 } 2933 else if( normType == NORM_L2 ) 2934 result.d = std::sqrt(result.d); 2935 2936 return result.d; 2937 } 2938 2939 #ifdef HAVE_OPENCL 2940 2941 namespace cv { 2942 2943 static bool ocl_norm( InputArray _src1, InputArray _src2, int normType, InputArray _mask, double & result ) 2944 { 2945 #ifdef ANDROID 2946 if (ocl::Device::getDefault().isNVidia()) 2947 return false; 2948 #endif 2949 2950 Scalar sc1, sc2; 2951 int type = _src1.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type); 2952 bool relative = (normType & NORM_RELATIVE) != 0; 2953 normType &= ~NORM_RELATIVE; 2954 bool normsum = normType == NORM_L1 || normType == NORM_L2 || normType == NORM_L2SQR; 2955 2956 if (normsum) 2957 { 2958 if (!ocl_sum(_src1, sc1, normType == NORM_L2 || normType == NORM_L2SQR ? 2959 OCL_OP_SUM_SQR : OCL_OP_SUM, _mask, _src2, relative, sc2)) 2960 return false; 2961 } 2962 else 2963 { 2964 if (!ocl_minMaxIdx(_src1, NULL, &sc1[0], NULL, NULL, _mask, std::max(CV_32S, depth), 2965 false, _src2, relative ? &sc2[0] : NULL)) 2966 return false; 2967 cn = 1; 2968 } 2969 2970 double s2 = 0; 2971 for (int i = 0; i < cn; ++i) 2972 { 2973 result += sc1[i]; 2974 if (relative) 2975 s2 += sc2[i]; 2976 } 2977 2978 if (normType == NORM_L2) 2979 { 2980 result = std::sqrt(result); 2981 if (relative) 2982 s2 = std::sqrt(s2); 2983 } 2984 2985 if (relative) 2986 result /= (s2 + DBL_EPSILON); 2987 2988 return true; 2989 } 2990 2991 } 2992 2993 #endif 2994 2995 double cv::norm( InputArray _src1, InputArray _src2, int normType, InputArray _mask ) 2996 { 2997 CV_Assert( _src1.sameSize(_src2) && _src1.type() == _src2.type() ); 2998 2999 #ifdef HAVE_OPENCL 3000 double _result = 0; 3001 CV_OCL_RUN_(OCL_PERFORMANCE_CHECK(_src1.isUMat()), 3002 ocl_norm(_src1, _src2, normType, _mask, _result), 3003 _result) 3004 #endif 3005 3006 if( normType & CV_RELATIVE ) 3007 { 3008 #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7) 3009 CV_IPP_CHECK() 3010 { 3011 Mat src1 = _src1.getMat(), src2 = _src2.getMat(), mask = _mask.getMat(); 3012 3013 normType &= NORM_TYPE_MASK; 3014 CV_Assert( normType == NORM_INF || normType == NORM_L1 || normType == NORM_L2 || normType == NORM_L2SQR || 3015 ((normType == NORM_HAMMING || normType == NORM_HAMMING2) && src1.type() == CV_8U) ); 3016 size_t total_size = src1.total(); 3017 int rows = src1.size[0], cols = rows ? 
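// IPP primitives operate on 2-D ROIs, so a continuous N-D matrix is viewed here as a
// rows x cols image: rows is taken from the first dimension and cols is the number of
// remaining elements per row.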
(int)(total_size/rows) : 0; 3018 if( (src1.dims == 2 || (src1.isContinuous() && src2.isContinuous() && mask.isContinuous())) 3019 && cols > 0 && (size_t)rows*cols == total_size 3020 && (normType == NORM_INF || normType == NORM_L1 || 3021 normType == NORM_L2 || normType == NORM_L2SQR) ) 3022 { 3023 IppiSize sz = { cols, rows }; 3024 int type = src1.type(); 3025 if( !mask.empty() ) 3026 { 3027 typedef IppStatus (CV_STDCALL* ippiMaskNormRelFuncC1)(const void *, int, const void *, int, const void *, int, IppiSize, Ipp64f *); 3028 ippiMaskNormRelFuncC1 ippFuncC1 = 3029 normType == NORM_INF ? 3030 (type == CV_8UC1 ? (ippiMaskNormRelFuncC1)ippiNormRel_Inf_8u_C1MR : 3031 #ifndef __APPLE__ 3032 type == CV_8SC1 ? (ippiMaskNormRelFuncC1)ippiNormRel_Inf_8s_C1MR : 3033 #endif 3034 type == CV_16UC1 ? (ippiMaskNormRelFuncC1)ippiNormRel_Inf_16u_C1MR : 3035 type == CV_32FC1 ? (ippiMaskNormRelFuncC1)ippiNormRel_Inf_32f_C1MR : 3036 0) : 3037 normType == NORM_L1 ? 3038 (type == CV_8UC1 ? (ippiMaskNormRelFuncC1)ippiNormRel_L1_8u_C1MR : 3039 #ifndef __APPLE__ 3040 type == CV_8SC1 ? (ippiMaskNormRelFuncC1)ippiNormRel_L1_8s_C1MR : 3041 #endif 3042 type == CV_16UC1 ? (ippiMaskNormRelFuncC1)ippiNormRel_L1_16u_C1MR : 3043 type == CV_32FC1 ? (ippiMaskNormRelFuncC1)ippiNormRel_L1_32f_C1MR : 3044 0) : 3045 normType == NORM_L2 || normType == NORM_L2SQR ? 3046 (type == CV_8UC1 ? (ippiMaskNormRelFuncC1)ippiNormRel_L2_8u_C1MR : 3047 type == CV_8SC1 ? (ippiMaskNormRelFuncC1)ippiNormRel_L2_8s_C1MR : 3048 type == CV_16UC1 ? (ippiMaskNormRelFuncC1)ippiNormRel_L2_16u_C1MR : 3049 type == CV_32FC1 ? (ippiMaskNormRelFuncC1)ippiNormRel_L2_32f_C1MR : 3050 0) : 0; 3051 if( ippFuncC1 ) 3052 { 3053 Ipp64f norm; 3054 if( ippFuncC1(src1.ptr(), (int)src1.step[0], src2.ptr(), (int)src2.step[0], mask.ptr(), (int)mask.step[0], sz, &norm) >= 0 ) 3055 { 3056 CV_IMPL_ADD(CV_IMPL_IPP); 3057 return normType == NORM_L2SQR ? (double)(norm * norm) : (double)norm; 3058 } 3059 setIppErrorStatus(); 3060 } 3061 } 3062 else 3063 { 3064 typedef IppStatus (CV_STDCALL* ippiNormRelFuncNoHint)(const void *, int, const void *, int, IppiSize, Ipp64f *); 3065 typedef IppStatus (CV_STDCALL* ippiNormRelFuncHint)(const void *, int, const void *, int, IppiSize, Ipp64f *, IppHintAlgorithm hint); 3066 ippiNormRelFuncNoHint ippFuncNoHint = 3067 normType == NORM_INF ? 3068 (type == CV_8UC1 ? (ippiNormRelFuncNoHint)ippiNormRel_Inf_8u_C1R : 3069 type == CV_16UC1 ? (ippiNormRelFuncNoHint)ippiNormRel_Inf_16u_C1R : 3070 type == CV_16SC1 ? (ippiNormRelFuncNoHint)ippiNormRel_Inf_16s_C1R : 3071 type == CV_32FC1 ? (ippiNormRelFuncNoHint)ippiNormRel_Inf_32f_C1R : 3072 0) : 3073 normType == NORM_L1 ? 3074 (type == CV_8UC1 ? (ippiNormRelFuncNoHint)ippiNormRel_L1_8u_C1R : 3075 type == CV_16UC1 ? (ippiNormRelFuncNoHint)ippiNormRel_L1_16u_C1R : 3076 type == CV_16SC1 ? (ippiNormRelFuncNoHint)ippiNormRel_L1_16s_C1R : 3077 0) : 3078 normType == NORM_L2 || normType == NORM_L2SQR ? 3079 (type == CV_8UC1 ? (ippiNormRelFuncNoHint)ippiNormRel_L2_8u_C1R : 3080 type == CV_16UC1 ? (ippiNormRelFuncNoHint)ippiNormRel_L2_16u_C1R : 3081 type == CV_16SC1 ? (ippiNormRelFuncNoHint)ippiNormRel_L2_16s_C1R : 3082 0) : 0; 3083 ippiNormRelFuncHint ippFuncHint = 3084 normType == NORM_L1 ? 3085 (type == CV_32FC1 ? (ippiNormRelFuncHint)ippiNormRel_L1_32f_C1R : 3086 0) : 3087 normType == NORM_L2 || normType == NORM_L2SQR ? 3088 (type == CV_32FC1 ? 
(ippiNormRelFuncHint)ippiNormRel_L2_32f_C1R : 3089 0) : 0; 3090 if (ippFuncNoHint) 3091 { 3092 Ipp64f norm; 3093 if( ippFuncNoHint(src1.ptr(), (int)src1.step[0], src2.ptr(), (int)src2.step[0], sz, &norm) >= 0 ) 3094 { 3095 CV_IMPL_ADD(CV_IMPL_IPP); 3096 return (double)norm; 3097 } 3098 setIppErrorStatus(); 3099 } 3100 if (ippFuncHint) 3101 { 3102 Ipp64f norm; 3103 if( ippFuncHint(src1.ptr(), (int)src1.step[0], src2.ptr(), (int)src2.step[0], sz, &norm, ippAlgHintAccurate) >= 0 ) 3104 { 3105 CV_IMPL_ADD(CV_IMPL_IPP); 3106 return (double)norm; 3107 } 3108 setIppErrorStatus(); 3109 } 3110 } 3111 } 3112 } 3113 #endif 3114 return norm(_src1, _src2, normType & ~CV_RELATIVE, _mask)/(norm(_src2, normType, _mask) + DBL_EPSILON); 3115 } 3116 3117 Mat src1 = _src1.getMat(), src2 = _src2.getMat(), mask = _mask.getMat(); 3118 int depth = src1.depth(), cn = src1.channels(); 3119 3120 normType &= 7; 3121 CV_Assert( normType == NORM_INF || normType == NORM_L1 || 3122 normType == NORM_L2 || normType == NORM_L2SQR || 3123 ((normType == NORM_HAMMING || normType == NORM_HAMMING2) && src1.type() == CV_8U) ); 3124 3125 #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7) 3126 CV_IPP_CHECK() 3127 { 3128 size_t total_size = src1.total(); 3129 int rows = src1.size[0], cols = rows ? (int)(total_size/rows) : 0; 3130 if( (src1.dims == 2 || (src1.isContinuous() && src2.isContinuous() && mask.isContinuous())) 3131 && cols > 0 && (size_t)rows*cols == total_size 3132 && (normType == NORM_INF || normType == NORM_L1 || 3133 normType == NORM_L2 || normType == NORM_L2SQR) ) 3134 { 3135 IppiSize sz = { cols, rows }; 3136 int type = src1.type(); 3137 if( !mask.empty() ) 3138 { 3139 typedef IppStatus (CV_STDCALL* ippiMaskNormDiffFuncC1)(const void *, int, const void *, int, const void *, int, IppiSize, Ipp64f *); 3140 ippiMaskNormDiffFuncC1 ippFuncC1 = 3141 normType == NORM_INF ? 3142 (type == CV_8UC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_Inf_8u_C1MR : 3143 type == CV_8SC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_Inf_8s_C1MR : 3144 type == CV_16UC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_Inf_16u_C1MR : 3145 type == CV_32FC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_Inf_32f_C1MR : 3146 0) : 3147 normType == NORM_L1 ? 3148 (type == CV_8UC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_L1_8u_C1MR : 3149 #ifndef __APPLE__ 3150 type == CV_8SC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_L1_8s_C1MR : 3151 #endif 3152 type == CV_16UC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_L1_16u_C1MR : 3153 type == CV_32FC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_L1_32f_C1MR : 3154 0) : 3155 normType == NORM_L2 || normType == NORM_L2SQR ? 3156 (type == CV_8UC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_L2_8u_C1MR : 3157 type == CV_8SC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_L2_8s_C1MR : 3158 type == CV_16UC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_L2_16u_C1MR : 3159 type == CV_32FC1 ? (ippiMaskNormDiffFuncC1)ippiNormDiff_L2_32f_C1MR : 3160 0) : 0; 3161 if( ippFuncC1 ) 3162 { 3163 Ipp64f norm; 3164 if( ippFuncC1(src1.ptr(), (int)src1.step[0], src2.ptr(), (int)src2.step[0], mask.ptr(), (int)mask.step[0], sz, &norm) >= 0 ) 3165 { 3166 CV_IMPL_ADD(CV_IMPL_IPP); 3167 return normType == NORM_L2SQR ? (double)(norm * norm) : (double)norm; 3168 } 3169 setIppErrorStatus(); 3170 } 3171 #ifndef __APPLE__ 3172 typedef IppStatus (CV_STDCALL* ippiMaskNormDiffFuncC3)(const void *, int, const void *, int, const void *, int, IppiSize, int, Ipp64f *); 3173 ippiMaskNormDiffFuncC3 ippFuncC3 = 3174 normType == NORM_INF ? 3175 (type == CV_8UC3 ? 
(ippiMaskNormDiffFuncC3)ippiNormDiff_Inf_8u_C3CMR : 3176 type == CV_8SC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_Inf_8s_C3CMR : 3177 type == CV_16UC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_Inf_16u_C3CMR : 3178 type == CV_32FC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_Inf_32f_C3CMR : 3179 0) : 3180 normType == NORM_L1 ? 3181 (type == CV_8UC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_L1_8u_C3CMR : 3182 type == CV_8SC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_L1_8s_C3CMR : 3183 type == CV_16UC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_L1_16u_C3CMR : 3184 type == CV_32FC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_L1_32f_C3CMR : 3185 0) : 3186 normType == NORM_L2 || normType == NORM_L2SQR ? 3187 (type == CV_8UC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_L2_8u_C3CMR : 3188 type == CV_8SC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_L2_8s_C3CMR : 3189 type == CV_16UC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_L2_16u_C3CMR : 3190 type == CV_32FC3 ? (ippiMaskNormDiffFuncC3)ippiNormDiff_L2_32f_C3CMR : 3191 0) : 0; 3192 if( ippFuncC3 ) 3193 { 3194 Ipp64f norm1, norm2, norm3; 3195 if( ippFuncC3(src1.data, (int)src1.step[0], src2.data, (int)src2.step[0], mask.data, (int)mask.step[0], sz, 1, &norm1) >= 0 && 3196 ippFuncC3(src1.data, (int)src1.step[0], src2.data, (int)src2.step[0], mask.data, (int)mask.step[0], sz, 2, &norm2) >= 0 && 3197 ippFuncC3(src1.data, (int)src1.step[0], src2.data, (int)src2.step[0], mask.data, (int)mask.step[0], sz, 3, &norm3) >= 0) 3198 { 3199 Ipp64f norm = 3200 normType == NORM_INF ? std::max(std::max(norm1, norm2), norm3) : 3201 normType == NORM_L1 ? norm1 + norm2 + norm3 : 3202 normType == NORM_L2 || normType == NORM_L2SQR ? std::sqrt(norm1 * norm1 + norm2 * norm2 + norm3 * norm3) : 3203 0; 3204 CV_IMPL_ADD(CV_IMPL_IPP); 3205 return normType == NORM_L2SQR ? (double)(norm * norm) : (double)norm; 3206 } 3207 setIppErrorStatus(); 3208 } 3209 #endif 3210 } 3211 else 3212 { 3213 typedef IppStatus (CV_STDCALL* ippiNormDiffFuncHint)(const void *, int, const void *, int, IppiSize, Ipp64f *, IppHintAlgorithm hint); 3214 typedef IppStatus (CV_STDCALL* ippiNormDiffFuncNoHint)(const void *, int, const void *, int, IppiSize, Ipp64f *); 3215 ippiNormDiffFuncHint ippFuncHint = 3216 normType == NORM_L1 ? 3217 (type == CV_32FC1 ? (ippiNormDiffFuncHint)ippiNormDiff_L1_32f_C1R : 3218 type == CV_32FC3 ? (ippiNormDiffFuncHint)ippiNormDiff_L1_32f_C3R : 3219 type == CV_32FC4 ? (ippiNormDiffFuncHint)ippiNormDiff_L1_32f_C4R : 3220 0) : 3221 normType == NORM_L2 || normType == NORM_L2SQR ? 3222 (type == CV_32FC1 ? (ippiNormDiffFuncHint)ippiNormDiff_L2_32f_C1R : 3223 type == CV_32FC3 ? (ippiNormDiffFuncHint)ippiNormDiff_L2_32f_C3R : 3224 type == CV_32FC4 ? (ippiNormDiffFuncHint)ippiNormDiff_L2_32f_C4R : 3225 0) : 0; 3226 ippiNormDiffFuncNoHint ippFuncNoHint = 3227 normType == NORM_INF ? 3228 (type == CV_8UC1 ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_8u_C1R : 3229 type == CV_8UC3 ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_8u_C3R : 3230 type == CV_8UC4 ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_8u_C4R : 3231 type == CV_16UC1 ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_16u_C1R : 3232 type == CV_16UC3 ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_16u_C3R : 3233 type == CV_16UC4 ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_16u_C4R : 3234 type == CV_16SC1 ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_16s_C1R : 3235 #if (IPP_VERSION_X100 >= 801) 3236 type == CV_16SC3 ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_16s_C3R : //Aug 2013: problem in IPP 7.1, 8.0 : -32768 3237 type == CV_16SC4 ? 
(ippiNormDiffFuncNoHint)ippiNormDiff_Inf_16s_C4R : //Aug 2013: problem in IPP 7.1, 8.0 : -32768 3238 #endif 3239 type == CV_32FC1 ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_32f_C1R : 3240 type == CV_32FC3 ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_32f_C3R : 3241 type == CV_32FC4 ? (ippiNormDiffFuncNoHint)ippiNormDiff_Inf_32f_C4R : 3242 0) : 3243 normType == NORM_L1 ? 3244 (type == CV_8UC1 ? (ippiNormDiffFuncNoHint)ippiNormDiff_L1_8u_C1R : 3245 type == CV_8UC3 ? (ippiNormDiffFuncNoHint)ippiNormDiff_L1_8u_C3R : 3246 type == CV_8UC4 ? (ippiNormDiffFuncNoHint)ippiNormDiff_L1_8u_C4R : 3247 type == CV_16UC1 ? (ippiNormDiffFuncNoHint)ippiNormDiff_L1_16u_C1R : 3248 type == CV_16UC3 ? (ippiNormDiffFuncNoHint)ippiNormDiff_L1_16u_C3R : 3249 type == CV_16UC4 ? (ippiNormDiffFuncNoHint)ippiNormDiff_L1_16u_C4R : 3250 #if !(IPP_VERSION_X100 == 802 && (!defined(IPP_VERSION_UPDATE) || IPP_VERSION_UPDATE <= 1)) // Oct 2014: Accuracy issue with IPP 8.2 / 8.2.1 3251 type == CV_16SC1 ? (ippiNormDiffFuncNoHint)ippiNormDiff_L1_16s_C1R : 3252 #endif 3253 type == CV_16SC3 ? (ippiNormDiffFuncNoHint)ippiNormDiff_L1_16s_C3R : 3254 type == CV_16SC4 ? (ippiNormDiffFuncNoHint)ippiNormDiff_L1_16s_C4R : 3255 0) : 3256 normType == NORM_L2 || normType == NORM_L2SQR ? 3257 (type == CV_8UC1 ? (ippiNormDiffFuncNoHint)ippiNormDiff_L2_8u_C1R : 3258 type == CV_8UC3 ? (ippiNormDiffFuncNoHint)ippiNormDiff_L2_8u_C3R : 3259 type == CV_8UC4 ? (ippiNormDiffFuncNoHint)ippiNormDiff_L2_8u_C4R : 3260 type == CV_16UC1 ? (ippiNormDiffFuncNoHint)ippiNormDiff_L2_16u_C1R : 3261 type == CV_16UC3 ? (ippiNormDiffFuncNoHint)ippiNormDiff_L2_16u_C3R : 3262 type == CV_16UC4 ? (ippiNormDiffFuncNoHint)ippiNormDiff_L2_16u_C4R : 3263 type == CV_16SC1 ? (ippiNormDiffFuncNoHint)ippiNormDiff_L2_16s_C1R : 3264 type == CV_16SC3 ? (ippiNormDiffFuncNoHint)ippiNormDiff_L2_16s_C3R : 3265 type == CV_16SC4 ? (ippiNormDiffFuncNoHint)ippiNormDiff_L2_16s_C4R : 3266 0) : 0; 3267 // Make sure only zero or one version of the function pointer is valid 3268 CV_Assert(!ippFuncHint || !ippFuncNoHint); 3269 if( ippFuncHint || ippFuncNoHint ) 3270 { 3271 Ipp64f norm_array[4]; 3272 IppStatus ret = ippFuncHint ? ippFuncHint(src1.ptr(), (int)src1.step[0], src2.ptr(), (int)src2.step[0], sz, norm_array, ippAlgHintAccurate) : 3273 ippFuncNoHint(src1.ptr(), (int)src1.step[0], src2.ptr(), (int)src2.step[0], sz, norm_array); 3274 if( ret >= 0 ) 3275 { 3276 Ipp64f norm = (normType == NORM_L2 || normType == NORM_L2SQR) ? norm_array[0] * norm_array[0] : norm_array[0]; 3277 for( int i = 1; i < src1.channels(); i++ ) 3278 { 3279 norm = 3280 normType == NORM_INF ? std::max(norm, norm_array[i]) : 3281 normType == NORM_L1 ? norm + norm_array[i] : 3282 normType == NORM_L2 || normType == NORM_L2SQR ? norm + norm_array[i] * norm_array[i] : 3283 0; 3284 } 3285 CV_IMPL_ADD(CV_IMPL_IPP); 3286 return normType == NORM_L2 ? 
(double)std::sqrt(norm) : (double)norm; 3287 } 3288 setIppErrorStatus(); 3289 } 3290 } 3291 } 3292 } 3293 #endif 3294 3295 if( src1.isContinuous() && src2.isContinuous() && mask.empty() ) 3296 { 3297 size_t len = src1.total()*src1.channels(); 3298 if( len == (size_t)(int)len ) 3299 { 3300 if( src1.depth() == CV_32F ) 3301 { 3302 const float* data1 = src1.ptr<float>(); 3303 const float* data2 = src2.ptr<float>(); 3304 3305 if( normType == NORM_L2 ) 3306 { 3307 double result = 0; 3308 GET_OPTIMIZED(normDiffL2_32f)(data1, data2, 0, &result, (int)len, 1); 3309 return std::sqrt(result); 3310 } 3311 if( normType == NORM_L2SQR ) 3312 { 3313 double result = 0; 3314 GET_OPTIMIZED(normDiffL2_32f)(data1, data2, 0, &result, (int)len, 1); 3315 return result; 3316 } 3317 if( normType == NORM_L1 ) 3318 { 3319 double result = 0; 3320 GET_OPTIMIZED(normDiffL1_32f)(data1, data2, 0, &result, (int)len, 1); 3321 return result; 3322 } 3323 if( normType == NORM_INF ) 3324 { 3325 float result = 0; 3326 GET_OPTIMIZED(normDiffInf_32f)(data1, data2, 0, &result, (int)len, 1); 3327 return result; 3328 } 3329 } 3330 } 3331 } 3332 3333 CV_Assert( mask.empty() || mask.type() == CV_8U ); 3334 3335 if( normType == NORM_HAMMING || normType == NORM_HAMMING2 ) 3336 { 3337 if( !mask.empty() ) 3338 { 3339 Mat temp; 3340 bitwise_xor(src1, src2, temp); 3341 bitwise_and(temp, mask, temp); 3342 return norm(temp, normType); 3343 } 3344 int cellSize = normType == NORM_HAMMING ? 1 : 2; 3345 3346 const Mat* arrays[] = {&src1, &src2, 0}; 3347 uchar* ptrs[2]; 3348 NAryMatIterator it(arrays, ptrs); 3349 int total = (int)it.size; 3350 int result = 0; 3351 3352 for( size_t i = 0; i < it.nplanes; i++, ++it ) 3353 { 3354 result += hal::normHamming(ptrs[0], ptrs[1], total, cellSize); 3355 } 3356 3357 return result; 3358 } 3359 3360 NormDiffFunc func = getNormDiffFunc(normType >> 1, depth); 3361 CV_Assert( func != 0 ); 3362 3363 const Mat* arrays[] = {&src1, &src2, &mask, 0}; 3364 uchar* ptrs[3]; 3365 union 3366 { 3367 double d; 3368 float f; 3369 int i; 3370 unsigned u; 3371 } 3372 result; 3373 result.d = 0; 3374 NAryMatIterator it(arrays, ptrs); 3375 int j, total = (int)it.size, blockSize = total, intSumBlockSize = 0, count = 0; 3376 bool blockSum = (normType == NORM_L1 && depth <= CV_16S) || 3377 ((normType == NORM_L2 || normType == NORM_L2SQR) && depth <= CV_8S); 3378 unsigned isum = 0; 3379 unsigned *ibuf = &result.u; 3380 size_t esz = 0; 3381 3382 if( blockSum ) 3383 { 3384 intSumBlockSize = normType == NORM_L1 && depth <= CV_8S ? 
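// Same block-summation scheme as in the single-array cv::norm above, except that the partial
// sum is accumulated in an unsigned int and, unlike there, the block size is not divided by cn.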
(1 << 23) : (1 << 15); 3385 blockSize = std::min(blockSize, intSumBlockSize); 3386 ibuf = &isum; 3387 esz = src1.elemSize(); 3388 } 3389 3390 for( size_t i = 0; i < it.nplanes; i++, ++it ) 3391 { 3392 for( j = 0; j < total; j += blockSize ) 3393 { 3394 int bsz = std::min(total - j, blockSize); 3395 func( ptrs[0], ptrs[1], ptrs[2], (uchar*)ibuf, bsz, cn ); 3396 count += bsz; 3397 if( blockSum && (count + blockSize >= intSumBlockSize || (i+1 >= it.nplanes && j+bsz >= total)) ) 3398 { 3399 result.d += isum; 3400 isum = 0; 3401 count = 0; 3402 } 3403 ptrs[0] += bsz*esz; 3404 ptrs[1] += bsz*esz; 3405 if( ptrs[2] ) 3406 ptrs[2] += bsz; 3407 } 3408 } 3409 3410 if( normType == NORM_INF ) 3411 { 3412 if( depth == CV_64F ) 3413 ; 3414 else if( depth == CV_32F ) 3415 result.d = result.f; 3416 else 3417 result.d = result.u; 3418 } 3419 else if( normType == NORM_L2 ) 3420 result.d = std::sqrt(result.d); 3421 3422 return result.d; 3423 } 3424 3425 3426 ///////////////////////////////////// batch distance /////////////////////////////////////// 3427 3428 namespace cv 3429 { 3430 3431 template<typename _Tp, typename _Rt> 3432 void batchDistL1_(const _Tp* src1, const _Tp* src2, size_t step2, 3433 int nvecs, int len, _Rt* dist, const uchar* mask) 3434 { 3435 step2 /= sizeof(src2[0]); 3436 if( !mask ) 3437 { 3438 for( int i = 0; i < nvecs; i++ ) 3439 dist[i] = normL1<_Tp, _Rt>(src1, src2 + step2*i, len); 3440 } 3441 else 3442 { 3443 _Rt val0 = std::numeric_limits<_Rt>::max(); 3444 for( int i = 0; i < nvecs; i++ ) 3445 dist[i] = mask[i] ? normL1<_Tp, _Rt>(src1, src2 + step2*i, len) : val0; 3446 } 3447 } 3448 3449 template<typename _Tp, typename _Rt> 3450 void batchDistL2Sqr_(const _Tp* src1, const _Tp* src2, size_t step2, 3451 int nvecs, int len, _Rt* dist, const uchar* mask) 3452 { 3453 step2 /= sizeof(src2[0]); 3454 if( !mask ) 3455 { 3456 for( int i = 0; i < nvecs; i++ ) 3457 dist[i] = normL2Sqr<_Tp, _Rt>(src1, src2 + step2*i, len); 3458 } 3459 else 3460 { 3461 _Rt val0 = std::numeric_limits<_Rt>::max(); 3462 for( int i = 0; i < nvecs; i++ ) 3463 dist[i] = mask[i] ? normL2Sqr<_Tp, _Rt>(src1, src2 + step2*i, len) : val0; 3464 } 3465 } 3466 3467 template<typename _Tp, typename _Rt> 3468 void batchDistL2_(const _Tp* src1, const _Tp* src2, size_t step2, 3469 int nvecs, int len, _Rt* dist, const uchar* mask) 3470 { 3471 step2 /= sizeof(src2[0]); 3472 if( !mask ) 3473 { 3474 for( int i = 0; i < nvecs; i++ ) 3475 dist[i] = std::sqrt(normL2Sqr<_Tp, _Rt>(src1, src2 + step2*i, len)); 3476 } 3477 else 3478 { 3479 _Rt val0 = std::numeric_limits<_Rt>::max(); 3480 for( int i = 0; i < nvecs; i++ ) 3481 dist[i] = mask[i] ? 
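// Candidates rejected by the mask receive val0, the largest representable distance, so they
// can never win the K-nearest selection performed by the caller.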
std::sqrt(normL2Sqr<_Tp, _Rt>(src1, src2 + step2*i, len)) : val0; 3482 } 3483 } 3484 3485 static void batchDistHamming(const uchar* src1, const uchar* src2, size_t step2, 3486 int nvecs, int len, int* dist, const uchar* mask) 3487 { 3488 step2 /= sizeof(src2[0]); 3489 if( !mask ) 3490 { 3491 for( int i = 0; i < nvecs; i++ ) 3492 dist[i] = hal::normHamming(src1, src2 + step2*i, len); 3493 } 3494 else 3495 { 3496 int val0 = INT_MAX; 3497 for( int i = 0; i < nvecs; i++ ) 3498 { 3499 if (mask[i]) 3500 dist[i] = hal::normHamming(src1, src2 + step2*i, len); 3501 else 3502 dist[i] = val0; 3503 } 3504 } 3505 } 3506 3507 static void batchDistHamming2(const uchar* src1, const uchar* src2, size_t step2, 3508 int nvecs, int len, int* dist, const uchar* mask) 3509 { 3510 step2 /= sizeof(src2[0]); 3511 if( !mask ) 3512 { 3513 for( int i = 0; i < nvecs; i++ ) 3514 dist[i] = hal::normHamming(src1, src2 + step2*i, len, 2); 3515 } 3516 else 3517 { 3518 int val0 = INT_MAX; 3519 for( int i = 0; i < nvecs; i++ ) 3520 { 3521 if (mask[i]) 3522 dist[i] = hal::normHamming(src1, src2 + step2*i, len, 2); 3523 else 3524 dist[i] = val0; 3525 } 3526 } 3527 } 3528 3529 static void batchDistL1_8u32s(const uchar* src1, const uchar* src2, size_t step2, 3530 int nvecs, int len, int* dist, const uchar* mask) 3531 { 3532 batchDistL1_<uchar, int>(src1, src2, step2, nvecs, len, dist, mask); 3533 } 3534 3535 static void batchDistL1_8u32f(const uchar* src1, const uchar* src2, size_t step2, 3536 int nvecs, int len, float* dist, const uchar* mask) 3537 { 3538 batchDistL1_<uchar, float>(src1, src2, step2, nvecs, len, dist, mask); 3539 } 3540 3541 static void batchDistL2Sqr_8u32s(const uchar* src1, const uchar* src2, size_t step2, 3542 int nvecs, int len, int* dist, const uchar* mask) 3543 { 3544 batchDistL2Sqr_<uchar, int>(src1, src2, step2, nvecs, len, dist, mask); 3545 } 3546 3547 static void batchDistL2Sqr_8u32f(const uchar* src1, const uchar* src2, size_t step2, 3548 int nvecs, int len, float* dist, const uchar* mask) 3549 { 3550 batchDistL2Sqr_<uchar, float>(src1, src2, step2, nvecs, len, dist, mask); 3551 } 3552 3553 static void batchDistL2_8u32f(const uchar* src1, const uchar* src2, size_t step2, 3554 int nvecs, int len, float* dist, const uchar* mask) 3555 { 3556 batchDistL2_<uchar, float>(src1, src2, step2, nvecs, len, dist, mask); 3557 } 3558 3559 static void batchDistL1_32f(const float* src1, const float* src2, size_t step2, 3560 int nvecs, int len, float* dist, const uchar* mask) 3561 { 3562 batchDistL1_<float, float>(src1, src2, step2, nvecs, len, dist, mask); 3563 } 3564 3565 static void batchDistL2Sqr_32f(const float* src1, const float* src2, size_t step2, 3566 int nvecs, int len, float* dist, const uchar* mask) 3567 { 3568 batchDistL2Sqr_<float, float>(src1, src2, step2, nvecs, len, dist, mask); 3569 } 3570 3571 static void batchDistL2_32f(const float* src1, const float* src2, size_t step2, 3572 int nvecs, int len, float* dist, const uchar* mask) 3573 { 3574 batchDistL2_<float, float>(src1, src2, step2, nvecs, len, dist, mask); 3575 } 3576 3577 typedef void (*BatchDistFunc)(const uchar* src1, const uchar* src2, size_t step2, 3578 int nvecs, int len, uchar* dist, const uchar* mask); 3579 3580 3581 struct BatchDistInvoker : public ParallelLoopBody 3582 { 3583 BatchDistInvoker( const Mat& _src1, const Mat& _src2, 3584 Mat& _dist, Mat& _nidx, int _K, 3585 const Mat& _mask, int _update, 3586 BatchDistFunc _func) 3587 { 3588 src1 = &_src1; 3589 src2 = &_src2; 3590 dist = &_dist; 3591 nidx = &_nidx; 3592 K = _K; 3593 mask = 

struct BatchDistInvoker : public ParallelLoopBody
{
    BatchDistInvoker( const Mat& _src1, const Mat& _src2,
                      Mat& _dist, Mat& _nidx, int _K,
                      const Mat& _mask, int _update,
                      BatchDistFunc _func)
    {
        src1 = &_src1;
        src2 = &_src2;
        dist = &_dist;
        nidx = &_nidx;
        K = _K;
        mask = &_mask;
        update = _update;
        func = _func;
    }

    void operator()(const Range& range) const
    {
        AutoBuffer<int> buf(src2->rows);
        int* bufptr = buf;

        for( int i = range.start; i < range.end; i++ )
        {
            func(src1->ptr(i), src2->ptr(), src2->step, src2->rows, src2->cols,
                 K > 0 ? (uchar*)bufptr : dist->ptr(i), mask->data ? mask->ptr(i) : 0);

            if( K > 0 )
            {
                int* nidxptr = nidx->ptr<int>(i);
                // since positive floats can be compared just like ints,
                // we handle both CV_32S and CV_32F cases with a single branch
                int* distptr = (int*)dist->ptr(i);

                int j, k;

                for( j = 0; j < src2->rows; j++ )
                {
                    // bufptr[j] is the distance from src1 row i to src2 row j;
                    // keep the K smallest distances sorted by insertion sort
                    int d = bufptr[j];
                    if( d < distptr[K-1] )
                    {
                        for( k = K-2; k >= 0 && distptr[k] > d; k-- )
                        {
                            nidxptr[k+1] = nidxptr[k];
                            distptr[k+1] = distptr[k];
                        }
                        nidxptr[k+1] = j + update;
                        distptr[k+1] = d;
                    }
                }
            }
        }
    }

    const Mat *src1;
    const Mat *src2;
    Mat *dist;
    Mat *nidx;
    const Mat *mask;
    int K;
    int update;
    BatchDistFunc func;
};

}
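// BatchDistInvoker above sorts CV_32F distance buffers through int* pointers.
// This works because, for finite non-negative IEEE-754 floats, the ordering of
// the values matches the ordering of their bit patterns read as signed 32-bit
// integers, and all supported norms produce distances >= 0. A quick,
// self-contained check of this property (illustrative only):
//
//   #include <cassert>
//   #include <cstring>
//   float a = 1.5f, b = 2.25f;
//   int ia, ib;
//   std::memcpy(&ia, &a, sizeof(int));
//   std::memcpy(&ib, &b, sizeof(int));
//   assert( (a < b) == (ia < ib) );   // holds for any finite a, b >= 0
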
void cv::batchDistance( InputArray _src1, InputArray _src2,
                        OutputArray _dist, int dtype, OutputArray _nidx,
                        int normType, int K, InputArray _mask,
                        int update, bool crosscheck )
{
    Mat src1 = _src1.getMat(), src2 = _src2.getMat(), mask = _mask.getMat();
    int type = src1.type();
    CV_Assert( type == src2.type() && src1.cols == src2.cols &&
               (type == CV_32F || type == CV_8U));
    CV_Assert( _nidx.needed() == (K > 0) );

    if( dtype == -1 )
    {
        dtype = normType == NORM_HAMMING || normType == NORM_HAMMING2 ? CV_32S : CV_32F;
    }
    CV_Assert( (type == CV_8U && dtype == CV_32S) || dtype == CV_32F);

    K = std::min(K, src2.rows);

    _dist.create(src1.rows, (K > 0 ? K : src2.rows), dtype);
    Mat dist = _dist.getMat(), nidx;
    if( _nidx.needed() )
    {
        _nidx.create(dist.size(), CV_32S);
        nidx = _nidx.getMat();
    }

    if( update == 0 && K > 0 )
    {
        dist = Scalar::all(dtype == CV_32S ? (double)INT_MAX : (double)FLT_MAX);
        nidx = Scalar::all(-1);
    }

    if( crosscheck )
    {
        CV_Assert( K == 1 && update == 0 && mask.empty() );
        Mat tdist, tidx;
        batchDistance(src2, src1, tdist, dtype, tidx, normType, K, mask, 0, false);

        // If the idx-th element of src1 turned out to be the nearest to the i-th
        // element of src2, update the minimum distance between the idx-th element
        // of src1 and the whole src2 set. As a result, nidx[idx] = i* means that
        // the idx-th element of src1 is nearest to the i*-th element of src2 AND
        // the i*-th element of src2 is nearest to the idx-th element of src1.
        // nidx[idx] = -1 means that src2 contains no such mutual nearest neighbour
        // for the idx-th element of src1. This O(N) procedure is called
        // cross-check; it helps to eliminate some false matches.
        if( dtype == CV_32S )
        {
            for( int i = 0; i < tdist.rows; i++ )
            {
                int idx = tidx.at<int>(i);
                int d = tdist.at<int>(i), d0 = dist.at<int>(idx);
                if( d < d0 )
                {
                    dist.at<int>(idx) = d;
                    nidx.at<int>(idx) = i + update;
                }
            }
        }
        else
        {
            for( int i = 0; i < tdist.rows; i++ )
            {
                int idx = tidx.at<int>(i);
                float d = tdist.at<float>(i), d0 = dist.at<float>(idx);
                if( d < d0 )
                {
                    dist.at<float>(idx) = d;
                    nidx.at<int>(idx) = i + update;
                }
            }
        }
        return;
    }

    BatchDistFunc func = 0;
    if( type == CV_8U )
    {
        if( normType == NORM_L1 && dtype == CV_32S )
            func = (BatchDistFunc)batchDistL1_8u32s;
        else if( normType == NORM_L1 && dtype == CV_32F )
            func = (BatchDistFunc)batchDistL1_8u32f;
        else if( normType == NORM_L2SQR && dtype == CV_32S )
            func = (BatchDistFunc)batchDistL2Sqr_8u32s;
        else if( normType == NORM_L2SQR && dtype == CV_32F )
            func = (BatchDistFunc)batchDistL2Sqr_8u32f;
        else if( normType == NORM_L2 && dtype == CV_32F )
            func = (BatchDistFunc)batchDistL2_8u32f;
        else if( normType == NORM_HAMMING && dtype == CV_32S )
            func = (BatchDistFunc)batchDistHamming;
        else if( normType == NORM_HAMMING2 && dtype == CV_32S )
            func = (BatchDistFunc)batchDistHamming2;
    }
    else if( type == CV_32F && dtype == CV_32F )
    {
        if( normType == NORM_L1 )
            func = (BatchDistFunc)batchDistL1_32f;
        else if( normType == NORM_L2SQR )
            func = (BatchDistFunc)batchDistL2Sqr_32f;
        else if( normType == NORM_L2 )
            func = (BatchDistFunc)batchDistL2_32f;
    }

    if( func == 0 )
        CV_Error_(CV_StsUnsupportedFormat,
                  ("The combination of type=%d, dtype=%d and normType=%d is not supported",
                   type, dtype, normType));

    parallel_for_(Range(0, src1.rows),
                  BatchDistInvoker(src1, src2, dist, nidx, K, mask, update, func));
}
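// A typical call (illustrative sketch; the matrices are hypothetical): find,
// for every row of "query", its mutual nearest neighbour among the rows of
// "train" under the Hamming norm:
//
//   cv::Mat query, train;   // CV_8U descriptors, one per row, equal cols
//   cv::Mat dist, nidx;
//   cv::batchDistance(query, train, dist, CV_32S, nidx, cv::NORM_HAMMING,
//                     1 /* K */, cv::noArray(), 0 /* update */,
//                     true /* crosscheck */);
//
//   // nidx.at<int>(i) is the row of "train" mutually nearest to row i of
//   // "query", or -1 if the pair failed the cross-check.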

void cv::findNonZero( InputArray _src, OutputArray _idx )
{
    Mat src = _src.getMat();
    CV_Assert( src.type() == CV_8UC1 );
    int n = countNonZero(src);
    if( n == 0 )
    {
        _idx.release();
        return;
    }
    if( _idx.kind() == _InputArray::MAT && !_idx.getMatRef().isContinuous() )
        _idx.release();
    _idx.create(n, 1, CV_32SC2);
    Mat idx = _idx.getMat();
    CV_Assert(idx.isContinuous());
    Point* idx_ptr = idx.ptr<Point>();

    for( int i = 0; i < src.rows; i++ )
    {
        const uchar* bin_ptr = src.ptr(i);
        for( int j = 0; j < src.cols; j++ )
            if( bin_ptr[j] )
                *idx_ptr++ = Point(j, i);
    }
}

double cv::PSNR(InputArray _src1, InputArray _src2)
{
    // PSNR = 20*log10(MAX/RMSE), with MAX = 255 for 8-bit data;
    // DBL_EPSILON keeps the result finite when the inputs are identical.
    CV_Assert( _src1.depth() == CV_8U );
    double diff = std::sqrt(norm(_src1, _src2, NORM_L2SQR)/(_src1.total()*_src1.channels()));
    return 20*log10(255./(diff+DBL_EPSILON));
}


CV_IMPL CvScalar cvSum( const CvArr* srcarr )
{
    cv::Scalar sum = cv::sum(cv::cvarrToMat(srcarr, false, true, 1));
    if( CV_IS_IMAGE(srcarr) )
    {
        int coi = cvGetImageCOI((IplImage*)srcarr);
        if( coi )
        {
            CV_Assert( 0 < coi && coi <= 4 );
            sum = cv::Scalar(sum[coi-1]);
        }
    }
    return sum;
}

CV_IMPL int cvCountNonZero( const CvArr* imgarr )
{
    cv::Mat img = cv::cvarrToMat(imgarr, false, true, 1);
    if( img.channels() > 1 )
        cv::extractImageCOI(imgarr, img);
    return countNonZero(img);
}


CV_IMPL CvScalar
cvAvg( const void* imgarr, const void* maskarr )
{
    cv::Mat img = cv::cvarrToMat(imgarr, false, true, 1);
    cv::Scalar mean = !maskarr ? cv::mean(img) : cv::mean(img, cv::cvarrToMat(maskarr));
    if( CV_IS_IMAGE(imgarr) )
    {
        int coi = cvGetImageCOI((IplImage*)imgarr);
        if( coi )
        {
            CV_Assert( 0 < coi && coi <= 4 );
            mean = cv::Scalar(mean[coi-1]);
        }
    }
    return mean;
}


CV_IMPL void
cvAvgSdv( const CvArr* imgarr, CvScalar* _mean, CvScalar* _sdv, const void* maskarr )
{
    cv::Scalar mean, sdv;

    cv::Mat mask;
    if( maskarr )
        mask = cv::cvarrToMat(maskarr);

    cv::meanStdDev(cv::cvarrToMat(imgarr, false, true, 1), mean, sdv, mask );

    if( CV_IS_IMAGE(imgarr) )
    {
        int coi = cvGetImageCOI((IplImage*)imgarr);
        if( coi )
        {
            CV_Assert( 0 < coi && coi <= 4 );
            mean = cv::Scalar(mean[coi-1]);
            sdv = cv::Scalar(sdv[coi-1]);
        }
    }

    if( _mean )
        *(cv::Scalar*)_mean = mean;
    if( _sdv )
        *(cv::Scalar*)_sdv = sdv;
}


CV_IMPL void
cvMinMaxLoc( const void* imgarr, double* _minVal, double* _maxVal,
             CvPoint* _minLoc, CvPoint* _maxLoc, const void* maskarr )
{
    cv::Mat mask, img = cv::cvarrToMat(imgarr, false, true, 1);
    if( maskarr )
        mask = cv::cvarrToMat(maskarr);
    if( img.channels() > 1 )
        cv::extractImageCOI(imgarr, img);

    cv::minMaxLoc( img, _minVal, _maxVal,
                   (cv::Point*)_minLoc, (cv::Point*)_maxLoc, mask );
}


CV_IMPL double
cvNorm( const void* imgA, const void* imgB, int normType, const void* maskarr )
{
    cv::Mat a, mask;
    if( !imgA )
    {
        imgA = imgB;
        imgB = 0;
    }

    a = cv::cvarrToMat(imgA, false, true, 1);
    if( maskarr )
        mask = cv::cvarrToMat(maskarr);

    if( a.channels() > 1 && CV_IS_IMAGE(imgA) && cvGetImageCOI((const IplImage*)imgA) > 0 )
        cv::extractImageCOI(imgA, a);

    if( !imgB )
        return !maskarr ? cv::norm(a, normType) : cv::norm(a, normType, mask);

    cv::Mat b = cv::cvarrToMat(imgB, false, true, 1);
    if( b.channels() > 1 && CV_IS_IMAGE(imgB) && cvGetImageCOI((const IplImage*)imgB) > 0 )
        cv::extractImageCOI(imgB, b);

    return !maskarr ? cv::norm(a, b, normType) : cv::norm(a, b, normType, mask);
}
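
// Usage sketch for cv::findNonZero and cv::PSNR defined earlier in this file
// (the variable names are hypothetical):
//
//   cv::Mat bw;                           // CV_8UC1 binary image
//   std::vector<cv::Point> pts;
//   cv::findNonZero(bw, pts);             // pts[k] = (x, y) of the k-th
//                                         // non-zero pixel, scanned row-major
//
//   cv::Mat img, degraded;                // CV_8U images, same size and type
//   double q = cv::PSNR(img, degraded);   // in dB; larger means more similar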