1 /*M/////////////////////////////////////////////////////////////////////////////////////// 2 // 3 // IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING. 4 // 5 // By downloading, copying, installing or using the software you agree to this license. 6 // If you do not agree to this license, do not download, install, 7 // copy or use the software. 8 // 9 // 10 // Intel License Agreement 11 // For Open Source Computer Vision Library 12 // 13 // Copyright (C) 2000, Intel Corporation, all rights reserved. 14 // Third party copyrights are property of their respective owners. 15 // 16 // Redistribution and use in source and binary forms, with or without modification, 17 // are permitted provided that the following conditions are met: 18 // 19 // * Redistribution's of source code must retain the above copyright notice, 20 // this list of conditions and the following disclaimer. 21 // 22 // * Redistribution's in binary form must reproduce the above copyright notice, 23 // this list of conditions and the following disclaimer in the documentation 24 // and/or other materials provided with the distribution. 25 // 26 // * The name of Intel Corporation may not be used to endorse or promote products 27 // derived from this software without specific prior written permission. 28 // 29 // This software is provided by the copyright holders and contributors "as is" and 30 // any express or implied warranties, including, but not limited to, the implied 31 // warranties of merchantability and fitness for a particular purpose are disclaimed. 32 // In no event shall the Intel Corporation or contributors be liable for any direct, 33 // indirect, incidental, special, exemplary, or consequential damages 34 // (including, but not limited to, procurement of substitute goods or services; 35 // loss of use, data, or profits; or business interruption) however caused 36 // and on any theory of liability, whether in contract, strict liability, 37 // or tort (including negligence or otherwise) arising in any way out of 38 // the use of this software, even if advised of the possibility of such damage. 39 // 40 //M*/ 41 42 #include "precomp.hpp" 43 #include "opencv2/core/opencl/runtime/opencl_clamdfft.hpp" 44 #include "opencv2/core/opencl/runtime/opencl_core.hpp" 45 #include "opencl_kernels_core.hpp" 46 #include <map> 47 48 namespace cv 49 { 50 51 // On Win64 optimized versions of DFT and DCT fail the tests (fixed in VS2010) 52 #if defined _MSC_VER && !defined CV_ICC && defined _M_X64 && _MSC_VER < 1600 53 # pragma optimize("", off) 54 # pragma warning(disable: 4748) 55 #endif 56 57 #if IPP_VERSION_X100 >= 701 58 #define USE_IPP_DFT 1 59 #else 60 #undef USE_IPP_DFT 61 #endif 62 63 /****************************************************************************************\ 64 Discrete Fourier Transform 65 \****************************************************************************************/ 66 67 #define CV_MAX_LOCAL_DFT_SIZE (1 << 15) 68 69 static unsigned char bitrevTab[] = 70 { 71 0x00,0x80,0x40,0xc0,0x20,0xa0,0x60,0xe0,0x10,0x90,0x50,0xd0,0x30,0xb0,0x70,0xf0, 72 0x08,0x88,0x48,0xc8,0x28,0xa8,0x68,0xe8,0x18,0x98,0x58,0xd8,0x38,0xb8,0x78,0xf8, 73 0x04,0x84,0x44,0xc4,0x24,0xa4,0x64,0xe4,0x14,0x94,0x54,0xd4,0x34,0xb4,0x74,0xf4, 74 0x0c,0x8c,0x4c,0xcc,0x2c,0xac,0x6c,0xec,0x1c,0x9c,0x5c,0xdc,0x3c,0xbc,0x7c,0xfc, 75 0x02,0x82,0x42,0xc2,0x22,0xa2,0x62,0xe2,0x12,0x92,0x52,0xd2,0x32,0xb2,0x72,0xf2, 76 0x0a,0x8a,0x4a,0xca,0x2a,0xaa,0x6a,0xea,0x1a,0x9a,0x5a,0xda,0x3a,0xba,0x7a,0xfa, 77 0x06,0x86,0x46,0xc6,0x26,0xa6,0x66,0xe6,0x16,0x96,0x56,0xd6,0x36,0xb6,0x76,0xf6, 78 0x0e,0x8e,0x4e,0xce,0x2e,0xae,0x6e,0xee,0x1e,0x9e,0x5e,0xde,0x3e,0xbe,0x7e,0xfe, 79 0x01,0x81,0x41,0xc1,0x21,0xa1,0x61,0xe1,0x11,0x91,0x51,0xd1,0x31,0xb1,0x71,0xf1, 80 0x09,0x89,0x49,0xc9,0x29,0xa9,0x69,0xe9,0x19,0x99,0x59,0xd9,0x39,0xb9,0x79,0xf9, 81 0x05,0x85,0x45,0xc5,0x25,0xa5,0x65,0xe5,0x15,0x95,0x55,0xd5,0x35,0xb5,0x75,0xf5, 82 0x0d,0x8d,0x4d,0xcd,0x2d,0xad,0x6d,0xed,0x1d,0x9d,0x5d,0xdd,0x3d,0xbd,0x7d,0xfd, 83 0x03,0x83,0x43,0xc3,0x23,0xa3,0x63,0xe3,0x13,0x93,0x53,0xd3,0x33,0xb3,0x73,0xf3, 84 0x0b,0x8b,0x4b,0xcb,0x2b,0xab,0x6b,0xeb,0x1b,0x9b,0x5b,0xdb,0x3b,0xbb,0x7b,0xfb, 85 0x07,0x87,0x47,0xc7,0x27,0xa7,0x67,0xe7,0x17,0x97,0x57,0xd7,0x37,0xb7,0x77,0xf7, 86 0x0f,0x8f,0x4f,0xcf,0x2f,0xaf,0x6f,0xef,0x1f,0x9f,0x5f,0xdf,0x3f,0xbf,0x7f,0xff 87 }; 88 89 static const double DFTTab[][2] = 90 { 91 { 1.00000000000000000, 0.00000000000000000 }, 92 {-1.00000000000000000, 0.00000000000000000 }, 93 { 0.00000000000000000, 1.00000000000000000 }, 94 { 0.70710678118654757, 0.70710678118654746 }, 95 { 0.92387953251128674, 0.38268343236508978 }, 96 { 0.98078528040323043, 0.19509032201612825 }, 97 { 0.99518472667219693, 0.09801714032956060 }, 98 { 0.99879545620517241, 0.04906767432741802 }, 99 { 0.99969881869620425, 0.02454122852291229 }, 100 { 0.99992470183914450, 0.01227153828571993 }, 101 { 0.99998117528260111, 0.00613588464915448 }, 102 { 0.99999529380957619, 0.00306795676296598 }, 103 { 0.99999882345170188, 0.00153398018628477 }, 104 { 0.99999970586288223, 0.00076699031874270 }, 105 { 0.99999992646571789, 0.00038349518757140 }, 106 { 0.99999998161642933, 0.00019174759731070 }, 107 { 0.99999999540410733, 0.00009587379909598 }, 108 { 0.99999999885102686, 0.00004793689960307 }, 109 { 0.99999999971275666, 0.00002396844980842 }, 110 { 0.99999999992818922, 0.00001198422490507 }, 111 { 0.99999999998204725, 0.00000599211245264 }, 112 { 0.99999999999551181, 0.00000299605622633 }, 113 { 0.99999999999887801, 0.00000149802811317 }, 114 { 0.99999999999971945, 0.00000074901405658 }, 115 { 0.99999999999992983, 0.00000037450702829 }, 116 { 0.99999999999998246, 0.00000018725351415 }, 117 { 0.99999999999999567, 0.00000009362675707 }, 118 { 0.99999999999999889, 0.00000004681337854 }, 119 { 0.99999999999999978, 0.00000002340668927 }, 120 { 0.99999999999999989, 0.00000001170334463 }, 121 { 1.00000000000000000, 0.00000000585167232 }, 122 { 1.00000000000000000, 0.00000000292583616 } 123 }; 124 125 #define BitRev(i,shift) \ 126 ((int)((((unsigned)bitrevTab[(i)&255] << 24)+ \ 127 ((unsigned)bitrevTab[((i)>> 8)&255] << 16)+ \ 128 ((unsigned)bitrevTab[((i)>>16)&255] << 8)+ \ 129 ((unsigned)bitrevTab[((i)>>24)])) >> (shift))) 130 131 static int 132 DFTFactorize( int n, int* factors ) 133 { 134 int nf = 0, f, i, j; 135 136 if( n <= 5 ) 137 { 138 factors[0] = n; 139 return 1; 140 } 141 142 f = (((n - 1)^n)+1) >> 1; 143 if( f > 1 ) 144 { 145 factors[nf++] = f; 146 n = f == n ? 1 : n/f; 147 } 148 149 for( f = 3; n > 1; ) 150 { 151 int d = n/f; 152 if( d*f == n ) 153 { 154 factors[nf++] = f; 155 n = d; 156 } 157 else 158 { 159 f += 2; 160 if( f*f > n ) 161 break; 162 } 163 } 164 165 if( n > 1 ) 166 factors[nf++] = n; 167 168 f = (factors[0] & 1) == 0; 169 for( i = f; i < (nf+f)/2; i++ ) 170 CV_SWAP( factors[i], factors[nf-i-1+f], j ); 171 172 return nf; 173 } 174 175 static void 176 DFTInit( int n0, int nf, int* factors, int* itab, int elem_size, void* _wave, int inv_itab ) 177 { 178 int digits[34], radix[34]; 179 int n = factors[0], m = 0; 180 int* itab0 = itab; 181 int i, j, k; 182 Complex<double> w, w1; 183 double t; 184 185 if( n0 <= 5 ) 186 { 187 itab[0] = 0; 188 itab[n0-1] = n0-1; 189 190 if( n0 != 4 ) 191 { 192 for( i = 1; i < n0-1; i++ ) 193 itab[i] = i; 194 } 195 else 196 { 197 itab[1] = 2; 198 itab[2] = 1; 199 } 200 if( n0 == 5 ) 201 { 202 if( elem_size == sizeof(Complex<double>) ) 203 ((Complex<double>*)_wave)[0] = Complex<double>(1.,0.); 204 else 205 ((Complex<float>*)_wave)[0] = Complex<float>(1.f,0.f); 206 } 207 if( n0 != 4 ) 208 return; 209 m = 2; 210 } 211 else 212 { 213 // radix[] is initialized from index 'nf' down to zero 214 assert (nf < 34); 215 radix[nf] = 1; 216 digits[nf] = 0; 217 for( i = 0; i < nf; i++ ) 218 { 219 digits[i] = 0; 220 radix[nf-i-1] = radix[nf-i]*factors[nf-i-1]; 221 } 222 223 if( inv_itab && factors[0] != factors[nf-1] ) 224 itab = (int*)_wave; 225 226 if( (n & 1) == 0 ) 227 { 228 int a = radix[1], na2 = n*a>>1, na4 = na2 >> 1; 229 for( m = 0; (unsigned)(1 << m) < (unsigned)n; m++ ) 230 ; 231 if( n <= 2 ) 232 { 233 itab[0] = 0; 234 itab[1] = na2; 235 } 236 else if( n <= 256 ) 237 { 238 int shift = 10 - m; 239 for( i = 0; i <= n - 4; i += 4 ) 240 { 241 j = (bitrevTab[i>>2]>>shift)*a; 242 itab[i] = j; 243 itab[i+1] = j + na2; 244 itab[i+2] = j + na4; 245 itab[i+3] = j + na2 + na4; 246 } 247 } 248 else 249 { 250 int shift = 34 - m; 251 for( i = 0; i < n; i += 4 ) 252 { 253 int i4 = i >> 2; 254 j = BitRev(i4,shift)*a; 255 itab[i] = j; 256 itab[i+1] = j + na2; 257 itab[i+2] = j + na4; 258 itab[i+3] = j + na2 + na4; 259 } 260 } 261 262 digits[1]++; 263 264 if( nf >= 2 ) 265 { 266 for( i = n, j = radix[2]; i < n0; ) 267 { 268 for( k = 0; k < n; k++ ) 269 itab[i+k] = itab[k] + j; 270 if( (i += n) >= n0 ) 271 break; 272 j += radix[2]; 273 for( k = 1; ++digits[k] >= factors[k]; k++ ) 274 { 275 digits[k] = 0; 276 j += radix[k+2] - radix[k]; 277 } 278 } 279 } 280 } 281 else 282 { 283 for( i = 0, j = 0;; ) 284 { 285 itab[i] = j; 286 if( ++i >= n0 ) 287 break; 288 j += radix[1]; 289 for( k = 0; ++digits[k] >= factors[k]; k++ ) 290 { 291 digits[k] = 0; 292 j += radix[k+2] - radix[k]; 293 } 294 } 295 } 296 297 if( itab != itab0 ) 298 { 299 itab0[0] = 0; 300 for( i = n0 & 1; i < n0; i += 2 ) 301 { 302 int k0 = itab[i]; 303 int k1 = itab[i+1]; 304 itab0[k0] = i; 305 itab0[k1] = i+1; 306 } 307 } 308 } 309 310 if( (n0 & (n0-1)) == 0 ) 311 { 312 w.re = w1.re = DFTTab[m][0]; 313 w.im = w1.im = -DFTTab[m][1]; 314 } 315 else 316 { 317 t = -CV_PI*2/n0; 318 w.im = w1.im = sin(t); 319 w.re = w1.re = std::sqrt(1. - w1.im*w1.im); 320 } 321 n = (n0+1)/2; 322 323 if( elem_size == sizeof(Complex<double>) ) 324 { 325 Complex<double>* wave = (Complex<double>*)_wave; 326 327 wave[0].re = 1.; 328 wave[0].im = 0.; 329 330 if( (n0 & 1) == 0 ) 331 { 332 wave[n].re = -1.; 333 wave[n].im = 0; 334 } 335 336 for( i = 1; i < n; i++ ) 337 { 338 wave[i] = w; 339 wave[n0-i].re = w.re; 340 wave[n0-i].im = -w.im; 341 342 t = w.re*w1.re - w.im*w1.im; 343 w.im = w.re*w1.im + w.im*w1.re; 344 w.re = t; 345 } 346 } 347 else 348 { 349 Complex<float>* wave = (Complex<float>*)_wave; 350 assert( elem_size == sizeof(Complex<float>) ); 351 352 wave[0].re = 1.f; 353 wave[0].im = 0.f; 354 355 if( (n0 & 1) == 0 ) 356 { 357 wave[n].re = -1.f; 358 wave[n].im = 0.f; 359 } 360 361 for( i = 1; i < n; i++ ) 362 { 363 wave[i].re = (float)w.re; 364 wave[i].im = (float)w.im; 365 wave[n0-i].re = (float)w.re; 366 wave[n0-i].im = (float)-w.im; 367 368 t = w.re*w1.re - w.im*w1.im; 369 w.im = w.re*w1.im + w.im*w1.re; 370 w.re = t; 371 } 372 } 373 } 374 375 template<typename T> struct DFT_VecR4 376 { 377 int operator()(Complex<T>*, int, int, int&, const Complex<T>*) const { return 1; } 378 }; 379 380 #if CV_SSE3 381 382 // optimized radix-4 transform 383 template<> struct DFT_VecR4<float> 384 { 385 int operator()(Complex<float>* dst, int N, int n0, int& _dw0, const Complex<float>* wave) const 386 { 387 int n = 1, i, j, nx, dw, dw0 = _dw0; 388 __m128 z = _mm_setzero_ps(), x02=z, x13=z, w01=z, w23=z, y01, y23, t0, t1; 389 Cv32suf t; t.i = 0x80000000; 390 __m128 neg0_mask = _mm_load_ss(&t.f); 391 __m128 neg3_mask = _mm_shuffle_ps(neg0_mask, neg0_mask, _MM_SHUFFLE(0,1,2,3)); 392 393 for( ; n*4 <= N; ) 394 { 395 nx = n; 396 n *= 4; 397 dw0 /= 4; 398 399 for( i = 0; i < n0; i += n ) 400 { 401 Complexf *v0, *v1; 402 403 v0 = dst + i; 404 v1 = v0 + nx*2; 405 406 x02 = _mm_loadl_pi(x02, (const __m64*)&v0[0]); 407 x13 = _mm_loadl_pi(x13, (const __m64*)&v0[nx]); 408 x02 = _mm_loadh_pi(x02, (const __m64*)&v1[0]); 409 x13 = _mm_loadh_pi(x13, (const __m64*)&v1[nx]); 410 411 y01 = _mm_add_ps(x02, x13); 412 y23 = _mm_sub_ps(x02, x13); 413 t1 = _mm_xor_ps(_mm_shuffle_ps(y01, y23, _MM_SHUFFLE(2,3,3,2)), neg3_mask); 414 t0 = _mm_movelh_ps(y01, y23); 415 y01 = _mm_add_ps(t0, t1); 416 y23 = _mm_sub_ps(t0, t1); 417 418 _mm_storel_pi((__m64*)&v0[0], y01); 419 _mm_storeh_pi((__m64*)&v0[nx], y01); 420 _mm_storel_pi((__m64*)&v1[0], y23); 421 _mm_storeh_pi((__m64*)&v1[nx], y23); 422 423 for( j = 1, dw = dw0; j < nx; j++, dw += dw0 ) 424 { 425 v0 = dst + i + j; 426 v1 = v0 + nx*2; 427 428 x13 = _mm_loadl_pi(x13, (const __m64*)&v0[nx]); 429 w23 = _mm_loadl_pi(w23, (const __m64*)&wave[dw*2]); 430 x13 = _mm_loadh_pi(x13, (const __m64*)&v1[nx]); // x1, x3 = r1 i1 r3 i3 431 w23 = _mm_loadh_pi(w23, (const __m64*)&wave[dw*3]); // w2, w3 = wr2 wi2 wr3 wi3 432 433 t0 = _mm_mul_ps(_mm_moveldup_ps(x13), w23); 434 t1 = _mm_mul_ps(_mm_movehdup_ps(x13), _mm_shuffle_ps(w23, w23, _MM_SHUFFLE(2,3,0,1))); 435 x13 = _mm_addsub_ps(t0, t1); 436 // re(x1*w2), im(x1*w2), re(x3*w3), im(x3*w3) 437 x02 = _mm_loadl_pi(x02, (const __m64*)&v1[0]); // x2 = r2 i2 438 w01 = _mm_loadl_pi(w01, (const __m64*)&wave[dw]); // w1 = wr1 wi1 439 x02 = _mm_shuffle_ps(x02, x02, _MM_SHUFFLE(0,0,1,1)); 440 w01 = _mm_shuffle_ps(w01, w01, _MM_SHUFFLE(1,0,0,1)); 441 x02 = _mm_mul_ps(x02, w01); 442 x02 = _mm_addsub_ps(x02, _mm_movelh_ps(x02, x02)); 443 // re(x0) im(x0) re(x2*w1), im(x2*w1) 444 x02 = _mm_loadl_pi(x02, (const __m64*)&v0[0]); 445 446 y01 = _mm_add_ps(x02, x13); 447 y23 = _mm_sub_ps(x02, x13); 448 t1 = _mm_xor_ps(_mm_shuffle_ps(y01, y23, _MM_SHUFFLE(2,3,3,2)), neg3_mask); 449 t0 = _mm_movelh_ps(y01, y23); 450 y01 = _mm_add_ps(t0, t1); 451 y23 = _mm_sub_ps(t0, t1); 452 453 _mm_storel_pi((__m64*)&v0[0], y01); 454 _mm_storeh_pi((__m64*)&v0[nx], y01); 455 _mm_storel_pi((__m64*)&v1[0], y23); 456 _mm_storeh_pi((__m64*)&v1[nx], y23); 457 } 458 } 459 } 460 461 _dw0 = dw0; 462 return n; 463 } 464 }; 465 466 #endif 467 468 #ifdef USE_IPP_DFT 469 static IppStatus ippsDFTFwd_CToC( const Complex<float>* src, Complex<float>* dst, 470 const void* spec, uchar* buf) 471 { 472 return ippsDFTFwd_CToC_32fc( (const Ipp32fc*)src, (Ipp32fc*)dst, 473 (const IppsDFTSpec_C_32fc*)spec, buf); 474 } 475 476 static IppStatus ippsDFTFwd_CToC( const Complex<double>* src, Complex<double>* dst, 477 const void* spec, uchar* buf) 478 { 479 return ippsDFTFwd_CToC_64fc( (const Ipp64fc*)src, (Ipp64fc*)dst, 480 (const IppsDFTSpec_C_64fc*)spec, buf); 481 } 482 483 static IppStatus ippsDFTInv_CToC( const Complex<float>* src, Complex<float>* dst, 484 const void* spec, uchar* buf) 485 { 486 return ippsDFTInv_CToC_32fc( (const Ipp32fc*)src, (Ipp32fc*)dst, 487 (const IppsDFTSpec_C_32fc*)spec, buf); 488 } 489 490 static IppStatus ippsDFTInv_CToC( const Complex<double>* src, Complex<double>* dst, 491 const void* spec, uchar* buf) 492 { 493 return ippsDFTInv_CToC_64fc( (const Ipp64fc*)src, (Ipp64fc*)dst, 494 (const IppsDFTSpec_C_64fc*)spec, buf); 495 } 496 497 static IppStatus ippsDFTFwd_RToPack( const float* src, float* dst, 498 const void* spec, uchar* buf) 499 { 500 return ippsDFTFwd_RToPack_32f( src, dst, (const IppsDFTSpec_R_32f*)spec, buf); 501 } 502 503 static IppStatus ippsDFTFwd_RToPack( const double* src, double* dst, 504 const void* spec, uchar* buf) 505 { 506 return ippsDFTFwd_RToPack_64f( src, dst, (const IppsDFTSpec_R_64f*)spec, buf); 507 } 508 509 static IppStatus ippsDFTInv_PackToR( const float* src, float* dst, 510 const void* spec, uchar* buf) 511 { 512 return ippsDFTInv_PackToR_32f( src, dst, (const IppsDFTSpec_R_32f*)spec, buf); 513 } 514 515 static IppStatus ippsDFTInv_PackToR( const double* src, double* dst, 516 const void* spec, uchar* buf) 517 { 518 return ippsDFTInv_PackToR_64f( src, dst, (const IppsDFTSpec_R_64f*)spec, buf); 519 } 520 #endif 521 522 enum { DFT_NO_PERMUTE=256, DFT_COMPLEX_INPUT_OR_OUTPUT=512 }; 523 524 // mixed-radix complex discrete Fourier transform: double-precision version 525 template<typename T> static void 526 DFT( const Complex<T>* src, Complex<T>* dst, int n, 527 int nf, const int* factors, const int* itab, 528 const Complex<T>* wave, int tab_size, 529 const void* 530 #ifdef USE_IPP_DFT 531 spec 532 #endif 533 , Complex<T>* buf, 534 int flags, double _scale ) 535 { 536 static const T sin_120 = (T)0.86602540378443864676372317075294; 537 static const T fft5_2 = (T)0.559016994374947424102293417182819; 538 static const T fft5_3 = (T)-0.951056516295153572116439333379382; 539 static const T fft5_4 = (T)-1.538841768587626701285145288018455; 540 static const T fft5_5 = (T)0.363271264002680442947733378740309; 541 542 int n0 = n, f_idx, nx; 543 int inv = flags & DFT_INVERSE; 544 int dw0 = tab_size, dw; 545 int i, j, k; 546 Complex<T> t; 547 T scale = (T)_scale; 548 int tab_step; 549 550 #ifdef USE_IPP_DFT 551 if( spec ) 552 { 553 if( !inv ) 554 { 555 if (ippsDFTFwd_CToC( src, dst, spec, (uchar*)buf ) >= 0) 556 { 557 CV_IMPL_ADD(CV_IMPL_IPP); 558 return; 559 } 560 } 561 else 562 { 563 if (ippsDFTInv_CToC( src, dst, spec, (uchar*)buf ) >= 0) 564 { 565 CV_IMPL_ADD(CV_IMPL_IPP); 566 return; 567 } 568 } 569 setIppErrorStatus(); 570 } 571 #endif 572 573 tab_step = tab_size == n ? 1 : tab_size == n*2 ? 2 : tab_size/n; 574 575 // 0. shuffle data 576 if( dst != src ) 577 { 578 assert( (flags & DFT_NO_PERMUTE) == 0 ); 579 if( !inv ) 580 { 581 for( i = 0; i <= n - 2; i += 2, itab += 2*tab_step ) 582 { 583 int k0 = itab[0], k1 = itab[tab_step]; 584 assert( (unsigned)k0 < (unsigned)n && (unsigned)k1 < (unsigned)n ); 585 dst[i] = src[k0]; dst[i+1] = src[k1]; 586 } 587 588 if( i < n ) 589 dst[n-1] = src[n-1]; 590 } 591 else 592 { 593 for( i = 0; i <= n - 2; i += 2, itab += 2*tab_step ) 594 { 595 int k0 = itab[0], k1 = itab[tab_step]; 596 assert( (unsigned)k0 < (unsigned)n && (unsigned)k1 < (unsigned)n ); 597 t.re = src[k0].re; t.im = -src[k0].im; 598 dst[i] = t; 599 t.re = src[k1].re; t.im = -src[k1].im; 600 dst[i+1] = t; 601 } 602 603 if( i < n ) 604 { 605 t.re = src[n-1].re; t.im = -src[n-1].im; 606 dst[i] = t; 607 } 608 } 609 } 610 else 611 { 612 if( (flags & DFT_NO_PERMUTE) == 0 ) 613 { 614 CV_Assert( factors[0] == factors[nf-1] ); 615 if( nf == 1 ) 616 { 617 if( (n & 3) == 0 ) 618 { 619 int n2 = n/2; 620 Complex<T>* dsth = dst + n2; 621 622 for( i = 0; i < n2; i += 2, itab += tab_step*2 ) 623 { 624 j = itab[0]; 625 assert( (unsigned)j < (unsigned)n2 ); 626 627 CV_SWAP(dst[i+1], dsth[j], t); 628 if( j > i ) 629 { 630 CV_SWAP(dst[i], dst[j], t); 631 CV_SWAP(dsth[i+1], dsth[j+1], t); 632 } 633 } 634 } 635 // else do nothing 636 } 637 else 638 { 639 for( i = 0; i < n; i++, itab += tab_step ) 640 { 641 j = itab[0]; 642 assert( (unsigned)j < (unsigned)n ); 643 if( j > i ) 644 CV_SWAP(dst[i], dst[j], t); 645 } 646 } 647 } 648 649 if( inv ) 650 { 651 for( i = 0; i <= n - 2; i += 2 ) 652 { 653 T t0 = -dst[i].im; 654 T t1 = -dst[i+1].im; 655 dst[i].im = t0; dst[i+1].im = t1; 656 } 657 658 if( i < n ) 659 dst[n-1].im = -dst[n-1].im; 660 } 661 } 662 663 n = 1; 664 // 1. power-2 transforms 665 if( (factors[0] & 1) == 0 ) 666 { 667 if( factors[0] >= 4 && checkHardwareSupport(CV_CPU_SSE3)) 668 { 669 DFT_VecR4<T> vr4; 670 n = vr4(dst, factors[0], n0, dw0, wave); 671 } 672 673 // radix-4 transform 674 for( ; n*4 <= factors[0]; ) 675 { 676 nx = n; 677 n *= 4; 678 dw0 /= 4; 679 680 for( i = 0; i < n0; i += n ) 681 { 682 Complex<T> *v0, *v1; 683 T r0, i0, r1, i1, r2, i2, r3, i3, r4, i4; 684 685 v0 = dst + i; 686 v1 = v0 + nx*2; 687 688 r0 = v1[0].re; i0 = v1[0].im; 689 r4 = v1[nx].re; i4 = v1[nx].im; 690 691 r1 = r0 + r4; i1 = i0 + i4; 692 r3 = i0 - i4; i3 = r4 - r0; 693 694 r2 = v0[0].re; i2 = v0[0].im; 695 r4 = v0[nx].re; i4 = v0[nx].im; 696 697 r0 = r2 + r4; i0 = i2 + i4; 698 r2 -= r4; i2 -= i4; 699 700 v0[0].re = r0 + r1; v0[0].im = i0 + i1; 701 v1[0].re = r0 - r1; v1[0].im = i0 - i1; 702 v0[nx].re = r2 + r3; v0[nx].im = i2 + i3; 703 v1[nx].re = r2 - r3; v1[nx].im = i2 - i3; 704 705 for( j = 1, dw = dw0; j < nx; j++, dw += dw0 ) 706 { 707 v0 = dst + i + j; 708 v1 = v0 + nx*2; 709 710 r2 = v0[nx].re*wave[dw*2].re - v0[nx].im*wave[dw*2].im; 711 i2 = v0[nx].re*wave[dw*2].im + v0[nx].im*wave[dw*2].re; 712 r0 = v1[0].re*wave[dw].im + v1[0].im*wave[dw].re; 713 i0 = v1[0].re*wave[dw].re - v1[0].im*wave[dw].im; 714 r3 = v1[nx].re*wave[dw*3].im + v1[nx].im*wave[dw*3].re; 715 i3 = v1[nx].re*wave[dw*3].re - v1[nx].im*wave[dw*3].im; 716 717 r1 = i0 + i3; i1 = r0 + r3; 718 r3 = r0 - r3; i3 = i3 - i0; 719 r4 = v0[0].re; i4 = v0[0].im; 720 721 r0 = r4 + r2; i0 = i4 + i2; 722 r2 = r4 - r2; i2 = i4 - i2; 723 724 v0[0].re = r0 + r1; v0[0].im = i0 + i1; 725 v1[0].re = r0 - r1; v1[0].im = i0 - i1; 726 v0[nx].re = r2 + r3; v0[nx].im = i2 + i3; 727 v1[nx].re = r2 - r3; v1[nx].im = i2 - i3; 728 } 729 } 730 } 731 732 for( ; n < factors[0]; ) 733 { 734 // do the remaining radix-2 transform 735 nx = n; 736 n *= 2; 737 dw0 /= 2; 738 739 for( i = 0; i < n0; i += n ) 740 { 741 Complex<T>* v = dst + i; 742 T r0 = v[0].re + v[nx].re; 743 T i0 = v[0].im + v[nx].im; 744 T r1 = v[0].re - v[nx].re; 745 T i1 = v[0].im - v[nx].im; 746 v[0].re = r0; v[0].im = i0; 747 v[nx].re = r1; v[nx].im = i1; 748 749 for( j = 1, dw = dw0; j < nx; j++, dw += dw0 ) 750 { 751 v = dst + i + j; 752 r1 = v[nx].re*wave[dw].re - v[nx].im*wave[dw].im; 753 i1 = v[nx].im*wave[dw].re + v[nx].re*wave[dw].im; 754 r0 = v[0].re; i0 = v[0].im; 755 756 v[0].re = r0 + r1; v[0].im = i0 + i1; 757 v[nx].re = r0 - r1; v[nx].im = i0 - i1; 758 } 759 } 760 } 761 } 762 763 // 2. all the other transforms 764 for( f_idx = (factors[0]&1) ? 0 : 1; f_idx < nf; f_idx++ ) 765 { 766 int factor = factors[f_idx]; 767 nx = n; 768 n *= factor; 769 dw0 /= factor; 770 771 if( factor == 3 ) 772 { 773 // radix-3 774 for( i = 0; i < n0; i += n ) 775 { 776 Complex<T>* v = dst + i; 777 778 T r1 = v[nx].re + v[nx*2].re; 779 T i1 = v[nx].im + v[nx*2].im; 780 T r0 = v[0].re; 781 T i0 = v[0].im; 782 T r2 = sin_120*(v[nx].im - v[nx*2].im); 783 T i2 = sin_120*(v[nx*2].re - v[nx].re); 784 v[0].re = r0 + r1; v[0].im = i0 + i1; 785 r0 -= (T)0.5*r1; i0 -= (T)0.5*i1; 786 v[nx].re = r0 + r2; v[nx].im = i0 + i2; 787 v[nx*2].re = r0 - r2; v[nx*2].im = i0 - i2; 788 789 for( j = 1, dw = dw0; j < nx; j++, dw += dw0 ) 790 { 791 v = dst + i + j; 792 r0 = v[nx].re*wave[dw].re - v[nx].im*wave[dw].im; 793 i0 = v[nx].re*wave[dw].im + v[nx].im*wave[dw].re; 794 i2 = v[nx*2].re*wave[dw*2].re - v[nx*2].im*wave[dw*2].im; 795 r2 = v[nx*2].re*wave[dw*2].im + v[nx*2].im*wave[dw*2].re; 796 r1 = r0 + i2; i1 = i0 + r2; 797 798 r2 = sin_120*(i0 - r2); i2 = sin_120*(i2 - r0); 799 r0 = v[0].re; i0 = v[0].im; 800 v[0].re = r0 + r1; v[0].im = i0 + i1; 801 r0 -= (T)0.5*r1; i0 -= (T)0.5*i1; 802 v[nx].re = r0 + r2; v[nx].im = i0 + i2; 803 v[nx*2].re = r0 - r2; v[nx*2].im = i0 - i2; 804 } 805 } 806 } 807 else if( factor == 5 ) 808 { 809 // radix-5 810 for( i = 0; i < n0; i += n ) 811 { 812 for( j = 0, dw = 0; j < nx; j++, dw += dw0 ) 813 { 814 Complex<T>* v0 = dst + i + j; 815 Complex<T>* v1 = v0 + nx*2; 816 Complex<T>* v2 = v1 + nx*2; 817 818 T r0, i0, r1, i1, r2, i2, r3, i3, r4, i4, r5, i5; 819 820 r3 = v0[nx].re*wave[dw].re - v0[nx].im*wave[dw].im; 821 i3 = v0[nx].re*wave[dw].im + v0[nx].im*wave[dw].re; 822 r2 = v2[0].re*wave[dw*4].re - v2[0].im*wave[dw*4].im; 823 i2 = v2[0].re*wave[dw*4].im + v2[0].im*wave[dw*4].re; 824 825 r1 = r3 + r2; i1 = i3 + i2; 826 r3 -= r2; i3 -= i2; 827 828 r4 = v1[nx].re*wave[dw*3].re - v1[nx].im*wave[dw*3].im; 829 i4 = v1[nx].re*wave[dw*3].im + v1[nx].im*wave[dw*3].re; 830 r0 = v1[0].re*wave[dw*2].re - v1[0].im*wave[dw*2].im; 831 i0 = v1[0].re*wave[dw*2].im + v1[0].im*wave[dw*2].re; 832 833 r2 = r4 + r0; i2 = i4 + i0; 834 r4 -= r0; i4 -= i0; 835 836 r0 = v0[0].re; i0 = v0[0].im; 837 r5 = r1 + r2; i5 = i1 + i2; 838 839 v0[0].re = r0 + r5; v0[0].im = i0 + i5; 840 841 r0 -= (T)0.25*r5; i0 -= (T)0.25*i5; 842 r1 = fft5_2*(r1 - r2); i1 = fft5_2*(i1 - i2); 843 r2 = -fft5_3*(i3 + i4); i2 = fft5_3*(r3 + r4); 844 845 i3 *= -fft5_5; r3 *= fft5_5; 846 i4 *= -fft5_4; r4 *= fft5_4; 847 848 r5 = r2 + i3; i5 = i2 + r3; 849 r2 -= i4; i2 -= r4; 850 851 r3 = r0 + r1; i3 = i0 + i1; 852 r0 -= r1; i0 -= i1; 853 854 v0[nx].re = r3 + r2; v0[nx].im = i3 + i2; 855 v2[0].re = r3 - r2; v2[0].im = i3 - i2; 856 857 v1[0].re = r0 + r5; v1[0].im = i0 + i5; 858 v1[nx].re = r0 - r5; v1[nx].im = i0 - i5; 859 } 860 } 861 } 862 else 863 { 864 // radix-"factor" - an odd number 865 int p, q, factor2 = (factor - 1)/2; 866 int d, dd, dw_f = tab_size/factor; 867 Complex<T>* a = buf; 868 Complex<T>* b = buf + factor2; 869 870 for( i = 0; i < n0; i += n ) 871 { 872 for( j = 0, dw = 0; j < nx; j++, dw += dw0 ) 873 { 874 Complex<T>* v = dst + i + j; 875 Complex<T> v_0 = v[0]; 876 Complex<T> vn_0 = v_0; 877 878 if( j == 0 ) 879 { 880 for( p = 1, k = nx; p <= factor2; p++, k += nx ) 881 { 882 T r0 = v[k].re + v[n-k].re; 883 T i0 = v[k].im - v[n-k].im; 884 T r1 = v[k].re - v[n-k].re; 885 T i1 = v[k].im + v[n-k].im; 886 887 vn_0.re += r0; vn_0.im += i1; 888 a[p-1].re = r0; a[p-1].im = i0; 889 b[p-1].re = r1; b[p-1].im = i1; 890 } 891 } 892 else 893 { 894 const Complex<T>* wave_ = wave + dw*factor; 895 d = dw; 896 897 for( p = 1, k = nx; p <= factor2; p++, k += nx, d += dw ) 898 { 899 T r2 = v[k].re*wave[d].re - v[k].im*wave[d].im; 900 T i2 = v[k].re*wave[d].im + v[k].im*wave[d].re; 901 902 T r1 = v[n-k].re*wave_[-d].re - v[n-k].im*wave_[-d].im; 903 T i1 = v[n-k].re*wave_[-d].im + v[n-k].im*wave_[-d].re; 904 905 T r0 = r2 + r1; 906 T i0 = i2 - i1; 907 r1 = r2 - r1; 908 i1 = i2 + i1; 909 910 vn_0.re += r0; vn_0.im += i1; 911 a[p-1].re = r0; a[p-1].im = i0; 912 b[p-1].re = r1; b[p-1].im = i1; 913 } 914 } 915 916 v[0] = vn_0; 917 918 for( p = 1, k = nx; p <= factor2; p++, k += nx ) 919 { 920 Complex<T> s0 = v_0, s1 = v_0; 921 d = dd = dw_f*p; 922 923 for( q = 0; q < factor2; q++ ) 924 { 925 T r0 = wave[d].re * a[q].re; 926 T i0 = wave[d].im * a[q].im; 927 T r1 = wave[d].re * b[q].im; 928 T i1 = wave[d].im * b[q].re; 929 930 s1.re += r0 + i0; s0.re += r0 - i0; 931 s1.im += r1 - i1; s0.im += r1 + i1; 932 933 d += dd; 934 d -= -(d >= tab_size) & tab_size; 935 } 936 937 v[k] = s0; 938 v[n-k] = s1; 939 } 940 } 941 } 942 } 943 } 944 945 if( scale != 1 ) 946 { 947 T re_scale = scale, im_scale = scale; 948 if( inv ) 949 im_scale = -im_scale; 950 951 for( i = 0; i < n0; i++ ) 952 { 953 T t0 = dst[i].re*re_scale; 954 T t1 = dst[i].im*im_scale; 955 dst[i].re = t0; 956 dst[i].im = t1; 957 } 958 } 959 else if( inv ) 960 { 961 for( i = 0; i <= n0 - 2; i += 2 ) 962 { 963 T t0 = -dst[i].im; 964 T t1 = -dst[i+1].im; 965 dst[i].im = t0; 966 dst[i+1].im = t1; 967 } 968 969 if( i < n0 ) 970 dst[n0-1].im = -dst[n0-1].im; 971 } 972 } 973 974 975 /* FFT of real vector 976 output vector format: 977 re(0), re(1), im(1), ... , re(n/2-1), im((n+1)/2-1) [, re((n+1)/2)] OR ... 978 re(0), 0, re(1), im(1), ..., re(n/2-1), im((n+1)/2-1) [, re((n+1)/2), 0] */ 979 template<typename T> static void 980 RealDFT( const T* src, T* dst, int n, int nf, int* factors, const int* itab, 981 const Complex<T>* wave, int tab_size, const void* 982 #ifdef USE_IPP_DFT 983 spec 984 #endif 985 , 986 Complex<T>* buf, int flags, double _scale ) 987 { 988 int complex_output = (flags & DFT_COMPLEX_INPUT_OR_OUTPUT) != 0; 989 T scale = (T)_scale; 990 int j, n2 = n >> 1; 991 dst += complex_output; 992 993 #ifdef USE_IPP_DFT 994 if( spec ) 995 { 996 if (ippsDFTFwd_RToPack( src, dst, spec, (uchar*)buf ) >=0) 997 { 998 if( complex_output ) 999 { 1000 dst[-1] = dst[0]; 1001 dst[0] = 0; 1002 if( (n & 1) == 0 ) 1003 dst[n] = 0; 1004 } 1005 CV_IMPL_ADD(CV_IMPL_IPP); 1006 return; 1007 } 1008 setIppErrorStatus(); 1009 } 1010 #endif 1011 assert( tab_size == n ); 1012 1013 if( n == 1 ) 1014 { 1015 dst[0] = src[0]*scale; 1016 } 1017 else if( n == 2 ) 1018 { 1019 T t = (src[0] + src[1])*scale; 1020 dst[1] = (src[0] - src[1])*scale; 1021 dst[0] = t; 1022 } 1023 else if( n & 1 ) 1024 { 1025 dst -= complex_output; 1026 Complex<T>* _dst = (Complex<T>*)dst; 1027 _dst[0].re = src[0]*scale; 1028 _dst[0].im = 0; 1029 for( j = 1; j < n; j += 2 ) 1030 { 1031 T t0 = src[itab[j]]*scale; 1032 T t1 = src[itab[j+1]]*scale; 1033 _dst[j].re = t0; 1034 _dst[j].im = 0; 1035 _dst[j+1].re = t1; 1036 _dst[j+1].im = 0; 1037 } 1038 DFT( _dst, _dst, n, nf, factors, itab, wave, 1039 tab_size, 0, buf, DFT_NO_PERMUTE, 1 ); 1040 if( !complex_output ) 1041 dst[1] = dst[0]; 1042 } 1043 else 1044 { 1045 T t0, t; 1046 T h1_re, h1_im, h2_re, h2_im; 1047 T scale2 = scale*(T)0.5; 1048 factors[0] >>= 1; 1049 1050 DFT( (Complex<T>*)src, (Complex<T>*)dst, n2, nf - (factors[0] == 1), 1051 factors + (factors[0] == 1), 1052 itab, wave, tab_size, 0, buf, 0, 1 ); 1053 factors[0] <<= 1; 1054 1055 t = dst[0] - dst[1]; 1056 dst[0] = (dst[0] + dst[1])*scale; 1057 dst[1] = t*scale; 1058 1059 t0 = dst[n2]; 1060 t = dst[n-1]; 1061 dst[n-1] = dst[1]; 1062 1063 for( j = 2, wave++; j < n2; j += 2, wave++ ) 1064 { 1065 /* calc odd */ 1066 h2_re = scale2*(dst[j+1] + t); 1067 h2_im = scale2*(dst[n-j] - dst[j]); 1068 1069 /* calc even */ 1070 h1_re = scale2*(dst[j] + dst[n-j]); 1071 h1_im = scale2*(dst[j+1] - t); 1072 1073 /* rotate */ 1074 t = h2_re*wave->re - h2_im*wave->im; 1075 h2_im = h2_re*wave->im + h2_im*wave->re; 1076 h2_re = t; 1077 t = dst[n-j-1]; 1078 1079 dst[j-1] = h1_re + h2_re; 1080 dst[n-j-1] = h1_re - h2_re; 1081 dst[j] = h1_im + h2_im; 1082 dst[n-j] = h2_im - h1_im; 1083 } 1084 1085 if( j <= n2 ) 1086 { 1087 dst[n2-1] = t0*scale; 1088 dst[n2] = -t*scale; 1089 } 1090 } 1091 1092 if( complex_output && ((n & 1) == 0 || n == 1)) 1093 { 1094 dst[-1] = dst[0]; 1095 dst[0] = 0; 1096 if( n > 1 ) 1097 dst[n] = 0; 1098 } 1099 } 1100 1101 /* Inverse FFT of complex conjugate-symmetric vector 1102 input vector format: 1103 re[0], re[1], im[1], ... , re[n/2-1], im[n/2-1], re[n/2] OR 1104 re(0), 0, re(1), im(1), ..., re(n/2-1), im((n+1)/2-1) [, re((n+1)/2), 0] */ 1105 template<typename T> static void 1106 CCSIDFT( const T* src, T* dst, int n, int nf, int* factors, const int* itab, 1107 const Complex<T>* wave, int tab_size, 1108 const void* 1109 #ifdef USE_IPP_DFT 1110 spec 1111 #endif 1112 , Complex<T>* buf, 1113 int flags, double _scale ) 1114 { 1115 int complex_input = (flags & DFT_COMPLEX_INPUT_OR_OUTPUT) != 0; 1116 int j, k, n2 = (n+1) >> 1; 1117 T scale = (T)_scale; 1118 T save_s1 = 0.; 1119 T t0, t1, t2, t3, t; 1120 1121 assert( tab_size == n ); 1122 1123 if( complex_input ) 1124 { 1125 assert( src != dst ); 1126 save_s1 = src[1]; 1127 ((T*)src)[1] = src[0]; 1128 src++; 1129 } 1130 #ifdef USE_IPP_DFT 1131 if( spec ) 1132 { 1133 if (ippsDFTInv_PackToR( src, dst, spec, (uchar*)buf ) >=0) 1134 { 1135 if( complex_input ) 1136 ((T*)src)[0] = (T)save_s1; 1137 CV_IMPL_ADD(CV_IMPL_IPP); 1138 return; 1139 } 1140 1141 setIppErrorStatus(); 1142 } 1143 #endif 1144 if( n == 1 ) 1145 { 1146 dst[0] = (T)(src[0]*scale); 1147 } 1148 else if( n == 2 ) 1149 { 1150 t = (src[0] + src[1])*scale; 1151 dst[1] = (src[0] - src[1])*scale; 1152 dst[0] = t; 1153 } 1154 else if( n & 1 ) 1155 { 1156 Complex<T>* _src = (Complex<T>*)(src-1); 1157 Complex<T>* _dst = (Complex<T>*)dst; 1158 1159 _dst[0].re = src[0]; 1160 _dst[0].im = 0; 1161 for( j = 1; j < n2; j++ ) 1162 { 1163 int k0 = itab[j], k1 = itab[n-j]; 1164 t0 = _src[j].re; t1 = _src[j].im; 1165 _dst[k0].re = t0; _dst[k0].im = -t1; 1166 _dst[k1].re = t0; _dst[k1].im = t1; 1167 } 1168 1169 DFT( _dst, _dst, n, nf, factors, itab, wave, 1170 tab_size, 0, buf, DFT_NO_PERMUTE, 1. ); 1171 dst[0] *= scale; 1172 for( j = 1; j < n; j += 2 ) 1173 { 1174 t0 = dst[j*2]*scale; 1175 t1 = dst[j*2+2]*scale; 1176 dst[j] = t0; 1177 dst[j+1] = t1; 1178 } 1179 } 1180 else 1181 { 1182 int inplace = src == dst; 1183 const Complex<T>* w = wave; 1184 1185 t = src[1]; 1186 t0 = (src[0] + src[n-1]); 1187 t1 = (src[n-1] - src[0]); 1188 dst[0] = t0; 1189 dst[1] = t1; 1190 1191 for( j = 2, w++; j < n2; j += 2, w++ ) 1192 { 1193 T h1_re, h1_im, h2_re, h2_im; 1194 1195 h1_re = (t + src[n-j-1]); 1196 h1_im = (src[j] - src[n-j]); 1197 1198 h2_re = (t - src[n-j-1]); 1199 h2_im = (src[j] + src[n-j]); 1200 1201 t = h2_re*w->re + h2_im*w->im; 1202 h2_im = h2_im*w->re - h2_re*w->im; 1203 h2_re = t; 1204 1205 t = src[j+1]; 1206 t0 = h1_re - h2_im; 1207 t1 = -h1_im - h2_re; 1208 t2 = h1_re + h2_im; 1209 t3 = h1_im - h2_re; 1210 1211 if( inplace ) 1212 { 1213 dst[j] = t0; 1214 dst[j+1] = t1; 1215 dst[n-j] = t2; 1216 dst[n-j+1]= t3; 1217 } 1218 else 1219 { 1220 int j2 = j >> 1; 1221 k = itab[j2]; 1222 dst[k] = t0; 1223 dst[k+1] = t1; 1224 k = itab[n2-j2]; 1225 dst[k] = t2; 1226 dst[k+1]= t3; 1227 } 1228 } 1229 1230 if( j <= n2 ) 1231 { 1232 t0 = t*2; 1233 t1 = src[n2]*2; 1234 1235 if( inplace ) 1236 { 1237 dst[n2] = t0; 1238 dst[n2+1] = t1; 1239 } 1240 else 1241 { 1242 k = itab[n2]; 1243 dst[k*2] = t0; 1244 dst[k*2+1] = t1; 1245 } 1246 } 1247 1248 factors[0] >>= 1; 1249 DFT( (Complex<T>*)dst, (Complex<T>*)dst, n2, 1250 nf - (factors[0] == 1), 1251 factors + (factors[0] == 1), itab, 1252 wave, tab_size, 0, buf, 1253 inplace ? 0 : DFT_NO_PERMUTE, 1. ); 1254 factors[0] <<= 1; 1255 1256 for( j = 0; j < n; j += 2 ) 1257 { 1258 t0 = dst[j]*scale; 1259 t1 = dst[j+1]*(-scale); 1260 dst[j] = t0; 1261 dst[j+1] = t1; 1262 } 1263 } 1264 if( complex_input ) 1265 ((T*)src)[0] = (T)save_s1; 1266 } 1267 1268 static void 1269 CopyColumn( const uchar* _src, size_t src_step, 1270 uchar* _dst, size_t dst_step, 1271 int len, size_t elem_size ) 1272 { 1273 int i, t0, t1; 1274 const int* src = (const int*)_src; 1275 int* dst = (int*)_dst; 1276 src_step /= sizeof(src[0]); 1277 dst_step /= sizeof(dst[0]); 1278 1279 if( elem_size == sizeof(int) ) 1280 { 1281 for( i = 0; i < len; i++, src += src_step, dst += dst_step ) 1282 dst[0] = src[0]; 1283 } 1284 else if( elem_size == sizeof(int)*2 ) 1285 { 1286 for( i = 0; i < len; i++, src += src_step, dst += dst_step ) 1287 { 1288 t0 = src[0]; t1 = src[1]; 1289 dst[0] = t0; dst[1] = t1; 1290 } 1291 } 1292 else if( elem_size == sizeof(int)*4 ) 1293 { 1294 for( i = 0; i < len; i++, src += src_step, dst += dst_step ) 1295 { 1296 t0 = src[0]; t1 = src[1]; 1297 dst[0] = t0; dst[1] = t1; 1298 t0 = src[2]; t1 = src[3]; 1299 dst[2] = t0; dst[3] = t1; 1300 } 1301 } 1302 } 1303 1304 1305 static void 1306 CopyFrom2Columns( const uchar* _src, size_t src_step, 1307 uchar* _dst0, uchar* _dst1, 1308 int len, size_t elem_size ) 1309 { 1310 int i, t0, t1; 1311 const int* src = (const int*)_src; 1312 int* dst0 = (int*)_dst0; 1313 int* dst1 = (int*)_dst1; 1314 src_step /= sizeof(src[0]); 1315 1316 if( elem_size == sizeof(int) ) 1317 { 1318 for( i = 0; i < len; i++, src += src_step ) 1319 { 1320 t0 = src[0]; t1 = src[1]; 1321 dst0[i] = t0; dst1[i] = t1; 1322 } 1323 } 1324 else if( elem_size == sizeof(int)*2 ) 1325 { 1326 for( i = 0; i < len*2; i += 2, src += src_step ) 1327 { 1328 t0 = src[0]; t1 = src[1]; 1329 dst0[i] = t0; dst0[i+1] = t1; 1330 t0 = src[2]; t1 = src[3]; 1331 dst1[i] = t0; dst1[i+1] = t1; 1332 } 1333 } 1334 else if( elem_size == sizeof(int)*4 ) 1335 { 1336 for( i = 0; i < len*4; i += 4, src += src_step ) 1337 { 1338 t0 = src[0]; t1 = src[1]; 1339 dst0[i] = t0; dst0[i+1] = t1; 1340 t0 = src[2]; t1 = src[3]; 1341 dst0[i+2] = t0; dst0[i+3] = t1; 1342 t0 = src[4]; t1 = src[5]; 1343 dst1[i] = t0; dst1[i+1] = t1; 1344 t0 = src[6]; t1 = src[7]; 1345 dst1[i+2] = t0; dst1[i+3] = t1; 1346 } 1347 } 1348 } 1349 1350 1351 static void 1352 CopyTo2Columns( const uchar* _src0, const uchar* _src1, 1353 uchar* _dst, size_t dst_step, 1354 int len, size_t elem_size ) 1355 { 1356 int i, t0, t1; 1357 const int* src0 = (const int*)_src0; 1358 const int* src1 = (const int*)_src1; 1359 int* dst = (int*)_dst; 1360 dst_step /= sizeof(dst[0]); 1361 1362 if( elem_size == sizeof(int) ) 1363 { 1364 for( i = 0; i < len; i++, dst += dst_step ) 1365 { 1366 t0 = src0[i]; t1 = src1[i]; 1367 dst[0] = t0; dst[1] = t1; 1368 } 1369 } 1370 else if( elem_size == sizeof(int)*2 ) 1371 { 1372 for( i = 0; i < len*2; i += 2, dst += dst_step ) 1373 { 1374 t0 = src0[i]; t1 = src0[i+1]; 1375 dst[0] = t0; dst[1] = t1; 1376 t0 = src1[i]; t1 = src1[i+1]; 1377 dst[2] = t0; dst[3] = t1; 1378 } 1379 } 1380 else if( elem_size == sizeof(int)*4 ) 1381 { 1382 for( i = 0; i < len*4; i += 4, dst += dst_step ) 1383 { 1384 t0 = src0[i]; t1 = src0[i+1]; 1385 dst[0] = t0; dst[1] = t1; 1386 t0 = src0[i+2]; t1 = src0[i+3]; 1387 dst[2] = t0; dst[3] = t1; 1388 t0 = src1[i]; t1 = src1[i+1]; 1389 dst[4] = t0; dst[5] = t1; 1390 t0 = src1[i+2]; t1 = src1[i+3]; 1391 dst[6] = t0; dst[7] = t1; 1392 } 1393 } 1394 } 1395 1396 1397 static void 1398 ExpandCCS( uchar* _ptr, int n, int elem_size ) 1399 { 1400 int i; 1401 if( elem_size == (int)sizeof(float) ) 1402 { 1403 float* p = (float*)_ptr; 1404 for( i = 1; i < (n+1)/2; i++ ) 1405 { 1406 p[(n-i)*2] = p[i*2-1]; 1407 p[(n-i)*2+1] = -p[i*2]; 1408 } 1409 if( (n & 1) == 0 ) 1410 { 1411 p[n] = p[n-1]; 1412 p[n+1] = 0.f; 1413 n--; 1414 } 1415 for( i = n-1; i > 0; i-- ) 1416 p[i+1] = p[i]; 1417 p[1] = 0.f; 1418 } 1419 else 1420 { 1421 double* p = (double*)_ptr; 1422 for( i = 1; i < (n+1)/2; i++ ) 1423 { 1424 p[(n-i)*2] = p[i*2-1]; 1425 p[(n-i)*2+1] = -p[i*2]; 1426 } 1427 if( (n & 1) == 0 ) 1428 { 1429 p[n] = p[n-1]; 1430 p[n+1] = 0.f; 1431 n--; 1432 } 1433 for( i = n-1; i > 0; i-- ) 1434 p[i+1] = p[i]; 1435 p[1] = 0.f; 1436 } 1437 } 1438 1439 1440 typedef void (*DFTFunc)( 1441 const void* src, void* dst, int n, int nf, int* factors, 1442 const int* itab, const void* wave, int tab_size, 1443 const void* spec, void* buf, int inv, double scale ); 1444 1445 static void DFT_32f( const Complexf* src, Complexf* dst, int n, 1446 int nf, const int* factors, const int* itab, 1447 const Complexf* wave, int tab_size, 1448 const void* spec, Complexf* buf, 1449 int flags, double scale ) 1450 { 1451 DFT(src, dst, n, nf, factors, itab, wave, tab_size, spec, buf, flags, scale); 1452 } 1453 1454 static void DFT_64f( const Complexd* src, Complexd* dst, int n, 1455 int nf, const int* factors, const int* itab, 1456 const Complexd* wave, int tab_size, 1457 const void* spec, Complexd* buf, 1458 int flags, double scale ) 1459 { 1460 DFT(src, dst, n, nf, factors, itab, wave, tab_size, spec, buf, flags, scale); 1461 } 1462 1463 1464 static void RealDFT_32f( const float* src, float* dst, int n, int nf, int* factors, 1465 const int* itab, const Complexf* wave, int tab_size, const void* spec, 1466 Complexf* buf, int flags, double scale ) 1467 { 1468 RealDFT( src, dst, n, nf, factors, itab, wave, tab_size, spec, buf, flags, scale); 1469 } 1470 1471 static void RealDFT_64f( const double* src, double* dst, int n, int nf, int* factors, 1472 const int* itab, const Complexd* wave, int tab_size, const void* spec, 1473 Complexd* buf, int flags, double scale ) 1474 { 1475 RealDFT( src, dst, n, nf, factors, itab, wave, tab_size, spec, buf, flags, scale); 1476 } 1477 1478 static void CCSIDFT_32f( const float* src, float* dst, int n, int nf, int* factors, 1479 const int* itab, const Complexf* wave, int tab_size, const void* spec, 1480 Complexf* buf, int flags, double scale ) 1481 { 1482 CCSIDFT( src, dst, n, nf, factors, itab, wave, tab_size, spec, buf, flags, scale); 1483 } 1484 1485 static void CCSIDFT_64f( const double* src, double* dst, int n, int nf, int* factors, 1486 const int* itab, const Complexd* wave, int tab_size, const void* spec, 1487 Complexd* buf, int flags, double scale ) 1488 { 1489 CCSIDFT( src, dst, n, nf, factors, itab, wave, tab_size, spec, buf, flags, scale); 1490 } 1491 1492 } 1493 1494 #ifdef USE_IPP_DFT 1495 typedef IppStatus (CV_STDCALL* IppDFTGetSizeFunc)(int, int, IppHintAlgorithm, int*, int*, int*); 1496 typedef IppStatus (CV_STDCALL* IppDFTInitFunc)(int, int, IppHintAlgorithm, void*, uchar*); 1497 #endif 1498 1499 namespace cv 1500 { 1501 #if defined USE_IPP_DFT 1502 1503 typedef IppStatus (CV_STDCALL* ippiDFT_C_Func)(const Ipp32fc*, int, Ipp32fc*, int, const IppiDFTSpec_C_32fc*, Ipp8u*); 1504 typedef IppStatus (CV_STDCALL* ippiDFT_R_Func)(const Ipp32f* , int, Ipp32f* , int, const IppiDFTSpec_R_32f* , Ipp8u*); 1505 1506 template <typename Dft> 1507 class Dft_C_IPPLoop_Invoker : public ParallelLoopBody 1508 { 1509 public: 1510 1511 Dft_C_IPPLoop_Invoker(const Mat& _src, Mat& _dst, const Dft& _ippidft, int _norm_flag, bool *_ok) : 1512 ParallelLoopBody(), src(_src), dst(_dst), ippidft(_ippidft), norm_flag(_norm_flag), ok(_ok) 1513 { 1514 *ok = true; 1515 } 1516 1517 virtual void operator()(const Range& range) const 1518 { 1519 IppStatus status; 1520 Ipp8u* pBuffer = 0; 1521 Ipp8u* pMemInit= 0; 1522 int sizeBuffer=0; 1523 int sizeSpec=0; 1524 int sizeInit=0; 1525 1526 IppiSize srcRoiSize = {src.cols, 1}; 1527 1528 status = ippiDFTGetSize_C_32fc(srcRoiSize, norm_flag, ippAlgHintNone, &sizeSpec, &sizeInit, &sizeBuffer ); 1529 if ( status < 0 ) 1530 { 1531 *ok = false; 1532 return; 1533 } 1534 1535 IppiDFTSpec_C_32fc* pDFTSpec = (IppiDFTSpec_C_32fc*)ippMalloc( sizeSpec ); 1536 1537 if ( sizeInit > 0 ) 1538 pMemInit = (Ipp8u*)ippMalloc( sizeInit ); 1539 1540 if ( sizeBuffer > 0 ) 1541 pBuffer = (Ipp8u*)ippMalloc( sizeBuffer ); 1542 1543 status = ippiDFTInit_C_32fc( srcRoiSize, norm_flag, ippAlgHintNone, pDFTSpec, pMemInit ); 1544 1545 if ( sizeInit > 0 ) 1546 ippFree( pMemInit ); 1547 1548 if ( status < 0 ) 1549 { 1550 ippFree( pDFTSpec ); 1551 if ( sizeBuffer > 0 ) 1552 ippFree( pBuffer ); 1553 *ok = false; 1554 return; 1555 } 1556 1557 for( int i = range.start; i < range.end; ++i) 1558 if(!ippidft(src.ptr<Ipp32fc>(i), (int)src.step,dst.ptr<Ipp32fc>(i), (int)dst.step, pDFTSpec, (Ipp8u*)pBuffer)) 1559 { 1560 *ok = false; 1561 } 1562 1563 if ( sizeBuffer > 0 ) 1564 ippFree( pBuffer ); 1565 1566 ippFree( pDFTSpec ); 1567 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT); 1568 } 1569 1570 private: 1571 const Mat& src; 1572 Mat& dst; 1573 const Dft& ippidft; 1574 int norm_flag; 1575 bool *ok; 1576 1577 const Dft_C_IPPLoop_Invoker& operator= (const Dft_C_IPPLoop_Invoker&); 1578 }; 1579 1580 template <typename Dft> 1581 class Dft_R_IPPLoop_Invoker : public ParallelLoopBody 1582 { 1583 public: 1584 1585 Dft_R_IPPLoop_Invoker(const Mat& _src, Mat& _dst, const Dft& _ippidft, int _norm_flag, bool *_ok) : 1586 ParallelLoopBody(), src(_src), dst(_dst), ippidft(_ippidft), norm_flag(_norm_flag), ok(_ok) 1587 { 1588 *ok = true; 1589 } 1590 1591 virtual void operator()(const Range& range) const 1592 { 1593 IppStatus status; 1594 Ipp8u* pBuffer = 0; 1595 Ipp8u* pMemInit= 0; 1596 int sizeBuffer=0; 1597 int sizeSpec=0; 1598 int sizeInit=0; 1599 1600 IppiSize srcRoiSize = {src.cols, 1}; 1601 1602 status = ippiDFTGetSize_R_32f(srcRoiSize, norm_flag, ippAlgHintNone, &sizeSpec, &sizeInit, &sizeBuffer ); 1603 if ( status < 0 ) 1604 { 1605 *ok = false; 1606 return; 1607 } 1608 1609 IppiDFTSpec_R_32f* pDFTSpec = (IppiDFTSpec_R_32f*)ippMalloc( sizeSpec ); 1610 1611 if ( sizeInit > 0 ) 1612 pMemInit = (Ipp8u*)ippMalloc( sizeInit ); 1613 1614 if ( sizeBuffer > 0 ) 1615 pBuffer = (Ipp8u*)ippMalloc( sizeBuffer ); 1616 1617 status = ippiDFTInit_R_32f( srcRoiSize, norm_flag, ippAlgHintNone, pDFTSpec, pMemInit ); 1618 1619 if ( sizeInit > 0 ) 1620 ippFree( pMemInit ); 1621 1622 if ( status < 0 ) 1623 { 1624 ippFree( pDFTSpec ); 1625 if ( sizeBuffer > 0 ) 1626 ippFree( pBuffer ); 1627 *ok = false; 1628 return; 1629 } 1630 1631 for( int i = range.start; i < range.end; ++i) 1632 if(!ippidft(src.ptr<float>(i), (int)src.step,dst.ptr<float>(i), (int)dst.step, pDFTSpec, (Ipp8u*)pBuffer)) 1633 { 1634 *ok = false; 1635 } 1636 1637 if ( sizeBuffer > 0 ) 1638 ippFree( pBuffer ); 1639 1640 ippFree( pDFTSpec ); 1641 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT); 1642 } 1643 1644 private: 1645 const Mat& src; 1646 Mat& dst; 1647 const Dft& ippidft; 1648 int norm_flag; 1649 bool *ok; 1650 1651 const Dft_R_IPPLoop_Invoker& operator= (const Dft_R_IPPLoop_Invoker&); 1652 }; 1653 1654 template <typename Dft> 1655 bool Dft_C_IPPLoop(const Mat& src, Mat& dst, const Dft& ippidft, int norm_flag) 1656 { 1657 bool ok; 1658 parallel_for_(Range(0, src.rows), Dft_C_IPPLoop_Invoker<Dft>(src, dst, ippidft, norm_flag, &ok), src.total()/(double)(1<<16) ); 1659 return ok; 1660 } 1661 1662 template <typename Dft> 1663 bool Dft_R_IPPLoop(const Mat& src, Mat& dst, const Dft& ippidft, int norm_flag) 1664 { 1665 bool ok; 1666 parallel_for_(Range(0, src.rows), Dft_R_IPPLoop_Invoker<Dft>(src, dst, ippidft, norm_flag, &ok), src.total()/(double)(1<<16) ); 1667 return ok; 1668 } 1669 1670 struct IPPDFT_C_Functor 1671 { 1672 IPPDFT_C_Functor(ippiDFT_C_Func _func) : func(_func){} 1673 1674 bool operator()(const Ipp32fc* src, int srcStep, Ipp32fc* dst, int dstStep, const IppiDFTSpec_C_32fc* pDFTSpec, Ipp8u* pBuffer) const 1675 { 1676 return func ? func(src, srcStep, dst, dstStep, pDFTSpec, pBuffer) >= 0 : false; 1677 } 1678 private: 1679 ippiDFT_C_Func func; 1680 }; 1681 1682 struct IPPDFT_R_Functor 1683 { 1684 IPPDFT_R_Functor(ippiDFT_R_Func _func) : func(_func){} 1685 1686 bool operator()(const Ipp32f* src, int srcStep, Ipp32f* dst, int dstStep, const IppiDFTSpec_R_32f* pDFTSpec, Ipp8u* pBuffer) const 1687 { 1688 return func ? func(src, srcStep, dst, dstStep, pDFTSpec, pBuffer) >= 0 : false; 1689 } 1690 private: 1691 ippiDFT_R_Func func; 1692 }; 1693 1694 static bool ippi_DFT_C_32F(const Mat& src, Mat& dst, bool inv, int norm_flag) 1695 { 1696 IppStatus status; 1697 Ipp8u* pBuffer = 0; 1698 Ipp8u* pMemInit= 0; 1699 int sizeBuffer=0; 1700 int sizeSpec=0; 1701 int sizeInit=0; 1702 1703 IppiSize srcRoiSize = {src.cols, src.rows}; 1704 1705 status = ippiDFTGetSize_C_32fc(srcRoiSize, norm_flag, ippAlgHintNone, &sizeSpec, &sizeInit, &sizeBuffer ); 1706 if ( status < 0 ) 1707 return false; 1708 1709 IppiDFTSpec_C_32fc* pDFTSpec = (IppiDFTSpec_C_32fc*)ippMalloc( sizeSpec ); 1710 1711 if ( sizeInit > 0 ) 1712 pMemInit = (Ipp8u*)ippMalloc( sizeInit ); 1713 1714 if ( sizeBuffer > 0 ) 1715 pBuffer = (Ipp8u*)ippMalloc( sizeBuffer ); 1716 1717 status = ippiDFTInit_C_32fc( srcRoiSize, norm_flag, ippAlgHintNone, pDFTSpec, pMemInit ); 1718 1719 if ( sizeInit > 0 ) 1720 ippFree( pMemInit ); 1721 1722 if ( status < 0 ) 1723 { 1724 ippFree( pDFTSpec ); 1725 if ( sizeBuffer > 0 ) 1726 ippFree( pBuffer ); 1727 return false; 1728 } 1729 1730 if (!inv) 1731 status = ippiDFTFwd_CToC_32fc_C1R( src.ptr<Ipp32fc>(), (int)src.step, dst.ptr<Ipp32fc>(), (int)dst.step, pDFTSpec, pBuffer ); 1732 else 1733 status = ippiDFTInv_CToC_32fc_C1R( src.ptr<Ipp32fc>(), (int)src.step, dst.ptr<Ipp32fc>(), (int)dst.step, pDFTSpec, pBuffer ); 1734 1735 if ( sizeBuffer > 0 ) 1736 ippFree( pBuffer ); 1737 1738 ippFree( pDFTSpec ); 1739 1740 if(status >= 0) 1741 { 1742 CV_IMPL_ADD(CV_IMPL_IPP); 1743 return true; 1744 } 1745 return false; 1746 } 1747 1748 static bool ippi_DFT_R_32F(const Mat& src, Mat& dst, bool inv, int norm_flag) 1749 { 1750 IppStatus status; 1751 Ipp8u* pBuffer = 0; 1752 Ipp8u* pMemInit= 0; 1753 int sizeBuffer=0; 1754 int sizeSpec=0; 1755 int sizeInit=0; 1756 1757 IppiSize srcRoiSize = {src.cols, src.rows}; 1758 1759 status = ippiDFTGetSize_R_32f(srcRoiSize, norm_flag, ippAlgHintNone, &sizeSpec, &sizeInit, &sizeBuffer ); 1760 if ( status < 0 ) 1761 return false; 1762 1763 IppiDFTSpec_R_32f* pDFTSpec = (IppiDFTSpec_R_32f*)ippMalloc( sizeSpec ); 1764 1765 if ( sizeInit > 0 ) 1766 pMemInit = (Ipp8u*)ippMalloc( sizeInit ); 1767 1768 if ( sizeBuffer > 0 ) 1769 pBuffer = (Ipp8u*)ippMalloc( sizeBuffer ); 1770 1771 status = ippiDFTInit_R_32f( srcRoiSize, norm_flag, ippAlgHintNone, pDFTSpec, pMemInit ); 1772 1773 if ( sizeInit > 0 ) 1774 ippFree( pMemInit ); 1775 1776 if ( status < 0 ) 1777 { 1778 ippFree( pDFTSpec ); 1779 if ( sizeBuffer > 0 ) 1780 ippFree( pBuffer ); 1781 return false; 1782 } 1783 1784 if (!inv) 1785 status = ippiDFTFwd_RToPack_32f_C1R( src.ptr<float>(), (int)(src.step), dst.ptr<float>(), (int)dst.step, pDFTSpec, pBuffer ); 1786 else 1787 status = ippiDFTInv_PackToR_32f_C1R( src.ptr<float>(), (int)src.step, dst.ptr<float>(), (int)dst.step, pDFTSpec, pBuffer ); 1788 1789 if ( sizeBuffer > 0 ) 1790 ippFree( pBuffer ); 1791 1792 ippFree( pDFTSpec ); 1793 1794 if(status >= 0) 1795 { 1796 CV_IMPL_ADD(CV_IMPL_IPP); 1797 return true; 1798 } 1799 return false; 1800 } 1801 1802 #endif 1803 } 1804 1805 #ifdef HAVE_OPENCL 1806 1807 namespace cv 1808 { 1809 1810 enum FftType 1811 { 1812 R2R = 0, // real to CCS in case forward transform, CCS to real otherwise 1813 C2R = 1, // complex to real in case inverse transform 1814 R2C = 2, // real to complex in case forward transform 1815 C2C = 3 // complex to complex 1816 }; 1817 1818 struct OCL_FftPlan 1819 { 1820 private: 1821 UMat twiddles; 1822 String buildOptions; 1823 int thread_count; 1824 int dft_size; 1825 int dft_depth; 1826 bool status; 1827 1828 public: 1829 OCL_FftPlan(int _size, int _depth) : dft_size(_size), dft_depth(_depth), status(true) 1830 { 1831 CV_Assert( dft_depth == CV_32F || dft_depth == CV_64F ); 1832 1833 int min_radix; 1834 std::vector<int> radixes, blocks; 1835 ocl_getRadixes(dft_size, radixes, blocks, min_radix); 1836 thread_count = dft_size / min_radix; 1837 1838 if (thread_count > (int) ocl::Device::getDefault().maxWorkGroupSize()) 1839 { 1840 status = false; 1841 return; 1842 } 1843 1844 // generate string with radix calls 1845 String radix_processing; 1846 int n = 1, twiddle_size = 0; 1847 for (size_t i=0; i<radixes.size(); i++) 1848 { 1849 int radix = radixes[i], block = blocks[i]; 1850 if (block > 1) 1851 radix_processing += format("fft_radix%d_B%d(smem,twiddles+%d,ind,%d,%d);", radix, block, twiddle_size, n, dft_size/radix); 1852 else 1853 radix_processing += format("fft_radix%d(smem,twiddles+%d,ind,%d,%d);", radix, twiddle_size, n, dft_size/radix); 1854 twiddle_size += (radix-1)*n; 1855 n *= radix; 1856 } 1857 1858 twiddles.create(1, twiddle_size, CV_MAKE_TYPE(dft_depth, 2)); 1859 if (dft_depth == CV_32F) 1860 fillRadixTable<float>(twiddles, radixes); 1861 else 1862 fillRadixTable<double>(twiddles, radixes); 1863 1864 buildOptions = format("-D LOCAL_SIZE=%d -D kercn=%d -D FT=%s -D CT=%s%s -D RADIX_PROCESS=%s", 1865 dft_size, min_radix, ocl::typeToStr(dft_depth), ocl::typeToStr(CV_MAKE_TYPE(dft_depth, 2)), 1866 dft_depth == CV_64F ? " -D DOUBLE_SUPPORT" : "", radix_processing.c_str()); 1867 } 1868 1869 bool enqueueTransform(InputArray _src, OutputArray _dst, int num_dfts, int flags, int fftType, bool rows = true) const 1870 { 1871 if (!status) 1872 return false; 1873 1874 UMat src = _src.getUMat(); 1875 UMat dst = _dst.getUMat(); 1876 1877 size_t globalsize[2]; 1878 size_t localsize[2]; 1879 String kernel_name; 1880 1881 bool is1d = (flags & DFT_ROWS) != 0 || num_dfts == 1; 1882 bool inv = (flags & DFT_INVERSE) != 0; 1883 String options = buildOptions; 1884 1885 if (rows) 1886 { 1887 globalsize[0] = thread_count; globalsize[1] = src.rows; 1888 localsize[0] = thread_count; localsize[1] = 1; 1889 kernel_name = !inv ? "fft_multi_radix_rows" : "ifft_multi_radix_rows"; 1890 if ((is1d || inv) && (flags & DFT_SCALE)) 1891 options += " -D DFT_SCALE"; 1892 } 1893 else 1894 { 1895 globalsize[0] = num_dfts; globalsize[1] = thread_count; 1896 localsize[0] = 1; localsize[1] = thread_count; 1897 kernel_name = !inv ? "fft_multi_radix_cols" : "ifft_multi_radix_cols"; 1898 if (flags & DFT_SCALE) 1899 options += " -D DFT_SCALE"; 1900 } 1901 1902 options += src.channels() == 1 ? " -D REAL_INPUT" : " -D COMPLEX_INPUT"; 1903 options += dst.channels() == 1 ? " -D REAL_OUTPUT" : " -D COMPLEX_OUTPUT"; 1904 options += is1d ? " -D IS_1D" : ""; 1905 1906 if (!inv) 1907 { 1908 if ((is1d && src.channels() == 1) || (rows && (fftType == R2R))) 1909 options += " -D NO_CONJUGATE"; 1910 } 1911 else 1912 { 1913 if (rows && (fftType == C2R || fftType == R2R)) 1914 options += " -D NO_CONJUGATE"; 1915 if (dst.cols % 2 == 0) 1916 options += " -D EVEN"; 1917 } 1918 1919 ocl::Kernel k(kernel_name.c_str(), ocl::core::fft_oclsrc, options); 1920 if (k.empty()) 1921 return false; 1922 1923 k.args(ocl::KernelArg::ReadOnly(src), ocl::KernelArg::WriteOnly(dst), ocl::KernelArg::ReadOnlyNoSize(twiddles), thread_count, num_dfts); 1924 return k.run(2, globalsize, localsize, false); 1925 } 1926 1927 private: 1928 static void ocl_getRadixes(int cols, std::vector<int>& radixes, std::vector<int>& blocks, int& min_radix) 1929 { 1930 int factors[34]; 1931 int nf = DFTFactorize(cols, factors); 1932 1933 int n = 1; 1934 int factor_index = 0; 1935 min_radix = INT_MAX; 1936 1937 // 2^n transforms 1938 if ((factors[factor_index] & 1) == 0) 1939 { 1940 for( ; n < factors[factor_index];) 1941 { 1942 int radix = 2, block = 1; 1943 if (8*n <= factors[0]) 1944 radix = 8; 1945 else if (4*n <= factors[0]) 1946 { 1947 radix = 4; 1948 if (cols % 12 == 0) 1949 block = 3; 1950 else if (cols % 8 == 0) 1951 block = 2; 1952 } 1953 else 1954 { 1955 if (cols % 10 == 0) 1956 block = 5; 1957 else if (cols % 8 == 0) 1958 block = 4; 1959 else if (cols % 6 == 0) 1960 block = 3; 1961 else if (cols % 4 == 0) 1962 block = 2; 1963 } 1964 1965 radixes.push_back(radix); 1966 blocks.push_back(block); 1967 min_radix = min(min_radix, block*radix); 1968 n *= radix; 1969 } 1970 factor_index++; 1971 } 1972 1973 // all the other transforms 1974 for( ; factor_index < nf; factor_index++) 1975 { 1976 int radix = factors[factor_index], block = 1; 1977 if (radix == 3) 1978 { 1979 if (cols % 12 == 0) 1980 block = 4; 1981 else if (cols % 9 == 0) 1982 block = 3; 1983 else if (cols % 6 == 0) 1984 block = 2; 1985 } 1986 else if (radix == 5) 1987 { 1988 if (cols % 10 == 0) 1989 block = 2; 1990 } 1991 radixes.push_back(radix); 1992 blocks.push_back(block); 1993 min_radix = min(min_radix, block*radix); 1994 } 1995 } 1996 1997 template <typename T> 1998 static void fillRadixTable(UMat twiddles, const std::vector<int>& radixes) 1999 { 2000 Mat tw = twiddles.getMat(ACCESS_WRITE); 2001 T* ptr = tw.ptr<T>(); 2002 int ptr_index = 0; 2003 2004 int n = 1; 2005 for (size_t i=0; i<radixes.size(); i++) 2006 { 2007 int radix = radixes[i]; 2008 n *= radix; 2009 2010 for (int j=1; j<radix; j++) 2011 { 2012 double theta = -CV_2PI*j/n; 2013 2014 for (int k=0; k<(n/radix); k++) 2015 { 2016 ptr[ptr_index++] = (T) cos(k*theta); 2017 ptr[ptr_index++] = (T) sin(k*theta); 2018 } 2019 } 2020 } 2021 } 2022 }; 2023 2024 class OCL_FftPlanCache 2025 { 2026 public: 2027 static OCL_FftPlanCache & getInstance() 2028 { 2029 static OCL_FftPlanCache planCache; 2030 return planCache; 2031 } 2032 2033 Ptr<OCL_FftPlan> getFftPlan(int dft_size, int depth) 2034 { 2035 int key = (dft_size << 16) | (depth & 0xFFFF); 2036 std::map<int, Ptr<OCL_FftPlan> >::iterator f = planStorage.find(key); 2037 if (f != planStorage.end()) 2038 { 2039 return f->second; 2040 } 2041 else 2042 { 2043 Ptr<OCL_FftPlan> newPlan = Ptr<OCL_FftPlan>(new OCL_FftPlan(dft_size, depth)); 2044 planStorage[key] = newPlan; 2045 return newPlan; 2046 } 2047 } 2048 2049 ~OCL_FftPlanCache() 2050 { 2051 planStorage.clear(); 2052 } 2053 2054 protected: 2055 OCL_FftPlanCache() : 2056 planStorage() 2057 { 2058 } 2059 std::map<int, Ptr<OCL_FftPlan> > planStorage; 2060 }; 2061 2062 static bool ocl_dft_rows(InputArray _src, OutputArray _dst, int nonzero_rows, int flags, int fftType) 2063 { 2064 int type = _src.type(), depth = CV_MAT_DEPTH(type); 2065 Ptr<OCL_FftPlan> plan = OCL_FftPlanCache::getInstance().getFftPlan(_src.cols(), depth); 2066 return plan->enqueueTransform(_src, _dst, nonzero_rows, flags, fftType, true); 2067 } 2068 2069 static bool ocl_dft_cols(InputArray _src, OutputArray _dst, int nonzero_cols, int flags, int fftType) 2070 { 2071 int type = _src.type(), depth = CV_MAT_DEPTH(type); 2072 Ptr<OCL_FftPlan> plan = OCL_FftPlanCache::getInstance().getFftPlan(_src.rows(), depth); 2073 return plan->enqueueTransform(_src, _dst, nonzero_cols, flags, fftType, false); 2074 } 2075 2076 static bool ocl_dft(InputArray _src, OutputArray _dst, int flags, int nonzero_rows) 2077 { 2078 int type = _src.type(), cn = CV_MAT_CN(type), depth = CV_MAT_DEPTH(type); 2079 Size ssize = _src.size(); 2080 bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0; 2081 2082 if ( !((cn == 1 || cn == 2) && (depth == CV_32F || (depth == CV_64F && doubleSupport))) ) 2083 return false; 2084 2085 // if is not a multiplication of prime numbers { 2, 3, 5 } 2086 if (ssize.area() != getOptimalDFTSize(ssize.area())) 2087 return false; 2088 2089 UMat src = _src.getUMat(); 2090 int complex_input = cn == 2 ? 1 : 0; 2091 int complex_output = (flags & DFT_COMPLEX_OUTPUT) != 0; 2092 int real_input = cn == 1 ? 1 : 0; 2093 int real_output = (flags & DFT_REAL_OUTPUT) != 0; 2094 bool inv = (flags & DFT_INVERSE) != 0 ? 1 : 0; 2095 2096 if( nonzero_rows <= 0 || nonzero_rows > _src.rows() ) 2097 nonzero_rows = _src.rows(); 2098 bool is1d = (flags & DFT_ROWS) != 0 || nonzero_rows == 1; 2099 2100 // if output format is not specified 2101 if (complex_output + real_output == 0) 2102 { 2103 if (real_input) 2104 real_output = 1; 2105 else 2106 complex_output = 1; 2107 } 2108 2109 FftType fftType = (FftType)(complex_input << 0 | complex_output << 1); 2110 2111 // Forward Complex to CCS not supported 2112 if (fftType == C2R && !inv) 2113 fftType = C2C; 2114 2115 // Inverse CCS to Complex not supported 2116 if (fftType == R2C && inv) 2117 fftType = R2R; 2118 2119 UMat output; 2120 if (fftType == C2C || fftType == R2C) 2121 { 2122 // complex output 2123 _dst.create(src.size(), CV_MAKETYPE(depth, 2)); 2124 output = _dst.getUMat(); 2125 } 2126 else 2127 { 2128 // real output 2129 if (is1d) 2130 { 2131 _dst.create(src.size(), CV_MAKETYPE(depth, 1)); 2132 output = _dst.getUMat(); 2133 } 2134 else 2135 { 2136 _dst.create(src.size(), CV_MAKETYPE(depth, 1)); 2137 output.create(src.size(), CV_MAKETYPE(depth, 2)); 2138 } 2139 } 2140 2141 if (!inv) 2142 { 2143 if (!ocl_dft_rows(src, output, nonzero_rows, flags, fftType)) 2144 return false; 2145 2146 if (!is1d) 2147 { 2148 int nonzero_cols = fftType == R2R ? output.cols/2 + 1 : output.cols; 2149 if (!ocl_dft_cols(output, _dst, nonzero_cols, flags, fftType)) 2150 return false; 2151 } 2152 } 2153 else 2154 { 2155 if (fftType == C2C) 2156 { 2157 // complex output 2158 if (!ocl_dft_rows(src, output, nonzero_rows, flags, fftType)) 2159 return false; 2160 2161 if (!is1d) 2162 { 2163 if (!ocl_dft_cols(output, output, output.cols, flags, fftType)) 2164 return false; 2165 } 2166 } 2167 else 2168 { 2169 if (is1d) 2170 { 2171 if (!ocl_dft_rows(src, output, nonzero_rows, flags, fftType)) 2172 return false; 2173 } 2174 else 2175 { 2176 int nonzero_cols = src.cols/2 + 1; 2177 if (!ocl_dft_cols(src, output, nonzero_cols, flags, fftType)) 2178 return false; 2179 2180 if (!ocl_dft_rows(output, _dst, nonzero_rows, flags, fftType)) 2181 return false; 2182 } 2183 } 2184 } 2185 return true; 2186 } 2187 2188 } // namespace cv; 2189 2190 #endif 2191 2192 #ifdef HAVE_CLAMDFFT 2193 2194 namespace cv { 2195 2196 #define CLAMDDFT_Assert(func) \ 2197 { \ 2198 clAmdFftStatus s = (func); \ 2199 CV_Assert(s == CLFFT_SUCCESS); \ 2200 } 2201 2202 class PlanCache 2203 { 2204 struct FftPlan 2205 { 2206 FftPlan(const Size & _dft_size, int _src_step, int _dst_step, bool _doubleFP, bool _inplace, int _flags, FftType _fftType) : 2207 dft_size(_dft_size), src_step(_src_step), dst_step(_dst_step), 2208 doubleFP(_doubleFP), inplace(_inplace), flags(_flags), fftType(_fftType), 2209 context((cl_context)ocl::Context::getDefault().ptr()), plHandle(0) 2210 { 2211 bool dft_inverse = (flags & DFT_INVERSE) != 0; 2212 bool dft_scale = (flags & DFT_SCALE) != 0; 2213 bool dft_rows = (flags & DFT_ROWS) != 0; 2214 2215 clAmdFftLayout inLayout = CLFFT_REAL, outLayout = CLFFT_REAL; 2216 clAmdFftDim dim = dft_size.height == 1 || dft_rows ? CLFFT_1D : CLFFT_2D; 2217 2218 size_t batchSize = dft_rows ? dft_size.height : 1; 2219 size_t clLengthsIn[3] = { dft_size.width, dft_rows ? 1 : dft_size.height, 1 }; 2220 size_t clStridesIn[3] = { 1, 1, 1 }; 2221 size_t clStridesOut[3] = { 1, 1, 1 }; 2222 int elemSize = doubleFP ? sizeof(double) : sizeof(float); 2223 2224 switch (fftType) 2225 { 2226 case C2C: 2227 inLayout = CLFFT_COMPLEX_INTERLEAVED; 2228 outLayout = CLFFT_COMPLEX_INTERLEAVED; 2229 clStridesIn[1] = src_step / (elemSize << 1); 2230 clStridesOut[1] = dst_step / (elemSize << 1); 2231 break; 2232 case R2C: 2233 inLayout = CLFFT_REAL; 2234 outLayout = CLFFT_HERMITIAN_INTERLEAVED; 2235 clStridesIn[1] = src_step / elemSize; 2236 clStridesOut[1] = dst_step / (elemSize << 1); 2237 break; 2238 case C2R: 2239 inLayout = CLFFT_HERMITIAN_INTERLEAVED; 2240 outLayout = CLFFT_REAL; 2241 clStridesIn[1] = src_step / (elemSize << 1); 2242 clStridesOut[1] = dst_step / elemSize; 2243 break; 2244 case R2R: 2245 default: 2246 CV_Error(Error::StsNotImplemented, "AMD Fft does not support this type"); 2247 break; 2248 } 2249 2250 clStridesIn[2] = dft_rows ? clStridesIn[1] : dft_size.width * clStridesIn[1]; 2251 clStridesOut[2] = dft_rows ? clStridesOut[1] : dft_size.width * clStridesOut[1]; 2252 2253 CLAMDDFT_Assert(clAmdFftCreateDefaultPlan(&plHandle, (cl_context)ocl::Context::getDefault().ptr(), dim, clLengthsIn)) 2254 2255 // setting plan properties 2256 CLAMDDFT_Assert(clAmdFftSetPlanPrecision(plHandle, doubleFP ? CLFFT_DOUBLE : CLFFT_SINGLE)); 2257 CLAMDDFT_Assert(clAmdFftSetResultLocation(plHandle, inplace ? CLFFT_INPLACE : CLFFT_OUTOFPLACE)) 2258 CLAMDDFT_Assert(clAmdFftSetLayout(plHandle, inLayout, outLayout)) 2259 CLAMDDFT_Assert(clAmdFftSetPlanBatchSize(plHandle, batchSize)) 2260 CLAMDDFT_Assert(clAmdFftSetPlanInStride(plHandle, dim, clStridesIn)) 2261 CLAMDDFT_Assert(clAmdFftSetPlanOutStride(plHandle, dim, clStridesOut)) 2262 CLAMDDFT_Assert(clAmdFftSetPlanDistance(plHandle, clStridesIn[dim], clStridesOut[dim])) 2263 2264 float scale = dft_scale ? 1.0f / (dft_rows ? dft_size.width : dft_size.area()) : 1.0f; 2265 CLAMDDFT_Assert(clAmdFftSetPlanScale(plHandle, dft_inverse ? CLFFT_BACKWARD : CLFFT_FORWARD, scale)) 2266 2267 // ready to bake 2268 cl_command_queue queue = (cl_command_queue)ocl::Queue::getDefault().ptr(); 2269 CLAMDDFT_Assert(clAmdFftBakePlan(plHandle, 1, &queue, NULL, NULL)) 2270 } 2271 2272 ~FftPlan() 2273 { 2274 // clAmdFftDestroyPlan(&plHandle); 2275 } 2276 2277 friend class PlanCache; 2278 2279 private: 2280 Size dft_size; 2281 int src_step, dst_step; 2282 bool doubleFP; 2283 bool inplace; 2284 int flags; 2285 FftType fftType; 2286 2287 cl_context context; 2288 clAmdFftPlanHandle plHandle; 2289 }; 2290 2291 public: 2292 static PlanCache & getInstance() 2293 { 2294 static PlanCache planCache; 2295 return planCache; 2296 } 2297 2298 clAmdFftPlanHandle getPlanHandle(const Size & dft_size, int src_step, int dst_step, bool doubleFP, 2299 bool inplace, int flags, FftType fftType) 2300 { 2301 cl_context currentContext = (cl_context)ocl::Context::getDefault().ptr(); 2302 2303 for (size_t i = 0, size = planStorage.size(); i < size; ++i) 2304 { 2305 const FftPlan * const plan = planStorage[i]; 2306 2307 if (plan->dft_size == dft_size && 2308 plan->flags == flags && 2309 plan->src_step == src_step && 2310 plan->dst_step == dst_step && 2311 plan->doubleFP == doubleFP && 2312 plan->fftType == fftType && 2313 plan->inplace == inplace) 2314 { 2315 if (plan->context != currentContext) 2316 { 2317 planStorage.erase(planStorage.begin() + i); 2318 break; 2319 } 2320 2321 return plan->plHandle; 2322 } 2323 } 2324 2325 // no baked plan is found, so let's create a new one 2326 Ptr<FftPlan> newPlan = Ptr<FftPlan>(new FftPlan(dft_size, src_step, dst_step, doubleFP, inplace, flags, fftType)); 2327 planStorage.push_back(newPlan); 2328 2329 return newPlan->plHandle; 2330 } 2331 2332 ~PlanCache() 2333 { 2334 planStorage.clear(); 2335 } 2336 2337 protected: 2338 PlanCache() : 2339 planStorage() 2340 { 2341 } 2342 2343 std::vector<Ptr<FftPlan> > planStorage; 2344 }; 2345 2346 extern "C" { 2347 2348 static void CL_CALLBACK oclCleanupCallback(cl_event e, cl_int, void *p) 2349 { 2350 UMatData * u = (UMatData *)p; 2351 2352 if( u && CV_XADD(&u->urefcount, -1) == 1 ) 2353 u->currAllocator->deallocate(u); 2354 u = 0; 2355 2356 clReleaseEvent(e), e = 0; 2357 } 2358 2359 } 2360 2361 static bool ocl_dft_amdfft(InputArray _src, OutputArray _dst, int flags) 2362 { 2363 int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type); 2364 Size ssize = _src.size(); 2365 2366 bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0; 2367 if ( (!doubleSupport && depth == CV_64F) || 2368 !(type == CV_32FC1 || type == CV_32FC2 || type == CV_64FC1 || type == CV_64FC2) || 2369 _src.offset() != 0) 2370 return false; 2371 2372 // if is not a multiplication of prime numbers { 2, 3, 5 } 2373 if (ssize.area() != getOptimalDFTSize(ssize.area())) 2374 return false; 2375 2376 int dst_complex_input = cn == 2 ? 1 : 0; 2377 bool dft_inverse = (flags & DFT_INVERSE) != 0 ? 1 : 0; 2378 int dft_complex_output = (flags & DFT_COMPLEX_OUTPUT) != 0; 2379 bool dft_real_output = (flags & DFT_REAL_OUTPUT) != 0; 2380 2381 CV_Assert(dft_complex_output + dft_real_output < 2); 2382 FftType fftType = (FftType)(dst_complex_input << 0 | dft_complex_output << 1); 2383 2384 switch (fftType) 2385 { 2386 case C2C: 2387 _dst.create(ssize.height, ssize.width, CV_MAKE_TYPE(depth, 2)); 2388 break; 2389 case R2C: // TODO implement it if possible 2390 case C2R: // TODO implement it if possible 2391 case R2R: // AMD Fft does not support this type 2392 default: 2393 return false; 2394 } 2395 2396 UMat src = _src.getUMat(), dst = _dst.getUMat(); 2397 bool inplace = src.u == dst.u; 2398 2399 clAmdFftPlanHandle plHandle = PlanCache::getInstance(). 2400 getPlanHandle(ssize, (int)src.step, (int)dst.step, 2401 depth == CV_64F, inplace, flags, fftType); 2402 2403 // get the bufferSize 2404 size_t bufferSize = 0; 2405 CLAMDDFT_Assert(clAmdFftGetTmpBufSize(plHandle, &bufferSize)) 2406 UMat tmpBuffer(1, (int)bufferSize, CV_8UC1); 2407 2408 cl_mem srcarg = (cl_mem)src.handle(ACCESS_READ); 2409 cl_mem dstarg = (cl_mem)dst.handle(ACCESS_RW); 2410 2411 cl_command_queue queue = (cl_command_queue)ocl::Queue::getDefault().ptr(); 2412 cl_event e = 0; 2413 2414 CLAMDDFT_Assert(clAmdFftEnqueueTransform(plHandle, dft_inverse ? CLFFT_BACKWARD : CLFFT_FORWARD, 2415 1, &queue, 0, NULL, &e, 2416 &srcarg, &dstarg, (cl_mem)tmpBuffer.handle(ACCESS_RW))) 2417 2418 tmpBuffer.addref(); 2419 clSetEventCallback(e, CL_COMPLETE, oclCleanupCallback, tmpBuffer.u); 2420 return true; 2421 } 2422 2423 #undef DFT_ASSERT 2424 2425 } 2426 2427 #endif // HAVE_CLAMDFFT 2428 2429 namespace cv 2430 { 2431 static void complementComplexOutput(Mat& dst, int len, int dft_dims) 2432 { 2433 int i, n = dst.cols; 2434 size_t elem_size = dst.elemSize1(); 2435 if( elem_size == sizeof(float) ) 2436 { 2437 float* p0 = dst.ptr<float>(); 2438 size_t dstep = dst.step/sizeof(p0[0]); 2439 for( i = 0; i < len; i++ ) 2440 { 2441 float* p = p0 + dstep*i; 2442 float* q = dft_dims == 1 || i == 0 || i*2 == len ? p : p0 + dstep*(len-i); 2443 2444 for( int j = 1; j < (n+1)/2; j++ ) 2445 { 2446 p[(n-j)*2] = q[j*2]; 2447 p[(n-j)*2+1] = -q[j*2+1]; 2448 } 2449 } 2450 } 2451 else 2452 { 2453 double* p0 = dst.ptr<double>(); 2454 size_t dstep = dst.step/sizeof(p0[0]); 2455 for( i = 0; i < len; i++ ) 2456 { 2457 double* p = p0 + dstep*i; 2458 double* q = dft_dims == 1 || i == 0 || i*2 == len ? p : p0 + dstep*(len-i); 2459 2460 for( int j = 1; j < (n+1)/2; j++ ) 2461 { 2462 p[(n-j)*2] = q[j*2]; 2463 p[(n-j)*2+1] = -q[j*2+1]; 2464 } 2465 } 2466 } 2467 } 2468 } 2469 2470 void cv::dft( InputArray _src0, OutputArray _dst, int flags, int nonzero_rows ) 2471 { 2472 #ifdef HAVE_CLAMDFFT 2473 CV_OCL_RUN(ocl::haveAmdFft() && ocl::Device::getDefault().type() != ocl::Device::TYPE_CPU && 2474 _dst.isUMat() && _src0.dims() <= 2 && nonzero_rows == 0, 2475 ocl_dft_amdfft(_src0, _dst, flags)) 2476 #endif 2477 2478 #ifdef HAVE_OPENCL 2479 CV_OCL_RUN(_dst.isUMat() && _src0.dims() <= 2, 2480 ocl_dft(_src0, _dst, flags, nonzero_rows)) 2481 #endif 2482 2483 static DFTFunc dft_tbl[6] = 2484 { 2485 (DFTFunc)DFT_32f, 2486 (DFTFunc)RealDFT_32f, 2487 (DFTFunc)CCSIDFT_32f, 2488 (DFTFunc)DFT_64f, 2489 (DFTFunc)RealDFT_64f, 2490 (DFTFunc)CCSIDFT_64f 2491 }; 2492 AutoBuffer<uchar> buf; 2493 Mat src0 = _src0.getMat(), src = src0; 2494 int prev_len = 0, stage = 0; 2495 bool inv = (flags & DFT_INVERSE) != 0; 2496 int nf = 0, real_transform = src.channels() == 1 || (inv && (flags & DFT_REAL_OUTPUT)!=0); 2497 int type = src.type(), depth = src.depth(); 2498 int elem_size = (int)src.elemSize1(), complex_elem_size = elem_size*2; 2499 int factors[34]; 2500 bool inplace_transform = false; 2501 #ifdef USE_IPP_DFT 2502 AutoBuffer<uchar> ippbuf; 2503 int ipp_norm_flag = !(flags & DFT_SCALE) ? 8 : inv ? 2 : 1; 2504 #endif 2505 2506 CV_Assert( type == CV_32FC1 || type == CV_32FC2 || type == CV_64FC1 || type == CV_64FC2 ); 2507 2508 if( !inv && src.channels() == 1 && (flags & DFT_COMPLEX_OUTPUT) ) 2509 _dst.create( src.size(), CV_MAKETYPE(depth, 2) ); 2510 else if( inv && src.channels() == 2 && (flags & DFT_REAL_OUTPUT) ) 2511 _dst.create( src.size(), depth ); 2512 else 2513 _dst.create( src.size(), type ); 2514 2515 Mat dst = _dst.getMat(); 2516 2517 #if defined USE_IPP_DFT 2518 CV_IPP_CHECK() 2519 { 2520 if ((src.depth() == CV_32F) && (src.total()>(int)(1<<6)) && nonzero_rows == 0) 2521 { 2522 if ((flags & DFT_ROWS) == 0) 2523 { 2524 if (src.channels() == 2 && !(inv && (flags & DFT_REAL_OUTPUT))) 2525 { 2526 if (ippi_DFT_C_32F(src, dst, inv, ipp_norm_flag)) 2527 { 2528 CV_IMPL_ADD(CV_IMPL_IPP); 2529 return; 2530 } 2531 setIppErrorStatus(); 2532 } 2533 if (src.channels() == 1 && (inv || !(flags & DFT_COMPLEX_OUTPUT))) 2534 { 2535 if (ippi_DFT_R_32F(src, dst, inv, ipp_norm_flag)) 2536 { 2537 CV_IMPL_ADD(CV_IMPL_IPP); 2538 return; 2539 } 2540 setIppErrorStatus(); 2541 } 2542 } 2543 else 2544 { 2545 if (src.channels() == 2 && !(inv && (flags & DFT_REAL_OUTPUT))) 2546 { 2547 ippiDFT_C_Func ippiFunc = inv ? (ippiDFT_C_Func)ippiDFTInv_CToC_32fc_C1R : (ippiDFT_C_Func)ippiDFTFwd_CToC_32fc_C1R; 2548 if (Dft_C_IPPLoop(src, dst, IPPDFT_C_Functor(ippiFunc),ipp_norm_flag)) 2549 { 2550 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT); 2551 return; 2552 } 2553 setIppErrorStatus(); 2554 } 2555 if (src.channels() == 1 && (inv || !(flags & DFT_COMPLEX_OUTPUT))) 2556 { 2557 ippiDFT_R_Func ippiFunc = inv ? (ippiDFT_R_Func)ippiDFTInv_PackToR_32f_C1R : (ippiDFT_R_Func)ippiDFTFwd_RToPack_32f_C1R; 2558 if (Dft_R_IPPLoop(src, dst, IPPDFT_R_Functor(ippiFunc),ipp_norm_flag)) 2559 { 2560 CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT); 2561 return; 2562 } 2563 setIppErrorStatus(); 2564 } 2565 } 2566 } 2567 } 2568 #endif 2569 2570 if( !real_transform ) 2571 elem_size = complex_elem_size; 2572 2573 if( src.cols == 1 && nonzero_rows > 0 ) 2574 CV_Error( CV_StsNotImplemented, 2575 "This mode (using nonzero_rows with a single-column matrix) breaks the function's logic, so it is prohibited.\n" 2576 "For fast convolution/correlation use 2-column matrix or single-row matrix instead" ); 2577 2578 // determine, which transform to do first - row-wise 2579 // (stage 0) or column-wise (stage 1) transform 2580 if( !(flags & DFT_ROWS) && src.rows > 1 && 2581 ((src.cols == 1 && (!src.isContinuous() || !dst.isContinuous())) || 2582 (src.cols > 1 && inv && real_transform)) ) 2583 stage = 1; 2584 2585 for(;;) 2586 { 2587 double scale = 1; 2588 uchar* wave = 0; 2589 int* itab = 0; 2590 uchar* ptr; 2591 int i, len, count, sz = 0; 2592 int use_buf = 0, odd_real = 0; 2593 DFTFunc dft_func; 2594 2595 if( stage == 0 ) // row-wise transform 2596 { 2597 len = !inv ? src.cols : dst.cols; 2598 count = src.rows; 2599 if( len == 1 && !(flags & DFT_ROWS) ) 2600 { 2601 len = !inv ? src.rows : dst.rows; 2602 count = 1; 2603 } 2604 odd_real = real_transform && (len & 1); 2605 } 2606 else 2607 { 2608 len = dst.rows; 2609 count = !inv ? src0.cols : dst.cols; 2610 sz = 2*len*complex_elem_size; 2611 } 2612 2613 void *spec = 0; 2614 #ifdef USE_IPP_DFT 2615 if( CV_IPP_CHECK_COND && (len*count >= 64) ) // use IPP DFT if available 2616 { 2617 int specsize=0, initsize=0, worksize=0; 2618 IppDFTGetSizeFunc getSizeFunc = 0; 2619 IppDFTInitFunc initFunc = 0; 2620 2621 if( real_transform && stage == 0 ) 2622 { 2623 if( depth == CV_32F ) 2624 { 2625 getSizeFunc = ippsDFTGetSize_R_32f; 2626 initFunc = (IppDFTInitFunc)ippsDFTInit_R_32f; 2627 } 2628 else 2629 { 2630 getSizeFunc = ippsDFTGetSize_R_64f; 2631 initFunc = (IppDFTInitFunc)ippsDFTInit_R_64f; 2632 } 2633 } 2634 else 2635 { 2636 if( depth == CV_32F ) 2637 { 2638 getSizeFunc = ippsDFTGetSize_C_32fc; 2639 initFunc = (IppDFTInitFunc)ippsDFTInit_C_32fc; 2640 } 2641 else 2642 { 2643 getSizeFunc = ippsDFTGetSize_C_64fc; 2644 initFunc = (IppDFTInitFunc)ippsDFTInit_C_64fc; 2645 } 2646 } 2647 if( getSizeFunc(len, ipp_norm_flag, ippAlgHintNone, &specsize, &initsize, &worksize) >= 0 ) 2648 { 2649 ippbuf.allocate(specsize + initsize + 64); 2650 spec = alignPtr(&ippbuf[0], 32); 2651 uchar* initbuf = alignPtr((uchar*)spec + specsize, 32); 2652 if( initFunc(len, ipp_norm_flag, ippAlgHintNone, spec, initbuf) < 0 ) 2653 spec = 0; 2654 sz += worksize; 2655 } 2656 else 2657 setIppErrorStatus(); 2658 } 2659 else 2660 #endif 2661 { 2662 if( len != prev_len ) 2663 nf = DFTFactorize( len, factors ); 2664 2665 inplace_transform = factors[0] == factors[nf-1]; 2666 sz += len*(complex_elem_size + sizeof(int)); 2667 i = nf > 1 && (factors[0] & 1) == 0; 2668 if( (factors[i] & 1) != 0 && factors[i] > 5 ) 2669 sz += (factors[i]+1)*complex_elem_size; 2670 2671 if( (stage == 0 && ((src.data == dst.data && !inplace_transform) || odd_real)) || 2672 (stage == 1 && !inplace_transform) ) 2673 { 2674 use_buf = 1; 2675 sz += len*complex_elem_size; 2676 } 2677 } 2678 2679 ptr = (uchar*)buf; 2680 buf.allocate( sz + 32 ); 2681 if( ptr != (uchar*)buf ) 2682 prev_len = 0; // because we release the buffer, 2683 // force recalculation of 2684 // twiddle factors and permutation table 2685 ptr = (uchar*)buf; 2686 if( !spec ) 2687 { 2688 wave = ptr; 2689 ptr += len*complex_elem_size; 2690 itab = (int*)ptr; 2691 ptr = (uchar*)cvAlignPtr( ptr + len*sizeof(int), 16 ); 2692 2693 if( len != prev_len || (!inplace_transform && inv && real_transform)) 2694 DFTInit( len, nf, factors, itab, complex_elem_size, 2695 wave, stage == 0 && inv && real_transform ); 2696 // otherwise reuse the tables calculated on the previous stage 2697 } 2698 2699 if( stage == 0 ) 2700 { 2701 uchar* tmp_buf = 0; 2702 int dptr_offset = 0; 2703 int dst_full_len = len*elem_size; 2704 int _flags = (int)inv + (src.channels() != dst.channels() ? 2705 DFT_COMPLEX_INPUT_OR_OUTPUT : 0); 2706 if( use_buf ) 2707 { 2708 tmp_buf = ptr; 2709 ptr += len*complex_elem_size; 2710 if( odd_real && !inv && len > 1 && 2711 !(_flags & DFT_COMPLEX_INPUT_OR_OUTPUT)) 2712 dptr_offset = elem_size; 2713 } 2714 2715 if( !inv && (_flags & DFT_COMPLEX_INPUT_OR_OUTPUT) ) 2716 dst_full_len += (len & 1) ? elem_size : complex_elem_size; 2717 2718 dft_func = dft_tbl[(!real_transform ? 0 : !inv ? 1 : 2) + (depth == CV_64F)*3]; 2719 2720 if( count > 1 && !(flags & DFT_ROWS) && (!inv || !real_transform) ) 2721 stage = 1; 2722 else if( flags & CV_DXT_SCALE ) 2723 scale = 1./(len * (flags & DFT_ROWS ? 1 : count)); 2724 2725 if( nonzero_rows <= 0 || nonzero_rows > count ) 2726 nonzero_rows = count; 2727 2728 for( i = 0; i < nonzero_rows; i++ ) 2729 { 2730 const uchar* sptr = src.ptr(i); 2731 uchar* dptr0 = dst.ptr(i); 2732 uchar* dptr = dptr0; 2733 2734 if( tmp_buf ) 2735 dptr = tmp_buf; 2736 2737 dft_func( sptr, dptr, len, nf, factors, itab, wave, len, spec, ptr, _flags, scale ); 2738 if( dptr != dptr0 ) 2739 memcpy( dptr0, dptr + dptr_offset, dst_full_len ); 2740 } 2741 2742 for( ; i < count; i++ ) 2743 { 2744 uchar* dptr0 = dst.ptr(i); 2745 memset( dptr0, 0, dst_full_len ); 2746 } 2747 2748 if( stage != 1 ) 2749 { 2750 if( !inv && real_transform && dst.channels() == 2 ) 2751 complementComplexOutput(dst, nonzero_rows, 1); 2752 break; 2753 } 2754 src = dst; 2755 } 2756 else 2757 { 2758 int a = 0, b = count; 2759 uchar *buf0, *buf1, *dbuf0, *dbuf1; 2760 const uchar* sptr0 = src.ptr(); 2761 uchar* dptr0 = dst.ptr(); 2762 buf0 = ptr; 2763 ptr += len*complex_elem_size; 2764 buf1 = ptr; 2765 ptr += len*complex_elem_size; 2766 dbuf0 = buf0, dbuf1 = buf1; 2767 2768 if( use_buf ) 2769 { 2770 dbuf1 = ptr; 2771 dbuf0 = buf1; 2772 ptr += len*complex_elem_size; 2773 } 2774 2775 dft_func = dft_tbl[(depth == CV_64F)*3]; 2776 2777 if( real_transform && inv && src.cols > 1 ) 2778 stage = 0; 2779 else if( flags & CV_DXT_SCALE ) 2780 scale = 1./(len * count); 2781 2782 if( real_transform ) 2783 { 2784 int even; 2785 a = 1; 2786 even = (count & 1) == 0; 2787 b = (count+1)/2; 2788 if( !inv ) 2789 { 2790 memset( buf0, 0, len*complex_elem_size ); 2791 CopyColumn( sptr0, src.step, buf0, complex_elem_size, len, elem_size ); 2792 sptr0 += dst.channels()*elem_size; 2793 if( even ) 2794 { 2795 memset( buf1, 0, len*complex_elem_size ); 2796 CopyColumn( sptr0 + (count-2)*elem_size, src.step, 2797 buf1, complex_elem_size, len, elem_size ); 2798 } 2799 } 2800 else if( src.channels() == 1 ) 2801 { 2802 CopyColumn( sptr0, src.step, buf0, elem_size, len, elem_size ); 2803 ExpandCCS( buf0, len, elem_size ); 2804 if( even ) 2805 { 2806 CopyColumn( sptr0 + (count-1)*elem_size, src.step, 2807 buf1, elem_size, len, elem_size ); 2808 ExpandCCS( buf1, len, elem_size ); 2809 } 2810 sptr0 += elem_size; 2811 } 2812 else 2813 { 2814 CopyColumn( sptr0, src.step, buf0, complex_elem_size, len, complex_elem_size ); 2815 if( even ) 2816 { 2817 CopyColumn( sptr0 + b*complex_elem_size, src.step, 2818 buf1, complex_elem_size, len, complex_elem_size ); 2819 } 2820 sptr0 += complex_elem_size; 2821 } 2822 2823 if( even ) 2824 dft_func( buf1, dbuf1, len, nf, factors, itab, 2825 wave, len, spec, ptr, inv, scale ); 2826 dft_func( buf0, dbuf0, len, nf, factors, itab, 2827 wave, len, spec, ptr, inv, scale ); 2828 2829 if( dst.channels() == 1 ) 2830 { 2831 if( !inv ) 2832 { 2833 // copy the half of output vector to the first/last column. 2834 // before doing that, defgragment the vector 2835 memcpy( dbuf0 + elem_size, dbuf0, elem_size ); 2836 CopyColumn( dbuf0 + elem_size, elem_size, dptr0, 2837 dst.step, len, elem_size ); 2838 if( even ) 2839 { 2840 memcpy( dbuf1 + elem_size, dbuf1, elem_size ); 2841 CopyColumn( dbuf1 + elem_size, elem_size, 2842 dptr0 + (count-1)*elem_size, 2843 dst.step, len, elem_size ); 2844 } 2845 dptr0 += elem_size; 2846 } 2847 else 2848 { 2849 // copy the real part of the complex vector to the first/last column 2850 CopyColumn( dbuf0, complex_elem_size, dptr0, dst.step, len, elem_size ); 2851 if( even ) 2852 CopyColumn( dbuf1, complex_elem_size, dptr0 + (count-1)*elem_size, 2853 dst.step, len, elem_size ); 2854 dptr0 += elem_size; 2855 } 2856 } 2857 else 2858 { 2859 assert( !inv ); 2860 CopyColumn( dbuf0, complex_elem_size, dptr0, 2861 dst.step, len, complex_elem_size ); 2862 if( even ) 2863 CopyColumn( dbuf1, complex_elem_size, 2864 dptr0 + b*complex_elem_size, 2865 dst.step, len, complex_elem_size ); 2866 dptr0 += complex_elem_size; 2867 } 2868 } 2869 2870 for( i = a; i < b; i += 2 ) 2871 { 2872 if( i+1 < b ) 2873 { 2874 CopyFrom2Columns( sptr0, src.step, buf0, buf1, len, complex_elem_size ); 2875 dft_func( buf1, dbuf1, len, nf, factors, itab, 2876 wave, len, spec, ptr, inv, scale ); 2877 } 2878 else 2879 CopyColumn( sptr0, src.step, buf0, complex_elem_size, len, complex_elem_size ); 2880 2881 dft_func( buf0, dbuf0, len, nf, factors, itab, 2882 wave, len, spec, ptr, inv, scale ); 2883 2884 if( i+1 < b ) 2885 CopyTo2Columns( dbuf0, dbuf1, dptr0, dst.step, len, complex_elem_size ); 2886 else 2887 CopyColumn( dbuf0, complex_elem_size, dptr0, dst.step, len, complex_elem_size ); 2888 sptr0 += 2*complex_elem_size; 2889 dptr0 += 2*complex_elem_size; 2890 } 2891 2892 if( stage != 0 ) 2893 { 2894 if( !inv && real_transform && dst.channels() == 2 && len > 1 ) 2895 complementComplexOutput(dst, len, 2); 2896 break; 2897 } 2898 src = dst; 2899 } 2900 } 2901 } 2902 2903 2904 void cv::idft( InputArray src, OutputArray dst, int flags, int nonzero_rows ) 2905 { 2906 dft( src, dst, flags | DFT_INVERSE, nonzero_rows ); 2907 } 2908 2909 #ifdef HAVE_OPENCL 2910 2911 namespace cv { 2912 2913 static bool ocl_mulSpectrums( InputArray _srcA, InputArray _srcB, 2914 OutputArray _dst, int flags, bool conjB ) 2915 { 2916 int atype = _srcA.type(), btype = _srcB.type(), 2917 rowsPerWI = ocl::Device::getDefault().isIntel() ? 4 : 1; 2918 Size asize = _srcA.size(), bsize = _srcB.size(); 2919 CV_Assert(asize == bsize); 2920 2921 if ( !(atype == CV_32FC2 && btype == CV_32FC2) || flags != 0 ) 2922 return false; 2923 2924 UMat A = _srcA.getUMat(), B = _srcB.getUMat(); 2925 CV_Assert(A.size() == B.size()); 2926 2927 _dst.create(A.size(), atype); 2928 UMat dst = _dst.getUMat(); 2929 2930 ocl::Kernel k("mulAndScaleSpectrums", 2931 ocl::core::mulspectrums_oclsrc, 2932 format("%s", conjB ? "-D CONJ" : "")); 2933 if (k.empty()) 2934 return false; 2935 2936 k.args(ocl::KernelArg::ReadOnlyNoSize(A), ocl::KernelArg::ReadOnlyNoSize(B), 2937 ocl::KernelArg::WriteOnly(dst), rowsPerWI); 2938 2939 size_t globalsize[2] = { asize.width, (asize.height + rowsPerWI - 1) / rowsPerWI }; 2940 return k.run(2, globalsize, NULL, false); 2941 } 2942 2943 } 2944 2945 #endif 2946 2947 void cv::mulSpectrums( InputArray _srcA, InputArray _srcB, 2948 OutputArray _dst, int flags, bool conjB ) 2949 { 2950 CV_OCL_RUN(_dst.isUMat() && _srcA.dims() <= 2 && _srcB.dims() <= 2, 2951 ocl_mulSpectrums(_srcA, _srcB, _dst, flags, conjB)) 2952 2953 Mat srcA = _srcA.getMat(), srcB = _srcB.getMat(); 2954 int depth = srcA.depth(), cn = srcA.channels(), type = srcA.type(); 2955 int rows = srcA.rows, cols = srcA.cols; 2956 int j, k; 2957 2958 CV_Assert( type == srcB.type() && srcA.size() == srcB.size() ); 2959 CV_Assert( type == CV_32FC1 || type == CV_32FC2 || type == CV_64FC1 || type == CV_64FC2 ); 2960 2961 _dst.create( srcA.rows, srcA.cols, type ); 2962 Mat dst = _dst.getMat(); 2963 2964 bool is_1d = (flags & DFT_ROWS) || (rows == 1 || (cols == 1 && 2965 srcA.isContinuous() && srcB.isContinuous() && dst.isContinuous())); 2966 2967 if( is_1d && !(flags & DFT_ROWS) ) 2968 cols = cols + rows - 1, rows = 1; 2969 2970 int ncols = cols*cn; 2971 int j0 = cn == 1; 2972 int j1 = ncols - (cols % 2 == 0 && cn == 1); 2973 2974 if( depth == CV_32F ) 2975 { 2976 const float* dataA = srcA.ptr<float>(); 2977 const float* dataB = srcB.ptr<float>(); 2978 float* dataC = dst.ptr<float>(); 2979 2980 size_t stepA = srcA.step/sizeof(dataA[0]); 2981 size_t stepB = srcB.step/sizeof(dataB[0]); 2982 size_t stepC = dst.step/sizeof(dataC[0]); 2983 2984 if( !is_1d && cn == 1 ) 2985 { 2986 for( k = 0; k < (cols % 2 ? 1 : 2); k++ ) 2987 { 2988 if( k == 1 ) 2989 dataA += cols - 1, dataB += cols - 1, dataC += cols - 1; 2990 dataC[0] = dataA[0]*dataB[0]; 2991 if( rows % 2 == 0 ) 2992 dataC[(rows-1)*stepC] = dataA[(rows-1)*stepA]*dataB[(rows-1)*stepB]; 2993 if( !conjB ) 2994 for( j = 1; j <= rows - 2; j += 2 ) 2995 { 2996 double re = (double)dataA[j*stepA]*dataB[j*stepB] - 2997 (double)dataA[(j+1)*stepA]*dataB[(j+1)*stepB]; 2998 double im = (double)dataA[j*stepA]*dataB[(j+1)*stepB] + 2999 (double)dataA[(j+1)*stepA]*dataB[j*stepB]; 3000 dataC[j*stepC] = (float)re; dataC[(j+1)*stepC] = (float)im; 3001 } 3002 else 3003 for( j = 1; j <= rows - 2; j += 2 ) 3004 { 3005 double re = (double)dataA[j*stepA]*dataB[j*stepB] + 3006 (double)dataA[(j+1)*stepA]*dataB[(j+1)*stepB]; 3007 double im = (double)dataA[(j+1)*stepA]*dataB[j*stepB] - 3008 (double)dataA[j*stepA]*dataB[(j+1)*stepB]; 3009 dataC[j*stepC] = (float)re; dataC[(j+1)*stepC] = (float)im; 3010 } 3011 if( k == 1 ) 3012 dataA -= cols - 1, dataB -= cols - 1, dataC -= cols - 1; 3013 } 3014 } 3015 3016 for( ; rows--; dataA += stepA, dataB += stepB, dataC += stepC ) 3017 { 3018 if( is_1d && cn == 1 ) 3019 { 3020 dataC[0] = dataA[0]*dataB[0]; 3021 if( cols % 2 == 0 ) 3022 dataC[j1] = dataA[j1]*dataB[j1]; 3023 } 3024 3025 if( !conjB ) 3026 for( j = j0; j < j1; j += 2 ) 3027 { 3028 double re = (double)dataA[j]*dataB[j] - (double)dataA[j+1]*dataB[j+1]; 3029 double im = (double)dataA[j+1]*dataB[j] + (double)dataA[j]*dataB[j+1]; 3030 dataC[j] = (float)re; dataC[j+1] = (float)im; 3031 } 3032 else 3033 for( j = j0; j < j1; j += 2 ) 3034 { 3035 double re = (double)dataA[j]*dataB[j] + (double)dataA[j+1]*dataB[j+1]; 3036 double im = (double)dataA[j+1]*dataB[j] - (double)dataA[j]*dataB[j+1]; 3037 dataC[j] = (float)re; dataC[j+1] = (float)im; 3038 } 3039 } 3040 } 3041 else 3042 { 3043 const double* dataA = srcA.ptr<double>(); 3044 const double* dataB = srcB.ptr<double>(); 3045 double* dataC = dst.ptr<double>(); 3046 3047 size_t stepA = srcA.step/sizeof(dataA[0]); 3048 size_t stepB = srcB.step/sizeof(dataB[0]); 3049 size_t stepC = dst.step/sizeof(dataC[0]); 3050 3051 if( !is_1d && cn == 1 ) 3052 { 3053 for( k = 0; k < (cols % 2 ? 1 : 2); k++ ) 3054 { 3055 if( k == 1 ) 3056 dataA += cols - 1, dataB += cols - 1, dataC += cols - 1; 3057 dataC[0] = dataA[0]*dataB[0]; 3058 if( rows % 2 == 0 ) 3059 dataC[(rows-1)*stepC] = dataA[(rows-1)*stepA]*dataB[(rows-1)*stepB]; 3060 if( !conjB ) 3061 for( j = 1; j <= rows - 2; j += 2 ) 3062 { 3063 double re = dataA[j*stepA]*dataB[j*stepB] - 3064 dataA[(j+1)*stepA]*dataB[(j+1)*stepB]; 3065 double im = dataA[j*stepA]*dataB[(j+1)*stepB] + 3066 dataA[(j+1)*stepA]*dataB[j*stepB]; 3067 dataC[j*stepC] = re; dataC[(j+1)*stepC] = im; 3068 } 3069 else 3070 for( j = 1; j <= rows - 2; j += 2 ) 3071 { 3072 double re = dataA[j*stepA]*dataB[j*stepB] + 3073 dataA[(j+1)*stepA]*dataB[(j+1)*stepB]; 3074 double im = dataA[(j+1)*stepA]*dataB[j*stepB] - 3075 dataA[j*stepA]*dataB[(j+1)*stepB]; 3076 dataC[j*stepC] = re; dataC[(j+1)*stepC] = im; 3077 } 3078 if( k == 1 ) 3079 dataA -= cols - 1, dataB -= cols - 1, dataC -= cols - 1; 3080 } 3081 } 3082 3083 for( ; rows--; dataA += stepA, dataB += stepB, dataC += stepC ) 3084 { 3085 if( is_1d && cn == 1 ) 3086 { 3087 dataC[0] = dataA[0]*dataB[0]; 3088 if( cols % 2 == 0 ) 3089 dataC[j1] = dataA[j1]*dataB[j1]; 3090 } 3091 3092 if( !conjB ) 3093 for( j = j0; j < j1; j += 2 ) 3094 { 3095 double re = dataA[j]*dataB[j] - dataA[j+1]*dataB[j+1]; 3096 double im = dataA[j+1]*dataB[j] + dataA[j]*dataB[j+1]; 3097 dataC[j] = re; dataC[j+1] = im; 3098 } 3099 else 3100 for( j = j0; j < j1; j += 2 ) 3101 { 3102 double re = dataA[j]*dataB[j] + dataA[j+1]*dataB[j+1]; 3103 double im = dataA[j+1]*dataB[j] - dataA[j]*dataB[j+1]; 3104 dataC[j] = re; dataC[j+1] = im; 3105 } 3106 } 3107 } 3108 } 3109 3110 3111 /****************************************************************************************\ 3112 Discrete Cosine Transform 3113 \****************************************************************************************/ 3114 3115 namespace cv 3116 { 3117 3118 /* DCT is calculated using DFT, as described here: 3119 http://www.ece.utexas.edu/~bevans/courses/ee381k/lectures/09_DCT/lecture9/: 3120 */ 3121 template<typename T> static void 3122 DCT( const T* src, int src_step, T* dft_src, T* dft_dst, T* dst, int dst_step, 3123 int n, int nf, int* factors, const int* itab, const Complex<T>* dft_wave, 3124 const Complex<T>* dct_wave, const void* spec, Complex<T>* buf ) 3125 { 3126 static const T sin_45 = (T)0.70710678118654752440084436210485; 3127 int j, n2 = n >> 1; 3128 3129 src_step /= sizeof(src[0]); 3130 dst_step /= sizeof(dst[0]); 3131 T* dst1 = dst + (n-1)*dst_step; 3132 3133 if( n == 1 ) 3134 { 3135 dst[0] = src[0]; 3136 return; 3137 } 3138 3139 for( j = 0; j < n2; j++, src += src_step*2 ) 3140 { 3141 dft_src[j] = src[0]; 3142 dft_src[n-j-1] = src[src_step]; 3143 } 3144 3145 RealDFT( dft_src, dft_dst, n, nf, factors, 3146 itab, dft_wave, n, spec, buf, 0, 1.0 ); 3147 src = dft_dst; 3148 3149 dst[0] = (T)(src[0]*dct_wave->re*sin_45); 3150 dst += dst_step; 3151 for( j = 1, dct_wave++; j < n2; j++, dct_wave++, 3152 dst += dst_step, dst1 -= dst_step ) 3153 { 3154 T t0 = dct_wave->re*src[j*2-1] - dct_wave->im*src[j*2]; 3155 T t1 = -dct_wave->im*src[j*2-1] - dct_wave->re*src[j*2]; 3156 dst[0] = t0; 3157 dst1[0] = t1; 3158 } 3159 3160 dst[0] = src[n-1]*dct_wave->re; 3161 } 3162 3163 3164 template<typename T> static void 3165 IDCT( const T* src, int src_step, T* dft_src, T* dft_dst, T* dst, int dst_step, 3166 int n, int nf, int* factors, const int* itab, const Complex<T>* dft_wave, 3167 const Complex<T>* dct_wave, const void* spec, Complex<T>* buf ) 3168 { 3169 static const T sin_45 = (T)0.70710678118654752440084436210485; 3170 int j, n2 = n >> 1; 3171 3172 src_step /= sizeof(src[0]); 3173 dst_step /= sizeof(dst[0]); 3174 const T* src1 = src + (n-1)*src_step; 3175 3176 if( n == 1 ) 3177 { 3178 dst[0] = src[0]; 3179 return; 3180 } 3181 3182 dft_src[0] = (T)(src[0]*2*dct_wave->re*sin_45); 3183 src += src_step; 3184 for( j = 1, dct_wave++; j < n2; j++, dct_wave++, 3185 src += src_step, src1 -= src_step ) 3186 { 3187 T t0 = dct_wave->re*src[0] - dct_wave->im*src1[0]; 3188 T t1 = -dct_wave->im*src[0] - dct_wave->re*src1[0]; 3189 dft_src[j*2-1] = t0; 3190 dft_src[j*2] = t1; 3191 } 3192 3193 dft_src[n-1] = (T)(src[0]*2*dct_wave->re); 3194 CCSIDFT( dft_src, dft_dst, n, nf, factors, itab, 3195 dft_wave, n, spec, buf, 0, 1.0 ); 3196 3197 for( j = 0; j < n2; j++, dst += dst_step*2 ) 3198 { 3199 dst[0] = dft_dst[j]; 3200 dst[dst_step] = dft_dst[n-j-1]; 3201 } 3202 } 3203 3204 3205 static void 3206 DCTInit( int n, int elem_size, void* _wave, int inv ) 3207 { 3208 static const double DctScale[] = 3209 { 3210 0.707106781186547570, 0.500000000000000000, 0.353553390593273790, 3211 0.250000000000000000, 0.176776695296636890, 0.125000000000000000, 3212 0.088388347648318447, 0.062500000000000000, 0.044194173824159223, 3213 0.031250000000000000, 0.022097086912079612, 0.015625000000000000, 3214 0.011048543456039806, 0.007812500000000000, 0.005524271728019903, 3215 0.003906250000000000, 0.002762135864009952, 0.001953125000000000, 3216 0.001381067932004976, 0.000976562500000000, 0.000690533966002488, 3217 0.000488281250000000, 0.000345266983001244, 0.000244140625000000, 3218 0.000172633491500622, 0.000122070312500000, 0.000086316745750311, 3219 0.000061035156250000, 0.000043158372875155, 0.000030517578125000 3220 }; 3221 3222 int i; 3223 Complex<double> w, w1; 3224 double t, scale; 3225 3226 if( n == 1 ) 3227 return; 3228 3229 assert( (n&1) == 0 ); 3230 3231 if( (n & (n - 1)) == 0 ) 3232 { 3233 int m; 3234 for( m = 0; (unsigned)(1 << m) < (unsigned)n; m++ ) 3235 ; 3236 scale = (!inv ? 2 : 1)*DctScale[m]; 3237 w1.re = DFTTab[m+2][0]; 3238 w1.im = -DFTTab[m+2][1]; 3239 } 3240 else 3241 { 3242 t = 1./(2*n); 3243 scale = (!inv ? 2 : 1)*std::sqrt(t); 3244 w1.im = sin(-CV_PI*t); 3245 w1.re = std::sqrt(1. - w1.im*w1.im); 3246 } 3247 n >>= 1; 3248 3249 if( elem_size == sizeof(Complex<double>) ) 3250 { 3251 Complex<double>* wave = (Complex<double>*)_wave; 3252 3253 w.re = scale; 3254 w.im = 0.; 3255 3256 for( i = 0; i <= n; i++ ) 3257 { 3258 wave[i] = w; 3259 t = w.re*w1.re - w.im*w1.im; 3260 w.im = w.re*w1.im + w.im*w1.re; 3261 w.re = t; 3262 } 3263 } 3264 else 3265 { 3266 Complex<float>* wave = (Complex<float>*)_wave; 3267 assert( elem_size == sizeof(Complex<float>) ); 3268 3269 w.re = (float)scale; 3270 w.im = 0.f; 3271 3272 for( i = 0; i <= n; i++ ) 3273 { 3274 wave[i].re = (float)w.re; 3275 wave[i].im = (float)w.im; 3276 t = w.re*w1.re - w.im*w1.im; 3277 w.im = w.re*w1.im + w.im*w1.re; 3278 w.re = t; 3279 } 3280 } 3281 } 3282 3283 3284 typedef void (*DCTFunc)(const void* src, int src_step, void* dft_src, 3285 void* dft_dst, void* dst, int dst_step, int n, 3286 int nf, int* factors, const int* itab, const void* dft_wave, 3287 const void* dct_wave, const void* spec, void* buf ); 3288 3289 static void DCT_32f(const float* src, int src_step, float* dft_src, float* dft_dst, 3290 float* dst, int dst_step, int n, int nf, int* factors, const int* itab, 3291 const Complexf* dft_wave, const Complexf* dct_wave, const void* spec, Complexf* buf ) 3292 { 3293 DCT(src, src_step, dft_src, dft_dst, dst, dst_step, 3294 n, nf, factors, itab, dft_wave, dct_wave, spec, buf); 3295 } 3296 3297 static void IDCT_32f(const float* src, int src_step, float* dft_src, float* dft_dst, 3298 float* dst, int dst_step, int n, int nf, int* factors, const int* itab, 3299 const Complexf* dft_wave, const Complexf* dct_wave, const void* spec, Complexf* buf ) 3300 { 3301 IDCT(src, src_step, dft_src, dft_dst, dst, dst_step, 3302 n, nf, factors, itab, dft_wave, dct_wave, spec, buf); 3303 } 3304 3305 static void DCT_64f(const double* src, int src_step, double* dft_src, double* dft_dst, 3306 double* dst, int dst_step, int n, int nf, int* factors, const int* itab, 3307 const Complexd* dft_wave, const Complexd* dct_wave, const void* spec, Complexd* buf ) 3308 { 3309 DCT(src, src_step, dft_src, dft_dst, dst, dst_step, 3310 n, nf, factors, itab, dft_wave, dct_wave, spec, buf); 3311 } 3312 3313 static void IDCT_64f(const double* src, int src_step, double* dft_src, double* dft_dst, 3314 double* dst, int dst_step, int n, int nf, int* factors, const int* itab, 3315 const Complexd* dft_wave, const Complexd* dct_wave, const void* spec, Complexd* buf ) 3316 { 3317 IDCT(src, src_step, dft_src, dft_dst, dst, dst_step, 3318 n, nf, factors, itab, dft_wave, dct_wave, spec, buf); 3319 } 3320 3321 } 3322 3323 namespace cv 3324 { 3325 #if defined HAVE_IPP && IPP_VERSION_MAJOR >= 7 3326 3327 typedef IppStatus (CV_STDCALL * ippiDCTFunc)(const Ipp32f*, int, Ipp32f*, int, const void*, Ipp8u*); 3328 typedef IppStatus (CV_STDCALL * ippiDCTInitAlloc)(void**, IppiSize, IppHintAlgorithm); 3329 typedef IppStatus (CV_STDCALL * ippiDCTFree)(void* pDCTSpec); 3330 typedef IppStatus (CV_STDCALL * ippiDCTGetBufSize)(const void*, int*); 3331 3332 template <typename Dct> 3333 class DctIPPLoop_Invoker : public ParallelLoopBody 3334 { 3335 public: 3336 3337 DctIPPLoop_Invoker(const Mat& _src, Mat& _dst, const Dct* _ippidct, bool _inv, bool *_ok) : 3338 ParallelLoopBody(), src(&_src), dst(&_dst), ippidct(_ippidct), inv(_inv), ok(_ok) 3339 { 3340 *ok = true; 3341 } 3342 3343 virtual void operator()(const Range& range) const 3344 { 3345 void* pDCTSpec; 3346 AutoBuffer<uchar> buf; 3347 uchar* pBuffer = 0; 3348 int bufSize=0; 3349 3350 IppiSize srcRoiSize = {src->cols, 1}; 3351 3352 CV_SUPPRESS_DEPRECATED_START 3353 3354 ippiDCTInitAlloc ippInitAlloc = inv ? (ippiDCTInitAlloc)ippiDCTInvInitAlloc_32f : (ippiDCTInitAlloc)ippiDCTFwdInitAlloc_32f; 3355 ippiDCTFree ippFree = inv ? (ippiDCTFree)ippiDCTInvFree_32f : (ippiDCTFree)ippiDCTFwdFree_32f; 3356 ippiDCTGetBufSize ippGetBufSize = inv ? (ippiDCTGetBufSize)ippiDCTInvGetBufSize_32f : (ippiDCTGetBufSize)ippiDCTFwdGetBufSize_32f; 3357 3358 if (ippInitAlloc(&pDCTSpec, srcRoiSize, ippAlgHintNone)>=0 && ippGetBufSize(pDCTSpec, &bufSize)>=0) 3359 { 3360 buf.allocate( bufSize ); 3361 pBuffer = (uchar*)buf; 3362 3363 for( int i = range.start; i < range.end; ++i) 3364 if(!(*ippidct)(src->ptr<float>(i), (int)src->step,dst->ptr<float>(i), (int)dst->step, pDCTSpec, (Ipp8u*)pBuffer)) 3365 *ok = false; 3366 } 3367 else 3368 *ok = false; 3369 3370 if (pDCTSpec) 3371 ippFree(pDCTSpec); 3372 3373 CV_SUPPRESS_DEPRECATED_END 3374 } 3375 3376 private: 3377 const Mat* src; 3378 Mat* dst; 3379 const Dct* ippidct; 3380 bool inv; 3381 bool *ok; 3382 }; 3383 3384 template <typename Dct> 3385 bool DctIPPLoop(const Mat& src, Mat& dst, const Dct& ippidct, bool inv) 3386 { 3387 bool ok; 3388 parallel_for_(Range(0, src.rows), DctIPPLoop_Invoker<Dct>(src, dst, &ippidct, inv, &ok), src.rows/(double)(1<<4) ); 3389 return ok; 3390 } 3391 3392 struct IPPDCTFunctor 3393 { 3394 IPPDCTFunctor(ippiDCTFunc _func) : func(_func){} 3395 3396 bool operator()(const Ipp32f* src, int srcStep, Ipp32f* dst, int dstStep, const void* pDCTSpec, Ipp8u* pBuffer) const 3397 { 3398 return func ? func(src, srcStep, dst, dstStep, pDCTSpec, pBuffer) >= 0 : false; 3399 } 3400 private: 3401 ippiDCTFunc func; 3402 }; 3403 3404 static bool ippi_DCT_32f(const Mat& src, Mat& dst, bool inv, bool row) 3405 { 3406 ippiDCTFunc ippFunc = inv ? (ippiDCTFunc)ippiDCTInv_32f_C1R : (ippiDCTFunc)ippiDCTFwd_32f_C1R ; 3407 3408 if (row) 3409 return(DctIPPLoop(src,dst,IPPDCTFunctor(ippFunc),inv)); 3410 else 3411 { 3412 IppStatus status; 3413 void* pDCTSpec; 3414 AutoBuffer<uchar> buf; 3415 uchar* pBuffer = 0; 3416 int bufSize=0; 3417 3418 IppiSize srcRoiSize = {src.cols, src.rows}; 3419 3420 CV_SUPPRESS_DEPRECATED_START 3421 3422 ippiDCTInitAlloc ippInitAlloc = inv ? (ippiDCTInitAlloc)ippiDCTInvInitAlloc_32f : (ippiDCTInitAlloc)ippiDCTFwdInitAlloc_32f; 3423 ippiDCTFree ippFree = inv ? (ippiDCTFree)ippiDCTInvFree_32f : (ippiDCTFree)ippiDCTFwdFree_32f; 3424 ippiDCTGetBufSize ippGetBufSize = inv ? (ippiDCTGetBufSize)ippiDCTInvGetBufSize_32f : (ippiDCTGetBufSize)ippiDCTFwdGetBufSize_32f; 3425 3426 status = ippStsErr; 3427 3428 if (ippInitAlloc(&pDCTSpec, srcRoiSize, ippAlgHintNone)>=0 && ippGetBufSize(pDCTSpec, &bufSize)>=0) 3429 { 3430 buf.allocate( bufSize ); 3431 pBuffer = (uchar*)buf; 3432 3433 status = ippFunc(src.ptr<float>(), (int)src.step, dst.ptr<float>(), (int)dst.step, pDCTSpec, (Ipp8u*)pBuffer); 3434 } 3435 3436 if (pDCTSpec) 3437 ippFree(pDCTSpec); 3438 3439 CV_SUPPRESS_DEPRECATED_END 3440 3441 return status >= 0; 3442 } 3443 } 3444 3445 #endif 3446 } 3447 3448 void cv::dct( InputArray _src0, OutputArray _dst, int flags ) 3449 { 3450 static DCTFunc dct_tbl[4] = 3451 { 3452 (DCTFunc)DCT_32f, 3453 (DCTFunc)IDCT_32f, 3454 (DCTFunc)DCT_64f, 3455 (DCTFunc)IDCT_64f 3456 }; 3457 3458 bool inv = (flags & DCT_INVERSE) != 0; 3459 Mat src0 = _src0.getMat(), src = src0; 3460 int type = src.type(), depth = src.depth(); 3461 void *spec = 0; 3462 3463 double scale = 1.; 3464 int prev_len = 0, nf = 0, stage, end_stage; 3465 uchar *src_dft_buf = 0, *dst_dft_buf = 0; 3466 uchar *dft_wave = 0, *dct_wave = 0; 3467 int* itab = 0; 3468 uchar* ptr = 0; 3469 int elem_size = (int)src.elemSize(), complex_elem_size = elem_size*2; 3470 int factors[34], inplace_transform; 3471 int i, len, count; 3472 AutoBuffer<uchar> buf; 3473 3474 CV_Assert( type == CV_32FC1 || type == CV_64FC1 ); 3475 _dst.create( src.rows, src.cols, type ); 3476 Mat dst = _dst.getMat(); 3477 3478 #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7) 3479 CV_IPP_CHECK() 3480 { 3481 bool row = (flags & DCT_ROWS) != 0; 3482 if (src.type() == CV_32F) 3483 { 3484 if(ippi_DCT_32f(src,dst,inv, row)) 3485 { 3486 CV_IMPL_ADD(CV_IMPL_IPP); 3487 return; 3488 } 3489 setIppErrorStatus(); 3490 } 3491 } 3492 #endif 3493 3494 DCTFunc dct_func = dct_tbl[(int)inv + (depth == CV_64F)*2]; 3495 3496 if( (flags & DCT_ROWS) || src.rows == 1 || 3497 (src.cols == 1 && (src.isContinuous() && dst.isContinuous()))) 3498 { 3499 stage = end_stage = 0; 3500 } 3501 else 3502 { 3503 stage = src.cols == 1; 3504 end_stage = 1; 3505 } 3506 3507 for( ; stage <= end_stage; stage++ ) 3508 { 3509 const uchar* sptr = src.ptr(); 3510 uchar* dptr = dst.ptr(); 3511 size_t sstep0, sstep1, dstep0, dstep1; 3512 3513 if( stage == 0 ) 3514 { 3515 len = src.cols; 3516 count = src.rows; 3517 if( len == 1 && !(flags & DCT_ROWS) ) 3518 { 3519 len = src.rows; 3520 count = 1; 3521 } 3522 sstep0 = src.step; 3523 dstep0 = dst.step; 3524 sstep1 = dstep1 = elem_size; 3525 } 3526 else 3527 { 3528 len = dst.rows; 3529 count = dst.cols; 3530 sstep1 = src.step; 3531 dstep1 = dst.step; 3532 sstep0 = dstep0 = elem_size; 3533 } 3534 3535 if( len != prev_len ) 3536 { 3537 int sz; 3538 3539 if( len > 1 && (len & 1) ) 3540 CV_Error( CV_StsNotImplemented, "Odd-size DCT\'s are not implemented" ); 3541 3542 sz = len*elem_size; 3543 sz += (len/2 + 1)*complex_elem_size; 3544 3545 spec = 0; 3546 inplace_transform = 1; 3547 { 3548 sz += len*(complex_elem_size + sizeof(int)) + complex_elem_size; 3549 3550 nf = DFTFactorize( len, factors ); 3551 inplace_transform = factors[0] == factors[nf-1]; 3552 3553 i = nf > 1 && (factors[0] & 1) == 0; 3554 if( (factors[i] & 1) != 0 && factors[i] > 5 ) 3555 sz += (factors[i]+1)*complex_elem_size; 3556 3557 if( !inplace_transform ) 3558 sz += len*elem_size; 3559 } 3560 3561 buf.allocate( sz + 32 ); 3562 ptr = (uchar*)buf; 3563 3564 if( !spec ) 3565 { 3566 dft_wave = ptr; 3567 ptr += len*complex_elem_size; 3568 itab = (int*)ptr; 3569 ptr = (uchar*)cvAlignPtr( ptr + len*sizeof(int), 16 ); 3570 DFTInit( len, nf, factors, itab, complex_elem_size, dft_wave, inv ); 3571 } 3572 3573 dct_wave = ptr; 3574 ptr += (len/2 + 1)*complex_elem_size; 3575 src_dft_buf = dst_dft_buf = ptr; 3576 ptr += len*elem_size; 3577 if( !inplace_transform ) 3578 { 3579 dst_dft_buf = ptr; 3580 ptr += len*elem_size; 3581 } 3582 DCTInit( len, complex_elem_size, dct_wave, inv ); 3583 if( !inv ) 3584 scale += scale; 3585 prev_len = len; 3586 } 3587 // otherwise reuse the tables calculated on the previous stage 3588 for( i = 0; i < count; i++ ) 3589 { 3590 dct_func( sptr + i*sstep0, (int)sstep1, src_dft_buf, dst_dft_buf, 3591 dptr + i*dstep0, (int)dstep1, len, nf, factors, 3592 itab, dft_wave, dct_wave, spec, ptr ); 3593 } 3594 src = dst; 3595 } 3596 } 3597 3598 3599 void cv::idct( InputArray src, OutputArray dst, int flags ) 3600 { 3601 dct( src, dst, flags | DCT_INVERSE ); 3602 } 3603 3604 namespace cv 3605 { 3606 3607 static const int optimalDFTSizeTab[] = { 3608 1, 2, 3, 4, 5, 6, 8, 9, 10, 12, 15, 16, 18, 20, 24, 25, 27, 30, 32, 36, 40, 45, 48, 3609 50, 54, 60, 64, 72, 75, 80, 81, 90, 96, 100, 108, 120, 125, 128, 135, 144, 150, 160, 3610 162, 180, 192, 200, 216, 225, 240, 243, 250, 256, 270, 288, 300, 320, 324, 360, 375, 3611 384, 400, 405, 432, 450, 480, 486, 500, 512, 540, 576, 600, 625, 640, 648, 675, 720, 3612 729, 750, 768, 800, 810, 864, 900, 960, 972, 1000, 1024, 1080, 1125, 1152, 1200, 3613 1215, 1250, 1280, 1296, 1350, 1440, 1458, 1500, 1536, 1600, 1620, 1728, 1800, 1875, 3614 1920, 1944, 2000, 2025, 2048, 2160, 2187, 2250, 2304, 2400, 2430, 2500, 2560, 2592, 3615 2700, 2880, 2916, 3000, 3072, 3125, 3200, 3240, 3375, 3456, 3600, 3645, 3750, 3840, 3616 3888, 4000, 4050, 4096, 4320, 4374, 4500, 4608, 4800, 4860, 5000, 5120, 5184, 5400, 3617 5625, 5760, 5832, 6000, 6075, 6144, 6250, 6400, 6480, 6561, 6750, 6912, 7200, 7290, 3618 7500, 7680, 7776, 8000, 8100, 8192, 8640, 8748, 9000, 9216, 9375, 9600, 9720, 10000, 3619 10125, 10240, 10368, 10800, 10935, 11250, 11520, 11664, 12000, 12150, 12288, 12500, 3620 12800, 12960, 13122, 13500, 13824, 14400, 14580, 15000, 15360, 15552, 15625, 16000, 3621 16200, 16384, 16875, 17280, 17496, 18000, 18225, 18432, 18750, 19200, 19440, 19683, 3622 20000, 20250, 20480, 20736, 21600, 21870, 22500, 23040, 23328, 24000, 24300, 24576, 3623 25000, 25600, 25920, 26244, 27000, 27648, 28125, 28800, 29160, 30000, 30375, 30720, 3624 31104, 31250, 32000, 32400, 32768, 32805, 33750, 34560, 34992, 36000, 36450, 36864, 3625 37500, 38400, 38880, 39366, 40000, 40500, 40960, 41472, 43200, 43740, 45000, 46080, 3626 46656, 46875, 48000, 48600, 49152, 50000, 50625, 51200, 51840, 52488, 54000, 54675, 3627 55296, 56250, 57600, 58320, 59049, 60000, 60750, 61440, 62208, 62500, 64000, 64800, 3628 65536, 65610, 67500, 69120, 69984, 72000, 72900, 73728, 75000, 76800, 77760, 78125, 3629 78732, 80000, 81000, 81920, 82944, 84375, 86400, 87480, 90000, 91125, 92160, 93312, 3630 93750, 96000, 97200, 98304, 98415, 100000, 101250, 102400, 103680, 104976, 108000, 3631 109350, 110592, 112500, 115200, 116640, 118098, 120000, 121500, 122880, 124416, 125000, 3632 128000, 129600, 131072, 131220, 135000, 138240, 139968, 140625, 144000, 145800, 147456, 3633 150000, 151875, 153600, 155520, 156250, 157464, 160000, 162000, 163840, 164025, 165888, 3634 168750, 172800, 174960, 177147, 180000, 182250, 184320, 186624, 187500, 192000, 194400, 3635 196608, 196830, 200000, 202500, 204800, 207360, 209952, 216000, 218700, 221184, 225000, 3636 230400, 233280, 234375, 236196, 240000, 243000, 245760, 248832, 250000, 253125, 256000, 3637 259200, 262144, 262440, 270000, 273375, 276480, 279936, 281250, 288000, 291600, 294912, 3638 295245, 300000, 303750, 307200, 311040, 312500, 314928, 320000, 324000, 327680, 328050, 3639 331776, 337500, 345600, 349920, 354294, 360000, 364500, 368640, 373248, 375000, 384000, 3640 388800, 390625, 393216, 393660, 400000, 405000, 409600, 414720, 419904, 421875, 432000, 3641 437400, 442368, 450000, 455625, 460800, 466560, 468750, 472392, 480000, 486000, 491520, 3642 492075, 497664, 500000, 506250, 512000, 518400, 524288, 524880, 531441, 540000, 546750, 3643 552960, 559872, 562500, 576000, 583200, 589824, 590490, 600000, 607500, 614400, 622080, 3644 625000, 629856, 640000, 648000, 655360, 656100, 663552, 675000, 691200, 699840, 703125, 3645 708588, 720000, 729000, 737280, 746496, 750000, 759375, 768000, 777600, 781250, 786432, 3646 787320, 800000, 810000, 819200, 820125, 829440, 839808, 843750, 864000, 874800, 884736, 3647 885735, 900000, 911250, 921600, 933120, 937500, 944784, 960000, 972000, 983040, 984150, 3648 995328, 1000000, 1012500, 1024000, 1036800, 1048576, 1049760, 1062882, 1080000, 1093500, 3649 1105920, 1119744, 1125000, 1152000, 1166400, 1171875, 1179648, 1180980, 1200000, 3650 1215000, 1228800, 1244160, 1250000, 1259712, 1265625, 1280000, 1296000, 1310720, 3651 1312200, 1327104, 1350000, 1366875, 1382400, 1399680, 1406250, 1417176, 1440000, 3652 1458000, 1474560, 1476225, 1492992, 1500000, 1518750, 1536000, 1555200, 1562500, 3653 1572864, 1574640, 1594323, 1600000, 1620000, 1638400, 1640250, 1658880, 1679616, 3654 1687500, 1728000, 1749600, 1769472, 1771470, 1800000, 1822500, 1843200, 1866240, 3655 1875000, 1889568, 1920000, 1944000, 1953125, 1966080, 1968300, 1990656, 2000000, 3656 2025000, 2048000, 2073600, 2097152, 2099520, 2109375, 2125764, 2160000, 2187000, 3657 2211840, 2239488, 2250000, 2278125, 2304000, 2332800, 2343750, 2359296, 2361960, 3658 2400000, 2430000, 2457600, 2460375, 2488320, 2500000, 2519424, 2531250, 2560000, 3659 2592000, 2621440, 2624400, 2654208, 2657205, 2700000, 2733750, 2764800, 2799360, 3660 2812500, 2834352, 2880000, 2916000, 2949120, 2952450, 2985984, 3000000, 3037500, 3661 3072000, 3110400, 3125000, 3145728, 3149280, 3188646, 3200000, 3240000, 3276800, 3662 3280500, 3317760, 3359232, 3375000, 3456000, 3499200, 3515625, 3538944, 3542940, 3663 3600000, 3645000, 3686400, 3732480, 3750000, 3779136, 3796875, 3840000, 3888000, 3664 3906250, 3932160, 3936600, 3981312, 4000000, 4050000, 4096000, 4100625, 4147200, 3665 4194304, 4199040, 4218750, 4251528, 4320000, 4374000, 4423680, 4428675, 4478976, 3666 4500000, 4556250, 4608000, 4665600, 4687500, 4718592, 4723920, 4782969, 4800000, 3667 4860000, 4915200, 4920750, 4976640, 5000000, 5038848, 5062500, 5120000, 5184000, 3668 5242880, 5248800, 5308416, 5314410, 5400000, 5467500, 5529600, 5598720, 5625000, 3669 5668704, 5760000, 5832000, 5859375, 5898240, 5904900, 5971968, 6000000, 6075000, 3670 6144000, 6220800, 6250000, 6291456, 6298560, 6328125, 6377292, 6400000, 6480000, 3671 6553600, 6561000, 6635520, 6718464, 6750000, 6834375, 6912000, 6998400, 7031250, 3672 7077888, 7085880, 7200000, 7290000, 7372800, 7381125, 7464960, 7500000, 7558272, 3673 7593750, 7680000, 7776000, 7812500, 7864320, 7873200, 7962624, 7971615, 8000000, 3674 8100000, 8192000, 8201250, 8294400, 8388608, 8398080, 8437500, 8503056, 8640000, 3675 8748000, 8847360, 8857350, 8957952, 9000000, 9112500, 9216000, 9331200, 9375000, 3676 9437184, 9447840, 9565938, 9600000, 9720000, 9765625, 9830400, 9841500, 9953280, 3677 10000000, 10077696, 10125000, 10240000, 10368000, 10485760, 10497600, 10546875, 10616832, 3678 10628820, 10800000, 10935000, 11059200, 11197440, 11250000, 11337408, 11390625, 11520000, 3679 11664000, 11718750, 11796480, 11809800, 11943936, 12000000, 12150000, 12288000, 12301875, 3680 12441600, 12500000, 12582912, 12597120, 12656250, 12754584, 12800000, 12960000, 13107200, 3681 13122000, 13271040, 13286025, 13436928, 13500000, 13668750, 13824000, 13996800, 14062500, 3682 14155776, 14171760, 14400000, 14580000, 14745600, 14762250, 14929920, 15000000, 15116544, 3683 15187500, 15360000, 15552000, 15625000, 15728640, 15746400, 15925248, 15943230, 16000000, 3684 16200000, 16384000, 16402500, 16588800, 16777216, 16796160, 16875000, 17006112, 17280000, 3685 17496000, 17578125, 17694720, 17714700, 17915904, 18000000, 18225000, 18432000, 18662400, 3686 18750000, 18874368, 18895680, 18984375, 19131876, 19200000, 19440000, 19531250, 19660800, 3687 19683000, 19906560, 20000000, 20155392, 20250000, 20480000, 20503125, 20736000, 20971520, 3688 20995200, 21093750, 21233664, 21257640, 21600000, 21870000, 22118400, 22143375, 22394880, 3689 22500000, 22674816, 22781250, 23040000, 23328000, 23437500, 23592960, 23619600, 23887872, 3690 23914845, 24000000, 24300000, 24576000, 24603750, 24883200, 25000000, 25165824, 25194240, 3691 25312500, 25509168, 25600000, 25920000, 26214400, 26244000, 26542080, 26572050, 26873856, 3692 27000000, 27337500, 27648000, 27993600, 28125000, 28311552, 28343520, 28800000, 29160000, 3693 29296875, 29491200, 29524500, 29859840, 30000000, 30233088, 30375000, 30720000, 31104000, 3694 31250000, 31457280, 31492800, 31640625, 31850496, 31886460, 32000000, 32400000, 32768000, 3695 32805000, 33177600, 33554432, 33592320, 33750000, 34012224, 34171875, 34560000, 34992000, 3696 35156250, 35389440, 35429400, 35831808, 36000000, 36450000, 36864000, 36905625, 37324800, 3697 37500000, 37748736, 37791360, 37968750, 38263752, 38400000, 38880000, 39062500, 39321600, 3698 39366000, 39813120, 39858075, 40000000, 40310784, 40500000, 40960000, 41006250, 41472000, 3699 41943040, 41990400, 42187500, 42467328, 42515280, 43200000, 43740000, 44236800, 44286750, 3700 44789760, 45000000, 45349632, 45562500, 46080000, 46656000, 46875000, 47185920, 47239200, 3701 47775744, 47829690, 48000000, 48600000, 48828125, 49152000, 49207500, 49766400, 50000000, 3702 50331648, 50388480, 50625000, 51018336, 51200000, 51840000, 52428800, 52488000, 52734375, 3703 53084160, 53144100, 53747712, 54000000, 54675000, 55296000, 55987200, 56250000, 56623104, 3704 56687040, 56953125, 57600000, 58320000, 58593750, 58982400, 59049000, 59719680, 60000000, 3705 60466176, 60750000, 61440000, 61509375, 62208000, 62500000, 62914560, 62985600, 63281250, 3706 63700992, 63772920, 64000000, 64800000, 65536000, 65610000, 66355200, 66430125, 67108864, 3707 67184640, 67500000, 68024448, 68343750, 69120000, 69984000, 70312500, 70778880, 70858800, 3708 71663616, 72000000, 72900000, 73728000, 73811250, 74649600, 75000000, 75497472, 75582720, 3709 75937500, 76527504, 76800000, 77760000, 78125000, 78643200, 78732000, 79626240, 79716150, 3710 80000000, 80621568, 81000000, 81920000, 82012500, 82944000, 83886080, 83980800, 84375000, 3711 84934656, 85030560, 86400000, 87480000, 87890625, 88473600, 88573500, 89579520, 90000000, 3712 90699264, 91125000, 92160000, 93312000, 93750000, 94371840, 94478400, 94921875, 95551488, 3713 95659380, 96000000, 97200000, 97656250, 98304000, 98415000, 99532800, 100000000, 3714 100663296, 100776960, 101250000, 102036672, 102400000, 102515625, 103680000, 104857600, 3715 104976000, 105468750, 106168320, 106288200, 107495424, 108000000, 109350000, 110592000, 3716 110716875, 111974400, 112500000, 113246208, 113374080, 113906250, 115200000, 116640000, 3717 117187500, 117964800, 118098000, 119439360, 119574225, 120000000, 120932352, 121500000, 3718 122880000, 123018750, 124416000, 125000000, 125829120, 125971200, 126562500, 127401984, 3719 127545840, 128000000, 129600000, 131072000, 131220000, 132710400, 132860250, 134217728, 3720 134369280, 135000000, 136048896, 136687500, 138240000, 139968000, 140625000, 141557760, 3721 141717600, 143327232, 144000000, 145800000, 146484375, 147456000, 147622500, 149299200, 3722 150000000, 150994944, 151165440, 151875000, 153055008, 153600000, 155520000, 156250000, 3723 157286400, 157464000, 158203125, 159252480, 159432300, 160000000, 161243136, 162000000, 3724 163840000, 164025000, 165888000, 167772160, 167961600, 168750000, 169869312, 170061120, 3725 170859375, 172800000, 174960000, 175781250, 176947200, 177147000, 179159040, 180000000, 3726 181398528, 182250000, 184320000, 184528125, 186624000, 187500000, 188743680, 188956800, 3727 189843750, 191102976, 191318760, 192000000, 194400000, 195312500, 196608000, 196830000, 3728 199065600, 199290375, 200000000, 201326592, 201553920, 202500000, 204073344, 204800000, 3729 205031250, 207360000, 209715200, 209952000, 210937500, 212336640, 212576400, 214990848, 3730 216000000, 218700000, 221184000, 221433750, 223948800, 225000000, 226492416, 226748160, 3731 227812500, 230400000, 233280000, 234375000, 235929600, 236196000, 238878720, 239148450, 3732 240000000, 241864704, 243000000, 244140625, 245760000, 246037500, 248832000, 250000000, 3733 251658240, 251942400, 253125000, 254803968, 255091680, 256000000, 259200000, 262144000, 3734 262440000, 263671875, 265420800, 265720500, 268435456, 268738560, 270000000, 272097792, 3735 273375000, 276480000, 279936000, 281250000, 283115520, 283435200, 284765625, 286654464, 3736 288000000, 291600000, 292968750, 294912000, 295245000, 298598400, 300000000, 301989888, 3737 302330880, 303750000, 306110016, 307200000, 307546875, 311040000, 312500000, 314572800, 3738 314928000, 316406250, 318504960, 318864600, 320000000, 322486272, 324000000, 327680000, 3739 328050000, 331776000, 332150625, 335544320, 335923200, 337500000, 339738624, 340122240, 3740 341718750, 345600000, 349920000, 351562500, 353894400, 354294000, 358318080, 360000000, 3741 362797056, 364500000, 368640000, 369056250, 373248000, 375000000, 377487360, 377913600, 3742 379687500, 382205952, 382637520, 384000000, 388800000, 390625000, 393216000, 393660000, 3743 398131200, 398580750, 400000000, 402653184, 403107840, 405000000, 408146688, 409600000, 3744 410062500, 414720000, 419430400, 419904000, 421875000, 424673280, 425152800, 429981696, 3745 432000000, 437400000, 439453125, 442368000, 442867500, 447897600, 450000000, 452984832, 3746 453496320, 455625000, 460800000, 466560000, 468750000, 471859200, 472392000, 474609375, 3747 477757440, 478296900, 480000000, 483729408, 486000000, 488281250, 491520000, 492075000, 3748 497664000, 500000000, 503316480, 503884800, 506250000, 509607936, 510183360, 512000000, 3749 512578125, 518400000, 524288000, 524880000, 527343750, 530841600, 531441000, 536870912, 3750 537477120, 540000000, 544195584, 546750000, 552960000, 553584375, 559872000, 562500000, 3751 566231040, 566870400, 569531250, 573308928, 576000000, 583200000, 585937500, 589824000, 3752 590490000, 597196800, 597871125, 600000000, 603979776, 604661760, 607500000, 612220032, 3753 614400000, 615093750, 622080000, 625000000, 629145600, 629856000, 632812500, 637009920, 3754 637729200, 640000000, 644972544, 648000000, 655360000, 656100000, 663552000, 664301250, 3755 671088640, 671846400, 675000000, 679477248, 680244480, 683437500, 691200000, 699840000, 3756 703125000, 707788800, 708588000, 716636160, 720000000, 725594112, 729000000, 732421875, 3757 737280000, 738112500, 746496000, 750000000, 754974720, 755827200, 759375000, 764411904, 3758 765275040, 768000000, 777600000, 781250000, 786432000, 787320000, 791015625, 796262400, 3759 797161500, 800000000, 805306368, 806215680, 810000000, 816293376, 819200000, 820125000, 3760 829440000, 838860800, 839808000, 843750000, 849346560, 850305600, 854296875, 859963392, 3761 864000000, 874800000, 878906250, 884736000, 885735000, 895795200, 900000000, 905969664, 3762 906992640, 911250000, 921600000, 922640625, 933120000, 937500000, 943718400, 944784000, 3763 949218750, 955514880, 956593800, 960000000, 967458816, 972000000, 976562500, 983040000, 3764 984150000, 995328000, 996451875, 1000000000, 1006632960, 1007769600, 1012500000, 3765 1019215872, 1020366720, 1024000000, 1025156250, 1036800000, 1048576000, 1049760000, 3766 1054687500, 1061683200, 1062882000, 1073741824, 1074954240, 1080000000, 1088391168, 3767 1093500000, 1105920000, 1107168750, 1119744000, 1125000000, 1132462080, 1133740800, 3768 1139062500, 1146617856, 1152000000, 1166400000, 1171875000, 1179648000, 1180980000, 3769 1194393600, 1195742250, 1200000000, 1207959552, 1209323520, 1215000000, 1220703125, 3770 1224440064, 1228800000, 1230187500, 1244160000, 1250000000, 1258291200, 1259712000, 3771 1265625000, 1274019840, 1275458400, 1280000000, 1289945088, 1296000000, 1310720000, 3772 1312200000, 1318359375, 1327104000, 1328602500, 1342177280, 1343692800, 1350000000, 3773 1358954496, 1360488960, 1366875000, 1382400000, 1399680000, 1406250000, 1415577600, 3774 1417176000, 1423828125, 1433272320, 1440000000, 1451188224, 1458000000, 1464843750, 3775 1474560000, 1476225000, 1492992000, 1500000000, 1509949440, 1511654400, 1518750000, 3776 1528823808, 1530550080, 1536000000, 1537734375, 1555200000, 1562500000, 1572864000, 3777 1574640000, 1582031250, 1592524800, 1594323000, 1600000000, 1610612736, 1612431360, 3778 1620000000, 1632586752, 1638400000, 1640250000, 1658880000, 1660753125, 1677721600, 3779 1679616000, 1687500000, 1698693120, 1700611200, 1708593750, 1719926784, 1728000000, 3780 1749600000, 1757812500, 1769472000, 1771470000, 1791590400, 1800000000, 1811939328, 3781 1813985280, 1822500000, 1843200000, 1845281250, 1866240000, 1875000000, 1887436800, 3782 1889568000, 1898437500, 1911029760, 1913187600, 1920000000, 1934917632, 1944000000, 3783 1953125000, 1966080000, 1968300000, 1990656000, 1992903750, 2000000000, 2013265920, 3784 2015539200, 2025000000, 2038431744, 2040733440, 2048000000, 2050312500, 2073600000, 3785 2097152000, 2099520000, 2109375000, 2123366400, 2125764000 3786 }; 3787 3788 } 3789 3790 int cv::getOptimalDFTSize( int size0 ) 3791 { 3792 int a = 0, b = sizeof(optimalDFTSizeTab)/sizeof(optimalDFTSizeTab[0]) - 1; 3793 if( (unsigned)size0 >= (unsigned)optimalDFTSizeTab[b] ) 3794 return -1; 3795 3796 while( a < b ) 3797 { 3798 int c = (a + b) >> 1; 3799 if( size0 <= optimalDFTSizeTab[c] ) 3800 b = c; 3801 else 3802 a = c+1; 3803 } 3804 3805 return optimalDFTSizeTab[b]; 3806 } 3807 3808 CV_IMPL void 3809 cvDFT( const CvArr* srcarr, CvArr* dstarr, int flags, int nonzero_rows ) 3810 { 3811 cv::Mat src = cv::cvarrToMat(srcarr), dst0 = cv::cvarrToMat(dstarr), dst = dst0; 3812 int _flags = ((flags & CV_DXT_INVERSE) ? cv::DFT_INVERSE : 0) | 3813 ((flags & CV_DXT_SCALE) ? cv::DFT_SCALE : 0) | 3814 ((flags & CV_DXT_ROWS) ? cv::DFT_ROWS : 0); 3815 3816 CV_Assert( src.size == dst.size ); 3817 3818 if( src.type() != dst.type() ) 3819 { 3820 if( dst.channels() == 2 ) 3821 _flags |= cv::DFT_COMPLEX_OUTPUT; 3822 else 3823 _flags |= cv::DFT_REAL_OUTPUT; 3824 } 3825 3826 cv::dft( src, dst, _flags, nonzero_rows ); 3827 CV_Assert( dst.data == dst0.data ); // otherwise it means that the destination size or type was incorrect 3828 } 3829 3830 3831 CV_IMPL void 3832 cvMulSpectrums( const CvArr* srcAarr, const CvArr* srcBarr, 3833 CvArr* dstarr, int flags ) 3834 { 3835 cv::Mat srcA = cv::cvarrToMat(srcAarr), 3836 srcB = cv::cvarrToMat(srcBarr), 3837 dst = cv::cvarrToMat(dstarr); 3838 CV_Assert( srcA.size == dst.size && srcA.type() == dst.type() ); 3839 3840 cv::mulSpectrums(srcA, srcB, dst, 3841 (flags & CV_DXT_ROWS) ? cv::DFT_ROWS : 0, 3842 (flags & CV_DXT_MUL_CONJ) != 0 ); 3843 } 3844 3845 3846 CV_IMPL void 3847 cvDCT( const CvArr* srcarr, CvArr* dstarr, int flags ) 3848 { 3849 cv::Mat src = cv::cvarrToMat(srcarr), dst = cv::cvarrToMat(dstarr); 3850 CV_Assert( src.size == dst.size && src.type() == dst.type() ); 3851 int _flags = ((flags & CV_DXT_INVERSE) ? cv::DCT_INVERSE : 0) | 3852 ((flags & CV_DXT_ROWS) ? cv::DCT_ROWS : 0); 3853 cv::dct( src, dst, _flags ); 3854 } 3855 3856 3857 CV_IMPL int 3858 cvGetOptimalDFTSize( int size0 ) 3859 { 3860 return cv::getOptimalDFTSize(size0); 3861 } 3862 3863 /* End of file. */ 3864