Home | History | Annotate | Download | only in src
      1 /*M///////////////////////////////////////////////////////////////////////////////////////
      2 //
      3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
      4 //
      5 //  By downloading, copying, installing or using the software you agree to this license.
      6 //  If you do not agree to this license, do not download, install,
      7 //  copy or use the software.
      8 //
      9 //
     10 //                        Intel License Agreement
     11 //                For Open Source Computer Vision Library
     12 //
     13 // Copyright (C) 2000, Intel Corporation, all rights reserved.
     14 // Third party copyrights are property of their respective owners.
     15 //
     16 // Redistribution and use in source and binary forms, with or without modification,
     17 // are permitted provided that the following conditions are met:
     18 //
     19 //   * Redistribution's of source code must retain the above copyright notice,
     20 //     this list of conditions and the following disclaimer.
     21 //
     22 //   * Redistribution's in binary form must reproduce the above copyright notice,
     23 //     this list of conditions and the following disclaimer in the documentation
     24 //     and/or other materials provided with the distribution.
     25 //
     26 //   * The name of Intel Corporation may not be used to endorse or promote products
     27 //     derived from this software without specific prior written permission.
     28 //
     29 // This software is provided by the copyright holders and contributors "as is" and
     30 // any express or implied warranties, including, but not limited to, the implied
     31 // warranties of merchantability and fitness for a particular purpose are disclaimed.
     32 // In no event shall the Intel Corporation or contributors be liable for any direct,
     33 // indirect, incidental, special, exemplary, or consequential damages
     34 // (including, but not limited to, procurement of substitute goods or services;
     35 // loss of use, data, or profits; or business interruption) however caused
     36 // and on any theory of liability, whether in contract, strict liability,
     37 // or tort (including negligence or otherwise) arising in any way out of
     38 // the use of this software, even if advised of the possibility of such damage.
     39 //
     40 //M*/
     41 
     42 #include "precomp.hpp"
     43 #include "opencv2/core/opencl/runtime/opencl_clamdfft.hpp"
     44 #include "opencv2/core/opencl/runtime/opencl_core.hpp"
     45 #include "opencl_kernels_core.hpp"
     46 #include <map>
     47 
     48 namespace cv
     49 {
     50 
     51 // On Win64 optimized versions of DFT and DCT fail the tests (fixed in VS2010)
     52 #if defined _MSC_VER && !defined CV_ICC && defined _M_X64 && _MSC_VER < 1600
     53 # pragma optimize("", off)
     54 # pragma warning(disable: 4748)
     55 #endif
     56 
     57 #if IPP_VERSION_X100 >= 701
     58 #define USE_IPP_DFT 1
     59 #else
     60 #undef USE_IPP_DFT
     61 #endif
     62 
     63 /****************************************************************************************\
     64                                Discrete Fourier Transform
     65 \****************************************************************************************/
     66 
     67 #define CV_MAX_LOCAL_DFT_SIZE  (1 << 15)
     68 
     69 static unsigned char bitrevTab[] =
     70 {
     71   0x00,0x80,0x40,0xc0,0x20,0xa0,0x60,0xe0,0x10,0x90,0x50,0xd0,0x30,0xb0,0x70,0xf0,
     72   0x08,0x88,0x48,0xc8,0x28,0xa8,0x68,0xe8,0x18,0x98,0x58,0xd8,0x38,0xb8,0x78,0xf8,
     73   0x04,0x84,0x44,0xc4,0x24,0xa4,0x64,0xe4,0x14,0x94,0x54,0xd4,0x34,0xb4,0x74,0xf4,
     74   0x0c,0x8c,0x4c,0xcc,0x2c,0xac,0x6c,0xec,0x1c,0x9c,0x5c,0xdc,0x3c,0xbc,0x7c,0xfc,
     75   0x02,0x82,0x42,0xc2,0x22,0xa2,0x62,0xe2,0x12,0x92,0x52,0xd2,0x32,0xb2,0x72,0xf2,
     76   0x0a,0x8a,0x4a,0xca,0x2a,0xaa,0x6a,0xea,0x1a,0x9a,0x5a,0xda,0x3a,0xba,0x7a,0xfa,
     77   0x06,0x86,0x46,0xc6,0x26,0xa6,0x66,0xe6,0x16,0x96,0x56,0xd6,0x36,0xb6,0x76,0xf6,
     78   0x0e,0x8e,0x4e,0xce,0x2e,0xae,0x6e,0xee,0x1e,0x9e,0x5e,0xde,0x3e,0xbe,0x7e,0xfe,
     79   0x01,0x81,0x41,0xc1,0x21,0xa1,0x61,0xe1,0x11,0x91,0x51,0xd1,0x31,0xb1,0x71,0xf1,
     80   0x09,0x89,0x49,0xc9,0x29,0xa9,0x69,0xe9,0x19,0x99,0x59,0xd9,0x39,0xb9,0x79,0xf9,
     81   0x05,0x85,0x45,0xc5,0x25,0xa5,0x65,0xe5,0x15,0x95,0x55,0xd5,0x35,0xb5,0x75,0xf5,
     82   0x0d,0x8d,0x4d,0xcd,0x2d,0xad,0x6d,0xed,0x1d,0x9d,0x5d,0xdd,0x3d,0xbd,0x7d,0xfd,
     83   0x03,0x83,0x43,0xc3,0x23,0xa3,0x63,0xe3,0x13,0x93,0x53,0xd3,0x33,0xb3,0x73,0xf3,
     84   0x0b,0x8b,0x4b,0xcb,0x2b,0xab,0x6b,0xeb,0x1b,0x9b,0x5b,0xdb,0x3b,0xbb,0x7b,0xfb,
     85   0x07,0x87,0x47,0xc7,0x27,0xa7,0x67,0xe7,0x17,0x97,0x57,0xd7,0x37,0xb7,0x77,0xf7,
     86   0x0f,0x8f,0x4f,0xcf,0x2f,0xaf,0x6f,0xef,0x1f,0x9f,0x5f,0xdf,0x3f,0xbf,0x7f,0xff
     87 };
     88 
     89 static const double DFTTab[][2] =
     90 {
     91 { 1.00000000000000000, 0.00000000000000000 },
     92 {-1.00000000000000000, 0.00000000000000000 },
     93 { 0.00000000000000000, 1.00000000000000000 },
     94 { 0.70710678118654757, 0.70710678118654746 },
     95 { 0.92387953251128674, 0.38268343236508978 },
     96 { 0.98078528040323043, 0.19509032201612825 },
     97 { 0.99518472667219693, 0.09801714032956060 },
     98 { 0.99879545620517241, 0.04906767432741802 },
     99 { 0.99969881869620425, 0.02454122852291229 },
    100 { 0.99992470183914450, 0.01227153828571993 },
    101 { 0.99998117528260111, 0.00613588464915448 },
    102 { 0.99999529380957619, 0.00306795676296598 },
    103 { 0.99999882345170188, 0.00153398018628477 },
    104 { 0.99999970586288223, 0.00076699031874270 },
    105 { 0.99999992646571789, 0.00038349518757140 },
    106 { 0.99999998161642933, 0.00019174759731070 },
    107 { 0.99999999540410733, 0.00009587379909598 },
    108 { 0.99999999885102686, 0.00004793689960307 },
    109 { 0.99999999971275666, 0.00002396844980842 },
    110 { 0.99999999992818922, 0.00001198422490507 },
    111 { 0.99999999998204725, 0.00000599211245264 },
    112 { 0.99999999999551181, 0.00000299605622633 },
    113 { 0.99999999999887801, 0.00000149802811317 },
    114 { 0.99999999999971945, 0.00000074901405658 },
    115 { 0.99999999999992983, 0.00000037450702829 },
    116 { 0.99999999999998246, 0.00000018725351415 },
    117 { 0.99999999999999567, 0.00000009362675707 },
    118 { 0.99999999999999889, 0.00000004681337854 },
    119 { 0.99999999999999978, 0.00000002340668927 },
    120 { 0.99999999999999989, 0.00000001170334463 },
    121 { 1.00000000000000000, 0.00000000585167232 },
    122 { 1.00000000000000000, 0.00000000292583616 }
    123 };
    124 
    125 #define BitRev(i,shift) \
    126    ((int)((((unsigned)bitrevTab[(i)&255] << 24)+ \
    127            ((unsigned)bitrevTab[((i)>> 8)&255] << 16)+ \
    128            ((unsigned)bitrevTab[((i)>>16)&255] <<  8)+ \
    129            ((unsigned)bitrevTab[((i)>>24)])) >> (shift)))
    130 
    131 static int
    132 DFTFactorize( int n, int* factors )
    133 {
    134     int nf = 0, f, i, j;
    135 
    136     if( n <= 5 )
    137     {
    138         factors[0] = n;
    139         return 1;
    140     }
    141 
    142     f = (((n - 1)^n)+1) >> 1;
    143     if( f > 1 )
    144     {
    145         factors[nf++] = f;
    146         n = f == n ? 1 : n/f;
    147     }
    148 
    149     for( f = 3; n > 1; )
    150     {
    151         int d = n/f;
    152         if( d*f == n )
    153         {
    154             factors[nf++] = f;
    155             n = d;
    156         }
    157         else
    158         {
    159             f += 2;
    160             if( f*f > n )
    161                 break;
    162         }
    163     }
    164 
    165     if( n > 1 )
    166         factors[nf++] = n;
    167 
    168     f = (factors[0] & 1) == 0;
    169     for( i = f; i < (nf+f)/2; i++ )
    170         CV_SWAP( factors[i], factors[nf-i-1+f], j );
    171 
    172     return nf;
    173 }
    174 
    175 static void
    176 DFTInit( int n0, int nf, int* factors, int* itab, int elem_size, void* _wave, int inv_itab )
    177 {
    178     int digits[34], radix[34];
    179     int n = factors[0], m = 0;
    180     int* itab0 = itab;
    181     int i, j, k;
    182     Complex<double> w, w1;
    183     double t;
    184 
    185     if( n0 <= 5 )
    186     {
    187         itab[0] = 0;
    188         itab[n0-1] = n0-1;
    189 
    190         if( n0 != 4 )
    191         {
    192             for( i = 1; i < n0-1; i++ )
    193                 itab[i] = i;
    194         }
    195         else
    196         {
    197             itab[1] = 2;
    198             itab[2] = 1;
    199         }
    200         if( n0 == 5 )
    201         {
    202             if( elem_size == sizeof(Complex<double>) )
    203                 ((Complex<double>*)_wave)[0] = Complex<double>(1.,0.);
    204             else
    205                 ((Complex<float>*)_wave)[0] = Complex<float>(1.f,0.f);
    206         }
    207         if( n0 != 4 )
    208             return;
    209         m = 2;
    210     }
    211     else
    212     {
    213         // radix[] is initialized from index 'nf' down to zero
    214         assert (nf < 34);
    215         radix[nf] = 1;
    216         digits[nf] = 0;
    217         for( i = 0; i < nf; i++ )
    218         {
    219             digits[i] = 0;
    220             radix[nf-i-1] = radix[nf-i]*factors[nf-i-1];
    221         }
    222 
    223         if( inv_itab && factors[0] != factors[nf-1] )
    224             itab = (int*)_wave;
    225 
    226         if( (n & 1) == 0 )
    227         {
    228             int a = radix[1], na2 = n*a>>1, na4 = na2 >> 1;
    229             for( m = 0; (unsigned)(1 << m) < (unsigned)n; m++ )
    230                 ;
    231             if( n <= 2  )
    232             {
    233                 itab[0] = 0;
    234                 itab[1] = na2;
    235             }
    236             else if( n <= 256 )
    237             {
    238                 int shift = 10 - m;
    239                 for( i = 0; i <= n - 4; i += 4 )
    240                 {
    241                     j = (bitrevTab[i>>2]>>shift)*a;
    242                     itab[i] = j;
    243                     itab[i+1] = j + na2;
    244                     itab[i+2] = j + na4;
    245                     itab[i+3] = j + na2 + na4;
    246                 }
    247             }
    248             else
    249             {
    250                 int shift = 34 - m;
    251                 for( i = 0; i < n; i += 4 )
    252                 {
    253                     int i4 = i >> 2;
    254                     j = BitRev(i4,shift)*a;
    255                     itab[i] = j;
    256                     itab[i+1] = j + na2;
    257                     itab[i+2] = j + na4;
    258                     itab[i+3] = j + na2 + na4;
    259                 }
    260             }
    261 
    262             digits[1]++;
    263 
    264             if( nf >= 2 )
    265             {
    266                 for( i = n, j = radix[2]; i < n0; )
    267                 {
    268                     for( k = 0; k < n; k++ )
    269                         itab[i+k] = itab[k] + j;
    270                     if( (i += n) >= n0 )
    271                         break;
    272                     j += radix[2];
    273                     for( k = 1; ++digits[k] >= factors[k]; k++ )
    274                     {
    275                         digits[k] = 0;
    276                         j += radix[k+2] - radix[k];
    277                     }
    278                 }
    279             }
    280         }
    281         else
    282         {
    283             for( i = 0, j = 0;; )
    284             {
    285                 itab[i] = j;
    286                 if( ++i >= n0 )
    287                     break;
    288                 j += radix[1];
    289                 for( k = 0; ++digits[k] >= factors[k]; k++ )
    290                 {
    291                     digits[k] = 0;
    292                     j += radix[k+2] - radix[k];
    293                 }
    294             }
    295         }
    296 
    297         if( itab != itab0 )
    298         {
    299             itab0[0] = 0;
    300             for( i = n0 & 1; i < n0; i += 2 )
    301             {
    302                 int k0 = itab[i];
    303                 int k1 = itab[i+1];
    304                 itab0[k0] = i;
    305                 itab0[k1] = i+1;
    306             }
    307         }
    308     }
    309 
    310     if( (n0 & (n0-1)) == 0 )
    311     {
    312         w.re = w1.re = DFTTab[m][0];
    313         w.im = w1.im = -DFTTab[m][1];
    314     }
    315     else
    316     {
    317         t = -CV_PI*2/n0;
    318         w.im = w1.im = sin(t);
    319         w.re = w1.re = std::sqrt(1. - w1.im*w1.im);
    320     }
    321     n = (n0+1)/2;
    322 
    323     if( elem_size == sizeof(Complex<double>) )
    324     {
    325         Complex<double>* wave = (Complex<double>*)_wave;
    326 
    327         wave[0].re = 1.;
    328         wave[0].im = 0.;
    329 
    330         if( (n0 & 1) == 0 )
    331         {
    332             wave[n].re = -1.;
    333             wave[n].im = 0;
    334         }
    335 
    336         for( i = 1; i < n; i++ )
    337         {
    338             wave[i] = w;
    339             wave[n0-i].re = w.re;
    340             wave[n0-i].im = -w.im;
    341 
    342             t = w.re*w1.re - w.im*w1.im;
    343             w.im = w.re*w1.im + w.im*w1.re;
    344             w.re = t;
    345         }
    346     }
    347     else
    348     {
    349         Complex<float>* wave = (Complex<float>*)_wave;
    350         assert( elem_size == sizeof(Complex<float>) );
    351 
    352         wave[0].re = 1.f;
    353         wave[0].im = 0.f;
    354 
    355         if( (n0 & 1) == 0 )
    356         {
    357             wave[n].re = -1.f;
    358             wave[n].im = 0.f;
    359         }
    360 
    361         for( i = 1; i < n; i++ )
    362         {
    363             wave[i].re = (float)w.re;
    364             wave[i].im = (float)w.im;
    365             wave[n0-i].re = (float)w.re;
    366             wave[n0-i].im = (float)-w.im;
    367 
    368             t = w.re*w1.re - w.im*w1.im;
    369             w.im = w.re*w1.im + w.im*w1.re;
    370             w.re = t;
    371         }
    372     }
    373 }
    374 
    375 template<typename T> struct DFT_VecR4
    376 {
    377     int operator()(Complex<T>*, int, int, int&, const Complex<T>*) const { return 1; }
    378 };
    379 
    380 #if CV_SSE3
    381 
    382 // optimized radix-4 transform
    383 template<> struct DFT_VecR4<float>
    384 {
    385     int operator()(Complex<float>* dst, int N, int n0, int& _dw0, const Complex<float>* wave) const
    386     {
    387         int n = 1, i, j, nx, dw, dw0 = _dw0;
    388         __m128 z = _mm_setzero_ps(), x02=z, x13=z, w01=z, w23=z, y01, y23, t0, t1;
    389         Cv32suf t; t.i = 0x80000000;
    390         __m128 neg0_mask = _mm_load_ss(&t.f);
    391         __m128 neg3_mask = _mm_shuffle_ps(neg0_mask, neg0_mask, _MM_SHUFFLE(0,1,2,3));
    392 
    393         for( ; n*4 <= N; )
    394         {
    395             nx = n;
    396             n *= 4;
    397             dw0 /= 4;
    398 
    399             for( i = 0; i < n0; i += n )
    400             {
    401                 Complexf *v0, *v1;
    402 
    403                 v0 = dst + i;
    404                 v1 = v0 + nx*2;
    405 
    406                 x02 = _mm_loadl_pi(x02, (const __m64*)&v0[0]);
    407                 x13 = _mm_loadl_pi(x13, (const __m64*)&v0[nx]);
    408                 x02 = _mm_loadh_pi(x02, (const __m64*)&v1[0]);
    409                 x13 = _mm_loadh_pi(x13, (const __m64*)&v1[nx]);
    410 
    411                 y01 = _mm_add_ps(x02, x13);
    412                 y23 = _mm_sub_ps(x02, x13);
    413                 t1 = _mm_xor_ps(_mm_shuffle_ps(y01, y23, _MM_SHUFFLE(2,3,3,2)), neg3_mask);
    414                 t0 = _mm_movelh_ps(y01, y23);
    415                 y01 = _mm_add_ps(t0, t1);
    416                 y23 = _mm_sub_ps(t0, t1);
    417 
    418                 _mm_storel_pi((__m64*)&v0[0], y01);
    419                 _mm_storeh_pi((__m64*)&v0[nx], y01);
    420                 _mm_storel_pi((__m64*)&v1[0], y23);
    421                 _mm_storeh_pi((__m64*)&v1[nx], y23);
    422 
    423                 for( j = 1, dw = dw0; j < nx; j++, dw += dw0 )
    424                 {
    425                     v0 = dst + i + j;
    426                     v1 = v0 + nx*2;
    427 
    428                     x13 = _mm_loadl_pi(x13, (const __m64*)&v0[nx]);
    429                     w23 = _mm_loadl_pi(w23, (const __m64*)&wave[dw*2]);
    430                     x13 = _mm_loadh_pi(x13, (const __m64*)&v1[nx]); // x1, x3 = r1 i1 r3 i3
    431                     w23 = _mm_loadh_pi(w23, (const __m64*)&wave[dw*3]); // w2, w3 = wr2 wi2 wr3 wi3
    432 
    433                     t0 = _mm_mul_ps(_mm_moveldup_ps(x13), w23);
    434                     t1 = _mm_mul_ps(_mm_movehdup_ps(x13), _mm_shuffle_ps(w23, w23, _MM_SHUFFLE(2,3,0,1)));
    435                     x13 = _mm_addsub_ps(t0, t1);
    436                     // re(x1*w2), im(x1*w2), re(x3*w3), im(x3*w3)
    437                     x02 = _mm_loadl_pi(x02, (const __m64*)&v1[0]); // x2 = r2 i2
    438                     w01 = _mm_loadl_pi(w01, (const __m64*)&wave[dw]); // w1 = wr1 wi1
    439                     x02 = _mm_shuffle_ps(x02, x02, _MM_SHUFFLE(0,0,1,1));
    440                     w01 = _mm_shuffle_ps(w01, w01, _MM_SHUFFLE(1,0,0,1));
    441                     x02 = _mm_mul_ps(x02, w01);
    442                     x02 = _mm_addsub_ps(x02, _mm_movelh_ps(x02, x02));
    443                     // re(x0) im(x0) re(x2*w1), im(x2*w1)
    444                     x02 = _mm_loadl_pi(x02, (const __m64*)&v0[0]);
    445 
    446                     y01 = _mm_add_ps(x02, x13);
    447                     y23 = _mm_sub_ps(x02, x13);
    448                     t1 = _mm_xor_ps(_mm_shuffle_ps(y01, y23, _MM_SHUFFLE(2,3,3,2)), neg3_mask);
    449                     t0 = _mm_movelh_ps(y01, y23);
    450                     y01 = _mm_add_ps(t0, t1);
    451                     y23 = _mm_sub_ps(t0, t1);
    452 
    453                     _mm_storel_pi((__m64*)&v0[0], y01);
    454                     _mm_storeh_pi((__m64*)&v0[nx], y01);
    455                     _mm_storel_pi((__m64*)&v1[0], y23);
    456                     _mm_storeh_pi((__m64*)&v1[nx], y23);
    457                 }
    458             }
    459         }
    460 
    461         _dw0 = dw0;
    462         return n;
    463     }
    464 };
    465 
    466 #endif
    467 
    468 #ifdef USE_IPP_DFT
    469 static IppStatus ippsDFTFwd_CToC( const Complex<float>* src, Complex<float>* dst,
    470                              const void* spec, uchar* buf)
    471 {
    472     return ippsDFTFwd_CToC_32fc( (const Ipp32fc*)src, (Ipp32fc*)dst,
    473                                  (const IppsDFTSpec_C_32fc*)spec, buf);
    474 }
    475 
    476 static IppStatus ippsDFTFwd_CToC( const Complex<double>* src, Complex<double>* dst,
    477                              const void* spec, uchar* buf)
    478 {
    479     return ippsDFTFwd_CToC_64fc( (const Ipp64fc*)src, (Ipp64fc*)dst,
    480                                  (const IppsDFTSpec_C_64fc*)spec, buf);
    481 }
    482 
    483 static IppStatus ippsDFTInv_CToC( const Complex<float>* src, Complex<float>* dst,
    484                              const void* spec, uchar* buf)
    485 {
    486     return ippsDFTInv_CToC_32fc( (const Ipp32fc*)src, (Ipp32fc*)dst,
    487                                  (const IppsDFTSpec_C_32fc*)spec, buf);
    488 }
    489 
    490 static IppStatus ippsDFTInv_CToC( const Complex<double>* src, Complex<double>* dst,
    491                                   const void* spec, uchar* buf)
    492 {
    493     return ippsDFTInv_CToC_64fc( (const Ipp64fc*)src, (Ipp64fc*)dst,
    494                                  (const IppsDFTSpec_C_64fc*)spec, buf);
    495 }
    496 
    497 static IppStatus ippsDFTFwd_RToPack( const float* src, float* dst,
    498                                      const void* spec, uchar* buf)
    499 {
    500     return ippsDFTFwd_RToPack_32f( src, dst, (const IppsDFTSpec_R_32f*)spec, buf);
    501 }
    502 
    503 static IppStatus ippsDFTFwd_RToPack( const double* src, double* dst,
    504                                      const void* spec, uchar* buf)
    505 {
    506     return ippsDFTFwd_RToPack_64f( src, dst, (const IppsDFTSpec_R_64f*)spec, buf);
    507 }
    508 
    509 static IppStatus ippsDFTInv_PackToR( const float* src, float* dst,
    510                                      const void* spec, uchar* buf)
    511 {
    512     return ippsDFTInv_PackToR_32f( src, dst, (const IppsDFTSpec_R_32f*)spec, buf);
    513 }
    514 
    515 static IppStatus ippsDFTInv_PackToR( const double* src, double* dst,
    516                                      const void* spec, uchar* buf)
    517 {
    518     return ippsDFTInv_PackToR_64f( src, dst, (const IppsDFTSpec_R_64f*)spec, buf);
    519 }
    520 #endif
    521 
    522 enum { DFT_NO_PERMUTE=256, DFT_COMPLEX_INPUT_OR_OUTPUT=512 };
    523 
    524 // mixed-radix complex discrete Fourier transform: double-precision version
    525 template<typename T> static void
    526 DFT( const Complex<T>* src, Complex<T>* dst, int n,
    527      int nf, const int* factors, const int* itab,
    528      const Complex<T>* wave, int tab_size,
    529      const void*
    530 #ifdef USE_IPP_DFT
    531      spec
    532 #endif
    533      , Complex<T>* buf,
    534      int flags, double _scale )
    535 {
    536     static const T sin_120 = (T)0.86602540378443864676372317075294;
    537     static const T fft5_2 = (T)0.559016994374947424102293417182819;
    538     static const T fft5_3 = (T)-0.951056516295153572116439333379382;
    539     static const T fft5_4 = (T)-1.538841768587626701285145288018455;
    540     static const T fft5_5 = (T)0.363271264002680442947733378740309;
    541 
    542     int n0 = n, f_idx, nx;
    543     int inv = flags & DFT_INVERSE;
    544     int dw0 = tab_size, dw;
    545     int i, j, k;
    546     Complex<T> t;
    547     T scale = (T)_scale;
    548     int tab_step;
    549 
    550 #ifdef USE_IPP_DFT
    551     if( spec )
    552     {
    553         if( !inv )
    554         {
    555             if (ippsDFTFwd_CToC( src, dst, spec, (uchar*)buf ) >= 0)
    556             {
    557                 CV_IMPL_ADD(CV_IMPL_IPP);
    558                 return;
    559             }
    560         }
    561         else
    562         {
    563             if (ippsDFTInv_CToC( src, dst, spec, (uchar*)buf ) >= 0)
    564             {
    565                 CV_IMPL_ADD(CV_IMPL_IPP);
    566                 return;
    567             }
    568         }
    569         setIppErrorStatus();
    570     }
    571 #endif
    572 
    573     tab_step = tab_size == n ? 1 : tab_size == n*2 ? 2 : tab_size/n;
    574 
    575     // 0. shuffle data
    576     if( dst != src )
    577     {
    578         assert( (flags & DFT_NO_PERMUTE) == 0 );
    579         if( !inv )
    580         {
    581             for( i = 0; i <= n - 2; i += 2, itab += 2*tab_step )
    582             {
    583                 int k0 = itab[0], k1 = itab[tab_step];
    584                 assert( (unsigned)k0 < (unsigned)n && (unsigned)k1 < (unsigned)n );
    585                 dst[i] = src[k0]; dst[i+1] = src[k1];
    586             }
    587 
    588             if( i < n )
    589                 dst[n-1] = src[n-1];
    590         }
    591         else
    592         {
    593             for( i = 0; i <= n - 2; i += 2, itab += 2*tab_step )
    594             {
    595                 int k0 = itab[0], k1 = itab[tab_step];
    596                 assert( (unsigned)k0 < (unsigned)n && (unsigned)k1 < (unsigned)n );
    597                 t.re = src[k0].re; t.im = -src[k0].im;
    598                 dst[i] = t;
    599                 t.re = src[k1].re; t.im = -src[k1].im;
    600                 dst[i+1] = t;
    601             }
    602 
    603             if( i < n )
    604             {
    605                 t.re = src[n-1].re; t.im = -src[n-1].im;
    606                 dst[i] = t;
    607             }
    608         }
    609     }
    610     else
    611     {
    612         if( (flags & DFT_NO_PERMUTE) == 0 )
    613         {
    614             CV_Assert( factors[0] == factors[nf-1] );
    615             if( nf == 1 )
    616             {
    617                 if( (n & 3) == 0 )
    618                 {
    619                     int n2 = n/2;
    620                     Complex<T>* dsth = dst + n2;
    621 
    622                     for( i = 0; i < n2; i += 2, itab += tab_step*2 )
    623                     {
    624                         j = itab[0];
    625                         assert( (unsigned)j < (unsigned)n2 );
    626 
    627                         CV_SWAP(dst[i+1], dsth[j], t);
    628                         if( j > i )
    629                         {
    630                             CV_SWAP(dst[i], dst[j], t);
    631                             CV_SWAP(dsth[i+1], dsth[j+1], t);
    632                         }
    633                     }
    634                 }
    635                 // else do nothing
    636             }
    637             else
    638             {
    639                 for( i = 0; i < n; i++, itab += tab_step )
    640                 {
    641                     j = itab[0];
    642                     assert( (unsigned)j < (unsigned)n );
    643                     if( j > i )
    644                         CV_SWAP(dst[i], dst[j], t);
    645                 }
    646             }
    647         }
    648 
    649         if( inv )
    650         {
    651             for( i = 0; i <= n - 2; i += 2 )
    652             {
    653                 T t0 = -dst[i].im;
    654                 T t1 = -dst[i+1].im;
    655                 dst[i].im = t0; dst[i+1].im = t1;
    656             }
    657 
    658             if( i < n )
    659                 dst[n-1].im = -dst[n-1].im;
    660         }
    661     }
    662 
    663     n = 1;
    664     // 1. power-2 transforms
    665     if( (factors[0] & 1) == 0 )
    666     {
    667         if( factors[0] >= 4 && checkHardwareSupport(CV_CPU_SSE3))
    668         {
    669             DFT_VecR4<T> vr4;
    670             n = vr4(dst, factors[0], n0, dw0, wave);
    671         }
    672 
    673         // radix-4 transform
    674         for( ; n*4 <= factors[0]; )
    675         {
    676             nx = n;
    677             n *= 4;
    678             dw0 /= 4;
    679 
    680             for( i = 0; i < n0; i += n )
    681             {
    682                 Complex<T> *v0, *v1;
    683                 T r0, i0, r1, i1, r2, i2, r3, i3, r4, i4;
    684 
    685                 v0 = dst + i;
    686                 v1 = v0 + nx*2;
    687 
    688                 r0 = v1[0].re; i0 = v1[0].im;
    689                 r4 = v1[nx].re; i4 = v1[nx].im;
    690 
    691                 r1 = r0 + r4; i1 = i0 + i4;
    692                 r3 = i0 - i4; i3 = r4 - r0;
    693 
    694                 r2 = v0[0].re; i2 = v0[0].im;
    695                 r4 = v0[nx].re; i4 = v0[nx].im;
    696 
    697                 r0 = r2 + r4; i0 = i2 + i4;
    698                 r2 -= r4; i2 -= i4;
    699 
    700                 v0[0].re = r0 + r1; v0[0].im = i0 + i1;
    701                 v1[0].re = r0 - r1; v1[0].im = i0 - i1;
    702                 v0[nx].re = r2 + r3; v0[nx].im = i2 + i3;
    703                 v1[nx].re = r2 - r3; v1[nx].im = i2 - i3;
    704 
    705                 for( j = 1, dw = dw0; j < nx; j++, dw += dw0 )
    706                 {
    707                     v0 = dst + i + j;
    708                     v1 = v0 + nx*2;
    709 
    710                     r2 = v0[nx].re*wave[dw*2].re - v0[nx].im*wave[dw*2].im;
    711                     i2 = v0[nx].re*wave[dw*2].im + v0[nx].im*wave[dw*2].re;
    712                     r0 = v1[0].re*wave[dw].im + v1[0].im*wave[dw].re;
    713                     i0 = v1[0].re*wave[dw].re - v1[0].im*wave[dw].im;
    714                     r3 = v1[nx].re*wave[dw*3].im + v1[nx].im*wave[dw*3].re;
    715                     i3 = v1[nx].re*wave[dw*3].re - v1[nx].im*wave[dw*3].im;
    716 
    717                     r1 = i0 + i3; i1 = r0 + r3;
    718                     r3 = r0 - r3; i3 = i3 - i0;
    719                     r4 = v0[0].re; i4 = v0[0].im;
    720 
    721                     r0 = r4 + r2; i0 = i4 + i2;
    722                     r2 = r4 - r2; i2 = i4 - i2;
    723 
    724                     v0[0].re = r0 + r1; v0[0].im = i0 + i1;
    725                     v1[0].re = r0 - r1; v1[0].im = i0 - i1;
    726                     v0[nx].re = r2 + r3; v0[nx].im = i2 + i3;
    727                     v1[nx].re = r2 - r3; v1[nx].im = i2 - i3;
    728                 }
    729             }
    730         }
    731 
    732         for( ; n < factors[0]; )
    733         {
    734             // do the remaining radix-2 transform
    735             nx = n;
    736             n *= 2;
    737             dw0 /= 2;
    738 
    739             for( i = 0; i < n0; i += n )
    740             {
    741                 Complex<T>* v = dst + i;
    742                 T r0 = v[0].re + v[nx].re;
    743                 T i0 = v[0].im + v[nx].im;
    744                 T r1 = v[0].re - v[nx].re;
    745                 T i1 = v[0].im - v[nx].im;
    746                 v[0].re = r0; v[0].im = i0;
    747                 v[nx].re = r1; v[nx].im = i1;
    748 
    749                 for( j = 1, dw = dw0; j < nx; j++, dw += dw0 )
    750                 {
    751                     v = dst + i + j;
    752                     r1 = v[nx].re*wave[dw].re - v[nx].im*wave[dw].im;
    753                     i1 = v[nx].im*wave[dw].re + v[nx].re*wave[dw].im;
    754                     r0 = v[0].re; i0 = v[0].im;
    755 
    756                     v[0].re = r0 + r1; v[0].im = i0 + i1;
    757                     v[nx].re = r0 - r1; v[nx].im = i0 - i1;
    758                 }
    759             }
    760         }
    761     }
    762 
    763     // 2. all the other transforms
    764     for( f_idx = (factors[0]&1) ? 0 : 1; f_idx < nf; f_idx++ )
    765     {
    766         int factor = factors[f_idx];
    767         nx = n;
    768         n *= factor;
    769         dw0 /= factor;
    770 
    771         if( factor == 3 )
    772         {
    773             // radix-3
    774             for( i = 0; i < n0; i += n )
    775             {
    776                 Complex<T>* v = dst + i;
    777 
    778                 T r1 = v[nx].re + v[nx*2].re;
    779                 T i1 = v[nx].im + v[nx*2].im;
    780                 T r0 = v[0].re;
    781                 T i0 = v[0].im;
    782                 T r2 = sin_120*(v[nx].im - v[nx*2].im);
    783                 T i2 = sin_120*(v[nx*2].re - v[nx].re);
    784                 v[0].re = r0 + r1; v[0].im = i0 + i1;
    785                 r0 -= (T)0.5*r1; i0 -= (T)0.5*i1;
    786                 v[nx].re = r0 + r2; v[nx].im = i0 + i2;
    787                 v[nx*2].re = r0 - r2; v[nx*2].im = i0 - i2;
    788 
    789                 for( j = 1, dw = dw0; j < nx; j++, dw += dw0 )
    790                 {
    791                     v = dst + i + j;
    792                     r0 = v[nx].re*wave[dw].re - v[nx].im*wave[dw].im;
    793                     i0 = v[nx].re*wave[dw].im + v[nx].im*wave[dw].re;
    794                     i2 = v[nx*2].re*wave[dw*2].re - v[nx*2].im*wave[dw*2].im;
    795                     r2 = v[nx*2].re*wave[dw*2].im + v[nx*2].im*wave[dw*2].re;
    796                     r1 = r0 + i2; i1 = i0 + r2;
    797 
    798                     r2 = sin_120*(i0 - r2); i2 = sin_120*(i2 - r0);
    799                     r0 = v[0].re; i0 = v[0].im;
    800                     v[0].re = r0 + r1; v[0].im = i0 + i1;
    801                     r0 -= (T)0.5*r1; i0 -= (T)0.5*i1;
    802                     v[nx].re = r0 + r2; v[nx].im = i0 + i2;
    803                     v[nx*2].re = r0 - r2; v[nx*2].im = i0 - i2;
    804                 }
    805             }
    806         }
    807         else if( factor == 5 )
    808         {
    809             // radix-5
    810             for( i = 0; i < n0; i += n )
    811             {
    812                 for( j = 0, dw = 0; j < nx; j++, dw += dw0 )
    813                 {
    814                     Complex<T>* v0 = dst + i + j;
    815                     Complex<T>* v1 = v0 + nx*2;
    816                     Complex<T>* v2 = v1 + nx*2;
    817 
    818                     T r0, i0, r1, i1, r2, i2, r3, i3, r4, i4, r5, i5;
    819 
    820                     r3 = v0[nx].re*wave[dw].re - v0[nx].im*wave[dw].im;
    821                     i3 = v0[nx].re*wave[dw].im + v0[nx].im*wave[dw].re;
    822                     r2 = v2[0].re*wave[dw*4].re - v2[0].im*wave[dw*4].im;
    823                     i2 = v2[0].re*wave[dw*4].im + v2[0].im*wave[dw*4].re;
    824 
    825                     r1 = r3 + r2; i1 = i3 + i2;
    826                     r3 -= r2; i3 -= i2;
    827 
    828                     r4 = v1[nx].re*wave[dw*3].re - v1[nx].im*wave[dw*3].im;
    829                     i4 = v1[nx].re*wave[dw*3].im + v1[nx].im*wave[dw*3].re;
    830                     r0 = v1[0].re*wave[dw*2].re - v1[0].im*wave[dw*2].im;
    831                     i0 = v1[0].re*wave[dw*2].im + v1[0].im*wave[dw*2].re;
    832 
    833                     r2 = r4 + r0; i2 = i4 + i0;
    834                     r4 -= r0; i4 -= i0;
    835 
    836                     r0 = v0[0].re; i0 = v0[0].im;
    837                     r5 = r1 + r2; i5 = i1 + i2;
    838 
    839                     v0[0].re = r0 + r5; v0[0].im = i0 + i5;
    840 
    841                     r0 -= (T)0.25*r5; i0 -= (T)0.25*i5;
    842                     r1 = fft5_2*(r1 - r2); i1 = fft5_2*(i1 - i2);
    843                     r2 = -fft5_3*(i3 + i4); i2 = fft5_3*(r3 + r4);
    844 
    845                     i3 *= -fft5_5; r3 *= fft5_5;
    846                     i4 *= -fft5_4; r4 *= fft5_4;
    847 
    848                     r5 = r2 + i3; i5 = i2 + r3;
    849                     r2 -= i4; i2 -= r4;
    850 
    851                     r3 = r0 + r1; i3 = i0 + i1;
    852                     r0 -= r1; i0 -= i1;
    853 
    854                     v0[nx].re = r3 + r2; v0[nx].im = i3 + i2;
    855                     v2[0].re = r3 - r2; v2[0].im = i3 - i2;
    856 
    857                     v1[0].re = r0 + r5; v1[0].im = i0 + i5;
    858                     v1[nx].re = r0 - r5; v1[nx].im = i0 - i5;
    859                 }
    860             }
    861         }
    862         else
    863         {
    864             // radix-"factor" - an odd number
    865             int p, q, factor2 = (factor - 1)/2;
    866             int d, dd, dw_f = tab_size/factor;
    867             Complex<T>* a = buf;
    868             Complex<T>* b = buf + factor2;
    869 
    870             for( i = 0; i < n0; i += n )
    871             {
    872                 for( j = 0, dw = 0; j < nx; j++, dw += dw0 )
    873                 {
    874                     Complex<T>* v = dst + i + j;
    875                     Complex<T> v_0 = v[0];
    876                     Complex<T> vn_0 = v_0;
    877 
    878                     if( j == 0 )
    879                     {
    880                         for( p = 1, k = nx; p <= factor2; p++, k += nx )
    881                         {
    882                             T r0 = v[k].re + v[n-k].re;
    883                             T i0 = v[k].im - v[n-k].im;
    884                             T r1 = v[k].re - v[n-k].re;
    885                             T i1 = v[k].im + v[n-k].im;
    886 
    887                             vn_0.re += r0; vn_0.im += i1;
    888                             a[p-1].re = r0; a[p-1].im = i0;
    889                             b[p-1].re = r1; b[p-1].im = i1;
    890                         }
    891                     }
    892                     else
    893                     {
    894                         const Complex<T>* wave_ = wave + dw*factor;
    895                         d = dw;
    896 
    897                         for( p = 1, k = nx; p <= factor2; p++, k += nx, d += dw )
    898                         {
    899                             T r2 = v[k].re*wave[d].re - v[k].im*wave[d].im;
    900                             T i2 = v[k].re*wave[d].im + v[k].im*wave[d].re;
    901 
    902                             T r1 = v[n-k].re*wave_[-d].re - v[n-k].im*wave_[-d].im;
    903                             T i1 = v[n-k].re*wave_[-d].im + v[n-k].im*wave_[-d].re;
    904 
    905                             T r0 = r2 + r1;
    906                             T i0 = i2 - i1;
    907                             r1 = r2 - r1;
    908                             i1 = i2 + i1;
    909 
    910                             vn_0.re += r0; vn_0.im += i1;
    911                             a[p-1].re = r0; a[p-1].im = i0;
    912                             b[p-1].re = r1; b[p-1].im = i1;
    913                         }
    914                     }
    915 
    916                     v[0] = vn_0;
    917 
    918                     for( p = 1, k = nx; p <= factor2; p++, k += nx )
    919                     {
    920                         Complex<T> s0 = v_0, s1 = v_0;
    921                         d = dd = dw_f*p;
    922 
    923                         for( q = 0; q < factor2; q++ )
    924                         {
    925                             T r0 = wave[d].re * a[q].re;
    926                             T i0 = wave[d].im * a[q].im;
    927                             T r1 = wave[d].re * b[q].im;
    928                             T i1 = wave[d].im * b[q].re;
    929 
    930                             s1.re += r0 + i0; s0.re += r0 - i0;
    931                             s1.im += r1 - i1; s0.im += r1 + i1;
    932 
    933                             d += dd;
    934                             d -= -(d >= tab_size) & tab_size;
    935                         }
    936 
    937                         v[k] = s0;
    938                         v[n-k] = s1;
    939                     }
    940                 }
    941             }
    942         }
    943     }
    944 
    945     if( scale != 1 )
    946     {
    947         T re_scale = scale, im_scale = scale;
    948         if( inv )
    949             im_scale = -im_scale;
    950 
    951         for( i = 0; i < n0; i++ )
    952         {
    953             T t0 = dst[i].re*re_scale;
    954             T t1 = dst[i].im*im_scale;
    955             dst[i].re = t0;
    956             dst[i].im = t1;
    957         }
    958     }
    959     else if( inv )
    960     {
    961         for( i = 0; i <= n0 - 2; i += 2 )
    962         {
    963             T t0 = -dst[i].im;
    964             T t1 = -dst[i+1].im;
    965             dst[i].im = t0;
    966             dst[i+1].im = t1;
    967         }
    968 
    969         if( i < n0 )
    970             dst[n0-1].im = -dst[n0-1].im;
    971     }
    972 }
    973 
    974 
    975 /* FFT of real vector
    976    output vector format:
    977      re(0), re(1), im(1), ... , re(n/2-1), im((n+1)/2-1) [, re((n+1)/2)] OR ...
    978      re(0), 0, re(1), im(1), ..., re(n/2-1), im((n+1)/2-1) [, re((n+1)/2), 0] */
    979 template<typename T> static void
    980 RealDFT( const T* src, T* dst, int n, int nf, int* factors, const int* itab,
    981          const Complex<T>* wave, int tab_size, const void*
    982 #ifdef USE_IPP_DFT
    983          spec
    984 #endif
    985          ,
    986          Complex<T>* buf, int flags, double _scale )
    987 {
    988     int complex_output = (flags & DFT_COMPLEX_INPUT_OR_OUTPUT) != 0;
    989     T scale = (T)_scale;
    990     int j, n2 = n >> 1;
    991     dst += complex_output;
    992 
    993 #ifdef USE_IPP_DFT
    994     if( spec )
    995     {
    996         if (ippsDFTFwd_RToPack( src, dst, spec, (uchar*)buf ) >=0)
    997         {
    998             if( complex_output )
    999             {
   1000                 dst[-1] = dst[0];
   1001                 dst[0] = 0;
   1002                 if( (n & 1) == 0 )
   1003                     dst[n] = 0;
   1004             }
   1005             CV_IMPL_ADD(CV_IMPL_IPP);
   1006             return;
   1007         }
   1008         setIppErrorStatus();
   1009     }
   1010 #endif
   1011     assert( tab_size == n );
   1012 
   1013     if( n == 1 )
   1014     {
   1015         dst[0] = src[0]*scale;
   1016     }
   1017     else if( n == 2 )
   1018     {
   1019         T t = (src[0] + src[1])*scale;
   1020         dst[1] = (src[0] - src[1])*scale;
   1021         dst[0] = t;
   1022     }
   1023     else if( n & 1 )
   1024     {
   1025         dst -= complex_output;
   1026         Complex<T>* _dst = (Complex<T>*)dst;
   1027         _dst[0].re = src[0]*scale;
   1028         _dst[0].im = 0;
   1029         for( j = 1; j < n; j += 2 )
   1030         {
   1031             T t0 = src[itab[j]]*scale;
   1032             T t1 = src[itab[j+1]]*scale;
   1033             _dst[j].re = t0;
   1034             _dst[j].im = 0;
   1035             _dst[j+1].re = t1;
   1036             _dst[j+1].im = 0;
   1037         }
   1038         DFT( _dst, _dst, n, nf, factors, itab, wave,
   1039              tab_size, 0, buf, DFT_NO_PERMUTE, 1 );
   1040         if( !complex_output )
   1041             dst[1] = dst[0];
   1042     }
   1043     else
   1044     {
   1045         T t0, t;
   1046         T h1_re, h1_im, h2_re, h2_im;
   1047         T scale2 = scale*(T)0.5;
   1048         factors[0] >>= 1;
   1049 
   1050         DFT( (Complex<T>*)src, (Complex<T>*)dst, n2, nf - (factors[0] == 1),
   1051              factors + (factors[0] == 1),
   1052              itab, wave, tab_size, 0, buf, 0, 1 );
   1053         factors[0] <<= 1;
   1054 
   1055         t = dst[0] - dst[1];
   1056         dst[0] = (dst[0] + dst[1])*scale;
   1057         dst[1] = t*scale;
   1058 
   1059         t0 = dst[n2];
   1060         t = dst[n-1];
   1061         dst[n-1] = dst[1];
   1062 
   1063         for( j = 2, wave++; j < n2; j += 2, wave++ )
   1064         {
   1065             /* calc odd */
   1066             h2_re = scale2*(dst[j+1] + t);
   1067             h2_im = scale2*(dst[n-j] - dst[j]);
   1068 
   1069             /* calc even */
   1070             h1_re = scale2*(dst[j] + dst[n-j]);
   1071             h1_im = scale2*(dst[j+1] - t);
   1072 
   1073             /* rotate */
   1074             t = h2_re*wave->re - h2_im*wave->im;
   1075             h2_im = h2_re*wave->im + h2_im*wave->re;
   1076             h2_re = t;
   1077             t = dst[n-j-1];
   1078 
   1079             dst[j-1] = h1_re + h2_re;
   1080             dst[n-j-1] = h1_re - h2_re;
   1081             dst[j] = h1_im + h2_im;
   1082             dst[n-j] = h2_im - h1_im;
   1083         }
   1084 
   1085         if( j <= n2 )
   1086         {
   1087             dst[n2-1] = t0*scale;
   1088             dst[n2] = -t*scale;
   1089         }
   1090     }
   1091 
   1092     if( complex_output && ((n & 1) == 0 || n == 1))
   1093     {
   1094         dst[-1] = dst[0];
   1095         dst[0] = 0;
   1096         if( n > 1 )
   1097             dst[n] = 0;
   1098     }
   1099 }
   1100 
   1101 /* Inverse FFT of complex conjugate-symmetric vector
   1102    input vector format:
   1103       re[0], re[1], im[1], ... , re[n/2-1], im[n/2-1], re[n/2] OR
   1104       re(0), 0, re(1), im(1), ..., re(n/2-1), im((n+1)/2-1) [, re((n+1)/2), 0] */
   1105 template<typename T> static void
   1106 CCSIDFT( const T* src, T* dst, int n, int nf, int* factors, const int* itab,
   1107          const Complex<T>* wave, int tab_size,
   1108          const void*
   1109 #ifdef USE_IPP_DFT
   1110          spec
   1111 #endif
   1112          , Complex<T>* buf,
   1113          int flags, double _scale )
   1114 {
   1115     int complex_input = (flags & DFT_COMPLEX_INPUT_OR_OUTPUT) != 0;
   1116     int j, k, n2 = (n+1) >> 1;
   1117     T scale = (T)_scale;
   1118     T save_s1 = 0.;
   1119     T t0, t1, t2, t3, t;
   1120 
   1121     assert( tab_size == n );
   1122 
   1123     if( complex_input )
   1124     {
   1125         assert( src != dst );
   1126         save_s1 = src[1];
   1127         ((T*)src)[1] = src[0];
   1128         src++;
   1129     }
   1130 #ifdef USE_IPP_DFT
   1131     if( spec )
   1132     {
   1133         if (ippsDFTInv_PackToR( src, dst, spec, (uchar*)buf ) >=0)
   1134         {
   1135             if( complex_input )
   1136                 ((T*)src)[0] = (T)save_s1;
   1137             CV_IMPL_ADD(CV_IMPL_IPP);
   1138             return;
   1139         }
   1140 
   1141         setIppErrorStatus();
   1142     }
   1143 #endif
   1144     if( n == 1 )
   1145     {
   1146         dst[0] = (T)(src[0]*scale);
   1147     }
   1148     else if( n == 2 )
   1149     {
   1150         t = (src[0] + src[1])*scale;
   1151         dst[1] = (src[0] - src[1])*scale;
   1152         dst[0] = t;
   1153     }
   1154     else if( n & 1 )
   1155     {
   1156         Complex<T>* _src = (Complex<T>*)(src-1);
   1157         Complex<T>* _dst = (Complex<T>*)dst;
   1158 
   1159         _dst[0].re = src[0];
   1160         _dst[0].im = 0;
   1161         for( j = 1; j < n2; j++ )
   1162         {
   1163             int k0 = itab[j], k1 = itab[n-j];
   1164             t0 = _src[j].re; t1 = _src[j].im;
   1165             _dst[k0].re = t0; _dst[k0].im = -t1;
   1166             _dst[k1].re = t0; _dst[k1].im = t1;
   1167         }
   1168 
   1169         DFT( _dst, _dst, n, nf, factors, itab, wave,
   1170              tab_size, 0, buf, DFT_NO_PERMUTE, 1. );
   1171         dst[0] *= scale;
   1172         for( j = 1; j < n; j += 2 )
   1173         {
   1174             t0 = dst[j*2]*scale;
   1175             t1 = dst[j*2+2]*scale;
   1176             dst[j] = t0;
   1177             dst[j+1] = t1;
   1178         }
   1179     }
   1180     else
   1181     {
   1182         int inplace = src == dst;
   1183         const Complex<T>* w = wave;
   1184 
   1185         t = src[1];
   1186         t0 = (src[0] + src[n-1]);
   1187         t1 = (src[n-1] - src[0]);
   1188         dst[0] = t0;
   1189         dst[1] = t1;
   1190 
   1191         for( j = 2, w++; j < n2; j += 2, w++ )
   1192         {
   1193             T h1_re, h1_im, h2_re, h2_im;
   1194 
   1195             h1_re = (t + src[n-j-1]);
   1196             h1_im = (src[j] - src[n-j]);
   1197 
   1198             h2_re = (t - src[n-j-1]);
   1199             h2_im = (src[j] + src[n-j]);
   1200 
   1201             t = h2_re*w->re + h2_im*w->im;
   1202             h2_im = h2_im*w->re - h2_re*w->im;
   1203             h2_re = t;
   1204 
   1205             t = src[j+1];
   1206             t0 = h1_re - h2_im;
   1207             t1 = -h1_im - h2_re;
   1208             t2 = h1_re + h2_im;
   1209             t3 = h1_im - h2_re;
   1210 
   1211             if( inplace )
   1212             {
   1213                 dst[j] = t0;
   1214                 dst[j+1] = t1;
   1215                 dst[n-j] = t2;
   1216                 dst[n-j+1]= t3;
   1217             }
   1218             else
   1219             {
   1220                 int j2 = j >> 1;
   1221                 k = itab[j2];
   1222                 dst[k] = t0;
   1223                 dst[k+1] = t1;
   1224                 k = itab[n2-j2];
   1225                 dst[k] = t2;
   1226                 dst[k+1]= t3;
   1227             }
   1228         }
   1229 
   1230         if( j <= n2 )
   1231         {
   1232             t0 = t*2;
   1233             t1 = src[n2]*2;
   1234 
   1235             if( inplace )
   1236             {
   1237                 dst[n2] = t0;
   1238                 dst[n2+1] = t1;
   1239             }
   1240             else
   1241             {
   1242                 k = itab[n2];
   1243                 dst[k*2] = t0;
   1244                 dst[k*2+1] = t1;
   1245             }
   1246         }
   1247 
   1248         factors[0] >>= 1;
   1249         DFT( (Complex<T>*)dst, (Complex<T>*)dst, n2,
   1250              nf - (factors[0] == 1),
   1251              factors + (factors[0] == 1), itab,
   1252              wave, tab_size, 0, buf,
   1253              inplace ? 0 : DFT_NO_PERMUTE, 1. );
   1254         factors[0] <<= 1;
   1255 
   1256         for( j = 0; j < n; j += 2 )
   1257         {
   1258             t0 = dst[j]*scale;
   1259             t1 = dst[j+1]*(-scale);
   1260             dst[j] = t0;
   1261             dst[j+1] = t1;
   1262         }
   1263     }
   1264     if( complex_input )
   1265         ((T*)src)[0] = (T)save_s1;
   1266 }
   1267 
   1268 static void
   1269 CopyColumn( const uchar* _src, size_t src_step,
   1270             uchar* _dst, size_t dst_step,
   1271             int len, size_t elem_size )
   1272 {
   1273     int i, t0, t1;
   1274     const int* src = (const int*)_src;
   1275     int* dst = (int*)_dst;
   1276     src_step /= sizeof(src[0]);
   1277     dst_step /= sizeof(dst[0]);
   1278 
   1279     if( elem_size == sizeof(int) )
   1280     {
   1281         for( i = 0; i < len; i++, src += src_step, dst += dst_step )
   1282             dst[0] = src[0];
   1283     }
   1284     else if( elem_size == sizeof(int)*2 )
   1285     {
   1286         for( i = 0; i < len; i++, src += src_step, dst += dst_step )
   1287         {
   1288             t0 = src[0]; t1 = src[1];
   1289             dst[0] = t0; dst[1] = t1;
   1290         }
   1291     }
   1292     else if( elem_size == sizeof(int)*4 )
   1293     {
   1294         for( i = 0; i < len; i++, src += src_step, dst += dst_step )
   1295         {
   1296             t0 = src[0]; t1 = src[1];
   1297             dst[0] = t0; dst[1] = t1;
   1298             t0 = src[2]; t1 = src[3];
   1299             dst[2] = t0; dst[3] = t1;
   1300         }
   1301     }
   1302 }
   1303 
   1304 
   1305 static void
   1306 CopyFrom2Columns( const uchar* _src, size_t src_step,
   1307                   uchar* _dst0, uchar* _dst1,
   1308                   int len, size_t elem_size )
   1309 {
   1310     int i, t0, t1;
   1311     const int* src = (const int*)_src;
   1312     int* dst0 = (int*)_dst0;
   1313     int* dst1 = (int*)_dst1;
   1314     src_step /= sizeof(src[0]);
   1315 
   1316     if( elem_size == sizeof(int) )
   1317     {
   1318         for( i = 0; i < len; i++, src += src_step )
   1319         {
   1320             t0 = src[0]; t1 = src[1];
   1321             dst0[i] = t0; dst1[i] = t1;
   1322         }
   1323     }
   1324     else if( elem_size == sizeof(int)*2 )
   1325     {
   1326         for( i = 0; i < len*2; i += 2, src += src_step )
   1327         {
   1328             t0 = src[0]; t1 = src[1];
   1329             dst0[i] = t0; dst0[i+1] = t1;
   1330             t0 = src[2]; t1 = src[3];
   1331             dst1[i] = t0; dst1[i+1] = t1;
   1332         }
   1333     }
   1334     else if( elem_size == sizeof(int)*4 )
   1335     {
   1336         for( i = 0; i < len*4; i += 4, src += src_step )
   1337         {
   1338             t0 = src[0]; t1 = src[1];
   1339             dst0[i] = t0; dst0[i+1] = t1;
   1340             t0 = src[2]; t1 = src[3];
   1341             dst0[i+2] = t0; dst0[i+3] = t1;
   1342             t0 = src[4]; t1 = src[5];
   1343             dst1[i] = t0; dst1[i+1] = t1;
   1344             t0 = src[6]; t1 = src[7];
   1345             dst1[i+2] = t0; dst1[i+3] = t1;
   1346         }
   1347     }
   1348 }
   1349 
   1350 
   1351 static void
   1352 CopyTo2Columns( const uchar* _src0, const uchar* _src1,
   1353                 uchar* _dst, size_t dst_step,
   1354                 int len, size_t elem_size )
   1355 {
   1356     int i, t0, t1;
   1357     const int* src0 = (const int*)_src0;
   1358     const int* src1 = (const int*)_src1;
   1359     int* dst = (int*)_dst;
   1360     dst_step /= sizeof(dst[0]);
   1361 
   1362     if( elem_size == sizeof(int) )
   1363     {
   1364         for( i = 0; i < len; i++, dst += dst_step )
   1365         {
   1366             t0 = src0[i]; t1 = src1[i];
   1367             dst[0] = t0; dst[1] = t1;
   1368         }
   1369     }
   1370     else if( elem_size == sizeof(int)*2 )
   1371     {
   1372         for( i = 0; i < len*2; i += 2, dst += dst_step )
   1373         {
   1374             t0 = src0[i]; t1 = src0[i+1];
   1375             dst[0] = t0; dst[1] = t1;
   1376             t0 = src1[i]; t1 = src1[i+1];
   1377             dst[2] = t0; dst[3] = t1;
   1378         }
   1379     }
   1380     else if( elem_size == sizeof(int)*4 )
   1381     {
   1382         for( i = 0; i < len*4; i += 4, dst += dst_step )
   1383         {
   1384             t0 = src0[i]; t1 = src0[i+1];
   1385             dst[0] = t0; dst[1] = t1;
   1386             t0 = src0[i+2]; t1 = src0[i+3];
   1387             dst[2] = t0; dst[3] = t1;
   1388             t0 = src1[i]; t1 = src1[i+1];
   1389             dst[4] = t0; dst[5] = t1;
   1390             t0 = src1[i+2]; t1 = src1[i+3];
   1391             dst[6] = t0; dst[7] = t1;
   1392         }
   1393     }
   1394 }
   1395 
   1396 
   1397 static void
   1398 ExpandCCS( uchar* _ptr, int n, int elem_size )
   1399 {
   1400     int i;
   1401     if( elem_size == (int)sizeof(float) )
   1402     {
   1403         float* p = (float*)_ptr;
   1404         for( i = 1; i < (n+1)/2; i++ )
   1405         {
   1406             p[(n-i)*2] = p[i*2-1];
   1407             p[(n-i)*2+1] = -p[i*2];
   1408         }
   1409         if( (n & 1) == 0 )
   1410         {
   1411             p[n] = p[n-1];
   1412             p[n+1] = 0.f;
   1413             n--;
   1414         }
   1415         for( i = n-1; i > 0; i-- )
   1416             p[i+1] = p[i];
   1417         p[1] = 0.f;
   1418     }
   1419     else
   1420     {
   1421         double* p = (double*)_ptr;
   1422         for( i = 1; i < (n+1)/2; i++ )
   1423         {
   1424             p[(n-i)*2] = p[i*2-1];
   1425             p[(n-i)*2+1] = -p[i*2];
   1426         }
   1427         if( (n & 1) == 0 )
   1428         {
   1429             p[n] = p[n-1];
   1430             p[n+1] = 0.f;
   1431             n--;
   1432         }
   1433         for( i = n-1; i > 0; i-- )
   1434             p[i+1] = p[i];
   1435         p[1] = 0.f;
   1436     }
   1437 }
   1438 
   1439 
   1440 typedef void (*DFTFunc)(
   1441      const void* src, void* dst, int n, int nf, int* factors,
   1442      const int* itab, const void* wave, int tab_size,
   1443      const void* spec, void* buf, int inv, double scale );
   1444 
   1445 static void DFT_32f( const Complexf* src, Complexf* dst, int n,
   1446     int nf, const int* factors, const int* itab,
   1447     const Complexf* wave, int tab_size,
   1448     const void* spec, Complexf* buf,
   1449     int flags, double scale )
   1450 {
   1451     DFT(src, dst, n, nf, factors, itab, wave, tab_size, spec, buf, flags, scale);
   1452 }
   1453 
   1454 static void DFT_64f( const Complexd* src, Complexd* dst, int n,
   1455     int nf, const int* factors, const int* itab,
   1456     const Complexd* wave, int tab_size,
   1457     const void* spec, Complexd* buf,
   1458     int flags, double scale )
   1459 {
   1460     DFT(src, dst, n, nf, factors, itab, wave, tab_size, spec, buf, flags, scale);
   1461 }
   1462 
   1463 
   1464 static void RealDFT_32f( const float* src, float* dst, int n, int nf, int* factors,
   1465         const int* itab,  const Complexf* wave, int tab_size, const void* spec,
   1466         Complexf* buf, int flags, double scale )
   1467 {
   1468     RealDFT( src, dst, n, nf, factors, itab, wave, tab_size, spec, buf, flags, scale);
   1469 }
   1470 
   1471 static void RealDFT_64f( const double* src, double* dst, int n, int nf, int* factors,
   1472         const int* itab,  const Complexd* wave, int tab_size, const void* spec,
   1473         Complexd* buf, int flags, double scale )
   1474 {
   1475     RealDFT( src, dst, n, nf, factors, itab, wave, tab_size, spec, buf, flags, scale);
   1476 }
   1477 
   1478 static void CCSIDFT_32f( const float* src, float* dst, int n, int nf, int* factors,
   1479                          const int* itab,  const Complexf* wave, int tab_size, const void* spec,
   1480                          Complexf* buf, int flags, double scale )
   1481 {
   1482     CCSIDFT( src, dst, n, nf, factors, itab, wave, tab_size, spec, buf, flags, scale);
   1483 }
   1484 
   1485 static void CCSIDFT_64f( const double* src, double* dst, int n, int nf, int* factors,
   1486                          const int* itab,  const Complexd* wave, int tab_size, const void* spec,
   1487                          Complexd* buf, int flags, double scale )
   1488 {
   1489     CCSIDFT( src, dst, n, nf, factors, itab, wave, tab_size, spec, buf, flags, scale);
   1490 }
   1491 
   1492 }
   1493 
   1494 #ifdef USE_IPP_DFT
   1495 typedef IppStatus (CV_STDCALL* IppDFTGetSizeFunc)(int, int, IppHintAlgorithm, int*, int*, int*);
   1496 typedef IppStatus (CV_STDCALL* IppDFTInitFunc)(int, int, IppHintAlgorithm, void*, uchar*);
   1497 #endif
   1498 
   1499 namespace cv
   1500 {
   1501 #if defined USE_IPP_DFT
   1502 
   1503 typedef IppStatus (CV_STDCALL* ippiDFT_C_Func)(const Ipp32fc*, int, Ipp32fc*, int, const IppiDFTSpec_C_32fc*, Ipp8u*);
   1504 typedef IppStatus (CV_STDCALL* ippiDFT_R_Func)(const Ipp32f* , int, Ipp32f* , int, const IppiDFTSpec_R_32f* , Ipp8u*);
   1505 
   1506 template <typename Dft>
   1507 class Dft_C_IPPLoop_Invoker : public ParallelLoopBody
   1508 {
   1509 public:
   1510 
   1511     Dft_C_IPPLoop_Invoker(const Mat& _src, Mat& _dst, const Dft& _ippidft, int _norm_flag, bool *_ok) :
   1512         ParallelLoopBody(), src(_src), dst(_dst), ippidft(_ippidft), norm_flag(_norm_flag), ok(_ok)
   1513     {
   1514         *ok = true;
   1515     }
   1516 
   1517     virtual void operator()(const Range& range) const
   1518     {
   1519         IppStatus status;
   1520         Ipp8u* pBuffer = 0;
   1521         Ipp8u* pMemInit= 0;
   1522         int sizeBuffer=0;
   1523         int sizeSpec=0;
   1524         int sizeInit=0;
   1525 
   1526         IppiSize srcRoiSize = {src.cols, 1};
   1527 
   1528         status = ippiDFTGetSize_C_32fc(srcRoiSize, norm_flag, ippAlgHintNone, &sizeSpec, &sizeInit, &sizeBuffer );
   1529         if ( status < 0 )
   1530         {
   1531             *ok = false;
   1532             return;
   1533         }
   1534 
   1535         IppiDFTSpec_C_32fc* pDFTSpec = (IppiDFTSpec_C_32fc*)ippMalloc( sizeSpec );
   1536 
   1537         if ( sizeInit > 0 )
   1538             pMemInit = (Ipp8u*)ippMalloc( sizeInit );
   1539 
   1540         if ( sizeBuffer > 0 )
   1541             pBuffer = (Ipp8u*)ippMalloc( sizeBuffer );
   1542 
   1543         status = ippiDFTInit_C_32fc( srcRoiSize, norm_flag, ippAlgHintNone, pDFTSpec, pMemInit );
   1544 
   1545         if ( sizeInit > 0 )
   1546             ippFree( pMemInit );
   1547 
   1548         if ( status < 0 )
   1549         {
   1550             ippFree( pDFTSpec );
   1551             if ( sizeBuffer > 0 )
   1552                 ippFree( pBuffer );
   1553             *ok = false;
   1554             return;
   1555         }
   1556 
   1557         for( int i = range.start; i < range.end; ++i)
   1558             if(!ippidft(src.ptr<Ipp32fc>(i), (int)src.step,dst.ptr<Ipp32fc>(i), (int)dst.step, pDFTSpec, (Ipp8u*)pBuffer))
   1559             {
   1560                 *ok = false;
   1561             }
   1562 
   1563         if ( sizeBuffer > 0 )
   1564             ippFree( pBuffer );
   1565 
   1566         ippFree( pDFTSpec );
   1567         CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
   1568     }
   1569 
   1570 private:
   1571     const Mat& src;
   1572     Mat& dst;
   1573     const Dft& ippidft;
   1574     int norm_flag;
   1575     bool *ok;
   1576 
   1577     const Dft_C_IPPLoop_Invoker& operator= (const Dft_C_IPPLoop_Invoker&);
   1578 };
   1579 
   1580 template <typename Dft>
   1581 class Dft_R_IPPLoop_Invoker : public ParallelLoopBody
   1582 {
   1583 public:
   1584 
   1585     Dft_R_IPPLoop_Invoker(const Mat& _src, Mat& _dst, const Dft& _ippidft, int _norm_flag, bool *_ok) :
   1586         ParallelLoopBody(), src(_src), dst(_dst), ippidft(_ippidft), norm_flag(_norm_flag), ok(_ok)
   1587     {
   1588         *ok = true;
   1589     }
   1590 
   1591     virtual void operator()(const Range& range) const
   1592     {
   1593         IppStatus status;
   1594         Ipp8u* pBuffer = 0;
   1595         Ipp8u* pMemInit= 0;
   1596         int sizeBuffer=0;
   1597         int sizeSpec=0;
   1598         int sizeInit=0;
   1599 
   1600         IppiSize srcRoiSize = {src.cols, 1};
   1601 
   1602         status = ippiDFTGetSize_R_32f(srcRoiSize, norm_flag, ippAlgHintNone, &sizeSpec, &sizeInit, &sizeBuffer );
   1603         if ( status < 0 )
   1604         {
   1605             *ok = false;
   1606             return;
   1607         }
   1608 
   1609         IppiDFTSpec_R_32f* pDFTSpec = (IppiDFTSpec_R_32f*)ippMalloc( sizeSpec );
   1610 
   1611         if ( sizeInit > 0 )
   1612             pMemInit = (Ipp8u*)ippMalloc( sizeInit );
   1613 
   1614         if ( sizeBuffer > 0 )
   1615             pBuffer = (Ipp8u*)ippMalloc( sizeBuffer );
   1616 
   1617         status = ippiDFTInit_R_32f( srcRoiSize, norm_flag, ippAlgHintNone, pDFTSpec, pMemInit );
   1618 
   1619         if ( sizeInit > 0 )
   1620             ippFree( pMemInit );
   1621 
   1622         if ( status < 0 )
   1623         {
   1624             ippFree( pDFTSpec );
   1625             if ( sizeBuffer > 0 )
   1626                 ippFree( pBuffer );
   1627             *ok = false;
   1628             return;
   1629         }
   1630 
   1631         for( int i = range.start; i < range.end; ++i)
   1632             if(!ippidft(src.ptr<float>(i), (int)src.step,dst.ptr<float>(i), (int)dst.step, pDFTSpec, (Ipp8u*)pBuffer))
   1633             {
   1634                 *ok = false;
   1635             }
   1636 
   1637         if ( sizeBuffer > 0 )
   1638             ippFree( pBuffer );
   1639 
   1640         ippFree( pDFTSpec );
   1641         CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
   1642     }
   1643 
   1644 private:
   1645     const Mat& src;
   1646     Mat& dst;
   1647     const Dft& ippidft;
   1648     int norm_flag;
   1649     bool *ok;
   1650 
   1651     const Dft_R_IPPLoop_Invoker& operator= (const Dft_R_IPPLoop_Invoker&);
   1652 };
   1653 
   1654 template <typename Dft>
   1655 bool Dft_C_IPPLoop(const Mat& src, Mat& dst, const Dft& ippidft, int norm_flag)
   1656 {
   1657     bool ok;
   1658     parallel_for_(Range(0, src.rows), Dft_C_IPPLoop_Invoker<Dft>(src, dst, ippidft, norm_flag, &ok), src.total()/(double)(1<<16) );
   1659     return ok;
   1660 }
   1661 
   1662 template <typename Dft>
   1663 bool Dft_R_IPPLoop(const Mat& src, Mat& dst, const Dft& ippidft, int norm_flag)
   1664 {
   1665     bool ok;
   1666     parallel_for_(Range(0, src.rows), Dft_R_IPPLoop_Invoker<Dft>(src, dst, ippidft, norm_flag, &ok), src.total()/(double)(1<<16) );
   1667     return ok;
   1668 }
   1669 
   1670 struct IPPDFT_C_Functor
   1671 {
   1672     IPPDFT_C_Functor(ippiDFT_C_Func _func) : func(_func){}
   1673 
   1674     bool operator()(const Ipp32fc* src, int srcStep, Ipp32fc* dst, int dstStep, const IppiDFTSpec_C_32fc* pDFTSpec, Ipp8u* pBuffer) const
   1675     {
   1676         return func ? func(src, srcStep, dst, dstStep, pDFTSpec, pBuffer) >= 0 : false;
   1677     }
   1678 private:
   1679     ippiDFT_C_Func func;
   1680 };
   1681 
   1682 struct IPPDFT_R_Functor
   1683 {
   1684     IPPDFT_R_Functor(ippiDFT_R_Func _func) : func(_func){}
   1685 
   1686     bool operator()(const Ipp32f* src, int srcStep, Ipp32f* dst, int dstStep, const IppiDFTSpec_R_32f* pDFTSpec, Ipp8u* pBuffer) const
   1687     {
   1688         return func ? func(src, srcStep, dst, dstStep, pDFTSpec, pBuffer) >= 0 : false;
   1689     }
   1690 private:
   1691     ippiDFT_R_Func func;
   1692 };
   1693 
   1694 static bool ippi_DFT_C_32F(const Mat& src, Mat& dst, bool inv, int norm_flag)
   1695 {
   1696     IppStatus status;
   1697     Ipp8u* pBuffer = 0;
   1698     Ipp8u* pMemInit= 0;
   1699     int sizeBuffer=0;
   1700     int sizeSpec=0;
   1701     int sizeInit=0;
   1702 
   1703     IppiSize srcRoiSize = {src.cols, src.rows};
   1704 
   1705     status = ippiDFTGetSize_C_32fc(srcRoiSize, norm_flag, ippAlgHintNone, &sizeSpec, &sizeInit, &sizeBuffer );
   1706     if ( status < 0 )
   1707         return false;
   1708 
   1709     IppiDFTSpec_C_32fc* pDFTSpec = (IppiDFTSpec_C_32fc*)ippMalloc( sizeSpec );
   1710 
   1711     if ( sizeInit > 0 )
   1712         pMemInit = (Ipp8u*)ippMalloc( sizeInit );
   1713 
   1714     if ( sizeBuffer > 0 )
   1715         pBuffer = (Ipp8u*)ippMalloc( sizeBuffer );
   1716 
   1717     status = ippiDFTInit_C_32fc( srcRoiSize, norm_flag, ippAlgHintNone, pDFTSpec, pMemInit );
   1718 
   1719     if ( sizeInit > 0 )
   1720         ippFree( pMemInit );
   1721 
   1722     if ( status < 0 )
   1723     {
   1724         ippFree( pDFTSpec );
   1725         if ( sizeBuffer > 0 )
   1726             ippFree( pBuffer );
   1727         return false;
   1728     }
   1729 
   1730     if (!inv)
   1731         status = ippiDFTFwd_CToC_32fc_C1R( src.ptr<Ipp32fc>(), (int)src.step, dst.ptr<Ipp32fc>(), (int)dst.step, pDFTSpec, pBuffer );
   1732     else
   1733         status = ippiDFTInv_CToC_32fc_C1R( src.ptr<Ipp32fc>(), (int)src.step, dst.ptr<Ipp32fc>(), (int)dst.step, pDFTSpec, pBuffer );
   1734 
   1735     if ( sizeBuffer > 0 )
   1736         ippFree( pBuffer );
   1737 
   1738     ippFree( pDFTSpec );
   1739 
   1740     if(status >= 0)
   1741     {
   1742         CV_IMPL_ADD(CV_IMPL_IPP);
   1743         return true;
   1744     }
   1745     return false;
   1746 }
   1747 
   1748 static bool ippi_DFT_R_32F(const Mat& src, Mat& dst, bool inv, int norm_flag)
   1749 {
   1750     IppStatus status;
   1751     Ipp8u* pBuffer = 0;
   1752     Ipp8u* pMemInit= 0;
   1753     int sizeBuffer=0;
   1754     int sizeSpec=0;
   1755     int sizeInit=0;
   1756 
   1757     IppiSize srcRoiSize = {src.cols, src.rows};
   1758 
   1759     status = ippiDFTGetSize_R_32f(srcRoiSize, norm_flag, ippAlgHintNone, &sizeSpec, &sizeInit, &sizeBuffer );
   1760     if ( status < 0 )
   1761         return false;
   1762 
   1763     IppiDFTSpec_R_32f* pDFTSpec = (IppiDFTSpec_R_32f*)ippMalloc( sizeSpec );
   1764 
   1765     if ( sizeInit > 0 )
   1766         pMemInit = (Ipp8u*)ippMalloc( sizeInit );
   1767 
   1768     if ( sizeBuffer > 0 )
   1769         pBuffer = (Ipp8u*)ippMalloc( sizeBuffer );
   1770 
   1771     status = ippiDFTInit_R_32f( srcRoiSize, norm_flag, ippAlgHintNone, pDFTSpec, pMemInit );
   1772 
   1773     if ( sizeInit > 0 )
   1774         ippFree( pMemInit );
   1775 
   1776     if ( status < 0 )
   1777     {
   1778         ippFree( pDFTSpec );
   1779         if ( sizeBuffer > 0 )
   1780             ippFree( pBuffer );
   1781         return false;
   1782     }
   1783 
   1784     if (!inv)
   1785         status = ippiDFTFwd_RToPack_32f_C1R( src.ptr<float>(), (int)(src.step), dst.ptr<float>(), (int)dst.step, pDFTSpec, pBuffer );
   1786     else
   1787         status = ippiDFTInv_PackToR_32f_C1R( src.ptr<float>(), (int)src.step, dst.ptr<float>(), (int)dst.step, pDFTSpec, pBuffer );
   1788 
   1789     if ( sizeBuffer > 0 )
   1790         ippFree( pBuffer );
   1791 
   1792     ippFree( pDFTSpec );
   1793 
   1794     if(status >= 0)
   1795     {
   1796         CV_IMPL_ADD(CV_IMPL_IPP);
   1797         return true;
   1798     }
   1799     return false;
   1800 }
   1801 
   1802 #endif
   1803 }
   1804 
   1805 #ifdef HAVE_OPENCL
   1806 
   1807 namespace cv
   1808 {
   1809 
   1810 enum FftType
   1811 {
   1812     R2R = 0, // real to CCS in case forward transform, CCS to real otherwise
   1813     C2R = 1, // complex to real in case inverse transform
   1814     R2C = 2, // real to complex in case forward transform
   1815     C2C = 3  // complex to complex
   1816 };
   1817 
   1818 struct OCL_FftPlan
   1819 {
   1820 private:
   1821     UMat twiddles;
   1822     String buildOptions;
   1823     int thread_count;
   1824     int dft_size;
   1825     int dft_depth;
   1826     bool status;
   1827 
   1828 public:
   1829     OCL_FftPlan(int _size, int _depth) : dft_size(_size), dft_depth(_depth), status(true)
   1830     {
   1831         CV_Assert( dft_depth == CV_32F || dft_depth == CV_64F );
   1832 
   1833         int min_radix;
   1834         std::vector<int> radixes, blocks;
   1835         ocl_getRadixes(dft_size, radixes, blocks, min_radix);
   1836         thread_count = dft_size / min_radix;
   1837 
   1838         if (thread_count > (int) ocl::Device::getDefault().maxWorkGroupSize())
   1839         {
   1840             status = false;
   1841             return;
   1842         }
   1843 
   1844         // generate string with radix calls
   1845         String radix_processing;
   1846         int n = 1, twiddle_size = 0;
   1847         for (size_t i=0; i<radixes.size(); i++)
   1848         {
   1849             int radix = radixes[i], block = blocks[i];
   1850             if (block > 1)
   1851                 radix_processing += format("fft_radix%d_B%d(smem,twiddles+%d,ind,%d,%d);", radix, block, twiddle_size, n, dft_size/radix);
   1852             else
   1853                 radix_processing += format("fft_radix%d(smem,twiddles+%d,ind,%d,%d);", radix, twiddle_size, n, dft_size/radix);
   1854             twiddle_size += (radix-1)*n;
   1855             n *= radix;
   1856         }
   1857 
   1858         twiddles.create(1, twiddle_size, CV_MAKE_TYPE(dft_depth, 2));
   1859         if (dft_depth == CV_32F)
   1860             fillRadixTable<float>(twiddles, radixes);
   1861         else
   1862             fillRadixTable<double>(twiddles, radixes);
   1863 
   1864         buildOptions = format("-D LOCAL_SIZE=%d -D kercn=%d -D FT=%s -D CT=%s%s -D RADIX_PROCESS=%s",
   1865                               dft_size, min_radix, ocl::typeToStr(dft_depth), ocl::typeToStr(CV_MAKE_TYPE(dft_depth, 2)),
   1866                               dft_depth == CV_64F ? " -D DOUBLE_SUPPORT" : "", radix_processing.c_str());
   1867     }
   1868 
   1869     bool enqueueTransform(InputArray _src, OutputArray _dst, int num_dfts, int flags, int fftType, bool rows = true) const
   1870     {
   1871         if (!status)
   1872             return false;
   1873 
   1874         UMat src = _src.getUMat();
   1875         UMat dst = _dst.getUMat();
   1876 
   1877         size_t globalsize[2];
   1878         size_t localsize[2];
   1879         String kernel_name;
   1880 
   1881         bool is1d = (flags & DFT_ROWS) != 0 || num_dfts == 1;
   1882         bool inv = (flags & DFT_INVERSE) != 0;
   1883         String options = buildOptions;
   1884 
   1885         if (rows)
   1886         {
   1887             globalsize[0] = thread_count; globalsize[1] = src.rows;
   1888             localsize[0] = thread_count; localsize[1] = 1;
   1889             kernel_name = !inv ? "fft_multi_radix_rows" : "ifft_multi_radix_rows";
   1890             if ((is1d || inv) && (flags & DFT_SCALE))
   1891                 options += " -D DFT_SCALE";
   1892         }
   1893         else
   1894         {
   1895             globalsize[0] = num_dfts; globalsize[1] = thread_count;
   1896             localsize[0] = 1; localsize[1] = thread_count;
   1897             kernel_name = !inv ? "fft_multi_radix_cols" : "ifft_multi_radix_cols";
   1898             if (flags & DFT_SCALE)
   1899                 options += " -D DFT_SCALE";
   1900         }
   1901 
   1902         options += src.channels() == 1 ? " -D REAL_INPUT" : " -D COMPLEX_INPUT";
   1903         options += dst.channels() == 1 ? " -D REAL_OUTPUT" : " -D COMPLEX_OUTPUT";
   1904         options += is1d ? " -D IS_1D" : "";
   1905 
   1906         if (!inv)
   1907         {
   1908             if ((is1d && src.channels() == 1) || (rows && (fftType == R2R)))
   1909                 options += " -D NO_CONJUGATE";
   1910         }
   1911         else
   1912         {
   1913             if (rows && (fftType == C2R || fftType == R2R))
   1914                 options += " -D NO_CONJUGATE";
   1915             if (dst.cols % 2 == 0)
   1916                 options += " -D EVEN";
   1917         }
   1918 
   1919         ocl::Kernel k(kernel_name.c_str(), ocl::core::fft_oclsrc, options);
   1920         if (k.empty())
   1921             return false;
   1922 
   1923         k.args(ocl::KernelArg::ReadOnly(src), ocl::KernelArg::WriteOnly(dst), ocl::KernelArg::ReadOnlyNoSize(twiddles), thread_count, num_dfts);
   1924         return k.run(2, globalsize, localsize, false);
   1925     }
   1926 
   1927 private:
   1928     static void ocl_getRadixes(int cols, std::vector<int>& radixes, std::vector<int>& blocks, int& min_radix)
   1929     {
   1930         int factors[34];
   1931         int nf = DFTFactorize(cols, factors);
   1932 
   1933         int n = 1;
   1934         int factor_index = 0;
   1935         min_radix = INT_MAX;
   1936 
   1937         // 2^n transforms
   1938         if ((factors[factor_index] & 1) == 0)
   1939         {
   1940             for( ; n < factors[factor_index];)
   1941             {
   1942                 int radix = 2, block = 1;
   1943                 if (8*n <= factors[0])
   1944                     radix = 8;
   1945                 else if (4*n <= factors[0])
   1946                 {
   1947                     radix = 4;
   1948                     if (cols % 12 == 0)
   1949                         block = 3;
   1950                     else if (cols % 8 == 0)
   1951                         block = 2;
   1952                 }
   1953                 else
   1954                 {
   1955                     if (cols % 10 == 0)
   1956                         block = 5;
   1957                     else if (cols % 8 == 0)
   1958                         block = 4;
   1959                     else if (cols % 6 == 0)
   1960                         block = 3;
   1961                     else if (cols % 4 == 0)
   1962                         block = 2;
   1963                 }
   1964 
   1965                 radixes.push_back(radix);
   1966                 blocks.push_back(block);
   1967                 min_radix = min(min_radix, block*radix);
   1968                 n *= radix;
   1969             }
   1970             factor_index++;
   1971         }
   1972 
   1973         // all the other transforms
   1974         for( ; factor_index < nf; factor_index++)
   1975         {
   1976             int radix = factors[factor_index], block = 1;
   1977             if (radix == 3)
   1978             {
   1979                 if (cols % 12 == 0)
   1980                     block = 4;
   1981                 else if (cols % 9 == 0)
   1982                     block = 3;
   1983                 else if (cols % 6 == 0)
   1984                     block = 2;
   1985             }
   1986             else if (radix == 5)
   1987             {
   1988                 if (cols % 10 == 0)
   1989                     block = 2;
   1990             }
   1991             radixes.push_back(radix);
   1992             blocks.push_back(block);
   1993             min_radix = min(min_radix, block*radix);
   1994         }
   1995     }
   1996 
   1997     template <typename T>
   1998     static void fillRadixTable(UMat twiddles, const std::vector<int>& radixes)
   1999     {
   2000         Mat tw = twiddles.getMat(ACCESS_WRITE);
   2001         T* ptr = tw.ptr<T>();
   2002         int ptr_index = 0;
   2003 
   2004         int n = 1;
   2005         for (size_t i=0; i<radixes.size(); i++)
   2006         {
   2007             int radix = radixes[i];
   2008             n *= radix;
   2009 
   2010             for (int j=1; j<radix; j++)
   2011             {
   2012                 double theta = -CV_2PI*j/n;
   2013 
   2014                 for (int k=0; k<(n/radix); k++)
   2015                 {
   2016                     ptr[ptr_index++] = (T) cos(k*theta);
   2017                     ptr[ptr_index++] = (T) sin(k*theta);
   2018                 }
   2019             }
   2020         }
   2021     }
   2022 };
   2023 
   2024 class OCL_FftPlanCache
   2025 {
   2026 public:
   2027     static OCL_FftPlanCache & getInstance()
   2028     {
   2029         static OCL_FftPlanCache planCache;
   2030         return planCache;
   2031     }
   2032 
   2033     Ptr<OCL_FftPlan> getFftPlan(int dft_size, int depth)
   2034     {
   2035         int key = (dft_size << 16) | (depth & 0xFFFF);
   2036         std::map<int, Ptr<OCL_FftPlan> >::iterator f = planStorage.find(key);
   2037         if (f != planStorage.end())
   2038         {
   2039             return f->second;
   2040         }
   2041         else
   2042         {
   2043             Ptr<OCL_FftPlan> newPlan = Ptr<OCL_FftPlan>(new OCL_FftPlan(dft_size, depth));
   2044             planStorage[key] = newPlan;
   2045             return newPlan;
   2046         }
   2047     }
   2048 
   2049     ~OCL_FftPlanCache()
   2050     {
   2051         planStorage.clear();
   2052     }
   2053 
   2054 protected:
   2055     OCL_FftPlanCache() :
   2056         planStorage()
   2057     {
   2058     }
   2059     std::map<int, Ptr<OCL_FftPlan> > planStorage;
   2060 };
   2061 
   2062 static bool ocl_dft_rows(InputArray _src, OutputArray _dst, int nonzero_rows, int flags, int fftType)
   2063 {
   2064     int type = _src.type(), depth = CV_MAT_DEPTH(type);
   2065     Ptr<OCL_FftPlan> plan = OCL_FftPlanCache::getInstance().getFftPlan(_src.cols(), depth);
   2066     return plan->enqueueTransform(_src, _dst, nonzero_rows, flags, fftType, true);
   2067 }
   2068 
   2069 static bool ocl_dft_cols(InputArray _src, OutputArray _dst, int nonzero_cols, int flags, int fftType)
   2070 {
   2071     int type = _src.type(), depth = CV_MAT_DEPTH(type);
   2072     Ptr<OCL_FftPlan> plan = OCL_FftPlanCache::getInstance().getFftPlan(_src.rows(), depth);
   2073     return plan->enqueueTransform(_src, _dst, nonzero_cols, flags, fftType, false);
   2074 }
   2075 
   2076 static bool ocl_dft(InputArray _src, OutputArray _dst, int flags, int nonzero_rows)
   2077 {
   2078     int type = _src.type(), cn = CV_MAT_CN(type), depth = CV_MAT_DEPTH(type);
   2079     Size ssize = _src.size();
   2080     bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;
   2081 
   2082     if ( !((cn == 1 || cn == 2) && (depth == CV_32F || (depth == CV_64F && doubleSupport))) )
   2083         return false;
   2084 
   2085     // if is not a multiplication of prime numbers { 2, 3, 5 }
   2086     if (ssize.area() != getOptimalDFTSize(ssize.area()))
   2087         return false;
   2088 
   2089     UMat src = _src.getUMat();
   2090     int complex_input = cn == 2 ? 1 : 0;
   2091     int complex_output = (flags & DFT_COMPLEX_OUTPUT) != 0;
   2092     int real_input = cn == 1 ? 1 : 0;
   2093     int real_output = (flags & DFT_REAL_OUTPUT) != 0;
   2094     bool inv = (flags & DFT_INVERSE) != 0 ? 1 : 0;
   2095 
   2096     if( nonzero_rows <= 0 || nonzero_rows > _src.rows() )
   2097         nonzero_rows = _src.rows();
   2098     bool is1d = (flags & DFT_ROWS) != 0 || nonzero_rows == 1;
   2099 
   2100     // if output format is not specified
   2101     if (complex_output + real_output == 0)
   2102     {
   2103         if (real_input)
   2104             real_output = 1;
   2105         else
   2106             complex_output = 1;
   2107     }
   2108 
   2109     FftType fftType = (FftType)(complex_input << 0 | complex_output << 1);
   2110 
   2111     // Forward Complex to CCS not supported
   2112     if (fftType == C2R && !inv)
   2113         fftType = C2C;
   2114 
   2115     // Inverse CCS to Complex not supported
   2116     if (fftType == R2C && inv)
   2117         fftType = R2R;
   2118 
   2119     UMat output;
   2120     if (fftType == C2C || fftType == R2C)
   2121     {
   2122         // complex output
   2123         _dst.create(src.size(), CV_MAKETYPE(depth, 2));
   2124         output = _dst.getUMat();
   2125     }
   2126     else
   2127     {
   2128         // real output
   2129         if (is1d)
   2130         {
   2131             _dst.create(src.size(), CV_MAKETYPE(depth, 1));
   2132             output = _dst.getUMat();
   2133         }
   2134         else
   2135         {
   2136             _dst.create(src.size(), CV_MAKETYPE(depth, 1));
   2137             output.create(src.size(), CV_MAKETYPE(depth, 2));
   2138         }
   2139     }
   2140 
   2141     if (!inv)
   2142     {
   2143         if (!ocl_dft_rows(src, output, nonzero_rows, flags, fftType))
   2144             return false;
   2145 
   2146         if (!is1d)
   2147         {
   2148             int nonzero_cols = fftType == R2R ? output.cols/2 + 1 : output.cols;
   2149             if (!ocl_dft_cols(output, _dst, nonzero_cols, flags, fftType))
   2150                 return false;
   2151         }
   2152     }
   2153     else
   2154     {
   2155         if (fftType == C2C)
   2156         {
   2157             // complex output
   2158             if (!ocl_dft_rows(src, output, nonzero_rows, flags, fftType))
   2159                 return false;
   2160 
   2161             if (!is1d)
   2162             {
   2163                 if (!ocl_dft_cols(output, output, output.cols, flags, fftType))
   2164                     return false;
   2165             }
   2166         }
   2167         else
   2168         {
   2169             if (is1d)
   2170             {
   2171                 if (!ocl_dft_rows(src, output, nonzero_rows, flags, fftType))
   2172                     return false;
   2173             }
   2174             else
   2175             {
   2176                 int nonzero_cols = src.cols/2 + 1;
   2177                 if (!ocl_dft_cols(src, output, nonzero_cols, flags, fftType))
   2178                     return false;
   2179 
   2180                 if (!ocl_dft_rows(output, _dst, nonzero_rows, flags, fftType))
   2181                     return false;
   2182             }
   2183         }
   2184     }
   2185     return true;
   2186 }
   2187 
   2188 } // namespace cv;
   2189 
   2190 #endif
   2191 
   2192 #ifdef HAVE_CLAMDFFT
   2193 
   2194 namespace cv {
   2195 
   2196 #define CLAMDDFT_Assert(func) \
   2197     { \
   2198         clAmdFftStatus s = (func); \
   2199         CV_Assert(s == CLFFT_SUCCESS); \
   2200     }
   2201 
   2202 class PlanCache
   2203 {
   2204     struct FftPlan
   2205     {
   2206         FftPlan(const Size & _dft_size, int _src_step, int _dst_step, bool _doubleFP, bool _inplace, int _flags, FftType _fftType) :
   2207             dft_size(_dft_size), src_step(_src_step), dst_step(_dst_step),
   2208             doubleFP(_doubleFP), inplace(_inplace), flags(_flags), fftType(_fftType),
   2209             context((cl_context)ocl::Context::getDefault().ptr()), plHandle(0)
   2210         {
   2211             bool dft_inverse = (flags & DFT_INVERSE) != 0;
   2212             bool dft_scale = (flags & DFT_SCALE) != 0;
   2213             bool dft_rows = (flags & DFT_ROWS) != 0;
   2214 
   2215             clAmdFftLayout inLayout = CLFFT_REAL, outLayout = CLFFT_REAL;
   2216             clAmdFftDim dim = dft_size.height == 1 || dft_rows ? CLFFT_1D : CLFFT_2D;
   2217 
   2218             size_t batchSize = dft_rows ? dft_size.height : 1;
   2219             size_t clLengthsIn[3] = { dft_size.width, dft_rows ? 1 : dft_size.height, 1 };
   2220             size_t clStridesIn[3] = { 1, 1, 1 };
   2221             size_t clStridesOut[3]  = { 1, 1, 1 };
   2222             int elemSize = doubleFP ? sizeof(double) : sizeof(float);
   2223 
   2224             switch (fftType)
   2225             {
   2226             case C2C:
   2227                 inLayout = CLFFT_COMPLEX_INTERLEAVED;
   2228                 outLayout = CLFFT_COMPLEX_INTERLEAVED;
   2229                 clStridesIn[1] = src_step / (elemSize << 1);
   2230                 clStridesOut[1] = dst_step / (elemSize << 1);
   2231                 break;
   2232             case R2C:
   2233                 inLayout = CLFFT_REAL;
   2234                 outLayout = CLFFT_HERMITIAN_INTERLEAVED;
   2235                 clStridesIn[1] = src_step / elemSize;
   2236                 clStridesOut[1] = dst_step / (elemSize << 1);
   2237                 break;
   2238             case C2R:
   2239                 inLayout = CLFFT_HERMITIAN_INTERLEAVED;
   2240                 outLayout = CLFFT_REAL;
   2241                 clStridesIn[1] = src_step / (elemSize << 1);
   2242                 clStridesOut[1] = dst_step / elemSize;
   2243                 break;
   2244             case R2R:
   2245             default:
   2246                 CV_Error(Error::StsNotImplemented, "AMD Fft does not support this type");
   2247                 break;
   2248             }
   2249 
   2250             clStridesIn[2] = dft_rows ? clStridesIn[1] : dft_size.width * clStridesIn[1];
   2251             clStridesOut[2] = dft_rows ? clStridesOut[1] : dft_size.width * clStridesOut[1];
   2252 
   2253             CLAMDDFT_Assert(clAmdFftCreateDefaultPlan(&plHandle, (cl_context)ocl::Context::getDefault().ptr(), dim, clLengthsIn))
   2254 
   2255             // setting plan properties
   2256             CLAMDDFT_Assert(clAmdFftSetPlanPrecision(plHandle, doubleFP ? CLFFT_DOUBLE : CLFFT_SINGLE));
   2257             CLAMDDFT_Assert(clAmdFftSetResultLocation(plHandle, inplace ? CLFFT_INPLACE : CLFFT_OUTOFPLACE))
   2258             CLAMDDFT_Assert(clAmdFftSetLayout(plHandle, inLayout, outLayout))
   2259             CLAMDDFT_Assert(clAmdFftSetPlanBatchSize(plHandle, batchSize))
   2260             CLAMDDFT_Assert(clAmdFftSetPlanInStride(plHandle, dim, clStridesIn))
   2261             CLAMDDFT_Assert(clAmdFftSetPlanOutStride(plHandle, dim, clStridesOut))
   2262             CLAMDDFT_Assert(clAmdFftSetPlanDistance(plHandle, clStridesIn[dim], clStridesOut[dim]))
   2263 
   2264             float scale = dft_scale ? 1.0f / (dft_rows ? dft_size.width : dft_size.area()) : 1.0f;
   2265             CLAMDDFT_Assert(clAmdFftSetPlanScale(plHandle, dft_inverse ? CLFFT_BACKWARD : CLFFT_FORWARD, scale))
   2266 
   2267             // ready to bake
   2268             cl_command_queue queue = (cl_command_queue)ocl::Queue::getDefault().ptr();
   2269             CLAMDDFT_Assert(clAmdFftBakePlan(plHandle, 1, &queue, NULL, NULL))
   2270         }
   2271 
   2272         ~FftPlan()
   2273         {
   2274 //            clAmdFftDestroyPlan(&plHandle);
   2275         }
   2276 
   2277         friend class PlanCache;
   2278 
   2279     private:
   2280         Size dft_size;
   2281         int src_step, dst_step;
   2282         bool doubleFP;
   2283         bool inplace;
   2284         int flags;
   2285         FftType fftType;
   2286 
   2287         cl_context context;
   2288         clAmdFftPlanHandle plHandle;
   2289     };
   2290 
   2291 public:
   2292     static PlanCache & getInstance()
   2293     {
   2294         static PlanCache planCache;
   2295         return planCache;
   2296     }
   2297 
   2298     clAmdFftPlanHandle getPlanHandle(const Size & dft_size, int src_step, int dst_step, bool doubleFP,
   2299                                      bool inplace, int flags, FftType fftType)
   2300     {
   2301         cl_context currentContext = (cl_context)ocl::Context::getDefault().ptr();
   2302 
   2303         for (size_t i = 0, size = planStorage.size(); i < size; ++i)
   2304         {
   2305             const FftPlan * const plan = planStorage[i];
   2306 
   2307             if (plan->dft_size == dft_size &&
   2308                 plan->flags == flags &&
   2309                 plan->src_step == src_step &&
   2310                 plan->dst_step == dst_step &&
   2311                 plan->doubleFP == doubleFP &&
   2312                 plan->fftType == fftType &&
   2313                 plan->inplace == inplace)
   2314             {
   2315                 if (plan->context != currentContext)
   2316                 {
   2317                     planStorage.erase(planStorage.begin() + i);
   2318                     break;
   2319                 }
   2320 
   2321                 return plan->plHandle;
   2322             }
   2323         }
   2324 
   2325         // no baked plan is found, so let's create a new one
   2326         Ptr<FftPlan> newPlan = Ptr<FftPlan>(new FftPlan(dft_size, src_step, dst_step, doubleFP, inplace, flags, fftType));
   2327         planStorage.push_back(newPlan);
   2328 
   2329         return newPlan->plHandle;
   2330     }
   2331 
   2332     ~PlanCache()
   2333     {
   2334         planStorage.clear();
   2335     }
   2336 
   2337 protected:
   2338     PlanCache() :
   2339         planStorage()
   2340     {
   2341     }
   2342 
   2343     std::vector<Ptr<FftPlan> > planStorage;
   2344 };
   2345 
   2346 extern "C" {
   2347 
   2348 static void CL_CALLBACK oclCleanupCallback(cl_event e, cl_int, void *p)
   2349 {
   2350     UMatData * u = (UMatData *)p;
   2351 
   2352     if( u && CV_XADD(&u->urefcount, -1) == 1 )
   2353         u->currAllocator->deallocate(u);
   2354     u = 0;
   2355 
   2356     clReleaseEvent(e), e = 0;
   2357 }
   2358 
   2359 }
   2360 
   2361 static bool ocl_dft_amdfft(InputArray _src, OutputArray _dst, int flags)
   2362 {
   2363     int type = _src.type(), depth = CV_MAT_DEPTH(type), cn = CV_MAT_CN(type);
   2364     Size ssize = _src.size();
   2365 
   2366     bool doubleSupport = ocl::Device::getDefault().doubleFPConfig() > 0;
   2367     if ( (!doubleSupport && depth == CV_64F) ||
   2368          !(type == CV_32FC1 || type == CV_32FC2 || type == CV_64FC1 || type == CV_64FC2) ||
   2369          _src.offset() != 0)
   2370         return false;
   2371 
   2372     // if is not a multiplication of prime numbers { 2, 3, 5 }
   2373     if (ssize.area() != getOptimalDFTSize(ssize.area()))
   2374         return false;
   2375 
   2376     int dst_complex_input = cn == 2 ? 1 : 0;
   2377     bool dft_inverse = (flags & DFT_INVERSE) != 0 ? 1 : 0;
   2378     int dft_complex_output = (flags & DFT_COMPLEX_OUTPUT) != 0;
   2379     bool dft_real_output = (flags & DFT_REAL_OUTPUT) != 0;
   2380 
   2381     CV_Assert(dft_complex_output + dft_real_output < 2);
   2382     FftType fftType = (FftType)(dst_complex_input << 0 | dft_complex_output << 1);
   2383 
   2384     switch (fftType)
   2385     {
   2386     case C2C:
   2387         _dst.create(ssize.height, ssize.width, CV_MAKE_TYPE(depth, 2));
   2388         break;
   2389     case R2C: // TODO implement it if possible
   2390     case C2R: // TODO implement it if possible
   2391     case R2R: // AMD Fft does not support this type
   2392     default:
   2393         return false;
   2394     }
   2395 
   2396     UMat src = _src.getUMat(), dst = _dst.getUMat();
   2397     bool inplace = src.u == dst.u;
   2398 
   2399     clAmdFftPlanHandle plHandle = PlanCache::getInstance().
   2400             getPlanHandle(ssize, (int)src.step, (int)dst.step,
   2401                           depth == CV_64F, inplace, flags, fftType);
   2402 
   2403     // get the bufferSize
   2404     size_t bufferSize = 0;
   2405     CLAMDDFT_Assert(clAmdFftGetTmpBufSize(plHandle, &bufferSize))
   2406     UMat tmpBuffer(1, (int)bufferSize, CV_8UC1);
   2407 
   2408     cl_mem srcarg = (cl_mem)src.handle(ACCESS_READ);
   2409     cl_mem dstarg = (cl_mem)dst.handle(ACCESS_RW);
   2410 
   2411     cl_command_queue queue = (cl_command_queue)ocl::Queue::getDefault().ptr();
   2412     cl_event e = 0;
   2413 
   2414     CLAMDDFT_Assert(clAmdFftEnqueueTransform(plHandle, dft_inverse ? CLFFT_BACKWARD : CLFFT_FORWARD,
   2415                                        1, &queue, 0, NULL, &e,
   2416                                        &srcarg, &dstarg, (cl_mem)tmpBuffer.handle(ACCESS_RW)))
   2417 
   2418     tmpBuffer.addref();
   2419     clSetEventCallback(e, CL_COMPLETE, oclCleanupCallback, tmpBuffer.u);
   2420     return true;
   2421 }
   2422 
   2423 #undef DFT_ASSERT
   2424 
   2425 }
   2426 
   2427 #endif // HAVE_CLAMDFFT
   2428 
   2429 namespace cv
   2430 {
   2431 static void complementComplexOutput(Mat& dst, int len, int dft_dims)
   2432 {
   2433     int i, n = dst.cols;
   2434     size_t elem_size = dst.elemSize1();
   2435     if( elem_size == sizeof(float) )
   2436     {
   2437         float* p0 = dst.ptr<float>();
   2438         size_t dstep = dst.step/sizeof(p0[0]);
   2439         for( i = 0; i < len; i++ )
   2440         {
   2441             float* p = p0 + dstep*i;
   2442             float* q = dft_dims == 1 || i == 0 || i*2 == len ? p : p0 + dstep*(len-i);
   2443 
   2444             for( int j = 1; j < (n+1)/2; j++ )
   2445             {
   2446                 p[(n-j)*2] = q[j*2];
   2447                 p[(n-j)*2+1] = -q[j*2+1];
   2448             }
   2449         }
   2450     }
   2451     else
   2452     {
   2453         double* p0 = dst.ptr<double>();
   2454         size_t dstep = dst.step/sizeof(p0[0]);
   2455         for( i = 0; i < len; i++ )
   2456         {
   2457             double* p = p0 + dstep*i;
   2458             double* q = dft_dims == 1 || i == 0 || i*2 == len ? p : p0 + dstep*(len-i);
   2459 
   2460             for( int j = 1; j < (n+1)/2; j++ )
   2461             {
   2462                 p[(n-j)*2] = q[j*2];
   2463                 p[(n-j)*2+1] = -q[j*2+1];
   2464             }
   2465         }
   2466     }
   2467 }
   2468 }
   2469 
   2470 void cv::dft( InputArray _src0, OutputArray _dst, int flags, int nonzero_rows )
   2471 {
   2472 #ifdef HAVE_CLAMDFFT
   2473     CV_OCL_RUN(ocl::haveAmdFft() && ocl::Device::getDefault().type() != ocl::Device::TYPE_CPU &&
   2474             _dst.isUMat() && _src0.dims() <= 2 && nonzero_rows == 0,
   2475                ocl_dft_amdfft(_src0, _dst, flags))
   2476 #endif
   2477 
   2478 #ifdef HAVE_OPENCL
   2479     CV_OCL_RUN(_dst.isUMat() && _src0.dims() <= 2,
   2480                ocl_dft(_src0, _dst, flags, nonzero_rows))
   2481 #endif
   2482 
   2483     static DFTFunc dft_tbl[6] =
   2484     {
   2485         (DFTFunc)DFT_32f,
   2486         (DFTFunc)RealDFT_32f,
   2487         (DFTFunc)CCSIDFT_32f,
   2488         (DFTFunc)DFT_64f,
   2489         (DFTFunc)RealDFT_64f,
   2490         (DFTFunc)CCSIDFT_64f
   2491     };
   2492     AutoBuffer<uchar> buf;
   2493     Mat src0 = _src0.getMat(), src = src0;
   2494     int prev_len = 0, stage = 0;
   2495     bool inv = (flags & DFT_INVERSE) != 0;
   2496     int nf = 0, real_transform = src.channels() == 1 || (inv && (flags & DFT_REAL_OUTPUT)!=0);
   2497     int type = src.type(), depth = src.depth();
   2498     int elem_size = (int)src.elemSize1(), complex_elem_size = elem_size*2;
   2499     int factors[34];
   2500     bool inplace_transform = false;
   2501 #ifdef USE_IPP_DFT
   2502     AutoBuffer<uchar> ippbuf;
   2503     int ipp_norm_flag = !(flags & DFT_SCALE) ? 8 : inv ? 2 : 1;
   2504 #endif
   2505 
   2506     CV_Assert( type == CV_32FC1 || type == CV_32FC2 || type == CV_64FC1 || type == CV_64FC2 );
   2507 
   2508     if( !inv && src.channels() == 1 && (flags & DFT_COMPLEX_OUTPUT) )
   2509         _dst.create( src.size(), CV_MAKETYPE(depth, 2) );
   2510     else if( inv && src.channels() == 2 && (flags & DFT_REAL_OUTPUT) )
   2511         _dst.create( src.size(), depth );
   2512     else
   2513         _dst.create( src.size(), type );
   2514 
   2515     Mat dst = _dst.getMat();
   2516 
   2517 #if defined USE_IPP_DFT
   2518     CV_IPP_CHECK()
   2519     {
   2520         if ((src.depth() == CV_32F) && (src.total()>(int)(1<<6)) && nonzero_rows == 0)
   2521         {
   2522             if ((flags & DFT_ROWS) == 0)
   2523             {
   2524                 if (src.channels() == 2 && !(inv && (flags & DFT_REAL_OUTPUT)))
   2525                 {
   2526                     if (ippi_DFT_C_32F(src, dst, inv, ipp_norm_flag))
   2527                     {
   2528                         CV_IMPL_ADD(CV_IMPL_IPP);
   2529                         return;
   2530                     }
   2531                     setIppErrorStatus();
   2532                 }
   2533                 if (src.channels() == 1 && (inv || !(flags & DFT_COMPLEX_OUTPUT)))
   2534                 {
   2535                     if (ippi_DFT_R_32F(src, dst, inv, ipp_norm_flag))
   2536                     {
   2537                         CV_IMPL_ADD(CV_IMPL_IPP);
   2538                         return;
   2539                     }
   2540                     setIppErrorStatus();
   2541                 }
   2542             }
   2543             else
   2544             {
   2545                 if (src.channels() == 2 && !(inv && (flags & DFT_REAL_OUTPUT)))
   2546                 {
   2547                     ippiDFT_C_Func ippiFunc = inv ? (ippiDFT_C_Func)ippiDFTInv_CToC_32fc_C1R : (ippiDFT_C_Func)ippiDFTFwd_CToC_32fc_C1R;
   2548                     if (Dft_C_IPPLoop(src, dst, IPPDFT_C_Functor(ippiFunc),ipp_norm_flag))
   2549                     {
   2550                         CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
   2551                         return;
   2552                     }
   2553                     setIppErrorStatus();
   2554                 }
   2555                 if (src.channels() == 1 && (inv || !(flags & DFT_COMPLEX_OUTPUT)))
   2556                 {
   2557                     ippiDFT_R_Func ippiFunc = inv ? (ippiDFT_R_Func)ippiDFTInv_PackToR_32f_C1R : (ippiDFT_R_Func)ippiDFTFwd_RToPack_32f_C1R;
   2558                     if (Dft_R_IPPLoop(src, dst, IPPDFT_R_Functor(ippiFunc),ipp_norm_flag))
   2559                     {
   2560                         CV_IMPL_ADD(CV_IMPL_IPP|CV_IMPL_MT);
   2561                         return;
   2562                     }
   2563                     setIppErrorStatus();
   2564                 }
   2565             }
   2566         }
   2567     }
   2568 #endif
   2569 
   2570     if( !real_transform )
   2571         elem_size = complex_elem_size;
   2572 
   2573     if( src.cols == 1 && nonzero_rows > 0 )
   2574         CV_Error( CV_StsNotImplemented,
   2575         "This mode (using nonzero_rows with a single-column matrix) breaks the function's logic, so it is prohibited.\n"
   2576         "For fast convolution/correlation use 2-column matrix or single-row matrix instead" );
   2577 
   2578     // determine, which transform to do first - row-wise
   2579     // (stage 0) or column-wise (stage 1) transform
   2580     if( !(flags & DFT_ROWS) && src.rows > 1 &&
   2581         ((src.cols == 1 && (!src.isContinuous() || !dst.isContinuous())) ||
   2582          (src.cols > 1 && inv && real_transform)) )
   2583         stage = 1;
   2584 
   2585     for(;;)
   2586     {
   2587         double scale = 1;
   2588         uchar* wave = 0;
   2589         int* itab = 0;
   2590         uchar* ptr;
   2591         int i, len, count, sz = 0;
   2592         int use_buf = 0, odd_real = 0;
   2593         DFTFunc dft_func;
   2594 
   2595         if( stage == 0 ) // row-wise transform
   2596         {
   2597             len = !inv ? src.cols : dst.cols;
   2598             count = src.rows;
   2599             if( len == 1 && !(flags & DFT_ROWS) )
   2600             {
   2601                 len = !inv ? src.rows : dst.rows;
   2602                 count = 1;
   2603             }
   2604             odd_real = real_transform && (len & 1);
   2605         }
   2606         else
   2607         {
   2608             len = dst.rows;
   2609             count = !inv ? src0.cols : dst.cols;
   2610             sz = 2*len*complex_elem_size;
   2611         }
   2612 
   2613         void *spec = 0;
   2614 #ifdef USE_IPP_DFT
   2615         if( CV_IPP_CHECK_COND && (len*count >= 64) ) // use IPP DFT if available
   2616         {
   2617             int specsize=0, initsize=0, worksize=0;
   2618             IppDFTGetSizeFunc getSizeFunc = 0;
   2619             IppDFTInitFunc initFunc = 0;
   2620 
   2621             if( real_transform && stage == 0 )
   2622             {
   2623                 if( depth == CV_32F )
   2624                 {
   2625                     getSizeFunc = ippsDFTGetSize_R_32f;
   2626                     initFunc = (IppDFTInitFunc)ippsDFTInit_R_32f;
   2627                 }
   2628                 else
   2629                 {
   2630                     getSizeFunc = ippsDFTGetSize_R_64f;
   2631                     initFunc = (IppDFTInitFunc)ippsDFTInit_R_64f;
   2632                 }
   2633             }
   2634             else
   2635             {
   2636                 if( depth == CV_32F )
   2637                 {
   2638                     getSizeFunc = ippsDFTGetSize_C_32fc;
   2639                     initFunc = (IppDFTInitFunc)ippsDFTInit_C_32fc;
   2640                 }
   2641                 else
   2642                 {
   2643                     getSizeFunc = ippsDFTGetSize_C_64fc;
   2644                     initFunc = (IppDFTInitFunc)ippsDFTInit_C_64fc;
   2645                 }
   2646             }
   2647             if( getSizeFunc(len, ipp_norm_flag, ippAlgHintNone, &specsize, &initsize, &worksize) >= 0 )
   2648             {
   2649                 ippbuf.allocate(specsize + initsize + 64);
   2650                 spec = alignPtr(&ippbuf[0], 32);
   2651                 uchar* initbuf = alignPtr((uchar*)spec + specsize, 32);
   2652                 if( initFunc(len, ipp_norm_flag, ippAlgHintNone, spec, initbuf) < 0 )
   2653                     spec = 0;
   2654                 sz += worksize;
   2655             }
   2656             else
   2657                 setIppErrorStatus();
   2658         }
   2659         else
   2660 #endif
   2661         {
   2662             if( len != prev_len )
   2663                 nf = DFTFactorize( len, factors );
   2664 
   2665             inplace_transform = factors[0] == factors[nf-1];
   2666             sz += len*(complex_elem_size + sizeof(int));
   2667             i = nf > 1 && (factors[0] & 1) == 0;
   2668             if( (factors[i] & 1) != 0 && factors[i] > 5 )
   2669                 sz += (factors[i]+1)*complex_elem_size;
   2670 
   2671             if( (stage == 0 && ((src.data == dst.data && !inplace_transform) || odd_real)) ||
   2672                 (stage == 1 && !inplace_transform) )
   2673             {
   2674                 use_buf = 1;
   2675                 sz += len*complex_elem_size;
   2676             }
   2677         }
   2678 
   2679         ptr = (uchar*)buf;
   2680         buf.allocate( sz + 32 );
   2681         if( ptr != (uchar*)buf )
   2682             prev_len = 0; // because we release the buffer,
   2683                           // force recalculation of
   2684                           // twiddle factors and permutation table
   2685         ptr = (uchar*)buf;
   2686         if( !spec )
   2687         {
   2688             wave = ptr;
   2689             ptr += len*complex_elem_size;
   2690             itab = (int*)ptr;
   2691             ptr = (uchar*)cvAlignPtr( ptr + len*sizeof(int), 16 );
   2692 
   2693             if( len != prev_len || (!inplace_transform && inv && real_transform))
   2694                 DFTInit( len, nf, factors, itab, complex_elem_size,
   2695                             wave, stage == 0 && inv && real_transform );
   2696             // otherwise reuse the tables calculated on the previous stage
   2697         }
   2698 
   2699         if( stage == 0 )
   2700         {
   2701             uchar* tmp_buf = 0;
   2702             int dptr_offset = 0;
   2703             int dst_full_len = len*elem_size;
   2704             int _flags = (int)inv + (src.channels() != dst.channels() ?
   2705                          DFT_COMPLEX_INPUT_OR_OUTPUT : 0);
   2706             if( use_buf )
   2707             {
   2708                 tmp_buf = ptr;
   2709                 ptr += len*complex_elem_size;
   2710                 if( odd_real && !inv && len > 1 &&
   2711                     !(_flags & DFT_COMPLEX_INPUT_OR_OUTPUT))
   2712                     dptr_offset = elem_size;
   2713             }
   2714 
   2715             if( !inv && (_flags & DFT_COMPLEX_INPUT_OR_OUTPUT) )
   2716                 dst_full_len += (len & 1) ? elem_size : complex_elem_size;
   2717 
   2718             dft_func = dft_tbl[(!real_transform ? 0 : !inv ? 1 : 2) + (depth == CV_64F)*3];
   2719 
   2720             if( count > 1 && !(flags & DFT_ROWS) && (!inv || !real_transform) )
   2721                 stage = 1;
   2722             else if( flags & CV_DXT_SCALE )
   2723                 scale = 1./(len * (flags & DFT_ROWS ? 1 : count));
   2724 
   2725             if( nonzero_rows <= 0 || nonzero_rows > count )
   2726                 nonzero_rows = count;
   2727 
   2728             for( i = 0; i < nonzero_rows; i++ )
   2729             {
   2730                 const uchar* sptr = src.ptr(i);
   2731                 uchar* dptr0 = dst.ptr(i);
   2732                 uchar* dptr = dptr0;
   2733 
   2734                 if( tmp_buf )
   2735                     dptr = tmp_buf;
   2736 
   2737                 dft_func( sptr, dptr, len, nf, factors, itab, wave, len, spec, ptr, _flags, scale );
   2738                 if( dptr != dptr0 )
   2739                     memcpy( dptr0, dptr + dptr_offset, dst_full_len );
   2740             }
   2741 
   2742             for( ; i < count; i++ )
   2743             {
   2744                 uchar* dptr0 = dst.ptr(i);
   2745                 memset( dptr0, 0, dst_full_len );
   2746             }
   2747 
   2748             if( stage != 1 )
   2749             {
   2750                 if( !inv && real_transform && dst.channels() == 2 )
   2751                     complementComplexOutput(dst, nonzero_rows, 1);
   2752                 break;
   2753             }
   2754             src = dst;
   2755         }
   2756         else
   2757         {
   2758             int a = 0, b = count;
   2759             uchar *buf0, *buf1, *dbuf0, *dbuf1;
   2760             const uchar* sptr0 = src.ptr();
   2761             uchar* dptr0 = dst.ptr();
   2762             buf0 = ptr;
   2763             ptr += len*complex_elem_size;
   2764             buf1 = ptr;
   2765             ptr += len*complex_elem_size;
   2766             dbuf0 = buf0, dbuf1 = buf1;
   2767 
   2768             if( use_buf )
   2769             {
   2770                 dbuf1 = ptr;
   2771                 dbuf0 = buf1;
   2772                 ptr += len*complex_elem_size;
   2773             }
   2774 
   2775             dft_func = dft_tbl[(depth == CV_64F)*3];
   2776 
   2777             if( real_transform && inv && src.cols > 1 )
   2778                 stage = 0;
   2779             else if( flags & CV_DXT_SCALE )
   2780                 scale = 1./(len * count);
   2781 
   2782             if( real_transform )
   2783             {
   2784                 int even;
   2785                 a = 1;
   2786                 even = (count & 1) == 0;
   2787                 b = (count+1)/2;
   2788                 if( !inv )
   2789                 {
   2790                     memset( buf0, 0, len*complex_elem_size );
   2791                     CopyColumn( sptr0, src.step, buf0, complex_elem_size, len, elem_size );
   2792                     sptr0 += dst.channels()*elem_size;
   2793                     if( even )
   2794                     {
   2795                         memset( buf1, 0, len*complex_elem_size );
   2796                         CopyColumn( sptr0 + (count-2)*elem_size, src.step,
   2797                                     buf1, complex_elem_size, len, elem_size );
   2798                     }
   2799                 }
   2800                 else if( src.channels() == 1 )
   2801                 {
   2802                     CopyColumn( sptr0, src.step, buf0, elem_size, len, elem_size );
   2803                     ExpandCCS( buf0, len, elem_size );
   2804                     if( even )
   2805                     {
   2806                         CopyColumn( sptr0 + (count-1)*elem_size, src.step,
   2807                                     buf1, elem_size, len, elem_size );
   2808                         ExpandCCS( buf1, len, elem_size );
   2809                     }
   2810                     sptr0 += elem_size;
   2811                 }
   2812                 else
   2813                 {
   2814                     CopyColumn( sptr0, src.step, buf0, complex_elem_size, len, complex_elem_size );
   2815                     if( even )
   2816                     {
   2817                         CopyColumn( sptr0 + b*complex_elem_size, src.step,
   2818                                        buf1, complex_elem_size, len, complex_elem_size );
   2819                     }
   2820                     sptr0 += complex_elem_size;
   2821                 }
   2822 
   2823                 if( even )
   2824                     dft_func( buf1, dbuf1, len, nf, factors, itab,
   2825                               wave, len, spec, ptr, inv, scale );
   2826                 dft_func( buf0, dbuf0, len, nf, factors, itab,
   2827                           wave, len, spec, ptr, inv, scale );
   2828 
   2829                 if( dst.channels() == 1 )
   2830                 {
   2831                     if( !inv )
   2832                     {
   2833                         // copy the half of output vector to the first/last column.
   2834                         // before doing that, defgragment the vector
   2835                         memcpy( dbuf0 + elem_size, dbuf0, elem_size );
   2836                         CopyColumn( dbuf0 + elem_size, elem_size, dptr0,
   2837                                        dst.step, len, elem_size );
   2838                         if( even )
   2839                         {
   2840                             memcpy( dbuf1 + elem_size, dbuf1, elem_size );
   2841                             CopyColumn( dbuf1 + elem_size, elem_size,
   2842                                            dptr0 + (count-1)*elem_size,
   2843                                            dst.step, len, elem_size );
   2844                         }
   2845                         dptr0 += elem_size;
   2846                     }
   2847                     else
   2848                     {
   2849                         // copy the real part of the complex vector to the first/last column
   2850                         CopyColumn( dbuf0, complex_elem_size, dptr0, dst.step, len, elem_size );
   2851                         if( even )
   2852                             CopyColumn( dbuf1, complex_elem_size, dptr0 + (count-1)*elem_size,
   2853                                            dst.step, len, elem_size );
   2854                         dptr0 += elem_size;
   2855                     }
   2856                 }
   2857                 else
   2858                 {
   2859                     assert( !inv );
   2860                     CopyColumn( dbuf0, complex_elem_size, dptr0,
   2861                                    dst.step, len, complex_elem_size );
   2862                     if( even )
   2863                         CopyColumn( dbuf1, complex_elem_size,
   2864                                        dptr0 + b*complex_elem_size,
   2865                                        dst.step, len, complex_elem_size );
   2866                     dptr0 += complex_elem_size;
   2867                 }
   2868             }
   2869 
   2870             for( i = a; i < b; i += 2 )
   2871             {
   2872                 if( i+1 < b )
   2873                 {
   2874                     CopyFrom2Columns( sptr0, src.step, buf0, buf1, len, complex_elem_size );
   2875                     dft_func( buf1, dbuf1, len, nf, factors, itab,
   2876                               wave, len, spec, ptr, inv, scale );
   2877                 }
   2878                 else
   2879                     CopyColumn( sptr0, src.step, buf0, complex_elem_size, len, complex_elem_size );
   2880 
   2881                 dft_func( buf0, dbuf0, len, nf, factors, itab,
   2882                           wave, len, spec, ptr, inv, scale );
   2883 
   2884                 if( i+1 < b )
   2885                     CopyTo2Columns( dbuf0, dbuf1, dptr0, dst.step, len, complex_elem_size );
   2886                 else
   2887                     CopyColumn( dbuf0, complex_elem_size, dptr0, dst.step, len, complex_elem_size );
   2888                 sptr0 += 2*complex_elem_size;
   2889                 dptr0 += 2*complex_elem_size;
   2890             }
   2891 
   2892             if( stage != 0 )
   2893             {
   2894                 if( !inv && real_transform && dst.channels() == 2 && len > 1 )
   2895                     complementComplexOutput(dst, len, 2);
   2896                 break;
   2897             }
   2898             src = dst;
   2899         }
   2900     }
   2901 }
   2902 
   2903 
   2904 void cv::idft( InputArray src, OutputArray dst, int flags, int nonzero_rows )
   2905 {
   2906     dft( src, dst, flags | DFT_INVERSE, nonzero_rows );
   2907 }
   2908 
   2909 #ifdef HAVE_OPENCL
   2910 
   2911 namespace cv {
   2912 
   2913 static bool ocl_mulSpectrums( InputArray _srcA, InputArray _srcB,
   2914                               OutputArray _dst, int flags, bool conjB )
   2915 {
   2916     int atype = _srcA.type(), btype = _srcB.type(),
   2917             rowsPerWI = ocl::Device::getDefault().isIntel() ? 4 : 1;
   2918     Size asize = _srcA.size(), bsize = _srcB.size();
   2919     CV_Assert(asize == bsize);
   2920 
   2921     if ( !(atype == CV_32FC2 && btype == CV_32FC2) || flags != 0 )
   2922         return false;
   2923 
   2924     UMat A = _srcA.getUMat(), B = _srcB.getUMat();
   2925     CV_Assert(A.size() == B.size());
   2926 
   2927     _dst.create(A.size(), atype);
   2928     UMat dst = _dst.getUMat();
   2929 
   2930     ocl::Kernel k("mulAndScaleSpectrums",
   2931                   ocl::core::mulspectrums_oclsrc,
   2932                   format("%s", conjB ? "-D CONJ" : ""));
   2933     if (k.empty())
   2934         return false;
   2935 
   2936     k.args(ocl::KernelArg::ReadOnlyNoSize(A), ocl::KernelArg::ReadOnlyNoSize(B),
   2937            ocl::KernelArg::WriteOnly(dst), rowsPerWI);
   2938 
   2939     size_t globalsize[2] = { asize.width, (asize.height + rowsPerWI - 1) / rowsPerWI };
   2940     return k.run(2, globalsize, NULL, false);
   2941 }
   2942 
   2943 }
   2944 
   2945 #endif
   2946 
   2947 void cv::mulSpectrums( InputArray _srcA, InputArray _srcB,
   2948                        OutputArray _dst, int flags, bool conjB )
   2949 {
   2950     CV_OCL_RUN(_dst.isUMat() && _srcA.dims() <= 2 && _srcB.dims() <= 2,
   2951             ocl_mulSpectrums(_srcA, _srcB, _dst, flags, conjB))
   2952 
   2953     Mat srcA = _srcA.getMat(), srcB = _srcB.getMat();
   2954     int depth = srcA.depth(), cn = srcA.channels(), type = srcA.type();
   2955     int rows = srcA.rows, cols = srcA.cols;
   2956     int j, k;
   2957 
   2958     CV_Assert( type == srcB.type() && srcA.size() == srcB.size() );
   2959     CV_Assert( type == CV_32FC1 || type == CV_32FC2 || type == CV_64FC1 || type == CV_64FC2 );
   2960 
   2961     _dst.create( srcA.rows, srcA.cols, type );
   2962     Mat dst = _dst.getMat();
   2963 
   2964     bool is_1d = (flags & DFT_ROWS) || (rows == 1 || (cols == 1 &&
   2965              srcA.isContinuous() && srcB.isContinuous() && dst.isContinuous()));
   2966 
   2967     if( is_1d && !(flags & DFT_ROWS) )
   2968         cols = cols + rows - 1, rows = 1;
   2969 
   2970     int ncols = cols*cn;
   2971     int j0 = cn == 1;
   2972     int j1 = ncols - (cols % 2 == 0 && cn == 1);
   2973 
   2974     if( depth == CV_32F )
   2975     {
   2976         const float* dataA = srcA.ptr<float>();
   2977         const float* dataB = srcB.ptr<float>();
   2978         float* dataC = dst.ptr<float>();
   2979 
   2980         size_t stepA = srcA.step/sizeof(dataA[0]);
   2981         size_t stepB = srcB.step/sizeof(dataB[0]);
   2982         size_t stepC = dst.step/sizeof(dataC[0]);
   2983 
   2984         if( !is_1d && cn == 1 )
   2985         {
   2986             for( k = 0; k < (cols % 2 ? 1 : 2); k++ )
   2987             {
   2988                 if( k == 1 )
   2989                     dataA += cols - 1, dataB += cols - 1, dataC += cols - 1;
   2990                 dataC[0] = dataA[0]*dataB[0];
   2991                 if( rows % 2 == 0 )
   2992                     dataC[(rows-1)*stepC] = dataA[(rows-1)*stepA]*dataB[(rows-1)*stepB];
   2993                 if( !conjB )
   2994                     for( j = 1; j <= rows - 2; j += 2 )
   2995                     {
   2996                         double re = (double)dataA[j*stepA]*dataB[j*stepB] -
   2997                                     (double)dataA[(j+1)*stepA]*dataB[(j+1)*stepB];
   2998                         double im = (double)dataA[j*stepA]*dataB[(j+1)*stepB] +
   2999                                     (double)dataA[(j+1)*stepA]*dataB[j*stepB];
   3000                         dataC[j*stepC] = (float)re; dataC[(j+1)*stepC] = (float)im;
   3001                     }
   3002                 else
   3003                     for( j = 1; j <= rows - 2; j += 2 )
   3004                     {
   3005                         double re = (double)dataA[j*stepA]*dataB[j*stepB] +
   3006                                     (double)dataA[(j+1)*stepA]*dataB[(j+1)*stepB];
   3007                         double im = (double)dataA[(j+1)*stepA]*dataB[j*stepB] -
   3008                                     (double)dataA[j*stepA]*dataB[(j+1)*stepB];
   3009                         dataC[j*stepC] = (float)re; dataC[(j+1)*stepC] = (float)im;
   3010                     }
   3011                 if( k == 1 )
   3012                     dataA -= cols - 1, dataB -= cols - 1, dataC -= cols - 1;
   3013             }
   3014         }
   3015 
   3016         for( ; rows--; dataA += stepA, dataB += stepB, dataC += stepC )
   3017         {
   3018             if( is_1d && cn == 1 )
   3019             {
   3020                 dataC[0] = dataA[0]*dataB[0];
   3021                 if( cols % 2 == 0 )
   3022                     dataC[j1] = dataA[j1]*dataB[j1];
   3023             }
   3024 
   3025             if( !conjB )
   3026                 for( j = j0; j < j1; j += 2 )
   3027                 {
   3028                     double re = (double)dataA[j]*dataB[j] - (double)dataA[j+1]*dataB[j+1];
   3029                     double im = (double)dataA[j+1]*dataB[j] + (double)dataA[j]*dataB[j+1];
   3030                     dataC[j] = (float)re; dataC[j+1] = (float)im;
   3031                 }
   3032             else
   3033                 for( j = j0; j < j1; j += 2 )
   3034                 {
   3035                     double re = (double)dataA[j]*dataB[j] + (double)dataA[j+1]*dataB[j+1];
   3036                     double im = (double)dataA[j+1]*dataB[j] - (double)dataA[j]*dataB[j+1];
   3037                     dataC[j] = (float)re; dataC[j+1] = (float)im;
   3038                 }
   3039         }
   3040     }
   3041     else
   3042     {
   3043         const double* dataA = srcA.ptr<double>();
   3044         const double* dataB = srcB.ptr<double>();
   3045         double* dataC = dst.ptr<double>();
   3046 
   3047         size_t stepA = srcA.step/sizeof(dataA[0]);
   3048         size_t stepB = srcB.step/sizeof(dataB[0]);
   3049         size_t stepC = dst.step/sizeof(dataC[0]);
   3050 
   3051         if( !is_1d && cn == 1 )
   3052         {
   3053             for( k = 0; k < (cols % 2 ? 1 : 2); k++ )
   3054             {
   3055                 if( k == 1 )
   3056                     dataA += cols - 1, dataB += cols - 1, dataC += cols - 1;
   3057                 dataC[0] = dataA[0]*dataB[0];
   3058                 if( rows % 2 == 0 )
   3059                     dataC[(rows-1)*stepC] = dataA[(rows-1)*stepA]*dataB[(rows-1)*stepB];
   3060                 if( !conjB )
   3061                     for( j = 1; j <= rows - 2; j += 2 )
   3062                     {
   3063                         double re = dataA[j*stepA]*dataB[j*stepB] -
   3064                                     dataA[(j+1)*stepA]*dataB[(j+1)*stepB];
   3065                         double im = dataA[j*stepA]*dataB[(j+1)*stepB] +
   3066                                     dataA[(j+1)*stepA]*dataB[j*stepB];
   3067                         dataC[j*stepC] = re; dataC[(j+1)*stepC] = im;
   3068                     }
   3069                 else
   3070                     for( j = 1; j <= rows - 2; j += 2 )
   3071                     {
   3072                         double re = dataA[j*stepA]*dataB[j*stepB] +
   3073                                     dataA[(j+1)*stepA]*dataB[(j+1)*stepB];
   3074                         double im = dataA[(j+1)*stepA]*dataB[j*stepB] -
   3075                                     dataA[j*stepA]*dataB[(j+1)*stepB];
   3076                         dataC[j*stepC] = re; dataC[(j+1)*stepC] = im;
   3077                     }
   3078                 if( k == 1 )
   3079                     dataA -= cols - 1, dataB -= cols - 1, dataC -= cols - 1;
   3080             }
   3081         }
   3082 
   3083         for( ; rows--; dataA += stepA, dataB += stepB, dataC += stepC )
   3084         {
   3085             if( is_1d && cn == 1 )
   3086             {
   3087                 dataC[0] = dataA[0]*dataB[0];
   3088                 if( cols % 2 == 0 )
   3089                     dataC[j1] = dataA[j1]*dataB[j1];
   3090             }
   3091 
   3092             if( !conjB )
   3093                 for( j = j0; j < j1; j += 2 )
   3094                 {
   3095                     double re = dataA[j]*dataB[j] - dataA[j+1]*dataB[j+1];
   3096                     double im = dataA[j+1]*dataB[j] + dataA[j]*dataB[j+1];
   3097                     dataC[j] = re; dataC[j+1] = im;
   3098                 }
   3099             else
   3100                 for( j = j0; j < j1; j += 2 )
   3101                 {
   3102                     double re = dataA[j]*dataB[j] + dataA[j+1]*dataB[j+1];
   3103                     double im = dataA[j+1]*dataB[j] - dataA[j]*dataB[j+1];
   3104                     dataC[j] = re; dataC[j+1] = im;
   3105                 }
   3106         }
   3107     }
   3108 }
   3109 
   3110 
   3111 /****************************************************************************************\
   3112                                Discrete Cosine Transform
   3113 \****************************************************************************************/
   3114 
   3115 namespace cv
   3116 {
   3117 
   3118 /* DCT is calculated using DFT, as described here:
   3119    http://www.ece.utexas.edu/~bevans/courses/ee381k/lectures/09_DCT/lecture9/:
   3120 */
   3121 template<typename T> static void
   3122 DCT( const T* src, int src_step, T* dft_src, T* dft_dst, T* dst, int dst_step,
   3123      int n, int nf, int* factors, const int* itab, const Complex<T>* dft_wave,
   3124      const Complex<T>* dct_wave, const void* spec, Complex<T>* buf )
   3125 {
   3126     static const T sin_45 = (T)0.70710678118654752440084436210485;
   3127     int j, n2 = n >> 1;
   3128 
   3129     src_step /= sizeof(src[0]);
   3130     dst_step /= sizeof(dst[0]);
   3131     T* dst1 = dst + (n-1)*dst_step;
   3132 
   3133     if( n == 1 )
   3134     {
   3135         dst[0] = src[0];
   3136         return;
   3137     }
   3138 
   3139     for( j = 0; j < n2; j++, src += src_step*2 )
   3140     {
   3141         dft_src[j] = src[0];
   3142         dft_src[n-j-1] = src[src_step];
   3143     }
   3144 
   3145     RealDFT( dft_src, dft_dst, n, nf, factors,
   3146              itab, dft_wave, n, spec, buf, 0, 1.0 );
   3147     src = dft_dst;
   3148 
   3149     dst[0] = (T)(src[0]*dct_wave->re*sin_45);
   3150     dst += dst_step;
   3151     for( j = 1, dct_wave++; j < n2; j++, dct_wave++,
   3152                                     dst += dst_step, dst1 -= dst_step )
   3153     {
   3154         T t0 = dct_wave->re*src[j*2-1] - dct_wave->im*src[j*2];
   3155         T t1 = -dct_wave->im*src[j*2-1] - dct_wave->re*src[j*2];
   3156         dst[0] = t0;
   3157         dst1[0] = t1;
   3158     }
   3159 
   3160     dst[0] = src[n-1]*dct_wave->re;
   3161 }
   3162 
   3163 
   3164 template<typename T> static void
   3165 IDCT( const T* src, int src_step, T* dft_src, T* dft_dst, T* dst, int dst_step,
   3166       int n, int nf, int* factors, const int* itab, const Complex<T>* dft_wave,
   3167       const Complex<T>* dct_wave, const void* spec, Complex<T>* buf )
   3168 {
   3169     static const T sin_45 = (T)0.70710678118654752440084436210485;
   3170     int j, n2 = n >> 1;
   3171 
   3172     src_step /= sizeof(src[0]);
   3173     dst_step /= sizeof(dst[0]);
   3174     const T* src1 = src + (n-1)*src_step;
   3175 
   3176     if( n == 1 )
   3177     {
   3178         dst[0] = src[0];
   3179         return;
   3180     }
   3181 
   3182     dft_src[0] = (T)(src[0]*2*dct_wave->re*sin_45);
   3183     src += src_step;
   3184     for( j = 1, dct_wave++; j < n2; j++, dct_wave++,
   3185                                     src += src_step, src1 -= src_step )
   3186     {
   3187         T t0 = dct_wave->re*src[0] - dct_wave->im*src1[0];
   3188         T t1 = -dct_wave->im*src[0] - dct_wave->re*src1[0];
   3189         dft_src[j*2-1] = t0;
   3190         dft_src[j*2] = t1;
   3191     }
   3192 
   3193     dft_src[n-1] = (T)(src[0]*2*dct_wave->re);
   3194     CCSIDFT( dft_src, dft_dst, n, nf, factors, itab,
   3195              dft_wave, n, spec, buf, 0, 1.0 );
   3196 
   3197     for( j = 0; j < n2; j++, dst += dst_step*2 )
   3198     {
   3199         dst[0] = dft_dst[j];
   3200         dst[dst_step] = dft_dst[n-j-1];
   3201     }
   3202 }
   3203 
   3204 
   3205 static void
   3206 DCTInit( int n, int elem_size, void* _wave, int inv )
   3207 {
   3208     static const double DctScale[] =
   3209     {
   3210     0.707106781186547570, 0.500000000000000000, 0.353553390593273790,
   3211     0.250000000000000000, 0.176776695296636890, 0.125000000000000000,
   3212     0.088388347648318447, 0.062500000000000000, 0.044194173824159223,
   3213     0.031250000000000000, 0.022097086912079612, 0.015625000000000000,
   3214     0.011048543456039806, 0.007812500000000000, 0.005524271728019903,
   3215     0.003906250000000000, 0.002762135864009952, 0.001953125000000000,
   3216     0.001381067932004976, 0.000976562500000000, 0.000690533966002488,
   3217     0.000488281250000000, 0.000345266983001244, 0.000244140625000000,
   3218     0.000172633491500622, 0.000122070312500000, 0.000086316745750311,
   3219     0.000061035156250000, 0.000043158372875155, 0.000030517578125000
   3220     };
   3221 
   3222     int i;
   3223     Complex<double> w, w1;
   3224     double t, scale;
   3225 
   3226     if( n == 1 )
   3227         return;
   3228 
   3229     assert( (n&1) == 0 );
   3230 
   3231     if( (n & (n - 1)) == 0 )
   3232     {
   3233         int m;
   3234         for( m = 0; (unsigned)(1 << m) < (unsigned)n; m++ )
   3235             ;
   3236         scale = (!inv ? 2 : 1)*DctScale[m];
   3237         w1.re = DFTTab[m+2][0];
   3238         w1.im = -DFTTab[m+2][1];
   3239     }
   3240     else
   3241     {
   3242         t = 1./(2*n);
   3243         scale = (!inv ? 2 : 1)*std::sqrt(t);
   3244         w1.im = sin(-CV_PI*t);
   3245         w1.re = std::sqrt(1. - w1.im*w1.im);
   3246     }
   3247     n >>= 1;
   3248 
   3249     if( elem_size == sizeof(Complex<double>) )
   3250     {
   3251         Complex<double>* wave = (Complex<double>*)_wave;
   3252 
   3253         w.re = scale;
   3254         w.im = 0.;
   3255 
   3256         for( i = 0; i <= n; i++ )
   3257         {
   3258             wave[i] = w;
   3259             t = w.re*w1.re - w.im*w1.im;
   3260             w.im = w.re*w1.im + w.im*w1.re;
   3261             w.re = t;
   3262         }
   3263     }
   3264     else
   3265     {
   3266         Complex<float>* wave = (Complex<float>*)_wave;
   3267         assert( elem_size == sizeof(Complex<float>) );
   3268 
   3269         w.re = (float)scale;
   3270         w.im = 0.f;
   3271 
   3272         for( i = 0; i <= n; i++ )
   3273         {
   3274             wave[i].re = (float)w.re;
   3275             wave[i].im = (float)w.im;
   3276             t = w.re*w1.re - w.im*w1.im;
   3277             w.im = w.re*w1.im + w.im*w1.re;
   3278             w.re = t;
   3279         }
   3280     }
   3281 }
   3282 
   3283 
   3284 typedef void (*DCTFunc)(const void* src, int src_step, void* dft_src,
   3285                         void* dft_dst, void* dst, int dst_step, int n,
   3286                         int nf, int* factors, const int* itab, const void* dft_wave,
   3287                         const void* dct_wave, const void* spec, void* buf );
   3288 
   3289 static void DCT_32f(const float* src, int src_step, float* dft_src, float* dft_dst,
   3290                     float* dst, int dst_step, int n, int nf, int* factors, const int* itab,
   3291                     const Complexf* dft_wave, const Complexf* dct_wave, const void* spec, Complexf* buf )
   3292 {
   3293     DCT(src, src_step, dft_src, dft_dst, dst, dst_step,
   3294         n, nf, factors, itab, dft_wave, dct_wave, spec, buf);
   3295 }
   3296 
   3297 static void IDCT_32f(const float* src, int src_step, float* dft_src, float* dft_dst,
   3298                     float* dst, int dst_step, int n, int nf, int* factors, const int* itab,
   3299                     const Complexf* dft_wave, const Complexf* dct_wave, const void* spec, Complexf* buf )
   3300 {
   3301     IDCT(src, src_step, dft_src, dft_dst, dst, dst_step,
   3302          n, nf, factors, itab, dft_wave, dct_wave, spec, buf);
   3303 }
   3304 
   3305 static void DCT_64f(const double* src, int src_step, double* dft_src, double* dft_dst,
   3306                     double* dst, int dst_step, int n, int nf, int* factors, const int* itab,
   3307                     const Complexd* dft_wave, const Complexd* dct_wave, const void* spec, Complexd* buf )
   3308 {
   3309     DCT(src, src_step, dft_src, dft_dst, dst, dst_step,
   3310         n, nf, factors, itab, dft_wave, dct_wave, spec, buf);
   3311 }
   3312 
   3313 static void IDCT_64f(const double* src, int src_step, double* dft_src, double* dft_dst,
   3314                      double* dst, int dst_step, int n, int nf, int* factors, const int* itab,
   3315                      const Complexd* dft_wave, const Complexd* dct_wave, const void* spec, Complexd* buf )
   3316 {
   3317     IDCT(src, src_step, dft_src, dft_dst, dst, dst_step,
   3318          n, nf, factors, itab, dft_wave, dct_wave, spec, buf);
   3319 }
   3320 
   3321 }
   3322 
   3323 namespace cv
   3324 {
   3325 #if defined HAVE_IPP && IPP_VERSION_MAJOR >= 7
   3326 
   3327 typedef IppStatus (CV_STDCALL * ippiDCTFunc)(const Ipp32f*, int, Ipp32f*, int, const void*, Ipp8u*);
   3328 typedef IppStatus (CV_STDCALL * ippiDCTInitAlloc)(void**, IppiSize, IppHintAlgorithm);
   3329 typedef IppStatus (CV_STDCALL * ippiDCTFree)(void* pDCTSpec);
   3330 typedef IppStatus (CV_STDCALL * ippiDCTGetBufSize)(const void*, int*);
   3331 
   3332 template <typename Dct>
   3333 class DctIPPLoop_Invoker : public ParallelLoopBody
   3334 {
   3335 public:
   3336 
   3337     DctIPPLoop_Invoker(const Mat& _src, Mat& _dst, const Dct* _ippidct, bool _inv, bool *_ok) :
   3338         ParallelLoopBody(), src(&_src), dst(&_dst), ippidct(_ippidct), inv(_inv), ok(_ok)
   3339     {
   3340         *ok = true;
   3341     }
   3342 
   3343     virtual void operator()(const Range& range) const
   3344     {
   3345         void* pDCTSpec;
   3346         AutoBuffer<uchar> buf;
   3347         uchar* pBuffer = 0;
   3348         int bufSize=0;
   3349 
   3350         IppiSize srcRoiSize = {src->cols, 1};
   3351 
   3352         CV_SUPPRESS_DEPRECATED_START
   3353 
   3354         ippiDCTInitAlloc ippInitAlloc   = inv ? (ippiDCTInitAlloc)ippiDCTInvInitAlloc_32f   : (ippiDCTInitAlloc)ippiDCTFwdInitAlloc_32f;
   3355         ippiDCTFree ippFree             = inv ? (ippiDCTFree)ippiDCTInvFree_32f             : (ippiDCTFree)ippiDCTFwdFree_32f;
   3356         ippiDCTGetBufSize ippGetBufSize = inv ? (ippiDCTGetBufSize)ippiDCTInvGetBufSize_32f : (ippiDCTGetBufSize)ippiDCTFwdGetBufSize_32f;
   3357 
   3358         if (ippInitAlloc(&pDCTSpec, srcRoiSize, ippAlgHintNone)>=0 && ippGetBufSize(pDCTSpec, &bufSize)>=0)
   3359         {
   3360             buf.allocate( bufSize );
   3361             pBuffer = (uchar*)buf;
   3362 
   3363             for( int i = range.start; i < range.end; ++i)
   3364                 if(!(*ippidct)(src->ptr<float>(i), (int)src->step,dst->ptr<float>(i), (int)dst->step, pDCTSpec, (Ipp8u*)pBuffer))
   3365                     *ok = false;
   3366         }
   3367         else
   3368             *ok = false;
   3369 
   3370         if (pDCTSpec)
   3371             ippFree(pDCTSpec);
   3372 
   3373         CV_SUPPRESS_DEPRECATED_END
   3374     }
   3375 
   3376 private:
   3377     const Mat* src;
   3378     Mat* dst;
   3379     const Dct* ippidct;
   3380     bool inv;
   3381     bool *ok;
   3382 };
   3383 
   3384 template <typename Dct>
   3385 bool DctIPPLoop(const Mat& src, Mat& dst, const Dct& ippidct, bool inv)
   3386 {
   3387     bool ok;
   3388     parallel_for_(Range(0, src.rows), DctIPPLoop_Invoker<Dct>(src, dst, &ippidct, inv, &ok), src.rows/(double)(1<<4) );
   3389     return ok;
   3390 }
   3391 
   3392 struct IPPDCTFunctor
   3393 {
   3394     IPPDCTFunctor(ippiDCTFunc _func) : func(_func){}
   3395 
   3396     bool operator()(const Ipp32f* src, int srcStep, Ipp32f* dst, int dstStep, const void* pDCTSpec, Ipp8u* pBuffer) const
   3397     {
   3398         return func ? func(src, srcStep, dst, dstStep, pDCTSpec, pBuffer) >= 0 : false;
   3399     }
   3400 private:
   3401     ippiDCTFunc func;
   3402 };
   3403 
   3404 static bool ippi_DCT_32f(const Mat& src, Mat& dst, bool inv, bool row)
   3405 {
   3406     ippiDCTFunc ippFunc = inv ? (ippiDCTFunc)ippiDCTInv_32f_C1R : (ippiDCTFunc)ippiDCTFwd_32f_C1R ;
   3407 
   3408     if (row)
   3409         return(DctIPPLoop(src,dst,IPPDCTFunctor(ippFunc),inv));
   3410     else
   3411     {
   3412         IppStatus status;
   3413         void* pDCTSpec;
   3414         AutoBuffer<uchar> buf;
   3415         uchar* pBuffer = 0;
   3416         int bufSize=0;
   3417 
   3418         IppiSize srcRoiSize = {src.cols, src.rows};
   3419 
   3420         CV_SUPPRESS_DEPRECATED_START
   3421 
   3422         ippiDCTInitAlloc ippInitAlloc   = inv ? (ippiDCTInitAlloc)ippiDCTInvInitAlloc_32f   : (ippiDCTInitAlloc)ippiDCTFwdInitAlloc_32f;
   3423         ippiDCTFree ippFree             = inv ? (ippiDCTFree)ippiDCTInvFree_32f             : (ippiDCTFree)ippiDCTFwdFree_32f;
   3424         ippiDCTGetBufSize ippGetBufSize = inv ? (ippiDCTGetBufSize)ippiDCTInvGetBufSize_32f : (ippiDCTGetBufSize)ippiDCTFwdGetBufSize_32f;
   3425 
   3426         status = ippStsErr;
   3427 
   3428         if (ippInitAlloc(&pDCTSpec, srcRoiSize, ippAlgHintNone)>=0 && ippGetBufSize(pDCTSpec, &bufSize)>=0)
   3429         {
   3430             buf.allocate( bufSize );
   3431             pBuffer = (uchar*)buf;
   3432 
   3433             status = ippFunc(src.ptr<float>(), (int)src.step, dst.ptr<float>(), (int)dst.step, pDCTSpec, (Ipp8u*)pBuffer);
   3434         }
   3435 
   3436         if (pDCTSpec)
   3437             ippFree(pDCTSpec);
   3438 
   3439         CV_SUPPRESS_DEPRECATED_END
   3440 
   3441         return status >= 0;
   3442     }
   3443 }
   3444 
   3445 #endif
   3446 }
   3447 
   3448 void cv::dct( InputArray _src0, OutputArray _dst, int flags )
   3449 {
   3450     static DCTFunc dct_tbl[4] =
   3451     {
   3452         (DCTFunc)DCT_32f,
   3453         (DCTFunc)IDCT_32f,
   3454         (DCTFunc)DCT_64f,
   3455         (DCTFunc)IDCT_64f
   3456     };
   3457 
   3458     bool inv = (flags & DCT_INVERSE) != 0;
   3459     Mat src0 = _src0.getMat(), src = src0;
   3460     int type = src.type(), depth = src.depth();
   3461     void *spec = 0;
   3462 
   3463     double scale = 1.;
   3464     int prev_len = 0, nf = 0, stage, end_stage;
   3465     uchar *src_dft_buf = 0, *dst_dft_buf = 0;
   3466     uchar *dft_wave = 0, *dct_wave = 0;
   3467     int* itab = 0;
   3468     uchar* ptr = 0;
   3469     int elem_size = (int)src.elemSize(), complex_elem_size = elem_size*2;
   3470     int factors[34], inplace_transform;
   3471     int i, len, count;
   3472     AutoBuffer<uchar> buf;
   3473 
   3474     CV_Assert( type == CV_32FC1 || type == CV_64FC1 );
   3475     _dst.create( src.rows, src.cols, type );
   3476     Mat dst = _dst.getMat();
   3477 
   3478 #if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7)
   3479     CV_IPP_CHECK()
   3480     {
   3481         bool row = (flags & DCT_ROWS) != 0;
   3482         if (src.type() == CV_32F)
   3483         {
   3484             if(ippi_DCT_32f(src,dst,inv, row))
   3485             {
   3486                 CV_IMPL_ADD(CV_IMPL_IPP);
   3487                 return;
   3488             }
   3489             setIppErrorStatus();
   3490         }
   3491     }
   3492 #endif
   3493 
   3494     DCTFunc dct_func = dct_tbl[(int)inv + (depth == CV_64F)*2];
   3495 
   3496     if( (flags & DCT_ROWS) || src.rows == 1 ||
   3497         (src.cols == 1 && (src.isContinuous() && dst.isContinuous())))
   3498     {
   3499         stage = end_stage = 0;
   3500     }
   3501     else
   3502     {
   3503         stage = src.cols == 1;
   3504         end_stage = 1;
   3505     }
   3506 
   3507     for( ; stage <= end_stage; stage++ )
   3508     {
   3509         const uchar* sptr = src.ptr();
   3510         uchar* dptr = dst.ptr();
   3511         size_t sstep0, sstep1, dstep0, dstep1;
   3512 
   3513         if( stage == 0 )
   3514         {
   3515             len = src.cols;
   3516             count = src.rows;
   3517             if( len == 1 && !(flags & DCT_ROWS) )
   3518             {
   3519                 len = src.rows;
   3520                 count = 1;
   3521             }
   3522             sstep0 = src.step;
   3523             dstep0 = dst.step;
   3524             sstep1 = dstep1 = elem_size;
   3525         }
   3526         else
   3527         {
   3528             len = dst.rows;
   3529             count = dst.cols;
   3530             sstep1 = src.step;
   3531             dstep1 = dst.step;
   3532             sstep0 = dstep0 = elem_size;
   3533         }
   3534 
   3535         if( len != prev_len )
   3536         {
   3537             int sz;
   3538 
   3539             if( len > 1 && (len & 1) )
   3540                 CV_Error( CV_StsNotImplemented, "Odd-size DCT\'s are not implemented" );
   3541 
   3542             sz = len*elem_size;
   3543             sz += (len/2 + 1)*complex_elem_size;
   3544 
   3545             spec = 0;
   3546             inplace_transform = 1;
   3547             {
   3548                 sz += len*(complex_elem_size + sizeof(int)) + complex_elem_size;
   3549 
   3550                 nf = DFTFactorize( len, factors );
   3551                 inplace_transform = factors[0] == factors[nf-1];
   3552 
   3553                 i = nf > 1 && (factors[0] & 1) == 0;
   3554                 if( (factors[i] & 1) != 0 && factors[i] > 5 )
   3555                     sz += (factors[i]+1)*complex_elem_size;
   3556 
   3557                 if( !inplace_transform )
   3558                     sz += len*elem_size;
   3559             }
   3560 
   3561             buf.allocate( sz + 32 );
   3562             ptr = (uchar*)buf;
   3563 
   3564             if( !spec )
   3565             {
   3566                 dft_wave = ptr;
   3567                 ptr += len*complex_elem_size;
   3568                 itab = (int*)ptr;
   3569                 ptr = (uchar*)cvAlignPtr( ptr + len*sizeof(int), 16 );
   3570                 DFTInit( len, nf, factors, itab, complex_elem_size, dft_wave, inv );
   3571             }
   3572 
   3573             dct_wave = ptr;
   3574             ptr += (len/2 + 1)*complex_elem_size;
   3575             src_dft_buf = dst_dft_buf = ptr;
   3576             ptr += len*elem_size;
   3577             if( !inplace_transform )
   3578             {
   3579                 dst_dft_buf = ptr;
   3580                 ptr += len*elem_size;
   3581             }
   3582             DCTInit( len, complex_elem_size, dct_wave, inv );
   3583             if( !inv )
   3584                 scale += scale;
   3585             prev_len = len;
   3586         }
   3587         // otherwise reuse the tables calculated on the previous stage
   3588         for( i = 0; i < count; i++ )
   3589         {
   3590             dct_func( sptr + i*sstep0, (int)sstep1, src_dft_buf, dst_dft_buf,
   3591                       dptr + i*dstep0, (int)dstep1, len, nf, factors,
   3592                       itab, dft_wave, dct_wave, spec, ptr );
   3593         }
   3594         src = dst;
   3595     }
   3596 }
   3597 
   3598 
   3599 void cv::idct( InputArray src, OutputArray dst, int flags )
   3600 {
   3601     dct( src, dst, flags | DCT_INVERSE );
   3602 }
   3603 
   3604 namespace cv
   3605 {
   3606 
   3607 static const int optimalDFTSizeTab[] = {
   3608 1, 2, 3, 4, 5, 6, 8, 9, 10, 12, 15, 16, 18, 20, 24, 25, 27, 30, 32, 36, 40, 45, 48,
   3609 50, 54, 60, 64, 72, 75, 80, 81, 90, 96, 100, 108, 120, 125, 128, 135, 144, 150, 160,
   3610 162, 180, 192, 200, 216, 225, 240, 243, 250, 256, 270, 288, 300, 320, 324, 360, 375,
   3611 384, 400, 405, 432, 450, 480, 486, 500, 512, 540, 576, 600, 625, 640, 648, 675, 720,
   3612 729, 750, 768, 800, 810, 864, 900, 960, 972, 1000, 1024, 1080, 1125, 1152, 1200,
   3613 1215, 1250, 1280, 1296, 1350, 1440, 1458, 1500, 1536, 1600, 1620, 1728, 1800, 1875,
   3614 1920, 1944, 2000, 2025, 2048, 2160, 2187, 2250, 2304, 2400, 2430, 2500, 2560, 2592,
   3615 2700, 2880, 2916, 3000, 3072, 3125, 3200, 3240, 3375, 3456, 3600, 3645, 3750, 3840,
   3616 3888, 4000, 4050, 4096, 4320, 4374, 4500, 4608, 4800, 4860, 5000, 5120, 5184, 5400,
   3617 5625, 5760, 5832, 6000, 6075, 6144, 6250, 6400, 6480, 6561, 6750, 6912, 7200, 7290,
   3618 7500, 7680, 7776, 8000, 8100, 8192, 8640, 8748, 9000, 9216, 9375, 9600, 9720, 10000,
   3619 10125, 10240, 10368, 10800, 10935, 11250, 11520, 11664, 12000, 12150, 12288, 12500,
   3620 12800, 12960, 13122, 13500, 13824, 14400, 14580, 15000, 15360, 15552, 15625, 16000,
   3621 16200, 16384, 16875, 17280, 17496, 18000, 18225, 18432, 18750, 19200, 19440, 19683,
   3622 20000, 20250, 20480, 20736, 21600, 21870, 22500, 23040, 23328, 24000, 24300, 24576,
   3623 25000, 25600, 25920, 26244, 27000, 27648, 28125, 28800, 29160, 30000, 30375, 30720,
   3624 31104, 31250, 32000, 32400, 32768, 32805, 33750, 34560, 34992, 36000, 36450, 36864,
   3625 37500, 38400, 38880, 39366, 40000, 40500, 40960, 41472, 43200, 43740, 45000, 46080,
   3626 46656, 46875, 48000, 48600, 49152, 50000, 50625, 51200, 51840, 52488, 54000, 54675,
   3627 55296, 56250, 57600, 58320, 59049, 60000, 60750, 61440, 62208, 62500, 64000, 64800,
   3628 65536, 65610, 67500, 69120, 69984, 72000, 72900, 73728, 75000, 76800, 77760, 78125,
   3629 78732, 80000, 81000, 81920, 82944, 84375, 86400, 87480, 90000, 91125, 92160, 93312,
   3630 93750, 96000, 97200, 98304, 98415, 100000, 101250, 102400, 103680, 104976, 108000,
   3631 109350, 110592, 112500, 115200, 116640, 118098, 120000, 121500, 122880, 124416, 125000,
   3632 128000, 129600, 131072, 131220, 135000, 138240, 139968, 140625, 144000, 145800, 147456,
   3633 150000, 151875, 153600, 155520, 156250, 157464, 160000, 162000, 163840, 164025, 165888,
   3634 168750, 172800, 174960, 177147, 180000, 182250, 184320, 186624, 187500, 192000, 194400,
   3635 196608, 196830, 200000, 202500, 204800, 207360, 209952, 216000, 218700, 221184, 225000,
   3636 230400, 233280, 234375, 236196, 240000, 243000, 245760, 248832, 250000, 253125, 256000,
   3637 259200, 262144, 262440, 270000, 273375, 276480, 279936, 281250, 288000, 291600, 294912,
   3638 295245, 300000, 303750, 307200, 311040, 312500, 314928, 320000, 324000, 327680, 328050,
   3639 331776, 337500, 345600, 349920, 354294, 360000, 364500, 368640, 373248, 375000, 384000,
   3640 388800, 390625, 393216, 393660, 400000, 405000, 409600, 414720, 419904, 421875, 432000,
   3641 437400, 442368, 450000, 455625, 460800, 466560, 468750, 472392, 480000, 486000, 491520,
   3642 492075, 497664, 500000, 506250, 512000, 518400, 524288, 524880, 531441, 540000, 546750,
   3643 552960, 559872, 562500, 576000, 583200, 589824, 590490, 600000, 607500, 614400, 622080,
   3644 625000, 629856, 640000, 648000, 655360, 656100, 663552, 675000, 691200, 699840, 703125,
   3645 708588, 720000, 729000, 737280, 746496, 750000, 759375, 768000, 777600, 781250, 786432,
   3646 787320, 800000, 810000, 819200, 820125, 829440, 839808, 843750, 864000, 874800, 884736,
   3647 885735, 900000, 911250, 921600, 933120, 937500, 944784, 960000, 972000, 983040, 984150,
   3648 995328, 1000000, 1012500, 1024000, 1036800, 1048576, 1049760, 1062882, 1080000, 1093500,
   3649 1105920, 1119744, 1125000, 1152000, 1166400, 1171875, 1179648, 1180980, 1200000,
   3650 1215000, 1228800, 1244160, 1250000, 1259712, 1265625, 1280000, 1296000, 1310720,
   3651 1312200, 1327104, 1350000, 1366875, 1382400, 1399680, 1406250, 1417176, 1440000,
   3652 1458000, 1474560, 1476225, 1492992, 1500000, 1518750, 1536000, 1555200, 1562500,
   3653 1572864, 1574640, 1594323, 1600000, 1620000, 1638400, 1640250, 1658880, 1679616,
   3654 1687500, 1728000, 1749600, 1769472, 1771470, 1800000, 1822500, 1843200, 1866240,
   3655 1875000, 1889568, 1920000, 1944000, 1953125, 1966080, 1968300, 1990656, 2000000,
   3656 2025000, 2048000, 2073600, 2097152, 2099520, 2109375, 2125764, 2160000, 2187000,
   3657 2211840, 2239488, 2250000, 2278125, 2304000, 2332800, 2343750, 2359296, 2361960,
   3658 2400000, 2430000, 2457600, 2460375, 2488320, 2500000, 2519424, 2531250, 2560000,
   3659 2592000, 2621440, 2624400, 2654208, 2657205, 2700000, 2733750, 2764800, 2799360,
   3660 2812500, 2834352, 2880000, 2916000, 2949120, 2952450, 2985984, 3000000, 3037500,
   3661 3072000, 3110400, 3125000, 3145728, 3149280, 3188646, 3200000, 3240000, 3276800,
   3662 3280500, 3317760, 3359232, 3375000, 3456000, 3499200, 3515625, 3538944, 3542940,
   3663 3600000, 3645000, 3686400, 3732480, 3750000, 3779136, 3796875, 3840000, 3888000,
   3664 3906250, 3932160, 3936600, 3981312, 4000000, 4050000, 4096000, 4100625, 4147200,
   3665 4194304, 4199040, 4218750, 4251528, 4320000, 4374000, 4423680, 4428675, 4478976,
   3666 4500000, 4556250, 4608000, 4665600, 4687500, 4718592, 4723920, 4782969, 4800000,
   3667 4860000, 4915200, 4920750, 4976640, 5000000, 5038848, 5062500, 5120000, 5184000,
   3668 5242880, 5248800, 5308416, 5314410, 5400000, 5467500, 5529600, 5598720, 5625000,
   3669 5668704, 5760000, 5832000, 5859375, 5898240, 5904900, 5971968, 6000000, 6075000,
   3670 6144000, 6220800, 6250000, 6291456, 6298560, 6328125, 6377292, 6400000, 6480000,
   3671 6553600, 6561000, 6635520, 6718464, 6750000, 6834375, 6912000, 6998400, 7031250,
   3672 7077888, 7085880, 7200000, 7290000, 7372800, 7381125, 7464960, 7500000, 7558272,
   3673 7593750, 7680000, 7776000, 7812500, 7864320, 7873200, 7962624, 7971615, 8000000,
   3674 8100000, 8192000, 8201250, 8294400, 8388608, 8398080, 8437500, 8503056, 8640000,
   3675 8748000, 8847360, 8857350, 8957952, 9000000, 9112500, 9216000, 9331200, 9375000,
   3676 9437184, 9447840, 9565938, 9600000, 9720000, 9765625, 9830400, 9841500, 9953280,
   3677 10000000, 10077696, 10125000, 10240000, 10368000, 10485760, 10497600, 10546875, 10616832,
   3678 10628820, 10800000, 10935000, 11059200, 11197440, 11250000, 11337408, 11390625, 11520000,
   3679 11664000, 11718750, 11796480, 11809800, 11943936, 12000000, 12150000, 12288000, 12301875,
   3680 12441600, 12500000, 12582912, 12597120, 12656250, 12754584, 12800000, 12960000, 13107200,
   3681 13122000, 13271040, 13286025, 13436928, 13500000, 13668750, 13824000, 13996800, 14062500,
   3682 14155776, 14171760, 14400000, 14580000, 14745600, 14762250, 14929920, 15000000, 15116544,
   3683 15187500, 15360000, 15552000, 15625000, 15728640, 15746400, 15925248, 15943230, 16000000,
   3684 16200000, 16384000, 16402500, 16588800, 16777216, 16796160, 16875000, 17006112, 17280000,
   3685 17496000, 17578125, 17694720, 17714700, 17915904, 18000000, 18225000, 18432000, 18662400,
   3686 18750000, 18874368, 18895680, 18984375, 19131876, 19200000, 19440000, 19531250, 19660800,
   3687 19683000, 19906560, 20000000, 20155392, 20250000, 20480000, 20503125, 20736000, 20971520,
   3688 20995200, 21093750, 21233664, 21257640, 21600000, 21870000, 22118400, 22143375, 22394880,
   3689 22500000, 22674816, 22781250, 23040000, 23328000, 23437500, 23592960, 23619600, 23887872,
   3690 23914845, 24000000, 24300000, 24576000, 24603750, 24883200, 25000000, 25165824, 25194240,
   3691 25312500, 25509168, 25600000, 25920000, 26214400, 26244000, 26542080, 26572050, 26873856,
   3692 27000000, 27337500, 27648000, 27993600, 28125000, 28311552, 28343520, 28800000, 29160000,
   3693 29296875, 29491200, 29524500, 29859840, 30000000, 30233088, 30375000, 30720000, 31104000,
   3694 31250000, 31457280, 31492800, 31640625, 31850496, 31886460, 32000000, 32400000, 32768000,
   3695 32805000, 33177600, 33554432, 33592320, 33750000, 34012224, 34171875, 34560000, 34992000,
   3696 35156250, 35389440, 35429400, 35831808, 36000000, 36450000, 36864000, 36905625, 37324800,
   3697 37500000, 37748736, 37791360, 37968750, 38263752, 38400000, 38880000, 39062500, 39321600,
   3698 39366000, 39813120, 39858075, 40000000, 40310784, 40500000, 40960000, 41006250, 41472000,
   3699 41943040, 41990400, 42187500, 42467328, 42515280, 43200000, 43740000, 44236800, 44286750,
   3700 44789760, 45000000, 45349632, 45562500, 46080000, 46656000, 46875000, 47185920, 47239200,
   3701 47775744, 47829690, 48000000, 48600000, 48828125, 49152000, 49207500, 49766400, 50000000,
   3702 50331648, 50388480, 50625000, 51018336, 51200000, 51840000, 52428800, 52488000, 52734375,
   3703 53084160, 53144100, 53747712, 54000000, 54675000, 55296000, 55987200, 56250000, 56623104,
   3704 56687040, 56953125, 57600000, 58320000, 58593750, 58982400, 59049000, 59719680, 60000000,
   3705 60466176, 60750000, 61440000, 61509375, 62208000, 62500000, 62914560, 62985600, 63281250,
   3706 63700992, 63772920, 64000000, 64800000, 65536000, 65610000, 66355200, 66430125, 67108864,
   3707 67184640, 67500000, 68024448, 68343750, 69120000, 69984000, 70312500, 70778880, 70858800,
   3708 71663616, 72000000, 72900000, 73728000, 73811250, 74649600, 75000000, 75497472, 75582720,
   3709 75937500, 76527504, 76800000, 77760000, 78125000, 78643200, 78732000, 79626240, 79716150,
   3710 80000000, 80621568, 81000000, 81920000, 82012500, 82944000, 83886080, 83980800, 84375000,
   3711 84934656, 85030560, 86400000, 87480000, 87890625, 88473600, 88573500, 89579520, 90000000,
   3712 90699264, 91125000, 92160000, 93312000, 93750000, 94371840, 94478400, 94921875, 95551488,
   3713 95659380, 96000000, 97200000, 97656250, 98304000, 98415000, 99532800, 100000000,
   3714 100663296, 100776960, 101250000, 102036672, 102400000, 102515625, 103680000, 104857600,
   3715 104976000, 105468750, 106168320, 106288200, 107495424, 108000000, 109350000, 110592000,
   3716 110716875, 111974400, 112500000, 113246208, 113374080, 113906250, 115200000, 116640000,
   3717 117187500, 117964800, 118098000, 119439360, 119574225, 120000000, 120932352, 121500000,
   3718 122880000, 123018750, 124416000, 125000000, 125829120, 125971200, 126562500, 127401984,
   3719 127545840, 128000000, 129600000, 131072000, 131220000, 132710400, 132860250, 134217728,
   3720 134369280, 135000000, 136048896, 136687500, 138240000, 139968000, 140625000, 141557760,
   3721 141717600, 143327232, 144000000, 145800000, 146484375, 147456000, 147622500, 149299200,
   3722 150000000, 150994944, 151165440, 151875000, 153055008, 153600000, 155520000, 156250000,
   3723 157286400, 157464000, 158203125, 159252480, 159432300, 160000000, 161243136, 162000000,
   3724 163840000, 164025000, 165888000, 167772160, 167961600, 168750000, 169869312, 170061120,
   3725 170859375, 172800000, 174960000, 175781250, 176947200, 177147000, 179159040, 180000000,
   3726 181398528, 182250000, 184320000, 184528125, 186624000, 187500000, 188743680, 188956800,
   3727 189843750, 191102976, 191318760, 192000000, 194400000, 195312500, 196608000, 196830000,
   3728 199065600, 199290375, 200000000, 201326592, 201553920, 202500000, 204073344, 204800000,
   3729 205031250, 207360000, 209715200, 209952000, 210937500, 212336640, 212576400, 214990848,
   3730 216000000, 218700000, 221184000, 221433750, 223948800, 225000000, 226492416, 226748160,
   3731 227812500, 230400000, 233280000, 234375000, 235929600, 236196000, 238878720, 239148450,
   3732 240000000, 241864704, 243000000, 244140625, 245760000, 246037500, 248832000, 250000000,
   3733 251658240, 251942400, 253125000, 254803968, 255091680, 256000000, 259200000, 262144000,
   3734 262440000, 263671875, 265420800, 265720500, 268435456, 268738560, 270000000, 272097792,
   3735 273375000, 276480000, 279936000, 281250000, 283115520, 283435200, 284765625, 286654464,
   3736 288000000, 291600000, 292968750, 294912000, 295245000, 298598400, 300000000, 301989888,
   3737 302330880, 303750000, 306110016, 307200000, 307546875, 311040000, 312500000, 314572800,
   3738 314928000, 316406250, 318504960, 318864600, 320000000, 322486272, 324000000, 327680000,
   3739 328050000, 331776000, 332150625, 335544320, 335923200, 337500000, 339738624, 340122240,
   3740 341718750, 345600000, 349920000, 351562500, 353894400, 354294000, 358318080, 360000000,
   3741 362797056, 364500000, 368640000, 369056250, 373248000, 375000000, 377487360, 377913600,
   3742 379687500, 382205952, 382637520, 384000000, 388800000, 390625000, 393216000, 393660000,
   3743 398131200, 398580750, 400000000, 402653184, 403107840, 405000000, 408146688, 409600000,
   3744 410062500, 414720000, 419430400, 419904000, 421875000, 424673280, 425152800, 429981696,
   3745 432000000, 437400000, 439453125, 442368000, 442867500, 447897600, 450000000, 452984832,
   3746 453496320, 455625000, 460800000, 466560000, 468750000, 471859200, 472392000, 474609375,
   3747 477757440, 478296900, 480000000, 483729408, 486000000, 488281250, 491520000, 492075000,
   3748 497664000, 500000000, 503316480, 503884800, 506250000, 509607936, 510183360, 512000000,
   3749 512578125, 518400000, 524288000, 524880000, 527343750, 530841600, 531441000, 536870912,
   3750 537477120, 540000000, 544195584, 546750000, 552960000, 553584375, 559872000, 562500000,
   3751 566231040, 566870400, 569531250, 573308928, 576000000, 583200000, 585937500, 589824000,
   3752 590490000, 597196800, 597871125, 600000000, 603979776, 604661760, 607500000, 612220032,
   3753 614400000, 615093750, 622080000, 625000000, 629145600, 629856000, 632812500, 637009920,
   3754 637729200, 640000000, 644972544, 648000000, 655360000, 656100000, 663552000, 664301250,
   3755 671088640, 671846400, 675000000, 679477248, 680244480, 683437500, 691200000, 699840000,
   3756 703125000, 707788800, 708588000, 716636160, 720000000, 725594112, 729000000, 732421875,
   3757 737280000, 738112500, 746496000, 750000000, 754974720, 755827200, 759375000, 764411904,
   3758 765275040, 768000000, 777600000, 781250000, 786432000, 787320000, 791015625, 796262400,
   3759 797161500, 800000000, 805306368, 806215680, 810000000, 816293376, 819200000, 820125000,
   3760 829440000, 838860800, 839808000, 843750000, 849346560, 850305600, 854296875, 859963392,
   3761 864000000, 874800000, 878906250, 884736000, 885735000, 895795200, 900000000, 905969664,
   3762 906992640, 911250000, 921600000, 922640625, 933120000, 937500000, 943718400, 944784000,
   3763 949218750, 955514880, 956593800, 960000000, 967458816, 972000000, 976562500, 983040000,
   3764 984150000, 995328000, 996451875, 1000000000, 1006632960, 1007769600, 1012500000,
   3765 1019215872, 1020366720, 1024000000, 1025156250, 1036800000, 1048576000, 1049760000,
   3766 1054687500, 1061683200, 1062882000, 1073741824, 1074954240, 1080000000, 1088391168,
   3767 1093500000, 1105920000, 1107168750, 1119744000, 1125000000, 1132462080, 1133740800,
   3768 1139062500, 1146617856, 1152000000, 1166400000, 1171875000, 1179648000, 1180980000,
   3769 1194393600, 1195742250, 1200000000, 1207959552, 1209323520, 1215000000, 1220703125,
   3770 1224440064, 1228800000, 1230187500, 1244160000, 1250000000, 1258291200, 1259712000,
   3771 1265625000, 1274019840, 1275458400, 1280000000, 1289945088, 1296000000, 1310720000,
   3772 1312200000, 1318359375, 1327104000, 1328602500, 1342177280, 1343692800, 1350000000,
   3773 1358954496, 1360488960, 1366875000, 1382400000, 1399680000, 1406250000, 1415577600,
   3774 1417176000, 1423828125, 1433272320, 1440000000, 1451188224, 1458000000, 1464843750,
   3775 1474560000, 1476225000, 1492992000, 1500000000, 1509949440, 1511654400, 1518750000,
   3776 1528823808, 1530550080, 1536000000, 1537734375, 1555200000, 1562500000, 1572864000,
   3777 1574640000, 1582031250, 1592524800, 1594323000, 1600000000, 1610612736, 1612431360,
   3778 1620000000, 1632586752, 1638400000, 1640250000, 1658880000, 1660753125, 1677721600,
   3779 1679616000, 1687500000, 1698693120, 1700611200, 1708593750, 1719926784, 1728000000,
   3780 1749600000, 1757812500, 1769472000, 1771470000, 1791590400, 1800000000, 1811939328,
   3781 1813985280, 1822500000, 1843200000, 1845281250, 1866240000, 1875000000, 1887436800,
   3782 1889568000, 1898437500, 1911029760, 1913187600, 1920000000, 1934917632, 1944000000,
   3783 1953125000, 1966080000, 1968300000, 1990656000, 1992903750, 2000000000, 2013265920,
   3784 2015539200, 2025000000, 2038431744, 2040733440, 2048000000, 2050312500, 2073600000,
   3785 2097152000, 2099520000, 2109375000, 2123366400, 2125764000
   3786 };
   3787 
   3788 }
   3789 
   3790 int cv::getOptimalDFTSize( int size0 )
   3791 {
   3792     int a = 0, b = sizeof(optimalDFTSizeTab)/sizeof(optimalDFTSizeTab[0]) - 1;
   3793     if( (unsigned)size0 >= (unsigned)optimalDFTSizeTab[b] )
   3794         return -1;
   3795 
   3796     while( a < b )
   3797     {
   3798         int c = (a + b) >> 1;
   3799         if( size0 <= optimalDFTSizeTab[c] )
   3800             b = c;
   3801         else
   3802             a = c+1;
   3803     }
   3804 
   3805     return optimalDFTSizeTab[b];
   3806 }
   3807 
   3808 CV_IMPL void
   3809 cvDFT( const CvArr* srcarr, CvArr* dstarr, int flags, int nonzero_rows )
   3810 {
   3811     cv::Mat src = cv::cvarrToMat(srcarr), dst0 = cv::cvarrToMat(dstarr), dst = dst0;
   3812     int _flags = ((flags & CV_DXT_INVERSE) ? cv::DFT_INVERSE : 0) |
   3813         ((flags & CV_DXT_SCALE) ? cv::DFT_SCALE : 0) |
   3814         ((flags & CV_DXT_ROWS) ? cv::DFT_ROWS : 0);
   3815 
   3816     CV_Assert( src.size == dst.size );
   3817 
   3818     if( src.type() != dst.type() )
   3819     {
   3820         if( dst.channels() == 2 )
   3821             _flags |= cv::DFT_COMPLEX_OUTPUT;
   3822         else
   3823             _flags |= cv::DFT_REAL_OUTPUT;
   3824     }
   3825 
   3826     cv::dft( src, dst, _flags, nonzero_rows );
   3827     CV_Assert( dst.data == dst0.data ); // otherwise it means that the destination size or type was incorrect
   3828 }
   3829 
   3830 
   3831 CV_IMPL void
   3832 cvMulSpectrums( const CvArr* srcAarr, const CvArr* srcBarr,
   3833                 CvArr* dstarr, int flags )
   3834 {
   3835     cv::Mat srcA = cv::cvarrToMat(srcAarr),
   3836         srcB = cv::cvarrToMat(srcBarr),
   3837         dst = cv::cvarrToMat(dstarr);
   3838     CV_Assert( srcA.size == dst.size && srcA.type() == dst.type() );
   3839 
   3840     cv::mulSpectrums(srcA, srcB, dst,
   3841         (flags & CV_DXT_ROWS) ? cv::DFT_ROWS : 0,
   3842         (flags & CV_DXT_MUL_CONJ) != 0 );
   3843 }
   3844 
   3845 
   3846 CV_IMPL void
   3847 cvDCT( const CvArr* srcarr, CvArr* dstarr, int flags )
   3848 {
   3849     cv::Mat src = cv::cvarrToMat(srcarr), dst = cv::cvarrToMat(dstarr);
   3850     CV_Assert( src.size == dst.size && src.type() == dst.type() );
   3851     int _flags = ((flags & CV_DXT_INVERSE) ? cv::DCT_INVERSE : 0) |
   3852             ((flags & CV_DXT_ROWS) ? cv::DCT_ROWS : 0);
   3853     cv::dct( src, dst, _flags );
   3854 }
   3855 
   3856 
   3857 CV_IMPL int
   3858 cvGetOptimalDFTSize( int size0 )
   3859 {
   3860     return cv::getOptimalDFTSize(size0);
   3861 }
   3862 
   3863 /* End of file. */
   3864