Home | History | Annotate | Download | only in src
      1 /*M///////////////////////////////////////////////////////////////////////////////////////
      2 //
      3 //  IMPORTANT: READ BEFORE DOWNLOADING, COPYING, INSTALLING OR USING.
      4 //
      5 //  By downloading, copying, installing or using the software you agree to this license.
      6 //  If you do not agree to this license, do not download, install,
      7 //  copy or use the software.
      8 //
      9 //
     10 //                        Intel License Agreement
     11 //                For Open Source Computer Vision Library
     12 //
     13 // Copyright (C) 2000, Intel Corporation, all rights reserved.
     14 // Third party copyrights are property of their respective owners.
     15 //
     16 // Redistribution and use in source and binary forms, with or without modification,
     17 // are permitted provided that the following conditions are met:
     18 //
     19 //   * Redistribution's of source code must retain the above copyright notice,
     20 //     this list of conditions and the following disclaimer.
     21 //
     22 //   * Redistribution's in binary form must reproduce the above copyright notice,
     23 //     this list of conditions and the following disclaimer in the documentation
     24 //     and/or other materials provided with the distribution.
     25 //
     26 //   * The name of Intel Corporation may not be used to endorse or promote products
     27 //     derived from this software without specific prior written permission.
     28 //
     29 // This software is provided by the copyright holders and contributors "as is" and
     30 // any express or implied warranties, including, but not limited to, the implied
     31 // warranties of merchantability and fitness for a particular purpose are disclaimed.
     32 // In no event shall the Intel Corporation or contributors be liable for any direct,
     33 // indirect, incidental, special, exemplary, or consequential damages
     34 // (including, but not limited to, procurement of substitute goods or services;
     35 // loss of use, data, or profits; or business interruption) however caused
     36 // and on any theory of liability, whether in contract, strict liability,
     37 // or tort (including negligence or otherwise) arising in any way out of
     38 // the use of this software, even if advised of the possibility of such damage.
     39 //
     40 //M*/
     41 
     42 #include "_cxcore.h"
     43 
     44 // On Win64 (IA64) optimized versions of DFT and DCT fail the tests
     45 #if defined WIN64 && !defined EM64T
     46 #pragma optimize("", off)
     47 #endif
     48 
     49 icvDFTInitAlloc_C_32fc_t icvDFTInitAlloc_C_32fc_p = 0;
     50 icvDFTFree_C_32fc_t icvDFTFree_C_32fc_p = 0;
     51 icvDFTGetBufSize_C_32fc_t icvDFTGetBufSize_C_32fc_p = 0;
     52 icvDFTFwd_CToC_32fc_t icvDFTFwd_CToC_32fc_p = 0;
     53 icvDFTInv_CToC_32fc_t icvDFTInv_CToC_32fc_p = 0;
     54 
     55 icvDFTInitAlloc_C_64fc_t icvDFTInitAlloc_C_64fc_p = 0;
     56 icvDFTFree_C_64fc_t icvDFTFree_C_64fc_p = 0;
     57 icvDFTGetBufSize_C_64fc_t icvDFTGetBufSize_C_64fc_p = 0;
     58 icvDFTFwd_CToC_64fc_t icvDFTFwd_CToC_64fc_p = 0;
     59 icvDFTInv_CToC_64fc_t icvDFTInv_CToC_64fc_p = 0;
     60 
     61 icvDFTInitAlloc_R_32f_t icvDFTInitAlloc_R_32f_p = 0;
     62 icvDFTFree_R_32f_t icvDFTFree_R_32f_p = 0;
     63 icvDFTGetBufSize_R_32f_t icvDFTGetBufSize_R_32f_p = 0;
     64 icvDFTFwd_RToPack_32f_t icvDFTFwd_RToPack_32f_p = 0;
     65 icvDFTInv_PackToR_32f_t icvDFTInv_PackToR_32f_p = 0;
     66 
     67 icvDFTInitAlloc_R_64f_t icvDFTInitAlloc_R_64f_p = 0;
     68 icvDFTFree_R_64f_t icvDFTFree_R_64f_p = 0;
     69 icvDFTGetBufSize_R_64f_t icvDFTGetBufSize_R_64f_p = 0;
     70 icvDFTFwd_RToPack_64f_t icvDFTFwd_RToPack_64f_p = 0;
     71 icvDFTInv_PackToR_64f_t icvDFTInv_PackToR_64f_p = 0;
     72 
     73 /*icvDCTFwdInitAlloc_32f_t icvDCTFwdInitAlloc_32f_p = 0;
     74 icvDCTFwdFree_32f_t icvDCTFwdFree_32f_p = 0;
     75 icvDCTFwdGetBufSize_32f_t icvDCTFwdGetBufSize_32f_p = 0;
     76 icvDCTFwd_32f_t icvDCTFwd_32f_p = 0;
     77 
     78 icvDCTInvInitAlloc_32f_t icvDCTInvInitAlloc_32f_p = 0;
     79 icvDCTInvFree_32f_t icvDCTInvFree_32f_p = 0;
     80 icvDCTInvGetBufSize_32f_t icvDCTInvGetBufSize_32f_p = 0;
     81 icvDCTInv_32f_t icvDCTInv_32f_p = 0;
     82 
     83 icvDCTFwdInitAlloc_64f_t icvDCTFwdInitAlloc_64f_p = 0;
     84 icvDCTFwdFree_64f_t icvDCTFwdFree_64f_p = 0;
     85 icvDCTFwdGetBufSize_64f_t icvDCTFwdGetBufSize_64f_p = 0;
     86 icvDCTFwd_64f_t icvDCTFwd_64f_p = 0;
     87 
     88 icvDCTInvInitAlloc_64f_t icvDCTInvInitAlloc_64f_p = 0;
     89 icvDCTInvFree_64f_t icvDCTInvFree_64f_p = 0;
     90 icvDCTInvGetBufSize_64f_t icvDCTInvGetBufSize_64f_p = 0;
     91 icvDCTInv_64f_t icvDCTInv_64f_p = 0;*/
     92 
     93 /****************************************************************************************\
     94                                Discrete Fourier Transform
     95 \****************************************************************************************/
     96 
     97 #define CV_MAX_LOCAL_DFT_SIZE  (1 << 15)
     98 
     99 static const uchar log2tab[] = { 0, 0, 1, 0, 2, 0, 0, 0, 3, 0, 0, 0, 0, 0, 0, 0 };
    100 static int icvlog2( int n )
    101 {
    102     int m = 0;
    103     int f = (n >= (1 << 16))*16;
    104     n >>= f;
    105     m += f;
    106     f = (n >= (1 << 8))*8;
    107     n >>= f;
    108     m += f;
    109     f = (n >= (1 << 4))*4;
    110     n >>= f;
    111     return m + f + log2tab[n];
    112 }
    113 
    114 static unsigned char icvRevTable[] =
    115 {
    116   0x00,0x80,0x40,0xc0,0x20,0xa0,0x60,0xe0,0x10,0x90,0x50,0xd0,0x30,0xb0,0x70,0xf0,
    117   0x08,0x88,0x48,0xc8,0x28,0xa8,0x68,0xe8,0x18,0x98,0x58,0xd8,0x38,0xb8,0x78,0xf8,
    118   0x04,0x84,0x44,0xc4,0x24,0xa4,0x64,0xe4,0x14,0x94,0x54,0xd4,0x34,0xb4,0x74,0xf4,
    119   0x0c,0x8c,0x4c,0xcc,0x2c,0xac,0x6c,0xec,0x1c,0x9c,0x5c,0xdc,0x3c,0xbc,0x7c,0xfc,
    120   0x02,0x82,0x42,0xc2,0x22,0xa2,0x62,0xe2,0x12,0x92,0x52,0xd2,0x32,0xb2,0x72,0xf2,
    121   0x0a,0x8a,0x4a,0xca,0x2a,0xaa,0x6a,0xea,0x1a,0x9a,0x5a,0xda,0x3a,0xba,0x7a,0xfa,
    122   0x06,0x86,0x46,0xc6,0x26,0xa6,0x66,0xe6,0x16,0x96,0x56,0xd6,0x36,0xb6,0x76,0xf6,
    123   0x0e,0x8e,0x4e,0xce,0x2e,0xae,0x6e,0xee,0x1e,0x9e,0x5e,0xde,0x3e,0xbe,0x7e,0xfe,
    124   0x01,0x81,0x41,0xc1,0x21,0xa1,0x61,0xe1,0x11,0x91,0x51,0xd1,0x31,0xb1,0x71,0xf1,
    125   0x09,0x89,0x49,0xc9,0x29,0xa9,0x69,0xe9,0x19,0x99,0x59,0xd9,0x39,0xb9,0x79,0xf9,
    126   0x05,0x85,0x45,0xc5,0x25,0xa5,0x65,0xe5,0x15,0x95,0x55,0xd5,0x35,0xb5,0x75,0xf5,
    127   0x0d,0x8d,0x4d,0xcd,0x2d,0xad,0x6d,0xed,0x1d,0x9d,0x5d,0xdd,0x3d,0xbd,0x7d,0xfd,
    128   0x03,0x83,0x43,0xc3,0x23,0xa3,0x63,0xe3,0x13,0x93,0x53,0xd3,0x33,0xb3,0x73,0xf3,
    129   0x0b,0x8b,0x4b,0xcb,0x2b,0xab,0x6b,0xeb,0x1b,0x9b,0x5b,0xdb,0x3b,0xbb,0x7b,0xfb,
    130   0x07,0x87,0x47,0xc7,0x27,0xa7,0x67,0xe7,0x17,0x97,0x57,0xd7,0x37,0xb7,0x77,0xf7,
    131   0x0f,0x8f,0x4f,0xcf,0x2f,0xaf,0x6f,0xef,0x1f,0x9f,0x5f,0xdf,0x3f,0xbf,0x7f,0xff
    132 };
    133 
    134 static const double icvDxtTab[][2] =
    135 {
    136 { 1.00000000000000000, 0.00000000000000000 },
    137 {-1.00000000000000000, 0.00000000000000000 },
    138 { 0.00000000000000000, 1.00000000000000000 },
    139 { 0.70710678118654757, 0.70710678118654746 },
    140 { 0.92387953251128674, 0.38268343236508978 },
    141 { 0.98078528040323043, 0.19509032201612825 },
    142 { 0.99518472667219693, 0.09801714032956060 },
    143 { 0.99879545620517241, 0.04906767432741802 },
    144 { 0.99969881869620425, 0.02454122852291229 },
    145 { 0.99992470183914450, 0.01227153828571993 },
    146 { 0.99998117528260111, 0.00613588464915448 },
    147 { 0.99999529380957619, 0.00306795676296598 },
    148 { 0.99999882345170188, 0.00153398018628477 },
    149 { 0.99999970586288223, 0.00076699031874270 },
    150 { 0.99999992646571789, 0.00038349518757140 },
    151 { 0.99999998161642933, 0.00019174759731070 },
    152 { 0.99999999540410733, 0.00009587379909598 },
    153 { 0.99999999885102686, 0.00004793689960307 },
    154 { 0.99999999971275666, 0.00002396844980842 },
    155 { 0.99999999992818922, 0.00001198422490507 },
    156 { 0.99999999998204725, 0.00000599211245264 },
    157 { 0.99999999999551181, 0.00000299605622633 },
    158 { 0.99999999999887801, 0.00000149802811317 },
    159 { 0.99999999999971945, 0.00000074901405658 },
    160 { 0.99999999999992983, 0.00000037450702829 },
    161 { 0.99999999999998246, 0.00000018725351415 },
    162 { 0.99999999999999567, 0.00000009362675707 },
    163 { 0.99999999999999889, 0.00000004681337854 },
    164 { 0.99999999999999978, 0.00000002340668927 },
    165 { 0.99999999999999989, 0.00000001170334463 },
    166 { 1.00000000000000000, 0.00000000585167232 },
    167 { 1.00000000000000000, 0.00000000292583616 }
    168 };
    169 
    170 #define icvBitRev(i,shift) \
    171    ((int)((((unsigned)icvRevTable[(i)&255] << 24)+ \
    172            ((unsigned)icvRevTable[((i)>> 8)&255] << 16)+ \
    173            ((unsigned)icvRevTable[((i)>>16)&255] <<  8)+ \
    174            ((unsigned)icvRevTable[((i)>>24)])) >> (shift)))
    175 
    176 static int
    177 icvDFTFactorize( int n, int* factors )
    178 {
    179     int nf = 0, f, i, j;
    180 
    181     if( n <= 5 )
    182     {
    183         factors[0] = n;
    184         return 1;
    185     }
    186 
    187     f = (((n - 1)^n)+1) >> 1;
    188     if( f > 1 )
    189     {
    190         factors[nf++] = f;
    191         n = f == n ? 1 : n/f;
    192     }
    193 
    194     for( f = 3; n > 1; )
    195     {
    196         int d = n/f;
    197         if( d*f == n )
    198         {
    199             factors[nf++] = f;
    200             n = d;
    201         }
    202         else
    203         {
    204             f += 2;
    205             if( f*f > n )
    206                 break;
    207         }
    208     }
    209 
    210     if( n > 1 )
    211         factors[nf++] = n;
    212 
    213     f = (factors[0] & 1) == 0;
    214     for( i = f; i < (nf+f)/2; i++ )
    215         CV_SWAP( factors[i], factors[nf-i-1+f], j );
    216 
    217     return nf;
    218 }
    219 
    220 static void
    221 icvDFTInit( int n0, int nf, int* factors, int* itab, int elem_size, void* _wave, int inv_itab )
    222 {
    223     int digits[34], radix[34];
    224     int n = factors[0], m = 0;
    225     int* itab0 = itab;
    226     int i, j, k;
    227     CvComplex64f w, w1;
    228     double t;
    229 
    230     if( n0 <= 5 )
    231     {
    232         itab[0] = 0;
    233         itab[n0-1] = n0-1;
    234 
    235         if( n0 != 4 )
    236         {
    237             for( i = 1; i < n0-1; i++ )
    238                 itab[i] = i;
    239         }
    240         else
    241         {
    242             itab[1] = 2;
    243             itab[2] = 1;
    244         }
    245         if( n0 == 5 )
    246         {
    247             if( elem_size == sizeof(CvComplex64f) )
    248                 ((CvComplex64f*)_wave)[0] = CvComplex64f(1.,0.);
    249             else
    250                 ((CvComplex32f*)_wave)[0] = CvComplex32f(1.f,0.f);
    251         }
    252         if( n0 != 4 )
    253             return;
    254         m = 2;
    255     }
    256     else
    257     {
    258 	// radix[] is initialized from index 'nf' down to zero
    259         assert (nf < 34);
    260         radix[nf] = 1;
    261         digits[nf] = 0;
    262         for( i = 0; i < nf; i++ )
    263         {
    264             digits[i] = 0;
    265             radix[nf-i-1] = radix[nf-i]*factors[nf-i-1];
    266         }
    267 
    268         if( inv_itab && factors[0] != factors[nf-1] )
    269             itab = (int*)_wave;
    270 
    271         if( (n & 1) == 0 )
    272         {
    273             int a = radix[1], na2 = n*a>>1, na4 = na2 >> 1;
    274             m = icvlog2(n);
    275 
    276             if( n <= 2  )
    277             {
    278                 itab[0] = 0;
    279                 itab[1] = na2;
    280             }
    281             else if( n <= 256 )
    282             {
    283                 int shift = 10 - m;
    284                 for( i = 0; i <= n - 4; i += 4 )
    285                 {
    286                     j = (icvRevTable[i>>2]>>shift)*a;
    287                     itab[i] = j;
    288                     itab[i+1] = j + na2;
    289                     itab[i+2] = j + na4;
    290                     itab[i+3] = j + na2 + na4;
    291                 }
    292             }
    293             else
    294             {
    295                 int shift = 34 - m;
    296                 for( i = 0; i < n; i += 4 )
    297                 {
    298                     int i4 = i >> 2;
    299                     j = icvBitRev(i4,shift)*a;
    300                     itab[i] = j;
    301                     itab[i+1] = j + na2;
    302                     itab[i+2] = j + na4;
    303                     itab[i+3] = j + na2 + na4;
    304                 }
    305             }
    306 
    307             digits[1]++;
    308 
    309             if( nf >= 2 )
    310             {
    311                 for( i = n, j = radix[2]; i < n0; )
    312                 {
    313                     for( k = 0; k < n; k++ )
    314                         itab[i+k] = itab[k] + j;
    315                     if( (i += n) >= n0 )
    316                         break;
    317                     j += radix[2];
    318                     for( k = 1; ++digits[k] >= factors[k]; k++ )
    319                     {
    320                         digits[k] = 0;
    321                         j += radix[k+2] - radix[k];
    322                     }
    323                 }
    324             }
    325         }
    326         else
    327         {
    328             for( i = 0, j = 0;; )
    329             {
    330                 itab[i] = j;
    331                 if( ++i >= n0 )
    332                     break;
    333                 j += radix[1];
    334                 for( k = 0; ++digits[k] >= factors[k]; k++ )
    335                 {
    336                     digits[k] = 0;
    337                     j += radix[k+2] - radix[k];
    338                 }
    339             }
    340         }
    341 
    342         if( itab != itab0 )
    343         {
    344             itab0[0] = 0;
    345             for( i = n0 & 1; i < n0; i += 2 )
    346             {
    347                 int k0 = itab[i];
    348                 int k1 = itab[i+1];
    349                 itab0[k0] = i;
    350                 itab0[k1] = i+1;
    351             }
    352         }
    353     }
    354 
    355     if( (n0 & (n0-1)) == 0 )
    356     {
    357         w.re = w1.re = icvDxtTab[m][0];
    358         w.im = w1.im = -icvDxtTab[m][1];
    359     }
    360     else
    361     {
    362         t = -CV_PI*2/n0;
    363         w.im = w1.im = sin(t);
    364         w.re = w1.re = sqrt(1. - w1.im*w1.im);
    365     }
    366     n = (n0+1)/2;
    367 
    368     if( elem_size == sizeof(CvComplex64f) )
    369     {
    370         CvComplex64f* wave = (CvComplex64f*)_wave;
    371 
    372         wave[0].re = 1.;
    373         wave[0].im = 0.;
    374 
    375         if( (n0 & 1) == 0 )
    376         {
    377             wave[n].re = -1.;
    378             wave[n].im = 0;
    379         }
    380 
    381         for( i = 1; i < n; i++ )
    382         {
    383             wave[i] = w;
    384             wave[n0-i].re = w.re;
    385             wave[n0-i].im = -w.im;
    386 
    387             t = w.re*w1.re - w.im*w1.im;
    388             w.im = w.re*w1.im + w.im*w1.re;
    389             w.re = t;
    390         }
    391     }
    392     else
    393     {
    394         CvComplex32f* wave = (CvComplex32f*)_wave;
    395         assert( elem_size == sizeof(CvComplex32f) );
    396 
    397         wave[0].re = 1.f;
    398         wave[0].im = 0.f;
    399 
    400         if( (n0 & 1) == 0 )
    401         {
    402             wave[n].re = -1.f;
    403             wave[n].im = 0.f;
    404         }
    405 
    406         for( i = 1; i < n; i++ )
    407         {
    408             wave[i].re = (float)w.re;
    409             wave[i].im = (float)w.im;
    410             wave[n0-i].re = (float)w.re;
    411             wave[n0-i].im = (float)-w.im;
    412 
    413             t = w.re*w1.re - w.im*w1.im;
    414             w.im = w.re*w1.im + w.im*w1.re;
    415             w.re = t;
    416         }
    417     }
    418 }
    419 
    420 
    421 static const double icv_sin_120 = 0.86602540378443864676372317075294;
    422 static const double icv_sin_45 = 0.70710678118654752440084436210485;
    423 static const double icv_fft5_2 = 0.559016994374947424102293417182819;
    424 static const double icv_fft5_3 = -0.951056516295153572116439333379382;
    425 static const double icv_fft5_4 = -1.538841768587626701285145288018455;
    426 static const double icv_fft5_5 = 0.363271264002680442947733378740309;
    427 
    428 #define ICV_DFT_NO_PERMUTE 2
    429 #define ICV_DFT_COMPLEX_INPUT_OR_OUTPUT 4
    430 
    431 // mixed-radix complex discrete Fourier transform: double-precision version
    432 static CvStatus CV_STDCALL
    433 icvDFT_64fc( const CvComplex64f* src, CvComplex64f* dst, int n,
    434              int nf, int* factors, const int* itab,
    435              const CvComplex64f* wave, int tab_size,
    436              const void* spec, CvComplex64f* buf,
    437              int flags, double scale )
    438 {
    439     int n0 = n, f_idx, nx;
    440     int inv = flags & CV_DXT_INVERSE;
    441     int dw0 = tab_size, dw;
    442     int i, j, k;
    443     CvComplex64f t;
    444     int tab_step;
    445 
    446     if( spec )
    447     {
    448         assert( icvDFTFwd_CToC_64fc_p != 0 && icvDFTInv_CToC_64fc_p != 0 );
    449         return !inv ?
    450             icvDFTFwd_CToC_64fc_p( src, dst, spec, buf ):
    451             icvDFTInv_CToC_64fc_p( src, dst, spec, buf );
    452     }
    453 
    454     tab_step = tab_size == n ? 1 : tab_size == n*2 ? 2 : tab_size/n;
    455 
    456     // 0. shuffle data
    457     if( dst != src )
    458     {
    459         assert( (flags & ICV_DFT_NO_PERMUTE) == 0 );
    460         if( !inv )
    461         {
    462             for( i = 0; i <= n - 2; i += 2, itab += 2*tab_step )
    463             {
    464                 int k0 = itab[0], k1 = itab[tab_step];
    465                 assert( (unsigned)k0 < (unsigned)n && (unsigned)k1 < (unsigned)n );
    466                 dst[i] = src[k0]; dst[i+1] = src[k1];
    467             }
    468 
    469             if( i < n )
    470                 dst[n-1] = src[n-1];
    471         }
    472         else
    473         {
    474             for( i = 0; i <= n - 2; i += 2, itab += 2*tab_step )
    475             {
    476                 int k0 = itab[0], k1 = itab[tab_step];
    477                 assert( (unsigned)k0 < (unsigned)n && (unsigned)k1 < (unsigned)n );
    478                 t.re = src[k0].re; t.im = -src[k0].im;
    479                 dst[i] = t;
    480                 t.re = src[k1].re; t.im = -src[k1].im;
    481                 dst[i+1] = t;
    482             }
    483 
    484             if( i < n )
    485             {
    486                 t.re = src[n-1].re; t.im = -src[n-1].im;
    487                 dst[i] = t;
    488             }
    489         }
    490     }
    491     else
    492     {
    493         if( (flags & ICV_DFT_NO_PERMUTE) == 0 )
    494         {
    495             if( factors[0] != factors[nf-1] )
    496                 return CV_INPLACE_NOT_SUPPORTED_ERR;
    497             if( nf == 1 )
    498             {
    499                 if( (n & 3) == 0 )
    500                 {
    501                     int n2 = n/2;
    502                     CvComplex64f* dsth = dst + n2;
    503 
    504                     for( i = 0; i < n2; i += 2, itab += tab_step*2 )
    505                     {
    506                         j = itab[0];
    507                         assert( (unsigned)j < (unsigned)n2 );
    508 
    509                         CV_SWAP(dst[i+1], dsth[j], t);
    510                         if( j > i )
    511                         {
    512                             CV_SWAP(dst[i], dst[j], t);
    513                             CV_SWAP(dsth[i+1], dsth[j+1], t);
    514                         }
    515                     }
    516                 }
    517                 // else do nothing
    518             }
    519             else
    520             {
    521                 for( i = 0; i < n; i++, itab += tab_step )
    522                 {
    523                     j = itab[0];
    524                     assert( (unsigned)j < (unsigned)n );
    525                     if( j > i )
    526                         CV_SWAP(dst[i], dst[j], t);
    527                 }
    528             }
    529         }
    530 
    531         if( inv )
    532         {
    533             for( i = 0; i <= n - 2; i += 2 )
    534             {
    535                 double t0 = -dst[i].im;
    536                 double t1 = -dst[i+1].im;
    537                 dst[i].im = t0; dst[i+1].im = t1;
    538             }
    539 
    540             if( i < n )
    541                 dst[n-1].im = -dst[n-1].im;
    542         }
    543     }
    544 
    545     n = 1;
    546     // 1. power-2 transforms
    547     if( (factors[0] & 1) == 0 )
    548     {
    549         // radix-4 transform
    550         for( ; n*4 <= factors[0]; )
    551         {
    552             nx = n;
    553             n *= 4;
    554             dw0 /= 4;
    555 
    556             for( i = 0; i < n0; i += n )
    557             {
    558                 CvComplex64f* v0;
    559                 CvComplex64f* v1;
    560                 double r0, i0, r1, i1, r2, i2, r3, i3, r4, i4;
    561 
    562                 v0 = dst + i;
    563                 v1 = v0 + nx*2;
    564 
    565                 r2 = v0[0].re; i2 = v0[0].im;
    566                 r1 = v0[nx].re; i1 = v0[nx].im;
    567 
    568                 r0 = r1 + r2; i0 = i1 + i2;
    569                 r2 -= r1; i2 -= i1;
    570 
    571                 i3 = v1[nx].re; r3 = v1[nx].im;
    572                 i4 = v1[0].re; r4 = v1[0].im;
    573 
    574                 r1 = i4 + i3; i1 = r4 + r3;
    575                 r3 = r4 - r3; i3 = i3 - i4;
    576 
    577                 v0[0].re = r0 + r1; v0[0].im = i0 + i1;
    578                 v1[0].re = r0 - r1; v1[0].im = i0 - i1;
    579                 v0[nx].re = r2 + r3; v0[nx].im = i2 + i3;
    580                 v1[nx].re = r2 - r3; v1[nx].im = i2 - i3;
    581 
    582                 for( j = 1, dw = dw0; j < nx; j++, dw += dw0 )
    583                 {
    584                     v0 = dst + i + j;
    585                     v1 = v0 + nx*2;
    586 
    587                     r2 = v0[nx].re*wave[dw*2].re - v0[nx].im*wave[dw*2].im;
    588                     i2 = v0[nx].re*wave[dw*2].im + v0[nx].im*wave[dw*2].re;
    589                     r0 = v1[0].re*wave[dw].im + v1[0].im*wave[dw].re;
    590                     i0 = v1[0].re*wave[dw].re - v1[0].im*wave[dw].im;
    591                     r3 = v1[nx].re*wave[dw*3].im + v1[nx].im*wave[dw*3].re;
    592                     i3 = v1[nx].re*wave[dw*3].re - v1[nx].im*wave[dw*3].im;
    593 
    594                     r1 = i0 + i3; i1 = r0 + r3;
    595                     r3 = r0 - r3; i3 = i3 - i0;
    596                     r4 = v0[0].re; i4 = v0[0].im;
    597 
    598                     r0 = r4 + r2; i0 = i4 + i2;
    599                     r2 = r4 - r2; i2 = i4 - i2;
    600 
    601                     v0[0].re = r0 + r1; v0[0].im = i0 + i1;
    602                     v1[0].re = r0 - r1; v1[0].im = i0 - i1;
    603                     v0[nx].re = r2 + r3; v0[nx].im = i2 + i3;
    604                     v1[nx].re = r2 - r3; v1[nx].im = i2 - i3;
    605                 }
    606             }
    607         }
    608 
    609         for( ; n < factors[0]; )
    610         {
    611             // do the remaining radix-2 transform
    612             nx = n;
    613             n *= 2;
    614             dw0 /= 2;
    615 
    616             for( i = 0; i < n0; i += n )
    617             {
    618                 CvComplex64f* v = dst + i;
    619                 double r0 = v[0].re + v[nx].re;
    620                 double i0 = v[0].im + v[nx].im;
    621                 double r1 = v[0].re - v[nx].re;
    622                 double i1 = v[0].im - v[nx].im;
    623                 v[0].re = r0; v[0].im = i0;
    624                 v[nx].re = r1; v[nx].im = i1;
    625 
    626                 for( j = 1, dw = dw0; j < nx; j++, dw += dw0 )
    627                 {
    628                     v = dst + i + j;
    629                     r1 = v[nx].re*wave[dw].re - v[nx].im*wave[dw].im;
    630                     i1 = v[nx].im*wave[dw].re + v[nx].re*wave[dw].im;
    631                     r0 = v[0].re; i0 = v[0].im;
    632 
    633                     v[0].re = r0 + r1; v[0].im = i0 + i1;
    634                     v[nx].re = r0 - r1; v[nx].im = i0 - i1;
    635                 }
    636             }
    637         }
    638     }
    639 
    640     // 2. all the other transforms
    641     for( f_idx = (factors[0]&1) ? 0 : 1; f_idx < nf; f_idx++ )
    642     {
    643         int factor = factors[f_idx];
    644         nx = n;
    645         n *= factor;
    646         dw0 /= factor;
    647 
    648         if( factor == 3 )
    649         {
    650             // radix-3
    651             for( i = 0; i < n0; i += n )
    652             {
    653                 CvComplex64f* v = dst + i;
    654 
    655                 double r1 = v[nx].re + v[nx*2].re;
    656                 double i1 = v[nx].im + v[nx*2].im;
    657                 double r0 = v[0].re;
    658                 double i0 = v[0].im;
    659                 double r2 = icv_sin_120*(v[nx].im - v[nx*2].im);
    660                 double i2 = icv_sin_120*(v[nx*2].re - v[nx].re);
    661                 v[0].re = r0 + r1; v[0].im = i0 + i1;
    662                 r0 -= 0.5*r1; i0 -= 0.5*i1;
    663                 v[nx].re = r0 + r2; v[nx].im = i0 + i2;
    664                 v[nx*2].re = r0 - r2; v[nx*2].im = i0 - i2;
    665 
    666                 for( j = 1, dw = dw0; j < nx; j++, dw += dw0 )
    667                 {
    668                     v = dst + i + j;
    669                     r0 = v[nx].re*wave[dw].re - v[nx].im*wave[dw].im;
    670                     i0 = v[nx].re*wave[dw].im + v[nx].im*wave[dw].re;
    671                     i2 = v[nx*2].re*wave[dw*2].re - v[nx*2].im*wave[dw*2].im;
    672                     r2 = v[nx*2].re*wave[dw*2].im + v[nx*2].im*wave[dw*2].re;
    673                     r1 = r0 + i2; i1 = i0 + r2;
    674 
    675                     r2 = icv_sin_120*(i0 - r2); i2 = icv_sin_120*(i2 - r0);
    676                     r0 = v[0].re; i0 = v[0].im;
    677                     v[0].re = r0 + r1; v[0].im = i0 + i1;
    678                     r0 -= 0.5*r1; i0 -= 0.5*i1;
    679                     v[nx].re = r0 + r2; v[nx].im = i0 + i2;
    680                     v[nx*2].re = r0 - r2; v[nx*2].im = i0 - i2;
    681                 }
    682             }
    683         }
    684         else if( factor == 5 )
    685         {
    686             // radix-5
    687             for( i = 0; i < n0; i += n )
    688             {
    689                 for( j = 0, dw = 0; j < nx; j++, dw += dw0 )
    690                 {
    691                     CvComplex64f* v0 = dst + i + j;
    692                     CvComplex64f* v1 = v0 + nx*2;
    693                     CvComplex64f* v2 = v1 + nx*2;
    694 
    695                     double r0, i0, r1, i1, r2, i2, r3, i3, r4, i4, r5, i5;
    696 
    697                     r3 = v0[nx].re*wave[dw].re - v0[nx].im*wave[dw].im;
    698                     i3 = v0[nx].re*wave[dw].im + v0[nx].im*wave[dw].re;
    699                     r2 = v2[0].re*wave[dw*4].re - v2[0].im*wave[dw*4].im;
    700                     i2 = v2[0].re*wave[dw*4].im + v2[0].im*wave[dw*4].re;
    701 
    702                     r1 = r3 + r2; i1 = i3 + i2;
    703                     r3 -= r2; i3 -= i2;
    704 
    705                     r4 = v1[nx].re*wave[dw*3].re - v1[nx].im*wave[dw*3].im;
    706                     i4 = v1[nx].re*wave[dw*3].im + v1[nx].im*wave[dw*3].re;
    707                     r0 = v1[0].re*wave[dw*2].re - v1[0].im*wave[dw*2].im;
    708                     i0 = v1[0].re*wave[dw*2].im + v1[0].im*wave[dw*2].re;
    709 
    710                     r2 = r4 + r0; i2 = i4 + i0;
    711                     r4 -= r0; i4 -= i0;
    712 
    713                     r0 = v0[0].re; i0 = v0[0].im;
    714                     r5 = r1 + r2; i5 = i1 + i2;
    715 
    716                     v0[0].re = r0 + r5; v0[0].im = i0 + i5;
    717 
    718                     r0 -= 0.25*r5; i0 -= 0.25*i5;
    719                     r1 = icv_fft5_2*(r1 - r2); i1 = icv_fft5_2*(i1 - i2);
    720                     r2 = -icv_fft5_3*(i3 + i4); i2 = icv_fft5_3*(r3 + r4);
    721 
    722                     i3 *= -icv_fft5_5; r3 *= icv_fft5_5;
    723                     i4 *= -icv_fft5_4; r4 *= icv_fft5_4;
    724 
    725                     r5 = r2 + i3; i5 = i2 + r3;
    726                     r2 -= i4; i2 -= r4;
    727 
    728                     r3 = r0 + r1; i3 = i0 + i1;
    729                     r0 -= r1; i0 -= i1;
    730 
    731                     v0[nx].re = r3 + r2; v0[nx].im = i3 + i2;
    732                     v2[0].re = r3 - r2; v2[0].im = i3 - i2;
    733 
    734                     v1[0].re = r0 + r5; v1[0].im = i0 + i5;
    735                     v1[nx].re = r0 - r5; v1[nx].im = i0 - i5;
    736                 }
    737             }
    738         }
    739         else
    740         {
    741             // radix-"factor" - an odd number
    742             int p, q, factor2 = (factor - 1)/2;
    743             int d, dd, dw_f = tab_size/factor;
    744             CvComplex64f* a = buf;
    745             CvComplex64f* b = buf + factor2;
    746 
    747             for( i = 0; i < n0; i += n )
    748             {
    749                 for( j = 0, dw = 0; j < nx; j++, dw += dw0 )
    750                 {
    751                     CvComplex64f* v = dst + i + j;
    752                     CvComplex64f v_0 = v[0];
    753                     CvComplex64f vn_0 = v_0;
    754 
    755                     if( j == 0 )
    756                     {
    757                         for( p = 1, k = nx; p <= factor2; p++, k += nx )
    758                         {
    759                             double r0 = v[k].re + v[n-k].re;
    760                             double i0 = v[k].im - v[n-k].im;
    761                             double r1 = v[k].re - v[n-k].re;
    762                             double i1 = v[k].im + v[n-k].im;
    763 
    764                             vn_0.re += r0; vn_0.im += i1;
    765                             a[p-1].re = r0; a[p-1].im = i0;
    766                             b[p-1].re = r1; b[p-1].im = i1;
    767                         }
    768                     }
    769                     else
    770                     {
    771                         const CvComplex64f* wave_ = wave + dw*factor;
    772                         d = dw;
    773 
    774                         for( p = 1, k = nx; p <= factor2; p++, k += nx, d += dw )
    775                         {
    776                             double r2 = v[k].re*wave[d].re - v[k].im*wave[d].im;
    777                             double i2 = v[k].re*wave[d].im + v[k].im*wave[d].re;
    778 
    779                             double r1 = v[n-k].re*wave_[-d].re - v[n-k].im*wave_[-d].im;
    780                             double i1 = v[n-k].re*wave_[-d].im + v[n-k].im*wave_[-d].re;
    781 
    782                             double r0 = r2 + r1;
    783                             double i0 = i2 - i1;
    784                             r1 = r2 - r1;
    785                             i1 = i2 + i1;
    786 
    787                             vn_0.re += r0; vn_0.im += i1;
    788                             a[p-1].re = r0; a[p-1].im = i0;
    789                             b[p-1].re = r1; b[p-1].im = i1;
    790                         }
    791                     }
    792 
    793                     v[0] = vn_0;
    794 
    795                     for( p = 1, k = nx; p <= factor2; p++, k += nx )
    796                     {
    797                         CvComplex64f s0 = v_0, s1 = v_0;
    798                         d = dd = dw_f*p;
    799 
    800                         for( q = 0; q < factor2; q++ )
    801                         {
    802                             double r0 = wave[d].re * a[q].re;
    803                             double i0 = wave[d].im * a[q].im;
    804                             double r1 = wave[d].re * b[q].im;
    805                             double i1 = wave[d].im * b[q].re;
    806 
    807                             s1.re += r0 + i0; s0.re += r0 - i0;
    808                             s1.im += r1 - i1; s0.im += r1 + i1;
    809 
    810                             d += dd;
    811                             d -= -(d >= tab_size) & tab_size;
    812                         }
    813 
    814                         v[k] = s0;
    815                         v[n-k] = s1;
    816                     }
    817                 }
    818             }
    819         }
    820     }
    821 
    822     if( fabs(scale - 1.) > DBL_EPSILON )
    823     {
    824         double re_scale = scale, im_scale = scale;
    825         if( inv )
    826             im_scale = -im_scale;
    827 
    828         for( i = 0; i < n0; i++ )
    829         {
    830             double t0 = dst[i].re*re_scale;
    831             double t1 = dst[i].im*im_scale;
    832             dst[i].re = t0;
    833             dst[i].im = t1;
    834         }
    835     }
    836     else if( inv )
    837     {
    838         for( i = 0; i <= n0 - 2; i += 2 )
    839         {
    840             double t0 = -dst[i].im;
    841             double t1 = -dst[i+1].im;
    842             dst[i].im = t0;
    843             dst[i+1].im = t1;
    844         }
    845 
    846         if( i < n0 )
    847             dst[n0-1].im = -dst[n0-1].im;
    848     }
    849 
    850     return CV_OK;
    851 }
    852 
    853 
    854 // mixed-radix complex discrete Fourier transform: single-precision version
    855 static CvStatus CV_STDCALL
    856 icvDFT_32fc( const CvComplex32f* src, CvComplex32f* dst, int n,
    857              int nf, int* factors, const int* itab,
    858              const CvComplex32f* wave, int tab_size,
    859              const void* spec, CvComplex32f* buf,
    860              int flags, double scale )
    861 {
    862     int n0 = n, f_idx, nx;
    863     int inv = flags & CV_DXT_INVERSE;
    864     int dw0 = tab_size, dw;
    865     int i, j, k;
    866     CvComplex32f t;
    867     int tab_step = tab_size == n ? 1 : tab_size == n*2 ? 2 : tab_size/n;
    868 
    869     if( spec )
    870     {
    871         assert( icvDFTFwd_CToC_32fc_p != 0 && icvDFTInv_CToC_32fc_p != 0 );
    872         return !inv ?
    873             icvDFTFwd_CToC_32fc_p( src, dst, spec, buf ):
    874             icvDFTInv_CToC_32fc_p( src, dst, spec, buf );
    875     }
    876 
    877     // 0. shuffle data
    878     if( dst != src )
    879     {
    880         assert( (flags & ICV_DFT_NO_PERMUTE) == 0 );
    881         if( !inv )
    882         {
    883             for( i = 0; i <= n - 2; i += 2, itab += 2*tab_step )
    884             {
    885                 int k0 = itab[0], k1 = itab[tab_step];
    886                 assert( (unsigned)k0 < (unsigned)n && (unsigned)k1 < (unsigned)n );
    887                 dst[i] = src[k0]; dst[i+1] = src[k1];
    888             }
    889 
    890             if( i < n )
    891                 dst[n-1] = src[n-1];
    892         }
    893         else
    894         {
    895             for( i = 0; i <= n - 2; i += 2, itab += 2*tab_step )
    896             {
    897                 int k0 = itab[0], k1 = itab[tab_step];
    898                 assert( (unsigned)k0 < (unsigned)n && (unsigned)k1 < (unsigned)n );
    899                 t.re = src[k0].re; t.im = -src[k0].im;
    900                 dst[i] = t;
    901                 t.re = src[k1].re; t.im = -src[k1].im;
    902                 dst[i+1] = t;
    903             }
    904 
    905             if( i < n )
    906             {
    907                 t.re = src[n-1].re; t.im = -src[n-1].im;
    908                 dst[i] = t;
    909             }
    910         }
    911     }
    912     else
    913     {
    914         if( (flags & ICV_DFT_NO_PERMUTE) == 0 )
    915         {
    916             if( factors[0] != factors[nf-1] )
    917                 return CV_INPLACE_NOT_SUPPORTED_ERR;
    918             if( nf == 1 )
    919             {
    920                 if( (n & 3) == 0 )
    921                 {
    922                     int n2 = n/2;
    923                     CvComplex32f* dsth = dst + n2;
    924 
    925                     for( i = 0; i < n2; i += 2, itab += tab_step*2 )
    926                     {
    927                         j = itab[0];
    928                         assert( (unsigned)j < (unsigned)n2 );
    929 
    930                         CV_SWAP(dst[i+1], dsth[j], t);
    931                         if( j > i )
    932                         {
    933                             CV_SWAP(dst[i], dst[j], t);
    934                             CV_SWAP(dsth[i+1], dsth[j+1], t);
    935                         }
    936                     }
    937                 }
    938                 // else do nothing
    939             }
    940             else
    941             {
    942                 for( i = 0;
    943                 i < n;
    944                 i++)
    945                 {
    946                     j = itab[0];
    947                     assert( (unsigned)j < (unsigned)n );
    948                     if( j > i )
    949                         CV_SWAP(dst[i], dst[j], t);
    950                     itab += tab_step;
    951                 }
    952             }
    953         }
    954 
    955         if( inv )
    956         {
    957             // conjugate the vector - i.e. invert sign of the imaginary part
    958             int* idst = (int*)dst;
    959             for( i = 0; i <= n - 2; i += 2 )
    960             {
    961                 int t0 = idst[i*2+1] ^ 0x80000000;
    962                 int t1 = idst[i*2+3] ^ 0x80000000;
    963                 idst[i*2+1] = t0; idst[i*2+3] = t1;
    964             }
    965 
    966             if( i < n )
    967                 idst[2*i+1] ^= 0x80000000;
    968         }
    969     }
    970 
    971     n = 1;
    972     // 1. power-2 transforms
    973     if( (factors[0] & 1) == 0 )
    974     {
    975         // radix-4 transform
    976         for( ; n*4 <= factors[0]; )
    977         {
    978             nx = n;
    979             n *= 4;
    980             dw0 /= 4;
    981 
    982             for( i = 0; i < n0; i += n )
    983             {
    984                 CvComplex32f* v0;
    985                 CvComplex32f* v1;
    986                 double r0, i0, r1, i1, r2, i2, r3, i3, r4, i4;
    987 
    988                 v0 = dst + i;
    989                 v1 = v0 + nx*2;
    990 
    991                 r2 = v0[0].re; i2 = v0[0].im;
    992                 r1 = v0[nx].re; i1 = v0[nx].im;
    993 
    994                 r0 = r1 + r2; i0 = i1 + i2;
    995                 r2 -= r1; i2 -= i1;
    996 
    997                 i3 = v1[nx].re; r3 = v1[nx].im;
    998                 i4 = v1[0].re; r4 = v1[0].im;
    999 
   1000                 r1 = i4 + i3; i1 = r4 + r3;
   1001                 r3 = r4 - r3; i3 = i3 - i4;
   1002 
   1003                 v0[0].re = (float)(r0 + r1); v0[0].im = (float)(i0 + i1);
   1004                 v1[0].re = (float)(r0 - r1); v1[0].im = (float)(i0 - i1);
   1005                 v0[nx].re = (float)(r2 + r3); v0[nx].im = (float)(i2 + i3);
   1006                 v1[nx].re = (float)(r2 - r3); v1[nx].im = (float)(i2 - i3);
   1007 
   1008                 for( j = 1, dw = dw0; j < nx; j++, dw += dw0 )
   1009                 {
   1010                     v0 = dst + i + j;
   1011                     v1 = v0 + nx*2;
   1012 
   1013                     r2 = v0[nx].re*wave[dw*2].re - v0[nx].im*wave[dw*2].im;
   1014                     i2 = v0[nx].re*wave[dw*2].im + v0[nx].im*wave[dw*2].re;
   1015                     r0 = v1[0].re*wave[dw].im + v1[0].im*wave[dw].re;
   1016                     i0 = v1[0].re*wave[dw].re - v1[0].im*wave[dw].im;
   1017                     r3 = v1[nx].re*wave[dw*3].im + v1[nx].im*wave[dw*3].re;
   1018                     i3 = v1[nx].re*wave[dw*3].re - v1[nx].im*wave[dw*3].im;
   1019 
   1020                     r1 = i0 + i3; i1 = r0 + r3;
   1021                     r3 = r0 - r3; i3 = i3 - i0;
   1022                     r4 = v0[0].re; i4 = v0[0].im;
   1023 
   1024                     r0 = r4 + r2; i0 = i4 + i2;
   1025                     r2 = r4 - r2; i2 = i4 - i2;
   1026 
   1027                     v0[0].re = (float)(r0 + r1); v0[0].im = (float)(i0 + i1);
   1028                     v1[0].re = (float)(r0 - r1); v1[0].im = (float)(i0 - i1);
   1029                     v0[nx].re = (float)(r2 + r3); v0[nx].im = (float)(i2 + i3);
   1030                     v1[nx].re = (float)(r2 - r3); v1[nx].im = (float)(i2 - i3);
   1031                 }
   1032             }
   1033         }
   1034 
   1035         for( ; n < factors[0]; )
   1036         {
   1037             // do the remaining radix-2 transform
   1038             nx = n;
   1039             n *= 2;
   1040             dw0 /= 2;
   1041 
   1042             for( i = 0; i < n0; i += n )
   1043             {
   1044                 CvComplex32f* v = dst + i;
   1045                 double r0 = v[0].re + v[nx].re;
   1046                 double i0 = v[0].im + v[nx].im;
   1047                 double r1 = v[0].re - v[nx].re;
   1048                 double i1 = v[0].im - v[nx].im;
   1049                 v[0].re = (float)r0; v[0].im = (float)i0;
   1050                 v[nx].re = (float)r1; v[nx].im = (float)i1;
   1051 
   1052                 for( j = 1, dw = dw0; j < nx; j++, dw += dw0 )
   1053                 {
   1054                     v = dst + i + j;
   1055                     r1 = v[nx].re*wave[dw].re - v[nx].im*wave[dw].im;
   1056                     i1 = v[nx].im*wave[dw].re + v[nx].re*wave[dw].im;
   1057                     r0 = v[0].re; i0 = v[0].im;
   1058 
   1059                     v[0].re = (float)(r0 + r1); v[0].im = (float)(i0 + i1);
   1060                     v[nx].re = (float)(r0 - r1); v[nx].im = (float)(i0 - i1);
   1061                 }
   1062             }
   1063         }
   1064     }
   1065 
   1066     // 2. all the other transforms
   1067     for( f_idx = (factors[0]&1) ? 0 : 1; f_idx < nf; f_idx++ )
   1068     {
   1069         int factor = factors[f_idx];
   1070         nx = n;
   1071         n *= factor;
   1072         dw0 /= factor;
   1073 
   1074         if( factor == 3 )
   1075         {
   1076             // radix-3
   1077             for( i = 0; i < n0; i += n )
   1078             {
   1079                 CvComplex32f* v = dst + i;
   1080 
   1081                 double r1 = v[nx].re + v[nx*2].re;
   1082                 double i1 = v[nx].im + v[nx*2].im;
   1083                 double r0 = v[0].re;
   1084                 double i0 = v[0].im;
   1085                 double r2 = icv_sin_120*(v[nx].im - v[nx*2].im);
   1086                 double i2 = icv_sin_120*(v[nx*2].re - v[nx].re);
   1087                 v[0].re = (float)(r0 + r1); v[0].im = (float)(i0 + i1);
   1088                 r0 -= 0.5*r1; i0 -= 0.5*i1;
   1089                 v[nx].re = (float)(r0 + r2); v[nx].im = (float)(i0 + i2);
   1090                 v[nx*2].re = (float)(r0 - r2); v[nx*2].im = (float)(i0 - i2);
   1091 
   1092                 for( j = 1, dw = dw0; j < nx; j++, dw += dw0 )
   1093                 {
   1094                     v = dst + i + j;
   1095                     r0 = v[nx].re*wave[dw].re - v[nx].im*wave[dw].im;
   1096                     i0 = v[nx].re*wave[dw].im + v[nx].im*wave[dw].re;
   1097                     i2 = v[nx*2].re*wave[dw*2].re - v[nx*2].im*wave[dw*2].im;
   1098                     r2 = v[nx*2].re*wave[dw*2].im + v[nx*2].im*wave[dw*2].re;
   1099                     r1 = r0 + i2; i1 = i0 + r2;
   1100 
   1101                     r2 = icv_sin_120*(i0 - r2); i2 = icv_sin_120*(i2 - r0);
   1102                     r0 = v[0].re; i0 = v[0].im;
   1103                     v[0].re = (float)(r0 + r1); v[0].im = (float)(i0 + i1);
   1104                     r0 -= 0.5*r1; i0 -= 0.5*i1;
   1105                     v[nx].re = (float)(r0 + r2); v[nx].im = (float)(i0 + i2);
   1106                     v[nx*2].re = (float)(r0 - r2); v[nx*2].im = (float)(i0 - i2);
   1107                 }
   1108             }
   1109         }
   1110         else if( factor == 5 )
   1111         {
   1112             // radix-5
   1113             for( i = 0; i < n0; i += n )
   1114             {
   1115                 for( j = 0, dw = 0; j < nx; j++, dw += dw0 )
   1116                 {
   1117                     CvComplex32f* v0 = dst + i + j;
   1118                     CvComplex32f* v1 = v0 + nx*2;
   1119                     CvComplex32f* v2 = v1 + nx*2;
   1120 
   1121                     double r0, i0, r1, i1, r2, i2, r3, i3, r4, i4, r5, i5;
   1122 
   1123                     r3 = v0[nx].re*wave[dw].re - v0[nx].im*wave[dw].im;
   1124                     i3 = v0[nx].re*wave[dw].im + v0[nx].im*wave[dw].re;
   1125                     r2 = v2[0].re*wave[dw*4].re - v2[0].im*wave[dw*4].im;
   1126                     i2 = v2[0].re*wave[dw*4].im + v2[0].im*wave[dw*4].re;
   1127 
   1128                     r1 = r3 + r2; i1 = i3 + i2;
   1129                     r3 -= r2; i3 -= i2;
   1130 
   1131                     r4 = v1[nx].re*wave[dw*3].re - v1[nx].im*wave[dw*3].im;
   1132                     i4 = v1[nx].re*wave[dw*3].im + v1[nx].im*wave[dw*3].re;
   1133                     r0 = v1[0].re*wave[dw*2].re - v1[0].im*wave[dw*2].im;
   1134                     i0 = v1[0].re*wave[dw*2].im + v1[0].im*wave[dw*2].re;
   1135 
   1136                     r2 = r4 + r0; i2 = i4 + i0;
   1137                     r4 -= r0; i4 -= i0;
   1138 
   1139                     r0 = v0[0].re; i0 = v0[0].im;
   1140                     r5 = r1 + r2; i5 = i1 + i2;
   1141 
   1142                     v0[0].re = (float)(r0 + r5); v0[0].im = (float)(i0 + i5);
   1143 
   1144                     r0 -= 0.25*r5; i0 -= 0.25*i5;
   1145                     r1 = icv_fft5_2*(r1 - r2); i1 = icv_fft5_2*(i1 - i2);
   1146                     r2 = -icv_fft5_3*(i3 + i4); i2 = icv_fft5_3*(r3 + r4);
   1147 
   1148                     i3 *= -icv_fft5_5; r3 *= icv_fft5_5;
   1149                     i4 *= -icv_fft5_4; r4 *= icv_fft5_4;
   1150 
   1151                     r5 = r2 + i3; i5 = i2 + r3;
   1152                     r2 -= i4; i2 -= r4;
   1153 
   1154                     r3 = r0 + r1; i3 = i0 + i1;
   1155                     r0 -= r1; i0 -= i1;
   1156 
   1157                     v0[nx].re = (float)(r3 + r2); v0[nx].im = (float)(i3 + i2);
   1158                     v2[0].re = (float)(r3 - r2); v2[0].im = (float)(i3 - i2);
   1159 
   1160                     v1[0].re = (float)(r0 + r5); v1[0].im = (float)(i0 + i5);
   1161                     v1[nx].re = (float)(r0 - r5); v1[nx].im = (float)(i0 - i5);
   1162                 }
   1163             }
   1164         }
   1165         else
   1166         {
   1167             // radix-"factor" - an odd number
   1168             int p, q, factor2 = (factor - 1)/2;
   1169             int d, dd, dw_f = tab_size/factor;
   1170             CvComplex32f* a = buf;
   1171             CvComplex32f* b = buf + factor2;
   1172 
   1173             for( i = 0; i < n0; i += n )
   1174             {
   1175                 for( j = 0, dw = 0; j < nx; j++, dw += dw0 )
   1176                 {
   1177                     CvComplex32f* v = dst + i + j;
   1178                     CvComplex32f v_0 = v[0];
   1179                     CvComplex64f vn_0(v_0);
   1180 
   1181                     if( j == 0 )
   1182                     {
   1183                         for( p = 1, k = nx; p <= factor2; p++, k += nx )
   1184                         {
   1185                             double r0 = v[k].re + v[n-k].re;
   1186                             double i0 = v[k].im - v[n-k].im;
   1187                             double r1 = v[k].re - v[n-k].re;
   1188                             double i1 = v[k].im + v[n-k].im;
   1189 
   1190                             vn_0.re += r0; vn_0.im += i1;
   1191                             a[p-1].re = (float)r0; a[p-1].im = (float)i0;
   1192                             b[p-1].re = (float)r1; b[p-1].im = (float)i1;
   1193                         }
   1194                     }
   1195                     else
   1196                     {
   1197                         const CvComplex32f* wave_ = wave + dw*factor;
   1198                         d = dw;
   1199 
   1200                         for( p = 1, k = nx; p <= factor2; p++, k += nx, d += dw )
   1201                         {
   1202                             double r2 = v[k].re*wave[d].re - v[k].im*wave[d].im;
   1203                             double i2 = v[k].re*wave[d].im + v[k].im*wave[d].re;
   1204 
   1205                             double r1 = v[n-k].re*wave_[-d].re - v[n-k].im*wave_[-d].im;
   1206                             double i1 = v[n-k].re*wave_[-d].im + v[n-k].im*wave_[-d].re;
   1207 
   1208                             double r0 = r2 + r1;
   1209                             double i0 = i2 - i1;
   1210                             r1 = r2 - r1;
   1211                             i1 = i2 + i1;
   1212 
   1213                             vn_0.re += r0; vn_0.im += i1;
   1214                             a[p-1].re = (float)r0; a[p-1].im = (float)i0;
   1215                             b[p-1].re = (float)r1; b[p-1].im = (float)i1;
   1216                         }
   1217                     }
   1218 
   1219                     v[0].re = (float)vn_0.re;
   1220                     v[0].im = (float)vn_0.im;
   1221 
   1222                     for( p = 1, k = nx; p <= factor2; p++, k += nx )
   1223                     {
   1224                         CvComplex64f s0(v_0), s1 = s0;
   1225                         d = dd = dw_f*p;
   1226 
   1227                         for( q = 0; q < factor2; q++ )
   1228                         {
   1229                             double r0 = wave[d].re * a[q].re;
   1230                             double i0 = wave[d].im * a[q].im;
   1231                             double r1 = wave[d].re * b[q].im;
   1232                             double i1 = wave[d].im * b[q].re;
   1233 
   1234                             s1.re += r0 + i0; s0.re += r0 - i0;
   1235                             s1.im += r1 - i1; s0.im += r1 + i1;
   1236 
   1237                             d += dd;
   1238                             d -= -(d >= tab_size) & tab_size;
   1239                         }
   1240 
   1241                         v[k].re = (float)s0.re;
   1242                         v[k].im = (float)s0.im;
   1243                         v[n-k].re = (float)s1.re;
   1244                         v[n-k].im = (float)s1.im;
   1245                     }
   1246                 }
   1247             }
   1248         }
   1249     }
   1250 
   1251     if( fabs(scale - 1.) > DBL_EPSILON )
   1252     {
   1253         double re_scale = scale, im_scale = scale;
   1254         if( inv )
   1255             im_scale = -im_scale;
   1256 
   1257         for( i = 0; i < n0; i++ )
   1258         {
   1259             double t0 = dst[i].re*re_scale;
   1260             double t1 = dst[i].im*im_scale;
   1261             dst[i].re = (float)t0;
   1262             dst[i].im = (float)t1;
   1263         }
   1264     }
   1265     else if( inv )
   1266     {
   1267         for( i = 0; i <= n0 - 2; i += 2 )
   1268         {
   1269             double t0 = -dst[i].im;
   1270             double t1 = -dst[i+1].im;
   1271             dst[i].im = (float)t0;
   1272             dst[i+1].im = (float)t1;
   1273         }
   1274 
   1275         if( i < n0 )
   1276             dst[n0-1].im = -dst[n0-1].im;
   1277     }
   1278 
   1279     return CV_OK;
   1280 }
   1281 
   1282 
   1283 /* FFT of real vector
   1284    output vector format:
   1285      re(0), re(1), im(1), ... , re(n/2-1), im((n+1)/2-1) [, re((n+1)/2)] OR ...
   1286      re(0), 0, re(1), im(1), ..., re(n/2-1), im((n+1)/2-1) [, re((n+1)/2), 0] */
   1287 #define ICV_REAL_DFT( flavor, datatype )                                \
   1288 static CvStatus CV_STDCALL                                              \
   1289 icvRealDFT_##flavor( const datatype* src, datatype* dst,                \
   1290                      int n, int nf, int* factors, const int* itab,      \
   1291                      const CvComplex##flavor* wave, int tab_size,       \
   1292                      const void* spec, CvComplex##flavor* buf,          \
   1293                      int flags, double scale )                          \
   1294 {                                                                       \
   1295     int complex_output = (flags & ICV_DFT_COMPLEX_INPUT_OR_OUTPUT) != 0;\
   1296     int j, n2 = n >> 1;                                                 \
   1297     dst += complex_output;                                              \
   1298                                                                         \
   1299     if( spec )                                                          \
   1300     {                                                                   \
   1301         icvDFTFwd_RToPack_##flavor##_p( src, dst, spec, buf );          \
   1302         goto finalize;                                                  \
   1303     }                                                                   \
   1304                                                                         \
   1305     assert( tab_size == n );                                            \
   1306                                                                         \
   1307     if( n == 1 )                                                        \
   1308     {                                                                   \
   1309         dst[0] = (datatype)(src[0]*scale);                              \
   1310     }                                                                   \
   1311     else if( n == 2 )                                                   \
   1312     {                                                                   \
   1313         double t = (src[0] + src[1])*scale;                             \
   1314         dst[1] = (datatype)((src[0] - src[1])*scale);                   \
   1315         dst[0] = (datatype)t;                                           \
   1316     }                                                                   \
   1317     else if( n & 1 )                                                    \
   1318     {                                                                   \
   1319         dst -= complex_output;                                          \
   1320         CvComplex##flavor* _dst = (CvComplex##flavor*)dst;              \
   1321         _dst[0].re = (datatype)(src[0]*scale);                          \
   1322         _dst[0].im = 0;                                                 \
   1323         for( j = 1; j < n; j += 2 )                                     \
   1324         {                                                               \
   1325             double t0 = src[itab[j]]*scale;                             \
   1326             double t1 = src[itab[j+1]]*scale;                           \
   1327             _dst[j].re = (datatype)t0;                                  \
   1328             _dst[j].im = 0;                                             \
   1329             _dst[j+1].re = (datatype)t1;                                \
   1330             _dst[j+1].im = 0;                                           \
   1331         }                                                               \
   1332         icvDFT_##flavor##c( _dst, _dst, n, nf, factors, itab, wave,     \
   1333                             tab_size, 0, buf, ICV_DFT_NO_PERMUTE, 1. ); \
   1334         if( !complex_output )                                           \
   1335             dst[1] = dst[0];                                            \
   1336         return CV_OK;                                                   \
   1337     }                                                                   \
   1338     else                                                                \
   1339     {                                                                   \
   1340         double t0, t;                                                   \
   1341         double h1_re, h1_im, h2_re, h2_im;                              \
   1342         double scale2 = scale*0.5;                                      \
   1343         factors[0] >>= 1;                                               \
   1344                                                                         \
   1345         icvDFT_##flavor##c( (CvComplex##flavor*)src,                    \
   1346                             (CvComplex##flavor*)dst, n2,                \
   1347                             nf - (factors[0] == 1),                     \
   1348                             factors + (factors[0] == 1),                \
   1349                             itab, wave, tab_size, 0, buf, 0, 1. );      \
   1350         factors[0] <<= 1;                                               \
   1351                                                                         \
   1352         t = dst[0] - dst[1];                                            \
   1353         dst[0] = (datatype)((dst[0] + dst[1])*scale);                   \
   1354         dst[1] = (datatype)(t*scale);                                   \
   1355                                                                         \
   1356         t0 = dst[n2];                                                   \
   1357         t = dst[n-1];                                                   \
   1358         dst[n-1] = dst[1];                                              \
   1359                                                                         \
   1360         for( j = 2, wave++; j < n2; j += 2, wave++ )                    \
   1361         {                                                               \
   1362             /* calc odd */                                              \
   1363             h2_re = scale2*(dst[j+1] + t);                              \
   1364             h2_im = scale2*(dst[n-j] - dst[j]);                         \
   1365                                                                         \
   1366             /* calc even */                                             \
   1367             h1_re = scale2*(dst[j] + dst[n-j]);                         \
   1368             h1_im = scale2*(dst[j+1] - t);                              \
   1369                                                                         \
   1370             /* rotate */                                                \
   1371             t = h2_re*wave->re - h2_im*wave->im;                        \
   1372             h2_im = h2_re*wave->im + h2_im*wave->re;                    \
   1373             h2_re = t;                                                  \
   1374             t = dst[n-j-1];                                             \
   1375                                                                         \
   1376             dst[j-1] = (datatype)(h1_re + h2_re);                       \
   1377             dst[n-j-1] = (datatype)(h1_re - h2_re);                     \
   1378             dst[j] = (datatype)(h1_im + h2_im);                         \
   1379             dst[n-j] = (datatype)(h2_im - h1_im);                       \
   1380         }                                                               \
   1381                                                                         \
   1382         if( j <= n2 )                                                   \
   1383         {                                                               \
   1384             dst[n2-1] = (datatype)(t0*scale);                           \
   1385             dst[n2] = (datatype)(-t*scale);                             \
   1386         }                                                               \
   1387     }                                                                   \
   1388                                                                         \
   1389 finalize:                                                               \
   1390     if( complex_output )                                                \
   1391     {                                                                   \
   1392         dst[-1] = dst[0];                                               \
   1393         dst[0] = 0;                                                     \
   1394         if( (n & 1) == 0 )                                              \
   1395             dst[n] = 0;                                                 \
   1396     }                                                                   \
   1397                                                                         \
   1398     return CV_OK;                                                       \
   1399 }
   1400 
   1401 
   1402 /* Inverse FFT of complex conjugate-symmetric vector
   1403    input vector format:
   1404       re[0], re[1], im[1], ... , re[n/2-1], im[n/2-1], re[n/2] OR
   1405       re(0), 0, re(1), im(1), ..., re(n/2-1), im((n+1)/2-1) [, re((n+1)/2), 0] */
   1406 #define ICV_CCS_IDFT( flavor, datatype )                                \
   1407 static CvStatus CV_STDCALL                                              \
   1408 icvCCSIDFT_##flavor( const datatype* src, datatype* dst,                \
   1409                      int n, int nf, int* factors, const int* itab,      \
   1410                      const CvComplex##flavor* wave, int tab_size,       \
   1411                      const void* spec, CvComplex##flavor* buf,          \
   1412                      int flags, double scale )                          \
   1413 {                                                                       \
   1414     int complex_input = (flags & ICV_DFT_COMPLEX_INPUT_OR_OUTPUT) != 0; \
   1415     int j, k, n2 = (n+1) >> 1;                                          \
   1416     double save_s1 = 0.;                                                \
   1417     double t0, t1, t2, t3, t;                                           \
   1418                                                                         \
   1419     assert( tab_size == n );                                            \
   1420                                                                         \
   1421     if( complex_input )                                                 \
   1422     {                                                                   \
   1423         assert( src != dst );                                           \
   1424         save_s1 = src[1];                                               \
   1425         ((datatype*)src)[1] = src[0];                                   \
   1426         src++;                                                          \
   1427     }                                                                   \
   1428                                                                         \
   1429     if( spec )                                                          \
   1430     {                                                                   \
   1431         icvDFTInv_PackToR_##flavor##_p( src, dst, spec, buf );          \
   1432         goto finalize;                                                  \
   1433     }                                                                   \
   1434                                                                         \
   1435     if( n == 1 )                                                        \
   1436     {                                                                   \
   1437         dst[0] = (datatype)(src[0]*scale);                              \
   1438     }                                                                   \
   1439     else if( n == 2 )                                                   \
   1440     {                                                                   \
   1441         t = (src[0] + src[1])*scale;                                    \
   1442         dst[1] = (datatype)((src[0] - src[1])*scale);                   \
   1443         dst[0] = (datatype)t;                                           \
   1444     }                                                                   \
   1445     else if( n & 1 )                                                    \
   1446     {                                                                   \
   1447         CvComplex##flavor* _src = (CvComplex##flavor*)(src-1);          \
   1448         CvComplex##flavor* _dst = (CvComplex##flavor*)dst;              \
   1449                                                                         \
   1450         _dst[0].re = src[0];                                            \
   1451         _dst[0].im = 0;                                                 \
   1452         for( j = 1; j < n2; j++ )                                       \
   1453         {                                                               \
   1454             int k0 = itab[j], k1 = itab[n-j];                           \
   1455             t0 = _src[j].re; t1 = _src[j].im;                           \
   1456             _dst[k0].re = (datatype)t0; _dst[k0].im = (datatype)-t1;    \
   1457             _dst[k1].re = (datatype)t0; _dst[k1].im = (datatype)t1;     \
   1458         }                                                               \
   1459                                                                         \
   1460         icvDFT_##flavor##c( _dst, _dst, n, nf, factors, itab, wave,     \
   1461                             tab_size, 0, buf, ICV_DFT_NO_PERMUTE, 1. ); \
   1462         dst[0] = (datatype)(dst[0]*scale);                              \
   1463         for( j = 1; j < n; j += 2 )                                     \
   1464         {                                                               \
   1465             t0 = dst[j*2]*scale;                                        \
   1466             t1 = dst[j*2+2]*scale;                                      \
   1467             dst[j] = (datatype)t0;                                      \
   1468             dst[j+1] = (datatype)t1;                                    \
   1469         }                                                               \
   1470     }                                                                   \
   1471     else                                                                \
   1472     {                                                                   \
   1473         int inplace = src == dst;                                       \
   1474         const CvComplex##flavor* w = wave;                              \
   1475                                                                         \
   1476         t = src[1];                                                     \
   1477         t0 = (src[0] + src[n-1]);                                       \
   1478         t1 = (src[n-1] - src[0]);                                       \
   1479         dst[0] = (datatype)t0;                                          \
   1480         dst[1] = (datatype)t1;                                          \
   1481                                                                         \
   1482         for( j = 2, w++; j < n2; j += 2, w++ )                          \
   1483         {                                                               \
   1484             double h1_re, h1_im, h2_re, h2_im;                          \
   1485                                                                         \
   1486             h1_re = (t + src[n-j-1]);                                   \
   1487             h1_im = (src[j] - src[n-j]);                                \
   1488                                                                         \
   1489             h2_re = (t - src[n-j-1]);                                   \
   1490             h2_im = (src[j] + src[n-j]);                                \
   1491                                                                         \
   1492             t = h2_re*w->re + h2_im*w->im;                              \
   1493             h2_im = h2_im*w->re - h2_re*w->im;                          \
   1494             h2_re = t;                                                  \
   1495                                                                         \
   1496             t = src[j+1];                                               \
   1497             t0 = h1_re - h2_im;                                         \
   1498             t1 = -h1_im - h2_re;                                        \
   1499             t2 = h1_re + h2_im;                                         \
   1500             t3 = h1_im - h2_re;                                         \
   1501                                                                         \
   1502             if( inplace )                                               \
   1503             {                                                           \
   1504                 dst[j] = (datatype)t0;                                  \
   1505                 dst[j+1] = (datatype)t1;                                \
   1506                 dst[n-j] = (datatype)t2;                                \
   1507                 dst[n-j+1]= (datatype)t3;                               \
   1508             }                                                           \
   1509             else                                                        \
   1510             {                                                           \
   1511                 int j2 = j >> 1;                                        \
   1512                 k = itab[j2];                                           \
   1513                 dst[k] = (datatype)t0;                                  \
   1514                 dst[k+1] = (datatype)t1;                                \
   1515                 k = itab[n2-j2];                                        \
   1516                 dst[k] = (datatype)t2;                                  \
   1517                 dst[k+1]= (datatype)t3;                                 \
   1518             }                                                           \
   1519         }                                                               \
   1520                                                                         \
   1521         if( j <= n2 )                                                   \
   1522         {                                                               \
   1523             t0 = t*2;                                                   \
   1524             t1 = src[n2]*2;                                             \
   1525                                                                         \
   1526             if( inplace )                                               \
   1527             {                                                           \
   1528                 dst[n2] = (datatype)t0;                                 \
   1529                 dst[n2+1] = (datatype)t1;                               \
   1530             }                                                           \
   1531             else                                                        \
   1532             {                                                           \
   1533                 k = itab[n2];                                           \
   1534                 dst[k*2] = (datatype)t0;                                \
   1535                 dst[k*2+1] = (datatype)t1;                              \
   1536             }                                                           \
   1537         }                                                               \
   1538                                                                         \
   1539         factors[0] >>= 1;                                               \
   1540         icvDFT_##flavor##c( (CvComplex##flavor*)dst,                    \
   1541                             (CvComplex##flavor*)dst, n2,                \
   1542                             nf - (factors[0] == 1),                     \
   1543                             factors + (factors[0] == 1), itab,          \
   1544                             wave, tab_size, 0, buf,                     \
   1545                             inplace ? 0 : ICV_DFT_NO_PERMUTE, 1. );     \
   1546         factors[0] <<= 1;                                               \
   1547                                                                         \
   1548         for( j = 0; j < n; j += 2 )                                     \
   1549         {                                                               \
   1550             t0 = dst[j]*scale;                                          \
   1551             t1 = dst[j+1]*(-scale);                                     \
   1552             dst[j] = (datatype)t0;                                      \
   1553             dst[j+1] = (datatype)t1;                                    \
   1554         }                                                               \
   1555     }                                                                   \
   1556                                                                         \
   1557 finalize:                                                               \
   1558     if( complex_input )                                                 \
   1559         ((datatype*)src)[0] = (datatype)save_s1;                        \
   1560                                                                         \
   1561     return CV_OK;                                                       \
   1562 }
   1563 
   1564 
   1565 ICV_REAL_DFT( 64f, double )
   1566 ICV_CCS_IDFT( 64f, double )
   1567 ICV_REAL_DFT( 32f, float )
   1568 ICV_CCS_IDFT( 32f, float )
   1569 
   1570 
   1571 static void
   1572 icvCopyColumn( const uchar* _src, int src_step,
   1573                uchar* _dst, int dst_step,
   1574                int len, int elem_size )
   1575 {
   1576     int i, t0, t1;
   1577     const int* src = (const int*)_src;
   1578     int* dst = (int*)_dst;
   1579     src_step /= sizeof(src[0]);
   1580     dst_step /= sizeof(dst[0]);
   1581 
   1582     if( elem_size == sizeof(int) )
   1583     {
   1584         for( i = 0; i < len; i++, src += src_step, dst += dst_step )
   1585             dst[0] = src[0];
   1586     }
   1587     else if( elem_size == sizeof(int)*2 )
   1588     {
   1589         for( i = 0; i < len; i++, src += src_step, dst += dst_step )
   1590         {
   1591             t0 = src[0]; t1 = src[1];
   1592             dst[0] = t0; dst[1] = t1;
   1593         }
   1594     }
   1595     else if( elem_size == sizeof(int)*4 )
   1596     {
   1597         for( i = 0; i < len; i++, src += src_step, dst += dst_step )
   1598         {
   1599             t0 = src[0]; t1 = src[1];
   1600             dst[0] = t0; dst[1] = t1;
   1601             t0 = src[2]; t1 = src[3];
   1602             dst[2] = t0; dst[3] = t1;
   1603         }
   1604     }
   1605 }
   1606 
   1607 
   1608 static void
   1609 icvCopyFrom2Columns( const uchar* _src, int src_step,
   1610                      uchar* _dst0, uchar* _dst1,
   1611                      int len, int elem_size )
   1612 {
   1613     int i, t0, t1;
   1614     const int* src = (const int*)_src;
   1615     int* dst0 = (int*)_dst0;
   1616     int* dst1 = (int*)_dst1;
   1617     src_step /= sizeof(src[0]);
   1618 
   1619     if( elem_size == sizeof(int) )
   1620     {
   1621         for( i = 0; i < len; i++, src += src_step )
   1622         {
   1623             t0 = src[0]; t1 = src[1];
   1624             dst0[i] = t0; dst1[i] = t1;
   1625         }
   1626     }
   1627     else if( elem_size == sizeof(int)*2 )
   1628     {
   1629         for( i = 0; i < len*2; i += 2, src += src_step )
   1630         {
   1631             t0 = src[0]; t1 = src[1];
   1632             dst0[i] = t0; dst0[i+1] = t1;
   1633             t0 = src[2]; t1 = src[3];
   1634             dst1[i] = t0; dst1[i+1] = t1;
   1635         }
   1636     }
   1637     else if( elem_size == sizeof(int)*4 )
   1638     {
   1639         for( i = 0; i < len*4; i += 4, src += src_step )
   1640         {
   1641             t0 = src[0]; t1 = src[1];
   1642             dst0[i] = t0; dst0[i+1] = t1;
   1643             t0 = src[2]; t1 = src[3];
   1644             dst0[i+2] = t0; dst0[i+3] = t1;
   1645             t0 = src[4]; t1 = src[5];
   1646             dst1[i] = t0; dst1[i+1] = t1;
   1647             t0 = src[6]; t1 = src[7];
   1648             dst1[i+2] = t0; dst1[i+3] = t1;
   1649         }
   1650     }
   1651 }
   1652 
   1653 
   1654 static void
   1655 icvCopyTo2Columns( const uchar* _src0, const uchar* _src1,
   1656                    uchar* _dst, int dst_step,
   1657                    int len, int elem_size )
   1658 {
   1659     int i, t0, t1;
   1660     const int* src0 = (const int*)_src0;
   1661     const int* src1 = (const int*)_src1;
   1662     int* dst = (int*)_dst;
   1663     dst_step /= sizeof(dst[0]);
   1664 
   1665     if( elem_size == sizeof(int) )
   1666     {
   1667         for( i = 0; i < len; i++, dst += dst_step )
   1668         {
   1669             t0 = src0[i]; t1 = src1[i];
   1670             dst[0] = t0; dst[1] = t1;
   1671         }
   1672     }
   1673     else if( elem_size == sizeof(int)*2 )
   1674     {
   1675         for( i = 0; i < len*2; i += 2, dst += dst_step )
   1676         {
   1677             t0 = src0[i]; t1 = src0[i+1];
   1678             dst[0] = t0; dst[1] = t1;
   1679             t0 = src1[i]; t1 = src1[i+1];
   1680             dst[2] = t0; dst[3] = t1;
   1681         }
   1682     }
   1683     else if( elem_size == sizeof(int)*4 )
   1684     {
   1685         for( i = 0; i < len*4; i += 4, dst += dst_step )
   1686         {
   1687             t0 = src0[i]; t1 = src0[i+1];
   1688             dst[0] = t0; dst[1] = t1;
   1689             t0 = src0[i+2]; t1 = src0[i+3];
   1690             dst[2] = t0; dst[3] = t1;
   1691             t0 = src1[i]; t1 = src1[i+1];
   1692             dst[4] = t0; dst[5] = t1;
   1693             t0 = src1[i+2]; t1 = src1[i+3];
   1694             dst[6] = t0; dst[7] = t1;
   1695         }
   1696     }
   1697 }
   1698 
   1699 
   1700 static void
   1701 icvExpandCCS( uchar* _ptr, int len, int elem_size )
   1702 {
   1703     int i;
   1704     _ptr -= elem_size;
   1705     memcpy( _ptr, _ptr + elem_size, elem_size );
   1706     memset( _ptr + elem_size, 0, elem_size );
   1707     if( (len & 1) == 0 )
   1708         memset( _ptr + (len+1)*elem_size, 0, elem_size );
   1709 
   1710     if( elem_size == sizeof(float) )
   1711     {
   1712         CvComplex32f* ptr = (CvComplex32f*)_ptr;
   1713 
   1714         for( i = 1; i < (len+1)/2; i++ )
   1715         {
   1716             CvComplex32f t;
   1717             t.re = ptr[i].re;
   1718             t.im = -ptr[i].im;
   1719             ptr[len-i] = t;
   1720         }
   1721     }
   1722     else
   1723     {
   1724         CvComplex64f* ptr = (CvComplex64f*)_ptr;
   1725 
   1726         for( i = 1; i < (len+1)/2; i++ )
   1727         {
   1728             CvComplex64f t;
   1729             t.re = ptr[i].re;
   1730             t.im = -ptr[i].im;
   1731             ptr[len-i] = t;
   1732         }
   1733     }
   1734 }
   1735 
   1736 
   1737 typedef CvStatus (CV_STDCALL *CvDFTFunc)(
   1738      const void* src, void* dst, int n, int nf, int* factors,
   1739      const int* itab, const void* wave, int tab_size,
   1740      const void* spec, void* buf, int inv, double scale );
   1741 
   1742 CV_IMPL void
   1743 cvDFT( const CvArr* srcarr, CvArr* dstarr, int flags, int nonzero_rows )
   1744 {
   1745     static CvDFTFunc dft_tbl[6];
   1746     static int inittab = 0;
   1747 
   1748     void* buffer = 0;
   1749     int local_alloc = 1;
   1750     int depth = -1;
   1751     void *spec_c = 0, *spec_r = 0, *spec = 0;
   1752 
   1753     CV_FUNCNAME( "cvDFT" );
   1754 
   1755     __BEGIN__;
   1756 
   1757     int prev_len = 0, buf_size = 0, stage = 0;
   1758     int nf = 0, inv = (flags & CV_DXT_INVERSE) != 0;
   1759     int real_transform = 0;
   1760     CvMat *src = (CvMat*)srcarr, *dst = (CvMat*)dstarr;
   1761     CvMat srcstub, dststub, *src0;
   1762     int complex_elem_size, elem_size;
   1763     int factors[34], inplace_transform = 0;
   1764     int ipp_norm_flag = 0;
   1765 
   1766     if( !inittab )
   1767     {
   1768         dft_tbl[0] = (CvDFTFunc)icvDFT_32fc;
   1769         dft_tbl[1] = (CvDFTFunc)icvRealDFT_32f;
   1770         dft_tbl[2] = (CvDFTFunc)icvCCSIDFT_32f;
   1771         dft_tbl[3] = (CvDFTFunc)icvDFT_64fc;
   1772         dft_tbl[4] = (CvDFTFunc)icvRealDFT_64f;
   1773         dft_tbl[5] = (CvDFTFunc)icvCCSIDFT_64f;
   1774         inittab = 1;
   1775     }
   1776 
   1777     if( !CV_IS_MAT( src ))
   1778     {
   1779         int coi = 0;
   1780         CV_CALL( src = cvGetMat( src, &srcstub, &coi ));
   1781 
   1782         if( coi != 0 )
   1783             CV_ERROR( CV_BadCOI, "" );
   1784     }
   1785 
   1786     if( !CV_IS_MAT( dst ))
   1787     {
   1788         int coi = 0;
   1789         CV_CALL( dst = cvGetMat( dst, &dststub, &coi ));
   1790 
   1791         if( coi != 0 )
   1792             CV_ERROR( CV_BadCOI, "" );
   1793     }
   1794 
   1795     src0 = src;
   1796     elem_size = CV_ELEM_SIZE1(src->type);
   1797     complex_elem_size = elem_size*2;
   1798 
   1799     // check types and sizes
   1800     if( !CV_ARE_DEPTHS_EQ(src, dst) )
   1801         CV_ERROR( CV_StsUnmatchedFormats, "" );
   1802 
   1803     depth = CV_MAT_DEPTH(src->type);
   1804     if( depth < CV_32F )
   1805         CV_ERROR( CV_StsUnsupportedFormat,
   1806         "Only 32fC1, 32fC2, 64fC1 and 64fC2 formats are supported" );
   1807 
   1808     if( CV_ARE_CNS_EQ(src, dst) )
   1809     {
   1810         if( CV_MAT_CN(src->type) > 2 )
   1811             CV_ERROR( CV_StsUnsupportedFormat,
   1812             "Only 32fC1, 32fC2, 64fC1 and 64fC2 formats are supported" );
   1813 
   1814         if( !CV_ARE_SIZES_EQ(src, dst) )
   1815             CV_ERROR( CV_StsUnmatchedSizes, "" );
   1816         real_transform = CV_MAT_CN(src->type) == 1;
   1817         if( !real_transform )
   1818             elem_size = complex_elem_size;
   1819     }
   1820     else if( !inv && CV_MAT_CN(src->type) == 1 && CV_MAT_CN(dst->type) == 2 )
   1821     {
   1822         if( (src->cols != 1 || dst->cols != 1 ||
   1823             (src->rows/2+1 != dst->rows && src->rows != dst->rows)) &&
   1824             (src->cols/2+1 != dst->cols || src->rows != dst->rows) )
   1825             CV_ERROR( CV_StsUnmatchedSizes, "" );
   1826         real_transform = 1;
   1827     }
   1828     else if( inv && CV_MAT_CN(src->type) == 2 && CV_MAT_CN(dst->type) == 1 )
   1829     {
   1830         if( (src->cols != 1 || dst->cols != 1 ||
   1831             (dst->rows/2+1 != src->rows && src->rows != dst->rows)) &&
   1832             (dst->cols/2+1 != src->cols || src->rows != dst->rows) )
   1833             CV_ERROR( CV_StsUnmatchedSizes, "" );
   1834         real_transform = 1;
   1835     }
   1836     else
   1837         CV_ERROR( CV_StsUnmatchedFormats,
   1838         "Incorrect or unsupported combination of input & output formats" );
   1839 
   1840     if( src->cols == 1 && nonzero_rows > 0 )
   1841         CV_ERROR( CV_StsNotImplemented,
   1842         "This mode (using nonzero_rows with a single-column matrix) breaks the function logic, so it is prohibited.\n"
   1843         "For fast convolution/correlation use 2-column matrix or single-row matrix instead" );
   1844 
   1845     // determine, which transform to do first - row-wise
   1846     // (stage 0) or column-wise (stage 1) transform
   1847     if( !(flags & CV_DXT_ROWS) && src->rows > 1 &&
   1848         ((src->cols == 1 && !CV_IS_MAT_CONT(src->type & dst->type)) ||
   1849         (src->cols > 1 && inv && real_transform)) )
   1850         stage = 1;
   1851 
   1852     ipp_norm_flag = !(flags & CV_DXT_SCALE) ? 8 : (flags & CV_DXT_INVERSE) ? 2 : 1;
   1853 
   1854     for(;;)
   1855     {
   1856         double scale = 1;
   1857         uchar* wave = 0;
   1858         int* itab = 0;
   1859         uchar* ptr;
   1860         int i, len, count, sz = 0;
   1861         int use_buf = 0, odd_real = 0;
   1862         CvDFTFunc dft_func;
   1863 
   1864         if( stage == 0 ) // row-wise transform
   1865         {
   1866             len = !inv ? src->cols : dst->cols;
   1867             count = src->rows;
   1868             if( len == 1 && !(flags & CV_DXT_ROWS) )
   1869             {
   1870                 len = !inv ? src->rows : dst->rows;
   1871                 count = 1;
   1872             }
   1873             odd_real = real_transform && (len & 1);
   1874         }
   1875         else
   1876         {
   1877             len = dst->rows;
   1878             count = !inv ? src0->cols : dst->cols;
   1879             sz = 2*len*complex_elem_size;
   1880         }
   1881 
   1882         spec = 0;
   1883         if( len*count >= 64 && icvDFTInitAlloc_R_32f_p != 0 ) // use IPP DFT if available
   1884         {
   1885             int ipp_sz = 0;
   1886 
   1887             if( real_transform && stage == 0 )
   1888             {
   1889                 if( depth == CV_32F )
   1890                 {
   1891                     if( spec_r )
   1892                         IPPI_CALL( icvDFTFree_R_32f_p( spec_r ));
   1893                     IPPI_CALL( icvDFTInitAlloc_R_32f_p(
   1894                         &spec_r, len, ipp_norm_flag, cvAlgHintNone ));
   1895                     IPPI_CALL( icvDFTGetBufSize_R_32f_p( spec_r, &ipp_sz ));
   1896                 }
   1897                 else
   1898                 {
   1899                     if( spec_r )
   1900                         IPPI_CALL( icvDFTFree_R_64f_p( spec_r ));
   1901                     IPPI_CALL( icvDFTInitAlloc_R_64f_p(
   1902                         &spec_r, len, ipp_norm_flag, cvAlgHintNone ));
   1903                     IPPI_CALL( icvDFTGetBufSize_R_64f_p( spec_r, &ipp_sz ));
   1904                 }
   1905                 spec = spec_r;
   1906             }
   1907             else
   1908             {
   1909                 if( depth == CV_32F )
   1910                 {
   1911                     if( spec_c )
   1912                         IPPI_CALL( icvDFTFree_C_32fc_p( spec_c ));
   1913                     IPPI_CALL( icvDFTInitAlloc_C_32fc_p(
   1914                         &spec_c, len, ipp_norm_flag, cvAlgHintNone ));
   1915                     IPPI_CALL( icvDFTGetBufSize_C_32fc_p( spec_c, &ipp_sz ));
   1916                 }
   1917                 else
   1918                 {
   1919                     if( spec_c )
   1920                         IPPI_CALL( icvDFTFree_C_64fc_p( spec_c ));
   1921                     IPPI_CALL( icvDFTInitAlloc_C_64fc_p(
   1922                         &spec_c, len, ipp_norm_flag, cvAlgHintNone ));
   1923                     IPPI_CALL( icvDFTGetBufSize_C_64fc_p( spec_c, &ipp_sz ));
   1924                 }
   1925                 spec = spec_c;
   1926             }
   1927 
   1928             sz += ipp_sz;
   1929         }
   1930         else
   1931         {
   1932             if( len != prev_len )
   1933                 nf = icvDFTFactorize( len, factors );
   1934 
   1935             inplace_transform = factors[0] == factors[nf-1];
   1936             sz += len*(complex_elem_size + sizeof(int));
   1937             i = nf > 1 && (factors[0] & 1) == 0;
   1938             if( (factors[i] & 1) != 0 && factors[i] > 5 )
   1939                 sz += (factors[i]+1)*complex_elem_size;
   1940 
   1941             if( (stage == 0 && ((src->data.ptr == dst->data.ptr && !inplace_transform) || odd_real)) ||
   1942                 (stage == 1 && !inplace_transform) )
   1943             {
   1944                 use_buf = 1;
   1945                 sz += len*complex_elem_size;
   1946             }
   1947         }
   1948 
   1949         if( sz > buf_size )
   1950         {
   1951             prev_len = 0; // because we release the buffer,
   1952                           // force recalculation of
   1953                           // twiddle factors and permutation table
   1954             if( !local_alloc && buffer )
   1955                 cvFree( &buffer );
   1956             if( sz <= CV_MAX_LOCAL_DFT_SIZE )
   1957             {
   1958                 buf_size = sz = CV_MAX_LOCAL_DFT_SIZE;
   1959                 buffer = cvStackAlloc(sz + 32);
   1960                 local_alloc = 1;
   1961             }
   1962             else
   1963             {
   1964                 CV_CALL( buffer = cvAlloc(sz + 32) );
   1965                 buf_size = sz;
   1966                 local_alloc = 0;
   1967             }
   1968         }
   1969 
   1970         ptr = (uchar*)buffer;
   1971         if( !spec )
   1972         {
   1973             wave = ptr;
   1974             ptr += len*complex_elem_size;
   1975             itab = (int*)ptr;
   1976             ptr = (uchar*)cvAlignPtr( ptr + len*sizeof(int), 16 );
   1977 
   1978             if( len != prev_len || (!inplace_transform && inv && real_transform))
   1979                 icvDFTInit( len, nf, factors, itab, complex_elem_size,
   1980                             wave, stage == 0 && inv && real_transform );
   1981             // otherwise reuse the tables calculated on the previous stage
   1982         }
   1983 
   1984         if( stage == 0 )
   1985         {
   1986             uchar* tmp_buf = 0;
   1987             int dptr_offset = 0;
   1988             int dst_full_len = len*elem_size;
   1989             int _flags = inv + (CV_MAT_CN(src->type) != CV_MAT_CN(dst->type) ?
   1990                          ICV_DFT_COMPLEX_INPUT_OR_OUTPUT : 0);
   1991             if( use_buf )
   1992             {
   1993                 tmp_buf = ptr;
   1994                 ptr += len*complex_elem_size;
   1995                 if( odd_real && !inv && len > 1 &&
   1996                     !(_flags & ICV_DFT_COMPLEX_INPUT_OR_OUTPUT))
   1997                     dptr_offset = elem_size;
   1998             }
   1999 
   2000             if( !inv && (_flags & ICV_DFT_COMPLEX_INPUT_OR_OUTPUT) )
   2001                 dst_full_len += (len & 1) ? elem_size : complex_elem_size;
   2002 
   2003             dft_func = dft_tbl[(!real_transform ? 0 : !inv ? 1 : 2) + (depth == CV_64F)*3];
   2004 
   2005             if( count > 1 && !(flags & CV_DXT_ROWS) && (!inv || !real_transform) )
   2006                 stage = 1;
   2007             else if( flags & CV_DXT_SCALE )
   2008                 scale = 1./(len * (flags & CV_DXT_ROWS ? 1 : count));
   2009 
   2010             if( nonzero_rows <= 0 || nonzero_rows > count )
   2011                 nonzero_rows = count;
   2012 
   2013             for( i = 0; i < nonzero_rows; i++ )
   2014             {
   2015                 uchar* sptr = src->data.ptr + i*src->step;
   2016                 uchar* dptr0 = dst->data.ptr + i*dst->step;
   2017                 uchar* dptr = dptr0;
   2018 
   2019                 if( tmp_buf )
   2020                     dptr = tmp_buf;
   2021 
   2022                 dft_func( sptr, dptr, len, nf, factors, itab, wave, len, spec, ptr, _flags, scale );
   2023                 if( dptr != dptr0 )
   2024                     memcpy( dptr0, dptr + dptr_offset, dst_full_len );
   2025             }
   2026 
   2027             for( ; i < count; i++ )
   2028             {
   2029                 uchar* dptr0 = dst->data.ptr + i*dst->step;
   2030                 memset( dptr0, 0, dst_full_len );
   2031             }
   2032 
   2033             if( stage != 1 )
   2034                 break;
   2035             src = dst;
   2036         }
   2037         else
   2038         {
   2039             int a = 0, b = count;
   2040             uchar *buf0, *buf1, *dbuf0, *dbuf1;
   2041             uchar* sptr0 = src->data.ptr;
   2042             uchar* dptr0 = dst->data.ptr;
   2043             buf0 = ptr;
   2044             ptr += len*complex_elem_size;
   2045             buf1 = ptr;
   2046             ptr += len*complex_elem_size;
   2047             dbuf0 = buf0, dbuf1 = buf1;
   2048 
   2049             if( use_buf )
   2050             {
   2051                 dbuf1 = ptr;
   2052                 dbuf0 = buf1;
   2053                 ptr += len*complex_elem_size;
   2054             }
   2055 
   2056             dft_func = dft_tbl[(depth == CV_64F)*3];
   2057 
   2058             if( real_transform && inv && src->cols > 1 )
   2059                 stage = 0;
   2060             else if( flags & CV_DXT_SCALE )
   2061                 scale = 1./(len * count);
   2062 
   2063             if( real_transform )
   2064             {
   2065                 int even;
   2066                 a = 1;
   2067                 even = (count & 1) == 0;
   2068                 b = (count+1)/2;
   2069                 if( !inv )
   2070                 {
   2071                     memset( buf0, 0, len*complex_elem_size );
   2072                     icvCopyColumn( sptr0, src->step, buf0, complex_elem_size, len, elem_size );
   2073                     sptr0 += CV_MAT_CN(dst->type)*elem_size;
   2074                     if( even )
   2075                     {
   2076                         memset( buf1, 0, len*complex_elem_size );
   2077                         icvCopyColumn( sptr0 + (count-2)*elem_size, src->step,
   2078                                        buf1, complex_elem_size, len, elem_size );
   2079                     }
   2080                 }
   2081                 else if( CV_MAT_CN(src->type) == 1 )
   2082                 {
   2083                     icvCopyColumn( sptr0, src->step, buf0 + elem_size, elem_size, len, elem_size );
   2084                     icvExpandCCS( buf0 + elem_size, len, elem_size );
   2085                     if( even )
   2086                     {
   2087                         icvCopyColumn( sptr0 + (count-1)*elem_size, src->step,
   2088                                        buf1 + elem_size, elem_size, len, elem_size );
   2089                         icvExpandCCS( buf1 + elem_size, len, elem_size );
   2090                     }
   2091                     sptr0 += elem_size;
   2092                 }
   2093                 else
   2094                 {
   2095                     icvCopyColumn( sptr0, src->step, buf0, complex_elem_size, len, complex_elem_size );
   2096                     //memcpy( buf0 + elem_size, buf0, elem_size );
   2097                     //icvExpandCCS( buf0 + elem_size, len, elem_size );
   2098                     if( even )
   2099                     {
   2100                         icvCopyColumn( sptr0 + b*complex_elem_size, src->step,
   2101                                        buf1, complex_elem_size, len, complex_elem_size );
   2102                         //memcpy( buf0 + elem_size, buf0, elem_size );
   2103                         //icvExpandCCS( buf0 + elem_size, len, elem_size );
   2104                     }
   2105                     sptr0 += complex_elem_size;
   2106                 }
   2107 
   2108                 if( even )
   2109                     IPPI_CALL( dft_func( buf1, dbuf1, len, nf, factors, itab,
   2110                                          wave, len, spec, ptr, inv, scale ));
   2111                 IPPI_CALL( dft_func( buf0, dbuf0, len, nf, factors, itab,
   2112                                      wave, len, spec, ptr, inv, scale ));
   2113 
   2114                 if( CV_MAT_CN(dst->type) == 1 )
   2115                 {
   2116                     if( !inv )
   2117                     {
   2118                         // copy the half of output vector to the first/last column.
   2119                         // before doing that, defgragment the vector
   2120                         memcpy( dbuf0 + elem_size, dbuf0, elem_size );
   2121                         icvCopyColumn( dbuf0 + elem_size, elem_size, dptr0,
   2122                                        dst->step, len, elem_size );
   2123                         if( even )
   2124                         {
   2125                             memcpy( dbuf1 + elem_size, dbuf1, elem_size );
   2126                             icvCopyColumn( dbuf1 + elem_size, elem_size,
   2127                                            dptr0 + (count-1)*elem_size,
   2128                                            dst->step, len, elem_size );
   2129                         }
   2130                         dptr0 += elem_size;
   2131                     }
   2132                     else
   2133                     {
   2134                         // copy the real part of the complex vector to the first/last column
   2135                         icvCopyColumn( dbuf0, complex_elem_size, dptr0, dst->step, len, elem_size );
   2136                         if( even )
   2137                             icvCopyColumn( dbuf1, complex_elem_size, dptr0 + (count-1)*elem_size,
   2138                                            dst->step, len, elem_size );
   2139                         dptr0 += elem_size;
   2140                     }
   2141                 }
   2142                 else
   2143                 {
   2144                     assert( !inv );
   2145                     icvCopyColumn( dbuf0, complex_elem_size, dptr0,
   2146                                    dst->step, len, complex_elem_size );
   2147                     if( even )
   2148                         icvCopyColumn( dbuf1, complex_elem_size,
   2149                                        dptr0 + b*complex_elem_size,
   2150                                        dst->step, len, complex_elem_size );
   2151                     dptr0 += complex_elem_size;
   2152                 }
   2153             }
   2154 
   2155             for( i = a; i < b; i += 2 )
   2156             {
   2157                 if( i+1 < b )
   2158                 {
   2159                     icvCopyFrom2Columns( sptr0, src->step, buf0, buf1, len, complex_elem_size );
   2160                     IPPI_CALL( dft_func( buf1, dbuf1, len, nf, factors, itab,
   2161                                          wave, len, spec, ptr, inv, scale ));
   2162                 }
   2163                 else
   2164                     icvCopyColumn( sptr0, src->step, buf0, complex_elem_size, len, complex_elem_size );
   2165 
   2166                 IPPI_CALL( dft_func( buf0, dbuf0, len, nf, factors, itab,
   2167                                      wave, len, spec, ptr, inv, scale ));
   2168 
   2169                 if( i+1 < b )
   2170                     icvCopyTo2Columns( dbuf0, dbuf1, dptr0, dst->step, len, complex_elem_size );
   2171                 else
   2172                     icvCopyColumn( dbuf0, complex_elem_size, dptr0, dst->step, len, complex_elem_size );
   2173                 sptr0 += 2*complex_elem_size;
   2174                 dptr0 += 2*complex_elem_size;
   2175             }
   2176 
   2177             if( stage != 0 )
   2178                 break;
   2179             src = dst;
   2180         }
   2181     }
   2182 
   2183     __END__;
   2184 
   2185     if( buffer && !local_alloc )
   2186         cvFree( &buffer );
   2187 
   2188     if( spec_c )
   2189     {
   2190         if( depth == CV_32F )
   2191             icvDFTFree_C_32fc_p( spec_c );
   2192         else
   2193             icvDFTFree_C_64fc_p( spec_c );
   2194     }
   2195 
   2196     if( spec_r )
   2197     {
   2198         if( depth == CV_32F )
   2199             icvDFTFree_R_32f_p( spec_r );
   2200         else
   2201             icvDFTFree_R_64f_p( spec_r );
   2202     }
   2203 }
   2204 
   2205 
   2206 CV_IMPL void
   2207 cvMulSpectrums( const CvArr* srcAarr, const CvArr* srcBarr,
   2208                 CvArr* dstarr, int flags )
   2209 {
   2210     CV_FUNCNAME( "cvMulSpectrums" );
   2211 
   2212     __BEGIN__;
   2213 
   2214     CvMat stubA, *srcA = (CvMat*)srcAarr;
   2215     CvMat stubB, *srcB = (CvMat*)srcBarr;
   2216     CvMat dststub, *dst = (CvMat*)dstarr;
   2217     int stepA, stepB, stepC;
   2218     int type, cn, is_1d;
   2219     int j, j0, j1, k, rows, cols, ncols;
   2220 
   2221     if( !CV_IS_MAT(srcA))
   2222         CV_CALL( srcA = cvGetMat( srcA, &stubA, 0 ));
   2223 
   2224     if( !CV_IS_MAT(srcB))
   2225         CV_CALL( srcB = cvGetMat( srcB, &stubB, 0 ));
   2226 
   2227     if( !CV_IS_MAT(dst))
   2228         CV_CALL( dst = cvGetMat( dst, &dststub, 0 ));
   2229 
   2230     if( !CV_ARE_TYPES_EQ( srcA, srcB ) || !CV_ARE_TYPES_EQ( srcA, dst ))
   2231         CV_ERROR( CV_StsUnmatchedFormats, "" );
   2232 
   2233     if( !CV_ARE_SIZES_EQ( srcA, srcB ) || !CV_ARE_SIZES_EQ( srcA, dst ))
   2234         CV_ERROR( CV_StsUnmatchedSizes, "" );
   2235 
   2236     type = CV_MAT_TYPE( dst->type );
   2237     cn = CV_MAT_CN(type);
   2238     rows = srcA->rows;
   2239     cols = srcA->cols;
   2240     is_1d = (flags & CV_DXT_ROWS) ||
   2241             (rows == 1 || (cols == 1 &&
   2242              CV_IS_MAT_CONT( srcA->type & srcB->type & dst->type )));
   2243 
   2244     if( is_1d && !(flags & CV_DXT_ROWS) )
   2245         cols = cols + rows - 1, rows = 1;
   2246     ncols = cols*cn;
   2247     j0 = cn == 1;
   2248     j1 = ncols - (cols % 2 == 0 && cn == 1);
   2249 
   2250     if( CV_MAT_DEPTH(type) == CV_32F )
   2251     {
   2252         float* dataA = srcA->data.fl;
   2253         float* dataB = srcB->data.fl;
   2254         float* dataC = dst->data.fl;
   2255 
   2256         stepA = srcA->step/sizeof(dataA[0]);
   2257         stepB = srcB->step/sizeof(dataB[0]);
   2258         stepC = dst->step/sizeof(dataC[0]);
   2259 
   2260         if( !is_1d && cn == 1 )
   2261         {
   2262             for( k = 0; k < (cols % 2 ? 1 : 2); k++ )
   2263             {
   2264                 if( k == 1 )
   2265                     dataA += cols - 1, dataB += cols - 1, dataC += cols - 1;
   2266                 dataC[0] = dataA[0]*dataB[0];
   2267                 if( rows % 2 == 0 )
   2268                     dataC[(rows-1)*stepC] = dataA[(rows-1)*stepA]*dataB[(rows-1)*stepB];
   2269                 if( !(flags & CV_DXT_MUL_CONJ) )
   2270                     for( j = 1; j <= rows - 2; j += 2 )
   2271                     {
   2272                         double re = (double)dataA[j*stepA]*dataB[j*stepB] -
   2273                                     (double)dataA[(j+1)*stepA]*dataB[(j+1)*stepB];
   2274                         double im = (double)dataA[j*stepA]*dataB[(j+1)*stepB] +
   2275                                     (double)dataA[(j+1)*stepA]*dataB[j*stepB];
   2276                         dataC[j*stepC] = (float)re; dataC[(j+1)*stepC] = (float)im;
   2277                     }
   2278                 else
   2279                     for( j = 1; j <= rows - 2; j += 2 )
   2280                     {
   2281                         double re = (double)dataA[j*stepA]*dataB[j*stepB] +
   2282                                     (double)dataA[(j+1)*stepA]*dataB[(j+1)*stepB];
   2283                         double im = (double)dataA[(j+1)*stepA]*dataB[j*stepB] -
   2284                                     (double)dataA[j*stepA]*dataB[(j+1)*stepB];
   2285                         dataC[j*stepC] = (float)re; dataC[(j+1)*stepC] = (float)im;
   2286                     }
   2287                 if( k == 1 )
   2288                     dataA -= cols - 1, dataB -= cols - 1, dataC -= cols - 1;
   2289             }
   2290         }
   2291 
   2292         for( ; rows--; dataA += stepA, dataB += stepB, dataC += stepC )
   2293         {
   2294             if( is_1d && cn == 1 )
   2295             {
   2296                 dataC[0] = dataA[0]*dataB[0];
   2297                 if( cols % 2 == 0 )
   2298                     dataC[j1] = dataA[j1]*dataB[j1];
   2299             }
   2300 
   2301             if( !(flags & CV_DXT_MUL_CONJ) )
   2302                 for( j = j0; j < j1; j += 2 )
   2303                 {
   2304                     double re = (double)dataA[j]*dataB[j] - (double)dataA[j+1]*dataB[j+1];
   2305                     double im = (double)dataA[j+1]*dataB[j] + (double)dataA[j]*dataB[j+1];
   2306                     dataC[j] = (float)re; dataC[j+1] = (float)im;
   2307                 }
   2308             else
   2309                 for( j = j0; j < j1; j += 2 )
   2310                 {
   2311                     double re = (double)dataA[j]*dataB[j] + (double)dataA[j+1]*dataB[j+1];
   2312                     double im = (double)dataA[j+1]*dataB[j] - (double)dataA[j]*dataB[j+1];
   2313                     dataC[j] = (float)re; dataC[j+1] = (float)im;
   2314                 }
   2315         }
   2316     }
   2317     else if( CV_MAT_DEPTH(type) == CV_64F )
   2318     {
   2319         double* dataA = srcA->data.db;
   2320         double* dataB = srcB->data.db;
   2321         double* dataC = dst->data.db;
   2322 
   2323         stepA = srcA->step/sizeof(dataA[0]);
   2324         stepB = srcB->step/sizeof(dataB[0]);
   2325         stepC = dst->step/sizeof(dataC[0]);
   2326 
   2327         if( !is_1d && cn == 1 )
   2328         {
   2329             for( k = 0; k < (cols % 2 ? 1 : 2); k++ )
   2330             {
   2331                 if( k == 1 )
   2332                     dataA += cols - 1, dataB += cols - 1, dataC += cols - 1;
   2333                 dataC[0] = dataA[0]*dataB[0];
   2334                 if( rows % 2 == 0 )
   2335                     dataC[(rows-1)*stepC] = dataA[(rows-1)*stepA]*dataB[(rows-1)*stepB];
   2336                 if( !(flags & CV_DXT_MUL_CONJ) )
   2337                     for( j = 1; j <= rows - 2; j += 2 )
   2338                     {
   2339                         double re = dataA[j*stepA]*dataB[j*stepB] -
   2340                                     dataA[(j+1)*stepA]*dataB[(j+1)*stepB];
   2341                         double im = dataA[j*stepA]*dataB[(j+1)*stepB] +
   2342                                     dataA[(j+1)*stepA]*dataB[j*stepB];
   2343                         dataC[j*stepC] = re; dataC[(j+1)*stepC] = im;
   2344                     }
   2345                 else
   2346                     for( j = 1; j <= rows - 2; j += 2 )
   2347                     {
   2348                         double re = dataA[j*stepA]*dataB[j*stepB] +
   2349                                     dataA[(j+1)*stepA]*dataB[(j+1)*stepB];
   2350                         double im = dataA[(j+1)*stepA]*dataB[j*stepB] -
   2351                                     dataA[j*stepA]*dataB[(j+1)*stepB];
   2352                         dataC[j*stepC] = re; dataC[(j+1)*stepC] = im;
   2353                     }
   2354                 if( k == 1 )
   2355                     dataA -= cols - 1, dataB -= cols - 1, dataC -= cols - 1;
   2356             }
   2357         }
   2358 
   2359         for( ; rows--; dataA += stepA, dataB += stepB, dataC += stepC )
   2360         {
   2361             if( is_1d && cn == 1 )
   2362             {
   2363                 dataC[0] = dataA[0]*dataB[0];
   2364                 if( cols % 2 == 0 )
   2365                     dataC[j1] = dataA[j1]*dataB[j1];
   2366             }
   2367 
   2368             if( !(flags & CV_DXT_MUL_CONJ) )
   2369                 for( j = j0; j < j1; j += 2 )
   2370                 {
   2371                     double re = dataA[j]*dataB[j] - dataA[j+1]*dataB[j+1];
   2372                     double im = dataA[j+1]*dataB[j] + dataA[j]*dataB[j+1];
   2373                     dataC[j] = re; dataC[j+1] = im;
   2374                 }
   2375             else
   2376                 for( j = j0; j < j1; j += 2 )
   2377                 {
   2378                     double re = dataA[j]*dataB[j] + dataA[j+1]*dataB[j+1];
   2379                     double im = dataA[j+1]*dataB[j] - dataA[j]*dataB[j+1];
   2380                     dataC[j] = re; dataC[j+1] = im;
   2381                 }
   2382         }
   2383     }
   2384     else
   2385     {
   2386         CV_ERROR( CV_StsUnsupportedFormat, "Only 32f and 64f types are supported" );
   2387     }
   2388 
   2389     __END__;
   2390 }
   2391 
   2392 
   2393 /****************************************************************************************\
   2394                                Discrete Cosine Transform
   2395 \****************************************************************************************/
   2396 
   2397 /* DCT is calculated using DFT, as described here:
   2398    http://www.ece.utexas.edu/~bevans/courses/ee381k/lectures/09_DCT/lecture9/:
   2399 */
   2400 #define ICV_DCT_FWD( flavor, datatype )                                 \
   2401 static CvStatus CV_STDCALL                                              \
   2402 icvDCT_fwd_##flavor( const datatype* src, int src_step, datatype* dft_src,\
   2403                      datatype* dft_dst, datatype* dst, int dst_step,    \
   2404                      int n, int nf, int* factors, const int* itab,      \
   2405                      const CvComplex##flavor* dft_wave,                 \
   2406                      const CvComplex##flavor* dct_wave,                 \
   2407                      const void* spec, CvComplex##flavor* buf )         \
   2408 {                                                                       \
   2409     int j, n2 = n >> 1;                                                 \
   2410                                                                         \
   2411     src_step /= sizeof(src[0]);                                         \
   2412     dst_step /= sizeof(dst[0]);                                         \
   2413     datatype* dst1 = dst + (n-1)*dst_step;                              \
   2414                                                                         \
   2415     if( n == 1 )                                                        \
   2416     {                                                                   \
   2417         dst[0] = src[0];                                                \
   2418         return CV_OK;                                                   \
   2419     }                                                                   \
   2420                                                                         \
   2421     for( j = 0; j < n2; j++, src += src_step*2 )                        \
   2422     {                                                                   \
   2423         dft_src[j] = src[0];                                            \
   2424         dft_src[n-j-1] = src[src_step];                                 \
   2425     }                                                                   \
   2426                                                                         \
   2427     icvRealDFT_##flavor( dft_src, dft_dst, n, nf, factors,              \
   2428                          itab, dft_wave, n, spec, buf, 0, 1.0 );        \
   2429     src = dft_dst;                                                      \
   2430                                                                         \
   2431     dst[0] = (datatype)(src[0]*dct_wave->re*icv_sin_45);                \
   2432     dst += dst_step;                                                    \
   2433     for( j = 1, dct_wave++; j < n2; j++, dct_wave++,                    \
   2434                                     dst += dst_step, dst1 -= dst_step ) \
   2435     {                                                                   \
   2436         double t0 = dct_wave->re*src[j*2-1] - dct_wave->im*src[j*2];    \
   2437         double t1 = -dct_wave->im*src[j*2-1] - dct_wave->re*src[j*2];   \
   2438         dst[0] = (datatype)t0;                                          \
   2439         dst1[0] = (datatype)t1;                                         \
   2440     }                                                                   \
   2441                                                                         \
   2442     dst[0] = (datatype)(src[n-1]*dct_wave->re);                         \
   2443     return CV_OK;                                                       \
   2444 }
   2445 
   2446 
   2447 #define ICV_DCT_INV( flavor, datatype )                                 \
   2448 static CvStatus CV_STDCALL                                              \
   2449 icvDCT_inv_##flavor( const datatype* src, int src_step, datatype* dft_src,\
   2450                      datatype* dft_dst, datatype* dst, int dst_step,    \
   2451                      int n, int nf, int* factors, const int* itab,      \
   2452                      const CvComplex##flavor* dft_wave,                 \
   2453                      const CvComplex##flavor* dct_wave,                 \
   2454                      const void* spec, CvComplex##flavor* buf )         \
   2455 {                                                                       \
   2456     int j, n2 = n >> 1;                                                 \
   2457                                                                         \
   2458     src_step /= sizeof(src[0]);                                         \
   2459     dst_step /= sizeof(dst[0]);                                         \
   2460     const datatype* src1 = src + (n-1)*src_step;                        \
   2461                                                                         \
   2462     if( n == 1 )                                                        \
   2463     {                                                                   \
   2464         dst[0] = src[0];                                                \
   2465         return CV_OK;                                                   \
   2466     }                                                                   \
   2467                                                                         \
   2468     dft_src[0] = (datatype)(src[0]*2*dct_wave->re*icv_sin_45);          \
   2469     src += src_step;                                                    \
   2470     for( j = 1, dct_wave++; j < n2; j++, dct_wave++,                    \
   2471                                     src += src_step, src1 -= src_step ) \
   2472     {                                                                   \
   2473         double t0 = dct_wave->re*src[0] - dct_wave->im*src1[0];         \
   2474         double t1 = -dct_wave->im*src[0] - dct_wave->re*src1[0];        \
   2475         dft_src[j*2-1] = (datatype)t0;                                  \
   2476         dft_src[j*2] = (datatype)t1;                                    \
   2477     }                                                                   \
   2478                                                                         \
   2479     dft_src[n-1] = (datatype)(src[0]*2*dct_wave->re);                   \
   2480     icvCCSIDFT_##flavor( dft_src, dft_dst, n, nf, factors, itab,        \
   2481                          dft_wave, n, spec, buf, CV_DXT_INVERSE, 1.0 ); \
   2482                                                                         \
   2483     for( j = 0; j < n2; j++, dst += dst_step*2 )                        \
   2484     {                                                                   \
   2485         dst[0] = dft_dst[j];                                            \
   2486         dst[dst_step] = dft_dst[n-j-1];                                 \
   2487     }                                                                   \
   2488     return CV_OK;                                                       \
   2489 }
   2490 
   2491 
   2492 ICV_DCT_FWD( 64f, double )
   2493 ICV_DCT_INV( 64f, double )
   2494 ICV_DCT_FWD( 32f, float )
   2495 ICV_DCT_INV( 32f, float )
   2496 
   2497 static void
   2498 icvDCTInit( int n, int elem_size, void* _wave, int inv )
   2499 {
   2500     static const double icvDctScale[] =
   2501     {
   2502     0.707106781186547570, 0.500000000000000000, 0.353553390593273790,
   2503     0.250000000000000000, 0.176776695296636890, 0.125000000000000000,
   2504     0.088388347648318447, 0.062500000000000000, 0.044194173824159223,
   2505     0.031250000000000000, 0.022097086912079612, 0.015625000000000000,
   2506     0.011048543456039806, 0.007812500000000000, 0.005524271728019903,
   2507     0.003906250000000000, 0.002762135864009952, 0.001953125000000000,
   2508     0.001381067932004976, 0.000976562500000000, 0.000690533966002488,
   2509     0.000488281250000000, 0.000345266983001244, 0.000244140625000000,
   2510     0.000172633491500622, 0.000122070312500000, 0.000086316745750311,
   2511     0.000061035156250000, 0.000043158372875155, 0.000030517578125000
   2512     };
   2513 
   2514     int i;
   2515     CvComplex64f w, w1;
   2516     double t, scale;
   2517 
   2518     if( n == 1 )
   2519         return;
   2520 
   2521     assert( (n&1) == 0 );
   2522 
   2523     if( (n & (n - 1)) == 0 )
   2524     {
   2525         int m = icvlog2(n);
   2526         scale = (!inv ? 2 : 1)*icvDctScale[m];
   2527         w1.re = icvDxtTab[m+2][0];
   2528         w1.im = -icvDxtTab[m+2][1];
   2529     }
   2530     else
   2531     {
   2532         t = 1./(2*n);
   2533         scale = (!inv ? 2 : 1)*sqrt(t);
   2534         w1.im = sin(-CV_PI*t);
   2535         w1.re = sqrt(1. - w1.im*w1.im);
   2536     }
   2537     n >>= 1;
   2538 
   2539     if( elem_size == sizeof(CvComplex64f) )
   2540     {
   2541         CvComplex64f* wave = (CvComplex64f*)_wave;
   2542 
   2543         w.re = scale;
   2544         w.im = 0.;
   2545 
   2546         for( i = 0; i <= n; i++ )
   2547         {
   2548             wave[i] = w;
   2549             t = w.re*w1.re - w.im*w1.im;
   2550             w.im = w.re*w1.im + w.im*w1.re;
   2551             w.re = t;
   2552         }
   2553     }
   2554     else
   2555     {
   2556         CvComplex32f* wave = (CvComplex32f*)_wave;
   2557         assert( elem_size == sizeof(CvComplex32f) );
   2558 
   2559         w.re = (float)scale;
   2560         w.im = 0.f;
   2561 
   2562         for( i = 0; i <= n; i++ )
   2563         {
   2564             wave[i].re = (float)w.re;
   2565             wave[i].im = (float)w.im;
   2566             t = w.re*w1.re - w.im*w1.im;
   2567             w.im = w.re*w1.im + w.im*w1.re;
   2568             w.re = t;
   2569         }
   2570     }
   2571 }
   2572 
   2573 
   2574 typedef CvStatus (CV_STDCALL * CvDCTFunc)(
   2575                 const void* src, int src_step, void* dft_src,
   2576                 void* dft_dst, void* dst, int dst_step, int n,
   2577                 int nf, int* factors, const int* itab, const void* dft_wave,
   2578                 const void* dct_wave, const void* spec, void* buf );
   2579 
   2580 CV_IMPL void
   2581 cvDCT( const CvArr* srcarr, CvArr* dstarr, int flags )
   2582 {
   2583     static CvDCTFunc dct_tbl[4];
   2584     static int inittab = 0;
   2585 
   2586     void* buffer = 0;
   2587     int local_alloc = 1;
   2588     int inv = (flags & CV_DXT_INVERSE) != 0, depth = -1;
   2589     void *spec_dft = 0, *spec = 0;
   2590 
   2591     CV_FUNCNAME( "cvDCT" );
   2592 
   2593     __BEGIN__;
   2594 
   2595     double scale = 1.;
   2596     int prev_len = 0, buf_size = 0, nf = 0, stage, end_stage;
   2597     CvMat *src = (CvMat*)srcarr, *dst = (CvMat*)dstarr;
   2598     uchar *src_dft_buf = 0, *dst_dft_buf = 0;
   2599     uchar *dft_wave = 0, *dct_wave = 0;
   2600     int* itab = 0;
   2601     uchar* ptr = 0;
   2602     CvMat srcstub, dststub;
   2603     int complex_elem_size, elem_size;
   2604     int factors[34], inplace_transform;
   2605     int i, len, count;
   2606     CvDCTFunc dct_func;
   2607 
   2608     if( !inittab )
   2609     {
   2610         dct_tbl[0] = (CvDCTFunc)icvDCT_fwd_32f;
   2611         dct_tbl[1] = (CvDCTFunc)icvDCT_inv_32f;
   2612         dct_tbl[2] = (CvDCTFunc)icvDCT_fwd_64f;
   2613         dct_tbl[3] = (CvDCTFunc)icvDCT_inv_64f;
   2614         inittab = 1;
   2615     }
   2616 
   2617     if( !CV_IS_MAT( src ))
   2618     {
   2619         int coi = 0;
   2620         CV_CALL( src = cvGetMat( src, &srcstub, &coi ));
   2621 
   2622         if( coi != 0 )
   2623             CV_ERROR( CV_BadCOI, "" );
   2624     }
   2625 
   2626     if( !CV_IS_MAT( dst ))
   2627     {
   2628         int coi = 0;
   2629         CV_CALL( dst = cvGetMat( dst, &dststub, &coi ));
   2630 
   2631         if( coi != 0 )
   2632             CV_ERROR( CV_BadCOI, "" );
   2633     }
   2634 
   2635     depth = CV_MAT_DEPTH(src->type);
   2636     elem_size = CV_ELEM_SIZE1(depth);
   2637     complex_elem_size = elem_size*2;
   2638 
   2639     // check types and sizes
   2640     if( !CV_ARE_TYPES_EQ(src, dst) )
   2641         CV_ERROR( CV_StsUnmatchedFormats, "" );
   2642 
   2643     if( depth < CV_32F || CV_MAT_CN(src->type) != 1 )
   2644         CV_ERROR( CV_StsUnsupportedFormat,
   2645         "Only 32fC1 and 64fC1 formats are supported" );
   2646 
   2647     dct_func = dct_tbl[inv + (depth == CV_64F)*2];
   2648 
   2649     if( (flags & CV_DXT_ROWS) || src->rows == 1 ||
   2650         (src->cols == 1 && CV_IS_MAT_CONT(src->type & dst->type)))
   2651     {
   2652         stage = end_stage = 0;
   2653     }
   2654     else
   2655     {
   2656         stage = src->cols == 1;
   2657         end_stage = 1;
   2658     }
   2659 
   2660     for( ; stage <= end_stage; stage++ )
   2661     {
   2662         uchar *sptr = src->data.ptr, *dptr = dst->data.ptr;
   2663         int sstep0, sstep1, dstep0, dstep1;
   2664 
   2665         if( stage == 0 )
   2666         {
   2667             len = src->cols;
   2668             count = src->rows;
   2669             if( len == 1 && !(flags & CV_DXT_ROWS) )
   2670             {
   2671                 len = src->rows;
   2672                 count = 1;
   2673             }
   2674             sstep0 = src->step;
   2675             dstep0 = dst->step;
   2676             sstep1 = dstep1 = elem_size;
   2677         }
   2678         else
   2679         {
   2680             len = dst->rows;
   2681             count = dst->cols;
   2682             sstep1 = src->step;
   2683             dstep1 = dst->step;
   2684             sstep0 = dstep0 = elem_size;
   2685         }
   2686 
   2687         if( len != prev_len )
   2688         {
   2689             int sz;
   2690 
   2691             if( len > 1 && (len & 1) )
   2692                 CV_ERROR( CV_StsNotImplemented, "Odd-size DCT\'s are not implemented" );
   2693 
   2694             sz = len*elem_size;
   2695             sz += (len/2 + 1)*complex_elem_size;
   2696 
   2697             spec = 0;
   2698             inplace_transform = 1;
   2699             if( len*count >= 64 && icvDFTInitAlloc_R_32f_p )
   2700             {
   2701                 int ipp_sz = 0;
   2702                 if( depth == CV_32F )
   2703                 {
   2704                     if( spec_dft )
   2705                         IPPI_CALL( icvDFTFree_R_32f_p( spec_dft ));
   2706                     IPPI_CALL( icvDFTInitAlloc_R_32f_p( &spec_dft, len, 8, cvAlgHintNone ));
   2707                     IPPI_CALL( icvDFTGetBufSize_R_32f_p( spec_dft, &ipp_sz ));
   2708                 }
   2709                 else
   2710                 {
   2711                     if( spec_dft )
   2712                         IPPI_CALL( icvDFTFree_R_64f_p( spec_dft ));
   2713                     IPPI_CALL( icvDFTInitAlloc_R_64f_p( &spec_dft, len, 8, cvAlgHintNone ));
   2714                     IPPI_CALL( icvDFTGetBufSize_R_64f_p( spec_dft, &ipp_sz ));
   2715                 }
   2716                 spec = spec_dft;
   2717                 sz += ipp_sz;
   2718             }
   2719             else
   2720             {
   2721                 sz += len*(complex_elem_size + sizeof(int)) + complex_elem_size;
   2722 
   2723                 nf = icvDFTFactorize( len, factors );
   2724                 inplace_transform = factors[0] == factors[nf-1];
   2725 
   2726                 i = nf > 1 && (factors[0] & 1) == 0;
   2727                 if( (factors[i] & 1) != 0 && factors[i] > 5 )
   2728                     sz += (factors[i]+1)*complex_elem_size;
   2729 
   2730                 if( !inplace_transform )
   2731                     sz += len*elem_size;
   2732             }
   2733 
   2734             if( sz > buf_size )
   2735             {
   2736                 if( !local_alloc && buffer )
   2737                     cvFree( &buffer );
   2738                 if( sz <= CV_MAX_LOCAL_DFT_SIZE )
   2739                 {
   2740                     buf_size = sz = CV_MAX_LOCAL_DFT_SIZE;
   2741                     buffer = cvStackAlloc(sz + 32);
   2742                     local_alloc = 1;
   2743                 }
   2744                 else
   2745                 {
   2746                     CV_CALL( buffer = cvAlloc(sz + 32) );
   2747                     buf_size = sz;
   2748                     local_alloc = 0;
   2749                 }
   2750             }
   2751 
   2752             ptr = (uchar*)buffer;
   2753             if( !spec )
   2754             {
   2755                 dft_wave = ptr;
   2756                 ptr += len*complex_elem_size;
   2757                 itab = (int*)ptr;
   2758                 ptr = (uchar*)cvAlignPtr( ptr + len*sizeof(int), 16 );
   2759                 icvDFTInit( len, nf, factors, itab, complex_elem_size, dft_wave, inv );
   2760             }
   2761 
   2762             dct_wave = ptr;
   2763             ptr += (len/2 + 1)*complex_elem_size;
   2764             src_dft_buf = dst_dft_buf = ptr;
   2765             ptr += len*elem_size;
   2766             if( !inplace_transform )
   2767             {
   2768                 dst_dft_buf = ptr;
   2769                 ptr += len*elem_size;
   2770             }
   2771             icvDCTInit( len, complex_elem_size, dct_wave, inv );
   2772             if( !inv )
   2773                 scale += scale;
   2774             prev_len = len;
   2775         }
   2776         // otherwise reuse the tables calculated on the previous stage
   2777 
   2778         for( i = 0; i < count; i++ )
   2779         {
   2780             dct_func( sptr + i*sstep0, sstep1, src_dft_buf, dst_dft_buf,
   2781                       dptr + i*dstep0, dstep1, len, nf, factors,
   2782                       itab, dft_wave, dct_wave, spec, ptr );
   2783         }
   2784         src = dst;
   2785     }
   2786 
   2787     __END__;
   2788 
   2789     if( spec_dft )
   2790     {
   2791         if( depth == CV_32F )
   2792             icvDFTFree_R_32f_p( spec_dft );
   2793         else
   2794             icvDFTFree_R_64f_p( spec_dft );
   2795     }
   2796 
   2797     if( buffer && !local_alloc )
   2798         cvFree( &buffer );
   2799 }
   2800 
   2801 
   2802 static const int icvOptimalDFTSize[] = {
   2803 1, 2, 3, 4, 5, 6, 8, 9, 10, 12, 15, 16, 18, 20, 24, 25, 27, 30, 32, 36, 40, 45, 48,
   2804 50, 54, 60, 64, 72, 75, 80, 81, 90, 96, 100, 108, 120, 125, 128, 135, 144, 150, 160,
   2805 162, 180, 192, 200, 216, 225, 240, 243, 250, 256, 270, 288, 300, 320, 324, 360, 375,
   2806 384, 400, 405, 432, 450, 480, 486, 500, 512, 540, 576, 600, 625, 640, 648, 675, 720,
   2807 729, 750, 768, 800, 810, 864, 900, 960, 972, 1000, 1024, 1080, 1125, 1152, 1200,
   2808 1215, 1250, 1280, 1296, 1350, 1440, 1458, 1500, 1536, 1600, 1620, 1728, 1800, 1875,
   2809 1920, 1944, 2000, 2025, 2048, 2160, 2187, 2250, 2304, 2400, 2430, 2500, 2560, 2592,
   2810 2700, 2880, 2916, 3000, 3072, 3125, 3200, 3240, 3375, 3456, 3600, 3645, 3750, 3840,
   2811 3888, 4000, 4050, 4096, 4320, 4374, 4500, 4608, 4800, 4860, 5000, 5120, 5184, 5400,
   2812 5625, 5760, 5832, 6000, 6075, 6144, 6250, 6400, 6480, 6561, 6750, 6912, 7200, 7290,
   2813 7500, 7680, 7776, 8000, 8100, 8192, 8640, 8748, 9000, 9216, 9375, 9600, 9720, 10000,
   2814 10125, 10240, 10368, 10800, 10935, 11250, 11520, 11664, 12000, 12150, 12288, 12500,
   2815 12800, 12960, 13122, 13500, 13824, 14400, 14580, 15000, 15360, 15552, 15625, 16000,
   2816 16200, 16384, 16875, 17280, 17496, 18000, 18225, 18432, 18750, 19200, 19440, 19683,
   2817 20000, 20250, 20480, 20736, 21600, 21870, 22500, 23040, 23328, 24000, 24300, 24576,
   2818 25000, 25600, 25920, 26244, 27000, 27648, 28125, 28800, 29160, 30000, 30375, 30720,
   2819 31104, 31250, 32000, 32400, 32768, 32805, 33750, 34560, 34992, 36000, 36450, 36864,
   2820 37500, 38400, 38880, 39366, 40000, 40500, 40960, 41472, 43200, 43740, 45000, 46080,
   2821 46656, 46875, 48000, 48600, 49152, 50000, 50625, 51200, 51840, 52488, 54000, 54675,
   2822 55296, 56250, 57600, 58320, 59049, 60000, 60750, 61440, 62208, 62500, 64000, 64800,
   2823 65536, 65610, 67500, 69120, 69984, 72000, 72900, 73728, 75000, 76800, 77760, 78125,
   2824 78732, 80000, 81000, 81920, 82944, 84375, 86400, 87480, 90000, 91125, 92160, 93312,
   2825 93750, 96000, 97200, 98304, 98415, 100000, 101250, 102400, 103680, 104976, 108000,
   2826 109350, 110592, 112500, 115200, 116640, 118098, 120000, 121500, 122880, 124416, 125000,
   2827 128000, 129600, 131072, 131220, 135000, 138240, 139968, 140625, 144000, 145800, 147456,
   2828 150000, 151875, 153600, 155520, 156250, 157464, 160000, 162000, 163840, 164025, 165888,
   2829 168750, 172800, 174960, 177147, 180000, 182250, 184320, 186624, 187500, 192000, 194400,
   2830 196608, 196830, 200000, 202500, 204800, 207360, 209952, 216000, 218700, 221184, 225000,
   2831 230400, 233280, 234375, 236196, 240000, 243000, 245760, 248832, 250000, 253125, 256000,
   2832 259200, 262144, 262440, 270000, 273375, 276480, 279936, 281250, 288000, 291600, 294912,
   2833 295245, 300000, 303750, 307200, 311040, 312500, 314928, 320000, 324000, 327680, 328050,
   2834 331776, 337500, 345600, 349920, 354294, 360000, 364500, 368640, 373248, 375000, 384000,
   2835 388800, 390625, 393216, 393660, 400000, 405000, 409600, 414720, 419904, 421875, 432000,
   2836 437400, 442368, 450000, 455625, 460800, 466560, 468750, 472392, 480000, 486000, 491520,
   2837 492075, 497664, 500000, 506250, 512000, 518400, 524288, 524880, 531441, 540000, 546750,
   2838 552960, 559872, 562500, 576000, 583200, 589824, 590490, 600000, 607500, 614400, 622080,
   2839 625000, 629856, 640000, 648000, 655360, 656100, 663552, 675000, 691200, 699840, 703125,
   2840 708588, 720000, 729000, 737280, 746496, 750000, 759375, 768000, 777600, 781250, 786432,
   2841 787320, 800000, 810000, 819200, 820125, 829440, 839808, 843750, 864000, 874800, 884736,
   2842 885735, 900000, 911250, 921600, 933120, 937500, 944784, 960000, 972000, 983040, 984150,
   2843 995328, 1000000, 1012500, 1024000, 1036800, 1048576, 1049760, 1062882, 1080000, 1093500,
   2844 1105920, 1119744, 1125000, 1152000, 1166400, 1171875, 1179648, 1180980, 1200000,
   2845 1215000, 1228800, 1244160, 1250000, 1259712, 1265625, 1280000, 1296000, 1310720,
   2846 1312200, 1327104, 1350000, 1366875, 1382400, 1399680, 1406250, 1417176, 1440000,
   2847 1458000, 1474560, 1476225, 1492992, 1500000, 1518750, 1536000, 1555200, 1562500,
   2848 1572864, 1574640, 1594323, 1600000, 1620000, 1638400, 1640250, 1658880, 1679616,
   2849 1687500, 1728000, 1749600, 1769472, 1771470, 1800000, 1822500, 1843200, 1866240,
   2850 1875000, 1889568, 1920000, 1944000, 1953125, 1966080, 1968300, 1990656, 2000000,
   2851 2025000, 2048000, 2073600, 2097152, 2099520, 2109375, 2125764, 2160000, 2187000,
   2852 2211840, 2239488, 2250000, 2278125, 2304000, 2332800, 2343750, 2359296, 2361960,
   2853 2400000, 2430000, 2457600, 2460375, 2488320, 2500000, 2519424, 2531250, 2560000,
   2854 2592000, 2621440, 2624400, 2654208, 2657205, 2700000, 2733750, 2764800, 2799360,
   2855 2812500, 2834352, 2880000, 2916000, 2949120, 2952450, 2985984, 3000000, 3037500,
   2856 3072000, 3110400, 3125000, 3145728, 3149280, 3188646, 3200000, 3240000, 3276800,
   2857 3280500, 3317760, 3359232, 3375000, 3456000, 3499200, 3515625, 3538944, 3542940,
   2858 3600000, 3645000, 3686400, 3732480, 3750000, 3779136, 3796875, 3840000, 3888000,
   2859 3906250, 3932160, 3936600, 3981312, 4000000, 4050000, 4096000, 4100625, 4147200,
   2860 4194304, 4199040, 4218750, 4251528, 4320000, 4374000, 4423680, 4428675, 4478976,
   2861 4500000, 4556250, 4608000, 4665600, 4687500, 4718592, 4723920, 4782969, 4800000,
   2862 4860000, 4915200, 4920750, 4976640, 5000000, 5038848, 5062500, 5120000, 5184000,
   2863 5242880, 5248800, 5308416, 5314410, 5400000, 5467500, 5529600, 5598720, 5625000,
   2864 5668704, 5760000, 5832000, 5859375, 5898240, 5904900, 5971968, 6000000, 6075000,
   2865 6144000, 6220800, 6250000, 6291456, 6298560, 6328125, 6377292, 6400000, 6480000,
   2866 6553600, 6561000, 6635520, 6718464, 6750000, 6834375, 6912000, 6998400, 7031250,
   2867 7077888, 7085880, 7200000, 7290000, 7372800, 7381125, 7464960, 7500000, 7558272,
   2868 7593750, 7680000, 7776000, 7812500, 7864320, 7873200, 7962624, 7971615, 8000000,
   2869 8100000, 8192000, 8201250, 8294400, 8388608, 8398080, 8437500, 8503056, 8640000,
   2870 8748000, 8847360, 8857350, 8957952, 9000000, 9112500, 9216000, 9331200, 9375000,
   2871 9437184, 9447840, 9565938, 9600000, 9720000, 9765625, 9830400, 9841500, 9953280,
   2872 10000000, 10077696, 10125000, 10240000, 10368000, 10485760, 10497600, 10546875, 10616832,
   2873 10628820, 10800000, 10935000, 11059200, 11197440, 11250000, 11337408, 11390625, 11520000,
   2874 11664000, 11718750, 11796480, 11809800, 11943936, 12000000, 12150000, 12288000, 12301875,
   2875 12441600, 12500000, 12582912, 12597120, 12656250, 12754584, 12800000, 12960000, 13107200,
   2876 13122000, 13271040, 13286025, 13436928, 13500000, 13668750, 13824000, 13996800, 14062500,
   2877 14155776, 14171760, 14400000, 14580000, 14745600, 14762250, 14929920, 15000000, 15116544,
   2878 15187500, 15360000, 15552000, 15625000, 15728640, 15746400, 15925248, 15943230, 16000000,
   2879 16200000, 16384000, 16402500, 16588800, 16777216, 16796160, 16875000, 17006112, 17280000,
   2880 17496000, 17578125, 17694720, 17714700, 17915904, 18000000, 18225000, 18432000, 18662400,
   2881 18750000, 18874368, 18895680, 18984375, 19131876, 19200000, 19440000, 19531250, 19660800,
   2882 19683000, 19906560, 20000000, 20155392, 20250000, 20480000, 20503125, 20736000, 20971520,
   2883 20995200, 21093750, 21233664, 21257640, 21600000, 21870000, 22118400, 22143375, 22394880,
   2884 22500000, 22674816, 22781250, 23040000, 23328000, 23437500, 23592960, 23619600, 23887872,
   2885 23914845, 24000000, 24300000, 24576000, 24603750, 24883200, 25000000, 25165824, 25194240,
   2886 25312500, 25509168, 25600000, 25920000, 26214400, 26244000, 26542080, 26572050, 26873856,
   2887 27000000, 27337500, 27648000, 27993600, 28125000, 28311552, 28343520, 28800000, 29160000,
   2888 29296875, 29491200, 29524500, 29859840, 30000000, 30233088, 30375000, 30720000, 31104000,
   2889 31250000, 31457280, 31492800, 31640625, 31850496, 31886460, 32000000, 32400000, 32768000,
   2890 32805000, 33177600, 33554432, 33592320, 33750000, 34012224, 34171875, 34560000, 34992000,
   2891 35156250, 35389440, 35429400, 35831808, 36000000, 36450000, 36864000, 36905625, 37324800,
   2892 37500000, 37748736, 37791360, 37968750, 38263752, 38400000, 38880000, 39062500, 39321600,
   2893 39366000, 39813120, 39858075, 40000000, 40310784, 40500000, 40960000, 41006250, 41472000,
   2894 41943040, 41990400, 42187500, 42467328, 42515280, 43200000, 43740000, 44236800, 44286750,
   2895 44789760, 45000000, 45349632, 45562500, 46080000, 46656000, 46875000, 47185920, 47239200,
   2896 47775744, 47829690, 48000000, 48600000, 48828125, 49152000, 49207500, 49766400, 50000000,
   2897 50331648, 50388480, 50625000, 51018336, 51200000, 51840000, 52428800, 52488000, 52734375,
   2898 53084160, 53144100, 53747712, 54000000, 54675000, 55296000, 55987200, 56250000, 56623104,
   2899 56687040, 56953125, 57600000, 58320000, 58593750, 58982400, 59049000, 59719680, 60000000,
   2900 60466176, 60750000, 61440000, 61509375, 62208000, 62500000, 62914560, 62985600, 63281250,
   2901 63700992, 63772920, 64000000, 64800000, 65536000, 65610000, 66355200, 66430125, 67108864,
   2902 67184640, 67500000, 68024448, 68343750, 69120000, 69984000, 70312500, 70778880, 70858800,
   2903 71663616, 72000000, 72900000, 73728000, 73811250, 74649600, 75000000, 75497472, 75582720,
   2904 75937500, 76527504, 76800000, 77760000, 78125000, 78643200, 78732000, 79626240, 79716150,
   2905 80000000, 80621568, 81000000, 81920000, 82012500, 82944000, 83886080, 83980800, 84375000,
   2906 84934656, 85030560, 86400000, 87480000, 87890625, 88473600, 88573500, 89579520, 90000000,
   2907 90699264, 91125000, 92160000, 93312000, 93750000, 94371840, 94478400, 94921875, 95551488,
   2908 95659380, 96000000, 97200000, 97656250, 98304000, 98415000, 99532800, 100000000,
   2909 100663296, 100776960, 101250000, 102036672, 102400000, 102515625, 103680000, 104857600,
   2910 104976000, 105468750, 106168320, 106288200, 107495424, 108000000, 109350000, 110592000,
   2911 110716875, 111974400, 112500000, 113246208, 113374080, 113906250, 115200000, 116640000,
   2912 117187500, 117964800, 118098000, 119439360, 119574225, 120000000, 120932352, 121500000,
   2913 122880000, 123018750, 124416000, 125000000, 125829120, 125971200, 126562500, 127401984,
   2914 127545840, 128000000, 129600000, 131072000, 131220000, 132710400, 132860250, 134217728,
   2915 134369280, 135000000, 136048896, 136687500, 138240000, 139968000, 140625000, 141557760,
   2916 141717600, 143327232, 144000000, 145800000, 146484375, 147456000, 147622500, 149299200,
   2917 150000000, 150994944, 151165440, 151875000, 153055008, 153600000, 155520000, 156250000,
   2918 157286400, 157464000, 158203125, 159252480, 159432300, 160000000, 161243136, 162000000,
   2919 163840000, 164025000, 165888000, 167772160, 167961600, 168750000, 169869312, 170061120,
   2920 170859375, 172800000, 174960000, 175781250, 176947200, 177147000, 179159040, 180000000,
   2921 181398528, 182250000, 184320000, 184528125, 186624000, 187500000, 188743680, 188956800,
   2922 189843750, 191102976, 191318760, 192000000, 194400000, 195312500, 196608000, 196830000,
   2923 199065600, 199290375, 200000000, 201326592, 201553920, 202500000, 204073344, 204800000,
   2924 205031250, 207360000, 209715200, 209952000, 210937500, 212336640, 212576400, 214990848,
   2925 216000000, 218700000, 221184000, 221433750, 223948800, 225000000, 226492416, 226748160,
   2926 227812500, 230400000, 233280000, 234375000, 235929600, 236196000, 238878720, 239148450,
   2927 240000000, 241864704, 243000000, 244140625, 245760000, 246037500, 248832000, 250000000,
   2928 251658240, 251942400, 253125000, 254803968, 255091680, 256000000, 259200000, 262144000,
   2929 262440000, 263671875, 265420800, 265720500, 268435456, 268738560, 270000000, 272097792,
   2930 273375000, 276480000, 279936000, 281250000, 283115520, 283435200, 284765625, 286654464,
   2931 288000000, 291600000, 292968750, 294912000, 295245000, 298598400, 300000000, 301989888,
   2932 302330880, 303750000, 306110016, 307200000, 307546875, 311040000, 312500000, 314572800,
   2933 314928000, 316406250, 318504960, 318864600, 320000000, 322486272, 324000000, 327680000,
   2934 328050000, 331776000, 332150625, 335544320, 335923200, 337500000, 339738624, 340122240,
   2935 341718750, 345600000, 349920000, 351562500, 353894400, 354294000, 358318080, 360000000,
   2936 362797056, 364500000, 368640000, 369056250, 373248000, 375000000, 377487360, 377913600,
   2937 379687500, 382205952, 382637520, 384000000, 388800000, 390625000, 393216000, 393660000,
   2938 398131200, 398580750, 400000000, 402653184, 403107840, 405000000, 408146688, 409600000,
   2939 410062500, 414720000, 419430400, 419904000, 421875000, 424673280, 425152800, 429981696,
   2940 432000000, 437400000, 439453125, 442368000, 442867500, 447897600, 450000000, 452984832,
   2941 453496320, 455625000, 460800000, 466560000, 468750000, 471859200, 472392000, 474609375,
   2942 477757440, 478296900, 480000000, 483729408, 486000000, 488281250, 491520000, 492075000,
   2943 497664000, 500000000, 503316480, 503884800, 506250000, 509607936, 510183360, 512000000,
   2944 512578125, 518400000, 524288000, 524880000, 527343750, 530841600, 531441000, 536870912,
   2945 537477120, 540000000, 544195584, 546750000, 552960000, 553584375, 559872000, 562500000,
   2946 566231040, 566870400, 569531250, 573308928, 576000000, 583200000, 585937500, 589824000,
   2947 590490000, 597196800, 597871125, 600000000, 603979776, 604661760, 607500000, 612220032,
   2948 614400000, 615093750, 622080000, 625000000, 629145600, 629856000, 632812500, 637009920,
   2949 637729200, 640000000, 644972544, 648000000, 655360000, 656100000, 663552000, 664301250,
   2950 671088640, 671846400, 675000000, 679477248, 680244480, 683437500, 691200000, 699840000,
   2951 703125000, 707788800, 708588000, 716636160, 720000000, 725594112, 729000000, 732421875,
   2952 737280000, 738112500, 746496000, 750000000, 754974720, 755827200, 759375000, 764411904,
   2953 765275040, 768000000, 777600000, 781250000, 786432000, 787320000, 791015625, 796262400,
   2954 797161500, 800000000, 805306368, 806215680, 810000000, 816293376, 819200000, 820125000,
   2955 829440000, 838860800, 839808000, 843750000, 849346560, 850305600, 854296875, 859963392,
   2956 864000000, 874800000, 878906250, 884736000, 885735000, 895795200, 900000000, 905969664,
   2957 906992640, 911250000, 921600000, 922640625, 933120000, 937500000, 943718400, 944784000,
   2958 949218750, 955514880, 956593800, 960000000, 967458816, 972000000, 976562500, 983040000,
   2959 984150000, 995328000, 996451875, 1000000000, 1006632960, 1007769600, 1012500000,
   2960 1019215872, 1020366720, 1024000000, 1025156250, 1036800000, 1048576000, 1049760000,
   2961 1054687500, 1061683200, 1062882000, 1073741824, 1074954240, 1080000000, 1088391168,
   2962 1093500000, 1105920000, 1107168750, 1119744000, 1125000000, 1132462080, 1133740800,
   2963 1139062500, 1146617856, 1152000000, 1166400000, 1171875000, 1179648000, 1180980000,
   2964 1194393600, 1195742250, 1200000000, 1207959552, 1209323520, 1215000000, 1220703125,
   2965 1224440064, 1228800000, 1230187500, 1244160000, 1250000000, 1258291200, 1259712000,
   2966 1265625000, 1274019840, 1275458400, 1280000000, 1289945088, 1296000000, 1310720000,
   2967 1312200000, 1318359375, 1327104000, 1328602500, 1342177280, 1343692800, 1350000000,
   2968 1358954496, 1360488960, 1366875000, 1382400000, 1399680000, 1406250000, 1415577600,
   2969 1417176000, 1423828125, 1433272320, 1440000000, 1451188224, 1458000000, 1464843750,
   2970 1474560000, 1476225000, 1492992000, 1500000000, 1509949440, 1511654400, 1518750000,
   2971 1528823808, 1530550080, 1536000000, 1537734375, 1555200000, 1562500000, 1572864000,
   2972 1574640000, 1582031250, 1592524800, 1594323000, 1600000000, 1610612736, 1612431360,
   2973 1620000000, 1632586752, 1638400000, 1640250000, 1658880000, 1660753125, 1677721600,
   2974 1679616000, 1687500000, 1698693120, 1700611200, 1708593750, 1719926784, 1728000000,
   2975 1749600000, 1757812500, 1769472000, 1771470000, 1791590400, 1800000000, 1811939328,
   2976 1813985280, 1822500000, 1843200000, 1845281250, 1866240000, 1875000000, 1887436800,
   2977 1889568000, 1898437500, 1911029760, 1913187600, 1920000000, 1934917632, 1944000000,
   2978 1953125000, 1966080000, 1968300000, 1990656000, 1992903750, 2000000000, 2013265920,
   2979 2015539200, 2025000000, 2038431744, 2040733440, 2048000000, 2050312500, 2073600000,
   2980 2097152000, 2099520000, 2109375000, 2123366400, 2125764000
   2981 };
   2982 
   2983 
   2984 CV_IMPL int
   2985 cvGetOptimalDFTSize( int size0 )
   2986 {
   2987     int a = 0, b = sizeof(icvOptimalDFTSize)/sizeof(icvOptimalDFTSize[0]) - 1;
   2988     if( (unsigned)size0 >= (unsigned)icvOptimalDFTSize[b] )
   2989         return -1;
   2990 
   2991     while( a < b )
   2992     {
   2993         int c = (a + b) >> 1;
   2994         if( size0 <= icvOptimalDFTSize[c] )
   2995             b = c;
   2996         else
   2997             a = c+1;
   2998     }
   2999 
   3000     return icvOptimalDFTSize[b];
   3001 }
   3002 
   3003 /* End of file. */
   3004