Home | History | Annotate | Download | only in src
      1 
      2 /* -----------------------------------------------------------------------------------------------------------
      3 Software License for The Fraunhofer FDK AAC Codec Library for Android
      4 
      5  Copyright  1995 - 2012 Fraunhofer-Gesellschaft zur Frderung der angewandten Forschung e.V.
      6   All rights reserved.
      7 
      8  1.    INTRODUCTION
      9 The Fraunhofer FDK AAC Codec Library for Android ("FDK AAC Codec") is software that implements
     10 the MPEG Advanced Audio Coding ("AAC") encoding and decoding scheme for digital audio.
     11 This FDK AAC Codec software is intended to be used on a wide variety of Android devices.
     12 
     13 AAC's HE-AAC and HE-AAC v2 versions are regarded as today's most efficient general perceptual
     14 audio codecs. AAC-ELD is considered the best-performing full-bandwidth communications codec by
     15 independent studies and is widely deployed. AAC has been standardized by ISO and IEC as part
     16 of the MPEG specifications.
     17 
     18 Patent licenses for necessary patent claims for the FDK AAC Codec (including those of Fraunhofer)
     19 may be obtained through Via Licensing (www.vialicensing.com) or through the respective patent owners
     20 individually for the purpose of encoding or decoding bit streams in products that are compliant with
     21 the ISO/IEC MPEG audio standards. Please note that most manufacturers of Android devices already license
     22 these patent claims through Via Licensing or directly from the patent owners, and therefore FDK AAC Codec
     23 software may already be covered under those patent licenses when it is used for those licensed purposes only.
     24 
     25 Commercially-licensed AAC software libraries, including floating-point versions with enhanced sound quality,
     26 are also available from Fraunhofer. Users are encouraged to check the Fraunhofer website for additional
     27 applications information and documentation.
     28 
     29 2.    COPYRIGHT LICENSE
     30 
     31 Redistribution and use in source and binary forms, with or without modification, are permitted without
     32 payment of copyright license fees provided that you satisfy the following conditions:
     33 
     34 You must retain the complete text of this software license in redistributions of the FDK AAC Codec or
     35 your modifications thereto in source code form.
     36 
     37 You must retain the complete text of this software license in the documentation and/or other materials
     38 provided with redistributions of the FDK AAC Codec or your modifications thereto in binary form.
     39 You must make available free of charge copies of the complete source code of the FDK AAC Codec and your
     40 modifications thereto to recipients of copies in binary form.
     41 
     42 The name of Fraunhofer may not be used to endorse or promote products derived from this library without
     43 prior written permission.
     44 
     45 You may not charge copyright license fees for anyone to use, copy or distribute the FDK AAC Codec
     46 software or your modifications thereto.
     47 
     48 Your modified versions of the FDK AAC Codec must carry prominent notices stating that you changed the software
     49 and the date of any change. For modified versions of the FDK AAC Codec, the term
     50 "Fraunhofer FDK AAC Codec Library for Android" must be replaced by the term
     51 "Third-Party Modified Version of the Fraunhofer FDK AAC Codec Library for Android."
     52 
     53 3.    NO PATENT LICENSE
     54 
     55 NO EXPRESS OR IMPLIED LICENSES TO ANY PATENT CLAIMS, including without limitation the patents of Fraunhofer,
     56 ARE GRANTED BY THIS SOFTWARE LICENSE. Fraunhofer provides no warranty of patent non-infringement with
     57 respect to this software.
     58 
     59 You may use this FDK AAC Codec software or modifications thereto only for purposes that are authorized
     60 by appropriate patent licenses.
     61 
     62 4.    DISCLAIMER
     63 
     64 This FDK AAC Codec software is provided by Fraunhofer on behalf of the copyright holders and contributors
     65 "AS IS" and WITHOUT ANY EXPRESS OR IMPLIED WARRANTIES, including but not limited to the implied warranties
     66 of merchantability and fitness for a particular purpose. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
     67 CONTRIBUTORS BE LIABLE for any direct, indirect, incidental, special, exemplary, or consequential damages,
     68 including but not limited to procurement of substitute goods or services; loss of use, data, or profits,
     69 or business interruption, however caused and on any theory of liability, whether in contract, strict
     70 liability, or tort (including negligence), arising in any way out of the use of this software, even if
     71 advised of the possibility of such damage.
     72 
     73 5.    CONTACT INFORMATION
     74 
     75 Fraunhofer Institute for Integrated Circuits IIS
     76 Attention: Audio and Multimedia Departments - FDK AAC LL
     77 Am Wolfsmantel 33
     78 91058 Erlangen, Germany
     79 
     80 www.iis.fraunhofer.de/amm
     81 amm-info (at) iis.fraunhofer.de
     82 ----------------------------------------------------------------------------------------------------------- */
     83 
     84 /***************************  Fraunhofer IIS FDK Tools  **********************
     85 
     86    Author(s):   Josef Hoepfl, DSP Solutions
     87    Description: Fix point FFT
     88 
     89 ******************************************************************************/
     90 
     91 #include "fft.h"
     92 
     93 #include "fft_rad2.h"
     94 #include "FDK_tools_rom.h"
     95 
     96 
     97 
     98 
     99 
    100 #define F3C(x) STC(x)
    101 
    102 #define     C31       (F3C(0x91261468))      /* FL2FXCONST_DBL(-0.86602540)   */
    103 
    104 /* Performs the FFT of length 3 according to the algorithm after winograd.
    105    No scaling of the input vector because the scaling is already done in the rotation vector. */
    106 static FORCEINLINE void fft3(FIXP_DBL *RESTRICT pDat)
    107 {
    108   FIXP_DBL r1,r2;
    109   FIXP_DBL s1,s2;
    110   /* real part */
    111   r1      = pDat[2] + pDat[4];
    112   r2      = fMult((pDat[2] - pDat[4]), C31);
    113   pDat[0] = pDat[0] + r1;
    114   r1      = pDat[0] - r1 - (r1>>1);
    115 
    116   /* imaginary part */
    117   s1      = pDat[3] + pDat[5];
    118   s2      = fMult((pDat[3] - pDat[5]), C31);
    119   pDat[1] = pDat[1] + s1;
    120   s1      = pDat[1] - s1 - (s1>>1);
    121 
    122   /* combination */
    123   pDat[2] = r1 - s2;
    124   pDat[4] = r1 + s2;
    125   pDat[3] = s1 + r2;
    126   pDat[5] = s1 - r2;
    127 }
    128 
    129 
    130 #define F5C(x) STC(x)
    131 
    132 #define     C51       (F5C(0x79bc3854))      /* FL2FXCONST_DBL( 0.95105652)   */
    133 #define     C52       (F5C(0x9d839db0))      /* FL2FXCONST_DBL(-1.53884180/2) */
    134 #define     C53       (F5C(0xd18053ce))      /* FL2FXCONST_DBL(-0.36327126)   */
    135 #define     C54       (F5C(0x478dde64))      /* FL2FXCONST_DBL( 0.55901699)   */
    136 #define     C55       (F5C(0xb0000001))      /* FL2FXCONST_DBL(-1.25/2)       */
    137 
    138 /* performs the FFT of length 5 according to the algorithm after winograd */
    139 static FORCEINLINE void fft5(FIXP_DBL *RESTRICT pDat)
    140 {
    141   FIXP_DBL r1,r2,r3,r4;
    142   FIXP_DBL s1,s2,s3,s4;
    143   FIXP_DBL t;
    144 
    145   /* real part */
    146   r1      = pDat[2] + pDat[8];
    147   r4      = pDat[2] - pDat[8];
    148   r3      = pDat[4] + pDat[6];
    149   r2      = pDat[4] - pDat[6];
    150   t       = fMult((r1-r3), C54);
    151   r1      = r1 + r3;
    152   pDat[0] = pDat[0] + r1;
    153   /* Bit shift left because of the constant C55 which was scaled with the factor 0.5 because of the representation of
    154      the values as fracts */
    155   r1      = pDat[0] + (fMultDiv2(r1, C55) <<(2));
    156   r3      = r1 - t;
    157   r1      = r1 + t;
    158   t       = fMult((r4 + r2), C51);
    159   /* Bit shift left because of the constant C55 which was scaled with the factor 0.5 because of the representation of
    160      the values as fracts */
    161   r4      = t + (fMultDiv2(r4, C52) <<(2));
    162   r2      = t + fMult(r2, C53);
    163 
    164   /* imaginary part */
    165   s1      = pDat[3] + pDat[9];
    166   s4      = pDat[3] - pDat[9];
    167   s3      = pDat[5] + pDat[7];
    168   s2      = pDat[5] - pDat[7];
    169   t       = fMult((s1 - s3), C54);
    170   s1      = s1 + s3;
    171   pDat[1] = pDat[1] + s1;
    172   /* Bit shift left because of the constant C55 which was scaled with the factor 0.5 because of the representation of
    173      the values as fracts */
    174   s1      = pDat[1] + (fMultDiv2(s1, C55) <<(2));
    175   s3      = s1 - t;
    176   s1      = s1 + t;
    177   t       = fMult((s4 + s2), C51);
    178   /* Bit shift left because of the constant C55 which was scaled with the factor 0.5 because of the representation of
    179      the values as fracts */
    180   s4      = t + (fMultDiv2(s4, C52) <<(2));
    181   s2      = t + fMult(s2, C53);
    182 
    183   /* combination */
    184   pDat[2] = r1 + s2;
    185   pDat[8] = r1 - s2;
    186   pDat[4] = r3 - s4;
    187   pDat[6] = r3 + s4;
    188 
    189   pDat[3] = s1 - r2;
    190   pDat[9] = s1 + r2;
    191   pDat[5] = s3 + r4;
    192   pDat[7] = s3 - r4;
    193 }
    194 
    195 
    196 
    197 
    198 #define N3                    3
    199 #define N5                    5
    200 #define N6                    6
    201 #define N15                   15
    202 
    203 /* Performs the FFT of length 15. It is split into FFTs of length 3 and length 5. */
    204 static inline void fft15(FIXP_DBL *pInput)
    205 {
    206   FIXP_DBL  aDst[2*N15];
    207   FIXP_DBL  aDst1[2*N15];
    208   int    i,k,l;
    209 
    210   /* Sort input vector for fft's of length 3
    211   input3(0:2)   = [input(0) input(5) input(10)];
    212   input3(3:5)   = [input(3) input(8) input(13)];
    213   input3(6:8)   = [input(6) input(11) input(1)];
    214   input3(9:11)  = [input(9) input(14) input(4)];
    215   input3(12:14) = [input(12) input(2) input(7)]; */
    216   {
    217     const FIXP_DBL *pSrc = pInput;
    218     FIXP_DBL *RESTRICT pDst = aDst;
    219     /* Merge 3 loops into one, skip call of fft3 */
    220     for(i=0,l=0,k=0; i<N5; i++, k+=6)
    221     {
    222       pDst[k+0] = pSrc[l];
    223       pDst[k+1] = pSrc[l+1];
    224       l += 2*N5;
    225       if (l >= (2*N15))
    226           l -= (2*N15);
    227 
    228       pDst[k+2] = pSrc[l];
    229       pDst[k+3] = pSrc[l+1];
    230       l += 2*N5;
    231       if (l >= (2*N15))
    232           l -= (2*N15);
    233       pDst[k+4] = pSrc[l];
    234       pDst[k+5] = pSrc[l+1];
    235       l += (2*N5) + (2*N3);
    236       if (l >= (2*N15))
    237           l -= (2*N15);
    238 
    239       /* fft3 merged with shift right by 2 loop */
    240       FIXP_DBL r1,r2,r3;
    241       FIXP_DBL s1,s2;
    242       /* real part */
    243       r1      = pDst[k+2] + pDst[k+4];
    244       r2      = fMult((pDst[k+2] - pDst[k+4]), C31);
    245       s1      = pDst[k+0];
    246       pDst[k+0] = (s1 + r1)>>2;
    247       r1      = s1 - (r1>>1);
    248 
    249       /* imaginary part */
    250       s1      = pDst[k+3] + pDst[k+5];
    251       s2      = fMult((pDst[k+3] - pDst[k+5]), C31);
    252       r3      = pDst[k+1];
    253       pDst[k+1] = (r3 + s1)>>2;
    254       s1      = r3 - (s1>>1);
    255 
    256       /* combination */
    257       pDst[k+2] = (r1 - s2)>>2;
    258       pDst[k+4] = (r1 + s2)>>2;
    259       pDst[k+3] = (s1 + r2)>>2;
    260       pDst[k+5] = (s1 - r2)>>2;
    261     }
    262   }
    263   /* Sort input vector for fft's of length 5
    264   input5(0:4)   = [output3(0) output3(3) output3(6) output3(9) output3(12)];
    265   input5(5:9)   = [output3(1) output3(4) output3(7) output3(10) output3(13)];
    266   input5(10:14) = [output3(2) output3(5) output3(8) output3(11) output3(14)]; */
    267   /* Merge 2 loops into one, brings about 10% */
    268   {
    269     const FIXP_DBL *pSrc = aDst;
    270     FIXP_DBL *RESTRICT pDst = aDst1;
    271     for(i=0,l=0,k=0; i<N3; i++, k+=10)
    272     {
    273       l = 2*i;
    274       pDst[k+0] = pSrc[l+0];
    275       pDst[k+1] = pSrc[l+1];
    276       pDst[k+2] = pSrc[l+0+(2*N3)];
    277       pDst[k+3] = pSrc[l+1+(2*N3)];
    278       pDst[k+4] = pSrc[l+0+(4*N3)];
    279       pDst[k+5] = pSrc[l+1+(4*N3)];
    280       pDst[k+6] = pSrc[l+0+(6*N3)];
    281       pDst[k+7] = pSrc[l+1+(6*N3)];
    282       pDst[k+8] = pSrc[l+0+(8*N3)];
    283       pDst[k+9] = pSrc[l+1+(8*N3)];
    284       fft5(&pDst[k]);
    285     }
    286   }
    287   /* Sort output vector of length 15
    288   output = [out5(0)  out5(6)  out5(12) out5(3)  out5(9)
    289             out5(10) out5(1)  out5(7)  out5(13) out5(4)
    290             out5(5)  out5(11) out5(2)  out5(8)  out5(14)]; */
    291   /* optimize clumsy loop, brings about 5% */
    292   {
    293     const FIXP_DBL *pSrc = aDst1;
    294     FIXP_DBL *RESTRICT pDst = pInput;
    295     for(i=0,l=0,k=0; i<N3; i++, k+=10)
    296     {
    297       pDst[k+0] = pSrc[l];
    298       pDst[k+1] = pSrc[l+1];
    299       l += (2*N6);
    300       if (l >= (2*N15))
    301           l -= (2*N15);
    302       pDst[k+2] = pSrc[l];
    303       pDst[k+3] = pSrc[l+1];
    304       l += (2*N6);
    305       if (l >= (2*N15))
    306           l -= (2*N15);
    307       pDst[k+4] = pSrc[l];
    308       pDst[k+5] = pSrc[l+1];
    309       l += (2*N6);
    310       if (l >= (2*N15))
    311           l -= (2*N15);
    312       pDst[k+6] = pSrc[l];
    313       pDst[k+7] = pSrc[l+1];
    314       l += (2*N6);
    315       if (l >= (2*N15))
    316           l -= (2*N15);
    317       pDst[k+8] = pSrc[l];
    318       pDst[k+9] = pSrc[l+1];
    319       l += 2;    /* no modulo check needed, it cannot occur */
    320     }
    321   }
    322 }
    323 
    324 #define W_PiFOURTH STC(0x5a82799a)
    325 #ifndef SUMDIFF_PIFOURTH
    326 #define SUMDIFF_PIFOURTH(diff,sum,a,b) \
    327   { \
    328     FIXP_DBL wa, wb;\
    329     wa = fMultDiv2(a, W_PiFOURTH);\
    330     wb = fMultDiv2(b, W_PiFOURTH);\
    331     diff = wb - wa;\
    332     sum  = wb + wa;\
    333   }
    334 #endif
    335 
    336 /* This version is more overflow save, but less cycle optimal. */
    337 #define SUMDIFF_EIGTH(x, y, ix, iy, vr, vi, ur, ui) \
    338   vr = (x[ 0 + ix]>>1) + (x[16 + ix]>>1);  /* Re A + Re B */ \
    339   vi = (x[ 8 + ix]>>1) + (x[24 + ix]>>1);     /* Re C + Re D */ \
    340   ur = (x[ 1 + ix]>>1) + (x[17 + ix]>>1);  /* Im A + Im B */ \
    341   ui = (x[ 9 + ix]>>1) + (x[25 + ix]>>1);     /* Im C + Im D */ \
    342   y[ 0 + iy] = vr + vi;     /* Re A' = ReA + ReB +ReC + ReD */    \
    343   y[ 4 + iy] = vr - vi;     /* Re C' = -(ReC+ReD) + (ReA+ReB) */  \
    344   y[ 1 + iy] = ur + ui;     /* Im A' = sum of imag values */      \
    345   y[ 5 + iy] = ur - ui;     /* Im C' = -Im C -Im D +Im A +Im B */ \
    346   vr -= x[16 + ix];              /* Re A - Re B */ \
    347   vi = vi - x[24 + ix];          /* Re C - Re D */ \
    348   ur -= x[17 + ix];              /* Im A - Im B */ \
    349   ui = ui - x[25 + ix];          /* Im C - Im D */ \
    350   y[ 2 + iy] = ui + vr;          /* Re B' = Im C - Im D  + Re A - Re B */ \
    351   y[ 6 + iy] = vr - ui;          /* Re D' = -Im C + Im D + Re A - Re B */ \
    352   y[ 3 + iy] = ur - vi;          /* Im B'= -Re C + Re D + Im A - Im B */  \
    353   y[ 7 + iy] = vi + ur;          /* Im D'= Re C - Re D + Im A - Im B */
    354 
    355 static const FIXP_STP fft16_w16[2] =  { STCP(0x7641af3d, 0x30fbc54d), STCP(0x30fbc54d, 0x7641af3d) };
    356 
    357 LNK_SECTION_CODE_L1
    358 inline void fft_16(FIXP_DBL *RESTRICT x)
    359 {
    360   FIXP_DBL vr, vi, ur, ui;
    361   FIXP_DBL y[32];
    362 
    363   SUMDIFF_EIGTH(x, y, 0,  0, vr, vi, ur, ui);
    364   SUMDIFF_EIGTH(x, y, 4,  8, vr, vi, ur, ui);
    365   SUMDIFF_EIGTH(x, y, 2, 16, vr, vi, ur, ui);
    366   SUMDIFF_EIGTH(x, y, 6, 24, vr, vi, ur, ui);
    367 
    368 // xt1 =  0
    369 // xt2 =  8
    370   vr = y[ 8];
    371   vi = y[ 9];
    372   ur = y[ 0]>>1;
    373   ui = y[ 1]>>1;
    374   x[ 0] = ur + (vr>>1);
    375   x[ 1] = ui + (vi>>1);
    376   x[ 8] = ur - (vr>>1);
    377   x[ 9] = ui - (vi>>1);
    378 
    379 // xt1 =  4
    380 // xt2 = 12
    381   vr = y[13];
    382   vi = y[12];
    383   ur = y[ 4]>>1;
    384   ui = y[ 5]>>1;
    385   x[ 4] = ur + (vr>>1);
    386   x[ 5] = ui - (vi>>1);
    387   x[12] = ur - (vr>>1);
    388   x[13] = ui + (vi>>1);
    389 
    390 // xt1 = 16
    391 // xt2 = 24
    392   vr = y[24];
    393   vi = y[25];
    394   ur = y[16]>>1;
    395   ui = y[17]>>1;
    396   x[16] = ur + (vr>>1);
    397   x[17] = ui + (vi>>1);
    398   x[24] = ur - (vr>>1);
    399   x[25] = ui - (vi>>1);
    400 
    401 // xt1 = 20
    402 // xt2 = 28
    403   vr = y[29];
    404   vi = y[28];
    405   ur = y[20]>>1;
    406   ui = y[21]>>1;
    407   x[20] = ur + (vr>>1);
    408   x[21] = ui - (vi>>1);
    409   x[28] = ur - (vr>>1);
    410   x[29] = ui + (vi>>1);
    411 
    412   // xt1 =  2
    413 // xt2 = 10
    414   SUMDIFF_PIFOURTH(vi, vr, y[10], y[11])
    415   //vr = fMultDiv2((y[11] + y[10]),W_PiFOURTH);
    416   //vi = fMultDiv2((y[11] - y[10]),W_PiFOURTH);
    417   ur = y[ 2];
    418   ui = y[ 3];
    419   x[ 2] = (ur>>1) + vr;
    420   x[ 3] = (ui>>1) + vi;
    421   x[10] = (ur>>1) - vr;
    422   x[11] = (ui>>1) - vi;
    423 
    424 // xt1 =  6
    425 // xt2 = 14
    426   SUMDIFF_PIFOURTH(vr, vi, y[14], y[15])
    427   ur = y[ 6];
    428   ui = y[ 7];
    429   x[ 6] = (ur>>1) + vr;
    430   x[ 7] = (ui>>1) - vi;
    431   x[14] = (ur>>1) - vr;
    432   x[15] = (ui>>1) + vi;
    433 
    434 // xt1 = 18
    435 // xt2 = 26
    436   SUMDIFF_PIFOURTH(vi, vr, y[26], y[27])
    437   ur = y[18];
    438   ui = y[19];
    439   x[18] = (ur>>1) + vr;
    440   x[19] = (ui>>1) + vi;
    441   x[26] = (ur>>1) - vr;
    442   x[27] = (ui>>1) - vi;
    443 
    444 // xt1 = 22
    445 // xt2 = 30
    446   SUMDIFF_PIFOURTH(vr, vi, y[30], y[31])
    447   ur = y[22];
    448   ui = y[23];
    449   x[22] = (ur>>1) + vr;
    450   x[23] = (ui>>1) - vi;
    451   x[30] = (ur>>1) - vr;
    452   x[31] = (ui>>1) + vi;
    453 
    454 // xt1 =  0
    455 // xt2 = 16
    456   vr = x[16];
    457   vi = x[17];
    458   ur = x[ 0]>>1;
    459   ui = x[ 1]>>1;
    460   x[ 0] = ur + (vr>>1);
    461   x[ 1] = ui + (vi>>1);
    462   x[16] = ur - (vr>>1);
    463   x[17] = ui - (vi>>1);
    464 
    465 // xt1 =  8
    466 // xt2 = 24
    467   vi = x[24];
    468   vr = x[25];
    469   ur = x[ 8]>>1;
    470   ui = x[ 9]>>1;
    471   x[ 8] = ur + (vr>>1);
    472   x[ 9] = ui - (vi>>1);
    473   x[24] = ur - (vr>>1);
    474   x[25] = ui + (vi>>1);
    475 
    476 // xt1 =  2
    477 // xt2 = 18
    478   cplxMultDiv2(&vi, &vr, x[19], x[18], fft16_w16[0]);
    479   ur = x[ 2];
    480   ui = x[ 3];
    481   x[ 2] = (ur>>1) + vr;
    482   x[ 3] = (ui>>1) + vi;
    483   x[18] = (ur>>1) - vr;
    484   x[19] = (ui>>1) - vi;
    485 
    486 // xt1 = 10
    487 // xt2 = 26
    488   cplxMultDiv2(&vr, &vi, x[27], x[26], fft16_w16[0]);
    489   ur = x[10];
    490   ui = x[11];
    491   x[10] = (ur>>1) + vr;
    492   x[11] = (ui>>1) - vi;
    493   x[26] = (ur>>1) - vr;
    494   x[27] = (ui>>1) + vi;
    495 
    496 // xt1 =  4
    497 // xt2 = 20
    498   SUMDIFF_PIFOURTH(vi, vr, x[20], x[21])
    499   ur = x[ 4];
    500   ui = x[ 5];
    501   x[ 4] = (ur>>1) + vr;
    502   x[ 5] = (ui>>1) + vi;
    503   x[20] = (ur>>1) - vr;
    504   x[21] = (ui>>1) - vi;
    505 
    506 // xt1 = 12
    507 // xt2 = 28
    508   SUMDIFF_PIFOURTH(vr, vi, x[28], x[29])
    509   ur = x[12];
    510   ui = x[13];
    511   x[12] = (ur>>1) + vr;
    512   x[13] = (ui>>1) - vi;
    513   x[28] = (ur>>1) - vr;
    514   x[29] = (ui>>1) + vi;
    515 
    516 // xt1 =  6
    517 // xt2 = 22
    518   cplxMultDiv2(&vi, &vr, x[23], x[22], fft16_w16[1]);
    519   ur = x[ 6];
    520   ui = x[ 7];
    521   x[ 6] = (ur>>1) + vr;
    522   x[ 7] = (ui>>1) + vi;
    523   x[22] = (ur>>1) - vr;
    524   x[23] = (ui>>1) - vi;
    525 
    526 // xt1 = 14
    527 // xt2 = 30
    528   cplxMultDiv2(&vr, &vi, x[31], x[30], fft16_w16[1]);
    529   ur = x[14];
    530   ui = x[15];
    531   x[14] = (ur>>1) + vr;
    532   x[15] = (ui>>1) - vi;
    533   x[30] = (ur>>1) - vr;
    534   x[31] = (ui>>1) + vi;
    535 }
    536 
    537 #ifndef FUNCTION_fft_32
    538 static const FIXP_STP fft32_w32[6] =
    539 {
    540   STCP (0x7641af3d, 0x30fbc54d), STCP(0x30fbc54d, 0x7641af3d), STCP(0x7d8a5f40, 0x18f8b83c),
    541   STCP (0x6a6d98a4, 0x471cece7), STCP(0x471cece7, 0x6a6d98a4), STCP(0x18f8b83c, 0x7d8a5f40)
    542 };
    543 
    544 LNK_SECTION_CODE_L1
    545 inline void fft_32(FIXP_DBL *x)
    546 {
    547 
    548 #define W_PiFOURTH STC(0x5a82799a)
    549 
    550   FIXP_DBL vr,vi,ur,ui;
    551   FIXP_DBL y[64];
    552 
    553   /*
    554    * 1+2 stage radix 4
    555    */
    556 
    557 /////////////////////////////////////////////////////////////////////////////////////////
    558 
    559   // i = 0
    560   vr = (x[ 0] + x[32])>>1;  /* Re A + Re B */
    561   vi = (x[16] + x[48]);     /* Re C + Re D */
    562   ur = (x[ 1] + x[33])>>1;  /* Im A + Im B */
    563   ui = (x[17] + x[49]);     /* Im C + Im D */
    564 
    565   y[ 0] = vr + (vi>>1);     /* Re A' = ReA + ReB +ReC + ReD */
    566   y[ 4] = vr - (vi>>1);     /* Re C' = -(ReC+ReD) + (ReA+ReB) */
    567   y[ 1] = ur + (ui>>1);     /* Im A' = sum of imag values */
    568   y[ 5] = ur - (ui>>1);     /* Im C' = -Im C -Im D +Im A +Im B */
    569 
    570   vr -= x[32];              /* Re A - Re B */
    571   vi = (vi>>1) - x[48];     /* Re C - Re D */
    572   ur -= x[33];              /* Im A - Im B */
    573   ui = (ui>>1) - x[49];     /* Im C - Im D */
    574 
    575   y[ 2] = ui + vr;          /* Re B' = Im C - Im D  + Re A - Re B */
    576   y[ 6] = vr - ui;          /* Re D' = -Im C + Im D + Re A - Re B */
    577   y[ 3] = ur - vi;          /* Im B'= -Re C + Re D + Im A - Im B */
    578   y[ 7] = vi + ur;          /* Im D'= Re C - Re D + Im A - Im B */
    579 
    580   //i=8
    581   vr = (x[ 8] + x[40])>>1;  /* Re A + Re B */
    582   vi = (x[24] + x[56]);     /* Re C + Re D */
    583   ur = (x[ 9] + x[41])>>1;  /* Im A + Im B */
    584   ui = (x[25] + x[57]);     /* Im C + Im D */
    585 
    586   y[ 8] = vr + (vi>>1);     /* Re A' = ReA + ReB +ReC + ReD */
    587   y[12] = vr - (vi>>1);     /* Re C' = -(ReC+ReD) + (ReA+ReB) */
    588   y[ 9] = ur + (ui>>1);     /* Im A' = sum of imag values */
    589   y[13] = ur - (ui>>1);     /* Im C' = -Im C -Im D +Im A +Im B */
    590 
    591   vr -= x[40];              /* Re A - Re B */
    592   vi = (vi>>1) - x[56];     /* Re C - Re D */
    593   ur -= x[41];              /* Im A - Im B */
    594   ui = (ui>>1) - x[57];     /* Im C - Im D */
    595 
    596   y[10] = ui + vr;          /* Re B' = Im C - Im D  + Re A - Re B */
    597   y[14] = vr - ui;          /* Re D' = -Im C + Im D + Re A - Re B */
    598   y[11] = ur - vi;          /* Im B'= -Re C + Re D + Im A - Im B */
    599   y[15] = vi + ur;          /* Im D'= Re C - Re D + Im A - Im B */
    600 
    601   //i=16
    602   vr = (x[ 4] + x[36])>>1;  /* Re A + Re B */
    603   vi = (x[20] + x[52]);     /* Re C + Re D */
    604   ur = (x[ 5] + x[37])>>1;  /* Im A + Im B */
    605   ui = (x[21] + x[53]);     /* Im C + Im D */
    606 
    607   y[16] = vr + (vi>>1);     /* Re A' = ReA + ReB +ReC + ReD */
    608   y[20] = vr - (vi>>1);     /* Re C' = -(ReC+ReD) + (ReA+ReB) */
    609   y[17] = ur + (ui>>1);     /* Im A' = sum of imag values */
    610   y[21] = ur - (ui>>1);     /* Im C' = -Im C -Im D +Im A +Im B */
    611 
    612   vr -= x[36];              /* Re A - Re B */
    613   vi = (vi>>1) - x[52];     /* Re C - Re D */
    614   ur -= x[37];              /* Im A - Im B */
    615   ui = (ui>>1) - x[53];     /* Im C - Im D */
    616 
    617   y[18] = ui + vr;          /* Re B' = Im C - Im D  + Re A - Re B */
    618   y[22] = vr - ui;          /* Re D' = -Im C + Im D + Re A - Re B */
    619   y[19] = ur - vi;          /* Im B'= -Re C + Re D + Im A - Im B */
    620   y[23] = vi + ur;          /* Im D'= Re C - Re D + Im A - Im B */
    621 
    622   //i=24
    623   vr = (x[12] + x[44])>>1;  /* Re A + Re B */
    624   vi = (x[28] + x[60]);     /* Re C + Re D */
    625   ur = (x[13] + x[45])>>1;  /* Im A + Im B */
    626   ui = (x[29] + x[61]);     /* Im C + Im D */
    627 
    628   y[24] = vr + (vi>>1);     /* Re A' = ReA + ReB +ReC + ReD */
    629   y[28] = vr - (vi>>1);     /* Re C' = -(ReC+ReD) + (ReA+ReB) */
    630   y[25] = ur + (ui>>1);     /* Im A' = sum of imag values */
    631   y[29] = ur - (ui>>1);     /* Im C' = -Im C -Im D +Im A +Im B */
    632 
    633   vr -= x[44];              /* Re A - Re B */
    634   vi = (vi>>1) - x[60];     /* Re C - Re D */
    635   ur -= x[45];              /* Im A - Im B */
    636   ui = (ui>>1) - x[61];     /* Im C - Im D */
    637 
    638   y[26] = ui + vr;          /* Re B' = Im C - Im D  + Re A - Re B */
    639   y[30] = vr - ui;          /* Re D' = -Im C + Im D + Re A - Re B */
    640   y[27] = ur - vi;          /* Im B'= -Re C + Re D + Im A - Im B */
    641   y[31] = vi + ur;          /* Im D'= Re C - Re D + Im A - Im B */
    642 
    643   // i = 32
    644   vr = (x[ 2] + x[34])>>1;  /* Re A + Re B */
    645   vi = (x[18] + x[50]);     /* Re C + Re D */
    646   ur = (x[ 3] + x[35])>>1;  /* Im A + Im B */
    647   ui = (x[19] + x[51]);     /* Im C + Im D */
    648 
    649   y[32] = vr + (vi>>1);     /* Re A' = ReA + ReB +ReC + ReD */
    650   y[36] = vr - (vi>>1);     /* Re C' = -(ReC+ReD) + (ReA+ReB) */
    651   y[33] = ur + (ui>>1);     /* Im A' = sum of imag values */
    652   y[37] = ur - (ui>>1);     /* Im C' = -Im C -Im D +Im A +Im B */
    653 
    654   vr -= x[34];              /* Re A - Re B */
    655   vi = (vi>>1) - x[50];     /* Re C - Re D */
    656   ur -= x[35];              /* Im A - Im B */
    657   ui = (ui>>1) - x[51];     /* Im C - Im D */
    658 
    659   y[34] = ui + vr;          /* Re B' = Im C - Im D  + Re A - Re B */
    660   y[38] = vr - ui;          /* Re D' = -Im C + Im D + Re A - Re B */
    661   y[35] = ur - vi;          /* Im B'= -Re C + Re D + Im A - Im B */
    662   y[39] = vi + ur;          /* Im D'= Re C - Re D + Im A - Im B */
    663 
    664   //i=40
    665   vr = (x[10] + x[42])>>1;  /* Re A + Re B */
    666   vi = (x[26] + x[58]);     /* Re C + Re D */
    667   ur = (x[11] + x[43])>>1;  /* Im A + Im B */
    668   ui = (x[27] + x[59]);     /* Im C + Im D */
    669 
    670   y[40] = vr + (vi>>1);     /* Re A' = ReA + ReB +ReC + ReD */
    671   y[44] = vr - (vi>>1);     /* Re C' = -(ReC+ReD) + (ReA+ReB) */
    672   y[41] = ur + (ui>>1);     /* Im A' = sum of imag values */
    673   y[45] = ur - (ui>>1);     /* Im C' = -Im C -Im D +Im A +Im B */
    674 
    675   vr -= x[42];              /* Re A - Re B */
    676   vi = (vi>>1) - x[58];     /* Re C - Re D */
    677   ur -= x[43];              /* Im A - Im B */
    678   ui = (ui>>1) - x[59];     /* Im C - Im D */
    679 
    680   y[42] = ui + vr;          /* Re B' = Im C - Im D  + Re A - Re B */
    681   y[46] = vr - ui;          /* Re D' = -Im C + Im D + Re A - Re B */
    682   y[43] = ur - vi;          /* Im B'= -Re C + Re D + Im A - Im B */
    683   y[47] = vi + ur;          /* Im D'= Re C - Re D + Im A - Im B */
    684 
    685   //i=48
    686   vr = (x[ 6] + x[38])>>1;  /* Re A + Re B */
    687   vi = (x[22] + x[54]);     /* Re C + Re D */
    688   ur = (x[ 7] + x[39])>>1;  /* Im A + Im B */
    689   ui = (x[23] + x[55]);     /* Im C + Im D */
    690 
    691   y[48] = vr + (vi>>1);     /* Re A' = ReA + ReB +ReC + ReD */
    692   y[52] = vr - (vi>>1);     /* Re C' = -(ReC+ReD) + (ReA+ReB) */
    693   y[49] = ur + (ui>>1);     /* Im A' = sum of imag values */
    694   y[53] = ur - (ui>>1);     /* Im C' = -Im C -Im D +Im A +Im B */
    695 
    696   vr -= x[38];              /* Re A - Re B */
    697   vi = (vi>>1) - x[54];     /* Re C - Re D */
    698   ur -= x[39];              /* Im A - Im B */
    699   ui = (ui>>1) - x[55];     /* Im C - Im D */
    700 
    701   y[50] = ui + vr;          /* Re B' = Im C - Im D  + Re A - Re B */
    702   y[54] = vr - ui;          /* Re D' = -Im C + Im D + Re A - Re B */
    703   y[51] = ur - vi;          /* Im B'= -Re C + Re D + Im A - Im B */
    704   y[55] = vi + ur;          /* Im D'= Re C - Re D + Im A - Im B */
    705 
    706   //i=56
    707   vr = (x[14] + x[46])>>1;  /* Re A + Re B */
    708   vi = (x[30] + x[62]);     /* Re C + Re D */
    709   ur = (x[15] + x[47])>>1;  /* Im A + Im B */
    710   ui = (x[31] + x[63]);     /* Im C + Im D */
    711 
    712   y[56] = vr + (vi>>1);     /* Re A' = ReA + ReB +ReC + ReD */
    713   y[60] = vr - (vi>>1);     /* Re C' = -(ReC+ReD) + (ReA+ReB) */
    714   y[57] = ur + (ui>>1);     /* Im A' = sum of imag values */
    715   y[61] = ur - (ui>>1);     /* Im C' = -Im C -Im D +Im A +Im B */
    716 
    717   vr -= x[46];              /* Re A - Re B */
    718   vi = (vi>>1) - x[62];     /* Re C - Re D */
    719   ur -= x[47];              /* Im A - Im B */
    720   ui = (ui>>1) - x[63];     /* Im C - Im D */
    721 
    722   y[58] = ui + vr;          /* Re B' = Im C - Im D  + Re A - Re B */
    723   y[62] = vr - ui;          /* Re D' = -Im C + Im D + Re A - Re B */
    724   y[59] = ur - vi;          /* Im B'= -Re C + Re D + Im A - Im B */
    725   y[63] = vi + ur;          /* Im D'= Re C - Re D + Im A - Im B */
    726 
    727 
    728   FIXP_DBL *xt = &x[0];
    729   FIXP_DBL *yt = &y[0];
    730 
    731   int j = 4;
    732   do
    733   {
    734     vr = yt[8];
    735     vi = yt[9];
    736     ur = yt[0]>>1;
    737     ui = yt[1]>>1;
    738     xt[ 0] = ur + (vr>>1);
    739     xt[ 1] = ui + (vi>>1);
    740     xt[ 8] = ur - (vr>>1);
    741     xt[ 9] = ui - (vi>>1);
    742 
    743     vr = yt[13];
    744     vi = yt[12];
    745     ur = yt[4]>>1;
    746     ui = yt[5]>>1;
    747     xt[ 4] = ur + (vr>>1);
    748     xt[ 5] = ui - (vi>>1);
    749     xt[12] = ur - (vr>>1);
    750     xt[13] = ui + (vi>>1);
    751 
    752     SUMDIFF_PIFOURTH(vi, vr, yt[10], yt[11])
    753     ur = yt[2];
    754     ui = yt[3];
    755     xt[ 2] = (ur>>1) + vr;
    756     xt[ 3] = (ui>>1) + vi;
    757     xt[10] = (ur>>1) - vr;
    758     xt[11] = (ui>>1) - vi;
    759 
    760     SUMDIFF_PIFOURTH(vr, vi, yt[14], yt[15])
    761     ur = yt[6];
    762     ui = yt[7];
    763 
    764     xt[ 6] = (ur>>1) + vr;
    765     xt[ 7] = (ui>>1) - vi;
    766     xt[14] = (ur>>1) - vr;
    767     xt[15] = (ui>>1) + vi;
    768     xt += 16;
    769     yt += 16;
    770   } while (--j != 0);
    771 
    772   vr = x[16];
    773   vi = x[17];
    774   ur = x[ 0]>>1;
    775   ui = x[ 1]>>1;
    776   x[ 0] = ur + (vr>>1);
    777   x[ 1] = ui + (vi>>1);
    778   x[16] = ur - (vr>>1);
    779   x[17] = ui - (vi>>1);
    780 
    781   vi = x[24];
    782   vr = x[25];
    783   ur = x[ 8]>>1;
    784   ui = x[ 9]>>1;
    785   x[ 8] = ur + (vr>>1);
    786   x[ 9] = ui - (vi>>1);
    787   x[24] = ur - (vr>>1);
    788   x[25] = ui + (vi>>1);
    789 
    790   vr = x[48];
    791   vi = x[49];
    792   ur = x[32]>>1;
    793   ui = x[33]>>1;
    794   x[32] = ur + (vr>>1);
    795   x[33] = ui + (vi>>1);
    796   x[48] = ur - (vr>>1);
    797   x[49] = ui - (vi>>1);
    798 
    799   vi = x[56];
    800   vr = x[57];
    801   ur = x[40]>>1;
    802   ui = x[41]>>1;
    803   x[40] = ur + (vr>>1);
    804   x[41] = ui - (vi>>1);
    805   x[56] = ur - (vr>>1);
    806   x[57] = ui + (vi>>1);
    807 
    808   cplxMultDiv2(&vi, &vr, x[19], x[18], fft32_w32[0]);
    809   ur = x[ 2];
    810   ui = x[ 3];
    811   x[ 2] = (ur>>1) + vr;
    812   x[ 3] = (ui>>1) + vi;
    813   x[18] = (ur>>1) - vr;
    814   x[19] = (ui>>1) - vi;
    815 
    816   cplxMultDiv2(&vr, &vi, x[27], x[26], fft32_w32[0]);
    817   ur = x[10];
    818   ui = x[11];
    819   x[10] = (ur>>1) + vr;
    820   x[11] = (ui>>1) - vi;
    821   x[26] = (ur>>1) - vr;
    822   x[27] = (ui>>1) + vi;
    823 
    824   cplxMultDiv2(&vi, &vr, x[51], x[50], fft32_w32[0]);
    825   ur = x[34];
    826   ui = x[35];
    827   x[34] = (ur>>1) + vr;
    828   x[35] = (ui>>1) + vi;
    829   x[50] = (ur>>1) - vr;
    830   x[51] = (ui>>1) - vi;
    831 
    832   cplxMultDiv2(&vr, &vi, x[59], x[58], fft32_w32[0]);
    833   ur = x[42];
    834   ui = x[43];
    835   x[42] = (ur>>1) + vr;
    836   x[43] = (ui>>1) - vi;
    837   x[58] = (ur>>1) - vr;
    838   x[59] = (ui>>1) + vi;
    839 
    840   SUMDIFF_PIFOURTH(vi, vr, x[20], x[21])
    841   ur = x[ 4];
    842   ui = x[ 5];
    843   x[ 4] = (ur>>1) + vr;
    844   x[ 5] = (ui>>1) + vi;
    845   x[20] = (ur>>1) - vr;
    846   x[21] = (ui>>1) - vi;
    847 
    848   SUMDIFF_PIFOURTH(vr, vi, x[28], x[29])
    849   ur = x[12];
    850   ui = x[13];
    851   x[12] = (ur>>1) + vr;
    852   x[13] = (ui>>1) - vi;
    853   x[28] = (ur>>1) - vr;
    854   x[29] = (ui>>1) + vi;
    855 
    856   SUMDIFF_PIFOURTH(vi, vr, x[52], x[53])
    857   ur = x[36];
    858   ui = x[37];
    859   x[36] = (ur>>1) + vr;
    860   x[37] = (ui>>1) + vi;
    861   x[52] = (ur>>1) - vr;
    862   x[53] = (ui>>1) - vi;
    863 
    864   SUMDIFF_PIFOURTH(vr, vi, x[60], x[61])
    865   ur = x[44];
    866   ui = x[45];
    867   x[44] = (ur>>1) + vr;
    868   x[45] = (ui>>1) - vi;
    869   x[60] = (ur>>1) - vr;
    870   x[61] = (ui>>1) + vi;
    871 
    872 
    873   cplxMultDiv2(&vi, &vr, x[23], x[22], fft32_w32[1]);
    874   ur = x[ 6];
    875   ui = x[ 7];
    876   x[ 6] = (ur>>1) + vr;
    877   x[ 7] = (ui>>1) + vi;
    878   x[22] = (ur>>1) - vr;
    879   x[23] = (ui>>1) - vi;
    880 
    881   cplxMultDiv2(&vr, &vi, x[31], x[30], fft32_w32[1]);
    882   ur = x[14];
    883   ui = x[15];
    884   x[14] = (ur>>1) + vr;
    885   x[15] = (ui>>1) - vi;
    886   x[30] = (ur>>1) - vr;
    887   x[31] = (ui>>1) + vi;
    888 
    889   cplxMultDiv2(&vi, &vr, x[55], x[54], fft32_w32[1]);
    890   ur = x[38];
    891   ui = x[39];
    892   x[38] = (ur>>1) + vr;
    893   x[39] = (ui>>1) + vi;
    894   x[54] = (ur>>1) - vr;
    895   x[55] = (ui>>1) - vi;
    896 
    897   cplxMultDiv2(&vr, &vi, x[63], x[62], fft32_w32[1]);
    898   ur = x[46];
    899   ui = x[47];
    900 
    901   x[46] = (ur>>1) + vr;
    902   x[47] = (ui>>1) - vi;
    903   x[62] = (ur>>1) - vr;
    904   x[63] = (ui>>1) + vi;
    905 
    906   vr = x[32];
    907   vi = x[33];
    908   ur = x[ 0]>>1;
    909   ui = x[ 1]>>1;
    910   x[ 0] = ur + (vr>>1);
    911   x[ 1] = ui + (vi>>1);
    912   x[32] = ur - (vr>>1);
    913   x[33] = ui - (vi>>1);
    914 
    915   vi = x[48];
    916   vr = x[49];
    917   ur = x[16]>>1;
    918   ui = x[17]>>1;
    919   x[16] = ur + (vr>>1);
    920   x[17] = ui - (vi>>1);
    921   x[48] = ur - (vr>>1);
    922   x[49] = ui + (vi>>1);
    923 
    924   cplxMultDiv2(&vi, &vr, x[35], x[34], fft32_w32[2]);
    925   ur = x[ 2];
    926   ui = x[ 3];
    927   x[ 2] = (ur>>1) + vr;
    928   x[ 3] = (ui>>1) + vi;
    929   x[34] = (ur>>1) - vr;
    930   x[35] = (ui>>1) - vi;
    931 
    932   cplxMultDiv2(&vr, &vi, x[51], x[50], fft32_w32[2]);
    933   ur = x[18];
    934   ui = x[19];
    935   x[18] = (ur>>1) + vr;
    936   x[19] = (ui>>1) - vi;
    937   x[50] = (ur>>1) - vr;
    938   x[51] = (ui>>1) + vi;
    939 
    940   cplxMultDiv2(&vi, &vr, x[37], x[36], fft32_w32[0]);
    941   ur = x[ 4];
    942   ui = x[ 5];
    943   x[ 4] = (ur>>1) + vr;
    944   x[ 5] = (ui>>1) + vi;
    945   x[36] = (ur>>1) - vr;
    946   x[37] = (ui>>1) - vi;
    947 
    948   cplxMultDiv2(&vr, &vi, x[53], x[52], fft32_w32[0]);
    949   ur = x[20];
    950   ui = x[21];
    951   x[20] = (ur>>1) + vr;
    952   x[21] = (ui>>1) - vi;
    953   x[52] = (ur>>1) - vr;
    954   x[53] = (ui>>1) + vi;
    955 
    956   cplxMultDiv2(&vi, &vr, x[39], x[38], fft32_w32[3]);
    957   ur = x[ 6];
    958   ui = x[ 7];
    959   x[ 6] = (ur>>1) + vr;
    960   x[ 7] = (ui>>1) + vi;
    961   x[38] = (ur>>1) - vr;
    962   x[39] = (ui>>1) - vi;
    963 
    964   cplxMultDiv2(&vr, &vi, x[55], x[54], fft32_w32[3]);
    965   ur = x[22];
    966   ui = x[23];
    967   x[22] = (ur>>1) + vr;
    968   x[23] = (ui>>1) - vi;
    969   x[54] = (ur>>1) - vr;
    970   x[55] = (ui>>1) + vi;
    971 
    972   SUMDIFF_PIFOURTH(vi, vr, x[40], x[41])
    973   ur = x[ 8];
    974   ui = x[ 9];
    975   x[ 8] = (ur>>1) + vr;
    976   x[ 9] = (ui>>1) + vi;
    977   x[40] = (ur>>1) - vr;
    978   x[41] = (ui>>1) - vi;
    979 
    980   SUMDIFF_PIFOURTH(vr, vi, x[56], x[57])
    981   ur = x[24];
    982   ui = x[25];
    983   x[24] = (ur>>1) + vr;
    984   x[25] = (ui>>1) - vi;
    985   x[56] = (ur>>1) - vr;
    986   x[57] = (ui>>1) + vi;
    987 
    988   cplxMultDiv2(&vi, &vr, x[43], x[42], fft32_w32[4]);
    989   ur = x[10];
    990   ui = x[11];
    991 
    992   x[10] = (ur>>1) + vr;
    993   x[11] = (ui>>1) + vi;
    994   x[42] = (ur>>1) - vr;
    995   x[43] = (ui>>1) - vi;
    996 
    997   cplxMultDiv2(&vr, &vi, x[59], x[58], fft32_w32[4]);
    998   ur = x[26];
    999   ui = x[27];
   1000   x[26] = (ur>>1) + vr;
   1001   x[27] = (ui>>1) - vi;
   1002   x[58] = (ur>>1) - vr;
   1003   x[59] = (ui>>1) + vi;
   1004 
   1005   cplxMultDiv2(&vi, &vr, x[45], x[44], fft32_w32[1]);
   1006   ur = x[12];
   1007   ui = x[13];
   1008   x[12] = (ur>>1) + vr;
   1009   x[13] = (ui>>1) + vi;
   1010   x[44] = (ur>>1) - vr;
   1011   x[45] = (ui>>1) - vi;
   1012 
   1013   cplxMultDiv2(&vr, &vi, x[61], x[60], fft32_w32[1]);
   1014   ur = x[28];
   1015   ui = x[29];
   1016   x[28] = (ur>>1) + vr;
   1017   x[29] = (ui>>1) - vi;
   1018   x[60] = (ur>>1) - vr;
   1019   x[61] = (ui>>1) + vi;
   1020 
   1021   cplxMultDiv2(&vi, &vr, x[47], x[46], fft32_w32[5]);
   1022   ur = x[14];
   1023   ui = x[15];
   1024   x[14] = (ur>>1) + vr;
   1025   x[15] = (ui>>1) + vi;
   1026   x[46] = (ur>>1) - vr;
   1027   x[47] = (ui>>1) - vi;
   1028 
   1029   cplxMultDiv2(&vr, &vi, x[63], x[62], fft32_w32[5]);
   1030   ur = x[30];
   1031   ui = x[31];
   1032   x[30] = (ur>>1) + vr;
   1033   x[31] = (ui>>1) - vi;
   1034   x[62] = (ur>>1) - vr;
   1035   x[63] = (ui>>1) + vi;
   1036 }
   1037 #endif /* #ifndef FUNCTION_fft_32 */
   1038 
   1039 
   1040 /**
   1041  * \brief Apply rotation vectors to a data buffer.
   1042  * \param cl length of each row of input data.
   1043  * \param l total length of input data.
   1044  * \param pVecRe real part of rotation ceofficient vector.
   1045  * \param pVecIm imaginary part of rotation ceofficient vector.
   1046  */
   1047 static inline void fft_apply_rot_vector(FIXP_DBL *RESTRICT pData, const int cl, const int l, const FIXP_STB *pVecRe, const FIXP_STB *pVecIm)
   1048 {
   1049   FIXP_DBL re, im;
   1050   FIXP_STB vre, vim;
   1051 
   1052   int i, c;
   1053 
   1054   for(i=0; i<cl; i++) {
   1055     re  = pData[2*i];
   1056     im  = pData[2*i+1];
   1057 
   1058     pData[2*i]   = re>>2; /* * 0.25 */
   1059     pData[2*i+1] = im>>2; /* * 0.25 */
   1060   }
   1061   for(; i<l; i+=cl)
   1062   {
   1063     re  = pData[2*i];
   1064     im  = pData[2*i+1];
   1065 
   1066     pData[2*i]   = re>>2; /* * 0.25 */
   1067     pData[2*i+1] = im>>2; /* * 0.25 */
   1068 
   1069     for (c=i+1; c<i+cl; c++)
   1070     {
   1071       re  = pData[2*c]>>1;
   1072       im  = pData[2*c+1]>>1;
   1073       vre = *pVecRe++;
   1074       vim = *pVecIm++;
   1075 
   1076       cplxMultDiv2(&pData[2*c+1], &pData[2*c], im, re, vre, vim);
   1077     }
   1078   }
   1079 }
   1080 
   1081 #define FFT_TWO_STAGE_MACRO_ENABLE
   1082 
   1083 
   1084 #ifdef FFT_TWO_STAGE_MACRO_ENABLE
   1085 
   1086 #define fftN2(pInput, length, dim1, dim2, fft_func1, fft_func2, RotVectorReal, RotVectorImag) \
   1087 { \
   1088   int       i, j; \
   1089  \
   1090   C_ALLOC_SCRATCH_START(aDst, FIXP_DBL, length*2); \
   1091   C_ALLOC_SCRATCH_START(aDst2, FIXP_DBL, dim2*2); \
   1092  \
   1093   FDK_ASSERT(length == dim1*dim2); \
   1094  \
   1095   /* Perform dim2 times the fft of length dim1. The input samples are at the address of pSrc and the \
   1096   output samples are at the address of pDst. The input vector for the fft of length dim1 is built \
   1097   of the interleaved samples in pSrc, the output samples are stored consecutively. \
   1098   */ \
   1099   { \
   1100     const FIXP_DBL* pSrc = pInput; \
   1101     FIXP_DBL  *RESTRICT pDst = aDst; \
   1102     \
   1103     for(i=0; i<dim2; i++) \
   1104     { \
   1105       for(j=0; j<dim1; j++) \
   1106       { \
   1107         pDst[2*j]   = pSrc[2*j*dim2]; \
   1108         pDst[2*j+1] = pSrc[2*j*dim2+1]; \
   1109       } \
   1110       \
   1111       fft_func1(pDst); \
   1112       pSrc += 2; \
   1113       pDst = pDst + 2*dim1; \
   1114     } \
   1115   } \
   1116   \
   1117   /* Perform the modulation of the output of the fft of length dim1 */ \
   1118   fft_apply_rot_vector(aDst, dim1, length, RotVectorReal, RotVectorImag); \
   1119   \
   1120   /* Perform dim1 times the fft of length dim2. The input samples are at the address of aDst and the \
   1121   output samples are at the address of pInput. The input vector for the fft of length dim2 is built \
   1122   of the interleaved samples in aDst, the output samples are stored consecutively at the address \
   1123   of pInput. \
   1124   */ \
   1125   { \
   1126     const FIXP_DBL* pSrc       = aDst; \
   1127     FIXP_DBL *RESTRICT pDst    = aDst2; \
   1128     FIXP_DBL *RESTRICT pDstOut = pInput; \
   1129     \
   1130     for(i=0; i<dim1; i++) \
   1131     { \
   1132       for(j=0; j<dim2; j++) \
   1133       { \
   1134         pDst[2*j]   = pSrc[2*j*dim1]; \
   1135         pDst[2*j+1] = pSrc[2*j*dim1+1]; \
   1136       } \
   1137       \
   1138       fft_func2(pDst); \
   1139       \
   1140       for(j=0; j<dim2; j++) \
   1141       { \
   1142         pDstOut[2*j*dim1]   = pDst[2*j]; \
   1143         pDstOut[2*j*dim1+1] = pDst[2*j+1]; \
   1144       } \
   1145       pSrc += 2; \
   1146       pDstOut += 2; \
   1147     } \
   1148   } \
   1149   \
   1150   C_ALLOC_SCRATCH_END(aDst2, FIXP_DBL, dim2*2); \
   1151   C_ALLOC_SCRATCH_END(aDst, FIXP_DBL, length*2); \
   1152 } \
   1153 
   1154 #else /* FFT_TWO_STAGE_MACRO_ENABLE */
   1155 
   1156 /* select either switch case of function pointer. */
   1157 //#define FFT_TWO_STAGE_SWITCH_CASE
   1158 
   1159 static inline void fftN2(
   1160         FIXP_DBL *pInput,
   1161         const int length,
   1162         const int dim1,
   1163         const int dim2,
   1164         void (* const fft1)(FIXP_DBL *),
   1165         void (* const fft2)(FIXP_DBL *),
   1166         const FIXP_STB *RotVectorReal,
   1167         const FIXP_STB *RotVectorImag
   1168         )
   1169 {
   1170   /* The real part of the input samples are at the addresses with even indices and the imaginary
   1171   part of the input samples are at the addresses with odd indices. The output samples are stored
   1172   at the address of pInput
   1173   */
   1174   FIXP_DBL  *pSrc, *pDst, *pDstOut;
   1175   int       i, j;
   1176 
   1177   C_ALLOC_SCRATCH_START(aDst, FIXP_DBL, length*2);
   1178   C_ALLOC_SCRATCH_START(aDst2, FIXP_DBL, dim2*2);
   1179 
   1180   FDK_ASSERT(length == dim1*dim2);
   1181 
   1182   /* Perform dim2 times the fft of length dim1. The input samples are at the address of pSrc and the
   1183   output samples are at the address of pDst. The input vector for the fft of length dim1 is built
   1184   of the interleaved samples in pSrc, the output samples are stored consecutively.
   1185   */
   1186   pSrc = pInput;
   1187   pDst = aDst;
   1188   for(i=0; i<length/dim1; i++)
   1189   {
   1190     for(j=0; j<length/dim2; j++)
   1191     {
   1192       pDst[2*j]   = pSrc[2*j*dim2];
   1193       pDst[2*j+1] = pSrc[2*j*dim2+1];
   1194     }
   1195 
   1196     /* fft of size dim1 */
   1197 #ifndef FFT_TWO_STAGE_SWITCH_CASE
   1198     fft1(pDst);
   1199 #else
   1200     switch (dim1) {
   1201       case 3: fft3(pDst); break;
   1202       case 4: fft_4(pDst); break;
   1203       case 5: fft5(pDst); break;
   1204       case 8: fft_8(pDst); break;
   1205       case 15: fft15(pDst); break;
   1206       case 16: fft_16(pDst); break;
   1207       case 32: fft_32(pDst); break;
   1208       /*case 64: fft_64(pDst); break;*/
   1209       case 128: fft_128(pDst); break;
   1210     }
   1211 #endif
   1212     pSrc += 2;
   1213     pDst = pDst + 2*length/dim2;
   1214   }
   1215 
   1216   /* Perform the modulation of the output of the fft of length dim1 */
   1217   pSrc=aDst;
   1218   fft_apply_rot_vector(pSrc, length/dim2, length, RotVectorReal, RotVectorImag);
   1219 
   1220   /* Perform dim1 times the fft of length dim2. The input samples are at the address of aDst and the
   1221   output samples are at the address of pInput. The input vector for the fft of length dim2 is built
   1222   of the interleaved samples in aDst, the output samples are stored consecutively at the address
   1223   of pInput.
   1224   */
   1225   pSrc    = aDst;
   1226   pDst    = aDst2;
   1227   pDstOut = pInput;
   1228   for(i=0; i<length/dim2; i++)
   1229   {
   1230     for(j=0; j<length/dim1; j++)
   1231     {
   1232       pDst[2*j]   = pSrc[2*j*dim1];
   1233       pDst[2*j+1] = pSrc[2*j*dim1+1];
   1234     }
   1235 
   1236 #ifndef FFT_TWO_STAGE_SWITCH_CASE
   1237     fft2(pDst);
   1238 #else
   1239     switch (dim2) {
   1240       case 3: fft3(pDst); break;
   1241       case 4: fft_4(pDst); break;
   1242       case 5: fft5(pDst); break;
   1243       case 8: fft_8(pDst); break;
   1244       case 15: fft15(pDst); break;
   1245       case 16: fft_16(pDst); break;
   1246       case 32: fft_32(pDst); break;
   1247       /*case 64: fft_64(pDst); break;*/
   1248       case 128: fft_128(pDst); break;
   1249     }
   1250 #endif
   1251 
   1252     for(j=0; j<length/dim1; j++)
   1253     {
   1254       pDstOut[2*j*dim1]   = pDst[2*j];
   1255       pDstOut[2*j*dim1+1] = pDst[2*j+1];
   1256     }
   1257     pSrc += 2;
   1258     pDstOut += 2;
   1259   }
   1260 
   1261   C_ALLOC_SCRATCH_END(aDst2, FIXP_DBL, dim2*2);
   1262   C_ALLOC_SCRATCH_END(aDst, FIXP_DBL, length*2);
   1263 }
   1264 
   1265 #endif /* FFT_TWO_STAGE_MACRO_ENABLE */
   1266 
   1267 
   1268 
   1269 
   1270 
   1271 
   1272 
   1273 
   1274 
   1275 
   1276 
   1277 
   1278 #define SCALEFACTOR60         5
   1279 /**
   1280 The function performs the fft of length 60. It is splittet into fft's of length 4 and fft's of
   1281 length 15. Between the fft's a modolation is calculated.
   1282 */
   1283 static inline void fft60(FIXP_DBL *pInput, INT *pScalefactor)
   1284 {
   1285   fftN2(
   1286           pInput, 60, 4, 15,
   1287           fft_4, fft15,
   1288           RotVectorReal60, RotVectorImag60
   1289           );
   1290   *pScalefactor += SCALEFACTOR60;
   1291 }
   1292 
   1293 
   1294 
   1295 /* Fallback implementation in case of no better implementation available. */
   1296 
   1297 #define SCALEFACTOR240        7
   1298 
   1299 /**
   1300 The function performs the fft of length 240. It is splittet into fft's of length 16 and fft's of
   1301 length 15. Between the fft's a modulation is calculated.
   1302 */
   1303 static inline void fft240(FIXP_DBL *pInput, INT *pScalefactor)
   1304 {
   1305   fftN2(
   1306           pInput, 240, 16, 15,
   1307           fft_16, fft15,
   1308           RotVectorReal240, RotVectorImag240
   1309           );
   1310   *pScalefactor += SCALEFACTOR240;
   1311 }
   1312 
   1313 
   1314 #define SCALEFACTOR480        8
   1315 #define N32                   32
   1316 #define TABLE_SIZE_16        (32/2)
   1317 
   1318 /**
   1319 The function performs the fft of length 480. It is splittet into fft's of length 32 and fft's of
   1320 length 15. Between the fft's a modulation is calculated.
   1321 */
   1322 static inline void fft480(FIXP_DBL *pInput, INT *pScalefactor)
   1323 {
   1324   fftN2(
   1325           pInput, 480, 32, 15,
   1326           fft_32, fft15,
   1327           RotVectorReal480, RotVectorImag480
   1328           );
   1329   *pScalefactor += SCALEFACTOR480;
   1330 }
   1331 
   1332 void fft(int length, FIXP_DBL *pInput, INT *pScalefactor)
   1333 {
   1334   if (length == 32)
   1335   {
   1336       fft_32(pInput);
   1337       *pScalefactor += SCALEFACTOR32;
   1338   }
   1339   else
   1340   {
   1341 
   1342   switch (length) {
   1343     case 16:
   1344       fft_16(pInput);
   1345       *pScalefactor += SCALEFACTOR16;
   1346       break;
   1347     case 8:
   1348       fft_8(pInput);
   1349       *pScalefactor += SCALEFACTOR8;
   1350       break;
   1351     case 3:
   1352       fft3(pInput);
   1353       break;
   1354     case 4:
   1355       fft_4(pInput);
   1356       *pScalefactor += SCALEFACTOR4;
   1357       break;
   1358     case 5:
   1359       fft5(pInput);
   1360       break;
   1361     case 15:
   1362       fft15(pInput);
   1363       *pScalefactor += 2;
   1364       break;
   1365     case 60:
   1366       fft60(pInput, pScalefactor);
   1367       break;
   1368     case 64:
   1369       dit_fft(pInput, 6, SineTable512, 512);
   1370       *pScalefactor += SCALEFACTOR64;
   1371       break;
   1372     case 240:
   1373       fft240(pInput, pScalefactor);
   1374       break;
   1375     case 256:
   1376       dit_fft(pInput, 8, SineTable512, 512);
   1377       *pScalefactor += SCALEFACTOR256;
   1378       break;
   1379     case 480:
   1380       fft480(pInput, pScalefactor);
   1381       break;
   1382     case 512:
   1383       dit_fft(pInput, 9, SineTable512, 512);
   1384       *pScalefactor += SCALEFACTOR512;
   1385       break;
   1386     default:
   1387       FDK_ASSERT(0); /* FFT length not supported! */
   1388       break;
   1389   }
   1390   }
   1391 }
   1392 
   1393 
   1394 void ifft(int length, FIXP_DBL *pInput, INT *scalefactor)
   1395 {
   1396   switch (length) {
   1397     default:
   1398       FDK_ASSERT(0); /* IFFT length not supported! */
   1399       break;
   1400   }
   1401 }
   1402 
   1403 
   1404