      1 /* -----------------------------------------------------------------------------
      2 Software License for The Fraunhofer FDK AAC Codec Library for Android
      3 
 Copyright © 1995 - 2018 Fraunhofer-Gesellschaft zur Förderung der angewandten
Forschung e.V. All rights reserved.
      6 
      7  1.    INTRODUCTION
      8 The Fraunhofer FDK AAC Codec Library for Android ("FDK AAC Codec") is software
      9 that implements the MPEG Advanced Audio Coding ("AAC") encoding and decoding
     10 scheme for digital audio. This FDK AAC Codec software is intended to be used on
     11 a wide variety of Android devices.
     12 
     13 AAC's HE-AAC and HE-AAC v2 versions are regarded as today's most efficient
     14 general perceptual audio codecs. AAC-ELD is considered the best-performing
     15 full-bandwidth communications codec by independent studies and is widely
     16 deployed. AAC has been standardized by ISO and IEC as part of the MPEG
     17 specifications.
     18 
     19 Patent licenses for necessary patent claims for the FDK AAC Codec (including
     20 those of Fraunhofer) may be obtained through Via Licensing
     21 (www.vialicensing.com) or through the respective patent owners individually for
     22 the purpose of encoding or decoding bit streams in products that are compliant
     23 with the ISO/IEC MPEG audio standards. Please note that most manufacturers of
     24 Android devices already license these patent claims through Via Licensing or
     25 directly from the patent owners, and therefore FDK AAC Codec software may
     26 already be covered under those patent licenses when it is used for those
     27 licensed purposes only.
     28 
     29 Commercially-licensed AAC software libraries, including floating-point versions
     30 with enhanced sound quality, are also available from Fraunhofer. Users are
     31 encouraged to check the Fraunhofer website for additional applications
     32 information and documentation.
     33 
     34 2.    COPYRIGHT LICENSE
     35 
     36 Redistribution and use in source and binary forms, with or without modification,
     37 are permitted without payment of copyright license fees provided that you
     38 satisfy the following conditions:
     39 
     40 You must retain the complete text of this software license in redistributions of
     41 the FDK AAC Codec or your modifications thereto in source code form.
     42 
     43 You must retain the complete text of this software license in the documentation
     44 and/or other materials provided with redistributions of the FDK AAC Codec or
     45 your modifications thereto in binary form. You must make available free of
     46 charge copies of the complete source code of the FDK AAC Codec and your
     47 modifications thereto to recipients of copies in binary form.
     48 
     49 The name of Fraunhofer may not be used to endorse or promote products derived
     50 from this library without prior written permission.
     51 
     52 You may not charge copyright license fees for anyone to use, copy or distribute
     53 the FDK AAC Codec software or your modifications thereto.
     54 
     55 Your modified versions of the FDK AAC Codec must carry prominent notices stating
     56 that you changed the software and the date of any change. For modified versions
     57 of the FDK AAC Codec, the term "Fraunhofer FDK AAC Codec Library for Android"
     58 must be replaced by the term "Third-Party Modified Version of the Fraunhofer FDK
     59 AAC Codec Library for Android."
     60 
     61 3.    NO PATENT LICENSE
     62 
     63 NO EXPRESS OR IMPLIED LICENSES TO ANY PATENT CLAIMS, including without
     64 limitation the patents of Fraunhofer, ARE GRANTED BY THIS SOFTWARE LICENSE.
     65 Fraunhofer provides no warranty of patent non-infringement with respect to this
     66 software.
     67 
     68 You may use this FDK AAC Codec software or modifications thereto only for
     69 purposes that are authorized by appropriate patent licenses.
     70 
     71 4.    DISCLAIMER
     72 
     73 This FDK AAC Codec software is provided by Fraunhofer on behalf of the copyright
     74 holders and contributors "AS IS" and WITHOUT ANY EXPRESS OR IMPLIED WARRANTIES,
     75 including but not limited to the implied warranties of merchantability and
     76 fitness for a particular purpose. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR
     77 CONTRIBUTORS BE LIABLE for any direct, indirect, incidental, special, exemplary,
     78 or consequential damages, including but not limited to procurement of substitute
     79 goods or services; loss of use, data, or profits, or business interruption,
     80 however caused and on any theory of liability, whether in contract, strict
     81 liability, or tort (including negligence), arising in any way out of the use of
     82 this software, even if advised of the possibility of such damage.
     83 
     84 5.    CONTACT INFORMATION
     85 
     86 Fraunhofer Institute for Integrated Circuits IIS
     87 Attention: Audio and Multimedia Departments - FDK AAC LL
     88 Am Wolfsmantel 33
     89 91058 Erlangen, Germany
     90 
     91 www.iis.fraunhofer.de/amm
     92 amm-info (at) iis.fraunhofer.de
     93 ----------------------------------------------------------------------------- */
     94 
     95 /******************* Library for basic calculation routines ********************
     96 
     97    Author(s):   Josef Hoepfl, DSP Solutions
     98 
   Description: Fixed-point FFT
    100 
    101 *******************************************************************************/
    102 
    103 #include "fft_rad2.h"
    104 #include "FDK_tools_rom.h"
    105 
    106 #define W_PiFOURTH STC(0x5a82799a)
    107 //#define W_PiFOURTH ((FIXP_DBL)(0x5a82799a))
    108 #ifndef SUMDIFF_PIFOURTH
    109 #define SUMDIFF_PIFOURTH(diff, sum, a, b) \
    110   {                                       \
    111     FIXP_DBL wa, wb;                      \
    112     wa = fMultDiv2(a, W_PiFOURTH);        \
    113     wb = fMultDiv2(b, W_PiFOURTH);        \
    114     diff = wb - wa;                       \
    115     sum = wb + wa;                        \
    116   }
    117 #define SUMDIFF_PIFOURTH16(diff, sum, a, b)       \
    118   {                                               \
    119     FIXP_SGL wa, wb;                              \
    120     wa = FX_DBL2FX_SGL(fMultDiv2(a, W_PiFOURTH)); \
    121     wb = FX_DBL2FX_SGL(fMultDiv2(b, W_PiFOURTH)); \
    122     diff = wb - wa;                               \
    123     sum = wb + wa;                                \
    124   }
    125 #endif
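/* Reference sketch (illustration only, not part of the build): assuming that
   fMultDiv2(a, b) returns a * b / 2 and that W_PiFOURTH is the Q31 value of
   cos(pi/4) == sin(pi/4) ~ 0.70710678, SUMDIFF_PIFOURTH(diff, sum, a, b)
   yields the real and imaginary parts of (a + i*b) rotated by exp(-i*pi/4),
   additionally scaled by 0.5:
     sum  = (a + b) * 0.70710678 / 2
     diff = (b - a) * 0.70710678 / 2 */
#if 0
static void sumdiff_pifourth_ref(double a, double b, double *diff, double *sum) {
  const double w_pifourth = 0.70710678118654752; /* cos(pi/4) == sin(pi/4) */
  double wa = a * w_pifourth * 0.5; /* mirrors fMultDiv2(a, W_PiFOURTH) */
  double wb = b * w_pifourth * 0.5; /* mirrors fMultDiv2(b, W_PiFOURTH) */
  *diff = wb - wa;
  *sum = wb + wa;
}
#endif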
    126 
    127 #define SCALEFACTOR2048 10
    128 #define SCALEFACTOR1024 9
    129 #define SCALEFACTOR512 8
    130 #define SCALEFACTOR256 7
    131 #define SCALEFACTOR128 6
    132 #define SCALEFACTOR64 5
    133 #define SCALEFACTOR32 4
    134 #define SCALEFACTOR16 3
    135 #define SCALEFACTOR8 2
    136 #define SCALEFACTOR4 1
    137 #define SCALEFACTOR2 1
    138 
    139 #define SCALEFACTOR3 1
    140 #define SCALEFACTOR5 1
    141 #define SCALEFACTOR6 (SCALEFACTOR2 + SCALEFACTOR3 + 2)
    142 #define SCALEFACTOR7 2
    143 #define SCALEFACTOR9 2
    144 #define SCALEFACTOR10 5
    145 #define SCALEFACTOR12 3
    146 #define SCALEFACTOR15 3
    147 #define SCALEFACTOR18 (SCALEFACTOR2 + SCALEFACTOR9 + 2)
    148 #define SCALEFACTOR20 (SCALEFACTOR4 + SCALEFACTOR5 + 2)
    149 #define SCALEFACTOR21 (SCALEFACTOR3 + SCALEFACTOR7 + 2)
    150 #define SCALEFACTOR24 (SCALEFACTOR2 + SCALEFACTOR12 + 2)
    151 #define SCALEFACTOR30 (SCALEFACTOR2 + SCALEFACTOR15 + 2)
    152 #define SCALEFACTOR40 (SCALEFACTOR5 + SCALEFACTOR8 + 2)
    153 #define SCALEFACTOR48 (SCALEFACTOR4 + SCALEFACTOR12 + 2)
    154 #define SCALEFACTOR60 (SCALEFACTOR4 + SCALEFACTOR15 + 2)
    155 #define SCALEFACTOR80 (SCALEFACTOR5 + SCALEFACTOR16 + 2)
    156 #define SCALEFACTOR96 (SCALEFACTOR3 + SCALEFACTOR32 + 2)
    157 #define SCALEFACTOR120 (SCALEFACTOR8 + SCALEFACTOR15 + 2)
    158 #define SCALEFACTOR160 (SCALEFACTOR10 + SCALEFACTOR16 + 2)
    159 #define SCALEFACTOR168 (SCALEFACTOR21 + SCALEFACTOR8 + 2)
    160 #define SCALEFACTOR192 (SCALEFACTOR12 + SCALEFACTOR16 + 2)
    161 #define SCALEFACTOR240 (SCALEFACTOR16 + SCALEFACTOR15 + 2)
    162 #define SCALEFACTOR320 (SCALEFACTOR10 + SCALEFACTOR32 + 2)
    163 #define SCALEFACTOR336 (SCALEFACTOR21 + SCALEFACTOR16 + 2)
    164 #define SCALEFACTOR384 (SCALEFACTOR12 + SCALEFACTOR32 + 2)
    165 #define SCALEFACTOR480 (SCALEFACTOR32 + SCALEFACTOR15 + 2)
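/* The scale factors of the composite lengths above are built as the sum of
   the scale factors of their two sub-FFT lengths plus 2 extra bits, e.g.
   SCALEFACTOR240 = SCALEFACTOR16 + SCALEFACTOR15 + 2.  The "+ 2" matches the
   additional factor-0.25 scaling that fft_apply_rot_vector() (defined below)
   applies between the two sub-FFT passes of an N = N1 * N2 split. */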
    166 
    167 #include "fft.h"
    168 
    169 #ifndef FUNCTION_fft2
    170 
/* Performs the FFT of length 2. The input vector is unscaled; the output
 * vector is scaled by a factor of 0.5. */
    173 static FDK_FORCEINLINE void fft2(FIXP_DBL *RESTRICT pDat) {
    174   FIXP_DBL r1, i1;
    175   FIXP_DBL r2, i2;
    176 
    177   /* real part */
    178   r1 = pDat[2];
    179   r2 = pDat[0];
    180 
    181   /* imaginary part */
    182   i1 = pDat[3];
    183   i2 = pDat[1];
    184 
    185   /* real part */
    186   pDat[0] = (r2 + r1) >> 1;
    187   pDat[2] = (r2 - r1) >> 1;
    188 
    189   /* imaginary part */
    190   pDat[1] = (i2 + i1) >> 1;
    191   pDat[3] = (i2 - i1) >> 1;
    192 }
    193 #endif /* FUNCTION_fft2 */
    194 
    195 #define C31 (STC(0x91261468)) /* FL2FXCONST_DBL(-0.86602540) = -sqrt(3)/2  */
    196 
    197 #ifndef FUNCTION_fft3
/* Performs the FFT of length 3 according to the Winograd algorithm. */
    199 static FDK_FORCEINLINE void fft3(FIXP_DBL *RESTRICT pDat) {
    200   FIXP_DBL r1, r2;
    201   FIXP_DBL s1, s2;
    202   FIXP_DBL pD;
    203 
    204   /* real part */
    205   r1 = pDat[2] + pDat[4];
    206   r2 = fMultDiv2((pDat[2] - pDat[4]), C31);
    207   pD = pDat[0] >> 1;
    208   pDat[0] = pD + (r1 >> 1);
    209   r1 = pD - (r1 >> 2);
    210 
    211   /* imaginary part */
    212   s1 = pDat[3] + pDat[5];
    213   s2 = fMultDiv2((pDat[3] - pDat[5]), C31);
    214   pD = pDat[1] >> 1;
    215   pDat[1] = pD + (s1 >> 1);
    216   s1 = pD - (s1 >> 2);
    217 
    218   /* combination */
    219   pDat[2] = r1 - s2;
    220   pDat[4] = r1 + s2;
    221   pDat[3] = s1 + r2;
    222   pDat[5] = s1 - r2;
    223 }
    224 #endif /* #ifndef FUNCTION_fft3 */
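/* Floating-point reference of the length-3 DFT computed by fft3() above
   (illustration only, not part of the build). fft3() works in place on
   interleaved re/im data and additionally scales its result by 0.5; the
   constant c below corresponds to -C31. */
#if 0
static void dft3_ref(const double re[3], const double im[3],
                     double reOut[3], double imOut[3]) {
  const double c = 0.86602540378443865; /* sqrt(3)/2 */
  double tr = re[1] + re[2], ti = im[1] + im[2];
  double dr = c * (re[1] - re[2]), di = c * (im[1] - im[2]);
  reOut[0] = re[0] + tr;
  imOut[0] = im[0] + ti;
  reOut[1] = re[0] - 0.5 * tr + di; /* X1 = x0 + x1*w + x2*w^2, w = exp(-2*pi*i/3) */
  imOut[1] = im[0] - 0.5 * ti - dr;
  reOut[2] = re[0] - 0.5 * tr - di;
  imOut[2] = im[0] - 0.5 * ti + dr;
}
#endif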
    225 
    226 #define F5C(x) STC(x)
    227 
    228 #define C51 (F5C(0x79bc3854)) /* FL2FXCONST_DBL( 0.95105652)   */
    229 #define C52 (F5C(0x9d839db0)) /* FL2FXCONST_DBL(-1.53884180/2) */
    230 #define C53 (F5C(0xd18053ce)) /* FL2FXCONST_DBL(-0.36327126)   */
    231 #define C54 (F5C(0x478dde64)) /* FL2FXCONST_DBL( 0.55901699)   */
    232 #define C55 (F5C(0xb0000001)) /* FL2FXCONST_DBL(-1.25/2)       */
    233 
/* Performs the FFT of length 5 according to the Winograd algorithm. */
/* This version works with a prescale of 2 instead of 3. */
    236 static FDK_FORCEINLINE void fft5(FIXP_DBL *RESTRICT pDat) {
    237   FIXP_DBL r1, r2, r3, r4;
    238   FIXP_DBL s1, s2, s3, s4;
    239   FIXP_DBL t;
    240 
    241   /* real part */
    242   r1 = (pDat[2] + pDat[8]) >> 1;
    243   r4 = (pDat[2] - pDat[8]) >> 1;
    244   r3 = (pDat[4] + pDat[6]) >> 1;
    245   r2 = (pDat[4] - pDat[6]) >> 1;
    246   t = fMult((r1 - r3), C54);
    247   r1 = r1 + r3;
    248   pDat[0] = (pDat[0] >> 1) + r1;
  /* Shift left to compensate for the constant C55, which is stored scaled by
     0.5 so that it fits the fractional number representation */
    251   r1 = pDat[0] + (fMultDiv2(r1, C55) << (2));
    252   r3 = r1 - t;
    253   r1 = r1 + t;
    254   t = fMult((r4 + r2), C51);
  /* Shift left to compensate for the constant C55, which is stored scaled by
     0.5 so that it fits the fractional number representation */
    257   r4 = t + (fMultDiv2(r4, C52) << (2));
    258   r2 = t + fMult(r2, C53);
    259 
    260   /* imaginary part */
    261   s1 = (pDat[3] + pDat[9]) >> 1;
    262   s4 = (pDat[3] - pDat[9]) >> 1;
    263   s3 = (pDat[5] + pDat[7]) >> 1;
    264   s2 = (pDat[5] - pDat[7]) >> 1;
    265   t = fMult((s1 - s3), C54);
    266   s1 = s1 + s3;
    267   pDat[1] = (pDat[1] >> 1) + s1;
  /* Shift left to compensate for the constant C55, which is stored scaled by
     0.5 so that it fits the fractional number representation */
    270   s1 = pDat[1] + (fMultDiv2(s1, C55) << (2));
    271   s3 = s1 - t;
    272   s1 = s1 + t;
    273   t = fMult((s4 + s2), C51);
  /* Shift left to compensate for the constant C55, which is stored scaled by
     0.5 so that it fits the fractional number representation */
    276   s4 = t + (fMultDiv2(s4, C52) << (2));
    277   s2 = t + fMult(s2, C53);
    278 
    279   /* combination */
    280   pDat[2] = r1 + s2;
    281   pDat[8] = r1 - s2;
    282   pDat[4] = r3 - s4;
    283   pDat[6] = r3 + s4;
    284 
    285   pDat[3] = s1 - r2;
    286   pDat[9] = s1 + r2;
    287   pDat[5] = s3 + r4;
    288   pDat[7] = s3 - r4;
    289 }
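/* Descriptive note: with the >> 1 input prescale above, fft5() returns the
   5-point DFT scaled by a factor of 0.5, which matches SCALEFACTOR5 == 1. */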
    290 
    291 #define F5C(x) STC(x)
    292 
    293 #define C51 (F5C(0x79bc3854)) /* FL2FXCONST_DBL( 0.95105652)   */
    294 #define C52 (F5C(0x9d839db0)) /* FL2FXCONST_DBL(-1.53884180/2) */
    295 #define C53 (F5C(0xd18053ce)) /* FL2FXCONST_DBL(-0.36327126)   */
    296 #define C54 (F5C(0x478dde64)) /* FL2FXCONST_DBL( 0.55901699)   */
    297 #define C55 (F5C(0xb0000001)) /* FL2FXCONST_DBL(-1.25/2)       */
/**
 * \brief    Function performs a complex 10-point FFT.
 *           The FFT is performed in-place. The result of the FFT
 *           is scaled by SCALEFACTOR10 bits.
 *
 *           WOPS FLC version:                    1093 cycles
 *           WOPS with 32x16 bit multiplications:  196 cycles
 *
 * \param    [i/o] x    interleaved real / imaginary input and output buffer
 *                      of 10 complex values (the separate re / im / stride
 *                      interface shown in the commented-out signature is not
 *                      used by this implementation)
 *
 * \return   void
 */
    312 static void fft10(FIXP_DBL *x)  // FIXP_DBL *re, FIXP_DBL *im, FIXP_SGL s)
    313 {
    314   FIXP_DBL t;
    315   FIXP_DBL x0, x1, x2, x3, x4;
    316   FIXP_DBL r1, r2, r3, r4;
    317   FIXP_DBL s1, s2, s3, s4;
    318   FIXP_DBL y00, y01, y02, y03, y04, y05, y06, y07, y08, y09;
    319   FIXP_DBL y10, y11, y12, y13, y14, y15, y16, y17, y18, y19;
    320 
    321   const int s = 1;  // stride factor
    322 
    323   /* 2 fft5 stages */
    324 
    325   /* real part */
    326   x0 = (x[s * 0] >> SCALEFACTOR10);
    327   x1 = (x[s * 4] >> SCALEFACTOR10);
    328   x2 = (x[s * 8] >> SCALEFACTOR10);
    329   x3 = (x[s * 12] >> SCALEFACTOR10);
    330   x4 = (x[s * 16] >> SCALEFACTOR10);
    331 
    332   r1 = (x3 + x2);
    333   r4 = (x3 - x2);
    334   r3 = (x1 + x4);
    335   r2 = (x1 - x4);
    336   t = fMult((r1 - r3), C54);
    337   r1 = (r1 + r3);
    338   y00 = (x0 + r1);
    339   r1 = (y00 + ((fMult(r1, C55) << 1)));
    340   r3 = (r1 - t);
    341   r1 = (r1 + t);
    342   t = fMult((r4 + r2), C51);
    343   r4 = (t + (fMult(r4, C52) << 1));
    344   r2 = (t + fMult(r2, C53));
    345 
    346   /* imaginary part */
    347   x0 = (x[s * 0 + 1] >> SCALEFACTOR10);
    348   x1 = (x[s * 4 + 1] >> SCALEFACTOR10);
    349   x2 = (x[s * 8 + 1] >> SCALEFACTOR10);
    350   x3 = (x[s * 12 + 1] >> SCALEFACTOR10);
    351   x4 = (x[s * 16 + 1] >> SCALEFACTOR10);
    352 
    353   s1 = (x3 + x2);
    354   s4 = (x3 - x2);
    355   s3 = (x1 + x4);
    356   s2 = (x1 - x4);
    357   t = fMult((s1 - s3), C54);
    358   s1 = (s1 + s3);
    359   y01 = (x0 + s1);
    360   s1 = (y01 + (fMult(s1, C55) << 1));
    361   s3 = (s1 - t);
    362   s1 = (s1 + t);
    363   t = fMult((s4 + s2), C51);
    364   s4 = (t + (fMult(s4, C52) << 1));
    365   s2 = (t + fMult(s2, C53));
    366 
    367   /* combination */
    368   y04 = (r1 + s2);
    369   y16 = (r1 - s2);
    370   y08 = (r3 - s4);
    371   y12 = (r3 + s4);
    372 
    373   y05 = (s1 - r2);
    374   y17 = (s1 + r2);
    375   y09 = (s3 + r4);
    376   y13 = (s3 - r4);
    377 
    378   /* real part */
    379   x0 = (x[s * 10] >> SCALEFACTOR10);
    380   x1 = (x[s * 2] >> SCALEFACTOR10);
    381   x2 = (x[s * 6] >> SCALEFACTOR10);
    382   x3 = (x[s * 14] >> SCALEFACTOR10);
    383   x4 = (x[s * 18] >> SCALEFACTOR10);
    384 
    385   r1 = (x1 + x4);
    386   r4 = (x1 - x4);
    387   r3 = (x3 + x2);
    388   r2 = (x3 - x2);
    389   t = fMult((r1 - r3), C54);
    390   r1 = (r1 + r3);
    391   y02 = (x0 + r1);
    392   r1 = (y02 + ((fMult(r1, C55) << 1)));
    393   r3 = (r1 - t);
    394   r1 = (r1 + t);
    395   t = fMult(((r4 + r2)), C51);
    396   r4 = (t + (fMult(r4, C52) << 1));
    397   r2 = (t + fMult(r2, C53));
    398 
    399   /* imaginary part */
    400   x0 = (x[s * 10 + 1] >> SCALEFACTOR10);
    401   x1 = (x[s * 2 + 1] >> SCALEFACTOR10);
    402   x2 = (x[s * 6 + 1] >> SCALEFACTOR10);
    403   x3 = (x[s * 14 + 1] >> SCALEFACTOR10);
    404   x4 = (x[s * 18 + 1] >> SCALEFACTOR10);
    405 
    406   s1 = (x1 + x4);
    407   s4 = (x1 - x4);
    408   s3 = (x3 + x2);
    409   s2 = (x3 - x2);
    410   t = fMult((s1 - s3), C54);
    411   s1 = (s1 + s3);
    412   y03 = (x0 + s1);
    413   s1 = (y03 + (fMult(s1, C55) << 1));
    414   s3 = (s1 - t);
    415   s1 = (s1 + t);
    416   t = fMult((s4 + s2), C51);
    417   s4 = (t + (fMult(s4, C52) << 1));
    418   s2 = (t + fMult(s2, C53));
    419 
    420   /* combination */
    421   y06 = (r1 + s2);
    422   y18 = (r1 - s2);
    423   y10 = (r3 - s4);
    424   y14 = (r3 + s4);
    425 
    426   y07 = (s1 - r2);
    427   y19 = (s1 + r2);
    428   y11 = (s3 + r4);
    429   y15 = (s3 - r4);
    430 
    431   /* 5 fft2 stages */
    432   x[s * 0] = (y00 + y02);
    433   x[s * 0 + 1] = (y01 + y03);
    434   x[s * 10] = (y00 - y02);
    435   x[s * 10 + 1] = (y01 - y03);
    436 
    437   x[s * 4] = (y04 + y06);
    438   x[s * 4 + 1] = (y05 + y07);
    439   x[s * 14] = (y04 - y06);
    440   x[s * 14 + 1] = (y05 - y07);
    441 
    442   x[s * 8] = (y08 + y10);
    443   x[s * 8 + 1] = (y09 + y11);
    444   x[s * 18] = (y08 - y10);
    445   x[s * 18 + 1] = (y09 - y11);
    446 
    447   x[s * 12] = (y12 + y14);
    448   x[s * 12 + 1] = (y13 + y15);
    449   x[s * 2] = (y12 - y14);
    450   x[s * 2 + 1] = (y13 - y15);
    451 
    452   x[s * 16] = (y16 + y18);
    453   x[s * 16 + 1] = (y17 + y19);
    454   x[s * 6] = (y16 - y18);
    455   x[s * 6 + 1] = (y17 - y19);
    456 }
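/* Usage sketch (illustration only, not part of the build): fft10() transforms
   10 interleaved complex values in place and returns the DFT scaled by
   2^-SCALEFACTOR10; 0x40000000 is assumed to represent 0.5 in the Q31
   FIXP_DBL format used throughout this file. */
#if 0
static void fft10_usage_example(void) {
  FIXP_DBL buf[2 * 10];
  int i;
  for (i = 0; i < 2 * 10; i++) buf[i] = (FIXP_DBL)0;
  buf[0] = (FIXP_DBL)0x40000000; /* real impulse of amplitude 0.5 */
  fft10(buf); /* flat spectrum: each bin ~ 0.5 * 2^-SCALEFACTOR10 */
}
#endif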
    457 
    458 #ifndef FUNCTION_fft12
    459 #define FUNCTION_fft12
    460 
    461 #undef C31
    462 #define C31 (STC(0x91261468)) /* FL2FXCONST_DBL(-0.86602540) = -sqrt(3)/2  */
    463 
    464 static inline void fft12(FIXP_DBL *pInput) {
    465   FIXP_DBL aDst[24];
    466   FIXP_DBL *pSrc, *pDst;
    467   int i;
    468 
    469   pSrc = pInput;
    470   pDst = aDst;
    471   FIXP_DBL r1, r2, s1, s2, pD;
    472 
    473   /* First 3*2 samples are shifted right by 2 before output */
    474   r1 = pSrc[8] + pSrc[16];
    475   r2 = fMultDiv2((pSrc[8] - pSrc[16]), C31);
    476   pD = pSrc[0] >> 1;
    477   pDst[0] = (pD + (r1 >> 1)) >> 1;
    478   r1 = pD - (r1 >> 2);
    479 
    480   /* imaginary part */
    481   s1 = pSrc[9] + pSrc[17];
    482   s2 = fMultDiv2((pSrc[9] - pSrc[17]), C31);
    483   pD = pSrc[1] >> 1;
    484   pDst[1] = (pD + (s1 >> 1)) >> 1;
    485   s1 = pD - (s1 >> 2);
    486 
    487   /* combination */
    488   pDst[2] = (r1 - s2) >> 1;
    489   pDst[3] = (s1 + r2) >> 1;
    490   pDst[4] = (r1 + s2) >> 1;
    491   pDst[5] = (s1 - r2) >> 1;
    492   pSrc += 2;
    493   pDst += 6;
    494 
    495   const FIXP_STB *pVecRe = RotVectorReal12;
    496   const FIXP_STB *pVecIm = RotVectorImag12;
    497   FIXP_DBL re, im;
    498   FIXP_STB vre, vim;
    499   for (i = 0; i < 2; i++) {
    500     /* sample 0,1 are shifted right by 2 before output */
    501     /* sample 2,3 4,5 are shifted right by 1 and complex multiplied before
    502      * output */
    503 
    504     r1 = pSrc[8] + pSrc[16];
    505     r2 = fMultDiv2((pSrc[8] - pSrc[16]), C31);
    506     pD = pSrc[0] >> 1;
    507     pDst[0] = (pD + (r1 >> 1)) >> 1;
    508     r1 = pD - (r1 >> 2);
    509 
    510     /* imaginary part */
    511     s1 = pSrc[9] + pSrc[17];
    512     s2 = fMultDiv2((pSrc[9] - pSrc[17]), C31);
    513     pD = pSrc[1] >> 1;
    514     pDst[1] = (pD + (s1 >> 1)) >> 1;
    515     s1 = pD - (s1 >> 2);
    516 
    517     /* combination */
    518     re = (r1 - s2) >> 0;
    519     im = (s1 + r2) >> 0;
    520     vre = *pVecRe++;
    521     vim = *pVecIm++;
    522     cplxMultDiv2(&pDst[3], &pDst[2], im, re, vre, vim);
    523 
    524     re = (r1 + s2) >> 0;
    525     im = (s1 - r2) >> 0;
    526     vre = *pVecRe++;
    527     vim = *pVecIm++;
    528     cplxMultDiv2(&pDst[5], &pDst[4], im, re, vre, vim);
    529 
    530     pDst += 6;
    531     pSrc += 2;
    532   }
    533   /* sample 0,1 are shifted right by 2 before output */
    534   /* sample 2,3 is shifted right by 1 and complex multiplied with (0.0,+1.0) */
    535   /* sample 4,5 is shifted right by 1 and complex multiplied with (-1.0,0.0) */
    536   r1 = pSrc[8] + pSrc[16];
    537   r2 = fMultDiv2((pSrc[8] - pSrc[16]), C31);
    538   pD = pSrc[0] >> 1;
    539   pDst[0] = (pD + (r1 >> 1)) >> 1;
    540   r1 = pD - (r1 >> 2);
    541 
    542   /* imaginary part */
    543   s1 = pSrc[9] + pSrc[17];
    544   s2 = fMultDiv2((pSrc[9] - pSrc[17]), C31);
    545   pD = pSrc[1] >> 1;
    546   pDst[1] = (pD + (s1 >> 1)) >> 1;
    547   s1 = pD - (s1 >> 2);
    548 
    549   /* combination */
    550   pDst[2] = (s1 + r2) >> 1;
    551   pDst[3] = (s2 - r1) >> 1;
    552   pDst[4] = -((r1 + s2) >> 1);
    553   pDst[5] = (r2 - s1) >> 1;
    554 
    555   /* Perform 3 times the fft of length 4. The input samples are at the address
    556   of aDst and the output samples are at the address of pInput. The input vector
    557   for the fft of length 4 is built of the interleaved samples in aDst, the
    558   output samples are stored consecutively at the address of pInput.
    559   */
    560   pSrc = aDst;
    561   pDst = pInput;
    562   for (i = 0; i < 3; i++) {
    563     /* inline FFT4 merged with incoming resorting loop */
    564     FIXP_DBL a00, a10, a20, a30, tmp0, tmp1;
    565 
    566     a00 = (pSrc[0] + pSrc[12]) >> 1; /* Re A + Re B */
    567     a10 = (pSrc[6] + pSrc[18]) >> 1; /* Re C + Re D */
    568     a20 = (pSrc[1] + pSrc[13]) >> 1; /* Im A + Im B */
    569     a30 = (pSrc[7] + pSrc[19]) >> 1; /* Im C + Im D */
    570 
    571     pDst[0] = a00 + a10; /* Re A' = Re A + Re B + Re C + Re D */
    572     pDst[1] = a20 + a30; /* Im A' = Im A + Im B + Im C + Im D */
    573 
    574     tmp0 = a00 - pSrc[12]; /* Re A - Re B */
    575     tmp1 = a20 - pSrc[13]; /* Im A - Im B */
    576 
    577     pDst[12] = a00 - a10; /* Re C' = Re A + Re B - Re C - Re D */
    578     pDst[13] = a20 - a30; /* Im C' = Im A + Im B - Im C - Im D */
    579 
    580     a10 = a10 - pSrc[18]; /* Re C - Re D */
    581     a30 = a30 - pSrc[19]; /* Im C - Im D */
    582 
    583     pDst[6] = tmp0 + a30;  /* Re B' = Re A - Re B + Im C - Im D */
    584     pDst[18] = tmp0 - a30; /* Re D' = Re A - Re B - Im C + Im D */
    585     pDst[7] = tmp1 - a10;  /* Im B' = Im A - Im B - Re C + Re D */
    586     pDst[19] = tmp1 + a10; /* Im D' = Im A - Im B + Re C - Re D */
    587 
    588     pSrc += 2;
    589     pDst += 2;
    590   }
    591 }
    592 #endif /* FUNCTION_fft12 */
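/* Structure of fft12() above (descriptive note): a 12 = 3 * 4 decomposition,
   i.e. four length-3 FFTs whose outputs are twiddled with the
   RotVectorReal12 / RotVectorImag12 ROM tables included above, followed by
   three inlined length-4 FFTs; the accumulated right-shifts leave the result
   scaled by 2^-SCALEFACTOR12. */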
    593 
    594 #ifndef FUNCTION_fft15
    595 
    596 #define N3 3
    597 #define N5 5
    598 #define N6 6
    599 #define N15 15
    600 
    601 /* Performs the FFT of length 15. It is split into FFTs of length 3 and
    602  * length 5. */
    603 static inline void fft15(FIXP_DBL *pInput) {
    604   FIXP_DBL aDst[2 * N15];
    605   FIXP_DBL aDst1[2 * N15];
    606   int i, k, l;
    607 
    608   /* Sort input vector for fft's of length 3
    609   input3(0:2)   = [input(0) input(5) input(10)];
    610   input3(3:5)   = [input(3) input(8) input(13)];
    611   input3(6:8)   = [input(6) input(11) input(1)];
    612   input3(9:11)  = [input(9) input(14) input(4)];
    613   input3(12:14) = [input(12) input(2) input(7)]; */
    614   {
    615     const FIXP_DBL *pSrc = pInput;
    616     FIXP_DBL *RESTRICT pDst = aDst;
    617     /* Merge 3 loops into one, skip call of fft3 */
    618     for (i = 0, l = 0, k = 0; i < N5; i++, k += 6) {
    619       pDst[k + 0] = pSrc[l];
    620       pDst[k + 1] = pSrc[l + 1];
    621       l += 2 * N5;
    622       if (l >= (2 * N15)) l -= (2 * N15);
    623 
    624       pDst[k + 2] = pSrc[l];
    625       pDst[k + 3] = pSrc[l + 1];
    626       l += 2 * N5;
    627       if (l >= (2 * N15)) l -= (2 * N15);
    628       pDst[k + 4] = pSrc[l];
    629       pDst[k + 5] = pSrc[l + 1];
    630       l += (2 * N5) + (2 * N3);
    631       if (l >= (2 * N15)) l -= (2 * N15);
    632 
    633       /* fft3 merged with shift right by 2 loop */
    634       FIXP_DBL r1, r2, r3;
    635       FIXP_DBL s1, s2;
    636       /* real part */
    637       r1 = pDst[k + 2] + pDst[k + 4];
    638       r2 = fMult((pDst[k + 2] - pDst[k + 4]), C31);
    639       s1 = pDst[k + 0];
    640       pDst[k + 0] = (s1 + r1) >> 2;
    641       r1 = s1 - (r1 >> 1);
    642 
    643       /* imaginary part */
    644       s1 = pDst[k + 3] + pDst[k + 5];
    645       s2 = fMult((pDst[k + 3] - pDst[k + 5]), C31);
    646       r3 = pDst[k + 1];
    647       pDst[k + 1] = (r3 + s1) >> 2;
    648       s1 = r3 - (s1 >> 1);
    649 
    650       /* combination */
    651       pDst[k + 2] = (r1 - s2) >> 2;
    652       pDst[k + 4] = (r1 + s2) >> 2;
    653       pDst[k + 3] = (s1 + r2) >> 2;
    654       pDst[k + 5] = (s1 - r2) >> 2;
    655     }
    656   }
    657   /* Sort input vector for fft's of length 5
    658   input5(0:4)   = [output3(0) output3(3) output3(6) output3(9) output3(12)];
    659   input5(5:9)   = [output3(1) output3(4) output3(7) output3(10) output3(13)];
    660   input5(10:14) = [output3(2) output3(5) output3(8) output3(11) output3(14)]; */
  /* Merge 2 loops into one; brings about a 10% speedup */
    662   {
    663     const FIXP_DBL *pSrc = aDst;
    664     FIXP_DBL *RESTRICT pDst = aDst1;
    665     for (i = 0, l = 0, k = 0; i < N3; i++, k += 10) {
    666       l = 2 * i;
    667       pDst[k + 0] = pSrc[l + 0];
    668       pDst[k + 1] = pSrc[l + 1];
    669       pDst[k + 2] = pSrc[l + 0 + (2 * N3)];
    670       pDst[k + 3] = pSrc[l + 1 + (2 * N3)];
    671       pDst[k + 4] = pSrc[l + 0 + (4 * N3)];
    672       pDst[k + 5] = pSrc[l + 1 + (4 * N3)];
    673       pDst[k + 6] = pSrc[l + 0 + (6 * N3)];
    674       pDst[k + 7] = pSrc[l + 1 + (6 * N3)];
    675       pDst[k + 8] = pSrc[l + 0 + (8 * N3)];
    676       pDst[k + 9] = pSrc[l + 1 + (8 * N3)];
    677       fft5(&pDst[k]);
    678     }
    679   }
    680   /* Sort output vector of length 15
    681   output = [out5(0)  out5(6)  out5(12) out5(3)  out5(9)
    682             out5(10) out5(1)  out5(7)  out5(13) out5(4)
    683             out5(5)  out5(11) out5(2)  out5(8)  out5(14)]; */
  /* Optimized version of the clumsy resorting loop; brings about a 5% speedup */
    685   {
    686     const FIXP_DBL *pSrc = aDst1;
    687     FIXP_DBL *RESTRICT pDst = pInput;
    688     for (i = 0, l = 0, k = 0; i < N3; i++, k += 10) {
    689       pDst[k + 0] = pSrc[l];
    690       pDst[k + 1] = pSrc[l + 1];
    691       l += (2 * N6);
    692       if (l >= (2 * N15)) l -= (2 * N15);
    693       pDst[k + 2] = pSrc[l];
    694       pDst[k + 3] = pSrc[l + 1];
    695       l += (2 * N6);
    696       if (l >= (2 * N15)) l -= (2 * N15);
    697       pDst[k + 4] = pSrc[l];
    698       pDst[k + 5] = pSrc[l + 1];
    699       l += (2 * N6);
    700       if (l >= (2 * N15)) l -= (2 * N15);
    701       pDst[k + 6] = pSrc[l];
    702       pDst[k + 7] = pSrc[l + 1];
    703       l += (2 * N6);
    704       if (l >= (2 * N15)) l -= (2 * N15);
    705       pDst[k + 8] = pSrc[l];
    706       pDst[k + 9] = pSrc[l + 1];
    707       l += 2; /* no modulo check needed, it cannot occur */
    708     }
    709   }
    710 }
    711 #endif /* FUNCTION_fft15 */
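/* Structure of fft15() above (descriptive note): a prime-factor 15 = 3 * 5
   split, so no twiddle multiplications are needed between the two passes; the
   index permutations are the ones listed in the comments inside the function,
   and the >> 2 of the merged fft3 pass plus the >> 1 prescale of fft5() leave
   the result scaled by 2^-SCALEFACTOR15. */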
    712 
/*
 Select shift placement.
 Some processors, like ARM, can shift "for free" in combination with an
 addition or subtraction, while others cannot, so it is optimal either to
 combine the shift with the +/- or to reduce the total number of shift
 operations (see the expansion sketch after the #endif below).
 */
    719 #if !defined(__arm__)
    720 #define SHIFT_A >> 1
    721 #define SHIFT_B
    722 #else
    723 #define SHIFT_A
    724 #define SHIFT_B >> 1
    725 #endif
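/* Expansion sketch (illustration only) for a typical radix-4 butterfly line
   in fft_16() below:
     non-ARM: vi = (x[8] >> 1) + (x[24] >> 1);   x[0] = vr + vi;
     ARM:     vi =  x[8]       +  x[24];         x[0] = vr + (vi >> 1);
   Both variants compute the same butterfly (up to LSB rounding, and assuming
   one bit of headroom in the ARM case); the ARM form lets the final >> 1 fold
   into the addition as a shifted operand. */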
    726 
#ifndef FUNCTION_fft_16 /* compile this fft_16 (FIXP_DBL *) only if it has not \
                           been defined elsewhere */
    729 
/* This define prevents this array from being declared twice if the 16-bit fft
 * is enabled too */
    732 #define FUNCTION_DATA_fft_16_w16
    733 static const FIXP_STP fft16_w16[2] = {STCP(0x7641af3d, 0x30fbc54d),
    734                                       STCP(0x30fbc54d, 0x7641af3d)};
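/* The two entries above are cos/sin pairs in Q31: 0x7641af3d ~ cos(pi/8) =
   0.923880 and 0x30fbc54d ~ sin(pi/8) = 0.382683, i.e. the twiddle factors
   exp(-i*pi/8) and exp(-i*3*pi/8) used in the last stage of the 16-point FFT
   below (the exp(-i*pi/4) twiddle is handled by SUMDIFF_PIFOURTH). */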
    735 
    736 LNK_SECTION_CODE_L1
    737 inline void fft_16(FIXP_DBL *RESTRICT x) {
    738   FIXP_DBL vr, ur;
    739   FIXP_DBL vr2, ur2;
    740   FIXP_DBL vr3, ur3;
    741   FIXP_DBL vr4, ur4;
    742   FIXP_DBL vi, ui;
    743   FIXP_DBL vi2, ui2;
    744   FIXP_DBL vi3, ui3;
    745 
    746   vr = (x[0] >> 1) + (x[16] >> 1);       /* Re A + Re B */
    747   ur = (x[1] >> 1) + (x[17] >> 1);       /* Im A + Im B */
    748   vi = (x[8] SHIFT_A) + (x[24] SHIFT_A); /* Re C + Re D */
    749   ui = (x[9] SHIFT_A) + (x[25] SHIFT_A); /* Im C + Im D */
    750   x[0] = vr + (vi SHIFT_B);              /* Re A' = ReA + ReB +ReC + ReD */
    751   x[1] = ur + (ui SHIFT_B);              /* Im A' = sum of imag values */
    752 
    753   vr2 = (x[4] >> 1) + (x[20] >> 1); /* Re A + Re B */
    754   ur2 = (x[5] >> 1) + (x[21] >> 1); /* Im A + Im B */
    755 
    756   x[4] = vr - (vi SHIFT_B); /* Re C' = -(ReC+ReD) + (ReA+ReB) */
    757   x[5] = ur - (ui SHIFT_B); /* Im C' = -Im C -Im D +Im A +Im B */
    758   vr -= x[16];              /* Re A - Re B */
    759   vi = (vi SHIFT_B)-x[24];  /* Re C - Re D */
    760   ur -= x[17];              /* Im A - Im B */
    761   ui = (ui SHIFT_B)-x[25];  /* Im C - Im D */
    762 
    763   vr3 = (x[2] >> 1) + (x[18] >> 1); /* Re A + Re B */
    764   ur3 = (x[3] >> 1) + (x[19] >> 1); /* Im A + Im B */
    765 
    766   x[2] = ui + vr; /* Re B' = Im C - Im D  + Re A - Re B */
    767   x[3] = ur - vi; /* Im B'= -Re C + Re D + Im A - Im B */
    768 
    769   vr4 = (x[6] >> 1) + (x[22] >> 1); /* Re A + Re B */
    770   ur4 = (x[7] >> 1) + (x[23] >> 1); /* Im A + Im B */
    771 
    772   x[6] = vr - ui; /* Re D' = -Im C + Im D + Re A - Re B */
    773   x[7] = vi + ur; /* Im D'= Re C - Re D + Im A - Im B */
    774 
    775   vi2 = (x[12] SHIFT_A) + (x[28] SHIFT_A); /* Re C + Re D */
    776   ui2 = (x[13] SHIFT_A) + (x[29] SHIFT_A); /* Im C + Im D */
    777   x[8] = vr2 + (vi2 SHIFT_B);              /* Re A' = ReA + ReB +ReC + ReD */
    778   x[9] = ur2 + (ui2 SHIFT_B);              /* Im A' = sum of imag values */
    779   x[12] = vr2 - (vi2 SHIFT_B);             /* Re C' = -(ReC+ReD) + (ReA+ReB) */
    780   x[13] = ur2 - (ui2 SHIFT_B);             /* Im C' = -Im C -Im D +Im A +Im B */
    781   vr2 -= x[20];                            /* Re A - Re B */
    782   ur2 -= x[21];                            /* Im A - Im B */
    783   vi2 = (vi2 SHIFT_B)-x[28];               /* Re C - Re D */
    784   ui2 = (ui2 SHIFT_B)-x[29];               /* Im C - Im D */
    785 
    786   vi = (x[10] SHIFT_A) + (x[26] SHIFT_A); /* Re C + Re D */
    787   ui = (x[11] SHIFT_A) + (x[27] SHIFT_A); /* Im C + Im D */
    788 
    789   x[10] = ui2 + vr2; /* Re B' = Im C - Im D  + Re A - Re B */
    790   x[11] = ur2 - vi2; /* Im B'= -Re C + Re D + Im A - Im B */
    791 
    792   vi3 = (x[14] SHIFT_A) + (x[30] SHIFT_A); /* Re C + Re D */
    793   ui3 = (x[15] SHIFT_A) + (x[31] SHIFT_A); /* Im C + Im D */
    794 
    795   x[14] = vr2 - ui2; /* Re D' = -Im C + Im D + Re A - Re B */
    796   x[15] = vi2 + ur2; /* Im D'= Re C - Re D + Im A - Im B */
    797 
    798   x[16] = vr3 + (vi SHIFT_B); /* Re A' = ReA + ReB +ReC + ReD */
    799   x[17] = ur3 + (ui SHIFT_B); /* Im A' = sum of imag values */
    800   x[20] = vr3 - (vi SHIFT_B); /* Re C' = -(ReC+ReD) + (ReA+ReB) */
    801   x[21] = ur3 - (ui SHIFT_B); /* Im C' = -Im C -Im D +Im A +Im B */
    802   vr3 -= x[18];               /* Re A - Re B */
    803   ur3 -= x[19];               /* Im A - Im B */
    804   vi = (vi SHIFT_B)-x[26];    /* Re C - Re D */
    805   ui = (ui SHIFT_B)-x[27];    /* Im C - Im D */
    806   x[18] = ui + vr3;           /* Re B' = Im C - Im D  + Re A - Re B */
    807   x[19] = ur3 - vi;           /* Im B'= -Re C + Re D + Im A - Im B */
    808 
    809   x[24] = vr4 + (vi3 SHIFT_B); /* Re A' = ReA + ReB +ReC + ReD */
    810   x[28] = vr4 - (vi3 SHIFT_B); /* Re C' = -(ReC+ReD) + (ReA+ReB) */
    811   x[25] = ur4 + (ui3 SHIFT_B); /* Im A' = sum of imag values */
    812   x[29] = ur4 - (ui3 SHIFT_B); /* Im C' = -Im C -Im D +Im A +Im B */
    813   vr4 -= x[22];                /* Re A - Re B */
    814   ur4 -= x[23];                /* Im A - Im B */
    815 
    816   x[22] = vr3 - ui; /* Re D' = -Im C + Im D + Re A - Re B */
    817   x[23] = vi + ur3; /* Im D'= Re C - Re D + Im A - Im B */
    818 
    819   vi3 = (vi3 SHIFT_B)-x[30]; /* Re C - Re D */
    820   ui3 = (ui3 SHIFT_B)-x[31]; /* Im C - Im D */
    821   x[26] = ui3 + vr4;         /* Re B' = Im C - Im D  + Re A - Re B */
    822   x[30] = vr4 - ui3;         /* Re D' = -Im C + Im D + Re A - Re B */
    823   x[27] = ur4 - vi3;         /* Im B'= -Re C + Re D + Im A - Im B */
    824   x[31] = vi3 + ur4;         /* Im D'= Re C - Re D + Im A - Im B */
    825 
    826   // xt1 =  0
    827   // xt2 =  8
    828   vr = x[8];
    829   vi = x[9];
    830   ur = x[0] >> 1;
    831   ui = x[1] >> 1;
    832   x[0] = ur + (vr >> 1);
    833   x[1] = ui + (vi >> 1);
    834   x[8] = ur - (vr >> 1);
    835   x[9] = ui - (vi >> 1);
    836 
    837   // xt1 =  4
    838   // xt2 = 12
    839   vr = x[13];
    840   vi = x[12];
    841   ur = x[4] >> 1;
    842   ui = x[5] >> 1;
    843   x[4] = ur + (vr >> 1);
    844   x[5] = ui - (vi >> 1);
    845   x[12] = ur - (vr >> 1);
    846   x[13] = ui + (vi >> 1);
    847 
    848   // xt1 = 16
    849   // xt2 = 24
    850   vr = x[24];
    851   vi = x[25];
    852   ur = x[16] >> 1;
    853   ui = x[17] >> 1;
    854   x[16] = ur + (vr >> 1);
    855   x[17] = ui + (vi >> 1);
    856   x[24] = ur - (vr >> 1);
    857   x[25] = ui - (vi >> 1);
    858 
    859   // xt1 = 20
    860   // xt2 = 28
    861   vr = x[29];
    862   vi = x[28];
    863   ur = x[20] >> 1;
    864   ui = x[21] >> 1;
    865   x[20] = ur + (vr >> 1);
    866   x[21] = ui - (vi >> 1);
    867   x[28] = ur - (vr >> 1);
    868   x[29] = ui + (vi >> 1);
    869 
    870   // xt1 =  2
    871   // xt2 = 10
    872   SUMDIFF_PIFOURTH(vi, vr, x[10], x[11])
    873   // vr = fMultDiv2((x[11] + x[10]),W_PiFOURTH);
    874   // vi = fMultDiv2((x[11] - x[10]),W_PiFOURTH);
    875   ur = x[2];
    876   ui = x[3];
    877   x[2] = (ur >> 1) + vr;
    878   x[3] = (ui >> 1) + vi;
    879   x[10] = (ur >> 1) - vr;
    880   x[11] = (ui >> 1) - vi;
    881 
    882   // xt1 =  6
    883   // xt2 = 14
    884   SUMDIFF_PIFOURTH(vr, vi, x[14], x[15])
    885   ur = x[6];
    886   ui = x[7];
    887   x[6] = (ur >> 1) + vr;
    888   x[7] = (ui >> 1) - vi;
    889   x[14] = (ur >> 1) - vr;
    890   x[15] = (ui >> 1) + vi;
    891 
    892   // xt1 = 18
    893   // xt2 = 26
    894   SUMDIFF_PIFOURTH(vi, vr, x[26], x[27])
    895   ur = x[18];
    896   ui = x[19];
    897   x[18] = (ur >> 1) + vr;
    898   x[19] = (ui >> 1) + vi;
    899   x[26] = (ur >> 1) - vr;
    900   x[27] = (ui >> 1) - vi;
    901 
    902   // xt1 = 22
    903   // xt2 = 30
    904   SUMDIFF_PIFOURTH(vr, vi, x[30], x[31])
    905   ur = x[22];
    906   ui = x[23];
    907   x[22] = (ur >> 1) + vr;
    908   x[23] = (ui >> 1) - vi;
    909   x[30] = (ur >> 1) - vr;
    910   x[31] = (ui >> 1) + vi;
    911 
    912   // xt1 =  0
    913   // xt2 = 16
    914   vr = x[16];
    915   vi = x[17];
    916   ur = x[0] >> 1;
    917   ui = x[1] >> 1;
    918   x[0] = ur + (vr >> 1);
    919   x[1] = ui + (vi >> 1);
    920   x[16] = ur - (vr >> 1);
    921   x[17] = ui - (vi >> 1);
    922 
    923   // xt1 =  8
    924   // xt2 = 24
    925   vi = x[24];
    926   vr = x[25];
    927   ur = x[8] >> 1;
    928   ui = x[9] >> 1;
    929   x[8] = ur + (vr >> 1);
    930   x[9] = ui - (vi >> 1);
    931   x[24] = ur - (vr >> 1);
    932   x[25] = ui + (vi >> 1);
    933 
    934   // xt1 =  2
    935   // xt2 = 18
    936   cplxMultDiv2(&vi, &vr, x[19], x[18], fft16_w16[0]);
    937   ur = x[2];
    938   ui = x[3];
    939   x[2] = (ur >> 1) + vr;
    940   x[3] = (ui >> 1) + vi;
    941   x[18] = (ur >> 1) - vr;
    942   x[19] = (ui >> 1) - vi;
    943 
    944   // xt1 = 10
    945   // xt2 = 26
    946   cplxMultDiv2(&vr, &vi, x[27], x[26], fft16_w16[0]);
    947   ur = x[10];
    948   ui = x[11];
    949   x[10] = (ur >> 1) + vr;
    950   x[11] = (ui >> 1) - vi;
    951   x[26] = (ur >> 1) - vr;
    952   x[27] = (ui >> 1) + vi;
    953 
    954   // xt1 =  4
    955   // xt2 = 20
    956   SUMDIFF_PIFOURTH(vi, vr, x[20], x[21])
    957   ur = x[4];
    958   ui = x[5];
    959   x[4] = (ur >> 1) + vr;
    960   x[5] = (ui >> 1) + vi;
    961   x[20] = (ur >> 1) - vr;
    962   x[21] = (ui >> 1) - vi;
    963 
    964   // xt1 = 12
    965   // xt2 = 28
    966   SUMDIFF_PIFOURTH(vr, vi, x[28], x[29])
    967   ur = x[12];
    968   ui = x[13];
    969   x[12] = (ur >> 1) + vr;
    970   x[13] = (ui >> 1) - vi;
    971   x[28] = (ur >> 1) - vr;
    972   x[29] = (ui >> 1) + vi;
    973 
    974   // xt1 =  6
    975   // xt2 = 22
    976   cplxMultDiv2(&vi, &vr, x[23], x[22], fft16_w16[1]);
    977   ur = x[6];
    978   ui = x[7];
    979   x[6] = (ur >> 1) + vr;
    980   x[7] = (ui >> 1) + vi;
    981   x[22] = (ur >> 1) - vr;
    982   x[23] = (ui >> 1) - vi;
    983 
    984   // xt1 = 14
    985   // xt2 = 30
    986   cplxMultDiv2(&vr, &vi, x[31], x[30], fft16_w16[1]);
    987   ur = x[14];
    988   ui = x[15];
    989   x[14] = (ur >> 1) + vr;
    990   x[15] = (ui >> 1) - vi;
    991   x[30] = (ur >> 1) - vr;
    992   x[31] = (ui >> 1) + vi;
    993 }
    994 #endif /* FUNCTION_fft_16 */
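/* Usage sketch (illustration only, not part of the build): fft_16() works in
   place on 16 interleaved complex values; as with the other kernels in this
   file, the result is the DFT scaled by the corresponding scale factor,
   here 2^-SCALEFACTOR16. */
#if 0
static void fft_16_usage_example(void) {
  FIXP_DBL buf[2 * 16];
  int i;
  for (i = 0; i < 2 * 16; i++) buf[i] = (FIXP_DBL)0;
  buf[0] = (FIXP_DBL)0x40000000; /* real impulse of amplitude 0.5 (Q31) */
  fft_16(buf); /* flat spectrum: each bin ~ 0.5 * 2^-SCALEFACTOR16 */
}
#endif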
    995 
    996 #ifndef FUNCTION_fft_32
    997 static const FIXP_STP fft32_w32[6] = {
    998     STCP(0x7641af3d, 0x30fbc54d), STCP(0x30fbc54d, 0x7641af3d),
    999     STCP(0x7d8a5f40, 0x18f8b83c), STCP(0x6a6d98a4, 0x471cece7),
   1000     STCP(0x471cece7, 0x6a6d98a4), STCP(0x18f8b83c, 0x7d8a5f40)};
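/* The six entries above are cos/sin pairs in Q31 for exp(-i*k*pi/16) with
   k = 2, 6, 1, 3, 5, 7 (in that order); together with W_PiFOURTH (k = 4) they
   provide the non-trivial twiddle factors used in the final combination
   stages of the 32-point FFT below. */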
   1001 #define W_PiFOURTH STC(0x5a82799a)
   1002 
   1003 LNK_SECTION_CODE_L1
   1004 inline void fft_32(FIXP_DBL *const _x) {
   1005   /*
   1006    * 1+2 stage radix 4
   1007    */
   1008 
   1009   /////////////////////////////////////////////////////////////////////////////////////////
   1010   {
   1011     FIXP_DBL *const x = _x;
   1012     FIXP_DBL vi, ui;
   1013     FIXP_DBL vi2, ui2;
   1014     FIXP_DBL vi3, ui3;
   1015     FIXP_DBL vr, ur;
   1016     FIXP_DBL vr2, ur2;
   1017     FIXP_DBL vr3, ur3;
   1018     FIXP_DBL vr4, ur4;
   1019 
   1020     // i = 0
   1021     vr = (x[0] + x[32]) >> 1;     /* Re A + Re B */
   1022     ur = (x[1] + x[33]) >> 1;     /* Im A + Im B */
   1023     vi = (x[16] + x[48]) SHIFT_A; /* Re C + Re D */
   1024     ui = (x[17] + x[49]) SHIFT_A; /* Im C + Im D */
   1025 
   1026     x[0] = vr + (vi SHIFT_B); /* Re A' = ReA + ReB +ReC + ReD */
   1027     x[1] = ur + (ui SHIFT_B); /* Im A' = sum of imag values */
   1028 
   1029     vr2 = (x[4] + x[36]) >> 1; /* Re A + Re B */
   1030     ur2 = (x[5] + x[37]) >> 1; /* Im A + Im B */
   1031 
   1032     x[4] = vr - (vi SHIFT_B); /* Re C' = -(ReC+ReD) + (ReA+ReB) */
   1033     x[5] = ur - (ui SHIFT_B); /* Im C' = -Im C -Im D +Im A +Im B */
   1034 
   1035     vr -= x[32];             /* Re A - Re B */
   1036     ur -= x[33];             /* Im A - Im B */
   1037     vi = (vi SHIFT_B)-x[48]; /* Re C - Re D */
   1038     ui = (ui SHIFT_B)-x[49]; /* Im C - Im D */
   1039 
   1040     vr3 = (x[2] + x[34]) >> 1; /* Re A + Re B */
   1041     ur3 = (x[3] + x[35]) >> 1; /* Im A + Im B */
   1042 
   1043     x[2] = ui + vr; /* Re B' = Im C - Im D  + Re A - Re B */
   1044     x[3] = ur - vi; /* Im B'= -Re C + Re D + Im A - Im B */
   1045 
   1046     vr4 = (x[6] + x[38]) >> 1; /* Re A + Re B */
   1047     ur4 = (x[7] + x[39]) >> 1; /* Im A + Im B */
   1048 
   1049     x[6] = vr - ui; /* Re D' = -Im C + Im D + Re A - Re B */
   1050     x[7] = vi + ur; /* Im D'= Re C - Re D + Im A - Im B */
   1051 
   1052     // i=16
   1053     vi = (x[20] + x[52]) SHIFT_A; /* Re C + Re D */
   1054     ui = (x[21] + x[53]) SHIFT_A; /* Im C + Im D */
   1055 
   1056     x[16] = vr2 + (vi SHIFT_B); /* Re A' = ReA + ReB +ReC + ReD */
   1057     x[17] = ur2 + (ui SHIFT_B); /* Im A' = sum of imag values */
   1058     x[20] = vr2 - (vi SHIFT_B); /* Re C' = -(ReC+ReD) + (ReA+ReB) */
   1059     x[21] = ur2 - (ui SHIFT_B); /* Im C' = -Im C -Im D +Im A +Im B */
   1060 
   1061     vr2 -= x[36];            /* Re A - Re B */
   1062     ur2 -= x[37];            /* Im A - Im B */
   1063     vi = (vi SHIFT_B)-x[52]; /* Re C - Re D */
   1064     ui = (ui SHIFT_B)-x[53]; /* Im C - Im D */
   1065 
   1066     vi2 = (x[18] + x[50]) SHIFT_A; /* Re C + Re D */
   1067     ui2 = (x[19] + x[51]) SHIFT_A; /* Im C + Im D */
   1068 
   1069     x[18] = ui + vr2; /* Re B' = Im C - Im D  + Re A - Re B */
   1070     x[19] = ur2 - vi; /* Im B'= -Re C + Re D + Im A - Im B */
   1071 
   1072     vi3 = (x[22] + x[54]) SHIFT_A; /* Re C + Re D */
   1073     ui3 = (x[23] + x[55]) SHIFT_A; /* Im C + Im D */
   1074 
   1075     x[22] = vr2 - ui; /* Re D' = -Im C + Im D + Re A - Re B */
   1076     x[23] = vi + ur2; /* Im D'= Re C - Re D + Im A - Im B */
   1077 
   1078     // i = 32
   1079 
   1080     x[32] = vr3 + (vi2 SHIFT_B); /* Re A' = ReA + ReB +ReC + ReD */
   1081     x[33] = ur3 + (ui2 SHIFT_B); /* Im A' = sum of imag values */
   1082     x[36] = vr3 - (vi2 SHIFT_B); /* Re C' = -(ReC+ReD) + (ReA+ReB) */
   1083     x[37] = ur3 - (ui2 SHIFT_B); /* Im C' = -Im C -Im D +Im A +Im B */
   1084 
   1085     vr3 -= x[34];              /* Re A - Re B */
   1086     ur3 -= x[35];              /* Im A - Im B */
   1087     vi2 = (vi2 SHIFT_B)-x[50]; /* Re C - Re D */
   1088     ui2 = (ui2 SHIFT_B)-x[51]; /* Im C - Im D */
   1089 
   1090     x[34] = ui2 + vr3; /* Re B' = Im C - Im D  + Re A - Re B */
   1091     x[35] = ur3 - vi2; /* Im B'= -Re C + Re D + Im A - Im B */
   1092 
   1093     // i=48
   1094 
   1095     x[48] = vr4 + (vi3 SHIFT_B); /* Re A' = ReA + ReB +ReC + ReD */
   1096     x[52] = vr4 - (vi3 SHIFT_B); /* Re C' = -(ReC+ReD) + (ReA+ReB) */
   1097     x[49] = ur4 + (ui3 SHIFT_B); /* Im A' = sum of imag values */
   1098     x[53] = ur4 - (ui3 SHIFT_B); /* Im C' = -Im C -Im D +Im A +Im B */
   1099 
   1100     vr4 -= x[38]; /* Re A - Re B */
   1101     ur4 -= x[39]; /* Im A - Im B */
   1102 
   1103     x[38] = vr3 - ui2; /* Re D' = -Im C + Im D + Re A - Re B */
   1104     x[39] = vi2 + ur3; /* Im D'= Re C - Re D + Im A - Im B */
   1105 
   1106     vi3 = (vi3 SHIFT_B)-x[54]; /* Re C - Re D */
   1107     ui3 = (ui3 SHIFT_B)-x[55]; /* Im C - Im D */
   1108 
   1109     x[50] = ui3 + vr4; /* Re B' = Im C - Im D  + Re A - Re B */
   1110     x[54] = vr4 - ui3; /* Re D' = -Im C + Im D + Re A - Re B */
   1111     x[51] = ur4 - vi3; /* Im B'= -Re C + Re D + Im A - Im B */
   1112     x[55] = vi3 + ur4; /* Im D'= Re C - Re D + Im A - Im B */
   1113 
   1114     // i=8
   1115     vr = (x[8] + x[40]) >> 1;     /* Re A + Re B */
   1116     ur = (x[9] + x[41]) >> 1;     /* Im A + Im B */
   1117     vi = (x[24] + x[56]) SHIFT_A; /* Re C + Re D */
   1118     ui = (x[25] + x[57]) SHIFT_A; /* Im C + Im D */
   1119 
   1120     x[8] = vr + (vi SHIFT_B); /* Re A' = ReA + ReB +ReC + ReD */
   1121     x[9] = ur + (ui SHIFT_B); /* Im A' = sum of imag values */
   1122 
   1123     vr2 = (x[12] + x[44]) >> 1; /* Re A + Re B */
   1124     ur2 = (x[13] + x[45]) >> 1; /* Im A + Im B */
   1125 
   1126     x[12] = vr - (vi SHIFT_B); /* Re C' = -(ReC+ReD) + (ReA+ReB) */
   1127     x[13] = ur - (ui SHIFT_B); /* Im C' = -Im C -Im D +Im A +Im B */
   1128 
   1129     vr -= x[40];             /* Re A - Re B */
   1130     ur -= x[41];             /* Im A - Im B */
   1131     vi = (vi SHIFT_B)-x[56]; /* Re C - Re D */
   1132     ui = (ui SHIFT_B)-x[57]; /* Im C - Im D */
   1133 
   1134     vr3 = (x[10] + x[42]) >> 1; /* Re A + Re B */
   1135     ur3 = (x[11] + x[43]) >> 1; /* Im A + Im B */
   1136 
   1137     x[10] = ui + vr; /* Re B' = Im C - Im D  + Re A - Re B */
   1138     x[11] = ur - vi; /* Im B'= -Re C + Re D + Im A - Im B */
   1139 
   1140     vr4 = (x[14] + x[46]) >> 1; /* Re A + Re B */
   1141     ur4 = (x[15] + x[47]) >> 1; /* Im A + Im B */
   1142 
   1143     x[14] = vr - ui; /* Re D' = -Im C + Im D + Re A - Re B */
   1144     x[15] = vi + ur; /* Im D'= Re C - Re D + Im A - Im B */
   1145 
   1146     // i=24
   1147     vi = (x[28] + x[60]) SHIFT_A; /* Re C + Re D */
   1148     ui = (x[29] + x[61]) SHIFT_A; /* Im C + Im D */
   1149 
   1150     x[24] = vr2 + (vi SHIFT_B); /* Re A' = ReA + ReB +ReC + ReD */
   1151     x[28] = vr2 - (vi SHIFT_B); /* Re C' = -(ReC+ReD) + (ReA+ReB) */
   1152     x[25] = ur2 + (ui SHIFT_B); /* Im A' = sum of imag values */
   1153     x[29] = ur2 - (ui SHIFT_B); /* Im C' = -Im C -Im D +Im A +Im B */
   1154 
   1155     vr2 -= x[44];            /* Re A - Re B */
   1156     ur2 -= x[45];            /* Im A - Im B */
   1157     vi = (vi SHIFT_B)-x[60]; /* Re C - Re D */
   1158     ui = (ui SHIFT_B)-x[61]; /* Im C - Im D */
   1159 
   1160     vi2 = (x[26] + x[58]) SHIFT_A; /* Re C + Re D */
   1161     ui2 = (x[27] + x[59]) SHIFT_A; /* Im C + Im D */
   1162 
   1163     x[26] = ui + vr2; /* Re B' = Im C - Im D  + Re A - Re B */
   1164     x[27] = ur2 - vi; /* Im B'= -Re C + Re D + Im A - Im B */
   1165 
   1166     vi3 = (x[30] + x[62]) SHIFT_A; /* Re C + Re D */
   1167     ui3 = (x[31] + x[63]) SHIFT_A; /* Im C + Im D */
   1168 
   1169     x[30] = vr2 - ui; /* Re D' = -Im C + Im D + Re A - Re B */
   1170     x[31] = vi + ur2; /* Im D'= Re C - Re D + Im A - Im B */
   1171 
   1172     // i=40
   1173 
   1174     x[40] = vr3 + (vi2 SHIFT_B); /* Re A' = ReA + ReB +ReC + ReD */
   1175     x[44] = vr3 - (vi2 SHIFT_B); /* Re C' = -(ReC+ReD) + (ReA+ReB) */
   1176     x[41] = ur3 + (ui2 SHIFT_B); /* Im A' = sum of imag values */
   1177     x[45] = ur3 - (ui2 SHIFT_B); /* Im C' = -Im C -Im D +Im A +Im B */
   1178 
   1179     vr3 -= x[42];              /* Re A - Re B */
   1180     ur3 -= x[43];              /* Im A - Im B */
   1181     vi2 = (vi2 SHIFT_B)-x[58]; /* Re C - Re D */
   1182     ui2 = (ui2 SHIFT_B)-x[59]; /* Im C - Im D */
   1183 
   1184     x[42] = ui2 + vr3; /* Re B' = Im C - Im D  + Re A - Re B */
   1185     x[43] = ur3 - vi2; /* Im B'= -Re C + Re D + Im A - Im B */
   1186 
   1187     // i=56
   1188 
   1189     x[56] = vr4 + (vi3 SHIFT_B); /* Re A' = ReA + ReB +ReC + ReD */
   1190     x[60] = vr4 - (vi3 SHIFT_B); /* Re C' = -(ReC+ReD) + (ReA+ReB) */
   1191     x[57] = ur4 + (ui3 SHIFT_B); /* Im A' = sum of imag values */
   1192     x[61] = ur4 - (ui3 SHIFT_B); /* Im C' = -Im C -Im D +Im A +Im B */
   1193 
   1194     vr4 -= x[46]; /* Re A - Re B */
   1195     ur4 -= x[47]; /* Im A - Im B */
   1196 
   1197     x[46] = vr3 - ui2; /* Re D' = -Im C + Im D + Re A - Re B */
   1198     x[47] = vi2 + ur3; /* Im D'= Re C - Re D + Im A - Im B */
   1199 
   1200     vi3 = (vi3 SHIFT_B)-x[62]; /* Re C - Re D */
   1201     ui3 = (ui3 SHIFT_B)-x[63]; /* Im C - Im D */
   1202 
   1203     x[58] = ui3 + vr4; /* Re B' = Im C - Im D  + Re A - Re B */
   1204     x[62] = vr4 - ui3; /* Re D' = -Im C + Im D + Re A - Re B */
   1205     x[59] = ur4 - vi3; /* Im B'= -Re C + Re D + Im A - Im B */
   1206     x[63] = vi3 + ur4; /* Im D'= Re C - Re D + Im A - Im B */
   1207   }
   1208 
   1209   {
   1210     FIXP_DBL *xt = _x;
   1211 
   1212     int j = 4;
   1213     do {
   1214       FIXP_DBL vi, ui, vr, ur;
   1215 
   1216       vr = xt[8];
   1217       vi = xt[9];
   1218       ur = xt[0] >> 1;
   1219       ui = xt[1] >> 1;
   1220       xt[0] = ur + (vr >> 1);
   1221       xt[1] = ui + (vi >> 1);
   1222       xt[8] = ur - (vr >> 1);
   1223       xt[9] = ui - (vi >> 1);
   1224 
   1225       vr = xt[13];
   1226       vi = xt[12];
   1227       ur = xt[4] >> 1;
   1228       ui = xt[5] >> 1;
   1229       xt[4] = ur + (vr >> 1);
   1230       xt[5] = ui - (vi >> 1);
   1231       xt[12] = ur - (vr >> 1);
   1232       xt[13] = ui + (vi >> 1);
   1233 
   1234       SUMDIFF_PIFOURTH(vi, vr, xt[10], xt[11])
   1235       ur = xt[2];
   1236       ui = xt[3];
   1237       xt[2] = (ur >> 1) + vr;
   1238       xt[3] = (ui >> 1) + vi;
   1239       xt[10] = (ur >> 1) - vr;
   1240       xt[11] = (ui >> 1) - vi;
   1241 
   1242       SUMDIFF_PIFOURTH(vr, vi, xt[14], xt[15])
   1243       ur = xt[6];
   1244       ui = xt[7];
   1245 
   1246       xt[6] = (ur >> 1) + vr;
   1247       xt[7] = (ui >> 1) - vi;
   1248       xt[14] = (ur >> 1) - vr;
   1249       xt[15] = (ui >> 1) + vi;
   1250       xt += 16;
   1251     } while (--j != 0);
   1252   }
   1253 
   1254   {
   1255     FIXP_DBL *const x = _x;
   1256     FIXP_DBL vi, ui, vr, ur;
   1257 
   1258     vr = x[16];
   1259     vi = x[17];
   1260     ur = x[0] >> 1;
   1261     ui = x[1] >> 1;
   1262     x[0] = ur + (vr >> 1);
   1263     x[1] = ui + (vi >> 1);
   1264     x[16] = ur - (vr >> 1);
   1265     x[17] = ui - (vi >> 1);
   1266 
   1267     vi = x[24];
   1268     vr = x[25];
   1269     ur = x[8] >> 1;
   1270     ui = x[9] >> 1;
   1271     x[8] = ur + (vr >> 1);
   1272     x[9] = ui - (vi >> 1);
   1273     x[24] = ur - (vr >> 1);
   1274     x[25] = ui + (vi >> 1);
   1275 
   1276     vr = x[48];
   1277     vi = x[49];
   1278     ur = x[32] >> 1;
   1279     ui = x[33] >> 1;
   1280     x[32] = ur + (vr >> 1);
   1281     x[33] = ui + (vi >> 1);
   1282     x[48] = ur - (vr >> 1);
   1283     x[49] = ui - (vi >> 1);
   1284 
   1285     vi = x[56];
   1286     vr = x[57];
   1287     ur = x[40] >> 1;
   1288     ui = x[41] >> 1;
   1289     x[40] = ur + (vr >> 1);
   1290     x[41] = ui - (vi >> 1);
   1291     x[56] = ur - (vr >> 1);
   1292     x[57] = ui + (vi >> 1);
   1293 
   1294     cplxMultDiv2(&vi, &vr, x[19], x[18], fft32_w32[0]);
   1295     ur = x[2];
   1296     ui = x[3];
   1297     x[2] = (ur >> 1) + vr;
   1298     x[3] = (ui >> 1) + vi;
   1299     x[18] = (ur >> 1) - vr;
   1300     x[19] = (ui >> 1) - vi;
   1301 
   1302     cplxMultDiv2(&vr, &vi, x[27], x[26], fft32_w32[0]);
   1303     ur = x[10];
   1304     ui = x[11];
   1305     x[10] = (ur >> 1) + vr;
   1306     x[11] = (ui >> 1) - vi;
   1307     x[26] = (ur >> 1) - vr;
   1308     x[27] = (ui >> 1) + vi;
   1309 
   1310     cplxMultDiv2(&vi, &vr, x[51], x[50], fft32_w32[0]);
   1311     ur = x[34];
   1312     ui = x[35];
   1313     x[34] = (ur >> 1) + vr;
   1314     x[35] = (ui >> 1) + vi;
   1315     x[50] = (ur >> 1) - vr;
   1316     x[51] = (ui >> 1) - vi;
   1317 
   1318     cplxMultDiv2(&vr, &vi, x[59], x[58], fft32_w32[0]);
   1319     ur = x[42];
   1320     ui = x[43];
   1321     x[42] = (ur >> 1) + vr;
   1322     x[43] = (ui >> 1) - vi;
   1323     x[58] = (ur >> 1) - vr;
   1324     x[59] = (ui >> 1) + vi;
   1325 
   1326     SUMDIFF_PIFOURTH(vi, vr, x[20], x[21])
   1327     ur = x[4];
   1328     ui = x[5];
   1329     x[4] = (ur >> 1) + vr;
   1330     x[5] = (ui >> 1) + vi;
   1331     x[20] = (ur >> 1) - vr;
   1332     x[21] = (ui >> 1) - vi;
   1333 
   1334     SUMDIFF_PIFOURTH(vr, vi, x[28], x[29])
   1335     ur = x[12];
   1336     ui = x[13];
   1337     x[12] = (ur >> 1) + vr;
   1338     x[13] = (ui >> 1) - vi;
   1339     x[28] = (ur >> 1) - vr;
   1340     x[29] = (ui >> 1) + vi;
   1341 
   1342     SUMDIFF_PIFOURTH(vi, vr, x[52], x[53])
   1343     ur = x[36];
   1344     ui = x[37];
   1345     x[36] = (ur >> 1) + vr;
   1346     x[37] = (ui >> 1) + vi;
   1347     x[52] = (ur >> 1) - vr;
   1348     x[53] = (ui >> 1) - vi;
   1349 
   1350     SUMDIFF_PIFOURTH(vr, vi, x[60], x[61])
   1351     ur = x[44];
   1352     ui = x[45];
   1353     x[44] = (ur >> 1) + vr;
   1354     x[45] = (ui >> 1) - vi;
   1355     x[60] = (ur >> 1) - vr;
   1356     x[61] = (ui >> 1) + vi;
   1357 
    cplxMultDiv2(&vi, &vr, x[23], x[22], fft32_w32[1]);
    ur = x[6];
    ui = x[7];
    x[6] = (ur >> 1) + vr;
    x[7] = (ui >> 1) + vi;
    x[22] = (ur >> 1) - vr;
    x[23] = (ui >> 1) - vi;

    cplxMultDiv2(&vr, &vi, x[31], x[30], fft32_w32[1]);
    ur = x[14];
    ui = x[15];
    x[14] = (ur >> 1) + vr;
    x[15] = (ui >> 1) - vi;
    x[30] = (ur >> 1) - vr;
    x[31] = (ui >> 1) + vi;

    cplxMultDiv2(&vi, &vr, x[55], x[54], fft32_w32[1]);
    ur = x[38];
    ui = x[39];
    x[38] = (ur >> 1) + vr;
    x[39] = (ui >> 1) + vi;
    x[54] = (ur >> 1) - vr;
    x[55] = (ui >> 1) - vi;

    cplxMultDiv2(&vr, &vi, x[63], x[62], fft32_w32[1]);
    ur = x[46];
    ui = x[47];

    x[46] = (ur >> 1) + vr;
    x[47] = (ui >> 1) - vi;
    x[62] = (ur >> 1) - vr;
    x[63] = (ui >> 1) + vi;

    vr = x[32];
    vi = x[33];
    ur = x[0] >> 1;
    ui = x[1] >> 1;
    x[0] = ur + (vr >> 1);
    x[1] = ui + (vi >> 1);
    x[32] = ur - (vr >> 1);
    x[33] = ui - (vi >> 1);

    vi = x[48];
    vr = x[49];
    ur = x[16] >> 1;
    ui = x[17] >> 1;
    x[16] = ur + (vr >> 1);
    x[17] = ui - (vi >> 1);
    x[48] = ur - (vr >> 1);
    x[49] = ui + (vi >> 1);

    cplxMultDiv2(&vi, &vr, x[35], x[34], fft32_w32[2]);
    ur = x[2];
    ui = x[3];
    x[2] = (ur >> 1) + vr;
    x[3] = (ui >> 1) + vi;
    x[34] = (ur >> 1) - vr;
    x[35] = (ui >> 1) - vi;

    cplxMultDiv2(&vr, &vi, x[51], x[50], fft32_w32[2]);
    ur = x[18];
    ui = x[19];
    x[18] = (ur >> 1) + vr;
    x[19] = (ui >> 1) - vi;
    x[50] = (ur >> 1) - vr;
    x[51] = (ui >> 1) + vi;

    cplxMultDiv2(&vi, &vr, x[37], x[36], fft32_w32[0]);
    ur = x[4];
    ui = x[5];
    x[4] = (ur >> 1) + vr;
    x[5] = (ui >> 1) + vi;
    x[36] = (ur >> 1) - vr;
    x[37] = (ui >> 1) - vi;

    cplxMultDiv2(&vr, &vi, x[53], x[52], fft32_w32[0]);
    ur = x[20];
    ui = x[21];
    x[20] = (ur >> 1) + vr;
    x[21] = (ui >> 1) - vi;
    x[52] = (ur >> 1) - vr;
    x[53] = (ui >> 1) + vi;

    cplxMultDiv2(&vi, &vr, x[39], x[38], fft32_w32[3]);
    ur = x[6];
    ui = x[7];
    x[6] = (ur >> 1) + vr;
    x[7] = (ui >> 1) + vi;
    x[38] = (ur >> 1) - vr;
    x[39] = (ui >> 1) - vi;

    cplxMultDiv2(&vr, &vi, x[55], x[54], fft32_w32[3]);
    ur = x[22];
    ui = x[23];
    x[22] = (ur >> 1) + vr;
    x[23] = (ui >> 1) - vi;
    x[54] = (ur >> 1) - vr;
    x[55] = (ui >> 1) + vi;

    SUMDIFF_PIFOURTH(vi, vr, x[40], x[41])
    ur = x[8];
    ui = x[9];
    x[8] = (ur >> 1) + vr;
    x[9] = (ui >> 1) + vi;
    x[40] = (ur >> 1) - vr;
    x[41] = (ui >> 1) - vi;

    SUMDIFF_PIFOURTH(vr, vi, x[56], x[57])
    ur = x[24];
    ui = x[25];
    x[24] = (ur >> 1) + vr;
    x[25] = (ui >> 1) - vi;
    x[56] = (ur >> 1) - vr;
    x[57] = (ui >> 1) + vi;

    cplxMultDiv2(&vi, &vr, x[43], x[42], fft32_w32[4]);
    ur = x[10];
    ui = x[11];

    x[10] = (ur >> 1) + vr;
    x[11] = (ui >> 1) + vi;
    x[42] = (ur >> 1) - vr;
    x[43] = (ui >> 1) - vi;

    cplxMultDiv2(&vr, &vi, x[59], x[58], fft32_w32[4]);
    ur = x[26];
    ui = x[27];
    x[26] = (ur >> 1) + vr;
    x[27] = (ui >> 1) - vi;
    x[58] = (ur >> 1) - vr;
    x[59] = (ui >> 1) + vi;

    cplxMultDiv2(&vi, &vr, x[45], x[44], fft32_w32[1]);
    ur = x[12];
    ui = x[13];
    x[12] = (ur >> 1) + vr;
    x[13] = (ui >> 1) + vi;
    x[44] = (ur >> 1) - vr;
    x[45] = (ui >> 1) - vi;

    cplxMultDiv2(&vr, &vi, x[61], x[60], fft32_w32[1]);
    ur = x[28];
    ui = x[29];
    x[28] = (ur >> 1) + vr;
    x[29] = (ui >> 1) - vi;
    x[60] = (ur >> 1) - vr;
    x[61] = (ui >> 1) + vi;

    cplxMultDiv2(&vi, &vr, x[47], x[46], fft32_w32[5]);
    ur = x[14];
    ui = x[15];
    x[14] = (ur >> 1) + vr;
    x[15] = (ui >> 1) + vi;
    x[46] = (ur >> 1) - vr;
    x[47] = (ui >> 1) - vi;

    cplxMultDiv2(&vr, &vi, x[63], x[62], fft32_w32[5]);
    ur = x[30];
    ui = x[31];
    x[30] = (ur >> 1) + vr;
    x[31] = (ui >> 1) - vi;
    x[62] = (ur >> 1) - vr;
    x[63] = (ui >> 1) + vi;
  }
}
#endif /* #ifndef FUNCTION_fft_32 */

/**
 * \brief Apply rotation vectors to a data buffer.
 * \param pData data buffer of interleaved real/imaginary samples (processed in place).
 * \param cl length of each row of input data.
 * \param l total length of input data.
 * \param pVecRe real part of the rotation coefficient vector.
 * \param pVecIm imaginary part of the rotation coefficient vector.
 */

/*
   This define patches each inaccurate 0x7FFF rotation coefficient (i.e. 0.9999
   instead of 1.0) and uses 0x8000 (-1.0) instead. At the end, the sign of the
   result is inverted.
*/
#define noFFT_APPLY_ROT_VECTOR_HQ
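/* Illustrative sketch (hypothetical helper, excluded from the build): how an
   exact multiplication by +1.0 can be realized in Q15 by storing -1.0 (0x8000)
   and negating the product afterwards, instead of using the inexact +0.999969
   (0x7FFF). int16_t/int32_t are stand-ins for the library's fixed-point types,
   and an arithmetic right shift is assumed (as elsewhere in this file). */
#if 0
#include <stdint.h>

static int16_t mulQ15ByPlusOne(int16_t x) {
  const int16_t w = -32768;                     /* 0x8000 == -1.0 in Q15 */
  int32_t p = ((int32_t)x * (int32_t)w) >> 15;  /* x * (-1.0) */
  return (int16_t)(-p);                         /* negate: exact x * (+1.0) */
}
#endif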

#ifndef FUNCTION_fft_apply_rot_vector__FIXP_DBL
static inline void fft_apply_rot_vector(FIXP_DBL *RESTRICT pData, const int cl,
                                        const int l, const FIXP_STB *pVecRe,
                                        const FIXP_STB *pVecIm) {
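  /* The buffer holds l complex samples arranged as l/cl rows of cl samples
     (interleaved re/im). Samples whose rotation coefficient is 1.0 (the whole
     first row and the first sample of every other row) are only scaled; all
     remaining samples are rotated by the table entries. Every sample ends up
     scaled by 1/4 in total (either by the explicit >> 2, or by the >> 1 plus
     the additional /2 inside cplxMultDiv2()). */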
  FIXP_DBL re, im;
  FIXP_STB vre, vim;

  int i, c;

  for (i = 0; i < cl; i++) {
    re = pData[2 * i];
    im = pData[2 * i + 1];

    pData[2 * i] = re >> 2;     /* * 0.25 */
    pData[2 * i + 1] = im >> 2; /* * 0.25 */
  }
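  /* Remaining rows: the first sample of each row is only scaled (coefficient
     1.0); the other cl - 1 samples are rotated by the next coefficients read
     from pVecRe/pVecIm. */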
  for (; i < l; i += cl) {
    re = pData[2 * i];
    im = pData[2 * i + 1];

    pData[2 * i] = re >> 2;     /* * 0.25 */
    pData[2 * i + 1] = im >> 2; /* * 0.25 */

    for (c = i + 1; c < i + cl; c++) {
      re = pData[2 * c] >> 1;
      im = pData[2 * c + 1] >> 1;
      vre = *pVecRe++;
      vim = *pVecIm++;

      cplxMultDiv2(&pData[2 * c + 1], &pData[2 * c], im, re, vre, vim);
    }
  }
}
#endif /* FUNCTION_fft_apply_rot_vector__FIXP_DBL */

/* Select either switch-case or function pointer dispatch. */
//#define FFT_TWO_STAGE_SWITCH_CASE
#ifndef FUNCTION_fftN2_func
static inline void fftN2_func(FIXP_DBL *pInput, const int length,
                              const int dim1, const int dim2,
                              void (*const fft1)(FIXP_DBL *),
                              void (*const fft2)(FIXP_DBL *),
                              const FIXP_STB *RotVectorReal,
                              const FIXP_STB *RotVectorImag, FIXP_DBL *aDst,
                              FIXP_DBL *aDst2) {
  /* The real parts of the input samples are stored at the even indices and the
  imaginary parts at the odd indices. The output samples are written back to
  pInput.
  */
  FIXP_DBL *pSrc, *pDst, *pDstOut;
  int i;

  FDK_ASSERT(length == dim1 * dim2);

  /* Perform the fft of length dim1 dim2 times. The input samples are read from
  pSrc and the output samples are written to pDst. The input vector for each
  fft of length dim1 is gathered from the interleaved samples in pSrc; the
  output samples are stored consecutively.
  */
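  /* Example (see fft6() below, length 6 = 2 * 3): the three length-2 FFTs are
     computed on the strided sample pairs {x[0], x[3]}, {x[1], x[4]} and
     {x[2], x[5]}; the two length-3 FFTs of the second stage then operate on
     the rotated intermediate rows. */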
  pSrc = pInput;
  pDst = aDst;
  for (i = 0; i < dim2; i++) {
    for (int j = 0; j < dim1; j++) {
      pDst[2 * j] = pSrc[2 * j * dim2];
      pDst[2 * j + 1] = pSrc[2 * j * dim2 + 1];
    }

      /* fft of size dim1 */
#ifndef FFT_TWO_STAGE_SWITCH_CASE
    fft1(pDst);
#else
    switch (dim1) {
      case 2:
        fft2(pDst);
        break;
      case 3:
        fft3(pDst);
        break;
      case 4:
        fft_4(pDst);
        break;
      /* case 5: fft5(pDst); break; */
      /* case 8: fft_8(pDst); break; */
      case 12:
        fft12(pDst);
        break;
      /* case 15: fft15(pDst); break; */
      case 16:
        fft_16(pDst);
        break;
      case 32:
        fft_32(pDst);
        break;
        /*case 64: fft_64(pDst); break;*/
        /* case 128: fft_128(pDst); break; */
    }
#endif
    pSrc += 2;
    pDst = pDst + 2 * dim1;
  }

  /* Perform the modulation of the output of the fft of length dim1 */
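  /* The rotation coefficients are the Cooley-Tukey twiddle factors between the
     two stages; coefficients equal to 1.0 are not read from the tables and are
     handled by plain scaling inside fft_apply_rot_vector(). */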
  pSrc = aDst;
  fft_apply_rot_vector(pSrc, dim1, length, RotVectorReal, RotVectorImag);

  /* Perform the fft of length dim2 dim1 times. The input samples are read from
  aDst and the output samples are written to pInput. The input vector for each
  fft of length dim2 is gathered from the interleaved samples in aDst; the
  output samples are stored interleaved again (with stride dim1) at pInput.
  */
  pSrc = aDst;
  pDst = aDst2;
  pDstOut = pInput;
  for (i = 0; i < dim1; i++) {
    for (int j = 0; j < dim2; j++) {
      pDst[2 * j] = pSrc[2 * j * dim1];
      pDst[2 * j + 1] = pSrc[2 * j * dim1 + 1];
    }

#ifndef FFT_TWO_STAGE_SWITCH_CASE
    fft2(pDst);
#else
    switch (dim2) {
      case 4:
        fft_4(pDst);
        break;
      case 9:
        fft9(pDst);
        break;
      case 12:
        fft12(pDst);
        break;
      case 15:
        fft15(pDst);
        break;
      case 16:
        fft_16(pDst);
        break;
      case 32:
        fft_32(pDst);
        break;
    }
#endif

    for (int j = 0; j < dim2; j++) {
      pDstOut[2 * j * dim1] = pDst[2 * j];
      pDstOut[2 * j * dim1 + 1] = pDst[2 * j + 1];
    }
    pSrc += 2;
    pDstOut += 2;
  }
}
#endif /* FUNCTION_fftN2_func */
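
/* For reference: the decomposition implemented by fftN2_func() above, written
   out in floating point with naive DFTs for the two sub-transforms. This is an
   illustrative sketch only (excluded from the build, not part of the library
   API); buffer sizes are chosen for the lengths used in this file (N <= 480,
   sub-FFT length <= 32), and all fixed-point scaling is ignored. */
#if 0
#include <complex>

static void dftRef(std::complex<double> *v, int n) { /* naive in-place DFT */
  const double pi = 3.14159265358979323846;
  std::complex<double> tmp[32];
  for (int k = 0; k < n; k++) {
    tmp[k] = 0.0;
    for (int j = 0; j < n; j++)
      tmp[k] += v[j] * std::polar(1.0, -2.0 * pi * j * k / n);
  }
  for (int k = 0; k < n; k++) v[k] = tmp[k];
}

static void fftN2Ref(std::complex<double> *x, int dim1, int dim2) {
  const double pi = 3.14159265358979323846;
  const int N = dim1 * dim2;
  std::complex<double> tmp[480], col[32];

  /* Stage 1: dim2 DFTs of length dim1 over the columns x[n1 * dim2 + n2],
     followed by the rotation (twiddle) pass W_N^(n2 * k1). The twiddle equals
     1.0 whenever n2 == 0 or k1 == 0, which is why fft_apply_rot_vector() only
     scales the first row and the first element of every other row. */
  for (int n2 = 0; n2 < dim2; n2++) {
    for (int n1 = 0; n1 < dim1; n1++) col[n1] = x[n1 * dim2 + n2];
    dftRef(col, dim1);
    for (int k1 = 0; k1 < dim1; k1++)
      tmp[n2 * dim1 + k1] = col[k1] * std::polar(1.0, -2.0 * pi * n2 * k1 / N);
  }

  /* Stage 2: dim1 DFTs of length dim2; the output bin with frequency index
     k = k2 * dim1 + k1 is written to x[k]. */
  for (int k1 = 0; k1 < dim1; k1++) {
    for (int n2 = 0; n2 < dim2; n2++) col[n2] = tmp[n2 * dim1 + k1];
    dftRef(col, dim2);
    for (int k2 = 0; k2 < dim2; k2++) x[k2 * dim1 + k1] = col[k2];
  }
}
#endif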

#define fftN2(DATA_TYPE, pInput, length, dim1, dim2, fft_func1, fft_func2, \
              RotVectorReal, RotVectorImag)                                \
  {                                                                        \
    C_AALLOC_SCRATCH_START(aDst, DATA_TYPE, 2 * length)                    \
    C_AALLOC_SCRATCH_START(aDst2, DATA_TYPE, 2 * dim2)                     \
    fftN2_func(pInput, length, dim1, dim2, fft_func1, fft_func2,           \
               RotVectorReal, RotVectorImag, aDst, aDst2);                 \
    C_AALLOC_SCRATCH_END(aDst2, DATA_TYPE, 2 * dim2)                       \
    C_AALLOC_SCRATCH_END(aDst, DATA_TYPE, 2 * length)                      \
  }
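
/* The wrappers below instantiate fftN2() for each supported composite length,
   pairing the two sub-FFTs with the matching rotation tables. */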

  /*!
   *
   *  \brief  Complex FFT of length 6, 12, 20, 24, 48, 60, 80, 96, 120, 192,
   *          240, 384, 480 (composite lengths, built from two shorter FFTs).
   *  \param  pInput On input: the input signal, prescaled (shifted right) by 2.
   *          On output: the output signal, scaled by SCALEFACTOR<#length>.
   *          The output signal does not have any fixed headroom.
   *  \return void
   *
   */

#ifndef FUNCTION_fft6
static inline void fft6(FIXP_DBL *pInput) {
  fftN2(FIXP_DBL, pInput, 6, 2, 3, fft2, fft3, RotVectorReal6, RotVectorImag6);
}
#endif /* #ifndef FUNCTION_fft6 */

#ifndef FUNCTION_fft12
static inline void fft12(FIXP_DBL *pInput) {
  fftN2(FIXP_DBL, pInput, 12, 3, 4, fft3, fft_4, RotVectorReal12,
        RotVectorImag12); /* 16,58 */
}
#endif /* #ifndef FUNCTION_fft12 */

#ifndef FUNCTION_fft20
static inline void fft20(FIXP_DBL *pInput) {
  fftN2(FIXP_DBL, pInput, 20, 4, 5, fft_4, fft5, RotVectorReal20,
        RotVectorImag20);
}
#endif /* FUNCTION_fft20 */

static inline void fft24(FIXP_DBL *pInput) {
  fftN2(FIXP_DBL, pInput, 24, 2, 12, fft2, fft12, RotVectorReal24,
        RotVectorImag24); /* 16,73 */
}

static inline void fft48(FIXP_DBL *pInput) {
  fftN2(FIXP_DBL, pInput, 48, 4, 12, fft_4, fft12, RotVectorReal48,
        RotVectorImag48); /* 16,32 */
}

#ifndef FUNCTION_fft60
static inline void fft60(FIXP_DBL *pInput) {
  fftN2(FIXP_DBL, pInput, 60, 4, 15, fft_4, fft15, RotVectorReal60,
        RotVectorImag60); /* 15,51 */
}
#endif /* FUNCTION_fft60 */

#ifndef FUNCTION_fft80
static inline void fft80(FIXP_DBL *pInput) {
  fftN2(FIXP_DBL, pInput, 80, 5, 16, fft5, fft_16, RotVectorReal80,
        RotVectorImag80); /*  */
}
#endif

#ifndef FUNCTION_fft96
static inline void fft96(FIXP_DBL *pInput) {
  fftN2(FIXP_DBL, pInput, 96, 3, 32, fft3, fft_32, RotVectorReal96,
        RotVectorImag96); /* 15,47 */
}
#endif /* FUNCTION_fft96 */

#ifndef FUNCTION_fft120
static inline void fft120(FIXP_DBL *pInput) {
  fftN2(FIXP_DBL, pInput, 120, 8, 15, fft_8, fft15, RotVectorReal120,
        RotVectorImag120);
}
#endif /* FUNCTION_fft120 */

#ifndef FUNCTION_fft192
static inline void fft192(FIXP_DBL *pInput) {
  fftN2(FIXP_DBL, pInput, 192, 16, 12, fft_16, fft12, RotVectorReal192,
        RotVectorImag192); /* 15,50 */
}
#endif

#ifndef FUNCTION_fft240
static inline void fft240(FIXP_DBL *pInput) {
  fftN2(FIXP_DBL, pInput, 240, 16, 15, fft_16, fft15, RotVectorReal240,
        RotVectorImag240); /* 15.44 */
}
#endif

#ifndef FUNCTION_fft384
static inline void fft384(FIXP_DBL *pInput) {
  fftN2(FIXP_DBL, pInput, 384, 12, 32, fft12, fft_32, RotVectorReal384,
        RotVectorImag384); /* 16.02 */
}
#endif /* FUNCTION_fft384 */

#ifndef FUNCTION_fft480
static inline void fft480(FIXP_DBL *pInput) {
  fftN2(FIXP_DBL, pInput, 480, 32, 15, fft_32, fft15, RotVectorReal480,
        RotVectorImag480); /* 15.84 */
}
#endif /* FUNCTION_fft480 */

void fft(int length, FIXP_DBL *pInput, INT *pScalefactor) {
  /* Ensure that the i/o pointer is always (at least 8-byte) aligned. */
  C_ALLOC_ALIGNED_CHECK(pInput);

  if (length == 32) {
    fft_32(pInput);
    *pScalefactor += SCALEFACTOR32;
  } else {
    switch (length) {
      case 16:
        fft_16(pInput);
        *pScalefactor += SCALEFACTOR16;
        break;
      case 8:
        fft_8(pInput);
        *pScalefactor += SCALEFACTOR8;
        break;
      case 2:
        fft2(pInput);
        *pScalefactor += SCALEFACTOR2;
        break;
      case 3:
        fft3(pInput);
        *pScalefactor += SCALEFACTOR3;
        break;
      case 4:
        fft_4(pInput);
        *pScalefactor += SCALEFACTOR4;
        break;
      case 5:
        fft5(pInput);
        *pScalefactor += SCALEFACTOR5;
        break;
      case 6:
        fft6(pInput);
        *pScalefactor += SCALEFACTOR6;
        break;
      case 10:
        fft10(pInput);
        *pScalefactor += SCALEFACTOR10;
        break;
      case 12:
        fft12(pInput);
        *pScalefactor += SCALEFACTOR12;
        break;
      case 15:
        fft15(pInput);
        *pScalefactor += SCALEFACTOR15;
        break;
      case 20:
        fft20(pInput);
        *pScalefactor += SCALEFACTOR20;
        break;
      case 24:
        fft24(pInput);
        *pScalefactor += SCALEFACTOR24;
        break;
      case 48:
        fft48(pInput);
        *pScalefactor += SCALEFACTOR48;
        break;
      case 60:
        fft60(pInput);
        *pScalefactor += SCALEFACTOR60;
        break;
      case 64:
        dit_fft(pInput, 6, SineTable512, 512);
        *pScalefactor += SCALEFACTOR64;
        break;
      case 80:
        fft80(pInput);
        *pScalefactor += SCALEFACTOR80;
        break;
      case 96:
        fft96(pInput);
        *pScalefactor += SCALEFACTOR96;
        break;
      case 120:
        fft120(pInput);
        *pScalefactor += SCALEFACTOR120;
        break;
      case 128:
        dit_fft(pInput, 7, SineTable512, 512);
        *pScalefactor += SCALEFACTOR128;
        break;
      case 192:
        fft192(pInput);
        *pScalefactor += SCALEFACTOR192;
        break;
      case 240:
        fft240(pInput);
        *pScalefactor += SCALEFACTOR240;
        break;
      case 256:
        dit_fft(pInput, 8, SineTable512, 512);
        *pScalefactor += SCALEFACTOR256;
        break;
      case 384:
        fft384(pInput);
        *pScalefactor += SCALEFACTOR384;
        break;
      case 480:
        fft480(pInput);
        *pScalefactor += SCALEFACTOR480;
        break;
      case 512:
        dit_fft(pInput, 9, SineTable512, 512);
        *pScalefactor += SCALEFACTOR512;
        break;
      default:
        FDK_ASSERT(0); /* FFT length not supported! */
        break;
    }
  }
}
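
/* Illustrative usage sketch (hypothetical, excluded from the build): the
   caller provides an interleaved re/im buffer with the alignment and
   prescaling required for the chosen length, and accumulates the returned
   exponent in its scale factor. */
#if 0
static void fftUsageExample(void) {
  C_AALLOC_SCRATCH_START(buf, FIXP_DBL, 2 * 32) /* 32 complex samples */
  INT scale = 0;
  /* ... fill buf with prescaled input samples ... */
  fft(32, buf, &scale); /* buf now holds the spectrum; scale grew by SCALEFACTOR32 */
  /* ... use buf and scale ... */
  C_AALLOC_SCRATCH_END(buf, FIXP_DBL, 2 * 32)
}
#endif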

void ifft(int length, FIXP_DBL *pInput, INT *scalefactor) {
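  /* No inverse FFT lengths are implemented here; any call hits the assertion
     below in debug builds. */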
  switch (length) {
    default:
      FDK_ASSERT(0); /* IFFT length not supported! */
      break;
  }
}