1 /* ----------------------------------------------------------------------------- 2 Software License for The Fraunhofer FDK AAC Codec Library for Android 3 4 Copyright 1995 - 2018 Fraunhofer-Gesellschaft zur Frderung der angewandten 5 Forschung e.V. All rights reserved. 6 7 1. INTRODUCTION 8 The Fraunhofer FDK AAC Codec Library for Android ("FDK AAC Codec") is software 9 that implements the MPEG Advanced Audio Coding ("AAC") encoding and decoding 10 scheme for digital audio. This FDK AAC Codec software is intended to be used on 11 a wide variety of Android devices. 12 13 AAC's HE-AAC and HE-AAC v2 versions are regarded as today's most efficient 14 general perceptual audio codecs. AAC-ELD is considered the best-performing 15 full-bandwidth communications codec by independent studies and is widely 16 deployed. AAC has been standardized by ISO and IEC as part of the MPEG 17 specifications. 18 19 Patent licenses for necessary patent claims for the FDK AAC Codec (including 20 those of Fraunhofer) may be obtained through Via Licensing 21 (www.vialicensing.com) or through the respective patent owners individually for 22 the purpose of encoding or decoding bit streams in products that are compliant 23 with the ISO/IEC MPEG audio standards. Please note that most manufacturers of 24 Android devices already license these patent claims through Via Licensing or 25 directly from the patent owners, and therefore FDK AAC Codec software may 26 already be covered under those patent licenses when it is used for those 27 licensed purposes only. 28 29 Commercially-licensed AAC software libraries, including floating-point versions 30 with enhanced sound quality, are also available from Fraunhofer. Users are 31 encouraged to check the Fraunhofer website for additional applications 32 information and documentation. 33 34 2. 
COPYRIGHT LICENSE 35 36 Redistribution and use in source and binary forms, with or without modification, 37 are permitted without payment of copyright license fees provided that you 38 satisfy the following conditions: 39 40 You must retain the complete text of this software license in redistributions of 41 the FDK AAC Codec or your modifications thereto in source code form. 42 43 You must retain the complete text of this software license in the documentation 44 and/or other materials provided with redistributions of the FDK AAC Codec or 45 your modifications thereto in binary form. You must make available free of 46 charge copies of the complete source code of the FDK AAC Codec and your 47 modifications thereto to recipients of copies in binary form. 48 49 The name of Fraunhofer may not be used to endorse or promote products derived 50 from this library without prior written permission. 51 52 You may not charge copyright license fees for anyone to use, copy or distribute 53 the FDK AAC Codec software or your modifications thereto. 54 55 Your modified versions of the FDK AAC Codec must carry prominent notices stating 56 that you changed the software and the date of any change. For modified versions 57 of the FDK AAC Codec, the term "Fraunhofer FDK AAC Codec Library for Android" 58 must be replaced by the term "Third-Party Modified Version of the Fraunhofer FDK 59 AAC Codec Library for Android." 60 61 3. NO PATENT LICENSE 62 63 NO EXPRESS OR IMPLIED LICENSES TO ANY PATENT CLAIMS, including without 64 limitation the patents of Fraunhofer, ARE GRANTED BY THIS SOFTWARE LICENSE. 65 Fraunhofer provides no warranty of patent non-infringement with respect to this 66 software. 67 68 You may use this FDK AAC Codec software or modifications thereto only for 69 purposes that are authorized by appropriate patent licenses. 70 71 4. 
DISCLAIMER 72 73 This FDK AAC Codec software is provided by Fraunhofer on behalf of the copyright 74 holders and contributors "AS IS" and WITHOUT ANY EXPRESS OR IMPLIED WARRANTIES, 75 including but not limited to the implied warranties of merchantability and 76 fitness for a particular purpose. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR 77 CONTRIBUTORS BE LIABLE for any direct, indirect, incidental, special, exemplary, 78 or consequential damages, including but not limited to procurement of substitute 79 goods or services; loss of use, data, or profits, or business interruption, 80 however caused and on any theory of liability, whether in contract, strict 81 liability, or tort (including negligence), arising in any way out of the use of 82 this software, even if advised of the possibility of such damage. 83 84 5. CONTACT INFORMATION 85 86 Fraunhofer Institute for Integrated Circuits IIS 87 Attention: Audio and Multimedia Departments - FDK AAC LL 88 Am Wolfsmantel 33 89 91058 Erlangen, Germany 90 91 www.iis.fraunhofer.de/amm 92 amm-info (at) iis.fraunhofer.de 93 ----------------------------------------------------------------------------- */ 94 95 /******************* Library for basic calculation routines ******************** 96 97 Author(s): Josef Hoepfl, DSP Solutions 98 99 Description: Fix point FFT 100 101 *******************************************************************************/ 102 103 #include "fft_rad2.h" 104 #include "FDK_tools_rom.h" 105 106 #define W_PiFOURTH STC(0x5a82799a) 107 //#define W_PiFOURTH ((FIXP_DBL)(0x5a82799a)) 108 #ifndef SUMDIFF_PIFOURTH 109 #define SUMDIFF_PIFOURTH(diff, sum, a, b) \ 110 { \ 111 FIXP_DBL wa, wb; \ 112 wa = fMultDiv2(a, W_PiFOURTH); \ 113 wb = fMultDiv2(b, W_PiFOURTH); \ 114 diff = wb - wa; \ 115 sum = wb + wa; \ 116 } 117 #define SUMDIFF_PIFOURTH16(diff, sum, a, b) \ 118 { \ 119 FIXP_SGL wa, wb; \ 120 wa = FX_DBL2FX_SGL(fMultDiv2(a, W_PiFOURTH)); \ 121 wb = FX_DBL2FX_SGL(fMultDiv2(b, W_PiFOURTH)); \ 122 diff = wb - 
wa;                                        \
    sum = wb + wa;                                         \
  }
#endif

/* Right-shift counts applied by the FFTs of the corresponding length so that
   the fixed-point result cannot overflow; composite lengths derive their
   scaling from the scaling of their radix factors plus 2 extra guard bits. */
#define SCALEFACTOR2048 10
#define SCALEFACTOR1024 9
#define SCALEFACTOR512 8
#define SCALEFACTOR256 7
#define SCALEFACTOR128 6
#define SCALEFACTOR64 5
#define SCALEFACTOR32 4
#define SCALEFACTOR16 3
#define SCALEFACTOR8 2
#define SCALEFACTOR4 1
#define SCALEFACTOR2 1

#define SCALEFACTOR3 1
#define SCALEFACTOR5 1
#define SCALEFACTOR6 (SCALEFACTOR2 + SCALEFACTOR3 + 2)
#define SCALEFACTOR7 2
#define SCALEFACTOR9 2
#define SCALEFACTOR10 5
#define SCALEFACTOR12 3
#define SCALEFACTOR15 3
#define SCALEFACTOR18 (SCALEFACTOR2 + SCALEFACTOR9 + 2)
#define SCALEFACTOR20 (SCALEFACTOR4 + SCALEFACTOR5 + 2)
#define SCALEFACTOR21 (SCALEFACTOR3 + SCALEFACTOR7 + 2)
#define SCALEFACTOR24 (SCALEFACTOR2 + SCALEFACTOR12 + 2)
#define SCALEFACTOR30 (SCALEFACTOR2 + SCALEFACTOR15 + 2)
#define SCALEFACTOR40 (SCALEFACTOR5 + SCALEFACTOR8 + 2)
#define SCALEFACTOR48 (SCALEFACTOR4 + SCALEFACTOR12 + 2)
#define SCALEFACTOR60 (SCALEFACTOR4 + SCALEFACTOR15 + 2)
#define SCALEFACTOR80 (SCALEFACTOR5 + SCALEFACTOR16 + 2)
#define SCALEFACTOR96 (SCALEFACTOR3 + SCALEFACTOR32 + 2)
#define SCALEFACTOR120 (SCALEFACTOR8 + SCALEFACTOR15 + 2)
#define SCALEFACTOR160 (SCALEFACTOR10 + SCALEFACTOR16 + 2)
#define SCALEFACTOR168 (SCALEFACTOR21 + SCALEFACTOR8 + 2)
#define SCALEFACTOR192 (SCALEFACTOR12 + SCALEFACTOR16 + 2)
#define SCALEFACTOR240 (SCALEFACTOR16 + SCALEFACTOR15 + 2)
#define SCALEFACTOR320 (SCALEFACTOR10 + SCALEFACTOR32 + 2)
#define SCALEFACTOR336 (SCALEFACTOR21 + SCALEFACTOR16 + 2)
#define SCALEFACTOR384 (SCALEFACTOR12 + SCALEFACTOR32 + 2)
#define SCALEFACTOR480 (SCALEFACTOR32 + SCALEFACTOR15 + 2)

#include "fft.h"

#ifndef FUNCTION_fft2

/* Performs the FFT of length 2.
   Input vector unscaled, output vector scaled
 * with factor 0.5 (one guard bit against overflow in the add/sub). */
static FDK_FORCEINLINE void fft2(FIXP_DBL *RESTRICT pDat) {
  FIXP_DBL r1, i1;
  FIXP_DBL r2, i2;

  /* real part */
  r1 = pDat[2];
  r2 = pDat[0];

  /* imaginary part */
  i1 = pDat[3];
  i2 = pDat[1];

  /* real part: butterfly with 1-bit headroom shift */
  pDat[0] = (r2 + r1) >> 1;
  pDat[2] = (r2 - r1) >> 1;

  /* imaginary part */
  pDat[1] = (i2 + i1) >> 1;
  pDat[3] = (i2 - i1) >> 1;
}
#endif /* FUNCTION_fft2 */

#define C31 (STC(0x91261468)) /* FL2FXCONST_DBL(-0.86602540) = -sqrt(3)/2 */

#ifndef FUNCTION_fft3
/* Performs the FFT of length 3 according to the algorithm after Winograd.
   In-place on 3 interleaved complex values (re,im pairs) in pDat. */
static FDK_FORCEINLINE void fft3(FIXP_DBL *RESTRICT pDat) {
  FIXP_DBL r1, r2;
  FIXP_DBL s1, s2;
  FIXP_DBL pD;

  /* real part */
  r1 = pDat[2] + pDat[4];
  r2 = fMultDiv2((pDat[2] - pDat[4]), C31); /* (x1-x2)*(-sqrt(3)/2)/2 */
  pD = pDat[0] >> 1;
  pDat[0] = pD + (r1 >> 1);
  r1 = pD - (r1 >> 2);

  /* imaginary part */
  s1 = pDat[3] + pDat[5];
  s2 = fMultDiv2((pDat[3] - pDat[5]), C31);
  pD = pDat[1] >> 1;
  pDat[1] = pD + (s1 >> 1);
  s1 = pD - (s1 >> 2);

  /* combination of the cross terms */
  pDat[2] = r1 - s2;
  pDat[4] = r1 + s2;
  pDat[3] = s1 + r2;
  pDat[5] = s1 - r2;
}
#endif /* #ifndef FUNCTION_fft3 */

#define F5C(x) STC(x)

#define C51 (F5C(0x79bc3854)) /* FL2FXCONST_DBL( 0.95105652) */
#define C52 (F5C(0x9d839db0)) /* FL2FXCONST_DBL(-1.53884180/2) */
#define C53 (F5C(0xd18053ce)) /* FL2FXCONST_DBL(-0.36327126) */
#define C54 (F5C(0x478dde64)) /* FL2FXCONST_DBL( 0.55901699) */
#define C55 (F5C(0xb0000001)) /* FL2FXCONST_DBL(-1.25/2) */

/* Performs the FFT of length 5 according to the algorithm after Winograd.
   In-place on 5 interleaved complex values (re,im pairs) in pDat. */
/* This version works with a prescale of 2 instead of 3 */
static FDK_FORCEINLINE void fft5(FIXP_DBL *RESTRICT pDat) {
  FIXP_DBL r1, r2, r3, r4;
  FIXP_DBL s1, s2, s3, s4;
  FIXP_DBL t;

  /* real part */
  r1 = (pDat[2] + pDat[8]) >> 1;
  r4 = (pDat[2] - pDat[8]) >> 1;
  r3 = (pDat[4] + pDat[6]) >> 1;
  r2 = (pDat[4] - pDat[6]) >> 1;
  t = fMult((r1 - r3), C54);
  r1 = r1 + r3;
  pDat[0] = (pDat[0] >> 1) + r1;
  /* Bit shift left because of the constant C55 which was scaled with the
     factor 0.5 because of the representation of the values as fracts */
  r1 = pDat[0] + (fMultDiv2(r1, C55) << (2));
  r3 = r1 - t;
  r1 = r1 + t;
  t = fMult((r4 + r2), C51);
  /* Bit shift left because of the constant C52 which was scaled with the
     factor 0.5 because of the representation of the values as fracts */
  r4 = t + (fMultDiv2(r4, C52) << (2));
  r2 = t + fMult(r2, C53);

  /* imaginary part, same structure as the real part above */
  s1 = (pDat[3] + pDat[9]) >> 1;
  s4 = (pDat[3] - pDat[9]) >> 1;
  s3 = (pDat[5] + pDat[7]) >> 1;
  s2 = (pDat[5] - pDat[7]) >> 1;
  t = fMult((s1 - s3), C54);
  s1 = s1 + s3;
  pDat[1] = (pDat[1] >> 1) + s1;
  /* Bit shift left because of the constant C55 which was scaled with the
     factor 0.5 because of the representation of the values as fracts */
  s1 = pDat[1] + (fMultDiv2(s1, C55) << (2));
  s3 = s1 - t;
  s1 = s1 + t;
  t = fMult((s4 + s2), C51);
  /* Bit shift left because of the constant C52 which was scaled with the
     factor 0.5 because of the representation of the values as fracts */
  s4 = t + (fMultDiv2(s4, C52) << (2));
  s2 = t + fMult(s2, C53);

  /* combination */
  pDat[2] = r1 + s2;
  pDat[8] = r1 - s2;
  pDat[4] = r3 - s4;
  pDat[6] = r3 + s4;

  pDat[3] = s1 - r2;
  pDat[9] = s1 + r2;
  pDat[5] = s3 + r4;
  pDat[7] = s3 - r4;
}

/* Identical (hence benign) redefinition of the Winograd-5 constants. */
#define F5C(x) STC(x)

#define C51 (F5C(0x79bc3854)) /* FL2FXCONST_DBL( 0.95105652) */
#define C52 (F5C(0x9d839db0)) /* FL2FXCONST_DBL(-1.53884180/2) */
#define C53 (F5C(0xd18053ce)) /* FL2FXCONST_DBL(-0.36327126) */
#define C54 (F5C(0x478dde64)) /* FL2FXCONST_DBL( 0.55901699)
 */
#define C55 (F5C(0xb0000001)) /* FL2FXCONST_DBL(-1.25/2) */
/**
 * \brief Function performs a complex 10-point FFT
 *        The FFT is performed inplace. The result of the FFT
 *        is scaled by SCALEFACTOR10 bits.
 *
 *        WOPS FLC version: 1093 cycles
 *        WOPS with 32x16 bit multiplications: 196 cycles
 *
 * \param [i/o] x interleaved real/imaginary input and output
 *              (10 complex values, stride fixed to 1)
 *
 * \return void
 */
static void fft10(FIXP_DBL *x) // FIXP_DBL *re, FIXP_DBL *im, FIXP_SGL s)
{
  FIXP_DBL t;
  FIXP_DBL x0, x1, x2, x3, x4;
  FIXP_DBL r1, r2, r3, r4;
  FIXP_DBL s1, s2, s3, s4;
  FIXP_DBL y00, y01, y02, y03, y04, y05, y06, y07, y08, y09;
  FIXP_DBL y10, y11, y12, y13, y14, y15, y16, y17, y18, y19;

  const int s = 1; // stride factor

  /* 2 fft5 stages */

  /* real part of first fft5 (even input indices) */
  x0 = (x[s * 0] >> SCALEFACTOR10);
  x1 = (x[s * 4] >> SCALEFACTOR10);
  x2 = (x[s * 8] >> SCALEFACTOR10);
  x3 = (x[s * 12] >> SCALEFACTOR10);
  x4 = (x[s * 16] >> SCALEFACTOR10);

  r1 = (x3 + x2);
  r4 = (x3 - x2);
  r3 = (x1 + x4);
  r2 = (x1 - x4);
  t = fMult((r1 - r3), C54);
  r1 = (r1 + r3);
  y00 = (x0 + r1);
  /* C55 and C52 are stored pre-divided by 2, hence the << 1 */
  r1 = (y00 + ((fMult(r1, C55) << 1)));
  r3 = (r1 - t);
  r1 = (r1 + t);
  t = fMult((r4 + r2), C51);
  r4 = (t + (fMult(r4, C52) << 1));
  r2 = (t + fMult(r2, C53));

  /* imaginary part */
  x0 = (x[s * 0 + 1] >> SCALEFACTOR10);
  x1 = (x[s * 4 + 1] >> SCALEFACTOR10);
  x2 = (x[s * 8 + 1] >> SCALEFACTOR10);
  x3 = (x[s * 12 + 1] >> SCALEFACTOR10);
  x4 = (x[s * 16 + 1] >> SCALEFACTOR10);

  s1 = (x3 + x2);
  s4 = (x3 - x2);
  s3 = (x1 + x4);
  s2 = (x1 - x4);
  t = fMult((s1 - s3), C54);
  s1 = (s1 + s3);
  y01 = (x0 + s1);
  s1 = (y01 + (fMult(s1, C55) << 1));
  s3 = (s1 - t);
  s1 = (s1 + t);
  t = fMult((s4 + s2), C51);
  s4 = (t + (fMult(s4, C52) << 1));
  s2 = (t + fMult(s2, C53));

  /* combination */
  y04 = (r1 + s2);
  y16 = (r1 - s2);
  y08 = (r3 - s4);
  y12 = (r3 + s4);

  y05 = (s1 - r2);
  y17 = (s1 + r2);
  y09 = (s3 + r4);
  y13 = (s3 - r4);

  /* real part of second fft5 (odd input indices) */
  x0 = (x[s * 10] >> SCALEFACTOR10);
  x1 = (x[s * 2] >> SCALEFACTOR10);
  x2 = (x[s * 6] >> SCALEFACTOR10);
  x3 = (x[s * 14] >> SCALEFACTOR10);
  x4 = (x[s * 18] >> SCALEFACTOR10);

  r1 = (x1 + x4);
  r4 = (x1 - x4);
  r3 = (x3 + x2);
  r2 = (x3 - x2);
  t = fMult((r1 - r3), C54);
  r1 = (r1 + r3);
  y02 = (x0 + r1);
  r1 = (y02 + ((fMult(r1, C55) << 1)));
  r3 = (r1 - t);
  r1 = (r1 + t);
  t = fMult(((r4 + r2)), C51);
  r4 = (t + (fMult(r4, C52) << 1));
  r2 = (t + fMult(r2, C53));

  /* imaginary part */
  x0 = (x[s * 10 + 1] >> SCALEFACTOR10);
  x1 = (x[s * 2 + 1] >> SCALEFACTOR10);
  x2 = (x[s * 6 + 1] >> SCALEFACTOR10);
  x3 = (x[s * 14 + 1] >> SCALEFACTOR10);
  x4 = (x[s * 18 + 1] >> SCALEFACTOR10);

  s1 = (x1 + x4);
  s4 = (x1 - x4);
  s3 = (x3 + x2);
  s2 = (x3 - x2);
  t = fMult((s1 - s3), C54);
  s1 = (s1 + s3);
  y03 = (x0 + s1);
  s1 = (y03 + (fMult(s1, C55) << 1));
  s3 = (s1 - t);
  s1 = (s1 + t);
  t = fMult((s4 + s2), C51);
  s4 = (t + (fMult(s4, C52) << 1));
  s2 = (t + fMult(s2, C53));

  /* combination */
  y06 = (r1 + s2);
  y18 = (r1 - s2);
  y10 = (r3 - s4);
  y14 = (r3 + s4);

  y07 = (s1 - r2);
  y19 = (s1 + r2);
  y11 = (s3 + r4);
  y15 = (s3 - r4);

  /* 5 fft2 stages: combine the two fft5 outputs into the final spectrum */
  x[s * 0] = (y00 + y02);
  x[s * 0 + 1] = (y01 + y03);
  x[s * 10] = (y00 - y02);
  x[s * 10 + 1] = (y01 - y03);

  x[s * 4] = (y04 + y06);
  x[s * 4 + 1] = (y05 + y07);
  x[s * 14] = (y04 - y06);
  x[s * 14 + 1] = (y05 - y07);

  x[s * 8] = (y08 + y10);
  x[s * 8 + 1] = (y09 + y11);
  x[s * 18] = (y08 - y10);
  x[s * 18 + 1] = (y09 - y11);

  x[s * 12] = (y12 + y14);
  x[s * 12 + 1] = (y13 + y15);
  x[s * 2] = (y12 - y14);
  x[s * 2 + 1] = (y13 - y15);

  x[s * 16] = (y16 + y18);
  x[s * 16 + 1] = (y17 + y19);
  x[s * 6] = (y16 - y18);
  x[s * 6 + 1] = (y17 - y19);
}

#ifndef FUNCTION_fft12
#define FUNCTION_fft12

#undef C31
#define C31 (STC(0x91261468)) /* FL2FXCONST_DBL(-0.86602540) = -sqrt(3)/2 */

/* Performs the FFT of length 12 in-place: 4 Winograd fft3 stages with
   twiddle rotation (RotVectorReal12/Imag12), followed by 3 inlined fft4
   stages. Result is scaled by SCALEFACTOR12 bits. */
static inline void fft12(FIXP_DBL *pInput) {
  FIXP_DBL aDst[24];
  FIXP_DBL *pSrc, *pDst;
  int i;

  pSrc = pInput;
  pDst = aDst;
  FIXP_DBL r1, r2, s1, s2, pD;

  /* First 3*2 samples are shifted right by 2 before output */
  r1 = pSrc[8] + pSrc[16];
  r2 = fMultDiv2((pSrc[8] - pSrc[16]), C31);
  pD = pSrc[0] >> 1;
  pDst[0] = (pD + (r1 >> 1)) >> 1;
  r1 = pD - (r1 >> 2);

  /* imaginary part */
  s1 = pSrc[9] + pSrc[17];
  s2 = fMultDiv2((pSrc[9] - pSrc[17]), C31);
  pD = pSrc[1] >> 1;
  pDst[1] = (pD + (s1 >> 1)) >> 1;
  s1 = pD - (s1 >> 2);

  /* combination */
  pDst[2] = (r1 - s2) >> 1;
  pDst[3] = (s1 + r2) >> 1;
  pDst[4] = (r1 + s2) >> 1;
  pDst[5] = (s1 - r2) >> 1;
  pSrc += 2;
  pDst += 6;

  const FIXP_STB *pVecRe = RotVectorReal12;
  const FIXP_STB *pVecIm = RotVectorImag12;
  FIXP_DBL re, im;
  FIXP_STB vre, vim;
  for (i = 0; i < 2; i++) {
    /* sample 0,1 are shifted right by 2 before output */
    /* sample 2,3 4,5 are shifted right by 1 and complex multiplied before
     * output */

    r1 = pSrc[8] + pSrc[16];
    r2 = fMultDiv2((pSrc[8] - pSrc[16]), C31);
    pD = pSrc[0] >> 1;
    pDst[0] = (pD + (r1 >> 1)) >> 1;
    r1 = pD - (r1 >> 2);

    /* imaginary part */
    s1 = pSrc[9] + pSrc[17];
    s2 = fMultDiv2((pSrc[9] - pSrc[17]), C31);
    pD = pSrc[1] >> 1;
    pDst[1] = (pD + (s1 >> 1)) >> 1;
    s1 = pD - (s1 >> 2);

    /* combination with twiddle rotation */
    re = (r1 - s2) >> 0;
    im = (s1 + r2) >> 0;
    vre = *pVecRe++;
    vim = *pVecIm++;
    cplxMultDiv2(&pDst[3], &pDst[2], im, re, vre, vim);

    re = (r1 + s2) >> 0;
    im = (s1 - r2) >> 0;
    vre = *pVecRe++;
    vim = *pVecIm++;
    cplxMultDiv2(&pDst[5], &pDst[4], im, re, vre, vim);

    pDst += 6;
    pSrc += 2;
  }
  /* sample 0,1 are shifted right by 2 before output */
  /* sample 2,3 is shifted right by 1 and complex multiplied with (0.0,+1.0) */
  /* sample 4,5 is shifted right by 1 and complex multiplied with (-1.0,0.0) */
  r1 = pSrc[8] + pSrc[16];
  r2 = fMultDiv2((pSrc[8] - pSrc[16]), C31);
  pD = pSrc[0] >> 1;
  pDst[0] = (pD + (r1 >> 1)) >> 1;
  r1 = pD - (r1 >> 2);

  /* imaginary part */
  s1 = pSrc[9] + pSrc[17];
  s2 = fMultDiv2((pSrc[9] - pSrc[17]), C31);
  pD = pSrc[1] >> 1;
  pDst[1] = (pD + (s1 >> 1)) >> 1;
  s1 = pD - (s1 >> 2);

  /* combination */
  pDst[2] = (s1 + r2) >> 1;
  pDst[3] = (s2 - r1) >> 1;
  pDst[4] = -((r1 + s2) >> 1);
  pDst[5] = (r2 - s1) >> 1;

  /* Perform 3 times the fft of length 4. The input samples are at the address
  of aDst and the output samples are at the address of pInput. The input vector
  for the fft of length 4 is built of the interleaved samples in aDst, the
  output samples are stored consecutively at the address of pInput.
  */
  pSrc = aDst;
  pDst = pInput;
  for (i = 0; i < 3; i++) {
    /* inline FFT4 merged with incoming resorting loop */
    FIXP_DBL a00, a10, a20, a30, tmp0, tmp1;

    a00 = (pSrc[0] + pSrc[12]) >> 1; /* Re A + Re B */
    a10 = (pSrc[6] + pSrc[18]) >> 1; /* Re C + Re D */
    a20 = (pSrc[1] + pSrc[13]) >> 1; /* Im A + Im B */
    a30 = (pSrc[7] + pSrc[19]) >> 1; /* Im C + Im D */

    pDst[0] = a00 + a10; /* Re A' = Re A + Re B + Re C + Re D */
    pDst[1] = a20 + a30; /* Im A' = Im A + Im B + Im C + Im D */

    tmp0 = a00 - pSrc[12]; /* Re A - Re B */
    tmp1 = a20 - pSrc[13]; /* Im A - Im B */

    pDst[12] = a00 - a10; /* Re C' = Re A + Re B - Re C - Re D */
    pDst[13] = a20 - a30; /* Im C' = Im A + Im B - Im C - Im D */

    a10 = a10 - pSrc[18]; /* Re C - Re D */
    a30 = a30 - pSrc[19]; /* Im C - Im D */

    pDst[6] = tmp0 + a30;  /* Re B' = Re A - Re B + Im C - Im D */
    pDst[18] = tmp0 - a30; /* Re D' = Re A - Re B - Im C + Im D */
    pDst[7] = tmp1 - a10;  /* Im B' = Im A - Im B - Re C + Re D */
    pDst[19] = tmp1 + a10; /* Im D' = Im A - Im B + Re C - Re D */

    pSrc += 2;
    pDst += 2;
  }
}
#endif /* FUNCTION_fft12 */

#ifndef FUNCTION_fft15

#define N3 3
#define N5 5
#define N6 6
#define N15 15

/* Performs the FFT of length 15. It is split into FFTs of length 3 and
 * length 5.
 * The result is scaled by SCALEFACTOR15 bits; temporaries live on the stack. */
static inline void fft15(FIXP_DBL *pInput) {
  FIXP_DBL aDst[2 * N15];
  FIXP_DBL aDst1[2 * N15];
  int i, k, l;

  /* Sort input vector for fft's of length 3
  input3(0:2)   = [input(0) input(5) input(10)];
  input3(3:5)   = [input(3) input(8) input(13)];
  input3(6:8)   = [input(6) input(11) input(1)];
  input3(9:11)  = [input(9) input(14) input(4)];
  input3(12:14) = [input(12) input(2) input(7)]; */
  {
    const FIXP_DBL *pSrc = pInput;
    FIXP_DBL *RESTRICT pDst = aDst;
    /* Merge 3 loops into one, skip call of fft3 */
    for (i = 0, l = 0, k = 0; i < N5; i++, k += 6) {
      pDst[k + 0] = pSrc[l];
      pDst[k + 1] = pSrc[l + 1];
      l += 2 * N5;
      if (l >= (2 * N15)) l -= (2 * N15);

      pDst[k + 2] = pSrc[l];
      pDst[k + 3] = pSrc[l + 1];
      l += 2 * N5;
      if (l >= (2 * N15)) l -= (2 * N15);
      pDst[k + 4] = pSrc[l];
      pDst[k + 5] = pSrc[l + 1];
      l += (2 * N5) + (2 * N3);
      if (l >= (2 * N15)) l -= (2 * N15);

      /* fft3 merged with shift right by 2 loop */
      FIXP_DBL r1, r2, r3;
      FIXP_DBL s1, s2;
      /* real part */
      r1 = pDst[k + 2] + pDst[k + 4];
      r2 = fMult((pDst[k + 2] - pDst[k + 4]), C31);
      s1 = pDst[k + 0];
      pDst[k + 0] = (s1 + r1) >> 2;
      r1 = s1 - (r1 >> 1);

      /* imaginary part */
      s1 = pDst[k + 3] + pDst[k + 5];
      s2 = fMult((pDst[k + 3] - pDst[k + 5]), C31);
      r3 = pDst[k + 1];
      pDst[k + 1] = (r3 + s1) >> 2;
      s1 = r3 - (s1 >> 1);

      /* combination */
      pDst[k + 2] = (r1 - s2) >> 2;
      pDst[k + 4] = (r1 + s2) >> 2;
      pDst[k + 3] = (s1 + r2) >> 2;
      pDst[k + 5] = (s1 - r2) >> 2;
    }
  }
  /* Sort input vector for fft's of length 5
  input5(0:4)   = [output3(0) output3(3) output3(6) output3(9) output3(12)];
  input5(5:9)   = [output3(1) output3(4) output3(7) output3(10) output3(13)];
  input5(10:14) = [output3(2) output3(5) output3(8) output3(11) output3(14)]; */
  /* Merge 2 loops into one, brings about 10% */
  {
    const FIXP_DBL *pSrc = aDst;
    FIXP_DBL *RESTRICT pDst = aDst1;
    for (i = 0, l = 0, k = 0; i < N3; i++, k += 10) {
      l = 2 * i;
      pDst[k + 0] = pSrc[l + 0];
      pDst[k + 1] = pSrc[l + 1];
      pDst[k + 2] = pSrc[l + 0 + (2 * N3)];
      pDst[k + 3] = pSrc[l + 1 + (2 * N3)];
      pDst[k + 4] = pSrc[l + 0 + (4 * N3)];
      pDst[k + 5] = pSrc[l + 1 + (4 * N3)];
      pDst[k + 6] = pSrc[l + 0 + (6 * N3)];
      pDst[k + 7] = pSrc[l + 1 + (6 * N3)];
      pDst[k + 8] = pSrc[l + 0 + (8 * N3)];
      pDst[k + 9] = pSrc[l + 1 + (8 * N3)];
      fft5(&pDst[k]);
    }
  }
  /* Sort output vector of length 15
  output = [out5(0)  out5(6)  out5(12) out5(3)  out5(9)
            out5(10) out5(1)  out5(7)  out5(13) out5(4)
            out5(5)  out5(11) out5(2)  out5(8)  out5(14)]; */
  /* optimize clumsy loop, brings about 5% */
  {
    const FIXP_DBL *pSrc = aDst1;
    FIXP_DBL *RESTRICT pDst = pInput;
    for (i = 0, l = 0, k = 0; i < N3; i++, k += 10) {
      pDst[k + 0] = pSrc[l];
      pDst[k + 1] = pSrc[l + 1];
      l += (2 * N6);
      if (l >= (2 * N15)) l -= (2 * N15);
      pDst[k + 2] = pSrc[l];
      pDst[k + 3] = pSrc[l + 1];
      l += (2 * N6);
      if (l >= (2 * N15)) l -= (2 * N15);
      pDst[k + 4] = pSrc[l];
      pDst[k + 5] = pSrc[l + 1];
      l += (2 * N6);
      if (l >= (2 * N15)) l -= (2 * N15);
      pDst[k + 6] = pSrc[l];
      pDst[k + 7] = pSrc[l + 1];
      l += (2 * N6);
      if (l >= (2 * N15)) l -= (2 * N15);
      pDst[k + 8] = pSrc[l];
      pDst[k + 9] = pSrc[l + 1];
      l += 2; /* no modulo check needed, it cannot occur */
    }
  }
}
#endif /* FUNCTION_fft15 */

/*
  Select shift placement.
  Some processors like ARM may shift "for free" in combination with an addition
  or subtraction, but others don't, so either combining the shift with +/- or
  reducing the total amount of shift operations is optimal.
*/
#if !defined(__arm__)
#define SHIFT_A >> 1
#define SHIFT_B
#else
#define SHIFT_A
#define SHIFT_B >> 1
#endif

#ifndef FUNCTION_fft_16 /* check that fft_16 (FIXP_DBL *) is not yet defined */

/* This define prevents the twiddle array from being declared twice if the
 * 16-bit fft is enabled too */
#define FUNCTION_DATA_fft_16_w16
static const FIXP_STP fft16_w16[2] = {STCP(0x7641af3d, 0x30fbc54d),
                                      STCP(0x30fbc54d, 0x7641af3d)};

/* Performs the FFT of length 16 in-place on 16 interleaved complex values:
   two radix-4-style passes of hand-unrolled butterflies, with twiddles
   W(pi/4) (SUMDIFF_PIFOURTH) and fft16_w16[]. Each pass shifts by one bit
   for overflow headroom. */
LNK_SECTION_CODE_L1
inline void fft_16(FIXP_DBL *RESTRICT x) {
  FIXP_DBL vr, ur;
  FIXP_DBL vr2, ur2;
  FIXP_DBL vr3, ur3;
  FIXP_DBL vr4, ur4;
  FIXP_DBL vi, ui;
  FIXP_DBL vi2, ui2;
  FIXP_DBL vi3, ui3;

  vr = (x[0] >> 1) + (x[16] >> 1);       /* Re A + Re B */
  ur = (x[1] >> 1) + (x[17] >> 1);       /* Im A + Im B */
  vi = (x[8] SHIFT_A) + (x[24] SHIFT_A); /* Re C + Re D */
  ui = (x[9] SHIFT_A) + (x[25] SHIFT_A); /* Im C + Im D */
  x[0] = vr + (vi SHIFT_B);              /* Re A' = ReA + ReB +ReC + ReD */
  x[1] = ur + (ui SHIFT_B);              /* Im A' = sum of imag values */

  vr2 = (x[4] >> 1) + (x[20] >> 1); /* Re A + Re B */
  ur2 = (x[5] >> 1) + (x[21] >> 1); /* Im A + Im B */

  x[4] = vr - (vi SHIFT_B); /* Re C' = -(ReC+ReD) + (ReA+ReB) */
  x[5] = ur - (ui SHIFT_B); /* Im C' = -Im C -Im D +Im A +Im B */
  vr -= x[16];              /* Re A - Re B */
  vi = (vi SHIFT_B)-x[24];  /* Re C - Re D */
  ur -= x[17];              /* Im A - Im B */
  ui = (ui SHIFT_B)-x[25];  /* Im C - Im D */

  vr3 = (x[2] >> 1) + (x[18] >> 1); /* Re A + Re B */
  ur3 = (x[3] >> 1) + (x[19] >> 1); /* Im A + Im B */

  x[2] = ui + vr; /* Re B' = Im C - Im D + Re A - Re B */
  x[3] = ur - vi; /* Im B'= -Re C + Re D + Im A - Im B */

  vr4 = (x[6] >> 1) + (x[22] >> 1); /* Re A + Re B */
  ur4 = (x[7] >> 1) + (x[23] >> 1); /* Im A + Im B */

  x[6] = vr - ui; /* Re D' = -Im C + Im D + Re A - Re B */
  x[7] = vi + ur; /* Im D'= Re C - Re D + Im A - Im B */

  vi2 = (x[12] SHIFT_A) + (x[28] SHIFT_A); /* Re C + Re D */
  ui2 = (x[13] SHIFT_A) + (x[29] SHIFT_A); /* Im C + Im D */
  x[8] = vr2 + (vi2 SHIFT_B);              /* Re A' = ReA + ReB +ReC + ReD */
  x[9] = ur2 + (ui2 SHIFT_B);              /* Im A' = sum of imag values */
  x[12] = vr2 - (vi2 SHIFT_B);             /* Re C' = -(ReC+ReD) + (ReA+ReB) */
  x[13] = ur2 - (ui2 SHIFT_B);             /* Im C' = -Im C -Im D +Im A +Im B */
  vr2 -= x[20];                            /* Re A - Re B */
  ur2 -= x[21];                            /* Im A - Im B */
  vi2 = (vi2 SHIFT_B)-x[28];               /* Re C - Re D */
  ui2 = (ui2 SHIFT_B)-x[29];               /* Im C - Im D */

  vi = (x[10] SHIFT_A) + (x[26] SHIFT_A); /* Re C + Re D */
  ui = (x[11] SHIFT_A) + (x[27] SHIFT_A); /* Im C + Im D */

  x[10] = ui2 + vr2; /* Re B' = Im C - Im D + Re A - Re B */
  x[11] = ur2 - vi2; /* Im B'= -Re C + Re D + Im A - Im B */

  vi3 = (x[14] SHIFT_A) + (x[30] SHIFT_A); /* Re C + Re D */
  ui3 = (x[15] SHIFT_A) + (x[31] SHIFT_A); /* Im C + Im D */

  x[14] = vr2 - ui2; /* Re D' = -Im C + Im D + Re A - Re B */
  x[15] = vi2 + ur2; /* Im D'= Re C - Re D + Im A - Im B */

  x[16] = vr3 + (vi SHIFT_B); /* Re A' = ReA + ReB +ReC + ReD */
  x[17] = ur3 + (ui SHIFT_B); /* Im A' = sum of imag values */
  x[20] = vr3 - (vi SHIFT_B); /* Re C' = -(ReC+ReD) + (ReA+ReB) */
  x[21] = ur3 - (ui SHIFT_B); /* Im C' = -Im C -Im D +Im A +Im B */
  vr3 -= x[18];               /* Re A - Re B */
  ur3 -= x[19];               /* Im A - Im B */
  vi = (vi SHIFT_B)-x[26];    /* Re C - Re D */
  ui = (ui SHIFT_B)-x[27];    /* Im C - Im D */
  x[18] = ui + vr3;           /* Re B' = Im C - Im D + Re A - Re B */
  x[19] = ur3 - vi;           /* Im B'= -Re C + Re D + Im A - Im B */

  x[24] = vr4 + (vi3 SHIFT_B); /* Re A' = ReA + ReB +ReC + ReD */
  x[28] = vr4 - (vi3 SHIFT_B); /* Re C' = -(ReC+ReD) + (ReA+ReB) */
  x[25] = ur4 + (ui3 SHIFT_B); /* Im A' = sum of imag values */
  x[29] = ur4 - (ui3 SHIFT_B); /* Im C' = -Im C -Im D +Im A +Im B */
  vr4 -= x[22];                /* Re A - Re B */
  ur4 -= x[23];                /* Im A - Im B */

  x[22] = vr3 - ui; /* Re D' = -Im C + Im D + Re A - Re B */
  x[23] = vi + ur3; /* Im D'= Re C - Re D + Im A - Im B */

  vi3 = (vi3 SHIFT_B)-x[30]; /* Re C - Re D */
  ui3 = (ui3 SHIFT_B)-x[31]; /* Im C - Im D */
  x[26] = ui3 + vr4;         /* Re B' = Im C - Im D + Re A - Re B */
  x[30] = vr4 - ui3;         /* Re D' = -Im C + Im D + Re A - Re B */
  x[27] = ur4 - vi3;         /* Im B'= -Re C + Re D + Im A - Im B */
  x[31] = vi3 + ur4;         /* Im D'= Re C - Re D + Im A - Im B */

  /* Second pass: combine with twiddle factors, complex indices noted as
     xt1/xt2 butterfly pairs. */
  // xt1 = 0
  // xt2 = 8
  vr = x[8];
  vi = x[9];
  ur = x[0] >> 1;
  ui = x[1] >> 1;
  x[0] = ur + (vr >> 1);
  x[1] = ui + (vi >> 1);
  x[8] = ur - (vr >> 1);
  x[9] = ui - (vi >> 1);

  // xt1 = 4
  // xt2 = 12
  vr = x[13];
  vi = x[12];
  ur = x[4] >> 1;
  ui = x[5] >> 1;
  x[4] = ur + (vr >> 1);
  x[5] = ui - (vi >> 1);
  x[12] = ur - (vr >> 1);
  x[13] = ui + (vi >> 1);

  // xt1 = 16
  // xt2 = 24
  vr = x[24];
  vi = x[25];
  ur = x[16] >> 1;
  ui = x[17] >> 1;
  x[16] = ur + (vr >> 1);
  x[17] = ui + (vi >> 1);
  x[24] = ur - (vr >> 1);
  x[25] = ui - (vi >> 1);

  // xt1 = 20
  // xt2 = 28
  vr = x[29];
  vi = x[28];
  ur = x[20] >> 1;
  ui = x[21] >> 1;
  x[20] = ur + (vr >> 1);
  x[21] = ui - (vi >> 1);
  x[28] = ur - (vr >> 1);
  x[29] = ui + (vi >> 1);

  // xt1 = 2
  // xt2 = 10
  SUMDIFF_PIFOURTH(vi, vr, x[10], x[11])
  ur = x[2];
  ui = x[3];
  x[2] = (ur >> 1) + vr;
  x[3] = (ui >> 1) + vi;
  x[10] = (ur >> 1) - vr;
  x[11] = (ui >> 1) - vi;

  // xt1 = 6
  // xt2 = 14
  SUMDIFF_PIFOURTH(vr, vi, x[14], x[15])
  ur = x[6];
  ui = x[7];
  x[6] = (ur >> 1) + vr;
  x[7] = (ui >> 1) - vi;
  x[14] = (ur >> 1) - vr;
  x[15] = (ui >> 1) + vi;

  // xt1 = 18
  // xt2 = 26
  SUMDIFF_PIFOURTH(vi, vr, x[26], x[27])
  ur = x[18];
  ui = x[19];
  x[18] = (ur >> 1) + vr;
  x[19] = (ui >> 1) + vi;
  x[26] = (ur >> 1) - vr;
  x[27] = (ui >> 1) - vi;

  // xt1 = 22
  // xt2 = 30
  SUMDIFF_PIFOURTH(vr, vi, x[30], x[31])
  ur = x[22];
  ui = x[23];
  x[22] = (ur >> 1) + vr;
  x[23] = (ui >> 1) - vi;
  x[30] = (ur >> 1) - vr;
  x[31] = (ui >> 1) + vi;

  // xt1 = 0
  // xt2 = 16
  vr = x[16];
  vi = x[17];
  ur = x[0] >> 1;
  ui = x[1] >> 1;
  x[0] = ur + (vr >> 1);
  x[1] = ui + (vi >> 1);
  x[16] = ur - (vr >> 1);
  x[17] = ui - (vi >> 1);

  // xt1 = 8
  // xt2 = 24
  vi = x[24];
  vr = x[25];
  ur = x[8] >> 1;
  ui = x[9] >> 1;
  x[8] = ur + (vr >> 1);
  x[9] = ui - (vi >> 1);
  x[24] = ur - (vr >> 1);
  x[25] = ui + (vi >> 1);

  // xt1 = 2
  // xt2 = 18
  cplxMultDiv2(&vi, &vr, x[19], x[18], fft16_w16[0]);
  ur = x[2];
  ui = x[3];
  x[2] = (ur >> 1) + vr;
  x[3] = (ui >> 1) + vi;
  x[18] = (ur >> 1) - vr;
  x[19] = (ui >> 1) - vi;

  // xt1 = 10
  // xt2 = 26
  cplxMultDiv2(&vr, &vi, x[27], x[26], fft16_w16[0]);
  ur = x[10];
  ui = x[11];
  x[10] = (ur >> 1) + vr;
  x[11] = (ui >> 1) - vi;
  x[26] = (ur >> 1) - vr;
  x[27] = (ui >> 1) + vi;

  // xt1 = 4
  // xt2 = 20
  SUMDIFF_PIFOURTH(vi, vr, x[20], x[21])
  ur = x[4];
  ui = x[5];
  x[4] = (ur >> 1) + vr;
  x[5] = (ui >> 1) + vi;
  x[20] = (ur >> 1) - vr;
  x[21] = (ui >> 1) - vi;

  // xt1 = 12
  // xt2 = 28
  SUMDIFF_PIFOURTH(vr, vi, x[28], x[29])
  ur = x[12];
  ui = x[13];
  x[12] = (ur >> 1) + vr;
  x[13] = (ui >> 1) - vi;
  x[28] = (ur >> 1) - vr;
  x[29] = (ui >> 1) + vi;

  // xt1 = 6
  // xt2 = 22
  cplxMultDiv2(&vi, &vr, x[23], x[22], fft16_w16[1]);
  ur = x[6];
  ui = x[7];
  x[6] = (ur >> 1) + vr;
  x[7] = (ui >> 1) + vi;
  x[22] = (ur >> 1) - vr;
  x[23] = (ui >> 1) - vi;

  // xt1 = 14
  // xt2 = 30
  cplxMultDiv2(&vr, &vi, x[31], x[30], fft16_w16[1]);
  ur = x[14];
  ui = x[15];
  x[14] = (ur >> 1) + vr;
  x[15] = (ui >> 1) - vi;
  x[30] = (ur >> 1) - vr;
  x[31] = (ui >> 1) + vi;
}
#endif /* FUNCTION_fft_16 */

#ifndef FUNCTION_fft_32
static const FIXP_STP fft32_w32[6] = {
    STCP(0x7641af3d, 0x30fbc54d), STCP(0x30fbc54d, 0x7641af3d),
    STCP(0x7d8a5f40, 0x18f8b83c), STCP(0x6a6d98a4, 0x471cece7),
    STCP(0x471cece7, 0x6a6d98a4), STCP(0x18f8b83c, 0x7d8a5f40)};
#define W_PiFOURTH STC(0x5a82799a)

LNK_SECTION_CODE_L1
inline void fft_32(FIXP_DBL *const _x) {
  /*
   * 1+2 stage radix 4
   */

  /////////////////////////////////////////////////////////////////////////////////////////
  {
    FIXP_DBL *const x = _x;
    FIXP_DBL vi, ui;
    FIXP_DBL vi2, ui2;
    FIXP_DBL vi3, ui3;
    FIXP_DBL vr, ur;
    FIXP_DBL vr2, ur2;
    FIXP_DBL vr3, ur3;
    FIXP_DBL vr4, ur4;

    // i = 0
    vr = (x[0] + x[32]) >> 1;     /* Re A + Re B */
    ur = (x[1] + x[33]) >> 1;     /* Im A + Im B */
    vi = (x[16] + x[48]) SHIFT_A; /* Re C + Re D */
    ui = (x[17] + x[49]) SHIFT_A; /* Im C + Im D */

    x[0] = vr + (vi SHIFT_B); /* Re A' = ReA + ReB +ReC + ReD */
    x[1] = ur + (ui SHIFT_B); /* Im A' = sum of imag values */

    vr2 = (x[4] + x[36]) >> 1; /* Re A + Re B */
    ur2 = (x[5] + x[37]) >> 1; /* Im A + Im B */

    x[4] = vr - (vi SHIFT_B); /* Re C' = -(ReC+ReD) + (ReA+ReB) */
    x[5] = ur - (ui SHIFT_B); /* Im C' = -Im C -Im D +Im A +Im B */

    vr -= x[32];             /* Re A - Re B */
    ur -= x[33];             /* Im A - Im B */
    vi = (vi SHIFT_B)-x[48]; /* Re C - Re D */
    ui = (ui SHIFT_B)-x[49]; /* Im C - Im D */

    vr3 = (x[2] + x[34]) >> 1; /* Re A + Re B */
    ur3 = (x[3] + x[35]) >> 1; /* Im A
+ Im B */ 1042 1043 x[2] = ui + vr; /* Re B' = Im C - Im D + Re A - Re B */ 1044 x[3] = ur - vi; /* Im B'= -Re C + Re D + Im A - Im B */ 1045 1046 vr4 = (x[6] + x[38]) >> 1; /* Re A + Re B */ 1047 ur4 = (x[7] + x[39]) >> 1; /* Im A + Im B */ 1048 1049 x[6] = vr - ui; /* Re D' = -Im C + Im D + Re A - Re B */ 1050 x[7] = vi + ur; /* Im D'= Re C - Re D + Im A - Im B */ 1051 1052 // i=16 1053 vi = (x[20] + x[52]) SHIFT_A; /* Re C + Re D */ 1054 ui = (x[21] + x[53]) SHIFT_A; /* Im C + Im D */ 1055 1056 x[16] = vr2 + (vi SHIFT_B); /* Re A' = ReA + ReB +ReC + ReD */ 1057 x[17] = ur2 + (ui SHIFT_B); /* Im A' = sum of imag values */ 1058 x[20] = vr2 - (vi SHIFT_B); /* Re C' = -(ReC+ReD) + (ReA+ReB) */ 1059 x[21] = ur2 - (ui SHIFT_B); /* Im C' = -Im C -Im D +Im A +Im B */ 1060 1061 vr2 -= x[36]; /* Re A - Re B */ 1062 ur2 -= x[37]; /* Im A - Im B */ 1063 vi = (vi SHIFT_B)-x[52]; /* Re C - Re D */ 1064 ui = (ui SHIFT_B)-x[53]; /* Im C - Im D */ 1065 1066 vi2 = (x[18] + x[50]) SHIFT_A; /* Re C + Re D */ 1067 ui2 = (x[19] + x[51]) SHIFT_A; /* Im C + Im D */ 1068 1069 x[18] = ui + vr2; /* Re B' = Im C - Im D + Re A - Re B */ 1070 x[19] = ur2 - vi; /* Im B'= -Re C + Re D + Im A - Im B */ 1071 1072 vi3 = (x[22] + x[54]) SHIFT_A; /* Re C + Re D */ 1073 ui3 = (x[23] + x[55]) SHIFT_A; /* Im C + Im D */ 1074 1075 x[22] = vr2 - ui; /* Re D' = -Im C + Im D + Re A - Re B */ 1076 x[23] = vi + ur2; /* Im D'= Re C - Re D + Im A - Im B */ 1077 1078 // i = 32 1079 1080 x[32] = vr3 + (vi2 SHIFT_B); /* Re A' = ReA + ReB +ReC + ReD */ 1081 x[33] = ur3 + (ui2 SHIFT_B); /* Im A' = sum of imag values */ 1082 x[36] = vr3 - (vi2 SHIFT_B); /* Re C' = -(ReC+ReD) + (ReA+ReB) */ 1083 x[37] = ur3 - (ui2 SHIFT_B); /* Im C' = -Im C -Im D +Im A +Im B */ 1084 1085 vr3 -= x[34]; /* Re A - Re B */ 1086 ur3 -= x[35]; /* Im A - Im B */ 1087 vi2 = (vi2 SHIFT_B)-x[50]; /* Re C - Re D */ 1088 ui2 = (ui2 SHIFT_B)-x[51]; /* Im C - Im D */ 1089 1090 x[34] = ui2 + vr3; /* Re B' = Im C - Im D + Re A - Re B */ 1091 x[35] 
= ur3 - vi2; /* Im B'= -Re C + Re D + Im A - Im B */ 1092 1093 // i=48 1094 1095 x[48] = vr4 + (vi3 SHIFT_B); /* Re A' = ReA + ReB +ReC + ReD */ 1096 x[52] = vr4 - (vi3 SHIFT_B); /* Re C' = -(ReC+ReD) + (ReA+ReB) */ 1097 x[49] = ur4 + (ui3 SHIFT_B); /* Im A' = sum of imag values */ 1098 x[53] = ur4 - (ui3 SHIFT_B); /* Im C' = -Im C -Im D +Im A +Im B */ 1099 1100 vr4 -= x[38]; /* Re A - Re B */ 1101 ur4 -= x[39]; /* Im A - Im B */ 1102 1103 x[38] = vr3 - ui2; /* Re D' = -Im C + Im D + Re A - Re B */ 1104 x[39] = vi2 + ur3; /* Im D'= Re C - Re D + Im A - Im B */ 1105 1106 vi3 = (vi3 SHIFT_B)-x[54]; /* Re C - Re D */ 1107 ui3 = (ui3 SHIFT_B)-x[55]; /* Im C - Im D */ 1108 1109 x[50] = ui3 + vr4; /* Re B' = Im C - Im D + Re A - Re B */ 1110 x[54] = vr4 - ui3; /* Re D' = -Im C + Im D + Re A - Re B */ 1111 x[51] = ur4 - vi3; /* Im B'= -Re C + Re D + Im A - Im B */ 1112 x[55] = vi3 + ur4; /* Im D'= Re C - Re D + Im A - Im B */ 1113 1114 // i=8 1115 vr = (x[8] + x[40]) >> 1; /* Re A + Re B */ 1116 ur = (x[9] + x[41]) >> 1; /* Im A + Im B */ 1117 vi = (x[24] + x[56]) SHIFT_A; /* Re C + Re D */ 1118 ui = (x[25] + x[57]) SHIFT_A; /* Im C + Im D */ 1119 1120 x[8] = vr + (vi SHIFT_B); /* Re A' = ReA + ReB +ReC + ReD */ 1121 x[9] = ur + (ui SHIFT_B); /* Im A' = sum of imag values */ 1122 1123 vr2 = (x[12] + x[44]) >> 1; /* Re A + Re B */ 1124 ur2 = (x[13] + x[45]) >> 1; /* Im A + Im B */ 1125 1126 x[12] = vr - (vi SHIFT_B); /* Re C' = -(ReC+ReD) + (ReA+ReB) */ 1127 x[13] = ur - (ui SHIFT_B); /* Im C' = -Im C -Im D +Im A +Im B */ 1128 1129 vr -= x[40]; /* Re A - Re B */ 1130 ur -= x[41]; /* Im A - Im B */ 1131 vi = (vi SHIFT_B)-x[56]; /* Re C - Re D */ 1132 ui = (ui SHIFT_B)-x[57]; /* Im C - Im D */ 1133 1134 vr3 = (x[10] + x[42]) >> 1; /* Re A + Re B */ 1135 ur3 = (x[11] + x[43]) >> 1; /* Im A + Im B */ 1136 1137 x[10] = ui + vr; /* Re B' = Im C - Im D + Re A - Re B */ 1138 x[11] = ur - vi; /* Im B'= -Re C + Re D + Im A - Im B */ 1139 1140 vr4 = (x[14] + x[46]) >> 1; /* Re A + Re 
B */ 1141 ur4 = (x[15] + x[47]) >> 1; /* Im A + Im B */ 1142 1143 x[14] = vr - ui; /* Re D' = -Im C + Im D + Re A - Re B */ 1144 x[15] = vi + ur; /* Im D'= Re C - Re D + Im A - Im B */ 1145 1146 // i=24 1147 vi = (x[28] + x[60]) SHIFT_A; /* Re C + Re D */ 1148 ui = (x[29] + x[61]) SHIFT_A; /* Im C + Im D */ 1149 1150 x[24] = vr2 + (vi SHIFT_B); /* Re A' = ReA + ReB +ReC + ReD */ 1151 x[28] = vr2 - (vi SHIFT_B); /* Re C' = -(ReC+ReD) + (ReA+ReB) */ 1152 x[25] = ur2 + (ui SHIFT_B); /* Im A' = sum of imag values */ 1153 x[29] = ur2 - (ui SHIFT_B); /* Im C' = -Im C -Im D +Im A +Im B */ 1154 1155 vr2 -= x[44]; /* Re A - Re B */ 1156 ur2 -= x[45]; /* Im A - Im B */ 1157 vi = (vi SHIFT_B)-x[60]; /* Re C - Re D */ 1158 ui = (ui SHIFT_B)-x[61]; /* Im C - Im D */ 1159 1160 vi2 = (x[26] + x[58]) SHIFT_A; /* Re C + Re D */ 1161 ui2 = (x[27] + x[59]) SHIFT_A; /* Im C + Im D */ 1162 1163 x[26] = ui + vr2; /* Re B' = Im C - Im D + Re A - Re B */ 1164 x[27] = ur2 - vi; /* Im B'= -Re C + Re D + Im A - Im B */ 1165 1166 vi3 = (x[30] + x[62]) SHIFT_A; /* Re C + Re D */ 1167 ui3 = (x[31] + x[63]) SHIFT_A; /* Im C + Im D */ 1168 1169 x[30] = vr2 - ui; /* Re D' = -Im C + Im D + Re A - Re B */ 1170 x[31] = vi + ur2; /* Im D'= Re C - Re D + Im A - Im B */ 1171 1172 // i=40 1173 1174 x[40] = vr3 + (vi2 SHIFT_B); /* Re A' = ReA + ReB +ReC + ReD */ 1175 x[44] = vr3 - (vi2 SHIFT_B); /* Re C' = -(ReC+ReD) + (ReA+ReB) */ 1176 x[41] = ur3 + (ui2 SHIFT_B); /* Im A' = sum of imag values */ 1177 x[45] = ur3 - (ui2 SHIFT_B); /* Im C' = -Im C -Im D +Im A +Im B */ 1178 1179 vr3 -= x[42]; /* Re A - Re B */ 1180 ur3 -= x[43]; /* Im A - Im B */ 1181 vi2 = (vi2 SHIFT_B)-x[58]; /* Re C - Re D */ 1182 ui2 = (ui2 SHIFT_B)-x[59]; /* Im C - Im D */ 1183 1184 x[42] = ui2 + vr3; /* Re B' = Im C - Im D + Re A - Re B */ 1185 x[43] = ur3 - vi2; /* Im B'= -Re C + Re D + Im A - Im B */ 1186 1187 // i=56 1188 1189 x[56] = vr4 + (vi3 SHIFT_B); /* Re A' = ReA + ReB +ReC + ReD */ 1190 x[60] = vr4 - (vi3 SHIFT_B); /* Re 
C' = -(ReC+ReD) + (ReA+ReB) */ 1191 x[57] = ur4 + (ui3 SHIFT_B); /* Im A' = sum of imag values */ 1192 x[61] = ur4 - (ui3 SHIFT_B); /* Im C' = -Im C -Im D +Im A +Im B */ 1193 1194 vr4 -= x[46]; /* Re A - Re B */ 1195 ur4 -= x[47]; /* Im A - Im B */ 1196 1197 x[46] = vr3 - ui2; /* Re D' = -Im C + Im D + Re A - Re B */ 1198 x[47] = vi2 + ur3; /* Im D'= Re C - Re D + Im A - Im B */ 1199 1200 vi3 = (vi3 SHIFT_B)-x[62]; /* Re C - Re D */ 1201 ui3 = (ui3 SHIFT_B)-x[63]; /* Im C - Im D */ 1202 1203 x[58] = ui3 + vr4; /* Re B' = Im C - Im D + Re A - Re B */ 1204 x[62] = vr4 - ui3; /* Re D' = -Im C + Im D + Re A - Re B */ 1205 x[59] = ur4 - vi3; /* Im B'= -Re C + Re D + Im A - Im B */ 1206 x[63] = vi3 + ur4; /* Im D'= Re C - Re D + Im A - Im B */ 1207 } 1208 1209 { 1210 FIXP_DBL *xt = _x; 1211 1212 int j = 4; 1213 do { 1214 FIXP_DBL vi, ui, vr, ur; 1215 1216 vr = xt[8]; 1217 vi = xt[9]; 1218 ur = xt[0] >> 1; 1219 ui = xt[1] >> 1; 1220 xt[0] = ur + (vr >> 1); 1221 xt[1] = ui + (vi >> 1); 1222 xt[8] = ur - (vr >> 1); 1223 xt[9] = ui - (vi >> 1); 1224 1225 vr = xt[13]; 1226 vi = xt[12]; 1227 ur = xt[4] >> 1; 1228 ui = xt[5] >> 1; 1229 xt[4] = ur + (vr >> 1); 1230 xt[5] = ui - (vi >> 1); 1231 xt[12] = ur - (vr >> 1); 1232 xt[13] = ui + (vi >> 1); 1233 1234 SUMDIFF_PIFOURTH(vi, vr, xt[10], xt[11]) 1235 ur = xt[2]; 1236 ui = xt[3]; 1237 xt[2] = (ur >> 1) + vr; 1238 xt[3] = (ui >> 1) + vi; 1239 xt[10] = (ur >> 1) - vr; 1240 xt[11] = (ui >> 1) - vi; 1241 1242 SUMDIFF_PIFOURTH(vr, vi, xt[14], xt[15]) 1243 ur = xt[6]; 1244 ui = xt[7]; 1245 1246 xt[6] = (ur >> 1) + vr; 1247 xt[7] = (ui >> 1) - vi; 1248 xt[14] = (ur >> 1) - vr; 1249 xt[15] = (ui >> 1) + vi; 1250 xt += 16; 1251 } while (--j != 0); 1252 } 1253 1254 { 1255 FIXP_DBL *const x = _x; 1256 FIXP_DBL vi, ui, vr, ur; 1257 1258 vr = x[16]; 1259 vi = x[17]; 1260 ur = x[0] >> 1; 1261 ui = x[1] >> 1; 1262 x[0] = ur + (vr >> 1); 1263 x[1] = ui + (vi >> 1); 1264 x[16] = ur - (vr >> 1); 1265 x[17] = ui - (vi >> 1); 1266 1267 vi = x[24]; 
1268 vr = x[25]; 1269 ur = x[8] >> 1; 1270 ui = x[9] >> 1; 1271 x[8] = ur + (vr >> 1); 1272 x[9] = ui - (vi >> 1); 1273 x[24] = ur - (vr >> 1); 1274 x[25] = ui + (vi >> 1); 1275 1276 vr = x[48]; 1277 vi = x[49]; 1278 ur = x[32] >> 1; 1279 ui = x[33] >> 1; 1280 x[32] = ur + (vr >> 1); 1281 x[33] = ui + (vi >> 1); 1282 x[48] = ur - (vr >> 1); 1283 x[49] = ui - (vi >> 1); 1284 1285 vi = x[56]; 1286 vr = x[57]; 1287 ur = x[40] >> 1; 1288 ui = x[41] >> 1; 1289 x[40] = ur + (vr >> 1); 1290 x[41] = ui - (vi >> 1); 1291 x[56] = ur - (vr >> 1); 1292 x[57] = ui + (vi >> 1); 1293 1294 cplxMultDiv2(&vi, &vr, x[19], x[18], fft32_w32[0]); 1295 ur = x[2]; 1296 ui = x[3]; 1297 x[2] = (ur >> 1) + vr; 1298 x[3] = (ui >> 1) + vi; 1299 x[18] = (ur >> 1) - vr; 1300 x[19] = (ui >> 1) - vi; 1301 1302 cplxMultDiv2(&vr, &vi, x[27], x[26], fft32_w32[0]); 1303 ur = x[10]; 1304 ui = x[11]; 1305 x[10] = (ur >> 1) + vr; 1306 x[11] = (ui >> 1) - vi; 1307 x[26] = (ur >> 1) - vr; 1308 x[27] = (ui >> 1) + vi; 1309 1310 cplxMultDiv2(&vi, &vr, x[51], x[50], fft32_w32[0]); 1311 ur = x[34]; 1312 ui = x[35]; 1313 x[34] = (ur >> 1) + vr; 1314 x[35] = (ui >> 1) + vi; 1315 x[50] = (ur >> 1) - vr; 1316 x[51] = (ui >> 1) - vi; 1317 1318 cplxMultDiv2(&vr, &vi, x[59], x[58], fft32_w32[0]); 1319 ur = x[42]; 1320 ui = x[43]; 1321 x[42] = (ur >> 1) + vr; 1322 x[43] = (ui >> 1) - vi; 1323 x[58] = (ur >> 1) - vr; 1324 x[59] = (ui >> 1) + vi; 1325 1326 SUMDIFF_PIFOURTH(vi, vr, x[20], x[21]) 1327 ur = x[4]; 1328 ui = x[5]; 1329 x[4] = (ur >> 1) + vr; 1330 x[5] = (ui >> 1) + vi; 1331 x[20] = (ur >> 1) - vr; 1332 x[21] = (ui >> 1) - vi; 1333 1334 SUMDIFF_PIFOURTH(vr, vi, x[28], x[29]) 1335 ur = x[12]; 1336 ui = x[13]; 1337 x[12] = (ur >> 1) + vr; 1338 x[13] = (ui >> 1) - vi; 1339 x[28] = (ur >> 1) - vr; 1340 x[29] = (ui >> 1) + vi; 1341 1342 SUMDIFF_PIFOURTH(vi, vr, x[52], x[53]) 1343 ur = x[36]; 1344 ui = x[37]; 1345 x[36] = (ur >> 1) + vr; 1346 x[37] = (ui >> 1) + vi; 1347 x[52] = (ur >> 1) - vr; 1348 x[53] = (ui >> 
1) - vi; 1349 1350 SUMDIFF_PIFOURTH(vr, vi, x[60], x[61]) 1351 ur = x[44]; 1352 ui = x[45]; 1353 x[44] = (ur >> 1) + vr; 1354 x[45] = (ui >> 1) - vi; 1355 x[60] = (ur >> 1) - vr; 1356 x[61] = (ui >> 1) + vi; 1357 1358 cplxMultDiv2(&vi, &vr, x[23], x[22], fft32_w32[1]); 1359 ur = x[6]; 1360 ui = x[7]; 1361 x[6] = (ur >> 1) + vr; 1362 x[7] = (ui >> 1) + vi; 1363 x[22] = (ur >> 1) - vr; 1364 x[23] = (ui >> 1) - vi; 1365 1366 cplxMultDiv2(&vr, &vi, x[31], x[30], fft32_w32[1]); 1367 ur = x[14]; 1368 ui = x[15]; 1369 x[14] = (ur >> 1) + vr; 1370 x[15] = (ui >> 1) - vi; 1371 x[30] = (ur >> 1) - vr; 1372 x[31] = (ui >> 1) + vi; 1373 1374 cplxMultDiv2(&vi, &vr, x[55], x[54], fft32_w32[1]); 1375 ur = x[38]; 1376 ui = x[39]; 1377 x[38] = (ur >> 1) + vr; 1378 x[39] = (ui >> 1) + vi; 1379 x[54] = (ur >> 1) - vr; 1380 x[55] = (ui >> 1) - vi; 1381 1382 cplxMultDiv2(&vr, &vi, x[63], x[62], fft32_w32[1]); 1383 ur = x[46]; 1384 ui = x[47]; 1385 1386 x[46] = (ur >> 1) + vr; 1387 x[47] = (ui >> 1) - vi; 1388 x[62] = (ur >> 1) - vr; 1389 x[63] = (ui >> 1) + vi; 1390 1391 vr = x[32]; 1392 vi = x[33]; 1393 ur = x[0] >> 1; 1394 ui = x[1] >> 1; 1395 x[0] = ur + (vr >> 1); 1396 x[1] = ui + (vi >> 1); 1397 x[32] = ur - (vr >> 1); 1398 x[33] = ui - (vi >> 1); 1399 1400 vi = x[48]; 1401 vr = x[49]; 1402 ur = x[16] >> 1; 1403 ui = x[17] >> 1; 1404 x[16] = ur + (vr >> 1); 1405 x[17] = ui - (vi >> 1); 1406 x[48] = ur - (vr >> 1); 1407 x[49] = ui + (vi >> 1); 1408 1409 cplxMultDiv2(&vi, &vr, x[35], x[34], fft32_w32[2]); 1410 ur = x[2]; 1411 ui = x[3]; 1412 x[2] = (ur >> 1) + vr; 1413 x[3] = (ui >> 1) + vi; 1414 x[34] = (ur >> 1) - vr; 1415 x[35] = (ui >> 1) - vi; 1416 1417 cplxMultDiv2(&vr, &vi, x[51], x[50], fft32_w32[2]); 1418 ur = x[18]; 1419 ui = x[19]; 1420 x[18] = (ur >> 1) + vr; 1421 x[19] = (ui >> 1) - vi; 1422 x[50] = (ur >> 1) - vr; 1423 x[51] = (ui >> 1) + vi; 1424 1425 cplxMultDiv2(&vi, &vr, x[37], x[36], fft32_w32[0]); 1426 ur = x[4]; 1427 ui = x[5]; 1428 x[4] = (ur >> 1) + vr; 1429 
x[5] = (ui >> 1) + vi; 1430 x[36] = (ur >> 1) - vr; 1431 x[37] = (ui >> 1) - vi; 1432 1433 cplxMultDiv2(&vr, &vi, x[53], x[52], fft32_w32[0]); 1434 ur = x[20]; 1435 ui = x[21]; 1436 x[20] = (ur >> 1) + vr; 1437 x[21] = (ui >> 1) - vi; 1438 x[52] = (ur >> 1) - vr; 1439 x[53] = (ui >> 1) + vi; 1440 1441 cplxMultDiv2(&vi, &vr, x[39], x[38], fft32_w32[3]); 1442 ur = x[6]; 1443 ui = x[7]; 1444 x[6] = (ur >> 1) + vr; 1445 x[7] = (ui >> 1) + vi; 1446 x[38] = (ur >> 1) - vr; 1447 x[39] = (ui >> 1) - vi; 1448 1449 cplxMultDiv2(&vr, &vi, x[55], x[54], fft32_w32[3]); 1450 ur = x[22]; 1451 ui = x[23]; 1452 x[22] = (ur >> 1) + vr; 1453 x[23] = (ui >> 1) - vi; 1454 x[54] = (ur >> 1) - vr; 1455 x[55] = (ui >> 1) + vi; 1456 1457 SUMDIFF_PIFOURTH(vi, vr, x[40], x[41]) 1458 ur = x[8]; 1459 ui = x[9]; 1460 x[8] = (ur >> 1) + vr; 1461 x[9] = (ui >> 1) + vi; 1462 x[40] = (ur >> 1) - vr; 1463 x[41] = (ui >> 1) - vi; 1464 1465 SUMDIFF_PIFOURTH(vr, vi, x[56], x[57]) 1466 ur = x[24]; 1467 ui = x[25]; 1468 x[24] = (ur >> 1) + vr; 1469 x[25] = (ui >> 1) - vi; 1470 x[56] = (ur >> 1) - vr; 1471 x[57] = (ui >> 1) + vi; 1472 1473 cplxMultDiv2(&vi, &vr, x[43], x[42], fft32_w32[4]); 1474 ur = x[10]; 1475 ui = x[11]; 1476 1477 x[10] = (ur >> 1) + vr; 1478 x[11] = (ui >> 1) + vi; 1479 x[42] = (ur >> 1) - vr; 1480 x[43] = (ui >> 1) - vi; 1481 1482 cplxMultDiv2(&vr, &vi, x[59], x[58], fft32_w32[4]); 1483 ur = x[26]; 1484 ui = x[27]; 1485 x[26] = (ur >> 1) + vr; 1486 x[27] = (ui >> 1) - vi; 1487 x[58] = (ur >> 1) - vr; 1488 x[59] = (ui >> 1) + vi; 1489 1490 cplxMultDiv2(&vi, &vr, x[45], x[44], fft32_w32[1]); 1491 ur = x[12]; 1492 ui = x[13]; 1493 x[12] = (ur >> 1) + vr; 1494 x[13] = (ui >> 1) + vi; 1495 x[44] = (ur >> 1) - vr; 1496 x[45] = (ui >> 1) - vi; 1497 1498 cplxMultDiv2(&vr, &vi, x[61], x[60], fft32_w32[1]); 1499 ur = x[28]; 1500 ui = x[29]; 1501 x[28] = (ur >> 1) + vr; 1502 x[29] = (ui >> 1) - vi; 1503 x[60] = (ur >> 1) - vr; 1504 x[61] = (ui >> 1) + vi; 1505 1506 cplxMultDiv2(&vi, &vr, x[47], 
x[46], fft32_w32[5]); 1507 ur = x[14]; 1508 ui = x[15]; 1509 x[14] = (ur >> 1) + vr; 1510 x[15] = (ui >> 1) + vi; 1511 x[46] = (ur >> 1) - vr; 1512 x[47] = (ui >> 1) - vi; 1513 1514 cplxMultDiv2(&vr, &vi, x[63], x[62], fft32_w32[5]); 1515 ur = x[30]; 1516 ui = x[31]; 1517 x[30] = (ur >> 1) + vr; 1518 x[31] = (ui >> 1) - vi; 1519 x[62] = (ur >> 1) - vr; 1520 x[63] = (ui >> 1) + vi; 1521 } 1522 } 1523 #endif /* #ifndef FUNCTION_fft_32 */ 1524 1525 /** 1526 * \brief Apply rotation vectors to a data buffer. 1527 * \param cl length of each row of input data. 1528 * \param l total length of input data. 1529 * \param pVecRe real part of rotation coefficient vector. 1530 * \param pVecIm imaginary part of rotation coefficient vector. 1531 */ 1532 1533 /* 1534 This defines patches each inaccurate 0x7FFF i.e. 0.9999 and uses 0x8000 1535 (-1.0) instead. At the end, the sign of the result is inverted 1536 */ 1537 #define noFFT_APPLY_ROT_VECTOR_HQ 1538 1539 #ifndef FUNCTION_fft_apply_rot_vector__FIXP_DBL 1540 static inline void fft_apply_rot_vector(FIXP_DBL *RESTRICT pData, const int cl, 1541 const int l, const FIXP_STB *pVecRe, 1542 const FIXP_STB *pVecIm) { 1543 FIXP_DBL re, im; 1544 FIXP_STB vre, vim; 1545 1546 int i, c; 1547 1548 for (i = 0; i < cl; i++) { 1549 re = pData[2 * i]; 1550 im = pData[2 * i + 1]; 1551 1552 pData[2 * i] = re >> 2; /* * 0.25 */ 1553 pData[2 * i + 1] = im >> 2; /* * 0.25 */ 1554 } 1555 for (; i < l; i += cl) { 1556 re = pData[2 * i]; 1557 im = pData[2 * i + 1]; 1558 1559 pData[2 * i] = re >> 2; /* * 0.25 */ 1560 pData[2 * i + 1] = im >> 2; /* * 0.25 */ 1561 1562 for (c = i + 1; c < i + cl; c++) { 1563 re = pData[2 * c] >> 1; 1564 im = pData[2 * c + 1] >> 1; 1565 vre = *pVecRe++; 1566 vim = *pVecIm++; 1567 1568 cplxMultDiv2(&pData[2 * c + 1], &pData[2 * c], im, re, vre, vim); 1569 } 1570 } 1571 } 1572 #endif /* FUNCTION_fft_apply_rot_vector__FIXP_DBL */ 1573 1574 /* select either switch case of function pointer. 
 */
//#define FFT_TWO_STAGE_SWITCH_CASE
#ifndef FUNCTION_fftN2_func
/*
 * Two-stage (mixed-radix) FFT of length = dim1 * dim2 (Cooley-Tukey
 * decomposition): dim2 FFTs of length dim1, twiddle modulation, then dim1
 * FFTs of length dim2, with the required stride-interleaved reordering.
 *
 * \param pInput        interleaved complex input; also receives the output.
 * \param length        total transform length, must equal dim1 * dim2.
 * \param dim1, dim2    factorization of length.
 * \param fft1, fft2    FFT kernels of length dim1 and dim2, respectively
 *                      (used unless FFT_TWO_STAGE_SWITCH_CASE is defined).
 * \param RotVectorReal/RotVectorImag  twiddle tables for the modulation step.
 * \param aDst          scratch buffer of 2*length values.
 * \param aDst2         scratch buffer of 2*dim2 values.
 */
static inline void fftN2_func(FIXP_DBL *pInput, const int length,
                              const int dim1, const int dim2,
                              void (*const fft1)(FIXP_DBL *),
                              void (*const fft2)(FIXP_DBL *),
                              const FIXP_STB *RotVectorReal,
                              const FIXP_STB *RotVectorImag, FIXP_DBL *aDst,
                              FIXP_DBL *aDst2) {
  /* The real part of the input samples are at the addresses with even indices
  and the imaginary part of the input samples are at the addresses with odd
  indices. The output samples are stored at the address of pInput
  */
  FIXP_DBL *pSrc, *pDst, *pDstOut;
  int i;

  FDK_ASSERT(length == dim1 * dim2);

  /* Perform dim2 times the fft of length dim1. The input samples are at the
  address of pSrc and the output samples are at the address of pDst. The input
  vector for the fft of length dim1 is built of the interleaved samples in pSrc,
  the output samples are stored consecutively.
  */
  pSrc = pInput;
  pDst = aDst;
  for (i = 0; i < dim2; i++) {
    /* Gather column i (stride dim2 complex samples) into a contiguous row. */
    for (int j = 0; j < dim1; j++) {
      pDst[2 * j] = pSrc[2 * j * dim2];
      pDst[2 * j + 1] = pSrc[2 * j * dim2 + 1];
    }

    /* fft of size dim1 */
#ifndef FFT_TWO_STAGE_SWITCH_CASE
    fft1(pDst);
#else
    switch (dim1) {
      case 2:
        fft2(pDst);
        break;
      case 3:
        fft3(pDst);
        break;
      case 4:
        fft_4(pDst);
        break;
      /* case 5: fft5(pDst); break; */
      /* case 8: fft_8(pDst); break; */
      case 12:
        fft12(pDst);
        break;
      /* case 15: fft15(pDst); break; */
      case 16:
        fft_16(pDst);
        break;
      case 32:
        fft_32(pDst);
        break;
        /*case 64: fft_64(pDst); break;*/
        /* case 128: fft_128(pDst); break; */
    }
#endif
    pSrc += 2;
    pDst = pDst + 2 * dim1;
  }

  /* Perform the modulation of the output of the fft of length dim1 */
  pSrc = aDst;
  fft_apply_rot_vector(pSrc, dim1, length, RotVectorReal, RotVectorImag);

  /* Perform dim1 times the fft of length dim2. The input samples are at the
  address of aDst and the output samples are at the address of pInput. The input
  vector for the fft of length dim2 is built of the interleaved samples in aDst,
  the output samples are stored consecutively at the address of pInput.
  */
  pSrc = aDst;
  pDst = aDst2;
  pDstOut = pInput;
  for (i = 0; i < dim1; i++) {
    /* Gather column i (stride dim1) of the modulated intermediate data. */
    for (int j = 0; j < dim2; j++) {
      pDst[2 * j] = pSrc[2 * j * dim1];
      pDst[2 * j + 1] = pSrc[2 * j * dim1 + 1];
    }

#ifndef FFT_TWO_STAGE_SWITCH_CASE
    fft2(pDst);
#else
    switch (dim2) {
      case 4:
        fft_4(pDst);
        break;
      case 9:
        fft9(pDst);
        break;
      case 12:
        fft12(pDst);
        break;
      case 15:
        fft15(pDst);
        break;
      case 16:
        fft_16(pDst);
        break;
      case 32:
        fft_32(pDst);
        break;
    }
#endif

    /* Scatter the row result back with stride dim1 into the output. */
    for (int j = 0; j < dim2; j++) {
      pDstOut[2 * j * dim1] = pDst[2 * j];
      pDstOut[2 * j * dim1 + 1] = pDst[2 * j + 1];
    }
    pSrc += 2;
    pDstOut += 2;
  }
}
#endif /* FUNCTION_fftN2_function */

/* Allocate the two scratch buffers fftN2_func() needs on the (aligned)
   scratch stack, run the two-stage FFT, and release them again. */
#define fftN2(DATA_TYPE, pInput, length, dim1, dim2, fft_func1, fft_func2, \
              RotVectorReal, RotVectorImag)                                \
  {                                                                        \
    C_AALLOC_SCRATCH_START(aDst, DATA_TYPE, 2 * length)                    \
    C_AALLOC_SCRATCH_START(aDst2, DATA_TYPE, 2 * dim2)                     \
    fftN2_func(pInput, length, dim1, dim2, fft_func1, fft_func2,           \
               RotVectorReal, RotVectorImag, aDst, aDst2);                 \
    C_AALLOC_SCRATCH_END(aDst2, DATA_TYPE, 2 * dim2)                       \
    C_AALLOC_SCRATCH_END(aDst, DATA_TYPE, 2 * length)                      \
  }

/*!
 *
 * \brief complex FFT of length 12,18,24,30,48,60,96, 192, 240, 384, 480
 * \param pInput contains the input signal prescaled right by 2
 *        pInput contains the output signal scaled by SCALEFACTOR<#length>
 *        The output signal does not have any fixed headroom
 * \return void
 *
 */

/* Each fftNNN wrapper below instantiates the two-stage fftN2 scheme with a
   fixed factorization NNN = dim1 * dim2 and the matching rotation tables. */

#ifndef FUNCTION_fft6
static inline void fft6(FIXP_DBL *pInput) {
  fftN2(FIXP_DBL, pInput, 6, 2, 3, fft2, fft3, RotVectorReal6, RotVectorImag6);
}
#endif /* #ifndef FUNCTION_fft6 */

#ifndef FUNCTION_fft12
static inline void fft12(FIXP_DBL *pInput) {
  fftN2(FIXP_DBL, pInput, 12, 3, 4, fft3, fft_4, RotVectorReal12,
        RotVectorImag12); /* 16,58 */
}
#endif /* #ifndef FUNCTION_fft12 */

#ifndef FUNCTION_fft20
static inline void fft20(FIXP_DBL *pInput) {
  fftN2(FIXP_DBL, pInput, 20, 4, 5, fft_4, fft5, RotVectorReal20,
        RotVectorImag20);
}
#endif /* FUNCTION_fft20 */

static inline void fft24(FIXP_DBL *pInput) {
  fftN2(FIXP_DBL, pInput, 24, 2, 12, fft2, fft12, RotVectorReal24,
        RotVectorImag24); /* 16,73 */
}

static inline void fft48(FIXP_DBL *pInput) {
  fftN2(FIXP_DBL, pInput, 48, 4, 12, fft_4, fft12, RotVectorReal48,
        RotVectorImag48); /* 16,32 */
}

#ifndef FUNCTION_fft60
static inline void fft60(FIXP_DBL *pInput) {
  fftN2(FIXP_DBL, pInput, 60, 4, 15, fft_4, fft15, RotVectorReal60,
        RotVectorImag60); /* 15,51 */
}
#endif /* FUNCTION_fft60 */

#ifndef FUNCTION_fft80
static inline void fft80(FIXP_DBL *pInput) {
  fftN2(FIXP_DBL, pInput, 80, 5, 16, fft5, fft_16, RotVectorReal80,
        RotVectorImag80); /* */
}
#endif

#ifndef FUNCTION_fft96
static inline void fft96(FIXP_DBL *pInput) {
  fftN2(FIXP_DBL, pInput, 96, 3, 32, fft3, fft_32, RotVectorReal96,
        RotVectorImag96); /* 15,47 */
}
#endif /* FUNCTION_fft96*/

#ifndef FUNCTION_fft120
static inline void fft120(FIXP_DBL *pInput) {
  fftN2(FIXP_DBL, pInput, 120, 8, 15, fft_8, fft15, RotVectorReal120,
        RotVectorImag120);
}
#endif /* FUNCTION_fft120 */

#ifndef FUNCTION_fft192
static inline void fft192(FIXP_DBL *pInput) {
  fftN2(FIXP_DBL, pInput, 192, 16, 12, fft_16, fft12, RotVectorReal192,
        RotVectorImag192); /* 15,50 */
}
#endif

#ifndef FUNCTION_fft240
static inline void fft240(FIXP_DBL *pInput) {
  fftN2(FIXP_DBL, pInput, 240, 16, 15, fft_16, fft15, RotVectorReal240,
        RotVectorImag240); /* 15.44 */
}
#endif

#ifndef FUNCTION_fft384
static inline void fft384(FIXP_DBL *pInput) {
  fftN2(FIXP_DBL, pInput, 384, 12, 32, fft12, fft_32, RotVectorReal384,
        RotVectorImag384); /* 16.02 */
}
#endif /* FUNCTION_fft384 */

#ifndef FUNCTION_fft480
static inline void fft480(FIXP_DBL *pInput) {
  fftN2(FIXP_DBL, pInput, 480, 32, 15, fft_32, fft15, RotVectorReal480,
        RotVectorImag480); /* 15.84 */
}
#endif /* FUNCTION_fft480 */

/*
 * In-place complex FFT dispatcher: selects the dedicated kernel for the given
 * length and accumulates the kernel's downscaling into *pScalefactor.
 * Power-of-two lengths 64/128/256/512 go through the generic dit_fft();
 * length 32 is checked first, outside the switch, as a fast path.
 * Unsupported lengths trigger FDK_ASSERT.
 */
void fft(int length, FIXP_DBL *pInput, INT *pScalefactor) {
  /* Ensure, that the io-ptr is always (at least 8-byte) aligned */
  C_ALLOC_ALIGNED_CHECK(pInput);

  if (length == 32) {
    fft_32(pInput);
    *pScalefactor += SCALEFACTOR32;
  } else {
    switch (length) {
      case 16:
        fft_16(pInput);
        *pScalefactor += SCALEFACTOR16;
        break;
      case 8:
        fft_8(pInput);
        *pScalefactor += SCALEFACTOR8;
        break;
      case 2:
        fft2(pInput);
        *pScalefactor += SCALEFACTOR2;
        break;
      case 3:
        fft3(pInput);
        *pScalefactor += SCALEFACTOR3;
        break;
      case 4:
        fft_4(pInput);
        *pScalefactor += SCALEFACTOR4;
        break;
      case 5:
        fft5(pInput);
        *pScalefactor += SCALEFACTOR5;
        break;
      case 6:
        fft6(pInput);
        *pScalefactor += SCALEFACTOR6;
        break;
      case 10:
        fft10(pInput);
        *pScalefactor += SCALEFACTOR10;
        break;
      case 12:
        fft12(pInput);
        *pScalefactor += SCALEFACTOR12;
        break;
      case 15:
        fft15(pInput);
        *pScalefactor += SCALEFACTOR15;
        break;
      case 20:
        fft20(pInput);
        *pScalefactor += SCALEFACTOR20;
        break;
      case 24:
        fft24(pInput);
        *pScalefactor += SCALEFACTOR24;
        break;
      case 48:
        fft48(pInput);
        *pScalefactor += SCALEFACTOR48;
        break;
      case 60:
        fft60(pInput);
        *pScalefactor += SCALEFACTOR60;
        break;
      case 64:
        dit_fft(pInput, 6, SineTable512, 512);
        *pScalefactor += SCALEFACTOR64;
        break;
      case 80:
        fft80(pInput);
        *pScalefactor += SCALEFACTOR80;
        break;
      case 96:
        fft96(pInput);
        *pScalefactor += SCALEFACTOR96;
        break;
      case 120:
        fft120(pInput);
        *pScalefactor += SCALEFACTOR120;
        break;
      case 128:
        dit_fft(pInput, 7, SineTable512, 512);
        *pScalefactor += SCALEFACTOR128;
        break;
      case 192:
        fft192(pInput);
        *pScalefactor += SCALEFACTOR192;
        break;
      case 240:
        fft240(pInput);
        *pScalefactor += SCALEFACTOR240;
        break;
      case 256:
        dit_fft(pInput, 8, SineTable512, 512);
        *pScalefactor += SCALEFACTOR256;
        break;
      case 384:
        fft384(pInput);
        *pScalefactor += SCALEFACTOR384;
        break;
      case 480:
        fft480(pInput);
        *pScalefactor += SCALEFACTOR480;
        break;
      case 512:
        dit_fft(pInput, 9, SineTable512, 512);
        *pScalefactor += SCALEFACTOR512;
        break;
      default:
        FDK_ASSERT(0); /* FFT length not supported! */
        break;
    }
  }
}

/* NOTE(review): ifft() currently supports no length at all — the switch has
   only the asserting default branch, and the scalefactor is left untouched.
   In this build, inverse transforms are presumably realized elsewhere (e.g.
   via conjugation around fft()) — confirm before adding cases here. */
void ifft(int length, FIXP_DBL *pInput, INT *scalefactor) {
  switch (length) {
    default:
      FDK_ASSERT(0); /* IFFT length not supported! */
      break;
  }
}