1 2 /* ----------------------------------------------------------------------------------------------------------- 3 Software License for The Fraunhofer FDK AAC Codec Library for Android 4 5 Copyright 1995 - 2012 Fraunhofer-Gesellschaft zur Frderung der angewandten Forschung e.V. 6 All rights reserved. 7 8 1. INTRODUCTION 9 The Fraunhofer FDK AAC Codec Library for Android ("FDK AAC Codec") is software that implements 10 the MPEG Advanced Audio Coding ("AAC") encoding and decoding scheme for digital audio. 11 This FDK AAC Codec software is intended to be used on a wide variety of Android devices. 12 13 AAC's HE-AAC and HE-AAC v2 versions are regarded as today's most efficient general perceptual 14 audio codecs. AAC-ELD is considered the best-performing full-bandwidth communications codec by 15 independent studies and is widely deployed. AAC has been standardized by ISO and IEC as part 16 of the MPEG specifications. 17 18 Patent licenses for necessary patent claims for the FDK AAC Codec (including those of Fraunhofer) 19 may be obtained through Via Licensing (www.vialicensing.com) or through the respective patent owners 20 individually for the purpose of encoding or decoding bit streams in products that are compliant with 21 the ISO/IEC MPEG audio standards. Please note that most manufacturers of Android devices already license 22 these patent claims through Via Licensing or directly from the patent owners, and therefore FDK AAC Codec 23 software may already be covered under those patent licenses when it is used for those licensed purposes only. 24 25 Commercially-licensed AAC software libraries, including floating-point versions with enhanced sound quality, 26 are also available from Fraunhofer. Users are encouraged to check the Fraunhofer website for additional 27 applications information and documentation. 28 29 2. COPYRIGHT LICENSE 30 31 Redistribution and use in source and binary forms, with or without modification, are permitted without 32 payment of copyright license fees provided that you satisfy the following conditions: 33 34 You must retain the complete text of this software license in redistributions of the FDK AAC Codec or 35 your modifications thereto in source code form. 36 37 You must retain the complete text of this software license in the documentation and/or other materials 38 provided with redistributions of the FDK AAC Codec or your modifications thereto in binary form. 39 You must make available free of charge copies of the complete source code of the FDK AAC Codec and your 40 modifications thereto to recipients of copies in binary form. 41 42 The name of Fraunhofer may not be used to endorse or promote products derived from this library without 43 prior written permission. 44 45 You may not charge copyright license fees for anyone to use, copy or distribute the FDK AAC Codec 46 software or your modifications thereto. 47 48 Your modified versions of the FDK AAC Codec must carry prominent notices stating that you changed the software 49 and the date of any change. For modified versions of the FDK AAC Codec, the term 50 "Fraunhofer FDK AAC Codec Library for Android" must be replaced by the term 51 "Third-Party Modified Version of the Fraunhofer FDK AAC Codec Library for Android." 52 53 3. NO PATENT LICENSE 54 55 NO EXPRESS OR IMPLIED LICENSES TO ANY PATENT CLAIMS, including without limitation the patents of Fraunhofer, 56 ARE GRANTED BY THIS SOFTWARE LICENSE. Fraunhofer provides no warranty of patent non-infringement with 57 respect to this software. 58 59 You may use this FDK AAC Codec software or modifications thereto only for purposes that are authorized 60 by appropriate patent licenses. 61 62 4. DISCLAIMER 63 64 This FDK AAC Codec software is provided by Fraunhofer on behalf of the copyright holders and contributors 65 "AS IS" and WITHOUT ANY EXPRESS OR IMPLIED WARRANTIES, including but not limited to the implied warranties 66 of merchantability and fitness for a particular purpose. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR 67 CONTRIBUTORS BE LIABLE for any direct, indirect, incidental, special, exemplary, or consequential damages, 68 including but not limited to procurement of substitute goods or services; loss of use, data, or profits, 69 or business interruption, however caused and on any theory of liability, whether in contract, strict 70 liability, or tort (including negligence), arising in any way out of the use of this software, even if 71 advised of the possibility of such damage. 72 73 5. CONTACT INFORMATION 74 75 Fraunhofer Institute for Integrated Circuits IIS 76 Attention: Audio and Multimedia Departments - FDK AAC LL 77 Am Wolfsmantel 33 78 91058 Erlangen, Germany 79 80 www.iis.fraunhofer.de/amm 81 amm-info (at) iis.fraunhofer.de 82 ----------------------------------------------------------------------------------------------------------- */ 83 84 /*************************** Fraunhofer IIS FDK Tools ********************** 85 86 Author(s): Josef Hoepfl, DSP Solutions 87 Description: Fix point FFT 88 89 ******************************************************************************/ 90 91 #include "fft.h" 92 93 #include "fft_rad2.h" 94 #include "FDK_tools_rom.h" 95 96 97 98 99 100 #define F3C(x) STC(x) 101 102 #define C31 (F3C(0x91261468)) /* FL2FXCONST_DBL(-0.86602540) */ 103 104 /* Performs the FFT of length 3 according to the algorithm after winograd. 105 No scaling of the input vector because the scaling is already done in the rotation vector. */ 106 static FORCEINLINE void fft3(FIXP_DBL *RESTRICT pDat) 107 { 108 FIXP_DBL r1,r2; 109 FIXP_DBL s1,s2; 110 /* real part */ 111 r1 = pDat[2] + pDat[4]; 112 r2 = fMult((pDat[2] - pDat[4]), C31); 113 pDat[0] = pDat[0] + r1; 114 r1 = pDat[0] - r1 - (r1>>1); 115 116 /* imaginary part */ 117 s1 = pDat[3] + pDat[5]; 118 s2 = fMult((pDat[3] - pDat[5]), C31); 119 pDat[1] = pDat[1] + s1; 120 s1 = pDat[1] - s1 - (s1>>1); 121 122 /* combination */ 123 pDat[2] = r1 - s2; 124 pDat[4] = r1 + s2; 125 pDat[3] = s1 + r2; 126 pDat[5] = s1 - r2; 127 } 128 129 130 #define F5C(x) STC(x) 131 132 #define C51 (F5C(0x79bc3854)) /* FL2FXCONST_DBL( 0.95105652) */ 133 #define C52 (F5C(0x9d839db0)) /* FL2FXCONST_DBL(-1.53884180/2) */ 134 #define C53 (F5C(0xd18053ce)) /* FL2FXCONST_DBL(-0.36327126) */ 135 #define C54 (F5C(0x478dde64)) /* FL2FXCONST_DBL( 0.55901699) */ 136 #define C55 (F5C(0xb0000001)) /* FL2FXCONST_DBL(-1.25/2) */ 137 138 /* performs the FFT of length 5 according to the algorithm after winograd */ 139 static FORCEINLINE void fft5(FIXP_DBL *RESTRICT pDat) 140 { 141 FIXP_DBL r1,r2,r3,r4; 142 FIXP_DBL s1,s2,s3,s4; 143 FIXP_DBL t; 144 145 /* real part */ 146 r1 = pDat[2] + pDat[8]; 147 r4 = pDat[2] - pDat[8]; 148 r3 = pDat[4] + pDat[6]; 149 r2 = pDat[4] - pDat[6]; 150 t = fMult((r1-r3), C54); 151 r1 = r1 + r3; 152 pDat[0] = pDat[0] + r1; 153 /* Bit shift left because of the constant C55 which was scaled with the factor 0.5 because of the representation of 154 the values as fracts */ 155 r1 = pDat[0] + (fMultDiv2(r1, C55) <<(2)); 156 r3 = r1 - t; 157 r1 = r1 + t; 158 t = fMult((r4 + r2), C51); 159 /* Bit shift left because of the constant C55 which was scaled with the factor 0.5 because of the representation of 160 the values as fracts */ 161 r4 = t + (fMultDiv2(r4, C52) <<(2)); 162 r2 = t + fMult(r2, C53); 163 164 /* imaginary part */ 165 s1 = pDat[3] + pDat[9]; 166 s4 = pDat[3] - pDat[9]; 167 s3 = pDat[5] + pDat[7]; 168 s2 = pDat[5] - pDat[7]; 169 t = fMult((s1 - s3), C54); 170 s1 = s1 + s3; 171 pDat[1] = pDat[1] + s1; 172 /* Bit shift left because of the constant C55 which was scaled with the factor 0.5 because of the representation of 173 the values as fracts */ 174 s1 = pDat[1] + (fMultDiv2(s1, C55) <<(2)); 175 s3 = s1 - t; 176 s1 = s1 + t; 177 t = fMult((s4 + s2), C51); 178 /* Bit shift left because of the constant C55 which was scaled with the factor 0.5 because of the representation of 179 the values as fracts */ 180 s4 = t + (fMultDiv2(s4, C52) <<(2)); 181 s2 = t + fMult(s2, C53); 182 183 /* combination */ 184 pDat[2] = r1 + s2; 185 pDat[8] = r1 - s2; 186 pDat[4] = r3 - s4; 187 pDat[6] = r3 + s4; 188 189 pDat[3] = s1 - r2; 190 pDat[9] = s1 + r2; 191 pDat[5] = s3 + r4; 192 pDat[7] = s3 - r4; 193 } 194 195 196 197 198 #define N3 3 199 #define N5 5 200 #define N6 6 201 #define N15 15 202 203 /* Performs the FFT of length 15. It is split into FFTs of length 3 and length 5. */ 204 static inline void fft15(FIXP_DBL *pInput) 205 { 206 FIXP_DBL aDst[2*N15]; 207 FIXP_DBL aDst1[2*N15]; 208 int i,k,l; 209 210 /* Sort input vector for fft's of length 3 211 input3(0:2) = [input(0) input(5) input(10)]; 212 input3(3:5) = [input(3) input(8) input(13)]; 213 input3(6:8) = [input(6) input(11) input(1)]; 214 input3(9:11) = [input(9) input(14) input(4)]; 215 input3(12:14) = [input(12) input(2) input(7)]; */ 216 { 217 const FIXP_DBL *pSrc = pInput; 218 FIXP_DBL *RESTRICT pDst = aDst; 219 /* Merge 3 loops into one, skip call of fft3 */ 220 for(i=0,l=0,k=0; i<N5; i++, k+=6) 221 { 222 pDst[k+0] = pSrc[l]; 223 pDst[k+1] = pSrc[l+1]; 224 l += 2*N5; 225 if (l >= (2*N15)) 226 l -= (2*N15); 227 228 pDst[k+2] = pSrc[l]; 229 pDst[k+3] = pSrc[l+1]; 230 l += 2*N5; 231 if (l >= (2*N15)) 232 l -= (2*N15); 233 pDst[k+4] = pSrc[l]; 234 pDst[k+5] = pSrc[l+1]; 235 l += (2*N5) + (2*N3); 236 if (l >= (2*N15)) 237 l -= (2*N15); 238 239 /* fft3 merged with shift right by 2 loop */ 240 FIXP_DBL r1,r2,r3; 241 FIXP_DBL s1,s2; 242 /* real part */ 243 r1 = pDst[k+2] + pDst[k+4]; 244 r2 = fMult((pDst[k+2] - pDst[k+4]), C31); 245 s1 = pDst[k+0]; 246 pDst[k+0] = (s1 + r1)>>2; 247 r1 = s1 - (r1>>1); 248 249 /* imaginary part */ 250 s1 = pDst[k+3] + pDst[k+5]; 251 s2 = fMult((pDst[k+3] - pDst[k+5]), C31); 252 r3 = pDst[k+1]; 253 pDst[k+1] = (r3 + s1)>>2; 254 s1 = r3 - (s1>>1); 255 256 /* combination */ 257 pDst[k+2] = (r1 - s2)>>2; 258 pDst[k+4] = (r1 + s2)>>2; 259 pDst[k+3] = (s1 + r2)>>2; 260 pDst[k+5] = (s1 - r2)>>2; 261 } 262 } 263 /* Sort input vector for fft's of length 5 264 input5(0:4) = [output3(0) output3(3) output3(6) output3(9) output3(12)]; 265 input5(5:9) = [output3(1) output3(4) output3(7) output3(10) output3(13)]; 266 input5(10:14) = [output3(2) output3(5) output3(8) output3(11) output3(14)]; */ 267 /* Merge 2 loops into one, brings about 10% */ 268 { 269 const FIXP_DBL *pSrc = aDst; 270 FIXP_DBL *RESTRICT pDst = aDst1; 271 for(i=0,l=0,k=0; i<N3; i++, k+=10) 272 { 273 l = 2*i; 274 pDst[k+0] = pSrc[l+0]; 275 pDst[k+1] = pSrc[l+1]; 276 pDst[k+2] = pSrc[l+0+(2*N3)]; 277 pDst[k+3] = pSrc[l+1+(2*N3)]; 278 pDst[k+4] = pSrc[l+0+(4*N3)]; 279 pDst[k+5] = pSrc[l+1+(4*N3)]; 280 pDst[k+6] = pSrc[l+0+(6*N3)]; 281 pDst[k+7] = pSrc[l+1+(6*N3)]; 282 pDst[k+8] = pSrc[l+0+(8*N3)]; 283 pDst[k+9] = pSrc[l+1+(8*N3)]; 284 fft5(&pDst[k]); 285 } 286 } 287 /* Sort output vector of length 15 288 output = [out5(0) out5(6) out5(12) out5(3) out5(9) 289 out5(10) out5(1) out5(7) out5(13) out5(4) 290 out5(5) out5(11) out5(2) out5(8) out5(14)]; */ 291 /* optimize clumsy loop, brings about 5% */ 292 { 293 const FIXP_DBL *pSrc = aDst1; 294 FIXP_DBL *RESTRICT pDst = pInput; 295 for(i=0,l=0,k=0; i<N3; i++, k+=10) 296 { 297 pDst[k+0] = pSrc[l]; 298 pDst[k+1] = pSrc[l+1]; 299 l += (2*N6); 300 if (l >= (2*N15)) 301 l -= (2*N15); 302 pDst[k+2] = pSrc[l]; 303 pDst[k+3] = pSrc[l+1]; 304 l += (2*N6); 305 if (l >= (2*N15)) 306 l -= (2*N15); 307 pDst[k+4] = pSrc[l]; 308 pDst[k+5] = pSrc[l+1]; 309 l += (2*N6); 310 if (l >= (2*N15)) 311 l -= (2*N15); 312 pDst[k+6] = pSrc[l]; 313 pDst[k+7] = pSrc[l+1]; 314 l += (2*N6); 315 if (l >= (2*N15)) 316 l -= (2*N15); 317 pDst[k+8] = pSrc[l]; 318 pDst[k+9] = pSrc[l+1]; 319 l += 2; /* no modulo check needed, it cannot occur */ 320 } 321 } 322 } 323 324 #define W_PiFOURTH STC(0x5a82799a) 325 #ifndef SUMDIFF_PIFOURTH 326 #define SUMDIFF_PIFOURTH(diff,sum,a,b) \ 327 { \ 328 FIXP_DBL wa, wb;\ 329 wa = fMultDiv2(a, W_PiFOURTH);\ 330 wb = fMultDiv2(b, W_PiFOURTH);\ 331 diff = wb - wa;\ 332 sum = wb + wa;\ 333 } 334 #endif 335 336 /* This version is more overflow save, but less cycle optimal. */ 337 #define SUMDIFF_EIGTH(x, y, ix, iy, vr, vi, ur, ui) \ 338 vr = (x[ 0 + ix]>>1) + (x[16 + ix]>>1); /* Re A + Re B */ \ 339 vi = (x[ 8 + ix]>>1) + (x[24 + ix]>>1); /* Re C + Re D */ \ 340 ur = (x[ 1 + ix]>>1) + (x[17 + ix]>>1); /* Im A + Im B */ \ 341 ui = (x[ 9 + ix]>>1) + (x[25 + ix]>>1); /* Im C + Im D */ \ 342 y[ 0 + iy] = vr + vi; /* Re A' = ReA + ReB +ReC + ReD */ \ 343 y[ 4 + iy] = vr - vi; /* Re C' = -(ReC+ReD) + (ReA+ReB) */ \ 344 y[ 1 + iy] = ur + ui; /* Im A' = sum of imag values */ \ 345 y[ 5 + iy] = ur - ui; /* Im C' = -Im C -Im D +Im A +Im B */ \ 346 vr -= x[16 + ix]; /* Re A - Re B */ \ 347 vi = vi - x[24 + ix]; /* Re C - Re D */ \ 348 ur -= x[17 + ix]; /* Im A - Im B */ \ 349 ui = ui - x[25 + ix]; /* Im C - Im D */ \ 350 y[ 2 + iy] = ui + vr; /* Re B' = Im C - Im D + Re A - Re B */ \ 351 y[ 6 + iy] = vr - ui; /* Re D' = -Im C + Im D + Re A - Re B */ \ 352 y[ 3 + iy] = ur - vi; /* Im B'= -Re C + Re D + Im A - Im B */ \ 353 y[ 7 + iy] = vi + ur; /* Im D'= Re C - Re D + Im A - Im B */ 354 355 static const FIXP_STP fft16_w16[2] = { STCP(0x7641af3d, 0x30fbc54d), STCP(0x30fbc54d, 0x7641af3d) }; 356 357 LNK_SECTION_CODE_L1 358 inline void fft_16(FIXP_DBL *RESTRICT x) 359 { 360 FIXP_DBL vr, vi, ur, ui; 361 FIXP_DBL y[32]; 362 363 SUMDIFF_EIGTH(x, y, 0, 0, vr, vi, ur, ui); 364 SUMDIFF_EIGTH(x, y, 4, 8, vr, vi, ur, ui); 365 SUMDIFF_EIGTH(x, y, 2, 16, vr, vi, ur, ui); 366 SUMDIFF_EIGTH(x, y, 6, 24, vr, vi, ur, ui); 367 368 // xt1 = 0 369 // xt2 = 8 370 vr = y[ 8]; 371 vi = y[ 9]; 372 ur = y[ 0]>>1; 373 ui = y[ 1]>>1; 374 x[ 0] = ur + (vr>>1); 375 x[ 1] = ui + (vi>>1); 376 x[ 8] = ur - (vr>>1); 377 x[ 9] = ui - (vi>>1); 378 379 // xt1 = 4 380 // xt2 = 12 381 vr = y[13]; 382 vi = y[12]; 383 ur = y[ 4]>>1; 384 ui = y[ 5]>>1; 385 x[ 4] = ur + (vr>>1); 386 x[ 5] = ui - (vi>>1); 387 x[12] = ur - (vr>>1); 388 x[13] = ui + (vi>>1); 389 390 // xt1 = 16 391 // xt2 = 24 392 vr = y[24]; 393 vi = y[25]; 394 ur = y[16]>>1; 395 ui = y[17]>>1; 396 x[16] = ur + (vr>>1); 397 x[17] = ui + (vi>>1); 398 x[24] = ur - (vr>>1); 399 x[25] = ui - (vi>>1); 400 401 // xt1 = 20 402 // xt2 = 28 403 vr = y[29]; 404 vi = y[28]; 405 ur = y[20]>>1; 406 ui = y[21]>>1; 407 x[20] = ur + (vr>>1); 408 x[21] = ui - (vi>>1); 409 x[28] = ur - (vr>>1); 410 x[29] = ui + (vi>>1); 411 412 // xt1 = 2 413 // xt2 = 10 414 SUMDIFF_PIFOURTH(vi, vr, y[10], y[11]) 415 //vr = fMultDiv2((y[11] + y[10]),W_PiFOURTH); 416 //vi = fMultDiv2((y[11] - y[10]),W_PiFOURTH); 417 ur = y[ 2]; 418 ui = y[ 3]; 419 x[ 2] = (ur>>1) + vr; 420 x[ 3] = (ui>>1) + vi; 421 x[10] = (ur>>1) - vr; 422 x[11] = (ui>>1) - vi; 423 424 // xt1 = 6 425 // xt2 = 14 426 SUMDIFF_PIFOURTH(vr, vi, y[14], y[15]) 427 ur = y[ 6]; 428 ui = y[ 7]; 429 x[ 6] = (ur>>1) + vr; 430 x[ 7] = (ui>>1) - vi; 431 x[14] = (ur>>1) - vr; 432 x[15] = (ui>>1) + vi; 433 434 // xt1 = 18 435 // xt2 = 26 436 SUMDIFF_PIFOURTH(vi, vr, y[26], y[27]) 437 ur = y[18]; 438 ui = y[19]; 439 x[18] = (ur>>1) + vr; 440 x[19] = (ui>>1) + vi; 441 x[26] = (ur>>1) - vr; 442 x[27] = (ui>>1) - vi; 443 444 // xt1 = 22 445 // xt2 = 30 446 SUMDIFF_PIFOURTH(vr, vi, y[30], y[31]) 447 ur = y[22]; 448 ui = y[23]; 449 x[22] = (ur>>1) + vr; 450 x[23] = (ui>>1) - vi; 451 x[30] = (ur>>1) - vr; 452 x[31] = (ui>>1) + vi; 453 454 // xt1 = 0 455 // xt2 = 16 456 vr = x[16]; 457 vi = x[17]; 458 ur = x[ 0]>>1; 459 ui = x[ 1]>>1; 460 x[ 0] = ur + (vr>>1); 461 x[ 1] = ui + (vi>>1); 462 x[16] = ur - (vr>>1); 463 x[17] = ui - (vi>>1); 464 465 // xt1 = 8 466 // xt2 = 24 467 vi = x[24]; 468 vr = x[25]; 469 ur = x[ 8]>>1; 470 ui = x[ 9]>>1; 471 x[ 8] = ur + (vr>>1); 472 x[ 9] = ui - (vi>>1); 473 x[24] = ur - (vr>>1); 474 x[25] = ui + (vi>>1); 475 476 // xt1 = 2 477 // xt2 = 18 478 cplxMultDiv2(&vi, &vr, x[19], x[18], fft16_w16[0]); 479 ur = x[ 2]; 480 ui = x[ 3]; 481 x[ 2] = (ur>>1) + vr; 482 x[ 3] = (ui>>1) + vi; 483 x[18] = (ur>>1) - vr; 484 x[19] = (ui>>1) - vi; 485 486 // xt1 = 10 487 // xt2 = 26 488 cplxMultDiv2(&vr, &vi, x[27], x[26], fft16_w16[0]); 489 ur = x[10]; 490 ui = x[11]; 491 x[10] = (ur>>1) + vr; 492 x[11] = (ui>>1) - vi; 493 x[26] = (ur>>1) - vr; 494 x[27] = (ui>>1) + vi; 495 496 // xt1 = 4 497 // xt2 = 20 498 SUMDIFF_PIFOURTH(vi, vr, x[20], x[21]) 499 ur = x[ 4]; 500 ui = x[ 5]; 501 x[ 4] = (ur>>1) + vr; 502 x[ 5] = (ui>>1) + vi; 503 x[20] = (ur>>1) - vr; 504 x[21] = (ui>>1) - vi; 505 506 // xt1 = 12 507 // xt2 = 28 508 SUMDIFF_PIFOURTH(vr, vi, x[28], x[29]) 509 ur = x[12]; 510 ui = x[13]; 511 x[12] = (ur>>1) + vr; 512 x[13] = (ui>>1) - vi; 513 x[28] = (ur>>1) - vr; 514 x[29] = (ui>>1) + vi; 515 516 // xt1 = 6 517 // xt2 = 22 518 cplxMultDiv2(&vi, &vr, x[23], x[22], fft16_w16[1]); 519 ur = x[ 6]; 520 ui = x[ 7]; 521 x[ 6] = (ur>>1) + vr; 522 x[ 7] = (ui>>1) + vi; 523 x[22] = (ur>>1) - vr; 524 x[23] = (ui>>1) - vi; 525 526 // xt1 = 14 527 // xt2 = 30 528 cplxMultDiv2(&vr, &vi, x[31], x[30], fft16_w16[1]); 529 ur = x[14]; 530 ui = x[15]; 531 x[14] = (ur>>1) + vr; 532 x[15] = (ui>>1) - vi; 533 x[30] = (ur>>1) - vr; 534 x[31] = (ui>>1) + vi; 535 } 536 537 #ifndef FUNCTION_fft_32 538 static const FIXP_STP fft32_w32[6] = 539 { 540 STCP (0x7641af3d, 0x30fbc54d), STCP(0x30fbc54d, 0x7641af3d), STCP(0x7d8a5f40, 0x18f8b83c), 541 STCP (0x6a6d98a4, 0x471cece7), STCP(0x471cece7, 0x6a6d98a4), STCP(0x18f8b83c, 0x7d8a5f40) 542 }; 543 544 LNK_SECTION_CODE_L1 545 inline void fft_32(FIXP_DBL *x) 546 { 547 548 #define W_PiFOURTH STC(0x5a82799a) 549 550 FIXP_DBL vr,vi,ur,ui; 551 FIXP_DBL y[64]; 552 553 /* 554 * 1+2 stage radix 4 555 */ 556 557 ///////////////////////////////////////////////////////////////////////////////////////// 558 559 // i = 0 560 vr = (x[ 0] + x[32])>>1; /* Re A + Re B */ 561 vi = (x[16] + x[48]); /* Re C + Re D */ 562 ur = (x[ 1] + x[33])>>1; /* Im A + Im B */ 563 ui = (x[17] + x[49]); /* Im C + Im D */ 564 565 y[ 0] = vr + (vi>>1); /* Re A' = ReA + ReB +ReC + ReD */ 566 y[ 4] = vr - (vi>>1); /* Re C' = -(ReC+ReD) + (ReA+ReB) */ 567 y[ 1] = ur + (ui>>1); /* Im A' = sum of imag values */ 568 y[ 5] = ur - (ui>>1); /* Im C' = -Im C -Im D +Im A +Im B */ 569 570 vr -= x[32]; /* Re A - Re B */ 571 vi = (vi>>1) - x[48]; /* Re C - Re D */ 572 ur -= x[33]; /* Im A - Im B */ 573 ui = (ui>>1) - x[49]; /* Im C - Im D */ 574 575 y[ 2] = ui + vr; /* Re B' = Im C - Im D + Re A - Re B */ 576 y[ 6] = vr - ui; /* Re D' = -Im C + Im D + Re A - Re B */ 577 y[ 3] = ur - vi; /* Im B'= -Re C + Re D + Im A - Im B */ 578 y[ 7] = vi + ur; /* Im D'= Re C - Re D + Im A - Im B */ 579 580 //i=8 581 vr = (x[ 8] + x[40])>>1; /* Re A + Re B */ 582 vi = (x[24] + x[56]); /* Re C + Re D */ 583 ur = (x[ 9] + x[41])>>1; /* Im A + Im B */ 584 ui = (x[25] + x[57]); /* Im C + Im D */ 585 586 y[ 8] = vr + (vi>>1); /* Re A' = ReA + ReB +ReC + ReD */ 587 y[12] = vr - (vi>>1); /* Re C' = -(ReC+ReD) + (ReA+ReB) */ 588 y[ 9] = ur + (ui>>1); /* Im A' = sum of imag values */ 589 y[13] = ur - (ui>>1); /* Im C' = -Im C -Im D +Im A +Im B */ 590 591 vr -= x[40]; /* Re A - Re B */ 592 vi = (vi>>1) - x[56]; /* Re C - Re D */ 593 ur -= x[41]; /* Im A - Im B */ 594 ui = (ui>>1) - x[57]; /* Im C - Im D */ 595 596 y[10] = ui + vr; /* Re B' = Im C - Im D + Re A - Re B */ 597 y[14] = vr - ui; /* Re D' = -Im C + Im D + Re A - Re B */ 598 y[11] = ur - vi; /* Im B'= -Re C + Re D + Im A - Im B */ 599 y[15] = vi + ur; /* Im D'= Re C - Re D + Im A - Im B */ 600 601 //i=16 602 vr = (x[ 4] + x[36])>>1; /* Re A + Re B */ 603 vi = (x[20] + x[52]); /* Re C + Re D */ 604 ur = (x[ 5] + x[37])>>1; /* Im A + Im B */ 605 ui = (x[21] + x[53]); /* Im C + Im D */ 606 607 y[16] = vr + (vi>>1); /* Re A' = ReA + ReB +ReC + ReD */ 608 y[20] = vr - (vi>>1); /* Re C' = -(ReC+ReD) + (ReA+ReB) */ 609 y[17] = ur + (ui>>1); /* Im A' = sum of imag values */ 610 y[21] = ur - (ui>>1); /* Im C' = -Im C -Im D +Im A +Im B */ 611 612 vr -= x[36]; /* Re A - Re B */ 613 vi = (vi>>1) - x[52]; /* Re C - Re D */ 614 ur -= x[37]; /* Im A - Im B */ 615 ui = (ui>>1) - x[53]; /* Im C - Im D */ 616 617 y[18] = ui + vr; /* Re B' = Im C - Im D + Re A - Re B */ 618 y[22] = vr - ui; /* Re D' = -Im C + Im D + Re A - Re B */ 619 y[19] = ur - vi; /* Im B'= -Re C + Re D + Im A - Im B */ 620 y[23] = vi + ur; /* Im D'= Re C - Re D + Im A - Im B */ 621 622 //i=24 623 vr = (x[12] + x[44])>>1; /* Re A + Re B */ 624 vi = (x[28] + x[60]); /* Re C + Re D */ 625 ur = (x[13] + x[45])>>1; /* Im A + Im B */ 626 ui = (x[29] + x[61]); /* Im C + Im D */ 627 628 y[24] = vr + (vi>>1); /* Re A' = ReA + ReB +ReC + ReD */ 629 y[28] = vr - (vi>>1); /* Re C' = -(ReC+ReD) + (ReA+ReB) */ 630 y[25] = ur + (ui>>1); /* Im A' = sum of imag values */ 631 y[29] = ur - (ui>>1); /* Im C' = -Im C -Im D +Im A +Im B */ 632 633 vr -= x[44]; /* Re A - Re B */ 634 vi = (vi>>1) - x[60]; /* Re C - Re D */ 635 ur -= x[45]; /* Im A - Im B */ 636 ui = (ui>>1) - x[61]; /* Im C - Im D */ 637 638 y[26] = ui + vr; /* Re B' = Im C - Im D + Re A - Re B */ 639 y[30] = vr - ui; /* Re D' = -Im C + Im D + Re A - Re B */ 640 y[27] = ur - vi; /* Im B'= -Re C + Re D + Im A - Im B */ 641 y[31] = vi + ur; /* Im D'= Re C - Re D + Im A - Im B */ 642 643 // i = 32 644 vr = (x[ 2] + x[34])>>1; /* Re A + Re B */ 645 vi = (x[18] + x[50]); /* Re C + Re D */ 646 ur = (x[ 3] + x[35])>>1; /* Im A + Im B */ 647 ui = (x[19] + x[51]); /* Im C + Im D */ 648 649 y[32] = vr + (vi>>1); /* Re A' = ReA + ReB +ReC + ReD */ 650 y[36] = vr - (vi>>1); /* Re C' = -(ReC+ReD) + (ReA+ReB) */ 651 y[33] = ur + (ui>>1); /* Im A' = sum of imag values */ 652 y[37] = ur - (ui>>1); /* Im C' = -Im C -Im D +Im A +Im B */ 653 654 vr -= x[34]; /* Re A - Re B */ 655 vi = (vi>>1) - x[50]; /* Re C - Re D */ 656 ur -= x[35]; /* Im A - Im B */ 657 ui = (ui>>1) - x[51]; /* Im C - Im D */ 658 659 y[34] = ui + vr; /* Re B' = Im C - Im D + Re A - Re B */ 660 y[38] = vr - ui; /* Re D' = -Im C + Im D + Re A - Re B */ 661 y[35] = ur - vi; /* Im B'= -Re C + Re D + Im A - Im B */ 662 y[39] = vi + ur; /* Im D'= Re C - Re D + Im A - Im B */ 663 664 //i=40 665 vr = (x[10] + x[42])>>1; /* Re A + Re B */ 666 vi = (x[26] + x[58]); /* Re C + Re D */ 667 ur = (x[11] + x[43])>>1; /* Im A + Im B */ 668 ui = (x[27] + x[59]); /* Im C + Im D */ 669 670 y[40] = vr + (vi>>1); /* Re A' = ReA + ReB +ReC + ReD */ 671 y[44] = vr - (vi>>1); /* Re C' = -(ReC+ReD) + (ReA+ReB) */ 672 y[41] = ur + (ui>>1); /* Im A' = sum of imag values */ 673 y[45] = ur - (ui>>1); /* Im C' = -Im C -Im D +Im A +Im B */ 674 675 vr -= x[42]; /* Re A - Re B */ 676 vi = (vi>>1) - x[58]; /* Re C - Re D */ 677 ur -= x[43]; /* Im A - Im B */ 678 ui = (ui>>1) - x[59]; /* Im C - Im D */ 679 680 y[42] = ui + vr; /* Re B' = Im C - Im D + Re A - Re B */ 681 y[46] = vr - ui; /* Re D' = -Im C + Im D + Re A - Re B */ 682 y[43] = ur - vi; /* Im B'= -Re C + Re D + Im A - Im B */ 683 y[47] = vi + ur; /* Im D'= Re C - Re D + Im A - Im B */ 684 685 //i=48 686 vr = (x[ 6] + x[38])>>1; /* Re A + Re B */ 687 vi = (x[22] + x[54]); /* Re C + Re D */ 688 ur = (x[ 7] + x[39])>>1; /* Im A + Im B */ 689 ui = (x[23] + x[55]); /* Im C + Im D */ 690 691 y[48] = vr + (vi>>1); /* Re A' = ReA + ReB +ReC + ReD */ 692 y[52] = vr - (vi>>1); /* Re C' = -(ReC+ReD) + (ReA+ReB) */ 693 y[49] = ur + (ui>>1); /* Im A' = sum of imag values */ 694 y[53] = ur - (ui>>1); /* Im C' = -Im C -Im D +Im A +Im B */ 695 696 vr -= x[38]; /* Re A - Re B */ 697 vi = (vi>>1) - x[54]; /* Re C - Re D */ 698 ur -= x[39]; /* Im A - Im B */ 699 ui = (ui>>1) - x[55]; /* Im C - Im D */ 700 701 y[50] = ui + vr; /* Re B' = Im C - Im D + Re A - Re B */ 702 y[54] = vr - ui; /* Re D' = -Im C + Im D + Re A - Re B */ 703 y[51] = ur - vi; /* Im B'= -Re C + Re D + Im A - Im B */ 704 y[55] = vi + ur; /* Im D'= Re C - Re D + Im A - Im B */ 705 706 //i=56 707 vr = (x[14] + x[46])>>1; /* Re A + Re B */ 708 vi = (x[30] + x[62]); /* Re C + Re D */ 709 ur = (x[15] + x[47])>>1; /* Im A + Im B */ 710 ui = (x[31] + x[63]); /* Im C + Im D */ 711 712 y[56] = vr + (vi>>1); /* Re A' = ReA + ReB +ReC + ReD */ 713 y[60] = vr - (vi>>1); /* Re C' = -(ReC+ReD) + (ReA+ReB) */ 714 y[57] = ur + (ui>>1); /* Im A' = sum of imag values */ 715 y[61] = ur - (ui>>1); /* Im C' = -Im C -Im D +Im A +Im B */ 716 717 vr -= x[46]; /* Re A - Re B */ 718 vi = (vi>>1) - x[62]; /* Re C - Re D */ 719 ur -= x[47]; /* Im A - Im B */ 720 ui = (ui>>1) - x[63]; /* Im C - Im D */ 721 722 y[58] = ui + vr; /* Re B' = Im C - Im D + Re A - Re B */ 723 y[62] = vr - ui; /* Re D' = -Im C + Im D + Re A - Re B */ 724 y[59] = ur - vi; /* Im B'= -Re C + Re D + Im A - Im B */ 725 y[63] = vi + ur; /* Im D'= Re C - Re D + Im A - Im B */ 726 727 728 FIXP_DBL *xt = &x[0]; 729 FIXP_DBL *yt = &y[0]; 730 731 int j = 4; 732 do 733 { 734 vr = yt[8]; 735 vi = yt[9]; 736 ur = yt[0]>>1; 737 ui = yt[1]>>1; 738 xt[ 0] = ur + (vr>>1); 739 xt[ 1] = ui + (vi>>1); 740 xt[ 8] = ur - (vr>>1); 741 xt[ 9] = ui - (vi>>1); 742 743 vr = yt[13]; 744 vi = yt[12]; 745 ur = yt[4]>>1; 746 ui = yt[5]>>1; 747 xt[ 4] = ur + (vr>>1); 748 xt[ 5] = ui - (vi>>1); 749 xt[12] = ur - (vr>>1); 750 xt[13] = ui + (vi>>1); 751 752 SUMDIFF_PIFOURTH(vi, vr, yt[10], yt[11]) 753 ur = yt[2]; 754 ui = yt[3]; 755 xt[ 2] = (ur>>1) + vr; 756 xt[ 3] = (ui>>1) + vi; 757 xt[10] = (ur>>1) - vr; 758 xt[11] = (ui>>1) - vi; 759 760 SUMDIFF_PIFOURTH(vr, vi, yt[14], yt[15]) 761 ur = yt[6]; 762 ui = yt[7]; 763 764 xt[ 6] = (ur>>1) + vr; 765 xt[ 7] = (ui>>1) - vi; 766 xt[14] = (ur>>1) - vr; 767 xt[15] = (ui>>1) + vi; 768 xt += 16; 769 yt += 16; 770 } while (--j != 0); 771 772 vr = x[16]; 773 vi = x[17]; 774 ur = x[ 0]>>1; 775 ui = x[ 1]>>1; 776 x[ 0] = ur + (vr>>1); 777 x[ 1] = ui + (vi>>1); 778 x[16] = ur - (vr>>1); 779 x[17] = ui - (vi>>1); 780 781 vi = x[24]; 782 vr = x[25]; 783 ur = x[ 8]>>1; 784 ui = x[ 9]>>1; 785 x[ 8] = ur + (vr>>1); 786 x[ 9] = ui - (vi>>1); 787 x[24] = ur - (vr>>1); 788 x[25] = ui + (vi>>1); 789 790 vr = x[48]; 791 vi = x[49]; 792 ur = x[32]>>1; 793 ui = x[33]>>1; 794 x[32] = ur + (vr>>1); 795 x[33] = ui + (vi>>1); 796 x[48] = ur - (vr>>1); 797 x[49] = ui - (vi>>1); 798 799 vi = x[56]; 800 vr = x[57]; 801 ur = x[40]>>1; 802 ui = x[41]>>1; 803 x[40] = ur + (vr>>1); 804 x[41] = ui - (vi>>1); 805 x[56] = ur - (vr>>1); 806 x[57] = ui + (vi>>1); 807 808 cplxMultDiv2(&vi, &vr, x[19], x[18], fft32_w32[0]); 809 ur = x[ 2]; 810 ui = x[ 3]; 811 x[ 2] = (ur>>1) + vr; 812 x[ 3] = (ui>>1) + vi; 813 x[18] = (ur>>1) - vr; 814 x[19] = (ui>>1) - vi; 815 816 cplxMultDiv2(&vr, &vi, x[27], x[26], fft32_w32[0]); 817 ur = x[10]; 818 ui = x[11]; 819 x[10] = (ur>>1) + vr; 820 x[11] = (ui>>1) - vi; 821 x[26] = (ur>>1) - vr; 822 x[27] = (ui>>1) + vi; 823 824 cplxMultDiv2(&vi, &vr, x[51], x[50], fft32_w32[0]); 825 ur = x[34]; 826 ui = x[35]; 827 x[34] = (ur>>1) + vr; 828 x[35] = (ui>>1) + vi; 829 x[50] = (ur>>1) - vr; 830 x[51] = (ui>>1) - vi; 831 832 cplxMultDiv2(&vr, &vi, x[59], x[58], fft32_w32[0]); 833 ur = x[42]; 834 ui = x[43]; 835 x[42] = (ur>>1) + vr; 836 x[43] = (ui>>1) - vi; 837 x[58] = (ur>>1) - vr; 838 x[59] = (ui>>1) + vi; 839 840 SUMDIFF_PIFOURTH(vi, vr, x[20], x[21]) 841 ur = x[ 4]; 842 ui = x[ 5]; 843 x[ 4] = (ur>>1) + vr; 844 x[ 5] = (ui>>1) + vi; 845 x[20] = (ur>>1) - vr; 846 x[21] = (ui>>1) - vi; 847 848 SUMDIFF_PIFOURTH(vr, vi, x[28], x[29]) 849 ur = x[12]; 850 ui = x[13]; 851 x[12] = (ur>>1) + vr; 852 x[13] = (ui>>1) - vi; 853 x[28] = (ur>>1) - vr; 854 x[29] = (ui>>1) + vi; 855 856 SUMDIFF_PIFOURTH(vi, vr, x[52], x[53]) 857 ur = x[36]; 858 ui = x[37]; 859 x[36] = (ur>>1) + vr; 860 x[37] = (ui>>1) + vi; 861 x[52] = (ur>>1) - vr; 862 x[53] = (ui>>1) - vi; 863 864 SUMDIFF_PIFOURTH(vr, vi, x[60], x[61]) 865 ur = x[44]; 866 ui = x[45]; 867 x[44] = (ur>>1) + vr; 868 x[45] = (ui>>1) - vi; 869 x[60] = (ur>>1) - vr; 870 x[61] = (ui>>1) + vi; 871 872 873 cplxMultDiv2(&vi, &vr, x[23], x[22], fft32_w32[1]); 874 ur = x[ 6]; 875 ui = x[ 7]; 876 x[ 6] = (ur>>1) + vr; 877 x[ 7] = (ui>>1) + vi; 878 x[22] = (ur>>1) - vr; 879 x[23] = (ui>>1) - vi; 880 881 cplxMultDiv2(&vr, &vi, x[31], x[30], fft32_w32[1]); 882 ur = x[14]; 883 ui = x[15]; 884 x[14] = (ur>>1) + vr; 885 x[15] = (ui>>1) - vi; 886 x[30] = (ur>>1) - vr; 887 x[31] = (ui>>1) + vi; 888 889 cplxMultDiv2(&vi, &vr, x[55], x[54], fft32_w32[1]); 890 ur = x[38]; 891 ui = x[39]; 892 x[38] = (ur>>1) + vr; 893 x[39] = (ui>>1) + vi; 894 x[54] = (ur>>1) - vr; 895 x[55] = (ui>>1) - vi; 896 897 cplxMultDiv2(&vr, &vi, x[63], x[62], fft32_w32[1]); 898 ur = x[46]; 899 ui = x[47]; 900 901 x[46] = (ur>>1) + vr; 902 x[47] = (ui>>1) - vi; 903 x[62] = (ur>>1) - vr; 904 x[63] = (ui>>1) + vi; 905 906 vr = x[32]; 907 vi = x[33]; 908 ur = x[ 0]>>1; 909 ui = x[ 1]>>1; 910 x[ 0] = ur + (vr>>1); 911 x[ 1] = ui + (vi>>1); 912 x[32] = ur - (vr>>1); 913 x[33] = ui - (vi>>1); 914 915 vi = x[48]; 916 vr = x[49]; 917 ur = x[16]>>1; 918 ui = x[17]>>1; 919 x[16] = ur + (vr>>1); 920 x[17] = ui - (vi>>1); 921 x[48] = ur - (vr>>1); 922 x[49] = ui + (vi>>1); 923 924 cplxMultDiv2(&vi, &vr, x[35], x[34], fft32_w32[2]); 925 ur = x[ 2]; 926 ui = x[ 3]; 927 x[ 2] = (ur>>1) + vr; 928 x[ 3] = (ui>>1) + vi; 929 x[34] = (ur>>1) - vr; 930 x[35] = (ui>>1) - vi; 931 932 cplxMultDiv2(&vr, &vi, x[51], x[50], fft32_w32[2]); 933 ur = x[18]; 934 ui = x[19]; 935 x[18] = (ur>>1) + vr; 936 x[19] = (ui>>1) - vi; 937 x[50] = (ur>>1) - vr; 938 x[51] = (ui>>1) + vi; 939 940 cplxMultDiv2(&vi, &vr, x[37], x[36], fft32_w32[0]); 941 ur = x[ 4]; 942 ui = x[ 5]; 943 x[ 4] = (ur>>1) + vr; 944 x[ 5] = (ui>>1) + vi; 945 x[36] = (ur>>1) - vr; 946 x[37] = (ui>>1) - vi; 947 948 cplxMultDiv2(&vr, &vi, x[53], x[52], fft32_w32[0]); 949 ur = x[20]; 950 ui = x[21]; 951 x[20] = (ur>>1) + vr; 952 x[21] = (ui>>1) - vi; 953 x[52] = (ur>>1) - vr; 954 x[53] = (ui>>1) + vi; 955 956 cplxMultDiv2(&vi, &vr, x[39], x[38], fft32_w32[3]); 957 ur = x[ 6]; 958 ui = x[ 7]; 959 x[ 6] = (ur>>1) + vr; 960 x[ 7] = (ui>>1) + vi; 961 x[38] = (ur>>1) - vr; 962 x[39] = (ui>>1) - vi; 963 964 cplxMultDiv2(&vr, &vi, x[55], x[54], fft32_w32[3]); 965 ur = x[22]; 966 ui = x[23]; 967 x[22] = (ur>>1) + vr; 968 x[23] = (ui>>1) - vi; 969 x[54] = (ur>>1) - vr; 970 x[55] = (ui>>1) + vi; 971 972 SUMDIFF_PIFOURTH(vi, vr, x[40], x[41]) 973 ur = x[ 8]; 974 ui = x[ 9]; 975 x[ 8] = (ur>>1) + vr; 976 x[ 9] = (ui>>1) + vi; 977 x[40] = (ur>>1) - vr; 978 x[41] = (ui>>1) - vi; 979 980 SUMDIFF_PIFOURTH(vr, vi, x[56], x[57]) 981 ur = x[24]; 982 ui = x[25]; 983 x[24] = (ur>>1) + vr; 984 x[25] = (ui>>1) - vi; 985 x[56] = (ur>>1) - vr; 986 x[57] = (ui>>1) + vi; 987 988 cplxMultDiv2(&vi, &vr, x[43], x[42], fft32_w32[4]); 989 ur = x[10]; 990 ui = x[11]; 991 992 x[10] = (ur>>1) + vr; 993 x[11] = (ui>>1) + vi; 994 x[42] = (ur>>1) - vr; 995 x[43] = (ui>>1) - vi; 996 997 cplxMultDiv2(&vr, &vi, x[59], x[58], fft32_w32[4]); 998 ur = x[26]; 999 ui = x[27]; 1000 x[26] = (ur>>1) + vr; 1001 x[27] = (ui>>1) - vi; 1002 x[58] = (ur>>1) - vr; 1003 x[59] = (ui>>1) + vi; 1004 1005 cplxMultDiv2(&vi, &vr, x[45], x[44], fft32_w32[1]); 1006 ur = x[12]; 1007 ui = x[13]; 1008 x[12] = (ur>>1) + vr; 1009 x[13] = (ui>>1) + vi; 1010 x[44] = (ur>>1) - vr; 1011 x[45] = (ui>>1) - vi; 1012 1013 cplxMultDiv2(&vr, &vi, x[61], x[60], fft32_w32[1]); 1014 ur = x[28]; 1015 ui = x[29]; 1016 x[28] = (ur>>1) + vr; 1017 x[29] = (ui>>1) - vi; 1018 x[60] = (ur>>1) - vr; 1019 x[61] = (ui>>1) + vi; 1020 1021 cplxMultDiv2(&vi, &vr, x[47], x[46], fft32_w32[5]); 1022 ur = x[14]; 1023 ui = x[15]; 1024 x[14] = (ur>>1) + vr; 1025 x[15] = (ui>>1) + vi; 1026 x[46] = (ur>>1) - vr; 1027 x[47] = (ui>>1) - vi; 1028 1029 cplxMultDiv2(&vr, &vi, x[63], x[62], fft32_w32[5]); 1030 ur = x[30]; 1031 ui = x[31]; 1032 x[30] = (ur>>1) + vr; 1033 x[31] = (ui>>1) - vi; 1034 x[62] = (ur>>1) - vr; 1035 x[63] = (ui>>1) + vi; 1036 } 1037 #endif /* #ifndef FUNCTION_fft_32 */ 1038 1039 1040 /** 1041 * \brief Apply rotation vectors to a data buffer. 1042 * \param cl length of each row of input data. 1043 * \param l total length of input data. 1044 * \param pVecRe real part of rotation ceofficient vector. 1045 * \param pVecIm imaginary part of rotation ceofficient vector. 1046 */ 1047 static inline void fft_apply_rot_vector(FIXP_DBL *RESTRICT pData, const int cl, const int l, const FIXP_STB *pVecRe, const FIXP_STB *pVecIm) 1048 { 1049 FIXP_DBL re, im; 1050 FIXP_STB vre, vim; 1051 1052 int i, c; 1053 1054 for(i=0; i<cl; i++) { 1055 re = pData[2*i]; 1056 im = pData[2*i+1]; 1057 1058 pData[2*i] = re>>2; /* * 0.25 */ 1059 pData[2*i+1] = im>>2; /* * 0.25 */ 1060 } 1061 for(; i<l; i+=cl) 1062 { 1063 re = pData[2*i]; 1064 im = pData[2*i+1]; 1065 1066 pData[2*i] = re>>2; /* * 0.25 */ 1067 pData[2*i+1] = im>>2; /* * 0.25 */ 1068 1069 for (c=i+1; c<i+cl; c++) 1070 { 1071 re = pData[2*c]>>1; 1072 im = pData[2*c+1]>>1; 1073 vre = *pVecRe++; 1074 vim = *pVecIm++; 1075 1076 cplxMultDiv2(&pData[2*c+1], &pData[2*c], im, re, vre, vim); 1077 } 1078 } 1079 } 1080 1081 #define FFT_TWO_STAGE_MACRO_ENABLE 1082 1083 1084 #ifdef FFT_TWO_STAGE_MACRO_ENABLE 1085 1086 #define fftN2(pInput, length, dim1, dim2, fft_func1, fft_func2, RotVectorReal, RotVectorImag) \ 1087 { \ 1088 int i, j; \ 1089 \ 1090 C_ALLOC_SCRATCH_START(aDst, FIXP_DBL, length*2); \ 1091 C_ALLOC_SCRATCH_START(aDst2, FIXP_DBL, dim2*2); \ 1092 \ 1093 FDK_ASSERT(length == dim1*dim2); \ 1094 \ 1095 /* Perform dim2 times the fft of length dim1. The input samples are at the address of pSrc and the \ 1096 output samples are at the address of pDst. The input vector for the fft of length dim1 is built \ 1097 of the interleaved samples in pSrc, the output samples are stored consecutively. \ 1098 */ \ 1099 { \ 1100 const FIXP_DBL* pSrc = pInput; \ 1101 FIXP_DBL *RESTRICT pDst = aDst; \ 1102 \ 1103 for(i=0; i<dim2; i++) \ 1104 { \ 1105 for(j=0; j<dim1; j++) \ 1106 { \ 1107 pDst[2*j] = pSrc[2*j*dim2]; \ 1108 pDst[2*j+1] = pSrc[2*j*dim2+1]; \ 1109 } \ 1110 \ 1111 fft_func1(pDst); \ 1112 pSrc += 2; \ 1113 pDst = pDst + 2*dim1; \ 1114 } \ 1115 } \ 1116 \ 1117 /* Perform the modulation of the output of the fft of length dim1 */ \ 1118 fft_apply_rot_vector(aDst, dim1, length, RotVectorReal, RotVectorImag); \ 1119 \ 1120 /* Perform dim1 times the fft of length dim2. The input samples are at the address of aDst and the \ 1121 output samples are at the address of pInput. The input vector for the fft of length dim2 is built \ 1122 of the interleaved samples in aDst, the output samples are stored consecutively at the address \ 1123 of pInput. \ 1124 */ \ 1125 { \ 1126 const FIXP_DBL* pSrc = aDst; \ 1127 FIXP_DBL *RESTRICT pDst = aDst2; \ 1128 FIXP_DBL *RESTRICT pDstOut = pInput; \ 1129 \ 1130 for(i=0; i<dim1; i++) \ 1131 { \ 1132 for(j=0; j<dim2; j++) \ 1133 { \ 1134 pDst[2*j] = pSrc[2*j*dim1]; \ 1135 pDst[2*j+1] = pSrc[2*j*dim1+1]; \ 1136 } \ 1137 \ 1138 fft_func2(pDst); \ 1139 \ 1140 for(j=0; j<dim2; j++) \ 1141 { \ 1142 pDstOut[2*j*dim1] = pDst[2*j]; \ 1143 pDstOut[2*j*dim1+1] = pDst[2*j+1]; \ 1144 } \ 1145 pSrc += 2; \ 1146 pDstOut += 2; \ 1147 } \ 1148 } \ 1149 \ 1150 C_ALLOC_SCRATCH_END(aDst2, FIXP_DBL, dim2*2); \ 1151 C_ALLOC_SCRATCH_END(aDst, FIXP_DBL, length*2); \ 1152 } \ 1153 1154 #else /* FFT_TWO_STAGE_MACRO_ENABLE */ 1155 1156 /* select either switch case of function pointer. */ 1157 //#define FFT_TWO_STAGE_SWITCH_CASE 1158 1159 static inline void fftN2( 1160 FIXP_DBL *pInput, 1161 const int length, 1162 const int dim1, 1163 const int dim2, 1164 void (* const fft1)(FIXP_DBL *), 1165 void (* const fft2)(FIXP_DBL *), 1166 const FIXP_STB *RotVectorReal, 1167 const FIXP_STB *RotVectorImag 1168 ) 1169 { 1170 /* The real part of the input samples are at the addresses with even indices and the imaginary 1171 part of the input samples are at the addresses with odd indices. The output samples are stored 1172 at the address of pInput 1173 */ 1174 FIXP_DBL *pSrc, *pDst, *pDstOut; 1175 int i, j; 1176 1177 C_ALLOC_SCRATCH_START(aDst, FIXP_DBL, length*2); 1178 C_ALLOC_SCRATCH_START(aDst2, FIXP_DBL, dim2*2); 1179 1180 FDK_ASSERT(length == dim1*dim2); 1181 1182 /* Perform dim2 times the fft of length dim1. The input samples are at the address of pSrc and the 1183 output samples are at the address of pDst. The input vector for the fft of length dim1 is built 1184 of the interleaved samples in pSrc, the output samples are stored consecutively. 1185 */ 1186 pSrc = pInput; 1187 pDst = aDst; 1188 for(i=0; i<length/dim1; i++) 1189 { 1190 for(j=0; j<length/dim2; j++) 1191 { 1192 pDst[2*j] = pSrc[2*j*dim2]; 1193 pDst[2*j+1] = pSrc[2*j*dim2+1]; 1194 } 1195 1196 /* fft of size dim1 */ 1197 #ifndef FFT_TWO_STAGE_SWITCH_CASE 1198 fft1(pDst); 1199 #else 1200 switch (dim1) { 1201 case 3: fft3(pDst); break; 1202 case 4: fft_4(pDst); break; 1203 case 5: fft5(pDst); break; 1204 case 8: fft_8(pDst); break; 1205 case 15: fft15(pDst); break; 1206 case 16: fft_16(pDst); break; 1207 case 32: fft_32(pDst); break; 1208 /*case 64: fft_64(pDst); break;*/ 1209 case 128: fft_128(pDst); break; 1210 } 1211 #endif 1212 pSrc += 2; 1213 pDst = pDst + 2*length/dim2; 1214 } 1215 1216 /* Perform the modulation of the output of the fft of length dim1 */ 1217 pSrc=aDst; 1218 fft_apply_rot_vector(pSrc, length/dim2, length, RotVectorReal, RotVectorImag); 1219 1220 /* Perform dim1 times the fft of length dim2. The input samples are at the address of aDst and the 1221 output samples are at the address of pInput. The input vector for the fft of length dim2 is built 1222 of the interleaved samples in aDst, the output samples are stored consecutively at the address 1223 of pInput. 1224 */ 1225 pSrc = aDst; 1226 pDst = aDst2; 1227 pDstOut = pInput; 1228 for(i=0; i<length/dim2; i++) 1229 { 1230 for(j=0; j<length/dim1; j++) 1231 { 1232 pDst[2*j] = pSrc[2*j*dim1]; 1233 pDst[2*j+1] = pSrc[2*j*dim1+1]; 1234 } 1235 1236 #ifndef FFT_TWO_STAGE_SWITCH_CASE 1237 fft2(pDst); 1238 #else 1239 switch (dim2) { 1240 case 3: fft3(pDst); break; 1241 case 4: fft_4(pDst); break; 1242 case 5: fft5(pDst); break; 1243 case 8: fft_8(pDst); break; 1244 case 15: fft15(pDst); break; 1245 case 16: fft_16(pDst); break; 1246 case 32: fft_32(pDst); break; 1247 /*case 64: fft_64(pDst); break;*/ 1248 case 128: fft_128(pDst); break; 1249 } 1250 #endif 1251 1252 for(j=0; j<length/dim1; j++) 1253 { 1254 pDstOut[2*j*dim1] = pDst[2*j]; 1255 pDstOut[2*j*dim1+1] = pDst[2*j+1]; 1256 } 1257 pSrc += 2; 1258 pDstOut += 2; 1259 } 1260 1261 C_ALLOC_SCRATCH_END(aDst2, FIXP_DBL, dim2*2); 1262 C_ALLOC_SCRATCH_END(aDst, FIXP_DBL, length*2); 1263 } 1264 1265 #endif /* FFT_TWO_STAGE_MACRO_ENABLE */ 1266 1267 1268 1269 1270 1271 1272 1273 1274 1275 1276 1277 1278 #define SCALEFACTOR60 5 1279 /** 1280 The function performs the fft of length 60. It is splittet into fft's of length 4 and fft's of 1281 length 15. Between the fft's a modolation is calculated. 1282 */ 1283 static inline void fft60(FIXP_DBL *pInput, INT *pScalefactor) 1284 { 1285 fftN2( 1286 pInput, 60, 4, 15, 1287 fft_4, fft15, 1288 RotVectorReal60, RotVectorImag60 1289 ); 1290 *pScalefactor += SCALEFACTOR60; 1291 } 1292 1293 1294 1295 /* Fallback implementation in case of no better implementation available. */ 1296 1297 #define SCALEFACTOR240 7 1298 1299 /** 1300 The function performs the fft of length 240. It is splittet into fft's of length 16 and fft's of 1301 length 15. Between the fft's a modulation is calculated. 1302 */ 1303 static inline void fft240(FIXP_DBL *pInput, INT *pScalefactor) 1304 { 1305 fftN2( 1306 pInput, 240, 16, 15, 1307 fft_16, fft15, 1308 RotVectorReal240, RotVectorImag240 1309 ); 1310 *pScalefactor += SCALEFACTOR240; 1311 } 1312 1313 1314 #define SCALEFACTOR480 8 1315 #define N32 32 1316 #define TABLE_SIZE_16 (32/2) 1317 1318 /** 1319 The function performs the fft of length 480. It is splittet into fft's of length 32 and fft's of 1320 length 15. Between the fft's a modulation is calculated. 1321 */ 1322 static inline void fft480(FIXP_DBL *pInput, INT *pScalefactor) 1323 { 1324 fftN2( 1325 pInput, 480, 32, 15, 1326 fft_32, fft15, 1327 RotVectorReal480, RotVectorImag480 1328 ); 1329 *pScalefactor += SCALEFACTOR480; 1330 } 1331 1332 void fft(int length, FIXP_DBL *pInput, INT *pScalefactor) 1333 { 1334 if (length == 32) 1335 { 1336 fft_32(pInput); 1337 *pScalefactor += SCALEFACTOR32; 1338 } 1339 else 1340 { 1341 1342 switch (length) { 1343 case 16: 1344 fft_16(pInput); 1345 *pScalefactor += SCALEFACTOR16; 1346 break; 1347 case 8: 1348 fft_8(pInput); 1349 *pScalefactor += SCALEFACTOR8; 1350 break; 1351 case 3: 1352 fft3(pInput); 1353 break; 1354 case 4: 1355 fft_4(pInput); 1356 *pScalefactor += SCALEFACTOR4; 1357 break; 1358 case 5: 1359 fft5(pInput); 1360 break; 1361 case 15: 1362 fft15(pInput); 1363 *pScalefactor += 2; 1364 break; 1365 case 60: 1366 fft60(pInput, pScalefactor); 1367 break; 1368 case 64: 1369 dit_fft(pInput, 6, SineTable512, 512); 1370 *pScalefactor += SCALEFACTOR64; 1371 break; 1372 case 240: 1373 fft240(pInput, pScalefactor); 1374 break; 1375 case 256: 1376 dit_fft(pInput, 8, SineTable512, 512); 1377 *pScalefactor += SCALEFACTOR256; 1378 break; 1379 case 480: 1380 fft480(pInput, pScalefactor); 1381 break; 1382 case 512: 1383 dit_fft(pInput, 9, SineTable512, 512); 1384 *pScalefactor += SCALEFACTOR512; 1385 break; 1386 default: 1387 FDK_ASSERT(0); /* FFT length not supported! */ 1388 break; 1389 } 1390 } 1391 } 1392 1393 1394 void ifft(int length, FIXP_DBL *pInput, INT *scalefactor) 1395 { 1396 switch (length) { 1397 default: 1398 FDK_ASSERT(0); /* IFFT length not supported! */ 1399 break; 1400 } 1401 } 1402 1403 1404