1 /****************************************************************************** 2 * * 3 * Copyright (C) 2018 The Android Open Source Project 4 * 5 * Licensed under the Apache License, Version 2.0 (the "License"); 6 * you may not use this file except in compliance with the License. 7 * You may obtain a copy of the License at: 8 * 9 * http://www.apache.org/licenses/LICENSE-2.0 10 * 11 * Unless required by applicable law or agreed to in writing, software 12 * distributed under the License is distributed on an "AS IS" BASIS, 13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 14 * See the License for the specific language governing permissions and 15 * limitations under the License. 16 * 17 ***************************************************************************** 18 * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore 19 */ 20 #include <stdlib.h> 21 #include <stdio.h> 22 23 #include <ixheaacd_type_def.h> 24 #include "ixheaacd_interface.h" 25 #include "ixheaacd_constants.h" 26 #include <ixheaacd_basic_ops32.h> 27 #include "ixheaacd_function_selector.h" 28 29 extern const WORD32 ixheaacd_twiddle_table_fft_32x32[514]; 30 extern const WORD32 ixheaacd_twiddle_table_3pr[1155]; 31 extern const WORD32 ixheaacd_twiddle_table_3pi[1155]; 32 extern const WORD8 ixheaacd_mps_dig_rev[16]; 33 34 #define PLATFORM_INLINE __inline 35 36 #define DIG_REV(i, m, j) \ 37 do { \ 38 unsigned _ = (i); \ 39 _ = ((_ & 0x33333333) << 2) | ((_ & ~0x33333333) >> 2); \ 40 _ = ((_ & 0x0F0F0F0F) << 4) | ((_ & ~0x0F0F0F0F) >> 4); \ 41 _ = ((_ & 0x00FF00FF) << 8) | ((_ & ~0x00FF00FF) >> 8); \ 42 (j) = _ >> (m); \ 43 } while (0) 44 45 static PLATFORM_INLINE WORD32 ixheaacd_mult32(WORD32 a, WORD32 b) { 46 WORD32 result; 47 WORD64 temp_result; 48 49 temp_result = (WORD64)a * (WORD64)b; 50 result = (WORD32)(temp_result >> 31); 51 52 return (result); 53 } 54 55 static PLATFORM_INLINE WORD32 ixheaacd_mac32(WORD32 a, WORD32 b, WORD32 c) { 56 WORD32 result; 57 58 result = a + ixheaacd_mult32(b, c); 59 60 return (result); 61 } 62 63 static PLATFORM_INLINE WORD32 ixheaacd_mult32_shl(WORD32 a, WORD32 b) { 64 WORD32 result; 65 WORD64 temp_result; 66 67 temp_result = (WORD64)a * (WORD64)b; 68 result = (WORD32)(temp_result >> 32); 69 70 return (result << 1); 71 } 72 73 VOID ixheaacd_mps_complex_fft_64_dec(WORD32 *ptr_x, WORD32 *fin_re, 74 WORD32 *fin_im, WORD32 nlength) { 75 WORD32 i, j, k, n_stages; 76 WORD32 h2, x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; 77 WORD32 del, nodespacing, in_loop_cnt; 78 WORD32 y[128]; 79 WORD32 npoints = nlength; 80 WORD32 *ptr_y = y; 81 const WORD32 *ptr_w; 82 n_stages = 30 - ixheaacd_norm32(npoints); 83 84 n_stages = n_stages >> 1; 85 86 ptr_w = ixheaacd_twiddle_table_fft_32x32; 87 88 for (i = 0; i < npoints; i += 4) { 89 WORD32 *inp = ptr_x; 90 h2 = ixheaacd_mps_dig_rev[i >> 2]; 91 inp += (h2); 92 93 x0r = *inp; 94 x0i = *(inp + 1); 95 inp += (npoints >> 1); 96 97 x1r = *inp; 98 x1i = *(inp + 1); 99 inp += (npoints >> 1); 100 101 x2r = *inp; 102 x2i = *(inp + 1); 103 inp += (npoints >> 1); 104 105 x3r = *inp; 106 x3i = *(inp + 1); 107 108 x0r = x0r + x2r; 109 x0i = x0i + x2i; 110 x2r = x0r - (x2r << 1); 111 x2i = x0i - (x2i << 1); 112 x1r = x1r + x3r; 113 x1i = x1i + x3i; 114 x3r = x1r - (x3r << 1); 115 x3i = x1i - (x3i << 1); 116 117 x0r = x0r + x1r; 118 x0i = x0i + x1i; 119 x1r = x0r - (x1r << 1); 120 x1i = x0i - (x1i << 1); 121 x2r = x2r + x3i; 122 x2i = x2i - x3r; 123 x3i = x2r - (x3i << 1); 124 x3r = x2i + (x3r << 1); 125 126 *ptr_y++ = x0r; 127 *ptr_y++ = x0i; 128 *ptr_y++ = x2r; 129 *ptr_y++ = x2i; 130 *ptr_y++ = x1r; 131 *ptr_y++ = x1i; 132 *ptr_y++ = x3i; 133 *ptr_y++ = x3r; 134 } 135 ptr_y -= 2 * npoints; 136 del = 4; 137 nodespacing = 64; 138 in_loop_cnt = npoints >> 4; 139 for (i = n_stages - 1; i > 0; i--) { 140 const WORD32 *twiddles = ptr_w; 141 WORD32 *data = ptr_y; 142 WORD32 w1h, w2h, w3h, w1l, w2l, w3l; 143 WORD32 sec_loop_cnt; 144 145 for (k = in_loop_cnt; k != 0; k--) { 146 x0r = (*data); 147 x0i = (*(data + 1)); 148 data += (del << 1); 149 150 x1r = (*data); 151 x1i = (*(data + 1)); 152 data += (del << 1); 153 154 x2r = (*data); 155 x2i = (*(data + 1)); 156 data += (del << 1); 157 158 x3r = (*data); 159 x3i = (*(data + 1)); 160 data -= 3 * (del << 1); 161 162 x0r = x0r + x2r; 163 x0i = x0i + x2i; 164 x2r = x0r - (x2r << 1); 165 x2i = x0i - (x2i << 1); 166 x1r = x1r + x3r; 167 x1i = x1i + x3i; 168 x3r = x1r - (x3r << 1); 169 x3i = x1i - (x3i << 1); 170 171 x0r = x0r + x1r; 172 x0i = x0i + x1i; 173 x1r = x0r - (x1r << 1); 174 x1i = x0i - (x1i << 1); 175 x2r = x2r + x3i; 176 x2i = x2i - x3r; 177 x3i = x2r - (x3i << 1); 178 x3r = x2i + (x3r << 1); 179 180 *data = x0r; 181 *(data + 1) = x0i; 182 data += (del << 1); 183 184 *data = x2r; 185 *(data + 1) = x2i; 186 data += (del << 1); 187 188 *data = x1r; 189 *(data + 1) = x1i; 190 data += (del << 1); 191 192 *data = x3i; 193 *(data + 1) = x3r; 194 data += (del << 1); 195 } 196 data = ptr_y + 2; 197 198 sec_loop_cnt = (nodespacing * del); 199 sec_loop_cnt = (sec_loop_cnt / 4) + (sec_loop_cnt / 8) - 200 (sec_loop_cnt / 16) + (sec_loop_cnt / 32) - 201 (sec_loop_cnt / 64) + (sec_loop_cnt / 128) - 202 (sec_loop_cnt / 256); 203 j = nodespacing; 204 205 for (j = nodespacing; j <= sec_loop_cnt; j += nodespacing) { 206 w1h = *(twiddles + 2 * j); 207 w1l = *(twiddles + 2 * j + 1); 208 w2h = *(twiddles + 2 * (j << 1)); 209 w2l = *(twiddles + 2 * (j << 1) + 1); 210 w3h = *(twiddles + 2 * j + 2 * (j << 1)); 211 w3l = *(twiddles + 2 * j + 2 * (j << 1) + 1); 212 213 for (k = in_loop_cnt; k != 0; k--) { 214 WORD32 tmp; 215 WORD32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; 216 217 data += (del << 1); 218 219 x1r = *data; 220 x1i = *(data + 1); 221 data += (del << 1); 222 223 x2r = *data; 224 x2i = *(data + 1); 225 data += (del << 1); 226 227 x3r = *data; 228 x3i = *(data + 1); 229 data -= 3 * (del << 1); 230 231 tmp = (ixheaacd_mult32(x1r, w1l) - ixheaacd_mult32(x1i, w1h)); 232 x1i = ixheaacd_mac32(ixheaacd_mult32(x1r, w1h), x1i, w1l); 233 x1r = tmp; 234 235 tmp = (ixheaacd_mult32(x2r, w2l) - ixheaacd_mult32(x2i, w2h)); 236 x2i = ixheaacd_mac32(ixheaacd_mult32(x2r, w2h), x2i, w2l); 237 x2r = tmp; 238 239 tmp = (ixheaacd_mult32(x3r, w3l) - ixheaacd_mult32(x3i, w3h)); 240 x3i = ixheaacd_mac32(ixheaacd_mult32(x3r, w3h), x3i, w3l); 241 x3r = tmp; 242 243 x0r = (*data); 244 x0i = (*(data + 1)); 245 246 x0r = x0r + (x2r); 247 x0i = x0i + (x2i); 248 x2r = x0r - (x2r << 1); 249 x2i = x0i - (x2i << 1); 250 x1r = x1r + x3r; 251 x1i = x1i + x3i; 252 x3r = x1r - (x3r << 1); 253 x3i = x1i - (x3i << 1); 254 255 x0r = x0r + (x1r); 256 x0i = x0i + (x1i); 257 x1r = x0r - (x1r << 1); 258 x1i = x0i - (x1i << 1); 259 x2r = x2r + (x3i); 260 x2i = x2i - (x3r); 261 x3i = x2r - (x3i << 1); 262 x3r = x2i + (x3r << 1); 263 264 *data = x0r; 265 *(data + 1) = x0i; 266 data += (del << 1); 267 268 *data = x2r; 269 *(data + 1) = x2i; 270 data += (del << 1); 271 272 *data = x1r; 273 *(data + 1) = x1i; 274 data += (del << 1); 275 276 *data = x3i; 277 *(data + 1) = x3r; 278 data += (del << 1); 279 } 280 data -= 2 * npoints; 281 data += 2; 282 } 283 for (; j <= (nodespacing * del) >> 1; j += nodespacing) { 284 w1h = *(twiddles + 2 * j); 285 w2h = *(twiddles + 2 * (j << 1)); 286 w3h = *(twiddles + 2 * j + 2 * (j << 1) - 512); 287 w1l = *(twiddles + 2 * j + 1); 288 w2l = *(twiddles + 2 * (j << 1) + 1); 289 w3l = *(twiddles + 2 * j + 2 * (j << 1) - 511); 290 291 for (k = in_loop_cnt; k != 0; k--) { 292 WORD32 tmp; 293 WORD32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; 294 295 data += (del << 1); 296 297 x1r = *data; 298 x1i = *(data + 1); 299 data += (del << 1); 300 301 x2r = *data; 302 x2i = *(data + 1); 303 data += (del << 1); 304 305 x3r = *data; 306 x3i = *(data + 1); 307 data -= 3 * (del << 1); 308 309 tmp = (ixheaacd_mult32(x1r, w1l) - ixheaacd_mult32(x1i, w1h)); 310 x1i = ixheaacd_mac32(ixheaacd_mult32(x1r, w1h), x1i, w1l); 311 x1r = tmp; 312 313 tmp = (ixheaacd_mult32(x2r, w2l) - ixheaacd_mult32(x2i, w2h)); 314 x2i = ixheaacd_mac32(ixheaacd_mult32(x2r, w2h), x2i, w2l); 315 x2r = tmp; 316 317 tmp = (ixheaacd_mult32(x3r, w3h) + ixheaacd_mult32(x3i, w3l)); 318 x3i = -ixheaacd_mult32(x3r, w3l) + ixheaacd_mult32(x3i, w3h); 319 x3r = tmp; 320 321 x0r = (*data); 322 x0i = (*(data + 1)); 323 324 x0r = x0r + (x2r); 325 x0i = x0i + (x2i); 326 x2r = x0r - (x2r << 1); 327 x2i = x0i - (x2i << 1); 328 x1r = x1r + x3r; 329 x1i = x1i + x3i; 330 x3r = x1r - (x3r << 1); 331 x3i = x1i - (x3i << 1); 332 333 x0r = x0r + (x1r); 334 x0i = x0i + (x1i); 335 x1r = x0r - (x1r << 1); 336 x1i = x0i - (x1i << 1); 337 x2r = x2r + (x3i); 338 x2i = x2i - (x3r); 339 x3i = x2r - (x3i << 1); 340 x3r = x2i + (x3r << 1); 341 342 *data = x0r; 343 *(data + 1) = x0i; 344 data += (del << 1); 345 346 *data = x2r; 347 *(data + 1) = x2i; 348 data += (del << 1); 349 350 *data = x1r; 351 *(data + 1) = x1i; 352 data += (del << 1); 353 354 *data = x3i; 355 *(data + 1) = x3r; 356 data += (del << 1); 357 } 358 data -= 2 * npoints; 359 data += 2; 360 } 361 for (; j <= sec_loop_cnt * 2; j += nodespacing) { 362 w1h = *(twiddles + 2 * j); 363 w2h = *(twiddles + 2 * (j << 1) - 512); 364 w3h = *(twiddles + 2 * j + 2 * (j << 1) - 512); 365 w1l = *(twiddles + 2 * j + 1); 366 w2l = *(twiddles + 2 * (j << 1) - 511); 367 w3l = *(twiddles + 2 * j + 2 * (j << 1) - 511); 368 369 for (k = in_loop_cnt; k != 0; k--) { 370 WORD32 tmp; 371 WORD32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; 372 373 data += (del << 1); 374 375 x1r = *data; 376 x1i = *(data + 1); 377 data += (del << 1); 378 379 x2r = *data; 380 x2i = *(data + 1); 381 data += (del << 1); 382 383 x3r = *data; 384 x3i = *(data + 1); 385 data -= 3 * (del << 1); 386 387 tmp = (ixheaacd_mult32(x1r, w1l) - ixheaacd_mult32(x1i, w1h)); 388 x1i = ixheaacd_mac32(ixheaacd_mult32(x1r, w1h), x1i, w1l); 389 x1r = tmp; 390 391 tmp = (ixheaacd_mult32(x2r, w2h) + ixheaacd_mult32(x2i, w2l)); 392 x2i = -ixheaacd_mult32(x2r, w2l) + ixheaacd_mult32(x2i, w2h); 393 x2r = tmp; 394 395 tmp = (ixheaacd_mult32(x3r, w3h) + ixheaacd_mult32(x3i, w3l)); 396 x3i = -ixheaacd_mult32(x3r, w3l) + ixheaacd_mult32(x3i, w3h); 397 x3r = tmp; 398 399 x0r = (*data); 400 x0i = (*(data + 1)); 401 402 x0r = x0r + (x2r); 403 x0i = x0i + (x2i); 404 x2r = x0r - (x2r << 1); 405 x2i = x0i - (x2i << 1); 406 x1r = x1r + x3r; 407 x1i = x1i + x3i; 408 x3r = x1r - (x3r << 1); 409 x3i = x1i - (x3i << 1); 410 411 x0r = x0r + (x1r); 412 x0i = x0i + (x1i); 413 x1r = x0r - (x1r << 1); 414 x1i = x0i - (x1i << 1); 415 x2r = x2r + (x3i); 416 x2i = x2i - (x3r); 417 x3i = x2r - (x3i << 1); 418 x3r = x2i + (x3r << 1); 419 420 *data = x0r; 421 *(data + 1) = x0i; 422 data += (del << 1); 423 424 *data = x2r; 425 *(data + 1) = x2i; 426 data += (del << 1); 427 428 *data = x1r; 429 *(data + 1) = x1i; 430 data += (del << 1); 431 432 *data = x3i; 433 *(data + 1) = x3r; 434 data += (del << 1); 435 } 436 data -= 2 * npoints; 437 data += 2; 438 } 439 for (; j < nodespacing * del; j += nodespacing) { 440 w1h = *(twiddles + 2 * j); 441 w2h = *(twiddles + 2 * (j << 1) - 512); 442 w3h = *(twiddles + 2 * j + 2 * (j << 1) - 1024); 443 w1l = *(twiddles + 2 * j + 1); 444 w2l = *(twiddles + 2 * (j << 1) - 511); 445 w3l = *(twiddles + 2 * j + 2 * (j << 1) - 1023); 446 447 for (k = in_loop_cnt; k != 0; k--) { 448 WORD32 tmp; 449 WORD32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; 450 451 data += (del << 1); 452 453 x1r = *data; 454 x1i = *(data + 1); 455 data += (del << 1); 456 457 x2r = *data; 458 x2i = *(data + 1); 459 data += (del << 1); 460 461 x3r = *data; 462 x3i = *(data + 1); 463 data -= 3 * (del << 1); 464 465 tmp = (ixheaacd_mult32(x1r, w1l) - ixheaacd_mult32(x1i, w1h)); 466 x1i = ixheaacd_mac32(ixheaacd_mult32(x1r, w1h), x1i, w1l); 467 x1r = tmp; 468 469 tmp = (ixheaacd_mult32(x2r, w2h) + ixheaacd_mult32(x2i, w2l)); 470 x2i = -ixheaacd_mult32(x2r, w2l) + ixheaacd_mult32(x2i, w2h); 471 x2r = tmp; 472 473 tmp = (-ixheaacd_mult32(x3r, w3l) + ixheaacd_mult32(x3i, w3h)); 474 x3i = ixheaacd_mac32(ixheaacd_mult32(x3r, w3h), x3i, w3l); 475 x3r = tmp; 476 477 x0r = (*data); 478 x0i = (*(data + 1)); 479 480 x0r = x0r + (x2r); 481 x0i = x0i + (x2i); 482 x2r = x0r - (x2r << 1); 483 x2i = x0i - (x2i << 1); 484 x1r = x1r + x3r; 485 x1i = x1i - x3i; 486 x3r = x1r - (x3r << 1); 487 x3i = x1i + (x3i << 1); 488 489 x0r = x0r + (x1r); 490 x0i = x0i + (x1i); 491 x1r = x0r - (x1r << 1); 492 x1i = x0i - (x1i << 1); 493 x2r = x2r + (x3i); 494 x2i = x2i - (x3r); 495 x3i = x2r - (x3i << 1); 496 x3r = x2i + (x3r << 1); 497 498 *data = x0r; 499 *(data + 1) = x0i; 500 data += (del << 1); 501 502 *data = x2r; 503 *(data + 1) = x2i; 504 data += (del << 1); 505 506 *data = x1r; 507 *(data + 1) = x1i; 508 data += (del << 1); 509 510 *data = x3i; 511 *(data + 1) = x3r; 512 data += (del << 1); 513 } 514 data -= 2 * npoints; 515 data += 2; 516 } 517 nodespacing >>= 2; 518 del <<= 2; 519 in_loop_cnt >>= 2; 520 } 521 522 for (i = 0; i < 2 * nlength; i += 2) { 523 fin_re[i] = y[i]; 524 fin_im[i] = y[i + 1]; 525 } 526 527 return; 528 } 529 530 VOID ixheaacd_complex_fft_p2_dec(WORD32 *xr, WORD32 *xi, WORD32 nlength, 531 WORD32 fft_mode, WORD32 *preshift) { 532 WORD32 i, j, k, n_stages; 533 WORD32 h2, x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; 534 WORD32 del, nodespacing, in_loop_cnt; 535 WORD32 not_power_4; 536 WORD32 npts, shift; 537 WORD32 dig_rev_shift; 538 WORD32 ptr_x[1024]; 539 WORD32 y[1024]; 540 WORD32 npoints = nlength; 541 WORD32 n = 0; 542 WORD32 *ptr_y = y; 543 const WORD32 *ptr_w; 544 dig_rev_shift = ixheaacd_norm32(npoints) + 1 - 16; 545 n_stages = 30 - ixheaacd_norm32(npoints); 546 not_power_4 = n_stages & 1; 547 548 n_stages = n_stages >> 1; 549 550 npts = npoints; 551 while (npts >> 1) { 552 n++; 553 npts = npts >> 1; 554 } 555 556 if (n % 2 == 0) 557 shift = ((n + 4)) / 2; 558 else 559 shift = ((n + 3) / 2); 560 561 for (i = 0; i < nlength; i++) { 562 ptr_x[2 * i] = (xr[i] / (1 << (shift))); 563 ptr_x[2 * i + 1] = (xi[i] / (1 << (shift))); 564 } 565 566 if (fft_mode == -1) { 567 ptr_w = ixheaacd_twiddle_table_fft_32x32; 568 569 for (i = 0; i < npoints; i += 4) { 570 WORD32 *inp = ptr_x; 571 572 DIG_REV(i, dig_rev_shift, h2); 573 if (not_power_4) { 574 h2 += 1; 575 h2 &= ~1; 576 } 577 inp += (h2); 578 579 x0r = *inp; 580 x0i = *(inp + 1); 581 inp += (npoints >> 1); 582 583 x1r = *inp; 584 x1i = *(inp + 1); 585 inp += (npoints >> 1); 586 587 x2r = *inp; 588 x2i = *(inp + 1); 589 inp += (npoints >> 1); 590 591 x3r = *inp; 592 x3i = *(inp + 1); 593 594 x0r = x0r + x2r; 595 x0i = x0i + x2i; 596 x2r = x0r - (x2r << 1); 597 x2i = x0i - (x2i << 1); 598 x1r = x1r + x3r; 599 x1i = x1i + x3i; 600 x3r = x1r - (x3r << 1); 601 x3i = x1i - (x3i << 1); 602 603 x0r = x0r + x1r; 604 x0i = x0i + x1i; 605 x1r = x0r - (x1r << 1); 606 x1i = x0i - (x1i << 1); 607 x2r = x2r + x3i; 608 x2i = x2i - x3r; 609 x3i = x2r - (x3i << 1); 610 x3r = x2i + (x3r << 1); 611 612 *ptr_y++ = x0r; 613 *ptr_y++ = x0i; 614 *ptr_y++ = x2r; 615 *ptr_y++ = x2i; 616 *ptr_y++ = x1r; 617 *ptr_y++ = x1i; 618 *ptr_y++ = x3i; 619 *ptr_y++ = x3r; 620 } 621 ptr_y -= 2 * npoints; 622 del = 4; 623 nodespacing = 64; 624 in_loop_cnt = npoints >> 4; 625 for (i = n_stages - 1; i > 0; i--) { 626 const WORD32 *twiddles = ptr_w; 627 WORD32 *data = ptr_y; 628 WORD32 w1h, w2h, w3h, w1l, w2l, w3l; 629 WORD32 sec_loop_cnt; 630 631 for (k = in_loop_cnt; k != 0; k--) { 632 x0r = (*data); 633 x0i = (*(data + 1)); 634 data += (del << 1); 635 636 x1r = (*data); 637 x1i = (*(data + 1)); 638 data += (del << 1); 639 640 x2r = (*data); 641 x2i = (*(data + 1)); 642 data += (del << 1); 643 644 x3r = (*data); 645 x3i = (*(data + 1)); 646 data -= 3 * (del << 1); 647 648 x0r = x0r + x2r; 649 x0i = x0i + x2i; 650 x2r = x0r - (x2r << 1); 651 x2i = x0i - (x2i << 1); 652 x1r = x1r + x3r; 653 x1i = x1i + x3i; 654 x3r = x1r - (x3r << 1); 655 x3i = x1i - (x3i << 1); 656 657 x0r = x0r + x1r; 658 x0i = x0i + x1i; 659 x1r = x0r - (x1r << 1); 660 x1i = x0i - (x1i << 1); 661 x2r = x2r + x3i; 662 x2i = x2i - x3r; 663 x3i = x2r - (x3i << 1); 664 x3r = x2i + (x3r << 1); 665 666 *data = x0r; 667 *(data + 1) = x0i; 668 data += (del << 1); 669 670 *data = x2r; 671 *(data + 1) = x2i; 672 data += (del << 1); 673 674 *data = x1r; 675 *(data + 1) = x1i; 676 data += (del << 1); 677 678 *data = x3i; 679 *(data + 1) = x3r; 680 data += (del << 1); 681 } 682 data = ptr_y + 2; 683 684 sec_loop_cnt = (nodespacing * del); 685 sec_loop_cnt = (sec_loop_cnt / 4) + (sec_loop_cnt / 8) - 686 (sec_loop_cnt / 16) + (sec_loop_cnt / 32) - 687 (sec_loop_cnt / 64) + (sec_loop_cnt / 128) - 688 (sec_loop_cnt / 256); 689 j = nodespacing; 690 691 for (j = nodespacing; j <= sec_loop_cnt; j += nodespacing) { 692 w1h = *(twiddles + 2 * j); 693 w1l = *(twiddles + 2 * j + 1); 694 w2h = *(twiddles + 2 * (j << 1)); 695 w2l = *(twiddles + 2 * (j << 1) + 1); 696 w3h = *(twiddles + 2 * j + 2 * (j << 1)); 697 w3l = *(twiddles + 2 * j + 2 * (j << 1) + 1); 698 699 for (k = in_loop_cnt; k != 0; k--) { 700 WORD32 tmp; 701 WORD32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; 702 703 data += (del << 1); 704 705 x1r = *data; 706 x1i = *(data + 1); 707 data += (del << 1); 708 709 x2r = *data; 710 x2i = *(data + 1); 711 data += (del << 1); 712 713 x3r = *data; 714 x3i = *(data + 1); 715 data -= 3 * (del << 1); 716 717 tmp = (ixheaacd_mult32(x1r, w1l) - ixheaacd_mult32(x1i, w1h)); 718 x1i = ixheaacd_mac32(ixheaacd_mult32(x1r, w1h), x1i, w1l); 719 x1r = tmp; 720 721 tmp = (ixheaacd_mult32(x2r, w2l) - ixheaacd_mult32(x2i, w2h)); 722 x2i = ixheaacd_mac32(ixheaacd_mult32(x2r, w2h), x2i, w2l); 723 x2r = tmp; 724 725 tmp = (ixheaacd_mult32(x3r, w3l) - ixheaacd_mult32(x3i, w3h)); 726 x3i = ixheaacd_mac32(ixheaacd_mult32(x3r, w3h), x3i, w3l); 727 x3r = tmp; 728 729 x0r = (*data); 730 x0i = (*(data + 1)); 731 732 x0r = x0r + (x2r); 733 x0i = x0i + (x2i); 734 x2r = x0r - (x2r << 1); 735 x2i = x0i - (x2i << 1); 736 x1r = x1r + x3r; 737 x1i = x1i + x3i; 738 x3r = x1r - (x3r << 1); 739 x3i = x1i - (x3i << 1); 740 741 x0r = x0r + (x1r); 742 x0i = x0i + (x1i); 743 x1r = x0r - (x1r << 1); 744 x1i = x0i - (x1i << 1); 745 x2r = x2r + (x3i); 746 x2i = x2i - (x3r); 747 x3i = x2r - (x3i << 1); 748 x3r = x2i + (x3r << 1); 749 750 *data = x0r; 751 *(data + 1) = x0i; 752 data += (del << 1); 753 754 *data = x2r; 755 *(data + 1) = x2i; 756 data += (del << 1); 757 758 *data = x1r; 759 *(data + 1) = x1i; 760 data += (del << 1); 761 762 *data = x3i; 763 *(data + 1) = x3r; 764 data += (del << 1); 765 } 766 data -= 2 * npoints; 767 data += 2; 768 } 769 for (; j <= (nodespacing * del) >> 1; j += nodespacing) { 770 w1h = *(twiddles + 2 * j); 771 w2h = *(twiddles + 2 * (j << 1)); 772 w3h = *(twiddles + 2 * j + 2 * (j << 1) - 512); 773 w1l = *(twiddles + 2 * j + 1); 774 w2l = *(twiddles + 2 * (j << 1) + 1); 775 w3l = *(twiddles + 2 * j + 2 * (j << 1) - 511); 776 777 for (k = in_loop_cnt; k != 0; k--) { 778 WORD32 tmp; 779 WORD32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; 780 data += (del << 1); 781 782 x1r = *data; 783 x1i = *(data + 1); 784 data += (del << 1); 785 786 x2r = *data; 787 x2i = *(data + 1); 788 data += (del << 1); 789 790 x3r = *data; 791 x3i = *(data + 1); 792 data -= 3 * (del << 1); 793 794 tmp = (ixheaacd_mult32(x1r, w1l) - ixheaacd_mult32(x1i, w1h)); 795 x1i = ixheaacd_mac32(ixheaacd_mult32(x1r, w1h), x1i, w1l); 796 x1r = tmp; 797 798 tmp = (ixheaacd_mult32(x2r, w2l) - ixheaacd_mult32(x2i, w2h)); 799 x2i = ixheaacd_mac32(ixheaacd_mult32(x2r, w2h), x2i, w2l); 800 x2r = tmp; 801 802 tmp = (ixheaacd_mult32(x3r, w3h) + ixheaacd_mult32(x3i, w3l)); 803 x3i = -ixheaacd_mult32(x3r, w3l) + ixheaacd_mult32(x3i, w3h); 804 x3r = tmp; 805 806 x0r = (*data); 807 x0i = (*(data + 1)); 808 809 x0r = x0r + (x2r); 810 x0i = x0i + (x2i); 811 x2r = x0r - (x2r << 1); 812 x2i = x0i - (x2i << 1); 813 x1r = x1r + x3r; 814 x1i = x1i + x3i; 815 x3r = x1r - (x3r << 1); 816 x3i = x1i - (x3i << 1); 817 818 x0r = x0r + (x1r); 819 x0i = x0i + (x1i); 820 x1r = x0r - (x1r << 1); 821 x1i = x0i - (x1i << 1); 822 x2r = x2r + (x3i); 823 x2i = x2i - (x3r); 824 x3i = x2r - (x3i << 1); 825 x3r = x2i + (x3r << 1); 826 827 *data = x0r; 828 *(data + 1) = x0i; 829 data += (del << 1); 830 831 *data = x2r; 832 *(data + 1) = x2i; 833 data += (del << 1); 834 835 *data = x1r; 836 *(data + 1) = x1i; 837 data += (del << 1); 838 839 *data = x3i; 840 *(data + 1) = x3r; 841 data += (del << 1); 842 } 843 data -= 2 * npoints; 844 data += 2; 845 } 846 for (; j <= sec_loop_cnt * 2; j += nodespacing) { 847 w1h = *(twiddles + 2 * j); 848 w2h = *(twiddles + 2 * (j << 1) - 512); 849 w3h = *(twiddles + 2 * j + 2 * (j << 1) - 512); 850 w1l = *(twiddles + 2 * j + 1); 851 w2l = *(twiddles + 2 * (j << 1) - 511); 852 w3l = *(twiddles + 2 * j + 2 * (j << 1) - 511); 853 854 for (k = in_loop_cnt; k != 0; k--) { 855 WORD32 tmp; 856 WORD32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; 857 858 data += (del << 1); 859 860 x1r = *data; 861 x1i = *(data + 1); 862 data += (del << 1); 863 864 x2r = *data; 865 x2i = *(data + 1); 866 data += (del << 1); 867 868 x3r = *data; 869 x3i = *(data + 1); 870 data -= 3 * (del << 1); 871 872 tmp = (ixheaacd_mult32(x1r, w1l) - ixheaacd_mult32(x1i, w1h)); 873 x1i = ixheaacd_mac32(ixheaacd_mult32(x1r, w1h), x1i, w1l); 874 x1r = tmp; 875 876 tmp = (ixheaacd_mult32(x2r, w2h) + ixheaacd_mult32(x2i, w2l)); 877 x2i = -ixheaacd_mult32(x2r, w2l) + ixheaacd_mult32(x2i, w2h); 878 x2r = tmp; 879 880 tmp = (ixheaacd_mult32(x3r, w3h) + ixheaacd_mult32(x3i, w3l)); 881 x3i = -ixheaacd_mult32(x3r, w3l) + ixheaacd_mult32(x3i, w3h); 882 x3r = tmp; 883 884 x0r = (*data); 885 x0i = (*(data + 1)); 886 887 x0r = x0r + (x2r); 888 x0i = x0i + (x2i); 889 x2r = x0r - (x2r << 1); 890 x2i = x0i - (x2i << 1); 891 x1r = x1r + x3r; 892 x1i = x1i + x3i; 893 x3r = x1r - (x3r << 1); 894 x3i = x1i - (x3i << 1); 895 896 x0r = x0r + (x1r); 897 x0i = x0i + (x1i); 898 x1r = x0r - (x1r << 1); 899 x1i = x0i - (x1i << 1); 900 x2r = x2r + (x3i); 901 x2i = x2i - (x3r); 902 x3i = x2r - (x3i << 1); 903 x3r = x2i + (x3r << 1); 904 905 *data = x0r; 906 *(data + 1) = x0i; 907 data += (del << 1); 908 909 *data = x2r; 910 *(data + 1) = x2i; 911 data += (del << 1); 912 913 *data = x1r; 914 *(data + 1) = x1i; 915 data += (del << 1); 916 917 *data = x3i; 918 *(data + 1) = x3r; 919 data += (del << 1); 920 } 921 data -= 2 * npoints; 922 data += 2; 923 } 924 for (; j < nodespacing * del; j += nodespacing) { 925 w1h = *(twiddles + 2 * j); 926 w2h = *(twiddles + 2 * (j << 1) - 512); 927 w3h = *(twiddles + 2 * j + 2 * (j << 1) - 1024); 928 w1l = *(twiddles + 2 * j + 1); 929 w2l = *(twiddles + 2 * (j << 1) - 511); 930 w3l = *(twiddles + 2 * j + 2 * (j << 1) - 1023); 931 932 for (k = in_loop_cnt; k != 0; k--) { 933 WORD32 tmp; 934 WORD32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; 935 936 data += (del << 1); 937 938 x1r = *data; 939 x1i = *(data + 1); 940 data += (del << 1); 941 942 x2r = *data; 943 x2i = *(data + 1); 944 data += (del << 1); 945 946 x3r = *data; 947 x3i = *(data + 1); 948 data -= 3 * (del << 1); 949 950 tmp = (ixheaacd_mult32(x1r, w1l) - ixheaacd_mult32(x1i, w1h)); 951 x1i = ixheaacd_mac32(ixheaacd_mult32(x1r, w1h), x1i, w1l); 952 x1r = tmp; 953 954 tmp = (ixheaacd_mult32(x2r, w2h) + ixheaacd_mult32(x2i, w2l)); 955 x2i = -ixheaacd_mult32(x2r, w2l) + ixheaacd_mult32(x2i, w2h); 956 x2r = tmp; 957 958 tmp = (-ixheaacd_mult32(x3r, w3l) + ixheaacd_mult32(x3i, w3h)); 959 x3i = ixheaacd_mac32(ixheaacd_mult32(x3r, w3h), x3i, w3l); 960 x3r = tmp; 961 962 x0r = (*data); 963 x0i = (*(data + 1)); 964 965 x0r = x0r + (x2r); 966 x0i = x0i + (x2i); 967 x2r = x0r - (x2r << 1); 968 x2i = x0i - (x2i << 1); 969 x1r = x1r + x3r; 970 x1i = x1i - x3i; 971 x3r = x1r - (x3r << 1); 972 x3i = x1i + (x3i << 1); 973 974 x0r = x0r + (x1r); 975 x0i = x0i + (x1i); 976 x1r = x0r - (x1r << 1); 977 x1i = x0i - (x1i << 1); 978 x2r = x2r + (x3i); 979 x2i = x2i - (x3r); 980 x3i = x2r - (x3i << 1); 981 x3r = x2i + (x3r << 1); 982 983 *data = x0r; 984 *(data + 1) = x0i; 985 data += (del << 1); 986 987 *data = x2r; 988 *(data + 1) = x2i; 989 data += (del << 1); 990 991 *data = x1r; 992 *(data + 1) = x1i; 993 data += (del << 1); 994 995 *data = x3i; 996 *(data + 1) = x3r; 997 data += (del << 1); 998 } 999 data -= 2 * npoints; 1000 data += 2; 1001 } 1002 nodespacing >>= 2; 1003 del <<= 2; 1004 in_loop_cnt >>= 2; 1005 } 1006 if (not_power_4) { 1007 const WORD32 *twiddles = ptr_w; 1008 nodespacing <<= 1; 1009 shift += 1; 1010 1011 for (j = del / 2; j != 0; j--) { 1012 WORD32 w1h = *twiddles; 1013 WORD32 w1l = *(twiddles + 1); 1014 WORD32 tmp; 1015 twiddles += nodespacing * 2; 1016 1017 x0r = *ptr_y; 1018 x0i = *(ptr_y + 1); 1019 ptr_y += (del << 1); 1020 1021 x1r = *ptr_y; 1022 x1i = *(ptr_y + 1); 1023 1024 tmp = (ixheaacd_mult32(x1r, w1l) - ixheaacd_mult32(x1i, w1h)); 1025 x1i = ixheaacd_mac32(ixheaacd_mult32(x1r, w1h), x1i, w1l); 1026 x1r = tmp; 1027 1028 *ptr_y = (x0r) / 2 - (x1r) / 2; 1029 *(ptr_y + 1) = (x0i) / 2 - (x1i) / 2; 1030 ptr_y -= (del << 1); 1031 1032 *ptr_y = (x0r) / 2 + (x1r) / 2; 1033 *(ptr_y + 1) = (x0i) / 2 + (x1i) / 2; 1034 ptr_y += 2; 1035 } 1036 twiddles = ptr_w; 1037 for (j = del / 2; j != 0; j--) { 1038 WORD32 w1h = *twiddles; 1039 WORD32 w1l = *(twiddles + 1); 1040 WORD32 tmp; 1041 twiddles += nodespacing * 2; 1042 1043 x0r = *ptr_y; 1044 x0i = *(ptr_y + 1); 1045 ptr_y += (del << 1); 1046 1047 x1r = *ptr_y; 1048 x1i = *(ptr_y + 1); 1049 1050 tmp = (ixheaacd_mult32(x1r, w1h) + ixheaacd_mult32(x1i, w1l)); 1051 x1i = -ixheaacd_mult32(x1r, w1l) + ixheaacd_mult32(x1i, w1h); 1052 x1r = tmp; 1053 1054 *ptr_y = (x0r) / 2 - (x1r) / 2; 1055 *(ptr_y + 1) = (x0i) / 2 - (x1i) / 2; 1056 ptr_y -= (del << 1); 1057 1058 *ptr_y = (x0r) / 2 + (x1r) / 2; 1059 *(ptr_y + 1) = (x0i) / 2 + (x1i) / 2; 1060 ptr_y += 2; 1061 } 1062 } 1063 1064 } 1065 1066 else { 1067 ptr_w = ixheaacd_twiddle_table_fft_32x32; 1068 1069 for (i = 0; i < npoints; i += 4) { 1070 WORD32 *inp = ptr_x; 1071 1072 DIG_REV(i, dig_rev_shift, h2); 1073 if (not_power_4) { 1074 h2 += 1; 1075 h2 &= ~1; 1076 } 1077 inp += (h2); 1078 1079 x0r = *inp; 1080 x0i = *(inp + 1); 1081 inp += (npoints >> 1); 1082 1083 x1r = *inp; 1084 x1i = *(inp + 1); 1085 inp += (npoints >> 1); 1086 1087 x2r = *inp; 1088 x2i = *(inp + 1); 1089 inp += (npoints >> 1); 1090 1091 x3r = *inp; 1092 x3i = *(inp + 1); 1093 1094 x0r = x0r + x2r; 1095 x0i = x0i + x2i; 1096 x2r = x0r - (x2r << 1); 1097 x2i = x0i - (x2i << 1); 1098 x1r = x1r + x3r; 1099 x1i = x1i + x3i; 1100 x3r = x1r - (x3r << 1); 1101 x3i = x1i - (x3i << 1); 1102 1103 x0r = x0r + x1r; 1104 x0i = x0i + x1i; 1105 x1r = x0r - (x1r << 1); 1106 x1i = x0i - (x1i << 1); 1107 x2r = x2r - x3i; 1108 x2i = x2i + x3r; 1109 x3i = x2r + (x3i << 1); 1110 x3r = x2i - (x3r << 1); 1111 1112 *ptr_y++ = x0r; 1113 *ptr_y++ = x0i; 1114 *ptr_y++ = x2r; 1115 *ptr_y++ = x2i; 1116 *ptr_y++ = x1r; 1117 *ptr_y++ = x1i; 1118 *ptr_y++ = x3i; 1119 *ptr_y++ = x3r; 1120 } 1121 ptr_y -= 2 * npoints; 1122 del = 4; 1123 nodespacing = 64; 1124 in_loop_cnt = npoints >> 4; 1125 for (i = n_stages - 1; i > 0; i--) { 1126 const WORD32 *twiddles = ptr_w; 1127 WORD32 *data = ptr_y; 1128 WORD32 w1h, w2h, w3h, w1l, w2l, w3l; 1129 WORD32 sec_loop_cnt; 1130 1131 for (k = in_loop_cnt; k != 0; k--) { 1132 x0r = (*data); 1133 x0i = (*(data + 1)); 1134 data += (del << 1); 1135 1136 x1r = (*data); 1137 x1i = (*(data + 1)); 1138 data += (del << 1); 1139 1140 x2r = (*data); 1141 x2i = (*(data + 1)); 1142 data += (del << 1); 1143 1144 x3r = (*data); 1145 x3i = (*(data + 1)); 1146 data -= 3 * (del << 1); 1147 1148 x0r = x0r + x2r; 1149 x0i = x0i + x2i; 1150 x2r = x0r - (x2r << 1); 1151 x2i = x0i - (x2i << 1); 1152 x1r = x1r + x3r; 1153 x1i = x1i + x3i; 1154 x3r = x1r - (x3r << 1); 1155 x3i = x1i - (x3i << 1); 1156 1157 x0r = ixheaacd_add32_sat(x0r, x1r); 1158 x0i = ixheaacd_add32_sat(x0i, x1i); 1159 x1r = ixheaacd_sub32_sat(x0r, (x1r << 1)); 1160 x1i = ixheaacd_sub32_sat(x0i, (x1i << 1)); 1161 x2r = ixheaacd_sub32_sat(x2r, x3i); 1162 x2i = ixheaacd_add32_sat(x2i, x3r); 1163 x3i = ixheaacd_add32_sat(x2r, (x3i << 1)); 1164 x3r = ixheaacd_sub32_sat(x2i, (x3r << 1)); 1165 1166 *data = x0r; 1167 *(data + 1) = x0i; 1168 data += (del << 1); 1169 1170 *data = x2r; 1171 *(data + 1) = x2i; 1172 data += (del << 1); 1173 1174 *data = x1r; 1175 *(data + 1) = x1i; 1176 data += (del << 1); 1177 1178 *data = x3i; 1179 *(data + 1) = x3r; 1180 data += (del << 1); 1181 } 1182 data = ptr_y + 2; 1183 1184 sec_loop_cnt = (nodespacing * del); 1185 sec_loop_cnt = (sec_loop_cnt / 4) + (sec_loop_cnt / 8) - 1186 (sec_loop_cnt / 16) + (sec_loop_cnt / 32) - 1187 (sec_loop_cnt / 64) + (sec_loop_cnt / 128) - 1188 (sec_loop_cnt / 256); 1189 j = nodespacing; 1190 1191 for (j = nodespacing; j <= sec_loop_cnt; j += nodespacing) { 1192 w1h = *(twiddles + 2 * j); 1193 w2h = *(twiddles + 2 * (j << 1)); 1194 w3h = *(twiddles + 2 * j + 2 * (j << 1)); 1195 w1l = *(twiddles + 2 * j + 1); 1196 w2l = *(twiddles + 2 * (j << 1) + 1); 1197 w3l = *(twiddles + 2 * j + 2 * (j << 1) + 1); 1198 1199 for (k = in_loop_cnt; k != 0; k--) { 1200 WORD32 tmp; 1201 WORD32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; 1202 1203 data += (del << 1); 1204 1205 x1r = *data; 1206 x1i = *(data + 1); 1207 data += (del << 1); 1208 1209 x2r = *data; 1210 x2i = *(data + 1); 1211 data += (del << 1); 1212 1213 x3r = *data; 1214 x3i = *(data + 1); 1215 data -= 3 * (del << 1); 1216 1217 tmp = (ixheaacd_mult32(x1r, w1l) + ixheaacd_mult32(x1i, w1h)); 1218 x1i = ixheaacd_mac32(-ixheaacd_mult32(x1r, w1h), x1i, w1l); 1219 x1r = tmp; 1220 1221 tmp = (ixheaacd_mult32(x2r, w2l) + ixheaacd_mult32(x2i, w2h)); 1222 x2i = ixheaacd_mac32(-ixheaacd_mult32(x2r, w2h), x2i, w2l); 1223 x2r = tmp; 1224 1225 tmp = (ixheaacd_mult32(x3r, w3l) + ixheaacd_mult32(x3i, w3h)); 1226 x3i = ixheaacd_mac32(-ixheaacd_mult32(x3r, w3h), x3i, w3l); 1227 x3r = tmp; 1228 1229 x0r = (*data); 1230 x0i = (*(data + 1)); 1231 1232 x0r = x0r + (x2r); 1233 x0i = x0i + (x2i); 1234 x2r = x0r - (x2r << 1); 1235 x2i = x0i - (x2i << 1); 1236 x1r = x1r + x3r; 1237 x1i = x1i + x3i; 1238 x3r = x1r - (x3r << 1); 1239 x3i = x1i - (x3i << 1); 1240 1241 x0r = x0r + (x1r); 1242 x0i = x0i + (x1i); 1243 x1r = x0r - (x1r << 1); 1244 x1i = x0i - (x1i << 1); 1245 x2r = x2r - (x3i); 1246 x2i = x2i + (x3r); 1247 x3i = x2r + (x3i << 1); 1248 x3r = x2i - (x3r << 1); 1249 1250 *data = x0r; 1251 *(data + 1) = x0i; 1252 data += (del << 1); 1253 1254 *data = x2r; 1255 *(data + 1) = x2i; 1256 data += (del << 1); 1257 1258 *data = x1r; 1259 *(data + 1) = x1i; 1260 data += (del << 1); 1261 1262 *data = x3i; 1263 *(data + 1) = x3r; 1264 data += (del << 1); 1265 } 1266 data -= 2 * npoints; 1267 data += 2; 1268 } 1269 for (; j <= (nodespacing * del) >> 1; j += nodespacing) { 1270 w1h = *(twiddles + 2 * j); 1271 w2h = *(twiddles + 2 * (j << 1)); 1272 w3h = *(twiddles + 2 * j + 2 * (j << 1) - 512); 1273 w1l = *(twiddles + 2 * j + 1); 1274 w2l = *(twiddles + 2 * (j << 1) + 1); 1275 w3l = *(twiddles + 2 * j + 2 * (j << 1) - 511); 1276 1277 for (k = in_loop_cnt; k != 0; k--) { 1278 WORD32 tmp; 1279 WORD32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; 1280 1281 data += (del << 1); 1282 1283 x1r = *data; 1284 x1i = *(data + 1); 1285 data += (del << 1); 1286 1287 x2r = *data; 1288 x2i = *(data + 1); 1289 data += (del << 1); 1290 1291 x3r = *data; 1292 x3i = *(data + 1); 1293 data -= 3 * (del << 1); 1294 1295 tmp = (ixheaacd_mult32(x1r, w1l) + ixheaacd_mult32(x1i, w1h)); 1296 x1i = ixheaacd_mac32(-ixheaacd_mult32(x1r, w1h), x1i, w1l); 1297 x1r = tmp; 1298 1299 tmp = (ixheaacd_mult32(x2r, w2l) + ixheaacd_mult32(x2i, w2h)); 1300 x2i = ixheaacd_mac32(-ixheaacd_mult32(x2r, w2h), x2i, w2l); 1301 x2r = tmp; 1302 1303 tmp = (ixheaacd_mult32(x3r, w3h) - ixheaacd_mult32(x3i, w3l)); 1304 x3i = ixheaacd_mult32(x3r, w3l) + ixheaacd_mult32(x3i, w3h); 1305 x3r = tmp; 1306 1307 x0r = (*data); 1308 x0i = (*(data + 1)); 1309 1310 x0r = x0r + (x2r); 1311 x0i = x0i + (x2i); 1312 x2r = x0r - (x2r << 1); 1313 x2i = x0i - (x2i << 1); 1314 x1r = x1r + x3r; 1315 x1i = x1i + x3i; 1316 x3r = x1r - (x3r << 1); 1317 x3i = x1i - (x3i << 1); 1318 1319 x0r = x0r + (x1r); 1320 x0i = x0i + (x1i); 1321 x1r = x0r - (x1r << 1); 1322 x1i = x0i - (x1i << 1); 1323 x2r = x2r - (x3i); 1324 x2i = x2i + (x3r); 1325 x3i = x2r + (x3i << 1); 1326 x3r = x2i - (x3r << 1); 1327 1328 *data = x0r; 1329 *(data + 1) = x0i; 1330 data += (del << 1); 1331 1332 *data = x2r; 1333 *(data + 1) = x2i; 1334 data += (del << 1); 1335 1336 *data = x1r; 1337 *(data + 1) = x1i; 1338 data += (del << 1); 1339 1340 *data = x3i; 1341 *(data + 1) = x3r; 1342 data += (del << 1); 1343 } 1344 data -= 2 * npoints; 1345 data += 2; 1346 } 1347 for (; j <= sec_loop_cnt * 2; j += nodespacing) { 1348 w1h = *(twiddles + 2 * j); 1349 w2h = *(twiddles + 2 * (j << 1) - 512); 1350 w3h = *(twiddles + 2 * j + 2 * (j << 1) - 512); 1351 w1l = *(twiddles + 2 * j + 1); 1352 w2l = *(twiddles + 2 * (j << 1) - 511); 1353 w3l = *(twiddles + 2 * j + 2 * (j << 1) - 511); 1354 1355 for (k = in_loop_cnt; k != 0; k--) { 1356 WORD32 tmp; 1357 WORD32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; 1358 1359 data += (del << 1); 1360 1361 x1r = *data; 1362 x1i = *(data + 1); 1363 data += (del << 1); 1364 1365 x2r = *data; 1366 x2i = *(data + 1); 1367 data += (del << 1); 1368 1369 x3r = *data; 1370 x3i = *(data + 1); 1371 data -= 3 * (del << 1); 1372 1373 tmp = (ixheaacd_mult32(x1r, w1l) + ixheaacd_mult32(x1i, w1h)); 1374 x1i = ixheaacd_mac32(-ixheaacd_mult32(x1r, w1h), x1i, w1l); 1375 x1r = tmp; 1376 1377 tmp = (ixheaacd_mult32(x2r, w2h) - ixheaacd_mult32(x2i, w2l)); 1378 x2i = ixheaacd_mult32(x2r, w2l) + ixheaacd_mult32(x2i, w2h); 1379 x2r = tmp; 1380 1381 tmp = (ixheaacd_mult32(x3r, w3h) - ixheaacd_mult32(x3i, w3l)); 1382 x3i = ixheaacd_mult32(x3r, w3l) + ixheaacd_mult32(x3i, w3h); 1383 x3r = tmp; 1384 1385 x0r = (*data); 1386 x0i = (*(data + 1)); 1387 1388 x0r = x0r + (x2r); 1389 x0i = x0i + (x2i); 1390 x2r = x0r - (x2r << 1); 1391 x2i = x0i - (x2i << 1); 1392 x1r = x1r + x3r; 1393 x1i = x1i + x3i; 1394 x3r = x1r - (x3r << 1); 1395 x3i = x1i - (x3i << 1); 1396 1397 x0r = x0r + (x1r); 1398 x0i = x0i + (x1i); 1399 x1r = x0r - (x1r << 1); 1400 x1i = x0i - (x1i << 1); 1401 x2r = x2r - (x3i); 1402 x2i = x2i + (x3r); 1403 x3i = x2r + (x3i << 1); 1404 x3r = x2i - (x3r << 1); 1405 1406 *data = x0r; 1407 *(data + 1) = x0i; 1408 data += (del << 1); 1409 1410 *data = x2r; 1411 *(data + 1) = x2i; 1412 data += (del << 1); 1413 1414 *data = x1r; 1415 *(data + 1) = x1i; 1416 data += (del << 1); 1417 1418 *data = x3i; 1419 *(data + 1) = x3r; 1420 data += (del << 1); 1421 } 1422 data -= 2 * npoints; 1423 data += 2; 1424 } 1425 for (; j < nodespacing * del; j += nodespacing) { 1426 w1h = *(twiddles + 2 * j); 1427 w2h = *(twiddles + 2 * (j << 1) - 512); 1428 w3h = *(twiddles + 2 * j + 2 * (j << 1) - 1024); 1429 w1l = *(twiddles + 2 * j + 1); 1430 w2l = *(twiddles + 2 * (j << 1) - 511); 1431 w3l = *(twiddles + 2 * j + 2 * (j << 1) - 1023); 1432 1433 for (k = in_loop_cnt; k != 0; k--) { 1434 WORD32 tmp; 1435 WORD32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; 1436 1437 data += (del << 1); 1438 1439 x1r = *data; 1440 x1i = *(data + 1); 1441 data += (del << 1); 1442 1443 x2r = *data; 1444 x2i = *(data + 1); 1445 data += (del << 1); 1446 1447 x3r = *data; 1448 x3i = *(data + 1); 1449 data -= 3 * (del << 1); 1450 1451 tmp = (ixheaacd_mult32(x1r, w1l) + ixheaacd_mult32(x1i, w1h)); 1452 x1i = ixheaacd_mac32(-ixheaacd_mult32(x1r, w1h), x1i, w1l); 1453 x1r = tmp; 1454 1455 tmp = (ixheaacd_mult32(x2r, w2h) - ixheaacd_mult32(x2i, w2l)); 1456 x2i = ixheaacd_mult32(x2r, w2l) + ixheaacd_mult32(x2i, w2h); 1457 x2r = tmp; 1458 1459 tmp = (-ixheaacd_mult32(x3r, w3l) - ixheaacd_mult32(x3i, w3h)); 1460 x3i = ixheaacd_mac32(-ixheaacd_mult32(x3r, w3h), x3i, w3l); 1461 x3r = tmp; 1462 1463 x0r = (*data); 1464 x0i = (*(data + 1)); 1465 1466 x0r = x0r + (x2r); 1467 x0i = x0i + (x2i); 1468 x2r = x0r - (x2r << 1); 1469 x2i = x0i - (x2i << 1); 1470 x1r = x1r + x3r; 1471 x1i = x1i - x3i; 1472 x3r = x1r - (x3r << 1); 1473 x3i = x1i + (x3i << 1); 1474 1475 x0r = x0r + (x1r); 1476 x0i = x0i + (x1i); 1477 x1r = x0r - (x1r << 1); 1478 x1i = x0i - (x1i << 1); 1479 x2r = x2r - (x3i); 1480 x2i = x2i + (x3r); 1481 x3i = x2r + (x3i << 1); 1482 x3r = x2i - (x3r << 1); 1483 1484 *data = x0r; 1485 *(data + 1) = x0i; 1486 data += (del << 1); 1487 1488 *data = x2r; 1489 *(data + 1) = x2i; 1490 data += (del << 1); 1491 1492 *data = x1r; 1493 *(data + 1) = x1i; 1494 data += (del << 1); 1495 1496 *data = x3i; 1497 *(data + 1) = x3r; 1498 data += (del << 1); 1499 } 1500 data -= 2 * npoints; 1501 data += 2; 1502 } 1503 nodespacing >>= 2; 1504 del <<= 2; 1505 in_loop_cnt >>= 2; 1506 } 1507 if (not_power_4) { 1508 const WORD32 *twiddles = ptr_w; 1509 nodespacing <<= 1; 1510 shift += 1; 1511 for (j = del / 2; j != 0; j--) { 1512 WORD32 w1h = *twiddles; 1513 WORD32 w1l = *(twiddles + 1); 1514 1515 WORD32 tmp; 1516 twiddles += nodespacing * 2; 1517 1518 x0r = *ptr_y; 1519 x0i = *(ptr_y + 1); 1520 ptr_y += (del << 1); 1521 1522 x1r = *ptr_y; 1523 x1i = *(ptr_y + 1); 1524 1525 tmp = (ixheaacd_mult32(x1r, w1l) + ixheaacd_mult32(x1i, w1h)); 1526 x1i = ixheaacd_mac32(-ixheaacd_mult32(x1r, w1h), x1i, w1l); 1527 x1r = tmp; 1528 1529 *ptr_y = (x0r) / 2 - (x1r) / 2; 1530 *(ptr_y + 1) = (x0i) / 2 - (x1i) / 2; 1531 ptr_y -= (del << 1); 1532 1533 *ptr_y = (x0r) / 2 + (x1r) / 2; 1534 *(ptr_y + 1) = (x0i) / 2 + (x1i) / 2; 1535 ptr_y += 2; 1536 } 1537 twiddles = ptr_w; 1538 for (j = del / 2; j != 0; j--) { 1539 WORD32 w1h = *twiddles; 1540 WORD32 w1l = *(twiddles + 1); 1541 WORD32 tmp; 1542 twiddles += nodespacing * 2; 1543 1544 x0r = *ptr_y; 1545 x0i = *(ptr_y + 1); 1546 ptr_y += (del << 1); 1547 1548 x1r = *ptr_y; 1549 x1i = *(ptr_y + 1); 1550 1551 tmp = (ixheaacd_mult32(x1r, w1h) - ixheaacd_mult32(x1i, w1l)); 1552 x1i = ixheaacd_mult32(x1r, w1l) + ixheaacd_mult32(x1i, w1h); 1553 x1r = tmp; 1554 1555 *ptr_y = (x0r) / 2 - (x1r) / 2; 1556 *(ptr_y + 1) = (x0i) / 2 - (x1i) / 2; 1557 ptr_y -= (del << 1); 1558 1559 *ptr_y = (x0r) / 2 + (x1r) / 2; 1560 *(ptr_y + 1) = (x0i) / 2 + (x1i) / 2; 1561 ptr_y += 2; 1562 } 1563 } 1564 } 1565 1566 for (i = 0; i < nlength; i++) { 1567 xr[i] = y[2 * i]; 1568 xi[i] = y[2 * i + 1]; 1569 } 1570 1571 *preshift = shift - *preshift; 1572 return; 1573 } 1574 1575 static PLATFORM_INLINE void ixheaacd_complex_3point_fft(WORD32 *inp, WORD32 *op, 1576 WORD32 sign_dir) { 1577 WORD32 add_r, sub_r; 1578 WORD32 add_i, sub_i; 1579 WORD32 temp_real, temp_imag, temp; 1580 1581 WORD32 p1, p2, p3, p4; 1582 1583 WORD32 sinmu; 1584 sinmu = -1859775393 * sign_dir; 1585 1586 temp_real = ixheaacd_add32_sat(inp[0], inp[2]); 1587 temp_imag = ixheaacd_add32_sat(inp[1], inp[3]); 1588 1589 add_r = ixheaacd_add32_sat(inp[2], inp[4]); 1590 add_i = ixheaacd_add32_sat(inp[3], inp[5]); 1591 1592 sub_r = ixheaacd_sub32_sat(inp[2], inp[4]); 1593 sub_i = ixheaacd_sub32_sat(inp[3], inp[5]); 1594 1595 p1 = add_r >> 1; 1596 p4 = add_i >> 1; 1597 p2 = ixheaacd_mult32_shl(sub_i, sinmu); 1598 p3 = ixheaacd_mult32_shl(sub_r, sinmu); 1599 1600 temp = ixheaacd_sub32(inp[0], p1); 1601 1602 op[0] = ixheaacd_add32_sat(temp_real, inp[4]); 1603 op[1] = ixheaacd_add32_sat(temp_imag, inp[5]); 1604 op[2] = ixheaacd_add32_sat(temp, p2); 1605 op[3] = ixheaacd_sub32_sat(ixheaacd_sub32_sat(inp[1], p3), p4); 1606 op[4] = ixheaacd_sub32_sat(temp, p2); 1607 op[5] = ixheaacd_sub32_sat(ixheaacd_add32_sat(inp[1], p3), p4); 1608 1609 return; 1610 } 1611 1612 VOID ixheaacd_complex_fft_p3(WORD32 *xr, WORD32 *xi, WORD32 nlength, 1613 WORD32 fft_mode, WORD32 *preshift) { 1614 WORD32 i, j; 1615 WORD32 shift = 0; 1616 WORD32 xr_3[384]; 1617 WORD32 xi_3[384]; 1618 WORD32 x[1024]; 1619 WORD32 y[1024]; 1620 WORD32 cnfac, npts; 1621 WORD32 mpass = nlength; 1622 WORD32 n = 0; 1623 WORD32 *ptr_x = x; 1624 WORD32 *ptr_y = y; 1625 1626 cnfac = 0; 1627 while (mpass % 3 == 0) { 1628 mpass /= 3; 1629 cnfac++; 1630 } 1631 npts = mpass; 1632 1633 for (i = 0; i < 3 * cnfac; i++) { 1634 for (j = 0; j < mpass; j++) { 1635 xr_3[j] = xr[3 * j + i]; 1636 xi_3[j] = xi[3 * j + i]; 1637 } 1638 1639 (*ixheaacd_complex_fft_p2)(xr_3, xi_3, mpass, fft_mode, &shift); 1640 1641 for (j = 0; j < mpass; j++) { 1642 xr[3 * j + i] = xr_3[j]; 1643 xi[3 * j + i] = xi_3[j]; 1644 } 1645 } 1646 1647 while (npts >> 1) { 1648 n++; 1649 npts = npts >> 1; 1650 } 1651 1652 if (n % 2 == 0) 1653 shift = ((n + 4)) / 2; 1654 else 1655 shift = ((n + 5) / 2); 1656 1657 *preshift = shift - *preshift + 1; 1658 1659 for (i = 0; i < nlength; i++) { 1660 ptr_x[2 * i] = (xr[i] >> 1); 1661 ptr_x[2 * i + 1] = (xi[i] >> 1); 1662 } 1663 1664 { 1665 const WORD32 *w1r, *w1i; 1666 WORD32 tmp; 1667 w1r = ixheaacd_twiddle_table_3pr; 1668 w1i = ixheaacd_twiddle_table_3pi; 1669 1670 if (fft_mode < 0) { 1671 for (i = 0; i < nlength; i += 3) { 1672 tmp = ixheaacd_sub32_sat(ixheaacd_mult32(ptr_x[2 * i], (*w1r)), 1673 ixheaacd_mult32(ptr_x[2 * i + 1], (*w1i))); 1674 ptr_x[2 * i + 1] = 1675 ixheaacd_add32_sat(ixheaacd_mult32(ptr_x[2 * i], (*w1i)), 1676 ixheaacd_mult32(ptr_x[2 * i + 1], (*w1r))); 1677 ptr_x[2 * i] = tmp; 1678 1679 w1r++; 1680 w1i++; 1681 1682 tmp = ixheaacd_sub32_sat(ixheaacd_mult32(ptr_x[2 * i + 2], (*w1r)), 1683 ixheaacd_mult32(ptr_x[2 * i + 3], (*w1i))); 1684 ptr_x[2 * i + 3] = 1685 ixheaacd_add32_sat(ixheaacd_mult32(ptr_x[2 * i + 2], (*w1i)), 1686 ixheaacd_mult32(ptr_x[2 * i + 3], (*w1r))); 1687 ptr_x[2 * i + 2] = tmp; 1688 1689 w1r++; 1690 w1i++; 1691 1692 tmp = ixheaacd_sub32_sat(ixheaacd_mult32(ptr_x[2 * i + 4], (*w1r)), 1693 ixheaacd_mult32(ptr_x[2 * i + 5], (*w1i))); 1694 ptr_x[2 * i + 5] = 1695 ixheaacd_add32_sat(ixheaacd_mult32(ptr_x[2 * i + 4], (*w1i)), 1696 ixheaacd_mult32(ptr_x[2 * i + 5], (*w1r))); 1697 ptr_x[2 * i + 4] = tmp; 1698 1699 w1r += 3 * (128 / mpass - 1) + 1; 1700 w1i += 3 * (128 / mpass - 1) + 1; 1701 } 1702 } 1703 1704 else { 1705 for (i = 0; i < nlength; i += 3) { 1706 tmp = ixheaacd_add32_sat(ixheaacd_mult32(ptr_x[2 * i], (*w1r)), 1707 ixheaacd_mult32(ptr_x[2 * i + 1], (*w1i))); 1708 ptr_x[2 * i + 1] = 1709 ixheaacd_sub32_sat(ixheaacd_mult32(ptr_x[2 * i + 1], (*w1r)), 1710 ixheaacd_mult32(ptr_x[2 * i], (*w1i))); 1711 ptr_x[2 * i] = tmp; 1712 1713 w1r++; 1714 w1i++; 1715 1716 tmp = ixheaacd_add32_sat(ixheaacd_mult32(ptr_x[2 * i + 2], (*w1r)), 1717 ixheaacd_mult32(ptr_x[2 * i + 3], (*w1i))); 1718 ptr_x[2 * i + 3] = 1719 ixheaacd_sub32_sat(ixheaacd_mult32(ptr_x[2 * i + 3], (*w1r)), 1720 ixheaacd_mult32(ptr_x[2 * i + 2], (*w1i))); 1721 ptr_x[2 * i + 2] = tmp; 1722 1723 w1r++; 1724 w1i++; 1725 1726 tmp = ixheaacd_add32_sat(ixheaacd_mult32(ptr_x[2 * i + 4], (*w1r)), 1727 ixheaacd_mult32(ptr_x[2 * i + 5], (*w1i))); 1728 ptr_x[2 * i + 5] = 1729 ixheaacd_sub32_sat(ixheaacd_mult32(ptr_x[2 * i + 5], (*w1r)), 1730 ixheaacd_mult32(ptr_x[2 * i + 4], (*w1i))); 1731 ptr_x[2 * i + 4] = tmp; 1732 1733 w1r += 3 * (128 / mpass - 1) + 1; 1734 w1i += 3 * (128 / mpass - 1) + 1; 1735 } 1736 } 1737 } 1738 1739 for (i = 0; i < mpass; i++) { 1740 ixheaacd_complex_3point_fft(ptr_x, ptr_y, fft_mode); 1741 1742 ptr_x = ptr_x + 6; 1743 ptr_y = ptr_y + 6; 1744 } 1745 1746 for (i = 0; i < mpass; i++) { 1747 xr[i] = y[6 * i]; 1748 xi[i] = y[6 * i + 1]; 1749 } 1750 1751 for (i = 0; i < mpass; i++) { 1752 xr[mpass + i] = y[6 * i + 2]; 1753 xi[mpass + i] = y[6 * i + 3]; 1754 } 1755 1756 for (i = 0; i < mpass; i++) { 1757 xr[2 * mpass + i] = y[6 * i + 4]; 1758 xi[2 * mpass + i] = y[6 * i + 5]; 1759 } 1760 return; 1761 } 1762 1763 VOID ixheaacd_complex_fft(WORD32 *data_r, WORD32 *data_i, WORD32 nlength, 1764 WORD32 fft_mode, WORD32 *preshift) { 1765 if (nlength & (nlength - 1)) { 1766 if ((nlength != 24) && (nlength != 48) && (nlength != 96) && 1767 (nlength != 192) && (nlength != 384)) { 1768 printf("%d point FFT not supported", nlength); 1769 exit(0); 1770 } 1771 ixheaacd_complex_fft_p3(data_r, data_i, nlength, fft_mode, preshift); 1772 } else 1773 (*ixheaacd_complex_fft_p2)(data_r, data_i, nlength, fft_mode, preshift); 1774 1775 return; 1776 } 1777