Home | History | Annotate | Download | only in decoder
      1 /******************************************************************************
      2  *                                                                            *
      3  * Copyright (C) 2018 The Android Open Source Project
      4  *
      5  * Licensed under the Apache License, Version 2.0 (the "License");
      6  * you may not use this file except in compliance with the License.
      7  * You may obtain a copy of the License at:
      8  *
      9  * http://www.apache.org/licenses/LICENSE-2.0
     10  *
     11  * Unless required by applicable law or agreed to in writing, software
     12  * distributed under the License is distributed on an "AS IS" BASIS,
     13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     14  * See the License for the specific language governing permissions and
     15  * limitations under the License.
     16  *
     17  *****************************************************************************
     18  * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
     19 */
     20 #include "ixheaacd_sbr_common.h"
     21 #include <ixheaacd_type_def.h>
     22 
     23 #include "ixheaacd_constants.h"
     24 #include <ixheaacd_basic_ops32.h>
     25 #include <ixheaacd_basic_ops16.h>
     26 #include <ixheaacd_basic_ops40.h>
     27 #include "ixheaacd_basic_ops.h"
     28 
     29 #include "ixheaacd_defines.h"
     30 #include "ixheaacd_common_rom.h"
     31 #include "ixheaacd_basic_funcs.h"
     32 #include <ixheaacd_aac_rom.h>
     33 #include "ixheaacd_aac_imdct.h"
     34 #include "ixheaacd_intrinsics.h"
     35 
     36 #include <ixheaacd_basic_op.h>
     37 #include "ixheaacd_function_selector.h"
     38 
     39 #include "ixheaacd_audioobjtypes.h"
     40 #include "ixheaacd_tns.h"
     41 
     42 #define DIG_REV(i, m, j)                                      \
     43   do {                                                        \
     44     unsigned _ = (i);                                         \
     45     _ = ((_ & 0x33333333) << 2) | ((_ & ~0x33333333) >> 2);   \
     46     _ = ((_ & 0x0F0F0F0F) << 4) | ((_ & ~0x0F0F0F0F) >> 4);   \
     47     _ = ((_ & 0x00FF00FF) << 8) | ((_ & ~0x00FF00FF) >> 8);   \
     48     _ = ((_ & 0x0000FFFF) << 16) | ((_ & ~0x0000FFFF) >> 16); \
     49     (j) = _ >> (m);                                           \
     50   } while (0)
     51 
     52 #define MPYHIRC(x, y)                                                         \
     53                                                                               \
     54   (((WORD32)((short)(x >> 16) * (unsigned short)(y & 0x0000FFFF) + 0x4000) >> \
     55     15) +                                                                     \
     56    ((WORD32)((short)(x >> 16) * (short)((y) >> 16)) << 1))
     57 
     58 #define MPYLUHS(x, y) \
     59   ((WORD32)((unsigned short)(x & 0x0000FFFF) * (short)(y >> 16)))
     60 
/*
 * Size constants.
 * NOTE(review): apart from FFT15X2, none of these is referenced in the part
 * of the file visible here; the names suggest MDCT/FFT lengths - confirm
 * against their users.
 */
#define MDCT_LEN 480
#define FFT15X2 30
#define MDCT_LEN_BY2 240
#define FFT5 5
#define FFT16 16
#define FFT15 15
#define FFT16X2 32

/* Global (non-static) array of FFT15X2 (30) 32-bit words. */
WORD32 ixheaacd_fft5out[FFT15X2];
     70 
     71 static PLATFORM_INLINE WORD32 ixheaacd_shr32_drc(WORD32 a, WORD32 b) {
     72   WORD32 out_val;
     73 
     74   b = ((UWORD32)(b << 24) >> 24);
     75   if (b >= 31) {
     76     if (a < 0)
     77       out_val = -1;
     78     else
     79       out_val = 0;
     80   } else {
     81     a = ixheaacd_add32_sat(a, (1 << (b - 1)));
     82     out_val = (WORD32)a >> b;
     83   }
     84 
     85   return out_val;
     86 }
     87 
     88 static PLATFORM_INLINE WORD32 ixheaacd_mult32x16hin32_drc(WORD32 a, WORD32 b) {
     89   WORD32 result;
     90   WORD64 temp_result;
     91   temp_result = (WORD64)a * (WORD64)(b >> 16);
     92   result = (WORD32)(temp_result >> 16);
     93   return (result);
     94 }
     95 
     96 static PLATFORM_INLINE WORD32 ixheaacd_mult32x16lin32(WORD32 a, WORD32 b) {
     97   WORD32 result;
     98   WORD64 temp_result;
     99   temp_result = (WORD64)a * (WORD64)(((b & 0xFFFF) << 16) >> 16);
    100   result = (WORD32)(temp_result >> 16);
    101   return (result);
    102 }
    103 
    104 static PLATFORM_INLINE WORD32 ixheaacd_mac32x16lin32(WORD32 a, WORD32 b,
    105                                                      WORD32 c) {
    106   WORD32 result;
    107   result = a + ixheaacd_mult32x16lin32(b, c);
    108   return (result);
    109 }
    110 
    111 static PLATFORM_INLINE WORD32 ixheaacd_mult32x16lin32_drc(WORD32 a, WORD32 b) {
    112   WORD32 result;
    113   WORD64 temp_result;
    114   temp_result = (WORD64)a * (WORD64)(((b & 0xFFFF) << 16) >> 16);
    115   if (temp_result < (WORD64)MIN_32)
    116     result = MIN_32;
    117   else if (temp_result > (WORD64)MAX_32)
    118     result = MAX_32;
    119   else
    120     result = (WORD32)(temp_result);
    121   return (result);
    122 }
    123 
    124 WORD16 ixheaacd_neg_expo_inc_dec(WORD16 neg_expo) { return (neg_expo + 2); }
    125 
    126 WORD16 ixheaacd_neg_expo_inc_arm(WORD16 neg_expo) { return (neg_expo + 3); }
    127 
    128 VOID ixheaacd_pretwiddle_compute_dec(
    129     WORD32 *spec_data1, WORD32 *spec_data2, WORD32 *out_ptr,
    130     ia_aac_dec_imdct_tables_struct *ptr_imdct_tables, WORD npoints4,
    131     WORD32 neg_expo) {
    132   WORD32 i;
    133   WORD32 tempr, tempi;
    134   WORD32 tempr1, tempi1;
    135   WORD32 npoints2 = npoints4 * 2;
    136   WORD32 *out_ptr1 = out_ptr + (npoints2 << 1) - 1;
    137   const WORD16 *cos_sin_ptr = ptr_imdct_tables->cosine_array_2048_256;
    138 
    139   WORD16 cos = 0, cos1 = 0, sin = 0, sin1 = 0;
    140   if (neg_expo < 0) {
    141     neg_expo = -neg_expo;
    142     if (npoints4 == 256) {
    143       cos = *cos_sin_ptr++;
    144       sin = *cos_sin_ptr++;
    145     } else if (npoints4 == 32) {
    146       cos = *cos_sin_ptr++;
    147       sin = *cos_sin_ptr;
    148       cos_sin_ptr += 15;
    149     }
    150     tempr = *spec_data1++;
    151     tempi = *spec_data2--;
    152 
    153     *out_ptr =
    154         ixheaacd_mac32x16in32(ixheaacd_mult32x16in32(tempr, cos), tempi, sin);
    155 
    156     *out_ptr = ixheaacd_shl32(*out_ptr, neg_expo);
    157     out_ptr++;
    158 
    159     *out_ptr = ixheaacd_sub32(ixheaacd_mult32x16in32(tempi, cos),
    160                               ixheaacd_mult32x16in32(tempr, sin));
    161 
    162     *out_ptr = ixheaacd_shl32(*out_ptr, neg_expo);
    163     out_ptr++;
    164 
    165     for (i = 0; i < npoints4 - 1; i++) {
    166       if (npoints4 == 256) {
    167         sin = *cos_sin_ptr++;
    168         cos = *cos_sin_ptr++;
    169       } else if (npoints4 == 32) {
    170         sin = *cos_sin_ptr++;
    171         cos = *cos_sin_ptr;
    172         cos_sin_ptr += 15;
    173       }
    174 
    175       tempi1 = *spec_data1++;
    176       tempr = *spec_data1++;
    177       tempr1 = *spec_data2--;
    178       tempi = *spec_data2--;
    179 
    180       *out_ptr1 = ixheaacd_sub32(ixheaacd_mult32x16in32(tempi1, cos),
    181                                  ixheaacd_mult32x16in32(tempr1, sin));
    182 
    183       *out_ptr1 = ixheaacd_shl32(*out_ptr1, neg_expo);
    184       out_ptr1--;
    185 
    186       *out_ptr1 = ixheaacd_mac32x16in32(ixheaacd_mult32x16in32(tempr1, cos),
    187                                         tempi1, sin);
    188       *out_ptr1 = ixheaacd_shl32(*out_ptr1, neg_expo);
    189       out_ptr1--;
    190 
    191       *out_ptr =
    192           ixheaacd_mac32x16in32(ixheaacd_mult32x16in32(tempr, sin), tempi, cos);
    193       *out_ptr = ixheaacd_shl32(*out_ptr, neg_expo);
    194       out_ptr++;
    195 
    196       *out_ptr = ixheaacd_sub32(ixheaacd_mult32x16in32(tempi, sin),
    197                                 ixheaacd_mult32x16in32(tempr, cos));
    198       *out_ptr = ixheaacd_shl32(*out_ptr, neg_expo);
    199       out_ptr++;
    200     }
    201     cos1 = *cos_sin_ptr++;
    202     sin1 = *cos_sin_ptr;
    203 
    204     tempr1 = *spec_data2;
    205     tempi1 = *spec_data1;
    206 
    207     *out_ptr1 = ixheaacd_sub32(ixheaacd_mult32x16in32(tempi1, cos1),
    208                                ixheaacd_mult32x16in32(tempr1, sin1));
    209     *out_ptr1 = ixheaacd_shl32(*out_ptr1, neg_expo);
    210     out_ptr1--;
    211 
    212     *out_ptr1 = ixheaacd_mac32x16in32(ixheaacd_mult32x16in32(tempr1, cos1),
    213                                       tempi1, sin1);
    214     *out_ptr1 = ixheaacd_shl32(*out_ptr1, neg_expo);
    215     out_ptr1--;
    216 
    217   } else {
    218     if (npoints4 == 256) {
    219       cos = *cos_sin_ptr++;
    220       sin = *cos_sin_ptr++;
    221 
    222     } else if (npoints4 == 32) {
    223       cos = *cos_sin_ptr++;
    224       sin = *cos_sin_ptr;
    225       cos_sin_ptr += 15;
    226     }
    227     tempr = *spec_data1++;
    228     tempi = *spec_data2--;
    229 
    230     *out_ptr =
    231         ixheaacd_mac32x16in32(ixheaacd_mult32x16in32(tempr, cos), tempi, sin);
    232     *out_ptr = ixheaacd_shr32(*out_ptr, neg_expo);
    233     out_ptr++;
    234 
    235     *out_ptr = ixheaacd_sub32(ixheaacd_mult32x16in32(tempi, cos),
    236                               ixheaacd_mult32x16in32(tempr, sin));
    237 
    238     *out_ptr = ixheaacd_shr32(*out_ptr, neg_expo);
    239     out_ptr++;
    240 
    241     for (i = 0; i < npoints4 - 1; i++) {
    242       if (npoints4 == 256) {
    243         sin = *cos_sin_ptr++;
    244         cos = *cos_sin_ptr++;
    245       } else if (npoints4 == 32) {
    246         sin = *cos_sin_ptr++;
    247         cos = *cos_sin_ptr;
    248         cos_sin_ptr += 15;
    249       }
    250 
    251       tempi1 = *spec_data1++;
    252       tempr = *spec_data1++;
    253       tempr1 = *spec_data2--;
    254       tempi = *spec_data2--;
    255 
    256       *out_ptr1 = ixheaacd_sub32(ixheaacd_mult32x16in32(tempi1, cos),
    257                                  ixheaacd_mult32x16in32(tempr1, sin));
    258       *out_ptr1 = ixheaacd_shr32(*out_ptr1, neg_expo);
    259       out_ptr1--;
    260 
    261       *out_ptr1 = ixheaacd_mac32x16in32(ixheaacd_mult32x16in32(tempr1, cos),
    262                                         tempi1, sin);
    263       *out_ptr1 = ixheaacd_shr32(*out_ptr1, neg_expo);
    264       out_ptr1--;
    265 
    266       *out_ptr =
    267           ixheaacd_mac32x16in32(ixheaacd_mult32x16in32(tempr, sin), tempi, cos);
    268       *out_ptr = ixheaacd_shr32(*out_ptr, neg_expo);
    269       out_ptr++;
    270 
    271       *out_ptr = ixheaacd_sub32(ixheaacd_mult32x16in32(tempi, sin),
    272                                 ixheaacd_mult32x16in32(tempr, cos));
    273       *out_ptr = ixheaacd_shr32(*out_ptr, neg_expo);
    274       out_ptr++;
    275     }
    276     cos1 = *cos_sin_ptr++;
    277     sin1 = *cos_sin_ptr;
    278 
    279     tempr1 = *spec_data2;
    280     tempi1 = *spec_data1;
    281 
    282     *out_ptr1 = ixheaacd_sub32(ixheaacd_mult32x16in32(tempi1, cos1),
    283                                ixheaacd_mult32x16in32(tempr1, sin1));
    284     *out_ptr1 = ixheaacd_shr32(*out_ptr1, neg_expo);
    285     out_ptr1--;
    286 
    287     *out_ptr1 = ixheaacd_mac32x16in32(ixheaacd_mult32x16in32(tempr1, cos1),
    288                                       tempi1, sin1);
    289     *out_ptr1 = ixheaacd_shr32(*out_ptr1, neg_expo);
    290     out_ptr1--;
    291   }
    292 }
    293 
    294 VOID ixheaacd_post_twiddle_dec(WORD32 out_ptr[], WORD32 spec_data[],
    295                                ia_aac_dec_imdct_tables_struct *ptr_imdct_tables,
    296                                WORD npoints) {
    297   WORD i;
    298   WORD16 cos, cos1, sin, sin1;
    299   WORD32 *spec_data1 = spec_data + npoints - 1;
    300   WORD32 *out_ptr1 = out_ptr + npoints - 1;
    301   WORD16 adjust = 50, adjust1 = -50;
    302   const WORD16 *cos_sin_ptr = ptr_imdct_tables->cosine_array_2048_256;
    303 
    304   if (npoints == 1024) {
    305     WORD32 tempr, tempi, outi, outr, temp1, temp2;
    306     tempr = *spec_data++;
    307     tempi = *spec_data++;
    308 
    309     cos = *cos_sin_ptr;
    310     cos_sin_ptr++;
    311     sin = *cos_sin_ptr;
    312     cos_sin_ptr++;
    313 
    314     outi = ixheaacd_sub32(ixheaacd_mult32x16in32(tempr, sin),
    315                           ixheaacd_mult32x16in32(tempi, cos));
    316     outr =
    317         ixheaacd_mac32x16in32(ixheaacd_mult32x16in32(tempr, cos), tempi, sin);
    318 
    319     temp1 = ixheaacd_mult32x16in32(outi, adjust1);
    320     temp2 = ixheaacd_mult32x16in32(outr, adjust);
    321 
    322     outr = outr + temp1;
    323     outi = outi + temp2;
    324     *out_ptr1-- = outi;
    325     *out_ptr++ = outr;
    326 
    327     for (i = 0; i < (npoints / 2 - 2); i++) {
    328       sin = *cos_sin_ptr++;
    329       cos = *cos_sin_ptr++;
    330 
    331       tempi = *spec_data1--;
    332       tempr = *spec_data1--;
    333 
    334       outi = ixheaacd_sub32(ixheaacd_mult32x16in32(tempr, sin),
    335                             ixheaacd_mult32x16in32(tempi, cos));
    336       outr =
    337           ixheaacd_mac32x16in32(ixheaacd_mult32x16in32(tempr, cos), tempi, sin);
    338 
    339       temp1 = ixheaacd_mult32x16in32(outi, adjust1);
    340       temp2 = ixheaacd_mult32x16in32(outr, adjust);
    341 
    342       outr = outr + temp1;
    343       outi = outi + temp2;
    344 
    345       *out_ptr++ = outi;
    346       *out_ptr1-- = outr;
    347 
    348       i++;
    349       tempr = *spec_data++;
    350       tempi = *spec_data++;
    351 
    352       outi = ixheaacd_sub32(ixheaacd_mult32x16in32(tempr, cos),
    353                             ixheaacd_mult32x16in32(tempi, sin));
    354       outr =
    355           ixheaacd_mac32x16in32(ixheaacd_mult32x16in32(tempr, sin), tempi, cos);
    356 
    357       temp1 = ixheaacd_mult32x16in32(outi, adjust1);
    358       temp2 = ixheaacd_mult32x16in32(outr, adjust);
    359 
    360       outr = outr + temp1;
    361       outi = outi + temp2;
    362 
    363       *out_ptr1-- = outi;
    364       *out_ptr++ = outr;
    365     }
    366     cos1 = *cos_sin_ptr++;
    367     sin1 = *cos_sin_ptr;
    368 
    369     tempi = *spec_data1--;
    370     tempr = *spec_data1--;
    371 
    372     outi = ixheaacd_sub32(ixheaacd_mult32x16in32(tempr, sin1),
    373                           ixheaacd_mult32x16in32(tempi, cos1));
    374     outr =
    375         ixheaacd_mac32x16in32(ixheaacd_mult32x16in32(tempr, cos1), tempi, sin1);
    376 
    377     temp1 = ixheaacd_mult32x16in32(outi, adjust1);
    378     temp2 = ixheaacd_mult32x16in32(outr, adjust);
    379 
    380     outr = outr + temp1;
    381     outi = outi + temp2;
    382 
    383     *out_ptr++ = outi;
    384     *out_ptr1-- = outr;
    385   } else if (npoints == 128) {
    386     WORD32 tempr, tempi, outi, outr, temp1, temp2;
    387     tempr = *spec_data++;
    388     tempi = *spec_data++;
    389 
    390     cos = *cos_sin_ptr++;
    391     sin = *cos_sin_ptr;
    392     cos_sin_ptr += 15;
    393 
    394     outi = ixheaacd_sub32(ixheaacd_mult32x16in32(tempr, sin),
    395                           ixheaacd_mult32x16in32(tempi, cos));
    396     outr =
    397         ixheaacd_mac32x16in32(ixheaacd_mult32x16in32(tempr, cos), tempi, sin);
    398 
    399     temp1 = ixheaacd_mult32x16in32(outi, -(201 << 1));
    400     temp2 = ixheaacd_mult32x16in32(outr, 201 << 1);
    401 
    402     outr = outr + temp1;
    403     outi = outi + temp2;
    404     *out_ptr1-- = outi;
    405     *out_ptr++ = outr;
    406 
    407     for (i = 0; i < (npoints / 2 - 2); i++) {
    408       sin = *cos_sin_ptr++;
    409       cos = *cos_sin_ptr;
    410       cos_sin_ptr += 15;
    411 
    412       tempi = *spec_data1--;
    413       tempr = *spec_data1--;
    414 
    415       outi = ixheaacd_sub32(ixheaacd_mult32x16in32(tempr, sin),
    416                             ixheaacd_mult32x16in32(tempi, cos));
    417       outr =
    418           ixheaacd_mac32x16in32(ixheaacd_mult32x16in32(tempr, cos), tempi, sin);
    419 
    420       temp1 = ixheaacd_mult32x16in32(outi, -(201 << 1));
    421       temp2 = ixheaacd_mult32x16in32(outr, 201 << 1);
    422 
    423       outr = outr + temp1;
    424       outi = outi + temp2;
    425 
    426       *out_ptr++ = outi;
    427       *out_ptr1-- = outr;
    428 
    429       i++;
    430       tempr = *spec_data++;
    431       tempi = *spec_data++;
    432 
    433       outi = ixheaacd_sub32(ixheaacd_mult32x16in32(tempr, cos),
    434                             ixheaacd_mult32x16in32(tempi, sin));
    435       outr =
    436           ixheaacd_mac32x16in32(ixheaacd_mult32x16in32(tempr, sin), tempi, cos);
    437 
    438       temp1 = ixheaacd_mult32x16in32(outi, -(201 << 1));
    439       temp2 = ixheaacd_mult32x16in32(outr, 201 << 1);
    440 
    441       outr = outr + temp1;
    442       outi = outi + temp2;
    443 
    444       *out_ptr1-- = outi;
    445       *out_ptr++ = outr;
    446     }
    447     cos1 = *cos_sin_ptr++;
    448     sin1 = *cos_sin_ptr;
    449 
    450     tempi = *spec_data1--;
    451     tempr = *spec_data1--;
    452 
    453     outi = ixheaacd_sub32(ixheaacd_mult32x16in32(tempr, sin1),
    454                           ixheaacd_mult32x16in32(tempi, cos1));
    455     outr =
    456         ixheaacd_mac32x16in32(ixheaacd_mult32x16in32(tempr, cos1), tempi, sin1);
    457 
    458     temp1 = ixheaacd_mult32x16in32(outi, -(201 << 1));
    459     temp2 = ixheaacd_mult32x16in32(outr, 201 << 1);
    460 
    461     outr = outr + temp1;
    462     outi = outi + temp2;
    463 
    464     *out_ptr++ = outi;
    465     *out_ptr1-- = outr;
    466   }
    467 }
    468 
/*
 * Post-twiddle combined with windowing and overlap-add.
 *
 * For every complex pair of spec_data the function
 *   1. rotates it by coefficients from cosine_array_2048_256 and applies a
 *      small correction (multiplication by +/-50),
 *   2. reads one word of ptr_overlap_buf and replaces it with a rounded,
 *      right-shifted copy of one rotated component,
 *   3. combines the other rotated component and the overlap word read in
 *      step 2 with the window and writes two rounded 16-bit samples.
 *
 *   pcm_out          output samples; consecutive samples are ch_fac apart.
 *                    One write pointer walks down from index
 *                    (npoints / 2 - 1) * ch_fac, the other walks up from
 *                    index (npoints / 2) * ch_fac.
 *   spec_data        input; npoints 32-bit words are read.
 *   ptr_imdct_tables supplies the coefficient table.
 *   npoints          number of input words.
 *   ptr_overlap_buf  read and rewritten in place, npoints / 2 words.
 *   q_shift          > 0: windowed products are shifted left (saturating) by
 *                    q_shift and the overlap word is stored shifted right by
 *                    16 - q_shift; otherwise the products are shifted right
 *                    by -q_shift and the overlap word is stored shifted
 *                    right by 16 - q_shift as well (written 16 + q_shift
 *                    after q_shift has been negated).
 *   window           accessed as 32-bit words; each word supplies two 16-bit
 *                    coefficients (low and high half).
 *   ch_fac           distance between consecutive output samples.
 *
 * NOTE(review): window is declared const WORD16 * but is read through a
 * (WORD32 *) cast, which drops const and assumes 32-bit alignment - confirm
 * the table layout guarantees this.
 */
VOID ixheaacd_post_twid_overlap_add_dec(
    WORD16 pcm_out[], WORD32 spec_data[],
    ia_aac_dec_imdct_tables_struct *ptr_imdct_tables, WORD npoints,
    WORD32 *ptr_overlap_buf, WORD16 q_shift, const WORD16 *window,
    WORD16 ch_fac) {
  WORD i;
  WORD16 cos, cos1, sin, sin1;
  WORD32 size = npoints / 2;
  WORD16 *pcmout1 = pcm_out + (ch_fac * size);
  const WORD16 *cos_sin_ptr = ptr_imdct_tables->cosine_array_2048_256;

  /* pcm_out walks downwards from just below the midpoint, pcmout1 walks
     upwards from the midpoint. */
  pcm_out = pcmout1 - ch_fac;
  /* spec_data now points at the middle of the input; the first pair is at
     spec_data - size, later pairs at spec_data + i and spec_data - i. */
  spec_data += size;

  if (q_shift > 0) {
    WORD32 tempr, tempi, outr, outi, win1, accu, temp1, temp2;
    WORD16 adjust, adjust1;
    WORD32 overlap_data;

    /* First pair: table entries are read as (cos, sin). */
    tempr = *(spec_data - size);
    tempi = *(spec_data - size + 1);
    adjust = 50;
    adjust1 = -50;
    cos = *cos_sin_ptr++;
    sin = *cos_sin_ptr++;
    outi = ixheaacd_sub32(ixheaacd_mult32x16in32(tempr, sin),
                          ixheaacd_mult32x16in32(tempi, cos));
    outr = ixheaacd_add32(ixheaacd_mult32x16in32(tempr, cos),
                          ixheaacd_mult32x16in32(tempi, sin));

    /* Fetch the stored overlap word before it is overwritten below. */
    overlap_data = *ptr_overlap_buf;

    temp1 = ixheaacd_mult32x16in32(outi, adjust1);
    temp2 = ixheaacd_mult32x16in32(outr, adjust);

    outr = outr + temp1;
    outi = outi + temp2;

    *ptr_overlap_buf++ = ixheaacd_shr32_drc(outr, 16 - q_shift);

    /* One 32-bit window word carries two 16-bit coefficients. */
    win1 = *((WORD32 *)window + size - 1);
    accu = ixheaacd_sub32_sat(
        ixheaacd_shl32_sat(ixheaacd_mult32x16lin32(outi, win1), q_shift),
        ixheaacd_mult32x16lin32_drc(overlap_data, (WORD16)(win1 >> 16)));

    *pcm_out = ixheaacd_round16(ixheaacd_shl32_sat(accu, 2));

    pcm_out -= ch_fac;
    accu = ixheaacd_sub32_sat(
        ixheaacd_shl32_sat(
            ixheaacd_mult32x16hin32_drc(ixheaacd_negate32(outi), win1),
            q_shift),
        ixheaacd_mult32x16lin32_drc(overlap_data, (WORD16)(win1)));

    *pcmout1 = ixheaacd_round16(ixheaacd_shl32_sat(accu, 2));

    pcmout1 += ch_fac;

    /* i steps down by two (see "i -= 2" in the body); every pass handles the
       pair at spec_data + i and the pair at spec_data - i. */
    for (i = size - 2; i != 0;) {
      /* Inside the loop the table entries are read as (sin, cos). */
      sin = *cos_sin_ptr++;
      cos = *cos_sin_ptr++;

      tempr = *(spec_data + i);
      tempi = *(spec_data + i + 1);

      outr = ixheaacd_add32(ixheaacd_mult32x16in32(tempr, cos),
                            ixheaacd_mult32x16in32(tempi, sin));
      outi = ixheaacd_sub32(ixheaacd_mult32x16in32(tempr, sin),
                            ixheaacd_mult32x16in32(tempi, cos));

      temp1 = ixheaacd_mult32x16in32(outi, adjust1);
      temp2 = ixheaacd_mult32x16in32(outr, adjust);

      outr = outr + temp1;
      outi = outi + temp2;

      overlap_data = *ptr_overlap_buf;

      *ptr_overlap_buf++ = ixheaacd_shr32_drc(outi, 16 - q_shift);

      win1 = *((WORD32 *)window + i);
      accu = ixheaacd_sub32_sat(
          ixheaacd_shl32_sat(ixheaacd_mult32x16lin32(outr, win1), q_shift),
          ixheaacd_mult32x16lin32_drc(overlap_data, (WORD16)(win1 >> 16)));
      *pcm_out = ixheaacd_round16(ixheaacd_shl32_sat(accu, 2));
      pcm_out -= ch_fac;
      accu = ixheaacd_sub32_sat(
          ixheaacd_shl32_sat(
              ixheaacd_mult32x16hin32_drc(ixheaacd_negate32(outr), win1),
              q_shift),
          ixheaacd_mult32x16lin32_drc(overlap_data, (WORD16)win1));
      *pcmout1 = ixheaacd_round16(ixheaacd_shl32_sat(accu, 2));
      pcmout1 += ch_fac;

      /* Mirror pair from the lower half, same coefficients with the roles
         of cos and sin exchanged. */
      tempr = *(spec_data - i);
      tempi = *(spec_data - i + 1);

      /* Decremented here so the window index below (i + 1) refers to the
         word just before the one used for the upper pair. */
      i -= 2;

      outi = ixheaacd_sub32(ixheaacd_mult32x16in32(tempr, cos),
                            ixheaacd_mult32x16in32(tempi, sin));
      outr = ixheaacd_add32(ixheaacd_mult32x16in32(tempr, sin),
                            ixheaacd_mult32x16in32(tempi, cos));

      overlap_data = *ptr_overlap_buf;

      temp1 = ixheaacd_mult32x16in32(outi, adjust1);

      temp2 = ixheaacd_mult32x16in32(outr, adjust);

      outr = outr + temp1;
      outi = outi + temp2;

      *ptr_overlap_buf++ = ixheaacd_shr32_drc(outr, 16 - q_shift);

      win1 = *((WORD32 *)window + i + 1);
      accu = ixheaacd_sub32_sat(
          ixheaacd_shl32_sat(ixheaacd_mult32x16lin32(outi, win1), q_shift),
          ixheaacd_mult32x16lin32_drc(overlap_data, (WORD16)(win1 >> 16)));
      *pcm_out = ixheaacd_round16(ixheaacd_shl32_sat(accu, 2));
      pcm_out -= ch_fac;
      accu = ixheaacd_sub32_sat(
          ixheaacd_shl32_sat(
              ixheaacd_mult32x16hin32_drc(ixheaacd_negate32(outi), win1),
              q_shift),
          ixheaacd_mult32x16lin32_drc(overlap_data, (WORD16)(win1)));
      *pcmout1 = ixheaacd_round16(ixheaacd_shl32_sat(accu, 2));
      pcmout1 += ch_fac;
    }
    /* Final pair (i == 0 here): table entries are read as (cos, sin). */
    cos1 = *cos_sin_ptr++;
    sin1 = *cos_sin_ptr;

    tempr = *(spec_data + i);
    tempi = *(spec_data + i + 1);

    outr = ixheaacd_add32(ixheaacd_mult32x16in32(tempr, cos1),
                          ixheaacd_mult32x16in32(tempi, sin1));
    outi = ixheaacd_sub32(ixheaacd_mult32x16in32(tempr, sin1),
                          ixheaacd_mult32x16in32(tempi, cos1));

    temp1 = ixheaacd_mult32x16in32(outi, adjust1);

    temp2 = ixheaacd_mult32x16in32(outr, adjust);

    outr = outr + temp1;
    outi = outi + temp2;

    overlap_data = *ptr_overlap_buf;

    *ptr_overlap_buf++ = ixheaacd_shr32_drc(outi, 16 - q_shift);
    win1 = *((WORD32 *)window + i);
    accu = ixheaacd_sub32_sat(
        ixheaacd_shl32_sat(ixheaacd_mult32x16lin32(outr, win1), q_shift),
        ixheaacd_mult32x16lin32_drc(overlap_data, (WORD16)(win1 >> 16)));
    *pcm_out = ixheaacd_round16(ixheaacd_shl32_sat(accu, 2));
    pcm_out -= ch_fac;
    accu = ixheaacd_sub32_sat(
        ixheaacd_shl32_sat(
            ixheaacd_mult32x16hin32_drc(ixheaacd_negate32(outr), win1),
            q_shift),
        ixheaacd_mult32x16lin32_drc(overlap_data, (WORD16)win1));
    *pcmout1 = ixheaacd_round16(ixheaacd_shl32_sat(accu, 2));
    pcmout1 += ch_fac;
  } else {
    /* From here on q_shift holds the magnitude of a right shift. */
    q_shift = -q_shift;
    {
      WORD32 tempr, tempi, temp1, temp2, outr, outi, win1, accu;
      WORD16 adjust, adjust1;
      /* NOTE(review): overlap_data is WORD16 in this branch but WORD32 in
         the q_shift > 0 branch, so each word read from ptr_overlap_buf is
         narrowed to 16 bits here - confirm this is intended. */
      WORD16 overlap_data;
      /* First pair: table entries are read as (cos, sin). */
      tempr = *(spec_data - size);
      tempi = *(spec_data - size + 1);

      adjust = 50;
      adjust1 = -50;
      cos = *cos_sin_ptr++;
      sin = *cos_sin_ptr++;

      outi = ixheaacd_sub32(ixheaacd_mult32x16in32(tempr, sin),
                            ixheaacd_mult32x16in32(tempi, cos));
      outr = ixheaacd_add32(ixheaacd_mult32x16in32(tempr, cos),
                            ixheaacd_mult32x16in32(tempi, sin));

      /* Fetch the stored overlap word before it is overwritten below. */
      overlap_data = *ptr_overlap_buf;

      temp1 = ixheaacd_mult32x16in32(outi, adjust1);
      temp2 = ixheaacd_mult32x16in32(outr, adjust);

      outr = outr + temp1;
      outi = outi + temp2;

      *ptr_overlap_buf++ = ixheaacd_shr32_drc(outr, 16 + q_shift);

      win1 = *((WORD32 *)window + size - 1);
      accu = ixheaacd_sub32_sat(
          ixheaacd_shr32(ixheaacd_mult32x16lin32(outi, win1), q_shift),
          ixheaacd_mult32x16lin32_drc(overlap_data, (WORD16)(win1 >> 16)));

      *pcm_out = ixheaacd_round16(ixheaacd_shl32_sat(accu, 2));

      pcm_out -= ch_fac;
      accu = ixheaacd_sub32_sat(
          ixheaacd_shr32(
              ixheaacd_mult32x16hin32_drc(ixheaacd_negate32(outi), win1),
              q_shift),
          ixheaacd_mult32x16lin32_drc(overlap_data, (WORD16)(win1)));

      *pcmout1 = ixheaacd_round16(ixheaacd_shl32_sat(accu, 2));
      pcmout1 += ch_fac;

      /* i steps down by two (see "i -= 2" in the body); every pass handles
         the pair at spec_data + i and the pair at spec_data - i. */
      for (i = size - 2; i != 0;) {
        /* Inside the loop the table entries are read as (sin, cos). */
        sin = *cos_sin_ptr++;
        cos = *cos_sin_ptr++;

        tempr = *(spec_data + i);
        tempi = *(spec_data + i + 1);

        outr = ixheaacd_add32(ixheaacd_mult32x16in32(tempr, cos),
                              ixheaacd_mult32x16in32(tempi, sin));
        outi = ixheaacd_sub32(ixheaacd_mult32x16in32(tempr, sin),
                              ixheaacd_mult32x16in32(tempi, cos));

        overlap_data = *ptr_overlap_buf;

        temp1 = ixheaacd_mult32x16in32(outi, adjust1);

        temp2 = ixheaacd_mult32x16in32(outr, adjust);
        outr = outr + temp1;
        outi = outi + temp2;
        *ptr_overlap_buf++ = ixheaacd_shr32_drc(outi, 16 + q_shift);

        win1 = *((WORD32 *)window + i);
        accu = ixheaacd_sub32_sat(
            ixheaacd_shr32(ixheaacd_mult32x16lin32(outr, win1), q_shift),
            ixheaacd_mult32x16lin32_drc(overlap_data, (WORD16)(win1 >> 16)));

        *pcm_out = ixheaacd_round16(ixheaacd_shl32_sat(accu, 2));
        pcm_out -= ch_fac;
        accu = ixheaacd_sub32_sat(
            ixheaacd_shr32(
                ixheaacd_mult32x16hin32_drc(ixheaacd_negate32(outr), win1),
                q_shift),
            ixheaacd_mult32x16lin32_drc(overlap_data, (WORD16)win1));
        *pcmout1 = ixheaacd_round16(ixheaacd_shl32_sat(accu, 2));
        pcmout1 += ch_fac;

        /* Mirror pair from the lower half, same coefficients with the roles
           of cos and sin exchanged. */
        tempr = *(spec_data - i);
        tempi = *(spec_data - i + 1);
        /* Decremented here so the window index below (i + 1) refers to the
           word just before the one used for the upper pair. */
        i -= 2;

        outi = ixheaacd_sub32(ixheaacd_mult32x16in32(tempr, cos),
                              ixheaacd_mult32x16in32(tempi, sin));
        outr = ixheaacd_add32(ixheaacd_mult32x16in32(tempr, sin),
                              ixheaacd_mult32x16in32(tempi, cos));

        overlap_data = *ptr_overlap_buf;

        temp1 = ixheaacd_mult32x16in32(outi, adjust1);
        temp2 = ixheaacd_mult32x16in32(outr, adjust);

        outr = outr + temp1;
        outi = outi + temp2;

        *ptr_overlap_buf++ = ixheaacd_shr32_drc(outr, 16 + q_shift);

        win1 = *((WORD32 *)window + i + 1);
        accu = ixheaacd_sub32_sat(
            ixheaacd_shr32(ixheaacd_mult32x16lin32(outi, win1), q_shift),
            ixheaacd_mult32x16lin32_drc(overlap_data, (WORD16)(win1 >> 16)));

        *pcm_out = ixheaacd_round16(ixheaacd_shl32_sat(accu, 2));
        pcm_out -= ch_fac;
        accu = ixheaacd_sub32_sat(
            ixheaacd_shr32(
                ixheaacd_mult32x16hin32_drc(ixheaacd_negate32(outi), win1),
                q_shift),
            ixheaacd_mult32x16lin32_drc(overlap_data, (WORD16)(win1)));

        *pcmout1 = ixheaacd_round16(ixheaacd_shl32_sat(accu, 2));
        pcmout1 += ch_fac;
      }
      /* Final pair (i == 0 here): table entries are read as (cos, sin). */
      cos1 = *cos_sin_ptr++;
      sin1 = *cos_sin_ptr++;

      tempr = *(spec_data + i);
      tempi = *(spec_data + i + 1);

      outr = ixheaacd_add32(ixheaacd_mult32x16in32(tempr, cos1),
                            ixheaacd_mult32x16in32(tempi, sin1));
      outi = ixheaacd_sub32(ixheaacd_mult32x16in32(tempr, sin1),
                            ixheaacd_mult32x16in32(tempi, cos1));

      overlap_data = *ptr_overlap_buf;

      temp1 = ixheaacd_mult32x16in32(outi, adjust1);

      temp2 = ixheaacd_mult32x16in32(outr, adjust);

      outr = outr + temp1;
      outi = outi + temp2;

      *ptr_overlap_buf++ = ixheaacd_shr32_drc(outi, 16 + q_shift);

      win1 = *((WORD32 *)window + i);
      accu = ixheaacd_sub32_sat(
          ixheaacd_shr32(ixheaacd_mult32x16lin32(outr, win1), q_shift),
          ixheaacd_mult32x16lin32_drc(overlap_data, (WORD16)(win1 >> 16)));

      *pcm_out = ixheaacd_round16(ixheaacd_shl32_sat(accu, 2));
      pcm_out -= ch_fac;
      accu = ixheaacd_sub32_sat(
          ixheaacd_shr32(
              ixheaacd_mult32x16hin32_drc(ixheaacd_negate32(outr), win1),
              q_shift),
          ixheaacd_mult32x16lin32_drc(overlap_data, (WORD16)win1));
      *pcmout1 = ixheaacd_round16(ixheaacd_shl32_sat(accu, 2));
      pcmout1 += ch_fac;
    }
  }
}
    788 
     /*
      * ixheaacd_imdct_using_fft_dec
      *
      * Fixed-point complex FFT kernel built from 8-point butterflies.
      *
      * ptr_imdct_tables : supplies fft_twiddle and the two digit-reversal
      *                    tables (dig_rev_table8_long / dig_rev_table8_short).
      * npoints          : number of complex points. The first-stage loop
      *                    counts down by 8 until it hits exactly 0, so npoints
      *                    must be a positive multiple of 8. The long table is
      *                    selected only when npoints == 512.
      * ptr_x            : input, read as interleaved (re, im) WORD32 pairs.
      * ptr_y            : output, written as interleaved (re, im) WORD32 pairs
      *                    by the first stage and updated in place afterwards.
      *
      * The butterflies use plain +, - and << on WORD32 values; no saturation
      * is applied in this function.
      * NOTE(review): transform direction and output scaling are not evident
      * from this function alone - confirm against the callers.
      */
     789 VOID ixheaacd_imdct_using_fft_dec(
     790     ia_aac_dec_imdct_tables_struct *ptr_imdct_tables, WORD32 npoints,
     791     WORD32 *ptr_x, WORD32 *ptr_y)
     792 
     793 {
     794   WORD32 i, j, k, k1, n_stages;
     795   WORD32 x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i, x4r, x4i, x5r, x5i, x6r, x6i,
     796       x7r, x7i;
     797   WORD32 del, nodespacing, in_loop_cnt, tmp, twiddle_val, *ptr_tmp;
     798   const WORD32 *ptr_twiddle;
     799   WORD8 *ptr_dig_rev_table;
           /* NOTE(review): ixheaacd_norm32 is defined elsewhere; presumably it
            * returns the normalisation shift of npoints, which would make
            * (30 - norm) / 3 the number of radix-8 stages - confirm. */
     800   n_stages = ixheaacd_norm32(npoints);
     801 
     802   n_stages = (30 - n_stages) / 3;
     803 
     804   ptr_tmp = ptr_y;
     805 
     806   ptr_twiddle = ptr_imdct_tables->fft_twiddle;
           /* (npoints << 1) == 1024 means npoints == 512: use the long table,
            * every other size uses the short table. */
     807   ptr_dig_rev_table = ((npoints << 1) == 1024)
     808                           ? ptr_imdct_tables->dig_rev_table8_long
     809                           : ptr_imdct_tables->dig_rev_table8_short;
     810 
           /* First stage: one 8-point butterfly per iteration. The eight complex
            * inputs are read from ptr_x starting at twice the digit-reversal
            * table entry and spaced (npoints >> 2) WORD32s apart; the eight
            * results are stored in 16 consecutive WORD32s of ptr_y. */
     811   for (i = npoints; i != 0; i -= 8) {
     812     WORD32 *data = ptr_x;
     813     data = data + (*ptr_dig_rev_table++ << 1);
     814 
     815     x0r = *data;
     816     x0i = *(data + 1);
     817     data += (npoints >> 1);
     818 
     819     x2r = *data;
     820     x2i = *(data + 1);
     821     data += (npoints >> 1);
     822 
     823     x4r = *data;
     824     x4i = *(data + 1);
     825     data += (npoints >> 1);
     826 
     827     x6r = *data;
     828     x6i = *(data + 1);
             /* Move from the x6 slot to the x1 slot, (npoints >> 2) WORD32s past
              * the start of this group. */
     829     data -= 5 * (npoints >> 2);
     830 
     831     x0r = x0r + x4r;
     832     x0i = x0i + x4i;
     833     x4r = x0r - (x4r << 1);
     834     x4i = x0i - (x4i << 1);
     835 
     836     x2r = x2r + x6r;
     837     x2i = x2i + x6i;
     838     x6r = x2r - (x6r << 1);
     839     x6i = x2i - (x6i << 1);
     840 
     841     x0r = x0r + x2r;
     842     x0i = x0i + x2i;
     843     x2r = x0r - (x2r << 1);
     844     x2i = x0i - (x2i << 1);
     845 
     846     x4r = x4r + x6i;
     847     x4i = x4i - x6r;
     848     tmp = x6r;
     849     x6r = x4r - (x6i << 1);
     850     x6i = x4i + (tmp << 1);
     851 
     852     x1r = *data;
     853     x1i = *(data + 1);
     854     data += (npoints >> 1);
     855 
     856     x3r = *data;
     857     x3i = *(data + 1);
     858     data += (npoints >> 1);
     859 
     860     x5r = *data;
     861     x5i = *(data + 1);
     862     data += (npoints >> 1);
     863 
     864     x7r = *data;
     865     x7i = *(data + 1);
     866     data -= 7 * (npoints >> 2);
     867 
     868     x1r = x1r + x5r;
     869     x1i = x1i + x5i;
     870     x5r = x1r - (x5r << 1);
     871     x5i = x1i - (x5i << 1);
     872 
     873     x3r = x3r + x7r;
     874     x3i = x3i + x7i;
     875     x7r = x3r - (x7r << 1);
     876     x7i = x3i - (x7i << 1);
     877 
     878     x1r = x1r + x3r;
     879     x1i = x1i + x3i;
     880     x3r = x1r - (x3r << 1);
     881     x3i = x1i - (x3i << 1);
     882 
     883     x5r = x5r + x5i;
     884     x5i = x5r - (x5i << 1);
     885 
     886     x7r = x7r + x7i;
     887     x7i = x7r - (x7i << 1);
     888 
     889     x7i = x5r - x7i;
     890     x5r = x7i - (x5r << 1);
     891 
     892     x5i = x7r - x5i;
     893     x7r = x5i - (x7r << 1);
     894 
     895     x7i = x7i << 1;
     896     x5r = x5r << 1;
     897     x5i = x5i << 1;
     898     x7r = x7r << 1;
     899 
     900     x0r = x0r + x1r;
     901     x0i = x0i + x1i;
     902     x1r = x0r - (x1r << 1);
     903     x1i = x0i - (x1i << 1);
     904 
     905     x2r = x2r + x3i;
     906     tmp = x2r - (x3i << 1);
     907     x2i = x2i - x3r;
     908     x3i = x2i + (x3r << 1);
     909 
             /* Four results go to WORD32 offsets 0, 4, 8 and 12 of this output
              * group; the other four fill offsets 2, 6, 10 and 14 below. */
     910     *ptr_tmp = x0r;
     911     *(ptr_tmp + 1) = x0i;
     912     ptr_tmp += 4;
     913 
     914     *ptr_tmp = x2r;
     915     *(ptr_tmp + 1) = x2i;
     916     ptr_tmp += 4;
     917 
     918     *ptr_tmp = x1r;
     919     *(ptr_tmp + 1) = x1i;
     920     ptr_tmp += 4;
     921 
     922     *ptr_tmp = tmp;
     923     *(ptr_tmp + 1) = x3i;
     924     ptr_tmp -= 10;
     925 
             /* 0x5A82 == 23170, i.e. about 0.7071 * 32768 (presumably 1/sqrt(2)
              * in Q15 - confirm against ixheaacd_mult32x16lin32). */
     926     tmp = 0x5A82;
     927 
     928     x7i = x4r + (ixheaacd_mult32x16lin32(x7i, tmp));
     929     x4r = x7i - (x4r << 1);
     930 
     931     x7r = x4i + (ixheaacd_mult32x16lin32(x7r, tmp));
     932     x4i = x7r - (x4i << 1);
     933 
     934     x5i = x6r + (ixheaacd_mult32x16lin32(x5i, tmp));
     935     x6r = x5i - (x6r << 1);
     936 
     937     x5r = x6i + (ixheaacd_mult32x16lin32(x5r, tmp));
     938     x6i = x5r - (x6i << 1);
     939 
     940     *ptr_tmp = x7i;
     941     *(ptr_tmp + 1) = x7r;
     942     ptr_tmp += 4;
     943 
     944     *ptr_tmp = x5i;
     945     *(ptr_tmp + 1) = x5r;
     946     ptr_tmp += 4;
     947 
     948     *ptr_tmp = -x4r;
     949     *(ptr_tmp + 1) = -x4i;
     950     ptr_tmp += 4;
     951 
     952     *ptr_tmp = -x6r;
     953     *(ptr_tmp + 1) = -x6i;
             /* Offset 14 + 2 = 16: start of the next output group. */
     954     ptr_tmp += 2;
     955   }
     956 
     957   del = 8;
     958 
     959   nodespacing = 64;
     960   in_loop_cnt = npoints >> 6;
     961 
           /* Intermediate stages, run for k1 = n_stages - 2 down to 1 (not at
            * all when n_stages <= 2). After each pass del is multiplied by 8
            * while nodespacing and in_loop_cnt are divided by 8. All work from
            * here on is done in place on ptr_y. */
     962   for (k1 = n_stages - 2; k1 > 0; k1--) {
     963     WORD32 *data = ptr_y;
     964     const WORD32 *twiddles;
     965 
             /* Butterflies computed without any twiddle multiplication: one
              * group every 8 * del complex points, with the eight operands
              * (del << 1) WORD32s apart. */
     966     for (i = 0; i != npoints; i += 8 * del) {
     967       data = ptr_y + (i << 1);
     968       x0r = *data;
     969       x0i = *(data + 1);
     970       data += (del << 2);
     971 
     972       x2r = *data;
     973       x2i = *(data + 1);
     974       data += (del << 2);
     975 
     976       x4r = *data;
     977       x4i = *(data + 1);
     978       data += (del << 2);
     979 
     980       x6r = *data;
     981       x6i = *(data + 1);
     982       data -= 5 * (del << 1);
     983 
     984       x0r = x0r + x4r;
     985       x0i = x0i + x4i;
     986       x4r = x0r - (x4r << 1);
     987       x4i = x0i - (x4i << 1);
     988 
     989       x2r = x2r + x6r;
     990       x2i = x2i + x6i;
     991       x6r = x2r - (x6r << 1);
     992       x6i = x2i - (x6i << 1);
     993 
     994       x0r = x0r + x2r;
     995       x0i = x0i + x2i;
     996       x2r = x0r - (x2r << 1);
     997       x2i = x0i - (x2i << 1);
     998 
     999       x4r = x4r + x6i;
    1000       x4i = x4i - x6r;
    1001       tmp = x6r;
    1002       x6r = x4r - (x6i << 1);
    1003       x6i = x4i + (tmp << 1);
    1004 
    1005       x1r = *data;
    1006       x1i = *(data + 1);
    1007       data += (del << 2);
    1008 
    1009       x3r = *data;
    1010       x3i = *(data + 1);
    1011       data += (del << 2);
    1012 
    1013       x5r = *data;
    1014       x5i = *(data + 1);
    1015       data += (del << 2);
    1016 
    1017       x7r = *data;
    1018       x7i = *(data + 1);
    1019       data -= 7 * (del << 1);
    1020 
    1021       x1r = x1r + x5r;
    1022       x1i = x1i + x5i;
    1023       x5r = x1r - (x5r << 1);
    1024       x5i = x1i - (x5i << 1);
    1025 
    1026       x3r = x3r + x7r;
    1027       x3i = x3i + x7i;
    1028       x7r = x3r - (x7r << 1);
    1029       x7i = x3i - (x7i << 1);
    1030 
    1031       x1r = x1r + x3r;
    1032       x1i = x1i + x3i;
    1033       x3r = x1r - (x3r << 1);
    1034       x3i = x1i - (x3i << 1);
    1035 
    1036       x5r = x5r + x5i;
    1037       x5i = x5r - (x5i << 1);
    1038 
    1039       x7r = x7r + x7i;
    1040       x7i = x7r - (x7i << 1);
    1041 
    1042       x7i = x5r - x7i;
    1043       x5r = x7i - (x5r << 1);
    1044 
    1045       x5i = x7r - x5i;
    1046       x7r = x5i - (x7r << 1);
    1047 
    1048       x7i = x7i << 1;
    1049       x5r = x5r << 1;
    1050       x5i = x5i << 1;
    1051       x7r = x7r << 1;
    1052 
    1053       x0r = x0r + x1r;
    1054       x0i = x0i + x1i;
    1055       x1r = x0r - (x1r << 1);
    1056       x1i = x0i - (x1i << 1);
    1057 
    1058       x2r = x2r + x3i;
    1059       tmp = x2r - (x3i << 1);
    1060       x2i = x2i - x3r;
    1061       x3i = x2i + (x3r << 1);
    1062 
    1063       *data = x0r;
    1064       *(data + 1) = x0i;
    1065       data += (del << 2);
    1066 
    1067       *data = x2r;
    1068       *(data + 1) = x2i;
    1069       data += (del << 2);
    1070 
    1071       *data = x1r;
    1072       *(data + 1) = x1i;
    1073       data += (del << 2);
    1074 
    1075       *data = tmp;
    1076       *(data + 1) = x3i;
    1077       data -= 5 * (del << 1);
    1078 
    1079       tmp = 0x5A82;
    1080 
    1081       x7i = x4r + (ixheaacd_mult32x16lin32(x7i, tmp));
    1082       x4r = x7i - (x4r << 1);
    1083       x7r = x4i + (ixheaacd_mult32x16lin32(x7r, tmp));
    1084       x4i = x7r - (x4i << 1);
    1085 
    1086       x5i = x6r + (ixheaacd_mult32x16lin32(x5i, tmp));
    1087       x6r = x5i - (x6r << 1);
    1088       x5r = x6i + (ixheaacd_mult32x16lin32(x5r, tmp));
    1089       x6i = x5r - (x6i << 1);
    1090 
    1091       *data = x7i;
    1092       *(data + 1) = x7r;
    1093       data += (del << 2);
    1094 
    1095       *data = x5i;
    1096       *(data + 1) = x5r;
    1097       data += (del << 2);
    1098 
    1099       *data = -x4r;
    1100       *(data + 1) = -x4i;
    1101       data += (del << 2);
    1102 
    1103       *data = -x6r;
    1104       *(data + 1) = -x6i;
    1105 
    1106       data -= 7 * (del << 1);
    1107     }
    1108 
    1109     twiddles = ptr_twiddle;
    1110     data = ptr_y;
    1111 
             /* Remaining butterflies of this stage (j > 0): x1..x7 are each
              * combined with a twiddle_val read from the fft_twiddle table before
              * the 8-point butterfly. The outer loop advances one complex slot
              * per j; the inner loop visits in_loop_cnt groups for that slot. */
    1112     for (j = nodespacing; j < nodespacing * del; j += nodespacing) {
    1113       data = data + 2;
    1114 
    1115       for (k = in_loop_cnt; k != 0; k--) {
    1116         data += (del << 2);
    1117         x2r = *data;
    1118         x2i = *(data + 1);
    1119 
    1120         data += (del << 2);
    1121         x4r = *data;
    1122         x4i = *(data + 1);
    1123 
    1124         data += (del << 2);
    1125         x6r = *data;
    1126         x6i = *(data + 1);
    1127 
    1128         data -= 6 * (del << 1);
    1129 
                 /* For j a multiple of 8, twiddles visits offsets j/4, j/2, 3j/4
                  * (for x2, x4, x6), then j/8, 3j/8, 5j/8, 7j/8 (for x1, x3, x5,
                  * x7) and is finally moved back by 7 * (j >> 3) to where it
                  * started. */
    1130         twiddles += (j >> 2);
    1131 
    1132         twiddle_val = *(twiddles);
    1133 
                 /* NOTE(review): the lin32/hin32 helpers presumably use the low
                  * and high 16-bit halves of twiddle_val - confirm. */
    1134         tmp = (ixheaacd_mult32x16lin32(x2r, twiddle_val) -
    1135                ixheaacd_mult32x16hin32_drc(x2i, twiddle_val));
    1136         x2i = (ixheaacd_mac32x16lin32(
    1137                   ixheaacd_mult32x16hin32_drc(x2r, twiddle_val), x2i,
    1138                   twiddle_val))
    1139               << 1;
    1140         x2r = tmp << 1;
    1141 
    1142         twiddles += (j >> 2);
    1143         twiddle_val = *(twiddles);
    1144 
    1145         tmp = (ixheaacd_mult32x16lin32(x4r, twiddle_val) -
    1146                ixheaacd_mult32x16hin32_drc(x4i, twiddle_val));
    1147         x4i = (ixheaacd_mac32x16lin32(
    1148                   ixheaacd_mult32x16hin32_drc(x4r, twiddle_val), x4i,
    1149                   twiddle_val))
    1150               << 1;
    1151         x4r = tmp << 1;
    1152 
    1153         twiddles += (j >> 2);
    1154         twiddle_val = *(twiddles);
    1155 
    1156         tmp = (ixheaacd_mult32x16lin32(x6r, twiddle_val) -
    1157                ixheaacd_mult32x16hin32_drc(x6i, twiddle_val));
    1158         x6i = (ixheaacd_mac32x16lin32(
    1159                   ixheaacd_mult32x16hin32_drc(x6r, twiddle_val), x6i,
    1160                   twiddle_val))
    1161               << 1;
    1162         x6r = tmp << 1;
    1163 
    1164         x0r = *data;
    1165         x0i = *(data + 1);
    1166         data += (del << 1);
    1167 
    1168         x0r = x0r + x4r;
    1169         x0i = x0i + x4i;
    1170         x4r = x0r - (x4r << 1);
    1171         x4i = x0i - (x4i << 1);
    1172 
    1173         x2r = x2r + x6r;
    1174         x2i = x2i + x6i;
    1175         x6r = x2r - (x6r << 1);
    1176         x6i = x2i - (x6i << 1);
    1177 
    1178         x0r = x0r + x2r;
    1179         x0i = x0i + x2i;
    1180         x2r = x0r - (x2r << 1);
    1181         x2i = x0i - (x2i << 1);
    1182 
    1183         x4r = x4r + x6i;
    1184         x4i = x4i - x6r;
    1185         tmp = x6r;
    1186         x6r = x4r - (x6i << 1);
    1187         x6i = x4i + (tmp << 1);
    1188 
    1189         x1r = *data;
    1190         x1i = *(data + 1);
    1191         data += (del << 2);
    1192 
    1193         twiddles -= 5 * (j >> 3);
    1194         twiddle_val = *(twiddles);
    1195 
    1196         tmp = (ixheaacd_mult32x16lin32(x1r, twiddle_val) -
    1197                ixheaacd_mult32x16hin32_drc(x1i, twiddle_val));
    1198         x1i = (ixheaacd_mac32x16lin32(
    1199                   ixheaacd_mult32x16hin32_drc(x1r, twiddle_val), x1i,
    1200                   twiddle_val))
    1201               << 1;
    1202         x1r = tmp << 1;
    1203 
    1204         x3r = *data;
    1205         x3i = *(data + 1);
    1206         data += (del << 2);
    1207 
    1208         twiddles += (j >> 2);
    1209         twiddle_val = *(twiddles);
    1210 
                 /* Unlike x1/x2/x4/x6, the x3, x5 and x7 products are not
                  * doubled here; the butterfly below applies << 1 / << 2 to
                  * them instead. */
    1211         tmp = (ixheaacd_mult32x16lin32(x3r, twiddle_val) -
    1212                ixheaacd_mult32x16hin32_drc(x3i, twiddle_val));
    1213         x3i = (ixheaacd_mac32x16lin32(
    1214             ixheaacd_mult32x16hin32_drc(x3r, twiddle_val), x3i, twiddle_val));
    1215         x3r = tmp;
    1216 
    1217         x5r = *data;
    1218         x5i = *(data + 1);
    1219         data += (del << 2);
    1220 
    1221         twiddles += (j >> 2);
    1222         twiddle_val = *(twiddles);
    1223 
    1224         tmp = (ixheaacd_mult32x16lin32(x5r, twiddle_val) -
    1225                ixheaacd_mult32x16hin32_drc(x5i, twiddle_val));
    1226         x5i = (ixheaacd_mac32x16lin32(
    1227             ixheaacd_mult32x16hin32_drc(x5r, twiddle_val), x5i, twiddle_val));
    1228         x5r = tmp;
    1229 
    1230         x7r = *data;
    1231         x7i = *(data + 1);
    1232         data -= 7 * (del << 1);
    1233 
    1234         twiddles += (j >> 2);
    1235         twiddle_val = *(twiddles);
    1236         twiddles -= 7 * (j >> 3);
    1237 
    1238         tmp = (ixheaacd_mult32x16lin32(x7r, twiddle_val) -
    1239                ixheaacd_mult32x16hin32_drc(x7i, twiddle_val));
    1240         x7i = (ixheaacd_mac32x16lin32(
    1241             ixheaacd_mult32x16hin32_drc(x7r, twiddle_val), x7i, twiddle_val));
    1242         x7r = tmp;
    1243 
    1244         x1r = x1r + (x5r << 1);
    1245         x1i = x1i + (x5i << 1);
    1246         x5r = x1r - (x5r << 2);
    1247         x5i = x1i - (x5i << 2);
    1248 
    1249         x3r = x3r + x7r;
    1250         x3i = x3i + x7i;
    1251         x7r = x3r - (x7r << 1);
    1252         x7i = x3i - (x7i << 1);
    1253 
    1254         x1r = x1r + (x3r << 1);
    1255         x1i = x1i + (x3i << 1);
    1256         x3r = x1r - (x3r << 2);
    1257         x3i = x1i - (x3i << 2);
    1258 
    1259         x5r = x5r + x5i;
    1260         x5i = x5r - (x5i << 1);
    1261 
    1262         x7r = x7r + x7i;
    1263         x7i = x7r - (x7i << 1);
    1264 
    1265         x7i = x5r - (x7i << 1);
    1266         x5r = x7i - (x5r << 1);
    1267 
    1268         x5i = (x7r << 1) - x5i;
    1269         x7r = x5i - (x7r << 2);
    1270 
    1271         x7i = x7i << 1;
    1272         x5r = x5r << 1;
    1273         x5i = x5i << 1;
    1274         x7r = x7r << 1;
    1275 
    1276         x0r = x0r + x1r;
    1277         x0i = x0i + x1i;
    1278         x1r = x0r - (x1r << 1);
    1279         x1i = x0i - (x1i << 1);
    1280 
    1281         x2r = x2r + x3i;
    1282         tmp = x2r - (x3i << 1);
    1283         x2i = x2i - x3r;
    1284         x3i = x2i + (x3r << 1);
    1285 
    1286         *data = x0r;
    1287         *(data + 1) = x0i;
    1288         data += (del << 2);
    1289 
    1290         *data = x2r;
    1291         *(data + 1) = x2i;
    1292         data += (del << 2);
    1293 
    1294         *data = x1r;
    1295         *(data + 1) = x1i;
    1296         data += (del << 2);
    1297 
    1298         *data = tmp;
    1299         *(data + 1) = x3i;
    1300         data -= 5 * (del << 1);
    1301 
    1302         tmp = 0x5A82;
    1303 
    1304         x7i = x4r + (ixheaacd_mult32x16lin32(x7i, tmp));
    1305         x4r = x7i - (x4r << 1);
    1306 
    1307         x7r = x4i + (ixheaacd_mult32x16lin32(x7r, tmp));
    1308         x4i = x7r - (x4i << 1);
    1309 
    1310         x5i = x6r + (ixheaacd_mult32x16lin32(x5i, tmp));
    1311         x6r = x5i - (x6r << 1);
    1312 
    1313         x5r = x6i + (ixheaacd_mult32x16lin32(x5r, tmp));
    1314         x6i = x5r - (x6i << 1);
    1315 
    1316         *data = x7i;
    1317         *(data + 1) = x7r;
    1318         data += (del << 2);
    1319 
    1320         *data = x5i;
    1321         *(data + 1) = x5r;
    1322         data += (del << 2);
    1323 
    1324         *data = -x4r;
    1325         *(data + 1) = -x4i;
    1326         data += (del << 2);
    1327 
    1328         *data = -x6r;
    1329         *(data + 1) = -x6i;
    1330 
                 /* Back to the start of this group, then on to the next group
                  * (del << 4) WORD32s further. */
    1331         data -= 7 * (del << 1);
    1332         data += (del << 4);
    1333       }
               /* Rewind by the full buffer length (2 * npoints WORD32s). */
    1334       data -= npoints << 1;
    1335     }
    1336     nodespacing >>= 3;
    1337     del <<= 3;
    1338     in_loop_cnt >>= 3;
    1339   }
    1340 
           /* Last stage: the same twiddled 8-point butterfly as above, but with a
            * single butterfly per value of j and starting at j == 0. */
    1341   {
    1342     WORD32 *data = ptr_y;
    1343     const WORD32 *twiddles;
    1344     twiddles = ptr_twiddle;
    1345     data = ptr_y;
             /* Step back one complex slot so that the unconditional `data + 2`
              * at the top of the loop lands on ptr_y for j == 0. */
    1346     data = data - 2;
    1347 
    1348     for (j = 0; j < nodespacing * del; j += nodespacing) {
    1349       data = data + 2;
    1350 
    1351       {
    1352         data += (del << 2);
    1353         x2r = *data;
    1354         x2i = *(data + 1);
    1355 
    1356         data += (del << 2);
    1357         x4r = *data;
    1358         x4i = *(data + 1);
    1359 
    1360         data += (del << 2);
    1361         x6r = *data;
    1362         x6i = *(data + 1);
    1363 
    1364         data -= 6 * (del << 1);
    1365 
    1366         twiddles += (j >> 2);
    1367 
    1368         twiddle_val = *(twiddles);
    1369 
    1370         tmp = (ixheaacd_mult32x16lin32(x2r, twiddle_val) -
    1371                ixheaacd_mult32x16hin32_drc(x2i, twiddle_val));
    1372         x2i = (ixheaacd_mac32x16lin32(
    1373                   ixheaacd_mult32x16hin32_drc(x2r, twiddle_val), x2i,
    1374                   twiddle_val))
    1375               << 1;
    1376         x2r = tmp << 1;
    1377 
    1378         twiddles += (j >> 2);
    1379         twiddle_val = *(twiddles);
    1380 
    1381         tmp = (ixheaacd_mult32x16lin32(x4r, twiddle_val) -
    1382                ixheaacd_mult32x16hin32_drc(x4i, twiddle_val));
    1383         x4i = (ixheaacd_mac32x16lin32(
    1384                   ixheaacd_mult32x16hin32_drc(x4r, twiddle_val), x4i,
    1385                   twiddle_val))
    1386               << 1;
    1387         x4r = tmp << 1;
    1388 
    1389         twiddles += (j >> 2);
    1390         twiddle_val = *(twiddles);
    1391 
    1392         tmp = (ixheaacd_mult32x16lin32(x6r, twiddle_val) -
    1393                ixheaacd_mult32x16hin32_drc(x6i, twiddle_val));
    1394         x6i = (ixheaacd_mac32x16lin32(
    1395                   ixheaacd_mult32x16hin32_drc(x6r, twiddle_val), x6i,
    1396                   twiddle_val))
    1397               << 1;
    1398         x6r = tmp << 1;
    1399 
    1400         x0r = *data;
    1401         x0i = *(data + 1);
    1402         data += (del << 1);
    1403 
    1404         x0r = x0r + x4r;
    1405         x0i = x0i + x4i;
    1406         x4r = x0r - (x4r << 1);
    1407         x4i = x0i - (x4i << 1);
    1408 
    1409         x2r = x2r + x6r;
    1410         x2i = x2i + x6i;
    1411         x6r = x2r - (x6r << 1);
    1412         x6i = x2i - (x6i << 1);
    1413 
    1414         x0r = x0r + x2r;
    1415         x0i = x0i + x2i;
    1416         x2r = x0r - (x2r << 1);
    1417         x2i = x0i - (x2i << 1);
    1418 
    1419         x4r = x4r + x6i;
    1420         x4i = x4i - x6r;
    1421         tmp = x6r;
    1422         x6r = x4r - (x6i << 1);
    1423         x6i = x4i + (tmp << 1);
    1424 
    1425         x1r = *data;
    1426         x1i = *(data + 1);
    1427         data += (del << 2);
    1428 
    1429         twiddles -= 5 * (j >> 3);
    1430         twiddle_val = *(twiddles);
    1431 
    1432         tmp = (ixheaacd_mult32x16lin32(x1r, twiddle_val) -
    1433                ixheaacd_mult32x16hin32_drc(x1i, twiddle_val));
    1434         x1i = (ixheaacd_mac32x16lin32(
    1435                   ixheaacd_mult32x16hin32_drc(x1r, twiddle_val), x1i,
    1436                   twiddle_val))
    1437               << 1;
    1438         x1r = tmp << 1;
    1439 
    1440         x3r = *data;
    1441         x3i = *(data + 1);
    1442         data += (del << 2);
    1443 
    1444         twiddles += (j >> 2);
    1445         twiddle_val = *(twiddles);
    1446 
    1447         tmp = (ixheaacd_mult32x16lin32(x3r, twiddle_val) -
    1448                ixheaacd_mult32x16hin32_drc(x3i, twiddle_val));
    1449         x3i = (ixheaacd_mac32x16lin32(
    1450             ixheaacd_mult32x16hin32_drc(x3r, twiddle_val), x3i, twiddle_val));
    1451         x3r = tmp;
    1452 
    1453         x5r = *data;
    1454         x5i = *(data + 1);
    1455         data += (del << 2);
    1456 
    1457         twiddles += (j >> 2);
    1458         twiddle_val = *(twiddles);
    1459 
    1460         tmp = (ixheaacd_mult32x16lin32(x5r, twiddle_val) -
    1461                ixheaacd_mult32x16hin32_drc(x5i, twiddle_val));
    1462         x5i = (ixheaacd_mac32x16lin32(
    1463             ixheaacd_mult32x16hin32_drc(x5r, twiddle_val), x5i, twiddle_val));
    1464         x5r = tmp;
    1465 
    1466         x7r = *data;
    1467         x7i = *(data + 1);
    1468         data -= 7 * (del << 1);
    1469 
    1470         twiddles += (j >> 2);
    1471         twiddle_val = *(twiddles);
    1472         twiddles -= 7 * (j >> 3);
    1473 
    1474         tmp = (ixheaacd_mult32x16lin32(x7r, twiddle_val) -
    1475                ixheaacd_mult32x16hin32_drc(x7i, twiddle_val));
    1476         x7i = (ixheaacd_mac32x16lin32(
    1477             ixheaacd_mult32x16hin32_drc(x7r, twiddle_val), x7i, twiddle_val));
    1478         x7r = tmp;
    1479 
    1480         x1r = x1r + (x5r << 1);
    1481         x1i = x1i + (x5i << 1);
    1482         x5r = x1r - (x5r << 2);
    1483         x5i = x1i - (x5i << 2);
    1484 
    1485         x3r = x3r + x7r;
    1486         x3i = x3i + x7i;
    1487         x7r = x3r - (x7r << 1);
    1488         x7i = x3i - (x7i << 1);
    1489 
    1490         x1r = x1r + (x3r << 1);
    1491         x1i = x1i + (x3i << 1);
    1492         x3r = x1r - (x3r << 2);
    1493         x3i = x1i - (x3i << 2);
    1494 
    1495         x5r = x5r + x5i;
    1496         x5i = x5r - (x5i << 1);
    1497 
    1498         x7r = x7r + x7i;
    1499         x7i = x7r - (x7i << 1);
    1500 
    1501         x7i = x5r - (x7i << 1);
    1502         x5r = x7i - (x5r << 1);
    1503 
    1504         x5i = (x7r << 1) - x5i;
    1505         x7r = x5i - (x7r << 2);
    1506 
    1507         x7i = x7i << 1;
    1508         x5r = x5r << 1;
    1509         x5i = x5i << 1;
    1510         x7r = x7r << 1;
    1511 
    1512         x0r = x0r + x1r;
    1513         x0i = x0i + x1i;
    1514         x1r = x0r - (x1r << 1);
    1515         x1i = x0i - (x1i << 1);
    1516 
    1517         x2r = x2r + x3i;
    1518         tmp = x2r - (x3i << 1);
    1519         x2i = x2i - x3r;
    1520         x3i = x2i + (x3r << 1);
    1521 
    1522         *data = x0r;
    1523         *(data + 1) = x0i;
    1524         data += (del << 2);
    1525 
    1526         *data = x2r;
    1527         *(data + 1) = x2i;
    1528         data += (del << 2);
    1529 
    1530         *data = x1r;
    1531         *(data + 1) = x1i;
    1532         data += (del << 2);
    1533 
    1534         *data = tmp;
    1535         *(data + 1) = x3i;
    1536         data -= 5 * (del << 1);
    1537 
    1538         tmp = 0x5A82;
    1539 
    1540         x7i = x4r + (ixheaacd_mult32x16lin32(x7i, tmp));
    1541         x4r = x7i - (x4r << 1);
    1542 
    1543         x7r = x4i + (ixheaacd_mult32x16lin32(x7r, tmp));
    1544         x4i = x7r - (x4i << 1);
    1545 
    1546         x5i = x6r + (ixheaacd_mult32x16lin32(x5i, tmp));
    1547         x6r = x5i - (x6r << 1);
    1548 
    1549         x5r = x6i + (ixheaacd_mult32x16lin32(x5r, tmp));
    1550         x6i = x5r - (x6i << 1);
    1551 
    1552         *data = x7i;
    1553         *(data + 1) = x7r;
    1554         data += (del << 2);
    1555 
    1556         *data = x5i;
    1557         *(data + 1) = x5r;
    1558         data += (del << 2);
    1559 
    1560         *data = -x4r;
    1561         *(data + 1) = -x4i;
    1562         data += (del << 2);
    1563 
    1564         *data = -x6r;
    1565         *(data + 1) = -x6i;
    1566 
    1567         data -= 7 * (del << 1);
    1568         data += (del << 4);
    1569       }
    1570       data -= npoints << 1;
    1571     }
    1572 
             /* These three updates are not read again before the function
              * returns. */
    1573     nodespacing >>= 3;
    1574     del <<= 3;
    1575     in_loop_cnt >>= 3;
    1576   }
    1577 }
   1578 
   1579 WORD32 ixheaacd_inverse_transform(
   1580     WORD32 spec_data[], WORD32 scratch[],
   1581     ia_aac_dec_imdct_tables_struct *ptr_imdct_tables, WORD32 expo,
   1582     WORD32 npoints) {
   1583   (*ixheaacd_pretwiddle_compute)(spec_data, spec_data + npoints - 1, scratch,
   1584                                  ptr_imdct_tables, (npoints >> 2), expo);
   1585 
   1586   (*ixheaacd_imdct_using_fft)(ptr_imdct_tables, npoints >> 1, scratch,
   1587                               spec_data);
   1588 
   1589   expo += 2;
   1590 
   1591   return expo;
   1592 }
   1593 
   1594 VOID ixheaacd_mdct_480_ld(WORD32 *inp, WORD32 *scratch, WORD32 *mdct_scale,
   1595                           WORD32 mdct_flag,
   1596                           ia_aac_dec_imdct_tables_struct *imdct_tables_ptr,
   1597                           WORD32 object_type) {
   1598   WORD32 expo, neg_expo = 0, k;
   1599 
   1600   WORD32 const_mltfac = 1145324612;
   1601 
   1602   expo = (*ixheaacd_calc_max_spectral_line)(inp, MDCT_LEN) - 1;
   1603   ;
   1604 
   1605   memcpy(scratch, inp, sizeof(WORD32) * MDCT_LEN);
   1606 
   1607   neg_expo = 7 - expo;
   1608 
   1609   ixheaacd_pre_twiddle(inp, scratch, 480, imdct_tables_ptr->cosine_array_960,
   1610                        neg_expo);
   1611 
   1612   ixheaacd_fft_480_ld(inp, scratch, imdct_tables_ptr);
   1613 
   1614   if (object_type == AOT_ER_AAC_LD) {
   1615     ixheaacd_post_twiddle_ld(inp, scratch, imdct_tables_ptr->cosine_array_960,
   1616                              480);
   1617   } else if (object_type == AOT_ER_AAC_ELD) {
   1618     ixheaacd_post_twiddle_eld(inp + (480), scratch,
   1619                               imdct_tables_ptr->cosine_array_960, 480);
   1620   }
   1621 
   1622   if (0 == mdct_flag) {
   1623     WORD32 *data = inp;
   1624 
   1625     if (object_type != AOT_ER_AAC_ELD) {
   1626       for (k = MDCT_LEN - 1; k >= 0; k -= 2) {
   1627         *data = ixheaacd_mult32_shl(*data, const_mltfac);
   1628         data++;
   1629         *data = ixheaacd_mult32_shl(*data, const_mltfac);
   1630         data++;
   1631       }
   1632       neg_expo += 1;
   1633     } else {
   1634       data = inp + 480;
   1635       for (k = (MDCT_LEN << 1) - 1; k >= 0; k -= 2) {
   1636         *data = ixheaacd_mult32_shl(*data, const_mltfac);
   1637         data++;
   1638         *data = ixheaacd_mult32_shl(*data, const_mltfac);
   1639         data++;
   1640       }
   1641       neg_expo += 1;
   1642     }
   1643   }
   1644 
   1645   *mdct_scale = neg_expo + 3;
   1646 }
   1647 
   1648 VOID ixheaacd_inverse_transform_512(
   1649     WORD32 data[], WORD32 temp[], WORD32 *imdct_scale, WORD32 *cos_sin_ptr,
   1650     ia_aac_dec_imdct_tables_struct *imdct_tables_ptr, WORD32 object_type) {
   1651   WORD32 n;
   1652   WORD32 npoints_2;
   1653   WORD16 expo, neg_expo;
   1654 
   1655   n = 512;
   1656 
   1657   npoints_2 = n >> 1;
   1658 
   1659   expo = (*ixheaacd_calc_max_spectral_line)(data, n) - 1;
   1660 
   1661   memcpy(temp, data, sizeof(WORD32) * n);
   1662 
   1663   neg_expo = 7 - expo;
   1664 
   1665   ixheaacd_pre_twiddle(data, temp, n, cos_sin_ptr, neg_expo);
   1666 
   1667   (*ixheaacd_fft32x32_ld)(imdct_tables_ptr, npoints_2, data, temp);
   1668 
   1669   neg_expo = (*ixheaacd_neg_expo_inc)(neg_expo);
   1670 
   1671   *imdct_scale = neg_expo + 1;
   1672 
   1673   if (object_type == AOT_ER_AAC_ELD)
   1674     ixheaacd_post_twiddle_eld((data + n), temp, cos_sin_ptr, n);
   1675   else
   1676     ixheaacd_post_twiddle_ld((data), temp, cos_sin_ptr, n);
   1677 }
   1678 
   1679 VOID ixheaacd_fft_480_ld(WORD32 *inp, WORD32 *op,
   1680                          ia_aac_dec_imdct_tables_struct *imdct_tables_ptr) {
   1681   WORD32 i;
   1682   WORD32 *buf1, *buf2;
   1683   UWORD8 *re_arr_tab_sml_240_ptr;
   1684 
   1685   (*ixheaacd_aac_ld_dec_rearrange)(inp, op, MDCT_LEN_BY2,
   1686                                    imdct_tables_ptr->re_arr_tab_16);
   1687 
   1688   buf1 = op;
   1689   buf2 = inp;
   1690 
   1691   for (i = 0; i < FFT15; i++) {
   1692     (*ixheaacd_fft32x32_ld2)(imdct_tables_ptr, 16, buf1, buf2);
   1693 
   1694     buf1 += (FFT16X2);
   1695     buf2 += (FFT16X2);
   1696   }
   1697   re_arr_tab_sml_240_ptr = imdct_tables_ptr->re_arr_tab_sml_240;
   1698   buf1 = inp;
   1699 
   1700   for (i = 0; i < FFT16; i++) {
   1701     (*ixheaacd_fft_15_ld)(buf1, op, ixheaacd_fft5out, re_arr_tab_sml_240_ptr);
   1702     re_arr_tab_sml_240_ptr += FFT15;
   1703     buf1 += 2;
   1704   }
   1705 }
   1706 
   1707 VOID ixheaacd_pre_twiddle(WORD32 *xptr, WORD32 *data, WORD32 n,
   1708                           WORD32 *cos_sin_ptr, WORD32 neg_expo) {
   1709   WORD npoints_4, i;
   1710   WORD32 tempr, tempi, temp;
   1711   WORD32 c, c1, s, s1;
   1712   WORD32 *in_ptr1, *in_ptr2;
   1713 
   1714   npoints_4 = n >> 2;
   1715 
   1716   in_ptr1 = data;
   1717   in_ptr2 = data + n - 1;
   1718 
   1719   if (neg_expo >= 0) {
   1720     for (i = npoints_4 - 1; i >= 0; i--) {
   1721       c = *cos_sin_ptr++;
   1722       c1 = *cos_sin_ptr++;
   1723       s = *cos_sin_ptr++;
   1724       s1 = *cos_sin_ptr++;
   1725 
   1726       tempr = *in_ptr1;
   1727       tempi = *in_ptr2;
   1728 
   1729       in_ptr1 += 2;
   1730       in_ptr2 -= 2;
   1731 
   1732       temp =
   1733           -ixheaacd_add32(ixheaacd_mult32(tempr, c), ixheaacd_mult32(tempi, s));
   1734       *xptr++ = ixheaacd_shr32(temp, neg_expo);
   1735 
   1736       temp =
   1737           ixheaacd_sub32(ixheaacd_mult32(tempr, s), ixheaacd_mult32(tempi, c));
   1738       *xptr++ = ixheaacd_shr32(temp, neg_expo);
   1739 
   1740       tempr = *in_ptr1;
   1741       tempi = *in_ptr2;
   1742 
   1743       in_ptr1 += 2;
   1744       in_ptr2 -= 2;
   1745 
   1746       temp = -ixheaacd_add32(ixheaacd_mult32(tempr, c1),
   1747                              ixheaacd_mult32(tempi, s1));
   1748       *xptr++ = ixheaacd_shr32(temp, neg_expo);
   1749 
   1750       temp = ixheaacd_sub32(ixheaacd_mult32(tempr, s1),
   1751                             ixheaacd_mult32(tempi, c1));
   1752       *xptr++ = ixheaacd_shr32(temp, neg_expo);
   1753     }
   1754   } else {
   1755     neg_expo = -neg_expo;
   1756 
   1757     for (i = npoints_4 - 1; i >= 0; i--) {
   1758       c = *cos_sin_ptr++;
   1759       c1 = *cos_sin_ptr++;
   1760       s = *cos_sin_ptr++;
   1761       s1 = *cos_sin_ptr++;
   1762 
   1763       tempr = *in_ptr1;
   1764       tempi = *in_ptr2;
   1765 
   1766       in_ptr1 += 2;
   1767       in_ptr2 -= 2;
   1768 
   1769       temp =
   1770           -ixheaacd_add32(ixheaacd_mult32(tempr, c), ixheaacd_mult32(tempi, s));
   1771       *xptr++ = ixheaacd_shl32(temp, neg_expo);
   1772 
   1773       temp =
   1774           ixheaacd_sub32(ixheaacd_mult32(tempr, s), ixheaacd_mult32(tempi, c));
   1775       *xptr++ = ixheaacd_shl32(temp, neg_expo);
   1776 
   1777       tempr = *in_ptr1;
   1778       tempi = *in_ptr2;
   1779 
   1780       in_ptr1 += 2;
   1781       in_ptr2 -= 2;
   1782 
   1783       temp = -ixheaacd_add32(ixheaacd_mult32(tempr, c1),
   1784                              ixheaacd_mult32(tempi, s1));
   1785       *xptr++ = ixheaacd_shl32(temp, neg_expo);
   1786 
   1787       temp = ixheaacd_sub32(ixheaacd_mult32(tempr, s1),
   1788                             ixheaacd_mult32(tempi, c1));
   1789       *xptr++ = ixheaacd_shl32(temp, neg_expo);
   1790     }
   1791   }
   1792 }
   1793 
   1794 VOID ixheaacd_post_twiddle_ld(WORD32 out[], WORD32 x[],
   1795                               const WORD32 *cos_sin_ptr, WORD m) {
   1796   WORD i;
   1797 
   1798   WORD32 *ptr_x = &x[0];
   1799   WORD32 *ptr_out, *ptr_out1;
   1800 
   1801   ptr_out = &out[0];
   1802   ptr_out1 = &out[m - 1];
   1803 
   1804   for (i = (m >> 2) - 1; i >= 0; i--) {
   1805     WORD32 c, c1, s, s1;
   1806     WORD32 re, im;
   1807 
   1808     c = *cos_sin_ptr++;
   1809     c1 = *cos_sin_ptr++;
   1810     s = *cos_sin_ptr++;
   1811     s1 = *cos_sin_ptr++;
   1812 
   1813     re = *ptr_x++;
   1814     im = *ptr_x++;
   1815 
   1816     *ptr_out1 = ixheaacd_sub32(ixheaacd_mult32(im, c), ixheaacd_mult32(re, s));
   1817 
   1818     *ptr_out = -ixheaacd_add32(ixheaacd_mult32(re, c), ixheaacd_mult32(im, s));
   1819 
   1820     ptr_out += 2;
   1821     ptr_out1 -= 2;
   1822 
   1823     re = *ptr_x++;
   1824     im = *ptr_x++;
   1825 
   1826     *ptr_out1 =
   1827         ixheaacd_sub32(ixheaacd_mult32(im, c1), ixheaacd_mult32(re, s1));
   1828     *ptr_out =
   1829         -ixheaacd_add32(ixheaacd_mult32(re, c1), ixheaacd_mult32(im, s1));
   1830 
   1831     ptr_out += 2;
   1832     ptr_out1 -= 2;
   1833   }
   1834 }
   1835 
   1836 VOID ixheaacd_post_twiddle_eld(WORD32 out[], WORD32 x[],
   1837                                const WORD32 *cos_sin_ptr, WORD m) {
   1838   WORD i = 0;
   1839 
   1840   WORD32 *ptr_x = &x[0];
   1841   WORD32 *ptr_out_767, *ptr_out_256;
   1842   WORD32 *ptr_out_768, *ptr_out_255;
   1843   WORD32 *ptr_out_0, *ptr_out_1279;
   1844   WORD32 tempr, tempi;
   1845 
   1846   ptr_out_767 = &out[m + (m >> 1) - 1 - 2 * i];
   1847   ptr_out_256 = &out[(m >> 1) + 2 * i];
   1848 
   1849   ptr_out_768 = &out[m + (m >> 1) + 2 * i];
   1850   ptr_out_255 = &out[(m >> 1) - 1 - 2 * i];
   1851 
   1852   for (i = 0; i < (m >> 3); i++) {
   1853     WORD32 c, c1, s, s1;
   1854     WORD32 re, im;
   1855 
   1856     c = *cos_sin_ptr++;
   1857     c1 = *cos_sin_ptr++;
   1858     s = *cos_sin_ptr++;
   1859     s1 = *cos_sin_ptr++;
   1860 
   1861     re = *ptr_x++;
   1862     im = *ptr_x++;
   1863 
   1864     tempi = ixheaacd_sub32(ixheaacd_mult32(im, c), ixheaacd_mult32(re, s));
   1865     tempr = -ixheaacd_add32(ixheaacd_mult32(re, c), ixheaacd_mult32(im, s));
   1866 
   1867     *ptr_out_767 = tempr;
   1868     *ptr_out_256 = tempi;
   1869 
   1870     *ptr_out_768 = *ptr_out_767;
   1871     *ptr_out_255 = -*ptr_out_256;
   1872 
   1873     ptr_out_256 += 2;
   1874     ptr_out_767 -= 2;
   1875     ptr_out_768 += 2;
   1876     ptr_out_255 -= 2;
   1877 
   1878     re = *ptr_x++;
   1879     im = *ptr_x++;
   1880 
   1881     tempi = ixheaacd_sub32(ixheaacd_mult32(im, c1), ixheaacd_mult32(re, s1));
   1882     tempr = -ixheaacd_add32(ixheaacd_mult32(re, c1), ixheaacd_mult32(im, s1));
   1883 
   1884     *ptr_out_767 = tempr;
   1885     *ptr_out_256 = tempi;
   1886 
   1887     *ptr_out_768 = *ptr_out_767;
   1888     *ptr_out_255 = -*ptr_out_256;
   1889 
   1890     ptr_out_256 += 2;
   1891     ptr_out_767 -= 2;
   1892     ptr_out_768 += 2;
   1893     ptr_out_255 -= 2;
   1894   }
   1895 
   1896   ptr_out_0 = &out[2 * 2 * i - (m >> 1)];
   1897   ptr_out_1279 = &out[m + m + (m >> 1) - 1 - 2 * 2 * i];
   1898 
   1899   for (; i < (m >> 2); i++) {
   1900     WORD32 c, c1, s, s1;
   1901     WORD32 re, im;
   1902 
   1903     c = *cos_sin_ptr++;
   1904     c1 = *cos_sin_ptr++;
   1905     s = *cos_sin_ptr++;
   1906     s1 = *cos_sin_ptr++;
   1907 
   1908     re = *ptr_x++;
   1909     im = *ptr_x++;
   1910 
   1911     tempi = ixheaacd_sub32(ixheaacd_mult32(im, c), ixheaacd_mult32(re, s));
   1912     tempr = -ixheaacd_add32(ixheaacd_mult32(re, c), ixheaacd_mult32(im, s));
   1913 
   1914     *ptr_out_767 = tempr;
   1915     *ptr_out_256 = tempi;
   1916 
   1917     *ptr_out_0 = -*ptr_out_767;
   1918     *ptr_out_1279 = *ptr_out_256;
   1919 
   1920     ptr_out_256 += 2;
   1921     ptr_out_767 -= 2;
   1922     ptr_out_0 += 2;
   1923     ptr_out_1279 -= 2;
   1924 
   1925     re = *ptr_x++;
   1926     im = *ptr_x++;
   1927 
   1928     tempi = ixheaacd_sub32(ixheaacd_mult32(im, c1), ixheaacd_mult32(re, s1));
   1929     tempr = -ixheaacd_add32(ixheaacd_mult32(re, c1), ixheaacd_mult32(im, s1));
   1930 
   1931     *ptr_out_767 = tempr;
   1932     *ptr_out_256 = tempi;
   1933 
   1934     *ptr_out_0 = -*ptr_out_767;
   1935     *ptr_out_1279 = *ptr_out_256;
   1936 
   1937     ptr_out_256 += 2;
   1938     ptr_out_767 -= 2;
   1939     ptr_out_0 += 2;
   1940     ptr_out_1279 -= 2;
   1941   }
   1942 }
   1943 
   1944 VOID ixheaacd_fft32x32_ld_dec(ia_aac_dec_imdct_tables_struct *imdct_tables_ptr,
   1945                               WORD32 npoints, WORD32 *ptr_x, WORD32 *ptr_y) {
   1946   WORD32 i, j, l1, l2, h2, predj, tw_offset, stride, fft_jmp;
   1947   WORD32 xt0_0, yt0_0, xt1_0, yt1_0, xt2_0, yt2_0;
   1948   WORD32 xh0_0, xh1_0, xh20_0, xh21_0, xl0_0, xl1_0, xl20_0, xl21_0;
   1949   WORD32 xh0_1, xh1_1, xl0_1, xl1_1;
   1950   WORD32 x_0, x_1, x_2, x_3, x_l1_0, x_l1_1, x_l2_0, x_l2_1;
   1951   WORD32 xh0_2, xh1_2, xl0_2, xl1_2, xh0_3, xh1_3, xl0_3, xl1_3;
   1952   WORD32 x_4, x_5, x_6, x_7, x_h2_0, x_h2_1;
   1953   WORD32 x_8, x_9, x_a, x_b, x_c, x_d, x_e, x_f;
   1954   WORD32 si10, si20, si30, co10, co20, co30;
   1955   WORD32 *w;
   1956   WORD32 *x, *x2, *x0;
   1957   WORD32 *y0, *y1, *y2, *y3;
   1958   WORD32 n00, n10, n20, n30, n01, n11, n21, n31;
   1959   WORD32 n02, n12, n22, n32, n03, n13, n23, n33;
   1960   WORD32 n0, j0;
   1961   WORD32 radix;
   1962   WORD32 norm;
   1963   WORD32 m;
   1964   WORD32 *ptr_w;
   1965 
   1966   if (npoints == 256)
   1967     ptr_w = imdct_tables_ptr->w_256;
   1968   else
   1969     ptr_w = imdct_tables_ptr->w_16;
   1970 
   1971   for (i = 31, m = 1; (npoints & (1 << i)) == 0; i--, m++)
   1972     ;
   1973   radix = m & 1 ? 2 : 4;
   1974   norm = m - 2;
   1975 
   1976   stride = npoints;
   1977   tw_offset = 0;
   1978   fft_jmp = 6 * stride;
   1979 
   1980   while (stride > radix) {
   1981     j = 0;
   1982     fft_jmp >>= 2;
   1983 
   1984     h2 = stride >> 1;
   1985     l1 = stride;
   1986     l2 = stride + (stride >> 1);
   1987 
   1988     x = ptr_x;
   1989     w = ptr_w + tw_offset;
   1990     tw_offset += fft_jmp;
   1991 
   1992     stride >>= 2;
   1993 
   1994     for (i = 0; i < npoints; i += 4) {
   1995       co10 = w[j + 1];
   1996       si10 = w[j + 0];
   1997       co20 = w[j + 3];
   1998       si20 = w[j + 2];
   1999       co30 = w[j + 5];
   2000       si30 = w[j + 4];
   2001 
   2002       x_0 = x[0];
   2003       x_1 = x[1];
   2004       x_l1_0 = x[l1];
   2005       x_l1_1 = x[l1 + 1];
   2006       x_l2_0 = x[l2];
   2007       x_l2_1 = x[l2 + 1];
   2008       x_h2_0 = x[h2];
   2009       x_h2_1 = x[h2 + 1];
   2010 
   2011       xh0_0 = x_0 + x_l1_0;
   2012       xh1_0 = x_1 + x_l1_1;
   2013       xl0_0 = x_0 - x_l1_0;
   2014       xl1_0 = x_1 - x_l1_1;
   2015       xh20_0 = x_h2_0 + x_l2_0;
   2016       xh21_0 = x_h2_1 + x_l2_1;
   2017       xl20_0 = x_h2_0 - x_l2_0;
   2018       xl21_0 = x_h2_1 - x_l2_1;
   2019 
   2020       x0 = x;
   2021       x2 = x0;
   2022 
   2023       j += 6;
   2024       x += 2;
   2025       predj = (j - fft_jmp);
   2026       if (!predj) x += fft_jmp;
   2027       if (!predj) j = 0;
   2028 
   2029       x0[0] = xh0_0 + xh20_0;
   2030       x0[1] = xh1_0 + xh21_0;
   2031       xt0_0 = xh0_0 - xh20_0;
   2032       yt0_0 = xh1_0 - xh21_0;
   2033       xt1_0 = xl0_0 + xl21_0;
   2034       yt2_0 = xl1_0 + xl20_0;
   2035       xt2_0 = xl0_0 - xl21_0;
   2036       yt1_0 = xl1_0 - xl20_0;
   2037 
   2038       x2[h2] =
   2039           MPYHIRC(si10, yt1_0) + MPYHIRC(co10, xt1_0) +
   2040           (((MPYLUHS(si10, yt1_0) + MPYLUHS(co10, xt1_0) + 0x8000) >> 16) << 1);
   2041 
   2042       x2[h2 + 1] =
   2043           MPYHIRC(co10, yt1_0) - MPYHIRC(si10, xt1_0) +
   2044           (((MPYLUHS(co10, yt1_0) - MPYLUHS(si10, xt1_0) + 0x8000) >> 16) << 1);
   2045 
   2046       x2[l1] =
   2047           MPYHIRC(si20, yt0_0) + MPYHIRC(co20, xt0_0) +
   2048           (((MPYLUHS(si20, yt0_0) + MPYLUHS(co20, xt0_0) + 0x8000) >> 16) << 1);
   2049 
   2050       x2[l1 + 1] =
   2051           MPYHIRC(co20, yt0_0) - MPYHIRC(si20, xt0_0) +
   2052           (((MPYLUHS(co20, yt0_0) - MPYLUHS(si20, xt0_0) + 0x8000) >> 16) << 1);
   2053 
   2054       x2[l2] =
   2055           MPYHIRC(si30, yt2_0) + MPYHIRC(co30, xt2_0) +
   2056           (((MPYLUHS(si30, yt2_0) + MPYLUHS(co30, xt2_0) + 0x8000) >> 16) << 1);
   2057 
   2058       x2[l2 + 1] =
   2059           MPYHIRC(co30, yt2_0) - MPYHIRC(si30, xt2_0) +
   2060           (((MPYLUHS(co30, yt2_0) - MPYLUHS(si30, xt2_0) + 0x8000) >> 16) << 1);
   2061     }
   2062   }
   2063 
   2064   y0 = ptr_y;
   2065   y2 = ptr_y + (WORD32)npoints;
   2066   x0 = ptr_x;
   2067   x2 = ptr_x + (WORD32)(npoints >> 1);
   2068 
   2069   if (radix == 2) {
   2070     y1 = y0 + (WORD32)(npoints >> 2);
   2071     y3 = y2 + (WORD32)(npoints >> 2);
   2072     l1 = norm + 1;
   2073     j0 = 8;
   2074     n0 = npoints >> 1;
   2075   } else {
   2076     y1 = y0 + (WORD32)(npoints >> 1);
   2077     y3 = y2 + (WORD32)(npoints >> 1);
   2078     l1 = norm + 2;
   2079     j0 = 4;
   2080     n0 = npoints >> 2;
   2081   }
   2082 
   2083   j = 0;
   2084 
   2085   for (i = 0; i < npoints; i += 8) {
   2086     DIG_REV(j, l1, h2);
   2087 
   2088     x_0 = x0[0];
   2089     x_1 = x0[1];
   2090     x_2 = x0[2];
   2091     x_3 = x0[3];
   2092     x_4 = x0[4];
   2093     x_5 = x0[5];
   2094     x_6 = x0[6];
   2095     x_7 = x0[7];
   2096     x0 += 8;
   2097 
   2098     xh0_0 = x_0 + x_4;
   2099     xh1_0 = x_1 + x_5;
   2100     xl0_0 = x_0 - x_4;
   2101     xl1_0 = x_1 - x_5;
   2102     xh0_1 = x_2 + x_6;
   2103     xh1_1 = x_3 + x_7;
   2104     xl0_1 = x_2 - x_6;
   2105     xl1_1 = x_3 - x_7;
   2106 
   2107     n00 = xh0_0 + xh0_1;
   2108     n01 = xh1_0 + xh1_1;
   2109     n10 = xl0_0 + xl1_1;
   2110     n11 = xl1_0 - xl0_1;
   2111     n20 = xh0_0 - xh0_1;
   2112     n21 = xh1_0 - xh1_1;
   2113     n30 = xl0_0 - xl1_1;
   2114     n31 = xl1_0 + xl0_1;
   2115 
   2116     if (radix == 2) {
   2117       n00 = x_0 + x_2;
   2118       n01 = x_1 + x_3;
   2119       n20 = x_0 - x_2;
   2120       n21 = x_1 - x_3;
   2121       n10 = x_4 + x_6;
   2122       n11 = x_5 + x_7;
   2123       n30 = x_4 - x_6;
   2124       n31 = x_5 - x_7;
   2125     }
   2126 
   2127     y0[2 * h2] = n00;
   2128     y0[2 * h2 + 1] = n01;
   2129     y1[2 * h2] = n10;
   2130     y1[2 * h2 + 1] = n11;
   2131     y2[2 * h2] = n20;
   2132     y2[2 * h2 + 1] = n21;
   2133     y3[2 * h2] = n30;
   2134     y3[2 * h2 + 1] = n31;
   2135 
   2136     x_8 = x2[0];
   2137     x_9 = x2[1];
   2138     x_a = x2[2];
   2139     x_b = x2[3];
   2140     x_c = x2[4];
   2141     x_d = x2[5];
   2142     x_e = x2[6];
   2143     x_f = x2[7];
   2144     x2 += 8;
   2145 
   2146     xh0_2 = x_8 + x_c;
   2147     xh1_2 = x_9 + x_d;
   2148     xl0_2 = x_8 - x_c;
   2149     xl1_2 = x_9 - x_d;
   2150     xh0_3 = x_a + x_e;
   2151     xh1_3 = x_b + x_f;
   2152     xl0_3 = x_a - x_e;
   2153     xl1_3 = x_b - x_f;
   2154 
   2155     n02 = xh0_2 + xh0_3;
   2156     n03 = xh1_2 + xh1_3;
   2157     n12 = xl0_2 + xl1_3;
   2158     n13 = xl1_2 - xl0_3;
   2159     n22 = xh0_2 - xh0_3;
   2160     n23 = xh1_2 - xh1_3;
   2161     n32 = xl0_2 - xl1_3;
   2162     n33 = xl1_2 + xl0_3;
   2163 
   2164     if (radix == 2) {
   2165       n02 = x_8 + x_a;
   2166       n03 = x_9 + x_b;
   2167       n22 = x_8 - x_a;
   2168       n23 = x_9 - x_b;
   2169       n12 = x_c + x_e;
   2170       n13 = x_d + x_f;
   2171       n32 = x_c - x_e;
   2172       n33 = x_d - x_f;
   2173     }
   2174 
   2175     y0[2 * h2 + 2] = n02;
   2176     y0[2 * h2 + 3] = n03;
   2177     y1[2 * h2 + 2] = n12;
   2178     y1[2 * h2 + 3] = n13;
   2179     y2[2 * h2 + 2] = n22;
   2180     y2[2 * h2 + 3] = n23;
   2181     y3[2 * h2 + 2] = n32;
   2182     y3[2 * h2 + 3] = n33;
   2183 
   2184     j += j0;
   2185 
   2186     if (j == n0) {
   2187       j += n0;
   2188       x0 += (WORD32)npoints >> 1;
   2189       x2 += (WORD32)npoints >> 1;
   2190     }
   2191   }
   2192 }
   2193 
   2194 VOID ixheaacd_rearrange_dec(WORD32 *ip, WORD32 *op, WORD32 mdct_len_2,
   2195                             UWORD8 *re_arr_tab) {
   2196   WORD32 n, i = 0;
   2197 
   2198   for (n = 0; n < mdct_len_2; n++) {
   2199     WORD32 idx = re_arr_tab[n] << 1;
   2200 
   2201     op[i++] = ip[idx];
   2202     op[i++] = ip[idx + 1];
   2203   }
   2204 }
   2205 
   2206 VOID ixheaacd_fft_15_ld_dec(WORD32 *inp, WORD32 *op, WORD32 *fft3out,
   2207                             UWORD8 *re_arr_tab_sml_240_ptr) {
   2208   WORD32 i, n, idx;
   2209   WORD32 *buf1, *buf2, *buf1a;
   2210   WORD32 add_r, sub_r;
   2211   WORD32 add_i, sub_i;
   2212   WORD32 x01_real, x_01_imag, temp;
   2213   WORD32 p1, p2, p3, p4;
   2214 
   2215   WORD32 sinmu = 1859775393;
   2216   WORD32 cos_51 = 2042378317;
   2217   WORD32 cos_52 = -1652318768;
   2218   WORD32 cos_53 = -780119100;
   2219   WORD32 cos_54 = 1200479854;
   2220   WORD32 cos_55 = -1342177280;
   2221 
   2222   WORD32 r1, r2, r3, r4;
   2223   WORD32 s1, s2, s3, s4, t, temp1, temp2;
   2224   WORD32 *fft3outptr = fft3out;
   2225 
   2226   WORD32 xr_0, xr_1, xr_2;
   2227   WORD32 xi_0, xi_1, xi_2;
   2228 
   2229   buf2 = fft3out;
   2230   buf1 = buf1a = fft3out;
   2231   n = 0;
   2232 
   2233   {
   2234     *buf1++ = inp[0];
   2235     *buf1++ = inp[1];
   2236 
   2237     *buf1++ = inp[96];
   2238     *buf1++ = inp[97];
   2239 
   2240     *buf1++ = inp[192];
   2241     *buf1++ = inp[193];
   2242 
   2243     *buf1++ = inp[288];
   2244     *buf1++ = inp[289];
   2245 
   2246     *buf1++ = inp[384];
   2247     *buf1++ = inp[385];
   2248 
   2249     r1 = buf1a[2] + buf1a[8];
   2250     r4 = buf1a[2] - buf1a[8];
   2251     r3 = buf1a[4] + buf1a[6];
   2252     r2 = buf1a[4] - buf1a[6];
   2253 
   2254     t = ixheaacd_mult32_shl((r1 - r3), cos_54);
   2255 
   2256     r1 = r1 + r3;
   2257 
   2258     temp1 = buf1a[0] + r1;
   2259 
   2260     r1 = temp1 + ((ixheaacd_mult32_shl(r1, cos_55)) << 1);
   2261 
   2262     r3 = r1 - t;
   2263     r1 = r1 + t;
   2264 
   2265     t = ixheaacd_mult32_shl((r4 + r2), cos_51);
   2266     r4 = t + (ixheaacd_mult32_shl(r4, cos_52) << 1);
   2267     r2 = t + ixheaacd_mult32_shl(r2, cos_53);
   2268 
   2269     s1 = buf1a[3] + buf1a[9];
   2270     s4 = buf1a[3] - buf1a[9];
   2271     s3 = buf1a[5] + buf1a[7];
   2272     s2 = buf1a[5] - buf1a[7];
   2273 
   2274     t = ixheaacd_mult32_shl((s1 - s3), cos_54);
   2275     s1 = s1 + s3;
   2276 
   2277     temp2 = buf1a[1] + s1;
   2278 
   2279     s1 = temp2 + ((ixheaacd_mult32_shl(s1, cos_55)) << 1);
   2280 
   2281     s3 = s1 - t;
   2282     s1 = s1 + t;
   2283 
   2284     t = ixheaacd_mult32_shl((s4 + s2), cos_51);
   2285     s4 = t + ((ixheaacd_mult32_shl(s4, cos_52)) << 1);
   2286     s2 = t + (ixheaacd_mult32_shl(s2, cos_53));
   2287 
   2288     *buf2++ = temp1;
   2289     *buf2++ = temp2;
   2290     *buf2++ = r1 + s2;
   2291     *buf2++ = s1 - r2;
   2292     *buf2++ = r3 - s4;
   2293     *buf2++ = s3 + r4;
   2294     *buf2++ = r3 + s4;
   2295     *buf2++ = s3 - r4;
   2296     *buf2++ = r1 - s2;
   2297     *buf2++ = s1 + r2;
   2298     buf1a = buf1;
   2299 
   2300     *buf1++ = inp[160];
   2301     *buf1++ = inp[161];
   2302 
   2303     *buf1++ = inp[256];
   2304     *buf1++ = inp[257];
   2305 
   2306     *buf1++ = inp[352];
   2307     *buf1++ = inp[353];
   2308 
   2309     *buf1++ = inp[448];
   2310     *buf1++ = inp[449];
   2311 
   2312     *buf1++ = inp[64];
   2313     *buf1++ = inp[65];
   2314 
   2315     r1 = buf1a[2] + buf1a[8];
   2316     r4 = buf1a[2] - buf1a[8];
   2317     r3 = buf1a[4] + buf1a[6];
   2318     r2 = buf1a[4] - buf1a[6];
   2319 
   2320     t = ixheaacd_mult32_shl((r1 - r3), cos_54);
   2321 
   2322     r1 = r1 + r3;
   2323 
   2324     temp1 = buf1a[0] + r1;
   2325 
   2326     r1 = temp1 + ((ixheaacd_mult32_shl(r1, cos_55)) << 1);
   2327 
   2328     r3 = r1 - t;
   2329     r1 = r1 + t;
   2330 
   2331     t = ixheaacd_mult32_shl((r4 + r2), cos_51);
   2332     r4 = t + (ixheaacd_mult32_shl(r4, cos_52) << 1);
   2333     r2 = t + ixheaacd_mult32_shl(r2, cos_53);
   2334 
   2335     s1 = buf1a[3] + buf1a[9];
   2336     s4 = buf1a[3] - buf1a[9];
   2337     s3 = buf1a[5] + buf1a[7];
   2338     s2 = buf1a[5] - buf1a[7];
   2339 
   2340     t = ixheaacd_mult32_shl((s1 - s3), cos_54);
   2341 
   2342     s1 = s1 + s3;
   2343 
   2344     temp2 = buf1a[1] + s1;
   2345 
   2346     s1 = temp2 + ((ixheaacd_mult32_shl(s1, cos_55)) << 1);
   2347 
   2348     s3 = s1 - t;
   2349     s1 = s1 + t;
   2350 
   2351     t = ixheaacd_mult32_shl((s4 + s2), cos_51);
   2352     s4 = t + ((ixheaacd_mult32_shl(s4, cos_52)) << 1);
   2353     s2 = t + (ixheaacd_mult32_shl(s2, cos_53));
   2354 
   2355     *buf2++ = temp1;
   2356     *buf2++ = temp2;
   2357     *buf2++ = r1 + s2;
   2358     *buf2++ = s1 - r2;
   2359     *buf2++ = r3 - s4;
   2360     *buf2++ = s3 + r4;
   2361     *buf2++ = r3 + s4;
   2362     *buf2++ = s3 - r4;
   2363     *buf2++ = r1 - s2;
   2364     *buf2++ = s1 + r2;
   2365     buf1a = buf1;
   2366     ;
   2367 
   2368     *buf1++ = inp[320];
   2369     *buf1++ = inp[321];
   2370 
   2371     *buf1++ = inp[416];
   2372     *buf1++ = inp[417];
   2373 
   2374     *buf1++ = inp[32];
   2375     *buf1++ = inp[33];
   2376 
   2377     *buf1++ = inp[128];
   2378     *buf1++ = inp[129];
   2379 
   2380     *buf1++ = inp[224];
   2381     *buf1++ = inp[225];
   2382 
   2383     r1 = buf1a[2] + buf1a[8];
   2384     r4 = buf1a[2] - buf1a[8];
   2385     r3 = buf1a[4] + buf1a[6];
   2386     r2 = buf1a[4] - buf1a[6];
   2387 
   2388     t = ixheaacd_mult32_shl((r1 - r3), cos_54);
   2389 
   2390     r1 = r1 + r3;
   2391 
   2392     temp1 = buf1a[0] + r1;
   2393 
   2394     r1 = temp1 + ((ixheaacd_mult32_shl(r1, cos_55)) << 1);
   2395 
   2396     r3 = r1 - t;
   2397     r1 = r1 + t;
   2398 
   2399     t = ixheaacd_mult32_shl((r4 + r2), cos_51);
   2400     r4 = t + (ixheaacd_mult32_shl(r4, cos_52) << 1);
   2401     r2 = t + ixheaacd_mult32_shl(r2, cos_53);
   2402 
   2403     s1 = buf1a[3] + buf1a[9];
   2404     s4 = buf1a[3] - buf1a[9];
   2405     s3 = buf1a[5] + buf1a[7];
   2406     s2 = buf1a[5] - buf1a[7];
   2407 
   2408     t = ixheaacd_mult32_shl((s1 - s3), cos_54);
   2409 
   2410     s1 = s1 + s3;
   2411 
   2412     temp2 = buf1a[1] + s1;
   2413 
   2414     s1 = temp2 + ((ixheaacd_mult32_shl(s1, cos_55)) << 1);
   2415 
   2416     s3 = s1 - t;
   2417     s1 = s1 + t;
   2418 
   2419     t = ixheaacd_mult32_shl((s4 + s2), cos_51);
   2420     s4 = t + ((ixheaacd_mult32_shl(s4, cos_52)) << 1);
   2421     s2 = t + (ixheaacd_mult32_shl(s2, cos_53));
   2422 
   2423     *buf2++ = temp1;
   2424     *buf2++ = temp2;
   2425     *buf2++ = r1 + s2;
   2426     *buf2++ = s1 - r2;
   2427     *buf2++ = r3 - s4;
   2428     *buf2++ = s3 + r4;
   2429     *buf2++ = r3 + s4;
   2430     *buf2++ = s3 - r4;
   2431     *buf2++ = r1 - s2;
   2432     *buf2++ = s1 + r2;
   2433     buf1a = buf1;
   2434     ;
   2435   }
   2436 
   2437   n = 0;
   2438   for (i = 0; i < FFT5; i++) {
   2439     xr_0 = fft3outptr[0];
   2440     xi_0 = fft3outptr[1];
   2441 
   2442     xr_1 = fft3outptr[10];
   2443     xi_1 = fft3outptr[11];
   2444 
   2445     xr_2 = fft3outptr[20];
   2446     xi_2 = fft3outptr[21];
   2447 
   2448     x01_real = ixheaacd_add32(xr_0, xr_1);
   2449     x_01_imag = ixheaacd_add32(xi_0, xi_1);
   2450 
   2451     add_r = ixheaacd_add32(xr_1, xr_2);
   2452     add_i = ixheaacd_add32(xi_1, xi_2);
   2453 
   2454     sub_r = ixheaacd_sub32(xr_1, xr_2);
   2455     sub_i = ixheaacd_sub32(xi_1, xi_2);
   2456 
   2457     p1 = add_r >> 1;
   2458 
   2459     p2 = ixheaacd_mult32_shl(sub_i, sinmu);
   2460     p3 = ixheaacd_mult32_shl(sub_r, sinmu);
   2461 
   2462     p4 = add_i >> 1;
   2463 
   2464     temp = ixheaacd_sub32(xr_0, p1);
   2465     temp1 = ixheaacd_add32(xi_0, p3);
   2466     temp2 = ixheaacd_sub32(xi_0, p3);
   2467 
   2468     idx = re_arr_tab_sml_240_ptr[n++] << 1;
   2469     op[idx] = ixheaacd_add32(x01_real, xr_2);
   2470     op[idx + 1] = ixheaacd_add32(x_01_imag, xi_2);
   2471 
   2472     idx = re_arr_tab_sml_240_ptr[n++] << 1;
   2473     op[idx] = ixheaacd_add32(temp, p2);
   2474     op[idx + 1] = ixheaacd_sub32(temp2, p4);
   2475 
   2476     idx = re_arr_tab_sml_240_ptr[n++] << 1;
   2477     op[idx] = ixheaacd_sub32(temp, p2);
   2478     op[idx + 1] = ixheaacd_sub32(temp1, p4);
   2479     fft3outptr += 2;
   2480   }
   2481 }