Home | History | Annotate | Download | only in armv8
      1 /******************************************************************************
      2  *                                                                            *
      3  * Copyright (C) 2018 The Android Open Source Project
      4  *
      5  * Licensed under the Apache License, Version 2.0 (the "License");
      6  * you may not use this file except in compliance with the License.
      7  * You may obtain a copy of the License at:
      8  *
      9  * http://www.apache.org/licenses/LICENSE-2.0
     10  *
     11  * Unless required by applicable law or agreed to in writing, software
     12  * distributed under the License is distributed on an "AS IS" BASIS,
     13  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     14  * See the License for the specific language governing permissions and
     15  * limitations under the License.
     16  *
     17  *****************************************************************************
     18  * Originally developed and contributed by Ittiam Systems Pvt. Ltd, Bangalore
     19 */
     20 #include <string.h>
     21 #include "ixheaacd_sbr_common.h"
     22 #include <ixheaacd_type_def.h>
     23 
     24 #include "ixheaacd_constants.h"
     25 #include "ixheaacd_basic_ops32.h"
     26 #include "ixheaacd_basic_ops16.h"
     27 #include "ixheaacd_basic_ops40.h"
     28 #include "ixheaacd_basic_ops.h"
     29 
     30 #include "ixheaacd_intrinsics.h"
     31 #include "ixheaacd_common_rom.h"
     32 #include "ixheaacd_bitbuffer.h"
     33 #include "ixheaacd_sbrdecsettings.h"
     34 #include "ixheaacd_sbr_scale.h"
     35 #include "ixheaacd_lpp_tran.h"
     36 #include "ixheaacd_env_extr_part.h"
     37 #include "ixheaacd_sbr_rom.h"
     38 #include "ixheaacd_hybrid.h"
     39 #include "ixheaacd_ps_dec.h"
     40 #include "ixheaacd_env_extr.h"
     41 #include "ixheaacd_qmf_dec.h"
     42 
     43 #include <ixheaacd_basic_op.h>
     44 #include "ixheaacd_env_calc.h"
     45 
     46 #include "ixheaacd_interface.h"
     47 #include "ixheaacd_function_selector.h"
     48 #include "ixheaacd_audioobjtypes.h"
     49 
     /* Short local aliases: each macro simply forwards its arguments to the
      * fixed-point primitive named on the right-hand side. */
     50 #define mult16x16_16(a, b) ixheaacd_mult16((a), (b))
     51 #define mac16x16(a, b, c) ixheaacd_mac16x16in32((a), (b), (c))
     52 #define mpy_32x16(a, b) fixmuldiv2_32x16b((a), (b))
     53 #define mpy_16x16(a, b) ixheaacd_mult16x16in32((a), (b))
     54 #define mpy_32x32(a, b) ixheaacd_mult32((a), (b))
     55 #define mpy_32x16H_n(a, b) ixheaacd_mult32x16hin32((a), (b))
     56 #define msu16x16(a, b, c) msu16x16in32((a), (b), (c))
     57 
        /* DCT3_LEN bounds the loops in ixheaacd_dct3_32 (DCT3_LEN / 2 and
         * DCT3_LEN / 4 iterations). */
     58 #define DCT3_LEN (32)
     59 #define DCT2_LEN (64)
     60 
        /* LP_SHIFT_VAL: right shift applied to every input sample read by
         * ixheaacd_dct3_32. */
     61 #define LP_SHIFT_VAL 7
     62 #define HQ_SHIFT_64 4
        /* RADIXSHIFT: left shift applied to each twiddled output of
         * ixheaacd_esbr_radix4bfly. */
     63 #define RADIXSHIFT 1
     64 #define ROUNDING_SPECTRA 1
        /* HQ_SHIFT_VAL: right shift applied to the time-domain inputs in
         * ixheaacd_fwd_modulation. */
     65 #define HQ_SHIFT_VAL 4
     66 
     /*
      * ixheaacd_dct3_32
      *
      * Fixed-point transform used by ixheaacd_cplx_anal_qmffilt on its
      * low-power path (presumably a 32-point DCT-III, going by the name).
      *
      * The routine runs in three stages:
      *   1. Pre-twiddle. Samples in input[0..63] are folded around input[48],
      *      each scaled down by LP_SHIFT_VAL bits, and combined with twiddle
      *      pairs read from main_twidle_fwd (first pair at offset 4, stride 4).
      *      This fills output[0..31] with interleaved pairs; one extra pair is
      *      kept in temp2[2..3].
      *   2. In-place post-twiddle over output[], walking inwards from both
      *      ends, with factors read from post_tbl (offsets 2,4,.. and 14,12,..).
      *   3. ixheaacd_radix4bfly on output[], then ixheaacd_postradixcompute4,
      *      whose results are read back from input[] and scattered into
      *      output[] (odd indices ascending, even indices descending).
      *
      * input            source samples; also reused as scratch - its leading
      *                  entries are read back after ixheaacd_postradixcompute4,
      *                  so the caller's data is not preserved.
      * output           receives the 32 result words.
      * main_twidle_fwd  twiddle table for stage 1.
      * post_tbl         twiddle table for stage 2.
      * w_16             twiddles handed to ixheaacd_radix4bfly.
      * p_table          table handed to ixheaacd_postradixcompute4.
      *
      * NOTE(review): mac32x16in32_dual / msu32x16in32_dual are defined
      * elsewhere; they presumably compute a*b + c*d and a*b - c*d - confirm.
      */
     67 VOID ixheaacd_dct3_32(WORD32 *input, WORD32 *output,
     68                       const WORD16 *main_twidle_fwd, const WORD16 *post_tbl,
     69                       const WORD16 *w_16, const WORD32 *p_table) {
     70   WORD32 n, k;
     71 
     72   WORD32 temp1[6];
     73   WORD32 temp2[4];
     74   WORD16 twid_re, twid_im;
     75   WORD32 *ptr_reverse, *ptr_forward, *p_out, *ptr_out1;
     76   const WORD16 *twidle_fwd, *twidle_rev;
     77 
          /* Stage 1: walk outwards from input[48] in both directions. */
     78   ptr_forward = &input[49];
     79   ptr_reverse = &input[47];
     80 
     81   p_out = output;
     82   twidle_fwd = main_twidle_fwd;
     83   twidle_fwd += 4;
     84 
          /* First pair needs no twiddle: scaled centre sample and zero. */
     85   *p_out++ = input[48] >> LP_SHIFT_VAL;
     86   *p_out++ = 0;
     87 
     88   for (n = 1; n < DCT3_LEN / 2; n++) {
     89     temp1[0] = *ptr_forward++;
     90     temp1[1] = *ptr_reverse--;
     91     temp1[0] = ixheaacd_add32(ixheaacd_shr32(temp1[0], LP_SHIFT_VAL),
     92                               ixheaacd_shr32(temp1[1], LP_SHIFT_VAL));
     93 
            /* Both pointers were already stepped above, so these offsets
             * address input[16 + n] and input[16 - n] respectively. */
     94     temp1[2] = *(ptr_forward - 33);
     95     temp1[3] = *(ptr_reverse - 31);
     96     temp1[1] = ixheaacd_sub32(ixheaacd_shr32(temp1[2], LP_SHIFT_VAL),
     97                               ixheaacd_shr32(temp1[3], LP_SHIFT_VAL));
     98     twid_re = *twidle_fwd++;
     99 
    100     twid_im = *twidle_fwd;
            /* Net advance per iteration is 4 table entries; only the first two
             * of each group are used. */
    101     twidle_fwd += 3;
    102     *p_out++ = mac32x16in32_dual(temp1[0], twid_re, temp1[1], twid_im);
    103     *p_out++ = msu32x16in32_dual(temp1[0], twid_im, temp1[1], twid_re);
    104   }
          /* One more twiddle pair for the term built from input[32] and
           * input[0]; its result stays in temp2[2..3] rather than in output[]. */
    105   twid_re = *twidle_fwd++;
    106 
    107   twid_im = *twidle_fwd;
    108   twidle_fwd += 3;
    109 
    110   temp1[1] = *ptr_reverse--;
    111   temp1[0] = *(ptr_reverse - 31);
    112   temp1[1] = ixheaacd_sub32(ixheaacd_shr32(temp1[1], LP_SHIFT_VAL),
    113                             ixheaacd_shr32(temp1[0], LP_SHIFT_VAL));
    114 
    115   temp1[0] = temp1[1];
    116 
    117   temp2[2] = mac32x16in32_dual(temp1[0], twid_re, temp1[1], twid_im);
    118   temp2[3] = msu32x16in32_dual(temp1[0], twid_im, temp1[1], twid_re);
    119 
          /* Stage 2: in-place pass over output[], from both ends inwards. */
    120   ptr_forward = output;
    121   ptr_reverse = &output[DCT3_LEN - 1];
          /* Read the first pair without net pointer movement (++ then --). */
    122   temp2[0] = *ptr_forward++;
    123   temp2[1] = *ptr_forward--;
    124 
    125   temp1[0] = -temp2[1] - temp2[3];
    126   temp1[1] = temp2[0] - temp2[2];
    127   temp2[0] = (temp2[0] + temp2[2] + temp1[0]);
    128   temp2[1] = (temp2[1] - temp2[3] + temp1[1]);
    129 
    130   temp2[0] >>= 1;
    131   temp2[1] >>= 1;
    132 
    133   *ptr_forward++ = temp2[0];
    134   *ptr_forward++ = temp2[1];
    135 
          /* post_tbl is read with stride 2: ascending from index 2 and
           * descending from index 14. */
    136   twidle_fwd = post_tbl + 2;
    137   twidle_rev = post_tbl + 14;
    138 
    139   for (n = 1; n < DCT3_LEN / 4; n++) {
            /* Peek a pair at each end; the pointers return to the start of the
             * pair so the results can be written back over it. */
    140     temp2[0] = *ptr_forward++;
    141     temp2[1] = *ptr_forward--;
    142     temp2[3] = *ptr_reverse--;
    143     temp2[2] = *ptr_reverse++;
    144 
    145     twid_re = *twidle_rev;
    146     twidle_rev -= 2;
    147     twid_im = *twidle_fwd;
    148     twidle_fwd += 2;
    149 
    150     temp1[0] = temp2[0] - temp2[2];
    151     temp1[1] = (temp2[0] + temp2[2]);
    152 
    153     temp1[2] = temp2[1] + temp2[3];
    154     temp1[3] = (temp2[1] - temp2[3]);
    155     temp1[4] = mac32x16in32_dual(temp1[0], twid_re, temp1[2], twid_im);
    156     temp1[5] = msu32x16in32_dual(temp1[0], twid_im, temp1[2], twid_re);
    157 
    158     temp1[1] >>= 1;
    159     temp1[3] >>= 1;
    160 
    161     *ptr_forward++ = temp1[1] - temp1[4];
    162     *ptr_forward++ = temp1[3] + temp1[5];
    163 
    164     *ptr_reverse-- = -temp1[3] + temp1[5];
    165     *ptr_reverse-- = temp1[1] + temp1[4];
    166   }
          /* Final step of stage 2: same arithmetic as the loop body, but the
           * mac term is negated and only the forward pair is written. */
    167   temp2[0] = *ptr_forward++;
    168   temp2[1] = *ptr_forward--;
    169   temp2[3] = *ptr_reverse--;
    170   temp2[2] = *ptr_reverse++;
    171 
    172   twid_re = *twidle_rev;
    173   twidle_rev -= 2;
    174   twid_im = *twidle_fwd;
    175   twidle_fwd += 2;
    176 
    177   temp1[0] = temp2[0] - temp2[2];
    178   temp1[1] = (temp2[0] + temp2[2]);
    179 
    180   temp1[2] = temp2[1] + temp2[3];
    181   temp1[3] = (temp2[1] - temp2[3]);
    182 
    183   temp1[4] = -mac32x16in32_dual(temp1[0], twid_re, temp1[2], twid_im);
    184   temp1[5] = msu32x16in32_dual(temp1[0], twid_im, temp1[2], twid_re);
    185 
    186   temp1[1] >>= 1;
    187   temp1[3] >>= 1;
    188   *ptr_forward++ = temp1[1] + temp1[4];
    189   *ptr_forward++ = temp1[3] + temp1[5];
    190 
          /* Stage 3: radix-4 pass on output[]; the post-radix step is handed
           * input as its first argument and the results are read from input[]
           * below, so input is presumably its destination - confirm. */
    191   ixheaacd_radix4bfly(w_16, output, 1, 4);
    192   ixheaacd_postradixcompute4(input, output, p_table, 16);
    193 
    194   output[0] = input[0];
    195   output[2] = input[1];
    196 
          /* Scatter the remaining results: pairs from input[2..] go to odd
           * output indices ascending from 1, pairs from input[18..] go to even
           * output indices descending from 30. */
    197   p_out = input + 2;
    198   ptr_forward = output + 1;
    199   ptr_reverse = output + 30;
    200   ptr_out1 = input + 18;
    201 
    202   for (k = (DCT3_LEN / 4) - 1; k != 0; k--) {
    203     WORD32 tempre, tempim;
    204 
    205     tempre = *p_out++;
    206     tempim = *p_out++;
    207     *ptr_forward = (tempim);
    208     ptr_forward += 2;
    209     *ptr_forward = (tempre);
    210     ptr_forward += 2;
    211 
    212     tempre = *ptr_out1++;
    213     tempim = *ptr_out1++;
    214     *ptr_reverse = (tempim);
    215     ptr_reverse -= 2;
    216     *ptr_reverse = (tempre);
    217     ptr_reverse -= 2;
    218   }
    219 
          /* Last forward pair (output[29] and output[31]); there is no
           * matching reverse pair. */
    220   {
    221     WORD32 tempre, tempim;
    222     tempre = *p_out++;
    223     tempim = *p_out++;
    224     *ptr_forward = (tempim);
    225     ptr_forward += 2;
    226     *ptr_forward = (tempre);
    227     ptr_forward += 2;
    228   }
    229 
    230   return;
    231 }
    232 VOID ixheaacd_dct2_64(WORD32 *x, WORD32 *X,
    233                       ia_qmf_dec_tables_struct *qmf_dec_tables_ptr,
    234                       WORD16 *filter_states) {
    235   ixheaacd_pretwdct2(x, X);
    236 
    237   ixheaacd_sbr_imdct_using_fft(qmf_dec_tables_ptr->w1024, 32, X, x,
    238                                qmf_dec_tables_ptr->dig_rev_table2_128,
    239                                qmf_dec_tables_ptr->dig_rev_table2_128,
    240                                qmf_dec_tables_ptr->dig_rev_table2_128,
    241                                qmf_dec_tables_ptr->dig_rev_table2_128);
    242 
    243   ixheaacd_fftposttw(x, qmf_dec_tables_ptr);
    244 
    245   ixheaacd_posttwdct2(x, filter_states, qmf_dec_tables_ptr);
    246 
    247   return;
    248 }
    249 
    250 VOID ixheaacd_cos_sin_mod(WORD32 *subband,
    251                           ia_sbr_qmf_filter_bank_struct *qmf_bank,
    252                           WORD16 *p_twiddle, WORD32 *p_dig_rev_tbl) {
    253   WORD32 M = ixheaacd_shr32(qmf_bank->no_channels, 1);
    254 
    255   const WORD16 *p_sin;
    256   const WORD16 *p_sin_cos = &qmf_bank->cos_twiddle[0];
    257   WORD32 subband_tmp[128];
    258 
    259   ixheaacd_cos_sin_mod_loop1(subband, M, p_sin_cos, subband_tmp);
    260 
    261   if (M == 32) {
    262     ixheaacd_sbr_imdct_using_fft(
    263         (const WORD32 *)p_twiddle, 32, subband_tmp, subband,
    264         (UWORD8 *)p_dig_rev_tbl, (UWORD8 *)p_dig_rev_tbl,
    265         (UWORD8 *)p_dig_rev_tbl, (UWORD8 *)p_dig_rev_tbl);
    266 
    267     ixheaacd_sbr_imdct_using_fft(
    268         (const WORD32 *)p_twiddle, 32, &subband_tmp[64], &subband[64],
    269         (UWORD8 *)p_dig_rev_tbl, (UWORD8 *)p_dig_rev_tbl,
    270         (UWORD8 *)p_dig_rev_tbl, (UWORD8 *)p_dig_rev_tbl);
    271 
    272   } else {
    273     ixheaacd_sbr_imdct_using_fft(
    274         (const WORD32 *)p_twiddle, 16, subband_tmp, subband,
    275         (UWORD8 *)p_dig_rev_tbl, (UWORD8 *)p_dig_rev_tbl,
    276         (UWORD8 *)p_dig_rev_tbl, (UWORD8 *)p_dig_rev_tbl);
    277 
    278     ixheaacd_sbr_imdct_using_fft(
    279         (const WORD32 *)p_twiddle, 16, &subband_tmp[64], &subband[64],
    280         (UWORD8 *)p_dig_rev_tbl, (UWORD8 *)p_dig_rev_tbl,
    281         (UWORD8 *)p_dig_rev_tbl, (UWORD8 *)p_dig_rev_tbl);
    282   }
    283 
    284   p_sin = &qmf_bank->alt_sin_twiddle[0];
    285   ixheaacd_cos_sin_mod_loop2(subband, p_sin, M);
    286 }
    287 
    288 VOID ixheaacd_fwd_modulation(const WORD32 *p_time_in1, WORD32 *real_subband,
    289                              WORD32 *imag_subband,
    290                              ia_sbr_qmf_filter_bank_struct *qmf_bank,
    291                              ia_qmf_dec_tables_struct *qmf_dec_tables_ptr) {
    292   WORD32 i;
    293   const WORD32 *p_time_in2 = &p_time_in1[2 * NO_ANALYSIS_CHANNELS - 1];
    294   WORD32 temp1, temp2;
    295   WORD32 *t_real_subband = real_subband;
    296   WORD32 *t_imag_subband = imag_subband;
    297   const WORD16 *tcos;
    298 
    299   for (i = NO_ANALYSIS_CHANNELS - 1; i >= 0; i--) {
    300     temp1 = ixheaacd_shr32(*p_time_in1++, HQ_SHIFT_VAL);
    301     temp2 = ixheaacd_shr32(*p_time_in2--, HQ_SHIFT_VAL);
    302 
    303     *t_real_subband++ = ixheaacd_sub32_sat(temp1, temp2);
    304     ;
    305     *t_imag_subband++ = ixheaacd_add32(temp1, temp2);
    306     ;
    307   }
    308 
    309   ixheaacd_cos_sin_mod(real_subband, qmf_bank,
    310                        (WORD16 *)qmf_dec_tables_ptr->w1024,
    311                        (WORD32 *)qmf_dec_tables_ptr->dig_rev_table2_128);
    312 
    313   tcos = qmf_bank->t_cos;
    314 
    315   for (i = (qmf_bank->usb - qmf_bank->lsb - 1); i >= 0; i--) {
    316     WORD16 cosh, sinh;
    317     WORD32 re, im;
    318 
    319     re = *real_subband;
    320     im = *imag_subband;
    321     cosh = *tcos++;
    322     sinh = *tcos++;
    323     *real_subband++ = ixheaacd_add32(ixheaacd_mult32x16in32_shl(re, cosh),
    324                                      ixheaacd_mult32x16in32_shl(im, sinh));
    325     *imag_subband++ = ixheaacd_sub32_sat(ixheaacd_mult32x16in32_shl(im, cosh),
    326                                          ixheaacd_mult32x16in32_shl(re, sinh));
    327   }
    328 }
    329 
    330 VOID ixheaacd_cplx_anal_qmffilt(const WORD16 *time_sample_buf,
    331                                 ia_sbr_scale_fact_struct *sbr_scale_factor,
    332                                 WORD32 **qmf_real, WORD32 **qmf_imag,
    333                                 ia_sbr_qmf_filter_bank_struct *qmf_bank,
    334                                 ia_qmf_dec_tables_struct *qmf_dec_tables_ptr,
    335                                 WORD32 ch_fac, WORD32 low_pow_flag,
    336                                 WORD audio_object_type) {
    337   WORD32 i, k;
    338   WORD32 num_time_slots = qmf_bank->num_time_slots;
    339 
    340   WORD32 analysis_buffer[4 * NO_ANALYSIS_CHANNELS];
    341   WORD16 *filter_states = qmf_bank->core_samples_buffer;
    342 
    343   WORD16 *fp1, *fp2, *tmp;
    344 
    345   WORD16 *filter_1;
    346   WORD16 *filter_2;
    347   WORD16 *filt_ptr;
    348   if (audio_object_type != AOT_ER_AAC_ELD &&
    349       audio_object_type != AOT_ER_AAC_LD) {
    350     qmf_bank->filter_pos +=
    351         (qmf_dec_tables_ptr->qmf_c - qmf_bank->analy_win_coeff);
    352     qmf_bank->analy_win_coeff = qmf_dec_tables_ptr->qmf_c;
    353   } else {
    354     qmf_bank->filter_pos +=
    355         (qmf_dec_tables_ptr->qmf_c_eld3 - qmf_bank->analy_win_coeff);
    356     qmf_bank->analy_win_coeff = qmf_dec_tables_ptr->qmf_c_eld3;
    357   }
    358 
    359   filter_1 = qmf_bank->filter_pos;
    360 
    361   if (audio_object_type != AOT_ER_AAC_ELD &&
    362       audio_object_type != AOT_ER_AAC_LD) {
    363     filter_2 = filter_1 + 64;
    364   } else {
    365     filter_2 = filter_1 + 32;
    366   }
    367 
    368   sbr_scale_factor->st_lb_scale = 0;
    369   sbr_scale_factor->lb_scale = -10;
    370   if (!low_pow_flag) {
    371     if (audio_object_type != AOT_ER_AAC_ELD &&
    372         audio_object_type != AOT_ER_AAC_LD) {
    373       sbr_scale_factor->lb_scale = -8;
    374     } else {
    375       sbr_scale_factor->lb_scale = -9;
    376     }
    377     qmf_bank->cos_twiddle =
    378         (WORD16 *)qmf_dec_tables_ptr->sbr_sin_cos_twiddle_l32;
    379     qmf_bank->alt_sin_twiddle =
    380         (WORD16 *)qmf_dec_tables_ptr->sbr_alt_sin_twiddle_l32;
    381     if (audio_object_type != AOT_ER_AAC_ELD &&
    382         audio_object_type != AOT_ER_AAC_LD) {
    383       qmf_bank->t_cos = (WORD16 *)qmf_dec_tables_ptr->sbr_t_cos_sin_l32;
    384     } else {
    385       qmf_bank->t_cos =
    386           (WORD16 *)qmf_dec_tables_ptr->ixheaacd_sbr_t_cos_sin_l32_eld;
    387     }
    388   }
    389 
    390   fp1 = qmf_bank->anal_filter_states;
    391   fp2 = qmf_bank->anal_filter_states + NO_ANALYSIS_CHANNELS;
    392 
    393   if (audio_object_type == AOT_ER_AAC_ELD ||
    394       audio_object_type == AOT_ER_AAC_LD) {
    395     filter_2 = qmf_bank->filter_2;
    396     fp1 = qmf_bank->fp1_anal;
    397     fp2 = qmf_bank->fp2_anal;
    398   }
    399 
    400   for (i = 0; i < num_time_slots; i++) {
    401     for (k = 0; k < NO_ANALYSIS_CHANNELS; k++)
    402       filter_states[NO_ANALYSIS_CHANNELS - 1 - k] = time_sample_buf[ch_fac * k];
    403 
    404     if (audio_object_type != AOT_ER_AAC_ELD &&
    405         audio_object_type != AOT_ER_AAC_LD) {
    406       ixheaacd_sbr_qmfanal32_winadds(fp1, fp2, filter_1, filter_2,
    407                                      analysis_buffer, filter_states,
    408                                      time_sample_buf, ch_fac);
    409     }
    410 
    411     else {
    412       ixheaacd_sbr_qmfanal32_winadd_eld(fp1, fp2, filter_1, filter_2,
    413                                         analysis_buffer);
    414     }
    415 
    416     time_sample_buf += NO_ANALYSIS_CHANNELS * ch_fac;
    417 
    418     filter_states -= NO_ANALYSIS_CHANNELS;
    419     if (filter_states < qmf_bank->anal_filter_states) {
    420       filter_states = qmf_bank->anal_filter_states + 288;
    421     }
    422 
    423     tmp = fp1;
    424     fp1 = fp2;
    425     fp2 = tmp;
    426     if (audio_object_type != AOT_ER_AAC_ELD &&
    427         audio_object_type != AOT_ER_AAC_LD) {
    428       filter_1 += 64;
    429       filter_2 += 64;
    430     } else {
    431       filter_1 += 32;
    432       filter_2 += 32;
    433     }
    434 
    435     filt_ptr = filter_1;
    436     filter_1 = filter_2;
    437     filter_2 = filt_ptr;
    438     if (audio_object_type != AOT_ER_AAC_ELD &&
    439         audio_object_type != AOT_ER_AAC_LD) {
    440       if (filter_2 > (qmf_bank->analy_win_coeff + 640)) {
    441         filter_1 = (WORD16 *)qmf_bank->analy_win_coeff;
    442         filter_2 = (WORD16 *)qmf_bank->analy_win_coeff + 64;
    443       }
    444     } else {
    445       if (filter_2 > (qmf_bank->analy_win_coeff + 320)) {
    446         filter_1 = (WORD16 *)qmf_bank->analy_win_coeff;
    447         filter_2 = (WORD16 *)qmf_bank->analy_win_coeff + 32;
    448       }
    449     }
    450 
    451     if (!low_pow_flag) {
    452       ixheaacd_fwd_modulation(analysis_buffer, qmf_real[i], qmf_imag[i],
    453                               qmf_bank, qmf_dec_tables_ptr);
    454     } else {
    455       ixheaacd_dct3_32(
    456           (WORD32 *)analysis_buffer, qmf_real[i], qmf_dec_tables_ptr->dct23_tw,
    457           qmf_dec_tables_ptr->post_fft_tbl, qmf_dec_tables_ptr->w_16,
    458           qmf_dec_tables_ptr->dig_rev_table4_16);
    459     }
    460   }
    461 
    462   qmf_bank->filter_pos = filter_1;
    463   qmf_bank->core_samples_buffer = filter_states;
    464 
    465   if (audio_object_type == AOT_ER_AAC_ELD || audio_object_type == AOT_ER_AAC_LD)
    466 
    467   {
    468     qmf_bank->fp1_anal = fp1;
    469     qmf_bank->fp2_anal = fp2;
    470     qmf_bank->filter_2 = filter_2;
    471   }
    472 }
    473 
    474 VOID ixheaacd_inv_modulation_lp(WORD32 *qmf_real, WORD16 *filter_states,
    475                                 ia_sbr_qmf_filter_bank_struct *syn_qmf,
    476                                 ia_qmf_dec_tables_struct *qmf_dec_tables_ptr) {
    477   WORD32 L = syn_qmf->no_channels;
    478   const WORD32 M = (L >> 1);
    479   WORD32 *dct_in = qmf_real;
    480   WORD32 time_out[2 * NO_SYNTHESIS_CHANNELS];
    481 
    482   WORD32 ui_rem = ((WORD64)(&time_out[0]) % 8);
    483   WORD32 *ptime_out = (pVOID)((WORD8 *)&time_out[0] + 8 - ui_rem);
    484 
    485   if (L == 64)
    486     ixheaacd_dct2_64(dct_in, ptime_out, qmf_dec_tables_ptr, filter_states + M);
    487   else
    488     ixheaacd_dct2_32(dct_in, time_out, qmf_dec_tables_ptr, filter_states);
    489 
    490   filter_states[3 * M] = 0;
    491 }
    492 
    493 VOID ixheaacd_inv_emodulation(WORD32 *qmf_real,
    494                               ia_sbr_qmf_filter_bank_struct *syn_qmf,
    495                               ia_qmf_dec_tables_struct *qmf_dec_tables_ptr) {
    496   ixheaacd_cos_sin_mod(qmf_real, syn_qmf, (WORD16 *)qmf_dec_tables_ptr->w1024,
    497                        (WORD32 *)qmf_dec_tables_ptr->dig_rev_table2_128);
    498 }
    499 
    500 VOID ixheaacd_esbr_radix4bfly(const WORD32 *w, WORD32 *x, WORD32 index1,
    501                               WORD32 index) {
    502   int i;
    503   WORD32 l1, l2, h2, fft_jmp;
    504   WORD32 xt0_0, yt0_0, xt1_0, yt1_0, xt2_0, yt2_0;
    505   WORD32 xh0_0, xh1_0, xh20_0, xh21_0, xl0_0, xl1_0, xl20_0, xl21_0;
    506   WORD32 x_0, x_1, x_l1_0, x_l1_1, x_l2_0, x_l2_1;
    507   WORD32 x_h2_0, x_h2_1;
    508   WORD32 si10, si20, si30, co10, co20, co30;
    509 
    510   WORD64 mul_1, mul_2, mul_3, mul_4, mul_5, mul_6;
    511   WORD64 mul_7, mul_8, mul_9, mul_10, mul_11, mul_12;
    512   WORD32 *x_l1;
    513   WORD32 *x_l2;
    514   WORD32 *x_h2;
    515   const WORD32 *w_ptr = w;
    516   WORD32 i1;
    517 
    518   h2 = index << 1;
    519   l1 = index << 2;
    520   l2 = (index << 2) + (index << 1);
    521 
    522   x_l1 = &(x[l1]);
    523   x_l2 = &(x[l2]);
    524   x_h2 = &(x[h2]);
    525 
    526   fft_jmp = 6 * (index);
    527 
    528   for (i1 = 0; i1 < index1; i1++) {
    529     for (i = 0; i < index; i++) {
    530       si10 = (*w_ptr++);
    531       co10 = (*w_ptr++);
    532       si20 = (*w_ptr++);
    533       co20 = (*w_ptr++);
    534       si30 = (*w_ptr++);
    535       co30 = (*w_ptr++);
    536 
    537       x_0 = x[0];
    538       x_h2_0 = x[h2];
    539       x_l1_0 = x[l1];
    540       x_l2_0 = x[l2];
    541 
    542       xh0_0 = x_0 + x_l1_0;
    543       xl0_0 = x_0 - x_l1_0;
    544 
    545       xh20_0 = x_h2_0 + x_l2_0;
    546       xl20_0 = x_h2_0 - x_l2_0;
    547 
    548       x[0] = xh0_0 + xh20_0;
    549       xt0_0 = xh0_0 - xh20_0;
    550 
    551       x_1 = x[1];
    552       x_h2_1 = x[h2 + 1];
    553       x_l1_1 = x[l1 + 1];
    554       x_l2_1 = x[l2 + 1];
    555 
    556       xh1_0 = x_1 + x_l1_1;
    557       xl1_0 = x_1 - x_l1_1;
    558 
    559       xh21_0 = x_h2_1 + x_l2_1;
    560       xl21_0 = x_h2_1 - x_l2_1;
    561 
    562       x[1] = xh1_0 + xh21_0;
    563       yt0_0 = xh1_0 - xh21_0;
    564 
    565       xt1_0 = xl0_0 + xl21_0;
    566       xt2_0 = xl0_0 - xl21_0;
    567 
    568       yt2_0 = xl1_0 + xl20_0;
    569       yt1_0 = xl1_0 - xl20_0;
    570 
    571       mul_11 = ixheaacd_mult64(xt2_0, co30);
    572       mul_3 = ixheaacd_mult64(yt2_0, si30);
    573       x[l2] = (WORD32)((mul_3 + mul_11) >> 32) << RADIXSHIFT;
    574 
    575       mul_5 = ixheaacd_mult64(xt2_0, si30);
    576       mul_9 = ixheaacd_mult64(yt2_0, co30);
    577       x[l2 + 1] = (WORD32)((mul_9 - mul_5) >> 32) << RADIXSHIFT;
    578 
    579       mul_12 = ixheaacd_mult64(xt0_0, co20);
    580       mul_2 = ixheaacd_mult64(yt0_0, si20);
    581       x[l1] = (WORD32)((mul_2 + mul_12) >> 32) << RADIXSHIFT;
    582 
    583       mul_6 = ixheaacd_mult64(xt0_0, si20);
    584       mul_8 = ixheaacd_mult64(yt0_0, co20);
    585       x[l1 + 1] = (WORD32)((mul_8 - mul_6) >> 32) << RADIXSHIFT;
    586 
    587       mul_4 = ixheaacd_mult64(xt1_0, co10);
    588       mul_1 = ixheaacd_mult64(yt1_0, si10);
    589       x[h2] = (WORD32)((mul_1 + mul_4) >> 32) << RADIXSHIFT;
    590 
    591       mul_10 = ixheaacd_mult64(xt1_0, si10);
    592       mul_7 = ixheaacd_mult64(yt1_0, co10);
    593       x[h2 + 1] = (WORD32)((mul_7 - mul_10) >> 32) << RADIXSHIFT;
    594 
    595       x += 2;
    596     }
    597     x += fft_jmp;
    598     w_ptr = w_ptr - fft_jmp;
    599   }
    600 }
    601 
    602 VOID ixheaacd_esbr_postradixcompute2(WORD32 *ptr_y, WORD32 *ptr_x,
    603                                      const WORD32 *pdig_rev_tbl,
    604                                      WORD32 npoints) {
    605   WORD32 i, k;
    606   WORD32 h2;
    607   WORD32 x_0, x_1, x_2, x_3;
    608   WORD32 x_4, x_5, x_6, x_7;
    609   WORD32 x_8, x_9, x_a, x_b, x_c, x_d, x_e, x_f;
    610   WORD32 n00, n10, n20, n30, n01, n11, n21, n31;
    611   WORD32 n02, n12, n22, n32, n03, n13, n23, n33;
    612   WORD32 n0, j0;
    613   WORD32 *x2, *x0;
    614   WORD32 *y0, *y1, *y2, *y3;
    615 
    616   y0 = ptr_y;
    617   y2 = ptr_y + (WORD32)npoints;
    618   x0 = ptr_x;
    619   x2 = ptr_x + (WORD32)(npoints >> 1);
    620 
    621   y1 = y0 + (WORD32)(npoints >> 2);
    622   y3 = y2 + (WORD32)(npoints >> 2);
    623   j0 = 8;
    624   n0 = npoints >> 1;
    625 
    626   for (k = 0; k < 2; k++) {
    627     for (i = 0; i<npoints>> 1; i += 8) {
    628       h2 = *pdig_rev_tbl++ >> 2;
    629 
    630       x_0 = *x0++;
    631       x_1 = *x0++;
    632       x_2 = *x0++;
    633       x_3 = *x0++;
    634       x_4 = *x0++;
    635       x_5 = *x0++;
    636       x_6 = *x0++;
    637       x_7 = *x0++;
    638 
    639       n00 = x_0 + x_2;
    640       n01 = x_1 + x_3;
    641       n20 = x_0 - x_2;
    642       n21 = x_1 - x_3;
    643       n10 = x_4 + x_6;
    644       n11 = x_5 + x_7;
    645       n30 = x_4 - x_6;
    646       n31 = x_5 - x_7;
    647 
    648       y0[h2] = n00;
    649       y0[h2 + 1] = n01;
    650       y1[h2] = n10;
    651       y1[h2 + 1] = n11;
    652       y2[h2] = n20;
    653       y2[h2 + 1] = n21;
    654       y3[h2] = n30;
    655       y3[h2 + 1] = n31;
    656 
    657       x_8 = *x2++;
    658       x_9 = *x2++;
    659       x_a = *x2++;
    660       x_b = *x2++;
    661       x_c = *x2++;
    662       x_d = *x2++;
    663       x_e = *x2++;
    664       x_f = *x2++;
    665 
    666       n02 = x_8 + x_a;
    667       n03 = x_9 + x_b;
    668       n22 = x_8 - x_a;
    669       n23 = x_9 - x_b;
    670       n12 = x_c + x_e;
    671       n13 = x_d + x_f;
    672       n32 = x_c - x_e;
    673       n33 = x_d - x_f;
    674 
    675       y0[h2 + 2] = n02;
    676       y0[h2 + 3] = n03;
    677       y1[h2 + 2] = n12;
    678       y1[h2 + 3] = n13;
    679       y2[h2 + 2] = n22;
    680       y2[h2 + 3] = n23;
    681       y3[h2 + 2] = n32;
    682       y3[h2 + 3] = n33;
    683     }
    684     x0 += (WORD32)npoints >> 1;
    685     x2 += (WORD32)npoints >> 1;
    686   }
    687 }
    688 
    689 VOID ixheaacd_esbr_postradixcompute4(WORD32 *ptr_y, WORD32 *ptr_x,
    690                                      const WORD32 *p_dig_rev_tbl,
    691                                      WORD32 npoints) {
    692   WORD32 i, k;
    693   WORD32 h2;
    694   WORD32 xh0_0, xh1_0, xl0_0, xl1_0;
    695   WORD32 xh0_1, xh1_1, xl0_1, xl1_1;
    696   WORD32 x_0, x_1, x_2, x_3;
    697   WORD32 xh0_2, xh1_2, xl0_2, xl1_2, xh0_3, xh1_3, xl0_3, xl1_3;
    698   WORD32 x_4, x_5, x_6, x_7;
    699   WORD32 x_8, x_9, x_a, x_b, x_c, x_d, x_e, x_f;
    700   WORD32 n00, n10, n20, n30, n01, n11, n21, n31;
    701   WORD32 n02, n12, n22, n32, n03, n13, n23, n33;
    702   WORD32 n0, j0;
    703   WORD32 *x2, *x0;
    704   WORD32 *y0, *y1, *y2, *y3;
    705 
    706   y0 = ptr_y;
    707   y2 = ptr_y + (WORD32)npoints;
    708   x0 = ptr_x;
    709   x2 = ptr_x + (WORD32)(npoints >> 1);
    710 
    711   y1 = y0 + (WORD32)(npoints >> 1);
    712   y3 = y2 + (WORD32)(npoints >> 1);
    713 
    714   j0 = 4;
    715   n0 = npoints >> 2;
    716 
    717   for (k = 0; k < 2; k++) {
    718     for (i = 0; i<npoints>> 1; i += 8) {
    719       h2 = *p_dig_rev_tbl++ >> 2;
    720       x_0 = *x0++;
    721       x_1 = *x0++;
    722       x_2 = *x0++;
    723       x_3 = *x0++;
    724       x_4 = *x0++;
    725       x_5 = *x0++;
    726       x_6 = *x0++;
    727       x_7 = *x0++;
    728 
    729       xh0_0 = x_0 + x_4;
    730       xh1_0 = x_1 + x_5;
    731       xl0_0 = x_0 - x_4;
    732       xl1_0 = x_1 - x_5;
    733       xh0_1 = x_2 + x_6;
    734       xh1_1 = x_3 + x_7;
    735       xl0_1 = x_2 - x_6;
    736       xl1_1 = x_3 - x_7;
    737 
    738       n00 = xh0_0 + xh0_1;
    739       n01 = xh1_0 + xh1_1;
    740       n10 = xl0_0 + xl1_1;
    741       n11 = xl1_0 - xl0_1;
    742       n20 = xh0_0 - xh0_1;
    743       n21 = xh1_0 - xh1_1;
    744       n30 = xl0_0 - xl1_1;
    745       n31 = xl1_0 + xl0_1;
    746 
    747       y0[h2] = n00;
    748       y0[h2 + 1] = n01;
    749       y1[h2] = n10;
    750       y1[h2 + 1] = n11;
    751       y2[h2] = n20;
    752       y2[h2 + 1] = n21;
    753       y3[h2] = n30;
    754       y3[h2 + 1] = n31;
    755 
    756       x_8 = *x2++;
    757       x_9 = *x2++;
    758       x_a = *x2++;
    759       x_b = *x2++;
    760       x_c = *x2++;
    761       x_d = *x2++;
    762       x_e = *x2++;
    763       x_f = *x2++;
    764 
    765       xh0_2 = x_8 + x_c;
    766       xh1_2 = x_9 + x_d;
    767       xl0_2 = x_8 - x_c;
    768       xl1_2 = x_9 - x_d;
    769       xh0_3 = x_a + x_e;
    770       xh1_3 = x_b + x_f;
    771       xl0_3 = x_a - x_e;
    772       xl1_3 = x_b - x_f;
    773 
    774       n02 = xh0_2 + xh0_3;
    775       n03 = xh1_2 + xh1_3;
    776       n12 = xl0_2 + xl1_3;
    777       n13 = xl1_2 - xl0_3;
    778       n22 = xh0_2 - xh0_3;
    779       n23 = xh1_2 - xh1_3;
    780       n32 = xl0_2 - xl1_3;
    781       n33 = xl1_2 + xl0_3;
    782 
    783       y0[h2 + 2] = n02;
    784       y0[h2 + 3] = n03;
    785       y1[h2 + 2] = n12;
    786       y1[h2 + 3] = n13;
    787       y2[h2 + 2] = n22;
    788       y2[h2 + 3] = n23;
    789       y3[h2 + 2] = n32;
    790       y3[h2 + 3] = n33;
    791     }
    792     x0 += (WORD32)npoints >> 1;
    793     x2 += (WORD32)npoints >> 1;
    794   }
    795 }
    796 
    797 VOID ixheaacd_esbr_cos_sin_mod(WORD32 *subband,
    798                                ia_sbr_qmf_filter_bank_struct *qmf_bank,
    799                                WORD32 *p_twiddle, WORD32 *p_dig_rev_tbl) {
    800   WORD32 z;
    801   WORD32 temp[128];
    802   WORD32 scaleshift = 0;
    803 
    804   WORD32 re2, re3;
    805   WORD32 wim, wre;
    806 
    807   WORD32 i, M_2;
    808   WORD32 M = ixheaacd_shr32(qmf_bank->no_channels, 1);
    809 
    810   const WORD32 *p_sin;
    811   const WORD32 *p_sin_cos;
    812 
    813   WORD32 subband_tmp[128];
    814   WORD32 re;
    815   WORD32 im;
    816   WORD32 *psubband, *psubband1;
    817   WORD32 *psubband_t, *psubband1_t;
    818   WORD32 *psubband2, *psubband12;
    819   WORD32 *psubband_t2, *psubband1_t2;
    820 
    821   M_2 = ixheaacd_shr32(M, 1);
    822 
    823   p_sin_cos = qmf_bank->esbr_cos_twiddle;
    824 
    825   psubband = &subband[0];
    826   psubband1 = &subband[2 * M - 1];
    827   psubband_t = subband_tmp;
    828   psubband1_t = &subband_tmp[2 * M - 1];
    829 
    830   psubband2 = &subband[64];
    831   psubband12 = &subband[2 * M - 1 + 64];
    832   psubband_t2 = &subband_tmp[64];
    833   psubband1_t2 = &subband_tmp[2 * M - 1 + 64];
    834 
    835   for (i = (M_2 >> 1) - 1; i >= 0; i--) {
    836     re = *psubband++;
    837     im = *psubband1--;
    838 
    839     wim = *p_sin_cos++;
    840     wre = *p_sin_cos++;
    841 
    842     *psubband_t++ = (WORD32)(
    843         (ixheaacd_add64(ixheaacd_mult64(re, wre), ixheaacd_mult64(im, wim))) >>
    844         32);
    845     *psubband_t++ = (WORD32)((ixheaacd_sub64_sat(ixheaacd_mult64(im, wre),
    846                                                  ixheaacd_mult64(re, wim))) >>
    847                              32);
    848 
    849     re = *psubband2++;
    850     im = *psubband12--;
    851 
    852     *psubband_t2++ = (WORD32)((ixheaacd_sub64_sat(ixheaacd_mult64(im, wim),
    853                                                   ixheaacd_mult64(re, wre))) >>
    854                               32);
    855     *psubband_t2++ = (WORD32)(
    856         (ixheaacd_add64(ixheaacd_mult64(re, wim), ixheaacd_mult64(im, wre))) >>
    857         32);
    858 
    859     re = *psubband1--;
    860     im = *psubband++;
    861 
    862     wim = *p_sin_cos++;
    863     wre = *p_sin_cos++;
    864 
    865     *psubband1_t-- = (WORD32)((ixheaacd_sub64_sat(ixheaacd_mult64(im, wre),
    866                                                   ixheaacd_mult64(re, wim))) >>
    867                               32);
    868     *psubband1_t-- = (WORD32)(
    869         (ixheaacd_add64(ixheaacd_mult64(re, wre), ixheaacd_mult64(im, wim))) >>
    870         32);
    871 
    872     re = *psubband12--;
    873     im = *psubband2++;
    874 
    875     *psubband1_t2-- = (WORD32)(
    876         (ixheaacd_add64(ixheaacd_mult64(re, wim), ixheaacd_mult64(im, wre))) >>
    877         32);
    878     *psubband1_t2-- = (WORD32)((ixheaacd_sub64_sat(ixheaacd_mult64(im, wim),
    879                                                    ixheaacd_mult64(re, wre))) >>
    880                                32);
    881 
    882     re = *psubband++;
    883     im = *psubband1--;
    884 
    885     wim = *p_sin_cos++;
    886     wre = *p_sin_cos++;
    887 
    888     *psubband_t++ = (WORD32)(
    889         (ixheaacd_add64(ixheaacd_mult64(re, wre), ixheaacd_mult64(im, wim))) >>
    890         32);
    891     *psubband_t++ = (WORD32)((ixheaacd_sub64_sat(ixheaacd_mult64(im, wre),
    892                                                  ixheaacd_mult64(re, wim))) >>
    893                              32);
    894 
    895     re = *psubband2++;
    896     im = *psubband12--;
    897 
    898     *psubband_t2++ = (WORD32)((ixheaacd_sub64_sat(ixheaacd_mult64(im, wim),
    899                                                   ixheaacd_mult64(re, wre))) >>
    900                               32);
    901     *psubband_t2++ = (WORD32)(
    902         (ixheaacd_add64(ixheaacd_mult64(re, wim), ixheaacd_mult64(im, wre))) >>
    903         32);
    904 
    905     re = *psubband1--;
    906     im = *psubband++;
    907     ;
    908 
    909     wim = *p_sin_cos++;
    910     wre = *p_sin_cos++;
    911 
    912     *psubband1_t-- = (WORD32)((ixheaacd_sub64_sat(ixheaacd_mult64(im, wre),
    913                                                   ixheaacd_mult64(re, wim))) >>
    914                               32);
    915     *psubband1_t-- = (WORD32)(
    916         (ixheaacd_add64(ixheaacd_mult64(re, wre), ixheaacd_mult64(im, wim))) >>
    917         32);
    918 
    919     re = *psubband12--;
    920     im = *psubband2++;
    921     ;
    922 
    923     *psubband1_t2-- = (WORD32)(
    924         (ixheaacd_add64(ixheaacd_mult64(re, wim), ixheaacd_mult64(im, wre))) >>
    925         32);
    926     *psubband1_t2-- = (WORD32)((ixheaacd_sub64_sat(ixheaacd_mult64(im, wim),
    927                                                    ixheaacd_mult64(re, wre))) >>
    928                                32);
    929   }
    930 
    931   if (M == 32) {
    932     ixheaacd_esbr_radix4bfly(p_twiddle, subband_tmp, 1, 8);
    933     ixheaacd_esbr_radix4bfly(p_twiddle + 48, subband_tmp, 4, 2);
    934     ixheaacd_esbr_postradixcompute2(subband, subband_tmp, p_dig_rev_tbl, 32);
    935 
    936     ixheaacd_esbr_radix4bfly(p_twiddle, &subband_tmp[64], 1, 8);
    937     ixheaacd_esbr_radix4bfly(p_twiddle + 48, &subband_tmp[64], 4, 2);
    938     ixheaacd_esbr_postradixcompute2(&subband[64], &subband_tmp[64],
    939                                     p_dig_rev_tbl, 32);
    940 
    941   }
    942 
    943   else if (M == 16) {
    944     ixheaacd_esbr_radix4bfly(p_twiddle, subband_tmp, 1, 4);
    945     ixheaacd_esbr_postradixcompute4(subband, subband_tmp, p_dig_rev_tbl, 16);
    946 
    947     ixheaacd_esbr_radix4bfly(p_twiddle, &subband_tmp[64], 1, 4);
    948     ixheaacd_esbr_postradixcompute4(&subband[64], &subband_tmp[64],
    949                                     p_dig_rev_tbl, 16);
    950 
    951   }
    952 
    953   else if (M == 12) {
    954     for (z = 0; z < (qmf_bank->no_channels >> 1); z++) {
    955       temp[z] = subband_tmp[2 * z];
    956       temp[12 + z] = subband_tmp[2 * z + 1];
    957     }
    958 
    959     ixheaacd_complex_fft_p3(temp, &temp[12], 12, -1, &scaleshift);
    960 
    961     for (z = 0; z < (qmf_bank->no_channels >> 1); z++) {
    962       subband[2 * z] = temp[z];
    963       subband[2 * z + 1] = temp[z + 12];
    964     }
    965     scaleshift = 0;
    966     for (z = 0; z < (qmf_bank->no_channels >> 1); z++) {
    967       temp[z] = subband_tmp[64 + 2 * z];
    968       temp[12 + z] = subband_tmp[64 + 2 * z + 1];
    969     }
    970 
    971     ixheaacd_complex_fft_p3(temp, &temp[12], 12, -1, &scaleshift);
    972 
    973     for (z = 0; z < (qmf_bank->no_channels >> 1); z++) {
    974       subband[64 + 2 * z] = temp[z];
    975       subband[64 + 2 * z + 1] = temp[z + 12];
    976     }
    977 
    978   }
    979 
    980   else {
    981     for (z = 0; z < (qmf_bank->no_channels >> 1); z++) {
    982       temp[z] = subband_tmp[2 * z];
    983       temp[8 + z] = subband_tmp[2 * z + 1];
    984     }
    985 
    986     (*ixheaacd_complex_fft_p2)(temp, &temp[8], 8, -1, &scaleshift);
    987 
    988     for (z = 0; z < (qmf_bank->no_channels >> 1); z++) {
    989       subband[2 * z] = temp[z] << scaleshift;
    990       subband[2 * z + 1] = temp[z + 8] << scaleshift;
    991     }
    992     scaleshift = 0;
    993     for (z = 0; z < (qmf_bank->no_channels >> 1); z++) {
    994       temp[z] = subband_tmp[64 + 2 * z];
    995       temp[8 + z] = subband_tmp[64 + 2 * z + 1];
    996     }
    997 
    998     (*ixheaacd_complex_fft_p2)(temp, &temp[8], 8, -1, &scaleshift);
    999 
   1000     for (z = 0; z < (qmf_bank->no_channels >> 1); z++) {
   1001       subband[64 + 2 * z] = temp[z] << scaleshift;
   1002       subband[64 + 2 * z + 1] = temp[8 + z] << scaleshift;
   1003     }
   1004   }
   1005 
   1006   psubband = &subband[0];
   1007   psubband1 = &subband[2 * M - 1];
   1008 
   1009   re = *psubband1;
   1010 
   1011   *psubband = *psubband >> 1;
   1012   psubband++;
   1013   *psubband1 = ixheaacd_negate32(*psubband >> 1);
   1014   psubband1--;
   1015 
   1016   p_sin = qmf_bank->esbr_alt_sin_twiddle;
   1017 
   1018   wim = *p_sin++;
   1019   wre = *p_sin++;
   1020 
   1021   im = *psubband1;
   1022   ;
   1023 
   1024   *psubband1-- = (WORD32)(
   1025       (ixheaacd_add64(ixheaacd_mult64(re, wre), ixheaacd_mult64(im, wim))) >>
   1026       32);
   1027   *psubband++ = (WORD32)((ixheaacd_sub64_sat(ixheaacd_mult64(im, wre),
   1028                                              ixheaacd_mult64(re, wim))) >>
   1029                          32);
   1030 
   1031   psubband2 = &subband[64];
   1032   psubband12 = &subband[2 * M - 1 + 64];
   1033 
   1034   re = *psubband12;
   1035   ;
   1036 
   1037   *psubband12-- = ixheaacd_negate32_sat(*psubband2 >> 1);
   1038   ;
   1039   *psubband2 = psubband2[1] >> 1;
   1040   ;
   1041   psubband2++;
   1042 
   1043   im = *psubband12;
   1044   ;
   1045 
   1046   *psubband2++ = ixheaacd_negate32_sat((WORD32)(
   1047       (ixheaacd_add64(ixheaacd_mult64(re, wre), ixheaacd_mult64(im, wim))) >>
   1048       32));
   1049   *psubband12-- = (WORD32)((ixheaacd_sub64_sat(ixheaacd_mult64(re, wim),
   1050                                                ixheaacd_mult64(im, wre))) >>
   1051                            32);
   1052 
   1053   for (i = (M_2 - 2); i >= 0; i--) {
   1054     im = psubband[0];
   1055     ;
   1056     re = psubband[1];
   1057     ;
   1058     re2 = *psubband1;
   1059     ;
   1060 
   1061     *psubband++ = (WORD32)(
   1062         (ixheaacd_add64(ixheaacd_mult64(re, wim), ixheaacd_mult64(im, wre))) >>
   1063         32);
   1064     *psubband1-- = (WORD32)((ixheaacd_sub64_sat(ixheaacd_mult64(im, wim),
   1065                                                 ixheaacd_mult64(re, wre))) >>
   1066                             32);
   1067 
   1068     im = psubband2[0];
   1069     ;
   1070     re = psubband2[1];
   1071     ;
   1072     re3 = *psubband12;
   1073     ;
   1074 
   1075     *psubband12-- = ixheaacd_negate32_sat((WORD32)(
   1076         (ixheaacd_add64(ixheaacd_mult64(re, wim), ixheaacd_mult64(im, wre))) >>
   1077         32));
   1078     *psubband2++ = (WORD32)((ixheaacd_sub64_sat(ixheaacd_mult64(re, wre),
   1079                                                 ixheaacd_mult64(im, wim))) >>
   1080                             32);
   1081 
   1082     wim = *p_sin++;
   1083     wre = *p_sin++;
   1084     im = psubband1[0];
   1085     ;
   1086 
   1087     *psubband1-- = (WORD32)(
   1088         (ixheaacd_add64(ixheaacd_mult64(re2, wre), ixheaacd_mult64(im, wim))) >>
   1089         32);
   1090     *psubband++ = (WORD32)((ixheaacd_sub64_sat(ixheaacd_mult64(im, wre),
   1091                                                ixheaacd_mult64(re2, wim))) >>
   1092                            32);
   1093 
   1094     im = psubband12[0];
   1095     ;
   1096 
   1097     *psubband2++ = ixheaacd_negate32_sat((WORD32)(
   1098         (ixheaacd_add64(ixheaacd_mult64(re3, wre), ixheaacd_mult64(im, wim))) >>
   1099         32));
   1100     *psubband12-- = (WORD32)((ixheaacd_sub64_sat(ixheaacd_mult64(re3, wim),
   1101                                                  ixheaacd_mult64(im, wre))) >>
   1102                              32);
   1103   }
   1104 }
   1105 
   1106 VOID ixheaacd_esbr_fwd_modulation(
   1107     const WORD32 *time_sample_buf, WORD32 *real_subband, WORD32 *imag_subband,
   1108     ia_sbr_qmf_filter_bank_struct *qmf_bank,
   1109     ia_qmf_dec_tables_struct *qmf_dec_tables_ptr) {
   1110   WORD32 i;
   1111   const WORD32 *time_sample_buf1 =
   1112       &time_sample_buf[2 * qmf_bank->no_channels - 1];
   1113   WORD32 temp1, temp2;
   1114   WORD32 *t_real_subband = real_subband;
   1115   WORD32 *t_imag_subband = imag_subband;
   1116   const WORD32 *tcos;
   1117 
   1118   for (i = qmf_bank->no_channels - 1; i >= 0; i--) {
   1119     temp1 = ixheaacd_shr32(*time_sample_buf++, HQ_SHIFT_64);
   1120     temp2 = ixheaacd_shr32(*time_sample_buf1--, HQ_SHIFT_64);
   1121 
   1122     *t_real_subband++ = ixheaacd_sub32_sat(temp1, temp2);
   1123     ;
   1124     *t_imag_subband++ = ixheaacd_add32(temp1, temp2);
   1125     ;
   1126   }
   1127 
   1128   ixheaacd_esbr_cos_sin_mod(real_subband, qmf_bank,
   1129                             qmf_dec_tables_ptr->esbr_w_16,
   1130                             qmf_dec_tables_ptr->dig_rev_table4_16);
   1131 
   1132   tcos = qmf_bank->esbr_t_cos;
   1133 
   1134   for (i = (qmf_bank->usb - qmf_bank->lsb - 1); i >= 0; i--) {
   1135     WORD32 cosh, sinh;
   1136     WORD32 re, im;
   1137 
   1138     re = *real_subband;
   1139     im = *imag_subband;
   1140     cosh = *tcos++;
   1141     sinh = *tcos++;
   1142     *real_subband++ = (WORD32)((ixheaacd_add64(ixheaacd_mult64(re, cosh),
   1143                                                ixheaacd_mult64(im, sinh))) >>
   1144                                31);
   1145     *imag_subband++ =
   1146         (WORD32)((ixheaacd_sub64_sat(ixheaacd_mult64(im, cosh),
   1147                                      ixheaacd_mult64(re, sinh))) >>
   1148                  31);
   1149   }
   1150 }
   1151 
   1152 VOID ixheaacd_esbr_qmfsyn64_winadd(WORD32 *tmp1, WORD32 *tmp2, WORD32 *inp1,
   1153                                    WORD32 *sample_buffer, WORD32 ch_fac) {
   1154   WORD32 k;
   1155 
   1156   for (k = 0; k < 64; k++) {
   1157     WORD64 syn_out = 0;
   1158 
   1159     syn_out =
   1160         ixheaacd_add64(syn_out, ixheaacd_mult64(tmp1[0 + k], inp1[k + 0]));
   1161     syn_out =
   1162         ixheaacd_add64(syn_out, ixheaacd_mult64(tmp1[256 + k], inp1[k + 128]));
   1163     syn_out =
   1164         ixheaacd_add64(syn_out, ixheaacd_mult64(tmp1[512 + k], inp1[k + 256]));
   1165     syn_out =
   1166         ixheaacd_add64(syn_out, ixheaacd_mult64(tmp1[768 + k], inp1[k + 384]));
   1167     syn_out =
   1168         ixheaacd_add64(syn_out, ixheaacd_mult64(tmp1[1024 + k], inp1[k + 512]));
   1169 
   1170     syn_out =
   1171         ixheaacd_add64(syn_out, ixheaacd_mult64(tmp2[128 + k], inp1[k + 64]));
   1172     syn_out =
   1173         ixheaacd_add64(syn_out, ixheaacd_mult64(tmp2[384 + k], inp1[k + 192]));
   1174     syn_out =
   1175         ixheaacd_add64(syn_out, ixheaacd_mult64(tmp2[640 + k], inp1[k + 320]));
   1176     syn_out =
   1177         ixheaacd_add64(syn_out, ixheaacd_mult64(tmp2[896 + k], inp1[k + 448]));
   1178     syn_out =
   1179         ixheaacd_add64(syn_out, ixheaacd_mult64(tmp2[1152 + k], inp1[k + 576]));
   1180 
   1181     sample_buffer[ch_fac * k] = (WORD32)(syn_out >> 31);
   1182   }
   1183 }
   1184 
   1185 VOID ixheaacd_shiftrountine(WORD32 *qmf_real, WORD32 *qmf_imag, WORD32 len,
   1186                             WORD32 common_shift) {
   1187   WORD32 treal, timag;
   1188   WORD32 j;
   1189 
   1190   if (common_shift < 0) {
   1191     WORD32 cshift = -common_shift;
   1192     cshift = ixheaacd_min32(cshift, 31);
   1193     for (j = len - 1; j >= 0; j--) {
   1194       treal = *qmf_real;
   1195       timag = *qmf_imag;
   1196 
   1197       treal = (ixheaacd_shr32(treal, cshift));
   1198       timag = (ixheaacd_shr32(timag, cshift));
   1199 
   1200       *qmf_real++ = treal;
   1201       *qmf_imag++ = timag;
   1202     }
   1203   } else {
   1204     for (j = len - 1; j >= 0; j--) {
   1205       treal = (ixheaacd_shl32_sat(*qmf_real, common_shift));
   1206       timag = (ixheaacd_shl32_sat(*qmf_imag, common_shift));
   1207       *qmf_real++ = treal;
   1208       *qmf_imag++ = timag;
   1209     }
   1210   }
   1211 }
   1212 
   1213 VOID ixheaacd_shiftrountine_with_rnd_hq(WORD32 *qmf_real, WORD32 *qmf_imag,
   1214                                         WORD32 *filter_states, WORD32 len,
   1215                                         WORD32 shift) {
   1216   WORD32 *filter_states_rev = filter_states + len;
   1217   WORD32 treal, timag;
   1218   WORD32 j;
   1219 
   1220   for (j = (len - 1); j >= 0; j -= 2) {
   1221     WORD32 r1, r2, i1, i2;
   1222     i2 = qmf_imag[j];
   1223     r2 = qmf_real[j];
   1224     r1 = *qmf_real++;
   1225     i1 = *qmf_imag++;
   1226 
   1227     timag = ixheaacd_add32(i1, r1);
   1228     timag = (ixheaacd_shl32_sat(timag, shift));
   1229     filter_states_rev[j] = timag;
   1230 
   1231     treal = ixheaacd_sub32(i2, r2);
   1232     treal = (ixheaacd_shl32_sat(treal, shift));
   1233     filter_states[j] = treal;
   1234 
   1235     treal = ixheaacd_sub32(i1, r1);
   1236     treal = (ixheaacd_shl32_sat(treal, shift));
   1237     *filter_states++ = treal;
   1238 
   1239     timag = ixheaacd_add32(i2, r2);
   1240     timag = (ixheaacd_shl32_sat(timag, shift));
   1241     *filter_states_rev++ = timag;
   1242   }
   1243 }
   1244 
   1245 VOID ixheaacd_radix4bfly(const WORD16 *w, WORD32 *x, WORD32 index1,
   1246                          WORD32 index) {
   1247   int i;
   1248   WORD32 l1, l2, h2, fft_jmp;
   1249   WORD32 xt0_0, yt0_0, xt1_0, yt1_0, xt2_0, yt2_0;
   1250   WORD32 xh0_0, xh1_0, xh20_0, xh21_0, xl0_0, xl1_0, xl20_0, xl21_0;
   1251   WORD32 x_0, x_1, x_l1_0, x_l1_1, x_l2_0, x_l2_1;
   1252   WORD32 x_h2_0, x_h2_1;
   1253   WORD16 si10, si20, si30, co10, co20, co30;
   1254 
   1255   WORD32 mul_1, mul_2, mul_3, mul_4, mul_5, mul_6;
   1256   WORD32 mul_7, mul_8, mul_9, mul_10, mul_11, mul_12;
   1257   WORD32 *x_l1;
   1258   WORD32 *x_l2;
   1259   WORD32 *x_h2;
   1260   const WORD16 *w_ptr = w;
   1261   WORD32 i1;
   1262 
   1263   h2 = index << 1;
   1264   l1 = index << 2;
   1265   l2 = (index << 2) + (index << 1);
   1266 
   1267   x_l1 = &(x[l1]);
   1268   x_l2 = &(x[l2]);
   1269   x_h2 = &(x[h2]);
   1270 
   1271   fft_jmp = 6 * (index);
   1272 
   1273   for (i1 = 0; i1 < index1; i1++) {
   1274     for (i = 0; i < index; i++) {
   1275       si10 = (*w_ptr++);
   1276       co10 = (*w_ptr++);
   1277       si20 = (*w_ptr++);
   1278       co20 = (*w_ptr++);
   1279       si30 = (*w_ptr++);
   1280       co30 = (*w_ptr++);
   1281 
   1282       x_0 = x[0];
   1283       x_h2_0 = x[h2];
   1284       x_l1_0 = x[l1];
   1285       x_l2_0 = x[l2];
   1286 
   1287       xh0_0 = x_0 + x_l1_0;
   1288       xl0_0 = x_0 - x_l1_0;
   1289 
   1290       xh20_0 = x_h2_0 + x_l2_0;
   1291       xl20_0 = x_h2_0 - x_l2_0;
   1292 
   1293       x[0] = xh0_0 + xh20_0;
   1294       xt0_0 = xh0_0 - xh20_0;
   1295 
   1296       x_1 = x[1];
   1297       x_h2_1 = x[h2 + 1];
   1298       x_l1_1 = x[l1 + 1];
   1299       x_l2_1 = x[l2 + 1];
   1300 
   1301       xh1_0 = x_1 + x_l1_1;
   1302       xl1_0 = x_1 - x_l1_1;
   1303 
   1304       xh21_0 = x_h2_1 + x_l2_1;
   1305       xl21_0 = x_h2_1 - x_l2_1;
   1306 
   1307       x[1] = xh1_0 + xh21_0;
   1308       yt0_0 = xh1_0 - xh21_0;
   1309 
   1310       xt1_0 = xl0_0 + xl21_0;
   1311       xt2_0 = xl0_0 - xl21_0;
   1312 
   1313       yt2_0 = xl1_0 + xl20_0;
   1314       yt1_0 = xl1_0 - xl20_0;
   1315 
   1316       mul_11 = ixheaacd_mult32x16in32(xt2_0, co30);
   1317       mul_3 = ixheaacd_mult32x16in32(yt2_0, si30);
   1318       x[l2] = (mul_3 + mul_11) << RADIXSHIFT;
   1319 
   1320       mul_5 = ixheaacd_mult32x16in32(xt2_0, si30);
   1321       mul_9 = ixheaacd_mult32x16in32(yt2_0, co30);
   1322       x[l2 + 1] = (mul_9 - mul_5) << RADIXSHIFT;
   1323 
   1324       mul_12 = ixheaacd_mult32x16in32(xt0_0, co20);
   1325       mul_2 = ixheaacd_mult32x16in32(yt0_0, si20);
   1326       x[l1] = (mul_2 + mul_12) << RADIXSHIFT;
   1327 
   1328       mul_6 = ixheaacd_mult32x16in32(xt0_0, si20);
   1329       mul_8 = ixheaacd_mult32x16in32(yt0_0, co20);
   1330       x[l1 + 1] = (mul_8 - mul_6) << RADIXSHIFT;
   1331 
   1332       mul_4 = ixheaacd_mult32x16in32(xt1_0, co10);
   1333       mul_1 = ixheaacd_mult32x16in32(yt1_0, si10);
   1334       x[h2] = (mul_1 + mul_4) << RADIXSHIFT;
   1335 
   1336       mul_10 = ixheaacd_mult32x16in32(xt1_0, si10);
   1337       mul_7 = ixheaacd_mult32x16in32(yt1_0, co10);
   1338       x[h2 + 1] = (mul_7 - mul_10) << RADIXSHIFT;
   1339 
   1340       x += 2;
   1341     }
   1342     x += fft_jmp;
   1343     w_ptr = w_ptr - fft_jmp;
   1344   }
   1345 }
   1346