Home | History | Annotate | Download | only in src
      1 /*
      2  ** Copyright 2003-2010, VisualOn, Inc.
      3  **
      4  ** Licensed under the Apache License, Version 2.0 (the "License");
      5  ** you may not use this file except in compliance with the License.
      6  ** You may obtain a copy of the License at
      7  **
      8  **     http://www.apache.org/licenses/LICENSE-2.0
      9  **
     10  ** Unless required by applicable law or agreed to in writing, software
     11  ** distributed under the License is distributed on an "AS IS" BASIS,
     12  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  ** See the License for the specific language governing permissions and
     14  ** limitations under the License.
     15  */
     16 
     17 /***********************************************************************
     18 *      File: c4t64fx.c                                                 *
     19 *                                                                      *
     20 *      Description:Performs algebraic codebook search for higher modes *
     21 *                                                                      *
     22 ************************************************************************/
     23 
     24 /************************************************************************
     25 * Function: ACELP_4t64_fx()                                             *
     26 *                                                                       *
     27 * 20, 36, 44, 52, 64, 72, 88 bits algebraic codebook.                   *
     28 * 4 tracks x 16 positions per track = 64 samples.                       *
     29 *                                                                       *
     30 * 20 bits --> 4 pulses in a frame of 64 samples.                        *
     31 * 36 bits --> 8 pulses in a frame of 64 samples.                        *
     32 * 44 bits --> 10 pulses in a frame of 64 samples.                       *
     33 * 52 bits --> 12 pulses in a frame of 64 samples.                       *
     34 * 64 bits --> 16 pulses in a frame of 64 samples.                       *
     35 * 72 bits --> 18 pulses in a frame of 64 samples.                       *
     36 * 88 bits --> 24 pulses in a frame of 64 samples.                       *
     37 *                                                                       *
     38 * All pulses can have two (2) possible amplitudes: +1 or -1.            *
     39 * Each pulse can have sixteen (16) possible positions.                  *
     40 *************************************************************************/
     41 
     42 #include "typedef.h"
     43 #include "basic_op.h"
     44 #include "math_op.h"
     45 #include "acelp.h"
     46 #include "cnst.h"
     47 
     48 #include "q_pulse.h"
     49 
     50 #undef LOG_TAG
     51 #define LOG_TAG "amrwbenc"
     52 #include "log/log.h"
     53 
     54 static Word16 tipos[36] = {
     55     0, 1, 2, 3,                            /* starting point &ipos[0], 1st iter */
     56     1, 2, 3, 0,                            /* starting point &ipos[4], 2nd iter */
     57     2, 3, 0, 1,                            /* starting point &ipos[8], 3rd iter */
     58     3, 0, 1, 2,                            /* starting point &ipos[12], 4th iter */
     59     0, 1, 2, 3,
     60     1, 2, 3, 0,
     61     2, 3, 0, 1,
     62     3, 0, 1, 2,
     63     0, 1, 2, 3};                           /* end point for 24 pulses &ipos[35], 4th iter */
     64 
     65 #define NB_PULSE_MAX  24
     66 
     67 #define L_SUBFR   64
     68 #define NB_TRACK  4
     69 #define STEP      4
     70 #define NB_POS    16
     71 #define MSIZE     256
     72 #define NB_MAX    8
     73 #define NPMAXPT   ((NB_PULSE_MAX+NB_TRACK-1)/NB_TRACK)
     74 
     75 /* Private functions */
     76 void cor_h_vec_012(
     77         Word16 h[],                           /* (i) scaled impulse response                 */
     78         Word16 vec[],                         /* (i) scaled vector (/8) to correlate with h[] */
     79         Word16 track,                         /* (i) track to use                            */
     80         Word16 sign[],                        /* (i) sign vector                             */
     81         Word16 rrixix[][NB_POS],              /* (i) correlation of h[x] with h[x]      */
     82         Word16 cor_1[],                       /* (o) result of correlation (NB_POS elements) */
     83         Word16 cor_2[]                        /* (o) result of correlation (NB_POS elements) */
     84         );
     85 
     86 void cor_h_vec_012_asm(
     87         Word16 h[],                           /* (i) scaled impulse response                 */
     88         Word16 vec[],                         /* (i) scaled vector (/8) to correlate with h[] */
     89         Word16 track,                         /* (i) track to use                            */
     90         Word16 sign[],                        /* (i) sign vector                             */
     91         Word16 rrixix[][NB_POS],              /* (i) correlation of h[x] with h[x]      */
     92         Word16 cor_1[],                       /* (o) result of correlation (NB_POS elements) */
     93         Word16 cor_2[]                        /* (o) result of correlation (NB_POS elements) */
     94         );
     95 
     96 void cor_h_vec_30(
     97         Word16 h[],                           /* (i) scaled impulse response                 */
     98         Word16 vec[],                         /* (i) scaled vector (/8) to correlate with h[] */
     99         Word16 track,                         /* (i) track to use                            */
    100         Word16 sign[],                        /* (i) sign vector                             */
    101         Word16 rrixix[][NB_POS],              /* (i) correlation of h[x] with h[x]      */
    102         Word16 cor_1[],                       /* (o) result of correlation (NB_POS elements) */
    103         Word16 cor_2[]                        /* (o) result of correlation (NB_POS elements) */
    104         );
    105 
    106 void search_ixiy(
    107         Word16 nb_pos_ix,                     /* (i) nb of pos for pulse 1 (1..8)       */
    108         Word16 track_x,                       /* (i) track of pulse 1                   */
    109         Word16 track_y,                       /* (i) track of pulse 2                   */
    110         Word16 * ps,                          /* (i/o) correlation of all fixed pulses  */
    111         Word16 * alp,                         /* (i/o) energy of all fixed pulses       */
    112         Word16 * ix,                          /* (o) position of pulse 1                */
    113         Word16 * iy,                          /* (o) position of pulse 2                */
    114         Word16 dn[],                          /* (i) corr. between target and h[]       */
    115         Word16 dn2[],                         /* (i) vector of selected positions       */
    116         Word16 cor_x[],                       /* (i) corr. of pulse 1 with fixed pulses */
    117         Word16 cor_y[],                       /* (i) corr. of pulse 2 with fixed pulses */
    118         Word16 rrixiy[][MSIZE]                /* (i) corr. of pulse 1 with pulse 2   */
    119         );
    120 
    121 
    122 void ACELP_4t64_fx(
    123         Word16 dn[],                          /* (i) <12b : correlation between target x[] and H[]      */
    124         Word16 cn[],                          /* (i) <12b : residual after long term prediction         */
    125         Word16 H[],                           /* (i) Q12: impulse response of weighted synthesis filter */
    126         Word16 code[],                        /* (o) Q9 : algebraic (fixed) codebook excitation         */
    127         Word16 y[],                           /* (o) Q9 : filtered fixed codebook excitation            */
    128         Word16 nbbits,                        /* (i) : 20, 36, 44, 52, 64, 72 or 88 bits                */
    129         Word16 ser_size,                      /* (i) : bit rate                                         */
    130         Word16 _index[]                       /* (o) : index (20): 5+5+5+5 = 20 bits.                   */
    131         /* (o) : index (36): 9+9+9+9 = 36 bits.                   */
    132         /* (o) : index (44): 13+9+13+9 = 44 bits.                 */
    133         /* (o) : index (52): 13+13+13+13 = 52 bits.               */
    134         /* (o) : index (64): 2+2+2+2+14+14+14+14 = 64 bits.       */
    135         /* (o) : index (72): 10+2+10+2+10+14+10+14 = 72 bits.     */
    136         /* (o) : index (88): 11+11+11+11+11+11+11+11 = 88 bits.   */
    137         )
    138 {
    139     Word32 i, j, k;
    140     Word16 st, ix, iy, pos, index, track, nb_pulse, nbiter, j_temp;
    141     Word16 psk, ps, alpk, alp, val, k_cn, k_dn, exp;
    142     Word16 *p0, *p1, *p2, *p3, *psign;
    143     Word16 *h, *h_inv, *ptr_h1, *ptr_h2, *ptr_hf, h_shift;
    144     Word32 s, cor, L_tmp, L_index;
    145     Word16 dn2[L_SUBFR], sign[L_SUBFR], vec[L_SUBFR];
    146     Word16 ind[NPMAXPT * NB_TRACK];
    147     Word16 codvec[NB_PULSE_MAX], nbpos[10];
    148     Word16 cor_x[NB_POS], cor_y[NB_POS], pos_max[NB_TRACK];
    149     Word16 h_buf[4 * L_SUBFR];
    150     Word16 rrixix[NB_TRACK][NB_POS], rrixiy[NB_TRACK][MSIZE];
    151     Word16 ipos[NB_PULSE_MAX];
    152 
    153     switch (nbbits)
    154     {
    155         case 20:                               /* 20 bits, 4 pulses, 4 tracks */
    156             nbiter = 4;                          /* 4x16x16=1024 loop */
    157             alp = 8192;                          /* alp = 2.0 (Q12) */
    158             nb_pulse = 4;
    159             nbpos[0] = 4;
    160             nbpos[1] = 8;
    161             break;
    162         case 36:                               /* 36 bits, 8 pulses, 4 tracks */
    163             nbiter = 4;                          /* 4x20x16=1280 loop */
    164             alp = 4096;                          /* alp = 1.0 (Q12) */
    165             nb_pulse = 8;
    166             nbpos[0] = 4;
    167             nbpos[1] = 8;
    168             nbpos[2] = 8;
    169             break;
    170         case 44:                               /* 44 bits, 10 pulses, 4 tracks */
    171             nbiter = 4;                          /* 4x26x16=1664 loop */
    172             alp = 4096;                          /* alp = 1.0 (Q12) */
    173             nb_pulse = 10;
    174             nbpos[0] = 4;
    175             nbpos[1] = 6;
    176             nbpos[2] = 8;
    177             nbpos[3] = 8;
    178             break;
    179         case 52:                               /* 52 bits, 12 pulses, 4 tracks */
    180             nbiter = 4;                          /* 4x26x16=1664 loop */
    181             alp = 4096;                          /* alp = 1.0 (Q12) */
    182             nb_pulse = 12;
    183             nbpos[0] = 4;
    184             nbpos[1] = 6;
    185             nbpos[2] = 8;
    186             nbpos[3] = 8;
    187             break;
    188         case 64:                               /* 64 bits, 16 pulses, 4 tracks */
    189             nbiter = 3;                          /* 3x36x16=1728 loop */
    190             alp = 3277;                          /* alp = 0.8 (Q12) */
    191             nb_pulse = 16;
    192             nbpos[0] = 4;
    193             nbpos[1] = 4;
    194             nbpos[2] = 6;
    195             nbpos[3] = 6;
    196             nbpos[4] = 8;
    197             nbpos[5] = 8;
    198             break;
    199         case 72:                               /* 72 bits, 18 pulses, 4 tracks */
    200             nbiter = 3;                          /* 3x35x16=1680 loop */
    201             alp = 3072;                          /* alp = 0.75 (Q12) */
    202             nb_pulse = 18;
    203             nbpos[0] = 2;
    204             nbpos[1] = 3;
    205             nbpos[2] = 4;
    206             nbpos[3] = 5;
    207             nbpos[4] = 6;
    208             nbpos[5] = 7;
    209             nbpos[6] = 8;
    210             break;
    211         case 88:                               /* 88 bits, 24 pulses, 4 tracks */
    212             if(ser_size > 462)
    213                 nbiter = 1;
    214             else
    215                 nbiter = 2;                    /* 2x53x16=1696 loop */
    216 
    217             alp = 2048;                          /* alp = 0.5 (Q12) */
    218             nb_pulse = 24;
    219             nbpos[0] = 2;
    220             nbpos[1] = 2;
    221             nbpos[2] = 3;
    222             nbpos[3] = 4;
    223             nbpos[4] = 5;
    224             nbpos[5] = 6;
    225             nbpos[6] = 7;
    226             nbpos[7] = 8;
    227             nbpos[8] = 8;
    228             nbpos[9] = 8;
    229             break;
    230         default:
    231             nbiter = 0;
    232             alp = 0;
    233             nb_pulse = 0;
    234     }
    235 
    236     for (i = 0; i < nb_pulse; i++)
    237     {
    238         codvec[i] = i;
    239     }
    240 
    241     /*----------------------------------------------------------------*
    242      * Find sign for each pulse position.                             *
    243      *----------------------------------------------------------------*/
    244     /* calculate energy for normalization of cn[] and dn[] */
    245     /* set k_cn = 32..32767 (ener_cn = 2^30..256-0) */
    246 #ifdef ASM_OPT                  /* asm optimization branch */
    247     s = Dot_product12_asm(cn, cn, L_SUBFR, &exp);
    248 #else
    249     s = Dot_product12(cn, cn, L_SUBFR, &exp);
    250 #endif
    251 
    252     Isqrt_n(&s, &exp);
    253     s = L_shl(s, (exp + 5));
    254     k_cn = extract_h(L_add(s, 0x8000));
    255 
    256     /* set k_dn = 32..512 (ener_dn = 2^30..2^22) */
    257 #ifdef ASM_OPT                      /* asm optimization branch */
    258     s = Dot_product12_asm(dn, dn, L_SUBFR, &exp);
    259 #else
    260     s = Dot_product12(dn, dn, L_SUBFR, &exp);
    261 #endif
    262 
    263     Isqrt_n(&s, &exp);
    264     k_dn = voround(L_shl(s, (exp + 5 + 3)));    /* k_dn = 256..4096 */
    265     k_dn = vo_mult_r(alp, k_dn);              /* alp in Q12 */
    266 
    267     /* mix normalized cn[] and dn[] */
    268     p0 = cn;
    269     p1 = dn;
    270     p2 = dn2;
    271 
    272     for (i = 0; i < L_SUBFR/4; i++)
    273     {
    274         s = L_add((k_cn* (*p0++)), (k_dn * (*p1++)));
    275         *p2++ = s >> 7;
    276         s = L_add((k_cn* (*p0++)), (k_dn * (*p1++)));
    277         *p2++ = s >> 7;
    278         s = L_add((k_cn* (*p0++)), (k_dn * (*p1++)));
    279         *p2++ = s >> 7;
    280         s = L_add((k_cn* (*p0++)), (k_dn * (*p1++)));
    281         *p2++ = s >> 7;
    282     }
    283 
    284     /* set sign according to dn2[] = k_cn*cn[] + k_dn*dn[]    */
    285     for(i = 0; i < L_SUBFR; i++)
    286     {
    287         val = dn[i];
    288         ps = dn2[i];
    289         if (ps >= 0)
    290         {
    291             sign[i] = 32767;             /* sign = +1 (Q12) */
    292             vec[i] = -32768;
    293         } else
    294         {
    295             sign[i] = -32768;            /* sign = -1 (Q12) */
    296             vec[i] = 32767;
    297             dn[i] = -val;
    298             dn2[i] = -ps;
    299         }
    300     }
    301     /*----------------------------------------------------------------*
    302      * Select NB_MAX position per track according to max of dn2[].    *
    303      *----------------------------------------------------------------*/
    304     pos = 0;
    305     for (i = 0; i < NB_TRACK; i++)
    306     {
    307         for (k = 0; k < NB_MAX; k++)
    308         {
    309             ps = -1;
    310             for (j = i; j < L_SUBFR; j += STEP)
    311             {
    312                 if(dn2[j] > ps)
    313                 {
    314                     ps = dn2[j];
    315                     pos = j;
    316                 }
    317             }
    318             dn2[pos] = (k - NB_MAX);     /* dn2 < 0 when position is selected */
    319             if (k == 0)
    320             {
    321                 pos_max[i] = pos;
    322             }
    323         }
    324     }
    325 
    326     /*--------------------------------------------------------------*
    327      * Scale h[] to avoid overflow and to get maximum of precision  *
    328      * on correlation.                                              *
    329      *                                                              *
    330      * Maximum of h[] (h[0]) is fixed to 2048 (MAX16 / 16).         *
    331      *  ==> This allows addition of 16 pulses without saturation.   *
    332      *                                                              *
    333      * Energy worst case (on resonant impulse response),            *
    334      * - energy of h[] is approximately MAX/16.                     *
    335      * - During search, the energy is divided by 8 to avoid         *
    336      *   overflow on "alp". (energy of h[] = MAX/128).              *
    337      *  ==> "alp" worst case detected is 22854 on sinusoidal wave.  *
    338      *--------------------------------------------------------------*/
    339 
    340     /* impulse response buffer for fast computation */
    341 
    342     h = h_buf;
    343     h_inv = h_buf + (2 * L_SUBFR);
    344     L_tmp = 0;
    345     for (i = 0; i < L_SUBFR; i++)
    346     {
    347         *h++ = 0;
    348         *h_inv++ = 0;
    349         L_tmp = L_add(L_tmp, (H[i] * H[i]) << 1);
    350     }
    351     /* scale h[] down (/2) when energy of h[] is high with many pulses used */
    352     val = extract_h(L_tmp);
    353     h_shift = 0;
    354 
    355     if ((nb_pulse >= 12) && (val > 1024))
    356     {
    357         h_shift = 1;
    358     }
    359     p0 = H;
    360     p1 = h;
    361     p2 = h_inv;
    362 
    363     for (i = 0; i < L_SUBFR/4; i++)
    364     {
    365         *p1 = *p0++ >> h_shift;
    366         *p2++ = -(*p1++);
    367         *p1 = *p0++ >> h_shift;
    368         *p2++ = -(*p1++);
    369         *p1 = *p0++ >> h_shift;
    370         *p2++ = -(*p1++);
    371         *p1 = *p0++ >> h_shift;
    372         *p2++ = -(*p1++);
    373     }
    374 
    375     /*------------------------------------------------------------*
    376      * Compute rrixix[][] needed for the codebook search.         *
    377      * This algorithm computes impulse response energy of all     *
    378      * positions (16) in each track (4).       Total = 4x16 = 64. *
    379      *------------------------------------------------------------*/
    380 
    381     /* storage order --> i3i3, i2i2, i1i1, i0i0 */
    382 
    383     /* Init pointers to last position of rrixix[] */
    384     p0 = &rrixix[0][NB_POS - 1];
    385     p1 = &rrixix[1][NB_POS - 1];
    386     p2 = &rrixix[2][NB_POS - 1];
    387     p3 = &rrixix[3][NB_POS - 1];
    388 
    389     ptr_h1 = h;
    390     cor = 0x00008000L;                             /* for rounding */
    391     for (i = 0; i < NB_POS; i++)
    392     {
    393         cor = L_add(cor, vo_L_mult((*ptr_h1), (*ptr_h1)));
    394         ptr_h1++;
    395         *p3-- = extract_h(cor);
    396         cor = L_add(cor, vo_L_mult((*ptr_h1), (*ptr_h1)));
    397         ptr_h1++;
    398         *p2-- = extract_h(cor);
    399         cor = L_add(cor, vo_L_mult((*ptr_h1), (*ptr_h1)));
    400         ptr_h1++;
    401         *p1-- = extract_h(cor);
    402         cor = L_add(cor, vo_L_mult((*ptr_h1), (*ptr_h1)));
    403         ptr_h1++;
    404         *p0-- = extract_h(cor);
    405     }
    406 
    407     /*------------------------------------------------------------*
    408      * Compute rrixiy[][] needed for the codebook search.         *
    409      * This algorithm computes correlation between 2 pulses       *
    410      * (2 impulse responses) in 4 possible adjacent tracks.       *
    411      * (track 0-1, 1-2, 2-3 and 3-0).     Total = 4x16x16 = 1024. *
    412      *------------------------------------------------------------*/
    413 
    414     /* storage order --> i2i3, i1i2, i0i1, i3i0 */
    415 
    416     pos = MSIZE - 1;
    417     ptr_hf = h + 1;
    418 
    419     for (k = 0; k < NB_POS; k++)
    420     {
    421         p3 = &rrixiy[2][pos];
    422         p2 = &rrixiy[1][pos];
    423         p1 = &rrixiy[0][pos];
    424         p0 = &rrixiy[3][pos - NB_POS];
    425 
    426         cor = 0x00008000L;                   /* for rounding */
    427         ptr_h1 = h;
    428         ptr_h2 = ptr_hf;
    429 
    430         for (i = k + 1; i < NB_POS; i++)
    431         {
    432             cor = L_add(cor, vo_L_mult((*ptr_h1), (*ptr_h2)));
    433             ptr_h1++;
    434             ptr_h2++;
    435             *p3 = extract_h(cor);
    436             cor = L_add(cor, vo_L_mult((*ptr_h1), (*ptr_h2)));
    437             ptr_h1++;
    438             ptr_h2++;
    439             *p2 = extract_h(cor);
    440             cor = L_add(cor, vo_L_mult((*ptr_h1), (*ptr_h2)));
    441             ptr_h1++;
    442             ptr_h2++;
    443             *p1 = extract_h(cor);
    444             cor = L_add(cor, vo_L_mult((*ptr_h1), (*ptr_h2)));
    445             ptr_h1++;
    446             ptr_h2++;
    447             *p0 = extract_h(cor);
    448 
    449             p3 -= (NB_POS + 1);
    450             p2 -= (NB_POS + 1);
    451             p1 -= (NB_POS + 1);
    452             p0 -= (NB_POS + 1);
    453         }
    454         cor = L_add(cor, vo_L_mult((*ptr_h1), (*ptr_h2)));
    455         ptr_h1++;
    456         ptr_h2++;
    457         *p3 = extract_h(cor);
    458         cor = L_add(cor, vo_L_mult((*ptr_h1), (*ptr_h2)));
    459         ptr_h1++;
    460         ptr_h2++;
    461         *p2 = extract_h(cor);
    462         cor = L_add(cor, vo_L_mult((*ptr_h1), (*ptr_h2)));
    463         ptr_h1++;
    464         ptr_h2++;
    465         *p1 = extract_h(cor);
    466 
    467         pos -= NB_POS;
    468         ptr_hf += STEP;
    469     }
    470 
    471     /* storage order --> i3i0, i2i3, i1i2, i0i1 */
    472 
    473     pos = MSIZE - 1;
    474     ptr_hf = h + 3;
    475 
    476     for (k = 0; k < NB_POS; k++)
    477     {
    478         p3 = &rrixiy[3][pos];
    479         p2 = &rrixiy[2][pos - 1];
    480         p1 = &rrixiy[1][pos - 1];
    481         p0 = &rrixiy[0][pos - 1];
    482 
    483         cor = 0x00008000L;                              /* for rounding */
    484         ptr_h1 = h;
    485         ptr_h2 = ptr_hf;
    486 
    487         for (i = k + 1; i < NB_POS; i++)
    488         {
    489             cor = L_add(cor, vo_L_mult((*ptr_h1), (*ptr_h2)));
    490             ptr_h1++;
    491             ptr_h2++;
    492             *p3 = extract_h(cor);
    493             cor = L_add(cor, vo_L_mult((*ptr_h1), (*ptr_h2)));
    494             ptr_h1++;
    495             ptr_h2++;
    496             *p2 = extract_h(cor);
    497             cor = L_add(cor, vo_L_mult((*ptr_h1), (*ptr_h2)));
    498             ptr_h1++;
    499             ptr_h2++;
    500             *p1 = extract_h(cor);
    501             cor = L_add(cor, vo_L_mult((*ptr_h1), (*ptr_h2)));
    502             ptr_h1++;
    503             ptr_h2++;
    504             *p0 = extract_h(cor);
    505 
    506             p3 -= (NB_POS + 1);
    507             p2 -= (NB_POS + 1);
    508             p1 -= (NB_POS + 1);
    509             p0 -= (NB_POS + 1);
    510         }
    511         cor = L_add(cor, vo_L_mult((*ptr_h1), (*ptr_h2)));
    512         ptr_h1++;
    513         ptr_h2++;
    514         *p3 = extract_h(cor);
    515 
    516         pos--;
    517         ptr_hf += STEP;
    518     }
    519 
    520     /*------------------------------------------------------------*
    521      * Modification of rrixiy[][] to take signs into account.     *
    522      *------------------------------------------------------------*/
    523 
    524     p0 = &rrixiy[0][0];
    525 
    526     for (k = 0; k < NB_TRACK; k++)
    527     {
    528         j_temp = (k + 1)&0x03;
    529         for (i = k; i < L_SUBFR; i += STEP)
    530         {
    531             psign = sign;
    532             if (psign[i] < 0)
    533             {
    534                 psign = vec;
    535             }
    536             j = j_temp;
    537             for (; j < L_SUBFR; j += STEP)
    538             {
    539                 *p0 = vo_mult(*p0, psign[j]);
    540                 p0++;
    541             }
    542         }
    543     }
    544 
    545     /*-------------------------------------------------------------------*
    546      *                       Depth-first search                          *
    547      *-------------------------------------------------------------------*/
    548 
    549     psk = -1;
    550     alpk = 1;
    551 
    552     for (k = 0; k < nbiter; k++)
    553     {
    554         j_temp = k<<2;
    555         for (i = 0; i < nb_pulse; i++)
    556             ipos[i] = tipos[j_temp + i];
    557 
    558         if(nbbits == 20)
    559         {
    560             pos = 0;
    561             ps = 0;
    562             alp = 0;
    563             for (i = 0; i < L_SUBFR; i++)
    564             {
    565                 vec[i] = 0;
    566             }
    567         } else if ((nbbits == 36) || (nbbits == 44))
    568         {
    569             /* first stage: fix 2 pulses */
    570             pos = 2;
    571 
    572             ix = ind[0] = pos_max[ipos[0]];
    573             iy = ind[1] = pos_max[ipos[1]];
    574             ps = dn[ix] + dn[iy];
    575             i = ix >> 2;                /* ix / STEP */
    576             j = iy >> 2;                /* iy / STEP */
    577             s = rrixix[ipos[0]][i] << 13;
    578             s += rrixix[ipos[1]][j] << 13;
    579             i = (i << 4) + j;         /* (ix/STEP)*NB_POS + (iy/STEP) */
    580             s += rrixiy[ipos[0]][i] << 14;
    581             alp = (s + 0x8000) >> 16;
    582             if (sign[ix] < 0)
    583                 p0 = h_inv - ix;
    584             else
    585                 p0 = h - ix;
    586             if (sign[iy] < 0)
    587                 p1 = h_inv - iy;
    588             else
    589                 p1 = h - iy;
    590 
    591             for (i = 0; i < L_SUBFR; i++)
    592             {
    593                 vec[i] = (*p0++) + (*p1++);
    594             }
    595 
    596             if(nbbits == 44)
    597             {
    598                 ipos[8] = 0;
    599                 ipos[9] = 1;
    600             }
    601         } else
    602         {
    603             /* first stage: fix 4 pulses */
    604             pos = 4;
    605 
    606             ix = ind[0] = pos_max[ipos[0]];
    607             iy = ind[1] = pos_max[ipos[1]];
    608             i = ind[2] = pos_max[ipos[2]];
    609             j = ind[3] = pos_max[ipos[3]];
    610             ps = add1(add1(add1(dn[ix], dn[iy]), dn[i]), dn[j]);
    611 
    612             if (sign[ix] < 0)
    613                 p0 = h_inv - ix;
    614             else
    615                 p0 = h - ix;
    616 
    617             if (sign[iy] < 0)
    618                 p1 = h_inv - iy;
    619             else
    620                 p1 = h - iy;
    621 
    622             if (sign[i] < 0)
    623                 p2 = h_inv - i;
    624             else
    625                 p2 = h - i;
    626 
    627             if (sign[j] < 0)
    628                 p3 = h_inv - j;
    629             else
    630                 p3 = h - j;
    631 
    632             L_tmp = 0L;
    633             for(i = 0; i < L_SUBFR; i++)
    634             {
    635                 Word32 vecSq2;
    636                 vec[i]  = add1(add1(add1(*p0++, *p1++), *p2++), *p3++);
    637                 vecSq2 = (vec[i] * vec[i]) << 1;
    638                 if (vecSq2 > 0 && L_tmp > INT_MAX - vecSq2) {
    639                     L_tmp = INT_MAX;
    640                 } else if (vecSq2 < 0 && L_tmp < INT_MIN - vecSq2) {
    641                     L_tmp = INT_MIN;
    642                 } else {
    643                     L_tmp  += vecSq2;
    644                 }
    645             }
    646 
    647             alp = ((L_tmp >> 3) + 0x8000) >> 16;
    648 
    649             if(nbbits == 72)
    650             {
    651                 ipos[16] = 0;
    652                 ipos[17] = 1;
    653             }
    654         }
    655 
    656         /* other stages of 2 pulses */
    657 
    658         for (j = pos, st = 0; j < nb_pulse; j += 2, st++)
    659         {
    660             /*--------------------------------------------------*
    661              * Calculate correlation of all possible positions  *
    662              * of the next 2 pulses with previous fixed pulses. *
    663              * Each pulse can have 16 possible positions.       *
    664              *--------------------------------------------------*/
    665             if(ipos[j] == 3)
    666             {
    667                 cor_h_vec_30(h, vec, ipos[j], sign, rrixix, cor_x, cor_y);
    668             }
    669             else
    670             {
    671 #ifdef ASM_OPT                 /* asm optimization branch */
    672                 cor_h_vec_012_asm(h, vec, ipos[j], sign, rrixix, cor_x, cor_y);
    673 #else
    674                 cor_h_vec_012(h, vec, ipos[j], sign, rrixix, cor_x, cor_y);
    675 #endif
    676             }
    677             /*--------------------------------------------------*
    678              * Find best positions of 2 pulses.                 *
    679              *--------------------------------------------------*/
    680             search_ixiy(nbpos[st], ipos[j], ipos[j + 1], &ps, &alp,
    681                     &ix, &iy, dn, dn2, cor_x, cor_y, rrixiy);
    682 
    683             ind[j] = ix;
    684             ind[j + 1] = iy;
    685 
    686             if (sign[ix] < 0)
    687                 p0 = h_inv - ix;
    688             else
    689                 p0 = h - ix;
    690             if (sign[iy] < 0)
    691                 p1 = h_inv - iy;
    692             else
    693                 p1 = h - iy;
    694 
    695             for (i = 0; i < L_SUBFR; i+=4)
    696             {
    697                 vec[i]   += add1((*p0++), (*p1++));
    698                 vec[i+1] += add1((*p0++), (*p1++));
    699                 vec[i+2] += add1((*p0++), (*p1++));
    700                 vec[i+3] += add1((*p0++), (*p1++));
    701             }
    702         }
    703         /* memorise the best codevector */
    704         ps = vo_mult(ps, ps);
    705         s = L_sub(vo_L_mult(alpk, ps), vo_L_mult(psk, alp));
    706         if (s > 0)
    707         {
    708             psk = ps;
    709             alpk = alp;
    710             for (i = 0; i < nb_pulse; i++)
    711             {
    712                 codvec[i] = ind[i];
    713             }
    714             for (i = 0; i < L_SUBFR; i++)
    715             {
    716                 y[i] = vec[i];
    717             }
    718         }
    719     }
    720     /*-------------------------------------------------------------------*
    721      * Build the codeword, the filtered codeword and index of codevector.*
    722      *-------------------------------------------------------------------*/
    723     for (i = 0; i < NPMAXPT * NB_TRACK; i++)
    724     {
    725         ind[i] = -1;
    726     }
    727     for (i = 0; i < L_SUBFR; i++)
    728     {
    729         code[i] = 0;
    730         y[i] = vo_shr_r(y[i], 3);               /* Q12 to Q9 */
    731     }
    732     val = (512 >> h_shift);               /* codeword in Q9 format */
    733     for (k = 0; k < nb_pulse; k++)
    734     {
    735         i = codvec[k];                       /* read pulse position */
    736         j = sign[i];                         /* read sign           */
    737         index = i >> 2;                 /* index = pos of pulse (0..15) */
    738         track = (Word16) (i & 0x03);         /* track = i % NB_TRACK (0..3)  */
    739 
    740         if (j > 0)
    741         {
    742             code[i] += val;
    743             codvec[k] += 128;
    744         } else
    745         {
    746             code[i] -= val;
    747             index += NB_POS;
    748         }
    749 
    750         i = (Word16)((vo_L_mult(track, NPMAXPT) >> 1));
    751 
    752         while (i < NPMAXPT * NB_TRACK && ind[i] >= 0)
    753         {
    754             i += 1;
    755         }
    756         if (i < NPMAXPT * NB_TRACK) {
    757             ind[i] = index;
    758         } else {
    759             ALOGE("b/132647222, OOB access in ind array track=%d i=%d", track, i);
    760             android_errorWriteLog(0x534e4554, "132647222");
    761         }
    762     }
    763 
    764     k = 0;
    765     /* Build index of codevector */
    766     if(nbbits == 20)
    767     {
    768         for (track = 0; track < NB_TRACK; track++)
    769         {
    770             _index[track] = (Word16)(quant_1p_N1(ind[k], 4));
    771             k += NPMAXPT;
    772         }
    773     } else if(nbbits == 36)
    774     {
    775         for (track = 0; track < NB_TRACK; track++)
    776         {
    777             _index[track] = (Word16)(quant_2p_2N1(ind[k], ind[k + 1], 4));
    778             k += NPMAXPT;
    779         }
    780     } else if(nbbits == 44)
    781     {
    782         for (track = 0; track < NB_TRACK - 2; track++)
    783         {
    784             _index[track] = (Word16)(quant_3p_3N1(ind[k], ind[k + 1], ind[k + 2], 4));
    785             k += NPMAXPT;
    786         }
    787         for (track = 2; track < NB_TRACK; track++)
    788         {
    789             _index[track] = (Word16)(quant_2p_2N1(ind[k], ind[k + 1], 4));
    790             k += NPMAXPT;
    791         }
    792     } else if(nbbits == 52)
    793     {
    794         for (track = 0; track < NB_TRACK; track++)
    795         {
    796             _index[track] = (Word16)(quant_3p_3N1(ind[k], ind[k + 1], ind[k + 2], 4));
    797             k += NPMAXPT;
    798         }
    799     } else if(nbbits == 64)
    800     {
    801         for (track = 0; track < NB_TRACK; track++)
    802         {
    803             L_index = quant_4p_4N(&ind[k], 4);
    804             _index[track] = (Word16)((L_index >> 14) & 3);
    805             _index[track + NB_TRACK] = (Word16)(L_index & 0x3FFF);
    806             k += NPMAXPT;
    807         }
    808     } else if(nbbits == 72)
    809     {
    810         for (track = 0; track < NB_TRACK - 2; track++)
    811         {
    812             L_index = quant_5p_5N(&ind[k], 4);
    813             _index[track] = (Word16)((L_index >> 10) & 0x03FF);
    814             _index[track + NB_TRACK] = (Word16)(L_index & 0x03FF);
    815             k += NPMAXPT;
    816         }
    817         for (track = 2; track < NB_TRACK; track++)
    818         {
    819             L_index = quant_4p_4N(&ind[k], 4);
    820             _index[track] = (Word16)((L_index >> 14) & 3);
    821             _index[track + NB_TRACK] = (Word16)(L_index & 0x3FFF);
    822             k += NPMAXPT;
    823         }
    824     } else if(nbbits == 88)
    825     {
    826         for (track = 0; track < NB_TRACK; track++)
    827         {
    828             L_index = quant_6p_6N_2(&ind[k], 4);
    829             _index[track] = (Word16)((L_index >> 11) & 0x07FF);
    830             _index[track + NB_TRACK] = (Word16)(L_index & 0x07FF);
    831             k += NPMAXPT;
    832         }
    833     }
    834     return;
    835 }
    836 
    837 
    838 /*-------------------------------------------------------------------*
    839  * Function  cor_h_vec()                                             *
    840  * ~~~~~~~~~~~~~~~~~~~~~                                             *
    841  * Compute correlations of h[] with vec[] for the specified track.   *
    842  *-------------------------------------------------------------------*/
    843 void cor_h_vec_30(
    844         Word16 h[],                           /* (i) scaled impulse response                 */
    845         Word16 vec[],                         /* (i) scaled vector (/8) to correlate with h[] */
    846         Word16 track,                         /* (i) track to use                            */
    847         Word16 sign[],                        /* (i) sign vector                             */
    848         Word16 rrixix[][NB_POS],              /* (i) correlation of h[x] with h[x]      */
    849         Word16 cor_1[],                       /* (o) result of correlation (NB_POS elements) */
    850         Word16 cor_2[]                        /* (o) result of correlation (NB_POS elements) */
    851         )
    852 {
    853     Word32 i, j, pos, corr;
    854     Word16 *p0, *p1, *p2,*p3,*cor_x,*cor_y;
    855     Word32 L_sum1,L_sum2;
    856     cor_x = cor_1;
    857     cor_y = cor_2;
    858     p0 = rrixix[track];
    859     p3 = rrixix[0];
    860     pos = track;
    861 
    862     for (i = 0; i < NB_POS; i+=2)
    863     {
    864         L_sum1 = L_sum2 = 0L;
    865         p1 = h;
    866         p2 = &vec[pos];
    867         for (j=pos;j < L_SUBFR; j++)
    868         {
    869             L_sum1 = L_add(L_sum1, *p1 * *p2);
    870             p2-=3;
    871             L_sum2 = L_add(L_sum2, *p1++ * *p2);
    872             p2+=4;
    873         }
    874         p2-=3;
    875         L_sum2 = L_add(L_sum2, *p1++ * *p2++);
    876         L_sum2 = L_add(L_sum2, *p1++ * *p2++);
    877         L_sum2 = L_add(L_sum2, *p1++ * *p2++);
    878 
    879         L_sum1 = L_shl(L_sum1, 2);
    880         L_sum2 = L_shl(L_sum2, 2);
    881 
    882         corr = voround(L_sum1);
    883         *cor_x++ = mult(corr, sign[pos]) + (*p0++);
    884         corr = voround(L_sum2);
    885         *cor_y++ = mult(corr, sign[pos-3]) + (*p3++);
    886         pos += STEP;
    887 
    888         L_sum1 = L_sum2 = 0L;
    889         p1 = h;
    890         p2 = &vec[pos];
    891         for (j=pos;j < L_SUBFR; j++)
    892         {
    893             L_sum1 = L_add(L_sum1, *p1 * *p2);
    894             p2-=3;
    895             L_sum2 = L_add(L_sum2, *p1++ * *p2);
    896             p2+=4;
    897         }
    898         p2-=3;
    899         L_sum2 = L_add(L_sum2, *p1++ * *p2++);
    900         L_sum2 = L_add(L_sum2, *p1++ * *p2++);
    901         L_sum2 = L_add(L_sum2, *p1++ * *p2++);
    902 
    903         L_sum1 = L_shl(L_sum1, 2);
    904         L_sum2 = L_shl(L_sum2, 2);
    905 
    906         corr = voround(L_sum1);
    907         *cor_x++ = mult(corr, sign[pos]) + (*p0++);
    908         corr = voround(L_sum2);
    909         *cor_y++ = mult(corr, sign[pos-3]) + (*p3++);
    910         pos += STEP;
    911     }
    912     return;
    913 }
    914 
    915 void cor_h_vec_012(
    916         Word16 h[],                           /* (i) scaled impulse response                 */
    917         Word16 vec[],                         /* (i) scaled vector (/8) to correlate with h[] */
    918         Word16 track,                         /* (i) track to use                            */
    919         Word16 sign[],                        /* (i) sign vector                             */
    920         Word16 rrixix[][NB_POS],              /* (i) correlation of h[x] with h[x]      */
    921         Word16 cor_1[],                       /* (o) result of correlation (NB_POS elements) */
    922         Word16 cor_2[]                        /* (o) result of correlation (NB_POS elements) */
    923         )
    924 {
    925     Word32 i, j, pos, corr;
    926     Word16 *p0, *p1, *p2,*p3,*cor_x,*cor_y;
    927     Word32 L_sum1,L_sum2;
    928     cor_x = cor_1;
    929     cor_y = cor_2;
    930     p0 = rrixix[track];
    931     p3 = rrixix[track+1];
    932     pos = track;
    933 
    934     for (i = 0; i < NB_POS; i+=2)
    935     {
    936         L_sum1 = L_sum2 = 0L;
    937         p1 = h;
    938         p2 = &vec[pos];
    939         for (j=62-pos ;j >= 0; j--)
    940         {
    941             L_sum1 = L_add(L_sum1, *p1 * *p2++);
    942             L_sum2 = L_add(L_sum2, *p1++ * *p2);
    943         }
    944         L_sum1 = L_add(L_sum1, *p1 * *p2);
    945         L_sum1 = L_shl(L_sum1, 2);
    946         L_sum2 = L_shl(L_sum2, 2);
    947 
    948         corr = voround(L_sum1);
    949         cor_x[i] = vo_mult(corr, sign[pos]) + (*p0++);
    950         corr = voround(L_sum2);
    951         cor_y[i] = vo_mult(corr, sign[pos + 1]) + (*p3++);
    952         pos += STEP;
    953 
    954         L_sum1 = L_sum2 = 0L;
    955         p1 = h;
    956         p2 = &vec[pos];
    957         for (j= 62-pos;j >= 0; j--)
    958         {
    959             L_sum1 = L_add(L_sum1, *p1 * *p2++);
    960             L_sum2 = L_add(L_sum2, *p1++ * *p2);
    961         }
    962         L_sum1 = L_add(L_sum1, *p1 * *p2);
    963         L_sum1 = L_shl(L_sum1, 2);
    964         L_sum2 = L_shl(L_sum2, 2);
    965 
    966         corr = voround(L_sum1);
    967         cor_x[i+1] = vo_mult(corr, sign[pos]) + (*p0++);
    968         corr = voround(L_sum2);
    969         cor_y[i+1] = vo_mult(corr, sign[pos + 1]) + (*p3++);
    970         pos += STEP;
    971     }
    972     return;
    973 }
    974 
    975 /*-------------------------------------------------------------------*
    976  * Function  search_ixiy()                                           *
    977  * ~~~~~~~~~~~~~~~~~~~~~~~                                           *
    978  * Find the best positions of 2 pulses in a subframe.                *
    979  *-------------------------------------------------------------------*/
    980 
    981 void search_ixiy(
    982         Word16 nb_pos_ix,                     /* (i) nb of pos for pulse 1 (1..8)       */
    983         Word16 track_x,                       /* (i) track of pulse 1                   */
    984         Word16 track_y,                       /* (i) track of pulse 2                   */
    985         Word16 * ps,                          /* (i/o) correlation of all fixed pulses  */
    986         Word16 * alp,                         /* (i/o) energy of all fixed pulses       */
    987         Word16 * ix,                          /* (o) position of pulse 1                */
    988         Word16 * iy,                          /* (o) position of pulse 2                */
    989         Word16 dn[],                          /* (i) corr. between target and h[]       */
    990         Word16 dn2[],                         /* (i) vector of selected positions       */
    991         Word16 cor_x[],                       /* (i) corr. of pulse 1 with fixed pulses */
    992         Word16 cor_y[],                       /* (i) corr. of pulse 2 with fixed pulses */
    993         Word16 rrixiy[][MSIZE]                /* (i) corr. of pulse 1 with pulse 2   */
    994         )
    995 {
    996     Word32 x, y, pos, thres_ix;
    997     Word16 ps1, ps2, sq, sqk;
    998     Word16 alp_16, alpk;
    999     Word16 *p0, *p1, *p2;
   1000     Word32 s, alp0, alp1, alp2;
   1001 
   1002     p0 = cor_x;
   1003     p1 = cor_y;
   1004     p2 = rrixiy[track_x];
   1005 
   1006     thres_ix = nb_pos_ix - NB_MAX;
   1007 
   1008     alp0 = L_deposit_h(*alp);
   1009     alp0 = (alp0 + 0x00008000L);       /* for rounding */
   1010 
   1011     sqk = -1;
   1012     alpk = 1;
   1013 
   1014     for (x = track_x; x < L_SUBFR; x += STEP)
   1015     {
   1016         ps1 = *ps + dn[x];
   1017         alp1 = L_add(alp0, ((*p0++)<<13));
   1018 
   1019         if (dn2[x] < thres_ix)
   1020         {
   1021             pos = -1;
   1022             for (y = track_y; y < L_SUBFR; y += STEP)
   1023             {
   1024                 ps2 = add1(ps1, dn[y]);
   1025 
   1026                 alp2 = L_add(alp1, ((*p1++)<<13));
   1027                 alp2 = L_add(alp2, ((*p2++)<<14));
   1028                 alp_16 = extract_h(alp2);
   1029                 sq = vo_mult(ps2, ps2);
   1030                 s = L_sub(vo_L_mult(alpk, sq), L_mult(sqk, alp_16));
   1031 
   1032                 if (s > 0)
   1033                 {
   1034                     sqk = sq;
   1035                     alpk = alp_16;
   1036                     pos = y;
   1037                 }
   1038             }
   1039             p1 -= NB_POS;
   1040 
   1041             if (pos >= 0)
   1042             {
   1043                 *ix = x;
   1044                 *iy = pos;
   1045             }
   1046         } else
   1047         {
   1048             p2 += NB_POS;
   1049         }
   1050     }
   1051 
   1052     *ps = add1(*ps, add1(dn[*ix], dn[*iy]));
   1053     *alp = alpk;
   1054 
   1055     return;
   1056 }
   1057 
   1058 
   1059 
   1060 
   1061