Home | History | Annotate | Download | only in src
      1 /*
      2  ** Copyright 2003-2010, VisualOn, Inc.
      3  **
      4  ** Licensed under the Apache License, Version 2.0 (the "License");
      5  ** you may not use this file except in compliance with the License.
      6  ** You may obtain a copy of the License at
      7  **
      8  **     http://www.apache.org/licenses/LICENSE-2.0
      9  **
     10  ** Unless required by applicable law or agreed to in writing, software
     11  ** distributed under the License is distributed on an "AS IS" BASIS,
     12  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  ** See the License for the specific language governing permissions and
     14  ** limitations under the License.
     15  */
     16 
     17 /***********************************************************************
     18 *      File: c4t64fx.c                                                 *
     19 *                                                                      *
     20 *      Description:Performs algebraic codebook search for higher modes *
     21 *                                                                      *
     22 ************************************************************************/
     23 
     24 /************************************************************************
     25 * Function: ACELP_4t64_fx()                                             *
     26 *                                                                       *
     27 * 20, 36, 44, 52, 64, 72, 88 bits algebraic codebook.                   *
     28 * 4 tracks x 16 positions per track = 64 samples.                       *
     29 *                                                                       *
     30 * 20 bits --> 4 pulses in a frame of 64 samples.                        *
     31 * 36 bits --> 8 pulses in a frame of 64 samples.                        *
     32 * 44 bits --> 10 pulses in a frame of 64 samples.                       *
     33 * 52 bits --> 12 pulses in a frame of 64 samples.                       *
     34 * 64 bits --> 16 pulses in a frame of 64 samples.                       *
     35 * 72 bits --> 18 pulses in a frame of 64 samples.                       *
     36 * 88 bits --> 24 pulses in a frame of 64 samples.                       *
     37 *                                                                       *
     38 * All pulses can have two (2) possible amplitudes: +1 or -1.            *
     39 * Each pulse can have sixteen (16) possible positions.                  *
     40 *************************************************************************/
     41 
     42 #include "typedef.h"
     43 #include "basic_op.h"
     44 #include "math_op.h"
     45 #include "acelp.h"
     46 #include "cnst.h"
     47 
     48 #include "q_pulse.h"
     49 
     50 static Word16 tipos[36] = {
            /*
             * Track-order table for the codebook search. Each entry is a track
             * number (0..3). For search iteration k, ACELP_4t64_fx copies nb_pulse
             * consecutive entries starting at tipos[4 * k] into its local ipos[],
             * so every iteration starts the pulse/track assignment on a different
             * track. 36 entries cover the largest start offset (12, 4th iteration)
             * plus 24 pulses.
             * NOTE(review): only read in the visible code; looks like a candidate
             * for `const` -- confirm nothing else in the file writes to it.
             */
     51     0, 1, 2, 3,                            /* starting point &tipos[0], 1st iter */
     52     1, 2, 3, 0,                            /* starting point &tipos[4], 2nd iter */
     53     2, 3, 0, 1,                            /* starting point &tipos[8], 3rd iter */
     54     3, 0, 1, 2,                            /* starting point &tipos[12], 4th iter */
     55     0, 1, 2, 3,
     56     1, 2, 3, 0,
     57     2, 3, 0, 1,
     58     3, 0, 1, 2,
     59     0, 1, 2, 3};                           /* last entry &tipos[35]: end point for 24 pulses starting at the 4th-iter offset */
     60 
     61 #define NB_PULSE_MAX  24                   /* largest pulse count handled (the 88-bit codebook uses 24 pulses) */
     62 
     63 #define L_SUBFR   64                       /* samples searched per call: NB_TRACK tracks x NB_POS positions */
     64 #define NB_TRACK  4                        /* number of interleaved pulse tracks */
     65 #define STEP      4                        /* sample stride between consecutive positions of one track */
     66 #define NB_POS    16                       /* pulse positions per track */
     67 #define MSIZE     256                      /* entries per rrixiy[] row, indexed as pos_x * NB_POS + pos_y (16 x 16) */
     68 #define NB_MAX    8                        /* positions preselected per track from the maxima of dn2[] */
     69 #define NPMAXPT   ((NB_PULSE_MAX+NB_TRACK-1)/NB_TRACK)   /* max pulses per track: ceil(NB_PULSE_MAX / NB_TRACK) = 6 */
     70 
     71 /* Private functions (prototypes only; all four are called from ACELP_4t64_fx below) */
        /*
         * NOTE(review): these are labelled private but declared without `static`,
         * so they have external linkage as written. Their definitions are not
         * visible here -- check them before changing the linkage.
         */

        /*
         * cor_h_vec_012: per the parameter comments, correlates vec[] with h[] for
         * one track and writes NB_POS results into each of cor_1[] and cor_2[].
         * ACELP_4t64_fx calls it when the current track is not 3 and ASM_OPT is
         * not defined.
         */
     72 void cor_h_vec_012(
     73         Word16 h[],                           /* (i) scaled impulse response                 */
     74         Word16 vec[],                         /* (i) scaled vector (/8) to correlate with h[] */
     75         Word16 track,                         /* (i) track to use                            */
     76         Word16 sign[],                        /* (i) sign vector                             */
     77         Word16 rrixix[][NB_POS],              /* (i) correlation of h[x] with h[x]      */
     78         Word16 cor_1[],                       /* (o) result of correlation (NB_POS elements) */
     79         Word16 cor_2[]                        /* (o) result of correlation (NB_POS elements) */
     80         );
     81 
        /*
         * cor_h_vec_012_asm: same parameter list as cor_h_vec_012; ACELP_4t64_fx
         * calls it in place of cor_h_vec_012 when ASM_OPT is defined.
         * NOTE(review): presumably an assembly-optimized equivalent -- the
         * implementation is not visible here; confirm the results match.
         */
     82 void cor_h_vec_012_asm(
     83         Word16 h[],                           /* (i) scaled impulse response                 */
     84         Word16 vec[],                         /* (i) scaled vector (/8) to correlate with h[] */
     85         Word16 track,                         /* (i) track to use                            */
     86         Word16 sign[],                        /* (i) sign vector                             */
     87         Word16 rrixix[][NB_POS],              /* (i) correlation of h[x] with h[x]      */
     88         Word16 cor_1[],                       /* (o) result of correlation (NB_POS elements) */
     89         Word16 cor_2[]                        /* (o) result of correlation (NB_POS elements) */
     90         );
     91 
        /*
         * cor_h_vec_30: same parameter list as cor_h_vec_012; ACELP_4t64_fx calls
         * it when the current track is 3 (the pulse pair then spans tracks 3 and 0
         * according to the tipos[] ordering).
         */
     92 void cor_h_vec_30(
     93         Word16 h[],                           /* (i) scaled impulse response                 */
     94         Word16 vec[],                         /* (i) scaled vector (/8) to correlate with h[] */
     95         Word16 track,                         /* (i) track to use                            */
     96         Word16 sign[],                        /* (i) sign vector                             */
     97         Word16 rrixix[][NB_POS],              /* (i) correlation of h[x] with h[x]      */
     98         Word16 cor_1[],                       /* (o) result of correlation (NB_POS elements) */
     99         Word16 cor_2[]                        /* (o) result of correlation (NB_POS elements) */
    100         );
    101 
        /*
         * search_ixiy: finds the positions of the next two pulses (one on track_x,
         * one on track_y). Per the parameter comments it updates the running
         * correlation *ps and energy *alp and returns the chosen positions in
         * *ix and *iy. ACELP_4t64_fx passes nbpos[st] as nb_pos_ix and the
         * cor_x[]/cor_y[] arrays produced by the cor_h_vec_* call just before.
         */
    102 void search_ixiy(
    103         Word16 nb_pos_ix,                     /* (i) nb of pos for pulse 1 (1..8)       */
    104         Word16 track_x,                       /* (i) track of pulse 1                   */
    105         Word16 track_y,                       /* (i) track of pulse 2                   */
    106         Word16 * ps,                          /* (i/o) correlation of all fixed pulses  */
    107         Word16 * alp,                         /* (i/o) energy of all fixed pulses       */
    108         Word16 * ix,                          /* (o) position of pulse 1                */
    109         Word16 * iy,                          /* (o) position of pulse 2                */
    110         Word16 dn[],                          /* (i) corr. between target and h[]       */
    111         Word16 dn2[],                         /* (i) vector of selected positions       */
    112         Word16 cor_x[],                       /* (i) corr. of pulse 1 with fixed pulses */
    113         Word16 cor_y[],                       /* (i) corr. of pulse 2 with fixed pulses */
    114         Word16 rrixiy[][MSIZE]                /* (i) corr. of pulse 1 with pulse 2   */
    115         );
    116 
    117 
    118 void ACELP_4t64_fx(
    119         Word16 dn[],                          /* (i) <12b : correlation between target x[] and H[]      */
    120         Word16 cn[],                          /* (i) <12b : residual after long term prediction         */
    121         Word16 H[],                           /* (i) Q12: impulse response of weighted synthesis filter */
    122         Word16 code[],                        /* (o) Q9 : algebraic (fixed) codebook excitation         */
    123         Word16 y[],                           /* (o) Q9 : filtered fixed codebook excitation            */
    124         Word16 nbbits,                        /* (i) : 20, 36, 44, 52, 64, 72 or 88 bits                */
    125         Word16 ser_size,                      /* (i) : bit rate                                         */
    126         Word16 _index[]                       /* (o) : index (20): 5+5+5+5 = 20 bits.                   */
    127         /* (o) : index (36): 9+9+9+9 = 36 bits.                   */
    128         /* (o) : index (44): 13+9+13+9 = 44 bits.                 */
    129         /* (o) : index (52): 13+13+13+13 = 52 bits.               */
    130         /* (o) : index (64): 2+2+2+2+14+14+14+14 = 64 bits.       */
    131         /* (o) : index (72): 10+2+10+2+10+14+10+14 = 72 bits.     */
    132         /* (o) : index (88): 11+11+11+11+11+11+11+11 = 88 bits.   */
    133         )
    134 {
    135     Word32 i, j, k;
    136     Word16 st, ix, iy, pos, index, track, nb_pulse, nbiter, j_temp;
    137     Word16 psk, ps, alpk, alp, val, k_cn, k_dn, exp;
    138     Word16 *p0, *p1, *p2, *p3, *psign;
    139     Word16 *h, *h_inv, *ptr_h1, *ptr_h2, *ptr_hf, h_shift;
    140     Word32 s, cor, L_tmp, L_index;
    141     Word16 dn2[L_SUBFR], sign[L_SUBFR], vec[L_SUBFR];
    142     Word16 ind[NPMAXPT * NB_TRACK];
    143     Word16 codvec[NB_PULSE_MAX], nbpos[10];
    144     Word16 cor_x[NB_POS], cor_y[NB_POS], pos_max[NB_TRACK];
    145     Word16 h_buf[4 * L_SUBFR];
    146     Word16 rrixix[NB_TRACK][NB_POS], rrixiy[NB_TRACK][MSIZE];
    147     Word16 ipos[NB_PULSE_MAX];
    148 
    149     switch (nbbits)
    150     {
    151         case 20:                               /* 20 bits, 4 pulses, 4 tracks */
    152             nbiter = 4;                          /* 4x16x16=1024 loop */
    153             alp = 8192;                          /* alp = 2.0 (Q12) */
    154             nb_pulse = 4;
    155             nbpos[0] = 4;
    156             nbpos[1] = 8;
    157             break;
    158         case 36:                               /* 36 bits, 8 pulses, 4 tracks */
    159             nbiter = 4;                          /* 4x20x16=1280 loop */
    160             alp = 4096;                          /* alp = 1.0 (Q12) */
    161             nb_pulse = 8;
    162             nbpos[0] = 4;
    163             nbpos[1] = 8;
    164             nbpos[2] = 8;
    165             break;
    166         case 44:                               /* 44 bits, 10 pulses, 4 tracks */
    167             nbiter = 4;                          /* 4x26x16=1664 loop */
    168             alp = 4096;                          /* alp = 1.0 (Q12) */
    169             nb_pulse = 10;
    170             nbpos[0] = 4;
    171             nbpos[1] = 6;
    172             nbpos[2] = 8;
    173             nbpos[3] = 8;
    174             break;
    175         case 52:                               /* 52 bits, 12 pulses, 4 tracks */
    176             nbiter = 4;                          /* 4x26x16=1664 loop */
    177             alp = 4096;                          /* alp = 1.0 (Q12) */
    178             nb_pulse = 12;
    179             nbpos[0] = 4;
    180             nbpos[1] = 6;
    181             nbpos[2] = 8;
    182             nbpos[3] = 8;
    183             break;
    184         case 64:                               /* 64 bits, 16 pulses, 4 tracks */
    185             nbiter = 3;                          /* 3x36x16=1728 loop */
    186             alp = 3277;                          /* alp = 0.8 (Q12) */
    187             nb_pulse = 16;
    188             nbpos[0] = 4;
    189             nbpos[1] = 4;
    190             nbpos[2] = 6;
    191             nbpos[3] = 6;
    192             nbpos[4] = 8;
    193             nbpos[5] = 8;
    194             break;
    195         case 72:                               /* 72 bits, 18 pulses, 4 tracks */
    196             nbiter = 3;                          /* 3x35x16=1680 loop */
    197             alp = 3072;                          /* alp = 0.75 (Q12) */
    198             nb_pulse = 18;
    199             nbpos[0] = 2;
    200             nbpos[1] = 3;
    201             nbpos[2] = 4;
    202             nbpos[3] = 5;
    203             nbpos[4] = 6;
    204             nbpos[5] = 7;
    205             nbpos[6] = 8;
    206             break;
    207         case 88:                               /* 88 bits, 24 pulses, 4 tracks */
    208             if(ser_size > 462)
    209                 nbiter = 1;
    210             else
    211                 nbiter = 2;                    /* 2x53x16=1696 loop */
    212 
    213             alp = 2048;                          /* alp = 0.5 (Q12) */
    214             nb_pulse = 24;
    215             nbpos[0] = 2;
    216             nbpos[1] = 2;
    217             nbpos[2] = 3;
    218             nbpos[3] = 4;
    219             nbpos[4] = 5;
    220             nbpos[5] = 6;
    221             nbpos[6] = 7;
    222             nbpos[7] = 8;
    223             nbpos[8] = 8;
    224             nbpos[9] = 8;
    225             break;
    226         default:
    227             nbiter = 0;
    228             alp = 0;
    229             nb_pulse = 0;
    230     }
    231 
    232     for (i = 0; i < nb_pulse; i++)
    233     {
    234         codvec[i] = i;
    235     }
    236 
    237     /*----------------------------------------------------------------*
    238      * Find sign for each pulse position.                             *
    239      *----------------------------------------------------------------*/
    240     /* calculate energy for normalization of cn[] and dn[] */
    241     /* set k_cn = 32..32767 (ener_cn = 2^30..256-0) */
    242 #ifdef ASM_OPT                  /* asm optimization branch */
    243     s = Dot_product12_asm(cn, cn, L_SUBFR, &exp);
    244 #else
    245     s = Dot_product12(cn, cn, L_SUBFR, &exp);
    246 #endif
    247 
    248     Isqrt_n(&s, &exp);
    249     s = L_shl(s, (exp + 5));
    250     k_cn = extract_h(L_add(s, 0x8000));
    251 
    252     /* set k_dn = 32..512 (ener_dn = 2^30..2^22) */
    253 #ifdef ASM_OPT                      /* asm optimization branch */
    254     s = Dot_product12_asm(dn, dn, L_SUBFR, &exp);
    255 #else
    256     s = Dot_product12(dn, dn, L_SUBFR, &exp);
    257 #endif
    258 
    259     Isqrt_n(&s, &exp);
    260     k_dn = voround(L_shl(s, (exp + 5 + 3)));    /* k_dn = 256..4096 */
    261     k_dn = vo_mult_r(alp, k_dn);              /* alp in Q12 */
    262 
    263     /* mix normalized cn[] and dn[] */
    264     p0 = cn;
    265     p1 = dn;
    266     p2 = dn2;
    267 
    268     for (i = 0; i < L_SUBFR/4; i++)
    269     {
    270         s = L_add((k_cn* (*p0++)), (k_dn * (*p1++)));
    271         *p2++ = s >> 7;
    272         s = L_add((k_cn* (*p0++)), (k_dn * (*p1++)));
    273         *p2++ = s >> 7;
    274         s = L_add((k_cn* (*p0++)), (k_dn * (*p1++)));
    275         *p2++ = s >> 7;
    276         s = L_add((k_cn* (*p0++)), (k_dn * (*p1++)));
    277         *p2++ = s >> 7;
    278     }
    279 
    280     /* set sign according to dn2[] = k_cn*cn[] + k_dn*dn[]    */
    281     for(i = 0; i < L_SUBFR; i++)
    282     {
    283         val = dn[i];
    284         ps = dn2[i];
    285         if (ps >= 0)
    286         {
    287             sign[i] = 32767;             /* sign = +1 (Q12) */
    288             vec[i] = -32768;
    289         } else
    290         {
    291             sign[i] = -32768;            /* sign = -1 (Q12) */
    292             vec[i] = 32767;
    293             dn[i] = -val;
    294             dn2[i] = -ps;
    295         }
    296     }
    297     /*----------------------------------------------------------------*
    298      * Select NB_MAX positions per track according to max of dn2[].   *
    299      *----------------------------------------------------------------*/
    300     pos = 0;
    301     for (i = 0; i < NB_TRACK; i++)
    302     {
    303         for (k = 0; k < NB_MAX; k++)
    304         {
    305             ps = -1;
    306             for (j = i; j < L_SUBFR; j += STEP)
    307             {
    308                 if(dn2[j] > ps)
    309                 {
    310                     ps = dn2[j];
    311                     pos = j;
    312                 }
    313             }
    314             dn2[pos] = (k - NB_MAX);     /* dn2 < 0 when position is selected */
    315             if (k == 0)
    316             {
    317                 pos_max[i] = pos;
    318             }
    319         }
    320     }
    321 
    322     /*--------------------------------------------------------------*
    323      * Scale h[] to avoid overflow and to get maximum of precision  *
    324      * on correlation.                                              *
    325      *                                                              *
    326      * Maximum of h[] (h[0]) is fixed to 2048 (MAX16 / 16).         *
    327      *  ==> This allows addition of 16 pulses without saturation.   *
    328      *                                                              *
    329      * Energy worst case (on resonant impulse response),            *
    330      * - energy of h[] is approximately MAX/16.                     *
    331      * - During search, the energy is divided by 8 to avoid         *
    332      *   overflow on "alp". (energy of h[] = MAX/128).              *
    333      *  ==> "alp" worst case detected is 22854 on sinusoidal wave.  *
    334      *--------------------------------------------------------------*/
    335 
    336     /* impulse response buffer for fast computation */
    337 
    338     h = h_buf;
    339     h_inv = h_buf + (2 * L_SUBFR);
    340     L_tmp = 0;
    341     for (i = 0; i < L_SUBFR; i++)
    342     {
    343         *h++ = 0;
    344         *h_inv++ = 0;
    345         L_tmp = L_add(L_tmp, (H[i] * H[i]) << 1);
    346     }
    347     /* scale h[] down (/2) when energy of h[] is high with many pulses used */
    348     val = extract_h(L_tmp);
    349     h_shift = 0;
    350 
    351     if ((nb_pulse >= 12) && (val > 1024))
    352     {
    353         h_shift = 1;
    354     }
    355     p0 = H;
    356     p1 = h;
    357     p2 = h_inv;
    358 
    359     for (i = 0; i < L_SUBFR/4; i++)
    360     {
    361         *p1 = *p0++ >> h_shift;
    362         *p2++ = -(*p1++);
    363         *p1 = *p0++ >> h_shift;
    364         *p2++ = -(*p1++);
    365         *p1 = *p0++ >> h_shift;
    366         *p2++ = -(*p1++);
    367         *p1 = *p0++ >> h_shift;
    368         *p2++ = -(*p1++);
    369     }
    370 
    371     /*------------------------------------------------------------*
    372      * Compute rrixix[][] needed for the codebook search.         *
    373      * This algorithm compute impulse response energy of all      *
    374      * positions (16) in each track (4).       Total = 4x16 = 64. *
    375      *------------------------------------------------------------*/
    376 
    377     /* storage order --> i3i3, i2i2, i1i1, i0i0 */
    378 
    379     /* Init pointers to last position of rrixix[] */
    380     p0 = &rrixix[0][NB_POS - 1];
    381     p1 = &rrixix[1][NB_POS - 1];
    382     p2 = &rrixix[2][NB_POS - 1];
    383     p3 = &rrixix[3][NB_POS - 1];
    384 
    385     ptr_h1 = h;
    386     cor = 0x00008000L;                             /* for rounding */
    387     for (i = 0; i < NB_POS; i++)
    388     {
    389         cor = L_add(cor, vo_L_mult((*ptr_h1), (*ptr_h1)));
    390         ptr_h1++;
    391         *p3-- = extract_h(cor);
    392         cor = L_add(cor, vo_L_mult((*ptr_h1), (*ptr_h1)));
    393         ptr_h1++;
    394         *p2-- = extract_h(cor);
    395         cor = L_add(cor, vo_L_mult((*ptr_h1), (*ptr_h1)));
    396         ptr_h1++;
    397         *p1-- = extract_h(cor);
    398         cor = L_add(cor, vo_L_mult((*ptr_h1), (*ptr_h1)));
    399         ptr_h1++;
    400         *p0-- = extract_h(cor);
    401     }
    402 
    403     /*------------------------------------------------------------*
    404      * Compute rrixiy[][] needed for the codebook search.         *
    405      * This algorithm computes correlation between 2 pulses       *
    406      * (2 impulse responses) in 4 possible adjacent tracks.       *
    407      * (track 0-1, 1-2, 2-3 and 3-0).     Total = 4x16x16 = 1024. *
    408      *------------------------------------------------------------*/
    409 
    410     /* storage order --> i2i3, i1i2, i0i1, i3i0 */
    411 
    412     pos = MSIZE - 1;
    413     ptr_hf = h + 1;
    414 
    415     for (k = 0; k < NB_POS; k++)
    416     {
    417         p3 = &rrixiy[2][pos];
    418         p2 = &rrixiy[1][pos];
    419         p1 = &rrixiy[0][pos];
    420         p0 = &rrixiy[3][pos - NB_POS];
    421 
    422         cor = 0x00008000L;                   /* for rounding */
    423         ptr_h1 = h;
    424         ptr_h2 = ptr_hf;
    425 
    426         for (i = k + 1; i < NB_POS; i++)
    427         {
    428             cor = L_add(cor, vo_L_mult((*ptr_h1), (*ptr_h2)));
    429             ptr_h1++;
    430             ptr_h2++;
    431             *p3 = extract_h(cor);
    432             cor = L_add(cor, vo_L_mult((*ptr_h1), (*ptr_h2)));
    433             ptr_h1++;
    434             ptr_h2++;
    435             *p2 = extract_h(cor);
    436             cor = L_add(cor, vo_L_mult((*ptr_h1), (*ptr_h2)));
    437             ptr_h1++;
    438             ptr_h2++;
    439             *p1 = extract_h(cor);
    440             cor = L_add(cor, vo_L_mult((*ptr_h1), (*ptr_h2)));
    441             ptr_h1++;
    442             ptr_h2++;
    443             *p0 = extract_h(cor);
    444 
    445             p3 -= (NB_POS + 1);
    446             p2 -= (NB_POS + 1);
    447             p1 -= (NB_POS + 1);
    448             p0 -= (NB_POS + 1);
    449         }
    450         cor = L_add(cor, vo_L_mult((*ptr_h1), (*ptr_h2)));
    451         ptr_h1++;
    452         ptr_h2++;
    453         *p3 = extract_h(cor);
    454         cor = L_add(cor, vo_L_mult((*ptr_h1), (*ptr_h2)));
    455         ptr_h1++;
    456         ptr_h2++;
    457         *p2 = extract_h(cor);
    458         cor = L_add(cor, vo_L_mult((*ptr_h1), (*ptr_h2)));
    459         ptr_h1++;
    460         ptr_h2++;
    461         *p1 = extract_h(cor);
    462 
    463         pos -= NB_POS;
    464         ptr_hf += STEP;
    465     }
    466 
    467     /* storage order --> i3i0, i2i3, i1i2, i0i1 */
    468 
    469     pos = MSIZE - 1;
    470     ptr_hf = h + 3;
    471 
    472     for (k = 0; k < NB_POS; k++)
    473     {
    474         p3 = &rrixiy[3][pos];
    475         p2 = &rrixiy[2][pos - 1];
    476         p1 = &rrixiy[1][pos - 1];
    477         p0 = &rrixiy[0][pos - 1];
    478 
    479         cor = 0x00008000L;                              /* for rounding */
    480         ptr_h1 = h;
    481         ptr_h2 = ptr_hf;
    482 
    483         for (i = k + 1; i < NB_POS; i++)
    484         {
    485             cor = L_add(cor, vo_L_mult((*ptr_h1), (*ptr_h2)));
    486             ptr_h1++;
    487             ptr_h2++;
    488             *p3 = extract_h(cor);
    489             cor = L_add(cor, vo_L_mult((*ptr_h1), (*ptr_h2)));
    490             ptr_h1++;
    491             ptr_h2++;
    492             *p2 = extract_h(cor);
    493             cor = L_add(cor, vo_L_mult((*ptr_h1), (*ptr_h2)));
    494             ptr_h1++;
    495             ptr_h2++;
    496             *p1 = extract_h(cor);
    497             cor = L_add(cor, vo_L_mult((*ptr_h1), (*ptr_h2)));
    498             ptr_h1++;
    499             ptr_h2++;
    500             *p0 = extract_h(cor);
    501 
    502             p3 -= (NB_POS + 1);
    503             p2 -= (NB_POS + 1);
    504             p1 -= (NB_POS + 1);
    505             p0 -= (NB_POS + 1);
    506         }
    507         cor = L_add(cor, vo_L_mult((*ptr_h1), (*ptr_h2)));
    508         ptr_h1++;
    509         ptr_h2++;
    510         *p3 = extract_h(cor);
    511 
    512         pos--;
    513         ptr_hf += STEP;
    514     }
    515 
    516     /*------------------------------------------------------------*
    517      * Modification of rrixiy[][] to take signs into account.     *
    518      *------------------------------------------------------------*/
    519 
    520     p0 = &rrixiy[0][0];
    521 
    522     for (k = 0; k < NB_TRACK; k++)
    523     {
    524         j_temp = (k + 1)&0x03;
    525         for (i = k; i < L_SUBFR; i += STEP)
    526         {
    527             psign = sign;
    528             if (psign[i] < 0)
    529             {
    530                 psign = vec;
    531             }
    532             j = j_temp;
    533             for (; j < L_SUBFR; j += STEP)
    534             {
    535                 *p0 = vo_mult(*p0, psign[j]);
    536                 p0++;
    537             }
    538         }
    539     }
    540 
    541     /*-------------------------------------------------------------------*
    542      *                       Depth-first search                          *
    543      *-------------------------------------------------------------------*/
    544 
    545     psk = -1;
    546     alpk = 1;
    547 
    548     for (k = 0; k < nbiter; k++)
    549     {
    550         j_temp = k<<2;
    551         for (i = 0; i < nb_pulse; i++)
    552             ipos[i] = tipos[j_temp + i];
    553 
    554         if(nbbits == 20)
    555         {
    556             pos = 0;
    557             ps = 0;
    558             alp = 0;
    559             for (i = 0; i < L_SUBFR; i++)
    560             {
    561                 vec[i] = 0;
    562             }
    563         } else if ((nbbits == 36) || (nbbits == 44))
    564         {
    565             /* first stage: fix 2 pulses */
    566             pos = 2;
    567 
    568             ix = ind[0] = pos_max[ipos[0]];
    569             iy = ind[1] = pos_max[ipos[1]];
    570             ps = dn[ix] + dn[iy];
    571             i = ix >> 2;                /* ix / STEP */
    572             j = iy >> 2;                /* iy / STEP */
    573             s = rrixix[ipos[0]][i] << 13;
    574             s += rrixix[ipos[1]][j] << 13;
    575             i = (i << 4) + j;         /* (ix/STEP)*NB_POS + (iy/STEP) */
    576             s += rrixiy[ipos[0]][i] << 14;
    577             alp = (s + 0x8000) >> 16;
    578             if (sign[ix] < 0)
    579                 p0 = h_inv - ix;
    580             else
    581                 p0 = h - ix;
    582             if (sign[iy] < 0)
    583                 p1 = h_inv - iy;
    584             else
    585                 p1 = h - iy;
    586 
    587             for (i = 0; i < L_SUBFR; i++)
    588             {
    589                 vec[i] = (*p0++) + (*p1++);
    590             }
    591 
    592             if(nbbits == 44)
    593             {
    594                 ipos[8] = 0;
    595                 ipos[9] = 1;
    596             }
    597         } else
    598         {
    599             /* first stage: fix 4 pulses */
    600             pos = 4;
    601 
    602             ix = ind[0] = pos_max[ipos[0]];
    603             iy = ind[1] = pos_max[ipos[1]];
    604             i = ind[2] = pos_max[ipos[2]];
    605             j = ind[3] = pos_max[ipos[3]];
    606             ps = add1(add1(add1(dn[ix], dn[iy]), dn[i]), dn[j]);
    607 
    608             if (sign[ix] < 0)
    609                 p0 = h_inv - ix;
    610             else
    611                 p0 = h - ix;
    612 
    613             if (sign[iy] < 0)
    614                 p1 = h_inv - iy;
    615             else
    616                 p1 = h - iy;
    617 
    618             if (sign[i] < 0)
    619                 p2 = h_inv - i;
    620             else
    621                 p2 = h - i;
    622 
    623             if (sign[j] < 0)
    624                 p3 = h_inv - j;
    625             else
    626                 p3 = h - j;
    627 
    628             L_tmp = 0L;
    629             for(i = 0; i < L_SUBFR; i++)
    630             {
    631                 Word32 vecSq2;
    632                 vec[i]  = add1(add1(add1(*p0++, *p1++), *p2++), *p3++);
    633                 vecSq2 = (vec[i] * vec[i]) << 1;
    634                 if (vecSq2 > 0 && L_tmp > INT_MAX - vecSq2) {
    635                     L_tmp = INT_MAX;
    636                 } else if (vecSq2 < 0 && L_tmp < INT_MIN - vecSq2) {
    637                     L_tmp = INT_MIN;
    638                 } else {
    639                     L_tmp  += vecSq2;
    640                 }
    641             }
    642 
    643             alp = ((L_tmp >> 3) + 0x8000) >> 16;
    644 
    645             if(nbbits == 72)
    646             {
    647                 ipos[16] = 0;
    648                 ipos[17] = 1;
    649             }
    650         }
    651 
    652         /* other stages of 2 pulses */
    653 
    654         for (j = pos, st = 0; j < nb_pulse; j += 2, st++)
    655         {
    656             /*--------------------------------------------------*
    657              * Calculate correlation of all possible positions  *
    658              * of the next 2 pulses with previous fixed pulses. *
    659              * Each pulse can have 16 possible positions.       *
    660              *--------------------------------------------------*/
    661             if(ipos[j] == 3)
    662             {
    663                 cor_h_vec_30(h, vec, ipos[j], sign, rrixix, cor_x, cor_y);
    664             }
    665             else
    666             {
    667 #ifdef ASM_OPT                 /* asm optimization branch */
    668                 cor_h_vec_012_asm(h, vec, ipos[j], sign, rrixix, cor_x, cor_y);
    669 #else
    670                 cor_h_vec_012(h, vec, ipos[j], sign, rrixix, cor_x, cor_y);
    671 #endif
    672             }
    673             /*--------------------------------------------------*
    674              * Find best positions of 2 pulses.                 *
    675              *--------------------------------------------------*/
    676             search_ixiy(nbpos[st], ipos[j], ipos[j + 1], &ps, &alp,
    677                     &ix, &iy, dn, dn2, cor_x, cor_y, rrixiy);
    678 
    679             ind[j] = ix;
    680             ind[j + 1] = iy;
    681 
    682             if (sign[ix] < 0)
    683                 p0 = h_inv - ix;
    684             else
    685                 p0 = h - ix;
    686             if (sign[iy] < 0)
    687                 p1 = h_inv - iy;
    688             else
    689                 p1 = h - iy;
    690 
    691             for (i = 0; i < L_SUBFR; i+=4)
    692             {
    693                 vec[i]   += add1((*p0++), (*p1++));
    694                 vec[i+1] += add1((*p0++), (*p1++));
    695                 vec[i+2] += add1((*p0++), (*p1++));
    696                 vec[i+3] += add1((*p0++), (*p1++));
    697             }
    698         }
    699         /* memorise the best codevector */
    700         ps = vo_mult(ps, ps);
    701         s = L_sub(vo_L_mult(alpk, ps), vo_L_mult(psk, alp));
    702         if (s > 0)
    703         {
    704             psk = ps;
    705             alpk = alp;
    706             for (i = 0; i < nb_pulse; i++)
    707             {
    708                 codvec[i] = ind[i];
    709             }
    710             for (i = 0; i < L_SUBFR; i++)
    711             {
    712                 y[i] = vec[i];
    713             }
    714         }
    715     }
    716     /*-------------------------------------------------------------------*
    717      * Build the codeword, the filtered codeword and index of codevector.*
    718      *-------------------------------------------------------------------*/
    719     for (i = 0; i < NPMAXPT * NB_TRACK; i++)
    720     {
    721         ind[i] = -1;
    722     }
    723     for (i = 0; i < L_SUBFR; i++)
    724     {
    725         code[i] = 0;
    726         y[i] = vo_shr_r(y[i], 3);               /* Q12 to Q9 */
    727     }
    728     val = (512 >> h_shift);               /* codeword in Q9 format */
    729     for (k = 0; k < nb_pulse; k++)
    730     {
    731         i = codvec[k];                       /* read pulse position */
    732         j = sign[i];                         /* read sign           */
    733         index = i >> 2;                 /* index = pos of pulse (0..15) */
    734         track = (Word16) (i & 0x03);         /* track = i % NB_TRACK (0..3)  */
    735 
    736         if (j > 0)
    737         {
    738             code[i] += val;
    739             codvec[k] += 128;
    740         } else
    741         {
    742             code[i] -= val;
    743             index += NB_POS;
    744         }
    745 
    746         i = (Word16)((vo_L_mult(track, NPMAXPT) >> 1));
    747 
    748         while (ind[i] >= 0)
    749         {
    750             i += 1;
    751         }
    752         ind[i] = index;
    753     }
    754 
    755     k = 0;
    756     /* Build index of codevector */
    757     if(nbbits == 20)
    758     {
    759         for (track = 0; track < NB_TRACK; track++)
    760         {
    761             _index[track] = (Word16)(quant_1p_N1(ind[k], 4));
    762             k += NPMAXPT;
    763         }
    764     } else if(nbbits == 36)
    765     {
    766         for (track = 0; track < NB_TRACK; track++)
    767         {
    768             _index[track] = (Word16)(quant_2p_2N1(ind[k], ind[k + 1], 4));
    769             k += NPMAXPT;
    770         }
    771     } else if(nbbits == 44)
    772     {
    773         for (track = 0; track < NB_TRACK - 2; track++)
    774         {
    775             _index[track] = (Word16)(quant_3p_3N1(ind[k], ind[k + 1], ind[k + 2], 4));
    776             k += NPMAXPT;
    777         }
    778         for (track = 2; track < NB_TRACK; track++)
    779         {
    780             _index[track] = (Word16)(quant_2p_2N1(ind[k], ind[k + 1], 4));
    781             k += NPMAXPT;
    782         }
    783     } else if(nbbits == 52)
    784     {
    785         for (track = 0; track < NB_TRACK; track++)
    786         {
    787             _index[track] = (Word16)(quant_3p_3N1(ind[k], ind[k + 1], ind[k + 2], 4));
    788             k += NPMAXPT;
    789         }
    790     } else if(nbbits == 64)
    791     {
    792         for (track = 0; track < NB_TRACK; track++)
    793         {
    794             L_index = quant_4p_4N(&ind[k], 4);
    795             _index[track] = (Word16)((L_index >> 14) & 3);
    796             _index[track + NB_TRACK] = (Word16)(L_index & 0x3FFF);
    797             k += NPMAXPT;
    798         }
    799     } else if(nbbits == 72)
    800     {
    801         for (track = 0; track < NB_TRACK - 2; track++)
    802         {
    803             L_index = quant_5p_5N(&ind[k], 4);
    804             _index[track] = (Word16)((L_index >> 10) & 0x03FF);
    805             _index[track + NB_TRACK] = (Word16)(L_index & 0x03FF);
    806             k += NPMAXPT;
    807         }
    808         for (track = 2; track < NB_TRACK; track++)
    809         {
    810             L_index = quant_4p_4N(&ind[k], 4);
    811             _index[track] = (Word16)((L_index >> 14) & 3);
    812             _index[track + NB_TRACK] = (Word16)(L_index & 0x3FFF);
    813             k += NPMAXPT;
    814         }
    815     } else if(nbbits == 88)
    816     {
    817         for (track = 0; track < NB_TRACK; track++)
    818         {
    819             L_index = quant_6p_6N_2(&ind[k], 4);
    820             _index[track] = (Word16)((L_index >> 11) & 0x07FF);
    821             _index[track + NB_TRACK] = (Word16)(L_index & 0x07FF);
    822             k += NPMAXPT;
    823         }
    824     }
    825     return;
    826 }
    827 
    828 
    829 /*-------------------------------------------------------------------*
    830  * Function  cor_h_vec()                                             *
    831  * ~~~~~~~~~~~~~~~~~~~~~                                             *
    832  * Compute correlations of h[] with vec[] for the specified track.   *
    833  *-------------------------------------------------------------------*/
    834 void cor_h_vec_30(
    835         Word16 h[],                           /* (i) scaled impulse response                 */
    836         Word16 vec[],                         /* (i) scaled vector (/8) to correlate with h[] */
    837         Word16 track,                         /* (i) track to use                            */
    838         Word16 sign[],                        /* (i) sign vector                             */
    839         Word16 rrixix[][NB_POS],              /* (i) correlation of h[x] with h[x]      */
    840         Word16 cor_1[],                       /* (o) result of correlation (NB_POS elements) */
    841         Word16 cor_2[]                        /* (o) result of correlation (NB_POS elements) */
    842         )
    843 {
    844     Word32 i, j, pos, corr;
    845     Word16 *p0, *p1, *p2,*p3,*cor_x,*cor_y;
    846     Word32 L_sum1,L_sum2;
    847     cor_x = cor_1;
    848     cor_y = cor_2;
    849     p0 = rrixix[track];
    850     p3 = rrixix[0];
    851     pos = track;
    852 
    853     for (i = 0; i < NB_POS; i+=2)
    854     {
    855         L_sum1 = L_sum2 = 0L;
    856         p1 = h;
    857         p2 = &vec[pos];
    858         for (j=pos;j < L_SUBFR; j++)
    859         {
    860             L_sum1 = L_add(L_sum1, *p1 * *p2);
    861             p2-=3;
    862             L_sum2 = L_add(L_sum2, *p1++ * *p2);
    863             p2+=4;
    864         }
    865         p2-=3;
    866         L_sum2 = L_add(L_sum2, *p1++ * *p2++);
    867         L_sum2 = L_add(L_sum2, *p1++ * *p2++);
    868         L_sum2 = L_add(L_sum2, *p1++ * *p2++);
    869 
    870         L_sum1 = L_shl(L_sum1, 2);
    871         L_sum2 = L_shl(L_sum2, 2);
    872 
    873         corr = voround(L_sum1);
    874         *cor_x++ = mult(corr, sign[pos]) + (*p0++);
    875         corr = voround(L_sum2);
    876         *cor_y++ = mult(corr, sign[pos-3]) + (*p3++);
    877         pos += STEP;
    878 
    879         L_sum1 = L_sum2 = 0L;
    880         p1 = h;
    881         p2 = &vec[pos];
    882         for (j=pos;j < L_SUBFR; j++)
    883         {
    884             L_sum1 = L_add(L_sum1, *p1 * *p2);
    885             p2-=3;
    886             L_sum2 = L_add(L_sum2, *p1++ * *p2);
    887             p2+=4;
    888         }
    889         p2-=3;
    890         L_sum2 = L_add(L_sum2, *p1++ * *p2++);
    891         L_sum2 = L_add(L_sum2, *p1++ * *p2++);
    892         L_sum2 = L_add(L_sum2, *p1++ * *p2++);
    893 
    894         L_sum1 = L_shl(L_sum1, 2);
    895         L_sum2 = L_shl(L_sum2, 2);
    896 
    897         corr = voround(L_sum1);
    898         *cor_x++ = mult(corr, sign[pos]) + (*p0++);
    899         corr = voround(L_sum2);
    900         *cor_y++ = mult(corr, sign[pos-3]) + (*p3++);
    901         pos += STEP;
    902     }
    903     return;
    904 }
    905 
    906 void cor_h_vec_012(
    907         Word16 h[],                           /* (i) scaled impulse response                 */
    908         Word16 vec[],                         /* (i) scaled vector (/8) to correlate with h[] */
    909         Word16 track,                         /* (i) track to use                            */
    910         Word16 sign[],                        /* (i) sign vector                             */
    911         Word16 rrixix[][NB_POS],              /* (i) correlation of h[x] with h[x]      */
    912         Word16 cor_1[],                       /* (o) result of correlation (NB_POS elements) */
    913         Word16 cor_2[]                        /* (o) result of correlation (NB_POS elements) */
    914         )
    915 {
    916     Word32 i, j, pos, corr;
    917     Word16 *p0, *p1, *p2,*p3,*cor_x,*cor_y;
    918     Word32 L_sum1,L_sum2;
    919     cor_x = cor_1;
    920     cor_y = cor_2;
    921     p0 = rrixix[track];
    922     p3 = rrixix[track+1];
    923     pos = track;
    924 
    925     for (i = 0; i < NB_POS; i+=2)
    926     {
    927         L_sum1 = L_sum2 = 0L;
    928         p1 = h;
    929         p2 = &vec[pos];
    930         for (j=62-pos ;j >= 0; j--)
    931         {
    932             L_sum1 = L_add(L_sum1, *p1 * *p2++);
    933             L_sum2 = L_add(L_sum2, *p1++ * *p2);
    934         }
    935         L_sum1 = L_add(L_sum1, *p1 * *p2);
    936         L_sum1 = L_shl(L_sum1, 2);
    937         L_sum2 = L_shl(L_sum2, 2);
    938 
    939         corr = voround(L_sum1);
    940         cor_x[i] = vo_mult(corr, sign[pos]) + (*p0++);
    941         corr = voround(L_sum2);
    942         cor_y[i] = vo_mult(corr, sign[pos + 1]) + (*p3++);
    943         pos += STEP;
    944 
    945         L_sum1 = L_sum2 = 0L;
    946         p1 = h;
    947         p2 = &vec[pos];
    948         for (j= 62-pos;j >= 0; j--)
    949         {
    950             L_sum1 = L_add(L_sum1, *p1 * *p2++);
    951             L_sum2 = L_add(L_sum2, *p1++ * *p2);
    952         }
    953         L_sum1 = L_add(L_sum1, *p1 * *p2);
    954         L_sum1 = L_shl(L_sum1, 2);
    955         L_sum2 = L_shl(L_sum2, 2);
    956 
    957         corr = voround(L_sum1);
    958         cor_x[i+1] = vo_mult(corr, sign[pos]) + (*p0++);
    959         corr = voround(L_sum2);
    960         cor_y[i+1] = vo_mult(corr, sign[pos + 1]) + (*p3++);
    961         pos += STEP;
    962     }
    963     return;
    964 }
    965 
    966 /*-------------------------------------------------------------------*
    967  * Function  search_ixiy()                                           *
    968  * ~~~~~~~~~~~~~~~~~~~~~~~                                           *
    969  * Find the best positions of 2 pulses in a subframe.                *
    970  *-------------------------------------------------------------------*/
    971 
    972 void search_ixiy(
    973         Word16 nb_pos_ix,                     /* (i) nb of pos for pulse 1 (1..8)       */
    974         Word16 track_x,                       /* (i) track of pulse 1                   */
    975         Word16 track_y,                       /* (i) track of pulse 2                   */
    976         Word16 * ps,                          /* (i/o) correlation of all fixed pulses  */
    977         Word16 * alp,                         /* (i/o) energy of all fixed pulses       */
    978         Word16 * ix,                          /* (o) position of pulse 1                */
    979         Word16 * iy,                          /* (o) position of pulse 2                */
    980         Word16 dn[],                          /* (i) corr. between target and h[]       */
    981         Word16 dn2[],                         /* (i) vector of selected positions       */
    982         Word16 cor_x[],                       /* (i) corr. of pulse 1 with fixed pulses */
    983         Word16 cor_y[],                       /* (i) corr. of pulse 2 with fixed pulses */
    984         Word16 rrixiy[][MSIZE]                /* (i) corr. of pulse 1 with pulse 2   */
    985         )
    986 {
    987     Word32 x, y, pos, thres_ix;
    988     Word16 ps1, ps2, sq, sqk;
    989     Word16 alp_16, alpk;
    990     Word16 *p0, *p1, *p2;
    991     Word32 s, alp0, alp1, alp2;
    992 
    993     p0 = cor_x;
    994     p1 = cor_y;
    995     p2 = rrixiy[track_x];
    996 
    997     thres_ix = nb_pos_ix - NB_MAX;
    998 
    999     alp0 = L_deposit_h(*alp);
   1000     alp0 = (alp0 + 0x00008000L);       /* for rounding */
   1001 
   1002     sqk = -1;
   1003     alpk = 1;
   1004 
   1005     for (x = track_x; x < L_SUBFR; x += STEP)
   1006     {
   1007         ps1 = *ps + dn[x];
   1008         alp1 = L_add(alp0, ((*p0++)<<13));
   1009 
   1010         if (dn2[x] < thres_ix)
   1011         {
   1012             pos = -1;
   1013             for (y = track_y; y < L_SUBFR; y += STEP)
   1014             {
   1015                 ps2 = add1(ps1, dn[y]);
   1016 
   1017                 alp2 = L_add(alp1, ((*p1++)<<13));
   1018                 alp2 = L_add(alp2, ((*p2++)<<14));
   1019                 alp_16 = extract_h(alp2);
   1020                 sq = vo_mult(ps2, ps2);
   1021                 s = L_sub(vo_L_mult(alpk, sq), L_mult(sqk, alp_16));
   1022 
   1023                 if (s > 0)
   1024                 {
   1025                     sqk = sq;
   1026                     alpk = alp_16;
   1027                     pos = y;
   1028                 }
   1029             }
   1030             p1 -= NB_POS;
   1031 
   1032             if (pos >= 0)
   1033             {
   1034                 *ix = x;
   1035                 *iy = pos;
   1036             }
   1037         } else
   1038         {
   1039             p2 += NB_POS;
   1040         }
   1041     }
   1042 
   1043     *ps = add1(*ps, add1(dn[*ix], dn[*iy]));
   1044     *alp = alpk;
   1045 
   1046     return;
   1047 }
   1048 
   1049 
   1050 
   1051 
   1052