Home | History | Annotate | Download | only in src
      1 /*
      2  ** Copyright 2003-2010, VisualOn, Inc.
      3  **
      4  ** Licensed under the Apache License, Version 2.0 (the "License");
      5  ** you may not use this file except in compliance with the License.
      6  ** You may obtain a copy of the License at
      7  **
      8  **     http://www.apache.org/licenses/LICENSE-2.0
      9  **
     10  ** Unless required by applicable law or agreed to in writing, software
     11  ** distributed under the License is distributed on an "AS IS" BASIS,
     12  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  ** See the License for the specific language governing permissions and
     14  ** limitations under the License.
     15  */
     16 
     17 /***********************************************************************
     18 *      File: c4t64fx.c                                                 *
     19 *                                                                      *
     20 *	   Description:Performs algebraic codebook search for higher modes *
     21 *                                                                      *
     22 ************************************************************************/
     23 
     24 /************************************************************************
     25 * Function: ACELP_4t64_fx()                                             *
     26 *                                                                       *
     27 * 20, 36, 44, 52, 64, 72, 88 bits algebraic codebook.                   *
     28 * 4 tracks x 16 positions per track = 64 samples.                       *
     29 *                                                                       *
     30 * 20 bits --> 4 pulses in a frame of 64 samples.                        *
     31 * 36 bits --> 8 pulses in a frame of 64 samples.                        *
     32 * 44 bits --> 10 pulses in a frame of 64 samples.                       *
     33 * 52 bits --> 12 pulses in a frame of 64 samples.                       *
     34 * 64 bits --> 16 pulses in a frame of 64 samples.                       *
     35 * 72 bits --> 18 pulses in a frame of 64 samples.                       *
     36 * 88 bits --> 24 pulses in a frame of 64 samples.                       *
     37 *                                                                       *
     38 * All pulses can have two (2) possible amplitudes: +1 or -1.            *
     39 * Each pulse can have sixteen (16) possible positions.                  *
     40 *************************************************************************/
     41 
     42 #include "typedef.h"
     43 #include "basic_op.h"
     44 #include "math_op.h"
     45 #include "acelp.h"
     46 #include "cnst.h"
     47 
     48 #include "q_pulse.h"
     49 
     50 static Word16 tipos[36] = {
     51 	0, 1, 2, 3,                            /* starting point &ipos[0], 1st iter */
     52 	1, 2, 3, 0,                            /* starting point &ipos[4], 2nd iter */
     53 	2, 3, 0, 1,                            /* starting point &ipos[8], 3rd iter */
     54 	3, 0, 1, 2,                            /* starting point &ipos[12], 4th iter */
     55 	0, 1, 2, 3,
     56 	1, 2, 3, 0,
     57 	2, 3, 0, 1,
     58 	3, 0, 1, 2,
     59 	0, 1, 2, 3};                           /* end point for 24 pulses &ipos[35], 4th iter */
     60 
     61 #define NB_PULSE_MAX  24
     62 
     63 #define L_SUBFR   64
     64 #define NB_TRACK  4
     65 #define STEP      4
     66 #define NB_POS    16
     67 #define MSIZE     256
     68 #define NB_MAX    8
     69 #define NPMAXPT   ((NB_PULSE_MAX+NB_TRACK-1)/NB_TRACK)
     70 
     71 /* Private functions */
     72 void cor_h_vec_012(
     73 		Word16 h[],                           /* (i) scaled impulse response                 */
     74 		Word16 vec[],                         /* (i) scaled vector (/8) to correlate with h[] */
     75 		Word16 track,                         /* (i) track to use                            */
     76 		Word16 sign[],                        /* (i) sign vector                             */
     77 		Word16 rrixix[][NB_POS],              /* (i) correlation of h[x] with h[x]      */
     78 		Word16 cor_1[],                       /* (o) result of correlation (NB_POS elements) */
     79 		Word16 cor_2[]                        /* (o) result of correlation (NB_POS elements) */
     80 		);
     81 
     82 void cor_h_vec_012_asm(
     83 		Word16 h[],                           /* (i) scaled impulse response                 */
     84 		Word16 vec[],                         /* (i) scaled vector (/8) to correlate with h[] */
     85 		Word16 track,                         /* (i) track to use                            */
     86 		Word16 sign[],                        /* (i) sign vector                             */
     87 		Word16 rrixix[][NB_POS],              /* (i) correlation of h[x] with h[x]      */
     88 		Word16 cor_1[],                       /* (o) result of correlation (NB_POS elements) */
     89 		Word16 cor_2[]                        /* (o) result of correlation (NB_POS elements) */
     90 		);
     91 
     92 void cor_h_vec_30(
     93 		Word16 h[],                           /* (i) scaled impulse response                 */
     94 		Word16 vec[],                         /* (i) scaled vector (/8) to correlate with h[] */
     95 		Word16 track,                         /* (i) track to use                            */
     96 		Word16 sign[],                        /* (i) sign vector                             */
     97 		Word16 rrixix[][NB_POS],              /* (i) correlation of h[x] with h[x]      */
     98 		Word16 cor_1[],                       /* (o) result of correlation (NB_POS elements) */
     99 		Word16 cor_2[]                        /* (o) result of correlation (NB_POS elements) */
    100 		);
    101 
    102 void search_ixiy(
    103 		Word16 nb_pos_ix,                     /* (i) nb of pos for pulse 1 (1..8)       */
    104 		Word16 track_x,                       /* (i) track of pulse 1                   */
    105 		Word16 track_y,                       /* (i) track of pulse 2                   */
    106 		Word16 * ps,                          /* (i/o) correlation of all fixed pulses  */
    107 		Word16 * alp,                         /* (i/o) energy of all fixed pulses       */
    108 		Word16 * ix,                          /* (o) position of pulse 1                */
    109 		Word16 * iy,                          /* (o) position of pulse 2                */
    110 		Word16 dn[],                          /* (i) corr. between target and h[]       */
    111 		Word16 dn2[],                         /* (i) vector of selected positions       */
    112 		Word16 cor_x[],                       /* (i) corr. of pulse 1 with fixed pulses */
    113 		Word16 cor_y[],                       /* (i) corr. of pulse 2 with fixed pulses */
    114 		Word16 rrixiy[][MSIZE]                /* (i) corr. of pulse 1 with pulse 2   */
    115 		);
    116 
    117 
    118 void ACELP_4t64_fx(
    119 		Word16 dn[],                          /* (i) <12b : correlation between target x[] and H[]      */
    120 		Word16 cn[],                          /* (i) <12b : residual after long term prediction         */
    121 		Word16 H[],                           /* (i) Q12: impulse response of weighted synthesis filter */
    122 		Word16 code[],                        /* (o) Q9 : algebraic (fixed) codebook excitation         */
    123 		Word16 y[],                           /* (o) Q9 : filtered fixed codebook excitation            */
    124 		Word16 nbbits,                        /* (i) : 20, 36, 44, 52, 64, 72 or 88 bits                */
    125 		Word16 ser_size,                      /* (i) : bit rate                                         */
    126 		Word16 _index[]                       /* (o) : index (20): 5+5+5+5 = 20 bits.                   */
    127 		/* (o) : index (36): 9+9+9+9 = 36 bits.                   */
    128 		/* (o) : index (44): 13+9+13+9 = 44 bits.                 */
    129 		/* (o) : index (52): 13+13+13+13 = 52 bits.               */
    130 		/* (o) : index (64): 2+2+2+2+14+14+14+14 = 64 bits.       */
    131 		/* (o) : index (72): 10+2+10+2+10+14+10+14 = 72 bits.     */
    132 		/* (o) : index (88): 11+11+11+11+11+11+11+11 = 88 bits.   */
    133 		)
    134 {
    135 	Word32 i, j, k;
    136 	Word16 st, ix, iy, pos, index, track, nb_pulse, nbiter, j_temp;
    137 	Word16 psk, ps, alpk, alp, val, k_cn, k_dn, exp;
    138 	Word16 *p0, *p1, *p2, *p3, *psign;
    139 	Word16 *h, *h_inv, *ptr_h1, *ptr_h2, *ptr_hf, h_shift;
    140 	Word32 s, cor, L_tmp, L_index;
    141 	Word16 dn2[L_SUBFR], sign[L_SUBFR], vec[L_SUBFR];
    142 	Word16 ind[NPMAXPT * NB_TRACK];
    143 	Word16 codvec[NB_PULSE_MAX], nbpos[10];
    144 	Word16 cor_x[NB_POS], cor_y[NB_POS], pos_max[NB_TRACK];
    145 	Word16 h_buf[4 * L_SUBFR];
    146 	Word16 rrixix[NB_TRACK][NB_POS], rrixiy[NB_TRACK][MSIZE];
    147 	Word16 ipos[NB_PULSE_MAX];
    148 
    149 	switch (nbbits)
    150 	{
    151 		case 20:                               /* 20 bits, 4 pulses, 4 tracks */
    152 			nbiter = 4;                          /* 4x16x16=1024 loop */
    153 			alp = 8192;                          /* alp = 2.0 (Q12) */
    154 			nb_pulse = 4;
    155 			nbpos[0] = 4;
    156 			nbpos[1] = 8;
    157 			break;
    158 		case 36:                               /* 36 bits, 8 pulses, 4 tracks */
    159 			nbiter = 4;                          /* 4x20x16=1280 loop */
    160 			alp = 4096;                          /* alp = 1.0 (Q12) */
    161 			nb_pulse = 8;
    162 			nbpos[0] = 4;
    163 			nbpos[1] = 8;
    164 			nbpos[2] = 8;
    165 			break;
    166 		case 44:                               /* 44 bits, 10 pulses, 4 tracks */
    167 			nbiter = 4;                          /* 4x26x16=1664 loop */
    168 			alp = 4096;                          /* alp = 1.0 (Q12) */
    169 			nb_pulse = 10;
    170 			nbpos[0] = 4;
    171 			nbpos[1] = 6;
    172 			nbpos[2] = 8;
    173 			nbpos[3] = 8;
    174 			break;
    175 		case 52:                               /* 52 bits, 12 pulses, 4 tracks */
    176 			nbiter = 4;                          /* 4x26x16=1664 loop */
    177 			alp = 4096;                          /* alp = 1.0 (Q12) */
    178 			nb_pulse = 12;
    179 			nbpos[0] = 4;
    180 			nbpos[1] = 6;
    181 			nbpos[2] = 8;
    182 			nbpos[3] = 8;
    183 			break;
    184 		case 64:                               /* 64 bits, 16 pulses, 4 tracks */
    185 			nbiter = 3;                          /* 3x36x16=1728 loop */
    186 			alp = 3277;                          /* alp = 0.8 (Q12) */
    187 			nb_pulse = 16;
    188 			nbpos[0] = 4;
    189 			nbpos[1] = 4;
    190 			nbpos[2] = 6;
    191 			nbpos[3] = 6;
    192 			nbpos[4] = 8;
    193 			nbpos[5] = 8;
    194 			break;
    195 		case 72:                               /* 72 bits, 18 pulses, 4 tracks */
    196 			nbiter = 3;                          /* 3x35x16=1680 loop */
    197 			alp = 3072;                          /* alp = 0.75 (Q12) */
    198 			nb_pulse = 18;
    199 			nbpos[0] = 2;
    200 			nbpos[1] = 3;
    201 			nbpos[2] = 4;
    202 			nbpos[3] = 5;
    203 			nbpos[4] = 6;
    204 			nbpos[5] = 7;
    205 			nbpos[6] = 8;
    206 			break;
    207 		case 88:                               /* 88 bits, 24 pulses, 4 tracks */
    208 			if(ser_size > 462)
    209 				nbiter = 1;
    210 			else
    211 				nbiter = 2;                    /* 2x53x16=1696 loop */
    212 
    213 			alp = 2048;                          /* alp = 0.5 (Q12) */
    214 			nb_pulse = 24;
    215 			nbpos[0] = 2;
    216 			nbpos[1] = 2;
    217 			nbpos[2] = 3;
    218 			nbpos[3] = 4;
    219 			nbpos[4] = 5;
    220 			nbpos[5] = 6;
    221 			nbpos[6] = 7;
    222 			nbpos[7] = 8;
    223 			nbpos[8] = 8;
    224 			nbpos[9] = 8;
    225 			break;
    226 		default:
    227 			nbiter = 0;
    228 			alp = 0;
    229 			nb_pulse = 0;
    230 	}
    231 
    232 	for (i = 0; i < nb_pulse; i++)
    233 	{
    234 		codvec[i] = i;
    235 	}
    236 
    237 	/*----------------------------------------------------------------*
    238 	 * Find sign for each pulse position.                             *
    239 	 *----------------------------------------------------------------*/
    240 	/* calculate energy for normalization of cn[] and dn[] */
    241 	/* set k_cn = 32..32767 (ener_cn = 2^30..256-0) */
    242 #ifdef ASM_OPT                  /* asm optimization branch */
    243 	s = Dot_product12_asm(cn, cn, L_SUBFR, &exp);
    244 #else
    245 	s = Dot_product12(cn, cn, L_SUBFR, &exp);
    246 #endif
    247 
    248 	Isqrt_n(&s, &exp);
    249 	s = L_shl(s, (exp + 5));
    250 	k_cn = extract_h(L_add(s, 0x8000));
    251 
    252 	/* set k_dn = 32..512 (ener_dn = 2^30..2^22) */
    253 #ifdef ASM_OPT                      /* asm optimization branch */
    254 	s = Dot_product12_asm(dn, dn, L_SUBFR, &exp);
    255 #else
    256 	s = Dot_product12(dn, dn, L_SUBFR, &exp);
    257 #endif
    258 
    259 	Isqrt_n(&s, &exp);
    260 	k_dn = (L_shl(s, (exp + 5 + 3)) + 0x8000) >> 16;    /* k_dn = 256..4096 */
    261 	k_dn = vo_mult_r(alp, k_dn);              /* alp in Q12 */
    262 
    263 	/* mix normalized cn[] and dn[] */
    264 	p0 = cn;
    265 	p1 = dn;
    266 	p2 = dn2;
    267 
    268 	for (i = 0; i < L_SUBFR/4; i++)
    269 	{
    270 		s = (k_cn* (*p0++))+(k_dn * (*p1++));
    271 		*p2++ = s >> 7;
    272 		s = (k_cn* (*p0++))+(k_dn * (*p1++));
    273 		*p2++ = s >> 7;
    274 		s = (k_cn* (*p0++))+(k_dn * (*p1++));
    275 		*p2++ = s >> 7;
    276 		s = (k_cn* (*p0++))+(k_dn * (*p1++));
    277 		*p2++ = s >> 7;
    278 	}
    279 
    280 	/* set sign according to dn2[] = k_cn*cn[] + k_dn*dn[]    */
    281 	for(i = 0; i < L_SUBFR; i++)
    282 	{
    283 		val = dn[i];
    284 		ps = dn2[i];
    285 		if (ps >= 0)
    286 		{
    287 			sign[i] = 32767;             /* sign = +1 (Q12) */
    288 			vec[i] = -32768;
    289 		} else
    290 		{
    291 			sign[i] = -32768;            /* sign = -1 (Q12) */
    292 			vec[i] = 32767;
    293 			dn[i] = -val;
    294 			dn2[i] = -ps;
    295 		}
    296 	}
    297 	/*----------------------------------------------------------------*
    298 	 * Select NB_MAX position per track according to max of dn2[].    *
    299 	 *----------------------------------------------------------------*/
    300 	pos = 0;
    301 	for (i = 0; i < NB_TRACK; i++)
    302 	{
    303 		for (k = 0; k < NB_MAX; k++)
    304 		{
    305 			ps = -1;
    306 			for (j = i; j < L_SUBFR; j += STEP)
    307 			{
    308 				if(dn2[j] > ps)
    309 				{
    310 					ps = dn2[j];
    311 					pos = j;
    312 				}
    313 			}
    314 			dn2[pos] = (k - NB_MAX);     /* dn2 < 0 when position is selected */
    315 			if (k == 0)
    316 			{
    317 				pos_max[i] = pos;
    318 			}
    319 		}
    320 	}
    321 
    322 	/*--------------------------------------------------------------*
    323 	 * Scale h[] to avoid overflow and to get maximum of precision  *
    324 	 * on correlation.                                              *
    325 	 *                                                              *
    326 	 * Maximum of h[] (h[0]) is fixed to 2048 (MAX16 / 16).         *
    327 	 *  ==> This allow addition of 16 pulses without saturation.    *
    328 	 *                                                              *
    329 	 * Energy worst case (on resonant impulse response),            *
    330 	 * - energy of h[] is approximately MAX/16.                     *
    331 	 * - During search, the energy is divided by 8 to avoid         *
    332 	 *   overflow on "alp". (energy of h[] = MAX/128).              *
    333 	 *  ==> "alp" worst case detected is 22854 on sinusoidal wave.  *
    334 	 *--------------------------------------------------------------*/
    335 
    336 	/* impulse response buffer for fast computation */
    337 
    338 	h = h_buf;
    339 	h_inv = h_buf + (2 * L_SUBFR);
    340 	L_tmp = 0;
    341 	for (i = 0; i < L_SUBFR; i++)
    342 	{
    343 		*h++ = 0;
    344 		*h_inv++ = 0;
    345 		L_tmp += (H[i] * H[i]) << 1;
    346 	}
    347 	/* scale h[] down (/2) when energy of h[] is high with many pulses used */
    348 	val = extract_h(L_tmp);
    349 	h_shift = 0;
    350 
    351 	if ((nb_pulse >= 12) && (val > 1024))
    352 	{
    353 		h_shift = 1;
    354 	}
    355 	p0 = H;
    356 	p1 = h;
    357 	p2 = h_inv;
    358 
    359 	for (i = 0; i < L_SUBFR/4; i++)
    360 	{
    361 		*p1 = *p0++ >> h_shift;
    362 		*p2++ = -(*p1++);
    363 		*p1 = *p0++ >> h_shift;
    364 		*p2++ = -(*p1++);
    365 		*p1 = *p0++ >> h_shift;
    366 		*p2++ = -(*p1++);
    367 		*p1 = *p0++ >> h_shift;
    368 		*p2++ = -(*p1++);
    369 	}
    370 
    371 	/*------------------------------------------------------------*
    372 	 * Compute rrixix[][] needed for the codebook search.         *
    373 	 * This algorithm compute impulse response energy of all      *
    374 	 * positions (16) in each track (4).       Total = 4x16 = 64. *
    375 	 *------------------------------------------------------------*/
    376 
    377 	/* storage order --> i3i3, i2i2, i1i1, i0i0 */
    378 
    379 	/* Init pointers to last position of rrixix[] */
    380 	p0 = &rrixix[0][NB_POS - 1];
    381 	p1 = &rrixix[1][NB_POS - 1];
    382 	p2 = &rrixix[2][NB_POS - 1];
    383 	p3 = &rrixix[3][NB_POS - 1];
    384 
    385 	ptr_h1 = h;
    386 	cor = 0x00008000L;                             /* for rounding */
    387 	for (i = 0; i < NB_POS; i++)
    388 	{
    389 		cor += vo_L_mult((*ptr_h1), (*ptr_h1));
    390 		ptr_h1++;
    391 		*p3-- = extract_h(cor);
    392 		cor += vo_L_mult((*ptr_h1), (*ptr_h1));
    393 		ptr_h1++;
    394 		*p2-- = extract_h(cor);
    395 		cor += vo_L_mult((*ptr_h1), (*ptr_h1));
    396 		ptr_h1++;
    397 		*p1-- = extract_h(cor);
    398 		cor += vo_L_mult((*ptr_h1), (*ptr_h1));
    399 		ptr_h1++;
    400 		*p0-- = extract_h(cor);
    401 	}
    402 
    403 	/*------------------------------------------------------------*
    404 	 * Compute rrixiy[][] needed for the codebook search.         *
    405 	 * This algorithm compute correlation between 2 pulses        *
    406 	 * (2 impulses responses) in 4 possible adjacents tracks.     *
    407 	 * (track 0-1, 1-2, 2-3 and 3-0).     Total = 4x16x16 = 1024. *
    408 	 *------------------------------------------------------------*/
    409 
    410 	/* storage order --> i2i3, i1i2, i0i1, i3i0 */
    411 
    412 	pos = MSIZE - 1;
    413 	ptr_hf = h + 1;
    414 
    415 	for (k = 0; k < NB_POS; k++)
    416 	{
    417 		p3 = &rrixiy[2][pos];
    418 		p2 = &rrixiy[1][pos];
    419 		p1 = &rrixiy[0][pos];
    420 		p0 = &rrixiy[3][pos - NB_POS];
    421 
    422 		cor = 0x00008000L;                   /* for rounding */
    423 		ptr_h1 = h;
    424 		ptr_h2 = ptr_hf;
    425 
    426 		for (i = k + 1; i < NB_POS; i++)
    427 		{
    428 			cor += vo_L_mult((*ptr_h1), (*ptr_h2));
    429 			ptr_h1++;
    430 			ptr_h2++;
    431 			*p3 = extract_h(cor);
    432 			cor += vo_L_mult((*ptr_h1), (*ptr_h2));
    433 			ptr_h1++;
    434 			ptr_h2++;
    435 			*p2 = extract_h(cor);
    436 			cor += vo_L_mult((*ptr_h1), (*ptr_h2));
    437 			ptr_h1++;
    438 			ptr_h2++;
    439 			*p1 = extract_h(cor);
    440 			cor += vo_L_mult((*ptr_h1), (*ptr_h2));
    441 			ptr_h1++;
    442 			ptr_h2++;
    443 			*p0 = extract_h(cor);
    444 
    445 			p3 -= (NB_POS + 1);
    446 			p2 -= (NB_POS + 1);
    447 			p1 -= (NB_POS + 1);
    448 			p0 -= (NB_POS + 1);
    449 		}
    450 		cor += vo_L_mult((*ptr_h1), (*ptr_h2));
    451 		ptr_h1++;
    452 		ptr_h2++;
    453 		*p3 = extract_h(cor);
    454 		cor += vo_L_mult((*ptr_h1), (*ptr_h2));
    455 		ptr_h1++;
    456 		ptr_h2++;
    457 		*p2 = extract_h(cor);
    458 		cor += vo_L_mult((*ptr_h1), (*ptr_h2));
    459 		ptr_h1++;
    460 		ptr_h2++;
    461 		*p1 = extract_h(cor);
    462 
    463 		pos -= NB_POS;
    464 		ptr_hf += STEP;
    465 	}
    466 
    467 	/* storage order --> i3i0, i2i3, i1i2, i0i1 */
    468 
    469 	pos = MSIZE - 1;
    470 	ptr_hf = h + 3;
    471 
    472 	for (k = 0; k < NB_POS; k++)
    473 	{
    474 		p3 = &rrixiy[3][pos];
    475 		p2 = &rrixiy[2][pos - 1];
    476 		p1 = &rrixiy[1][pos - 1];
    477 		p0 = &rrixiy[0][pos - 1];
    478 
    479 		cor = 0x00008000L;								/* for rounding */
    480 		ptr_h1 = h;
    481 		ptr_h2 = ptr_hf;
    482 
    483 		for (i = k + 1; i < NB_POS; i++)
    484 		{
    485 			cor += vo_L_mult((*ptr_h1), (*ptr_h2));
    486 			ptr_h1++;
    487 			ptr_h2++;
    488 			*p3 = extract_h(cor);
    489 			cor += vo_L_mult((*ptr_h1), (*ptr_h2));
    490 			ptr_h1++;
    491 			ptr_h2++;
    492 			*p2 = extract_h(cor);
    493 			cor += vo_L_mult((*ptr_h1), (*ptr_h2));
    494 			ptr_h1++;
    495 			ptr_h2++;
    496 			*p1 = extract_h(cor);
    497 			cor += vo_L_mult((*ptr_h1), (*ptr_h2));
    498 			ptr_h1++;
    499 			ptr_h2++;
    500 			*p0 = extract_h(cor);
    501 
    502 			p3 -= (NB_POS + 1);
    503 			p2 -= (NB_POS + 1);
    504 			p1 -= (NB_POS + 1);
    505 			p0 -= (NB_POS + 1);
    506 		}
    507 		cor += vo_L_mult((*ptr_h1), (*ptr_h2));
    508 		ptr_h1++;
    509 		ptr_h2++;
    510 		*p3 = extract_h(cor);
    511 
    512 		pos--;
    513 		ptr_hf += STEP;
    514 	}
    515 
    516 	/*------------------------------------------------------------*
    517 	 * Modification of rrixiy[][] to take signs into account.     *
    518 	 *------------------------------------------------------------*/
    519 
    520 	p0 = &rrixiy[0][0];
    521 
    522 	for (k = 0; k < NB_TRACK; k++)
    523 	{
    524 		j_temp = (k + 1)&0x03;
    525 		for (i = k; i < L_SUBFR; i += STEP)
    526 		{
    527 			psign = sign;
    528 			if (psign[i] < 0)
    529 			{
    530 				psign = vec;
    531 			}
    532 			j = j_temp;
    533 			for (; j < L_SUBFR; j += STEP)
    534 			{
    535 				*p0 = vo_mult(*p0, psign[j]);
    536 				p0++;
    537 			}
    538 		}
    539 	}
    540 
    541 	/*-------------------------------------------------------------------*
    542 	 *                       Deep first search                           *
    543 	 *-------------------------------------------------------------------*/
    544 
    545 	psk = -1;
    546 	alpk = 1;
    547 
    548 	for (k = 0; k < nbiter; k++)
    549 	{
    550 		j_temp = k<<2;
    551 		for (i = 0; i < nb_pulse; i++)
    552 			ipos[i] = tipos[j_temp + i];
    553 
    554 		if(nbbits == 20)
    555 		{
    556 			pos = 0;
    557 			ps = 0;
    558 			alp = 0;
    559 			for (i = 0; i < L_SUBFR; i++)
    560 			{
    561 				vec[i] = 0;
    562 			}
    563 		} else if ((nbbits == 36) || (nbbits == 44))
    564 		{
    565 			/* first stage: fix 2 pulses */
    566 			pos = 2;
    567 
    568 			ix = ind[0] = pos_max[ipos[0]];
    569 			iy = ind[1] = pos_max[ipos[1]];
    570 			ps = dn[ix] + dn[iy];
    571 			i = ix >> 2;                /* ix / STEP */
    572 			j = iy >> 2;                /* iy / STEP */
    573 			s = rrixix[ipos[0]][i] << 13;
    574 			s += rrixix[ipos[1]][j] << 13;
    575 			i = (i << 4) + j;         /* (ix/STEP)*NB_POS + (iy/STEP) */
    576 			s += rrixiy[ipos[0]][i] << 14;
    577 			alp = (s + 0x8000) >> 16;
    578 			if (sign[ix] < 0)
    579 				p0 = h_inv - ix;
    580 			else
    581 				p0 = h - ix;
    582 			if (sign[iy] < 0)
    583 				p1 = h_inv - iy;
    584 			else
    585 				p1 = h - iy;
    586 
    587 			for (i = 0; i < L_SUBFR; i++)
    588 			{
    589 				vec[i] = (*p0++) + (*p1++);
    590 			}
    591 
    592 			if(nbbits == 44)
    593 			{
    594 				ipos[8] = 0;
    595 				ipos[9] = 1;
    596 			}
    597 		} else
    598 		{
    599 			/* first stage: fix 4 pulses */
    600 			pos = 4;
    601 
    602 			ix = ind[0] = pos_max[ipos[0]];
    603 			iy = ind[1] = pos_max[ipos[1]];
    604 			i = ind[2] = pos_max[ipos[2]];
    605 			j = ind[3] = pos_max[ipos[3]];
    606 			ps = add1(add1(add1(dn[ix], dn[iy]), dn[i]), dn[j]);
    607 
    608 			if (sign[ix] < 0)
    609 				p0 = h_inv - ix;
    610 			else
    611 				p0 = h - ix;
    612 
    613 			if (sign[iy] < 0)
    614 				p1 = h_inv - iy;
    615 			else
    616 				p1 = h - iy;
    617 
    618 			if (sign[i] < 0)
    619 				p2 = h_inv - i;
    620 			else
    621 				p2 = h - i;
    622 
    623 			if (sign[j] < 0)
    624 				p3 = h_inv - j;
    625 			else
    626 				p3 = h - j;
    627 
    628 			L_tmp = 0L;
    629 			for(i = 0; i < L_SUBFR; i++)
    630 			{
    631 				vec[i]  = add1(add1(add1(*p0++, *p1++), *p2++), *p3++);
    632 				L_tmp  += (vec[i] * vec[i]) << 1;
    633 			}
    634 
    635 			alp = ((L_tmp >> 3) + 0x8000) >> 16;
    636 
    637 			if(nbbits == 72)
    638 			{
    639 				ipos[16] = 0;
    640 				ipos[17] = 1;
    641 			}
    642 		}
    643 
    644 		/* other stages of 2 pulses */
    645 
    646 		for (j = pos, st = 0; j < nb_pulse; j += 2, st++)
    647 		{
    648 			/*--------------------------------------------------*
    649 			 * Calculate correlation of all possible positions  *
    650 			 * of the next 2 pulses with previous fixed pulses. *
    651 			 * Each pulse can have 16 possible positions.       *
    652 			 *--------------------------------------------------*/
    653 			if(ipos[j] == 3)
    654 			{
    655 				cor_h_vec_30(h, vec, ipos[j], sign, rrixix, cor_x, cor_y);
    656 			}
    657 			else
    658 			{
    659 #ifdef ASM_OPT                 /* asm optimization branch */
    660 				cor_h_vec_012_asm(h, vec, ipos[j], sign, rrixix, cor_x, cor_y);
    661 #else
    662 				cor_h_vec_012(h, vec, ipos[j], sign, rrixix, cor_x, cor_y);
    663 #endif
    664 			}
    665 			/*--------------------------------------------------*
    666 			 * Find best positions of 2 pulses.                 *
    667 			 *--------------------------------------------------*/
    668 			search_ixiy(nbpos[st], ipos[j], ipos[j + 1], &ps, &alp,
    669 					&ix, &iy, dn, dn2, cor_x, cor_y, rrixiy);
    670 
    671 			ind[j] = ix;
    672 			ind[j + 1] = iy;
    673 
    674 			if (sign[ix] < 0)
    675 				p0 = h_inv - ix;
    676 			else
    677 				p0 = h - ix;
    678 			if (sign[iy] < 0)
    679 				p1 = h_inv - iy;
    680 			else
    681 				p1 = h - iy;
    682 
    683 			for (i = 0; i < L_SUBFR; i+=4)
    684 			{
    685 				vec[i]   += add1((*p0++), (*p1++));
    686 				vec[i+1] += add1((*p0++), (*p1++));
    687 				vec[i+2] += add1((*p0++), (*p1++));
    688 				vec[i+3] += add1((*p0++), (*p1++));
    689 			}
    690 		}
    691 		/* memorise the best codevector */
    692 		ps = vo_mult(ps, ps);
    693 		s = vo_L_msu(vo_L_mult(alpk, ps), psk, alp);
    694 		if (s > 0)
    695 		{
    696 			psk = ps;
    697 			alpk = alp;
    698 			for (i = 0; i < nb_pulse; i++)
    699 			{
    700 				codvec[i] = ind[i];
    701 			}
    702 			for (i = 0; i < L_SUBFR; i++)
    703 			{
    704 				y[i] = vec[i];
    705 			}
    706 		}
    707 	}
    708 	/*-------------------------------------------------------------------*
    709 	 * Build the codeword, the filtered codeword and index of codevector.*
    710 	 *-------------------------------------------------------------------*/
    711 	for (i = 0; i < NPMAXPT * NB_TRACK; i++)
    712 	{
    713 		ind[i] = -1;
    714 	}
    715 	for (i = 0; i < L_SUBFR; i++)
    716 	{
    717 		code[i] = 0;
    718 		y[i] = vo_shr_r(y[i], 3);               /* Q12 to Q9 */
    719 	}
    720 	val = (512 >> h_shift);               /* codeword in Q9 format */
    721 	for (k = 0; k < nb_pulse; k++)
    722 	{
    723 		i = codvec[k];                       /* read pulse position */
    724 		j = sign[i];                         /* read sign           */
    725 		index = i >> 2;                 /* index = pos of pulse (0..15) */
    726 		track = (Word16) (i & 0x03);         /* track = i % NB_TRACK (0..3)  */
    727 
    728 		if (j > 0)
    729 		{
    730 			code[i] += val;
    731 			codvec[k] += 128;
    732 		} else
    733 		{
    734 			code[i] -= val;
    735 			index += NB_POS;
    736 		}
    737 
    738 		i = (Word16)((vo_L_mult(track, NPMAXPT) >> 1));
    739 
    740 		while (ind[i] >= 0)
    741 		{
    742 			i += 1;
    743 		}
    744 		ind[i] = index;
    745 	}
    746 
    747 	k = 0;
    748 	/* Build index of codevector */
    749 	if(nbbits == 20)
    750 	{
    751 		for (track = 0; track < NB_TRACK; track++)
    752 		{
    753 			_index[track] = (Word16)(quant_1p_N1(ind[k], 4));
    754 			k += NPMAXPT;
    755 		}
    756 	} else if(nbbits == 36)
    757 	{
    758 		for (track = 0; track < NB_TRACK; track++)
    759 		{
    760 			_index[track] = (Word16)(quant_2p_2N1(ind[k], ind[k + 1], 4));
    761 			k += NPMAXPT;
    762 		}
    763 	} else if(nbbits == 44)
    764 	{
    765 		for (track = 0; track < NB_TRACK - 2; track++)
    766 		{
    767 			_index[track] = (Word16)(quant_3p_3N1(ind[k], ind[k + 1], ind[k + 2], 4));
    768 			k += NPMAXPT;
    769 		}
    770 		for (track = 2; track < NB_TRACK; track++)
    771 		{
    772 			_index[track] = (Word16)(quant_2p_2N1(ind[k], ind[k + 1], 4));
    773 			k += NPMAXPT;
    774 		}
    775 	} else if(nbbits == 52)
    776 	{
    777 		for (track = 0; track < NB_TRACK; track++)
    778 		{
    779 			_index[track] = (Word16)(quant_3p_3N1(ind[k], ind[k + 1], ind[k + 2], 4));
    780 			k += NPMAXPT;
    781 		}
    782 	} else if(nbbits == 64)
    783 	{
    784 		for (track = 0; track < NB_TRACK; track++)
    785 		{
    786 			L_index = quant_4p_4N(&ind[k], 4);
    787 			_index[track] = (Word16)((L_index >> 14) & 3);
    788 			_index[track + NB_TRACK] = (Word16)(L_index & 0x3FFF);
    789 			k += NPMAXPT;
    790 		}
    791 	} else if(nbbits == 72)
    792 	{
    793 		for (track = 0; track < NB_TRACK - 2; track++)
    794 		{
    795 			L_index = quant_5p_5N(&ind[k], 4);
    796 			_index[track] = (Word16)((L_index >> 10) & 0x03FF);
    797 			_index[track + NB_TRACK] = (Word16)(L_index & 0x03FF);
    798 			k += NPMAXPT;
    799 		}
    800 		for (track = 2; track < NB_TRACK; track++)
    801 		{
    802 			L_index = quant_4p_4N(&ind[k], 4);
    803 			_index[track] = (Word16)((L_index >> 14) & 3);
    804 			_index[track + NB_TRACK] = (Word16)(L_index & 0x3FFF);
    805 			k += NPMAXPT;
    806 		}
    807 	} else if(nbbits == 88)
    808 	{
    809 		for (track = 0; track < NB_TRACK; track++)
    810 		{
    811 			L_index = quant_6p_6N_2(&ind[k], 4);
    812 			_index[track] = (Word16)((L_index >> 11) & 0x07FF);
    813 			_index[track + NB_TRACK] = (Word16)(L_index & 0x07FF);
    814 			k += NPMAXPT;
    815 		}
    816 	}
    817 	return;
    818 }
    819 
    820 
    821 /*-------------------------------------------------------------------*
    822  * Function  cor_h_vec()                                             *
    823  * ~~~~~~~~~~~~~~~~~~~~~                                             *
    824  * Compute correlations of h[] with vec[] for the specified track.   *
    825  *-------------------------------------------------------------------*/
    826 void cor_h_vec_30(
    827 		Word16 h[],                           /* (i) scaled impulse response                 */
    828 		Word16 vec[],                         /* (i) scaled vector (/8) to correlate with h[] */
    829 		Word16 track,                         /* (i) track to use                            */
    830 		Word16 sign[],                        /* (i) sign vector                             */
    831 		Word16 rrixix[][NB_POS],              /* (i) correlation of h[x] with h[x]      */
    832 		Word16 cor_1[],                       /* (o) result of correlation (NB_POS elements) */
    833 		Word16 cor_2[]                        /* (o) result of correlation (NB_POS elements) */
    834 		)
    835 {
    836 	Word32 i, j, pos, corr;
    837 	Word16 *p0, *p1, *p2,*p3,*cor_x,*cor_y;
    838 	Word32 L_sum1,L_sum2;
    839 	cor_x = cor_1;
    840 	cor_y = cor_2;
    841 	p0 = rrixix[track];
    842 	p3 = rrixix[0];
    843 	pos = track;
    844 
    845 	for (i = 0; i < NB_POS; i+=2)
    846 	{
    847 		L_sum1 = L_sum2 = 0L;
    848 		p1 = h;
    849 		p2 = &vec[pos];
    850 		for (j=pos;j < L_SUBFR; j++)
    851 		{
    852 			L_sum1 += *p1 * *p2;
    853 			p2-=3;
    854 			L_sum2 += *p1++ * *p2;
    855 			p2+=4;
    856 		}
    857 		p2-=3;
    858 		L_sum2 += *p1++ * *p2++;
    859 		L_sum2 += *p1++ * *p2++;
    860 		L_sum2 += *p1++ * *p2++;
    861 
    862 		L_sum1 = (L_sum1 << 2);
    863 		L_sum2 = (L_sum2 << 2);
    864 
    865 		corr = vo_round(L_sum1);
    866 		*cor_x++ = vo_mult(corr, sign[pos]) + (*p0++);
    867 		corr = vo_round(L_sum2);
    868 		*cor_y++ = vo_mult(corr, sign[pos-3]) + (*p3++);
    869 		pos += STEP;
    870 
    871 		L_sum1 = L_sum2 = 0L;
    872 		p1 = h;
    873 		p2 = &vec[pos];
    874 		for (j=pos;j < L_SUBFR; j++)
    875 		{
    876 			L_sum1 += *p1 * *p2;
    877 			p2-=3;
    878 			L_sum2 += *p1++ * *p2;
    879 			p2+=4;
    880 		}
    881 		p2-=3;
    882 		L_sum2 += *p1++ * *p2++;
    883 		L_sum2 += *p1++ * *p2++;
    884 		L_sum2 += *p1++ * *p2++;
    885 
    886 		L_sum1 = (L_sum1 << 2);
    887 		L_sum2 = (L_sum2 << 2);
    888 
    889 		corr = vo_round(L_sum1);
    890 		*cor_x++ = vo_mult(corr, sign[pos]) + (*p0++);
    891 		corr = vo_round(L_sum2);
    892 		*cor_y++ = vo_mult(corr, sign[pos-3]) + (*p3++);
    893 		pos += STEP;
    894 	}
    895 	return;
    896 }
    897 
    898 void cor_h_vec_012(
    899 		Word16 h[],                           /* (i) scaled impulse response                 */
    900 		Word16 vec[],                         /* (i) scaled vector (/8) to correlate with h[] */
    901 		Word16 track,                         /* (i) track to use                            */
    902 		Word16 sign[],                        /* (i) sign vector                             */
    903 		Word16 rrixix[][NB_POS],              /* (i) correlation of h[x] with h[x]      */
    904 		Word16 cor_1[],                       /* (o) result of correlation (NB_POS elements) */
    905 		Word16 cor_2[]                        /* (o) result of correlation (NB_POS elements) */
    906 		)
    907 {
    908 	Word32 i, j, pos, corr;
    909 	Word16 *p0, *p1, *p2,*p3,*cor_x,*cor_y;
    910 	Word32 L_sum1,L_sum2;
    911 	cor_x = cor_1;
    912 	cor_y = cor_2;
    913 	p0 = rrixix[track];
    914 	p3 = rrixix[track+1];
    915 	pos = track;
    916 
    917 	for (i = 0; i < NB_POS; i+=2)
    918 	{
    919 		L_sum1 = L_sum2 = 0L;
    920 		p1 = h;
    921 		p2 = &vec[pos];
    922 		for (j=62-pos ;j >= 0; j--)
    923 		{
    924 			L_sum1 += *p1 * *p2++;
    925 			L_sum2 += *p1++ * *p2;
    926 		}
    927 		L_sum1 += *p1 * *p2;
    928 		L_sum1 = (L_sum1 << 2);
    929 		L_sum2 = (L_sum2 << 2);
    930 
    931 		corr = (L_sum1 + 0x8000) >> 16;
    932 		cor_x[i] = vo_mult(corr, sign[pos]) + (*p0++);
    933 		corr = (L_sum2 + 0x8000) >> 16;
    934 		cor_y[i] = vo_mult(corr, sign[pos + 1]) + (*p3++);
    935 		pos += STEP;
    936 
    937 		L_sum1 = L_sum2 = 0L;
    938 		p1 = h;
    939 		p2 = &vec[pos];
    940 		for (j= 62-pos;j >= 0; j--)
    941 		{
    942 			L_sum1 += *p1 * *p2++;
    943 			L_sum2 += *p1++ * *p2;
    944 		}
    945 		L_sum1 += *p1 * *p2;
    946 		L_sum1 = (L_sum1 << 2);
    947 		L_sum2 = (L_sum2 << 2);
    948 
    949 		corr = (L_sum1 + 0x8000) >> 16;
    950 		cor_x[i+1] = vo_mult(corr, sign[pos]) + (*p0++);
    951 		corr = (L_sum2 + 0x8000) >> 16;
    952 		cor_y[i+1] = vo_mult(corr, sign[pos + 1]) + (*p3++);
    953 		pos += STEP;
    954 	}
    955 	return;
    956 }
    957 
    958 /*-------------------------------------------------------------------*
    959  * Function  search_ixiy()                                           *
    960  * ~~~~~~~~~~~~~~~~~~~~~~~                                           *
    961  * Find the best positions of 2 pulses in a subframe.                *
    962  *-------------------------------------------------------------------*/
    963 
    964 void search_ixiy(
    965 		Word16 nb_pos_ix,                     /* (i) nb of pos for pulse 1 (1..8)       */
    966 		Word16 track_x,                       /* (i) track of pulse 1                   */
    967 		Word16 track_y,                       /* (i) track of pulse 2                   */
    968 		Word16 * ps,                          /* (i/o) correlation of all fixed pulses  */
    969 		Word16 * alp,                         /* (i/o) energy of all fixed pulses       */
    970 		Word16 * ix,                          /* (o) position of pulse 1                */
    971 		Word16 * iy,                          /* (o) position of pulse 2                */
    972 		Word16 dn[],                          /* (i) corr. between target and h[]       */
    973 		Word16 dn2[],                         /* (i) vector of selected positions       */
    974 		Word16 cor_x[],                       /* (i) corr. of pulse 1 with fixed pulses */
    975 		Word16 cor_y[],                       /* (i) corr. of pulse 2 with fixed pulses */
    976 		Word16 rrixiy[][MSIZE]                /* (i) corr. of pulse 1 with pulse 2   */
    977 		)
    978 {
    979 	Word32 x, y, pos, thres_ix;
    980 	Word16 ps1, ps2, sq, sqk;
    981 	Word16 alp_16, alpk;
    982 	Word16 *p0, *p1, *p2;
    983 	Word32 s, alp0, alp1, alp2;
    984 
    985 	p0 = cor_x;
    986 	p1 = cor_y;
    987 	p2 = rrixiy[track_x];
    988 
    989 	thres_ix = nb_pos_ix - NB_MAX;
    990 
    991 	alp0 = L_deposit_h(*alp);
    992 	alp0 = (alp0 + 0x00008000L);       /* for rounding */
    993 
    994 	sqk = -1;
    995 	alpk = 1;
    996 
    997 	for (x = track_x; x < L_SUBFR; x += STEP)
    998 	{
    999 		ps1 = *ps + dn[x];
   1000 		alp1 = alp0 + ((*p0++)<<13);
   1001 
   1002 		if (dn2[x] < thres_ix)
   1003 		{
   1004 			pos = -1;
   1005 			for (y = track_y; y < L_SUBFR; y += STEP)
   1006 			{
   1007 				ps2 = add1(ps1, dn[y]);
   1008 
   1009 				alp2 = alp1 + ((*p1++)<<13);
   1010 				alp2 = alp2 + ((*p2++)<<14);
   1011 				alp_16 = extract_h(alp2);
   1012 				sq = vo_mult(ps2, ps2);
   1013 				s = vo_L_mult(alpk, sq) - ((sqk * alp_16)<<1);
   1014 
   1015 				if (s > 0)
   1016 				{
   1017 					sqk = sq;
   1018 					alpk = alp_16;
   1019 					pos = y;
   1020 				}
   1021 			}
   1022 			p1 -= NB_POS;
   1023 
   1024 			if (pos >= 0)
   1025 			{
   1026 				*ix = x;
   1027 				*iy = pos;
   1028 			}
   1029 		} else
   1030 		{
   1031 			p2 += NB_POS;
   1032 		}
   1033 	}
   1034 
   1035 	*ps = add1(*ps, add1(dn[*ix], dn[*iy]));
   1036 	*alp = alpk;
   1037 
   1038 	return;
   1039 }
   1040 
   1041 
   1042 
   1043 
   1044