1 /* 2 ** Copyright 2003-2010, VisualOn, Inc. 3 ** 4 ** Licensed under the Apache License, Version 2.0 (the "License"); 5 ** you may not use this file except in compliance with the License. 6 ** You may obtain a copy of the License at 7 ** 8 ** http://www.apache.org/licenses/LICENSE-2.0 9 ** 10 ** Unless required by applicable law or agreed to in writing, software 11 ** distributed under the License is distributed on an "AS IS" BASIS, 12 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 ** See the License for the specific language governing permissions and 14 ** limitations under the License. 15 */ 16 17 /*********************************************************************** 18 * File: c4t64fx.c * 19 * * 20 * Description:Performs algebraic codebook search for higher modes * 21 * * 22 ************************************************************************/ 23 24 /************************************************************************ 25 * Function: ACELP_4t64_fx() * 26 * * 27 * 20, 36, 44, 52, 64, 72, 88 bits algebraic codebook. * 28 * 4 tracks x 16 positions per track = 64 samples. * 29 * * 30 * 20 bits --> 4 pulses in a frame of 64 samples. * 31 * 36 bits --> 8 pulses in a frame of 64 samples. * 32 * 44 bits --> 10 pulses in a frame of 64 samples. * 33 * 52 bits --> 12 pulses in a frame of 64 samples. * 34 * 64 bits --> 16 pulses in a frame of 64 samples. * 35 * 72 bits --> 18 pulses in a frame of 64 samples. * 36 * 88 bits --> 24 pulses in a frame of 64 samples. * 37 * * 38 * All pulses can have two (2) possible amplitudes: +1 or -1. * 39 * Each pulse can have sixteen (16) possible positions. * 40 *************************************************************************/ 41 42 #include "typedef.h" 43 #include "basic_op.h" 44 #include "math_op.h" 45 #include "acelp.h" 46 #include "cnst.h" 47 48 #include "q_pulse.h" 49 50 static Word16 tipos[36] = { 51 0, 1, 2, 3, /* starting point &ipos[0], 1st iter */ 52 1, 2, 3, 0, /* starting point &ipos[4], 2nd iter */ 53 2, 3, 0, 1, /* starting point &ipos[8], 3rd iter */ 54 3, 0, 1, 2, /* starting point &ipos[12], 4th iter */ 55 0, 1, 2, 3, 56 1, 2, 3, 0, 57 2, 3, 0, 1, 58 3, 0, 1, 2, 59 0, 1, 2, 3}; /* end point for 24 pulses &ipos[35], 4th iter */ 60 61 #define NB_PULSE_MAX 24 62 63 #define L_SUBFR 64 64 #define NB_TRACK 4 65 #define STEP 4 66 #define NB_POS 16 67 #define MSIZE 256 68 #define NB_MAX 8 69 #define NPMAXPT ((NB_PULSE_MAX+NB_TRACK-1)/NB_TRACK) 70 71 /* Private functions */ 72 void cor_h_vec_012( 73 Word16 h[], /* (i) scaled impulse response */ 74 Word16 vec[], /* (i) scaled vector (/8) to correlate with h[] */ 75 Word16 track, /* (i) track to use */ 76 Word16 sign[], /* (i) sign vector */ 77 Word16 rrixix[][NB_POS], /* (i) correlation of h[x] with h[x] */ 78 Word16 cor_1[], /* (o) result of correlation (NB_POS elements) */ 79 Word16 cor_2[] /* (o) result of correlation (NB_POS elements) */ 80 ); 81 82 void cor_h_vec_012_asm( 83 Word16 h[], /* (i) scaled impulse response */ 84 Word16 vec[], /* (i) scaled vector (/8) to correlate with h[] */ 85 Word16 track, /* (i) track to use */ 86 Word16 sign[], /* (i) sign vector */ 87 Word16 rrixix[][NB_POS], /* (i) correlation of h[x] with h[x] */ 88 Word16 cor_1[], /* (o) result of correlation (NB_POS elements) */ 89 Word16 cor_2[] /* (o) result of correlation (NB_POS elements) */ 90 ); 91 92 void cor_h_vec_30( 93 Word16 h[], /* (i) scaled impulse response */ 94 Word16 vec[], /* (i) scaled vector (/8) to correlate with h[] */ 95 Word16 track, /* (i) track to use */ 96 Word16 sign[], /* (i) sign vector */ 97 Word16 rrixix[][NB_POS], /* (i) correlation of h[x] with h[x] */ 98 Word16 cor_1[], /* (o) result of correlation (NB_POS elements) */ 99 Word16 cor_2[] /* (o) result of correlation (NB_POS elements) */ 100 ); 101 102 void search_ixiy( 103 Word16 nb_pos_ix, /* (i) nb of pos for pulse 1 (1..8) */ 104 Word16 track_x, /* (i) track of pulse 1 */ 105 Word16 track_y, /* (i) track of pulse 2 */ 106 Word16 * ps, /* (i/o) correlation of all fixed pulses */ 107 Word16 * alp, /* (i/o) energy of all fixed pulses */ 108 Word16 * ix, /* (o) position of pulse 1 */ 109 Word16 * iy, /* (o) position of pulse 2 */ 110 Word16 dn[], /* (i) corr. between target and h[] */ 111 Word16 dn2[], /* (i) vector of selected positions */ 112 Word16 cor_x[], /* (i) corr. of pulse 1 with fixed pulses */ 113 Word16 cor_y[], /* (i) corr. of pulse 2 with fixed pulses */ 114 Word16 rrixiy[][MSIZE] /* (i) corr. of pulse 1 with pulse 2 */ 115 ); 116 117 118 void ACELP_4t64_fx( 119 Word16 dn[], /* (i) <12b : correlation between target x[] and H[] */ 120 Word16 cn[], /* (i) <12b : residual after long term prediction */ 121 Word16 H[], /* (i) Q12: impulse response of weighted synthesis filter */ 122 Word16 code[], /* (o) Q9 : algebraic (fixed) codebook excitation */ 123 Word16 y[], /* (o) Q9 : filtered fixed codebook excitation */ 124 Word16 nbbits, /* (i) : 20, 36, 44, 52, 64, 72 or 88 bits */ 125 Word16 ser_size, /* (i) : bit rate */ 126 Word16 _index[] /* (o) : index (20): 5+5+5+5 = 20 bits. */ 127 /* (o) : index (36): 9+9+9+9 = 36 bits. */ 128 /* (o) : index (44): 13+9+13+9 = 44 bits. */ 129 /* (o) : index (52): 13+13+13+13 = 52 bits. */ 130 /* (o) : index (64): 2+2+2+2+14+14+14+14 = 64 bits. */ 131 /* (o) : index (72): 10+2+10+2+10+14+10+14 = 72 bits. */ 132 /* (o) : index (88): 11+11+11+11+11+11+11+11 = 88 bits. */ 133 ) 134 { 135 Word32 i, j, k; 136 Word16 st, ix, iy, pos, index, track, nb_pulse, nbiter, j_temp; 137 Word16 psk, ps, alpk, alp, val, k_cn, k_dn, exp; 138 Word16 *p0, *p1, *p2, *p3, *psign; 139 Word16 *h, *h_inv, *ptr_h1, *ptr_h2, *ptr_hf, h_shift; 140 Word32 s, cor, L_tmp, L_index; 141 Word16 dn2[L_SUBFR], sign[L_SUBFR], vec[L_SUBFR]; 142 Word16 ind[NPMAXPT * NB_TRACK]; 143 Word16 codvec[NB_PULSE_MAX], nbpos[10]; 144 Word16 cor_x[NB_POS], cor_y[NB_POS], pos_max[NB_TRACK]; 145 Word16 h_buf[4 * L_SUBFR]; 146 Word16 rrixix[NB_TRACK][NB_POS], rrixiy[NB_TRACK][MSIZE]; 147 Word16 ipos[NB_PULSE_MAX]; 148 149 switch (nbbits) 150 { 151 case 20: /* 20 bits, 4 pulses, 4 tracks */ 152 nbiter = 4; /* 4x16x16=1024 loop */ 153 alp = 8192; /* alp = 2.0 (Q12) */ 154 nb_pulse = 4; 155 nbpos[0] = 4; 156 nbpos[1] = 8; 157 break; 158 case 36: /* 36 bits, 8 pulses, 4 tracks */ 159 nbiter = 4; /* 4x20x16=1280 loop */ 160 alp = 4096; /* alp = 1.0 (Q12) */ 161 nb_pulse = 8; 162 nbpos[0] = 4; 163 nbpos[1] = 8; 164 nbpos[2] = 8; 165 break; 166 case 44: /* 44 bits, 10 pulses, 4 tracks */ 167 nbiter = 4; /* 4x26x16=1664 loop */ 168 alp = 4096; /* alp = 1.0 (Q12) */ 169 nb_pulse = 10; 170 nbpos[0] = 4; 171 nbpos[1] = 6; 172 nbpos[2] = 8; 173 nbpos[3] = 8; 174 break; 175 case 52: /* 52 bits, 12 pulses, 4 tracks */ 176 nbiter = 4; /* 4x26x16=1664 loop */ 177 alp = 4096; /* alp = 1.0 (Q12) */ 178 nb_pulse = 12; 179 nbpos[0] = 4; 180 nbpos[1] = 6; 181 nbpos[2] = 8; 182 nbpos[3] = 8; 183 break; 184 case 64: /* 64 bits, 16 pulses, 4 tracks */ 185 nbiter = 3; /* 3x36x16=1728 loop */ 186 alp = 3277; /* alp = 0.8 (Q12) */ 187 nb_pulse = 16; 188 nbpos[0] = 4; 189 nbpos[1] = 4; 190 nbpos[2] = 6; 191 nbpos[3] = 6; 192 nbpos[4] = 8; 193 nbpos[5] = 8; 194 break; 195 case 72: /* 72 bits, 18 pulses, 4 tracks */ 196 nbiter = 3; /* 3x35x16=1680 loop */ 197 alp = 3072; /* alp = 0.75 (Q12) */ 198 nb_pulse = 18; 199 nbpos[0] = 2; 200 nbpos[1] = 3; 201 nbpos[2] = 4; 202 nbpos[3] = 5; 203 nbpos[4] = 6; 204 nbpos[5] = 7; 205 nbpos[6] = 8; 206 break; 207 case 88: /* 88 bits, 24 pulses, 4 tracks */ 208 if(ser_size > 462) 209 nbiter = 1; 210 else 211 nbiter = 2; /* 2x53x16=1696 loop */ 212 213 alp = 2048; /* alp = 0.5 (Q12) */ 214 nb_pulse = 24; 215 nbpos[0] = 2; 216 nbpos[1] = 2; 217 nbpos[2] = 3; 218 nbpos[3] = 4; 219 nbpos[4] = 5; 220 nbpos[5] = 6; 221 nbpos[6] = 7; 222 nbpos[7] = 8; 223 nbpos[8] = 8; 224 nbpos[9] = 8; 225 break; 226 default: 227 nbiter = 0; 228 alp = 0; 229 nb_pulse = 0; 230 } 231 232 for (i = 0; i < nb_pulse; i++) 233 { 234 codvec[i] = i; 235 } 236 237 /*----------------------------------------------------------------* 238 * Find sign for each pulse position. * 239 *----------------------------------------------------------------*/ 240 /* calculate energy for normalization of cn[] and dn[] */ 241 /* set k_cn = 32..32767 (ener_cn = 2^30..256-0) */ 242 #ifdef ASM_OPT /* asm optimization branch */ 243 s = Dot_product12_asm(cn, cn, L_SUBFR, &exp); 244 #else 245 s = Dot_product12(cn, cn, L_SUBFR, &exp); 246 #endif 247 248 Isqrt_n(&s, &exp); 249 s = L_shl(s, (exp + 5)); 250 k_cn = extract_h(L_add(s, 0x8000)); 251 252 /* set k_dn = 32..512 (ener_dn = 2^30..2^22) */ 253 #ifdef ASM_OPT /* asm optimization branch */ 254 s = Dot_product12_asm(dn, dn, L_SUBFR, &exp); 255 #else 256 s = Dot_product12(dn, dn, L_SUBFR, &exp); 257 #endif 258 259 Isqrt_n(&s, &exp); 260 k_dn = voround(L_shl(s, (exp + 5 + 3))); /* k_dn = 256..4096 */ 261 k_dn = vo_mult_r(alp, k_dn); /* alp in Q12 */ 262 263 /* mix normalized cn[] and dn[] */ 264 p0 = cn; 265 p1 = dn; 266 p2 = dn2; 267 268 for (i = 0; i < L_SUBFR/4; i++) 269 { 270 s = L_add((k_cn* (*p0++)), (k_dn * (*p1++))); 271 *p2++ = s >> 7; 272 s = L_add((k_cn* (*p0++)), (k_dn * (*p1++))); 273 *p2++ = s >> 7; 274 s = L_add((k_cn* (*p0++)), (k_dn * (*p1++))); 275 *p2++ = s >> 7; 276 s = L_add((k_cn* (*p0++)), (k_dn * (*p1++))); 277 *p2++ = s >> 7; 278 } 279 280 /* set sign according to dn2[] = k_cn*cn[] + k_dn*dn[] */ 281 for(i = 0; i < L_SUBFR; i++) 282 { 283 val = dn[i]; 284 ps = dn2[i]; 285 if (ps >= 0) 286 { 287 sign[i] = 32767; /* sign = +1 (Q12) */ 288 vec[i] = -32768; 289 } else 290 { 291 sign[i] = -32768; /* sign = -1 (Q12) */ 292 vec[i] = 32767; 293 dn[i] = -val; 294 dn2[i] = -ps; 295 } 296 } 297 /*----------------------------------------------------------------* 298 * Select NB_MAX position per track according to max of dn2[]. * 299 *----------------------------------------------------------------*/ 300 pos = 0; 301 for (i = 0; i < NB_TRACK; i++) 302 { 303 for (k = 0; k < NB_MAX; k++) 304 { 305 ps = -1; 306 for (j = i; j < L_SUBFR; j += STEP) 307 { 308 if(dn2[j] > ps) 309 { 310 ps = dn2[j]; 311 pos = j; 312 } 313 } 314 dn2[pos] = (k - NB_MAX); /* dn2 < 0 when position is selected */ 315 if (k == 0) 316 { 317 pos_max[i] = pos; 318 } 319 } 320 } 321 322 /*--------------------------------------------------------------* 323 * Scale h[] to avoid overflow and to get maximum of precision * 324 * on correlation. * 325 * * 326 * Maximum of h[] (h[0]) is fixed to 2048 (MAX16 / 16). * 327 * ==> This allow addition of 16 pulses without saturation. * 328 * * 329 * Energy worst case (on resonant impulse response), * 330 * - energy of h[] is approximately MAX/16. * 331 * - During search, the energy is divided by 8 to avoid * 332 * overflow on "alp". (energy of h[] = MAX/128). * 333 * ==> "alp" worst case detected is 22854 on sinusoidal wave. * 334 *--------------------------------------------------------------*/ 335 336 /* impulse response buffer for fast computation */ 337 338 h = h_buf; 339 h_inv = h_buf + (2 * L_SUBFR); 340 L_tmp = 0; 341 for (i = 0; i < L_SUBFR; i++) 342 { 343 *h++ = 0; 344 *h_inv++ = 0; 345 L_tmp = L_add(L_tmp, (H[i] * H[i]) << 1); 346 } 347 /* scale h[] down (/2) when energy of h[] is high with many pulses used */ 348 val = extract_h(L_tmp); 349 h_shift = 0; 350 351 if ((nb_pulse >= 12) && (val > 1024)) 352 { 353 h_shift = 1; 354 } 355 p0 = H; 356 p1 = h; 357 p2 = h_inv; 358 359 for (i = 0; i < L_SUBFR/4; i++) 360 { 361 *p1 = *p0++ >> h_shift; 362 *p2++ = -(*p1++); 363 *p1 = *p0++ >> h_shift; 364 *p2++ = -(*p1++); 365 *p1 = *p0++ >> h_shift; 366 *p2++ = -(*p1++); 367 *p1 = *p0++ >> h_shift; 368 *p2++ = -(*p1++); 369 } 370 371 /*------------------------------------------------------------* 372 * Compute rrixix[][] needed for the codebook search. * 373 * This algorithm compute impulse response energy of all * 374 * positions (16) in each track (4). Total = 4x16 = 64. * 375 *------------------------------------------------------------*/ 376 377 /* storage order --> i3i3, i2i2, i1i1, i0i0 */ 378 379 /* Init pointers to last position of rrixix[] */ 380 p0 = &rrixix[0][NB_POS - 1]; 381 p1 = &rrixix[1][NB_POS - 1]; 382 p2 = &rrixix[2][NB_POS - 1]; 383 p3 = &rrixix[3][NB_POS - 1]; 384 385 ptr_h1 = h; 386 cor = 0x00008000L; /* for rounding */ 387 for (i = 0; i < NB_POS; i++) 388 { 389 cor = L_add(cor, vo_L_mult((*ptr_h1), (*ptr_h1))); 390 ptr_h1++; 391 *p3-- = extract_h(cor); 392 cor = L_add(cor, vo_L_mult((*ptr_h1), (*ptr_h1))); 393 ptr_h1++; 394 *p2-- = extract_h(cor); 395 cor = L_add(cor, vo_L_mult((*ptr_h1), (*ptr_h1))); 396 ptr_h1++; 397 *p1-- = extract_h(cor); 398 cor = L_add(cor, vo_L_mult((*ptr_h1), (*ptr_h1))); 399 ptr_h1++; 400 *p0-- = extract_h(cor); 401 } 402 403 /*------------------------------------------------------------* 404 * Compute rrixiy[][] needed for the codebook search. * 405 * This algorithm compute correlation between 2 pulses * 406 * (2 impulses responses) in 4 possible adjacents tracks. * 407 * (track 0-1, 1-2, 2-3 and 3-0). Total = 4x16x16 = 1024. * 408 *------------------------------------------------------------*/ 409 410 /* storage order --> i2i3, i1i2, i0i1, i3i0 */ 411 412 pos = MSIZE - 1; 413 ptr_hf = h + 1; 414 415 for (k = 0; k < NB_POS; k++) 416 { 417 p3 = &rrixiy[2][pos]; 418 p2 = &rrixiy[1][pos]; 419 p1 = &rrixiy[0][pos]; 420 p0 = &rrixiy[3][pos - NB_POS]; 421 422 cor = 0x00008000L; /* for rounding */ 423 ptr_h1 = h; 424 ptr_h2 = ptr_hf; 425 426 for (i = k + 1; i < NB_POS; i++) 427 { 428 cor = L_add(cor, vo_L_mult((*ptr_h1), (*ptr_h2))); 429 ptr_h1++; 430 ptr_h2++; 431 *p3 = extract_h(cor); 432 cor = L_add(cor, vo_L_mult((*ptr_h1), (*ptr_h2))); 433 ptr_h1++; 434 ptr_h2++; 435 *p2 = extract_h(cor); 436 cor = L_add(cor, vo_L_mult((*ptr_h1), (*ptr_h2))); 437 ptr_h1++; 438 ptr_h2++; 439 *p1 = extract_h(cor); 440 cor = L_add(cor, vo_L_mult((*ptr_h1), (*ptr_h2))); 441 ptr_h1++; 442 ptr_h2++; 443 *p0 = extract_h(cor); 444 445 p3 -= (NB_POS + 1); 446 p2 -= (NB_POS + 1); 447 p1 -= (NB_POS + 1); 448 p0 -= (NB_POS + 1); 449 } 450 cor = L_add(cor, vo_L_mult((*ptr_h1), (*ptr_h2))); 451 ptr_h1++; 452 ptr_h2++; 453 *p3 = extract_h(cor); 454 cor = L_add(cor, vo_L_mult((*ptr_h1), (*ptr_h2))); 455 ptr_h1++; 456 ptr_h2++; 457 *p2 = extract_h(cor); 458 cor = L_add(cor, vo_L_mult((*ptr_h1), (*ptr_h2))); 459 ptr_h1++; 460 ptr_h2++; 461 *p1 = extract_h(cor); 462 463 pos -= NB_POS; 464 ptr_hf += STEP; 465 } 466 467 /* storage order --> i3i0, i2i3, i1i2, i0i1 */ 468 469 pos = MSIZE - 1; 470 ptr_hf = h + 3; 471 472 for (k = 0; k < NB_POS; k++) 473 { 474 p3 = &rrixiy[3][pos]; 475 p2 = &rrixiy[2][pos - 1]; 476 p1 = &rrixiy[1][pos - 1]; 477 p0 = &rrixiy[0][pos - 1]; 478 479 cor = 0x00008000L; /* for rounding */ 480 ptr_h1 = h; 481 ptr_h2 = ptr_hf; 482 483 for (i = k + 1; i < NB_POS; i++) 484 { 485 cor = L_add(cor, vo_L_mult((*ptr_h1), (*ptr_h2))); 486 ptr_h1++; 487 ptr_h2++; 488 *p3 = extract_h(cor); 489 cor = L_add(cor, vo_L_mult((*ptr_h1), (*ptr_h2))); 490 ptr_h1++; 491 ptr_h2++; 492 *p2 = extract_h(cor); 493 cor = L_add(cor, vo_L_mult((*ptr_h1), (*ptr_h2))); 494 ptr_h1++; 495 ptr_h2++; 496 *p1 = extract_h(cor); 497 cor = L_add(cor, vo_L_mult((*ptr_h1), (*ptr_h2))); 498 ptr_h1++; 499 ptr_h2++; 500 *p0 = extract_h(cor); 501 502 p3 -= (NB_POS + 1); 503 p2 -= (NB_POS + 1); 504 p1 -= (NB_POS + 1); 505 p0 -= (NB_POS + 1); 506 } 507 cor = L_add(cor, vo_L_mult((*ptr_h1), (*ptr_h2))); 508 ptr_h1++; 509 ptr_h2++; 510 *p3 = extract_h(cor); 511 512 pos--; 513 ptr_hf += STEP; 514 } 515 516 /*------------------------------------------------------------* 517 * Modification of rrixiy[][] to take signs into account. * 518 *------------------------------------------------------------*/ 519 520 p0 = &rrixiy[0][0]; 521 522 for (k = 0; k < NB_TRACK; k++) 523 { 524 j_temp = (k + 1)&0x03; 525 for (i = k; i < L_SUBFR; i += STEP) 526 { 527 psign = sign; 528 if (psign[i] < 0) 529 { 530 psign = vec; 531 } 532 j = j_temp; 533 for (; j < L_SUBFR; j += STEP) 534 { 535 *p0 = vo_mult(*p0, psign[j]); 536 p0++; 537 } 538 } 539 } 540 541 /*-------------------------------------------------------------------* 542 * Deep first search * 543 *-------------------------------------------------------------------*/ 544 545 psk = -1; 546 alpk = 1; 547 548 for (k = 0; k < nbiter; k++) 549 { 550 j_temp = k<<2; 551 for (i = 0; i < nb_pulse; i++) 552 ipos[i] = tipos[j_temp + i]; 553 554 if(nbbits == 20) 555 { 556 pos = 0; 557 ps = 0; 558 alp = 0; 559 for (i = 0; i < L_SUBFR; i++) 560 { 561 vec[i] = 0; 562 } 563 } else if ((nbbits == 36) || (nbbits == 44)) 564 { 565 /* first stage: fix 2 pulses */ 566 pos = 2; 567 568 ix = ind[0] = pos_max[ipos[0]]; 569 iy = ind[1] = pos_max[ipos[1]]; 570 ps = dn[ix] + dn[iy]; 571 i = ix >> 2; /* ix / STEP */ 572 j = iy >> 2; /* iy / STEP */ 573 s = rrixix[ipos[0]][i] << 13; 574 s += rrixix[ipos[1]][j] << 13; 575 i = (i << 4) + j; /* (ix/STEP)*NB_POS + (iy/STEP) */ 576 s += rrixiy[ipos[0]][i] << 14; 577 alp = (s + 0x8000) >> 16; 578 if (sign[ix] < 0) 579 p0 = h_inv - ix; 580 else 581 p0 = h - ix; 582 if (sign[iy] < 0) 583 p1 = h_inv - iy; 584 else 585 p1 = h - iy; 586 587 for (i = 0; i < L_SUBFR; i++) 588 { 589 vec[i] = (*p0++) + (*p1++); 590 } 591 592 if(nbbits == 44) 593 { 594 ipos[8] = 0; 595 ipos[9] = 1; 596 } 597 } else 598 { 599 /* first stage: fix 4 pulses */ 600 pos = 4; 601 602 ix = ind[0] = pos_max[ipos[0]]; 603 iy = ind[1] = pos_max[ipos[1]]; 604 i = ind[2] = pos_max[ipos[2]]; 605 j = ind[3] = pos_max[ipos[3]]; 606 ps = add1(add1(add1(dn[ix], dn[iy]), dn[i]), dn[j]); 607 608 if (sign[ix] < 0) 609 p0 = h_inv - ix; 610 else 611 p0 = h - ix; 612 613 if (sign[iy] < 0) 614 p1 = h_inv - iy; 615 else 616 p1 = h - iy; 617 618 if (sign[i] < 0) 619 p2 = h_inv - i; 620 else 621 p2 = h - i; 622 623 if (sign[j] < 0) 624 p3 = h_inv - j; 625 else 626 p3 = h - j; 627 628 L_tmp = 0L; 629 for(i = 0; i < L_SUBFR; i++) 630 { 631 Word32 vecSq2; 632 vec[i] = add1(add1(add1(*p0++, *p1++), *p2++), *p3++); 633 vecSq2 = (vec[i] * vec[i]) << 1; 634 if (vecSq2 > 0 && L_tmp > INT_MAX - vecSq2) { 635 L_tmp = INT_MAX; 636 } else if (vecSq2 < 0 && L_tmp < INT_MIN - vecSq2) { 637 L_tmp = INT_MIN; 638 } else { 639 L_tmp += vecSq2; 640 } 641 } 642 643 alp = ((L_tmp >> 3) + 0x8000) >> 16; 644 645 if(nbbits == 72) 646 { 647 ipos[16] = 0; 648 ipos[17] = 1; 649 } 650 } 651 652 /* other stages of 2 pulses */ 653 654 for (j = pos, st = 0; j < nb_pulse; j += 2, st++) 655 { 656 /*--------------------------------------------------* 657 * Calculate correlation of all possible positions * 658 * of the next 2 pulses with previous fixed pulses. * 659 * Each pulse can have 16 possible positions. * 660 *--------------------------------------------------*/ 661 if(ipos[j] == 3) 662 { 663 cor_h_vec_30(h, vec, ipos[j], sign, rrixix, cor_x, cor_y); 664 } 665 else 666 { 667 #ifdef ASM_OPT /* asm optimization branch */ 668 cor_h_vec_012_asm(h, vec, ipos[j], sign, rrixix, cor_x, cor_y); 669 #else 670 cor_h_vec_012(h, vec, ipos[j], sign, rrixix, cor_x, cor_y); 671 #endif 672 } 673 /*--------------------------------------------------* 674 * Find best positions of 2 pulses. * 675 *--------------------------------------------------*/ 676 search_ixiy(nbpos[st], ipos[j], ipos[j + 1], &ps, &alp, 677 &ix, &iy, dn, dn2, cor_x, cor_y, rrixiy); 678 679 ind[j] = ix; 680 ind[j + 1] = iy; 681 682 if (sign[ix] < 0) 683 p0 = h_inv - ix; 684 else 685 p0 = h - ix; 686 if (sign[iy] < 0) 687 p1 = h_inv - iy; 688 else 689 p1 = h - iy; 690 691 for (i = 0; i < L_SUBFR; i+=4) 692 { 693 vec[i] += add1((*p0++), (*p1++)); 694 vec[i+1] += add1((*p0++), (*p1++)); 695 vec[i+2] += add1((*p0++), (*p1++)); 696 vec[i+3] += add1((*p0++), (*p1++)); 697 } 698 } 699 /* memorise the best codevector */ 700 ps = vo_mult(ps, ps); 701 s = L_sub(vo_L_mult(alpk, ps), vo_L_mult(psk, alp)); 702 if (s > 0) 703 { 704 psk = ps; 705 alpk = alp; 706 for (i = 0; i < nb_pulse; i++) 707 { 708 codvec[i] = ind[i]; 709 } 710 for (i = 0; i < L_SUBFR; i++) 711 { 712 y[i] = vec[i]; 713 } 714 } 715 } 716 /*-------------------------------------------------------------------* 717 * Build the codeword, the filtered codeword and index of codevector.* 718 *-------------------------------------------------------------------*/ 719 for (i = 0; i < NPMAXPT * NB_TRACK; i++) 720 { 721 ind[i] = -1; 722 } 723 for (i = 0; i < L_SUBFR; i++) 724 { 725 code[i] = 0; 726 y[i] = vo_shr_r(y[i], 3); /* Q12 to Q9 */ 727 } 728 val = (512 >> h_shift); /* codeword in Q9 format */ 729 for (k = 0; k < nb_pulse; k++) 730 { 731 i = codvec[k]; /* read pulse position */ 732 j = sign[i]; /* read sign */ 733 index = i >> 2; /* index = pos of pulse (0..15) */ 734 track = (Word16) (i & 0x03); /* track = i % NB_TRACK (0..3) */ 735 736 if (j > 0) 737 { 738 code[i] += val; 739 codvec[k] += 128; 740 } else 741 { 742 code[i] -= val; 743 index += NB_POS; 744 } 745 746 i = (Word16)((vo_L_mult(track, NPMAXPT) >> 1)); 747 748 while (ind[i] >= 0) 749 { 750 i += 1; 751 } 752 ind[i] = index; 753 } 754 755 k = 0; 756 /* Build index of codevector */ 757 if(nbbits == 20) 758 { 759 for (track = 0; track < NB_TRACK; track++) 760 { 761 _index[track] = (Word16)(quant_1p_N1(ind[k], 4)); 762 k += NPMAXPT; 763 } 764 } else if(nbbits == 36) 765 { 766 for (track = 0; track < NB_TRACK; track++) 767 { 768 _index[track] = (Word16)(quant_2p_2N1(ind[k], ind[k + 1], 4)); 769 k += NPMAXPT; 770 } 771 } else if(nbbits == 44) 772 { 773 for (track = 0; track < NB_TRACK - 2; track++) 774 { 775 _index[track] = (Word16)(quant_3p_3N1(ind[k], ind[k + 1], ind[k + 2], 4)); 776 k += NPMAXPT; 777 } 778 for (track = 2; track < NB_TRACK; track++) 779 { 780 _index[track] = (Word16)(quant_2p_2N1(ind[k], ind[k + 1], 4)); 781 k += NPMAXPT; 782 } 783 } else if(nbbits == 52) 784 { 785 for (track = 0; track < NB_TRACK; track++) 786 { 787 _index[track] = (Word16)(quant_3p_3N1(ind[k], ind[k + 1], ind[k + 2], 4)); 788 k += NPMAXPT; 789 } 790 } else if(nbbits == 64) 791 { 792 for (track = 0; track < NB_TRACK; track++) 793 { 794 L_index = quant_4p_4N(&ind[k], 4); 795 _index[track] = (Word16)((L_index >> 14) & 3); 796 _index[track + NB_TRACK] = (Word16)(L_index & 0x3FFF); 797 k += NPMAXPT; 798 } 799 } else if(nbbits == 72) 800 { 801 for (track = 0; track < NB_TRACK - 2; track++) 802 { 803 L_index = quant_5p_5N(&ind[k], 4); 804 _index[track] = (Word16)((L_index >> 10) & 0x03FF); 805 _index[track + NB_TRACK] = (Word16)(L_index & 0x03FF); 806 k += NPMAXPT; 807 } 808 for (track = 2; track < NB_TRACK; track++) 809 { 810 L_index = quant_4p_4N(&ind[k], 4); 811 _index[track] = (Word16)((L_index >> 14) & 3); 812 _index[track + NB_TRACK] = (Word16)(L_index & 0x3FFF); 813 k += NPMAXPT; 814 } 815 } else if(nbbits == 88) 816 { 817 for (track = 0; track < NB_TRACK; track++) 818 { 819 L_index = quant_6p_6N_2(&ind[k], 4); 820 _index[track] = (Word16)((L_index >> 11) & 0x07FF); 821 _index[track + NB_TRACK] = (Word16)(L_index & 0x07FF); 822 k += NPMAXPT; 823 } 824 } 825 return; 826 } 827 828 829 /*-------------------------------------------------------------------* 830 * Function cor_h_vec() * 831 * ~~~~~~~~~~~~~~~~~~~~~ * 832 * Compute correlations of h[] with vec[] for the specified track. * 833 *-------------------------------------------------------------------*/ 834 void cor_h_vec_30( 835 Word16 h[], /* (i) scaled impulse response */ 836 Word16 vec[], /* (i) scaled vector (/8) to correlate with h[] */ 837 Word16 track, /* (i) track to use */ 838 Word16 sign[], /* (i) sign vector */ 839 Word16 rrixix[][NB_POS], /* (i) correlation of h[x] with h[x] */ 840 Word16 cor_1[], /* (o) result of correlation (NB_POS elements) */ 841 Word16 cor_2[] /* (o) result of correlation (NB_POS elements) */ 842 ) 843 { 844 Word32 i, j, pos, corr; 845 Word16 *p0, *p1, *p2,*p3,*cor_x,*cor_y; 846 Word32 L_sum1,L_sum2; 847 cor_x = cor_1; 848 cor_y = cor_2; 849 p0 = rrixix[track]; 850 p3 = rrixix[0]; 851 pos = track; 852 853 for (i = 0; i < NB_POS; i+=2) 854 { 855 L_sum1 = L_sum2 = 0L; 856 p1 = h; 857 p2 = &vec[pos]; 858 for (j=pos;j < L_SUBFR; j++) 859 { 860 L_sum1 = L_add(L_sum1, *p1 * *p2); 861 p2-=3; 862 L_sum2 = L_add(L_sum2, *p1++ * *p2); 863 p2+=4; 864 } 865 p2-=3; 866 L_sum2 = L_add(L_sum2, *p1++ * *p2++); 867 L_sum2 = L_add(L_sum2, *p1++ * *p2++); 868 L_sum2 = L_add(L_sum2, *p1++ * *p2++); 869 870 L_sum1 = L_shl(L_sum1, 2); 871 L_sum2 = L_shl(L_sum2, 2); 872 873 corr = voround(L_sum1); 874 *cor_x++ = mult(corr, sign[pos]) + (*p0++); 875 corr = voround(L_sum2); 876 *cor_y++ = mult(corr, sign[pos-3]) + (*p3++); 877 pos += STEP; 878 879 L_sum1 = L_sum2 = 0L; 880 p1 = h; 881 p2 = &vec[pos]; 882 for (j=pos;j < L_SUBFR; j++) 883 { 884 L_sum1 = L_add(L_sum1, *p1 * *p2); 885 p2-=3; 886 L_sum2 = L_add(L_sum2, *p1++ * *p2); 887 p2+=4; 888 } 889 p2-=3; 890 L_sum2 = L_add(L_sum2, *p1++ * *p2++); 891 L_sum2 = L_add(L_sum2, *p1++ * *p2++); 892 L_sum2 = L_add(L_sum2, *p1++ * *p2++); 893 894 L_sum1 = L_shl(L_sum1, 2); 895 L_sum2 = L_shl(L_sum2, 2); 896 897 corr = voround(L_sum1); 898 *cor_x++ = mult(corr, sign[pos]) + (*p0++); 899 corr = voround(L_sum2); 900 *cor_y++ = mult(corr, sign[pos-3]) + (*p3++); 901 pos += STEP; 902 } 903 return; 904 } 905 906 void cor_h_vec_012( 907 Word16 h[], /* (i) scaled impulse response */ 908 Word16 vec[], /* (i) scaled vector (/8) to correlate with h[] */ 909 Word16 track, /* (i) track to use */ 910 Word16 sign[], /* (i) sign vector */ 911 Word16 rrixix[][NB_POS], /* (i) correlation of h[x] with h[x] */ 912 Word16 cor_1[], /* (o) result of correlation (NB_POS elements) */ 913 Word16 cor_2[] /* (o) result of correlation (NB_POS elements) */ 914 ) 915 { 916 Word32 i, j, pos, corr; 917 Word16 *p0, *p1, *p2,*p3,*cor_x,*cor_y; 918 Word32 L_sum1,L_sum2; 919 cor_x = cor_1; 920 cor_y = cor_2; 921 p0 = rrixix[track]; 922 p3 = rrixix[track+1]; 923 pos = track; 924 925 for (i = 0; i < NB_POS; i+=2) 926 { 927 L_sum1 = L_sum2 = 0L; 928 p1 = h; 929 p2 = &vec[pos]; 930 for (j=62-pos ;j >= 0; j--) 931 { 932 L_sum1 = L_add(L_sum1, *p1 * *p2++); 933 L_sum2 = L_add(L_sum2, *p1++ * *p2); 934 } 935 L_sum1 = L_add(L_sum1, *p1 * *p2); 936 L_sum1 = L_shl(L_sum1, 2); 937 L_sum2 = L_shl(L_sum2, 2); 938 939 corr = voround(L_sum1); 940 cor_x[i] = vo_mult(corr, sign[pos]) + (*p0++); 941 corr = voround(L_sum2); 942 cor_y[i] = vo_mult(corr, sign[pos + 1]) + (*p3++); 943 pos += STEP; 944 945 L_sum1 = L_sum2 = 0L; 946 p1 = h; 947 p2 = &vec[pos]; 948 for (j= 62-pos;j >= 0; j--) 949 { 950 L_sum1 = L_add(L_sum1, *p1 * *p2++); 951 L_sum2 = L_add(L_sum2, *p1++ * *p2); 952 } 953 L_sum1 = L_add(L_sum1, *p1 * *p2); 954 L_sum1 = L_shl(L_sum1, 2); 955 L_sum2 = L_shl(L_sum2, 2); 956 957 corr = voround(L_sum1); 958 cor_x[i+1] = vo_mult(corr, sign[pos]) + (*p0++); 959 corr = voround(L_sum2); 960 cor_y[i+1] = vo_mult(corr, sign[pos + 1]) + (*p3++); 961 pos += STEP; 962 } 963 return; 964 } 965 966 /*-------------------------------------------------------------------* 967 * Function search_ixiy() * 968 * ~~~~~~~~~~~~~~~~~~~~~~~ * 969 * Find the best positions of 2 pulses in a subframe. * 970 *-------------------------------------------------------------------*/ 971 972 void search_ixiy( 973 Word16 nb_pos_ix, /* (i) nb of pos for pulse 1 (1..8) */ 974 Word16 track_x, /* (i) track of pulse 1 */ 975 Word16 track_y, /* (i) track of pulse 2 */ 976 Word16 * ps, /* (i/o) correlation of all fixed pulses */ 977 Word16 * alp, /* (i/o) energy of all fixed pulses */ 978 Word16 * ix, /* (o) position of pulse 1 */ 979 Word16 * iy, /* (o) position of pulse 2 */ 980 Word16 dn[], /* (i) corr. between target and h[] */ 981 Word16 dn2[], /* (i) vector of selected positions */ 982 Word16 cor_x[], /* (i) corr. of pulse 1 with fixed pulses */ 983 Word16 cor_y[], /* (i) corr. of pulse 2 with fixed pulses */ 984 Word16 rrixiy[][MSIZE] /* (i) corr. of pulse 1 with pulse 2 */ 985 ) 986 { 987 Word32 x, y, pos, thres_ix; 988 Word16 ps1, ps2, sq, sqk; 989 Word16 alp_16, alpk; 990 Word16 *p0, *p1, *p2; 991 Word32 s, alp0, alp1, alp2; 992 993 p0 = cor_x; 994 p1 = cor_y; 995 p2 = rrixiy[track_x]; 996 997 thres_ix = nb_pos_ix - NB_MAX; 998 999 alp0 = L_deposit_h(*alp); 1000 alp0 = (alp0 + 0x00008000L); /* for rounding */ 1001 1002 sqk = -1; 1003 alpk = 1; 1004 1005 for (x = track_x; x < L_SUBFR; x += STEP) 1006 { 1007 ps1 = *ps + dn[x]; 1008 alp1 = L_add(alp0, ((*p0++)<<13)); 1009 1010 if (dn2[x] < thres_ix) 1011 { 1012 pos = -1; 1013 for (y = track_y; y < L_SUBFR; y += STEP) 1014 { 1015 ps2 = add1(ps1, dn[y]); 1016 1017 alp2 = L_add(alp1, ((*p1++)<<13)); 1018 alp2 = L_add(alp2, ((*p2++)<<14)); 1019 alp_16 = extract_h(alp2); 1020 sq = vo_mult(ps2, ps2); 1021 s = L_sub(vo_L_mult(alpk, sq), L_mult(sqk, alp_16)); 1022 1023 if (s > 0) 1024 { 1025 sqk = sq; 1026 alpk = alp_16; 1027 pos = y; 1028 } 1029 } 1030 p1 -= NB_POS; 1031 1032 if (pos >= 0) 1033 { 1034 *ix = x; 1035 *iy = pos; 1036 } 1037 } else 1038 { 1039 p2 += NB_POS; 1040 } 1041 } 1042 1043 *ps = add1(*ps, add1(dn[*ix], dn[*iy])); 1044 *alp = alpk; 1045 1046 return; 1047 } 1048 1049 1050 1051 1052