1 /* 2 ** Copyright 2003-2010, VisualOn, Inc. 3 ** 4 ** Licensed under the Apache License, Version 2.0 (the "License"); 5 ** you may not use this file except in compliance with the License. 6 ** You may obtain a copy of the License at 7 ** 8 ** http://www.apache.org/licenses/LICENSE-2.0 9 ** 10 ** Unless required by applicable law or agreed to in writing, software 11 ** distributed under the License is distributed on an "AS IS" BASIS, 12 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 ** See the License for the specific language governing permissions and 14 ** limitations under the License. 15 */ 16 17 /*********************************************************************** 18 * File: c4t64fx.c * 19 * * 20 * Description:Performs algebraic codebook search for higher modes * 21 * * 22 ************************************************************************/ 23 24 /************************************************************************ 25 * Function: ACELP_4t64_fx() * 26 * * 27 * 20, 36, 44, 52, 64, 72, 88 bits algebraic codebook. * 28 * 4 tracks x 16 positions per track = 64 samples. * 29 * * 30 * 20 bits --> 4 pulses in a frame of 64 samples. * 31 * 36 bits --> 8 pulses in a frame of 64 samples. * 32 * 44 bits --> 10 pulses in a frame of 64 samples. * 33 * 52 bits --> 12 pulses in a frame of 64 samples. * 34 * 64 bits --> 16 pulses in a frame of 64 samples. * 35 * 72 bits --> 18 pulses in a frame of 64 samples. * 36 * 88 bits --> 24 pulses in a frame of 64 samples. * 37 * * 38 * All pulses can have two (2) possible amplitudes: +1 or -1. * 39 * Each pulse can have sixteen (16) possible positions. * 40 *************************************************************************/ 41 42 #include "typedef.h" 43 #include "basic_op.h" 44 #include "math_op.h" 45 #include "acelp.h" 46 #include "cnst.h" 47 48 #include "q_pulse.h" 49 50 #undef LOG_TAG 51 #define LOG_TAG "amrwbenc" 52 #include "log/log.h" 53 54 static Word16 tipos[36] = { 55 0, 1, 2, 3, /* starting point &ipos[0], 1st iter */ 56 1, 2, 3, 0, /* starting point &ipos[4], 2nd iter */ 57 2, 3, 0, 1, /* starting point &ipos[8], 3rd iter */ 58 3, 0, 1, 2, /* starting point &ipos[12], 4th iter */ 59 0, 1, 2, 3, 60 1, 2, 3, 0, 61 2, 3, 0, 1, 62 3, 0, 1, 2, 63 0, 1, 2, 3}; /* end point for 24 pulses &ipos[35], 4th iter */ 64 65 #define NB_PULSE_MAX 24 66 67 #define L_SUBFR 64 68 #define NB_TRACK 4 69 #define STEP 4 70 #define NB_POS 16 71 #define MSIZE 256 72 #define NB_MAX 8 73 #define NPMAXPT ((NB_PULSE_MAX+NB_TRACK-1)/NB_TRACK) 74 75 /* Private functions */ 76 void cor_h_vec_012( 77 Word16 h[], /* (i) scaled impulse response */ 78 Word16 vec[], /* (i) scaled vector (/8) to correlate with h[] */ 79 Word16 track, /* (i) track to use */ 80 Word16 sign[], /* (i) sign vector */ 81 Word16 rrixix[][NB_POS], /* (i) correlation of h[x] with h[x] */ 82 Word16 cor_1[], /* (o) result of correlation (NB_POS elements) */ 83 Word16 cor_2[] /* (o) result of correlation (NB_POS elements) */ 84 ); 85 86 void cor_h_vec_012_asm( 87 Word16 h[], /* (i) scaled impulse response */ 88 Word16 vec[], /* (i) scaled vector (/8) to correlate with h[] */ 89 Word16 track, /* (i) track to use */ 90 Word16 sign[], /* (i) sign vector */ 91 Word16 rrixix[][NB_POS], /* (i) correlation of h[x] with h[x] */ 92 Word16 cor_1[], /* (o) result of correlation (NB_POS elements) */ 93 Word16 cor_2[] /* (o) result of correlation (NB_POS elements) */ 94 ); 95 96 void cor_h_vec_30( 97 Word16 h[], /* (i) scaled impulse response */ 98 Word16 vec[], /* (i) scaled vector (/8) to correlate with h[] */ 99 Word16 track, /* (i) track to use */ 100 Word16 sign[], /* (i) sign vector */ 101 Word16 rrixix[][NB_POS], /* (i) correlation of h[x] with h[x] */ 102 Word16 cor_1[], /* (o) result of correlation (NB_POS elements) */ 103 Word16 cor_2[] /* (o) result of correlation (NB_POS elements) */ 104 ); 105 106 void search_ixiy( 107 Word16 nb_pos_ix, /* (i) nb of pos for pulse 1 (1..8) */ 108 Word16 track_x, /* (i) track of pulse 1 */ 109 Word16 track_y, /* (i) track of pulse 2 */ 110 Word16 * ps, /* (i/o) correlation of all fixed pulses */ 111 Word16 * alp, /* (i/o) energy of all fixed pulses */ 112 Word16 * ix, /* (o) position of pulse 1 */ 113 Word16 * iy, /* (o) position of pulse 2 */ 114 Word16 dn[], /* (i) corr. between target and h[] */ 115 Word16 dn2[], /* (i) vector of selected positions */ 116 Word16 cor_x[], /* (i) corr. of pulse 1 with fixed pulses */ 117 Word16 cor_y[], /* (i) corr. of pulse 2 with fixed pulses */ 118 Word16 rrixiy[][MSIZE] /* (i) corr. of pulse 1 with pulse 2 */ 119 ); 120 121 122 void ACELP_4t64_fx( 123 Word16 dn[], /* (i) <12b : correlation between target x[] and H[] */ 124 Word16 cn[], /* (i) <12b : residual after long term prediction */ 125 Word16 H[], /* (i) Q12: impulse response of weighted synthesis filter */ 126 Word16 code[], /* (o) Q9 : algebraic (fixed) codebook excitation */ 127 Word16 y[], /* (o) Q9 : filtered fixed codebook excitation */ 128 Word16 nbbits, /* (i) : 20, 36, 44, 52, 64, 72 or 88 bits */ 129 Word16 ser_size, /* (i) : bit rate */ 130 Word16 _index[] /* (o) : index (20): 5+5+5+5 = 20 bits. */ 131 /* (o) : index (36): 9+9+9+9 = 36 bits. */ 132 /* (o) : index (44): 13+9+13+9 = 44 bits. */ 133 /* (o) : index (52): 13+13+13+13 = 52 bits. */ 134 /* (o) : index (64): 2+2+2+2+14+14+14+14 = 64 bits. */ 135 /* (o) : index (72): 10+2+10+2+10+14+10+14 = 72 bits. */ 136 /* (o) : index (88): 11+11+11+11+11+11+11+11 = 88 bits. */ 137 ) 138 { 139 Word32 i, j, k; 140 Word16 st, ix, iy, pos, index, track, nb_pulse, nbiter, j_temp; 141 Word16 psk, ps, alpk, alp, val, k_cn, k_dn, exp; 142 Word16 *p0, *p1, *p2, *p3, *psign; 143 Word16 *h, *h_inv, *ptr_h1, *ptr_h2, *ptr_hf, h_shift; 144 Word32 s, cor, L_tmp, L_index; 145 Word16 dn2[L_SUBFR], sign[L_SUBFR], vec[L_SUBFR]; 146 Word16 ind[NPMAXPT * NB_TRACK]; 147 Word16 codvec[NB_PULSE_MAX], nbpos[10]; 148 Word16 cor_x[NB_POS], cor_y[NB_POS], pos_max[NB_TRACK]; 149 Word16 h_buf[4 * L_SUBFR]; 150 Word16 rrixix[NB_TRACK][NB_POS], rrixiy[NB_TRACK][MSIZE]; 151 Word16 ipos[NB_PULSE_MAX]; 152 153 switch (nbbits) 154 { 155 case 20: /* 20 bits, 4 pulses, 4 tracks */ 156 nbiter = 4; /* 4x16x16=1024 loop */ 157 alp = 8192; /* alp = 2.0 (Q12) */ 158 nb_pulse = 4; 159 nbpos[0] = 4; 160 nbpos[1] = 8; 161 break; 162 case 36: /* 36 bits, 8 pulses, 4 tracks */ 163 nbiter = 4; /* 4x20x16=1280 loop */ 164 alp = 4096; /* alp = 1.0 (Q12) */ 165 nb_pulse = 8; 166 nbpos[0] = 4; 167 nbpos[1] = 8; 168 nbpos[2] = 8; 169 break; 170 case 44: /* 44 bits, 10 pulses, 4 tracks */ 171 nbiter = 4; /* 4x26x16=1664 loop */ 172 alp = 4096; /* alp = 1.0 (Q12) */ 173 nb_pulse = 10; 174 nbpos[0] = 4; 175 nbpos[1] = 6; 176 nbpos[2] = 8; 177 nbpos[3] = 8; 178 break; 179 case 52: /* 52 bits, 12 pulses, 4 tracks */ 180 nbiter = 4; /* 4x26x16=1664 loop */ 181 alp = 4096; /* alp = 1.0 (Q12) */ 182 nb_pulse = 12; 183 nbpos[0] = 4; 184 nbpos[1] = 6; 185 nbpos[2] = 8; 186 nbpos[3] = 8; 187 break; 188 case 64: /* 64 bits, 16 pulses, 4 tracks */ 189 nbiter = 3; /* 3x36x16=1728 loop */ 190 alp = 3277; /* alp = 0.8 (Q12) */ 191 nb_pulse = 16; 192 nbpos[0] = 4; 193 nbpos[1] = 4; 194 nbpos[2] = 6; 195 nbpos[3] = 6; 196 nbpos[4] = 8; 197 nbpos[5] = 8; 198 break; 199 case 72: /* 72 bits, 18 pulses, 4 tracks */ 200 nbiter = 3; /* 3x35x16=1680 loop */ 201 alp = 3072; /* alp = 0.75 (Q12) */ 202 nb_pulse = 18; 203 nbpos[0] = 2; 204 nbpos[1] = 3; 205 nbpos[2] = 4; 206 nbpos[3] = 5; 207 nbpos[4] = 6; 208 nbpos[5] = 7; 209 nbpos[6] = 8; 210 break; 211 case 88: /* 88 bits, 24 pulses, 4 tracks */ 212 if(ser_size > 462) 213 nbiter = 1; 214 else 215 nbiter = 2; /* 2x53x16=1696 loop */ 216 217 alp = 2048; /* alp = 0.5 (Q12) */ 218 nb_pulse = 24; 219 nbpos[0] = 2; 220 nbpos[1] = 2; 221 nbpos[2] = 3; 222 nbpos[3] = 4; 223 nbpos[4] = 5; 224 nbpos[5] = 6; 225 nbpos[6] = 7; 226 nbpos[7] = 8; 227 nbpos[8] = 8; 228 nbpos[9] = 8; 229 break; 230 default: 231 nbiter = 0; 232 alp = 0; 233 nb_pulse = 0; 234 } 235 236 for (i = 0; i < nb_pulse; i++) 237 { 238 codvec[i] = i; 239 } 240 241 /*----------------------------------------------------------------* 242 * Find sign for each pulse position. * 243 *----------------------------------------------------------------*/ 244 /* calculate energy for normalization of cn[] and dn[] */ 245 /* set k_cn = 32..32767 (ener_cn = 2^30..256-0) */ 246 #ifdef ASM_OPT /* asm optimization branch */ 247 s = Dot_product12_asm(cn, cn, L_SUBFR, &exp); 248 #else 249 s = Dot_product12(cn, cn, L_SUBFR, &exp); 250 #endif 251 252 Isqrt_n(&s, &exp); 253 s = L_shl(s, (exp + 5)); 254 k_cn = extract_h(L_add(s, 0x8000)); 255 256 /* set k_dn = 32..512 (ener_dn = 2^30..2^22) */ 257 #ifdef ASM_OPT /* asm optimization branch */ 258 s = Dot_product12_asm(dn, dn, L_SUBFR, &exp); 259 #else 260 s = Dot_product12(dn, dn, L_SUBFR, &exp); 261 #endif 262 263 Isqrt_n(&s, &exp); 264 k_dn = voround(L_shl(s, (exp + 5 + 3))); /* k_dn = 256..4096 */ 265 k_dn = vo_mult_r(alp, k_dn); /* alp in Q12 */ 266 267 /* mix normalized cn[] and dn[] */ 268 p0 = cn; 269 p1 = dn; 270 p2 = dn2; 271 272 for (i = 0; i < L_SUBFR/4; i++) 273 { 274 s = L_add((k_cn* (*p0++)), (k_dn * (*p1++))); 275 *p2++ = s >> 7; 276 s = L_add((k_cn* (*p0++)), (k_dn * (*p1++))); 277 *p2++ = s >> 7; 278 s = L_add((k_cn* (*p0++)), (k_dn * (*p1++))); 279 *p2++ = s >> 7; 280 s = L_add((k_cn* (*p0++)), (k_dn * (*p1++))); 281 *p2++ = s >> 7; 282 } 283 284 /* set sign according to dn2[] = k_cn*cn[] + k_dn*dn[] */ 285 for(i = 0; i < L_SUBFR; i++) 286 { 287 val = dn[i]; 288 ps = dn2[i]; 289 if (ps >= 0) 290 { 291 sign[i] = 32767; /* sign = +1 (Q12) */ 292 vec[i] = -32768; 293 } else 294 { 295 sign[i] = -32768; /* sign = -1 (Q12) */ 296 vec[i] = 32767; 297 dn[i] = -val; 298 dn2[i] = -ps; 299 } 300 } 301 /*----------------------------------------------------------------* 302 * Select NB_MAX position per track according to max of dn2[]. * 303 *----------------------------------------------------------------*/ 304 pos = 0; 305 for (i = 0; i < NB_TRACK; i++) 306 { 307 for (k = 0; k < NB_MAX; k++) 308 { 309 ps = -1; 310 for (j = i; j < L_SUBFR; j += STEP) 311 { 312 if(dn2[j] > ps) 313 { 314 ps = dn2[j]; 315 pos = j; 316 } 317 } 318 dn2[pos] = (k - NB_MAX); /* dn2 < 0 when position is selected */ 319 if (k == 0) 320 { 321 pos_max[i] = pos; 322 } 323 } 324 } 325 326 /*--------------------------------------------------------------* 327 * Scale h[] to avoid overflow and to get maximum of precision * 328 * on correlation. * 329 * * 330 * Maximum of h[] (h[0]) is fixed to 2048 (MAX16 / 16). * 331 * ==> This allow addition of 16 pulses without saturation. * 332 * * 333 * Energy worst case (on resonant impulse response), * 334 * - energy of h[] is approximately MAX/16. * 335 * - During search, the energy is divided by 8 to avoid * 336 * overflow on "alp". (energy of h[] = MAX/128). * 337 * ==> "alp" worst case detected is 22854 on sinusoidal wave. * 338 *--------------------------------------------------------------*/ 339 340 /* impulse response buffer for fast computation */ 341 342 h = h_buf; 343 h_inv = h_buf + (2 * L_SUBFR); 344 L_tmp = 0; 345 for (i = 0; i < L_SUBFR; i++) 346 { 347 *h++ = 0; 348 *h_inv++ = 0; 349 L_tmp = L_add(L_tmp, (H[i] * H[i]) << 1); 350 } 351 /* scale h[] down (/2) when energy of h[] is high with many pulses used */ 352 val = extract_h(L_tmp); 353 h_shift = 0; 354 355 if ((nb_pulse >= 12) && (val > 1024)) 356 { 357 h_shift = 1; 358 } 359 p0 = H; 360 p1 = h; 361 p2 = h_inv; 362 363 for (i = 0; i < L_SUBFR/4; i++) 364 { 365 *p1 = *p0++ >> h_shift; 366 *p2++ = -(*p1++); 367 *p1 = *p0++ >> h_shift; 368 *p2++ = -(*p1++); 369 *p1 = *p0++ >> h_shift; 370 *p2++ = -(*p1++); 371 *p1 = *p0++ >> h_shift; 372 *p2++ = -(*p1++); 373 } 374 375 /*------------------------------------------------------------* 376 * Compute rrixix[][] needed for the codebook search. * 377 * This algorithm compute impulse response energy of all * 378 * positions (16) in each track (4). Total = 4x16 = 64. * 379 *------------------------------------------------------------*/ 380 381 /* storage order --> i3i3, i2i2, i1i1, i0i0 */ 382 383 /* Init pointers to last position of rrixix[] */ 384 p0 = &rrixix[0][NB_POS - 1]; 385 p1 = &rrixix[1][NB_POS - 1]; 386 p2 = &rrixix[2][NB_POS - 1]; 387 p3 = &rrixix[3][NB_POS - 1]; 388 389 ptr_h1 = h; 390 cor = 0x00008000L; /* for rounding */ 391 for (i = 0; i < NB_POS; i++) 392 { 393 cor = L_add(cor, vo_L_mult((*ptr_h1), (*ptr_h1))); 394 ptr_h1++; 395 *p3-- = extract_h(cor); 396 cor = L_add(cor, vo_L_mult((*ptr_h1), (*ptr_h1))); 397 ptr_h1++; 398 *p2-- = extract_h(cor); 399 cor = L_add(cor, vo_L_mult((*ptr_h1), (*ptr_h1))); 400 ptr_h1++; 401 *p1-- = extract_h(cor); 402 cor = L_add(cor, vo_L_mult((*ptr_h1), (*ptr_h1))); 403 ptr_h1++; 404 *p0-- = extract_h(cor); 405 } 406 407 /*------------------------------------------------------------* 408 * Compute rrixiy[][] needed for the codebook search. * 409 * This algorithm compute correlation between 2 pulses * 410 * (2 impulses responses) in 4 possible adjacents tracks. * 411 * (track 0-1, 1-2, 2-3 and 3-0). Total = 4x16x16 = 1024. * 412 *------------------------------------------------------------*/ 413 414 /* storage order --> i2i3, i1i2, i0i1, i3i0 */ 415 416 pos = MSIZE - 1; 417 ptr_hf = h + 1; 418 419 for (k = 0; k < NB_POS; k++) 420 { 421 p3 = &rrixiy[2][pos]; 422 p2 = &rrixiy[1][pos]; 423 p1 = &rrixiy[0][pos]; 424 p0 = &rrixiy[3][pos - NB_POS]; 425 426 cor = 0x00008000L; /* for rounding */ 427 ptr_h1 = h; 428 ptr_h2 = ptr_hf; 429 430 for (i = k + 1; i < NB_POS; i++) 431 { 432 cor = L_add(cor, vo_L_mult((*ptr_h1), (*ptr_h2))); 433 ptr_h1++; 434 ptr_h2++; 435 *p3 = extract_h(cor); 436 cor = L_add(cor, vo_L_mult((*ptr_h1), (*ptr_h2))); 437 ptr_h1++; 438 ptr_h2++; 439 *p2 = extract_h(cor); 440 cor = L_add(cor, vo_L_mult((*ptr_h1), (*ptr_h2))); 441 ptr_h1++; 442 ptr_h2++; 443 *p1 = extract_h(cor); 444 cor = L_add(cor, vo_L_mult((*ptr_h1), (*ptr_h2))); 445 ptr_h1++; 446 ptr_h2++; 447 *p0 = extract_h(cor); 448 449 p3 -= (NB_POS + 1); 450 p2 -= (NB_POS + 1); 451 p1 -= (NB_POS + 1); 452 p0 -= (NB_POS + 1); 453 } 454 cor = L_add(cor, vo_L_mult((*ptr_h1), (*ptr_h2))); 455 ptr_h1++; 456 ptr_h2++; 457 *p3 = extract_h(cor); 458 cor = L_add(cor, vo_L_mult((*ptr_h1), (*ptr_h2))); 459 ptr_h1++; 460 ptr_h2++; 461 *p2 = extract_h(cor); 462 cor = L_add(cor, vo_L_mult((*ptr_h1), (*ptr_h2))); 463 ptr_h1++; 464 ptr_h2++; 465 *p1 = extract_h(cor); 466 467 pos -= NB_POS; 468 ptr_hf += STEP; 469 } 470 471 /* storage order --> i3i0, i2i3, i1i2, i0i1 */ 472 473 pos = MSIZE - 1; 474 ptr_hf = h + 3; 475 476 for (k = 0; k < NB_POS; k++) 477 { 478 p3 = &rrixiy[3][pos]; 479 p2 = &rrixiy[2][pos - 1]; 480 p1 = &rrixiy[1][pos - 1]; 481 p0 = &rrixiy[0][pos - 1]; 482 483 cor = 0x00008000L; /* for rounding */ 484 ptr_h1 = h; 485 ptr_h2 = ptr_hf; 486 487 for (i = k + 1; i < NB_POS; i++) 488 { 489 cor = L_add(cor, vo_L_mult((*ptr_h1), (*ptr_h2))); 490 ptr_h1++; 491 ptr_h2++; 492 *p3 = extract_h(cor); 493 cor = L_add(cor, vo_L_mult((*ptr_h1), (*ptr_h2))); 494 ptr_h1++; 495 ptr_h2++; 496 *p2 = extract_h(cor); 497 cor = L_add(cor, vo_L_mult((*ptr_h1), (*ptr_h2))); 498 ptr_h1++; 499 ptr_h2++; 500 *p1 = extract_h(cor); 501 cor = L_add(cor, vo_L_mult((*ptr_h1), (*ptr_h2))); 502 ptr_h1++; 503 ptr_h2++; 504 *p0 = extract_h(cor); 505 506 p3 -= (NB_POS + 1); 507 p2 -= (NB_POS + 1); 508 p1 -= (NB_POS + 1); 509 p0 -= (NB_POS + 1); 510 } 511 cor = L_add(cor, vo_L_mult((*ptr_h1), (*ptr_h2))); 512 ptr_h1++; 513 ptr_h2++; 514 *p3 = extract_h(cor); 515 516 pos--; 517 ptr_hf += STEP; 518 } 519 520 /*------------------------------------------------------------* 521 * Modification of rrixiy[][] to take signs into account. * 522 *------------------------------------------------------------*/ 523 524 p0 = &rrixiy[0][0]; 525 526 for (k = 0; k < NB_TRACK; k++) 527 { 528 j_temp = (k + 1)&0x03; 529 for (i = k; i < L_SUBFR; i += STEP) 530 { 531 psign = sign; 532 if (psign[i] < 0) 533 { 534 psign = vec; 535 } 536 j = j_temp; 537 for (; j < L_SUBFR; j += STEP) 538 { 539 *p0 = vo_mult(*p0, psign[j]); 540 p0++; 541 } 542 } 543 } 544 545 /*-------------------------------------------------------------------* 546 * Deep first search * 547 *-------------------------------------------------------------------*/ 548 549 psk = -1; 550 alpk = 1; 551 552 for (k = 0; k < nbiter; k++) 553 { 554 j_temp = k<<2; 555 for (i = 0; i < nb_pulse; i++) 556 ipos[i] = tipos[j_temp + i]; 557 558 if(nbbits == 20) 559 { 560 pos = 0; 561 ps = 0; 562 alp = 0; 563 for (i = 0; i < L_SUBFR; i++) 564 { 565 vec[i] = 0; 566 } 567 } else if ((nbbits == 36) || (nbbits == 44)) 568 { 569 /* first stage: fix 2 pulses */ 570 pos = 2; 571 572 ix = ind[0] = pos_max[ipos[0]]; 573 iy = ind[1] = pos_max[ipos[1]]; 574 ps = dn[ix] + dn[iy]; 575 i = ix >> 2; /* ix / STEP */ 576 j = iy >> 2; /* iy / STEP */ 577 s = rrixix[ipos[0]][i] << 13; 578 s += rrixix[ipos[1]][j] << 13; 579 i = (i << 4) + j; /* (ix/STEP)*NB_POS + (iy/STEP) */ 580 s += rrixiy[ipos[0]][i] << 14; 581 alp = (s + 0x8000) >> 16; 582 if (sign[ix] < 0) 583 p0 = h_inv - ix; 584 else 585 p0 = h - ix; 586 if (sign[iy] < 0) 587 p1 = h_inv - iy; 588 else 589 p1 = h - iy; 590 591 for (i = 0; i < L_SUBFR; i++) 592 { 593 vec[i] = (*p0++) + (*p1++); 594 } 595 596 if(nbbits == 44) 597 { 598 ipos[8] = 0; 599 ipos[9] = 1; 600 } 601 } else 602 { 603 /* first stage: fix 4 pulses */ 604 pos = 4; 605 606 ix = ind[0] = pos_max[ipos[0]]; 607 iy = ind[1] = pos_max[ipos[1]]; 608 i = ind[2] = pos_max[ipos[2]]; 609 j = ind[3] = pos_max[ipos[3]]; 610 ps = add1(add1(add1(dn[ix], dn[iy]), dn[i]), dn[j]); 611 612 if (sign[ix] < 0) 613 p0 = h_inv - ix; 614 else 615 p0 = h - ix; 616 617 if (sign[iy] < 0) 618 p1 = h_inv - iy; 619 else 620 p1 = h - iy; 621 622 if (sign[i] < 0) 623 p2 = h_inv - i; 624 else 625 p2 = h - i; 626 627 if (sign[j] < 0) 628 p3 = h_inv - j; 629 else 630 p3 = h - j; 631 632 L_tmp = 0L; 633 for(i = 0; i < L_SUBFR; i++) 634 { 635 Word32 vecSq2; 636 vec[i] = add1(add1(add1(*p0++, *p1++), *p2++), *p3++); 637 vecSq2 = (vec[i] * vec[i]) << 1; 638 if (vecSq2 > 0 && L_tmp > INT_MAX - vecSq2) { 639 L_tmp = INT_MAX; 640 } else if (vecSq2 < 0 && L_tmp < INT_MIN - vecSq2) { 641 L_tmp = INT_MIN; 642 } else { 643 L_tmp += vecSq2; 644 } 645 } 646 647 alp = ((L_tmp >> 3) + 0x8000) >> 16; 648 649 if(nbbits == 72) 650 { 651 ipos[16] = 0; 652 ipos[17] = 1; 653 } 654 } 655 656 /* other stages of 2 pulses */ 657 658 for (j = pos, st = 0; j < nb_pulse; j += 2, st++) 659 { 660 /*--------------------------------------------------* 661 * Calculate correlation of all possible positions * 662 * of the next 2 pulses with previous fixed pulses. * 663 * Each pulse can have 16 possible positions. * 664 *--------------------------------------------------*/ 665 if(ipos[j] == 3) 666 { 667 cor_h_vec_30(h, vec, ipos[j], sign, rrixix, cor_x, cor_y); 668 } 669 else 670 { 671 #ifdef ASM_OPT /* asm optimization branch */ 672 cor_h_vec_012_asm(h, vec, ipos[j], sign, rrixix, cor_x, cor_y); 673 #else 674 cor_h_vec_012(h, vec, ipos[j], sign, rrixix, cor_x, cor_y); 675 #endif 676 } 677 /*--------------------------------------------------* 678 * Find best positions of 2 pulses. * 679 *--------------------------------------------------*/ 680 search_ixiy(nbpos[st], ipos[j], ipos[j + 1], &ps, &alp, 681 &ix, &iy, dn, dn2, cor_x, cor_y, rrixiy); 682 683 ind[j] = ix; 684 ind[j + 1] = iy; 685 686 if (sign[ix] < 0) 687 p0 = h_inv - ix; 688 else 689 p0 = h - ix; 690 if (sign[iy] < 0) 691 p1 = h_inv - iy; 692 else 693 p1 = h - iy; 694 695 for (i = 0; i < L_SUBFR; i+=4) 696 { 697 vec[i] += add1((*p0++), (*p1++)); 698 vec[i+1] += add1((*p0++), (*p1++)); 699 vec[i+2] += add1((*p0++), (*p1++)); 700 vec[i+3] += add1((*p0++), (*p1++)); 701 } 702 } 703 /* memorise the best codevector */ 704 ps = vo_mult(ps, ps); 705 s = L_sub(vo_L_mult(alpk, ps), vo_L_mult(psk, alp)); 706 if (s > 0) 707 { 708 psk = ps; 709 alpk = alp; 710 for (i = 0; i < nb_pulse; i++) 711 { 712 codvec[i] = ind[i]; 713 } 714 for (i = 0; i < L_SUBFR; i++) 715 { 716 y[i] = vec[i]; 717 } 718 } 719 } 720 /*-------------------------------------------------------------------* 721 * Build the codeword, the filtered codeword and index of codevector.* 722 *-------------------------------------------------------------------*/ 723 for (i = 0; i < NPMAXPT * NB_TRACK; i++) 724 { 725 ind[i] = -1; 726 } 727 for (i = 0; i < L_SUBFR; i++) 728 { 729 code[i] = 0; 730 y[i] = vo_shr_r(y[i], 3); /* Q12 to Q9 */ 731 } 732 val = (512 >> h_shift); /* codeword in Q9 format */ 733 for (k = 0; k < nb_pulse; k++) 734 { 735 i = codvec[k]; /* read pulse position */ 736 j = sign[i]; /* read sign */ 737 index = i >> 2; /* index = pos of pulse (0..15) */ 738 track = (Word16) (i & 0x03); /* track = i % NB_TRACK (0..3) */ 739 740 if (j > 0) 741 { 742 code[i] += val; 743 codvec[k] += 128; 744 } else 745 { 746 code[i] -= val; 747 index += NB_POS; 748 } 749 750 i = (Word16)((vo_L_mult(track, NPMAXPT) >> 1)); 751 752 while (i < NPMAXPT * NB_TRACK && ind[i] >= 0) 753 { 754 i += 1; 755 } 756 if (i < NPMAXPT * NB_TRACK) { 757 ind[i] = index; 758 } else { 759 ALOGE("b/132647222, OOB access in ind array track=%d i=%d", track, i); 760 android_errorWriteLog(0x534e4554, "132647222"); 761 } 762 } 763 764 k = 0; 765 /* Build index of codevector */ 766 if(nbbits == 20) 767 { 768 for (track = 0; track < NB_TRACK; track++) 769 { 770 _index[track] = (Word16)(quant_1p_N1(ind[k], 4)); 771 k += NPMAXPT; 772 } 773 } else if(nbbits == 36) 774 { 775 for (track = 0; track < NB_TRACK; track++) 776 { 777 _index[track] = (Word16)(quant_2p_2N1(ind[k], ind[k + 1], 4)); 778 k += NPMAXPT; 779 } 780 } else if(nbbits == 44) 781 { 782 for (track = 0; track < NB_TRACK - 2; track++) 783 { 784 _index[track] = (Word16)(quant_3p_3N1(ind[k], ind[k + 1], ind[k + 2], 4)); 785 k += NPMAXPT; 786 } 787 for (track = 2; track < NB_TRACK; track++) 788 { 789 _index[track] = (Word16)(quant_2p_2N1(ind[k], ind[k + 1], 4)); 790 k += NPMAXPT; 791 } 792 } else if(nbbits == 52) 793 { 794 for (track = 0; track < NB_TRACK; track++) 795 { 796 _index[track] = (Word16)(quant_3p_3N1(ind[k], ind[k + 1], ind[k + 2], 4)); 797 k += NPMAXPT; 798 } 799 } else if(nbbits == 64) 800 { 801 for (track = 0; track < NB_TRACK; track++) 802 { 803 L_index = quant_4p_4N(&ind[k], 4); 804 _index[track] = (Word16)((L_index >> 14) & 3); 805 _index[track + NB_TRACK] = (Word16)(L_index & 0x3FFF); 806 k += NPMAXPT; 807 } 808 } else if(nbbits == 72) 809 { 810 for (track = 0; track < NB_TRACK - 2; track++) 811 { 812 L_index = quant_5p_5N(&ind[k], 4); 813 _index[track] = (Word16)((L_index >> 10) & 0x03FF); 814 _index[track + NB_TRACK] = (Word16)(L_index & 0x03FF); 815 k += NPMAXPT; 816 } 817 for (track = 2; track < NB_TRACK; track++) 818 { 819 L_index = quant_4p_4N(&ind[k], 4); 820 _index[track] = (Word16)((L_index >> 14) & 3); 821 _index[track + NB_TRACK] = (Word16)(L_index & 0x3FFF); 822 k += NPMAXPT; 823 } 824 } else if(nbbits == 88) 825 { 826 for (track = 0; track < NB_TRACK; track++) 827 { 828 L_index = quant_6p_6N_2(&ind[k], 4); 829 _index[track] = (Word16)((L_index >> 11) & 0x07FF); 830 _index[track + NB_TRACK] = (Word16)(L_index & 0x07FF); 831 k += NPMAXPT; 832 } 833 } 834 return; 835 } 836 837 838 /*-------------------------------------------------------------------* 839 * Function cor_h_vec() * 840 * ~~~~~~~~~~~~~~~~~~~~~ * 841 * Compute correlations of h[] with vec[] for the specified track. * 842 *-------------------------------------------------------------------*/ 843 void cor_h_vec_30( 844 Word16 h[], /* (i) scaled impulse response */ 845 Word16 vec[], /* (i) scaled vector (/8) to correlate with h[] */ 846 Word16 track, /* (i) track to use */ 847 Word16 sign[], /* (i) sign vector */ 848 Word16 rrixix[][NB_POS], /* (i) correlation of h[x] with h[x] */ 849 Word16 cor_1[], /* (o) result of correlation (NB_POS elements) */ 850 Word16 cor_2[] /* (o) result of correlation (NB_POS elements) */ 851 ) 852 { 853 Word32 i, j, pos, corr; 854 Word16 *p0, *p1, *p2,*p3,*cor_x,*cor_y; 855 Word32 L_sum1,L_sum2; 856 cor_x = cor_1; 857 cor_y = cor_2; 858 p0 = rrixix[track]; 859 p3 = rrixix[0]; 860 pos = track; 861 862 for (i = 0; i < NB_POS; i+=2) 863 { 864 L_sum1 = L_sum2 = 0L; 865 p1 = h; 866 p2 = &vec[pos]; 867 for (j=pos;j < L_SUBFR; j++) 868 { 869 L_sum1 = L_add(L_sum1, *p1 * *p2); 870 p2-=3; 871 L_sum2 = L_add(L_sum2, *p1++ * *p2); 872 p2+=4; 873 } 874 p2-=3; 875 L_sum2 = L_add(L_sum2, *p1++ * *p2++); 876 L_sum2 = L_add(L_sum2, *p1++ * *p2++); 877 L_sum2 = L_add(L_sum2, *p1++ * *p2++); 878 879 L_sum1 = L_shl(L_sum1, 2); 880 L_sum2 = L_shl(L_sum2, 2); 881 882 corr = voround(L_sum1); 883 *cor_x++ = mult(corr, sign[pos]) + (*p0++); 884 corr = voround(L_sum2); 885 *cor_y++ = mult(corr, sign[pos-3]) + (*p3++); 886 pos += STEP; 887 888 L_sum1 = L_sum2 = 0L; 889 p1 = h; 890 p2 = &vec[pos]; 891 for (j=pos;j < L_SUBFR; j++) 892 { 893 L_sum1 = L_add(L_sum1, *p1 * *p2); 894 p2-=3; 895 L_sum2 = L_add(L_sum2, *p1++ * *p2); 896 p2+=4; 897 } 898 p2-=3; 899 L_sum2 = L_add(L_sum2, *p1++ * *p2++); 900 L_sum2 = L_add(L_sum2, *p1++ * *p2++); 901 L_sum2 = L_add(L_sum2, *p1++ * *p2++); 902 903 L_sum1 = L_shl(L_sum1, 2); 904 L_sum2 = L_shl(L_sum2, 2); 905 906 corr = voround(L_sum1); 907 *cor_x++ = mult(corr, sign[pos]) + (*p0++); 908 corr = voround(L_sum2); 909 *cor_y++ = mult(corr, sign[pos-3]) + (*p3++); 910 pos += STEP; 911 } 912 return; 913 } 914 915 void cor_h_vec_012( 916 Word16 h[], /* (i) scaled impulse response */ 917 Word16 vec[], /* (i) scaled vector (/8) to correlate with h[] */ 918 Word16 track, /* (i) track to use */ 919 Word16 sign[], /* (i) sign vector */ 920 Word16 rrixix[][NB_POS], /* (i) correlation of h[x] with h[x] */ 921 Word16 cor_1[], /* (o) result of correlation (NB_POS elements) */ 922 Word16 cor_2[] /* (o) result of correlation (NB_POS elements) */ 923 ) 924 { 925 Word32 i, j, pos, corr; 926 Word16 *p0, *p1, *p2,*p3,*cor_x,*cor_y; 927 Word32 L_sum1,L_sum2; 928 cor_x = cor_1; 929 cor_y = cor_2; 930 p0 = rrixix[track]; 931 p3 = rrixix[track+1]; 932 pos = track; 933 934 for (i = 0; i < NB_POS; i+=2) 935 { 936 L_sum1 = L_sum2 = 0L; 937 p1 = h; 938 p2 = &vec[pos]; 939 for (j=62-pos ;j >= 0; j--) 940 { 941 L_sum1 = L_add(L_sum1, *p1 * *p2++); 942 L_sum2 = L_add(L_sum2, *p1++ * *p2); 943 } 944 L_sum1 = L_add(L_sum1, *p1 * *p2); 945 L_sum1 = L_shl(L_sum1, 2); 946 L_sum2 = L_shl(L_sum2, 2); 947 948 corr = voround(L_sum1); 949 cor_x[i] = vo_mult(corr, sign[pos]) + (*p0++); 950 corr = voround(L_sum2); 951 cor_y[i] = vo_mult(corr, sign[pos + 1]) + (*p3++); 952 pos += STEP; 953 954 L_sum1 = L_sum2 = 0L; 955 p1 = h; 956 p2 = &vec[pos]; 957 for (j= 62-pos;j >= 0; j--) 958 { 959 L_sum1 = L_add(L_sum1, *p1 * *p2++); 960 L_sum2 = L_add(L_sum2, *p1++ * *p2); 961 } 962 L_sum1 = L_add(L_sum1, *p1 * *p2); 963 L_sum1 = L_shl(L_sum1, 2); 964 L_sum2 = L_shl(L_sum2, 2); 965 966 corr = voround(L_sum1); 967 cor_x[i+1] = vo_mult(corr, sign[pos]) + (*p0++); 968 corr = voround(L_sum2); 969 cor_y[i+1] = vo_mult(corr, sign[pos + 1]) + (*p3++); 970 pos += STEP; 971 } 972 return; 973 } 974 975 /*-------------------------------------------------------------------* 976 * Function search_ixiy() * 977 * ~~~~~~~~~~~~~~~~~~~~~~~ * 978 * Find the best positions of 2 pulses in a subframe. * 979 *-------------------------------------------------------------------*/ 980 981 void search_ixiy( 982 Word16 nb_pos_ix, /* (i) nb of pos for pulse 1 (1..8) */ 983 Word16 track_x, /* (i) track of pulse 1 */ 984 Word16 track_y, /* (i) track of pulse 2 */ 985 Word16 * ps, /* (i/o) correlation of all fixed pulses */ 986 Word16 * alp, /* (i/o) energy of all fixed pulses */ 987 Word16 * ix, /* (o) position of pulse 1 */ 988 Word16 * iy, /* (o) position of pulse 2 */ 989 Word16 dn[], /* (i) corr. between target and h[] */ 990 Word16 dn2[], /* (i) vector of selected positions */ 991 Word16 cor_x[], /* (i) corr. of pulse 1 with fixed pulses */ 992 Word16 cor_y[], /* (i) corr. of pulse 2 with fixed pulses */ 993 Word16 rrixiy[][MSIZE] /* (i) corr. of pulse 1 with pulse 2 */ 994 ) 995 { 996 Word32 x, y, pos, thres_ix; 997 Word16 ps1, ps2, sq, sqk; 998 Word16 alp_16, alpk; 999 Word16 *p0, *p1, *p2; 1000 Word32 s, alp0, alp1, alp2; 1001 1002 p0 = cor_x; 1003 p1 = cor_y; 1004 p2 = rrixiy[track_x]; 1005 1006 thres_ix = nb_pos_ix - NB_MAX; 1007 1008 alp0 = L_deposit_h(*alp); 1009 alp0 = (alp0 + 0x00008000L); /* for rounding */ 1010 1011 sqk = -1; 1012 alpk = 1; 1013 1014 for (x = track_x; x < L_SUBFR; x += STEP) 1015 { 1016 ps1 = *ps + dn[x]; 1017 alp1 = L_add(alp0, ((*p0++)<<13)); 1018 1019 if (dn2[x] < thres_ix) 1020 { 1021 pos = -1; 1022 for (y = track_y; y < L_SUBFR; y += STEP) 1023 { 1024 ps2 = add1(ps1, dn[y]); 1025 1026 alp2 = L_add(alp1, ((*p1++)<<13)); 1027 alp2 = L_add(alp2, ((*p2++)<<14)); 1028 alp_16 = extract_h(alp2); 1029 sq = vo_mult(ps2, ps2); 1030 s = L_sub(vo_L_mult(alpk, sq), L_mult(sqk, alp_16)); 1031 1032 if (s > 0) 1033 { 1034 sqk = sq; 1035 alpk = alp_16; 1036 pos = y; 1037 } 1038 } 1039 p1 -= NB_POS; 1040 1041 if (pos >= 0) 1042 { 1043 *ix = x; 1044 *iy = pos; 1045 } 1046 } else 1047 { 1048 p2 += NB_POS; 1049 } 1050 } 1051 1052 *ps = add1(*ps, add1(dn[*ix], dn[*iy])); 1053 *alp = alpk; 1054 1055 return; 1056 } 1057 1058 1059 1060 1061