1 /* 2 ** Copyright 2003-2010, VisualOn, Inc. 3 ** 4 ** Licensed under the Apache License, Version 2.0 (the "License"); 5 ** you may not use this file except in compliance with the License. 6 ** You may obtain a copy of the License at 7 ** 8 ** http://www.apache.org/licenses/LICENSE-2.0 9 ** 10 ** Unless required by applicable law or agreed to in writing, software 11 ** distributed under the License is distributed on an "AS IS" BASIS, 12 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 ** See the License for the specific language governing permissions and 14 ** limitations under the License. 15 */ 16 17 /*********************************************************************** 18 * File: dtx.c * 19 * * 20 * Description:DTX functions * 21 * * 22 ************************************************************************/ 23 24 #include <stdio.h> 25 #include <stdlib.h> 26 #include "typedef.h" 27 #include "basic_op.h" 28 #include "oper_32b.h" 29 #include "math_op.h" 30 #include "cnst.h" 31 #include "acelp.h" /* prototype of functions */ 32 #include "bits.h" 33 #include "dtx.h" 34 #include "log2.h" 35 #include "mem_align.h" 36 37 static void aver_isf_history( 38 Word16 isf_old[], 39 Word16 indices[], 40 Word32 isf_aver[] 41 ); 42 43 static void find_frame_indices( 44 Word16 isf_old_tx[], 45 Word16 indices[], 46 dtx_encState * st 47 ); 48 49 static Word16 dithering_control( 50 dtx_encState * st 51 ); 52 53 /* excitation energy adjustment depending on speech coder mode used, Q7 */ 54 static Word16 en_adjust[9] = 55 { 56 230, /* mode0 = 7k : -5.4dB */ 57 179, /* mode1 = 9k : -4.2dB */ 58 141, /* mode2 = 12k : -3.3dB */ 59 128, /* mode3 = 14k : -3.0dB */ 60 122, /* mode4 = 16k : -2.85dB */ 61 115, /* mode5 = 18k : -2.7dB */ 62 115, /* mode6 = 20k : -2.7dB */ 63 115, /* mode7 = 23k : -2.7dB */ 64 115 /* mode8 = 24k : -2.7dB */ 65 }; 66 67 /************************************************************************** 68 * 69 * Function : dtx_enc_init 70 * 71 **************************************************************************/ 72 Word16 dtx_enc_init(dtx_encState ** st, Word16 isf_init[], VO_MEM_OPERATOR *pMemOP) 73 { 74 dtx_encState *s; 75 76 if (st == (dtx_encState **) NULL) 77 { 78 fprintf(stderr, "dtx_enc_init: invalid parameter\n"); 79 return -1; 80 } 81 *st = NULL; 82 83 /* allocate memory */ 84 if ((s = (dtx_encState *)mem_malloc(pMemOP, sizeof(dtx_encState), 32, VO_INDEX_ENC_AMRWB)) == NULL) 85 { 86 fprintf(stderr, "dtx_enc_init: can not malloc state structure\n"); 87 return -1; 88 } 89 dtx_enc_reset(s, isf_init); 90 *st = s; 91 return 0; 92 } 93 94 /************************************************************************** 95 * 96 * Function : dtx_enc_reset 97 * 98 **************************************************************************/ 99 Word16 dtx_enc_reset(dtx_encState * st, Word16 isf_init[]) 100 { 101 Word32 i; 102 103 if (st == (dtx_encState *) NULL) 104 { 105 fprintf(stderr, "dtx_enc_reset: invalid parameter\n"); 106 return -1; 107 } 108 st->hist_ptr = 0; 109 st->log_en_index = 0; 110 111 /* Init isf_hist[] */ 112 for (i = 0; i < DTX_HIST_SIZE; i++) 113 { 114 Copy(isf_init, &st->isf_hist[i * M], M); 115 } 116 st->cng_seed = RANDOM_INITSEED; 117 118 /* Reset energy history */ 119 Set_zero(st->log_en_hist, DTX_HIST_SIZE); 120 121 st->dtxHangoverCount = DTX_HANG_CONST; 122 st->decAnaElapsedCount = 32767; 123 124 for (i = 0; i < 28; i++) 125 { 126 st->D[i] = 0; 127 } 128 129 for (i = 0; i < DTX_HIST_SIZE - 1; i++) 130 { 131 st->sumD[i] = 0; 132 } 133 134 return 1; 135 } 136 137 /************************************************************************** 138 * 139 * Function : dtx_enc_exit 140 * 141 **************************************************************************/ 142 void dtx_enc_exit(dtx_encState ** st, VO_MEM_OPERATOR *pMemOP) 143 { 144 if (st == NULL || *st == NULL) 145 return; 146 /* deallocate memory */ 147 mem_free(pMemOP, *st, VO_INDEX_ENC_AMRWB); 148 *st = NULL; 149 return; 150 } 151 152 153 /************************************************************************** 154 * 155 * Function : dtx_enc 156 * 157 **************************************************************************/ 158 Word16 dtx_enc( 159 dtx_encState * st, /* i/o : State struct */ 160 Word16 isf[M], /* o : CN ISF vector */ 161 Word16 * exc2, /* o : CN excitation */ 162 Word16 ** prms 163 ) 164 { 165 Word32 i, j; 166 Word16 indice[7]; 167 Word16 log_en, gain, level, exp, exp0, tmp; 168 Word16 log_en_int_e, log_en_int_m; 169 Word32 L_isf[M], ener32, level32; 170 Word16 isf_order[3]; 171 Word16 CN_dith; 172 173 /* VOX mode computation of SID parameters */ 174 log_en = 0; 175 for (i = 0; i < M; i++) 176 { 177 L_isf[i] = 0; 178 } 179 /* average energy and isf */ 180 for (i = 0; i < DTX_HIST_SIZE; i++) 181 { 182 /* Division by DTX_HIST_SIZE = 8 has been done in dtx_buffer. log_en is in Q10 */ 183 log_en = add(log_en, st->log_en_hist[i]); 184 185 } 186 find_frame_indices(st->isf_hist, isf_order, st); 187 aver_isf_history(st->isf_hist, isf_order, L_isf); 188 189 for (j = 0; j < M; j++) 190 { 191 isf[j] = (Word16)(L_isf[j] >> 3); /* divide by 8 */ 192 } 193 194 /* quantize logarithmic energy to 6 bits (-6 : 66 dB) which corresponds to -2:22 in log2(E). */ 195 /* st->log_en_index = (short)( (log_en + 2.0) * 2.625 ); */ 196 197 /* increase dynamics to 7 bits (Q8) */ 198 log_en = (log_en >> 2); 199 200 /* Add 2 in Q8 = 512 to get log2(E) between 0:24 */ 201 log_en = add(log_en, 512); 202 203 /* Multiply by 2.625 to get full 6 bit range. 2.625 = 21504 in Q13. The result is in Q6 */ 204 log_en = mult(log_en, 21504); 205 206 /* Quantize Energy */ 207 st->log_en_index = shr(log_en, 6); 208 209 if(st->log_en_index > 63) 210 { 211 st->log_en_index = 63; 212 } 213 if (st->log_en_index < 0) 214 { 215 st->log_en_index = 0; 216 } 217 /* Quantize ISFs */ 218 Qisf_ns(isf, isf, indice); 219 220 221 Parm_serial(indice[0], 6, prms); 222 Parm_serial(indice[1], 6, prms); 223 Parm_serial(indice[2], 6, prms); 224 Parm_serial(indice[3], 5, prms); 225 Parm_serial(indice[4], 5, prms); 226 227 Parm_serial((st->log_en_index), 6, prms); 228 229 CN_dith = dithering_control(st); 230 Parm_serial(CN_dith, 1, prms); 231 232 /* level = (float)( pow( 2.0f, (float)st->log_en_index / 2.625 - 2.0 ) ); */ 233 /* log2(E) in Q9 (log2(E) lies in between -2:22) */ 234 log_en = shl(st->log_en_index, 15 - 6); 235 236 /* Divide by 2.625; log_en will be between 0:24 */ 237 log_en = mult(log_en, 12483); 238 /* the result corresponds to log2(gain) in Q10 */ 239 240 /* Find integer part */ 241 log_en_int_e = (log_en >> 10); 242 243 /* Find fractional part */ 244 log_en_int_m = (Word16) (log_en & 0x3ff); 245 log_en_int_m = shl(log_en_int_m, 5); 246 247 /* Subtract 2 from log_en in Q9, i.e divide the gain by 2 (energy by 4) */ 248 /* Add 16 in order to have the result of pow2 in Q16 */ 249 log_en_int_e = add(log_en_int_e, 16 - 1); 250 251 level32 = Pow2(log_en_int_e, log_en_int_m); /* Q16 */ 252 exp0 = norm_l(level32); 253 level32 = (level32 << exp0); /* level in Q31 */ 254 exp0 = (15 - exp0); 255 level = extract_h(level32); /* level in Q15 */ 256 257 /* generate white noise vector */ 258 for (i = 0; i < L_FRAME; i++) 259 { 260 exc2[i] = (Random(&(st->cng_seed)) >> 4); 261 } 262 263 /* gain = level / sqrt(ener) * sqrt(L_FRAME) */ 264 265 /* energy of generated excitation */ 266 ener32 = Dot_product12(exc2, exc2, L_FRAME, &exp); 267 268 Isqrt_n(&ener32, &exp); 269 270 gain = extract_h(ener32); 271 272 gain = mult(level, gain); /* gain in Q15 */ 273 274 exp = add(exp0, exp); 275 276 /* Multiply by sqrt(L_FRAME)=16, i.e. shift left by 4 */ 277 exp += 4; 278 279 for (i = 0; i < L_FRAME; i++) 280 { 281 tmp = mult(exc2[i], gain); /* Q0 * Q15 */ 282 exc2[i] = shl(tmp, exp); 283 } 284 285 return 0; 286 } 287 288 /************************************************************************** 289 * 290 * Function : dtx_buffer Purpose : handles the DTX buffer 291 * 292 **************************************************************************/ 293 Word16 dtx_buffer( 294 dtx_encState * st, /* i/o : State struct */ 295 Word16 isf_new[], /* i : isf vector */ 296 Word32 enr, /* i : residual energy (in L_FRAME) */ 297 Word16 codec_mode 298 ) 299 { 300 Word16 log_en; 301 302 Word16 log_en_e; 303 Word16 log_en_m; 304 st->hist_ptr = add(st->hist_ptr, 1); 305 if(st->hist_ptr == DTX_HIST_SIZE) 306 { 307 st->hist_ptr = 0; 308 } 309 /* copy lsp vector into buffer */ 310 Copy(isf_new, &st->isf_hist[st->hist_ptr * M], M); 311 312 /* log_en = (float)log10(enr*0.0059322)/(float)log10(2.0f); */ 313 Log2(enr, &log_en_e, &log_en_m); 314 315 /* convert exponent and mantissa to Word16 Q7. Q7 is used to simplify averaging in dtx_enc */ 316 log_en = shl(log_en_e, 7); /* Q7 */ 317 log_en = add(log_en, shr(log_en_m, 15 - 7)); 318 319 /* Find energy per sample by multiplying with 0.0059322, i.e subtract log2(1/0.0059322) = 7.39722 The 320 * constant 0.0059322 takes into account windowings and analysis length from autocorrelation 321 * computations; 7.39722 in Q7 = 947 */ 322 /* Subtract 3 dB = 0.99658 in log2(E) = 127 in Q7. */ 323 /* log_en = sub( log_en, 947 + en_adjust[codec_mode] ); */ 324 325 /* Find energy per sample (divide by L_FRAME=256), i.e subtract log2(256) = 8.0 (1024 in Q7) */ 326 /* Subtract 3 dB = 0.99658 in log2(E) = 127 in Q7. */ 327 328 log_en = sub(log_en, add(1024, en_adjust[codec_mode])); 329 330 /* Insert into the buffer */ 331 st->log_en_hist[st->hist_ptr] = log_en; 332 return 0; 333 } 334 335 /************************************************************************** 336 * 337 * Function : tx_dtx_handler Purpose : adds extra speech hangover 338 * to analyze speech on 339 * the decoding side. 340 **************************************************************************/ 341 void tx_dtx_handler(dtx_encState * st, /* i/o : State struct */ 342 Word16 vad_flag, /* i : vad decision */ 343 Word16 * usedMode /* i/o : mode changed or not */ 344 ) 345 { 346 347 /* this state machine is in synch with the GSMEFR txDtx machine */ 348 st->decAnaElapsedCount = add(st->decAnaElapsedCount, 1); 349 350 if (vad_flag != 0) 351 { 352 st->dtxHangoverCount = DTX_HANG_CONST; 353 } else 354 { /* non-speech */ 355 if (st->dtxHangoverCount == 0) 356 { /* out of decoder analysis hangover */ 357 st->decAnaElapsedCount = 0; 358 *usedMode = MRDTX; 359 } else 360 { /* in possible analysis hangover */ 361 st->dtxHangoverCount = sub(st->dtxHangoverCount, 1); 362 363 /* decAnaElapsedCount + dtxHangoverCount < DTX_ELAPSED_FRAMES_THRESH */ 364 if (sub(add(st->decAnaElapsedCount, st->dtxHangoverCount), 365 DTX_ELAPSED_FRAMES_THRESH) < 0) 366 { 367 *usedMode = MRDTX; 368 /* if short time since decoder update, do not add extra HO */ 369 } 370 /* else override VAD and stay in speech mode *usedMode and add extra hangover */ 371 } 372 } 373 374 return; 375 } 376 377 378 379 static void aver_isf_history( 380 Word16 isf_old[], 381 Word16 indices[], 382 Word32 isf_aver[] 383 ) 384 { 385 Word32 i, j, k; 386 Word16 isf_tmp[2 * M]; 387 Word32 L_tmp; 388 389 /* Memorize in isf_tmp[][] the ISF vectors to be replaced by */ 390 /* the median ISF vector prior to the averaging */ 391 for (k = 0; k < 2; k++) 392 { 393 if ((indices[k] + 1) != 0) 394 { 395 for (i = 0; i < M; i++) 396 { 397 isf_tmp[k * M + i] = isf_old[indices[k] * M + i]; 398 isf_old[indices[k] * M + i] = isf_old[indices[2] * M + i]; 399 } 400 } 401 } 402 403 /* Perform the ISF averaging */ 404 for (j = 0; j < M; j++) 405 { 406 L_tmp = 0; 407 408 for (i = 0; i < DTX_HIST_SIZE; i++) 409 { 410 L_tmp = L_add(L_tmp, L_deposit_l(isf_old[i * M + j])); 411 } 412 isf_aver[j] = L_tmp; 413 } 414 415 /* Retrieve from isf_tmp[][] the ISF vectors saved prior to averaging */ 416 for (k = 0; k < 2; k++) 417 { 418 if ((indices[k] + 1) != 0) 419 { 420 for (i = 0; i < M; i++) 421 { 422 isf_old[indices[k] * M + i] = isf_tmp[k * M + i]; 423 } 424 } 425 } 426 427 return; 428 } 429 430 static void find_frame_indices( 431 Word16 isf_old_tx[], 432 Word16 indices[], 433 dtx_encState * st 434 ) 435 { 436 Word32 L_tmp, summin, summax, summax2nd; 437 Word16 i, j, tmp; 438 Word16 ptr; 439 440 /* Remove the effect of the oldest frame from the column */ 441 /* sum sumD[0..DTX_HIST_SIZE-1]. sumD[DTX_HIST_SIZE] is */ 442 /* not updated since it will be removed later. */ 443 444 tmp = DTX_HIST_SIZE_MIN_ONE; 445 j = -1; 446 for (i = 0; i < DTX_HIST_SIZE_MIN_ONE; i++) 447 { 448 j = add(j, tmp); 449 st->sumD[i] = L_sub(st->sumD[i], st->D[j]); 450 tmp = sub(tmp, 1); 451 } 452 453 /* Shift the column sum sumD. The element sumD[DTX_HIST_SIZE-1] */ 454 /* corresponding to the oldest frame is removed. The sum of */ 455 /* the distances between the latest isf and other isfs, */ 456 /* i.e. the element sumD[0], will be computed during this call. */ 457 /* Hence this element is initialized to zero. */ 458 459 for (i = DTX_HIST_SIZE_MIN_ONE; i > 0; i--) 460 { 461 st->sumD[i] = st->sumD[i - 1]; 462 } 463 st->sumD[0] = 0; 464 465 /* Remove the oldest frame from the distance matrix. */ 466 /* Note that the distance matrix is replaced by a one- */ 467 /* dimensional array to save static memory. */ 468 469 tmp = 0; 470 for (i = 27; i >= 12; i = (Word16) (i - tmp)) 471 { 472 tmp = add(tmp, 1); 473 for (j = tmp; j > 0; j--) 474 { 475 st->D[i - j + 1] = st->D[i - j - tmp]; 476 } 477 } 478 479 /* Compute the first column of the distance matrix D */ 480 /* (squared Euclidean distances from isf1[] to isf_old_tx[][]). */ 481 482 ptr = st->hist_ptr; 483 for (i = 1; i < DTX_HIST_SIZE; i++) 484 { 485 /* Compute the distance between the latest isf and the other isfs. */ 486 ptr = sub(ptr, 1); 487 if (ptr < 0) 488 { 489 ptr = DTX_HIST_SIZE_MIN_ONE; 490 } 491 L_tmp = 0; 492 for (j = 0; j < M; j++) 493 { 494 tmp = sub(isf_old_tx[st->hist_ptr * M + j], isf_old_tx[ptr * M + j]); 495 L_tmp = L_mac(L_tmp, tmp, tmp); 496 } 497 st->D[i - 1] = L_tmp; 498 499 /* Update also the column sums. */ 500 st->sumD[0] = L_add(st->sumD[0], st->D[i - 1]); 501 st->sumD[i] = L_add(st->sumD[i], st->D[i - 1]); 502 } 503 504 /* Find the minimum and maximum distances */ 505 summax = st->sumD[0]; 506 summin = st->sumD[0]; 507 indices[0] = 0; 508 indices[2] = 0; 509 for (i = 1; i < DTX_HIST_SIZE; i++) 510 { 511 if (L_sub(st->sumD[i], summax) > 0) 512 { 513 indices[0] = i; 514 summax = st->sumD[i]; 515 } 516 if (L_sub(st->sumD[i], summin) < 0) 517 { 518 indices[2] = i; 519 summin = st->sumD[i]; 520 } 521 } 522 523 /* Find the second largest distance */ 524 summax2nd = -2147483647L; 525 indices[1] = -1; 526 for (i = 0; i < DTX_HIST_SIZE; i++) 527 { 528 if ((L_sub(st->sumD[i], summax2nd) > 0) && (sub(i, indices[0]) != 0)) 529 { 530 indices[1] = i; 531 summax2nd = st->sumD[i]; 532 } 533 } 534 535 for (i = 0; i < 3; i++) 536 { 537 indices[i] = sub(st->hist_ptr, indices[i]); 538 if (indices[i] < 0) 539 { 540 indices[i] = add(indices[i], DTX_HIST_SIZE); 541 } 542 } 543 544 /* If maximum distance/MED_THRESH is smaller than minimum distance */ 545 /* then the median ISF vector replacement is not performed */ 546 tmp = norm_l(summax); 547 summax = (summax << tmp); 548 summin = (summin << tmp); 549 L_tmp = L_mult(voround(summax), INV_MED_THRESH); 550 if(L_tmp <= summin) 551 { 552 indices[0] = -1; 553 } 554 /* If second largest distance/MED_THRESH is smaller than */ 555 /* minimum distance then the median ISF vector replacement is */ 556 /* not performed */ 557 summax2nd = L_shl(summax2nd, tmp); 558 L_tmp = L_mult(voround(summax2nd), INV_MED_THRESH); 559 if(L_tmp <= summin) 560 { 561 indices[1] = -1; 562 } 563 return; 564 } 565 566 static Word16 dithering_control( 567 dtx_encState * st 568 ) 569 { 570 Word16 tmp, mean, CN_dith, gain_diff; 571 Word32 i, ISF_diff; 572 573 /* determine how stationary the spectrum of background noise is */ 574 ISF_diff = 0; 575 for (i = 0; i < 8; i++) 576 { 577 ISF_diff = L_add(ISF_diff, st->sumD[i]); 578 } 579 if ((ISF_diff >> 26) > 0) 580 { 581 CN_dith = 1; 582 } else 583 { 584 CN_dith = 0; 585 } 586 587 /* determine how stationary the energy of background noise is */ 588 mean = 0; 589 for (i = 0; i < DTX_HIST_SIZE; i++) 590 { 591 mean = add(mean, st->log_en_hist[i]); 592 } 593 mean = (mean >> 3); 594 gain_diff = 0; 595 for (i = 0; i < DTX_HIST_SIZE; i++) 596 { 597 tmp = abs_s(sub(st->log_en_hist[i], mean)); 598 gain_diff = add(gain_diff, tmp); 599 } 600 if (gain_diff > GAIN_THR) 601 { 602 CN_dith = 1; 603 } 604 return CN_dith; 605 } 606