1 /* 2 ** Copyright 2003-2010, VisualOn, Inc. 3 ** 4 ** Licensed under the Apache License, Version 2.0 (the "License"); 5 ** you may not use this file except in compliance with the License. 6 ** You may obtain a copy of the License at 7 ** 8 ** http://www.apache.org/licenses/LICENSE-2.0 9 ** 10 ** Unless required by applicable law or agreed to in writing, software 11 ** distributed under the License is distributed on an "AS IS" BASIS, 12 ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 ** See the License for the specific language governing permissions and 14 ** limitations under the License. 15 */ 16 17 /*********************************************************************** 18 * File: wb_vad.c * 19 * * 20 * Description: Voice Activity Detection * 21 * * 22 ************************************************************************/ 23 24 #include <stdlib.h> 25 #include <stdio.h> 26 #include "cnst.h" 27 #include "wb_vad.h" 28 #include "typedef.h" 29 #include "basic_op.h" 30 #include "math_op.h" 31 #include "wb_vad_c.h" 32 #include "mem_align.h" 33 34 /****************************************************************************** 35 * Calculate Log2 and scale the signal: 36 * 37 * ilog2(Word32 in) = -1024*log10(in * 2^-31)/log10(2), where in = [1, 2^31-1] 38 * 39 * input output 40 * 32768 16384 41 * 1 31744 42 * 43 * When input is in the range of [1,2^16], max error is 0.0380%. 44 *********************************************************************************/ 45 46 static Word16 ilog2( /* return: output value of the log2 */ 47 Word16 mant /* i: value to be converted */ 48 ) 49 { 50 Word16 ex, ex2, res; 51 Word32 i, l_temp; 52 53 if (mant <= 0) 54 { 55 mant = 1; 56 } 57 ex = norm_s(mant); 58 mant = mant << ex; 59 60 for (i = 0; i < 3; i++) 61 mant = vo_mult(mant, mant); 62 l_temp = vo_L_mult(mant, mant); 63 64 ex2 = norm_l(l_temp); 65 mant = extract_h(l_temp << ex2); 66 67 res = (ex + 16) << 10; 68 res = add1(res, (ex2 << 6)); 69 res = vo_sub(add1(res, 127), (mant >> 8)); 70 return (res); 71 } 72 73 /****************************************************************************** 74 * 75 * Function : filter5 76 * Purpose : Fifth-order half-band lowpass/highpass filter pair with 77 * decimation. 78 * 79 *******************************************************************************/ 80 81 static void filter5( 82 Word16 * in0, /* i/o : input values; output low-pass part */ 83 Word16 * in1, /* i/o : input values; output high-pass part */ 84 Word16 data[] /* i/o : filter memory */ 85 ) 86 { 87 Word16 temp0, temp1, temp2; 88 89 temp0 = vo_sub(*in0, vo_mult(COEFF5_1, data[0])); 90 temp1 = add1(data[0], vo_mult(COEFF5_1, temp0)); 91 data[0] = temp0; 92 93 temp0 = vo_sub(*in1, vo_mult(COEFF5_2, data[1])); 94 temp2 = add1(data[1], vo_mult(COEFF5_2, temp0)); 95 data[1] = temp0; 96 97 *in0 = extract_h((vo_L_add(temp1, temp2) << 15)); 98 *in1 = extract_h((vo_L_sub(temp1, temp2) << 15)); 99 } 100 101 /****************************************************************************** 102 * 103 * Function : filter3 104 * Purpose : Third-order half-band lowpass/highpass filter pair with 105 * decimation. 106 * 107 *******************************************************************************/ 108 109 static void filter3( 110 Word16 * in0, /* i/o : input values; output low-pass part */ 111 Word16 * in1, /* i/o : input values; output high-pass part */ 112 Word16 * data /* i/o : filter memory */ 113 ) 114 { 115 Word16 temp1, temp2; 116 117 temp1 = vo_sub(*in1, vo_mult(COEFF3, *data)); 118 temp2 = add1(*data, vo_mult(COEFF3, temp1)); 119 *data = temp1; 120 121 *in1 = extract_h((vo_L_sub(*in0, temp2) << 15)); 122 *in0 = extract_h((vo_L_add(*in0, temp2) << 15)); 123 } 124 125 /****************************************************************************** 126 * 127 * Function : level_calculation 128 * Purpose : Calculate signal level in a sub-band. Level is calculated 129 * by summing absolute values of the input data. 130 * 131 * Signal level calculated from of the end of the frame 132 * (data[count1 - count2]) is stored to (*sub_level) 133 * and added to the level of the next frame. 134 * 135 ******************************************************************************/ 136 137 static Word16 level_calculation( /* return: signal level */ 138 Word16 data[], /* i : signal buffer */ 139 Word16 * sub_level, /* i : level calculated at the end of the previous frame*/ 140 /* o : level of signal calculated from the last */ 141 /* (count2 - count1) samples */ 142 Word16 count1, /* i : number of samples to be counted */ 143 Word16 count2, /* i : number of samples to be counted */ 144 Word16 ind_m, /* i : step size for the index of the data buffer */ 145 Word16 ind_a, /* i : starting index of the data buffer */ 146 Word16 scale /* i : scaling for the level calculation */ 147 ) 148 { 149 Word32 i, l_temp1, l_temp2; 150 Word16 level; 151 152 l_temp1 = 0L; 153 for (i = count1; i < count2; i++) 154 { 155 l_temp1 += (abs_s(data[ind_m * i + ind_a])<<1); 156 } 157 158 l_temp2 = vo_L_add(l_temp1, L_shl(*sub_level, 16 - scale)); 159 *sub_level = extract_h(L_shl(l_temp1, scale)); 160 161 for (i = 0; i < count1; i++) 162 { 163 l_temp2 += (abs_s(data[ind_m * i + ind_a])<<1); 164 } 165 level = extract_h(L_shl2(l_temp2, scale)); 166 167 return level; 168 } 169 170 /****************************************************************************** 171 * 172 * Function : filter_bank 173 * Purpose : Divide input signal into bands and calculate level of 174 * the signal in each band 175 * 176 *******************************************************************************/ 177 178 static void filter_bank( 179 VadVars * st, /* i/o : State struct */ 180 Word16 in[], /* i : input frame */ 181 Word16 level[] /* o : signal levels at each band */ 182 ) 183 { 184 Word32 i; 185 Word16 tmp_buf[FRAME_LEN]; 186 187 /* shift input 1 bit down for safe scaling */ 188 for (i = 0; i < FRAME_LEN; i++) 189 { 190 tmp_buf[i] = in[i] >> 1; 191 } 192 193 /* run the filter bank */ 194 for (i = 0; i < 128; i++) 195 { 196 filter5(&tmp_buf[2 * i], &tmp_buf[2 * i + 1], st->a_data5[0]); 197 } 198 for (i = 0; i < 64; i++) 199 { 200 filter5(&tmp_buf[4 * i], &tmp_buf[4 * i + 2], st->a_data5[1]); 201 filter5(&tmp_buf[4 * i + 1], &tmp_buf[4 * i + 3], st->a_data5[2]); 202 } 203 for (i = 0; i < 32; i++) 204 { 205 filter5(&tmp_buf[8 * i], &tmp_buf[8 * i + 4], st->a_data5[3]); 206 filter5(&tmp_buf[8 * i + 2], &tmp_buf[8 * i + 6], st->a_data5[4]); 207 filter3(&tmp_buf[8 * i + 3], &tmp_buf[8 * i + 7], &st->a_data3[0]); 208 } 209 for (i = 0; i < 16; i++) 210 { 211 filter3(&tmp_buf[16 * i + 0], &tmp_buf[16 * i + 8], &st->a_data3[1]); 212 filter3(&tmp_buf[16 * i + 4], &tmp_buf[16 * i + 12], &st->a_data3[2]); 213 filter3(&tmp_buf[16 * i + 6], &tmp_buf[16 * i + 14], &st->a_data3[3]); 214 } 215 216 for (i = 0; i < 8; i++) 217 { 218 filter3(&tmp_buf[32 * i + 0], &tmp_buf[32 * i + 16], &st->a_data3[4]); 219 filter3(&tmp_buf[32 * i + 8], &tmp_buf[32 * i + 24], &st->a_data3[5]); 220 } 221 222 /* calculate levels in each frequency band */ 223 224 /* 4800 - 6400 Hz */ 225 level[11] = level_calculation(tmp_buf, &st->sub_level[11], 16, 64, 4, 1, 14); 226 /* 4000 - 4800 Hz */ 227 level[10] = level_calculation(tmp_buf, &st->sub_level[10], 8, 32, 8, 7, 15); 228 /* 3200 - 4000 Hz */ 229 level[9] = level_calculation(tmp_buf, &st->sub_level[9],8, 32, 8, 3, 15); 230 /* 2400 - 3200 Hz */ 231 level[8] = level_calculation(tmp_buf, &st->sub_level[8],8, 32, 8, 2, 15); 232 /* 2000 - 2400 Hz */ 233 level[7] = level_calculation(tmp_buf, &st->sub_level[7],4, 16, 16, 14, 16); 234 /* 1600 - 2000 Hz */ 235 level[6] = level_calculation(tmp_buf, &st->sub_level[6],4, 16, 16, 6, 16); 236 /* 1200 - 1600 Hz */ 237 level[5] = level_calculation(tmp_buf, &st->sub_level[5],4, 16, 16, 4, 16); 238 /* 800 - 1200 Hz */ 239 level[4] = level_calculation(tmp_buf, &st->sub_level[4],4, 16, 16, 12, 16); 240 /* 600 - 800 Hz */ 241 level[3] = level_calculation(tmp_buf, &st->sub_level[3],2, 8, 32, 8, 17); 242 /* 400 - 600 Hz */ 243 level[2] = level_calculation(tmp_buf, &st->sub_level[2],2, 8, 32, 24, 17); 244 /* 200 - 400 Hz */ 245 level[1] = level_calculation(tmp_buf, &st->sub_level[1],2, 8, 32, 16, 17); 246 /* 0 - 200 Hz */ 247 level[0] = level_calculation(tmp_buf, &st->sub_level[0],2, 8, 32, 0, 17); 248 } 249 250 /****************************************************************************** 251 * 252 * Function : update_cntrl 253 * Purpose : Control update of the background noise estimate. 254 * 255 *******************************************************************************/ 256 257 static void update_cntrl( 258 VadVars * st, /* i/o : State structure */ 259 Word16 level[] /* i : sub-band levels of the input frame */ 260 ) 261 { 262 Word32 i; 263 Word16 num, temp, stat_rat, exp, denom; 264 Word16 alpha; 265 266 /* if a tone has been detected for a while, initialize stat_count */ 267 if (sub((Word16) (st->tone_flag & 0x7c00), 0x7c00) == 0) 268 { 269 st->stat_count = STAT_COUNT; 270 } else 271 { 272 /* if 8 last vad-decisions have been "0", reinitialize stat_count */ 273 if ((st->vadreg & 0x7f80) == 0) 274 { 275 st->stat_count = STAT_COUNT; 276 } else 277 { 278 stat_rat = 0; 279 for (i = 0; i < COMPLEN; i++) 280 { 281 if(level[i] > st->ave_level[i]) 282 { 283 num = level[i]; 284 denom = st->ave_level[i]; 285 } else 286 { 287 num = st->ave_level[i]; 288 denom = level[i]; 289 } 290 /* Limit nimimum value of num and denom to STAT_THR_LEVEL */ 291 if(num < STAT_THR_LEVEL) 292 { 293 num = STAT_THR_LEVEL; 294 } 295 if(denom < STAT_THR_LEVEL) 296 { 297 denom = STAT_THR_LEVEL; 298 } 299 exp = norm_s(denom); 300 denom = denom << exp; 301 302 /* stat_rat = num/denom * 64 */ 303 temp = div_s(num >> 1, denom); 304 stat_rat = add1(stat_rat, shr(temp, (8 - exp))); 305 } 306 307 /* compare stat_rat with a threshold and update stat_count */ 308 if(stat_rat > STAT_THR) 309 { 310 st->stat_count = STAT_COUNT; 311 } else 312 { 313 if ((st->vadreg & 0x4000) != 0) 314 { 315 316 if (st->stat_count != 0) 317 { 318 st->stat_count = st->stat_count - 1; 319 } 320 } 321 } 322 } 323 } 324 325 /* Update average amplitude estimate for stationarity estimation */ 326 alpha = ALPHA4; 327 if(st->stat_count == STAT_COUNT) 328 { 329 alpha = 32767; 330 } else if ((st->vadreg & 0x4000) == 0) 331 { 332 alpha = ALPHA5; 333 } 334 for (i = 0; i < COMPLEN; i++) 335 { 336 st->ave_level[i] = add1(st->ave_level[i], vo_mult_r(alpha, vo_sub(level[i], st->ave_level[i]))); 337 } 338 } 339 340 /****************************************************************************** 341 * 342 * Function : hangover_addition 343 * Purpose : Add hangover after speech bursts 344 * 345 *******************************************************************************/ 346 347 static Word16 hangover_addition( /* return: VAD_flag indicating final VAD decision */ 348 VadVars * st, /* i/o : State structure */ 349 Word16 low_power, /* i : flag power of the input frame */ 350 Word16 hang_len, /* i : hangover length */ 351 Word16 burst_len /* i : minimum burst length for hangover addition */ 352 ) 353 { 354 /* if the input power (pow_sum) is lower than a threshold, clear counters and set VAD_flag to "0" */ 355 if (low_power != 0) 356 { 357 st->burst_count = 0; 358 st->hang_count = 0; 359 return 0; 360 } 361 /* update the counters (hang_count, burst_count) */ 362 if ((st->vadreg & 0x4000) != 0) 363 { 364 st->burst_count = st->burst_count + 1; 365 if(st->burst_count >= burst_len) 366 { 367 st->hang_count = hang_len; 368 } 369 return 1; 370 } else 371 { 372 st->burst_count = 0; 373 if (st->hang_count > 0) 374 { 375 st->hang_count = st->hang_count - 1; 376 return 1; 377 } 378 } 379 return 0; 380 } 381 382 /****************************************************************************** 383 * 384 * Function : noise_estimate_update 385 * Purpose : Update of background noise estimate 386 * 387 *******************************************************************************/ 388 389 static void noise_estimate_update( 390 VadVars * st, /* i/o : State structure */ 391 Word16 level[] /* i : sub-band levels of the input frame */ 392 ) 393 { 394 Word32 i; 395 Word16 alpha_up, alpha_down, bckr_add = 2; 396 397 /* Control update of bckr_est[] */ 398 update_cntrl(st, level); 399 400 /* Choose update speed */ 401 if ((0x7800 & st->vadreg) == 0) 402 { 403 alpha_up = ALPHA_UP1; 404 alpha_down = ALPHA_DOWN1; 405 } else 406 { 407 if ((st->stat_count == 0)) 408 { 409 alpha_up = ALPHA_UP2; 410 alpha_down = ALPHA_DOWN2; 411 } else 412 { 413 alpha_up = 0; 414 alpha_down = ALPHA3; 415 bckr_add = 0; 416 } 417 } 418 419 /* Update noise estimate (bckr_est) */ 420 for (i = 0; i < COMPLEN; i++) 421 { 422 Word16 temp; 423 temp = (st->old_level[i] - st->bckr_est[i]); 424 425 if (temp < 0) 426 { /* update downwards */ 427 st->bckr_est[i] = add1(-2, add(st->bckr_est[i],vo_mult_r(alpha_down, temp))); 428 /* limit minimum value of the noise estimate to NOISE_MIN */ 429 if(st->bckr_est[i] < NOISE_MIN) 430 { 431 st->bckr_est[i] = NOISE_MIN; 432 } 433 } else 434 { /* update upwards */ 435 st->bckr_est[i] = add1(bckr_add, add1(st->bckr_est[i],vo_mult_r(alpha_up, temp))); 436 437 /* limit maximum value of the noise estimate to NOISE_MAX */ 438 if(st->bckr_est[i] > NOISE_MAX) 439 { 440 st->bckr_est[i] = NOISE_MAX; 441 } 442 } 443 } 444 445 /* Update signal levels of the previous frame (old_level) */ 446 for (i = 0; i < COMPLEN; i++) 447 { 448 st->old_level[i] = level[i]; 449 } 450 } 451 452 /****************************************************************************** 453 * 454 * Function : vad_decision 455 * Purpose : Calculates VAD_flag 456 * 457 *******************************************************************************/ 458 459 static Word16 vad_decision( /* return value : VAD_flag */ 460 VadVars * st, /* i/o : State structure */ 461 Word16 level[COMPLEN], /* i : sub-band levels of the input frame */ 462 Word32 pow_sum /* i : power of the input frame */ 463 ) 464 { 465 Word32 i; 466 Word32 L_snr_sum; 467 Word32 L_temp; 468 Word16 vad_thr, temp, noise_level; 469 Word16 low_power_flag; 470 Word16 hang_len, burst_len; 471 Word16 ilog2_speech_level, ilog2_noise_level; 472 Word16 temp2; 473 474 /* Calculate squared sum of the input levels (level) divided by the background noise components 475 * (bckr_est). */ 476 L_snr_sum = 0; 477 for (i = 0; i < COMPLEN; i++) 478 { 479 Word16 exp; 480 481 exp = norm_s(st->bckr_est[i]); 482 temp = (st->bckr_est[i] << exp); 483 temp = div_s((level[i] >> 1), temp); 484 temp = shl(temp, (exp - (UNIRSHFT - 1))); 485 L_snr_sum = L_mac(L_snr_sum, temp, temp); 486 } 487 488 /* Calculate average level of estimated background noise */ 489 L_temp = 0; 490 for (i = 1; i < COMPLEN; i++) /* ignore lowest band */ 491 { 492 L_temp = vo_L_add(L_temp, st->bckr_est[i]); 493 } 494 495 noise_level = extract_h((L_temp << 12)); 496 /* if SNR is lower than a threshold (MIN_SPEECH_SNR), and increase speech_level */ 497 temp = vo_mult(noise_level, MIN_SPEECH_SNR) << 3; 498 499 if(st->speech_level < temp) 500 { 501 st->speech_level = temp; 502 } 503 ilog2_noise_level = ilog2(noise_level); 504 505 /* If SNR is very poor, speech_level is probably corrupted by noise level. This is correctred by 506 * subtracting MIN_SPEECH_SNR*noise_level from speech level */ 507 ilog2_speech_level = ilog2(st->speech_level - temp); 508 509 temp = add1(vo_mult(NO_SLOPE, (ilog2_noise_level - NO_P1)), THR_HIGH); 510 511 temp2 = add1(SP_CH_MIN, vo_mult(SP_SLOPE, (ilog2_speech_level - SP_P1))); 512 if (temp2 < SP_CH_MIN) 513 { 514 temp2 = SP_CH_MIN; 515 } 516 if (temp2 > SP_CH_MAX) 517 { 518 temp2 = SP_CH_MAX; 519 } 520 vad_thr = temp + temp2; 521 522 if(vad_thr < THR_MIN) 523 { 524 vad_thr = THR_MIN; 525 } 526 /* Shift VAD decision register */ 527 st->vadreg = (st->vadreg >> 1); 528 529 /* Make intermediate VAD decision */ 530 if(L_snr_sum > vo_L_mult(vad_thr, (512 * COMPLEN))) 531 { 532 st->vadreg = (Word16) (st->vadreg | 0x4000); 533 } 534 /* check if the input power (pow_sum) is lower than a threshold" */ 535 if(pow_sum < VAD_POW_LOW) 536 { 537 low_power_flag = 1; 538 } else 539 { 540 low_power_flag = 0; 541 } 542 /* Update background noise estimates */ 543 noise_estimate_update(st, level); 544 545 /* Calculate values for hang_len and burst_len based on vad_thr */ 546 hang_len = add1(vo_mult(HANG_SLOPE, (vad_thr - HANG_P1)), HANG_HIGH); 547 if(hang_len < HANG_LOW) 548 { 549 hang_len = HANG_LOW; 550 } 551 burst_len = add1(vo_mult(BURST_SLOPE, (vad_thr - BURST_P1)), BURST_HIGH); 552 553 return (hangover_addition(st, low_power_flag, hang_len, burst_len)); 554 } 555 556 /****************************************************************************** 557 * 558 * Function : Estimate_Speech() 559 * Purpose : Estimate speech level 560 * 561 * Maximum signal level is searched and stored to the variable sp_max. 562 * The speech frames must locate within SP_EST_COUNT number of frames. 563 * Thus, noisy frames having occasional VAD = "1" decisions will not 564 * affect to the estimated speech_level. 565 * 566 *******************************************************************************/ 567 568 static void Estimate_Speech( 569 VadVars * st, /* i/o : State structure */ 570 Word16 in_level /* level of the input frame */ 571 ) 572 { 573 Word16 alpha; 574 575 /* if the required activity count cannot be achieved, reset counters */ 576 if((st->sp_est_cnt - st->sp_max_cnt) > (SP_EST_COUNT - SP_ACTIVITY_COUNT)) 577 { 578 st->sp_est_cnt = 0; 579 st->sp_max = 0; 580 st->sp_max_cnt = 0; 581 } 582 st->sp_est_cnt += 1; 583 584 if (((st->vadreg & 0x4000)||(in_level > st->speech_level)) && (in_level > MIN_SPEECH_LEVEL1)) 585 { 586 /* update sp_max */ 587 if(in_level > st->sp_max) 588 { 589 st->sp_max = in_level; 590 } 591 st->sp_max_cnt += 1; 592 593 if(st->sp_max_cnt >= SP_ACTIVITY_COUNT) 594 { 595 Word16 tmp; 596 /* update speech estimate */ 597 tmp = (st->sp_max >> 1); /* scale to get "average" speech level */ 598 599 /* select update speed */ 600 if(tmp > st->speech_level) 601 { 602 alpha = ALPHA_SP_UP; 603 } else 604 { 605 alpha = ALPHA_SP_DOWN; 606 } 607 if(tmp > MIN_SPEECH_LEVEL2) 608 { 609 st->speech_level = add1(st->speech_level, vo_mult_r(alpha, vo_sub(tmp, st->speech_level))); 610 } 611 /* clear all counters used for speech estimation */ 612 st->sp_max = 0; 613 st->sp_max_cnt = 0; 614 st->sp_est_cnt = 0; 615 } 616 } 617 } 618 619 /****************************************************************************** 620 * 621 * Function: wb_vad_init 622 * Purpose: Allocates state memory and initializes state memory 623 * 624 *******************************************************************************/ 625 626 Word16 wb_vad_init( /* return: non-zero with error, zero for ok. */ 627 VadVars ** state, /* i/o : State structure */ 628 VO_MEM_OPERATOR *pMemOP 629 ) 630 { 631 VadVars *s; 632 633 if (state == (VadVars **) NULL) 634 { 635 fprintf(stderr, "vad_init: invalid parameter\n"); 636 return -1; 637 } 638 *state = NULL; 639 640 /* allocate memory */ 641 if ((s = (VadVars *) mem_malloc(pMemOP, sizeof(VadVars), 32, VO_INDEX_ENC_AMRWB)) == NULL) 642 { 643 fprintf(stderr, "vad_init: can not malloc state structure\n"); 644 return -1; 645 } 646 wb_vad_reset(s); 647 648 *state = s; 649 650 return 0; 651 } 652 653 /****************************************************************************** 654 * 655 * Function: wb_vad_reset 656 * Purpose: Initializes state memory 657 * 658 *******************************************************************************/ 659 660 Word16 wb_vad_reset( /* return: non-zero with error, zero for ok. */ 661 VadVars * state /* i/o : State structure */ 662 ) 663 { 664 Word32 i, j; 665 666 if (state == (VadVars *) NULL) 667 { 668 fprintf(stderr, "vad_reset: invalid parameter\n"); 669 return -1; 670 } 671 state->tone_flag = 0; 672 state->vadreg = 0; 673 state->hang_count = 0; 674 state->burst_count = 0; 675 state->hang_count = 0; 676 677 /* initialize memory used by the filter bank */ 678 for (i = 0; i < F_5TH_CNT; i++) 679 { 680 for (j = 0; j < 2; j++) 681 { 682 state->a_data5[i][j] = 0; 683 } 684 } 685 686 for (i = 0; i < F_3TH_CNT; i++) 687 { 688 state->a_data3[i] = 0; 689 } 690 691 /* initialize the rest of the memory */ 692 for (i = 0; i < COMPLEN; i++) 693 { 694 state->bckr_est[i] = NOISE_INIT; 695 state->old_level[i] = NOISE_INIT; 696 state->ave_level[i] = NOISE_INIT; 697 state->sub_level[i] = 0; 698 } 699 700 state->sp_est_cnt = 0; 701 state->sp_max = 0; 702 state->sp_max_cnt = 0; 703 state->speech_level = SPEECH_LEVEL_INIT; 704 state->prev_pow_sum = 0; 705 return 0; 706 } 707 708 /****************************************************************************** 709 * 710 * Function: wb_vad_exit 711 * Purpose: The memory used for state memory is freed 712 * 713 *******************************************************************************/ 714 715 void wb_vad_exit( 716 VadVars ** state, /* i/o : State structure */ 717 VO_MEM_OPERATOR *pMemOP 718 ) 719 { 720 if (state == NULL || *state == NULL) 721 return; 722 /* deallocate memory */ 723 mem_free(pMemOP, *state, VO_INDEX_ENC_AMRWB); 724 *state = NULL; 725 return; 726 } 727 728 /****************************************************************************** 729 * 730 * Function : wb_vad_tone_detection 731 * Purpose : Search maximum pitch gain from a frame. Set tone flag if 732 * pitch gain is high. This is used to detect 733 * signaling tones and other signals with high pitch gain. 734 * 735 *******************************************************************************/ 736 737 void wb_vad_tone_detection( 738 VadVars * st, /* i/o : State struct */ 739 Word16 p_gain /* pitch gain */ 740 ) 741 { 742 /* update tone flag */ 743 st->tone_flag = (st->tone_flag >> 1); 744 745 /* if (pitch_gain > TONE_THR) set tone flag */ 746 if (p_gain > TONE_THR) 747 { 748 st->tone_flag = (Word16) (st->tone_flag | 0x4000); 749 } 750 } 751 752 /****************************************************************************** 753 * 754 * Function : wb_vad 755 * Purpose : Main program for Voice Activity Detection (VAD) for AMR 756 * 757 *******************************************************************************/ 758 759 Word16 wb_vad( /* Return value : VAD Decision, 1 = speech, 0 = noise */ 760 VadVars * st, /* i/o : State structure */ 761 Word16 in_buf[] /* i : samples of the input frame */ 762 ) 763 { 764 Word16 level[COMPLEN]; 765 Word32 i; 766 Word16 VAD_flag, temp; 767 Word32 L_temp, pow_sum; 768 769 /* Calculate power of the input frame. */ 770 L_temp = 0L; 771 for (i = 0; i < FRAME_LEN; i++) 772 { 773 L_temp = L_mac(L_temp, in_buf[i], in_buf[i]); 774 } 775 776 /* pow_sum = power of current frame and previous frame */ 777 pow_sum = L_add(L_temp, st->prev_pow_sum); 778 779 /* save power of current frame for next call */ 780 st->prev_pow_sum = L_temp; 781 782 /* If input power is very low, clear tone flag */ 783 if (pow_sum < POW_TONE_THR) 784 { 785 st->tone_flag = (Word16) (st->tone_flag & 0x1fff); 786 } 787 /* Run the filter bank and calculate signal levels at each band */ 788 filter_bank(st, in_buf, level); 789 790 /* compute VAD decision */ 791 VAD_flag = vad_decision(st, level, pow_sum); 792 793 /* Calculate input level */ 794 L_temp = 0; 795 for (i = 1; i < COMPLEN; i++) /* ignore lowest band */ 796 { 797 L_temp = vo_L_add(L_temp, level[i]); 798 } 799 800 temp = extract_h(L_temp << 12); 801 802 Estimate_Speech(st, temp); /* Estimate speech level */ 803 return (VAD_flag); 804 } 805 806 807 808 809