Home | History | Annotate | Download | only in src
      1 /*
      2  ** Copyright 2003-2010, VisualOn, Inc.
      3  **
      4  ** Licensed under the Apache License, Version 2.0 (the "License");
      5  ** you may not use this file except in compliance with the License.
      6  ** You may obtain a copy of the License at
      7  **
      8  **     http://www.apache.org/licenses/LICENSE-2.0
      9  **
     10  ** Unless required by applicable law or agreed to in writing, software
     11  ** distributed under the License is distributed on an "AS IS" BASIS,
     12  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  ** See the License for the specific language governing permissions and
     14  ** limitations under the License.
     15  */
     16 
     17 /***********************************************************************
     18 *      File: wb_vad.c                                                  *
     19 *                                                                      *
     20 *      Description: Voice Activity Detection                           *
     21 *                                                                      *
     22 ************************************************************************/
     23 
     24 #include <stdlib.h>
     25 #include <stdio.h>
     26 #include "cnst.h"
     27 #include "wb_vad.h"
     28 #include "typedef.h"
     29 #include "basic_op.h"
     30 #include "math_op.h"
     31 #include "wb_vad_c.h"
     32 #include "mem_align.h"
     33 
     34 /******************************************************************************
     35 *  Calculate Log2 and scale the signal:
     36 *
     37 *    ilog2(Word32 in) = -1024*log10(in * 2^-31)/log10(2), where in = [1, 2^31-1]
     38 *
     39 *  input   output
     40 *  32768   16384
     41 *  1       31744
     42 *
     43 * When input is in the range of [1,2^16], max error is 0.0380%.
     44 *********************************************************************************/
     45 
     46 static Word16 ilog2(                       /* return: output value of the log2 */
     47         Word16 mant                        /* i: value to be converted */
     48         )
     49 {
     50     Word16 ex, ex2, res;
     51     Word32 i, l_temp;
     52 
     53     if (mant <= 0)
     54     {
     55         mant = 1;
     56     }
     57     ex = norm_s(mant);
     58     mant = mant << ex;
     59 
     60     for (i = 0; i < 3; i++)
     61         mant = vo_mult(mant, mant);
     62     l_temp = vo_L_mult(mant, mant);
     63 
     64     ex2 = norm_l(l_temp);
     65     mant = extract_h(l_temp << ex2);
     66 
     67     res = (ex + 16) << 10;
     68     res = add1(res, (ex2 << 6));
     69     res = vo_sub(add1(res, 127), (mant >> 8));
     70     return (res);
     71 }
     72 
     73 /******************************************************************************
     74 *
     75 *     Function     : filter5
     76 *     Purpose      : Fifth-order half-band lowpass/highpass filter pair with
     77 *                    decimation.
     78 *
     79 *******************************************************************************/
     80 
     81 static void filter5(
     82         Word16 * in0,                         /* i/o : input values; output low-pass part  */
     83         Word16 * in1,                         /* i/o : input values; output high-pass part */
     84         Word16 data[]                         /* i/o : filter memory                       */
     85         )
     86 {
     87     Word16 temp0, temp1, temp2;
     88 
     89     temp0 = vo_sub(*in0, vo_mult(COEFF5_1, data[0]));
     90     temp1 = add1(data[0], vo_mult(COEFF5_1, temp0));
     91     data[0] = temp0;
     92 
     93     temp0 = vo_sub(*in1, vo_mult(COEFF5_2, data[1]));
     94     temp2 = add1(data[1], vo_mult(COEFF5_2, temp0));
     95     data[1] = temp0;
     96 
     97     *in0 = extract_h((vo_L_add(temp1, temp2) << 15));
     98     *in1 = extract_h((vo_L_sub(temp1, temp2) << 15));
     99 }
    100 
    101 /******************************************************************************
    102 *
    103 *     Function     : filter3
    104 *     Purpose      : Third-order half-band lowpass/highpass filter pair with
    105 *                    decimation.
    106 *
    107 *******************************************************************************/
    108 
    109 static void filter3(
    110         Word16 * in0,                         /* i/o : input values; output low-pass part  */
    111         Word16 * in1,                         /* i/o : input values; output high-pass part */
    112         Word16 * data                         /* i/o : filter memory                       */
    113         )
    114 {
    115     Word16 temp1, temp2;
    116 
    117     temp1 = vo_sub(*in1, vo_mult(COEFF3, *data));
    118     temp2 = add1(*data, vo_mult(COEFF3, temp1));
    119     *data = temp1;
    120 
    121     *in1 = extract_h((vo_L_sub(*in0, temp2) << 15));
    122     *in0 = extract_h((vo_L_add(*in0, temp2) << 15));
    123 }
    124 
    125 /******************************************************************************
    126 *
    127 *     Function   : level_calculation
    128 *     Purpose    : Calculate signal level in a sub-band. Level is calculated
    129 *                  by summing absolute values of the input data.
    130 *
    131 *                  Signal level calculated from the end of the frame
    132 *                  (sample indices count1 .. count2 - 1) is stored to
    133 *                  (*sub_level) and added to the level of the next frame.
    134 *
    135 ******************************************************************************/
    136 
    137 static Word16 level_calculation(                      /* return: signal level */
    138         Word16 data[],                        /* i   : signal buffer                                    */
    139         Word16 * sub_level,                   /* i   : level calculated at the end of the previous frame*/
    140                                               /* o   : level of signal calculated from the last         */
    141                                               /*       (count2 - count1) samples                        */
    142         Word16 count1,                        /* i   : number of samples to be counted                  */
    143         Word16 count2,                        /* i   : number of samples to be counted                  */
    144         Word16 ind_m,                         /* i   : step size for the index of the data buffer       */
    145         Word16 ind_a,                         /* i   : starting index of the data buffer                */
    146         Word16 scale                          /* i   : scaling for the level calculation                */
    147         )
    148 {
    149     Word32 i, l_temp1, l_temp2;
    150     Word16 level;
    151 
    152     l_temp1 = 0L;
    153     for (i = count1; i < count2; i++)
    154     {
    155         l_temp1 += (abs_s(data[ind_m * i + ind_a])<<1);
    156     }
    157 
    158     l_temp2 = vo_L_add(l_temp1, L_shl(*sub_level, 16 - scale));
    159     *sub_level = extract_h(L_shl(l_temp1, scale));
    160 
    161     for (i = 0; i < count1; i++)
    162     {
    163         l_temp2 += (abs_s(data[ind_m * i + ind_a])<<1);
    164     }
    165     level = extract_h(L_shl2(l_temp2, scale));
    166 
    167     return level;
    168 }
    169 
    170 /******************************************************************************
    171 *
    172 *     Function     : filter_bank
    173 *     Purpose      : Divide input signal into bands and calculate level of
    174 *                    the signal in each band
    175 *
    176 *******************************************************************************/
    177 
    178 static void filter_bank(
    179         VadVars * st,                         /* i/o : State struct               */
    180         Word16 in[],                          /* i   : input frame                */
    181         Word16 level[]                        /* o   : signal levels at each band */
    182         )
    183 {
    184     Word32 i;
    185     Word16 tmp_buf[FRAME_LEN];
    186 
    187     /* shift input 1 bit down for safe scaling */
    188     for (i = 0; i < FRAME_LEN; i++)
    189     {
    190         tmp_buf[i] = in[i] >> 1;
    191     }
    192 
    193     /* run the filter bank */
    194     for (i = 0; i < 128; i++)
    195     {
    196         filter5(&tmp_buf[2 * i], &tmp_buf[2 * i + 1], st->a_data5[0]);
    197     }
    198     for (i = 0; i < 64; i++)
    199     {
    200         filter5(&tmp_buf[4 * i], &tmp_buf[4 * i + 2], st->a_data5[1]);
    201         filter5(&tmp_buf[4 * i + 1], &tmp_buf[4 * i + 3], st->a_data5[2]);
    202     }
    203     for (i = 0; i < 32; i++)
    204     {
    205         filter5(&tmp_buf[8 * i], &tmp_buf[8 * i + 4], st->a_data5[3]);
    206         filter5(&tmp_buf[8 * i + 2], &tmp_buf[8 * i + 6], st->a_data5[4]);
    207         filter3(&tmp_buf[8 * i + 3], &tmp_buf[8 * i + 7], &st->a_data3[0]);
    208     }
    209     for (i = 0; i < 16; i++)
    210     {
    211         filter3(&tmp_buf[16 * i + 0], &tmp_buf[16 * i + 8], &st->a_data3[1]);
    212         filter3(&tmp_buf[16 * i + 4], &tmp_buf[16 * i + 12], &st->a_data3[2]);
    213         filter3(&tmp_buf[16 * i + 6], &tmp_buf[16 * i + 14], &st->a_data3[3]);
    214     }
    215 
    216     for (i = 0; i < 8; i++)
    217     {
    218         filter3(&tmp_buf[32 * i + 0], &tmp_buf[32 * i + 16], &st->a_data3[4]);
    219         filter3(&tmp_buf[32 * i + 8], &tmp_buf[32 * i + 24], &st->a_data3[5]);
    220     }
    221 
    222     /* calculate levels in each frequency band */
    223 
    224     /* 4800 - 6400 Hz */
    225     level[11] = level_calculation(tmp_buf, &st->sub_level[11], 16, 64, 4, 1, 14);
    226     /* 4000 - 4800 Hz */
    227     level[10] = level_calculation(tmp_buf, &st->sub_level[10], 8, 32, 8, 7, 15);
    228     /* 3200 - 4000 Hz */
    229     level[9] = level_calculation(tmp_buf, &st->sub_level[9],8, 32, 8, 3, 15);
    230     /* 2400 - 3200 Hz */
    231     level[8] = level_calculation(tmp_buf, &st->sub_level[8],8, 32, 8, 2, 15);
    232     /* 2000 - 2400 Hz */
    233     level[7] = level_calculation(tmp_buf, &st->sub_level[7],4, 16, 16, 14, 16);
    234     /* 1600 - 2000 Hz */
    235     level[6] = level_calculation(tmp_buf, &st->sub_level[6],4, 16, 16, 6, 16);
    236     /* 1200 - 1600 Hz */
    237     level[5] = level_calculation(tmp_buf, &st->sub_level[5],4, 16, 16, 4, 16);
    238     /* 800 - 1200 Hz */
    239     level[4] = level_calculation(tmp_buf, &st->sub_level[4],4, 16, 16, 12, 16);
    240     /* 600 - 800 Hz */
    241     level[3] = level_calculation(tmp_buf, &st->sub_level[3],2, 8, 32, 8, 17);
    242     /* 400 - 600 Hz */
    243     level[2] = level_calculation(tmp_buf, &st->sub_level[2],2, 8, 32, 24, 17);
    244     /* 200 - 400 Hz */
    245     level[1] = level_calculation(tmp_buf, &st->sub_level[1],2, 8, 32, 16, 17);
    246     /* 0 - 200 Hz */
    247     level[0] = level_calculation(tmp_buf, &st->sub_level[0],2, 8, 32, 0, 17);
    248 }
    249 
    250 /******************************************************************************
    251 *
    252 *     Function   : update_cntrl
    253 *     Purpose    : Control update of the background noise estimate.
    254 *
    255 *******************************************************************************/
    256 
    257 static void update_cntrl(
    258         VadVars * st,                         /* i/o : State structure                    */
    259         Word16 level[]                        /* i   : sub-band levels of the input frame */
    260         )
    261 {
    262     Word32 i;
    263     Word16 num, temp, stat_rat, exp, denom;
    264     Word16 alpha;
    265 
    266     /* if a tone has been detected for a while, initialize stat_count */
    267     if (sub((Word16) (st->tone_flag & 0x7c00), 0x7c00) == 0)
    268     {
    269         st->stat_count = STAT_COUNT;
    270     } else
    271     {
    272         /* if 8 last vad-decisions have been "0", reinitialize stat_count */
    273         if ((st->vadreg & 0x7f80) == 0)
    274         {
    275             st->stat_count = STAT_COUNT;
    276         } else
    277         {
    278             stat_rat = 0;
    279             for (i = 0; i < COMPLEN; i++)
    280             {
    281                 if(level[i] > st->ave_level[i])
    282                 {
    283                     num = level[i];
    284                     denom = st->ave_level[i];
    285                 } else
    286                 {
    287                     num = st->ave_level[i];
    288                     denom = level[i];
    289                 }
    290                 /* Limit nimimum value of num and denom to STAT_THR_LEVEL */
    291                 if(num < STAT_THR_LEVEL)
    292                 {
    293                     num = STAT_THR_LEVEL;
    294                 }
    295                 if(denom < STAT_THR_LEVEL)
    296                 {
    297                     denom = STAT_THR_LEVEL;
    298                 }
    299                 exp = norm_s(denom);
    300                 denom = denom << exp;
    301 
    302                 /* stat_rat = num/denom * 64 */
    303                 temp = div_s(num >> 1, denom);
    304                 stat_rat = add1(stat_rat, shr(temp, (8 - exp)));
    305             }
    306 
    307             /* compare stat_rat with a threshold and update stat_count */
    308             if(stat_rat > STAT_THR)
    309             {
    310                 st->stat_count = STAT_COUNT;
    311             } else
    312             {
    313                 if ((st->vadreg & 0x4000) != 0)
    314                 {
    315 
    316                     if (st->stat_count != 0)
    317                     {
    318                         st->stat_count = st->stat_count - 1;
    319                     }
    320                 }
    321             }
    322         }
    323     }
    324 
    325     /* Update average amplitude estimate for stationarity estimation */
    326     alpha = ALPHA4;
    327     if(st->stat_count == STAT_COUNT)
    328     {
    329         alpha = 32767;
    330     } else if ((st->vadreg & 0x4000) == 0)
    331     {
    332         alpha = ALPHA5;
    333     }
    334     for (i = 0; i < COMPLEN; i++)
    335     {
    336         st->ave_level[i] = add1(st->ave_level[i], vo_mult_r(alpha, vo_sub(level[i], st->ave_level[i])));
    337     }
    338 }
    339 
    340 /******************************************************************************
    341 *
    342 *     Function     : hangover_addition
    343 *     Purpose      : Add hangover after speech bursts
    344 *
    345 *******************************************************************************/
    346 
    347 static Word16 hangover_addition(                      /* return: VAD_flag indicating final VAD decision */
    348         VadVars * st,                         /* i/o : State structure                     */
    349         Word16 low_power,                     /* i   : flag power of the input frame    */
    350         Word16 hang_len,                      /* i   : hangover length */
    351         Word16 burst_len                      /* i   : minimum burst length for hangover addition */
    352         )
    353 {
    354     /* if the input power (pow_sum) is lower than a threshold, clear counters and set VAD_flag to "0"         */
    355     if (low_power != 0)
    356     {
    357         st->burst_count = 0;
    358         st->hang_count = 0;
    359         return 0;
    360     }
    361     /* update the counters (hang_count, burst_count) */
    362     if ((st->vadreg & 0x4000) != 0)
    363     {
    364         st->burst_count = st->burst_count + 1;
    365         if(st->burst_count >= burst_len)
    366         {
    367             st->hang_count = hang_len;
    368         }
    369         return 1;
    370     } else
    371     {
    372         st->burst_count = 0;
    373         if (st->hang_count > 0)
    374         {
    375             st->hang_count = st->hang_count - 1;
    376             return 1;
    377         }
    378     }
    379     return 0;
    380 }
    381 
    382 /******************************************************************************
    383 *
    384 *     Function   : noise_estimate_update
    385 *     Purpose    : Update of background noise estimate
    386 *
    387 *******************************************************************************/
    388 
    389 static void noise_estimate_update(
    390         VadVars * st,                         /* i/o : State structure                       */
    391         Word16 level[]                        /* i   : sub-band levels of the input frame */
    392         )
    393 {
    394     Word32 i;
    395     Word16 alpha_up, alpha_down, bckr_add = 2;
    396 
    397     /* Control update of bckr_est[] */
    398     update_cntrl(st, level);
    399 
    400     /* Choose update speed */
    401     if ((0x7800 & st->vadreg) == 0)
    402     {
    403         alpha_up = ALPHA_UP1;
    404         alpha_down = ALPHA_DOWN1;
    405     } else
    406     {
    407         if (st->stat_count == 0)
    408         {
    409             alpha_up = ALPHA_UP2;
    410             alpha_down = ALPHA_DOWN2;
    411         } else
    412         {
    413             alpha_up = 0;
    414             alpha_down = ALPHA3;
    415             bckr_add = 0;
    416         }
    417     }
    418 
    419     /* Update noise estimate (bckr_est) */
    420     for (i = 0; i < COMPLEN; i++)
    421     {
    422         Word16 temp;
    423         temp = (st->old_level[i] - st->bckr_est[i]);
    424 
    425         if (temp < 0)
    426         {                                  /* update downwards */
    427             st->bckr_est[i] = add1(-2, add(st->bckr_est[i],vo_mult_r(alpha_down, temp)));
    428             /* limit minimum value of the noise estimate to NOISE_MIN */
    429             if(st->bckr_est[i] < NOISE_MIN)
    430             {
    431                 st->bckr_est[i] = NOISE_MIN;
    432             }
    433         } else
    434         {                                  /* update upwards */
    435             st->bckr_est[i] = add1(bckr_add, add1(st->bckr_est[i],vo_mult_r(alpha_up, temp)));
    436 
    437             /* limit maximum value of the noise estimate to NOISE_MAX */
    438             if(st->bckr_est[i] > NOISE_MAX)
    439             {
    440                 st->bckr_est[i] = NOISE_MAX;
    441             }
    442         }
    443     }
    444 
    445     /* Update signal levels of the previous frame (old_level) */
    446     for (i = 0; i < COMPLEN; i++)
    447     {
    448         st->old_level[i] = level[i];
    449     }
    450 }
    451 
    452 /******************************************************************************
    453 *
    454 *     Function     : vad_decision
    455 *     Purpose      : Calculates VAD_flag
    456 *
    457 *******************************************************************************/
    458 
    459 static Word16 vad_decision(                           /* return value : VAD_flag */
    460         VadVars * st,                         /* i/o : State structure                       */
    461         Word16 level[COMPLEN],                /* i   : sub-band levels of the input frame */
    462         Word32 pow_sum                        /* i   : power of the input frame           */
    463         )
    464 {
    465     Word32 i;
    466     Word32 L_snr_sum;
    467     Word32 L_temp;
    468     Word16 vad_thr, temp, noise_level;
    469     Word16 low_power_flag;
    470     Word16 hang_len, burst_len;
    471     Word16 ilog2_speech_level, ilog2_noise_level;
    472     Word16 temp2;
    473 
    474     /* Calculate squared sum of the input levels (level) divided by the background noise components
    475      * (bckr_est). */
    476     L_snr_sum = 0;
    477     for (i = 0; i < COMPLEN; i++)
    478     {
    479         Word16 exp;
    480 
    481         exp = norm_s(st->bckr_est[i]);
    482         temp = (st->bckr_est[i] << exp);
    483         temp = div_s((level[i] >> 1), temp);
    484         temp = shl(temp, (exp - (UNIRSHFT - 1)));
    485         L_snr_sum = L_mac(L_snr_sum, temp, temp);
    486     }
    487 
    488     /* Calculate average level of estimated background noise */
    489     L_temp = 0;
    490     for (i = 1; i < COMPLEN; i++)          /* ignore lowest band */
    491     {
    492         L_temp = vo_L_add(L_temp, st->bckr_est[i]);
    493     }
    494 
    495     noise_level = extract_h((L_temp << 12));
    496     /* if SNR is lower than a threshold (MIN_SPEECH_SNR), and increase speech_level */
    497     temp = vo_mult(noise_level, MIN_SPEECH_SNR) << 3;
    498 
    499     if(st->speech_level < temp)
    500     {
    501         st->speech_level = temp;
    502     }
    503     ilog2_noise_level = ilog2(noise_level);
    504 
    505     /* If SNR is very poor, speech_level is probably corrupted by noise level. This is correctred by
    506      * subtracting MIN_SPEECH_SNR*noise_level from speech level */
    507     ilog2_speech_level = ilog2(st->speech_level - temp);
    508 
    509     temp = add1(vo_mult(NO_SLOPE, (ilog2_noise_level - NO_P1)), THR_HIGH);
    510 
    511     temp2 = add1(SP_CH_MIN, vo_mult(SP_SLOPE, (ilog2_speech_level - SP_P1)));
    512     if (temp2 < SP_CH_MIN)
    513     {
    514         temp2 = SP_CH_MIN;
    515     }
    516     if (temp2 > SP_CH_MAX)
    517     {
    518         temp2 = SP_CH_MAX;
    519     }
    520     vad_thr = temp + temp2;
    521 
    522     if(vad_thr < THR_MIN)
    523     {
    524         vad_thr = THR_MIN;
    525     }
    526     /* Shift VAD decision register */
    527     st->vadreg = (st->vadreg >> 1);
    528 
    529     /* Make intermediate VAD decision */
    530     if(L_snr_sum > vo_L_mult(vad_thr, (512 * COMPLEN)))
    531     {
    532         st->vadreg = (Word16) (st->vadreg | 0x4000);
    533     }
    534     /* check if the input power (pow_sum) is lower than a threshold" */
    535     if(pow_sum < VAD_POW_LOW)
    536     {
    537         low_power_flag = 1;
    538     } else
    539     {
    540         low_power_flag = 0;
    541     }
    542     /* Update background noise estimates */
    543     noise_estimate_update(st, level);
    544 
    545     /* Calculate values for hang_len and burst_len based on vad_thr */
    546     hang_len = add1(vo_mult(HANG_SLOPE, (vad_thr - HANG_P1)), HANG_HIGH);
    547     if(hang_len < HANG_LOW)
    548     {
    549         hang_len = HANG_LOW;
    550     }
    551     burst_len = add1(vo_mult(BURST_SLOPE, (vad_thr - BURST_P1)), BURST_HIGH);
    552 
    553     return (hangover_addition(st, low_power_flag, hang_len, burst_len));
    554 }
    555 
    556 /******************************************************************************
    557 *
    558 *     Function : Estimate_Speech()
    559 *     Purpose  : Estimate speech level
    560 *
    561 * Maximum signal level is searched and stored to the variable sp_max.
    562 * The speech frames must locate within SP_EST_COUNT number of frames.
    563 * Thus, noisy frames having occasional VAD = "1" decisions will not
    564 * affect to the estimated speech_level.
    565 *
    566 *******************************************************************************/
    567 
    568 static void Estimate_Speech(
    569         VadVars * st,                         /* i/o : State structure    */
    570         Word16 in_level                       /* level of the input frame */
    571         )
    572 {
    573     Word16 alpha;
    574 
    575     /* if the required activity count cannot be achieved, reset counters */
    576     if((st->sp_est_cnt - st->sp_max_cnt) > (SP_EST_COUNT - SP_ACTIVITY_COUNT))
    577     {
    578         st->sp_est_cnt = 0;
    579         st->sp_max = 0;
    580         st->sp_max_cnt = 0;
    581     }
    582     st->sp_est_cnt += 1;
    583 
    584     if (((st->vadreg & 0x4000)||(in_level > st->speech_level)) && (in_level > MIN_SPEECH_LEVEL1))
    585     {
    586         /* update sp_max */
    587         if(in_level > st->sp_max)
    588         {
    589             st->sp_max = in_level;
    590         }
    591         st->sp_max_cnt += 1;
    592 
    593         if(st->sp_max_cnt >= SP_ACTIVITY_COUNT)
    594         {
    595             Word16 tmp;
    596             /* update speech estimate */
    597             tmp = (st->sp_max >> 1);      /* scale to get "average" speech level */
    598 
    599             /* select update speed */
    600             if(tmp > st->speech_level)
    601             {
    602                 alpha = ALPHA_SP_UP;
    603             } else
    604             {
    605                 alpha = ALPHA_SP_DOWN;
    606             }
    607             if(tmp > MIN_SPEECH_LEVEL2)
    608             {
    609                 st->speech_level = add1(st->speech_level, vo_mult_r(alpha, vo_sub(tmp, st->speech_level)));
    610             }
    611             /* clear all counters used for speech estimation */
    612             st->sp_max = 0;
    613             st->sp_max_cnt = 0;
    614             st->sp_est_cnt = 0;
    615         }
    616     }
    617 }
    618 
    619 /******************************************************************************
    620 *
    621 *  Function:   wb_vad_init
    622 *  Purpose:    Allocates state memory and initializes state memory
    623 *
    624 *******************************************************************************/
    625 
    626 Word16 wb_vad_init(                        /* return: non-zero with error, zero for ok. */
    627         VadVars ** state,                     /* i/o : State structure    */
    628         VO_MEM_OPERATOR *pMemOP
    629         )
    630 {
    631     VadVars *s;
    632 
    633     if (state == (VadVars **) NULL)
    634     {
    635         fprintf(stderr, "vad_init: invalid parameter\n");
    636         return -1;
    637     }
    638     *state = NULL;
    639 
    640     /* allocate memory */
    641     if ((s = (VadVars *) mem_malloc(pMemOP, sizeof(VadVars), 32, VO_INDEX_ENC_AMRWB)) == NULL)
    642     {
    643         fprintf(stderr, "vad_init: can not malloc state structure\n");
    644         return -1;
    645     }
    646     wb_vad_reset(s);
    647 
    648     *state = s;
    649 
    650     return 0;
    651 }
    652 
    653 /******************************************************************************
    654 *
    655 *  Function:   wb_vad_reset
    656 *  Purpose:    Initializes state memory
    657 *
    658 *******************************************************************************/
    659 
    660 Word16 wb_vad_reset(                       /* return: non-zero with error, zero for ok. */
    661         VadVars * state                       /* i/o : State structure    */
    662         )
    663 {
    664     Word32 i, j;
    665 
    666     if (state == (VadVars *) NULL)
    667     {
    668         fprintf(stderr, "vad_reset: invalid parameter\n");
    669         return -1;
    670     }
    671     state->tone_flag = 0;
    672     state->vadreg = 0;
    673     state->hang_count = 0;
    674     state->burst_count = 0;
    675     state->hang_count = 0;
    676 
    677     /* initialize memory used by the filter bank */
    678     for (i = 0; i < F_5TH_CNT; i++)
    679     {
    680         for (j = 0; j < 2; j++)
    681         {
    682             state->a_data5[i][j] = 0;
    683         }
    684     }
    685 
    686     for (i = 0; i < F_3TH_CNT; i++)
    687     {
    688         state->a_data3[i] = 0;
    689     }
    690 
    691     /* initialize the rest of the memory */
    692     for (i = 0; i < COMPLEN; i++)
    693     {
    694         state->bckr_est[i] = NOISE_INIT;
    695         state->old_level[i] = NOISE_INIT;
    696         state->ave_level[i] = NOISE_INIT;
    697         state->sub_level[i] = 0;
    698     }
    699 
    700     state->sp_est_cnt = 0;
    701     state->sp_max = 0;
    702     state->sp_max_cnt = 0;
    703     state->speech_level = SPEECH_LEVEL_INIT;
    704     state->prev_pow_sum = 0;
    705     return 0;
    706 }
    707 
    708 /******************************************************************************
    709 *
    710 *  Function:   wb_vad_exit
    711 *  Purpose:    The memory used for state memory is freed
    712 *
    713 *******************************************************************************/
    714 
    715 void wb_vad_exit(
    716         VadVars ** state,                      /* i/o : State structure    */
    717         VO_MEM_OPERATOR *pMemOP
    718         )
    719 {
    720     if (state == NULL || *state == NULL)
    721         return;
    722     /* deallocate memory */
    723     mem_free(pMemOP, *state, VO_INDEX_ENC_AMRWB);
    724     *state = NULL;
    725     return;
    726 }
    727 
    728 /******************************************************************************
    729 *
    730 *     Function     : wb_vad_tone_detection
    731 *     Purpose      : Search maximum pitch gain from a frame. Set tone flag if
    732 *                    pitch gain is high. This is used to detect
    733 *                    signaling tones and other signals with high pitch gain.
    734 *
    735 *******************************************************************************/
    736 
    737 void wb_vad_tone_detection(
    738         VadVars * st,                         /* i/o : State struct            */
    739         Word16 p_gain                         /* pitch gain      */
    740         )
    741 {
    742     /* update tone flag */
    743     st->tone_flag = (st->tone_flag >> 1);
    744 
    745     /* if (pitch_gain > TONE_THR) set tone flag */
    746     if (p_gain > TONE_THR)
    747     {
    748         st->tone_flag = (Word16) (st->tone_flag | 0x4000);
    749     }
    750 }
    751 
    752 /******************************************************************************
    753 *
    754 *     Function     : wb_vad
    755 *     Purpose      : Main program for Voice Activity Detection (VAD) for AMR
    756 *
    757 *******************************************************************************/
    758 
    759 Word16 wb_vad(                                /* Return value : VAD Decision, 1 = speech, 0 = noise */
    760         VadVars * st,                         /* i/o : State structure                 */
    761         Word16 in_buf[]                       /* i   : samples of the input frame   */
    762          )
    763 {
    764     Word16 level[COMPLEN];
    765     Word32 i;
    766     Word16 VAD_flag, temp;
    767     Word32 L_temp, pow_sum;
    768 
    769     /* Calculate power of the input frame. */
    770     L_temp = 0L;
    771     for (i = 0; i < FRAME_LEN; i++)
    772     {
    773         L_temp = L_mac(L_temp, in_buf[i], in_buf[i]);
    774     }
    775 
    776     /* pow_sum = power of current frame and previous frame */
    777     pow_sum = L_add(L_temp, st->prev_pow_sum);
    778 
    779     /* save power of current frame for next call */
    780     st->prev_pow_sum = L_temp;
    781 
    782     /* If input power is very low, clear tone flag */
    783     if (pow_sum < POW_TONE_THR)
    784     {
    785         st->tone_flag = (Word16) (st->tone_flag & 0x1fff);
    786     }
    787     /* Run the filter bank and calculate signal levels at each band */
    788     filter_bank(st, in_buf, level);
    789 
    790     /* compute VAD decision */
    791     VAD_flag = vad_decision(st, level, pow_sum);
    792 
    793     /* Calculate input level */
    794     L_temp = 0;
    795     for (i = 1; i < COMPLEN; i++)          /* ignore lowest band */
    796     {
    797         L_temp = vo_L_add(L_temp, level[i]);
    798     }
    799 
    800     temp = extract_h(L_temp << 12);
    801 
    802     Estimate_Speech(st, temp);             /* Estimate speech level */
    803     return (VAD_flag);
    804 }
    805 
    806 
    807 
    808 
    809