Home | History | Annotate | Download | only in vad
      1 /*
      2  *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
      3  *
      4  *  Use of this source code is governed by a BSD-style license
      5  *  that can be found in the LICENSE file in the root of the source
      6  *  tree. An additional intellectual property rights grant can be found
      7  *  in the file PATENTS.  All contributing project authors may
      8  *  be found in the AUTHORS file in the root of the source tree.
      9  */
     10 
     11 /*
     12  * This file includes the implementation of the internal filterbank associated functions.
     13  * For function description, see vad_filterbank.h.
     14  */
     15 
     16 #include "vad_filterbank.h"
     17 
     18 #include "signal_processing_library.h"
     19 #include "typedefs.h"
     20 #include "vad_defines.h"
     21 
// Constant 160*log10(2) in Q9 (160 * 0.30103 * 512 ~= 24660). Used by
// WebRtcVad_LogOfEnergy to convert a base-2 logarithm into 160*log10().
static const int16_t kLogConst = 24660;

// Coefficients used by WebRtcVad_HpOutput, Q14
// kHpZeroCoefs holds the feed-forward taps applied to the current and the two
// previous input samples. kHpPoleCoefs holds the feedback taps; only elements
// [1] and [2] are referenced by WebRtcVad_HpOutput (element [0] is 1.0 in Q14).
static const int16_t kHpZeroCoefs[3] = { 6631, -13262, 6631 };
static const int16_t kHpPoleCoefs[3] = { 16384, -7756, 5620 };

// Allpass filter coefficients, upper and lower, in Q15
// Upper: 0.64, Lower: 0.17
// Element [0] is used for the upper branch and element [1] for the lower
// branch in WebRtcVad_SplitFilter.
static const int16_t kAllPassCoefsQ15[2] = { 20972, 5571 };

// Adjustment for division with two in WebRtcVad_SplitFilter
// One entry per frequency band; WebRtcVad_get_features passes entry k as the
// |offset| that WebRtcVad_LogOfEnergy adds to the log energy of band k.
static const int16_t kOffsetVector[6] = { 368, 368, 272, 176, 176, 176 };
     35 
     36 void WebRtcVad_HpOutput(int16_t* in_vector,
     37                         int in_vector_length,
     38                         int16_t* filter_state,
     39                         int16_t* out_vector) {
     40   int i;
     41   int16_t* in_ptr = in_vector;
     42   int16_t* out_ptr = out_vector;
     43   int32_t tmp32 = 0;
     44 
     45 
     46   // The sum of the absolute values of the impulse response:
     47   // The zero/pole-filter has a max amplification of a single sample of: 1.4546
     48   // Impulse response: 0.4047 -0.6179 -0.0266  0.1993  0.1035  -0.0194
     49   // The all-zero section has a max amplification of a single sample of: 1.6189
     50   // Impulse response: 0.4047 -0.8094  0.4047  0       0        0
     51   // The all-pole section has a max amplification of a single sample of: 1.9931
     52   // Impulse response: 1.0000  0.4734 -0.1189 -0.2187 -0.0627   0.04532
     53 
     54   for (i = 0; i < in_vector_length; i++) {
     55     // all-zero section (filter coefficients in Q14)
     56     tmp32 = (int32_t) WEBRTC_SPL_MUL_16_16(kHpZeroCoefs[0], (*in_ptr));
     57     tmp32 += (int32_t) WEBRTC_SPL_MUL_16_16(kHpZeroCoefs[1], filter_state[0]);
     58     tmp32 += (int32_t) WEBRTC_SPL_MUL_16_16(kHpZeroCoefs[2],
     59                                             filter_state[1]);  // Q14
     60     filter_state[1] = filter_state[0];
     61     filter_state[0] = *in_ptr++;
     62 
     63     // all-pole section
     64     tmp32 -= (int32_t) WEBRTC_SPL_MUL_16_16(kHpPoleCoefs[1],
     65                                             filter_state[2]);  // Q14
     66     tmp32 -= (int32_t) WEBRTC_SPL_MUL_16_16(kHpPoleCoefs[2], filter_state[3]);
     67     filter_state[3] = filter_state[2];
     68     filter_state[2] = (int16_t) WEBRTC_SPL_RSHIFT_W32 (tmp32, 14);
     69     *out_ptr++ = filter_state[2];
     70   }
     71 }
     72 
     73 void WebRtcVad_Allpass(int16_t* in_vector,
     74                        int16_t filter_coefficients,
     75                        int vector_length,
     76                        int16_t* filter_state,
     77                        int16_t* out_vector) {
     78   // The filter can only cause overflow (in the w16 output variable)
     79   // if more than 4 consecutive input numbers are of maximum value and
     80   // has the the same sign as the impulse responses first taps.
     81   // First 6 taps of the impulse response: 0.6399 0.5905 -0.3779
     82   // 0.2418 -0.1547 0.0990
     83 
     84   int i;
     85   int16_t tmp16 = 0;
     86   int32_t tmp32 = 0, in32 = 0;
     87   int32_t state32 = WEBRTC_SPL_LSHIFT_W32((int32_t) (*filter_state), 16); // Q31
     88 
     89   for (i = 0; i < vector_length; i++) {
     90     tmp32 = state32 + WEBRTC_SPL_MUL_16_16(filter_coefficients, (*in_vector));
     91     tmp16 = (int16_t) WEBRTC_SPL_RSHIFT_W32(tmp32, 16);
     92     *out_vector++ = tmp16;
     93     in32 = WEBRTC_SPL_LSHIFT_W32(((int32_t) (*in_vector)), 14);
     94     state32 = in32 - WEBRTC_SPL_MUL_16_16(filter_coefficients, tmp16);
     95     state32 = WEBRTC_SPL_LSHIFT_W32(state32, 1);
     96     in_vector += 2;
     97   }
     98 
     99   *filter_state = (int16_t) WEBRTC_SPL_RSHIFT_W32(state32, 16);
    100 }
    101 
    102 void WebRtcVad_SplitFilter(int16_t* in_vector,
    103                            int in_vector_length,
    104                            int16_t* upper_state,
    105                            int16_t* lower_state,
    106                            int16_t* out_vector_hp,
    107                            int16_t* out_vector_lp) {
    108   int16_t tmp_out;
    109   int i;
    110   int half_length = WEBRTC_SPL_RSHIFT_W16(in_vector_length, 1);
    111 
    112   // All-pass filtering upper branch
    113   WebRtcVad_Allpass(&in_vector[0], kAllPassCoefsQ15[0], half_length,
    114                     upper_state, out_vector_hp);
    115 
    116   // All-pass filtering lower branch
    117   WebRtcVad_Allpass(&in_vector[1], kAllPassCoefsQ15[1], half_length,
    118                     lower_state, out_vector_lp);
    119 
    120   // Make LP and HP signals
    121   for (i = 0; i < half_length; i++) {
    122     tmp_out = *out_vector_hp;
    123     *out_vector_hp++ -= *out_vector_lp;
    124     *out_vector_lp++ += tmp_out;
    125   }
    126 }
    127 
    128 int16_t WebRtcVad_get_features(VadInstT* inst,
    129                                int16_t* in_vector,
    130                                int frame_size,
    131                                int16_t* out_vector) {
    132   int16_t power = 0;
    133   // We expect |frame_size| to be 80, 160 or 240 samples, which corresponds to
    134   // 10, 20 or 30 ms in 8 kHz. Therefore, the intermediate downsampled data will
    135   // have at most 120 samples after the first split and at most 60 samples after
    136   // the second split.
    137   int16_t hp_120[120], lp_120[120];
    138   int16_t hp_60[60], lp_60[60];
    139   // Initialize variables for the first SplitFilter().
    140   int length = frame_size;
    141   int frequency_band = 0;
    142   int16_t* in_ptr = in_vector;
    143   int16_t* hp_out_ptr = hp_120;
    144   int16_t* lp_out_ptr = lp_120;
    145 
    146   // Split at 2000 Hz and downsample
    147   WebRtcVad_SplitFilter(in_ptr, length, &inst->upper_state[frequency_band],
    148                         &inst->lower_state[frequency_band], hp_out_ptr,
    149                         lp_out_ptr);
    150 
    151   // Split at 3000 Hz and downsample
    152   frequency_band = 1;
    153   in_ptr = hp_120;
    154   hp_out_ptr = hp_60;
    155   lp_out_ptr = lp_60;
    156   length = WEBRTC_SPL_RSHIFT_W16(frame_size, 1);
    157 
    158   WebRtcVad_SplitFilter(in_ptr, length, &inst->upper_state[frequency_band],
    159                         &inst->lower_state[frequency_band], hp_out_ptr,
    160                         lp_out_ptr);
    161 
    162   // Energy in 3000 Hz - 4000 Hz
    163   length = WEBRTC_SPL_RSHIFT_W16(length, 1);
    164   WebRtcVad_LogOfEnergy(hp_60, length, kOffsetVector[5], &power,
    165                         &out_vector[5]);
    166 
    167   // Energy in 2000 Hz - 3000 Hz
    168   WebRtcVad_LogOfEnergy(lp_60, length, kOffsetVector[4], &power,
    169                         &out_vector[4]);
    170 
    171   // Split at 1000 Hz and downsample
    172   frequency_band = 2;
    173   in_ptr = lp_120;
    174   hp_out_ptr = hp_60;
    175   lp_out_ptr = lp_60;
    176   length = WEBRTC_SPL_RSHIFT_W16(frame_size, 1);
    177   WebRtcVad_SplitFilter(in_ptr, length, &inst->upper_state[frequency_band],
    178                         &inst->lower_state[frequency_band], hp_out_ptr,
    179                         lp_out_ptr);
    180 
    181   // Energy in 1000 Hz - 2000 Hz
    182   length = WEBRTC_SPL_RSHIFT_W16(length, 1);
    183   WebRtcVad_LogOfEnergy(hp_60, length, kOffsetVector[3], &power,
    184                         &out_vector[3]);
    185 
    186   // Split at 500 Hz
    187   frequency_band = 3;
    188   in_ptr = lp_60;
    189   hp_out_ptr = hp_120;
    190   lp_out_ptr = lp_120;
    191 
    192   WebRtcVad_SplitFilter(in_ptr, length, &inst->upper_state[frequency_band],
    193                         &inst->lower_state[frequency_band], hp_out_ptr,
    194                         lp_out_ptr);
    195 
    196   // Energy in 500 Hz - 1000 Hz
    197   length = WEBRTC_SPL_RSHIFT_W16(length, 1);
    198   WebRtcVad_LogOfEnergy(hp_120, length, kOffsetVector[2], &power,
    199                         &out_vector[2]);
    200 
    201   // Split at 250 Hz
    202   frequency_band = 4;
    203   in_ptr = lp_120;
    204   hp_out_ptr = hp_60;
    205   lp_out_ptr = lp_60;
    206 
    207   WebRtcVad_SplitFilter(in_ptr, length, &inst->upper_state[frequency_band],
    208                         &inst->lower_state[frequency_band], hp_out_ptr,
    209                         lp_out_ptr);
    210 
    211   // Energy in 250 Hz - 500 Hz
    212   length = WEBRTC_SPL_RSHIFT_W16(length, 1);
    213   WebRtcVad_LogOfEnergy(hp_60, length, kOffsetVector[1], &power,
    214                         &out_vector[1]);
    215 
    216   // Remove DC and LFs
    217   WebRtcVad_HpOutput(lp_60, length, inst->hp_filter_state, hp_120);
    218 
    219   // Power in 80 Hz - 250 Hz
    220   WebRtcVad_LogOfEnergy(hp_120, length, kOffsetVector[0], &power,
    221                         &out_vector[0]);
    222 
    223   return power;
    224 }
    225 
    226 void WebRtcVad_LogOfEnergy(int16_t* vector,
    227                            int vector_length,
    228                            int16_t offset,
    229                            int16_t* power,
    230                            int16_t* log_energy) {
    231   int shfts = 0, shfts2 = 0;
    232   int16_t energy_s16 = 0;
    233   int16_t zeros = 0, frac = 0, log2 = 0;
    234   int32_t energy = WebRtcSpl_Energy(vector, vector_length, &shfts);
    235 
    236   if (energy > 0) {
    237 
    238     shfts2 = 16 - WebRtcSpl_NormW32(energy);
    239     shfts += shfts2;
    240     // "shfts" is the total number of right shifts that has been done to
    241     // energy_s16.
    242     energy_s16 = (int16_t) WEBRTC_SPL_SHIFT_W32(energy, -shfts2);
    243 
    244     // Find:
    245     // 160*log10(energy_s16*2^shfts) = 160*log10(2)*log2(energy_s16*2^shfts) =
    246     // 160*log10(2)*(log2(energy_s16) + log2(2^shfts)) =
    247     // 160*log10(2)*(log2(energy_s16) + shfts)
    248 
    249     zeros = WebRtcSpl_NormU32(energy_s16);
    250     frac = (int16_t) (((uint32_t) ((int32_t) (energy_s16) << zeros)
    251         & 0x7FFFFFFF) >> 21);
    252     log2 = (int16_t) (((31 - zeros) << 10) + frac);
    253 
    254     *log_energy = (int16_t) WEBRTC_SPL_MUL_16_16_RSFT(kLogConst, log2, 19)
    255         + (int16_t) WEBRTC_SPL_MUL_16_16_RSFT(shfts, kLogConst, 9);
    256 
    257     if (*log_energy < 0) {
    258       *log_energy = 0;
    259     }
    260   } else {
    261     *log_energy = 0;
    262     shfts = -15;
    263     energy_s16 = 0;
    264   }
    265 
    266   *log_energy += offset;
    267 
    268   // Total power in frame
    269   if (*power <= MIN_ENERGY) {
    270     if (shfts > 0) {
    271       *power += MIN_ENERGY + 1;
    272     } else if (WEBRTC_SPL_SHIFT_W16(energy_s16, shfts) > MIN_ENERGY) {
    273       *power += MIN_ENERGY + 1;
    274     } else {
    275       *power += WEBRTC_SPL_SHIFT_W16(energy_s16, shfts);
    276     }
    277   }
    278 }
    279