1 /* 2 * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 /* 12 * This file includes the implementation of the internal filterbank associated functions. 13 * For function description, see vad_filterbank.h. 14 */ 15 16 #include "vad_filterbank.h" 17 18 #include "signal_processing_library.h" 19 #include "typedefs.h" 20 #include "vad_defines.h" 21 22 // Constant 160*log10(2) in Q9 23 static const int16_t kLogConst = 24660; 24 25 // Coefficients used by WebRtcVad_HpOutput, Q14 26 static const int16_t kHpZeroCoefs[3] = { 6631, -13262, 6631 }; 27 static const int16_t kHpPoleCoefs[3] = { 16384, -7756, 5620 }; 28 29 // Allpass filter coefficients, upper and lower, in Q15 30 // Upper: 0.64, Lower: 0.17 31 static const int16_t kAllPassCoefsQ15[2] = { 20972, 5571 }; 32 33 // Adjustment for division with two in WebRtcVad_SplitFilter 34 static const int16_t kOffsetVector[6] = { 368, 368, 272, 176, 176, 176 }; 35 36 void WebRtcVad_HpOutput(int16_t* in_vector, 37 int in_vector_length, 38 int16_t* filter_state, 39 int16_t* out_vector) { 40 int i; 41 int16_t* in_ptr = in_vector; 42 int16_t* out_ptr = out_vector; 43 int32_t tmp32 = 0; 44 45 46 // The sum of the absolute values of the impulse response: 47 // The zero/pole-filter has a max amplification of a single sample of: 1.4546 48 // Impulse response: 0.4047 -0.6179 -0.0266 0.1993 0.1035 -0.0194 49 // The all-zero section has a max amplification of a single sample of: 1.6189 50 // Impulse response: 0.4047 -0.8094 0.4047 0 0 0 51 // The all-pole section has a max amplification of a single sample of: 1.9931 52 // Impulse response: 1.0000 0.4734 -0.1189 -0.2187 -0.0627 0.04532 53 54 for (i = 0; i < in_vector_length; i++) { 55 // all-zero section (filter coefficients in Q14) 56 tmp32 = (int32_t) WEBRTC_SPL_MUL_16_16(kHpZeroCoefs[0], (*in_ptr)); 57 tmp32 += (int32_t) WEBRTC_SPL_MUL_16_16(kHpZeroCoefs[1], filter_state[0]); 58 tmp32 += (int32_t) WEBRTC_SPL_MUL_16_16(kHpZeroCoefs[2], 59 filter_state[1]); // Q14 60 filter_state[1] = filter_state[0]; 61 filter_state[0] = *in_ptr++; 62 63 // all-pole section 64 tmp32 -= (int32_t) WEBRTC_SPL_MUL_16_16(kHpPoleCoefs[1], 65 filter_state[2]); // Q14 66 tmp32 -= (int32_t) WEBRTC_SPL_MUL_16_16(kHpPoleCoefs[2], filter_state[3]); 67 filter_state[3] = filter_state[2]; 68 filter_state[2] = (int16_t) WEBRTC_SPL_RSHIFT_W32 (tmp32, 14); 69 *out_ptr++ = filter_state[2]; 70 } 71 } 72 73 void WebRtcVad_Allpass(int16_t* in_vector, 74 int16_t filter_coefficients, 75 int vector_length, 76 int16_t* filter_state, 77 int16_t* out_vector) { 78 // The filter can only cause overflow (in the w16 output variable) 79 // if more than 4 consecutive input numbers are of maximum value and 80 // has the the same sign as the impulse responses first taps. 81 // First 6 taps of the impulse response: 0.6399 0.5905 -0.3779 82 // 0.2418 -0.1547 0.0990 83 84 int i; 85 int16_t tmp16 = 0; 86 int32_t tmp32 = 0, in32 = 0; 87 int32_t state32 = WEBRTC_SPL_LSHIFT_W32((int32_t) (*filter_state), 16); // Q31 88 89 for (i = 0; i < vector_length; i++) { 90 tmp32 = state32 + WEBRTC_SPL_MUL_16_16(filter_coefficients, (*in_vector)); 91 tmp16 = (int16_t) WEBRTC_SPL_RSHIFT_W32(tmp32, 16); 92 *out_vector++ = tmp16; 93 in32 = WEBRTC_SPL_LSHIFT_W32(((int32_t) (*in_vector)), 14); 94 state32 = in32 - WEBRTC_SPL_MUL_16_16(filter_coefficients, tmp16); 95 state32 = WEBRTC_SPL_LSHIFT_W32(state32, 1); 96 in_vector += 2; 97 } 98 99 *filter_state = (int16_t) WEBRTC_SPL_RSHIFT_W32(state32, 16); 100 } 101 102 void WebRtcVad_SplitFilter(int16_t* in_vector, 103 int in_vector_length, 104 int16_t* upper_state, 105 int16_t* lower_state, 106 int16_t* out_vector_hp, 107 int16_t* out_vector_lp) { 108 int16_t tmp_out; 109 int i; 110 int half_length = WEBRTC_SPL_RSHIFT_W16(in_vector_length, 1); 111 112 // All-pass filtering upper branch 113 WebRtcVad_Allpass(&in_vector[0], kAllPassCoefsQ15[0], half_length, 114 upper_state, out_vector_hp); 115 116 // All-pass filtering lower branch 117 WebRtcVad_Allpass(&in_vector[1], kAllPassCoefsQ15[1], half_length, 118 lower_state, out_vector_lp); 119 120 // Make LP and HP signals 121 for (i = 0; i < half_length; i++) { 122 tmp_out = *out_vector_hp; 123 *out_vector_hp++ -= *out_vector_lp; 124 *out_vector_lp++ += tmp_out; 125 } 126 } 127 128 int16_t WebRtcVad_get_features(VadInstT* inst, 129 int16_t* in_vector, 130 int frame_size, 131 int16_t* out_vector) { 132 int16_t power = 0; 133 // We expect |frame_size| to be 80, 160 or 240 samples, which corresponds to 134 // 10, 20 or 30 ms in 8 kHz. Therefore, the intermediate downsampled data will 135 // have at most 120 samples after the first split and at most 60 samples after 136 // the second split. 137 int16_t hp_120[120], lp_120[120]; 138 int16_t hp_60[60], lp_60[60]; 139 // Initialize variables for the first SplitFilter(). 140 int length = frame_size; 141 int frequency_band = 0; 142 int16_t* in_ptr = in_vector; 143 int16_t* hp_out_ptr = hp_120; 144 int16_t* lp_out_ptr = lp_120; 145 146 // Split at 2000 Hz and downsample 147 WebRtcVad_SplitFilter(in_ptr, length, &inst->upper_state[frequency_band], 148 &inst->lower_state[frequency_band], hp_out_ptr, 149 lp_out_ptr); 150 151 // Split at 3000 Hz and downsample 152 frequency_band = 1; 153 in_ptr = hp_120; 154 hp_out_ptr = hp_60; 155 lp_out_ptr = lp_60; 156 length = WEBRTC_SPL_RSHIFT_W16(frame_size, 1); 157 158 WebRtcVad_SplitFilter(in_ptr, length, &inst->upper_state[frequency_band], 159 &inst->lower_state[frequency_band], hp_out_ptr, 160 lp_out_ptr); 161 162 // Energy in 3000 Hz - 4000 Hz 163 length = WEBRTC_SPL_RSHIFT_W16(length, 1); 164 WebRtcVad_LogOfEnergy(hp_60, length, kOffsetVector[5], &power, 165 &out_vector[5]); 166 167 // Energy in 2000 Hz - 3000 Hz 168 WebRtcVad_LogOfEnergy(lp_60, length, kOffsetVector[4], &power, 169 &out_vector[4]); 170 171 // Split at 1000 Hz and downsample 172 frequency_band = 2; 173 in_ptr = lp_120; 174 hp_out_ptr = hp_60; 175 lp_out_ptr = lp_60; 176 length = WEBRTC_SPL_RSHIFT_W16(frame_size, 1); 177 WebRtcVad_SplitFilter(in_ptr, length, &inst->upper_state[frequency_band], 178 &inst->lower_state[frequency_band], hp_out_ptr, 179 lp_out_ptr); 180 181 // Energy in 1000 Hz - 2000 Hz 182 length = WEBRTC_SPL_RSHIFT_W16(length, 1); 183 WebRtcVad_LogOfEnergy(hp_60, length, kOffsetVector[3], &power, 184 &out_vector[3]); 185 186 // Split at 500 Hz 187 frequency_band = 3; 188 in_ptr = lp_60; 189 hp_out_ptr = hp_120; 190 lp_out_ptr = lp_120; 191 192 WebRtcVad_SplitFilter(in_ptr, length, &inst->upper_state[frequency_band], 193 &inst->lower_state[frequency_band], hp_out_ptr, 194 lp_out_ptr); 195 196 // Energy in 500 Hz - 1000 Hz 197 length = WEBRTC_SPL_RSHIFT_W16(length, 1); 198 WebRtcVad_LogOfEnergy(hp_120, length, kOffsetVector[2], &power, 199 &out_vector[2]); 200 201 // Split at 250 Hz 202 frequency_band = 4; 203 in_ptr = lp_120; 204 hp_out_ptr = hp_60; 205 lp_out_ptr = lp_60; 206 207 WebRtcVad_SplitFilter(in_ptr, length, &inst->upper_state[frequency_band], 208 &inst->lower_state[frequency_band], hp_out_ptr, 209 lp_out_ptr); 210 211 // Energy in 250 Hz - 500 Hz 212 length = WEBRTC_SPL_RSHIFT_W16(length, 1); 213 WebRtcVad_LogOfEnergy(hp_60, length, kOffsetVector[1], &power, 214 &out_vector[1]); 215 216 // Remove DC and LFs 217 WebRtcVad_HpOutput(lp_60, length, inst->hp_filter_state, hp_120); 218 219 // Power in 80 Hz - 250 Hz 220 WebRtcVad_LogOfEnergy(hp_120, length, kOffsetVector[0], &power, 221 &out_vector[0]); 222 223 return power; 224 } 225 226 void WebRtcVad_LogOfEnergy(int16_t* vector, 227 int vector_length, 228 int16_t offset, 229 int16_t* power, 230 int16_t* log_energy) { 231 int shfts = 0, shfts2 = 0; 232 int16_t energy_s16 = 0; 233 int16_t zeros = 0, frac = 0, log2 = 0; 234 int32_t energy = WebRtcSpl_Energy(vector, vector_length, &shfts); 235 236 if (energy > 0) { 237 238 shfts2 = 16 - WebRtcSpl_NormW32(energy); 239 shfts += shfts2; 240 // "shfts" is the total number of right shifts that has been done to 241 // energy_s16. 242 energy_s16 = (int16_t) WEBRTC_SPL_SHIFT_W32(energy, -shfts2); 243 244 // Find: 245 // 160*log10(energy_s16*2^shfts) = 160*log10(2)*log2(energy_s16*2^shfts) = 246 // 160*log10(2)*(log2(energy_s16) + log2(2^shfts)) = 247 // 160*log10(2)*(log2(energy_s16) + shfts) 248 249 zeros = WebRtcSpl_NormU32(energy_s16); 250 frac = (int16_t) (((uint32_t) ((int32_t) (energy_s16) << zeros) 251 & 0x7FFFFFFF) >> 21); 252 log2 = (int16_t) (((31 - zeros) << 10) + frac); 253 254 *log_energy = (int16_t) WEBRTC_SPL_MUL_16_16_RSFT(kLogConst, log2, 19) 255 + (int16_t) WEBRTC_SPL_MUL_16_16_RSFT(shfts, kLogConst, 9); 256 257 if (*log_energy < 0) { 258 *log_energy = 0; 259 } 260 } else { 261 *log_energy = 0; 262 shfts = -15; 263 energy_s16 = 0; 264 } 265 266 *log_energy += offset; 267 268 // Total power in frame 269 if (*power <= MIN_ENERGY) { 270 if (shfts > 0) { 271 *power += MIN_ENERGY + 1; 272 } else if (WEBRTC_SPL_SHIFT_W16(energy_s16, shfts) > MIN_ENERGY) { 273 *power += MIN_ENERGY + 1; 274 } else { 275 *power += WEBRTC_SPL_SHIFT_W16(energy_s16, shfts); 276 } 277 } 278 } 279