1 /* 2 * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 12 /* 13 * This header file includes the descriptions of the core VAD calls. 14 */ 15 16 #ifndef WEBRTC_VAD_CORE_H_ 17 #define WEBRTC_VAD_CORE_H_ 18 19 #include "typedefs.h" 20 #include "vad_defines.h" 21 22 typedef struct VadInstT_ 23 { 24 25 WebRtc_Word16 vad; 26 WebRtc_Word32 downsampling_filter_states[4]; 27 WebRtc_Word16 noise_means[NUM_TABLE_VALUES]; 28 WebRtc_Word16 speech_means[NUM_TABLE_VALUES]; 29 WebRtc_Word16 noise_stds[NUM_TABLE_VALUES]; 30 WebRtc_Word16 speech_stds[NUM_TABLE_VALUES]; 31 WebRtc_Word32 frame_counter; 32 WebRtc_Word16 over_hang; // Over Hang 33 WebRtc_Word16 num_of_speech; 34 WebRtc_Word16 index_vector[16 * NUM_CHANNELS]; 35 WebRtc_Word16 low_value_vector[16 * NUM_CHANNELS]; 36 WebRtc_Word16 mean_value[NUM_CHANNELS]; 37 WebRtc_Word16 upper_state[5]; 38 WebRtc_Word16 lower_state[5]; 39 WebRtc_Word16 hp_filter_state[4]; 40 WebRtc_Word16 over_hang_max_1[3]; 41 WebRtc_Word16 over_hang_max_2[3]; 42 WebRtc_Word16 individual[3]; 43 WebRtc_Word16 total[3]; 44 45 short init_flag; 46 47 } VadInstT; 48 49 /**************************************************************************** 50 * WebRtcVad_InitCore(...) 51 * 52 * This function initializes a VAD instance 53 * 54 * Input: 55 * - inst : Instance that should be initialized 56 * - mode : Aggressiveness degree 57 * 0 (High quality) - 3 (Highly aggressive) 58 * 59 * Output: 60 * - inst : Initialized instance 61 * 62 * Return value : 0 - Ok 63 * -1 - Error 64 */ 65 int WebRtcVad_InitCore(VadInstT* inst, short mode); 66 67 /**************************************************************************** 68 * WebRtcVad_set_mode_core(...) 69 * 70 * This function changes the VAD settings 71 * 72 * Input: 73 * - inst : VAD instance 74 * - mode : Aggressiveness degree 75 * 0 (High quality) - 3 (Highly aggressive) 76 * 77 * Output: 78 * - inst : Changed instance 79 * 80 * Return value : 0 - Ok 81 * -1 - Error 82 */ 83 84 int WebRtcVad_set_mode_core(VadInstT* inst, short mode); 85 86 /**************************************************************************** 87 * WebRtcVad_CalcVad32khz(...) 88 * WebRtcVad_CalcVad16khz(...) 89 * WebRtcVad_CalcVad8khz(...) 90 * 91 * Calculate probability for active speech and make VAD decision. 92 * 93 * Input: 94 * - inst : Instance that should be initialized 95 * - speech_frame : Input speech frame 96 * - frame_length : Number of input samples 97 * 98 * Output: 99 * - inst : Updated filter states etc. 100 * 101 * Return value : VAD decision 102 * 0 - No active speech 103 * 1-6 - Active speech 104 */ 105 WebRtc_Word16 WebRtcVad_CalcVad32khz(VadInstT* inst, WebRtc_Word16* speech_frame, 106 int frame_length); 107 WebRtc_Word16 WebRtcVad_CalcVad16khz(VadInstT* inst, WebRtc_Word16* speech_frame, 108 int frame_length); 109 WebRtc_Word16 WebRtcVad_CalcVad8khz(VadInstT* inst, WebRtc_Word16* speech_frame, 110 int frame_length); 111 112 /**************************************************************************** 113 * WebRtcVad_GmmProbability(...) 114 * 115 * This function calculates the probabilities for background noise and 116 * speech using Gaussian Mixture Models. A hypothesis-test is performed to decide 117 * which type of signal is most probable. 118 * 119 * Input: 120 * - inst : Pointer to VAD instance 121 * - feature_vector : Feature vector = log10(energy in frequency band) 122 * - total_power : Total power in frame. 123 * - frame_length : Number of input samples 124 * 125 * Output: 126 * VAD decision : 0 - noise, 1 - speech 127 * 128 */ 129 WebRtc_Word16 WebRtcVad_GmmProbability(VadInstT* inst, WebRtc_Word16* feature_vector, 130 WebRtc_Word16 total_power, int frame_length); 131 132 #endif // WEBRTC_VAD_CORE_H_ 133