1 /* 2 * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 12 /* 13 * This header file includes the descriptions of the core VAD calls. 14 */ 15 16 #ifndef WEBRTC_VAD_CORE_H_ 17 #define WEBRTC_VAD_CORE_H_ 18 19 #include "typedefs.h" 20 #include "vad_defines.h" 21 22 typedef struct VadInstT_ 23 { 24 25 WebRtc_Word16 vad; 26 WebRtc_Word32 downsampling_filter_states[4]; 27 WebRtc_Word16 noise_means[NUM_TABLE_VALUES]; 28 WebRtc_Word16 speech_means[NUM_TABLE_VALUES]; 29 WebRtc_Word16 noise_stds[NUM_TABLE_VALUES]; 30 WebRtc_Word16 speech_stds[NUM_TABLE_VALUES]; 31 // TODO(bjornv): Change to |frame_count|. 32 WebRtc_Word32 frame_counter; 33 WebRtc_Word16 over_hang; // Over Hang 34 WebRtc_Word16 num_of_speech; 35 // TODO(bjornv): Change to |age_vector|. 36 WebRtc_Word16 index_vector[16 * NUM_CHANNELS]; 37 WebRtc_Word16 low_value_vector[16 * NUM_CHANNELS]; 38 // TODO(bjornv): Change to |median|. 39 WebRtc_Word16 mean_value[NUM_CHANNELS]; 40 WebRtc_Word16 upper_state[5]; 41 WebRtc_Word16 lower_state[5]; 42 WebRtc_Word16 hp_filter_state[4]; 43 WebRtc_Word16 over_hang_max_1[3]; 44 WebRtc_Word16 over_hang_max_2[3]; 45 WebRtc_Word16 individual[3]; 46 WebRtc_Word16 total[3]; 47 48 short init_flag; 49 50 } VadInstT; 51 52 /**************************************************************************** 53 * WebRtcVad_InitCore(...) 54 * 55 * This function initializes a VAD instance 56 * 57 * Input: 58 * - inst : Instance that should be initialized 59 * - mode : Aggressiveness degree 60 * 0 (High quality) - 3 (Highly aggressive) 61 * 62 * Output: 63 * - inst : Initialized instance 64 * 65 * Return value : 0 - Ok 66 * -1 - Error 67 */ 68 int WebRtcVad_InitCore(VadInstT* inst, short mode); 69 70 /**************************************************************************** 71 * WebRtcVad_set_mode_core(...) 72 * 73 * This function changes the VAD settings 74 * 75 * Input: 76 * - inst : VAD instance 77 * - mode : Aggressiveness degree 78 * 0 (High quality) - 3 (Highly aggressive) 79 * 80 * Output: 81 * - inst : Changed instance 82 * 83 * Return value : 0 - Ok 84 * -1 - Error 85 */ 86 87 int WebRtcVad_set_mode_core(VadInstT* inst, short mode); 88 89 /**************************************************************************** 90 * WebRtcVad_CalcVad32khz(...) 91 * WebRtcVad_CalcVad16khz(...) 92 * WebRtcVad_CalcVad8khz(...) 93 * 94 * Calculate probability for active speech and make VAD decision. 95 * 96 * Input: 97 * - inst : Instance that should be initialized 98 * - speech_frame : Input speech frame 99 * - frame_length : Number of input samples 100 * 101 * Output: 102 * - inst : Updated filter states etc. 103 * 104 * Return value : VAD decision 105 * 0 - No active speech 106 * 1-6 - Active speech 107 */ 108 WebRtc_Word16 WebRtcVad_CalcVad32khz(VadInstT* inst, WebRtc_Word16* speech_frame, 109 int frame_length); 110 WebRtc_Word16 WebRtcVad_CalcVad16khz(VadInstT* inst, WebRtc_Word16* speech_frame, 111 int frame_length); 112 WebRtc_Word16 WebRtcVad_CalcVad8khz(VadInstT* inst, WebRtc_Word16* speech_frame, 113 int frame_length); 114 115 /**************************************************************************** 116 * WebRtcVad_GmmProbability(...) 117 * 118 * This function calculates the probabilities for background noise and 119 * speech using Gaussian Mixture Models. A hypothesis-test is performed to decide 120 * which type of signal is most probable. 121 * 122 * Input: 123 * - inst : Pointer to VAD instance 124 * - feature_vector : Feature vector = log10(energy in frequency band) 125 * - total_power : Total power in frame. 126 * - frame_length : Number of input samples 127 * 128 * Output: 129 * VAD decision : 0 - noise, 1 - speech 130 * 131 */ 132 WebRtc_Word16 WebRtcVad_GmmProbability(VadInstT* inst, WebRtc_Word16* feature_vector, 133 WebRtc_Word16 total_power, int frame_length); 134 135 #endif // WEBRTC_VAD_CORE_H_ 136