Home | History | Annotate | Download | only in vad
      1 /*
      2  *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
      3  *
      4  *  Use of this source code is governed by a BSD-style license
      5  *  that can be found in the LICENSE file in the root of the source
      6  *  tree. An additional intellectual property rights grant can be found
      7  *  in the file PATENTS.  All contributing project authors may
      8  *  be found in the AUTHORS file in the root of the source tree.
      9  */
     10 
     11 
     12 /*
     13  * This header file includes the descriptions of the core VAD calls.
     14  */
     15 
     16 #ifndef WEBRTC_VAD_CORE_H_
     17 #define WEBRTC_VAD_CORE_H_
     18 
     19 #include "typedefs.h"
     20 #include "vad_defines.h"
     21 
     22 typedef struct VadInstT_
     23 {
     24     // State of one VAD instance. Every core function declared in this header takes a pointer to it.
     25     WebRtc_Word16 vad; // NOTE(review): presumably the most recent VAD decision - confirm in the implementation.
     26     WebRtc_Word32 downsampling_filter_states[4]; // NOTE(review): presumably memory of the input downsampling filters - confirm.
     27     WebRtc_Word16 noise_means[NUM_TABLE_VALUES]; // Noise model means. NOTE(review): presumably GMM parameters used by WebRtcVad_GmmProbability - confirm.
     28     WebRtc_Word16 speech_means[NUM_TABLE_VALUES]; // Speech model means (same assumption).
     29     WebRtc_Word16 noise_stds[NUM_TABLE_VALUES]; // Noise model standard deviations (same assumption).
     30     WebRtc_Word16 speech_stds[NUM_TABLE_VALUES]; // Speech model standard deviations (same assumption).
     31     // TODO(bjornv): Change to |frame_count|.
     32     WebRtc_Word32 frame_counter;
     33     WebRtc_Word16 over_hang; // Over-hang (hangover) value. NOTE(review): units and update rule are not visible in this header.
     34     WebRtc_Word16 num_of_speech; // NOTE(review): presumably a running count of speech frames - confirm.
     35     // TODO(bjornv): Change to |age_vector|.
     36     WebRtc_Word16 index_vector[16 * NUM_CHANNELS]; // Sized 16 * NUM_CHANNELS (presumably 16 entries per channel).
     37     WebRtc_Word16 low_value_vector[16 * NUM_CHANNELS]; // Same size as index_vector.
     38     // TODO(bjornv): Change to |median|.
     39     WebRtc_Word16 mean_value[NUM_CHANNELS]; // One value per NUM_CHANNELS entry.
     40     WebRtc_Word16 upper_state[5]; // NOTE(review): upper_state, lower_state and hp_filter_state are presumably filter memories - confirm.
     41     WebRtc_Word16 lower_state[5];
     42     WebRtc_Word16 hp_filter_state[4];
     43     WebRtc_Word16 over_hang_max_1[3]; // NOTE(review): the four 3-element tables below are presumably mode-dependent settings - confirm.
     44     WebRtc_Word16 over_hang_max_2[3];
     45     WebRtc_Word16 individual[3];
     46     WebRtc_Word16 total[3];
     47 
     48     short init_flag; // NOTE(review): presumably marks that WebRtcVad_InitCore has been run - confirm.
     49 
     50 } VadInstT;
     51 
     52 /****************************************************************************
     53  * WebRtcVad_InitCore(...)
     54  *
     55  * This function initializes a VAD instance for the given aggressiveness mode.
     56  *
     57  * Input:
     58  *      - inst      : Pointer to the instance that should be initialized
     59  *      - mode      : Aggressiveness degree
     60  *                    0 (High quality) - 3 (Highly aggressive)
     61  *
     62  * Output:
     63  *      - inst      : Initialized instance
     64  *
     65  * Return value     :  0 - Ok
     66  *                    -1 - Error
     67  */
     68 int WebRtcVad_InitCore(VadInstT* inst, short mode);
     69 
     70 /****************************************************************************
     71  * WebRtcVad_set_mode_core(...)
     72  *
     73  * This function changes the aggressiveness mode of a VAD instance.
     74  *
     75  * Input:
     76  *      - inst      : Pointer to the VAD instance
     77  *      - mode      : Aggressiveness degree
     78  *                    0 (High quality) - 3 (Highly aggressive)
     79  *
     80  * Output:
     81  *      - inst      : Updated instance
     82  *
     83  * Return value     :  0 - Ok
     84  *                    -1 - Error
     85  */
     86 
     87 int WebRtcVad_set_mode_core(VadInstT* inst, short mode);
     88 
     89 /****************************************************************************
     90  * WebRtcVad_CalcVad32khz(...)
     91  * WebRtcVad_CalcVad16khz(...)
     92  * WebRtcVad_CalcVad8khz(...)
     93  *
     94  * Calculate the probability of active speech and make the VAD decision.
     95  *
     96  * Input:
     97  *      - inst          : Pointer to the VAD instance (its state is read and updated)
     98  *      - speech_frame  : Input speech frame
     99  *      - frame_length  : Number of input samples in speech_frame
    100  *
    101  * Output:
    102  *      - inst          : Updated filter states etc.
    103  *
    104  * Return value         : VAD decision
    105  *                        0 - No active speech
    106  *                        1-6 - Active speech
    107  */
    108 WebRtc_Word16 WebRtcVad_CalcVad32khz(VadInstT* inst, WebRtc_Word16* speech_frame,
    109                                      int frame_length);
    110 WebRtc_Word16 WebRtcVad_CalcVad16khz(VadInstT* inst, WebRtc_Word16* speech_frame,
    111                                      int frame_length);
    112 WebRtc_Word16 WebRtcVad_CalcVad8khz(VadInstT* inst, WebRtc_Word16* speech_frame,
    113                                     int frame_length);
    114 
    115 /****************************************************************************
    116  * WebRtcVad_GmmProbability(...)
    117  *
    118  * This function calculates the probabilities for background noise and
    119  * speech using Gaussian Mixture Models. A hypothesis test is performed to decide
    120  * which type of signal is most probable.
    121  *
    122  * Input:
    123  *      - inst              : Pointer to VAD instance
    124  *      - feature_vector    : Feature vector = log10(energy in frequency band)
    125  *      - total_power       : Total power in frame.
    126  *      - frame_length      : Number of input samples
    127  *
    128  * Return value:
    129  *      VAD decision        : 0 - noise, 1 - speech
    130  * NOTE(review): WebRtcVad_CalcVad* documents 1-6 for active speech; confirm whether values above 1 can be returned here.
    131  */
    132 WebRtc_Word16 WebRtcVad_GmmProbability(VadInstT* inst, WebRtc_Word16* feature_vector,
    133                                        WebRtc_Word16 total_power, int frame_length);
    134 
    135 #endif // WEBRTC_VAD_CORE_H_
    136