Home | History | Annotate | Download | only in source
      1 /*
      2  *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
      3  *
      4  *  Use of this source code is governed by a BSD-style license
      5  *  that can be found in the LICENSE file in the root of the source
      6  *  tree. An additional intellectual property rights grant can be found
      7  *  in the file PATENTS.  All contributing project authors may
      8  *  be found in the AUTHORS file in the root of the source tree.
      9  */
     10 
     11 
     12 /*
     13  * This header file declares the VAD instance structure and the core VAD calls.
     14  */
     15 
     16 #ifndef WEBRTC_VAD_CORE_H_
     17 #define WEBRTC_VAD_CORE_H_
     18 
     19 #include "typedefs.h"
     20 #include "vad_defines.h"
     21 
     22 typedef struct VadInstT_ // State of one VAD instance; passed as `inst` to the core VAD functions declared below.
     23 {
     24     // NUM_TABLE_VALUES and NUM_CHANNELS are not defined in this file (presumably in vad_defines.h).
     25     WebRtc_Word16 vad; // NOTE(review): presumably the latest VAD decision - confirm in the implementation
     26     WebRtc_Word32 downsampling_filter_states[4]; // Filter memory; presumably for downsampling 32/16 kHz input - confirm
     27     WebRtc_Word16 noise_means[NUM_TABLE_VALUES]; // presumably GMM means of the background-noise model - confirm
     28     WebRtc_Word16 speech_means[NUM_TABLE_VALUES]; // presumably GMM means of the speech model - confirm
     29     WebRtc_Word16 noise_stds[NUM_TABLE_VALUES]; // presumably GMM standard deviations of the noise model - confirm
     30     WebRtc_Word16 speech_stds[NUM_TABLE_VALUES]; // presumably GMM standard deviations of the speech model - confirm
     31     WebRtc_Word32 frame_counter; // presumably the number of frames processed so far - confirm
     32     WebRtc_Word16 over_hang; // Over-hang state; presumably frames still reported as speech after speech ends - confirm
     33     WebRtc_Word16 num_of_speech; // presumably a count of consecutive speech frames - confirm
     34     WebRtc_Word16 index_vector[16 * NUM_CHANNELS]; // 16 * NUM_CHANNELS entries; use is not visible in this header
     35     WebRtc_Word16 low_value_vector[16 * NUM_CHANNELS]; // same size as index_vector; presumably smallest recent feature values - confirm
     36     WebRtc_Word16 mean_value[NUM_CHANNELS]; // NUM_CHANNELS entries; meaning is not visible in this header
     37     WebRtc_Word16 upper_state[5]; // 5 state values; NOTE(review): looks like filter memory - confirm
     38     WebRtc_Word16 lower_state[5]; // 5 state values; NOTE(review): looks like filter memory - confirm
     39     WebRtc_Word16 hp_filter_state[4]; // presumably high-pass filter memory - confirm
     40     WebRtc_Word16 over_hang_max_1[3]; // 3 values; presumably mode-dependent over-hang limits - confirm
     41     WebRtc_Word16 over_hang_max_2[3]; // 3 values; presumably mode-dependent over-hang limits - confirm
     42     WebRtc_Word16 individual[3]; // 3 values; presumably mode-dependent per-channel thresholds - confirm
     43     WebRtc_Word16 total[3]; // 3 values; presumably mode-dependent overall thresholds - confirm
     44 
     45     short init_flag; // NOTE(review): presumably marks that WebRtcVad_InitCore has run - confirm
     46 
     47 } VadInstT;
     48 
     49 /****************************************************************************
     50  * WebRtcVad_InitCore(...)
     51  *
     52  * Initializes a VAD instance for the requested aggressiveness mode.
     53  *
     54  * Input:
     55  *      - inst      : Instance to initialize; the caller supplies the storage
     56  *      - mode      : Aggressiveness degree,
     57  *                    0 (High quality) - 3 (Highly aggressive)
     58  *
     59  * Output:
     60  *      - inst      : Initialized instance
     61  *
     62  * Return value     :  0 - Ok
     63  *                    -1 - Error (conditions are not visible in this header)
     64  */
     65 int WebRtcVad_InitCore(VadInstT* inst, short mode);
     66 
     67 /****************************************************************************
     68  * WebRtcVad_set_mode_core(...)
     69  *
     70  * Changes the VAD settings of an instance to the given aggressiveness mode.
     71  *
     72  * Input:
     73  *      - inst      : VAD instance
     74  *      - mode      : Aggressiveness degree,
     75  *                    0 (High quality) - 3 (Highly aggressive)
     76  *
     77  * Output:
     78  *      - inst      : Instance with the changed settings
     79  *
     80  * Return value     :  0 - Ok
     81  *                    -1 - Error (conditions are not visible in this header)
     82  */
     83 
     84 int WebRtcVad_set_mode_core(VadInstT* inst, short mode);
     85 
     86 /****************************************************************************
     87  * WebRtcVad_CalcVad32khz(...)
     88  * WebRtcVad_CalcVad16khz(...)
     89  * WebRtcVad_CalcVad8khz(...)
     90  *
     91  * Calculate the probability of active speech and make the VAD decision.
     92  *
     93  * Input:
     94  *      - inst          : VAD instance
     95  *      - speech_frame  : Input speech frame
     96  *      - frame_length  : Number of samples in speech_frame
     97  *
     98  * Output:
     99  *      - inst          : Updated filter states etc.
    100  *
    101  * Return value         : VAD decision
    102  *                        0 - No active speech
    103  *                        1-6 - Active speech
    104  */
    105 WebRtc_Word16 WebRtcVad_CalcVad32khz(VadInstT* inst, WebRtc_Word16* speech_frame,
    106                                      int frame_length);
    107 WebRtc_Word16 WebRtcVad_CalcVad16khz(VadInstT* inst, WebRtc_Word16* speech_frame,
    108                                      int frame_length);
    109 WebRtc_Word16 WebRtcVad_CalcVad8khz(VadInstT* inst, WebRtc_Word16* speech_frame,
    110                                     int frame_length);
    111 
    112 /****************************************************************************
    113  * WebRtcVad_GmmProbability(...)
    114  *
    115  * This function calculates the probabilities for background noise and
    116  * speech using Gaussian Mixture Models. A hypothesis test is performed to decide
    117  * which type of signal is most probable.
    118  *
    119  * Input:
    120  *      - inst              : Pointer to VAD instance
    121  *      - feature_vector    : Feature vector = log10(energy in frequency band)
    122  *      - total_power       : Total power in frame
    123  *      - frame_length      : Number of input samples
    124  *
    125  * Return value:
    126  *      VAD decision        : 0 - noise, 1 - speech
    127  *
    128  */
    129 WebRtc_Word16 WebRtcVad_GmmProbability(VadInstT* inst, WebRtc_Word16* feature_vector,
    130                                        WebRtc_Word16 total_power, int frame_length);
    131 
    132 #endif // WEBRTC_VAD_CORE_H_
    133