Home | History | Annotate | Download | only in vad
      1 /*
      2  *  Copyright (c) 2012 The WebRTC project authors. All Rights Reserved.
      3  *
      4  *  Use of this source code is governed by a BSD-style license
      5  *  that can be found in the LICENSE file in the root of the source
      6  *  tree. An additional intellectual property rights grant can be found
      7  *  in the file PATENTS.  All contributing project authors may
      8  *  be found in the AUTHORS file in the root of the source tree.
      9  */
     10 
     11 
     12 /*
     13  * This header file includes the descriptions of the core VAD calls.
     14  */
     15 
     16 #ifndef WEBRTC_COMMON_AUDIO_VAD_VAD_CORE_H_
     17 #define WEBRTC_COMMON_AUDIO_VAD_VAD_CORE_H_
     18 
     19 #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
     20 #include "webrtc/typedefs.h"
     21 
     22 enum { kNumChannels = 6 };  // Number of frequency bands (named channels).
     23 enum { kNumGaussians = 2 };  // Number of Gaussians per channel in the GMM.
     24 enum { kTableSize = kNumChannels * kNumGaussians };  // Gaussians over all channels (6 * 2 = 12).
     25 enum { kMinEnergy = 10 };  // Minimum energy required to trigger audio signal.
     26 
     27 typedef struct VadInstT_
     28 {
     29     // State of one core VAD instance; initialized by WebRtcVad_InitCore().
     30     int vad;  // NOTE(review): presumably the speech/no-speech flag - confirm in vad_core.c.
     31     int32_t downsampling_filter_states[4];  // State words for the downsampling filters.
     32     WebRtcSpl_State48khzTo8khz state_48_to_8;  // Signal processing library state for 48 kHz to 8 kHz.
     33     int16_t noise_means[kTableSize];  // Noise model means, one per channel and Gaussian.
     34     int16_t speech_means[kTableSize];  // Speech model means, one per channel and Gaussian.
     35     int16_t noise_stds[kTableSize];  // Noise model std values, one per channel and Gaussian.
     36     int16_t speech_stds[kTableSize];  // Speech model std values, one per channel and Gaussian.
     37     // TODO(bjornv): Change to |frame_count|.
     38     int32_t frame_counter;
     39     int16_t over_hang;  // Overhang state. NOTE(review): units not visible here - confirm.
     40     int16_t num_of_speech;  // NOTE(review): presumably a count of speech frames - confirm.
     41     // TODO(bjornv): Change to |age_vector|.
     42     int16_t index_vector[16 * kNumChannels];  // 16 entries per channel.
     43     int16_t low_value_vector[16 * kNumChannels];  // 16 entries per channel.
     44     // TODO(bjornv): Change to |median|.
     45     int16_t mean_value[kNumChannels];  // One entry per channel.
     46     int16_t upper_state[5];
     47     int16_t lower_state[5];
     48     int16_t hp_filter_state[4];  // NOTE(review): presumably high-pass filter memory - confirm.
     49     int16_t over_hang_max_1[3];  // NOTE(review): the 3-entry arrays below are presumably
     50     int16_t over_hang_max_2[3];  // mode-dependent settings written by
     51     int16_t individual[3];       // WebRtcVad_set_mode_core() - confirm in vad_core.c.
     52     int16_t total[3];
     53 
     54     int init_flag;  // NOTE(review): presumably marks the instance as initialized - confirm.
     55 
     56 } VadInstT;
     57 
     58 // Initializes the core VAD component. The default aggressiveness mode is
     59 // controlled by |kDefaultMode| in vad_core.c.
     60 //
     61 // - self [i/o] : Instance that should be initialized
     62 //
     63 // returns      : 0 (OK), -1 (|self| is NULL or the default mode can't be
     64 //                set)
     65 int WebRtcVad_InitCore(VadInstT* self);
     66 
     67 /****************************************************************************
     68  * WebRtcVad_set_mode_core(...)
     69  *
     70  * This function changes the aggressiveness mode of a VAD instance.
     71  *
     72  * Input:
     73  *      - self      : VAD instance
     74  *      - mode      : Aggressiveness degree
     75  *                    0 (High quality) - 3 (Highly aggressive)
     76  *
     77  * Output:
     78  *      - self      : Changed instance
     79  *
     80  * Return value     :  0 - Ok
     81  *                    -1 - Error
     82  */
     83 
     84 int WebRtcVad_set_mode_core(VadInstT* self, int mode);
     85 
     86 /****************************************************************************
     87  * WebRtcVad_CalcVad48khz(...)
     88  * WebRtcVad_CalcVad32khz(...)
     89  * WebRtcVad_CalcVad16khz(...)
     90  * WebRtcVad_CalcVad8khz(...)
     91  *
     92  * Calculate probability for active speech and make VAD decision.
     93  *
     94  * Input:
     95  *      - inst          : VAD instance (its state is read and updated)
     96  *      - speech_frame  : Input speech frame
     97  *      - frame_length  : Number of input samples
     98  *
     99  * Output:
    100  *      - inst          : Updated filter states etc.
    101  *
    102  * Return value         : VAD decision
    103  *                        0 - No active speech
    104  *                        1-6 - Active speech
    105  */
    106 int WebRtcVad_CalcVad48khz(VadInstT* inst, const int16_t* speech_frame,
    107                            size_t frame_length);
    108 int WebRtcVad_CalcVad32khz(VadInstT* inst, const int16_t* speech_frame,
    109                            size_t frame_length);
    110 int WebRtcVad_CalcVad16khz(VadInstT* inst, const int16_t* speech_frame,
    111                            size_t frame_length);
    112 int WebRtcVad_CalcVad8khz(VadInstT* inst, const int16_t* speech_frame,
    113                           size_t frame_length);
    114 
    115 #endif  // WEBRTC_COMMON_AUDIO_VAD_VAD_CORE_H_
    116