Home | History | Annotate | Download | only in vad
      1 /*
      2  *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
      3  *
      4  *  Use of this source code is governed by a BSD-style license
      5  *  that can be found in the LICENSE file in the root of the source
      6  *  tree. An additional intellectual property rights grant can be found
      7  *  in the file PATENTS.  All contributing project authors may
      8  *  be found in the AUTHORS file in the root of the source tree.
      9  */
     10 
     11 /*
     12  * This header file includes the descriptions of the internal VAD filter
     13  * bank functions.
     14  */
     15 
     16 #ifndef WEBRTC_COMMON_AUDIO_VAD_VAD_FILTERBANK_H_
     17 #define WEBRTC_COMMON_AUDIO_VAD_VAD_FILTERBANK_H_
     18 
     19 #include "typedefs.h"
     20 #include "vad_core.h"
     21 
     22 // TODO(bjornv): Move local functions to vad_filterbank.c and make static.
     23 /****************************************************************************
     24  * WebRtcVad_HpOutput(...)
     25  *
     26  * Removes DC from the lowest frequency band (0 - 250 Hz in, 80 - 250 Hz out).
     27  *
     28  * Input:
     29  *      - in_vector         : Samples in the frequency interval 0 - 250 Hz
     30  *      - in_vector_length  : Length of both in_vector and out_vector
     31  *      - filter_state      : Current state of the filter
     32  *
     33  * Output:
     34  *      - out_vector        : Samples in the frequency interval 80 - 250 Hz
     35  *      - filter_state      : Updated state of the filter (modified in place)
     36  *
     37  */
     38 void WebRtcVad_HpOutput(int16_t* in_vector,
     39                         int in_vector_length,
     40                         int16_t* filter_state,
     41                         int16_t* out_vector);
     42 
     43 /****************************************************************************
     44  * WebRtcVad_Allpass(...)
     45  *
     46  * This function is used before splitting a speech signal into
     47  * different frequency bands.
     48  *
     49  * Note! in_vector and outw16 must NOT point to the same address.
     50  *
     51  * Input:
     52  *      - in_vector             : Input signal (Q0)
     53  *      - filter_coefficients   : Filter coefficient, passed by value (Q15)
     54  *      - vector_length         : Length of input and output vector
     55  *      - filter_state          : Current state of the filter (Q(-1))
     56  *
     57  * Output:
     58  *      - outw16                : Output speech signal (Q(-1))
     59  *      - filter_state          : Updated state of the filter (Q(-1))
     60  *
     61  */
     62 void WebRtcVad_Allpass(int16_t* in_vector,
     63                        int16_t filter_coefficients,
     64                        int vector_length,
     65                        int16_t* filter_state,
     66                        int16_t* outw16);
     67 
     68 /****************************************************************************
     69  * WebRtcVad_SplitFilter(...)
     70  *
     71  * This function splits a signal into two frequency bands: the upper and
     72  * the lower half of its spectrum.
     73  *
     74  * Input:
     75  *      - in_vector         : Input signal to be split into two frequency bands.
     76  *      - in_vector_length  : Length of input vector
     77  *      - upper_state       : Current state of the upper filter
     78  *      - lower_state       : Current state of the lower filter
     79  *
     80  * Output:
     81  *      - out_vector_hp     : Upper half of the spectrum
     82  *      - out_vector_lp     : Lower half of the spectrum
     83  *      - upper_state       : Updated state of the upper filter
     84  *      - lower_state       : Updated state of the lower filter
     85  *
     86  */
     87 void WebRtcVad_SplitFilter(int16_t* in_vector,
     88                            int in_vector_length,
     89                            int16_t* upper_state,
     90                            int16_t* lower_state,
     91                            int16_t* out_vector_hp,
     92                            int16_t* out_vector_lp);
     93 
     94 /****************************************************************************
     95  * WebRtcVad_get_features(...)
     96  *
     97  * This function computes the logarithm of the power of each of the
     98  * 6 frequency bands used by the VAD:
     99  *        80 Hz - 250 Hz
    100  *        250 Hz - 500 Hz
    101  *        500 Hz - 1000 Hz
    102  *        1000 Hz - 2000 Hz
    103  *        2000 Hz - 3000 Hz
    104  *        3000 Hz - 4000 Hz
    105  *
    106  * Input:
    107  *      - inst        : Pointer to VAD instance
    108  *      - in_vector   : Input speech signal
    109  *      - frame_size  : Frame size, in number of samples
    110  *
    111  * Output:
    112  *      - out_vector  : 10*log10(power in each freq. band), Q4
    113  *
    114  * Return: total power in the signal. NOTE! This value is not exact since it
    115  *         is only used in a comparison.
    116  */
    117 int16_t WebRtcVad_get_features(VadInstT* inst,
    118                                int16_t* in_vector,
    119                                int frame_size,
    120                                int16_t* out_vector);
    121 
    122 /****************************************************************************
    123  * WebRtcVad_LogOfEnergy(...)
    124  *
    125  * This function computes the logarithm of the power of one frequency band.
    126  *
    127  * Input:
    128  *      - vector            : Input speech samples for one frequency band
    129  *      - vector_length     : Length of input vector
    130  *      - offset            : Offset value for the current frequency band
    131  *
    132  * Output:
    133  *      - power             : Updated total power in speech frame. NOTE! This value
    134  *                            is not exact since it is only used in a comparison.
    135  *      - log_energy        : 10*log10(energy)
    136  *
    137  */
    138 void WebRtcVad_LogOfEnergy(int16_t* vector,
    139                            int vector_length,
    140                            int16_t offset,
    141                            int16_t* power,
    142                            int16_t* log_energy);
    143 
    144 #endif  // WEBRTC_COMMON_AUDIO_VAD_VAD_FILTERBANK_H_
    145