Home | History | Annotate | Download | only in libspeexdsp
      1 /* Copyright (C) 2003 Epic Games (written by Jean-Marc Valin)
      2    Copyright (C) 2004-2006 Epic Games
      3 
      4    File: preprocess.c
      5    Preprocessor with denoising based on the algorithm by Ephraim and Malah
      6 
      7    Redistribution and use in source and binary forms, with or without
      8    modification, are permitted provided that the following conditions are
      9    met:
     10 
     11    1. Redistributions of source code must retain the above copyright notice,
     12    this list of conditions and the following disclaimer.
     13 
     14    2. Redistributions in binary form must reproduce the above copyright
     15    notice, this list of conditions and the following disclaimer in the
     16    documentation and/or other materials provided with the distribution.
     17 
     18    3. The name of the author may not be used to endorse or promote products
     19    derived from this software without specific prior written permission.
     20 
     21    THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
     22    IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
     23    OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
     24    DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
     25    INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
     26    (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
     27    SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     28    HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
     29    STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
     30    ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     31    POSSIBILITY OF SUCH DAMAGE.
     32 */
     33 
     34 
     35 /*
     36    Recommended papers:
     37 
     38    Y. Ephraim and D. Malah, "Speech enhancement using minimum mean-square error
     39    short-time spectral amplitude estimator". IEEE Transactions on Acoustics,
     40    Speech and Signal Processing, vol. ASSP-32, no. 6, pp. 1109-1121, 1984.
     41 
     42    Y. Ephraim and D. Malah, "Speech enhancement using minimum mean-square error
     43    log-spectral amplitude estimator". IEEE Transactions on Acoustics, Speech and
     44    Signal Processing, vol. ASSP-33, no. 2, pp. 443-445, 1985.
     45 
     46    I. Cohen and B. Berdugo, "Speech enhancement for non-stationary noise environments".
     47    Signal Processing, vol. 81, no. 2, pp. 2403-2418, 2001.
     48 
     49    Stefan Gustafsson, Rainer Martin, Peter Jax, and Peter Vary. "A psychoacoustic
     50    approach to combined acoustic echo cancellation and noise reduction". IEEE
     51    Transactions on Speech and Audio Processing, 2002.
     52 
     53    J.-M. Valin, J. Rouat, and F. Michaud, "Microphone array post-filter for separation
     54    of simultaneous non-stationary sources". In Proceedings IEEE International
     55    Conference on Acoustics, Speech, and Signal Processing, 2004.
     56 */
     57 
     58 #ifdef HAVE_CONFIG_H
     59 #include "config.h"
     60 #endif
     61 
     62 #include <math.h>
     63 #include "speex/speex_preprocess.h"
     64 #include "speex/speex_echo.h"
     65 #include "arch.h"
     66 #include "fftwrap.h"
     67 #include "filterbank.h"
     68 #include "math_approx.h"
     69 #include "os_support.h"
     70 
     71 #define LOUDNESS_EXP 5.f
     72 #define AMP_SCALE .001f
     73 #define AMP_SCALE_1 1000.f
     74 
     75 #define NB_BANDS 24
     76 
     77 #define SPEECH_PROB_START_DEFAULT       QCONST16(0.35f,15)
     78 #define SPEECH_PROB_CONTINUE_DEFAULT    QCONST16(0.20f,15)
     79 #define NOISE_SUPPRESS_DEFAULT       -15
     80 #define ECHO_SUPPRESS_DEFAULT        -40
     81 #define ECHO_SUPPRESS_ACTIVE_DEFAULT -15
     82 
     83 #ifndef NULL
     84 #define NULL 0
     85 #endif
     86 
     87 #define SQR(x) ((x)*(x))
     88 #define SQR16(x) (MULT16_16((x),(x)))
     89 #define SQR16_Q15(x) (MULT16_16_Q15((x),(x)))
     90 
     91 #ifdef FIXED_POINT
     92 static inline spx_word16_t DIV32_16_Q8(spx_word32_t a, spx_word32_t b)
     93 {
     94    if (SHR32(a,7) >= b)
     95    {
     96       return 32767;
     97    } else {
     98       if (b>=QCONST32(1,23))
     99       {
    100          a = SHR32(a,8);
    101          b = SHR32(b,8);
    102       }
    103       if (b>=QCONST32(1,19))
    104       {
    105          a = SHR32(a,4);
    106          b = SHR32(b,4);
    107       }
    108       if (b>=QCONST32(1,15))
    109       {
    110          a = SHR32(a,4);
    111          b = SHR32(b,4);
    112       }
    113       a = SHL32(a,8);
    114       return PDIV32_16(a,b);
    115    }
    116 
    117 }
    118 static inline spx_word16_t DIV32_16_Q15(spx_word32_t a, spx_word32_t b)
    119 {
    120    if (SHR32(a,15) >= b)
    121    {
    122       return 32767;
    123    } else {
    124       if (b>=QCONST32(1,23))
    125       {
    126          a = SHR32(a,8);
    127          b = SHR32(b,8);
    128       }
    129       if (b>=QCONST32(1,19))
    130       {
    131          a = SHR32(a,4);
    132          b = SHR32(b,4);
    133       }
    134       if (b>=QCONST32(1,15))
    135       {
    136          a = SHR32(a,4);
    137          b = SHR32(b,4);
    138       }
    139       a = SHL32(a,15)-a;
    140       return DIV32_16(a,b);
    141    }
    142 }
    143 #define SNR_SCALING 256.f
    144 #define SNR_SCALING_1 0.0039062f
    145 #define SNR_SHIFT 8
    146 
    147 #define FRAC_SCALING 32767.f
    148 #define FRAC_SCALING_1 3.0518e-05
    149 #define FRAC_SHIFT 1
    150 
    151 #define EXPIN_SCALING 2048.f
    152 #define EXPIN_SCALING_1 0.00048828f
    153 #define EXPIN_SHIFT 11
    154 #define EXPOUT_SCALING_1 1.5259e-05
    155 
    156 #define NOISE_SHIFT 7
    157 
    158 #else
    159 
    160 #define DIV32_16_Q8(a,b) ((a)/(b))
    161 #define DIV32_16_Q15(a,b) ((a)/(b))
    162 #define SNR_SCALING 1.f
    163 #define SNR_SCALING_1 1.f
    164 #define SNR_SHIFT 0
    165 #define FRAC_SCALING 1.f
    166 #define FRAC_SCALING_1 1.f
    167 #define FRAC_SHIFT 0
    168 #define NOISE_SHIFT 0
    169 
    170 #define EXPIN_SCALING 1.f
    171 #define EXPIN_SCALING_1 1.f
    172 #define EXPOUT_SCALING_1 1.f
    173 
    174 #endif
    175 
    176 /** Speex pre-processor state. */
    177 struct SpeexPreprocessState_ {
    178    /* Basic info */
    179    int    frame_size;        /**< Number of samples processed each time */
    180    int    ps_size;           /**< Number of points in the power spectrum */
    181    int    sampling_rate;     /**< Sampling rate of the input/output */
    182    int    nbands;
    183    FilterBank *bank;
    184 
    185    /* Parameters */
    186    int    denoise_enabled;
    187    int    vad_enabled;
    188    int    dereverb_enabled;
    189    spx_word16_t  reverb_decay;
    190    spx_word16_t  reverb_level;
    191    spx_word16_t speech_prob_start;
    192    spx_word16_t speech_prob_continue;
    193    int    noise_suppress;
    194    int    echo_suppress;
    195    int    echo_suppress_active;
    196    SpeexEchoState *echo_state;
    197 
    198    spx_word16_t	speech_prob;  /**< Probability last frame was speech */
    199 
    200    /* DSP-related arrays */
    201    spx_word16_t *frame;      /**< Processing frame (2*ps_size) */
    202    spx_word16_t *ft;         /**< Processing frame in freq domain (2*ps_size) */
    203    spx_word32_t *ps;         /**< Current power spectrum */
    204    spx_word16_t *gain2;      /**< Adjusted gains */
    205    spx_word16_t *gain_floor; /**< Minimum gain allowed */
    206    spx_word16_t *window;     /**< Analysis/Synthesis window */
    207    spx_word32_t *noise;      /**< Noise estimate */
    208    spx_word32_t *reverb_estimate; /**< Estimate of reverb energy */
    209    spx_word32_t *old_ps;     /**< Power spectrum for last frame */
    210    spx_word16_t *gain;       /**< Ephraim Malah gain */
    211    spx_word16_t *prior;      /**< A-priori SNR */
    212    spx_word16_t *post;       /**< A-posteriori SNR */
    213 
    214    spx_word32_t *S;          /**< Smoothed power spectrum */
    215    spx_word32_t *Smin;       /**< See Cohen paper */
    216    spx_word32_t *Stmp;       /**< See Cohen paper */
    217    int *update_prob;         /**< Probability of speech presence for noise update */
    218 
    219    spx_word16_t *zeta;       /**< Smoothed a priori SNR */
    220    spx_word32_t *echo_noise;
    221    spx_word32_t *residual_echo;
    222 
    223    /* Misc */
    224    spx_word16_t *inbuf;      /**< Input buffer (overlapped analysis) */
    225    spx_word16_t *outbuf;     /**< Output buffer (for overlap and add) */
    226 
    227    /* AGC stuff, only for floating point for now */
    228 #ifndef FIXED_POINT
    229    int    agc_enabled;
    230    float  agc_level;
    231    float  loudness_accum;
    232    float *loudness_weight;   /**< Perceptual loudness curve */
    233    float  loudness;          /**< Loudness estimate */
    234    float  agc_gain;          /**< Current AGC gain */
    235    float  max_gain;          /**< Maximum gain allowed */
    236    float  max_increase_step; /**< Maximum increase in gain from one frame to another */
    237    float  max_decrease_step; /**< Maximum decrease in gain from one frame to another */
    238    float  prev_loudness;     /**< Loudness of previous frame */
    239    float  init_max;          /**< Current gain limit during initialisation */
    240 #endif
    241    int    nb_adapt;          /**< Number of frames used for adaptation so far */
    242    int    was_speech;
    243    int    min_count;         /**< Number of frames processed so far */
    244    void  *fft_lookup;        /**< Lookup table for the FFT */
    245 #ifdef FIXED_POINT
    246    int    frame_shift;
    247 #endif
    248 };
    249 
    250 
    251 static void conj_window(spx_word16_t *w, int len)
    252 {
    253    int i;
    254    for (i=0;i<len;i++)
    255    {
    256       spx_word16_t tmp;
    257 #ifdef FIXED_POINT
    258       spx_word16_t x = DIV32_16(MULT16_16(32767,i),len);
    259 #else
    260       spx_word16_t x = DIV32_16(MULT16_16(QCONST16(4.f,13),i),len);
    261 #endif
    262       int inv=0;
    263       if (x<QCONST16(1.f,13))
    264       {
    265       } else if (x<QCONST16(2.f,13))
    266       {
    267          x=QCONST16(2.f,13)-x;
    268          inv=1;
    269       } else if (x<QCONST16(3.f,13))
    270       {
    271          x=x-QCONST16(2.f,13);
    272          inv=1;
    273       } else {
    274          x=QCONST16(2.f,13)-x+QCONST16(2.f,13); /* 4 - x */
    275       }
    276       x = MULT16_16_Q14(QCONST16(1.271903f,14), x);
    277       tmp = SQR16_Q15(QCONST16(.5f,15)-MULT16_16_P15(QCONST16(.5f,15),spx_cos_norm(SHL32(EXTEND32(x),2))));
    278       if (inv)
    279          tmp=SUB16(Q15_ONE,tmp);
    280       w[i]=spx_sqrt(SHL32(EXTEND32(tmp),15));
    281    }
    282 }
    283 
    284 
    285 #ifdef FIXED_POINT
    286 /* This function approximates the gain function
    287    y = gamma(1.25)^2 * M(-.25;1;-x) / sqrt(x)
    288    which multiplied by xi/(1+xi) is the optimal gain
    289    in the loudness domain ( sqrt[amplitude] )
    290    Input in Q11 format, output in Q15
    291 */
    292 static inline spx_word32_t hypergeom_gain(spx_word32_t xx)
    293 {
    294    int ind;
    295    spx_word16_t frac;
    296    /* Q13 table */
    297    static const spx_word16_t table[21] = {
    298        6730,  8357,  9868, 11267, 12563, 13770, 14898,
    299       15959, 16961, 17911, 18816, 19682, 20512, 21311,
    300       22082, 22827, 23549, 24250, 24931, 25594, 26241};
    301       ind = SHR32(xx,10);
    302       if (ind<0)
    303          return Q15_ONE;
    304       if (ind>19)
    305          return ADD32(EXTEND32(Q15_ONE),EXTEND32(DIV32_16(QCONST32(.1296,23), SHR32(xx,EXPIN_SHIFT-SNR_SHIFT))));
    306       frac = SHL32(xx-SHL32(ind,10),5);
    307       return SHL32(DIV32_16(PSHR32(MULT16_16(Q15_ONE-frac,table[ind]) + MULT16_16(frac,table[ind+1]),7),(spx_sqrt(SHL32(xx,15)+6711))),7);
    308 }
    309 
    310 static inline spx_word16_t qcurve(spx_word16_t x)
    311 {
    312    x = MAX16(x, 1);
    313    return DIV32_16(SHL32(EXTEND32(32767),9),ADD16(512,MULT16_16_Q15(QCONST16(.60f,15),DIV32_16(32767,x))));
    314 }
    315 
    316 /* Compute the gain floor based on different floors for the background noise and residual echo */
    317 static void compute_gain_floor(int noise_suppress, int effective_echo_suppress, spx_word32_t *noise, spx_word32_t *echo, spx_word16_t *gain_floor, int len)
    318 {
    319    int i;
    320 
    321    if (noise_suppress > effective_echo_suppress)
    322    {
    323       spx_word16_t noise_gain, gain_ratio;
    324       noise_gain = EXTRACT16(MIN32(Q15_ONE,SHR32(spx_exp(MULT16_16(QCONST16(0.11513,11),noise_suppress)),1)));
    325       gain_ratio = EXTRACT16(MIN32(Q15_ONE,SHR32(spx_exp(MULT16_16(QCONST16(.2302585f,11),effective_echo_suppress-noise_suppress)),1)));
    326 
    327       /* gain_floor = sqrt [ (noise*noise_floor + echo*echo_floor) / (noise+echo) ] */
    328       for (i=0;i<len;i++)
    329          gain_floor[i] = MULT16_16_Q15(noise_gain,
    330                                        spx_sqrt(SHL32(EXTEND32(DIV32_16_Q15(PSHR32(noise[i],NOISE_SHIFT) + MULT16_32_Q15(gain_ratio,echo[i]),
    331                                              (1+PSHR32(noise[i],NOISE_SHIFT) + echo[i]) )),15)));
    332    } else {
    333       spx_word16_t echo_gain, gain_ratio;
    334       echo_gain = EXTRACT16(MIN32(Q15_ONE,SHR32(spx_exp(MULT16_16(QCONST16(0.11513,11),effective_echo_suppress)),1)));
    335       gain_ratio = EXTRACT16(MIN32(Q15_ONE,SHR32(spx_exp(MULT16_16(QCONST16(.2302585f,11),noise_suppress-effective_echo_suppress)),1)));
    336 
    337       /* gain_floor = sqrt [ (noise*noise_floor + echo*echo_floor) / (noise+echo) ] */
    338       for (i=0;i<len;i++)
    339          gain_floor[i] = MULT16_16_Q15(echo_gain,
    340                                        spx_sqrt(SHL32(EXTEND32(DIV32_16_Q15(MULT16_32_Q15(gain_ratio,PSHR32(noise[i],NOISE_SHIFT)) + echo[i],
    341                                              (1+PSHR32(noise[i],NOISE_SHIFT) + echo[i]) )),15)));
    342    }
    343 }
    344 
    345 #else
    346 /* This function approximates the gain function
    347    y = gamma(1.25)^2 * M(-.25;1;-x) / sqrt(x)
    348    which multiplied by xi/(1+xi) is the optimal gain
    349    in the loudness domain ( sqrt[amplitude] )
    350 */
    351 static inline spx_word32_t hypergeom_gain(spx_word32_t xx)
    352 {
    353    int ind;
    354    float integer, frac;
    355    float x;
    356    static const float table[21] = {
    357       0.82157f, 1.02017f, 1.20461f, 1.37534f, 1.53363f, 1.68092f, 1.81865f,
    358       1.94811f, 2.07038f, 2.18638f, 2.29688f, 2.40255f, 2.50391f, 2.60144f,
    359       2.69551f, 2.78647f, 2.87458f, 2.96015f, 3.04333f, 3.12431f, 3.20326f};
    360       x = EXPIN_SCALING_1*xx;
    361       integer = floor(2*x);
    362       ind = (int)integer;
    363       if (ind<0)
    364          return FRAC_SCALING;
    365       if (ind>19)
    366          return FRAC_SCALING*(1+.1296/x);
    367       frac = 2*x-integer;
    368       return FRAC_SCALING*((1-frac)*table[ind] + frac*table[ind+1])/sqrt(x+.0001f);
    369 }
    370 
    371 static inline spx_word16_t qcurve(spx_word16_t x)
    372 {
    373    return 1.f/(1.f+.15f/(SNR_SCALING_1*x));
    374 }
    375 
    376 static void compute_gain_floor(int noise_suppress, int effective_echo_suppress, spx_word32_t *noise, spx_word32_t *echo, spx_word16_t *gain_floor, int len)
    377 {
    378    int i;
    379    float echo_floor;
    380    float noise_floor;
    381 
    382    noise_floor = exp(.2302585f*noise_suppress);
    383    echo_floor = exp(.2302585f*effective_echo_suppress);
    384 
    385    /* Compute the gain floor based on different floors for the background noise and residual echo */
    386    for (i=0;i<len;i++)
    387       gain_floor[i] = FRAC_SCALING*sqrt(noise_floor*PSHR32(noise[i],NOISE_SHIFT) + echo_floor*echo[i])/sqrt(1+PSHR32(noise[i],NOISE_SHIFT) + echo[i]);
    388 }
    389 
    390 #endif
    391 EXPORT SpeexPreprocessState *speex_preprocess_state_init(int frame_size, int sampling_rate)
    392 {
    393    int i;
    394    int N, N3, N4, M;
    395 
    396    SpeexPreprocessState *st = (SpeexPreprocessState *)speex_alloc(sizeof(SpeexPreprocessState));
    397    st->frame_size = frame_size;
    398 
    399    /* Round ps_size down to the nearest power of two */
    400 #if 0
    401    i=1;
    402    st->ps_size = st->frame_size;
    403    while(1)
    404    {
    405       if (st->ps_size & ~i)
    406       {
    407          st->ps_size &= ~i;
    408          i<<=1;
    409       } else {
    410          break;
    411       }
    412    }
    413 
    414 
    415    if (st->ps_size < 3*st->frame_size/4)
    416       st->ps_size = st->ps_size * 3 / 2;
    417 #else
    418    st->ps_size = st->frame_size;
    419 #endif
    420 
    421    N = st->ps_size;
    422    N3 = 2*N - st->frame_size;
    423    N4 = st->frame_size - N3;
    424 
    425    st->sampling_rate = sampling_rate;
    426    st->denoise_enabled = 1;
    427    st->vad_enabled = 0;
    428    st->dereverb_enabled = 0;
    429    st->reverb_decay = 0;
    430    st->reverb_level = 0;
    431    st->noise_suppress = NOISE_SUPPRESS_DEFAULT;
    432    st->echo_suppress = ECHO_SUPPRESS_DEFAULT;
    433    st->echo_suppress_active = ECHO_SUPPRESS_ACTIVE_DEFAULT;
    434 
    435    st->speech_prob_start = SPEECH_PROB_START_DEFAULT;
    436    st->speech_prob_continue = SPEECH_PROB_CONTINUE_DEFAULT;
    437 
    438    st->echo_state = NULL;
    439 
    440    st->nbands = NB_BANDS;
    441    M = st->nbands;
    442    st->bank = filterbank_new(M, sampling_rate, N, 1);
    443 
    444    st->frame = (spx_word16_t*)speex_alloc(2*N*sizeof(spx_word16_t));
    445    st->window = (spx_word16_t*)speex_alloc(2*N*sizeof(spx_word16_t));
    446    st->ft = (spx_word16_t*)speex_alloc(2*N*sizeof(spx_word16_t));
    447 
    448    st->ps = (spx_word32_t*)speex_alloc((N+M)*sizeof(spx_word32_t));
    449    st->noise = (spx_word32_t*)speex_alloc((N+M)*sizeof(spx_word32_t));
    450    st->echo_noise = (spx_word32_t*)speex_alloc((N+M)*sizeof(spx_word32_t));
    451    st->residual_echo = (spx_word32_t*)speex_alloc((N+M)*sizeof(spx_word32_t));
    452    st->reverb_estimate = (spx_word32_t*)speex_alloc((N+M)*sizeof(spx_word32_t));
    453    st->old_ps = (spx_word32_t*)speex_alloc((N+M)*sizeof(spx_word32_t));
    454    st->prior = (spx_word16_t*)speex_alloc((N+M)*sizeof(spx_word16_t));
    455    st->post = (spx_word16_t*)speex_alloc((N+M)*sizeof(spx_word16_t));
    456    st->gain = (spx_word16_t*)speex_alloc((N+M)*sizeof(spx_word16_t));
    457    st->gain2 = (spx_word16_t*)speex_alloc((N+M)*sizeof(spx_word16_t));
    458    st->gain_floor = (spx_word16_t*)speex_alloc((N+M)*sizeof(spx_word16_t));
    459    st->zeta = (spx_word16_t*)speex_alloc((N+M)*sizeof(spx_word16_t));
    460 
    461    st->S = (spx_word32_t*)speex_alloc(N*sizeof(spx_word32_t));
    462    st->Smin = (spx_word32_t*)speex_alloc(N*sizeof(spx_word32_t));
    463    st->Stmp = (spx_word32_t*)speex_alloc(N*sizeof(spx_word32_t));
    464    st->update_prob = (int*)speex_alloc(N*sizeof(int));
    465 
    466    st->inbuf = (spx_word16_t*)speex_alloc(N3*sizeof(spx_word16_t));
    467    st->outbuf = (spx_word16_t*)speex_alloc(N3*sizeof(spx_word16_t));
    468 
    469    conj_window(st->window, 2*N3);
    470    for (i=2*N3;i<2*st->ps_size;i++)
    471       st->window[i]=Q15_ONE;
    472 
    473    if (N4>0)
    474    {
    475       for (i=N3-1;i>=0;i--)
    476       {
    477          st->window[i+N3+N4]=st->window[i+N3];
    478          st->window[i+N3]=1;
    479       }
    480    }
    481    for (i=0;i<N+M;i++)
    482    {
    483       st->noise[i]=QCONST32(1.f,NOISE_SHIFT);
    484       st->reverb_estimate[i]=0;
    485       st->old_ps[i]=1;
    486       st->gain[i]=Q15_ONE;
    487       st->post[i]=SHL16(1, SNR_SHIFT);
    488       st->prior[i]=SHL16(1, SNR_SHIFT);
    489    }
    490 
    491    for (i=0;i<N;i++)
    492       st->update_prob[i] = 1;
    493    for (i=0;i<N3;i++)
    494    {
    495       st->inbuf[i]=0;
    496       st->outbuf[i]=0;
    497    }
    498 #ifndef FIXED_POINT
    499    st->agc_enabled = 0;
    500    st->agc_level = 8000;
    501    st->loudness_weight = (float*)speex_alloc(N*sizeof(float));
    502    for (i=0;i<N;i++)
    503    {
    504       float ff=((float)i)*.5*sampling_rate/((float)N);
    505       /*st->loudness_weight[i] = .5f*(1.f/(1.f+ff/8000.f))+1.f*exp(-.5f*(ff-3800.f)*(ff-3800.f)/9e5f);*/
    506       st->loudness_weight[i] = .35f-.35f*ff/16000.f+.73f*exp(-.5f*(ff-3800)*(ff-3800)/9e5f);
    507       if (st->loudness_weight[i]<.01f)
    508          st->loudness_weight[i]=.01f;
    509       st->loudness_weight[i] *= st->loudness_weight[i];
    510    }
    511    /*st->loudness = pow(AMP_SCALE*st->agc_level,LOUDNESS_EXP);*/
    512    st->loudness = 1e-15;
    513    st->agc_gain = 1;
    514    st->max_gain = 30;
    515    st->max_increase_step = exp(0.11513f * 12.*st->frame_size / st->sampling_rate);
    516    st->max_decrease_step = exp(-0.11513f * 40.*st->frame_size / st->sampling_rate);
    517    st->prev_loudness = 1;
    518    st->init_max = 1;
    519 #endif
    520    st->was_speech = 0;
    521 
    522    st->fft_lookup = spx_fft_init(2*N);
    523 
    524    st->nb_adapt=0;
    525    st->min_count=0;
    526    return st;
    527 }
    528 
    529 EXPORT void speex_preprocess_state_destroy(SpeexPreprocessState *st)
    530 {
    531    speex_free(st->frame);
    532    speex_free(st->ft);
    533    speex_free(st->ps);
    534    speex_free(st->gain2);
    535    speex_free(st->gain_floor);
    536    speex_free(st->window);
    537    speex_free(st->noise);
    538    speex_free(st->reverb_estimate);
    539    speex_free(st->old_ps);
    540    speex_free(st->gain);
    541    speex_free(st->prior);
    542    speex_free(st->post);
    543 #ifndef FIXED_POINT
    544    speex_free(st->loudness_weight);
    545 #endif
    546    speex_free(st->echo_noise);
    547    speex_free(st->residual_echo);
    548 
    549    speex_free(st->S);
    550    speex_free(st->Smin);
    551    speex_free(st->Stmp);
    552    speex_free(st->update_prob);
    553    speex_free(st->zeta);
    554 
    555    speex_free(st->inbuf);
    556    speex_free(st->outbuf);
    557 
    558    spx_fft_destroy(st->fft_lookup);
    559    filterbank_destroy(st->bank);
    560    speex_free(st);
    561 }
    562 
    563 /* FIXME: The AGC doesn't work yet with fixed-point*/
    564 #ifndef FIXED_POINT
    565 static void speex_compute_agc(SpeexPreprocessState *st, spx_word16_t Pframe, spx_word16_t *ft)
    566 {
    567    int i;
    568    int N = st->ps_size;
    569    float target_gain;
    570    float loudness=1.f;
    571    float rate;
    572 
    573    for (i=2;i<N;i++)
    574    {
    575       loudness += 2.f*N*st->ps[i]* st->loudness_weight[i];
    576    }
    577    loudness=sqrt(loudness);
    578       /*if (loudness < 2*pow(st->loudness, 1.0/LOUDNESS_EXP) &&
    579    loudness*2 > pow(st->loudness, 1.0/LOUDNESS_EXP))*/
    580    if (Pframe>.3f)
    581    {
    582       /*rate=2.0f*Pframe*Pframe/(1+st->nb_loudness_adapt);*/
    583       rate = .03*Pframe*Pframe;
    584       st->loudness = (1-rate)*st->loudness + (rate)*pow(AMP_SCALE*loudness, LOUDNESS_EXP);
    585       st->loudness_accum = (1-rate)*st->loudness_accum + rate;
    586       if (st->init_max < st->max_gain && st->nb_adapt > 20)
    587          st->init_max *= 1.f + .1f*Pframe*Pframe;
    588    }
    589    /*printf ("%f %f %f %f\n", Pframe, loudness, pow(st->loudness, 1.0f/LOUDNESS_EXP), st->loudness2);*/
    590 
    591    target_gain = AMP_SCALE*st->agc_level*pow(st->loudness/(1e-4+st->loudness_accum), -1.0f/LOUDNESS_EXP);
    592 
    593    if ((Pframe>.5  && st->nb_adapt > 20) || target_gain < st->agc_gain)
    594    {
    595       if (target_gain > st->max_increase_step*st->agc_gain)
    596          target_gain = st->max_increase_step*st->agc_gain;
    597       if (target_gain < st->max_decrease_step*st->agc_gain && loudness < 10*st->prev_loudness)
    598          target_gain = st->max_decrease_step*st->agc_gain;
    599       if (target_gain > st->max_gain)
    600          target_gain = st->max_gain;
    601       if (target_gain > st->init_max)
    602          target_gain = st->init_max;
    603 
    604       st->agc_gain = target_gain;
    605    }
    606    /*fprintf (stderr, "%f %f %f\n", loudness, (float)AMP_SCALE_1*pow(st->loudness, 1.0f/LOUDNESS_EXP), st->agc_gain);*/
    607 
    608    for (i=0;i<2*N;i++)
    609       ft[i] *= st->agc_gain;
    610    st->prev_loudness = loudness;
    611 }
    612 #endif
    613 
    614 static void preprocess_analysis(SpeexPreprocessState *st, spx_int16_t *x)
    615 {
    616    int i;
    617    int N = st->ps_size;
    618    int N3 = 2*N - st->frame_size;
    619    int N4 = st->frame_size - N3;
    620    spx_word32_t *ps=st->ps;
    621 
    622    /* 'Build' input frame */
    623    for (i=0;i<N3;i++)
    624       st->frame[i]=st->inbuf[i];
    625    for (i=0;i<st->frame_size;i++)
    626       st->frame[N3+i]=x[i];
    627 
    628    /* Update inbuf */
    629    for (i=0;i<N3;i++)
    630       st->inbuf[i]=x[N4+i];
    631 
    632    /* Windowing */
    633    for (i=0;i<2*N;i++)
    634       st->frame[i] = MULT16_16_Q15(st->frame[i], st->window[i]);
    635 
    636 #ifdef FIXED_POINT
    637    {
    638       spx_word16_t max_val=0;
    639       for (i=0;i<2*N;i++)
    640          max_val = MAX16(max_val, ABS16(st->frame[i]));
    641       st->frame_shift = 14-spx_ilog2(EXTEND32(max_val));
    642       for (i=0;i<2*N;i++)
    643          st->frame[i] = SHL16(st->frame[i], st->frame_shift);
    644    }
    645 #endif
    646 
    647    /* Perform FFT */
    648    spx_fft(st->fft_lookup, st->frame, st->ft);
    649 
    650    /* Power spectrum */
    651    ps[0]=MULT16_16(st->ft[0],st->ft[0]);
    652    for (i=1;i<N;i++)
    653       ps[i]=MULT16_16(st->ft[2*i-1],st->ft[2*i-1]) + MULT16_16(st->ft[2*i],st->ft[2*i]);
    654    for (i=0;i<N;i++)
    655       st->ps[i] = PSHR32(st->ps[i], 2*st->frame_shift);
    656 
    657    filterbank_compute_bank32(st->bank, ps, ps+N);
    658 }
    659 
    660 static void update_noise_prob(SpeexPreprocessState *st)
    661 {
    662    int i;
    663    int min_range;
    664    int N = st->ps_size;
    665 
    666    for (i=1;i<N-1;i++)
    667       st->S[i] =  MULT16_32_Q15(QCONST16(.8f,15),st->S[i]) + MULT16_32_Q15(QCONST16(.05f,15),st->ps[i-1])
    668                       + MULT16_32_Q15(QCONST16(.1f,15),st->ps[i]) + MULT16_32_Q15(QCONST16(.05f,15),st->ps[i+1]);
    669    st->S[0] =  MULT16_32_Q15(QCONST16(.8f,15),st->S[0]) + MULT16_32_Q15(QCONST16(.2f,15),st->ps[0]);
    670    st->S[N-1] =  MULT16_32_Q15(QCONST16(.8f,15),st->S[N-1]) + MULT16_32_Q15(QCONST16(.2f,15),st->ps[N-1]);
    671 
    672    if (st->nb_adapt==1)
    673    {
    674       for (i=0;i<N;i++)
    675          st->Smin[i] = st->Stmp[i] = 0;
    676    }
    677 
    678    if (st->nb_adapt < 100)
    679       min_range = 15;
    680    else if (st->nb_adapt < 1000)
    681       min_range = 50;
    682    else if (st->nb_adapt < 10000)
    683       min_range = 150;
    684    else
    685       min_range = 300;
    686    if (st->min_count > min_range)
    687    {
    688       st->min_count = 0;
    689       for (i=0;i<N;i++)
    690       {
    691          st->Smin[i] = MIN32(st->Stmp[i], st->S[i]);
    692          st->Stmp[i] = st->S[i];
    693       }
    694    } else {
    695       for (i=0;i<N;i++)
    696       {
    697          st->Smin[i] = MIN32(st->Smin[i], st->S[i]);
    698          st->Stmp[i] = MIN32(st->Stmp[i], st->S[i]);
    699       }
    700    }
    701    for (i=0;i<N;i++)
    702    {
    703       if (MULT16_32_Q15(QCONST16(.4f,15),st->S[i]) > st->Smin[i])
    704          st->update_prob[i] = 1;
    705       else
    706          st->update_prob[i] = 0;
    707       /*fprintf (stderr, "%f ", st->S[i]/st->Smin[i]);*/
    708       /*fprintf (stderr, "%f ", st->update_prob[i]);*/
    709    }
    710 
    711 }
    712 
    713 #define NOISE_OVERCOMPENS 1.
    714 
    715 void speex_echo_get_residual(SpeexEchoState *st, spx_word32_t *Yout, int len);
    716 
    717 EXPORT int speex_preprocess(SpeexPreprocessState *st, spx_int16_t *x, spx_int32_t *echo)
    718 {
    719    return speex_preprocess_run(st, x);
    720 }
    721 
    722 EXPORT int speex_preprocess_run(SpeexPreprocessState *st, spx_int16_t *x)
    723 {
    724    int i;
    725    int M;
    726    int N = st->ps_size;
    727    int N3 = 2*N - st->frame_size;
    728    int N4 = st->frame_size - N3;
    729    spx_word32_t *ps=st->ps;
    730    spx_word32_t Zframe;
    731    spx_word16_t Pframe;
    732    spx_word16_t beta, beta_1;
    733    spx_word16_t effective_echo_suppress;
    734 
    735    st->nb_adapt++;
    736    if (st->nb_adapt>20000)
    737       st->nb_adapt = 20000;
    738    st->min_count++;
    739 
    740    beta = MAX16(QCONST16(.03,15),DIV32_16(Q15_ONE,st->nb_adapt));
    741    beta_1 = Q15_ONE-beta;
    742    M = st->nbands;
    743    /* Deal with residual echo if provided */
    744    if (st->echo_state)
    745    {
    746       speex_echo_get_residual(st->echo_state, st->residual_echo, N);
    747 #ifndef FIXED_POINT
    748       /* If there are NaNs or ridiculous values, it'll show up in the DC and we just reset everything to zero */
    749       if (!(st->residual_echo[0] >=0 && st->residual_echo[0]<N*1e9f))
    750       {
    751          for (i=0;i<N;i++)
    752             st->residual_echo[i] = 0;
    753       }
    754 #endif
    755       for (i=0;i<N;i++)
    756          st->echo_noise[i] = MAX32(MULT16_32_Q15(QCONST16(.6f,15),st->echo_noise[i]), st->residual_echo[i]);
    757       filterbank_compute_bank32(st->bank, st->echo_noise, st->echo_noise+N);
    758    } else {
    759       for (i=0;i<N+M;i++)
    760          st->echo_noise[i] = 0;
    761    }
    762    preprocess_analysis(st, x);
    763 
    764    update_noise_prob(st);
    765 
    766    /* Noise estimation always updated for the 10 first frames */
    767    /*if (st->nb_adapt<10)
    768    {
    769       for (i=1;i<N-1;i++)
    770          st->update_prob[i] = 0;
    771    }
    772    */
    773 
    774    /* Update the noise estimate for the frequencies where it can be */
    775    for (i=0;i<N;i++)
    776    {
    777       if (!st->update_prob[i] || st->ps[i] < PSHR32(st->noise[i], NOISE_SHIFT))
    778          st->noise[i] = MAX32(EXTEND32(0),MULT16_32_Q15(beta_1,st->noise[i]) + MULT16_32_Q15(beta,SHL32(st->ps[i],NOISE_SHIFT)));
    779    }
    780    filterbank_compute_bank32(st->bank, st->noise, st->noise+N);
    781 
    782    /* Special case for first frame */
    783    if (st->nb_adapt==1)
    784       for (i=0;i<N+M;i++)
    785          st->old_ps[i] = ps[i];
    786 
    787    /* Compute a posteriori SNR */
    788    for (i=0;i<N+M;i++)
    789    {
    790       spx_word16_t gamma;
    791 
    792       /* Total noise estimate including residual echo and reverberation */
    793       spx_word32_t tot_noise = ADD32(ADD32(ADD32(EXTEND32(1), PSHR32(st->noise[i],NOISE_SHIFT)) , st->echo_noise[i]) , st->reverb_estimate[i]);
    794 
    795       /* A posteriori SNR = ps/noise - 1*/
    796       st->post[i] = SUB16(DIV32_16_Q8(ps[i],tot_noise), QCONST16(1.f,SNR_SHIFT));
    797       st->post[i]=MIN16(st->post[i], QCONST16(100.f,SNR_SHIFT));
    798 
    799       /* Computing update gamma = .1 + .9*(old/(old+noise))^2 */
    800       gamma = QCONST16(.1f,15)+MULT16_16_Q15(QCONST16(.89f,15),SQR16_Q15(DIV32_16_Q15(st->old_ps[i],ADD32(st->old_ps[i],tot_noise))));
    801 
    802       /* A priori SNR update = gamma*max(0,post) + (1-gamma)*old/noise */
    803       st->prior[i] = EXTRACT16(PSHR32(ADD32(MULT16_16(gamma,MAX16(0,st->post[i])), MULT16_16(Q15_ONE-gamma,DIV32_16_Q8(st->old_ps[i],tot_noise))), 15));
    804       st->prior[i]=MIN16(st->prior[i], QCONST16(100.f,SNR_SHIFT));
    805    }
    806 
    807    /*print_vec(st->post, N+M, "");*/
    808 
    809    /* Recursive average of the a priori SNR. A bit smoothed for the psd components */
    810    st->zeta[0] = PSHR32(ADD32(MULT16_16(QCONST16(.7f,15),st->zeta[0]), MULT16_16(QCONST16(.3f,15),st->prior[0])),15);
    811    for (i=1;i<N-1;i++)
    812       st->zeta[i] = PSHR32(ADD32(ADD32(ADD32(MULT16_16(QCONST16(.7f,15),st->zeta[i]), MULT16_16(QCONST16(.15f,15),st->prior[i])),
    813                            MULT16_16(QCONST16(.075f,15),st->prior[i-1])), MULT16_16(QCONST16(.075f,15),st->prior[i+1])),15);
    814    for (i=N-1;i<N+M;i++)
    815       st->zeta[i] = PSHR32(ADD32(MULT16_16(QCONST16(.7f,15),st->zeta[i]), MULT16_16(QCONST16(.3f,15),st->prior[i])),15);
    816 
    817    /* Speech probability of presence for the entire frame is based on the average filterbank a priori SNR */
    818    Zframe = 0;
    819    for (i=N;i<N+M;i++)
    820       Zframe = ADD32(Zframe, EXTEND32(st->zeta[i]));
    821    Pframe = QCONST16(.1f,15)+MULT16_16_Q15(QCONST16(.899f,15),qcurve(DIV32_16(Zframe,st->nbands)));
    822 
    823    effective_echo_suppress = EXTRACT16(PSHR32(ADD32(MULT16_16(SUB16(Q15_ONE,Pframe), st->echo_suppress), MULT16_16(Pframe, st->echo_suppress_active)),15));
    824 
    825    compute_gain_floor(st->noise_suppress, effective_echo_suppress, st->noise+N, st->echo_noise+N, st->gain_floor+N, M);
    826 
    827    /* Compute Ephraim & Malah gain speech probability of presence for each critical band (Bark scale)
    828       Technically this is actually wrong because the EM gaim assumes a slightly different probability
    829       distribution */
    830    for (i=N;i<N+M;i++)
    831    {
    832       /* See EM and Cohen papers*/
    833       spx_word32_t theta;
    834       /* Gain from hypergeometric function */
    835       spx_word32_t MM;
    836       /* Weiner filter gain */
    837       spx_word16_t prior_ratio;
    838       /* a priority probability of speech presence based on Bark sub-band alone */
    839       spx_word16_t P1;
    840       /* Speech absence a priori probability (considering sub-band and frame) */
    841       spx_word16_t q;
    842 #ifdef FIXED_POINT
    843       spx_word16_t tmp;
    844 #endif
    845 
    846       prior_ratio = PDIV32_16(SHL32(EXTEND32(st->prior[i]), 15), ADD16(st->prior[i], SHL32(1,SNR_SHIFT)));
    847       theta = MULT16_32_P15(prior_ratio, QCONST32(1.f,EXPIN_SHIFT)+SHL32(EXTEND32(st->post[i]),EXPIN_SHIFT-SNR_SHIFT));
    848 
    849       MM = hypergeom_gain(theta);
    850       /* Gain with bound */
    851       st->gain[i] = EXTRACT16(MIN32(Q15_ONE, MULT16_32_Q15(prior_ratio, MM)));
    852       /* Save old Bark power spectrum */
    853       st->old_ps[i] = MULT16_32_P15(QCONST16(.2f,15),st->old_ps[i]) + MULT16_32_P15(MULT16_16_P15(QCONST16(.8f,15),SQR16_Q15(st->gain[i])),ps[i]);
    854 
    855       P1 = QCONST16(.199f,15)+MULT16_16_Q15(QCONST16(.8f,15),qcurve (st->zeta[i]));
    856       q = Q15_ONE-MULT16_16_Q15(Pframe,P1);
    857 #ifdef FIXED_POINT
    858       theta = MIN32(theta, EXTEND32(32767));
    859 /*Q8*/tmp = MULT16_16_Q15((SHL32(1,SNR_SHIFT)+st->prior[i]),EXTRACT16(MIN32(Q15ONE,SHR32(spx_exp(-EXTRACT16(theta)),1))));
    860       tmp = MIN16(QCONST16(3.,SNR_SHIFT), tmp); /* Prevent overflows in the next line*/
    861 /*Q8*/tmp = EXTRACT16(PSHR32(MULT16_16(PDIV32_16(SHL32(EXTEND32(q),8),(Q15_ONE-q)),tmp),8));
    862       st->gain2[i]=DIV32_16(SHL32(EXTEND32(32767),SNR_SHIFT), ADD16(256,tmp));
    863 #else
    864       st->gain2[i]=1/(1.f + (q/(1.f-q))*(1+st->prior[i])*exp(-theta));
    865 #endif
    866    }
    867    /* Convert the EM gains and speech prob to linear frequency */
    868    filterbank_compute_psd16(st->bank,st->gain2+N, st->gain2);
    869    filterbank_compute_psd16(st->bank,st->gain+N, st->gain);
    870 
    871    /* Use 1 for linear gain resolution (best) or 0 for Bark gain resolution (faster) */
    872    if (1)
    873    {
    874       filterbank_compute_psd16(st->bank,st->gain_floor+N, st->gain_floor);
    875 
    876       /* Compute gain according to the Ephraim-Malah algorithm -- linear frequency */
    877       for (i=0;i<N;i++)
    878       {
    879          spx_word32_t MM;
    880          spx_word32_t theta;
    881          spx_word16_t prior_ratio;
    882          spx_word16_t tmp;
    883          spx_word16_t p;
    884          spx_word16_t g;
    885 
    886          /* Wiener filter gain */
    887          prior_ratio = PDIV32_16(SHL32(EXTEND32(st->prior[i]), 15), ADD16(st->prior[i], SHL32(1,SNR_SHIFT)));
    888          theta = MULT16_32_P15(prior_ratio, QCONST32(1.f,EXPIN_SHIFT)+SHL32(EXTEND32(st->post[i]),EXPIN_SHIFT-SNR_SHIFT));
    889 
    890          /* Optimal estimator for loudness domain */
    891          MM = hypergeom_gain(theta);
    892          /* EM gain with bound */
    893          g = EXTRACT16(MIN32(Q15_ONE, MULT16_32_Q15(prior_ratio, MM)));
    894          /* Interpolated speech probability of presence */
    895          p = st->gain2[i];
    896 
    897          /* Constrain the gain to be close to the Bark scale gain */
    898          if (MULT16_16_Q15(QCONST16(.333f,15),g) > st->gain[i])
    899             g = MULT16_16(3,st->gain[i]);
    900          st->gain[i] = g;
    901 
    902          /* Save old power spectrum */
    903          st->old_ps[i] = MULT16_32_P15(QCONST16(.2f,15),st->old_ps[i]) + MULT16_32_P15(MULT16_16_P15(QCONST16(.8f,15),SQR16_Q15(st->gain[i])),ps[i]);
    904 
    905          /* Apply gain floor */
    906          if (st->gain[i] < st->gain_floor[i])
    907             st->gain[i] = st->gain_floor[i];
    908 
    909          /* Exponential decay model for reverberation (unused) */
    910          /*st->reverb_estimate[i] = st->reverb_decay*st->reverb_estimate[i] + st->reverb_decay*st->reverb_level*st->gain[i]*st->gain[i]*st->ps[i];*/
    911 
    912          /* Take into account speech probability of presence (loudness domain MMSE estimator) */
    913          /* gain2 = [p*sqrt(gain)+(1-p)*sqrt(gain _floor) ]^2 */
    914          tmp = MULT16_16_P15(p,spx_sqrt(SHL32(EXTEND32(st->gain[i]),15))) + MULT16_16_P15(SUB16(Q15_ONE,p),spx_sqrt(SHL32(EXTEND32(st->gain_floor[i]),15)));
    915          st->gain2[i]=SQR16_Q15(tmp);
    916 
    917          /* Use this if you want a log-domain MMSE estimator instead */
    918          /*st->gain2[i] = pow(st->gain[i], p) * pow(st->gain_floor[i],1.f-p);*/
    919       }
    920    } else {
    921       for (i=N;i<N+M;i++)
    922       {
    923          spx_word16_t tmp;
    924          spx_word16_t p = st->gain2[i];
    925          st->gain[i] = MAX16(st->gain[i], st->gain_floor[i]);
    926          tmp = MULT16_16_P15(p,spx_sqrt(SHL32(EXTEND32(st->gain[i]),15))) + MULT16_16_P15(SUB16(Q15_ONE,p),spx_sqrt(SHL32(EXTEND32(st->gain_floor[i]),15)));
    927          st->gain2[i]=SQR16_Q15(tmp);
    928       }
    929       filterbank_compute_psd16(st->bank,st->gain2+N, st->gain2);
    930    }
    931 
    932    /* If noise suppression is off, don't apply the gain (but then why call this in the first place!) */
    933    if (!st->denoise_enabled)
    934    {
    935       for (i=0;i<N+M;i++)
    936          st->gain2[i]=Q15_ONE;
    937    }
    938 
    939    /* Apply computed gain */
    940    for (i=1;i<N;i++)
    941    {
    942       st->ft[2*i-1] = MULT16_16_P15(st->gain2[i],st->ft[2*i-1]);
    943       st->ft[2*i] = MULT16_16_P15(st->gain2[i],st->ft[2*i]);
    944    }
    945    st->ft[0] = MULT16_16_P15(st->gain2[0],st->ft[0]);
    946    st->ft[2*N-1] = MULT16_16_P15(st->gain2[N-1],st->ft[2*N-1]);
    947 
    948    /*FIXME: This *will* not work for fixed-point */
    949 #ifndef FIXED_POINT
    950    if (st->agc_enabled)
    951       speex_compute_agc(st, Pframe, st->ft);
    952 #endif
    953 
    954    /* Inverse FFT with 1/N scaling */
    955    spx_ifft(st->fft_lookup, st->ft, st->frame);
    956    /* Scale back to original (lower) amplitude */
    957    for (i=0;i<2*N;i++)
    958       st->frame[i] = PSHR16(st->frame[i], st->frame_shift);
    959 
    960    /*FIXME: This *will* not work for fixed-point */
    961 #ifndef FIXED_POINT
    962    if (st->agc_enabled)
    963    {
    964       float max_sample=0;
    965       for (i=0;i<2*N;i++)
    966          if (fabs(st->frame[i])>max_sample)
    967             max_sample = fabs(st->frame[i]);
    968       if (max_sample>28000.f)
    969       {
    970          float damp = 28000.f/max_sample;
    971          for (i=0;i<2*N;i++)
    972             st->frame[i] *= damp;
    973       }
    974    }
    975 #endif
    976 
    977    /* Synthesis window (for WOLA) */
    978    for (i=0;i<2*N;i++)
    979       st->frame[i] = MULT16_16_Q15(st->frame[i], st->window[i]);
    980 
    981    /* Perform overlap and add */
    982    for (i=0;i<N3;i++)
    983       x[i] = st->outbuf[i] + st->frame[i];
    984    for (i=0;i<N4;i++)
    985       x[N3+i] = st->frame[N3+i];
    986 
    987    /* Update outbuf */
    988    for (i=0;i<N3;i++)
    989       st->outbuf[i] = st->frame[st->frame_size+i];
    990 
    991    /* FIXME: This VAD is a kludge */
    992    st->speech_prob = Pframe;
    993    if (st->vad_enabled)
    994    {
    995       if (st->speech_prob > st->speech_prob_start || (st->was_speech && st->speech_prob > st->speech_prob_continue))
    996       {
    997          st->was_speech=1;
    998          return 1;
    999       } else
   1000       {
   1001          st->was_speech=0;
   1002          return 0;
   1003       }
   1004    } else {
   1005       return 1;
   1006    }
   1007 }
   1008 
   1009 EXPORT void speex_preprocess_estimate_update(SpeexPreprocessState *st, spx_int16_t *x)
   1010 {
   1011    int i;
   1012    int N = st->ps_size;
   1013    int N3 = 2*N - st->frame_size;
   1014    int M;
   1015    spx_word32_t *ps=st->ps;
   1016 
   1017    M = st->nbands;
   1018    st->min_count++;
   1019 
   1020    preprocess_analysis(st, x);
   1021 
   1022    update_noise_prob(st);
   1023 
   1024    for (i=1;i<N-1;i++)
   1025    {
   1026       if (!st->update_prob[i] || st->ps[i] < PSHR32(st->noise[i],NOISE_SHIFT))
   1027       {
   1028          st->noise[i] = MULT16_32_Q15(QCONST16(.95f,15),st->noise[i]) + MULT16_32_Q15(QCONST16(.05f,15),SHL32(st->ps[i],NOISE_SHIFT));
   1029       }
   1030    }
   1031 
   1032    for (i=0;i<N3;i++)
   1033       st->outbuf[i] = MULT16_16_Q15(x[st->frame_size-N3+i],st->window[st->frame_size+i]);
   1034 
   1035    /* Save old power spectrum */
   1036    for (i=0;i<N+M;i++)
   1037       st->old_ps[i] = ps[i];
   1038 
   1039    for (i=0;i<N;i++)
   1040       st->reverb_estimate[i] = MULT16_32_Q15(st->reverb_decay, st->reverb_estimate[i]);
   1041 }
   1042 
   1043 
   1044 EXPORT int speex_preprocess_ctl(SpeexPreprocessState *state, int request, void *ptr)
   1045 {
   1046    int i;
   1047    SpeexPreprocessState *st;
   1048    st=(SpeexPreprocessState*)state;
   1049    switch(request)
   1050    {
   1051    case SPEEX_PREPROCESS_SET_DENOISE:
   1052       st->denoise_enabled = (*(spx_int32_t*)ptr);
   1053       break;
   1054    case SPEEX_PREPROCESS_GET_DENOISE:
   1055       (*(spx_int32_t*)ptr) = st->denoise_enabled;
   1056       break;
   1057 #ifndef FIXED_POINT
   1058    case SPEEX_PREPROCESS_SET_AGC:
   1059       st->agc_enabled = (*(spx_int32_t*)ptr);
   1060       break;
   1061    case SPEEX_PREPROCESS_GET_AGC:
   1062       (*(spx_int32_t*)ptr) = st->agc_enabled;
   1063       break;
   1064 #ifndef DISABLE_FLOAT_API
   1065    case SPEEX_PREPROCESS_SET_AGC_LEVEL:
   1066       st->agc_level = (*(float*)ptr);
   1067       if (st->agc_level<1)
   1068          st->agc_level=1;
   1069       if (st->agc_level>32768)
   1070          st->agc_level=32768;
   1071       break;
   1072    case SPEEX_PREPROCESS_GET_AGC_LEVEL:
   1073       (*(float*)ptr) = st->agc_level;
   1074       break;
   1075 #endif /* #ifndef DISABLE_FLOAT_API */
   1076    case SPEEX_PREPROCESS_SET_AGC_INCREMENT:
   1077       st->max_increase_step = exp(0.11513f * (*(spx_int32_t*)ptr)*st->frame_size / st->sampling_rate);
   1078       break;
   1079    case SPEEX_PREPROCESS_GET_AGC_INCREMENT:
   1080       (*(spx_int32_t*)ptr) = floor(.5+8.6858*log(st->max_increase_step)*st->sampling_rate/st->frame_size);
   1081       break;
   1082    case SPEEX_PREPROCESS_SET_AGC_DECREMENT:
   1083       st->max_decrease_step = exp(0.11513f * (*(spx_int32_t*)ptr)*st->frame_size / st->sampling_rate);
   1084       break;
   1085    case SPEEX_PREPROCESS_GET_AGC_DECREMENT:
   1086       (*(spx_int32_t*)ptr) = floor(.5+8.6858*log(st->max_decrease_step)*st->sampling_rate/st->frame_size);
   1087       break;
   1088    case SPEEX_PREPROCESS_SET_AGC_MAX_GAIN:
   1089       st->max_gain = exp(0.11513f * (*(spx_int32_t*)ptr));
   1090       break;
   1091    case SPEEX_PREPROCESS_GET_AGC_MAX_GAIN:
   1092       (*(spx_int32_t*)ptr) = floor(.5+8.6858*log(st->max_gain));
   1093       break;
   1094 #endif
   1095    case SPEEX_PREPROCESS_SET_VAD:
   1096       speex_warning("The VAD has been replaced by a hack pending a complete rewrite");
   1097       st->vad_enabled = (*(spx_int32_t*)ptr);
   1098       break;
   1099    case SPEEX_PREPROCESS_GET_VAD:
   1100       (*(spx_int32_t*)ptr) = st->vad_enabled;
   1101       break;
   1102 
   1103    case SPEEX_PREPROCESS_SET_DEREVERB:
   1104       st->dereverb_enabled = (*(spx_int32_t*)ptr);
   1105       for (i=0;i<st->ps_size;i++)
   1106          st->reverb_estimate[i]=0;
   1107       break;
   1108    case SPEEX_PREPROCESS_GET_DEREVERB:
   1109       (*(spx_int32_t*)ptr) = st->dereverb_enabled;
   1110       break;
   1111 
   1112    case SPEEX_PREPROCESS_SET_DEREVERB_LEVEL:
   1113       /* FIXME: Re-enable when de-reverberation is actually enabled again */
   1114       /*st->reverb_level = (*(float*)ptr);*/
   1115       break;
   1116    case SPEEX_PREPROCESS_GET_DEREVERB_LEVEL:
   1117       /* FIXME: Re-enable when de-reverberation is actually enabled again */
   1118       /*(*(float*)ptr) = st->reverb_level;*/
   1119       break;
   1120 
   1121    case SPEEX_PREPROCESS_SET_DEREVERB_DECAY:
   1122       /* FIXME: Re-enable when de-reverberation is actually enabled again */
   1123       /*st->reverb_decay = (*(float*)ptr);*/
   1124       break;
   1125    case SPEEX_PREPROCESS_GET_DEREVERB_DECAY:
   1126       /* FIXME: Re-enable when de-reverberation is actually enabled again */
   1127       /*(*(float*)ptr) = st->reverb_decay;*/
   1128       break;
   1129 
   1130    case SPEEX_PREPROCESS_SET_PROB_START:
   1131       *(spx_int32_t*)ptr = MIN32(100,MAX32(0, *(spx_int32_t*)ptr));
   1132       st->speech_prob_start = DIV32_16(MULT16_16(Q15ONE,*(spx_int32_t*)ptr), 100);
   1133       break;
   1134    case SPEEX_PREPROCESS_GET_PROB_START:
   1135       (*(spx_int32_t*)ptr) = MULT16_16_Q15(st->speech_prob_start, 100);
   1136       break;
   1137 
   1138    case SPEEX_PREPROCESS_SET_PROB_CONTINUE:
   1139       *(spx_int32_t*)ptr = MIN32(100,MAX32(0, *(spx_int32_t*)ptr));
   1140       st->speech_prob_continue = DIV32_16(MULT16_16(Q15ONE,*(spx_int32_t*)ptr), 100);
   1141       break;
   1142    case SPEEX_PREPROCESS_GET_PROB_CONTINUE:
   1143       (*(spx_int32_t*)ptr) = MULT16_16_Q15(st->speech_prob_continue, 100);
   1144       break;
   1145 
   1146    case SPEEX_PREPROCESS_SET_NOISE_SUPPRESS:
   1147       st->noise_suppress = -ABS(*(spx_int32_t*)ptr);
   1148       break;
   1149    case SPEEX_PREPROCESS_GET_NOISE_SUPPRESS:
   1150       (*(spx_int32_t*)ptr) = st->noise_suppress;
   1151       break;
   1152    case SPEEX_PREPROCESS_SET_ECHO_SUPPRESS:
   1153       st->echo_suppress = -ABS(*(spx_int32_t*)ptr);
   1154       break;
   1155    case SPEEX_PREPROCESS_GET_ECHO_SUPPRESS:
   1156       (*(spx_int32_t*)ptr) = st->echo_suppress;
   1157       break;
   1158    case SPEEX_PREPROCESS_SET_ECHO_SUPPRESS_ACTIVE:
   1159       st->echo_suppress_active = -ABS(*(spx_int32_t*)ptr);
   1160       break;
   1161    case SPEEX_PREPROCESS_GET_ECHO_SUPPRESS_ACTIVE:
   1162       (*(spx_int32_t*)ptr) = st->echo_suppress_active;
   1163       break;
   1164    case SPEEX_PREPROCESS_SET_ECHO_STATE:
   1165       st->echo_state = (SpeexEchoState*)ptr;
   1166       break;
   1167    case SPEEX_PREPROCESS_GET_ECHO_STATE:
   1168       (*(SpeexEchoState**)ptr) = (SpeexEchoState*)st->echo_state;
   1169       break;
   1170 #ifndef FIXED_POINT
   1171    case SPEEX_PREPROCESS_GET_AGC_LOUDNESS:
   1172       (*(spx_int32_t*)ptr) = pow(st->loudness, 1.0/LOUDNESS_EXP);
   1173       break;
   1174    case SPEEX_PREPROCESS_GET_AGC_GAIN:
   1175       (*(spx_int32_t*)ptr) = floor(.5+8.6858*log(st->agc_gain));
   1176       break;
   1177 #endif
   1178    case SPEEX_PREPROCESS_GET_PSD_SIZE:
   1179    case SPEEX_PREPROCESS_GET_NOISE_PSD_SIZE:
   1180       (*(spx_int32_t*)ptr) = st->ps_size;
   1181       break;
   1182    case SPEEX_PREPROCESS_GET_PSD:
   1183       for(i=0;i<st->ps_size;i++)
   1184       	((spx_int32_t *)ptr)[i] = (spx_int32_t) st->ps[i];
   1185       break;
   1186    case SPEEX_PREPROCESS_GET_NOISE_PSD:
   1187       for(i=0;i<st->ps_size;i++)
   1188       	((spx_int32_t *)ptr)[i] = (spx_int32_t) PSHR32(st->noise[i], NOISE_SHIFT);
   1189       break;
   1190    case SPEEX_PREPROCESS_GET_PROB:
   1191       (*(spx_int32_t*)ptr) = MULT16_16_Q15(st->speech_prob, 100);
   1192       break;
   1193 #ifndef FIXED_POINT
   1194    case SPEEX_PREPROCESS_SET_AGC_TARGET:
   1195       st->agc_level = (*(spx_int32_t*)ptr);
   1196       if (st->agc_level<1)
   1197          st->agc_level=1;
   1198       if (st->agc_level>32768)
   1199          st->agc_level=32768;
   1200       break;
   1201    case SPEEX_PREPROCESS_GET_AGC_TARGET:
   1202       (*(spx_int32_t*)ptr) = st->agc_level;
   1203       break;
   1204 #endif
   1205    default:
   1206       speex_warning_int("Unknown speex_preprocess_ctl request: ", request);
   1207       return -1;
   1208    }
   1209    return 0;
   1210 }
   1211 
   1212 #ifdef FIXED_DEBUG
   1213 long long spx_mips=0;
   1214 #endif
   1215 
   1216