Home | History | Annotate | Download | only in libspeex
      1 /* Copyright (C) 2003 Epic Games (written by Jean-Marc Valin)
      2    Copyright (C) 2004-2006 Epic Games
      3 
      4    File: preprocess.c
      5    Preprocessor with denoising based on the algorithm by Ephraim and Malah
      6 
      7    Redistribution and use in source and binary forms, with or without
      8    modification, are permitted provided that the following conditions are
      9    met:
     10 
     11    1. Redistributions of source code must retain the above copyright notice,
     12    this list of conditions and the following disclaimer.
     13 
     14    2. Redistributions in binary form must reproduce the above copyright
     15    notice, this list of conditions and the following disclaimer in the
     16    documentation and/or other materials provided with the distribution.
     17 
     18    3. The name of the author may not be used to endorse or promote products
     19    derived from this software without specific prior written permission.
     20 
     21    THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
     22    IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
     23    OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
     24    DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
     25    INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
     26    (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
     27    SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     28    HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
     29    STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
     30    ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     31    POSSIBILITY OF SUCH DAMAGE.
     32 */
     33 
     34 
     35 /*
     36    Recommended papers:
     37 
     38    Y. Ephraim and D. Malah, "Speech enhancement using minimum mean-square error
     39    short-time spectral amplitude estimator". IEEE Transactions on Acoustics,
     40    Speech and Signal Processing, vol. ASSP-32, no. 6, pp. 1109-1121, 1984.
     41 
     42    Y. Ephraim and D. Malah, "Speech enhancement using minimum mean-square error
     43    log-spectral amplitude estimator". IEEE Transactions on Acoustics, Speech and
     44    Signal Processing, vol. ASSP-33, no. 2, pp. 443-445, 1985.
     45 
     46    I. Cohen and B. Berdugo, "Speech enhancement for non-stationary noise environments".
     47    Signal Processing, vol. 81, no. 2, pp. 2403-2418, 2001.
     48 
     49    Stefan Gustafsson, Rainer Martin, Peter Jax, and Peter Vary. "A psychoacoustic
     50    approach to combined acoustic echo cancellation and noise reduction". IEEE
     51    Transactions on Speech and Audio Processing, 2002.
     52 
     53    J.-M. Valin, J. Rouat, and F. Michaud, "Microphone array post-filter for separation
     54    of simultaneous non-stationary sources". In Proceedings IEEE International
     55    Conference on Acoustics, Speech, and Signal Processing, 2004.
     56 */
     57 
     58 #ifdef HAVE_CONFIG_H
     59 #include "config.h"
     60 #endif
     61 
     62 #include <math.h>
     63 #include "speex/speex_preprocess.h"
     64 #include "speex/speex_echo.h"
     65 #include "arch.h"
     66 #include "fftwrap.h"
     67 #include "filterbank.h"
     68 #include "math_approx.h"
     69 #include "os_support.h"
     70 
     71 #ifndef M_PI
     72 #define M_PI 3.14159263
     73 #endif
     74 
     75 #define LOUDNESS_EXP 5.f
     76 #define AMP_SCALE .001f
     77 #define AMP_SCALE_1 1000.f
     78 
     79 #define NB_BANDS 24
     80 
     81 #define SPEECH_PROB_START_DEFAULT       QCONST16(0.35f,15)
     82 #define SPEECH_PROB_CONTINUE_DEFAULT    QCONST16(0.20f,15)
     83 #define NOISE_SUPPRESS_DEFAULT       -15
     84 #define ECHO_SUPPRESS_DEFAULT        -40
     85 #define ECHO_SUPPRESS_ACTIVE_DEFAULT -15
     86 
     87 #ifndef NULL
     88 #define NULL 0
     89 #endif
     90 
     91 #define SQR(x) ((x)*(x))
     92 #define SQR16(x) (MULT16_16((x),(x)))
     93 #define SQR16_Q15(x) (MULT16_16_Q15((x),(x)))
     94 
     95 #ifdef FIXED_POINT
     96 static inline spx_word16_t DIV32_16_Q8(spx_word32_t a, spx_word32_t b)
     97 {
     98    if (SHR32(a,7) >= b)
     99    {
    100       return 32767;
    101    } else {
    102       if (b>=QCONST32(1,23))
    103       {
    104          a = SHR32(a,8);
    105          b = SHR32(b,8);
    106       }
    107       if (b>=QCONST32(1,19))
    108       {
    109          a = SHR32(a,4);
    110          b = SHR32(b,4);
    111       }
    112       if (b>=QCONST32(1,15))
    113       {
    114          a = SHR32(a,4);
    115          b = SHR32(b,4);
    116       }
    117       a = SHL32(a,8);
    118       return PDIV32_16(a,b);
    119    }
    120 
    121 }
    122 static inline spx_word16_t DIV32_16_Q15(spx_word32_t a, spx_word32_t b)
    123 {
    124    if (SHR32(a,15) >= b)
    125    {
    126       return 32767;
    127    } else {
    128       if (b>=QCONST32(1,23))
    129       {
    130          a = SHR32(a,8);
    131          b = SHR32(b,8);
    132       }
    133       if (b>=QCONST32(1,19))
    134       {
    135          a = SHR32(a,4);
    136          b = SHR32(b,4);
    137       }
    138       if (b>=QCONST32(1,15))
    139       {
    140          a = SHR32(a,4);
    141          b = SHR32(b,4);
    142       }
    143       a = SHL32(a,15)-a;
    144       return DIV32_16(a,b);
    145    }
    146 }
    147 #define SNR_SCALING 256.f
    148 #define SNR_SCALING_1 0.0039062f
    149 #define SNR_SHIFT 8
    150 
    151 #define FRAC_SCALING 32767.f
    152 #define FRAC_SCALING_1 3.0518e-05
    153 #define FRAC_SHIFT 1
    154 
    155 #define EXPIN_SCALING 2048.f
    156 #define EXPIN_SCALING_1 0.00048828f
    157 #define EXPIN_SHIFT 11
    158 #define EXPOUT_SCALING_1 1.5259e-05
    159 
    160 #define NOISE_SHIFT 7
    161 
    162 #else
    163 
    164 #define DIV32_16_Q8(a,b) ((a)/(b))
    165 #define DIV32_16_Q15(a,b) ((a)/(b))
    166 #define SNR_SCALING 1.f
    167 #define SNR_SCALING_1 1.f
    168 #define SNR_SHIFT 0
    169 #define FRAC_SCALING 1.f
    170 #define FRAC_SCALING_1 1.f
    171 #define FRAC_SHIFT 0
    172 #define NOISE_SHIFT 0
    173 
    174 #define EXPIN_SCALING 1.f
    175 #define EXPIN_SCALING_1 1.f
    176 #define EXPOUT_SCALING_1 1.f
    177 
    178 #endif
    179 
    180 /** Speex pre-processor state. */
    181 struct SpeexPreprocessState_ {
    182    /* Basic info */
    183    int    frame_size;        /**< Number of samples processed each time */
    184    int    ps_size;           /**< Number of points in the power spectrum */
    185    int    sampling_rate;     /**< Sampling rate of the input/output */
    186    int    nbands;
    187    FilterBank *bank;
    188 
    189    /* Parameters */
    190    int    denoise_enabled;
    191    int    vad_enabled;
    192    int    dereverb_enabled;
    193    spx_word16_t  reverb_decay;
    194    spx_word16_t  reverb_level;
    195    spx_word16_t speech_prob_start;
    196    spx_word16_t speech_prob_continue;
    197    int    noise_suppress;
    198    int    echo_suppress;
    199    int    echo_suppress_active;
    200    SpeexEchoState *echo_state;
    201 
    202    spx_word16_t	speech_prob;  /**< Probability last frame was speech */
    203 
    204    /* DSP-related arrays */
    205    spx_word16_t *frame;      /**< Processing frame (2*ps_size) */
    206    spx_word16_t *ft;         /**< Processing frame in freq domain (2*ps_size) */
    207    spx_word32_t *ps;         /**< Current power spectrum */
    208    spx_word16_t *gain2;      /**< Adjusted gains */
    209    spx_word16_t *gain_floor; /**< Minimum gain allowed */
    210    spx_word16_t *window;     /**< Analysis/Synthesis window */
    211    spx_word32_t *noise;      /**< Noise estimate */
    212    spx_word32_t *reverb_estimate; /**< Estimate of reverb energy */
    213    spx_word32_t *old_ps;     /**< Power spectrum for last frame */
    214    spx_word16_t *gain;       /**< Ephraim Malah gain */
    215    spx_word16_t *prior;      /**< A-priori SNR */
    216    spx_word16_t *post;       /**< A-posteriori SNR */
    217 
    218    spx_word32_t *S;          /**< Smoothed power spectrum */
    219    spx_word32_t *Smin;       /**< See Cohen paper */
    220    spx_word32_t *Stmp;       /**< See Cohen paper */
    221    int *update_prob;         /**< Probability of speech presence for noise update */
    222 
    223    spx_word16_t *zeta;       /**< Smoothed a priori SNR */
    224    spx_word32_t *echo_noise;
    225    spx_word32_t *residual_echo;
    226 
    227    /* Misc */
    228    spx_word16_t *inbuf;      /**< Input buffer (overlapped analysis) */
    229    spx_word16_t *outbuf;     /**< Output buffer (for overlap and add) */
    230 
    231    /* AGC stuff, only for floating point for now */
    232 #ifndef FIXED_POINT
    233    int    agc_enabled;
    234    float  agc_level;
    235    float  loudness_accum;
    236    float *loudness_weight;   /**< Perceptual loudness curve */
    237    float  loudness;          /**< Loudness estimate */
    238    float  agc_gain;          /**< Current AGC gain */
    239    float  max_gain;          /**< Maximum gain allowed */
    240    float  max_increase_step; /**< Maximum increase in gain from one frame to another */
    241    float  max_decrease_step; /**< Maximum decrease in gain from one frame to another */
    242    float  prev_loudness;     /**< Loudness of previous frame */
    243    float  init_max;          /**< Current gain limit during initialisation */
    244 #endif
    245    int    nb_adapt;          /**< Number of frames used for adaptation so far */
    246    int    was_speech;
    247    int    min_count;         /**< Number of frames processed so far */
    248    void  *fft_lookup;        /**< Lookup table for the FFT */
    249 #ifdef FIXED_POINT
    250    int    frame_shift;
    251 #endif
    252 };
    253 
    254 
    255 static void conj_window(spx_word16_t *w, int len)
    256 {
    257    int i;
    258    for (i=0;i<len;i++)
    259    {
    260       spx_word16_t tmp;
    261 #ifdef FIXED_POINT
    262       spx_word16_t x = DIV32_16(MULT16_16(32767,i),len);
    263 #else
    264       spx_word16_t x = DIV32_16(MULT16_16(QCONST16(4.f,13),i),len);
    265 #endif
    266       int inv=0;
    267       if (x<QCONST16(1.f,13))
    268       {
    269       } else if (x<QCONST16(2.f,13))
    270       {
    271          x=QCONST16(2.f,13)-x;
    272          inv=1;
    273       } else if (x<QCONST16(3.f,13))
    274       {
    275          x=x-QCONST16(2.f,13);
    276          inv=1;
    277       } else {
    278          x=QCONST16(2.f,13)-x+QCONST16(2.f,13); /* 4 - x */
    279       }
    280       x = MULT16_16_Q14(QCONST16(1.271903f,14), x);
    281       tmp = SQR16_Q15(QCONST16(.5f,15)-MULT16_16_P15(QCONST16(.5f,15),spx_cos_norm(SHL32(EXTEND32(x),2))));
    282       if (inv)
    283          tmp=SUB16(Q15_ONE,tmp);
    284       w[i]=spx_sqrt(SHL32(EXTEND32(tmp),15));
    285    }
    286 }
    287 
    288 
    289 #ifdef FIXED_POINT
    290 /* This function approximates the gain function
    291    y = gamma(1.25)^2 * M(-.25;1;-x) / sqrt(x)
    292    which multiplied by xi/(1+xi) is the optimal gain
    293    in the loudness domain ( sqrt[amplitude] )
    294    Input in Q11 format, output in Q15
    295 */
    296 static inline spx_word32_t hypergeom_gain(spx_word32_t xx)
    297 {
    298    int ind;
    299    spx_word16_t frac;
    300    /* Q13 table */
    301    static const spx_word16_t table[21] = {
    302        6730,  8357,  9868, 11267, 12563, 13770, 14898,
    303       15959, 16961, 17911, 18816, 19682, 20512, 21311,
    304       22082, 22827, 23549, 24250, 24931, 25594, 26241};
    305       ind = SHR32(xx,10);
    306       if (ind<0)
    307          return Q15_ONE;
    308       if (ind>19)
    309          return ADD32(EXTEND32(Q15_ONE),EXTEND32(DIV32_16(QCONST32(.1296,23), SHR32(xx,EXPIN_SHIFT-SNR_SHIFT))));
    310       frac = SHL32(xx-SHL32(ind,10),5);
    311       return SHL32(DIV32_16(PSHR32(MULT16_16(Q15_ONE-frac,table[ind]) + MULT16_16(frac,table[ind+1]),7),(spx_sqrt(SHL32(xx,15)+6711))),7);
    312 }
    313 
    314 static inline spx_word16_t qcurve(spx_word16_t x)
    315 {
    316    x = MAX16(x, 1);
    317    return DIV32_16(SHL32(EXTEND32(32767),9),ADD16(512,MULT16_16_Q15(QCONST16(.60f,15),DIV32_16(32767,x))));
    318 }
    319 
    320 /* Compute the gain floor based on different floors for the background noise and residual echo */
    321 static void compute_gain_floor(int noise_suppress, int effective_echo_suppress, spx_word32_t *noise, spx_word32_t *echo, spx_word16_t *gain_floor, int len)
    322 {
    323    int i;
    324 
    325    if (noise_suppress > effective_echo_suppress)
    326    {
    327       spx_word16_t noise_gain, gain_ratio;
    328       noise_gain = EXTRACT16(MIN32(Q15_ONE,SHR32(spx_exp(MULT16_16(QCONST16(0.11513,11),noise_suppress)),1)));
    329       gain_ratio = EXTRACT16(MIN32(Q15_ONE,SHR32(spx_exp(MULT16_16(QCONST16(.2302585f,11),effective_echo_suppress-noise_suppress)),1)));
    330 
    331       /* gain_floor = sqrt [ (noise*noise_floor + echo*echo_floor) / (noise+echo) ] */
    332       for (i=0;i<len;i++)
    333          gain_floor[i] = MULT16_16_Q15(noise_gain,
    334                                        spx_sqrt(SHL32(EXTEND32(DIV32_16_Q15(PSHR32(noise[i],NOISE_SHIFT) + MULT16_32_Q15(gain_ratio,echo[i]),
    335                                              (1+PSHR32(noise[i],NOISE_SHIFT) + echo[i]) )),15)));
    336    } else {
    337       spx_word16_t echo_gain, gain_ratio;
    338       echo_gain = EXTRACT16(MIN32(Q15_ONE,SHR32(spx_exp(MULT16_16(QCONST16(0.11513,11),effective_echo_suppress)),1)));
    339       gain_ratio = EXTRACT16(MIN32(Q15_ONE,SHR32(spx_exp(MULT16_16(QCONST16(.2302585f,11),noise_suppress-effective_echo_suppress)),1)));
    340 
    341       /* gain_floor = sqrt [ (noise*noise_floor + echo*echo_floor) / (noise+echo) ] */
    342       for (i=0;i<len;i++)
    343          gain_floor[i] = MULT16_16_Q15(echo_gain,
    344                                        spx_sqrt(SHL32(EXTEND32(DIV32_16_Q15(MULT16_32_Q15(gain_ratio,PSHR32(noise[i],NOISE_SHIFT)) + echo[i],
    345                                              (1+PSHR32(noise[i],NOISE_SHIFT) + echo[i]) )),15)));
    346    }
    347 }
    348 
    349 #else
    350 /* This function approximates the gain function
    351    y = gamma(1.25)^2 * M(-.25;1;-x) / sqrt(x)
    352    which multiplied by xi/(1+xi) is the optimal gain
    353    in the loudness domain ( sqrt[amplitude] )
    354 */
    355 static inline spx_word32_t hypergeom_gain(spx_word32_t xx)
    356 {
    357    int ind;
    358    float integer, frac;
    359    float x;
    360    static const float table[21] = {
    361       0.82157f, 1.02017f, 1.20461f, 1.37534f, 1.53363f, 1.68092f, 1.81865f,
    362       1.94811f, 2.07038f, 2.18638f, 2.29688f, 2.40255f, 2.50391f, 2.60144f,
    363       2.69551f, 2.78647f, 2.87458f, 2.96015f, 3.04333f, 3.12431f, 3.20326f};
    364       x = EXPIN_SCALING_1*xx;
    365       integer = floor(2*x);
    366       ind = (int)integer;
    367       if (ind<0)
    368          return FRAC_SCALING;
    369       if (ind>19)
    370          return FRAC_SCALING*(1+.1296/x);
    371       frac = 2*x-integer;
    372       return FRAC_SCALING*((1-frac)*table[ind] + frac*table[ind+1])/sqrt(x+.0001f);
    373 }
    374 
    375 static inline spx_word16_t qcurve(spx_word16_t x)
    376 {
    377    return 1.f/(1.f+.15f/(SNR_SCALING_1*x));
    378 }
    379 
    380 static void compute_gain_floor(int noise_suppress, int effective_echo_suppress, spx_word32_t *noise, spx_word32_t *echo, spx_word16_t *gain_floor, int len)
    381 {
    382    int i;
    383    float echo_floor;
    384    float noise_floor;
    385 
    386    noise_floor = exp(.2302585f*noise_suppress);
    387    echo_floor = exp(.2302585f*effective_echo_suppress);
    388 
    389    /* Compute the gain floor based on different floors for the background noise and residual echo */
    390    for (i=0;i<len;i++)
    391       gain_floor[i] = FRAC_SCALING*sqrt(noise_floor*PSHR32(noise[i],NOISE_SHIFT) + echo_floor*echo[i])/sqrt(1+PSHR32(noise[i],NOISE_SHIFT) + echo[i]);
    392 }
    393 
    394 #endif
    395 EXPORT SpeexPreprocessState *speex_preprocess_state_init(int frame_size, int sampling_rate)
    396 {
    397    int i;
    398    int N, N3, N4, M;
    399 
    400    SpeexPreprocessState *st = (SpeexPreprocessState *)speex_alloc(sizeof(SpeexPreprocessState));
    401    st->frame_size = frame_size;
    402 
    403    /* Round ps_size down to the nearest power of two */
    404 #if 0
    405    i=1;
    406    st->ps_size = st->frame_size;
    407    while(1)
    408    {
    409       if (st->ps_size & ~i)
    410       {
    411          st->ps_size &= ~i;
    412          i<<=1;
    413       } else {
    414          break;
    415       }
    416    }
    417 
    418 
    419    if (st->ps_size < 3*st->frame_size/4)
    420       st->ps_size = st->ps_size * 3 / 2;
    421 #else
    422    st->ps_size = st->frame_size;
    423 #endif
    424 
    425    N = st->ps_size;
    426    N3 = 2*N - st->frame_size;
    427    N4 = st->frame_size - N3;
    428 
    429    st->sampling_rate = sampling_rate;
    430    st->denoise_enabled = 1;
    431    st->vad_enabled = 0;
    432    st->dereverb_enabled = 0;
    433    st->reverb_decay = 0;
    434    st->reverb_level = 0;
    435    st->noise_suppress = NOISE_SUPPRESS_DEFAULT;
    436    st->echo_suppress = ECHO_SUPPRESS_DEFAULT;
    437    st->echo_suppress_active = ECHO_SUPPRESS_ACTIVE_DEFAULT;
    438 
    439    st->speech_prob_start = SPEECH_PROB_START_DEFAULT;
    440    st->speech_prob_continue = SPEECH_PROB_CONTINUE_DEFAULT;
    441 
    442    st->echo_state = NULL;
    443 
    444    st->nbands = NB_BANDS;
    445    M = st->nbands;
    446    st->bank = filterbank_new(M, sampling_rate, N, 1);
    447 
    448    st->frame = (spx_word16_t*)speex_alloc(2*N*sizeof(spx_word16_t));
    449    st->window = (spx_word16_t*)speex_alloc(2*N*sizeof(spx_word16_t));
    450    st->ft = (spx_word16_t*)speex_alloc(2*N*sizeof(spx_word16_t));
    451 
    452    st->ps = (spx_word32_t*)speex_alloc((N+M)*sizeof(spx_word32_t));
    453    st->noise = (spx_word32_t*)speex_alloc((N+M)*sizeof(spx_word32_t));
    454    st->echo_noise = (spx_word32_t*)speex_alloc((N+M)*sizeof(spx_word32_t));
    455    st->residual_echo = (spx_word32_t*)speex_alloc((N+M)*sizeof(spx_word32_t));
    456    st->reverb_estimate = (spx_word32_t*)speex_alloc((N+M)*sizeof(spx_word32_t));
    457    st->old_ps = (spx_word32_t*)speex_alloc((N+M)*sizeof(spx_word32_t));
    458    st->prior = (spx_word16_t*)speex_alloc((N+M)*sizeof(spx_word16_t));
    459    st->post = (spx_word16_t*)speex_alloc((N+M)*sizeof(spx_word16_t));
    460    st->gain = (spx_word16_t*)speex_alloc((N+M)*sizeof(spx_word16_t));
    461    st->gain2 = (spx_word16_t*)speex_alloc((N+M)*sizeof(spx_word16_t));
    462    st->gain_floor = (spx_word16_t*)speex_alloc((N+M)*sizeof(spx_word16_t));
    463    st->zeta = (spx_word16_t*)speex_alloc((N+M)*sizeof(spx_word16_t));
    464 
    465    st->S = (spx_word32_t*)speex_alloc(N*sizeof(spx_word32_t));
    466    st->Smin = (spx_word32_t*)speex_alloc(N*sizeof(spx_word32_t));
    467    st->Stmp = (spx_word32_t*)speex_alloc(N*sizeof(spx_word32_t));
    468    st->update_prob = (int*)speex_alloc(N*sizeof(int));
    469 
    470    st->inbuf = (spx_word16_t*)speex_alloc(N3*sizeof(spx_word16_t));
    471    st->outbuf = (spx_word16_t*)speex_alloc(N3*sizeof(spx_word16_t));
    472 
    473    conj_window(st->window, 2*N3);
    474    for (i=2*N3;i<2*st->ps_size;i++)
    475       st->window[i]=Q15_ONE;
    476 
    477    if (N4>0)
    478    {
    479       for (i=N3-1;i>=0;i--)
    480       {
    481          st->window[i+N3+N4]=st->window[i+N3];
    482          st->window[i+N3]=1;
    483       }
    484    }
    485    for (i=0;i<N+M;i++)
    486    {
    487       st->noise[i]=QCONST32(1.f,NOISE_SHIFT);
    488       st->reverb_estimate[i]=0;
    489       st->old_ps[i]=1;
    490       st->gain[i]=Q15_ONE;
    491       st->post[i]=SHL16(1, SNR_SHIFT);
    492       st->prior[i]=SHL16(1, SNR_SHIFT);
    493    }
    494 
    495    for (i=0;i<N;i++)
    496       st->update_prob[i] = 1;
    497    for (i=0;i<N3;i++)
    498    {
    499       st->inbuf[i]=0;
    500       st->outbuf[i]=0;
    501    }
    502 #ifndef FIXED_POINT
    503    st->agc_enabled = 0;
    504    st->agc_level = 8000;
    505    st->loudness_weight = (float*)speex_alloc(N*sizeof(float));
    506    for (i=0;i<N;i++)
    507    {
    508       float ff=((float)i)*.5*sampling_rate/((float)N);
    509       /*st->loudness_weight[i] = .5f*(1.f/(1.f+ff/8000.f))+1.f*exp(-.5f*(ff-3800.f)*(ff-3800.f)/9e5f);*/
    510       st->loudness_weight[i] = .35f-.35f*ff/16000.f+.73f*exp(-.5f*(ff-3800)*(ff-3800)/9e5f);
    511       if (st->loudness_weight[i]<.01f)
    512          st->loudness_weight[i]=.01f;
    513       st->loudness_weight[i] *= st->loudness_weight[i];
    514    }
    515    /*st->loudness = pow(AMP_SCALE*st->agc_level,LOUDNESS_EXP);*/
    516    st->loudness = 1e-15;
    517    st->agc_gain = 1;
    518    st->max_gain = 30;
    519    st->max_increase_step = exp(0.11513f * 12.*st->frame_size / st->sampling_rate);
    520    st->max_decrease_step = exp(-0.11513f * 40.*st->frame_size / st->sampling_rate);
    521    st->prev_loudness = 1;
    522    st->init_max = 1;
    523 #endif
    524    st->was_speech = 0;
    525 
    526    st->fft_lookup = spx_fft_init(2*N);
    527 
    528    st->nb_adapt=0;
    529    st->min_count=0;
    530    return st;
    531 }
    532 
    533 EXPORT void speex_preprocess_state_destroy(SpeexPreprocessState *st)
    534 {
    535    speex_free(st->frame);
    536    speex_free(st->ft);
    537    speex_free(st->ps);
    538    speex_free(st->gain2);
    539    speex_free(st->gain_floor);
    540    speex_free(st->window);
    541    speex_free(st->noise);
    542    speex_free(st->reverb_estimate);
    543    speex_free(st->old_ps);
    544    speex_free(st->gain);
    545    speex_free(st->prior);
    546    speex_free(st->post);
    547 #ifndef FIXED_POINT
    548    speex_free(st->loudness_weight);
    549 #endif
    550    speex_free(st->echo_noise);
    551    speex_free(st->residual_echo);
    552 
    553    speex_free(st->S);
    554    speex_free(st->Smin);
    555    speex_free(st->Stmp);
    556    speex_free(st->update_prob);
    557    speex_free(st->zeta);
    558 
    559    speex_free(st->inbuf);
    560    speex_free(st->outbuf);
    561 
    562    spx_fft_destroy(st->fft_lookup);
    563    filterbank_destroy(st->bank);
    564    speex_free(st);
    565 }
    566 
    567 /* FIXME: The AGC doesn't work yet with fixed-point*/
    568 #ifndef FIXED_POINT
    569 static void speex_compute_agc(SpeexPreprocessState *st, spx_word16_t Pframe, spx_word16_t *ft)
    570 {
    571    int i;
    572    int N = st->ps_size;
    573    float target_gain;
    574    float loudness=1.f;
    575    float rate;
    576 
    577    for (i=2;i<N;i++)
    578    {
    579       loudness += 2.f*N*st->ps[i]* st->loudness_weight[i];
    580    }
    581    loudness=sqrt(loudness);
    582       /*if (loudness < 2*pow(st->loudness, 1.0/LOUDNESS_EXP) &&
    583    loudness*2 > pow(st->loudness, 1.0/LOUDNESS_EXP))*/
    584    if (Pframe>.3f)
    585    {
    586       /*rate=2.0f*Pframe*Pframe/(1+st->nb_loudness_adapt);*/
    587       rate = .03*Pframe*Pframe;
    588       st->loudness = (1-rate)*st->loudness + (rate)*pow(AMP_SCALE*loudness, LOUDNESS_EXP);
    589       st->loudness_accum = (1-rate)*st->loudness_accum + rate;
    590       if (st->init_max < st->max_gain && st->nb_adapt > 20)
    591          st->init_max *= 1.f + .1f*Pframe*Pframe;
    592    }
    593    /*printf ("%f %f %f %f\n", Pframe, loudness, pow(st->loudness, 1.0f/LOUDNESS_EXP), st->loudness2);*/
    594 
    595    target_gain = AMP_SCALE*st->agc_level*pow(st->loudness/(1e-4+st->loudness_accum), -1.0f/LOUDNESS_EXP);
    596 
    597    if ((Pframe>.5  && st->nb_adapt > 20) || target_gain < st->agc_gain)
    598    {
    599       if (target_gain > st->max_increase_step*st->agc_gain)
    600          target_gain = st->max_increase_step*st->agc_gain;
    601       if (target_gain < st->max_decrease_step*st->agc_gain && loudness < 10*st->prev_loudness)
    602          target_gain = st->max_decrease_step*st->agc_gain;
    603       if (target_gain > st->max_gain)
    604          target_gain = st->max_gain;
    605       if (target_gain > st->init_max)
    606          target_gain = st->init_max;
    607 
    608       st->agc_gain = target_gain;
    609    }
    610    /*fprintf (stderr, "%f %f %f\n", loudness, (float)AMP_SCALE_1*pow(st->loudness, 1.0f/LOUDNESS_EXP), st->agc_gain);*/
    611 
    612    for (i=0;i<2*N;i++)
    613       ft[i] *= st->agc_gain;
    614    st->prev_loudness = loudness;
    615 }
    616 #endif
    617 
    618 static void preprocess_analysis(SpeexPreprocessState *st, spx_int16_t *x)
    619 {
    620    int i;
    621    int N = st->ps_size;
    622    int N3 = 2*N - st->frame_size;
    623    int N4 = st->frame_size - N3;
    624    spx_word32_t *ps=st->ps;
    625 
    626    /* 'Build' input frame */
    627    for (i=0;i<N3;i++)
    628       st->frame[i]=st->inbuf[i];
    629    for (i=0;i<st->frame_size;i++)
    630       st->frame[N3+i]=x[i];
    631 
    632    /* Update inbuf */
    633    for (i=0;i<N3;i++)
    634       st->inbuf[i]=x[N4+i];
    635 
    636    /* Windowing */
    637    for (i=0;i<2*N;i++)
    638       st->frame[i] = MULT16_16_Q15(st->frame[i], st->window[i]);
    639 
    640 #ifdef FIXED_POINT
    641    {
    642       spx_word16_t max_val=0;
    643       for (i=0;i<2*N;i++)
    644          max_val = MAX16(max_val, ABS16(st->frame[i]));
    645       st->frame_shift = 14-spx_ilog2(EXTEND32(max_val));
    646       for (i=0;i<2*N;i++)
    647          st->frame[i] = SHL16(st->frame[i], st->frame_shift);
    648    }
    649 #endif
    650 
    651    /* Perform FFT */
    652    spx_fft(st->fft_lookup, st->frame, st->ft);
    653 
    654    /* Power spectrum */
    655    ps[0]=MULT16_16(st->ft[0],st->ft[0]);
    656    for (i=1;i<N;i++)
    657       ps[i]=MULT16_16(st->ft[2*i-1],st->ft[2*i-1]) + MULT16_16(st->ft[2*i],st->ft[2*i]);
    658    for (i=0;i<N;i++)
    659       st->ps[i] = PSHR32(st->ps[i], 2*st->frame_shift);
    660 
    661    filterbank_compute_bank32(st->bank, ps, ps+N);
    662 }
    663 
    664 static void update_noise_prob(SpeexPreprocessState *st)
    665 {
    666    int i;
    667    int min_range;
    668    int N = st->ps_size;
    669 
    670    for (i=1;i<N-1;i++)
    671       st->S[i] =  MULT16_32_Q15(QCONST16(.8f,15),st->S[i]) + MULT16_32_Q15(QCONST16(.05f,15),st->ps[i-1])
    672                       + MULT16_32_Q15(QCONST16(.1f,15),st->ps[i]) + MULT16_32_Q15(QCONST16(.05f,15),st->ps[i+1]);
    673    st->S[0] =  MULT16_32_Q15(QCONST16(.8f,15),st->S[0]) + MULT16_32_Q15(QCONST16(.2f,15),st->ps[0]);
    674    st->S[N-1] =  MULT16_32_Q15(QCONST16(.8f,15),st->S[N-1]) + MULT16_32_Q15(QCONST16(.2f,15),st->ps[N-1]);
    675 
    676    if (st->nb_adapt==1)
    677    {
    678       for (i=0;i<N;i++)
    679          st->Smin[i] = st->Stmp[i] = 0;
    680    }
    681 
    682    if (st->nb_adapt < 100)
    683       min_range = 15;
    684    else if (st->nb_adapt < 1000)
    685       min_range = 50;
    686    else if (st->nb_adapt < 10000)
    687       min_range = 150;
    688    else
    689       min_range = 300;
    690    if (st->min_count > min_range)
    691    {
    692       st->min_count = 0;
    693       for (i=0;i<N;i++)
    694       {
    695          st->Smin[i] = MIN32(st->Stmp[i], st->S[i]);
    696          st->Stmp[i] = st->S[i];
    697       }
    698    } else {
    699       for (i=0;i<N;i++)
    700       {
    701          st->Smin[i] = MIN32(st->Smin[i], st->S[i]);
    702          st->Stmp[i] = MIN32(st->Stmp[i], st->S[i]);
    703       }
    704    }
    705    for (i=0;i<N;i++)
    706    {
    707       if (MULT16_32_Q15(QCONST16(.4f,15),st->S[i]) > st->Smin[i])
    708          st->update_prob[i] = 1;
    709       else
    710          st->update_prob[i] = 0;
    711       /*fprintf (stderr, "%f ", st->S[i]/st->Smin[i]);*/
    712       /*fprintf (stderr, "%f ", st->update_prob[i]);*/
    713    }
    714 
    715 }
    716 
    717 #define NOISE_OVERCOMPENS 1.
    718 
    719 void speex_echo_get_residual(SpeexEchoState *st, spx_word32_t *Yout, int len);
    720 
    721 EXPORT int speex_preprocess(SpeexPreprocessState *st, spx_int16_t *x, spx_int32_t *echo)
    722 {
    723    return speex_preprocess_run(st, x);
    724 }
    725 
    726 EXPORT int speex_preprocess_run(SpeexPreprocessState *st, spx_int16_t *x)
    727 {
    728    int i;
    729    int M;
    730    int N = st->ps_size;
    731    int N3 = 2*N - st->frame_size;
    732    int N4 = st->frame_size - N3;
    733    spx_word32_t *ps=st->ps;
    734    spx_word32_t Zframe;
    735    spx_word16_t Pframe;
    736    spx_word16_t beta, beta_1;
    737    spx_word16_t effective_echo_suppress;
    738 
    739    st->nb_adapt++;
    740    if (st->nb_adapt>20000)
    741       st->nb_adapt = 20000;
    742    st->min_count++;
    743 
    744    beta = MAX16(QCONST16(.03,15),DIV32_16(Q15_ONE,st->nb_adapt));
    745    beta_1 = Q15_ONE-beta;
    746    M = st->nbands;
    747    /* Deal with residual echo if provided */
    748    if (st->echo_state)
    749    {
    750       speex_echo_get_residual(st->echo_state, st->residual_echo, N);
    751 #ifndef FIXED_POINT
    752       /* If there are NaNs or ridiculous values, it'll show up in the DC and we just reset everything to zero */
    753       if (!(st->residual_echo[0] >=0 && st->residual_echo[0]<N*1e9f))
    754       {
    755          for (i=0;i<N;i++)
    756             st->residual_echo[i] = 0;
    757       }
    758 #endif
    759       for (i=0;i<N;i++)
    760          st->echo_noise[i] = MAX32(MULT16_32_Q15(QCONST16(.6f,15),st->echo_noise[i]), st->residual_echo[i]);
    761       filterbank_compute_bank32(st->bank, st->echo_noise, st->echo_noise+N);
    762    } else {
    763       for (i=0;i<N+M;i++)
    764          st->echo_noise[i] = 0;
    765    }
    766    preprocess_analysis(st, x);
    767 
    768    update_noise_prob(st);
    769 
    770    /* Noise estimation always updated for the 10 first frames */
    771    /*if (st->nb_adapt<10)
    772    {
    773       for (i=1;i<N-1;i++)
    774          st->update_prob[i] = 0;
    775    }
    776    */
    777 
    778    /* Update the noise estimate for the frequencies where it can be */
    779    for (i=0;i<N;i++)
    780    {
    781       if (!st->update_prob[i] || st->ps[i] < PSHR32(st->noise[i], NOISE_SHIFT))
    782          st->noise[i] = MAX32(EXTEND32(0),MULT16_32_Q15(beta_1,st->noise[i]) + MULT16_32_Q15(beta,SHL32(st->ps[i],NOISE_SHIFT)));
    783    }
    784    filterbank_compute_bank32(st->bank, st->noise, st->noise+N);
    785 
    786    /* Special case for first frame */
    787    if (st->nb_adapt==1)
    788       for (i=0;i<N+M;i++)
    789          st->old_ps[i] = ps[i];
    790 
    791    /* Compute a posteriori SNR */
    792    for (i=0;i<N+M;i++)
    793    {
    794       spx_word16_t gamma;
    795 
    796       /* Total noise estimate including residual echo and reverberation */
    797       spx_word32_t tot_noise = ADD32(ADD32(ADD32(EXTEND32(1), PSHR32(st->noise[i],NOISE_SHIFT)) , st->echo_noise[i]) , st->reverb_estimate[i]);
    798 
    799       /* A posteriori SNR = ps/noise - 1*/
    800       st->post[i] = SUB16(DIV32_16_Q8(ps[i],tot_noise), QCONST16(1.f,SNR_SHIFT));
    801       st->post[i]=MIN16(st->post[i], QCONST16(100.f,SNR_SHIFT));
    802 
    803       /* Computing update gamma = .1 + .9*(old/(old+noise))^2 */
    804       gamma = QCONST16(.1f,15)+MULT16_16_Q15(QCONST16(.89f,15),SQR16_Q15(DIV32_16_Q15(st->old_ps[i],ADD32(st->old_ps[i],tot_noise))));
    805 
    806       /* A priori SNR update = gamma*max(0,post) + (1-gamma)*old/noise */
    807       st->prior[i] = EXTRACT16(PSHR32(ADD32(MULT16_16(gamma,MAX16(0,st->post[i])), MULT16_16(Q15_ONE-gamma,DIV32_16_Q8(st->old_ps[i],tot_noise))), 15));
    808       st->prior[i]=MIN16(st->prior[i], QCONST16(100.f,SNR_SHIFT));
    809    }
    810 
    811    /*print_vec(st->post, N+M, "");*/
    812 
    813    /* Recursive average of the a priori SNR. A bit smoothed for the psd components */
    814    st->zeta[0] = PSHR32(ADD32(MULT16_16(QCONST16(.7f,15),st->zeta[0]), MULT16_16(QCONST16(.3f,15),st->prior[0])),15);
    815    for (i=1;i<N-1;i++)
    816       st->zeta[i] = PSHR32(ADD32(ADD32(ADD32(MULT16_16(QCONST16(.7f,15),st->zeta[i]), MULT16_16(QCONST16(.15f,15),st->prior[i])),
    817                            MULT16_16(QCONST16(.075f,15),st->prior[i-1])), MULT16_16(QCONST16(.075f,15),st->prior[i+1])),15);
    818    for (i=N-1;i<N+M;i++)
    819       st->zeta[i] = PSHR32(ADD32(MULT16_16(QCONST16(.7f,15),st->zeta[i]), MULT16_16(QCONST16(.3f,15),st->prior[i])),15);
    820 
    821    /* Speech probability of presence for the entire frame is based on the average filterbank a priori SNR */
    822    Zframe = 0;
    823    for (i=N;i<N+M;i++)
    824       Zframe = ADD32(Zframe, EXTEND32(st->zeta[i]));
    825    Pframe = QCONST16(.1f,15)+MULT16_16_Q15(QCONST16(.899f,15),qcurve(DIV32_16(Zframe,st->nbands)));
    826 
    827    effective_echo_suppress = EXTRACT16(PSHR32(ADD32(MULT16_16(SUB16(Q15_ONE,Pframe), st->echo_suppress), MULT16_16(Pframe, st->echo_suppress_active)),15));
    828 
    829    compute_gain_floor(st->noise_suppress, effective_echo_suppress, st->noise+N, st->echo_noise+N, st->gain_floor+N, M);
    830 
    831    /* Compute Ephraim & Malah gain speech probability of presence for each critical band (Bark scale)
    832       Technically this is actually wrong because the EM gaim assumes a slightly different probability
    833       distribution */
    834    for (i=N;i<N+M;i++)
    835    {
    836       /* See EM and Cohen papers*/
    837       spx_word32_t theta;
    838       /* Gain from hypergeometric function */
    839       spx_word32_t MM;
    840       /* Weiner filter gain */
    841       spx_word16_t prior_ratio;
    842       /* a priority probability of speech presence based on Bark sub-band alone */
    843       spx_word16_t P1;
    844       /* Speech absence a priori probability (considering sub-band and frame) */
    845       spx_word16_t q;
    846 #ifdef FIXED_POINT
    847       spx_word16_t tmp;
    848 #endif
    849 
    850       prior_ratio = PDIV32_16(SHL32(EXTEND32(st->prior[i]), 15), ADD16(st->prior[i], SHL32(1,SNR_SHIFT)));
    851       theta = MULT16_32_P15(prior_ratio, QCONST32(1.f,EXPIN_SHIFT)+SHL32(EXTEND32(st->post[i]),EXPIN_SHIFT-SNR_SHIFT));
    852 
    853       MM = hypergeom_gain(theta);
    854       /* Gain with bound */
    855       st->gain[i] = EXTRACT16(MIN32(Q15_ONE, MULT16_32_Q15(prior_ratio, MM)));
    856       /* Save old Bark power spectrum */
    857       st->old_ps[i] = MULT16_32_P15(QCONST16(.2f,15),st->old_ps[i]) + MULT16_32_P15(MULT16_16_P15(QCONST16(.8f,15),SQR16_Q15(st->gain[i])),ps[i]);
    858 
    859       P1 = QCONST16(.199f,15)+MULT16_16_Q15(QCONST16(.8f,15),qcurve (st->zeta[i]));
    860       q = Q15_ONE-MULT16_16_Q15(Pframe,P1);
    861 #ifdef FIXED_POINT
    862       theta = MIN32(theta, EXTEND32(32767));
    863 /*Q8*/tmp = MULT16_16_Q15((SHL32(1,SNR_SHIFT)+st->prior[i]),EXTRACT16(MIN32(Q15ONE,SHR32(spx_exp(-EXTRACT16(theta)),1))));
    864       tmp = MIN16(QCONST16(3.,SNR_SHIFT), tmp); /* Prevent overflows in the next line*/
    865 /*Q8*/tmp = EXTRACT16(PSHR32(MULT16_16(PDIV32_16(SHL32(EXTEND32(q),8),(Q15_ONE-q)),tmp),8));
    866       st->gain2[i]=DIV32_16(SHL32(EXTEND32(32767),SNR_SHIFT), ADD16(256,tmp));
    867 #else
    868       st->gain2[i]=1/(1.f + (q/(1.f-q))*(1+st->prior[i])*exp(-theta));
    869 #endif
    870    }
    871    /* Convert the EM gains and speech prob to linear frequency */
    872    filterbank_compute_psd16(st->bank,st->gain2+N, st->gain2);
    873    filterbank_compute_psd16(st->bank,st->gain+N, st->gain);
    874 
    875    /* Use 1 for linear gain resolution (best) or 0 for Bark gain resolution (faster) */
    876    if (1)
    877    {
    878       filterbank_compute_psd16(st->bank,st->gain_floor+N, st->gain_floor);
    879 
    880       /* Compute gain according to the Ephraim-Malah algorithm -- linear frequency */
    881       for (i=0;i<N;i++)
    882       {
    883          spx_word32_t MM;
    884          spx_word32_t theta;
    885          spx_word16_t prior_ratio;
    886          spx_word16_t tmp;
    887          spx_word16_t p;
    888          spx_word16_t g;
    889 
    890          /* Wiener filter gain */
    891          prior_ratio = PDIV32_16(SHL32(EXTEND32(st->prior[i]), 15), ADD16(st->prior[i], SHL32(1,SNR_SHIFT)));
    892          theta = MULT16_32_P15(prior_ratio, QCONST32(1.f,EXPIN_SHIFT)+SHL32(EXTEND32(st->post[i]),EXPIN_SHIFT-SNR_SHIFT));
    893 
    894          /* Optimal estimator for loudness domain */
    895          MM = hypergeom_gain(theta);
    896          /* EM gain with bound */
    897          g = EXTRACT16(MIN32(Q15_ONE, MULT16_32_Q15(prior_ratio, MM)));
    898          /* Interpolated speech probability of presence */
    899          p = st->gain2[i];
    900 
    901          /* Constrain the gain to be close to the Bark scale gain */
    902          if (MULT16_16_Q15(QCONST16(.333f,15),g) > st->gain[i])
    903             g = MULT16_16(3,st->gain[i]);
    904          st->gain[i] = g;
    905 
    906          /* Save old power spectrum */
    907          st->old_ps[i] = MULT16_32_P15(QCONST16(.2f,15),st->old_ps[i]) + MULT16_32_P15(MULT16_16_P15(QCONST16(.8f,15),SQR16_Q15(st->gain[i])),ps[i]);
    908 
    909          /* Apply gain floor */
    910          if (st->gain[i] < st->gain_floor[i])
    911             st->gain[i] = st->gain_floor[i];
    912 
    913          /* Exponential decay model for reverberation (unused) */
    914          /*st->reverb_estimate[i] = st->reverb_decay*st->reverb_estimate[i] + st->reverb_decay*st->reverb_level*st->gain[i]*st->gain[i]*st->ps[i];*/
    915 
    916          /* Take into account speech probability of presence (loudness domain MMSE estimator) */
    917          /* gain2 = [p*sqrt(gain)+(1-p)*sqrt(gain _floor) ]^2 */
    918          tmp = MULT16_16_P15(p,spx_sqrt(SHL32(EXTEND32(st->gain[i]),15))) + MULT16_16_P15(SUB16(Q15_ONE,p),spx_sqrt(SHL32(EXTEND32(st->gain_floor[i]),15)));
    919          st->gain2[i]=SQR16_Q15(tmp);
    920 
    921          /* Use this if you want a log-domain MMSE estimator instead */
    922          /*st->gain2[i] = pow(st->gain[i], p) * pow(st->gain_floor[i],1.f-p);*/
    923       }
    924    } else {
    925       for (i=N;i<N+M;i++)
    926       {
    927          spx_word16_t tmp;
    928          spx_word16_t p = st->gain2[i];
    929          st->gain[i] = MAX16(st->gain[i], st->gain_floor[i]);
    930          tmp = MULT16_16_P15(p,spx_sqrt(SHL32(EXTEND32(st->gain[i]),15))) + MULT16_16_P15(SUB16(Q15_ONE,p),spx_sqrt(SHL32(EXTEND32(st->gain_floor[i]),15)));
    931          st->gain2[i]=SQR16_Q15(tmp);
    932       }
    933       filterbank_compute_psd16(st->bank,st->gain2+N, st->gain2);
    934    }
    935 
    936    /* If noise suppression is off, don't apply the gain (but then why call this in the first place!) */
    937    if (!st->denoise_enabled)
    938    {
    939       for (i=0;i<N+M;i++)
    940          st->gain2[i]=Q15_ONE;
    941    }
    942 
    943    /* Apply computed gain */
    944    for (i=1;i<N;i++)
    945    {
    946       st->ft[2*i-1] = MULT16_16_P15(st->gain2[i],st->ft[2*i-1]);
    947       st->ft[2*i] = MULT16_16_P15(st->gain2[i],st->ft[2*i]);
    948    }
    949    st->ft[0] = MULT16_16_P15(st->gain2[0],st->ft[0]);
    950    st->ft[2*N-1] = MULT16_16_P15(st->gain2[N-1],st->ft[2*N-1]);
    951 
    952    /*FIXME: This *will* not work for fixed-point */
    953 #ifndef FIXED_POINT
    954    if (st->agc_enabled)
    955       speex_compute_agc(st, Pframe, st->ft);
    956 #endif
    957 
    958    /* Inverse FFT with 1/N scaling */
    959    spx_ifft(st->fft_lookup, st->ft, st->frame);
    960    /* Scale back to original (lower) amplitude */
    961    for (i=0;i<2*N;i++)
    962       st->frame[i] = PSHR16(st->frame[i], st->frame_shift);
    963 
    964    /*FIXME: This *will* not work for fixed-point */
    965 #ifndef FIXED_POINT
    966    if (st->agc_enabled)
    967    {
    968       float max_sample=0;
    969       for (i=0;i<2*N;i++)
    970          if (fabs(st->frame[i])>max_sample)
    971             max_sample = fabs(st->frame[i]);
    972       if (max_sample>28000.f)
    973       {
    974          float damp = 28000.f/max_sample;
    975          for (i=0;i<2*N;i++)
    976             st->frame[i] *= damp;
    977       }
    978    }
    979 #endif
    980 
    981    /* Synthesis window (for WOLA) */
    982    for (i=0;i<2*N;i++)
    983       st->frame[i] = MULT16_16_Q15(st->frame[i], st->window[i]);
    984 
    985    /* Perform overlap and add */
    986    for (i=0;i<N3;i++)
    987       x[i] = st->outbuf[i] + st->frame[i];
    988    for (i=0;i<N4;i++)
    989       x[N3+i] = st->frame[N3+i];
    990 
    991    /* Update outbuf */
    992    for (i=0;i<N3;i++)
    993       st->outbuf[i] = st->frame[st->frame_size+i];
    994 
    995    /* FIXME: This VAD is a kludge */
    996    st->speech_prob = Pframe;
    997    if (st->vad_enabled)
    998    {
    999       if (st->speech_prob > st->speech_prob_start || (st->was_speech && st->speech_prob > st->speech_prob_continue))
   1000       {
   1001          st->was_speech=1;
   1002          return 1;
   1003       } else
   1004       {
   1005          st->was_speech=0;
   1006          return 0;
   1007       }
   1008    } else {
   1009       return 1;
   1010    }
   1011 }
   1012 
   1013 EXPORT void speex_preprocess_estimate_update(SpeexPreprocessState *st, spx_int16_t *x)
   1014 {
   1015    int i;
   1016    int N = st->ps_size;
   1017    int N3 = 2*N - st->frame_size;
   1018    int M;
   1019    spx_word32_t *ps=st->ps;
   1020 
   1021    M = st->nbands;
   1022    st->min_count++;
   1023 
   1024    preprocess_analysis(st, x);
   1025 
   1026    update_noise_prob(st);
   1027 
   1028    for (i=1;i<N-1;i++)
   1029    {
   1030       if (!st->update_prob[i] || st->ps[i] < PSHR32(st->noise[i],NOISE_SHIFT))
   1031       {
   1032          st->noise[i] = MULT16_32_Q15(QCONST16(.95f,15),st->noise[i]) + MULT16_32_Q15(QCONST16(.05f,15),SHL32(st->ps[i],NOISE_SHIFT));
   1033       }
   1034    }
   1035 
   1036    for (i=0;i<N3;i++)
   1037       st->outbuf[i] = MULT16_16_Q15(x[st->frame_size-N3+i],st->window[st->frame_size+i]);
   1038 
   1039    /* Save old power spectrum */
   1040    for (i=0;i<N+M;i++)
   1041       st->old_ps[i] = ps[i];
   1042 
   1043    for (i=0;i<N;i++)
   1044       st->reverb_estimate[i] = MULT16_32_Q15(st->reverb_decay, st->reverb_estimate[i]);
   1045 }
   1046 
   1047 
   1048 EXPORT int speex_preprocess_ctl(SpeexPreprocessState *state, int request, void *ptr)
   1049 {
   1050    int i;
   1051    SpeexPreprocessState *st;
   1052    st=(SpeexPreprocessState*)state;
   1053    switch(request)
   1054    {
   1055    case SPEEX_PREPROCESS_SET_DENOISE:
   1056       st->denoise_enabled = (*(spx_int32_t*)ptr);
   1057       break;
   1058    case SPEEX_PREPROCESS_GET_DENOISE:
   1059       (*(spx_int32_t*)ptr) = st->denoise_enabled;
   1060       break;
   1061 #ifndef FIXED_POINT
   1062    case SPEEX_PREPROCESS_SET_AGC:
   1063       st->agc_enabled = (*(spx_int32_t*)ptr);
   1064       break;
   1065    case SPEEX_PREPROCESS_GET_AGC:
   1066       (*(spx_int32_t*)ptr) = st->agc_enabled;
   1067       break;
   1068 #ifndef DISABLE_FLOAT_API
   1069    case SPEEX_PREPROCESS_SET_AGC_LEVEL:
   1070       st->agc_level = (*(float*)ptr);
   1071       if (st->agc_level<1)
   1072          st->agc_level=1;
   1073       if (st->agc_level>32768)
   1074          st->agc_level=32768;
   1075       break;
   1076    case SPEEX_PREPROCESS_GET_AGC_LEVEL:
   1077       (*(float*)ptr) = st->agc_level;
   1078       break;
   1079 #endif /* #ifndef DISABLE_FLOAT_API */
   1080    case SPEEX_PREPROCESS_SET_AGC_INCREMENT:
   1081       st->max_increase_step = exp(0.11513f * (*(spx_int32_t*)ptr)*st->frame_size / st->sampling_rate);
   1082       break;
   1083    case SPEEX_PREPROCESS_GET_AGC_INCREMENT:
   1084       (*(spx_int32_t*)ptr) = floor(.5+8.6858*log(st->max_increase_step)*st->sampling_rate/st->frame_size);
   1085       break;
   1086    case SPEEX_PREPROCESS_SET_AGC_DECREMENT:
   1087       st->max_decrease_step = exp(0.11513f * (*(spx_int32_t*)ptr)*st->frame_size / st->sampling_rate);
   1088       break;
   1089    case SPEEX_PREPROCESS_GET_AGC_DECREMENT:
   1090       (*(spx_int32_t*)ptr) = floor(.5+8.6858*log(st->max_decrease_step)*st->sampling_rate/st->frame_size);
   1091       break;
   1092    case SPEEX_PREPROCESS_SET_AGC_MAX_GAIN:
   1093       st->max_gain = exp(0.11513f * (*(spx_int32_t*)ptr));
   1094       break;
   1095    case SPEEX_PREPROCESS_GET_AGC_MAX_GAIN:
   1096       (*(spx_int32_t*)ptr) = floor(.5+8.6858*log(st->max_gain));
   1097       break;
   1098 #endif
   1099    case SPEEX_PREPROCESS_SET_VAD:
   1100       // Disabled by mlebeau, we don't want this in the launched Google Mobile App for iPhone.
   1101       //speex_warning("The VAD has been replaced by a hack pending a complete rewrite");
   1102       st->vad_enabled = (*(spx_int32_t*)ptr);
   1103       break;
   1104    case SPEEX_PREPROCESS_GET_VAD:
   1105       (*(spx_int32_t*)ptr) = st->vad_enabled;
   1106       break;
   1107 
   1108    case SPEEX_PREPROCESS_SET_DEREVERB:
   1109       st->dereverb_enabled = (*(spx_int32_t*)ptr);
   1110       for (i=0;i<st->ps_size;i++)
   1111          st->reverb_estimate[i]=0;
   1112       break;
   1113    case SPEEX_PREPROCESS_GET_DEREVERB:
   1114       (*(spx_int32_t*)ptr) = st->dereverb_enabled;
   1115       break;
   1116 
   1117    case SPEEX_PREPROCESS_SET_DEREVERB_LEVEL:
   1118       /* FIXME: Re-enable when de-reverberation is actually enabled again */
   1119       /*st->reverb_level = (*(float*)ptr);*/
   1120       break;
   1121    case SPEEX_PREPROCESS_GET_DEREVERB_LEVEL:
   1122       /* FIXME: Re-enable when de-reverberation is actually enabled again */
   1123       /*(*(float*)ptr) = st->reverb_level;*/
   1124       break;
   1125 
   1126    case SPEEX_PREPROCESS_SET_DEREVERB_DECAY:
   1127       /* FIXME: Re-enable when de-reverberation is actually enabled again */
   1128       /*st->reverb_decay = (*(float*)ptr);*/
   1129       break;
   1130    case SPEEX_PREPROCESS_GET_DEREVERB_DECAY:
   1131       /* FIXME: Re-enable when de-reverberation is actually enabled again */
   1132       /*(*(float*)ptr) = st->reverb_decay;*/
   1133       break;
   1134 
   1135    case SPEEX_PREPROCESS_SET_PROB_START:
   1136       *(spx_int32_t*)ptr = MIN32(100,MAX32(0, *(spx_int32_t*)ptr));
   1137       st->speech_prob_start = DIV32_16(MULT16_16(Q15ONE,*(spx_int32_t*)ptr), 100);
   1138       break;
   1139    case SPEEX_PREPROCESS_GET_PROB_START:
   1140       (*(spx_int32_t*)ptr) = MULT16_16_Q15(st->speech_prob_start, 100);
   1141       break;
   1142 
   1143    case SPEEX_PREPROCESS_SET_PROB_CONTINUE:
   1144       *(spx_int32_t*)ptr = MIN32(100,MAX32(0, *(spx_int32_t*)ptr));
   1145       st->speech_prob_continue = DIV32_16(MULT16_16(Q15ONE,*(spx_int32_t*)ptr), 100);
   1146       break;
   1147    case SPEEX_PREPROCESS_GET_PROB_CONTINUE:
   1148       (*(spx_int32_t*)ptr) = MULT16_16_Q15(st->speech_prob_continue, 100);
   1149       break;
   1150 
   1151    case SPEEX_PREPROCESS_SET_NOISE_SUPPRESS:
   1152       st->noise_suppress = -ABS(*(spx_int32_t*)ptr);
   1153       break;
   1154    case SPEEX_PREPROCESS_GET_NOISE_SUPPRESS:
   1155       (*(spx_int32_t*)ptr) = st->noise_suppress;
   1156       break;
   1157    case SPEEX_PREPROCESS_SET_ECHO_SUPPRESS:
   1158       st->echo_suppress = -ABS(*(spx_int32_t*)ptr);
   1159       break;
   1160    case SPEEX_PREPROCESS_GET_ECHO_SUPPRESS:
   1161       (*(spx_int32_t*)ptr) = st->echo_suppress;
   1162       break;
   1163    case SPEEX_PREPROCESS_SET_ECHO_SUPPRESS_ACTIVE:
   1164       st->echo_suppress_active = -ABS(*(spx_int32_t*)ptr);
   1165       break;
   1166    case SPEEX_PREPROCESS_GET_ECHO_SUPPRESS_ACTIVE:
   1167       (*(spx_int32_t*)ptr) = st->echo_suppress_active;
   1168       break;
   1169    case SPEEX_PREPROCESS_SET_ECHO_STATE:
   1170       st->echo_state = (SpeexEchoState*)ptr;
   1171       break;
   1172    case SPEEX_PREPROCESS_GET_ECHO_STATE:
   1173       (*(SpeexEchoState**)ptr) = (SpeexEchoState*)st->echo_state;
   1174       break;
   1175 #ifndef FIXED_POINT
   1176    case SPEEX_PREPROCESS_GET_AGC_LOUDNESS:
   1177       (*(spx_int32_t*)ptr) = pow(st->loudness, 1.0/LOUDNESS_EXP);
   1178       break;
   1179    case SPEEX_PREPROCESS_GET_AGC_GAIN:
   1180       (*(spx_int32_t*)ptr) = floor(.5+8.6858*log(st->agc_gain));
   1181       break;
   1182 #endif
   1183    case SPEEX_PREPROCESS_GET_PSD_SIZE:
   1184    case SPEEX_PREPROCESS_GET_NOISE_PSD_SIZE:
   1185       (*(spx_int32_t*)ptr) = st->ps_size;
   1186       break;
   1187    case SPEEX_PREPROCESS_GET_PSD:
   1188       for(i=0;i<st->ps_size;i++)
   1189       	((spx_int32_t *)ptr)[i] = (spx_int32_t) st->ps[i];
   1190       break;
   1191    case SPEEX_PREPROCESS_GET_NOISE_PSD:
   1192       for(i=0;i<st->ps_size;i++)
   1193       	((spx_int32_t *)ptr)[i] = (spx_int32_t) PSHR32(st->noise[i], NOISE_SHIFT);
   1194       break;
   1195    case SPEEX_PREPROCESS_GET_PROB:
   1196       (*(spx_int32_t*)ptr) = MULT16_16_Q15(st->speech_prob, 100);
   1197       break;
   1198 #ifndef FIXED_POINT
   1199    case SPEEX_PREPROCESS_SET_AGC_TARGET:
   1200       st->agc_level = (*(spx_int32_t*)ptr);
   1201       if (st->agc_level<1)
   1202          st->agc_level=1;
   1203       if (st->agc_level>32768)
   1204          st->agc_level=32768;
   1205       break;
   1206    case SPEEX_PREPROCESS_GET_AGC_TARGET:
   1207       (*(spx_int32_t*)ptr) = st->agc_level;
   1208       break;
   1209 #endif
   1210    default:
   1211       speex_warning_int("Unknown speex_preprocess_ctl request: ", request);
   1212       return -1;
   1213    }
   1214    return 0;
   1215 }
   1216 
   1217 #ifdef FIXED_DEBUG
   1218 long long spx_mips=0;
   1219 #endif
   1220 
   1221