Home | History | Annotate | Download | only in libspeex
      1 /* Copyright (C) 2003-2008 Jean-Marc Valin
      2 
      3    File: mdf.c
      4    Echo canceller based on the MDF algorithm (see below)
      5 
      6    Redistribution and use in source and binary forms, with or without
      7    modification, are permitted provided that the following conditions are
      8    met:
      9 
     10    1. Redistributions of source code must retain the above copyright notice,
     11    this list of conditions and the following disclaimer.
     12 
     13    2. Redistributions in binary form must reproduce the above copyright
     14    notice, this list of conditions and the following disclaimer in the
     15    documentation and/or other materials provided with the distribution.
     16 
     17    3. The name of the author may not be used to endorse or promote products
     18    derived from this software without specific prior written permission.
     19 
     20    THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
     21    IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
     22    OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
     23    DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT,
     24    INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
     25    (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
     26    SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     27    HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
     28    STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN
     29    ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     30    POSSIBILITY OF SUCH DAMAGE.
     31 */
     32 
     33 /*
     34    The echo canceller is based on the MDF algorithm described in:
     35 
     36    J. S. Soo, K. K. Pang Multidelay block frequency adaptive filter,
     37    IEEE Trans. Acoust. Speech Signal Process., Vol. ASSP-38, No. 2,
     38    February 1990.
     39 
     40    We use the Alternatively Updated MDF (AUMDF) variant. Robustness to
     41    double-talk is achieved using a variable learning rate as described in:
     42 
     43    Valin, J.-M., On Adjusting the Learning Rate in Frequency Domain Echo
     44    Cancellation With Double-Talk. IEEE Transactions on Audio,
     45    Speech and Language Processing, Vol. 15, No. 3, pp. 1030-1034, 2007.
     46    http://people.xiph.org/~jm/papers/valin_taslp2006.pdf
     47 
     48    There is no explicit double-talk detection, but a continuous variation
     49    in the learning rate based on residual echo, double-talk and background
     50    noise.
     51 
     52    About the fixed-point version:
     53    All the signals are represented with 16-bit words. The filter weights
     54    are represented with 32-bit words, but only the top 16 bits are used
     55    in most cases. The lower 16 bits are completely unreliable (due to the
     56    fact that the update is done only on the top bits), but help in the
     57    adaptation -- probably by removing a "threshold effect" due to
     58    quantization (rounding going to zero) when the gradient is small.
     59 
     60    Another kludge that seems to work good: when performing the weight
     61    update, we only move half the way toward the "goal" this seems to
     62    reduce the effect of quantization noise in the update phase. This
     63    can be seen as applying a gradient descent on a "soft constraint"
     64    instead of having a hard constraint.
     65 
     66 */
     67 
     68 #ifdef HAVE_CONFIG_H
     69 #include "config.h"
     70 #endif
     71 
     72 #include "arch.h"
     73 #include "speex/speex_echo.h"
     74 #include "fftwrap.h"
     75 #include "pseudofloat.h"
     76 #include "math_approx.h"
     77 #include "os_support.h"
     78 
     79 #ifndef M_PI
     80 #define M_PI 3.14159265358979323846
     81 #endif
     82 
     83 #ifdef FIXED_POINT
     84 #define WEIGHT_SHIFT 11
     85 #define NORMALIZE_SCALEDOWN 5
     86 #define NORMALIZE_SCALEUP 3
     87 #else
     88 #define WEIGHT_SHIFT 0
     89 #endif
     90 
     91 #ifdef FIXED_POINT
     92 #define WORD2INT(x) ((x) < -32767 ? -32768 : ((x) > 32766 ? 32767 : (x)))
     93 #else
     94 #define WORD2INT(x) ((x) < -32767.5f ? -32768 : ((x) > 32766.5f ? 32767 : floor(.5+(x))))
     95 #endif
     96 
     97 /* If enabled, the AEC will use a foreground filter and a background filter to be more robust to double-talk
     98    and difficult signals in general. The cost is an extra FFT and a matrix-vector multiply */
     99 #define TWO_PATH
    100 
    101 #ifdef FIXED_POINT
    102 static const spx_float_t MIN_LEAK = {20972, -22};
    103 
    104 /* Constants for the two-path filter */
    105 static const spx_float_t VAR1_SMOOTH = {23593, -16};
    106 static const spx_float_t VAR2_SMOOTH = {23675, -15};
    107 static const spx_float_t VAR1_UPDATE = {16384, -15};
    108 static const spx_float_t VAR2_UPDATE = {16384, -16};
    109 static const spx_float_t VAR_BACKTRACK = {16384, -12};
    110 #define TOP16(x) ((x)>>16)
    111 
    112 #else
    113 
    114 static const spx_float_t MIN_LEAK = .005f;
    115 
    116 /* Constants for the two-path filter */
    117 static const spx_float_t VAR1_SMOOTH = .36f;
    118 static const spx_float_t VAR2_SMOOTH = .7225f;
    119 static const spx_float_t VAR1_UPDATE = .5f;
    120 static const spx_float_t VAR2_UPDATE = .25f;
    121 static const spx_float_t VAR_BACKTRACK = 4.f;
    122 #define TOP16(x) (x)
    123 #endif
    124 
    125 
    126 #define PLAYBACK_DELAY 2
    127 
    128 void speex_echo_get_residual(SpeexEchoState *st, spx_word32_t *Yout, int len);
    129 
    130 
    131 /** Speex echo cancellation state. */
    132 struct SpeexEchoState_ {
    133    int frame_size;           /**< Number of samples processed each time */
    134    int window_size;
    135    int M;
    136    int cancel_count;
    137    int adapted;
    138    int saturated;
    139    int screwed_up;
    140    int C;                    /** Number of input channels (microphones) */
    141    int K;                    /** Number of output channels (loudspeakers) */
    142    spx_int32_t sampling_rate;
    143    spx_word16_t spec_average;
    144    spx_word16_t beta0;
    145    spx_word16_t beta_max;
    146    spx_word32_t sum_adapt;
    147    spx_word16_t leak_estimate;
    148 
    149    spx_word16_t *e;      /* scratch */
    150    spx_word16_t *x;      /* Far-end input buffer (2N) */
    151    spx_word16_t *X;      /* Far-end buffer (M+1 frames) in frequency domain */
    152    spx_word16_t *input;  /* scratch */
    153    spx_word16_t *y;      /* scratch */
    154    spx_word16_t *last_y;
    155    spx_word16_t *Y;      /* scratch */
    156    spx_word16_t *E;
    157    spx_word32_t *PHI;    /* scratch */
    158    spx_word32_t *W;      /* (Background) filter weights */
    159 #ifdef TWO_PATH
    160    spx_word16_t *foreground; /* Foreground filter weights */
    161    spx_word32_t  Davg1;  /* 1st recursive average of the residual power difference */
    162    spx_word32_t  Davg2;  /* 2nd recursive average of the residual power difference */
    163    spx_float_t   Dvar1;  /* Estimated variance of 1st estimator */
    164    spx_float_t   Dvar2;  /* Estimated variance of 2nd estimator */
    165 #endif
    166    spx_word32_t *power;  /* Power of the far-end signal */
    167    spx_float_t  *power_1;/* Inverse power of far-end */
    168    spx_word16_t *wtmp;   /* scratch */
    169 #ifdef FIXED_POINT
    170    spx_word16_t *wtmp2;  /* scratch */
    171 #endif
    172    spx_word32_t *Rf;     /* scratch */
    173    spx_word32_t *Yf;     /* scratch */
    174    spx_word32_t *Xf;     /* scratch */
    175    spx_word32_t *Eh;
    176    spx_word32_t *Yh;
    177    spx_float_t   Pey;
    178    spx_float_t   Pyy;
    179    spx_word16_t *window;
    180    spx_word16_t *prop;
    181    void *fft_table;
    182    spx_word16_t *memX, *memD, *memE;
    183    spx_word16_t preemph;
    184    spx_word16_t notch_radius;
    185    spx_mem_t *notch_mem;
    186 
    187    /* NOTE: If you only use speex_echo_cancel() and want to save some memory, remove this */
    188    spx_int16_t *play_buf;
    189    int play_buf_pos;
    190    int play_buf_started;
    191 };
    192 
    193 static inline void filter_dc_notch16(const spx_int16_t *in, spx_word16_t radius, spx_word16_t *out, int len, spx_mem_t *mem, int stride)
    194 {
    195    int i;
    196    spx_word16_t den2;
    197 #ifdef FIXED_POINT
    198    den2 = MULT16_16_Q15(radius,radius) + MULT16_16_Q15(QCONST16(.7,15),MULT16_16_Q15(32767-radius,32767-radius));
    199 #else
    200    den2 = radius*radius + .7*(1-radius)*(1-radius);
    201 #endif
    202    /*printf ("%d %d %d %d %d %d\n", num[0], num[1], num[2], den[0], den[1], den[2]);*/
    203    for (i=0;i<len;i++)
    204    {
    205       spx_word16_t vin = in[i*stride];
    206       spx_word32_t vout = mem[0] + SHL32(EXTEND32(vin),15);
    207 #ifdef FIXED_POINT
    208       mem[0] = mem[1] + SHL32(SHL32(-EXTEND32(vin),15) + MULT16_32_Q15(radius,vout),1);
    209 #else
    210       mem[0] = mem[1] + 2*(-vin + radius*vout);
    211 #endif
    212       mem[1] = SHL32(EXTEND32(vin),15) - MULT16_32_Q15(den2,vout);
    213       out[i] = SATURATE32(PSHR32(MULT16_32_Q15(radius,vout),15),32767);
    214    }
    215 }
    216 
    217 /* This inner product is slightly different from the codec version because of fixed-point */
    218 static inline spx_word32_t mdf_inner_prod(const spx_word16_t *x, const spx_word16_t *y, int len)
    219 {
    220    spx_word32_t sum=0;
    221    len >>= 1;
    222    while(len--)
    223    {
    224       spx_word32_t part=0;
    225       part = MAC16_16(part,*x++,*y++);
    226       part = MAC16_16(part,*x++,*y++);
    227       /* HINT: If you had a 40-bit accumulator, you could shift only at the end */
    228       sum = ADD32(sum,SHR32(part,6));
    229    }
    230    return sum;
    231 }
    232 
    233 /** Compute power spectrum of a half-complex (packed) vector */
    234 static inline void power_spectrum(const spx_word16_t *X, spx_word32_t *ps, int N)
    235 {
    236    int i, j;
    237    ps[0]=MULT16_16(X[0],X[0]);
    238    for (i=1,j=1;i<N-1;i+=2,j++)
    239    {
    240       ps[j] =  MULT16_16(X[i],X[i]) + MULT16_16(X[i+1],X[i+1]);
    241    }
    242    ps[j]=MULT16_16(X[i],X[i]);
    243 }
    244 
    245 /** Compute power spectrum of a half-complex (packed) vector and accumulate */
    246 static inline void power_spectrum_accum(const spx_word16_t *X, spx_word32_t *ps, int N)
    247 {
    248    int i, j;
    249    ps[0]+=MULT16_16(X[0],X[0]);
    250    for (i=1,j=1;i<N-1;i+=2,j++)
    251    {
    252       ps[j] +=  MULT16_16(X[i],X[i]) + MULT16_16(X[i+1],X[i+1]);
    253    }
    254    ps[j]+=MULT16_16(X[i],X[i]);
    255 }
    256 
    257 /** Compute cross-power spectrum of a half-complex (packed) vectors and add to acc */
    258 #ifdef FIXED_POINT
    259 static inline void spectral_mul_accum(const spx_word16_t *X, const spx_word32_t *Y, spx_word16_t *acc, int N, int M)
    260 {
    261    int i,j;
    262    spx_word32_t tmp1=0,tmp2=0;
    263    for (j=0;j<M;j++)
    264    {
    265       tmp1 = MAC16_16(tmp1, X[j*N],TOP16(Y[j*N]));
    266    }
    267    acc[0] = PSHR32(tmp1,WEIGHT_SHIFT);
    268    for (i=1;i<N-1;i+=2)
    269    {
    270       tmp1 = tmp2 = 0;
    271       for (j=0;j<M;j++)
    272       {
    273          tmp1 = SUB32(MAC16_16(tmp1, X[j*N+i],TOP16(Y[j*N+i])), MULT16_16(X[j*N+i+1],TOP16(Y[j*N+i+1])));
    274          tmp2 = MAC16_16(MAC16_16(tmp2, X[j*N+i+1],TOP16(Y[j*N+i])), X[j*N+i], TOP16(Y[j*N+i+1]));
    275       }
    276       acc[i] = PSHR32(tmp1,WEIGHT_SHIFT);
    277       acc[i+1] = PSHR32(tmp2,WEIGHT_SHIFT);
    278    }
    279    tmp1 = tmp2 = 0;
    280    for (j=0;j<M;j++)
    281    {
    282       tmp1 = MAC16_16(tmp1, X[(j+1)*N-1],TOP16(Y[(j+1)*N-1]));
    283    }
    284    acc[N-1] = PSHR32(tmp1,WEIGHT_SHIFT);
    285 }
    286 static inline void spectral_mul_accum16(const spx_word16_t *X, const spx_word16_t *Y, spx_word16_t *acc, int N, int M)
    287 {
    288    int i,j;
    289    spx_word32_t tmp1=0,tmp2=0;
    290    for (j=0;j<M;j++)
    291    {
    292       tmp1 = MAC16_16(tmp1, X[j*N],Y[j*N]);
    293    }
    294    acc[0] = PSHR32(tmp1,WEIGHT_SHIFT);
    295    for (i=1;i<N-1;i+=2)
    296    {
    297       tmp1 = tmp2 = 0;
    298       for (j=0;j<M;j++)
    299       {
    300          tmp1 = SUB32(MAC16_16(tmp1, X[j*N+i],Y[j*N+i]), MULT16_16(X[j*N+i+1],Y[j*N+i+1]));
    301          tmp2 = MAC16_16(MAC16_16(tmp2, X[j*N+i+1],Y[j*N+i]), X[j*N+i], Y[j*N+i+1]);
    302       }
    303       acc[i] = PSHR32(tmp1,WEIGHT_SHIFT);
    304       acc[i+1] = PSHR32(tmp2,WEIGHT_SHIFT);
    305    }
    306    tmp1 = tmp2 = 0;
    307    for (j=0;j<M;j++)
    308    {
    309       tmp1 = MAC16_16(tmp1, X[(j+1)*N-1],Y[(j+1)*N-1]);
    310    }
    311    acc[N-1] = PSHR32(tmp1,WEIGHT_SHIFT);
    312 }
    313 
    314 #else
    315 static inline void spectral_mul_accum(const spx_word16_t *X, const spx_word32_t *Y, spx_word16_t *acc, int N, int M)
    316 {
    317    int i,j;
    318    for (i=0;i<N;i++)
    319       acc[i] = 0;
    320    for (j=0;j<M;j++)
    321    {
    322       acc[0] += X[0]*Y[0];
    323       for (i=1;i<N-1;i+=2)
    324       {
    325          acc[i] += (X[i]*Y[i] - X[i+1]*Y[i+1]);
    326          acc[i+1] += (X[i+1]*Y[i] + X[i]*Y[i+1]);
    327       }
    328       acc[i] += X[i]*Y[i];
    329       X += N;
    330       Y += N;
    331    }
    332 }
    333 #define spectral_mul_accum16 spectral_mul_accum
    334 #endif
    335 
    336 /** Compute weighted cross-power spectrum of a half-complex (packed) vector with conjugate */
    337 static inline void weighted_spectral_mul_conj(const spx_float_t *w, const spx_float_t p, const spx_word16_t *X, const spx_word16_t *Y, spx_word32_t *prod, int N)
    338 {
    339    int i, j;
    340    spx_float_t W;
    341    W = FLOAT_AMULT(p, w[0]);
    342    prod[0] = FLOAT_MUL32(W,MULT16_16(X[0],Y[0]));
    343    for (i=1,j=1;i<N-1;i+=2,j++)
    344    {
    345       W = FLOAT_AMULT(p, w[j]);
    346       prod[i] = FLOAT_MUL32(W,MAC16_16(MULT16_16(X[i],Y[i]), X[i+1],Y[i+1]));
    347       prod[i+1] = FLOAT_MUL32(W,MAC16_16(MULT16_16(-X[i+1],Y[i]), X[i],Y[i+1]));
    348    }
    349    W = FLOAT_AMULT(p, w[j]);
    350    prod[i] = FLOAT_MUL32(W,MULT16_16(X[i],Y[i]));
    351 }
    352 
    353 static inline void mdf_adjust_prop(const spx_word32_t *W, int N, int M, int P, spx_word16_t *prop)
    354 {
    355    int i, j, p;
    356    spx_word16_t max_sum = 1;
    357    spx_word32_t prop_sum = 1;
    358    for (i=0;i<M;i++)
    359    {
    360       spx_word32_t tmp = 1;
    361       for (p=0;p<P;p++)
    362          for (j=0;j<N;j++)
    363             tmp += MULT16_16(EXTRACT16(SHR32(W[p*N*M + i*N+j],18)), EXTRACT16(SHR32(W[p*N*M + i*N+j],18)));
    364 #ifdef FIXED_POINT
    365       /* Just a security in case an overflow were to occur */
    366       tmp = MIN32(ABS32(tmp), 536870912);
    367 #endif
    368       prop[i] = spx_sqrt(tmp);
    369       if (prop[i] > max_sum)
    370          max_sum = prop[i];
    371    }
    372    for (i=0;i<M;i++)
    373    {
    374       prop[i] += MULT16_16_Q15(QCONST16(.1f,15),max_sum);
    375       prop_sum += EXTEND32(prop[i]);
    376    }
    377    for (i=0;i<M;i++)
    378    {
    379       prop[i] = DIV32(MULT16_16(QCONST16(.99f,15), prop[i]),prop_sum);
    380       /*printf ("%f ", prop[i]);*/
    381    }
    382    /*printf ("\n");*/
    383 }
    384 
    385 #ifdef DUMP_ECHO_CANCEL_DATA
    386 #include <stdio.h>
    387 static FILE *rFile=NULL, *pFile=NULL, *oFile=NULL;
    388 
    389 static void dump_audio(const spx_int16_t *rec, const spx_int16_t *play, const spx_int16_t *out, int len)
    390 {
    391    if (!(rFile && pFile && oFile))
    392    {
    393       speex_fatal("Dump files not open");
    394    }
    395    fwrite(rec, sizeof(spx_int16_t), len, rFile);
    396    fwrite(play, sizeof(spx_int16_t), len, pFile);
    397    fwrite(out, sizeof(spx_int16_t), len, oFile);
    398 }
    399 #endif
    400 
    401 /** Creates a new echo canceller state */
    402 EXPORT SpeexEchoState *speex_echo_state_init(int frame_size, int filter_length)
    403 {
    404    return speex_echo_state_init_mc(frame_size, filter_length, 1, 1);
    405 }
    406 
    407 EXPORT SpeexEchoState *speex_echo_state_init_mc(int frame_size, int filter_length, int nb_mic, int nb_speakers)
    408 {
    409    int i,N,M, C, K;
    410    SpeexEchoState *st = (SpeexEchoState *)speex_alloc(sizeof(SpeexEchoState));
    411 
    412    st->K = nb_speakers;
    413    st->C = nb_mic;
    414    C=st->C;
    415    K=st->K;
    416 #ifdef DUMP_ECHO_CANCEL_DATA
    417    if (rFile || pFile || oFile)
    418       speex_fatal("Opening dump files twice");
    419    rFile = fopen("aec_rec.sw", "wb");
    420    pFile = fopen("aec_play.sw", "wb");
    421    oFile = fopen("aec_out.sw", "wb");
    422 #endif
    423 
    424    st->frame_size = frame_size;
    425    st->window_size = 2*frame_size;
    426    N = st->window_size;
    427    M = st->M = (filter_length+st->frame_size-1)/frame_size;
    428    st->cancel_count=0;
    429    st->sum_adapt = 0;
    430    st->saturated = 0;
    431    st->screwed_up = 0;
    432    /* This is the default sampling rate */
    433    st->sampling_rate = 8000;
    434    st->spec_average = DIV32_16(SHL32(EXTEND32(st->frame_size), 15), st->sampling_rate);
    435 #ifdef FIXED_POINT
    436    st->beta0 = DIV32_16(SHL32(EXTEND32(st->frame_size), 16), st->sampling_rate);
    437    st->beta_max = DIV32_16(SHL32(EXTEND32(st->frame_size), 14), st->sampling_rate);
    438 #else
    439    st->beta0 = (2.0f*st->frame_size)/st->sampling_rate;
    440    st->beta_max = (.5f*st->frame_size)/st->sampling_rate;
    441 #endif
    442    st->leak_estimate = 0;
    443 
    444    st->fft_table = spx_fft_init(N);
    445 
    446    st->e = (spx_word16_t*)speex_alloc(C*N*sizeof(spx_word16_t));
    447    st->x = (spx_word16_t*)speex_alloc(K*N*sizeof(spx_word16_t));
    448    st->input = (spx_word16_t*)speex_alloc(C*st->frame_size*sizeof(spx_word16_t));
    449    st->y = (spx_word16_t*)speex_alloc(C*N*sizeof(spx_word16_t));
    450    st->last_y = (spx_word16_t*)speex_alloc(C*N*sizeof(spx_word16_t));
    451    st->Yf = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t));
    452    st->Rf = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t));
    453    st->Xf = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t));
    454    st->Yh = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t));
    455    st->Eh = (spx_word32_t*)speex_alloc((st->frame_size+1)*sizeof(spx_word32_t));
    456 
    457    st->X = (spx_word16_t*)speex_alloc(K*(M+1)*N*sizeof(spx_word16_t));
    458    st->Y = (spx_word16_t*)speex_alloc(C*N*sizeof(spx_word16_t));
    459    st->E = (spx_word16_t*)speex_alloc(C*N*sizeof(spx_word16_t));
    460    st->W = (spx_word32_t*)speex_alloc(C*K*M*N*sizeof(spx_word32_t));
    461 #ifdef TWO_PATH
    462    st->foreground = (spx_word16_t*)speex_alloc(M*N*C*K*sizeof(spx_word16_t));
    463 #endif
    464    st->PHI = (spx_word32_t*)speex_alloc(N*sizeof(spx_word32_t));
    465    st->power = (spx_word32_t*)speex_alloc((frame_size+1)*sizeof(spx_word32_t));
    466    st->power_1 = (spx_float_t*)speex_alloc((frame_size+1)*sizeof(spx_float_t));
    467    st->window = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t));
    468    st->prop = (spx_word16_t*)speex_alloc(M*sizeof(spx_word16_t));
    469    st->wtmp = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t));
    470 #ifdef FIXED_POINT
    471    st->wtmp2 = (spx_word16_t*)speex_alloc(N*sizeof(spx_word16_t));
    472    for (i=0;i<N>>1;i++)
    473    {
    474       st->window[i] = (16383-SHL16(spx_cos(DIV32_16(MULT16_16(25736,i<<1),N)),1));
    475       st->window[N-i-1] = st->window[i];
    476    }
    477 #else
    478    for (i=0;i<N;i++)
    479       st->window[i] = .5-.5*cos(2*M_PI*i/N);
    480 #endif
    481    for (i=0;i<=st->frame_size;i++)
    482       st->power_1[i] = FLOAT_ONE;
    483    for (i=0;i<N*M*K*C;i++)
    484       st->W[i] = 0;
    485    {
    486       spx_word32_t sum = 0;
    487       /* Ratio of ~10 between adaptation rate of first and last block */
    488       spx_word16_t decay = SHR32(spx_exp(NEG16(DIV32_16(QCONST16(2.4,11),M))),1);
    489       st->prop[0] = QCONST16(.7, 15);
    490       sum = EXTEND32(st->prop[0]);
    491       for (i=1;i<M;i++)
    492       {
    493          st->prop[i] = MULT16_16_Q15(st->prop[i-1], decay);
    494          sum = ADD32(sum, EXTEND32(st->prop[i]));
    495       }
    496       for (i=M-1;i>=0;i--)
    497       {
    498          st->prop[i] = DIV32(MULT16_16(QCONST16(.8f,15), st->prop[i]),sum);
    499       }
    500    }
    501 
    502    st->memX = (spx_word16_t*)speex_alloc(K*sizeof(spx_word16_t));
    503    st->memD = (spx_word16_t*)speex_alloc(C*sizeof(spx_word16_t));
    504    st->memE = (spx_word16_t*)speex_alloc(C*sizeof(spx_word16_t));
    505    st->preemph = QCONST16(.9,15);
    506    if (st->sampling_rate<12000)
    507       st->notch_radius = QCONST16(.9, 15);
    508    else if (st->sampling_rate<24000)
    509       st->notch_radius = QCONST16(.982, 15);
    510    else
    511       st->notch_radius = QCONST16(.992, 15);
    512 
    513    st->notch_mem = (spx_mem_t*)speex_alloc(2*C*sizeof(spx_mem_t));
    514    st->adapted = 0;
    515    st->Pey = st->Pyy = FLOAT_ONE;
    516 
    517 #ifdef TWO_PATH
    518    st->Davg1 = st->Davg2 = 0;
    519    st->Dvar1 = st->Dvar2 = FLOAT_ZERO;
    520 #endif
    521 
    522    st->play_buf = (spx_int16_t*)speex_alloc(K*(PLAYBACK_DELAY+1)*st->frame_size*sizeof(spx_int16_t));
    523    st->play_buf_pos = PLAYBACK_DELAY*st->frame_size;
    524    st->play_buf_started = 0;
    525 
    526    return st;
    527 }
    528 
    529 /** Resets echo canceller state */
    530 EXPORT void speex_echo_state_reset(SpeexEchoState *st)
    531 {
    532    int i, M, N, C, K;
    533    st->cancel_count=0;
    534    st->screwed_up = 0;
    535    N = st->window_size;
    536    M = st->M;
    537    C=st->C;
    538    K=st->K;
    539    for (i=0;i<N*M;i++)
    540       st->W[i] = 0;
    541 #ifdef TWO_PATH
    542    for (i=0;i<N*M;i++)
    543       st->foreground[i] = 0;
    544 #endif
    545    for (i=0;i<N*(M+1);i++)
    546       st->X[i] = 0;
    547    for (i=0;i<=st->frame_size;i++)
    548    {
    549       st->power[i] = 0;
    550       st->power_1[i] = FLOAT_ONE;
    551       st->Eh[i] = 0;
    552       st->Yh[i] = 0;
    553    }
    554    for (i=0;i<st->frame_size;i++)
    555    {
    556       st->last_y[i] = 0;
    557    }
    558    for (i=0;i<N*C;i++)
    559    {
    560       st->E[i] = 0;
    561    }
    562    for (i=0;i<N*K;i++)
    563    {
    564       st->x[i] = 0;
    565    }
    566    for (i=0;i<2*C;i++)
    567       st->notch_mem[i] = 0;
    568    for (i=0;i<C;i++)
    569       st->memD[i]=st->memE[i]=0;
    570    for (i=0;i<K;i++)
    571       st->memX[i]=0;
    572 
    573    st->saturated = 0;
    574    st->adapted = 0;
    575    st->sum_adapt = 0;
    576    st->Pey = st->Pyy = FLOAT_ONE;
    577 #ifdef TWO_PATH
    578    st->Davg1 = st->Davg2 = 0;
    579    st->Dvar1 = st->Dvar2 = FLOAT_ZERO;
    580 #endif
    581    for (i=0;i<3*st->frame_size;i++)
    582       st->play_buf[i] = 0;
    583    st->play_buf_pos = PLAYBACK_DELAY*st->frame_size;
    584    st->play_buf_started = 0;
    585 
    586 }
    587 
    588 /** Destroys an echo canceller state */
    589 EXPORT void speex_echo_state_destroy(SpeexEchoState *st)
    590 {
    591    spx_fft_destroy(st->fft_table);
    592 
    593    speex_free(st->e);
    594    speex_free(st->x);
    595    speex_free(st->input);
    596    speex_free(st->y);
    597    speex_free(st->last_y);
    598    speex_free(st->Yf);
    599    speex_free(st->Rf);
    600    speex_free(st->Xf);
    601    speex_free(st->Yh);
    602    speex_free(st->Eh);
    603 
    604    speex_free(st->X);
    605    speex_free(st->Y);
    606    speex_free(st->E);
    607    speex_free(st->W);
    608 #ifdef TWO_PATH
    609    speex_free(st->foreground);
    610 #endif
    611    speex_free(st->PHI);
    612    speex_free(st->power);
    613    speex_free(st->power_1);
    614    speex_free(st->window);
    615    speex_free(st->prop);
    616    speex_free(st->wtmp);
    617 #ifdef FIXED_POINT
    618    speex_free(st->wtmp2);
    619 #endif
    620    speex_free(st->memX);
    621    speex_free(st->memD);
    622    speex_free(st->memE);
    623    speex_free(st->notch_mem);
    624 
    625    speex_free(st->play_buf);
    626    speex_free(st);
    627 
    628 #ifdef DUMP_ECHO_CANCEL_DATA
    629    fclose(rFile);
    630    fclose(pFile);
    631    fclose(oFile);
    632    rFile = pFile = oFile = NULL;
    633 #endif
    634 }
    635 
    636 EXPORT void speex_echo_capture(SpeexEchoState *st, const spx_int16_t *rec, spx_int16_t *out)
    637 {
    638    int i;
    639    /*speex_warning_int("capture with fill level ", st->play_buf_pos/st->frame_size);*/
    640    st->play_buf_started = 1;
    641    if (st->play_buf_pos>=st->frame_size)
    642    {
    643       speex_echo_cancellation(st, rec, st->play_buf, out);
    644       st->play_buf_pos -= st->frame_size;
    645       for (i=0;i<st->play_buf_pos;i++)
    646          st->play_buf[i] = st->play_buf[i+st->frame_size];
    647    } else {
    648       speex_warning("No playback frame available (your application is buggy and/or got xruns)");
    649       if (st->play_buf_pos!=0)
    650       {
    651          speex_warning("internal playback buffer corruption?");
    652          st->play_buf_pos = 0;
    653       }
    654       for (i=0;i<st->frame_size;i++)
    655          out[i] = rec[i];
    656    }
    657 }
    658 
    659 EXPORT void speex_echo_playback(SpeexEchoState *st, const spx_int16_t *play)
    660 {
    661    /*speex_warning_int("playback with fill level ", st->play_buf_pos/st->frame_size);*/
    662    if (!st->play_buf_started)
    663    {
    664       speex_warning("discarded first playback frame");
    665       return;
    666    }
    667    if (st->play_buf_pos<=PLAYBACK_DELAY*st->frame_size)
    668    {
    669       int i;
    670       for (i=0;i<st->frame_size;i++)
    671          st->play_buf[st->play_buf_pos+i] = play[i];
    672       st->play_buf_pos += st->frame_size;
    673       if (st->play_buf_pos <= (PLAYBACK_DELAY-1)*st->frame_size)
    674       {
    675          speex_warning("Auto-filling the buffer (your application is buggy and/or got xruns)");
    676          for (i=0;i<st->frame_size;i++)
    677             st->play_buf[st->play_buf_pos+i] = play[i];
    678          st->play_buf_pos += st->frame_size;
    679       }
    680    } else {
    681       speex_warning("Had to discard a playback frame (your application is buggy and/or got xruns)");
    682    }
    683 }
    684 
    685 /** Performs echo cancellation on a frame (deprecated, last arg now ignored) */
    686 EXPORT void speex_echo_cancel(SpeexEchoState *st, const spx_int16_t *in, const spx_int16_t *far_end, spx_int16_t *out, spx_int32_t *Yout)
    687 {
    688    speex_echo_cancellation(st, in, far_end, out);
    689 }
    690 
    691 /** Performs echo cancellation on a frame */
    692 EXPORT void speex_echo_cancellation(SpeexEchoState *st, const spx_int16_t *in, const spx_int16_t *far_end, spx_int16_t *out)
    693 {
    694    int i,j, chan, speak;
    695    int N,M, C, K;
    696    spx_word32_t Syy,See,Sxx,Sdd, Sff;
    697 #ifdef TWO_PATH
    698    spx_word32_t Dbf;
    699    int update_foreground;
    700 #endif
    701    spx_word32_t Sey;
    702    spx_word16_t ss, ss_1;
    703    spx_float_t Pey = FLOAT_ONE, Pyy=FLOAT_ONE;
    704    spx_float_t alpha, alpha_1;
    705    spx_word16_t RER;
    706    spx_word32_t tmp32;
    707 
    708    N = st->window_size;
    709    M = st->M;
    710    C = st->C;
    711    K = st->K;
    712 
    713    st->cancel_count++;
    714 #ifdef FIXED_POINT
    715    ss=DIV32_16(11469,M);
    716    ss_1 = SUB16(32767,ss);
    717 #else
    718    ss=.35/M;
    719    ss_1 = 1-ss;
    720 #endif
    721 
    722    for (chan = 0; chan < C; chan++)
    723    {
    724       /* Apply a notch filter to make sure DC doesn't end up causing problems */
    725       filter_dc_notch16(in+chan, st->notch_radius, st->input+chan*st->frame_size, st->frame_size, st->notch_mem+2*chan, C);
    726       /* Copy input data to buffer and apply pre-emphasis */
    727       /* Copy input data to buffer */
    728       for (i=0;i<st->frame_size;i++)
    729       {
    730          spx_word32_t tmp32;
    731          /* FIXME: This core has changed a bit, need to merge properly */
    732          tmp32 = SUB32(EXTEND32(st->input[chan*st->frame_size+i]), EXTEND32(MULT16_16_P15(st->preemph, st->memD[chan])));
    733 #ifdef FIXED_POINT
    734          if (tmp32 > 32767)
    735          {
    736             tmp32 = 32767;
    737             if (st->saturated == 0)
    738                st->saturated = 1;
    739          }
    740          if (tmp32 < -32767)
    741          {
    742             tmp32 = -32767;
    743             if (st->saturated == 0)
    744                st->saturated = 1;
    745          }
    746 #endif
    747          st->memD[chan] = st->input[chan*st->frame_size+i];
    748          st->input[chan*st->frame_size+i] = EXTRACT16(tmp32);
    749       }
    750    }
    751 
    752    for (speak = 0; speak < K; speak++)
    753    {
    754       for (i=0;i<st->frame_size;i++)
    755       {
    756          spx_word32_t tmp32;
    757          st->x[speak*N+i] = st->x[speak*N+i+st->frame_size];
    758          tmp32 = SUB32(EXTEND32(far_end[i*K+speak]), EXTEND32(MULT16_16_P15(st->preemph, st->memX[speak])));
    759 #ifdef FIXED_POINT
    760          /*FIXME: If saturation occurs here, we need to freeze adaptation for M frames (not just one) */
    761          if (tmp32 > 32767)
    762          {
    763             tmp32 = 32767;
    764             st->saturated = M+1;
    765          }
    766          if (tmp32 < -32767)
    767          {
    768             tmp32 = -32767;
    769             st->saturated = M+1;
    770          }
    771 #endif
    772          st->x[speak*N+i+st->frame_size] = EXTRACT16(tmp32);
    773          st->memX[speak] = far_end[i*K+speak];
    774       }
    775    }
    776 
    777    for (speak = 0; speak < K; speak++)
    778    {
    779       /* Shift memory: this could be optimized eventually*/
    780       for (j=M-1;j>=0;j--)
    781       {
    782          for (i=0;i<N;i++)
    783             st->X[(j+1)*N*K+speak*N+i] = st->X[j*N*K+speak*N+i];
    784       }
    785       /* Convert x (echo input) to frequency domain */
    786       spx_fft(st->fft_table, st->x+speak*N, &st->X[speak*N]);
    787    }
    788 
    789    Sxx = 0;
    790    for (speak = 0; speak < K; speak++)
    791    {
    792       Sxx += mdf_inner_prod(st->x+speak*N+st->frame_size, st->x+speak*N+st->frame_size, st->frame_size);
    793       power_spectrum_accum(st->X+speak*N, st->Xf, N);
    794    }
    795 
    796    Sff = 0;
    797    for (chan = 0; chan < C; chan++)
    798    {
    799 #ifdef TWO_PATH
    800       /* Compute foreground filter */
    801       spectral_mul_accum16(st->X, st->foreground+chan*N*K*M, st->Y+chan*N, N, M*K);
    802       spx_ifft(st->fft_table, st->Y+chan*N, st->e+chan*N);
    803       for (i=0;i<st->frame_size;i++)
    804          st->e[chan*N+i] = SUB16(st->input[chan*st->frame_size+i], st->e[chan*N+i+st->frame_size]);
    805       Sff += mdf_inner_prod(st->e+chan*N, st->e+chan*N, st->frame_size);
    806 #endif
    807    }
    808 
    809    /* Adjust proportional adaption rate */
    810    /* FIXME: Adjust that for C, K*/
    811    if (st->adapted)
    812       mdf_adjust_prop (st->W, N, M, C*K, st->prop);
    813    /* Compute weight gradient */
    814    if (st->saturated == 0)
    815    {
    816       for (chan = 0; chan < C; chan++)
    817       {
    818          for (speak = 0; speak < K; speak++)
    819          {
    820             for (j=M-1;j>=0;j--)
    821             {
    822                weighted_spectral_mul_conj(st->power_1, FLOAT_SHL(PSEUDOFLOAT(st->prop[j]),-15), &st->X[(j+1)*N*K+speak*N], st->E+chan*N, st->PHI, N);
    823                for (i=0;i<N;i++)
    824                   st->W[chan*N*K*M + j*N*K + speak*N + i] += st->PHI[i];
    825             }
    826          }
    827       }
    828    } else {
    829       st->saturated--;
    830    }
    831 
    832    /* FIXME: MC conversion required */
    833    /* Update weight to prevent circular convolution (MDF / AUMDF) */
    834    for (chan = 0; chan < C; chan++)
    835    {
    836       for (speak = 0; speak < K; speak++)
    837       {
    838          for (j=0;j<M;j++)
    839          {
    840             /* This is a variant of the Alternatively Updated MDF (AUMDF) */
    841             /* Remove the "if" to make this an MDF filter */
    842             if (j==0 || st->cancel_count%(M-1) == j-1)
    843             {
    844 #ifdef FIXED_POINT
    845                for (i=0;i<N;i++)
    846                   st->wtmp2[i] = EXTRACT16(PSHR32(st->W[chan*N*K*M + j*N*K + speak*N + i],NORMALIZE_SCALEDOWN+16));
    847                spx_ifft(st->fft_table, st->wtmp2, st->wtmp);
    848                for (i=0;i<st->frame_size;i++)
    849                {
    850                   st->wtmp[i]=0;
    851                }
    852                for (i=st->frame_size;i<N;i++)
    853                {
    854                   st->wtmp[i]=SHL16(st->wtmp[i],NORMALIZE_SCALEUP);
    855                }
    856                spx_fft(st->fft_table, st->wtmp, st->wtmp2);
    857                /* The "-1" in the shift is a sort of kludge that trades less efficient update speed for decrease noise */
    858                for (i=0;i<N;i++)
    859                   st->W[chan*N*K*M + j*N*K + speak*N + i] -= SHL32(EXTEND32(st->wtmp2[i]),16+NORMALIZE_SCALEDOWN-NORMALIZE_SCALEUP-1);
    860 #else
    861                spx_ifft(st->fft_table, &st->W[chan*N*K*M + j*N*K + speak*N], st->wtmp);
    862                for (i=st->frame_size;i<N;i++)
    863                {
    864                   st->wtmp[i]=0;
    865                }
    866                spx_fft(st->fft_table, st->wtmp, &st->W[chan*N*K*M + j*N*K + speak*N]);
    867 #endif
    868             }
    869          }
    870       }
    871    }
    872 
    873    /* So we can use power_spectrum_accum */
    874    for (i=0;i<=st->frame_size;i++)
    875       st->Rf[i] = st->Yf[i] = st->Xf[i] = 0;
    876 
    877    Dbf = 0;
    878    See = 0;
    879 #ifdef TWO_PATH
    880    /* Difference in response, this is used to estimate the variance of our residual power estimate */
    881    for (chan = 0; chan < C; chan++)
    882    {
    883       spectral_mul_accum(st->X, st->W+chan*N*K*M, st->Y+chan*N, N, M*K);
    884       spx_ifft(st->fft_table, st->Y+chan*N, st->y+chan*N);
    885       for (i=0;i<st->frame_size;i++)
    886          st->e[chan*N+i] = SUB16(st->e[chan*N+i+st->frame_size], st->y[chan*N+i+st->frame_size]);
    887       Dbf += 10+mdf_inner_prod(st->e+chan*N, st->e+chan*N, st->frame_size);
    888       for (i=0;i<st->frame_size;i++)
    889          st->e[chan*N+i] = SUB16(st->input[chan*st->frame_size+i], st->y[chan*N+i+st->frame_size]);
    890       See += mdf_inner_prod(st->e+chan*N, st->e+chan*N, st->frame_size);
    891    }
    892 #endif
    893 
    894 #ifndef TWO_PATH
    895    Sff = See;
    896 #endif
    897 
    898 #ifdef TWO_PATH
    899    /* Logic for updating the foreground filter */
    900 
    901    /* For two time windows, compute the mean of the energy difference, as well as the variance */
    902    st->Davg1 = ADD32(MULT16_32_Q15(QCONST16(.6f,15),st->Davg1), MULT16_32_Q15(QCONST16(.4f,15),SUB32(Sff,See)));
    903    st->Davg2 = ADD32(MULT16_32_Q15(QCONST16(.85f,15),st->Davg2), MULT16_32_Q15(QCONST16(.15f,15),SUB32(Sff,See)));
    904    st->Dvar1 = FLOAT_ADD(FLOAT_MULT(VAR1_SMOOTH, st->Dvar1), FLOAT_MUL32U(MULT16_32_Q15(QCONST16(.4f,15),Sff), MULT16_32_Q15(QCONST16(.4f,15),Dbf)));
    905    st->Dvar2 = FLOAT_ADD(FLOAT_MULT(VAR2_SMOOTH, st->Dvar2), FLOAT_MUL32U(MULT16_32_Q15(QCONST16(.15f,15),Sff), MULT16_32_Q15(QCONST16(.15f,15),Dbf)));
    906 
    907    /* Equivalent float code:
    908    st->Davg1 = .6*st->Davg1 + .4*(Sff-See);
    909    st->Davg2 = .85*st->Davg2 + .15*(Sff-See);
    910    st->Dvar1 = .36*st->Dvar1 + .16*Sff*Dbf;
    911    st->Dvar2 = .7225*st->Dvar2 + .0225*Sff*Dbf;
    912    */
    913 
    914    update_foreground = 0;
    915    /* Check if we have a statistically significant reduction in the residual echo */
    916    /* Note that this is *not* Gaussian, so we need to be careful about the longer tail */
    917    if (FLOAT_GT(FLOAT_MUL32U(SUB32(Sff,See),ABS32(SUB32(Sff,See))), FLOAT_MUL32U(Sff,Dbf)))
    918       update_foreground = 1;
    919    else if (FLOAT_GT(FLOAT_MUL32U(st->Davg1, ABS32(st->Davg1)), FLOAT_MULT(VAR1_UPDATE,(st->Dvar1))))
    920       update_foreground = 1;
    921    else if (FLOAT_GT(FLOAT_MUL32U(st->Davg2, ABS32(st->Davg2)), FLOAT_MULT(VAR2_UPDATE,(st->Dvar2))))
    922       update_foreground = 1;
    923 
    924    /* Do we update? */
    925    if (update_foreground)
    926    {
    927       st->Davg1 = st->Davg2 = 0;
    928       st->Dvar1 = st->Dvar2 = FLOAT_ZERO;
    929       /* Copy background filter to foreground filter */
    930       for (i=0;i<N*M*C*K;i++)
    931          st->foreground[i] = EXTRACT16(PSHR32(st->W[i],16));
    932       /* Apply a smooth transition so as to not introduce blocking artifacts */
    933       for (chan = 0; chan < C; chan++)
    934          for (i=0;i<st->frame_size;i++)
    935             st->e[chan*N+i+st->frame_size] = MULT16_16_Q15(st->window[i+st->frame_size],st->e[chan*N+i+st->frame_size]) + MULT16_16_Q15(st->window[i],st->y[chan*N+i+st->frame_size]);
    936    } else {
    937       int reset_background=0;
    938       /* Otherwise, check if the background filter is significantly worse */
    939       if (FLOAT_GT(FLOAT_MUL32U(NEG32(SUB32(Sff,See)),ABS32(SUB32(Sff,See))), FLOAT_MULT(VAR_BACKTRACK,FLOAT_MUL32U(Sff,Dbf))))
    940          reset_background = 1;
    941       if (FLOAT_GT(FLOAT_MUL32U(NEG32(st->Davg1), ABS32(st->Davg1)), FLOAT_MULT(VAR_BACKTRACK,st->Dvar1)))
    942          reset_background = 1;
    943       if (FLOAT_GT(FLOAT_MUL32U(NEG32(st->Davg2), ABS32(st->Davg2)), FLOAT_MULT(VAR_BACKTRACK,st->Dvar2)))
    944          reset_background = 1;
    945       if (reset_background)
    946       {
    947          /* Copy foreground filter to background filter */
    948          for (i=0;i<N*M*C*K;i++)
    949             st->W[i] = SHL32(EXTEND32(st->foreground[i]),16);
    950          /* We also need to copy the output so as to get correct adaptation */
    951          for (chan = 0; chan < C; chan++)
    952          {
    953             for (i=0;i<st->frame_size;i++)
    954                st->y[chan*N+i+st->frame_size] = st->e[chan*N+i+st->frame_size];
    955             for (i=0;i<st->frame_size;i++)
    956                st->e[chan*N+i] = SUB16(st->input[chan*st->frame_size+i], st->y[chan*N+i+st->frame_size]);
    957          }
    958          See = Sff;
    959          st->Davg1 = st->Davg2 = 0;
    960          st->Dvar1 = st->Dvar2 = FLOAT_ZERO;
    961       }
    962    }
    963 #endif
    964 
    965    Sey = Syy = Sdd = 0;
    966    for (chan = 0; chan < C; chan++)
    967    {
    968       /* Compute error signal (for the output with de-emphasis) */
    969       for (i=0;i<st->frame_size;i++)
    970       {
    971          spx_word32_t tmp_out;
    972 #ifdef TWO_PATH
    973          tmp_out = SUB32(EXTEND32(st->input[chan*st->frame_size+i]), EXTEND32(st->e[chan*N+i+st->frame_size]));
    974 #else
    975          tmp_out = SUB32(EXTEND32(st->input[chan*st->frame_size+i]), EXTEND32(st->y[chan*N+i+st->frame_size]));
    976 #endif
    977          tmp_out = ADD32(tmp_out, EXTEND32(MULT16_16_P15(st->preemph, st->memE[chan])));
    978       /* This is an arbitrary test for saturation in the microphone signal */
    979          if (in[i*C+chan] <= -32000 || in[i*C+chan] >= 32000)
    980          {
    981          if (st->saturated == 0)
    982             st->saturated = 1;
    983          }
    984          out[i*C+chan] = WORD2INT(tmp_out);
    985          st->memE[chan] = tmp_out;
    986       }
    987 
    988 #ifdef DUMP_ECHO_CANCEL_DATA
    989       dump_audio(in, far_end, out, st->frame_size);
    990 #endif
    991 
    992       /* Compute error signal (filter update version) */
    993       for (i=0;i<st->frame_size;i++)
    994       {
    995          st->e[chan*N+i+st->frame_size] = st->e[chan*N+i];
    996          st->e[chan*N+i] = 0;
    997       }
    998 
    999       /* Compute a bunch of correlations */
   1000       /* FIXME: bad merge */
   1001       Sey += mdf_inner_prod(st->e+chan*N+st->frame_size, st->y+chan*N+st->frame_size, st->frame_size);
   1002       Syy += mdf_inner_prod(st->y+chan*N+st->frame_size, st->y+chan*N+st->frame_size, st->frame_size);
   1003       Sdd += mdf_inner_prod(st->input+chan*st->frame_size, st->input+chan*st->frame_size, st->frame_size);
   1004 
   1005       /* Convert error to frequency domain */
   1006       spx_fft(st->fft_table, st->e+chan*N, st->E+chan*N);
   1007       for (i=0;i<st->frame_size;i++)
   1008          st->y[i+chan*N] = 0;
   1009       spx_fft(st->fft_table, st->y+chan*N, st->Y+chan*N);
   1010 
   1011       /* Compute power spectrum of echo (X), error (E) and filter response (Y) */
   1012       power_spectrum_accum(st->E+chan*N, st->Rf, N);
   1013       power_spectrum_accum(st->Y+chan*N, st->Yf, N);
   1014 
   1015    }
   1016 
   1017    /*printf ("%f %f %f %f\n", Sff, See, Syy, Sdd, st->update_cond);*/
   1018 
   1019    /* Do some sanity check */
   1020    if (!(Syy>=0 && Sxx>=0 && See >= 0)
   1021 #ifndef FIXED_POINT
   1022        || !(Sff < N*1e9 && Syy < N*1e9 && Sxx < N*1e9)
   1023 #endif
   1024       )
   1025    {
   1026       /* Things have gone really bad */
   1027       st->screwed_up += 50;
   1028       for (i=0;i<st->frame_size*C;i++)
   1029          out[i] = 0;
   1030    } else if (SHR32(Sff, 2) > ADD32(Sdd, SHR32(MULT16_16(N, 10000),6)))
   1031    {
   1032       /* AEC seems to add lots of echo instead of removing it, let's see if it will improve */
   1033       st->screwed_up++;
   1034    } else {
   1035       /* Everything's fine */
   1036       st->screwed_up=0;
   1037    }
   1038    if (st->screwed_up>=50)
   1039    {
   1040       speex_warning("The echo canceller started acting funny and got slapped (reset). It swears it will behave now.");
   1041       speex_echo_state_reset(st);
   1042       return;
   1043    }
   1044 
   1045    /* Add a small noise floor to make sure not to have problems when dividing */
   1046    See = MAX32(See, SHR32(MULT16_16(N, 100),6));
   1047 
   1048    for (speak = 0; speak < K; speak++)
   1049    {
   1050       Sxx += mdf_inner_prod(st->x+speak*N+st->frame_size, st->x+speak*N+st->frame_size, st->frame_size);
   1051       power_spectrum_accum(st->X+speak*N, st->Xf, N);
   1052    }
   1053 
   1054 
   1055    /* Smooth far end energy estimate over time */
   1056    for (j=0;j<=st->frame_size;j++)
   1057       st->power[j] = MULT16_32_Q15(ss_1,st->power[j]) + 1 + MULT16_32_Q15(ss,st->Xf[j]);
   1058 
   1059    /* Compute filtered spectra and (cross-)correlations */
   1060    for (j=st->frame_size;j>=0;j--)
   1061    {
   1062       spx_float_t Eh, Yh;
   1063       Eh = PSEUDOFLOAT(st->Rf[j] - st->Eh[j]);
   1064       Yh = PSEUDOFLOAT(st->Yf[j] - st->Yh[j]);
   1065       Pey = FLOAT_ADD(Pey,FLOAT_MULT(Eh,Yh));
   1066       Pyy = FLOAT_ADD(Pyy,FLOAT_MULT(Yh,Yh));
   1067 #ifdef FIXED_POINT
   1068       st->Eh[j] = MAC16_32_Q15(MULT16_32_Q15(SUB16(32767,st->spec_average),st->Eh[j]), st->spec_average, st->Rf[j]);
   1069       st->Yh[j] = MAC16_32_Q15(MULT16_32_Q15(SUB16(32767,st->spec_average),st->Yh[j]), st->spec_average, st->Yf[j]);
   1070 #else
   1071       st->Eh[j] = (1-st->spec_average)*st->Eh[j] + st->spec_average*st->Rf[j];
   1072       st->Yh[j] = (1-st->spec_average)*st->Yh[j] + st->spec_average*st->Yf[j];
   1073 #endif
   1074    }
   1075 
   1076    Pyy = FLOAT_SQRT(Pyy);
   1077    Pey = FLOAT_DIVU(Pey,Pyy);
   1078 
   1079    /* Compute correlation updatete rate */
   1080    tmp32 = MULT16_32_Q15(st->beta0,Syy);
   1081    if (tmp32 > MULT16_32_Q15(st->beta_max,See))
   1082       tmp32 = MULT16_32_Q15(st->beta_max,See);
   1083    alpha = FLOAT_DIV32(tmp32, See);
   1084    alpha_1 = FLOAT_SUB(FLOAT_ONE, alpha);
   1085    /* Update correlations (recursive average) */
   1086    st->Pey = FLOAT_ADD(FLOAT_MULT(alpha_1,st->Pey) , FLOAT_MULT(alpha,Pey));
   1087    st->Pyy = FLOAT_ADD(FLOAT_MULT(alpha_1,st->Pyy) , FLOAT_MULT(alpha,Pyy));
   1088    if (FLOAT_LT(st->Pyy, FLOAT_ONE))
   1089       st->Pyy = FLOAT_ONE;
   1090    /* We don't really hope to get better than 33 dB (MIN_LEAK-3dB) attenuation anyway */
   1091    if (FLOAT_LT(st->Pey, FLOAT_MULT(MIN_LEAK,st->Pyy)))
   1092       st->Pey = FLOAT_MULT(MIN_LEAK,st->Pyy);
   1093    if (FLOAT_GT(st->Pey, st->Pyy))
   1094       st->Pey = st->Pyy;
   1095    /* leak_estimate is the linear regression result */
   1096    st->leak_estimate = FLOAT_EXTRACT16(FLOAT_SHL(FLOAT_DIVU(st->Pey, st->Pyy),14));
   1097    /* This looks like a stupid bug, but it's right (because we convert from Q14 to Q15) */
   1098    if (st->leak_estimate > 16383)
   1099       st->leak_estimate = 32767;
   1100    else
   1101       st->leak_estimate = SHL16(st->leak_estimate,1);
   1102    /*printf ("%f\n", st->leak_estimate);*/
   1103 
   1104    /* Compute Residual to Error Ratio */
   1105 #ifdef FIXED_POINT
   1106    tmp32 = MULT16_32_Q15(st->leak_estimate,Syy);
   1107    tmp32 = ADD32(SHR32(Sxx,13), ADD32(tmp32, SHL32(tmp32,1)));
   1108    /* Check for y in e (lower bound on RER) */
   1109    {
   1110       spx_float_t bound = PSEUDOFLOAT(Sey);
   1111       bound = FLOAT_DIVU(FLOAT_MULT(bound, bound), PSEUDOFLOAT(ADD32(1,Syy)));
   1112       if (FLOAT_GT(bound, PSEUDOFLOAT(See)))
   1113          tmp32 = See;
   1114       else if (tmp32 < FLOAT_EXTRACT32(bound))
   1115          tmp32 = FLOAT_EXTRACT32(bound);
   1116    }
   1117    if (tmp32 > SHR32(See,1))
   1118       tmp32 = SHR32(See,1);
   1119    RER = FLOAT_EXTRACT16(FLOAT_SHL(FLOAT_DIV32(tmp32,See),15));
   1120 #else
   1121    RER = (.0001*Sxx + 3.*MULT16_32_Q15(st->leak_estimate,Syy)) / See;
   1122    /* Check for y in e (lower bound on RER) */
   1123    if (RER < Sey*Sey/(1+See*Syy))
   1124       RER = Sey*Sey/(1+See*Syy);
   1125    if (RER > .5)
   1126       RER = .5;
   1127 #endif
   1128 
   1129    /* We consider that the filter has had minimal adaptation if the following is true*/
   1130    if (!st->adapted && st->sum_adapt > SHL32(EXTEND32(M),15) && MULT16_32_Q15(st->leak_estimate,Syy) > MULT16_32_Q15(QCONST16(.03f,15),Syy))
   1131    {
   1132       st->adapted = 1;
   1133    }
   1134 
   1135    if (st->adapted)
   1136    {
   1137       /* Normal learning rate calculation once we're past the minimal adaptation phase */
   1138       for (i=0;i<=st->frame_size;i++)
   1139       {
   1140          spx_word32_t r, e;
   1141          /* Compute frequency-domain adaptation mask */
   1142          r = MULT16_32_Q15(st->leak_estimate,SHL32(st->Yf[i],3));
   1143          e = SHL32(st->Rf[i],3)+1;
   1144 #ifdef FIXED_POINT
   1145          if (r>SHR32(e,1))
   1146             r = SHR32(e,1);
   1147 #else
   1148          if (r>.5*e)
   1149             r = .5*e;
   1150 #endif
   1151          r = MULT16_32_Q15(QCONST16(.7,15),r) + MULT16_32_Q15(QCONST16(.3,15),(spx_word32_t)(MULT16_32_Q15(RER,e)));
   1152          /*st->power_1[i] = adapt_rate*r/(e*(1+st->power[i]));*/
   1153          st->power_1[i] = FLOAT_SHL(FLOAT_DIV32_FLOAT(r,FLOAT_MUL32U(e,st->power[i]+10)),WEIGHT_SHIFT+16);
   1154       }
   1155    } else {
   1156       /* Temporary adaption rate if filter is not yet adapted enough */
   1157       spx_word16_t adapt_rate=0;
   1158 
   1159       if (Sxx > SHR32(MULT16_16(N, 1000),6))
   1160       {
   1161          tmp32 = MULT16_32_Q15(QCONST16(.25f, 15), Sxx);
   1162 #ifdef FIXED_POINT
   1163          if (tmp32 > SHR32(See,2))
   1164             tmp32 = SHR32(See,2);
   1165 #else
   1166          if (tmp32 > .25*See)
   1167             tmp32 = .25*See;
   1168 #endif
   1169          adapt_rate = FLOAT_EXTRACT16(FLOAT_SHL(FLOAT_DIV32(tmp32, See),15));
   1170       }
   1171       for (i=0;i<=st->frame_size;i++)
   1172          st->power_1[i] = FLOAT_SHL(FLOAT_DIV32(EXTEND32(adapt_rate),ADD32(st->power[i],10)),WEIGHT_SHIFT+1);
   1173 
   1174 
   1175       /* How much have we adapted so far? */
   1176       st->sum_adapt = ADD32(st->sum_adapt,adapt_rate);
   1177    }
   1178 
   1179    /* FIXME: MC conversion required */
   1180       for (i=0;i<st->frame_size;i++)
   1181          st->last_y[i] = st->last_y[st->frame_size+i];
   1182    if (st->adapted)
   1183    {
   1184       /* If the filter is adapted, take the filtered echo */
   1185       for (i=0;i<st->frame_size;i++)
   1186          st->last_y[st->frame_size+i] = in[i]-out[i];
   1187    } else {
   1188       /* If filter isn't adapted yet, all we can do is take the far end signal directly */
   1189       /* moved earlier: for (i=0;i<N;i++)
   1190       st->last_y[i] = st->x[i];*/
   1191    }
   1192 
   1193 }
   1194 
   1195 /* Compute spectrum of estimated echo for use in an echo post-filter */
   1196 void speex_echo_get_residual(SpeexEchoState *st, spx_word32_t *residual_echo, int len)
   1197 {
   1198    int i;
   1199    spx_word16_t leak2;
   1200    int N;
   1201 
   1202    N = st->window_size;
   1203 
   1204    /* Apply hanning window (should pre-compute it)*/
   1205    for (i=0;i<N;i++)
   1206       st->y[i] = MULT16_16_Q15(st->window[i],st->last_y[i]);
   1207 
   1208    /* Compute power spectrum of the echo */
   1209    spx_fft(st->fft_table, st->y, st->Y);
   1210    power_spectrum(st->Y, residual_echo, N);
   1211 
   1212 #ifdef FIXED_POINT
   1213    if (st->leak_estimate > 16383)
   1214       leak2 = 32767;
   1215    else
   1216       leak2 = SHL16(st->leak_estimate, 1);
   1217 #else
   1218    if (st->leak_estimate>.5)
   1219       leak2 = 1;
   1220    else
   1221       leak2 = 2*st->leak_estimate;
   1222 #endif
   1223    /* Estimate residual echo */
   1224    for (i=0;i<=st->frame_size;i++)
   1225       residual_echo[i] = (spx_int32_t)MULT16_32_Q15(leak2,residual_echo[i]);
   1226 
   1227 }
   1228 
   1229 EXPORT int speex_echo_ctl(SpeexEchoState *st, int request, void *ptr)
   1230 {
   1231    switch(request)
   1232    {
   1233 
   1234       case SPEEX_ECHO_GET_FRAME_SIZE:
   1235          (*(int*)ptr) = st->frame_size;
   1236          break;
   1237       case SPEEX_ECHO_SET_SAMPLING_RATE:
   1238          st->sampling_rate = (*(int*)ptr);
   1239          st->spec_average = DIV32_16(SHL32(EXTEND32(st->frame_size), 15), st->sampling_rate);
   1240 #ifdef FIXED_POINT
   1241          st->beta0 = DIV32_16(SHL32(EXTEND32(st->frame_size), 16), st->sampling_rate);
   1242          st->beta_max = DIV32_16(SHL32(EXTEND32(st->frame_size), 14), st->sampling_rate);
   1243 #else
   1244          st->beta0 = (2.0f*st->frame_size)/st->sampling_rate;
   1245          st->beta_max = (.5f*st->frame_size)/st->sampling_rate;
   1246 #endif
   1247          if (st->sampling_rate<12000)
   1248             st->notch_radius = QCONST16(.9, 15);
   1249          else if (st->sampling_rate<24000)
   1250             st->notch_radius = QCONST16(.982, 15);
   1251          else
   1252             st->notch_radius = QCONST16(.992, 15);
   1253          break;
   1254       case SPEEX_ECHO_GET_SAMPLING_RATE:
   1255          (*(int*)ptr) = st->sampling_rate;
   1256          break;
   1257       case SPEEX_ECHO_GET_IMPULSE_RESPONSE_SIZE:
   1258          /*FIXME: Implement this for multiple channels */
   1259          *((spx_int32_t *)ptr) = st->M * st->frame_size;
   1260          break;
   1261       case SPEEX_ECHO_GET_IMPULSE_RESPONSE:
   1262       {
   1263          int M = st->M, N = st->window_size, n = st->frame_size, i, j;
   1264          spx_int32_t *filt = (spx_int32_t *) ptr;
   1265          for(j=0;j<M;j++)
   1266          {
   1267             /*FIXME: Implement this for multiple channels */
   1268 #ifdef FIXED_POINT
   1269             for (i=0;i<N;i++)
   1270                st->wtmp2[i] = EXTRACT16(PSHR32(st->W[j*N+i],16+NORMALIZE_SCALEDOWN));
   1271             spx_ifft(st->fft_table, st->wtmp2, st->wtmp);
   1272 #else
   1273             spx_ifft(st->fft_table, &st->W[j*N], st->wtmp);
   1274 #endif
   1275             for(i=0;i<n;i++)
   1276                filt[j*n+i] = PSHR32(MULT16_16(32767,st->wtmp[i]), WEIGHT_SHIFT-NORMALIZE_SCALEDOWN);
   1277          }
   1278       }
   1279          break;
   1280       default:
   1281          speex_warning_int("Unknown speex_echo_ctl request: ", request);
   1282          return -1;
   1283    }
   1284    return 0;
   1285 }
   1286