Home | History | Annotate | Download | only in libspeex
      1 /* Copyright (C) 2002-2006 Jean-Marc Valin
      2    File: filters.c
      3    Various analysis/synthesis filters
      4 
      5    Redistribution and use in source and binary forms, with or without
      6    modification, are permitted provided that the following conditions
      7    are met:
      8 
      9    - Redistributions of source code must retain the above copyright
     10    notice, this list of conditions and the following disclaimer.
     11 
     12    - Redistributions in binary form must reproduce the above copyright
     13    notice, this list of conditions and the following disclaimer in the
     14    documentation and/or other materials provided with the distribution.
     15 
     16    - Neither the name of the Xiph.org Foundation nor the names of its
     17    contributors may be used to endorse or promote products derived from
     18    this software without specific prior written permission.
     19 
     20    THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
     21    ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
     22    LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
     23    A PARTICULAR PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR
     24    CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
     25    EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
     26    PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
     27    PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
     28    LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
     29    NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
     30    SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     31 */
     32 
     33 #ifdef HAVE_CONFIG_H
     34 #include "config.h"
     35 #endif
     36 
     37 #include "filters.h"
     38 #include "stack_alloc.h"
     39 #include "arch.h"
     40 #include "math_approx.h"
     41 #include "ltp.h"
     42 #include <math.h>
     43 
     44 #ifdef _USE_SSE
     45 #include "filters_sse.h"
     46 #elif defined (ARM4_ASM) || defined(ARM5E_ASM)
     47 #include "filters_arm4.h"
     48 #elif defined (BFIN_ASM)
     49 #include "filters_bfin.h"
     50 #endif
     51 
     52 
     53 
     54 void bw_lpc(spx_word16_t gamma, const spx_coef_t *lpc_in, spx_coef_t *lpc_out, int order)
     55 {
     56    int i;
     57    spx_word16_t tmp=gamma;
     58    for (i=0;i<order;i++)
     59    {
     60       lpc_out[i] = MULT16_16_P15(tmp,lpc_in[i]);
     61       tmp = MULT16_16_P15(tmp, gamma);
     62    }
     63 }
     64 
     65 void sanitize_values32(spx_word32_t *vec, spx_word32_t min_val, spx_word32_t max_val, int len)
     66 {
     67    int i;
     68    for (i=0;i<len;i++)
     69    {
     70       /* It's important we do the test that way so we can catch NaNs, which are neither greater nor smaller */
     71       if (!(vec[i]>=min_val && vec[i] <= max_val))
     72       {
     73          if (vec[i] < min_val)
     74             vec[i] = min_val;
     75          else if (vec[i] > max_val)
     76             vec[i] = max_val;
     77          else /* Has to be NaN */
     78             vec[i] = 0;
     79       }
     80    }
     81 }
     82 
     83 void highpass(const spx_word16_t *x, spx_word16_t *y, int len, int filtID, spx_mem_t *mem)
     84 {
     85    int i;
     86 #ifdef FIXED_POINT
     87    const spx_word16_t Pcoef[5][3] = {{16384, -31313, 14991}, {16384, -31569, 15249}, {16384, -31677, 15328}, {16384, -32313, 15947}, {16384, -22446, 6537}};
     88    const spx_word16_t Zcoef[5][3] = {{15672, -31344, 15672}, {15802, -31601, 15802}, {15847, -31694, 15847}, {16162, -32322, 16162}, {14418, -28836, 14418}};
     89 #else
     90    const spx_word16_t Pcoef[5][3] = {{1.00000f, -1.91120f, 0.91498f}, {1.00000f, -1.92683f, 0.93071f}, {1.00000f, -1.93338f, 0.93553f}, {1.00000f, -1.97226f, 0.97332f}, {1.00000f, -1.37000f, 0.39900f}};
     91    const spx_word16_t Zcoef[5][3] = {{0.95654f, -1.91309f, 0.95654f}, {0.96446f, -1.92879f, 0.96446f}, {0.96723f, -1.93445f, 0.96723f}, {0.98645f, -1.97277f, 0.98645f}, {0.88000f, -1.76000f, 0.88000f}};
     92 #endif
     93    const spx_word16_t *den, *num;
     94    if (filtID>4)
     95       filtID=4;
     96 
     97    den = Pcoef[filtID]; num = Zcoef[filtID];
     98    /*return;*/
     99    for (i=0;i<len;i++)
    100    {
    101       spx_word16_t yi;
    102       spx_word32_t vout = ADD32(MULT16_16(num[0], x[i]),mem[0]);
    103       yi = EXTRACT16(SATURATE(PSHR32(vout,14),32767));
    104       mem[0] = ADD32(MAC16_16(mem[1], num[1],x[i]), SHL32(MULT16_32_Q15(-den[1],vout),1));
    105       mem[1] = ADD32(MULT16_16(num[2],x[i]), SHL32(MULT16_32_Q15(-den[2],vout),1));
    106       y[i] = yi;
    107    }
    108 }
    109 
    110 #ifdef FIXED_POINT
    111 
    112 /* FIXME: These functions are ugly and probably introduce too much error */
    113 void signal_mul(const spx_sig_t *x, spx_sig_t *y, spx_word32_t scale, int len)
    114 {
    115    int i;
    116    for (i=0;i<len;i++)
    117    {
    118       y[i] = SHL32(MULT16_32_Q14(EXTRACT16(SHR32(x[i],7)),scale),7);
    119    }
    120 }
    121 
    122 void signal_div(const spx_word16_t *x, spx_word16_t *y, spx_word32_t scale, int len)
    123 {
    124    int i;
    125    if (scale > SHL32(EXTEND32(SIG_SCALING), 8))
    126    {
    127       spx_word16_t scale_1;
    128       scale = PSHR32(scale, SIG_SHIFT);
    129       scale_1 = EXTRACT16(PDIV32_16(SHL32(EXTEND32(SIG_SCALING),7),scale));
    130       for (i=0;i<len;i++)
    131       {
    132          y[i] = MULT16_16_P15(scale_1, x[i]);
    133       }
    134    } else if (scale > SHR32(EXTEND32(SIG_SCALING), 2)) {
    135       spx_word16_t scale_1;
    136       scale = PSHR32(scale, SIG_SHIFT-5);
    137       scale_1 = DIV32_16(SHL32(EXTEND32(SIG_SCALING),3),scale);
    138       for (i=0;i<len;i++)
    139       {
    140          y[i] = PSHR32(MULT16_16(scale_1, SHL16(x[i],2)),8);
    141       }
    142    } else {
    143       spx_word16_t scale_1;
    144       scale = PSHR32(scale, SIG_SHIFT-7);
    145       if (scale < 5)
    146          scale = 5;
    147       scale_1 = DIV32_16(SHL32(EXTEND32(SIG_SCALING),3),scale);
    148       for (i=0;i<len;i++)
    149       {
    150          y[i] = PSHR32(MULT16_16(scale_1, SHL16(x[i],2)),6);
    151       }
    152    }
    153 }
    154 
    155 #else
    156 
    157 void signal_mul(const spx_sig_t *x, spx_sig_t *y, spx_word32_t scale, int len)
    158 {
    159    int i;
    160    for (i=0;i<len;i++)
    161       y[i] = scale*x[i];
    162 }
    163 
    164 void signal_div(const spx_sig_t *x, spx_sig_t *y, spx_word32_t scale, int len)
    165 {
    166    int i;
    167    float scale_1 = 1/scale;
    168    for (i=0;i<len;i++)
    169       y[i] = scale_1*x[i];
    170 }
    171 #endif
    172 
    173 
    174 
    175 #ifdef FIXED_POINT
    176 
    177 
    178 
    179 spx_word16_t compute_rms(const spx_sig_t *x, int len)
    180 {
    181    int i;
    182    spx_word32_t sum=0;
    183    spx_sig_t max_val=1;
    184    int sig_shift;
    185 
    186    for (i=0;i<len;i++)
    187    {
    188       spx_sig_t tmp = x[i];
    189       if (tmp<0)
    190          tmp = -tmp;
    191       if (tmp > max_val)
    192          max_val = tmp;
    193    }
    194 
    195    sig_shift=0;
    196    while (max_val>16383)
    197    {
    198       sig_shift++;
    199       max_val >>= 1;
    200    }
    201 
    202    for (i=0;i<len;i+=4)
    203    {
    204       spx_word32_t sum2=0;
    205       spx_word16_t tmp;
    206       tmp = EXTRACT16(SHR32(x[i],sig_shift));
    207       sum2 = MAC16_16(sum2,tmp,tmp);
    208       tmp = EXTRACT16(SHR32(x[i+1],sig_shift));
    209       sum2 = MAC16_16(sum2,tmp,tmp);
    210       tmp = EXTRACT16(SHR32(x[i+2],sig_shift));
    211       sum2 = MAC16_16(sum2,tmp,tmp);
    212       tmp = EXTRACT16(SHR32(x[i+3],sig_shift));
    213       sum2 = MAC16_16(sum2,tmp,tmp);
    214       sum = ADD32(sum,SHR32(sum2,6));
    215    }
    216 
    217    return EXTRACT16(PSHR32(SHL32(EXTEND32(spx_sqrt(DIV32(sum,len))),(sig_shift+3)),SIG_SHIFT));
    218 }
    219 
    220 spx_word16_t compute_rms16(const spx_word16_t *x, int len)
    221 {
    222    int i;
    223    spx_word16_t max_val=10;
    224 
    225    for (i=0;i<len;i++)
    226    {
    227       spx_sig_t tmp = x[i];
    228       if (tmp<0)
    229          tmp = -tmp;
    230       if (tmp > max_val)
    231          max_val = tmp;
    232    }
    233    if (max_val>16383)
    234    {
    235       spx_word32_t sum=0;
    236       for (i=0;i<len;i+=4)
    237       {
    238          spx_word32_t sum2=0;
    239          sum2 = MAC16_16(sum2,SHR16(x[i],1),SHR16(x[i],1));
    240          sum2 = MAC16_16(sum2,SHR16(x[i+1],1),SHR16(x[i+1],1));
    241          sum2 = MAC16_16(sum2,SHR16(x[i+2],1),SHR16(x[i+2],1));
    242          sum2 = MAC16_16(sum2,SHR16(x[i+3],1),SHR16(x[i+3],1));
    243          sum = ADD32(sum,SHR32(sum2,6));
    244       }
    245       return SHL16(spx_sqrt(DIV32(sum,len)),4);
    246    } else {
    247       spx_word32_t sum=0;
    248       int sig_shift=0;
    249       if (max_val < 8192)
    250          sig_shift=1;
    251       if (max_val < 4096)
    252          sig_shift=2;
    253       if (max_val < 2048)
    254          sig_shift=3;
    255       for (i=0;i<len;i+=4)
    256       {
    257          spx_word32_t sum2=0;
    258          sum2 = MAC16_16(sum2,SHL16(x[i],sig_shift),SHL16(x[i],sig_shift));
    259          sum2 = MAC16_16(sum2,SHL16(x[i+1],sig_shift),SHL16(x[i+1],sig_shift));
    260          sum2 = MAC16_16(sum2,SHL16(x[i+2],sig_shift),SHL16(x[i+2],sig_shift));
    261          sum2 = MAC16_16(sum2,SHL16(x[i+3],sig_shift),SHL16(x[i+3],sig_shift));
    262          sum = ADD32(sum,SHR32(sum2,6));
    263       }
    264       return SHL16(spx_sqrt(DIV32(sum,len)),3-sig_shift);
    265    }
    266 }
    267 
    268 #ifndef OVERRIDE_NORMALIZE16
    269 int normalize16(const spx_sig_t *x, spx_word16_t *y, spx_sig_t max_scale, int len)
    270 {
    271    int i;
    272    spx_sig_t max_val=1;
    273    int sig_shift;
    274 
    275    for (i=0;i<len;i++)
    276    {
    277       spx_sig_t tmp = x[i];
    278       if (tmp<0)
    279          tmp = NEG32(tmp);
    280       if (tmp >= max_val)
    281          max_val = tmp;
    282    }
    283 
    284    sig_shift=0;
    285    while (max_val>max_scale)
    286    {
    287       sig_shift++;
    288       max_val >>= 1;
    289    }
    290 
    291    for (i=0;i<len;i++)
    292       y[i] = EXTRACT16(SHR32(x[i], sig_shift));
    293 
    294    return sig_shift;
    295 }
    296 #endif
    297 
    298 #else
    299 
    300 spx_word16_t compute_rms(const spx_sig_t *x, int len)
    301 {
    302    int i;
    303    float sum=0;
    304    for (i=0;i<len;i++)
    305    {
    306       sum += x[i]*x[i];
    307    }
    308    return sqrt(.1+sum/len);
    309 }
    310 spx_word16_t compute_rms16(const spx_word16_t *x, int len)
    311 {
    312    return compute_rms(x, len);
    313 }
    314 #endif
    315 
    316 
    317 
    318 #ifndef OVERRIDE_FILTER_MEM16
    319 void filter_mem16(const spx_word16_t *x, const spx_coef_t *num, const spx_coef_t *den, spx_word16_t *y, int N, int ord, spx_mem_t *mem, char *stack)
    320 {
    321    int i,j;
    322    spx_word16_t xi,yi,nyi;
    323    for (i=0;i<N;i++)
    324    {
    325       xi= x[i];
    326       yi = EXTRACT16(SATURATE(ADD32(EXTEND32(x[i]),PSHR32(mem[0],LPC_SHIFT)),32767));
    327       nyi = NEG16(yi);
    328       for (j=0;j<ord-1;j++)
    329       {
    330          mem[j] = MAC16_16(MAC16_16(mem[j+1], num[j],xi), den[j],nyi);
    331       }
    332       mem[ord-1] = ADD32(MULT16_16(num[ord-1],xi), MULT16_16(den[ord-1],nyi));
    333       y[i] = yi;
    334    }
    335 }
    336 #endif
    337 
    338 #ifndef OVERRIDE_IIR_MEM16
    339 void iir_mem16(const spx_word16_t *x, const spx_coef_t *den, spx_word16_t *y, int N, int ord, spx_mem_t *mem, char *stack)
    340 {
    341    int i,j;
    342    spx_word16_t yi,nyi;
    343 
    344    for (i=0;i<N;i++)
    345    {
    346       yi = EXTRACT16(SATURATE(ADD32(EXTEND32(x[i]),PSHR32(mem[0],LPC_SHIFT)),32767));
    347       nyi = NEG16(yi);
    348       for (j=0;j<ord-1;j++)
    349       {
    350          mem[j] = MAC16_16(mem[j+1],den[j],nyi);
    351       }
    352       mem[ord-1] = MULT16_16(den[ord-1],nyi);
    353       y[i] = yi;
    354    }
    355 }
    356 #endif
    357 
    358 #ifndef OVERRIDE_FIR_MEM16
    359 void fir_mem16(const spx_word16_t *x, const spx_coef_t *num, spx_word16_t *y, int N, int ord, spx_mem_t *mem, char *stack)
    360 {
    361    int i,j;
    362    spx_word16_t xi,yi;
    363 
    364    for (i=0;i<N;i++)
    365    {
    366       xi=x[i];
    367       yi = EXTRACT16(SATURATE(ADD32(EXTEND32(x[i]),PSHR32(mem[0],LPC_SHIFT)),32767));
    368       for (j=0;j<ord-1;j++)
    369       {
    370          mem[j] = MAC16_16(mem[j+1], num[j],xi);
    371       }
    372       mem[ord-1] = MULT16_16(num[ord-1],xi);
    373       y[i] = yi;
    374    }
    375 }
    376 #endif
    377 
    378 
    379 void syn_percep_zero16(const spx_word16_t *xx, const spx_coef_t *ak, const spx_coef_t *awk1, const spx_coef_t *awk2, spx_word16_t *y, int N, int ord, char *stack)
    380 {
    381    int i;
    382    VARDECL(spx_mem_t *mem);
    383    ALLOC(mem, ord, spx_mem_t);
    384    for (i=0;i<ord;i++)
    385       mem[i]=0;
    386    iir_mem16(xx, ak, y, N, ord, mem, stack);
    387    for (i=0;i<ord;i++)
    388       mem[i]=0;
    389    filter_mem16(y, awk1, awk2, y, N, ord, mem, stack);
    390 }
    391 void residue_percep_zero16(const spx_word16_t *xx, const spx_coef_t *ak, const spx_coef_t *awk1, const spx_coef_t *awk2, spx_word16_t *y, int N, int ord, char *stack)
    392 {
    393    int i;
    394    VARDECL(spx_mem_t *mem);
    395    ALLOC(mem, ord, spx_mem_t);
    396    for (i=0;i<ord;i++)
    397       mem[i]=0;
    398    filter_mem16(xx, ak, awk1, y, N, ord, mem, stack);
    399    for (i=0;i<ord;i++)
    400       mem[i]=0;
    401    fir_mem16(y, awk2, y, N, ord, mem, stack);
    402 }
    403 
    404 
    405 #ifndef OVERRIDE_COMPUTE_IMPULSE_RESPONSE
    406 void compute_impulse_response(const spx_coef_t *ak, const spx_coef_t *awk1, const spx_coef_t *awk2, spx_word16_t *y, int N, int ord, char *stack)
    407 {
    408    int i,j;
    409    spx_word16_t y1, ny1i, ny2i;
    410    VARDECL(spx_mem_t *mem1);
    411    VARDECL(spx_mem_t *mem2);
    412    ALLOC(mem1, ord, spx_mem_t);
    413    ALLOC(mem2, ord, spx_mem_t);
    414 
    415    y[0] = LPC_SCALING;
    416    for (i=0;i<ord;i++)
    417       y[i+1] = awk1[i];
    418    i++;
    419    for (;i<N;i++)
    420       y[i] = VERY_SMALL;
    421    for (i=0;i<ord;i++)
    422       mem1[i] = mem2[i] = 0;
    423    for (i=0;i<N;i++)
    424    {
    425       y1 = ADD16(y[i], EXTRACT16(PSHR32(mem1[0],LPC_SHIFT)));
    426       ny1i = NEG16(y1);
    427       y[i] = PSHR32(ADD32(SHL32(EXTEND32(y1),LPC_SHIFT+1),mem2[0]),LPC_SHIFT);
    428       ny2i = NEG16(y[i]);
    429       for (j=0;j<ord-1;j++)
    430       {
    431          mem1[j] = MAC16_16(mem1[j+1], awk2[j],ny1i);
    432          mem2[j] = MAC16_16(mem2[j+1], ak[j],ny2i);
    433       }
    434       mem1[ord-1] = MULT16_16(awk2[ord-1],ny1i);
    435       mem2[ord-1] = MULT16_16(ak[ord-1],ny2i);
    436    }
    437 }
    438 #endif
    439 
    440 /* Decomposes a signal into low-band and high-band using a QMF */
    441 void qmf_decomp(const spx_word16_t *xx, const spx_word16_t *aa, spx_word16_t *y1, spx_word16_t *y2, int N, int M, spx_word16_t *mem, char *stack)
    442 {
    443    int i,j,k,M2;
    444    VARDECL(spx_word16_t *a);
    445    VARDECL(spx_word16_t *x);
    446    spx_word16_t *x2;
    447 
    448    ALLOC(a, M, spx_word16_t);
    449    ALLOC(x, N+M-1, spx_word16_t);
    450    x2=x+M-1;
    451    M2=M>>1;
    452    for (i=0;i<M;i++)
    453       a[M-i-1]= aa[i];
    454    for (i=0;i<M-1;i++)
    455       x[i]=mem[M-i-2];
    456    for (i=0;i<N;i++)
    457       x[i+M-1]=SHR16(xx[i],1);
    458    for (i=0;i<M-1;i++)
    459       mem[i]=SHR16(xx[N-i-1],1);
    460    for (i=0,k=0;i<N;i+=2,k++)
    461    {
    462       spx_word32_t y1k=0, y2k=0;
    463       for (j=0;j<M2;j++)
    464       {
    465          y1k=ADD32(y1k,MULT16_16(a[j],ADD16(x[i+j],x2[i-j])));
    466          y2k=SUB32(y2k,MULT16_16(a[j],SUB16(x[i+j],x2[i-j])));
    467          j++;
    468          y1k=ADD32(y1k,MULT16_16(a[j],ADD16(x[i+j],x2[i-j])));
    469          y2k=ADD32(y2k,MULT16_16(a[j],SUB16(x[i+j],x2[i-j])));
    470       }
    471       y1[k] = EXTRACT16(SATURATE(PSHR32(y1k,15),32767));
    472       y2[k] = EXTRACT16(SATURATE(PSHR32(y2k,15),32767));
    473    }
    474 }
    475 
    476 /* Re-synthesised a signal from the QMF low-band and high-band signals */
    477 void qmf_synth(const spx_word16_t *x1, const spx_word16_t *x2, const spx_word16_t *a, spx_word16_t *y, int N, int M, spx_word16_t *mem1, spx_word16_t *mem2, char *stack)
    478    /* assumptions:
    479       all odd x[i] are zero -- well, actually they are left out of the array now
    480       N and M are multiples of 4 */
    481 {
    482    int i, j;
    483    int M2, N2;
    484    VARDECL(spx_word16_t *xx1);
    485    VARDECL(spx_word16_t *xx2);
    486 
    487    M2 = M>>1;
    488    N2 = N>>1;
    489    ALLOC(xx1, M2+N2, spx_word16_t);
    490    ALLOC(xx2, M2+N2, spx_word16_t);
    491 
    492    for (i = 0; i < N2; i++)
    493       xx1[i] = x1[N2-1-i];
    494    for (i = 0; i < M2; i++)
    495       xx1[N2+i] = mem1[2*i+1];
    496    for (i = 0; i < N2; i++)
    497       xx2[i] = x2[N2-1-i];
    498    for (i = 0; i < M2; i++)
    499       xx2[N2+i] = mem2[2*i+1];
    500 
    501    for (i = 0; i < N2; i += 2) {
    502       spx_sig_t y0, y1, y2, y3;
    503       spx_word16_t x10, x20;
    504 
    505       y0 = y1 = y2 = y3 = 0;
    506       x10 = xx1[N2-2-i];
    507       x20 = xx2[N2-2-i];
    508 
    509       for (j = 0; j < M2; j += 2) {
    510          spx_word16_t x11, x21;
    511          spx_word16_t a0, a1;
    512 
    513          a0 = a[2*j];
    514          a1 = a[2*j+1];
    515          x11 = xx1[N2-1+j-i];
    516          x21 = xx2[N2-1+j-i];
    517 
    518 #ifdef FIXED_POINT
    519          /* We multiply twice by the same coef to avoid overflows */
    520          y0 = MAC16_16(MAC16_16(y0, a0, x11), NEG16(a0), x21);
    521          y1 = MAC16_16(MAC16_16(y1, a1, x11), a1, x21);
    522          y2 = MAC16_16(MAC16_16(y2, a0, x10), NEG16(a0), x20);
    523          y3 = MAC16_16(MAC16_16(y3, a1, x10), a1, x20);
    524 #else
    525          y0 = ADD32(y0,MULT16_16(a0, x11-x21));
    526          y1 = ADD32(y1,MULT16_16(a1, x11+x21));
    527          y2 = ADD32(y2,MULT16_16(a0, x10-x20));
    528          y3 = ADD32(y3,MULT16_16(a1, x10+x20));
    529 #endif
    530          a0 = a[2*j+2];
    531          a1 = a[2*j+3];
    532          x10 = xx1[N2+j-i];
    533          x20 = xx2[N2+j-i];
    534 
    535 #ifdef FIXED_POINT
    536          /* We multiply twice by the same coef to avoid overflows */
    537          y0 = MAC16_16(MAC16_16(y0, a0, x10), NEG16(a0), x20);
    538          y1 = MAC16_16(MAC16_16(y1, a1, x10), a1, x20);
    539          y2 = MAC16_16(MAC16_16(y2, a0, x11), NEG16(a0), x21);
    540          y3 = MAC16_16(MAC16_16(y3, a1, x11), a1, x21);
    541 #else
    542          y0 = ADD32(y0,MULT16_16(a0, x10-x20));
    543          y1 = ADD32(y1,MULT16_16(a1, x10+x20));
    544          y2 = ADD32(y2,MULT16_16(a0, x11-x21));
    545          y3 = ADD32(y3,MULT16_16(a1, x11+x21));
    546 #endif
    547       }
    548 #ifdef FIXED_POINT
    549       y[2*i] = EXTRACT16(SATURATE32(PSHR32(y0,15),32767));
    550       y[2*i+1] = EXTRACT16(SATURATE32(PSHR32(y1,15),32767));
    551       y[2*i+2] = EXTRACT16(SATURATE32(PSHR32(y2,15),32767));
    552       y[2*i+3] = EXTRACT16(SATURATE32(PSHR32(y3,15),32767));
    553 #else
    554       /* Normalize up explicitly if we're in float */
    555       y[2*i] = 2.f*y0;
    556       y[2*i+1] = 2.f*y1;
    557       y[2*i+2] = 2.f*y2;
    558       y[2*i+3] = 2.f*y3;
    559 #endif
    560    }
    561 
    562    for (i = 0; i < M2; i++)
    563       mem1[2*i+1] = xx1[i];
    564    for (i = 0; i < M2; i++)
    565       mem2[2*i+1] = xx2[i];
    566 }
    567 
    568 #ifdef FIXED_POINT
    569 #if 0
    570 const spx_word16_t shift_filt[3][7] = {{-33,    1043,   -4551,   19959,   19959,   -4551,    1043},
    571                                  {-98,    1133,   -4425,   29179,    8895,   -2328,     444},
    572                                  {444,   -2328,    8895,   29179,   -4425,    1133,     -98}};
    573 #else
    574 const spx_word16_t shift_filt[3][7] = {{-390,    1540,   -4993,   20123,   20123,   -4993,    1540},
    575                                 {-1064,    2817,   -6694,   31589,    6837,    -990,    -209},
    576                                  {-209,    -990,    6837,   31589,   -6694,    2817,   -1064}};
    577 #endif
    578 #else
    579 #if 0
    580 const float shift_filt[3][7] = {{-9.9369e-04, 3.1831e-02, -1.3889e-01, 6.0910e-01, 6.0910e-01, -1.3889e-01, 3.1831e-02},
    581                           {-0.0029937, 0.0345613, -0.1350474, 0.8904793, 0.2714479, -0.0710304, 0.0135403},
    582                           {0.0135403, -0.0710304, 0.2714479, 0.8904793, -0.1350474, 0.0345613,  -0.0029937}};
    583 #else
    584 const float shift_filt[3][7] = {{-0.011915f, 0.046995f, -0.152373f, 0.614108f, 0.614108f, -0.152373f, 0.046995f},
    585                           {-0.0324855f, 0.0859768f, -0.2042986f, 0.9640297f, 0.2086420f, -0.0302054f, -0.0063646f},
    586                           {-0.0063646f, -0.0302054f, 0.2086420f, 0.9640297f, -0.2042986f, 0.0859768f, -0.0324855f}};
    587 #endif
    588 #endif
    589 
    590 int interp_pitch(
    591 spx_word16_t *exc,          /*decoded excitation*/
    592 spx_word16_t *interp,          /*decoded excitation*/
    593 int pitch,               /*pitch period*/
    594 int len
    595 )
    596 {
    597    int i,j,k;
    598    spx_word32_t corr[4][7];
    599    spx_word32_t maxcorr;
    600    int maxi, maxj;
    601    for (i=0;i<7;i++)
    602    {
    603       corr[0][i] = inner_prod(exc, exc-pitch-3+i, len);
    604    }
    605    for (i=0;i<3;i++)
    606    {
    607       for (j=0;j<7;j++)
    608       {
    609          int i1, i2;
    610          spx_word32_t tmp=0;
    611          i1 = 3-j;
    612          if (i1<0)
    613             i1 = 0;
    614          i2 = 10-j;
    615          if (i2>7)
    616             i2 = 7;
    617          for (k=i1;k<i2;k++)
    618             tmp += MULT16_32_Q15(shift_filt[i][k],corr[0][j+k-3]);
    619          corr[i+1][j] = tmp;
    620       }
    621    }
    622    maxi=maxj=0;
    623    maxcorr = corr[0][0];
    624    for (i=0;i<4;i++)
    625    {
    626       for (j=0;j<7;j++)
    627       {
    628          if (corr[i][j] > maxcorr)
    629          {
    630             maxcorr = corr[i][j];
    631             maxi=i;
    632             maxj=j;
    633          }
    634       }
    635    }
    636    for (i=0;i<len;i++)
    637    {
    638       spx_word32_t tmp = 0;
    639       if (maxi>0)
    640       {
    641          for (k=0;k<7;k++)
    642          {
    643             tmp += MULT16_16(exc[i-(pitch-maxj+3)+k-3],shift_filt[maxi-1][k]);
    644          }
    645       } else {
    646          tmp = SHL32(exc[i-(pitch-maxj+3)],15);
    647       }
    648       interp[i] = PSHR32(tmp,15);
    649    }
    650    return pitch-maxj+3;
    651 }
    652 
    653 void multicomb(
    654 spx_word16_t *exc,          /*decoded excitation*/
    655 spx_word16_t *new_exc,      /*enhanced excitation*/
    656 spx_coef_t *ak,           /*LPC filter coefs*/
    657 int p,               /*LPC order*/
    658 int nsf,             /*sub-frame size*/
    659 int pitch,           /*pitch period*/
    660 int max_pitch,
    661 spx_word16_t  comb_gain,    /*gain of comb filter*/
    662 char *stack
    663 )
    664 {
    665    int i;
    666    VARDECL(spx_word16_t *iexc);
    667    spx_word16_t old_ener, new_ener;
    668    int corr_pitch;
    669 
    670    spx_word16_t iexc0_mag, iexc1_mag, exc_mag;
    671    spx_word32_t corr0, corr1;
    672    spx_word16_t gain0, gain1;
    673    spx_word16_t pgain1, pgain2;
    674    spx_word16_t c1, c2;
    675    spx_word16_t g1, g2;
    676    spx_word16_t ngain;
    677    spx_word16_t gg1, gg2;
    678 #ifdef FIXED_POINT
    679    int scaledown=0;
    680 #endif
    681 #if 0 /* Set to 1 to enable full pitch search */
    682    int nol_pitch[6];
    683    spx_word16_t nol_pitch_coef[6];
    684    spx_word16_t ol_pitch_coef;
    685    open_loop_nbest_pitch(exc, 20, 120, nsf,
    686                          nol_pitch, nol_pitch_coef, 6, stack);
    687    corr_pitch=nol_pitch[0];
    688    ol_pitch_coef = nol_pitch_coef[0];
    689    /*Try to remove pitch multiples*/
    690    for (i=1;i<6;i++)
    691    {
    692 #ifdef FIXED_POINT
    693       if ((nol_pitch_coef[i]>MULT16_16_Q15(nol_pitch_coef[0],19661)) &&
    694 #else
    695       if ((nol_pitch_coef[i]>.6*nol_pitch_coef[0]) &&
    696 #endif
    697          (ABS(2*nol_pitch[i]-corr_pitch)<=2 || ABS(3*nol_pitch[i]-corr_pitch)<=3 ||
    698          ABS(4*nol_pitch[i]-corr_pitch)<=4 || ABS(5*nol_pitch[i]-corr_pitch)<=5))
    699       {
    700          corr_pitch = nol_pitch[i];
    701       }
    702    }
    703 #else
    704    corr_pitch = pitch;
    705 #endif
    706 
    707    ALLOC(iexc, 2*nsf, spx_word16_t);
    708 
    709    interp_pitch(exc, iexc, corr_pitch, 80);
    710    if (corr_pitch>max_pitch)
    711       interp_pitch(exc, iexc+nsf, 2*corr_pitch, 80);
    712    else
    713       interp_pitch(exc, iexc+nsf, -corr_pitch, 80);
    714 
    715 #ifdef FIXED_POINT
    716    for (i=0;i<nsf;i++)
    717    {
    718       if (ABS16(exc[i])>16383)
    719       {
    720          scaledown = 1;
    721          break;
    722       }
    723    }
    724    if (scaledown)
    725    {
    726       for (i=0;i<nsf;i++)
    727          exc[i] = SHR16(exc[i],1);
    728       for (i=0;i<2*nsf;i++)
    729          iexc[i] = SHR16(iexc[i],1);
    730    }
    731 #endif
    732    /*interp_pitch(exc, iexc+2*nsf, 2*corr_pitch, 80);*/
    733 
    734    /*printf ("%d %d %f\n", pitch, corr_pitch, max_corr*ener_1);*/
    735    iexc0_mag = spx_sqrt(1000+inner_prod(iexc,iexc,nsf));
    736    iexc1_mag = spx_sqrt(1000+inner_prod(iexc+nsf,iexc+nsf,nsf));
    737    exc_mag = spx_sqrt(1+inner_prod(exc,exc,nsf));
    738    corr0  = inner_prod(iexc,exc,nsf);
    739    if (corr0<0)
    740       corr0=0;
    741    corr1 = inner_prod(iexc+nsf,exc,nsf);
    742    if (corr1<0)
    743       corr1=0;
    744 #ifdef FIXED_POINT
    745    /* Doesn't cost much to limit the ratio and it makes the rest easier */
    746    if (SHL32(EXTEND32(iexc0_mag),6) < EXTEND32(exc_mag))
    747       iexc0_mag = ADD16(1,PSHR16(exc_mag,6));
    748    if (SHL32(EXTEND32(iexc1_mag),6) < EXTEND32(exc_mag))
    749       iexc1_mag = ADD16(1,PSHR16(exc_mag,6));
    750 #endif
    751    if (corr0 > MULT16_16(iexc0_mag,exc_mag))
    752       pgain1 = QCONST16(1., 14);
    753    else
    754       pgain1 = PDIV32_16(SHL32(PDIV32(corr0, exc_mag),14),iexc0_mag);
    755    if (corr1 > MULT16_16(iexc1_mag,exc_mag))
    756       pgain2 = QCONST16(1., 14);
    757    else
    758       pgain2 = PDIV32_16(SHL32(PDIV32(corr1, exc_mag),14),iexc1_mag);
    759    gg1 = PDIV32_16(SHL32(EXTEND32(exc_mag),8), iexc0_mag);
    760    gg2 = PDIV32_16(SHL32(EXTEND32(exc_mag),8), iexc1_mag);
    761    if (comb_gain>0)
    762    {
    763 #ifdef FIXED_POINT
    764       c1 = (MULT16_16_Q15(QCONST16(.4,15),comb_gain)+QCONST16(.07,15));
    765       c2 = QCONST16(.5,15)+MULT16_16_Q14(QCONST16(1.72,14),(c1-QCONST16(.07,15)));
    766 #else
    767       c1 = .4*comb_gain+.07;
    768       c2 = .5+1.72*(c1-.07);
    769 #endif
    770    } else
    771    {
    772       c1=c2=0;
    773    }
    774 #ifdef FIXED_POINT
    775    g1 = 32767 - MULT16_16_Q13(MULT16_16_Q15(c2, pgain1),pgain1);
    776    g2 = 32767 - MULT16_16_Q13(MULT16_16_Q15(c2, pgain2),pgain2);
    777 #else
    778    g1 = 1-c2*pgain1*pgain1;
    779    g2 = 1-c2*pgain2*pgain2;
    780 #endif
    781    if (g1<c1)
    782       g1 = c1;
    783    if (g2<c1)
    784       g2 = c1;
    785    g1 = (spx_word16_t)PDIV32_16(SHL32(EXTEND32(c1),14),(spx_word16_t)g1);
    786    g2 = (spx_word16_t)PDIV32_16(SHL32(EXTEND32(c1),14),(spx_word16_t)g2);
    787    if (corr_pitch>max_pitch)
    788    {
    789       gain0 = MULT16_16_Q15(QCONST16(.7,15),MULT16_16_Q14(g1,gg1));
    790       gain1 = MULT16_16_Q15(QCONST16(.3,15),MULT16_16_Q14(g2,gg2));
    791    } else {
    792       gain0 = MULT16_16_Q15(QCONST16(.6,15),MULT16_16_Q14(g1,gg1));
    793       gain1 = MULT16_16_Q15(QCONST16(.6,15),MULT16_16_Q14(g2,gg2));
    794    }
    795    for (i=0;i<nsf;i++)
    796       new_exc[i] = ADD16(exc[i], EXTRACT16(PSHR32(ADD32(MULT16_16(gain0,iexc[i]), MULT16_16(gain1,iexc[i+nsf])),8)));
    797    /* FIXME: compute_rms16 is currently not quite accurate enough (but close) */
    798    new_ener = compute_rms16(new_exc, nsf);
    799    old_ener = compute_rms16(exc, nsf);
    800 
    801    if (old_ener < 1)
    802       old_ener = 1;
    803    if (new_ener < 1)
    804       new_ener = 1;
    805    if (old_ener > new_ener)
    806       old_ener = new_ener;
    807    ngain = PDIV32_16(SHL32(EXTEND32(old_ener),14),new_ener);
    808 
    809    for (i=0;i<nsf;i++)
    810       new_exc[i] = MULT16_16_Q14(ngain, new_exc[i]);
    811 #ifdef FIXED_POINT
    812    if (scaledown)
    813    {
    814       for (i=0;i<nsf;i++)
    815          exc[i] = SHL16(exc[i],1);
    816       for (i=0;i<nsf;i++)
    817          new_exc[i] = SHL16(SATURATE16(new_exc[i],16383),1);
    818    }
    819 #endif
    820 }
    821 
    822