Home | History | Annotate | Download | only in source
      1 /*
      2  *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
      3  *
      4  *  Use of this source code is governed by a BSD-style license
      5  *  that can be found in the LICENSE file in the root of the source
      6  *  tree. An additional intellectual property rights grant can be found
      7  *  in the file PATENTS.  All contributing project authors may
      8  *  be found in the AUTHORS file in the root of the source tree.
      9  */
     10 
     11 /*
     12  * lattice.c
     13  *
     14  * Contains the normalized lattice filter routines (MA and AR) for iSAC codec
     15  *
     16  */
     17 
     18 #include "codec.h"
     19 #include "settings.h"
     20 
     21 #define LATTICE_MUL_32_32_RSFT16(a32a, a32b, b32)                  \
     22   ((int32_t)(WEBRTC_SPL_MUL(a32a, b32) + (WEBRTC_SPL_MUL_16_32_RSFT16(a32b, b32))))
     23 /* This macro is FORBIDDEN to use elsewhere than in a function in this file and
     24    its corresponding neon version. It might give unpredictable results, since a
     25    general int32_t*int32_t multiplication results in a 64 bit value.
     26    The result is then shifted just 16 steps to the right, giving need for 48
     27    bits, i.e. in the generel case, it will NOT fit in a int32_t. In the
     28    cases used in here, the int32_t will be enough, since (for a good
     29    reason) the involved multiplicands aren't big enough to overflow a
     30    int32_t after shifting right 16 bits. I have compared the result of a
     31    multiplication between t32 and tmp32, done in two ways:
     32    1) Using (int32_t) (((float)(tmp32))*((float)(tmp32b))/65536.0);
     33    2) Using LATTICE_MUL_32_32_RSFT16(t16a, t16b, tmp32b);
     34    By running 25 files, I haven't found any bigger diff than 64 - this was in the
     35    case when  method 1) gave 650235648 and 2) gave 650235712.
     36 */
     37 
     38 /* Function prototype: filtering ar_g_Q0[] and ar_f_Q0[] through an AR filter
     39    with coefficients cth_Q15[] and sth_Q15[].
     40    Implemented for both generic and ARMv7 platforms.
     41  */
     42 void WebRtcIsacfix_FilterArLoop(int16_t* ar_g_Q0,
     43                                 int16_t* ar_f_Q0,
     44                                 int16_t* cth_Q15,
     45                                 int16_t* sth_Q15,
     46                                 int16_t order_coef);
     47 
     48 /* Inner loop used for function WebRtcIsacfix_NormLatticeFilterMa(). It does:
     49    for 0 <= n < HALF_SUBFRAMELEN - 1:
     50      *ptr2 = input2 * (*ptr2) + input0 * (*ptr0));
     51      *ptr1 = input1 * (*ptr0) + input0 * (*ptr2);
     52    Note, function WebRtcIsacfix_FilterMaLoopNeon and WebRtcIsacfix_FilterMaLoopC
     53    are not bit-exact. The accuracy by the ARM Neon function is same or better.
     54 */
     55 void WebRtcIsacfix_FilterMaLoopC(int16_t input0,  // Filter coefficient
     56                                  int16_t input1,  // Filter coefficient
     57                                  int32_t input2,  // Inverse coeff. (1/input1)
     58                                  int32_t* ptr0,   // Sample buffer
     59                                  int32_t* ptr1,   // Sample buffer
     60                                  int32_t* ptr2) { // Sample buffer
     61   int n = 0;
     62 
     63   // Separate the 32-bit variable input2 into two 16-bit integers (high 16 and
     64   // low 16 bits), for using LATTICE_MUL_32_32_RSFT16 in the loop.
     65   int16_t t16a = (int16_t)(input2 >> 16);
     66   int16_t t16b = (int16_t)input2;
     67   if (t16b < 0) t16a++;
     68 
     69   // The loop filtering the samples *ptr0, *ptr1, *ptr2 with filter coefficients
     70   // input0, input1, and input2.
     71   for(n = 0; n < HALF_SUBFRAMELEN - 1; n++, ptr0++, ptr1++, ptr2++) {
     72     int32_t tmp32a = 0;
     73     int32_t tmp32b = 0;
     74 
     75     // Calculate *ptr2 = input2 * (*ptr2 + input0 * (*ptr0));
     76     tmp32a = WEBRTC_SPL_MUL_16_32_RSFT15(input0, *ptr0); // Q15 * Q15 >> 15 = Q15
     77     tmp32b = *ptr2 + tmp32a; // Q15 + Q15 = Q15
     78     *ptr2 = LATTICE_MUL_32_32_RSFT16(t16a, t16b, tmp32b);
     79 
     80     // Calculate *ptr1 = input1 * (*ptr0) + input0 * (*ptr2);
     81     tmp32a = WEBRTC_SPL_MUL_16_32_RSFT15(input1, *ptr0); // Q15*Q15>>15 = Q15
     82     tmp32b = WEBRTC_SPL_MUL_16_32_RSFT15(input0, *ptr2); // Q15*Q15>>15 = Q15
     83     *ptr1 = tmp32a + tmp32b; // Q15 + Q15 = Q15
     84   }
     85 }
     86 
     87 /* filter the signal using normalized lattice filter */
     88 /* MA filter */
     89 void WebRtcIsacfix_NormLatticeFilterMa(int16_t orderCoef,
     90                                        int32_t *stateGQ15,
     91                                        int16_t *lat_inQ0,
     92                                        int16_t *filt_coefQ15,
     93                                        int32_t *gain_lo_hiQ17,
     94                                        int16_t lo_hi,
     95                                        int16_t *lat_outQ9)
     96 {
     97   int16_t sthQ15[MAX_AR_MODEL_ORDER];
     98   int16_t cthQ15[MAX_AR_MODEL_ORDER];
     99 
    100   int u, i, k, n;
    101   int16_t temp2,temp3;
    102   int16_t ord_1 = orderCoef+1;
    103   int32_t inv_cthQ16[MAX_AR_MODEL_ORDER];
    104 
    105   int32_t gain32, fQtmp;
    106   int16_t gain16;
    107   int16_t gain_sh;
    108 
    109   int32_t tmp32, tmp32b;
    110   int32_t fQ15vec[HALF_SUBFRAMELEN];
    111   int32_t gQ15[MAX_AR_MODEL_ORDER+1][HALF_SUBFRAMELEN];
    112   int16_t sh;
    113   int16_t t16a;
    114   int16_t t16b;
    115 
    116   for (u=0;u<SUBFRAMES;u++)
    117   {
    118     int32_t temp1 = WEBRTC_SPL_MUL_16_16(u, HALF_SUBFRAMELEN);
    119 
    120     /* set the Direct Form coefficients */
    121     temp2 = (int16_t)WEBRTC_SPL_MUL_16_16(u, orderCoef);
    122     temp3 = (int16_t)WEBRTC_SPL_MUL_16_16(2, u)+lo_hi;
    123 
    124     /* compute lattice filter coefficients */
    125     memcpy(sthQ15, &filt_coefQ15[temp2], orderCoef * sizeof(int16_t));
    126 
    127     WebRtcSpl_SqrtOfOneMinusXSquared(sthQ15, orderCoef, cthQ15);
    128 
    129     /* compute the gain */
    130     gain32 = gain_lo_hiQ17[temp3];
    131     gain_sh = WebRtcSpl_NormW32(gain32);
    132     gain32 = WEBRTC_SPL_LSHIFT_W32(gain32, gain_sh); //Q(17+gain_sh)
    133 
    134     for (k=0;k<orderCoef;k++)
    135     {
    136       gain32 = WEBRTC_SPL_MUL_16_32_RSFT15(cthQ15[k], gain32); //Q15*Q(17+gain_sh)>>15 = Q(17+gain_sh)
    137       inv_cthQ16[k] = WebRtcSpl_DivW32W16((int32_t)2147483647, cthQ15[k]); // 1/cth[k] in Q31/Q15 = Q16
    138     }
    139     gain16 = (int16_t) WEBRTC_SPL_RSHIFT_W32(gain32, 16); //Q(1+gain_sh)
    140 
    141     /* normalized lattice filter */
    142     /*****************************/
    143 
    144     /* initial conditions */
    145     for (i=0;i<HALF_SUBFRAMELEN;i++)
    146     {
    147       fQ15vec[i] = WEBRTC_SPL_LSHIFT_W32((int32_t)lat_inQ0[i + temp1], 15); //Q15
    148       gQ15[0][i] = WEBRTC_SPL_LSHIFT_W32((int32_t)lat_inQ0[i + temp1], 15); //Q15
    149     }
    150 
    151 
    152     fQtmp = fQ15vec[0];
    153 
    154     /* get the state of f&g for the first input, for all orders */
    155     for (i=1;i<ord_1;i++)
    156     {
    157       // Calculate f[i][0] = inv_cth[i-1]*(f[i-1][0] + sth[i-1]*stateG[i-1]);
    158       tmp32 = WEBRTC_SPL_MUL_16_32_RSFT15(sthQ15[i-1], stateGQ15[i-1]);//Q15*Q15>>15 = Q15
    159       tmp32b= fQtmp + tmp32; //Q15+Q15=Q15
    160       tmp32 = inv_cthQ16[i-1]; //Q16
    161       t16a = (int16_t) WEBRTC_SPL_RSHIFT_W32(tmp32, 16);
    162       t16b = (int16_t) (tmp32-WEBRTC_SPL_LSHIFT_W32(((int32_t)t16a), 16));
    163       if (t16b<0) t16a++;
    164       tmp32 = LATTICE_MUL_32_32_RSFT16(t16a, t16b, tmp32b);
    165       fQtmp = tmp32; // Q15
    166 
    167       // Calculate g[i][0] = cth[i-1]*stateG[i-1] + sth[i-1]* f[i][0];
    168       tmp32  = WEBRTC_SPL_MUL_16_32_RSFT15(cthQ15[i-1], stateGQ15[i-1]); //Q15*Q15>>15 = Q15
    169       tmp32b = WEBRTC_SPL_MUL_16_32_RSFT15(sthQ15[i-1], fQtmp); //Q15*Q15>>15 = Q15
    170       tmp32  = tmp32 + tmp32b;//Q15+Q15 = Q15
    171       gQ15[i][0] = tmp32; // Q15
    172     }
    173 
    174     /* filtering */
    175     /* save the states */
    176     for(k=0;k<orderCoef;k++)
    177     {
    178       // for 0 <= n < HALF_SUBFRAMELEN - 1:
    179       //   f[k+1][n+1] = inv_cth[k]*(f[k][n+1] + sth[k]*g[k][n]);
    180       //   g[k+1][n+1] = cth[k]*g[k][n] + sth[k]* f[k+1][n+1];
    181       WebRtcIsacfix_FilterMaLoopFix(sthQ15[k], cthQ15[k], inv_cthQ16[k],
    182                                     &gQ15[k][0], &gQ15[k+1][1], &fQ15vec[1]);
    183     }
    184 
    185     fQ15vec[0] = fQtmp;
    186 
    187     for(n=0;n<HALF_SUBFRAMELEN;n++)
    188     {
    189       //gain32 = WEBRTC_SPL_RSHIFT_W32(gain32, gain_sh); // Q(17+gain_sh) -> Q17
    190       tmp32 = WEBRTC_SPL_MUL_16_32_RSFT16(gain16, fQ15vec[n]); //Q(1+gain_sh)*Q15>>16 = Q(gain_sh)
    191       sh = 9-gain_sh; //number of needed shifts to reach Q9
    192       t16a = (int16_t) WEBRTC_SPL_SHIFT_W32(tmp32, sh);
    193       lat_outQ9[n + temp1] = t16a;
    194     }
    195 
    196     /* save the states */
    197     for (i=0;i<ord_1;i++)
    198     {
    199       stateGQ15[i] = gQ15[i][HALF_SUBFRAMELEN-1];
    200     }
    201     //process next frame
    202   }
    203 
    204   return;
    205 }
    206 
    207 
    208 
    209 
    210 
    211 /* ----------------AR filter-------------------------*/
    212 /* filter the signal using normalized lattice filter */
    213 void WebRtcIsacfix_NormLatticeFilterAr(int16_t orderCoef,
    214                                        int16_t *stateGQ0,
    215                                        int32_t *lat_inQ25,
    216                                        int16_t *filt_coefQ15,
    217                                        int32_t *gain_lo_hiQ17,
    218                                        int16_t lo_hi,
    219                                        int16_t *lat_outQ0)
    220 {
    221   int ii,n,k,i,u;
    222   int16_t sthQ15[MAX_AR_MODEL_ORDER];
    223   int16_t cthQ15[MAX_AR_MODEL_ORDER];
    224   int32_t tmp32;
    225 
    226 
    227   int16_t tmpAR;
    228   int16_t ARfQ0vec[HALF_SUBFRAMELEN];
    229   int16_t ARgQ0vec[MAX_AR_MODEL_ORDER+1];
    230 
    231   int32_t inv_gain32;
    232   int16_t inv_gain16;
    233   int16_t den16;
    234   int16_t sh;
    235 
    236   int16_t temp2,temp3;
    237   int16_t ord_1 = orderCoef+1;
    238 
    239   for (u=0;u<SUBFRAMES;u++)
    240   {
    241     int32_t temp1 = WEBRTC_SPL_MUL_16_16(u, HALF_SUBFRAMELEN);
    242 
    243     //set the denominator and numerator of the Direct Form
    244     temp2 = (int16_t)WEBRTC_SPL_MUL_16_16(u, orderCoef);
    245     temp3 = (int16_t)WEBRTC_SPL_MUL_16_16(2, u) + lo_hi;
    246 
    247     for (ii=0; ii<orderCoef; ii++) {
    248       sthQ15[ii] = filt_coefQ15[temp2+ii];
    249     }
    250 
    251     WebRtcSpl_SqrtOfOneMinusXSquared(sthQ15, orderCoef, cthQ15);
    252 
    253     /* Simulation of the 25 files shows that maximum value in
    254        the vector gain_lo_hiQ17[] is 441344, which means that
    255        it is log2((2^31)/441344) = 12.2 shifting bits from
    256        saturation. Therefore, it should be safe to use Q27 instead
    257        of Q17. */
    258 
    259     tmp32 = WEBRTC_SPL_LSHIFT_W32(gain_lo_hiQ17[temp3], 10); // Q27
    260 
    261     for (k=0;k<orderCoef;k++) {
    262       tmp32 = WEBRTC_SPL_MUL_16_32_RSFT15(cthQ15[k], tmp32); // Q15*Q27>>15 = Q27
    263     }
    264 
    265     sh = WebRtcSpl_NormW32(tmp32); // tmp32 is the gain
    266     den16 = (int16_t) WEBRTC_SPL_SHIFT_W32(tmp32, sh-16); //Q(27+sh-16) = Q(sh+11) (all 16 bits are value bits)
    267     inv_gain32 = WebRtcSpl_DivW32W16((int32_t)2147483647, den16); // 1/gain in Q31/Q(sh+11) = Q(20-sh)
    268 
    269     //initial conditions
    270     inv_gain16 = (int16_t) WEBRTC_SPL_RSHIFT_W32(inv_gain32, 2); // 1/gain in Q(20-sh-2) = Q(18-sh)
    271 
    272     for (i=0;i<HALF_SUBFRAMELEN;i++)
    273     {
    274 
    275       tmp32 = WEBRTC_SPL_LSHIFT_W32(lat_inQ25[i + temp1], 1); //Q25->Q26
    276       tmp32 = WEBRTC_SPL_MUL_16_32_RSFT16(inv_gain16, tmp32); //lat_in[]*inv_gain in (Q(18-sh)*Q26)>>16 = Q(28-sh)
    277       tmp32 = WEBRTC_SPL_SHIFT_W32(tmp32, -(28-sh)); // lat_in[]*inv_gain in Q0
    278 
    279       ARfQ0vec[i] = (int16_t)WebRtcSpl_SatW32ToW16(tmp32); // Q0
    280     }
    281 
    282     for (i=orderCoef-1;i>=0;i--) //get the state of f&g for the first input, for all orders
    283     {
    284       tmp32 = WEBRTC_SPL_RSHIFT_W32(((WEBRTC_SPL_MUL_16_16(cthQ15[i],ARfQ0vec[0])) - (WEBRTC_SPL_MUL_16_16(sthQ15[i],stateGQ0[i])) + 16384), 15);
    285       tmpAR = (int16_t)WebRtcSpl_SatW32ToW16(tmp32); // Q0
    286 
    287       tmp32 = WEBRTC_SPL_RSHIFT_W32(((WEBRTC_SPL_MUL_16_16(sthQ15[i],ARfQ0vec[0])) + (WEBRTC_SPL_MUL_16_16(cthQ15[i], stateGQ0[i])) + 16384), 15);
    288       ARgQ0vec[i+1] = (int16_t)WebRtcSpl_SatW32ToW16(tmp32); // Q0
    289       ARfQ0vec[0] = tmpAR;
    290     }
    291     ARgQ0vec[0] = ARfQ0vec[0];
    292 
    293     // Filter ARgQ0vec[] and ARfQ0vec[] through coefficients cthQ15[] and sthQ15[].
    294     WebRtcIsacfix_FilterArLoop(ARgQ0vec, ARfQ0vec, cthQ15, sthQ15, orderCoef);
    295 
    296     for(n=0;n<HALF_SUBFRAMELEN;n++)
    297     {
    298       lat_outQ0[n + temp1] = ARfQ0vec[n];
    299     }
    300 
    301 
    302     /* cannot use memcpy in the following */
    303 
    304     for (i=0;i<ord_1;i++)
    305     {
    306       stateGQ0[i] = ARgQ0vec[i];
    307     }
    308   }
    309 
    310   return;
    311 }
    312