1 /* 2 * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 /* 12 * lattice.c 13 * 14 * Contains the normalized lattice filter routines (MA and AR) for iSAC codec 15 * 16 */ 17 18 #include "codec.h" 19 #include "settings.h" 20 21 #define LATTICE_MUL_32_32_RSFT16(a32a, a32b, b32) \ 22 ((int32_t)(WEBRTC_SPL_MUL(a32a, b32) + (WEBRTC_SPL_MUL_16_32_RSFT16(a32b, b32)))) 23 /* This macro is FORBIDDEN to use elsewhere than in a function in this file and 24 its corresponding neon version. It might give unpredictable results, since a 25 general int32_t*int32_t multiplication results in a 64 bit value. 26 The result is then shifted just 16 steps to the right, giving need for 48 27 bits, i.e. in the generel case, it will NOT fit in a int32_t. In the 28 cases used in here, the int32_t will be enough, since (for a good 29 reason) the involved multiplicands aren't big enough to overflow a 30 int32_t after shifting right 16 bits. I have compared the result of a 31 multiplication between t32 and tmp32, done in two ways: 32 1) Using (int32_t) (((float)(tmp32))*((float)(tmp32b))/65536.0); 33 2) Using LATTICE_MUL_32_32_RSFT16(t16a, t16b, tmp32b); 34 By running 25 files, I haven't found any bigger diff than 64 - this was in the 35 case when method 1) gave 650235648 and 2) gave 650235712. 36 */ 37 38 /* Function prototype: filtering ar_g_Q0[] and ar_f_Q0[] through an AR filter 39 with coefficients cth_Q15[] and sth_Q15[]. 40 Implemented for both generic and ARMv7 platforms. 41 */ 42 void WebRtcIsacfix_FilterArLoop(int16_t* ar_g_Q0, 43 int16_t* ar_f_Q0, 44 int16_t* cth_Q15, 45 int16_t* sth_Q15, 46 int16_t order_coef); 47 48 /* Inner loop used for function WebRtcIsacfix_NormLatticeFilterMa(). It does: 49 for 0 <= n < HALF_SUBFRAMELEN - 1: 50 *ptr2 = input2 * (*ptr2) + input0 * (*ptr0)); 51 *ptr1 = input1 * (*ptr0) + input0 * (*ptr2); 52 Note, function WebRtcIsacfix_FilterMaLoopNeon and WebRtcIsacfix_FilterMaLoopC 53 are not bit-exact. The accuracy by the ARM Neon function is same or better. 54 */ 55 void WebRtcIsacfix_FilterMaLoopC(int16_t input0, // Filter coefficient 56 int16_t input1, // Filter coefficient 57 int32_t input2, // Inverse coeff. (1/input1) 58 int32_t* ptr0, // Sample buffer 59 int32_t* ptr1, // Sample buffer 60 int32_t* ptr2) { // Sample buffer 61 int n = 0; 62 63 // Separate the 32-bit variable input2 into two 16-bit integers (high 16 and 64 // low 16 bits), for using LATTICE_MUL_32_32_RSFT16 in the loop. 65 int16_t t16a = (int16_t)(input2 >> 16); 66 int16_t t16b = (int16_t)input2; 67 if (t16b < 0) t16a++; 68 69 // The loop filtering the samples *ptr0, *ptr1, *ptr2 with filter coefficients 70 // input0, input1, and input2. 71 for(n = 0; n < HALF_SUBFRAMELEN - 1; n++, ptr0++, ptr1++, ptr2++) { 72 int32_t tmp32a = 0; 73 int32_t tmp32b = 0; 74 75 // Calculate *ptr2 = input2 * (*ptr2 + input0 * (*ptr0)); 76 tmp32a = WEBRTC_SPL_MUL_16_32_RSFT15(input0, *ptr0); // Q15 * Q15 >> 15 = Q15 77 tmp32b = *ptr2 + tmp32a; // Q15 + Q15 = Q15 78 *ptr2 = LATTICE_MUL_32_32_RSFT16(t16a, t16b, tmp32b); 79 80 // Calculate *ptr1 = input1 * (*ptr0) + input0 * (*ptr2); 81 tmp32a = WEBRTC_SPL_MUL_16_32_RSFT15(input1, *ptr0); // Q15*Q15>>15 = Q15 82 tmp32b = WEBRTC_SPL_MUL_16_32_RSFT15(input0, *ptr2); // Q15*Q15>>15 = Q15 83 *ptr1 = tmp32a + tmp32b; // Q15 + Q15 = Q15 84 } 85 } 86 87 /* filter the signal using normalized lattice filter */ 88 /* MA filter */ 89 void WebRtcIsacfix_NormLatticeFilterMa(int16_t orderCoef, 90 int32_t *stateGQ15, 91 int16_t *lat_inQ0, 92 int16_t *filt_coefQ15, 93 int32_t *gain_lo_hiQ17, 94 int16_t lo_hi, 95 int16_t *lat_outQ9) 96 { 97 int16_t sthQ15[MAX_AR_MODEL_ORDER]; 98 int16_t cthQ15[MAX_AR_MODEL_ORDER]; 99 100 int u, i, k, n; 101 int16_t temp2,temp3; 102 int16_t ord_1 = orderCoef+1; 103 int32_t inv_cthQ16[MAX_AR_MODEL_ORDER]; 104 105 int32_t gain32, fQtmp; 106 int16_t gain16; 107 int16_t gain_sh; 108 109 int32_t tmp32, tmp32b; 110 int32_t fQ15vec[HALF_SUBFRAMELEN]; 111 int32_t gQ15[MAX_AR_MODEL_ORDER+1][HALF_SUBFRAMELEN]; 112 int16_t sh; 113 int16_t t16a; 114 int16_t t16b; 115 116 for (u=0;u<SUBFRAMES;u++) 117 { 118 int32_t temp1 = WEBRTC_SPL_MUL_16_16(u, HALF_SUBFRAMELEN); 119 120 /* set the Direct Form coefficients */ 121 temp2 = (int16_t)WEBRTC_SPL_MUL_16_16(u, orderCoef); 122 temp3 = (int16_t)WEBRTC_SPL_MUL_16_16(2, u)+lo_hi; 123 124 /* compute lattice filter coefficients */ 125 memcpy(sthQ15, &filt_coefQ15[temp2], orderCoef * sizeof(int16_t)); 126 127 WebRtcSpl_SqrtOfOneMinusXSquared(sthQ15, orderCoef, cthQ15); 128 129 /* compute the gain */ 130 gain32 = gain_lo_hiQ17[temp3]; 131 gain_sh = WebRtcSpl_NormW32(gain32); 132 gain32 = WEBRTC_SPL_LSHIFT_W32(gain32, gain_sh); //Q(17+gain_sh) 133 134 for (k=0;k<orderCoef;k++) 135 { 136 gain32 = WEBRTC_SPL_MUL_16_32_RSFT15(cthQ15[k], gain32); //Q15*Q(17+gain_sh)>>15 = Q(17+gain_sh) 137 inv_cthQ16[k] = WebRtcSpl_DivW32W16((int32_t)2147483647, cthQ15[k]); // 1/cth[k] in Q31/Q15 = Q16 138 } 139 gain16 = (int16_t) WEBRTC_SPL_RSHIFT_W32(gain32, 16); //Q(1+gain_sh) 140 141 /* normalized lattice filter */ 142 /*****************************/ 143 144 /* initial conditions */ 145 for (i=0;i<HALF_SUBFRAMELEN;i++) 146 { 147 fQ15vec[i] = WEBRTC_SPL_LSHIFT_W32((int32_t)lat_inQ0[i + temp1], 15); //Q15 148 gQ15[0][i] = WEBRTC_SPL_LSHIFT_W32((int32_t)lat_inQ0[i + temp1], 15); //Q15 149 } 150 151 152 fQtmp = fQ15vec[0]; 153 154 /* get the state of f&g for the first input, for all orders */ 155 for (i=1;i<ord_1;i++) 156 { 157 // Calculate f[i][0] = inv_cth[i-1]*(f[i-1][0] + sth[i-1]*stateG[i-1]); 158 tmp32 = WEBRTC_SPL_MUL_16_32_RSFT15(sthQ15[i-1], stateGQ15[i-1]);//Q15*Q15>>15 = Q15 159 tmp32b= fQtmp + tmp32; //Q15+Q15=Q15 160 tmp32 = inv_cthQ16[i-1]; //Q16 161 t16a = (int16_t) WEBRTC_SPL_RSHIFT_W32(tmp32, 16); 162 t16b = (int16_t) (tmp32-WEBRTC_SPL_LSHIFT_W32(((int32_t)t16a), 16)); 163 if (t16b<0) t16a++; 164 tmp32 = LATTICE_MUL_32_32_RSFT16(t16a, t16b, tmp32b); 165 fQtmp = tmp32; // Q15 166 167 // Calculate g[i][0] = cth[i-1]*stateG[i-1] + sth[i-1]* f[i][0]; 168 tmp32 = WEBRTC_SPL_MUL_16_32_RSFT15(cthQ15[i-1], stateGQ15[i-1]); //Q15*Q15>>15 = Q15 169 tmp32b = WEBRTC_SPL_MUL_16_32_RSFT15(sthQ15[i-1], fQtmp); //Q15*Q15>>15 = Q15 170 tmp32 = tmp32 + tmp32b;//Q15+Q15 = Q15 171 gQ15[i][0] = tmp32; // Q15 172 } 173 174 /* filtering */ 175 /* save the states */ 176 for(k=0;k<orderCoef;k++) 177 { 178 // for 0 <= n < HALF_SUBFRAMELEN - 1: 179 // f[k+1][n+1] = inv_cth[k]*(f[k][n+1] + sth[k]*g[k][n]); 180 // g[k+1][n+1] = cth[k]*g[k][n] + sth[k]* f[k+1][n+1]; 181 WebRtcIsacfix_FilterMaLoopFix(sthQ15[k], cthQ15[k], inv_cthQ16[k], 182 &gQ15[k][0], &gQ15[k+1][1], &fQ15vec[1]); 183 } 184 185 fQ15vec[0] = fQtmp; 186 187 for(n=0;n<HALF_SUBFRAMELEN;n++) 188 { 189 //gain32 = WEBRTC_SPL_RSHIFT_W32(gain32, gain_sh); // Q(17+gain_sh) -> Q17 190 tmp32 = WEBRTC_SPL_MUL_16_32_RSFT16(gain16, fQ15vec[n]); //Q(1+gain_sh)*Q15>>16 = Q(gain_sh) 191 sh = 9-gain_sh; //number of needed shifts to reach Q9 192 t16a = (int16_t) WEBRTC_SPL_SHIFT_W32(tmp32, sh); 193 lat_outQ9[n + temp1] = t16a; 194 } 195 196 /* save the states */ 197 for (i=0;i<ord_1;i++) 198 { 199 stateGQ15[i] = gQ15[i][HALF_SUBFRAMELEN-1]; 200 } 201 //process next frame 202 } 203 204 return; 205 } 206 207 208 209 210 211 /* ----------------AR filter-------------------------*/ 212 /* filter the signal using normalized lattice filter */ 213 void WebRtcIsacfix_NormLatticeFilterAr(int16_t orderCoef, 214 int16_t *stateGQ0, 215 int32_t *lat_inQ25, 216 int16_t *filt_coefQ15, 217 int32_t *gain_lo_hiQ17, 218 int16_t lo_hi, 219 int16_t *lat_outQ0) 220 { 221 int ii,n,k,i,u; 222 int16_t sthQ15[MAX_AR_MODEL_ORDER]; 223 int16_t cthQ15[MAX_AR_MODEL_ORDER]; 224 int32_t tmp32; 225 226 227 int16_t tmpAR; 228 int16_t ARfQ0vec[HALF_SUBFRAMELEN]; 229 int16_t ARgQ0vec[MAX_AR_MODEL_ORDER+1]; 230 231 int32_t inv_gain32; 232 int16_t inv_gain16; 233 int16_t den16; 234 int16_t sh; 235 236 int16_t temp2,temp3; 237 int16_t ord_1 = orderCoef+1; 238 239 for (u=0;u<SUBFRAMES;u++) 240 { 241 int32_t temp1 = WEBRTC_SPL_MUL_16_16(u, HALF_SUBFRAMELEN); 242 243 //set the denominator and numerator of the Direct Form 244 temp2 = (int16_t)WEBRTC_SPL_MUL_16_16(u, orderCoef); 245 temp3 = (int16_t)WEBRTC_SPL_MUL_16_16(2, u) + lo_hi; 246 247 for (ii=0; ii<orderCoef; ii++) { 248 sthQ15[ii] = filt_coefQ15[temp2+ii]; 249 } 250 251 WebRtcSpl_SqrtOfOneMinusXSquared(sthQ15, orderCoef, cthQ15); 252 253 /* Simulation of the 25 files shows that maximum value in 254 the vector gain_lo_hiQ17[] is 441344, which means that 255 it is log2((2^31)/441344) = 12.2 shifting bits from 256 saturation. Therefore, it should be safe to use Q27 instead 257 of Q17. */ 258 259 tmp32 = WEBRTC_SPL_LSHIFT_W32(gain_lo_hiQ17[temp3], 10); // Q27 260 261 for (k=0;k<orderCoef;k++) { 262 tmp32 = WEBRTC_SPL_MUL_16_32_RSFT15(cthQ15[k], tmp32); // Q15*Q27>>15 = Q27 263 } 264 265 sh = WebRtcSpl_NormW32(tmp32); // tmp32 is the gain 266 den16 = (int16_t) WEBRTC_SPL_SHIFT_W32(tmp32, sh-16); //Q(27+sh-16) = Q(sh+11) (all 16 bits are value bits) 267 inv_gain32 = WebRtcSpl_DivW32W16((int32_t)2147483647, den16); // 1/gain in Q31/Q(sh+11) = Q(20-sh) 268 269 //initial conditions 270 inv_gain16 = (int16_t) WEBRTC_SPL_RSHIFT_W32(inv_gain32, 2); // 1/gain in Q(20-sh-2) = Q(18-sh) 271 272 for (i=0;i<HALF_SUBFRAMELEN;i++) 273 { 274 275 tmp32 = WEBRTC_SPL_LSHIFT_W32(lat_inQ25[i + temp1], 1); //Q25->Q26 276 tmp32 = WEBRTC_SPL_MUL_16_32_RSFT16(inv_gain16, tmp32); //lat_in[]*inv_gain in (Q(18-sh)*Q26)>>16 = Q(28-sh) 277 tmp32 = WEBRTC_SPL_SHIFT_W32(tmp32, -(28-sh)); // lat_in[]*inv_gain in Q0 278 279 ARfQ0vec[i] = (int16_t)WebRtcSpl_SatW32ToW16(tmp32); // Q0 280 } 281 282 for (i=orderCoef-1;i>=0;i--) //get the state of f&g for the first input, for all orders 283 { 284 tmp32 = WEBRTC_SPL_RSHIFT_W32(((WEBRTC_SPL_MUL_16_16(cthQ15[i],ARfQ0vec[0])) - (WEBRTC_SPL_MUL_16_16(sthQ15[i],stateGQ0[i])) + 16384), 15); 285 tmpAR = (int16_t)WebRtcSpl_SatW32ToW16(tmp32); // Q0 286 287 tmp32 = WEBRTC_SPL_RSHIFT_W32(((WEBRTC_SPL_MUL_16_16(sthQ15[i],ARfQ0vec[0])) + (WEBRTC_SPL_MUL_16_16(cthQ15[i], stateGQ0[i])) + 16384), 15); 288 ARgQ0vec[i+1] = (int16_t)WebRtcSpl_SatW32ToW16(tmp32); // Q0 289 ARfQ0vec[0] = tmpAR; 290 } 291 ARgQ0vec[0] = ARfQ0vec[0]; 292 293 // Filter ARgQ0vec[] and ARfQ0vec[] through coefficients cthQ15[] and sthQ15[]. 294 WebRtcIsacfix_FilterArLoop(ARgQ0vec, ARfQ0vec, cthQ15, sthQ15, orderCoef); 295 296 for(n=0;n<HALF_SUBFRAMELEN;n++) 297 { 298 lat_outQ0[n + temp1] = ARfQ0vec[n]; 299 } 300 301 302 /* cannot use memcpy in the following */ 303 304 for (i=0;i<ord_1;i++) 305 { 306 stateGQ0[i] = ARgQ0vec[i]; 307 } 308 } 309 310 return; 311 } 312