/*
 *  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
 *
 *  Use of this source code is governed by a BSD-style license
 *  that can be found in the LICENSE file in the root of the source
 *  tree. An additional intellectual property rights grant can be found
 *  in the file PATENTS.  All contributing project authors may
 *  be found in the AUTHORS file in the root of the source tree.
 */

/*
 * The core AEC algorithm, which is presented with time-aligned signals.
 *
 * This file holds the MIPS inline-assembly variants of the AEC kernels.
 * WebRtcAec_InitAec_mips() at the bottom of the file installs them into the
 * WebRtcAec_* function pointers.
 *
 * Conventions used by the assembly blocks in this file:
 *  - Most blocks run under ".set noreorder", so the instruction written
 *    directly after a branch is its delay slot and always executes. Those
 *    instructions carry a leading space inside the string.
 *  - MIPS32_R2_LE selects the fused madd.s/nmsub.s sequences; otherwise the
 *    same arithmetic is spelled out with separate mul.s/add.s/sub.s.
 *  - NOTE(review): PART_LEN, PART_LEN1 and PART_LEN2 come from headers that
 *    are not visible here. The loops below assume PART_LEN is a multiple of 4
 *    and PART_LEN1 == PART_LEN + 1; several hard-coded counts and byte
 *    offsets (8, 31, 256) look like they assume PART_LEN == 64 -- confirm.
 */

#include "webrtc/modules/audio_processing/aec/aec_core.h"

#include <math.h>

#include "webrtc/common_audio/signal_processing/include/signal_processing_library.h"
#include "webrtc/modules/audio_processing/aec/aec_core_internal.h"
#include "webrtc/modules/audio_processing/aec/aec_rdft.h"

static const int flagHbandCn = 1;  // flag for adding comfort noise in H band
// Per-bin tables defined in another translation unit.
extern const float WebRtcAec_weightCurve[65];
extern const float WebRtcAec_overDriveCurve[65];

// Adds random-phase comfort noise to the error spectrum |efw| and, when
// aec->sampFreq == 32000, also fills |comfortNoiseHband|.
//
//  aec               - AEC state; aec->seed feeds the random generator and
//                      aec->sampFreq selects the H band path.
//  efw               - spectrum updated in place; efw[0] receives the cosine
//                      component of the noise and efw[1] the (negated) sine
//                      component.
//  comfortNoiseHband - output, written only on the H band path.
//  noisePow          - per-bin noise power; its square root scales the noise.
//  lambda            - per-bin weight; the noise added to bin i is scaled by
//                      sqrt(max(1 - lambda[i]^2, 0)).
//
// |noisePow| and |lambda| are advanced inside the assembly blocks and rewound
// afterwards, so the C code that follows can index them from bin 0 again.
void WebRtcAec_ComfortNoise_mips(AecCore* aec,
                                 float efw[2][PART_LEN1],
                                 complex_t* comfortNoiseHband,
                                 const float* noisePow,
                                 const float* lambda) {
  int i, num;
  float rand[PART_LEN];
  float noise, noiseAvg, tmp, tmpAvg;
  int16_t randW16[PART_LEN];
  complex_t u[PART_LEN1];

  const float pi2 = 6.28318530717959f;
  // Scale factor that maps a raw 16-bit random value directly to a phase:
  // (value / 32768) * 2*pi.
  const float pi2t = pi2 / 32768;

  // Generate a uniform random array on [0 1]
  // (the raw 16-bit values are stored here; the division by 32768 that maps
  // them to that range is folded into pi2t above and into rand[] below).
  WebRtcSpl_RandUArray(randW16, PART_LEN, &aec->seed);

  int16_t* randWptr = randW16;
  float randTemp, randTemp2, randTemp3, randTemp4;
  int32_t tmp1s, tmp2s, tmp3s, tmp4s;

  // Convert four random values per iteration into phases and store the
  // corresponding unit vectors (cos, sin) in u[i+1] .. u[i+4]. u[0] is left
  // for the "reject LF noise" step below.
  for (i = 0; i < PART_LEN; i+=4) {
    __asm __volatile (
      ".set push \n\t"
      ".set noreorder \n\t"
      // Load four consecutive 16-bit random values.
      "lh %[tmp1s], 0(%[randWptr]) \n\t"
      "lh %[tmp2s], 2(%[randWptr]) \n\t"
      "lh %[tmp3s], 4(%[randWptr]) \n\t"
      "lh %[tmp4s], 6(%[randWptr]) \n\t"
      // Move them to FP registers and convert integer -> float.
      "mtc1 %[tmp1s], %[randTemp] \n\t"
      "mtc1 %[tmp2s], %[randTemp2] \n\t"
      "mtc1 %[tmp3s], %[randTemp3] \n\t"
      "mtc1 %[tmp4s], %[randTemp4] \n\t"
      "cvt.s.w %[randTemp], %[randTemp] \n\t"
      "cvt.s.w %[randTemp2], %[randTemp2] \n\t"
      "cvt.s.w %[randTemp3], %[randTemp3] \n\t"
      "cvt.s.w %[randTemp4], %[randTemp4] \n\t"
      // Advance by four int16_t values.
      "addiu %[randWptr], %[randWptr], 8 \n\t"
      // phase = value * (2*pi / 32768)
      "mul.s %[randTemp], %[randTemp], %[pi2t] \n\t"
      "mul.s %[randTemp2], %[randTemp2], %[pi2t] \n\t"
      "mul.s %[randTemp3], %[randTemp3], %[pi2t] \n\t"
      "mul.s %[randTemp4], %[randTemp4], %[pi2t] \n\t"
      ".set pop \n\t"
      : [randWptr] "+r" (randWptr), [randTemp] "=&f" (randTemp),
        [randTemp2] "=&f" (randTemp2), [randTemp3] "=&f" (randTemp3),
        [randTemp4] "=&f" (randTemp4), [tmp1s] "=&r" (tmp1s),
        [tmp2s] "=&r" (tmp2s), [tmp3s] "=&r" (tmp3s),
        [tmp4s] "=&r" (tmp4s)
      : [pi2t] "f" (pi2t)
      : "memory"
    );

    u[i+1][0] = cosf(randTemp);
    u[i+1][1] = sinf(randTemp);
    u[i+2][0] = cosf(randTemp2);
    u[i+2][1] = sinf(randTemp2);
    u[i+3][0] = cosf(randTemp3);
    u[i+3][1] = sinf(randTemp3);
    u[i+4][0] = cosf(randTemp4);
    u[i+4][1] = sinf(randTemp4);
  }

  // Reject LF noise
  float* u_ptr = &u[1][0];
  float noise2, noise3, noise4;
  float tmp1f, tmp2f, tmp3f, tmp4f, tmp5f, tmp6f, tmp7f, tmp8f;

  u[0][0] = 0;
  u[0][1] = 0;
  // Scale bins 1..PART_LEN, four bins per iteration:
  //   u[k][0] =  sqrt(noisePow[k]) * u[k][0]
  //   u[k][1] = -sqrt(noisePow[k]) * u[k][1]
  // The loads use offsets 4..16 because noisePow still points at bin 0 on
  // entry while u_ptr already points at bin 1.
  for (i = 1; i < PART_LEN1; i+=4) {
    __asm __volatile (
      ".set push \n\t"
      ".set noreorder \n\t"
      "lwc1 %[noise], 4(%[noisePow]) \n\t"
      "lwc1 %[noise2], 8(%[noisePow]) \n\t"
      "lwc1 %[noise3], 12(%[noisePow]) \n\t"
      "lwc1 %[noise4], 16(%[noisePow]) \n\t"
      "sqrt.s %[noise], %[noise] \n\t"
      "sqrt.s %[noise2], %[noise2] \n\t"
      "sqrt.s %[noise3], %[noise3] \n\t"
      "sqrt.s %[noise4], %[noise4] \n\t"
      // Four interleaved (cos, sin) pairs.
      "lwc1 %[tmp1f], 0(%[u_ptr]) \n\t"
      "lwc1 %[tmp2f], 4(%[u_ptr]) \n\t"
      "lwc1 %[tmp3f], 8(%[u_ptr]) \n\t"
      "lwc1 %[tmp4f], 12(%[u_ptr]) \n\t"
      "lwc1 %[tmp5f], 16(%[u_ptr]) \n\t"
      "lwc1 %[tmp6f], 20(%[u_ptr]) \n\t"
      "lwc1 %[tmp7f], 24(%[u_ptr]) \n\t"
      "lwc1 %[tmp8f], 28(%[u_ptr]) \n\t"
      "addiu %[noisePow], %[noisePow], 16 \n\t"
      "mul.s %[tmp1f], %[tmp1f], %[noise] \n\t"
      "mul.s %[tmp2f], %[tmp2f], %[noise] \n\t"
      "mul.s %[tmp3f], %[tmp3f], %[noise2] \n\t"
      "mul.s %[tmp4f], %[tmp4f], %[noise2] \n\t"
      "mul.s %[tmp5f], %[tmp5f], %[noise3] \n\t"
      "mul.s %[tmp6f], %[tmp6f], %[noise3] \n\t"
      "swc1 %[tmp1f], 0(%[u_ptr]) \n\t"
      "swc1 %[tmp3f], 8(%[u_ptr]) \n\t"
      "mul.s %[tmp8f], %[tmp8f], %[noise4] \n\t"
      "mul.s %[tmp7f], %[tmp7f], %[noise4] \n\t"
      // Negate the sine components.
      "neg.s %[tmp2f] \n\t"
      "neg.s %[tmp4f] \n\t"
      "neg.s %[tmp6f] \n\t"
      "neg.s %[tmp8f] \n\t"
      "swc1 %[tmp5f], 16(%[u_ptr]) \n\t"
      "swc1 %[tmp7f], 24(%[u_ptr]) \n\t"
      "swc1 %[tmp2f], 4(%[u_ptr]) \n\t"
      "swc1 %[tmp4f], 12(%[u_ptr]) \n\t"
      "swc1 %[tmp6f], 20(%[u_ptr]) \n\t"
      "swc1 %[tmp8f], 28(%[u_ptr]) \n\t"
      "addiu %[u_ptr], %[u_ptr], 32 \n\t"
      ".set pop \n\t"
      : [u_ptr] "+r" (u_ptr), [noisePow] "+r" (noisePow),
        [noise] "=&f" (noise), [noise2] "=&f" (noise2),
        [noise3] "=&f" (noise3), [noise4] "=&f" (noise4),
        [tmp1f] "=&f" (tmp1f), [tmp2f] "=&f" (tmp2f),
        [tmp3f] "=&f" (tmp3f), [tmp4f] "=&f" (tmp4f),
        [tmp5f] "=&f" (tmp5f), [tmp6f] "=&f" (tmp6f),
        [tmp7f] "=&f" (tmp7f), [tmp8f] "=&f" (tmp8f)
      :
      : "memory"
    );
  }
  u[PART_LEN][1] = 0;
  // Undo the pointer advance done inside the loop above.
  noisePow -= PART_LEN;

  u_ptr = &u[0][0];
  float* u_ptr_end = &u[PART_LEN][0];
  float* efw_ptr_0 = &efw[0][0];
  float* efw_ptr_1 = &efw[1][0];
  float tmp9f, tmp10f;
  const float tmp1c = 1.0;

  // Add the scaled noise to efw for bins 0..PART_LEN-1, two bins (a, b) per
  // iteration. A bin is only touched when its lambda is < 1; in that case
  //   efw[0][k] += sqrt(1 - lambda[k]^2) * u[k][0]
  //   efw[1][k] += sqrt(1 - lambda[k]^2) * u[k][1]
  // Labels: 1 = loop head, 2 = both bins updated, 3 = only bin a,
  //         4 = bin a skipped (bin b tested again), 5 = pointer advance.
  __asm __volatile (
    ".set push \n\t"
    ".set noreorder \n\t"
   "1: \n\t"
    "lwc1 %[tmp1f], 0(%[lambda]) \n\t"
    "lwc1 %[tmp6f], 4(%[lambda]) \n\t"
    "addiu %[lambda], %[lambda], 8 \n\t"
    // lambda[a] < 1 ?  If not, go to 4.
    "c.lt.s %[tmp1f], %[tmp1c] \n\t"
    "bc1f 4f \n\t"
    " nop \n\t"
    // lambda[b] < 1 ?  If not, only bin a is updated (3).
    "c.lt.s %[tmp6f], %[tmp1c] \n\t"
    "bc1f 3f \n\t"
    " nop \n\t"
   "2: \n\t"
    "mul.s %[tmp1f], %[tmp1f], %[tmp1f] \n\t"
    "mul.s %[tmp6f], %[tmp6f], %[tmp6f] \n\t"
    "sub.s %[tmp1f], %[tmp1c], %[tmp1f] \n\t"
    "sub.s %[tmp6f], %[tmp1c], %[tmp6f] \n\t"
    "sqrt.s %[tmp1f], %[tmp1f] \n\t"
    "sqrt.s %[tmp6f], %[tmp6f] \n\t"
    "lwc1 %[tmp2f], 0(%[efw_ptr_0]) \n\t"
    "lwc1 %[tmp3f], 0(%[u_ptr]) \n\t"
    "lwc1 %[tmp7f], 4(%[efw_ptr_0]) \n\t"
    "lwc1 %[tmp8f], 8(%[u_ptr]) \n\t"
    "lwc1 %[tmp4f], 0(%[efw_ptr_1]) \n\t"
    "lwc1 %[tmp5f], 4(%[u_ptr]) \n\t"
    "lwc1 %[tmp9f], 4(%[efw_ptr_1]) \n\t"
    "lwc1 %[tmp10f], 12(%[u_ptr]) \n\t"
#if !defined(MIPS32_R2_LE)
    "mul.s %[tmp3f], %[tmp1f], %[tmp3f] \n\t"
    "add.s %[tmp2f], %[tmp2f], %[tmp3f] \n\t"
    "mul.s %[tmp3f], %[tmp1f], %[tmp5f] \n\t"
    "add.s %[tmp4f], %[tmp4f], %[tmp3f] \n\t"
    "mul.s %[tmp3f], %[tmp6f], %[tmp8f] \n\t"
    "add.s %[tmp7f], %[tmp7f], %[tmp3f] \n\t"
    "mul.s %[tmp3f], %[tmp6f], %[tmp10f] \n\t"
    "add.s %[tmp9f], %[tmp9f], %[tmp3f] \n\t"
#else // #if !defined(MIPS32_R2_LE)
    "madd.s %[tmp2f], %[tmp2f], %[tmp1f], %[tmp3f] \n\t"
    "madd.s %[tmp4f], %[tmp4f], %[tmp1f], %[tmp5f] \n\t"
    "madd.s %[tmp7f], %[tmp7f], %[tmp6f], %[tmp8f] \n\t"
    "madd.s %[tmp9f], %[tmp9f], %[tmp6f], %[tmp10f] \n\t"
#endif // #if !defined(MIPS32_R2_LE)
    "swc1 %[tmp2f], 0(%[efw_ptr_0]) \n\t"
    "swc1 %[tmp4f], 0(%[efw_ptr_1]) \n\t"
    "swc1 %[tmp7f], 4(%[efw_ptr_0]) \n\t"
    "b 5f \n\t"
    // Delay slot: the last store still executes before the jump to 5.
    " swc1 %[tmp9f], 4(%[efw_ptr_1]) \n\t"
   "3: \n\t"
    "mul.s %[tmp1f], %[tmp1f], %[tmp1f] \n\t"
    "sub.s %[tmp1f], %[tmp1c], %[tmp1f] \n\t"
    "sqrt.s %[tmp1f], %[tmp1f] \n\t"
    "lwc1 %[tmp2f], 0(%[efw_ptr_0]) \n\t"
    "lwc1 %[tmp3f], 0(%[u_ptr]) \n\t"
    "lwc1 %[tmp4f], 0(%[efw_ptr_1]) \n\t"
    "lwc1 %[tmp5f], 4(%[u_ptr]) \n\t"
#if !defined(MIPS32_R2_LE)
    "mul.s %[tmp3f], %[tmp1f], %[tmp3f] \n\t"
    "add.s %[tmp2f], %[tmp2f], %[tmp3f] \n\t"
    "mul.s %[tmp3f], %[tmp1f], %[tmp5f] \n\t"
    "add.s %[tmp4f], %[tmp4f], %[tmp3f] \n\t"
#else // #if !defined(MIPS32_R2_LE)
    "madd.s %[tmp2f], %[tmp2f], %[tmp1f], %[tmp3f] \n\t"
    "madd.s %[tmp4f], %[tmp4f], %[tmp1f], %[tmp5f] \n\t"
#endif // #if !defined(MIPS32_R2_LE)
    "swc1 %[tmp2f], 0(%[efw_ptr_0]) \n\t"
    "b 5f \n\t"
    " swc1 %[tmp4f], 0(%[efw_ptr_1]) \n\t"
   "4: \n\t"
    // Bin a was skipped; update bin b only if lambda[b] < 1.
    "c.lt.s %[tmp6f], %[tmp1c] \n\t"
    "bc1f 5f \n\t"
    " nop \n\t"
    "mul.s %[tmp6f], %[tmp6f], %[tmp6f] \n\t"
    "sub.s %[tmp6f], %[tmp1c], %[tmp6f] \n\t"
    "sqrt.s %[tmp6f], %[tmp6f] \n\t"
    "lwc1 %[tmp7f], 4(%[efw_ptr_0]) \n\t"
    "lwc1 %[tmp8f], 8(%[u_ptr]) \n\t"
    "lwc1 %[tmp9f], 4(%[efw_ptr_1]) \n\t"
    "lwc1 %[tmp10f], 12(%[u_ptr]) \n\t"
#if !defined(MIPS32_R2_LE)
    "mul.s %[tmp3f], %[tmp6f], %[tmp8f] \n\t"
    "add.s %[tmp7f], %[tmp7f], %[tmp3f] \n\t"
    "mul.s %[tmp3f], %[tmp6f], %[tmp10f] \n\t"
    "add.s %[tmp9f], %[tmp9f], %[tmp3f] \n\t"
#else // #if !defined(MIPS32_R2_LE)
    "madd.s %[tmp7f], %[tmp7f], %[tmp6f], %[tmp8f] \n\t"
    "madd.s %[tmp9f], %[tmp9f], %[tmp6f], %[tmp10f] \n\t"
#endif // #if !defined(MIPS32_R2_LE)
    "swc1 %[tmp7f], 4(%[efw_ptr_0]) \n\t"
    "swc1 %[tmp9f], 4(%[efw_ptr_1]) \n\t"
   "5: \n\t"
    // Advance by two bins and loop until u_ptr reaches &u[PART_LEN][0].
    "addiu %[u_ptr], %[u_ptr], 16 \n\t"
    "addiu %[efw_ptr_0], %[efw_ptr_0], 8 \n\t"
    "bne %[u_ptr], %[u_ptr_end], 1b \n\t"
    " addiu %[efw_ptr_1], %[efw_ptr_1], 8 \n\t"
    ".set pop \n\t"
    : [lambda] "+r" (lambda), [u_ptr] "+r" (u_ptr),
      [efw_ptr_0] "+r" (efw_ptr_0), [efw_ptr_1] "+r" (efw_ptr_1),
      [tmp1f] "=&f" (tmp1f), [tmp2f] "=&f" (tmp2f), [tmp3f] "=&f" (tmp3f),
      [tmp4f] "=&f" (tmp4f), [tmp5f] "=&f" (tmp5f),
      [tmp6f] "=&f" (tmp6f), [tmp7f] "=&f" (tmp7f), [tmp8f] "=&f" (tmp8f),
      [tmp9f] "=&f" (tmp9f), [tmp10f] "=&f" (tmp10f)
    : [tmp1c] "f" (tmp1c), [u_ptr_end] "r" (u_ptr_end)
    : "memory"
  );

  // Undo the pointer advance done by the assembly loop, then handle the one
  // remaining bin (PART_LEN) in C.
  lambda -= PART_LEN;
  tmp = sqrtf(WEBRTC_SPL_MAX(1 - lambda[PART_LEN] * lambda[PART_LEN], 0));
  //tmp = 1 - lambda[i];
  efw[0][PART_LEN] += tmp * u[PART_LEN][0];
  efw[1][PART_LEN] += tmp * u[PART_LEN][1];

  // For H band comfort noise
  // TODO: don't compute noise and "tmp" twice. Use the previous results.
  noiseAvg = 0.0;
  tmpAvg = 0.0;
  num = 0;
  if (aec->sampFreq == 32000 && flagHbandCn == 1) {
    // Reuse the same random values, now mapped by dividing by 32768.
    for (i = 0; i < PART_LEN; i++) {
      rand[i] = ((float)randW16[i]) / 32768;
    }

    // average noise scale
    // average over second half of freq spectrum (i.e., 4->8khz)
    // TODO: we shouldn't need num. We know how many elements we're summing.
    for (i = PART_LEN1 >> 1; i < PART_LEN1; i++) {
      num++;
      noiseAvg += sqrtf(noisePow[i]);
    }
    noiseAvg /= (float)num;

    // average nlp scale
    // average over second half of freq spectrum (i.e., 4->8khz)
    // TODO: we shouldn't need num. We know how many elements we're summing.
    num = 0;
    for (i = PART_LEN1 >> 1; i < PART_LEN1; i++) {
      num++;
      tmpAvg += sqrtf(WEBRTC_SPL_MAX(1 - lambda[i] * lambda[i], 0));
    }
    tmpAvg /= (float)num;

    // Use average noise for H band
    // TODO: we should probably have a new random vector here.
    // Reject LF noise
    u[0][0] = 0;
    u[0][1] = 0;
    for (i = 1; i < PART_LEN1; i++) {
      tmp = pi2 * rand[i - 1];

      // Use average noise for H band
      u[i][0] = noiseAvg * (float)cos(tmp);
      u[i][1] = -noiseAvg * (float)sin(tmp);
    }
    u[PART_LEN][1] = 0;

    for (i = 0; i < PART_LEN1; i++) {
      // Use average NLP weight for H band
      comfortNoiseHband[i][0] = tmpAvg * u[i][0];
      comfortNoiseHband[i][1] = tmpAvg * u[i][1];
    }
  }
}

// Accumulates into |yf|, for every partition i, the per-bin complex product
// of aec->xfBuf (operand a) and aec->wfBuf (operand b):
//   yf[0][k] += aRe*bRe - aIm*bIm
//   yf[1][k] += aRe*bIm + aIm*bRe
// The xfBuf partition index is (i + aec->xfBufBlockPos), wrapped back by
// aec->num_partitions when it runs past the end.
void WebRtcAec_FilterFar_mips(AecCore* aec, float yf[2][PART_LEN1]) {
  int i;
  for (i = 0; i < aec->num_partitions; i++) {
    int xPos = (i + aec->xfBufBlockPos) * PART_LEN1;
    int pos = i * PART_LEN1;
    // Check for wrap
    if (i + aec->xfBufBlockPos >= aec->num_partitions) {
      xPos -= aec->num_partitions * (PART_LEN1);
    }
    float* yf0 = yf[0];
    float* yf1 = yf[1];
    float* aRe = aec->xfBuf[0] + xPos;
    float* aIm = aec->xfBuf[1] + xPos;
    float* bRe = aec->wfBuf[0] + pos;
    float* bIm = aec->wfBuf[1] + pos;
    // NOTE(review): f10 and f13 are declared and listed as asm outputs but no
    // instruction below references them.
    float f0, f1, f2, f3, f4, f5, f6, f7, f8, f9, f10, f11, f12, f13;
    // Number of two-bin loop iterations; the odd last bin is handled by the
    // straight-line code after the loop.
    int len = PART_LEN1 >> 1;
    // NOTE(review): len1 is never read; the tail below runs unconditionally.
    int len1 = PART_LEN1 & 1;

    __asm __volatile (
      ".set push \n\t"
      ".set noreorder \n\t"
     "1: \n\t"
      // Load operands for two consecutive bins.
      "lwc1 %[f0], 0(%[aRe]) \n\t"
      "lwc1 %[f1], 0(%[bRe]) \n\t"
      "lwc1 %[f2], 0(%[bIm]) \n\t"
      "lwc1 %[f3], 0(%[aIm]) \n\t"
      "lwc1 %[f4], 4(%[aRe]) \n\t"
      "lwc1 %[f5], 4(%[bRe]) \n\t"
      "lwc1 %[f6], 4(%[bIm]) \n\t"
      "mul.s %[f8], %[f0], %[f1] \n\t"
      "mul.s %[f0], %[f0], %[f2] \n\t"
      "mul.s %[f9], %[f4], %[f5] \n\t"
      "mul.s %[f4], %[f4], %[f6] \n\t"
      "lwc1 %[f7], 4(%[aIm]) \n\t"
#if !defined(MIPS32_R2_LE)
      "mul.s %[f12], %[f2], %[f3] \n\t"
      "mul.s %[f1], %[f3], %[f1] \n\t"
      "mul.s %[f11], %[f6], %[f7] \n\t"
      "addiu %[aRe], %[aRe], 8 \n\t"
      "addiu %[aIm], %[aIm], 8 \n\t"
      "addiu %[len], %[len], -1 \n\t"
      "sub.s %[f8], %[f8], %[f12] \n\t"
      "mul.s %[f12], %[f7], %[f5] \n\t"
      "lwc1 %[f2], 0(%[yf0]) \n\t"
      "add.s %[f1], %[f0], %[f1] \n\t"
      "lwc1 %[f3], 0(%[yf1]) \n\t"
      "sub.s %[f9], %[f9], %[f11] \n\t"
      "lwc1 %[f6], 4(%[yf0]) \n\t"
      "add.s %[f4], %[f4], %[f12] \n\t"
#else // #if !defined(MIPS32_R2_LE)
      "addiu %[aRe], %[aRe], 8 \n\t"
      "addiu %[aIm], %[aIm], 8 \n\t"
      "addiu %[len], %[len], -1 \n\t"
      "nmsub.s %[f8], %[f8], %[f2], %[f3] \n\t"
      "lwc1 %[f2], 0(%[yf0]) \n\t"
      "madd.s %[f1], %[f0], %[f3], %[f1] \n\t"
      "lwc1 %[f3], 0(%[yf1]) \n\t"
      "nmsub.s %[f9], %[f9], %[f6], %[f7] \n\t"
      "lwc1 %[f6], 4(%[yf0]) \n\t"
      "madd.s %[f4], %[f4], %[f7], %[f5] \n\t"
#endif // #if !defined(MIPS32_R2_LE)
      // Accumulate both products into yf and store.
      "lwc1 %[f5], 4(%[yf1]) \n\t"
      "add.s %[f2], %[f2], %[f8] \n\t"
      "addiu %[bRe], %[bRe], 8 \n\t"
      "addiu %[bIm], %[bIm], 8 \n\t"
      "add.s %[f3], %[f3], %[f1] \n\t"
      "add.s %[f6], %[f6], %[f9] \n\t"
      "add.s %[f5], %[f5], %[f4] \n\t"
      "swc1 %[f2], 0(%[yf0]) \n\t"
      "swc1 %[f3], 0(%[yf1]) \n\t"
      "swc1 %[f6], 4(%[yf0]) \n\t"
      "swc1 %[f5], 4(%[yf1]) \n\t"
      "addiu %[yf0], %[yf0], 8 \n\t"
      "bgtz %[len], 1b \n\t"
      " addiu %[yf1], %[yf1], 8 \n\t"
      // Tail: one remaining bin, same arithmetic.
      "lwc1 %[f0], 0(%[aRe]) \n\t"
      "lwc1 %[f1], 0(%[bRe]) \n\t"
      "lwc1 %[f2], 0(%[bIm]) \n\t"
      "lwc1 %[f3], 0(%[aIm]) \n\t"
      "mul.s %[f8], %[f0], %[f1] \n\t"
      "mul.s %[f0], %[f0], %[f2] \n\t"
#if !defined(MIPS32_R2_LE)
      "mul.s %[f12], %[f2], %[f3] \n\t"
      "mul.s %[f1], %[f3], %[f1] \n\t"
      "sub.s %[f8], %[f8], %[f12] \n\t"
      "lwc1 %[f2], 0(%[yf0]) \n\t"
      "add.s %[f1], %[f0], %[f1] \n\t"
      "lwc1 %[f3], 0(%[yf1]) \n\t"
#else // #if !defined(MIPS32_R2_LE)
      "nmsub.s %[f8], %[f8], %[f2], %[f3] \n\t"
      "lwc1 %[f2], 0(%[yf0]) \n\t"
      "madd.s %[f1], %[f0], %[f3], %[f1] \n\t"
      "lwc1 %[f3], 0(%[yf1]) \n\t"
#endif // #if !defined(MIPS32_R2_LE)
      "add.s %[f2], %[f2], %[f8] \n\t"
      "add.s %[f3], %[f3], %[f1] \n\t"
      "swc1 %[f2], 0(%[yf0]) \n\t"
      "swc1 %[f3], 0(%[yf1]) \n\t"
      ".set pop \n\t"
      : [f0] "=&f" (f0), [f1] "=&f" (f1), [f2] "=&f" (f2),
        [f3] "=&f" (f3), [f4] "=&f" (f4), [f5] "=&f" (f5),
        [f6] "=&f" (f6), [f7] "=&f" (f7), [f8] "=&f" (f8),
        [f9] "=&f" (f9), [f10] "=&f" (f10), [f11] "=&f" (f11),
        [f12] "=&f" (f12), [f13] "=&f" (f13), [aRe] "+r" (aRe),
        [aIm] "+r" (aIm), [bRe] "+r" (bRe), [bIm] "+r" (bIm),
        [yf0] "+r" (yf0), [yf1] "+r" (yf1), [len] "+r" (len)
      :
      : "memory"
    );
  }
}

// Updates the filter coefficients in aec->wfBuf from the error spectrum |ef|.
// For every partition:
//   1. fft[] is filled with the product of xfBuf (imaginary part negated)
//      and ef, interleaved as (re, im) pairs; fft[1] is then overwritten
//      with the real part of the product for bin PART_LEN.
//   2. aec_rdft_inverse_128(fft) is applied, the upper PART_LEN samples are
//      zeroed and the lower PART_LEN samples are scaled by 2 / PART_LEN2.
//   3. aec_rdft_forward_128(fft) is applied and the result is added to the
//      partition's wfBuf.
// |fft| is caller-provided scratch; it is overwritten on every partition.
void WebRtcAec_FilterAdaptation_mips(AecCore* aec,
                                     float* fft,
                                     float ef[2][PART_LEN1]) {
  int i;
  for (i = 0; i < aec->num_partitions; i++) {
    int xPos = (i + aec->xfBufBlockPos)*(PART_LEN1);
    int pos;
    // Check for wrap
    if (i + aec->xfBufBlockPos >= aec->num_partitions) {
      xPos -= aec->num_partitions * PART_LEN1;
    }

    pos = i * PART_LEN1;
    float* aRe = aec->xfBuf[0] + xPos;
    float* aIm = aec->xfBuf[1] + xPos;
    float* bRe = ef[0];
    float* bIm = ef[1];
    float* fft_tmp;

    // NOTE(review): f12 is listed as an asm output below but no instruction
    // references it.
    float f0, f1, f2, f3, f4, f5, f6 ,f7, f8, f9, f10, f11, f12;
    int len = PART_LEN >> 1;

    // Step 1: two bins per iteration,
    //   fft[2k]   = aRe*bRe + aIm*bIm
    //   fft[2k+1] = aRe*bIm - aIm*bRe
    __asm __volatile (
      ".set push \n\t"
      ".set noreorder \n\t"
      "addiu %[fft_tmp], %[fft], 0 \n\t"
     "1: \n\t"
      "lwc1 %[f0], 0(%[aRe]) \n\t"
      "lwc1 %[f1], 0(%[bRe]) \n\t"
      "lwc1 %[f2], 0(%[bIm]) \n\t"
      "lwc1 %[f4], 4(%[aRe]) \n\t"
      "lwc1 %[f5], 4(%[bRe]) \n\t"
      "lwc1 %[f6], 4(%[bIm]) \n\t"
      "addiu %[aRe], %[aRe], 8 \n\t"
      "addiu %[bRe], %[bRe], 8 \n\t"
      "mul.s %[f8], %[f0], %[f1] \n\t"
      "mul.s %[f0], %[f0], %[f2] \n\t"
      "lwc1 %[f3], 0(%[aIm]) \n\t"
      "mul.s %[f9], %[f4], %[f5] \n\t"
      "lwc1 %[f7], 4(%[aIm]) \n\t"
      "mul.s %[f4], %[f4], %[f6] \n\t"
#if !defined(MIPS32_R2_LE)
      "mul.s %[f10], %[f3], %[f2] \n\t"
      "mul.s %[f1], %[f3], %[f1] \n\t"
      "mul.s %[f11], %[f7], %[f6] \n\t"
      "mul.s %[f5], %[f7], %[f5] \n\t"
      "addiu %[aIm], %[aIm], 8 \n\t"
      "addiu %[bIm], %[bIm], 8 \n\t"
      "addiu %[len], %[len], -1 \n\t"
      "add.s %[f8], %[f8], %[f10] \n\t"
      "sub.s %[f1], %[f0], %[f1] \n\t"
      "add.s %[f9], %[f9], %[f11] \n\t"
      "sub.s %[f5], %[f4], %[f5] \n\t"
#else // #if !defined(MIPS32_R2_LE)
      "addiu %[aIm], %[aIm], 8 \n\t"
      "addiu %[bIm], %[bIm], 8 \n\t"
      "addiu %[len], %[len], -1 \n\t"
      "madd.s %[f8], %[f8], %[f3], %[f2] \n\t"
      "nmsub.s %[f1], %[f0], %[f3], %[f1] \n\t"
      "madd.s %[f9], %[f9], %[f7], %[f6] \n\t"
      "nmsub.s %[f5], %[f4], %[f7], %[f5] \n\t"
#endif // #if !defined(MIPS32_R2_LE)
      "swc1 %[f8], 0(%[fft_tmp]) \n\t"
      "swc1 %[f1], 4(%[fft_tmp]) \n\t"
      "swc1 %[f9], 8(%[fft_tmp]) \n\t"
      "swc1 %[f5], 12(%[fft_tmp]) \n\t"
      "bgtz %[len], 1b \n\t"
      " addiu %[fft_tmp], %[fft_tmp], 16 \n\t"
      // Last bin (PART_LEN): only the real part is computed and it is stored
      // in fft[1], replacing the value written there by the first iteration.
      "lwc1 %[f0], 0(%[aRe]) \n\t"
      "lwc1 %[f1], 0(%[bRe]) \n\t"
      "lwc1 %[f2], 0(%[bIm]) \n\t"
      "lwc1 %[f3], 0(%[aIm]) \n\t"
      "mul.s %[f8], %[f0], %[f1] \n\t"
#if !defined(MIPS32_R2_LE)
      "mul.s %[f10], %[f3], %[f2] \n\t"
      "add.s %[f8], %[f8], %[f10] \n\t"
#else // #if !defined(MIPS32_R2_LE)
      "madd.s %[f8], %[f8], %[f3], %[f2] \n\t"
#endif // #if !defined(MIPS32_R2_LE)
      "swc1 %[f8], 4(%[fft]) \n\t"
      ".set pop \n\t"
      : [f0] "=&f" (f0), [f1] "=&f" (f1), [f2] "=&f" (f2),
        [f3] "=&f" (f3), [f4] "=&f" (f4), [f5] "=&f" (f5),
        [f6] "=&f" (f6), [f7] "=&f" (f7), [f8] "=&f" (f8),
        [f9] "=&f" (f9), [f10] "=&f" (f10), [f11] "=&f" (f11),
        [f12] "=&f" (f12), [aRe] "+r" (aRe), [aIm] "+r" (aIm),
        [bRe] "+r" (bRe), [bIm] "+r" (bIm), [fft_tmp] "=&r" (fft_tmp),
        [len] "+r" (len)
      : [fft] "r" (fft)
      : "memory"
    );

    // Step 2: back through the inverse transform, then clear the upper
    // PART_LEN samples.
    aec_rdft_inverse_128(fft);
    memset(fft + PART_LEN, 0, sizeof(float) * PART_LEN);

    // fft scaling
    {
      float scale = 2.0f / PART_LEN2;
      // Multiplies fft[0..63] by |scale|: 8 iterations of 8 floats each.
      // NOTE(review): the count is hard-coded; presumably 64 == PART_LEN.
      __asm __volatile (
        ".set push \n\t"
        ".set noreorder \n\t"
        "addiu %[fft_tmp], %[fft], 0 \n\t"
        "addiu %[len], $zero, 8 \n\t"
       "1: \n\t"
        "addiu %[len], %[len], -1 \n\t"
        "lwc1 %[f0], 0(%[fft_tmp]) \n\t"
        "lwc1 %[f1], 4(%[fft_tmp]) \n\t"
        "lwc1 %[f2], 8(%[fft_tmp]) \n\t"
        "lwc1 %[f3], 12(%[fft_tmp]) \n\t"
        "mul.s %[f0], %[f0], %[scale] \n\t"
        "mul.s %[f1], %[f1], %[scale] \n\t"
        "mul.s %[f2], %[f2], %[scale] \n\t"
        "mul.s %[f3], %[f3], %[scale] \n\t"
        "lwc1 %[f4], 16(%[fft_tmp]) \n\t"
        "lwc1 %[f5], 20(%[fft_tmp]) \n\t"
        "lwc1 %[f6], 24(%[fft_tmp]) \n\t"
        "lwc1 %[f7], 28(%[fft_tmp]) \n\t"
        "mul.s %[f4], %[f4], %[scale] \n\t"
        "mul.s %[f5], %[f5], %[scale] \n\t"
        "mul.s %[f6], %[f6], %[scale] \n\t"
        "mul.s %[f7], %[f7], %[scale] \n\t"
        "swc1 %[f0], 0(%[fft_tmp]) \n\t"
        "swc1 %[f1], 4(%[fft_tmp]) \n\t"
        "swc1 %[f2], 8(%[fft_tmp]) \n\t"
        "swc1 %[f3], 12(%[fft_tmp]) \n\t"
        "swc1 %[f4], 16(%[fft_tmp]) \n\t"
        "swc1 %[f5], 20(%[fft_tmp]) \n\t"
        "swc1 %[f6], 24(%[fft_tmp]) \n\t"
        "swc1 %[f7], 28(%[fft_tmp]) \n\t"
        "bgtz %[len], 1b \n\t"
        " addiu %[fft_tmp], %[fft_tmp], 32 \n\t"
        ".set pop \n\t"
        : [f0] "=&f" (f0), [f1] "=&f" (f1), [f2] "=&f" (f2),
          [f3] "=&f" (f3), [f4] "=&f" (f4), [f5] "=&f" (f5),
          [f6] "=&f" (f6), [f7] "=&f" (f7), [len] "=&r" (len),
          [fft_tmp] "=&r" (fft_tmp)
        : [scale] "f" (scale), [fft] "r" (fft)
        : "memory"
      );
    }
    // Step 3: forward transform, then accumulate into this partition's
    // coefficients. aRe/aIm are re-pointed at wfBuf here.
    aec_rdft_forward_128(fft);
    aRe = aec->wfBuf[0] + pos;
    aIm = aec->wfBuf[1] + pos;
    __asm __volatile (
      ".set push \n\t"
      ".set noreorder \n\t"
      "addiu %[fft_tmp], %[fft], 0 \n\t"
      "addiu %[len], $zero, 31 \n\t"
      // Prologue, bins 0 and 1. fft[0] is added to wfBuf[0][pos] and fft[1]
      // to the float 256 bytes (64 floats) further on in wfBuf[0] --
      // presumably wfBuf[0][pos + PART_LEN]; confirm. Bin 1 is handled like
      // the loop body.
      "lwc1 %[f0], 0(%[aRe]) \n\t"
      "lwc1 %[f1], 0(%[fft_tmp]) \n\t"
      "lwc1 %[f2], 256(%[aRe]) \n\t"
      "lwc1 %[f3], 4(%[fft_tmp]) \n\t"
      "lwc1 %[f4], 4(%[aRe]) \n\t"
      "lwc1 %[f5], 8(%[fft_tmp]) \n\t"
      "lwc1 %[f6], 4(%[aIm]) \n\t"
      "lwc1 %[f7], 12(%[fft_tmp]) \n\t"
      "add.s %[f0], %[f0], %[f1] \n\t"
      "add.s %[f2], %[f2], %[f3] \n\t"
      "add.s %[f4], %[f4], %[f5] \n\t"
      "add.s %[f6], %[f6], %[f7] \n\t"
      "addiu %[fft_tmp], %[fft_tmp], 16 \n\t"
      "swc1 %[f0], 0(%[aRe]) \n\t"
      "swc1 %[f2], 256(%[aRe]) \n\t"
      "swc1 %[f4], 4(%[aRe]) \n\t"
      "addiu %[aRe], %[aRe], 8 \n\t"
      "swc1 %[f6], 4(%[aIm]) \n\t"
      "addiu %[aIm], %[aIm], 8 \n\t"
      // Remaining bins, two per iteration (31 iterations):
      //   wfBuf[0][k] += fft[2k], wfBuf[1][k] += fft[2k+1]
     "1: \n\t"
      "lwc1 %[f0], 0(%[aRe]) \n\t"
      "lwc1 %[f1], 0(%[fft_tmp]) \n\t"
      "lwc1 %[f2], 0(%[aIm]) \n\t"
      "lwc1 %[f3], 4(%[fft_tmp]) \n\t"
      "lwc1 %[f4], 4(%[aRe]) \n\t"
      "lwc1 %[f5], 8(%[fft_tmp]) \n\t"
      "lwc1 %[f6], 4(%[aIm]) \n\t"
      "lwc1 %[f7], 12(%[fft_tmp]) \n\t"
      "add.s %[f0], %[f0], %[f1] \n\t"
      "add.s %[f2], %[f2], %[f3] \n\t"
      "add.s %[f4], %[f4], %[f5] \n\t"
      "add.s %[f6], %[f6], %[f7] \n\t"
      "addiu %[len], %[len], -1 \n\t"
      "addiu %[fft_tmp], %[fft_tmp], 16 \n\t"
      "swc1 %[f0], 0(%[aRe]) \n\t"
      "swc1 %[f2], 0(%[aIm]) \n\t"
      "swc1 %[f4], 4(%[aRe]) \n\t"
      "addiu %[aRe], %[aRe], 8 \n\t"
      "swc1 %[f6], 4(%[aIm]) \n\t"
      "bgtz %[len], 1b \n\t"
      " addiu %[aIm], %[aIm], 8 \n\t"
      ".set pop \n\t"
      : [f0] "=&f" (f0), [f1] "=&f" (f1), [f2] "=&f" (f2),
        [f3] "=&f" (f3), [f4] "=&f" (f4), [f5] "=&f" (f5),
        [f6] "=&f" (f6), [f7] "=&f" (f7), [len] "=&r" (len),
        [fft_tmp] "=&r" (fft_tmp), [aRe] "+r" (aRe), [aIm] "+r" (aIm)
      : [fft] "r" (fft)
      : "memory"
    );
  }
}

// Shapes the per-bin gains |hNl| and applies them to |efw|. For each bin i:
//   1. if hNl[i] > hNlFb:
//        hNl[i] = wC[i] * hNlFb + (1 - wC[i]) * hNl[i]
//      with wC = WebRtcAec_weightCurve;
//   2. hNl[i] = powf(hNl[i], aec->overDriveSm * WebRtcAec_overDriveCurve[i]);
//   3. efw[0][i] *= hNl[i];  efw[1][i] = -(efw[1][i] * hNl[i]).
// Both |hNl| and |efw| are modified in place.
void WebRtcAec_OverdriveAndSuppress_mips(AecCore* aec,
                                         float hNl[PART_LEN1],
                                         const float hNlFb,
                                         float efw[2][PART_LEN1]) {
  int i;
  const float one = 1.0;
  float* p_hNl;
  float* p_efw0;
  float* p_efw1;
  float* p_WebRtcAec_wC;
  float temp1, temp2, temp3, temp4;

  p_hNl = &hNl[0];
  p_efw0 = &efw[0][0];
  p_efw1 = &efw[1][0];
  // The table is only read through this pointer; the cast drops its const.
  p_WebRtcAec_wC = (float*)&WebRtcAec_weightCurve[0];

  for (i = 0; i < PART_LEN1; i++) {
    // Weight subbands
    __asm __volatile (
      ".set push \n\t"
      ".set noreorder \n\t"
      "lwc1 %[temp1], 0(%[p_hNl]) \n\t"
      "lwc1 %[temp2], 0(%[p_wC]) \n\t"
      // Skip the weighting unless hNlFb < hNl[i].
      "c.lt.s %[hNlFb], %[temp1] \n\t"
      "bc1f 1f \n\t"
      // Delay slot: temp3 = wC[i] * hNlFb is computed on both paths.
      " mul.s %[temp3], %[temp2], %[hNlFb] \n\t"
      "sub.s %[temp4], %[one], %[temp2] \n\t"
#if !defined(MIPS32_R2_LE)
      "mul.s %[temp1], %[temp1], %[temp4] \n\t"
      "add.s %[temp1], %[temp3], %[temp1] \n\t"
#else // #if !defined(MIPS32_R2_LE)
      "madd.s %[temp1], %[temp3], %[temp1], %[temp4] \n\t"
#endif // #if !defined(MIPS32_R2_LE)
      "swc1 %[temp1], 0(%[p_hNl]) \n\t"
     "1: \n\t"
      "addiu %[p_wC], %[p_wC], 4 \n\t"
      ".set pop \n\t"
      : [temp1] "=&f" (temp1), [temp2] "=&f" (temp2), [temp3] "=&f" (temp3),
        [temp4] "=&f" (temp4), [p_wC] "+r" (p_WebRtcAec_wC)
      : [hNlFb] "f" (hNlFb), [one] "f" (one), [p_hNl] "r" (p_hNl)
      : "memory"
    );

    hNl[i] = powf(hNl[i], aec->overDriveSm * WebRtcAec_overDriveCurve[i]);

    // Apply the gain to efw and advance all three pointers by one bin. The
    // stores use offset -4 because the pointers have already been advanced.
    // This block has no ".set noreorder".
    __asm __volatile (
      "lwc1 %[temp1], 0(%[p_hNl]) \n\t"
      "lwc1 %[temp3], 0(%[p_efw1]) \n\t"
      "lwc1 %[temp2], 0(%[p_efw0]) \n\t"
      "addiu %[p_hNl], %[p_hNl], 4 \n\t"
      "mul.s %[temp3], %[temp3], %[temp1] \n\t"
      "mul.s %[temp2], %[temp2], %[temp1] \n\t"
      "addiu %[p_efw0], %[p_efw0], 4 \n\t"
      "addiu %[p_efw1], %[p_efw1], 4 \n\t"
      "neg.s %[temp4], %[temp3] \n\t"
      "swc1 %[temp2], -4(%[p_efw0]) \n\t"
      "swc1 %[temp4], -4(%[p_efw1]) \n\t"
      : [temp1] "=&f" (temp1), [temp2] "=&f" (temp2), [temp3] "=&f" (temp3),
        [temp4] "=&f" (temp4), [p_efw0] "+r" (p_efw0), [p_efw1] "+r" (p_efw1),
        [p_hNl] "+r" (p_hNl)
      :
      : "memory"
    );
  }
}

// Normalises, limits and scales the error spectrum |ef| in place. For each of
// the PART_LEN1 bins:
//   ef /= (aec->xPow[i] + 1e-10)
//   if |ef|^2 > error_threshold^2:
//     ef *= error_threshold / (|ef| + 1e-10)
//   ef *= mu
// |mu| and |error_threshold| are the extended-filter constants when
// aec->extended_filter_enabled is set, otherwise the aec->normal_* values.
void WebRtcAec_ScaleErrorSignal_mips(AecCore* aec, float ef[2][PART_LEN1]) {
  const float mu = aec->extended_filter_enabled ? kExtendedMu : aec->normal_mu;
  const float error_threshold = aec->extended_filter_enabled
                                    ? kExtendedErrorThreshold
                                    : aec->normal_error_threshold;
  int len = (PART_LEN1);
  float* ef0 = ef[0];
  float* ef1 = ef[1];
  float* xPow = aec->xPow;
  float fac1 = 1e-10f;
  // The comparison is done on squared magnitudes so the square root is only
  // taken for bins that exceed the threshold.
  float err_th2 = error_threshold * error_threshold;
  float f0, f1, f2;
#if !defined(MIPS32_R2_LE)
  // Extra temporary, needed only when madd.s is not used.
  float f3;
#endif

  __asm __volatile (
    ".set push \n\t"
    ".set noreorder \n\t"
   "1: \n\t"
    "lwc1 %[f0], 0(%[xPow]) \n\t"
    "lwc1 %[f1], 0(%[ef0]) \n\t"
    "lwc1 %[f2], 0(%[ef1]) \n\t"
    // Normalise by (xPow + fac1).
    "add.s %[f0], %[f0], %[fac1] \n\t"
    "div.s %[f1], %[f1], %[f0] \n\t"
    "div.s %[f2], %[f2], %[f0] \n\t"
    // f0 = ef0^2 + ef1^2
    "mul.s %[f0], %[f1], %[f1] \n\t"
#if defined(MIPS32_R2_LE)
    "madd.s %[f0], %[f0], %[f2], %[f2] \n\t"
#else
    "mul.s %[f3], %[f2], %[f2] \n\t"
    "add.s %[f0], %[f0], %[f3] \n\t"
#endif
    // Skip the limiter when the squared magnitude is within the threshold.
    "c.le.s %[f0], %[err_th2] \n\t"
    "nop \n\t"
    "bc1t 2f \n\t"
    " nop \n\t"
    // Limiter: scale by err_th / (|ef| + fac1).
    "sqrt.s %[f0], %[f0] \n\t"
    "add.s %[f0], %[f0], %[fac1] \n\t"
    "div.s %[f0], %[err_th], %[f0] \n\t"
    "mul.s %[f1], %[f1], %[f0] \n\t"
    "mul.s %[f2], %[f2], %[f0] \n\t"
   "2: \n\t"
    // Apply the step size and store.
    "mul.s %[f1], %[f1], %[mu] \n\t"
    "mul.s %[f2], %[f2], %[mu] \n\t"
    "swc1 %[f1], 0(%[ef0]) \n\t"
    "swc1 %[f2], 0(%[ef1]) \n\t"
    "addiu %[len], %[len], -1 \n\t"
    "addiu %[xPow], %[xPow], 4 \n\t"
    "addiu %[ef0], %[ef0], 4 \n\t"
    "bgtz %[len], 1b \n\t"
    " addiu %[ef1], %[ef1], 4 \n\t"
    ".set pop \n\t"
    : [f0] "=&f" (f0), [f1] "=&f" (f1), [f2] "=&f" (f2),
#if !defined(MIPS32_R2_LE)
      [f3] "=&f" (f3),
#endif
      [xPow] "+r" (xPow), [ef0] "+r" (ef0), [ef1] "+r" (ef1),
      [len] "+r" (len)
    : [fac1] "f" (fac1), [err_th2] "f" (err_th2), [mu] "f" (mu),
      [err_th] "f" (error_threshold)
    : "memory"
  );
}

// Points the WebRtcAec_* function pointers at the MIPS implementations
// defined in this file.
void WebRtcAec_InitAec_mips(void) {
  WebRtcAec_FilterFar = WebRtcAec_FilterFar_mips;
  WebRtcAec_FilterAdaptation = WebRtcAec_FilterAdaptation_mips;
  WebRtcAec_ScaleErrorSignal = WebRtcAec_ScaleErrorSignal_mips;
  WebRtcAec_ComfortNoise = WebRtcAec_ComfortNoise_mips;
  WebRtcAec_OverdriveAndSuppress = WebRtcAec_OverdriveAndSuppress_mips;
}
