/*
 * Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

#include "webrtc/modules/audio_processing/aecm/aecm_core.h"

#include <assert.h>

#include "webrtc/modules/audio_processing/aecm/include/echo_control_mobile.h"
#include "webrtc/modules/audio_processing/utility/delay_estimator_wrapper.h"

// Window coefficients, 65 entries rising monotonically from 0 to 16384.
// The code below multiplies samples by these values and shifts the product
// right by 14, i.e. it treats 16384 as 1.0.
// NOTE(review): the name suggests the square root of a Hanning window -
// confirm against the generator of this table.
static const ALIGN8_BEG int16_t WebRtcAecm_kSqrtHanning[] ALIGN8_END = {
  0, 399, 798, 1196, 1594, 1990, 2386, 2780, 3172,
  3562, 3951, 4337, 4720, 5101, 5478, 5853, 6224,
  6591, 6954, 7313, 7668, 8019, 8364, 8705, 9040,
  9370, 9695, 10013, 10326, 10633, 10933, 11227, 11514,
  11795, 12068, 12335, 12594, 12845, 13089, 13325, 13553,
  13773, 13985, 14189, 14384, 14571, 14749, 14918, 15079,
  15231, 15373, 15506, 15631, 15746, 15851, 15947, 16034,
  16111, 16179, 16237, 16286, 16325, 16354, 16373, 16384
};

// Neither constant is referenced in the part of the file reviewed here.
static const int16_t kNoiseEstQDomain = 15;
static const int16_t kNoiseEstIncCount = 5;

// Byte offsets into the int16_t FFT work buffer, consumed in pairs by
// WindowAndFFT(): for sample index k, entry 2k is where windowed sample k is
// stored and entry 2k + 1 is where windowed sample k + 64 is stored.
// All offsets are multiples of 4 and WindowAndFFT() zero-fills the buffer
// first, so the int16_t that follows each stored sample stays 0.
// NOTE(review): the permutation looks like the bit-reversed input ordering
// expected by WebRtcSpl_ComplexFFT() - confirm.
static int16_t coefTable[] = {
  0, 4, 256, 260, 128, 132, 384, 388,
  64, 68, 320, 324, 192, 196, 448, 452,
  32, 36, 288, 292, 160, 164, 416, 420,
  96, 100, 352, 356, 224, 228, 480, 484,
  16, 20, 272, 276, 144, 148, 400, 404,
  80, 84, 336, 340, 208, 212, 464, 468,
  48, 52, 304, 308, 176, 180, 432, 436,
  112, 116, 368, 372, 240, 244, 496, 500,
  8, 12, 264, 268, 136, 140, 392, 396,
  72, 76, 328, 332, 200, 204, 456, 460,
  40, 44, 296, 300, 168, 172, 424, 428,
  104, 108, 360, 364, 232, 236, 488, 492,
  24, 28, 280, 284, 152, 156, 408, 412,
  88, 92, 344, 348, 216, 220, 472, 476,
  56, 60, 312, 316, 184, 188, 440, 444,
  120, 124, 376, 380, 248, 252, 504, 508
};

// Byte offsets into the int16_t FFT work buffer, consumed in pairs by
// InverseFFTAndWindow(): for input bin k, entry 2k is where the bin is stored
// with its second int16_t negated and entry 2k + 1 is where the bin is stored
// unchanged.
// Note that entry 1 is 512, i.e. int16_t index 256.
// NOTE(review): if PART_LEN4 is 256 this write lands just past PART_LEN4
// elements; WebRtcAecm_ProcessBlock() allocates "+2 to make a loop safe",
// which presumably exists for this reason - confirm.
static int16_t coefTable_ifft[] = {
  0, 512, 256, 508, 128, 252, 384, 380,
  64, 124, 320, 444, 192, 188, 448, 316,
  32, 60, 288, 476, 160, 220, 416, 348,
  96, 92, 352, 412, 224, 156, 480, 284,
  16, 28, 272, 492, 144, 236, 400, 364,
  80, 108, 336, 428, 208, 172, 464, 300,
  48, 44, 304, 460, 176, 204, 432, 332,
  112, 76, 368, 396, 240, 140, 496, 268,
  8, 12, 264, 500, 136, 244, 392, 372,
  72, 116, 328, 436, 200, 180, 456, 308,
  40, 52, 296, 468, 168, 212, 424, 340,
  104, 84, 360, 404, 232, 148, 488, 276,
  24, 20, 280, 484, 152, 228, 408, 356,
  88, 100, 344, 420, 216, 164, 472, 292,
  56, 36, 312, 452, 184, 196, 440, 324,
  120, 68, 376, 388, 248, 132, 504, 260
};

// Forward declaration; the definition is not within the part of the file
// reviewed here.
static void ComfortNoise(AecmCore_t* aecm,
                         const uint16_t* dfa,
                         complex16_t* out,
                         const int16_t* lambda);

// Windows 128 time-domain samples, scatters them into |fft| using coefTable,
// runs WebRtcSpl_ComplexFFT() and copies the first 128 int16_t of the result
// to |freq_signal| with every second value negated.
//
// aecm                 [in]  Not used by this function.
// fft                  [out] Work buffer; its first PART_LEN4 int16_t are
//                            zeroed before use.
// time_signal          [in]  128 input samples (sample k and sample k + 64
//                            are read together).
// freq_signal          [out] Receives 128 int16_t (64 complex values); the
//                            int16_t at odd positions are negated copies of
//                            the FFT output.
//                            NOTE(review): presumably these are the .imag
//                            fields, i.e. the output is conjugated - confirm
//                            the complex16_t layout.
// time_signal_scaling  [in]  Each window product is shifted left by
//                            (time_signal_scaling - 14), or right by the
//                            negated amount when that value is negative.
static void WindowAndFFT(AecmCore_t* aecm,
                         int16_t* fft,
                         const int16_t* time_signal,
                         complex16_t* freq_signal,
                         int time_signal_scaling) {
  int i, j;
  int32_t tmp1, tmp2, tmp3, tmp4;
  int16_t* pfrfi;
  complex16_t* pfreq_signal;
  int16_t f_coef, s_coef;
  // The int32_t variables below hold addresses inside the asm block.
  int32_t load_ptr, store_ptr1, store_ptr2, shift, shift1;
  int32_t hann, hann1, coefs;

  memset(fft, 0, sizeof(int16_t) * PART_LEN4);

  // FFT of signal
  // Two copies of the same 64-iteration loop follow: label 1 shifts the
  // products left by |shift|, label 2 shifts them right by |shift1| (used when
  // |shift| is negative). |hann| walks the window table forwards from entry 0
  // while |hann1| walks it backwards from entry 64 (byte offset 128).
  __asm __volatile (
    ".set push \n\t"
    ".set noreorder \n\t"
    "addiu %[shift], %[time_signal_scaling], -14 \n\t"
    "addiu %[i], $zero, 64 \n\t"
    "addiu %[load_ptr], %[time_signal], 0 \n\t"
    "addiu %[hann], %[hanning], 0 \n\t"
    "addiu %[hann1], %[hanning], 128 \n\t"
    "addiu %[coefs], %[coefTable], 0 \n\t"
    "bltz %[shift], 2f \n\t"
    " negu %[shift1], %[shift] \n\t"
    "1: \n\t"
    "lh %[tmp1], 0(%[load_ptr]) \n\t"
    "lh %[tmp2], 0(%[hann]) \n\t"
    "lh %[tmp3], 128(%[load_ptr]) \n\t"
    "lh %[tmp4], 0(%[hann1]) \n\t"
    "addiu %[i], %[i], -1 \n\t"
    "mul %[tmp1], %[tmp1], %[tmp2] \n\t"
    "mul %[tmp3], %[tmp3], %[tmp4] \n\t"
    "lh %[f_coef], 0(%[coefs]) \n\t"
    "lh %[s_coef], 2(%[coefs]) \n\t"
    "addiu %[load_ptr], %[load_ptr], 2 \n\t"
    "addiu %[hann], %[hann], 2 \n\t"
    "addiu %[hann1], %[hann1], -2 \n\t"
    "addu %[store_ptr1], %[fft], %[f_coef] \n\t"
    "addu %[store_ptr2], %[fft], %[s_coef] \n\t"
    "sllv %[tmp1], %[tmp1], %[shift] \n\t"
    "sllv %[tmp3], %[tmp3], %[shift] \n\t"
    "sh %[tmp1], 0(%[store_ptr1]) \n\t"
    "sh %[tmp3], 0(%[store_ptr2]) \n\t"
    "bgtz %[i], 1b \n\t"
    " addiu %[coefs], %[coefs], 4 \n\t"
    "b 3f \n\t"
    " nop \n\t"
    "2: \n\t"
    "lh %[tmp1], 0(%[load_ptr]) \n\t"
    "lh %[tmp2], 0(%[hann]) \n\t"
    "lh %[tmp3], 128(%[load_ptr]) \n\t"
    "lh %[tmp4], 0(%[hann1]) \n\t"
    "addiu %[i], %[i], -1 \n\t"
    "mul %[tmp1], %[tmp1], %[tmp2] \n\t"
    "mul %[tmp3], %[tmp3], %[tmp4] \n\t"
    "lh %[f_coef], 0(%[coefs]) \n\t"
    "lh %[s_coef], 2(%[coefs]) \n\t"
    "addiu %[load_ptr], %[load_ptr], 2 \n\t"
    "addiu %[hann], %[hann], 2 \n\t"
    "addiu %[hann1], %[hann1], -2 \n\t"
    "addu %[store_ptr1], %[fft], %[f_coef] \n\t"
    "addu %[store_ptr2], %[fft], %[s_coef] \n\t"
    "srav %[tmp1], %[tmp1], %[shift1] \n\t"
    "srav %[tmp3], %[tmp3], %[shift1] \n\t"
    "sh %[tmp1], 0(%[store_ptr1]) \n\t"
    "sh %[tmp3], 0(%[store_ptr2]) \n\t"
    "bgtz %[i], 2b \n\t"
    " addiu %[coefs], %[coefs], 4 \n\t"
    "3: \n\t"
    ".set pop \n\t"
    : [load_ptr] "=&r" (load_ptr), [shift] "=&r" (shift), [hann] "=&r" (hann),
      [hann1] "=&r" (hann1), [shift1] "=&r" (shift1), [coefs] "=&r" (coefs),
      [tmp1] "=&r" (tmp1), [tmp2] "=&r" (tmp2), [tmp3] "=&r" (tmp3),
      [tmp4] "=&r" (tmp4), [i] "=&r" (i), [f_coef] "=&r" (f_coef),
      [s_coef] "=&r" (s_coef), [store_ptr1] "=&r" (store_ptr1),
      [store_ptr2] "=&r" (store_ptr2)
    : [time_signal] "r" (time_signal), [coefTable] "r" (coefTable),
      [time_signal_scaling] "r" (time_signal_scaling),
      [hanning] "r" (WebRtcAecm_kSqrtHanning), [fft] "r" (fft)
    : "memory", "hi", "lo"
  );

  WebRtcSpl_ComplexFFT(fft, PART_LEN_SHIFT, 1);
  pfrfi = fft;
  pfreq_signal = freq_signal;

  // Copy 128 int16_t from |fft| to |freq_signal|, 8 per iteration, negating
  // the values at odd positions (byte offsets 2, 6, 10, 14).
  __asm __volatile (
    ".set push \n\t"
    ".set noreorder \n\t"
    "addiu %[j], $zero, 128 \n\t"
    "1: \n\t"
    "lh %[tmp1], 0(%[pfrfi]) \n\t"
    "lh %[tmp2], 2(%[pfrfi]) \n\t"
    "lh %[tmp3], 4(%[pfrfi]) \n\t"
    "lh %[tmp4], 6(%[pfrfi]) \n\t"
    "subu %[tmp2], $zero, %[tmp2] \n\t"
    "sh %[tmp1], 0(%[pfreq_signal]) \n\t"
    "sh %[tmp2], 2(%[pfreq_signal]) \n\t"
    "subu %[tmp4], $zero, %[tmp4] \n\t"
    "sh %[tmp3], 4(%[pfreq_signal]) \n\t"
    "sh %[tmp4], 6(%[pfreq_signal]) \n\t"
    "lh %[tmp1], 8(%[pfrfi]) \n\t"
    "lh %[tmp2], 10(%[pfrfi]) \n\t"
    "lh %[tmp3], 12(%[pfrfi]) \n\t"
    "lh %[tmp4], 14(%[pfrfi]) \n\t"
    "addiu %[j], %[j], -8 \n\t"
    "subu %[tmp2], $zero, %[tmp2] \n\t"
    "sh %[tmp1], 8(%[pfreq_signal]) \n\t"
    "sh %[tmp2], 10(%[pfreq_signal]) \n\t"
    "subu %[tmp4], $zero, %[tmp4] \n\t"
    "sh %[tmp3], 12(%[pfreq_signal]) \n\t"
    "sh %[tmp4], 14(%[pfreq_signal]) \n\t"
    "addiu %[pfreq_signal], %[pfreq_signal], 16 \n\t"
    "bgtz %[j], 1b \n\t"
    " addiu %[pfrfi], %[pfrfi], 16 \n\t"
    ".set pop \n\t"
    : [tmp1] "=&r" (tmp1), [tmp2] "=&r" (tmp2), [tmp3] "=&r" (tmp3),
      [j] "=&r" (j), [pfrfi] "+r" (pfrfi), [pfreq_signal] "+r" (pfreq_signal),
      [tmp4] "=&r" (tmp4)
    :
    : "memory"
  );
}

// Scatters |efw| into |fft| using coefTable_ifft, runs
// WebRtcSpl_ComplexIFFT(), windows the result, overlap-adds it with
// aecm->outBuf and writes 64 saturated samples to |output|. The windowed
// second half of the IFFT result becomes the new aecm->outBuf. Finally the
// second halves of aecm->xBuf and aecm->dBufNoisy (and aecm->dBufClean when
// |nearendClean| is not NULL) are copied to their first halves.
//
// aecm          [in/out] Reads dfaCleanQDomain; reads and rewrites outBuf;
//                        shifts xBuf, dBufNoisy and optionally dBufClean.
// fft           [out]    Work buffer. The scatter loop stores at byte offsets
//                        taken from coefTable_ifft, including 512 and 514.
// efw           [in]     Bins 0..63 are read by the scatter loop and
//                        bin PART_LEN is read separately afterwards.
// output        [out]    Receives 64 samples.
// nearendClean  [in]     Only compared against NULL.
static void InverseFFTAndWindow(AecmCore_t* aecm,
                                int16_t* fft,
                                complex16_t* efw,
                                int16_t* output,
                                const int16_t* nearendClean) {
  int i, outCFFT;
  int32_t tmp1, tmp2, tmp3, tmp4, tmp_re, tmp_im;
  int16_t* pcoefTable_ifft = coefTable_ifft;
  int16_t* pfft = fft;
  int16_t* ppfft = fft;
  complex16_t* pefw = efw;
  int32_t out_aecm;
  int16_t* paecm_buf = aecm->outBuf;
  const int16_t* p_kSqrtHanning = WebRtcAecm_kSqrtHanning;
  const int16_t* pp_kSqrtHanning = &WebRtcAecm_kSqrtHanning[PART_LEN];
  int16_t* output1 = output;

  // Scatter 64 bins, 4 per iteration. For each bin the pair (re, im) is
  // stored at fft + second table offset and (re, -im) at fft + first table
  // offset.
  __asm __volatile (
    ".set push \n\t"
    ".set noreorder \n\t"
    "addiu %[i], $zero, 64 \n\t"
    "1: \n\t"
    "lh %[tmp1], 0(%[pcoefTable_ifft]) \n\t"
    "lh %[tmp2], 2(%[pcoefTable_ifft]) \n\t"
    "lh %[tmp_re], 0(%[pefw]) \n\t"
    "lh %[tmp_im], 2(%[pefw]) \n\t"
    "addu %[pfft], %[fft], %[tmp2] \n\t"
    "sh %[tmp_re], 0(%[pfft]) \n\t"
    "sh %[tmp_im], 2(%[pfft]) \n\t"
    "addu %[pfft], %[fft], %[tmp1] \n\t"
    "sh %[tmp_re], 0(%[pfft]) \n\t"
    "subu %[tmp_im], $zero, %[tmp_im] \n\t"
    "sh %[tmp_im], 2(%[pfft]) \n\t"
    "lh %[tmp1], 4(%[pcoefTable_ifft]) \n\t"
    "lh %[tmp2], 6(%[pcoefTable_ifft]) \n\t"
    "lh %[tmp_re], 4(%[pefw]) \n\t"
    "lh %[tmp_im], 6(%[pefw]) \n\t"
    "addu %[pfft], %[fft], %[tmp2] \n\t"
    "sh %[tmp_re], 0(%[pfft]) \n\t"
    "sh %[tmp_im], 2(%[pfft]) \n\t"
    "addu %[pfft], %[fft], %[tmp1] \n\t"
    "sh %[tmp_re], 0(%[pfft]) \n\t"
    "subu %[tmp_im], $zero, %[tmp_im] \n\t"
    "sh %[tmp_im], 2(%[pfft]) \n\t"
    "lh %[tmp1], 8(%[pcoefTable_ifft]) \n\t"
    "lh %[tmp2], 10(%[pcoefTable_ifft]) \n\t"
    "lh %[tmp_re], 8(%[pefw]) \n\t"
    "lh %[tmp_im], 10(%[pefw]) \n\t"
    "addu %[pfft], %[fft], %[tmp2] \n\t"
    "sh %[tmp_re], 0(%[pfft]) \n\t"
    "sh %[tmp_im], 2(%[pfft]) \n\t"
    "addu %[pfft], %[fft], %[tmp1] \n\t"
    "sh %[tmp_re], 0(%[pfft]) \n\t"
    "subu %[tmp_im], $zero, %[tmp_im] \n\t"
    "sh %[tmp_im], 2(%[pfft]) \n\t"
    "lh %[tmp1], 12(%[pcoefTable_ifft]) \n\t"
    "lh %[tmp2], 14(%[pcoefTable_ifft]) \n\t"
    "lh %[tmp_re], 12(%[pefw]) \n\t"
    "lh %[tmp_im], 14(%[pefw]) \n\t"
    "addu %[pfft], %[fft], %[tmp2] \n\t"
    "sh %[tmp_re], 0(%[pfft]) \n\t"
    "sh %[tmp_im], 2(%[pfft]) \n\t"
    "addu %[pfft], %[fft], %[tmp1] \n\t"
    "sh %[tmp_re], 0(%[pfft]) \n\t"
    "subu %[tmp_im], $zero, %[tmp_im] \n\t"
    "sh %[tmp_im], 2(%[pfft]) \n\t"
    "addiu %[pcoefTable_ifft], %[pcoefTable_ifft], 16 \n\t"
    "addiu %[i], %[i], -4 \n\t"
    "bgtz %[i], 1b \n\t"
    " addiu %[pefw], %[pefw], 16 \n\t"
    ".set pop \n\t"
    : [tmp1] "=&r" (tmp1), [tmp2] "=&r" (tmp2), [pfft] "+r" (pfft),
      [i] "=&r" (i), [tmp_re] "=&r" (tmp_re), [tmp_im] "=&r" (tmp_im),
      [pefw] "+r" (pefw), [pcoefTable_ifft] "+r" (pcoefTable_ifft),
      [fft] "+r" (fft)
    :
    : "memory"
  );

  // Bin PART_LEN is not covered by the loop above; it is placed (with its
  // imaginary part negated) in the second complex slot of the buffer.
  fft[2] = efw[PART_LEN].real;
  fft[3] = -efw[PART_LEN].imag;

  outCFFT = WebRtcSpl_ComplexIFFT(fft, PART_LEN_SHIFT, 1);
  pfft = fft;

  // Compact the buffer in place: keep every second int16_t (byte offsets
  // 0, 4, 8, 12 of each group), producing 128 contiguous values.
  __asm __volatile (
    ".set push \n\t"
    ".set noreorder \n\t"
    "addiu %[i], $zero, 128 \n\t"
    "1: \n\t"
    "lh %[tmp1], 0(%[ppfft]) \n\t"
    "lh %[tmp2], 4(%[ppfft]) \n\t"
    "lh %[tmp3], 8(%[ppfft]) \n\t"
    "lh %[tmp4], 12(%[ppfft]) \n\t"
    "addiu %[i], %[i], -4 \n\t"
    "sh %[tmp1], 0(%[pfft]) \n\t"
    "sh %[tmp2], 2(%[pfft]) \n\t"
    "sh %[tmp3], 4(%[pfft]) \n\t"
    "sh %[tmp4], 6(%[pfft]) \n\t"
    "addiu %[ppfft], %[ppfft], 16 \n\t"
    "bgtz %[i], 1b \n\t"
    " addiu %[pfft], %[pfft], 8 \n\t"
    ".set pop \n\t"
    : [tmp1] "=&r" (tmp1), [tmp2] "=&r" (tmp2), [pfft] "+r" (pfft),
      [i] "=&r" (i), [tmp3] "=&r" (tmp3), [tmp4] "=&r" (tmp4),
      [ppfft] "+r" (ppfft)
    :
    : "memory"
  );

  pfft = fft;
  // Shift applied to the windowed samples: left when >= 0, arithmetic right
  // by the negated amount otherwise.
  out_aecm = (int32_t)(outCFFT - aecm->dfaCleanQDomain);

  // Two samples per iteration, 64 samples in total.
  // First half: fft[k] * window[k], rounded (+8192) and shifted right by 14,
  // shifted by |out_aecm|, added to outBuf[k], saturated to int16_t and
  // stored to both fft[k] and output[k].
  // Second half: fft[k + 64] * window[PART_LEN - k], shifted right by 14
  // without rounding, shifted by |out_aecm|, saturated and stored to
  // outBuf[k].
  // The #else branches spell the saturation out: when bits 31..15 are not all
  // equal, the value is replaced by 0x7fff XOR the sign mask.
  // NOTE(review): the [WebRtcAecm_kSqrtHanning] input operand is never
  // referenced by the asm template.
  __asm __volatile (
    ".set push \n\t"
    ".set noreorder \n\t"
    "addiu %[i], $zero, 64 \n\t"
    "11: \n\t"
    "lh %[tmp1], 0(%[pfft]) \n\t"
    "lh %[tmp2], 0(%[p_kSqrtHanning]) \n\t"
    "addiu %[i], %[i], -2 \n\t"
    "mul %[tmp1], %[tmp1], %[tmp2] \n\t"
    "lh %[tmp3], 2(%[pfft]) \n\t"
    "lh %[tmp4], 2(%[p_kSqrtHanning]) \n\t"
    "mul %[tmp3], %[tmp3], %[tmp4] \n\t"
    "addiu %[tmp1], %[tmp1], 8192 \n\t"
    "sra %[tmp1], %[tmp1], 14 \n\t"
    "addiu %[tmp3], %[tmp3], 8192 \n\t"
    "sra %[tmp3], %[tmp3], 14 \n\t"
    "bgez %[out_aecm], 1f \n\t"
    " negu %[tmp2], %[out_aecm] \n\t"
    "srav %[tmp1], %[tmp1], %[tmp2] \n\t"
    "b 2f \n\t"
    " srav %[tmp3], %[tmp3], %[tmp2] \n\t"
    "1: \n\t"
    "sllv %[tmp1], %[tmp1], %[out_aecm] \n\t"
    "sllv %[tmp3], %[tmp3], %[out_aecm] \n\t"
    "2: \n\t"
    "lh %[tmp4], 0(%[paecm_buf]) \n\t"
    "lh %[tmp2], 2(%[paecm_buf]) \n\t"
    "addu %[tmp3], %[tmp3], %[tmp2] \n\t"
    "addu %[tmp1], %[tmp1], %[tmp4] \n\t"
#if defined(MIPS_DSP_R1_LE)
    "shll_s.w %[tmp1], %[tmp1], 16 \n\t"
    "sra %[tmp1], %[tmp1], 16 \n\t"
    "shll_s.w %[tmp3], %[tmp3], 16 \n\t"
    "sra %[tmp3], %[tmp3], 16 \n\t"
#else // #if defined(MIPS_DSP_R1_LE)
    "sra %[tmp4], %[tmp1], 31 \n\t"
    "sra %[tmp2], %[tmp1], 15 \n\t"
    "beq %[tmp4], %[tmp2], 3f \n\t"
    " ori %[tmp2], $zero, 0x7fff \n\t"
    "xor %[tmp1], %[tmp2], %[tmp4] \n\t"
    "3: \n\t"
    "sra %[tmp2], %[tmp3], 31 \n\t"
    "sra %[tmp4], %[tmp3], 15 \n\t"
    "beq %[tmp2], %[tmp4], 4f \n\t"
    " ori %[tmp4], $zero, 0x7fff \n\t"
    "xor %[tmp3], %[tmp4], %[tmp2] \n\t"
    "4: \n\t"
#endif // #if defined(MIPS_DSP_R1_LE)
    "sh %[tmp1], 0(%[pfft]) \n\t"
    "sh %[tmp1], 0(%[output1]) \n\t"
    "sh %[tmp3], 2(%[pfft]) \n\t"
    "sh %[tmp3], 2(%[output1]) \n\t"
    "lh %[tmp1], 128(%[pfft]) \n\t"
    "lh %[tmp2], 0(%[pp_kSqrtHanning]) \n\t"
    "mul %[tmp1], %[tmp1], %[tmp2] \n\t"
    "lh %[tmp3], 130(%[pfft]) \n\t"
    "lh %[tmp4], -2(%[pp_kSqrtHanning]) \n\t"
    "mul %[tmp3], %[tmp3], %[tmp4] \n\t"
    "sra %[tmp1], %[tmp1], 14 \n\t"
    "sra %[tmp3], %[tmp3], 14 \n\t"
    "bgez %[out_aecm], 5f \n\t"
    " negu %[tmp2], %[out_aecm] \n\t"
    "srav %[tmp3], %[tmp3], %[tmp2] \n\t"
    "b 6f \n\t"
    " srav %[tmp1], %[tmp1], %[tmp2] \n\t"
    "5: \n\t"
    "sllv %[tmp1], %[tmp1], %[out_aecm] \n\t"
    "sllv %[tmp3], %[tmp3], %[out_aecm] \n\t"
    "6: \n\t"
#if defined(MIPS_DSP_R1_LE)
    "shll_s.w %[tmp1], %[tmp1], 16 \n\t"
    "sra %[tmp1], %[tmp1], 16 \n\t"
    "shll_s.w %[tmp3], %[tmp3], 16 \n\t"
    "sra %[tmp3], %[tmp3], 16 \n\t"
#else // #if defined(MIPS_DSP_R1_LE)
    "sra %[tmp4], %[tmp1], 31 \n\t"
    "sra %[tmp2], %[tmp1], 15 \n\t"
    "beq %[tmp4], %[tmp2], 7f \n\t"
    " ori %[tmp2], $zero, 0x7fff \n\t"
    "xor %[tmp1], %[tmp2], %[tmp4] \n\t"
    "7: \n\t"
    "sra %[tmp2], %[tmp3], 31 \n\t"
    "sra %[tmp4], %[tmp3], 15 \n\t"
    "beq %[tmp2], %[tmp4], 8f \n\t"
    " ori %[tmp4], $zero, 0x7fff \n\t"
    "xor %[tmp3], %[tmp4], %[tmp2] \n\t"
    "8: \n\t"
#endif // #if defined(MIPS_DSP_R1_LE)
    "sh %[tmp1], 0(%[paecm_buf]) \n\t"
    "sh %[tmp3], 2(%[paecm_buf]) \n\t"
    "addiu %[output1], %[output1], 4 \n\t"
    "addiu %[paecm_buf], %[paecm_buf], 4 \n\t"
    "addiu %[pfft], %[pfft], 4 \n\t"
    "addiu %[p_kSqrtHanning], %[p_kSqrtHanning], 4 \n\t"
    "bgtz %[i], 11b \n\t"
    " addiu %[pp_kSqrtHanning], %[pp_kSqrtHanning], -4 \n\t"
    ".set pop \n\t"
    : [tmp1] "=&r" (tmp1), [tmp2] "=&r" (tmp2), [pfft] "+r" (pfft),
      [output1] "+r" (output1), [tmp3] "=&r" (tmp3), [tmp4] "=&r" (tmp4),
      [paecm_buf] "+r" (paecm_buf), [i] "=&r" (i),
      [pp_kSqrtHanning] "+r" (pp_kSqrtHanning),
      [p_kSqrtHanning] "+r" (p_kSqrtHanning)
    : [out_aecm] "r" (out_aecm),
      [WebRtcAecm_kSqrtHanning] "r" (WebRtcAecm_kSqrtHanning)
    : "hi", "lo","memory"
  );

  // Copy the current block to the old position
  // (aecm->outBuf is shifted elsewhere)
  memcpy(aecm->xBuf, aecm->xBuf + PART_LEN, sizeof(int16_t) * PART_LEN);
  memcpy(aecm->dBufNoisy,
         aecm->dBufNoisy + PART_LEN,
         sizeof(int16_t) * PART_LEN);
  if (nearendClean != NULL) {
    memcpy(aecm->dBufClean,
           aecm->dBufClean + PART_LEN,
           sizeof(int16_t) * PART_LEN);
  }
}

// Computes the echo estimate for bins 0..PART_LEN and accumulates three
// energy sums on top of the values already stored in the output pointers.
//
// aecm                [in]     channelStored and channelAdapt16 are read.
// far_spectrum        [in]     PART_LEN + 1 far-end magnitudes.
// echo_est            [out]    echo_est[k] = channelStored[k] *
//                              far_spectrum[k] for k = 0..PART_LEN.
// far_energy          [in/out] Incremented by the sum of far_spectrum[k].
// echo_energy_adapt   [in/out] Incremented by the sum of
//                              channelAdapt16[k] * far_spectrum[k].
// echo_energy_stored  [in/out] Incremented by the sum of echo_est[k].
//
// NOTE(review): inside the asm loop channelAdapt16 is loaded with lhu
// (zero-extended), whereas the tail for bin PART_LEN multiplies the int16_t
// value directly. The two only agree when channelAdapt16 is non-negative -
// confirm that this is guaranteed.
void WebRtcAecm_CalcLinearEnergies_mips(AecmCore_t* aecm,
                                        const uint16_t* far_spectrum,
                                        int32_t* echo_est,
                                        uint32_t* far_energy,
                                        uint32_t* echo_energy_adapt,
                                        uint32_t* echo_energy_stored) {
  int i;
  uint32_t par1 = (*far_energy);
  uint32_t par2 = (*echo_energy_adapt);
  uint32_t par3 = (*echo_energy_stored);
  int16_t* ch_stored_p = &(aecm->channelStored[0]);
  int16_t* ch_adapt_p = &(aecm->channelAdapt16[0]);
  uint16_t* spectrum_p = (uint16_t*)(&(far_spectrum[0]));
  int32_t* echo_p = &(echo_est[0]);
  int32_t temp0, stored0, echo0, adept0, spectrum0;
  int32_t stored1, adept1, spectrum1, echo1, temp1;

  // Get energy for the delayed far end signal and estimated
  // echo using both stored and adapted channels.
  // Each pass of the loop handles four bins; all pointers are advanced inside
  // the asm block.
  for (i = 0; i < PART_LEN; i+= 4) {
    __asm __volatile (
      ".set push \n\t"
      ".set noreorder \n\t"
      "lh %[stored0], 0(%[ch_stored_p]) \n\t"
      "lhu %[adept0], 0(%[ch_adapt_p]) \n\t"
      "lhu %[spectrum0], 0(%[spectrum_p]) \n\t"
      "lh %[stored1], 2(%[ch_stored_p]) \n\t"
      "lhu %[adept1], 2(%[ch_adapt_p]) \n\t"
      "lhu %[spectrum1], 2(%[spectrum_p]) \n\t"
      "mul %[echo0], %[stored0], %[spectrum0] \n\t"
      "mul %[temp0], %[adept0], %[spectrum0] \n\t"
      "mul %[echo1], %[stored1], %[spectrum1] \n\t"
      "mul %[temp1], %[adept1], %[spectrum1] \n\t"
      "addu %[par1], %[par1], %[spectrum0] \n\t"
      "addu %[par1], %[par1], %[spectrum1] \n\t"
      "addiu %[echo_p], %[echo_p], 16 \n\t"
      "addu %[par3], %[par3], %[echo0] \n\t"
      "addu %[par2], %[par2], %[temp0] \n\t"
      "addu %[par3], %[par3], %[echo1] \n\t"
      "addu %[par2], %[par2], %[temp1] \n\t"
      "usw %[echo0], -16(%[echo_p]) \n\t"
      "usw %[echo1], -12(%[echo_p]) \n\t"
      "lh %[stored0], 4(%[ch_stored_p]) \n\t"
      "lhu %[adept0], 4(%[ch_adapt_p]) \n\t"
      "lhu %[spectrum0], 4(%[spectrum_p]) \n\t"
      "lh %[stored1], 6(%[ch_stored_p]) \n\t"
      "lhu %[adept1], 6(%[ch_adapt_p]) \n\t"
      "lhu %[spectrum1], 6(%[spectrum_p]) \n\t"
      "mul %[echo0], %[stored0], %[spectrum0] \n\t"
      "mul %[temp0], %[adept0], %[spectrum0] \n\t"
      "mul %[echo1], %[stored1], %[spectrum1] \n\t"
      "mul %[temp1], %[adept1], %[spectrum1] \n\t"
      "addu %[par1], %[par1], %[spectrum0] \n\t"
      "addu %[par1], %[par1], %[spectrum1] \n\t"
      "addiu %[ch_stored_p], %[ch_stored_p], 8 \n\t"
      "addiu %[ch_adapt_p], %[ch_adapt_p], 8 \n\t"
      "addiu %[spectrum_p], %[spectrum_p], 8 \n\t"
      "addu %[par3], %[par3], %[echo0] \n\t"
      "addu %[par2], %[par2], %[temp0] \n\t"
      "addu %[par3], %[par3], %[echo1] \n\t"
      "addu %[par2], %[par2], %[temp1] \n\t"
      "usw %[echo0], -8(%[echo_p]) \n\t"
      "usw %[echo1], -4(%[echo_p]) \n\t"
      ".set pop \n\t"
      : [temp0] "=&r" (temp0), [stored0] "=&r" (stored0),
        [adept0] "=&r" (adept0), [spectrum0] "=&r" (spectrum0),
        [echo0] "=&r" (echo0), [echo_p] "+r" (echo_p), [par3] "+r" (par3),
        [par1] "+r" (par1), [par2] "+r" (par2), [stored1] "=&r" (stored1),
        [adept1] "=&r" (adept1), [echo1] "=&r" (echo1),
        [spectrum1] "=&r" (spectrum1), [temp1] "=&r" (temp1),
        [ch_stored_p] "+r" (ch_stored_p), [ch_adapt_p] "+r" (ch_adapt_p),
        [spectrum_p] "+r" (spectrum_p)
      :
      : "hi", "lo", "memory"
    );
  }

  // Bin PART_LEN is not covered by the 4-at-a-time loop above.
  echo_est[PART_LEN] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[PART_LEN],
                                             far_spectrum[PART_LEN]);
  par1 += (uint32_t)(far_spectrum[PART_LEN]);
  par2 += aecm->channelAdapt16[PART_LEN] * far_spectrum[PART_LEN];
  par3 += (uint32_t)echo_est[PART_LEN];

  (*far_energy) = par1;
  (*echo_energy_adapt) = par2;
  (*echo_energy_stored) = par3;
}

#if defined(MIPS_DSP_R1_LE)
// Copies the adaptive channel (channelAdapt16) into the stored channel
// (channelStored) and recomputes |echo_est| for bins 0..PART_LEN from the new
// stored channel and |far_spectrum|.
//
// aecm          [in/out] channelStored is overwritten with channelAdapt16.
// far_spectrum  [in]     PART_LEN + 1 far-end magnitudes.
// echo_est      [out]    PART_LEN + 1 recomputed echo estimates.
//
// NOTE(review): the loop uses muleq_s.w.phl/.phr followed by an arithmetic
// shift right by 1. This is presumably the DSP ASE fractional multiply
// (product doubled, saturating), with the shift undoing the doubling. That
// instruction treats both halfwords as signed, while the tail for bin
// PART_LEN uses WEBRTC_SPL_MUL_16_U16 (unsigned far_spectrum) - confirm
// against the DSP ASE manual that far_spectrum values >= 0x8000 cannot occur
// or are handled as intended.
void WebRtcAecm_StoreAdaptiveChannel_mips(AecmCore_t* aecm,
                                          const uint16_t* far_spectrum,
                                          int32_t* echo_est) {
  int i;
  int16_t* temp1;
  uint16_t* temp8;
  int32_t temp0, temp2, temp3, temp4, temp5, temp6;
  int32_t* temp7 = &(echo_est[0]);
  temp1 = &(aecm->channelStored[0]);
  temp8 = (uint16_t*)(&far_spectrum[0]);

  // During startup we store the channel every block.
  memcpy(aecm->channelStored, aecm->channelAdapt16,
         sizeof(int16_t) * PART_LEN1);
  // Recalculate echo estimate
  // Four bins per pass: two 32-bit loads fetch two halfword pairs from each
  // array. The ".phl" products are stored at the odd output index and the
  // ".phr" products at the even one.
  for (i = 0; i < PART_LEN; i += 4) {
    __asm __volatile (
      "ulw %[temp0], 0(%[temp8]) \n\t"
      "ulw %[temp2], 0(%[temp1]) \n\t"
      "ulw %[temp4], 4(%[temp8]) \n\t"
      "ulw %[temp5], 4(%[temp1]) \n\t"
      "muleq_s.w.phl %[temp3], %[temp2], %[temp0] \n\t"
      "muleq_s.w.phr %[temp0], %[temp2], %[temp0] \n\t"
      "muleq_s.w.phl %[temp6], %[temp5], %[temp4] \n\t"
      "muleq_s.w.phr %[temp4], %[temp5], %[temp4] \n\t"
      "addiu %[temp7], %[temp7], 16 \n\t"
      "addiu %[temp1], %[temp1], 8 \n\t"
      "addiu %[temp8], %[temp8], 8 \n\t"
      "sra %[temp3], %[temp3], 1 \n\t"
      "sra %[temp0], %[temp0], 1 \n\t"
      "sra %[temp6], %[temp6], 1 \n\t"
      "sra %[temp4], %[temp4], 1 \n\t"
      "usw %[temp3], -12(%[temp7]) \n\t"
      "usw %[temp0], -16(%[temp7]) \n\t"
      "usw %[temp6], -4(%[temp7]) \n\t"
      "usw %[temp4], -8(%[temp7]) \n\t"
      : [temp0] "=&r" (temp0), [temp2] "=&r" (temp2), [temp3] "=&r" (temp3),
        [temp4] "=&r" (temp4), [temp5] "=&r" (temp5), [temp6] "=&r" (temp6),
        [temp1] "+r" (temp1), [temp8] "+r" (temp8), [temp7] "+r" (temp7)
      :
      : "hi", "lo", "memory"
    );
  }
  // Here i == PART_LEN: handle the last bin, which the loop does not cover.
  echo_est[i] = WEBRTC_SPL_MUL_16_U16(aecm->channelStored[i],
                                      far_spectrum[i]);
}

// Resets the adaptive channel from the stored one: channelAdapt16 becomes a
// copy of channelStored, and channelAdapt32[k] is set from channelStored[k]
// for k = 0..PART_LEN.
//
// aecm  [in/out] channelStored is read; channelAdapt16 and channelAdapt32 are
//                overwritten.
//
// NOTE(review): the loop widens with preceq.w.phl/.phr, presumably the
// halfword-to-word expansion equivalent to the "<< 16" used by the C tail for
// bin PART_LEN - confirm against the DSP ASE manual.
void WebRtcAecm_ResetAdaptiveChannel_mips(AecmCore_t* aecm) {
  int i;
  int32_t* temp3;
  int16_t* temp0;
  int32_t temp1, temp2, temp4, temp5;

  temp0 = &(aecm->channelStored[0]);
  temp3 = &(aecm->channelAdapt32[0]);

  // The stored channel has a significantly lower MSE than the adaptive one for
  // two consecutive calculations. Reset the adaptive channel.
  memcpy(aecm->channelAdapt16,
         aecm->channelStored,
         sizeof(int16_t) * PART_LEN1);

  // Restore the W32 channel
  // Four bins per pass; the ".phl" result goes to the odd output index and
  // the ".phr" result to the even one.
  for (i = 0; i < PART_LEN; i += 4) {
    __asm __volatile (
      "ulw %[temp1], 0(%[temp0]) \n\t"
      "ulw %[temp4], 4(%[temp0]) \n\t"
      "preceq.w.phl %[temp2], %[temp1] \n\t"
      "preceq.w.phr %[temp1], %[temp1] \n\t"
      "preceq.w.phl %[temp5], %[temp4] \n\t"
      "preceq.w.phr %[temp4], %[temp4] \n\t"
      "addiu %[temp0], %[temp0], 8 \n\t"
      "usw %[temp2], 4(%[temp3]) \n\t"
      "usw %[temp1], 0(%[temp3]) \n\t"
      "usw %[temp5], 12(%[temp3]) \n\t"
      "usw %[temp4], 8(%[temp3]) \n\t"
      "addiu %[temp3], %[temp3], 16 \n\t"
      : [temp1] "=&r" (temp1), [temp2] "=&r" (temp2),
        [temp4] "=&r" (temp4), [temp5] "=&r" (temp5),
        [temp3] "+r" (temp3), [temp0] "+r" (temp0)
      :
      : "memory"
    );
  }

  // Here i == PART_LEN: handle the last bin, which the loop does not cover.
  aecm->channelAdapt32[i] = WEBRTC_SPL_LSHIFT_W32(
      (int32_t)aecm->channelStored[i], 16);
}
#endif // #if defined(MIPS_DSP_R1_LE)

// Transforms a time domain signal into the frequency domain, outputting the
// complex valued signal, absolute value and sum of absolute values.
//
// aecm                 [in]  Passed through to WindowAndFFT()
// time_signal          [in]  Pointer to time domain signal
// freq_signal          [out] Pointer to the complex frequency domain array;
//                            bins 0..PART_LEN are written, and the imaginary
//                            parts of bins 0 and PART_LEN are forced to 0
// freq_signal_abs      [out] Pointer to absolute value of frequency domain
//                            array (bins 0..PART_LEN)
// freq_signal_sum_abs  [out] Pointer to the sum of all absolute values in
//                            the frequency domain array
// return value               The Q-domain of current frequency values: the
//                            scaling passed to WindowAndFFT(), which stays 0
//                            unless AECM_DYNAMIC_Q is defined
//
static int TimeToFrequencyDomain(AecmCore_t* aecm,
                                 const int16_t* time_signal,
                                 complex16_t* freq_signal,
                                 uint16_t* freq_signal_abs,
                                 uint32_t* freq_signal_sum_abs)
{
  int i = 0;
  int time_signal_scaling = 0;

  // In fft_buf, +16 for 32-byte alignment.
  // |fft| is |fft_buf| rounded up to the next 32-byte boundary.
  int16_t fft_buf[PART_LEN4 + 16];
  int16_t *fft = (int16_t *) (((uintptr_t) fft_buf + 31) & ~31);

  int16_t tmp16no1;
#if !defined(MIPS_DSP_R2_LE)
  int32_t tmp32no1;
  int32_t tmp32no2;
  int16_t tmp16no2;
#else
  int32_t tmp32no10, tmp32no11, tmp32no12, tmp32no13;
  int32_t tmp32no20, tmp32no21, tmp32no22, tmp32no23;
  int16_t* freqp;
  uint16_t* freqabsp;
  uint32_t freqt0, freqt1, freqt2, freqt3;
  uint32_t freqs;
#endif

#ifdef AECM_DYNAMIC_Q
  // Scale by the headroom of the largest absolute sample in the block.
  tmp16no1 = WebRtcSpl_MaxAbsValueW16(time_signal, PART_LEN2);
  time_signal_scaling = WebRtcSpl_NormW16(tmp16no1);
#endif

  WindowAndFFT(aecm, fft, time_signal, freq_signal, time_signal_scaling);

  // Extract imaginary and real part,
  // calculate the magnitude for all frequency bins
  // Bins 0 and PART_LEN are handled here; WindowAndFFT() only fills bins
  // 0..63 of |freq_signal|, so bin PART_LEN is taken straight from |fft|.
  freq_signal[0].imag = 0;
  freq_signal[PART_LEN].imag = 0;
  freq_signal[PART_LEN].real = fft[PART_LEN2];
  freq_signal_abs[0] = (uint16_t)WEBRTC_SPL_ABS_W16(freq_signal[0].real);
  freq_signal_abs[PART_LEN] = (uint16_t)WEBRTC_SPL_ABS_W16(
    freq_signal[PART_LEN].real);
  (*freq_signal_sum_abs) = (uint32_t)(freq_signal_abs[0]) +
    (uint32_t)(freq_signal_abs[PART_LEN]);

#if !defined(MIPS_DSP_R2_LE)
  for (i = 1; i < PART_LEN; i++) {
    if (freq_signal[i].real == 0)
    {
      freq_signal_abs[i] = (uint16_t)WEBRTC_SPL_ABS_W16(
        freq_signal[i].imag);
    }
    else if (freq_signal[i].imag == 0)
    {
      freq_signal_abs[i] = (uint16_t)WEBRTC_SPL_ABS_W16(
        freq_signal[i].real);
    }
    else
    {
      // Magnitude of complex fft output
      // magn = sqrt(real^2 + imag^2)
      //
      // Computed as WebRtcSpl_SqrtFloor() of the saturating sum of the two
      // squares; no alpha * max + beta * min approximation is used here.
      tmp16no1 = WEBRTC_SPL_ABS_W16(freq_signal[i].real);
      tmp16no2 = WEBRTC_SPL_ABS_W16(freq_signal[i].imag);
      tmp32no1 = WEBRTC_SPL_MUL_16_16(tmp16no1, tmp16no1);
      tmp32no2 = WEBRTC_SPL_MUL_16_16(tmp16no2, tmp16no2);
      tmp32no2 = WebRtcSpl_AddSatW32(tmp32no1, tmp32no2);
      tmp32no1 = WebRtcSpl_SqrtFloor(tmp32no2);

      freq_signal_abs[i] = (uint16_t)tmp32no1;
    }
    (*freq_signal_sum_abs) += (uint32_t)freq_signal_abs[i];
  }
#else // #if !defined(MIPS_DSP_R2_LE)
  // DSP R2 path: the sum is accumulated in |freqs| and written back at the
  // end. Bins 1..3 are handled first so that the main loop can process the
  // remaining bins in groups of four.
  // NOTE(review): dpaq_s.w.ph followed by "extr.w ..., 1" presumably yields
  // real^2 + imag^2 for the halfword pair - confirm against the DSP ASE
  // manual. This first block uses aligned lw loads while the loop uses ulw.
  freqs = (uint32_t)(freq_signal_abs[0]) +
    (uint32_t)(freq_signal_abs[PART_LEN]);
  freqp = &(freq_signal[1].real);

  __asm __volatile (
    "lw %[freqt0], 0(%[freqp]) \n\t"
    "lw %[freqt1], 4(%[freqp]) \n\t"
    "lw %[freqt2], 8(%[freqp]) \n\t"
    "mult $ac0, $zero, $zero \n\t"
    "mult $ac1, $zero, $zero \n\t"
    "mult $ac2, $zero, $zero \n\t"
    "dpaq_s.w.ph $ac0, %[freqt0], %[freqt0] \n\t"
    "dpaq_s.w.ph $ac1, %[freqt1], %[freqt1] \n\t"
    "dpaq_s.w.ph $ac2, %[freqt2], %[freqt2] \n\t"
    "addiu %[freqp], %[freqp], 12 \n\t"
    "extr.w %[tmp32no20], $ac0, 1 \n\t"
    "extr.w %[tmp32no21], $ac1, 1 \n\t"
    "extr.w %[tmp32no22], $ac2, 1 \n\t"
    : [freqt0] "=&r" (freqt0), [freqt1] "=&r" (freqt1),
      [freqt2] "=&r" (freqt2), [freqp] "+r" (freqp),
      [tmp32no20] "=r" (tmp32no20), [tmp32no21] "=r" (tmp32no21),
      [tmp32no22] "=r" (tmp32no22)
    :
    : "memory", "hi", "lo", "$ac1hi", "$ac1lo", "$ac2hi", "$ac2lo"
  );

  tmp32no10 = WebRtcSpl_SqrtFloor(tmp32no20);
  tmp32no11 = WebRtcSpl_SqrtFloor(tmp32no21);
  tmp32no12 = WebRtcSpl_SqrtFloor(tmp32no22);
  freq_signal_abs[1] = (uint16_t)tmp32no10;
  freq_signal_abs[2] = (uint16_t)tmp32no11;
  freq_signal_abs[3] = (uint16_t)tmp32no12;
  freqs += (uint32_t)tmp32no10;
  freqs += (uint32_t)tmp32no11;
  freqs += (uint32_t)tmp32no12;
  freqabsp = &(freq_signal_abs[4]);
  for (i = 4; i < PART_LEN; i+=4)
  {
    // Sum of squares for four bins; |freqabsp| is advanced past the four
    // output slots here and written through negative offsets below.
    __asm __volatile (
      "ulw %[freqt0], 0(%[freqp]) \n\t"
      "ulw %[freqt1], 4(%[freqp]) \n\t"
      "ulw %[freqt2], 8(%[freqp]) \n\t"
      "ulw %[freqt3], 12(%[freqp]) \n\t"
      "mult $ac0, $zero, $zero \n\t"
      "mult $ac1, $zero, $zero \n\t"
      "mult $ac2, $zero, $zero \n\t"
      "mult $ac3, $zero, $zero \n\t"
      "dpaq_s.w.ph $ac0, %[freqt0], %[freqt0] \n\t"
      "dpaq_s.w.ph $ac1, %[freqt1], %[freqt1] \n\t"
      "dpaq_s.w.ph $ac2, %[freqt2], %[freqt2] \n\t"
      "dpaq_s.w.ph $ac3, %[freqt3], %[freqt3] \n\t"
      "addiu %[freqp], %[freqp], 16 \n\t"
      "addiu %[freqabsp], %[freqabsp], 8 \n\t"
      "extr.w %[tmp32no20], $ac0, 1 \n\t"
      "extr.w %[tmp32no21], $ac1, 1 \n\t"
      "extr.w %[tmp32no22], $ac2, 1 \n\t"
      "extr.w %[tmp32no23], $ac3, 1 \n\t"
      : [freqt0] "=&r" (freqt0), [freqt1] "=&r" (freqt1),
        [freqt2] "=&r" (freqt2), [freqt3] "=&r" (freqt3),
        [tmp32no20] "=r" (tmp32no20), [tmp32no21] "=r" (tmp32no21),
        [tmp32no22] "=r" (tmp32no22), [tmp32no23] "=r" (tmp32no23),
        [freqabsp] "+r" (freqabsp), [freqp] "+r" (freqp)
      :
      : "memory", "hi", "lo", "$ac1hi", "$ac1lo",
        "$ac2hi", "$ac2lo", "$ac3hi", "$ac3lo"
    );

    tmp32no10 = WebRtcSpl_SqrtFloor(tmp32no20);
    tmp32no11 = WebRtcSpl_SqrtFloor(tmp32no21);
    tmp32no12 = WebRtcSpl_SqrtFloor(tmp32no22);
    tmp32no13 = WebRtcSpl_SqrtFloor(tmp32no23);

    // Store the four magnitudes and add them to the running sum.
    __asm __volatile (
      "sh %[tmp32no10], -8(%[freqabsp]) \n\t"
      "sh %[tmp32no11], -6(%[freqabsp]) \n\t"
      "sh %[tmp32no12], -4(%[freqabsp]) \n\t"
      "sh %[tmp32no13], -2(%[freqabsp]) \n\t"
      "addu %[freqs], %[freqs], %[tmp32no10] \n\t"
      "addu %[freqs], %[freqs], %[tmp32no11] \n\t"
      "addu %[freqs], %[freqs], %[tmp32no12] \n\t"
      "addu %[freqs], %[freqs], %[tmp32no13] \n\t"
      : [freqs] "+r" (freqs)
      : [tmp32no10] "r" (tmp32no10), [tmp32no11] "r" (tmp32no11),
        [tmp32no12] "r" (tmp32no12), [tmp32no13] "r" (tmp32no13),
        [freqabsp] "r" (freqabsp)
      : "memory"
    );
  }

  (*freq_signal_sum_abs) = freqs;
#endif

  return time_signal_scaling;
}

int WebRtcAecm_ProcessBlock(AecmCore_t* aecm,
                            const int16_t* farend,
                            const int16_t* nearendNoisy,
                            const int16_t* nearendClean,
                            int16_t* output) {
  int i;
  uint32_t xfaSum;
  uint32_t dfaNoisySum;
  uint32_t
dfaCleanSum; 807 uint32_t echoEst32Gained; 808 uint32_t tmpU32; 809 int32_t tmp32no1; 810 811 uint16_t xfa[PART_LEN1]; 812 uint16_t dfaNoisy[PART_LEN1]; 813 uint16_t dfaClean[PART_LEN1]; 814 uint16_t* ptrDfaClean = dfaClean; 815 const uint16_t* far_spectrum_ptr = NULL; 816 817 // 32 byte aligned buffers (with +8 or +16). 818 int16_t fft_buf[PART_LEN4 + 2 + 16]; // +2 to make a loop safe. 819 int32_t echoEst32_buf[PART_LEN1 + 8]; 820 int32_t dfw_buf[PART_LEN2 + 8]; 821 int32_t efw_buf[PART_LEN2 + 8]; 822 823 int16_t* fft = (int16_t*)(((uint32_t)fft_buf + 31) & ~ 31); 824 int32_t* echoEst32 = (int32_t*)(((uint32_t)echoEst32_buf + 31) & ~ 31); 825 complex16_t* dfw = (complex16_t*)(((uint32_t)dfw_buf + 31) & ~ 31); 826 complex16_t* efw = (complex16_t*)(((uint32_t)efw_buf + 31) & ~ 31); 827 828 int16_t hnl[PART_LEN1]; 829 int16_t numPosCoef = 0; 830 int delay; 831 int16_t tmp16no1; 832 int16_t tmp16no2; 833 int16_t mu; 834 int16_t supGain; 835 int16_t zeros32, zeros16; 836 int16_t zerosDBufNoisy, zerosDBufClean, zerosXBuf; 837 int far_q; 838 int16_t resolutionDiff, qDomainDiff, dfa_clean_q_domain_diff; 839 840 const int kMinPrefBand = 4; 841 const int kMaxPrefBand = 24; 842 int32_t avgHnl32 = 0; 843 844 int32_t temp1, temp2, temp3, temp4, temp5, temp6, temp7, temp8; 845 int16_t* ptr; 846 int16_t* ptr1; 847 int16_t* er_ptr; 848 int16_t* dr_ptr; 849 850 ptr = &hnl[0]; 851 ptr1 = &hnl[0]; 852 er_ptr = &efw[0].real; 853 dr_ptr = &dfw[0].real; 854 855 // Determine startup state. 
There are three states: 856 // (0) the first CONV_LEN blocks 857 // (1) another CONV_LEN blocks 858 // (2) the rest 859 860 if (aecm->startupState < 2) { 861 aecm->startupState = (aecm->totCount >= CONV_LEN) + 862 (aecm->totCount >= CONV_LEN2); 863 } 864 // END: Determine startup state 865 866 // Buffer near and far end signals 867 memcpy(aecm->xBuf + PART_LEN, farend, sizeof(int16_t) * PART_LEN); 868 memcpy(aecm->dBufNoisy + PART_LEN, 869 nearendNoisy, 870 sizeof(int16_t) * PART_LEN); 871 if (nearendClean != NULL) { 872 memcpy(aecm->dBufClean + PART_LEN, 873 nearendClean, 874 sizeof(int16_t) * PART_LEN); 875 } 876 877 // Transform far end signal from time domain to frequency domain. 878 far_q = TimeToFrequencyDomain(aecm, 879 aecm->xBuf, 880 dfw, 881 xfa, 882 &xfaSum); 883 884 // Transform noisy near end signal from time domain to frequency domain. 885 zerosDBufNoisy = TimeToFrequencyDomain(aecm, 886 aecm->dBufNoisy, 887 dfw, 888 dfaNoisy, 889 &dfaNoisySum); 890 aecm->dfaNoisyQDomainOld = aecm->dfaNoisyQDomain; 891 aecm->dfaNoisyQDomain = (int16_t)zerosDBufNoisy; 892 893 if (nearendClean == NULL) { 894 ptrDfaClean = dfaNoisy; 895 aecm->dfaCleanQDomainOld = aecm->dfaNoisyQDomainOld; 896 aecm->dfaCleanQDomain = aecm->dfaNoisyQDomain; 897 dfaCleanSum = dfaNoisySum; 898 } else { 899 // Transform clean near end signal from time domain to frequency domain. 
900 zerosDBufClean = TimeToFrequencyDomain(aecm, 901 aecm->dBufClean, 902 dfw, 903 dfaClean, 904 &dfaCleanSum); 905 aecm->dfaCleanQDomainOld = aecm->dfaCleanQDomain; 906 aecm->dfaCleanQDomain = (int16_t)zerosDBufClean; 907 } 908 909 // Get the delay 910 // Save far-end history and estimate delay 911 WebRtcAecm_UpdateFarHistory(aecm, xfa, far_q); 912 913 if (WebRtc_AddFarSpectrumFix(aecm->delay_estimator_farend, xfa, PART_LEN1, 914 far_q) == -1) { 915 return -1; 916 } 917 delay = WebRtc_DelayEstimatorProcessFix(aecm->delay_estimator, 918 dfaNoisy, 919 PART_LEN1, 920 zerosDBufNoisy); 921 if (delay == -1) { 922 return -1; 923 } 924 else if (delay == -2) { 925 // If the delay is unknown, we assume zero. 926 // NOTE: this will have to be adjusted if we ever add lookahead. 927 delay = 0; 928 } 929 930 if (aecm->fixedDelay >= 0) { 931 // Use fixed delay 932 delay = aecm->fixedDelay; 933 } 934 935 // Get aligned far end spectrum 936 far_spectrum_ptr = WebRtcAecm_AlignedFarend(aecm, &far_q, delay); 937 zerosXBuf = (int16_t) far_q; 938 939 if (far_spectrum_ptr == NULL) { 940 return -1; 941 } 942 943 // Calculate log(energy) and update energy threshold levels 944 WebRtcAecm_CalcEnergies(aecm, 945 far_spectrum_ptr, 946 zerosXBuf, 947 dfaNoisySum, 948 echoEst32); 949 // Calculate stepsize 950 mu = WebRtcAecm_CalcStepSize(aecm); 951 952 // Update counters 953 aecm->totCount++; 954 955 // This is the channel estimation algorithm. 956 // It is base on NLMS but has a variable step length, 957 // which was calculated above. 
958 WebRtcAecm_UpdateChannel(aecm, 959 far_spectrum_ptr, 960 zerosXBuf, 961 dfaNoisy, 962 mu, 963 echoEst32); 964 965 supGain = WebRtcAecm_CalcSuppressionGain(aecm); 966 967 // Calculate Wiener filter hnl[] 968 for (i = 0; i < PART_LEN1; i++) { 969 // Far end signal through channel estimate in Q8 970 // How much can we shift right to preserve resolution 971 tmp32no1 = echoEst32[i] - aecm->echoFilt[i]; 972 aecm->echoFilt[i] += (tmp32no1 * 50) >> 8; 973 974 zeros32 = WebRtcSpl_NormW32(aecm->echoFilt[i]) + 1; 975 zeros16 = WebRtcSpl_NormW16(supGain) + 1; 976 if (zeros32 + zeros16 > 16) { 977 // Multiplication is safe 978 // Result in 979 // Q(RESOLUTION_CHANNEL+RESOLUTION_SUPGAIN+aecm->xfaQDomainBuf[diff]) 980 echoEst32Gained = WEBRTC_SPL_UMUL_32_16((uint32_t)aecm->echoFilt[i], 981 (uint16_t)supGain); 982 resolutionDiff = 14 - RESOLUTION_CHANNEL16 - RESOLUTION_SUPGAIN; 983 resolutionDiff += (aecm->dfaCleanQDomain - zerosXBuf); 984 } else { 985 tmp16no1 = 17 - zeros32 - zeros16; 986 resolutionDiff = 14 + tmp16no1 - RESOLUTION_CHANNEL16 - 987 RESOLUTION_SUPGAIN; 988 resolutionDiff += (aecm->dfaCleanQDomain - zerosXBuf); 989 if (zeros32 > tmp16no1) { 990 echoEst32Gained = WEBRTC_SPL_UMUL_32_16( 991 (uint32_t)aecm->echoFilt[i], 992 (uint16_t)WEBRTC_SPL_RSHIFT_W16(supGain, tmp16no1)); 993 } else { 994 // Result in Q-(RESOLUTION_CHANNEL+RESOLUTION_SUPGAIN-16) 995 echoEst32Gained = WEBRTC_SPL_UMUL_32_16( 996 (uint32_t)WEBRTC_SPL_RSHIFT_W32(aecm->echoFilt[i], 997 tmp16no1), 998 (uint16_t)supGain); 999 } 1000 } 1001 1002 zeros16 = WebRtcSpl_NormW16(aecm->nearFilt[i]); 1003 assert(zeros16 >= 0); // |zeros16| is a norm, hence non-negative. 
1004 dfa_clean_q_domain_diff = aecm->dfaCleanQDomain - aecm->dfaCleanQDomainOld; 1005 if (zeros16 < dfa_clean_q_domain_diff && aecm->nearFilt[i]) { 1006 tmp16no1 = aecm->nearFilt[i] << zeros16; 1007 qDomainDiff = zeros16 - dfa_clean_q_domain_diff; 1008 tmp16no2 = ptrDfaClean[i] >> -qDomainDiff; 1009 } else { 1010 tmp16no1 = dfa_clean_q_domain_diff < 0 1011 ? aecm->nearFilt[i] >> -dfa_clean_q_domain_diff 1012 : aecm->nearFilt[i] << dfa_clean_q_domain_diff; 1013 qDomainDiff = 0; 1014 tmp16no2 = ptrDfaClean[i]; 1015 } 1016 1017 tmp32no1 = (int32_t)(tmp16no2 - tmp16no1); 1018 tmp16no2 = (int16_t)WEBRTC_SPL_RSHIFT_W32(tmp32no1, 4); 1019 tmp16no2 += tmp16no1; 1020 zeros16 = WebRtcSpl_NormW16(tmp16no2); 1021 if ((tmp16no2) & (-qDomainDiff > zeros16)) { 1022 aecm->nearFilt[i] = WEBRTC_SPL_WORD16_MAX; 1023 } else { 1024 aecm->nearFilt[i] = qDomainDiff < 0 ? tmp16no2 << -qDomainDiff 1025 : tmp16no2 >> qDomainDiff; 1026 } 1027 1028 // Wiener filter coefficients, resulting hnl in Q14 1029 if (echoEst32Gained == 0) { 1030 hnl[i] = ONE_Q14; 1031 numPosCoef++; 1032 } else if (aecm->nearFilt[i] == 0) { 1033 hnl[i] = 0; 1034 } else { 1035 // Multiply the suppression gain 1036 // Rounding 1037 echoEst32Gained += (uint32_t)(aecm->nearFilt[i] >> 1); 1038 tmpU32 = WebRtcSpl_DivU32U16(echoEst32Gained, 1039 (uint16_t)aecm->nearFilt[i]); 1040 1041 // Current resolution is 1042 // Q-(RESOLUTION_CHANNEL + RESOLUTION_SUPGAIN 1043 // - max(0, 17 - zeros16 - zeros32)) 1044 // Make sure we are in Q14 1045 tmp32no1 = (int32_t)WEBRTC_SPL_SHIFT_W32(tmpU32, resolutionDiff); 1046 if (tmp32no1 > ONE_Q14) { 1047 hnl[i] = 0; 1048 } else if (tmp32no1 < 0) { 1049 hnl[i] = ONE_Q14; 1050 numPosCoef++; 1051 } else { 1052 // 1-echoEst/dfa 1053 hnl[i] = ONE_Q14 - (int16_t)tmp32no1; 1054 if (hnl[i] <= 0) { 1055 hnl[i] = 0; 1056 } else { 1057 numPosCoef++; 1058 } 1059 } 1060 } 1061 } 1062 1063 // Only in wideband. Prevent the gain in upper band from being larger than 1064 // in lower band. 
1065 if (aecm->mult == 2) { 1066 // TODO(bjornv): Investigate if the scaling of hnl[i] below can cause 1067 // speech distortion in double-talk. 1068 for (i = 0; i < (PART_LEN1 >> 3); i++) { 1069 __asm __volatile ( 1070 "lh %[temp1], 0(%[ptr1]) \n\t" 1071 "lh %[temp2], 2(%[ptr1]) \n\t" 1072 "lh %[temp3], 4(%[ptr1]) \n\t" 1073 "lh %[temp4], 6(%[ptr1]) \n\t" 1074 "lh %[temp5], 8(%[ptr1]) \n\t" 1075 "lh %[temp6], 10(%[ptr1]) \n\t" 1076 "lh %[temp7], 12(%[ptr1]) \n\t" 1077 "lh %[temp8], 14(%[ptr1]) \n\t" 1078 "mul %[temp1], %[temp1], %[temp1] \n\t" 1079 "mul %[temp2], %[temp2], %[temp2] \n\t" 1080 "mul %[temp3], %[temp3], %[temp3] \n\t" 1081 "mul %[temp4], %[temp4], %[temp4] \n\t" 1082 "mul %[temp5], %[temp5], %[temp5] \n\t" 1083 "mul %[temp6], %[temp6], %[temp6] \n\t" 1084 "mul %[temp7], %[temp7], %[temp7] \n\t" 1085 "mul %[temp8], %[temp8], %[temp8] \n\t" 1086 "sra %[temp1], %[temp1], 14 \n\t" 1087 "sra %[temp2], %[temp2], 14 \n\t" 1088 "sra %[temp3], %[temp3], 14 \n\t" 1089 "sra %[temp4], %[temp4], 14 \n\t" 1090 "sra %[temp5], %[temp5], 14 \n\t" 1091 "sra %[temp6], %[temp6], 14 \n\t" 1092 "sra %[temp7], %[temp7], 14 \n\t" 1093 "sra %[temp8], %[temp8], 14 \n\t" 1094 "sh %[temp1], 0(%[ptr1]) \n\t" 1095 "sh %[temp2], 2(%[ptr1]) \n\t" 1096 "sh %[temp3], 4(%[ptr1]) \n\t" 1097 "sh %[temp4], 6(%[ptr1]) \n\t" 1098 "sh %[temp5], 8(%[ptr1]) \n\t" 1099 "sh %[temp6], 10(%[ptr1]) \n\t" 1100 "sh %[temp7], 12(%[ptr1]) \n\t" 1101 "sh %[temp8], 14(%[ptr1]) \n\t" 1102 "addiu %[ptr1], %[ptr1], 16 \n\t" 1103 : [temp1] "=&r" (temp1), [temp2] "=&r" (temp2), [temp3] "=&r" (temp3), 1104 [temp4] "=&r" (temp4), [temp5] "=&r" (temp5), [temp6] "=&r" (temp6), 1105 [temp7] "=&r" (temp7), [temp8] "=&r" (temp8), [ptr1] "+r" (ptr1) 1106 : 1107 : "memory", "hi", "lo" 1108 ); 1109 } 1110 for(i = 0; i < (PART_LEN1 & 7); i++) { 1111 __asm __volatile ( 1112 "lh %[temp1], 0(%[ptr1]) \n\t" 1113 "mul %[temp1], %[temp1], %[temp1] \n\t" 1114 "sra %[temp1], %[temp1], 14 \n\t" 1115 "sh %[temp1], 0(%[ptr1]) 
\n\t" 1116 "addiu %[ptr1], %[ptr1], 2 \n\t" 1117 : [temp1] "=&r" (temp1), [ptr1] "+r" (ptr1) 1118 : 1119 : "memory", "hi", "lo" 1120 ); 1121 } 1122 1123 for (i = kMinPrefBand; i <= kMaxPrefBand; i++) { 1124 avgHnl32 += (int32_t)hnl[i]; 1125 } 1126 1127 assert(kMaxPrefBand - kMinPrefBand + 1 > 0); 1128 avgHnl32 /= (kMaxPrefBand - kMinPrefBand + 1); 1129 1130 for (i = kMaxPrefBand; i < PART_LEN1; i++) { 1131 if (hnl[i] > (int16_t)avgHnl32) { 1132 hnl[i] = (int16_t)avgHnl32; 1133 } 1134 } 1135 } 1136 1137 // Calculate NLP gain, result is in Q14 1138 if (aecm->nlpFlag) { 1139 if (numPosCoef < 3) { 1140 for (i = 0; i < PART_LEN1; i++) { 1141 efw[i].real = 0; 1142 efw[i].imag = 0; 1143 hnl[i] = 0; 1144 } 1145 } else { 1146 for (i = 0; i < PART_LEN1; i++) { 1147 #if defined(MIPS_DSP_R1_LE) 1148 __asm __volatile ( 1149 ".set push \n\t" 1150 ".set noreorder \n\t" 1151 "lh %[temp1], 0(%[ptr]) \n\t" 1152 "lh %[temp2], 0(%[dr_ptr]) \n\t" 1153 "slti %[temp4], %[temp1], 0x4001 \n\t" 1154 "beqz %[temp4], 3f \n\t" 1155 " lh %[temp3], 2(%[dr_ptr]) \n\t" 1156 "slti %[temp5], %[temp1], 3277 \n\t" 1157 "bnez %[temp5], 2f \n\t" 1158 " addiu %[dr_ptr], %[dr_ptr], 4 \n\t" 1159 "mul %[temp2], %[temp2], %[temp1] \n\t" 1160 "mul %[temp3], %[temp3], %[temp1] \n\t" 1161 "shra_r.w %[temp2], %[temp2], 14 \n\t" 1162 "shra_r.w %[temp3], %[temp3], 14 \n\t" 1163 "b 4f \n\t" 1164 " nop \n\t" 1165 "2: \n\t" 1166 "addu %[temp1], $zero, $zero \n\t" 1167 "addu %[temp2], $zero, $zero \n\t" 1168 "addu %[temp3], $zero, $zero \n\t" 1169 "b 1f \n\t" 1170 " nop \n\t" 1171 "3: \n\t" 1172 "addiu %[temp1], $0, 0x4000 \n\t" 1173 "1: \n\t" 1174 "sh %[temp1], 0(%[ptr]) \n\t" 1175 "4: \n\t" 1176 "sh %[temp2], 0(%[er_ptr]) \n\t" 1177 "sh %[temp3], 2(%[er_ptr]) \n\t" 1178 "addiu %[ptr], %[ptr], 2 \n\t" 1179 "addiu %[er_ptr], %[er_ptr], 4 \n\t" 1180 ".set pop \n\t" 1181 : [temp1] "=&r" (temp1), [temp2] "=&r" (temp2), [temp3] "=&r" (temp3), 1182 [temp4] "=&r" (temp4), [temp5] "=&r" (temp5), [ptr] "+r" (ptr), 1183 
[er_ptr] "+r" (er_ptr), [dr_ptr] "+r" (dr_ptr) 1184 : 1185 : "memory", "hi", "lo" 1186 ); 1187 #else 1188 __asm __volatile ( 1189 ".set push \n\t" 1190 ".set noreorder \n\t" 1191 "lh %[temp1], 0(%[ptr]) \n\t" 1192 "lh %[temp2], 0(%[dr_ptr]) \n\t" 1193 "slti %[temp4], %[temp1], 0x4001 \n\t" 1194 "beqz %[temp4], 3f \n\t" 1195 " lh %[temp3], 2(%[dr_ptr]) \n\t" 1196 "slti %[temp5], %[temp1], 3277 \n\t" 1197 "bnez %[temp5], 2f \n\t" 1198 " addiu %[dr_ptr], %[dr_ptr], 4 \n\t" 1199 "mul %[temp2], %[temp2], %[temp1] \n\t" 1200 "mul %[temp3], %[temp3], %[temp1] \n\t" 1201 "addiu %[temp2], %[temp2], 0x2000 \n\t" 1202 "addiu %[temp3], %[temp3], 0x2000 \n\t" 1203 "sra %[temp2], %[temp2], 14 \n\t" 1204 "sra %[temp3], %[temp3], 14 \n\t" 1205 "b 4f \n\t" 1206 " nop \n\t" 1207 "2: \n\t" 1208 "addu %[temp1], $zero, $zero \n\t" 1209 "addu %[temp2], $zero, $zero \n\t" 1210 "addu %[temp3], $zero, $zero \n\t" 1211 "b 1f \n\t" 1212 " nop \n\t" 1213 "3: \n\t" 1214 "addiu %[temp1], $0, 0x4000 \n\t" 1215 "1: \n\t" 1216 "sh %[temp1], 0(%[ptr]) \n\t" 1217 "4: \n\t" 1218 "sh %[temp2], 0(%[er_ptr]) \n\t" 1219 "sh %[temp3], 2(%[er_ptr]) \n\t" 1220 "addiu %[ptr], %[ptr], 2 \n\t" 1221 "addiu %[er_ptr], %[er_ptr], 4 \n\t" 1222 ".set pop \n\t" 1223 : [temp1] "=&r" (temp1), [temp2] "=&r" (temp2), [temp3] "=&r" (temp3), 1224 [temp4] "=&r" (temp4), [temp5] "=&r" (temp5), [ptr] "+r" (ptr), 1225 [er_ptr] "+r" (er_ptr), [dr_ptr] "+r" (dr_ptr) 1226 : 1227 : "memory", "hi", "lo" 1228 ); 1229 #endif 1230 } 1231 } 1232 } 1233 else { 1234 // multiply with Wiener coefficients 1235 for (i = 0; i < PART_LEN1; i++) { 1236 efw[i].real = (int16_t) 1237 (WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(dfw[i].real, 1238 hnl[i], 1239 14)); 1240 efw[i].imag = (int16_t) 1241 (WEBRTC_SPL_MUL_16_16_RSFT_WITH_ROUND(dfw[i].imag, 1242 hnl[i], 1243 14)); 1244 } 1245 } 1246 1247 if (aecm->cngMode == AecmTrue) { 1248 ComfortNoise(aecm, ptrDfaClean, efw, hnl); 1249 } 1250 1251 InverseFFTAndWindow(aecm, fft, efw, output, nearendClean); 1252 
1253 return 0; 1254 } 1255 1256 // Generate comfort noise and add to output signal. 1257 static void ComfortNoise(AecmCore_t* aecm, 1258 const uint16_t* dfa, 1259 complex16_t* out, 1260 const int16_t* lambda) { 1261 int16_t i; 1262 int16_t tmp16, tmp161, tmp162, tmp163, nrsh1, nrsh2; 1263 int32_t tmp32, tmp321, tnoise, tnoise1; 1264 int32_t tmp322, tmp323, *tmp1; 1265 int16_t* dfap; 1266 int16_t* lambdap; 1267 const int32_t c2049 = 2049; 1268 const int32_t c359 = 359; 1269 const int32_t c114 = ONE_Q14; 1270 1271 int16_t randW16[PART_LEN]; 1272 int16_t uReal[PART_LEN1]; 1273 int16_t uImag[PART_LEN1]; 1274 int32_t outLShift32; 1275 1276 int16_t shiftFromNearToNoise = kNoiseEstQDomain - aecm->dfaCleanQDomain; 1277 int16_t minTrackShift = 9; 1278 1279 assert(shiftFromNearToNoise >= 0); 1280 assert(shiftFromNearToNoise < 16); 1281 1282 if (aecm->noiseEstCtr < 100) { 1283 // Track the minimum more quickly initially. 1284 aecm->noiseEstCtr++; 1285 minTrackShift = 6; 1286 } 1287 1288 // Generate a uniform random array on [0 2^15-1]. 1289 WebRtcSpl_RandUArray(randW16, PART_LEN, &aecm->seed); 1290 int16_t* randW16p = (int16_t*)randW16; 1291 #if defined (MIPS_DSP_R1_LE) 1292 int16_t* kCosTablep = (int16_t*)WebRtcAecm_kCosTable; 1293 int16_t* kSinTablep = (int16_t*)WebRtcAecm_kSinTable; 1294 #endif // #if defined(MIPS_DSP_R1_LE) 1295 tmp1 = (int32_t*)aecm->noiseEst + 1; 1296 dfap = (int16_t*)dfa + 1; 1297 lambdap = (int16_t*)lambda + 1; 1298 // Estimate noise power. 1299 for (i = 1; i < PART_LEN1; i+=2) { 1300 // Shift to the noise domain. 
1301 __asm __volatile ( 1302 "lh %[tmp32], 0(%[dfap]) \n\t" 1303 "lw %[tnoise], 0(%[tmp1]) \n\t" 1304 "sllv %[outLShift32], %[tmp32], %[shiftFromNearToNoise] \n\t" 1305 : [tmp32] "=&r" (tmp32), [outLShift32] "=r" (outLShift32), 1306 [tnoise] "=&r" (tnoise) 1307 : [tmp1] "r" (tmp1), [dfap] "r" (dfap), 1308 [shiftFromNearToNoise] "r" (shiftFromNearToNoise) 1309 : "memory" 1310 ); 1311 1312 if (outLShift32 < tnoise) { 1313 // Reset "too low" counter 1314 aecm->noiseEstTooLowCtr[i] = 0; 1315 // Track the minimum. 1316 if (tnoise < (1 << minTrackShift)) { 1317 // For small values, decrease noiseEst[i] every 1318 // |kNoiseEstIncCount| block. The regular approach below can not 1319 // go further down due to truncation. 1320 aecm->noiseEstTooHighCtr[i]++; 1321 if (aecm->noiseEstTooHighCtr[i] >= kNoiseEstIncCount) { 1322 tnoise--; 1323 aecm->noiseEstTooHighCtr[i] = 0; // Reset the counter 1324 } 1325 } else { 1326 __asm __volatile ( 1327 "subu %[tmp32], %[tnoise], %[outLShift32] \n\t" 1328 "srav %[tmp32], %[tmp32], %[minTrackShift] \n\t" 1329 "subu %[tnoise], %[tnoise], %[tmp32] \n\t" 1330 : [tmp32] "=&r" (tmp32), [tnoise] "+r" (tnoise) 1331 : [outLShift32] "r" (outLShift32), [minTrackShift] "r" (minTrackShift) 1332 ); 1333 } 1334 } else { 1335 // Reset "too high" counter 1336 aecm->noiseEstTooHighCtr[i] = 0; 1337 // Ramp slowly upwards until we hit the minimum again. 
1338 if ((tnoise >> 19) <= 0) { 1339 if ((tnoise >> 11) > 0) { 1340 // Large enough for relative increase 1341 __asm __volatile ( 1342 "mul %[tnoise], %[tnoise], %[c2049] \n\t" 1343 "sra %[tnoise], %[tnoise], 11 \n\t" 1344 : [tnoise] "+r" (tnoise) 1345 : [c2049] "r" (c2049) 1346 : "hi", "lo" 1347 ); 1348 } else { 1349 // Make incremental increases based on size every 1350 // |kNoiseEstIncCount| block 1351 aecm->noiseEstTooLowCtr[i]++; 1352 if (aecm->noiseEstTooLowCtr[i] >= kNoiseEstIncCount) { 1353 __asm __volatile ( 1354 "sra %[tmp32], %[tnoise], 9 \n\t" 1355 "addi %[tnoise], %[tnoise], 1 \n\t" 1356 "addu %[tnoise], %[tnoise], %[tmp32] \n\t" 1357 : [tnoise] "+r" (tnoise), [tmp32] "=&r" (tmp32) 1358 : 1359 ); 1360 aecm->noiseEstTooLowCtr[i] = 0; // Reset counter 1361 } 1362 } 1363 } else { 1364 // Avoid overflow. 1365 // Multiplication with 2049 will cause wrap around. Scale 1366 // down first and then multiply 1367 __asm __volatile ( 1368 "sra %[tnoise], %[tnoise], 11 \n\t" 1369 "mul %[tnoise], %[tnoise], %[c2049] \n\t" 1370 : [tnoise] "+r" (tnoise) 1371 : [c2049] "r" (c2049) 1372 : "hi", "lo" 1373 ); 1374 } 1375 } 1376 1377 // Shift to the noise domain. 1378 __asm __volatile ( 1379 "lh %[tmp32], 2(%[dfap]) \n\t" 1380 "lw %[tnoise1], 4(%[tmp1]) \n\t" 1381 "addiu %[dfap], %[dfap], 4 \n\t" 1382 "sllv %[outLShift32], %[tmp32], %[shiftFromNearToNoise] \n\t" 1383 : [tmp32] "=&r" (tmp32), [dfap] "+r" (dfap), 1384 [outLShift32] "=r" (outLShift32), [tnoise1] "=&r" (tnoise1) 1385 : [tmp1] "r" (tmp1), [shiftFromNearToNoise] "r" (shiftFromNearToNoise) 1386 : "memory" 1387 ); 1388 1389 if (outLShift32 < tnoise1) { 1390 // Reset "too low" counter 1391 aecm->noiseEstTooLowCtr[i + 1] = 0; 1392 // Track the minimum. 1393 if (tnoise1 < (1 << minTrackShift)) { 1394 // For small values, decrease noiseEst[i] every 1395 // |kNoiseEstIncCount| block. The regular approach below can not 1396 // go further down due to truncation. 
1397 aecm->noiseEstTooHighCtr[i + 1]++; 1398 if (aecm->noiseEstTooHighCtr[i + 1] >= kNoiseEstIncCount) { 1399 tnoise1--; 1400 aecm->noiseEstTooHighCtr[i + 1] = 0; // Reset the counter 1401 } 1402 } else { 1403 __asm __volatile ( 1404 "subu %[tmp32], %[tnoise1], %[outLShift32] \n\t" 1405 "srav %[tmp32], %[tmp32], %[minTrackShift] \n\t" 1406 "subu %[tnoise1], %[tnoise1], %[tmp32] \n\t" 1407 : [tmp32] "=&r" (tmp32), [tnoise1] "+r" (tnoise1) 1408 : [outLShift32] "r" (outLShift32), [minTrackShift] "r" (minTrackShift) 1409 ); 1410 } 1411 } else { 1412 // Reset "too high" counter 1413 aecm->noiseEstTooHighCtr[i + 1] = 0; 1414 // Ramp slowly upwards until we hit the minimum again. 1415 if ((tnoise1 >> 19) <= 0) { 1416 if ((tnoise1 >> 11) > 0) { 1417 // Large enough for relative increase 1418 __asm __volatile ( 1419 "mul %[tnoise1], %[tnoise1], %[c2049] \n\t" 1420 "sra %[tnoise1], %[tnoise1], 11 \n\t" 1421 : [tnoise1] "+r" (tnoise1) 1422 : [c2049] "r" (c2049) 1423 : "hi", "lo" 1424 ); 1425 } else { 1426 // Make incremental increases based on size every 1427 // |kNoiseEstIncCount| block 1428 aecm->noiseEstTooLowCtr[i + 1]++; 1429 if (aecm->noiseEstTooLowCtr[i + 1] >= kNoiseEstIncCount) { 1430 __asm __volatile ( 1431 "sra %[tmp32], %[tnoise1], 9 \n\t" 1432 "addi %[tnoise1], %[tnoise1], 1 \n\t" 1433 "addu %[tnoise1], %[tnoise1], %[tmp32] \n\t" 1434 : [tnoise1] "+r" (tnoise1), [tmp32] "=&r" (tmp32) 1435 : 1436 ); 1437 aecm->noiseEstTooLowCtr[i + 1] = 0; // Reset counter 1438 } 1439 } 1440 } else { 1441 // Avoid overflow. 1442 // Multiplication with 2049 will cause wrap around. 
Scale 1443 // down first and then multiply 1444 __asm __volatile ( 1445 "sra %[tnoise1], %[tnoise1], 11 \n\t" 1446 "mul %[tnoise1], %[tnoise1], %[c2049] \n\t" 1447 : [tnoise1] "+r" (tnoise1) 1448 : [c2049] "r" (c2049) 1449 : "hi", "lo" 1450 ); 1451 } 1452 } 1453 1454 __asm __volatile ( 1455 "lh %[tmp16], 0(%[lambdap]) \n\t" 1456 "lh %[tmp161], 2(%[lambdap]) \n\t" 1457 "sw %[tnoise], 0(%[tmp1]) \n\t" 1458 "sw %[tnoise1], 4(%[tmp1]) \n\t" 1459 "subu %[tmp16], %[c114], %[tmp16] \n\t" 1460 "subu %[tmp161], %[c114], %[tmp161] \n\t" 1461 "srav %[tmp32], %[tnoise], %[shiftFromNearToNoise] \n\t" 1462 "srav %[tmp321], %[tnoise1], %[shiftFromNearToNoise] \n\t" 1463 "addiu %[lambdap], %[lambdap], 4 \n\t" 1464 "addiu %[tmp1], %[tmp1], 8 \n\t" 1465 : [tmp16] "=&r" (tmp16), [tmp161] "=&r" (tmp161), [tmp1] "+r" (tmp1), 1466 [tmp32] "=&r" (tmp32), [tmp321] "=&r" (tmp321), [lambdap] "+r" (lambdap) 1467 : [tnoise] "r" (tnoise), [tnoise1] "r" (tnoise1), [c114] "r" (c114), 1468 [shiftFromNearToNoise] "r" (shiftFromNearToNoise) 1469 : "memory" 1470 ); 1471 1472 if (tmp32 > 32767) { 1473 tmp32 = 32767; 1474 aecm->noiseEst[i] = WEBRTC_SPL_LSHIFT_W32(tmp32, shiftFromNearToNoise); 1475 } 1476 if (tmp321 > 32767) { 1477 tmp321 = 32767; 1478 aecm->noiseEst[i+1] = WEBRTC_SPL_LSHIFT_W32(tmp321, shiftFromNearToNoise); 1479 } 1480 1481 __asm __volatile ( 1482 "mul %[tmp32], %[tmp32], %[tmp16] \n\t" 1483 "mul %[tmp321], %[tmp321], %[tmp161] \n\t" 1484 "sra %[nrsh1], %[tmp32], 14 \n\t" 1485 "sra %[nrsh2], %[tmp321], 14 \n\t" 1486 : [nrsh1] "=&r" (nrsh1), [nrsh2] "=r" (nrsh2) 1487 : [tmp16] "r" (tmp16), [tmp161] "r" (tmp161), [tmp32] "r" (tmp32), 1488 [tmp321] "r" (tmp321) 1489 : "memory", "hi", "lo" 1490 ); 1491 1492 __asm __volatile ( 1493 "lh %[tmp32], 0(%[randW16p]) \n\t" 1494 "lh %[tmp321], 2(%[randW16p]) \n\t" 1495 "addiu %[randW16p], %[randW16p], 4 \n\t" 1496 "mul %[tmp32], %[tmp32], %[c359] \n\t" 1497 "mul %[tmp321], %[tmp321], %[c359] \n\t" 1498 "sra %[tmp16], %[tmp32], 15 \n\t" 1499 "sra 
%[tmp161], %[tmp321], 15 \n\t" 1500 : [randW16p] "+r" (randW16p), [tmp32] "=&r" (tmp32), 1501 [tmp16] "=r" (tmp16), [tmp161] "=r" (tmp161), [tmp321] "=&r" (tmp321) 1502 : [c359] "r" (c359) 1503 : "memory", "hi", "lo" 1504 ); 1505 1506 #if !defined(MIPS_DSP_R1_LE) 1507 tmp32 = WebRtcAecm_kCosTable[tmp16]; 1508 tmp321 = WebRtcAecm_kSinTable[tmp16]; 1509 tmp322 = WebRtcAecm_kCosTable[tmp161]; 1510 tmp323 = WebRtcAecm_kSinTable[tmp161]; 1511 #else 1512 __asm __volatile ( 1513 "sll %[tmp16], %[tmp16], 1 \n\t" 1514 "sll %[tmp161], %[tmp161], 1 \n\t" 1515 "lhx %[tmp32], %[tmp16](%[kCosTablep]) \n\t" 1516 "lhx %[tmp321], %[tmp16](%[kSinTablep]) \n\t" 1517 "lhx %[tmp322], %[tmp161](%[kCosTablep]) \n\t" 1518 "lhx %[tmp323], %[tmp161](%[kSinTablep]) \n\t" 1519 : [tmp32] "=&r" (tmp32), [tmp321] "=&r" (tmp321), 1520 [tmp322] "=&r" (tmp322), [tmp323] "=&r" (tmp323) 1521 : [kCosTablep] "r" (kCosTablep), [tmp16] "r" (tmp16), 1522 [tmp161] "r" (tmp161), [kSinTablep] "r" (kSinTablep) 1523 : "memory" 1524 ); 1525 #endif 1526 __asm __volatile ( 1527 "mul %[tmp32], %[tmp32], %[nrsh1] \n\t" 1528 "negu %[tmp162], %[nrsh1] \n\t" 1529 "mul %[tmp322], %[tmp322], %[nrsh2] \n\t" 1530 "negu %[tmp163], %[nrsh2] \n\t" 1531 "sra %[tmp32], %[tmp32], 13 \n\t" 1532 "mul %[tmp321], %[tmp321], %[tmp162] \n\t" 1533 "sra %[tmp322], %[tmp322], 13 \n\t" 1534 "mul %[tmp323], %[tmp323], %[tmp163] \n\t" 1535 "sra %[tmp321], %[tmp321], 13 \n\t" 1536 "sra %[tmp323], %[tmp323], 13 \n\t" 1537 : [tmp32] "+r" (tmp32), [tmp321] "+r" (tmp321), [tmp162] "=&r" (tmp162), 1538 [tmp322] "+r" (tmp322), [tmp323] "+r" (tmp323), [tmp163] "=&r" (tmp163) 1539 : [nrsh1] "r" (nrsh1), [nrsh2] "r" (nrsh2) 1540 : "hi", "lo" 1541 ); 1542 // Tables are in Q13. 1543 uReal[i] = (int16_t)tmp32; 1544 uImag[i] = (int16_t)tmp321; 1545 uReal[i + 1] = (int16_t)tmp322; 1546 uImag[i + 1] = (int16_t)tmp323; 1547 } 1548 1549 int32_t tt, sgn; 1550 tt = out[0].real; 1551 sgn = ((int)tt) >> 31; 1552 out[0].real = sgn == (int16_t)(tt >> 15) ? 
(int16_t)tt : (16384 ^ sgn); 1553 tt = out[0].imag; 1554 sgn = ((int)tt) >> 31; 1555 out[0].imag = sgn == (int16_t)(tt >> 15) ? (int16_t)tt : (16384 ^ sgn); 1556 for (i = 1; i < PART_LEN; i++) { 1557 tt = out[i].real + uReal[i]; 1558 sgn = ((int)tt) >> 31; 1559 out[i].real = sgn == (int16_t)(tt >> 15) ? (int16_t)tt : (16384 ^ sgn); 1560 tt = out[i].imag + uImag[i]; 1561 sgn = ((int)tt) >> 31; 1562 out[i].imag = sgn == (int16_t)(tt >> 15) ? (int16_t)tt : (16384 ^ sgn); 1563 } 1564 tt = out[PART_LEN].real + uReal[PART_LEN]; 1565 sgn = ((int)tt) >> 31; 1566 out[PART_LEN].real = sgn == (int16_t)(tt >> 15) ? (int16_t)tt : (16384 ^ sgn); 1567 tt = out[PART_LEN].imag; 1568 sgn = ((int)tt) >> 31; 1569 out[PART_LEN].imag = sgn == (int16_t)(tt >> 15) ? (int16_t)tt : (16384 ^ sgn); 1570 } 1571 1572