/*
 * Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
 *
 * Use of this source code is governed by a BSD-style license
 * that can be found in the LICENSE file in the root of the source
 * tree. An additional intellectual property rights grant can be found
 * in the file PATENTS. All contributing project authors may
 * be found in the AUTHORS file in the root of the source tree.
 */

/*
 * The core AEC algorithm, which is presented with time-aligned signals.
 */

#include "aec_core.h"

#include <assert.h>
#include <math.h>
#include <stddef.h>  // size_t
#include <stdlib.h>
#include <string.h>

#include "aec_rdft.h"
#include "delay_estimator_wrapper.h"
#include "ring_buffer.h"
#include "system_wrappers/interface/cpu_features_wrapper.h"
#include "typedefs.h"

// Buffer size (partitions).
static const size_t kBufSizePartitions = 250;  // 1 second of audio in 16 kHz.

// Noise suppression
static const int converged = 250;

// Metrics
static const int subCountLen = 4;
static const int countLen = 50;

// Quantities to control H band scaling for SWB input
static const int flagHbandCn = 1;  // flag for adding comfort noise in H band
static const float cnScaleHband = (float)0.4;  // scale for comfort noise in H band
// Initial bin for averaging nlp gain in low band
static const int freqAvgIc = PART_LEN / 2;

// Matlab code to produce table:
// win = sqrt(hanning(127)); win = [0 ; win(1:64)];
// fprintf(1, '\t%.14f, %.14f, %.14f,\n', win);
static const float sqrtHanning[65] = {
  0.00000000000000f, 0.02454122852291f, 0.04906767432742f,
  0.07356456359967f, 0.09801714032956f, 0.12241067519922f,
  0.14673047445536f, 0.17096188876030f, 0.19509032201613f,
  0.21910124015687f, 0.24298017990326f, 0.26671275747490f,
  0.29028467725446f, 0.31368174039889f, 0.33688985339222f,
  0.35989503653499f, 0.38268343236509f, 0.40524131400499f,
  0.42755509343028f, 0.44961132965461f, 0.47139673682600f,
  0.49289819222978f, 0.51410274419322f, 0.53499761988710f,
  0.55557023301960f, 0.57580819141785f, 0.59569930449243f,
  0.61523159058063f, 0.63439328416365f, 0.65317284295378f,
  0.67155895484702f, 0.68954054473707f, 0.70710678118655f,
  0.72424708295147f, 0.74095112535496f, 0.75720884650648f,
  0.77301045336274f, 0.78834642762661f, 0.80320753148064f,
  0.81758481315158f, 0.83146961230255f, 0.84485356524971f,
  0.85772861000027f, 0.87008699110871f, 0.88192126434835f,
  0.89322430119552f, 0.90398929312344f, 0.91420975570353f,
  0.92387953251129f, 0.93299279883474f, 0.94154406518302f,
  0.94952818059304f, 0.95694033573221f, 0.96377606579544f,
  0.97003125319454f, 0.97570213003853f, 0.98078528040323f,
  0.98527764238894f, 0.98917650996478f, 0.99247953459871f,
  0.99518472667220f, 0.99729045667869f, 0.99879545620517f,
  0.99969881869620f, 1.00000000000000f
};

// Matlab code to produce table:
// weightCurve = [0 ; 0.3 * sqrt(linspace(0,1,64))' + 0.1];
// fprintf(1, '\t%.4f, %.4f, %.4f, %.4f, %.4f, %.4f,\n', weightCurve);
const float WebRtcAec_weightCurve[65] = {
  0.0000f, 0.1000f, 0.1378f, 0.1535f, 0.1655f, 0.1756f,
  0.1845f, 0.1926f, 0.2000f, 0.2069f, 0.2134f, 0.2195f,
  0.2254f, 0.2309f, 0.2363f, 0.2414f, 0.2464f, 0.2512f,
  0.2558f, 0.2604f, 0.2648f, 0.2690f, 0.2732f, 0.2773f,
  0.2813f, 0.2852f, 0.2890f, 0.2927f, 0.2964f, 0.3000f,
  0.3035f, 0.3070f, 0.3104f, 0.3138f, 0.3171f, 0.3204f,
  0.3236f, 0.3268f, 0.3299f, 0.3330f, 0.3360f, 0.3390f,
  0.3420f, 0.3449f, 0.3478f, 0.3507f, 0.3535f, 0.3563f,
  0.3591f, 0.3619f, 0.3646f, 0.3673f, 0.3699f, 0.3726f,
  0.3752f, 0.3777f, 0.3803f, 0.3828f, 0.3854f, 0.3878f,
  0.3903f, 0.3928f, 0.3952f, 0.3976f, 0.4000f
};

// Matlab code to produce table:
// overDriveCurve = [sqrt(linspace(0,1,65))' + 1];
// fprintf(1, '\t%.4f, %.4f, %.4f, %.4f, %.4f, %.4f,\n', overDriveCurve);
const float WebRtcAec_overDriveCurve[65] = {
  1.0000f, 1.1250f, 1.1768f, 1.2165f, 1.2500f, 1.2795f,
  1.3062f, 1.3307f, 1.3536f, 1.3750f, 1.3953f, 1.4146f,
  1.4330f, 1.4507f, 1.4677f, 1.4841f, 1.5000f, 1.5154f,
  1.5303f, 1.5449f, 1.5590f, 1.5728f, 1.5863f, 1.5995f,
  1.6124f, 1.6250f, 1.6374f, 1.6495f, 1.6614f, 1.6731f,
  1.6847f, 1.6960f, 1.7071f, 1.7181f, 1.7289f, 1.7395f,
  1.7500f, 1.7603f, 1.7706f, 1.7806f, 1.7906f, 1.8004f,
  1.8101f, 1.8197f, 1.8292f, 1.8385f, 1.8478f, 1.8570f,
  1.8660f, 1.8750f, 1.8839f, 1.8927f, 1.9014f, 1.9100f,
  1.9186f, 1.9270f, 1.9354f, 1.9437f, 1.9520f, 1.9601f,
  1.9682f, 1.9763f, 1.9843f, 1.9922f, 2.0000f
};

// "Private" function prototypes.
static void ProcessBlock(aec_t* aec);

static void NonLinearProcessing(aec_t *aec, short *output, short *outputH);

static void GetHighbandGain(const float *lambda, float *nlpGainHband);

// ComfortNoise() also computes comfort noise for the H band, returned in
// |comfortNoiseHband|.
static void ComfortNoise(aec_t *aec, float efw[2][PART_LEN1],
                         complex_t *comfortNoiseHband,
                         const float *noisePow, const float *lambda);

static void WebRtcAec_InitLevel(power_level_t *level);
static void WebRtcAec_InitStats(stats_t *stats);
static void UpdateLevel(power_level_t* level, float in[2][PART_LEN1]);
static void UpdateMetrics(aec_t *aec);
// Convert from time domain to frequency domain. Note that |time_data| are
// overwritten.
static void TimeToFrequency(float time_data[PART_LEN2],
                            float freq_data[2][PART_LEN1],
                            int window);

__inline static float MulRe(float aRe, float aIm, float bRe, float bIm)
{
  return aRe * bRe - aIm * bIm;
}

__inline static float MulIm(float aRe, float aIm, float bRe, float bIm)
{
  return aRe * bIm + aIm * bRe;
}

static int CmpFloat(const void *a, const void *b)
{
  const float *da = (const float *)a;
  const float *db = (const float *)b;

  return (*da > *db) - (*da < *db);
}

int WebRtcAec_CreateAec(aec_t **aecInst)
{
  aec_t *aec = malloc(sizeof(aec_t));
  *aecInst = aec;
  if (aec == NULL) {
    return -1;
  }

  if (WebRtc_CreateBuffer(&aec->nearFrBuf,
                          FRAME_LEN + PART_LEN,
                          sizeof(int16_t)) == -1) {
    WebRtcAec_FreeAec(aec);
    aec = NULL;
    return -1;
  }

  if (WebRtc_CreateBuffer(&aec->outFrBuf,
                          FRAME_LEN + PART_LEN,
                          sizeof(int16_t)) == -1) {
    WebRtcAec_FreeAec(aec);
    aec = NULL;
    return -1;
  }

  if (WebRtc_CreateBuffer(&aec->nearFrBufH,
                          FRAME_LEN + PART_LEN,
                          sizeof(int16_t)) == -1) {
    WebRtcAec_FreeAec(aec);
    aec = NULL;
    return -1;
  }

  if (WebRtc_CreateBuffer(&aec->outFrBufH,
                          FRAME_LEN + PART_LEN,
                          sizeof(int16_t)) == -1) {
    WebRtcAec_FreeAec(aec);
    aec = NULL;
    return -1;
  }

  // Create far-end buffers.
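  // Note: each element of |far_buf| and |far_buf_windowed| holds one
  // frequency-domain partition, stored as PART_LEN1 real values followed by
  // PART_LEN1 imaginary values (the layout produced by TimeToFrequency()),
  // hence the element size of 2 * PART_LEN1 floats below.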
  if (WebRtc_CreateBuffer(&aec->far_buf, kBufSizePartitions,
                          sizeof(float) * 2 * PART_LEN1) == -1) {
    WebRtcAec_FreeAec(aec);
    aec = NULL;
    return -1;
  }
  if (WebRtc_CreateBuffer(&aec->far_buf_windowed, kBufSizePartitions,
                          sizeof(float) * 2 * PART_LEN1) == -1) {
    WebRtcAec_FreeAec(aec);
    aec = NULL;
    return -1;
  }
#ifdef WEBRTC_AEC_DEBUG_DUMP
  if (WebRtc_CreateBuffer(&aec->far_time_buf, kBufSizePartitions,
                          sizeof(int16_t) * PART_LEN) == -1) {
    WebRtcAec_FreeAec(aec);
    aec = NULL;
    return -1;
  }
#endif
  if (WebRtc_CreateDelayEstimator(&aec->delay_estimator,
                                  PART_LEN1,
                                  kMaxDelayBlocks,
                                  kLookaheadBlocks) == -1) {
    WebRtcAec_FreeAec(aec);
    aec = NULL;
    return -1;
  }

  return 0;
}

int WebRtcAec_FreeAec(aec_t *aec)
{
  if (aec == NULL) {
    return -1;
  }

  WebRtc_FreeBuffer(aec->nearFrBuf);
  WebRtc_FreeBuffer(aec->outFrBuf);

  WebRtc_FreeBuffer(aec->nearFrBufH);
  WebRtc_FreeBuffer(aec->outFrBufH);

  WebRtc_FreeBuffer(aec->far_buf);
  WebRtc_FreeBuffer(aec->far_buf_windowed);
#ifdef WEBRTC_AEC_DEBUG_DUMP
  WebRtc_FreeBuffer(aec->far_time_buf);
#endif
  WebRtc_FreeDelayEstimator(aec->delay_estimator);

  free(aec);
  return 0;
}

static void FilterFar(aec_t *aec, float yf[2][PART_LEN1])
{
  int i;
  for (i = 0; i < NR_PART; i++) {
    int j;
    int xPos = (i + aec->xfBufBlockPos) * PART_LEN1;
    int pos = i * PART_LEN1;
    // Check for wrap
    if (i + aec->xfBufBlockPos >= NR_PART) {
      xPos -= NR_PART * (PART_LEN1);
    }

    for (j = 0; j < PART_LEN1; j++) {
      yf[0][j] += MulRe(aec->xfBuf[0][xPos + j], aec->xfBuf[1][xPos + j],
                        aec->wfBuf[0][pos + j], aec->wfBuf[1][pos + j]);
      yf[1][j] += MulIm(aec->xfBuf[0][xPos + j], aec->xfBuf[1][xPos + j],
                        aec->wfBuf[0][pos + j], aec->wfBuf[1][pos + j]);
    }
  }
}

static void ScaleErrorSignal(aec_t *aec, float ef[2][PART_LEN1])
{
  int i;
  float absEf;
  for (i = 0; i < (PART_LEN1); i++) {
    ef[0][i] /= (aec->xPow[i] + 1e-10f);
    ef[1][i] /= (aec->xPow[i] + 1e-10f);
    absEf = sqrtf(ef[0][i] * ef[0][i] + ef[1][i] * ef[1][i]);

    if (absEf > aec->errThresh) {
      absEf = aec->errThresh / (absEf + 1e-10f);
      ef[0][i] *= absEf;
      ef[1][i] *= absEf;
    }

    // Stepsize factor
    ef[0][i] *= aec->mu;
    ef[1][i] *= aec->mu;
  }
}

// Time-unconstrained filter adaptation.
// TODO(andrew): consider for a low-complexity mode.
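// Note: the commented-out variant below skips the IFFT -> zero second half ->
// FFT round trip that FilterAdaptation() uses to constrain each partition's
// impulse response to PART_LEN taps, lowering complexity at the cost of an
// unconstrained (circular) gradient.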
//static void FilterAdaptationUnconstrained(aec_t *aec, float *fft,
//                                          float ef[2][PART_LEN1]) {
//  int i, j;
//  for (i = 0; i < NR_PART; i++) {
//    int xPos = (i + aec->xfBufBlockPos)*(PART_LEN1);
//    int pos;
//    // Check for wrap
//    if (i + aec->xfBufBlockPos >= NR_PART) {
//      xPos -= NR_PART * PART_LEN1;
//    }
//
//    pos = i * PART_LEN1;
//
//    for (j = 0; j < PART_LEN1; j++) {
//      aec->wfBuf[pos + j][0] += MulRe(aec->xfBuf[xPos + j][0],
//                                      -aec->xfBuf[xPos + j][1],
//                                      ef[j][0], ef[j][1]);
//      aec->wfBuf[pos + j][1] += MulIm(aec->xfBuf[xPos + j][0],
//                                      -aec->xfBuf[xPos + j][1],
//                                      ef[j][0], ef[j][1]);
//    }
//  }
//}

static void FilterAdaptation(aec_t *aec, float *fft, float ef[2][PART_LEN1]) {
  int i, j;
  for (i = 0; i < NR_PART; i++) {
    int xPos = (i + aec->xfBufBlockPos) * (PART_LEN1);
    int pos;
    // Check for wrap
    if (i + aec->xfBufBlockPos >= NR_PART) {
      xPos -= NR_PART * PART_LEN1;
    }

    pos = i * PART_LEN1;

    for (j = 0; j < PART_LEN; j++) {
      fft[2 * j] = MulRe(aec->xfBuf[0][xPos + j],
                         -aec->xfBuf[1][xPos + j],
                         ef[0][j], ef[1][j]);
      fft[2 * j + 1] = MulIm(aec->xfBuf[0][xPos + j],
                             -aec->xfBuf[1][xPos + j],
                             ef[0][j], ef[1][j]);
    }
    fft[1] = MulRe(aec->xfBuf[0][xPos + PART_LEN],
                   -aec->xfBuf[1][xPos + PART_LEN],
                   ef[0][PART_LEN], ef[1][PART_LEN]);

    aec_rdft_inverse_128(fft);
    memset(fft + PART_LEN, 0, sizeof(float) * PART_LEN);

    // fft scaling
    {
      float scale = 2.0f / PART_LEN2;
      for (j = 0; j < PART_LEN; j++) {
        fft[j] *= scale;
      }
    }
    aec_rdft_forward_128(fft);

    aec->wfBuf[0][pos] += fft[0];
    aec->wfBuf[0][pos + PART_LEN] += fft[1];

    for (j = 1; j < PART_LEN; j++) {
      aec->wfBuf[0][pos + j] += fft[2 * j];
      aec->wfBuf[1][pos + j] += fft[2 * j + 1];
    }
  }
}

static void OverdriveAndSuppress(aec_t *aec, float hNl[PART_LEN1],
                                 const float hNlFb,
                                 float efw[2][PART_LEN1]) {
  int i;
  for (i = 0; i < PART_LEN1; i++) {
    // Weight subbands
    if (hNl[i] > hNlFb) {
      hNl[i] = WebRtcAec_weightCurve[i] * hNlFb +
               (1 - WebRtcAec_weightCurve[i]) * hNl[i];
    }
    hNl[i] = powf(hNl[i], aec->overDriveSm * WebRtcAec_overDriveCurve[i]);

    // Suppress error signal
    efw[0][i] *= hNl[i];
    efw[1][i] *= hNl[i];

    // Ooura fft returns incorrect sign on imaginary component. It matters here
    // because we are making an additive change with comfort noise.
    efw[1][i] *= -1;
  }
}

WebRtcAec_FilterFar_t WebRtcAec_FilterFar;
WebRtcAec_ScaleErrorSignal_t WebRtcAec_ScaleErrorSignal;
WebRtcAec_FilterAdaptation_t WebRtcAec_FilterAdaptation;
WebRtcAec_OverdriveAndSuppress_t WebRtcAec_OverdriveAndSuppress;

int WebRtcAec_InitAec(aec_t *aec, int sampFreq)
{
  int i;

  aec->sampFreq = sampFreq;

  if (sampFreq == 8000) {
    aec->mu = 0.6f;
    aec->errThresh = 2e-6f;
  }
  else {
    aec->mu = 0.5f;
    aec->errThresh = 1.5e-6f;
  }

  if (WebRtc_InitBuffer(aec->nearFrBuf) == -1) {
    return -1;
  }

  if (WebRtc_InitBuffer(aec->outFrBuf) == -1) {
    return -1;
  }

  if (WebRtc_InitBuffer(aec->nearFrBufH) == -1) {
    return -1;
  }

  if (WebRtc_InitBuffer(aec->outFrBufH) == -1) {
    return -1;
  }

  // Initialize far-end buffers.
  if (WebRtc_InitBuffer(aec->far_buf) == -1) {
    return -1;
  }
  if (WebRtc_InitBuffer(aec->far_buf_windowed) == -1) {
    return -1;
  }
#ifdef WEBRTC_AEC_DEBUG_DUMP
  if (WebRtc_InitBuffer(aec->far_time_buf) == -1) {
    return -1;
  }
#endif
  aec->system_delay = 0;

  if (WebRtc_InitDelayEstimator(aec->delay_estimator) != 0) {
    return -1;
  }
  aec->delay_logging_enabled = 0;
  memset(aec->delay_histogram, 0, sizeof(aec->delay_histogram));

  // Default target suppression level
  aec->targetSupp = -11.5;
  aec->minOverDrive = 2.0;

  // Sampling frequency multiplier
  // SWB is processed with a frame size of 160 samples.
  if (aec->sampFreq == 32000) {
    aec->mult = (short)aec->sampFreq / 16000;
  }
  else {
    aec->mult = (short)aec->sampFreq / 8000;
  }

  aec->farBufWritePos = 0;
  aec->farBufReadPos = 0;

  aec->inSamples = 0;
  aec->outSamples = 0;
  aec->knownDelay = 0;

  // Initialize buffers
  memset(aec->dBuf, 0, sizeof(aec->dBuf));
  memset(aec->eBuf, 0, sizeof(aec->eBuf));
  // For H band
  memset(aec->dBufH, 0, sizeof(aec->dBufH));

  memset(aec->xPow, 0, sizeof(aec->xPow));
  memset(aec->dPow, 0, sizeof(aec->dPow));
  memset(aec->dInitMinPow, 0, sizeof(aec->dInitMinPow));
  aec->noisePow = aec->dInitMinPow;
  aec->noiseEstCtr = 0;

  // Initial comfort noise power
  for (i = 0; i < PART_LEN1; i++) {
    aec->dMinPow[i] = 1.0e6f;
  }

  // Holds the last block written to
  aec->xfBufBlockPos = 0;
  // TODO: Investigate need for these initializations. Deleting them doesn't
  // change the output at all and yields 0.4% overall speedup.
  memset(aec->xfBuf, 0, sizeof(complex_t) * NR_PART * PART_LEN1);
  memset(aec->wfBuf, 0, sizeof(complex_t) * NR_PART * PART_LEN1);
  memset(aec->sde, 0, sizeof(complex_t) * PART_LEN1);
  memset(aec->sxd, 0, sizeof(complex_t) * PART_LEN1);
  memset(aec->xfwBuf, 0, sizeof(complex_t) * NR_PART * PART_LEN1);
  memset(aec->se, 0, sizeof(float) * PART_LEN1);

  // To prevent numerical instability in the first block.
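  // |sd| and |sx| are the smoothed near-end and far-end power spectra; they
  // end up in the denominators of the coherence estimates in
  // NonLinearProcessing(), so they are seeded with ones rather than zeros.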
  for (i = 0; i < PART_LEN1; i++) {
    aec->sd[i] = 1;
  }
  for (i = 0; i < PART_LEN1; i++) {
    aec->sx[i] = 1;
  }

  memset(aec->hNs, 0, sizeof(aec->hNs));
  memset(aec->outBuf, 0, sizeof(float) * PART_LEN);

  aec->hNlFbMin = 1;
  aec->hNlFbLocalMin = 1;
  aec->hNlXdAvgMin = 1;
  aec->hNlNewMin = 0;
  aec->hNlMinCtr = 0;
  aec->overDrive = 2;
  aec->overDriveSm = 2;
  aec->delayIdx = 0;
  aec->stNearState = 0;
  aec->echoState = 0;
  aec->divergeState = 0;

  aec->seed = 777;
  aec->delayEstCtr = 0;

  // Metrics disabled by default
  aec->metricsMode = 0;
  WebRtcAec_InitMetrics(aec);

  // Assembly optimization
  WebRtcAec_FilterFar = FilterFar;
  WebRtcAec_ScaleErrorSignal = ScaleErrorSignal;
  WebRtcAec_FilterAdaptation = FilterAdaptation;
  WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppress;
  if (WebRtc_GetCPUInfo(kSSE2)) {
#if defined(WEBRTC_USE_SSE2)
    WebRtcAec_InitAec_SSE2();
#endif
  }
  aec_rdft_init();

  return 0;
}

void WebRtcAec_InitMetrics(aec_t *aec)
{
  aec->stateCounter = 0;
  WebRtcAec_InitLevel(&aec->farlevel);
  WebRtcAec_InitLevel(&aec->nearlevel);
  WebRtcAec_InitLevel(&aec->linoutlevel);
  WebRtcAec_InitLevel(&aec->nlpoutlevel);

  WebRtcAec_InitStats(&aec->erl);
  WebRtcAec_InitStats(&aec->erle);
  WebRtcAec_InitStats(&aec->aNlp);
  WebRtcAec_InitStats(&aec->rerl);
}

void WebRtcAec_BufferFarendPartition(aec_t *aec, const float* farend) {
  float fft[PART_LEN2];
  float xf[2][PART_LEN1];

  // Check if the buffer is full, and in that case flush the oldest data.
  if (WebRtc_available_write(aec->far_buf) < 1) {
    WebRtc_MoveReadPtr(aec->far_buf, 1);
    WebRtc_MoveReadPtr(aec->far_buf_windowed, 1);
    aec->system_delay -= PART_LEN;
#ifdef WEBRTC_AEC_DEBUG_DUMP
    WebRtc_MoveReadPtr(aec->far_time_buf, 1);
#endif
  }
  // Convert far-end partition to the frequency domain without windowing.
  memcpy(fft, farend, sizeof(float) * PART_LEN2);
  TimeToFrequency(fft, xf, 0);
  WebRtc_WriteBuffer(aec->far_buf, &xf[0][0], 1);

  // Convert far-end partition to the frequency domain with windowing.
  memcpy(fft, farend, sizeof(float) * PART_LEN2);
  TimeToFrequency(fft, xf, 1);
  WebRtc_WriteBuffer(aec->far_buf_windowed, &xf[0][0], 1);
}

void WebRtcAec_ProcessFrame(aec_t *aec,
                            const short *nearend,
                            const short *nearendH,
                            int knownDelay)
{
  // For each frame the process is as follows:
  // 1) If |system_delay| indicates that it is too small to process a frame,
  //    we stuff the buffer with enough data for 10 ms.
  // 2) Adjust the buffer to the system delay, by moving the read pointer.
  // 3) If we can't move the read pointer due to buffer size limitations, we
  //    flush/stuff the buffer.
  // 4) Process as many partitions as possible.
  // 5) Update the |system_delay| with respect to a full frame of FRAME_LEN
  //    samples. Even though we will have data left to process (we work with
  //    partitions) we consider updating a whole frame, since that's the
  //    amount of data we input and output in audio_processing.

  // TODO(bjornv): Investigate how we should round the delay difference; right
  // now we know that incoming |knownDelay| is underestimated when it's less
  // than |aec->knownDelay|. We therefore round (-32) in that direction.
  // In the other direction, we don't have this situation, but might flush one
  // partition too little. This can cause non-causality, which should be
  // investigated. Maybe allow for a non-symmetric rounding, like -16.
  int move_elements = (aec->knownDelay - knownDelay - 32) / PART_LEN;
  int moved_elements = 0;

  // TODO(bjornv): Change the near-end buffer handling to be the same as for
  // far-end, that is, with a near_pre_buf.
  // Buffer the near-end frame.
  WebRtc_WriteBuffer(aec->nearFrBuf, nearend, FRAME_LEN);
  // For H band
  if (aec->sampFreq == 32000) {
    WebRtc_WriteBuffer(aec->nearFrBufH, nearendH, FRAME_LEN);
  }

  // 1) At most we process |aec->mult|+1 partitions in 10 ms. Make sure we
  //    have enough far-end data for that by stuffing the buffer if the
  //    |system_delay| indicates otherwise.
  if (aec->system_delay < FRAME_LEN) {
    // We don't have enough data so we rewind 10 ms.
    WebRtc_MoveReadPtr(aec->far_buf_windowed, -(aec->mult + 1));
    aec->system_delay -= WebRtc_MoveReadPtr(aec->far_buf, -(aec->mult + 1)) *
        PART_LEN;
#ifdef WEBRTC_AEC_DEBUG_DUMP
    WebRtc_MoveReadPtr(aec->far_time_buf, -(aec->mult + 1));
#endif
  }

  // 2) Compensate for a possible change in the system delay.

  WebRtc_MoveReadPtr(aec->far_buf_windowed, move_elements);
  moved_elements = WebRtc_MoveReadPtr(aec->far_buf, move_elements);
  aec->knownDelay -= moved_elements * PART_LEN;
#ifdef WEBRTC_AEC_DEBUG_DUMP
  WebRtc_MoveReadPtr(aec->far_time_buf, move_elements);
#endif

  // 4) Process as many blocks as possible.
  while (WebRtc_available_read(aec->nearFrBuf) >= PART_LEN) {
    ProcessBlock(aec);
  }

  // 5) Update system delay with respect to the entire frame.
  aec->system_delay -= FRAME_LEN;
}

static void ProcessBlock(aec_t* aec) {
  int i;
  float d[PART_LEN], y[PART_LEN], e[PART_LEN], dH[PART_LEN];
  float scale;

  float fft[PART_LEN2];
  float xf[2][PART_LEN1], yf[2][PART_LEN1], ef[2][PART_LEN1];
  float df[2][PART_LEN1];
  float far_spectrum = 0.0f;
  float near_spectrum = 0.0f;
  float abs_far_spectrum[PART_LEN1];
  float abs_near_spectrum[PART_LEN1];

  const float gPow[2] = {0.9f, 0.1f};

  // Noise estimate constants.
  const int noiseInitBlocks = 500 * aec->mult;
  const float step = 0.1f;
  const float ramp = 1.0002f;
  const float gInitNoise[2] = {0.999f, 0.001f};

  int16_t nearend[PART_LEN];
  int16_t* nearend_ptr = NULL;
  int16_t output[PART_LEN];
  int16_t outputH[PART_LEN];

  float* xf_ptr = NULL;

  memset(dH, 0, sizeof(dH));
  if (aec->sampFreq == 32000) {
    // Get the upper band first so we can reuse |nearend|.
    WebRtc_ReadBuffer(aec->nearFrBufH,
                      (void**) &nearend_ptr,
                      nearend,
                      PART_LEN);
    for (i = 0; i < PART_LEN; i++) {
      dH[i] = (float) (nearend_ptr[i]);
    }
    memcpy(aec->dBufH + PART_LEN, dH, sizeof(float) * PART_LEN);
  }
  WebRtc_ReadBuffer(aec->nearFrBuf, (void**) &nearend_ptr, nearend, PART_LEN);

  // ---------- Ooura fft ----------
  // Concatenate old and new nearend blocks.
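  // |dBuf| holds PART_LEN2 samples: the previous block in the first half and
  // the current block in the second half, giving the 50% overlap used by the
  // (windowed) FFTs below.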
  for (i = 0; i < PART_LEN; i++) {
    d[i] = (float) (nearend_ptr[i]);
  }
  memcpy(aec->dBuf + PART_LEN, d, sizeof(float) * PART_LEN);

#ifdef WEBRTC_AEC_DEBUG_DUMP
  {
    int16_t farend[PART_LEN];
    int16_t* farend_ptr = NULL;
    WebRtc_ReadBuffer(aec->far_time_buf, (void**) &farend_ptr, farend, 1);
    fwrite(farend_ptr, sizeof(int16_t), PART_LEN, aec->farFile);
    fwrite(nearend_ptr, sizeof(int16_t), PART_LEN, aec->nearFile);
  }
#endif

  // We should always have at least one element stored in |far_buf|.
  assert(WebRtc_available_read(aec->far_buf) > 0);
  WebRtc_ReadBuffer(aec->far_buf, (void**) &xf_ptr, &xf[0][0], 1);

  // Near fft
  memcpy(fft, aec->dBuf, sizeof(float) * PART_LEN2);
  TimeToFrequency(fft, df, 0);

  // Power smoothing
  for (i = 0; i < PART_LEN1; i++) {
    far_spectrum = (xf_ptr[i] * xf_ptr[i]) +
        (xf_ptr[PART_LEN1 + i] * xf_ptr[PART_LEN1 + i]);
    aec->xPow[i] = gPow[0] * aec->xPow[i] + gPow[1] * NR_PART * far_spectrum;
    // Calculate absolute spectra
    abs_far_spectrum[i] = sqrtf(far_spectrum);

    near_spectrum = df[0][i] * df[0][i] + df[1][i] * df[1][i];
    aec->dPow[i] = gPow[0] * aec->dPow[i] + gPow[1] * near_spectrum;
    // Calculate absolute spectra
    abs_near_spectrum[i] = sqrtf(near_spectrum);
  }

  // Estimate noise power. Wait until dPow is more stable.
  if (aec->noiseEstCtr > 50) {
    for (i = 0; i < PART_LEN1; i++) {
      if (aec->dPow[i] < aec->dMinPow[i]) {
        aec->dMinPow[i] = (aec->dPow[i] + step * (aec->dMinPow[i] -
            aec->dPow[i])) * ramp;
      }
      else {
        aec->dMinPow[i] *= ramp;
      }
    }
  }

  // Smooth increasing noise power from zero at the start,
  // to avoid a sudden burst of comfort noise.
  if (aec->noiseEstCtr < noiseInitBlocks) {
    aec->noiseEstCtr++;
    for (i = 0; i < PART_LEN1; i++) {
      if (aec->dMinPow[i] > aec->dInitMinPow[i]) {
        aec->dInitMinPow[i] = gInitNoise[0] * aec->dInitMinPow[i] +
            gInitNoise[1] * aec->dMinPow[i];
      }
      else {
        aec->dInitMinPow[i] = aec->dMinPow[i];
      }
    }
    aec->noisePow = aec->dInitMinPow;
  }
  else {
    aec->noisePow = aec->dMinPow;
  }

  // Block-wise delay estimation used for logging
  if (aec->delay_logging_enabled) {
    int delay_estimate = 0;
    // Estimate the delay
    delay_estimate = WebRtc_DelayEstimatorProcessFloat(aec->delay_estimator,
                                                       abs_far_spectrum,
                                                       abs_near_spectrum,
                                                       PART_LEN1);
    if (delay_estimate >= 0) {
      // Update delay estimate buffer.
      aec->delay_histogram[delay_estimate]++;
    }
  }

  // Update the xfBuf block position.
  aec->xfBufBlockPos--;
  if (aec->xfBufBlockPos == -1) {
    aec->xfBufBlockPos = NR_PART - 1;
  }

  // Buffer xf
  memcpy(aec->xfBuf[0] + aec->xfBufBlockPos * PART_LEN1, xf_ptr,
         sizeof(float) * PART_LEN1);
  memcpy(aec->xfBuf[1] + aec->xfBufBlockPos * PART_LEN1, &xf_ptr[PART_LEN1],
         sizeof(float) * PART_LEN1);

  memset(yf[0], 0, sizeof(float) * (PART_LEN1 * 2));

  // Filter far
  WebRtcAec_FilterFar(aec, yf);

  // Inverse fft to obtain echo estimate and error.
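  // Ooura's rdft packs a length-128 real spectrum as
  //   fft[0] = Re{X(0)}, fft[1] = Re{X(64)},
  //   fft[2*k] = Re{X(k)}, fft[2*k + 1] = Im{X(k)} for 1 <= k < 64,
  // which is why the DC and Nyquist bins are handled separately here and in
  // the other (un)packing loops in this file.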
  fft[0] = yf[0][0];
  fft[1] = yf[0][PART_LEN];
  for (i = 1; i < PART_LEN; i++) {
    fft[2 * i] = yf[0][i];
    fft[2 * i + 1] = yf[1][i];
  }
  aec_rdft_inverse_128(fft);

  scale = 2.0f / PART_LEN2;
  for (i = 0; i < PART_LEN; i++) {
    y[i] = fft[PART_LEN + i] * scale;  // fft scaling
  }

  for (i = 0; i < PART_LEN; i++) {
    e[i] = d[i] - y[i];
  }

  // Error fft
  memcpy(aec->eBuf + PART_LEN, e, sizeof(float) * PART_LEN);
  memset(fft, 0, sizeof(float) * PART_LEN);
  memcpy(fft + PART_LEN, e, sizeof(float) * PART_LEN);
  // TODO(bjornv): Change to use TimeToFrequency().
  aec_rdft_forward_128(fft);

  ef[1][0] = 0;
  ef[1][PART_LEN] = 0;
  ef[0][0] = fft[0];
  ef[0][PART_LEN] = fft[1];
  for (i = 1; i < PART_LEN; i++) {
    ef[0][i] = fft[2 * i];
    ef[1][i] = fft[2 * i + 1];
  }

  if (aec->metricsMode == 1) {
    // Note that the first PART_LEN samples in fft (before transformation) are
    // zero. Hence, the scaling by two in UpdateLevel() should not be
    // performed. That scaling is taken care of in UpdateMetrics() instead.
    UpdateLevel(&aec->linoutlevel, ef);
  }

  // Scale error signal inversely with far power.
  WebRtcAec_ScaleErrorSignal(aec, ef);
  WebRtcAec_FilterAdaptation(aec, fft, ef);
  NonLinearProcessing(aec, output, outputH);

  if (aec->metricsMode == 1) {
    // Update power levels and echo metrics
    UpdateLevel(&aec->farlevel, (float (*)[PART_LEN1]) xf_ptr);
    UpdateLevel(&aec->nearlevel, df);
    UpdateMetrics(aec);
  }

  // Store the output block.
  WebRtc_WriteBuffer(aec->outFrBuf, output, PART_LEN);
  // For H band
  if (aec->sampFreq == 32000) {
    WebRtc_WriteBuffer(aec->outFrBufH, outputH, PART_LEN);
  }

#ifdef WEBRTC_AEC_DEBUG_DUMP
  {
    int16_t eInt16[PART_LEN];
    for (i = 0; i < PART_LEN; i++) {
      eInt16[i] = (int16_t)WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX, e[i],
          WEBRTC_SPL_WORD16_MIN);
    }

    fwrite(eInt16, sizeof(int16_t), PART_LEN, aec->outLinearFile);
    fwrite(output, sizeof(int16_t), PART_LEN, aec->outFile);
  }
#endif
}

static void NonLinearProcessing(aec_t *aec, short *output, short *outputH)
{
  float efw[2][PART_LEN1], dfw[2][PART_LEN1], xfw[2][PART_LEN1];
  complex_t comfortNoiseHband[PART_LEN1];
  float fft[PART_LEN2];
  float scale, dtmp;
  float nlpGainHband;
  int i, j, pos;

  // Coherence and non-linear filter
  float cohde[PART_LEN1], cohxd[PART_LEN1];
  float hNlDeAvg, hNlXdAvg;
  float hNl[PART_LEN1];
  float hNlPref[PREF_BAND_SIZE];
  float hNlFb = 0, hNlFbLow = 0;
  const float prefBandQuant = 0.75f, prefBandQuantLow = 0.5f;
  const int prefBandSize = PREF_BAND_SIZE / aec->mult;
  const int minPrefBand = 4 / aec->mult;

  // Near and error power sums
  float sdSum = 0, seSum = 0;

  // Power estimate smoothing coefficients
  const float gCoh[2][2] = {{0.9f, 0.1f}, {0.93f, 0.07f}};
  const float *ptrGCoh = gCoh[aec->mult - 1];

  // Filter energy
  float wfEnMax = 0, wfEn = 0;
  const int delayEstInterval = 10 * aec->mult;

  float* xfw_ptr = NULL;

  aec->delayEstCtr++;
  if (aec->delayEstCtr == delayEstInterval) {
    aec->delayEstCtr = 0;
  }

  // Initialize comfort noise for H band.
  memset(comfortNoiseHband, 0, sizeof(comfortNoiseHband));
  nlpGainHband = (float)0.0;
  dtmp = (float)0.0;

  // Measure energy in each filter partition to determine delay.
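  // The partition with the largest coefficient energy is taken as the bulk
  // delay (in whole partitions) and is used below to pick the matching
  // delayed far-end spectrum for the coherence estimates.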
  // TODO: Spread by computing one partition per block?
  if (aec->delayEstCtr == 0) {
    wfEnMax = 0;
    aec->delayIdx = 0;
    for (i = 0; i < NR_PART; i++) {
      pos = i * PART_LEN1;
      wfEn = 0;
      for (j = 0; j < PART_LEN1; j++) {
        wfEn += aec->wfBuf[0][pos + j] * aec->wfBuf[0][pos + j] +
            aec->wfBuf[1][pos + j] * aec->wfBuf[1][pos + j];
      }

      if (wfEn > wfEnMax) {
        wfEnMax = wfEn;
        aec->delayIdx = i;
      }
    }
  }

  // We should always have at least one element stored in |far_buf_windowed|.
  assert(WebRtc_available_read(aec->far_buf_windowed) > 0);
  // NLP
  WebRtc_ReadBuffer(aec->far_buf_windowed, (void**) &xfw_ptr, &xfw[0][0], 1);

  // TODO(bjornv): Investigate if we can reuse |far_buf_windowed| instead of
  // |xfwBuf|.
  // Buffer far.
  memcpy(aec->xfwBuf, xfw_ptr, sizeof(float) * 2 * PART_LEN1);

  // Use delayed far.
  memcpy(xfw, aec->xfwBuf + aec->delayIdx * PART_LEN1, sizeof(xfw));

  // Windowed near fft
  for (i = 0; i < PART_LEN; i++) {
    fft[i] = aec->dBuf[i] * sqrtHanning[i];
    fft[PART_LEN + i] = aec->dBuf[PART_LEN + i] * sqrtHanning[PART_LEN - i];
  }
  aec_rdft_forward_128(fft);

  dfw[1][0] = 0;
  dfw[1][PART_LEN] = 0;
  dfw[0][0] = fft[0];
  dfw[0][PART_LEN] = fft[1];
  for (i = 1; i < PART_LEN; i++) {
    dfw[0][i] = fft[2 * i];
    dfw[1][i] = fft[2 * i + 1];
  }

  // Windowed error fft
  for (i = 0; i < PART_LEN; i++) {
    fft[i] = aec->eBuf[i] * sqrtHanning[i];
    fft[PART_LEN + i] = aec->eBuf[PART_LEN + i] * sqrtHanning[PART_LEN - i];
  }
  aec_rdft_forward_128(fft);
  efw[1][0] = 0;
  efw[1][PART_LEN] = 0;
  efw[0][0] = fft[0];
  efw[0][PART_LEN] = fft[1];
  for (i = 1; i < PART_LEN; i++) {
    efw[0][i] = fft[2 * i];
    efw[1][i] = fft[2 * i + 1];
  }

  // Smoothed PSD
  for (i = 0; i < PART_LEN1; i++) {
    aec->sd[i] = ptrGCoh[0] * aec->sd[i] + ptrGCoh[1] *
        (dfw[0][i] * dfw[0][i] + dfw[1][i] * dfw[1][i]);
    aec->se[i] = ptrGCoh[0] * aec->se[i] + ptrGCoh[1] *
        (efw[0][i] * efw[0][i] + efw[1][i] * efw[1][i]);
    // We threshold here to protect against the ill-effects of a zero far-end.
    // The threshold is not arbitrarily chosen, but balances protection and
    // adverse interaction with the algorithm's tuning.
    // TODO: investigate further why this is so sensitive.
    aec->sx[i] = ptrGCoh[0] * aec->sx[i] + ptrGCoh[1] *
        WEBRTC_SPL_MAX(xfw[0][i] * xfw[0][i] + xfw[1][i] * xfw[1][i], 15);

    aec->sde[i][0] = ptrGCoh[0] * aec->sde[i][0] + ptrGCoh[1] *
        (dfw[0][i] * efw[0][i] + dfw[1][i] * efw[1][i]);
    aec->sde[i][1] = ptrGCoh[0] * aec->sde[i][1] + ptrGCoh[1] *
        (dfw[0][i] * efw[1][i] - dfw[1][i] * efw[0][i]);

    aec->sxd[i][0] = ptrGCoh[0] * aec->sxd[i][0] + ptrGCoh[1] *
        (dfw[0][i] * xfw[0][i] + dfw[1][i] * xfw[1][i]);
    aec->sxd[i][1] = ptrGCoh[0] * aec->sxd[i][1] + ptrGCoh[1] *
        (dfw[0][i] * xfw[1][i] - dfw[1][i] * xfw[0][i]);

    sdSum += aec->sd[i];
    seSum += aec->se[i];
  }

  // Divergent filter safeguard.
  if (aec->divergeState == 0) {
    if (seSum > sdSum) {
      aec->divergeState = 1;
    }
  }
  else {
    if (seSum * 1.05f < sdSum) {
      aec->divergeState = 0;
    }
  }

  if (aec->divergeState == 1) {
    memcpy(efw, dfw, sizeof(efw));
  }

  // Reset if error is significantly larger than nearend (13 dB).
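  // 19.95 ~= 10^(13/10), i.e. the comparison is done on the summed power
  // spectra.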
  if (seSum > (19.95f * sdSum)) {
    memset(aec->wfBuf, 0, sizeof(aec->wfBuf));
  }

  // Subband coherence
  for (i = 0; i < PART_LEN1; i++) {
    cohde[i] = (aec->sde[i][0] * aec->sde[i][0] +
                aec->sde[i][1] * aec->sde[i][1]) /
        (aec->sd[i] * aec->se[i] + 1e-10f);
    cohxd[i] = (aec->sxd[i][0] * aec->sxd[i][0] +
                aec->sxd[i][1] * aec->sxd[i][1]) /
        (aec->sx[i] * aec->sd[i] + 1e-10f);
  }

  hNlXdAvg = 0;
  for (i = minPrefBand; i < prefBandSize + minPrefBand; i++) {
    hNlXdAvg += cohxd[i];
  }
  hNlXdAvg /= prefBandSize;
  hNlXdAvg = 1 - hNlXdAvg;

  hNlDeAvg = 0;
  for (i = minPrefBand; i < prefBandSize + minPrefBand; i++) {
    hNlDeAvg += cohde[i];
  }
  hNlDeAvg /= prefBandSize;

  if (hNlXdAvg < 0.75f && hNlXdAvg < aec->hNlXdAvgMin) {
    aec->hNlXdAvgMin = hNlXdAvg;
  }

  if (hNlDeAvg > 0.98f && hNlXdAvg > 0.9f) {
    aec->stNearState = 1;
  }
  else if (hNlDeAvg < 0.95f || hNlXdAvg < 0.8f) {
    aec->stNearState = 0;
  }

  if (aec->hNlXdAvgMin == 1) {
    aec->echoState = 0;
    aec->overDrive = aec->minOverDrive;

    if (aec->stNearState == 1) {
      memcpy(hNl, cohde, sizeof(hNl));
      hNlFb = hNlDeAvg;
      hNlFbLow = hNlDeAvg;
    }
    else {
      for (i = 0; i < PART_LEN1; i++) {
        hNl[i] = 1 - cohxd[i];
      }
      hNlFb = hNlXdAvg;
      hNlFbLow = hNlXdAvg;
    }
  }
  else {
    if (aec->stNearState == 1) {
      aec->echoState = 0;
      memcpy(hNl, cohde, sizeof(hNl));
      hNlFb = hNlDeAvg;
      hNlFbLow = hNlDeAvg;
    }
    else {
      aec->echoState = 1;
      for (i = 0; i < PART_LEN1; i++) {
        hNl[i] = WEBRTC_SPL_MIN(cohde[i], 1 - cohxd[i]);
      }

      // Select an order statistic from the preferred bands.
      // TODO: Using quicksort now, but a selection algorithm may be preferred.
      memcpy(hNlPref, &hNl[minPrefBand], sizeof(float) * prefBandSize);
      qsort(hNlPref, prefBandSize, sizeof(float), CmpFloat);
      hNlFb = hNlPref[(int)floor(prefBandQuant * (prefBandSize - 1))];
      hNlFbLow = hNlPref[(int)floor(prefBandQuantLow * (prefBandSize - 1))];
    }
  }

  // Track the local filter minimum to determine suppression overdrive.
  if (hNlFbLow < 0.6f && hNlFbLow < aec->hNlFbLocalMin) {
    aec->hNlFbLocalMin = hNlFbLow;
    aec->hNlFbMin = hNlFbLow;
    aec->hNlNewMin = 1;
    aec->hNlMinCtr = 0;
  }
  aec->hNlFbLocalMin = WEBRTC_SPL_MIN(aec->hNlFbLocalMin + 0.0008f / aec->mult,
                                      1);
  aec->hNlXdAvgMin = WEBRTC_SPL_MIN(aec->hNlXdAvgMin + 0.0006f / aec->mult, 1);

  if (aec->hNlNewMin == 1) {
    aec->hNlMinCtr++;
  }
  if (aec->hNlMinCtr == 2) {
    aec->hNlNewMin = 0;
    aec->hNlMinCtr = 0;
    aec->overDrive = WEBRTC_SPL_MAX(aec->targetSupp /
        ((float)log(aec->hNlFbMin + 1e-10f) + 1e-10f), aec->minOverDrive);
  }

  // Smooth the overdrive.
  if (aec->overDrive < aec->overDriveSm) {
    aec->overDriveSm = 0.99f * aec->overDriveSm + 0.01f * aec->overDrive;
  }
  else {
    aec->overDriveSm = 0.9f * aec->overDriveSm + 0.1f * aec->overDrive;
  }

  WebRtcAec_OverdriveAndSuppress(aec, hNl, hNlFb, efw);

  // Add comfort noise.
  ComfortNoise(aec, efw, comfortNoiseHband, aec->noisePow, hNl);

  // TODO(bjornv): Investigate how to take the windowing below into account if
  // needed.
  if (aec->metricsMode == 1) {
    // Note that we have a scaling by two in the time domain |eBuf|.
    // In addition the time domain signal is windowed before transformation,
    // losing half the energy on average. We take care of the first scaling
    // only in UpdateMetrics().
    UpdateLevel(&aec->nlpoutlevel, efw);
  }
  // Inverse error fft.
  fft[0] = efw[0][0];
  fft[1] = efw[0][PART_LEN];
  for (i = 1; i < PART_LEN; i++) {
    fft[2 * i] = efw[0][i];
    // Sign change required by Ooura fft.
    fft[2 * i + 1] = -efw[1][i];
  }
  aec_rdft_inverse_128(fft);

  // Overlap and add to obtain output.
  scale = 2.0f / PART_LEN2;
  for (i = 0; i < PART_LEN; i++) {
    fft[i] *= scale;  // fft scaling
    fft[i] = fft[i] * sqrtHanning[i] + aec->outBuf[i];

    // Saturation protection
    output[i] = (short)WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX, fft[i],
                                      WEBRTC_SPL_WORD16_MIN);

    fft[PART_LEN + i] *= scale;  // fft scaling
    aec->outBuf[i] = fft[PART_LEN + i] * sqrtHanning[PART_LEN - i];
  }

  // For H band
  if (aec->sampFreq == 32000) {

    // H band gain: average the nlp gain over the second half of the low-band
    // spectrum (i.e., 4->8 kHz).
    GetHighbandGain(hNl, &nlpGainHband);

    // Inverse comfort_noise
    if (flagHbandCn == 1) {
      fft[0] = comfortNoiseHband[0][0];
      fft[1] = comfortNoiseHband[PART_LEN][0];
      for (i = 1; i < PART_LEN; i++) {
        fft[2 * i] = comfortNoiseHband[i][0];
        fft[2 * i + 1] = comfortNoiseHband[i][1];
      }
      aec_rdft_inverse_128(fft);
      scale = 2.0f / PART_LEN2;
    }

    // Compute gain factor.
    for (i = 0; i < PART_LEN; i++) {
      dtmp = (float)aec->dBufH[i];
      dtmp = (float)dtmp * nlpGainHband;  // for variable gain

      // Add some comfort noise where the H band is attenuated.
      if (flagHbandCn == 1) {
        fft[i] *= scale;  // fft scaling
        dtmp += cnScaleHband * fft[i];
      }

      // Saturation protection
      outputH[i] = (short)WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX, dtmp,
                                         WEBRTC_SPL_WORD16_MIN);
    }
  }

  // Copy the current block to the old position.
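  // That is, shift the PART_LEN2-sample overlap buffers by one block so the
  // next block overlaps the current one by 50%.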
  memcpy(aec->dBuf, aec->dBuf + PART_LEN, sizeof(float) * PART_LEN);
  memcpy(aec->eBuf, aec->eBuf + PART_LEN, sizeof(float) * PART_LEN);

  // Copy the current block to the old position for H band
  if (aec->sampFreq == 32000) {
    memcpy(aec->dBufH, aec->dBufH + PART_LEN, sizeof(float) * PART_LEN);
  }

  memmove(aec->xfwBuf + PART_LEN1, aec->xfwBuf, sizeof(aec->xfwBuf) -
          sizeof(complex_t) * PART_LEN1);
}

static void GetHighbandGain(const float *lambda, float *nlpGainHband)
{
  int i;

  nlpGainHband[0] = (float)0.0;
  for (i = freqAvgIc; i < PART_LEN1 - 1; i++) {
    nlpGainHband[0] += lambda[i];
  }
  nlpGainHband[0] /= (float)(PART_LEN1 - 1 - freqAvgIc);
}

static void ComfortNoise(aec_t *aec, float efw[2][PART_LEN1],
    complex_t *comfortNoiseHband, const float *noisePow, const float *lambda)
{
  int i, num;
  float rand[PART_LEN];
  float noise, noiseAvg, tmp, tmpAvg;
  WebRtc_Word16 randW16[PART_LEN];
  complex_t u[PART_LEN1];

  const float pi2 = 6.28318530717959f;

  // Generate a uniform random array on [0 1]
  WebRtcSpl_RandUArray(randW16, PART_LEN, &aec->seed);
  for (i = 0; i < PART_LEN; i++) {
    rand[i] = ((float)randW16[i]) / 32768;
  }

  // Reject LF noise
  u[0][0] = 0;
  u[0][1] = 0;
  for (i = 1; i < PART_LEN1; i++) {
    tmp = pi2 * rand[i - 1];

    noise = sqrtf(noisePow[i]);
    u[i][0] = noise * (float)cos(tmp);
    u[i][1] = -noise * (float)sin(tmp);
  }
  u[PART_LEN][1] = 0;

  for (i = 0; i < PART_LEN1; i++) {
    // This is the proper weighting to match the background noise power
    tmp = sqrtf(WEBRTC_SPL_MAX(1 - lambda[i] * lambda[i], 0));
    //tmp = 1 - lambda[i];
    efw[0][i] += tmp * u[i][0];
    efw[1][i] += tmp * u[i][1];
  }

  // For H band comfort noise
  // TODO: don't compute noise and "tmp" twice. Use the previous results.
  noiseAvg = 0.0;
  tmpAvg = 0.0;
  num = 0;
  if (aec->sampFreq == 32000 && flagHbandCn == 1) {

    // Average noise scale
    // average over second half of freq spectrum (i.e., 4->8khz)
    // TODO: we shouldn't need num. We know how many elements we're summing.
    for (i = PART_LEN1 >> 1; i < PART_LEN1; i++) {
      num++;
      noiseAvg += sqrtf(noisePow[i]);
    }
    noiseAvg /= (float)num;

    // Average nlp scale
    // average over second half of freq spectrum (i.e., 4->8khz)
    // TODO: we shouldn't need num. We know how many elements we're summing.
    num = 0;
    for (i = PART_LEN1 >> 1; i < PART_LEN1; i++) {
      num++;
      tmpAvg += sqrtf(WEBRTC_SPL_MAX(1 - lambda[i] * lambda[i], 0));
    }
    tmpAvg /= (float)num;

    // Use average noise for H band
    // TODO: we should probably have a new random vector here.
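    // The low-band random phases in |rand| are reused below; only the
    // magnitudes change (the averaged |noiseAvg| and |tmpAvg| instead of the
    // per-bin values).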
    // Reject LF noise
    u[0][0] = 0;
    u[0][1] = 0;
    for (i = 1; i < PART_LEN1; i++) {
      tmp = pi2 * rand[i - 1];

      // Use average noise for H band
      u[i][0] = noiseAvg * (float)cos(tmp);
      u[i][1] = -noiseAvg * (float)sin(tmp);
    }
    u[PART_LEN][1] = 0;

    for (i = 0; i < PART_LEN1; i++) {
      // Use average NLP weight for H band
      comfortNoiseHband[i][0] = tmpAvg * u[i][0];
      comfortNoiseHband[i][1] = tmpAvg * u[i][1];
    }
  }
}

static void WebRtcAec_InitLevel(power_level_t *level)
{
  const float bigFloat = 1E17f;

  level->averagelevel = 0;
  level->framelevel = 0;
  level->minlevel = bigFloat;
  level->frsum = 0;
  level->sfrsum = 0;
  level->frcounter = 0;
  level->sfrcounter = 0;
}

static void WebRtcAec_InitStats(stats_t *stats)
{
  stats->instant = offsetLevel;
  stats->average = offsetLevel;
  stats->max = offsetLevel;
  stats->min = offsetLevel * (-1);
  stats->sum = 0;
  stats->hisum = 0;
  stats->himean = offsetLevel;
  stats->counter = 0;
  stats->hicounter = 0;
}

static void UpdateLevel(power_level_t* level, float in[2][PART_LEN1]) {
  // Do the energy calculation in the frequency domain. The FFT is performed on
  // a segment of PART_LEN2 samples due to overlap, but we only want the energy
  // of half that data (the last PART_LEN samples). Parseval's relation states
  // that the energy is preserved according to
  //
  //   \sum_{n=0}^{N-1} |x(n)|^2 = 1/N * \sum_{n=0}^{N-1} |X(n)|^2
  //                             = ENERGY,
  //
  // where N = PART_LEN2. Since we are only interested in calculating the
  // energy for the last PART_LEN samples we approximate by calculating ENERGY
  // and dividing by 2,
  //
  //   \sum_{n=N/2}^{N-1} |x(n)|^2 ~= ENERGY / 2
  //
  // Since we deal with real valued time domain signals we only store frequency
  // bins [0, PART_LEN], which is what |in| consists of. To calculate ENERGY we
  // need to add the contribution from the missing part in
  // [PART_LEN+1, PART_LEN2-1]. These values are, up to a phase shift, identical
  // with the values in [1, PART_LEN-1], hence we multiply those values by 2.
  // This is what the for loop below does, but the multiplication by 2 and the
  // division by 2 cancel.

  // TODO(bjornv): Investigate reusing energy calculations performed at other
  // places in the code.
  int k = 1;
  // Imaginary parts are zero at end points and left out of the calculation.
  float energy = (in[0][0] * in[0][0]) / 2;
  energy += (in[0][PART_LEN] * in[0][PART_LEN]) / 2;

  for (k = 1; k < PART_LEN; k++) {
    energy += (in[0][k] * in[0][k] + in[1][k] * in[1][k]);
  }
  energy /= PART_LEN2;

  level->sfrsum += energy;
  level->sfrcounter++;

  if (level->sfrcounter > subCountLen) {
    level->framelevel = level->sfrsum / (subCountLen * PART_LEN);
    level->sfrsum = 0;
    level->sfrcounter = 0;
    if (level->framelevel > 0) {
      if (level->framelevel < level->minlevel) {
        level->minlevel = level->framelevel;  // New minimum.
      } else {
        level->minlevel *= (1 + 0.001f);  // Small increase.
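        // The slow upward drift lets the tracked minimum follow a rising
        // noise floor.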
      }
    }
    level->frcounter++;
    level->frsum += level->framelevel;
    if (level->frcounter > countLen) {
      level->averagelevel = level->frsum / countLen;
      level->frsum = 0;
      level->frcounter = 0;
    }
  }
}

static void UpdateMetrics(aec_t *aec)
{
  float dtmp, dtmp2;

  const float actThresholdNoisy = 8.0f;
  const float actThresholdClean = 40.0f;
  const float safety = 0.99995f;
  const float noisyPower = 300000.0f;

  float actThreshold;
  float echo, suppressedEcho;

  if (aec->echoState) {  // Check if echo is likely present
    aec->stateCounter++;
  }

  if (aec->farlevel.frcounter == 0) {

    if (aec->farlevel.minlevel < noisyPower) {
      actThreshold = actThresholdClean;
    }
    else {
      actThreshold = actThresholdNoisy;
    }

    if ((aec->stateCounter > (0.5f * countLen * subCountLen))
        && (aec->farlevel.sfrcounter == 0)

        // Estimate in active far-end segments only
        && (aec->farlevel.averagelevel >
            (actThreshold * aec->farlevel.minlevel))
        ) {

      // Subtract noise power
      echo = aec->nearlevel.averagelevel - safety * aec->nearlevel.minlevel;

      // ERL
      dtmp = 10 * (float)log10(aec->farlevel.averagelevel /
                               aec->nearlevel.averagelevel + 1e-10f);
      dtmp2 = 10 * (float)log10(aec->farlevel.averagelevel / echo + 1e-10f);

      aec->erl.instant = dtmp;
      if (dtmp > aec->erl.max) {
        aec->erl.max = dtmp;
      }

      if (dtmp < aec->erl.min) {
        aec->erl.min = dtmp;
      }

      aec->erl.counter++;
      aec->erl.sum += dtmp;
      aec->erl.average = aec->erl.sum / aec->erl.counter;

      // Upper mean
      if (dtmp > aec->erl.average) {
        aec->erl.hicounter++;
        aec->erl.hisum += dtmp;
        aec->erl.himean = aec->erl.hisum / aec->erl.hicounter;
      }

      // A_NLP
      dtmp = 10 * (float)log10(aec->nearlevel.averagelevel /
                               (2 * aec->linoutlevel.averagelevel) + 1e-10f);

      // Subtract noise power
      suppressedEcho = 2 * (aec->linoutlevel.averagelevel -
                            safety * aec->linoutlevel.minlevel);

      dtmp2 = 10 * (float)log10(echo / suppressedEcho + 1e-10f);

      aec->aNlp.instant = dtmp2;
      if (dtmp > aec->aNlp.max) {
        aec->aNlp.max = dtmp;
      }

      if (dtmp < aec->aNlp.min) {
        aec->aNlp.min = dtmp;
      }

      aec->aNlp.counter++;
      aec->aNlp.sum += dtmp;
      aec->aNlp.average = aec->aNlp.sum / aec->aNlp.counter;

      // Upper mean
      if (dtmp > aec->aNlp.average) {
        aec->aNlp.hicounter++;
        aec->aNlp.hisum += dtmp;
        aec->aNlp.himean = aec->aNlp.hisum / aec->aNlp.hicounter;
      }

      // ERLE

      // Subtract noise power
      suppressedEcho = 2 * (aec->nlpoutlevel.averagelevel -
                            safety * aec->nlpoutlevel.minlevel);

      dtmp = 10 * (float)log10(aec->nearlevel.averagelevel /
                               (2 * aec->nlpoutlevel.averagelevel) + 1e-10f);
      dtmp2 = 10 * (float)log10(echo / suppressedEcho + 1e-10f);

      dtmp = dtmp2;
      aec->erle.instant = dtmp;
      if (dtmp > aec->erle.max) {
        aec->erle.max = dtmp;
      }

      if (dtmp < aec->erle.min) {
        aec->erle.min = dtmp;
      }

      aec->erle.counter++;
      aec->erle.sum += dtmp;
      aec->erle.average = aec->erle.sum / aec->erle.counter;

      // Upper mean
      if (dtmp > aec->erle.average) {
        aec->erle.hicounter++;
        aec->erle.hisum += dtmp;
        aec->erle.himean = aec->erle.hisum / aec->erle.hicounter;
      }
    }

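    // Restart the echo-activity count for the next far-end measurement
    // interval.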
    aec->stateCounter = 0;
  }
}

static void TimeToFrequency(float time_data[PART_LEN2],
                            float freq_data[2][PART_LEN1],
                            int window) {
  int i = 0;

  // TODO(bjornv): Should we have a different function/wrapper for windowed
  // FFT?
  if (window) {
    for (i = 0; i < PART_LEN; i++) {
      time_data[i] *= sqrtHanning[i];
      time_data[PART_LEN + i] *= sqrtHanning[PART_LEN - i];
    }
  }

  aec_rdft_forward_128(time_data);
  // Reorder.
  freq_data[1][0] = 0;
  freq_data[1][PART_LEN] = 0;
  freq_data[0][0] = time_data[0];
  freq_data[0][PART_LEN] = time_data[1];
  for (i = 1; i < PART_LEN; i++) {
    freq_data[0][i] = time_data[2 * i];
    freq_data[1][i] = time_data[2 * i + 1];
  }
}