1 /* 2 * Copyright (c) 2012 The WebRTC project authors. All Rights Reserved. 3 * 4 * Use of this source code is governed by a BSD-style license 5 * that can be found in the LICENSE file in the root of the source 6 * tree. An additional intellectual property rights grant can be found 7 * in the file PATENTS. All contributing project authors may 8 * be found in the AUTHORS file in the root of the source tree. 9 */ 10 11 /* 12 * The core AEC algorithm, which is presented with time-aligned signals. 13 */ 14 15 #include "webrtc/modules/audio_processing/aec/aec_core.h" 16 17 #ifdef WEBRTC_AEC_DEBUG_DUMP 18 #include <stdio.h> 19 #endif 20 21 #include <assert.h> 22 #include <math.h> 23 #include <stddef.h> // size_t 24 #include <stdlib.h> 25 #include <string.h> 26 27 #include "webrtc/common_audio/ring_buffer.h" 28 #include "webrtc/common_audio/signal_processing/include/signal_processing_library.h" 29 #include "webrtc/modules/audio_processing/aec/aec_common.h" 30 #include "webrtc/modules/audio_processing/aec/aec_core_internal.h" 31 #include "webrtc/modules/audio_processing/aec/aec_rdft.h" 32 #include "webrtc/modules/audio_processing/logging/aec_logging.h" 33 #include "webrtc/modules/audio_processing/utility/delay_estimator_wrapper.h" 34 #include "webrtc/system_wrappers/include/cpu_features_wrapper.h" 35 #include "webrtc/typedefs.h" 36 37 38 // Buffer size (samples) 39 static const size_t kBufSizePartitions = 250; // 1 second of audio in 16 kHz. 40 41 // Metrics 42 static const int subCountLen = 4; 43 static const int countLen = 50; 44 static const int kDelayMetricsAggregationWindow = 1250; // 5 seconds at 16 kHz. 45 46 // Quantities to control H band scaling for SWB input 47 static const float cnScaleHband = 48 (float)0.4; // scale for comfort noise in H band 49 // Initial bin for averaging nlp gain in low band 50 static const int freqAvgIc = PART_LEN / 2; 51 52 // Matlab code to produce table: 53 // win = sqrt(hanning(63)); win = [0 ; win(1:32)]; 54 // fprintf(1, '\t%.14f, %.14f, %.14f,\n', win); 55 ALIGN16_BEG const float ALIGN16_END WebRtcAec_sqrtHanning[65] = { 56 0.00000000000000f, 0.02454122852291f, 0.04906767432742f, 0.07356456359967f, 57 0.09801714032956f, 0.12241067519922f, 0.14673047445536f, 0.17096188876030f, 58 0.19509032201613f, 0.21910124015687f, 0.24298017990326f, 0.26671275747490f, 59 0.29028467725446f, 0.31368174039889f, 0.33688985339222f, 0.35989503653499f, 60 0.38268343236509f, 0.40524131400499f, 0.42755509343028f, 0.44961132965461f, 61 0.47139673682600f, 0.49289819222978f, 0.51410274419322f, 0.53499761988710f, 62 0.55557023301960f, 0.57580819141785f, 0.59569930449243f, 0.61523159058063f, 63 0.63439328416365f, 0.65317284295378f, 0.67155895484702f, 0.68954054473707f, 64 0.70710678118655f, 0.72424708295147f, 0.74095112535496f, 0.75720884650648f, 65 0.77301045336274f, 0.78834642762661f, 0.80320753148064f, 0.81758481315158f, 66 0.83146961230255f, 0.84485356524971f, 0.85772861000027f, 0.87008699110871f, 67 0.88192126434835f, 0.89322430119552f, 0.90398929312344f, 0.91420975570353f, 68 0.92387953251129f, 0.93299279883474f, 0.94154406518302f, 0.94952818059304f, 69 0.95694033573221f, 0.96377606579544f, 0.97003125319454f, 0.97570213003853f, 70 0.98078528040323f, 0.98527764238894f, 0.98917650996478f, 0.99247953459871f, 71 0.99518472667220f, 0.99729045667869f, 0.99879545620517f, 0.99969881869620f, 72 1.00000000000000f}; 73 74 // Matlab code to produce table: 75 // weightCurve = [0 ; 0.3 * sqrt(linspace(0,1,64))' + 0.1]; 76 // fprintf(1, '\t%.4f, %.4f, %.4f, %.4f, %.4f, %.4f,\n', weightCurve); 77 ALIGN16_BEG const float ALIGN16_END WebRtcAec_weightCurve[65] = { 78 0.0000f, 0.1000f, 0.1378f, 0.1535f, 0.1655f, 0.1756f, 0.1845f, 0.1926f, 79 0.2000f, 0.2069f, 0.2134f, 0.2195f, 0.2254f, 0.2309f, 0.2363f, 0.2414f, 80 0.2464f, 0.2512f, 0.2558f, 0.2604f, 0.2648f, 0.2690f, 0.2732f, 0.2773f, 81 0.2813f, 0.2852f, 0.2890f, 0.2927f, 0.2964f, 0.3000f, 0.3035f, 0.3070f, 82 0.3104f, 0.3138f, 0.3171f, 0.3204f, 0.3236f, 0.3268f, 0.3299f, 0.3330f, 83 0.3360f, 0.3390f, 0.3420f, 0.3449f, 0.3478f, 0.3507f, 0.3535f, 0.3563f, 84 0.3591f, 0.3619f, 0.3646f, 0.3673f, 0.3699f, 0.3726f, 0.3752f, 0.3777f, 85 0.3803f, 0.3828f, 0.3854f, 0.3878f, 0.3903f, 0.3928f, 0.3952f, 0.3976f, 86 0.4000f}; 87 88 // Matlab code to produce table: 89 // overDriveCurve = [sqrt(linspace(0,1,65))' + 1]; 90 // fprintf(1, '\t%.4f, %.4f, %.4f, %.4f, %.4f, %.4f,\n', overDriveCurve); 91 ALIGN16_BEG const float ALIGN16_END WebRtcAec_overDriveCurve[65] = { 92 1.0000f, 1.1250f, 1.1768f, 1.2165f, 1.2500f, 1.2795f, 1.3062f, 1.3307f, 93 1.3536f, 1.3750f, 1.3953f, 1.4146f, 1.4330f, 1.4507f, 1.4677f, 1.4841f, 94 1.5000f, 1.5154f, 1.5303f, 1.5449f, 1.5590f, 1.5728f, 1.5863f, 1.5995f, 95 1.6124f, 1.6250f, 1.6374f, 1.6495f, 1.6614f, 1.6731f, 1.6847f, 1.6960f, 96 1.7071f, 1.7181f, 1.7289f, 1.7395f, 1.7500f, 1.7603f, 1.7706f, 1.7806f, 97 1.7906f, 1.8004f, 1.8101f, 1.8197f, 1.8292f, 1.8385f, 1.8478f, 1.8570f, 98 1.8660f, 1.8750f, 1.8839f, 1.8927f, 1.9014f, 1.9100f, 1.9186f, 1.9270f, 99 1.9354f, 1.9437f, 1.9520f, 1.9601f, 1.9682f, 1.9763f, 1.9843f, 1.9922f, 100 2.0000f}; 101 102 // Delay Agnostic AEC parameters, still under development and may change. 103 static const float kDelayQualityThresholdMax = 0.07f; 104 static const float kDelayQualityThresholdMin = 0.01f; 105 static const int kInitialShiftOffset = 5; 106 #if !defined(WEBRTC_ANDROID) 107 static const int kDelayCorrectionStart = 1500; // 10 ms chunks 108 #endif 109 110 // Target suppression levels for nlp modes. 111 // log{0.001, 0.00001, 0.00000001} 112 static const float kTargetSupp[3] = {-6.9f, -11.5f, -18.4f}; 113 114 // Two sets of parameters, one for the extended filter mode. 115 static const float kExtendedMinOverDrive[3] = {3.0f, 6.0f, 15.0f}; 116 static const float kNormalMinOverDrive[3] = {1.0f, 2.0f, 5.0f}; 117 const float WebRtcAec_kExtendedSmoothingCoefficients[2][2] = {{0.9f, 0.1f}, 118 {0.92f, 0.08f}}; 119 const float WebRtcAec_kNormalSmoothingCoefficients[2][2] = {{0.9f, 0.1f}, 120 {0.93f, 0.07f}}; 121 122 // Number of partitions forming the NLP's "preferred" bands. 123 enum { 124 kPrefBandSize = 24 125 }; 126 127 #ifdef WEBRTC_AEC_DEBUG_DUMP 128 extern int webrtc_aec_instance_count; 129 #endif 130 131 WebRtcAecFilterFar WebRtcAec_FilterFar; 132 WebRtcAecScaleErrorSignal WebRtcAec_ScaleErrorSignal; 133 WebRtcAecFilterAdaptation WebRtcAec_FilterAdaptation; 134 WebRtcAecOverdriveAndSuppress WebRtcAec_OverdriveAndSuppress; 135 WebRtcAecComfortNoise WebRtcAec_ComfortNoise; 136 WebRtcAecSubBandCoherence WebRtcAec_SubbandCoherence; 137 WebRtcAecStoreAsComplex WebRtcAec_StoreAsComplex; 138 WebRtcAecPartitionDelay WebRtcAec_PartitionDelay; 139 WebRtcAecWindowData WebRtcAec_WindowData; 140 141 __inline static float MulRe(float aRe, float aIm, float bRe, float bIm) { 142 return aRe * bRe - aIm * bIm; 143 } 144 145 __inline static float MulIm(float aRe, float aIm, float bRe, float bIm) { 146 return aRe * bIm + aIm * bRe; 147 } 148 149 static int CmpFloat(const void* a, const void* b) { 150 const float* da = (const float*)a; 151 const float* db = (const float*)b; 152 153 return (*da > *db) - (*da < *db); 154 } 155 156 static void FilterFar( 157 int num_partitions, 158 int x_fft_buf_block_pos, 159 float x_fft_buf[2][kExtendedNumPartitions * PART_LEN1], 160 float h_fft_buf[2][kExtendedNumPartitions * PART_LEN1], 161 float y_fft[2][PART_LEN1]) { 162 int i; 163 for (i = 0; i < num_partitions; i++) { 164 int j; 165 int xPos = (i + x_fft_buf_block_pos) * PART_LEN1; 166 int pos = i * PART_LEN1; 167 // Check for wrap 168 if (i + x_fft_buf_block_pos >= num_partitions) { 169 xPos -= num_partitions * (PART_LEN1); 170 } 171 172 for (j = 0; j < PART_LEN1; j++) { 173 y_fft[0][j] += MulRe(x_fft_buf[0][xPos + j], 174 x_fft_buf[1][xPos + j], 175 h_fft_buf[0][pos + j], 176 h_fft_buf[1][pos + j]); 177 y_fft[1][j] += MulIm(x_fft_buf[0][xPos + j], 178 x_fft_buf[1][xPos + j], 179 h_fft_buf[0][pos + j], 180 h_fft_buf[1][pos + j]); 181 } 182 } 183 } 184 185 static void ScaleErrorSignal(int extended_filter_enabled, 186 float normal_mu, 187 float normal_error_threshold, 188 float x_pow[PART_LEN1], 189 float ef[2][PART_LEN1]) { 190 const float mu = extended_filter_enabled ? kExtendedMu : normal_mu; 191 const float error_threshold = extended_filter_enabled 192 ? kExtendedErrorThreshold 193 : normal_error_threshold; 194 int i; 195 float abs_ef; 196 for (i = 0; i < (PART_LEN1); i++) { 197 ef[0][i] /= (x_pow[i] + 1e-10f); 198 ef[1][i] /= (x_pow[i] + 1e-10f); 199 abs_ef = sqrtf(ef[0][i] * ef[0][i] + ef[1][i] * ef[1][i]); 200 201 if (abs_ef > error_threshold) { 202 abs_ef = error_threshold / (abs_ef + 1e-10f); 203 ef[0][i] *= abs_ef; 204 ef[1][i] *= abs_ef; 205 } 206 207 // Stepsize factor 208 ef[0][i] *= mu; 209 ef[1][i] *= mu; 210 } 211 } 212 213 214 static void FilterAdaptation( 215 int num_partitions, 216 int x_fft_buf_block_pos, 217 float x_fft_buf[2][kExtendedNumPartitions * PART_LEN1], 218 float e_fft[2][PART_LEN1], 219 float h_fft_buf[2][kExtendedNumPartitions * PART_LEN1]) { 220 int i, j; 221 float fft[PART_LEN2]; 222 for (i = 0; i < num_partitions; i++) { 223 int xPos = (i + x_fft_buf_block_pos) * (PART_LEN1); 224 int pos; 225 // Check for wrap 226 if (i + x_fft_buf_block_pos >= num_partitions) { 227 xPos -= num_partitions * PART_LEN1; 228 } 229 230 pos = i * PART_LEN1; 231 232 for (j = 0; j < PART_LEN; j++) { 233 234 fft[2 * j] = MulRe(x_fft_buf[0][xPos + j], 235 -x_fft_buf[1][xPos + j], 236 e_fft[0][j], 237 e_fft[1][j]); 238 fft[2 * j + 1] = MulIm(x_fft_buf[0][xPos + j], 239 -x_fft_buf[1][xPos + j], 240 e_fft[0][j], 241 e_fft[1][j]); 242 } 243 fft[1] = MulRe(x_fft_buf[0][xPos + PART_LEN], 244 -x_fft_buf[1][xPos + PART_LEN], 245 e_fft[0][PART_LEN], 246 e_fft[1][PART_LEN]); 247 248 aec_rdft_inverse_128(fft); 249 memset(fft + PART_LEN, 0, sizeof(float) * PART_LEN); 250 251 // fft scaling 252 { 253 float scale = 2.0f / PART_LEN2; 254 for (j = 0; j < PART_LEN; j++) { 255 fft[j] *= scale; 256 } 257 } 258 aec_rdft_forward_128(fft); 259 260 h_fft_buf[0][pos] += fft[0]; 261 h_fft_buf[0][pos + PART_LEN] += fft[1]; 262 263 for (j = 1; j < PART_LEN; j++) { 264 h_fft_buf[0][pos + j] += fft[2 * j]; 265 h_fft_buf[1][pos + j] += fft[2 * j + 1]; 266 } 267 } 268 } 269 270 static void OverdriveAndSuppress(AecCore* aec, 271 float hNl[PART_LEN1], 272 const float hNlFb, 273 float efw[2][PART_LEN1]) { 274 int i; 275 for (i = 0; i < PART_LEN1; i++) { 276 // Weight subbands 277 if (hNl[i] > hNlFb) { 278 hNl[i] = WebRtcAec_weightCurve[i] * hNlFb + 279 (1 - WebRtcAec_weightCurve[i]) * hNl[i]; 280 } 281 hNl[i] = powf(hNl[i], aec->overDriveSm * WebRtcAec_overDriveCurve[i]); 282 283 // Suppress error signal 284 efw[0][i] *= hNl[i]; 285 efw[1][i] *= hNl[i]; 286 287 // Ooura fft returns incorrect sign on imaginary component. It matters here 288 // because we are making an additive change with comfort noise. 289 efw[1][i] *= -1; 290 } 291 } 292 293 static int PartitionDelay(const AecCore* aec) { 294 // Measures the energy in each filter partition and returns the partition with 295 // highest energy. 296 // TODO(bjornv): Spread computational cost by computing one partition per 297 // block? 298 float wfEnMax = 0; 299 int i; 300 int delay = 0; 301 302 for (i = 0; i < aec->num_partitions; i++) { 303 int j; 304 int pos = i * PART_LEN1; 305 float wfEn = 0; 306 for (j = 0; j < PART_LEN1; j++) { 307 wfEn += aec->wfBuf[0][pos + j] * aec->wfBuf[0][pos + j] + 308 aec->wfBuf[1][pos + j] * aec->wfBuf[1][pos + j]; 309 } 310 311 if (wfEn > wfEnMax) { 312 wfEnMax = wfEn; 313 delay = i; 314 } 315 } 316 return delay; 317 } 318 319 // Threshold to protect against the ill-effects of a zero far-end. 320 const float WebRtcAec_kMinFarendPSD = 15; 321 322 // Updates the following smoothed Power Spectral Densities (PSD): 323 // - sd : near-end 324 // - se : residual echo 325 // - sx : far-end 326 // - sde : cross-PSD of near-end and residual echo 327 // - sxd : cross-PSD of near-end and far-end 328 // 329 // In addition to updating the PSDs, also the filter diverge state is 330 // determined. 331 static void SmoothedPSD(AecCore* aec, 332 float efw[2][PART_LEN1], 333 float dfw[2][PART_LEN1], 334 float xfw[2][PART_LEN1], 335 int* extreme_filter_divergence) { 336 // Power estimate smoothing coefficients. 337 const float* ptrGCoh = aec->extended_filter_enabled 338 ? WebRtcAec_kExtendedSmoothingCoefficients[aec->mult - 1] 339 : WebRtcAec_kNormalSmoothingCoefficients[aec->mult - 1]; 340 int i; 341 float sdSum = 0, seSum = 0; 342 343 for (i = 0; i < PART_LEN1; i++) { 344 aec->sd[i] = ptrGCoh[0] * aec->sd[i] + 345 ptrGCoh[1] * (dfw[0][i] * dfw[0][i] + dfw[1][i] * dfw[1][i]); 346 aec->se[i] = ptrGCoh[0] * aec->se[i] + 347 ptrGCoh[1] * (efw[0][i] * efw[0][i] + efw[1][i] * efw[1][i]); 348 // We threshold here to protect against the ill-effects of a zero farend. 349 // The threshold is not arbitrarily chosen, but balances protection and 350 // adverse interaction with the algorithm's tuning. 351 // TODO(bjornv): investigate further why this is so sensitive. 352 aec->sx[i] = 353 ptrGCoh[0] * aec->sx[i] + 354 ptrGCoh[1] * WEBRTC_SPL_MAX( 355 xfw[0][i] * xfw[0][i] + xfw[1][i] * xfw[1][i], 356 WebRtcAec_kMinFarendPSD); 357 358 aec->sde[i][0] = 359 ptrGCoh[0] * aec->sde[i][0] + 360 ptrGCoh[1] * (dfw[0][i] * efw[0][i] + dfw[1][i] * efw[1][i]); 361 aec->sde[i][1] = 362 ptrGCoh[0] * aec->sde[i][1] + 363 ptrGCoh[1] * (dfw[0][i] * efw[1][i] - dfw[1][i] * efw[0][i]); 364 365 aec->sxd[i][0] = 366 ptrGCoh[0] * aec->sxd[i][0] + 367 ptrGCoh[1] * (dfw[0][i] * xfw[0][i] + dfw[1][i] * xfw[1][i]); 368 aec->sxd[i][1] = 369 ptrGCoh[0] * aec->sxd[i][1] + 370 ptrGCoh[1] * (dfw[0][i] * xfw[1][i] - dfw[1][i] * xfw[0][i]); 371 372 sdSum += aec->sd[i]; 373 seSum += aec->se[i]; 374 } 375 376 // Divergent filter safeguard update. 377 aec->divergeState = (aec->divergeState ? 1.05f : 1.0f) * seSum > sdSum; 378 379 // Signal extreme filter divergence if the error is significantly larger 380 // than the nearend (13 dB). 381 *extreme_filter_divergence = (seSum > (19.95f * sdSum)); 382 } 383 384 // Window time domain data to be used by the fft. 385 __inline static void WindowData(float* x_windowed, const float* x) { 386 int i; 387 for (i = 0; i < PART_LEN; i++) { 388 x_windowed[i] = x[i] * WebRtcAec_sqrtHanning[i]; 389 x_windowed[PART_LEN + i] = 390 x[PART_LEN + i] * WebRtcAec_sqrtHanning[PART_LEN - i]; 391 } 392 } 393 394 // Puts fft output data into a complex valued array. 395 __inline static void StoreAsComplex(const float* data, 396 float data_complex[2][PART_LEN1]) { 397 int i; 398 data_complex[0][0] = data[0]; 399 data_complex[1][0] = 0; 400 for (i = 1; i < PART_LEN; i++) { 401 data_complex[0][i] = data[2 * i]; 402 data_complex[1][i] = data[2 * i + 1]; 403 } 404 data_complex[0][PART_LEN] = data[1]; 405 data_complex[1][PART_LEN] = 0; 406 } 407 408 static void SubbandCoherence(AecCore* aec, 409 float efw[2][PART_LEN1], 410 float dfw[2][PART_LEN1], 411 float xfw[2][PART_LEN1], 412 float* fft, 413 float* cohde, 414 float* cohxd, 415 int* extreme_filter_divergence) { 416 int i; 417 418 SmoothedPSD(aec, efw, dfw, xfw, extreme_filter_divergence); 419 420 // Subband coherence 421 for (i = 0; i < PART_LEN1; i++) { 422 cohde[i] = 423 (aec->sde[i][0] * aec->sde[i][0] + aec->sde[i][1] * aec->sde[i][1]) / 424 (aec->sd[i] * aec->se[i] + 1e-10f); 425 cohxd[i] = 426 (aec->sxd[i][0] * aec->sxd[i][0] + aec->sxd[i][1] * aec->sxd[i][1]) / 427 (aec->sx[i] * aec->sd[i] + 1e-10f); 428 } 429 } 430 431 static void GetHighbandGain(const float* lambda, float* nlpGainHband) { 432 int i; 433 434 *nlpGainHband = (float)0.0; 435 for (i = freqAvgIc; i < PART_LEN1 - 1; i++) { 436 *nlpGainHband += lambda[i]; 437 } 438 *nlpGainHband /= (float)(PART_LEN1 - 1 - freqAvgIc); 439 } 440 441 static void ComfortNoise(AecCore* aec, 442 float efw[2][PART_LEN1], 443 float comfortNoiseHband[2][PART_LEN1], 444 const float* noisePow, 445 const float* lambda) { 446 int i, num; 447 float rand[PART_LEN]; 448 float noise, noiseAvg, tmp, tmpAvg; 449 int16_t randW16[PART_LEN]; 450 float u[2][PART_LEN1]; 451 452 const float pi2 = 6.28318530717959f; 453 454 // Generate a uniform random array on [0 1] 455 WebRtcSpl_RandUArray(randW16, PART_LEN, &aec->seed); 456 for (i = 0; i < PART_LEN; i++) { 457 rand[i] = ((float)randW16[i]) / 32768; 458 } 459 460 // Reject LF noise 461 u[0][0] = 0; 462 u[1][0] = 0; 463 for (i = 1; i < PART_LEN1; i++) { 464 tmp = pi2 * rand[i - 1]; 465 466 noise = sqrtf(noisePow[i]); 467 u[0][i] = noise * cosf(tmp); 468 u[1][i] = -noise * sinf(tmp); 469 } 470 u[1][PART_LEN] = 0; 471 472 for (i = 0; i < PART_LEN1; i++) { 473 // This is the proper weighting to match the background noise power 474 tmp = sqrtf(WEBRTC_SPL_MAX(1 - lambda[i] * lambda[i], 0)); 475 // tmp = 1 - lambda[i]; 476 efw[0][i] += tmp * u[0][i]; 477 efw[1][i] += tmp * u[1][i]; 478 } 479 480 // For H band comfort noise 481 // TODO: don't compute noise and "tmp" twice. Use the previous results. 482 noiseAvg = 0.0; 483 tmpAvg = 0.0; 484 num = 0; 485 if (aec->num_bands > 1) { 486 487 // average noise scale 488 // average over second half of freq spectrum (i.e., 4->8khz) 489 // TODO: we shouldn't need num. We know how many elements we're summing. 490 for (i = PART_LEN1 >> 1; i < PART_LEN1; i++) { 491 num++; 492 noiseAvg += sqrtf(noisePow[i]); 493 } 494 noiseAvg /= (float)num; 495 496 // average nlp scale 497 // average over second half of freq spectrum (i.e., 4->8khz) 498 // TODO: we shouldn't need num. We know how many elements we're summing. 499 num = 0; 500 for (i = PART_LEN1 >> 1; i < PART_LEN1; i++) { 501 num++; 502 tmpAvg += sqrtf(WEBRTC_SPL_MAX(1 - lambda[i] * lambda[i], 0)); 503 } 504 tmpAvg /= (float)num; 505 506 // Use average noise for H band 507 // TODO: we should probably have a new random vector here. 508 // Reject LF noise 509 u[0][0] = 0; 510 u[1][0] = 0; 511 for (i = 1; i < PART_LEN1; i++) { 512 tmp = pi2 * rand[i - 1]; 513 514 // Use average noise for H band 515 u[0][i] = noiseAvg * (float)cos(tmp); 516 u[1][i] = -noiseAvg * (float)sin(tmp); 517 } 518 u[1][PART_LEN] = 0; 519 520 for (i = 0; i < PART_LEN1; i++) { 521 // Use average NLP weight for H band 522 comfortNoiseHband[0][i] = tmpAvg * u[0][i]; 523 comfortNoiseHband[1][i] = tmpAvg * u[1][i]; 524 } 525 } else { 526 memset(comfortNoiseHband, 0, 527 2 * PART_LEN1 * sizeof(comfortNoiseHband[0][0])); 528 } 529 } 530 531 static void InitLevel(PowerLevel* level) { 532 const float kBigFloat = 1E17f; 533 534 level->averagelevel = 0; 535 level->framelevel = 0; 536 level->minlevel = kBigFloat; 537 level->frsum = 0; 538 level->sfrsum = 0; 539 level->frcounter = 0; 540 level->sfrcounter = 0; 541 } 542 543 static void InitStats(Stats* stats) { 544 stats->instant = kOffsetLevel; 545 stats->average = kOffsetLevel; 546 stats->max = kOffsetLevel; 547 stats->min = kOffsetLevel * (-1); 548 stats->sum = 0; 549 stats->hisum = 0; 550 stats->himean = kOffsetLevel; 551 stats->counter = 0; 552 stats->hicounter = 0; 553 } 554 555 static void InitMetrics(AecCore* self) { 556 self->stateCounter = 0; 557 InitLevel(&self->farlevel); 558 InitLevel(&self->nearlevel); 559 InitLevel(&self->linoutlevel); 560 InitLevel(&self->nlpoutlevel); 561 562 InitStats(&self->erl); 563 InitStats(&self->erle); 564 InitStats(&self->aNlp); 565 InitStats(&self->rerl); 566 } 567 568 static void UpdateLevel(PowerLevel* level, float in[2][PART_LEN1]) { 569 // Do the energy calculation in the frequency domain. The FFT is performed on 570 // a segment of PART_LEN2 samples due to overlap, but we only want the energy 571 // of half that data (the last PART_LEN samples). Parseval's relation states 572 // that the energy is preserved according to 573 // 574 // \sum_{n=0}^{N-1} |x(n)|^2 = 1/N * \sum_{n=0}^{N-1} |X(n)|^2 575 // = ENERGY, 576 // 577 // where N = PART_LEN2. Since we are only interested in calculating the energy 578 // for the last PART_LEN samples we approximate by calculating ENERGY and 579 // divide by 2, 580 // 581 // \sum_{n=N/2}^{N-1} |x(n)|^2 ~= ENERGY / 2 582 // 583 // Since we deal with real valued time domain signals we only store frequency 584 // bins [0, PART_LEN], which is what |in| consists of. To calculate ENERGY we 585 // need to add the contribution from the missing part in 586 // [PART_LEN+1, PART_LEN2-1]. These values are, up to a phase shift, identical 587 // with the values in [1, PART_LEN-1], hence multiply those values by 2. This 588 // is the values in the for loop below, but multiplication by 2 and division 589 // by 2 cancel. 590 591 // TODO(bjornv): Investigate reusing energy calculations performed at other 592 // places in the code. 593 int k = 1; 594 // Imaginary parts are zero at end points and left out of the calculation. 595 float energy = (in[0][0] * in[0][0]) / 2; 596 energy += (in[0][PART_LEN] * in[0][PART_LEN]) / 2; 597 598 for (k = 1; k < PART_LEN; k++) { 599 energy += (in[0][k] * in[0][k] + in[1][k] * in[1][k]); 600 } 601 energy /= PART_LEN2; 602 603 level->sfrsum += energy; 604 level->sfrcounter++; 605 606 if (level->sfrcounter > subCountLen) { 607 level->framelevel = level->sfrsum / (subCountLen * PART_LEN); 608 level->sfrsum = 0; 609 level->sfrcounter = 0; 610 if (level->framelevel > 0) { 611 if (level->framelevel < level->minlevel) { 612 level->minlevel = level->framelevel; // New minimum. 613 } else { 614 level->minlevel *= (1 + 0.001f); // Small increase. 615 } 616 } 617 level->frcounter++; 618 level->frsum += level->framelevel; 619 if (level->frcounter > countLen) { 620 level->averagelevel = level->frsum / countLen; 621 level->frsum = 0; 622 level->frcounter = 0; 623 } 624 } 625 } 626 627 static void UpdateMetrics(AecCore* aec) { 628 float dtmp, dtmp2; 629 630 const float actThresholdNoisy = 8.0f; 631 const float actThresholdClean = 40.0f; 632 const float safety = 0.99995f; 633 const float noisyPower = 300000.0f; 634 635 float actThreshold; 636 float echo, suppressedEcho; 637 638 if (aec->echoState) { // Check if echo is likely present 639 aec->stateCounter++; 640 } 641 642 if (aec->farlevel.frcounter == 0) { 643 644 if (aec->farlevel.minlevel < noisyPower) { 645 actThreshold = actThresholdClean; 646 } else { 647 actThreshold = actThresholdNoisy; 648 } 649 650 if ((aec->stateCounter > (0.5f * countLen * subCountLen)) && 651 (aec->farlevel.sfrcounter == 0) 652 653 // Estimate in active far-end segments only 654 && 655 (aec->farlevel.averagelevel > 656 (actThreshold * aec->farlevel.minlevel))) { 657 658 // Subtract noise power 659 echo = aec->nearlevel.averagelevel - safety * aec->nearlevel.minlevel; 660 661 // ERL 662 dtmp = 10 * (float)log10(aec->farlevel.averagelevel / 663 aec->nearlevel.averagelevel + 664 1e-10f); 665 dtmp2 = 10 * (float)log10(aec->farlevel.averagelevel / echo + 1e-10f); 666 667 aec->erl.instant = dtmp; 668 if (dtmp > aec->erl.max) { 669 aec->erl.max = dtmp; 670 } 671 672 if (dtmp < aec->erl.min) { 673 aec->erl.min = dtmp; 674 } 675 676 aec->erl.counter++; 677 aec->erl.sum += dtmp; 678 aec->erl.average = aec->erl.sum / aec->erl.counter; 679 680 // Upper mean 681 if (dtmp > aec->erl.average) { 682 aec->erl.hicounter++; 683 aec->erl.hisum += dtmp; 684 aec->erl.himean = aec->erl.hisum / aec->erl.hicounter; 685 } 686 687 // A_NLP 688 dtmp = 10 * (float)log10(aec->nearlevel.averagelevel / 689 (2 * aec->linoutlevel.averagelevel) + 690 1e-10f); 691 692 // subtract noise power 693 suppressedEcho = 2 * (aec->linoutlevel.averagelevel - 694 safety * aec->linoutlevel.minlevel); 695 696 dtmp2 = 10 * (float)log10(echo / suppressedEcho + 1e-10f); 697 698 aec->aNlp.instant = dtmp2; 699 if (dtmp > aec->aNlp.max) { 700 aec->aNlp.max = dtmp; 701 } 702 703 if (dtmp < aec->aNlp.min) { 704 aec->aNlp.min = dtmp; 705 } 706 707 aec->aNlp.counter++; 708 aec->aNlp.sum += dtmp; 709 aec->aNlp.average = aec->aNlp.sum / aec->aNlp.counter; 710 711 // Upper mean 712 if (dtmp > aec->aNlp.average) { 713 aec->aNlp.hicounter++; 714 aec->aNlp.hisum += dtmp; 715 aec->aNlp.himean = aec->aNlp.hisum / aec->aNlp.hicounter; 716 } 717 718 // ERLE 719 720 // subtract noise power 721 suppressedEcho = 2 * (aec->nlpoutlevel.averagelevel - 722 safety * aec->nlpoutlevel.minlevel); 723 724 dtmp = 10 * (float)log10(aec->nearlevel.averagelevel / 725 (2 * aec->nlpoutlevel.averagelevel) + 726 1e-10f); 727 dtmp2 = 10 * (float)log10(echo / suppressedEcho + 1e-10f); 728 729 dtmp = dtmp2; 730 aec->erle.instant = dtmp; 731 if (dtmp > aec->erle.max) { 732 aec->erle.max = dtmp; 733 } 734 735 if (dtmp < aec->erle.min) { 736 aec->erle.min = dtmp; 737 } 738 739 aec->erle.counter++; 740 aec->erle.sum += dtmp; 741 aec->erle.average = aec->erle.sum / aec->erle.counter; 742 743 // Upper mean 744 if (dtmp > aec->erle.average) { 745 aec->erle.hicounter++; 746 aec->erle.hisum += dtmp; 747 aec->erle.himean = aec->erle.hisum / aec->erle.hicounter; 748 } 749 } 750 751 aec->stateCounter = 0; 752 } 753 } 754 755 static void UpdateDelayMetrics(AecCore* self) { 756 int i = 0; 757 int delay_values = 0; 758 int median = 0; 759 int lookahead = WebRtc_lookahead(self->delay_estimator); 760 const int kMsPerBlock = PART_LEN / (self->mult * 8); 761 int64_t l1_norm = 0; 762 763 if (self->num_delay_values == 0) { 764 // We have no new delay value data. Even though -1 is a valid |median| in 765 // the sense that we allow negative values, it will practically never be 766 // used since multiples of |kMsPerBlock| will always be returned. 767 // We therefore use -1 to indicate in the logs that the delay estimator was 768 // not able to estimate the delay. 769 self->delay_median = -1; 770 self->delay_std = -1; 771 self->fraction_poor_delays = -1; 772 return; 773 } 774 775 // Start value for median count down. 776 delay_values = self->num_delay_values >> 1; 777 // Get median of delay values since last update. 778 for (i = 0; i < kHistorySizeBlocks; i++) { 779 delay_values -= self->delay_histogram[i]; 780 if (delay_values < 0) { 781 median = i; 782 break; 783 } 784 } 785 // Account for lookahead. 786 self->delay_median = (median - lookahead) * kMsPerBlock; 787 788 // Calculate the L1 norm, with median value as central moment. 789 for (i = 0; i < kHistorySizeBlocks; i++) { 790 l1_norm += abs(i - median) * self->delay_histogram[i]; 791 } 792 self->delay_std = (int)((l1_norm + self->num_delay_values / 2) / 793 self->num_delay_values) * kMsPerBlock; 794 795 // Determine fraction of delays that are out of bounds, that is, either 796 // negative (anti-causal system) or larger than the AEC filter length. 797 { 798 int num_delays_out_of_bounds = self->num_delay_values; 799 const int histogram_length = sizeof(self->delay_histogram) / 800 sizeof(self->delay_histogram[0]); 801 for (i = lookahead; i < lookahead + self->num_partitions; ++i) { 802 if (i < histogram_length) 803 num_delays_out_of_bounds -= self->delay_histogram[i]; 804 } 805 self->fraction_poor_delays = (float)num_delays_out_of_bounds / 806 self->num_delay_values; 807 } 808 809 // Reset histogram. 810 memset(self->delay_histogram, 0, sizeof(self->delay_histogram)); 811 self->num_delay_values = 0; 812 813 return; 814 } 815 816 static void ScaledInverseFft(float freq_data[2][PART_LEN1], 817 float time_data[PART_LEN2], 818 float scale, 819 int conjugate) { 820 int i; 821 const float normalization = scale / ((float)PART_LEN2); 822 const float sign = (conjugate ? -1 : 1); 823 time_data[0] = freq_data[0][0] * normalization; 824 time_data[1] = freq_data[0][PART_LEN] * normalization; 825 for (i = 1; i < PART_LEN; i++) { 826 time_data[2 * i] = freq_data[0][i] * normalization; 827 time_data[2 * i + 1] = sign * freq_data[1][i] * normalization; 828 } 829 aec_rdft_inverse_128(time_data); 830 } 831 832 833 static void Fft(float time_data[PART_LEN2], 834 float freq_data[2][PART_LEN1]) { 835 int i; 836 aec_rdft_forward_128(time_data); 837 838 // Reorder fft output data. 839 freq_data[1][0] = 0; 840 freq_data[1][PART_LEN] = 0; 841 freq_data[0][0] = time_data[0]; 842 freq_data[0][PART_LEN] = time_data[1]; 843 for (i = 1; i < PART_LEN; i++) { 844 freq_data[0][i] = time_data[2 * i]; 845 freq_data[1][i] = time_data[2 * i + 1]; 846 } 847 } 848 849 850 static int SignalBasedDelayCorrection(AecCore* self) { 851 int delay_correction = 0; 852 int last_delay = -2; 853 assert(self != NULL); 854 #if !defined(WEBRTC_ANDROID) 855 // On desktops, turn on correction after |kDelayCorrectionStart| frames. This 856 // is to let the delay estimation get a chance to converge. Also, if the 857 // playout audio volume is low (or even muted) the delay estimation can return 858 // a very large delay, which will break the AEC if it is applied. 859 if (self->frame_count < kDelayCorrectionStart) { 860 return 0; 861 } 862 #endif 863 864 // 1. Check for non-negative delay estimate. Note that the estimates we get 865 // from the delay estimation are not compensated for lookahead. Hence, a 866 // negative |last_delay| is an invalid one. 867 // 2. Verify that there is a delay change. In addition, only allow a change 868 // if the delay is outside a certain region taking the AEC filter length 869 // into account. 870 // TODO(bjornv): Investigate if we can remove the non-zero delay change check. 871 // 3. Only allow delay correction if the delay estimation quality exceeds 872 // |delay_quality_threshold|. 873 // 4. Finally, verify that the proposed |delay_correction| is feasible by 874 // comparing with the size of the far-end buffer. 875 last_delay = WebRtc_last_delay(self->delay_estimator); 876 if ((last_delay >= 0) && 877 (last_delay != self->previous_delay) && 878 (WebRtc_last_delay_quality(self->delay_estimator) > 879 self->delay_quality_threshold)) { 880 int delay = last_delay - WebRtc_lookahead(self->delay_estimator); 881 // Allow for a slack in the actual delay, defined by a |lower_bound| and an 882 // |upper_bound|. The adaptive echo cancellation filter is currently 883 // |num_partitions| (of 64 samples) long. If the delay estimate is negative 884 // or at least 3/4 of the filter length we open up for correction. 885 const int lower_bound = 0; 886 const int upper_bound = self->num_partitions * 3 / 4; 887 const int do_correction = delay <= lower_bound || delay > upper_bound; 888 if (do_correction == 1) { 889 int available_read = (int)WebRtc_available_read(self->far_time_buf); 890 // With |shift_offset| we gradually rely on the delay estimates. For 891 // positive delays we reduce the correction by |shift_offset| to lower the 892 // risk of pushing the AEC into a non causal state. For negative delays 893 // we rely on the values up to a rounding error, hence compensate by 1 894 // element to make sure to push the delay into the causal region. 895 delay_correction = -delay; 896 delay_correction += delay > self->shift_offset ? self->shift_offset : 1; 897 self->shift_offset--; 898 self->shift_offset = (self->shift_offset <= 1 ? 1 : self->shift_offset); 899 if (delay_correction > available_read - self->mult - 1) { 900 // There is not enough data in the buffer to perform this shift. Hence, 901 // we do not rely on the delay estimate and do nothing. 902 delay_correction = 0; 903 } else { 904 self->previous_delay = last_delay; 905 ++self->delay_correction_count; 906 } 907 } 908 } 909 // Update the |delay_quality_threshold| once we have our first delay 910 // correction. 911 if (self->delay_correction_count > 0) { 912 float delay_quality = WebRtc_last_delay_quality(self->delay_estimator); 913 delay_quality = (delay_quality > kDelayQualityThresholdMax ? 914 kDelayQualityThresholdMax : delay_quality); 915 self->delay_quality_threshold = 916 (delay_quality > self->delay_quality_threshold ? delay_quality : 917 self->delay_quality_threshold); 918 } 919 return delay_correction; 920 } 921 922 static void EchoSubtraction( 923 AecCore* aec, 924 int num_partitions, 925 int x_fft_buf_block_pos, 926 int metrics_mode, 927 int extended_filter_enabled, 928 float normal_mu, 929 float normal_error_threshold, 930 float x_fft_buf[2][kExtendedNumPartitions * PART_LEN1], 931 float* const y, 932 float x_pow[PART_LEN1], 933 float h_fft_buf[2][kExtendedNumPartitions * PART_LEN1], 934 PowerLevel* linout_level, 935 float echo_subtractor_output[PART_LEN]) { 936 float s_fft[2][PART_LEN1]; 937 float e_extended[PART_LEN2]; 938 float s_extended[PART_LEN2]; 939 float *s; 940 float e[PART_LEN]; 941 float e_fft[2][PART_LEN1]; 942 int i; 943 memset(s_fft, 0, sizeof(s_fft)); 944 945 // Conditionally reset the echo subtraction filter if the filter has diverged 946 // significantly. 947 if (!aec->extended_filter_enabled && 948 aec->extreme_filter_divergence) { 949 memset(aec->wfBuf, 0, sizeof(aec->wfBuf)); 950 aec->extreme_filter_divergence = 0; 951 } 952 953 // Produce echo estimate s_fft. 954 WebRtcAec_FilterFar(num_partitions, 955 x_fft_buf_block_pos, 956 x_fft_buf, 957 h_fft_buf, 958 s_fft); 959 960 // Compute the time-domain echo estimate s. 961 ScaledInverseFft(s_fft, s_extended, 2.0f, 0); 962 s = &s_extended[PART_LEN]; 963 964 // Compute the time-domain echo prediction error. 965 for (i = 0; i < PART_LEN; ++i) { 966 e[i] = y[i] - s[i]; 967 } 968 969 // Compute the frequency domain echo prediction error. 970 memset(e_extended, 0, sizeof(float) * PART_LEN); 971 memcpy(e_extended + PART_LEN, e, sizeof(float) * PART_LEN); 972 Fft(e_extended, e_fft); 973 974 RTC_AEC_DEBUG_RAW_WRITE(aec->e_fft_file, 975 &e_fft[0][0], 976 sizeof(e_fft[0][0]) * PART_LEN1 * 2); 977 978 if (metrics_mode == 1) { 979 // Note that the first PART_LEN samples in fft (before transformation) are 980 // zero. Hence, the scaling by two in UpdateLevel() should not be 981 // performed. That scaling is taken care of in UpdateMetrics() instead. 982 UpdateLevel(linout_level, e_fft); 983 } 984 985 // Scale error signal inversely with far power. 986 WebRtcAec_ScaleErrorSignal(extended_filter_enabled, 987 normal_mu, 988 normal_error_threshold, 989 x_pow, 990 e_fft); 991 WebRtcAec_FilterAdaptation(num_partitions, 992 x_fft_buf_block_pos, 993 x_fft_buf, 994 e_fft, 995 h_fft_buf); 996 memcpy(echo_subtractor_output, e, sizeof(float) * PART_LEN); 997 } 998 999 1000 static void EchoSuppression(AecCore* aec, 1001 float farend[PART_LEN2], 1002 float* echo_subtractor_output, 1003 float* output, 1004 float* const* outputH) { 1005 float efw[2][PART_LEN1]; 1006 float xfw[2][PART_LEN1]; 1007 float dfw[2][PART_LEN1]; 1008 float comfortNoiseHband[2][PART_LEN1]; 1009 float fft[PART_LEN2]; 1010 float nlpGainHband; 1011 int i; 1012 size_t j; 1013 1014 // Coherence and non-linear filter 1015 float cohde[PART_LEN1], cohxd[PART_LEN1]; 1016 float hNlDeAvg, hNlXdAvg; 1017 float hNl[PART_LEN1]; 1018 float hNlPref[kPrefBandSize]; 1019 float hNlFb = 0, hNlFbLow = 0; 1020 const float prefBandQuant = 0.75f, prefBandQuantLow = 0.5f; 1021 const int prefBandSize = kPrefBandSize / aec->mult; 1022 const int minPrefBand = 4 / aec->mult; 1023 // Power estimate smoothing coefficients. 1024 const float* min_overdrive = aec->extended_filter_enabled 1025 ? kExtendedMinOverDrive 1026 : kNormalMinOverDrive; 1027 1028 // Filter energy 1029 const int delayEstInterval = 10 * aec->mult; 1030 1031 float* xfw_ptr = NULL; 1032 1033 // Update eBuf with echo subtractor output. 1034 memcpy(aec->eBuf + PART_LEN, 1035 echo_subtractor_output, 1036 sizeof(float) * PART_LEN); 1037 1038 // Analysis filter banks for the echo suppressor. 1039 // Windowed near-end ffts. 1040 WindowData(fft, aec->dBuf); 1041 aec_rdft_forward_128(fft); 1042 StoreAsComplex(fft, dfw); 1043 1044 // Windowed echo suppressor output ffts. 1045 WindowData(fft, aec->eBuf); 1046 aec_rdft_forward_128(fft); 1047 StoreAsComplex(fft, efw); 1048 1049 // NLP 1050 1051 // Convert far-end partition to the frequency domain with windowing. 1052 WindowData(fft, farend); 1053 Fft(fft, xfw); 1054 xfw_ptr = &xfw[0][0]; 1055 1056 // Buffer far. 1057 memcpy(aec->xfwBuf, xfw_ptr, sizeof(float) * 2 * PART_LEN1); 1058 1059 aec->delayEstCtr++; 1060 if (aec->delayEstCtr == delayEstInterval) { 1061 aec->delayEstCtr = 0; 1062 aec->delayIdx = WebRtcAec_PartitionDelay(aec); 1063 } 1064 1065 // Use delayed far. 1066 memcpy(xfw, 1067 aec->xfwBuf + aec->delayIdx * PART_LEN1, 1068 sizeof(xfw[0][0]) * 2 * PART_LEN1); 1069 1070 WebRtcAec_SubbandCoherence(aec, efw, dfw, xfw, fft, cohde, cohxd, 1071 &aec->extreme_filter_divergence); 1072 1073 // Select the microphone signal as output if the filter is deemed to have 1074 // diverged. 1075 if (aec->divergeState) { 1076 memcpy(efw, dfw, sizeof(efw[0][0]) * 2 * PART_LEN1); 1077 } 1078 1079 hNlXdAvg = 0; 1080 for (i = minPrefBand; i < prefBandSize + minPrefBand; i++) { 1081 hNlXdAvg += cohxd[i]; 1082 } 1083 hNlXdAvg /= prefBandSize; 1084 hNlXdAvg = 1 - hNlXdAvg; 1085 1086 hNlDeAvg = 0; 1087 for (i = minPrefBand; i < prefBandSize + minPrefBand; i++) { 1088 hNlDeAvg += cohde[i]; 1089 } 1090 hNlDeAvg /= prefBandSize; 1091 1092 if (hNlXdAvg < 0.75f && hNlXdAvg < aec->hNlXdAvgMin) { 1093 aec->hNlXdAvgMin = hNlXdAvg; 1094 } 1095 1096 if (hNlDeAvg > 0.98f && hNlXdAvg > 0.9f) { 1097 aec->stNearState = 1; 1098 } else if (hNlDeAvg < 0.95f || hNlXdAvg < 0.8f) { 1099 aec->stNearState = 0; 1100 } 1101 1102 if (aec->hNlXdAvgMin == 1) { 1103 aec->echoState = 0; 1104 aec->overDrive = min_overdrive[aec->nlp_mode]; 1105 1106 if (aec->stNearState == 1) { 1107 memcpy(hNl, cohde, sizeof(hNl)); 1108 hNlFb = hNlDeAvg; 1109 hNlFbLow = hNlDeAvg; 1110 } else { 1111 for (i = 0; i < PART_LEN1; i++) { 1112 hNl[i] = 1 - cohxd[i]; 1113 } 1114 hNlFb = hNlXdAvg; 1115 hNlFbLow = hNlXdAvg; 1116 } 1117 } else { 1118 1119 if (aec->stNearState == 1) { 1120 aec->echoState = 0; 1121 memcpy(hNl, cohde, sizeof(hNl)); 1122 hNlFb = hNlDeAvg; 1123 hNlFbLow = hNlDeAvg; 1124 } else { 1125 aec->echoState = 1; 1126 for (i = 0; i < PART_LEN1; i++) { 1127 hNl[i] = WEBRTC_SPL_MIN(cohde[i], 1 - cohxd[i]); 1128 } 1129 1130 // Select an order statistic from the preferred bands. 1131 // TODO: Using quicksort now, but a selection algorithm may be preferred. 1132 memcpy(hNlPref, &hNl[minPrefBand], sizeof(float) * prefBandSize); 1133 qsort(hNlPref, prefBandSize, sizeof(float), CmpFloat); 1134 hNlFb = hNlPref[(int)floor(prefBandQuant * (prefBandSize - 1))]; 1135 hNlFbLow = hNlPref[(int)floor(prefBandQuantLow * (prefBandSize - 1))]; 1136 } 1137 } 1138 1139 // Track the local filter minimum to determine suppression overdrive. 1140 if (hNlFbLow < 0.6f && hNlFbLow < aec->hNlFbLocalMin) { 1141 aec->hNlFbLocalMin = hNlFbLow; 1142 aec->hNlFbMin = hNlFbLow; 1143 aec->hNlNewMin = 1; 1144 aec->hNlMinCtr = 0; 1145 } 1146 aec->hNlFbLocalMin = 1147 WEBRTC_SPL_MIN(aec->hNlFbLocalMin + 0.0008f / aec->mult, 1); 1148 aec->hNlXdAvgMin = WEBRTC_SPL_MIN(aec->hNlXdAvgMin + 0.0006f / aec->mult, 1); 1149 1150 if (aec->hNlNewMin == 1) { 1151 aec->hNlMinCtr++; 1152 } 1153 if (aec->hNlMinCtr == 2) { 1154 aec->hNlNewMin = 0; 1155 aec->hNlMinCtr = 0; 1156 aec->overDrive = 1157 WEBRTC_SPL_MAX(kTargetSupp[aec->nlp_mode] / 1158 ((float)log(aec->hNlFbMin + 1e-10f) + 1e-10f), 1159 min_overdrive[aec->nlp_mode]); 1160 } 1161 1162 // Smooth the overdrive. 1163 if (aec->overDrive < aec->overDriveSm) { 1164 aec->overDriveSm = 0.99f * aec->overDriveSm + 0.01f * aec->overDrive; 1165 } else { 1166 aec->overDriveSm = 0.9f * aec->overDriveSm + 0.1f * aec->overDrive; 1167 } 1168 1169 WebRtcAec_OverdriveAndSuppress(aec, hNl, hNlFb, efw); 1170 1171 // Add comfort noise. 1172 WebRtcAec_ComfortNoise(aec, efw, comfortNoiseHband, aec->noisePow, hNl); 1173 1174 // TODO(bjornv): Investigate how to take the windowing below into account if 1175 // needed. 1176 if (aec->metricsMode == 1) { 1177 // Note that we have a scaling by two in the time domain |eBuf|. 1178 // In addition the time domain signal is windowed before transformation, 1179 // losing half the energy on the average. We take care of the first 1180 // scaling only in UpdateMetrics(). 1181 UpdateLevel(&aec->nlpoutlevel, efw); 1182 } 1183 1184 // Inverse error fft. 1185 ScaledInverseFft(efw, fft, 2.0f, 1); 1186 1187 // Overlap and add to obtain output. 1188 for (i = 0; i < PART_LEN; i++) { 1189 output[i] = (fft[i] * WebRtcAec_sqrtHanning[i] + 1190 aec->outBuf[i] * WebRtcAec_sqrtHanning[PART_LEN - i]); 1191 1192 // Saturate output to keep it in the allowed range. 1193 output[i] = WEBRTC_SPL_SAT( 1194 WEBRTC_SPL_WORD16_MAX, output[i], WEBRTC_SPL_WORD16_MIN); 1195 } 1196 memcpy(aec->outBuf, &fft[PART_LEN], PART_LEN * sizeof(aec->outBuf[0])); 1197 1198 // For H band 1199 if (aec->num_bands > 1) { 1200 // H band gain 1201 // average nlp over low band: average over second half of freq spectrum 1202 // (4->8khz) 1203 GetHighbandGain(hNl, &nlpGainHband); 1204 1205 // Inverse comfort_noise 1206 ScaledInverseFft(comfortNoiseHband, fft, 2.0f, 0); 1207 1208 // compute gain factor 1209 for (j = 0; j < aec->num_bands - 1; ++j) { 1210 for (i = 0; i < PART_LEN; i++) { 1211 outputH[j][i] = aec->dBufH[j][i] * nlpGainHband; 1212 } 1213 } 1214 1215 // Add some comfort noise where Hband is attenuated. 1216 for (i = 0; i < PART_LEN; i++) { 1217 outputH[0][i] += cnScaleHband * fft[i]; 1218 } 1219 1220 // Saturate output to keep it in the allowed range. 1221 for (j = 0; j < aec->num_bands - 1; ++j) { 1222 for (i = 0; i < PART_LEN; i++) { 1223 outputH[j][i] = WEBRTC_SPL_SAT( 1224 WEBRTC_SPL_WORD16_MAX, outputH[j][i], WEBRTC_SPL_WORD16_MIN); 1225 } 1226 } 1227 1228 } 1229 1230 // Copy the current block to the old position. 1231 memcpy(aec->dBuf, aec->dBuf + PART_LEN, sizeof(float) * PART_LEN); 1232 memcpy(aec->eBuf, aec->eBuf + PART_LEN, sizeof(float) * PART_LEN); 1233 1234 // Copy the current block to the old position for H band 1235 for (j = 0; j < aec->num_bands - 1; ++j) { 1236 memcpy(aec->dBufH[j], aec->dBufH[j] + PART_LEN, sizeof(float) * PART_LEN); 1237 } 1238 1239 memmove(aec->xfwBuf + PART_LEN1, 1240 aec->xfwBuf, 1241 sizeof(aec->xfwBuf) - sizeof(complex_t) * PART_LEN1); 1242 } 1243 1244 static void ProcessBlock(AecCore* aec) { 1245 size_t i; 1246 1247 float fft[PART_LEN2]; 1248 float xf[2][PART_LEN1]; 1249 float df[2][PART_LEN1]; 1250 float far_spectrum = 0.0f; 1251 float near_spectrum = 0.0f; 1252 float abs_far_spectrum[PART_LEN1]; 1253 float abs_near_spectrum[PART_LEN1]; 1254 1255 const float gPow[2] = {0.9f, 0.1f}; 1256 1257 // Noise estimate constants. 1258 const int noiseInitBlocks = 500 * aec->mult; 1259 const float step = 0.1f; 1260 const float ramp = 1.0002f; 1261 const float gInitNoise[2] = {0.999f, 0.001f}; 1262 1263 float nearend[PART_LEN]; 1264 float* nearend_ptr = NULL; 1265 float farend[PART_LEN2]; 1266 float* farend_ptr = NULL; 1267 float echo_subtractor_output[PART_LEN]; 1268 float output[PART_LEN]; 1269 float outputH[NUM_HIGH_BANDS_MAX][PART_LEN]; 1270 float* outputH_ptr[NUM_HIGH_BANDS_MAX]; 1271 float* xf_ptr = NULL; 1272 1273 for (i = 0; i < NUM_HIGH_BANDS_MAX; ++i) { 1274 outputH_ptr[i] = outputH[i]; 1275 } 1276 1277 // Concatenate old and new nearend blocks. 1278 for (i = 0; i < aec->num_bands - 1; ++i) { 1279 WebRtc_ReadBuffer(aec->nearFrBufH[i], 1280 (void**)&nearend_ptr, 1281 nearend, 1282 PART_LEN); 1283 memcpy(aec->dBufH[i] + PART_LEN, nearend_ptr, sizeof(nearend)); 1284 } 1285 WebRtc_ReadBuffer(aec->nearFrBuf, (void**)&nearend_ptr, nearend, PART_LEN); 1286 memcpy(aec->dBuf + PART_LEN, nearend_ptr, sizeof(nearend)); 1287 1288 // We should always have at least one element stored in |far_buf|. 1289 assert(WebRtc_available_read(aec->far_time_buf) > 0); 1290 WebRtc_ReadBuffer(aec->far_time_buf, (void**)&farend_ptr, farend, 1); 1291 1292 #ifdef WEBRTC_AEC_DEBUG_DUMP 1293 { 1294 // TODO(minyue): |farend_ptr| starts from buffered samples. This will be 1295 // modified when |aec->far_time_buf| is revised. 1296 RTC_AEC_DEBUG_WAV_WRITE(aec->farFile, &farend_ptr[PART_LEN], PART_LEN); 1297 1298 RTC_AEC_DEBUG_WAV_WRITE(aec->nearFile, nearend_ptr, PART_LEN); 1299 } 1300 #endif 1301 1302 // Convert far-end signal to the frequency domain. 1303 memcpy(fft, farend_ptr, sizeof(float) * PART_LEN2); 1304 Fft(fft, xf); 1305 xf_ptr = &xf[0][0]; 1306 1307 // Near fft 1308 memcpy(fft, aec->dBuf, sizeof(float) * PART_LEN2); 1309 Fft(fft, df); 1310 1311 // Power smoothing 1312 for (i = 0; i < PART_LEN1; i++) { 1313 far_spectrum = (xf_ptr[i] * xf_ptr[i]) + 1314 (xf_ptr[PART_LEN1 + i] * xf_ptr[PART_LEN1 + i]); 1315 aec->xPow[i] = 1316 gPow[0] * aec->xPow[i] + gPow[1] * aec->num_partitions * far_spectrum; 1317 // Calculate absolute spectra 1318 abs_far_spectrum[i] = sqrtf(far_spectrum); 1319 1320 near_spectrum = df[0][i] * df[0][i] + df[1][i] * df[1][i]; 1321 aec->dPow[i] = gPow[0] * aec->dPow[i] + gPow[1] * near_spectrum; 1322 // Calculate absolute spectra 1323 abs_near_spectrum[i] = sqrtf(near_spectrum); 1324 } 1325 1326 // Estimate noise power. Wait until dPow is more stable. 1327 if (aec->noiseEstCtr > 50) { 1328 for (i = 0; i < PART_LEN1; i++) { 1329 if (aec->dPow[i] < aec->dMinPow[i]) { 1330 aec->dMinPow[i] = 1331 (aec->dPow[i] + step * (aec->dMinPow[i] - aec->dPow[i])) * ramp; 1332 } else { 1333 aec->dMinPow[i] *= ramp; 1334 } 1335 } 1336 } 1337 1338 // Smooth increasing noise power from zero at the start, 1339 // to avoid a sudden burst of comfort noise. 1340 if (aec->noiseEstCtr < noiseInitBlocks) { 1341 aec->noiseEstCtr++; 1342 for (i = 0; i < PART_LEN1; i++) { 1343 if (aec->dMinPow[i] > aec->dInitMinPow[i]) { 1344 aec->dInitMinPow[i] = gInitNoise[0] * aec->dInitMinPow[i] + 1345 gInitNoise[1] * aec->dMinPow[i]; 1346 } else { 1347 aec->dInitMinPow[i] = aec->dMinPow[i]; 1348 } 1349 } 1350 aec->noisePow = aec->dInitMinPow; 1351 } else { 1352 aec->noisePow = aec->dMinPow; 1353 } 1354 1355 // Block wise delay estimation used for logging 1356 if (aec->delay_logging_enabled) { 1357 if (WebRtc_AddFarSpectrumFloat( 1358 aec->delay_estimator_farend, abs_far_spectrum, PART_LEN1) == 0) { 1359 int delay_estimate = WebRtc_DelayEstimatorProcessFloat( 1360 aec->delay_estimator, abs_near_spectrum, PART_LEN1); 1361 if (delay_estimate >= 0) { 1362 // Update delay estimate buffer. 1363 aec->delay_histogram[delay_estimate]++; 1364 aec->num_delay_values++; 1365 } 1366 if (aec->delay_metrics_delivered == 1 && 1367 aec->num_delay_values >= kDelayMetricsAggregationWindow) { 1368 UpdateDelayMetrics(aec); 1369 } 1370 } 1371 } 1372 1373 // Update the xfBuf block position. 1374 aec->xfBufBlockPos--; 1375 if (aec->xfBufBlockPos == -1) { 1376 aec->xfBufBlockPos = aec->num_partitions - 1; 1377 } 1378 1379 // Buffer xf 1380 memcpy(aec->xfBuf[0] + aec->xfBufBlockPos * PART_LEN1, 1381 xf_ptr, 1382 sizeof(float) * PART_LEN1); 1383 memcpy(aec->xfBuf[1] + aec->xfBufBlockPos * PART_LEN1, 1384 &xf_ptr[PART_LEN1], 1385 sizeof(float) * PART_LEN1); 1386 1387 // Perform echo subtraction. 1388 EchoSubtraction(aec, 1389 aec->num_partitions, 1390 aec->xfBufBlockPos, 1391 aec->metricsMode, 1392 aec->extended_filter_enabled, 1393 aec->normal_mu, 1394 aec->normal_error_threshold, 1395 aec->xfBuf, 1396 nearend_ptr, 1397 aec->xPow, 1398 aec->wfBuf, 1399 &aec->linoutlevel, 1400 echo_subtractor_output); 1401 1402 RTC_AEC_DEBUG_WAV_WRITE(aec->outLinearFile, echo_subtractor_output, PART_LEN); 1403 1404 // Perform echo suppression. 1405 EchoSuppression(aec, farend_ptr, echo_subtractor_output, output, outputH_ptr); 1406 1407 if (aec->metricsMode == 1) { 1408 // Update power levels and echo metrics 1409 UpdateLevel(&aec->farlevel, (float(*)[PART_LEN1])xf_ptr); 1410 UpdateLevel(&aec->nearlevel, df); 1411 UpdateMetrics(aec); 1412 } 1413 1414 // Store the output block. 1415 WebRtc_WriteBuffer(aec->outFrBuf, output, PART_LEN); 1416 // For high bands 1417 for (i = 0; i < aec->num_bands - 1; ++i) { 1418 WebRtc_WriteBuffer(aec->outFrBufH[i], outputH[i], PART_LEN); 1419 } 1420 1421 RTC_AEC_DEBUG_WAV_WRITE(aec->outFile, output, PART_LEN); 1422 } 1423 1424 AecCore* WebRtcAec_CreateAec() { 1425 int i; 1426 AecCore* aec = malloc(sizeof(AecCore)); 1427 if (!aec) { 1428 return NULL; 1429 } 1430 1431 aec->nearFrBuf = WebRtc_CreateBuffer(FRAME_LEN + PART_LEN, sizeof(float)); 1432 if (!aec->nearFrBuf) { 1433 WebRtcAec_FreeAec(aec); 1434 return NULL; 1435 } 1436 1437 aec->outFrBuf = WebRtc_CreateBuffer(FRAME_LEN + PART_LEN, sizeof(float)); 1438 if (!aec->outFrBuf) { 1439 WebRtcAec_FreeAec(aec); 1440 return NULL; 1441 } 1442 1443 for (i = 0; i < NUM_HIGH_BANDS_MAX; ++i) { 1444 aec->nearFrBufH[i] = WebRtc_CreateBuffer(FRAME_LEN + PART_LEN, 1445 sizeof(float)); 1446 if (!aec->nearFrBufH[i]) { 1447 WebRtcAec_FreeAec(aec); 1448 return NULL; 1449 } 1450 aec->outFrBufH[i] = WebRtc_CreateBuffer(FRAME_LEN + PART_LEN, 1451 sizeof(float)); 1452 if (!aec->outFrBufH[i]) { 1453 WebRtcAec_FreeAec(aec); 1454 return NULL; 1455 } 1456 } 1457 1458 // Create far-end buffers. 1459 // For bit exactness with legacy code, each element in |far_time_buf| is 1460 // supposed to contain |PART_LEN2| samples with an overlap of |PART_LEN| 1461 // samples from the last frame. 1462 // TODO(minyue): reduce |far_time_buf| to non-overlapped |PART_LEN| samples. 1463 aec->far_time_buf = 1464 WebRtc_CreateBuffer(kBufSizePartitions, sizeof(float) * PART_LEN2); 1465 if (!aec->far_time_buf) { 1466 WebRtcAec_FreeAec(aec); 1467 return NULL; 1468 } 1469 1470 #ifdef WEBRTC_AEC_DEBUG_DUMP 1471 aec->instance_index = webrtc_aec_instance_count; 1472 1473 aec->farFile = aec->nearFile = aec->outFile = aec->outLinearFile = NULL; 1474 aec->debug_dump_count = 0; 1475 #endif 1476 aec->delay_estimator_farend = 1477 WebRtc_CreateDelayEstimatorFarend(PART_LEN1, kHistorySizeBlocks); 1478 if (aec->delay_estimator_farend == NULL) { 1479 WebRtcAec_FreeAec(aec); 1480 return NULL; 1481 } 1482 // We create the delay_estimator with the same amount of maximum lookahead as 1483 // the delay history size (kHistorySizeBlocks) for symmetry reasons. 1484 aec->delay_estimator = WebRtc_CreateDelayEstimator( 1485 aec->delay_estimator_farend, kHistorySizeBlocks); 1486 if (aec->delay_estimator == NULL) { 1487 WebRtcAec_FreeAec(aec); 1488 return NULL; 1489 } 1490 #ifdef WEBRTC_ANDROID 1491 aec->delay_agnostic_enabled = 1; // DA-AEC enabled by default. 1492 // DA-AEC assumes the system is causal from the beginning and will self adjust 1493 // the lookahead when shifting is required. 1494 WebRtc_set_lookahead(aec->delay_estimator, 0); 1495 #else 1496 aec->delay_agnostic_enabled = 0; 1497 WebRtc_set_lookahead(aec->delay_estimator, kLookaheadBlocks); 1498 #endif 1499 aec->extended_filter_enabled = 0; 1500 1501 // Assembly optimization 1502 WebRtcAec_FilterFar = FilterFar; 1503 WebRtcAec_ScaleErrorSignal = ScaleErrorSignal; 1504 WebRtcAec_FilterAdaptation = FilterAdaptation; 1505 WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppress; 1506 WebRtcAec_ComfortNoise = ComfortNoise; 1507 WebRtcAec_SubbandCoherence = SubbandCoherence; 1508 WebRtcAec_StoreAsComplex = StoreAsComplex; 1509 WebRtcAec_PartitionDelay = PartitionDelay; 1510 WebRtcAec_WindowData = WindowData; 1511 1512 1513 #if defined(WEBRTC_ARCH_X86_FAMILY) 1514 if (WebRtc_GetCPUInfo(kSSE2)) { 1515 WebRtcAec_InitAec_SSE2(); 1516 } 1517 #endif 1518 1519 #if defined(MIPS_FPU_LE) 1520 WebRtcAec_InitAec_mips(); 1521 #endif 1522 1523 #if defined(WEBRTC_HAS_NEON) 1524 WebRtcAec_InitAec_neon(); 1525 #elif defined(WEBRTC_DETECT_NEON) 1526 if ((WebRtc_GetCPUFeaturesARM() & kCPUFeatureNEON) != 0) { 1527 WebRtcAec_InitAec_neon(); 1528 } 1529 #endif 1530 1531 aec_rdft_init(); 1532 1533 return aec; 1534 } 1535 1536 void WebRtcAec_FreeAec(AecCore* aec) { 1537 int i; 1538 if (aec == NULL) { 1539 return; 1540 } 1541 1542 WebRtc_FreeBuffer(aec->nearFrBuf); 1543 WebRtc_FreeBuffer(aec->outFrBuf); 1544 1545 for (i = 0; i < NUM_HIGH_BANDS_MAX; ++i) { 1546 WebRtc_FreeBuffer(aec->nearFrBufH[i]); 1547 WebRtc_FreeBuffer(aec->outFrBufH[i]); 1548 } 1549 1550 WebRtc_FreeBuffer(aec->far_time_buf); 1551 1552 RTC_AEC_DEBUG_WAV_CLOSE(aec->farFile); 1553 RTC_AEC_DEBUG_WAV_CLOSE(aec->nearFile); 1554 RTC_AEC_DEBUG_WAV_CLOSE(aec->outFile); 1555 RTC_AEC_DEBUG_WAV_CLOSE(aec->outLinearFile); 1556 RTC_AEC_DEBUG_RAW_CLOSE(aec->e_fft_file); 1557 1558 WebRtc_FreeDelayEstimator(aec->delay_estimator); 1559 WebRtc_FreeDelayEstimatorFarend(aec->delay_estimator_farend); 1560 1561 free(aec); 1562 } 1563 1564 int WebRtcAec_InitAec(AecCore* aec, int sampFreq) { 1565 int i; 1566 1567 aec->sampFreq = sampFreq; 1568 1569 if (sampFreq == 8000) { 1570 aec->normal_mu = 0.6f; 1571 aec->normal_error_threshold = 2e-6f; 1572 aec->num_bands = 1; 1573 } else { 1574 aec->normal_mu = 0.5f; 1575 aec->normal_error_threshold = 1.5e-6f; 1576 aec->num_bands = (size_t)(sampFreq / 16000); 1577 } 1578 1579 WebRtc_InitBuffer(aec->nearFrBuf); 1580 WebRtc_InitBuffer(aec->outFrBuf); 1581 for (i = 0; i < NUM_HIGH_BANDS_MAX; ++i) { 1582 WebRtc_InitBuffer(aec->nearFrBufH[i]); 1583 WebRtc_InitBuffer(aec->outFrBufH[i]); 1584 } 1585 1586 // Initialize far-end buffers. 1587 WebRtc_InitBuffer(aec->far_time_buf); 1588 1589 #ifdef WEBRTC_AEC_DEBUG_DUMP 1590 { 1591 int process_rate = sampFreq > 16000 ? 16000 : sampFreq; 1592 RTC_AEC_DEBUG_WAV_REOPEN("aec_far", aec->instance_index, 1593 aec->debug_dump_count, process_rate, 1594 &aec->farFile ); 1595 RTC_AEC_DEBUG_WAV_REOPEN("aec_near", aec->instance_index, 1596 aec->debug_dump_count, process_rate, 1597 &aec->nearFile); 1598 RTC_AEC_DEBUG_WAV_REOPEN("aec_out", aec->instance_index, 1599 aec->debug_dump_count, process_rate, 1600 &aec->outFile ); 1601 RTC_AEC_DEBUG_WAV_REOPEN("aec_out_linear", aec->instance_index, 1602 aec->debug_dump_count, process_rate, 1603 &aec->outLinearFile); 1604 } 1605 1606 RTC_AEC_DEBUG_RAW_OPEN("aec_e_fft", 1607 aec->debug_dump_count, 1608 &aec->e_fft_file); 1609 1610 ++aec->debug_dump_count; 1611 #endif 1612 aec->system_delay = 0; 1613 1614 if (WebRtc_InitDelayEstimatorFarend(aec->delay_estimator_farend) != 0) { 1615 return -1; 1616 } 1617 if (WebRtc_InitDelayEstimator(aec->delay_estimator) != 0) { 1618 return -1; 1619 } 1620 aec->delay_logging_enabled = 0; 1621 aec->delay_metrics_delivered = 0; 1622 memset(aec->delay_histogram, 0, sizeof(aec->delay_histogram)); 1623 aec->num_delay_values = 0; 1624 aec->delay_median = -1; 1625 aec->delay_std = -1; 1626 aec->fraction_poor_delays = -1.0f; 1627 1628 aec->signal_delay_correction = 0; 1629 aec->previous_delay = -2; // (-2): Uninitialized. 1630 aec->delay_correction_count = 0; 1631 aec->shift_offset = kInitialShiftOffset; 1632 aec->delay_quality_threshold = kDelayQualityThresholdMin; 1633 1634 aec->num_partitions = kNormalNumPartitions; 1635 1636 // Update the delay estimator with filter length. We use half the 1637 // |num_partitions| to take the echo path into account. In practice we say 1638 // that the echo has a duration of maximum half |num_partitions|, which is not 1639 // true, but serves as a crude measure. 1640 WebRtc_set_allowed_offset(aec->delay_estimator, aec->num_partitions / 2); 1641 // TODO(bjornv): I currently hard coded the enable. Once we've established 1642 // that AECM has no performance regression, robust_validation will be enabled 1643 // all the time and the APIs to turn it on/off will be removed. Hence, remove 1644 // this line then. 1645 WebRtc_enable_robust_validation(aec->delay_estimator, 1); 1646 aec->frame_count = 0; 1647 1648 // Default target suppression mode. 1649 aec->nlp_mode = 1; 1650 1651 // Sampling frequency multiplier w.r.t. 8 kHz. 1652 // In case of multiple bands we process the lower band in 16 kHz, hence the 1653 // multiplier is always 2. 1654 if (aec->num_bands > 1) { 1655 aec->mult = 2; 1656 } else { 1657 aec->mult = (short)aec->sampFreq / 8000; 1658 } 1659 1660 aec->farBufWritePos = 0; 1661 aec->farBufReadPos = 0; 1662 1663 aec->inSamples = 0; 1664 aec->outSamples = 0; 1665 aec->knownDelay = 0; 1666 1667 // Initialize buffers 1668 memset(aec->dBuf, 0, sizeof(aec->dBuf)); 1669 memset(aec->eBuf, 0, sizeof(aec->eBuf)); 1670 // For H bands 1671 for (i = 0; i < NUM_HIGH_BANDS_MAX; ++i) { 1672 memset(aec->dBufH[i], 0, sizeof(aec->dBufH[i])); 1673 } 1674 1675 memset(aec->xPow, 0, sizeof(aec->xPow)); 1676 memset(aec->dPow, 0, sizeof(aec->dPow)); 1677 memset(aec->dInitMinPow, 0, sizeof(aec->dInitMinPow)); 1678 aec->noisePow = aec->dInitMinPow; 1679 aec->noiseEstCtr = 0; 1680 1681 // Initial comfort noise power 1682 for (i = 0; i < PART_LEN1; i++) { 1683 aec->dMinPow[i] = 1.0e6f; 1684 } 1685 1686 // Holds the last block written to 1687 aec->xfBufBlockPos = 0; 1688 // TODO: Investigate need for these initializations. Deleting them doesn't 1689 // change the output at all and yields 0.4% overall speedup. 1690 memset(aec->xfBuf, 0, sizeof(complex_t) * kExtendedNumPartitions * PART_LEN1); 1691 memset(aec->wfBuf, 0, sizeof(complex_t) * kExtendedNumPartitions * PART_LEN1); 1692 memset(aec->sde, 0, sizeof(complex_t) * PART_LEN1); 1693 memset(aec->sxd, 0, sizeof(complex_t) * PART_LEN1); 1694 memset( 1695 aec->xfwBuf, 0, sizeof(complex_t) * kExtendedNumPartitions * PART_LEN1); 1696 memset(aec->se, 0, sizeof(float) * PART_LEN1); 1697 1698 // To prevent numerical instability in the first block. 1699 for (i = 0; i < PART_LEN1; i++) { 1700 aec->sd[i] = 1; 1701 } 1702 for (i = 0; i < PART_LEN1; i++) { 1703 aec->sx[i] = 1; 1704 } 1705 1706 memset(aec->hNs, 0, sizeof(aec->hNs)); 1707 memset(aec->outBuf, 0, sizeof(float) * PART_LEN); 1708 1709 aec->hNlFbMin = 1; 1710 aec->hNlFbLocalMin = 1; 1711 aec->hNlXdAvgMin = 1; 1712 aec->hNlNewMin = 0; 1713 aec->hNlMinCtr = 0; 1714 aec->overDrive = 2; 1715 aec->overDriveSm = 2; 1716 aec->delayIdx = 0; 1717 aec->stNearState = 0; 1718 aec->echoState = 0; 1719 aec->divergeState = 0; 1720 1721 aec->seed = 777; 1722 aec->delayEstCtr = 0; 1723 1724 aec->extreme_filter_divergence = 0; 1725 1726 // Metrics disabled by default 1727 aec->metricsMode = 0; 1728 InitMetrics(aec); 1729 1730 return 0; 1731 } 1732 1733 1734 // For bit exactness with a legacy code, |farend| is supposed to contain 1735 // |PART_LEN2| samples with an overlap of |PART_LEN| samples from the last 1736 // frame. 1737 // TODO(minyue): reduce |farend| to non-overlapped |PART_LEN| samples. 1738 void WebRtcAec_BufferFarendPartition(AecCore* aec, const float* farend) { 1739 // Check if the buffer is full, and in that case flush the oldest data. 1740 if (WebRtc_available_write(aec->far_time_buf) < 1) { 1741 WebRtcAec_MoveFarReadPtr(aec, 1); 1742 } 1743 1744 WebRtc_WriteBuffer(aec->far_time_buf, farend, 1); 1745 } 1746 1747 int WebRtcAec_MoveFarReadPtr(AecCore* aec, int elements) { 1748 int elements_moved = WebRtc_MoveReadPtr(aec->far_time_buf, elements); 1749 aec->system_delay -= elements_moved * PART_LEN; 1750 return elements_moved; 1751 } 1752 1753 void WebRtcAec_ProcessFrames(AecCore* aec, 1754 const float* const* nearend, 1755 size_t num_bands, 1756 size_t num_samples, 1757 int knownDelay, 1758 float* const* out) { 1759 size_t i, j; 1760 int out_elements = 0; 1761 1762 aec->frame_count++; 1763 // For each frame the process is as follows: 1764 // 1) If the system_delay indicates on being too small for processing a 1765 // frame we stuff the buffer with enough data for 10 ms. 1766 // 2 a) Adjust the buffer to the system delay, by moving the read pointer. 1767 // b) Apply signal based delay correction, if we have detected poor AEC 1768 // performance. 1769 // 3) TODO(bjornv): Investigate if we need to add this: 1770 // If we can't move read pointer due to buffer size limitations we 1771 // flush/stuff the buffer. 1772 // 4) Process as many partitions as possible. 1773 // 5) Update the |system_delay| with respect to a full frame of FRAME_LEN 1774 // samples. Even though we will have data left to process (we work with 1775 // partitions) we consider updating a whole frame, since that's the 1776 // amount of data we input and output in audio_processing. 1777 // 6) Update the outputs. 1778 1779 // The AEC has two different delay estimation algorithms built in. The 1780 // first relies on delay input values from the user and the amount of 1781 // shifted buffer elements is controlled by |knownDelay|. This delay will 1782 // give a guess on how much we need to shift far-end buffers to align with 1783 // the near-end signal. The other delay estimation algorithm uses the 1784 // far- and near-end signals to find the offset between them. This one 1785 // (called "signal delay") is then used to fine tune the alignment, or 1786 // simply compensate for errors in the system based one. 1787 // Note that the two algorithms operate independently. Currently, we only 1788 // allow one algorithm to be turned on. 1789 1790 assert(aec->num_bands == num_bands); 1791 1792 for (j = 0; j < num_samples; j+= FRAME_LEN) { 1793 // TODO(bjornv): Change the near-end buffer handling to be the same as for 1794 // far-end, that is, with a near_pre_buf. 1795 // Buffer the near-end frame. 1796 WebRtc_WriteBuffer(aec->nearFrBuf, &nearend[0][j], FRAME_LEN); 1797 // For H band 1798 for (i = 1; i < num_bands; ++i) { 1799 WebRtc_WriteBuffer(aec->nearFrBufH[i - 1], &nearend[i][j], FRAME_LEN); 1800 } 1801 1802 // 1) At most we process |aec->mult|+1 partitions in 10 ms. Make sure we 1803 // have enough far-end data for that by stuffing the buffer if the 1804 // |system_delay| indicates others. 1805 if (aec->system_delay < FRAME_LEN) { 1806 // We don't have enough data so we rewind 10 ms. 1807 WebRtcAec_MoveFarReadPtr(aec, -(aec->mult + 1)); 1808 } 1809 1810 if (!aec->delay_agnostic_enabled) { 1811 // 2 a) Compensate for a possible change in the system delay. 1812 1813 // TODO(bjornv): Investigate how we should round the delay difference; 1814 // right now we know that incoming |knownDelay| is underestimated when 1815 // it's less than |aec->knownDelay|. We therefore, round (-32) in that 1816 // direction. In the other direction, we don't have this situation, but 1817 // might flush one partition too little. This can cause non-causality, 1818 // which should be investigated. Maybe, allow for a non-symmetric 1819 // rounding, like -16. 1820 int move_elements = (aec->knownDelay - knownDelay - 32) / PART_LEN; 1821 int moved_elements = 1822 WebRtc_MoveReadPtr(aec->far_time_buf, move_elements); 1823 aec->knownDelay -= moved_elements * PART_LEN; 1824 } else { 1825 // 2 b) Apply signal based delay correction. 1826 int move_elements = SignalBasedDelayCorrection(aec); 1827 int moved_elements = 1828 WebRtc_MoveReadPtr(aec->far_time_buf, move_elements); 1829 int far_near_buffer_diff = WebRtc_available_read(aec->far_time_buf) - 1830 WebRtc_available_read(aec->nearFrBuf) / PART_LEN; 1831 WebRtc_SoftResetDelayEstimator(aec->delay_estimator, moved_elements); 1832 WebRtc_SoftResetDelayEstimatorFarend(aec->delay_estimator_farend, 1833 moved_elements); 1834 aec->signal_delay_correction += moved_elements; 1835 // If we rely on reported system delay values only, a buffer underrun here 1836 // can never occur since we've taken care of that in 1) above. Here, we 1837 // apply signal based delay correction and can therefore end up with 1838 // buffer underruns since the delay estimation can be wrong. We therefore 1839 // stuff the buffer with enough elements if needed. 1840 if (far_near_buffer_diff < 0) { 1841 WebRtcAec_MoveFarReadPtr(aec, far_near_buffer_diff); 1842 } 1843 } 1844 1845 // 4) Process as many blocks as possible. 1846 while (WebRtc_available_read(aec->nearFrBuf) >= PART_LEN) { 1847 ProcessBlock(aec); 1848 } 1849 1850 // 5) Update system delay with respect to the entire frame. 1851 aec->system_delay -= FRAME_LEN; 1852 1853 // 6) Update output frame. 1854 // Stuff the out buffer if we have less than a frame to output. 1855 // This should only happen for the first frame. 1856 out_elements = (int)WebRtc_available_read(aec->outFrBuf); 1857 if (out_elements < FRAME_LEN) { 1858 WebRtc_MoveReadPtr(aec->outFrBuf, out_elements - FRAME_LEN); 1859 for (i = 0; i < num_bands - 1; ++i) { 1860 WebRtc_MoveReadPtr(aec->outFrBufH[i], out_elements - FRAME_LEN); 1861 } 1862 } 1863 // Obtain an output frame. 1864 WebRtc_ReadBuffer(aec->outFrBuf, NULL, &out[0][j], FRAME_LEN); 1865 // For H bands. 1866 for (i = 1; i < num_bands; ++i) { 1867 WebRtc_ReadBuffer(aec->outFrBufH[i - 1], NULL, &out[i][j], FRAME_LEN); 1868 } 1869 } 1870 } 1871 1872 int WebRtcAec_GetDelayMetricsCore(AecCore* self, int* median, int* std, 1873 float* fraction_poor_delays) { 1874 assert(self != NULL); 1875 assert(median != NULL); 1876 assert(std != NULL); 1877 1878 if (self->delay_logging_enabled == 0) { 1879 // Logging disabled. 1880 return -1; 1881 } 1882 1883 if (self->delay_metrics_delivered == 0) { 1884 UpdateDelayMetrics(self); 1885 self->delay_metrics_delivered = 1; 1886 } 1887 *median = self->delay_median; 1888 *std = self->delay_std; 1889 *fraction_poor_delays = self->fraction_poor_delays; 1890 1891 return 0; 1892 } 1893 1894 int WebRtcAec_echo_state(AecCore* self) { return self->echoState; } 1895 1896 void WebRtcAec_GetEchoStats(AecCore* self, 1897 Stats* erl, 1898 Stats* erle, 1899 Stats* a_nlp) { 1900 assert(erl != NULL); 1901 assert(erle != NULL); 1902 assert(a_nlp != NULL); 1903 *erl = self->erl; 1904 *erle = self->erle; 1905 *a_nlp = self->aNlp; 1906 } 1907 1908 void WebRtcAec_SetConfigCore(AecCore* self, 1909 int nlp_mode, 1910 int metrics_mode, 1911 int delay_logging) { 1912 assert(nlp_mode >= 0 && nlp_mode < 3); 1913 self->nlp_mode = nlp_mode; 1914 self->metricsMode = metrics_mode; 1915 if (self->metricsMode) { 1916 InitMetrics(self); 1917 } 1918 // Turn on delay logging if it is either set explicitly or if delay agnostic 1919 // AEC is enabled (which requires delay estimates). 1920 self->delay_logging_enabled = delay_logging || self->delay_agnostic_enabled; 1921 if (self->delay_logging_enabled) { 1922 memset(self->delay_histogram, 0, sizeof(self->delay_histogram)); 1923 } 1924 } 1925 1926 void WebRtcAec_enable_delay_agnostic(AecCore* self, int enable) { 1927 self->delay_agnostic_enabled = enable; 1928 } 1929 1930 int WebRtcAec_delay_agnostic_enabled(AecCore* self) { 1931 return self->delay_agnostic_enabled; 1932 } 1933 1934 void WebRtcAec_enable_extended_filter(AecCore* self, int enable) { 1935 self->extended_filter_enabled = enable; 1936 self->num_partitions = enable ? kExtendedNumPartitions : kNormalNumPartitions; 1937 // Update the delay estimator with filter length. See InitAEC() for details. 1938 WebRtc_set_allowed_offset(self->delay_estimator, self->num_partitions / 2); 1939 } 1940 1941 int WebRtcAec_extended_filter_enabled(AecCore* self) { 1942 return self->extended_filter_enabled; 1943 } 1944 1945 int WebRtcAec_system_delay(AecCore* self) { return self->system_delay; } 1946 1947 void WebRtcAec_SetSystemDelay(AecCore* self, int delay) { 1948 assert(delay >= 0); 1949 self->system_delay = delay; 1950 } 1951