Home | History | Annotate | Download | only in aec
      1 /*
      2  *  Copyright (c) 2011 The WebRTC project authors. All Rights Reserved.
      3  *
      4  *  Use of this source code is governed by a BSD-style license
      5  *  that can be found in the LICENSE file in the root of the source
      6  *  tree. An additional intellectual property rights grant can be found
      7  *  in the file PATENTS.  All contributing project authors may
      8  *  be found in the AUTHORS file in the root of the source tree.
      9  */
     10 
     11 /*
     12  * The core AEC algorithm, which is presented with time-aligned signals.
     13  */
     14 
     15 #include "aec_core.h"
     16 
     17 #include <assert.h>
     18 #include <math.h>
     19 #include <stddef.h>  // size_t
     20 #include <stdlib.h>
     21 #include <string.h>
     22 
     23 #include "aec_rdft.h"
     24 #include "delay_estimator_wrapper.h"
     25 #include "ring_buffer.h"
     26 #include "system_wrappers/interface/cpu_features_wrapper.h"
     27 #include "typedefs.h"
     28 
     29 // Buffer size (samples)
     30 static const size_t kBufSizePartitions = 250;  // 1 second of audio in 16 kHz.
     31 
     32 // Noise suppression
     33 static const int converged = 250;
     34 
     35 // Metrics
     36 static const int subCountLen = 4;
     37 static const int countLen = 50;
     38 
     39 // Quantities to control H band scaling for SWB input
     40 static const int flagHbandCn = 1; // flag for adding comfort noise in H band
     41 static const float cnScaleHband = (float)0.4; // scale for comfort noise in H band
     42 // Initial bin for averaging nlp gain in low band
     43 static const int freqAvgIc = PART_LEN / 2;
     44 
     45 // Matlab code to produce table:
     46 // win = sqrt(hanning(63)); win = [0 ; win(1:32)];
     47 // fprintf(1, '\t%.14f, %.14f, %.14f,\n', win);
     48 static const float sqrtHanning[65] = {
     49     0.00000000000000f, 0.02454122852291f, 0.04906767432742f,
     50     0.07356456359967f, 0.09801714032956f, 0.12241067519922f,
     51     0.14673047445536f, 0.17096188876030f, 0.19509032201613f,
     52     0.21910124015687f, 0.24298017990326f, 0.26671275747490f,
     53     0.29028467725446f, 0.31368174039889f, 0.33688985339222f,
     54     0.35989503653499f, 0.38268343236509f, 0.40524131400499f,
     55     0.42755509343028f, 0.44961132965461f, 0.47139673682600f,
     56     0.49289819222978f, 0.51410274419322f, 0.53499761988710f,
     57     0.55557023301960f, 0.57580819141785f, 0.59569930449243f,
     58     0.61523159058063f, 0.63439328416365f, 0.65317284295378f,
     59     0.67155895484702f, 0.68954054473707f, 0.70710678118655f,
     60     0.72424708295147f, 0.74095112535496f, 0.75720884650648f,
     61     0.77301045336274f, 0.78834642762661f, 0.80320753148064f,
     62     0.81758481315158f, 0.83146961230255f, 0.84485356524971f,
     63     0.85772861000027f, 0.87008699110871f, 0.88192126434835f,
     64     0.89322430119552f, 0.90398929312344f, 0.91420975570353f,
     65     0.92387953251129f, 0.93299279883474f, 0.94154406518302f,
     66     0.94952818059304f, 0.95694033573221f, 0.96377606579544f,
     67     0.97003125319454f, 0.97570213003853f, 0.98078528040323f,
     68     0.98527764238894f, 0.98917650996478f, 0.99247953459871f,
     69     0.99518472667220f, 0.99729045667869f, 0.99879545620517f,
     70     0.99969881869620f, 1.00000000000000f
     71 };
     72 
     73 // Matlab code to produce table:
     74 // weightCurve = [0 ; 0.3 * sqrt(linspace(0,1,64))' + 0.1];
     75 // fprintf(1, '\t%.4f, %.4f, %.4f, %.4f, %.4f, %.4f,\n', weightCurve);
     76 const float WebRtcAec_weightCurve[65] = {
     77     0.0000f, 0.1000f, 0.1378f, 0.1535f, 0.1655f, 0.1756f,
     78     0.1845f, 0.1926f, 0.2000f, 0.2069f, 0.2134f, 0.2195f,
     79     0.2254f, 0.2309f, 0.2363f, 0.2414f, 0.2464f, 0.2512f,
     80     0.2558f, 0.2604f, 0.2648f, 0.2690f, 0.2732f, 0.2773f,
     81     0.2813f, 0.2852f, 0.2890f, 0.2927f, 0.2964f, 0.3000f,
     82     0.3035f, 0.3070f, 0.3104f, 0.3138f, 0.3171f, 0.3204f,
     83     0.3236f, 0.3268f, 0.3299f, 0.3330f, 0.3360f, 0.3390f,
     84     0.3420f, 0.3449f, 0.3478f, 0.3507f, 0.3535f, 0.3563f,
     85     0.3591f, 0.3619f, 0.3646f, 0.3673f, 0.3699f, 0.3726f,
     86     0.3752f, 0.3777f, 0.3803f, 0.3828f, 0.3854f, 0.3878f,
     87     0.3903f, 0.3928f, 0.3952f, 0.3976f, 0.4000f
     88 };
     89 
     90 // Matlab code to produce table:
     91 // overDriveCurve = [sqrt(linspace(0,1,65))' + 1];
     92 // fprintf(1, '\t%.4f, %.4f, %.4f, %.4f, %.4f, %.4f,\n', overDriveCurve);
     93 const float WebRtcAec_overDriveCurve[65] = {
     94     1.0000f, 1.1250f, 1.1768f, 1.2165f, 1.2500f, 1.2795f,
     95     1.3062f, 1.3307f, 1.3536f, 1.3750f, 1.3953f, 1.4146f,
     96     1.4330f, 1.4507f, 1.4677f, 1.4841f, 1.5000f, 1.5154f,
     97     1.5303f, 1.5449f, 1.5590f, 1.5728f, 1.5863f, 1.5995f,
     98     1.6124f, 1.6250f, 1.6374f, 1.6495f, 1.6614f, 1.6731f,
     99     1.6847f, 1.6960f, 1.7071f, 1.7181f, 1.7289f, 1.7395f,
    100     1.7500f, 1.7603f, 1.7706f, 1.7806f, 1.7906f, 1.8004f,
    101     1.8101f, 1.8197f, 1.8292f, 1.8385f, 1.8478f, 1.8570f,
    102     1.8660f, 1.8750f, 1.8839f, 1.8927f, 1.9014f, 1.9100f,
    103     1.9186f, 1.9270f, 1.9354f, 1.9437f, 1.9520f, 1.9601f,
    104     1.9682f, 1.9763f, 1.9843f, 1.9922f, 2.0000f
    105 };
    106 
    107 // "Private" function prototypes.
    108 static void ProcessBlock(aec_t* aec);
    109 
    110 static void NonLinearProcessing(aec_t *aec, short *output, short *outputH);
    111 
    112 static void GetHighbandGain(const float *lambda, float *nlpGainHband);
    113 
    114 // Comfort_noise also computes noise for H band returned in comfortNoiseHband
    115 static void ComfortNoise(aec_t *aec, float efw[2][PART_LEN1],
    116                                   complex_t *comfortNoiseHband,
    117                                   const float *noisePow, const float *lambda);
    118 
    119 static void WebRtcAec_InitLevel(power_level_t *level);
    120 static void WebRtcAec_InitStats(stats_t *stats);
    121 static void UpdateLevel(power_level_t* level, float in[2][PART_LEN1]);
    122 static void UpdateMetrics(aec_t *aec);
    123 // Convert from time domain to frequency domain. Note that |time_data| are
    124 // overwritten.
    125 static void TimeToFrequency(float time_data[PART_LEN2],
    126                             float freq_data[2][PART_LEN1],
    127                             int window);
    128 
    129 __inline static float MulRe(float aRe, float aIm, float bRe, float bIm)
    130 {
    131     return aRe * bRe - aIm * bIm;
    132 }
    133 
    134 __inline static float MulIm(float aRe, float aIm, float bRe, float bIm)
    135 {
    136     return aRe * bIm + aIm * bRe;
    137 }
    138 
    139 static int CmpFloat(const void *a, const void *b)
    140 {
    141     const float *da = (const float *)a;
    142     const float *db = (const float *)b;
    143 
    144     return (*da > *db) - (*da < *db);
    145 }
    146 
    147 int WebRtcAec_CreateAec(aec_t **aecInst)
    148 {
    149     aec_t *aec = malloc(sizeof(aec_t));
    150     *aecInst = aec;
    151     if (aec == NULL) {
    152         return -1;
    153     }
    154 
    155     if (WebRtc_CreateBuffer(&aec->nearFrBuf,
    156                             FRAME_LEN + PART_LEN,
    157                             sizeof(int16_t)) == -1) {
    158         WebRtcAec_FreeAec(aec);
    159         aec = NULL;
    160         return -1;
    161     }
    162 
    163     if (WebRtc_CreateBuffer(&aec->outFrBuf,
    164                             FRAME_LEN + PART_LEN,
    165                             sizeof(int16_t)) == -1) {
    166         WebRtcAec_FreeAec(aec);
    167         aec = NULL;
    168         return -1;
    169     }
    170 
    171     if (WebRtc_CreateBuffer(&aec->nearFrBufH,
    172                             FRAME_LEN + PART_LEN,
    173                             sizeof(int16_t)) == -1) {
    174         WebRtcAec_FreeAec(aec);
    175         aec = NULL;
    176         return -1;
    177     }
    178 
    179     if (WebRtc_CreateBuffer(&aec->outFrBufH,
    180                             FRAME_LEN + PART_LEN,
    181                             sizeof(int16_t)) == -1) {
    182         WebRtcAec_FreeAec(aec);
    183         aec = NULL;
    184         return -1;
    185     }
    186 
    187     // Create far-end buffers.
    188     if (WebRtc_CreateBuffer(&aec->far_buf, kBufSizePartitions,
    189                             sizeof(float) * 2 * PART_LEN1) == -1) {
    190         WebRtcAec_FreeAec(aec);
    191         aec = NULL;
    192         return -1;
    193     }
    194     if (WebRtc_CreateBuffer(&aec->far_buf_windowed, kBufSizePartitions,
    195                             sizeof(float) * 2 * PART_LEN1) == -1) {
    196         WebRtcAec_FreeAec(aec);
    197         aec = NULL;
    198         return -1;
    199     }
    200 #ifdef WEBRTC_AEC_DEBUG_DUMP
    201     if (WebRtc_CreateBuffer(&aec->far_time_buf, kBufSizePartitions,
    202                             sizeof(int16_t) * PART_LEN) == -1) {
    203         WebRtcAec_FreeAec(aec);
    204         aec = NULL;
    205         return -1;
    206     }
    207 #endif
    208     if (WebRtc_CreateDelayEstimator(&aec->delay_estimator,
    209                                     PART_LEN1,
    210                                     kMaxDelayBlocks,
    211                                     kLookaheadBlocks) == -1) {
    212       WebRtcAec_FreeAec(aec);
    213       aec = NULL;
    214       return -1;
    215     }
    216 
    217     return 0;
    218 }
    219 
    220 int WebRtcAec_FreeAec(aec_t *aec)
    221 {
    222     if (aec == NULL) {
    223         return -1;
    224     }
    225 
    226     WebRtc_FreeBuffer(aec->nearFrBuf);
    227     WebRtc_FreeBuffer(aec->outFrBuf);
    228 
    229     WebRtc_FreeBuffer(aec->nearFrBufH);
    230     WebRtc_FreeBuffer(aec->outFrBufH);
    231 
    232     WebRtc_FreeBuffer(aec->far_buf);
    233     WebRtc_FreeBuffer(aec->far_buf_windowed);
    234 #ifdef WEBRTC_AEC_DEBUG_DUMP
    235     WebRtc_FreeBuffer(aec->far_time_buf);
    236 #endif
    237     WebRtc_FreeDelayEstimator(aec->delay_estimator);
    238 
    239     free(aec);
    240     return 0;
    241 }
    242 
    243 static void FilterFar(aec_t *aec, float yf[2][PART_LEN1])
    244 {
    245   int i;
    246   for (i = 0; i < NR_PART; i++) {
    247     int j;
    248     int xPos = (i + aec->xfBufBlockPos) * PART_LEN1;
    249     int pos = i * PART_LEN1;
    250     // Check for wrap
    251     if (i + aec->xfBufBlockPos >= NR_PART) {
    252       xPos -= NR_PART*(PART_LEN1);
    253     }
    254 
    255     for (j = 0; j < PART_LEN1; j++) {
    256       yf[0][j] += MulRe(aec->xfBuf[0][xPos + j], aec->xfBuf[1][xPos + j],
    257                         aec->wfBuf[0][ pos + j], aec->wfBuf[1][ pos + j]);
    258       yf[1][j] += MulIm(aec->xfBuf[0][xPos + j], aec->xfBuf[1][xPos + j],
    259                         aec->wfBuf[0][ pos + j], aec->wfBuf[1][ pos + j]);
    260     }
    261   }
    262 }
    263 
    264 static void ScaleErrorSignal(aec_t *aec, float ef[2][PART_LEN1])
    265 {
    266   int i;
    267   float absEf;
    268   for (i = 0; i < (PART_LEN1); i++) {
    269     ef[0][i] /= (aec->xPow[i] + 1e-10f);
    270     ef[1][i] /= (aec->xPow[i] + 1e-10f);
    271     absEf = sqrtf(ef[0][i] * ef[0][i] + ef[1][i] * ef[1][i]);
    272 
    273     if (absEf > aec->errThresh) {
    274       absEf = aec->errThresh / (absEf + 1e-10f);
    275       ef[0][i] *= absEf;
    276       ef[1][i] *= absEf;
    277     }
    278 
    279     // Stepsize factor
    280     ef[0][i] *= aec->mu;
    281     ef[1][i] *= aec->mu;
    282   }
    283 }
    284 
    285 // Time-unconstrined filter adaptation.
    286 // TODO(andrew): consider for a low-complexity mode.
    287 //static void FilterAdaptationUnconstrained(aec_t *aec, float *fft,
    288 //                                          float ef[2][PART_LEN1]) {
    289 //  int i, j;
    290 //  for (i = 0; i < NR_PART; i++) {
    291 //    int xPos = (i + aec->xfBufBlockPos)*(PART_LEN1);
    292 //    int pos;
    293 //    // Check for wrap
    294 //    if (i + aec->xfBufBlockPos >= NR_PART) {
    295 //      xPos -= NR_PART * PART_LEN1;
    296 //    }
    297 //
    298 //    pos = i * PART_LEN1;
    299 //
    300 //    for (j = 0; j < PART_LEN1; j++) {
    301 //      aec->wfBuf[pos + j][0] += MulRe(aec->xfBuf[xPos + j][0],
    302 //                                      -aec->xfBuf[xPos + j][1],
    303 //                                      ef[j][0], ef[j][1]);
    304 //      aec->wfBuf[pos + j][1] += MulIm(aec->xfBuf[xPos + j][0],
    305 //                                      -aec->xfBuf[xPos + j][1],
    306 //                                      ef[j][0], ef[j][1]);
    307 //    }
    308 //  }
    309 //}
    310 
    311 static void FilterAdaptation(aec_t *aec, float *fft, float ef[2][PART_LEN1]) {
    312   int i, j;
    313   for (i = 0; i < NR_PART; i++) {
    314     int xPos = (i + aec->xfBufBlockPos)*(PART_LEN1);
    315     int pos;
    316     // Check for wrap
    317     if (i + aec->xfBufBlockPos >= NR_PART) {
    318       xPos -= NR_PART * PART_LEN1;
    319     }
    320 
    321     pos = i * PART_LEN1;
    322 
    323     for (j = 0; j < PART_LEN; j++) {
    324 
    325       fft[2 * j] = MulRe(aec->xfBuf[0][xPos + j],
    326                          -aec->xfBuf[1][xPos + j],
    327                          ef[0][j], ef[1][j]);
    328       fft[2 * j + 1] = MulIm(aec->xfBuf[0][xPos + j],
    329                              -aec->xfBuf[1][xPos + j],
    330                              ef[0][j], ef[1][j]);
    331     }
    332     fft[1] = MulRe(aec->xfBuf[0][xPos + PART_LEN],
    333                    -aec->xfBuf[1][xPos + PART_LEN],
    334                    ef[0][PART_LEN], ef[1][PART_LEN]);
    335 
    336     aec_rdft_inverse_128(fft);
    337     memset(fft + PART_LEN, 0, sizeof(float) * PART_LEN);
    338 
    339     // fft scaling
    340     {
    341       float scale = 2.0f / PART_LEN2;
    342       for (j = 0; j < PART_LEN; j++) {
    343         fft[j] *= scale;
    344       }
    345     }
    346     aec_rdft_forward_128(fft);
    347 
    348     aec->wfBuf[0][pos] += fft[0];
    349     aec->wfBuf[0][pos + PART_LEN] += fft[1];
    350 
    351     for (j = 1; j < PART_LEN; j++) {
    352       aec->wfBuf[0][pos + j] += fft[2 * j];
    353       aec->wfBuf[1][pos + j] += fft[2 * j + 1];
    354     }
    355   }
    356 }
    357 
    358 static void OverdriveAndSuppress(aec_t *aec, float hNl[PART_LEN1],
    359                                  const float hNlFb,
    360                                  float efw[2][PART_LEN1]) {
    361   int i;
    362   for (i = 0; i < PART_LEN1; i++) {
    363     // Weight subbands
    364     if (hNl[i] > hNlFb) {
    365       hNl[i] = WebRtcAec_weightCurve[i] * hNlFb +
    366           (1 - WebRtcAec_weightCurve[i]) * hNl[i];
    367     }
    368     hNl[i] = powf(hNl[i], aec->overDriveSm * WebRtcAec_overDriveCurve[i]);
    369 
    370     // Suppress error signal
    371     efw[0][i] *= hNl[i];
    372     efw[1][i] *= hNl[i];
    373 
    374     // Ooura fft returns incorrect sign on imaginary component. It matters here
    375     // because we are making an additive change with comfort noise.
    376     efw[1][i] *= -1;
    377   }
    378 }
    379 
    380 WebRtcAec_FilterFar_t WebRtcAec_FilterFar;
    381 WebRtcAec_ScaleErrorSignal_t WebRtcAec_ScaleErrorSignal;
    382 WebRtcAec_FilterAdaptation_t WebRtcAec_FilterAdaptation;
    383 WebRtcAec_OverdriveAndSuppress_t WebRtcAec_OverdriveAndSuppress;
    384 
    385 int WebRtcAec_InitAec(aec_t *aec, int sampFreq)
    386 {
    387     int i;
    388 
    389     aec->sampFreq = sampFreq;
    390 
    391     if (sampFreq == 8000) {
    392         aec->mu = 0.6f;
    393         aec->errThresh = 2e-6f;
    394     }
    395     else {
    396         aec->mu = 0.5f;
    397         aec->errThresh = 1.5e-6f;
    398     }
    399 
    400     if (WebRtc_InitBuffer(aec->nearFrBuf) == -1) {
    401         return -1;
    402     }
    403 
    404     if (WebRtc_InitBuffer(aec->outFrBuf) == -1) {
    405         return -1;
    406     }
    407 
    408     if (WebRtc_InitBuffer(aec->nearFrBufH) == -1) {
    409         return -1;
    410     }
    411 
    412     if (WebRtc_InitBuffer(aec->outFrBufH) == -1) {
    413         return -1;
    414     }
    415 
    416     // Initialize far-end buffers.
    417     if (WebRtc_InitBuffer(aec->far_buf) == -1) {
    418         return -1;
    419     }
    420     if (WebRtc_InitBuffer(aec->far_buf_windowed) == -1) {
    421         return -1;
    422     }
    423 #ifdef WEBRTC_AEC_DEBUG_DUMP
    424     if (WebRtc_InitBuffer(aec->far_time_buf) == -1) {
    425         return -1;
    426     }
    427 #endif
    428     aec->system_delay = 0;
    429 
    430     if (WebRtc_InitDelayEstimator(aec->delay_estimator) != 0) {
    431       return -1;
    432     }
    433     aec->delay_logging_enabled = 0;
    434     memset(aec->delay_histogram, 0, sizeof(aec->delay_histogram));
    435 
    436     // Default target suppression level
    437     aec->targetSupp = -11.5;
    438     aec->minOverDrive = 2.0;
    439 
    440     // Sampling frequency multiplier
    441     // SWB is processed as 160 frame size
    442     if (aec->sampFreq == 32000) {
    443       aec->mult = (short)aec->sampFreq / 16000;
    444     }
    445     else {
    446         aec->mult = (short)aec->sampFreq / 8000;
    447     }
    448 
    449     aec->farBufWritePos = 0;
    450     aec->farBufReadPos = 0;
    451 
    452     aec->inSamples = 0;
    453     aec->outSamples = 0;
    454     aec->knownDelay = 0;
    455 
    456     // Initialize buffers
    457     memset(aec->dBuf, 0, sizeof(aec->dBuf));
    458     memset(aec->eBuf, 0, sizeof(aec->eBuf));
    459     // For H band
    460     memset(aec->dBufH, 0, sizeof(aec->dBufH));
    461 
    462     memset(aec->xPow, 0, sizeof(aec->xPow));
    463     memset(aec->dPow, 0, sizeof(aec->dPow));
    464     memset(aec->dInitMinPow, 0, sizeof(aec->dInitMinPow));
    465     aec->noisePow = aec->dInitMinPow;
    466     aec->noiseEstCtr = 0;
    467 
    468     // Initial comfort noise power
    469     for (i = 0; i < PART_LEN1; i++) {
    470         aec->dMinPow[i] = 1.0e6f;
    471     }
    472 
    473     // Holds the last block written to
    474     aec->xfBufBlockPos = 0;
    475     // TODO: Investigate need for these initializations. Deleting them doesn't
    476     //       change the output at all and yields 0.4% overall speedup.
    477     memset(aec->xfBuf, 0, sizeof(complex_t) * NR_PART * PART_LEN1);
    478     memset(aec->wfBuf, 0, sizeof(complex_t) * NR_PART * PART_LEN1);
    479     memset(aec->sde, 0, sizeof(complex_t) * PART_LEN1);
    480     memset(aec->sxd, 0, sizeof(complex_t) * PART_LEN1);
    481     memset(aec->xfwBuf, 0, sizeof(complex_t) * NR_PART * PART_LEN1);
    482     memset(aec->se, 0, sizeof(float) * PART_LEN1);
    483 
    484     // To prevent numerical instability in the first block.
    485     for (i = 0; i < PART_LEN1; i++) {
    486         aec->sd[i] = 1;
    487     }
    488     for (i = 0; i < PART_LEN1; i++) {
    489         aec->sx[i] = 1;
    490     }
    491 
    492     memset(aec->hNs, 0, sizeof(aec->hNs));
    493     memset(aec->outBuf, 0, sizeof(float) * PART_LEN);
    494 
    495     aec->hNlFbMin = 1;
    496     aec->hNlFbLocalMin = 1;
    497     aec->hNlXdAvgMin = 1;
    498     aec->hNlNewMin = 0;
    499     aec->hNlMinCtr = 0;
    500     aec->overDrive = 2;
    501     aec->overDriveSm = 2;
    502     aec->delayIdx = 0;
    503     aec->stNearState = 0;
    504     aec->echoState = 0;
    505     aec->divergeState = 0;
    506 
    507     aec->seed = 777;
    508     aec->delayEstCtr = 0;
    509 
    510     // Metrics disabled by default
    511     aec->metricsMode = 0;
    512     WebRtcAec_InitMetrics(aec);
    513 
    514     // Assembly optimization
    515     WebRtcAec_FilterFar = FilterFar;
    516     WebRtcAec_ScaleErrorSignal = ScaleErrorSignal;
    517     WebRtcAec_FilterAdaptation = FilterAdaptation;
    518     WebRtcAec_OverdriveAndSuppress = OverdriveAndSuppress;
    519     if (WebRtc_GetCPUInfo(kSSE2)) {
    520 #if defined(WEBRTC_USE_SSE2)
    521       WebRtcAec_InitAec_SSE2();
    522 #endif
    523     }
    524     aec_rdft_init();
    525 
    526     return 0;
    527 }
    528 
    529 void WebRtcAec_InitMetrics(aec_t *aec)
    530 {
    531     aec->stateCounter = 0;
    532     WebRtcAec_InitLevel(&aec->farlevel);
    533     WebRtcAec_InitLevel(&aec->nearlevel);
    534     WebRtcAec_InitLevel(&aec->linoutlevel);
    535     WebRtcAec_InitLevel(&aec->nlpoutlevel);
    536 
    537     WebRtcAec_InitStats(&aec->erl);
    538     WebRtcAec_InitStats(&aec->erle);
    539     WebRtcAec_InitStats(&aec->aNlp);
    540     WebRtcAec_InitStats(&aec->rerl);
    541 }
    542 
    543 
    544 void WebRtcAec_BufferFarendPartition(aec_t *aec, const float* farend) {
    545   float fft[PART_LEN2];
    546   float xf[2][PART_LEN1];
    547 
    548   // Check if the buffer is full, and in that case flush the oldest data.
    549   if (WebRtc_available_write(aec->far_buf) < 1) {
    550     WebRtc_MoveReadPtr(aec->far_buf, 1);
    551     WebRtc_MoveReadPtr(aec->far_buf_windowed, 1);
    552     aec->system_delay -= PART_LEN;
    553 #ifdef WEBRTC_AEC_DEBUG_DUMP
    554     WebRtc_MoveReadPtr(aec->far_time_buf, 1);
    555 #endif
    556   }
    557   // Convert far-end partition to the frequency domain without windowing.
    558   memcpy(fft, farend, sizeof(float) * PART_LEN2);
    559   TimeToFrequency(fft, xf, 0);
    560   WebRtc_WriteBuffer(aec->far_buf, &xf[0][0], 1);
    561 
    562   // Convert far-end partition to the frequency domain with windowing.
    563   memcpy(fft, farend, sizeof(float) * PART_LEN2);
    564   TimeToFrequency(fft, xf, 1);
    565   WebRtc_WriteBuffer(aec->far_buf_windowed, &xf[0][0], 1);
    566 }
    567 
    568 void WebRtcAec_ProcessFrame(aec_t *aec,
    569                             const short *nearend,
    570                             const short *nearendH,
    571                             int knownDelay)
    572 {
    573     // For each frame the process is as follows:
    574     // 1) If the system_delay indicates on being too small for processing a
    575     //    frame we stuff the buffer with enough data for 10 ms.
    576     // 2) Adjust the buffer to the system delay, by moving the read pointer.
    577     // 3) If we can't move read pointer due to buffer size limitations we
    578     //    flush/stuff the buffer.
    579     // 4) Process as many partitions as possible.
    580     // 5) Update the |system_delay| with respect to a full frame of FRAME_LEN
    581     //    samples. Even though we will have data left to process (we work with
    582     //    partitions) we consider updating a whole frame, since that's the
    583     //    amount of data we input and output in audio_processing.
    584 
    585     // TODO(bjornv): Investigate how we should round the delay difference; right
    586     // now we know that incoming |knownDelay| is underestimated when it's less
    587     // than |aec->knownDelay|. We therefore, round (-32) in that direction. In
    588     // the other direction, we don't have this situation, but might flush one
    589     // partition too little. This can cause non-causality, which should be
    590     // investigated. Maybe, allow for a non-symmetric rounding, like -16.
    591     int move_elements = (aec->knownDelay - knownDelay - 32) / PART_LEN;
    592     int moved_elements = 0;
    593 
    594     // TODO(bjornv): Change the near-end buffer handling to be the same as for
    595     // far-end, that is, with a near_pre_buf.
    596     // Buffer the near-end frame.
    597     WebRtc_WriteBuffer(aec->nearFrBuf, nearend, FRAME_LEN);
    598     // For H band
    599     if (aec->sampFreq == 32000) {
    600         WebRtc_WriteBuffer(aec->nearFrBufH, nearendH, FRAME_LEN);
    601     }
    602 
    603     // 1) At most we process |aec->mult|+1 partitions in 10 ms. Make sure we
    604     // have enough far-end data for that by stuffing the buffer if the
    605     // |system_delay| indicates others.
    606     if (aec->system_delay < FRAME_LEN) {
    607       // We don't have enough data so we rewind 10 ms.
    608       WebRtc_MoveReadPtr(aec->far_buf_windowed, -(aec->mult + 1));
    609       aec->system_delay -= WebRtc_MoveReadPtr(aec->far_buf, -(aec->mult + 1)) *
    610           PART_LEN;
    611 #ifdef WEBRTC_AEC_DEBUG_DUMP
    612       WebRtc_MoveReadPtr(aec->far_time_buf, -(aec->mult + 1));
    613 #endif
    614     }
    615 
    616     // 2) Compensate for a possible change in the system delay.
    617 
    618     WebRtc_MoveReadPtr(aec->far_buf_windowed, move_elements);
    619     moved_elements = WebRtc_MoveReadPtr(aec->far_buf, move_elements);
    620     aec->knownDelay -= moved_elements * PART_LEN;
    621 #ifdef WEBRTC_AEC_DEBUG_DUMP
    622     WebRtc_MoveReadPtr(aec->far_time_buf, move_elements);
    623 #endif
    624 
    625     // 4) Process as many blocks as possible.
    626     while (WebRtc_available_read(aec->nearFrBuf) >= PART_LEN) {
    627         ProcessBlock(aec);
    628     }
    629 
    630     // 5) Update system delay with respect to the entire frame.
    631     aec->system_delay -= FRAME_LEN;
    632 }
    633 
    634 static void ProcessBlock(aec_t* aec) {
    635     int i;
    636     float d[PART_LEN], y[PART_LEN], e[PART_LEN], dH[PART_LEN];
    637     float scale;
    638 
    639     float fft[PART_LEN2];
    640     float xf[2][PART_LEN1], yf[2][PART_LEN1], ef[2][PART_LEN1];
    641     float df[2][PART_LEN1];
    642     float far_spectrum = 0.0f;
    643     float near_spectrum = 0.0f;
    644     float abs_far_spectrum[PART_LEN1];
    645     float abs_near_spectrum[PART_LEN1];
    646 
    647     const float gPow[2] = {0.9f, 0.1f};
    648 
    649     // Noise estimate constants.
    650     const int noiseInitBlocks = 500 * aec->mult;
    651     const float step = 0.1f;
    652     const float ramp = 1.0002f;
    653     const float gInitNoise[2] = {0.999f, 0.001f};
    654 
    655     int16_t nearend[PART_LEN];
    656     int16_t* nearend_ptr = NULL;
    657     int16_t output[PART_LEN];
    658     int16_t outputH[PART_LEN];
    659 
    660     float* xf_ptr = NULL;
    661 
    662     memset(dH, 0, sizeof(dH));
    663     if (aec->sampFreq == 32000) {
    664       // Get the upper band first so we can reuse |nearend|.
    665       WebRtc_ReadBuffer(aec->nearFrBufH,
    666                         (void**) &nearend_ptr,
    667                         nearend,
    668                         PART_LEN);
    669       for (i = 0; i < PART_LEN; i++) {
    670           dH[i] = (float) (nearend_ptr[i]);
    671       }
    672       memcpy(aec->dBufH + PART_LEN, dH, sizeof(float) * PART_LEN);
    673     }
    674     WebRtc_ReadBuffer(aec->nearFrBuf, (void**) &nearend_ptr, nearend, PART_LEN);
    675 
    676     // ---------- Ooura fft ----------
    677     // Concatenate old and new nearend blocks.
    678     for (i = 0; i < PART_LEN; i++) {
    679         d[i] = (float) (nearend_ptr[i]);
    680     }
    681     memcpy(aec->dBuf + PART_LEN, d, sizeof(float) * PART_LEN);
    682 
    683 #ifdef WEBRTC_AEC_DEBUG_DUMP
    684     {
    685         int16_t farend[PART_LEN];
    686         int16_t* farend_ptr = NULL;
    687         WebRtc_ReadBuffer(aec->far_time_buf, (void**) &farend_ptr, farend, 1);
    688         fwrite(farend_ptr, sizeof(int16_t), PART_LEN, aec->farFile);
    689         fwrite(nearend_ptr, sizeof(int16_t), PART_LEN, aec->nearFile);
    690     }
    691 #endif
    692 
    693     // We should always have at least one element stored in |far_buf|.
    694     assert(WebRtc_available_read(aec->far_buf) > 0);
    695     WebRtc_ReadBuffer(aec->far_buf, (void**) &xf_ptr, &xf[0][0], 1);
    696 
    697     // Near fft
    698     memcpy(fft, aec->dBuf, sizeof(float) * PART_LEN2);
    699     TimeToFrequency(fft, df, 0);
    700 
    701     // Power smoothing
    702     for (i = 0; i < PART_LEN1; i++) {
    703       far_spectrum = (xf_ptr[i] * xf_ptr[i]) +
    704           (xf_ptr[PART_LEN1 + i] * xf_ptr[PART_LEN1 + i]);
    705       aec->xPow[i] = gPow[0] * aec->xPow[i] + gPow[1] * NR_PART * far_spectrum;
    706       // Calculate absolute spectra
    707       abs_far_spectrum[i] = sqrtf(far_spectrum);
    708 
    709       near_spectrum = df[0][i] * df[0][i] + df[1][i] * df[1][i];
    710       aec->dPow[i] = gPow[0] * aec->dPow[i] + gPow[1] * near_spectrum;
    711       // Calculate absolute spectra
    712       abs_near_spectrum[i] = sqrtf(near_spectrum);
    713     }
    714 
    715     // Estimate noise power. Wait until dPow is more stable.
    716     if (aec->noiseEstCtr > 50) {
    717         for (i = 0; i < PART_LEN1; i++) {
    718             if (aec->dPow[i] < aec->dMinPow[i]) {
    719                 aec->dMinPow[i] = (aec->dPow[i] + step * (aec->dMinPow[i] -
    720                     aec->dPow[i])) * ramp;
    721             }
    722             else {
    723                 aec->dMinPow[i] *= ramp;
    724             }
    725         }
    726     }
    727 
    728     // Smooth increasing noise power from zero at the start,
    729     // to avoid a sudden burst of comfort noise.
    730     if (aec->noiseEstCtr < noiseInitBlocks) {
    731         aec->noiseEstCtr++;
    732         for (i = 0; i < PART_LEN1; i++) {
    733             if (aec->dMinPow[i] > aec->dInitMinPow[i]) {
    734                 aec->dInitMinPow[i] = gInitNoise[0] * aec->dInitMinPow[i] +
    735                     gInitNoise[1] * aec->dMinPow[i];
    736             }
    737             else {
    738                 aec->dInitMinPow[i] = aec->dMinPow[i];
    739             }
    740         }
    741         aec->noisePow = aec->dInitMinPow;
    742     }
    743     else {
    744         aec->noisePow = aec->dMinPow;
    745     }
    746 
    747     // Block wise delay estimation used for logging
    748     if (aec->delay_logging_enabled) {
    749       int delay_estimate = 0;
    750       // Estimate the delay
    751       delay_estimate = WebRtc_DelayEstimatorProcessFloat(aec->delay_estimator,
    752                                                          abs_far_spectrum,
    753                                                          abs_near_spectrum,
    754                                                          PART_LEN1);
    755       if (delay_estimate >= 0) {
    756         // Update delay estimate buffer.
    757         aec->delay_histogram[delay_estimate]++;
    758       }
    759     }
    760 
    761     // Update the xfBuf block position.
    762     aec->xfBufBlockPos--;
    763     if (aec->xfBufBlockPos == -1) {
    764         aec->xfBufBlockPos = NR_PART - 1;
    765     }
    766 
    767     // Buffer xf
    768     memcpy(aec->xfBuf[0] + aec->xfBufBlockPos * PART_LEN1, xf_ptr,
    769            sizeof(float) * PART_LEN1);
    770     memcpy(aec->xfBuf[1] + aec->xfBufBlockPos * PART_LEN1, &xf_ptr[PART_LEN1],
    771            sizeof(float) * PART_LEN1);
    772 
    773     memset(yf[0], 0, sizeof(float) * (PART_LEN1 * 2));
    774 
    775     // Filter far
    776     WebRtcAec_FilterFar(aec, yf);
    777 
    778     // Inverse fft to obtain echo estimate and error.
    779     fft[0] = yf[0][0];
    780     fft[1] = yf[0][PART_LEN];
    781     for (i = 1; i < PART_LEN; i++) {
    782         fft[2 * i] = yf[0][i];
    783         fft[2 * i + 1] = yf[1][i];
    784     }
    785     aec_rdft_inverse_128(fft);
    786 
    787     scale = 2.0f / PART_LEN2;
    788     for (i = 0; i < PART_LEN; i++) {
    789         y[i] = fft[PART_LEN + i] * scale; // fft scaling
    790     }
    791 
    792     for (i = 0; i < PART_LEN; i++) {
    793         e[i] = d[i] - y[i];
    794     }
    795 
    796     // Error fft
    797     memcpy(aec->eBuf + PART_LEN, e, sizeof(float) * PART_LEN);
    798     memset(fft, 0, sizeof(float) * PART_LEN);
    799     memcpy(fft + PART_LEN, e, sizeof(float) * PART_LEN);
    800     // TODO(bjornv): Change to use TimeToFrequency().
    801     aec_rdft_forward_128(fft);
    802 
    803     ef[1][0] = 0;
    804     ef[1][PART_LEN] = 0;
    805     ef[0][0] = fft[0];
    806     ef[0][PART_LEN] = fft[1];
    807     for (i = 1; i < PART_LEN; i++) {
    808         ef[0][i] = fft[2 * i];
    809         ef[1][i] = fft[2 * i + 1];
    810     }
    811 
    812     if (aec->metricsMode == 1) {
    813       // Note that the first PART_LEN samples in fft (before transformation) are
    814       // zero. Hence, the scaling by two in UpdateLevel() should not be
    815       // performed. That scaling is taken care of in UpdateMetrics() instead.
    816       UpdateLevel(&aec->linoutlevel, ef);
    817     }
    818 
    819     // Scale error signal inversely with far power.
    820     WebRtcAec_ScaleErrorSignal(aec, ef);
    821     WebRtcAec_FilterAdaptation(aec, fft, ef);
    822     NonLinearProcessing(aec, output, outputH);
    823 
    824     if (aec->metricsMode == 1) {
    825         // Update power levels and echo metrics
    826         UpdateLevel(&aec->farlevel, (float (*)[PART_LEN1]) xf_ptr);
    827         UpdateLevel(&aec->nearlevel, df);
    828         UpdateMetrics(aec);
    829     }
    830 
    831     // Store the output block.
    832     WebRtc_WriteBuffer(aec->outFrBuf, output, PART_LEN);
    833     // For H band
    834     if (aec->sampFreq == 32000) {
    835         WebRtc_WriteBuffer(aec->outFrBufH, outputH, PART_LEN);
    836     }
    837 
    838 #ifdef WEBRTC_AEC_DEBUG_DUMP
    839     {
    840         int16_t eInt16[PART_LEN];
    841         for (i = 0; i < PART_LEN; i++) {
    842             eInt16[i] = (int16_t)WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX, e[i],
    843                 WEBRTC_SPL_WORD16_MIN);
    844         }
    845 
    846         fwrite(eInt16, sizeof(int16_t), PART_LEN, aec->outLinearFile);
    847         fwrite(output, sizeof(int16_t), PART_LEN, aec->outFile);
    848     }
    849 #endif
    850 }
    851 
    852 static void NonLinearProcessing(aec_t *aec, short *output, short *outputH)
    853 {
    854     float efw[2][PART_LEN1], dfw[2][PART_LEN1], xfw[2][PART_LEN1];
    855     complex_t comfortNoiseHband[PART_LEN1];
    856     float fft[PART_LEN2];
    857     float scale, dtmp;
    858     float nlpGainHband;
    859     int i, j, pos;
    860 
    861     // Coherence and non-linear filter
    862     float cohde[PART_LEN1], cohxd[PART_LEN1];
    863     float hNlDeAvg, hNlXdAvg;
    864     float hNl[PART_LEN1];
    865     float hNlPref[PREF_BAND_SIZE];
    866     float hNlFb = 0, hNlFbLow = 0;
    867     const float prefBandQuant = 0.75f, prefBandQuantLow = 0.5f;
    868     const int prefBandSize = PREF_BAND_SIZE / aec->mult;
    869     const int minPrefBand = 4 / aec->mult;
    870 
    871     // Near and error power sums
    872     float sdSum = 0, seSum = 0;
    873 
    874     // Power estimate smoothing coefficients
    875     const float gCoh[2][2] = {{0.9f, 0.1f}, {0.93f, 0.07f}};
    876     const float *ptrGCoh = gCoh[aec->mult - 1];
    877 
    878     // Filter energy
    879     float wfEnMax = 0, wfEn = 0;
    880     const int delayEstInterval = 10 * aec->mult;
    881 
    882     float* xfw_ptr = NULL;
    883 
    884     aec->delayEstCtr++;
    885     if (aec->delayEstCtr == delayEstInterval) {
    886         aec->delayEstCtr = 0;
    887     }
    888 
    889     // initialize comfort noise for H band
    890     memset(comfortNoiseHband, 0, sizeof(comfortNoiseHband));
    891     nlpGainHband = (float)0.0;
    892     dtmp = (float)0.0;
    893 
    894     // Measure energy in each filter partition to determine delay.
    895     // TODO: Spread by computing one partition per block?
    896     if (aec->delayEstCtr == 0) {
    897         wfEnMax = 0;
    898         aec->delayIdx = 0;
    899         for (i = 0; i < NR_PART; i++) {
    900             pos = i * PART_LEN1;
    901             wfEn = 0;
    902             for (j = 0; j < PART_LEN1; j++) {
    903                 wfEn += aec->wfBuf[0][pos + j] * aec->wfBuf[0][pos + j] +
    904                     aec->wfBuf[1][pos + j] * aec->wfBuf[1][pos + j];
    905             }
    906 
    907             if (wfEn > wfEnMax) {
    908                 wfEnMax = wfEn;
    909                 aec->delayIdx = i;
    910             }
    911         }
    912     }
    913 
    914     // We should always have at least one element stored in |far_buf|.
    915     assert(WebRtc_available_read(aec->far_buf_windowed) > 0);
    916     // NLP
    917     WebRtc_ReadBuffer(aec->far_buf_windowed, (void**) &xfw_ptr, &xfw[0][0], 1);
    918 
    919     // TODO(bjornv): Investigate if we can reuse |far_buf_windowed| instead of
    920     // |xfwBuf|.
    921     // Buffer far.
    922     memcpy(aec->xfwBuf, xfw_ptr, sizeof(float) * 2 * PART_LEN1);
    923 
    924     // Use delayed far.
    925     memcpy(xfw, aec->xfwBuf + aec->delayIdx * PART_LEN1, sizeof(xfw));
    926 
    927     // Windowed near fft
    928     for (i = 0; i < PART_LEN; i++) {
    929         fft[i] = aec->dBuf[i] * sqrtHanning[i];
    930         fft[PART_LEN + i] = aec->dBuf[PART_LEN + i] * sqrtHanning[PART_LEN - i];
    931     }
    932     aec_rdft_forward_128(fft);
    933 
    934     dfw[1][0] = 0;
    935     dfw[1][PART_LEN] = 0;
    936     dfw[0][0] = fft[0];
    937     dfw[0][PART_LEN] = fft[1];
    938     for (i = 1; i < PART_LEN; i++) {
    939         dfw[0][i] = fft[2 * i];
    940         dfw[1][i] = fft[2 * i + 1];
    941     }
    942 
    943     // Windowed error fft
    944     for (i = 0; i < PART_LEN; i++) {
    945         fft[i] = aec->eBuf[i] * sqrtHanning[i];
    946         fft[PART_LEN + i] = aec->eBuf[PART_LEN + i] * sqrtHanning[PART_LEN - i];
    947     }
    948     aec_rdft_forward_128(fft);
    949     efw[1][0] = 0;
    950     efw[1][PART_LEN] = 0;
    951     efw[0][0] = fft[0];
    952     efw[0][PART_LEN] = fft[1];
    953     for (i = 1; i < PART_LEN; i++) {
    954         efw[0][i] = fft[2 * i];
    955         efw[1][i] = fft[2 * i + 1];
    956     }
    957 
    958     // Smoothed PSD
    959     for (i = 0; i < PART_LEN1; i++) {
    960         aec->sd[i] = ptrGCoh[0] * aec->sd[i] + ptrGCoh[1] *
    961             (dfw[0][i] * dfw[0][i] + dfw[1][i] * dfw[1][i]);
    962         aec->se[i] = ptrGCoh[0] * aec->se[i] + ptrGCoh[1] *
    963             (efw[0][i] * efw[0][i] + efw[1][i] * efw[1][i]);
    964         // We threshold here to protect against the ill-effects of a zero farend.
    965         // The threshold is not arbitrarily chosen, but balances protection and
    966         // adverse interaction with the algorithm's tuning.
    967         // TODO: investigate further why this is so sensitive.
    968         aec->sx[i] = ptrGCoh[0] * aec->sx[i] + ptrGCoh[1] *
    969             WEBRTC_SPL_MAX(xfw[0][i] * xfw[0][i] + xfw[1][i] * xfw[1][i], 15);
    970 
    971         aec->sde[i][0] = ptrGCoh[0] * aec->sde[i][0] + ptrGCoh[1] *
    972             (dfw[0][i] * efw[0][i] + dfw[1][i] * efw[1][i]);
    973         aec->sde[i][1] = ptrGCoh[0] * aec->sde[i][1] + ptrGCoh[1] *
    974             (dfw[0][i] * efw[1][i] - dfw[1][i] * efw[0][i]);
    975 
    976         aec->sxd[i][0] = ptrGCoh[0] * aec->sxd[i][0] + ptrGCoh[1] *
    977             (dfw[0][i] * xfw[0][i] + dfw[1][i] * xfw[1][i]);
    978         aec->sxd[i][1] = ptrGCoh[0] * aec->sxd[i][1] + ptrGCoh[1] *
    979             (dfw[0][i] * xfw[1][i] - dfw[1][i] * xfw[0][i]);
    980 
    981         sdSum += aec->sd[i];
    982         seSum += aec->se[i];
    983     }
    984 
    985     // Divergent filter safeguard.
    986     if (aec->divergeState == 0) {
    987         if (seSum > sdSum) {
    988             aec->divergeState = 1;
    989         }
    990     }
    991     else {
    992         if (seSum * 1.05f < sdSum) {
    993             aec->divergeState = 0;
    994         }
    995     }
    996 
    997     if (aec->divergeState == 1) {
    998         memcpy(efw, dfw, sizeof(efw));
    999     }
   1000 
   1001     // Reset if error is significantly larger than nearend (13 dB).
   1002     if (seSum > (19.95f * sdSum)) {
   1003         memset(aec->wfBuf, 0, sizeof(aec->wfBuf));
   1004     }
   1005 
   1006     // Subband coherence
   1007     for (i = 0; i < PART_LEN1; i++) {
   1008         cohde[i] = (aec->sde[i][0] * aec->sde[i][0] + aec->sde[i][1] * aec->sde[i][1]) /
   1009             (aec->sd[i] * aec->se[i] + 1e-10f);
   1010         cohxd[i] = (aec->sxd[i][0] * aec->sxd[i][0] + aec->sxd[i][1] * aec->sxd[i][1]) /
   1011             (aec->sx[i] * aec->sd[i] + 1e-10f);
   1012     }
   1013 
   1014     hNlXdAvg = 0;
   1015     for (i = minPrefBand; i < prefBandSize + minPrefBand; i++) {
   1016         hNlXdAvg += cohxd[i];
   1017     }
   1018     hNlXdAvg /= prefBandSize;
   1019     hNlXdAvg = 1 - hNlXdAvg;
   1020 
   1021     hNlDeAvg = 0;
   1022     for (i = minPrefBand; i < prefBandSize + minPrefBand; i++) {
   1023         hNlDeAvg += cohde[i];
   1024     }
   1025     hNlDeAvg /= prefBandSize;
   1026 
   1027     if (hNlXdAvg < 0.75f && hNlXdAvg < aec->hNlXdAvgMin) {
   1028         aec->hNlXdAvgMin = hNlXdAvg;
   1029     }
   1030 
   1031     if (hNlDeAvg > 0.98f && hNlXdAvg > 0.9f) {
   1032         aec->stNearState = 1;
   1033     }
   1034     else if (hNlDeAvg < 0.95f || hNlXdAvg < 0.8f) {
   1035         aec->stNearState = 0;
   1036     }
   1037 
   1038     if (aec->hNlXdAvgMin == 1) {
   1039         aec->echoState = 0;
   1040         aec->overDrive = aec->minOverDrive;
   1041 
   1042         if (aec->stNearState == 1) {
   1043             memcpy(hNl, cohde, sizeof(hNl));
   1044             hNlFb = hNlDeAvg;
   1045             hNlFbLow = hNlDeAvg;
   1046         }
   1047         else {
   1048             for (i = 0; i < PART_LEN1; i++) {
   1049                 hNl[i] = 1 - cohxd[i];
   1050             }
   1051             hNlFb = hNlXdAvg;
   1052             hNlFbLow = hNlXdAvg;
   1053         }
   1054     }
   1055     else {
   1056 
   1057         if (aec->stNearState == 1) {
   1058             aec->echoState = 0;
   1059             memcpy(hNl, cohde, sizeof(hNl));
   1060             hNlFb = hNlDeAvg;
   1061             hNlFbLow = hNlDeAvg;
   1062         }
   1063         else {
   1064             aec->echoState = 1;
   1065             for (i = 0; i < PART_LEN1; i++) {
   1066                 hNl[i] = WEBRTC_SPL_MIN(cohde[i], 1 - cohxd[i]);
   1067             }
   1068 
   1069             // Select an order statistic from the preferred bands.
   1070             // TODO: Using quicksort now, but a selection algorithm may be preferred.
   1071             memcpy(hNlPref, &hNl[minPrefBand], sizeof(float) * prefBandSize);
   1072             qsort(hNlPref, prefBandSize, sizeof(float), CmpFloat);
   1073             hNlFb = hNlPref[(int)floor(prefBandQuant * (prefBandSize - 1))];
   1074             hNlFbLow = hNlPref[(int)floor(prefBandQuantLow * (prefBandSize - 1))];
   1075         }
   1076     }
   1077 
   1078     // Track the local filter minimum to determine suppression overdrive.
   1079     if (hNlFbLow < 0.6f && hNlFbLow < aec->hNlFbLocalMin) {
   1080         aec->hNlFbLocalMin = hNlFbLow;
   1081         aec->hNlFbMin = hNlFbLow;
   1082         aec->hNlNewMin = 1;
   1083         aec->hNlMinCtr = 0;
   1084     }
   1085     aec->hNlFbLocalMin = WEBRTC_SPL_MIN(aec->hNlFbLocalMin + 0.0008f / aec->mult, 1);
   1086     aec->hNlXdAvgMin = WEBRTC_SPL_MIN(aec->hNlXdAvgMin + 0.0006f / aec->mult, 1);
   1087 
   1088     if (aec->hNlNewMin == 1) {
   1089         aec->hNlMinCtr++;
   1090     }
   1091     if (aec->hNlMinCtr == 2) {
   1092         aec->hNlNewMin = 0;
   1093         aec->hNlMinCtr = 0;
   1094         aec->overDrive = WEBRTC_SPL_MAX(aec->targetSupp /
   1095             ((float)log(aec->hNlFbMin + 1e-10f) + 1e-10f), aec->minOverDrive);
   1096     }
   1097 
   1098     // Smooth the overdrive.
   1099     if (aec->overDrive < aec->overDriveSm) {
   1100       aec->overDriveSm = 0.99f * aec->overDriveSm + 0.01f * aec->overDrive;
   1101     }
   1102     else {
   1103       aec->overDriveSm = 0.9f * aec->overDriveSm + 0.1f * aec->overDrive;
   1104     }
   1105 
   1106     WebRtcAec_OverdriveAndSuppress(aec, hNl, hNlFb, efw);
   1107 
   1108     // Add comfort noise.
   1109     ComfortNoise(aec, efw, comfortNoiseHband, aec->noisePow, hNl);
   1110 
   1111     // TODO(bjornv): Investigate how to take the windowing below into account if
   1112     // needed.
   1113     if (aec->metricsMode == 1) {
   1114       // Note that we have a scaling by two in the time domain |eBuf|.
   1115       // In addition the time domain signal is windowed before transformation,
   1116       // losing half the energy on the average. We take care of the first
   1117       // scaling only in UpdateMetrics().
   1118       UpdateLevel(&aec->nlpoutlevel, efw);
   1119     }
   1120     // Inverse error fft.
   1121     fft[0] = efw[0][0];
   1122     fft[1] = efw[0][PART_LEN];
   1123     for (i = 1; i < PART_LEN; i++) {
   1124         fft[2*i] = efw[0][i];
   1125         // Sign change required by Ooura fft.
   1126         fft[2*i + 1] = -efw[1][i];
   1127     }
   1128     aec_rdft_inverse_128(fft);
   1129 
   1130     // Overlap and add to obtain output.
   1131     scale = 2.0f / PART_LEN2;
   1132     for (i = 0; i < PART_LEN; i++) {
   1133         fft[i] *= scale; // fft scaling
   1134         fft[i] = fft[i]*sqrtHanning[i] + aec->outBuf[i];
   1135 
   1136         // Saturation protection
   1137         output[i] = (short)WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX, fft[i],
   1138             WEBRTC_SPL_WORD16_MIN);
   1139 
   1140         fft[PART_LEN + i] *= scale; // fft scaling
   1141         aec->outBuf[i] = fft[PART_LEN + i] * sqrtHanning[PART_LEN - i];
   1142     }
   1143 
   1144     // For H band
   1145     if (aec->sampFreq == 32000) {
   1146 
   1147         // H band gain
   1148         // average nlp over low band: average over second half of freq spectrum
   1149         // (4->8khz)
   1150         GetHighbandGain(hNl, &nlpGainHband);
   1151 
   1152         // Inverse comfort_noise
   1153         if (flagHbandCn == 1) {
   1154             fft[0] = comfortNoiseHband[0][0];
   1155             fft[1] = comfortNoiseHband[PART_LEN][0];
   1156             for (i = 1; i < PART_LEN; i++) {
   1157                 fft[2*i] = comfortNoiseHband[i][0];
   1158                 fft[2*i + 1] = comfortNoiseHband[i][1];
   1159             }
   1160             aec_rdft_inverse_128(fft);
   1161             scale = 2.0f / PART_LEN2;
   1162         }
   1163 
   1164         // compute gain factor
   1165         for (i = 0; i < PART_LEN; i++) {
   1166             dtmp = (float)aec->dBufH[i];
   1167             dtmp = (float)dtmp * nlpGainHband; // for variable gain
   1168 
   1169             // add some comfort noise where Hband is attenuated
   1170             if (flagHbandCn == 1) {
   1171                 fft[i] *= scale; // fft scaling
   1172                 dtmp += cnScaleHband * fft[i];
   1173             }
   1174 
   1175             // Saturation protection
   1176             outputH[i] = (short)WEBRTC_SPL_SAT(WEBRTC_SPL_WORD16_MAX, dtmp,
   1177                 WEBRTC_SPL_WORD16_MIN);
   1178          }
   1179     }
   1180 
   1181     // Copy the current block to the old position.
   1182     memcpy(aec->dBuf, aec->dBuf + PART_LEN, sizeof(float) * PART_LEN);
   1183     memcpy(aec->eBuf, aec->eBuf + PART_LEN, sizeof(float) * PART_LEN);
   1184 
   1185     // Copy the current block to the old position for H band
   1186     if (aec->sampFreq == 32000) {
   1187         memcpy(aec->dBufH, aec->dBufH + PART_LEN, sizeof(float) * PART_LEN);
   1188     }
   1189 
   1190     memmove(aec->xfwBuf + PART_LEN1, aec->xfwBuf, sizeof(aec->xfwBuf) -
   1191         sizeof(complex_t) * PART_LEN1);
   1192 }
   1193 
   1194 static void GetHighbandGain(const float *lambda, float *nlpGainHband)
   1195 {
   1196     int i;
   1197 
   1198     nlpGainHband[0] = (float)0.0;
   1199     for (i = freqAvgIc; i < PART_LEN1 - 1; i++) {
   1200         nlpGainHband[0] += lambda[i];
   1201     }
   1202     nlpGainHband[0] /= (float)(PART_LEN1 - 1 - freqAvgIc);
   1203 }
   1204 
   1205 static void ComfortNoise(aec_t *aec, float efw[2][PART_LEN1],
   1206     complex_t *comfortNoiseHband, const float *noisePow, const float *lambda)
   1207 {
   1208     int i, num;
   1209     float rand[PART_LEN];
   1210     float noise, noiseAvg, tmp, tmpAvg;
   1211     WebRtc_Word16 randW16[PART_LEN];
   1212     complex_t u[PART_LEN1];
   1213 
   1214     const float pi2 = 6.28318530717959f;
   1215 
   1216     // Generate a uniform random array on [0 1]
   1217     WebRtcSpl_RandUArray(randW16, PART_LEN, &aec->seed);
   1218     for (i = 0; i < PART_LEN; i++) {
   1219         rand[i] = ((float)randW16[i]) / 32768;
   1220     }
   1221 
   1222     // Reject LF noise
   1223     u[0][0] = 0;
   1224     u[0][1] = 0;
   1225     for (i = 1; i < PART_LEN1; i++) {
   1226         tmp = pi2 * rand[i - 1];
   1227 
   1228         noise = sqrtf(noisePow[i]);
   1229         u[i][0] = noise * (float)cos(tmp);
   1230         u[i][1] = -noise * (float)sin(tmp);
   1231     }
   1232     u[PART_LEN][1] = 0;
   1233 
   1234     for (i = 0; i < PART_LEN1; i++) {
   1235         // This is the proper weighting to match the background noise power
   1236         tmp = sqrtf(WEBRTC_SPL_MAX(1 - lambda[i] * lambda[i], 0));
   1237         //tmp = 1 - lambda[i];
   1238         efw[0][i] += tmp * u[i][0];
   1239         efw[1][i] += tmp * u[i][1];
   1240     }
   1241 
   1242     // For H band comfort noise
   1243     // TODO: don't compute noise and "tmp" twice. Use the previous results.
   1244     noiseAvg = 0.0;
   1245     tmpAvg = 0.0;
   1246     num = 0;
   1247     if (aec->sampFreq == 32000 && flagHbandCn == 1) {
   1248 
   1249         // average noise scale
   1250         // average over second half of freq spectrum (i.e., 4->8khz)
   1251         // TODO: we shouldn't need num. We know how many elements we're summing.
   1252         for (i = PART_LEN1 >> 1; i < PART_LEN1; i++) {
   1253             num++;
   1254             noiseAvg += sqrtf(noisePow[i]);
   1255         }
   1256         noiseAvg /= (float)num;
   1257 
   1258         // average nlp scale
   1259         // average over second half of freq spectrum (i.e., 4->8khz)
   1260         // TODO: we shouldn't need num. We know how many elements we're summing.
   1261         num = 0;
   1262         for (i = PART_LEN1 >> 1; i < PART_LEN1; i++) {
   1263             num++;
   1264             tmpAvg += sqrtf(WEBRTC_SPL_MAX(1 - lambda[i] * lambda[i], 0));
   1265         }
   1266         tmpAvg /= (float)num;
   1267 
   1268         // Use average noise for H band
   1269         // TODO: we should probably have a new random vector here.
   1270         // Reject LF noise
   1271         u[0][0] = 0;
   1272         u[0][1] = 0;
   1273         for (i = 1; i < PART_LEN1; i++) {
   1274             tmp = pi2 * rand[i - 1];
   1275 
   1276             // Use average noise for H band
   1277             u[i][0] = noiseAvg * (float)cos(tmp);
   1278             u[i][1] = -noiseAvg * (float)sin(tmp);
   1279         }
   1280         u[PART_LEN][1] = 0;
   1281 
   1282         for (i = 0; i < PART_LEN1; i++) {
   1283             // Use average NLP weight for H band
   1284             comfortNoiseHband[i][0] = tmpAvg * u[i][0];
   1285             comfortNoiseHband[i][1] = tmpAvg * u[i][1];
   1286         }
   1287     }
   1288 }
   1289 
   1290 static void WebRtcAec_InitLevel(power_level_t *level)
   1291 {
   1292     const float bigFloat = 1E17f;
   1293 
   1294     level->averagelevel = 0;
   1295     level->framelevel = 0;
   1296     level->minlevel = bigFloat;
   1297     level->frsum = 0;
   1298     level->sfrsum = 0;
   1299     level->frcounter = 0;
   1300     level->sfrcounter = 0;
   1301 }
   1302 
   1303 static void WebRtcAec_InitStats(stats_t *stats)
   1304 {
   1305     stats->instant = offsetLevel;
   1306     stats->average = offsetLevel;
   1307     stats->max = offsetLevel;
   1308     stats->min = offsetLevel * (-1);
   1309     stats->sum = 0;
   1310     stats->hisum = 0;
   1311     stats->himean = offsetLevel;
   1312     stats->counter = 0;
   1313     stats->hicounter = 0;
   1314 }
   1315 
   1316 static void UpdateLevel(power_level_t* level, float in[2][PART_LEN1]) {
   1317   // Do the energy calculation in the frequency domain. The FFT is performed on
   1318   // a segment of PART_LEN2 samples due to overlap, but we only want the energy
   1319   // of half that data (the last PART_LEN samples). Parseval's relation states
   1320   // that the energy is preserved according to
   1321   //
   1322   // \sum_{n=0}^{N-1} |x(n)|^2 = 1/N * \sum_{n=0}^{N-1} |X(n)|^2
   1323   //                           = ENERGY,
   1324   //
   1325   // where N = PART_LEN2. Since we are only interested in calculating the energy
   1326   // for the last PART_LEN samples we approximate by calculating ENERGY and
   1327   // divide by 2,
   1328   //
   1329   // \sum_{n=N/2}^{N-1} |x(n)|^2 ~= ENERGY / 2
   1330   //
   1331   // Since we deal with real valued time domain signals we only store frequency
   1332   // bins [0, PART_LEN], which is what |in| consists of. To calculate ENERGY we
   1333   // need to add the contribution from the missing part in
   1334   // [PART_LEN+1, PART_LEN2-1]. These values are, up to a phase shift, identical
   1335   // with the values in [1, PART_LEN-1], hence multiply those values by 2. This
   1336   // is the values in the for loop below, but multiplication by 2 and division
   1337   // by 2 cancel.
   1338 
   1339   // TODO(bjornv): Investigate reusing energy calculations performed at other
   1340   // places in the code.
   1341   int k = 1;
   1342   // Imaginary parts are zero at end points and left out of the calculation.
   1343   float energy = (in[0][0] * in[0][0]) / 2;
   1344   energy += (in[0][PART_LEN] * in[0][PART_LEN]) / 2;
   1345 
   1346   for (k = 1; k < PART_LEN; k++) {
   1347     energy += (in[0][k] * in[0][k] + in[1][k] * in[1][k]);
   1348   }
   1349   energy /= PART_LEN2;
   1350 
   1351   level->sfrsum += energy;
   1352   level->sfrcounter++;
   1353 
   1354   if (level->sfrcounter > subCountLen) {
   1355     level->framelevel = level->sfrsum / (subCountLen * PART_LEN);
   1356     level->sfrsum = 0;
   1357     level->sfrcounter = 0;
   1358     if (level->framelevel > 0) {
   1359       if (level->framelevel < level->minlevel) {
   1360         level->minlevel = level->framelevel;  // New minimum.
   1361       } else {
   1362         level->minlevel *= (1 + 0.001f);  // Small increase.
   1363       }
   1364     }
   1365     level->frcounter++;
   1366     level->frsum += level->framelevel;
   1367     if (level->frcounter > countLen) {
   1368       level->averagelevel = level->frsum / countLen;
   1369       level->frsum = 0;
   1370       level->frcounter = 0;
   1371     }
   1372   }
   1373 }
   1374 
   1375 static void UpdateMetrics(aec_t *aec)
   1376 {
   1377     float dtmp, dtmp2;
   1378 
   1379     const float actThresholdNoisy = 8.0f;
   1380     const float actThresholdClean = 40.0f;
   1381     const float safety = 0.99995f;
   1382     const float noisyPower = 300000.0f;
   1383 
   1384     float actThreshold;
   1385     float echo, suppressedEcho;
   1386 
   1387     if (aec->echoState) {   // Check if echo is likely present
   1388         aec->stateCounter++;
   1389     }
   1390 
   1391     if (aec->farlevel.frcounter == 0) {
   1392 
   1393         if (aec->farlevel.minlevel < noisyPower) {
   1394             actThreshold = actThresholdClean;
   1395         }
   1396         else {
   1397             actThreshold = actThresholdNoisy;
   1398         }
   1399 
   1400         if ((aec->stateCounter > (0.5f * countLen * subCountLen))
   1401             && (aec->farlevel.sfrcounter == 0)
   1402 
   1403             // Estimate in active far-end segments only
   1404             && (aec->farlevel.averagelevel > (actThreshold * aec->farlevel.minlevel))
   1405             ) {
   1406 
   1407             // Subtract noise power
   1408             echo = aec->nearlevel.averagelevel - safety * aec->nearlevel.minlevel;
   1409 
   1410             // ERL
   1411             dtmp = 10 * (float)log10(aec->farlevel.averagelevel /
   1412                 aec->nearlevel.averagelevel + 1e-10f);
   1413             dtmp2 = 10 * (float)log10(aec->farlevel.averagelevel / echo + 1e-10f);
   1414 
   1415             aec->erl.instant = dtmp;
   1416             if (dtmp > aec->erl.max) {
   1417                 aec->erl.max = dtmp;
   1418             }
   1419 
   1420             if (dtmp < aec->erl.min) {
   1421                 aec->erl.min = dtmp;
   1422             }
   1423 
   1424             aec->erl.counter++;
   1425             aec->erl.sum += dtmp;
   1426             aec->erl.average = aec->erl.sum / aec->erl.counter;
   1427 
   1428             // Upper mean
   1429             if (dtmp > aec->erl.average) {
   1430                 aec->erl.hicounter++;
   1431                 aec->erl.hisum += dtmp;
   1432                 aec->erl.himean = aec->erl.hisum / aec->erl.hicounter;
   1433             }
   1434 
   1435             // A_NLP
   1436             dtmp = 10 * (float)log10(aec->nearlevel.averagelevel /
   1437                 (2 * aec->linoutlevel.averagelevel) + 1e-10f);
   1438 
   1439             // subtract noise power
   1440             suppressedEcho = 2 * (aec->linoutlevel.averagelevel -
   1441                 safety * aec->linoutlevel.minlevel);
   1442 
   1443             dtmp2 = 10 * (float)log10(echo / suppressedEcho + 1e-10f);
   1444 
   1445             aec->aNlp.instant = dtmp2;
   1446             if (dtmp > aec->aNlp.max) {
   1447                 aec->aNlp.max = dtmp;
   1448             }
   1449 
   1450             if (dtmp < aec->aNlp.min) {
   1451                 aec->aNlp.min = dtmp;
   1452             }
   1453 
   1454             aec->aNlp.counter++;
   1455             aec->aNlp.sum += dtmp;
   1456             aec->aNlp.average = aec->aNlp.sum / aec->aNlp.counter;
   1457 
   1458             // Upper mean
   1459             if (dtmp > aec->aNlp.average) {
   1460                 aec->aNlp.hicounter++;
   1461                 aec->aNlp.hisum += dtmp;
   1462                 aec->aNlp.himean = aec->aNlp.hisum / aec->aNlp.hicounter;
   1463             }
   1464 
   1465             // ERLE
   1466 
   1467             // subtract noise power
   1468             suppressedEcho = 2 * (aec->nlpoutlevel.averagelevel -
   1469                 safety * aec->nlpoutlevel.minlevel);
   1470 
   1471             dtmp = 10 * (float)log10(aec->nearlevel.averagelevel /
   1472                 (2 * aec->nlpoutlevel.averagelevel) + 1e-10f);
   1473             dtmp2 = 10 * (float)log10(echo / suppressedEcho + 1e-10f);
   1474 
   1475             dtmp = dtmp2;
   1476             aec->erle.instant = dtmp;
   1477             if (dtmp > aec->erle.max) {
   1478                 aec->erle.max = dtmp;
   1479             }
   1480 
   1481             if (dtmp < aec->erle.min) {
   1482                 aec->erle.min = dtmp;
   1483             }
   1484 
   1485             aec->erle.counter++;
   1486             aec->erle.sum += dtmp;
   1487             aec->erle.average = aec->erle.sum / aec->erle.counter;
   1488 
   1489             // Upper mean
   1490             if (dtmp > aec->erle.average) {
   1491                 aec->erle.hicounter++;
   1492                 aec->erle.hisum += dtmp;
   1493                 aec->erle.himean = aec->erle.hisum / aec->erle.hicounter;
   1494             }
   1495         }
   1496 
   1497         aec->stateCounter = 0;
   1498     }
   1499 }
   1500 
   1501 static void TimeToFrequency(float time_data[PART_LEN2],
   1502                             float freq_data[2][PART_LEN1],
   1503                             int window) {
   1504   int i = 0;
   1505 
   1506   // TODO(bjornv): Should we have a different function/wrapper for windowed FFT?
   1507   if (window) {
   1508     for (i = 0; i < PART_LEN; i++) {
   1509       time_data[i] *= sqrtHanning[i];
   1510       time_data[PART_LEN + i] *= sqrtHanning[PART_LEN - i];
   1511     }
   1512   }
   1513 
   1514   aec_rdft_forward_128(time_data);
   1515   // Reorder.
   1516   freq_data[1][0] = 0;
   1517   freq_data[1][PART_LEN] = 0;
   1518   freq_data[0][0] = time_data[0];
   1519   freq_data[0][PART_LEN] = time_data[1];
   1520   for (i = 1; i < PART_LEN; i++) {
   1521     freq_data[0][i] = time_data[2 * i];
   1522     freq_data[1][i] = time_data[2 * i + 1];
   1523   }
   1524 }
   1525