Home | History | Annotate | Download | only in rtp
      1 /*
      2  * Copyrightm (C) 2010 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #include <stdio.h>
     18 #include <string.h>
     19 #include <stdint.h>
     20 #include <math.h>
     21 
     22 #define LOG_TAG "Echo"
     23 #include <utils/Log.h>
     24 
     25 #include "EchoSuppressor.h"
     26 
     27 // It is very difficult to do echo cancellation at this level due to the lack of
     28 // the timing information of the samples being played and recorded. Therefore,
     29 // for the first release only echo suppression is implemented.
     30 
     31 // The algorithm is derived from the "previous works" summarized in
     32 //   A new class of doubletalk detectors based on cross-correlation,
     33 //   J Benesty, DR Morgan, JH Cho, IEEE Trans. on Speech and Audio Processing.
     34 // The method proposed in that paper is not used because of its high complexity.
     35 
     36 // It is well known that cross-correlation can be computed using convolution,
     37 // but unfortunately not every mobile processor has a (fast enough) FPU. Thus
     38 // we use integer arithmetic as much as possible and do lots of bookkeeping.
     39 // Again, parameters and thresholds are chosen by experiments.
     40 
     41 EchoSuppressor::EchoSuppressor(int sampleCount, int tailLength)
     42 {
     43     tailLength += sampleCount * 4;
     44 
     45     int shift = 0;
     46     while ((sampleCount >> shift) > 1 && (tailLength >> shift) > 256) {
     47         ++shift;
     48     }
     49 
     50     mShift = shift + 4;
     51     mScale = 1 << shift;
     52     mSampleCount = sampleCount;
     53     mWindowSize = sampleCount >> shift;
     54     mTailLength = tailLength >> shift;
     55     mRecordLength = tailLength * 2 / sampleCount;
     56     mRecordOffset = 0;
     57 
     58     mXs = new uint16_t[mTailLength + mWindowSize];
     59     memset(mXs, 0, sizeof(*mXs) * (mTailLength + mWindowSize));
     60     mXSums = new uint32_t[mTailLength];
     61     memset(mXSums, 0, sizeof(*mXSums) * mTailLength);
     62     mX2Sums = new uint32_t[mTailLength];
     63     memset(mX2Sums, 0, sizeof(*mX2Sums) * mTailLength);
     64     mXRecords = new uint16_t[mRecordLength * mWindowSize];
     65     memset(mXRecords, 0, sizeof(*mXRecords) * mRecordLength * mWindowSize);
     66 
     67     mYSum = 0;
     68     mY2Sum = 0;
     69     mYRecords = new uint32_t[mRecordLength];
     70     memset(mYRecords, 0, sizeof(*mYRecords) * mRecordLength);
     71     mY2Records = new uint32_t[mRecordLength];
     72     memset(mY2Records, 0, sizeof(*mY2Records) * mRecordLength);
     73 
     74     mXYSums = new uint32_t[mTailLength];
     75     memset(mXYSums, 0, sizeof(*mXYSums) * mTailLength);
     76     mXYRecords = new uint32_t[mRecordLength * mTailLength];
     77     memset(mXYRecords, 0, sizeof(*mXYRecords) * mRecordLength * mTailLength);
     78 
     79     mLastX = 0;
     80     mLastY = 0;
     81     mWeight = 1.0f / (mRecordLength * mWindowSize);
     82 }
     83 
     84 EchoSuppressor::~EchoSuppressor()
     85 {
     86     delete [] mXs;
     87     delete [] mXSums;
     88     delete [] mX2Sums;
     89     delete [] mXRecords;
     90     delete [] mYRecords;
     91     delete [] mY2Records;
     92     delete [] mXYSums;
     93     delete [] mXYRecords;
     94 }
     95 
     96 void EchoSuppressor::run(int16_t *playbacked, int16_t *recorded)
     97 {
     98     // Update Xs.
     99     for (int i = mTailLength - 1; i >= 0; --i) {
    100         mXs[i + mWindowSize] = mXs[i];
    101     }
    102     for (int i = mWindowSize - 1, j = 0; i >= 0; --i, j += mScale) {
    103         uint32_t sum = 0;
    104         for (int k = 0; k < mScale; ++k) {
    105             int32_t x = playbacked[j + k] << 15;
    106             mLastX += x;
    107             sum += ((mLastX >= 0) ? mLastX : -mLastX) >> 15;
    108             mLastX -= (mLastX >> 10) + x;
    109         }
    110         mXs[i] = sum >> mShift;
    111     }
    112 
    113     // Update XSums, X2Sums, and XRecords.
    114     for (int i = mTailLength - mWindowSize - 1; i >= 0; --i) {
    115         mXSums[i + mWindowSize] = mXSums[i];
    116         mX2Sums[i + mWindowSize] = mX2Sums[i];
    117     }
    118     uint16_t *xRecords = &mXRecords[mRecordOffset * mWindowSize];
    119     for (int i = mWindowSize - 1; i >= 0; --i) {
    120         uint16_t x = mXs[i];
    121         mXSums[i] = mXSums[i + 1] + x - xRecords[i];
    122         mX2Sums[i] = mX2Sums[i + 1] + x * x - xRecords[i] * xRecords[i];
    123         xRecords[i] = x;
    124     }
    125 
    126     // Compute Ys.
    127     uint16_t ys[mWindowSize];
    128     for (int i = mWindowSize - 1, j = 0; i >= 0; --i, j += mScale) {
    129         uint32_t sum = 0;
    130         for (int k = 0; k < mScale; ++k) {
    131             int32_t y = recorded[j + k] << 15;
    132             mLastY += y;
    133             sum += ((mLastY >= 0) ? mLastY : -mLastY) >> 15;
    134             mLastY -= (mLastY >> 10) + y;
    135         }
    136         ys[i] = sum >> mShift;
    137     }
    138 
    139     // Update YSum, Y2Sum, YRecords, and Y2Records.
    140     uint32_t ySum = 0;
    141     uint32_t y2Sum = 0;
    142     for (int i = mWindowSize - 1; i >= 0; --i) {
    143         ySum += ys[i];
    144         y2Sum += ys[i] * ys[i];
    145     }
    146     mYSum += ySum - mYRecords[mRecordOffset];
    147     mY2Sum += y2Sum - mY2Records[mRecordOffset];
    148     mYRecords[mRecordOffset] = ySum;
    149     mY2Records[mRecordOffset] = y2Sum;
    150 
    151     // Update XYSums and XYRecords.
    152     uint32_t *xyRecords = &mXYRecords[mRecordOffset * mTailLength];
    153     for (int i = mTailLength - 1; i >= 0; --i) {
    154         uint32_t xySum = 0;
    155         for (int j = mWindowSize - 1; j >= 0; --j) {
    156             xySum += mXs[i + j] * ys[j];
    157         }
    158         mXYSums[i] += xySum - xyRecords[i];
    159         xyRecords[i] = xySum;
    160     }
    161 
    162     // Compute correlations.
    163     float corr2 = 0.0f;
    164     int latency = 0;
    165     float varY = mY2Sum - mWeight * mYSum * mYSum;
    166     for (int i = mTailLength - 1; i >= 0; --i) {
    167         float varX = mX2Sums[i] - mWeight * mXSums[i] * mXSums[i];
    168         float cov = mXYSums[i] - mWeight * mXSums[i] * mYSum;
    169         float c2 = cov * cov / (varX * varY + 1);
    170         if (c2 > corr2) {
    171             corr2 = c2;
    172             latency = i;
    173         }
    174     }
    175     //LOGI("correlation^2 = %.10f, latency = %d", corr2, latency * mScale);
    176 
    177     // Do echo suppression.
    178     if (corr2 > 0.1f) {
    179         int factor = (corr2 > 1.0f) ? 0 : (1.0f - sqrtf(corr2)) * 4096;
    180         for (int i = 0; i < mSampleCount; ++i) {
    181             recorded[i] = recorded[i] * factor >> 16;
    182         }
    183     }
    184 
    185     // Increase RecordOffset.
    186     ++mRecordOffset;
    187     if (mRecordOffset == mRecordLength) {
    188         mRecordOffset = 0;
    189     }
    190 }
    191