Home | History | Annotate | Download | only in audio_processing
      1 /*
      2  *  Copyright (c) 2015 The WebRTC project authors. All Rights Reserved.
      3  *
      4  *  Use of this source code is governed by a BSD-style license
      5  *  that can be found in the LICENSE file in the root of the source
      6  *  tree. An additional intellectual property rights grant can be found
      7  *  in the file PATENTS.  All contributing project authors may
      8  *  be found in the AUTHORS file in the root of the source tree.
      9  */
     10 
     11 // An implementation of a 3-band FIR filter-bank with DCT modulation, similar to
     12 // the proposed in "Multirate Signal Processing for Communication Systems" by
     13 // Fredric J Harris.
     14 //
     15 // The idea is to take a heterodyne system and change the order of the
     16 // components to get something which is efficient to implement digitally.
     17 //
     18 // It is possible to separate the filter using the noble identity as follows:
     19 //
     20 // H(z) = H0(z^3) + z^-1 * H1(z^3) + z^-2 * H2(z^3)
     21 //
     22 // This is used in the analysis stage to first downsample serial to parallel
     23 // and then filter each branch with one of these polyphase decompositions of the
     24 // lowpass prototype. Because each filter is only a modulation of the prototype,
     25 // it is enough to multiply each coefficient by the respective cosine value to
     26 // shift it to the desired band. But because the cosine period is 12 samples,
     27 // it requires separating the prototype even further using the noble identity.
     28 // After filtering and modulating for each band, the output of all filters is
     29 // accumulated to get the downsampled bands.
     30 //
     31 // A similar logic can be applied to the synthesis stage.
     32 
     33 // MSVC++ requires this to be set before any other includes to get M_PI.
     34 #define _USE_MATH_DEFINES
     35 
     36 #include "webrtc/modules/audio_processing/three_band_filter_bank.h"
     37 
     38 #include <cmath>
     39 
     40 #include "webrtc/base/checks.h"
     41 
     42 namespace webrtc {
     43 namespace {
     44 
     45 const size_t kNumBands = 3;
     46 const size_t kSparsity = 4;
     47 
     48 // Factors to take into account when choosing |kNumCoeffs|:
     49 //   1. Higher |kNumCoeffs|, means faster transition, which ensures less
     50 //      aliasing. This is especially important when there is non-linear
     51 //      processing between the splitting and merging.
     52 //   2. The delay that this filter bank introduces is
     53 //      |kNumBands| * |kSparsity| * |kNumCoeffs| / 2, so it increases linearly
     54 //      with |kNumCoeffs|.
     55 //   3. The computation complexity also increases linearly with |kNumCoeffs|.
     56 const size_t kNumCoeffs = 4;
     57 
     58 // The Matlab code to generate these |kLowpassCoeffs| is:
     59 //
     60 // N = kNumBands * kSparsity * kNumCoeffs - 1;
     61 // h = fir1(N, 1 / (2 * kNumBands), kaiser(N + 1, 3.5));
     62 // reshape(h, kNumBands * kSparsity, kNumCoeffs);
     63 //
     64 // Because the total bandwidth of the lower and higher band is double the middle
     65 // one (because of the spectrum parity), the low-pass prototype is half the
     66 // bandwidth of 1 / (2 * |kNumBands|) and is then shifted with cosine modulation
     67 // to the right places.
     68 // A Kaiser window is used because of its flexibility and the alpha is set to
     69 // 3.5, since that sets a stop band attenuation of 40dB ensuring a fast
     70 // transition.
     71 const float kLowpassCoeffs[kNumBands * kSparsity][kNumCoeffs] =
     72     {{-0.00047749f, -0.00496888f, +0.16547118f, +0.00425496f},
     73      {-0.00173287f, -0.01585778f, +0.14989004f, +0.00994113f},
     74      {-0.00304815f, -0.02536082f, +0.12154542f, +0.01157993f},
     75      {-0.00383509f, -0.02982767f, +0.08543175f, +0.00983212f},
     76      {-0.00346946f, -0.02587886f, +0.04760441f, +0.00607594f},
     77      {-0.00154717f, -0.01136076f, +0.01387458f, +0.00186353f},
     78      {+0.00186353f, +0.01387458f, -0.01136076f, -0.00154717f},
     79      {+0.00607594f, +0.04760441f, -0.02587886f, -0.00346946f},
     80      {+0.00983212f, +0.08543175f, -0.02982767f, -0.00383509f},
     81      {+0.01157993f, +0.12154542f, -0.02536082f, -0.00304815f},
     82      {+0.00994113f, +0.14989004f, -0.01585778f, -0.00173287f},
     83      {+0.00425496f, +0.16547118f, -0.00496888f, -0.00047749f}};
     84 
     85 // Downsamples |in| into |out|, taking one every |kNumbands| starting from
     86 // |offset|. |split_length| is the |out| length. |in| has to be at least
     87 // |kNumBands| * |split_length| long.
     88 void Downsample(const float* in,
     89                 size_t split_length,
     90                 size_t offset,
     91                 float* out) {
     92   for (size_t i = 0; i < split_length; ++i) {
     93     out[i] = in[kNumBands * i + offset];
     94   }
     95 }
     96 
     97 // Upsamples |in| into |out|, scaling by |kNumBands| and accumulating it every
     98 // |kNumBands| starting from |offset|. |split_length| is the |in| length. |out|
     99 // has to be at least |kNumBands| * |split_length| long.
    100 void Upsample(const float* in, size_t split_length, size_t offset, float* out) {
    101   for (size_t i = 0; i < split_length; ++i) {
    102     out[kNumBands * i + offset] += kNumBands * in[i];
    103   }
    104 }
    105 
    106 }  // namespace
    107 
    108 // Because the low-pass filter prototype has half bandwidth it is possible to
    109 // use a DCT to shift it in both directions at the same time, to the center
    110 // frequencies [1 / 12, 3 / 12, 5 / 12].
    111 ThreeBandFilterBank::ThreeBandFilterBank(size_t length)
    112     : in_buffer_(rtc::CheckedDivExact(length, kNumBands)),
    113       out_buffer_(in_buffer_.size()) {
    114   for (size_t i = 0; i < kSparsity; ++i) {
    115     for (size_t j = 0; j < kNumBands; ++j) {
    116       analysis_filters_.push_back(new SparseFIRFilter(
    117           kLowpassCoeffs[i * kNumBands + j], kNumCoeffs, kSparsity, i));
    118       synthesis_filters_.push_back(new SparseFIRFilter(
    119           kLowpassCoeffs[i * kNumBands + j], kNumCoeffs, kSparsity, i));
    120     }
    121   }
    122   dct_modulation_.resize(kNumBands * kSparsity);
    123   for (size_t i = 0; i < dct_modulation_.size(); ++i) {
    124     dct_modulation_[i].resize(kNumBands);
    125     for (size_t j = 0; j < kNumBands; ++j) {
    126       dct_modulation_[i][j] =
    127           2.f * cos(2.f * M_PI * i * (2.f * j + 1.f) / dct_modulation_.size());
    128     }
    129   }
    130 }
    131 
    132 // The analysis can be separated in these steps:
    133 //   1. Serial to parallel downsampling by a factor of |kNumBands|.
    134 //   2. Filtering of |kSparsity| different delayed signals with polyphase
    135 //      decomposition of the low-pass prototype filter and upsampled by a factor
    136 //      of |kSparsity|.
    137 //   3. Modulating with cosines and accumulating to get the desired band.
    138 void ThreeBandFilterBank::Analysis(const float* in,
    139                                    size_t length,
    140                                    float* const* out) {
    141   RTC_CHECK_EQ(in_buffer_.size(), rtc::CheckedDivExact(length, kNumBands));
    142   for (size_t i = 0; i < kNumBands; ++i) {
    143     memset(out[i], 0, in_buffer_.size() * sizeof(*out[i]));
    144   }
    145   for (size_t i = 0; i < kNumBands; ++i) {
    146     Downsample(in, in_buffer_.size(), kNumBands - i - 1, &in_buffer_[0]);
    147     for (size_t j = 0; j < kSparsity; ++j) {
    148       const size_t offset = i + j * kNumBands;
    149       analysis_filters_[offset]->Filter(&in_buffer_[0],
    150                                         in_buffer_.size(),
    151                                         &out_buffer_[0]);
    152       DownModulate(&out_buffer_[0], out_buffer_.size(), offset, out);
    153     }
    154   }
    155 }
    156 
    157 // The synthesis can be separated in these steps:
    158 //   1. Modulating with cosines.
    159 //   2. Filtering each one with a polyphase decomposition of the low-pass
    160 //      prototype filter upsampled by a factor of |kSparsity| and accumulating
    161 //      |kSparsity| signals with different delays.
    162 //   3. Parallel to serial upsampling by a factor of |kNumBands|.
    163 void ThreeBandFilterBank::Synthesis(const float* const* in,
    164                                     size_t split_length,
    165                                     float* out) {
    166   RTC_CHECK_EQ(in_buffer_.size(), split_length);
    167   memset(out, 0, kNumBands * in_buffer_.size() * sizeof(*out));
    168   for (size_t i = 0; i < kNumBands; ++i) {
    169     for (size_t j = 0; j < kSparsity; ++j) {
    170       const size_t offset = i + j * kNumBands;
    171       UpModulate(in, in_buffer_.size(), offset, &in_buffer_[0]);
    172       synthesis_filters_[offset]->Filter(&in_buffer_[0],
    173                                          in_buffer_.size(),
    174                                          &out_buffer_[0]);
    175       Upsample(&out_buffer_[0], out_buffer_.size(), i, out);
    176     }
    177   }
    178 }
    179 
    180 
    181 // Modulates |in| by |dct_modulation_| and accumulates it in each of the
    182 // |kNumBands| bands of |out|. |offset| is the index in the period of the
    183 // cosines used for modulation. |split_length| is the length of |in| and each
    184 // band of |out|.
    185 void ThreeBandFilterBank::DownModulate(const float* in,
    186                                        size_t split_length,
    187                                        size_t offset,
    188                                        float* const* out) {
    189   for (size_t i = 0; i < kNumBands; ++i) {
    190     for (size_t j = 0; j < split_length; ++j) {
    191       out[i][j] += dct_modulation_[offset][i] * in[j];
    192     }
    193   }
    194 }
    195 
    196 // Modulates each of the |kNumBands| bands of |in| by |dct_modulation_| and
    197 // accumulates them in |out|. |out| is cleared before starting to accumulate.
    198 // |offset| is the index in the period of the cosines used for modulation.
    199 // |split_length| is the length of each band of |in| and |out|.
    200 void ThreeBandFilterBank::UpModulate(const float* const* in,
    201                                      size_t split_length,
    202                                      size_t offset,
    203                                      float* out) {
    204   memset(out, 0, split_length * sizeof(*out));
    205   for (size_t i = 0; i < kNumBands; ++i) {
    206     for (size_t j = 0; j < split_length; ++j) {
    207       out[j] += dct_modulation_[offset][i] * in[i][j];
    208     }
    209   }
    210 }
    211 
    212 }  // namespace webrtc
    213