Home | History | Annotate | Download | only in libaudioprocessing
      1 /*
      2  * Copyright (C) 2013 The Android Open Source Project
      3  *
      4  * Licensed under the Apache License, Version 2.0 (the "License");
      5  * you may not use this file except in compliance with the License.
      6  * You may obtain a copy of the License at
      7  *
      8  *      http://www.apache.org/licenses/LICENSE-2.0
      9  *
     10  * Unless required by applicable law or agreed to in writing, software
     11  * distributed under the License is distributed on an "AS IS" BASIS,
     12  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     13  * See the License for the specific language governing permissions and
     14  * limitations under the License.
     15  */
     16 
     17 #ifndef ANDROID_AUDIO_RESAMPLER_FIR_GEN_H
     18 #define ANDROID_AUDIO_RESAMPLER_FIR_GEN_H
     19 
     20 #include "utils/Compat.h"
     21 
     22 namespace android {
     23 
     24 /*
     25  * generates a sine wave at equal steps.
     26  *
     27  * As most of our functions use sine or cosine at equal steps,
     28  * it is very efficient to compute them that way (single multiply and subtract),
     29  * rather than invoking the math library sin() or cos() each time.
     30  *
     31  * SineGen uses Goertzel's Algorithm (as a generator not a filter)
     32  * to calculate sine(wstart + n * wstep) or cosine(wstart + n * wstep)
     33  * by stepping through 0, 1, ... n.
     34  *
     35  * e^i(wstart+wstep) = 2cos(wstep) * e^i(wstart) - e^i(wstart-wstep)
     36  *
     37  * or looking at just the imaginary sine term, as the cosine follows identically:
     38  *
     39  * sin(wstart+wstep) = 2cos(wstep) * sin(wstart) - sin(wstart-wstep)
     40  *
     41  * Goertzel's algorithm is more efficient than the angle addition formula,
     42  * e^i(wstart+wstep) = e^i(wstart) * e^i(wstep), which takes up to
     43  * 4 multiplies and 2 adds (or 3* and 3+) and requires both sine and
     44  * cosine generation due to the complex * complex multiply (full rotation).
     45  *
     46  * See: http://en.wikipedia.org/wiki/Goertzel_algorithm
     47  *
     48  */
     49 
     50 class SineGen {
     51 public:
     52     SineGen(double wstart, double wstep, bool cosine = false) {
     53         if (cosine) {
     54             mCurrent = cos(wstart);
     55             mPrevious = cos(wstart - wstep);
     56         } else {
     57             mCurrent = sin(wstart);
     58             mPrevious = sin(wstart - wstep);
     59         }
     60         mTwoCos = 2.*cos(wstep);
     61     }
     62     SineGen(double expNow, double expPrev, double twoCosStep) {
     63         mCurrent = expNow;
     64         mPrevious = expPrev;
     65         mTwoCos = twoCosStep;
     66     }
     67     inline double value() const {
     68         return mCurrent;
     69     }
     70     inline void advance() {
     71         double tmp = mCurrent;
     72         mCurrent = mCurrent*mTwoCos - mPrevious;
     73         mPrevious = tmp;
     74     }
     75     inline double valueAdvance() {
     76         double tmp = mCurrent;
     77         mCurrent = mCurrent*mTwoCos - mPrevious;
     78         mPrevious = tmp;
     79         return tmp;
     80     }
     81 
     82 private:
     83     double mCurrent; // current value of sine/cosine
     84     double mPrevious; // previous value of sine/cosine
     85     double mTwoCos; // stepping factor
     86 };
     87 
     88 /*
     89  * generates a series of sine generators, phase offset by fixed steps.
     90  *
     91  * This is used to generate polyphase sine generators, one per polyphase
     92  * in the filter code below.
     93  *
     94  * The SineGen returned by value() starts at innerStart = outerStart + n*outerStep;
     95  * increments by innerStep.
     96  *
     97  */
     98 
     99 class SineGenGen {
    100 public:
    101     SineGenGen(double outerStart, double outerStep, double innerStep, bool cosine = false)
    102             : mSineInnerCur(outerStart, outerStep, cosine),
    103               mSineInnerPrev(outerStart-innerStep, outerStep, cosine)
    104     {
    105         mTwoCos = 2.*cos(innerStep);
    106     }
    107     inline SineGen value() {
    108         return SineGen(mSineInnerCur.value(), mSineInnerPrev.value(), mTwoCos);
    109     }
    110     inline void advance() {
    111         mSineInnerCur.advance();
    112         mSineInnerPrev.advance();
    113     }
    114     inline SineGen valueAdvance() {
    115         return SineGen(mSineInnerCur.valueAdvance(), mSineInnerPrev.valueAdvance(), mTwoCos);
    116     }
    117 
    118 private:
    119     SineGen mSineInnerCur; // generate the inner sine values (stepped by outerStep).
    120     SineGen mSineInnerPrev; // generate the inner sine previous values
    121                             // (behind by innerStep, stepped by outerStep).
    122     double mTwoCos; // the inner stepping factor for the returned SineGen.
    123 };
    124 
    125 static inline double sqr(double x) {
    126     return x * x;
    127 }
    128 
    129 /*
    130  * rounds a double to the nearest integer for FIR coefficients.
    131  *
    132  * One variant uses noise shaping, which must keep error history
    133  * to work (the err parameter, initialized to 0).
    134  * The other variant is a non-noise shaped version for
    135  * S32 coefficients (noise shaping doesn't gain much).
    136  *
    137  * Caution: No bounds saturation is applied, but isn't needed in this case.
    138  *
    139  * @param x is the value to round.
    140  *
    141  * @param maxval is the maximum integer scale factor expressed as an int64 (for headroom).
    142  * Typically this may be the maximum positive integer+1 (using the fact that double precision
    143  * FIR coefficients generated here are never that close to 1.0 to pose an overflow condition).
    144  *
    145  * @param err is the previous error (actual - rounded) for the previous rounding op.
    146  * For 16b coefficients this can improve stopband dB performance by up to 2dB.
    147  *
    148  * Many variants exist for the noise shaping: http://en.wikipedia.org/wiki/Noise_shaping
    149  *
    150  */
    151 
    152 static inline int64_t toint(double x, int64_t maxval, double& err) {
    153     double val = x * maxval;
    154     double ival = floor(val + 0.5 + err*0.2);
    155     err = val - ival;
    156     return static_cast<int64_t>(ival);
    157 }
    158 
    159 static inline int64_t toint(double x, int64_t maxval) {
    160     return static_cast<int64_t>(floor(x * maxval + 0.5));
    161 }
    162 
    163 /*
    164  * Modified Bessel function of the first kind
    165  * http://en.wikipedia.org/wiki/Bessel_function
    166  *
    167  * The formulas are taken from Abramowitz and Stegun,
    168  * _Handbook of Mathematical Functions_ (links below):
    169  *
    170  * http://people.math.sfu.ca/~cbm/aands/page_375.htm
    171  * http://people.math.sfu.ca/~cbm/aands/page_378.htm
    172  *
    173  * http://dlmf.nist.gov/10.25
    174  * http://dlmf.nist.gov/10.40
    175  *
    176  * Note we assume x is nonnegative (the function is symmetric,
    177  * pass in the absolute value as needed).
    178  *
    179  * Constants are compile time derived with templates I0Term<> and
    180  * I0ATerm<> to the precision of the compiler.  The series can be expanded
    181  * to any precision needed, but currently set around 24b precision.
    182  *
    183  * We use a bit of template math here, constexpr would probably be
    184  * more appropriate for a C++11 compiler.
    185  *
    186  * For the intermediate range 3.75 < x < 15, we use minimax polynomial fit.
    187  *
    188  */
    189 
    190 template <int N>
    191 struct I0Term {
    192     static const CONSTEXPR double value = I0Term<N-1>::value / (4. * N * N);
    193 };
    194 
    195 template <>
    196 struct I0Term<0> {
    197     static const CONSTEXPR double value = 1.;
    198 };
    199 
    200 template <int N>
    201 struct I0ATerm {
    202     static const CONSTEXPR double value = I0ATerm<N-1>::value * (2.*N-1.) * (2.*N-1.) / (8. * N);
    203 };
    204 
    205 template <>
    206 struct I0ATerm<0> { // 1/sqrt(2*PI);
    207     static const CONSTEXPR double value =
    208             0.398942280401432677939946059934381868475858631164934657665925;
    209 };
    210 
    211 #if USE_HORNERS_METHOD
    212 /* Polynomial evaluation of A + Bx + Cx^2 + Dx^3 + ...
    213  * using Horner's Method: http://en.wikipedia.org/wiki/Horner's_method
    214  *
    215  * This has fewer multiplications than Estrin's method below, but has back to back
    216  * floating point dependencies.
    217  *
    218  * On ARM this appears to work slower, so USE_HORNERS_METHOD is not default enabled.
    219  */
    220 
    221 inline double Poly2(double A, double B, double x) {
    222     return A + x * B;
    223 }
    224 
    225 inline double Poly4(double A, double B, double C, double D, double x) {
    226     return A + x * (B + x * (C + x * (D)));
    227 }
    228 
    229 inline double Poly7(double A, double B, double C, double D, double E, double F, double G,
    230         double x) {
    231     return A + x * (B + x * (C + x * (D + x * (E + x * (F + x * (G))))));
    232 }
    233 
    234 inline double Poly9(double A, double B, double C, double D, double E, double F, double G,
    235         double H, double I, double x) {
    236     return A + x * (B + x * (C + x * (D + x * (E + x * (F + x * (G + x * (H + x * (I))))))));
    237 }
    238 
    239 #else
    240 /* Polynomial evaluation of A + Bx + Cx^2 + Dx^3 + ...
    241  * using Estrin's Method: http://en.wikipedia.org/wiki/Estrin's_scheme
    242  *
    243  * This is typically faster, perhaps gains about 5-10% overall on ARM processors
    244  * over Horner's method above.
    245  */
    246 
    247 inline double Poly2(double A, double B, double x) {
    248     return A + B * x;
    249 }
    250 
    251 inline double Poly3(double A, double B, double C, double x, double x2) {
    252     return Poly2(A, B, x) + C * x2;
    253 }
    254 
    255 inline double Poly3(double A, double B, double C, double x) {
    256     return Poly2(A, B, x) + C * x * x;
    257 }
    258 
    259 inline double Poly4(double A, double B, double C, double D, double x, double x2) {
    260     return Poly2(A, B, x) + Poly2(C, D, x) * x2; // same as poly2(poly2, poly2, x2);
    261 }
    262 
    263 inline double Poly4(double A, double B, double C, double D, double x) {
    264     return Poly4(A, B, C, D, x, x * x);
    265 }
    266 
    267 inline double Poly7(double A, double B, double C, double D, double E, double F, double G,
    268         double x) {
    269     double x2 = x * x;
    270     return Poly4(A, B, C, D, x, x2) + Poly3(E, F, G, x, x2) * (x2 * x2);
    271 }
    272 
    273 inline double Poly8(double A, double B, double C, double D, double E, double F, double G,
    274         double H, double x, double x2, double x4) {
    275     return Poly4(A, B, C, D, x, x2) + Poly4(E, F, G, H, x, x2) * x4;
    276 }
    277 
    278 inline double Poly9(double A, double B, double C, double D, double E, double F, double G,
    279         double H, double I, double x) {
    280     double x2 = x * x;
    281 #if 1
    282     // It does not seem faster to explicitly decompose Poly8 into Poly4, but
    283     // could depend on compiler floating point scheduling.
    284     double x4 = x2 * x2;
    285     return Poly8(A, B, C, D, E, F, G, H, x, x2, x4) + I * (x4 * x4);
    286 #else
    287     double val = Poly4(A, B, C, D, x, x2);
    288     double x4 = x2 * x2;
    289     return val + Poly4(E, F, G, H, x, x2) * x4 + I * (x4 * x4);
    290 #endif
    291 }
    292 #endif
    293 
    294 static inline double I0(double x) {
    295     if (x < 3.75) {
    296         x *= x;
    297         return Poly7(I0Term<0>::value, I0Term<1>::value,
    298                 I0Term<2>::value, I0Term<3>::value,
    299                 I0Term<4>::value, I0Term<5>::value,
    300                 I0Term<6>::value, x); // e < 1.6e-7
    301     }
    302     if (1) {
    303         /*
    304          * Series expansion coefs are easy to calculate, but are expanded around 0,
    305          * so error is unequal over the interval 0 < x < 3.75, the error being
    306          * significantly better near 0.
    307          *
    308          * A better solution is to use precise minimax polynomial fits.
    309          *
    310          * We use a slightly more complicated solution for 3.75 < x < 15, based on
    311          * the tables in Blair and Edwards, "Stable Rational Minimax Approximations
    312          * to the Modified Bessel Functions I0(x) and I1(x)", Chalk Hill Nuclear Laboratory,
    313          * AECL-4928.
    314          *
    315          * http://www.iaea.org/inis/collection/NCLCollectionStore/_Public/06/178/6178667.pdf
    316          *
    317          * See Table 11 for 0 < x < 15; e < 10^(-7.13).
    318          *
    319          * Note: Beta cannot exceed 15 (hence Stopband cannot exceed 144dB = 24b).
    320          *
    321          * This speeds up overall computation by about 40% over using the else clause below,
    322          * which requires sqrt and exp.
    323          *
    324          */
    325 
    326         x *= x;
    327         double num = Poly9(-0.13544938430e9, -0.33153754512e8,
    328                 -0.19406631946e7, -0.48058318783e5,
    329                 -0.63269783360e3, -0.49520779070e1,
    330                 -0.24970910370e-1, -0.74741159550e-4,
    331                 -0.18257612460e-6, x);
    332         double y = x - 225.; // reflection around 15 (squared)
    333         double den = Poly4(-0.34598737196e8, 0.23852643181e6,
    334                 -0.70699387620e3, 0.10000000000e1, y);
    335         return num / den;
    336 
    337 #if IO_EXTENDED_BETA
    338         /* Table 42 for x > 15; e < 10^(-8.11).
    339          * This is used for Beta>15, but is disabled here as
    340          * we never use Beta that high.
    341          *
    342          * NOTE: This should be enabled only for x > 15.
    343          */
    344 
    345         double y = 1./x;
    346         double z = y - (1./15);
    347         double num = Poly2(0.415079861746e1, -0.5149092496e1, z);
    348         double den = Poly3(0.103150763823e2, -0.14181687413e2,
    349                 0.1000000000e1, z);
    350         return exp(x) * sqrt(y) * num / den;
    351 #endif
    352     } else {
    353         /*
    354          * NOT USED, but reference for large Beta.
    355          *
    356          * Abramowitz and Stegun asymptotic formula.
    357          * works for x > 3.75.
    358          */
    359         double y = 1./x;
    360         return exp(x) * sqrt(y) *
    361                 // note: reciprocal squareroot may be easier!
    362                 // http://en.wikipedia.org/wiki/Fast_inverse_square_root
    363                 Poly9(I0ATerm<0>::value, I0ATerm<1>::value,
    364                         I0ATerm<2>::value, I0ATerm<3>::value,
    365                         I0ATerm<4>::value, I0ATerm<5>::value,
    366                         I0ATerm<6>::value, I0ATerm<7>::value,
    367                         I0ATerm<8>::value, y); // (... e) < 1.9e-7
    368     }
    369 }
    370 
    371 /* A speed optimized version of the Modified Bessel I0() which incorporates
    372  * the sqrt and numerator multiply and denominator divide into the computation.
    373  * This speeds up filter computation by about 10-15%.
    374  */
    375 static inline double I0SqrRat(double x2, double num, double den) {
    376     if (x2 < (3.75 * 3.75)) {
    377         return Poly7(I0Term<0>::value, I0Term<1>::value,
    378                 I0Term<2>::value, I0Term<3>::value,
    379                 I0Term<4>::value, I0Term<5>::value,
    380                 I0Term<6>::value, x2) * num / den; // e < 1.6e-7
    381     }
    382     num *= Poly9(-0.13544938430e9, -0.33153754512e8,
    383             -0.19406631946e7, -0.48058318783e5,
    384             -0.63269783360e3, -0.49520779070e1,
    385             -0.24970910370e-1, -0.74741159550e-4,
    386             -0.18257612460e-6, x2); // e < 10^(-7.13).
    387     double y = x2 - 225.; // reflection around 15 (squared)
    388     den *= Poly4(-0.34598737196e8, 0.23852643181e6,
    389             -0.70699387620e3, 0.10000000000e1, y);
    390     return num / den;
    391 }
    392 
    393 /*
    394  * calculates the transition bandwidth for a Kaiser filter
    395  *
    396  * Formula 3.2.8, Vaidyanathan, _Multirate Systems and Filter Banks_, p. 48
    397  * Formula 7.76, Oppenheim and Schafer, _Discrete-time Signal Processing, 3e_, p. 542
    398  *
    399  * @param halfNumCoef is half the number of coefficients per filter phase.
    400  *
    401  * @param stopBandAtten is the stop band attenuation desired.
    402  *
    403  * @return the transition bandwidth in normalized frequency (0 <= f <= 0.5)
    404  */
    405 static inline double firKaiserTbw(int halfNumCoef, double stopBandAtten) {
    406     return (stopBandAtten - 7.95)/((2.*14.36)*halfNumCoef);
    407 }
    408 
    409 /*
    410  * calculates the fir transfer response of the overall polyphase filter at w.
    411  *
    412  * Calculates the DTFT transfer coefficient H(w) for 0 <= w <= PI, utilizing the
    413  * fact that h[n] is symmetric (cosines only, no complex arithmetic).
    414  *
    415  * We use Goertzel's algorithm to accelerate the computation to essentially
    416  * a single multiply and 2 adds per filter coefficient h[].
    417  *
    418  * Be careful be careful to consider that h[n] is the overall polyphase filter,
    419  * with L phases, so rescaling H(w)/L is probably what you expect for "unity gain",
    420  * as you only use one of the polyphases at a time.
    421  */
    422 template <typename T>
    423 static inline double firTransfer(const T* coef, int L, int halfNumCoef, double w) {
    424     double accum = static_cast<double>(coef[0])*0.5;  // "center coefficient" from first bank
    425     coef += halfNumCoef;    // skip first filterbank (picked up by the last filterbank).
    426 #if SLOW_FIRTRANSFER
    427     /* Original code for reference.  This is equivalent to the code below, but slower. */
    428     for (int i=1 ; i<=L ; ++i) {
    429         for (int j=0, ix=i ; j<halfNumCoef ; ++j, ix+=L) {
    430             accum += cos(ix*w)*static_cast<double>(*coef++);
    431         }
    432     }
    433 #else
    434     /*
    435      * Our overall filter is stored striped by polyphases, not a contiguous h[n].
    436      * We could fetch coefficients in a non-contiguous fashion
    437      * but that will not scale to vector processing.
    438      *
    439      * We apply Goertzel's algorithm directly to each polyphase filter bank instead of
    440      * using cosine generation/multiplication, thereby saving one multiply per inner loop.
    441      *
    442      * See: http://en.wikipedia.org/wiki/Goertzel_algorithm
    443      * Also: Oppenheim and Schafer, _Discrete Time Signal Processing, 3e_, p. 720.
    444      *
    445      * We use the basic recursion to incorporate the cosine steps into real sequence x[n]:
    446      * s[n] = x[n] + (2cosw)*s[n-1] + s[n-2]
    447      *
    448      * y[n] = s[n] - e^(iw)s[n-1]
    449      *      = sum_{k=-\infty}^{n} x[k]e^(-iw(n-k))
    450      *      = e^(-iwn) sum_{k=0}^{n} x[k]e^(iwk)
    451      *
    452      * The summation contains the frequency steps we want multiplied by the source
    453      * (similar to a DTFT).
    454      *
    455      * Using symmetry, and just the real part (be careful, this must happen
    456      * after any internal complex multiplications), the polyphase filterbank
    457      * transfer function is:
    458      *
    459      * Hpp[n, w, w_0] = sum_{k=0}^{n} x[k] * cos(wk + w_0)
    460      *                = Re{ e^(iwn + iw_0) y[n]}
    461      *                = cos(wn+w_0) * s[n] - cos(w(n+1)+w_0) * s[n-1]
    462      *
    463      * using the fact that s[n] of real x[n] is real.
    464      *
    465      */
    466     double dcos = 2. * cos(L*w);
    467     int start = ((halfNumCoef)*L + 1);
    468     SineGen cc((start - L) * w, w, true); // cosine
    469     SineGen cp(start * w, w, true); // cosine
    470     for (int i=1 ; i<=L ; ++i) {
    471         double sc = 0;
    472         double sp = 0;
    473         for (int j=0 ; j<halfNumCoef ; ++j) {
    474             double tmp = sc;
    475             sc  = static_cast<double>(*coef++) + dcos*sc - sp;
    476             sp = tmp;
    477         }
    478         // If we are awfully clever, we can apply Goertzel's algorithm
    479         // again on the sc and sp sequences returned here.
    480         accum += cc.valueAdvance() * sc - cp.valueAdvance() * sp;
    481     }
    482 #endif
    483     return accum*2.;
    484 }
    485 
    486 /*
    487  * evaluates the minimum and maximum |H(f)| bound in a band region.
    488  *
    489  * This is usually done with equally spaced increments in the target band in question.
    490  * The passband is often very small, and sampled that way. The stopband is often much
    491  * larger.
    492  *
    493  * We use the fact that the overall polyphase filter has an additional bank at the end
    494  * for interpolation; hence it is overspecified for the H(f) computation.  Thus the
    495  * first polyphase is never actually checked, excepting its first term.
    496  *
    497  * In this code we use the firTransfer() evaluator above, which uses Goertzel's
    498  * algorithm to calculate the transfer function at each point.
    499  *
    500  * TODO: An alternative with equal spacing is the FFT/DFT.  An alternative with unequal
    501  * spacing is a chirp transform.
    502  *
    503  * @param coef is the designed polyphase filter banks
    504  *
    505  * @param L is the number of phases (for interpolation)
    506  *
    507  * @param halfNumCoef should be half the number of coefficients for a single
    508  * polyphase.
    509  *
    510  * @param fstart is the normalized frequency start.
    511  *
    512  * @param fend is the normalized frequency end.
    513  *
    514  * @param steps is the number of steps to take (sampling) between frequency start and end
    515  *
    516  * @param firMin returns the minimum transfer |H(f)| found
    517  *
    518  * @param firMax returns the maximum transfer |H(f)| found
    519  *
    520  * 0 <= f <= 0.5.
    521  * This is used to test passband and stopband performance.
    522  */
    523 template <typename T>
    524 static void testFir(const T* coef, int L, int halfNumCoef,
    525         double fstart, double fend, int steps, double &firMin, double &firMax) {
    526     double wstart = fstart*(2.*M_PI);
    527     double wend = fend*(2.*M_PI);
    528     double wstep = (wend - wstart)/steps;
    529     double fmax, fmin;
    530     double trf = firTransfer(coef, L, halfNumCoef, wstart);
    531     if (trf<0) {
    532         trf = -trf;
    533     }
    534     fmin = fmax = trf;
    535     wstart += wstep;
    536     for (int i=1; i<steps; ++i) {
    537         trf = firTransfer(coef, L, halfNumCoef, wstart);
    538         if (trf<0) {
    539             trf = -trf;
    540         }
    541         if (trf>fmax) {
    542             fmax = trf;
    543         }
    544         else if (trf<fmin) {
    545             fmin = trf;
    546         }
    547         wstart += wstep;
    548     }
    549     // renormalize - this is only needed for integer filter types
    550     double norm = 1./((1ULL<<(sizeof(T)*8-1))*L);
    551 
    552     firMin = fmin * norm;
    553     firMax = fmax * norm;
    554 }
    555 
    556 /*
    557  * evaluates the |H(f)| lowpass band characteristics.
    558  *
    559  * This function tests the lowpass characteristics for the overall polyphase filter,
    560  * and is used to verify the design.  For this case, fp should be set to the
    561  * passband normalized frequency from 0 to 0.5 for the overall filter (thus it
    562  * is the designed polyphase bank value / L).  Likewise for fs.
    563  *
    564  * @param coef is the designed polyphase filter banks
    565  *
    566  * @param L is the number of phases (for interpolation)
    567  *
    568  * @param halfNumCoef should be half the number of coefficients for a single
    569  * polyphase.
    570  *
    571  * @param fp is the passband normalized frequency, 0 < fp < fs < 0.5.
    572  *
    573  * @param fs is the stopband normalized frequency, 0 < fp < fs < 0.5.
    574  *
    575  * @param passSteps is the number of passband sampling steps.
    576  *
    577  * @param stopSteps is the number of stopband sampling steps.
    578  *
    579  * @param passMin is the minimum value in the passband
    580  *
    581  * @param passMax is the maximum value in the passband (useful for scaling).  This should
    582  * be less than 1., to avoid sine wave test overflow.
    583  *
    584  * @param passRipple is the passband ripple.  Typically this should be less than 0.1 for
    585  * an audio filter.  Generally speaker/headphone device characteristics will dominate
    586  * the passband term.
    587  *
    588  * @param stopMax is the maximum value in the stopband.
    589  *
    590  * @param stopRipple is the stopband ripple, also known as stopband attenuation.
    591  * Typically this should be greater than ~80dB for low quality, and greater than
    592  * ~100dB for full 16b quality, otherwise aliasing may become noticeable.
    593  *
    594  */
    595 template <typename T>
    596 static void testFir(const T* coef, int L, int halfNumCoef,
    597         double fp, double fs, int passSteps, int stopSteps,
    598         double &passMin, double &passMax, double &passRipple,
    599         double &stopMax, double &stopRipple) {
    600     double fmin, fmax;
    601     testFir(coef, L, halfNumCoef, 0., fp, passSteps, fmin, fmax);
    602     double d1 = (fmax - fmin)/2.;
    603     passMin = fmin;
    604     passMax = fmax;
    605     passRipple = -20.*log10(1. - d1); // passband ripple
    606     testFir(coef, L, halfNumCoef, fs, 0.5, stopSteps, fmin, fmax);
    607     // fmin is really not important for the stopband.
    608     stopMax = fmax;
    609     stopRipple = -20.*log10(fmax); // stopband ripple/attenuation
    610 }
    611 
    612 /*
    613  * Calculates the overall polyphase filter based on a windowed sinc function.
    614  *
    615  * The windowed sinc is an odd length symmetric filter of exactly L*halfNumCoef*2+1
    616  * taps for the entire kernel.  This is then decomposed into L+1 polyphase filterbanks.
    617  * The last filterbank is used for interpolation purposes (and is mostly composed
    618  * of the first bank shifted by one sample), and is unnecessary if one does
    619  * not do interpolation.
    620  *
    621  * We use the last filterbank for some transfer function calculation purposes,
    622  * so it needs to be generated anyways.
    623  *
    624  * @param coef is the caller allocated space for coefficients.  This should be
    625  * exactly (L+1)*halfNumCoef in size.
    626  *
    627  * @param L is the number of phases (for interpolation)
    628  *
    629  * @param halfNumCoef should be half the number of coefficients for a single
    630  * polyphase.
    631  *
    632  * @param stopBandAtten is the stopband value, should be >50dB.
    633  *
    634  * @param fcr is cutoff frequency/sampling rate (<0.5).  At this point, the energy
    635  * should be 6dB less. (fcr is where the amplitude drops by half).  Use the
    636  * firKaiserTbw() to calculate the transition bandwidth.  fcr is the midpoint
    637  * between the stop band and the pass band (fstop+fpass)/2.
    638  *
    639  * @param atten is the attenuation (generally slightly less than 1).
    640  */
    641 
    642 template <typename T>
    643 static inline void firKaiserGen(T* coef, int L, int halfNumCoef,
    644         double stopBandAtten, double fcr, double atten) {
    645     //
    646     // Formula 3.2.5, 3.2.7, Vaidyanathan, _Multirate Systems and Filter Banks_, p. 48
    647     // Formula 7.75, Oppenheim and Schafer, _Discrete-time Signal Processing, 3e_, p. 542
    648     //
    649     // See also: http://melodi.ee.washington.edu/courses/ee518/notes/lec17.pdf
    650     //
    651     // Kaiser window and beta parameter
    652     //
    653     //         | 0.1102*(A - 8.7)                         A > 50
    654     //  beta = | 0.5842*(A - 21)^0.4 + 0.07886*(A - 21)   21 <= A <= 50
    655     //         | 0.                                       A < 21
    656     //
    657     // with A is the desired stop-band attenuation in dBFS
    658     //
    659     //    30 dB    2.210
    660     //    40 dB    3.384
    661     //    50 dB    4.538
    662     //    60 dB    5.658
    663     //    70 dB    6.764
    664     //    80 dB    7.865
    665     //    90 dB    8.960
    666     //   100 dB   10.056
    667 
    668     const int N = L * halfNumCoef; // non-negative half
    669     const double beta = 0.1102 * (stopBandAtten - 8.7); // >= 50dB always
    670     const double xstep = (2. * M_PI) * fcr / L;
    671     const double xfrac = 1. / N;
    672     const double yscale = atten * L / (I0(beta) * M_PI);
    673     const double sqrbeta = sqr(beta);
    674 
    675     // We use sine generators, which computes sines on regular step intervals.
    676     // This speeds up overall computation about 40% from computing the sine directly.
    677 
    678     SineGenGen sgg(0., xstep, L*xstep); // generates sine generators (one per polyphase)
    679 
    680     for (int i=0 ; i<=L ; ++i) { // generate an extra set of coefs for interpolation
    681 
    682         // computation for a single polyphase of the overall filter.
    683         SineGen sg = sgg.valueAdvance(); // current sine generator for "j" inner loop.
    684         double err = 0; // for noise shaping on int16_t coefficients (over each polyphase)
    685 
    686         for (int j=0, ix=i ; j<halfNumCoef ; ++j, ix+=L) {
    687             double y;
    688             if (CC_LIKELY(ix)) {
    689                 double x = static_cast<double>(ix);
    690 
    691                 // sine generator: sg.valueAdvance() returns sin(ix*xstep);
    692                 // y = I0(beta * sqrt(1.0 - sqr(x * xfrac))) * yscale * sg.valueAdvance() / x;
    693                 y = I0SqrRat(sqrbeta * (1.0 - sqr(x * xfrac)), yscale * sg.valueAdvance(), x);
    694             } else {
    695                 y = 2. * atten * fcr; // center of filter, sinc(0) = 1.
    696                 sg.advance();
    697             }
    698 
    699             if (is_same<T, int16_t>::value) { // int16_t needs noise shaping
    700                 *coef++ = static_cast<T>(toint(y, 1ULL<<(sizeof(T)*8-1), err));
    701             } else if (is_same<T, int32_t>::value) {
    702                 *coef++ = static_cast<T>(toint(y, 1ULL<<(sizeof(T)*8-1)));
    703             } else { // assumed float or double
    704                 *coef++ = static_cast<T>(y);
    705             }
    706         }
    707     }
    708 }
    709 
    710 } // namespace android
    711 
    712 #endif /*ANDROID_AUDIO_RESAMPLER_FIR_GEN_H*/
    713