1 /* 2 * Copyright (C) 2013 The Android Open Source Project 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 #ifndef ANDROID_AUDIO_RESAMPLER_FIR_GEN_H 18 #define ANDROID_AUDIO_RESAMPLER_FIR_GEN_H 19 20 #include "utils/Compat.h" 21 22 namespace android { 23 24 /* 25 * generates a sine wave at equal steps. 26 * 27 * As most of our functions use sine or cosine at equal steps, 28 * it is very efficient to compute them that way (single multiply and subtract), 29 * rather than invoking the math library sin() or cos() each time. 30 * 31 * SineGen uses Goertzel's Algorithm (as a generator not a filter) 32 * to calculate sine(wstart + n * wstep) or cosine(wstart + n * wstep) 33 * by stepping through 0, 1, ... n. 34 * 35 * e^i(wstart+wstep) = 2cos(wstep) * e^i(wstart) - e^i(wstart-wstep) 36 * 37 * or looking at just the imaginary sine term, as the cosine follows identically: 38 * 39 * sin(wstart+wstep) = 2cos(wstep) * sin(wstart) - sin(wstart-wstep) 40 * 41 * Goertzel's algorithm is more efficient than the angle addition formula, 42 * e^i(wstart+wstep) = e^i(wstart) * e^i(wstep), which takes up to 43 * 4 multiplies and 2 adds (or 3* and 3+) and requires both sine and 44 * cosine generation due to the complex * complex multiply (full rotation). 45 * 46 * See: http://en.wikipedia.org/wiki/Goertzel_algorithm 47 * 48 */ 49 50 class SineGen { 51 public: 52 SineGen(double wstart, double wstep, bool cosine = false) { 53 if (cosine) { 54 mCurrent = cos(wstart); 55 mPrevious = cos(wstart - wstep); 56 } else { 57 mCurrent = sin(wstart); 58 mPrevious = sin(wstart - wstep); 59 } 60 mTwoCos = 2.*cos(wstep); 61 } 62 SineGen(double expNow, double expPrev, double twoCosStep) { 63 mCurrent = expNow; 64 mPrevious = expPrev; 65 mTwoCos = twoCosStep; 66 } 67 inline double value() const { 68 return mCurrent; 69 } 70 inline void advance() { 71 double tmp = mCurrent; 72 mCurrent = mCurrent*mTwoCos - mPrevious; 73 mPrevious = tmp; 74 } 75 inline double valueAdvance() { 76 double tmp = mCurrent; 77 mCurrent = mCurrent*mTwoCos - mPrevious; 78 mPrevious = tmp; 79 return tmp; 80 } 81 82 private: 83 double mCurrent; // current value of sine/cosine 84 double mPrevious; // previous value of sine/cosine 85 double mTwoCos; // stepping factor 86 }; 87 88 /* 89 * generates a series of sine generators, phase offset by fixed steps. 90 * 91 * This is used to generate polyphase sine generators, one per polyphase 92 * in the filter code below. 93 * 94 * The SineGen returned by value() starts at innerStart = outerStart + n*outerStep; 95 * increments by innerStep. 96 * 97 */ 98 99 class SineGenGen { 100 public: 101 SineGenGen(double outerStart, double outerStep, double innerStep, bool cosine = false) 102 : mSineInnerCur(outerStart, outerStep, cosine), 103 mSineInnerPrev(outerStart-innerStep, outerStep, cosine) 104 { 105 mTwoCos = 2.*cos(innerStep); 106 } 107 inline SineGen value() { 108 return SineGen(mSineInnerCur.value(), mSineInnerPrev.value(), mTwoCos); 109 } 110 inline void advance() { 111 mSineInnerCur.advance(); 112 mSineInnerPrev.advance(); 113 } 114 inline SineGen valueAdvance() { 115 return SineGen(mSineInnerCur.valueAdvance(), mSineInnerPrev.valueAdvance(), mTwoCos); 116 } 117 118 private: 119 SineGen mSineInnerCur; // generate the inner sine values (stepped by outerStep). 120 SineGen mSineInnerPrev; // generate the inner sine previous values 121 // (behind by innerStep, stepped by outerStep). 122 double mTwoCos; // the inner stepping factor for the returned SineGen. 123 }; 124 125 static inline double sqr(double x) { 126 return x * x; 127 } 128 129 /* 130 * rounds a double to the nearest integer for FIR coefficients. 131 * 132 * One variant uses noise shaping, which must keep error history 133 * to work (the err parameter, initialized to 0). 134 * The other variant is a non-noise shaped version for 135 * S32 coefficients (noise shaping doesn't gain much). 136 * 137 * Caution: No bounds saturation is applied, but isn't needed in this case. 138 * 139 * @param x is the value to round. 140 * 141 * @param maxval is the maximum integer scale factor expressed as an int64 (for headroom). 142 * Typically this may be the maximum positive integer+1 (using the fact that double precision 143 * FIR coefficients generated here are never that close to 1.0 to pose an overflow condition). 144 * 145 * @param err is the previous error (actual - rounded) for the previous rounding op. 146 * For 16b coefficients this can improve stopband dB performance by up to 2dB. 147 * 148 * Many variants exist for the noise shaping: http://en.wikipedia.org/wiki/Noise_shaping 149 * 150 */ 151 152 static inline int64_t toint(double x, int64_t maxval, double& err) { 153 double val = x * maxval; 154 double ival = floor(val + 0.5 + err*0.2); 155 err = val - ival; 156 return static_cast<int64_t>(ival); 157 } 158 159 static inline int64_t toint(double x, int64_t maxval) { 160 return static_cast<int64_t>(floor(x * maxval + 0.5)); 161 } 162 163 /* 164 * Modified Bessel function of the first kind 165 * http://en.wikipedia.org/wiki/Bessel_function 166 * 167 * The formulas are taken from Abramowitz and Stegun, 168 * _Handbook of Mathematical Functions_ (links below): 169 * 170 * http://people.math.sfu.ca/~cbm/aands/page_375.htm 171 * http://people.math.sfu.ca/~cbm/aands/page_378.htm 172 * 173 * http://dlmf.nist.gov/10.25 174 * http://dlmf.nist.gov/10.40 175 * 176 * Note we assume x is nonnegative (the function is symmetric, 177 * pass in the absolute value as needed). 178 * 179 * Constants are compile time derived with templates I0Term<> and 180 * I0ATerm<> to the precision of the compiler. The series can be expanded 181 * to any precision needed, but currently set around 24b precision. 182 * 183 * We use a bit of template math here, constexpr would probably be 184 * more appropriate for a C++11 compiler. 185 * 186 * For the intermediate range 3.75 < x < 15, we use minimax polynomial fit. 187 * 188 */ 189 190 template <int N> 191 struct I0Term { 192 static const CONSTEXPR double value = I0Term<N-1>::value / (4. * N * N); 193 }; 194 195 template <> 196 struct I0Term<0> { 197 static const CONSTEXPR double value = 1.; 198 }; 199 200 template <int N> 201 struct I0ATerm { 202 static const CONSTEXPR double value = I0ATerm<N-1>::value * (2.*N-1.) * (2.*N-1.) / (8. * N); 203 }; 204 205 template <> 206 struct I0ATerm<0> { // 1/sqrt(2*PI); 207 static const CONSTEXPR double value = 208 0.398942280401432677939946059934381868475858631164934657665925; 209 }; 210 211 #if USE_HORNERS_METHOD 212 /* Polynomial evaluation of A + Bx + Cx^2 + Dx^3 + ... 213 * using Horner's Method: http://en.wikipedia.org/wiki/Horner's_method 214 * 215 * This has fewer multiplications than Estrin's method below, but has back to back 216 * floating point dependencies. 217 * 218 * On ARM this appears to work slower, so USE_HORNERS_METHOD is not default enabled. 219 */ 220 221 inline double Poly2(double A, double B, double x) { 222 return A + x * B; 223 } 224 225 inline double Poly4(double A, double B, double C, double D, double x) { 226 return A + x * (B + x * (C + x * (D))); 227 } 228 229 inline double Poly7(double A, double B, double C, double D, double E, double F, double G, 230 double x) { 231 return A + x * (B + x * (C + x * (D + x * (E + x * (F + x * (G)))))); 232 } 233 234 inline double Poly9(double A, double B, double C, double D, double E, double F, double G, 235 double H, double I, double x) { 236 return A + x * (B + x * (C + x * (D + x * (E + x * (F + x * (G + x * (H + x * (I)))))))); 237 } 238 239 #else 240 /* Polynomial evaluation of A + Bx + Cx^2 + Dx^3 + ... 241 * using Estrin's Method: http://en.wikipedia.org/wiki/Estrin's_scheme 242 * 243 * This is typically faster, perhaps gains about 5-10% overall on ARM processors 244 * over Horner's method above. 245 */ 246 247 inline double Poly2(double A, double B, double x) { 248 return A + B * x; 249 } 250 251 inline double Poly3(double A, double B, double C, double x, double x2) { 252 return Poly2(A, B, x) + C * x2; 253 } 254 255 inline double Poly3(double A, double B, double C, double x) { 256 return Poly2(A, B, x) + C * x * x; 257 } 258 259 inline double Poly4(double A, double B, double C, double D, double x, double x2) { 260 return Poly2(A, B, x) + Poly2(C, D, x) * x2; // same as poly2(poly2, poly2, x2); 261 } 262 263 inline double Poly4(double A, double B, double C, double D, double x) { 264 return Poly4(A, B, C, D, x, x * x); 265 } 266 267 inline double Poly7(double A, double B, double C, double D, double E, double F, double G, 268 double x) { 269 double x2 = x * x; 270 return Poly4(A, B, C, D, x, x2) + Poly3(E, F, G, x, x2) * (x2 * x2); 271 } 272 273 inline double Poly8(double A, double B, double C, double D, double E, double F, double G, 274 double H, double x, double x2, double x4) { 275 return Poly4(A, B, C, D, x, x2) + Poly4(E, F, G, H, x, x2) * x4; 276 } 277 278 inline double Poly9(double A, double B, double C, double D, double E, double F, double G, 279 double H, double I, double x) { 280 double x2 = x * x; 281 #if 1 282 // It does not seem faster to explicitly decompose Poly8 into Poly4, but 283 // could depend on compiler floating point scheduling. 284 double x4 = x2 * x2; 285 return Poly8(A, B, C, D, E, F, G, H, x, x2, x4) + I * (x4 * x4); 286 #else 287 double val = Poly4(A, B, C, D, x, x2); 288 double x4 = x2 * x2; 289 return val + Poly4(E, F, G, H, x, x2) * x4 + I * (x4 * x4); 290 #endif 291 } 292 #endif 293 294 static inline double I0(double x) { 295 if (x < 3.75) { 296 x *= x; 297 return Poly7(I0Term<0>::value, I0Term<1>::value, 298 I0Term<2>::value, I0Term<3>::value, 299 I0Term<4>::value, I0Term<5>::value, 300 I0Term<6>::value, x); // e < 1.6e-7 301 } 302 if (1) { 303 /* 304 * Series expansion coefs are easy to calculate, but are expanded around 0, 305 * so error is unequal over the interval 0 < x < 3.75, the error being 306 * significantly better near 0. 307 * 308 * A better solution is to use precise minimax polynomial fits. 309 * 310 * We use a slightly more complicated solution for 3.75 < x < 15, based on 311 * the tables in Blair and Edwards, "Stable Rational Minimax Approximations 312 * to the Modified Bessel Functions I0(x) and I1(x)", Chalk Hill Nuclear Laboratory, 313 * AECL-4928. 314 * 315 * http://www.iaea.org/inis/collection/NCLCollectionStore/_Public/06/178/6178667.pdf 316 * 317 * See Table 11 for 0 < x < 15; e < 10^(-7.13). 318 * 319 * Note: Beta cannot exceed 15 (hence Stopband cannot exceed 144dB = 24b). 320 * 321 * This speeds up overall computation by about 40% over using the else clause below, 322 * which requires sqrt and exp. 323 * 324 */ 325 326 x *= x; 327 double num = Poly9(-0.13544938430e9, -0.33153754512e8, 328 -0.19406631946e7, -0.48058318783e5, 329 -0.63269783360e3, -0.49520779070e1, 330 -0.24970910370e-1, -0.74741159550e-4, 331 -0.18257612460e-6, x); 332 double y = x - 225.; // reflection around 15 (squared) 333 double den = Poly4(-0.34598737196e8, 0.23852643181e6, 334 -0.70699387620e3, 0.10000000000e1, y); 335 return num / den; 336 337 #if IO_EXTENDED_BETA 338 /* Table 42 for x > 15; e < 10^(-8.11). 339 * This is used for Beta>15, but is disabled here as 340 * we never use Beta that high. 341 * 342 * NOTE: This should be enabled only for x > 15. 343 */ 344 345 double y = 1./x; 346 double z = y - (1./15); 347 double num = Poly2(0.415079861746e1, -0.5149092496e1, z); 348 double den = Poly3(0.103150763823e2, -0.14181687413e2, 349 0.1000000000e1, z); 350 return exp(x) * sqrt(y) * num / den; 351 #endif 352 } else { 353 /* 354 * NOT USED, but reference for large Beta. 355 * 356 * Abramowitz and Stegun asymptotic formula. 357 * works for x > 3.75. 358 */ 359 double y = 1./x; 360 return exp(x) * sqrt(y) * 361 // note: reciprocal squareroot may be easier! 362 // http://en.wikipedia.org/wiki/Fast_inverse_square_root 363 Poly9(I0ATerm<0>::value, I0ATerm<1>::value, 364 I0ATerm<2>::value, I0ATerm<3>::value, 365 I0ATerm<4>::value, I0ATerm<5>::value, 366 I0ATerm<6>::value, I0ATerm<7>::value, 367 I0ATerm<8>::value, y); // (... e) < 1.9e-7 368 } 369 } 370 371 /* A speed optimized version of the Modified Bessel I0() which incorporates 372 * the sqrt and numerator multiply and denominator divide into the computation. 373 * This speeds up filter computation by about 10-15%. 374 */ 375 static inline double I0SqrRat(double x2, double num, double den) { 376 if (x2 < (3.75 * 3.75)) { 377 return Poly7(I0Term<0>::value, I0Term<1>::value, 378 I0Term<2>::value, I0Term<3>::value, 379 I0Term<4>::value, I0Term<5>::value, 380 I0Term<6>::value, x2) * num / den; // e < 1.6e-7 381 } 382 num *= Poly9(-0.13544938430e9, -0.33153754512e8, 383 -0.19406631946e7, -0.48058318783e5, 384 -0.63269783360e3, -0.49520779070e1, 385 -0.24970910370e-1, -0.74741159550e-4, 386 -0.18257612460e-6, x2); // e < 10^(-7.13). 387 double y = x2 - 225.; // reflection around 15 (squared) 388 den *= Poly4(-0.34598737196e8, 0.23852643181e6, 389 -0.70699387620e3, 0.10000000000e1, y); 390 return num / den; 391 } 392 393 /* 394 * calculates the transition bandwidth for a Kaiser filter 395 * 396 * Formula 3.2.8, Vaidyanathan, _Multirate Systems and Filter Banks_, p. 48 397 * Formula 7.76, Oppenheim and Schafer, _Discrete-time Signal Processing, 3e_, p. 542 398 * 399 * @param halfNumCoef is half the number of coefficients per filter phase. 400 * 401 * @param stopBandAtten is the stop band attenuation desired. 402 * 403 * @return the transition bandwidth in normalized frequency (0 <= f <= 0.5) 404 */ 405 static inline double firKaiserTbw(int halfNumCoef, double stopBandAtten) { 406 return (stopBandAtten - 7.95)/((2.*14.36)*halfNumCoef); 407 } 408 409 /* 410 * calculates the fir transfer response of the overall polyphase filter at w. 411 * 412 * Calculates the DTFT transfer coefficient H(w) for 0 <= w <= PI, utilizing the 413 * fact that h[n] is symmetric (cosines only, no complex arithmetic). 414 * 415 * We use Goertzel's algorithm to accelerate the computation to essentially 416 * a single multiply and 2 adds per filter coefficient h[]. 417 * 418 * Be careful be careful to consider that h[n] is the overall polyphase filter, 419 * with L phases, so rescaling H(w)/L is probably what you expect for "unity gain", 420 * as you only use one of the polyphases at a time. 421 */ 422 template <typename T> 423 static inline double firTransfer(const T* coef, int L, int halfNumCoef, double w) { 424 double accum = static_cast<double>(coef[0])*0.5; // "center coefficient" from first bank 425 coef += halfNumCoef; // skip first filterbank (picked up by the last filterbank). 426 #if SLOW_FIRTRANSFER 427 /* Original code for reference. This is equivalent to the code below, but slower. */ 428 for (int i=1 ; i<=L ; ++i) { 429 for (int j=0, ix=i ; j<halfNumCoef ; ++j, ix+=L) { 430 accum += cos(ix*w)*static_cast<double>(*coef++); 431 } 432 } 433 #else 434 /* 435 * Our overall filter is stored striped by polyphases, not a contiguous h[n]. 436 * We could fetch coefficients in a non-contiguous fashion 437 * but that will not scale to vector processing. 438 * 439 * We apply Goertzel's algorithm directly to each polyphase filter bank instead of 440 * using cosine generation/multiplication, thereby saving one multiply per inner loop. 441 * 442 * See: http://en.wikipedia.org/wiki/Goertzel_algorithm 443 * Also: Oppenheim and Schafer, _Discrete Time Signal Processing, 3e_, p. 720. 444 * 445 * We use the basic recursion to incorporate the cosine steps into real sequence x[n]: 446 * s[n] = x[n] + (2cosw)*s[n-1] + s[n-2] 447 * 448 * y[n] = s[n] - e^(iw)s[n-1] 449 * = sum_{k=-\infty}^{n} x[k]e^(-iw(n-k)) 450 * = e^(-iwn) sum_{k=0}^{n} x[k]e^(iwk) 451 * 452 * The summation contains the frequency steps we want multiplied by the source 453 * (similar to a DTFT). 454 * 455 * Using symmetry, and just the real part (be careful, this must happen 456 * after any internal complex multiplications), the polyphase filterbank 457 * transfer function is: 458 * 459 * Hpp[n, w, w_0] = sum_{k=0}^{n} x[k] * cos(wk + w_0) 460 * = Re{ e^(iwn + iw_0) y[n]} 461 * = cos(wn+w_0) * s[n] - cos(w(n+1)+w_0) * s[n-1] 462 * 463 * using the fact that s[n] of real x[n] is real. 464 * 465 */ 466 double dcos = 2. * cos(L*w); 467 int start = ((halfNumCoef)*L + 1); 468 SineGen cc((start - L) * w, w, true); // cosine 469 SineGen cp(start * w, w, true); // cosine 470 for (int i=1 ; i<=L ; ++i) { 471 double sc = 0; 472 double sp = 0; 473 for (int j=0 ; j<halfNumCoef ; ++j) { 474 double tmp = sc; 475 sc = static_cast<double>(*coef++) + dcos*sc - sp; 476 sp = tmp; 477 } 478 // If we are awfully clever, we can apply Goertzel's algorithm 479 // again on the sc and sp sequences returned here. 480 accum += cc.valueAdvance() * sc - cp.valueAdvance() * sp; 481 } 482 #endif 483 return accum*2.; 484 } 485 486 /* 487 * evaluates the minimum and maximum |H(f)| bound in a band region. 488 * 489 * This is usually done with equally spaced increments in the target band in question. 490 * The passband is often very small, and sampled that way. The stopband is often much 491 * larger. 492 * 493 * We use the fact that the overall polyphase filter has an additional bank at the end 494 * for interpolation; hence it is overspecified for the H(f) computation. Thus the 495 * first polyphase is never actually checked, excepting its first term. 496 * 497 * In this code we use the firTransfer() evaluator above, which uses Goertzel's 498 * algorithm to calculate the transfer function at each point. 499 * 500 * TODO: An alternative with equal spacing is the FFT/DFT. An alternative with unequal 501 * spacing is a chirp transform. 502 * 503 * @param coef is the designed polyphase filter banks 504 * 505 * @param L is the number of phases (for interpolation) 506 * 507 * @param halfNumCoef should be half the number of coefficients for a single 508 * polyphase. 509 * 510 * @param fstart is the normalized frequency start. 511 * 512 * @param fend is the normalized frequency end. 513 * 514 * @param steps is the number of steps to take (sampling) between frequency start and end 515 * 516 * @param firMin returns the minimum transfer |H(f)| found 517 * 518 * @param firMax returns the maximum transfer |H(f)| found 519 * 520 * 0 <= f <= 0.5. 521 * This is used to test passband and stopband performance. 522 */ 523 template <typename T> 524 static void testFir(const T* coef, int L, int halfNumCoef, 525 double fstart, double fend, int steps, double &firMin, double &firMax) { 526 double wstart = fstart*(2.*M_PI); 527 double wend = fend*(2.*M_PI); 528 double wstep = (wend - wstart)/steps; 529 double fmax, fmin; 530 double trf = firTransfer(coef, L, halfNumCoef, wstart); 531 if (trf<0) { 532 trf = -trf; 533 } 534 fmin = fmax = trf; 535 wstart += wstep; 536 for (int i=1; i<steps; ++i) { 537 trf = firTransfer(coef, L, halfNumCoef, wstart); 538 if (trf<0) { 539 trf = -trf; 540 } 541 if (trf>fmax) { 542 fmax = trf; 543 } 544 else if (trf<fmin) { 545 fmin = trf; 546 } 547 wstart += wstep; 548 } 549 // renormalize - this is only needed for integer filter types 550 double norm = 1./((1ULL<<(sizeof(T)*8-1))*L); 551 552 firMin = fmin * norm; 553 firMax = fmax * norm; 554 } 555 556 /* 557 * evaluates the |H(f)| lowpass band characteristics. 558 * 559 * This function tests the lowpass characteristics for the overall polyphase filter, 560 * and is used to verify the design. For this case, fp should be set to the 561 * passband normalized frequency from 0 to 0.5 for the overall filter (thus it 562 * is the designed polyphase bank value / L). Likewise for fs. 563 * 564 * @param coef is the designed polyphase filter banks 565 * 566 * @param L is the number of phases (for interpolation) 567 * 568 * @param halfNumCoef should be half the number of coefficients for a single 569 * polyphase. 570 * 571 * @param fp is the passband normalized frequency, 0 < fp < fs < 0.5. 572 * 573 * @param fs is the stopband normalized frequency, 0 < fp < fs < 0.5. 574 * 575 * @param passSteps is the number of passband sampling steps. 576 * 577 * @param stopSteps is the number of stopband sampling steps. 578 * 579 * @param passMin is the minimum value in the passband 580 * 581 * @param passMax is the maximum value in the passband (useful for scaling). This should 582 * be less than 1., to avoid sine wave test overflow. 583 * 584 * @param passRipple is the passband ripple. Typically this should be less than 0.1 for 585 * an audio filter. Generally speaker/headphone device characteristics will dominate 586 * the passband term. 587 * 588 * @param stopMax is the maximum value in the stopband. 589 * 590 * @param stopRipple is the stopband ripple, also known as stopband attenuation. 591 * Typically this should be greater than ~80dB for low quality, and greater than 592 * ~100dB for full 16b quality, otherwise aliasing may become noticeable. 593 * 594 */ 595 template <typename T> 596 static void testFir(const T* coef, int L, int halfNumCoef, 597 double fp, double fs, int passSteps, int stopSteps, 598 double &passMin, double &passMax, double &passRipple, 599 double &stopMax, double &stopRipple) { 600 double fmin, fmax; 601 testFir(coef, L, halfNumCoef, 0., fp, passSteps, fmin, fmax); 602 double d1 = (fmax - fmin)/2.; 603 passMin = fmin; 604 passMax = fmax; 605 passRipple = -20.*log10(1. - d1); // passband ripple 606 testFir(coef, L, halfNumCoef, fs, 0.5, stopSteps, fmin, fmax); 607 // fmin is really not important for the stopband. 608 stopMax = fmax; 609 stopRipple = -20.*log10(fmax); // stopband ripple/attenuation 610 } 611 612 /* 613 * Calculates the overall polyphase filter based on a windowed sinc function. 614 * 615 * The windowed sinc is an odd length symmetric filter of exactly L*halfNumCoef*2+1 616 * taps for the entire kernel. This is then decomposed into L+1 polyphase filterbanks. 617 * The last filterbank is used for interpolation purposes (and is mostly composed 618 * of the first bank shifted by one sample), and is unnecessary if one does 619 * not do interpolation. 620 * 621 * We use the last filterbank for some transfer function calculation purposes, 622 * so it needs to be generated anyways. 623 * 624 * @param coef is the caller allocated space for coefficients. This should be 625 * exactly (L+1)*halfNumCoef in size. 626 * 627 * @param L is the number of phases (for interpolation) 628 * 629 * @param halfNumCoef should be half the number of coefficients for a single 630 * polyphase. 631 * 632 * @param stopBandAtten is the stopband value, should be >50dB. 633 * 634 * @param fcr is cutoff frequency/sampling rate (<0.5). At this point, the energy 635 * should be 6dB less. (fcr is where the amplitude drops by half). Use the 636 * firKaiserTbw() to calculate the transition bandwidth. fcr is the midpoint 637 * between the stop band and the pass band (fstop+fpass)/2. 638 * 639 * @param atten is the attenuation (generally slightly less than 1). 640 */ 641 642 template <typename T> 643 static inline void firKaiserGen(T* coef, int L, int halfNumCoef, 644 double stopBandAtten, double fcr, double atten) { 645 // 646 // Formula 3.2.5, 3.2.7, Vaidyanathan, _Multirate Systems and Filter Banks_, p. 48 647 // Formula 7.75, Oppenheim and Schafer, _Discrete-time Signal Processing, 3e_, p. 542 648 // 649 // See also: http://melodi.ee.washington.edu/courses/ee518/notes/lec17.pdf 650 // 651 // Kaiser window and beta parameter 652 // 653 // | 0.1102*(A - 8.7) A > 50 654 // beta = | 0.5842*(A - 21)^0.4 + 0.07886*(A - 21) 21 <= A <= 50 655 // | 0. A < 21 656 // 657 // with A is the desired stop-band attenuation in dBFS 658 // 659 // 30 dB 2.210 660 // 40 dB 3.384 661 // 50 dB 4.538 662 // 60 dB 5.658 663 // 70 dB 6.764 664 // 80 dB 7.865 665 // 90 dB 8.960 666 // 100 dB 10.056 667 668 const int N = L * halfNumCoef; // non-negative half 669 const double beta = 0.1102 * (stopBandAtten - 8.7); // >= 50dB always 670 const double xstep = (2. * M_PI) * fcr / L; 671 const double xfrac = 1. / N; 672 const double yscale = atten * L / (I0(beta) * M_PI); 673 const double sqrbeta = sqr(beta); 674 675 // We use sine generators, which computes sines on regular step intervals. 676 // This speeds up overall computation about 40% from computing the sine directly. 677 678 SineGenGen sgg(0., xstep, L*xstep); // generates sine generators (one per polyphase) 679 680 for (int i=0 ; i<=L ; ++i) { // generate an extra set of coefs for interpolation 681 682 // computation for a single polyphase of the overall filter. 683 SineGen sg = sgg.valueAdvance(); // current sine generator for "j" inner loop. 684 double err = 0; // for noise shaping on int16_t coefficients (over each polyphase) 685 686 for (int j=0, ix=i ; j<halfNumCoef ; ++j, ix+=L) { 687 double y; 688 if (CC_LIKELY(ix)) { 689 double x = static_cast<double>(ix); 690 691 // sine generator: sg.valueAdvance() returns sin(ix*xstep); 692 // y = I0(beta * sqrt(1.0 - sqr(x * xfrac))) * yscale * sg.valueAdvance() / x; 693 y = I0SqrRat(sqrbeta * (1.0 - sqr(x * xfrac)), yscale * sg.valueAdvance(), x); 694 } else { 695 y = 2. * atten * fcr; // center of filter, sinc(0) = 1. 696 sg.advance(); 697 } 698 699 if (is_same<T, int16_t>::value) { // int16_t needs noise shaping 700 *coef++ = static_cast<T>(toint(y, 1ULL<<(sizeof(T)*8-1), err)); 701 } else if (is_same<T, int32_t>::value) { 702 *coef++ = static_cast<T>(toint(y, 1ULL<<(sizeof(T)*8-1))); 703 } else { // assumed float or double 704 *coef++ = static_cast<T>(y); 705 } 706 } 707 } 708 } 709 710 } // namespace android 711 712 #endif /*ANDROID_AUDIO_RESAMPLER_FIR_GEN_H*/ 713