Home | History | Annotate | Download | only in source
      1 /*
      2  * http://www.kurims.kyoto-u.ac.jp/~ooura/fft.html
      3  * Copyright Takuya OOURA, 1996-2001
      4  *
      5  * You may use, copy, modify and distribute this code for any purpose (include
      6  * commercial use) and without fee. Please refer to this package when you modify
      7  * this code.
      8  *
      9  * Changes by the WebRTC authors:
     10  *    - Trivial type modifications.
     11  *    - Minimal code subset to do rdft of length 128.
     12  *    - Optimizations because of known length.
     13  *
     14  *  All changes are covered by the WebRTC license and IP grant:
     15  *  Use of this source code is governed by a BSD-style license
     16  *  that can be found in the LICENSE file in the root of the source
     17  *  tree. An additional intellectual property rights grant can be found
     18  *  in the file PATENTS.  All contributing project authors may
     19  *  be found in the AUTHORS file in the root of the source tree.
     20  */
     21 
     22 #include <math.h>
     23 
     24 #include "aec_rdft.h"
     25 #include "system_wrappers/interface/cpu_features_wrapper.h"
     26 
     27 float rdft_w[64];
     28 static int ip[16];
     29 
     30 static void bitrv2_32or128(int n, int *ip, float *a) {
     31   // n is 32 or 128
     32   int j, j1, k, k1, m, m2;
     33   float xr, xi, yr, yi;
     34 
     35   ip[0] = 0;
     36   {
     37     int l = n;
     38     m = 1;
     39     while ((m << 3) < l) {
     40       l >>= 1;
     41       for (j = 0; j < m; j++) {
     42         ip[m + j] = ip[j] + l;
     43       }
     44       m <<= 1;
     45     }
     46   }
     47   m2 = 2 * m;
     48   for (k = 0; k < m; k++) {
     49     for (j = 0; j < k; j++) {
     50       j1 = 2 * j + ip[k];
     51       k1 = 2 * k + ip[j];
     52       xr = a[j1];
     53       xi = a[j1 + 1];
     54       yr = a[k1];
     55       yi = a[k1 + 1];
     56       a[j1] = yr;
     57       a[j1 + 1] = yi;
     58       a[k1] = xr;
     59       a[k1 + 1] = xi;
     60       j1 += m2;
     61       k1 += 2 * m2;
     62       xr = a[j1];
     63       xi = a[j1 + 1];
     64       yr = a[k1];
     65       yi = a[k1 + 1];
     66       a[j1] = yr;
     67       a[j1 + 1] = yi;
     68       a[k1] = xr;
     69       a[k1 + 1] = xi;
     70       j1 += m2;
     71       k1 -= m2;
     72       xr = a[j1];
     73       xi = a[j1 + 1];
     74       yr = a[k1];
     75       yi = a[k1 + 1];
     76       a[j1] = yr;
     77       a[j1 + 1] = yi;
     78       a[k1] = xr;
     79       a[k1 + 1] = xi;
     80       j1 += m2;
     81       k1 += 2 * m2;
     82       xr = a[j1];
     83       xi = a[j1 + 1];
     84       yr = a[k1];
     85       yi = a[k1 + 1];
     86       a[j1] = yr;
     87       a[j1 + 1] = yi;
     88       a[k1] = xr;
     89       a[k1 + 1] = xi;
     90     }
     91     j1 = 2 * k + m2 + ip[k];
     92     k1 = j1 + m2;
     93     xr = a[j1];
     94     xi = a[j1 + 1];
     95     yr = a[k1];
     96     yi = a[k1 + 1];
     97     a[j1] = yr;
     98     a[j1 + 1] = yi;
     99     a[k1] = xr;
    100     a[k1 + 1] = xi;
    101   }
    102 }
    103 
    104 static void makewt_32() {
    105   const int nw = 32;
    106   int j, nwh;
    107   float delta, x, y;
    108 
    109   ip[0] = nw;
    110   ip[1] = 1;
    111   nwh = nw >> 1;
    112   delta = atanf(1.0f) / nwh;
    113   rdft_w[0] = 1;
    114   rdft_w[1] = 0;
    115   rdft_w[nwh] = cosf(delta * nwh);
    116   rdft_w[nwh + 1] = rdft_w[nwh];
    117   for (j = 2; j < nwh; j += 2) {
    118     x = cosf(delta * j);
    119     y = sinf(delta * j);
    120     rdft_w[j] = x;
    121     rdft_w[j + 1] = y;
    122     rdft_w[nw - j] = y;
    123     rdft_w[nw - j + 1] = x;
    124   }
    125   bitrv2_32or128(nw, ip + 2, rdft_w);
    126 }
    127 
    128 static void makect_32() {
    129   float *c = rdft_w + 32;
    130   const int nc = 32;
    131   int j, nch;
    132   float delta;
    133 
    134   ip[1] = nc;
    135   nch = nc >> 1;
    136   delta = atanf(1.0f) / nch;
    137   c[0] = cosf(delta * nch);
    138   c[nch] = 0.5f * c[0];
    139   for (j = 1; j < nch; j++) {
    140     c[j] = 0.5f * cosf(delta * j);
    141     c[nc - j] = 0.5f * sinf(delta * j);
    142   }
    143 }
    144 
    145 static void cft1st_128(float *a) {
    146   const int n = 128;
    147   int j, k1, k2;
    148   float wk1r, wk1i, wk2r, wk2i, wk3r, wk3i;
    149   float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
    150 
    151   x0r = a[0] + a[2];
    152   x0i = a[1] + a[3];
    153   x1r = a[0] - a[2];
    154   x1i = a[1] - a[3];
    155   x2r = a[4] + a[6];
    156   x2i = a[5] + a[7];
    157   x3r = a[4] - a[6];
    158   x3i = a[5] - a[7];
    159   a[0] = x0r + x2r;
    160   a[1] = x0i + x2i;
    161   a[4] = x0r - x2r;
    162   a[5] = x0i - x2i;
    163   a[2] = x1r - x3i;
    164   a[3] = x1i + x3r;
    165   a[6] = x1r + x3i;
    166   a[7] = x1i - x3r;
    167   wk1r = rdft_w[2];
    168   x0r = a[8] + a[10];
    169   x0i = a[9] + a[11];
    170   x1r = a[8] - a[10];
    171   x1i = a[9] - a[11];
    172   x2r = a[12] + a[14];
    173   x2i = a[13] + a[15];
    174   x3r = a[12] - a[14];
    175   x3i = a[13] - a[15];
    176   a[8] = x0r + x2r;
    177   a[9] = x0i + x2i;
    178   a[12] = x2i - x0i;
    179   a[13] = x0r - x2r;
    180   x0r = x1r - x3i;
    181   x0i = x1i + x3r;
    182   a[10] = wk1r * (x0r - x0i);
    183   a[11] = wk1r * (x0r + x0i);
    184   x0r = x3i + x1r;
    185   x0i = x3r - x1i;
    186   a[14] = wk1r * (x0i - x0r);
    187   a[15] = wk1r * (x0i + x0r);
    188   k1 = 0;
    189   for (j = 16; j < n; j += 16) {
    190     k1 += 2;
    191     k2 = 2 * k1;
    192     wk2r = rdft_w[k1];
    193     wk2i = rdft_w[k1 + 1];
    194     wk1r = rdft_w[k2];
    195     wk1i = rdft_w[k2 + 1];
    196     wk3r = wk1r - 2 * wk2i * wk1i;
    197     wk3i = 2 * wk2i * wk1r - wk1i;
    198     x0r = a[j] + a[j + 2];
    199     x0i = a[j + 1] + a[j + 3];
    200     x1r = a[j] - a[j + 2];
    201     x1i = a[j + 1] - a[j + 3];
    202     x2r = a[j + 4] + a[j + 6];
    203     x2i = a[j + 5] + a[j + 7];
    204     x3r = a[j + 4] - a[j + 6];
    205     x3i = a[j + 5] - a[j + 7];
    206     a[j] = x0r + x2r;
    207     a[j + 1] = x0i + x2i;
    208     x0r -= x2r;
    209     x0i -= x2i;
    210     a[j + 4] = wk2r * x0r - wk2i * x0i;
    211     a[j + 5] = wk2r * x0i + wk2i * x0r;
    212     x0r = x1r - x3i;
    213     x0i = x1i + x3r;
    214     a[j + 2] = wk1r * x0r - wk1i * x0i;
    215     a[j + 3] = wk1r * x0i + wk1i * x0r;
    216     x0r = x1r + x3i;
    217     x0i = x1i - x3r;
    218     a[j + 6] = wk3r * x0r - wk3i * x0i;
    219     a[j + 7] = wk3r * x0i + wk3i * x0r;
    220     wk1r = rdft_w[k2 + 2];
    221     wk1i = rdft_w[k2 + 3];
    222     wk3r = wk1r - 2 * wk2r * wk1i;
    223     wk3i = 2 * wk2r * wk1r - wk1i;
    224     x0r = a[j + 8] + a[j + 10];
    225     x0i = a[j + 9] + a[j + 11];
    226     x1r = a[j + 8] - a[j + 10];
    227     x1i = a[j + 9] - a[j + 11];
    228     x2r = a[j + 12] + a[j + 14];
    229     x2i = a[j + 13] + a[j + 15];
    230     x3r = a[j + 12] - a[j + 14];
    231     x3i = a[j + 13] - a[j + 15];
    232     a[j + 8] = x0r + x2r;
    233     a[j + 9] = x0i + x2i;
    234     x0r -= x2r;
    235     x0i -= x2i;
    236     a[j + 12] = -wk2i * x0r - wk2r * x0i;
    237     a[j + 13] = -wk2i * x0i + wk2r * x0r;
    238     x0r = x1r - x3i;
    239     x0i = x1i + x3r;
    240     a[j + 10] = wk1r * x0r - wk1i * x0i;
    241     a[j + 11] = wk1r * x0i + wk1i * x0r;
    242     x0r = x1r + x3i;
    243     x0i = x1i - x3r;
    244     a[j + 14] = wk3r * x0r - wk3i * x0i;
    245     a[j + 15] = wk3r * x0i + wk3i * x0r;
    246   }
    247 }
    248 
    249 static void cftmdl_128(int l, float *a) {
    250   const int n = 128;
    251   int j, j1, j2, j3, k, k1, k2, m, m2;
    252   float wk1r, wk1i, wk2r, wk2i, wk3r, wk3i;
    253   float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
    254 
    255   m = l << 2;
    256   for (j = 0; j < l; j += 2) {
    257     j1 = j + l;
    258     j2 = j1 + l;
    259     j3 = j2 + l;
    260     x0r = a[j] + a[j1];
    261     x0i = a[j + 1] + a[j1 + 1];
    262     x1r = a[j] - a[j1];
    263     x1i = a[j + 1] - a[j1 + 1];
    264     x2r = a[j2] + a[j3];
    265     x2i = a[j2 + 1] + a[j3 + 1];
    266     x3r = a[j2] - a[j3];
    267     x3i = a[j2 + 1] - a[j3 + 1];
    268     a[j] = x0r + x2r;
    269     a[j + 1] = x0i + x2i;
    270     a[j2] = x0r - x2r;
    271     a[j2 + 1] = x0i - x2i;
    272     a[j1] = x1r - x3i;
    273     a[j1 + 1] = x1i + x3r;
    274     a[j3] = x1r + x3i;
    275     a[j3 + 1] = x1i - x3r;
    276   }
    277   wk1r = rdft_w[2];
    278   for (j = m; j < l + m; j += 2) {
    279     j1 = j + l;
    280     j2 = j1 + l;
    281     j3 = j2 + l;
    282     x0r = a[j] + a[j1];
    283     x0i = a[j + 1] + a[j1 + 1];
    284     x1r = a[j] - a[j1];
    285     x1i = a[j + 1] - a[j1 + 1];
    286     x2r = a[j2] + a[j3];
    287     x2i = a[j2 + 1] + a[j3 + 1];
    288     x3r = a[j2] - a[j3];
    289     x3i = a[j2 + 1] - a[j3 + 1];
    290     a[j] = x0r + x2r;
    291     a[j + 1] = x0i + x2i;
    292     a[j2] = x2i - x0i;
    293     a[j2 + 1] = x0r - x2r;
    294     x0r = x1r - x3i;
    295     x0i = x1i + x3r;
    296     a[j1] = wk1r * (x0r - x0i);
    297     a[j1 + 1] = wk1r * (x0r + x0i);
    298     x0r = x3i + x1r;
    299     x0i = x3r - x1i;
    300     a[j3] = wk1r * (x0i - x0r);
    301     a[j3 + 1] = wk1r * (x0i + x0r);
    302   }
    303   k1 = 0;
    304   m2 = 2 * m;
    305   for (k = m2; k < n; k += m2) {
    306     k1 += 2;
    307     k2 = 2 * k1;
    308     wk2r = rdft_w[k1];
    309     wk2i = rdft_w[k1 + 1];
    310     wk1r = rdft_w[k2];
    311     wk1i = rdft_w[k2 + 1];
    312     wk3r = wk1r - 2 * wk2i * wk1i;
    313     wk3i = 2 * wk2i * wk1r - wk1i;
    314     for (j = k; j < l + k; j += 2) {
    315       j1 = j + l;
    316       j2 = j1 + l;
    317       j3 = j2 + l;
    318       x0r = a[j] + a[j1];
    319       x0i = a[j + 1] + a[j1 + 1];
    320       x1r = a[j] - a[j1];
    321       x1i = a[j + 1] - a[j1 + 1];
    322       x2r = a[j2] + a[j3];
    323       x2i = a[j2 + 1] + a[j3 + 1];
    324       x3r = a[j2] - a[j3];
    325       x3i = a[j2 + 1] - a[j3 + 1];
    326       a[j] = x0r + x2r;
    327       a[j + 1] = x0i + x2i;
    328       x0r -= x2r;
    329       x0i -= x2i;
    330       a[j2] = wk2r * x0r - wk2i * x0i;
    331       a[j2 + 1] = wk2r * x0i + wk2i * x0r;
    332       x0r = x1r - x3i;
    333       x0i = x1i + x3r;
    334       a[j1] = wk1r * x0r - wk1i * x0i;
    335       a[j1 + 1] = wk1r * x0i + wk1i * x0r;
    336       x0r = x1r + x3i;
    337       x0i = x1i - x3r;
    338       a[j3] = wk3r * x0r - wk3i * x0i;
    339       a[j3 + 1] = wk3r * x0i + wk3i * x0r;
    340     }
    341     wk1r = rdft_w[k2 + 2];
    342     wk1i = rdft_w[k2 + 3];
    343     wk3r = wk1r - 2 * wk2r * wk1i;
    344     wk3i = 2 * wk2r * wk1r - wk1i;
    345     for (j = k + m; j < l + (k + m); j += 2) {
    346       j1 = j + l;
    347       j2 = j1 + l;
    348       j3 = j2 + l;
    349       x0r = a[j] + a[j1];
    350       x0i = a[j + 1] + a[j1 + 1];
    351       x1r = a[j] - a[j1];
    352       x1i = a[j + 1] - a[j1 + 1];
    353       x2r = a[j2] + a[j3];
    354       x2i = a[j2 + 1] + a[j3 + 1];
    355       x3r = a[j2] - a[j3];
    356       x3i = a[j2 + 1] - a[j3 + 1];
    357       a[j] = x0r + x2r;
    358       a[j + 1] = x0i + x2i;
    359       x0r -= x2r;
    360       x0i -= x2i;
    361       a[j2] = -wk2i * x0r - wk2r * x0i;
    362       a[j2 + 1] = -wk2i * x0i + wk2r * x0r;
    363       x0r = x1r - x3i;
    364       x0i = x1i + x3r;
    365       a[j1] = wk1r * x0r - wk1i * x0i;
    366       a[j1 + 1] = wk1r * x0i + wk1i * x0r;
    367       x0r = x1r + x3i;
    368       x0i = x1i - x3r;
    369       a[j3] = wk3r * x0r - wk3i * x0i;
    370       a[j3 + 1] = wk3r * x0i + wk3i * x0r;
    371     }
    372   }
    373 }
    374 
    375 static void cftfsub_128(float *a) {
    376   int j, j1, j2, j3, l;
    377   float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
    378 
    379   cft1st_128(a);
    380   cftmdl_128(8, a);
    381   l = 32;
    382   for (j = 0; j < l; j += 2) {
    383     j1 = j + l;
    384     j2 = j1 + l;
    385     j3 = j2 + l;
    386     x0r = a[j] + a[j1];
    387     x0i = a[j + 1] + a[j1 + 1];
    388     x1r = a[j] - a[j1];
    389     x1i = a[j + 1] - a[j1 + 1];
    390     x2r = a[j2] + a[j3];
    391     x2i = a[j2 + 1] + a[j3 + 1];
    392     x3r = a[j2] - a[j3];
    393     x3i = a[j2 + 1] - a[j3 + 1];
    394     a[j] = x0r + x2r;
    395     a[j + 1] = x0i + x2i;
    396     a[j2] = x0r - x2r;
    397     a[j2 + 1] = x0i - x2i;
    398     a[j1] = x1r - x3i;
    399     a[j1 + 1] = x1i + x3r;
    400     a[j3] = x1r + x3i;
    401     a[j3 + 1] = x1i - x3r;
    402   }
    403 }
    404 
    405 static void cftbsub_128(float *a) {
    406   int j, j1, j2, j3, l;
    407   float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
    408 
    409   cft1st_128(a);
    410   cftmdl_128(8, a);
    411   l = 32;
    412 
    413   for (j = 0; j < l; j += 2) {
    414     j1 = j + l;
    415     j2 = j1 + l;
    416     j3 = j2 + l;
    417     x0r = a[j] + a[j1];
    418     x0i = -a[j + 1] - a[j1 + 1];
    419     x1r = a[j] - a[j1];
    420     x1i = -a[j + 1] + a[j1 + 1];
    421     x2r = a[j2] + a[j3];
    422     x2i = a[j2 + 1] + a[j3 + 1];
    423     x3r = a[j2] - a[j3];
    424     x3i = a[j2 + 1] - a[j3 + 1];
    425     a[j] = x0r + x2r;
    426     a[j + 1] = x0i - x2i;
    427     a[j2] = x0r - x2r;
    428     a[j2 + 1] = x0i + x2i;
    429     a[j1] = x1r - x3i;
    430     a[j1 + 1] = x1i - x3r;
    431     a[j3] = x1r + x3i;
    432     a[j3 + 1] = x1i + x3r;
    433   }
    434 }
    435 
    436 static void rftfsub_128_C(float *a) {
    437   const float *c = rdft_w + 32;
    438   int j1, j2, k1, k2;
    439   float wkr, wki, xr, xi, yr, yi;
    440 
    441   for (j1 = 1, j2 = 2; j2 < 64; j1 += 1, j2 += 2) {
    442     k2 = 128 - j2;
    443     k1 =  32 - j1;
    444     wkr = 0.5f - c[k1];
    445     wki = c[j1];
    446     xr = a[j2 + 0] - a[k2 + 0];
    447     xi = a[j2 + 1] + a[k2 + 1];
    448     yr = wkr * xr - wki * xi;
    449     yi = wkr * xi + wki * xr;
    450     a[j2 + 0] -= yr;
    451     a[j2 + 1] -= yi;
    452     a[k2 + 0] += yr;
    453     a[k2 + 1] -= yi;
    454   }
    455 }
    456 
    457 static void rftbsub_128_C(float *a) {
    458   const float *c = rdft_w + 32;
    459   int j1, j2, k1, k2;
    460   float wkr, wki, xr, xi, yr, yi;
    461 
    462   a[1] = -a[1];
    463   for (j1 = 1, j2 = 2; j2 < 64; j1 += 1, j2 += 2) {
    464     k2 = 128 - j2;
    465     k1 =  32 - j1;
    466     wkr = 0.5f - c[k1];
    467     wki = c[j1];
    468     xr = a[j2 + 0] - a[k2 + 0];
    469     xi = a[j2 + 1] + a[k2 + 1];
    470     yr = wkr * xr + wki * xi;
    471     yi = wkr * xi - wki * xr;
    472     a[j2 + 0] = a[j2 + 0] - yr;
    473     a[j2 + 1] = yi - a[j2 + 1];
    474     a[k2 + 0] = yr + a[k2 + 0];
    475     a[k2 + 1] = yi - a[k2 + 1];
    476   }
    477   a[65] = -a[65];
    478 }
    479 
    480 void aec_rdft_forward_128(float *a) {
    481   const int n = 128;
    482   int nw;
    483   float xi;
    484 
    485   nw = ip[0];
    486   bitrv2_32or128(n, ip + 2, a);
    487   cftfsub_128(a);
    488   rftfsub_128(a);
    489   xi = a[0] - a[1];
    490   a[0] += a[1];
    491   a[1] = xi;
    492 }
    493 
    494 void aec_rdft_inverse_128(float *a) {
    495   const int n = 128;
    496   int nw;
    497   float xi;
    498 
    499   nw = ip[0];
    500   a[1] = 0.5f * (a[0] - a[1]);
    501   a[0] -= a[1];
    502   rftbsub_128(a);
    503   bitrv2_32or128(n, ip + 2, a);
    504   cftbsub_128(a);
    505 }
    506 
    507 // code path selection
    508 rft_sub_128_t rftfsub_128;
    509 rft_sub_128_t rftbsub_128;
    510 
    511 void aec_rdft_init(void) {
    512   rftfsub_128 = rftfsub_128_C;
    513   rftbsub_128 = rftbsub_128_C;
    514   if (WebRtc_GetCPUInfo(kSSE2)) {
    515 #if defined(__SSE2__)
    516     aec_rdft_init_sse2();
    517 #endif
    518   }
    519   // init library constants.
    520   makewt_32();
    521   makect_32();
    522 }
    523