Home | History | Annotate | Download | only in aec
      1 /*
      2  * http://www.kurims.kyoto-u.ac.jp/~ooura/fft.html
      3  * Copyright Takuya OOURA, 1996-2001
      4  *
      5  * You may use, copy, modify and distribute this code for any purpose (include
      6  * commercial use) and without fee. Please refer to this package when you modify
      7  * this code.
      8  *
      9  * Changes by the WebRTC authors:
     10  *    - Trivial type modifications.
     11  *    - Minimal code subset to do rdft of length 128.
     12  *    - Optimizations because of known length.
     13  *
     14  *  All changes are covered by the WebRTC license and IP grant:
     15  *  Use of this source code is governed by a BSD-style license
     16  *  that can be found in the LICENSE file in the root of the source
     17  *  tree. An additional intellectual property rights grant can be found
     18  *  in the file PATENTS.  All contributing project authors may
     19  *  be found in the AUTHORS file in the root of the source tree.
     20  */
     21 
     22 #include "aec_rdft.h"
     23 
     24 #include <math.h>
     25 
     26 #include "system_wrappers/interface/cpu_features_wrapper.h"
     27 #include "typedefs.h"
     28 
     29 // constants shared by all paths (C, SSE2).
     30 float rdft_w[64];
     31 // constants used by the C path.
     32 float rdft_wk3ri_first[32];
     33 float rdft_wk3ri_second[32];
     34 // constants used by SSE2 but initialized in C path.
     35 ALIGN16_BEG float ALIGN16_END rdft_wk1r[32];
     36 ALIGN16_BEG float ALIGN16_END rdft_wk2r[32];
     37 ALIGN16_BEG float ALIGN16_END rdft_wk3r[32];
     38 ALIGN16_BEG float ALIGN16_END rdft_wk1i[32];
     39 ALIGN16_BEG float ALIGN16_END rdft_wk2i[32];
     40 ALIGN16_BEG float ALIGN16_END rdft_wk3i[32];
     41 ALIGN16_BEG float ALIGN16_END cftmdl_wk1r[4];
     42 
     43 static int ip[16];
     44 
     45 static void bitrv2_32or128(int n, int *ip, float *a) {
     46   // n is 32 or 128
     47   int j, j1, k, k1, m, m2;
     48   float xr, xi, yr, yi;
     49 
     50   ip[0] = 0;
     51   {
     52     int l = n;
     53     m = 1;
     54     while ((m << 3) < l) {
     55       l >>= 1;
     56       for (j = 0; j < m; j++) {
     57         ip[m + j] = ip[j] + l;
     58       }
     59       m <<= 1;
     60     }
     61   }
     62   m2 = 2 * m;
     63   for (k = 0; k < m; k++) {
     64     for (j = 0; j < k; j++) {
     65       j1 = 2 * j + ip[k];
     66       k1 = 2 * k + ip[j];
     67       xr = a[j1];
     68       xi = a[j1 + 1];
     69       yr = a[k1];
     70       yi = a[k1 + 1];
     71       a[j1] = yr;
     72       a[j1 + 1] = yi;
     73       a[k1] = xr;
     74       a[k1 + 1] = xi;
     75       j1 += m2;
     76       k1 += 2 * m2;
     77       xr = a[j1];
     78       xi = a[j1 + 1];
     79       yr = a[k1];
     80       yi = a[k1 + 1];
     81       a[j1] = yr;
     82       a[j1 + 1] = yi;
     83       a[k1] = xr;
     84       a[k1 + 1] = xi;
     85       j1 += m2;
     86       k1 -= m2;
     87       xr = a[j1];
     88       xi = a[j1 + 1];
     89       yr = a[k1];
     90       yi = a[k1 + 1];
     91       a[j1] = yr;
     92       a[j1 + 1] = yi;
     93       a[k1] = xr;
     94       a[k1 + 1] = xi;
     95       j1 += m2;
     96       k1 += 2 * m2;
     97       xr = a[j1];
     98       xi = a[j1 + 1];
     99       yr = a[k1];
    100       yi = a[k1 + 1];
    101       a[j1] = yr;
    102       a[j1 + 1] = yi;
    103       a[k1] = xr;
    104       a[k1 + 1] = xi;
    105     }
    106     j1 = 2 * k + m2 + ip[k];
    107     k1 = j1 + m2;
    108     xr = a[j1];
    109     xi = a[j1 + 1];
    110     yr = a[k1];
    111     yi = a[k1 + 1];
    112     a[j1] = yr;
    113     a[j1 + 1] = yi;
    114     a[k1] = xr;
    115     a[k1 + 1] = xi;
    116   }
    117 }
    118 
    119 static void makewt_32(void) {
    120   const int nw = 32;
    121   int j, nwh;
    122   float delta, x, y;
    123 
    124   ip[0] = nw;
    125   ip[1] = 1;
    126   nwh = nw >> 1;
    127   delta = atanf(1.0f) / nwh;
    128   rdft_w[0] = 1;
    129   rdft_w[1] = 0;
    130   rdft_w[nwh] = cosf(delta * nwh);
    131   rdft_w[nwh + 1] = rdft_w[nwh];
    132   for (j = 2; j < nwh; j += 2) {
    133     x = cosf(delta * j);
    134     y = sinf(delta * j);
    135     rdft_w[j] = x;
    136     rdft_w[j + 1] = y;
    137     rdft_w[nw - j] = y;
    138     rdft_w[nw - j + 1] = x;
    139   }
    140   bitrv2_32or128(nw, ip + 2, rdft_w);
    141 
    142   // pre-calculate constants used by cft1st_128 and cftmdl_128...
    143   cftmdl_wk1r[0] = rdft_w[2];
    144   cftmdl_wk1r[1] = rdft_w[2];
    145   cftmdl_wk1r[2] = rdft_w[2];
    146   cftmdl_wk1r[3] = -rdft_w[2];
    147   {
    148     int k1;
    149 
    150     for (k1 = 0, j = 0; j < 128; j += 16, k1 += 2) {
    151       const int k2 = 2 * k1;
    152       const float wk2r = rdft_w[k1 + 0];
    153       const float wk2i = rdft_w[k1 + 1];
    154       float wk1r, wk1i;
    155       // ... scalar version.
    156       wk1r = rdft_w[k2 + 0];
    157       wk1i = rdft_w[k2 + 1];
    158       rdft_wk3ri_first[k1 + 0] = wk1r - 2 * wk2i * wk1i;
    159       rdft_wk3ri_first[k1 + 1] = 2 * wk2i * wk1r - wk1i;
    160       wk1r = rdft_w[k2 + 2];
    161       wk1i = rdft_w[k2 + 3];
    162       rdft_wk3ri_second[k1 + 0] = wk1r - 2 * wk2r * wk1i;
    163       rdft_wk3ri_second[k1 + 1] = 2 * wk2r * wk1r - wk1i;
    164       // ... vector version.
    165       rdft_wk1r[k2 + 0] = rdft_w[k2 + 0];
    166       rdft_wk1r[k2 + 1] = rdft_w[k2 + 0];
    167       rdft_wk1r[k2 + 2] = rdft_w[k2 + 2];
    168       rdft_wk1r[k2 + 3] = rdft_w[k2 + 2];
    169       rdft_wk2r[k2 + 0] = rdft_w[k1 + 0];
    170       rdft_wk2r[k2 + 1] = rdft_w[k1 + 0];
    171       rdft_wk2r[k2 + 2] = -rdft_w[k1 + 1];
    172       rdft_wk2r[k2 + 3] = -rdft_w[k1 + 1];
    173       rdft_wk3r[k2 + 0] = rdft_wk3ri_first[k1 + 0];
    174       rdft_wk3r[k2 + 1] = rdft_wk3ri_first[k1 + 0];
    175       rdft_wk3r[k2 + 2] = rdft_wk3ri_second[k1 + 0];
    176       rdft_wk3r[k2 + 3] = rdft_wk3ri_second[k1 + 0];
    177       rdft_wk1i[k2 + 0] = -rdft_w[k2 + 1];
    178       rdft_wk1i[k2 + 1] = rdft_w[k2 + 1];
    179       rdft_wk1i[k2 + 2] = -rdft_w[k2 + 3];
    180       rdft_wk1i[k2 + 3] = rdft_w[k2 + 3];
    181       rdft_wk2i[k2 + 0] = -rdft_w[k1 + 1];
    182       rdft_wk2i[k2 + 1] = rdft_w[k1 + 1];
    183       rdft_wk2i[k2 + 2] = -rdft_w[k1 + 0];
    184       rdft_wk2i[k2 + 3] = rdft_w[k1 + 0];
    185       rdft_wk3i[k2 + 0] = -rdft_wk3ri_first[k1 + 1];
    186       rdft_wk3i[k2 + 1] = rdft_wk3ri_first[k1 + 1];
    187       rdft_wk3i[k2 + 2] = -rdft_wk3ri_second[k1 + 1];
    188       rdft_wk3i[k2 + 3] = rdft_wk3ri_second[k1 + 1];
    189     }
    190   }
    191 }
    192 
    193 static void makect_32(void) {
    194   float *c = rdft_w + 32;
    195   const int nc = 32;
    196   int j, nch;
    197   float delta;
    198 
    199   ip[1] = nc;
    200   nch = nc >> 1;
    201   delta = atanf(1.0f) / nch;
    202   c[0] = cosf(delta * nch);
    203   c[nch] = 0.5f * c[0];
    204   for (j = 1; j < nch; j++) {
    205     c[j] = 0.5f * cosf(delta * j);
    206     c[nc - j] = 0.5f * sinf(delta * j);
    207   }
    208 }
    209 
    210 static void cft1st_128_C(float *a) {
    211   const int n = 128;
    212   int j, k1, k2;
    213   float wk1r, wk1i, wk2r, wk2i, wk3r, wk3i;
    214   float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
    215 
    216   x0r = a[0] + a[2];
    217   x0i = a[1] + a[3];
    218   x1r = a[0] - a[2];
    219   x1i = a[1] - a[3];
    220   x2r = a[4] + a[6];
    221   x2i = a[5] + a[7];
    222   x3r = a[4] - a[6];
    223   x3i = a[5] - a[7];
    224   a[0] = x0r + x2r;
    225   a[1] = x0i + x2i;
    226   a[4] = x0r - x2r;
    227   a[5] = x0i - x2i;
    228   a[2] = x1r - x3i;
    229   a[3] = x1i + x3r;
    230   a[6] = x1r + x3i;
    231   a[7] = x1i - x3r;
    232   wk1r = rdft_w[2];
    233   x0r = a[8] + a[10];
    234   x0i = a[9] + a[11];
    235   x1r = a[8] - a[10];
    236   x1i = a[9] - a[11];
    237   x2r = a[12] + a[14];
    238   x2i = a[13] + a[15];
    239   x3r = a[12] - a[14];
    240   x3i = a[13] - a[15];
    241   a[8] = x0r + x2r;
    242   a[9] = x0i + x2i;
    243   a[12] = x2i - x0i;
    244   a[13] = x0r - x2r;
    245   x0r = x1r - x3i;
    246   x0i = x1i + x3r;
    247   a[10] = wk1r * (x0r - x0i);
    248   a[11] = wk1r * (x0r + x0i);
    249   x0r = x3i + x1r;
    250   x0i = x3r - x1i;
    251   a[14] = wk1r * (x0i - x0r);
    252   a[15] = wk1r * (x0i + x0r);
    253   k1 = 0;
    254   for (j = 16; j < n; j += 16) {
    255     k1 += 2;
    256     k2 = 2 * k1;
    257     wk2r = rdft_w[k1 + 0];
    258     wk2i = rdft_w[k1 + 1];
    259     wk1r = rdft_w[k2 + 0];
    260     wk1i = rdft_w[k2 + 1];
    261     wk3r = rdft_wk3ri_first[k1 + 0];
    262     wk3i = rdft_wk3ri_first[k1 + 1];
    263     x0r = a[j + 0] + a[j + 2];
    264     x0i = a[j + 1] + a[j + 3];
    265     x1r = a[j + 0] - a[j + 2];
    266     x1i = a[j + 1] - a[j + 3];
    267     x2r = a[j + 4] + a[j + 6];
    268     x2i = a[j + 5] + a[j + 7];
    269     x3r = a[j + 4] - a[j + 6];
    270     x3i = a[j + 5] - a[j + 7];
    271     a[j + 0] = x0r + x2r;
    272     a[j + 1] = x0i + x2i;
    273     x0r -= x2r;
    274     x0i -= x2i;
    275     a[j + 4] = wk2r * x0r - wk2i * x0i;
    276     a[j + 5] = wk2r * x0i + wk2i * x0r;
    277     x0r = x1r - x3i;
    278     x0i = x1i + x3r;
    279     a[j + 2] = wk1r * x0r - wk1i * x0i;
    280     a[j + 3] = wk1r * x0i + wk1i * x0r;
    281     x0r = x1r + x3i;
    282     x0i = x1i - x3r;
    283     a[j + 6] = wk3r * x0r - wk3i * x0i;
    284     a[j + 7] = wk3r * x0i + wk3i * x0r;
    285     wk1r = rdft_w[k2 + 2];
    286     wk1i = rdft_w[k2 + 3];
    287     wk3r = rdft_wk3ri_second[k1 + 0];
    288     wk3i = rdft_wk3ri_second[k1 + 1];
    289     x0r = a[j + 8] + a[j + 10];
    290     x0i = a[j + 9] + a[j + 11];
    291     x1r = a[j + 8] - a[j + 10];
    292     x1i = a[j + 9] - a[j + 11];
    293     x2r = a[j + 12] + a[j + 14];
    294     x2i = a[j + 13] + a[j + 15];
    295     x3r = a[j + 12] - a[j + 14];
    296     x3i = a[j + 13] - a[j + 15];
    297     a[j + 8] = x0r + x2r;
    298     a[j + 9] = x0i + x2i;
    299     x0r -= x2r;
    300     x0i -= x2i;
    301     a[j + 12] = -wk2i * x0r - wk2r * x0i;
    302     a[j + 13] = -wk2i * x0i + wk2r * x0r;
    303     x0r = x1r - x3i;
    304     x0i = x1i + x3r;
    305     a[j + 10] = wk1r * x0r - wk1i * x0i;
    306     a[j + 11] = wk1r * x0i + wk1i * x0r;
    307     x0r = x1r + x3i;
    308     x0i = x1i - x3r;
    309     a[j + 14] = wk3r * x0r - wk3i * x0i;
    310     a[j + 15] = wk3r * x0i + wk3i * x0r;
    311   }
    312 }
    313 
    314 static void cftmdl_128_C(float *a) {
    315   const int l = 8;
    316   const int n = 128;
    317   const int m = 32;
    318   int j0, j1, j2, j3, k, k1, k2, m2;
    319   float wk1r, wk1i, wk2r, wk2i, wk3r, wk3i;
    320   float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
    321 
    322   for (j0 = 0; j0 < l; j0 += 2) {
    323     j1 = j0 +  8;
    324     j2 = j0 + 16;
    325     j3 = j0 + 24;
    326     x0r = a[j0 + 0] + a[j1 + 0];
    327     x0i = a[j0 + 1] + a[j1 + 1];
    328     x1r = a[j0 + 0] - a[j1 + 0];
    329     x1i = a[j0 + 1] - a[j1 + 1];
    330     x2r = a[j2 + 0] + a[j3 + 0];
    331     x2i = a[j2 + 1] + a[j3 + 1];
    332     x3r = a[j2 + 0] - a[j3 + 0];
    333     x3i = a[j2 + 1] - a[j3 + 1];
    334     a[j0 + 0] = x0r + x2r;
    335     a[j0 + 1] = x0i + x2i;
    336     a[j2 + 0] = x0r - x2r;
    337     a[j2 + 1] = x0i - x2i;
    338     a[j1 + 0] = x1r - x3i;
    339     a[j1 + 1] = x1i + x3r;
    340     a[j3 + 0] = x1r + x3i;
    341     a[j3 + 1] = x1i - x3r;
    342   }
    343   wk1r = rdft_w[2];
    344   for (j0 = m; j0 < l + m; j0 += 2) {
    345     j1 = j0 +  8;
    346     j2 = j0 + 16;
    347     j3 = j0 + 24;
    348     x0r = a[j0 + 0] + a[j1 + 0];
    349     x0i = a[j0 + 1] + a[j1 + 1];
    350     x1r = a[j0 + 0] - a[j1 + 0];
    351     x1i = a[j0 + 1] - a[j1 + 1];
    352     x2r = a[j2 + 0] + a[j3 + 0];
    353     x2i = a[j2 + 1] + a[j3 + 1];
    354     x3r = a[j2 + 0] - a[j3 + 0];
    355     x3i = a[j2 + 1] - a[j3 + 1];
    356     a[j0 + 0] = x0r + x2r;
    357     a[j0 + 1] = x0i + x2i;
    358     a[j2 + 0] = x2i - x0i;
    359     a[j2 + 1] = x0r - x2r;
    360     x0r = x1r - x3i;
    361     x0i = x1i + x3r;
    362     a[j1 + 0] = wk1r * (x0r - x0i);
    363     a[j1 + 1] = wk1r * (x0r + x0i);
    364     x0r = x3i + x1r;
    365     x0i = x3r - x1i;
    366     a[j3 + 0] = wk1r * (x0i - x0r);
    367     a[j3 + 1] = wk1r * (x0i + x0r);
    368   }
    369   k1 = 0;
    370   m2 = 2 * m;
    371   for (k = m2; k < n; k += m2) {
    372     k1 += 2;
    373     k2 = 2 * k1;
    374     wk2r = rdft_w[k1 + 0];
    375     wk2i = rdft_w[k1 + 1];
    376     wk1r = rdft_w[k2 + 0];
    377     wk1i = rdft_w[k2 + 1];
    378     wk3r = rdft_wk3ri_first[k1 + 0];
    379     wk3i = rdft_wk3ri_first[k1 + 1];
    380     for (j0 = k; j0 < l + k; j0 += 2) {
    381       j1 = j0 +  8;
    382       j2 = j0 + 16;
    383       j3 = j0 + 24;
    384       x0r = a[j0 + 0] + a[j1 + 0];
    385       x0i = a[j0 + 1] + a[j1 + 1];
    386       x1r = a[j0 + 0] - a[j1 + 0];
    387       x1i = a[j0 + 1] - a[j1 + 1];
    388       x2r = a[j2 + 0] + a[j3 + 0];
    389       x2i = a[j2 + 1] + a[j3 + 1];
    390       x3r = a[j2 + 0] - a[j3 + 0];
    391       x3i = a[j2 + 1] - a[j3 + 1];
    392       a[j0 + 0] = x0r + x2r;
    393       a[j0 + 1] = x0i + x2i;
    394       x0r -= x2r;
    395       x0i -= x2i;
    396       a[j2 + 0] = wk2r * x0r - wk2i * x0i;
    397       a[j2 + 1] = wk2r * x0i + wk2i * x0r;
    398       x0r = x1r - x3i;
    399       x0i = x1i + x3r;
    400       a[j1 + 0] = wk1r * x0r - wk1i * x0i;
    401       a[j1 + 1] = wk1r * x0i + wk1i * x0r;
    402       x0r = x1r + x3i;
    403       x0i = x1i - x3r;
    404       a[j3 + 0] = wk3r * x0r - wk3i * x0i;
    405       a[j3 + 1] = wk3r * x0i + wk3i * x0r;
    406     }
    407     wk1r = rdft_w[k2 + 2];
    408     wk1i = rdft_w[k2 + 3];
    409     wk3r = rdft_wk3ri_second[k1 + 0];
    410     wk3i = rdft_wk3ri_second[k1 + 1];
    411     for (j0 = k + m; j0 < l + (k + m); j0 += 2) {
    412       j1 = j0 +  8;
    413       j2 = j0 + 16;
    414       j3 = j0 + 24;
    415       x0r = a[j0 + 0] + a[j1 + 0];
    416       x0i = a[j0 + 1] + a[j1 + 1];
    417       x1r = a[j0 + 0] - a[j1 + 0];
    418       x1i = a[j0 + 1] - a[j1 + 1];
    419       x2r = a[j2 + 0] + a[j3 + 0];
    420       x2i = a[j2 + 1] + a[j3 + 1];
    421       x3r = a[j2 + 0] - a[j3 + 0];
    422       x3i = a[j2 + 1] - a[j3 + 1];
    423       a[j0 + 0] = x0r + x2r;
    424       a[j0 + 1] = x0i + x2i;
    425       x0r -= x2r;
    426       x0i -= x2i;
    427       a[j2 + 0] = -wk2i * x0r - wk2r * x0i;
    428       a[j2 + 1] = -wk2i * x0i + wk2r * x0r;
    429       x0r = x1r - x3i;
    430       x0i = x1i + x3r;
    431       a[j1 + 0] = wk1r * x0r - wk1i * x0i;
    432       a[j1 + 1] = wk1r * x0i + wk1i * x0r;
    433       x0r = x1r + x3i;
    434       x0i = x1i - x3r;
    435       a[j3 + 0] = wk3r * x0r - wk3i * x0i;
    436       a[j3 + 1] = wk3r * x0i + wk3i * x0r;
    437     }
    438   }
    439 }
    440 
    441 static void cftfsub_128(float *a) {
    442   int j, j1, j2, j3, l;
    443   float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
    444 
    445   cft1st_128(a);
    446   cftmdl_128(a);
    447   l = 32;
    448   for (j = 0; j < l; j += 2) {
    449     j1 = j + l;
    450     j2 = j1 + l;
    451     j3 = j2 + l;
    452     x0r = a[j] + a[j1];
    453     x0i = a[j + 1] + a[j1 + 1];
    454     x1r = a[j] - a[j1];
    455     x1i = a[j + 1] - a[j1 + 1];
    456     x2r = a[j2] + a[j3];
    457     x2i = a[j2 + 1] + a[j3 + 1];
    458     x3r = a[j2] - a[j3];
    459     x3i = a[j2 + 1] - a[j3 + 1];
    460     a[j] = x0r + x2r;
    461     a[j + 1] = x0i + x2i;
    462     a[j2] = x0r - x2r;
    463     a[j2 + 1] = x0i - x2i;
    464     a[j1] = x1r - x3i;
    465     a[j1 + 1] = x1i + x3r;
    466     a[j3] = x1r + x3i;
    467     a[j3 + 1] = x1i - x3r;
    468   }
    469 }
    470 
    471 static void cftbsub_128(float *a) {
    472   int j, j1, j2, j3, l;
    473   float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i;
    474 
    475   cft1st_128(a);
    476   cftmdl_128(a);
    477   l = 32;
    478 
    479   for (j = 0; j < l; j += 2) {
    480     j1 = j + l;
    481     j2 = j1 + l;
    482     j3 = j2 + l;
    483     x0r = a[j] + a[j1];
    484     x0i = -a[j + 1] - a[j1 + 1];
    485     x1r = a[j] - a[j1];
    486     x1i = -a[j + 1] + a[j1 + 1];
    487     x2r = a[j2] + a[j3];
    488     x2i = a[j2 + 1] + a[j3 + 1];
    489     x3r = a[j2] - a[j3];
    490     x3i = a[j2 + 1] - a[j3 + 1];
    491     a[j] = x0r + x2r;
    492     a[j + 1] = x0i - x2i;
    493     a[j2] = x0r - x2r;
    494     a[j2 + 1] = x0i + x2i;
    495     a[j1] = x1r - x3i;
    496     a[j1 + 1] = x1i - x3r;
    497     a[j3] = x1r + x3i;
    498     a[j3 + 1] = x1i + x3r;
    499   }
    500 }
    501 
    502 static void rftfsub_128_C(float *a) {
    503   const float *c = rdft_w + 32;
    504   int j1, j2, k1, k2;
    505   float wkr, wki, xr, xi, yr, yi;
    506 
    507   for (j1 = 1, j2 = 2; j2 < 64; j1 += 1, j2 += 2) {
    508     k2 = 128 - j2;
    509     k1 =  32 - j1;
    510     wkr = 0.5f - c[k1];
    511     wki = c[j1];
    512     xr = a[j2 + 0] - a[k2 + 0];
    513     xi = a[j2 + 1] + a[k2 + 1];
    514     yr = wkr * xr - wki * xi;
    515     yi = wkr * xi + wki * xr;
    516     a[j2 + 0] -= yr;
    517     a[j2 + 1] -= yi;
    518     a[k2 + 0] += yr;
    519     a[k2 + 1] -= yi;
    520   }
    521 }
    522 
    523 static void rftbsub_128_C(float *a) {
    524   const float *c = rdft_w + 32;
    525   int j1, j2, k1, k2;
    526   float wkr, wki, xr, xi, yr, yi;
    527 
    528   a[1] = -a[1];
    529   for (j1 = 1, j2 = 2; j2 < 64; j1 += 1, j2 += 2) {
    530     k2 = 128 - j2;
    531     k1 =  32 - j1;
    532     wkr = 0.5f - c[k1];
    533     wki = c[j1];
    534     xr = a[j2 + 0] - a[k2 + 0];
    535     xi = a[j2 + 1] + a[k2 + 1];
    536     yr = wkr * xr + wki * xi;
    537     yi = wkr * xi - wki * xr;
    538     a[j2 + 0] = a[j2 + 0] - yr;
    539     a[j2 + 1] = yi - a[j2 + 1];
    540     a[k2 + 0] = yr + a[k2 + 0];
    541     a[k2 + 1] = yi - a[k2 + 1];
    542   }
    543   a[65] = -a[65];
    544 }
    545 
    546 void aec_rdft_forward_128(float *a) {
    547   const int n = 128;
    548   float xi;
    549 
    550   bitrv2_32or128(n, ip + 2, a);
    551   cftfsub_128(a);
    552   rftfsub_128(a);
    553   xi = a[0] - a[1];
    554   a[0] += a[1];
    555   a[1] = xi;
    556 }
    557 
    558 void aec_rdft_inverse_128(float *a) {
    559   const int n = 128;
    560 
    561   a[1] = 0.5f * (a[0] - a[1]);
    562   a[0] -= a[1];
    563   rftbsub_128(a);
    564   bitrv2_32or128(n, ip + 2, a);
    565   cftbsub_128(a);
    566 }
    567 
    568 // code path selection
    569 rft_sub_128_t cft1st_128;
    570 rft_sub_128_t cftmdl_128;
    571 rft_sub_128_t rftfsub_128;
    572 rft_sub_128_t rftbsub_128;
    573 
    574 void aec_rdft_init(void) {
    575   cft1st_128 = cft1st_128_C;
    576   cftmdl_128 = cftmdl_128_C;
    577   rftfsub_128 = rftfsub_128_C;
    578   rftbsub_128 = rftbsub_128_C;
    579   if (WebRtc_GetCPUInfo(kSSE2)) {
    580 #if defined(WEBRTC_USE_SSE2)
    581     aec_rdft_init_sse2();
    582 #endif
    583   }
    584   // init library constants.
    585   makewt_32();
    586   makect_32();
    587 }
    588