1 /* 2 * http://www.kurims.kyoto-u.ac.jp/~ooura/fft.html 3 * Copyright Takuya OOURA, 1996-2001 4 * 5 * You may use, copy, modify and distribute this code for any purpose (include 6 * commercial use) and without fee. Please refer to this package when you modify 7 * this code. 8 * 9 * Changes by the WebRTC authors: 10 * - Trivial type modifications. 11 * - Minimal code subset to do rdft of length 128. 12 * - Optimizations because of known length. 13 * 14 * All changes are covered by the WebRTC license and IP grant: 15 * Use of this source code is governed by a BSD-style license 16 * that can be found in the LICENSE file in the root of the source 17 * tree. An additional intellectual property rights grant can be found 18 * in the file PATENTS. All contributing project authors may 19 * be found in the AUTHORS file in the root of the source tree. 20 */ 21 22 #include "aec_rdft.h" 23 24 #include <math.h> 25 26 #include "system_wrappers/interface/cpu_features_wrapper.h" 27 #include "typedefs.h" 28 29 // constants shared by all paths (C, SSE2). 30 float rdft_w[64]; 31 // constants used by the C path. 32 float rdft_wk3ri_first[32]; 33 float rdft_wk3ri_second[32]; 34 // constants used by SSE2 but initialized in C path. 35 ALIGN16_BEG float ALIGN16_END rdft_wk1r[32]; 36 ALIGN16_BEG float ALIGN16_END rdft_wk2r[32]; 37 ALIGN16_BEG float ALIGN16_END rdft_wk3r[32]; 38 ALIGN16_BEG float ALIGN16_END rdft_wk1i[32]; 39 ALIGN16_BEG float ALIGN16_END rdft_wk2i[32]; 40 ALIGN16_BEG float ALIGN16_END rdft_wk3i[32]; 41 ALIGN16_BEG float ALIGN16_END cftmdl_wk1r[4]; 42 43 static int ip[16]; 44 45 static void bitrv2_32or128(int n, int *ip, float *a) { 46 // n is 32 or 128 47 int j, j1, k, k1, m, m2; 48 float xr, xi, yr, yi; 49 50 ip[0] = 0; 51 { 52 int l = n; 53 m = 1; 54 while ((m << 3) < l) { 55 l >>= 1; 56 for (j = 0; j < m; j++) { 57 ip[m + j] = ip[j] + l; 58 } 59 m <<= 1; 60 } 61 } 62 m2 = 2 * m; 63 for (k = 0; k < m; k++) { 64 for (j = 0; j < k; j++) { 65 j1 = 2 * j + ip[k]; 66 k1 = 2 * k + ip[j]; 67 xr = a[j1]; 68 xi = a[j1 + 1]; 69 yr = a[k1]; 70 yi = a[k1 + 1]; 71 a[j1] = yr; 72 a[j1 + 1] = yi; 73 a[k1] = xr; 74 a[k1 + 1] = xi; 75 j1 += m2; 76 k1 += 2 * m2; 77 xr = a[j1]; 78 xi = a[j1 + 1]; 79 yr = a[k1]; 80 yi = a[k1 + 1]; 81 a[j1] = yr; 82 a[j1 + 1] = yi; 83 a[k1] = xr; 84 a[k1 + 1] = xi; 85 j1 += m2; 86 k1 -= m2; 87 xr = a[j1]; 88 xi = a[j1 + 1]; 89 yr = a[k1]; 90 yi = a[k1 + 1]; 91 a[j1] = yr; 92 a[j1 + 1] = yi; 93 a[k1] = xr; 94 a[k1 + 1] = xi; 95 j1 += m2; 96 k1 += 2 * m2; 97 xr = a[j1]; 98 xi = a[j1 + 1]; 99 yr = a[k1]; 100 yi = a[k1 + 1]; 101 a[j1] = yr; 102 a[j1 + 1] = yi; 103 a[k1] = xr; 104 a[k1 + 1] = xi; 105 } 106 j1 = 2 * k + m2 + ip[k]; 107 k1 = j1 + m2; 108 xr = a[j1]; 109 xi = a[j1 + 1]; 110 yr = a[k1]; 111 yi = a[k1 + 1]; 112 a[j1] = yr; 113 a[j1 + 1] = yi; 114 a[k1] = xr; 115 a[k1 + 1] = xi; 116 } 117 } 118 119 static void makewt_32(void) { 120 const int nw = 32; 121 int j, nwh; 122 float delta, x, y; 123 124 ip[0] = nw; 125 ip[1] = 1; 126 nwh = nw >> 1; 127 delta = atanf(1.0f) / nwh; 128 rdft_w[0] = 1; 129 rdft_w[1] = 0; 130 rdft_w[nwh] = cosf(delta * nwh); 131 rdft_w[nwh + 1] = rdft_w[nwh]; 132 for (j = 2; j < nwh; j += 2) { 133 x = cosf(delta * j); 134 y = sinf(delta * j); 135 rdft_w[j] = x; 136 rdft_w[j + 1] = y; 137 rdft_w[nw - j] = y; 138 rdft_w[nw - j + 1] = x; 139 } 140 bitrv2_32or128(nw, ip + 2, rdft_w); 141 142 // pre-calculate constants used by cft1st_128 and cftmdl_128... 143 cftmdl_wk1r[0] = rdft_w[2]; 144 cftmdl_wk1r[1] = rdft_w[2]; 145 cftmdl_wk1r[2] = rdft_w[2]; 146 cftmdl_wk1r[3] = -rdft_w[2]; 147 { 148 int k1; 149 150 for (k1 = 0, j = 0; j < 128; j += 16, k1 += 2) { 151 const int k2 = 2 * k1; 152 const float wk2r = rdft_w[k1 + 0]; 153 const float wk2i = rdft_w[k1 + 1]; 154 float wk1r, wk1i; 155 // ... scalar version. 156 wk1r = rdft_w[k2 + 0]; 157 wk1i = rdft_w[k2 + 1]; 158 rdft_wk3ri_first[k1 + 0] = wk1r - 2 * wk2i * wk1i; 159 rdft_wk3ri_first[k1 + 1] = 2 * wk2i * wk1r - wk1i; 160 wk1r = rdft_w[k2 + 2]; 161 wk1i = rdft_w[k2 + 3]; 162 rdft_wk3ri_second[k1 + 0] = wk1r - 2 * wk2r * wk1i; 163 rdft_wk3ri_second[k1 + 1] = 2 * wk2r * wk1r - wk1i; 164 // ... vector version. 165 rdft_wk1r[k2 + 0] = rdft_w[k2 + 0]; 166 rdft_wk1r[k2 + 1] = rdft_w[k2 + 0]; 167 rdft_wk1r[k2 + 2] = rdft_w[k2 + 2]; 168 rdft_wk1r[k2 + 3] = rdft_w[k2 + 2]; 169 rdft_wk2r[k2 + 0] = rdft_w[k1 + 0]; 170 rdft_wk2r[k2 + 1] = rdft_w[k1 + 0]; 171 rdft_wk2r[k2 + 2] = -rdft_w[k1 + 1]; 172 rdft_wk2r[k2 + 3] = -rdft_w[k1 + 1]; 173 rdft_wk3r[k2 + 0] = rdft_wk3ri_first[k1 + 0]; 174 rdft_wk3r[k2 + 1] = rdft_wk3ri_first[k1 + 0]; 175 rdft_wk3r[k2 + 2] = rdft_wk3ri_second[k1 + 0]; 176 rdft_wk3r[k2 + 3] = rdft_wk3ri_second[k1 + 0]; 177 rdft_wk1i[k2 + 0] = -rdft_w[k2 + 1]; 178 rdft_wk1i[k2 + 1] = rdft_w[k2 + 1]; 179 rdft_wk1i[k2 + 2] = -rdft_w[k2 + 3]; 180 rdft_wk1i[k2 + 3] = rdft_w[k2 + 3]; 181 rdft_wk2i[k2 + 0] = -rdft_w[k1 + 1]; 182 rdft_wk2i[k2 + 1] = rdft_w[k1 + 1]; 183 rdft_wk2i[k2 + 2] = -rdft_w[k1 + 0]; 184 rdft_wk2i[k2 + 3] = rdft_w[k1 + 0]; 185 rdft_wk3i[k2 + 0] = -rdft_wk3ri_first[k1 + 1]; 186 rdft_wk3i[k2 + 1] = rdft_wk3ri_first[k1 + 1]; 187 rdft_wk3i[k2 + 2] = -rdft_wk3ri_second[k1 + 1]; 188 rdft_wk3i[k2 + 3] = rdft_wk3ri_second[k1 + 1]; 189 } 190 } 191 } 192 193 static void makect_32(void) { 194 float *c = rdft_w + 32; 195 const int nc = 32; 196 int j, nch; 197 float delta; 198 199 ip[1] = nc; 200 nch = nc >> 1; 201 delta = atanf(1.0f) / nch; 202 c[0] = cosf(delta * nch); 203 c[nch] = 0.5f * c[0]; 204 for (j = 1; j < nch; j++) { 205 c[j] = 0.5f * cosf(delta * j); 206 c[nc - j] = 0.5f * sinf(delta * j); 207 } 208 } 209 210 static void cft1st_128_C(float *a) { 211 const int n = 128; 212 int j, k1, k2; 213 float wk1r, wk1i, wk2r, wk2i, wk3r, wk3i; 214 float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; 215 216 x0r = a[0] + a[2]; 217 x0i = a[1] + a[3]; 218 x1r = a[0] - a[2]; 219 x1i = a[1] - a[3]; 220 x2r = a[4] + a[6]; 221 x2i = a[5] + a[7]; 222 x3r = a[4] - a[6]; 223 x3i = a[5] - a[7]; 224 a[0] = x0r + x2r; 225 a[1] = x0i + x2i; 226 a[4] = x0r - x2r; 227 a[5] = x0i - x2i; 228 a[2] = x1r - x3i; 229 a[3] = x1i + x3r; 230 a[6] = x1r + x3i; 231 a[7] = x1i - x3r; 232 wk1r = rdft_w[2]; 233 x0r = a[8] + a[10]; 234 x0i = a[9] + a[11]; 235 x1r = a[8] - a[10]; 236 x1i = a[9] - a[11]; 237 x2r = a[12] + a[14]; 238 x2i = a[13] + a[15]; 239 x3r = a[12] - a[14]; 240 x3i = a[13] - a[15]; 241 a[8] = x0r + x2r; 242 a[9] = x0i + x2i; 243 a[12] = x2i - x0i; 244 a[13] = x0r - x2r; 245 x0r = x1r - x3i; 246 x0i = x1i + x3r; 247 a[10] = wk1r * (x0r - x0i); 248 a[11] = wk1r * (x0r + x0i); 249 x0r = x3i + x1r; 250 x0i = x3r - x1i; 251 a[14] = wk1r * (x0i - x0r); 252 a[15] = wk1r * (x0i + x0r); 253 k1 = 0; 254 for (j = 16; j < n; j += 16) { 255 k1 += 2; 256 k2 = 2 * k1; 257 wk2r = rdft_w[k1 + 0]; 258 wk2i = rdft_w[k1 + 1]; 259 wk1r = rdft_w[k2 + 0]; 260 wk1i = rdft_w[k2 + 1]; 261 wk3r = rdft_wk3ri_first[k1 + 0]; 262 wk3i = rdft_wk3ri_first[k1 + 1]; 263 x0r = a[j + 0] + a[j + 2]; 264 x0i = a[j + 1] + a[j + 3]; 265 x1r = a[j + 0] - a[j + 2]; 266 x1i = a[j + 1] - a[j + 3]; 267 x2r = a[j + 4] + a[j + 6]; 268 x2i = a[j + 5] + a[j + 7]; 269 x3r = a[j + 4] - a[j + 6]; 270 x3i = a[j + 5] - a[j + 7]; 271 a[j + 0] = x0r + x2r; 272 a[j + 1] = x0i + x2i; 273 x0r -= x2r; 274 x0i -= x2i; 275 a[j + 4] = wk2r * x0r - wk2i * x0i; 276 a[j + 5] = wk2r * x0i + wk2i * x0r; 277 x0r = x1r - x3i; 278 x0i = x1i + x3r; 279 a[j + 2] = wk1r * x0r - wk1i * x0i; 280 a[j + 3] = wk1r * x0i + wk1i * x0r; 281 x0r = x1r + x3i; 282 x0i = x1i - x3r; 283 a[j + 6] = wk3r * x0r - wk3i * x0i; 284 a[j + 7] = wk3r * x0i + wk3i * x0r; 285 wk1r = rdft_w[k2 + 2]; 286 wk1i = rdft_w[k2 + 3]; 287 wk3r = rdft_wk3ri_second[k1 + 0]; 288 wk3i = rdft_wk3ri_second[k1 + 1]; 289 x0r = a[j + 8] + a[j + 10]; 290 x0i = a[j + 9] + a[j + 11]; 291 x1r = a[j + 8] - a[j + 10]; 292 x1i = a[j + 9] - a[j + 11]; 293 x2r = a[j + 12] + a[j + 14]; 294 x2i = a[j + 13] + a[j + 15]; 295 x3r = a[j + 12] - a[j + 14]; 296 x3i = a[j + 13] - a[j + 15]; 297 a[j + 8] = x0r + x2r; 298 a[j + 9] = x0i + x2i; 299 x0r -= x2r; 300 x0i -= x2i; 301 a[j + 12] = -wk2i * x0r - wk2r * x0i; 302 a[j + 13] = -wk2i * x0i + wk2r * x0r; 303 x0r = x1r - x3i; 304 x0i = x1i + x3r; 305 a[j + 10] = wk1r * x0r - wk1i * x0i; 306 a[j + 11] = wk1r * x0i + wk1i * x0r; 307 x0r = x1r + x3i; 308 x0i = x1i - x3r; 309 a[j + 14] = wk3r * x0r - wk3i * x0i; 310 a[j + 15] = wk3r * x0i + wk3i * x0r; 311 } 312 } 313 314 static void cftmdl_128_C(float *a) { 315 const int l = 8; 316 const int n = 128; 317 const int m = 32; 318 int j0, j1, j2, j3, k, k1, k2, m2; 319 float wk1r, wk1i, wk2r, wk2i, wk3r, wk3i; 320 float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; 321 322 for (j0 = 0; j0 < l; j0 += 2) { 323 j1 = j0 + 8; 324 j2 = j0 + 16; 325 j3 = j0 + 24; 326 x0r = a[j0 + 0] + a[j1 + 0]; 327 x0i = a[j0 + 1] + a[j1 + 1]; 328 x1r = a[j0 + 0] - a[j1 + 0]; 329 x1i = a[j0 + 1] - a[j1 + 1]; 330 x2r = a[j2 + 0] + a[j3 + 0]; 331 x2i = a[j2 + 1] + a[j3 + 1]; 332 x3r = a[j2 + 0] - a[j3 + 0]; 333 x3i = a[j2 + 1] - a[j3 + 1]; 334 a[j0 + 0] = x0r + x2r; 335 a[j0 + 1] = x0i + x2i; 336 a[j2 + 0] = x0r - x2r; 337 a[j2 + 1] = x0i - x2i; 338 a[j1 + 0] = x1r - x3i; 339 a[j1 + 1] = x1i + x3r; 340 a[j3 + 0] = x1r + x3i; 341 a[j3 + 1] = x1i - x3r; 342 } 343 wk1r = rdft_w[2]; 344 for (j0 = m; j0 < l + m; j0 += 2) { 345 j1 = j0 + 8; 346 j2 = j0 + 16; 347 j3 = j0 + 24; 348 x0r = a[j0 + 0] + a[j1 + 0]; 349 x0i = a[j0 + 1] + a[j1 + 1]; 350 x1r = a[j0 + 0] - a[j1 + 0]; 351 x1i = a[j0 + 1] - a[j1 + 1]; 352 x2r = a[j2 + 0] + a[j3 + 0]; 353 x2i = a[j2 + 1] + a[j3 + 1]; 354 x3r = a[j2 + 0] - a[j3 + 0]; 355 x3i = a[j2 + 1] - a[j3 + 1]; 356 a[j0 + 0] = x0r + x2r; 357 a[j0 + 1] = x0i + x2i; 358 a[j2 + 0] = x2i - x0i; 359 a[j2 + 1] = x0r - x2r; 360 x0r = x1r - x3i; 361 x0i = x1i + x3r; 362 a[j1 + 0] = wk1r * (x0r - x0i); 363 a[j1 + 1] = wk1r * (x0r + x0i); 364 x0r = x3i + x1r; 365 x0i = x3r - x1i; 366 a[j3 + 0] = wk1r * (x0i - x0r); 367 a[j3 + 1] = wk1r * (x0i + x0r); 368 } 369 k1 = 0; 370 m2 = 2 * m; 371 for (k = m2; k < n; k += m2) { 372 k1 += 2; 373 k2 = 2 * k1; 374 wk2r = rdft_w[k1 + 0]; 375 wk2i = rdft_w[k1 + 1]; 376 wk1r = rdft_w[k2 + 0]; 377 wk1i = rdft_w[k2 + 1]; 378 wk3r = rdft_wk3ri_first[k1 + 0]; 379 wk3i = rdft_wk3ri_first[k1 + 1]; 380 for (j0 = k; j0 < l + k; j0 += 2) { 381 j1 = j0 + 8; 382 j2 = j0 + 16; 383 j3 = j0 + 24; 384 x0r = a[j0 + 0] + a[j1 + 0]; 385 x0i = a[j0 + 1] + a[j1 + 1]; 386 x1r = a[j0 + 0] - a[j1 + 0]; 387 x1i = a[j0 + 1] - a[j1 + 1]; 388 x2r = a[j2 + 0] + a[j3 + 0]; 389 x2i = a[j2 + 1] + a[j3 + 1]; 390 x3r = a[j2 + 0] - a[j3 + 0]; 391 x3i = a[j2 + 1] - a[j3 + 1]; 392 a[j0 + 0] = x0r + x2r; 393 a[j0 + 1] = x0i + x2i; 394 x0r -= x2r; 395 x0i -= x2i; 396 a[j2 + 0] = wk2r * x0r - wk2i * x0i; 397 a[j2 + 1] = wk2r * x0i + wk2i * x0r; 398 x0r = x1r - x3i; 399 x0i = x1i + x3r; 400 a[j1 + 0] = wk1r * x0r - wk1i * x0i; 401 a[j1 + 1] = wk1r * x0i + wk1i * x0r; 402 x0r = x1r + x3i; 403 x0i = x1i - x3r; 404 a[j3 + 0] = wk3r * x0r - wk3i * x0i; 405 a[j3 + 1] = wk3r * x0i + wk3i * x0r; 406 } 407 wk1r = rdft_w[k2 + 2]; 408 wk1i = rdft_w[k2 + 3]; 409 wk3r = rdft_wk3ri_second[k1 + 0]; 410 wk3i = rdft_wk3ri_second[k1 + 1]; 411 for (j0 = k + m; j0 < l + (k + m); j0 += 2) { 412 j1 = j0 + 8; 413 j2 = j0 + 16; 414 j3 = j0 + 24; 415 x0r = a[j0 + 0] + a[j1 + 0]; 416 x0i = a[j0 + 1] + a[j1 + 1]; 417 x1r = a[j0 + 0] - a[j1 + 0]; 418 x1i = a[j0 + 1] - a[j1 + 1]; 419 x2r = a[j2 + 0] + a[j3 + 0]; 420 x2i = a[j2 + 1] + a[j3 + 1]; 421 x3r = a[j2 + 0] - a[j3 + 0]; 422 x3i = a[j2 + 1] - a[j3 + 1]; 423 a[j0 + 0] = x0r + x2r; 424 a[j0 + 1] = x0i + x2i; 425 x0r -= x2r; 426 x0i -= x2i; 427 a[j2 + 0] = -wk2i * x0r - wk2r * x0i; 428 a[j2 + 1] = -wk2i * x0i + wk2r * x0r; 429 x0r = x1r - x3i; 430 x0i = x1i + x3r; 431 a[j1 + 0] = wk1r * x0r - wk1i * x0i; 432 a[j1 + 1] = wk1r * x0i + wk1i * x0r; 433 x0r = x1r + x3i; 434 x0i = x1i - x3r; 435 a[j3 + 0] = wk3r * x0r - wk3i * x0i; 436 a[j3 + 1] = wk3r * x0i + wk3i * x0r; 437 } 438 } 439 } 440 441 static void cftfsub_128(float *a) { 442 int j, j1, j2, j3, l; 443 float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; 444 445 cft1st_128(a); 446 cftmdl_128(a); 447 l = 32; 448 for (j = 0; j < l; j += 2) { 449 j1 = j + l; 450 j2 = j1 + l; 451 j3 = j2 + l; 452 x0r = a[j] + a[j1]; 453 x0i = a[j + 1] + a[j1 + 1]; 454 x1r = a[j] - a[j1]; 455 x1i = a[j + 1] - a[j1 + 1]; 456 x2r = a[j2] + a[j3]; 457 x2i = a[j2 + 1] + a[j3 + 1]; 458 x3r = a[j2] - a[j3]; 459 x3i = a[j2 + 1] - a[j3 + 1]; 460 a[j] = x0r + x2r; 461 a[j + 1] = x0i + x2i; 462 a[j2] = x0r - x2r; 463 a[j2 + 1] = x0i - x2i; 464 a[j1] = x1r - x3i; 465 a[j1 + 1] = x1i + x3r; 466 a[j3] = x1r + x3i; 467 a[j3 + 1] = x1i - x3r; 468 } 469 } 470 471 static void cftbsub_128(float *a) { 472 int j, j1, j2, j3, l; 473 float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; 474 475 cft1st_128(a); 476 cftmdl_128(a); 477 l = 32; 478 479 for (j = 0; j < l; j += 2) { 480 j1 = j + l; 481 j2 = j1 + l; 482 j3 = j2 + l; 483 x0r = a[j] + a[j1]; 484 x0i = -a[j + 1] - a[j1 + 1]; 485 x1r = a[j] - a[j1]; 486 x1i = -a[j + 1] + a[j1 + 1]; 487 x2r = a[j2] + a[j3]; 488 x2i = a[j2 + 1] + a[j3 + 1]; 489 x3r = a[j2] - a[j3]; 490 x3i = a[j2 + 1] - a[j3 + 1]; 491 a[j] = x0r + x2r; 492 a[j + 1] = x0i - x2i; 493 a[j2] = x0r - x2r; 494 a[j2 + 1] = x0i + x2i; 495 a[j1] = x1r - x3i; 496 a[j1 + 1] = x1i - x3r; 497 a[j3] = x1r + x3i; 498 a[j3 + 1] = x1i + x3r; 499 } 500 } 501 502 static void rftfsub_128_C(float *a) { 503 const float *c = rdft_w + 32; 504 int j1, j2, k1, k2; 505 float wkr, wki, xr, xi, yr, yi; 506 507 for (j1 = 1, j2 = 2; j2 < 64; j1 += 1, j2 += 2) { 508 k2 = 128 - j2; 509 k1 = 32 - j1; 510 wkr = 0.5f - c[k1]; 511 wki = c[j1]; 512 xr = a[j2 + 0] - a[k2 + 0]; 513 xi = a[j2 + 1] + a[k2 + 1]; 514 yr = wkr * xr - wki * xi; 515 yi = wkr * xi + wki * xr; 516 a[j2 + 0] -= yr; 517 a[j2 + 1] -= yi; 518 a[k2 + 0] += yr; 519 a[k2 + 1] -= yi; 520 } 521 } 522 523 static void rftbsub_128_C(float *a) { 524 const float *c = rdft_w + 32; 525 int j1, j2, k1, k2; 526 float wkr, wki, xr, xi, yr, yi; 527 528 a[1] = -a[1]; 529 for (j1 = 1, j2 = 2; j2 < 64; j1 += 1, j2 += 2) { 530 k2 = 128 - j2; 531 k1 = 32 - j1; 532 wkr = 0.5f - c[k1]; 533 wki = c[j1]; 534 xr = a[j2 + 0] - a[k2 + 0]; 535 xi = a[j2 + 1] + a[k2 + 1]; 536 yr = wkr * xr + wki * xi; 537 yi = wkr * xi - wki * xr; 538 a[j2 + 0] = a[j2 + 0] - yr; 539 a[j2 + 1] = yi - a[j2 + 1]; 540 a[k2 + 0] = yr + a[k2 + 0]; 541 a[k2 + 1] = yi - a[k2 + 1]; 542 } 543 a[65] = -a[65]; 544 } 545 546 void aec_rdft_forward_128(float *a) { 547 const int n = 128; 548 float xi; 549 550 bitrv2_32or128(n, ip + 2, a); 551 cftfsub_128(a); 552 rftfsub_128(a); 553 xi = a[0] - a[1]; 554 a[0] += a[1]; 555 a[1] = xi; 556 } 557 558 void aec_rdft_inverse_128(float *a) { 559 const int n = 128; 560 561 a[1] = 0.5f * (a[0] - a[1]); 562 a[0] -= a[1]; 563 rftbsub_128(a); 564 bitrv2_32or128(n, ip + 2, a); 565 cftbsub_128(a); 566 } 567 568 // code path selection 569 rft_sub_128_t cft1st_128; 570 rft_sub_128_t cftmdl_128; 571 rft_sub_128_t rftfsub_128; 572 rft_sub_128_t rftbsub_128; 573 574 void aec_rdft_init(void) { 575 cft1st_128 = cft1st_128_C; 576 cftmdl_128 = cftmdl_128_C; 577 rftfsub_128 = rftfsub_128_C; 578 rftbsub_128 = rftbsub_128_C; 579 if (WebRtc_GetCPUInfo(kSSE2)) { 580 #if defined(WEBRTC_USE_SSE2) 581 aec_rdft_init_sse2(); 582 #endif 583 } 584 // init library constants. 585 makewt_32(); 586 makect_32(); 587 } 588