1 /* 2 * http://www.kurims.kyoto-u.ac.jp/~ooura/fft.html 3 * Copyright Takuya OOURA, 1996-2001 4 * 5 * You may use, copy, modify and distribute this code for any purpose (include 6 * commercial use) and without fee. Please refer to this package when you modify 7 * this code. 8 * 9 * Changes by the WebRTC authors: 10 * - Trivial type modifications. 11 * - Minimal code subset to do rdft of length 128. 12 * - Optimizations because of known length. 13 * 14 * All changes are covered by the WebRTC license and IP grant: 15 * Use of this source code is governed by a BSD-style license 16 * that can be found in the LICENSE file in the root of the source 17 * tree. An additional intellectual property rights grant can be found 18 * in the file PATENTS. All contributing project authors may 19 * be found in the AUTHORS file in the root of the source tree. 20 */ 21 22 #include <math.h> 23 24 #include "aec_rdft.h" 25 #include "system_wrappers/interface/cpu_features_wrapper.h" 26 27 float rdft_w[64]; 28 static int ip[16]; 29 30 static void bitrv2_32or128(int n, int *ip, float *a) { 31 // n is 32 or 128 32 int j, j1, k, k1, m, m2; 33 float xr, xi, yr, yi; 34 35 ip[0] = 0; 36 { 37 int l = n; 38 m = 1; 39 while ((m << 3) < l) { 40 l >>= 1; 41 for (j = 0; j < m; j++) { 42 ip[m + j] = ip[j] + l; 43 } 44 m <<= 1; 45 } 46 } 47 m2 = 2 * m; 48 for (k = 0; k < m; k++) { 49 for (j = 0; j < k; j++) { 50 j1 = 2 * j + ip[k]; 51 k1 = 2 * k + ip[j]; 52 xr = a[j1]; 53 xi = a[j1 + 1]; 54 yr = a[k1]; 55 yi = a[k1 + 1]; 56 a[j1] = yr; 57 a[j1 + 1] = yi; 58 a[k1] = xr; 59 a[k1 + 1] = xi; 60 j1 += m2; 61 k1 += 2 * m2; 62 xr = a[j1]; 63 xi = a[j1 + 1]; 64 yr = a[k1]; 65 yi = a[k1 + 1]; 66 a[j1] = yr; 67 a[j1 + 1] = yi; 68 a[k1] = xr; 69 a[k1 + 1] = xi; 70 j1 += m2; 71 k1 -= m2; 72 xr = a[j1]; 73 xi = a[j1 + 1]; 74 yr = a[k1]; 75 yi = a[k1 + 1]; 76 a[j1] = yr; 77 a[j1 + 1] = yi; 78 a[k1] = xr; 79 a[k1 + 1] = xi; 80 j1 += m2; 81 k1 += 2 * m2; 82 xr = a[j1]; 83 xi = a[j1 + 1]; 84 yr = a[k1]; 85 yi = a[k1 + 1]; 86 a[j1] = yr; 87 a[j1 + 1] = yi; 88 a[k1] = xr; 89 a[k1 + 1] = xi; 90 } 91 j1 = 2 * k + m2 + ip[k]; 92 k1 = j1 + m2; 93 xr = a[j1]; 94 xi = a[j1 + 1]; 95 yr = a[k1]; 96 yi = a[k1 + 1]; 97 a[j1] = yr; 98 a[j1 + 1] = yi; 99 a[k1] = xr; 100 a[k1 + 1] = xi; 101 } 102 } 103 104 static void makewt_32() { 105 const int nw = 32; 106 int j, nwh; 107 float delta, x, y; 108 109 ip[0] = nw; 110 ip[1] = 1; 111 nwh = nw >> 1; 112 delta = atanf(1.0f) / nwh; 113 rdft_w[0] = 1; 114 rdft_w[1] = 0; 115 rdft_w[nwh] = cosf(delta * nwh); 116 rdft_w[nwh + 1] = rdft_w[nwh]; 117 for (j = 2; j < nwh; j += 2) { 118 x = cosf(delta * j); 119 y = sinf(delta * j); 120 rdft_w[j] = x; 121 rdft_w[j + 1] = y; 122 rdft_w[nw - j] = y; 123 rdft_w[nw - j + 1] = x; 124 } 125 bitrv2_32or128(nw, ip + 2, rdft_w); 126 } 127 128 static void makect_32() { 129 float *c = rdft_w + 32; 130 const int nc = 32; 131 int j, nch; 132 float delta; 133 134 ip[1] = nc; 135 nch = nc >> 1; 136 delta = atanf(1.0f) / nch; 137 c[0] = cosf(delta * nch); 138 c[nch] = 0.5f * c[0]; 139 for (j = 1; j < nch; j++) { 140 c[j] = 0.5f * cosf(delta * j); 141 c[nc - j] = 0.5f * sinf(delta * j); 142 } 143 } 144 145 static void cft1st_128(float *a) { 146 const int n = 128; 147 int j, k1, k2; 148 float wk1r, wk1i, wk2r, wk2i, wk3r, wk3i; 149 float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; 150 151 x0r = a[0] + a[2]; 152 x0i = a[1] + a[3]; 153 x1r = a[0] - a[2]; 154 x1i = a[1] - a[3]; 155 x2r = a[4] + a[6]; 156 x2i = a[5] + a[7]; 157 x3r = a[4] - a[6]; 158 x3i = a[5] - a[7]; 159 a[0] = x0r + x2r; 160 a[1] = x0i + x2i; 161 a[4] = x0r - x2r; 162 a[5] = x0i - x2i; 163 a[2] = x1r - x3i; 164 a[3] = x1i + x3r; 165 a[6] = x1r + x3i; 166 a[7] = x1i - x3r; 167 wk1r = rdft_w[2]; 168 x0r = a[8] + a[10]; 169 x0i = a[9] + a[11]; 170 x1r = a[8] - a[10]; 171 x1i = a[9] - a[11]; 172 x2r = a[12] + a[14]; 173 x2i = a[13] + a[15]; 174 x3r = a[12] - a[14]; 175 x3i = a[13] - a[15]; 176 a[8] = x0r + x2r; 177 a[9] = x0i + x2i; 178 a[12] = x2i - x0i; 179 a[13] = x0r - x2r; 180 x0r = x1r - x3i; 181 x0i = x1i + x3r; 182 a[10] = wk1r * (x0r - x0i); 183 a[11] = wk1r * (x0r + x0i); 184 x0r = x3i + x1r; 185 x0i = x3r - x1i; 186 a[14] = wk1r * (x0i - x0r); 187 a[15] = wk1r * (x0i + x0r); 188 k1 = 0; 189 for (j = 16; j < n; j += 16) { 190 k1 += 2; 191 k2 = 2 * k1; 192 wk2r = rdft_w[k1]; 193 wk2i = rdft_w[k1 + 1]; 194 wk1r = rdft_w[k2]; 195 wk1i = rdft_w[k2 + 1]; 196 wk3r = wk1r - 2 * wk2i * wk1i; 197 wk3i = 2 * wk2i * wk1r - wk1i; 198 x0r = a[j] + a[j + 2]; 199 x0i = a[j + 1] + a[j + 3]; 200 x1r = a[j] - a[j + 2]; 201 x1i = a[j + 1] - a[j + 3]; 202 x2r = a[j + 4] + a[j + 6]; 203 x2i = a[j + 5] + a[j + 7]; 204 x3r = a[j + 4] - a[j + 6]; 205 x3i = a[j + 5] - a[j + 7]; 206 a[j] = x0r + x2r; 207 a[j + 1] = x0i + x2i; 208 x0r -= x2r; 209 x0i -= x2i; 210 a[j + 4] = wk2r * x0r - wk2i * x0i; 211 a[j + 5] = wk2r * x0i + wk2i * x0r; 212 x0r = x1r - x3i; 213 x0i = x1i + x3r; 214 a[j + 2] = wk1r * x0r - wk1i * x0i; 215 a[j + 3] = wk1r * x0i + wk1i * x0r; 216 x0r = x1r + x3i; 217 x0i = x1i - x3r; 218 a[j + 6] = wk3r * x0r - wk3i * x0i; 219 a[j + 7] = wk3r * x0i + wk3i * x0r; 220 wk1r = rdft_w[k2 + 2]; 221 wk1i = rdft_w[k2 + 3]; 222 wk3r = wk1r - 2 * wk2r * wk1i; 223 wk3i = 2 * wk2r * wk1r - wk1i; 224 x0r = a[j + 8] + a[j + 10]; 225 x0i = a[j + 9] + a[j + 11]; 226 x1r = a[j + 8] - a[j + 10]; 227 x1i = a[j + 9] - a[j + 11]; 228 x2r = a[j + 12] + a[j + 14]; 229 x2i = a[j + 13] + a[j + 15]; 230 x3r = a[j + 12] - a[j + 14]; 231 x3i = a[j + 13] - a[j + 15]; 232 a[j + 8] = x0r + x2r; 233 a[j + 9] = x0i + x2i; 234 x0r -= x2r; 235 x0i -= x2i; 236 a[j + 12] = -wk2i * x0r - wk2r * x0i; 237 a[j + 13] = -wk2i * x0i + wk2r * x0r; 238 x0r = x1r - x3i; 239 x0i = x1i + x3r; 240 a[j + 10] = wk1r * x0r - wk1i * x0i; 241 a[j + 11] = wk1r * x0i + wk1i * x0r; 242 x0r = x1r + x3i; 243 x0i = x1i - x3r; 244 a[j + 14] = wk3r * x0r - wk3i * x0i; 245 a[j + 15] = wk3r * x0i + wk3i * x0r; 246 } 247 } 248 249 static void cftmdl_128(int l, float *a) { 250 const int n = 128; 251 int j, j1, j2, j3, k, k1, k2, m, m2; 252 float wk1r, wk1i, wk2r, wk2i, wk3r, wk3i; 253 float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; 254 255 m = l << 2; 256 for (j = 0; j < l; j += 2) { 257 j1 = j + l; 258 j2 = j1 + l; 259 j3 = j2 + l; 260 x0r = a[j] + a[j1]; 261 x0i = a[j + 1] + a[j1 + 1]; 262 x1r = a[j] - a[j1]; 263 x1i = a[j + 1] - a[j1 + 1]; 264 x2r = a[j2] + a[j3]; 265 x2i = a[j2 + 1] + a[j3 + 1]; 266 x3r = a[j2] - a[j3]; 267 x3i = a[j2 + 1] - a[j3 + 1]; 268 a[j] = x0r + x2r; 269 a[j + 1] = x0i + x2i; 270 a[j2] = x0r - x2r; 271 a[j2 + 1] = x0i - x2i; 272 a[j1] = x1r - x3i; 273 a[j1 + 1] = x1i + x3r; 274 a[j3] = x1r + x3i; 275 a[j3 + 1] = x1i - x3r; 276 } 277 wk1r = rdft_w[2]; 278 for (j = m; j < l + m; j += 2) { 279 j1 = j + l; 280 j2 = j1 + l; 281 j3 = j2 + l; 282 x0r = a[j] + a[j1]; 283 x0i = a[j + 1] + a[j1 + 1]; 284 x1r = a[j] - a[j1]; 285 x1i = a[j + 1] - a[j1 + 1]; 286 x2r = a[j2] + a[j3]; 287 x2i = a[j2 + 1] + a[j3 + 1]; 288 x3r = a[j2] - a[j3]; 289 x3i = a[j2 + 1] - a[j3 + 1]; 290 a[j] = x0r + x2r; 291 a[j + 1] = x0i + x2i; 292 a[j2] = x2i - x0i; 293 a[j2 + 1] = x0r - x2r; 294 x0r = x1r - x3i; 295 x0i = x1i + x3r; 296 a[j1] = wk1r * (x0r - x0i); 297 a[j1 + 1] = wk1r * (x0r + x0i); 298 x0r = x3i + x1r; 299 x0i = x3r - x1i; 300 a[j3] = wk1r * (x0i - x0r); 301 a[j3 + 1] = wk1r * (x0i + x0r); 302 } 303 k1 = 0; 304 m2 = 2 * m; 305 for (k = m2; k < n; k += m2) { 306 k1 += 2; 307 k2 = 2 * k1; 308 wk2r = rdft_w[k1]; 309 wk2i = rdft_w[k1 + 1]; 310 wk1r = rdft_w[k2]; 311 wk1i = rdft_w[k2 + 1]; 312 wk3r = wk1r - 2 * wk2i * wk1i; 313 wk3i = 2 * wk2i * wk1r - wk1i; 314 for (j = k; j < l + k; j += 2) { 315 j1 = j + l; 316 j2 = j1 + l; 317 j3 = j2 + l; 318 x0r = a[j] + a[j1]; 319 x0i = a[j + 1] + a[j1 + 1]; 320 x1r = a[j] - a[j1]; 321 x1i = a[j + 1] - a[j1 + 1]; 322 x2r = a[j2] + a[j3]; 323 x2i = a[j2 + 1] + a[j3 + 1]; 324 x3r = a[j2] - a[j3]; 325 x3i = a[j2 + 1] - a[j3 + 1]; 326 a[j] = x0r + x2r; 327 a[j + 1] = x0i + x2i; 328 x0r -= x2r; 329 x0i -= x2i; 330 a[j2] = wk2r * x0r - wk2i * x0i; 331 a[j2 + 1] = wk2r * x0i + wk2i * x0r; 332 x0r = x1r - x3i; 333 x0i = x1i + x3r; 334 a[j1] = wk1r * x0r - wk1i * x0i; 335 a[j1 + 1] = wk1r * x0i + wk1i * x0r; 336 x0r = x1r + x3i; 337 x0i = x1i - x3r; 338 a[j3] = wk3r * x0r - wk3i * x0i; 339 a[j3 + 1] = wk3r * x0i + wk3i * x0r; 340 } 341 wk1r = rdft_w[k2 + 2]; 342 wk1i = rdft_w[k2 + 3]; 343 wk3r = wk1r - 2 * wk2r * wk1i; 344 wk3i = 2 * wk2r * wk1r - wk1i; 345 for (j = k + m; j < l + (k + m); j += 2) { 346 j1 = j + l; 347 j2 = j1 + l; 348 j3 = j2 + l; 349 x0r = a[j] + a[j1]; 350 x0i = a[j + 1] + a[j1 + 1]; 351 x1r = a[j] - a[j1]; 352 x1i = a[j + 1] - a[j1 + 1]; 353 x2r = a[j2] + a[j3]; 354 x2i = a[j2 + 1] + a[j3 + 1]; 355 x3r = a[j2] - a[j3]; 356 x3i = a[j2 + 1] - a[j3 + 1]; 357 a[j] = x0r + x2r; 358 a[j + 1] = x0i + x2i; 359 x0r -= x2r; 360 x0i -= x2i; 361 a[j2] = -wk2i * x0r - wk2r * x0i; 362 a[j2 + 1] = -wk2i * x0i + wk2r * x0r; 363 x0r = x1r - x3i; 364 x0i = x1i + x3r; 365 a[j1] = wk1r * x0r - wk1i * x0i; 366 a[j1 + 1] = wk1r * x0i + wk1i * x0r; 367 x0r = x1r + x3i; 368 x0i = x1i - x3r; 369 a[j3] = wk3r * x0r - wk3i * x0i; 370 a[j3 + 1] = wk3r * x0i + wk3i * x0r; 371 } 372 } 373 } 374 375 static void cftfsub_128(float *a) { 376 int j, j1, j2, j3, l; 377 float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; 378 379 cft1st_128(a); 380 cftmdl_128(8, a); 381 l = 32; 382 for (j = 0; j < l; j += 2) { 383 j1 = j + l; 384 j2 = j1 + l; 385 j3 = j2 + l; 386 x0r = a[j] + a[j1]; 387 x0i = a[j + 1] + a[j1 + 1]; 388 x1r = a[j] - a[j1]; 389 x1i = a[j + 1] - a[j1 + 1]; 390 x2r = a[j2] + a[j3]; 391 x2i = a[j2 + 1] + a[j3 + 1]; 392 x3r = a[j2] - a[j3]; 393 x3i = a[j2 + 1] - a[j3 + 1]; 394 a[j] = x0r + x2r; 395 a[j + 1] = x0i + x2i; 396 a[j2] = x0r - x2r; 397 a[j2 + 1] = x0i - x2i; 398 a[j1] = x1r - x3i; 399 a[j1 + 1] = x1i + x3r; 400 a[j3] = x1r + x3i; 401 a[j3 + 1] = x1i - x3r; 402 } 403 } 404 405 static void cftbsub_128(float *a) { 406 int j, j1, j2, j3, l; 407 float x0r, x0i, x1r, x1i, x2r, x2i, x3r, x3i; 408 409 cft1st_128(a); 410 cftmdl_128(8, a); 411 l = 32; 412 413 for (j = 0; j < l; j += 2) { 414 j1 = j + l; 415 j2 = j1 + l; 416 j3 = j2 + l; 417 x0r = a[j] + a[j1]; 418 x0i = -a[j + 1] - a[j1 + 1]; 419 x1r = a[j] - a[j1]; 420 x1i = -a[j + 1] + a[j1 + 1]; 421 x2r = a[j2] + a[j3]; 422 x2i = a[j2 + 1] + a[j3 + 1]; 423 x3r = a[j2] - a[j3]; 424 x3i = a[j2 + 1] - a[j3 + 1]; 425 a[j] = x0r + x2r; 426 a[j + 1] = x0i - x2i; 427 a[j2] = x0r - x2r; 428 a[j2 + 1] = x0i + x2i; 429 a[j1] = x1r - x3i; 430 a[j1 + 1] = x1i - x3r; 431 a[j3] = x1r + x3i; 432 a[j3 + 1] = x1i + x3r; 433 } 434 } 435 436 static void rftfsub_128_C(float *a) { 437 const float *c = rdft_w + 32; 438 int j1, j2, k1, k2; 439 float wkr, wki, xr, xi, yr, yi; 440 441 for (j1 = 1, j2 = 2; j2 < 64; j1 += 1, j2 += 2) { 442 k2 = 128 - j2; 443 k1 = 32 - j1; 444 wkr = 0.5f - c[k1]; 445 wki = c[j1]; 446 xr = a[j2 + 0] - a[k2 + 0]; 447 xi = a[j2 + 1] + a[k2 + 1]; 448 yr = wkr * xr - wki * xi; 449 yi = wkr * xi + wki * xr; 450 a[j2 + 0] -= yr; 451 a[j2 + 1] -= yi; 452 a[k2 + 0] += yr; 453 a[k2 + 1] -= yi; 454 } 455 } 456 457 static void rftbsub_128_C(float *a) { 458 const float *c = rdft_w + 32; 459 int j1, j2, k1, k2; 460 float wkr, wki, xr, xi, yr, yi; 461 462 a[1] = -a[1]; 463 for (j1 = 1, j2 = 2; j2 < 64; j1 += 1, j2 += 2) { 464 k2 = 128 - j2; 465 k1 = 32 - j1; 466 wkr = 0.5f - c[k1]; 467 wki = c[j1]; 468 xr = a[j2 + 0] - a[k2 + 0]; 469 xi = a[j2 + 1] + a[k2 + 1]; 470 yr = wkr * xr + wki * xi; 471 yi = wkr * xi - wki * xr; 472 a[j2 + 0] = a[j2 + 0] - yr; 473 a[j2 + 1] = yi - a[j2 + 1]; 474 a[k2 + 0] = yr + a[k2 + 0]; 475 a[k2 + 1] = yi - a[k2 + 1]; 476 } 477 a[65] = -a[65]; 478 } 479 480 void aec_rdft_forward_128(float *a) { 481 const int n = 128; 482 int nw; 483 float xi; 484 485 nw = ip[0]; 486 bitrv2_32or128(n, ip + 2, a); 487 cftfsub_128(a); 488 rftfsub_128(a); 489 xi = a[0] - a[1]; 490 a[0] += a[1]; 491 a[1] = xi; 492 } 493 494 void aec_rdft_inverse_128(float *a) { 495 const int n = 128; 496 int nw; 497 float xi; 498 499 nw = ip[0]; 500 a[1] = 0.5f * (a[0] - a[1]); 501 a[0] -= a[1]; 502 rftbsub_128(a); 503 bitrv2_32or128(n, ip + 2, a); 504 cftbsub_128(a); 505 } 506 507 // code path selection 508 rft_sub_128_t rftfsub_128; 509 rft_sub_128_t rftbsub_128; 510 511 void aec_rdft_init(void) { 512 rftfsub_128 = rftfsub_128_C; 513 rftbsub_128 = rftbsub_128_C; 514 if (WebRtc_GetCPUInfo(kSSE2)) { 515 #if defined(__SSE2__) 516 aec_rdft_init_sse2(); 517 #endif 518 } 519 // init library constants. 520 makewt_32(); 521 makect_32(); 522 } 523