1 /* 2 Copyright (c) 2003-2004, Mark Borgerding 3 Copyright (c) 2005-2007, Jean-Marc Valin 4 5 All rights reserved. 6 7 Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: 8 9 * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. 10 * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. 11 * Neither the author nor the names of any contributors may be used to endorse or promote products derived from this software without specific prior written permission. 12 13 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 14 */ 15 16 17 #ifdef HAVE_CONFIG_H 18 #include "config.h" 19 #endif 20 21 #include "_kiss_fft_guts.h" 22 #include "arch.h" 23 #include "os_support.h" 24 25 /* The guts header contains all the multiplication and addition macros that are defined for 26 fixed or floating point complex numbers. It also delares the kf_ internal functions. 27 */ 28 29 static void kf_bfly2( 30 kiss_fft_cpx * Fout, 31 const size_t fstride, 32 const kiss_fft_cfg st, 33 int m, 34 int N, 35 int mm 36 ) 37 { 38 kiss_fft_cpx * Fout2; 39 kiss_fft_cpx * tw1; 40 kiss_fft_cpx t; 41 if (!st->inverse) { 42 int i,j; 43 kiss_fft_cpx * Fout_beg = Fout; 44 for (i=0;i<N;i++) 45 { 46 Fout = Fout_beg + i*mm; 47 Fout2 = Fout + m; 48 tw1 = st->twiddles; 49 for(j=0;j<m;j++) 50 { 51 /* Almost the same as the code path below, except that we divide the input by two 52 (while keeping the best accuracy possible) */ 53 spx_word32_t tr, ti; 54 tr = SHR32(SUB32(MULT16_16(Fout2->r , tw1->r),MULT16_16(Fout2->i , tw1->i)), 1); 55 ti = SHR32(ADD32(MULT16_16(Fout2->i , tw1->r),MULT16_16(Fout2->r , tw1->i)), 1); 56 tw1 += fstride; 57 Fout2->r = PSHR32(SUB32(SHL32(EXTEND32(Fout->r), 14), tr), 15); 58 Fout2->i = PSHR32(SUB32(SHL32(EXTEND32(Fout->i), 14), ti), 15); 59 Fout->r = PSHR32(ADD32(SHL32(EXTEND32(Fout->r), 14), tr), 15); 60 Fout->i = PSHR32(ADD32(SHL32(EXTEND32(Fout->i), 14), ti), 15); 61 ++Fout2; 62 ++Fout; 63 } 64 } 65 } else { 66 int i,j; 67 kiss_fft_cpx * Fout_beg = Fout; 68 for (i=0;i<N;i++) 69 { 70 Fout = Fout_beg + i*mm; 71 Fout2 = Fout + m; 72 tw1 = st->twiddles; 73 for(j=0;j<m;j++) 74 { 75 C_MUL (t, *Fout2 , *tw1); 76 tw1 += fstride; 77 C_SUB( *Fout2 , *Fout , t ); 78 C_ADDTO( *Fout , t ); 79 ++Fout2; 80 ++Fout; 81 } 82 } 83 } 84 } 85 86 static void kf_bfly4( 87 kiss_fft_cpx * Fout, 88 const size_t fstride, 89 const kiss_fft_cfg st, 90 int m, 91 int N, 92 int mm 93 ) 94 { 95 kiss_fft_cpx *tw1,*tw2,*tw3; 96 kiss_fft_cpx scratch[6]; 97 const size_t m2=2*m; 98 const size_t m3=3*m; 99 int i, j; 100 101 if (st->inverse) 102 { 103 kiss_fft_cpx * Fout_beg = Fout; 104 for (i=0;i<N;i++) 105 { 106 Fout = Fout_beg + i*mm; 107 tw3 = tw2 = tw1 = st->twiddles; 108 for (j=0;j<m;j++) 109 { 110 C_MUL(scratch[0],Fout[m] , *tw1 ); 111 C_MUL(scratch[1],Fout[m2] , *tw2 ); 112 C_MUL(scratch[2],Fout[m3] , *tw3 ); 113 114 C_SUB( scratch[5] , *Fout, scratch[1] ); 115 C_ADDTO(*Fout, scratch[1]); 116 C_ADD( scratch[3] , scratch[0] , scratch[2] ); 117 C_SUB( scratch[4] , scratch[0] , scratch[2] ); 118 C_SUB( Fout[m2], *Fout, scratch[3] ); 119 tw1 += fstride; 120 tw2 += fstride*2; 121 tw3 += fstride*3; 122 C_ADDTO( *Fout , scratch[3] ); 123 124 Fout[m].r = scratch[5].r - scratch[4].i; 125 Fout[m].i = scratch[5].i + scratch[4].r; 126 Fout[m3].r = scratch[5].r + scratch[4].i; 127 Fout[m3].i = scratch[5].i - scratch[4].r; 128 ++Fout; 129 } 130 } 131 } else 132 { 133 kiss_fft_cpx * Fout_beg = Fout; 134 for (i=0;i<N;i++) 135 { 136 Fout = Fout_beg + i*mm; 137 tw3 = tw2 = tw1 = st->twiddles; 138 for (j=0;j<m;j++) 139 { 140 C_MUL4(scratch[0],Fout[m] , *tw1 ); 141 C_MUL4(scratch[1],Fout[m2] , *tw2 ); 142 C_MUL4(scratch[2],Fout[m3] , *tw3 ); 143 144 Fout->r = PSHR16(Fout->r, 2); 145 Fout->i = PSHR16(Fout->i, 2); 146 C_SUB( scratch[5] , *Fout, scratch[1] ); 147 C_ADDTO(*Fout, scratch[1]); 148 C_ADD( scratch[3] , scratch[0] , scratch[2] ); 149 C_SUB( scratch[4] , scratch[0] , scratch[2] ); 150 Fout[m2].r = PSHR16(Fout[m2].r, 2); 151 Fout[m2].i = PSHR16(Fout[m2].i, 2); 152 C_SUB( Fout[m2], *Fout, scratch[3] ); 153 tw1 += fstride; 154 tw2 += fstride*2; 155 tw3 += fstride*3; 156 C_ADDTO( *Fout , scratch[3] ); 157 158 Fout[m].r = scratch[5].r + scratch[4].i; 159 Fout[m].i = scratch[5].i - scratch[4].r; 160 Fout[m3].r = scratch[5].r - scratch[4].i; 161 Fout[m3].i = scratch[5].i + scratch[4].r; 162 ++Fout; 163 } 164 } 165 } 166 } 167 168 static void kf_bfly3( 169 kiss_fft_cpx * Fout, 170 const size_t fstride, 171 const kiss_fft_cfg st, 172 size_t m 173 ) 174 { 175 size_t k=m; 176 const size_t m2 = 2*m; 177 kiss_fft_cpx *tw1,*tw2; 178 kiss_fft_cpx scratch[5]; 179 kiss_fft_cpx epi3; 180 epi3 = st->twiddles[fstride*m]; 181 182 tw1=tw2=st->twiddles; 183 184 do{ 185 if (!st->inverse) { 186 C_FIXDIV(*Fout,3); C_FIXDIV(Fout[m],3); C_FIXDIV(Fout[m2],3); 187 } 188 189 C_MUL(scratch[1],Fout[m] , *tw1); 190 C_MUL(scratch[2],Fout[m2] , *tw2); 191 192 C_ADD(scratch[3],scratch[1],scratch[2]); 193 C_SUB(scratch[0],scratch[1],scratch[2]); 194 tw1 += fstride; 195 tw2 += fstride*2; 196 197 Fout[m].r = Fout->r - HALF_OF(scratch[3].r); 198 Fout[m].i = Fout->i - HALF_OF(scratch[3].i); 199 200 C_MULBYSCALAR( scratch[0] , epi3.i ); 201 202 C_ADDTO(*Fout,scratch[3]); 203 204 Fout[m2].r = Fout[m].r + scratch[0].i; 205 Fout[m2].i = Fout[m].i - scratch[0].r; 206 207 Fout[m].r -= scratch[0].i; 208 Fout[m].i += scratch[0].r; 209 210 ++Fout; 211 }while(--k); 212 } 213 214 static void kf_bfly5( 215 kiss_fft_cpx * Fout, 216 const size_t fstride, 217 const kiss_fft_cfg st, 218 int m 219 ) 220 { 221 kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4; 222 int u; 223 kiss_fft_cpx scratch[13]; 224 kiss_fft_cpx * twiddles = st->twiddles; 225 kiss_fft_cpx *tw; 226 kiss_fft_cpx ya,yb; 227 ya = twiddles[fstride*m]; 228 yb = twiddles[fstride*2*m]; 229 230 Fout0=Fout; 231 Fout1=Fout0+m; 232 Fout2=Fout0+2*m; 233 Fout3=Fout0+3*m; 234 Fout4=Fout0+4*m; 235 236 tw=st->twiddles; 237 for ( u=0; u<m; ++u ) { 238 if (!st->inverse) { 239 C_FIXDIV( *Fout0,5); C_FIXDIV( *Fout1,5); C_FIXDIV( *Fout2,5); C_FIXDIV( *Fout3,5); C_FIXDIV( *Fout4,5); 240 } 241 scratch[0] = *Fout0; 242 243 C_MUL(scratch[1] ,*Fout1, tw[u*fstride]); 244 C_MUL(scratch[2] ,*Fout2, tw[2*u*fstride]); 245 C_MUL(scratch[3] ,*Fout3, tw[3*u*fstride]); 246 C_MUL(scratch[4] ,*Fout4, tw[4*u*fstride]); 247 248 C_ADD( scratch[7],scratch[1],scratch[4]); 249 C_SUB( scratch[10],scratch[1],scratch[4]); 250 C_ADD( scratch[8],scratch[2],scratch[3]); 251 C_SUB( scratch[9],scratch[2],scratch[3]); 252 253 Fout0->r += scratch[7].r + scratch[8].r; 254 Fout0->i += scratch[7].i + scratch[8].i; 255 256 scratch[5].r = scratch[0].r + S_MUL(scratch[7].r,ya.r) + S_MUL(scratch[8].r,yb.r); 257 scratch[5].i = scratch[0].i + S_MUL(scratch[7].i,ya.r) + S_MUL(scratch[8].i,yb.r); 258 259 scratch[6].r = S_MUL(scratch[10].i,ya.i) + S_MUL(scratch[9].i,yb.i); 260 scratch[6].i = -S_MUL(scratch[10].r,ya.i) - S_MUL(scratch[9].r,yb.i); 261 262 C_SUB(*Fout1,scratch[5],scratch[6]); 263 C_ADD(*Fout4,scratch[5],scratch[6]); 264 265 scratch[11].r = scratch[0].r + S_MUL(scratch[7].r,yb.r) + S_MUL(scratch[8].r,ya.r); 266 scratch[11].i = scratch[0].i + S_MUL(scratch[7].i,yb.r) + S_MUL(scratch[8].i,ya.r); 267 scratch[12].r = - S_MUL(scratch[10].i,yb.i) + S_MUL(scratch[9].i,ya.i); 268 scratch[12].i = S_MUL(scratch[10].r,yb.i) - S_MUL(scratch[9].r,ya.i); 269 270 C_ADD(*Fout2,scratch[11],scratch[12]); 271 C_SUB(*Fout3,scratch[11],scratch[12]); 272 273 ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4; 274 } 275 } 276 277 /* perform the butterfly for one stage of a mixed radix FFT */ 278 static void kf_bfly_generic( 279 kiss_fft_cpx * Fout, 280 const size_t fstride, 281 const kiss_fft_cfg st, 282 int m, 283 int p 284 ) 285 { 286 int u,k,q1,q; 287 kiss_fft_cpx * twiddles = st->twiddles; 288 kiss_fft_cpx t; 289 kiss_fft_cpx scratchbuf[17]; 290 int Norig = st->nfft; 291 292 /*CHECKBUF(scratchbuf,nscratchbuf,p);*/ 293 if (p>17) 294 speex_fatal("KissFFT: max radix supported is 17"); 295 296 for ( u=0; u<m; ++u ) { 297 k=u; 298 for ( q1=0 ; q1<p ; ++q1 ) { 299 scratchbuf[q1] = Fout[ k ]; 300 if (!st->inverse) { 301 C_FIXDIV(scratchbuf[q1],p); 302 } 303 k += m; 304 } 305 306 k=u; 307 for ( q1=0 ; q1<p ; ++q1 ) { 308 int twidx=0; 309 Fout[ k ] = scratchbuf[0]; 310 for (q=1;q<p;++q ) { 311 twidx += fstride * k; 312 if (twidx>=Norig) twidx-=Norig; 313 C_MUL(t,scratchbuf[q] , twiddles[twidx] ); 314 C_ADDTO( Fout[ k ] ,t); 315 } 316 k += m; 317 } 318 } 319 } 320 321 static 322 void kf_shuffle( 323 kiss_fft_cpx * Fout, 324 const kiss_fft_cpx * f, 325 const size_t fstride, 326 int in_stride, 327 int * factors, 328 const kiss_fft_cfg st 329 ) 330 { 331 const int p=*factors++; /* the radix */ 332 const int m=*factors++; /* stage's fft length/p */ 333 334 /*printf ("fft %d %d %d %d %d %d\n", p*m, m, p, s2, fstride*in_stride, N);*/ 335 if (m==1) 336 { 337 int j; 338 for (j=0;j<p;j++) 339 { 340 Fout[j] = *f; 341 f += fstride*in_stride; 342 } 343 } else { 344 int j; 345 for (j=0;j<p;j++) 346 { 347 kf_shuffle( Fout , f, fstride*p, in_stride, factors,st); 348 f += fstride*in_stride; 349 Fout += m; 350 } 351 } 352 } 353 354 static 355 void kf_work( 356 kiss_fft_cpx * Fout, 357 const kiss_fft_cpx * f, 358 const size_t fstride, 359 int in_stride, 360 int * factors, 361 const kiss_fft_cfg st, 362 int N, 363 int s2, 364 int m2 365 ) 366 { 367 int i; 368 kiss_fft_cpx * Fout_beg=Fout; 369 const int p=*factors++; /* the radix */ 370 const int m=*factors++; /* stage's fft length/p */ 371 #if 0 372 /*printf ("fft %d %d %d %d %d %d\n", p*m, m, p, s2, fstride*in_stride, N);*/ 373 if (m==1) 374 { 375 /* int j; 376 for (j=0;j<p;j++) 377 { 378 Fout[j] = *f; 379 f += fstride*in_stride; 380 }*/ 381 } else { 382 int j; 383 for (j=0;j<p;j++) 384 { 385 kf_work( Fout , f, fstride*p, in_stride, factors,st, N*p, fstride*in_stride, m); 386 f += fstride*in_stride; 387 Fout += m; 388 } 389 } 390 391 Fout=Fout_beg; 392 393 switch (p) { 394 case 2: kf_bfly2(Fout,fstride,st,m); break; 395 case 3: kf_bfly3(Fout,fstride,st,m); break; 396 case 4: kf_bfly4(Fout,fstride,st,m); break; 397 case 5: kf_bfly5(Fout,fstride,st,m); break; 398 default: kf_bfly_generic(Fout,fstride,st,m,p); break; 399 } 400 #else 401 /*printf ("fft %d %d %d %d %d %d %d\n", p*m, m, p, s2, fstride*in_stride, N, m2);*/ 402 if (m==1) 403 { 404 /*for (i=0;i<N;i++) 405 { 406 int j; 407 Fout = Fout_beg+i*m2; 408 const kiss_fft_cpx * f2 = f+i*s2; 409 for (j=0;j<p;j++) 410 { 411 *Fout++ = *f2; 412 f2 += fstride*in_stride; 413 } 414 }*/ 415 }else{ 416 kf_work( Fout , f, fstride*p, in_stride, factors,st, N*p, fstride*in_stride, m); 417 } 418 419 420 421 422 switch (p) { 423 case 2: kf_bfly2(Fout,fstride,st,m, N, m2); break; 424 case 3: for (i=0;i<N;i++){Fout=Fout_beg+i*m2; kf_bfly3(Fout,fstride,st,m);} break; 425 case 4: kf_bfly4(Fout,fstride,st,m, N, m2); break; 426 case 5: for (i=0;i<N;i++){Fout=Fout_beg+i*m2; kf_bfly5(Fout,fstride,st,m);} break; 427 default: for (i=0;i<N;i++){Fout=Fout_beg+i*m2; kf_bfly_generic(Fout,fstride,st,m,p);} break; 428 } 429 #endif 430 } 431 432 /* facbuf is populated by p1,m1,p2,m2, ... 433 where 434 p[i] * m[i] = m[i-1] 435 m0 = n */ 436 static 437 void kf_factor(int n,int * facbuf) 438 { 439 int p=4; 440 441 /*factor out powers of 4, powers of 2, then any remaining primes */ 442 do { 443 while (n % p) { 444 switch (p) { 445 case 4: p = 2; break; 446 case 2: p = 3; break; 447 default: p += 2; break; 448 } 449 if (p>32000 || (spx_int32_t)p*(spx_int32_t)p > n) 450 p = n; /* no more factors, skip to end */ 451 } 452 n /= p; 453 *facbuf++ = p; 454 *facbuf++ = n; 455 } while (n > 1); 456 } 457 /* 458 * 459 * User-callable function to allocate all necessary storage space for the fft. 460 * 461 * The return value is a contiguous block of memory, allocated with malloc. As such, 462 * It can be freed with free(), rather than a kiss_fft-specific function. 463 * */ 464 kiss_fft_cfg kiss_fft_alloc(int nfft,int inverse_fft,void * mem,size_t * lenmem ) 465 { 466 kiss_fft_cfg st=NULL; 467 size_t memneeded = sizeof(struct kiss_fft_state) 468 + sizeof(kiss_fft_cpx)*(nfft-1); /* twiddle factors*/ 469 470 if ( lenmem==NULL ) { 471 st = ( kiss_fft_cfg)KISS_FFT_MALLOC( memneeded ); 472 }else{ 473 if (mem != NULL && *lenmem >= memneeded) 474 st = (kiss_fft_cfg)mem; 475 *lenmem = memneeded; 476 } 477 if (st) { 478 int i; 479 st->nfft=nfft; 480 st->inverse = inverse_fft; 481 #ifdef FIXED_POINT 482 for (i=0;i<nfft;++i) { 483 spx_word32_t phase = i; 484 if (!st->inverse) 485 phase = -phase; 486 kf_cexp2(st->twiddles+i, DIV32(SHL32(phase,17),nfft)); 487 } 488 #else 489 for (i=0;i<nfft;++i) { 490 const double pi=3.14159265358979323846264338327; 491 double phase = ( -2*pi /nfft ) * i; 492 if (st->inverse) 493 phase *= -1; 494 kf_cexp(st->twiddles+i, phase ); 495 } 496 #endif 497 kf_factor(nfft,st->factors); 498 } 499 return st; 500 } 501 502 503 504 505 void kiss_fft_stride(kiss_fft_cfg st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout,int in_stride) 506 { 507 if (fin == fout) 508 { 509 speex_fatal("In-place FFT not supported"); 510 /*CHECKBUF(tmpbuf,ntmpbuf,st->nfft); 511 kf_work(tmpbuf,fin,1,in_stride, st->factors,st); 512 SPEEX_MOVE(fout,tmpbuf,st->nfft);*/ 513 } else { 514 kf_shuffle( fout, fin, 1,in_stride, st->factors,st); 515 kf_work( fout, fin, 1,in_stride, st->factors,st, 1, in_stride, 1); 516 } 517 } 518 519 void kiss_fft(kiss_fft_cfg cfg,const kiss_fft_cpx *fin,kiss_fft_cpx *fout) 520 { 521 kiss_fft_stride(cfg,fin,fout,1); 522 } 523 524