Home | History | Annotate | Download | only in celt
      1 /*Copyright (c) 2003-2004, Mark Borgerding
      2   Lots of modifications by Jean-Marc Valin
      3   Copyright (c) 2005-2007, Xiph.Org Foundation
      4   Copyright (c) 2008,      Xiph.Org Foundation, CSIRO
      5 
      6   All rights reserved.
      7 
      8   Redistribution and use in source and binary forms, with or without
      9    modification, are permitted provided that the following conditions are met:
     10 
     11     * Redistributions of source code must retain the above copyright notice,
     12        this list of conditions and the following disclaimer.
     13     * Redistributions in binary form must reproduce the above copyright notice,
     14        this list of conditions and the following disclaimer in the
     15        documentation and/or other materials provided with the distribution.
     16 
     17   THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
     18   AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     19   IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     20   ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
     21   LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
     22   CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
     23   SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
     24   INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
     25   CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
     26   ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
     27   POSSIBILITY OF SUCH DAMAGE.*/
     28 
     29 /* This code is originally from Mark Borgerding's KISS-FFT but has been
     30    heavily modified to better suit Opus */
     31 
     32 #ifndef SKIP_CONFIG_H
     33 #  ifdef HAVE_CONFIG_H
     34 #    include "config.h"
     35 #  endif
     36 #endif
     37 
     38 #include "_kiss_fft_guts.h"
     39 #include "arch.h"
     40 #include "os_support.h"
     41 #include "mathops.h"
     42 #include "stack_alloc.h"
     43 
     44 /* The guts header contains all the multiplication and addition macros that are defined for
     45    complex numbers.  It also delares the kf_ internal functions.
     46 */
     47 
     48 static void kf_bfly2(
     49                      kiss_fft_cpx * Fout,
     50                      int m,
     51                      int N
     52                     )
     53 {
     54    kiss_fft_cpx * Fout2;
     55    int i;
     56    (void)m;
     57 #ifdef CUSTOM_MODES
     58    if (m==1)
     59    {
     60       celt_assert(m==1);
     61       for (i=0;i<N;i++)
     62       {
     63          kiss_fft_cpx t;
     64          Fout2 = Fout + 1;
     65          t = *Fout2;
     66          C_SUB( *Fout2 ,  *Fout , t );
     67          C_ADDTO( *Fout ,  t );
     68          Fout += 2;
     69       }
     70    } else
     71 #endif
     72    {
     73       opus_val16 tw;
     74       tw = QCONST16(0.7071067812f, 15);
     75       /* We know that m==4 here because the radix-2 is just after a radix-4 */
     76       celt_assert(m==4);
     77       for (i=0;i<N;i++)
     78       {
     79          kiss_fft_cpx t;
     80          Fout2 = Fout + 4;
     81          t = Fout2[0];
     82          C_SUB( Fout2[0] ,  Fout[0] , t );
     83          C_ADDTO( Fout[0] ,  t );
     84 
     85          t.r = S_MUL(Fout2[1].r+Fout2[1].i, tw);
     86          t.i = S_MUL(Fout2[1].i-Fout2[1].r, tw);
     87          C_SUB( Fout2[1] ,  Fout[1] , t );
     88          C_ADDTO( Fout[1] ,  t );
     89 
     90          t.r = Fout2[2].i;
     91          t.i = -Fout2[2].r;
     92          C_SUB( Fout2[2] ,  Fout[2] , t );
     93          C_ADDTO( Fout[2] ,  t );
     94 
     95          t.r = S_MUL(Fout2[3].i-Fout2[3].r, tw);
     96          t.i = S_MUL(-Fout2[3].i-Fout2[3].r, tw);
     97          C_SUB( Fout2[3] ,  Fout[3] , t );
     98          C_ADDTO( Fout[3] ,  t );
     99          Fout += 8;
    100       }
    101    }
    102 }
    103 
    104 static void kf_bfly4(
    105                      kiss_fft_cpx * Fout,
    106                      const size_t fstride,
    107                      const kiss_fft_state *st,
    108                      int m,
    109                      int N,
    110                      int mm
    111                     )
    112 {
    113    int i;
    114 
    115    if (m==1)
    116    {
    117       /* Degenerate case where all the twiddles are 1. */
    118       for (i=0;i<N;i++)
    119       {
    120          kiss_fft_cpx scratch0, scratch1;
    121 
    122          C_SUB( scratch0 , *Fout, Fout[2] );
    123          C_ADDTO(*Fout, Fout[2]);
    124          C_ADD( scratch1 , Fout[1] , Fout[3] );
    125          C_SUB( Fout[2], *Fout, scratch1 );
    126          C_ADDTO( *Fout , scratch1 );
    127          C_SUB( scratch1 , Fout[1] , Fout[3] );
    128 
    129          Fout[1].r = scratch0.r + scratch1.i;
    130          Fout[1].i = scratch0.i - scratch1.r;
    131          Fout[3].r = scratch0.r - scratch1.i;
    132          Fout[3].i = scratch0.i + scratch1.r;
    133          Fout+=4;
    134       }
    135    } else {
    136       int j;
    137       kiss_fft_cpx scratch[6];
    138       const kiss_twiddle_cpx *tw1,*tw2,*tw3;
    139       const int m2=2*m;
    140       const int m3=3*m;
    141       kiss_fft_cpx * Fout_beg = Fout;
    142       for (i=0;i<N;i++)
    143       {
    144          Fout = Fout_beg + i*mm;
    145          tw3 = tw2 = tw1 = st->twiddles;
    146          /* m is guaranteed to be a multiple of 4. */
    147          for (j=0;j<m;j++)
    148          {
    149             C_MUL(scratch[0],Fout[m] , *tw1 );
    150             C_MUL(scratch[1],Fout[m2] , *tw2 );
    151             C_MUL(scratch[2],Fout[m3] , *tw3 );
    152 
    153             C_SUB( scratch[5] , *Fout, scratch[1] );
    154             C_ADDTO(*Fout, scratch[1]);
    155             C_ADD( scratch[3] , scratch[0] , scratch[2] );
    156             C_SUB( scratch[4] , scratch[0] , scratch[2] );
    157             C_SUB( Fout[m2], *Fout, scratch[3] );
    158             tw1 += fstride;
    159             tw2 += fstride*2;
    160             tw3 += fstride*3;
    161             C_ADDTO( *Fout , scratch[3] );
    162 
    163             Fout[m].r = scratch[5].r + scratch[4].i;
    164             Fout[m].i = scratch[5].i - scratch[4].r;
    165             Fout[m3].r = scratch[5].r - scratch[4].i;
    166             Fout[m3].i = scratch[5].i + scratch[4].r;
    167             ++Fout;
    168          }
    169       }
    170    }
    171 }
    172 
    173 
    174 #ifndef RADIX_TWO_ONLY
    175 
    176 static void kf_bfly3(
    177                      kiss_fft_cpx * Fout,
    178                      const size_t fstride,
    179                      const kiss_fft_state *st,
    180                      int m,
    181                      int N,
    182                      int mm
    183                     )
    184 {
    185    int i;
    186    size_t k;
    187    const size_t m2 = 2*m;
    188    const kiss_twiddle_cpx *tw1,*tw2;
    189    kiss_fft_cpx scratch[5];
    190    kiss_twiddle_cpx epi3;
    191 
    192    kiss_fft_cpx * Fout_beg = Fout;
    193 #ifdef FIXED_POINT
    194    /*epi3.r = -16384;*/ /* Unused */
    195    epi3.i = -28378;
    196 #else
    197    epi3 = st->twiddles[fstride*m];
    198 #endif
    199    for (i=0;i<N;i++)
    200    {
    201       Fout = Fout_beg + i*mm;
    202       tw1=tw2=st->twiddles;
    203       /* For non-custom modes, m is guaranteed to be a multiple of 4. */
    204       k=m;
    205       do {
    206 
    207          C_MUL(scratch[1],Fout[m] , *tw1);
    208          C_MUL(scratch[2],Fout[m2] , *tw2);
    209 
    210          C_ADD(scratch[3],scratch[1],scratch[2]);
    211          C_SUB(scratch[0],scratch[1],scratch[2]);
    212          tw1 += fstride;
    213          tw2 += fstride*2;
    214 
    215          Fout[m].r = Fout->r - HALF_OF(scratch[3].r);
    216          Fout[m].i = Fout->i - HALF_OF(scratch[3].i);
    217 
    218          C_MULBYSCALAR( scratch[0] , epi3.i );
    219 
    220          C_ADDTO(*Fout,scratch[3]);
    221 
    222          Fout[m2].r = Fout[m].r + scratch[0].i;
    223          Fout[m2].i = Fout[m].i - scratch[0].r;
    224 
    225          Fout[m].r -= scratch[0].i;
    226          Fout[m].i += scratch[0].r;
    227 
    228          ++Fout;
    229       } while(--k);
    230    }
    231 }
    232 
    233 
    234 #ifndef OVERRIDE_kf_bfly5
    235 static void kf_bfly5(
    236                      kiss_fft_cpx * Fout,
    237                      const size_t fstride,
    238                      const kiss_fft_state *st,
    239                      int m,
    240                      int N,
    241                      int mm
    242                     )
    243 {
    244    kiss_fft_cpx *Fout0,*Fout1,*Fout2,*Fout3,*Fout4;
    245    int i, u;
    246    kiss_fft_cpx scratch[13];
    247    const kiss_twiddle_cpx *tw;
    248    kiss_twiddle_cpx ya,yb;
    249    kiss_fft_cpx * Fout_beg = Fout;
    250 
    251 #ifdef FIXED_POINT
    252    ya.r = 10126;
    253    ya.i = -31164;
    254    yb.r = -26510;
    255    yb.i = -19261;
    256 #else
    257    ya = st->twiddles[fstride*m];
    258    yb = st->twiddles[fstride*2*m];
    259 #endif
    260    tw=st->twiddles;
    261 
    262    for (i=0;i<N;i++)
    263    {
    264       Fout = Fout_beg + i*mm;
    265       Fout0=Fout;
    266       Fout1=Fout0+m;
    267       Fout2=Fout0+2*m;
    268       Fout3=Fout0+3*m;
    269       Fout4=Fout0+4*m;
    270 
    271       /* For non-custom modes, m is guaranteed to be a multiple of 4. */
    272       for ( u=0; u<m; ++u ) {
    273          scratch[0] = *Fout0;
    274 
    275          C_MUL(scratch[1] ,*Fout1, tw[u*fstride]);
    276          C_MUL(scratch[2] ,*Fout2, tw[2*u*fstride]);
    277          C_MUL(scratch[3] ,*Fout3, tw[3*u*fstride]);
    278          C_MUL(scratch[4] ,*Fout4, tw[4*u*fstride]);
    279 
    280          C_ADD( scratch[7],scratch[1],scratch[4]);
    281          C_SUB( scratch[10],scratch[1],scratch[4]);
    282          C_ADD( scratch[8],scratch[2],scratch[3]);
    283          C_SUB( scratch[9],scratch[2],scratch[3]);
    284 
    285          Fout0->r += scratch[7].r + scratch[8].r;
    286          Fout0->i += scratch[7].i + scratch[8].i;
    287 
    288          scratch[5].r = scratch[0].r + S_MUL(scratch[7].r,ya.r) + S_MUL(scratch[8].r,yb.r);
    289          scratch[5].i = scratch[0].i + S_MUL(scratch[7].i,ya.r) + S_MUL(scratch[8].i,yb.r);
    290 
    291          scratch[6].r =  S_MUL(scratch[10].i,ya.i) + S_MUL(scratch[9].i,yb.i);
    292          scratch[6].i = -S_MUL(scratch[10].r,ya.i) - S_MUL(scratch[9].r,yb.i);
    293 
    294          C_SUB(*Fout1,scratch[5],scratch[6]);
    295          C_ADD(*Fout4,scratch[5],scratch[6]);
    296 
    297          scratch[11].r = scratch[0].r + S_MUL(scratch[7].r,yb.r) + S_MUL(scratch[8].r,ya.r);
    298          scratch[11].i = scratch[0].i + S_MUL(scratch[7].i,yb.r) + S_MUL(scratch[8].i,ya.r);
    299          scratch[12].r = - S_MUL(scratch[10].i,yb.i) + S_MUL(scratch[9].i,ya.i);
    300          scratch[12].i = S_MUL(scratch[10].r,yb.i) - S_MUL(scratch[9].r,ya.i);
    301 
    302          C_ADD(*Fout2,scratch[11],scratch[12]);
    303          C_SUB(*Fout3,scratch[11],scratch[12]);
    304 
    305          ++Fout0;++Fout1;++Fout2;++Fout3;++Fout4;
    306       }
    307    }
    308 }
    309 #endif /* OVERRIDE_kf_bfly5 */
    310 
    311 
    312 #endif
    313 
    314 
    315 #ifdef CUSTOM_MODES
    316 
    317 static
    318 void compute_bitrev_table(
    319          int Fout,
    320          opus_int16 *f,
    321          const size_t fstride,
    322          int in_stride,
    323          opus_int16 * factors,
    324          const kiss_fft_state *st
    325             )
    326 {
    327    const int p=*factors++; /* the radix  */
    328    const int m=*factors++; /* stage's fft length/p */
    329 
    330     /*printf ("fft %d %d %d %d %d %d\n", p*m, m, p, s2, fstride*in_stride, N);*/
    331    if (m==1)
    332    {
    333       int j;
    334       for (j=0;j<p;j++)
    335       {
    336          *f = Fout+j;
    337          f += fstride*in_stride;
    338       }
    339    } else {
    340       int j;
    341       for (j=0;j<p;j++)
    342       {
    343          compute_bitrev_table( Fout , f, fstride*p, in_stride, factors,st);
    344          f += fstride*in_stride;
    345          Fout += m;
    346       }
    347    }
    348 }
    349 
    350 /*  facbuf is populated by p1,m1,p2,m2, ...
    351     where
    352     p[i] * m[i] = m[i-1]
    353     m0 = n                  */
    354 static
    355 int kf_factor(int n,opus_int16 * facbuf)
    356 {
    357     int p=4;
    358     int i;
    359     int stages=0;
    360     int nbak = n;
    361 
    362     /*factor out powers of 4, powers of 2, then any remaining primes */
    363     do {
    364         while (n % p) {
    365             switch (p) {
    366                 case 4: p = 2; break;
    367                 case 2: p = 3; break;
    368                 default: p += 2; break;
    369             }
    370             if (p>32000 || (opus_int32)p*(opus_int32)p > n)
    371                 p = n;          /* no more factors, skip to end */
    372         }
    373         n /= p;
    374 #ifdef RADIX_TWO_ONLY
    375         if (p!=2 && p != 4)
    376 #else
    377         if (p>5)
    378 #endif
    379         {
    380            return 0;
    381         }
    382         facbuf[2*stages] = p;
    383         if (p==2 && stages > 1)
    384         {
    385            facbuf[2*stages] = 4;
    386            facbuf[2] = 2;
    387         }
    388         stages++;
    389     } while (n > 1);
    390     n = nbak;
    391     /* Reverse the order to get the radix 4 at the end, so we can use the
    392        fast degenerate case. It turns out that reversing the order also
    393        improves the noise behaviour. */
    394     for (i=0;i<stages/2;i++)
    395     {
    396        int tmp;
    397        tmp = facbuf[2*i];
    398        facbuf[2*i] = facbuf[2*(stages-i-1)];
    399        facbuf[2*(stages-i-1)] = tmp;
    400     }
    401     for (i=0;i<stages;i++)
    402     {
    403         n /= facbuf[2*i];
    404         facbuf[2*i+1] = n;
    405     }
    406     return 1;
    407 }
    408 
    409 static void compute_twiddles(kiss_twiddle_cpx *twiddles, int nfft)
    410 {
    411    int i;
    412 #ifdef FIXED_POINT
    413    for (i=0;i<nfft;++i) {
    414       opus_val32 phase = -i;
    415       kf_cexp2(twiddles+i, DIV32(SHL32(phase,17),nfft));
    416    }
    417 #else
    418    for (i=0;i<nfft;++i) {
    419       const double pi=3.14159265358979323846264338327;
    420       double phase = ( -2*pi /nfft ) * i;
    421       kf_cexp(twiddles+i, phase );
    422    }
    423 #endif
    424 }
    425 
    426 int opus_fft_alloc_arch_c(kiss_fft_state *st) {
    427    (void)st;
    428    return 0;
    429 }
    430 
    431 /*
    432  *
    433  * Allocates all necessary storage space for the fft and ifft.
    434  * The return value is a contiguous block of memory.  As such,
    435  * It can be freed with free().
    436  * */
    437 kiss_fft_state *opus_fft_alloc_twiddles(int nfft,void * mem,size_t * lenmem,
    438                                         const kiss_fft_state *base, int arch)
    439 {
    440     kiss_fft_state *st=NULL;
    441     size_t memneeded = sizeof(struct kiss_fft_state); /* twiddle factors*/
    442 
    443     if ( lenmem==NULL ) {
    444         st = ( kiss_fft_state*)KISS_FFT_MALLOC( memneeded );
    445     }else{
    446         if (mem != NULL && *lenmem >= memneeded)
    447             st = (kiss_fft_state*)mem;
    448         *lenmem = memneeded;
    449     }
    450     if (st) {
    451         opus_int16 *bitrev;
    452         kiss_twiddle_cpx *twiddles;
    453 
    454         st->nfft=nfft;
    455 #ifdef FIXED_POINT
    456         st->scale_shift = celt_ilog2(st->nfft);
    457         if (st->nfft == 1<<st->scale_shift)
    458            st->scale = Q15ONE;
    459         else
    460            st->scale = (1073741824+st->nfft/2)/st->nfft>>(15-st->scale_shift);
    461 #else
    462         st->scale = 1.f/nfft;
    463 #endif
    464         if (base != NULL)
    465         {
    466            st->twiddles = base->twiddles;
    467            st->shift = 0;
    468            while (st->shift < 32 && nfft<<st->shift != base->nfft)
    469               st->shift++;
    470            if (st->shift>=32)
    471               goto fail;
    472         } else {
    473            st->twiddles = twiddles = (kiss_twiddle_cpx*)KISS_FFT_MALLOC(sizeof(kiss_twiddle_cpx)*nfft);
    474            compute_twiddles(twiddles, nfft);
    475            st->shift = -1;
    476         }
    477         if (!kf_factor(nfft,st->factors))
    478         {
    479            goto fail;
    480         }
    481 
    482         /* bitrev */
    483         st->bitrev = bitrev = (opus_int16*)KISS_FFT_MALLOC(sizeof(opus_int16)*nfft);
    484         if (st->bitrev==NULL)
    485             goto fail;
    486         compute_bitrev_table(0, bitrev, 1,1, st->factors,st);
    487 
    488         /* Initialize architecture specific fft parameters */
    489         if (opus_fft_alloc_arch(st, arch))
    490             goto fail;
    491     }
    492     return st;
    493 fail:
    494     opus_fft_free(st, arch);
    495     return NULL;
    496 }
    497 
    498 kiss_fft_state *opus_fft_alloc(int nfft,void * mem,size_t * lenmem, int arch)
    499 {
    500    return opus_fft_alloc_twiddles(nfft, mem, lenmem, NULL, arch);
    501 }
    502 
    503 void opus_fft_free_arch_c(kiss_fft_state *st) {
    504    (void)st;
    505 }
    506 
    507 void opus_fft_free(const kiss_fft_state *cfg, int arch)
    508 {
    509    if (cfg)
    510    {
    511       opus_fft_free_arch((kiss_fft_state *)cfg, arch);
    512       opus_free((opus_int16*)cfg->bitrev);
    513       if (cfg->shift < 0)
    514          opus_free((kiss_twiddle_cpx*)cfg->twiddles);
    515       opus_free((kiss_fft_state*)cfg);
    516    }
    517 }
    518 
    519 #endif /* CUSTOM_MODES */
    520 
    521 void opus_fft_impl(const kiss_fft_state *st,kiss_fft_cpx *fout)
    522 {
    523     int m2, m;
    524     int p;
    525     int L;
    526     int fstride[MAXFACTORS];
    527     int i;
    528     int shift;
    529 
    530     /* st->shift can be -1 */
    531     shift = st->shift>0 ? st->shift : 0;
    532 
    533     fstride[0] = 1;
    534     L=0;
    535     do {
    536        p = st->factors[2*L];
    537        m = st->factors[2*L+1];
    538        fstride[L+1] = fstride[L]*p;
    539        L++;
    540     } while(m!=1);
    541     m = st->factors[2*L-1];
    542     for (i=L-1;i>=0;i--)
    543     {
    544        if (i!=0)
    545           m2 = st->factors[2*i-1];
    546        else
    547           m2 = 1;
    548        switch (st->factors[2*i])
    549        {
    550        case 2:
    551           kf_bfly2(fout, m, fstride[i]);
    552           break;
    553        case 4:
    554           kf_bfly4(fout,fstride[i]<<shift,st,m, fstride[i], m2);
    555           break;
    556  #ifndef RADIX_TWO_ONLY
    557        case 3:
    558           kf_bfly3(fout,fstride[i]<<shift,st,m, fstride[i], m2);
    559           break;
    560        case 5:
    561           kf_bfly5(fout,fstride[i]<<shift,st,m, fstride[i], m2);
    562           break;
    563  #endif
    564        }
    565        m = m2;
    566     }
    567 }
    568 
    569 void opus_fft_c(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout)
    570 {
    571    int i;
    572    opus_val16 scale;
    573 #ifdef FIXED_POINT
    574    /* Allows us to scale with MULT16_32_Q16(), which is faster than
    575       MULT16_32_Q15() on ARM. */
    576    int scale_shift = st->scale_shift-1;
    577 #endif
    578    scale = st->scale;
    579 
    580    celt_assert2 (fin != fout, "In-place FFT not supported");
    581    /* Bit-reverse the input */
    582    for (i=0;i<st->nfft;i++)
    583    {
    584       kiss_fft_cpx x = fin[i];
    585       fout[st->bitrev[i]].r = SHR32(MULT16_32_Q16(scale, x.r), scale_shift);
    586       fout[st->bitrev[i]].i = SHR32(MULT16_32_Q16(scale, x.i), scale_shift);
    587    }
    588    opus_fft_impl(st, fout);
    589 }
    590 
    591 
    592 void opus_ifft_c(const kiss_fft_state *st,const kiss_fft_cpx *fin,kiss_fft_cpx *fout)
    593 {
    594    int i;
    595    celt_assert2 (fin != fout, "In-place FFT not supported");
    596    /* Bit-reverse the input */
    597    for (i=0;i<st->nfft;i++)
    598       fout[st->bitrev[i]] = fin[i];
    599    for (i=0;i<st->nfft;i++)
    600       fout[i].i = -fout[i].i;
    601    opus_fft_impl(st, fout);
    602    for (i=0;i<st->nfft;i++)
    603       fout[i].i = -fout[i].i;
    604 }
    605