Home | History | Annotate | Download | only in libspeex
      1 /*
      2 Copyright (c) 2003-2004, Mark Borgerding
      3 
      4 All rights reserved.
      5 
      6 Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met:
      7 
      8     * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer.
      9     * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution.
     10     * Neither the author nor the names of any contributors may be used to endorse or promote products derived from this software without specific prior written permission.
     11 
     12 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
     13 */
     14 
     15 #ifdef HAVE_CONFIG_H
     16 #include "config.h"
     17 #endif
     18 
     19 #include "os_support.h"
     20 #include "kiss_fftr.h"
     21 #include "_kiss_fft_guts.h"
     22 
     23 struct kiss_fftr_state{
     24     kiss_fft_cfg substate;
     25     kiss_fft_cpx * tmpbuf;
     26     kiss_fft_cpx * super_twiddles;
     27 #ifdef USE_SIMD
     28     long pad;
     29 #endif
     30 };
     31 
     32 kiss_fftr_cfg kiss_fftr_alloc(int nfft,int inverse_fft,void * mem,size_t * lenmem)
     33 {
     34     int i;
     35     kiss_fftr_cfg st = NULL;
     36     size_t subsize, memneeded;
     37 
     38     if (nfft & 1) {
     39         speex_warning("Real FFT optimization must be even.\n");
     40         return NULL;
     41     }
     42     nfft >>= 1;
     43 
     44     kiss_fft_alloc (nfft, inverse_fft, NULL, &subsize);
     45     memneeded = sizeof(struct kiss_fftr_state) + subsize + sizeof(kiss_fft_cpx) * ( nfft * 2);
     46 
     47     if (lenmem == NULL) {
     48         st = (kiss_fftr_cfg) KISS_FFT_MALLOC (memneeded);
     49     } else {
     50         if (*lenmem >= memneeded)
     51             st = (kiss_fftr_cfg) mem;
     52         *lenmem = memneeded;
     53     }
     54     if (!st)
     55         return NULL;
     56 
     57     st->substate = (kiss_fft_cfg) (st + 1); /*just beyond kiss_fftr_state struct */
     58     st->tmpbuf = (kiss_fft_cpx *) (((char *) st->substate) + subsize);
     59     st->super_twiddles = st->tmpbuf + nfft;
     60     kiss_fft_alloc(nfft, inverse_fft, st->substate, &subsize);
     61 
     62 #ifdef FIXED_POINT
     63     for (i=0;i<nfft;++i) {
     64        spx_word32_t phase = i+(nfft>>1);
     65        if (!inverse_fft)
     66           phase = -phase;
     67        kf_cexp2(st->super_twiddles+i, DIV32(SHL32(phase,16),nfft));
     68     }
     69 #else
     70     for (i=0;i<nfft;++i) {
     71        const double pi=3.14159265358979323846264338327;
     72        double phase = pi*(((double)i) /nfft + .5);
     73        if (!inverse_fft)
     74           phase = -phase;
     75        kf_cexp(st->super_twiddles+i, phase );
     76     }
     77 #endif
     78     return st;
     79 }
     80 
     81 void kiss_fftr(kiss_fftr_cfg st,const kiss_fft_scalar *timedata,kiss_fft_cpx *freqdata)
     82 {
     83     /* input buffer timedata is stored row-wise */
     84     int k,ncfft;
     85     kiss_fft_cpx fpnk,fpk,f1k,f2k,tw,tdc;
     86 
     87     if ( st->substate->inverse) {
     88         speex_fatal("kiss fft usage error: improper alloc\n");
     89     }
     90 
     91     ncfft = st->substate->nfft;
     92 
     93     /*perform the parallel fft of two real signals packed in real,imag*/
     94     kiss_fft( st->substate , (const kiss_fft_cpx*)timedata, st->tmpbuf );
     95     /* The real part of the DC element of the frequency spectrum in st->tmpbuf
     96      * contains the sum of the even-numbered elements of the input time sequence
     97      * The imag part is the sum of the odd-numbered elements
     98      *
     99      * The sum of tdc.r and tdc.i is the sum of the input time sequence.
    100      *      yielding DC of input time sequence
    101      * The difference of tdc.r - tdc.i is the sum of the input (dot product) [1,-1,1,-1...
    102      *      yielding Nyquist bin of input time sequence
    103      */
    104 
    105     tdc.r = st->tmpbuf[0].r;
    106     tdc.i = st->tmpbuf[0].i;
    107     C_FIXDIV(tdc,2);
    108     CHECK_OVERFLOW_OP(tdc.r ,+, tdc.i);
    109     CHECK_OVERFLOW_OP(tdc.r ,-, tdc.i);
    110     freqdata[0].r = tdc.r + tdc.i;
    111     freqdata[ncfft].r = tdc.r - tdc.i;
    112 #ifdef USE_SIMD
    113     freqdata[ncfft].i = freqdata[0].i = _mm_set1_ps(0);
    114 #else
    115     freqdata[ncfft].i = freqdata[0].i = 0;
    116 #endif
    117 
    118     for ( k=1;k <= ncfft/2 ; ++k ) {
    119         fpk    = st->tmpbuf[k];
    120         fpnk.r =   st->tmpbuf[ncfft-k].r;
    121         fpnk.i = - st->tmpbuf[ncfft-k].i;
    122         C_FIXDIV(fpk,2);
    123         C_FIXDIV(fpnk,2);
    124 
    125         C_ADD( f1k, fpk , fpnk );
    126         C_SUB( f2k, fpk , fpnk );
    127         C_MUL( tw , f2k , st->super_twiddles[k]);
    128 
    129         freqdata[k].r = HALF_OF(f1k.r + tw.r);
    130         freqdata[k].i = HALF_OF(f1k.i + tw.i);
    131         freqdata[ncfft-k].r = HALF_OF(f1k.r - tw.r);
    132         freqdata[ncfft-k].i = HALF_OF(tw.i - f1k.i);
    133     }
    134 }
    135 
    136 void kiss_fftri(kiss_fftr_cfg st,const kiss_fft_cpx *freqdata, kiss_fft_scalar *timedata)
    137 {
    138     /* input buffer timedata is stored row-wise */
    139     int k, ncfft;
    140 
    141     if (st->substate->inverse == 0) {
    142         speex_fatal("kiss fft usage error: improper alloc\n");
    143     }
    144 
    145     ncfft = st->substate->nfft;
    146 
    147     st->tmpbuf[0].r = freqdata[0].r + freqdata[ncfft].r;
    148     st->tmpbuf[0].i = freqdata[0].r - freqdata[ncfft].r;
    149     /*C_FIXDIV(st->tmpbuf[0],2);*/
    150 
    151     for (k = 1; k <= ncfft / 2; ++k) {
    152         kiss_fft_cpx fk, fnkc, fek, fok, tmp;
    153         fk = freqdata[k];
    154         fnkc.r = freqdata[ncfft - k].r;
    155         fnkc.i = -freqdata[ncfft - k].i;
    156         /*C_FIXDIV( fk , 2 );
    157         C_FIXDIV( fnkc , 2 );*/
    158 
    159         C_ADD (fek, fk, fnkc);
    160         C_SUB (tmp, fk, fnkc);
    161         C_MUL (fok, tmp, st->super_twiddles[k]);
    162         C_ADD (st->tmpbuf[k],     fek, fok);
    163         C_SUB (st->tmpbuf[ncfft - k], fek, fok);
    164 #ifdef USE_SIMD
    165         st->tmpbuf[ncfft - k].i *= _mm_set1_ps(-1.0);
    166 #else
    167         st->tmpbuf[ncfft - k].i *= -1;
    168 #endif
    169     }
    170     kiss_fft (st->substate, st->tmpbuf, (kiss_fft_cpx *) timedata);
    171 }
    172 
    173 void kiss_fftr2(kiss_fftr_cfg st,const kiss_fft_scalar *timedata,kiss_fft_scalar *freqdata)
    174 {
    175    /* input buffer timedata is stored row-wise */
    176    int k,ncfft;
    177    kiss_fft_cpx f2k,tdc;
    178    spx_word32_t f1kr, f1ki, twr, twi;
    179 
    180    if ( st->substate->inverse) {
    181       speex_fatal("kiss fft usage error: improper alloc\n");
    182    }
    183 
    184    ncfft = st->substate->nfft;
    185 
    186    /*perform the parallel fft of two real signals packed in real,imag*/
    187    kiss_fft( st->substate , (const kiss_fft_cpx*)timedata, st->tmpbuf );
    188     /* The real part of the DC element of the frequency spectrum in st->tmpbuf
    189    * contains the sum of the even-numbered elements of the input time sequence
    190    * The imag part is the sum of the odd-numbered elements
    191    *
    192    * The sum of tdc.r and tdc.i is the sum of the input time sequence.
    193    *      yielding DC of input time sequence
    194    * The difference of tdc.r - tdc.i is the sum of the input (dot product) [1,-1,1,-1...
    195    *      yielding Nyquist bin of input time sequence
    196     */
    197 
    198    tdc.r = st->tmpbuf[0].r;
    199    tdc.i = st->tmpbuf[0].i;
    200    C_FIXDIV(tdc,2);
    201    CHECK_OVERFLOW_OP(tdc.r ,+, tdc.i);
    202    CHECK_OVERFLOW_OP(tdc.r ,-, tdc.i);
    203    freqdata[0] = tdc.r + tdc.i;
    204    freqdata[2*ncfft-1] = tdc.r - tdc.i;
    205 
    206    for ( k=1;k <= ncfft/2 ; ++k )
    207    {
    208       /*fpk    = st->tmpbuf[k];
    209       fpnk.r =   st->tmpbuf[ncfft-k].r;
    210       fpnk.i = - st->tmpbuf[ncfft-k].i;
    211       C_FIXDIV(fpk,2);
    212       C_FIXDIV(fpnk,2);
    213 
    214       C_ADD( f1k, fpk , fpnk );
    215       C_SUB( f2k, fpk , fpnk );
    216 
    217       C_MUL( tw , f2k , st->super_twiddles[k]);
    218 
    219       freqdata[2*k-1] = HALF_OF(f1k.r + tw.r);
    220       freqdata[2*k] = HALF_OF(f1k.i + tw.i);
    221       freqdata[2*(ncfft-k)-1] = HALF_OF(f1k.r - tw.r);
    222       freqdata[2*(ncfft-k)] = HALF_OF(tw.i - f1k.i);
    223       */
    224 
    225       /*f1k.r = PSHR32(ADD32(EXTEND32(st->tmpbuf[k].r), EXTEND32(st->tmpbuf[ncfft-k].r)),1);
    226       f1k.i = PSHR32(SUB32(EXTEND32(st->tmpbuf[k].i), EXTEND32(st->tmpbuf[ncfft-k].i)),1);
    227       f2k.r = PSHR32(SUB32(EXTEND32(st->tmpbuf[k].r), EXTEND32(st->tmpbuf[ncfft-k].r)),1);
    228       f2k.i = SHR32(ADD32(EXTEND32(st->tmpbuf[k].i), EXTEND32(st->tmpbuf[ncfft-k].i)),1);
    229 
    230       C_MUL( tw , f2k , st->super_twiddles[k]);
    231 
    232       freqdata[2*k-1] = HALF_OF(f1k.r + tw.r);
    233       freqdata[2*k] = HALF_OF(f1k.i + tw.i);
    234       freqdata[2*(ncfft-k)-1] = HALF_OF(f1k.r - tw.r);
    235       freqdata[2*(ncfft-k)] = HALF_OF(tw.i - f1k.i);
    236    */
    237       f2k.r = SHR32(SUB32(EXTEND32(st->tmpbuf[k].r), EXTEND32(st->tmpbuf[ncfft-k].r)),1);
    238       f2k.i = PSHR32(ADD32(EXTEND32(st->tmpbuf[k].i), EXTEND32(st->tmpbuf[ncfft-k].i)),1);
    239 
    240       f1kr = SHL32(ADD32(EXTEND32(st->tmpbuf[k].r), EXTEND32(st->tmpbuf[ncfft-k].r)),13);
    241       f1ki = SHL32(SUB32(EXTEND32(st->tmpbuf[k].i), EXTEND32(st->tmpbuf[ncfft-k].i)),13);
    242 
    243       twr = SHR32(SUB32(MULT16_16(f2k.r,st->super_twiddles[k].r),MULT16_16(f2k.i,st->super_twiddles[k].i)), 1);
    244       twi = SHR32(ADD32(MULT16_16(f2k.i,st->super_twiddles[k].r),MULT16_16(f2k.r,st->super_twiddles[k].i)), 1);
    245 
    246 #ifdef FIXED_POINT
    247       freqdata[2*k-1] = PSHR32(f1kr + twr, 15);
    248       freqdata[2*k] = PSHR32(f1ki + twi, 15);
    249       freqdata[2*(ncfft-k)-1] = PSHR32(f1kr - twr, 15);
    250       freqdata[2*(ncfft-k)] = PSHR32(twi - f1ki, 15);
    251 #else
    252       freqdata[2*k-1] = .5f*(f1kr + twr);
    253       freqdata[2*k] = .5f*(f1ki + twi);
    254       freqdata[2*(ncfft-k)-1] = .5f*(f1kr - twr);
    255       freqdata[2*(ncfft-k)] = .5f*(twi - f1ki);
    256 
    257 #endif
    258    }
    259 }
    260 
    261 void kiss_fftri2(kiss_fftr_cfg st,const kiss_fft_scalar *freqdata,kiss_fft_scalar *timedata)
    262 {
    263    /* input buffer timedata is stored row-wise */
    264    int k, ncfft;
    265 
    266    if (st->substate->inverse == 0) {
    267       speex_fatal ("kiss fft usage error: improper alloc\n");
    268    }
    269 
    270    ncfft = st->substate->nfft;
    271 
    272    st->tmpbuf[0].r = freqdata[0] + freqdata[2*ncfft-1];
    273    st->tmpbuf[0].i = freqdata[0] - freqdata[2*ncfft-1];
    274    /*C_FIXDIV(st->tmpbuf[0],2);*/
    275 
    276    for (k = 1; k <= ncfft / 2; ++k) {
    277       kiss_fft_cpx fk, fnkc, fek, fok, tmp;
    278       fk.r = freqdata[2*k-1];
    279       fk.i = freqdata[2*k];
    280       fnkc.r = freqdata[2*(ncfft - k)-1];
    281       fnkc.i = -freqdata[2*(ncfft - k)];
    282         /*C_FIXDIV( fk , 2 );
    283       C_FIXDIV( fnkc , 2 );*/
    284 
    285       C_ADD (fek, fk, fnkc);
    286       C_SUB (tmp, fk, fnkc);
    287       C_MUL (fok, tmp, st->super_twiddles[k]);
    288       C_ADD (st->tmpbuf[k],     fek, fok);
    289       C_SUB (st->tmpbuf[ncfft - k], fek, fok);
    290 #ifdef USE_SIMD
    291       st->tmpbuf[ncfft - k].i *= _mm_set1_ps(-1.0);
    292 #else
    293       st->tmpbuf[ncfft - k].i *= -1;
    294 #endif
    295    }
    296    kiss_fft (st->substate, st->tmpbuf, (kiss_fft_cpx *) timedata);
    297 }
    298