Home | History | Annotate | Download | only in include
      1 /*---------------------------------------------------------------------------*
      2  *  sp_fft.h  *
      3  *                                                                           *
      4  *  Copyright 2007, 2008 Nuance Communciations, Inc.                               *
      5  *                                                                           *
      6  *  Licensed under the Apache License, Version 2.0 (the 'License');          *
      7  *  you may not use this file except in compliance with the License.         *
      8  *                                                                           *
      9  *  You may obtain a copy of the License at                                  *
     10  *      http://www.apache.org/licenses/LICENSE-2.0                           *
     11  *                                                                           *
     12  *  Unless required by applicable law or agreed to in writing, software      *
     13  *  distributed under the License is distributed on an 'AS IS' BASIS,        *
     14  *  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. *
     15  *  See the License for the specific language governing permissions and      *
     16  *  limitations under the License.                                           *
     17  *                                                                           *
     18  *---------------------------------------------------------------------------*/
     19 
     20 #ifndef _SPLIT_RADIX_FFT_H_
     21 #define _SPLIT_RADIX_FFT_H_
     22 
     23 /*
     24 ////////////////////////////////////////////////////////////////////////////
     25 //
     26 //  FILE:         fft.h
     27 //
     28 //  CREATED:   11-September-99
     29 //
     30 //  DESCRIPTION:  Split-Radix FFT
     31 //
     32 //
     33 //
     34 //
     35 //  MODIFICATIONS:
     36 // Revision history log
     37     VSS revision history.  Do not edit by hand.
     38 
     39     $NoKeywords: $
     40 
     41 */
     42 
     43 /*
     44 >>>>> Floating and Fixed Point plit-Radix FFT -- The Fastest FFT  <<<<<<<<
     45 
     46 The split-radix FFT improves the efficiency of the FFT implementation
     47 by mixing a radix-2 and a radix-4 FFT algorithms. Specifically,
     48 the radix-2 decomposes one Fourier transform into two half-sized Fourier
     49 transform at each stage. It is the FFT presented in almost every textbook
     50 and implemented. Unfortuantely, it is also the least efficient among the
     51 power-of-two FFT, The radix-4 decomposes one Fourier transform into
     52 four quarter-sized Fourier transform. It is much more
     53 efficient than the radix-2 FFT, but it requires a power-of-four as the data length.
     54 By combining the radix-2 and the radix-4 algorithms, one not only
     55 removes the power-of-four limitation of the radix-4 alogorithm, but also
     56 obtains the most efficient power-of-two FFT algorithm.
     57 
     58 The split-radix FFT decomposes a Fourier transform
     59 
     60   X(k) = sum(n = 0 to N-1, x[n] * W**(n*k))
     61 
     62 successively into one length N/2 radix-2 and two length N/4 radix-4 FFT
     63 
     64  X(2k) = sum(n = 0 to N/2 - 1, [x[n] + x(n + N/2)] * W**(2*n*k))
     65  X(4k + 1) = sum(n = 0 to N/4 - 1, [x[n] - x(n + N/2) - j(x[n] - x(n + 3*N/4))] * W(n)*W**(4*n*k))
     66  X(4k + 3) = sum(n = 0 to N/4 - 1, [x[n] - x(n + N/2) + j(x[n] - x(n + 3*N/4))] * W(n)*W**(4*n*k))
     67 
     68 where W(n) = exp(-j2*PI*n/N) is called a twiddle factor
     69 
     70 The detailed description of the algorithm (with bugs) can be found in
     71 
     72   H.V. Sorensen, M.T. Heideman, and C. S. Burrus, "On Computing the Split-Radix
     73   FFT,"IEEE Trans. Acoust., Speech, Signal Processing, pp. 152-200, Feb. 1986.
     74 
     75 The implementation of the split-radix is similar to that of the radix-2
     76 algorithm, except a smart index scheme is needed at each stage to determine
     77 which nodes need to be decomposed further and which ones do not, since
     78 a radix-2 decomposition is done at every stage while a radix-4 decomposition
     79 is done at every other stage. This implementation uses an index scheme designed
     80 by H.V. Sorensen, M.T. Heideman, and C. S. Burrus.
     81 
     82 You can use this implementation for both floating and fixed point FFT and
     83 inverse FFT computation by changing a compliler constant FIXED_POINT
     84 in file fft.h. Some addtional information can be found in following sections..
     85 
     86  Usage
     87  Efficiency
     88  Examples:
     89 
     90 >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
     91 Usage:
     92 >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
     93 
     94 For floating point, you have to undefine the constant FIXED_POINT and
     95 use typedef to define the trigonomy function and input data types.
     96 
     97 For fixed point, the constant FIXED_POINT must be defined and a 32 bit
     98 representation of both input data and trigonomy function data are required.
     99 Furthermore, you have three choices to conduct fixed-point FFT.
    100 If you use the algorithm in both Microsoft C and I86 hardware environment, you
    101 should use an assemblyline implementation. To do this, you go to the file
    102 himul32.cpp and define constants MICROSOFTC_HIMUL32 and I86_HIMUL3.
    103 If you use the algorithm only in the Microsoft C environment,
    104 a Microsoft specific implementation should be used. You do this by
    105 going to the file himul32.cpp to undefine I86_HIMUL3, and
    106 define MICROSOFTC_HIMUL32.
    107 In any other situation, a stric C implementation should be used by
    108 undefining both MICROSOFTC_HIMUL32 and I86_HIMUL3.
    109 
    110 To use the algorithm, you need to constrcut an fft_info object
    111 
    112  fft_info* my_fft_info = new_fft_info(log2Size);
    113 
    114 If you have a real data input data[] of size 2**(log2Size + 1), you
    115 use
    116 
    117  do_real_fft(my_fft_info, size, data);
    118 
    119 The positive half frequency Fourier transform will be returned, in addition
    120 to the first and last elements of the transform that are stored in the
    121 first and second elements of the data array. If the data[] array is the
    122 Fourier transform of a real data input, you can also conduct the inverse
    123 Fourier transform to get the real data input by
    124 
    125  do_real_ifft(my_fft_info, size, data);
    126 
    127 Finally, you should remember to remove my_fft_info object using
    128 
    129  delete_fft_info(my_fft_info)
    130 
    131 when you are done with FFT.
    132 
    133 >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
    134 Efficiency:
    135 >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
    136 
    137 TIME: Let M = log2Size be the power-of-two length and N = 2**M be the complex
    138   FFT input size. The following formulas give the comutation requirements
    139   of the split-radix FFT algorithm.
    140 
    141   Multiplications = (4/3)*M*N - (35/9)N + 4
    142   Adds   = (8/3)*M*N - (16/9)N + 2
    143 
    144   On a 266 MHz 64 MB RAM Pentium, using a release build,
    145   500 runs of paired 256 point complex (512  point real) input FFT
    146   (forward + inverse) have the following time counts:
    147 
    148 Floating Point:
    149   Real:  0.160 sec.
    150   Complex: 0.120 sec.
    151 
    152 
    153 Fixed Point:
    154   Assembly:  Real 0.170 sec, Complex 0.140 sec.
    155   Microsoft C: Real 0.250 sec, Complex 0.240 sec.
    156   Stric C:  Real 0.540 sec, Complex 0.441 sec.
    157 
    158 
    159 SPACE: The computation is done in place so that there is no dynamic memory allocation
    160   in do_fft (do_real_fft) and do_ifft (do_real_ifft) call, except some
    161   temporary local variables.
    162 
    163   The memory space is needed in fft_info struct to store the cosine (sine) tales
    164   butterfly index table, and bit reverse table. Specifically,
    165 
    166   cosine(sine) tables: 3*N (half can be saved if we only save cosine or sine tables)
    167   butterfly index table: < N
    168   bitrever index table: N
    169 
    170 >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
    171 Example:
    172 >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>
    173 
    174 The examples of running this program is given in a compiled out main()
    175 in fft.cpp. You can run this program by compiling the fft.h, fft.cpp, and
    176 himul32.cpp with main() compiled.
    177 
    178 */ /* end of the comment block*/
    179 
    180 #ifdef __cplusplus
    181 extern "C"
    182 {
    183 #endif
    184 
    185 
    186 #define DATA_BITS 16   /*number of significant bits in fftdata*/
    187 #define HAMMING_DATA_BITS 15
    188 
    189   typedef fftdata     trigonomydata;
    190 
    191   typedef struct
    192   {
    193     /* fft log length */
    194     unsigned  m_logLength;
    195 
    196     /* fft data length */
    197     unsigned  m_length;
    198 
    199     unsigned  *m_bitreverseTbl;
    200 
    201     /* L shaped butterfly index table */
    202     unsigned  *m_butterflyIndexTbl;
    203 
    204     /* sine and cosine tables */
    205     trigonomydata *m_sin1Tbl;
    206     trigonomydata *m_cos1Tbl;
    207     trigonomydata *m_cos2Tbl;
    208     trigonomydata *m_sin2Tbl;
    209     trigonomydata *m_sin3Tbl;
    210     trigonomydata *m_cos3Tbl;
    211 
    212 
    213   }
    214   srfft;
    215 
    216   typedef struct
    217   {
    218     srfft   *m_srfft;
    219 
    220     fftdata *real;
    221     fftdata *imag;
    222 
    223     asr_uint32_t  size;
    224     asr_uint32_t  size2;
    225 
    226   }
    227   fft_info;
    228 
    229 
    230   /****************************************************************
    231    new_srfft: create srfft object
    232 
    233    Parameters:
    234     log2Length -- the power-of-two length of the complex FFT
    235 
    236    Returns:
    237     the created srfft object
    238   ****************************************************************/
    239   srfft* new_srfft(unsigned log2Length);
    240 
    241 
    242   /****************************************************************
    243    delete_srfft: release all the memory allocated to srfft object
    244 
    245    Parameters:
    246     pthis -- a pointer to a srfft object created by new_srfft
    247 
    248    Returns:
    249     none
    250   ****************************************************************/
    251   void delete_srfft(srfft* pthis);
    252 
    253 
    254   /******************************************************************
    255    do_real_fft conducts a forward FFT of a real data array using
    256    the split-radix algorithm
    257 
    258    Parameters:
    259     pthis  -- a pointer to the srfft object
    260     length -- length of data array, must be a power-of-2 length
    261        and must be equal to 2**(log2Length + 1)
    262     data   -- A real data array, overwritten by the positive frequency
    263        half of its Fourier transform.
    264        Data[0] and data[1] store the first and last
    265        component of the the Fourier transform. After that, the
    266        even elements store the real component, while the odd
    267        elements store the imaginary component of the transform.
    268    Return:
    269     none
    270   *********************************************************************/
    271   void do_real_fft(srfft* pthis, unsigned length, fftdata* data);
    272 
    273 
    274   /******************************************************************
    275    do_real_ifft conducts an inverse FFT of a real data array's FFT using
    276    the split-radix algorithm
    277 
    278    Parameters:
    279     pthis  -- a pointer to the srfft object
    280     length -- length of data array, must be a power-of-2 length
    281        and must be equal to 2**(log2Length + 1)
    282     data   -- FFT of an real data array, overwritten by the real data array.
    283        For input, data[0] and data[1] store the first and last
    284        component of the the Fourier transform. After that,
    285        the even elements store the real component of the transform,
    286        while the odd elements store the imaginary component of
    287        the transform.
    288    Return:
    289     none
    290   *********************************************************************/
    291   void do_real_ifft(srfft* pthis, unsigned length, fftdata* data);
    292 
    293 
    294   /* >>>>>>>>>>>>>> Private Methods <<<<<<<<<<<<<<<<<<<<<<<<< */
    295 
    296   /******************************************************************
    297    allocate_butterfly_index uses an index scheme developed by
    298    Sorensen, Heideman, and Burrus to determine which L shaped
    299    butterfiles needs to be computed at each stage and
    300    store these indexes into a table
    301 
    302    Parameters:
    303     pthis -- a pointer to the srfft object
    304 
    305    Returns:
    306     none
    307   *********************************************************************/
    308   void allocate_butterfly_tbl(srfft* pthis);
    309 
    310   /******************************************************************
    311    allocate_trigonomy_tbl allocates trigonomy function tables
    312    for FFT computation
    313 
    314    Parameters:
    315     pthis -- a pointer to the srfft object
    316 
    317    Returns:
    318     none
    319   *********************************************************************/
    320   void allocate_trigonomy_tbl(srfft* pthis);
    321 
    322   /******************************************************************
    323    allocate_bitreverse_tbl() allocates bit reverse index table
    324 
    325    Parameters:
    326     pthis -- a pointer to the srfft object
    327 
    328    Returns:
    329     none
    330   *********************************************************************/
    331   void allocate_bitreverse_tbl(srfft* pthis);
    332 
    333 #ifdef __cplusplus
    334 }
    335 #endif
    336 
    337 #endif
    338