1 /*---------------------------------------------------------------------------* 2 * sp_fft.h * 3 * * 4 * Copyright 2007, 2008 Nuance Communciations, Inc. * 5 * * 6 * Licensed under the Apache License, Version 2.0 (the 'License'); * 7 * you may not use this file except in compliance with the License. * 8 * * 9 * You may obtain a copy of the License at * 10 * http://www.apache.org/licenses/LICENSE-2.0 * 11 * * 12 * Unless required by applicable law or agreed to in writing, software * 13 * distributed under the License is distributed on an 'AS IS' BASIS, * 14 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. * 15 * See the License for the specific language governing permissions and * 16 * limitations under the License. * 17 * * 18 *---------------------------------------------------------------------------*/ 19 20 #ifndef _SPLIT_RADIX_FFT_H_ 21 #define _SPLIT_RADIX_FFT_H_ 22 23 /* 24 //////////////////////////////////////////////////////////////////////////// 25 // 26 // FILE: fft.h 27 // 28 // CREATED: 11-September-99 29 // 30 // DESCRIPTION: Split-Radix FFT 31 // 32 // 33 // 34 // 35 // MODIFICATIONS: 36 // Revision history log 37 VSS revision history. Do not edit by hand. 38 39 $NoKeywords: $ 40 41 */ 42 43 /* 44 >>>>> Floating and Fixed Point plit-Radix FFT -- The Fastest FFT <<<<<<<< 45 46 The split-radix FFT improves the efficiency of the FFT implementation 47 by mixing a radix-2 and a radix-4 FFT algorithms. Specifically, 48 the radix-2 decomposes one Fourier transform into two half-sized Fourier 49 transform at each stage. It is the FFT presented in almost every textbook 50 and implemented. Unfortuantely, it is also the least efficient among the 51 power-of-two FFT, The radix-4 decomposes one Fourier transform into 52 four quarter-sized Fourier transform. It is much more 53 efficient than the radix-2 FFT, but it requires a power-of-four as the data length. 54 By combining the radix-2 and the radix-4 algorithms, one not only 55 removes the power-of-four limitation of the radix-4 alogorithm, but also 56 obtains the most efficient power-of-two FFT algorithm. 57 58 The split-radix FFT decomposes a Fourier transform 59 60 X(k) = sum(n = 0 to N-1, x[n] * W**(n*k)) 61 62 successively into one length N/2 radix-2 and two length N/4 radix-4 FFT 63 64 X(2k) = sum(n = 0 to N/2 - 1, [x[n] + x(n + N/2)] * W**(2*n*k)) 65 X(4k + 1) = sum(n = 0 to N/4 - 1, [x[n] - x(n + N/2) - j(x[n] - x(n + 3*N/4))] * W(n)*W**(4*n*k)) 66 X(4k + 3) = sum(n = 0 to N/4 - 1, [x[n] - x(n + N/2) + j(x[n] - x(n + 3*N/4))] * W(n)*W**(4*n*k)) 67 68 where W(n) = exp(-j2*PI*n/N) is called a twiddle factor 69 70 The detailed description of the algorithm (with bugs) can be found in 71 72 H.V. Sorensen, M.T. Heideman, and C. S. Burrus, "On Computing the Split-Radix 73 FFT,"IEEE Trans. Acoust., Speech, Signal Processing, pp. 152-200, Feb. 1986. 74 75 The implementation of the split-radix is similar to that of the radix-2 76 algorithm, except a smart index scheme is needed at each stage to determine 77 which nodes need to be decomposed further and which ones do not, since 78 a radix-2 decomposition is done at every stage while a radix-4 decomposition 79 is done at every other stage. This implementation uses an index scheme designed 80 by H.V. Sorensen, M.T. Heideman, and C. S. Burrus. 81 82 You can use this implementation for both floating and fixed point FFT and 83 inverse FFT computation by changing a compliler constant FIXED_POINT 84 in file fft.h. Some addtional information can be found in following sections.. 85 86 Usage 87 Efficiency 88 Examples: 89 90 >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> 91 Usage: 92 >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> 93 94 For floating point, you have to undefine the constant FIXED_POINT and 95 use typedef to define the trigonomy function and input data types. 96 97 For fixed point, the constant FIXED_POINT must be defined and a 32 bit 98 representation of both input data and trigonomy function data are required. 99 Furthermore, you have three choices to conduct fixed-point FFT. 100 If you use the algorithm in both Microsoft C and I86 hardware environment, you 101 should use an assemblyline implementation. To do this, you go to the file 102 himul32.cpp and define constants MICROSOFTC_HIMUL32 and I86_HIMUL3. 103 If you use the algorithm only in the Microsoft C environment, 104 a Microsoft specific implementation should be used. You do this by 105 going to the file himul32.cpp to undefine I86_HIMUL3, and 106 define MICROSOFTC_HIMUL32. 107 In any other situation, a stric C implementation should be used by 108 undefining both MICROSOFTC_HIMUL32 and I86_HIMUL3. 109 110 To use the algorithm, you need to constrcut an fft_info object 111 112 fft_info* my_fft_info = new_fft_info(log2Size); 113 114 If you have a real data input data[] of size 2**(log2Size + 1), you 115 use 116 117 do_real_fft(my_fft_info, size, data); 118 119 The positive half frequency Fourier transform will be returned, in addition 120 to the first and last elements of the transform that are stored in the 121 first and second elements of the data array. If the data[] array is the 122 Fourier transform of a real data input, you can also conduct the inverse 123 Fourier transform to get the real data input by 124 125 do_real_ifft(my_fft_info, size, data); 126 127 Finally, you should remember to remove my_fft_info object using 128 129 delete_fft_info(my_fft_info) 130 131 when you are done with FFT. 132 133 >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> 134 Efficiency: 135 >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> 136 137 TIME: Let M = log2Size be the power-of-two length and N = 2**M be the complex 138 FFT input size. The following formulas give the comutation requirements 139 of the split-radix FFT algorithm. 140 141 Multiplications = (4/3)*M*N - (35/9)N + 4 142 Adds = (8/3)*M*N - (16/9)N + 2 143 144 On a 266 MHz 64 MB RAM Pentium, using a release build, 145 500 runs of paired 256 point complex (512 point real) input FFT 146 (forward + inverse) have the following time counts: 147 148 Floating Point: 149 Real: 0.160 sec. 150 Complex: 0.120 sec. 151 152 153 Fixed Point: 154 Assembly: Real 0.170 sec, Complex 0.140 sec. 155 Microsoft C: Real 0.250 sec, Complex 0.240 sec. 156 Stric C: Real 0.540 sec, Complex 0.441 sec. 157 158 159 SPACE: The computation is done in place so that there is no dynamic memory allocation 160 in do_fft (do_real_fft) and do_ifft (do_real_ifft) call, except some 161 temporary local variables. 162 163 The memory space is needed in fft_info struct to store the cosine (sine) tales 164 butterfly index table, and bit reverse table. Specifically, 165 166 cosine(sine) tables: 3*N (half can be saved if we only save cosine or sine tables) 167 butterfly index table: < N 168 bitrever index table: N 169 170 >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> 171 Example: 172 >>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>>> 173 174 The examples of running this program is given in a compiled out main() 175 in fft.cpp. You can run this program by compiling the fft.h, fft.cpp, and 176 himul32.cpp with main() compiled. 177 178 */ /* end of the comment block*/ 179 180 #ifdef __cplusplus 181 extern "C" 182 { 183 #endif 184 185 186 #define DATA_BITS 16 /*number of significant bits in fftdata*/ 187 #define HAMMING_DATA_BITS 15 188 189 typedef fftdata trigonomydata; 190 191 typedef struct 192 { 193 /* fft log length */ 194 unsigned m_logLength; 195 196 /* fft data length */ 197 unsigned m_length; 198 199 unsigned *m_bitreverseTbl; 200 201 /* L shaped butterfly index table */ 202 unsigned *m_butterflyIndexTbl; 203 204 /* sine and cosine tables */ 205 trigonomydata *m_sin1Tbl; 206 trigonomydata *m_cos1Tbl; 207 trigonomydata *m_cos2Tbl; 208 trigonomydata *m_sin2Tbl; 209 trigonomydata *m_sin3Tbl; 210 trigonomydata *m_cos3Tbl; 211 212 213 } 214 srfft; 215 216 typedef struct 217 { 218 srfft *m_srfft; 219 220 fftdata *real; 221 fftdata *imag; 222 223 asr_uint32_t size; 224 asr_uint32_t size2; 225 226 } 227 fft_info; 228 229 230 /**************************************************************** 231 new_srfft: create srfft object 232 233 Parameters: 234 log2Length -- the power-of-two length of the complex FFT 235 236 Returns: 237 the created srfft object 238 ****************************************************************/ 239 srfft* new_srfft(unsigned log2Length); 240 241 242 /**************************************************************** 243 delete_srfft: release all the memory allocated to srfft object 244 245 Parameters: 246 pthis -- a pointer to a srfft object created by new_srfft 247 248 Returns: 249 none 250 ****************************************************************/ 251 void delete_srfft(srfft* pthis); 252 253 254 /****************************************************************** 255 do_real_fft conducts a forward FFT of a real data array using 256 the split-radix algorithm 257 258 Parameters: 259 pthis -- a pointer to the srfft object 260 length -- length of data array, must be a power-of-2 length 261 and must be equal to 2**(log2Length + 1) 262 data -- A real data array, overwritten by the positive frequency 263 half of its Fourier transform. 264 Data[0] and data[1] store the first and last 265 component of the the Fourier transform. After that, the 266 even elements store the real component, while the odd 267 elements store the imaginary component of the transform. 268 Return: 269 none 270 *********************************************************************/ 271 void do_real_fft(srfft* pthis, unsigned length, fftdata* data); 272 273 274 /****************************************************************** 275 do_real_ifft conducts an inverse FFT of a real data array's FFT using 276 the split-radix algorithm 277 278 Parameters: 279 pthis -- a pointer to the srfft object 280 length -- length of data array, must be a power-of-2 length 281 and must be equal to 2**(log2Length + 1) 282 data -- FFT of an real data array, overwritten by the real data array. 283 For input, data[0] and data[1] store the first and last 284 component of the the Fourier transform. After that, 285 the even elements store the real component of the transform, 286 while the odd elements store the imaginary component of 287 the transform. 288 Return: 289 none 290 *********************************************************************/ 291 void do_real_ifft(srfft* pthis, unsigned length, fftdata* data); 292 293 294 /* >>>>>>>>>>>>>> Private Methods <<<<<<<<<<<<<<<<<<<<<<<<< */ 295 296 /****************************************************************** 297 allocate_butterfly_index uses an index scheme developed by 298 Sorensen, Heideman, and Burrus to determine which L shaped 299 butterfiles needs to be computed at each stage and 300 store these indexes into a table 301 302 Parameters: 303 pthis -- a pointer to the srfft object 304 305 Returns: 306 none 307 *********************************************************************/ 308 void allocate_butterfly_tbl(srfft* pthis); 309 310 /****************************************************************** 311 allocate_trigonomy_tbl allocates trigonomy function tables 312 for FFT computation 313 314 Parameters: 315 pthis -- a pointer to the srfft object 316 317 Returns: 318 none 319 *********************************************************************/ 320 void allocate_trigonomy_tbl(srfft* pthis); 321 322 /****************************************************************** 323 allocate_bitreverse_tbl() allocates bit reverse index table 324 325 Parameters: 326 pthis -- a pointer to the srfft object 327 328 Returns: 329 none 330 *********************************************************************/ 331 void allocate_bitreverse_tbl(srfft* pthis); 332 333 #ifdef __cplusplus 334 } 335 #endif 336 337 #endif 338