1 /* ------------------------------------------------------------------ 2 * Copyright (C) 1998-2009 PacketVideo 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either 13 * express or implied. 14 * See the License for the specific language governing permissions 15 * and limitations under the License. 16 * ------------------------------------------------------------------- 17 */ 18 /* 19 20 Pathname: ./src/fft_rx4_long.c 21 Funtions: fft_rx4_long 22 23 ------------------------------------------------------------------------------ 24 REVISION HISTORY 25 26 Description: 27 (1) Eliminated search for max in the main loop. 28 (2) Reduced precision on w_256rx4 from Q15 to Q10 29 30 Description: 31 (1) Created function fft_rx4_long_no_max to overcome LTP problem. 32 33 Description: 34 (1) Modified shift so the accumulation growths faster than the 35 downshift, so now the input can be as high as 1.0 and saturation 36 will not occurre. The accumulation times the Q10 format will 37 never exceed 31 bits. This increases precision 38 (2) Eliminated unneeded data moves, used before for max search. 39 (3) Eliminated function fft_rx4_long_no_max. 40 41 Description: 42 (1) Added comment to explain max search elimination and 43 Q format during multiplications 44 45 Who: Date: 46 Description: 47 48 ------------------------------------------------------------------------------ 49 INPUT AND OUTPUT DEFINITIONS 50 51 Inputs: 52 Data = Input complex vector, arranged in the following order: 53 real, imag, real, imag... 54 This is a complex vector whose elements (real and Imag) are 55 Int32. 56 type Int32 * 57 58 peak_value = Input, peak value of the input vector 59 Output, peak value of the resulting vector 60 type Int32 * 61 62 Local Stores/Buffers/Pointers Needed: 63 None 64 65 Global Stores/Buffers/Pointers Needed: 66 None 67 68 Outputs: 69 None 70 71 Pointers and Buffers Modified: 72 calculation are done in-place and returned in Data 73 74 Local Stores Modified: 75 None 76 77 Global Stores Modified: 78 None 79 80 ------------------------------------------------------------------------------ 81 FUNCTION DESCRIPTION 82 83 Fast Fourier Transform, radix 4 with Decimation in Frequency and block 84 floating point arithmetic. 85 The radix-4 FFT simply divides the FFT into four smaller FFTs. Each of 86 the smaller FFTs is then further divided into smaller ones and so on. 87 It consists of log 4 N stages and each stage consists of N/4 dragonflies. 88 89 An FFT is nothing but a bundle of multiplications and summations which 90 may overflow during calculations. 91 92 93 This routine uses a scheme to test and scale the result output from 94 each FFT stage in order to fix the accumulation overflow. 95 96 The Input Data should be in Q13 format to get the highest precision. 97 At the end of each dragonfly calculation, a test for possible bit growth 98 is made, if bit growth is possible the Data is scale down back to Q13. 99 100 ------------------------------------------------------------------------------ 101 REQUIREMENTS 102 103 This function should provide a fixed point FFT for an input array 104 of size 256. 105 106 ------------------------------------------------------------------------------ 107 REFERENCES 108 109 [1] Advance Digital Signal Processing, J. Proakis, C. Rader, F. Ling, 110 C. Nikias, Macmillan Pub. Co. 111 112 ------------------------------------------------------------------------------ 113 PSEUDO-CODE 114 115 116 MODIFY( x[] ) 117 RETURN( exponent ) 118 119 ------------------------------------------------------------------------------ 120 RESOURCES USED 121 When the code is written for a specific target processor the 122 the resources used should be documented below. 123 124 STACK USAGE: [stack count for this module] + [variable to represent 125 stack usage for each subroutine called] 126 127 where: [stack usage variable] = stack usage for [subroutine 128 name] (see [filename].ext) 129 130 DATA MEMORY USED: x words 131 132 PROGRAM MEMORY USED: x words 133 134 CLOCK CYCLES: [cycle count equation for this module] + [variable 135 used to represent cycle count for each subroutine 136 called] 137 138 where: [cycle count variable] = cycle count for [subroutine 139 name] (see [filename].ext) 140 141 ------------------------------------------------------------------------------ 142 */ 143 /*---------------------------------------------------------------------------- 144 ; INCLUDES 145 ----------------------------------------------------------------------------*/ 146 147 #include "pv_audio_type_defs.h" 148 #include "fft_rx4.h" 149 150 #include "fxp_mul32.h" 151 152 /*---------------------------------------------------------------------------- 153 ; MACROS 154 ; Define module specific macros here 155 ----------------------------------------------------------------------------*/ 156 157 /*---------------------------------------------------------------------------- 158 ; DEFINES 159 ; Include all pre-processor statements here. Include conditional 160 ; compile variables also. 161 ----------------------------------------------------------------------------*/ 162 163 /*---------------------------------------------------------------------------- 164 ; LOCAL FUNCTION DEFINITIONS 165 ; Function Prototype declaration 166 ----------------------------------------------------------------------------*/ 167 168 /*---------------------------------------------------------------------------- 169 ; LOCAL VARIABLE DEFINITIONS 170 ; Variable declaration - defined here and used outside this module 171 ----------------------------------------------------------------------------*/ 172 173 /*---------------------------------------------------------------------------- 174 ; EXTERNAL FUNCTION REFERENCES 175 ; Declare functions defined elsewhere and referenced in this module 176 ----------------------------------------------------------------------------*/ 177 178 /*---------------------------------------------------------------------------- 179 ; EXTERNAL VARIABLES REFERENCES 180 ; Declare variables used in this module but defined elsewhere 181 ----------------------------------------------------------------------------*/ 182 183 /*---------------------------------------------------------------------------- 184 ; EXTERNAL GLOBAL STORE/BUFFER/POINTER REFERENCES 185 ; Declare variables used in this module but defined elsewhere 186 ----------------------------------------------------------------------------*/ 187 188 /*---------------------------------------------------------------------------- 189 ; FUNCTION CODE 190 ----------------------------------------------------------------------------*/ 191 192 193 void fft_rx4_long( 194 Int32 Data[], 195 Int32 *peak_value) 196 197 { 198 Int n1; 199 Int n2; 200 Int j; 201 Int k; 202 Int i; 203 204 Int32 t1; 205 Int32 t2; 206 Int32 r1; 207 Int32 r2; 208 Int32 r3; 209 Int32 r4; 210 Int32 s1; 211 Int32 s2; 212 Int32 s3; 213 Int32 *pData1; 214 Int32 *pData2; 215 Int32 *pData3; 216 Int32 *pData4; 217 Int32 temp1; 218 Int32 temp2; 219 Int32 temp3; 220 Int32 temp4; 221 Int32 max; 222 223 Int32 exp_jw1; 224 Int32 exp_jw2; 225 Int32 exp_jw3; 226 227 228 229 const Int32 *pw = W_256rx4; 230 231 n2 = FFT_RX4_LONG; 232 233 for (k = FFT_RX4_LONG; k > 4; k >>= 2) 234 { 235 236 n1 = n2; 237 n2 >>= 2; 238 239 for (i = 0; i < FFT_RX4_LONG; i += n1) 240 { 241 pData1 = &Data[ i<<1]; 242 pData2 = pData1 + n1; 243 244 temp1 = *pData1; 245 temp2 = *pData2; 246 247 r1 = temp1 + temp2; 248 r2 = temp1 - temp2; 249 250 pData3 = pData1 + (n1 >> 1); 251 pData4 = pData3 + n1; 252 temp3 = *pData3++; 253 temp4 = *pData4++; 254 255 t1 = temp3 + temp4; 256 257 *(pData1++) = (r1 + t1); 258 t2 = temp3 - temp4; 259 *(pData2++) = (r1 - t1); 260 261 temp1 = *pData1; 262 temp2 = *pData2; 263 264 s1 = temp1 + temp2; 265 temp3 = *pData3; 266 s2 = temp1 - temp2; 267 temp4 = *pData4; 268 *pData3-- = (s2 - t2); 269 *pData4-- = (s2 + t2); 270 271 t1 = temp3 + temp4; 272 273 *pData1 = (s1 + t1); 274 *pData2 = (s1 - t1); 275 276 r1 = temp3 - temp4; 277 278 *pData4 = (r2 - r1); 279 *pData3 = (r2 + r1); 280 281 } /* i */ 282 283 284 285 for (j = 1; j < n2; j++) 286 { 287 288 exp_jw1 = (*pw++); 289 exp_jw2 = (*pw++); 290 exp_jw3 = (*pw++); 291 292 293 for (i = j; i < FFT_RX4_LONG; i += n1) 294 { 295 pData1 = &Data[ i<<1]; 296 pData2 = pData1 + n1; 297 298 temp1 = *pData1; 299 temp2 = *pData2++; 300 301 r1 = temp1 + temp2; 302 r2 = temp1 - temp2; 303 304 pData3 = pData1 + (n1 >> 1); 305 pData4 = pData3 + n1; 306 temp3 = *pData3++; 307 temp4 = *pData4++; 308 309 r3 = temp3 + temp4; 310 r4 = temp3 - temp4; 311 312 *(pData1++) = (r1 + r3); 313 r1 = (r1 - r3) << 1; 314 315 temp2 = *pData2; 316 temp1 = *pData1; 317 318 s1 = temp1 + temp2; 319 s2 = temp1 - temp2; 320 s3 = (s2 + r4) << 1; 321 s2 = (s2 - r4) << 1; 322 323 temp3 = *pData3; 324 temp4 = *pData4; 325 326 t1 = temp3 + temp4; 327 t2 = temp3 - temp4; 328 329 *pData1 = (s1 + t1); 330 s1 = (s1 - t1) << 1; 331 332 *pData2-- = cmplx_mul32_by_16(s1, -r1, exp_jw2); 333 r3 = (r2 - t2) << 1; 334 *pData2 = cmplx_mul32_by_16(r1, s1, exp_jw2); 335 336 r2 = (r2 + t2) << 1; 337 338 *pData3-- = cmplx_mul32_by_16(s2, -r2, exp_jw1); 339 *pData3 = cmplx_mul32_by_16(r2, s2, exp_jw1); 340 341 *pData4-- = cmplx_mul32_by_16(s3, -r3, exp_jw3); 342 *pData4 = cmplx_mul32_by_16(r3, s3, exp_jw3); 343 344 } /* i */ 345 346 } /* j */ 347 348 } /* k */ 349 350 351 max = 0; 352 353 pData1 = Data - 7; 354 355 356 for (i = ONE_FOURTH_FFT_RX4_LONG; i != 0 ; i--) 357 { 358 pData1 += 7; 359 pData2 = pData1 + 4; 360 361 362 temp1 = *pData1; 363 temp2 = *pData2++; 364 365 r1 = temp1 + temp2; 366 r2 = temp1 - temp2; 367 368 pData3 = pData1 + 2; 369 pData4 = pData1 + 6; 370 temp1 = *pData3++; 371 temp2 = *pData4++; 372 373 t1 = temp1 + temp2; 374 t2 = temp1 - temp2; 375 376 temp1 = (r1 + t1); 377 r1 = (r1 - t1); 378 *(pData1++) = temp1; 379 max |= (temp1 >> 31) ^ temp1; 380 381 382 383 temp2 = *pData2; 384 temp1 = *pData1; 385 386 s1 = temp1 + temp2; 387 s2 = temp1 - temp2; 388 389 390 temp1 = *pData3; 391 temp2 = *pData4; 392 393 s3 = (s2 + t2); 394 s2 = (s2 - t2); 395 396 t1 = temp1 + temp2; 397 t2 = temp1 - temp2; 398 399 temp1 = (s1 + t1); 400 *pData1 = temp1; 401 temp2 = (s1 - t1); 402 403 max |= (temp1 >> 31) ^ temp1; 404 *pData2-- = temp2; 405 max |= (temp2 >> 31) ^ temp2; 406 407 *pData2 = r1; 408 max |= (r1 >> 31) ^ r1; 409 *pData3-- = s2; 410 max |= (s2 >> 31) ^ s2; 411 *pData4-- = s3; 412 max |= (s3 >> 31) ^ s3; 413 414 temp1 = (r2 - t2); 415 *pData4 = temp1; 416 temp2 = (r2 + t2); 417 *pData3 = temp2; 418 max |= (temp1 >> 31) ^ temp1; 419 max |= (temp2 >> 31) ^ temp2; 420 421 } /* i */ 422 423 *peak_value = max; 424 425 return ; 426 427 } 428 429