@//
@//  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
@//
@//  Use of this source code is governed by a BSD-style license
@//  that can be found in the LICENSE file in the root of the source
@//  tree. An additional intellectual property rights grant can be found
@//  in the file PATENTS.  All contributing project authors may
@//  be found in the AUTHORS file in the root of the source tree.
@//
@//  This file was originally licensed as follows. It has been
@//  relicensed with permission from the copyright holders.
@//

@//
@// File Name:  omxSP_FFTInv_CCSToR_S32_Sfs_s.s
@// OpenMAX DL: v1.0.2
@// Last Modified Revision:   7469
@// Last Modified Date:       Thu, 20 Sep 2007
@//
@// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
@//
@//
@//
@// Description:
@// Compute an inverse FFT for a complex signal
@//



@// Include standard headers

#include "dl/api/arm/armCOMM_s.h"
#include "dl/api/arm/omxtypes_s.h"


@//        Import symbols required from other files
@//        (For example tables)

        .extern  armSP_FFTInv_CToC_SC32_Sfs_Radix2_fs_OutOfPlace_unsafe
        .extern  armSP_FFTInv_CToC_SC32_Radix2_fs_OutOfPlace_unsafe
        .extern  armSP_FFTInv_CToC_SC32_Radix4_fs_OutOfPlace_unsafe
        .extern  armSP_FFTInv_CToC_SC32_Radix8_fs_OutOfPlace_unsafe
        .extern  armSP_FFTInv_CToC_SC32_Radix4_OutOfPlace_unsafe
        .extern  armSP_FFTInv_CToC_SC32_Sfs_Radix4_fs_OutOfPlace_unsafe
        .extern  armSP_FFTInv_CToC_SC32_Sfs_Radix8_fs_OutOfPlace_unsafe
        .extern  armSP_FFTInv_CToC_SC32_Sfs_Radix4_OutOfPlace_unsafe
        .extern  armSP_FFTInv_CToC_SC32_Sfs_Radix2_OutOfPlace_unsafe
        .extern  armSP_FFTInv_CToC_SC32_Radix2_OutOfPlace_unsafe
        .extern  armSP_FFTInv_CCSToR_S32_Sfs_preTwiddleRadix2_unsafe
        .extern  armSP_FFTInv_CCSToR_S32_preTwiddleRadix2_unsafe


@// Set debugging level
@//DEBUG_ON    SETL {TRUE}



@// Guarding implementation by the processor name



@// Guarding implementation by the processor name

@//        Import symbols required from other files
@//        (For example tables)
        .extern  armSP_FFTInv_CToC_SC32_Radix4_ls_OutOfPlace_unsafe
        .extern  armSP_FFTInv_CToC_SC32_Radix2_ls_OutOfPlace_unsafe
        .extern  armSP_FFTInv_CToC_SC32_Sfs_Radix2_ls_OutOfPlace_unsafe
        .extern  armSP_FFTInv_CToC_SC32_Sfs_Radix4_ls_OutOfPlace_unsafe


@// Register aliases.
@// Several aliases deliberately share one physical register. The code below
@// depends on this, so keep the statement order when editing:
@//   r1  : pDst     / argTwiddle
@//   r2  : pFFTSpec / argDst / diffMinusOne / pOut1
@//   r3  : scale    / round
@//   r4  : pTwiddle / tmpOrder / argScale / x0r
@//   r6  : N        / subFFTNum
@//   r14 : order    / zero   (r14 is LR, so every BL overwrites it)

@//Input Registers

#define pSrc            r0
#define pDst            r1
#define pFFTSpec        r2
#define scale           r3


@// Output registers
#define result          r0

@//Local Scratch Registers

#define argTwiddle      r1
#define argDst          r2
#define argScale        r4
#define tmpOrder        r4
#define pTwiddle        r4
#define pOut            r5
#define subFFTSize      r7
#define subFFTNum       r6
#define N               r6
#define order           r14
#define diff            r9
@// Total num of radix stages required to complete the FFT
#define count           r8
#define x0r             r4
#define x0i             r5
#define diffMinusOne    r2
#define round           r3

#define pOut1           r2
#define size            r7
#define step            r8
#define step1           r9
#define twStep          r10
#define pTwiddleTmp     r11
#define argTwiddle1     r12
#define zero            r14

@// Neon registers

#define dX0             D0.S32
#define dShift          D1.S32
#define dX1             D1.S32
#define dY0             D2.S32
#define dY1             D3.S32
#define dX0r            D0.S32
#define dX0i            D1.S32
#define dX1r            D2.S32
#define dX1i            D3.S32
#define dW0r            D4.S32
#define dW0i            D5.S32
#define dW1r            D6.S32
#define dW1i            D7.S32
#define dT0             D8.S32
#define dT1             D9.S32
#define dT2             D10.S32
#define dT3             D11.S32
#define qT0             Q6.S64
#define qT1             Q7.S64
#define qT2             Q8.S64
#define qT3             Q9.S64
#define dY0r            D4.S32
#define dY0i            D5.S32
#define dY1r            D6.S32
#define dY1i            D7.S32
#define dzero           D20.S32

#define dY2             D4.S32
#define dY3             D5.S32
#define dW0             D6.S32
#define dW1             D7.S32
#define dW0Tmp          D10.S32
#define dW1Neg          D11.S32



@//-----------------------------------------------------------------------
@// omxSP_FFTInv_CCSToR_S32_Sfs
@//
@// In:    r0 = pSrc      source pointer
@//        r1 = pDst      destination pointer
@//        r2 = pFFTSpec  spec structure; N, pTwiddle and pBuf are read from
@//                       byte offsets 0, 8 and 12
@//        r3 = scale     scale factor
@// Out:   r0 = result    always OMX_Sts_NoErr
@//
@// Flow:  N <= 1  : one 32-bit value is loaded, rounding-shifted right by
@//                  scale and stored.
@//        N >  1  : a preTwiddle radix-2 helper is called, then an N/2 point
@//                  complex inverse FFT is built from the radix 2/4/8 stage
@//                  helpers selected by order = log2(N/2); FFTEnd applies
@//                  whatever scaling is left over in diffOnStack.
@//
@// NOTE(review): M_ALLOC4 / M_START / M_STR / M_LDR / M_END come from
@// armCOMM_s.h, which is not visible here. M_START ...,r11,d15 presumably
@// saves the callee-saved core and NEON registers up to r11 / d15 and M_END
@// restores them and returns -- confirm against the macro definitions.
@//
@// NOTE(review): the *_unsafe helpers use a private register convention that
@// is not visible in this file. The code below assumes they keep r3 (scale)
@// and r4 intact where noted, advance subFFTNum / subFFTSize, and leave pSrc
@// pointing at the latest stage output -- confirm against the helpers.
@//-----------------------------------------------------------------------

        @// Allocate stack memory required by the function
        M_ALLOC4        diffOnStack, 4

        @// Write function header
        M_START     omxSP_FFTInv_CCSToR_S32_Sfs,r11,d15

@ Structure offsets for the FFTSpec
        .set    ARMsFFTSpec_N, 0
        .set    ARMsFFTSpec_pBitRev, 4
        .set    ARMsFFTSpec_pTwiddle, 8
        .set    ARMsFFTSpec_pBuf, 12

        @// Define stack arguments

        @// Read the size from structure and take log
        LDR     N, [pFFTSpec, #ARMsFFTSpec_N]

        @// Read other structure parameters
        LDR     pTwiddle, [pFFTSpec, #ARMsFFTSpec_pTwiddle]
        LDR     pOut, [pFFTSpec, #ARMsFFTSpec_pBuf]

        @//  N=1 Treat separately
        CMP     N,#1
        BGT     sizeGreaterThanOne
        VLD1    dX0[0],[pSrc]
        RSB     scale,scale,#0                        @// to use VRSHL for right shift by a variable
        VMOV    dShift[0],scale                       @// only lane 0 of the shift vector is set ...
        VRSHL   dX0,dShift
        VST1    dX0[0],[pDst]                         @// ... and only lane 0 of the result is stored

        B       End

sizeGreaterThanOne:

        @// Call the preTwiddle Radix2 stage before doing the complex IFFT

        @// The following conditional BL combination would work since
        @// evenOddButterflyLoop in the first call would set Z flag to zero
        @// NOTE(review): the BLGT below executes on whatever flags the first
        @// helper returns with, so it must not return with GT true -- confirm
        @// against armSP_FFTInv_CCSToR_S32_preTwiddleRadix2_unsafe.

        CMP     scale,#0
        BLEQ    armSP_FFTInv_CCSToR_S32_preTwiddleRadix2_unsafe
        BLGT    armSP_FFTInv_CCSToR_S32_Sfs_preTwiddleRadix2_unsafe



complexIFFT:

        ASR     N,N,#1                             @// N/2 point complex IFFT
        ADD     pSrc,pOut,N,LSL #3                 @// set pSrc as pOut1

        CLZ     order,N                             @// N = 2^order
        RSB     order,order,#31
        MOV     subFFTSize,#1
        @//MOV     subFFTNum,N

        ADD     scale,scale,order                   @// FFTInverse has a final scaling factor by N

        CMP     order,#3
        BGT     orderGreaterthan3                   @// order > 3

        CMP     order,#1
        BGE     orderGreaterthan0                   @// order > 0
        M_STR   scale, diffOnStack,LT               @// order = 0
        VLD1    dX0,[pSrc]                          @// single complex point: copy it to pDst
        VST1    dX0,[pDst]
        MOV     pSrc,pDst                           @// FFTEnd scales in place through pSrc
        BLT     FFTEnd

orderGreaterthan0:
        @// set the buffers appropriately for various orders
        @// argDst aliases pFFTSpec (r2) and argTwiddle aliases pDst (r1), so
        @// pDst has to be copied out before argTwiddle is written.
        CMP     order,#2
        MOVNE   argDst,pDst
        MOVEQ   argDst,pOut
        MOVEQ   pOut,pDst                           @// Pass the first stage destination in RN5
        MOV     argTwiddle,pTwiddle
        @// Store the scale factor and scale at the end
        SUB     diff,scale,order
        M_STR   diff, diffOnStack
        @// Flags here are still the ones from CMP order,#2 above: SUB and MOV
        @// do not set them (M_STR is assumed not to either).
        BGE     orderGreaterthan1
        BLLT    armSP_FFTInv_CToC_SC32_Sfs_Radix2_fs_OutOfPlace_unsafe  @// order = 1
        B       FFTEnd

orderGreaterthan1:
        MOV     tmpOrder,order                          @// tmpOrder = RN 4
        @// order lives in r14 (LR) and is overwritten by the BL below, hence
        @// the copy. tmpOrder (r4) must survive the first-stage helper.
        BL      armSP_FFTInv_CToC_SC32_Sfs_Radix2_fs_OutOfPlace_unsafe
        CMP     tmpOrder,#2
        BLGT    armSP_FFTInv_CToC_SC32_Sfs_Radix2_OutOfPlace_unsafe     @// middle stage, order = 3 only
        BL      armSP_FFTInv_CToC_SC32_Sfs_Radix2_ls_OutOfPlace_unsafe
        B       FFTEnd


orderGreaterthan3:
        @// check scale = 0 or scale = order
        @// NOTE(review): scale already had order added to it above, so diff
        @// here equals the scale factor the caller passed in. For any
        @// non-negative caller scale the BGE below is taken and
        @// specialScaleCase then continues at scaleEqualsOrder; the unscaled
        @// radix-4 path and generalScaleCase look unreachable in that case.
        SUBS    diff, scale, order                 @// scale > order
        MOVGT   scale,order
        BGE     specialScaleCase                   @// scale = 0 or scale = order
        CMP     scale,#0
        BEQ     specialScaleCase
        B       generalScaleCase

specialScaleCase:                                           @//  scale = 0 or scale = order  and order >= 2

        @// Bit 1 of order is tested to choose which buffer the first stage
        @// writes to (pDst directly, or the scratch buffer).
        TST     order, #2                           @// Set input args to fft stages
        MOVNE   argDst,pDst
        MOVEQ   argDst,pOut
        MOVEQ   pOut,pDst                           @// Pass the first stage destination in RN5
        MOV     argTwiddle,pTwiddle

        CMP     diff,#0
        M_STR   diff, diffOnStack
        BGE     scaleEqualsOrder

        @//check for even or odd order
        @// NOTE: The following combination of BL's would work fine even though the first
        @// BL would corrupt the flags. This is because the end of the "grpZeroSetLoop" loop inside
        @// armSP_FFTInv_CToC_SC32_Radix4_fs_OutOfPlace_unsafe sets the Z flag to EQ

        TST     order,#0x00000001
        BLEQ    armSP_FFTInv_CToC_SC32_Radix4_fs_OutOfPlace_unsafe
        BLNE    armSP_FFTInv_CToC_SC32_Radix8_fs_OutOfPlace_unsafe

        CMP     subFFTNum,#4
        BLT     FFTEnd


unscaledRadix4Loop:
        @// Entered with the flags of CMP subFFTNum,#4: EQ selects the last stage.
        BEQ     lastStageUnscaledRadix4
        BL      armSP_FFTInv_CToC_SC32_Radix4_OutOfPlace_unsafe
        CMP     subFFTNum,#4
        B       unscaledRadix4Loop

lastStageUnscaledRadix4:
        BL      armSP_FFTInv_CToC_SC32_Radix4_ls_OutOfPlace_unsafe
        B       FFTEnd


scaleEqualsOrder:
        @//check for even or odd order
        @// NOTE: The following combination of BL's would work fine even though the first
        @// BL would corrupt the flags. This is because the end of the "grpZeroSetLoop" loop inside
        @// armSP_FFTInv_CToC_SC32_Radix4_fs_OutOfPlace_unsafe sets the Z flag to EQ

        TST     order,#0x00000001
        BLEQ    armSP_FFTInv_CToC_SC32_Sfs_Radix4_fs_OutOfPlace_unsafe
        BLNE    armSP_FFTInv_CToC_SC32_Sfs_Radix8_fs_OutOfPlace_unsafe

        CMP     subFFTNum,#4
        BLT     FFTEnd


scaledRadix4Loop:
        @// Entered with the flags of CMP subFFTNum,#4: EQ selects the last stage.
        BEQ     lastStageScaledRadix4
        BL      armSP_FFTInv_CToC_SC32_Sfs_Radix4_OutOfPlace_unsafe
        CMP     subFFTNum,#4
        B       scaledRadix4Loop

lastStageScaledRadix4:
        BL      armSP_FFTInv_CToC_SC32_Sfs_Radix4_ls_OutOfPlace_unsafe
        B       FFTEnd

generalScaleCase:                                           @// 0 < scale < order and order >= 2
        @// Determine the correct destination buffer
        SUB     diff,order,scale
        TST     diff,#0x01
        ADDEQ   count,scale,diff,LSR #1         @// count = scale + (order - scale)/2
        MOVNE   count,order
        TST     count,#0x01                     @// Is count even or odd ?

        MOVNE   argDst,pDst                     @// Set input args to fft stages
        MOVEQ   argDst,pOut
        MOVEQ   pOut,pDst                       @// Pass the first stage destination in RN5
        MOV     argTwiddle,pTwiddle

        M_STR   diff, diffOnStack

        MOV     argScale,scale                  @// Put scale in RN4 so as to save and restore
        BL      armSP_FFTInv_CToC_SC32_Sfs_Radix2_fs_OutOfPlace_unsafe     @// scaled first stage
        SUBS    argScale,argScale,#1

scaledRadix2Loop:
        @// One scaled radix-2 stage per remaining unit of argScale.
        BLGT    armSP_FFTInv_CToC_SC32_Sfs_Radix2_OutOfPlace_unsafe
        SUBS    argScale,argScale,#1            @// save and restore scale (RN4) in the scaled stages
        BGT     scaledRadix2Loop


        M_LDR   diff, diffOnStack
        @//check for even or odd order
        TST     diff,#0x00000001
        BEQ     generalUnscaledRadix4Loop
        B       unscaledRadix2Loop

generalUnscaledRadix4Loop:
        CMP     subFFTNum,#4
        BEQ     generalLastStageUnscaledRadix4
        BL      armSP_FFTInv_CToC_SC32_Radix4_OutOfPlace_unsafe
        B       generalUnscaledRadix4Loop

generalLastStageUnscaledRadix4:
        BL      armSP_FFTInv_CToC_SC32_Radix4_ls_OutOfPlace_unsafe
        @// diffOnStack holds order - scale on this path, not a leftover
        @// shift, so the FFTEnd scaling pass is skipped.
        B       End


unscaledRadix2Loop:
        CMP     subFFTNum,#2
        BEQ     generalLastStageUnscaledRadix2
        BL      armSP_FFTInv_CToC_SC32_Radix2_OutOfPlace_unsafe
        B       unscaledRadix2Loop

generalLastStageUnscaledRadix2:
        BL      armSP_FFTInv_CToC_SC32_Radix2_ls_OutOfPlace_unsafe
        @// Same as the radix-4 tail above: no FFTEnd scaling on this path.
        B       End


FFTEnd:                                               @// Does only the scaling

        M_LDR   diff, diffOnStack
        CMP     diff,#0
        BLE     End                                   @// nothing left to shift

        RSB     diff,diff,#0                          @// to use VRSHL for right shift by a variable
        VDUP    dShift,diff

scaleFFTData:                                         @// N = subFFTSize  ; dataptr = pDst  ; scale = diff
        @// Each pass shifts one D register (two 32-bit values) in place and
        @// advances pSrc; the loop runs subFFTSize times.
        VLD1    {dX0},[pSrc]                          @// pSrc contains pDst pointer
        SUBS    subFFTSize,subFFTSize,#1
        VRSHL   dX0,dShift
        VST1    {dX0},[pSrc]!

        BGT     scaleFFTData


End:
        @// Set return value
        MOV     result, #OMX_Sts_NoErr

        @// Write function tail
        M_END



        .end