@//
@//  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
@//
@//  Use of this source code is governed by a BSD-style license
@//  that can be found in the LICENSE file in the root of the source
@//  tree. An additional intellectual property rights grant can be found
@//  in the file PATENTS.  All contributing project authors may
@//  be found in the AUTHORS file in the root of the source tree.
@//
@//  This file was originally licensed as follows. It has been
@//  relicensed with permission from the copyright holders.
@//

@//
@// File Name:  omxSP_FFTInv_CToC_SC32_Sfs_s.s
@// OpenMAX DL: v1.0.2
@// Last Modified Revision:   6675
@// Last Modified Date:       Fri, 06 Jul 2007
@//
@// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
@//
@//
@//
@// Description:
@// Compute an inverse FFT for a complex signal
@//
@// Syntax / target: GNU as, ARM (AArch32) with NEON, run through the C
@// preprocessor (register aliases below are #define macros). All comments
@// use the "@//" form so the preprocessor never expands an alias inside
@// comment text.
@//


@// Include standard headers

#include "dl/api/arm/armCOMM_s.h"
#include "dl/api/arm/omxtypes_s.h"

@// Import symbols required from other files
@// (For example tables)

        .extern  armSP_FFTInv_CToC_SC32_Sfs_Radix2_fs_OutOfPlace_unsafe
        .extern  armSP_FFTInv_CToC_SC32_Radix2_fs_OutOfPlace_unsafe
        .extern  armSP_FFTInv_CToC_SC32_Radix4_fs_OutOfPlace_unsafe
        .extern  armSP_FFTInv_CToC_SC32_Radix8_fs_OutOfPlace_unsafe
        .extern  armSP_FFTInv_CToC_SC32_Radix4_OutOfPlace_unsafe
        .extern  armSP_FFTInv_CToC_SC32_Sfs_Radix4_fs_OutOfPlace_unsafe
        .extern  armSP_FFTInv_CToC_SC32_Sfs_Radix8_fs_OutOfPlace_unsafe
        .extern  armSP_FFTInv_CToC_SC32_Sfs_Radix4_OutOfPlace_unsafe
        .extern  armSP_FFTInv_CToC_SC32_Sfs_Radix2_OutOfPlace_unsafe
        .extern  armSP_FFTInv_CToC_SC32_Radix2_OutOfPlace_unsafe

@// Set debugging level
@//DEBUG_ON    SETL {TRUE}



@// Guarding implementation by the processor name
@// (no conditional-assembly guard is actually present in this file)

@// Import symbols required from other files
@// (For example tables)
        .extern  armSP_FFTInv_CToC_SC32_Radix4_ls_OutOfPlace_unsafe
        .extern  armSP_FFTInv_CToC_SC32_Radix2_ls_OutOfPlace_unsafe
        .extern  armSP_FFTInv_CToC_SC32_Sfs_Radix2_ls_OutOfPlace_unsafe
        .extern  armSP_FFTInv_CToC_SC32_Sfs_Radix4_ls_OutOfPlace_unsafe


@// Register aliases.
@// Several aliases deliberately share one physical register; the sharing
@// is relied on by the code below:
@//   r1 : pDst (on entry)      / argTwiddle (argument to the stage routines)
@//   r2 : pFFTSpec (on entry)  / argDst / diffMinusOne
@//   r3 : scale                / round
@//   r4 : pTwiddle / argScale / tmpOrder / x0r
@//   r5 : pOut                 / x0i
@//   r6 : N                    / subFFTNum
@//   r14: order  -- this is LR, so every BL destroys it

@//Input Registers

#define pSrc            r0
#define pDst            r1
#define pFFTSpec        r2
#define scale           r3


@// Output registers
#define result          r0

@//Local Scratch Registers

#define argTwiddle      r1
#define argDst          r2
#define argScale        r4
#define tmpOrder        r4
#define pTwiddle        r4
#define pOut            r5
#define subFFTSize      r7
#define subFFTNum       r6
#define N               r6
#define order           r14
#define diff            r9
@// Total num of radix stages required to complete the FFT
#define count           r8
#define x0r             r4
#define x0i             r5
#define diffMinusOne    r2
#define round           r3

@// Neon registers

#define dX0             D0.S32
#define dShift          D1.S32


@// -----------------------------------------------------------------------
@// omxSP_FFTInv_CToC_SC32_Sfs
@//
@// In:    r0 = pSrc, r1 = pDst, r2 = pFFTSpec, r3 = scale
@// Out:   r0 = OMX_Sts_NoErr (always; this routine performs no argument
@//             checking)
@// Stack: one 4-byte local, diffOnStack, reserved with M_ALLOC4.
@// Save/restore of callee-saved registers is delegated to the
@// M_START ... ,r11,d15 / M_END macro pair -- presumably r4-r11, LR and
@// d8-d15; confirm against armCOMM_s.h.
@//
@// Outline:
@//   order = 31 - CLZ(N)          (N is read from the spec, N = 2^order)
@//   scale = scale + order        (the inverse transform folds in 1/N)
@//   order <= 3 : scaled radix-2 stages, then the remaining shift
@//                (the caller-supplied scale) is applied at FFTEnd.
@//   order >  3 : radix-4/8 first stage followed by radix-4 stages, either
@//                scaled or unscaled, or a mixed scaled-radix-2 +
@//                unscaled path (generalScaleCase).
@//
@// The stage routines are external "_unsafe" helpers. This code re-tests
@// subFFTNum after each call without modifying it itself, so the helpers
@// are expected to update subFFTNum (and, presumably, subFFTSize and the
@// pSrc/pDst/twiddle pointers) -- confirm against their sources.
@// -----------------------------------------------------------------------

        @// Allocate stack memory required by the function
        M_ALLOC4        diffOnStack, 4

        @// Write function header
        M_START         omxSP_FFTInv_CToC_SC32_Sfs,r11,d15

@// Structure offsets for the FFTSpec
        .set    ARMsFFTSpec_N, 0
        .set    ARMsFFTSpec_pBitRev, 4
        .set    ARMsFFTSpec_pTwiddle, 8
        .set    ARMsFFTSpec_pBuf, 12

        @// Define stack arguments

        @// Read the size from structure and take log
        LDR     N, [pFFTSpec, #ARMsFFTSpec_N]

        @// Read other structure parameters
        LDR     pTwiddle, [pFFTSpec, #ARMsFFTSpec_pTwiddle]
        LDR     pOut, [pFFTSpec, #ARMsFFTSpec_pBuf]

        CLZ     order,N                             @// N = 2^order
        RSB     order,order,#31                     @// order = 31 - CLZ(N)
        MOV     subFFTSize,#1
        @// subFFTNum and N are the same register (r6), so the move below
        @// would be a no-op; subFFTNum already equals N here.
        @//MOV     subFFTNum,N

        ADD     scale,scale,order                   @// FFTInverse has a final scaling factor by N

        CMP     order,#3
        BGT     orderGreaterthan3                   @// order > 3

        CMP     order,#1
        BGE     orderGreaterthan0                   @// order > 0
        @// order = 0 from here on: flags are still LT from the CMP above
        @// (the NEON load/store and the MOV do not change them), so the
        @// conditional store executes and the final BLT is always taken.
        M_STR   scale, diffOnStack,LT               @// order = 0
        VLD1    dX0,[pSrc]                          @// single complex sample: copy src to dst
        VST1    dX0,[pDst]
        MOV     pSrc,pDst                           @// FFTEnd scales in place through pSrc
        BLT     FFTEnd

orderGreaterthan0:
        @// set the buffers appropriately for various orders
        @// (1 <= order <= 3).  The flags of this CMP are consumed by the
        @// MOVNE/MOVEQ below and again by BGE/BLLT further down; nothing in
        @// between (MOV, SUB without S, M_STR) is expected to alter them.
        CMP     order,#2
        @// argDst (r2) must be loaded from pDst (r1) before r1 is
        @// overwritten with the twiddle pointer.
        MOVNE   argDst,pDst
        MOVEQ   argDst,pOut
        MOVEQ   pOut,pDst                           @// Pass the first stage destination in RN5
        MOV     argTwiddle,pTwiddle
        @// Store the scale factor and scale at the end
        @// (scale was already increased by order, so diff is the
        @// caller-supplied scale)
        SUB     diff,scale,order
        M_STR   diff, diffOnStack
        BGE     orderGreaterthan1                   @// order = 2 or 3
        BLLT    armSP_FFTInv_CToC_SC32_Sfs_Radix2_fs_OutOfPlace_unsafe  @// order = 1
        B       FFTEnd

orderGreaterthan1:
        @// order is r14 (LR) and is destroyed by the BL below, so keep a
        @// copy in r4.  NOTE(review): this relies on the stage routine
        @// preserving r4 -- confirm.
        MOV     tmpOrder,order                      @// tmpOrder = RN 4
        BL      armSP_FFTInv_CToC_SC32_Sfs_Radix2_fs_OutOfPlace_unsafe
        CMP     tmpOrder,#2
        BLGT    armSP_FFTInv_CToC_SC32_Sfs_Radix2_OutOfPlace_unsafe     @// middle stage, order = 3 only
        BL      armSP_FFTInv_CToC_SC32_Sfs_Radix2_ls_OutOfPlace_unsafe
        B       FFTEnd


orderGreaterthan3:
        @// check scale = 0 or scale = order
        @// ("scale" here is the adjusted value, i.e. caller scale + order,
        @// so diff below equals the caller-supplied scale)
        SUBS    diff, scale, order                  @// scale > order
        MOVGT   scale,order                         @// clamp the in-stage scaling to order
        BGE     specialScaleCase                    @// scale = 0 or scale = order
        CMP     scale,#0
        BEQ     specialScaleCase
        B       generalScaleCase

specialScaleCase:                                   @// scale = 0 or scale = order and order >= 2

        TST     order, #2                           @// Set input args to fft stages
        MOVNE   argDst,pDst
        MOVEQ   argDst,pOut
        MOVEQ   pOut,pDst                           @// Pass the first stage destination in RN5
        MOV     argTwiddle,pTwiddle

        CMP     diff,#0
        M_STR   diff, diffOnStack                   @// store sits between CMP and BGE; flags are expected to survive it
        BGE     scaleEqualsOrder

        @// diff < 0 here: run the unscaled stages.

        @//check for even or odd order
        @// NOTE: The following combination of BL's would work fine even though the first
        @// BL would corrupt the flags. This is because the end of the "grpZeroSetLoop" loop inside
        @// armSP_FFTInv_CToC_SC32_Radix4_fs_OutOfPlace_unsafe sets the Z flag to EQ

        TST     order,#0x00000001
        BLEQ    armSP_FFTInv_CToC_SC32_Radix4_fs_OutOfPlace_unsafe
        BLNE    armSP_FFTInv_CToC_SC32_Radix8_fs_OutOfPlace_unsafe

        CMP     subFFTNum,#4
        BLT     FFTEnd


unscaledRadix4Loop:
        @// Entered with the flags of the most recent "CMP subFFTNum,#4".
        BEQ     lastStageUnscaledRadix4
        BL      armSP_FFTInv_CToC_SC32_Radix4_OutOfPlace_unsafe
        CMP     subFFTNum,#4
        B       unscaledRadix4Loop

lastStageUnscaledRadix4:
        BL      armSP_FFTInv_CToC_SC32_Radix4_ls_OutOfPlace_unsafe
        B       FFTEnd


scaleEqualsOrder:
        @//check for even or odd order
        @// NOTE: The following combination of BL's would work fine even though the first
        @// BL would corrupt the flags. This is because the end of the "grpZeroSetLoop" loop inside
        @// armSP_FFTInv_CToC_SC32_Radix4_fs_OutOfPlace_unsafe sets the Z flag to EQ
        @// NOTE(review): the routine actually called here is the Sfs variant;
        @// the same Z-flag-on-return behaviour is being assumed for it -- confirm.

        TST     order,#0x00000001
        BLEQ    armSP_FFTInv_CToC_SC32_Sfs_Radix4_fs_OutOfPlace_unsafe
        BLNE    armSP_FFTInv_CToC_SC32_Sfs_Radix8_fs_OutOfPlace_unsafe

        CMP     subFFTNum,#4
        BLT     FFTEnd


scaledRadix4Loop:
        @// Entered with the flags of the most recent "CMP subFFTNum,#4".
        BEQ     lastStageScaledRadix4
        BL      armSP_FFTInv_CToC_SC32_Sfs_Radix4_OutOfPlace_unsafe
        CMP     subFFTNum,#4
        B       scaledRadix4Loop

lastStageScaledRadix4:
        BL      armSP_FFTInv_CToC_SC32_Sfs_Radix4_ls_OutOfPlace_unsafe
        B       FFTEnd

generalScaleCase:                                   @// 0 < scale < order and order >= 2
        @// Determine the correct destination buffer
        @// (from here on diff = order - scale = number of unscaled stages'
        @// worth of log2 size; it is no longer a shift amount)
        SUB     diff,order,scale
        TST     diff,#0x01
        ADDEQ   count,scale,diff,LSR #1             @// count = scale + (order - scale)/2
        MOVNE   count,order
        TST     count,#0x01                         @// Is count even or odd ?

        MOVNE   argDst,pDst                         @// Set input args to fft stages
        MOVEQ   argDst,pOut
        MOVEQ   pOut,pDst                           @// Pass the first stage destination in RN5
        MOV     argTwiddle,pTwiddle

        M_STR   diff, diffOnStack

        MOV     argScale,scale                      @// Put scale in RN4 so as to save and restore
        BL      armSP_FFTInv_CToC_SC32_Sfs_Radix2_fs_OutOfPlace_unsafe  @// scaled first stage
        SUBS    argScale,argScale,#1

scaledRadix2Loop:
        @// First pass uses the flags of the SUBS above; later passes are
        @// only reached through BGT, so the BLGT condition then holds.
        BLGT    armSP_FFTInv_CToC_SC32_Sfs_Radix2_OutOfPlace_unsafe
        SUBS    argScale,argScale,#1                @// save and restore scale (RN4) in the scaled stages
        BGT     scaledRadix2Loop


        M_LDR   diff, diffOnStack
        @//check for even or odd order
        TST     diff,#0x00000001
        BEQ     generalUnscaledRadix4Loop
        B       unscaledRadix2Loop

generalUnscaledRadix4Loop:
        CMP     subFFTNum,#4
        BEQ     generalLastStageUnscaledRadix4
        BL      armSP_FFTInv_CToC_SC32_Radix4_OutOfPlace_unsafe
        B       generalUnscaledRadix4Loop

generalLastStageUnscaledRadix4:
        BL      armSP_FFTInv_CToC_SC32_Radix4_ls_OutOfPlace_unsafe
        B       End                                 @// skips FFTEnd: the stack slot holds order - scale here, not a shift


unscaledRadix2Loop:
        CMP     subFFTNum,#2
        BEQ     generalLastStageUnscaledRadix2
        BL      armSP_FFTInv_CToC_SC32_Radix2_OutOfPlace_unsafe
        B       unscaledRadix2Loop

generalLastStageUnscaledRadix2:
        BL      armSP_FFTInv_CToC_SC32_Radix2_ls_OutOfPlace_unsafe
        B       End                                 @// skips FFTEnd, as above


FFTEnd:                                             @// Does only the scaling

        M_LDR   diff, diffOnStack
        CMP     diff,#0
        BLE     End                                 @// nothing left to shift

        RSB     diff,diff,#0                        @// to use VRSHL for right shift by a variable
        VDUP    dShift,diff

scaleFFTData:                                       @// N = subFFTSize  ; dataptr = pDst  ; scale = diff
        @// One D register (two 32-bit lanes, i.e. one complex value) per
        @// iteration, updated in place: the load has no writeback, the
        @// store post-increments pSrc.  The body always runs at least once.
        VLD1    {dX0},[pSrc]                        @// pSrc contains pDst pointer
        SUBS    subFFTSize,subFFTSize,#1
        VRSHL   dX0,dShift                          @// rounding shift; negative count = right shift
        VST1    {dX0},[pSrc]!

        BGT     scaleFFTData


End:
        @// Set return value
        MOV     result, #OMX_Sts_NoErr

        @// Write function tail
        M_END

        .end