@//
@//  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
@//
@//  Use of this source code is governed by a BSD-style license
@//  that can be found in the LICENSE file in the root of the source
@//  tree. An additional intellectual property rights grant can be found
@//  in the file PATENTS.  All contributing project authors may
@//  be found in the AUTHORS file in the root of the source tree.
@//
@//  This file was originally licensed as follows. It has been
@//  relicensed with permission from the copyright holders.

@//
@//
@// File Name:  omxSP_FFTInv_CToC_SC16_Sfs_s.s
@// OpenMAX DL: v1.0.2
@// Last Modified Revision:   6729
@// Last Modified Date:       Tue, 17 Jul 2007
@//
@// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
@//
@//
@//
@// Description:
@// Compute an inverse FFT for a complex signal
@//
@//


@// Include standard headers

#include "dl/api/arm/armCOMM_s.h"
#include "dl/api/arm/omxtypes_s.h"

@//        Import symbols required from other files
@//        (For example tables)
@//        These are the per-stage FFT routines called below. Their register
@//        contract is defined in their own source files, not here.

        .extern  armSP_FFTInv_CToC_SC16_Sfs_Radix2_fs_OutOfPlace_unsafe
        .extern  armSP_FFTInv_CToC_SC16_Radix2_fs_OutOfPlace_unsafe
        .extern  armSP_FFTInv_CToC_SC16_Radix4_fs_OutOfPlace_unsafe
        .extern  armSP_FFTInv_CToC_SC16_Radix4_ls_OutOfPlace_unsafe
        .extern  armSP_FFTInv_CToC_SC16_Radix8_fs_OutOfPlace_unsafe
        .extern  armSP_FFTInv_CToC_SC16_Radix4_OutOfPlace_unsafe
        .extern  armSP_FFTInv_CToC_SC16_Sfs_Radix4_fs_OutOfPlace_unsafe
        .extern  armSP_FFTInv_CToC_SC16_Sfs_Radix4_ls_OutOfPlace_unsafe
        .extern  armSP_FFTInv_CToC_SC16_Sfs_Radix8_fs_OutOfPlace_unsafe
        .extern  armSP_FFTInv_CToC_SC16_Sfs_Radix4_OutOfPlace_unsafe
        .extern  armSP_FFTInv_CToC_SC16_Sfs_Radix2_OutOfPlace_unsafe
        .extern  armSP_FFTInv_CToC_SC16_Radix2_OutOfPlace_unsafe
        .extern  armSP_FFTInv_CToC_SC16_Sfs_Radix2_ls_OutOfPlace_unsafe
        .extern  armSP_FFTInv_CToC_SC16_Radix2_ls_OutOfPlace_unsafe


@// Set debugging level
@//DEBUG_ON    SETL {TRUE}



@// Guarding implementation by the processor name


        .extern  armSP_FFTInv_CToC_SC16_Radix2_ps_OutOfPlace_unsafe
        .extern  armSP_FFTInv_CToC_SC16_Sfs_Radix2_ps_OutOfPlace_unsafe

@// Input Registers (the four arguments as they arrive in r0-r3)

#define pSrc            r0
#define pDst            r1
#define pFFTSpec        r2
#define scale           r3


@// Output registers
#define result          r0

@// Local Scratch Registers
@//
@// Several names alias the same physical register:
@//   r1  : pDst (on entry)  / argTwiddle (once pDst has been copied elsewhere)
@//   r2  : pFFTSpec (on entry) / argDst / diffMinusOne
@//   r4  : pTwiddle / tmpOrder / argScale / x0r
@//   r6  : N / subFFTNum
@//   r14 : order. r14 is the link register, so every BL overwrites it;
@//         order is therefore only valid up to the first call on each path.
@// Do not add comments on the #define lines themselves: the preprocessor
@// would keep the leading '@' as part of the macro body.

#define argTwiddle      r1
#define argDst          r2
#define argScale        r4
#define pTwiddle        r4
#define tmpOrder        r4
#define pOut            r5
#define subFFTSize      r7
#define subFFTNum       r6
#define N               r6
#define order           r14
#define diff            r9
@// Total num of radix stages required to complete the FFT
#define count           r8
#define x0r             r4
#define x0i             r5
#define diffMinusOne    r2
#define round           r3

@// Neon registers (D0 viewed as 16-bit or 32-bit lanes; D1 holds the shift)

#define dX0             D0.S16
#define dShift          D1.S16
#define dX0S32          D0.S32


        @// Allocate stack memory required by the function
        @// diffOnStack keeps the residual shift across the stage calls.
        M_ALLOC4        diffOnStack, 4

        @//--------------------------------------------------------------------
        @// omxSP_FFTInv_CToC_SC16_Sfs
        @//
        @// Inverse complex FFT with a caller-supplied scale factor.
        @//
        @// In:   r0 = pSrc      source buffer
        @//       r1 = pDst      destination buffer
        @//       r2 = pFFTSpec  spec structure; N, pTwiddle and pBuf are read
        @//                      from it at the offsets defined below
        @//       r3 = scale     scale factor (the transform order is added to
        @//                      it before any stage is run)
        @// Out:  r0 = OMX_Sts_NoErr (no other status is returned by this code)
        @//
        @// Outline:
        @//   order <= 3 : radix-2 "_Sfs_" stage routines, then FFTEnd applies
        @//                the shift saved in diffOnStack.
        @//   order >  3 : radix-4/8 stage routines. All "_Sfs_" stages when
        @//                scale >= order, all plain stages when scale == 0,
        @//                otherwise a mix of "_Sfs_" radix-2 stages followed by
        @//                plain stages (generalScaleCase).
        @//
        @// NOTE(review): r11/d15 below are presumably the highest core/NEON
        @// registers that M_START saves - confirm in armCOMM_s.h.
        @// NOTE(review): the stage routines presumably update pSrc, subFFTNum
        @// and subFFTSize between calls; the loops below depend on that, but
        @// it is not visible in this file.
        @//--------------------------------------------------------------------

        @// Write function header
        M_START     omxSP_FFTInv_CToC_SC16_Sfs,r11,d15

@ Structure offsets for the FFTSpec
        .set    ARMsFFTSpec_N, 0
        .set    ARMsFFTSpec_pBitRev, 4
        .set    ARMsFFTSpec_pTwiddle, 8
        .set    ARMsFFTSpec_pBuf, 12

        @// Define stack arguments

        @// Read the size from structure and take log
        LDR     N, [pFFTSpec, #ARMsFFTSpec_N]

        @// Read other structure parameters
        LDR     pTwiddle, [pFFTSpec, #ARMsFFTSpec_pTwiddle]
        LDR     pOut, [pFFTSpec, #ARMsFFTSpec_pBuf]

        @// order = 31 - CLZ(N), i.e. log2(N) for a power-of-two N
        CLZ     order,N                             @// N = 2^order
        RSB     order,order,#31
        MOV     subFFTSize,#1
        @//MOV     subFFTNum,N

        ADD     scale,scale,order                   @// FFTInverse has a final scaling factor by N

        CMP     order,#3
        BGT     orderGreaterthan3                   @// order > 3

        CMP     order,#1
        BGE     orderGreaterthan0                   @// order > 0
        @// order = 0: copy one 32-bit word from pSrc to pDst, save scale as
        @// the shift for FFTEnd, and point pSrc at the output for that loop.
        @// The LT condition on all of these comes from CMP order,#1 above.
        M_STR   scale, diffOnStack,LT               @// order = 0
        LDRLT   x0r,[pSrc]
        STRLT   x0r,[pDst]
        MOVLT   pSrc,pDst
        BLT     FFTEnd

orderGreaterthan0:
        @// set the buffers appropriately for various orders
        @// order = 2 sends the first stage to the scratch buffer (pOut) and
        @// hands pDst over in r5; order = 1 or 3 sends the first stage to pDst.
        CMP     order,#2
        MOVNE   argDst,pDst
        MOVEQ   argDst,pOut
        MOVEQ   pOut,pDst                           @// Pass the first stage destination in RN5
        MOV     argTwiddle,pTwiddle
        @// Store the scale factor and scale at the end
        @// SUB (no S suffix) leaves the flags from CMP order,#2 intact for the
        @// BGE / BLLT below; this relies on M_STR not changing flags either.
        SUB     diff,scale,order
        M_STR   diff, diffOnStack
        BGE     orderGreaterthan1
        BLLT    armSP_FFTInv_CToC_SC16_Sfs_Radix2_fs_OutOfPlace_unsafe  @// order = 1
        B       FFTEnd


orderGreaterthan1:
        @// order = 2 or 3. order (r14) is lost on the first BL, so keep a copy.
        @// NOTE(review): this needs the first-stage routine to preserve r4.
        MOV     tmpOrder,order                          @// tmpOrder = RN 4
        BL      armSP_FFTInv_CToC_SC16_Sfs_Radix2_fs_OutOfPlace_unsafe
        CMP     tmpOrder,#2
        @// The middle ("ps") stage runs only for order = 3
        BLGT    armSP_FFTInv_CToC_SC16_Sfs_Radix2_ps_OutOfPlace_unsafe
        BL      armSP_FFTInv_CToC_SC16_Sfs_Radix2_ls_OutOfPlace_unsafe
        B       FFTEnd




orderGreaterthan3:
        @// check scale = 0 or scale = order
        @// diff = scale - order; scale is clamped to order when it is larger
        SUBS    diff, scale, order                 @// scale > order
        MOVGT   scale,order
        BGE     specialScaleCase                   @// scale = 0 or scale = order
        CMP     scale,#0
        BEQ     specialScaleCase
        B       generalScaleCase

specialScaleCase:                                  @//  scale = 0 or scale = order  and order > 3

        @// Bit 1 of order selects whether the first stage writes to pDst
        @// or to the scratch buffer.
        TST     order, #2                           @// Set input args to fft stages
        MOVNE   argDst,pDst
        MOVEQ   argDst,pOut
        MOVEQ   pOut,pDst                           @// Pass the first stage destination in RN5
        MOV     argTwiddle,pTwiddle

        @// diff >= 0 : scaled stages, leftover shift applied in FFTEnd
        @// diff <  0 : (scale == 0) unscaled stages; FFTEnd then does nothing
        CMP      diff,#0
        M_STR    diff, diffOnStack
        BGE      scaleEqualsOrder

        @//check for even or odd order
        @// NOTE: The following combination of BL's would work fine even though the first
        @// BL would corrupt the flags. This is because the end of the "grpZeroSetLoop" loop inside
        @// armSP_FFTInv_CToC_SC16_Radix4_fs_OutOfPlace_unsafe sets the Z flag to EQ

        TST     order,#0x00000001
        BLEQ    armSP_FFTInv_CToC_SC16_Radix4_fs_OutOfPlace_unsafe
        BLNE    armSP_FFTInv_CToC_SC16_Radix8_fs_OutOfPlace_unsafe

        CMP        subFFTNum,#4
        BLT     FFTEnd

unscaledRadix4Loop:
        @// Flags here come from the CMP subFFTNum,#4 executed just before
        @// entry or at the bottom of the loop; EQ selects the last stage.
        BEQ        lastStageUnscaledRadix4
         BL        armSP_FFTInv_CToC_SC16_Radix4_OutOfPlace_unsafe
         CMP        subFFTNum,#4
         B        unscaledRadix4Loop

lastStageUnscaledRadix4:
        BL      armSP_FFTInv_CToC_SC16_Radix4_ls_OutOfPlace_unsafe
        B        FFTEnd

scaleEqualsOrder:
        @//check for even or odd order
        @// NOTE: The following combination of BL's would work fine even though the first
        @// BL would corrupt the flags. This is because the end of the "grpZeroSetLoop" loop inside
        @// armSP_FFTInv_CToC_SC16_Sfs_Radix4_fs_OutOfPlace_unsafe sets the Z flag to EQ
        @// NOTE(review): this comment previously named the SC32 variant; confirm
        @// that the SC16_Sfs routine called here ends with Z set as well.

        TST     order,#0x00000001
        BLEQ    armSP_FFTInv_CToC_SC16_Sfs_Radix4_fs_OutOfPlace_unsafe
        BLNE    armSP_FFTInv_CToC_SC16_Sfs_Radix8_fs_OutOfPlace_unsafe

        CMP        subFFTNum,#4
        BLT     FFTEnd

scaledRadix4Loop:
        @// Same flag convention as unscaledRadix4Loop
        BEQ        lastStageScaledRadix4
         BL        armSP_FFTInv_CToC_SC16_Sfs_Radix4_OutOfPlace_unsafe
         CMP        subFFTNum,#4
         B        scaledRadix4Loop

lastStageScaledRadix4:
        BL      armSP_FFTInv_CToC_SC16_Sfs_Radix4_ls_OutOfPlace_unsafe
        B        FFTEnd



generalScaleCase:                                        @// 0 < scale < order and order > 3
        @// Determine the correct destination buffer
        @// From here on diff = order - scale. count is the total stage count
        @// and its parity picks the first-stage destination.
        SUB     diff,order,scale
        TST     diff,#0x01
        ADDEQ   count,scale,diff,LSR #1         @// count = scale + (order - scale)/2
        MOVNE   count,order
        TST     count,#0x01                     @// Is count even or odd ?

        MOVNE   argDst,pDst                     @// Set input args to fft stages
        MOVEQ   argDst,pOut
        MOVEQ   pOut,pDst                       @// Pass the first stage destination in RN5
        MOV     argTwiddle,pTwiddle

        CMP     diff,#1
        M_STR   diff, diffOnStack
        BEQ     scaleps                         @// scaling including a radix2_ps stage

        @// argScale shares r4 with pTwiddle; the twiddle pointer has already
        @// been copied to argTwiddle above.
        MOV     argScale,scale                  @// Put scale in RN4 so as to save and restore
        BL      armSP_FFTInv_CToC_SC16_Sfs_Radix2_fs_OutOfPlace_unsafe     @// scaled first stage
        SUBS    argScale,argScale,#1

scaledRadix2Loop:
        @// Runs one further scaled radix-2 stage per remaining count in argScale
        BLGT    armSP_FFTInv_CToC_SC16_Sfs_Radix2_OutOfPlace_unsafe
        SUBS    argScale,argScale,#1            @// save and restore scale (RN4) in the scaled stages
        BGT     scaledRadix2Loop
        B       outScale

scaleps:
        SUB     argScale,scale,#1                   @// order>3 and diff=1 => scale >= 3
        BL      armSP_FFTInv_CToC_SC16_Sfs_Radix2_fs_OutOfPlace_unsafe     @// scaled first stage
        SUBS    argScale,argScale,#1

scaledRadix2psLoop:
        @// When argScale reaches zero the scaled "ps" stage is run next
        BEQ     scaledRadix2psStage
        BLGT    armSP_FFTInv_CToC_SC16_Sfs_Radix2_OutOfPlace_unsafe
        SUBS    argScale,argScale,#1            @// save and restore scale (RN4) in the scaled stages
        BGE     scaledRadix2psLoop

scaledRadix2psStage:
        BL      armSP_FFTInv_CToC_SC16_Sfs_Radix2_ps_OutOfPlace_unsafe
        B       generalLastStageUnscaledRadix2


outScale:
        @// Reload diff (= order - scale on this path) from the stack
        M_LDR   diff, diffOnStack
        @//check for even or odd order
        @// Even diff: finish with radix-4 stages; odd diff: radix-2 stages
        TST     diff,#0x00000001
        BEQ     generalUnscaledRadix4Loop
        B       unscaledRadix2Loop

generalUnscaledRadix4Loop:
        CMP        subFFTNum,#4
         BEQ        generalLastStageUnscaledRadix4
         BL        armSP_FFTInv_CToC_SC16_Radix4_OutOfPlace_unsafe
         B        generalUnscaledRadix4Loop

generalLastStageUnscaledRadix4:
        BL      armSP_FFTInv_CToC_SC16_Radix4_ls_OutOfPlace_unsafe
        B        End

unscaledRadix2Loop:
        CMP        subFFTNum,#4
         BEQ        generalLastTwoStagesUnscaledRadix2
         BL        armSP_FFTInv_CToC_SC16_Radix2_OutOfPlace_unsafe
         B        unscaledRadix2Loop

generalLastTwoStagesUnscaledRadix2:
        BL      armSP_FFTInv_CToC_SC16_Radix2_ps_OutOfPlace_unsafe
generalLastStageUnscaledRadix2:
        BL      armSP_FFTInv_CToC_SC16_Radix2_ls_OutOfPlace_unsafe
        B        End


FFTEnd:                                               @// Does only the scaling

        @// Nothing to do unless a positive shift was saved
        M_LDR   diff, diffOnStack
        CMP     diff,#0
        BLE     End

        RSB     diff,diff,#0                          @// to use VRSHL for right shift by a variable
        VDUP    dShift,diff

scaleFFTData:                                         @// N = subFFTSize  ; dataptr = pDst  ; scale = diff
        @// Each pass loads one 32-bit lane, applies the rounding shift to its
        @// 16-bit halves, stores the lane back and advances pSrc by 4 bytes.
        @// VRSHL with a negative count shifts right. The vector instructions
        @// do not alter the flags set by SUBS, so BGT tests the element count.
        VLD1    {dX0S32[0]},[pSrc]                        @// pSrc contains pDst pointer
        SUBS    subFFTSize,subFFTSize,#1
        VRSHL   dX0,dShift
        VST1    {dX0S32[0]},[pSrc]!

        BGT     scaleFFTData


End:
        @// Set return value
        MOV     result, #OMX_Sts_NoErr

        @// Write function tail
        M_END






        .END