@//
@//  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
@//
@//  Use of this source code is governed by a BSD-style license
@//  that can be found in the LICENSE file in the root of the source
@//  tree. An additional intellectual property rights grant can be found
@//  in the file PATENTS.  All contributing project authors may
@//  be found in the AUTHORS file in the root of the source tree.
@//
@//  This is a modification of omxSP_FFTInv_CToC_SC32_Sfs_s.s
@//  to support float instead of SC32.
@//

@//
@// Description:
@// Compute an inverse FFT for a complex signal
@//
@//


@// Include standard headers

#include "dl/api/arm/armCOMM_s.h"
#include "dl/api/arm/omxtypes_s.h"

@//        M_VARIANTS ARM1136JS

@// Import symbols required from other files
@// (For example tables)

        .extern  armSP_FFTInv_CToC_FC32_Sfs_Radix2_fs_OutOfPlace_unsafe_vfp
        .extern  armSP_FFTInv_CToC_FC32_Radix2_fs_OutOfPlace_unsafe_vfp
        .extern  armSP_FFTInv_CToC_FC32_Radix4_fs_OutOfPlace_unsafe_vfp
        .extern  armSP_FFTInv_CToC_FC32_Radix8_fs_OutOfPlace_unsafe_vfp
        .extern  armSP_FFTInv_CToC_FC32_Radix4_OutOfPlace_unsafe_vfp

@// Set debugging level
@//DEBUG_ON    SETL {TRUE}



@// Guarding implementation by the processor name

@//    IF  ARM1136JS

@//Input Registers

#define pSrc            r0
#define pDst            r1
#define pFFTSpec        r2


@// Output registers
#define result          r0

@//Local Scratch Registers
@//
@// Several names share one physical register, so the order of the MOVs
@// in the code below matters:
@//   argTwiddle / pDst      -> r1
@//   argDst / pFFTSpec      -> r2
@//   pTwiddle / argScale    -> r4
@//   N / subFFTNum          -> r6  (subFFTNum therefore starts out equal to N)
@//   order                  -> r14 (LR; overwritten by every BL)

#define argTwiddle      r1
#define argDst          r2
#define pTwiddle        r4
#define pOut            r5
#define subFFTSize      r7
#define subFFTNum       r6
#define N               r6
#define order           r14
#define round           r3

@// Register names that no instruction in this file references.
#define argScale        r4
#define diff            r9
#define count           r8
#define diffMinusOne    r2

#define x0r             s0
#define x0i             s1
#define fone            s2
#define fscale          s3


@//---------------------------------------------------------------------
@// omxSP_FFTInv_CToC_FC32_Sfs_vfp
@//
@// Inverse complex-to-complex FFT on 32-bit float data, followed by a
@// scaling pass that multiplies every output value by 1/subFFTSize.
@//
@// In:    r0 = pSrc      source buffer of complex floats (re, im pairs)
@//        r1 = pDst      destination buffer
@//        r2 = pFFTSpec  spec structure; N, pTwiddle and pBuf are read
@//                       from it at offsets 0, 8 and 12
@// Out:   r0 = OMX_Sts_NoErr
@//
@// The work is delegated to the armSP_..._unsafe_vfp stage routines,
@// which exchange state in registers rather than through the normal
@// calling convention.  Their exact register contract is defined in
@// other files; from this file one can only see that pSrc, argTwiddle,
@// argDst, pOut, subFFTNum and subFFTSize are set up before the calls
@// and that pSrc, subFFTNum and subFFTSize are read back afterwards.
@//
@// NOTE(review): register saving and the stack frame are handled by the
@// M_START / M_END macros from armCOMM_s.h (not visible here); the "r11"
@// argument presumably requests saving r4-r11 plus LR - confirm.
@//---------------------------------------------------------------------

@// Allocate stack memory required by the function

@// Write function header
        M_START     omxSP_FFTInv_CToC_FC32_Sfs_vfp,r11

@// Structure offsets for FFTSpec
        .set    ARMsFFTSpec_N, 0
        .set    ARMsFFTSpec_pBitRev, 4
        .set    ARMsFFTSpec_pTwiddle, 8
        .set    ARMsFFTSpec_pBuf, 12

@// Define stack arguments

@// Read the size from structure and take log
        LDR     N, [pFFTSpec, #ARMsFFTSpec_N]

@// Read other structure parameters.  These must be loaded before
@// argDst is written, because argDst and pFFTSpec are both r2.
        LDR     pTwiddle, [pFFTSpec, #ARMsFFTSpec_pTwiddle]
        LDR     pOut, [pFFTSpec, #ARMsFFTSpec_pBuf]

        CLZ     order,N                 @// N = 2^order
        RSB     order,order,#31         @// order = 31 - clz(N)
        MOV     subFFTSize,#1
@//MOV     subFFTNum,N                  @// not needed: N and subFFTNum are both r6

        CMP     order,#1
        BGT     orderGreaterthan1       @// order > 1

@// order < 1: a single complex value, copy it straight to pDst and let
@// the scaling code run on pDst (subFFTSize is still 1).
        vldmlt.f32 pSrc, {x0r, x0i}
        vstmlt.f32 pDst, {x0r, x0i}

        MOVLT   pSrc,pDst
        BLT     FFTEnd

@// Handle order = 1
@// argDst must be written first: argTwiddle is r1, the same register
@// as pDst, so the second MOV destroys pDst.
        MOV     argDst,pDst
        MOV     argTwiddle,pTwiddle

        BL      armSP_FFTInv_CToC_FC32_Radix2_fs_OutOfPlace_unsafe_vfp
        B       FFTEnd

orderGreaterthan1:

@// Pick the destination of the first stage from bit 1 of order:
@//   bit set   -> first stage writes to pDst, pOut stays the spec buffer
@//   bit clear -> first stage writes to the spec buffer, pOut = pDst
@// NOTE(review): presumably this makes the last stage land in pDst when
@// the stages alternate between the two buffers - confirm against the
@// stage routines.
        TST     order, #2               @// Set input args to fft stages
        MOVNE   argDst,pDst
        MOVEQ   argDst,pOut
        MOVEQ   pOut,pDst               @// Pass the first stage dest in RN5
        MOV     argTwiddle,pTwiddle     @// overwrites pDst (r1); keep this last


@// Check for even or odd order and run exactly one first stage:
@// radix-4 when order is even, radix-8 when order is odd.
@// An explicit branch is used instead of a BLEQ/BLNE pair so that the
@// selection does not depend on the condition flags the radix-4
@// routine happens to return with.  The test must precede the BL
@// because order lives in LR, which the BL overwrites.

        TST     order,#0x00000001
        BNE     firstStageRadix8
        BL      armSP_FFTInv_CToC_FC32_Radix4_fs_OutOfPlace_unsafe_vfp
        B       unscaledRadix4Loop

firstStageRadix8:
        BL      armSP_FFTInv_CToC_FC32_Radix8_fs_OutOfPlace_unsafe_vfp

@// Remaining stages: keep applying radix-4 until subFFTNum reaches 1.
@// subFFTNum is not modified in this file, so the stage routines are
@// relied upon to update it.
unscaledRadix4Loop:
        CMP     subFFTNum,#1
        BEQ     FFTEnd
        BL      armSP_FFTInv_CToC_FC32_Radix4_OutOfPlace_unsafe_vfp
        B       unscaledRadix4Loop


FFTEnd:

@// Scale the subFFTSize complex values starting at pSrc, in place, by
@// 1/subFFTSize.  On every path that reaches here pSrc is expected to
@// point at the result: it is set to pDst above for order < 1, and is
@// otherwise whatever the stage routines left in r0.

        vldm.f32 pSrc, {x0r, x0i}       @// preload the first element

        vmov.f32     fscale, subFFTSize
        vcvt.f32.s32 fscale, fscale     @// fscale = N as a float
        movw    round, #0
        movt    round, #0x3f80          @// round = 1.0
        vmov.f32 fone, round
        vdiv.f32 fscale, fone, fscale   @// fscale = 1/N
scaleFFTData:                           @// N = subFFTSize
        SUBS    subFFTSize,subFFTSize,#1
        @// Nothing between the SUBS and the conditional instructions
        @// below updates the condition flags.
        vmul.f32 x0r, x0r, fscale
        vmul.f32 x0i, x0i, fscale
        vstm.f32 pSrc, {x0r, x0i}
        add     pSrc, #8                @// advance by one complex float
        vldmgt.f32 pSrc, {x0r, x0i}     @// load the next element only if one remains

        bgt     scaleFFTData


@// Set return value
        MOV     result, #OMX_Sts_NoErr

@// Write function tail
        M_END

@//    ENDIF                            @//ARM1136JS


@// Guarding implementation by the processor name



        .end