@//
@//  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
@//
@//  Use of this source code is governed by a BSD-style license
@//  that can be found in the LICENSE file in the root of the source
@//  tree. An additional intellectual property rights grant can be found
@//  in the file PATENTS.  All contributing project authors may
@//  be found in the AUTHORS file in the root of the source tree.
@//
@//  This is a modification of omxSP_FFTInv_CCSToR_S32_Sfs_s.s
@//  to support float instead of SC32.
@//

@//
@// Description:
@// Compute an inverse FFT for a complex signal
@//
@// Syntax: GNU as for ARM, passed through the C preprocessor. Comments are
@// written as "@//" on their own line so that the preprocessor strips the
@// text and the assembler sees a bare "@" comment. Do not put an "@//"
@// comment on a #define line: the "@" would become part of the expansion.
@//


@// Include standard headers

#include "dl/api/arm/armCOMM_s.h"
#include "dl/api/arm/omxtypes_s.h"

@//        M_VARIANTS ARM1136JS

@// Stage routines implemented in other files

        .extern  armSP_FFTInv_CToC_FC32_Radix2_fs_OutOfPlace_unsafe_vfp
        .extern  armSP_FFTInv_CToC_FC32_Radix4_fs_OutOfPlace_unsafe_vfp
        .extern  armSP_FFTInv_CToC_FC32_Radix8_fs_OutOfPlace_unsafe_vfp
        .extern  armSP_FFTInv_CToC_FC32_Radix4_OutOfPlace_unsafe_vfp
        .extern  armSP_FFTInv_CCSToR_F32_preTwiddleRadix2_unsafe_vfp

@// Set debugging level
@//DEBUG_ON    SETL {TRUE}


@// Guarding implementation by the processor name

@//    IF  ARM1136JS

@// ---------------------------------------------------------------------
@// Register aliases
@//
@// Several aliases deliberately share one physical register. The ones
@// that matter for instruction ordering below are:
@//   pDst     and argTwiddle  -> r1
@//   pFFTSpec and argDst      -> r2
@//   pTwiddle and argScale    -> r4
@//   N        and subFFTNum   -> r6
@//   order                    -> r14 (lr), so it does not survive a BL
@// ---------------------------------------------------------------------

@// Function arguments
#define pSrc            r0
#define pDst            r1
#define pFFTSpec        r2

@// Return value
#define result          r0

@// Core scratch registers
#define argTwiddle      r1
#define argDst          r2
#define argScale        r4
#define pTwiddle        r4
#define pOut            r5
#define subFFTSize      r7
#define subFFTNum       r6
#define N               r6
#define order           r14
#define diff            r9
@// Total number of radix stages required to complete the FFT
#define count           r8

#define round           r3

@// VFP single-precision scratch registers
#define x0r             s0
#define x0i             s1
#define y0r             s2
#define y0i             s3
#define x1r             s4
#define x1i             s5
#define w1r             s2
#define w1i             s3
#define w0r             s6
#define w0i             s7
#define y1r             s2  /* same registers as w1r, w1i */
#define y1i             s3
#define st0             s8
#define st1             s9
#define st2             s10
#define st3             s11
#define st4             s12
#define st5             s13
#define fscale          s2
#define fone            s3


@// ---------------------------------------------------------------------
@// omxSP_FFTInv_CCSToR_F32_Sfs_vfp
@//
@// In:   r0 (pSrc)     source buffer
@//       r1 (pDst)     destination buffer
@//       r2 (pFFTSpec) FFT spec; fields read here are N (offset 0),
@//                     pTwiddle (offset 8) and pBuf (offset 12)
@// Out:  r0 (result)   always OMX_Sts_NoErr
@//
@// Flow:
@//   1. N <= 1: one 32-bit float is copied from pSrc to pDst.
@//   2. Otherwise pDst and pFFTSpec are parked on the stack, the
@//      pre-twiddle routine is called, and an N/2-point complex inverse
@//      FFT is run through the radix-2/4/8 stage routines.
@//   3. subFFTSize complex values starting at pSrc are multiplied in
@//      place by 1.0 / subFFTSize.
@//
@// NOTE(review): the stage routines live in other files. This code relies
@// on them to leave pSrc, pOut, subFFTNum and subFFTSize updated on
@// return; confirm against their sources before changing register use.
@// ---------------------------------------------------------------------

        @// Stack slots for the two values that must survive the calls
        M_ALLOC4        pDstOnStack, 4
        M_ALLOC4        pFFTSpecOnStack, 4

        @// Function header
        M_START     omxSP_FFTInv_CCSToR_F32_Sfs_vfp,r11

        @// Byte offsets of the FFTSpec fields
        .set    ARMsFFTSpec_N, 0
        .set    ARMsFFTSpec_pBitRev, 4
        .set    ARMsFFTSpec_pTwiddle, 8
        .set    ARMsFFTSpec_pBuf, 12

        @// Fetch the transform length from the spec
        ldr     N, [pFFTSpec, #ARMsFFTSpec_N]

        @// N <= 1 is handled separately: copy a single float and return
        cmp     N, #1
        bgt     sizeGreaterThanOne
        vldr.f32    x0r, [pSrc]
        vstr.f32    x0r, [pDst]
        b       End

sizeGreaterThanOne:
        @// r1 and r2 are reused as argument registers later, so save them
        M_STR   pDst,pDstOnStack
        M_STR   pFFTSpec,pFFTSpecOnStack

        @// Pre-twiddle radix-2 stage runs before the complex IFFT
        bl      armSP_FFTInv_CCSToR_F32_preTwiddleRadix2_unsafe_vfp

complexIFFT:
        @// Reload the spec and pick up length, twiddle table and work buffer
        M_LDR   pFFTSpec,pFFTSpecOnStack
        ldr     N, [pFFTSpec, #ARMsFFTSpec_N]
        ldr     pTwiddle, [pFFTSpec, #ARMsFFTSpec_pTwiddle]
        ldr     pOut, [pFFTSpec, #ARMsFFTSpec_pBuf]

        @// The complex IFFT is N/2 points long; its input starts
        @// (N/2) * 8 bytes into the work buffer
        asr     N, N, #1
        add     pSrc, pOut, N, lsl #3
        M_LDR   pDst,pDstOnStack
        mov     subFFTSize, #1

        @// order = 31 - clz(N/2), so that N/2 = 2^order
        clz     order, N
        rsb     order, order, #31

        @// One compare drives three outcomes:
        @//   GT -> general path
        @//   LT -> order 0: copy one complex value to pDst, go scale it
        @//   EQ -> order 1: fall through to the single radix-2 stage
        cmp     order, #1
        bgt     orderGreaterthan1
        vldmlt.f32 pSrc, {x0r, x0i}
        vstmlt.f32 pDst, {x0r, x0i}
        movlt   pSrc, pDst
        blt     FFTEnd

        @// argTwiddle shares r1 with pDst, so argDst must be written first
        mov     argDst, pDst
        mov     argTwiddle, pTwiddle

        bl      armSP_FFTInv_CToC_FC32_Radix2_fs_OutOfPlace_unsafe_vfp
        b       FFTEnd


orderGreaterthan1:
        @// Choose the first-stage destination from bit 1 of order:
        @//   bit set   -> first stage writes to pDst
        @//   bit clear -> first stage writes to the work buffer, and pDst
        @//                is handed on in pOut (r5) for the next stage
        @// NOTE(review): presumably this makes the final stage land in
        @// pDst; confirm against the stage routines.
        @// argTwiddle shares r1 with pDst, so it is written last.
        tst     order, #2
        movne   argDst, pDst
        moveq   argDst, pOut
        moveq   pOut, pDst
        mov     argTwiddle, pTwiddle

        @// Even order starts with radix-4, odd order with radix-8.
        @//
        @// NOTE: pairing BLEQ with BLNE works even though the first call
        @// corrupts the flags, because the end of the "grpZeroSetLoop"
        @// loop inside
        @// armSP_FFTInv_CToC_FC32_Radix4_fs_OutOfPlace_unsafe_vfp leaves
        @// the Z flag set (EQ), so BLNE is skipped after BLEQ returns.
        tst     order, #1
        bleq    armSP_FFTInv_CToC_FC32_Radix4_fs_OutOfPlace_unsafe_vfp
        blne    armSP_FFTInv_CToC_FC32_Radix8_fs_OutOfPlace_unsafe_vfp

unscaledRadix4Loop:
        @// Keep running radix-4 stages until subFFTNum reaches 1
        cmp     subFFTNum, #1
        beq     FFTEnd
        bl      armSP_FFTInv_CToC_FC32_Radix4_OutOfPlace_unsafe_vfp
        b       unscaledRadix4Loop

FFTEnd:
        @// Preload the first complex value for the scaling loop
        vldm.f32    pSrc, {x0r, x0i}

        @// fone = 1.0 and fscale = (float)subFFTSize, each built by moving
        @// an integer into a VFP register and converting it in place
        mov             round, #1
        vmov.f32        fone, round
        vmov.f32        fscale, subFFTSize
        vcvt.f32.s32    fone, fone
        vcvt.f32.s32    fscale, fscale
        vdiv.f32        fscale, fone, fscale    @// fscale = 1.0 / subFFTSize

scaleFFTData:
        @// subFFTSize counts the complex values left to scale. The flags
        @// from SUBS stay live through the VFP instructions and gate both
        @// the next load and the loop branch.
        subs        subFFTSize, subFFTSize, #1
        vmul.f32    x0r, x0r, fscale
        vmul.f32    x0i, x0i, fscale
        vstm.f32    pSrc!, {x0r, x0i}
        vldmgt.f32  pSrc, {x0r, x0i}
        bgt         scaleFFTData


End:
        @// Set return value
        mov     result, #OMX_Sts_NoErr

        @// Function tail
        M_END

@//    ENDIF                                           @//ARM1136JS


@// Guarding implementation by the processor name



        .end