//
//  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
//
//  Use of this source code is governed by a BSD-style license
//  that can be found in the LICENSE file in the root of the source
//  tree. An additional intellectual property rights grant can be found
//  in the file PATENTS.  All contributing project authors may
//  be found in the AUTHORS file in the root of the source tree.
//
//
//  This is a modification of armSP_FFT_CToC_SC32_Radix4_fs_unsafe_s.s
//  to support float instead of SC32.
//

//
// Description:
// Compute a first stage Radix 4 FFT stage for a N point complex signal
//
// Target / syntax: AArch64, GNU assembler syntax, run through the C
// preprocessor (register names below are cpp macros).
//


// Include standard headers

#include "dl/api/arm/arm64COMM_s.h"
#include "dl/api/arm/omxtypes_s.h"

// Import symbols required from other files
// (For example tables)
// None: this stage does not read any twiddle table.


// Set debugging level
//DEBUG_ON    SETL {TRUE}


//Input Registers

#define pSrc            x0      // input: interleaved (re, im) 32-bit floats
#define pDst            x1      // output: same layout as the input
#define pTwiddle        x2      // not referenced anywhere in this file
#define pSubFFTNum      x3      // in/out: address of the subFFTNum value
#define pSubFFTSize     x4      // in/out: address of the subFFTSize value


//Output Registers
// None. Results are written through pDst, pSubFFTNum and pSubFFTSize.


//Local Scratch Registers

#define subFFTNum       x5
#define subFFTSize      x6
#define grpSize         x7
// Reuse grpSize as setCount
#define setCount        x7
// pointStep and outPointStep deliberately share one register: the first
// stage uses the same stride on the input and on the output side.
#define pointStep       x8
#define outPointStep    x8
#define setStep         x9
#define step1           x10
#define step3           x11

// Neon Registers
//   dX* : the four butterfly inputs (two butterflies per register pair)
//   dY* : results of the first add/sub level
//   dZ* : results of the second add/sub level (values that get stored)
// dYr0..dYi3 live in v8-v15, whose low 64 bits are callee-saved under
// AAPCS64; the entry points below pass d15 to M_START for that reason.

#define dXr0    v0.2s
#define dXi0    v1.2s
#define dXr1    v2.2s
#define dXi1    v3.2s
#define dXr2    v4.2s
#define dXi2    v5.2s
#define dXr3    v6.2s
#define dXi3    v7.2s
#define dYr0    v8.2s
#define dYi0    v9.2s
#define dYr1    v10.2s
#define dYi1    v11.2s
#define dYr2    v12.2s
#define dYi2    v13.2s
#define dYr3    v14.2s
#define dYi3    v15.2s
#define dZr0    v16.2s
#define dZi0    v17.2s
#define dZr1    v18.2s
#define dZi1    v19.2s
#define dZr2    v20.2s
#define dZi2    v21.2s
#define dZr3    v22.2s
#define dZi3    v23.2s


//----------------------------------------------------------------------
// FFTSTAGE scaled, inverse, name
//
// Body of one first-stage radix-4 pass. All twiddle factors of this
// stage are 1, so each butterfly is add/sub only:
//
//      Y0 = X0 + X2        Y2 = X0 - X2
//      Y1 = X1 + X3        Y3 = X1 - X3
//
//      out[0] = Y0 + Y1
//      out[1] = Y2 - j*Y3  (forward)     Y2 + j*Y3  (inverse)
//      out[2] = Y0 - Y1
//      out[3] = Y2 + j*Y3  (forward)     Y2 - j*Y3  (inverse)
//
// X0..X3 are read pointStep bytes apart and out[0..3] are written
// pointStep bytes apart. Every ld2/st2 moves two complex points, so one
// loop iteration handles two butterflies.
//
// Macro parameters:
//   scaled  - accepted but not used by this body.
//   inverse - the string "TRUE" selects the inverse butterfly; any other
//             value selects the forward one.
//   name    - suffix appended to the local labels so that the macro can
//             be expanded more than once in the same file.
//
// In:    pSrc, pDst, pSubFFTNum, pSubFFTSize (see the defines above)
// Out:   [pSubFFTNum]  = subFFTNum / 4
//        [pSubFFTSize] = 4
// Clobb: x0, x1, x5-x11, v0-v23, condition flags
//
// NOTE(review): the loop counts down by 2 and relies on the counter
// hitting exactly 0 to skip the final look-ahead load, so subFFTNum/4 is
// presumably required to be even and at least 2 - confirm against the
// callers.
//----------------------------------------------------------------------

        .macro FFTSTAGE scaled, inverse, name

        // Fetch the stage parameters (64-bit loads).
        // The subFFTSize value loaded here is never consumed: the register
        // is overwritten with 4 below, before any use.
        ldr     subFFTNum, [pSubFFTNum]
        ldr     subFFTSize, [pSubFFTSize]

        // pointStep = 2 * subFFTNum bytes. One complex float point is
        // 8 bytes, so this is a stride of subFFTNum/4 points.
        // Note: outPointStep = pointStep for the first stage.

        lsl     pointStep, subFFTNum, #1

        // Prime the pipeline: load the four inputs of the first two
        // butterflies, interleaved with the scalar set-up.
        ld2     {dXr0,dXi0}, [pSrc], pointStep          //  data[0]

        // subFFTSize = 4 after the first radix-4 stage
        mov     subFFTSize, #4

        // Note: setCount = subFFTNum/4 (reuse the grpSize reg for setCount)
        lsr     grpSize, subFFTNum, #2
        ld2     {dXr1,dXi1}, [pSrc], pointStep          //  data[1]
        // Value written back to [pSubFFTNum] for the next stage
        mov     subFFTNum, grpSize


        // Calculate the step of input data for the next set
        // setStep = 16 * grpSize (= 2*pointStep when subFFTNum is a
        // multiple of 4)
        lsl     setStep, grpSize, #4
        ld2     {dXr2,dXi2}, [pSrc], pointStep          //  data[2]

        // setStep = 3*pointStep
        add     setStep, setStep, pointStep

        // setStep = -3*pointStep + 16 : rewind from data[3] to data[0] and
        // advance by the 16 bytes (two points) just consumed.
        // rsb is not an A64 instruction; it is presumably a macro supplied
        // by arm64COMM_s.h - confirm there.
        rsb     setStep,setStep,#16
        //  data[3] & update pSrc for the next set
        ld2     {dXr3,dXi3}, [pSrc], setStep

        // step1 = 2*pointStep
        lsl     step1, pointStep, #1

        // Y0 = X0 + X2 for the first iteration
        fadd    dYr0, dXr0, dXr2
        fadd    dYi0, dXi0, dXi2
        // step3 = -pointStep
        neg     step3, pointStep

        // grp = 0 a special case since all the twiddle factors are 1
        // Loop on the sets : 2 sets at a time

radix4fsGrpZeroSetLoop\name :

        // Decrement setcount. The flags set here are consumed by the beq
        // and the bgt further down; nothing in between may alter them.
        subs    setCount, setCount, #2


        // finish first stage of 4 point FFT
        // The loads interleaved below fetch the inputs of the NEXT
        // iteration, in the order data[0], data[2], data[1], data[3].

        // Y2 = X0 - X2
        fsub    dYr2, dXr0, dXr2
        fsub    dYi2, dXi0, dXi2

        ld2     {dXr0,dXi0}, [pSrc], step1              //  data[0]
        // Y1 = X1 + X3
        fadd    dYr1, dXr1, dXr3
        fadd    dYi1, dXi1, dXi3
        ld2     {dXr2,dXi2}, [pSrc], step3              //  data[2]
        // Y3 = X1 - X3
        fsub    dYr3, dXr1, dXr3
        fsub    dYi3, dXi1, dXi3


        // finish second stage of 4 point FFT

        .ifeqs  "\inverse", "TRUE"

        ld2     {dXr1,dXi1}, [pSrc], step1              //  data[1]
        // Z0 = Y0 + Y1
        fadd    dZr0, dYr0, dYr1
        fadd    dZi0, dYi0, dYi1

        // data[3] & update pSrc for the next set, but not on the
        // last iteration, so that nothing is read past the end of the
        // input array.
        beq     radix4SkipLastUpdateInv\name
        ld2     {dXr3,dXi3}, [pSrc], setStep

radix4SkipLastUpdateInv\name:
        // Z3 = Y2 + j*Y3 (real part)
        fsub    dZr3, dYr2, dYi3

        st2     {dZr0,dZi0}, [pDst], outPointStep       //  out[0]
        // Z3 = Y2 + j*Y3 (imaginary part)
        fadd    dZi3, dYi2, dYr3

        // Z1 = Y0 - Y1
        fsub    dZr1, dYr0, dYr1
        fsub    dZi1, dYi0, dYi1
        st2     {dZr3,dZi3}, [pDst], outPointStep       //  out[1]

        // Z2 = Y2 - j*Y3
        fadd    dZr2, dYr2, dYi3
        st2     {dZr1,dZi1}, [pDst], outPointStep       //  out[2]
        fsub    dZi2, dYi2, dYr3

        // Y0 = X0 + X2
        fadd    dYr0, dXr0, dXr2                        // u0 for next iteration
        fadd    dYi0, dXi0, dXi2
        // out[3], then rewind pDst to the next pair of output points
        st2     {dZr2,dZi2}, [pDst], setStep


        .else

        ld2     {dXr1,dXi1}, [pSrc], step1              //  data[1]
        // Z0 = Y0 + Y1
        fadd    dZr0, dYr0, dYr1
        fadd    dZi0, dYi0, dYi1

        // data[3] & update pSrc for the next set, but not on the
        // last iteration, so that nothing is read past the end of the
        // input array.
        beq     radix4SkipLastUpdateFwd\name
        ld2     {dXr3,dXi3}, [pSrc], setStep

radix4SkipLastUpdateFwd\name:
        // Z2 = Y2 - j*Y3 (real part)
        fadd    dZr2, dYr2, dYi3

        st2     {dZr0,dZi0}, [pDst], outPointStep       //  out[0]
        // Z2 = Y2 - j*Y3 (imaginary part)
        fsub    dZi2, dYi2, dYr3

        // Z1 = Y0 - Y1
        fsub    dZr1, dYr0, dYr1
        fsub    dZi1, dYi0, dYi1
        st2     {dZr2,dZi2}, [pDst], outPointStep       //  out[1]

        // Z3 = Y2 + j*Y3
        fsub    dZr3, dYr2, dYi3
        st2     {dZr1,dZi1}, [pDst], outPointStep       //  out[2]
        fadd    dZi3, dYi2, dYr3

        // Y0 = X0 + X2
        fadd    dYr0, dXr0, dXr2                        // u0 for next iteration
        fadd    dYi0, dXi0, dXi2

        // out[3], then rewind pDst to the next pair of output points
        st2     {dZr3,dZi3}, [pDst], setStep

        .endif

        // Loop while sets remain (flags still come from the subs above)
        bgt     radix4fsGrpZeroSetLoop\name

        // Save subFFTNum and subFFTSize for next stage
        str     subFFTNum, [pSubFFTNum]
        str     subFFTSize, [pSubFFTSize]

        .endm


// Exported entry points. M_START / M_END come from arm64COMM_s.h; the
// d15 argument presumably makes the prologue/epilogue preserve d8-d15,
// which the macro body overwrites - confirm in that header.

        // Forward first-stage radix-4 pass, out of place
        M_START armSP_FFTFwd_CToC_FC32_Radix4_fs_OutOfPlace,,d15
        FFTSTAGE "FALSE","FALSE",fwd
        M_END


        // Inverse first-stage radix-4 pass, out of place
        M_START armSP_FFTInv_CToC_FC32_Radix4_fs_OutOfPlace,,d15
        FFTSTAGE "FALSE","TRUE",inv
        M_END


        .end