@//
@//  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
@//
@//  Use of this source code is governed by a BSD-style license
@//  that can be found in the LICENSE file in the root of the source
@//  tree. An additional intellectual property rights grant can be found
@//  in the file PATENTS.  All contributing project authors may
@//  be found in the AUTHORS file in the root of the source tree.
@//
@//  This file was originally licensed as follows. It has been
@//  relicensed with permission from the copyright holders.
@//

@//
@// File Name:  armSP_FFT_CToC_SC32_Radix4_fs_unsafe_s.s
@// OpenMAX DL: v1.0.2
@// Last Modified Revision:   7767
@// Last Modified Date:       Thu, 27 Sep 2007
@//
@// (c) Copyright 2007-2008 ARM Limited. All Rights Reserved.
@//
@//
@//
@// Description:
@// Compute a first stage Radix 4 FFT stage for a N point complex signal
@//
@// Syntax / target: GNU as (ARM, NEON), run through the C preprocessor.
@// Every comment starts with "@//" so that it is a comment both before and
@// after preprocessing.
@//
@// The file provides one macro, FFTSTAGE, and four entry points that
@// instantiate it (forward / inverse, unscaled / scaled).
@//


@// Include standard headers

#include "dl/api/arm/armCOMM_s.h"
#include "dl/api/arm/omxtypes_s.h"

@// Import symbols required from other files
@// (For example tables)
@// No symbols are imported by this file.


@// Set debugging level
@//DEBUG_ON    SETL {TRUE}


@// Guarding implementation by the processor name
@// (no guard directive is present in this file)


@//---------------------------------------------------------------------
@// Input Registers
@//
@// These are the registers the macro reads on entry. pTwiddle is only
@// named here; no instruction in this file uses it.
@//---------------------------------------------------------------------

#define pSrc            r0
#define pDst            r2
#define pTwiddle        r1
#define pPingPongBuf    r5
#define subFFTNum       r6
#define subFFTSize      r7


@// Output Registers
@//
@// No separate output aliases are defined. On exit the macro has
@// rewritten pSrc, pDst, subFFTNum and subFFTSize (see the macro header).


@//---------------------------------------------------------------------
@// Local Scratch Registers
@//
@// grpSize / setCount share r3 and pointStep / outPointStep share r4.
@//---------------------------------------------------------------------

#define grpSize         r3
@// Reuse grpSize as setCount
#define setCount        r3
#define pointStep       r4
#define outPointStep    r4
#define setStep         r8
#define step1           r9
#define step3           r10

@//---------------------------------------------------------------------
@// Neon Registers
@//
@// dXr<k>/dXi<k> : the two halves of input  vector qX<k>  (Q0-Q3)
@// dYr<k>/dYi<k> : the two halves of stage-1 result qY<k> (Q4-Q7)
@// dZr<k>/dZi<k> : the two halves of stage-2 result qZ<k> (Q8-Q11)
@//
@// Each q alias overlays the d pair declared with the same index
@// (Q0 = D0:D1, Q4 = D8:D9, Q8 = D16:D17, ...). All lanes are typed S32.
@// The r / i suffixes presumably stand for real / imaginary parts as
@// split by the de-interleaving VLD2 loads - confirm against the callers.
@//---------------------------------------------------------------------

#define dXr0    D0.S32
#define dXi0    D1.S32
#define dXr1    D2.S32
#define dXi1    D3.S32
#define dXr2    D4.S32
#define dXi2    D5.S32
#define dXr3    D6.S32
#define dXi3    D7.S32
#define dYr0    D8.S32
#define dYi0    D9.S32
#define dYr1    D10.S32
#define dYi1    D11.S32
#define dYr2    D12.S32
#define dYi2    D13.S32
#define dYr3    D14.S32
#define dYi3    D15.S32
#define qX0     Q0.S32
#define qX1     Q1.S32
#define qX2     Q2.S32
#define qX3     Q3.S32
#define qY0     Q4.S32
#define qY1     Q5.S32
#define qY2     Q6.S32
#define qY3     Q7.S32
#define dZr0    D16.S32
#define dZi0    D17.S32
#define dZr1    D18.S32
#define dZi1    D19.S32
#define dZr2    D20.S32
#define dZi2    D21.S32
#define dZr3    D22.S32
#define dZi3    D23.S32
#define qZ0     Q8.S32
#define qZ1     Q9.S32
#define qZ2     Q10.S32
#define qZ3     Q11.S32


@//---------------------------------------------------------------------
@// FFTSTAGE scaled, inverse, name
@//
@// Emits the body of one first-stage radix-4 pass.
@//
@// Macro arguments
@//   scaled   "TRUE"  -> halving VHADD / VHSUB are used, so every output
@//                       goes through two halving operations;
@//            other   -> plain VADD / VSUB.
@//   inverse  "TRUE"  -> inverse-transform sign pattern for outputs 1, 3;
@//            other   -> forward-transform sign pattern.
@//   name     suffix that makes the loop label unique per expansion.
@//
@// Registers read on entry
@//   pSrc (r0), pDst (r2), pPingPongBuf (r5), subFFTNum (r6)
@//
@// Registers on exit
@//   subFFTNum  = entry subFFTNum >> 2
@//   subFFTSize = 4
@//   pDst       = pPingPongBuf
@//   pSrc       = final pDst - pointStep. pDst advances 16 bytes per loop
@//                pass, so this is the entry pDst when the entry subFFTNum
@//                is a multiple of 8.
@//
@// Registers written
@//   r0, r2, r3, r4, r6-r10, D0-D23 (Q0-Q11) and the condition flags.
@//   NOTE(review): what M_START / M_END save and restore is decided in
@//   armCOMM_s.h, which is not visible here - confirm that callers of the
@//   "unsafe" entry points preserve whatever else they need.
@//
@// Memory access pattern
@//   pointStep = subFFTNum * 2 is used as a byte stride. The four inputs
@//   of a butterfly are read at pSrc + k*pointStep (k = 0..3) and the
@//   four outputs are written at pDst + k*pointStep. Every VLD2 / VST2
@//   moves 16 bytes and carries a :128 address alignment qualifier.
@//   Each pass handles two sets at a time.
@//
@// Butterfly (per lane), with X0..X3 the inputs:
@//   Y0 = X0 + X2        Y2 = X0 - X2
@//   Y1 = X1 + X3        Y3 = X1 - X3
@//   out0 = Y0 + Y1
@//   out2 = Y0 - Y1
@//   forward: out1 = (Yr2 + Yi3, Yi2 - Yr3)   out3 = (Yr2 - Yi3, Yi2 + Yr3)
@//   inverse: out1 = (Yr2 - Yi3, Yi2 + Yr3)   out3 = (Yr2 + Yi3, Yi2 - Yr3)
@//   Outputs are stored in the order out0, out1, out2, out3. Note that
@//   the register named qZ1 carries out2, while dZr2:dZi2 / dZr3:dZi3
@//   carry out1 / out3 (forward) or out3 / out1 (inverse).
@//
@// Pipelining
@//   The loads for the first pass and its Y0 are issued before the loop.
@//   Inside the loop every pass loads the inputs of the NEXT pass and
@//   pre-computes the next Y0 while finishing the current butterfly.
@//   The instruction order is therefore significant: each X register is
@//   reloaded only after its last use in the current pass.
@//
@//   NOTE(review): the look-ahead loads are unconditional, so the last
@//   pass also loads. If the input really is 4*pointStep bytes long, the
@//   last data[3] load appears to read 16 bytes past its end - confirm
@//   that callers tolerate this.
@//   NOTE(review): for an entry subFFTNum below 8 the strides are smaller
@//   than the 16-byte transfers - presumably never used that way; verify.
@//---------------------------------------------------------------------

        .MACRO FFTSTAGE scaled, inverse, name

        @// pointStep = subFFTNum * 2 : byte stride between butterfly inputs.
        @// Note: outPointStep = pointStep for the first stage (same register).

        MOV     pointStep,subFFTNum,LSL #1


        @// Update the subFFTSize and subFFTNum registers, interleaved with
        @// the loads of the first pass.
        VLD2    {dXr0,dXi0},[pSrc :128],pointStep       @// data[0]
        MOV     subFFTSize,#4                           @// subFFTSize = 4 on exit

        @// Note: setCount = subFFTNum/4 (reuse the grpSize reg for setCount)
        LSR     grpSize,subFFTNum,#2
        VLD2    {dXr1,dXi1},[pSrc :128],pointStep       @// data[1]
        MOV     subFFTNum,grpSize                       @// subFFTNum = subFFTNum/4 on exit


        @// Calculate the step of input data for the next set
        @//MOV     setStep,pointStep,LSL #1
        MOV     setStep,grpSize,LSL #4                  @// 16*grpSize (= 2*pointStep when subFFTNum is a multiple of 4)
        VLD2    {dXr2,dXi2},[pSrc :128],pointStep       @// data[2]
        ADD     setStep,setStep,pointStep               @// setStep = 3*pointStep
        RSB     setStep,setStep,#16                     @// setStep = - 3*pointStep+16

        VLD2    {dXr3,dXi3},[pSrc :128],setStep         @// data[3] & update pSrc for the next set
        MOV     step1,pointStep,LSL #1                  @// step1 = 2*pointStep

        @// Y0 of the first pass is computed ahead of the loop.
        .ifeqs  "\scaled", "TRUE"
            VHADD   qY0,qX0,qX2
        .else
            VADD    qY0,qX0,qX2
        .endif

        RSB     step3,pointStep,#0                      @// step3 = -pointStep

        @// grp = 0 a special case since all the twiddle factors are 1
        @// Loop on the sets : 2 sets at a time

grpZeroSetLoop\name :

        @// Decrement setCount. The flags set here are consumed by the BGT
        @// at the bottom of the loop; nothing in between writes the flags.
        SUBS    setCount,setCount,#2                    @// decrement the set loop counter

        .ifeqs  "\scaled", "TRUE"

            @// ---- scaled variants: halving add / subtract ----

            @// finish first stage of 4 point FFT
            @// (in-loop load order is data[0], data[2], data[1], data[3])

            VHSUB   qY2,qX0,qX2                         @// Y2 = (X0 - X2)/2

            VLD2    {dXr0,dXi0},[pSrc :128],step1       @// data[0] of next pass
            VHADD   qY1,qX1,qX3                         @// Y1 = (X1 + X3)/2
            VLD2    {dXr2,dXi2},[pSrc :128],step3       @// data[2] of next pass
            VHSUB   qY3,qX1,qX3                         @// Y3 = (X1 - X3)/2


            @// finish second stage of 4 point FFT

            .ifeqs  "\inverse", "TRUE"

                VLD2    {dXr1,dXi1},[pSrc :128],step1   @// data[1] of next pass
                VHADD   qZ0,qY0,qY1                     @// out0 = Y0 + Y1

                VLD2    {dXr3,dXi3},[pSrc :128],setStep @// data[3] & update pSrc for the next set
                VHSUB   dZr3,dYr2,dYi3                  @// out1.r = Yr2 - Yi3

                VST2    {dZr0,dZi0},[pDst :128],outPointStep    @// store out0
                VHADD   dZi3,dYi2,dYr3                  @// out1.i = Yi2 + Yr3

                VHSUB   qZ1,qY0,qY1                     @// out2 = Y0 - Y1
                VST2    {dZr3,dZi3},[pDst :128],outPointStep    @// store out1

                VHADD   dZr2,dYr2,dYi3                  @// out3.r = Yr2 + Yi3
                VST2    {dZr1,dZi1},[pDst :128],outPointStep    @// store out2
                VHSUB   dZi2,dYi2,dYr3                  @// out3.i = Yi2 - Yr3

                VHADD   qY0,qX0,qX2                     @// u0 for next iteration
                VST2    {dZr2,dZi2},[pDst :128],setStep @// store out3, pDst to next set

            .else

                VLD2    {dXr1,dXi1},[pSrc :128],step1   @// data[1] of next pass
                VHADD   qZ0,qY0,qY1                     @// out0 = Y0 + Y1

                VLD2    {dXr3,dXi3},[pSrc :128],setStep @// data[3] & update pSrc for the next set
                VHADD   dZr2,dYr2,dYi3                  @// out1.r = Yr2 + Yi3

                VST2    {dZr0,dZi0},[pDst :128],outPointStep    @// store out0
                VHSUB   dZi2,dYi2,dYr3                  @// out1.i = Yi2 - Yr3

                VHSUB   qZ1,qY0,qY1                     @// out2 = Y0 - Y1
                VST2    {dZr2,dZi2},[pDst :128],outPointStep    @// store out1

                VHSUB   dZr3,dYr2,dYi3                  @// out3.r = Yr2 - Yi3
                VST2    {dZr1,dZi1},[pDst :128],outPointStep    @// store out2
                VHADD   dZi3,dYi2,dYr3                  @// out3.i = Yi2 + Yr3

                VHADD   qY0,qX0,qX2                     @// u0 for next iteration
                VST2    {dZr3,dZi3},[pDst :128],setStep @// store out3, pDst to next set

            .endif

        .else

            @// ---- unscaled variants: plain add / subtract ----

            @// finish first stage of 4 point FFT
            @// (in-loop load order is data[0], data[2], data[1], data[3])

            VSUB    qY2,qX0,qX2                         @// Y2 = X0 - X2

            VLD2    {dXr0,dXi0},[pSrc :128],step1       @// data[0] of next pass
            VADD    qY1,qX1,qX3                         @// Y1 = X1 + X3
            VLD2    {dXr2,dXi2},[pSrc :128],step3       @// data[2] of next pass
            VSUB    qY3,qX1,qX3                         @// Y3 = X1 - X3


            @// finish second stage of 4 point FFT

            .ifeqs  "\inverse", "TRUE"

                VLD2    {dXr1,dXi1},[pSrc :128],step1   @// data[1] of next pass
                VADD    qZ0,qY0,qY1                     @// out0 = Y0 + Y1

                VLD2    {dXr3,dXi3},[pSrc :128],setStep @// data[3] & update pSrc for the next set
                VSUB    dZr3,dYr2,dYi3                  @// out1.r = Yr2 - Yi3

                VST2    {dZr0,dZi0},[pDst :128],outPointStep    @// store out0
                VADD    dZi3,dYi2,dYr3                  @// out1.i = Yi2 + Yr3

                VSUB    qZ1,qY0,qY1                     @// out2 = Y0 - Y1
                VST2    {dZr3,dZi3},[pDst :128],outPointStep    @// store out1

                VADD    dZr2,dYr2,dYi3                  @// out3.r = Yr2 + Yi3
                VST2    {dZr1,dZi1},[pDst :128],outPointStep    @// store out2
                VSUB    dZi2,dYi2,dYr3                  @// out3.i = Yi2 - Yr3

                VADD    qY0,qX0,qX2                     @// u0 for next iteration
                VST2    {dZr2,dZi2},[pDst :128],setStep @// store out3, pDst to next set

            .else

                VLD2    {dXr1,dXi1},[pSrc :128],step1   @// data[1] of next pass
                VADD    qZ0,qY0,qY1                     @// out0 = Y0 + Y1

                VLD2    {dXr3,dXi3},[pSrc :128],setStep @// data[3] & update pSrc for the next set
                VADD    dZr2,dYr2,dYi3                  @// out1.r = Yr2 + Yi3

                VST2    {dZr0,dZi0},[pDst :128],outPointStep    @// store out0
                VSUB    dZi2,dYi2,dYr3                  @// out1.i = Yi2 - Yr3

                VSUB    qZ1,qY0,qY1                     @// out2 = Y0 - Y1
                VST2    {dZr2,dZi2},[pDst :128],outPointStep    @// store out1

                VSUB    dZr3,dYr2,dYi3                  @// out3.r = Yr2 - Yi3
                VST2    {dZr1,dZi1},[pDst :128],outPointStep    @// store out2
                VADD    dZi3,dYi2,dYr3                  @// out3.i = Yi2 + Yr3

                VADD    qY0,qX0,qX2                     @// u0 for next iteration
                VST2    {dZr3,dZi3},[pDst :128],setStep @// store out3, pDst to next set

            .endif

        .endif

        BGT     grpZeroSetLoop\name                     @// loop while setCount > 0 (flags from SUBS)

        @// Hand the output over as the source of the next stage:
        @// pSrc = pDst - pointStep, then pDst = pPingPongBuf.
        SUB     pSrc,pDst,pointStep
        MOV     pDst,pPingPongBuf


        .endm


@//---------------------------------------------------------------------
@// Entry points. Each one expands FFTSTAGE between M_START and M_END
@// (both provided by the included headers) with a distinct label suffix.
@//
@//   armSP_FFTFwd_CToC_SC32_Radix4_fs_OutOfPlace_unsafe      unscaled, forward
@//   armSP_FFTInv_CToC_SC32_Radix4_fs_OutOfPlace_unsafe      unscaled, inverse
@//   armSP_FFTFwd_CToC_SC32_Sfs_Radix4_fs_OutOfPlace_unsafe  scaled,   forward
@//   armSP_FFTInv_CToC_SC32_Sfs_Radix4_fs_OutOfPlace_unsafe  scaled,   inverse
@//---------------------------------------------------------------------

        M_START armSP_FFTFwd_CToC_SC32_Radix4_fs_OutOfPlace_unsafe,r4
            FFTSTAGE "FALSE","FALSE",fwd
        M_END


        M_START armSP_FFTInv_CToC_SC32_Radix4_fs_OutOfPlace_unsafe,r4
            FFTSTAGE "FALSE","TRUE",inv
        M_END


        M_START armSP_FFTFwd_CToC_SC32_Sfs_Radix4_fs_OutOfPlace_unsafe,r4
            FFTSTAGE "TRUE","FALSE",fwdsfs
        M_END


        M_START armSP_FFTInv_CToC_SC32_Sfs_Radix4_fs_OutOfPlace_unsafe,r4
            FFTSTAGE "TRUE","TRUE",invsfs
        M_END

        .end