@//
@//  Copyright (c) 2013 The WebRTC project authors. All Rights Reserved.
@//
@//  Use of this source code is governed by a BSD-style license
@//  that can be found in the LICENSE file in the root of the source
@//  tree. An additional intellectual property rights grant can be found
@//  in the file PATENTS.  All contributing project authors may
@//  be found in the AUTHORS file in the root of the source tree.
@//
@//  This is a modification of omxSP_FFTFwd_CToC_SC32_Sfs_s.s
@//  to support float instead of SC32.
@//

@//
@// Description:
@// Compute a forward FFT for a complex signal
@//
@// Syntax / toolchain: ARM (AArch32) GNU assembler syntax, passed through
@// the C preprocessor (register names below are cpp macros).
@//


@// Include standard headers

#include "dl/api/arm/armCOMM_s.h"
#include "dl/api/arm/omxtypes_s.h"

@// Import symbols required from other files
@// (For example tables)

        .extern armSP_FFTFwd_CToC_FC32_Radix2_fs_OutOfPlace_unsafe
        .extern armSP_FFTFwd_CToC_FC32_Radix4_fs_OutOfPlace_unsafe
        .extern armSP_FFTFwd_CToC_FC32_Radix8_fs_OutOfPlace_unsafe
        .extern armSP_FFTFwd_CToC_FC32_Radix4_OutOfPlace_unsafe
        .extern armSP_FFTFwd_CToC_FC32_Radix2_OutOfPlace_unsafe
        .extern armSP_FFTFwd_CToC_FC32_Radix2_ls_OutOfPlace_unsafe
@// Last radix-4 stage; called from lastStageUnscaledRadix4 below.
        .extern armSP_FFTFwd_CToC_FC32_Radix4_ls_OutOfPlace_unsafe
@// NOTE(review): the next symbol is not referenced anywhere in this file;
@// it is kept only so the declaration list stays a superset of the original.
        .extern armSP_FFTFwd_CToC_FC32_Sfs_Radix2_ls_OutOfPlace_unsafe

@// Set debugging level
@//DEBUG_ON    SETL {TRUE}


@// Guarding implementation by the processor name


@// -------------------------------------------------------------------------
@// Register aliases.
@//
@// IMPORTANT: do not append comments to the #define lines themselves. The
@// preprocessor would strip the "//..." part and leave a bare "@" inside the
@// macro body, which comments out the rest of every line using the alias.
@//
@// Several aliases deliberately share one physical register:
@//   r0 : pSrc / result
@//   r1 : pDst / argTwiddle
@//   r2 : pFFTSpec / argDst / diffMinusOne
@//   r4 : argScale / tmpOrder / pTwiddle / x0r
@//   r5 : pOut / x0i
@//   r6 : subFFTNum / N
@// so the order in which the code below reads and writes them matters.
@// -------------------------------------------------------------------------

@//Input Registers

#define pSrc            r0
#define pDst            r1
#define pFFTSpec        r2


@// Output registers
#define result          r0

@//Local Scratch Registers

#define argTwiddle      r1
#define argDst          r2
#define argScale        r4
#define tmpOrder        r4
#define pTwiddle        r4
#define pOut            r5
#define subFFTSize      r7
#define subFFTNum       r6
#define N               r6
#define order           r14
#define diff            r9
@// Total num of radix stages required to complete the FFT
#define count           r8
#define x0r             r4
#define x0i             r5
#define diffMinusOne    r2

@// Neon registers

#define dX0             D0.F32


@// -------------------------------------------------------------------------
@// omxSP_FFTFwd_CToC_FC32_Sfs
@//
@// Purpose:
@//   Top-level driver for the forward complex-to-complex float FFT. It reads
@//   the transform size from the spec structure, derives order = log2(N),
@//   and dispatches to the per-stage radix-2/4/8 routines.
@//
@// In:
@//   r0 (pSrc)     - pointer to the input data
@//   r1 (pDst)     - pointer to the output data
@//   r2 (pFFTSpec) - pointer to the FFT spec structure; the fields read here
@//                   are N (offset 0), pTwiddle (offset 8), pBuf (offset 12)
@//
@// Out:
@//   r0 (result)   - always OMX_Sts_NoErr
@//
@// Stage-routine interface as set up here (the routines themselves are
@// defined elsewhere; anything beyond this is an assumption to confirm
@// against their sources):
@//   r0 = source, r1 = argTwiddle, r2 = argDst, r5 = pOut,
@//   r6 = subFFTNum (== N on entry to the first stage), r7 = subFFTSize (1).
@//
@// Saved registers / stack: handled by the M_START / M_END macros
@// (arguments r11, d15) - NOTE(review): presumably saves r4-r11, lr and
@// d8-d15; confirm in armCOMM_s.h. "order" lives in r14 (lr), so the BL
@// calls below overwrite it; it is not read again after the first BL on any
@// path.
@// -------------------------------------------------------------------------

@// Allocate stack memory required by the function

@// Write function header
        M_START omxSP_FFTFwd_CToC_FC32_Sfs,r11,d15

@// Structure offsets for the FFTSpec
        .set    ARMsFFTSpec_N, 0
        .set    ARMsFFTSpec_pBitRev, 4
        .set    ARMsFFTSpec_pTwiddle, 8
        .set    ARMsFFTSpec_pBuf, 12

@// Define stack arguments

@// Read the size from structure and take log
        LDR     N, [pFFTSpec, #ARMsFFTSpec_N]

@// Read other structure parameters
@// (pFFTSpec/r2 is not needed after these loads; r2 is reused as argDst.)
        LDR     pTwiddle, [pFFTSpec, #ARMsFFTSpec_pTwiddle]
        LDR     pOut, [pFFTSpec, #ARMsFFTSpec_pBuf]

        CLZ     order,N                 @// N = 2^order
        RSB     order,order,#31         @// order = 31 - clz(N)
        MOV     subFFTSize,#1
@// subFFTNum and N are the same register (r6), so subFFTNum already holds N.
@//MOV     subFFTNum,N

        CMP     order,#3
        BGT     orderGreaterthan3       @// order > 3

        CMP     order,#1
        BGE     orderGreaterthan0       @// order > 0
@// order = 0 (N = 1): copy the single 64-bit element from pSrc to pDst.
        VLD1    dX0,[pSrc]
        VST1    dX0,[pDst]
@// r0 is rewritten with the status code at FFTEnd.
        MOV     pSrc,pDst
@// Flags are still those of "CMP order,#1" (none of the three instructions
@// above updates them), and BGE was not taken, so this branch is always
@// taken.
        BLT     FFTEnd

orderGreaterthan0:
@// set the buffers appropriately for various orders
@// order = 2     : first stage writes to the spec buffer, pOut := pDst
@// order = 1 or 3: first stage writes to pDst, pOut stays the spec buffer
@// argDst/pOut must be derived from pDst BEFORE argTwiddle is written,
@// because argTwiddle and pDst are both r1.
        CMP     order,#2
        MOVNE   argDst,pDst
        MOVEQ   argDst,pOut
@// Pass the first stage destination in RN5
        MOVEQ   pOut,pDst
        MOV     argTwiddle,pTwiddle

        CMP     order,#1
        BGT     orderGreaterthan1
@// order = 1
        BL      armSP_FFTFwd_CToC_FC32_Radix2_fs_OutOfPlace_unsafe
        B       FFTEnd

orderGreaterthan1:
        CMP     order,#2
        BGT     orderGreaterthan2
@// order = 2
        BL      armSP_FFTFwd_CToC_FC32_Radix2_fs_OutOfPlace_unsafe
        BL      armSP_FFTFwd_CToC_FC32_Radix2_ls_OutOfPlace_unsafe
        B       FFTEnd

orderGreaterthan2:                      @// order =3
        BL      armSP_FFTFwd_CToC_FC32_Radix2_fs_OutOfPlace_unsafe
        BL      armSP_FFTFwd_CToC_FC32_Radix2_OutOfPlace_unsafe
        BL      armSP_FFTFwd_CToC_FC32_Radix2_ls_OutOfPlace_unsafe
        B       FFTEnd

orderGreaterthan3:
@// Set input args to fft stages
@// Buffer choice depends on bit 1 of order:
@//   bit set  : first stage writes to pDst
@//   bit clear: first stage writes to the spec buffer, pOut := pDst
@// As above, pDst (r1) must be consumed before argTwiddle (r1) is written.
        TST     order, #2
        MOVNE   argDst,pDst
        MOVEQ   argDst,pOut
@// Pass the first stage destination in RN5
        MOVEQ   pOut,pDst
        MOV     argTwiddle,pTwiddle

@//check for even or odd order
@// NOTE: The following combination of BL's would work fine even though
@// the first BL would corrupt the flags. This is because the end of
@// the "grpZeroSetLoop" loop inside
@// armSP_FFTFwd_CToC_FC32_Radix4_fs_OutOfPlace_unsafe sets the Z flag
@// to EQ

        TST     order,#0x00000001
        BLEQ    armSP_FFTFwd_CToC_FC32_Radix4_fs_OutOfPlace_unsafe
        BLNE    armSP_FFTFwd_CToC_FC32_Radix8_fs_OutOfPlace_unsafe

@// NOTE(review): subFFTNum is never written in this file after the initial
@// load of N, so this test and the loop below rely on the stage routines
@// updating r6 - confirm against their sources.
        CMP     subFFTNum,#4
        BLT     FFTEnd


unscaledRadix4Loop:
@// Entered with the flags of "CMP subFFTNum,#4": EQ means exactly one
@// radix-4 stage remains, which is handled by the "ls" variant.
        BEQ     lastStageUnscaledRadix4
        BL      armSP_FFTFwd_CToC_FC32_Radix4_OutOfPlace_unsafe
        CMP     subFFTNum,#4
        B       unscaledRadix4Loop

lastStageUnscaledRadix4:
        BL      armSP_FFTFwd_CToC_FC32_Radix4_ls_OutOfPlace_unsafe
        B       FFTEnd

FFTEnd:

@// Set return value
        MOV     result, #OMX_Sts_NoErr

@// Write function tail
        M_END

        .end