@/*
@ ** Copyright 2003-2010, VisualOn, Inc.
@ **
@ ** Licensed under the Apache License, Version 2.0 (the "License");
@ ** you may not use this file except in compliance with the License.
@ ** You may obtain a copy of the License at
@ **
@ **     http://www.apache.org/licenses/LICENSE-2.0
@ **
@ ** Unless required by applicable law or agreed to in writing, software
@ ** distributed under the License is distributed on an "AS IS" BASIS,
@ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ ** See the License for the specific language governing permissions and
@ ** limitations under the License.
@ */

@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
@	File:		Radix4FFT_v5.s
@
@	Content:	Radix4FFT armv5 assemble
@
@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@

@-------------------------------------------------------------------------------
@ Radix4FFT
@
@ Syntax: GNU as, ARM (A32) instruction set, ARMv5TE DSP multiplies
@         (SMULWT / SMULWB / SMLAWB) and LDRD / STRD.
@
@ Purpose: in-place radix-4 butterfly passes over an interleaved buffer of
@          32-bit words, two words per point (xptr[0], xptr[1]).
@
@ In (names taken from the per-instruction comments in this routine):
@   r0 = buf      base of the data buffer
@   r1 = num      the stage loop runs while (num >>= 2) is non-zero
@   r2 = bgn      butterflies per group in the first stage, multiplied by 4
@                 after every stage
@   r3 = twidTab  twiddle table, one 32-bit word per twiddle. The top halfword
@                 is consumed by SMULWT (commented as cosx), the bottom
@                 halfword by SMULWB / SMLAWB (commented as sinx).
@   NOTE(review): presumably the C prototype is
@     void Radix4FFT(int *buf, int num, int bgn, int *twidTab)
@   confirm against the caller.
@   NOTE(review): LDRD / STRD are applied to addresses derived from buf, so buf
@   is presumably expected to be 8-byte aligned. Confirm at the caller.
@
@ Out:     buf is overwritten in place. No value is returned in r0 on purpose.
@ Saved:   r4-r11 and lr are pushed on entry and restored on exit.
@ Clobber: r0-r3, r12, flags.
@
@ Register roles inside the loops:
@   r14 = xptr   current data pointer
@   r12 = csptr  current twiddle pointer
@   r9  = step   distance in bytes between the four butterfly inputs (bgn * 8)
@   r10 = i      group counter, spilled to the stack inside the inner loop
@   r11 = j      butterfly counter, spilled to the stack inside the inner loop
@
@ Stack frame (32 bytes below the saved registers):
@   [sp, #0]  buf        [sp, #4]  num       [sp, #8]  bgn     [sp, #12] twidTab
@   [sp, #16] i          [sp, #20] j         [sp, #24] r2 sum  [sp, #28] r3 sum
@-------------------------------------------------------------------------------

        .section .text
        .arm                                    @ body uses ARM-only shifted-operand forms
        .global Radix4FFT
        .type   Radix4FFT, %function            @ typed symbol so the linker can interwork
                                                @ calls coming from Thumb code

Radix4FFT:
        stmdb   sp!, {r4 - r11, lr}
        sub     sp, sp, #32                     @ reserve the local frame described above

        mov     r1, r1, asr #2                  @ num >>= 2
        cmp     r1, #0
        beq     Radix4FFT_END                   @ nothing to do when num >> 2 == 0

@ ---- stage loop: one pass per radix-4 stage -----------------------------------
Radix4FFT_LOOP1:
        mov     r14, r0                         @ xptr = buf
        mov     r10, r1                         @ i = num
        mov     r9, r2, lsl #3                  @ step = 2*bgn words = bgn*8 bytes
        cmp     r10, #0                         @ flags survive the stores below
        str     r0, [sp]                        @ save buf
        str     r1, [sp, #4]                    @ save num
        str     r2, [sp, #8]                    @ save bgn
        str     r3, [sp, #12]                   @ save twidTab
        beq     Radix4FFT_LOOP1_END

@ ---- group loop: i groups, each restarting at the head of the twiddle table ---
Radix4FFT_LOOP2:
        mov     r12, r3                         @ csptr = twidTab
        mov     r11, r2                         @ j = bgn
        cmp     r11, #0
        str     r10, [sp, #16]                  @ spill i, r10 is reused as data below
        beq     Radix4FFT_LOOP2_END

@ ---- butterfly loop: one radix-4 butterfly per iteration ----------------------
Radix4FFT_LOOP3:
        str     r11, [sp, #20]                  @ spill j, r11 is reused as data below

        ldrd    r0, [r14, #0]                   @ r0 = xptr[0], r1 = xptr[1]
        add     r14, r14, r9                    @ xptr += step

        ldrd    r10, [r14, #0]                  @ t0 = r10 = xptr[0], t1 = r11 = xptr[1]
        ldr     r8, [r12], #4                   @ cosxsinx = *csptr++ (first twiddle)

        smulwt  r4, r10, r8                     @ r4 = L_mpy_wx(cosx, t0)
        smulwt  r3, r11, r8                     @ r3 = L_mpy_wx(cosx, t1)

        smlawb  r2, r11, r8, r4                 @ r2 = L_mpy_wx(cosx, t0) + L_mpy_wx(sinx, t1)
        smulwb  r5, r10, r8                     @ r5 = L_mpy_wx(sinx, t0)

        mov     r10, r0, asr #2                 @ t0 = r0 >> 2
        mov     r11, r1, asr #2                 @ t1 = r1 >> 2

        sub     r3, r3, r5                      @ r3 = L_mpy_wx(cosx, t1) - L_mpy_wx(sinx, t0)
        add     r14, r14, r9                    @ xptr += step

        sub     r0, r10, r2                     @ r0 = t0 - r2
        sub     r1, r11, r3                     @ r1 = t1 - r3

        add     r2, r10, r2                     @ r2 = t0 + r2
        add     r3, r11, r3                     @ r3 = t1 + r3

        str     r2, [sp, #24]                   @ spill the two sums, r2 and r3 are
        str     r3, [sp, #28]                   @ needed as scratch for the next products

        ldrd    r10, [r14, #0]                  @ t0 = r10 = xptr[0], t1 = r11 = xptr[1]
        ldr     r8, [r12], #4                   @ cosxsinx = *csptr++ (second twiddle)

        smulwt  r6, r10, r8                     @ r6 = L_mpy_wx(cosx, t0)
        smulwt  r5, r11, r8                     @ r5 = L_mpy_wx(cosx, t1)

        smlawb  r4, r11, r8, r6                 @ r4 = L_mpy_wx(cosx, t0) + L_mpy_wx(sinx, t1)
        smulwb  r7, r10, r8                     @ r7 = L_mpy_wx(sinx, t0)

        add     r14, r14, r9                    @ xptr += step
        sub     r5, r5, r7                      @ r5 = L_mpy_wx(cosx, t1) - L_mpy_wx(sinx, t0)

        ldrd    r10, [r14]                      @ t0 = r10 = xptr[0], t1 = r11 = xptr[1]
        ldr     r8, [r12], #4                   @ cosxsinx = *csptr++ (third twiddle)

        smulwt  r2, r10, r8                     @ r2 = L_mpy_wx(cosx, t0)
        smulwt  r7, r11, r8                     @ r7 = L_mpy_wx(cosx, t1)

        smlawb  r6, r11, r8, r2                 @ r6 = L_mpy_wx(cosx, t0) + L_mpy_wx(sinx, t1)
        smulwb  r3, r10, r8                     @ r3 = L_mpy_wx(sinx, t0)

        mov     r10, r4                         @ t0 = r4
        mov     r11, r5                         @ t1 = r5

        sub     r7, r7, r3                      @ r7 = L_mpy_wx(cosx, t1) - L_mpy_wx(sinx, t0)

        add     r4, r10, r6                     @ r4 = t0 + r6
        sub     r5, r7, r11                     @ r5 = r7 - t1

        sub     r6, r10, r6                     @ r6 = t0 - r6
        add     r7, r7, r11                     @ r7 = r7 + t1

        ldr     r2, [sp, #24]                   @ reload the sums spilled above
        ldr     r3, [sp, #28]

        add     r10, r0, r5                     @ xptr[0] = r0 + r5
        add     r11, r1, r6                     @ xptr[1] = r1 + r6

        strd    r10, [r14]
        sub     r14, r14, r9                    @ xptr -= step

        sub     r10, r2, r4                     @ xptr[0] = r2 - r4
        sub     r11, r3, r7                     @ xptr[1] = r3 - r7

        strd    r10, [r14]
        sub     r14, r14, r9                    @ xptr -= step

        sub     r10, r0, r5                     @ xptr[0] = r0 - r5
        sub     r11, r1, r6                     @ xptr[1] = r1 - r6

        strd    r10, [r14]
        sub     r14, r14, r9                    @ xptr -= step

        add     r10, r2, r4                     @ xptr[0] = r2 + r4
        add     r11, r3, r7                     @ xptr[1] = r3 + r7

        strd    r10, [r14]
        add     r14, r14, #8                    @ xptr += 2 words (next butterfly)

        ldr     r11, [sp, #20]                  @ reload j
        subs    r11, r11, #1
        bne     Radix4FFT_LOOP3

Radix4FFT_LOOP2_END:
        ldr     r10, [sp, #16]                  @ reload i
        ldr     r3, [sp, #12]                   @ reload twidTab
        ldr     r2, [sp, #8]                    @ reload bgn
        rsb     r8, r9, r9, lsl #2              @ r8 = 4*step - step = 3*step (bytes)
        sub     r10, r10, #1                    @ i--
        add     r14, r14, r8                    @ xptr += 3*step, skip to the next group
        cmp     r10, #0
        bhi     Radix4FFT_LOOP2                 @ unsigned i > 0

Radix4FFT_LOOP1_END:
        ldr     r0, [sp]                        @ reload buf
        ldr     r1, [sp, #4]                    @ reload num
        add     r3, r3, r8, asr #1              @ twidTab += (3*step)/2 bytes
        mov     r2, r2, lsl #2                  @ bgn <<= 2
        movs    r1, r1, asr #2                  @ num >>= 2, sets Z when finished
        bne     Radix4FFT_LOOP1

Radix4FFT_END:
        add     sp, sp, #32                     @ drop the local frame
        ldmia   sp!, {r4 - r11, pc}             @ restore and return

        .size   Radix4FFT, . - Radix4FFT

        @ENDP  @ |Radix4FFT|
        .end