@/*
@ ** Copyright 2003-2010, VisualOn, Inc.
@ **
@ ** Licensed under the Apache License, Version 2.0 (the "License");
@ ** you may not use this file except in compliance with the License.
@ ** You may obtain a copy of the License at
@ **
@ **     http://www.apache.org/licenses/LICENSE-2.0
@ **
@ ** Unless required by applicable law or agreed to in writing, software
@ ** distributed under the License is distributed on an "AS IS" BASIS,
@ ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@ ** See the License for the specific language governing permissions and
@ ** limitations under the License.
@ */

@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@
@	File:		R4R8First_v7.s
@
@	Content:	Radix8First and Radix4First functions, ARMv7 (NEON) assembly
@
@	Syntax:		GNU as, ARM. '@' starts a comment.
@
@	Notation used in the comments below:
@	  b[n]      32-bit word n of the group currently being processed
@	  {x, y}    a 64-bit D register holding x in lane 0 and y in lane 1
@
@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@@

@-----------------------------------------------------------------------------
@ Radix8First
@
@ Transforms, in place, consecutive groups of sixteen 32-bit words.
@ Presumably called from C as  void Radix8First(int *buf, int num)  -- the
@ prototype is not visible in this file, confirm against the caller.
@
@ In:     r0 = address of the first group (16 words = 64 bytes per group)
@         r1 = number of groups; 0 returns without touching memory.
@              The count is only compared against zero, so a negative value
@              is not treated specially.
@ Out:    nothing is placed in a return register; on exit r0 points just
@         past the last group processed.
@ Uses:   r0, r1, r3, d0-d19, q15, flags.
@         d8-d15 are saved and restored. The body never touches r4-r11; they
@         are still pushed so that the stack frame and the unwind information
@         stay exactly as they were.
@-----------------------------------------------------------------------------
        .section .text
        .global Radix8First
        .type   Radix8First, %function  @ typed symbol, needed for ARM/Thumb interworking
        .fnstart

Radix8First:
        push            {r4 - r11, lr}
        .save           {r4 - r11, lr}
        vpush           {d8 - d15}
        .vsave          {d8 - d15}

        ldr             r3, SQRT1_2                     @ r3 = 0x2d413ccd (literal after the return)
        cmp             r1, #0

        vdup.32         q15, r3                         @ all four lanes of q15 = SQRT1_2
        beq             Radix8First_END                 @ flags still from the cmp above

Radix8First_LOOP:
        vld1.32         {d0, d1, d2, d3}, [r0]!         @ d0..d3  = b[0..7]
        vld1.32         {d8, d9, d10, d11}, [r0]!       @ d8..d11 = b[8..15]

        @ ---- words 0..7 --------------------------------------------------
        vadd.i32        d4, d0, d1                      @ d4 = {r0, i0} = {b0+b2, b1+b3}
        vsub.i32        d5, d0, d1                      @ d5 = {r1, i1} = {b0-b2, b1-b3}
        vsub.i32        d7, d2, d3                      @ d7 = {r3, i3} = {b4-b6, b5-b7}
        vadd.i32        d6, d2, d3                      @ d6 = {r2, i2} = {b4+b6, b5+b7}
        vrev64.32       d7, d7                          @ d7 = {i3, r3}

        vadd.i32        q0, q2, q3                      @ d0 = {r0+r2, i0+i2}  d1 = {r1+i3, i1+r3}
        vsub.i32        q1, q2, q3                      @ d2 = {r0-r2, i0-i2}  d3 = {r1-i3, i1-r3}

        vrev64.32       d3, d3                          @ d3 = {i1-r3, r1-i3}

        @ ---- words 8..15 (d4..d7 are reused; capitals mark this half) ----
        vadd.i32        d4, d8, d9                      @ d4 = {R0, I0} = {b8+b10,  b9+b11}
        vsub.i32        d7, d10, d11                    @ d7 = {R3, I3} = {b12-b14, b13-b15}
        vadd.i32        d6, d10, d11                    @ d6 = {R2, I2} = {b12+b14, b13+b15}
        vrev64.32       d7, d7                          @ d7 = {I3, R3}
        vsub.i32        d5, d8, d9                      @ d5 = {R1, I1} = {b8-b10,  b9-b11}

        vtrn.32         d1, d3                          @ d1 = {r1+i3, i1-r3}  d3 = {i1+r3, r1-i3}

        vadd.i32        q4, q2, q3                      @ d8  = {R0+R2, I0+I2}  d9  = {R1+I3, I1+R3}
        vsub.i32        q5, q2, q3                      @ d10 = {R0-R2, I0-I2}  d11 = {R1-I3, I1-R3}

        vrev64.32       d3, d3                          @ d3 = {r1-i3, i1+r3}

        @ Halve the terms that are combined directly; the d9/d11 terms stay
        @ unshifted because they go through the doubling multiply below.
        vshr.s32        d8, d8, #1                      @ d8 = {t0, t1} = {(R0+R2)>>1, (I0+I2)>>1}
        vshr.s32        q0, q0, #1                      @ d0 = {r4, i4}  d1 = {r7, i7}
        vrev64.32       d10, d10                        @ d10 = {I0-I2, R0-R2}
        vtrn.32         d11, d9                         @ d11 = {R1-I3, R1+I3}  d9 = {I1-R3, I1+R3}
        vshr.s32        q1, q1, #1                      @ d2 = {r5, i5}  d3 = {r6, i6}
        vshr.s32        d10, d10, #1                    @ d10 = {t3, t2} = {(I0-I2)>>1, (R0-R2)>>1}
        vrev64.32       d9, d9                          @ d9 = {I1+R3, I1-R3}

        sub             r0, r0, #0x40                   @ rewind to the start of this group for the stores

        vadd.i32        d12, d0, d8                     @ d12 = {r4+t0, i4+t1}            -> b[0], b[1]
        vsub.i32        d16, d0, d8                     @ d16 = {r4-t0, i4-t1}            -> b[8], b[9]
        vadd.i32        d14, d2, d10                    @ d14 = {r5+t3, i5+t2}
        vsub.i32        d18, d2, d10                    @ d18 = {r5-t3, i5-t2}

        vsub.i32        d4, d11, d9                     @ d4 = {(R1-I3)-(I1+R3), (R1+I3)-(I1-R3)}
        vadd.i32        d5, d11, d9                     @ d5 = {(R1-I3)+(I1+R3), (R1+I3)+(I1-R3)}

        vrev64.32       d18, d18                        @ d18 = {i5-t2, r5-t3}

        vqdmulh.s32     q3, q2, q15                     @ per lane: high word of 2 * x * SQRT1_2 (saturating)
                                                        @ d6 = {T0, T2} from d4, d7 = {T1, T3} from d5
        vtrn.32         d14, d18                        @ d14 = {r5+t3, i5-t2}            -> b[4], b[5]
                                                        @ d18 = {i5+t2, r5-t3}
        vtrn.32         d6, d7                          @ d6 = {T0, T1}  d7 = {T2, T3}
        vrev64.32       d18, d18                        @ d18 = {r5-t3, i5+t2}            -> b[12], b[13]

        vsub.i32        d15, d3, d6                     @ d15 = {r6-T0, i6-T1}            -> b[6], b[7]
        vrev64.32       d7, d7                          @ d7 = {T3, T2}
        vadd.i32        d19, d3, d6                     @ d19 = {r6+T0, i6+T1}            -> b[14], b[15]
        vadd.i32        d13, d1, d7                     @ d13 = {r7+T3, i7+T2}
        vsub.i32        d17, d1, d7                     @ d17 = {r7-T3, i7-T2}

        vrev64.32       d17, d17                        @ d17 = {i7-T2, r7-T3}
        vtrn.32         d13, d17                        @ d13 = {r7+T3, i7-T2}            -> b[2], b[3]
                                                        @ d17 = {i7+T2, r7-T3}
        vrev64.32       d17, d17                        @ d17 = {r7-T3, i7+T2}            -> b[10], b[11]

        subs            r1, r1, #1                      @ NEON stores below leave these flags intact

        vst1.32         {d12, d13, d14, d15}, [r0]!     @ b[0..7]
        vst1.32         {d16, d17, d18, d19}, [r0]!     @ b[8..15]; r0 now at the next group
        bne             Radix8First_LOOP

Radix8First_END:
        vpop            {d8 - d15}
        pop             {r4 - r11, pc}

        .p2align        2                               @ keep the literal word-aligned for the pc-relative ldr
SQRT1_2:
        .word           0x2d413ccd                      @ 759250125 = round(2^30 / sqrt(2))

        @ENDP           @ |Radix8First|
        .size   Radix8First, . - Radix8First
        .fnend

@-----------------------------------------------------------------------------
@ Radix4First
@
@ Transforms, in place, consecutive groups of eight 32-bit words.
@ Presumably called from C as  void Radix4First(int *buf, int num)  -- the
@ prototype is not visible in this file, confirm against the caller.
@
@ In:     r0 = address of the first group (8 words = 32 bytes per group)
@         r1 = number of groups; 0 returns without touching memory.
@              The count is only compared against zero, so a negative value
@              is not treated specially.
@ Out:    nothing is placed in a return register; on exit r0 points just
@         past the last group processed.
@ Uses:   r0, r1, d0-d11, flags.
@         d8-d15 are saved and restored. The body never touches r4-r11; they
@         are still pushed so that the stack frame and the unwind information
@         stay exactly as they were.
@
@ Per group:
@         r0 = b0+b2   r1 = b1+b3   r2 = b0-b2   r3 = b1-b3
@         r4 = b4+b6   r5 = b5+b7   r6 = b4-b6   r7 = b5-b7
@         b[0..7] = r0+r4, r1+r5, r2+r7, r3-r6, r0-r4, r1-r5, r2-r7, r3+r6
@-----------------------------------------------------------------------------
        .section .text
        .global Radix4First
        .type   Radix4First, %function  @ typed symbol, needed for ARM/Thumb interworking
        .fnstart

Radix4First:
        push            {r4 - r11, lr}
        .save           {r4 - r11, lr}
        vpush           {d8 - d15}
        .vsave          {d8 - d15}

        cmp             r1, #0
        beq             Radix4First_END

Radix4First_LOOP:
        vld1.32         {d0, d1, d2, d3}, [r0]          @ d0..d3 = b[0..7]; no writeback, the store advances r0

        vadd.i32        d4, d0, d1                      @ d4 = {r0, r1} = {b0+b2, b1+b3}
        vsub.i32        d5, d0, d1                      @ d5 = {r2, r3} = {b0-b2, b1-b3}
        vsub.i32        d7, d2, d3                      @ d7 = {r6, r7} = {b4-b6, b5-b7}
        vadd.i32        d6, d2, d3                      @ d6 = {r4, r5} = {b4+b6, b5+b7}

        vrev64.32       d7, d7                          @ d7 = {r7, r6}

        vadd.i32        q4, q2, q3                      @ d8  = {r0+r4, r1+r5}  d9  = {r2+r7, r3+r6}
        vsub.i32        q5, q2, q3                      @ d10 = {r0-r4, r1-r5}  d11 = {r2-r7, r3-r6}

        vrev64.32       d11, d11                        @ d11 = {r3-r6, r2-r7}
        vtrn.32         d9, d11                         @ d9 = {r2+r7, r3-r6}  d11 = {r3+r6, r2-r7}
        subs            r1, r1, #1                      @ NEON ops below leave these flags intact
        vrev64.32       d11, d11                        @ d11 = {r2-r7, r3+r6}
        vst1.32         {d8, d9, d10, d11}, [r0]!       @ b[0..7]; r0 now at the next group

        bne             Radix4First_LOOP

Radix4First_END:
        vpop            {d8 - d15}
        pop             {r4 - r11, pc}

        @ENDP           @ |Radix4First|
        .size   Radix4First, . - Radix4First
        .fnend