// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// +build !math_big_pure_go

#include "textflag.h"

// This file provides fast assembly versions for the elementary
// arithmetic operations on vectors implemented in arith.go.
//
// Conventions used throughout this file:
//   - Arguments are addressed through FP with 4-byte words; a slice
//     occupies three words (pointer, length, capacity), which is why
//     consecutive slice arguments are 12 bytes apart.
//   - Only len(z) is ever read. The other vectors are walked in
//     lock-step with z and their lengths are not checked here.
//   - Symbols carry a leading middle dot (·) so that they are placed in
//     the current package and bind to the Go declarations quoted above
//     each routine.
//   - The word loops are terminated with TEQ on two pointers; the
//     add/subtract loops rely on that comparison leaving the carry flag
//     produced by ADC.S/SBC.S untouched.

// func addVV(z, x, y []Word) (c Word)
//
// z[i] = x[i] + y[i] + carry for every i < len(z); c is the carry out
// of the last word (0 or 1).
//
// R1 = z cursor, R2 = x cursor, R3 = y cursor, R4 = &z[len(z)].
TEXT ·addVV(SB),NOSPLIT,$0
	ADD.S	$0, R0		// clear carry flag
	MOVW	z+0(FP), R1
	MOVW	z_len+4(FP), R4
	MOVW	x+12(FP), R2
	MOVW	y+24(FP), R3
	ADD	R4<<2, R1, R4	// R4 = z + 4*len(z), the end pointer
	B	E1
L1:
	MOVW.P	4(R2), R5	// R5 = *x, then x += 4
	MOVW.P	4(R3), R6	// R6 = *y, then y += 4
	ADC.S	R6, R5		// R5 += R6 + carry, carry updated
	MOVW.P	R5, 4(R1)	// *z = R5, then z += 4
E1:
	TEQ	R1, R4		// done when the z cursor reaches the end
	BNE	L1

	MOVW	$0, R0
	MOVW.CS	$1, R0		// c = 1 only if the carry flag is set
	MOVW	R0, c+36(FP)
	RET


// func subVV(z, x, y []Word) (c Word)
// (same as addVV except for SBC instead of ADC and label names)
//
// z[i] = x[i] - y[i] - borrow for every i < len(z); c is the borrow out
// of the last word (0 or 1). A borrow is reported when the carry flag
// is clear after the final SBC.S.
TEXT ·subVV(SB),NOSPLIT,$0
	SUB.S	$0, R0		// clear borrow flag
	MOVW	z+0(FP), R1
	MOVW	z_len+4(FP), R4
	MOVW	x+12(FP), R2
	MOVW	y+24(FP), R3
	ADD	R4<<2, R1, R4	// R4 = z + 4*len(z), the end pointer
	B	E2
L2:
	MOVW.P	4(R2), R5	// R5 = *x, then x += 4
	MOVW.P	4(R3), R6	// R6 = *y, then y += 4
	SBC.S	R6, R5		// R5 -= R6 + borrow, flags updated
	MOVW.P	R5, 4(R1)	// *z = R5, then z += 4
E2:
	TEQ	R1, R4
	BNE	L2

	MOVW	$0, R0
	MOVW.CC	$1, R0		// c = 1 only if the carry flag is clear
	MOVW	R0, c+36(FP)
	RET


// func addVW(z, x []Word, y Word) (c Word)
//
// z = x + y, where y is a single word added into the lowest word and
// the carry is rippled through the rest. For len(z) == 0 the routine
// returns c = y without touching memory.
//
// R1 = z cursor, R2 = x cursor, R3 = y, R4 = &z[len(z)].
TEXT ·addVW(SB),NOSPLIT,$0
	MOVW	z+0(FP), R1
	MOVW	z_len+4(FP), R4
	MOVW	x+12(FP), R2
	MOVW	y+24(FP), R3
	ADD	R4<<2, R1, R4	// R4 = z + 4*len(z), the end pointer
	TEQ	R1, R4
	BNE	L3a
	MOVW	R3, c+28(FP)	// empty vector: the carry out is y itself
	RET
L3a:
	// first word: plain add of y, which establishes the carry flag
	MOVW.P	4(R2), R5
	ADD.S	R3, R5
	MOVW.P	R5, 4(R1)
	B	E3
L3:
	// remaining words: only the carry is propagated
	MOVW.P	4(R2), R5
	ADC.S	$0, R5
	MOVW.P	R5, 4(R1)
E3:
	TEQ	R1, R4
	BNE	L3

	MOVW	$0, R0
	MOVW.CS	$1, R0		// c = 1 only if the carry flag is set
	MOVW	R0, c+28(FP)
	RET


// func subVW(z, x []Word, y Word) (c Word)
//
// z = x - y, where y is a single word subtracted from the lowest word
// and the borrow is rippled through the rest. For len(z) == 0 the
// routine returns c = y without touching memory.
//
// R1 = z cursor, R2 = x cursor, R3 = y, R4 = &z[len(z)].
TEXT ·subVW(SB),NOSPLIT,$0
	MOVW	z+0(FP), R1
	MOVW	z_len+4(FP), R4
	MOVW	x+12(FP), R2
	MOVW	y+24(FP), R3
	ADD	R4<<2, R1, R4	// R4 = z + 4*len(z), the end pointer
	TEQ	R1, R4
	BNE	L4a
	MOVW	R3, c+28(FP)	// empty vector: the borrow out is y itself
	RET
L4a:
	// first word: plain subtract of y, which establishes the flags
	MOVW.P	4(R2), R5
	SUB.S	R3, R5
	MOVW.P	R5, 4(R1)
	B	E4
L4:
	// remaining words: only the borrow is propagated
	MOVW.P	4(R2), R5
	SBC.S	$0, R5
	MOVW.P	R5, 4(R1)
E4:
	TEQ	R1, R4
	BNE	L4

	MOVW	$0, R0
	MOVW.CC	$1, R0		// c = 1 only if the carry flag is clear
	MOVW	R0, c+28(FP)
	RET


// func shlVU(z, x []Word, s uint) (c Word)
//
// z = x << s, processed from the highest word down to the lowest. c
// receives the bits shifted out of the top word, x[len-1] >> (32-s).
// len(z) == 0 returns c = 0; s == 0 degenerates to a copy with c = 0.
//
// R1 = stop pointer, R2 = x cursor (walks down), R5 = z cursor (walks
// down), R3 = s, R4 = 32-s, R6 = current x word, R7 = pending z word.
TEXT ·shlVU(SB),NOSPLIT,$0
	MOVW	z_len+4(FP), R5
	TEQ	$0, R5
	BEQ	X7		// nothing to do for an empty vector

	MOVW	z+0(FP), R1
	MOVW	x+12(FP), R2
	ADD	R5<<2, R2, R2	// R2 = &x[len(z)]
	ADD	R5<<2, R1, R5	// R5 = &z[len(z)]
	MOVW	s+24(FP), R3
	TEQ	$0, R3	// shift 0 is special
	BEQ	Y7
	ADD	$4, R1	// stop one word early
	MOVW	$32, R4
	SUB	R3, R4		// R4 = 32 - s
	MOVW	$0, R7		// R7 is overwritten by the shift below

	// top word: its high bits become the result c
	MOVW.W	-4(R2), R6	// x -= 4, then R6 = *x
	MOVW	R6<<R3, R7
	MOVW	R6>>R4, R6
	MOVW	R6, c+28(FP)
	B	E7

L7:
	MOVW.W	-4(R2), R6	// next lower x word
	ORR	R6>>R4, R7	// merge its high bits into the pending word
	MOVW.W	R7, -4(R5)	// z -= 4, then *z = R7
	MOVW	R6<<R3, R7	// start the next pending word
E7:
	TEQ	R1, R5
	BNE	L7

	MOVW	R7, -4(R5)	// lowest word, stored at z[0]
	RET

Y7:	// copy loop, because shift 0 == shift 32
	MOVW.W	-4(R2), R6
	MOVW.W	R6, -4(R5)
	TEQ	R1, R5
	BNE	Y7

X7:
	MOVW	$0, R1
	MOVW	R1, c+28(FP)	// c = 0
	RET


// func shrVU(z, x []Word, s uint) (c Word)
//
// z = x >> s, processed from the lowest word up to the highest. c
// receives the bits shifted out of the bottom word, x[0] << (32-s).
// len(z) == 0 returns c = 0; s == 0 degenerates to a copy with c = 0.
//
// R1 = z cursor (walks up), R2 = x cursor (walks up), R5 = stop
// pointer, R3 = s, R4 = 32-s, R6 = current x word, R7 = pending z word.
TEXT ·shrVU(SB),NOSPLIT,$0
	MOVW	z_len+4(FP), R5
	TEQ	$0, R5
	BEQ	X6		// nothing to do for an empty vector

	MOVW	z+0(FP), R1
	MOVW	x+12(FP), R2
	ADD	R5<<2, R1, R5	// R5 = &z[len(z)]
	MOVW	s+24(FP), R3
	TEQ	$0, R3	// shift 0 is special
	BEQ	Y6
	SUB	$4, R5	// stop one word early
	MOVW	$32, R4
	SUB	R3, R4		// R4 = 32 - s
	MOVW	$0, R7		// R7 is overwritten by the shift below

	// first word
	MOVW.P	4(R2), R6	// R6 = *x, then x += 4
	MOVW	R6>>R3, R7
	MOVW	R6<<R4, R6
	MOVW	R6, c+28(FP)
	B	E6

	// word loop
L6:
	MOVW.P	4(R2), R6	// next higher x word
	ORR	R6<<R4, R7	// merge its low bits into the pending word
	MOVW.P	R7, 4(R1)	// *z = R7, then z += 4
	MOVW	R6>>R3, R7	// start the next pending word
E6:
	TEQ	R1, R5
	BNE	L6

	MOVW	R7, 0(R1)	// highest word, stored at z[len-1]
	RET

Y6:	// copy loop, because shift 0 == shift 32
	MOVW.P	4(R2), R6
	MOVW.P	R6, 4(R1)
	TEQ	R1, R5
	BNE	Y6

X6:
	MOVW	$0, R1
	MOVW	R1, c+28(FP)	// c = 0
	RET


// func mulAddVWW(z, x []Word, y, r Word) (c Word)
//
// z = x*y + r, with r used as the carry into the lowest word; c is the
// carry out of the highest word. For len(z) == 0 the result is c = r.
//
// R0 = constant 0, R1 = z cursor, R2 = x cursor, R3 = y,
// R4 = running carry, R5 = &z[len(z)], R7:R6 = 64-bit partial product.
TEXT ·mulAddVWW(SB),NOSPLIT,$0
	MOVW	$0, R0
	MOVW	z+0(FP), R1
	MOVW	z_len+4(FP), R5
	MOVW	x+12(FP), R2
	MOVW	y+24(FP), R3
	MOVW	r+28(FP), R4
	ADD	R5<<2, R1, R5	// R5 = z + 4*len(z), the end pointer
	B	E8

	// word loop
L8:
	MOVW.P	4(R2), R6	// R6 = *x, then x += 4
	MULLU	R6, R3, (R7, R6)	// R7:R6 = R6 * y (high:low)
	ADD.S	R4, R6		// add the incoming carry to the low word
	ADC	R0, R7		// and ripple into the high word
	MOVW.P	R6, 4(R1)	// *z = low word, then z += 4
	MOVW	R7, R4		// high word is the carry for the next round
E8:
	TEQ	R1, R5
	BNE	L8

	MOVW	R4, c+32(FP)
	RET


// func addMulVVW(z, x []Word, y Word) (c Word)
//
// z += x*y, accumulated in place; c is the carry out of the highest
// word. For len(z) == 0 the result is c = 0.
//
// R0 = constant 0, R1 = z cursor, R2 = x cursor, R3 = y,
// R4 = running carry / scratch, R5 = &z[len(z)], R7:R6 = partial sum.
TEXT ·addMulVVW(SB),NOSPLIT,$0
	MOVW	$0, R0
	MOVW	z+0(FP), R1
	MOVW	z_len+4(FP), R5
	MOVW	x+12(FP), R2
	MOVW	y+24(FP), R3
	ADD	R5<<2, R1, R5	// R5 = z + 4*len(z), the end pointer
	MOVW	$0, R4		// initial carry is zero
	B	E9

	// word loop
L9:
	MOVW.P	4(R2), R6	// R6 = *x, then x += 4
	MULLU	R6, R3, (R7, R6)	// R7:R6 = R6 * y (high:low)
	ADD.S	R4, R6		// add the incoming carry
	ADC	R0, R7
	MOVW	0(R1), R4	// R4 = current *z (carry already consumed)
	ADD.S	R4, R6		// add the existing z word
	ADC	R0, R7
	MOVW.P	R6, 4(R1)	// *z = low word, then z += 4
	MOVW	R7, R4		// high word is the carry for the next round
E9:
	TEQ	R1, R5
	BNE	L9

	MOVW	R4, c+28(FP)
	RET


// func divWVW(z *Word, xn Word, x []Word, y Word) (r Word)
TEXT ·divWVW(SB),NOSPLIT,$0
	// ARM has no multiword division, so use portable code.
	// Tail-branch with the caller's argument frame left as is.
	B	·divWVW_g(SB)


// func divWW(x1, x0, y Word) (q, r Word)
TEXT ·divWW(SB),NOSPLIT,$0
	// ARM has no multiword division, so use portable code.
	// Tail-branch with the caller's argument frame left as is.
	B	·divWW_g(SB)


// func mulWW(x, y Word) (z1, z0 Word)
//
// Full 32x32 -> 64 bit unsigned product: z1 is the high word and z0
// the low word of x*y.
TEXT ·mulWW(SB),NOSPLIT,$0
	MOVW	x+0(FP), R1
	MOVW	y+4(FP), R2
	MULLU	R1, R2, (R4, R3)	// R4:R3 = x * y (high:low)
	MOVW	R4, z1+8(FP)
	MOVW	R3, z0+12(FP)
	RET

// func bitLen(x Word) (n int)
//
// n = 32 - (number of leading zero bits in x).
TEXT ·bitLen(SB),NOSPLIT,$0
	MOVW	x+0(FP), R0
	CLZ	R0, R0		// R0 = count of leading zeros
	RSB	$32, R0		// R0 = 32 - R0
	MOVW	R0, n+4(FP)
	RET