// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// +build !math_big_pure_go

#include "textflag.h"

// This file provides fast assembly versions for the elementary
// arithmetic operations on vectors implemented in arith.go.
//
// Conventions used throughout this file:
//   - Target is 32-bit x86: a Word is 4 bytes and a []Word argument
//     occupies 12 bytes of the argument frame (base pointer at +0,
//     length at +4 relative to the start of the slice header).
//   - Every routine is NOSPLIT with a $0 frame: no locals, no calls.
//   - All arguments and results are accessed through FP.
//   - Every TEXT symbol carries the leading middle dot (U+00B7) so that it
//     is package-qualified and matches its Go declaration.
//   - In the shift routines, (32-s) denotes the complementary shift count.

// func mulWW(x, y Word) (z1, z0 Word)
//
// Computes the full 64-bit product x*y.
// z1 receives the high word (DX), z0 the low word (AX).
TEXT ·mulWW(SB),NOSPLIT,$0
	MOVL x+0(FP), AX
	MULL y+4(FP)		// DX:AX = AX * y
	MOVL DX, z1+8(FP)
	MOVL AX, z0+12(FP)
	RET


// func divWW(x1, x0, y Word) (q, r Word)
//
// Divides the double word x1:x0 by y, returning quotient q and remainder r.
// The hardware DIV raises a divide error if y == 0 or if the quotient
// does not fit in one word (x1 >= y); callers must rule both out.
TEXT ·divWW(SB),NOSPLIT,$0
	MOVL x1+0(FP), DX	// high word of dividend
	MOVL x0+4(FP), AX	// low word of dividend
	DIVL y+8(FP)		// AX = DX:AX / y, DX = DX:AX % y
	MOVL AX, q+12(FP)
	MOVL DX, r+16(FP)
	RET


// func addVV(z, x, y []Word) (c Word)
//
// z[i] = x[i] + y[i] + carry for i in [0, len(z)); returns the final carry
// (0 or 1). Only len(z) is read; x and y are indexed with the same bound.
//
// The carry is kept between iterations in DX as 0 or -1 (all ones):
// "ADDL DX, DX" turns that back into CF (-1 + -1 carries out, 0 + 0 does
// not) and "SBBL DX, DX" converts CF into 0 / -1 again. This is needed
// because the loop-control ADDL/CMPL clobber CF.
TEXT ·addVV(SB),NOSPLIT,$0
	MOVL z+0(FP), DI
	MOVL x+12(FP), SI
	MOVL y+24(FP), CX
	MOVL z_len+4(FP), BP	// n = len(z)
	MOVL $0, BX		// i = 0
	MOVL $0, DX		// c = 0
	JMP E1

L1:	MOVL (SI)(BX*4), AX	// AX = x[i]
	ADDL DX, DX		// restore CF
	ADCL (CX)(BX*4), AX	// AX += y[i] + CF
	SBBL DX, DX		// save CF (DX = -CF)
	MOVL AX, (DI)(BX*4)	// z[i] = AX
	ADDL $1, BX		// i++

E1:	CMPL BX, BP		// i < n
	JL L1

	NEGL DX			// -CF -> 0 or 1
	MOVL DX, c+36(FP)
	RET


// func subVV(z, x, y []Word) (c Word)
// (same as addVV except for SBBL instead of ADCL and label names)
//
// z[i] = x[i] - y[i] - borrow for i in [0, len(z)); returns the final
// borrow (0 or 1). The borrow is carried in DX as 0 / -1 exactly like the
// carry in addVV.
TEXT ·subVV(SB),NOSPLIT,$0
	MOVL z+0(FP), DI
	MOVL x+12(FP), SI
	MOVL y+24(FP), CX
	MOVL z_len+4(FP), BP	// n = len(z)
	MOVL $0, BX		// i = 0
	MOVL $0, DX		// c = 0
	JMP E2

L2:	MOVL (SI)(BX*4), AX	// AX = x[i]
	ADDL DX, DX		// restore CF
	SBBL (CX)(BX*4), AX	// AX -= y[i] + CF
	SBBL DX, DX		// save CF (DX = -CF)
	MOVL AX, (DI)(BX*4)	// z[i] = AX
	ADDL $1, BX		// i++

E2:	CMPL BX, BP		// i < n
	JL L2

	NEGL DX			// -CF -> 0 or 1
	MOVL DX, c+36(FP)
	RET


// func addVW(z, x []Word, y Word) (c Word)
//
// Adds the single word y to the vector x: z[i] = x[i] + c, where c starts
// as y and is the carry (0 or 1) out of the previous word afterwards.
// Returns the final c; for len(z) == 0 that is y itself.
TEXT ·addVW(SB),NOSPLIT,$0
	MOVL z+0(FP), DI
	MOVL x+12(FP), SI
	MOVL y+24(FP), AX	// c = y
	MOVL z_len+4(FP), BP	// n = len(z)
	MOVL $0, BX		// i = 0
	JMP E3

L3:	ADDL (SI)(BX*4), AX	// AX = x[i] + c, CF = carry out
	MOVL AX, (DI)(BX*4)	// z[i] = AX (MOVL leaves CF intact)
	SBBL AX, AX		// save CF (AX = -CF)
	NEGL AX			// c = 0 or 1
	ADDL $1, BX		// i++

E3:	CMPL BX, BP		// i < n
	JL L3

	MOVL AX, c+28(FP)
	RET


// func subVW(z, x []Word, y Word) (c Word)
//
// Subtracts the single word y from the vector x: z[i] = x[i] - c, where c
// starts as y and is the borrow (0 or 1) out of the previous word
// afterwards. Returns the final c; for len(z) == 0 that is y itself.
TEXT ·subVW(SB),NOSPLIT,$0
	MOVL z+0(FP), DI
	MOVL x+12(FP), SI
	MOVL y+24(FP), AX	// c = y
	MOVL z_len+4(FP), BP	// n = len(z)
	MOVL $0, BX		// i = 0
	JMP E4

L4:	MOVL (SI)(BX*4), DX	// DX = x[i]
	SUBL AX, DX		// DX -= c, CF = borrow out
	MOVL DX, (DI)(BX*4)	// z[i] = DX (MOVL leaves CF intact)
	SBBL AX, AX		// save CF (AX = -CF)
	NEGL AX			// c = 0 or 1
	ADDL $1, BX		// i++

E4:	CMPL BX, BP		// i < n
	JL L4

	MOVL AX, c+28(FP)
	RET


// func shlVU(z, x []Word, s uint) (c Word)
//
// Shifts the vector x left by s bits into z, walking from the most
// significant word down so that each word picks up the bits shifted out
// of its lower neighbour. Returns the bits shifted out of the top word,
// x[n-1]>>(32-s). Returns 0 without touching z when len(z) <= 0.
//
// "SHLL CX, DX:AX" is the double-precision shift: DX is shifted left by
// CX and its vacated low bits are filled from the top of AX.
TEXT ·shlVU(SB),NOSPLIT,$0
	MOVL z_len+4(FP), BX	// i = len(z)
	SUBL $1, BX		// i--
	JL X8b			// i < 0	(n <= 0)

	// n > 0
	MOVL z+0(FP), DI
	MOVL x+12(FP), SI
	MOVL s+24(FP), CX
	MOVL (SI)(BX*4), AX	// w1 = x[n-1]
	MOVL $0, DX
	SHLL CX, DX:AX		// w1>>(32-s)
	MOVL DX, c+28(FP)

	CMPL BX, $0
	JLE X8a			// i <= 0

	// i > 0
L8:	MOVL AX, DX		// w = w1
	MOVL -4(SI)(BX*4), AX	// w1 = x[i-1]
	SHLL CX, DX:AX		// w<<s | w1>>(32-s)
	MOVL DX, (DI)(BX*4)	// z[i] = w<<s | w1>>(32-s)
	SUBL $1, BX		// i--
	JG L8			// i > 0

	// i <= 0
X8a:	SHLL CX, AX		// w1<<s
	MOVL AX, (DI)		// z[0] = w1<<s
	RET

X8b:	MOVL $0, c+28(FP)
	RET


// func shrVU(z, x []Word, s uint) (c Word)
//
// Shifts the vector x right by s bits into z, walking from the least
// significant word up so that each word picks up the bits shifted out of
// its upper neighbour. Returns the bits shifted out of the bottom word,
// x[0]<<(32-s). Returns 0 without touching z when len(z) <= 0.
//
// "SHRL CX, DX:AX" is the double-precision shift: DX is shifted right by
// CX and its vacated high bits are filled from the bottom of AX.
TEXT ·shrVU(SB),NOSPLIT,$0
	MOVL z_len+4(FP), BP
	SUBL $1, BP		// n--
	JL X9b			// n < 0	(n <= 0)

	// n > 0
	MOVL z+0(FP), DI
	MOVL x+12(FP), SI
	MOVL s+24(FP), CX
	MOVL (SI), AX		// w1 = x[0]
	MOVL $0, DX
	SHRL CX, DX:AX		// w1<<(32-s)
	MOVL DX, c+28(FP)

	MOVL $0, BX		// i = 0
	JMP E9

	// i < n-1
L9:	MOVL AX, DX		// w = w1
	MOVL 4(SI)(BX*4), AX	// w1 = x[i+1]
	SHRL CX, DX:AX		// w>>s | w1<<(32-s)
	MOVL DX, (DI)(BX*4)	// z[i] = w>>s | w1<<(32-s)
	ADDL $1, BX		// i++

E9:	CMPL BX, BP
	JL L9			// i < n-1

	// i >= n-1
X9a:	SHRL CX, AX		// w1>>s
	MOVL AX, (DI)(BP*4)	// z[n-1] = w1>>s
	RET

X9b:	MOVL $0, c+28(FP)
	RET


// func mulAddVWW(z, x []Word, y, r Word) (c Word)
//
// z[i] = low word of (x[i]*y + c), with c starting as r and becoming the
// high word of each step. Returns the final c.
//
// DI and SI are advanced to the end of z and x and the index BX runs from
// -n up to 0, so the loop test is a comparison against zero.
TEXT ·mulAddVWW(SB),NOSPLIT,$0
	MOVL z+0(FP), DI
	MOVL x+12(FP), SI
	MOVL y+24(FP), BP
	MOVL r+28(FP), CX	// c = r
	MOVL z_len+4(FP), BX
	LEAL (DI)(BX*4), DI	// DI = &z[n]
	LEAL (SI)(BX*4), SI	// SI = &x[n]
	NEGL BX			// i = -n
	JMP E5

L5:	MOVL (SI)(BX*4), AX	// AX = x[n+i]
	MULL BP			// DX:AX = x[n+i] * y
	ADDL CX, AX		// low word += c
	ADCL $0, DX		// propagate carry into high word
	MOVL AX, (DI)(BX*4)	// z[n+i] = low word
	MOVL DX, CX		// c = high word
	ADDL $1, BX		// i++

E5:	CMPL BX, $0		// i < 0
	JL L5

	MOVL CX, c+32(FP)
	RET


// func addMulVVW(z, x []Word, y Word) (c Word)
//
// z[i] += low word of (x[i]*y + c), with c starting at 0 and becoming the
// high word of each step (including the carry out of the addition into
// z[i]). Returns the final c.
//
// Uses the same negative-index loop shape as mulAddVWW.
TEXT ·addMulVVW(SB),NOSPLIT,$0
	MOVL z+0(FP), DI
	MOVL x+12(FP), SI
	MOVL y+24(FP), BP
	MOVL z_len+4(FP), BX
	LEAL (DI)(BX*4), DI	// DI = &z[n]
	LEAL (SI)(BX*4), SI	// SI = &x[n]
	NEGL BX			// i = -n
	MOVL $0, CX		// c = 0
	JMP E6

L6:	MOVL (SI)(BX*4), AX	// AX = x[n+i]
	MULL BP			// DX:AX = x[n+i] * y
	ADDL CX, AX		// low word += c
	ADCL $0, DX		// propagate carry into high word
	ADDL AX, (DI)(BX*4)	// z[n+i] += low word
	ADCL $0, DX		// propagate carry into high word
	MOVL DX, CX		// c = high word
	ADDL $1, BX		// i++

E6:	CMPL BX, $0		// i < 0
	JL L6

	MOVL CX, c+28(FP)
	RET


// func divWVW(z []Word, xn Word, x []Word, y Word) (r Word)
//
// Divides the multi-word value xn:x[n-1]:...:x[0] by the single word y,
// most significant word first, storing the quotient words in z and
// returning the final remainder. n is len(z). Each step divides
// r:x[i] by y, where r is the running remainder (initially xn).
// The hardware DIV raises a divide error if y == 0 or a quotient word
// overflows (running remainder >= y); callers must rule both out.
TEXT ·divWVW(SB),NOSPLIT,$0
	MOVL z+0(FP), DI
	MOVL xn+12(FP), DX	// r = xn
	MOVL x+16(FP), SI
	MOVL y+28(FP), CX
	MOVL z_len+4(FP), BX	// i = len(z)
	JMP E7

L7:	MOVL (SI)(BX*4), AX	// AX = x[i]; dividend is r:x[i] in DX:AX
	DIVL CX			// AX = quotient, DX = remainder
	MOVL AX, (DI)(BX*4)	// z[i] = quotient

E7:	SUBL $1, BX		// i--
	JGE L7			// i >= 0

	MOVL DX, r+32(FP)
	RET