Home | History | Annotate | Download | only in big
      1 // Copyright 2009 The Go Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style
      3 // license that can be found in the LICENSE file.
      4 
      5 // +build !math_big_pure_go
      6 
      7 #include "textflag.h"
      8 
      9 // This file provides fast assembly versions for the elementary
     10 // arithmetic operations on vectors implemented in arith.go.
     11 
     12 // func mulWW(x, y Word) (z1, z0 Word)
     13 TEXT mulWW(SB),NOSPLIT,$0
     14 	MOVQ x+0(FP), AX
     15 	MULQ y+8(FP)
     16 	MOVQ DX, z1+16(FP)
     17 	MOVQ AX, z0+24(FP)
     18 	RET
     19 
     20 
     21 // func divWW(x1, x0, y Word) (q, r Word)
     22 TEXT divWW(SB),NOSPLIT,$0
     23 	MOVQ x1+0(FP), DX
     24 	MOVQ x0+8(FP), AX
     25 	DIVQ y+16(FP)
     26 	MOVQ AX, q+24(FP)
     27 	MOVQ DX, r+32(FP)
     28 	RET
     29 
// The carry bit is saved with SBBQ Rx, Rx (Rx = Rx - Rx - CF): if the carry
// was set, Rx is -1, otherwise it is 0.
// It is restored with ADDQ Rx, Rx: if Rx was -1 the addition overflows and the
// carry is set, otherwise (Rx was 0) it is cleared.
// This is faster than using rotate instructions.
//
// CAUTION: Note that MOVQ $0, Rx is translated to XORQ Rx, Rx which clears the carry bit!
     35 
     36 // func addVV(z, x, y []Word) (c Word)
     37 TEXT addVV(SB),NOSPLIT,$0
     38 	MOVQ z_len+8(FP), DI
     39 	MOVQ x+24(FP), R8
     40 	MOVQ y+48(FP), R9
     41 	MOVQ z+0(FP), R10
     42 
     43 	MOVQ $0, CX		// c = 0
     44 	MOVQ $0, SI		// i = 0
     45 
     46 	// s/JL/JMP/ below to disable the unrolled loop
     47 	SUBQ $4, DI		// n -= 4
     48 	JL V1			// if n < 0 goto V1
     49 
     50 U1:	// n >= 0
     51 	// regular loop body unrolled 4x
     52 	ADDQ CX, CX		// restore CF
     53 	MOVQ 0(R8)(SI*8), R11
     54 	MOVQ 8(R8)(SI*8), R12
     55 	MOVQ 16(R8)(SI*8), R13
     56 	MOVQ 24(R8)(SI*8), R14
     57 	ADCQ 0(R9)(SI*8), R11
     58 	ADCQ 8(R9)(SI*8), R12
     59 	ADCQ 16(R9)(SI*8), R13
     60 	ADCQ 24(R9)(SI*8), R14
     61 	MOVQ R11, 0(R10)(SI*8)
     62 	MOVQ R12, 8(R10)(SI*8)
     63 	MOVQ R13, 16(R10)(SI*8)
     64 	MOVQ R14, 24(R10)(SI*8)
     65 	SBBQ CX, CX		// save CF
     66 
     67 	ADDQ $4, SI		// i += 4
     68 	SUBQ $4, DI		// n -= 4
     69 	JGE U1			// if n >= 0 goto U1
     70 
     71 V1:	ADDQ $4, DI		// n += 4
     72 	JLE E1			// if n <= 0 goto E1
     73 
     74 L1:	// n > 0
     75 	ADDQ CX, CX		// restore CF
     76 	MOVQ 0(R8)(SI*8), R11
     77 	ADCQ 0(R9)(SI*8), R11
     78 	MOVQ R11, 0(R10)(SI*8)
     79 	SBBQ CX, CX		// save CF
     80 
     81 	ADDQ $1, SI		// i++
     82 	SUBQ $1, DI		// n--
     83 	JG L1			// if n > 0 goto L1
     84 
     85 E1:	NEGQ CX
     86 	MOVQ CX, c+72(FP)	// return c
     87 	RET
     88 
     89 
     90 // func subVV(z, x, y []Word) (c Word)
     91 // (same as addVV except for SBBQ instead of ADCQ and label names)
     92 TEXT subVV(SB),NOSPLIT,$0
     93 	MOVQ z_len+8(FP), DI
     94 	MOVQ x+24(FP), R8
     95 	MOVQ y+48(FP), R9
     96 	MOVQ z+0(FP), R10
     97 
     98 	MOVQ $0, CX		// c = 0
     99 	MOVQ $0, SI		// i = 0
    100 
    101 	// s/JL/JMP/ below to disable the unrolled loop
    102 	SUBQ $4, DI		// n -= 4
    103 	JL V2			// if n < 0 goto V2
    104 
    105 U2:	// n >= 0
    106 	// regular loop body unrolled 4x
    107 	ADDQ CX, CX		// restore CF
    108 	MOVQ 0(R8)(SI*8), R11
    109 	MOVQ 8(R8)(SI*8), R12
    110 	MOVQ 16(R8)(SI*8), R13
    111 	MOVQ 24(R8)(SI*8), R14
    112 	SBBQ 0(R9)(SI*8), R11
    113 	SBBQ 8(R9)(SI*8), R12
    114 	SBBQ 16(R9)(SI*8), R13
    115 	SBBQ 24(R9)(SI*8), R14
    116 	MOVQ R11, 0(R10)(SI*8)
    117 	MOVQ R12, 8(R10)(SI*8)
    118 	MOVQ R13, 16(R10)(SI*8)
    119 	MOVQ R14, 24(R10)(SI*8)
    120 	SBBQ CX, CX		// save CF
    121 
    122 	ADDQ $4, SI		// i += 4
    123 	SUBQ $4, DI		// n -= 4
    124 	JGE U2			// if n >= 0 goto U2
    125 
    126 V2:	ADDQ $4, DI		// n += 4
    127 	JLE E2			// if n <= 0 goto E2
    128 
    129 L2:	// n > 0
    130 	ADDQ CX, CX		// restore CF
    131 	MOVQ 0(R8)(SI*8), R11
    132 	SBBQ 0(R9)(SI*8), R11
    133 	MOVQ R11, 0(R10)(SI*8)
    134 	SBBQ CX, CX		// save CF
    135 
    136 	ADDQ $1, SI		// i++
    137 	SUBQ $1, DI		// n--
    138 	JG L2			// if n > 0 goto L2
    139 
    140 E2:	NEGQ CX
    141 	MOVQ CX, c+72(FP)	// return c
    142 	RET
    143 
    144 
    145 // func addVW(z, x []Word, y Word) (c Word)
    146 TEXT addVW(SB),NOSPLIT,$0
    147 	MOVQ z_len+8(FP), DI
    148 	MOVQ x+24(FP), R8
    149 	MOVQ y+48(FP), CX	// c = y
    150 	MOVQ z+0(FP), R10
    151 
    152 	MOVQ $0, SI		// i = 0
    153 
    154 	// s/JL/JMP/ below to disable the unrolled loop
    155 	SUBQ $4, DI		// n -= 4
    156 	JL V3			// if n < 4 goto V3
    157 
    158 U3:	// n >= 0
    159 	// regular loop body unrolled 4x
    160 	MOVQ 0(R8)(SI*8), R11
    161 	MOVQ 8(R8)(SI*8), R12
    162 	MOVQ 16(R8)(SI*8), R13
    163 	MOVQ 24(R8)(SI*8), R14
    164 	ADDQ CX, R11
    165 	ADCQ $0, R12
    166 	ADCQ $0, R13
    167 	ADCQ $0, R14
    168 	SBBQ CX, CX		// save CF
    169 	NEGQ CX
    170 	MOVQ R11, 0(R10)(SI*8)
    171 	MOVQ R12, 8(R10)(SI*8)
    172 	MOVQ R13, 16(R10)(SI*8)
    173 	MOVQ R14, 24(R10)(SI*8)
    174 
    175 	ADDQ $4, SI		// i += 4
    176 	SUBQ $4, DI		// n -= 4
    177 	JGE U3			// if n >= 0 goto U3
    178 
    179 V3:	ADDQ $4, DI		// n += 4
    180 	JLE E3			// if n <= 0 goto E3
    181 
    182 L3:	// n > 0
    183 	ADDQ 0(R8)(SI*8), CX
    184 	MOVQ CX, 0(R10)(SI*8)
    185 	SBBQ CX, CX		// save CF
    186 	NEGQ CX
    187 
    188 	ADDQ $1, SI		// i++
    189 	SUBQ $1, DI		// n--
    190 	JG L3			// if n > 0 goto L3
    191 
    192 E3:	MOVQ CX, c+56(FP)	// return c
    193 	RET
    194 
    195 
    196 // func subVW(z, x []Word, y Word) (c Word)
    197 // (same as addVW except for SUBQ/SBBQ instead of ADDQ/ADCQ and label names)
    198 TEXT subVW(SB),NOSPLIT,$0
    199 	MOVQ z_len+8(FP), DI
    200 	MOVQ x+24(FP), R8
    201 	MOVQ y+48(FP), CX	// c = y
    202 	MOVQ z+0(FP), R10
    203 
    204 	MOVQ $0, SI		// i = 0
    205 
    206 	// s/JL/JMP/ below to disable the unrolled loop
    207 	SUBQ $4, DI		// n -= 4
    208 	JL V4			// if n < 4 goto V4
    209 
    210 U4:	// n >= 0
    211 	// regular loop body unrolled 4x
    212 	MOVQ 0(R8)(SI*8), R11
    213 	MOVQ 8(R8)(SI*8), R12
    214 	MOVQ 16(R8)(SI*8), R13
    215 	MOVQ 24(R8)(SI*8), R14
    216 	SUBQ CX, R11
    217 	SBBQ $0, R12
    218 	SBBQ $0, R13
    219 	SBBQ $0, R14
    220 	SBBQ CX, CX		// save CF
    221 	NEGQ CX
    222 	MOVQ R11, 0(R10)(SI*8)
    223 	MOVQ R12, 8(R10)(SI*8)
    224 	MOVQ R13, 16(R10)(SI*8)
    225 	MOVQ R14, 24(R10)(SI*8)
    226 
    227 	ADDQ $4, SI		// i += 4
    228 	SUBQ $4, DI		// n -= 4
    229 	JGE U4			// if n >= 0 goto U4
    230 
    231 V4:	ADDQ $4, DI		// n += 4
    232 	JLE E4			// if n <= 0 goto E4
    233 
    234 L4:	// n > 0
    235 	MOVQ 0(R8)(SI*8), R11
    236 	SUBQ CX, R11
    237 	MOVQ R11, 0(R10)(SI*8)
    238 	SBBQ CX, CX		// save CF
    239 	NEGQ CX
    240 
    241 	ADDQ $1, SI		// i++
    242 	SUBQ $1, DI		// n--
    243 	JG L4			// if n > 0 goto L4
    244 
    245 E4:	MOVQ CX, c+56(FP)	// return c
    246 	RET
    247 
    248 
    249 // func shlVU(z, x []Word, s uint) (c Word)
    250 TEXT shlVU(SB),NOSPLIT,$0
    251 	MOVQ z_len+8(FP), BX	// i = z
    252 	SUBQ $1, BX		// i--
    253 	JL X8b			// i < 0	(n <= 0)
    254 
    255 	// n > 0
    256 	MOVQ z+0(FP), R10
    257 	MOVQ x+24(FP), R8
    258 	MOVQ s+48(FP), CX
    259 	MOVQ (R8)(BX*8), AX	// w1 = x[n-1]
    260 	MOVQ $0, DX
    261 	SHLQ CX, DX:AX		// w1>>
    262 	MOVQ DX, c+56(FP)
    263 
    264 	CMPQ BX, $0
    265 	JLE X8a			// i <= 0
    266 
    267 	// i > 0
    268 L8:	MOVQ AX, DX		// w = w1
    269 	MOVQ -8(R8)(BX*8), AX	// w1 = x[i-1]
    270 	SHLQ CX, DX:AX		// w<<s | w1>>
    271 	MOVQ DX, (R10)(BX*8)	// z[i] = w<<s | w1>>
    272 	SUBQ $1, BX		// i--
    273 	JG L8			// i > 0
    274 
    275 	// i <= 0
    276 X8a:	SHLQ CX, AX		// w1<<s
    277 	MOVQ AX, (R10)		// z[0] = w1<<s
    278 	RET
    279 
    280 X8b:	MOVQ $0, c+56(FP)
    281 	RET
    282 
    283 
    284 // func shrVU(z, x []Word, s uint) (c Word)
    285 TEXT shrVU(SB),NOSPLIT,$0
    286 	MOVQ z_len+8(FP), R11
    287 	SUBQ $1, R11		// n--
    288 	JL X9b			// n < 0	(n <= 0)
    289 
    290 	// n > 0
    291 	MOVQ z+0(FP), R10
    292 	MOVQ x+24(FP), R8
    293 	MOVQ s+48(FP), CX
    294 	MOVQ (R8), AX		// w1 = x[0]
    295 	MOVQ $0, DX
    296 	SHRQ CX, DX:AX		// w1<<
    297 	MOVQ DX, c+56(FP)
    298 
    299 	MOVQ $0, BX		// i = 0
    300 	JMP E9
    301 
    302 	// i < n-1
    303 L9:	MOVQ AX, DX		// w = w1
    304 	MOVQ 8(R8)(BX*8), AX	// w1 = x[i+1]
    305 	SHRQ CX, DX:AX		// w>>s | w1<<
    306 	MOVQ DX, (R10)(BX*8)	// z[i] = w>>s | w1<<
    307 	ADDQ $1, BX		// i++
    308 
    309 E9:	CMPQ BX, R11
    310 	JL L9			// i < n-1
    311 
    312 	// i >= n-1
    313 X9a:	SHRQ CX, AX		// w1>>s
    314 	MOVQ AX, (R10)(R11*8)	// z[n-1] = w1>>s
    315 	RET
    316 
    317 X9b:	MOVQ $0, c+56(FP)
    318 	RET
    319 
    320 
    321 // func mulAddVWW(z, x []Word, y, r Word) (c Word)
    322 TEXT mulAddVWW(SB),NOSPLIT,$0
    323 	MOVQ z+0(FP), R10
    324 	MOVQ x+24(FP), R8
    325 	MOVQ y+48(FP), R9
    326 	MOVQ r+56(FP), CX	// c = r
    327 	MOVQ z_len+8(FP), R11
    328 	MOVQ $0, BX		// i = 0
    329 	JMP E5
    330 
    331 L5:	MOVQ (R8)(BX*8), AX
    332 	MULQ R9
    333 	ADDQ CX, AX
    334 	ADCQ $0, DX
    335 	MOVQ AX, (R10)(BX*8)
    336 	MOVQ DX, CX
    337 	ADDQ $1, BX		// i++
    338 
    339 E5:	CMPQ BX, R11		// i < n
    340 	JL L5
    341 
    342 	MOVQ CX, c+64(FP)
    343 	RET
    344 
    345 
    346 // func addMulVVW(z, x []Word, y Word) (c Word)
    347 TEXT addMulVVW(SB),NOSPLIT,$0
    348 	MOVQ z+0(FP), R10
    349 	MOVQ x+24(FP), R8
    350 	MOVQ y+48(FP), R9
    351 	MOVQ z_len+8(FP), R11
    352 	MOVQ $0, BX		// i = 0
    353 	MOVQ $0, CX		// c = 0
    354 	MOVQ R11, R12
    355 	ANDQ $-2, R12
    356 	CMPQ R11, $2
    357 	JAE A6
    358 	JMP E6
    359 
    360 A6:
    361 	MOVQ (R8)(BX*8), AX
    362 	MULQ R9
    363 	ADDQ (R10)(BX*8), AX
    364 	ADCQ $0, DX
    365 	ADDQ CX, AX
    366 	ADCQ $0, DX
    367 	MOVQ DX, CX
    368 	MOVQ AX, (R10)(BX*8)
    369 
    370 	MOVQ (8)(R8)(BX*8), AX
    371 	MULQ R9
    372 	ADDQ (8)(R10)(BX*8), AX
    373 	ADCQ $0, DX
    374 	ADDQ CX, AX
    375 	ADCQ $0, DX
    376 	MOVQ DX, CX
    377 	MOVQ AX, (8)(R10)(BX*8)
    378 
    379 	ADDQ $2, BX
    380 	CMPQ BX, R12
    381 	JL A6
    382 	JMP E6
    383 
    384 L6:	MOVQ (R8)(BX*8), AX
    385 	MULQ R9
    386 	ADDQ CX, AX
    387 	ADCQ $0, DX
    388 	ADDQ AX, (R10)(BX*8)
    389 	ADCQ $0, DX
    390 	MOVQ DX, CX
    391 	ADDQ $1, BX		// i++
    392 
    393 E6:	CMPQ BX, R11		// i < n
    394 	JL L6
    395 
    396 	MOVQ CX, c+56(FP)
    397 	RET
    398 
    399 
    400 // func divWVW(z []Word, xn Word, x []Word, y Word) (r Word)
    401 TEXT divWVW(SB),NOSPLIT,$0
    402 	MOVQ z+0(FP), R10
    403 	MOVQ xn+24(FP), DX	// r = xn
    404 	MOVQ x+32(FP), R8
    405 	MOVQ y+56(FP), R9
    406 	MOVQ z_len+8(FP), BX	// i = z
    407 	JMP E7
    408 
    409 L7:	MOVQ (R8)(BX*8), AX
    410 	DIVQ R9
    411 	MOVQ AX, (R10)(BX*8)
    412 
    413 E7:	SUBQ $1, BX		// i--
    414 	JGE L7			// i >= 0
    415 
    416 	MOVQ DX, r+64(FP)
    417 	RET
    418 
    419 // func bitLen(x Word) (n int)
    420 TEXT bitLen(SB),NOSPLIT,$0
    421 	BSRQ x+0(FP), AX
    422 	JZ Z1
    423 	ADDQ $1, AX
    424 	MOVQ AX, n+8(FP)
    425 	RET
    426 
    427 Z1:	MOVQ $0, n+8(FP)
    428 	RET
    429