// Copyright 2009 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.
      4 
      5 // +build !math_big_pure_go
      6 
      7 #include "textflag.h"
      8 
// This file provides fast assembly versions for the elementary
// arithmetic operations on vectors implemented in arith.go.
     11 
     12 // func mulWW(x, y Word) (z1, z0 Word)
     13 TEXT mulWW(SB),NOSPLIT,$0
     14 	MOVL x+0(FP), AX
     15 	MULL y+4(FP)
     16 	MOVL DX, z1+8(FP)
     17 	MOVL AX, z0+12(FP)
     18 	RET
     19 
     20 
     21 // func divWW(x1, x0, y Word) (q, r Word)
     22 TEXT divWW(SB),NOSPLIT,$0
     23 	MOVL x1+0(FP), DX
     24 	MOVL x0+4(FP), AX
     25 	DIVL y+8(FP)
     26 	MOVL AX, q+12(FP)
     27 	MOVL DX, r+16(FP)
     28 	RET
     29 
     30 
     31 // func addVV(z, x, y []Word) (c Word)
     32 TEXT addVV(SB),NOSPLIT,$0
     33 	MOVL z+0(FP), DI
     34 	MOVL x+12(FP), SI
     35 	MOVL y+24(FP), CX
     36 	MOVL z_len+4(FP), BP
     37 	MOVL $0, BX		// i = 0
     38 	MOVL $0, DX		// c = 0
     39 	JMP E1
     40 
     41 L1:	MOVL (SI)(BX*4), AX
     42 	ADDL DX, DX		// restore CF
     43 	ADCL (CX)(BX*4), AX
     44 	SBBL DX, DX		// save CF
     45 	MOVL AX, (DI)(BX*4)
     46 	ADDL $1, BX		// i++
     47 
     48 E1:	CMPL BX, BP		// i < n
     49 	JL L1
     50 
     51 	NEGL DX
     52 	MOVL DX, c+36(FP)
     53 	RET
     54 
     55 
     56 // func subVV(z, x, y []Word) (c Word)
     57 // (same as addVV except for SBBL instead of ADCL and label names)
     58 TEXT subVV(SB),NOSPLIT,$0
     59 	MOVL z+0(FP), DI
     60 	MOVL x+12(FP), SI
     61 	MOVL y+24(FP), CX
     62 	MOVL z_len+4(FP), BP
     63 	MOVL $0, BX		// i = 0
     64 	MOVL $0, DX		// c = 0
     65 	JMP E2
     66 
     67 L2:	MOVL (SI)(BX*4), AX
     68 	ADDL DX, DX		// restore CF
     69 	SBBL (CX)(BX*4), AX
     70 	SBBL DX, DX		// save CF
     71 	MOVL AX, (DI)(BX*4)
     72 	ADDL $1, BX		// i++
     73 
     74 E2:	CMPL BX, BP		// i < n
     75 	JL L2
     76 
     77 	NEGL DX
     78 	MOVL DX, c+36(FP)
     79 	RET
     80 
     81 
     82 // func addVW(z, x []Word, y Word) (c Word)
     83 TEXT addVW(SB),NOSPLIT,$0
     84 	MOVL z+0(FP), DI
     85 	MOVL x+12(FP), SI
     86 	MOVL y+24(FP), AX	// c = y
     87 	MOVL z_len+4(FP), BP
     88 	MOVL $0, BX		// i = 0
     89 	JMP E3
     90 
     91 L3:	ADDL (SI)(BX*4), AX
     92 	MOVL AX, (DI)(BX*4)
     93 	SBBL AX, AX		// save CF
     94 	NEGL AX
     95 	ADDL $1, BX		// i++
     96 
     97 E3:	CMPL BX, BP		// i < n
     98 	JL L3
     99 
    100 	MOVL AX, c+28(FP)
    101 	RET
    102 
    103 
    104 // func subVW(z, x []Word, y Word) (c Word)
    105 TEXT subVW(SB),NOSPLIT,$0
    106 	MOVL z+0(FP), DI
    107 	MOVL x+12(FP), SI
    108 	MOVL y+24(FP), AX	// c = y
    109 	MOVL z_len+4(FP), BP
    110 	MOVL $0, BX		// i = 0
    111 	JMP E4
    112 
    113 L4:	MOVL (SI)(BX*4), DX
    114 	SUBL AX, DX
    115 	MOVL DX, (DI)(BX*4)
    116 	SBBL AX, AX		// save CF
    117 	NEGL AX
    118 	ADDL $1, BX		// i++
    119 
    120 E4:	CMPL BX, BP		// i < n
    121 	JL L4
    122 
    123 	MOVL AX, c+28(FP)
    124 	RET
    125 
    126 
    127 // func shlVU(z, x []Word, s uint) (c Word)
    128 TEXT shlVU(SB),NOSPLIT,$0
    129 	MOVL z_len+4(FP), BX	// i = z
    130 	SUBL $1, BX		// i--
    131 	JL X8b			// i < 0	(n <= 0)
    132 
    133 	// n > 0
    134 	MOVL z+0(FP), DI
    135 	MOVL x+12(FP), SI
    136 	MOVL s+24(FP), CX
    137 	MOVL (SI)(BX*4), AX	// w1 = x[n-1]
    138 	MOVL $0, DX
    139 	SHLL CX, DX:AX		// w1>>
    140 	MOVL DX, c+28(FP)
    141 
    142 	CMPL BX, $0
    143 	JLE X8a			// i <= 0
    144 
    145 	// i > 0
    146 L8:	MOVL AX, DX		// w = w1
    147 	MOVL -4(SI)(BX*4), AX	// w1 = x[i-1]
    148 	SHLL CX, DX:AX		// w<<s | w1>>
    149 	MOVL DX, (DI)(BX*4)	// z[i] = w<<s | w1>>
    150 	SUBL $1, BX		// i--
    151 	JG L8			// i > 0
    152 
    153 	// i <= 0
    154 X8a:	SHLL CX, AX		// w1<<s
    155 	MOVL AX, (DI)		// z[0] = w1<<s
    156 	RET
    157 
    158 X8b:	MOVL $0, c+28(FP)
    159 	RET
    160 
    161 
    162 // func shrVU(z, x []Word, s uint) (c Word)
    163 TEXT shrVU(SB),NOSPLIT,$0
    164 	MOVL z_len+4(FP), BP
    165 	SUBL $1, BP		// n--
    166 	JL X9b			// n < 0	(n <= 0)
    167 
    168 	// n > 0
    169 	MOVL z+0(FP), DI
    170 	MOVL x+12(FP), SI
    171 	MOVL s+24(FP), CX
    172 	MOVL (SI), AX		// w1 = x[0]
    173 	MOVL $0, DX
    174 	SHRL CX, DX:AX		// w1<<
    175 	MOVL DX, c+28(FP)
    176 
    177 	MOVL $0, BX		// i = 0
    178 	JMP E9
    179 
    180 	// i < n-1
    181 L9:	MOVL AX, DX		// w = w1
    182 	MOVL 4(SI)(BX*4), AX	// w1 = x[i+1]
    183 	SHRL CX, DX:AX		// w>>s | w1<<
    184 	MOVL DX, (DI)(BX*4)	// z[i] = w>>s | w1<<
    185 	ADDL $1, BX		// i++
    186 
    187 E9:	CMPL BX, BP
    188 	JL L9			// i < n-1
    189 
    190 	// i >= n-1
    191 X9a:	SHRL CX, AX		// w1>>s
    192 	MOVL AX, (DI)(BP*4)	// z[n-1] = w1>>s
    193 	RET
    194 
    195 X9b:	MOVL $0, c+28(FP)
    196 	RET
    197 
    198 
    199 // func mulAddVWW(z, x []Word, y, r Word) (c Word)
    200 TEXT mulAddVWW(SB),NOSPLIT,$0
    201 	MOVL z+0(FP), DI
    202 	MOVL x+12(FP), SI
    203 	MOVL y+24(FP), BP
    204 	MOVL r+28(FP), CX	// c = r
    205 	MOVL z_len+4(FP), BX
    206 	LEAL (DI)(BX*4), DI
    207 	LEAL (SI)(BX*4), SI
    208 	NEGL BX			// i = -n
    209 	JMP E5
    210 
    211 L5:	MOVL (SI)(BX*4), AX
    212 	MULL BP
    213 	ADDL CX, AX
    214 	ADCL $0, DX
    215 	MOVL AX, (DI)(BX*4)
    216 	MOVL DX, CX
    217 	ADDL $1, BX		// i++
    218 
    219 E5:	CMPL BX, $0		// i < 0
    220 	JL L5
    221 
    222 	MOVL CX, c+32(FP)
    223 	RET
    224 
    225 
    226 // func addMulVVW(z, x []Word, y Word) (c Word)
    227 TEXT addMulVVW(SB),NOSPLIT,$0
    228 	MOVL z+0(FP), DI
    229 	MOVL x+12(FP), SI
    230 	MOVL y+24(FP), BP
    231 	MOVL z_len+4(FP), BX
    232 	LEAL (DI)(BX*4), DI
    233 	LEAL (SI)(BX*4), SI
    234 	NEGL BX			// i = -n
    235 	MOVL $0, CX		// c = 0
    236 	JMP E6
    237 
    238 L6:	MOVL (SI)(BX*4), AX
    239 	MULL BP
    240 	ADDL CX, AX
    241 	ADCL $0, DX
    242 	ADDL AX, (DI)(BX*4)
    243 	ADCL $0, DX
    244 	MOVL DX, CX
    245 	ADDL $1, BX		// i++
    246 
    247 E6:	CMPL BX, $0		// i < 0
    248 	JL L6
    249 
    250 	MOVL CX, c+28(FP)
    251 	RET
    252 
    253 
    254 // func divWVW(z* Word, xn Word, x []Word, y Word) (r Word)
    255 TEXT divWVW(SB),NOSPLIT,$0
    256 	MOVL z+0(FP), DI
    257 	MOVL xn+12(FP), DX	// r = xn
    258 	MOVL x+16(FP), SI
    259 	MOVL y+28(FP), CX
    260 	MOVL z_len+4(FP), BX	// i = z
    261 	JMP E7
    262 
    263 L7:	MOVL (SI)(BX*4), AX
    264 	DIVL CX
    265 	MOVL AX, (DI)(BX*4)
    266 
    267 E7:	SUBL $1, BX		// i--
    268 	JGE L7			// i >= 0
    269 
    270 	MOVL DX, r+32(FP)
    271 	RET
    272