Home | History | Annotate | Download | only in big
      1 // Copyright 2009 The Go Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style
      3 // license that can be found in the LICENSE file.
      4 
      5 // +build !math_big_pure_go
      6 
      7 #include "textflag.h"
      8 
      9 // This file provides fast assembly versions for the elementary
     10 // arithmetic operations on vectors implemented in arith.go.
     11 
     12 // func addVV(z, x, y []Word) (c Word)
     13 TEXT addVV(SB),NOSPLIT,$0
     14 	ADD.S	$0, R0		// clear carry flag
     15 	MOVW	z+0(FP), R1
     16 	MOVW	z_len+4(FP), R4
     17 	MOVW	x+12(FP), R2
     18 	MOVW	y+24(FP), R3
     19 	ADD	R4<<2, R1, R4
     20 	B E1
     21 L1:
     22 	MOVW.P	4(R2), R5
     23 	MOVW.P	4(R3), R6
     24 	ADC.S	R6, R5
     25 	MOVW.P	R5, 4(R1)
     26 E1:
     27 	TEQ	R1, R4
     28 	BNE L1
     29 
     30 	MOVW	$0, R0
     31 	MOVW.CS	$1, R0
     32 	MOVW	R0, c+36(FP)
     33 	RET
     34 
     35 
     36 // func subVV(z, x, y []Word) (c Word)
     37 // (same as addVV except for SBC instead of ADC and label names)
     38 TEXT subVV(SB),NOSPLIT,$0
     39 	SUB.S	$0, R0		// clear borrow flag
     40 	MOVW	z+0(FP), R1
     41 	MOVW	z_len+4(FP), R4
     42 	MOVW	x+12(FP), R2
     43 	MOVW	y+24(FP), R3
     44 	ADD	R4<<2, R1, R4
     45 	B E2
     46 L2:
     47 	MOVW.P	4(R2), R5
     48 	MOVW.P	4(R3), R6
     49 	SBC.S	R6, R5
     50 	MOVW.P	R5, 4(R1)
     51 E2:
     52 	TEQ	R1, R4
     53 	BNE L2
     54 
     55 	MOVW	$0, R0
     56 	MOVW.CC	$1, R0
     57 	MOVW	R0, c+36(FP)
     58 	RET
     59 
     60 
     61 // func addVW(z, x []Word, y Word) (c Word)
     62 TEXT addVW(SB),NOSPLIT,$0
     63 	MOVW	z+0(FP), R1
     64 	MOVW	z_len+4(FP), R4
     65 	MOVW	x+12(FP), R2
     66 	MOVW	y+24(FP), R3
     67 	ADD	R4<<2, R1, R4
     68 	TEQ	R1, R4
     69 	BNE L3a
     70 	MOVW	R3, c+28(FP)
     71 	RET
     72 L3a:
     73 	MOVW.P	4(R2), R5
     74 	ADD.S	R3, R5
     75 	MOVW.P	R5, 4(R1)
     76 	B	E3
     77 L3:
     78 	MOVW.P	4(R2), R5
     79 	ADC.S	$0, R5
     80 	MOVW.P	R5, 4(R1)
     81 E3:
     82 	TEQ	R1, R4
     83 	BNE	L3
     84 
     85 	MOVW	$0, R0
     86 	MOVW.CS	$1, R0
     87 	MOVW	R0, c+28(FP)
     88 	RET
     89 
     90 
     91 // func subVW(z, x []Word, y Word) (c Word)
     92 TEXT subVW(SB),NOSPLIT,$0
     93 	MOVW	z+0(FP), R1
     94 	MOVW	z_len+4(FP), R4
     95 	MOVW	x+12(FP), R2
     96 	MOVW	y+24(FP), R3
     97 	ADD	R4<<2, R1, R4
     98 	TEQ	R1, R4
     99 	BNE L4a
    100 	MOVW	R3, c+28(FP)
    101 	RET
    102 L4a:
    103 	MOVW.P	4(R2), R5
    104 	SUB.S	R3, R5
    105 	MOVW.P	R5, 4(R1)
    106 	B	E4
    107 L4:
    108 	MOVW.P	4(R2), R5
    109 	SBC.S	$0, R5
    110 	MOVW.P	R5, 4(R1)
    111 E4:
    112 	TEQ	R1, R4
    113 	BNE	L4
    114 
    115 	MOVW	$0, R0
    116 	MOVW.CC	$1, R0
    117 	MOVW	R0, c+28(FP)
    118 	RET
    119 
    120 
    121 // func shlVU(z, x []Word, s uint) (c Word)
    122 TEXT shlVU(SB),NOSPLIT,$0
    123 	MOVW	z_len+4(FP), R5
    124 	TEQ	$0, R5
    125 	BEQ	X7
    126 
    127 	MOVW	z+0(FP), R1
    128 	MOVW	x+12(FP), R2
    129 	ADD	R5<<2, R2, R2
    130 	ADD	R5<<2, R1, R5
    131 	MOVW	s+24(FP), R3
    132 	TEQ	$0, R3	// shift 0 is special
    133 	BEQ	Y7
    134 	ADD	$4, R1	// stop one word early
    135 	MOVW	$32, R4
    136 	SUB	R3, R4
    137 	MOVW	$0, R7
    138 
    139 	MOVW.W	-4(R2), R6
    140 	MOVW	R6<<R3, R7
    141 	MOVW	R6>>R4, R6
    142 	MOVW	R6, c+28(FP)
    143 	B E7
    144 
    145 L7:
    146 	MOVW.W	-4(R2), R6
    147 	ORR	R6>>R4, R7
    148 	MOVW.W	R7, -4(R5)
    149 	MOVW	R6<<R3, R7
    150 E7:
    151 	TEQ	R1, R5
    152 	BNE	L7
    153 
    154 	MOVW	R7, -4(R5)
    155 	RET
    156 
    157 Y7:	// copy loop, because shift 0 == shift 32
    158 	MOVW.W	-4(R2), R6
    159 	MOVW.W	R6, -4(R5)
    160 	TEQ	R1, R5
    161 	BNE Y7
    162 
    163 X7:
    164 	MOVW	$0, R1
    165 	MOVW	R1, c+28(FP)
    166 	RET
    167 
    168 
    169 // func shrVU(z, x []Word, s uint) (c Word)
    170 TEXT shrVU(SB),NOSPLIT,$0
    171 	MOVW	z_len+4(FP), R5
    172 	TEQ	$0, R5
    173 	BEQ	X6
    174 
    175 	MOVW	z+0(FP), R1
    176 	MOVW	x+12(FP), R2
    177 	ADD	R5<<2, R1, R5
    178 	MOVW	s+24(FP), R3
    179 	TEQ	$0, R3	// shift 0 is special
    180 	BEQ Y6
    181 	SUB	$4, R5	// stop one word early
    182 	MOVW	$32, R4
    183 	SUB	R3, R4
    184 	MOVW	$0, R7
    185 
    186 	// first word
    187 	MOVW.P	4(R2), R6
    188 	MOVW	R6>>R3, R7
    189 	MOVW	R6<<R4, R6
    190 	MOVW	R6, c+28(FP)
    191 	B E6
    192 
    193 	// word loop
    194 L6:
    195 	MOVW.P	4(R2), R6
    196 	ORR	R6<<R4, R7
    197 	MOVW.P	R7, 4(R1)
    198 	MOVW	R6>>R3, R7
    199 E6:
    200 	TEQ	R1, R5
    201 	BNE	L6
    202 
    203 	MOVW	R7, 0(R1)
    204 	RET
    205 
    206 Y6:	// copy loop, because shift 0 == shift 32
    207 	MOVW.P	4(R2), R6
    208 	MOVW.P	R6, 4(R1)
    209 	TEQ R1, R5
    210 	BNE Y6
    211 
    212 X6:
    213 	MOVW	$0, R1
    214 	MOVW	R1, c+28(FP)
    215 	RET
    216 
    217 
    218 // func mulAddVWW(z, x []Word, y, r Word) (c Word)
    219 TEXT mulAddVWW(SB),NOSPLIT,$0
    220 	MOVW	$0, R0
    221 	MOVW	z+0(FP), R1
    222 	MOVW	z_len+4(FP), R5
    223 	MOVW	x+12(FP), R2
    224 	MOVW	y+24(FP), R3
    225 	MOVW	r+28(FP), R4
    226 	ADD	R5<<2, R1, R5
    227 	B E8
    228 
    229 	// word loop
    230 L8:
    231 	MOVW.P	4(R2), R6
    232 	MULLU	R6, R3, (R7, R6)
    233 	ADD.S	R4, R6
    234 	ADC	R0, R7
    235 	MOVW.P	R6, 4(R1)
    236 	MOVW	R7, R4
    237 E8:
    238 	TEQ	R1, R5
    239 	BNE	L8
    240 
    241 	MOVW	R4, c+32(FP)
    242 	RET
    243 
    244 
    245 // func addMulVVW(z, x []Word, y Word) (c Word)
    246 TEXT addMulVVW(SB),NOSPLIT,$0
    247 	MOVW	$0, R0
    248 	MOVW	z+0(FP), R1
    249 	MOVW	z_len+4(FP), R5
    250 	MOVW	x+12(FP), R2
    251 	MOVW	y+24(FP), R3
    252 	ADD	R5<<2, R1, R5
    253 	MOVW	$0, R4
    254 	B E9
    255 
    256 	// word loop
    257 L9:
    258 	MOVW.P	4(R2), R6
    259 	MULLU	R6, R3, (R7, R6)
    260 	ADD.S	R4, R6
    261 	ADC	R0, R7
    262 	MOVW	0(R1), R4
    263 	ADD.S	R4, R6
    264 	ADC	R0, R7
    265 	MOVW.P	R6, 4(R1)
    266 	MOVW	R7, R4
    267 E9:
    268 	TEQ	R1, R5
    269 	BNE	L9
    270 
    271 	MOVW	R4, c+28(FP)
    272 	RET
    273 
    274 
    275 // func divWVW(z* Word, xn Word, x []Word, y Word) (r Word)
    276 TEXT divWVW(SB),NOSPLIT,$0
    277 	// ARM has no multiword division, so use portable code.
    278 	B divWVW_g(SB)
    279 
    280 
    281 // func divWW(x1, x0, y Word) (q, r Word)
    282 TEXT divWW(SB),NOSPLIT,$0
    283 	// ARM has no multiword division, so use portable code.
    284 	B divWW_g(SB)
    285 
    286 
    287 // func mulWW(x, y Word) (z1, z0 Word)
    288 TEXT mulWW(SB),NOSPLIT,$0
    289 	MOVW	x+0(FP), R1
    290 	MOVW	y+4(FP), R2
    291 	MULLU	R1, R2, (R4, R3)
    292 	MOVW	R4, z1+8(FP)
    293 	MOVW	R3, z0+12(FP)
    294 	RET
    295 
    296 // func bitLen(x Word) (n int)
    297 TEXT bitLen(SB),NOSPLIT,$0
    298 	MOVW	x+0(FP), R0
    299 	CLZ 	R0, R0
    300 	RSB	$32, R0
    301 	MOVW	R0, n+4(FP)
    302 	RET
    303