// Copyright 2013 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// +build !math_big_pure_go

#include "textflag.h"

// This file provides fast assembly versions for the elementary
// arithmetic operations on vectors implemented in arith.go.
//
// Conventions used throughout this file:
//   - Go assembler operand order: the destination is the LAST operand,
//     e.g. "SUB R0, R1, R0" computes R0 = R1 - R0.
//   - "MOVD.P 8(Rn), Rm" loads from (Rn) and then advances Rn by 8;
//     "MOVD.P Rm, 8(Rn)" stores to (Rn) and then advances Rn by 8.
//   - Only len(z) is ever read; the lengths of the other slices are
//     not checked here (callers are assumed to pass operands at least
//     as long as z - TODO confirm against arith.go).
//   - Inside the carry-propagating loops nothing but CBZ, MOVD.P and
//     the non-flag-setting SUB may sit between two ADCS/SBCS
//     instructions, because the carry flag is live across iterations.

// TODO: Consider re-implementing using Advanced SIMD
// once the assembler supports those instructions.

// func mulWW(x, y Word) (z1, z0 Word)
//
// Full 64x64 -> 128 bit unsigned multiply:
// z1 receives the high word (UMULH), z0 the low word (MUL).
TEXT ·mulWW(SB),NOSPLIT,$0
	MOVD	x+0(FP), R0
	MOVD	y+8(FP), R1
	MUL	R0, R1, R2	// R2 = low  64 bits of x*y
	UMULH	R0, R1, R3	// R3 = high 64 bits of x*y
	MOVD	R3, z1+16(FP)
	MOVD	R2, z0+24(FP)
	RET


// func divWW(x1, x0, y Word) (q, r Word)
//
// No assembly implementation: branches straight to divWW_g, which
// takes over the caller's argument frame unchanged.
TEXT ·divWW(SB),NOSPLIT,$0
	B	·divWW_g(SB)	// ARM64 has no multiword division


// func addVV(z, x, y []Word) (c Word)
//
// z[i] = x[i] + y[i] + carry for i in [0, len(z)); c is the final carry (0 or 1).
//
// Registers: R0 = words remaining, R1 = &x[i], R2 = &y[i], R3 = &z[i],
// R4/R5 = scratch. The carry flag holds the running carry.
TEXT ·addVV(SB),NOSPLIT,$0
	MOVD	z+0(FP), R3
	MOVD	z_len+8(FP), R0
	MOVD	x+24(FP), R1
	MOVD	y+48(FP), R2
	ADDS	$0, R0		// clear carry flag
loop:
	CBZ	R0, done	// careful not to touch the carry flag
	MOVD.P	8(R1), R4	// R4 = x[i]
	MOVD.P	8(R2), R5	// R5 = y[i]
	ADCS	R4, R5		// R5 = y[i] + x[i] + carry; updates carry
	MOVD.P	R5, 8(R3)	// z[i] = R5
	SUB	$1, R0		// plain SUB: leaves the flags alone
	B	loop
done:
	CSET	HS, R0		// extract carry flag
	MOVD	R0, c+72(FP)
	RET


// func subVV(z, x, y []Word) (c Word)
//
// z[i] = x[i] - y[i] - borrow for i in [0, len(z)); c is the final borrow (0 or 1).
//
// Registers as in addVV. On ARM64 the carry flag is the inverse of
// "borrow": C=1 means no borrow, which is why the loop is entered with
// the carry flag set and the result is extracted with the LO condition.
TEXT ·subVV(SB),NOSPLIT,$0
	MOVD	z+0(FP), R3
	MOVD	z_len+8(FP), R0
	MOVD	x+24(FP), R1
	MOVD	y+48(FP), R2
	CMP	R0, R0		// set carry flag
loop:
	CBZ	R0, done	// careful not to touch the carry flag
	MOVD.P	8(R1), R4	// R4 = x[i]
	MOVD.P	8(R2), R5	// R5 = y[i]
	SBCS	R5, R4		// R4 = x[i] - y[i] - borrow; updates carry
	MOVD.P	R4, 8(R3)	// z[i] = R4
	SUB	$1, R0		// plain SUB: leaves the flags alone
	B	loop
done:
	CSET	LO, R0		// extract carry flag
	MOVD	R0, c+72(FP)
	RET


// func addVW(z, x []Word, y Word) (c Word)
//
// z = x + y, where the single word y is added to x[0] and the carry is
// rippled through the remaining words; c is the final carry (0 or 1).
// If len(z) == 0, c = y.
//
// Registers: R0 = words remaining, R1 = &x[i], R2 = y, R3 = &z[i],
// R4 = scratch.
TEXT ·addVW(SB),NOSPLIT,$0
	MOVD	z+0(FP), R3
	MOVD	z_len+8(FP), R0
	MOVD	x+24(FP), R1
	MOVD	y+48(FP), R2
	CBZ	R0, return_y
	MOVD.P	8(R1), R4
	ADDS	R2, R4		// first word: x[0] + y, establishes the carry
	MOVD.P	R4, 8(R3)
	SUB	$1, R0
loop:
	CBZ	R0, done	// careful not to touch the carry flag
	MOVD.P	8(R1), R4
	ADCS	$0, R4		// x[i] + carry
	MOVD.P	R4, 8(R3)
	SUB	$1, R0		// plain SUB: leaves the flags alone
	B	loop
done:
	CSET	HS, R0		// extract carry flag
	MOVD	R0, c+56(FP)
	RET
return_y:			// z is empty; copy y to c
	MOVD	R2, c+56(FP)
	RET


// func subVW(z, x []Word, y Word) (c Word)
//
// z = x - y, where the single word y is subtracted from x[0] and the
// borrow is rippled through the remaining words; c is the final borrow
// (0 or 1). If len(z) == 0, c = y.
//
// Registers as in addVW. As in subVV, the carry flag is the inverted
// borrow, hence the LO condition when extracting the result.
TEXT ·subVW(SB),NOSPLIT,$0
	MOVD	z+0(FP), R3
	MOVD	z_len+8(FP), R0
	MOVD	x+24(FP), R1
	MOVD	y+48(FP), R2
	CBZ	R0, return_y
	MOVD.P	8(R1), R4
	SUBS	R2, R4		// first word: x[0] - y, establishes the borrow
	MOVD.P	R4, 8(R3)
	SUB	$1, R0
loop:
	CBZ	R0, done	// careful not to touch the carry flag
	MOVD.P	8(R1), R4
	SBCS	$0, R4		// x[i] - borrow
	MOVD.P	R4, 8(R3)
	SUB	$1, R0		// plain SUB: leaves the flags alone
	B	loop
done:
	CSET	LO, R0		// extract carry flag
	MOVD	R0, c+56(FP)
	RET
return_y:			// z is empty; copy y to c
	MOVD	R2, c+56(FP)
	RET


// func shlVU(z, x []Word, s uint) (c Word)
//
// No assembly implementation: branches straight to shlVU_g.
TEXT ·shlVU(SB),NOSPLIT,$0
	B	·shlVU_g(SB)


// func shrVU(z, x []Word, s uint) (c Word)
//
// No assembly implementation: branches straight to shrVU_g.
TEXT ·shrVU(SB),NOSPLIT,$0
	B	·shrVU_g(SB)


// func mulAddVWW(z, x []Word, y, r Word) (c Word)
//
// z = x*y + r, computed word by word; c is the word carried out of the
// most significant position. If len(z) == 0, c = r.
//
// Registers: R0 = words remaining, R1 = &z[i], R2 = &x[i], R3 = y,
// R4 = carry word into the current position (initially r),
// R7:R6 = 128-bit product x[i]*y.
TEXT ·mulAddVWW(SB),NOSPLIT,$0
	MOVD	z+0(FP), R1
	MOVD	z_len+8(FP), R0
	MOVD	x+24(FP), R2
	MOVD	y+48(FP), R3
	MOVD	r+56(FP), R4
loop:
	CBZ	R0, done
	MOVD.P	8(R2), R5	// R5 = x[i]
	UMULH	R5, R3, R7	// R7 = high word of x[i]*y
	MUL	R5, R3, R6	// R6 = low  word of x[i]*y
	ADDS	R4, R6		// add incoming carry word to the low word
	ADC	$0, R7		// propagate the resulting carry bit into the high word
	MOVD.P	R6, 8(R1)	// z[i] = low word
	MOVD	R7, R4		// high word becomes the next carry-in
	SUB	$1, R0
	B	loop
done:
	MOVD	R4, c+64(FP)
	RET


// func addMulVVW(z, x []Word, y Word) (c Word)
//
// No assembly implementation: branches straight to addMulVVW_g.
TEXT ·addMulVVW(SB),NOSPLIT,$0
	B	·addMulVVW_g(SB)


// func divWVW(z []Word, xn Word, x []Word, y Word) (r Word)
//
// No assembly implementation: branches straight to divWVW_g.
TEXT ·divWVW(SB),NOSPLIT,$0
	B	·divWVW_g(SB)


// func bitLen(x Word) (n int)
//
// n = 64 - (number of leading zero bits in x), i.e. the position of the
// highest set bit plus one. CLZ yields 64 for a zero input, so x == 0
// gives n = 0.
TEXT ·bitLen(SB),NOSPLIT,$0
	MOVD	x+0(FP), R0
	CLZ	R0, R0		// R0 = count of leading zeros
	MOVD	$64, R1
	SUB	R0, R1, R0	// R0 = 64 - R0
	MOVD	R0, n+8(FP)
	RET