Home | History | Annotate | Download | only in big
      1 // Copyright 2013 The Go Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style
      3 // license that can be found in the LICENSE file.
      4 
      5 // +build !math_big_pure_go,ppc64 !math_big_pure_go,ppc64le
      6 
      7 #include "textflag.h"
      8 
      9 // This file provides fast assembly versions for the elementary
     10 // arithmetic operations on vectors implemented in arith.go.
     11 
     12 // func mulWW(x, y Word) (z1, z0 Word)
        // Full unsigned 64x64 -> 128-bit multiply: z1 receives the high word of x*y,
        // z0 the low word.
     13 TEXT mulWW(SB), NOSPLIT, $0
     14 	MOVD   y+8(FP), R9     // multiplier
     15 	MOVD   x+0(FP), R8     // multiplicand
     16 	MULLD  R9, R8, R11     // R11 = low 64 bits of x*y
     17 	MULHDU R9, R8, R10     // R10 = high 64 bits of x*y (unsigned)
     18 	MOVD   R11, z0+24(FP)
     19 	MOVD   R10, z1+16(FP)
     20 	RET
     21 
     22 TEXT addVV(SB), NOSPLIT, $0
        	// No ppc64 assembly for addVV in this file: branch to addVV_g, which is
        	// defined elsewhere (presumably the generic Go version in arith.go — confirm).
        	// BR is a plain branch (no link) and the frame size is $0, so addVV_g's
        	// return goes straight back to addVV's caller.
     23 	BR addVV_g(SB)
     24 
     25 // func subVV(z, x, y []Word) (c Word)
     26 // z[i] = x[i] - y[i] for all i, carrying
        //
        // Subtracts y from x one word at a time, feeding the borrow of each word into
        // the next, and returns the final borrow in c: 1 if the last subtraction
        // borrowed, 0 otherwise (0 when len(z) == 0). Only z_len is read, so x and y
        // are assumed to hold at least len(z) words — NOTE(review): confirm with callers.
        //
        // Register use: R7 = len(z), R8 = &x[0], R9 = &y[0], R10 = &z[0], R4 = c,
        // R5 = i, R6 = byte offset i*8, R28 = 8, R29 = 1.
     27 TEXT subVV(SB), NOSPLIT, $0
     28 	MOVD z_len+8(FP), R7
     29 	MOVD x+24(FP), R8
     30 	MOVD y+48(FP), R9
     31 	MOVD z+0(FP), R10
     32 
     33 	MOVD $0, R4  // c = 0
     34 	MOVD $0, R5  // i = 0
     35 	MOVD $1, R29 // work around lack of ADDI
     36 	MOVD $8, R28 // work around lack of scaled addressing
     37 
        	// On PowerPC the subtract-with-carry family uses CA as an inverted borrow:
        	// CA = 1 means "no borrow". R0 - R0 can never borrow, so this leaves CA = 1,
        	// which is the neutral starting state for the SUBE chain below.
     38 	SUBC R0, R0  // set CA = 1 (no borrow pending)
     39 	JMP  sublend // enter at the loop test so len(z) == 0 runs no iterations
     40 
     41 // amd64 saves and restores CF, but I believe they only have to do that because all of
     42 // their math operations clobber it - we should just be able to recover it at the end.
        // Here nothing between one SUBE and the next (MULLD, MOVD, ADD, CMP, BLT) writes CA,
        // so the borrow is left in CA for the whole loop and read back once after it.
     43 subloop:
     44 	MULLD R5, R28, R6   // R6 = i * 8, byte offset of element i
     45 	MOVD  (R8)(R6), R11 // x[i]
     46 	MOVD  (R9)(R6), R12 // y[i]
     47 
     48 	SUBE R12, R11, R15  // R15 = x[i] - y[i] - borrow; CA = 0 if this borrowed
     49 	MOVD R15, (R10)(R6) // z[i] = R15
     50 
     51 	ADD R29, R5 // i++
     52 
     53 sublend:
     54 	CMP R5, R7   // loop while i < len(z)
     55 	BLT subloop
     56 
     57 	ADDZE R4           // R4 = 0 + CA
     58 	XOR   R29, R4      // flip bit 0: c = 1 - CA, i.e. 1 exactly when a borrow is pending
     59 	MOVD  R4, c+72(FP)
     60 	RET
     61 
     62 TEXT addVW(SB), NOSPLIT, $0
        	// No ppc64 assembly for addVW in this file: branch to addVW_g, which is
        	// defined elsewhere (presumably the generic Go version in arith.go — confirm).
        	// BR is a plain branch (no link) and the frame size is $0, so addVW_g's
        	// return goes straight back to addVW's caller.
     63 	BR addVW_g(SB)
     64 
     65 TEXT subVW(SB), NOSPLIT, $0
        	// No ppc64 assembly for subVW in this file: branch to subVW_g, which is
        	// defined elsewhere (presumably the generic Go version in arith.go — confirm).
        	// BR is a plain branch (no link) and the frame size is $0, so subVW_g's
        	// return goes straight back to subVW's caller.
     66 	BR subVW_g(SB)
     67 
     68 TEXT shlVU(SB), NOSPLIT, $0
        	// No ppc64 assembly for shlVU in this file: branch to shlVU_g, which is
        	// defined elsewhere (presumably the generic Go version in arith.go — confirm).
        	// BR is a plain branch (no link) and the frame size is $0, so shlVU_g's
        	// return goes straight back to shlVU's caller.
     69 	BR shlVU_g(SB)
     70 
     71 TEXT shrVU(SB), NOSPLIT, $0
        	// No ppc64 assembly for shrVU in this file: branch to shrVU_g, which is
        	// defined elsewhere (presumably the generic Go version in arith.go — confirm).
        	// BR is a plain branch (no link) and the frame size is $0, so shrVU_g's
        	// return goes straight back to shrVU's caller.
     72 	BR shrVU_g(SB)
     73 
     74 // func mulAddVWW(z, x []Word, y, r Word) (c Word)
        //
        // For each i in [0, len(z)): forms the 128-bit value x[i]*y + c, stores its low
        // word in z[i] and keeps its high word as the next c. c starts as r and the
        // final c is returned. Only z_len is read, so x is assumed to hold at least
        // len(z) words — NOTE(review): confirm with callers.
        //
        // Register use: R10 = &z[0], R8 = &x[0], R9 = y, R4 = c, R11 = len(z),
        // R3 = i, R5 = byte offset i*8, R18 = 8, R19 = 1.
     75 TEXT mulAddVWW(SB), NOSPLIT, $0
     76 	MOVD z+0(FP), R10
     77 	MOVD x+24(FP), R8
     78 	MOVD y+48(FP), R9
     79 	MOVD r+56(FP), R4     // c = r
     80 	MOVD z_len+8(FP), R11
     81 	MOVD $0, R3           // i = 0
     82 	MOVD $8, R18          // element size, used to scale i into a byte offset
     83 	MOVD $1, R19          // loop increment
     84 
     85 	JMP e5                // enter at the loop test so len(z) == 0 runs no iterations
     86 
     87 l5:
     88 	MULLD  R18, R3, R5    // R5 = i * 8
     89 	MOVD   (R8)(R5), R20  // R20 = x[i]
     90 	MULLD  R9, R20, R6    // R6 = low word of x[i]*y
     91 	MULHDU R9, R20, R7    // R7 = high word of x[i]*y (unsigned)
     92 	ADDC   R4, R6         // low += c, carry out recorded in CA
     93 	ADDZE  R7             // high += CA
     94 	MOVD   R6, (R10)(R5)  // z[i] = low
     95 	MOVD   R7, R4         // c = high
     96 	ADD    R19, R3        // i++
     97 
     98 e5:
     99 	CMP R3, R11           // loop while i < len(z)
    100 	BLT l5
    101 
    102 	MOVD R4, c+64(FP)
    103 	RET
    104 
    105 // func addMulVVW(z, x []Word, y Word) (c Word)
        //
        // For each i in [0, len(z)): forms z[i] + x[i]*y + c, stores the low word back
        // into z[i] and keeps the high word as the next c. c starts at 0 and the final
        // c is returned. Only z_len is read, so x is assumed to hold at least len(z)
        // words — NOTE(review): confirm with callers.
        //
        // Structure: a two-elements-per-iteration loop ("unrolled") covers indices
        // below len(z) with its lowest bit cleared; a one-element loop ("loop",
        // tested at "end") then handles whatever is left.
        //
        // Register use: R10 = &z[0], R8 = &x[0], R9 = y, R22 = len(z),
        // R23 = len(z) &^ 1, R5 = i, R4 = c, R19 = byte offset i*8,
        // R25 = byte offset (i+1)*8, R28 = 8, R24 = 2.
    106 TEXT addMulVVW(SB), NOSPLIT, $0
    107 	MOVD z+0(FP), R10
    108 	MOVD x+24(FP), R8
    109 	MOVD y+48(FP), R9
    110 	MOVD z_len+8(FP), R22
    111 
    112 	MOVD $0, R5   // i = 0
    113 	MOVD $0, R4   // c = 0
    114 	MOVD $8, R28  // element size in bytes
    115 	MOVD $-2, R23
    116 	AND  R22, R23 // mask the last bit of z.len
    117 	MOVD $2, R24
    118 	CMP  R23, R24
    119 	BGE  unrolled // at least one full pair of elements: use the unrolled loop
    120 	JMP  end      // otherwise go straight to the single-element loop test
    121 
    122 unrolled:
    123 	MOVD  $8, R19         // no (RA)(RB*8) on power
    124 	MULLD R5, R19         // R19 = i * 8
    125 	MOVD  (R10)(R19), R11 // R11 = z[i]
    126 	MOVD  (R8)(R19), R16  // R16 = x[i]
    127 	ADD   R28, R19, R25   // R25 = (i+1) * 8
    128 	MOVD  (R10)(R25), R17 // R17 = z[i+1]
    129 	MOVD  (R8)(R25), R18  // R18 = x[i+1]
    130 
    131 	MULLD  R9, R16, R12    // R12 = low word of x[i]*y
    132 	MULHDU R9, R16, R14    // R14 = high word of x[i]*y
    133 	MULLD  R9, R18, R6     // R6 = low word of x[i+1]*y
    134 	MULHDU R9, R18, R7     // R7 = high word of x[i+1]*y
    135 	ADDC   R4, R12         // low += incoming carry word c
    136 	ADDZE  R14             // fold the carry bit into the high word
    137 	ADDC   R11, R12        // z[i] = (x[i]*y) + z[i] + carry
    138 	ADDZE  R14             // carry = high order bits + add carry
    139 	MOVD   R12, (R10)(R19) // store z[i]
    140 	ADDC   R14, R6         // second element: low += carry word from the first
    141 	ADDZE  R7
    142 	ADDC   R17, R6         // low += z[i+1]
    143 	ADDZE  R7
    144 	MOVD   R6, (R10)(R25)  // store z[i+1]
    145 	MOVD   R7, R4          // c = high word of the second element
    146 
    147 	ADD R24, R5            // i += 2
    148 	CMP R5, R23            // keep going while i < len(z) &^ 1
    149 	BLT unrolled
    150 	JMP end                // then fall into the single-element loop test
    151 
    152 loop:
    153 	MOVD   $8, R19
    154 	MULLD  R5, R19         // R19 = i * 8
    155 	MOVD   (R10)(R19), R11 // R11 = z[i]
    156 	MOVD   (R8)(R19), R16  // R16 = x[i]
    157 	MULLD  R9, R16, R12    // R12 = low word of x[i]*y
    158 	MULHDU R9, R16, R14    // R14 = high word of x[i]*y
    159 	ADDC   R4, R12         // low += c
    160 	ADDZE  R14
    161 	ADDC   R11, R12        // low += z[i]
    162 	ADDZE  R14
    163 	MOVD   R12, (R10)(R19) // store z[i]
    164 	MOVD   R14, R4         // c = high word
    165 
    166 	MOVD $1, R15
    167 	ADD  R15, R5           // i++
    168 
    169 end:
    170 	CMP R5, R22            // single-element loop runs while i < len(z)
    171 	BLT loop
    172 
    173 	MOVD R4, c+56(FP)
    174 	RET
    175 
    176 TEXT divWVW(SB), NOSPLIT, $0
        	// No ppc64 assembly for divWVW in this file: branch to divWVW_g, which is
        	// defined elsewhere (presumably the generic Go version in arith.go — confirm).
        	// BR is a plain branch (no link) and the frame size is $0, so divWVW_g's
        	// return goes straight back to divWVW's caller.
    177 	BR divWVW_g(SB)
    178 
    179 // func bitLen(x Word) int
        // Returns 64 minus the number of leading zero bits in x, i.e. the number of
        // bits needed to represent x.
    180 TEXT bitLen(SB), NOSPLIT, $0
    181 	MOVD   $64, R7         // word width in bits
    182 	MOVD   x+0(FP), R6
    183 	CNTLZD R6, R8          // R8 = count of leading zero bits in x
    184 	SUB    R8, R7, R9      // R9 = 64 - R8
    185 	MOVD   R9, n+8(FP)
    186 	RET
    187