Home | History | Annotate | Download | only in elliptic
      1 // Copyright 2015 The Go Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style
      3 // license that can be found in the LICENSE file.
      4 
      5 // This file contains the Go wrapper for the constant-time, 64-bit assembly
      6 // implementation of P256. The optimizations performed here are described in
      7 // detail in:
      8 // S.Gueron and V.Krasnov, "Fast prime field elliptic-curve cryptography with
      9 //                          256-bit primes"
     10 // http://link.springer.com/article/10.1007%2Fs13389-014-0090-x
     11 // https://eprint.iacr.org/2013/816.pdf
     12 
     13 // +build amd64
     14 
     15 package elliptic
     16 
     17 import (
     18 	"math/big"
     19 	"sync"
     20 )
     21 
     22 type (
     23 	p256Curve struct {
     24 		*CurveParams
     25 	}
     26 
     27 	p256Point struct {
     28 		xyz [12]uint64
     29 	}
     30 )
     31 
     32 var (
     33 	p256            p256Curve
     34 	p256Precomputed *[37][64 * 8]uint64
     35 	precomputeOnce  sync.Once
     36 )
     37 
     38 func initP256() {
     39 	// See FIPS 186-3, section D.2.3
     40 	p256.CurveParams = &CurveParams{Name: "P-256"}
     41 	p256.P, _ = new(big.Int).SetString("115792089210356248762697446949407573530086143415290314195533631308867097853951", 10)
     42 	p256.N, _ = new(big.Int).SetString("115792089210356248762697446949407573529996955224135760342422259061068512044369", 10)
     43 	p256.B, _ = new(big.Int).SetString("5ac635d8aa3a93e7b3ebbd55769886bc651d06b0cc53b0f63bce3c3e27d2604b", 16)
     44 	p256.Gx, _ = new(big.Int).SetString("6b17d1f2e12c4247f8bce6e563a440f277037d812deb33a0f4a13945d898c296", 16)
     45 	p256.Gy, _ = new(big.Int).SetString("4fe342e2fe1a7f9b8ee7eb4a7c0f9e162bce33576b315ececbb6406837bf51f5", 16)
     46 	p256.BitSize = 256
     47 }
     48 
     49 func (curve p256Curve) Params() *CurveParams {
     50 	return curve.CurveParams
     51 }
     52 
     53 // Functions implemented in p256_asm_amd64.s
     54 // Montgomery multiplication modulo P256
     55 func p256Mul(res, in1, in2 []uint64)
     56 
     57 // Montgomery square modulo P256
     58 func p256Sqr(res, in []uint64)
     59 
     60 // Montgomery multiplication by 1
     61 func p256FromMont(res, in []uint64)
     62 
     63 // iff cond == 1  val <- -val
     64 func p256NegCond(val []uint64, cond int)
     65 
     66 // if cond == 0 res <- b; else res <- a
     67 func p256MovCond(res, a, b []uint64, cond int)
     68 
     69 // Endianness swap
     70 func p256BigToLittle(res []uint64, in []byte)
     71 func p256LittleToBig(res []byte, in []uint64)
     72 
     73 // Constant time table access
     74 func p256Select(point, table []uint64, idx int)
     75 func p256SelectBase(point, table []uint64, idx int)
     76 
     77 // Montgomery multiplication modulo Ord(G)
     78 func p256OrdMul(res, in1, in2 []uint64)
     79 
     80 // Montgomery square modulo Ord(G), repeated n times
     81 func p256OrdSqr(res, in []uint64, n int)
     82 
     83 // Point add with in2 being affine point
     84 // If sign == 1 -> in2 = -in2
     85 // If sel == 0 -> res = in1
     86 // if zero == 0 -> res = in2
     87 func p256PointAddAffineAsm(res, in1, in2 []uint64, sign, sel, zero int)
     88 
     89 // Point add
     90 func p256PointAddAsm(res, in1, in2 []uint64)
     91 
     92 // Point double
     93 func p256PointDoubleAsm(res, in []uint64)
     94 
     95 func (curve p256Curve) Inverse(k *big.Int) *big.Int {
     96 	if k.Sign() < 0 {
     97 		// This should never happen.
     98 		k = new(big.Int).Neg(k)
     99 	}
    100 
    101 	if k.Cmp(p256.N) >= 0 {
    102 		// This should never happen.
    103 		k = new(big.Int).Mod(k, p256.N)
    104 	}
    105 
    106 	// table will store precomputed powers of x. The four words at index
    107 	// 4i store x^(i+1).
    108 	var table [4 * 15]uint64
    109 
    110 	x := make([]uint64, 4)
    111 	fromBig(x[:], k)
    112 	// This code operates in the Montgomery domain where R = 2^256 mod n
    113 	// and n is the order of the scalar field. (See initP256 for the
    114 	// value.) Elements in the Montgomery domain take the form aR and
    115 	// multiplication of x and y in the calculates (x  y  R^-1) mod n. RR
    116 	// is RR mod n thus the Montgomery multiplication x and RR gives xR,
    117 	// i.e. converts x into the Montgomery domain.
    118 	RR := []uint64{0x83244c95be79eea2, 0x4699799c49bd6fa6, 0x2845b2392b6bec59, 0x66e12d94f3d95620}
    119 	p256OrdMul(table[:4], x, RR)
    120 
    121 	// Prepare the table, no need in constant time access, because the
    122 	// power is not a secret. (Entry 0 is never used.)
    123 	for i := 2; i < 16; i += 2 {
    124 		p256OrdSqr(table[4*(i-1):], table[4*((i/2)-1):], 1)
    125 		p256OrdMul(table[4*i:], table[4*(i-1):], table[:4])
    126 	}
    127 
    128 	x[0] = table[4*14+0] // f
    129 	x[1] = table[4*14+1]
    130 	x[2] = table[4*14+2]
    131 	x[3] = table[4*14+3]
    132 
    133 	p256OrdSqr(x, x, 4)
    134 	p256OrdMul(x, x, table[4*14:4*14+4]) // ff
    135 	t := make([]uint64, 4, 4)
    136 	t[0] = x[0]
    137 	t[1] = x[1]
    138 	t[2] = x[2]
    139 	t[3] = x[3]
    140 
    141 	p256OrdSqr(x, x, 8)
    142 	p256OrdMul(x, x, t) // ffff
    143 	t[0] = x[0]
    144 	t[1] = x[1]
    145 	t[2] = x[2]
    146 	t[3] = x[3]
    147 
    148 	p256OrdSqr(x, x, 16)
    149 	p256OrdMul(x, x, t) // ffffffff
    150 	t[0] = x[0]
    151 	t[1] = x[1]
    152 	t[2] = x[2]
    153 	t[3] = x[3]
    154 
    155 	p256OrdSqr(x, x, 64) // ffffffff0000000000000000
    156 	p256OrdMul(x, x, t)  // ffffffff00000000ffffffff
    157 	p256OrdSqr(x, x, 32) // ffffffff00000000ffffffff00000000
    158 	p256OrdMul(x, x, t)  // ffffffff00000000ffffffffffffffff
    159 
    160 	// Remaining 32 windows
    161 	expLo := [32]byte{0xb, 0xc, 0xe, 0x6, 0xf, 0xa, 0xa, 0xd, 0xa, 0x7, 0x1, 0x7, 0x9, 0xe, 0x8, 0x4, 0xf, 0x3, 0xb, 0x9, 0xc, 0xa, 0xc, 0x2, 0xf, 0xc, 0x6, 0x3, 0x2, 0x5, 0x4, 0xf}
    162 	for i := 0; i < 32; i++ {
    163 		p256OrdSqr(x, x, 4)
    164 		p256OrdMul(x, x, table[4*(expLo[i]-1):])
    165 	}
    166 
    167 	// Multiplying by one in the Montgomery domain converts a Montgomery
    168 	// value out of the domain.
    169 	one := []uint64{1, 0, 0, 0}
    170 	p256OrdMul(x, x, one)
    171 
    172 	xOut := make([]byte, 32)
    173 	p256LittleToBig(xOut, x)
    174 	return new(big.Int).SetBytes(xOut)
    175 }
    176 
    177 // fromBig converts a *big.Int into a format used by this code.
    178 func fromBig(out []uint64, big *big.Int) {
    179 	for i := range out {
    180 		out[i] = 0
    181 	}
    182 
    183 	for i, v := range big.Bits() {
    184 		out[i] = uint64(v)
    185 	}
    186 }
    187 
    188 // p256GetScalar endian-swaps the big-endian scalar value from in and writes it
    189 // to out. If the scalar is equal or greater than the order of the group, it's
    190 // reduced modulo that order.
    191 func p256GetScalar(out []uint64, in []byte) {
    192 	n := new(big.Int).SetBytes(in)
    193 
    194 	if n.Cmp(p256.N) >= 0 {
    195 		n.Mod(n, p256.N)
    196 	}
    197 	fromBig(out, n)
    198 }
    199 
    200 // p256Mul operates in a Montgomery domain with R = 2^256 mod p, where p is the
    201 // underlying field of the curve. (See initP256 for the value.) Thus rr here is
    202 // RR mod p. See comment in Inverse about how this is used.
    203 var rr = []uint64{0x0000000000000003, 0xfffffffbffffffff, 0xfffffffffffffffe, 0x00000004fffffffd}
    204 
    205 func maybeReduceModP(in *big.Int) *big.Int {
    206 	if in.Cmp(p256.P) < 0 {
    207 		return in
    208 	}
    209 	return new(big.Int).Mod(in, p256.P)
    210 }
    211 
    212 func (curve p256Curve) CombinedMult(bigX, bigY *big.Int, baseScalar, scalar []byte) (x, y *big.Int) {
    213 	scalarReversed := make([]uint64, 4)
    214 	var r1, r2 p256Point
    215 	p256GetScalar(scalarReversed, baseScalar)
    216 	r1.p256BaseMult(scalarReversed)
    217 
    218 	p256GetScalar(scalarReversed, scalar)
    219 	fromBig(r2.xyz[0:4], maybeReduceModP(bigX))
    220 	fromBig(r2.xyz[4:8], maybeReduceModP(bigY))
    221 	p256Mul(r2.xyz[0:4], r2.xyz[0:4], rr[:])
    222 	p256Mul(r2.xyz[4:8], r2.xyz[4:8], rr[:])
    223 
    224 	// This sets r2's Z value to 1, in the Montgomery domain.
    225 	r2.xyz[8] = 0x0000000000000001
    226 	r2.xyz[9] = 0xffffffff00000000
    227 	r2.xyz[10] = 0xffffffffffffffff
    228 	r2.xyz[11] = 0x00000000fffffffe
    229 
    230 	r2.p256ScalarMult(scalarReversed)
    231 	p256PointAddAsm(r1.xyz[:], r1.xyz[:], r2.xyz[:])
    232 	return r1.p256PointToAffine()
    233 }
    234 
    235 func (curve p256Curve) ScalarBaseMult(scalar []byte) (x, y *big.Int) {
    236 	scalarReversed := make([]uint64, 4)
    237 	p256GetScalar(scalarReversed, scalar)
    238 
    239 	var r p256Point
    240 	r.p256BaseMult(scalarReversed)
    241 	return r.p256PointToAffine()
    242 }
    243 
    244 func (curve p256Curve) ScalarMult(bigX, bigY *big.Int, scalar []byte) (x, y *big.Int) {
    245 	scalarReversed := make([]uint64, 4)
    246 	p256GetScalar(scalarReversed, scalar)
    247 
    248 	var r p256Point
    249 	fromBig(r.xyz[0:4], maybeReduceModP(bigX))
    250 	fromBig(r.xyz[4:8], maybeReduceModP(bigY))
    251 	p256Mul(r.xyz[0:4], r.xyz[0:4], rr[:])
    252 	p256Mul(r.xyz[4:8], r.xyz[4:8], rr[:])
    253 	// This sets r2's Z value to 1, in the Montgomery domain.
    254 	r.xyz[8] = 0x0000000000000001
    255 	r.xyz[9] = 0xffffffff00000000
    256 	r.xyz[10] = 0xffffffffffffffff
    257 	r.xyz[11] = 0x00000000fffffffe
    258 
    259 	r.p256ScalarMult(scalarReversed)
    260 	return r.p256PointToAffine()
    261 }
    262 
    263 func (p *p256Point) p256PointToAffine() (x, y *big.Int) {
    264 	zInv := make([]uint64, 4)
    265 	zInvSq := make([]uint64, 4)
    266 	p256Inverse(zInv, p.xyz[8:12])
    267 	p256Sqr(zInvSq, zInv)
    268 	p256Mul(zInv, zInv, zInvSq)
    269 
    270 	p256Mul(zInvSq, p.xyz[0:4], zInvSq)
    271 	p256Mul(zInv, p.xyz[4:8], zInv)
    272 
    273 	p256FromMont(zInvSq, zInvSq)
    274 	p256FromMont(zInv, zInv)
    275 
    276 	xOut := make([]byte, 32)
    277 	yOut := make([]byte, 32)
    278 	p256LittleToBig(xOut, zInvSq)
    279 	p256LittleToBig(yOut, zInv)
    280 
    281 	return new(big.Int).SetBytes(xOut), new(big.Int).SetBytes(yOut)
    282 }
    283 
    284 // p256Inverse sets out to in^-1 mod p.
    285 func p256Inverse(out, in []uint64) {
    286 	var stack [6 * 4]uint64
    287 	p2 := stack[4*0 : 4*0+4]
    288 	p4 := stack[4*1 : 4*1+4]
    289 	p8 := stack[4*2 : 4*2+4]
    290 	p16 := stack[4*3 : 4*3+4]
    291 	p32 := stack[4*4 : 4*4+4]
    292 
    293 	p256Sqr(out, in)
    294 	p256Mul(p2, out, in) // 3*p
    295 
    296 	p256Sqr(out, p2)
    297 	p256Sqr(out, out)
    298 	p256Mul(p4, out, p2) // f*p
    299 
    300 	p256Sqr(out, p4)
    301 	p256Sqr(out, out)
    302 	p256Sqr(out, out)
    303 	p256Sqr(out, out)
    304 	p256Mul(p8, out, p4) // ff*p
    305 
    306 	p256Sqr(out, p8)
    307 
    308 	for i := 0; i < 7; i++ {
    309 		p256Sqr(out, out)
    310 	}
    311 	p256Mul(p16, out, p8) // ffff*p
    312 
    313 	p256Sqr(out, p16)
    314 	for i := 0; i < 15; i++ {
    315 		p256Sqr(out, out)
    316 	}
    317 	p256Mul(p32, out, p16) // ffffffff*p
    318 
    319 	p256Sqr(out, p32)
    320 
    321 	for i := 0; i < 31; i++ {
    322 		p256Sqr(out, out)
    323 	}
    324 	p256Mul(out, out, in)
    325 
    326 	for i := 0; i < 32*4; i++ {
    327 		p256Sqr(out, out)
    328 	}
    329 	p256Mul(out, out, p32)
    330 
    331 	for i := 0; i < 32; i++ {
    332 		p256Sqr(out, out)
    333 	}
    334 	p256Mul(out, out, p32)
    335 
    336 	for i := 0; i < 16; i++ {
    337 		p256Sqr(out, out)
    338 	}
    339 	p256Mul(out, out, p16)
    340 
    341 	for i := 0; i < 8; i++ {
    342 		p256Sqr(out, out)
    343 	}
    344 	p256Mul(out, out, p8)
    345 
    346 	p256Sqr(out, out)
    347 	p256Sqr(out, out)
    348 	p256Sqr(out, out)
    349 	p256Sqr(out, out)
    350 	p256Mul(out, out, p4)
    351 
    352 	p256Sqr(out, out)
    353 	p256Sqr(out, out)
    354 	p256Mul(out, out, p2)
    355 
    356 	p256Sqr(out, out)
    357 	p256Sqr(out, out)
    358 	p256Mul(out, out, in)
    359 }
    360 
    361 func (p *p256Point) p256StorePoint(r *[16 * 4 * 3]uint64, index int) {
    362 	copy(r[index*12:], p.xyz[:])
    363 }
    364 
    365 func boothW5(in uint) (int, int) {
    366 	var s uint = ^((in >> 5) - 1)
    367 	var d uint = (1 << 6) - in - 1
    368 	d = (d & s) | (in & (^s))
    369 	d = (d >> 1) + (d & 1)
    370 	return int(d), int(s & 1)
    371 }
    372 
    373 func boothW7(in uint) (int, int) {
    374 	var s uint = ^((in >> 7) - 1)
    375 	var d uint = (1 << 8) - in - 1
    376 	d = (d & s) | (in & (^s))
    377 	d = (d >> 1) + (d & 1)
    378 	return int(d), int(s & 1)
    379 }
    380 
    381 func initTable() {
    382 	p256Precomputed = new([37][64 * 8]uint64)
    383 
    384 	basePoint := []uint64{
    385 		0x79e730d418a9143c, 0x75ba95fc5fedb601, 0x79fb732b77622510, 0x18905f76a53755c6,
    386 		0xddf25357ce95560a, 0x8b4ab8e4ba19e45c, 0xd2e88688dd21f325, 0x8571ff1825885d85,
    387 		0x0000000000000001, 0xffffffff00000000, 0xffffffffffffffff, 0x00000000fffffffe,
    388 	}
    389 	t1 := make([]uint64, 12)
    390 	t2 := make([]uint64, 12)
    391 	copy(t2, basePoint)
    392 
    393 	zInv := make([]uint64, 4)
    394 	zInvSq := make([]uint64, 4)
    395 	for j := 0; j < 64; j++ {
    396 		copy(t1, t2)
    397 		for i := 0; i < 37; i++ {
    398 			// The window size is 7 so we need to double 7 times.
    399 			if i != 0 {
    400 				for k := 0; k < 7; k++ {
    401 					p256PointDoubleAsm(t1, t1)
    402 				}
    403 			}
    404 			// Convert the point to affine form. (Its values are
    405 			// still in Montgomery form however.)
    406 			p256Inverse(zInv, t1[8:12])
    407 			p256Sqr(zInvSq, zInv)
    408 			p256Mul(zInv, zInv, zInvSq)
    409 
    410 			p256Mul(t1[:4], t1[:4], zInvSq)
    411 			p256Mul(t1[4:8], t1[4:8], zInv)
    412 
    413 			copy(t1[8:12], basePoint[8:12])
    414 			// Update the table entry
    415 			copy(p256Precomputed[i][j*8:], t1[:8])
    416 		}
    417 		if j == 0 {
    418 			p256PointDoubleAsm(t2, basePoint)
    419 		} else {
    420 			p256PointAddAsm(t2, t2, basePoint)
    421 		}
    422 	}
    423 }
    424 
    425 func (p *p256Point) p256BaseMult(scalar []uint64) {
    426 	precomputeOnce.Do(initTable)
    427 
    428 	wvalue := (scalar[0] << 1) & 0xff
    429 	sel, sign := boothW7(uint(wvalue))
    430 	p256SelectBase(p.xyz[0:8], p256Precomputed[0][0:], sel)
    431 	p256NegCond(p.xyz[4:8], sign)
    432 
    433 	// (This is one, in the Montgomery domain.)
    434 	p.xyz[8] = 0x0000000000000001
    435 	p.xyz[9] = 0xffffffff00000000
    436 	p.xyz[10] = 0xffffffffffffffff
    437 	p.xyz[11] = 0x00000000fffffffe
    438 
    439 	var t0 p256Point
    440 	// (This is one, in the Montgomery domain.)
    441 	t0.xyz[8] = 0x0000000000000001
    442 	t0.xyz[9] = 0xffffffff00000000
    443 	t0.xyz[10] = 0xffffffffffffffff
    444 	t0.xyz[11] = 0x00000000fffffffe
    445 
    446 	index := uint(6)
    447 	zero := sel
    448 
    449 	for i := 1; i < 37; i++ {
    450 		if index < 192 {
    451 			wvalue = ((scalar[index/64] >> (index % 64)) + (scalar[index/64+1] << (64 - (index % 64)))) & 0xff
    452 		} else {
    453 			wvalue = (scalar[index/64] >> (index % 64)) & 0xff
    454 		}
    455 		index += 7
    456 		sel, sign = boothW7(uint(wvalue))
    457 		p256SelectBase(t0.xyz[0:8], p256Precomputed[i][0:], sel)
    458 		p256PointAddAffineAsm(p.xyz[0:12], p.xyz[0:12], t0.xyz[0:8], sign, sel, zero)
    459 		zero |= sel
    460 	}
    461 }
    462 
    463 func (p *p256Point) p256ScalarMult(scalar []uint64) {
    464 	// precomp is a table of precomputed points that stores powers of p
    465 	// from p^1 to p^16.
    466 	var precomp [16 * 4 * 3]uint64
    467 	var t0, t1, t2, t3 p256Point
    468 
    469 	// Prepare the table
    470 	p.p256StorePoint(&precomp, 0) // 1
    471 
    472 	p256PointDoubleAsm(t0.xyz[:], p.xyz[:])
    473 	p256PointDoubleAsm(t1.xyz[:], t0.xyz[:])
    474 	p256PointDoubleAsm(t2.xyz[:], t1.xyz[:])
    475 	p256PointDoubleAsm(t3.xyz[:], t2.xyz[:])
    476 	t0.p256StorePoint(&precomp, 1)  // 2
    477 	t1.p256StorePoint(&precomp, 3)  // 4
    478 	t2.p256StorePoint(&precomp, 7)  // 8
    479 	t3.p256StorePoint(&precomp, 15) // 16
    480 
    481 	p256PointAddAsm(t0.xyz[:], t0.xyz[:], p.xyz[:])
    482 	p256PointAddAsm(t1.xyz[:], t1.xyz[:], p.xyz[:])
    483 	p256PointAddAsm(t2.xyz[:], t2.xyz[:], p.xyz[:])
    484 	t0.p256StorePoint(&precomp, 2) // 3
    485 	t1.p256StorePoint(&precomp, 4) // 5
    486 	t2.p256StorePoint(&precomp, 8) // 9
    487 
    488 	p256PointDoubleAsm(t0.xyz[:], t0.xyz[:])
    489 	p256PointDoubleAsm(t1.xyz[:], t1.xyz[:])
    490 	t0.p256StorePoint(&precomp, 5) // 6
    491 	t1.p256StorePoint(&precomp, 9) // 10
    492 
    493 	p256PointAddAsm(t2.xyz[:], t0.xyz[:], p.xyz[:])
    494 	p256PointAddAsm(t1.xyz[:], t1.xyz[:], p.xyz[:])
    495 	t2.p256StorePoint(&precomp, 6)  // 7
    496 	t1.p256StorePoint(&precomp, 10) // 11
    497 
    498 	p256PointDoubleAsm(t0.xyz[:], t0.xyz[:])
    499 	p256PointDoubleAsm(t2.xyz[:], t2.xyz[:])
    500 	t0.p256StorePoint(&precomp, 11) // 12
    501 	t2.p256StorePoint(&precomp, 13) // 14
    502 
    503 	p256PointAddAsm(t0.xyz[:], t0.xyz[:], p.xyz[:])
    504 	p256PointAddAsm(t2.xyz[:], t2.xyz[:], p.xyz[:])
    505 	t0.p256StorePoint(&precomp, 12) // 13
    506 	t2.p256StorePoint(&precomp, 14) // 15
    507 
    508 	// Start scanning the window from top bit
    509 	index := uint(254)
    510 	var sel, sign int
    511 
    512 	wvalue := (scalar[index/64] >> (index % 64)) & 0x3f
    513 	sel, _ = boothW5(uint(wvalue))
    514 
    515 	p256Select(p.xyz[0:12], precomp[0:], sel)
    516 	zero := sel
    517 
    518 	for index > 4 {
    519 		index -= 5
    520 		p256PointDoubleAsm(p.xyz[:], p.xyz[:])
    521 		p256PointDoubleAsm(p.xyz[:], p.xyz[:])
    522 		p256PointDoubleAsm(p.xyz[:], p.xyz[:])
    523 		p256PointDoubleAsm(p.xyz[:], p.xyz[:])
    524 		p256PointDoubleAsm(p.xyz[:], p.xyz[:])
    525 
    526 		if index < 192 {
    527 			wvalue = ((scalar[index/64] >> (index % 64)) + (scalar[index/64+1] << (64 - (index % 64)))) & 0x3f
    528 		} else {
    529 			wvalue = (scalar[index/64] >> (index % 64)) & 0x3f
    530 		}
    531 
    532 		sel, sign = boothW5(uint(wvalue))
    533 
    534 		p256Select(t0.xyz[0:], precomp[0:], sel)
    535 		p256NegCond(t0.xyz[4:8], sign)
    536 		p256PointAddAsm(t1.xyz[:], p.xyz[:], t0.xyz[:])
    537 		p256MovCond(t1.xyz[0:12], t1.xyz[0:12], p.xyz[0:12], sel)
    538 		p256MovCond(p.xyz[0:12], t1.xyz[0:12], t0.xyz[0:12], zero)
    539 		zero |= sel
    540 	}
    541 
    542 	p256PointDoubleAsm(p.xyz[:], p.xyz[:])
    543 	p256PointDoubleAsm(p.xyz[:], p.xyz[:])
    544 	p256PointDoubleAsm(p.xyz[:], p.xyz[:])
    545 	p256PointDoubleAsm(p.xyz[:], p.xyz[:])
    546 	p256PointDoubleAsm(p.xyz[:], p.xyz[:])
    547 
    548 	wvalue = (scalar[0] << 1) & 0x3f
    549 	sel, sign = boothW5(uint(wvalue))
    550 
    551 	p256Select(t0.xyz[0:], precomp[0:], sel)
    552 	p256NegCond(t0.xyz[4:8], sign)
    553 	p256PointAddAsm(t1.xyz[:], p.xyz[:], t0.xyz[:])
    554 	p256MovCond(t1.xyz[0:12], t1.xyz[0:12], p.xyz[0:12], sel)
    555 	p256MovCond(p.xyz[0:12], t1.xyz[0:12], t0.xyz[0:12], zero)
    556 }
    557