Home | History | Annotate | Download | only in elliptic
      1 // Copyright 2016 The Go Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style
      3 // license that can be found in the LICENSE file.
      4 
      5 // +build s390x
      6 
      7 package elliptic
      8 
      9 import (
     10 	"math/big"
     11 )
     12 
     13 type p256CurveFast struct {
     14 	*CurveParams
     15 }
     16 
     17 type p256Point struct {
     18 	x [32]byte
     19 	y [32]byte
     20 	z [32]byte
     21 }
     22 
     23 var (
     24 	p256        Curve
     25 	p256PreFast *[37][64]p256Point
     26 )
     27 
     28 // hasVectorFacility reports whether the machine has the z/Architecture
     29 // vector facility installed and enabled.
     30 func hasVectorFacility() bool
     31 
     32 var hasVX = hasVectorFacility()
     33 
     34 func initP256Arch() {
     35 	if hasVX {
     36 		p256 = p256CurveFast{p256Params}
     37 		initTable()
     38 		return
     39 	}
     40 
     41 	// No vector support, use pure Go implementation.
     42 	p256 = p256Curve{p256Params}
     43 	return
     44 }
     45 
     46 func (curve p256CurveFast) Params() *CurveParams {
     47 	return curve.CurveParams
     48 }
     49 
     50 // Functions implemented in p256_asm_s390x.s
     51 // Montgomery multiplication modulo P256
     52 func p256MulAsm(res, in1, in2 []byte)
     53 
     54 // Montgomery square modulo P256
     55 func p256Sqr(res, in []byte) {
     56 	p256MulAsm(res, in, in)
     57 }
     58 
     59 // Montgomery multiplication by 1
     60 func p256FromMont(res, in []byte)
     61 
     62 // iff cond == 1  val <- -val
     63 func p256NegCond(val *p256Point, cond int)
     64 
     65 // if cond == 0 res <- b; else res <- a
     66 func p256MovCond(res, a, b *p256Point, cond int)
     67 
     68 // Constant time table access
     69 func p256Select(point *p256Point, table []p256Point, idx int)
     70 func p256SelectBase(point *p256Point, table []p256Point, idx int)
     71 
     72 // Montgomery multiplication modulo Ord(G)
     73 func p256OrdMul(res, in1, in2 []byte)
     74 
     75 // Montgomery square modulo Ord(G), repeated n times
     76 func p256OrdSqr(res, in []byte, n int) {
     77 	copy(res, in)
     78 	for i := 0; i < n; i += 1 {
     79 		p256OrdMul(res, res, res)
     80 	}
     81 }
     82 
     83 // Point add with P2 being affine point
     84 // If sign == 1 -> P2 = -P2
     85 // If sel == 0 -> P3 = P1
     86 // if zero == 0 -> P3 = P2
     87 func p256PointAddAffineAsm(P3, P1, P2 *p256Point, sign, sel, zero int)
     88 
     89 // Point add
     90 func p256PointAddAsm(P3, P1, P2 *p256Point)
     91 func p256PointDoubleAsm(P3, P1 *p256Point)
     92 
     93 func (curve p256CurveFast) Inverse(k *big.Int) *big.Int {
     94 	if k.Cmp(p256Params.N) >= 0 {
     95 		// This should never happen.
     96 		reducedK := new(big.Int).Mod(k, p256Params.N)
     97 		k = reducedK
     98 	}
     99 
    100 	// table will store precomputed powers of x. The 32 bytes at index
    101 	// i store x^(i+1).
    102 	var table [15][32]byte
    103 
    104 	x := fromBig(k)
    105 	// This code operates in the Montgomery domain where R = 2^256 mod n
    106 	// and n is the order of the scalar field. (See initP256 for the
    107 	// value.) Elements in the Montgomery domain take the form aR and
    108 	// multiplication of x and y in the calculates (x  y  R^-1) mod n. RR
    109 	// is RR mod n thus the Montgomery multiplication x and RR gives xR,
    110 	// i.e. converts x into the Montgomery domain. Stored in BigEndian form
    111 	RR := []byte{0x66, 0xe1, 0x2d, 0x94, 0xf3, 0xd9, 0x56, 0x20, 0x28, 0x45, 0xb2, 0x39, 0x2b, 0x6b, 0xec, 0x59,
    112 		0x46, 0x99, 0x79, 0x9c, 0x49, 0xbd, 0x6f, 0xa6, 0x83, 0x24, 0x4c, 0x95, 0xbe, 0x79, 0xee, 0xa2}
    113 
    114 	p256OrdMul(table[0][:], x, RR)
    115 
    116 	// Prepare the table, no need in constant time access, because the
    117 	// power is not a secret. (Entry 0 is never used.)
    118 	for i := 2; i < 16; i += 2 {
    119 		p256OrdSqr(table[i-1][:], table[(i/2)-1][:], 1)
    120 		p256OrdMul(table[i][:], table[i-1][:], table[0][:])
    121 	}
    122 
    123 	copy(x, table[14][:]) // f
    124 
    125 	p256OrdSqr(x[0:32], x[0:32], 4)
    126 	p256OrdMul(x[0:32], x[0:32], table[14][:]) // ff
    127 	t := make([]byte, 32)
    128 	copy(t, x)
    129 
    130 	p256OrdSqr(x, x, 8)
    131 	p256OrdMul(x, x, t) // ffff
    132 	copy(t, x)
    133 
    134 	p256OrdSqr(x, x, 16)
    135 	p256OrdMul(x, x, t) // ffffffff
    136 	copy(t, x)
    137 
    138 	p256OrdSqr(x, x, 64) // ffffffff0000000000000000
    139 	p256OrdMul(x, x, t)  // ffffffff00000000ffffffff
    140 	p256OrdSqr(x, x, 32) // ffffffff00000000ffffffff00000000
    141 	p256OrdMul(x, x, t)  // ffffffff00000000ffffffffffffffff
    142 
    143 	// Remaining 32 windows
    144 	expLo := [32]byte{0xb, 0xc, 0xe, 0x6, 0xf, 0xa, 0xa, 0xd, 0xa, 0x7, 0x1, 0x7, 0x9, 0xe, 0x8, 0x4,
    145 		0xf, 0x3, 0xb, 0x9, 0xc, 0xa, 0xc, 0x2, 0xf, 0xc, 0x6, 0x3, 0x2, 0x5, 0x4, 0xf}
    146 	for i := 0; i < 32; i++ {
    147 		p256OrdSqr(x, x, 4)
    148 		p256OrdMul(x, x, table[expLo[i]-1][:])
    149 	}
    150 
    151 	// Multiplying by one in the Montgomery domain converts a Montgomery
    152 	// value out of the domain.
    153 	one := []byte{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
    154 		0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1}
    155 	p256OrdMul(x, x, one)
    156 
    157 	return new(big.Int).SetBytes(x)
    158 }
    159 
    160 // fromBig converts a *big.Int into a format used by this code.
    161 func fromBig(big *big.Int) []byte {
    162 	// This could be done a lot more efficiently...
    163 	res := big.Bytes()
    164 	if 32 == len(res) {
    165 		return res
    166 	}
    167 	t := make([]byte, 32)
    168 	offset := 32 - len(res)
    169 	for i := len(res) - 1; i >= 0; i-- {
    170 		t[i+offset] = res[i]
    171 	}
    172 	return t
    173 }
    174 
    175 // p256GetMultiplier makes sure byte array will have 32 byte elements, If the scalar
    176 // is equal or greater than the order of the group, it's reduced modulo that order.
    177 func p256GetMultiplier(in []byte) []byte {
    178 	n := new(big.Int).SetBytes(in)
    179 
    180 	if n.Cmp(p256Params.N) >= 0 {
    181 		n.Mod(n, p256Params.N)
    182 	}
    183 	return fromBig(n)
    184 }
    185 
    186 // p256MulAsm operates in a Montgomery domain with R = 2^256 mod p, where p is the
    187 // underlying field of the curve. (See initP256 for the value.) Thus rr here is
    188 // RR mod p. See comment in Inverse about how this is used.
    189 var rr = []byte{0x00, 0x00, 0x00, 0x04, 0xff, 0xff, 0xff, 0xfd, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xfe,
    190 	0xff, 0xff, 0xff, 0xfb, 0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x03}
    191 
    192 // (This is one, in the Montgomery domain.)
    193 var one = []byte{0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xfe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    194 	0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01}
    195 
    196 func maybeReduceModP(in *big.Int) *big.Int {
    197 	if in.Cmp(p256Params.P) < 0 {
    198 		return in
    199 	}
    200 	return new(big.Int).Mod(in, p256Params.P)
    201 }
    202 
    203 func (curve p256CurveFast) CombinedMult(bigX, bigY *big.Int, baseScalar, scalar []byte) (x, y *big.Int) {
    204 	var r1, r2 p256Point
    205 	r1.p256BaseMult(p256GetMultiplier(baseScalar))
    206 
    207 	copy(r2.x[:], fromBig(maybeReduceModP(bigX)))
    208 	copy(r2.y[:], fromBig(maybeReduceModP(bigY)))
    209 	copy(r2.z[:], one)
    210 	p256MulAsm(r2.x[:], r2.x[:], rr[:])
    211 	p256MulAsm(r2.y[:], r2.y[:], rr[:])
    212 
    213 	r2.p256ScalarMult(p256GetMultiplier(scalar))
    214 	p256PointAddAsm(&r1, &r1, &r2)
    215 	return r1.p256PointToAffine()
    216 }
    217 
    218 func (curve p256CurveFast) ScalarBaseMult(scalar []byte) (x, y *big.Int) {
    219 	var r p256Point
    220 	r.p256BaseMult(p256GetMultiplier(scalar))
    221 	return r.p256PointToAffine()
    222 }
    223 
    224 func (curve p256CurveFast) ScalarMult(bigX, bigY *big.Int, scalar []byte) (x, y *big.Int) {
    225 	var r p256Point
    226 	copy(r.x[:], fromBig(maybeReduceModP(bigX)))
    227 	copy(r.y[:], fromBig(maybeReduceModP(bigY)))
    228 	copy(r.z[:], one)
    229 	p256MulAsm(r.x[:], r.x[:], rr[:])
    230 	p256MulAsm(r.y[:], r.y[:], rr[:])
    231 	r.p256ScalarMult(p256GetMultiplier(scalar))
    232 	return r.p256PointToAffine()
    233 }
    234 
    235 func (p *p256Point) p256PointToAffine() (x, y *big.Int) {
    236 	zInv := make([]byte, 32)
    237 	zInvSq := make([]byte, 32)
    238 
    239 	p256Inverse(zInv, p.z[:])
    240 	p256Sqr(zInvSq, zInv)
    241 	p256MulAsm(zInv, zInv, zInvSq)
    242 
    243 	p256MulAsm(zInvSq, p.x[:], zInvSq)
    244 	p256MulAsm(zInv, p.y[:], zInv)
    245 
    246 	p256FromMont(zInvSq, zInvSq)
    247 	p256FromMont(zInv, zInv)
    248 
    249 	return new(big.Int).SetBytes(zInvSq), new(big.Int).SetBytes(zInv)
    250 }
    251 
    252 // p256Inverse sets out to in^-1 mod p.
    253 func p256Inverse(out, in []byte) {
    254 	var stack [6 * 32]byte
    255 	p2 := stack[32*0 : 32*0+32]
    256 	p4 := stack[32*1 : 32*1+32]
    257 	p8 := stack[32*2 : 32*2+32]
    258 	p16 := stack[32*3 : 32*3+32]
    259 	p32 := stack[32*4 : 32*4+32]
    260 
    261 	p256Sqr(out, in)
    262 	p256MulAsm(p2, out, in) // 3*p
    263 
    264 	p256Sqr(out, p2)
    265 	p256Sqr(out, out)
    266 	p256MulAsm(p4, out, p2) // f*p
    267 
    268 	p256Sqr(out, p4)
    269 	p256Sqr(out, out)
    270 	p256Sqr(out, out)
    271 	p256Sqr(out, out)
    272 	p256MulAsm(p8, out, p4) // ff*p
    273 
    274 	p256Sqr(out, p8)
    275 
    276 	for i := 0; i < 7; i++ {
    277 		p256Sqr(out, out)
    278 	}
    279 	p256MulAsm(p16, out, p8) // ffff*p
    280 
    281 	p256Sqr(out, p16)
    282 	for i := 0; i < 15; i++ {
    283 		p256Sqr(out, out)
    284 	}
    285 	p256MulAsm(p32, out, p16) // ffffffff*p
    286 
    287 	p256Sqr(out, p32)
    288 
    289 	for i := 0; i < 31; i++ {
    290 		p256Sqr(out, out)
    291 	}
    292 	p256MulAsm(out, out, in)
    293 
    294 	for i := 0; i < 32*4; i++ {
    295 		p256Sqr(out, out)
    296 	}
    297 	p256MulAsm(out, out, p32)
    298 
    299 	for i := 0; i < 32; i++ {
    300 		p256Sqr(out, out)
    301 	}
    302 	p256MulAsm(out, out, p32)
    303 
    304 	for i := 0; i < 16; i++ {
    305 		p256Sqr(out, out)
    306 	}
    307 	p256MulAsm(out, out, p16)
    308 
    309 	for i := 0; i < 8; i++ {
    310 		p256Sqr(out, out)
    311 	}
    312 	p256MulAsm(out, out, p8)
    313 
    314 	p256Sqr(out, out)
    315 	p256Sqr(out, out)
    316 	p256Sqr(out, out)
    317 	p256Sqr(out, out)
    318 	p256MulAsm(out, out, p4)
    319 
    320 	p256Sqr(out, out)
    321 	p256Sqr(out, out)
    322 	p256MulAsm(out, out, p2)
    323 
    324 	p256Sqr(out, out)
    325 	p256Sqr(out, out)
    326 	p256MulAsm(out, out, in)
    327 }
    328 
    329 func boothW5(in uint) (int, int) {
    330 	var s uint = ^((in >> 5) - 1)
    331 	var d uint = (1 << 6) - in - 1
    332 	d = (d & s) | (in & (^s))
    333 	d = (d >> 1) + (d & 1)
    334 	return int(d), int(s & 1)
    335 }
    336 
    337 func boothW7(in uint) (int, int) {
    338 	var s uint = ^((in >> 7) - 1)
    339 	var d uint = (1 << 8) - in - 1
    340 	d = (d & s) | (in & (^s))
    341 	d = (d >> 1) + (d & 1)
    342 	return int(d), int(s & 1)
    343 }
    344 
    345 func initTable() {
    346 	p256PreFast = new([37][64]p256Point) //z coordinate not used
    347 	basePoint := p256Point{
    348 		x: [32]byte{0x18, 0x90, 0x5f, 0x76, 0xa5, 0x37, 0x55, 0xc6, 0x79, 0xfb, 0x73, 0x2b, 0x77, 0x62, 0x25, 0x10,
    349 			0x75, 0xba, 0x95, 0xfc, 0x5f, 0xed, 0xb6, 0x01, 0x79, 0xe7, 0x30, 0xd4, 0x18, 0xa9, 0x14, 0x3c}, //(p256.x*2^256)%p
    350 		y: [32]byte{0x85, 0x71, 0xff, 0x18, 0x25, 0x88, 0x5d, 0x85, 0xd2, 0xe8, 0x86, 0x88, 0xdd, 0x21, 0xf3, 0x25,
    351 			0x8b, 0x4a, 0xb8, 0xe4, 0xba, 0x19, 0xe4, 0x5c, 0xdd, 0xf2, 0x53, 0x57, 0xce, 0x95, 0x56, 0x0a}, //(p256.y*2^256)%p
    352 		z: [32]byte{0x00, 0x00, 0x00, 0x00, 0xff, 0xff, 0xff, 0xfe, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
    353 			0xff, 0xff, 0xff, 0xff, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x01}, //(p256.z*2^256)%p
    354 	}
    355 
    356 	t1 := new(p256Point)
    357 	t2 := new(p256Point)
    358 	*t2 = basePoint
    359 
    360 	zInv := make([]byte, 32)
    361 	zInvSq := make([]byte, 32)
    362 	for j := 0; j < 64; j++ {
    363 		*t1 = *t2
    364 		for i := 0; i < 37; i++ {
    365 			// The window size is 7 so we need to double 7 times.
    366 			if i != 0 {
    367 				for k := 0; k < 7; k++ {
    368 					p256PointDoubleAsm(t1, t1)
    369 				}
    370 			}
    371 			// Convert the point to affine form. (Its values are
    372 			// still in Montgomery form however.)
    373 			p256Inverse(zInv, t1.z[:])
    374 			p256Sqr(zInvSq, zInv)
    375 			p256MulAsm(zInv, zInv, zInvSq)
    376 
    377 			p256MulAsm(t1.x[:], t1.x[:], zInvSq)
    378 			p256MulAsm(t1.y[:], t1.y[:], zInv)
    379 
    380 			copy(t1.z[:], basePoint.z[:])
    381 			// Update the table entry
    382 			copy(p256PreFast[i][j].x[:], t1.x[:])
    383 			copy(p256PreFast[i][j].y[:], t1.y[:])
    384 		}
    385 		if j == 0 {
    386 			p256PointDoubleAsm(t2, &basePoint)
    387 		} else {
    388 			p256PointAddAsm(t2, t2, &basePoint)
    389 		}
    390 	}
    391 }
    392 
    393 func (p *p256Point) p256BaseMult(scalar []byte) {
    394 	wvalue := (uint(scalar[31]) << 1) & 0xff
    395 	sel, sign := boothW7(uint(wvalue))
    396 	p256SelectBase(p, p256PreFast[0][:], sel)
    397 	p256NegCond(p, sign)
    398 
    399 	copy(p.z[:], one[:])
    400 	var t0 p256Point
    401 
    402 	copy(t0.z[:], one[:])
    403 
    404 	index := uint(6)
    405 	zero := sel
    406 
    407 	for i := 1; i < 37; i++ {
    408 		if index < 247 {
    409 			wvalue = ((uint(scalar[31-index/8]) >> (index % 8)) + (uint(scalar[31-index/8-1]) << (8 - (index % 8)))) & 0xff
    410 		} else {
    411 			wvalue = (uint(scalar[31-index/8]) >> (index % 8)) & 0xff
    412 		}
    413 		index += 7
    414 		sel, sign = boothW7(uint(wvalue))
    415 		p256SelectBase(&t0, p256PreFast[i][:], sel)
    416 		p256PointAddAffineAsm(p, p, &t0, sign, sel, zero)
    417 		zero |= sel
    418 	}
    419 }
    420 
    421 func (p *p256Point) p256ScalarMult(scalar []byte) {
    422 	// precomp is a table of precomputed points that stores powers of p
    423 	// from p^1 to p^16.
    424 	var precomp [16]p256Point
    425 	var t0, t1, t2, t3 p256Point
    426 
    427 	// Prepare the table
    428 	*&precomp[0] = *p
    429 
    430 	p256PointDoubleAsm(&t0, p)
    431 	p256PointDoubleAsm(&t1, &t0)
    432 	p256PointDoubleAsm(&t2, &t1)
    433 	p256PointDoubleAsm(&t3, &t2)
    434 	*&precomp[1] = t0  // 2
    435 	*&precomp[3] = t1  // 4
    436 	*&precomp[7] = t2  // 8
    437 	*&precomp[15] = t3 // 16
    438 
    439 	p256PointAddAsm(&t0, &t0, p)
    440 	p256PointAddAsm(&t1, &t1, p)
    441 	p256PointAddAsm(&t2, &t2, p)
    442 	*&precomp[2] = t0 // 3
    443 	*&precomp[4] = t1 // 5
    444 	*&precomp[8] = t2 // 9
    445 
    446 	p256PointDoubleAsm(&t0, &t0)
    447 	p256PointDoubleAsm(&t1, &t1)
    448 	*&precomp[5] = t0 // 6
    449 	*&precomp[9] = t1 // 10
    450 
    451 	p256PointAddAsm(&t2, &t0, p)
    452 	p256PointAddAsm(&t1, &t1, p)
    453 	*&precomp[6] = t2  // 7
    454 	*&precomp[10] = t1 // 11
    455 
    456 	p256PointDoubleAsm(&t0, &t0)
    457 	p256PointDoubleAsm(&t2, &t2)
    458 	*&precomp[11] = t0 // 12
    459 	*&precomp[13] = t2 // 14
    460 
    461 	p256PointAddAsm(&t0, &t0, p)
    462 	p256PointAddAsm(&t2, &t2, p)
    463 	*&precomp[12] = t0 // 13
    464 	*&precomp[14] = t2 // 15
    465 
    466 	// Start scanning the window from top bit
    467 	index := uint(254)
    468 	var sel, sign int
    469 
    470 	wvalue := (uint(scalar[31-index/8]) >> (index % 8)) & 0x3f
    471 	sel, _ = boothW5(uint(wvalue))
    472 	p256Select(p, precomp[:], sel)
    473 	zero := sel
    474 
    475 	for index > 4 {
    476 		index -= 5
    477 		p256PointDoubleAsm(p, p)
    478 		p256PointDoubleAsm(p, p)
    479 		p256PointDoubleAsm(p, p)
    480 		p256PointDoubleAsm(p, p)
    481 		p256PointDoubleAsm(p, p)
    482 
    483 		if index < 247 {
    484 			wvalue = ((uint(scalar[31-index/8]) >> (index % 8)) + (uint(scalar[31-index/8-1]) << (8 - (index % 8)))) & 0x3f
    485 		} else {
    486 			wvalue = (uint(scalar[31-index/8]) >> (index % 8)) & 0x3f
    487 		}
    488 
    489 		sel, sign = boothW5(uint(wvalue))
    490 
    491 		p256Select(&t0, precomp[:], sel)
    492 		p256NegCond(&t0, sign)
    493 		p256PointAddAsm(&t1, p, &t0)
    494 		p256MovCond(&t1, &t1, p, sel)
    495 		p256MovCond(p, &t1, &t0, zero)
    496 		zero |= sel
    497 	}
    498 
    499 	p256PointDoubleAsm(p, p)
    500 	p256PointDoubleAsm(p, p)
    501 	p256PointDoubleAsm(p, p)
    502 	p256PointDoubleAsm(p, p)
    503 	p256PointDoubleAsm(p, p)
    504 
    505 	wvalue = (uint(scalar[31]) << 1) & 0x3f
    506 	sel, sign = boothW5(uint(wvalue))
    507 
    508 	p256Select(&t0, precomp[:], sel)
    509 	p256NegCond(&t0, sign)
    510 	p256PointAddAsm(&t1, p, &t0)
    511 	p256MovCond(&t1, &t1, p, sel)
    512 	p256MovCond(p, &t1, &t0, zero)
    513 }
    514