Home | History | Annotate | Download | only in curve25519
      1 // Copyright 2012 The Go Authors. All rights reserved.
      2 // Use of this source code is governed by a BSD-style
      3 // license that can be found in the LICENSE file.
      4 
      5 // +build amd64,!gccgo,!appengine
      6 
      7 package curve25519
      8 
      9 // These functions are implemented in the .s files. The names of the functions
     10 // in the rest of the file are also taken from the SUPERCOP sources to help
     11 // people following along.
     12 
     13 //go:noescape
     14 
     15 func cswap(inout *[5]uint64, v uint64)
     16 
     17 //go:noescape
     18 
     19 func ladderstep(inout *[5][5]uint64)
     20 
     21 //go:noescape
     22 
     23 func freeze(inout *[5]uint64)
     24 
     25 //go:noescape
     26 
     27 func mul(dest, a, b *[5]uint64)
     28 
     29 //go:noescape
     30 
     31 func square(out, in *[5]uint64)
     32 
     33 // mladder uses a Montgomery ladder to calculate (xr/zr) *= s.
     34 func mladder(xr, zr *[5]uint64, s *[32]byte) {
     35 	var work [5][5]uint64
     36 
     37 	work[0] = *xr
     38 	setint(&work[1], 1)
     39 	setint(&work[2], 0)
     40 	work[3] = *xr
     41 	setint(&work[4], 1)
     42 
     43 	j := uint(6)
     44 	var prevbit byte
     45 
     46 	for i := 31; i >= 0; i-- {
     47 		for j < 8 {
     48 			bit := ((*s)[i] >> j) & 1
     49 			swap := bit ^ prevbit
     50 			prevbit = bit
     51 			cswap(&work[1], uint64(swap))
     52 			ladderstep(&work)
     53 			j--
     54 		}
     55 		j = 7
     56 	}
     57 
     58 	*xr = work[1]
     59 	*zr = work[2]
     60 }
     61 
     62 func scalarMult(out, in, base *[32]byte) {
     63 	var e [32]byte
     64 	copy(e[:], (*in)[:])
     65 	e[0] &= 248
     66 	e[31] &= 127
     67 	e[31] |= 64
     68 
     69 	var t, z [5]uint64
     70 	unpack(&t, base)
     71 	mladder(&t, &z, &e)
     72 	invert(&z, &z)
     73 	mul(&t, &t, &z)
     74 	pack(out, &t)
     75 }
     76 
     77 func setint(r *[5]uint64, v uint64) {
     78 	r[0] = v
     79 	r[1] = 0
     80 	r[2] = 0
     81 	r[3] = 0
     82 	r[4] = 0
     83 }
     84 
     85 // unpack sets r = x where r consists of 5, 51-bit limbs in little-endian
     86 // order.
     87 func unpack(r *[5]uint64, x *[32]byte) {
     88 	r[0] = uint64(x[0]) |
     89 		uint64(x[1])<<8 |
     90 		uint64(x[2])<<16 |
     91 		uint64(x[3])<<24 |
     92 		uint64(x[4])<<32 |
     93 		uint64(x[5])<<40 |
     94 		uint64(x[6]&7)<<48
     95 
     96 	r[1] = uint64(x[6])>>3 |
     97 		uint64(x[7])<<5 |
     98 		uint64(x[8])<<13 |
     99 		uint64(x[9])<<21 |
    100 		uint64(x[10])<<29 |
    101 		uint64(x[11])<<37 |
    102 		uint64(x[12]&63)<<45
    103 
    104 	r[2] = uint64(x[12])>>6 |
    105 		uint64(x[13])<<2 |
    106 		uint64(x[14])<<10 |
    107 		uint64(x[15])<<18 |
    108 		uint64(x[16])<<26 |
    109 		uint64(x[17])<<34 |
    110 		uint64(x[18])<<42 |
    111 		uint64(x[19]&1)<<50
    112 
    113 	r[3] = uint64(x[19])>>1 |
    114 		uint64(x[20])<<7 |
    115 		uint64(x[21])<<15 |
    116 		uint64(x[22])<<23 |
    117 		uint64(x[23])<<31 |
    118 		uint64(x[24])<<39 |
    119 		uint64(x[25]&15)<<47
    120 
    121 	r[4] = uint64(x[25])>>4 |
    122 		uint64(x[26])<<4 |
    123 		uint64(x[27])<<12 |
    124 		uint64(x[28])<<20 |
    125 		uint64(x[29])<<28 |
    126 		uint64(x[30])<<36 |
    127 		uint64(x[31]&127)<<44
    128 }
    129 
    130 // pack sets out = x where out is the usual, little-endian form of the 5,
    131 // 51-bit limbs in x.
    132 func pack(out *[32]byte, x *[5]uint64) {
    133 	t := *x
    134 	freeze(&t)
    135 
    136 	out[0] = byte(t[0])
    137 	out[1] = byte(t[0] >> 8)
    138 	out[2] = byte(t[0] >> 16)
    139 	out[3] = byte(t[0] >> 24)
    140 	out[4] = byte(t[0] >> 32)
    141 	out[5] = byte(t[0] >> 40)
    142 	out[6] = byte(t[0] >> 48)
    143 
    144 	out[6] ^= byte(t[1]<<3) & 0xf8
    145 	out[7] = byte(t[1] >> 5)
    146 	out[8] = byte(t[1] >> 13)
    147 	out[9] = byte(t[1] >> 21)
    148 	out[10] = byte(t[1] >> 29)
    149 	out[11] = byte(t[1] >> 37)
    150 	out[12] = byte(t[1] >> 45)
    151 
    152 	out[12] ^= byte(t[2]<<6) & 0xc0
    153 	out[13] = byte(t[2] >> 2)
    154 	out[14] = byte(t[2] >> 10)
    155 	out[15] = byte(t[2] >> 18)
    156 	out[16] = byte(t[2] >> 26)
    157 	out[17] = byte(t[2] >> 34)
    158 	out[18] = byte(t[2] >> 42)
    159 	out[19] = byte(t[2] >> 50)
    160 
    161 	out[19] ^= byte(t[3]<<1) & 0xfe
    162 	out[20] = byte(t[3] >> 7)
    163 	out[21] = byte(t[3] >> 15)
    164 	out[22] = byte(t[3] >> 23)
    165 	out[23] = byte(t[3] >> 31)
    166 	out[24] = byte(t[3] >> 39)
    167 	out[25] = byte(t[3] >> 47)
    168 
    169 	out[25] ^= byte(t[4]<<4) & 0xf0
    170 	out[26] = byte(t[4] >> 4)
    171 	out[27] = byte(t[4] >> 12)
    172 	out[28] = byte(t[4] >> 20)
    173 	out[29] = byte(t[4] >> 28)
    174 	out[30] = byte(t[4] >> 36)
    175 	out[31] = byte(t[4] >> 44)
    176 }
    177 
    178 // invert calculates r = x^-1 mod p using Fermat's little theorem.
    179 func invert(r *[5]uint64, x *[5]uint64) {
    180 	var z2, z9, z11, z2_5_0, z2_10_0, z2_20_0, z2_50_0, z2_100_0, t [5]uint64
    181 
    182 	square(&z2, x)        /* 2 */
    183 	square(&t, &z2)       /* 4 */
    184 	square(&t, &t)        /* 8 */
    185 	mul(&z9, &t, x)       /* 9 */
    186 	mul(&z11, &z9, &z2)   /* 11 */
    187 	square(&t, &z11)      /* 22 */
    188 	mul(&z2_5_0, &t, &z9) /* 2^5 - 2^0 = 31 */
    189 
    190 	square(&t, &z2_5_0)      /* 2^6 - 2^1 */
    191 	for i := 1; i < 5; i++ { /* 2^20 - 2^10 */
    192 		square(&t, &t)
    193 	}
    194 	mul(&z2_10_0, &t, &z2_5_0) /* 2^10 - 2^0 */
    195 
    196 	square(&t, &z2_10_0)      /* 2^11 - 2^1 */
    197 	for i := 1; i < 10; i++ { /* 2^20 - 2^10 */
    198 		square(&t, &t)
    199 	}
    200 	mul(&z2_20_0, &t, &z2_10_0) /* 2^20 - 2^0 */
    201 
    202 	square(&t, &z2_20_0)      /* 2^21 - 2^1 */
    203 	for i := 1; i < 20; i++ { /* 2^40 - 2^20 */
    204 		square(&t, &t)
    205 	}
    206 	mul(&t, &t, &z2_20_0) /* 2^40 - 2^0 */
    207 
    208 	square(&t, &t)            /* 2^41 - 2^1 */
    209 	for i := 1; i < 10; i++ { /* 2^50 - 2^10 */
    210 		square(&t, &t)
    211 	}
    212 	mul(&z2_50_0, &t, &z2_10_0) /* 2^50 - 2^0 */
    213 
    214 	square(&t, &z2_50_0)      /* 2^51 - 2^1 */
    215 	for i := 1; i < 50; i++ { /* 2^100 - 2^50 */
    216 		square(&t, &t)
    217 	}
    218 	mul(&z2_100_0, &t, &z2_50_0) /* 2^100 - 2^0 */
    219 
    220 	square(&t, &z2_100_0)      /* 2^101 - 2^1 */
    221 	for i := 1; i < 100; i++ { /* 2^200 - 2^100 */
    222 		square(&t, &t)
    223 	}
    224 	mul(&t, &t, &z2_100_0) /* 2^200 - 2^0 */
    225 
    226 	square(&t, &t)            /* 2^201 - 2^1 */
    227 	for i := 1; i < 50; i++ { /* 2^250 - 2^50 */
    228 		square(&t, &t)
    229 	}
    230 	mul(&t, &t, &z2_50_0) /* 2^250 - 2^0 */
    231 
    232 	square(&t, &t) /* 2^251 - 2^1 */
    233 	square(&t, &t) /* 2^252 - 2^2 */
    234 	square(&t, &t) /* 2^253 - 2^3 */
    235 
    236 	square(&t, &t) /* 2^254 - 2^4 */
    237 
    238 	square(&t, &t)   /* 2^255 - 2^5 */
    239 	mul(r, &t, &z11) /* 2^255 - 21 */
    240 }
    241