1 /* Copyright (c) 2015, Google Inc. 2 * 3 * Permission to use, copy, modify, and/or distribute this software for any 4 * purpose with or without fee is hereby granted, provided that the above 5 * copyright notice and this permission notice appear in all copies. 6 * 7 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 8 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 9 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY 10 * SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 11 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN ACTION 12 * OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF OR IN 13 * CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. */ 14 15 /* This code is mostly taken from the ref10 version of Ed25519 in SUPERCOP 16 * 20141124 (http://bench.cr.yp.to/supercop.html). That code is released as 17 * public domain but this file has the ISC license just to keep licencing 18 * simple. 19 * 20 * The field functions are shared by Ed25519 and X25519 where possible. */ 21 22 #include <openssl/curve25519.h> 23 24 #include <string.h> 25 26 #include "../internal.h" 27 #include "internal.h" 28 29 30 #if defined(BORINGSSL_X25519_X86_64) 31 32 typedef struct { uint64_t v[5]; } fe25519; 33 34 /* These functions are defined in asm/x25519-x86_64.S */ 35 void x25519_x86_64_work_cswap(fe25519 *, uint64_t); 36 void x25519_x86_64_mul(fe25519 *out, const fe25519 *a, const fe25519 *b); 37 void x25519_x86_64_square(fe25519 *out, const fe25519 *a); 38 void x25519_x86_64_freeze(fe25519 *); 39 void x25519_x86_64_ladderstep(fe25519 *work); 40 41 static void fe25519_setint(fe25519 *r, unsigned v) { 42 r->v[0] = v; 43 r->v[1] = 0; 44 r->v[2] = 0; 45 r->v[3] = 0; 46 r->v[4] = 0; 47 } 48 49 /* Assumes input x being reduced below 2^255 */ 50 static void fe25519_pack(unsigned char r[32], const fe25519 *x) { 51 fe25519 t; 52 t = *x; 53 x25519_x86_64_freeze(&t); 54 55 r[0] = (uint8_t)(t.v[0] & 0xff); 56 r[1] = (uint8_t)((t.v[0] >> 8) & 0xff); 57 r[2] = (uint8_t)((t.v[0] >> 16) & 0xff); 58 r[3] = (uint8_t)((t.v[0] >> 24) & 0xff); 59 r[4] = (uint8_t)((t.v[0] >> 32) & 0xff); 60 r[5] = (uint8_t)((t.v[0] >> 40) & 0xff); 61 r[6] = (uint8_t)((t.v[0] >> 48)); 62 63 r[6] ^= (uint8_t)((t.v[1] << 3) & 0xf8); 64 r[7] = (uint8_t)((t.v[1] >> 5) & 0xff); 65 r[8] = (uint8_t)((t.v[1] >> 13) & 0xff); 66 r[9] = (uint8_t)((t.v[1] >> 21) & 0xff); 67 r[10] = (uint8_t)((t.v[1] >> 29) & 0xff); 68 r[11] = (uint8_t)((t.v[1] >> 37) & 0xff); 69 r[12] = (uint8_t)((t.v[1] >> 45)); 70 71 r[12] ^= (uint8_t)((t.v[2] << 6) & 0xc0); 72 r[13] = (uint8_t)((t.v[2] >> 2) & 0xff); 73 r[14] = (uint8_t)((t.v[2] >> 10) & 0xff); 74 r[15] = (uint8_t)((t.v[2] >> 18) & 0xff); 75 r[16] = (uint8_t)((t.v[2] >> 26) & 0xff); 76 r[17] = (uint8_t)((t.v[2] >> 34) & 0xff); 77 r[18] = (uint8_t)((t.v[2] >> 42) & 0xff); 78 r[19] = (uint8_t)((t.v[2] >> 50)); 79 80 r[19] ^= (uint8_t)((t.v[3] << 1) & 0xfe); 81 r[20] = (uint8_t)((t.v[3] >> 7) & 0xff); 82 r[21] = (uint8_t)((t.v[3] >> 15) & 0xff); 83 r[22] = (uint8_t)((t.v[3] >> 23) & 0xff); 84 r[23] = (uint8_t)((t.v[3] >> 31) & 0xff); 85 r[24] = (uint8_t)((t.v[3] >> 39) & 0xff); 86 r[25] = (uint8_t)((t.v[3] >> 47)); 87 88 r[25] ^= (uint8_t)((t.v[4] << 4) & 0xf0); 89 r[26] = (uint8_t)((t.v[4] >> 4) & 0xff); 90 r[27] = (uint8_t)((t.v[4] >> 12) & 0xff); 91 r[28] = (uint8_t)((t.v[4] >> 20) & 0xff); 92 r[29] = (uint8_t)((t.v[4] >> 28) & 0xff); 93 r[30] = (uint8_t)((t.v[4] >> 36) & 0xff); 94 r[31] = (uint8_t)((t.v[4] >> 44)); 95 } 96 97 static void fe25519_unpack(fe25519 *r, const uint8_t x[32]) { 98 r->v[0] = x[0]; 99 r->v[0] += (uint64_t)x[1] << 8; 100 r->v[0] += (uint64_t)x[2] << 16; 101 r->v[0] += (uint64_t)x[3] << 24; 102 r->v[0] += (uint64_t)x[4] << 32; 103 r->v[0] += (uint64_t)x[5] << 40; 104 r->v[0] += ((uint64_t)x[6] & 7) << 48; 105 106 r->v[1] = x[6] >> 3; 107 r->v[1] += (uint64_t)x[7] << 5; 108 r->v[1] += (uint64_t)x[8] << 13; 109 r->v[1] += (uint64_t)x[9] << 21; 110 r->v[1] += (uint64_t)x[10] << 29; 111 r->v[1] += (uint64_t)x[11] << 37; 112 r->v[1] += ((uint64_t)x[12] & 63) << 45; 113 114 r->v[2] = x[12] >> 6; 115 r->v[2] += (uint64_t)x[13] << 2; 116 r->v[2] += (uint64_t)x[14] << 10; 117 r->v[2] += (uint64_t)x[15] << 18; 118 r->v[2] += (uint64_t)x[16] << 26; 119 r->v[2] += (uint64_t)x[17] << 34; 120 r->v[2] += (uint64_t)x[18] << 42; 121 r->v[2] += ((uint64_t)x[19] & 1) << 50; 122 123 r->v[3] = x[19] >> 1; 124 r->v[3] += (uint64_t)x[20] << 7; 125 r->v[3] += (uint64_t)x[21] << 15; 126 r->v[3] += (uint64_t)x[22] << 23; 127 r->v[3] += (uint64_t)x[23] << 31; 128 r->v[3] += (uint64_t)x[24] << 39; 129 r->v[3] += ((uint64_t)x[25] & 15) << 47; 130 131 r->v[4] = x[25] >> 4; 132 r->v[4] += (uint64_t)x[26] << 4; 133 r->v[4] += (uint64_t)x[27] << 12; 134 r->v[4] += (uint64_t)x[28] << 20; 135 r->v[4] += (uint64_t)x[29] << 28; 136 r->v[4] += (uint64_t)x[30] << 36; 137 r->v[4] += ((uint64_t)x[31] & 127) << 44; 138 } 139 140 static void fe25519_invert(fe25519 *r, const fe25519 *x) { 141 fe25519 z2; 142 fe25519 z9; 143 fe25519 z11; 144 fe25519 z2_5_0; 145 fe25519 z2_10_0; 146 fe25519 z2_20_0; 147 fe25519 z2_50_0; 148 fe25519 z2_100_0; 149 fe25519 t; 150 int i; 151 152 /* 2 */ x25519_x86_64_square(&z2, x); 153 /* 4 */ x25519_x86_64_square(&t, &z2); 154 /* 8 */ x25519_x86_64_square(&t, &t); 155 /* 9 */ x25519_x86_64_mul(&z9, &t, x); 156 /* 11 */ x25519_x86_64_mul(&z11, &z9, &z2); 157 /* 22 */ x25519_x86_64_square(&t, &z11); 158 /* 2^5 - 2^0 = 31 */ x25519_x86_64_mul(&z2_5_0, &t, &z9); 159 160 /* 2^6 - 2^1 */ x25519_x86_64_square(&t, &z2_5_0); 161 /* 2^20 - 2^10 */ for (i = 1; i < 5; i++) { x25519_x86_64_square(&t, &t); } 162 /* 2^10 - 2^0 */ x25519_x86_64_mul(&z2_10_0, &t, &z2_5_0); 163 164 /* 2^11 - 2^1 */ x25519_x86_64_square(&t, &z2_10_0); 165 /* 2^20 - 2^10 */ for (i = 1; i < 10; i++) { x25519_x86_64_square(&t, &t); } 166 /* 2^20 - 2^0 */ x25519_x86_64_mul(&z2_20_0, &t, &z2_10_0); 167 168 /* 2^21 - 2^1 */ x25519_x86_64_square(&t, &z2_20_0); 169 /* 2^40 - 2^20 */ for (i = 1; i < 20; i++) { x25519_x86_64_square(&t, &t); } 170 /* 2^40 - 2^0 */ x25519_x86_64_mul(&t, &t, &z2_20_0); 171 172 /* 2^41 - 2^1 */ x25519_x86_64_square(&t, &t); 173 /* 2^50 - 2^10 */ for (i = 1; i < 10; i++) { x25519_x86_64_square(&t, &t); } 174 /* 2^50 - 2^0 */ x25519_x86_64_mul(&z2_50_0, &t, &z2_10_0); 175 176 /* 2^51 - 2^1 */ x25519_x86_64_square(&t, &z2_50_0); 177 /* 2^100 - 2^50 */ for (i = 1; i < 50; i++) { x25519_x86_64_square(&t, &t); } 178 /* 2^100 - 2^0 */ x25519_x86_64_mul(&z2_100_0, &t, &z2_50_0); 179 180 /* 2^101 - 2^1 */ x25519_x86_64_square(&t, &z2_100_0); 181 /* 2^200 - 2^100 */ for (i = 1; i < 100; i++) { 182 x25519_x86_64_square(&t, &t); 183 } 184 /* 2^200 - 2^0 */ x25519_x86_64_mul(&t, &t, &z2_100_0); 185 186 /* 2^201 - 2^1 */ x25519_x86_64_square(&t, &t); 187 /* 2^250 - 2^50 */ for (i = 1; i < 50; i++) { x25519_x86_64_square(&t, &t); } 188 /* 2^250 - 2^0 */ x25519_x86_64_mul(&t, &t, &z2_50_0); 189 190 /* 2^251 - 2^1 */ x25519_x86_64_square(&t, &t); 191 /* 2^252 - 2^2 */ x25519_x86_64_square(&t, &t); 192 /* 2^253 - 2^3 */ x25519_x86_64_square(&t, &t); 193 194 /* 2^254 - 2^4 */ x25519_x86_64_square(&t, &t); 195 196 /* 2^255 - 2^5 */ x25519_x86_64_square(&t, &t); 197 /* 2^255 - 21 */ x25519_x86_64_mul(r, &t, &z11); 198 } 199 200 static void mladder(fe25519 *xr, fe25519 *zr, const uint8_t s[32]) { 201 fe25519 work[5]; 202 203 work[0] = *xr; 204 fe25519_setint(work + 1, 1); 205 fe25519_setint(work + 2, 0); 206 work[3] = *xr; 207 fe25519_setint(work + 4, 1); 208 209 int i, j; 210 uint8_t prevbit = 0; 211 212 j = 6; 213 for (i = 31; i >= 0; i--) { 214 while (j >= 0) { 215 const uint8_t bit = 1 & (s[i] >> j); 216 const uint64_t swap = bit ^ prevbit; 217 prevbit = bit; 218 x25519_x86_64_work_cswap(work + 1, swap); 219 x25519_x86_64_ladderstep(work); 220 j -= 1; 221 } 222 j = 7; 223 } 224 225 *xr = work[1]; 226 *zr = work[2]; 227 } 228 229 void x25519_x86_64(uint8_t out[32], const uint8_t scalar[32], 230 const uint8_t point[32]) { 231 uint8_t e[32]; 232 OPENSSL_memcpy(e, scalar, sizeof(e)); 233 234 e[0] &= 248; 235 e[31] &= 127; 236 e[31] |= 64; 237 238 fe25519 t; 239 fe25519 z; 240 fe25519_unpack(&t, point); 241 mladder(&t, &z, e); 242 fe25519_invert(&z, &z); 243 x25519_x86_64_mul(&t, &t, &z); 244 fe25519_pack(out, &t); 245 } 246 247 #endif /* BORINGSSL_X25519_X86_64 */ 248