1 // The MIT License (MIT) 2 // 3 // Copyright (c) 2015-2016 the fiat-crypto authors (see the AUTHORS file). 4 // 5 // Permission is hereby granted, free of charge, to any person obtaining a copy 6 // of this software and associated documentation files (the "Software"), to deal 7 // in the Software without restriction, including without limitation the rights 8 // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell 9 // copies of the Software, and to permit persons to whom the Software is 10 // furnished to do so, subject to the following conditions: 11 // 12 // The above copyright notice and this permission notice shall be included in all 13 // copies or substantial portions of the Software. 14 // 15 // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE 18 // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 // SOFTWARE. 22 23 // Some of this code is taken from the ref10 version of Ed25519 in SUPERCOP 24 // 20141124 (http://bench.cr.yp.to/supercop.html). That code is released as 25 // public domain but parts have been replaced with code generated by Fiat 26 // (https://github.com/mit-plv/fiat-crypto), which is MIT licensed. 27 // 28 // The field functions are shared by Ed25519 and X25519 where possible. 29 30 #include <openssl/curve25519.h> 31 32 #include <assert.h> 33 #include <string.h> 34 35 #include <openssl/cpu.h> 36 #include <openssl/mem.h> 37 #include <openssl/rand.h> 38 #include <openssl/sha.h> 39 #include <openssl/type_check.h> 40 41 #include "internal.h" 42 #include "../../crypto/internal.h" 43 44 45 // Various pre-computed constants. 46 #include "./curve25519_tables.h" 47 48 49 // Low-level intrinsic operations (hand-written). 50 51 static uint64_t load_3(const uint8_t *in) { 52 uint64_t result; 53 result = (uint64_t)in[0]; 54 result |= ((uint64_t)in[1]) << 8; 55 result |= ((uint64_t)in[2]) << 16; 56 return result; 57 } 58 59 static uint64_t load_4(const uint8_t *in) { 60 uint64_t result; 61 result = (uint64_t)in[0]; 62 result |= ((uint64_t)in[1]) << 8; 63 result |= ((uint64_t)in[2]) << 16; 64 result |= ((uint64_t)in[3]) << 24; 65 return result; 66 } 67 68 #if defined(BORINGSSL_CURVE25519_64BIT) 69 static uint64_t load_8(const uint8_t *in) { 70 uint64_t result; 71 result = (uint64_t)in[0]; 72 result |= ((uint64_t)in[1]) << 8; 73 result |= ((uint64_t)in[2]) << 16; 74 result |= ((uint64_t)in[3]) << 24; 75 result |= ((uint64_t)in[4]) << 32; 76 result |= ((uint64_t)in[5]) << 40; 77 result |= ((uint64_t)in[6]) << 48; 78 result |= ((uint64_t)in[7]) << 56; 79 return result; 80 } 81 82 static uint8_t /*bool*/ addcarryx_u51(uint8_t /*bool*/ c, uint64_t a, 83 uint64_t b, uint64_t *low) { 84 // This function extracts 51 bits of result and 1 bit of carry (52 total), so 85 // a 64-bit intermediate is sufficient. 86 uint64_t x = a + b + c; 87 *low = x & ((UINT64_C(1) << 51) - 1); 88 return (x >> 51) & 1; 89 } 90 91 static uint8_t /*bool*/ subborrow_u51(uint8_t /*bool*/ c, uint64_t a, 92 uint64_t b, uint64_t *low) { 93 // This function extracts 51 bits of result and 1 bit of borrow (52 total), so 94 // a 64-bit intermediate is sufficient. 95 uint64_t x = a - b - c; 96 *low = x & ((UINT64_C(1) << 51) - 1); 97 return x >> 63; 98 } 99 100 static uint64_t cmovznz64(uint64_t t, uint64_t z, uint64_t nz) { 101 t = -!!t; // all set if nonzero, 0 if 0 102 return (t&nz) | ((~t)&z); 103 } 104 105 #else 106 107 static uint8_t /*bool*/ addcarryx_u25(uint8_t /*bool*/ c, uint32_t a, 108 uint32_t b, uint32_t *low) { 109 // This function extracts 25 bits of result and 1 bit of carry (26 total), so 110 // a 32-bit intermediate is sufficient. 111 uint32_t x = a + b + c; 112 *low = x & ((1 << 25) - 1); 113 return (x >> 25) & 1; 114 } 115 116 static uint8_t /*bool*/ addcarryx_u26(uint8_t /*bool*/ c, uint32_t a, 117 uint32_t b, uint32_t *low) { 118 // This function extracts 26 bits of result and 1 bit of carry (27 total), so 119 // a 32-bit intermediate is sufficient. 120 uint32_t x = a + b + c; 121 *low = x & ((1 << 26) - 1); 122 return (x >> 26) & 1; 123 } 124 125 static uint8_t /*bool*/ subborrow_u25(uint8_t /*bool*/ c, uint32_t a, 126 uint32_t b, uint32_t *low) { 127 // This function extracts 25 bits of result and 1 bit of borrow (26 total), so 128 // a 32-bit intermediate is sufficient. 129 uint32_t x = a - b - c; 130 *low = x & ((1 << 25) - 1); 131 return x >> 31; 132 } 133 134 static uint8_t /*bool*/ subborrow_u26(uint8_t /*bool*/ c, uint32_t a, 135 uint32_t b, uint32_t *low) { 136 // This function extracts 26 bits of result and 1 bit of borrow (27 total), so 137 // a 32-bit intermediate is sufficient. 138 uint32_t x = a - b - c; 139 *low = x & ((1 << 26) - 1); 140 return x >> 31; 141 } 142 143 static uint32_t cmovznz32(uint32_t t, uint32_t z, uint32_t nz) { 144 t = -!!t; // all set if nonzero, 0 if 0 145 return (t&nz) | ((~t)&z); 146 } 147 148 #endif 149 150 151 // Field operations. 152 153 #if defined(BORINGSSL_CURVE25519_64BIT) 154 155 #define assert_fe(f) do { \ 156 for (unsigned _assert_fe_i = 0; _assert_fe_i< 5; _assert_fe_i++) { \ 157 assert(f[_assert_fe_i] < 1.125*(UINT64_C(1)<<51)); \ 158 } \ 159 } while (0) 160 161 #define assert_fe_loose(f) do { \ 162 for (unsigned _assert_fe_i = 0; _assert_fe_i< 5; _assert_fe_i++) { \ 163 assert(f[_assert_fe_i] < 3.375*(UINT64_C(1)<<51)); \ 164 } \ 165 } while (0) 166 167 #define assert_fe_frozen(f) do { \ 168 for (unsigned _assert_fe_i = 0; _assert_fe_i< 5; _assert_fe_i++) { \ 169 assert(f[_assert_fe_i] < (UINT64_C(1)<<51)); \ 170 } \ 171 } while (0) 172 173 static void fe_frombytes_impl(uint64_t h[5], const uint8_t *s) { 174 // Ignores top bit of s. 175 uint64_t a0 = load_8(s); 176 uint64_t a1 = load_8(s+8); 177 uint64_t a2 = load_8(s+16); 178 uint64_t a3 = load_8(s+24); 179 // Use 51 bits, 64-51 = 13 left. 180 h[0] = a0 & ((UINT64_C(1) << 51) - 1); 181 // (64-51) + 38 = 13 + 38 = 51 182 h[1] = (a0 >> 51) | ((a1 & ((UINT64_C(1) << 38) - 1)) << 13); 183 // (64-38) + 25 = 26 + 25 = 51 184 h[2] = (a1 >> 38) | ((a2 & ((UINT64_C(1) << 25) - 1)) << 26); 185 // (64-25) + 12 = 39 + 12 = 51 186 h[3] = (a2 >> 25) | ((a3 & ((UINT64_C(1) << 12) - 1)) << 39); 187 // (64-12) = 52, ignore top bit 188 h[4] = (a3 >> 12) & ((UINT64_C(1) << 51) - 1); 189 assert_fe(h); 190 } 191 192 static void fe_frombytes(fe *h, const uint8_t *s) { 193 fe_frombytes_impl(h->v, s); 194 } 195 196 static void fe_freeze(uint64_t out[5], const uint64_t in1[5]) { 197 { const uint64_t x7 = in1[4]; 198 { const uint64_t x8 = in1[3]; 199 { const uint64_t x6 = in1[2]; 200 { const uint64_t x4 = in1[1]; 201 { const uint64_t x2 = in1[0]; 202 { uint64_t x10; uint8_t/*bool*/ x11 = subborrow_u51(0x0, x2, 0x7ffffffffffed, &x10); 203 { uint64_t x13; uint8_t/*bool*/ x14 = subborrow_u51(x11, x4, 0x7ffffffffffff, &x13); 204 { uint64_t x16; uint8_t/*bool*/ x17 = subborrow_u51(x14, x6, 0x7ffffffffffff, &x16); 205 { uint64_t x19; uint8_t/*bool*/ x20 = subborrow_u51(x17, x8, 0x7ffffffffffff, &x19); 206 { uint64_t x22; uint8_t/*bool*/ x23 = subborrow_u51(x20, x7, 0x7ffffffffffff, &x22); 207 { uint64_t x24 = cmovznz64(x23, 0x0, 0xffffffffffffffffL); 208 { uint64_t x25 = (x24 & 0x7ffffffffffed); 209 { uint64_t x27; uint8_t/*bool*/ x28 = addcarryx_u51(0x0, x10, x25, &x27); 210 { uint64_t x29 = (x24 & 0x7ffffffffffff); 211 { uint64_t x31; uint8_t/*bool*/ x32 = addcarryx_u51(x28, x13, x29, &x31); 212 { uint64_t x33 = (x24 & 0x7ffffffffffff); 213 { uint64_t x35; uint8_t/*bool*/ x36 = addcarryx_u51(x32, x16, x33, &x35); 214 { uint64_t x37 = (x24 & 0x7ffffffffffff); 215 { uint64_t x39; uint8_t/*bool*/ x40 = addcarryx_u51(x36, x19, x37, &x39); 216 { uint64_t x41 = (x24 & 0x7ffffffffffff); 217 { uint64_t x43; addcarryx_u51(x40, x22, x41, &x43); 218 out[0] = x27; 219 out[1] = x31; 220 out[2] = x35; 221 out[3] = x39; 222 out[4] = x43; 223 }}}}}}}}}}}}}}}}}}}}} 224 } 225 226 static void fe_tobytes(uint8_t s[32], const fe *f) { 227 assert_fe(f->v); 228 uint64_t h[5]; 229 fe_freeze(h, f->v); 230 assert_fe_frozen(h); 231 232 s[0] = h[0] >> 0; 233 s[1] = h[0] >> 8; 234 s[2] = h[0] >> 16; 235 s[3] = h[0] >> 24; 236 s[4] = h[0] >> 32; 237 s[5] = h[0] >> 40; 238 s[6] = (h[0] >> 48) | (h[1] << 3); 239 s[7] = h[1] >> 5; 240 s[8] = h[1] >> 13; 241 s[9] = h[1] >> 21; 242 s[10] = h[1] >> 29; 243 s[11] = h[1] >> 37; 244 s[12] = (h[1] >> 45) | (h[2] << 6); 245 s[13] = h[2] >> 2; 246 s[14] = h[2] >> 10; 247 s[15] = h[2] >> 18; 248 s[16] = h[2] >> 26; 249 s[17] = h[2] >> 34; 250 s[18] = h[2] >> 42; 251 s[19] = (h[2] >> 50) | (h[3] << 1); 252 s[20] = h[3] >> 7; 253 s[21] = h[3] >> 15; 254 s[22] = h[3] >> 23; 255 s[23] = h[3] >> 31; 256 s[24] = h[3] >> 39; 257 s[25] = (h[3] >> 47) | (h[4] << 4); 258 s[26] = h[4] >> 4; 259 s[27] = h[4] >> 12; 260 s[28] = h[4] >> 20; 261 s[29] = h[4] >> 28; 262 s[30] = h[4] >> 36; 263 s[31] = h[4] >> 44; 264 } 265 266 // h = 0 267 static void fe_0(fe *h) { 268 OPENSSL_memset(h, 0, sizeof(fe)); 269 } 270 271 static void fe_loose_0(fe_loose *h) { 272 OPENSSL_memset(h, 0, sizeof(fe_loose)); 273 } 274 275 // h = 1 276 static void fe_1(fe *h) { 277 OPENSSL_memset(h, 0, sizeof(fe)); 278 h->v[0] = 1; 279 } 280 281 static void fe_loose_1(fe_loose *h) { 282 OPENSSL_memset(h, 0, sizeof(fe_loose)); 283 h->v[0] = 1; 284 } 285 286 static void fe_add_impl(uint64_t out[5], const uint64_t in1[5], const uint64_t in2[5]) { 287 { const uint64_t x10 = in1[4]; 288 { const uint64_t x11 = in1[3]; 289 { const uint64_t x9 = in1[2]; 290 { const uint64_t x7 = in1[1]; 291 { const uint64_t x5 = in1[0]; 292 { const uint64_t x18 = in2[4]; 293 { const uint64_t x19 = in2[3]; 294 { const uint64_t x17 = in2[2]; 295 { const uint64_t x15 = in2[1]; 296 { const uint64_t x13 = in2[0]; 297 out[0] = (x5 + x13); 298 out[1] = (x7 + x15); 299 out[2] = (x9 + x17); 300 out[3] = (x11 + x19); 301 out[4] = (x10 + x18); 302 }}}}}}}}}} 303 } 304 305 // h = f + g 306 // Can overlap h with f or g. 307 static void fe_add(fe_loose *h, const fe *f, const fe *g) { 308 assert_fe(f->v); 309 assert_fe(g->v); 310 fe_add_impl(h->v, f->v, g->v); 311 assert_fe_loose(h->v); 312 } 313 314 static void fe_sub_impl(uint64_t out[5], const uint64_t in1[5], const uint64_t in2[5]) { 315 { const uint64_t x10 = in1[4]; 316 { const uint64_t x11 = in1[3]; 317 { const uint64_t x9 = in1[2]; 318 { const uint64_t x7 = in1[1]; 319 { const uint64_t x5 = in1[0]; 320 { const uint64_t x18 = in2[4]; 321 { const uint64_t x19 = in2[3]; 322 { const uint64_t x17 = in2[2]; 323 { const uint64_t x15 = in2[1]; 324 { const uint64_t x13 = in2[0]; 325 out[0] = ((0xfffffffffffda + x5) - x13); 326 out[1] = ((0xffffffffffffe + x7) - x15); 327 out[2] = ((0xffffffffffffe + x9) - x17); 328 out[3] = ((0xffffffffffffe + x11) - x19); 329 out[4] = ((0xffffffffffffe + x10) - x18); 330 }}}}}}}}}} 331 } 332 333 // h = f - g 334 // Can overlap h with f or g. 335 static void fe_sub(fe_loose *h, const fe *f, const fe *g) { 336 assert_fe(f->v); 337 assert_fe(g->v); 338 fe_sub_impl(h->v, f->v, g->v); 339 assert_fe_loose(h->v); 340 } 341 342 static void fe_carry_impl(uint64_t out[5], const uint64_t in1[5]) { 343 { const uint64_t x7 = in1[4]; 344 { const uint64_t x8 = in1[3]; 345 { const uint64_t x6 = in1[2]; 346 { const uint64_t x4 = in1[1]; 347 { const uint64_t x2 = in1[0]; 348 { uint64_t x9 = (x2 >> 0x33); 349 { uint64_t x10 = (x2 & 0x7ffffffffffff); 350 { uint64_t x11 = (x9 + x4); 351 { uint64_t x12 = (x11 >> 0x33); 352 { uint64_t x13 = (x11 & 0x7ffffffffffff); 353 { uint64_t x14 = (x12 + x6); 354 { uint64_t x15 = (x14 >> 0x33); 355 { uint64_t x16 = (x14 & 0x7ffffffffffff); 356 { uint64_t x17 = (x15 + x8); 357 { uint64_t x18 = (x17 >> 0x33); 358 { uint64_t x19 = (x17 & 0x7ffffffffffff); 359 { uint64_t x20 = (x18 + x7); 360 { uint64_t x21 = (x20 >> 0x33); 361 { uint64_t x22 = (x20 & 0x7ffffffffffff); 362 { uint64_t x23 = (x10 + (0x13 * x21)); 363 { uint64_t x24 = (x23 >> 0x33); 364 { uint64_t x25 = (x23 & 0x7ffffffffffff); 365 { uint64_t x26 = (x24 + x13); 366 { uint64_t x27 = (x26 >> 0x33); 367 { uint64_t x28 = (x26 & 0x7ffffffffffff); 368 out[0] = x25; 369 out[1] = x28; 370 out[2] = (x27 + x16); 371 out[3] = x19; 372 out[4] = x22; 373 }}}}}}}}}}}}}}}}}}}}}}}}} 374 } 375 376 static void fe_carry(fe *h, const fe_loose* f) { 377 assert_fe_loose(f->v); 378 fe_carry_impl(h->v, f->v); 379 assert_fe(h->v); 380 } 381 382 static void fe_mul_impl(uint64_t out[5], const uint64_t in1[5], const uint64_t in2[5]) { 383 assert_fe_loose(in1); 384 assert_fe_loose(in2); 385 { const uint64_t x10 = in1[4]; 386 { const uint64_t x11 = in1[3]; 387 { const uint64_t x9 = in1[2]; 388 { const uint64_t x7 = in1[1]; 389 { const uint64_t x5 = in1[0]; 390 { const uint64_t x18 = in2[4]; 391 { const uint64_t x19 = in2[3]; 392 { const uint64_t x17 = in2[2]; 393 { const uint64_t x15 = in2[1]; 394 { const uint64_t x13 = in2[0]; 395 { uint128_t x20 = ((uint128_t)x5 * x13); 396 { uint128_t x21 = (((uint128_t)x5 * x15) + ((uint128_t)x7 * x13)); 397 { uint128_t x22 = ((((uint128_t)x5 * x17) + ((uint128_t)x9 * x13)) + ((uint128_t)x7 * x15)); 398 { uint128_t x23 = (((((uint128_t)x5 * x19) + ((uint128_t)x11 * x13)) + ((uint128_t)x7 * x17)) + ((uint128_t)x9 * x15)); 399 { uint128_t x24 = ((((((uint128_t)x5 * x18) + ((uint128_t)x10 * x13)) + ((uint128_t)x11 * x15)) + ((uint128_t)x7 * x19)) + ((uint128_t)x9 * x17)); 400 { uint64_t x25 = (x10 * 0x13); 401 { uint64_t x26 = (x7 * 0x13); 402 { uint64_t x27 = (x9 * 0x13); 403 { uint64_t x28 = (x11 * 0x13); 404 { uint128_t x29 = ((((x20 + ((uint128_t)x25 * x15)) + ((uint128_t)x26 * x18)) + ((uint128_t)x27 * x19)) + ((uint128_t)x28 * x17)); 405 { uint128_t x30 = (((x21 + ((uint128_t)x25 * x17)) + ((uint128_t)x27 * x18)) + ((uint128_t)x28 * x19)); 406 { uint128_t x31 = ((x22 + ((uint128_t)x25 * x19)) + ((uint128_t)x28 * x18)); 407 { uint128_t x32 = (x23 + ((uint128_t)x25 * x18)); 408 { uint64_t x33 = (uint64_t) (x29 >> 0x33); 409 { uint64_t x34 = ((uint64_t)x29 & 0x7ffffffffffff); 410 { uint128_t x35 = (x33 + x30); 411 { uint64_t x36 = (uint64_t) (x35 >> 0x33); 412 { uint64_t x37 = ((uint64_t)x35 & 0x7ffffffffffff); 413 { uint128_t x38 = (x36 + x31); 414 { uint64_t x39 = (uint64_t) (x38 >> 0x33); 415 { uint64_t x40 = ((uint64_t)x38 & 0x7ffffffffffff); 416 { uint128_t x41 = (x39 + x32); 417 { uint64_t x42 = (uint64_t) (x41 >> 0x33); 418 { uint64_t x43 = ((uint64_t)x41 & 0x7ffffffffffff); 419 { uint128_t x44 = (x42 + x24); 420 { uint64_t x45 = (uint64_t) (x44 >> 0x33); 421 { uint64_t x46 = ((uint64_t)x44 & 0x7ffffffffffff); 422 { uint64_t x47 = (x34 + (0x13 * x45)); 423 { uint64_t x48 = (x47 >> 0x33); 424 { uint64_t x49 = (x47 & 0x7ffffffffffff); 425 { uint64_t x50 = (x48 + x37); 426 { uint64_t x51 = (x50 >> 0x33); 427 { uint64_t x52 = (x50 & 0x7ffffffffffff); 428 out[0] = x49; 429 out[1] = x52; 430 out[2] = (x51 + x40); 431 out[3] = x43; 432 out[4] = x46; 433 }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}} 434 assert_fe(out); 435 } 436 437 static void fe_mul_ltt(fe_loose *h, const fe *f, const fe *g) { 438 fe_mul_impl(h->v, f->v, g->v); 439 } 440 441 static void fe_mul_llt(fe_loose *h, const fe_loose *f, const fe *g) { 442 fe_mul_impl(h->v, f->v, g->v); 443 } 444 445 static void fe_mul_ttt(fe *h, const fe *f, const fe *g) { 446 fe_mul_impl(h->v, f->v, g->v); 447 } 448 449 static void fe_mul_tlt(fe *h, const fe_loose *f, const fe *g) { 450 fe_mul_impl(h->v, f->v, g->v); 451 } 452 453 static void fe_mul_ttl(fe *h, const fe *f, const fe_loose *g) { 454 fe_mul_impl(h->v, f->v, g->v); 455 } 456 457 static void fe_mul_tll(fe *h, const fe_loose *f, const fe_loose *g) { 458 fe_mul_impl(h->v, f->v, g->v); 459 } 460 461 static void fe_sqr_impl(uint64_t out[5], const uint64_t in1[5]) { 462 assert_fe_loose(in1); 463 { const uint64_t x7 = in1[4]; 464 { const uint64_t x8 = in1[3]; 465 { const uint64_t x6 = in1[2]; 466 { const uint64_t x4 = in1[1]; 467 { const uint64_t x2 = in1[0]; 468 { uint64_t x9 = (x2 * 0x2); 469 { uint64_t x10 = (x4 * 0x2); 470 { uint64_t x11 = ((x6 * 0x2) * 0x13); 471 { uint64_t x12 = (x7 * 0x13); 472 { uint64_t x13 = (x12 * 0x2); 473 { uint128_t x14 = ((((uint128_t)x2 * x2) + ((uint128_t)x13 * x4)) + ((uint128_t)x11 * x8)); 474 { uint128_t x15 = ((((uint128_t)x9 * x4) + ((uint128_t)x13 * x6)) + ((uint128_t)x8 * (x8 * 0x13))); 475 { uint128_t x16 = ((((uint128_t)x9 * x6) + ((uint128_t)x4 * x4)) + ((uint128_t)x13 * x8)); 476 { uint128_t x17 = ((((uint128_t)x9 * x8) + ((uint128_t)x10 * x6)) + ((uint128_t)x7 * x12)); 477 { uint128_t x18 = ((((uint128_t)x9 * x7) + ((uint128_t)x10 * x8)) + ((uint128_t)x6 * x6)); 478 { uint64_t x19 = (uint64_t) (x14 >> 0x33); 479 { uint64_t x20 = ((uint64_t)x14 & 0x7ffffffffffff); 480 { uint128_t x21 = (x19 + x15); 481 { uint64_t x22 = (uint64_t) (x21 >> 0x33); 482 { uint64_t x23 = ((uint64_t)x21 & 0x7ffffffffffff); 483 { uint128_t x24 = (x22 + x16); 484 { uint64_t x25 = (uint64_t) (x24 >> 0x33); 485 { uint64_t x26 = ((uint64_t)x24 & 0x7ffffffffffff); 486 { uint128_t x27 = (x25 + x17); 487 { uint64_t x28 = (uint64_t) (x27 >> 0x33); 488 { uint64_t x29 = ((uint64_t)x27 & 0x7ffffffffffff); 489 { uint128_t x30 = (x28 + x18); 490 { uint64_t x31 = (uint64_t) (x30 >> 0x33); 491 { uint64_t x32 = ((uint64_t)x30 & 0x7ffffffffffff); 492 { uint64_t x33 = (x20 + (0x13 * x31)); 493 { uint64_t x34 = (x33 >> 0x33); 494 { uint64_t x35 = (x33 & 0x7ffffffffffff); 495 { uint64_t x36 = (x34 + x23); 496 { uint64_t x37 = (x36 >> 0x33); 497 { uint64_t x38 = (x36 & 0x7ffffffffffff); 498 out[0] = x35; 499 out[1] = x38; 500 out[2] = (x37 + x26); 501 out[3] = x29; 502 out[4] = x32; 503 }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}} 504 assert_fe(out); 505 } 506 507 static void fe_sq_tl(fe *h, const fe_loose *f) { 508 fe_sqr_impl(h->v, f->v); 509 } 510 511 static void fe_sq_tt(fe *h, const fe *f) { 512 fe_sqr_impl(h->v, f->v); 513 } 514 515 // Replace (f,g) with (g,f) if b == 1; 516 // replace (f,g) with (f,g) if b == 0. 517 // 518 // Preconditions: b in {0,1}. 519 static void fe_cswap(fe *f, fe *g, uint64_t b) { 520 b = 0-b; 521 for (unsigned i = 0; i < 5; i++) { 522 uint64_t x = f->v[i] ^ g->v[i]; 523 x &= b; 524 f->v[i] ^= x; 525 g->v[i] ^= x; 526 } 527 } 528 529 // NOTE: based on fiat-crypto fe_mul, edited for in2=121666, 0, 0.. 530 static void fe_mul_121666_impl(uint64_t out[5], const uint64_t in1[5]) { 531 assert_fe_loose(in1); 532 { const uint64_t x10 = in1[4]; 533 { const uint64_t x11 = in1[3]; 534 { const uint64_t x9 = in1[2]; 535 { const uint64_t x7 = in1[1]; 536 { const uint64_t x5 = in1[0]; 537 { const uint64_t x18 = 0; 538 { const uint64_t x19 = 0; 539 { const uint64_t x17 = 0; 540 { const uint64_t x15 = 0; 541 { const uint64_t x13 = 121666; 542 { uint128_t x20 = ((uint128_t)x5 * x13); 543 { uint128_t x21 = (((uint128_t)x5 * x15) + ((uint128_t)x7 * x13)); 544 { uint128_t x22 = ((((uint128_t)x5 * x17) + ((uint128_t)x9 * x13)) + ((uint128_t)x7 * x15)); 545 { uint128_t x23 = (((((uint128_t)x5 * x19) + ((uint128_t)x11 * x13)) + ((uint128_t)x7 * x17)) + ((uint128_t)x9 * x15)); 546 { uint128_t x24 = ((((((uint128_t)x5 * x18) + ((uint128_t)x10 * x13)) + ((uint128_t)x11 * x15)) + ((uint128_t)x7 * x19)) + ((uint128_t)x9 * x17)); 547 { uint64_t x25 = (x10 * 0x13); 548 { uint64_t x26 = (x7 * 0x13); 549 { uint64_t x27 = (x9 * 0x13); 550 { uint64_t x28 = (x11 * 0x13); 551 { uint128_t x29 = ((((x20 + ((uint128_t)x25 * x15)) + ((uint128_t)x26 * x18)) + ((uint128_t)x27 * x19)) + ((uint128_t)x28 * x17)); 552 { uint128_t x30 = (((x21 + ((uint128_t)x25 * x17)) + ((uint128_t)x27 * x18)) + ((uint128_t)x28 * x19)); 553 { uint128_t x31 = ((x22 + ((uint128_t)x25 * x19)) + ((uint128_t)x28 * x18)); 554 { uint128_t x32 = (x23 + ((uint128_t)x25 * x18)); 555 { uint64_t x33 = (uint64_t) (x29 >> 0x33); 556 { uint64_t x34 = ((uint64_t)x29 & 0x7ffffffffffff); 557 { uint128_t x35 = (x33 + x30); 558 { uint64_t x36 = (uint64_t) (x35 >> 0x33); 559 { uint64_t x37 = ((uint64_t)x35 & 0x7ffffffffffff); 560 { uint128_t x38 = (x36 + x31); 561 { uint64_t x39 = (uint64_t) (x38 >> 0x33); 562 { uint64_t x40 = ((uint64_t)x38 & 0x7ffffffffffff); 563 { uint128_t x41 = (x39 + x32); 564 { uint64_t x42 = (uint64_t) (x41 >> 0x33); 565 { uint64_t x43 = ((uint64_t)x41 & 0x7ffffffffffff); 566 { uint128_t x44 = (x42 + x24); 567 { uint64_t x45 = (uint64_t) (x44 >> 0x33); 568 { uint64_t x46 = ((uint64_t)x44 & 0x7ffffffffffff); 569 { uint64_t x47 = (x34 + (0x13 * x45)); 570 { uint64_t x48 = (x47 >> 0x33); 571 { uint64_t x49 = (x47 & 0x7ffffffffffff); 572 { uint64_t x50 = (x48 + x37); 573 { uint64_t x51 = (x50 >> 0x33); 574 { uint64_t x52 = (x50 & 0x7ffffffffffff); 575 out[0] = x49; 576 out[1] = x52; 577 out[2] = (x51 + x40); 578 out[3] = x43; 579 out[4] = x46; 580 }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}} 581 assert_fe(out); 582 } 583 584 static void fe_mul121666(fe *h, const fe_loose *f) { 585 assert_fe_loose(f->v); 586 fe_mul_121666_impl(h->v, f->v); 587 assert_fe(h->v); 588 } 589 590 // Adapted from Fiat-synthesized |fe_sub_impl| with |out| = 0. 591 static void fe_neg_impl(uint64_t out[5], const uint64_t in2[5]) { 592 { const uint64_t x10 = 0; 593 { const uint64_t x11 = 0; 594 { const uint64_t x9 = 0; 595 { const uint64_t x7 = 0; 596 { const uint64_t x5 = 0; 597 { const uint64_t x18 = in2[4]; 598 { const uint64_t x19 = in2[3]; 599 { const uint64_t x17 = in2[2]; 600 { const uint64_t x15 = in2[1]; 601 { const uint64_t x13 = in2[0]; 602 out[0] = ((0xfffffffffffda + x5) - x13); 603 out[1] = ((0xffffffffffffe + x7) - x15); 604 out[2] = ((0xffffffffffffe + x9) - x17); 605 out[3] = ((0xffffffffffffe + x11) - x19); 606 out[4] = ((0xffffffffffffe + x10) - x18); 607 }}}}}}}}}} 608 } 609 610 // h = -f 611 static void fe_neg(fe_loose *h, const fe *f) { 612 assert_fe(f->v); 613 fe_neg_impl(h->v, f->v); 614 assert_fe_loose(h->v); 615 } 616 617 // Replace (f,g) with (g,g) if b == 1; 618 // replace (f,g) with (f,g) if b == 0. 619 // 620 // Preconditions: b in {0,1}. 621 static void fe_cmov(fe_loose *f, const fe_loose *g, uint64_t b) { 622 b = 0-b; 623 for (unsigned i = 0; i < 5; i++) { 624 uint64_t x = f->v[i] ^ g->v[i]; 625 x &= b; 626 f->v[i] ^= x; 627 } 628 } 629 630 #else 631 632 #define assert_fe(f) do { \ 633 for (unsigned _assert_fe_i = 0; _assert_fe_i< 10; _assert_fe_i++) { \ 634 assert(f[_assert_fe_i] < 1.125*(1<<(26-(_assert_fe_i&1)))); \ 635 } \ 636 } while (0) 637 638 #define assert_fe_loose(f) do { \ 639 for (unsigned _assert_fe_i = 0; _assert_fe_i< 10; _assert_fe_i++) { \ 640 assert(f[_assert_fe_i] < 3.375*(1<<(26-(_assert_fe_i&1)))); \ 641 } \ 642 } while (0) 643 644 #define assert_fe_frozen(f) do { \ 645 for (unsigned _assert_fe_i = 0; _assert_fe_i< 10; _assert_fe_i++) { \ 646 assert(f[_assert_fe_i] < (1u<<(26-(_assert_fe_i&1)))); \ 647 } \ 648 } while (0) 649 650 static void fe_frombytes_impl(uint32_t h[10], const uint8_t *s) { 651 // Ignores top bit of s. 652 uint32_t a0 = load_4(s); 653 uint32_t a1 = load_4(s+4); 654 uint32_t a2 = load_4(s+8); 655 uint32_t a3 = load_4(s+12); 656 uint32_t a4 = load_4(s+16); 657 uint32_t a5 = load_4(s+20); 658 uint32_t a6 = load_4(s+24); 659 uint32_t a7 = load_4(s+28); 660 h[0] = a0&((1<<26)-1); // 26 used, 32-26 left. 26 661 h[1] = (a0>>26) | ((a1&((1<<19)-1))<< 6); // (32-26) + 19 = 6+19 = 25 662 h[2] = (a1>>19) | ((a2&((1<<13)-1))<<13); // (32-19) + 13 = 13+13 = 26 663 h[3] = (a2>>13) | ((a3&((1<< 6)-1))<<19); // (32-13) + 6 = 19+ 6 = 25 664 h[4] = (a3>> 6); // (32- 6) = 26 665 h[5] = a4&((1<<25)-1); // 25 666 h[6] = (a4>>25) | ((a5&((1<<19)-1))<< 7); // (32-25) + 19 = 7+19 = 26 667 h[7] = (a5>>19) | ((a6&((1<<12)-1))<<13); // (32-19) + 12 = 13+12 = 25 668 h[8] = (a6>>12) | ((a7&((1<< 6)-1))<<20); // (32-12) + 6 = 20+ 6 = 26 669 h[9] = (a7>> 6)&((1<<25)-1); // 25 670 assert_fe(h); 671 } 672 673 static void fe_frombytes(fe *h, const uint8_t *s) { 674 fe_frombytes_impl(h->v, s); 675 } 676 677 static void fe_freeze(uint32_t out[10], const uint32_t in1[10]) { 678 { const uint32_t x17 = in1[9]; 679 { const uint32_t x18 = in1[8]; 680 { const uint32_t x16 = in1[7]; 681 { const uint32_t x14 = in1[6]; 682 { const uint32_t x12 = in1[5]; 683 { const uint32_t x10 = in1[4]; 684 { const uint32_t x8 = in1[3]; 685 { const uint32_t x6 = in1[2]; 686 { const uint32_t x4 = in1[1]; 687 { const uint32_t x2 = in1[0]; 688 { uint32_t x20; uint8_t/*bool*/ x21 = subborrow_u26(0x0, x2, 0x3ffffed, &x20); 689 { uint32_t x23; uint8_t/*bool*/ x24 = subborrow_u25(x21, x4, 0x1ffffff, &x23); 690 { uint32_t x26; uint8_t/*bool*/ x27 = subborrow_u26(x24, x6, 0x3ffffff, &x26); 691 { uint32_t x29; uint8_t/*bool*/ x30 = subborrow_u25(x27, x8, 0x1ffffff, &x29); 692 { uint32_t x32; uint8_t/*bool*/ x33 = subborrow_u26(x30, x10, 0x3ffffff, &x32); 693 { uint32_t x35; uint8_t/*bool*/ x36 = subborrow_u25(x33, x12, 0x1ffffff, &x35); 694 { uint32_t x38; uint8_t/*bool*/ x39 = subborrow_u26(x36, x14, 0x3ffffff, &x38); 695 { uint32_t x41; uint8_t/*bool*/ x42 = subborrow_u25(x39, x16, 0x1ffffff, &x41); 696 { uint32_t x44; uint8_t/*bool*/ x45 = subborrow_u26(x42, x18, 0x3ffffff, &x44); 697 { uint32_t x47; uint8_t/*bool*/ x48 = subborrow_u25(x45, x17, 0x1ffffff, &x47); 698 { uint32_t x49 = cmovznz32(x48, 0x0, 0xffffffff); 699 { uint32_t x50 = (x49 & 0x3ffffed); 700 { uint32_t x52; uint8_t/*bool*/ x53 = addcarryx_u26(0x0, x20, x50, &x52); 701 { uint32_t x54 = (x49 & 0x1ffffff); 702 { uint32_t x56; uint8_t/*bool*/ x57 = addcarryx_u25(x53, x23, x54, &x56); 703 { uint32_t x58 = (x49 & 0x3ffffff); 704 { uint32_t x60; uint8_t/*bool*/ x61 = addcarryx_u26(x57, x26, x58, &x60); 705 { uint32_t x62 = (x49 & 0x1ffffff); 706 { uint32_t x64; uint8_t/*bool*/ x65 = addcarryx_u25(x61, x29, x62, &x64); 707 { uint32_t x66 = (x49 & 0x3ffffff); 708 { uint32_t x68; uint8_t/*bool*/ x69 = addcarryx_u26(x65, x32, x66, &x68); 709 { uint32_t x70 = (x49 & 0x1ffffff); 710 { uint32_t x72; uint8_t/*bool*/ x73 = addcarryx_u25(x69, x35, x70, &x72); 711 { uint32_t x74 = (x49 & 0x3ffffff); 712 { uint32_t x76; uint8_t/*bool*/ x77 = addcarryx_u26(x73, x38, x74, &x76); 713 { uint32_t x78 = (x49 & 0x1ffffff); 714 { uint32_t x80; uint8_t/*bool*/ x81 = addcarryx_u25(x77, x41, x78, &x80); 715 { uint32_t x82 = (x49 & 0x3ffffff); 716 { uint32_t x84; uint8_t/*bool*/ x85 = addcarryx_u26(x81, x44, x82, &x84); 717 { uint32_t x86 = (x49 & 0x1ffffff); 718 { uint32_t x88; addcarryx_u25(x85, x47, x86, &x88); 719 out[0] = x52; 720 out[1] = x56; 721 out[2] = x60; 722 out[3] = x64; 723 out[4] = x68; 724 out[5] = x72; 725 out[6] = x76; 726 out[7] = x80; 727 out[8] = x84; 728 out[9] = x88; 729 }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}} 730 } 731 732 static void fe_tobytes(uint8_t s[32], const fe *f) { 733 assert_fe(f->v); 734 uint32_t h[10]; 735 fe_freeze(h, f->v); 736 assert_fe_frozen(h); 737 738 s[0] = h[0] >> 0; 739 s[1] = h[0] >> 8; 740 s[2] = h[0] >> 16; 741 s[3] = (h[0] >> 24) | (h[1] << 2); 742 s[4] = h[1] >> 6; 743 s[5] = h[1] >> 14; 744 s[6] = (h[1] >> 22) | (h[2] << 3); 745 s[7] = h[2] >> 5; 746 s[8] = h[2] >> 13; 747 s[9] = (h[2] >> 21) | (h[3] << 5); 748 s[10] = h[3] >> 3; 749 s[11] = h[3] >> 11; 750 s[12] = (h[3] >> 19) | (h[4] << 6); 751 s[13] = h[4] >> 2; 752 s[14] = h[4] >> 10; 753 s[15] = h[4] >> 18; 754 s[16] = h[5] >> 0; 755 s[17] = h[5] >> 8; 756 s[18] = h[5] >> 16; 757 s[19] = (h[5] >> 24) | (h[6] << 1); 758 s[20] = h[6] >> 7; 759 s[21] = h[6] >> 15; 760 s[22] = (h[6] >> 23) | (h[7] << 3); 761 s[23] = h[7] >> 5; 762 s[24] = h[7] >> 13; 763 s[25] = (h[7] >> 21) | (h[8] << 4); 764 s[26] = h[8] >> 4; 765 s[27] = h[8] >> 12; 766 s[28] = (h[8] >> 20) | (h[9] << 6); 767 s[29] = h[9] >> 2; 768 s[30] = h[9] >> 10; 769 s[31] = h[9] >> 18; 770 } 771 772 // h = 0 773 static void fe_0(fe *h) { 774 OPENSSL_memset(h, 0, sizeof(fe)); 775 } 776 777 static void fe_loose_0(fe_loose *h) { 778 OPENSSL_memset(h, 0, sizeof(fe_loose)); 779 } 780 781 // h = 1 782 static void fe_1(fe *h) { 783 OPENSSL_memset(h, 0, sizeof(fe)); 784 h->v[0] = 1; 785 } 786 787 static void fe_loose_1(fe_loose *h) { 788 OPENSSL_memset(h, 0, sizeof(fe_loose)); 789 h->v[0] = 1; 790 } 791 792 static void fe_add_impl(uint32_t out[10], const uint32_t in1[10], const uint32_t in2[10]) { 793 { const uint32_t x20 = in1[9]; 794 { const uint32_t x21 = in1[8]; 795 { const uint32_t x19 = in1[7]; 796 { const uint32_t x17 = in1[6]; 797 { const uint32_t x15 = in1[5]; 798 { const uint32_t x13 = in1[4]; 799 { const uint32_t x11 = in1[3]; 800 { const uint32_t x9 = in1[2]; 801 { const uint32_t x7 = in1[1]; 802 { const uint32_t x5 = in1[0]; 803 { const uint32_t x38 = in2[9]; 804 { const uint32_t x39 = in2[8]; 805 { const uint32_t x37 = in2[7]; 806 { const uint32_t x35 = in2[6]; 807 { const uint32_t x33 = in2[5]; 808 { const uint32_t x31 = in2[4]; 809 { const uint32_t x29 = in2[3]; 810 { const uint32_t x27 = in2[2]; 811 { const uint32_t x25 = in2[1]; 812 { const uint32_t x23 = in2[0]; 813 out[0] = (x5 + x23); 814 out[1] = (x7 + x25); 815 out[2] = (x9 + x27); 816 out[3] = (x11 + x29); 817 out[4] = (x13 + x31); 818 out[5] = (x15 + x33); 819 out[6] = (x17 + x35); 820 out[7] = (x19 + x37); 821 out[8] = (x21 + x39); 822 out[9] = (x20 + x38); 823 }}}}}}}}}}}}}}}}}}}} 824 } 825 826 // h = f + g 827 // Can overlap h with f or g. 828 static void fe_add(fe_loose *h, const fe *f, const fe *g) { 829 assert_fe(f->v); 830 assert_fe(g->v); 831 fe_add_impl(h->v, f->v, g->v); 832 assert_fe_loose(h->v); 833 } 834 835 static void fe_sub_impl(uint32_t out[10], const uint32_t in1[10], const uint32_t in2[10]) { 836 { const uint32_t x20 = in1[9]; 837 { const uint32_t x21 = in1[8]; 838 { const uint32_t x19 = in1[7]; 839 { const uint32_t x17 = in1[6]; 840 { const uint32_t x15 = in1[5]; 841 { const uint32_t x13 = in1[4]; 842 { const uint32_t x11 = in1[3]; 843 { const uint32_t x9 = in1[2]; 844 { const uint32_t x7 = in1[1]; 845 { const uint32_t x5 = in1[0]; 846 { const uint32_t x38 = in2[9]; 847 { const uint32_t x39 = in2[8]; 848 { const uint32_t x37 = in2[7]; 849 { const uint32_t x35 = in2[6]; 850 { const uint32_t x33 = in2[5]; 851 { const uint32_t x31 = in2[4]; 852 { const uint32_t x29 = in2[3]; 853 { const uint32_t x27 = in2[2]; 854 { const uint32_t x25 = in2[1]; 855 { const uint32_t x23 = in2[0]; 856 out[0] = ((0x7ffffda + x5) - x23); 857 out[1] = ((0x3fffffe + x7) - x25); 858 out[2] = ((0x7fffffe + x9) - x27); 859 out[3] = ((0x3fffffe + x11) - x29); 860 out[4] = ((0x7fffffe + x13) - x31); 861 out[5] = ((0x3fffffe + x15) - x33); 862 out[6] = ((0x7fffffe + x17) - x35); 863 out[7] = ((0x3fffffe + x19) - x37); 864 out[8] = ((0x7fffffe + x21) - x39); 865 out[9] = ((0x3fffffe + x20) - x38); 866 }}}}}}}}}}}}}}}}}}}} 867 } 868 869 // h = f - g 870 // Can overlap h with f or g. 871 static void fe_sub(fe_loose *h, const fe *f, const fe *g) { 872 assert_fe(f->v); 873 assert_fe(g->v); 874 fe_sub_impl(h->v, f->v, g->v); 875 assert_fe_loose(h->v); 876 } 877 878 static void fe_carry_impl(uint32_t out[10], const uint32_t in1[10]) { 879 { const uint32_t x17 = in1[9]; 880 { const uint32_t x18 = in1[8]; 881 { const uint32_t x16 = in1[7]; 882 { const uint32_t x14 = in1[6]; 883 { const uint32_t x12 = in1[5]; 884 { const uint32_t x10 = in1[4]; 885 { const uint32_t x8 = in1[3]; 886 { const uint32_t x6 = in1[2]; 887 { const uint32_t x4 = in1[1]; 888 { const uint32_t x2 = in1[0]; 889 { uint32_t x19 = (x2 >> 0x1a); 890 { uint32_t x20 = (x2 & 0x3ffffff); 891 { uint32_t x21 = (x19 + x4); 892 { uint32_t x22 = (x21 >> 0x19); 893 { uint32_t x23 = (x21 & 0x1ffffff); 894 { uint32_t x24 = (x22 + x6); 895 { uint32_t x25 = (x24 >> 0x1a); 896 { uint32_t x26 = (x24 & 0x3ffffff); 897 { uint32_t x27 = (x25 + x8); 898 { uint32_t x28 = (x27 >> 0x19); 899 { uint32_t x29 = (x27 & 0x1ffffff); 900 { uint32_t x30 = (x28 + x10); 901 { uint32_t x31 = (x30 >> 0x1a); 902 { uint32_t x32 = (x30 & 0x3ffffff); 903 { uint32_t x33 = (x31 + x12); 904 { uint32_t x34 = (x33 >> 0x19); 905 { uint32_t x35 = (x33 & 0x1ffffff); 906 { uint32_t x36 = (x34 + x14); 907 { uint32_t x37 = (x36 >> 0x1a); 908 { uint32_t x38 = (x36 & 0x3ffffff); 909 { uint32_t x39 = (x37 + x16); 910 { uint32_t x40 = (x39 >> 0x19); 911 { uint32_t x41 = (x39 & 0x1ffffff); 912 { uint32_t x42 = (x40 + x18); 913 { uint32_t x43 = (x42 >> 0x1a); 914 { uint32_t x44 = (x42 & 0x3ffffff); 915 { uint32_t x45 = (x43 + x17); 916 { uint32_t x46 = (x45 >> 0x19); 917 { uint32_t x47 = (x45 & 0x1ffffff); 918 { uint32_t x48 = (x20 + (0x13 * x46)); 919 { uint32_t x49 = (x48 >> 0x1a); 920 { uint32_t x50 = (x48 & 0x3ffffff); 921 { uint32_t x51 = (x49 + x23); 922 { uint32_t x52 = (x51 >> 0x19); 923 { uint32_t x53 = (x51 & 0x1ffffff); 924 out[0] = x50; 925 out[1] = x53; 926 out[2] = (x52 + x26); 927 out[3] = x29; 928 out[4] = x32; 929 out[5] = x35; 930 out[6] = x38; 931 out[7] = x41; 932 out[8] = x44; 933 out[9] = x47; 934 }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}} 935 } 936 937 static void fe_carry(fe *h, const fe_loose* f) { 938 assert_fe_loose(f->v); 939 fe_carry_impl(h->v, f->v); 940 assert_fe(h->v); 941 } 942 943 static void fe_mul_impl(uint32_t out[10], const uint32_t in1[10], const uint32_t in2[10]) { 944 assert_fe_loose(in1); 945 assert_fe_loose(in2); 946 { const uint32_t x20 = in1[9]; 947 { const uint32_t x21 = in1[8]; 948 { const uint32_t x19 = in1[7]; 949 { const uint32_t x17 = in1[6]; 950 { const uint32_t x15 = in1[5]; 951 { const uint32_t x13 = in1[4]; 952 { const uint32_t x11 = in1[3]; 953 { const uint32_t x9 = in1[2]; 954 { const uint32_t x7 = in1[1]; 955 { const uint32_t x5 = in1[0]; 956 { const uint32_t x38 = in2[9]; 957 { const uint32_t x39 = in2[8]; 958 { const uint32_t x37 = in2[7]; 959 { const uint32_t x35 = in2[6]; 960 { const uint32_t x33 = in2[5]; 961 { const uint32_t x31 = in2[4]; 962 { const uint32_t x29 = in2[3]; 963 { const uint32_t x27 = in2[2]; 964 { const uint32_t x25 = in2[1]; 965 { const uint32_t x23 = in2[0]; 966 { uint64_t x40 = ((uint64_t)x23 * x5); 967 { uint64_t x41 = (((uint64_t)x23 * x7) + ((uint64_t)x25 * x5)); 968 { uint64_t x42 = ((((uint64_t)(0x2 * x25) * x7) + ((uint64_t)x23 * x9)) + ((uint64_t)x27 * x5)); 969 { uint64_t x43 = (((((uint64_t)x25 * x9) + ((uint64_t)x27 * x7)) + ((uint64_t)x23 * x11)) + ((uint64_t)x29 * x5)); 970 { uint64_t x44 = (((((uint64_t)x27 * x9) + (0x2 * (((uint64_t)x25 * x11) + ((uint64_t)x29 * x7)))) + ((uint64_t)x23 * x13)) + ((uint64_t)x31 * x5)); 971 { uint64_t x45 = (((((((uint64_t)x27 * x11) + ((uint64_t)x29 * x9)) + ((uint64_t)x25 * x13)) + ((uint64_t)x31 * x7)) + ((uint64_t)x23 * x15)) + ((uint64_t)x33 * x5)); 972 { uint64_t x46 = (((((0x2 * ((((uint64_t)x29 * x11) + ((uint64_t)x25 * x15)) + ((uint64_t)x33 * x7))) + ((uint64_t)x27 * x13)) + ((uint64_t)x31 * x9)) + ((uint64_t)x23 * x17)) + ((uint64_t)x35 * x5)); 973 { uint64_t x47 = (((((((((uint64_t)x29 * x13) + ((uint64_t)x31 * x11)) + ((uint64_t)x27 * x15)) + ((uint64_t)x33 * x9)) + ((uint64_t)x25 * x17)) + ((uint64_t)x35 * x7)) + ((uint64_t)x23 * x19)) + ((uint64_t)x37 * x5)); 974 { uint64_t x48 = (((((((uint64_t)x31 * x13) + (0x2 * (((((uint64_t)x29 * x15) + ((uint64_t)x33 * x11)) + ((uint64_t)x25 * x19)) + ((uint64_t)x37 * x7)))) + ((uint64_t)x27 * x17)) + ((uint64_t)x35 * x9)) + ((uint64_t)x23 * x21)) + ((uint64_t)x39 * x5)); 975 { uint64_t x49 = (((((((((((uint64_t)x31 * x15) + ((uint64_t)x33 * x13)) + ((uint64_t)x29 * x17)) + ((uint64_t)x35 * x11)) + ((uint64_t)x27 * x19)) + ((uint64_t)x37 * x9)) + ((uint64_t)x25 * x21)) + ((uint64_t)x39 * x7)) + ((uint64_t)x23 * x20)) + ((uint64_t)x38 * x5)); 976 { uint64_t x50 = (((((0x2 * ((((((uint64_t)x33 * x15) + ((uint64_t)x29 * x19)) + ((uint64_t)x37 * x11)) + ((uint64_t)x25 * x20)) + ((uint64_t)x38 * x7))) + ((uint64_t)x31 * x17)) + ((uint64_t)x35 * x13)) + ((uint64_t)x27 * x21)) + ((uint64_t)x39 * x9)); 977 { uint64_t x51 = (((((((((uint64_t)x33 * x17) + ((uint64_t)x35 * x15)) + ((uint64_t)x31 * x19)) + ((uint64_t)x37 * x13)) + ((uint64_t)x29 * x21)) + ((uint64_t)x39 * x11)) + ((uint64_t)x27 * x20)) + ((uint64_t)x38 * x9)); 978 { uint64_t x52 = (((((uint64_t)x35 * x17) + (0x2 * (((((uint64_t)x33 * x19) + ((uint64_t)x37 * x15)) + ((uint64_t)x29 * x20)) + ((uint64_t)x38 * x11)))) + ((uint64_t)x31 * x21)) + ((uint64_t)x39 * x13)); 979 { uint64_t x53 = (((((((uint64_t)x35 * x19) + ((uint64_t)x37 * x17)) + ((uint64_t)x33 * x21)) + ((uint64_t)x39 * x15)) + ((uint64_t)x31 * x20)) + ((uint64_t)x38 * x13)); 980 { uint64_t x54 = (((0x2 * ((((uint64_t)x37 * x19) + ((uint64_t)x33 * x20)) + ((uint64_t)x38 * x15))) + ((uint64_t)x35 * x21)) + ((uint64_t)x39 * x17)); 981 { uint64_t x55 = (((((uint64_t)x37 * x21) + ((uint64_t)x39 * x19)) + ((uint64_t)x35 * x20)) + ((uint64_t)x38 * x17)); 982 { uint64_t x56 = (((uint64_t)x39 * x21) + (0x2 * (((uint64_t)x37 * x20) + ((uint64_t)x38 * x19)))); 983 { uint64_t x57 = (((uint64_t)x39 * x20) + ((uint64_t)x38 * x21)); 984 { uint64_t x58 = ((uint64_t)(0x2 * x38) * x20); 985 { uint64_t x59 = (x48 + (x58 << 0x4)); 986 { uint64_t x60 = (x59 + (x58 << 0x1)); 987 { uint64_t x61 = (x60 + x58); 988 { uint64_t x62 = (x47 + (x57 << 0x4)); 989 { uint64_t x63 = (x62 + (x57 << 0x1)); 990 { uint64_t x64 = (x63 + x57); 991 { uint64_t x65 = (x46 + (x56 << 0x4)); 992 { uint64_t x66 = (x65 + (x56 << 0x1)); 993 { uint64_t x67 = (x66 + x56); 994 { uint64_t x68 = (x45 + (x55 << 0x4)); 995 { uint64_t x69 = (x68 + (x55 << 0x1)); 996 { uint64_t x70 = (x69 + x55); 997 { uint64_t x71 = (x44 + (x54 << 0x4)); 998 { uint64_t x72 = (x71 + (x54 << 0x1)); 999 { uint64_t x73 = (x72 + x54); 1000 { uint64_t x74 = (x43 + (x53 << 0x4)); 1001 { uint64_t x75 = (x74 + (x53 << 0x1)); 1002 { uint64_t x76 = (x75 + x53); 1003 { uint64_t x77 = (x42 + (x52 << 0x4)); 1004 { uint64_t x78 = (x77 + (x52 << 0x1)); 1005 { uint64_t x79 = (x78 + x52); 1006 { uint64_t x80 = (x41 + (x51 << 0x4)); 1007 { uint64_t x81 = (x80 + (x51 << 0x1)); 1008 { uint64_t x82 = (x81 + x51); 1009 { uint64_t x83 = (x40 + (x50 << 0x4)); 1010 { uint64_t x84 = (x83 + (x50 << 0x1)); 1011 { uint64_t x85 = (x84 + x50); 1012 { uint64_t x86 = (x85 >> 0x1a); 1013 { uint32_t x87 = ((uint32_t)x85 & 0x3ffffff); 1014 { uint64_t x88 = (x86 + x82); 1015 { uint64_t x89 = (x88 >> 0x19); 1016 { uint32_t x90 = ((uint32_t)x88 & 0x1ffffff); 1017 { uint64_t x91 = (x89 + x79); 1018 { uint64_t x92 = (x91 >> 0x1a); 1019 { uint32_t x93 = ((uint32_t)x91 & 0x3ffffff); 1020 { uint64_t x94 = (x92 + x76); 1021 { uint64_t x95 = (x94 >> 0x19); 1022 { uint32_t x96 = ((uint32_t)x94 & 0x1ffffff); 1023 { uint64_t x97 = (x95 + x73); 1024 { uint64_t x98 = (x97 >> 0x1a); 1025 { uint32_t x99 = ((uint32_t)x97 & 0x3ffffff); 1026 { uint64_t x100 = (x98 + x70); 1027 { uint64_t x101 = (x100 >> 0x19); 1028 { uint32_t x102 = ((uint32_t)x100 & 0x1ffffff); 1029 { uint64_t x103 = (x101 + x67); 1030 { uint64_t x104 = (x103 >> 0x1a); 1031 { uint32_t x105 = ((uint32_t)x103 & 0x3ffffff); 1032 { uint64_t x106 = (x104 + x64); 1033 { uint64_t x107 = (x106 >> 0x19); 1034 { uint32_t x108 = ((uint32_t)x106 & 0x1ffffff); 1035 { uint64_t x109 = (x107 + x61); 1036 { uint64_t x110 = (x109 >> 0x1a); 1037 { uint32_t x111 = ((uint32_t)x109 & 0x3ffffff); 1038 { uint64_t x112 = (x110 + x49); 1039 { uint64_t x113 = (x112 >> 0x19); 1040 { uint32_t x114 = ((uint32_t)x112 & 0x1ffffff); 1041 { uint64_t x115 = (x87 + (0x13 * x113)); 1042 { uint32_t x116 = (uint32_t) (x115 >> 0x1a); 1043 { uint32_t x117 = ((uint32_t)x115 & 0x3ffffff); 1044 { uint32_t x118 = (x116 + x90); 1045 { uint32_t x119 = (x118 >> 0x19); 1046 { uint32_t x120 = (x118 & 0x1ffffff); 1047 out[0] = x117; 1048 out[1] = x120; 1049 out[2] = (x119 + x93); 1050 out[3] = x96; 1051 out[4] = x99; 1052 out[5] = x102; 1053 out[6] = x105; 1054 out[7] = x108; 1055 out[8] = x111; 1056 out[9] = x114; 1057 }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}} 1058 assert_fe(out); 1059 } 1060 1061 static void fe_mul_ltt(fe_loose *h, const fe *f, const fe *g) { 1062 fe_mul_impl(h->v, f->v, g->v); 1063 } 1064 1065 static void fe_mul_llt(fe_loose *h, const fe_loose *f, const fe *g) { 1066 fe_mul_impl(h->v, f->v, g->v); 1067 } 1068 1069 static void fe_mul_ttt(fe *h, const fe *f, const fe *g) { 1070 fe_mul_impl(h->v, f->v, g->v); 1071 } 1072 1073 static void fe_mul_tlt(fe *h, const fe_loose *f, const fe *g) { 1074 fe_mul_impl(h->v, f->v, g->v); 1075 } 1076 1077 static void fe_mul_ttl(fe *h, const fe *f, const fe_loose *g) { 1078 fe_mul_impl(h->v, f->v, g->v); 1079 } 1080 1081 static void fe_mul_tll(fe *h, const fe_loose *f, const fe_loose *g) { 1082 fe_mul_impl(h->v, f->v, g->v); 1083 } 1084 1085 static void fe_sqr_impl(uint32_t out[10], const uint32_t in1[10]) { 1086 assert_fe_loose(in1); 1087 { const uint32_t x17 = in1[9]; 1088 { const uint32_t x18 = in1[8]; 1089 { const uint32_t x16 = in1[7]; 1090 { const uint32_t x14 = in1[6]; 1091 { const uint32_t x12 = in1[5]; 1092 { const uint32_t x10 = in1[4]; 1093 { const uint32_t x8 = in1[3]; 1094 { const uint32_t x6 = in1[2]; 1095 { const uint32_t x4 = in1[1]; 1096 { const uint32_t x2 = in1[0]; 1097 { uint64_t x19 = ((uint64_t)x2 * x2); 1098 { uint64_t x20 = ((uint64_t)(0x2 * x2) * x4); 1099 { uint64_t x21 = (0x2 * (((uint64_t)x4 * x4) + ((uint64_t)x2 * x6))); 1100 { uint64_t x22 = (0x2 * (((uint64_t)x4 * x6) + ((uint64_t)x2 * x8))); 1101 { uint64_t x23 = ((((uint64_t)x6 * x6) + ((uint64_t)(0x4 * x4) * x8)) + ((uint64_t)(0x2 * x2) * x10)); 1102 { uint64_t x24 = (0x2 * ((((uint64_t)x6 * x8) + ((uint64_t)x4 * x10)) + ((uint64_t)x2 * x12))); 1103 { uint64_t x25 = (0x2 * (((((uint64_t)x8 * x8) + ((uint64_t)x6 * x10)) + ((uint64_t)x2 * x14)) + ((uint64_t)(0x2 * x4) * x12))); 1104 { uint64_t x26 = (0x2 * (((((uint64_t)x8 * x10) + ((uint64_t)x6 * x12)) + ((uint64_t)x4 * x14)) + ((uint64_t)x2 * x16))); 1105 { uint64_t x27 = (((uint64_t)x10 * x10) + (0x2 * ((((uint64_t)x6 * x14) + ((uint64_t)x2 * x18)) + (0x2 * (((uint64_t)x4 * x16) + ((uint64_t)x8 * x12)))))); 1106 { uint64_t x28 = (0x2 * ((((((uint64_t)x10 * x12) + ((uint64_t)x8 * x14)) + ((uint64_t)x6 * x16)) + ((uint64_t)x4 * x18)) + ((uint64_t)x2 * x17))); 1107 { uint64_t x29 = (0x2 * (((((uint64_t)x12 * x12) + ((uint64_t)x10 * x14)) + ((uint64_t)x6 * x18)) + (0x2 * (((uint64_t)x8 * x16) + ((uint64_t)x4 * x17))))); 1108 { uint64_t x30 = (0x2 * (((((uint64_t)x12 * x14) + ((uint64_t)x10 * x16)) + ((uint64_t)x8 * x18)) + ((uint64_t)x6 * x17))); 1109 { uint64_t x31 = (((uint64_t)x14 * x14) + (0x2 * (((uint64_t)x10 * x18) + (0x2 * (((uint64_t)x12 * x16) + ((uint64_t)x8 * x17)))))); 1110 { uint64_t x32 = (0x2 * ((((uint64_t)x14 * x16) + ((uint64_t)x12 * x18)) + ((uint64_t)x10 * x17))); 1111 { uint64_t x33 = (0x2 * ((((uint64_t)x16 * x16) + ((uint64_t)x14 * x18)) + ((uint64_t)(0x2 * x12) * x17))); 1112 { uint64_t x34 = (0x2 * (((uint64_t)x16 * x18) + ((uint64_t)x14 * x17))); 1113 { uint64_t x35 = (((uint64_t)x18 * x18) + ((uint64_t)(0x4 * x16) * x17)); 1114 { uint64_t x36 = ((uint64_t)(0x2 * x18) * x17); 1115 { uint64_t x37 = ((uint64_t)(0x2 * x17) * x17); 1116 { uint64_t x38 = (x27 + (x37 << 0x4)); 1117 { uint64_t x39 = (x38 + (x37 << 0x1)); 1118 { uint64_t x40 = (x39 + x37); 1119 { uint64_t x41 = (x26 + (x36 << 0x4)); 1120 { uint64_t x42 = (x41 + (x36 << 0x1)); 1121 { uint64_t x43 = (x42 + x36); 1122 { uint64_t x44 = (x25 + (x35 << 0x4)); 1123 { uint64_t x45 = (x44 + (x35 << 0x1)); 1124 { uint64_t x46 = (x45 + x35); 1125 { uint64_t x47 = (x24 + (x34 << 0x4)); 1126 { uint64_t x48 = (x47 + (x34 << 0x1)); 1127 { uint64_t x49 = (x48 + x34); 1128 { uint64_t x50 = (x23 + (x33 << 0x4)); 1129 { uint64_t x51 = (x50 + (x33 << 0x1)); 1130 { uint64_t x52 = (x51 + x33); 1131 { uint64_t x53 = (x22 + (x32 << 0x4)); 1132 { uint64_t x54 = (x53 + (x32 << 0x1)); 1133 { uint64_t x55 = (x54 + x32); 1134 { uint64_t x56 = (x21 + (x31 << 0x4)); 1135 { uint64_t x57 = (x56 + (x31 << 0x1)); 1136 { uint64_t x58 = (x57 + x31); 1137 { uint64_t x59 = (x20 + (x30 << 0x4)); 1138 { uint64_t x60 = (x59 + (x30 << 0x1)); 1139 { uint64_t x61 = (x60 + x30); 1140 { uint64_t x62 = (x19 + (x29 << 0x4)); 1141 { uint64_t x63 = (x62 + (x29 << 0x1)); 1142 { uint64_t x64 = (x63 + x29); 1143 { uint64_t x65 = (x64 >> 0x1a); 1144 { uint32_t x66 = ((uint32_t)x64 & 0x3ffffff); 1145 { uint64_t x67 = (x65 + x61); 1146 { uint64_t x68 = (x67 >> 0x19); 1147 { uint32_t x69 = ((uint32_t)x67 & 0x1ffffff); 1148 { uint64_t x70 = (x68 + x58); 1149 { uint64_t x71 = (x70 >> 0x1a); 1150 { uint32_t x72 = ((uint32_t)x70 & 0x3ffffff); 1151 { uint64_t x73 = (x71 + x55); 1152 { uint64_t x74 = (x73 >> 0x19); 1153 { uint32_t x75 = ((uint32_t)x73 & 0x1ffffff); 1154 { uint64_t x76 = (x74 + x52); 1155 { uint64_t x77 = (x76 >> 0x1a); 1156 { uint32_t x78 = ((uint32_t)x76 & 0x3ffffff); 1157 { uint64_t x79 = (x77 + x49); 1158 { uint64_t x80 = (x79 >> 0x19); 1159 { uint32_t x81 = ((uint32_t)x79 & 0x1ffffff); 1160 { uint64_t x82 = (x80 + x46); 1161 { uint64_t x83 = (x82 >> 0x1a); 1162 { uint32_t x84 = ((uint32_t)x82 & 0x3ffffff); 1163 { uint64_t x85 = (x83 + x43); 1164 { uint64_t x86 = (x85 >> 0x19); 1165 { uint32_t x87 = ((uint32_t)x85 & 0x1ffffff); 1166 { uint64_t x88 = (x86 + x40); 1167 { uint64_t x89 = (x88 >> 0x1a); 1168 { uint32_t x90 = ((uint32_t)x88 & 0x3ffffff); 1169 { uint64_t x91 = (x89 + x28); 1170 { uint64_t x92 = (x91 >> 0x19); 1171 { uint32_t x93 = ((uint32_t)x91 & 0x1ffffff); 1172 { uint64_t x94 = (x66 + (0x13 * x92)); 1173 { uint32_t x95 = (uint32_t) (x94 >> 0x1a); 1174 { uint32_t x96 = ((uint32_t)x94 & 0x3ffffff); 1175 { uint32_t x97 = (x95 + x69); 1176 { uint32_t x98 = (x97 >> 0x19); 1177 { uint32_t x99 = (x97 & 0x1ffffff); 1178 out[0] = x96; 1179 out[1] = x99; 1180 out[2] = (x98 + x72); 1181 out[3] = x75; 1182 out[4] = x78; 1183 out[5] = x81; 1184 out[6] = x84; 1185 out[7] = x87; 1186 out[8] = x90; 1187 out[9] = x93; 1188 }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}} 1189 assert_fe(out); 1190 } 1191 1192 static void fe_sq_tl(fe *h, const fe_loose *f) { 1193 fe_sqr_impl(h->v, f->v); 1194 } 1195 1196 static void fe_sq_tt(fe *h, const fe *f) { 1197 fe_sqr_impl(h->v, f->v); 1198 } 1199 1200 // Replace (f,g) with (g,f) if b == 1; 1201 // replace (f,g) with (f,g) if b == 0. 1202 // 1203 // Preconditions: b in {0,1}. 1204 static void fe_cswap(fe *f, fe *g, unsigned int b) { 1205 b = 0-b; 1206 unsigned i; 1207 for (i = 0; i < 10; i++) { 1208 uint32_t x = f->v[i] ^ g->v[i]; 1209 x &= b; 1210 f->v[i] ^= x; 1211 g->v[i] ^= x; 1212 } 1213 } 1214 1215 // NOTE: based on fiat-crypto fe_mul, edited for in2=121666, 0, 0.. 1216 static void fe_mul_121666_impl(uint32_t out[10], const uint32_t in1[10]) { 1217 assert_fe_loose(in1); 1218 { const uint32_t x20 = in1[9]; 1219 { const uint32_t x21 = in1[8]; 1220 { const uint32_t x19 = in1[7]; 1221 { const uint32_t x17 = in1[6]; 1222 { const uint32_t x15 = in1[5]; 1223 { const uint32_t x13 = in1[4]; 1224 { const uint32_t x11 = in1[3]; 1225 { const uint32_t x9 = in1[2]; 1226 { const uint32_t x7 = in1[1]; 1227 { const uint32_t x5 = in1[0]; 1228 { const uint32_t x38 = 0; 1229 { const uint32_t x39 = 0; 1230 { const uint32_t x37 = 0; 1231 { const uint32_t x35 = 0; 1232 { const uint32_t x33 = 0; 1233 { const uint32_t x31 = 0; 1234 { const uint32_t x29 = 0; 1235 { const uint32_t x27 = 0; 1236 { const uint32_t x25 = 0; 1237 { const uint32_t x23 = 121666; 1238 { uint64_t x40 = ((uint64_t)x23 * x5); 1239 { uint64_t x41 = (((uint64_t)x23 * x7) + ((uint64_t)x25 * x5)); 1240 { uint64_t x42 = ((((uint64_t)(0x2 * x25) * x7) + ((uint64_t)x23 * x9)) + ((uint64_t)x27 * x5)); 1241 { uint64_t x43 = (((((uint64_t)x25 * x9) + ((uint64_t)x27 * x7)) + ((uint64_t)x23 * x11)) + ((uint64_t)x29 * x5)); 1242 { uint64_t x44 = (((((uint64_t)x27 * x9) + (0x2 * (((uint64_t)x25 * x11) + ((uint64_t)x29 * x7)))) + ((uint64_t)x23 * x13)) + ((uint64_t)x31 * x5)); 1243 { uint64_t x45 = (((((((uint64_t)x27 * x11) + ((uint64_t)x29 * x9)) + ((uint64_t)x25 * x13)) + ((uint64_t)x31 * x7)) + ((uint64_t)x23 * x15)) + ((uint64_t)x33 * x5)); 1244 { uint64_t x46 = (((((0x2 * ((((uint64_t)x29 * x11) + ((uint64_t)x25 * x15)) + ((uint64_t)x33 * x7))) + ((uint64_t)x27 * x13)) + ((uint64_t)x31 * x9)) + ((uint64_t)x23 * x17)) + ((uint64_t)x35 * x5)); 1245 { uint64_t x47 = (((((((((uint64_t)x29 * x13) + ((uint64_t)x31 * x11)) + ((uint64_t)x27 * x15)) + ((uint64_t)x33 * x9)) + ((uint64_t)x25 * x17)) + ((uint64_t)x35 * x7)) + ((uint64_t)x23 * x19)) + ((uint64_t)x37 * x5)); 1246 { uint64_t x48 = (((((((uint64_t)x31 * x13) + (0x2 * (((((uint64_t)x29 * x15) + ((uint64_t)x33 * x11)) + ((uint64_t)x25 * x19)) + ((uint64_t)x37 * x7)))) + ((uint64_t)x27 * x17)) + ((uint64_t)x35 * x9)) + ((uint64_t)x23 * x21)) + ((uint64_t)x39 * x5)); 1247 { uint64_t x49 = (((((((((((uint64_t)x31 * x15) + ((uint64_t)x33 * x13)) + ((uint64_t)x29 * x17)) + ((uint64_t)x35 * x11)) + ((uint64_t)x27 * x19)) + ((uint64_t)x37 * x9)) + ((uint64_t)x25 * x21)) + ((uint64_t)x39 * x7)) + ((uint64_t)x23 * x20)) + ((uint64_t)x38 * x5)); 1248 { uint64_t x50 = (((((0x2 * ((((((uint64_t)x33 * x15) + ((uint64_t)x29 * x19)) + ((uint64_t)x37 * x11)) + ((uint64_t)x25 * x20)) + ((uint64_t)x38 * x7))) + ((uint64_t)x31 * x17)) + ((uint64_t)x35 * x13)) + ((uint64_t)x27 * x21)) + ((uint64_t)x39 * x9)); 1249 { uint64_t x51 = (((((((((uint64_t)x33 * x17) + ((uint64_t)x35 * x15)) + ((uint64_t)x31 * x19)) + ((uint64_t)x37 * x13)) + ((uint64_t)x29 * x21)) + ((uint64_t)x39 * x11)) + ((uint64_t)x27 * x20)) + ((uint64_t)x38 * x9)); 1250 { uint64_t x52 = (((((uint64_t)x35 * x17) + (0x2 * (((((uint64_t)x33 * x19) + ((uint64_t)x37 * x15)) + ((uint64_t)x29 * x20)) + ((uint64_t)x38 * x11)))) + ((uint64_t)x31 * x21)) + ((uint64_t)x39 * x13)); 1251 { uint64_t x53 = (((((((uint64_t)x35 * x19) + ((uint64_t)x37 * x17)) + ((uint64_t)x33 * x21)) + ((uint64_t)x39 * x15)) + ((uint64_t)x31 * x20)) + ((uint64_t)x38 * x13)); 1252 { uint64_t x54 = (((0x2 * ((((uint64_t)x37 * x19) + ((uint64_t)x33 * x20)) + ((uint64_t)x38 * x15))) + ((uint64_t)x35 * x21)) + ((uint64_t)x39 * x17)); 1253 { uint64_t x55 = (((((uint64_t)x37 * x21) + ((uint64_t)x39 * x19)) + ((uint64_t)x35 * x20)) + ((uint64_t)x38 * x17)); 1254 { uint64_t x56 = (((uint64_t)x39 * x21) + (0x2 * (((uint64_t)x37 * x20) + ((uint64_t)x38 * x19)))); 1255 { uint64_t x57 = (((uint64_t)x39 * x20) + ((uint64_t)x38 * x21)); 1256 { uint64_t x58 = ((uint64_t)(0x2 * x38) * x20); 1257 { uint64_t x59 = (x48 + (x58 << 0x4)); 1258 { uint64_t x60 = (x59 + (x58 << 0x1)); 1259 { uint64_t x61 = (x60 + x58); 1260 { uint64_t x62 = (x47 + (x57 << 0x4)); 1261 { uint64_t x63 = (x62 + (x57 << 0x1)); 1262 { uint64_t x64 = (x63 + x57); 1263 { uint64_t x65 = (x46 + (x56 << 0x4)); 1264 { uint64_t x66 = (x65 + (x56 << 0x1)); 1265 { uint64_t x67 = (x66 + x56); 1266 { uint64_t x68 = (x45 + (x55 << 0x4)); 1267 { uint64_t x69 = (x68 + (x55 << 0x1)); 1268 { uint64_t x70 = (x69 + x55); 1269 { uint64_t x71 = (x44 + (x54 << 0x4)); 1270 { uint64_t x72 = (x71 + (x54 << 0x1)); 1271 { uint64_t x73 = (x72 + x54); 1272 { uint64_t x74 = (x43 + (x53 << 0x4)); 1273 { uint64_t x75 = (x74 + (x53 << 0x1)); 1274 { uint64_t x76 = (x75 + x53); 1275 { uint64_t x77 = (x42 + (x52 << 0x4)); 1276 { uint64_t x78 = (x77 + (x52 << 0x1)); 1277 { uint64_t x79 = (x78 + x52); 1278 { uint64_t x80 = (x41 + (x51 << 0x4)); 1279 { uint64_t x81 = (x80 + (x51 << 0x1)); 1280 { uint64_t x82 = (x81 + x51); 1281 { uint64_t x83 = (x40 + (x50 << 0x4)); 1282 { uint64_t x84 = (x83 + (x50 << 0x1)); 1283 { uint64_t x85 = (x84 + x50); 1284 { uint64_t x86 = (x85 >> 0x1a); 1285 { uint32_t x87 = ((uint32_t)x85 & 0x3ffffff); 1286 { uint64_t x88 = (x86 + x82); 1287 { uint64_t x89 = (x88 >> 0x19); 1288 { uint32_t x90 = ((uint32_t)x88 & 0x1ffffff); 1289 { uint64_t x91 = (x89 + x79); 1290 { uint64_t x92 = (x91 >> 0x1a); 1291 { uint32_t x93 = ((uint32_t)x91 & 0x3ffffff); 1292 { uint64_t x94 = (x92 + x76); 1293 { uint64_t x95 = (x94 >> 0x19); 1294 { uint32_t x96 = ((uint32_t)x94 & 0x1ffffff); 1295 { uint64_t x97 = (x95 + x73); 1296 { uint64_t x98 = (x97 >> 0x1a); 1297 { uint32_t x99 = ((uint32_t)x97 & 0x3ffffff); 1298 { uint64_t x100 = (x98 + x70); 1299 { uint64_t x101 = (x100 >> 0x19); 1300 { uint32_t x102 = ((uint32_t)x100 & 0x1ffffff); 1301 { uint64_t x103 = (x101 + x67); 1302 { uint64_t x104 = (x103 >> 0x1a); 1303 { uint32_t x105 = ((uint32_t)x103 & 0x3ffffff); 1304 { uint64_t x106 = (x104 + x64); 1305 { uint64_t x107 = (x106 >> 0x19); 1306 { uint32_t x108 = ((uint32_t)x106 & 0x1ffffff); 1307 { uint64_t x109 = (x107 + x61); 1308 { uint64_t x110 = (x109 >> 0x1a); 1309 { uint32_t x111 = ((uint32_t)x109 & 0x3ffffff); 1310 { uint64_t x112 = (x110 + x49); 1311 { uint64_t x113 = (x112 >> 0x19); 1312 { uint32_t x114 = ((uint32_t)x112 & 0x1ffffff); 1313 { uint64_t x115 = (x87 + (0x13 * x113)); 1314 { uint32_t x116 = (uint32_t) (x115 >> 0x1a); 1315 { uint32_t x117 = ((uint32_t)x115 & 0x3ffffff); 1316 { uint32_t x118 = (x116 + x90); 1317 { uint32_t x119 = (x118 >> 0x19); 1318 { uint32_t x120 = (x118 & 0x1ffffff); 1319 out[0] = x117; 1320 out[1] = x120; 1321 out[2] = (x119 + x93); 1322 out[3] = x96; 1323 out[4] = x99; 1324 out[5] = x102; 1325 out[6] = x105; 1326 out[7] = x108; 1327 out[8] = x111; 1328 out[9] = x114; 1329 }}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}}} 1330 assert_fe(out); 1331 } 1332 1333 static void fe_mul121666(fe *h, const fe_loose *f) { 1334 assert_fe_loose(f->v); 1335 fe_mul_121666_impl(h->v, f->v); 1336 assert_fe(h->v); 1337 } 1338 1339 // Adapted from Fiat-synthesized |fe_sub_impl| with |out| = 0. 1340 static void fe_neg_impl(uint32_t out[10], const uint32_t in2[10]) { 1341 { const uint32_t x20 = 0; 1342 { const uint32_t x21 = 0; 1343 { const uint32_t x19 = 0; 1344 { const uint32_t x17 = 0; 1345 { const uint32_t x15 = 0; 1346 { const uint32_t x13 = 0; 1347 { const uint32_t x11 = 0; 1348 { const uint32_t x9 = 0; 1349 { const uint32_t x7 = 0; 1350 { const uint32_t x5 = 0; 1351 { const uint32_t x38 = in2[9]; 1352 { const uint32_t x39 = in2[8]; 1353 { const uint32_t x37 = in2[7]; 1354 { const uint32_t x35 = in2[6]; 1355 { const uint32_t x33 = in2[5]; 1356 { const uint32_t x31 = in2[4]; 1357 { const uint32_t x29 = in2[3]; 1358 { const uint32_t x27 = in2[2]; 1359 { const uint32_t x25 = in2[1]; 1360 { const uint32_t x23 = in2[0]; 1361 out[0] = ((0x7ffffda + x5) - x23); 1362 out[1] = ((0x3fffffe + x7) - x25); 1363 out[2] = ((0x7fffffe + x9) - x27); 1364 out[3] = ((0x3fffffe + x11) - x29); 1365 out[4] = ((0x7fffffe + x13) - x31); 1366 out[5] = ((0x3fffffe + x15) - x33); 1367 out[6] = ((0x7fffffe + x17) - x35); 1368 out[7] = ((0x3fffffe + x19) - x37); 1369 out[8] = ((0x7fffffe + x21) - x39); 1370 out[9] = ((0x3fffffe + x20) - x38); 1371 }}}}}}}}}}}}}}}}}}}} 1372 } 1373 1374 // h = -f 1375 static void fe_neg(fe_loose *h, const fe *f) { 1376 assert_fe(f->v); 1377 fe_neg_impl(h->v, f->v); 1378 assert_fe_loose(h->v); 1379 } 1380 1381 // Replace (f,g) with (g,g) if b == 1; 1382 // replace (f,g) with (f,g) if b == 0. 1383 // 1384 // Preconditions: b in {0,1}. 1385 static void fe_cmov(fe_loose *f, const fe_loose *g, unsigned b) { 1386 b = 0-b; 1387 unsigned i; 1388 for (i = 0; i < 10; i++) { 1389 uint32_t x = f->v[i] ^ g->v[i]; 1390 x &= b; 1391 f->v[i] ^= x; 1392 } 1393 } 1394 1395 #endif // BORINGSSL_CURVE25519_64BIT 1396 1397 // h = f 1398 static void fe_copy(fe *h, const fe *f) { 1399 OPENSSL_memmove(h, f, sizeof(fe)); 1400 } 1401 1402 static void fe_copy_lt(fe_loose *h, const fe *f) { 1403 OPENSSL_COMPILE_ASSERT(sizeof(fe_loose) == sizeof(fe), 1404 fe_and_fe_loose_mismatch); 1405 OPENSSL_memmove(h, f, sizeof(fe)); 1406 } 1407 #if !defined(OPENSSL_SMALL) 1408 static void fe_copy_ll(fe_loose *h, const fe_loose *f) { 1409 OPENSSL_memmove(h, f, sizeof(fe_loose)); 1410 } 1411 #endif // !defined(OPENSSL_SMALL) 1412 1413 static void fe_loose_invert(fe *out, const fe_loose *z) { 1414 fe t0; 1415 fe t1; 1416 fe t2; 1417 fe t3; 1418 int i; 1419 1420 fe_sq_tl(&t0, z); 1421 fe_sq_tt(&t1, &t0); 1422 for (i = 1; i < 2; ++i) { 1423 fe_sq_tt(&t1, &t1); 1424 } 1425 fe_mul_tlt(&t1, z, &t1); 1426 fe_mul_ttt(&t0, &t0, &t1); 1427 fe_sq_tt(&t2, &t0); 1428 fe_mul_ttt(&t1, &t1, &t2); 1429 fe_sq_tt(&t2, &t1); 1430 for (i = 1; i < 5; ++i) { 1431 fe_sq_tt(&t2, &t2); 1432 } 1433 fe_mul_ttt(&t1, &t2, &t1); 1434 fe_sq_tt(&t2, &t1); 1435 for (i = 1; i < 10; ++i) { 1436 fe_sq_tt(&t2, &t2); 1437 } 1438 fe_mul_ttt(&t2, &t2, &t1); 1439 fe_sq_tt(&t3, &t2); 1440 for (i = 1; i < 20; ++i) { 1441 fe_sq_tt(&t3, &t3); 1442 } 1443 fe_mul_ttt(&t2, &t3, &t2); 1444 fe_sq_tt(&t2, &t2); 1445 for (i = 1; i < 10; ++i) { 1446 fe_sq_tt(&t2, &t2); 1447 } 1448 fe_mul_ttt(&t1, &t2, &t1); 1449 fe_sq_tt(&t2, &t1); 1450 for (i = 1; i < 50; ++i) { 1451 fe_sq_tt(&t2, &t2); 1452 } 1453 fe_mul_ttt(&t2, &t2, &t1); 1454 fe_sq_tt(&t3, &t2); 1455 for (i = 1; i < 100; ++i) { 1456 fe_sq_tt(&t3, &t3); 1457 } 1458 fe_mul_ttt(&t2, &t3, &t2); 1459 fe_sq_tt(&t2, &t2); 1460 for (i = 1; i < 50; ++i) { 1461 fe_sq_tt(&t2, &t2); 1462 } 1463 fe_mul_ttt(&t1, &t2, &t1); 1464 fe_sq_tt(&t1, &t1); 1465 for (i = 1; i < 5; ++i) { 1466 fe_sq_tt(&t1, &t1); 1467 } 1468 fe_mul_ttt(out, &t1, &t0); 1469 } 1470 1471 static void fe_invert(fe *out, const fe *z) { 1472 fe_loose l; 1473 fe_copy_lt(&l, z); 1474 fe_loose_invert(out, &l); 1475 } 1476 1477 // return 0 if f == 0 1478 // return 1 if f != 0 1479 static int fe_isnonzero(const fe_loose *f) { 1480 fe tight; 1481 fe_carry(&tight, f); 1482 uint8_t s[32]; 1483 fe_tobytes(s, &tight); 1484 1485 static const uint8_t zero[32] = {0}; 1486 return CRYPTO_memcmp(s, zero, sizeof(zero)) != 0; 1487 } 1488 1489 // return 1 if f is in {1,3,5,...,q-2} 1490 // return 0 if f is in {0,2,4,...,q-1} 1491 static int fe_isnegative(const fe *f) { 1492 uint8_t s[32]; 1493 fe_tobytes(s, f); 1494 return s[0] & 1; 1495 } 1496 1497 static void fe_sq2_tt(fe *h, const fe *f) { 1498 // h = f^2 1499 fe_sq_tt(h, f); 1500 1501 // h = h + h 1502 fe_loose tmp; 1503 fe_add(&tmp, h, h); 1504 fe_carry(h, &tmp); 1505 } 1506 1507 static void fe_pow22523(fe *out, const fe *z) { 1508 fe t0; 1509 fe t1; 1510 fe t2; 1511 int i; 1512 1513 fe_sq_tt(&t0, z); 1514 fe_sq_tt(&t1, &t0); 1515 for (i = 1; i < 2; ++i) { 1516 fe_sq_tt(&t1, &t1); 1517 } 1518 fe_mul_ttt(&t1, z, &t1); 1519 fe_mul_ttt(&t0, &t0, &t1); 1520 fe_sq_tt(&t0, &t0); 1521 fe_mul_ttt(&t0, &t1, &t0); 1522 fe_sq_tt(&t1, &t0); 1523 for (i = 1; i < 5; ++i) { 1524 fe_sq_tt(&t1, &t1); 1525 } 1526 fe_mul_ttt(&t0, &t1, &t0); 1527 fe_sq_tt(&t1, &t0); 1528 for (i = 1; i < 10; ++i) { 1529 fe_sq_tt(&t1, &t1); 1530 } 1531 fe_mul_ttt(&t1, &t1, &t0); 1532 fe_sq_tt(&t2, &t1); 1533 for (i = 1; i < 20; ++i) { 1534 fe_sq_tt(&t2, &t2); 1535 } 1536 fe_mul_ttt(&t1, &t2, &t1); 1537 fe_sq_tt(&t1, &t1); 1538 for (i = 1; i < 10; ++i) { 1539 fe_sq_tt(&t1, &t1); 1540 } 1541 fe_mul_ttt(&t0, &t1, &t0); 1542 fe_sq_tt(&t1, &t0); 1543 for (i = 1; i < 50; ++i) { 1544 fe_sq_tt(&t1, &t1); 1545 } 1546 fe_mul_ttt(&t1, &t1, &t0); 1547 fe_sq_tt(&t2, &t1); 1548 for (i = 1; i < 100; ++i) { 1549 fe_sq_tt(&t2, &t2); 1550 } 1551 fe_mul_ttt(&t1, &t2, &t1); 1552 fe_sq_tt(&t1, &t1); 1553 for (i = 1; i < 50; ++i) { 1554 fe_sq_tt(&t1, &t1); 1555 } 1556 fe_mul_ttt(&t0, &t1, &t0); 1557 fe_sq_tt(&t0, &t0); 1558 for (i = 1; i < 2; ++i) { 1559 fe_sq_tt(&t0, &t0); 1560 } 1561 fe_mul_ttt(out, &t0, z); 1562 } 1563 1564 1565 // Group operations. 1566 1567 void x25519_ge_tobytes(uint8_t s[32], const ge_p2 *h) { 1568 fe recip; 1569 fe x; 1570 fe y; 1571 1572 fe_invert(&recip, &h->Z); 1573 fe_mul_ttt(&x, &h->X, &recip); 1574 fe_mul_ttt(&y, &h->Y, &recip); 1575 fe_tobytes(s, &y); 1576 s[31] ^= fe_isnegative(&x) << 7; 1577 } 1578 1579 static void ge_p3_tobytes(uint8_t s[32], const ge_p3 *h) { 1580 fe recip; 1581 fe x; 1582 fe y; 1583 1584 fe_invert(&recip, &h->Z); 1585 fe_mul_ttt(&x, &h->X, &recip); 1586 fe_mul_ttt(&y, &h->Y, &recip); 1587 fe_tobytes(s, &y); 1588 s[31] ^= fe_isnegative(&x) << 7; 1589 } 1590 1591 int x25519_ge_frombytes_vartime(ge_p3 *h, const uint8_t *s) { 1592 fe u; 1593 fe_loose v; 1594 fe v3; 1595 fe vxx; 1596 fe_loose check; 1597 1598 fe_frombytes(&h->Y, s); 1599 fe_1(&h->Z); 1600 fe_sq_tt(&v3, &h->Y); 1601 fe_mul_ttt(&vxx, &v3, &d); 1602 fe_sub(&v, &v3, &h->Z); // u = y^2-1 1603 fe_carry(&u, &v); 1604 fe_add(&v, &vxx, &h->Z); // v = dy^2+1 1605 1606 fe_sq_tl(&v3, &v); 1607 fe_mul_ttl(&v3, &v3, &v); // v3 = v^3 1608 fe_sq_tt(&h->X, &v3); 1609 fe_mul_ttl(&h->X, &h->X, &v); 1610 fe_mul_ttt(&h->X, &h->X, &u); // x = uv^7 1611 1612 fe_pow22523(&h->X, &h->X); // x = (uv^7)^((q-5)/8) 1613 fe_mul_ttt(&h->X, &h->X, &v3); 1614 fe_mul_ttt(&h->X, &h->X, &u); // x = uv^3(uv^7)^((q-5)/8) 1615 1616 fe_sq_tt(&vxx, &h->X); 1617 fe_mul_ttl(&vxx, &vxx, &v); 1618 fe_sub(&check, &vxx, &u); 1619 if (fe_isnonzero(&check)) { 1620 fe_add(&check, &vxx, &u); 1621 if (fe_isnonzero(&check)) { 1622 return -1; 1623 } 1624 fe_mul_ttt(&h->X, &h->X, &sqrtm1); 1625 } 1626 1627 if (fe_isnegative(&h->X) != (s[31] >> 7)) { 1628 fe_loose t; 1629 fe_neg(&t, &h->X); 1630 fe_carry(&h->X, &t); 1631 } 1632 1633 fe_mul_ttt(&h->T, &h->X, &h->Y); 1634 return 0; 1635 } 1636 1637 static void ge_p2_0(ge_p2 *h) { 1638 fe_0(&h->X); 1639 fe_1(&h->Y); 1640 fe_1(&h->Z); 1641 } 1642 1643 static void ge_p3_0(ge_p3 *h) { 1644 fe_0(&h->X); 1645 fe_1(&h->Y); 1646 fe_1(&h->Z); 1647 fe_0(&h->T); 1648 } 1649 1650 static void ge_cached_0(ge_cached *h) { 1651 fe_loose_1(&h->YplusX); 1652 fe_loose_1(&h->YminusX); 1653 fe_loose_1(&h->Z); 1654 fe_loose_0(&h->T2d); 1655 } 1656 1657 static void ge_precomp_0(ge_precomp *h) { 1658 fe_loose_1(&h->yplusx); 1659 fe_loose_1(&h->yminusx); 1660 fe_loose_0(&h->xy2d); 1661 } 1662 1663 // r = p 1664 static void ge_p3_to_p2(ge_p2 *r, const ge_p3 *p) { 1665 fe_copy(&r->X, &p->X); 1666 fe_copy(&r->Y, &p->Y); 1667 fe_copy(&r->Z, &p->Z); 1668 } 1669 1670 // r = p 1671 void x25519_ge_p3_to_cached(ge_cached *r, const ge_p3 *p) { 1672 fe_add(&r->YplusX, &p->Y, &p->X); 1673 fe_sub(&r->YminusX, &p->Y, &p->X); 1674 fe_copy_lt(&r->Z, &p->Z); 1675 fe_mul_ltt(&r->T2d, &p->T, &d2); 1676 } 1677 1678 // r = p 1679 void x25519_ge_p1p1_to_p2(ge_p2 *r, const ge_p1p1 *p) { 1680 fe_mul_tll(&r->X, &p->X, &p->T); 1681 fe_mul_tll(&r->Y, &p->Y, &p->Z); 1682 fe_mul_tll(&r->Z, &p->Z, &p->T); 1683 } 1684 1685 // r = p 1686 void x25519_ge_p1p1_to_p3(ge_p3 *r, const ge_p1p1 *p) { 1687 fe_mul_tll(&r->X, &p->X, &p->T); 1688 fe_mul_tll(&r->Y, &p->Y, &p->Z); 1689 fe_mul_tll(&r->Z, &p->Z, &p->T); 1690 fe_mul_tll(&r->T, &p->X, &p->Y); 1691 } 1692 1693 // r = p 1694 static void ge_p1p1_to_cached(ge_cached *r, const ge_p1p1 *p) { 1695 ge_p3 t; 1696 x25519_ge_p1p1_to_p3(&t, p); 1697 x25519_ge_p3_to_cached(r, &t); 1698 } 1699 1700 // r = 2 * p 1701 static void ge_p2_dbl(ge_p1p1 *r, const ge_p2 *p) { 1702 fe trX, trZ, trT; 1703 fe t0; 1704 1705 fe_sq_tt(&trX, &p->X); 1706 fe_sq_tt(&trZ, &p->Y); 1707 fe_sq2_tt(&trT, &p->Z); 1708 fe_add(&r->Y, &p->X, &p->Y); 1709 fe_sq_tl(&t0, &r->Y); 1710 1711 fe_add(&r->Y, &trZ, &trX); 1712 fe_sub(&r->Z, &trZ, &trX); 1713 fe_carry(&trZ, &r->Y); 1714 fe_sub(&r->X, &t0, &trZ); 1715 fe_carry(&trZ, &r->Z); 1716 fe_sub(&r->T, &trT, &trZ); 1717 } 1718 1719 // r = 2 * p 1720 static void ge_p3_dbl(ge_p1p1 *r, const ge_p3 *p) { 1721 ge_p2 q; 1722 ge_p3_to_p2(&q, p); 1723 ge_p2_dbl(r, &q); 1724 } 1725 1726 // r = p + q 1727 static void ge_madd(ge_p1p1 *r, const ge_p3 *p, const ge_precomp *q) { 1728 fe trY, trZ, trT; 1729 1730 fe_add(&r->X, &p->Y, &p->X); 1731 fe_sub(&r->Y, &p->Y, &p->X); 1732 fe_mul_tll(&trZ, &r->X, &q->yplusx); 1733 fe_mul_tll(&trY, &r->Y, &q->yminusx); 1734 fe_mul_tlt(&trT, &q->xy2d, &p->T); 1735 fe_add(&r->T, &p->Z, &p->Z); 1736 fe_sub(&r->X, &trZ, &trY); 1737 fe_add(&r->Y, &trZ, &trY); 1738 fe_carry(&trZ, &r->T); 1739 fe_add(&r->Z, &trZ, &trT); 1740 fe_sub(&r->T, &trZ, &trT); 1741 } 1742 1743 // r = p - q 1744 static void ge_msub(ge_p1p1 *r, const ge_p3 *p, const ge_precomp *q) { 1745 fe trY, trZ, trT; 1746 1747 fe_add(&r->X, &p->Y, &p->X); 1748 fe_sub(&r->Y, &p->Y, &p->X); 1749 fe_mul_tll(&trZ, &r->X, &q->yminusx); 1750 fe_mul_tll(&trY, &r->Y, &q->yplusx); 1751 fe_mul_tlt(&trT, &q->xy2d, &p->T); 1752 fe_add(&r->T, &p->Z, &p->Z); 1753 fe_sub(&r->X, &trZ, &trY); 1754 fe_add(&r->Y, &trZ, &trY); 1755 fe_carry(&trZ, &r->T); 1756 fe_sub(&r->Z, &trZ, &trT); 1757 fe_add(&r->T, &trZ, &trT); 1758 } 1759 1760 // r = p + q 1761 void x25519_ge_add(ge_p1p1 *r, const ge_p3 *p, const ge_cached *q) { 1762 fe trX, trY, trZ, trT; 1763 1764 fe_add(&r->X, &p->Y, &p->X); 1765 fe_sub(&r->Y, &p->Y, &p->X); 1766 fe_mul_tll(&trZ, &r->X, &q->YplusX); 1767 fe_mul_tll(&trY, &r->Y, &q->YminusX); 1768 fe_mul_tlt(&trT, &q->T2d, &p->T); 1769 fe_mul_ttl(&trX, &p->Z, &q->Z); 1770 fe_add(&r->T, &trX, &trX); 1771 fe_sub(&r->X, &trZ, &trY); 1772 fe_add(&r->Y, &trZ, &trY); 1773 fe_carry(&trZ, &r->T); 1774 fe_add(&r->Z, &trZ, &trT); 1775 fe_sub(&r->T, &trZ, &trT); 1776 } 1777 1778 // r = p - q 1779 void x25519_ge_sub(ge_p1p1 *r, const ge_p3 *p, const ge_cached *q) { 1780 fe trX, trY, trZ, trT; 1781 1782 fe_add(&r->X, &p->Y, &p->X); 1783 fe_sub(&r->Y, &p->Y, &p->X); 1784 fe_mul_tll(&trZ, &r->X, &q->YminusX); 1785 fe_mul_tll(&trY, &r->Y, &q->YplusX); 1786 fe_mul_tlt(&trT, &q->T2d, &p->T); 1787 fe_mul_ttl(&trX, &p->Z, &q->Z); 1788 fe_add(&r->T, &trX, &trX); 1789 fe_sub(&r->X, &trZ, &trY); 1790 fe_add(&r->Y, &trZ, &trY); 1791 fe_carry(&trZ, &r->T); 1792 fe_sub(&r->Z, &trZ, &trT); 1793 fe_add(&r->T, &trZ, &trT); 1794 } 1795 1796 static uint8_t equal(signed char b, signed char c) { 1797 uint8_t ub = b; 1798 uint8_t uc = c; 1799 uint8_t x = ub ^ uc; // 0: yes; 1..255: no 1800 uint32_t y = x; // 0: yes; 1..255: no 1801 y -= 1; // 4294967295: yes; 0..254: no 1802 y >>= 31; // 1: yes; 0: no 1803 return y; 1804 } 1805 1806 static void cmov(ge_precomp *t, const ge_precomp *u, uint8_t b) { 1807 fe_cmov(&t->yplusx, &u->yplusx, b); 1808 fe_cmov(&t->yminusx, &u->yminusx, b); 1809 fe_cmov(&t->xy2d, &u->xy2d, b); 1810 } 1811 1812 void x25519_ge_scalarmult_small_precomp( 1813 ge_p3 *h, const uint8_t a[32], const uint8_t precomp_table[15 * 2 * 32]) { 1814 // precomp_table is first expanded into matching |ge_precomp| 1815 // elements. 1816 ge_precomp multiples[15]; 1817 1818 unsigned i; 1819 for (i = 0; i < 15; i++) { 1820 const uint8_t *bytes = &precomp_table[i*(2 * 32)]; 1821 fe x, y; 1822 fe_frombytes(&x, bytes); 1823 fe_frombytes(&y, bytes + 32); 1824 1825 ge_precomp *out = &multiples[i]; 1826 fe_add(&out->yplusx, &y, &x); 1827 fe_sub(&out->yminusx, &y, &x); 1828 fe_mul_ltt(&out->xy2d, &x, &y); 1829 fe_mul_llt(&out->xy2d, &out->xy2d, &d2); 1830 } 1831 1832 // See the comment above |k25519SmallPrecomp| about the structure of the 1833 // precomputed elements. This loop does 64 additions and 64 doublings to 1834 // calculate the result. 1835 ge_p3_0(h); 1836 1837 for (i = 63; i < 64; i--) { 1838 unsigned j; 1839 signed char index = 0; 1840 1841 for (j = 0; j < 4; j++) { 1842 const uint8_t bit = 1 & (a[(8 * j) + (i / 8)] >> (i & 7)); 1843 index |= (bit << j); 1844 } 1845 1846 ge_precomp e; 1847 ge_precomp_0(&e); 1848 1849 for (j = 1; j < 16; j++) { 1850 cmov(&e, &multiples[j-1], equal(index, j)); 1851 } 1852 1853 ge_cached cached; 1854 ge_p1p1 r; 1855 x25519_ge_p3_to_cached(&cached, h); 1856 x25519_ge_add(&r, h, &cached); 1857 x25519_ge_p1p1_to_p3(h, &r); 1858 1859 ge_madd(&r, h, &e); 1860 x25519_ge_p1p1_to_p3(h, &r); 1861 } 1862 } 1863 1864 #if defined(OPENSSL_SMALL) 1865 1866 void x25519_ge_scalarmult_base(ge_p3 *h, const uint8_t a[32]) { 1867 x25519_ge_scalarmult_small_precomp(h, a, k25519SmallPrecomp); 1868 } 1869 1870 #else 1871 1872 static uint8_t negative(signed char b) { 1873 uint32_t x = b; 1874 x >>= 31; // 1: yes; 0: no 1875 return x; 1876 } 1877 1878 static void table_select(ge_precomp *t, int pos, signed char b) { 1879 ge_precomp minust; 1880 uint8_t bnegative = negative(b); 1881 uint8_t babs = b - ((uint8_t)((-bnegative) & b) << 1); 1882 1883 ge_precomp_0(t); 1884 cmov(t, &k25519Precomp[pos][0], equal(babs, 1)); 1885 cmov(t, &k25519Precomp[pos][1], equal(babs, 2)); 1886 cmov(t, &k25519Precomp[pos][2], equal(babs, 3)); 1887 cmov(t, &k25519Precomp[pos][3], equal(babs, 4)); 1888 cmov(t, &k25519Precomp[pos][4], equal(babs, 5)); 1889 cmov(t, &k25519Precomp[pos][5], equal(babs, 6)); 1890 cmov(t, &k25519Precomp[pos][6], equal(babs, 7)); 1891 cmov(t, &k25519Precomp[pos][7], equal(babs, 8)); 1892 fe_copy_ll(&minust.yplusx, &t->yminusx); 1893 fe_copy_ll(&minust.yminusx, &t->yplusx); 1894 1895 // NOTE: the input table is canonical, but types don't encode it 1896 fe tmp; 1897 fe_carry(&tmp, &t->xy2d); 1898 fe_neg(&minust.xy2d, &tmp); 1899 1900 cmov(t, &minust, bnegative); 1901 } 1902 1903 // h = a * B 1904 // where a = a[0]+256*a[1]+...+256^31 a[31] 1905 // B is the Ed25519 base point (x,4/5) with x positive. 1906 // 1907 // Preconditions: 1908 // a[31] <= 127 1909 void x25519_ge_scalarmult_base(ge_p3 *h, const uint8_t *a) { 1910 signed char e[64]; 1911 signed char carry; 1912 ge_p1p1 r; 1913 ge_p2 s; 1914 ge_precomp t; 1915 int i; 1916 1917 for (i = 0; i < 32; ++i) { 1918 e[2 * i + 0] = (a[i] >> 0) & 15; 1919 e[2 * i + 1] = (a[i] >> 4) & 15; 1920 } 1921 // each e[i] is between 0 and 15 1922 // e[63] is between 0 and 7 1923 1924 carry = 0; 1925 for (i = 0; i < 63; ++i) { 1926 e[i] += carry; 1927 carry = e[i] + 8; 1928 carry >>= 4; 1929 e[i] -= carry << 4; 1930 } 1931 e[63] += carry; 1932 // each e[i] is between -8 and 8 1933 1934 ge_p3_0(h); 1935 for (i = 1; i < 64; i += 2) { 1936 table_select(&t, i / 2, e[i]); 1937 ge_madd(&r, h, &t); 1938 x25519_ge_p1p1_to_p3(h, &r); 1939 } 1940 1941 ge_p3_dbl(&r, h); 1942 x25519_ge_p1p1_to_p2(&s, &r); 1943 ge_p2_dbl(&r, &s); 1944 x25519_ge_p1p1_to_p2(&s, &r); 1945 ge_p2_dbl(&r, &s); 1946 x25519_ge_p1p1_to_p2(&s, &r); 1947 ge_p2_dbl(&r, &s); 1948 x25519_ge_p1p1_to_p3(h, &r); 1949 1950 for (i = 0; i < 64; i += 2) { 1951 table_select(&t, i / 2, e[i]); 1952 ge_madd(&r, h, &t); 1953 x25519_ge_p1p1_to_p3(h, &r); 1954 } 1955 } 1956 1957 #endif 1958 1959 static void cmov_cached(ge_cached *t, ge_cached *u, uint8_t b) { 1960 fe_cmov(&t->YplusX, &u->YplusX, b); 1961 fe_cmov(&t->YminusX, &u->YminusX, b); 1962 fe_cmov(&t->Z, &u->Z, b); 1963 fe_cmov(&t->T2d, &u->T2d, b); 1964 } 1965 1966 // r = scalar * A. 1967 // where a = a[0]+256*a[1]+...+256^31 a[31]. 1968 void x25519_ge_scalarmult(ge_p2 *r, const uint8_t *scalar, const ge_p3 *A) { 1969 ge_p2 Ai_p2[8]; 1970 ge_cached Ai[16]; 1971 ge_p1p1 t; 1972 1973 ge_cached_0(&Ai[0]); 1974 x25519_ge_p3_to_cached(&Ai[1], A); 1975 ge_p3_to_p2(&Ai_p2[1], A); 1976 1977 unsigned i; 1978 for (i = 2; i < 16; i += 2) { 1979 ge_p2_dbl(&t, &Ai_p2[i / 2]); 1980 ge_p1p1_to_cached(&Ai[i], &t); 1981 if (i < 8) { 1982 x25519_ge_p1p1_to_p2(&Ai_p2[i], &t); 1983 } 1984 x25519_ge_add(&t, A, &Ai[i]); 1985 ge_p1p1_to_cached(&Ai[i + 1], &t); 1986 if (i < 7) { 1987 x25519_ge_p1p1_to_p2(&Ai_p2[i + 1], &t); 1988 } 1989 } 1990 1991 ge_p2_0(r); 1992 ge_p3 u; 1993 1994 for (i = 0; i < 256; i += 4) { 1995 ge_p2_dbl(&t, r); 1996 x25519_ge_p1p1_to_p2(r, &t); 1997 ge_p2_dbl(&t, r); 1998 x25519_ge_p1p1_to_p2(r, &t); 1999 ge_p2_dbl(&t, r); 2000 x25519_ge_p1p1_to_p2(r, &t); 2001 ge_p2_dbl(&t, r); 2002 x25519_ge_p1p1_to_p3(&u, &t); 2003 2004 uint8_t index = scalar[31 - i/8]; 2005 index >>= 4 - (i & 4); 2006 index &= 0xf; 2007 2008 unsigned j; 2009 ge_cached selected; 2010 ge_cached_0(&selected); 2011 for (j = 0; j < 16; j++) { 2012 cmov_cached(&selected, &Ai[j], equal(j, index)); 2013 } 2014 2015 x25519_ge_add(&t, &u, &selected); 2016 x25519_ge_p1p1_to_p2(r, &t); 2017 } 2018 } 2019 2020 static void slide(signed char *r, const uint8_t *a) { 2021 int i; 2022 int b; 2023 int k; 2024 2025 for (i = 0; i < 256; ++i) { 2026 r[i] = 1 & (a[i >> 3] >> (i & 7)); 2027 } 2028 2029 for (i = 0; i < 256; ++i) { 2030 if (r[i]) { 2031 for (b = 1; b <= 6 && i + b < 256; ++b) { 2032 if (r[i + b]) { 2033 if (r[i] + (r[i + b] << b) <= 15) { 2034 r[i] += r[i + b] << b; 2035 r[i + b] = 0; 2036 } else if (r[i] - (r[i + b] << b) >= -15) { 2037 r[i] -= r[i + b] << b; 2038 for (k = i + b; k < 256; ++k) { 2039 if (!r[k]) { 2040 r[k] = 1; 2041 break; 2042 } 2043 r[k] = 0; 2044 } 2045 } else { 2046 break; 2047 } 2048 } 2049 } 2050 } 2051 } 2052 } 2053 2054 // r = a * A + b * B 2055 // where a = a[0]+256*a[1]+...+256^31 a[31]. 2056 // and b = b[0]+256*b[1]+...+256^31 b[31]. 2057 // B is the Ed25519 base point (x,4/5) with x positive. 2058 static void ge_double_scalarmult_vartime(ge_p2 *r, const uint8_t *a, 2059 const ge_p3 *A, const uint8_t *b) { 2060 signed char aslide[256]; 2061 signed char bslide[256]; 2062 ge_cached Ai[8]; // A,3A,5A,7A,9A,11A,13A,15A 2063 ge_p1p1 t; 2064 ge_p3 u; 2065 ge_p3 A2; 2066 int i; 2067 2068 slide(aslide, a); 2069 slide(bslide, b); 2070 2071 x25519_ge_p3_to_cached(&Ai[0], A); 2072 ge_p3_dbl(&t, A); 2073 x25519_ge_p1p1_to_p3(&A2, &t); 2074 x25519_ge_add(&t, &A2, &Ai[0]); 2075 x25519_ge_p1p1_to_p3(&u, &t); 2076 x25519_ge_p3_to_cached(&Ai[1], &u); 2077 x25519_ge_add(&t, &A2, &Ai[1]); 2078 x25519_ge_p1p1_to_p3(&u, &t); 2079 x25519_ge_p3_to_cached(&Ai[2], &u); 2080 x25519_ge_add(&t, &A2, &Ai[2]); 2081 x25519_ge_p1p1_to_p3(&u, &t); 2082 x25519_ge_p3_to_cached(&Ai[3], &u); 2083 x25519_ge_add(&t, &A2, &Ai[3]); 2084 x25519_ge_p1p1_to_p3(&u, &t); 2085 x25519_ge_p3_to_cached(&Ai[4], &u); 2086 x25519_ge_add(&t, &A2, &Ai[4]); 2087 x25519_ge_p1p1_to_p3(&u, &t); 2088 x25519_ge_p3_to_cached(&Ai[5], &u); 2089 x25519_ge_add(&t, &A2, &Ai[5]); 2090 x25519_ge_p1p1_to_p3(&u, &t); 2091 x25519_ge_p3_to_cached(&Ai[6], &u); 2092 x25519_ge_add(&t, &A2, &Ai[6]); 2093 x25519_ge_p1p1_to_p3(&u, &t); 2094 x25519_ge_p3_to_cached(&Ai[7], &u); 2095 2096 ge_p2_0(r); 2097 2098 for (i = 255; i >= 0; --i) { 2099 if (aslide[i] || bslide[i]) { 2100 break; 2101 } 2102 } 2103 2104 for (; i >= 0; --i) { 2105 ge_p2_dbl(&t, r); 2106 2107 if (aslide[i] > 0) { 2108 x25519_ge_p1p1_to_p3(&u, &t); 2109 x25519_ge_add(&t, &u, &Ai[aslide[i] / 2]); 2110 } else if (aslide[i] < 0) { 2111 x25519_ge_p1p1_to_p3(&u, &t); 2112 x25519_ge_sub(&t, &u, &Ai[(-aslide[i]) / 2]); 2113 } 2114 2115 if (bslide[i] > 0) { 2116 x25519_ge_p1p1_to_p3(&u, &t); 2117 ge_madd(&t, &u, &Bi[bslide[i] / 2]); 2118 } else if (bslide[i] < 0) { 2119 x25519_ge_p1p1_to_p3(&u, &t); 2120 ge_msub(&t, &u, &Bi[(-bslide[i]) / 2]); 2121 } 2122 2123 x25519_ge_p1p1_to_p2(r, &t); 2124 } 2125 } 2126 2127 // The set of scalars is \Z/l 2128 // where l = 2^252 + 27742317777372353535851937790883648493. 2129 2130 // Input: 2131 // s[0]+256*s[1]+...+256^63*s[63] = s 2132 // 2133 // Output: 2134 // s[0]+256*s[1]+...+256^31*s[31] = s mod l 2135 // where l = 2^252 + 27742317777372353535851937790883648493. 2136 // Overwrites s in place. 2137 void x25519_sc_reduce(uint8_t s[64]) { 2138 int64_t s0 = 2097151 & load_3(s); 2139 int64_t s1 = 2097151 & (load_4(s + 2) >> 5); 2140 int64_t s2 = 2097151 & (load_3(s + 5) >> 2); 2141 int64_t s3 = 2097151 & (load_4(s + 7) >> 7); 2142 int64_t s4 = 2097151 & (load_4(s + 10) >> 4); 2143 int64_t s5 = 2097151 & (load_3(s + 13) >> 1); 2144 int64_t s6 = 2097151 & (load_4(s + 15) >> 6); 2145 int64_t s7 = 2097151 & (load_3(s + 18) >> 3); 2146 int64_t s8 = 2097151 & load_3(s + 21); 2147 int64_t s9 = 2097151 & (load_4(s + 23) >> 5); 2148 int64_t s10 = 2097151 & (load_3(s + 26) >> 2); 2149 int64_t s11 = 2097151 & (load_4(s + 28) >> 7); 2150 int64_t s12 = 2097151 & (load_4(s + 31) >> 4); 2151 int64_t s13 = 2097151 & (load_3(s + 34) >> 1); 2152 int64_t s14 = 2097151 & (load_4(s + 36) >> 6); 2153 int64_t s15 = 2097151 & (load_3(s + 39) >> 3); 2154 int64_t s16 = 2097151 & load_3(s + 42); 2155 int64_t s17 = 2097151 & (load_4(s + 44) >> 5); 2156 int64_t s18 = 2097151 & (load_3(s + 47) >> 2); 2157 int64_t s19 = 2097151 & (load_4(s + 49) >> 7); 2158 int64_t s20 = 2097151 & (load_4(s + 52) >> 4); 2159 int64_t s21 = 2097151 & (load_3(s + 55) >> 1); 2160 int64_t s22 = 2097151 & (load_4(s + 57) >> 6); 2161 int64_t s23 = (load_4(s + 60) >> 3); 2162 int64_t carry0; 2163 int64_t carry1; 2164 int64_t carry2; 2165 int64_t carry3; 2166 int64_t carry4; 2167 int64_t carry5; 2168 int64_t carry6; 2169 int64_t carry7; 2170 int64_t carry8; 2171 int64_t carry9; 2172 int64_t carry10; 2173 int64_t carry11; 2174 int64_t carry12; 2175 int64_t carry13; 2176 int64_t carry14; 2177 int64_t carry15; 2178 int64_t carry16; 2179 2180 s11 += s23 * 666643; 2181 s12 += s23 * 470296; 2182 s13 += s23 * 654183; 2183 s14 -= s23 * 997805; 2184 s15 += s23 * 136657; 2185 s16 -= s23 * 683901; 2186 s23 = 0; 2187 2188 s10 += s22 * 666643; 2189 s11 += s22 * 470296; 2190 s12 += s22 * 654183; 2191 s13 -= s22 * 997805; 2192 s14 += s22 * 136657; 2193 s15 -= s22 * 683901; 2194 s22 = 0; 2195 2196 s9 += s21 * 666643; 2197 s10 += s21 * 470296; 2198 s11 += s21 * 654183; 2199 s12 -= s21 * 997805; 2200 s13 += s21 * 136657; 2201 s14 -= s21 * 683901; 2202 s21 = 0; 2203 2204 s8 += s20 * 666643; 2205 s9 += s20 * 470296; 2206 s10 += s20 * 654183; 2207 s11 -= s20 * 997805; 2208 s12 += s20 * 136657; 2209 s13 -= s20 * 683901; 2210 s20 = 0; 2211 2212 s7 += s19 * 666643; 2213 s8 += s19 * 470296; 2214 s9 += s19 * 654183; 2215 s10 -= s19 * 997805; 2216 s11 += s19 * 136657; 2217 s12 -= s19 * 683901; 2218 s19 = 0; 2219 2220 s6 += s18 * 666643; 2221 s7 += s18 * 470296; 2222 s8 += s18 * 654183; 2223 s9 -= s18 * 997805; 2224 s10 += s18 * 136657; 2225 s11 -= s18 * 683901; 2226 s18 = 0; 2227 2228 carry6 = (s6 + (1 << 20)) >> 21; 2229 s7 += carry6; 2230 s6 -= carry6 << 21; 2231 carry8 = (s8 + (1 << 20)) >> 21; 2232 s9 += carry8; 2233 s8 -= carry8 << 21; 2234 carry10 = (s10 + (1 << 20)) >> 21; 2235 s11 += carry10; 2236 s10 -= carry10 << 21; 2237 carry12 = (s12 + (1 << 20)) >> 21; 2238 s13 += carry12; 2239 s12 -= carry12 << 21; 2240 carry14 = (s14 + (1 << 20)) >> 21; 2241 s15 += carry14; 2242 s14 -= carry14 << 21; 2243 carry16 = (s16 + (1 << 20)) >> 21; 2244 s17 += carry16; 2245 s16 -= carry16 << 21; 2246 2247 carry7 = (s7 + (1 << 20)) >> 21; 2248 s8 += carry7; 2249 s7 -= carry7 << 21; 2250 carry9 = (s9 + (1 << 20)) >> 21; 2251 s10 += carry9; 2252 s9 -= carry9 << 21; 2253 carry11 = (s11 + (1 << 20)) >> 21; 2254 s12 += carry11; 2255 s11 -= carry11 << 21; 2256 carry13 = (s13 + (1 << 20)) >> 21; 2257 s14 += carry13; 2258 s13 -= carry13 << 21; 2259 carry15 = (s15 + (1 << 20)) >> 21; 2260 s16 += carry15; 2261 s15 -= carry15 << 21; 2262 2263 s5 += s17 * 666643; 2264 s6 += s17 * 470296; 2265 s7 += s17 * 654183; 2266 s8 -= s17 * 997805; 2267 s9 += s17 * 136657; 2268 s10 -= s17 * 683901; 2269 s17 = 0; 2270 2271 s4 += s16 * 666643; 2272 s5 += s16 * 470296; 2273 s6 += s16 * 654183; 2274 s7 -= s16 * 997805; 2275 s8 += s16 * 136657; 2276 s9 -= s16 * 683901; 2277 s16 = 0; 2278 2279 s3 += s15 * 666643; 2280 s4 += s15 * 470296; 2281 s5 += s15 * 654183; 2282 s6 -= s15 * 997805; 2283 s7 += s15 * 136657; 2284 s8 -= s15 * 683901; 2285 s15 = 0; 2286 2287 s2 += s14 * 666643; 2288 s3 += s14 * 470296; 2289 s4 += s14 * 654183; 2290 s5 -= s14 * 997805; 2291 s6 += s14 * 136657; 2292 s7 -= s14 * 683901; 2293 s14 = 0; 2294 2295 s1 += s13 * 666643; 2296 s2 += s13 * 470296; 2297 s3 += s13 * 654183; 2298 s4 -= s13 * 997805; 2299 s5 += s13 * 136657; 2300 s6 -= s13 * 683901; 2301 s13 = 0; 2302 2303 s0 += s12 * 666643; 2304 s1 += s12 * 470296; 2305 s2 += s12 * 654183; 2306 s3 -= s12 * 997805; 2307 s4 += s12 * 136657; 2308 s5 -= s12 * 683901; 2309 s12 = 0; 2310 2311 carry0 = (s0 + (1 << 20)) >> 21; 2312 s1 += carry0; 2313 s0 -= carry0 << 21; 2314 carry2 = (s2 + (1 << 20)) >> 21; 2315 s3 += carry2; 2316 s2 -= carry2 << 21; 2317 carry4 = (s4 + (1 << 20)) >> 21; 2318 s5 += carry4; 2319 s4 -= carry4 << 21; 2320 carry6 = (s6 + (1 << 20)) >> 21; 2321 s7 += carry6; 2322 s6 -= carry6 << 21; 2323 carry8 = (s8 + (1 << 20)) >> 21; 2324 s9 += carry8; 2325 s8 -= carry8 << 21; 2326 carry10 = (s10 + (1 << 20)) >> 21; 2327 s11 += carry10; 2328 s10 -= carry10 << 21; 2329 2330 carry1 = (s1 + (1 << 20)) >> 21; 2331 s2 += carry1; 2332 s1 -= carry1 << 21; 2333 carry3 = (s3 + (1 << 20)) >> 21; 2334 s4 += carry3; 2335 s3 -= carry3 << 21; 2336 carry5 = (s5 + (1 << 20)) >> 21; 2337 s6 += carry5; 2338 s5 -= carry5 << 21; 2339 carry7 = (s7 + (1 << 20)) >> 21; 2340 s8 += carry7; 2341 s7 -= carry7 << 21; 2342 carry9 = (s9 + (1 << 20)) >> 21; 2343 s10 += carry9; 2344 s9 -= carry9 << 21; 2345 carry11 = (s11 + (1 << 20)) >> 21; 2346 s12 += carry11; 2347 s11 -= carry11 << 21; 2348 2349 s0 += s12 * 666643; 2350 s1 += s12 * 470296; 2351 s2 += s12 * 654183; 2352 s3 -= s12 * 997805; 2353 s4 += s12 * 136657; 2354 s5 -= s12 * 683901; 2355 s12 = 0; 2356 2357 carry0 = s0 >> 21; 2358 s1 += carry0; 2359 s0 -= carry0 << 21; 2360 carry1 = s1 >> 21; 2361 s2 += carry1; 2362 s1 -= carry1 << 21; 2363 carry2 = s2 >> 21; 2364 s3 += carry2; 2365 s2 -= carry2 << 21; 2366 carry3 = s3 >> 21; 2367 s4 += carry3; 2368 s3 -= carry3 << 21; 2369 carry4 = s4 >> 21; 2370 s5 += carry4; 2371 s4 -= carry4 << 21; 2372 carry5 = s5 >> 21; 2373 s6 += carry5; 2374 s5 -= carry5 << 21; 2375 carry6 = s6 >> 21; 2376 s7 += carry6; 2377 s6 -= carry6 << 21; 2378 carry7 = s7 >> 21; 2379 s8 += carry7; 2380 s7 -= carry7 << 21; 2381 carry8 = s8 >> 21; 2382 s9 += carry8; 2383 s8 -= carry8 << 21; 2384 carry9 = s9 >> 21; 2385 s10 += carry9; 2386 s9 -= carry9 << 21; 2387 carry10 = s10 >> 21; 2388 s11 += carry10; 2389 s10 -= carry10 << 21; 2390 carry11 = s11 >> 21; 2391 s12 += carry11; 2392 s11 -= carry11 << 21; 2393 2394 s0 += s12 * 666643; 2395 s1 += s12 * 470296; 2396 s2 += s12 * 654183; 2397 s3 -= s12 * 997805; 2398 s4 += s12 * 136657; 2399 s5 -= s12 * 683901; 2400 s12 = 0; 2401 2402 carry0 = s0 >> 21; 2403 s1 += carry0; 2404 s0 -= carry0 << 21; 2405 carry1 = s1 >> 21; 2406 s2 += carry1; 2407 s1 -= carry1 << 21; 2408 carry2 = s2 >> 21; 2409 s3 += carry2; 2410 s2 -= carry2 << 21; 2411 carry3 = s3 >> 21; 2412 s4 += carry3; 2413 s3 -= carry3 << 21; 2414 carry4 = s4 >> 21; 2415 s5 += carry4; 2416 s4 -= carry4 << 21; 2417 carry5 = s5 >> 21; 2418 s6 += carry5; 2419 s5 -= carry5 << 21; 2420 carry6 = s6 >> 21; 2421 s7 += carry6; 2422 s6 -= carry6 << 21; 2423 carry7 = s7 >> 21; 2424 s8 += carry7; 2425 s7 -= carry7 << 21; 2426 carry8 = s8 >> 21; 2427 s9 += carry8; 2428 s8 -= carry8 << 21; 2429 carry9 = s9 >> 21; 2430 s10 += carry9; 2431 s9 -= carry9 << 21; 2432 carry10 = s10 >> 21; 2433 s11 += carry10; 2434 s10 -= carry10 << 21; 2435 2436 s[0] = s0 >> 0; 2437 s[1] = s0 >> 8; 2438 s[2] = (s0 >> 16) | (s1 << 5); 2439 s[3] = s1 >> 3; 2440 s[4] = s1 >> 11; 2441 s[5] = (s1 >> 19) | (s2 << 2); 2442 s[6] = s2 >> 6; 2443 s[7] = (s2 >> 14) | (s3 << 7); 2444 s[8] = s3 >> 1; 2445 s[9] = s3 >> 9; 2446 s[10] = (s3 >> 17) | (s4 << 4); 2447 s[11] = s4 >> 4; 2448 s[12] = s4 >> 12; 2449 s[13] = (s4 >> 20) | (s5 << 1); 2450 s[14] = s5 >> 7; 2451 s[15] = (s5 >> 15) | (s6 << 6); 2452 s[16] = s6 >> 2; 2453 s[17] = s6 >> 10; 2454 s[18] = (s6 >> 18) | (s7 << 3); 2455 s[19] = s7 >> 5; 2456 s[20] = s7 >> 13; 2457 s[21] = s8 >> 0; 2458 s[22] = s8 >> 8; 2459 s[23] = (s8 >> 16) | (s9 << 5); 2460 s[24] = s9 >> 3; 2461 s[25] = s9 >> 11; 2462 s[26] = (s9 >> 19) | (s10 << 2); 2463 s[27] = s10 >> 6; 2464 s[28] = (s10 >> 14) | (s11 << 7); 2465 s[29] = s11 >> 1; 2466 s[30] = s11 >> 9; 2467 s[31] = s11 >> 17; 2468 } 2469 2470 // Input: 2471 // a[0]+256*a[1]+...+256^31*a[31] = a 2472 // b[0]+256*b[1]+...+256^31*b[31] = b 2473 // c[0]+256*c[1]+...+256^31*c[31] = c 2474 // 2475 // Output: 2476 // s[0]+256*s[1]+...+256^31*s[31] = (ab+c) mod l 2477 // where l = 2^252 + 27742317777372353535851937790883648493. 2478 static void sc_muladd(uint8_t *s, const uint8_t *a, const uint8_t *b, 2479 const uint8_t *c) { 2480 int64_t a0 = 2097151 & load_3(a); 2481 int64_t a1 = 2097151 & (load_4(a + 2) >> 5); 2482 int64_t a2 = 2097151 & (load_3(a + 5) >> 2); 2483 int64_t a3 = 2097151 & (load_4(a + 7) >> 7); 2484 int64_t a4 = 2097151 & (load_4(a + 10) >> 4); 2485 int64_t a5 = 2097151 & (load_3(a + 13) >> 1); 2486 int64_t a6 = 2097151 & (load_4(a + 15) >> 6); 2487 int64_t a7 = 2097151 & (load_3(a + 18) >> 3); 2488 int64_t a8 = 2097151 & load_3(a + 21); 2489 int64_t a9 = 2097151 & (load_4(a + 23) >> 5); 2490 int64_t a10 = 2097151 & (load_3(a + 26) >> 2); 2491 int64_t a11 = (load_4(a + 28) >> 7); 2492 int64_t b0 = 2097151 & load_3(b); 2493 int64_t b1 = 2097151 & (load_4(b + 2) >> 5); 2494 int64_t b2 = 2097151 & (load_3(b + 5) >> 2); 2495 int64_t b3 = 2097151 & (load_4(b + 7) >> 7); 2496 int64_t b4 = 2097151 & (load_4(b + 10) >> 4); 2497 int64_t b5 = 2097151 & (load_3(b + 13) >> 1); 2498 int64_t b6 = 2097151 & (load_4(b + 15) >> 6); 2499 int64_t b7 = 2097151 & (load_3(b + 18) >> 3); 2500 int64_t b8 = 2097151 & load_3(b + 21); 2501 int64_t b9 = 2097151 & (load_4(b + 23) >> 5); 2502 int64_t b10 = 2097151 & (load_3(b + 26) >> 2); 2503 int64_t b11 = (load_4(b + 28) >> 7); 2504 int64_t c0 = 2097151 & load_3(c); 2505 int64_t c1 = 2097151 & (load_4(c + 2) >> 5); 2506 int64_t c2 = 2097151 & (load_3(c + 5) >> 2); 2507 int64_t c3 = 2097151 & (load_4(c + 7) >> 7); 2508 int64_t c4 = 2097151 & (load_4(c + 10) >> 4); 2509 int64_t c5 = 2097151 & (load_3(c + 13) >> 1); 2510 int64_t c6 = 2097151 & (load_4(c + 15) >> 6); 2511 int64_t c7 = 2097151 & (load_3(c + 18) >> 3); 2512 int64_t c8 = 2097151 & load_3(c + 21); 2513 int64_t c9 = 2097151 & (load_4(c + 23) >> 5); 2514 int64_t c10 = 2097151 & (load_3(c + 26) >> 2); 2515 int64_t c11 = (load_4(c + 28) >> 7); 2516 int64_t s0; 2517 int64_t s1; 2518 int64_t s2; 2519 int64_t s3; 2520 int64_t s4; 2521 int64_t s5; 2522 int64_t s6; 2523 int64_t s7; 2524 int64_t s8; 2525 int64_t s9; 2526 int64_t s10; 2527 int64_t s11; 2528 int64_t s12; 2529 int64_t s13; 2530 int64_t s14; 2531 int64_t s15; 2532 int64_t s16; 2533 int64_t s17; 2534 int64_t s18; 2535 int64_t s19; 2536 int64_t s20; 2537 int64_t s21; 2538 int64_t s22; 2539 int64_t s23; 2540 int64_t carry0; 2541 int64_t carry1; 2542 int64_t carry2; 2543 int64_t carry3; 2544 int64_t carry4; 2545 int64_t carry5; 2546 int64_t carry6; 2547 int64_t carry7; 2548 int64_t carry8; 2549 int64_t carry9; 2550 int64_t carry10; 2551 int64_t carry11; 2552 int64_t carry12; 2553 int64_t carry13; 2554 int64_t carry14; 2555 int64_t carry15; 2556 int64_t carry16; 2557 int64_t carry17; 2558 int64_t carry18; 2559 int64_t carry19; 2560 int64_t carry20; 2561 int64_t carry21; 2562 int64_t carry22; 2563 2564 s0 = c0 + a0 * b0; 2565 s1 = c1 + a0 * b1 + a1 * b0; 2566 s2 = c2 + a0 * b2 + a1 * b1 + a2 * b0; 2567 s3 = c3 + a0 * b3 + a1 * b2 + a2 * b1 + a3 * b0; 2568 s4 = c4 + a0 * b4 + a1 * b3 + a2 * b2 + a3 * b1 + a4 * b0; 2569 s5 = c5 + a0 * b5 + a1 * b4 + a2 * b3 + a3 * b2 + a4 * b1 + a5 * b0; 2570 s6 = c6 + a0 * b6 + a1 * b5 + a2 * b4 + a3 * b3 + a4 * b2 + a5 * b1 + a6 * b0; 2571 s7 = c7 + a0 * b7 + a1 * b6 + a2 * b5 + a3 * b4 + a4 * b3 + a5 * b2 + 2572 a6 * b1 + a7 * b0; 2573 s8 = c8 + a0 * b8 + a1 * b7 + a2 * b6 + a3 * b5 + a4 * b4 + a5 * b3 + 2574 a6 * b2 + a7 * b1 + a8 * b0; 2575 s9 = c9 + a0 * b9 + a1 * b8 + a2 * b7 + a3 * b6 + a4 * b5 + a5 * b4 + 2576 a6 * b3 + a7 * b2 + a8 * b1 + a9 * b0; 2577 s10 = c10 + a0 * b10 + a1 * b9 + a2 * b8 + a3 * b7 + a4 * b6 + a5 * b5 + 2578 a6 * b4 + a7 * b3 + a8 * b2 + a9 * b1 + a10 * b0; 2579 s11 = c11 + a0 * b11 + a1 * b10 + a2 * b9 + a3 * b8 + a4 * b7 + a5 * b6 + 2580 a6 * b5 + a7 * b4 + a8 * b3 + a9 * b2 + a10 * b1 + a11 * b0; 2581 s12 = a1 * b11 + a2 * b10 + a3 * b9 + a4 * b8 + a5 * b7 + a6 * b6 + a7 * b5 + 2582 a8 * b4 + a9 * b3 + a10 * b2 + a11 * b1; 2583 s13 = a2 * b11 + a3 * b10 + a4 * b9 + a5 * b8 + a6 * b7 + a7 * b6 + a8 * b5 + 2584 a9 * b4 + a10 * b3 + a11 * b2; 2585 s14 = a3 * b11 + a4 * b10 + a5 * b9 + a6 * b8 + a7 * b7 + a8 * b6 + a9 * b5 + 2586 a10 * b4 + a11 * b3; 2587 s15 = a4 * b11 + a5 * b10 + a6 * b9 + a7 * b8 + a8 * b7 + a9 * b6 + a10 * b5 + 2588 a11 * b4; 2589 s16 = a5 * b11 + a6 * b10 + a7 * b9 + a8 * b8 + a9 * b7 + a10 * b6 + a11 * b5; 2590 s17 = a6 * b11 + a7 * b10 + a8 * b9 + a9 * b8 + a10 * b7 + a11 * b6; 2591 s18 = a7 * b11 + a8 * b10 + a9 * b9 + a10 * b8 + a11 * b7; 2592 s19 = a8 * b11 + a9 * b10 + a10 * b9 + a11 * b8; 2593 s20 = a9 * b11 + a10 * b10 + a11 * b9; 2594 s21 = a10 * b11 + a11 * b10; 2595 s22 = a11 * b11; 2596 s23 = 0; 2597 2598 carry0 = (s0 + (1 << 20)) >> 21; 2599 s1 += carry0; 2600 s0 -= carry0 << 21; 2601 carry2 = (s2 + (1 << 20)) >> 21; 2602 s3 += carry2; 2603 s2 -= carry2 << 21; 2604 carry4 = (s4 + (1 << 20)) >> 21; 2605 s5 += carry4; 2606 s4 -= carry4 << 21; 2607 carry6 = (s6 + (1 << 20)) >> 21; 2608 s7 += carry6; 2609 s6 -= carry6 << 21; 2610 carry8 = (s8 + (1 << 20)) >> 21; 2611 s9 += carry8; 2612 s8 -= carry8 << 21; 2613 carry10 = (s10 + (1 << 20)) >> 21; 2614 s11 += carry10; 2615 s10 -= carry10 << 21; 2616 carry12 = (s12 + (1 << 20)) >> 21; 2617 s13 += carry12; 2618 s12 -= carry12 << 21; 2619 carry14 = (s14 + (1 << 20)) >> 21; 2620 s15 += carry14; 2621 s14 -= carry14 << 21; 2622 carry16 = (s16 + (1 << 20)) >> 21; 2623 s17 += carry16; 2624 s16 -= carry16 << 21; 2625 carry18 = (s18 + (1 << 20)) >> 21; 2626 s19 += carry18; 2627 s18 -= carry18 << 21; 2628 carry20 = (s20 + (1 << 20)) >> 21; 2629 s21 += carry20; 2630 s20 -= carry20 << 21; 2631 carry22 = (s22 + (1 << 20)) >> 21; 2632 s23 += carry22; 2633 s22 -= carry22 << 21; 2634 2635 carry1 = (s1 + (1 << 20)) >> 21; 2636 s2 += carry1; 2637 s1 -= carry1 << 21; 2638 carry3 = (s3 + (1 << 20)) >> 21; 2639 s4 += carry3; 2640 s3 -= carry3 << 21; 2641 carry5 = (s5 + (1 << 20)) >> 21; 2642 s6 += carry5; 2643 s5 -= carry5 << 21; 2644 carry7 = (s7 + (1 << 20)) >> 21; 2645 s8 += carry7; 2646 s7 -= carry7 << 21; 2647 carry9 = (s9 + (1 << 20)) >> 21; 2648 s10 += carry9; 2649 s9 -= carry9 << 21; 2650 carry11 = (s11 + (1 << 20)) >> 21; 2651 s12 += carry11; 2652 s11 -= carry11 << 21; 2653 carry13 = (s13 + (1 << 20)) >> 21; 2654 s14 += carry13; 2655 s13 -= carry13 << 21; 2656 carry15 = (s15 + (1 << 20)) >> 21; 2657 s16 += carry15; 2658 s15 -= carry15 << 21; 2659 carry17 = (s17 + (1 << 20)) >> 21; 2660 s18 += carry17; 2661 s17 -= carry17 << 21; 2662 carry19 = (s19 + (1 << 20)) >> 21; 2663 s20 += carry19; 2664 s19 -= carry19 << 21; 2665 carry21 = (s21 + (1 << 20)) >> 21; 2666 s22 += carry21; 2667 s21 -= carry21 << 21; 2668 2669 s11 += s23 * 666643; 2670 s12 += s23 * 470296; 2671 s13 += s23 * 654183; 2672 s14 -= s23 * 997805; 2673 s15 += s23 * 136657; 2674 s16 -= s23 * 683901; 2675 s23 = 0; 2676 2677 s10 += s22 * 666643; 2678 s11 += s22 * 470296; 2679 s12 += s22 * 654183; 2680 s13 -= s22 * 997805; 2681 s14 += s22 * 136657; 2682 s15 -= s22 * 683901; 2683 s22 = 0; 2684 2685 s9 += s21 * 666643; 2686 s10 += s21 * 470296; 2687 s11 += s21 * 654183; 2688 s12 -= s21 * 997805; 2689 s13 += s21 * 136657; 2690 s14 -= s21 * 683901; 2691 s21 = 0; 2692 2693 s8 += s20 * 666643; 2694 s9 += s20 * 470296; 2695 s10 += s20 * 654183; 2696 s11 -= s20 * 997805; 2697 s12 += s20 * 136657; 2698 s13 -= s20 * 683901; 2699 s20 = 0; 2700 2701 s7 += s19 * 666643; 2702 s8 += s19 * 470296; 2703 s9 += s19 * 654183; 2704 s10 -= s19 * 997805; 2705 s11 += s19 * 136657; 2706 s12 -= s19 * 683901; 2707 s19 = 0; 2708 2709 s6 += s18 * 666643; 2710 s7 += s18 * 470296; 2711 s8 += s18 * 654183; 2712 s9 -= s18 * 997805; 2713 s10 += s18 * 136657; 2714 s11 -= s18 * 683901; 2715 s18 = 0; 2716 2717 carry6 = (s6 + (1 << 20)) >> 21; 2718 s7 += carry6; 2719 s6 -= carry6 << 21; 2720 carry8 = (s8 + (1 << 20)) >> 21; 2721 s9 += carry8; 2722 s8 -= carry8 << 21; 2723 carry10 = (s10 + (1 << 20)) >> 21; 2724 s11 += carry10; 2725 s10 -= carry10 << 21; 2726 carry12 = (s12 + (1 << 20)) >> 21; 2727 s13 += carry12; 2728 s12 -= carry12 << 21; 2729 carry14 = (s14 + (1 << 20)) >> 21; 2730 s15 += carry14; 2731 s14 -= carry14 << 21; 2732 carry16 = (s16 + (1 << 20)) >> 21; 2733 s17 += carry16; 2734 s16 -= carry16 << 21; 2735 2736 carry7 = (s7 + (1 << 20)) >> 21; 2737 s8 += carry7; 2738 s7 -= carry7 << 21; 2739 carry9 = (s9 + (1 << 20)) >> 21; 2740 s10 += carry9; 2741 s9 -= carry9 << 21; 2742 carry11 = (s11 + (1 << 20)) >> 21; 2743 s12 += carry11; 2744 s11 -= carry11 << 21; 2745 carry13 = (s13 + (1 << 20)) >> 21; 2746 s14 += carry13; 2747 s13 -= carry13 << 21; 2748 carry15 = (s15 + (1 << 20)) >> 21; 2749 s16 += carry15; 2750 s15 -= carry15 << 21; 2751 2752 s5 += s17 * 666643; 2753 s6 += s17 * 470296; 2754 s7 += s17 * 654183; 2755 s8 -= s17 * 997805; 2756 s9 += s17 * 136657; 2757 s10 -= s17 * 683901; 2758 s17 = 0; 2759 2760 s4 += s16 * 666643; 2761 s5 += s16 * 470296; 2762 s6 += s16 * 654183; 2763 s7 -= s16 * 997805; 2764 s8 += s16 * 136657; 2765 s9 -= s16 * 683901; 2766 s16 = 0; 2767 2768 s3 += s15 * 666643; 2769 s4 += s15 * 470296; 2770 s5 += s15 * 654183; 2771 s6 -= s15 * 997805; 2772 s7 += s15 * 136657; 2773 s8 -= s15 * 683901; 2774 s15 = 0; 2775 2776 s2 += s14 * 666643; 2777 s3 += s14 * 470296; 2778 s4 += s14 * 654183; 2779 s5 -= s14 * 997805; 2780 s6 += s14 * 136657; 2781 s7 -= s14 * 683901; 2782 s14 = 0; 2783 2784 s1 += s13 * 666643; 2785 s2 += s13 * 470296; 2786 s3 += s13 * 654183; 2787 s4 -= s13 * 997805; 2788 s5 += s13 * 136657; 2789 s6 -= s13 * 683901; 2790 s13 = 0; 2791 2792 s0 += s12 * 666643; 2793 s1 += s12 * 470296; 2794 s2 += s12 * 654183; 2795 s3 -= s12 * 997805; 2796 s4 += s12 * 136657; 2797 s5 -= s12 * 683901; 2798 s12 = 0; 2799 2800 carry0 = (s0 + (1 << 20)) >> 21; 2801 s1 += carry0; 2802 s0 -= carry0 << 21; 2803 carry2 = (s2 + (1 << 20)) >> 21; 2804 s3 += carry2; 2805 s2 -= carry2 << 21; 2806 carry4 = (s4 + (1 << 20)) >> 21; 2807 s5 += carry4; 2808 s4 -= carry4 << 21; 2809 carry6 = (s6 + (1 << 20)) >> 21; 2810 s7 += carry6; 2811 s6 -= carry6 << 21; 2812 carry8 = (s8 + (1 << 20)) >> 21; 2813 s9 += carry8; 2814 s8 -= carry8 << 21; 2815 carry10 = (s10 + (1 << 20)) >> 21; 2816 s11 += carry10; 2817 s10 -= carry10 << 21; 2818 2819 carry1 = (s1 + (1 << 20)) >> 21; 2820 s2 += carry1; 2821 s1 -= carry1 << 21; 2822 carry3 = (s3 + (1 << 20)) >> 21; 2823 s4 += carry3; 2824 s3 -= carry3 << 21; 2825 carry5 = (s5 + (1 << 20)) >> 21; 2826 s6 += carry5; 2827 s5 -= carry5 << 21; 2828 carry7 = (s7 + (1 << 20)) >> 21; 2829 s8 += carry7; 2830 s7 -= carry7 << 21; 2831 carry9 = (s9 + (1 << 20)) >> 21; 2832 s10 += carry9; 2833 s9 -= carry9 << 21; 2834 carry11 = (s11 + (1 << 20)) >> 21; 2835 s12 += carry11; 2836 s11 -= carry11 << 21; 2837 2838 s0 += s12 * 666643; 2839 s1 += s12 * 470296; 2840 s2 += s12 * 654183; 2841 s3 -= s12 * 997805; 2842 s4 += s12 * 136657; 2843 s5 -= s12 * 683901; 2844 s12 = 0; 2845 2846 carry0 = s0 >> 21; 2847 s1 += carry0; 2848 s0 -= carry0 << 21; 2849 carry1 = s1 >> 21; 2850 s2 += carry1; 2851 s1 -= carry1 << 21; 2852 carry2 = s2 >> 21; 2853 s3 += carry2; 2854 s2 -= carry2 << 21; 2855 carry3 = s3 >> 21; 2856 s4 += carry3; 2857 s3 -= carry3 << 21; 2858 carry4 = s4 >> 21; 2859 s5 += carry4; 2860 s4 -= carry4 << 21; 2861 carry5 = s5 >> 21; 2862 s6 += carry5; 2863 s5 -= carry5 << 21; 2864 carry6 = s6 >> 21; 2865 s7 += carry6; 2866 s6 -= carry6 << 21; 2867 carry7 = s7 >> 21; 2868 s8 += carry7; 2869 s7 -= carry7 << 21; 2870 carry8 = s8 >> 21; 2871 s9 += carry8; 2872 s8 -= carry8 << 21; 2873 carry9 = s9 >> 21; 2874 s10 += carry9; 2875 s9 -= carry9 << 21; 2876 carry10 = s10 >> 21; 2877 s11 += carry10; 2878 s10 -= carry10 << 21; 2879 carry11 = s11 >> 21; 2880 s12 += carry11; 2881 s11 -= carry11 << 21; 2882 2883 s0 += s12 * 666643; 2884 s1 += s12 * 470296; 2885 s2 += s12 * 654183; 2886 s3 -= s12 * 997805; 2887 s4 += s12 * 136657; 2888 s5 -= s12 * 683901; 2889 s12 = 0; 2890 2891 carry0 = s0 >> 21; 2892 s1 += carry0; 2893 s0 -= carry0 << 21; 2894 carry1 = s1 >> 21; 2895 s2 += carry1; 2896 s1 -= carry1 << 21; 2897 carry2 = s2 >> 21; 2898 s3 += carry2; 2899 s2 -= carry2 << 21; 2900 carry3 = s3 >> 21; 2901 s4 += carry3; 2902 s3 -= carry3 << 21; 2903 carry4 = s4 >> 21; 2904 s5 += carry4; 2905 s4 -= carry4 << 21; 2906 carry5 = s5 >> 21; 2907 s6 += carry5; 2908 s5 -= carry5 << 21; 2909 carry6 = s6 >> 21; 2910 s7 += carry6; 2911 s6 -= carry6 << 21; 2912 carry7 = s7 >> 21; 2913 s8 += carry7; 2914 s7 -= carry7 << 21; 2915 carry8 = s8 >> 21; 2916 s9 += carry8; 2917 s8 -= carry8 << 21; 2918 carry9 = s9 >> 21; 2919 s10 += carry9; 2920 s9 -= carry9 << 21; 2921 carry10 = s10 >> 21; 2922 s11 += carry10; 2923 s10 -= carry10 << 21; 2924 2925 s[0] = s0 >> 0; 2926 s[1] = s0 >> 8; 2927 s[2] = (s0 >> 16) | (s1 << 5); 2928 s[3] = s1 >> 3; 2929 s[4] = s1 >> 11; 2930 s[5] = (s1 >> 19) | (s2 << 2); 2931 s[6] = s2 >> 6; 2932 s[7] = (s2 >> 14) | (s3 << 7); 2933 s[8] = s3 >> 1; 2934 s[9] = s3 >> 9; 2935 s[10] = (s3 >> 17) | (s4 << 4); 2936 s[11] = s4 >> 4; 2937 s[12] = s4 >> 12; 2938 s[13] = (s4 >> 20) | (s5 << 1); 2939 s[14] = s5 >> 7; 2940 s[15] = (s5 >> 15) | (s6 << 6); 2941 s[16] = s6 >> 2; 2942 s[17] = s6 >> 10; 2943 s[18] = (s6 >> 18) | (s7 << 3); 2944 s[19] = s7 >> 5; 2945 s[20] = s7 >> 13; 2946 s[21] = s8 >> 0; 2947 s[22] = s8 >> 8; 2948 s[23] = (s8 >> 16) | (s9 << 5); 2949 s[24] = s9 >> 3; 2950 s[25] = s9 >> 11; 2951 s[26] = (s9 >> 19) | (s10 << 2); 2952 s[27] = s10 >> 6; 2953 s[28] = (s10 >> 14) | (s11 << 7); 2954 s[29] = s11 >> 1; 2955 s[30] = s11 >> 9; 2956 s[31] = s11 >> 17; 2957 } 2958 2959 void ED25519_keypair(uint8_t out_public_key[32], uint8_t out_private_key[64]) { 2960 uint8_t seed[32]; 2961 RAND_bytes(seed, 32); 2962 ED25519_keypair_from_seed(out_public_key, out_private_key, seed); 2963 } 2964 2965 int ED25519_sign(uint8_t out_sig[64], const uint8_t *message, 2966 size_t message_len, const uint8_t private_key[64]) { 2967 uint8_t az[SHA512_DIGEST_LENGTH]; 2968 SHA512(private_key, 32, az); 2969 2970 az[0] &= 248; 2971 az[31] &= 63; 2972 az[31] |= 64; 2973 2974 SHA512_CTX hash_ctx; 2975 SHA512_Init(&hash_ctx); 2976 SHA512_Update(&hash_ctx, az + 32, 32); 2977 SHA512_Update(&hash_ctx, message, message_len); 2978 uint8_t nonce[SHA512_DIGEST_LENGTH]; 2979 SHA512_Final(nonce, &hash_ctx); 2980 2981 x25519_sc_reduce(nonce); 2982 ge_p3 R; 2983 x25519_ge_scalarmult_base(&R, nonce); 2984 ge_p3_tobytes(out_sig, &R); 2985 2986 SHA512_Init(&hash_ctx); 2987 SHA512_Update(&hash_ctx, out_sig, 32); 2988 SHA512_Update(&hash_ctx, private_key + 32, 32); 2989 SHA512_Update(&hash_ctx, message, message_len); 2990 uint8_t hram[SHA512_DIGEST_LENGTH]; 2991 SHA512_Final(hram, &hash_ctx); 2992 2993 x25519_sc_reduce(hram); 2994 sc_muladd(out_sig + 32, hram, az, nonce); 2995 2996 return 1; 2997 } 2998 2999 int ED25519_verify(const uint8_t *message, size_t message_len, 3000 const uint8_t signature[64], const uint8_t public_key[32]) { 3001 ge_p3 A; 3002 if ((signature[63] & 224) != 0 || 3003 x25519_ge_frombytes_vartime(&A, public_key) != 0) { 3004 return 0; 3005 } 3006 3007 fe_loose t; 3008 fe_neg(&t, &A.X); 3009 fe_carry(&A.X, &t); 3010 fe_neg(&t, &A.T); 3011 fe_carry(&A.T, &t); 3012 3013 uint8_t pkcopy[32]; 3014 OPENSSL_memcpy(pkcopy, public_key, 32); 3015 uint8_t rcopy[32]; 3016 OPENSSL_memcpy(rcopy, signature, 32); 3017 union { 3018 uint64_t u64[4]; 3019 uint8_t u8[32]; 3020 } scopy; 3021 OPENSSL_memcpy(&scopy.u8[0], signature + 32, 32); 3022 3023 // https://tools.ietf.org/html/rfc8032#section-5.1.7 requires that s be in 3024 // the range [0, order) in order to prevent signature malleability. 3025 3026 // kOrder is the order of Curve25519 in little-endian form. 3027 static const uint64_t kOrder[4] = { 3028 UINT64_C(0x5812631a5cf5d3ed), 3029 UINT64_C(0x14def9dea2f79cd6), 3030 0, 3031 UINT64_C(0x1000000000000000), 3032 }; 3033 for (size_t i = 3;; i--) { 3034 if (scopy.u64[i] > kOrder[i]) { 3035 return 0; 3036 } else if (scopy.u64[i] < kOrder[i]) { 3037 break; 3038 } else if (i == 0) { 3039 return 0; 3040 } 3041 } 3042 3043 SHA512_CTX hash_ctx; 3044 SHA512_Init(&hash_ctx); 3045 SHA512_Update(&hash_ctx, signature, 32); 3046 SHA512_Update(&hash_ctx, public_key, 32); 3047 SHA512_Update(&hash_ctx, message, message_len); 3048 uint8_t h[SHA512_DIGEST_LENGTH]; 3049 SHA512_Final(h, &hash_ctx); 3050 3051 x25519_sc_reduce(h); 3052 3053 ge_p2 R; 3054 ge_double_scalarmult_vartime(&R, h, &A, scopy.u8); 3055 3056 uint8_t rcheck[32]; 3057 x25519_ge_tobytes(rcheck, &R); 3058 3059 return CRYPTO_memcmp(rcheck, rcopy, sizeof(rcheck)) == 0; 3060 } 3061 3062 void ED25519_keypair_from_seed(uint8_t out_public_key[32], 3063 uint8_t out_private_key[64], 3064 const uint8_t seed[32]) { 3065 uint8_t az[SHA512_DIGEST_LENGTH]; 3066 SHA512(seed, 32, az); 3067 3068 az[0] &= 248; 3069 az[31] &= 63; 3070 az[31] |= 64; 3071 3072 ge_p3 A; 3073 x25519_ge_scalarmult_base(&A, az); 3074 ge_p3_tobytes(out_public_key, &A); 3075 3076 OPENSSL_memcpy(out_private_key, seed, 32); 3077 OPENSSL_memcpy(out_private_key + 32, out_public_key, 32); 3078 } 3079 3080 3081 static void x25519_scalar_mult_generic(uint8_t out[32], 3082 const uint8_t scalar[32], 3083 const uint8_t point[32]) { 3084 fe x1, x2, z2, x3, z3, tmp0, tmp1; 3085 fe_loose x2l, z2l, x3l, tmp0l, tmp1l; 3086 3087 uint8_t e[32]; 3088 OPENSSL_memcpy(e, scalar, 32); 3089 e[0] &= 248; 3090 e[31] &= 127; 3091 e[31] |= 64; 3092 3093 // The following implementation was transcribed to Coq and proven to 3094 // correspond to unary scalar multiplication in affine coordinates given that 3095 // x1 != 0 is the x coordinate of some point on the curve. It was also checked 3096 // in Coq that doing a ladderstep with x1 = x3 = 0 gives z2' = z3' = 0, and z2 3097 // = z3 = 0 gives z2' = z3' = 0. The statement was quantified over the 3098 // underlying field, so it applies to Curve25519 itself and the quadratic 3099 // twist of Curve25519. It was not proven in Coq that prime-field arithmetic 3100 // correctly simulates extension-field arithmetic on prime-field values. 3101 // The decoding of the byte array representation of e was not considered. 3102 // Specification of Montgomery curves in affine coordinates: 3103 // <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Spec/MontgomeryCurve.v#L27> 3104 // Proof that these form a group that is isomorphic to a Weierstrass curve: 3105 // <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Curves/Montgomery/AffineProofs.v#L35> 3106 // Coq transcription and correctness proof of the loop (where scalarbits=255): 3107 // <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Curves/Montgomery/XZ.v#L118> 3108 // <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Curves/Montgomery/XZProofs.v#L278> 3109 // preconditions: 0 <= e < 2^255 (not necessarily e < order), fe_invert(0) = 0 3110 fe_frombytes(&x1, point); 3111 fe_1(&x2); 3112 fe_0(&z2); 3113 fe_copy(&x3, &x1); 3114 fe_1(&z3); 3115 3116 unsigned swap = 0; 3117 int pos; 3118 for (pos = 254; pos >= 0; --pos) { 3119 // loop invariant as of right before the test, for the case where x1 != 0: 3120 // pos >= -1; if z2 = 0 then x2 is nonzero; if z3 = 0 then x3 is nonzero 3121 // let r := e >> (pos+1) in the following equalities of projective points: 3122 // to_xz (r*P) === if swap then (x3, z3) else (x2, z2) 3123 // to_xz ((r+1)*P) === if swap then (x2, z2) else (x3, z3) 3124 // x1 is the nonzero x coordinate of the nonzero point (r*P-(r+1)*P) 3125 unsigned b = 1 & (e[pos / 8] >> (pos & 7)); 3126 swap ^= b; 3127 fe_cswap(&x2, &x3, swap); 3128 fe_cswap(&z2, &z3, swap); 3129 swap = b; 3130 // Coq transcription of ladderstep formula (called from transcribed loop): 3131 // <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Curves/Montgomery/XZ.v#L89> 3132 // <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Curves/Montgomery/XZProofs.v#L131> 3133 // x1 != 0 <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Curves/Montgomery/XZProofs.v#L217> 3134 // x1 = 0 <https://github.com/mit-plv/fiat-crypto/blob/2456d821825521f7e03e65882cc3521795b0320f/src/Curves/Montgomery/XZProofs.v#L147> 3135 fe_sub(&tmp0l, &x3, &z3); 3136 fe_sub(&tmp1l, &x2, &z2); 3137 fe_add(&x2l, &x2, &z2); 3138 fe_add(&z2l, &x3, &z3); 3139 fe_mul_tll(&z3, &tmp0l, &x2l); 3140 fe_mul_tll(&z2, &z2l, &tmp1l); 3141 fe_sq_tl(&tmp0, &tmp1l); 3142 fe_sq_tl(&tmp1, &x2l); 3143 fe_add(&x3l, &z3, &z2); 3144 fe_sub(&z2l, &z3, &z2); 3145 fe_mul_ttt(&x2, &tmp1, &tmp0); 3146 fe_sub(&tmp1l, &tmp1, &tmp0); 3147 fe_sq_tl(&z2, &z2l); 3148 fe_mul121666(&z3, &tmp1l); 3149 fe_sq_tl(&x3, &x3l); 3150 fe_add(&tmp0l, &tmp0, &z3); 3151 fe_mul_ttt(&z3, &x1, &z2); 3152 fe_mul_tll(&z2, &tmp1l, &tmp0l); 3153 } 3154 // here pos=-1, so r=e, so to_xz (e*P) === if swap then (x3, z3) else (x2, z2) 3155 fe_cswap(&x2, &x3, swap); 3156 fe_cswap(&z2, &z3, swap); 3157 3158 fe_invert(&z2, &z2); 3159 fe_mul_ttt(&x2, &x2, &z2); 3160 fe_tobytes(out, &x2); 3161 } 3162 3163 static void x25519_scalar_mult(uint8_t out[32], const uint8_t scalar[32], 3164 const uint8_t point[32]) { 3165 #if defined(BORINGSSL_X25519_NEON) 3166 if (CRYPTO_is_NEON_capable()) { 3167 x25519_NEON(out, scalar, point); 3168 return; 3169 } 3170 #endif 3171 3172 x25519_scalar_mult_generic(out, scalar, point); 3173 } 3174 3175 void X25519_keypair(uint8_t out_public_value[32], uint8_t out_private_key[32]) { 3176 RAND_bytes(out_private_key, 32); 3177 3178 // All X25519 implementations should decode scalars correctly (see 3179 // https://tools.ietf.org/html/rfc7748#section-5). However, if an 3180 // implementation doesn't then it might interoperate with random keys a 3181 // fraction of the time because they'll, randomly, happen to be correctly 3182 // formed. 3183 // 3184 // Thus we do the opposite of the masking here to make sure that our private 3185 // keys are never correctly masked and so, hopefully, any incorrect 3186 // implementations are deterministically broken. 3187 // 3188 // This does not affect security because, although we're throwing away 3189 // entropy, a valid implementation of scalarmult should throw away the exact 3190 // same bits anyway. 3191 out_private_key[0] |= 7; 3192 out_private_key[31] &= 63; 3193 out_private_key[31] |= 128; 3194 3195 X25519_public_from_private(out_public_value, out_private_key); 3196 } 3197 3198 int X25519(uint8_t out_shared_key[32], const uint8_t private_key[32], 3199 const uint8_t peer_public_value[32]) { 3200 static const uint8_t kZeros[32] = {0}; 3201 x25519_scalar_mult(out_shared_key, private_key, peer_public_value); 3202 // The all-zero output results when the input is a point of small order. 3203 return CRYPTO_memcmp(kZeros, out_shared_key, 32) != 0; 3204 } 3205 3206 void X25519_public_from_private(uint8_t out_public_value[32], 3207 const uint8_t private_key[32]) { 3208 #if defined(BORINGSSL_X25519_NEON) 3209 if (CRYPTO_is_NEON_capable()) { 3210 static const uint8_t kMongomeryBasePoint[32] = {9}; 3211 x25519_NEON(out_public_value, private_key, kMongomeryBasePoint); 3212 return; 3213 } 3214 #endif 3215 3216 uint8_t e[32]; 3217 OPENSSL_memcpy(e, private_key, 32); 3218 e[0] &= 248; 3219 e[31] &= 127; 3220 e[31] |= 64; 3221 3222 ge_p3 A; 3223 x25519_ge_scalarmult_base(&A, e); 3224 3225 // We only need the u-coordinate of the curve25519 point. The map is 3226 // u=(y+1)/(1-y). Since y=Y/Z, this gives u=(Z+Y)/(Z-Y). 3227 fe_loose zplusy, zminusy; 3228 fe zminusy_inv; 3229 fe_add(&zplusy, &A.Z, &A.Y); 3230 fe_sub(&zminusy, &A.Z, &A.Y); 3231 fe_loose_invert(&zminusy_inv, &zminusy); 3232 fe_mul_tlt(&zminusy_inv, &zplusy, &zminusy_inv); 3233 fe_tobytes(out_public_value, &zminusy_inv); 3234 } 3235