1 /* Copyright (C) 1995-1998 Eric Young (eay (at) cryptsoft.com) 2 * All rights reserved. 3 * 4 * This package is an SSL implementation written 5 * by Eric Young (eay (at) cryptsoft.com). 6 * The implementation was written so as to conform with Netscapes SSL. 7 * 8 * This library is free for commercial and non-commercial use as long as 9 * the following conditions are aheared to. The following conditions 10 * apply to all code found in this distribution, be it the RC4, RSA, 11 * lhash, DES, etc., code; not just the SSL code. The SSL documentation 12 * included with this distribution is covered by the same copyright terms 13 * except that the holder is Tim Hudson (tjh (at) cryptsoft.com). 14 * 15 * Copyright remains Eric Young's, and as such any Copyright notices in 16 * the code are not to be removed. 17 * If this package is used in a product, Eric Young should be given attribution 18 * as the author of the parts of the library used. 19 * This can be in the form of a textual message at program startup or 20 * in documentation (online or textual) provided with the package. 21 * 22 * Redistribution and use in source and binary forms, with or without 23 * modification, are permitted provided that the following conditions 24 * are met: 25 * 1. Redistributions of source code must retain the copyright 26 * notice, this list of conditions and the following disclaimer. 27 * 2. Redistributions in binary form must reproduce the above copyright 28 * notice, this list of conditions and the following disclaimer in the 29 * documentation and/or other materials provided with the distribution. 30 * 3. All advertising materials mentioning features or use of this software 31 * must display the following acknowledgement: 32 * "This product includes cryptographic software written by 33 * Eric Young (eay (at) cryptsoft.com)" 34 * The word 'cryptographic' can be left out if the rouines from the library 35 * being used are not cryptographic related :-). 36 * 4. If you include any Windows specific code (or a derivative thereof) from 37 * the apps directory (application code) you must include an acknowledgement: 38 * "This product includes software written by Tim Hudson (tjh (at) cryptsoft.com)" 39 * 40 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND 41 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 42 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 43 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 44 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 45 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 46 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 47 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 48 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 49 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 50 * SUCH DAMAGE. 51 * 52 * The licence and distribution terms for any publically available version or 53 * derivative of this code cannot be changed. i.e. this code cannot simply be 54 * copied and put under another distribution licence 55 * [including the GNU Public Licence.] 56 */ 57 /* ==================================================================== 58 * Copyright (c) 1998-2005 The OpenSSL Project. All rights reserved. 59 * 60 * Redistribution and use in source and binary forms, with or without 61 * modification, are permitted provided that the following conditions 62 * are met: 63 * 64 * 1. Redistributions of source code must retain the above copyright 65 * notice, this list of conditions and the following disclaimer. 66 * 67 * 2. Redistributions in binary form must reproduce the above copyright 68 * notice, this list of conditions and the following disclaimer in 69 * the documentation and/or other materials provided with the 70 * distribution. 71 * 72 * 3. All advertising materials mentioning features or use of this 73 * software must display the following acknowledgment: 74 * "This product includes software developed by the OpenSSL Project 75 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" 76 * 77 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to 78 * endorse or promote products derived from this software without 79 * prior written permission. For written permission, please contact 80 * openssl-core (at) openssl.org. 81 * 82 * 5. Products derived from this software may not be called "OpenSSL" 83 * nor may "OpenSSL" appear in their names without prior written 84 * permission of the OpenSSL Project. 85 * 86 * 6. Redistributions of any form whatsoever must retain the following 87 * acknowledgment: 88 * "This product includes software developed by the OpenSSL Project 89 * for use in the OpenSSL Toolkit (http://www.openssl.org/)" 90 * 91 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY 92 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 93 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 94 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR 95 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 96 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 97 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 98 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 99 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 100 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 101 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED 102 * OF THE POSSIBILITY OF SUCH DAMAGE. 103 * ==================================================================== 104 * 105 * This product includes cryptographic software written by Eric Young 106 * (eay (at) cryptsoft.com). This product includes software written by Tim 107 * Hudson (tjh (at) cryptsoft.com). */ 108 109 #include <openssl/bn.h> 110 111 #include <assert.h> 112 #include <string.h> 113 114 #include <openssl/cpu.h> 115 #include <openssl/err.h> 116 #include <openssl/mem.h> 117 118 #include "internal.h" 119 120 121 #if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) 122 #define OPENSSL_BN_ASM_MONT5 123 #define RSAZ_ENABLED 124 125 #include "rsaz_exp.h" 126 127 void bn_mul_mont_gather5(BN_ULONG *rp, const BN_ULONG *ap, const void *table, 128 const BN_ULONG *np, const BN_ULONG *n0, int num, 129 int power); 130 void bn_scatter5(const BN_ULONG *inp, size_t num, void *table, size_t power); 131 void bn_gather5(BN_ULONG *out, size_t num, void *table, size_t power); 132 void bn_power5(BN_ULONG *rp, const BN_ULONG *ap, const void *table, 133 const BN_ULONG *np, const BN_ULONG *n0, int num, int power); 134 int bn_from_montgomery(BN_ULONG *rp, const BN_ULONG *ap, 135 const BN_ULONG *not_used, const BN_ULONG *np, 136 const BN_ULONG *n0, int num); 137 #endif 138 139 int BN_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx) { 140 int i, bits, ret = 0; 141 BIGNUM *v, *rr; 142 143 BN_CTX_start(ctx); 144 if (r == a || r == p) { 145 rr = BN_CTX_get(ctx); 146 } else { 147 rr = r; 148 } 149 150 v = BN_CTX_get(ctx); 151 if (rr == NULL || v == NULL) { 152 goto err; 153 } 154 155 if (BN_copy(v, a) == NULL) { 156 goto err; 157 } 158 bits = BN_num_bits(p); 159 160 if (BN_is_odd(p)) { 161 if (BN_copy(rr, a) == NULL) { 162 goto err; 163 } 164 } else { 165 if (!BN_one(rr)) { 166 goto err; 167 } 168 } 169 170 for (i = 1; i < bits; i++) { 171 if (!BN_sqr(v, v, ctx)) { 172 goto err; 173 } 174 if (BN_is_bit_set(p, i)) { 175 if (!BN_mul(rr, rr, v, ctx)) { 176 goto err; 177 } 178 } 179 } 180 181 if (r != rr && !BN_copy(r, rr)) { 182 goto err; 183 } 184 ret = 1; 185 186 err: 187 BN_CTX_end(ctx); 188 return ret; 189 } 190 191 typedef struct bn_recp_ctx_st { 192 BIGNUM N; // the divisor 193 BIGNUM Nr; // the reciprocal 194 int num_bits; 195 int shift; 196 int flags; 197 } BN_RECP_CTX; 198 199 static void BN_RECP_CTX_init(BN_RECP_CTX *recp) { 200 BN_init(&recp->N); 201 BN_init(&recp->Nr); 202 recp->num_bits = 0; 203 recp->shift = 0; 204 recp->flags = 0; 205 } 206 207 static void BN_RECP_CTX_free(BN_RECP_CTX *recp) { 208 if (recp == NULL) { 209 return; 210 } 211 212 BN_free(&recp->N); 213 BN_free(&recp->Nr); 214 } 215 216 static int BN_RECP_CTX_set(BN_RECP_CTX *recp, const BIGNUM *d, BN_CTX *ctx) { 217 if (!BN_copy(&(recp->N), d)) { 218 return 0; 219 } 220 BN_zero(&recp->Nr); 221 recp->num_bits = BN_num_bits(d); 222 recp->shift = 0; 223 224 return 1; 225 } 226 227 // len is the expected size of the result We actually calculate with an extra 228 // word of precision, so we can do faster division if the remainder is not 229 // required. 230 // r := 2^len / m 231 static int BN_reciprocal(BIGNUM *r, const BIGNUM *m, int len, BN_CTX *ctx) { 232 int ret = -1; 233 BIGNUM *t; 234 235 BN_CTX_start(ctx); 236 t = BN_CTX_get(ctx); 237 if (t == NULL) { 238 goto err; 239 } 240 241 if (!BN_set_bit(t, len)) { 242 goto err; 243 } 244 245 if (!BN_div(r, NULL, t, m, ctx)) { 246 goto err; 247 } 248 249 ret = len; 250 251 err: 252 BN_CTX_end(ctx); 253 return ret; 254 } 255 256 static int BN_div_recp(BIGNUM *dv, BIGNUM *rem, const BIGNUM *m, 257 BN_RECP_CTX *recp, BN_CTX *ctx) { 258 int i, j, ret = 0; 259 BIGNUM *a, *b, *d, *r; 260 261 BN_CTX_start(ctx); 262 a = BN_CTX_get(ctx); 263 b = BN_CTX_get(ctx); 264 if (dv != NULL) { 265 d = dv; 266 } else { 267 d = BN_CTX_get(ctx); 268 } 269 270 if (rem != NULL) { 271 r = rem; 272 } else { 273 r = BN_CTX_get(ctx); 274 } 275 276 if (a == NULL || b == NULL || d == NULL || r == NULL) { 277 goto err; 278 } 279 280 if (BN_ucmp(m, &recp->N) < 0) { 281 BN_zero(d); 282 if (!BN_copy(r, m)) { 283 goto err; 284 } 285 BN_CTX_end(ctx); 286 return 1; 287 } 288 289 // We want the remainder 290 // Given input of ABCDEF / ab 291 // we need multiply ABCDEF by 3 digests of the reciprocal of ab 292 293 // i := max(BN_num_bits(m), 2*BN_num_bits(N)) 294 i = BN_num_bits(m); 295 j = recp->num_bits << 1; 296 if (j > i) { 297 i = j; 298 } 299 300 // Nr := round(2^i / N) 301 if (i != recp->shift) { 302 recp->shift = 303 BN_reciprocal(&(recp->Nr), &(recp->N), i, 304 ctx); // BN_reciprocal returns i, or -1 for an error 305 } 306 307 if (recp->shift == -1) { 308 goto err; 309 } 310 311 // d := |round(round(m / 2^BN_num_bits(N)) * recp->Nr / 2^(i - 312 // BN_num_bits(N)))| 313 // = |round(round(m / 2^BN_num_bits(N)) * round(2^i / N) / 2^(i - 314 // BN_num_bits(N)))| 315 // <= |(m / 2^BN_num_bits(N)) * (2^i / N) * (2^BN_num_bits(N) / 2^i)| 316 // = |m/N| 317 if (!BN_rshift(a, m, recp->num_bits)) { 318 goto err; 319 } 320 if (!BN_mul(b, a, &(recp->Nr), ctx)) { 321 goto err; 322 } 323 if (!BN_rshift(d, b, i - recp->num_bits)) { 324 goto err; 325 } 326 d->neg = 0; 327 328 if (!BN_mul(b, &(recp->N), d, ctx)) { 329 goto err; 330 } 331 if (!BN_usub(r, m, b)) { 332 goto err; 333 } 334 r->neg = 0; 335 336 j = 0; 337 while (BN_ucmp(r, &(recp->N)) >= 0) { 338 if (j++ > 2) { 339 OPENSSL_PUT_ERROR(BN, BN_R_BAD_RECIPROCAL); 340 goto err; 341 } 342 if (!BN_usub(r, r, &(recp->N))) { 343 goto err; 344 } 345 if (!BN_add_word(d, 1)) { 346 goto err; 347 } 348 } 349 350 r->neg = BN_is_zero(r) ? 0 : m->neg; 351 d->neg = m->neg ^ recp->N.neg; 352 ret = 1; 353 354 err: 355 BN_CTX_end(ctx); 356 return ret; 357 } 358 359 static int BN_mod_mul_reciprocal(BIGNUM *r, const BIGNUM *x, const BIGNUM *y, 360 BN_RECP_CTX *recp, BN_CTX *ctx) { 361 int ret = 0; 362 BIGNUM *a; 363 const BIGNUM *ca; 364 365 BN_CTX_start(ctx); 366 a = BN_CTX_get(ctx); 367 if (a == NULL) { 368 goto err; 369 } 370 371 if (y != NULL) { 372 if (x == y) { 373 if (!BN_sqr(a, x, ctx)) { 374 goto err; 375 } 376 } else { 377 if (!BN_mul(a, x, y, ctx)) { 378 goto err; 379 } 380 } 381 ca = a; 382 } else { 383 ca = x; // Just do the mod 384 } 385 386 ret = BN_div_recp(NULL, r, ca, recp, ctx); 387 388 err: 389 BN_CTX_end(ctx); 390 return ret; 391 } 392 393 // BN_window_bits_for_exponent_size returns sliding window size for mod_exp with 394 // a |b| bit exponent. 395 // 396 // For window size 'w' (w >= 2) and a random 'b' bits exponent, the number of 397 // multiplications is a constant plus on average 398 // 399 // 2^(w-1) + (b-w)/(w+1); 400 // 401 // here 2^(w-1) is for precomputing the table (we actually need entries only 402 // for windows that have the lowest bit set), and (b-w)/(w+1) is an 403 // approximation for the expected number of w-bit windows, not counting the 404 // first one. 405 // 406 // Thus we should use 407 // 408 // w >= 6 if b > 671 409 // w = 5 if 671 > b > 239 410 // w = 4 if 239 > b > 79 411 // w = 3 if 79 > b > 23 412 // w <= 2 if 23 > b 413 // 414 // (with draws in between). Very small exponents are often selected 415 // with low Hamming weight, so we use w = 1 for b <= 23. 416 static int BN_window_bits_for_exponent_size(int b) { 417 if (b > 671) { 418 return 6; 419 } 420 if (b > 239) { 421 return 5; 422 } 423 if (b > 79) { 424 return 4; 425 } 426 if (b > 23) { 427 return 3; 428 } 429 return 1; 430 } 431 432 // TABLE_SIZE is the maximum precomputation table size for *variable* sliding 433 // windows. This must be 2^(max_window - 1), where max_window is the largest 434 // value returned from |BN_window_bits_for_exponent_size|. 435 #define TABLE_SIZE 32 436 437 // TABLE_BITS_SMALL is the smallest value returned from 438 // |BN_window_bits_for_exponent_size| when |b| is at most |BN_BITS2| * 439 // |BN_SMALL_MAX_WORDS| words. 440 #define TABLE_BITS_SMALL 5 441 442 // TABLE_SIZE_SMALL is the same as |TABLE_SIZE|, but when |b| is at most 443 // |BN_BITS2| * |BN_SMALL_MAX_WORDS|. 444 #define TABLE_SIZE_SMALL (1 << (TABLE_BITS_SMALL - 1)) 445 446 static int mod_exp_recp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, 447 const BIGNUM *m, BN_CTX *ctx) { 448 int i, j, bits, ret = 0, wstart, window; 449 int start = 1; 450 BIGNUM *aa; 451 // Table of variables obtained from 'ctx' 452 BIGNUM *val[TABLE_SIZE]; 453 BN_RECP_CTX recp; 454 455 bits = BN_num_bits(p); 456 457 if (bits == 0) { 458 // x**0 mod 1 is still zero. 459 if (BN_is_one(m)) { 460 BN_zero(r); 461 return 1; 462 } 463 return BN_one(r); 464 } 465 466 BN_CTX_start(ctx); 467 aa = BN_CTX_get(ctx); 468 val[0] = BN_CTX_get(ctx); 469 if (!aa || !val[0]) { 470 goto err; 471 } 472 473 BN_RECP_CTX_init(&recp); 474 if (m->neg) { 475 // ignore sign of 'm' 476 if (!BN_copy(aa, m)) { 477 goto err; 478 } 479 aa->neg = 0; 480 if (BN_RECP_CTX_set(&recp, aa, ctx) <= 0) { 481 goto err; 482 } 483 } else { 484 if (BN_RECP_CTX_set(&recp, m, ctx) <= 0) { 485 goto err; 486 } 487 } 488 489 if (!BN_nnmod(val[0], a, m, ctx)) { 490 goto err; // 1 491 } 492 if (BN_is_zero(val[0])) { 493 BN_zero(r); 494 ret = 1; 495 goto err; 496 } 497 498 window = BN_window_bits_for_exponent_size(bits); 499 if (window > 1) { 500 if (!BN_mod_mul_reciprocal(aa, val[0], val[0], &recp, ctx)) { 501 goto err; // 2 502 } 503 j = 1 << (window - 1); 504 for (i = 1; i < j; i++) { 505 if (((val[i] = BN_CTX_get(ctx)) == NULL) || 506 !BN_mod_mul_reciprocal(val[i], val[i - 1], aa, &recp, ctx)) { 507 goto err; 508 } 509 } 510 } 511 512 start = 1; // This is used to avoid multiplication etc 513 // when there is only the value '1' in the 514 // buffer. 515 wstart = bits - 1; // The top bit of the window 516 517 if (!BN_one(r)) { 518 goto err; 519 } 520 521 for (;;) { 522 int wvalue; // The 'value' of the window 523 int wend; // The bottom bit of the window 524 525 if (!BN_is_bit_set(p, wstart)) { 526 if (!start) { 527 if (!BN_mod_mul_reciprocal(r, r, r, &recp, ctx)) { 528 goto err; 529 } 530 } 531 if (wstart == 0) { 532 break; 533 } 534 wstart--; 535 continue; 536 } 537 538 // We now have wstart on a 'set' bit, we now need to work out 539 // how bit a window to do. To do this we need to scan 540 // forward until the last set bit before the end of the 541 // window 542 wvalue = 1; 543 wend = 0; 544 for (i = 1; i < window; i++) { 545 if (wstart - i < 0) { 546 break; 547 } 548 if (BN_is_bit_set(p, wstart - i)) { 549 wvalue <<= (i - wend); 550 wvalue |= 1; 551 wend = i; 552 } 553 } 554 555 // wend is the size of the current window 556 j = wend + 1; 557 // add the 'bytes above' 558 if (!start) { 559 for (i = 0; i < j; i++) { 560 if (!BN_mod_mul_reciprocal(r, r, r, &recp, ctx)) { 561 goto err; 562 } 563 } 564 } 565 566 // wvalue will be an odd number < 2^window 567 if (!BN_mod_mul_reciprocal(r, r, val[wvalue >> 1], &recp, ctx)) { 568 goto err; 569 } 570 571 // move the 'window' down further 572 wstart -= wend + 1; 573 start = 0; 574 if (wstart < 0) { 575 break; 576 } 577 } 578 ret = 1; 579 580 err: 581 BN_CTX_end(ctx); 582 BN_RECP_CTX_free(&recp); 583 return ret; 584 } 585 586 int BN_mod_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, 587 BN_CTX *ctx) { 588 if (BN_is_odd(m)) { 589 return BN_mod_exp_mont(r, a, p, m, ctx, NULL); 590 } 591 592 return mod_exp_recp(r, a, p, m, ctx); 593 } 594 595 int BN_mod_exp_mont(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, 596 const BIGNUM *m, BN_CTX *ctx, const BN_MONT_CTX *mont) { 597 if (!BN_is_odd(m)) { 598 OPENSSL_PUT_ERROR(BN, BN_R_CALLED_WITH_EVEN_MODULUS); 599 return 0; 600 } 601 int bits = BN_num_bits(p); 602 if (bits == 0) { 603 // x**0 mod 1 is still zero. 604 if (BN_is_one(m)) { 605 BN_zero(rr); 606 return 1; 607 } 608 return BN_one(rr); 609 } 610 611 int ret = 0; 612 BIGNUM *val[TABLE_SIZE]; 613 BN_MONT_CTX *new_mont = NULL; 614 615 BN_CTX_start(ctx); 616 BIGNUM *d = BN_CTX_get(ctx); 617 BIGNUM *r = BN_CTX_get(ctx); 618 val[0] = BN_CTX_get(ctx); 619 if (!d || !r || !val[0]) { 620 goto err; 621 } 622 623 // Allocate a montgomery context if it was not supplied by the caller. 624 if (mont == NULL) { 625 new_mont = BN_MONT_CTX_new_for_modulus(m, ctx); 626 if (new_mont == NULL) { 627 goto err; 628 } 629 mont = new_mont; 630 } 631 632 const BIGNUM *aa; 633 if (a->neg || BN_ucmp(a, m) >= 0) { 634 if (!BN_nnmod(val[0], a, m, ctx)) { 635 goto err; 636 } 637 aa = val[0]; 638 } else { 639 aa = a; 640 } 641 642 if (BN_is_zero(aa)) { 643 BN_zero(rr); 644 ret = 1; 645 goto err; 646 } 647 648 // We exponentiate by looking at sliding windows of the exponent and 649 // precomputing powers of |aa|. Windows may be shifted so they always end on a 650 // set bit, so only precompute odd powers. We compute val[i] = aa^(2*i + 1) 651 // for i = 0 to 2^(window-1), all in Montgomery form. 652 int window = BN_window_bits_for_exponent_size(bits); 653 if (!BN_to_montgomery(val[0], aa, mont, ctx)) { 654 goto err; 655 } 656 if (window > 1) { 657 if (!BN_mod_mul_montgomery(d, val[0], val[0], mont, ctx)) { 658 goto err; 659 } 660 for (int i = 1; i < 1 << (window - 1); i++) { 661 val[i] = BN_CTX_get(ctx); 662 if (val[i] == NULL || 663 !BN_mod_mul_montgomery(val[i], val[i - 1], d, mont, ctx)) { 664 goto err; 665 } 666 } 667 } 668 669 if (!bn_one_to_montgomery(r, mont, ctx)) { 670 goto err; 671 } 672 673 int r_is_one = 1; 674 int wstart = bits - 1; // The top bit of the window. 675 for (;;) { 676 if (!BN_is_bit_set(p, wstart)) { 677 if (!r_is_one && !BN_mod_mul_montgomery(r, r, r, mont, ctx)) { 678 goto err; 679 } 680 if (wstart == 0) { 681 break; 682 } 683 wstart--; 684 continue; 685 } 686 687 // We now have wstart on a set bit. Find the largest window we can use. 688 int wvalue = 1; 689 int wsize = 0; 690 for (int i = 1; i < window && i <= wstart; i++) { 691 if (BN_is_bit_set(p, wstart - i)) { 692 wvalue <<= (i - wsize); 693 wvalue |= 1; 694 wsize = i; 695 } 696 } 697 698 // Shift |r| to the end of the window. 699 if (!r_is_one) { 700 for (int i = 0; i < wsize + 1; i++) { 701 if (!BN_mod_mul_montgomery(r, r, r, mont, ctx)) { 702 goto err; 703 } 704 } 705 } 706 707 assert(wvalue & 1); 708 assert(wvalue < (1 << window)); 709 if (!BN_mod_mul_montgomery(r, r, val[wvalue >> 1], mont, ctx)) { 710 goto err; 711 } 712 713 r_is_one = 0; 714 if (wstart == wsize) { 715 break; 716 } 717 wstart -= wsize + 1; 718 } 719 720 if (!BN_from_montgomery(rr, r, mont, ctx)) { 721 goto err; 722 } 723 ret = 1; 724 725 err: 726 BN_MONT_CTX_free(new_mont); 727 BN_CTX_end(ctx); 728 return ret; 729 } 730 731 int bn_mod_exp_mont_small(BN_ULONG *r, size_t num_r, const BN_ULONG *a, 732 size_t num_a, const BN_ULONG *p, size_t num_p, 733 const BN_MONT_CTX *mont) { 734 size_t num_n = mont->N.top; 735 if (num_n != num_a || num_n != num_r || num_n > BN_SMALL_MAX_WORDS) { 736 OPENSSL_PUT_ERROR(BN, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED); 737 return 0; 738 } 739 if (!BN_is_odd(&mont->N)) { 740 OPENSSL_PUT_ERROR(BN, BN_R_CALLED_WITH_EVEN_MODULUS); 741 return 0; 742 } 743 unsigned bits = 0; 744 if (num_p != 0) { 745 bits = BN_num_bits_word(p[num_p - 1]) + (num_p - 1) * BN_BITS2; 746 } 747 if (bits == 0) { 748 OPENSSL_memset(r, 0, num_r * sizeof(BN_ULONG)); 749 if (!BN_is_one(&mont->N)) { 750 r[0] = 1; 751 } 752 return 1; 753 } 754 755 // We exponentiate by looking at sliding windows of the exponent and 756 // precomputing powers of |a|. Windows may be shifted so they always end on a 757 // set bit, so only precompute odd powers. We compute val[i] = a^(2*i + 1) for 758 // i = 0 to 2^(window-1), all in Montgomery form. 759 unsigned window = BN_window_bits_for_exponent_size(bits); 760 if (window > TABLE_BITS_SMALL) { 761 window = TABLE_BITS_SMALL; // Tolerate excessively large |p|. 762 } 763 int ret = 0; 764 BN_ULONG val[TABLE_SIZE_SMALL][BN_SMALL_MAX_WORDS]; 765 OPENSSL_memcpy(val[0], a, num_n * sizeof(BN_ULONG)); 766 if (window > 1) { 767 BN_ULONG d[BN_SMALL_MAX_WORDS]; 768 if (!bn_mod_mul_montgomery_small(d, num_n, val[0], num_n, val[0], num_n, 769 mont)) { 770 goto err; 771 } 772 for (unsigned i = 1; i < 1u << (window - 1); i++) { 773 if (!bn_mod_mul_montgomery_small(val[i], num_n, val[i - 1], num_n, d, 774 num_n, mont)) { 775 goto err; 776 } 777 } 778 } 779 780 if (!bn_one_to_montgomery_small(r, num_r, mont)) { 781 goto err; 782 } 783 784 int r_is_one = 1; 785 unsigned wstart = bits - 1; // The top bit of the window. 786 for (;;) { 787 if (!bn_is_bit_set_words(p, num_p, wstart)) { 788 if (!r_is_one && 789 !bn_mod_mul_montgomery_small(r, num_r, r, num_r, r, num_r, mont)) { 790 goto err; 791 } 792 if (wstart == 0) { 793 break; 794 } 795 wstart--; 796 continue; 797 } 798 799 // We now have wstart on a set bit. Find the largest window we can use. 800 unsigned wvalue = 1; 801 unsigned wsize = 0; 802 for (unsigned i = 1; i < window && i <= wstart; i++) { 803 if (bn_is_bit_set_words(p, num_p, wstart - i)) { 804 wvalue <<= (i - wsize); 805 wvalue |= 1; 806 wsize = i; 807 } 808 } 809 810 // Shift |r| to the end of the window. 811 if (!r_is_one) { 812 for (unsigned i = 0; i < wsize + 1; i++) { 813 if (!bn_mod_mul_montgomery_small(r, num_r, r, num_r, r, num_r, mont)) { 814 goto err; 815 } 816 } 817 } 818 819 assert(wvalue & 1); 820 assert(wvalue < (1u << window)); 821 if (!bn_mod_mul_montgomery_small(r, num_r, r, num_r, val[wvalue >> 1], 822 num_n, mont)) { 823 goto err; 824 } 825 826 r_is_one = 0; 827 if (wstart == wsize) { 828 break; 829 } 830 wstart -= wsize + 1; 831 } 832 833 ret = 1; 834 835 err: 836 OPENSSL_cleanse(val, sizeof(val)); 837 return ret; 838 } 839 840 int bn_mod_inverse_prime_mont_small(BN_ULONG *r, size_t num_r, 841 const BN_ULONG *a, size_t num_a, 842 const BN_MONT_CTX *mont) { 843 const BN_ULONG *p = mont->N.d; 844 size_t num_p = mont->N.top; 845 if (num_p > BN_SMALL_MAX_WORDS || num_p == 0) { 846 OPENSSL_PUT_ERROR(BN, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED); 847 return 0; 848 } 849 850 // Per Fermat's Little Theorem, a^-1 = a^(p-2) (mod p) for p prime. 851 BN_ULONG p_minus_two[BN_SMALL_MAX_WORDS]; 852 OPENSSL_memcpy(p_minus_two, p, num_p * sizeof(BN_ULONG)); 853 if (p_minus_two[0] >= 2) { 854 p_minus_two[0] -= 2; 855 } else { 856 p_minus_two[0] -= 2; 857 for (size_t i = 1; i < num_p; i++) { 858 if (p_minus_two[i]-- != 0) { 859 break; 860 } 861 } 862 } 863 864 return bn_mod_exp_mont_small(r, num_r, a, num_a, p_minus_two, num_p, mont); 865 } 866 867 868 // |BN_mod_exp_mont_consttime| stores the precomputed powers in a specific 869 // layout so that accessing any of these table values shows the same access 870 // pattern as far as cache lines are concerned. The following functions are 871 // used to transfer a BIGNUM from/to that table. 872 873 static void copy_to_prebuf(const BIGNUM *b, int top, unsigned char *buf, 874 int idx, int window) { 875 int i, j; 876 const int width = 1 << window; 877 BN_ULONG *table = (BN_ULONG *) buf; 878 879 if (top > b->top) { 880 top = b->top; // this works because 'buf' is explicitly zeroed 881 } 882 883 for (i = 0, j = idx; i < top; i++, j += width) { 884 table[j] = b->d[i]; 885 } 886 } 887 888 static int copy_from_prebuf(BIGNUM *b, int top, unsigned char *buf, int idx, 889 int window) { 890 int i, j; 891 const int width = 1 << window; 892 volatile BN_ULONG *table = (volatile BN_ULONG *)buf; 893 894 if (!bn_wexpand(b, top)) { 895 return 0; 896 } 897 898 if (window <= 3) { 899 for (i = 0; i < top; i++, table += width) { 900 BN_ULONG acc = 0; 901 902 for (j = 0; j < width; j++) { 903 acc |= table[j] & ((BN_ULONG)0 - (constant_time_eq_int(j, idx) & 1)); 904 } 905 906 b->d[i] = acc; 907 } 908 } else { 909 int xstride = 1 << (window - 2); 910 BN_ULONG y0, y1, y2, y3; 911 912 i = idx >> (window - 2); // equivalent of idx / xstride 913 idx &= xstride - 1; // equivalent of idx % xstride 914 915 y0 = (BN_ULONG)0 - (constant_time_eq_int(i, 0) & 1); 916 y1 = (BN_ULONG)0 - (constant_time_eq_int(i, 1) & 1); 917 y2 = (BN_ULONG)0 - (constant_time_eq_int(i, 2) & 1); 918 y3 = (BN_ULONG)0 - (constant_time_eq_int(i, 3) & 1); 919 920 for (i = 0; i < top; i++, table += width) { 921 BN_ULONG acc = 0; 922 923 for (j = 0; j < xstride; j++) { 924 acc |= ((table[j + 0 * xstride] & y0) | (table[j + 1 * xstride] & y1) | 925 (table[j + 2 * xstride] & y2) | (table[j + 3 * xstride] & y3)) & 926 ((BN_ULONG)0 - (constant_time_eq_int(j, idx) & 1)); 927 } 928 929 b->d[i] = acc; 930 } 931 } 932 933 b->top = top; 934 bn_correct_top(b); 935 return 1; 936 } 937 938 // BN_mod_exp_mont_conttime is based on the assumption that the L1 data cache 939 // line width of the target processor is at least the following value. 940 #define MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH (64) 941 #define MOD_EXP_CTIME_MIN_CACHE_LINE_MASK \ 942 (MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH - 1) 943 944 // Window sizes optimized for fixed window size modular exponentiation 945 // algorithm (BN_mod_exp_mont_consttime). 946 // 947 // To achieve the security goals of BN_mode_exp_mont_consttime, the maximum 948 // size of the window must not exceed 949 // log_2(MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH). 950 // 951 // Window size thresholds are defined for cache line sizes of 32 and 64, cache 952 // line sizes where log_2(32)=5 and log_2(64)=6 respectively. A window size of 953 // 7 should only be used on processors that have a 128 byte or greater cache 954 // line size. 955 #if MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH == 64 956 957 #define BN_window_bits_for_ctime_exponent_size(b) \ 958 ((b) > 937 ? 6 : (b) > 306 ? 5 : (b) > 89 ? 4 : (b) > 22 ? 3 : 1) 959 #define BN_MAX_WINDOW_BITS_FOR_CTIME_EXPONENT_SIZE (6) 960 961 #elif MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH == 32 962 963 #define BN_window_bits_for_ctime_exponent_size(b) \ 964 ((b) > 306 ? 5 : (b) > 89 ? 4 : (b) > 22 ? 3 : 1) 965 #define BN_MAX_WINDOW_BITS_FOR_CTIME_EXPONENT_SIZE (5) 966 967 #endif 968 969 // Given a pointer value, compute the next address that is a cache line 970 // multiple. 971 #define MOD_EXP_CTIME_ALIGN(x_) \ 972 ((unsigned char *)(x_) + \ 973 (MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH - \ 974 (((size_t)(x_)) & (MOD_EXP_CTIME_MIN_CACHE_LINE_MASK)))) 975 976 // This variant of BN_mod_exp_mont() uses fixed windows and the special 977 // precomputation memory layout to limit data-dependency to a minimum 978 // to protect secret exponents (cf. the hyper-threading timing attacks 979 // pointed out by Colin Percival, 980 // http://www.daemonology.net/hyperthreading-considered-harmful/) 981 int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, 982 const BIGNUM *m, BN_CTX *ctx, 983 const BN_MONT_CTX *mont) { 984 int i, ret = 0, window, wvalue; 985 int top; 986 BN_MONT_CTX *new_mont = NULL; 987 988 int numPowers; 989 unsigned char *powerbufFree = NULL; 990 int powerbufLen = 0; 991 unsigned char *powerbuf = NULL; 992 BIGNUM tmp, am; 993 BIGNUM *new_a = NULL; 994 995 if (!BN_is_odd(m)) { 996 OPENSSL_PUT_ERROR(BN, BN_R_CALLED_WITH_EVEN_MODULUS); 997 return 0; 998 } 999 1000 top = m->top; 1001 1002 // Use all bits stored in |p|, rather than |BN_num_bits|, so we do not leak 1003 // whether the top bits are zero. 1004 int max_bits = p->top * BN_BITS2; 1005 int bits = max_bits; 1006 if (bits == 0) { 1007 // x**0 mod 1 is still zero. 1008 if (BN_is_one(m)) { 1009 BN_zero(rr); 1010 return 1; 1011 } 1012 return BN_one(rr); 1013 } 1014 1015 // Allocate a montgomery context if it was not supplied by the caller. 1016 if (mont == NULL) { 1017 new_mont = BN_MONT_CTX_new_for_modulus(m, ctx); 1018 if (new_mont == NULL) { 1019 goto err; 1020 } 1021 mont = new_mont; 1022 } 1023 1024 if (a->neg || BN_ucmp(a, m) >= 0) { 1025 new_a = BN_new(); 1026 if (new_a == NULL || 1027 !BN_nnmod(new_a, a, m, ctx)) { 1028 goto err; 1029 } 1030 a = new_a; 1031 } 1032 1033 #ifdef RSAZ_ENABLED 1034 // If the size of the operands allow it, perform the optimized 1035 // RSAZ exponentiation. For further information see 1036 // crypto/bn/rsaz_exp.c and accompanying assembly modules. 1037 if ((16 == a->top) && (16 == p->top) && (BN_num_bits(m) == 1024) && 1038 rsaz_avx2_eligible()) { 1039 if (!bn_wexpand(rr, 16)) { 1040 goto err; 1041 } 1042 RSAZ_1024_mod_exp_avx2(rr->d, a->d, p->d, m->d, mont->RR.d, mont->n0[0]); 1043 rr->top = 16; 1044 rr->neg = 0; 1045 bn_correct_top(rr); 1046 ret = 1; 1047 goto err; 1048 } 1049 #endif 1050 1051 // Get the window size to use with size of p. 1052 window = BN_window_bits_for_ctime_exponent_size(bits); 1053 #if defined(OPENSSL_BN_ASM_MONT5) 1054 if (window >= 5) { 1055 window = 5; // ~5% improvement for RSA2048 sign, and even for RSA4096 1056 // reserve space for mont->N.d[] copy 1057 powerbufLen += top * sizeof(mont->N.d[0]); 1058 } 1059 #endif 1060 1061 // Allocate a buffer large enough to hold all of the pre-computed 1062 // powers of am, am itself and tmp. 1063 numPowers = 1 << window; 1064 powerbufLen += 1065 sizeof(m->d[0]) * 1066 (top * numPowers + ((2 * top) > numPowers ? (2 * top) : numPowers)); 1067 #ifdef alloca 1068 if (powerbufLen < 3072) { 1069 powerbufFree = alloca(powerbufLen + MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH); 1070 } else 1071 #endif 1072 { 1073 if ((powerbufFree = OPENSSL_malloc( 1074 powerbufLen + MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH)) == NULL) { 1075 goto err; 1076 } 1077 } 1078 1079 powerbuf = MOD_EXP_CTIME_ALIGN(powerbufFree); 1080 OPENSSL_memset(powerbuf, 0, powerbufLen); 1081 1082 #ifdef alloca 1083 if (powerbufLen < 3072) { 1084 powerbufFree = NULL; 1085 } 1086 #endif 1087 1088 // lay down tmp and am right after powers table 1089 tmp.d = (BN_ULONG *)(powerbuf + sizeof(m->d[0]) * top * numPowers); 1090 am.d = tmp.d + top; 1091 tmp.top = am.top = 0; 1092 tmp.dmax = am.dmax = top; 1093 tmp.neg = am.neg = 0; 1094 tmp.flags = am.flags = BN_FLG_STATIC_DATA; 1095 1096 if (!bn_one_to_montgomery(&tmp, mont, ctx)) { 1097 goto err; 1098 } 1099 1100 // prepare a^1 in Montgomery domain 1101 assert(!a->neg); 1102 assert(BN_ucmp(a, m) < 0); 1103 if (!BN_to_montgomery(&am, a, mont, ctx)) { 1104 goto err; 1105 } 1106 1107 #if defined(OPENSSL_BN_ASM_MONT5) 1108 // This optimization uses ideas from http://eprint.iacr.org/2011/239, 1109 // specifically optimization of cache-timing attack countermeasures 1110 // and pre-computation optimization. 1111 1112 // Dedicated window==4 case improves 512-bit RSA sign by ~15%, but as 1113 // 512-bit RSA is hardly relevant, we omit it to spare size... 1114 if (window == 5 && top > 1) { 1115 const BN_ULONG *n0 = mont->n0; 1116 BN_ULONG *np; 1117 1118 // BN_to_montgomery can contaminate words above .top 1119 // [in BN_DEBUG[_DEBUG] build]... 1120 for (i = am.top; i < top; i++) { 1121 am.d[i] = 0; 1122 } 1123 for (i = tmp.top; i < top; i++) { 1124 tmp.d[i] = 0; 1125 } 1126 1127 // copy mont->N.d[] to improve cache locality 1128 for (np = am.d + top, i = 0; i < top; i++) { 1129 np[i] = mont->N.d[i]; 1130 } 1131 1132 bn_scatter5(tmp.d, top, powerbuf, 0); 1133 bn_scatter5(am.d, am.top, powerbuf, 1); 1134 bn_mul_mont(tmp.d, am.d, am.d, np, n0, top); 1135 bn_scatter5(tmp.d, top, powerbuf, 2); 1136 1137 // same as above, but uses squaring for 1/2 of operations 1138 for (i = 4; i < 32; i *= 2) { 1139 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); 1140 bn_scatter5(tmp.d, top, powerbuf, i); 1141 } 1142 for (i = 3; i < 8; i += 2) { 1143 int j; 1144 bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np, n0, top, i - 1); 1145 bn_scatter5(tmp.d, top, powerbuf, i); 1146 for (j = 2 * i; j < 32; j *= 2) { 1147 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); 1148 bn_scatter5(tmp.d, top, powerbuf, j); 1149 } 1150 } 1151 for (; i < 16; i += 2) { 1152 bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np, n0, top, i - 1); 1153 bn_scatter5(tmp.d, top, powerbuf, i); 1154 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); 1155 bn_scatter5(tmp.d, top, powerbuf, 2 * i); 1156 } 1157 for (; i < 32; i += 2) { 1158 bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np, n0, top, i - 1); 1159 bn_scatter5(tmp.d, top, powerbuf, i); 1160 } 1161 1162 bits--; 1163 for (wvalue = 0, i = bits % 5; i >= 0; i--, bits--) { 1164 wvalue = (wvalue << 1) + BN_is_bit_set(p, bits); 1165 } 1166 bn_gather5(tmp.d, top, powerbuf, wvalue); 1167 1168 // At this point |bits| is 4 mod 5 and at least -1. (|bits| is the first bit 1169 // that has not been read yet.) 1170 assert(bits >= -1 && (bits == -1 || bits % 5 == 4)); 1171 1172 // Scan the exponent one window at a time starting from the most 1173 // significant bits. 1174 if (top & 7) { 1175 while (bits >= 0) { 1176 for (wvalue = 0, i = 0; i < 5; i++, bits--) { 1177 wvalue = (wvalue << 1) + BN_is_bit_set(p, bits); 1178 } 1179 1180 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); 1181 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); 1182 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); 1183 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); 1184 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); 1185 bn_mul_mont_gather5(tmp.d, tmp.d, powerbuf, np, n0, top, wvalue); 1186 } 1187 } else { 1188 const uint8_t *p_bytes = (const uint8_t *)p->d; 1189 assert(bits < max_bits); 1190 // |p = 0| has been handled as a special case, so |max_bits| is at least 1191 // one word. 1192 assert(max_bits >= 64); 1193 1194 // If the first bit to be read lands in the last byte, unroll the first 1195 // iteration to avoid reading past the bounds of |p->d|. (After the first 1196 // iteration, we are guaranteed to be past the last byte.) Note |bits| 1197 // here is the top bit, inclusive. 1198 if (bits - 4 >= max_bits - 8) { 1199 // Read five bits from |bits-4| through |bits|, inclusive. 1200 wvalue = p_bytes[p->top * BN_BYTES - 1]; 1201 wvalue >>= (bits - 4) & 7; 1202 wvalue &= 0x1f; 1203 bits -= 5; 1204 bn_power5(tmp.d, tmp.d, powerbuf, np, n0, top, wvalue); 1205 } 1206 while (bits >= 0) { 1207 // Read five bits from |bits-4| through |bits|, inclusive. 1208 int first_bit = bits - 4; 1209 uint16_t val; 1210 OPENSSL_memcpy(&val, p_bytes + (first_bit >> 3), sizeof(val)); 1211 val >>= first_bit & 7; 1212 val &= 0x1f; 1213 bits -= 5; 1214 bn_power5(tmp.d, tmp.d, powerbuf, np, n0, top, val); 1215 } 1216 } 1217 1218 ret = bn_from_montgomery(tmp.d, tmp.d, NULL, np, n0, top); 1219 tmp.top = top; 1220 bn_correct_top(&tmp); 1221 if (ret) { 1222 if (!BN_copy(rr, &tmp)) { 1223 ret = 0; 1224 } 1225 goto err; // non-zero ret means it's not error 1226 } 1227 } else 1228 #endif 1229 { 1230 copy_to_prebuf(&tmp, top, powerbuf, 0, window); 1231 copy_to_prebuf(&am, top, powerbuf, 1, window); 1232 1233 // If the window size is greater than 1, then calculate 1234 // val[i=2..2^winsize-1]. Powers are computed as a*a^(i-1) 1235 // (even powers could instead be computed as (a^(i/2))^2 1236 // to use the slight performance advantage of sqr over mul). 1237 if (window > 1) { 1238 if (!BN_mod_mul_montgomery(&tmp, &am, &am, mont, ctx)) { 1239 goto err; 1240 } 1241 1242 copy_to_prebuf(&tmp, top, powerbuf, 2, window); 1243 1244 for (i = 3; i < numPowers; i++) { 1245 // Calculate a^i = a^(i-1) * a 1246 if (!BN_mod_mul_montgomery(&tmp, &am, &tmp, mont, ctx)) { 1247 goto err; 1248 } 1249 1250 copy_to_prebuf(&tmp, top, powerbuf, i, window); 1251 } 1252 } 1253 1254 bits--; 1255 for (wvalue = 0, i = bits % window; i >= 0; i--, bits--) { 1256 wvalue = (wvalue << 1) + BN_is_bit_set(p, bits); 1257 } 1258 if (!copy_from_prebuf(&tmp, top, powerbuf, wvalue, window)) { 1259 goto err; 1260 } 1261 1262 // Scan the exponent one window at a time starting from the most 1263 // significant bits. 1264 while (bits >= 0) { 1265 wvalue = 0; // The 'value' of the window 1266 1267 // Scan the window, squaring the result as we go 1268 for (i = 0; i < window; i++, bits--) { 1269 if (!BN_mod_mul_montgomery(&tmp, &tmp, &tmp, mont, ctx)) { 1270 goto err; 1271 } 1272 wvalue = (wvalue << 1) + BN_is_bit_set(p, bits); 1273 } 1274 1275 // Fetch the appropriate pre-computed value from the pre-buf 1276 if (!copy_from_prebuf(&am, top, powerbuf, wvalue, window)) { 1277 goto err; 1278 } 1279 1280 // Multiply the result into the intermediate result 1281 if (!BN_mod_mul_montgomery(&tmp, &tmp, &am, mont, ctx)) { 1282 goto err; 1283 } 1284 } 1285 } 1286 1287 // Convert the final result from montgomery to standard format 1288 if (!BN_from_montgomery(rr, &tmp, mont, ctx)) { 1289 goto err; 1290 } 1291 ret = 1; 1292 1293 err: 1294 BN_MONT_CTX_free(new_mont); 1295 BN_clear_free(new_a); 1296 OPENSSL_free(powerbufFree); 1297 return (ret); 1298 } 1299 1300 int BN_mod_exp_mont_word(BIGNUM *rr, BN_ULONG a, const BIGNUM *p, 1301 const BIGNUM *m, BN_CTX *ctx, 1302 const BN_MONT_CTX *mont) { 1303 BIGNUM a_bignum; 1304 BN_init(&a_bignum); 1305 1306 int ret = 0; 1307 1308 if (!BN_set_word(&a_bignum, a)) { 1309 OPENSSL_PUT_ERROR(BN, ERR_R_INTERNAL_ERROR); 1310 goto err; 1311 } 1312 1313 ret = BN_mod_exp_mont(rr, &a_bignum, p, m, ctx, mont); 1314 1315 err: 1316 BN_free(&a_bignum); 1317 1318 return ret; 1319 } 1320 1321 #define TABLE_SIZE 32 1322 1323 int BN_mod_exp2_mont(BIGNUM *rr, const BIGNUM *a1, const BIGNUM *p1, 1324 const BIGNUM *a2, const BIGNUM *p2, const BIGNUM *m, 1325 BN_CTX *ctx, const BN_MONT_CTX *mont) { 1326 BIGNUM tmp; 1327 BN_init(&tmp); 1328 1329 int ret = 0; 1330 BN_MONT_CTX *new_mont = NULL; 1331 1332 // Allocate a montgomery context if it was not supplied by the caller. 1333 if (mont == NULL) { 1334 new_mont = BN_MONT_CTX_new_for_modulus(m, ctx); 1335 if (new_mont == NULL) { 1336 goto err; 1337 } 1338 mont = new_mont; 1339 } 1340 1341 // BN_mod_mul_montgomery removes one Montgomery factor, so passing one 1342 // Montgomery-encoded and one non-Montgomery-encoded value gives a 1343 // non-Montgomery-encoded result. 1344 if (!BN_mod_exp_mont(rr, a1, p1, m, ctx, mont) || 1345 !BN_mod_exp_mont(&tmp, a2, p2, m, ctx, mont) || 1346 !BN_to_montgomery(rr, rr, mont, ctx) || 1347 !BN_mod_mul_montgomery(rr, rr, &tmp, mont, ctx)) { 1348 goto err; 1349 } 1350 1351 ret = 1; 1352 1353 err: 1354 BN_MONT_CTX_free(new_mont); 1355 BN_free(&tmp); 1356 1357 return ret; 1358 } 1359