1 /* Copyright (C) 1995-1998 Eric Young (eay (at) cryptsoft.com) 2 * All rights reserved. 3 * 4 * This package is an SSL implementation written 5 * by Eric Young (eay (at) cryptsoft.com). 6 * The implementation was written so as to conform with Netscapes SSL. 7 * 8 * This library is free for commercial and non-commercial use as long as 9 * the following conditions are aheared to. The following conditions 10 * apply to all code found in this distribution, be it the RC4, RSA, 11 * lhash, DES, etc., code; not just the SSL code. The SSL documentation 12 * included with this distribution is covered by the same copyright terms 13 * except that the holder is Tim Hudson (tjh (at) cryptsoft.com). 14 * 15 * Copyright remains Eric Young's, and as such any Copyright notices in 16 * the code are not to be removed. 17 * If this package is used in a product, Eric Young should be given attribution 18 * as the author of the parts of the library used. 19 * This can be in the form of a textual message at program startup or 20 * in documentation (online or textual) provided with the package. 21 * 22 * Redistribution and use in source and binary forms, with or without 23 * modification, are permitted provided that the following conditions 24 * are met: 25 * 1. Redistributions of source code must retain the copyright 26 * notice, this list of conditions and the following disclaimer. 27 * 2. Redistributions in binary form must reproduce the above copyright 28 * notice, this list of conditions and the following disclaimer in the 29 * documentation and/or other materials provided with the distribution. 30 * 3. All advertising materials mentioning features or use of this software 31 * must display the following acknowledgement: 32 * "This product includes cryptographic software written by 33 * Eric Young (eay (at) cryptsoft.com)" 34 * The word 'cryptographic' can be left out if the rouines from the library 35 * being used are not cryptographic related :-). 36 * 4. If you include any Windows specific code (or a derivative thereof) from 37 * the apps directory (application code) you must include an acknowledgement: 38 * "This product includes software written by Tim Hudson (tjh (at) cryptsoft.com)" 39 * 40 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND 41 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 42 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 43 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 44 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 45 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 46 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 47 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 48 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 49 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 50 * SUCH DAMAGE. 51 * 52 * The licence and distribution terms for any publically available version or 53 * derivative of this code cannot be changed. i.e. this code cannot simply be 54 * copied and put under another distribution licence 55 * [including the GNU Public Licence.] 56 */ 57 /* ==================================================================== 58 * Copyright (c) 1998-2005 The OpenSSL Project. All rights reserved. 59 * 60 * Redistribution and use in source and binary forms, with or without 61 * modification, are permitted provided that the following conditions 62 * are met: 63 * 64 * 1. Redistributions of source code must retain the above copyright 65 * notice, this list of conditions and the following disclaimer. 66 * 67 * 2. Redistributions in binary form must reproduce the above copyright 68 * notice, this list of conditions and the following disclaimer in 69 * the documentation and/or other materials provided with the 70 * distribution. 71 * 72 * 3. All advertising materials mentioning features or use of this 73 * software must display the following acknowledgment: 74 * "This product includes software developed by the OpenSSL Project 75 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" 76 * 77 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to 78 * endorse or promote products derived from this software without 79 * prior written permission. For written permission, please contact 80 * openssl-core (at) openssl.org. 81 * 82 * 5. Products derived from this software may not be called "OpenSSL" 83 * nor may "OpenSSL" appear in their names without prior written 84 * permission of the OpenSSL Project. 85 * 86 * 6. Redistributions of any form whatsoever must retain the following 87 * acknowledgment: 88 * "This product includes software developed by the OpenSSL Project 89 * for use in the OpenSSL Toolkit (http://www.openssl.org/)" 90 * 91 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY 92 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 93 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 94 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR 95 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 96 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 97 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 98 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 99 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 100 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 101 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED 102 * OF THE POSSIBILITY OF SUCH DAMAGE. 103 * ==================================================================== 104 * 105 * This product includes cryptographic software written by Eric Young 106 * (eay (at) cryptsoft.com). This product includes software written by Tim 107 * Hudson (tjh (at) cryptsoft.com). */ 108 109 #include <openssl/bn.h> 110 111 #include <assert.h> 112 #include <string.h> 113 114 #include <openssl/cpu.h> 115 #include <openssl/err.h> 116 #include <openssl/mem.h> 117 118 #include "internal.h" 119 120 121 #if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) 122 #define OPENSSL_BN_ASM_MONT5 123 #define RSAZ_ENABLED 124 125 #include "rsaz_exp.h" 126 #endif 127 128 int BN_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx) { 129 int i, bits, ret = 0; 130 BIGNUM *v, *rr; 131 132 if ((p->flags & BN_FLG_CONSTTIME) != 0) { 133 /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */ 134 OPENSSL_PUT_ERROR(BN, BN_exp, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED); 135 return 0; 136 } 137 138 BN_CTX_start(ctx); 139 if (r == a || r == p) { 140 rr = BN_CTX_get(ctx); 141 } else { 142 rr = r; 143 } 144 145 v = BN_CTX_get(ctx); 146 if (rr == NULL || v == NULL) { 147 goto err; 148 } 149 150 if (BN_copy(v, a) == NULL) { 151 goto err; 152 } 153 bits = BN_num_bits(p); 154 155 if (BN_is_odd(p)) { 156 if (BN_copy(rr, a) == NULL) { 157 goto err; 158 } 159 } else { 160 if (!BN_one(rr)) { 161 goto err; 162 } 163 } 164 165 for (i = 1; i < bits; i++) { 166 if (!BN_sqr(v, v, ctx)) { 167 goto err; 168 } 169 if (BN_is_bit_set(p, i)) { 170 if (!BN_mul(rr, rr, v, ctx)) { 171 goto err; 172 } 173 } 174 } 175 176 if (r != rr) { 177 BN_copy(r, rr); 178 } 179 ret = 1; 180 181 err: 182 BN_CTX_end(ctx); 183 return ret; 184 } 185 186 /* maximum precomputation table size for *variable* sliding windows */ 187 #define TABLE_SIZE 32 188 189 typedef struct bn_recp_ctx_st { 190 BIGNUM N; /* the divisor */ 191 BIGNUM Nr; /* the reciprocal */ 192 int num_bits; 193 int shift; 194 int flags; 195 } BN_RECP_CTX; 196 197 static void BN_RECP_CTX_init(BN_RECP_CTX *recp) { 198 BN_init(&recp->N); 199 BN_init(&recp->Nr); 200 recp->num_bits = 0; 201 recp->flags = 0; 202 } 203 204 static void BN_RECP_CTX_free(BN_RECP_CTX *recp) { 205 if (recp == NULL) { 206 return; 207 } 208 209 BN_free(&recp->N); 210 BN_free(&recp->Nr); 211 } 212 213 static int BN_RECP_CTX_set(BN_RECP_CTX *recp, const BIGNUM *d, BN_CTX *ctx) { 214 if (!BN_copy(&(recp->N), d)) { 215 return 0; 216 } 217 BN_zero(&recp->Nr); 218 recp->num_bits = BN_num_bits(d); 219 recp->shift = 0; 220 221 return 1; 222 } 223 224 /* len is the expected size of the result We actually calculate with an extra 225 * word of precision, so we can do faster division if the remainder is not 226 * required. 227 * r := 2^len / m */ 228 static int BN_reciprocal(BIGNUM *r, const BIGNUM *m, int len, BN_CTX *ctx) { 229 int ret = -1; 230 BIGNUM *t; 231 232 BN_CTX_start(ctx); 233 t = BN_CTX_get(ctx); 234 if (t == NULL) { 235 goto err; 236 } 237 238 if (!BN_set_bit(t, len)) { 239 goto err; 240 } 241 242 if (!BN_div(r, NULL, t, m, ctx)) { 243 goto err; 244 } 245 246 ret = len; 247 248 err: 249 BN_CTX_end(ctx); 250 return ret; 251 } 252 253 static int BN_div_recp(BIGNUM *dv, BIGNUM *rem, const BIGNUM *m, 254 BN_RECP_CTX *recp, BN_CTX *ctx) { 255 int i, j, ret = 0; 256 BIGNUM *a, *b, *d, *r; 257 258 BN_CTX_start(ctx); 259 a = BN_CTX_get(ctx); 260 b = BN_CTX_get(ctx); 261 if (dv != NULL) { 262 d = dv; 263 } else { 264 d = BN_CTX_get(ctx); 265 } 266 267 if (rem != NULL) { 268 r = rem; 269 } else { 270 r = BN_CTX_get(ctx); 271 } 272 273 if (a == NULL || b == NULL || d == NULL || r == NULL) { 274 goto err; 275 } 276 277 if (BN_ucmp(m, &(recp->N)) < 0) { 278 BN_zero(d); 279 if (!BN_copy(r, m)) { 280 return 0; 281 } 282 BN_CTX_end(ctx); 283 return 1; 284 } 285 286 /* We want the remainder 287 * Given input of ABCDEF / ab 288 * we need multiply ABCDEF by 3 digests of the reciprocal of ab */ 289 290 /* i := max(BN_num_bits(m), 2*BN_num_bits(N)) */ 291 i = BN_num_bits(m); 292 j = recp->num_bits << 1; 293 if (j > i) { 294 i = j; 295 } 296 297 /* Nr := round(2^i / N) */ 298 if (i != recp->shift) { 299 recp->shift = 300 BN_reciprocal(&(recp->Nr), &(recp->N), i, 301 ctx); /* BN_reciprocal returns i, or -1 for an error */ 302 } 303 304 if (recp->shift == -1) { 305 goto err; 306 } 307 308 /* d := |round(round(m / 2^BN_num_bits(N)) * recp->Nr / 2^(i - 309 * BN_num_bits(N)))| 310 * = |round(round(m / 2^BN_num_bits(N)) * round(2^i / N) / 2^(i - 311 * BN_num_bits(N)))| 312 * <= |(m / 2^BN_num_bits(N)) * (2^i / N) * (2^BN_num_bits(N) / 2^i)| 313 * = |m/N| */ 314 if (!BN_rshift(a, m, recp->num_bits)) { 315 goto err; 316 } 317 if (!BN_mul(b, a, &(recp->Nr), ctx)) { 318 goto err; 319 } 320 if (!BN_rshift(d, b, i - recp->num_bits)) { 321 goto err; 322 } 323 d->neg = 0; 324 325 if (!BN_mul(b, &(recp->N), d, ctx)) { 326 goto err; 327 } 328 if (!BN_usub(r, m, b)) { 329 goto err; 330 } 331 r->neg = 0; 332 333 j = 0; 334 while (BN_ucmp(r, &(recp->N)) >= 0) { 335 if (j++ > 2) { 336 OPENSSL_PUT_ERROR(BN, BN_div_recp, BN_R_BAD_RECIPROCAL); 337 goto err; 338 } 339 if (!BN_usub(r, r, &(recp->N))) { 340 goto err; 341 } 342 if (!BN_add_word(d, 1)) { 343 goto err; 344 } 345 } 346 347 r->neg = BN_is_zero(r) ? 0 : m->neg; 348 d->neg = m->neg ^ recp->N.neg; 349 ret = 1; 350 351 err: 352 BN_CTX_end(ctx); 353 return ret; 354 } 355 356 static int BN_mod_mul_reciprocal(BIGNUM *r, const BIGNUM *x, const BIGNUM *y, 357 BN_RECP_CTX *recp, BN_CTX *ctx) { 358 int ret = 0; 359 BIGNUM *a; 360 const BIGNUM *ca; 361 362 BN_CTX_start(ctx); 363 a = BN_CTX_get(ctx); 364 if (a == NULL) { 365 goto err; 366 } 367 368 if (y != NULL) { 369 if (x == y) { 370 if (!BN_sqr(a, x, ctx)) { 371 goto err; 372 } 373 } else { 374 if (!BN_mul(a, x, y, ctx)) { 375 goto err; 376 } 377 } 378 ca = a; 379 } else { 380 ca = x; /* Just do the mod */ 381 } 382 383 ret = BN_div_recp(NULL, r, ca, recp, ctx); 384 385 err: 386 BN_CTX_end(ctx); 387 return ret; 388 } 389 390 /* BN_window_bits_for_exponent_size -- macro for sliding window mod_exp 391 * functions 392 * 393 * For window size 'w' (w >= 2) and a random 'b' bits exponent, the number of 394 * multiplications is a constant plus on average 395 * 396 * 2^(w-1) + (b-w)/(w+1); 397 * 398 * here 2^(w-1) is for precomputing the table (we actually need entries only 399 * for windows that have the lowest bit set), and (b-w)/(w+1) is an 400 * approximation for the expected number of w-bit windows, not counting the 401 * first one. 402 * 403 * Thus we should use 404 * 405 * w >= 6 if b > 671 406 * w = 5 if 671 > b > 239 407 * w = 4 if 239 > b > 79 408 * w = 3 if 79 > b > 23 409 * w <= 2 if 23 > b 410 * 411 * (with draws in between). Very small exponents are often selected 412 * with low Hamming weight, so we use w = 1 for b <= 23. */ 413 #define BN_window_bits_for_exponent_size(b) \ 414 ((b) > 671 ? 6 : \ 415 (b) > 239 ? 5 : \ 416 (b) > 79 ? 4 : \ 417 (b) > 23 ? 3 : 1) 418 419 static int mod_exp_recp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, 420 const BIGNUM *m, BN_CTX *ctx) { 421 int i, j, bits, ret = 0, wstart, window; 422 int start = 1; 423 BIGNUM *aa; 424 /* Table of variables obtained from 'ctx' */ 425 BIGNUM *val[TABLE_SIZE]; 426 BN_RECP_CTX recp; 427 428 if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) { 429 /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */ 430 OPENSSL_PUT_ERROR(BN, mod_exp_recp, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED); 431 return 0; 432 } 433 434 bits = BN_num_bits(p); 435 436 if (bits == 0) { 437 ret = BN_one(r); 438 return ret; 439 } 440 441 BN_CTX_start(ctx); 442 aa = BN_CTX_get(ctx); 443 val[0] = BN_CTX_get(ctx); 444 if (!aa || !val[0]) { 445 goto err; 446 } 447 448 BN_RECP_CTX_init(&recp); 449 if (m->neg) { 450 /* ignore sign of 'm' */ 451 if (!BN_copy(aa, m)) { 452 goto err; 453 } 454 aa->neg = 0; 455 if (BN_RECP_CTX_set(&recp, aa, ctx) <= 0) { 456 goto err; 457 } 458 } else { 459 if (BN_RECP_CTX_set(&recp, m, ctx) <= 0) { 460 goto err; 461 } 462 } 463 464 if (!BN_nnmod(val[0], a, m, ctx)) { 465 goto err; /* 1 */ 466 } 467 if (BN_is_zero(val[0])) { 468 BN_zero(r); 469 ret = 1; 470 goto err; 471 } 472 473 window = BN_window_bits_for_exponent_size(bits); 474 if (window > 1) { 475 if (!BN_mod_mul_reciprocal(aa, val[0], val[0], &recp, ctx)) { 476 goto err; /* 2 */ 477 } 478 j = 1 << (window - 1); 479 for (i = 1; i < j; i++) { 480 if (((val[i] = BN_CTX_get(ctx)) == NULL) || 481 !BN_mod_mul_reciprocal(val[i], val[i - 1], aa, &recp, ctx)) { 482 goto err; 483 } 484 } 485 } 486 487 start = 1; /* This is used to avoid multiplication etc 488 * when there is only the value '1' in the 489 * buffer. */ 490 wstart = bits - 1; /* The top bit of the window */ 491 492 if (!BN_one(r)) { 493 goto err; 494 } 495 496 for (;;) { 497 int wvalue; /* The 'value' of the window */ 498 int wend; /* The bottom bit of the window */ 499 500 if (BN_is_bit_set(p, wstart) == 0) { 501 if (!start) { 502 if (!BN_mod_mul_reciprocal(r, r, r, &recp, ctx)) { 503 goto err; 504 } 505 } 506 if (wstart == 0) { 507 break; 508 } 509 wstart--; 510 continue; 511 } 512 513 /* We now have wstart on a 'set' bit, we now need to work out 514 * how bit a window to do. To do this we need to scan 515 * forward until the last set bit before the end of the 516 * window */ 517 wvalue = 1; 518 wend = 0; 519 for (i = 1; i < window; i++) { 520 if (wstart - i < 0) { 521 break; 522 } 523 if (BN_is_bit_set(p, wstart - i)) { 524 wvalue <<= (i - wend); 525 wvalue |= 1; 526 wend = i; 527 } 528 } 529 530 /* wend is the size of the current window */ 531 j = wend + 1; 532 /* add the 'bytes above' */ 533 if (!start) { 534 for (i = 0; i < j; i++) { 535 if (!BN_mod_mul_reciprocal(r, r, r, &recp, ctx)) { 536 goto err; 537 } 538 } 539 } 540 541 /* wvalue will be an odd number < 2^window */ 542 if (!BN_mod_mul_reciprocal(r, r, val[wvalue >> 1], &recp, ctx)) { 543 goto err; 544 } 545 546 /* move the 'window' down further */ 547 wstart -= wend + 1; 548 start = 0; 549 if (wstart < 0) { 550 break; 551 } 552 } 553 ret = 1; 554 555 err: 556 BN_CTX_end(ctx); 557 BN_RECP_CTX_free(&recp); 558 return ret; 559 } 560 561 int BN_mod_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, 562 BN_CTX *ctx) { 563 /* For even modulus m = 2^k*m_odd, it might make sense to compute 564 * a^p mod m_odd and a^p mod 2^k separately (with Montgomery 565 * exponentiation for the odd part), using appropriate exponent 566 * reductions, and combine the results using the CRT. 567 * 568 * For now, we use Montgomery only if the modulus is odd; otherwise, 569 * exponentiation using the reciprocal-based quick remaindering 570 * algorithm is used. 571 * 572 * (Timing obtained with expspeed.c [computations a^p mod m 573 * where a, p, m are of the same length: 256, 512, 1024, 2048, 574 * 4096, 8192 bits], compared to the running time of the 575 * standard algorithm: 576 * 577 * BN_mod_exp_mont 33 .. 40 % [AMD K6-2, Linux, debug configuration] 578 * 55 .. 77 % [UltraSparc processor, but 579 * debug-solaris-sparcv8-gcc conf.] 580 * 581 * BN_mod_exp_recp 50 .. 70 % [AMD K6-2, Linux, debug configuration] 582 * 62 .. 118 % [UltraSparc, debug-solaris-sparcv8-gcc] 583 * 584 * On the Sparc, BN_mod_exp_recp was faster than BN_mod_exp_mont 585 * at 2048 and more bits, but at 512 and 1024 bits, it was 586 * slower even than the standard algorithm! 587 * 588 * "Real" timings [linux-elf, solaris-sparcv9-gcc configurations] 589 * should be obtained when the new Montgomery reduction code 590 * has been integrated into OpenSSL.) */ 591 592 if (BN_is_odd(m)) { 593 if (a->top == 1 && !a->neg && BN_get_flags(p, BN_FLG_CONSTTIME) == 0) { 594 BN_ULONG A = a->d[0]; 595 return BN_mod_exp_mont_word(r, A, p, m, ctx, NULL); 596 } 597 598 return BN_mod_exp_mont(r, a, p, m, ctx, NULL); 599 } 600 601 return mod_exp_recp(r, a, p, m, ctx); 602 } 603 604 int BN_mod_exp_mont(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, 605 const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *in_mont) { 606 int i, j, bits, ret = 0, wstart, window; 607 int start = 1; 608 BIGNUM *d, *r; 609 const BIGNUM *aa; 610 /* Table of variables obtained from 'ctx' */ 611 BIGNUM *val[TABLE_SIZE]; 612 BN_MONT_CTX *mont = NULL; 613 614 if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) { 615 return BN_mod_exp_mont_consttime(rr, a, p, m, ctx, in_mont); 616 } 617 618 if (!BN_is_odd(m)) { 619 OPENSSL_PUT_ERROR(BN, BN_mod_exp_mont, BN_R_CALLED_WITH_EVEN_MODULUS); 620 return 0; 621 } 622 bits = BN_num_bits(p); 623 if (bits == 0) { 624 ret = BN_one(rr); 625 return ret; 626 } 627 628 BN_CTX_start(ctx); 629 d = BN_CTX_get(ctx); 630 r = BN_CTX_get(ctx); 631 val[0] = BN_CTX_get(ctx); 632 if (!d || !r || !val[0]) { 633 goto err; 634 } 635 636 /* If this is not done, things will break in the montgomery part */ 637 638 if (in_mont != NULL) { 639 mont = in_mont; 640 } else { 641 mont = BN_MONT_CTX_new(); 642 if (mont == NULL) { 643 goto err; 644 } 645 if (!BN_MONT_CTX_set(mont, m, ctx)) { 646 goto err; 647 } 648 } 649 650 if (a->neg || BN_ucmp(a, m) >= 0) { 651 if (!BN_nnmod(val[0], a, m, ctx)) { 652 goto err; 653 } 654 aa = val[0]; 655 } else { 656 aa = a; 657 } 658 659 if (BN_is_zero(aa)) { 660 BN_zero(rr); 661 ret = 1; 662 goto err; 663 } 664 if (!BN_to_montgomery(val[0], aa, mont, ctx)) { 665 goto err; /* 1 */ 666 } 667 668 window = BN_window_bits_for_exponent_size(bits); 669 if (window > 1) { 670 if (!BN_mod_mul_montgomery(d, val[0], val[0], mont, ctx)) { 671 goto err; /* 2 */ 672 } 673 j = 1 << (window - 1); 674 for (i = 1; i < j; i++) { 675 if (((val[i] = BN_CTX_get(ctx)) == NULL) || 676 !BN_mod_mul_montgomery(val[i], val[i - 1], d, mont, ctx)) { 677 goto err; 678 } 679 } 680 } 681 682 start = 1; /* This is used to avoid multiplication etc 683 * when there is only the value '1' in the 684 * buffer. */ 685 wstart = bits - 1; /* The top bit of the window */ 686 687 j = m->top; /* borrow j */ 688 if (m->d[j - 1] & (((BN_ULONG)1) << (BN_BITS2 - 1))) { 689 if (bn_wexpand(r, j) == NULL) { 690 goto err; 691 } 692 /* 2^(top*BN_BITS2) - m */ 693 r->d[0] = (0 - m->d[0]) & BN_MASK2; 694 for (i = 1; i < j; i++) { 695 r->d[i] = (~m->d[i]) & BN_MASK2; 696 } 697 r->top = j; 698 /* Upper words will be zero if the corresponding words of 'm' 699 * were 0xfff[...], so decrement r->top accordingly. */ 700 bn_correct_top(r); 701 } else if (!BN_to_montgomery(r, BN_value_one(), mont, ctx)) { 702 goto err; 703 } 704 705 for (;;) { 706 int wvalue; /* The 'value' of the window */ 707 int wend; /* The bottom bit of the window */ 708 709 if (BN_is_bit_set(p, wstart) == 0) { 710 if (!start && !BN_mod_mul_montgomery(r, r, r, mont, ctx)) { 711 goto err; 712 } 713 if (wstart == 0) { 714 break; 715 } 716 wstart--; 717 continue; 718 } 719 720 /* We now have wstart on a 'set' bit, we now need to work out how bit a 721 * window to do. To do this we need to scan forward until the last set bit 722 * before the end of the window */ 723 wvalue = 1; 724 wend = 0; 725 for (i = 1; i < window; i++) { 726 if (wstart - i < 0) { 727 break; 728 } 729 if (BN_is_bit_set(p, wstart - i)) { 730 wvalue <<= (i - wend); 731 wvalue |= 1; 732 wend = i; 733 } 734 } 735 736 /* wend is the size of the current window */ 737 j = wend + 1; 738 /* add the 'bytes above' */ 739 if (!start) { 740 for (i = 0; i < j; i++) { 741 if (!BN_mod_mul_montgomery(r, r, r, mont, ctx)) { 742 goto err; 743 } 744 } 745 } 746 747 /* wvalue will be an odd number < 2^window */ 748 if (!BN_mod_mul_montgomery(r, r, val[wvalue >> 1], mont, ctx)) { 749 goto err; 750 } 751 752 /* move the 'window' down further */ 753 wstart -= wend + 1; 754 start = 0; 755 if (wstart < 0) { 756 break; 757 } 758 } 759 760 if (!BN_from_montgomery(rr, r, mont, ctx)) { 761 goto err; 762 } 763 ret = 1; 764 765 err: 766 if (in_mont == NULL) { 767 BN_MONT_CTX_free(mont); 768 } 769 BN_CTX_end(ctx); 770 return ret; 771 } 772 773 /* BN_mod_exp_mont_consttime() stores the precomputed powers in a specific 774 * layout so that accessing any of these table values shows the same access 775 * pattern as far as cache lines are concerned. The following functions are 776 * used to transfer a BIGNUM from/to that table. */ 777 static int copy_to_prebuf(const BIGNUM *b, int top, unsigned char *buf, int idx, 778 int width) { 779 size_t i, j; 780 781 if (top > b->top) { 782 top = b->top; /* this works because 'buf' is explicitly zeroed */ 783 } 784 for (i = 0, j = idx; i < top * sizeof b->d[0]; i++, j += width) { 785 buf[j] = ((unsigned char *)b->d)[i]; 786 } 787 788 return 1; 789 } 790 791 static int copy_from_prebuf(BIGNUM *b, int top, unsigned char *buf, int idx, 792 int width) { 793 size_t i, j; 794 795 if (bn_wexpand(b, top) == NULL) { 796 return 0; 797 } 798 799 for (i = 0, j = idx; i < top * sizeof b->d[0]; i++, j += width) { 800 ((unsigned char *)b->d)[i] = buf[j]; 801 } 802 803 b->top = top; 804 bn_correct_top(b); 805 return 1; 806 } 807 808 /* BN_mod_exp_mont_conttime is based on the assumption that the L1 data cache 809 * line width of the target processor is at least the following value. */ 810 #define MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH (64) 811 #define MOD_EXP_CTIME_MIN_CACHE_LINE_MASK \ 812 (MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH - 1) 813 814 /* Window sizes optimized for fixed window size modular exponentiation 815 * algorithm (BN_mod_exp_mont_consttime). 816 * 817 * To achieve the security goals of BN_mode_exp_mont_consttime, the maximum 818 * size of the window must not exceed 819 * log_2(MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH). 820 * 821 * Window size thresholds are defined for cache line sizes of 32 and 64, cache 822 * line sizes where log_2(32)=5 and log_2(64)=6 respectively. A window size of 823 * 7 should only be used on processors that have a 128 byte or greater cache 824 * line size. */ 825 #if MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH == 64 826 827 #define BN_window_bits_for_ctime_exponent_size(b) \ 828 ((b) > 937 ? 6 : (b) > 306 ? 5 : (b) > 89 ? 4 : (b) > 22 ? 3 : 1) 829 #define BN_MAX_WINDOW_BITS_FOR_CTIME_EXPONENT_SIZE (6) 830 831 #elif MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH == 32 832 833 #define BN_window_bits_for_ctime_exponent_size(b) \ 834 ((b) > 306 ? 5 : (b) > 89 ? 4 : (b) > 22 ? 3 : 1) 835 #define BN_MAX_WINDOW_BITS_FOR_CTIME_EXPONENT_SIZE (5) 836 837 #endif 838 839 /* Given a pointer value, compute the next address that is a cache line 840 * multiple. */ 841 #define MOD_EXP_CTIME_ALIGN(x_) \ 842 ((unsigned char *)(x_) + \ 843 (MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH - \ 844 (((size_t)(x_)) & (MOD_EXP_CTIME_MIN_CACHE_LINE_MASK)))) 845 846 /* This variant of BN_mod_exp_mont() uses fixed windows and the special 847 * precomputation memory layout to limit data-dependency to a minimum 848 * to protect secret exponents (cf. the hyper-threading timing attacks 849 * pointed out by Colin Percival, 850 * http://www.daemonology.net/hyperthreading-considered-harmful/) 851 */ 852 int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, 853 const BIGNUM *m, BN_CTX *ctx, 854 BN_MONT_CTX *in_mont) { 855 int i, bits, ret = 0, window, wvalue; 856 int top; 857 BN_MONT_CTX *mont = NULL; 858 859 int numPowers; 860 unsigned char *powerbufFree = NULL; 861 int powerbufLen = 0; 862 unsigned char *powerbuf = NULL; 863 BIGNUM tmp, am; 864 865 top = m->top; 866 867 if (!(m->d[0] & 1)) { 868 OPENSSL_PUT_ERROR(BN, BN_mod_exp_mont_consttime, 869 BN_R_CALLED_WITH_EVEN_MODULUS); 870 return 0; 871 } 872 bits = BN_num_bits(p); 873 if (bits == 0) { 874 ret = BN_one(rr); 875 return ret; 876 } 877 878 BN_CTX_start(ctx); 879 880 /* Allocate a montgomery context if it was not supplied by the caller. 881 * If this is not done, things will break in the montgomery part. */ 882 if (in_mont != NULL) { 883 mont = in_mont; 884 } else { 885 mont = BN_MONT_CTX_new(); 886 if (mont == NULL || !BN_MONT_CTX_set(mont, m, ctx)) { 887 goto err; 888 } 889 } 890 891 #ifdef RSAZ_ENABLED 892 /* If the size of the operands allow it, perform the optimized 893 * RSAZ exponentiation. For further information see 894 * crypto/bn/rsaz_exp.c and accompanying assembly modules. */ 895 if ((16 == a->top) && (16 == p->top) && (BN_num_bits(m) == 1024) && 896 rsaz_avx2_eligible()) { 897 if (NULL == bn_wexpand(rr, 16)) { 898 goto err; 899 } 900 RSAZ_1024_mod_exp_avx2(rr->d, a->d, p->d, m->d, mont->RR.d, mont->n0[0]); 901 rr->top = 16; 902 rr->neg = 0; 903 bn_correct_top(rr); 904 ret = 1; 905 goto err; 906 } else if ((8 == a->top) && (8 == p->top) && (BN_num_bits(m) == 512)) { 907 if (NULL == bn_wexpand(rr, 8)) { 908 goto err; 909 } 910 RSAZ_512_mod_exp(rr->d, a->d, p->d, m->d, mont->n0[0], mont->RR.d); 911 rr->top = 8; 912 rr->neg = 0; 913 bn_correct_top(rr); 914 ret = 1; 915 goto err; 916 } 917 #endif 918 919 /* Get the window size to use with size of p. */ 920 window = BN_window_bits_for_ctime_exponent_size(bits); 921 #if defined(OPENSSL_BN_ASM_MONT5) 922 if (window >= 5) { 923 window = 5; /* ~5% improvement for RSA2048 sign, and even for RSA4096 */ 924 if ((top & 7) == 0) { 925 powerbufLen += 2 * top * sizeof(m->d[0]); 926 } 927 } 928 #endif 929 (void)0; 930 931 /* Allocate a buffer large enough to hold all of the pre-computed 932 * powers of am, am itself and tmp. 933 */ 934 numPowers = 1 << window; 935 powerbufLen += 936 sizeof(m->d[0]) * 937 (top * numPowers + ((2 * top) > numPowers ? (2 * top) : numPowers)); 938 #ifdef alloca 939 if (powerbufLen < 3072) { 940 powerbufFree = alloca(powerbufLen + MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH); 941 } else 942 #endif 943 { 944 if ((powerbufFree = (unsigned char *)OPENSSL_malloc( 945 powerbufLen + MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH)) == NULL) { 946 goto err; 947 } 948 } 949 950 powerbuf = MOD_EXP_CTIME_ALIGN(powerbufFree); 951 memset(powerbuf, 0, powerbufLen); 952 953 #ifdef alloca 954 if (powerbufLen < 3072) { 955 powerbufFree = NULL; 956 } 957 #endif 958 959 /* lay down tmp and am right after powers table */ 960 tmp.d = (BN_ULONG *)(powerbuf + sizeof(m->d[0]) * top * numPowers); 961 am.d = tmp.d + top; 962 tmp.top = am.top = 0; 963 tmp.dmax = am.dmax = top; 964 tmp.neg = am.neg = 0; 965 tmp.flags = am.flags = BN_FLG_STATIC_DATA; 966 967 /* prepare a^0 in Montgomery domain */ 968 /* by Shay Gueron's suggestion */ 969 if (m->d[top - 1] & (((BN_ULONG)1) << (BN_BITS2 - 1))) { 970 /* 2^(top*BN_BITS2) - m */ 971 tmp.d[0] = (0 - m->d[0]) & BN_MASK2; 972 for (i = 1; i < top; i++) { 973 tmp.d[i] = (~m->d[i]) & BN_MASK2; 974 } 975 tmp.top = top; 976 } else if (!BN_to_montgomery(&tmp, BN_value_one(), mont, ctx)) { 977 goto err; 978 } 979 980 /* prepare a^1 in Montgomery domain */ 981 if (a->neg || BN_ucmp(a, m) >= 0) { 982 if (!BN_mod(&am, a, m, ctx) || 983 !BN_to_montgomery(&am, &am, mont, ctx)) { 984 goto err; 985 } 986 } else if (!BN_to_montgomery(&am, a, mont, ctx)) { 987 goto err; 988 } 989 990 #if defined(OPENSSL_BN_ASM_MONT5) 991 /* This optimization uses ideas from http://eprint.iacr.org/2011/239, 992 * specifically optimization of cache-timing attack countermeasures 993 * and pre-computation optimization. */ 994 995 /* Dedicated window==4 case improves 512-bit RSA sign by ~15%, but as 996 * 512-bit RSA is hardly relevant, we omit it to spare size... */ 997 if (window == 5 && top > 1) { 998 void bn_mul_mont_gather5(BN_ULONG * rp, const BN_ULONG * ap, 999 const void * table, const BN_ULONG * np, 1000 const BN_ULONG * n0, int num, int power); 1001 void bn_scatter5(const BN_ULONG * inp, size_t num, void * table, 1002 size_t power); 1003 void bn_gather5(BN_ULONG * out, size_t num, void * table, size_t power); 1004 void bn_power5(BN_ULONG * rp, const BN_ULONG * ap, const void * table, 1005 const BN_ULONG * np, const BN_ULONG * n0, int num, 1006 int power); 1007 int bn_from_montgomery(BN_ULONG * rp, const BN_ULONG * ap, 1008 const BN_ULONG * not_used, const BN_ULONG * np, 1009 const BN_ULONG * n0, int num); 1010 1011 BN_ULONG *np = mont->N.d, *n0 = mont->n0, *np2; 1012 1013 /* BN_to_montgomery can contaminate words above .top 1014 * [in BN_DEBUG[_DEBUG] build]... */ 1015 for (i = am.top; i < top; i++) { 1016 am.d[i] = 0; 1017 } 1018 for (i = tmp.top; i < top; i++) { 1019 tmp.d[i] = 0; 1020 } 1021 1022 if (top & 7) { 1023 np2 = np; 1024 } else { 1025 for (np2 = am.d + top, i = 0; i < top; i++) { 1026 np2[2 * i] = np[i]; 1027 } 1028 } 1029 1030 bn_scatter5(tmp.d, top, powerbuf, 0); 1031 bn_scatter5(am.d, am.top, powerbuf, 1); 1032 bn_mul_mont(tmp.d, am.d, am.d, np, n0, top); 1033 bn_scatter5(tmp.d, top, powerbuf, 2); 1034 1035 /* same as above, but uses squaring for 1/2 of operations */ 1036 for (i = 4; i < 32; i *= 2) { 1037 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); 1038 bn_scatter5(tmp.d, top, powerbuf, i); 1039 } 1040 for (i = 3; i < 8; i += 2) { 1041 int j; 1042 bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np2, n0, top, i - 1); 1043 bn_scatter5(tmp.d, top, powerbuf, i); 1044 for (j = 2 * i; j < 32; j *= 2) { 1045 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); 1046 bn_scatter5(tmp.d, top, powerbuf, j); 1047 } 1048 } 1049 for (; i < 16; i += 2) { 1050 bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np2, n0, top, i - 1); 1051 bn_scatter5(tmp.d, top, powerbuf, i); 1052 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); 1053 bn_scatter5(tmp.d, top, powerbuf, 2 * i); 1054 } 1055 for (; i < 32; i += 2) { 1056 bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np2, n0, top, i - 1); 1057 bn_scatter5(tmp.d, top, powerbuf, i); 1058 } 1059 1060 bits--; 1061 for (wvalue = 0, i = bits % 5; i >= 0; i--, bits--) { 1062 wvalue = (wvalue << 1) + BN_is_bit_set(p, bits); 1063 } 1064 bn_gather5(tmp.d, top, powerbuf, wvalue); 1065 1066 /* At this point |bits| is 4 mod 5 and at least -1. (|bits| is the first bit 1067 * that has not been read yet.) */ 1068 assert(bits >= -1 && (bits == -1 || bits % 5 == 4)); 1069 1070 /* Scan the exponent one window at a time starting from the most 1071 * significant bits. 1072 */ 1073 if (top & 7) { 1074 while (bits >= 0) { 1075 for (wvalue = 0, i = 0; i < 5; i++, bits--) { 1076 wvalue = (wvalue << 1) + BN_is_bit_set(p, bits); 1077 } 1078 1079 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); 1080 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); 1081 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); 1082 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); 1083 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); 1084 bn_mul_mont_gather5(tmp.d, tmp.d, powerbuf, np, n0, top, wvalue); 1085 } 1086 } else { 1087 const uint8_t *p_bytes = (const uint8_t *)p->d; 1088 int max_bits = p->top * BN_BITS2; 1089 assert(bits < max_bits); 1090 /* |p = 0| has been handled as a special case, so |max_bits| is at least 1091 * one word. */ 1092 assert(max_bits >= 64); 1093 1094 /* If the first bit to be read lands in the last byte, unroll the first 1095 * iteration to avoid reading past the bounds of |p->d|. (After the first 1096 * iteration, we are guaranteed to be past the last byte.) Note |bits| 1097 * here is the top bit, inclusive. */ 1098 if (bits - 4 >= max_bits - 8) { 1099 /* Read five bits from |bits-4| through |bits|, inclusive. */ 1100 wvalue = p_bytes[p->top * BN_BYTES - 1]; 1101 wvalue >>= (bits - 4) & 7; 1102 wvalue &= 0x1f; 1103 bits -= 5; 1104 bn_power5(tmp.d, tmp.d, powerbuf, np2, n0, top, wvalue); 1105 } 1106 while (bits >= 0) { 1107 /* Read five bits from |bits-4| through |bits|, inclusive. */ 1108 int first_bit = bits - 4; 1109 wvalue = *(const uint16_t *) (p_bytes + (first_bit >> 3)); 1110 wvalue >>= first_bit & 7; 1111 wvalue &= 0x1f; 1112 bits -= 5; 1113 bn_power5(tmp.d, tmp.d, powerbuf, np2, n0, top, wvalue); 1114 } 1115 } 1116 1117 ret = bn_from_montgomery(tmp.d, tmp.d, NULL, np2, n0, top); 1118 tmp.top = top; 1119 bn_correct_top(&tmp); 1120 if (ret) { 1121 if (!BN_copy(rr, &tmp)) { 1122 ret = 0; 1123 } 1124 goto err; /* non-zero ret means it's not error */ 1125 } 1126 } else 1127 #endif 1128 { 1129 if (!copy_to_prebuf(&tmp, top, powerbuf, 0, numPowers) || 1130 !copy_to_prebuf(&am, top, powerbuf, 1, numPowers)) { 1131 goto err; 1132 } 1133 1134 /* If the window size is greater than 1, then calculate 1135 * val[i=2..2^winsize-1]. Powers are computed as a*a^(i-1) 1136 * (even powers could instead be computed as (a^(i/2))^2 1137 * to use the slight performance advantage of sqr over mul). 1138 */ 1139 if (window > 1) { 1140 if (!BN_mod_mul_montgomery(&tmp, &am, &am, mont, ctx) || 1141 !copy_to_prebuf(&tmp, top, powerbuf, 2, numPowers)) { 1142 goto err; 1143 } 1144 for (i = 3; i < numPowers; i++) { 1145 /* Calculate a^i = a^(i-1) * a */ 1146 if (!BN_mod_mul_montgomery(&tmp, &am, &tmp, mont, ctx) || 1147 !copy_to_prebuf(&tmp, top, powerbuf, i, numPowers)) { 1148 goto err; 1149 } 1150 } 1151 } 1152 1153 bits--; 1154 for (wvalue = 0, i = bits % window; i >= 0; i--, bits--) { 1155 wvalue = (wvalue << 1) + BN_is_bit_set(p, bits); 1156 } 1157 if (!copy_from_prebuf(&tmp, top, powerbuf, wvalue, numPowers)) { 1158 goto err; 1159 } 1160 1161 /* Scan the exponent one window at a time starting from the most 1162 * significant bits. 1163 */ 1164 while (bits >= 0) { 1165 wvalue = 0; /* The 'value' of the window */ 1166 1167 /* Scan the window, squaring the result as we go */ 1168 for (i = 0; i < window; i++, bits--) { 1169 if (!BN_mod_mul_montgomery(&tmp, &tmp, &tmp, mont, ctx)) { 1170 goto err; 1171 } 1172 wvalue = (wvalue << 1) + BN_is_bit_set(p, bits); 1173 } 1174 1175 /* Fetch the appropriate pre-computed value from the pre-buf */ 1176 if (!copy_from_prebuf(&am, top, powerbuf, wvalue, numPowers)) { 1177 goto err; 1178 } 1179 1180 /* Multiply the result into the intermediate result */ 1181 if (!BN_mod_mul_montgomery(&tmp, &tmp, &am, mont, ctx)) { 1182 goto err; 1183 } 1184 } 1185 } 1186 1187 /* Convert the final result from montgomery to standard format */ 1188 if (!BN_from_montgomery(rr, &tmp, mont, ctx)) { 1189 goto err; 1190 } 1191 ret = 1; 1192 err: 1193 if (in_mont == NULL) { 1194 BN_MONT_CTX_free(mont); 1195 } 1196 if (powerbuf != NULL) { 1197 OPENSSL_cleanse(powerbuf, powerbufLen); 1198 OPENSSL_free(powerbufFree); 1199 } 1200 BN_CTX_end(ctx); 1201 return (ret); 1202 } 1203 1204 int BN_mod_exp_mont_word(BIGNUM *rr, BN_ULONG a, const BIGNUM *p, 1205 const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *in_mont) { 1206 BN_MONT_CTX *mont = NULL; 1207 int b, bits, ret = 0; 1208 int r_is_one; 1209 BN_ULONG w, next_w; 1210 BIGNUM *d, *r, *t; 1211 BIGNUM *swap_tmp; 1212 #define BN_MOD_MUL_WORD(r, w, m) \ 1213 (BN_mul_word(r, (w)) && \ 1214 (/* BN_ucmp(r, (m)) < 0 ? 1 :*/ \ 1215 (BN_mod(t, r, m, ctx) && (swap_tmp = r, r = t, t = swap_tmp, 1)))) 1216 /* BN_MOD_MUL_WORD is only used with 'w' large, so the BN_ucmp test is 1217 * probably more overhead than always using BN_mod (which uses BN_copy if a 1218 * similar test returns true). We can use BN_mod and do not need BN_nnmod 1219 * because our accumulator is never negative (the result of BN_mod does not 1220 * depend on the sign of the modulus). */ 1221 #define BN_TO_MONTGOMERY_WORD(r, w, mont) \ 1222 (BN_set_word(r, (w)) && BN_to_montgomery(r, r, (mont), ctx)) 1223 1224 if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) { 1225 /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */ 1226 OPENSSL_PUT_ERROR(BN, BN_mod_exp_mont_word, 1227 ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED); 1228 return 0; 1229 } 1230 1231 if (!BN_is_odd(m)) { 1232 OPENSSL_PUT_ERROR(BN, BN_mod_exp_mont_word, BN_R_CALLED_WITH_EVEN_MODULUS); 1233 return 0; 1234 } 1235 1236 if (m->top == 1) { 1237 a %= m->d[0]; /* make sure that 'a' is reduced */ 1238 } 1239 1240 bits = BN_num_bits(p); 1241 if (bits == 0) { 1242 /* x**0 mod 1 is still zero. */ 1243 if (BN_is_one(m)) { 1244 ret = 1; 1245 BN_zero(rr); 1246 } else { 1247 ret = BN_one(rr); 1248 } 1249 return ret; 1250 } 1251 if (a == 0) { 1252 BN_zero(rr); 1253 ret = 1; 1254 return ret; 1255 } 1256 1257 BN_CTX_start(ctx); 1258 d = BN_CTX_get(ctx); 1259 r = BN_CTX_get(ctx); 1260 t = BN_CTX_get(ctx); 1261 if (d == NULL || r == NULL || t == NULL) { 1262 goto err; 1263 } 1264 1265 if (in_mont != NULL) { 1266 mont = in_mont; 1267 } else { 1268 mont = BN_MONT_CTX_new(); 1269 if (mont == NULL || !BN_MONT_CTX_set(mont, m, ctx)) { 1270 goto err; 1271 } 1272 } 1273 1274 r_is_one = 1; /* except for Montgomery factor */ 1275 1276 /* bits-1 >= 0 */ 1277 1278 /* The result is accumulated in the product r*w. */ 1279 w = a; /* bit 'bits-1' of 'p' is always set */ 1280 for (b = bits - 2; b >= 0; b--) { 1281 /* First, square r*w. */ 1282 next_w = w * w; 1283 if ((next_w / w) != w) { 1284 /* overflow */ 1285 if (r_is_one) { 1286 if (!BN_TO_MONTGOMERY_WORD(r, w, mont)) { 1287 goto err; 1288 } 1289 r_is_one = 0; 1290 } else { 1291 if (!BN_MOD_MUL_WORD(r, w, m)) { 1292 goto err; 1293 } 1294 } 1295 next_w = 1; 1296 } 1297 1298 w = next_w; 1299 if (!r_is_one) { 1300 if (!BN_mod_mul_montgomery(r, r, r, mont, ctx)) { 1301 goto err; 1302 } 1303 } 1304 1305 /* Second, multiply r*w by 'a' if exponent bit is set. */ 1306 if (BN_is_bit_set(p, b)) { 1307 next_w = w * a; 1308 if ((next_w / a) != w) { 1309 /* overflow */ 1310 if (r_is_one) { 1311 if (!BN_TO_MONTGOMERY_WORD(r, w, mont)) { 1312 goto err; 1313 } 1314 r_is_one = 0; 1315 } else { 1316 if (!BN_MOD_MUL_WORD(r, w, m)) { 1317 goto err; 1318 } 1319 } 1320 next_w = a; 1321 } 1322 w = next_w; 1323 } 1324 } 1325 1326 /* Finally, set r:=r*w. */ 1327 if (w != 1) { 1328 if (r_is_one) { 1329 if (!BN_TO_MONTGOMERY_WORD(r, w, mont)) { 1330 goto err; 1331 } 1332 r_is_one = 0; 1333 } else { 1334 if (!BN_MOD_MUL_WORD(r, w, m)) { 1335 goto err; 1336 } 1337 } 1338 } 1339 1340 if (r_is_one) { 1341 /* can happen only if a == 1*/ 1342 if (!BN_one(rr)) { 1343 goto err; 1344 } 1345 } else { 1346 if (!BN_from_montgomery(rr, r, mont, ctx)) { 1347 goto err; 1348 } 1349 } 1350 ret = 1; 1351 1352 err: 1353 if (in_mont == NULL) { 1354 BN_MONT_CTX_free(mont); 1355 } 1356 BN_CTX_end(ctx); 1357 return ret; 1358 } 1359 1360 #define TABLE_SIZE 32 1361 1362 int BN_mod_exp2_mont(BIGNUM *rr, const BIGNUM *a1, const BIGNUM *p1, 1363 const BIGNUM *a2, const BIGNUM *p2, const BIGNUM *m, 1364 BN_CTX *ctx, BN_MONT_CTX *in_mont) { 1365 int i, j, bits, b, bits1, bits2, ret = 0, wpos1, wpos2, window1, window2, 1366 wvalue1, wvalue2; 1367 int r_is_one = 1; 1368 BIGNUM *d, *r; 1369 const BIGNUM *a_mod_m; 1370 /* Tables of variables obtained from 'ctx' */ 1371 BIGNUM *val1[TABLE_SIZE], *val2[TABLE_SIZE]; 1372 BN_MONT_CTX *mont = NULL; 1373 1374 if (!(m->d[0] & 1)) { 1375 OPENSSL_PUT_ERROR(BN, BN_mod_exp2_mont, BN_R_CALLED_WITH_EVEN_MODULUS); 1376 return 0; 1377 } 1378 bits1 = BN_num_bits(p1); 1379 bits2 = BN_num_bits(p2); 1380 if (bits1 == 0 && bits2 == 0) { 1381 ret = BN_one(rr); 1382 return ret; 1383 } 1384 1385 bits = (bits1 > bits2) ? bits1 : bits2; 1386 1387 BN_CTX_start(ctx); 1388 d = BN_CTX_get(ctx); 1389 r = BN_CTX_get(ctx); 1390 val1[0] = BN_CTX_get(ctx); 1391 val2[0] = BN_CTX_get(ctx); 1392 if (!d || !r || !val1[0] || !val2[0]) { 1393 goto err; 1394 } 1395 1396 if (in_mont != NULL) { 1397 mont = in_mont; 1398 } else { 1399 mont = BN_MONT_CTX_new(); 1400 if (mont == NULL) { 1401 goto err; 1402 } 1403 if (!BN_MONT_CTX_set(mont, m, ctx)) { 1404 goto err; 1405 } 1406 } 1407 1408 window1 = BN_window_bits_for_exponent_size(bits1); 1409 window2 = BN_window_bits_for_exponent_size(bits2); 1410 1411 /* Build table for a1: val1[i] := a1^(2*i + 1) mod m for i = 0 .. 1412 * 2^(window1-1) */ 1413 if (a1->neg || BN_ucmp(a1, m) >= 0) { 1414 if (!BN_mod(val1[0], a1, m, ctx)) { 1415 goto err; 1416 } 1417 a_mod_m = val1[0]; 1418 } else { 1419 a_mod_m = a1; 1420 } 1421 1422 if (BN_is_zero(a_mod_m)) { 1423 BN_zero(rr); 1424 ret = 1; 1425 goto err; 1426 } 1427 1428 if (!BN_to_montgomery(val1[0], a_mod_m, mont, ctx)) { 1429 goto err; 1430 } 1431 1432 if (window1 > 1) { 1433 if (!BN_mod_mul_montgomery(d, val1[0], val1[0], mont, ctx)) { 1434 goto err; 1435 } 1436 1437 j = 1 << (window1 - 1); 1438 for (i = 1; i < j; i++) { 1439 if (((val1[i] = BN_CTX_get(ctx)) == NULL) || 1440 !BN_mod_mul_montgomery(val1[i], val1[i - 1], d, mont, ctx)) { 1441 goto err; 1442 } 1443 } 1444 } 1445 1446 /* Build table for a2: val2[i] := a2^(2*i + 1) mod m for i = 0 .. 1447 * 2^(window2-1) */ 1448 if (a2->neg || BN_ucmp(a2, m) >= 0) { 1449 if (!BN_mod(val2[0], a2, m, ctx)) { 1450 goto err; 1451 } 1452 a_mod_m = val2[0]; 1453 } else { 1454 a_mod_m = a2; 1455 } 1456 1457 if (BN_is_zero(a_mod_m)) { 1458 BN_zero(rr); 1459 ret = 1; 1460 goto err; 1461 } 1462 1463 if (!BN_to_montgomery(val2[0], a_mod_m, mont, ctx)) { 1464 goto err; 1465 } 1466 1467 if (window2 > 1) { 1468 if (!BN_mod_mul_montgomery(d, val2[0], val2[0], mont, ctx)) { 1469 goto err; 1470 } 1471 1472 j = 1 << (window2 - 1); 1473 for (i = 1; i < j; i++) { 1474 if (((val2[i] = BN_CTX_get(ctx)) == NULL) || 1475 !BN_mod_mul_montgomery(val2[i], val2[i - 1], d, mont, ctx)) { 1476 goto err; 1477 } 1478 } 1479 } 1480 1481 /* Now compute the power product, using independent windows. */ 1482 r_is_one = 1; 1483 wvalue1 = 0; /* The 'value' of the first window */ 1484 wvalue2 = 0; /* The 'value' of the second window */ 1485 wpos1 = 0; /* If wvalue1 > 0, the bottom bit of the first window */ 1486 wpos2 = 0; /* If wvalue2 > 0, the bottom bit of the second window */ 1487 1488 if (!BN_to_montgomery(r, BN_value_one(), mont, ctx)) { 1489 goto err; 1490 } 1491 1492 for (b = bits - 1; b >= 0; b--) { 1493 if (!r_is_one) { 1494 if (!BN_mod_mul_montgomery(r, r, r, mont, ctx)) { 1495 goto err; 1496 } 1497 } 1498 1499 if (!wvalue1 && BN_is_bit_set(p1, b)) { 1500 /* consider bits b-window1+1 .. b for this window */ 1501 i = b - window1 + 1; 1502 /* works for i<0 */ 1503 while (!BN_is_bit_set(p1, i)) { 1504 i++; 1505 } 1506 wpos1 = i; 1507 wvalue1 = 1; 1508 for (i = b - 1; i >= wpos1; i--) { 1509 wvalue1 <<= 1; 1510 if (BN_is_bit_set(p1, i)) { 1511 wvalue1++; 1512 } 1513 } 1514 } 1515 1516 if (!wvalue2 && BN_is_bit_set(p2, b)) { 1517 /* consider bits b-window2+1 .. b for this window */ 1518 i = b - window2 + 1; 1519 while (!BN_is_bit_set(p2, i)) { 1520 i++; 1521 } 1522 wpos2 = i; 1523 wvalue2 = 1; 1524 for (i = b - 1; i >= wpos2; i--) { 1525 wvalue2 <<= 1; 1526 if (BN_is_bit_set(p2, i)) { 1527 wvalue2++; 1528 } 1529 } 1530 } 1531 1532 if (wvalue1 && b == wpos1) { 1533 /* wvalue1 is odd and < 2^window1 */ 1534 if (!BN_mod_mul_montgomery(r, r, val1[wvalue1 >> 1], mont, ctx)) { 1535 goto err; 1536 } 1537 wvalue1 = 0; 1538 r_is_one = 0; 1539 } 1540 1541 if (wvalue2 && b == wpos2) { 1542 /* wvalue2 is odd and < 2^window2 */ 1543 if (!BN_mod_mul_montgomery(r, r, val2[wvalue2 >> 1], mont, ctx)) { 1544 goto err; 1545 } 1546 wvalue2 = 0; 1547 r_is_one = 0; 1548 } 1549 } 1550 1551 if (!BN_from_montgomery(rr, r, mont, ctx)) { 1552 goto err; 1553 } 1554 ret = 1; 1555 1556 err: 1557 if (in_mont == NULL) { 1558 BN_MONT_CTX_free(mont); 1559 } 1560 BN_CTX_end(ctx); 1561 return ret; 1562 } 1563