1 /* Copyright (C) 1995-1998 Eric Young (eay (at) cryptsoft.com) 2 * All rights reserved. 3 * 4 * This package is an SSL implementation written 5 * by Eric Young (eay (at) cryptsoft.com). 6 * The implementation was written so as to conform with Netscapes SSL. 7 * 8 * This library is free for commercial and non-commercial use as long as 9 * the following conditions are aheared to. The following conditions 10 * apply to all code found in this distribution, be it the RC4, RSA, 11 * lhash, DES, etc., code; not just the SSL code. The SSL documentation 12 * included with this distribution is covered by the same copyright terms 13 * except that the holder is Tim Hudson (tjh (at) cryptsoft.com). 14 * 15 * Copyright remains Eric Young's, and as such any Copyright notices in 16 * the code are not to be removed. 17 * If this package is used in a product, Eric Young should be given attribution 18 * as the author of the parts of the library used. 19 * This can be in the form of a textual message at program startup or 20 * in documentation (online or textual) provided with the package. 21 * 22 * Redistribution and use in source and binary forms, with or without 23 * modification, are permitted provided that the following conditions 24 * are met: 25 * 1. Redistributions of source code must retain the copyright 26 * notice, this list of conditions and the following disclaimer. 27 * 2. Redistributions in binary form must reproduce the above copyright 28 * notice, this list of conditions and the following disclaimer in the 29 * documentation and/or other materials provided with the distribution. 30 * 3. All advertising materials mentioning features or use of this software 31 * must display the following acknowledgement: 32 * "This product includes cryptographic software written by 33 * Eric Young (eay (at) cryptsoft.com)" 34 * The word 'cryptographic' can be left out if the rouines from the library 35 * being used are not cryptographic related :-). 36 * 4. If you include any Windows specific code (or a derivative thereof) from 37 * the apps directory (application code) you must include an acknowledgement: 38 * "This product includes software written by Tim Hudson (tjh (at) cryptsoft.com)" 39 * 40 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND 41 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 42 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 43 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 44 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 45 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 46 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 47 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 48 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 49 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 50 * SUCH DAMAGE. 51 * 52 * The licence and distribution terms for any publically available version or 53 * derivative of this code cannot be changed. i.e. this code cannot simply be 54 * copied and put under another distribution licence 55 * [including the GNU Public Licence.] 56 */ 57 /* ==================================================================== 58 * Copyright (c) 1998-2005 The OpenSSL Project. All rights reserved. 59 * 60 * Redistribution and use in source and binary forms, with or without 61 * modification, are permitted provided that the following conditions 62 * are met: 63 * 64 * 1. Redistributions of source code must retain the above copyright 65 * notice, this list of conditions and the following disclaimer. 66 * 67 * 2. Redistributions in binary form must reproduce the above copyright 68 * notice, this list of conditions and the following disclaimer in 69 * the documentation and/or other materials provided with the 70 * distribution. 71 * 72 * 3. All advertising materials mentioning features or use of this 73 * software must display the following acknowledgment: 74 * "This product includes software developed by the OpenSSL Project 75 * for use in the OpenSSL Toolkit. (http://www.openssl.org/)" 76 * 77 * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to 78 * endorse or promote products derived from this software without 79 * prior written permission. For written permission, please contact 80 * openssl-core (at) openssl.org. 81 * 82 * 5. Products derived from this software may not be called "OpenSSL" 83 * nor may "OpenSSL" appear in their names without prior written 84 * permission of the OpenSSL Project. 85 * 86 * 6. Redistributions of any form whatsoever must retain the following 87 * acknowledgment: 88 * "This product includes software developed by the OpenSSL Project 89 * for use in the OpenSSL Toolkit (http://www.openssl.org/)" 90 * 91 * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY 92 * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 93 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 94 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE OpenSSL PROJECT OR 95 * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 96 * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 97 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; 98 * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 99 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, 100 * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 101 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED 102 * OF THE POSSIBILITY OF SUCH DAMAGE. 103 * ==================================================================== 104 * 105 * This product includes cryptographic software written by Eric Young 106 * (eay (at) cryptsoft.com). This product includes software written by Tim 107 * Hudson (tjh (at) cryptsoft.com). */ 108 109 #include <openssl/bn.h> 110 111 #include <assert.h> 112 #include <string.h> 113 114 #include <openssl/cpu.h> 115 #include <openssl/err.h> 116 #include <openssl/mem.h> 117 118 #include "internal.h" 119 120 121 #if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64) 122 #define OPENSSL_BN_ASM_MONT5 123 #define RSAZ_ENABLED 124 125 #include "rsaz_exp.h" 126 127 void bn_mul_mont_gather5(BN_ULONG *rp, const BN_ULONG *ap, const void *table, 128 const BN_ULONG *np, const BN_ULONG *n0, int num, 129 int power); 130 void bn_scatter5(const BN_ULONG *inp, size_t num, void *table, size_t power); 131 void bn_gather5(BN_ULONG *out, size_t num, void *table, size_t power); 132 void bn_power5(BN_ULONG *rp, const BN_ULONG *ap, const void *table, 133 const BN_ULONG *np, const BN_ULONG *n0, int num, int power); 134 int bn_from_montgomery(BN_ULONG *rp, const BN_ULONG *ap, 135 const BN_ULONG *not_used, const BN_ULONG *np, 136 const BN_ULONG *n0, int num); 137 #endif 138 139 int BN_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx) { 140 int i, bits, ret = 0; 141 BIGNUM *v, *rr; 142 143 if ((p->flags & BN_FLG_CONSTTIME) != 0) { 144 /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */ 145 OPENSSL_PUT_ERROR(BN, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED); 146 return 0; 147 } 148 149 BN_CTX_start(ctx); 150 if (r == a || r == p) { 151 rr = BN_CTX_get(ctx); 152 } else { 153 rr = r; 154 } 155 156 v = BN_CTX_get(ctx); 157 if (rr == NULL || v == NULL) { 158 goto err; 159 } 160 161 if (BN_copy(v, a) == NULL) { 162 goto err; 163 } 164 bits = BN_num_bits(p); 165 166 if (BN_is_odd(p)) { 167 if (BN_copy(rr, a) == NULL) { 168 goto err; 169 } 170 } else { 171 if (!BN_one(rr)) { 172 goto err; 173 } 174 } 175 176 for (i = 1; i < bits; i++) { 177 if (!BN_sqr(v, v, ctx)) { 178 goto err; 179 } 180 if (BN_is_bit_set(p, i)) { 181 if (!BN_mul(rr, rr, v, ctx)) { 182 goto err; 183 } 184 } 185 } 186 187 if (r != rr && !BN_copy(r, rr)) { 188 goto err; 189 } 190 ret = 1; 191 192 err: 193 BN_CTX_end(ctx); 194 return ret; 195 } 196 197 /* maximum precomputation table size for *variable* sliding windows */ 198 #define TABLE_SIZE 32 199 200 typedef struct bn_recp_ctx_st { 201 BIGNUM N; /* the divisor */ 202 BIGNUM Nr; /* the reciprocal */ 203 int num_bits; 204 int shift; 205 int flags; 206 } BN_RECP_CTX; 207 208 static void BN_RECP_CTX_init(BN_RECP_CTX *recp) { 209 BN_init(&recp->N); 210 BN_init(&recp->Nr); 211 recp->num_bits = 0; 212 recp->flags = 0; 213 } 214 215 static void BN_RECP_CTX_free(BN_RECP_CTX *recp) { 216 if (recp == NULL) { 217 return; 218 } 219 220 BN_free(&recp->N); 221 BN_free(&recp->Nr); 222 } 223 224 static int BN_RECP_CTX_set(BN_RECP_CTX *recp, const BIGNUM *d, BN_CTX *ctx) { 225 if (!BN_copy(&(recp->N), d)) { 226 return 0; 227 } 228 BN_zero(&recp->Nr); 229 recp->num_bits = BN_num_bits(d); 230 recp->shift = 0; 231 232 return 1; 233 } 234 235 /* len is the expected size of the result We actually calculate with an extra 236 * word of precision, so we can do faster division if the remainder is not 237 * required. 238 * r := 2^len / m */ 239 static int BN_reciprocal(BIGNUM *r, const BIGNUM *m, int len, BN_CTX *ctx) { 240 int ret = -1; 241 BIGNUM *t; 242 243 BN_CTX_start(ctx); 244 t = BN_CTX_get(ctx); 245 if (t == NULL) { 246 goto err; 247 } 248 249 if (!BN_set_bit(t, len)) { 250 goto err; 251 } 252 253 if (!BN_div(r, NULL, t, m, ctx)) { 254 goto err; 255 } 256 257 ret = len; 258 259 err: 260 BN_CTX_end(ctx); 261 return ret; 262 } 263 264 static int BN_div_recp(BIGNUM *dv, BIGNUM *rem, const BIGNUM *m, 265 BN_RECP_CTX *recp, BN_CTX *ctx) { 266 int i, j, ret = 0; 267 BIGNUM *a, *b, *d, *r; 268 269 BN_CTX_start(ctx); 270 a = BN_CTX_get(ctx); 271 b = BN_CTX_get(ctx); 272 if (dv != NULL) { 273 d = dv; 274 } else { 275 d = BN_CTX_get(ctx); 276 } 277 278 if (rem != NULL) { 279 r = rem; 280 } else { 281 r = BN_CTX_get(ctx); 282 } 283 284 if (a == NULL || b == NULL || d == NULL || r == NULL) { 285 goto err; 286 } 287 288 if (BN_ucmp(m, &recp->N) < 0) { 289 BN_zero(d); 290 if (!BN_copy(r, m)) { 291 goto err; 292 } 293 BN_CTX_end(ctx); 294 return 1; 295 } 296 297 /* We want the remainder 298 * Given input of ABCDEF / ab 299 * we need multiply ABCDEF by 3 digests of the reciprocal of ab */ 300 301 /* i := max(BN_num_bits(m), 2*BN_num_bits(N)) */ 302 i = BN_num_bits(m); 303 j = recp->num_bits << 1; 304 if (j > i) { 305 i = j; 306 } 307 308 /* Nr := round(2^i / N) */ 309 if (i != recp->shift) { 310 recp->shift = 311 BN_reciprocal(&(recp->Nr), &(recp->N), i, 312 ctx); /* BN_reciprocal returns i, or -1 for an error */ 313 } 314 315 if (recp->shift == -1) { 316 goto err; 317 } 318 319 /* d := |round(round(m / 2^BN_num_bits(N)) * recp->Nr / 2^(i - 320 * BN_num_bits(N)))| 321 * = |round(round(m / 2^BN_num_bits(N)) * round(2^i / N) / 2^(i - 322 * BN_num_bits(N)))| 323 * <= |(m / 2^BN_num_bits(N)) * (2^i / N) * (2^BN_num_bits(N) / 2^i)| 324 * = |m/N| */ 325 if (!BN_rshift(a, m, recp->num_bits)) { 326 goto err; 327 } 328 if (!BN_mul(b, a, &(recp->Nr), ctx)) { 329 goto err; 330 } 331 if (!BN_rshift(d, b, i - recp->num_bits)) { 332 goto err; 333 } 334 d->neg = 0; 335 336 if (!BN_mul(b, &(recp->N), d, ctx)) { 337 goto err; 338 } 339 if (!BN_usub(r, m, b)) { 340 goto err; 341 } 342 r->neg = 0; 343 344 j = 0; 345 while (BN_ucmp(r, &(recp->N)) >= 0) { 346 if (j++ > 2) { 347 OPENSSL_PUT_ERROR(BN, BN_R_BAD_RECIPROCAL); 348 goto err; 349 } 350 if (!BN_usub(r, r, &(recp->N))) { 351 goto err; 352 } 353 if (!BN_add_word(d, 1)) { 354 goto err; 355 } 356 } 357 358 r->neg = BN_is_zero(r) ? 0 : m->neg; 359 d->neg = m->neg ^ recp->N.neg; 360 ret = 1; 361 362 err: 363 BN_CTX_end(ctx); 364 return ret; 365 } 366 367 static int BN_mod_mul_reciprocal(BIGNUM *r, const BIGNUM *x, const BIGNUM *y, 368 BN_RECP_CTX *recp, BN_CTX *ctx) { 369 int ret = 0; 370 BIGNUM *a; 371 const BIGNUM *ca; 372 373 BN_CTX_start(ctx); 374 a = BN_CTX_get(ctx); 375 if (a == NULL) { 376 goto err; 377 } 378 379 if (y != NULL) { 380 if (x == y) { 381 if (!BN_sqr(a, x, ctx)) { 382 goto err; 383 } 384 } else { 385 if (!BN_mul(a, x, y, ctx)) { 386 goto err; 387 } 388 } 389 ca = a; 390 } else { 391 ca = x; /* Just do the mod */ 392 } 393 394 ret = BN_div_recp(NULL, r, ca, recp, ctx); 395 396 err: 397 BN_CTX_end(ctx); 398 return ret; 399 } 400 401 /* BN_window_bits_for_exponent_size -- macro for sliding window mod_exp 402 * functions 403 * 404 * For window size 'w' (w >= 2) and a random 'b' bits exponent, the number of 405 * multiplications is a constant plus on average 406 * 407 * 2^(w-1) + (b-w)/(w+1); 408 * 409 * here 2^(w-1) is for precomputing the table (we actually need entries only 410 * for windows that have the lowest bit set), and (b-w)/(w+1) is an 411 * approximation for the expected number of w-bit windows, not counting the 412 * first one. 413 * 414 * Thus we should use 415 * 416 * w >= 6 if b > 671 417 * w = 5 if 671 > b > 239 418 * w = 4 if 239 > b > 79 419 * w = 3 if 79 > b > 23 420 * w <= 2 if 23 > b 421 * 422 * (with draws in between). Very small exponents are often selected 423 * with low Hamming weight, so we use w = 1 for b <= 23. */ 424 #define BN_window_bits_for_exponent_size(b) \ 425 ((b) > 671 ? 6 : \ 426 (b) > 239 ? 5 : \ 427 (b) > 79 ? 4 : \ 428 (b) > 23 ? 3 : 1) 429 430 static int mod_exp_recp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, 431 const BIGNUM *m, BN_CTX *ctx) { 432 int i, j, bits, ret = 0, wstart, window; 433 int start = 1; 434 BIGNUM *aa; 435 /* Table of variables obtained from 'ctx' */ 436 BIGNUM *val[TABLE_SIZE]; 437 BN_RECP_CTX recp; 438 439 if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) { 440 /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */ 441 OPENSSL_PUT_ERROR(BN, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED); 442 return 0; 443 } 444 445 bits = BN_num_bits(p); 446 447 if (bits == 0) { 448 /* x**0 mod 1 is still zero. */ 449 if (BN_is_one(m)) { 450 BN_zero(r); 451 return 1; 452 } 453 return BN_one(r); 454 } 455 456 BN_CTX_start(ctx); 457 aa = BN_CTX_get(ctx); 458 val[0] = BN_CTX_get(ctx); 459 if (!aa || !val[0]) { 460 goto err; 461 } 462 463 BN_RECP_CTX_init(&recp); 464 if (m->neg) { 465 /* ignore sign of 'm' */ 466 if (!BN_copy(aa, m)) { 467 goto err; 468 } 469 aa->neg = 0; 470 if (BN_RECP_CTX_set(&recp, aa, ctx) <= 0) { 471 goto err; 472 } 473 } else { 474 if (BN_RECP_CTX_set(&recp, m, ctx) <= 0) { 475 goto err; 476 } 477 } 478 479 if (!BN_nnmod(val[0], a, m, ctx)) { 480 goto err; /* 1 */ 481 } 482 if (BN_is_zero(val[0])) { 483 BN_zero(r); 484 ret = 1; 485 goto err; 486 } 487 488 window = BN_window_bits_for_exponent_size(bits); 489 if (window > 1) { 490 if (!BN_mod_mul_reciprocal(aa, val[0], val[0], &recp, ctx)) { 491 goto err; /* 2 */ 492 } 493 j = 1 << (window - 1); 494 for (i = 1; i < j; i++) { 495 if (((val[i] = BN_CTX_get(ctx)) == NULL) || 496 !BN_mod_mul_reciprocal(val[i], val[i - 1], aa, &recp, ctx)) { 497 goto err; 498 } 499 } 500 } 501 502 start = 1; /* This is used to avoid multiplication etc 503 * when there is only the value '1' in the 504 * buffer. */ 505 wstart = bits - 1; /* The top bit of the window */ 506 507 if (!BN_one(r)) { 508 goto err; 509 } 510 511 for (;;) { 512 int wvalue; /* The 'value' of the window */ 513 int wend; /* The bottom bit of the window */ 514 515 if (BN_is_bit_set(p, wstart) == 0) { 516 if (!start) { 517 if (!BN_mod_mul_reciprocal(r, r, r, &recp, ctx)) { 518 goto err; 519 } 520 } 521 if (wstart == 0) { 522 break; 523 } 524 wstart--; 525 continue; 526 } 527 528 /* We now have wstart on a 'set' bit, we now need to work out 529 * how bit a window to do. To do this we need to scan 530 * forward until the last set bit before the end of the 531 * window */ 532 wvalue = 1; 533 wend = 0; 534 for (i = 1; i < window; i++) { 535 if (wstart - i < 0) { 536 break; 537 } 538 if (BN_is_bit_set(p, wstart - i)) { 539 wvalue <<= (i - wend); 540 wvalue |= 1; 541 wend = i; 542 } 543 } 544 545 /* wend is the size of the current window */ 546 j = wend + 1; 547 /* add the 'bytes above' */ 548 if (!start) { 549 for (i = 0; i < j; i++) { 550 if (!BN_mod_mul_reciprocal(r, r, r, &recp, ctx)) { 551 goto err; 552 } 553 } 554 } 555 556 /* wvalue will be an odd number < 2^window */ 557 if (!BN_mod_mul_reciprocal(r, r, val[wvalue >> 1], &recp, ctx)) { 558 goto err; 559 } 560 561 /* move the 'window' down further */ 562 wstart -= wend + 1; 563 start = 0; 564 if (wstart < 0) { 565 break; 566 } 567 } 568 ret = 1; 569 570 err: 571 BN_CTX_end(ctx); 572 BN_RECP_CTX_free(&recp); 573 return ret; 574 } 575 576 int BN_mod_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m, 577 BN_CTX *ctx) { 578 /* For even modulus m = 2^k*m_odd, it might make sense to compute 579 * a^p mod m_odd and a^p mod 2^k separately (with Montgomery 580 * exponentiation for the odd part), using appropriate exponent 581 * reductions, and combine the results using the CRT. 582 * 583 * For now, we use Montgomery only if the modulus is odd; otherwise, 584 * exponentiation using the reciprocal-based quick remaindering 585 * algorithm is used. 586 * 587 * (Timing obtained with expspeed.c [computations a^p mod m 588 * where a, p, m are of the same length: 256, 512, 1024, 2048, 589 * 4096, 8192 bits], compared to the running time of the 590 * standard algorithm: 591 * 592 * BN_mod_exp_mont 33 .. 40 % [AMD K6-2, Linux, debug configuration] 593 * 55 .. 77 % [UltraSparc processor, but 594 * debug-solaris-sparcv8-gcc conf.] 595 * 596 * BN_mod_exp_recp 50 .. 70 % [AMD K6-2, Linux, debug configuration] 597 * 62 .. 118 % [UltraSparc, debug-solaris-sparcv8-gcc] 598 * 599 * On the Sparc, BN_mod_exp_recp was faster than BN_mod_exp_mont 600 * at 2048 and more bits, but at 512 and 1024 bits, it was 601 * slower even than the standard algorithm! 602 * 603 * "Real" timings [linux-elf, solaris-sparcv9-gcc configurations] 604 * should be obtained when the new Montgomery reduction code 605 * has been integrated into OpenSSL.) */ 606 607 if (BN_is_odd(m)) { 608 if (a->top == 1 && !a->neg && BN_get_flags(p, BN_FLG_CONSTTIME) == 0) { 609 BN_ULONG A = a->d[0]; 610 return BN_mod_exp_mont_word(r, A, p, m, ctx, NULL); 611 } 612 613 return BN_mod_exp_mont(r, a, p, m, ctx, NULL); 614 } 615 616 return mod_exp_recp(r, a, p, m, ctx); 617 } 618 619 int BN_mod_exp_mont(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, 620 const BIGNUM *m, BN_CTX *ctx, const BN_MONT_CTX *mont) { 621 int i, j, bits, ret = 0, wstart, window; 622 int start = 1; 623 BIGNUM *d, *r; 624 const BIGNUM *aa; 625 /* Table of variables obtained from 'ctx' */ 626 BIGNUM *val[TABLE_SIZE]; 627 BN_MONT_CTX *new_mont = NULL; 628 629 if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) { 630 return BN_mod_exp_mont_consttime(rr, a, p, m, ctx, mont); 631 } 632 633 if (!BN_is_odd(m)) { 634 OPENSSL_PUT_ERROR(BN, BN_R_CALLED_WITH_EVEN_MODULUS); 635 return 0; 636 } 637 bits = BN_num_bits(p); 638 if (bits == 0) { 639 /* x**0 mod 1 is still zero. */ 640 if (BN_is_one(m)) { 641 BN_zero(rr); 642 return 1; 643 } 644 return BN_one(rr); 645 } 646 647 BN_CTX_start(ctx); 648 d = BN_CTX_get(ctx); 649 r = BN_CTX_get(ctx); 650 val[0] = BN_CTX_get(ctx); 651 if (!d || !r || !val[0]) { 652 goto err; 653 } 654 655 /* Allocate a montgomery context if it was not supplied by the caller. */ 656 if (mont == NULL) { 657 new_mont = BN_MONT_CTX_new(); 658 if (new_mont == NULL || !BN_MONT_CTX_set(new_mont, m, ctx)) { 659 goto err; 660 } 661 mont = new_mont; 662 } 663 664 if (a->neg || BN_ucmp(a, m) >= 0) { 665 if (!BN_nnmod(val[0], a, m, ctx)) { 666 goto err; 667 } 668 aa = val[0]; 669 } else { 670 aa = a; 671 } 672 673 if (BN_is_zero(aa)) { 674 BN_zero(rr); 675 ret = 1; 676 goto err; 677 } 678 if (!BN_to_montgomery(val[0], aa, mont, ctx)) { 679 goto err; /* 1 */ 680 } 681 682 window = BN_window_bits_for_exponent_size(bits); 683 if (window > 1) { 684 if (!BN_mod_mul_montgomery(d, val[0], val[0], mont, ctx)) { 685 goto err; /* 2 */ 686 } 687 j = 1 << (window - 1); 688 for (i = 1; i < j; i++) { 689 if (((val[i] = BN_CTX_get(ctx)) == NULL) || 690 !BN_mod_mul_montgomery(val[i], val[i - 1], d, mont, ctx)) { 691 goto err; 692 } 693 } 694 } 695 696 start = 1; /* This is used to avoid multiplication etc 697 * when there is only the value '1' in the 698 * buffer. */ 699 wstart = bits - 1; /* The top bit of the window */ 700 701 j = m->top; /* borrow j */ 702 if (m->d[j - 1] & (((BN_ULONG)1) << (BN_BITS2 - 1))) { 703 if (bn_wexpand(r, j) == NULL) { 704 goto err; 705 } 706 /* 2^(top*BN_BITS2) - m */ 707 r->d[0] = (0 - m->d[0]) & BN_MASK2; 708 for (i = 1; i < j; i++) { 709 r->d[i] = (~m->d[i]) & BN_MASK2; 710 } 711 r->top = j; 712 /* Upper words will be zero if the corresponding words of 'm' 713 * were 0xfff[...], so decrement r->top accordingly. */ 714 bn_correct_top(r); 715 } else if (!BN_to_montgomery(r, BN_value_one(), mont, ctx)) { 716 goto err; 717 } 718 719 for (;;) { 720 int wvalue; /* The 'value' of the window */ 721 int wend; /* The bottom bit of the window */ 722 723 if (BN_is_bit_set(p, wstart) == 0) { 724 if (!start && !BN_mod_mul_montgomery(r, r, r, mont, ctx)) { 725 goto err; 726 } 727 if (wstart == 0) { 728 break; 729 } 730 wstart--; 731 continue; 732 } 733 734 /* We now have wstart on a 'set' bit, we now need to work out how bit a 735 * window to do. To do this we need to scan forward until the last set bit 736 * before the end of the window */ 737 wvalue = 1; 738 wend = 0; 739 for (i = 1; i < window; i++) { 740 if (wstart - i < 0) { 741 break; 742 } 743 if (BN_is_bit_set(p, wstart - i)) { 744 wvalue <<= (i - wend); 745 wvalue |= 1; 746 wend = i; 747 } 748 } 749 750 /* wend is the size of the current window */ 751 j = wend + 1; 752 /* add the 'bytes above' */ 753 if (!start) { 754 for (i = 0; i < j; i++) { 755 if (!BN_mod_mul_montgomery(r, r, r, mont, ctx)) { 756 goto err; 757 } 758 } 759 } 760 761 /* wvalue will be an odd number < 2^window */ 762 if (!BN_mod_mul_montgomery(r, r, val[wvalue >> 1], mont, ctx)) { 763 goto err; 764 } 765 766 /* move the 'window' down further */ 767 wstart -= wend + 1; 768 start = 0; 769 if (wstart < 0) { 770 break; 771 } 772 } 773 774 if (!BN_from_montgomery(rr, r, mont, ctx)) { 775 goto err; 776 } 777 ret = 1; 778 779 err: 780 BN_MONT_CTX_free(new_mont); 781 BN_CTX_end(ctx); 782 return ret; 783 } 784 785 /* BN_mod_exp_mont_consttime() stores the precomputed powers in a specific 786 * layout so that accessing any of these table values shows the same access 787 * pattern as far as cache lines are concerned. The following functions are 788 * used to transfer a BIGNUM from/to that table. */ 789 static int copy_to_prebuf(const BIGNUM *b, int top, unsigned char *buf, int idx, 790 int width) { 791 size_t i, j; 792 793 if (top > b->top) { 794 top = b->top; /* this works because 'buf' is explicitly zeroed */ 795 } 796 for (i = 0, j = idx; i < top * sizeof b->d[0]; i++, j += width) { 797 buf[j] = ((unsigned char *)b->d)[i]; 798 } 799 800 return 1; 801 } 802 803 static int copy_from_prebuf(BIGNUM *b, int top, unsigned char *buf, int idx, 804 int width) { 805 size_t i, j; 806 807 if (bn_wexpand(b, top) == NULL) { 808 return 0; 809 } 810 811 for (i = 0, j = idx; i < top * sizeof b->d[0]; i++, j += width) { 812 ((unsigned char *)b->d)[i] = buf[j]; 813 } 814 815 b->top = top; 816 bn_correct_top(b); 817 return 1; 818 } 819 820 /* BN_mod_exp_mont_conttime is based on the assumption that the L1 data cache 821 * line width of the target processor is at least the following value. */ 822 #define MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH (64) 823 #define MOD_EXP_CTIME_MIN_CACHE_LINE_MASK \ 824 (MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH - 1) 825 826 /* Window sizes optimized for fixed window size modular exponentiation 827 * algorithm (BN_mod_exp_mont_consttime). 828 * 829 * To achieve the security goals of BN_mode_exp_mont_consttime, the maximum 830 * size of the window must not exceed 831 * log_2(MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH). 832 * 833 * Window size thresholds are defined for cache line sizes of 32 and 64, cache 834 * line sizes where log_2(32)=5 and log_2(64)=6 respectively. A window size of 835 * 7 should only be used on processors that have a 128 byte or greater cache 836 * line size. */ 837 #if MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH == 64 838 839 #define BN_window_bits_for_ctime_exponent_size(b) \ 840 ((b) > 937 ? 6 : (b) > 306 ? 5 : (b) > 89 ? 4 : (b) > 22 ? 3 : 1) 841 #define BN_MAX_WINDOW_BITS_FOR_CTIME_EXPONENT_SIZE (6) 842 843 #elif MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH == 32 844 845 #define BN_window_bits_for_ctime_exponent_size(b) \ 846 ((b) > 306 ? 5 : (b) > 89 ? 4 : (b) > 22 ? 3 : 1) 847 #define BN_MAX_WINDOW_BITS_FOR_CTIME_EXPONENT_SIZE (5) 848 849 #endif 850 851 /* Given a pointer value, compute the next address that is a cache line 852 * multiple. */ 853 #define MOD_EXP_CTIME_ALIGN(x_) \ 854 ((unsigned char *)(x_) + \ 855 (MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH - \ 856 (((size_t)(x_)) & (MOD_EXP_CTIME_MIN_CACHE_LINE_MASK)))) 857 858 /* This variant of BN_mod_exp_mont() uses fixed windows and the special 859 * precomputation memory layout to limit data-dependency to a minimum 860 * to protect secret exponents (cf. the hyper-threading timing attacks 861 * pointed out by Colin Percival, 862 * http://www.daemonology.net/hyperthreading-considered-harmful/) 863 */ 864 int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p, 865 const BIGNUM *m, BN_CTX *ctx, 866 const BN_MONT_CTX *mont) { 867 int i, bits, ret = 0, window, wvalue; 868 int top; 869 BN_MONT_CTX *new_mont = NULL; 870 871 int numPowers; 872 unsigned char *powerbufFree = NULL; 873 int powerbufLen = 0; 874 unsigned char *powerbuf = NULL; 875 BIGNUM tmp, am; 876 877 if (!BN_is_odd(m)) { 878 OPENSSL_PUT_ERROR(BN, BN_R_CALLED_WITH_EVEN_MODULUS); 879 return 0; 880 } 881 882 top = m->top; 883 884 bits = BN_num_bits(p); 885 if (bits == 0) { 886 /* x**0 mod 1 is still zero. */ 887 if (BN_is_one(m)) { 888 BN_zero(rr); 889 return 1; 890 } 891 return BN_one(rr); 892 } 893 894 BN_CTX_start(ctx); 895 896 /* Allocate a montgomery context if it was not supplied by the caller. */ 897 if (mont == NULL) { 898 new_mont = BN_MONT_CTX_new(); 899 if (new_mont == NULL || !BN_MONT_CTX_set(new_mont, m, ctx)) { 900 goto err; 901 } 902 mont = new_mont; 903 } 904 905 #ifdef RSAZ_ENABLED 906 /* If the size of the operands allow it, perform the optimized 907 * RSAZ exponentiation. For further information see 908 * crypto/bn/rsaz_exp.c and accompanying assembly modules. */ 909 if ((16 == a->top) && (16 == p->top) && (BN_num_bits(m) == 1024) && 910 rsaz_avx2_eligible()) { 911 if (NULL == bn_wexpand(rr, 16)) { 912 goto err; 913 } 914 RSAZ_1024_mod_exp_avx2(rr->d, a->d, p->d, m->d, mont->RR.d, mont->n0[0]); 915 rr->top = 16; 916 rr->neg = 0; 917 bn_correct_top(rr); 918 ret = 1; 919 goto err; 920 } else if ((8 == a->top) && (8 == p->top) && (BN_num_bits(m) == 512)) { 921 if (NULL == bn_wexpand(rr, 8)) { 922 goto err; 923 } 924 RSAZ_512_mod_exp(rr->d, a->d, p->d, m->d, mont->n0[0], mont->RR.d); 925 rr->top = 8; 926 rr->neg = 0; 927 bn_correct_top(rr); 928 ret = 1; 929 goto err; 930 } 931 #endif 932 933 /* Get the window size to use with size of p. */ 934 window = BN_window_bits_for_ctime_exponent_size(bits); 935 #if defined(OPENSSL_BN_ASM_MONT5) 936 if (window >= 5) { 937 window = 5; /* ~5% improvement for RSA2048 sign, and even for RSA4096 */ 938 if ((top & 7) == 0) { 939 powerbufLen += 2 * top * sizeof(m->d[0]); 940 } 941 } 942 #endif 943 944 /* Allocate a buffer large enough to hold all of the pre-computed 945 * powers of am, am itself and tmp. 946 */ 947 numPowers = 1 << window; 948 powerbufLen += 949 sizeof(m->d[0]) * 950 (top * numPowers + ((2 * top) > numPowers ? (2 * top) : numPowers)); 951 #ifdef alloca 952 if (powerbufLen < 3072) { 953 powerbufFree = alloca(powerbufLen + MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH); 954 } else 955 #endif 956 { 957 if ((powerbufFree = (unsigned char *)OPENSSL_malloc( 958 powerbufLen + MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH)) == NULL) { 959 goto err; 960 } 961 } 962 963 powerbuf = MOD_EXP_CTIME_ALIGN(powerbufFree); 964 memset(powerbuf, 0, powerbufLen); 965 966 #ifdef alloca 967 if (powerbufLen < 3072) { 968 powerbufFree = NULL; 969 } 970 #endif 971 972 /* lay down tmp and am right after powers table */ 973 tmp.d = (BN_ULONG *)(powerbuf + sizeof(m->d[0]) * top * numPowers); 974 am.d = tmp.d + top; 975 tmp.top = am.top = 0; 976 tmp.dmax = am.dmax = top; 977 tmp.neg = am.neg = 0; 978 tmp.flags = am.flags = BN_FLG_STATIC_DATA; 979 980 /* prepare a^0 in Montgomery domain */ 981 /* by Shay Gueron's suggestion */ 982 if (m->d[top - 1] & (((BN_ULONG)1) << (BN_BITS2 - 1))) { 983 /* 2^(top*BN_BITS2) - m */ 984 tmp.d[0] = (0 - m->d[0]) & BN_MASK2; 985 for (i = 1; i < top; i++) { 986 tmp.d[i] = (~m->d[i]) & BN_MASK2; 987 } 988 tmp.top = top; 989 } else if (!BN_to_montgomery(&tmp, BN_value_one(), mont, ctx)) { 990 goto err; 991 } 992 993 /* prepare a^1 in Montgomery domain */ 994 if (a->neg || BN_ucmp(a, m) >= 0) { 995 if (!BN_mod(&am, a, m, ctx) || 996 !BN_to_montgomery(&am, &am, mont, ctx)) { 997 goto err; 998 } 999 } else if (!BN_to_montgomery(&am, a, mont, ctx)) { 1000 goto err; 1001 } 1002 1003 #if defined(OPENSSL_BN_ASM_MONT5) 1004 /* This optimization uses ideas from http://eprint.iacr.org/2011/239, 1005 * specifically optimization of cache-timing attack countermeasures 1006 * and pre-computation optimization. */ 1007 1008 /* Dedicated window==4 case improves 512-bit RSA sign by ~15%, but as 1009 * 512-bit RSA is hardly relevant, we omit it to spare size... */ 1010 if (window == 5 && top > 1) { 1011 const BN_ULONG *np = mont->N.d, *n0 = mont->n0, *np2; 1012 1013 /* BN_to_montgomery can contaminate words above .top 1014 * [in BN_DEBUG[_DEBUG] build]... */ 1015 for (i = am.top; i < top; i++) { 1016 am.d[i] = 0; 1017 } 1018 for (i = tmp.top; i < top; i++) { 1019 tmp.d[i] = 0; 1020 } 1021 1022 if (top & 7) { 1023 np2 = np; 1024 } else { 1025 BN_ULONG *np_double = am.d + top; 1026 for (i = 0; i < top; i++) { 1027 np_double[2 * i] = np[i]; 1028 } 1029 np2 = np_double; 1030 } 1031 1032 bn_scatter5(tmp.d, top, powerbuf, 0); 1033 bn_scatter5(am.d, am.top, powerbuf, 1); 1034 bn_mul_mont(tmp.d, am.d, am.d, np, n0, top); 1035 bn_scatter5(tmp.d, top, powerbuf, 2); 1036 1037 /* same as above, but uses squaring for 1/2 of operations */ 1038 for (i = 4; i < 32; i *= 2) { 1039 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); 1040 bn_scatter5(tmp.d, top, powerbuf, i); 1041 } 1042 for (i = 3; i < 8; i += 2) { 1043 int j; 1044 bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np2, n0, top, i - 1); 1045 bn_scatter5(tmp.d, top, powerbuf, i); 1046 for (j = 2 * i; j < 32; j *= 2) { 1047 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); 1048 bn_scatter5(tmp.d, top, powerbuf, j); 1049 } 1050 } 1051 for (; i < 16; i += 2) { 1052 bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np2, n0, top, i - 1); 1053 bn_scatter5(tmp.d, top, powerbuf, i); 1054 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); 1055 bn_scatter5(tmp.d, top, powerbuf, 2 * i); 1056 } 1057 for (; i < 32; i += 2) { 1058 bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np2, n0, top, i - 1); 1059 bn_scatter5(tmp.d, top, powerbuf, i); 1060 } 1061 1062 bits--; 1063 for (wvalue = 0, i = bits % 5; i >= 0; i--, bits--) { 1064 wvalue = (wvalue << 1) + BN_is_bit_set(p, bits); 1065 } 1066 bn_gather5(tmp.d, top, powerbuf, wvalue); 1067 1068 /* At this point |bits| is 4 mod 5 and at least -1. (|bits| is the first bit 1069 * that has not been read yet.) */ 1070 assert(bits >= -1 && (bits == -1 || bits % 5 == 4)); 1071 1072 /* Scan the exponent one window at a time starting from the most 1073 * significant bits. 1074 */ 1075 if (top & 7) { 1076 while (bits >= 0) { 1077 for (wvalue = 0, i = 0; i < 5; i++, bits--) { 1078 wvalue = (wvalue << 1) + BN_is_bit_set(p, bits); 1079 } 1080 1081 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); 1082 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); 1083 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); 1084 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); 1085 bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top); 1086 bn_mul_mont_gather5(tmp.d, tmp.d, powerbuf, np, n0, top, wvalue); 1087 } 1088 } else { 1089 const uint8_t *p_bytes = (const uint8_t *)p->d; 1090 int max_bits = p->top * BN_BITS2; 1091 assert(bits < max_bits); 1092 /* |p = 0| has been handled as a special case, so |max_bits| is at least 1093 * one word. */ 1094 assert(max_bits >= 64); 1095 1096 /* If the first bit to be read lands in the last byte, unroll the first 1097 * iteration to avoid reading past the bounds of |p->d|. (After the first 1098 * iteration, we are guaranteed to be past the last byte.) Note |bits| 1099 * here is the top bit, inclusive. */ 1100 if (bits - 4 >= max_bits - 8) { 1101 /* Read five bits from |bits-4| through |bits|, inclusive. */ 1102 wvalue = p_bytes[p->top * BN_BYTES - 1]; 1103 wvalue >>= (bits - 4) & 7; 1104 wvalue &= 0x1f; 1105 bits -= 5; 1106 bn_power5(tmp.d, tmp.d, powerbuf, np2, n0, top, wvalue); 1107 } 1108 while (bits >= 0) { 1109 /* Read five bits from |bits-4| through |bits|, inclusive. */ 1110 int first_bit = bits - 4; 1111 wvalue = *(const uint16_t *) (p_bytes + (first_bit >> 3)); 1112 wvalue >>= first_bit & 7; 1113 wvalue &= 0x1f; 1114 bits -= 5; 1115 bn_power5(tmp.d, tmp.d, powerbuf, np2, n0, top, wvalue); 1116 } 1117 } 1118 1119 ret = bn_from_montgomery(tmp.d, tmp.d, NULL, np2, n0, top); 1120 tmp.top = top; 1121 bn_correct_top(&tmp); 1122 if (ret) { 1123 if (!BN_copy(rr, &tmp)) { 1124 ret = 0; 1125 } 1126 goto err; /* non-zero ret means it's not error */ 1127 } 1128 } else 1129 #endif 1130 { 1131 if (!copy_to_prebuf(&tmp, top, powerbuf, 0, numPowers) || 1132 !copy_to_prebuf(&am, top, powerbuf, 1, numPowers)) { 1133 goto err; 1134 } 1135 1136 /* If the window size is greater than 1, then calculate 1137 * val[i=2..2^winsize-1]. Powers are computed as a*a^(i-1) 1138 * (even powers could instead be computed as (a^(i/2))^2 1139 * to use the slight performance advantage of sqr over mul). 1140 */ 1141 if (window > 1) { 1142 if (!BN_mod_mul_montgomery(&tmp, &am, &am, mont, ctx) || 1143 !copy_to_prebuf(&tmp, top, powerbuf, 2, numPowers)) { 1144 goto err; 1145 } 1146 for (i = 3; i < numPowers; i++) { 1147 /* Calculate a^i = a^(i-1) * a */ 1148 if (!BN_mod_mul_montgomery(&tmp, &am, &tmp, mont, ctx) || 1149 !copy_to_prebuf(&tmp, top, powerbuf, i, numPowers)) { 1150 goto err; 1151 } 1152 } 1153 } 1154 1155 bits--; 1156 for (wvalue = 0, i = bits % window; i >= 0; i--, bits--) { 1157 wvalue = (wvalue << 1) + BN_is_bit_set(p, bits); 1158 } 1159 if (!copy_from_prebuf(&tmp, top, powerbuf, wvalue, numPowers)) { 1160 goto err; 1161 } 1162 1163 /* Scan the exponent one window at a time starting from the most 1164 * significant bits. 1165 */ 1166 while (bits >= 0) { 1167 wvalue = 0; /* The 'value' of the window */ 1168 1169 /* Scan the window, squaring the result as we go */ 1170 for (i = 0; i < window; i++, bits--) { 1171 if (!BN_mod_mul_montgomery(&tmp, &tmp, &tmp, mont, ctx)) { 1172 goto err; 1173 } 1174 wvalue = (wvalue << 1) + BN_is_bit_set(p, bits); 1175 } 1176 1177 /* Fetch the appropriate pre-computed value from the pre-buf */ 1178 if (!copy_from_prebuf(&am, top, powerbuf, wvalue, numPowers)) { 1179 goto err; 1180 } 1181 1182 /* Multiply the result into the intermediate result */ 1183 if (!BN_mod_mul_montgomery(&tmp, &tmp, &am, mont, ctx)) { 1184 goto err; 1185 } 1186 } 1187 } 1188 1189 /* Convert the final result from montgomery to standard format */ 1190 if (!BN_from_montgomery(rr, &tmp, mont, ctx)) { 1191 goto err; 1192 } 1193 ret = 1; 1194 1195 err: 1196 BN_MONT_CTX_free(new_mont); 1197 if (powerbuf != NULL) { 1198 OPENSSL_cleanse(powerbuf, powerbufLen); 1199 OPENSSL_free(powerbufFree); 1200 } 1201 BN_CTX_end(ctx); 1202 return (ret); 1203 } 1204 1205 int BN_mod_exp_mont_word(BIGNUM *rr, BN_ULONG a, const BIGNUM *p, 1206 const BIGNUM *m, BN_CTX *ctx, 1207 const BN_MONT_CTX *mont) { 1208 BN_MONT_CTX *new_mont = NULL; 1209 int b, bits, ret = 0; 1210 int r_is_one; 1211 BN_ULONG w, next_w; 1212 BIGNUM *d, *r, *t; 1213 BIGNUM *swap_tmp; 1214 #define BN_MOD_MUL_WORD(r, w, m) \ 1215 (BN_mul_word(r, (w)) && \ 1216 (/* BN_ucmp(r, (m)) < 0 ? 1 :*/ \ 1217 (BN_mod(t, r, m, ctx) && (swap_tmp = r, r = t, t = swap_tmp, 1)))) 1218 /* BN_MOD_MUL_WORD is only used with 'w' large, so the BN_ucmp test is 1219 * probably more overhead than always using BN_mod (which uses BN_copy if a 1220 * similar test returns true). We can use BN_mod and do not need BN_nnmod 1221 * because our accumulator is never negative (the result of BN_mod does not 1222 * depend on the sign of the modulus). */ 1223 #define BN_TO_MONTGOMERY_WORD(r, w, mont) \ 1224 (BN_set_word(r, (w)) && BN_to_montgomery(r, r, (mont), ctx)) 1225 1226 if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0) { 1227 /* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */ 1228 OPENSSL_PUT_ERROR(BN, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED); 1229 return 0; 1230 } 1231 1232 if (!BN_is_odd(m)) { 1233 OPENSSL_PUT_ERROR(BN, BN_R_CALLED_WITH_EVEN_MODULUS); 1234 return 0; 1235 } 1236 1237 if (m->top == 1) { 1238 a %= m->d[0]; /* make sure that 'a' is reduced */ 1239 } 1240 1241 bits = BN_num_bits(p); 1242 if (bits == 0) { 1243 /* x**0 mod 1 is still zero. */ 1244 if (BN_is_one(m)) { 1245 BN_zero(rr); 1246 return 1; 1247 } 1248 return BN_one(rr); 1249 } 1250 if (a == 0) { 1251 BN_zero(rr); 1252 return 1; 1253 } 1254 1255 BN_CTX_start(ctx); 1256 d = BN_CTX_get(ctx); 1257 r = BN_CTX_get(ctx); 1258 t = BN_CTX_get(ctx); 1259 if (d == NULL || r == NULL || t == NULL) { 1260 goto err; 1261 } 1262 1263 /* Allocate a montgomery context if it was not supplied by the caller. */ 1264 if (mont == NULL) { 1265 new_mont = BN_MONT_CTX_new(); 1266 if (new_mont == NULL || !BN_MONT_CTX_set(new_mont, m, ctx)) { 1267 goto err; 1268 } 1269 mont = new_mont; 1270 } 1271 1272 r_is_one = 1; /* except for Montgomery factor */ 1273 1274 /* bits-1 >= 0 */ 1275 1276 /* The result is accumulated in the product r*w. */ 1277 w = a; /* bit 'bits-1' of 'p' is always set */ 1278 for (b = bits - 2; b >= 0; b--) { 1279 /* First, square r*w. */ 1280 next_w = w * w; 1281 if ((next_w / w) != w) { 1282 /* overflow */ 1283 if (r_is_one) { 1284 if (!BN_TO_MONTGOMERY_WORD(r, w, mont)) { 1285 goto err; 1286 } 1287 r_is_one = 0; 1288 } else { 1289 if (!BN_MOD_MUL_WORD(r, w, m)) { 1290 goto err; 1291 } 1292 } 1293 next_w = 1; 1294 } 1295 1296 w = next_w; 1297 if (!r_is_one) { 1298 if (!BN_mod_mul_montgomery(r, r, r, mont, ctx)) { 1299 goto err; 1300 } 1301 } 1302 1303 /* Second, multiply r*w by 'a' if exponent bit is set. */ 1304 if (BN_is_bit_set(p, b)) { 1305 next_w = w * a; 1306 if ((next_w / a) != w) { 1307 /* overflow */ 1308 if (r_is_one) { 1309 if (!BN_TO_MONTGOMERY_WORD(r, w, mont)) { 1310 goto err; 1311 } 1312 r_is_one = 0; 1313 } else { 1314 if (!BN_MOD_MUL_WORD(r, w, m)) { 1315 goto err; 1316 } 1317 } 1318 next_w = a; 1319 } 1320 w = next_w; 1321 } 1322 } 1323 1324 /* Finally, set r:=r*w. */ 1325 if (w != 1) { 1326 if (r_is_one) { 1327 if (!BN_TO_MONTGOMERY_WORD(r, w, mont)) { 1328 goto err; 1329 } 1330 r_is_one = 0; 1331 } else { 1332 if (!BN_MOD_MUL_WORD(r, w, m)) { 1333 goto err; 1334 } 1335 } 1336 } 1337 1338 if (r_is_one) { 1339 /* can happen only if a == 1*/ 1340 if (!BN_one(rr)) { 1341 goto err; 1342 } 1343 } else { 1344 if (!BN_from_montgomery(rr, r, mont, ctx)) { 1345 goto err; 1346 } 1347 } 1348 ret = 1; 1349 1350 err: 1351 BN_MONT_CTX_free(new_mont); 1352 BN_CTX_end(ctx); 1353 return ret; 1354 } 1355 1356 #define TABLE_SIZE 32 1357 1358 int BN_mod_exp2_mont(BIGNUM *rr, const BIGNUM *a1, const BIGNUM *p1, 1359 const BIGNUM *a2, const BIGNUM *p2, const BIGNUM *m, 1360 BN_CTX *ctx, const BN_MONT_CTX *mont) { 1361 int i, j, bits, b, bits1, bits2, ret = 0, wpos1, wpos2, window1, window2, 1362 wvalue1, wvalue2; 1363 int r_is_one = 1; 1364 BIGNUM *d, *r; 1365 const BIGNUM *a_mod_m; 1366 /* Tables of variables obtained from 'ctx' */ 1367 BIGNUM *val1[TABLE_SIZE], *val2[TABLE_SIZE]; 1368 BN_MONT_CTX *new_mont = NULL; 1369 1370 if (!(m->d[0] & 1)) { 1371 OPENSSL_PUT_ERROR(BN, BN_R_CALLED_WITH_EVEN_MODULUS); 1372 return 0; 1373 } 1374 bits1 = BN_num_bits(p1); 1375 bits2 = BN_num_bits(p2); 1376 if (bits1 == 0 && bits2 == 0) { 1377 ret = BN_one(rr); 1378 return ret; 1379 } 1380 1381 bits = (bits1 > bits2) ? bits1 : bits2; 1382 1383 BN_CTX_start(ctx); 1384 d = BN_CTX_get(ctx); 1385 r = BN_CTX_get(ctx); 1386 val1[0] = BN_CTX_get(ctx); 1387 val2[0] = BN_CTX_get(ctx); 1388 if (!d || !r || !val1[0] || !val2[0]) { 1389 goto err; 1390 } 1391 1392 /* Allocate a montgomery context if it was not supplied by the caller. */ 1393 if (mont == NULL) { 1394 new_mont = BN_MONT_CTX_new(); 1395 if (new_mont == NULL || !BN_MONT_CTX_set(new_mont, m, ctx)) { 1396 goto err; 1397 } 1398 mont = new_mont; 1399 } 1400 1401 window1 = BN_window_bits_for_exponent_size(bits1); 1402 window2 = BN_window_bits_for_exponent_size(bits2); 1403 1404 /* Build table for a1: val1[i] := a1^(2*i + 1) mod m for i = 0 .. 1405 * 2^(window1-1) */ 1406 if (a1->neg || BN_ucmp(a1, m) >= 0) { 1407 if (!BN_mod(val1[0], a1, m, ctx)) { 1408 goto err; 1409 } 1410 a_mod_m = val1[0]; 1411 } else { 1412 a_mod_m = a1; 1413 } 1414 1415 if (BN_is_zero(a_mod_m)) { 1416 BN_zero(rr); 1417 ret = 1; 1418 goto err; 1419 } 1420 1421 if (!BN_to_montgomery(val1[0], a_mod_m, mont, ctx)) { 1422 goto err; 1423 } 1424 1425 if (window1 > 1) { 1426 if (!BN_mod_mul_montgomery(d, val1[0], val1[0], mont, ctx)) { 1427 goto err; 1428 } 1429 1430 j = 1 << (window1 - 1); 1431 for (i = 1; i < j; i++) { 1432 if (((val1[i] = BN_CTX_get(ctx)) == NULL) || 1433 !BN_mod_mul_montgomery(val1[i], val1[i - 1], d, mont, ctx)) { 1434 goto err; 1435 } 1436 } 1437 } 1438 1439 /* Build table for a2: val2[i] := a2^(2*i + 1) mod m for i = 0 .. 1440 * 2^(window2-1) */ 1441 if (a2->neg || BN_ucmp(a2, m) >= 0) { 1442 if (!BN_mod(val2[0], a2, m, ctx)) { 1443 goto err; 1444 } 1445 a_mod_m = val2[0]; 1446 } else { 1447 a_mod_m = a2; 1448 } 1449 1450 if (BN_is_zero(a_mod_m)) { 1451 BN_zero(rr); 1452 ret = 1; 1453 goto err; 1454 } 1455 1456 if (!BN_to_montgomery(val2[0], a_mod_m, mont, ctx)) { 1457 goto err; 1458 } 1459 1460 if (window2 > 1) { 1461 if (!BN_mod_mul_montgomery(d, val2[0], val2[0], mont, ctx)) { 1462 goto err; 1463 } 1464 1465 j = 1 << (window2 - 1); 1466 for (i = 1; i < j; i++) { 1467 if (((val2[i] = BN_CTX_get(ctx)) == NULL) || 1468 !BN_mod_mul_montgomery(val2[i], val2[i - 1], d, mont, ctx)) { 1469 goto err; 1470 } 1471 } 1472 } 1473 1474 /* Now compute the power product, using independent windows. */ 1475 r_is_one = 1; 1476 wvalue1 = 0; /* The 'value' of the first window */ 1477 wvalue2 = 0; /* The 'value' of the second window */ 1478 wpos1 = 0; /* If wvalue1 > 0, the bottom bit of the first window */ 1479 wpos2 = 0; /* If wvalue2 > 0, the bottom bit of the second window */ 1480 1481 if (!BN_to_montgomery(r, BN_value_one(), mont, ctx)) { 1482 goto err; 1483 } 1484 1485 for (b = bits - 1; b >= 0; b--) { 1486 if (!r_is_one) { 1487 if (!BN_mod_mul_montgomery(r, r, r, mont, ctx)) { 1488 goto err; 1489 } 1490 } 1491 1492 if (!wvalue1 && BN_is_bit_set(p1, b)) { 1493 /* consider bits b-window1+1 .. b for this window */ 1494 i = b - window1 + 1; 1495 /* works for i<0 */ 1496 while (!BN_is_bit_set(p1, i)) { 1497 i++; 1498 } 1499 wpos1 = i; 1500 wvalue1 = 1; 1501 for (i = b - 1; i >= wpos1; i--) { 1502 wvalue1 <<= 1; 1503 if (BN_is_bit_set(p1, i)) { 1504 wvalue1++; 1505 } 1506 } 1507 } 1508 1509 if (!wvalue2 && BN_is_bit_set(p2, b)) { 1510 /* consider bits b-window2+1 .. b for this window */ 1511 i = b - window2 + 1; 1512 while (!BN_is_bit_set(p2, i)) { 1513 i++; 1514 } 1515 wpos2 = i; 1516 wvalue2 = 1; 1517 for (i = b - 1; i >= wpos2; i--) { 1518 wvalue2 <<= 1; 1519 if (BN_is_bit_set(p2, i)) { 1520 wvalue2++; 1521 } 1522 } 1523 } 1524 1525 if (wvalue1 && b == wpos1) { 1526 /* wvalue1 is odd and < 2^window1 */ 1527 if (!BN_mod_mul_montgomery(r, r, val1[wvalue1 >> 1], mont, ctx)) { 1528 goto err; 1529 } 1530 wvalue1 = 0; 1531 r_is_one = 0; 1532 } 1533 1534 if (wvalue2 && b == wpos2) { 1535 /* wvalue2 is odd and < 2^window2 */ 1536 if (!BN_mod_mul_montgomery(r, r, val2[wvalue2 >> 1], mont, ctx)) { 1537 goto err; 1538 } 1539 wvalue2 = 0; 1540 r_is_one = 0; 1541 } 1542 } 1543 1544 if (!BN_from_montgomery(rr, r, mont, ctx)) { 1545 goto err; 1546 } 1547 ret = 1; 1548 1549 err: 1550 BN_MONT_CTX_free(new_mont); 1551 BN_CTX_end(ctx); 1552 return ret; 1553 } 1554