Home | History | Annotate | Download | only in bn
      1 /* Copyright (C) 1995-1998 Eric Young (eay (at) cryptsoft.com)
      2  * All rights reserved.
      3  *
      4  * This package is an SSL implementation written
      5  * by Eric Young (eay (at) cryptsoft.com).
      6  * The implementation was written so as to conform with Netscapes SSL.
      7  *
      8  * This library is free for commercial and non-commercial use as long as
      9  * the following conditions are aheared to.  The following conditions
     10  * apply to all code found in this distribution, be it the RC4, RSA,
     11  * lhash, DES, etc., code; not just the SSL code.  The SSL documentation
     12  * included with this distribution is covered by the same copyright terms
     13  * except that the holder is Tim Hudson (tjh (at) cryptsoft.com).
     14  *
     15  * Copyright remains Eric Young's, and as such any Copyright notices in
     16  * the code are not to be removed.
     17  * If this package is used in a product, Eric Young should be given attribution
     18  * as the author of the parts of the library used.
     19  * This can be in the form of a textual message at program startup or
     20  * in documentation (online or textual) provided with the package.
     21  *
     22  * Redistribution and use in source and binary forms, with or without
     23  * modification, are permitted provided that the following conditions
     24  * are met:
     25  * 1. Redistributions of source code must retain the copyright
     26  *    notice, this list of conditions and the following disclaimer.
     27  * 2. Redistributions in binary form must reproduce the above copyright
     28  *    notice, this list of conditions and the following disclaimer in the
     29  *    documentation and/or other materials provided with the distribution.
     30  * 3. All advertising materials mentioning features or use of this software
     31  *    must display the following acknowledgement:
     32  *    "This product includes cryptographic software written by
     33  *     Eric Young (eay (at) cryptsoft.com)"
     34  *    The word 'cryptographic' can be left out if the rouines from the library
     35  *    being used are not cryptographic related :-).
     36  * 4. If you include any Windows specific code (or a derivative thereof) from
     37  *    the apps directory (application code) you must include an acknowledgement:
     38  *    "This product includes software written by Tim Hudson (tjh (at) cryptsoft.com)"
     39  *
     40  * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
     41  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     42  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     43  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
     44  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     45  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     46  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     47  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     48  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     49  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     50  * SUCH DAMAGE.
     51  *
     52  * The licence and distribution terms for any publically available version or
     53  * derivative of this code cannot be changed.  i.e. this code cannot simply be
     54  * copied and put under another distribution licence
     55  * [including the GNU Public Licence.]
     56  */
     57 /* ====================================================================
     58  * Copyright (c) 1998-2005 The OpenSSL Project.  All rights reserved.
     59  *
     60  * Redistribution and use in source and binary forms, with or without
     61  * modification, are permitted provided that the following conditions
     62  * are met:
     63  *
     64  * 1. Redistributions of source code must retain the above copyright
     65  *    notice, this list of conditions and the following disclaimer.
     66  *
     67  * 2. Redistributions in binary form must reproduce the above copyright
     68  *    notice, this list of conditions and the following disclaimer in
     69  *    the documentation and/or other materials provided with the
     70  *    distribution.
     71  *
     72  * 3. All advertising materials mentioning features or use of this
     73  *    software must display the following acknowledgment:
     74  *    "This product includes software developed by the OpenSSL Project
     75  *    for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
     76  *
     77  * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
     78  *    endorse or promote products derived from this software without
     79  *    prior written permission. For written permission, please contact
     80  *    openssl-core (at) openssl.org.
     81  *
     82  * 5. Products derived from this software may not be called "OpenSSL"
     83  *    nor may "OpenSSL" appear in their names without prior written
     84  *    permission of the OpenSSL Project.
     85  *
     86  * 6. Redistributions of any form whatsoever must retain the following
     87  *    acknowledgment:
     88  *    "This product includes software developed by the OpenSSL Project
     89  *    for use in the OpenSSL Toolkit (http://www.openssl.org/)"
     90  *
     91  * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
     92  * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     93  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     94  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE OpenSSL PROJECT OR
     95  * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     96  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
     97  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
     98  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     99  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
    100  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
    101  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
    102  * OF THE POSSIBILITY OF SUCH DAMAGE.
    103  * ====================================================================
    104  *
    105  * This product includes cryptographic software written by Eric Young
    106  * (eay (at) cryptsoft.com).  This product includes software written by Tim
    107  * Hudson (tjh (at) cryptsoft.com). */
    108 
    109 #include <openssl/bn.h>
    110 
    111 #include <assert.h>
    112 #include <string.h>
    113 
    114 #include <openssl/cpu.h>
    115 #include <openssl/err.h>
    116 #include <openssl/mem.h>
    117 
    118 #include "internal.h"
    119 
    120 
    121 #if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64)
    122 #define OPENSSL_BN_ASM_MONT5
    123 #define RSAZ_ENABLED
    124 
    125 #include "rsaz_exp.h"
    126 
    127 void bn_mul_mont_gather5(BN_ULONG *rp, const BN_ULONG *ap, const void *table,
    128                          const BN_ULONG *np, const BN_ULONG *n0, int num,
    129                          int power);
    130 void bn_scatter5(const BN_ULONG *inp, size_t num, void *table, size_t power);
    131 void bn_gather5(BN_ULONG *out, size_t num, void *table, size_t power);
    132 void bn_power5(BN_ULONG *rp, const BN_ULONG *ap, const void *table,
    133                const BN_ULONG *np, const BN_ULONG *n0, int num, int power);
    134 int bn_from_montgomery(BN_ULONG *rp, const BN_ULONG *ap,
    135                        const BN_ULONG *not_used, const BN_ULONG *np,
    136                        const BN_ULONG *n0, int num);
    137 #endif
    138 
    139 int BN_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx) {
    140   int i, bits, ret = 0;
    141   BIGNUM *v, *rr;
    142 
    143   BN_CTX_start(ctx);
    144   if (r == a || r == p) {
    145     rr = BN_CTX_get(ctx);
    146   } else {
    147     rr = r;
    148   }
    149 
    150   v = BN_CTX_get(ctx);
    151   if (rr == NULL || v == NULL) {
    152     goto err;
    153   }
    154 
    155   if (BN_copy(v, a) == NULL) {
    156     goto err;
    157   }
    158   bits = BN_num_bits(p);
    159 
    160   if (BN_is_odd(p)) {
    161     if (BN_copy(rr, a) == NULL) {
    162       goto err;
    163     }
    164   } else {
    165     if (!BN_one(rr)) {
    166       goto err;
    167     }
    168   }
    169 
    170   for (i = 1; i < bits; i++) {
    171     if (!BN_sqr(v, v, ctx)) {
    172       goto err;
    173     }
    174     if (BN_is_bit_set(p, i)) {
    175       if (!BN_mul(rr, rr, v, ctx)) {
    176         goto err;
    177       }
    178     }
    179   }
    180 
    181   if (r != rr && !BN_copy(r, rr)) {
    182     goto err;
    183   }
    184   ret = 1;
    185 
    186 err:
    187   BN_CTX_end(ctx);
    188   return ret;
    189 }
    190 
    191 typedef struct bn_recp_ctx_st {
    192   BIGNUM N;   // the divisor
    193   BIGNUM Nr;  // the reciprocal
    194   int num_bits;
    195   int shift;
    196   int flags;
    197 } BN_RECP_CTX;
    198 
    199 static void BN_RECP_CTX_init(BN_RECP_CTX *recp) {
    200   BN_init(&recp->N);
    201   BN_init(&recp->Nr);
    202   recp->num_bits = 0;
    203   recp->shift = 0;
    204   recp->flags = 0;
    205 }
    206 
    207 static void BN_RECP_CTX_free(BN_RECP_CTX *recp) {
    208   if (recp == NULL) {
    209     return;
    210   }
    211 
    212   BN_free(&recp->N);
    213   BN_free(&recp->Nr);
    214 }
    215 
    216 static int BN_RECP_CTX_set(BN_RECP_CTX *recp, const BIGNUM *d, BN_CTX *ctx) {
    217   if (!BN_copy(&(recp->N), d)) {
    218     return 0;
    219   }
    220   BN_zero(&recp->Nr);
    221   recp->num_bits = BN_num_bits(d);
    222   recp->shift = 0;
    223 
    224   return 1;
    225 }
    226 
    227 // len is the expected size of the result We actually calculate with an extra
    228 // word of precision, so we can do faster division if the remainder is not
    229 // required.
    230 // r := 2^len / m
    231 static int BN_reciprocal(BIGNUM *r, const BIGNUM *m, int len, BN_CTX *ctx) {
    232   int ret = -1;
    233   BIGNUM *t;
    234 
    235   BN_CTX_start(ctx);
    236   t = BN_CTX_get(ctx);
    237   if (t == NULL) {
    238     goto err;
    239   }
    240 
    241   if (!BN_set_bit(t, len)) {
    242     goto err;
    243   }
    244 
    245   if (!BN_div(r, NULL, t, m, ctx)) {
    246     goto err;
    247   }
    248 
    249   ret = len;
    250 
    251 err:
    252   BN_CTX_end(ctx);
    253   return ret;
    254 }
    255 
    256 static int BN_div_recp(BIGNUM *dv, BIGNUM *rem, const BIGNUM *m,
    257                        BN_RECP_CTX *recp, BN_CTX *ctx) {
    258   int i, j, ret = 0;
    259   BIGNUM *a, *b, *d, *r;
    260 
    261   BN_CTX_start(ctx);
    262   a = BN_CTX_get(ctx);
    263   b = BN_CTX_get(ctx);
    264   if (dv != NULL) {
    265     d = dv;
    266   } else {
    267     d = BN_CTX_get(ctx);
    268   }
    269 
    270   if (rem != NULL) {
    271     r = rem;
    272   } else {
    273     r = BN_CTX_get(ctx);
    274   }
    275 
    276   if (a == NULL || b == NULL || d == NULL || r == NULL) {
    277     goto err;
    278   }
    279 
    280   if (BN_ucmp(m, &recp->N) < 0) {
    281     BN_zero(d);
    282     if (!BN_copy(r, m)) {
    283       goto err;
    284     }
    285     BN_CTX_end(ctx);
    286     return 1;
    287   }
    288 
    289   // We want the remainder
    290   // Given input of ABCDEF / ab
    291   // we need multiply ABCDEF by 3 digests of the reciprocal of ab
    292 
    293   // i := max(BN_num_bits(m), 2*BN_num_bits(N))
    294   i = BN_num_bits(m);
    295   j = recp->num_bits << 1;
    296   if (j > i) {
    297     i = j;
    298   }
    299 
    300   // Nr := round(2^i / N)
    301   if (i != recp->shift) {
    302     recp->shift =
    303         BN_reciprocal(&(recp->Nr), &(recp->N), i,
    304                       ctx);  // BN_reciprocal returns i, or -1 for an error
    305   }
    306 
    307   if (recp->shift == -1) {
    308     goto err;
    309   }
    310 
    311   // d := |round(round(m / 2^BN_num_bits(N)) * recp->Nr / 2^(i -
    312   // BN_num_bits(N)))|
    313   //    = |round(round(m / 2^BN_num_bits(N)) * round(2^i / N) / 2^(i -
    314   // BN_num_bits(N)))|
    315   //   <= |(m / 2^BN_num_bits(N)) * (2^i / N) * (2^BN_num_bits(N) / 2^i)|
    316   //    = |m/N|
    317   if (!BN_rshift(a, m, recp->num_bits)) {
    318     goto err;
    319   }
    320   if (!BN_mul(b, a, &(recp->Nr), ctx)) {
    321     goto err;
    322   }
    323   if (!BN_rshift(d, b, i - recp->num_bits)) {
    324     goto err;
    325   }
    326   d->neg = 0;
    327 
    328   if (!BN_mul(b, &(recp->N), d, ctx)) {
    329     goto err;
    330   }
    331   if (!BN_usub(r, m, b)) {
    332     goto err;
    333   }
    334   r->neg = 0;
    335 
    336   j = 0;
    337   while (BN_ucmp(r, &(recp->N)) >= 0) {
    338     if (j++ > 2) {
    339       OPENSSL_PUT_ERROR(BN, BN_R_BAD_RECIPROCAL);
    340       goto err;
    341     }
    342     if (!BN_usub(r, r, &(recp->N))) {
    343       goto err;
    344     }
    345     if (!BN_add_word(d, 1)) {
    346       goto err;
    347     }
    348   }
    349 
    350   r->neg = BN_is_zero(r) ? 0 : m->neg;
    351   d->neg = m->neg ^ recp->N.neg;
    352   ret = 1;
    353 
    354 err:
    355   BN_CTX_end(ctx);
    356   return ret;
    357 }
    358 
    359 static int BN_mod_mul_reciprocal(BIGNUM *r, const BIGNUM *x, const BIGNUM *y,
    360                                  BN_RECP_CTX *recp, BN_CTX *ctx) {
    361   int ret = 0;
    362   BIGNUM *a;
    363   const BIGNUM *ca;
    364 
    365   BN_CTX_start(ctx);
    366   a = BN_CTX_get(ctx);
    367   if (a == NULL) {
    368     goto err;
    369   }
    370 
    371   if (y != NULL) {
    372     if (x == y) {
    373       if (!BN_sqr(a, x, ctx)) {
    374         goto err;
    375       }
    376     } else {
    377       if (!BN_mul(a, x, y, ctx)) {
    378         goto err;
    379       }
    380     }
    381     ca = a;
    382   } else {
    383     ca = x;  // Just do the mod
    384   }
    385 
    386   ret = BN_div_recp(NULL, r, ca, recp, ctx);
    387 
    388 err:
    389   BN_CTX_end(ctx);
    390   return ret;
    391 }
    392 
    393 // BN_window_bits_for_exponent_size returns sliding window size for mod_exp with
    394 // a |b| bit exponent.
    395 //
    396 // For window size 'w' (w >= 2) and a random 'b' bits exponent, the number of
    397 // multiplications is a constant plus on average
    398 //
    399 //    2^(w-1) + (b-w)/(w+1);
    400 //
    401 // here 2^(w-1)  is for precomputing the table (we actually need entries only
    402 // for windows that have the lowest bit set), and (b-w)/(w+1)  is an
    403 // approximation for the expected number of w-bit windows, not counting the
    404 // first one.
    405 //
    406 // Thus we should use
    407 //
    408 //    w >= 6  if        b > 671
    409 //     w = 5  if  671 > b > 239
    410 //     w = 4  if  239 > b >  79
    411 //     w = 3  if   79 > b >  23
    412 //    w <= 2  if   23 > b
    413 //
    414 // (with draws in between).  Very small exponents are often selected
    415 // with low Hamming weight, so we use  w = 1  for b <= 23.
    416 static int BN_window_bits_for_exponent_size(int b) {
    417   if (b > 671) {
    418     return 6;
    419   }
    420   if (b > 239) {
    421     return 5;
    422   }
    423   if (b > 79) {
    424     return 4;
    425   }
    426   if (b > 23) {
    427     return 3;
    428   }
    429   return 1;
    430 }
    431 
    432 // TABLE_SIZE is the maximum precomputation table size for *variable* sliding
    433 // windows. This must be 2^(max_window - 1), where max_window is the largest
    434 // value returned from |BN_window_bits_for_exponent_size|.
    435 #define TABLE_SIZE 32
    436 
    437 // TABLE_BITS_SMALL is the smallest value returned from
    438 // |BN_window_bits_for_exponent_size| when |b| is at most |BN_BITS2| *
    439 // |BN_SMALL_MAX_WORDS| words.
    440 #define TABLE_BITS_SMALL 5
    441 
    442 // TABLE_SIZE_SMALL is the same as |TABLE_SIZE|, but when |b| is at most
    443 // |BN_BITS2| * |BN_SMALL_MAX_WORDS|.
    444 #define TABLE_SIZE_SMALL (1 << (TABLE_BITS_SMALL - 1))
    445 
    446 static int mod_exp_recp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p,
    447                         const BIGNUM *m, BN_CTX *ctx) {
    448   int i, j, bits, ret = 0, wstart, window;
    449   int start = 1;
    450   BIGNUM *aa;
    451   // Table of variables obtained from 'ctx'
    452   BIGNUM *val[TABLE_SIZE];
    453   BN_RECP_CTX recp;
    454 
    455   bits = BN_num_bits(p);
    456 
    457   if (bits == 0) {
    458     // x**0 mod 1 is still zero.
    459     if (BN_is_one(m)) {
    460       BN_zero(r);
    461       return 1;
    462     }
    463     return BN_one(r);
    464   }
    465 
    466   BN_CTX_start(ctx);
    467   aa = BN_CTX_get(ctx);
    468   val[0] = BN_CTX_get(ctx);
    469   if (!aa || !val[0]) {
    470     goto err;
    471   }
    472 
    473   BN_RECP_CTX_init(&recp);
    474   if (m->neg) {
    475     // ignore sign of 'm'
    476     if (!BN_copy(aa, m)) {
    477       goto err;
    478     }
    479     aa->neg = 0;
    480     if (BN_RECP_CTX_set(&recp, aa, ctx) <= 0) {
    481       goto err;
    482     }
    483   } else {
    484     if (BN_RECP_CTX_set(&recp, m, ctx) <= 0) {
    485       goto err;
    486     }
    487   }
    488 
    489   if (!BN_nnmod(val[0], a, m, ctx)) {
    490     goto err;  // 1
    491   }
    492   if (BN_is_zero(val[0])) {
    493     BN_zero(r);
    494     ret = 1;
    495     goto err;
    496   }
    497 
    498   window = BN_window_bits_for_exponent_size(bits);
    499   if (window > 1) {
    500     if (!BN_mod_mul_reciprocal(aa, val[0], val[0], &recp, ctx)) {
    501       goto err;  // 2
    502     }
    503     j = 1 << (window - 1);
    504     for (i = 1; i < j; i++) {
    505       if (((val[i] = BN_CTX_get(ctx)) == NULL) ||
    506           !BN_mod_mul_reciprocal(val[i], val[i - 1], aa, &recp, ctx)) {
    507         goto err;
    508       }
    509     }
    510   }
    511 
    512   start = 1;  // This is used to avoid multiplication etc
    513               // when there is only the value '1' in the
    514               // buffer.
    515   wstart = bits - 1;  // The top bit of the window
    516 
    517   if (!BN_one(r)) {
    518     goto err;
    519   }
    520 
    521   for (;;) {
    522     int wvalue;  // The 'value' of the window
    523     int wend;  // The bottom bit of the window
    524 
    525     if (!BN_is_bit_set(p, wstart)) {
    526       if (!start) {
    527         if (!BN_mod_mul_reciprocal(r, r, r, &recp, ctx)) {
    528           goto err;
    529         }
    530       }
    531       if (wstart == 0) {
    532         break;
    533       }
    534       wstart--;
    535       continue;
    536     }
    537 
    538     // We now have wstart on a 'set' bit, we now need to work out
    539     // how bit a window to do.  To do this we need to scan
    540     // forward until the last set bit before the end of the
    541     // window
    542     wvalue = 1;
    543     wend = 0;
    544     for (i = 1; i < window; i++) {
    545       if (wstart - i < 0) {
    546         break;
    547       }
    548       if (BN_is_bit_set(p, wstart - i)) {
    549         wvalue <<= (i - wend);
    550         wvalue |= 1;
    551         wend = i;
    552       }
    553     }
    554 
    555     // wend is the size of the current window
    556     j = wend + 1;
    557     // add the 'bytes above'
    558     if (!start) {
    559       for (i = 0; i < j; i++) {
    560         if (!BN_mod_mul_reciprocal(r, r, r, &recp, ctx)) {
    561           goto err;
    562         }
    563       }
    564     }
    565 
    566     // wvalue will be an odd number < 2^window
    567     if (!BN_mod_mul_reciprocal(r, r, val[wvalue >> 1], &recp, ctx)) {
    568       goto err;
    569     }
    570 
    571     // move the 'window' down further
    572     wstart -= wend + 1;
    573     start = 0;
    574     if (wstart < 0) {
    575       break;
    576     }
    577   }
    578   ret = 1;
    579 
    580 err:
    581   BN_CTX_end(ctx);
    582   BN_RECP_CTX_free(&recp);
    583   return ret;
    584 }
    585 
    586 int BN_mod_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m,
    587                BN_CTX *ctx) {
    588   if (BN_is_odd(m)) {
    589     return BN_mod_exp_mont(r, a, p, m, ctx, NULL);
    590   }
    591 
    592   return mod_exp_recp(r, a, p, m, ctx);
    593 }
    594 
    595 int BN_mod_exp_mont(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
    596                     const BIGNUM *m, BN_CTX *ctx, const BN_MONT_CTX *mont) {
    597   if (!BN_is_odd(m)) {
    598     OPENSSL_PUT_ERROR(BN, BN_R_CALLED_WITH_EVEN_MODULUS);
    599     return 0;
    600   }
    601   int bits = BN_num_bits(p);
    602   if (bits == 0) {
    603     // x**0 mod 1 is still zero.
    604     if (BN_is_one(m)) {
    605       BN_zero(rr);
    606       return 1;
    607     }
    608     return BN_one(rr);
    609   }
    610 
    611   int ret = 0;
    612   BIGNUM *val[TABLE_SIZE];
    613   BN_MONT_CTX *new_mont = NULL;
    614 
    615   BN_CTX_start(ctx);
    616   BIGNUM *d = BN_CTX_get(ctx);
    617   BIGNUM *r = BN_CTX_get(ctx);
    618   val[0] = BN_CTX_get(ctx);
    619   if (!d || !r || !val[0]) {
    620     goto err;
    621   }
    622 
    623   // Allocate a montgomery context if it was not supplied by the caller.
    624   if (mont == NULL) {
    625     new_mont = BN_MONT_CTX_new_for_modulus(m, ctx);
    626     if (new_mont == NULL) {
    627       goto err;
    628     }
    629     mont = new_mont;
    630   }
    631 
    632   const BIGNUM *aa;
    633   if (a->neg || BN_ucmp(a, m) >= 0) {
    634     if (!BN_nnmod(val[0], a, m, ctx)) {
    635       goto err;
    636     }
    637     aa = val[0];
    638   } else {
    639     aa = a;
    640   }
    641 
    642   if (BN_is_zero(aa)) {
    643     BN_zero(rr);
    644     ret = 1;
    645     goto err;
    646   }
    647 
    648   // We exponentiate by looking at sliding windows of the exponent and
    649   // precomputing powers of |aa|. Windows may be shifted so they always end on a
    650   // set bit, so only precompute odd powers. We compute val[i] = aa^(2*i + 1)
    651   // for i = 0 to 2^(window-1), all in Montgomery form.
    652   int window = BN_window_bits_for_exponent_size(bits);
    653   if (!BN_to_montgomery(val[0], aa, mont, ctx)) {
    654     goto err;
    655   }
    656   if (window > 1) {
    657     if (!BN_mod_mul_montgomery(d, val[0], val[0], mont, ctx)) {
    658       goto err;
    659     }
    660     for (int i = 1; i < 1 << (window - 1); i++) {
    661       val[i] = BN_CTX_get(ctx);
    662       if (val[i] == NULL ||
    663           !BN_mod_mul_montgomery(val[i], val[i - 1], d, mont, ctx)) {
    664         goto err;
    665       }
    666     }
    667   }
    668 
    669   if (!bn_one_to_montgomery(r, mont, ctx)) {
    670     goto err;
    671   }
    672 
    673   int r_is_one = 1;
    674   int wstart = bits - 1;  // The top bit of the window.
    675   for (;;) {
    676     if (!BN_is_bit_set(p, wstart)) {
    677       if (!r_is_one && !BN_mod_mul_montgomery(r, r, r, mont, ctx)) {
    678         goto err;
    679       }
    680       if (wstart == 0) {
    681         break;
    682       }
    683       wstart--;
    684       continue;
    685     }
    686 
    687     // We now have wstart on a set bit. Find the largest window we can use.
    688     int wvalue = 1;
    689     int wsize = 0;
    690     for (int i = 1; i < window && i <= wstart; i++) {
    691       if (BN_is_bit_set(p, wstart - i)) {
    692         wvalue <<= (i - wsize);
    693         wvalue |= 1;
    694         wsize = i;
    695       }
    696     }
    697 
    698     // Shift |r| to the end of the window.
    699     if (!r_is_one) {
    700       for (int i = 0; i < wsize + 1; i++) {
    701         if (!BN_mod_mul_montgomery(r, r, r, mont, ctx)) {
    702           goto err;
    703         }
    704       }
    705     }
    706 
    707     assert(wvalue & 1);
    708     assert(wvalue < (1 << window));
    709     if (!BN_mod_mul_montgomery(r, r, val[wvalue >> 1], mont, ctx)) {
    710       goto err;
    711     }
    712 
    713     r_is_one = 0;
    714     if (wstart == wsize) {
    715       break;
    716     }
    717     wstart -= wsize + 1;
    718   }
    719 
    720   if (!BN_from_montgomery(rr, r, mont, ctx)) {
    721     goto err;
    722   }
    723   ret = 1;
    724 
    725 err:
    726   BN_MONT_CTX_free(new_mont);
    727   BN_CTX_end(ctx);
    728   return ret;
    729 }
    730 
    731 int bn_mod_exp_mont_small(BN_ULONG *r, size_t num_r, const BN_ULONG *a,
    732                           size_t num_a, const BN_ULONG *p, size_t num_p,
    733                           const BN_MONT_CTX *mont) {
    734   size_t num_n = mont->N.top;
    735   if (num_n != num_a || num_n != num_r || num_n > BN_SMALL_MAX_WORDS) {
    736     OPENSSL_PUT_ERROR(BN, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
    737     return 0;
    738   }
    739   if (!BN_is_odd(&mont->N)) {
    740     OPENSSL_PUT_ERROR(BN, BN_R_CALLED_WITH_EVEN_MODULUS);
    741     return 0;
    742   }
    743   unsigned bits = 0;
    744   if (num_p != 0) {
    745     bits = BN_num_bits_word(p[num_p - 1]) + (num_p - 1) * BN_BITS2;
    746   }
    747   if (bits == 0) {
    748     OPENSSL_memset(r, 0, num_r * sizeof(BN_ULONG));
    749     if (!BN_is_one(&mont->N)) {
    750       r[0] = 1;
    751     }
    752     return 1;
    753   }
    754 
    755   // We exponentiate by looking at sliding windows of the exponent and
    756   // precomputing powers of |a|. Windows may be shifted so they always end on a
    757   // set bit, so only precompute odd powers. We compute val[i] = a^(2*i + 1) for
    758   // i = 0 to 2^(window-1), all in Montgomery form.
    759   unsigned window = BN_window_bits_for_exponent_size(bits);
    760   if (window > TABLE_BITS_SMALL) {
    761     window = TABLE_BITS_SMALL;  // Tolerate excessively large |p|.
    762   }
    763   int ret = 0;
    764   BN_ULONG val[TABLE_SIZE_SMALL][BN_SMALL_MAX_WORDS];
    765   OPENSSL_memcpy(val[0], a, num_n * sizeof(BN_ULONG));
    766   if (window > 1) {
    767     BN_ULONG d[BN_SMALL_MAX_WORDS];
    768     if (!bn_mod_mul_montgomery_small(d, num_n, val[0], num_n, val[0], num_n,
    769                                      mont)) {
    770       goto err;
    771     }
    772     for (unsigned i = 1; i < 1u << (window - 1); i++) {
    773       if (!bn_mod_mul_montgomery_small(val[i], num_n, val[i - 1], num_n, d,
    774                                        num_n, mont)) {
    775         goto err;
    776       }
    777     }
    778   }
    779 
    780   if (!bn_one_to_montgomery_small(r, num_r, mont)) {
    781     goto err;
    782   }
    783 
    784   int r_is_one = 1;
    785   unsigned wstart = bits - 1;  // The top bit of the window.
    786   for (;;) {
    787     if (!bn_is_bit_set_words(p, num_p, wstart)) {
    788       if (!r_is_one &&
    789           !bn_mod_mul_montgomery_small(r, num_r, r, num_r, r, num_r, mont)) {
    790         goto err;
    791       }
    792       if (wstart == 0) {
    793         break;
    794       }
    795       wstart--;
    796       continue;
    797     }
    798 
    799     // We now have wstart on a set bit. Find the largest window we can use.
    800     unsigned wvalue = 1;
    801     unsigned wsize = 0;
    802     for (unsigned i = 1; i < window && i <= wstart; i++) {
    803       if (bn_is_bit_set_words(p, num_p, wstart - i)) {
    804         wvalue <<= (i - wsize);
    805         wvalue |= 1;
    806         wsize = i;
    807       }
    808     }
    809 
    810     // Shift |r| to the end of the window.
    811     if (!r_is_one) {
    812       for (unsigned i = 0; i < wsize + 1; i++) {
    813         if (!bn_mod_mul_montgomery_small(r, num_r, r, num_r, r, num_r, mont)) {
    814           goto err;
    815         }
    816       }
    817     }
    818 
    819     assert(wvalue & 1);
    820     assert(wvalue < (1u << window));
    821     if (!bn_mod_mul_montgomery_small(r, num_r, r, num_r, val[wvalue >> 1],
    822                                      num_n, mont)) {
    823       goto err;
    824     }
    825 
    826     r_is_one = 0;
    827     if (wstart == wsize) {
    828       break;
    829     }
    830     wstart -= wsize + 1;
    831   }
    832 
    833   ret = 1;
    834 
    835 err:
    836   OPENSSL_cleanse(val, sizeof(val));
    837   return ret;
    838 }
    839 
    840 int bn_mod_inverse_prime_mont_small(BN_ULONG *r, size_t num_r,
    841                                     const BN_ULONG *a, size_t num_a,
    842                                     const BN_MONT_CTX *mont) {
    843   const BN_ULONG *p = mont->N.d;
    844   size_t num_p = mont->N.top;
    845   if (num_p > BN_SMALL_MAX_WORDS || num_p == 0) {
    846     OPENSSL_PUT_ERROR(BN, ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
    847     return 0;
    848   }
    849 
    850   // Per Fermat's Little Theorem, a^-1 = a^(p-2) (mod p) for p prime.
    851   BN_ULONG p_minus_two[BN_SMALL_MAX_WORDS];
    852   OPENSSL_memcpy(p_minus_two, p, num_p * sizeof(BN_ULONG));
    853   if (p_minus_two[0] >= 2) {
    854     p_minus_two[0] -= 2;
    855   } else {
    856     p_minus_two[0] -= 2;
    857     for (size_t i = 1; i < num_p; i++) {
    858       if (p_minus_two[i]-- != 0) {
    859         break;
    860       }
    861     }
    862   }
    863 
    864   return bn_mod_exp_mont_small(r, num_r, a, num_a, p_minus_two, num_p, mont);
    865 }
    866 
    867 
    868 // |BN_mod_exp_mont_consttime| stores the precomputed powers in a specific
    869 // layout so that accessing any of these table values shows the same access
    870 // pattern as far as cache lines are concerned. The following functions are
    871 // used to transfer a BIGNUM from/to that table.
    872 
    873 static void copy_to_prebuf(const BIGNUM *b, int top, unsigned char *buf,
    874                            int idx, int window) {
    875   int i, j;
    876   const int width = 1 << window;
    877   BN_ULONG *table = (BN_ULONG *) buf;
    878 
    879   if (top > b->top) {
    880     top = b->top;  // this works because 'buf' is explicitly zeroed
    881   }
    882 
    883   for (i = 0, j = idx; i < top; i++, j += width)  {
    884     table[j] = b->d[i];
    885   }
    886 }
    887 
    888 static int copy_from_prebuf(BIGNUM *b, int top, unsigned char *buf, int idx,
    889                             int window) {
    890   int i, j;
    891   const int width = 1 << window;
    892   volatile BN_ULONG *table = (volatile BN_ULONG *)buf;
    893 
    894   if (!bn_wexpand(b, top)) {
    895     return 0;
    896   }
    897 
    898   if (window <= 3) {
    899     for (i = 0; i < top; i++, table += width) {
    900       BN_ULONG acc = 0;
    901 
    902       for (j = 0; j < width; j++) {
    903         acc |= table[j] & ((BN_ULONG)0 - (constant_time_eq_int(j, idx) & 1));
    904       }
    905 
    906       b->d[i] = acc;
    907     }
    908   } else {
    909     int xstride = 1 << (window - 2);
    910     BN_ULONG y0, y1, y2, y3;
    911 
    912     i = idx >> (window - 2);  // equivalent of idx / xstride
    913     idx &= xstride - 1;       // equivalent of idx % xstride
    914 
    915     y0 = (BN_ULONG)0 - (constant_time_eq_int(i, 0) & 1);
    916     y1 = (BN_ULONG)0 - (constant_time_eq_int(i, 1) & 1);
    917     y2 = (BN_ULONG)0 - (constant_time_eq_int(i, 2) & 1);
    918     y3 = (BN_ULONG)0 - (constant_time_eq_int(i, 3) & 1);
    919 
    920     for (i = 0; i < top; i++, table += width) {
    921       BN_ULONG acc = 0;
    922 
    923       for (j = 0; j < xstride; j++) {
    924         acc |= ((table[j + 0 * xstride] & y0) | (table[j + 1 * xstride] & y1) |
    925                 (table[j + 2 * xstride] & y2) | (table[j + 3 * xstride] & y3)) &
    926                ((BN_ULONG)0 - (constant_time_eq_int(j, idx) & 1));
    927       }
    928 
    929       b->d[i] = acc;
    930     }
    931   }
    932 
    933   b->top = top;
    934   bn_correct_top(b);
    935   return 1;
    936 }
    937 
    938 // BN_mod_exp_mont_conttime is based on the assumption that the L1 data cache
    939 // line width of the target processor is at least the following value.
    940 #define MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH (64)
    941 #define MOD_EXP_CTIME_MIN_CACHE_LINE_MASK \
    942   (MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH - 1)
    943 
    944 // Window sizes optimized for fixed window size modular exponentiation
    945 // algorithm (BN_mod_exp_mont_consttime).
    946 //
    947 // To achieve the security goals of BN_mode_exp_mont_consttime, the maximum
    948 // size of the window must not exceed
    949 // log_2(MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH).
    950 //
    951 // Window size thresholds are defined for cache line sizes of 32 and 64, cache
    952 // line sizes where log_2(32)=5 and log_2(64)=6 respectively. A window size of
    953 // 7 should only be used on processors that have a 128 byte or greater cache
    954 // line size.
    955 #if MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH == 64
    956 
    957 #define BN_window_bits_for_ctime_exponent_size(b) \
    958   ((b) > 937 ? 6 : (b) > 306 ? 5 : (b) > 89 ? 4 : (b) > 22 ? 3 : 1)
    959 #define BN_MAX_WINDOW_BITS_FOR_CTIME_EXPONENT_SIZE (6)
    960 
    961 #elif MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH == 32
    962 
    963 #define BN_window_bits_for_ctime_exponent_size(b) \
    964   ((b) > 306 ? 5 : (b) > 89 ? 4 : (b) > 22 ? 3 : 1)
    965 #define BN_MAX_WINDOW_BITS_FOR_CTIME_EXPONENT_SIZE (5)
    966 
    967 #endif
    968 
    969 // Given a pointer value, compute the next address that is a cache line
    970 // multiple.
    971 #define MOD_EXP_CTIME_ALIGN(x_)          \
    972   ((unsigned char *)(x_) +               \
    973    (MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH - \
    974     (((size_t)(x_)) & (MOD_EXP_CTIME_MIN_CACHE_LINE_MASK))))
    975 
    976 // This variant of BN_mod_exp_mont() uses fixed windows and the special
    977 // precomputation memory layout to limit data-dependency to a minimum
    978 // to protect secret exponents (cf. the hyper-threading timing attacks
    979 // pointed out by Colin Percival,
    980 // http://www.daemonology.net/hyperthreading-considered-harmful/)
    981 int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
    982                               const BIGNUM *m, BN_CTX *ctx,
    983                               const BN_MONT_CTX *mont) {
    984   int i, ret = 0, window, wvalue;
    985   int top;
    986   BN_MONT_CTX *new_mont = NULL;
    987 
    988   int numPowers;
    989   unsigned char *powerbufFree = NULL;
    990   int powerbufLen = 0;
    991   unsigned char *powerbuf = NULL;
    992   BIGNUM tmp, am;
    993   BIGNUM *new_a = NULL;
    994 
    995   if (!BN_is_odd(m)) {
    996     OPENSSL_PUT_ERROR(BN, BN_R_CALLED_WITH_EVEN_MODULUS);
    997     return 0;
    998   }
    999 
   1000   top = m->top;
   1001 
   1002   // Use all bits stored in |p|, rather than |BN_num_bits|, so we do not leak
   1003   // whether the top bits are zero.
   1004   int max_bits = p->top * BN_BITS2;
   1005   int bits = max_bits;
   1006   if (bits == 0) {
   1007     // x**0 mod 1 is still zero.
   1008     if (BN_is_one(m)) {
   1009       BN_zero(rr);
   1010       return 1;
   1011     }
   1012     return BN_one(rr);
   1013   }
   1014 
   1015   // Allocate a montgomery context if it was not supplied by the caller.
   1016   if (mont == NULL) {
   1017     new_mont = BN_MONT_CTX_new_for_modulus(m, ctx);
   1018     if (new_mont == NULL) {
   1019       goto err;
   1020     }
   1021     mont = new_mont;
   1022   }
   1023 
   1024   if (a->neg || BN_ucmp(a, m) >= 0) {
   1025     new_a = BN_new();
   1026     if (new_a == NULL ||
   1027         !BN_nnmod(new_a, a, m, ctx)) {
   1028       goto err;
   1029     }
   1030     a = new_a;
   1031   }
   1032 
   1033 #ifdef RSAZ_ENABLED
   1034   // If the size of the operands allow it, perform the optimized
   1035   // RSAZ exponentiation. For further information see
   1036   // crypto/bn/rsaz_exp.c and accompanying assembly modules.
   1037   if ((16 == a->top) && (16 == p->top) && (BN_num_bits(m) == 1024) &&
   1038       rsaz_avx2_eligible()) {
   1039     if (!bn_wexpand(rr, 16)) {
   1040       goto err;
   1041     }
   1042     RSAZ_1024_mod_exp_avx2(rr->d, a->d, p->d, m->d, mont->RR.d, mont->n0[0]);
   1043     rr->top = 16;
   1044     rr->neg = 0;
   1045     bn_correct_top(rr);
   1046     ret = 1;
   1047     goto err;
   1048   }
   1049 #endif
   1050 
   1051   // Get the window size to use with size of p.
   1052   window = BN_window_bits_for_ctime_exponent_size(bits);
   1053 #if defined(OPENSSL_BN_ASM_MONT5)
   1054   if (window >= 5) {
   1055     window = 5;  // ~5% improvement for RSA2048 sign, and even for RSA4096
   1056     // reserve space for mont->N.d[] copy
   1057     powerbufLen += top * sizeof(mont->N.d[0]);
   1058   }
   1059 #endif
   1060 
   1061   // Allocate a buffer large enough to hold all of the pre-computed
   1062   // powers of am, am itself and tmp.
   1063   numPowers = 1 << window;
   1064   powerbufLen +=
   1065       sizeof(m->d[0]) *
   1066       (top * numPowers + ((2 * top) > numPowers ? (2 * top) : numPowers));
   1067 #ifdef alloca
   1068   if (powerbufLen < 3072) {
   1069     powerbufFree = alloca(powerbufLen + MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH);
   1070   } else
   1071 #endif
   1072   {
   1073     if ((powerbufFree = OPENSSL_malloc(
   1074             powerbufLen + MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH)) == NULL) {
   1075       goto err;
   1076     }
   1077   }
   1078 
   1079   powerbuf = MOD_EXP_CTIME_ALIGN(powerbufFree);
   1080   OPENSSL_memset(powerbuf, 0, powerbufLen);
   1081 
   1082 #ifdef alloca
   1083   if (powerbufLen < 3072) {
   1084     powerbufFree = NULL;
   1085   }
   1086 #endif
   1087 
   1088   // lay down tmp and am right after powers table
   1089   tmp.d = (BN_ULONG *)(powerbuf + sizeof(m->d[0]) * top * numPowers);
   1090   am.d = tmp.d + top;
   1091   tmp.top = am.top = 0;
   1092   tmp.dmax = am.dmax = top;
   1093   tmp.neg = am.neg = 0;
   1094   tmp.flags = am.flags = BN_FLG_STATIC_DATA;
   1095 
   1096   if (!bn_one_to_montgomery(&tmp, mont, ctx)) {
   1097     goto err;
   1098   }
   1099 
   1100   // prepare a^1 in Montgomery domain
   1101   assert(!a->neg);
   1102   assert(BN_ucmp(a, m) < 0);
   1103   if (!BN_to_montgomery(&am, a, mont, ctx)) {
   1104     goto err;
   1105   }
   1106 
   1107 #if defined(OPENSSL_BN_ASM_MONT5)
   1108   // This optimization uses ideas from http://eprint.iacr.org/2011/239,
   1109   // specifically optimization of cache-timing attack countermeasures
   1110   // and pre-computation optimization.
   1111 
   1112   // Dedicated window==4 case improves 512-bit RSA sign by ~15%, but as
   1113   // 512-bit RSA is hardly relevant, we omit it to spare size...
   1114   if (window == 5 && top > 1) {
   1115     const BN_ULONG *n0 = mont->n0;
   1116     BN_ULONG *np;
   1117 
   1118     // BN_to_montgomery can contaminate words above .top
   1119     // [in BN_DEBUG[_DEBUG] build]...
   1120     for (i = am.top; i < top; i++) {
   1121       am.d[i] = 0;
   1122     }
   1123     for (i = tmp.top; i < top; i++) {
   1124       tmp.d[i] = 0;
   1125     }
   1126 
   1127     // copy mont->N.d[] to improve cache locality
   1128     for (np = am.d + top, i = 0; i < top; i++) {
   1129       np[i] = mont->N.d[i];
   1130     }
   1131 
   1132     bn_scatter5(tmp.d, top, powerbuf, 0);
   1133     bn_scatter5(am.d, am.top, powerbuf, 1);
   1134     bn_mul_mont(tmp.d, am.d, am.d, np, n0, top);
   1135     bn_scatter5(tmp.d, top, powerbuf, 2);
   1136 
   1137     // same as above, but uses squaring for 1/2 of operations
   1138     for (i = 4; i < 32; i *= 2) {
   1139       bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
   1140       bn_scatter5(tmp.d, top, powerbuf, i);
   1141     }
   1142     for (i = 3; i < 8; i += 2) {
   1143       int j;
   1144       bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np, n0, top, i - 1);
   1145       bn_scatter5(tmp.d, top, powerbuf, i);
   1146       for (j = 2 * i; j < 32; j *= 2) {
   1147         bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
   1148         bn_scatter5(tmp.d, top, powerbuf, j);
   1149       }
   1150     }
   1151     for (; i < 16; i += 2) {
   1152       bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np, n0, top, i - 1);
   1153       bn_scatter5(tmp.d, top, powerbuf, i);
   1154       bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
   1155       bn_scatter5(tmp.d, top, powerbuf, 2 * i);
   1156     }
   1157     for (; i < 32; i += 2) {
   1158       bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np, n0, top, i - 1);
   1159       bn_scatter5(tmp.d, top, powerbuf, i);
   1160     }
   1161 
   1162     bits--;
   1163     for (wvalue = 0, i = bits % 5; i >= 0; i--, bits--) {
   1164       wvalue = (wvalue << 1) + BN_is_bit_set(p, bits);
   1165     }
   1166     bn_gather5(tmp.d, top, powerbuf, wvalue);
   1167 
   1168     // At this point |bits| is 4 mod 5 and at least -1. (|bits| is the first bit
   1169     // that has not been read yet.)
   1170     assert(bits >= -1 && (bits == -1 || bits % 5 == 4));
   1171 
   1172     // Scan the exponent one window at a time starting from the most
   1173     // significant bits.
   1174     if (top & 7) {
   1175       while (bits >= 0) {
   1176         for (wvalue = 0, i = 0; i < 5; i++, bits--) {
   1177           wvalue = (wvalue << 1) + BN_is_bit_set(p, bits);
   1178         }
   1179 
   1180         bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
   1181         bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
   1182         bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
   1183         bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
   1184         bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
   1185         bn_mul_mont_gather5(tmp.d, tmp.d, powerbuf, np, n0, top, wvalue);
   1186       }
   1187     } else {
   1188       const uint8_t *p_bytes = (const uint8_t *)p->d;
   1189       assert(bits < max_bits);
   1190       // |p = 0| has been handled as a special case, so |max_bits| is at least
   1191       // one word.
   1192       assert(max_bits >= 64);
   1193 
   1194       // If the first bit to be read lands in the last byte, unroll the first
   1195       // iteration to avoid reading past the bounds of |p->d|. (After the first
   1196       // iteration, we are guaranteed to be past the last byte.) Note |bits|
   1197       // here is the top bit, inclusive.
   1198       if (bits - 4 >= max_bits - 8) {
   1199         // Read five bits from |bits-4| through |bits|, inclusive.
   1200         wvalue = p_bytes[p->top * BN_BYTES - 1];
   1201         wvalue >>= (bits - 4) & 7;
   1202         wvalue &= 0x1f;
   1203         bits -= 5;
   1204         bn_power5(tmp.d, tmp.d, powerbuf, np, n0, top, wvalue);
   1205       }
   1206       while (bits >= 0) {
   1207         // Read five bits from |bits-4| through |bits|, inclusive.
   1208         int first_bit = bits - 4;
   1209         uint16_t val;
   1210         OPENSSL_memcpy(&val, p_bytes + (first_bit >> 3), sizeof(val));
   1211         val >>= first_bit & 7;
   1212         val &= 0x1f;
   1213         bits -= 5;
   1214         bn_power5(tmp.d, tmp.d, powerbuf, np, n0, top, val);
   1215       }
   1216     }
   1217 
   1218     ret = bn_from_montgomery(tmp.d, tmp.d, NULL, np, n0, top);
   1219     tmp.top = top;
   1220     bn_correct_top(&tmp);
   1221     if (ret) {
   1222       if (!BN_copy(rr, &tmp)) {
   1223         ret = 0;
   1224       }
   1225       goto err;  // non-zero ret means it's not error
   1226     }
   1227   } else
   1228 #endif
   1229   {
   1230     copy_to_prebuf(&tmp, top, powerbuf, 0, window);
   1231     copy_to_prebuf(&am, top, powerbuf, 1, window);
   1232 
   1233     // If the window size is greater than 1, then calculate
   1234     // val[i=2..2^winsize-1]. Powers are computed as a*a^(i-1)
   1235     // (even powers could instead be computed as (a^(i/2))^2
   1236     // to use the slight performance advantage of sqr over mul).
   1237     if (window > 1) {
   1238       if (!BN_mod_mul_montgomery(&tmp, &am, &am, mont, ctx)) {
   1239         goto err;
   1240       }
   1241 
   1242       copy_to_prebuf(&tmp, top, powerbuf, 2, window);
   1243 
   1244       for (i = 3; i < numPowers; i++) {
   1245         // Calculate a^i = a^(i-1) * a
   1246         if (!BN_mod_mul_montgomery(&tmp, &am, &tmp, mont, ctx)) {
   1247           goto err;
   1248         }
   1249 
   1250         copy_to_prebuf(&tmp, top, powerbuf, i, window);
   1251       }
   1252     }
   1253 
   1254     bits--;
   1255     for (wvalue = 0, i = bits % window; i >= 0; i--, bits--) {
   1256       wvalue = (wvalue << 1) + BN_is_bit_set(p, bits);
   1257     }
   1258     if (!copy_from_prebuf(&tmp, top, powerbuf, wvalue, window)) {
   1259       goto err;
   1260     }
   1261 
   1262     // Scan the exponent one window at a time starting from the most
   1263     // significant bits.
   1264     while (bits >= 0) {
   1265       wvalue = 0;  // The 'value' of the window
   1266 
   1267       // Scan the window, squaring the result as we go
   1268       for (i = 0; i < window; i++, bits--) {
   1269         if (!BN_mod_mul_montgomery(&tmp, &tmp, &tmp, mont, ctx)) {
   1270           goto err;
   1271         }
   1272         wvalue = (wvalue << 1) + BN_is_bit_set(p, bits);
   1273       }
   1274 
   1275       // Fetch the appropriate pre-computed value from the pre-buf
   1276       if (!copy_from_prebuf(&am, top, powerbuf, wvalue, window)) {
   1277         goto err;
   1278       }
   1279 
   1280       // Multiply the result into the intermediate result
   1281       if (!BN_mod_mul_montgomery(&tmp, &tmp, &am, mont, ctx)) {
   1282         goto err;
   1283       }
   1284     }
   1285   }
   1286 
   1287   // Convert the final result from montgomery to standard format
   1288   if (!BN_from_montgomery(rr, &tmp, mont, ctx)) {
   1289     goto err;
   1290   }
   1291   ret = 1;
   1292 
   1293 err:
   1294   BN_MONT_CTX_free(new_mont);
   1295   BN_clear_free(new_a);
   1296   OPENSSL_free(powerbufFree);
   1297   return (ret);
   1298 }
   1299 
   1300 int BN_mod_exp_mont_word(BIGNUM *rr, BN_ULONG a, const BIGNUM *p,
   1301                          const BIGNUM *m, BN_CTX *ctx,
   1302                          const BN_MONT_CTX *mont) {
   1303   BIGNUM a_bignum;
   1304   BN_init(&a_bignum);
   1305 
   1306   int ret = 0;
   1307 
   1308   if (!BN_set_word(&a_bignum, a)) {
   1309     OPENSSL_PUT_ERROR(BN, ERR_R_INTERNAL_ERROR);
   1310     goto err;
   1311   }
   1312 
   1313   ret = BN_mod_exp_mont(rr, &a_bignum, p, m, ctx, mont);
   1314 
   1315 err:
   1316   BN_free(&a_bignum);
   1317 
   1318   return ret;
   1319 }
   1320 
   1321 #define TABLE_SIZE 32
   1322 
   1323 int BN_mod_exp2_mont(BIGNUM *rr, const BIGNUM *a1, const BIGNUM *p1,
   1324                      const BIGNUM *a2, const BIGNUM *p2, const BIGNUM *m,
   1325                      BN_CTX *ctx, const BN_MONT_CTX *mont) {
   1326   BIGNUM tmp;
   1327   BN_init(&tmp);
   1328 
   1329   int ret = 0;
   1330   BN_MONT_CTX *new_mont = NULL;
   1331 
   1332   // Allocate a montgomery context if it was not supplied by the caller.
   1333   if (mont == NULL) {
   1334     new_mont = BN_MONT_CTX_new_for_modulus(m, ctx);
   1335     if (new_mont == NULL) {
   1336       goto err;
   1337     }
   1338     mont = new_mont;
   1339   }
   1340 
   1341   // BN_mod_mul_montgomery removes one Montgomery factor, so passing one
   1342   // Montgomery-encoded and one non-Montgomery-encoded value gives a
   1343   // non-Montgomery-encoded result.
   1344   if (!BN_mod_exp_mont(rr, a1, p1, m, ctx, mont) ||
   1345       !BN_mod_exp_mont(&tmp, a2, p2, m, ctx, mont) ||
   1346       !BN_to_montgomery(rr, rr, mont, ctx) ||
   1347       !BN_mod_mul_montgomery(rr, rr, &tmp, mont, ctx)) {
   1348     goto err;
   1349   }
   1350 
   1351   ret = 1;
   1352 
   1353 err:
   1354   BN_MONT_CTX_free(new_mont);
   1355   BN_free(&tmp);
   1356 
   1357   return ret;
   1358 }
   1359