Home | History | Annotate | Download | only in bn
      1 /* Copyright (C) 1995-1998 Eric Young (eay (at) cryptsoft.com)
      2  * All rights reserved.
      3  *
      4  * This package is an SSL implementation written
      5  * by Eric Young (eay (at) cryptsoft.com).
      6  * The implementation was written so as to conform with Netscapes SSL.
      7  *
      8  * This library is free for commercial and non-commercial use as long as
      9  * the following conditions are aheared to.  The following conditions
     10  * apply to all code found in this distribution, be it the RC4, RSA,
     11  * lhash, DES, etc., code; not just the SSL code.  The SSL documentation
     12  * included with this distribution is covered by the same copyright terms
     13  * except that the holder is Tim Hudson (tjh (at) cryptsoft.com).
     14  *
     15  * Copyright remains Eric Young's, and as such any Copyright notices in
     16  * the code are not to be removed.
     17  * If this package is used in a product, Eric Young should be given attribution
     18  * as the author of the parts of the library used.
     19  * This can be in the form of a textual message at program startup or
     20  * in documentation (online or textual) provided with the package.
     21  *
     22  * Redistribution and use in source and binary forms, with or without
     23  * modification, are permitted provided that the following conditions
     24  * are met:
     25  * 1. Redistributions of source code must retain the copyright
     26  *    notice, this list of conditions and the following disclaimer.
     27  * 2. Redistributions in binary form must reproduce the above copyright
     28  *    notice, this list of conditions and the following disclaimer in the
     29  *    documentation and/or other materials provided with the distribution.
     30  * 3. All advertising materials mentioning features or use of this software
     31  *    must display the following acknowledgement:
     32  *    "This product includes cryptographic software written by
     33  *     Eric Young (eay (at) cryptsoft.com)"
     34  *    The word 'cryptographic' can be left out if the rouines from the library
     35  *    being used are not cryptographic related :-).
     36  * 4. If you include any Windows specific code (or a derivative thereof) from
     37  *    the apps directory (application code) you must include an acknowledgement:
     38  *    "This product includes software written by Tim Hudson (tjh (at) cryptsoft.com)"
     39  *
     40  * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
     41  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     42  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     43  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
     44  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     45  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     46  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     47  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     48  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     49  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     50  * SUCH DAMAGE.
     51  *
     52  * The licence and distribution terms for any publically available version or
     53  * derivative of this code cannot be changed.  i.e. this code cannot simply be
     54  * copied and put under another distribution licence
     55  * [including the GNU Public Licence.]
     56  */
     57 /* ====================================================================
     58  * Copyright (c) 1998-2005 The OpenSSL Project.  All rights reserved.
     59  *
     60  * Redistribution and use in source and binary forms, with or without
     61  * modification, are permitted provided that the following conditions
     62  * are met:
     63  *
     64  * 1. Redistributions of source code must retain the above copyright
     65  *    notice, this list of conditions and the following disclaimer.
     66  *
     67  * 2. Redistributions in binary form must reproduce the above copyright
     68  *    notice, this list of conditions and the following disclaimer in
     69  *    the documentation and/or other materials provided with the
     70  *    distribution.
     71  *
     72  * 3. All advertising materials mentioning features or use of this
     73  *    software must display the following acknowledgment:
     74  *    "This product includes software developed by the OpenSSL Project
     75  *    for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
     76  *
     77  * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
     78  *    endorse or promote products derived from this software without
     79  *    prior written permission. For written permission, please contact
     80  *    openssl-core (at) openssl.org.
     81  *
     82  * 5. Products derived from this software may not be called "OpenSSL"
     83  *    nor may "OpenSSL" appear in their names without prior written
     84  *    permission of the OpenSSL Project.
     85  *
     86  * 6. Redistributions of any form whatsoever must retain the following
     87  *    acknowledgment:
     88  *    "This product includes software developed by the OpenSSL Project
     89  *    for use in the OpenSSL Toolkit (http://www.openssl.org/)"
     90  *
     91  * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
     92  * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     93  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     94  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE OpenSSL PROJECT OR
     95  * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     96  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
     97  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
     98  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     99  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
    100  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
    101  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
    102  * OF THE POSSIBILITY OF SUCH DAMAGE.
    103  * ====================================================================
    104  *
    105  * This product includes cryptographic software written by Eric Young
    106  * (eay (at) cryptsoft.com).  This product includes software written by Tim
    107  * Hudson (tjh (at) cryptsoft.com). */
    108 
    109 #include <openssl/bn.h>
    110 
    111 #include <assert.h>
    112 #include <string.h>
    113 
    114 #include <openssl/cpu.h>
    115 #include <openssl/err.h>
    116 #include <openssl/mem.h>
    117 
    118 #include "internal.h"
    119 
    120 
    121 #if !defined(OPENSSL_NO_ASM) && defined(OPENSSL_X86_64)
    122 #define OPENSSL_BN_ASM_MONT5
    123 #define RSAZ_ENABLED
    124 
    125 #include "rsaz_exp.h"
    126 
    127 void bn_mul_mont_gather5(BN_ULONG *rp, const BN_ULONG *ap, const void *table,
    128                          const BN_ULONG *np, const BN_ULONG *n0, int num,
    129                          int power);
    130 void bn_scatter5(const BN_ULONG *inp, size_t num, void *table, size_t power);
    131 void bn_gather5(BN_ULONG *out, size_t num, void *table, size_t power);
    132 void bn_power5(BN_ULONG *rp, const BN_ULONG *ap, const void *table,
    133                const BN_ULONG *np, const BN_ULONG *n0, int num, int power);
    134 int bn_from_montgomery(BN_ULONG *rp, const BN_ULONG *ap,
    135                        const BN_ULONG *not_used, const BN_ULONG *np,
    136                        const BN_ULONG *n0, int num);
    137 #endif
    138 
    139 int BN_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx) {
    140   int i, bits, ret = 0;
    141   BIGNUM *v, *rr;
    142 
    143   BN_CTX_start(ctx);
    144   if (r == a || r == p) {
    145     rr = BN_CTX_get(ctx);
    146   } else {
    147     rr = r;
    148   }
    149 
    150   v = BN_CTX_get(ctx);
    151   if (rr == NULL || v == NULL) {
    152     goto err;
    153   }
    154 
    155   if (BN_copy(v, a) == NULL) {
    156     goto err;
    157   }
    158   bits = BN_num_bits(p);
    159 
    160   if (BN_is_odd(p)) {
    161     if (BN_copy(rr, a) == NULL) {
    162       goto err;
    163     }
    164   } else {
    165     if (!BN_one(rr)) {
    166       goto err;
    167     }
    168   }
    169 
    170   for (i = 1; i < bits; i++) {
    171     if (!BN_sqr(v, v, ctx)) {
    172       goto err;
    173     }
    174     if (BN_is_bit_set(p, i)) {
    175       if (!BN_mul(rr, rr, v, ctx)) {
    176         goto err;
    177       }
    178     }
    179   }
    180 
    181   if (r != rr && !BN_copy(r, rr)) {
    182     goto err;
    183   }
    184   ret = 1;
    185 
    186 err:
    187   BN_CTX_end(ctx);
    188   return ret;
    189 }
    190 
    191 /* maximum precomputation table size for *variable* sliding windows */
    192 #define TABLE_SIZE 32
    193 
    194 typedef struct bn_recp_ctx_st {
    195   BIGNUM N;  /* the divisor */
    196   BIGNUM Nr; /* the reciprocal */
    197   int num_bits;
    198   int shift;
    199   int flags;
    200 } BN_RECP_CTX;
    201 
    202 static void BN_RECP_CTX_init(BN_RECP_CTX *recp) {
    203   BN_init(&recp->N);
    204   BN_init(&recp->Nr);
    205   recp->num_bits = 0;
    206   recp->shift = 0;
    207   recp->flags = 0;
    208 }
    209 
    210 static void BN_RECP_CTX_free(BN_RECP_CTX *recp) {
    211   if (recp == NULL) {
    212     return;
    213   }
    214 
    215   BN_free(&recp->N);
    216   BN_free(&recp->Nr);
    217 }
    218 
    219 static int BN_RECP_CTX_set(BN_RECP_CTX *recp, const BIGNUM *d, BN_CTX *ctx) {
    220   if (!BN_copy(&(recp->N), d)) {
    221     return 0;
    222   }
    223   BN_zero(&recp->Nr);
    224   recp->num_bits = BN_num_bits(d);
    225   recp->shift = 0;
    226 
    227   return 1;
    228 }
    229 
    230 /* len is the expected size of the result We actually calculate with an extra
    231  * word of precision, so we can do faster division if the remainder is not
    232  * required.
    233  * r := 2^len / m */
    234 static int BN_reciprocal(BIGNUM *r, const BIGNUM *m, int len, BN_CTX *ctx) {
    235   int ret = -1;
    236   BIGNUM *t;
    237 
    238   BN_CTX_start(ctx);
    239   t = BN_CTX_get(ctx);
    240   if (t == NULL) {
    241     goto err;
    242   }
    243 
    244   if (!BN_set_bit(t, len)) {
    245     goto err;
    246   }
    247 
    248   if (!BN_div(r, NULL, t, m, ctx)) {
    249     goto err;
    250   }
    251 
    252   ret = len;
    253 
    254 err:
    255   BN_CTX_end(ctx);
    256   return ret;
    257 }
    258 
    259 static int BN_div_recp(BIGNUM *dv, BIGNUM *rem, const BIGNUM *m,
    260                        BN_RECP_CTX *recp, BN_CTX *ctx) {
    261   int i, j, ret = 0;
    262   BIGNUM *a, *b, *d, *r;
    263 
    264   BN_CTX_start(ctx);
    265   a = BN_CTX_get(ctx);
    266   b = BN_CTX_get(ctx);
    267   if (dv != NULL) {
    268     d = dv;
    269   } else {
    270     d = BN_CTX_get(ctx);
    271   }
    272 
    273   if (rem != NULL) {
    274     r = rem;
    275   } else {
    276     r = BN_CTX_get(ctx);
    277   }
    278 
    279   if (a == NULL || b == NULL || d == NULL || r == NULL) {
    280     goto err;
    281   }
    282 
    283   if (BN_ucmp(m, &recp->N) < 0) {
    284     BN_zero(d);
    285     if (!BN_copy(r, m)) {
    286       goto err;
    287     }
    288     BN_CTX_end(ctx);
    289     return 1;
    290   }
    291 
    292   /* We want the remainder
    293    * Given input of ABCDEF / ab
    294    * we need multiply ABCDEF by 3 digests of the reciprocal of ab */
    295 
    296   /* i := max(BN_num_bits(m), 2*BN_num_bits(N)) */
    297   i = BN_num_bits(m);
    298   j = recp->num_bits << 1;
    299   if (j > i) {
    300     i = j;
    301   }
    302 
    303   /* Nr := round(2^i / N) */
    304   if (i != recp->shift) {
    305     recp->shift =
    306         BN_reciprocal(&(recp->Nr), &(recp->N), i,
    307                       ctx); /* BN_reciprocal returns i, or -1 for an error */
    308   }
    309 
    310   if (recp->shift == -1) {
    311     goto err;
    312   }
    313 
    314   /* d := |round(round(m / 2^BN_num_bits(N)) * recp->Nr / 2^(i -
    315    * BN_num_bits(N)))|
    316    *    = |round(round(m / 2^BN_num_bits(N)) * round(2^i / N) / 2^(i -
    317    * BN_num_bits(N)))|
    318    *   <= |(m / 2^BN_num_bits(N)) * (2^i / N) * (2^BN_num_bits(N) / 2^i)|
    319    *    = |m/N| */
    320   if (!BN_rshift(a, m, recp->num_bits)) {
    321     goto err;
    322   }
    323   if (!BN_mul(b, a, &(recp->Nr), ctx)) {
    324     goto err;
    325   }
    326   if (!BN_rshift(d, b, i - recp->num_bits)) {
    327     goto err;
    328   }
    329   d->neg = 0;
    330 
    331   if (!BN_mul(b, &(recp->N), d, ctx)) {
    332     goto err;
    333   }
    334   if (!BN_usub(r, m, b)) {
    335     goto err;
    336   }
    337   r->neg = 0;
    338 
    339   j = 0;
    340   while (BN_ucmp(r, &(recp->N)) >= 0) {
    341     if (j++ > 2) {
    342       OPENSSL_PUT_ERROR(BN, BN_R_BAD_RECIPROCAL);
    343       goto err;
    344     }
    345     if (!BN_usub(r, r, &(recp->N))) {
    346       goto err;
    347     }
    348     if (!BN_add_word(d, 1)) {
    349       goto err;
    350     }
    351   }
    352 
    353   r->neg = BN_is_zero(r) ? 0 : m->neg;
    354   d->neg = m->neg ^ recp->N.neg;
    355   ret = 1;
    356 
    357 err:
    358   BN_CTX_end(ctx);
    359   return ret;
    360 }
    361 
    362 static int BN_mod_mul_reciprocal(BIGNUM *r, const BIGNUM *x, const BIGNUM *y,
    363                                  BN_RECP_CTX *recp, BN_CTX *ctx) {
    364   int ret = 0;
    365   BIGNUM *a;
    366   const BIGNUM *ca;
    367 
    368   BN_CTX_start(ctx);
    369   a = BN_CTX_get(ctx);
    370   if (a == NULL) {
    371     goto err;
    372   }
    373 
    374   if (y != NULL) {
    375     if (x == y) {
    376       if (!BN_sqr(a, x, ctx)) {
    377         goto err;
    378       }
    379     } else {
    380       if (!BN_mul(a, x, y, ctx)) {
    381         goto err;
    382       }
    383     }
    384     ca = a;
    385   } else {
    386     ca = x; /* Just do the mod */
    387   }
    388 
    389   ret = BN_div_recp(NULL, r, ca, recp, ctx);
    390 
    391 err:
    392   BN_CTX_end(ctx);
    393   return ret;
    394 }
    395 
    396 /* BN_window_bits_for_exponent_size -- macro for sliding window mod_exp
    397  * functions
    398  *
    399  * For window size 'w' (w >= 2) and a random 'b' bits exponent, the number of
    400  * multiplications is a constant plus on average
    401  *
    402  *    2^(w-1) + (b-w)/(w+1);
    403  *
    404  * here 2^(w-1)  is for precomputing the table (we actually need entries only
    405  * for windows that have the lowest bit set), and (b-w)/(w+1)  is an
    406  * approximation for the expected number of w-bit windows, not counting the
    407  * first one.
    408  *
    409  * Thus we should use
    410  *
    411  *    w >= 6  if        b > 671
    412  *     w = 5  if  671 > b > 239
    413  *     w = 4  if  239 > b >  79
    414  *     w = 3  if   79 > b >  23
    415  *    w <= 2  if   23 > b
    416  *
    417  * (with draws in between).  Very small exponents are often selected
    418  * with low Hamming weight, so we use  w = 1  for b <= 23. */
    419 #define BN_window_bits_for_exponent_size(b) \
    420 		((b) > 671 ? 6 : \
    421 		 (b) > 239 ? 5 : \
    422 		 (b) >  79 ? 4 : \
    423 		 (b) >  23 ? 3 : 1)
    424 
    425 static int mod_exp_recp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p,
    426                         const BIGNUM *m, BN_CTX *ctx) {
    427   int i, j, bits, ret = 0, wstart, window;
    428   int start = 1;
    429   BIGNUM *aa;
    430   /* Table of variables obtained from 'ctx' */
    431   BIGNUM *val[TABLE_SIZE];
    432   BN_RECP_CTX recp;
    433 
    434   bits = BN_num_bits(p);
    435 
    436   if (bits == 0) {
    437     /* x**0 mod 1 is still zero. */
    438     if (BN_is_one(m)) {
    439       BN_zero(r);
    440       return 1;
    441     }
    442     return BN_one(r);
    443   }
    444 
    445   BN_CTX_start(ctx);
    446   aa = BN_CTX_get(ctx);
    447   val[0] = BN_CTX_get(ctx);
    448   if (!aa || !val[0]) {
    449     goto err;
    450   }
    451 
    452   BN_RECP_CTX_init(&recp);
    453   if (m->neg) {
    454     /* ignore sign of 'm' */
    455     if (!BN_copy(aa, m)) {
    456       goto err;
    457     }
    458     aa->neg = 0;
    459     if (BN_RECP_CTX_set(&recp, aa, ctx) <= 0) {
    460       goto err;
    461     }
    462   } else {
    463     if (BN_RECP_CTX_set(&recp, m, ctx) <= 0) {
    464       goto err;
    465     }
    466   }
    467 
    468   if (!BN_nnmod(val[0], a, m, ctx)) {
    469     goto err; /* 1 */
    470   }
    471   if (BN_is_zero(val[0])) {
    472     BN_zero(r);
    473     ret = 1;
    474     goto err;
    475   }
    476 
    477   window = BN_window_bits_for_exponent_size(bits);
    478   if (window > 1) {
    479     if (!BN_mod_mul_reciprocal(aa, val[0], val[0], &recp, ctx)) {
    480       goto err; /* 2 */
    481     }
    482     j = 1 << (window - 1);
    483     for (i = 1; i < j; i++) {
    484       if (((val[i] = BN_CTX_get(ctx)) == NULL) ||
    485           !BN_mod_mul_reciprocal(val[i], val[i - 1], aa, &recp, ctx)) {
    486         goto err;
    487       }
    488     }
    489   }
    490 
    491   start = 1; /* This is used to avoid multiplication etc
    492               * when there is only the value '1' in the
    493               * buffer. */
    494   wstart = bits - 1; /* The top bit of the window */
    495 
    496   if (!BN_one(r)) {
    497     goto err;
    498   }
    499 
    500   for (;;) {
    501     int wvalue; /* The 'value' of the window */
    502     int wend; /* The bottom bit of the window */
    503 
    504     if (BN_is_bit_set(p, wstart) == 0) {
    505       if (!start) {
    506         if (!BN_mod_mul_reciprocal(r, r, r, &recp, ctx)) {
    507           goto err;
    508         }
    509       }
    510       if (wstart == 0) {
    511         break;
    512       }
    513       wstart--;
    514       continue;
    515     }
    516 
    517     /* We now have wstart on a 'set' bit, we now need to work out
    518      * how bit a window to do.  To do this we need to scan
    519      * forward until the last set bit before the end of the
    520      * window */
    521     wvalue = 1;
    522     wend = 0;
    523     for (i = 1; i < window; i++) {
    524       if (wstart - i < 0) {
    525         break;
    526       }
    527       if (BN_is_bit_set(p, wstart - i)) {
    528         wvalue <<= (i - wend);
    529         wvalue |= 1;
    530         wend = i;
    531       }
    532     }
    533 
    534     /* wend is the size of the current window */
    535     j = wend + 1;
    536     /* add the 'bytes above' */
    537     if (!start) {
    538       for (i = 0; i < j; i++) {
    539         if (!BN_mod_mul_reciprocal(r, r, r, &recp, ctx)) {
    540           goto err;
    541         }
    542       }
    543     }
    544 
    545     /* wvalue will be an odd number < 2^window */
    546     if (!BN_mod_mul_reciprocal(r, r, val[wvalue >> 1], &recp, ctx)) {
    547       goto err;
    548     }
    549 
    550     /* move the 'window' down further */
    551     wstart -= wend + 1;
    552     start = 0;
    553     if (wstart < 0) {
    554       break;
    555     }
    556   }
    557   ret = 1;
    558 
    559 err:
    560   BN_CTX_end(ctx);
    561   BN_RECP_CTX_free(&recp);
    562   return ret;
    563 }
    564 
    565 int BN_mod_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m,
    566                BN_CTX *ctx) {
    567   if (BN_is_odd(m)) {
    568     return BN_mod_exp_mont(r, a, p, m, ctx, NULL);
    569   }
    570 
    571   return mod_exp_recp(r, a, p, m, ctx);
    572 }
    573 
    574 int BN_mod_exp_mont(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
    575                     const BIGNUM *m, BN_CTX *ctx, const BN_MONT_CTX *mont) {
    576   int i, j, bits, ret = 0, wstart, window;
    577   int start = 1;
    578   BIGNUM *d, *r;
    579   const BIGNUM *aa;
    580   /* Table of variables obtained from 'ctx' */
    581   BIGNUM *val[TABLE_SIZE];
    582   BN_MONT_CTX *new_mont = NULL;
    583 
    584   if (!BN_is_odd(m)) {
    585     OPENSSL_PUT_ERROR(BN, BN_R_CALLED_WITH_EVEN_MODULUS);
    586     return 0;
    587   }
    588   bits = BN_num_bits(p);
    589   if (bits == 0) {
    590     /* x**0 mod 1 is still zero. */
    591     if (BN_is_one(m)) {
    592       BN_zero(rr);
    593       return 1;
    594     }
    595     return BN_one(rr);
    596   }
    597 
    598   BN_CTX_start(ctx);
    599   d = BN_CTX_get(ctx);
    600   r = BN_CTX_get(ctx);
    601   val[0] = BN_CTX_get(ctx);
    602   if (!d || !r || !val[0]) {
    603     goto err;
    604   }
    605 
    606   /* Allocate a montgomery context if it was not supplied by the caller. */
    607   if (mont == NULL) {
    608     new_mont = BN_MONT_CTX_new();
    609     if (new_mont == NULL || !BN_MONT_CTX_set(new_mont, m, ctx)) {
    610       goto err;
    611     }
    612     mont = new_mont;
    613   }
    614 
    615   if (a->neg || BN_ucmp(a, m) >= 0) {
    616     if (!BN_nnmod(val[0], a, m, ctx)) {
    617       goto err;
    618     }
    619     aa = val[0];
    620   } else {
    621     aa = a;
    622   }
    623 
    624   if (BN_is_zero(aa)) {
    625     BN_zero(rr);
    626     ret = 1;
    627     goto err;
    628   }
    629   if (!BN_to_montgomery(val[0], aa, mont, ctx)) {
    630     goto err; /* 1 */
    631   }
    632 
    633   window = BN_window_bits_for_exponent_size(bits);
    634   if (window > 1) {
    635     if (!BN_mod_mul_montgomery(d, val[0], val[0], mont, ctx)) {
    636       goto err; /* 2 */
    637     }
    638     j = 1 << (window - 1);
    639     for (i = 1; i < j; i++) {
    640       if (((val[i] = BN_CTX_get(ctx)) == NULL) ||
    641           !BN_mod_mul_montgomery(val[i], val[i - 1], d, mont, ctx)) {
    642         goto err;
    643       }
    644     }
    645   }
    646 
    647   start = 1; /* This is used to avoid multiplication etc
    648               * when there is only the value '1' in the
    649               * buffer. */
    650   wstart = bits - 1; /* The top bit of the window */
    651 
    652   j = m->top; /* borrow j */
    653   if (m->d[j - 1] & (((BN_ULONG)1) << (BN_BITS2 - 1))) {
    654     if (bn_wexpand(r, j) == NULL) {
    655       goto err;
    656     }
    657     /* 2^(top*BN_BITS2) - m */
    658     r->d[0] = (0 - m->d[0]) & BN_MASK2;
    659     for (i = 1; i < j; i++) {
    660       r->d[i] = (~m->d[i]) & BN_MASK2;
    661     }
    662     r->top = j;
    663     /* Upper words will be zero if the corresponding words of 'm'
    664      * were 0xfff[...], so decrement r->top accordingly. */
    665     bn_correct_top(r);
    666   } else if (!BN_to_montgomery(r, BN_value_one(), mont, ctx)) {
    667     goto err;
    668   }
    669 
    670   for (;;) {
    671     int wvalue; /* The 'value' of the window */
    672     int wend; /* The bottom bit of the window */
    673 
    674     if (BN_is_bit_set(p, wstart) == 0) {
    675       if (!start && !BN_mod_mul_montgomery(r, r, r, mont, ctx)) {
    676         goto err;
    677       }
    678       if (wstart == 0) {
    679         break;
    680       }
    681       wstart--;
    682       continue;
    683     }
    684 
    685     /* We now have wstart on a 'set' bit, we now need to work out how bit a
    686      * window to do.  To do this we need to scan forward until the last set bit
    687      * before the end of the window */
    688     wvalue = 1;
    689     wend = 0;
    690     for (i = 1; i < window; i++) {
    691       if (wstart - i < 0) {
    692         break;
    693       }
    694       if (BN_is_bit_set(p, wstart - i)) {
    695         wvalue <<= (i - wend);
    696         wvalue |= 1;
    697         wend = i;
    698       }
    699     }
    700 
    701     /* wend is the size of the current window */
    702     j = wend + 1;
    703     /* add the 'bytes above' */
    704     if (!start) {
    705       for (i = 0; i < j; i++) {
    706         if (!BN_mod_mul_montgomery(r, r, r, mont, ctx)) {
    707           goto err;
    708         }
    709       }
    710     }
    711 
    712     /* wvalue will be an odd number < 2^window */
    713     if (!BN_mod_mul_montgomery(r, r, val[wvalue >> 1], mont, ctx)) {
    714       goto err;
    715     }
    716 
    717     /* move the 'window' down further */
    718     wstart -= wend + 1;
    719     start = 0;
    720     if (wstart < 0) {
    721       break;
    722     }
    723   }
    724 
    725   if (!BN_from_montgomery(rr, r, mont, ctx)) {
    726     goto err;
    727   }
    728   ret = 1;
    729 
    730 err:
    731   BN_MONT_CTX_free(new_mont);
    732   BN_CTX_end(ctx);
    733   return ret;
    734 }
    735 
    736 /* BN_mod_exp_mont_consttime() stores the precomputed powers in a specific
    737  * layout so that accessing any of these table values shows the same access
    738  * pattern as far as cache lines are concerned. The following functions are
    739  * used to transfer a BIGNUM from/to that table. */
    740 static int copy_to_prebuf(const BIGNUM *b, int top, unsigned char *buf, int idx,
    741                           int window) {
    742   int i, j;
    743   const int width = 1 << window;
    744   BN_ULONG *table = (BN_ULONG *) buf;
    745 
    746   if (top > b->top) {
    747     top = b->top; /* this works because 'buf' is explicitly zeroed */
    748   }
    749 
    750   for (i = 0, j = idx; i < top; i++, j += width)  {
    751     table[j] = b->d[i];
    752   }
    753 
    754   return 1;
    755 }
    756 
    757 static int copy_from_prebuf(BIGNUM *b, int top, unsigned char *buf, int idx,
    758                             int window) {
    759   int i, j;
    760   const int width = 1 << window;
    761   volatile BN_ULONG *table = (volatile BN_ULONG *)buf;
    762 
    763   if (bn_wexpand(b, top) == NULL) {
    764     return 0;
    765   }
    766 
    767   if (window <= 3) {
    768     for (i = 0; i < top; i++, table += width) {
    769       BN_ULONG acc = 0;
    770 
    771       for (j = 0; j < width; j++) {
    772         acc |= table[j] & ((BN_ULONG)0 - (constant_time_eq_int(j, idx) & 1));
    773       }
    774 
    775       b->d[i] = acc;
    776     }
    777   } else {
    778     int xstride = 1 << (window - 2);
    779     BN_ULONG y0, y1, y2, y3;
    780 
    781     i = idx >> (window - 2); /* equivalent of idx / xstride */
    782     idx &= xstride - 1;      /* equivalent of idx % xstride */
    783 
    784     y0 = (BN_ULONG)0 - (constant_time_eq_int(i, 0) & 1);
    785     y1 = (BN_ULONG)0 - (constant_time_eq_int(i, 1) & 1);
    786     y2 = (BN_ULONG)0 - (constant_time_eq_int(i, 2) & 1);
    787     y3 = (BN_ULONG)0 - (constant_time_eq_int(i, 3) & 1);
    788 
    789     for (i = 0; i < top; i++, table += width) {
    790       BN_ULONG acc = 0;
    791 
    792       for (j = 0; j < xstride; j++) {
    793         acc |= ((table[j + 0 * xstride] & y0) | (table[j + 1 * xstride] & y1) |
    794                 (table[j + 2 * xstride] & y2) | (table[j + 3 * xstride] & y3)) &
    795                ((BN_ULONG)0 - (constant_time_eq_int(j, idx) & 1));
    796       }
    797 
    798       b->d[i] = acc;
    799     }
    800   }
    801 
    802   b->top = top;
    803   bn_correct_top(b);
    804   return 1;
    805 }
    806 
    807 /* BN_mod_exp_mont_conttime is based on the assumption that the L1 data cache
    808  * line width of the target processor is at least the following value. */
    809 #define MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH (64)
    810 #define MOD_EXP_CTIME_MIN_CACHE_LINE_MASK \
    811   (MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH - 1)
    812 
    813 /* Window sizes optimized for fixed window size modular exponentiation
    814  * algorithm (BN_mod_exp_mont_consttime).
    815  *
    816  * To achieve the security goals of BN_mode_exp_mont_consttime, the maximum
    817  * size of the window must not exceed
    818  * log_2(MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH).
    819  *
    820  * Window size thresholds are defined for cache line sizes of 32 and 64, cache
    821  * line sizes where log_2(32)=5 and log_2(64)=6 respectively. A window size of
    822  * 7 should only be used on processors that have a 128 byte or greater cache
    823  * line size. */
    824 #if MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH == 64
    825 
    826 #define BN_window_bits_for_ctime_exponent_size(b) \
    827   ((b) > 937 ? 6 : (b) > 306 ? 5 : (b) > 89 ? 4 : (b) > 22 ? 3 : 1)
    828 #define BN_MAX_WINDOW_BITS_FOR_CTIME_EXPONENT_SIZE (6)
    829 
    830 #elif MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH == 32
    831 
    832 #define BN_window_bits_for_ctime_exponent_size(b) \
    833   ((b) > 306 ? 5 : (b) > 89 ? 4 : (b) > 22 ? 3 : 1)
    834 #define BN_MAX_WINDOW_BITS_FOR_CTIME_EXPONENT_SIZE (5)
    835 
    836 #endif
    837 
    838 /* Given a pointer value, compute the next address that is a cache line
    839  * multiple. */
    840 #define MOD_EXP_CTIME_ALIGN(x_)          \
    841   ((unsigned char *)(x_) +               \
    842    (MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH - \
    843     (((size_t)(x_)) & (MOD_EXP_CTIME_MIN_CACHE_LINE_MASK))))
    844 
    845 /* This variant of BN_mod_exp_mont() uses fixed windows and the special
    846  * precomputation memory layout to limit data-dependency to a minimum
    847  * to protect secret exponents (cf. the hyper-threading timing attacks
    848  * pointed out by Colin Percival,
    849  * http://www.daemonology.net/hyperthreading-considered-harmful/)
    850  */
    851 int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
    852                               const BIGNUM *m, BN_CTX *ctx,
    853                               const BN_MONT_CTX *mont) {
    854   int i, bits, ret = 0, window, wvalue;
    855   int top;
    856   BN_MONT_CTX *new_mont = NULL;
    857 
    858   int numPowers;
    859   unsigned char *powerbufFree = NULL;
    860   int powerbufLen = 0;
    861   unsigned char *powerbuf = NULL;
    862   BIGNUM tmp, am;
    863   BIGNUM *new_a = NULL;
    864 
    865   if (!BN_is_odd(m)) {
    866     OPENSSL_PUT_ERROR(BN, BN_R_CALLED_WITH_EVEN_MODULUS);
    867     return 0;
    868   }
    869 
    870   top = m->top;
    871 
    872   bits = BN_num_bits(p);
    873   if (bits == 0) {
    874     /* x**0 mod 1 is still zero. */
    875     if (BN_is_one(m)) {
    876       BN_zero(rr);
    877       return 1;
    878     }
    879     return BN_one(rr);
    880   }
    881 
    882   /* Allocate a montgomery context if it was not supplied by the caller. */
    883   if (mont == NULL) {
    884     new_mont = BN_MONT_CTX_new();
    885     if (new_mont == NULL || !BN_MONT_CTX_set(new_mont, m, ctx)) {
    886       goto err;
    887     }
    888     mont = new_mont;
    889   }
    890 
    891   if (a->neg || BN_ucmp(a, m) >= 0) {
    892     new_a = BN_new();
    893     if (new_a == NULL ||
    894         !BN_nnmod(new_a, a, m, ctx)) {
    895       goto err;
    896     }
    897     a = new_a;
    898   }
    899 
    900 #ifdef RSAZ_ENABLED
    901   /* If the size of the operands allow it, perform the optimized
    902    * RSAZ exponentiation. For further information see
    903    * crypto/bn/rsaz_exp.c and accompanying assembly modules. */
    904   if ((16 == a->top) && (16 == p->top) && (BN_num_bits(m) == 1024) &&
    905       rsaz_avx2_eligible()) {
    906     if (NULL == bn_wexpand(rr, 16)) {
    907       goto err;
    908     }
    909     RSAZ_1024_mod_exp_avx2(rr->d, a->d, p->d, m->d, mont->RR.d, mont->n0[0]);
    910     rr->top = 16;
    911     rr->neg = 0;
    912     bn_correct_top(rr);
    913     ret = 1;
    914     goto err;
    915   }
    916 #endif
    917 
    918   /* Get the window size to use with size of p. */
    919   window = BN_window_bits_for_ctime_exponent_size(bits);
    920 #if defined(OPENSSL_BN_ASM_MONT5)
    921   if (window >= 5) {
    922     window = 5; /* ~5% improvement for RSA2048 sign, and even for RSA4096 */
    923     /* reserve space for mont->N.d[] copy */
    924     powerbufLen += top * sizeof(mont->N.d[0]);
    925   }
    926 #endif
    927 
    928   /* Allocate a buffer large enough to hold all of the pre-computed
    929    * powers of am, am itself and tmp.
    930    */
    931   numPowers = 1 << window;
    932   powerbufLen +=
    933       sizeof(m->d[0]) *
    934       (top * numPowers + ((2 * top) > numPowers ? (2 * top) : numPowers));
    935 #ifdef alloca
    936   if (powerbufLen < 3072) {
    937     powerbufFree = alloca(powerbufLen + MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH);
    938   } else
    939 #endif
    940   {
    941     if ((powerbufFree = OPENSSL_malloc(
    942             powerbufLen + MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH)) == NULL) {
    943       goto err;
    944     }
    945   }
    946 
    947   powerbuf = MOD_EXP_CTIME_ALIGN(powerbufFree);
    948   OPENSSL_memset(powerbuf, 0, powerbufLen);
    949 
    950 #ifdef alloca
    951   if (powerbufLen < 3072) {
    952     powerbufFree = NULL;
    953   }
    954 #endif
    955 
    956   /* lay down tmp and am right after powers table */
    957   tmp.d = (BN_ULONG *)(powerbuf + sizeof(m->d[0]) * top * numPowers);
    958   am.d = tmp.d + top;
    959   tmp.top = am.top = 0;
    960   tmp.dmax = am.dmax = top;
    961   tmp.neg = am.neg = 0;
    962   tmp.flags = am.flags = BN_FLG_STATIC_DATA;
    963 
    964 /* prepare a^0 in Montgomery domain */
    965 /* by Shay Gueron's suggestion */
    966   if (m->d[top - 1] & (((BN_ULONG)1) << (BN_BITS2 - 1))) {
    967     /* 2^(top*BN_BITS2) - m */
    968     tmp.d[0] = (0 - m->d[0]) & BN_MASK2;
    969     for (i = 1; i < top; i++) {
    970       tmp.d[i] = (~m->d[i]) & BN_MASK2;
    971     }
    972     tmp.top = top;
    973   } else if (!BN_to_montgomery(&tmp, BN_value_one(), mont, ctx)) {
    974     goto err;
    975   }
    976 
    977   /* prepare a^1 in Montgomery domain */
    978   assert(!a->neg);
    979   assert(BN_ucmp(a, m) < 0);
    980   if (!BN_to_montgomery(&am, a, mont, ctx)) {
    981     goto err;
    982   }
    983 
    984 #if defined(OPENSSL_BN_ASM_MONT5)
    985   /* This optimization uses ideas from http://eprint.iacr.org/2011/239,
    986    * specifically optimization of cache-timing attack countermeasures
    987    * and pre-computation optimization. */
    988 
    989   /* Dedicated window==4 case improves 512-bit RSA sign by ~15%, but as
    990    * 512-bit RSA is hardly relevant, we omit it to spare size... */
    991   if (window == 5 && top > 1) {
    992     const BN_ULONG *n0 = mont->n0;
    993     BN_ULONG *np;
    994 
    995     /* BN_to_montgomery can contaminate words above .top
    996      * [in BN_DEBUG[_DEBUG] build]... */
    997     for (i = am.top; i < top; i++) {
    998       am.d[i] = 0;
    999     }
   1000     for (i = tmp.top; i < top; i++) {
   1001       tmp.d[i] = 0;
   1002     }
   1003 
   1004     /* copy mont->N.d[] to improve cache locality */
   1005     for (np = am.d + top, i = 0; i < top; i++) {
   1006       np[i] = mont->N.d[i];
   1007     }
   1008 
   1009     bn_scatter5(tmp.d, top, powerbuf, 0);
   1010     bn_scatter5(am.d, am.top, powerbuf, 1);
   1011     bn_mul_mont(tmp.d, am.d, am.d, np, n0, top);
   1012     bn_scatter5(tmp.d, top, powerbuf, 2);
   1013 
   1014     /* same as above, but uses squaring for 1/2 of operations */
   1015     for (i = 4; i < 32; i *= 2) {
   1016       bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
   1017       bn_scatter5(tmp.d, top, powerbuf, i);
   1018     }
   1019     for (i = 3; i < 8; i += 2) {
   1020       int j;
   1021       bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np, n0, top, i - 1);
   1022       bn_scatter5(tmp.d, top, powerbuf, i);
   1023       for (j = 2 * i; j < 32; j *= 2) {
   1024         bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
   1025         bn_scatter5(tmp.d, top, powerbuf, j);
   1026       }
   1027     }
   1028     for (; i < 16; i += 2) {
   1029       bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np, n0, top, i - 1);
   1030       bn_scatter5(tmp.d, top, powerbuf, i);
   1031       bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
   1032       bn_scatter5(tmp.d, top, powerbuf, 2 * i);
   1033     }
   1034     for (; i < 32; i += 2) {
   1035       bn_mul_mont_gather5(tmp.d, am.d, powerbuf, np, n0, top, i - 1);
   1036       bn_scatter5(tmp.d, top, powerbuf, i);
   1037     }
   1038 
   1039     bits--;
   1040     for (wvalue = 0, i = bits % 5; i >= 0; i--, bits--) {
   1041       wvalue = (wvalue << 1) + BN_is_bit_set(p, bits);
   1042     }
   1043     bn_gather5(tmp.d, top, powerbuf, wvalue);
   1044 
   1045     /* At this point |bits| is 4 mod 5 and at least -1. (|bits| is the first bit
   1046      * that has not been read yet.) */
   1047     assert(bits >= -1 && (bits == -1 || bits % 5 == 4));
   1048 
   1049     /* Scan the exponent one window at a time starting from the most
   1050      * significant bits.
   1051      */
   1052     if (top & 7) {
   1053       while (bits >= 0) {
   1054         for (wvalue = 0, i = 0; i < 5; i++, bits--) {
   1055           wvalue = (wvalue << 1) + BN_is_bit_set(p, bits);
   1056         }
   1057 
   1058         bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
   1059         bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
   1060         bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
   1061         bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
   1062         bn_mul_mont(tmp.d, tmp.d, tmp.d, np, n0, top);
   1063         bn_mul_mont_gather5(tmp.d, tmp.d, powerbuf, np, n0, top, wvalue);
   1064       }
   1065     } else {
   1066       const uint8_t *p_bytes = (const uint8_t *)p->d;
   1067       int max_bits = p->top * BN_BITS2;
   1068       assert(bits < max_bits);
   1069       /* |p = 0| has been handled as a special case, so |max_bits| is at least
   1070        * one word. */
   1071       assert(max_bits >= 64);
   1072 
   1073       /* If the first bit to be read lands in the last byte, unroll the first
   1074        * iteration to avoid reading past the bounds of |p->d|. (After the first
   1075        * iteration, we are guaranteed to be past the last byte.) Note |bits|
   1076        * here is the top bit, inclusive. */
   1077       if (bits - 4 >= max_bits - 8) {
   1078         /* Read five bits from |bits-4| through |bits|, inclusive. */
   1079         wvalue = p_bytes[p->top * BN_BYTES - 1];
   1080         wvalue >>= (bits - 4) & 7;
   1081         wvalue &= 0x1f;
   1082         bits -= 5;
   1083         bn_power5(tmp.d, tmp.d, powerbuf, np, n0, top, wvalue);
   1084       }
   1085       while (bits >= 0) {
   1086         /* Read five bits from |bits-4| through |bits|, inclusive. */
   1087         int first_bit = bits - 4;
   1088         wvalue = *(const uint16_t *) (p_bytes + (first_bit >> 3));
   1089         wvalue >>= first_bit & 7;
   1090         wvalue &= 0x1f;
   1091         bits -= 5;
   1092         bn_power5(tmp.d, tmp.d, powerbuf, np, n0, top, wvalue);
   1093       }
   1094     }
   1095 
   1096     ret = bn_from_montgomery(tmp.d, tmp.d, NULL, np, n0, top);
   1097     tmp.top = top;
   1098     bn_correct_top(&tmp);
   1099     if (ret) {
   1100       if (!BN_copy(rr, &tmp)) {
   1101         ret = 0;
   1102       }
   1103       goto err; /* non-zero ret means it's not error */
   1104     }
   1105   } else
   1106 #endif
   1107   {
   1108     if (!copy_to_prebuf(&tmp, top, powerbuf, 0, window) ||
   1109         !copy_to_prebuf(&am, top, powerbuf, 1, window)) {
   1110       goto err;
   1111     }
   1112 
   1113     /* If the window size is greater than 1, then calculate
   1114      * val[i=2..2^winsize-1]. Powers are computed as a*a^(i-1)
   1115      * (even powers could instead be computed as (a^(i/2))^2
   1116      * to use the slight performance advantage of sqr over mul).
   1117      */
   1118     if (window > 1) {
   1119       if (!BN_mod_mul_montgomery(&tmp, &am, &am, mont, ctx) ||
   1120           !copy_to_prebuf(&tmp, top, powerbuf, 2, window)) {
   1121         goto err;
   1122       }
   1123       for (i = 3; i < numPowers; i++) {
   1124         /* Calculate a^i = a^(i-1) * a */
   1125         if (!BN_mod_mul_montgomery(&tmp, &am, &tmp, mont, ctx) ||
   1126             !copy_to_prebuf(&tmp, top, powerbuf, i, window)) {
   1127           goto err;
   1128         }
   1129       }
   1130     }
   1131 
   1132     bits--;
   1133     for (wvalue = 0, i = bits % window; i >= 0; i--, bits--) {
   1134       wvalue = (wvalue << 1) + BN_is_bit_set(p, bits);
   1135     }
   1136     if (!copy_from_prebuf(&tmp, top, powerbuf, wvalue, window)) {
   1137       goto err;
   1138     }
   1139 
   1140     /* Scan the exponent one window at a time starting from the most
   1141      * significant bits.
   1142      */
   1143     while (bits >= 0) {
   1144       wvalue = 0; /* The 'value' of the window */
   1145 
   1146       /* Scan the window, squaring the result as we go */
   1147       for (i = 0; i < window; i++, bits--) {
   1148         if (!BN_mod_mul_montgomery(&tmp, &tmp, &tmp, mont, ctx)) {
   1149           goto err;
   1150         }
   1151         wvalue = (wvalue << 1) + BN_is_bit_set(p, bits);
   1152       }
   1153 
   1154       /* Fetch the appropriate pre-computed value from the pre-buf */
   1155       if (!copy_from_prebuf(&am, top, powerbuf, wvalue, window)) {
   1156         goto err;
   1157       }
   1158 
   1159       /* Multiply the result into the intermediate result */
   1160       if (!BN_mod_mul_montgomery(&tmp, &tmp, &am, mont, ctx)) {
   1161         goto err;
   1162       }
   1163     }
   1164   }
   1165 
   1166   /* Convert the final result from montgomery to standard format */
   1167   if (!BN_from_montgomery(rr, &tmp, mont, ctx)) {
   1168     goto err;
   1169   }
   1170   ret = 1;
   1171 
   1172 err:
   1173   BN_MONT_CTX_free(new_mont);
   1174   BN_clear_free(new_a);
   1175   if (powerbuf != NULL) {
   1176     OPENSSL_cleanse(powerbuf, powerbufLen);
   1177     OPENSSL_free(powerbufFree);
   1178   }
   1179   return (ret);
   1180 }
   1181 
   1182 int BN_mod_exp_mont_word(BIGNUM *rr, BN_ULONG a, const BIGNUM *p,
   1183                          const BIGNUM *m, BN_CTX *ctx,
   1184                          const BN_MONT_CTX *mont) {
   1185   BIGNUM a_bignum;
   1186   BN_init(&a_bignum);
   1187 
   1188   int ret = 0;
   1189 
   1190   if (!BN_set_word(&a_bignum, a)) {
   1191     OPENSSL_PUT_ERROR(BN, ERR_R_INTERNAL_ERROR);
   1192     goto err;
   1193   }
   1194 
   1195   ret = BN_mod_exp_mont(rr, &a_bignum, p, m, ctx, mont);
   1196 
   1197 err:
   1198   BN_free(&a_bignum);
   1199 
   1200   return ret;
   1201 }
   1202 
   1203 #define TABLE_SIZE 32
   1204 
   1205 int BN_mod_exp2_mont(BIGNUM *rr, const BIGNUM *a1, const BIGNUM *p1,
   1206                      const BIGNUM *a2, const BIGNUM *p2, const BIGNUM *m,
   1207                      BN_CTX *ctx, const BN_MONT_CTX *mont) {
   1208   BIGNUM tmp;
   1209   BN_init(&tmp);
   1210 
   1211   int ret = 0;
   1212   BN_MONT_CTX *new_mont = NULL;
   1213 
   1214   /* Allocate a montgomery context if it was not supplied by the caller. */
   1215   if (mont == NULL) {
   1216     new_mont = BN_MONT_CTX_new();
   1217     if (new_mont == NULL || !BN_MONT_CTX_set(new_mont, m, ctx)) {
   1218       goto err;
   1219     }
   1220     mont = new_mont;
   1221   }
   1222 
   1223   /* BN_mod_mul_montgomery removes one Montgomery factor, so passing one
   1224    * Montgomery-encoded and one non-Montgomery-encoded value gives a
   1225    * non-Montgomery-encoded result. */
   1226   if (!BN_mod_exp_mont(rr, a1, p1, m, ctx, mont) ||
   1227       !BN_mod_exp_mont(&tmp, a2, p2, m, ctx, mont) ||
   1228       !BN_to_montgomery(rr, rr, mont, ctx) ||
   1229       !BN_mod_mul_montgomery(rr, rr, &tmp, mont, ctx)) {
   1230     goto err;
   1231   }
   1232 
   1233   ret = 1;
   1234 
   1235 err:
   1236   BN_MONT_CTX_free(new_mont);
   1237   BN_free(&tmp);
   1238 
   1239   return ret;
   1240 }
   1241