Home | History | Annotate | Download | only in bn
      1 /* crypto/bn/bn_exp.c */
      2 /* Copyright (C) 1995-1998 Eric Young (eay (at) cryptsoft.com)
      3  * All rights reserved.
      4  *
      5  * This package is an SSL implementation written
      6  * by Eric Young (eay (at) cryptsoft.com).
      7  * The implementation was written so as to conform with Netscapes SSL.
      8  *
      9  * This library is free for commercial and non-commercial use as long as
     10  * the following conditions are aheared to.  The following conditions
     11  * apply to all code found in this distribution, be it the RC4, RSA,
     12  * lhash, DES, etc., code; not just the SSL code.  The SSL documentation
     13  * included with this distribution is covered by the same copyright terms
     14  * except that the holder is Tim Hudson (tjh (at) cryptsoft.com).
     15  *
     16  * Copyright remains Eric Young's, and as such any Copyright notices in
     17  * the code are not to be removed.
     18  * If this package is used in a product, Eric Young should be given attribution
     19  * as the author of the parts of the library used.
     20  * This can be in the form of a textual message at program startup or
     21  * in documentation (online or textual) provided with the package.
     22  *
     23  * Redistribution and use in source and binary forms, with or without
     24  * modification, are permitted provided that the following conditions
     25  * are met:
     26  * 1. Redistributions of source code must retain the copyright
     27  *    notice, this list of conditions and the following disclaimer.
     28  * 2. Redistributions in binary form must reproduce the above copyright
     29  *    notice, this list of conditions and the following disclaimer in the
     30  *    documentation and/or other materials provided with the distribution.
     31  * 3. All advertising materials mentioning features or use of this software
     32  *    must display the following acknowledgement:
     33  *    "This product includes cryptographic software written by
     34  *     Eric Young (eay (at) cryptsoft.com)"
     35  *    The word 'cryptographic' can be left out if the rouines from the library
     36  *    being used are not cryptographic related :-).
     37  * 4. If you include any Windows specific code (or a derivative thereof) from
     38  *    the apps directory (application code) you must include an acknowledgement:
     39  *    "This product includes software written by Tim Hudson (tjh (at) cryptsoft.com)"
     40  *
     41  * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
     42  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     43  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     44  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
     45  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     46  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     47  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     48  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     49  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     50  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     51  * SUCH DAMAGE.
     52  *
     53  * The licence and distribution terms for any publically available version or
     54  * derivative of this code cannot be changed.  i.e. this code cannot simply be
     55  * copied and put under another distribution licence
     56  * [including the GNU Public Licence.]
     57  */
     58 /* ====================================================================
     59  * Copyright (c) 1998-2005 The OpenSSL Project.  All rights reserved.
     60  *
     61  * Redistribution and use in source and binary forms, with or without
     62  * modification, are permitted provided that the following conditions
     63  * are met:
     64  *
     65  * 1. Redistributions of source code must retain the above copyright
     66  *    notice, this list of conditions and the following disclaimer.
     67  *
     68  * 2. Redistributions in binary form must reproduce the above copyright
     69  *    notice, this list of conditions and the following disclaimer in
     70  *    the documentation and/or other materials provided with the
     71  *    distribution.
     72  *
     73  * 3. All advertising materials mentioning features or use of this
     74  *    software must display the following acknowledgment:
     75  *    "This product includes software developed by the OpenSSL Project
     76  *    for use in the OpenSSL Toolkit. (http://www.openssl.org/)"
     77  *
     78  * 4. The names "OpenSSL Toolkit" and "OpenSSL Project" must not be used to
     79  *    endorse or promote products derived from this software without
     80  *    prior written permission. For written permission, please contact
     81  *    openssl-core (at) openssl.org.
     82  *
     83  * 5. Products derived from this software may not be called "OpenSSL"
     84  *    nor may "OpenSSL" appear in their names without prior written
     85  *    permission of the OpenSSL Project.
     86  *
     87  * 6. Redistributions of any form whatsoever must retain the following
     88  *    acknowledgment:
     89  *    "This product includes software developed by the OpenSSL Project
     90  *    for use in the OpenSSL Toolkit (http://www.openssl.org/)"
     91  *
     92  * THIS SOFTWARE IS PROVIDED BY THE OpenSSL PROJECT ``AS IS'' AND ANY
     93  * EXPRESSED OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     94  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
     95  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE OpenSSL PROJECT OR
     96  * ITS CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
     97  * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
     98  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
     99  * LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
    100  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT,
    101  * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
    102  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED
    103  * OF THE POSSIBILITY OF SUCH DAMAGE.
    104  * ====================================================================
    105  *
    106  * This product includes cryptographic software written by Eric Young
    107  * (eay (at) cryptsoft.com).  This product includes software written by Tim
    108  * Hudson (tjh (at) cryptsoft.com).
    109  *
    110  */
    111 
    112 
    113 #include "cryptlib.h"
    114 #include "bn_lcl.h"
    115 
    116 #include <stdlib.h>
    117 #ifdef _WIN32
    118 # include <malloc.h>
    119 # ifndef alloca
    120 #  define alloca _alloca
    121 # endif
    122 #elif defined(__GNUC__)
    123 # ifndef alloca
    124 #  define alloca(s) __builtin_alloca((s))
    125 # endif
    126 #endif
    127 
    128 /* maximum precomputation table size for *variable* sliding windows */
    129 #define TABLE_SIZE	32
    130 
    131 /* this one works - simple but works */
    132 int BN_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, BN_CTX *ctx)
    133 	{
    134 	int i,bits,ret=0;
    135 	BIGNUM *v,*rr;
    136 
    137 	if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0)
    138 		{
    139 		/* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */
    140 		BNerr(BN_F_BN_EXP,ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
    141 		return -1;
    142 		}
    143 
    144 	BN_CTX_start(ctx);
    145 	if ((r == a) || (r == p))
    146 		rr = BN_CTX_get(ctx);
    147 	else
    148 		rr = r;
    149 	v = BN_CTX_get(ctx);
    150 	if (rr == NULL || v == NULL) goto err;
    151 
    152 	if (BN_copy(v,a) == NULL) goto err;
    153 	bits=BN_num_bits(p);
    154 
    155 	if (BN_is_odd(p))
    156 		{ if (BN_copy(rr,a) == NULL) goto err; }
    157 	else	{ if (!BN_one(rr)) goto err; }
    158 
    159 	for (i=1; i<bits; i++)
    160 		{
    161 		if (!BN_sqr(v,v,ctx)) goto err;
    162 		if (BN_is_bit_set(p,i))
    163 			{
    164 			if (!BN_mul(rr,rr,v,ctx)) goto err;
    165 			}
    166 		}
    167 	ret=1;
    168 err:
    169 	if (r != rr) BN_copy(r,rr);
    170 	BN_CTX_end(ctx);
    171 	bn_check_top(r);
    172 	return(ret);
    173 	}
    174 
    175 
    176 int BN_mod_exp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p, const BIGNUM *m,
    177 	       BN_CTX *ctx)
    178 	{
    179 	int ret;
    180 
    181 	bn_check_top(a);
    182 	bn_check_top(p);
    183 	bn_check_top(m);
    184 
    185 	/* For even modulus  m = 2^k*m_odd,  it might make sense to compute
    186 	 * a^p mod m_odd  and  a^p mod 2^k  separately (with Montgomery
    187 	 * exponentiation for the odd part), using appropriate exponent
    188 	 * reductions, and combine the results using the CRT.
    189 	 *
    190 	 * For now, we use Montgomery only if the modulus is odd; otherwise,
    191 	 * exponentiation using the reciprocal-based quick remaindering
    192 	 * algorithm is used.
    193 	 *
    194 	 * (Timing obtained with expspeed.c [computations  a^p mod m
    195 	 * where  a, p, m  are of the same length: 256, 512, 1024, 2048,
    196 	 * 4096, 8192 bits], compared to the running time of the
    197 	 * standard algorithm:
    198 	 *
    199 	 *   BN_mod_exp_mont   33 .. 40 %  [AMD K6-2, Linux, debug configuration]
    200          *                     55 .. 77 %  [UltraSparc processor, but
    201 	 *                                  debug-solaris-sparcv8-gcc conf.]
    202 	 *
    203 	 *   BN_mod_exp_recp   50 .. 70 %  [AMD K6-2, Linux, debug configuration]
    204 	 *                     62 .. 118 % [UltraSparc, debug-solaris-sparcv8-gcc]
    205 	 *
    206 	 * On the Sparc, BN_mod_exp_recp was faster than BN_mod_exp_mont
    207 	 * at 2048 and more bits, but at 512 and 1024 bits, it was
    208 	 * slower even than the standard algorithm!
    209 	 *
    210 	 * "Real" timings [linux-elf, solaris-sparcv9-gcc configurations]
    211 	 * should be obtained when the new Montgomery reduction code
    212 	 * has been integrated into OpenSSL.)
    213 	 */
    214 
    215 #define MONT_MUL_MOD
    216 #define MONT_EXP_WORD
    217 #define RECP_MUL_MOD
    218 
    219 #ifdef MONT_MUL_MOD
    220 	/* I have finally been able to take out this pre-condition of
    221 	 * the top bit being set.  It was caused by an error in BN_div
    222 	 * with negatives.  There was also another problem when for a^b%m
    223 	 * a >= m.  eay 07-May-97 */
    224 /*	if ((m->d[m->top-1]&BN_TBIT) && BN_is_odd(m)) */
    225 
    226 	if (BN_is_odd(m))
    227 		{
    228 #  ifdef MONT_EXP_WORD
    229 		if (a->top == 1 && !a->neg && (BN_get_flags(p, BN_FLG_CONSTTIME) == 0))
    230 			{
    231 			BN_ULONG A = a->d[0];
    232 			ret=BN_mod_exp_mont_word(r,A,p,m,ctx,NULL);
    233 			}
    234 		else
    235 #  endif
    236 			ret=BN_mod_exp_mont(r,a,p,m,ctx,NULL);
    237 		}
    238 	else
    239 #endif
    240 #ifdef RECP_MUL_MOD
    241 		{ ret=BN_mod_exp_recp(r,a,p,m,ctx); }
    242 #else
    243 		{ ret=BN_mod_exp_simple(r,a,p,m,ctx); }
    244 #endif
    245 
    246 	bn_check_top(r);
    247 	return(ret);
    248 	}
    249 
    250 
    251 int BN_mod_exp_recp(BIGNUM *r, const BIGNUM *a, const BIGNUM *p,
    252 		    const BIGNUM *m, BN_CTX *ctx)
    253 	{
    254 	int i,j,bits,ret=0,wstart,wend,window,wvalue;
    255 	int start=1;
    256 	BIGNUM *aa;
    257 	/* Table of variables obtained from 'ctx' */
    258 	BIGNUM *val[TABLE_SIZE];
    259 	BN_RECP_CTX recp;
    260 
    261 	if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0)
    262 		{
    263 		/* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */
    264 		BNerr(BN_F_BN_MOD_EXP_RECP,ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
    265 		return -1;
    266 		}
    267 
    268 	bits=BN_num_bits(p);
    269 
    270 	if (bits == 0)
    271 		{
    272 		ret = BN_one(r);
    273 		return ret;
    274 		}
    275 
    276 	BN_CTX_start(ctx);
    277 	aa = BN_CTX_get(ctx);
    278 	val[0] = BN_CTX_get(ctx);
    279 	if(!aa || !val[0]) goto err;
    280 
    281 	BN_RECP_CTX_init(&recp);
    282 	if (m->neg)
    283 		{
    284 		/* ignore sign of 'm' */
    285 		if (!BN_copy(aa, m)) goto err;
    286 		aa->neg = 0;
    287 		if (BN_RECP_CTX_set(&recp,aa,ctx) <= 0) goto err;
    288 		}
    289 	else
    290 		{
    291 		if (BN_RECP_CTX_set(&recp,m,ctx) <= 0) goto err;
    292 		}
    293 
    294 	if (!BN_nnmod(val[0],a,m,ctx)) goto err;		/* 1 */
    295 	if (BN_is_zero(val[0]))
    296 		{
    297 		BN_zero(r);
    298 		ret = 1;
    299 		goto err;
    300 		}
    301 
    302 	window = BN_window_bits_for_exponent_size(bits);
    303 	if (window > 1)
    304 		{
    305 		if (!BN_mod_mul_reciprocal(aa,val[0],val[0],&recp,ctx))
    306 			goto err;				/* 2 */
    307 		j=1<<(window-1);
    308 		for (i=1; i<j; i++)
    309 			{
    310 			if(((val[i] = BN_CTX_get(ctx)) == NULL) ||
    311 					!BN_mod_mul_reciprocal(val[i],val[i-1],
    312 						aa,&recp,ctx))
    313 				goto err;
    314 			}
    315 		}
    316 
    317 	start=1;	/* This is used to avoid multiplication etc
    318 			 * when there is only the value '1' in the
    319 			 * buffer. */
    320 	wvalue=0;	/* The 'value' of the window */
    321 	wstart=bits-1;	/* The top bit of the window */
    322 	wend=0;		/* The bottom bit of the window */
    323 
    324 	if (!BN_one(r)) goto err;
    325 
    326 	for (;;)
    327 		{
    328 		if (BN_is_bit_set(p,wstart) == 0)
    329 			{
    330 			if (!start)
    331 				if (!BN_mod_mul_reciprocal(r,r,r,&recp,ctx))
    332 				goto err;
    333 			if (wstart == 0) break;
    334 			wstart--;
    335 			continue;
    336 			}
    337 		/* We now have wstart on a 'set' bit, we now need to work out
    338 		 * how bit a window to do.  To do this we need to scan
    339 		 * forward until the last set bit before the end of the
    340 		 * window */
    341 		j=wstart;
    342 		wvalue=1;
    343 		wend=0;
    344 		for (i=1; i<window; i++)
    345 			{
    346 			if (wstart-i < 0) break;
    347 			if (BN_is_bit_set(p,wstart-i))
    348 				{
    349 				wvalue<<=(i-wend);
    350 				wvalue|=1;
    351 				wend=i;
    352 				}
    353 			}
    354 
    355 		/* wend is the size of the current window */
    356 		j=wend+1;
    357 		/* add the 'bytes above' */
    358 		if (!start)
    359 			for (i=0; i<j; i++)
    360 				{
    361 				if (!BN_mod_mul_reciprocal(r,r,r,&recp,ctx))
    362 					goto err;
    363 				}
    364 
    365 		/* wvalue will be an odd number < 2^window */
    366 		if (!BN_mod_mul_reciprocal(r,r,val[wvalue>>1],&recp,ctx))
    367 			goto err;
    368 
    369 		/* move the 'window' down further */
    370 		wstart-=wend+1;
    371 		wvalue=0;
    372 		start=0;
    373 		if (wstart < 0) break;
    374 		}
    375 	ret=1;
    376 err:
    377 	BN_CTX_end(ctx);
    378 	BN_RECP_CTX_free(&recp);
    379 	bn_check_top(r);
    380 	return(ret);
    381 	}
    382 
    383 
    384 int BN_mod_exp_mont(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
    385 		    const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *in_mont)
    386 	{
    387 	int i,j,bits,ret=0,wstart,wend,window,wvalue;
    388 	int start=1;
    389 	BIGNUM *d,*r;
    390 	const BIGNUM *aa;
    391 	/* Table of variables obtained from 'ctx' */
    392 	BIGNUM *val[TABLE_SIZE];
    393 	BN_MONT_CTX *mont=NULL;
    394 
    395 	if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0)
    396 		{
    397 		return BN_mod_exp_mont_consttime(rr, a, p, m, ctx, in_mont);
    398 		}
    399 
    400 	bn_check_top(a);
    401 	bn_check_top(p);
    402 	bn_check_top(m);
    403 
    404 	if (!BN_is_odd(m))
    405 		{
    406 		BNerr(BN_F_BN_MOD_EXP_MONT,BN_R_CALLED_WITH_EVEN_MODULUS);
    407 		return(0);
    408 		}
    409 	bits=BN_num_bits(p);
    410 	if (bits == 0)
    411 		{
    412 		ret = BN_one(rr);
    413 		return ret;
    414 		}
    415 
    416 	BN_CTX_start(ctx);
    417 	d = BN_CTX_get(ctx);
    418 	r = BN_CTX_get(ctx);
    419 	val[0] = BN_CTX_get(ctx);
    420 	if (!d || !r || !val[0]) goto err;
    421 
    422 	/* If this is not done, things will break in the montgomery
    423 	 * part */
    424 
    425 	if (in_mont != NULL)
    426 		mont=in_mont;
    427 	else
    428 		{
    429 		if ((mont=BN_MONT_CTX_new()) == NULL) goto err;
    430 		if (!BN_MONT_CTX_set(mont,m,ctx)) goto err;
    431 		}
    432 
    433 	if (a->neg || BN_ucmp(a,m) >= 0)
    434 		{
    435 		if (!BN_nnmod(val[0],a,m,ctx))
    436 			goto err;
    437 		aa= val[0];
    438 		}
    439 	else
    440 		aa=a;
    441 	if (BN_is_zero(aa))
    442 		{
    443 		BN_zero(rr);
    444 		ret = 1;
    445 		goto err;
    446 		}
    447 	if (!BN_to_montgomery(val[0],aa,mont,ctx)) goto err; /* 1 */
    448 
    449 	window = BN_window_bits_for_exponent_size(bits);
    450 	if (window > 1)
    451 		{
    452 		if (!BN_mod_mul_montgomery(d,val[0],val[0],mont,ctx)) goto err; /* 2 */
    453 		j=1<<(window-1);
    454 		for (i=1; i<j; i++)
    455 			{
    456 			if(((val[i] = BN_CTX_get(ctx)) == NULL) ||
    457 					!BN_mod_mul_montgomery(val[i],val[i-1],
    458 						d,mont,ctx))
    459 				goto err;
    460 			}
    461 		}
    462 
    463 	start=1;	/* This is used to avoid multiplication etc
    464 			 * when there is only the value '1' in the
    465 			 * buffer. */
    466 	wvalue=0;	/* The 'value' of the window */
    467 	wstart=bits-1;	/* The top bit of the window */
    468 	wend=0;		/* The bottom bit of the window */
    469 
    470 	if (!BN_to_montgomery(r,BN_value_one(),mont,ctx)) goto err;
    471 	for (;;)
    472 		{
    473 		if (BN_is_bit_set(p,wstart) == 0)
    474 			{
    475 			if (!start)
    476 				{
    477 				if (!BN_mod_mul_montgomery(r,r,r,mont,ctx))
    478 				goto err;
    479 				}
    480 			if (wstart == 0) break;
    481 			wstart--;
    482 			continue;
    483 			}
    484 		/* We now have wstart on a 'set' bit, we now need to work out
    485 		 * how bit a window to do.  To do this we need to scan
    486 		 * forward until the last set bit before the end of the
    487 		 * window */
    488 		j=wstart;
    489 		wvalue=1;
    490 		wend=0;
    491 		for (i=1; i<window; i++)
    492 			{
    493 			if (wstart-i < 0) break;
    494 			if (BN_is_bit_set(p,wstart-i))
    495 				{
    496 				wvalue<<=(i-wend);
    497 				wvalue|=1;
    498 				wend=i;
    499 				}
    500 			}
    501 
    502 		/* wend is the size of the current window */
    503 		j=wend+1;
    504 		/* add the 'bytes above' */
    505 		if (!start)
    506 			for (i=0; i<j; i++)
    507 				{
    508 				if (!BN_mod_mul_montgomery(r,r,r,mont,ctx))
    509 					goto err;
    510 				}
    511 
    512 		/* wvalue will be an odd number < 2^window */
    513 		if (!BN_mod_mul_montgomery(r,r,val[wvalue>>1],mont,ctx))
    514 			goto err;
    515 
    516 		/* move the 'window' down further */
    517 		wstart-=wend+1;
    518 		wvalue=0;
    519 		start=0;
    520 		if (wstart < 0) break;
    521 		}
    522 	if (!BN_from_montgomery(rr,r,mont,ctx)) goto err;
    523 	ret=1;
    524 err:
    525 	if ((in_mont == NULL) && (mont != NULL)) BN_MONT_CTX_free(mont);
    526 	BN_CTX_end(ctx);
    527 	bn_check_top(rr);
    528 	return(ret);
    529 	}
    530 
    531 
    532 /* BN_mod_exp_mont_consttime() stores the precomputed powers in a specific layout
    533  * so that accessing any of these table values shows the same access pattern as far
    534  * as cache lines are concerned.  The following functions are used to transfer a BIGNUM
    535  * from/to that table. */
    536 
    537 static int MOD_EXP_CTIME_COPY_TO_PREBUF(const BIGNUM *b, int top, unsigned char *buf, int idx, int width)
    538 	{
    539 	size_t i, j;
    540 
    541 	if (top > b->top)
    542 		top = b->top; /* this works because 'buf' is explicitly zeroed */
    543 	for (i = 0, j=idx; i < top * sizeof b->d[0]; i++, j+=width)
    544 		{
    545 		buf[j] = ((unsigned char*)b->d)[i];
    546 		}
    547 
    548 	return 1;
    549 	}
    550 
    551 static int MOD_EXP_CTIME_COPY_FROM_PREBUF(BIGNUM *b, int top, unsigned char *buf, int idx, int width)
    552 	{
    553 	size_t i, j;
    554 
    555 	if (bn_wexpand(b, top) == NULL)
    556 		return 0;
    557 
    558 	for (i=0, j=idx; i < top * sizeof b->d[0]; i++, j+=width)
    559 		{
    560 		((unsigned char*)b->d)[i] = buf[j];
    561 		}
    562 
    563 	b->top = top;
    564 	bn_correct_top(b);
    565 	return 1;
    566 	}
    567 
    568 /* Given a pointer value, compute the next address that is a cache line multiple. */
    569 #define MOD_EXP_CTIME_ALIGN(x_) \
    570 	((unsigned char*)(x_) + (MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH - (((size_t)(x_)) & (MOD_EXP_CTIME_MIN_CACHE_LINE_MASK))))
    571 
    572 /* This variant of BN_mod_exp_mont() uses fixed windows and the special
    573  * precomputation memory layout to limit data-dependency to a minimum
    574  * to protect secret exponents (cf. the hyper-threading timing attacks
    575  * pointed out by Colin Percival,
    576  * http://www.daemonology.net/hyperthreading-considered-harmful/)
    577  */
    578 int BN_mod_exp_mont_consttime(BIGNUM *rr, const BIGNUM *a, const BIGNUM *p,
    579 		    const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *in_mont)
    580 	{
    581 	int i,bits,ret=0,window,wvalue;
    582 	int top;
    583 	BN_MONT_CTX *mont=NULL;
    584 
    585 	int numPowers;
    586 	unsigned char *powerbufFree=NULL;
    587 	int powerbufLen = 0;
    588 	unsigned char *powerbuf=NULL;
    589 	BIGNUM tmp, am;
    590 
    591 	bn_check_top(a);
    592 	bn_check_top(p);
    593 	bn_check_top(m);
    594 
    595 	top = m->top;
    596 
    597 	if (!(m->d[0] & 1))
    598 		{
    599 		BNerr(BN_F_BN_MOD_EXP_MONT_CONSTTIME,BN_R_CALLED_WITH_EVEN_MODULUS);
    600 		return(0);
    601 		}
    602 	bits=BN_num_bits(p);
    603 	if (bits == 0)
    604 		{
    605 		ret = BN_one(rr);
    606 		return ret;
    607 		}
    608 
    609 	BN_CTX_start(ctx);
    610 
    611 	/* Allocate a montgomery context if it was not supplied by the caller.
    612 	 * If this is not done, things will break in the montgomery part.
    613  	 */
    614 	if (in_mont != NULL)
    615 		mont=in_mont;
    616 	else
    617 		{
    618 		if ((mont=BN_MONT_CTX_new()) == NULL) goto err;
    619 		if (!BN_MONT_CTX_set(mont,m,ctx)) goto err;
    620 		}
    621 
    622 	/* Get the window size to use with size of p. */
    623 	window = BN_window_bits_for_ctime_exponent_size(bits);
    624 #if defined(OPENSSL_BN_ASM_MONT5)
    625 	if (window==6 && bits<=1024) window=5;	/* ~5% improvement of 2048-bit RSA sign */
    626 #endif
    627 
    628 	/* Allocate a buffer large enough to hold all of the pre-computed
    629 	 * powers of am, am itself and tmp.
    630 	 */
    631 	numPowers = 1 << window;
    632 	powerbufLen = sizeof(m->d[0])*(top*numPowers +
    633 				((2*top)>numPowers?(2*top):numPowers));
    634 #ifdef alloca
    635 	if (powerbufLen < 3072)
    636 		powerbufFree = alloca(powerbufLen+MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH);
    637 	else
    638 #endif
    639 	if ((powerbufFree=(unsigned char*)OPENSSL_malloc(powerbufLen+MOD_EXP_CTIME_MIN_CACHE_LINE_WIDTH)) == NULL)
    640 		goto err;
    641 
    642 	powerbuf = MOD_EXP_CTIME_ALIGN(powerbufFree);
    643 	memset(powerbuf, 0, powerbufLen);
    644 
    645 #ifdef alloca
    646 	if (powerbufLen < 3072)
    647 		powerbufFree = NULL;
    648 #endif
    649 
    650 	/* lay down tmp and am right after powers table */
    651 	tmp.d     = (BN_ULONG *)(powerbuf + sizeof(m->d[0])*top*numPowers);
    652 	am.d      = tmp.d + top;
    653 	tmp.top   = am.top  = 0;
    654 	tmp.dmax  = am.dmax = top;
    655 	tmp.neg   = am.neg  = 0;
    656 	tmp.flags = am.flags = BN_FLG_STATIC_DATA;
    657 
    658 	/* prepare a^0 in Montgomery domain */
    659 #if 1
    660  	if (!BN_to_montgomery(&tmp,BN_value_one(),mont,ctx))	goto err;
    661 #else
    662 	tmp.d[0] = (0-m->d[0])&BN_MASK2;	/* 2^(top*BN_BITS2) - m */
    663 	for (i=1;i<top;i++)
    664 		tmp.d[i] = (~m->d[i])&BN_MASK2;
    665 	tmp.top = top;
    666 #endif
    667 
    668 	/* prepare a^1 in Montgomery domain */
    669 	if (a->neg || BN_ucmp(a,m) >= 0)
    670 		{
    671 		if (!BN_mod(&am,a,m,ctx))			goto err;
    672 		if (!BN_to_montgomery(&am,&am,mont,ctx))	goto err;
    673 		}
    674 	else	if (!BN_to_montgomery(&am,a,mont,ctx))		goto err;
    675 
    676 #if defined(OPENSSL_BN_ASM_MONT5)
    677     /* This optimization uses ideas from http://eprint.iacr.org/2011/239,
    678      * specifically optimization of cache-timing attack countermeasures
    679      * and pre-computation optimization. */
    680 
    681     /* Dedicated window==4 case improves 512-bit RSA sign by ~15%, but as
    682      * 512-bit RSA is hardly relevant, we omit it to spare size... */
    683     if (window==5)
    684 	{
    685 	void bn_mul_mont_gather5(BN_ULONG *rp,const BN_ULONG *ap,
    686 			const void *table,const BN_ULONG *np,
    687 			const BN_ULONG *n0,int num,int power);
    688 	void bn_scatter5(const BN_ULONG *inp,size_t num,
    689 			void *table,size_t power);
    690 	void bn_gather5(BN_ULONG *out,size_t num,
    691 			void *table,size_t power);
    692 
    693 	BN_ULONG *np=mont->N.d, *n0=mont->n0;
    694 
    695 	/* BN_to_montgomery can contaminate words above .top
    696 	 * [in BN_DEBUG[_DEBUG] build]... */
    697 	for (i=am.top; i<top; i++)	am.d[i]=0;
    698 	for (i=tmp.top; i<top; i++)	tmp.d[i]=0;
    699 
    700 	bn_scatter5(tmp.d,top,powerbuf,0);
    701 	bn_scatter5(am.d,am.top,powerbuf,1);
    702 	bn_mul_mont(tmp.d,am.d,am.d,np,n0,top);
    703 	bn_scatter5(tmp.d,top,powerbuf,2);
    704 
    705 #if 0
    706 	for (i=3; i<32; i++)
    707 		{
    708 		/* Calculate a^i = a^(i-1) * a */
    709 		bn_mul_mont_gather5(tmp.d,am.d,powerbuf,np,n0,top,i-1);
    710 		bn_scatter5(tmp.d,top,powerbuf,i);
    711 		}
    712 #else
    713 	/* same as above, but uses squaring for 1/2 of operations */
    714 	for (i=4; i<32; i*=2)
    715 		{
    716 		bn_mul_mont(tmp.d,tmp.d,tmp.d,np,n0,top);
    717 		bn_scatter5(tmp.d,top,powerbuf,i);
    718 		}
    719 	for (i=3; i<8; i+=2)
    720 		{
    721 		int j;
    722 		bn_mul_mont_gather5(tmp.d,am.d,powerbuf,np,n0,top,i-1);
    723 		bn_scatter5(tmp.d,top,powerbuf,i);
    724 		for (j=2*i; j<32; j*=2)
    725 			{
    726 			bn_mul_mont(tmp.d,tmp.d,tmp.d,np,n0,top);
    727 			bn_scatter5(tmp.d,top,powerbuf,j);
    728 			}
    729 		}
    730 	for (; i<16; i+=2)
    731 		{
    732 		bn_mul_mont_gather5(tmp.d,am.d,powerbuf,np,n0,top,i-1);
    733 		bn_scatter5(tmp.d,top,powerbuf,i);
    734 		bn_mul_mont(tmp.d,tmp.d,tmp.d,np,n0,top);
    735 		bn_scatter5(tmp.d,top,powerbuf,2*i);
    736 		}
    737 	for (; i<32; i+=2)
    738 		{
    739 		bn_mul_mont_gather5(tmp.d,am.d,powerbuf,np,n0,top,i-1);
    740 		bn_scatter5(tmp.d,top,powerbuf,i);
    741 		}
    742 #endif
    743 	bits--;
    744 	for (wvalue=0, i=bits%5; i>=0; i--,bits--)
    745 		wvalue = (wvalue<<1)+BN_is_bit_set(p,bits);
    746 	bn_gather5(tmp.d,top,powerbuf,wvalue);
    747 
    748 	/* Scan the exponent one window at a time starting from the most
    749 	 * significant bits.
    750 	 */
    751 	while (bits >= 0)
    752 		{
    753 		for (wvalue=0, i=0; i<5; i++,bits--)
    754 			wvalue = (wvalue<<1)+BN_is_bit_set(p,bits);
    755 
    756 		bn_mul_mont(tmp.d,tmp.d,tmp.d,np,n0,top);
    757 		bn_mul_mont(tmp.d,tmp.d,tmp.d,np,n0,top);
    758 		bn_mul_mont(tmp.d,tmp.d,tmp.d,np,n0,top);
    759 		bn_mul_mont(tmp.d,tmp.d,tmp.d,np,n0,top);
    760 		bn_mul_mont(tmp.d,tmp.d,tmp.d,np,n0,top);
    761 		bn_mul_mont_gather5(tmp.d,tmp.d,powerbuf,np,n0,top,wvalue);
    762 		}
    763 
    764 	tmp.top=top;
    765 	bn_correct_top(&tmp);
    766 	}
    767     else
    768 #endif
    769 	{
    770 	if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&tmp, top, powerbuf, 0, numPowers)) goto err;
    771 	if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&am,  top, powerbuf, 1, numPowers)) goto err;
    772 
    773 	/* If the window size is greater than 1, then calculate
    774 	 * val[i=2..2^winsize-1]. Powers are computed as a*a^(i-1)
    775 	 * (even powers could instead be computed as (a^(i/2))^2
    776 	 * to use the slight performance advantage of sqr over mul).
    777 	 */
    778 	if (window > 1)
    779 		{
    780 		if (!BN_mod_mul_montgomery(&tmp,&am,&am,mont,ctx))	goto err;
    781 		if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&tmp, top, powerbuf, 2, numPowers)) goto err;
    782 		for (i=3; i<numPowers; i++)
    783 			{
    784 			/* Calculate a^i = a^(i-1) * a */
    785 			if (!BN_mod_mul_montgomery(&tmp,&am,&tmp,mont,ctx))
    786 				goto err;
    787 			if (!MOD_EXP_CTIME_COPY_TO_PREBUF(&tmp, top, powerbuf, i, numPowers)) goto err;
    788 			}
    789 		}
    790 
    791 	bits--;
    792 	for (wvalue=0, i=bits%window; i>=0; i--,bits--)
    793 		wvalue = (wvalue<<1)+BN_is_bit_set(p,bits);
    794 	if (!MOD_EXP_CTIME_COPY_FROM_PREBUF(&tmp,top,powerbuf,wvalue,numPowers)) goto err;
    795 
    796 	/* Scan the exponent one window at a time starting from the most
    797 	 * significant bits.
    798 	 */
    799  	while (bits >= 0)
    800   		{
    801  		wvalue=0; /* The 'value' of the window */
    802 
    803  		/* Scan the window, squaring the result as we go */
    804  		for (i=0; i<window; i++,bits--)
    805  			{
    806 			if (!BN_mod_mul_montgomery(&tmp,&tmp,&tmp,mont,ctx))	goto err;
    807 			wvalue = (wvalue<<1)+BN_is_bit_set(p,bits);
    808   			}
    809 
    810 		/* Fetch the appropriate pre-computed value from the pre-buf */
    811 		if (!MOD_EXP_CTIME_COPY_FROM_PREBUF(&am, top, powerbuf, wvalue, numPowers)) goto err;
    812 
    813  		/* Multiply the result into the intermediate result */
    814  		if (!BN_mod_mul_montgomery(&tmp,&tmp,&am,mont,ctx)) goto err;
    815   		}
    816 	}
    817 
    818  	/* Convert the final result from montgomery to standard format */
    819 	if (!BN_from_montgomery(rr,&tmp,mont,ctx)) goto err;
    820 	ret=1;
    821 err:
    822 	if ((in_mont == NULL) && (mont != NULL)) BN_MONT_CTX_free(mont);
    823 	if (powerbuf!=NULL)
    824 		{
    825 		OPENSSL_cleanse(powerbuf,powerbufLen);
    826 		if (powerbufFree) OPENSSL_free(powerbufFree);
    827 		}
    828 	BN_CTX_end(ctx);
    829 	return(ret);
    830 	}
    831 
    832 int BN_mod_exp_mont_word(BIGNUM *rr, BN_ULONG a, const BIGNUM *p,
    833                          const BIGNUM *m, BN_CTX *ctx, BN_MONT_CTX *in_mont)
    834 	{
    835 	BN_MONT_CTX *mont = NULL;
    836 	int b, bits, ret=0;
    837 	int r_is_one;
    838 	BN_ULONG w, next_w;
    839 	BIGNUM *d, *r, *t;
    840 	BIGNUM *swap_tmp;
    841 #define BN_MOD_MUL_WORD(r, w, m) \
    842 		(BN_mul_word(r, (w)) && \
    843 		(/* BN_ucmp(r, (m)) < 0 ? 1 :*/  \
    844 			(BN_mod(t, r, m, ctx) && (swap_tmp = r, r = t, t = swap_tmp, 1))))
    845 		/* BN_MOD_MUL_WORD is only used with 'w' large,
    846 		 * so the BN_ucmp test is probably more overhead
    847 		 * than always using BN_mod (which uses BN_copy if
    848 		 * a similar test returns true). */
    849 		/* We can use BN_mod and do not need BN_nnmod because our
    850 		 * accumulator is never negative (the result of BN_mod does
    851 		 * not depend on the sign of the modulus).
    852 		 */
    853 #define BN_TO_MONTGOMERY_WORD(r, w, mont) \
    854 		(BN_set_word(r, (w)) && BN_to_montgomery(r, r, (mont), ctx))
    855 
    856 	if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0)
    857 		{
    858 		/* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */
    859 		BNerr(BN_F_BN_MOD_EXP_MONT_WORD,ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
    860 		return -1;
    861 		}
    862 
    863 	bn_check_top(p);
    864 	bn_check_top(m);
    865 
    866 	if (!BN_is_odd(m))
    867 		{
    868 		BNerr(BN_F_BN_MOD_EXP_MONT_WORD,BN_R_CALLED_WITH_EVEN_MODULUS);
    869 		return(0);
    870 		}
    871 	if (m->top == 1)
    872 		a %= m->d[0]; /* make sure that 'a' is reduced */
    873 
    874 	bits = BN_num_bits(p);
    875 	if (bits == 0)
    876 		{
    877 		ret = BN_one(rr);
    878 		return ret;
    879 		}
    880 	if (a == 0)
    881 		{
    882 		BN_zero(rr);
    883 		ret = 1;
    884 		return ret;
    885 		}
    886 
    887 	BN_CTX_start(ctx);
    888 	d = BN_CTX_get(ctx);
    889 	r = BN_CTX_get(ctx);
    890 	t = BN_CTX_get(ctx);
    891 	if (d == NULL || r == NULL || t == NULL) goto err;
    892 
    893 	if (in_mont != NULL)
    894 		mont=in_mont;
    895 	else
    896 		{
    897 		if ((mont = BN_MONT_CTX_new()) == NULL) goto err;
    898 		if (!BN_MONT_CTX_set(mont, m, ctx)) goto err;
    899 		}
    900 
    901 	r_is_one = 1; /* except for Montgomery factor */
    902 
    903 	/* bits-1 >= 0 */
    904 
    905 	/* The result is accumulated in the product r*w. */
    906 	w = a; /* bit 'bits-1' of 'p' is always set */
    907 	for (b = bits-2; b >= 0; b--)
    908 		{
    909 		/* First, square r*w. */
    910 		next_w = w*w;
    911 		if ((next_w/w) != w) /* overflow */
    912 			{
    913 			if (r_is_one)
    914 				{
    915 				if (!BN_TO_MONTGOMERY_WORD(r, w, mont)) goto err;
    916 				r_is_one = 0;
    917 				}
    918 			else
    919 				{
    920 				if (!BN_MOD_MUL_WORD(r, w, m)) goto err;
    921 				}
    922 			next_w = 1;
    923 			}
    924 		w = next_w;
    925 		if (!r_is_one)
    926 			{
    927 			if (!BN_mod_mul_montgomery(r, r, r, mont, ctx)) goto err;
    928 			}
    929 
    930 		/* Second, multiply r*w by 'a' if exponent bit is set. */
    931 		if (BN_is_bit_set(p, b))
    932 			{
    933 			next_w = w*a;
    934 			if ((next_w/a) != w) /* overflow */
    935 				{
    936 				if (r_is_one)
    937 					{
    938 					if (!BN_TO_MONTGOMERY_WORD(r, w, mont)) goto err;
    939 					r_is_one = 0;
    940 					}
    941 				else
    942 					{
    943 					if (!BN_MOD_MUL_WORD(r, w, m)) goto err;
    944 					}
    945 				next_w = a;
    946 				}
    947 			w = next_w;
    948 			}
    949 		}
    950 
    951 	/* Finally, set r:=r*w. */
    952 	if (w != 1)
    953 		{
    954 		if (r_is_one)
    955 			{
    956 			if (!BN_TO_MONTGOMERY_WORD(r, w, mont)) goto err;
    957 			r_is_one = 0;
    958 			}
    959 		else
    960 			{
    961 			if (!BN_MOD_MUL_WORD(r, w, m)) goto err;
    962 			}
    963 		}
    964 
    965 	if (r_is_one) /* can happen only if a == 1*/
    966 		{
    967 		if (!BN_one(rr)) goto err;
    968 		}
    969 	else
    970 		{
    971 		if (!BN_from_montgomery(rr, r, mont, ctx)) goto err;
    972 		}
    973 	ret = 1;
    974 err:
    975 	if ((in_mont == NULL) && (mont != NULL)) BN_MONT_CTX_free(mont);
    976 	BN_CTX_end(ctx);
    977 	bn_check_top(rr);
    978 	return(ret);
    979 	}
    980 
    981 
    982 /* The old fallback, simple version :-) */
    983 int BN_mod_exp_simple(BIGNUM *r, const BIGNUM *a, const BIGNUM *p,
    984 		const BIGNUM *m, BN_CTX *ctx)
    985 	{
    986 	int i,j,bits,ret=0,wstart,wend,window,wvalue;
    987 	int start=1;
    988 	BIGNUM *d;
    989 	/* Table of variables obtained from 'ctx' */
    990 	BIGNUM *val[TABLE_SIZE];
    991 
    992 	if (BN_get_flags(p, BN_FLG_CONSTTIME) != 0)
    993 		{
    994 		/* BN_FLG_CONSTTIME only supported by BN_mod_exp_mont() */
    995 		BNerr(BN_F_BN_MOD_EXP_SIMPLE,ERR_R_SHOULD_NOT_HAVE_BEEN_CALLED);
    996 		return -1;
    997 		}
    998 
    999 	bits=BN_num_bits(p);
   1000 
   1001 	if (bits == 0)
   1002 		{
   1003 		ret = BN_one(r);
   1004 		return ret;
   1005 		}
   1006 
   1007 	BN_CTX_start(ctx);
   1008 	d = BN_CTX_get(ctx);
   1009 	val[0] = BN_CTX_get(ctx);
   1010 	if(!d || !val[0]) goto err;
   1011 
   1012 	if (!BN_nnmod(val[0],a,m,ctx)) goto err;		/* 1 */
   1013 	if (BN_is_zero(val[0]))
   1014 		{
   1015 		BN_zero(r);
   1016 		ret = 1;
   1017 		goto err;
   1018 		}
   1019 
   1020 	window = BN_window_bits_for_exponent_size(bits);
   1021 	if (window > 1)
   1022 		{
   1023 		if (!BN_mod_mul(d,val[0],val[0],m,ctx))
   1024 			goto err;				/* 2 */
   1025 		j=1<<(window-1);
   1026 		for (i=1; i<j; i++)
   1027 			{
   1028 			if(((val[i] = BN_CTX_get(ctx)) == NULL) ||
   1029 					!BN_mod_mul(val[i],val[i-1],d,m,ctx))
   1030 				goto err;
   1031 			}
   1032 		}
   1033 
   1034 	start=1;	/* This is used to avoid multiplication etc
   1035 			 * when there is only the value '1' in the
   1036 			 * buffer. */
   1037 	wvalue=0;	/* The 'value' of the window */
   1038 	wstart=bits-1;	/* The top bit of the window */
   1039 	wend=0;		/* The bottom bit of the window */
   1040 
   1041 	if (!BN_one(r)) goto err;
   1042 
   1043 	for (;;)
   1044 		{
   1045 		if (BN_is_bit_set(p,wstart) == 0)
   1046 			{
   1047 			if (!start)
   1048 				if (!BN_mod_mul(r,r,r,m,ctx))
   1049 				goto err;
   1050 			if (wstart == 0) break;
   1051 			wstart--;
   1052 			continue;
   1053 			}
   1054 		/* We now have wstart on a 'set' bit, we now need to work out
   1055 		 * how bit a window to do.  To do this we need to scan
   1056 		 * forward until the last set bit before the end of the
   1057 		 * window */
   1058 		j=wstart;
   1059 		wvalue=1;
   1060 		wend=0;
   1061 		for (i=1; i<window; i++)
   1062 			{
   1063 			if (wstart-i < 0) break;
   1064 			if (BN_is_bit_set(p,wstart-i))
   1065 				{
   1066 				wvalue<<=(i-wend);
   1067 				wvalue|=1;
   1068 				wend=i;
   1069 				}
   1070 			}
   1071 
   1072 		/* wend is the size of the current window */
   1073 		j=wend+1;
   1074 		/* add the 'bytes above' */
   1075 		if (!start)
   1076 			for (i=0; i<j; i++)
   1077 				{
   1078 				if (!BN_mod_mul(r,r,r,m,ctx))
   1079 					goto err;
   1080 				}
   1081 
   1082 		/* wvalue will be an odd number < 2^window */
   1083 		if (!BN_mod_mul(r,r,val[wvalue>>1],m,ctx))
   1084 			goto err;
   1085 
   1086 		/* move the 'window' down further */
   1087 		wstart-=wend+1;
   1088 		wvalue=0;
   1089 		start=0;
   1090 		if (wstart < 0) break;
   1091 		}
   1092 	ret=1;
   1093 err:
   1094 	BN_CTX_end(ctx);
   1095 	bn_check_top(r);
   1096 	return(ret);
   1097 	}
   1098