Home | History | Annotate | Download | only in bn
      1 /* crypto/bn/bn_mul.c */
      2 /* Copyright (C) 1995-1998 Eric Young (eay (at) cryptsoft.com)
      3  * All rights reserved.
      4  *
      5  * This package is an SSL implementation written
      6  * by Eric Young (eay (at) cryptsoft.com).
      7  * The implementation was written so as to conform with Netscapes SSL.
      8  *
      9  * This library is free for commercial and non-commercial use as long as
     10  * the following conditions are aheared to.  The following conditions
     11  * apply to all code found in this distribution, be it the RC4, RSA,
     12  * lhash, DES, etc., code; not just the SSL code.  The SSL documentation
     13  * included with this distribution is covered by the same copyright terms
     14  * except that the holder is Tim Hudson (tjh (at) cryptsoft.com).
     15  *
     16  * Copyright remains Eric Young's, and as such any Copyright notices in
     17  * the code are not to be removed.
     18  * If this package is used in a product, Eric Young should be given attribution
     19  * as the author of the parts of the library used.
     20  * This can be in the form of a textual message at program startup or
     21  * in documentation (online or textual) provided with the package.
     22  *
     23  * Redistribution and use in source and binary forms, with or without
     24  * modification, are permitted provided that the following conditions
     25  * are met:
     26  * 1. Redistributions of source code must retain the copyright
     27  *    notice, this list of conditions and the following disclaimer.
     28  * 2. Redistributions in binary form must reproduce the above copyright
     29  *    notice, this list of conditions and the following disclaimer in the
     30  *    documentation and/or other materials provided with the distribution.
     31  * 3. All advertising materials mentioning features or use of this software
     32  *    must display the following acknowledgement:
     33  *    "This product includes cryptographic software written by
     34  *     Eric Young (eay (at) cryptsoft.com)"
     35  *    The word 'cryptographic' can be left out if the rouines from the library
     36  *    being used are not cryptographic related :-).
     37  * 4. If you include any Windows specific code (or a derivative thereof) from
     38  *    the apps directory (application code) you must include an acknowledgement:
     39  *    "This product includes software written by Tim Hudson (tjh (at) cryptsoft.com)"
     40  *
     41  * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
     42  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     43  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     44  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
     45  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     46  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     47  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     48  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     49  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     50  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     51  * SUCH DAMAGE.
     52  *
     53  * The licence and distribution terms for any publically available version or
     54  * derivative of this code cannot be changed.  i.e. this code cannot simply be
     55  * copied and put under another distribution licence
     56  * [including the GNU Public Licence.]
     57  */
     58 
     59 #ifndef BN_DEBUG
     60 # undef NDEBUG /* avoid conflicting definitions */
     61 # define NDEBUG
     62 #endif
     63 
     64 #include <stdio.h>
     65 #include <assert.h>
     66 #include "cryptlib.h"
     67 #include "bn_lcl.h"
     68 
     69 #if defined(OPENSSL_NO_ASM) || !defined(OPENSSL_BN_ASM_PART_WORDS)
     70 /* Here follows specialised variants of bn_add_words() and
     71    bn_sub_words().  They have the property performing operations on
     72    arrays of different sizes.  The sizes of those arrays is expressed through
     73    cl, which is the common length ( basicall, min(len(a),len(b)) ), and dl,
     74    which is the delta between the two lengths, calculated as len(a)-len(b).
     75    All lengths are the number of BN_ULONGs...  For the operations that require
     76    a result array as parameter, it must have the length cl+abs(dl).
     77    These functions should probably end up in bn_asm.c as soon as there are
     78    assembler counterparts for the systems that use assembler files.  */
     79 
     80 BN_ULONG bn_sub_part_words(BN_ULONG *r,
     81 	const BN_ULONG *a, const BN_ULONG *b,
     82 	int cl, int dl)
     83 	{
     84 	BN_ULONG c, t;
     85 
     86 	assert(cl >= 0);
     87 	c = bn_sub_words(r, a, b, cl);
     88 
     89 	if (dl == 0)
     90 		return c;
     91 
     92 	r += cl;
     93 	a += cl;
     94 	b += cl;
     95 
     96 	if (dl < 0)
     97 		{
     98 #ifdef BN_COUNT
     99 		fprintf(stderr, "  bn_sub_part_words %d + %d (dl < 0, c = %d)\n", cl, dl, c);
    100 #endif
    101 		for (;;)
    102 			{
    103 			t = b[0];
    104 			r[0] = (0-t-c)&BN_MASK2;
    105 			if (t != 0) c=1;
    106 			if (++dl >= 0) break;
    107 
    108 			t = b[1];
    109 			r[1] = (0-t-c)&BN_MASK2;
    110 			if (t != 0) c=1;
    111 			if (++dl >= 0) break;
    112 
    113 			t = b[2];
    114 			r[2] = (0-t-c)&BN_MASK2;
    115 			if (t != 0) c=1;
    116 			if (++dl >= 0) break;
    117 
    118 			t = b[3];
    119 			r[3] = (0-t-c)&BN_MASK2;
    120 			if (t != 0) c=1;
    121 			if (++dl >= 0) break;
    122 
    123 			b += 4;
    124 			r += 4;
    125 			}
    126 		}
    127 	else
    128 		{
    129 		int save_dl = dl;
    130 #ifdef BN_COUNT
    131 		fprintf(stderr, "  bn_sub_part_words %d + %d (dl > 0, c = %d)\n", cl, dl, c);
    132 #endif
    133 		while(c)
    134 			{
    135 			t = a[0];
    136 			r[0] = (t-c)&BN_MASK2;
    137 			if (t != 0) c=0;
    138 			if (--dl <= 0) break;
    139 
    140 			t = a[1];
    141 			r[1] = (t-c)&BN_MASK2;
    142 			if (t != 0) c=0;
    143 			if (--dl <= 0) break;
    144 
    145 			t = a[2];
    146 			r[2] = (t-c)&BN_MASK2;
    147 			if (t != 0) c=0;
    148 			if (--dl <= 0) break;
    149 
    150 			t = a[3];
    151 			r[3] = (t-c)&BN_MASK2;
    152 			if (t != 0) c=0;
    153 			if (--dl <= 0) break;
    154 
    155 			save_dl = dl;
    156 			a += 4;
    157 			r += 4;
    158 			}
    159 		if (dl > 0)
    160 			{
    161 #ifdef BN_COUNT
    162 			fprintf(stderr, "  bn_sub_part_words %d + %d (dl > 0, c == 0)\n", cl, dl);
    163 #endif
    164 			if (save_dl > dl)
    165 				{
    166 				switch (save_dl - dl)
    167 					{
    168 				case 1:
    169 					r[1] = a[1];
    170 					if (--dl <= 0) break;
    171 				case 2:
    172 					r[2] = a[2];
    173 					if (--dl <= 0) break;
    174 				case 3:
    175 					r[3] = a[3];
    176 					if (--dl <= 0) break;
    177 					}
    178 				a += 4;
    179 				r += 4;
    180 				}
    181 			}
    182 		if (dl > 0)
    183 			{
    184 #ifdef BN_COUNT
    185 			fprintf(stderr, "  bn_sub_part_words %d + %d (dl > 0, copy)\n", cl, dl);
    186 #endif
    187 			for(;;)
    188 				{
    189 				r[0] = a[0];
    190 				if (--dl <= 0) break;
    191 				r[1] = a[1];
    192 				if (--dl <= 0) break;
    193 				r[2] = a[2];
    194 				if (--dl <= 0) break;
    195 				r[3] = a[3];
    196 				if (--dl <= 0) break;
    197 
    198 				a += 4;
    199 				r += 4;
    200 				}
    201 			}
    202 		}
    203 	return c;
    204 	}
    205 #endif
    206 
    207 BN_ULONG bn_add_part_words(BN_ULONG *r,
    208 	const BN_ULONG *a, const BN_ULONG *b,
    209 	int cl, int dl)
    210 	{
    211 	BN_ULONG c, l, t;
    212 
    213 	assert(cl >= 0);
    214 	c = bn_add_words(r, a, b, cl);
    215 
    216 	if (dl == 0)
    217 		return c;
    218 
    219 	r += cl;
    220 	a += cl;
    221 	b += cl;
    222 
    223 	if (dl < 0)
    224 		{
    225 		int save_dl = dl;
    226 #ifdef BN_COUNT
    227 		fprintf(stderr, "  bn_add_part_words %d + %d (dl < 0, c = %d)\n", cl, dl, c);
    228 #endif
    229 		while (c)
    230 			{
    231 			l=(c+b[0])&BN_MASK2;
    232 			c=(l < c);
    233 			r[0]=l;
    234 			if (++dl >= 0) break;
    235 
    236 			l=(c+b[1])&BN_MASK2;
    237 			c=(l < c);
    238 			r[1]=l;
    239 			if (++dl >= 0) break;
    240 
    241 			l=(c+b[2])&BN_MASK2;
    242 			c=(l < c);
    243 			r[2]=l;
    244 			if (++dl >= 0) break;
    245 
    246 			l=(c+b[3])&BN_MASK2;
    247 			c=(l < c);
    248 			r[3]=l;
    249 			if (++dl >= 0) break;
    250 
    251 			save_dl = dl;
    252 			b+=4;
    253 			r+=4;
    254 			}
    255 		if (dl < 0)
    256 			{
    257 #ifdef BN_COUNT
    258 			fprintf(stderr, "  bn_add_part_words %d + %d (dl < 0, c == 0)\n", cl, dl);
    259 #endif
    260 			if (save_dl < dl)
    261 				{
    262 				switch (dl - save_dl)
    263 					{
    264 				case 1:
    265 					r[1] = b[1];
    266 					if (++dl >= 0) break;
    267 				case 2:
    268 					r[2] = b[2];
    269 					if (++dl >= 0) break;
    270 				case 3:
    271 					r[3] = b[3];
    272 					if (++dl >= 0) break;
    273 					}
    274 				b += 4;
    275 				r += 4;
    276 				}
    277 			}
    278 		if (dl < 0)
    279 			{
    280 #ifdef BN_COUNT
    281 			fprintf(stderr, "  bn_add_part_words %d + %d (dl < 0, copy)\n", cl, dl);
    282 #endif
    283 			for(;;)
    284 				{
    285 				r[0] = b[0];
    286 				if (++dl >= 0) break;
    287 				r[1] = b[1];
    288 				if (++dl >= 0) break;
    289 				r[2] = b[2];
    290 				if (++dl >= 0) break;
    291 				r[3] = b[3];
    292 				if (++dl >= 0) break;
    293 
    294 				b += 4;
    295 				r += 4;
    296 				}
    297 			}
    298 		}
    299 	else
    300 		{
    301 		int save_dl = dl;
    302 #ifdef BN_COUNT
    303 		fprintf(stderr, "  bn_add_part_words %d + %d (dl > 0)\n", cl, dl);
    304 #endif
    305 		while (c)
    306 			{
    307 			t=(a[0]+c)&BN_MASK2;
    308 			c=(t < c);
    309 			r[0]=t;
    310 			if (--dl <= 0) break;
    311 
    312 			t=(a[1]+c)&BN_MASK2;
    313 			c=(t < c);
    314 			r[1]=t;
    315 			if (--dl <= 0) break;
    316 
    317 			t=(a[2]+c)&BN_MASK2;
    318 			c=(t < c);
    319 			r[2]=t;
    320 			if (--dl <= 0) break;
    321 
    322 			t=(a[3]+c)&BN_MASK2;
    323 			c=(t < c);
    324 			r[3]=t;
    325 			if (--dl <= 0) break;
    326 
    327 			save_dl = dl;
    328 			a+=4;
    329 			r+=4;
    330 			}
    331 #ifdef BN_COUNT
    332 		fprintf(stderr, "  bn_add_part_words %d + %d (dl > 0, c == 0)\n", cl, dl);
    333 #endif
    334 		if (dl > 0)
    335 			{
    336 			if (save_dl > dl)
    337 				{
    338 				switch (save_dl - dl)
    339 					{
    340 				case 1:
    341 					r[1] = a[1];
    342 					if (--dl <= 0) break;
    343 				case 2:
    344 					r[2] = a[2];
    345 					if (--dl <= 0) break;
    346 				case 3:
    347 					r[3] = a[3];
    348 					if (--dl <= 0) break;
    349 					}
    350 				a += 4;
    351 				r += 4;
    352 				}
    353 			}
    354 		if (dl > 0)
    355 			{
    356 #ifdef BN_COUNT
    357 			fprintf(stderr, "  bn_add_part_words %d + %d (dl > 0, copy)\n", cl, dl);
    358 #endif
    359 			for(;;)
    360 				{
    361 				r[0] = a[0];
    362 				if (--dl <= 0) break;
    363 				r[1] = a[1];
    364 				if (--dl <= 0) break;
    365 				r[2] = a[2];
    366 				if (--dl <= 0) break;
    367 				r[3] = a[3];
    368 				if (--dl <= 0) break;
    369 
    370 				a += 4;
    371 				r += 4;
    372 				}
    373 			}
    374 		}
    375 	return c;
    376 	}
    377 
    378 #ifdef BN_RECURSION
    379 /* Karatsuba recursive multiplication algorithm
    380  * (cf. Knuth, The Art of Computer Programming, Vol. 2) */
    381 
    382 /* r is 2*n2 words in size,
    383  * a and b are both n2 words in size.
    384  * n2 must be a power of 2.
    385  * We multiply and return the result.
    386  * t must be 2*n2 words in size
    387  * We calculate
    388  * a[0]*b[0]
    389  * a[0]*b[0]+a[1]*b[1]+(a[0]-a[1])*(b[1]-b[0])
    390  * a[1]*b[1]
    391  */
    392 /* dnX may not be positive, but n2/2+dnX has to be */
    393 void bn_mul_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2,
    394 	int dna, int dnb, BN_ULONG *t)
    395 	{
    396 	int n=n2/2,c1,c2;
    397 	int tna=n+dna, tnb=n+dnb;
    398 	unsigned int neg,zero;
    399 	BN_ULONG ln,lo,*p;
    400 
    401 # ifdef BN_COUNT
    402 	fprintf(stderr," bn_mul_recursive %d%+d * %d%+d\n",n2,dna,n2,dnb);
    403 # endif
    404 # ifdef BN_MUL_COMBA
    405 #  if 0
    406 	if (n2 == 4)
    407 		{
    408 		bn_mul_comba4(r,a,b);
    409 		return;
    410 		}
    411 #  endif
    412 	/* Only call bn_mul_comba 8 if n2 == 8 and the
    413 	 * two arrays are complete [steve]
    414 	 */
    415 	if (n2 == 8 && dna == 0 && dnb == 0)
    416 		{
    417 		bn_mul_comba8(r,a,b);
    418 		return;
    419 		}
    420 # endif /* BN_MUL_COMBA */
    421 	/* Else do normal multiply */
    422 	if (n2 < BN_MUL_RECURSIVE_SIZE_NORMAL)
    423 		{
    424 		bn_mul_normal(r,a,n2+dna,b,n2+dnb);
    425 		if ((dna + dnb) < 0)
    426 			memset(&r[2*n2 + dna + dnb], 0,
    427 				sizeof(BN_ULONG) * -(dna + dnb));
    428 		return;
    429 		}
    430 	/* r=(a[0]-a[1])*(b[1]-b[0]) */
    431 	c1=bn_cmp_part_words(a,&(a[n]),tna,n-tna);
    432 	c2=bn_cmp_part_words(&(b[n]),b,tnb,tnb-n);
    433 	zero=neg=0;
    434 	switch (c1*3+c2)
    435 		{
    436 	case -4:
    437 		bn_sub_part_words(t,      &(a[n]),a,      tna,tna-n); /* - */
    438 		bn_sub_part_words(&(t[n]),b,      &(b[n]),tnb,n-tnb); /* - */
    439 		break;
    440 	case -3:
    441 		zero=1;
    442 		break;
    443 	case -2:
    444 		bn_sub_part_words(t,      &(a[n]),a,      tna,tna-n); /* - */
    445 		bn_sub_part_words(&(t[n]),&(b[n]),b,      tnb,tnb-n); /* + */
    446 		neg=1;
    447 		break;
    448 	case -1:
    449 	case 0:
    450 	case 1:
    451 		zero=1;
    452 		break;
    453 	case 2:
    454 		bn_sub_part_words(t,      a,      &(a[n]),tna,n-tna); /* + */
    455 		bn_sub_part_words(&(t[n]),b,      &(b[n]),tnb,n-tnb); /* - */
    456 		neg=1;
    457 		break;
    458 	case 3:
    459 		zero=1;
    460 		break;
    461 	case 4:
    462 		bn_sub_part_words(t,      a,      &(a[n]),tna,n-tna);
    463 		bn_sub_part_words(&(t[n]),&(b[n]),b,      tnb,tnb-n);
    464 		break;
    465 		}
    466 
    467 # ifdef BN_MUL_COMBA
    468 	if (n == 4 && dna == 0 && dnb == 0) /* XXX: bn_mul_comba4 could take
    469 					       extra args to do this well */
    470 		{
    471 		if (!zero)
    472 			bn_mul_comba4(&(t[n2]),t,&(t[n]));
    473 		else
    474 			memset(&(t[n2]),0,8*sizeof(BN_ULONG));
    475 
    476 		bn_mul_comba4(r,a,b);
    477 		bn_mul_comba4(&(r[n2]),&(a[n]),&(b[n]));
    478 		}
    479 	else if (n == 8 && dna == 0 && dnb == 0) /* XXX: bn_mul_comba8 could
    480 						    take extra args to do this
    481 						    well */
    482 		{
    483 		if (!zero)
    484 			bn_mul_comba8(&(t[n2]),t,&(t[n]));
    485 		else
    486 			memset(&(t[n2]),0,16*sizeof(BN_ULONG));
    487 
    488 		bn_mul_comba8(r,a,b);
    489 		bn_mul_comba8(&(r[n2]),&(a[n]),&(b[n]));
    490 		}
    491 	else
    492 # endif /* BN_MUL_COMBA */
    493 		{
    494 		p= &(t[n2*2]);
    495 		if (!zero)
    496 			bn_mul_recursive(&(t[n2]),t,&(t[n]),n,0,0,p);
    497 		else
    498 			memset(&(t[n2]),0,n2*sizeof(BN_ULONG));
    499 		bn_mul_recursive(r,a,b,n,0,0,p);
    500 		bn_mul_recursive(&(r[n2]),&(a[n]),&(b[n]),n,dna,dnb,p);
    501 		}
    502 
    503 	/* t[32] holds (a[0]-a[1])*(b[1]-b[0]), c1 is the sign
    504 	 * r[10] holds (a[0]*b[0])
    505 	 * r[32] holds (b[1]*b[1])
    506 	 */
    507 
    508 	c1=(int)(bn_add_words(t,r,&(r[n2]),n2));
    509 
    510 	if (neg) /* if t[32] is negative */
    511 		{
    512 		c1-=(int)(bn_sub_words(&(t[n2]),t,&(t[n2]),n2));
    513 		}
    514 	else
    515 		{
    516 		/* Might have a carry */
    517 		c1+=(int)(bn_add_words(&(t[n2]),&(t[n2]),t,n2));
    518 		}
    519 
    520 	/* t[32] holds (a[0]-a[1])*(b[1]-b[0])+(a[0]*b[0])+(a[1]*b[1])
    521 	 * r[10] holds (a[0]*b[0])
    522 	 * r[32] holds (b[1]*b[1])
    523 	 * c1 holds the carry bits
    524 	 */
    525 	c1+=(int)(bn_add_words(&(r[n]),&(r[n]),&(t[n2]),n2));
    526 	if (c1)
    527 		{
    528 		p= &(r[n+n2]);
    529 		lo= *p;
    530 		ln=(lo+c1)&BN_MASK2;
    531 		*p=ln;
    532 
    533 		/* The overflow will stop before we over write
    534 		 * words we should not overwrite */
    535 		if (ln < (BN_ULONG)c1)
    536 			{
    537 			do	{
    538 				p++;
    539 				lo= *p;
    540 				ln=(lo+1)&BN_MASK2;
    541 				*p=ln;
    542 				} while (ln == 0);
    543 			}
    544 		}
    545 	}
    546 
    547 /* n+tn is the word length
    548  * t needs to be n*4 is size, as does r */
    549 /* tnX may not be negative but less than n */
    550 void bn_mul_part_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n,
    551 	     int tna, int tnb, BN_ULONG *t)
    552 	{
    553 	int i,j,n2=n*2;
    554 	int c1,c2,neg,zero;
    555 	BN_ULONG ln,lo,*p;
    556 
    557 # ifdef BN_COUNT
    558 	fprintf(stderr," bn_mul_part_recursive (%d%+d) * (%d%+d)\n",
    559 		n, tna, n, tnb);
    560 # endif
    561 	if (n < 8)
    562 		{
    563 		bn_mul_normal(r,a,n+tna,b,n+tnb);
    564 		return;
    565 		}
    566 
    567 	/* r=(a[0]-a[1])*(b[1]-b[0]) */
    568 	c1=bn_cmp_part_words(a,&(a[n]),tna,n-tna);
    569 	c2=bn_cmp_part_words(&(b[n]),b,tnb,tnb-n);
    570 	zero=neg=0;
    571 	switch (c1*3+c2)
    572 		{
    573 	case -4:
    574 		bn_sub_part_words(t,      &(a[n]),a,      tna,tna-n); /* - */
    575 		bn_sub_part_words(&(t[n]),b,      &(b[n]),tnb,n-tnb); /* - */
    576 		break;
    577 	case -3:
    578 		zero=1;
    579 		/* break; */
    580 	case -2:
    581 		bn_sub_part_words(t,      &(a[n]),a,      tna,tna-n); /* - */
    582 		bn_sub_part_words(&(t[n]),&(b[n]),b,      tnb,tnb-n); /* + */
    583 		neg=1;
    584 		break;
    585 	case -1:
    586 	case 0:
    587 	case 1:
    588 		zero=1;
    589 		/* break; */
    590 	case 2:
    591 		bn_sub_part_words(t,      a,      &(a[n]),tna,n-tna); /* + */
    592 		bn_sub_part_words(&(t[n]),b,      &(b[n]),tnb,n-tnb); /* - */
    593 		neg=1;
    594 		break;
    595 	case 3:
    596 		zero=1;
    597 		/* break; */
    598 	case 4:
    599 		bn_sub_part_words(t,      a,      &(a[n]),tna,n-tna);
    600 		bn_sub_part_words(&(t[n]),&(b[n]),b,      tnb,tnb-n);
    601 		break;
    602 		}
    603 		/* The zero case isn't yet implemented here. The speedup
    604 		   would probably be negligible. */
    605 # if 0
    606 	if (n == 4)
    607 		{
    608 		bn_mul_comba4(&(t[n2]),t,&(t[n]));
    609 		bn_mul_comba4(r,a,b);
    610 		bn_mul_normal(&(r[n2]),&(a[n]),tn,&(b[n]),tn);
    611 		memset(&(r[n2+tn*2]),0,sizeof(BN_ULONG)*(n2-tn*2));
    612 		}
    613 	else
    614 # endif
    615 	if (n == 8)
    616 		{
    617 		bn_mul_comba8(&(t[n2]),t,&(t[n]));
    618 		bn_mul_comba8(r,a,b);
    619 		bn_mul_normal(&(r[n2]),&(a[n]),tna,&(b[n]),tnb);
    620 		memset(&(r[n2+tna+tnb]),0,sizeof(BN_ULONG)*(n2-tna-tnb));
    621 		}
    622 	else
    623 		{
    624 		p= &(t[n2*2]);
    625 		bn_mul_recursive(&(t[n2]),t,&(t[n]),n,0,0,p);
    626 		bn_mul_recursive(r,a,b,n,0,0,p);
    627 		i=n/2;
    628 		/* If there is only a bottom half to the number,
    629 		 * just do it */
    630 		if (tna > tnb)
    631 			j = tna - i;
    632 		else
    633 			j = tnb - i;
    634 		if (j == 0)
    635 			{
    636 			bn_mul_recursive(&(r[n2]),&(a[n]),&(b[n]),
    637 				i,tna-i,tnb-i,p);
    638 			memset(&(r[n2+i*2]),0,sizeof(BN_ULONG)*(n2-i*2));
    639 			}
    640 		else if (j > 0) /* eg, n == 16, i == 8 and tn == 11 */
    641 				{
    642 				bn_mul_part_recursive(&(r[n2]),&(a[n]),&(b[n]),
    643 					i,tna-i,tnb-i,p);
    644 				memset(&(r[n2+tna+tnb]),0,
    645 					sizeof(BN_ULONG)*(n2-tna-tnb));
    646 				}
    647 		else /* (j < 0) eg, n == 16, i == 8 and tn == 5 */
    648 			{
    649 			memset(&(r[n2]),0,sizeof(BN_ULONG)*n2);
    650 			if (tna < BN_MUL_RECURSIVE_SIZE_NORMAL
    651 				&& tnb < BN_MUL_RECURSIVE_SIZE_NORMAL)
    652 				{
    653 				bn_mul_normal(&(r[n2]),&(a[n]),tna,&(b[n]),tnb);
    654 				}
    655 			else
    656 				{
    657 				for (;;)
    658 					{
    659 					i/=2;
    660 					/* these simplified conditions work
    661 					 * exclusively because difference
    662 					 * between tna and tnb is 1 or 0 */
    663 					if (i < tna || i < tnb)
    664 						{
    665 						bn_mul_part_recursive(&(r[n2]),
    666 							&(a[n]),&(b[n]),
    667 							i,tna-i,tnb-i,p);
    668 						break;
    669 						}
    670 					else if (i == tna || i == tnb)
    671 						{
    672 						bn_mul_recursive(&(r[n2]),
    673 							&(a[n]),&(b[n]),
    674 							i,tna-i,tnb-i,p);
    675 						break;
    676 						}
    677 					}
    678 				}
    679 			}
    680 		}
    681 
    682 	/* t[32] holds (a[0]-a[1])*(b[1]-b[0]), c1 is the sign
    683 	 * r[10] holds (a[0]*b[0])
    684 	 * r[32] holds (b[1]*b[1])
    685 	 */
    686 
    687 	c1=(int)(bn_add_words(t,r,&(r[n2]),n2));
    688 
    689 	if (neg) /* if t[32] is negative */
    690 		{
    691 		c1-=(int)(bn_sub_words(&(t[n2]),t,&(t[n2]),n2));
    692 		}
    693 	else
    694 		{
    695 		/* Might have a carry */
    696 		c1+=(int)(bn_add_words(&(t[n2]),&(t[n2]),t,n2));
    697 		}
    698 
    699 	/* t[32] holds (a[0]-a[1])*(b[1]-b[0])+(a[0]*b[0])+(a[1]*b[1])
    700 	 * r[10] holds (a[0]*b[0])
    701 	 * r[32] holds (b[1]*b[1])
    702 	 * c1 holds the carry bits
    703 	 */
    704 	c1+=(int)(bn_add_words(&(r[n]),&(r[n]),&(t[n2]),n2));
    705 	if (c1)
    706 		{
    707 		p= &(r[n+n2]);
    708 		lo= *p;
    709 		ln=(lo+c1)&BN_MASK2;
    710 		*p=ln;
    711 
    712 		/* The overflow will stop before we over write
    713 		 * words we should not overwrite */
    714 		if (ln < (BN_ULONG)c1)
    715 			{
    716 			do	{
    717 				p++;
    718 				lo= *p;
    719 				ln=(lo+1)&BN_MASK2;
    720 				*p=ln;
    721 				} while (ln == 0);
    722 			}
    723 		}
    724 	}
    725 
    726 /* a and b must be the same size, which is n2.
    727  * r needs to be n2 words and t needs to be n2*2
    728  */
    729 void bn_mul_low_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2,
    730 	     BN_ULONG *t)
    731 	{
    732 	int n=n2/2;
    733 
    734 # ifdef BN_COUNT
    735 	fprintf(stderr," bn_mul_low_recursive %d * %d\n",n2,n2);
    736 # endif
    737 
    738 	bn_mul_recursive(r,a,b,n,0,0,&(t[0]));
    739 	if (n >= BN_MUL_LOW_RECURSIVE_SIZE_NORMAL)
    740 		{
    741 		bn_mul_low_recursive(&(t[0]),&(a[0]),&(b[n]),n,&(t[n2]));
    742 		bn_add_words(&(r[n]),&(r[n]),&(t[0]),n);
    743 		bn_mul_low_recursive(&(t[0]),&(a[n]),&(b[0]),n,&(t[n2]));
    744 		bn_add_words(&(r[n]),&(r[n]),&(t[0]),n);
    745 		}
    746 	else
    747 		{
    748 		bn_mul_low_normal(&(t[0]),&(a[0]),&(b[n]),n);
    749 		bn_mul_low_normal(&(t[n]),&(a[n]),&(b[0]),n);
    750 		bn_add_words(&(r[n]),&(r[n]),&(t[0]),n);
    751 		bn_add_words(&(r[n]),&(r[n]),&(t[n]),n);
    752 		}
    753 	}
    754 
    755 /* a and b must be the same size, which is n2.
    756  * r needs to be n2 words and t needs to be n2*2
    757  * l is the low words of the output.
    758  * t needs to be n2*3
    759  */
    760 void bn_mul_high(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, BN_ULONG *l, int n2,
    761 	     BN_ULONG *t)
    762 	{
    763 	int i,n;
    764 	int c1,c2;
    765 	int neg,oneg,zero;
    766 	BN_ULONG ll,lc,*lp,*mp;
    767 
    768 # ifdef BN_COUNT
    769 	fprintf(stderr," bn_mul_high %d * %d\n",n2,n2);
    770 # endif
    771 	n=n2/2;
    772 
    773 	/* Calculate (al-ah)*(bh-bl) */
    774 	neg=zero=0;
    775 	c1=bn_cmp_words(&(a[0]),&(a[n]),n);
    776 	c2=bn_cmp_words(&(b[n]),&(b[0]),n);
    777 	switch (c1*3+c2)
    778 		{
    779 	case -4:
    780 		bn_sub_words(&(r[0]),&(a[n]),&(a[0]),n);
    781 		bn_sub_words(&(r[n]),&(b[0]),&(b[n]),n);
    782 		break;
    783 	case -3:
    784 		zero=1;
    785 		break;
    786 	case -2:
    787 		bn_sub_words(&(r[0]),&(a[n]),&(a[0]),n);
    788 		bn_sub_words(&(r[n]),&(b[n]),&(b[0]),n);
    789 		neg=1;
    790 		break;
    791 	case -1:
    792 	case 0:
    793 	case 1:
    794 		zero=1;
    795 		break;
    796 	case 2:
    797 		bn_sub_words(&(r[0]),&(a[0]),&(a[n]),n);
    798 		bn_sub_words(&(r[n]),&(b[0]),&(b[n]),n);
    799 		neg=1;
    800 		break;
    801 	case 3:
    802 		zero=1;
    803 		break;
    804 	case 4:
    805 		bn_sub_words(&(r[0]),&(a[0]),&(a[n]),n);
    806 		bn_sub_words(&(r[n]),&(b[n]),&(b[0]),n);
    807 		break;
    808 		}
    809 
    810 	oneg=neg;
    811 	/* t[10] = (a[0]-a[1])*(b[1]-b[0]) */
    812 	/* r[10] = (a[1]*b[1]) */
    813 # ifdef BN_MUL_COMBA
    814 	if (n == 8)
    815 		{
    816 		bn_mul_comba8(&(t[0]),&(r[0]),&(r[n]));
    817 		bn_mul_comba8(r,&(a[n]),&(b[n]));
    818 		}
    819 	else
    820 # endif
    821 		{
    822 		bn_mul_recursive(&(t[0]),&(r[0]),&(r[n]),n,0,0,&(t[n2]));
    823 		bn_mul_recursive(r,&(a[n]),&(b[n]),n,0,0,&(t[n2]));
    824 		}
    825 
    826 	/* s0 == low(al*bl)
    827 	 * s1 == low(ah*bh)+low((al-ah)*(bh-bl))+low(al*bl)+high(al*bl)
    828 	 * We know s0 and s1 so the only unknown is high(al*bl)
    829 	 * high(al*bl) == s1 - low(ah*bh+s0+(al-ah)*(bh-bl))
    830 	 * high(al*bl) == s1 - (r[0]+l[0]+t[0])
    831 	 */
    832 	if (l != NULL)
    833 		{
    834 		lp= &(t[n2+n]);
    835 		c1=(int)(bn_add_words(lp,&(r[0]),&(l[0]),n));
    836 		}
    837 	else
    838 		{
    839 		c1=0;
    840 		lp= &(r[0]);
    841 		}
    842 
    843 	if (neg)
    844 		neg=(int)(bn_sub_words(&(t[n2]),lp,&(t[0]),n));
    845 	else
    846 		{
    847 		bn_add_words(&(t[n2]),lp,&(t[0]),n);
    848 		neg=0;
    849 		}
    850 
    851 	if (l != NULL)
    852 		{
    853 		bn_sub_words(&(t[n2+n]),&(l[n]),&(t[n2]),n);
    854 		}
    855 	else
    856 		{
    857 		lp= &(t[n2+n]);
    858 		mp= &(t[n2]);
    859 		for (i=0; i<n; i++)
    860 			lp[i]=((~mp[i])+1)&BN_MASK2;
    861 		}
    862 
    863 	/* s[0] = low(al*bl)
    864 	 * t[3] = high(al*bl)
    865 	 * t[10] = (a[0]-a[1])*(b[1]-b[0]) neg is the sign
    866 	 * r[10] = (a[1]*b[1])
    867 	 */
    868 	/* R[10] = al*bl
    869 	 * R[21] = al*bl + ah*bh + (a[0]-a[1])*(b[1]-b[0])
    870 	 * R[32] = ah*bh
    871 	 */
    872 	/* R[1]=t[3]+l[0]+r[0](+-)t[0] (have carry/borrow)
    873 	 * R[2]=r[0]+t[3]+r[1](+-)t[1] (have carry/borrow)
    874 	 * R[3]=r[1]+(carry/borrow)
    875 	 */
    876 	if (l != NULL)
    877 		{
    878 		lp= &(t[n2]);
    879 		c1= (int)(bn_add_words(lp,&(t[n2+n]),&(l[0]),n));
    880 		}
    881 	else
    882 		{
    883 		lp= &(t[n2+n]);
    884 		c1=0;
    885 		}
    886 	c1+=(int)(bn_add_words(&(t[n2]),lp,  &(r[0]),n));
    887 	if (oneg)
    888 		c1-=(int)(bn_sub_words(&(t[n2]),&(t[n2]),&(t[0]),n));
    889 	else
    890 		c1+=(int)(bn_add_words(&(t[n2]),&(t[n2]),&(t[0]),n));
    891 
    892 	c2 =(int)(bn_add_words(&(r[0]),&(r[0]),&(t[n2+n]),n));
    893 	c2+=(int)(bn_add_words(&(r[0]),&(r[0]),&(r[n]),n));
    894 	if (oneg)
    895 		c2-=(int)(bn_sub_words(&(r[0]),&(r[0]),&(t[n]),n));
    896 	else
    897 		c2+=(int)(bn_add_words(&(r[0]),&(r[0]),&(t[n]),n));
    898 
    899 	if (c1 != 0) /* Add starting at r[0], could be +ve or -ve */
    900 		{
    901 		i=0;
    902 		if (c1 > 0)
    903 			{
    904 			lc=c1;
    905 			do	{
    906 				ll=(r[i]+lc)&BN_MASK2;
    907 				r[i++]=ll;
    908 				lc=(lc > ll);
    909 				} while (lc);
    910 			}
    911 		else
    912 			{
    913 			lc= -c1;
    914 			do	{
    915 				ll=r[i];
    916 				r[i++]=(ll-lc)&BN_MASK2;
    917 				lc=(lc > ll);
    918 				} while (lc);
    919 			}
    920 		}
    921 	if (c2 != 0) /* Add starting at r[1] */
    922 		{
    923 		i=n;
    924 		if (c2 > 0)
    925 			{
    926 			lc=c2;
    927 			do	{
    928 				ll=(r[i]+lc)&BN_MASK2;
    929 				r[i++]=ll;
    930 				lc=(lc > ll);
    931 				} while (lc);
    932 			}
    933 		else
    934 			{
    935 			lc= -c2;
    936 			do	{
    937 				ll=r[i];
    938 				r[i++]=(ll-lc)&BN_MASK2;
    939 				lc=(lc > ll);
    940 				} while (lc);
    941 			}
    942 		}
    943 	}
    944 #endif /* BN_RECURSION */
    945 
    946 int BN_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx)
    947 	{
    948 	int ret=0;
    949 	int top,al,bl;
    950 	BIGNUM *rr;
    951 #if defined(BN_MUL_COMBA) || defined(BN_RECURSION)
    952 	int i;
    953 #endif
    954 #ifdef BN_RECURSION
    955 	BIGNUM *t=NULL;
    956 	int j=0,k;
    957 #endif
    958 
    959 #ifdef BN_COUNT
    960 	fprintf(stderr,"BN_mul %d * %d\n",a->top,b->top);
    961 #endif
    962 
    963 	bn_check_top(a);
    964 	bn_check_top(b);
    965 	bn_check_top(r);
    966 
    967 	al=a->top;
    968 	bl=b->top;
    969 
    970 	if ((al == 0) || (bl == 0))
    971 		{
    972 		BN_zero(r);
    973 		return(1);
    974 		}
    975 	top=al+bl;
    976 
    977 	BN_CTX_start(ctx);
    978 	if ((r == a) || (r == b))
    979 		{
    980 		if ((rr = BN_CTX_get(ctx)) == NULL) goto err;
    981 		}
    982 	else
    983 		rr = r;
    984 	rr->neg=a->neg^b->neg;
    985 
    986 #if defined(BN_MUL_COMBA) || defined(BN_RECURSION)
    987 	i = al-bl;
    988 #endif
    989 #ifdef BN_MUL_COMBA
    990 	if (i == 0)
    991 		{
    992 # if 0
    993 		if (al == 4)
    994 			{
    995 			if (bn_wexpand(rr,8) == NULL) goto err;
    996 			rr->top=8;
    997 			bn_mul_comba4(rr->d,a->d,b->d);
    998 			goto end;
    999 			}
   1000 # endif
   1001 		if (al == 8)
   1002 			{
   1003 			if (bn_wexpand(rr,16) == NULL) goto err;
   1004 			rr->top=16;
   1005 			bn_mul_comba8(rr->d,a->d,b->d);
   1006 			goto end;
   1007 			}
   1008 		}
   1009 #endif /* BN_MUL_COMBA */
   1010 #ifdef BN_RECURSION
   1011 	if ((al >= BN_MULL_SIZE_NORMAL) && (bl >= BN_MULL_SIZE_NORMAL))
   1012 		{
   1013 		if (i >= -1 && i <= 1)
   1014 			{
   1015 			int sav_j =0;
   1016 			/* Find out the power of two lower or equal
   1017 			   to the longest of the two numbers */
   1018 			if (i >= 0)
   1019 				{
   1020 				j = BN_num_bits_word((BN_ULONG)al);
   1021 				}
   1022 			if (i == -1)
   1023 				{
   1024 				j = BN_num_bits_word((BN_ULONG)bl);
   1025 				}
   1026 			sav_j = j;
   1027 			j = 1<<(j-1);
   1028 			assert(j <= al || j <= bl);
   1029 			k = j+j;
   1030 			t = BN_CTX_get(ctx);
   1031 			if (t == NULL)
   1032 				goto err;
   1033 			if (al > j || bl > j)
   1034 				{
   1035 				if (bn_wexpand(t,k*4) == NULL) goto err;
   1036 				if (bn_wexpand(rr,k*4) == NULL) goto err;
   1037 				bn_mul_part_recursive(rr->d,a->d,b->d,
   1038 					j,al-j,bl-j,t->d);
   1039 				}
   1040 			else	/* al <= j || bl <= j */
   1041 				{
   1042 				if (bn_wexpand(t,k*2) == NULL) goto err;
   1043 				if (bn_wexpand(rr,k*2) == NULL) goto err;
   1044 				bn_mul_recursive(rr->d,a->d,b->d,
   1045 					j,al-j,bl-j,t->d);
   1046 				}
   1047 			rr->top=top;
   1048 			goto end;
   1049 			}
   1050 #if 0
   1051 		if (i == 1 && !BN_get_flags(b,BN_FLG_STATIC_DATA))
   1052 			{
   1053 			BIGNUM *tmp_bn = (BIGNUM *)b;
   1054 			if (bn_wexpand(tmp_bn,al) == NULL) goto err;
   1055 			tmp_bn->d[bl]=0;
   1056 			bl++;
   1057 			i--;
   1058 			}
   1059 		else if (i == -1 && !BN_get_flags(a,BN_FLG_STATIC_DATA))
   1060 			{
   1061 			BIGNUM *tmp_bn = (BIGNUM *)a;
   1062 			if (bn_wexpand(tmp_bn,bl) == NULL) goto err;
   1063 			tmp_bn->d[al]=0;
   1064 			al++;
   1065 			i++;
   1066 			}
   1067 		if (i == 0)
   1068 			{
   1069 			/* symmetric and > 4 */
   1070 			/* 16 or larger */
   1071 			j=BN_num_bits_word((BN_ULONG)al);
   1072 			j=1<<(j-1);
   1073 			k=j+j;
   1074 			t = BN_CTX_get(ctx);
   1075 			if (al == j) /* exact multiple */
   1076 				{
   1077 				if (bn_wexpand(t,k*2) == NULL) goto err;
   1078 				if (bn_wexpand(rr,k*2) == NULL) goto err;
   1079 				bn_mul_recursive(rr->d,a->d,b->d,al,t->d);
   1080 				}
   1081 			else
   1082 				{
   1083 				if (bn_wexpand(t,k*4) == NULL) goto err;
   1084 				if (bn_wexpand(rr,k*4) == NULL) goto err;
   1085 				bn_mul_part_recursive(rr->d,a->d,b->d,al-j,j,t->d);
   1086 				}
   1087 			rr->top=top;
   1088 			goto end;
   1089 			}
   1090 #endif
   1091 		}
   1092 #endif /* BN_RECURSION */
   1093 	if (bn_wexpand(rr,top) == NULL) goto err;
   1094 	rr->top=top;
   1095 	bn_mul_normal(rr->d,a->d,al,b->d,bl);
   1096 
   1097 #if defined(BN_MUL_COMBA) || defined(BN_RECURSION)
   1098 end:
   1099 #endif
   1100 	bn_correct_top(rr);
   1101 	if (r != rr) BN_copy(r,rr);
   1102 	ret=1;
   1103 err:
   1104 	bn_check_top(r);
   1105 	BN_CTX_end(ctx);
   1106 	return(ret);
   1107 	}
   1108 
   1109 void bn_mul_normal(BN_ULONG *r, BN_ULONG *a, int na, BN_ULONG *b, int nb)
   1110 	{
   1111 	BN_ULONG *rr;
   1112 
   1113 #ifdef BN_COUNT
   1114 	fprintf(stderr," bn_mul_normal %d * %d\n",na,nb);
   1115 #endif
   1116 
   1117 	if (na < nb)
   1118 		{
   1119 		int itmp;
   1120 		BN_ULONG *ltmp;
   1121 
   1122 		itmp=na; na=nb; nb=itmp;
   1123 		ltmp=a;   a=b;   b=ltmp;
   1124 
   1125 		}
   1126 	rr= &(r[na]);
   1127 	if (nb <= 0)
   1128 		{
   1129 		(void)bn_mul_words(r,a,na,0);
   1130 		return;
   1131 		}
   1132 	else
   1133 		rr[0]=bn_mul_words(r,a,na,b[0]);
   1134 
   1135 	for (;;)
   1136 		{
   1137 		if (--nb <= 0) return;
   1138 		rr[1]=bn_mul_add_words(&(r[1]),a,na,b[1]);
   1139 		if (--nb <= 0) return;
   1140 		rr[2]=bn_mul_add_words(&(r[2]),a,na,b[2]);
   1141 		if (--nb <= 0) return;
   1142 		rr[3]=bn_mul_add_words(&(r[3]),a,na,b[3]);
   1143 		if (--nb <= 0) return;
   1144 		rr[4]=bn_mul_add_words(&(r[4]),a,na,b[4]);
   1145 		rr+=4;
   1146 		r+=4;
   1147 		b+=4;
   1148 		}
   1149 	}
   1150 
   1151 void bn_mul_low_normal(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n)
   1152 	{
   1153 #ifdef BN_COUNT
   1154 	fprintf(stderr," bn_mul_low_normal %d * %d\n",n,n);
   1155 #endif
   1156 	bn_mul_words(r,a,n,b[0]);
   1157 
   1158 	for (;;)
   1159 		{
   1160 		if (--n <= 0) return;
   1161 		bn_mul_add_words(&(r[1]),a,n,b[1]);
   1162 		if (--n <= 0) return;
   1163 		bn_mul_add_words(&(r[2]),a,n,b[2]);
   1164 		if (--n <= 0) return;
   1165 		bn_mul_add_words(&(r[3]),a,n,b[3]);
   1166 		if (--n <= 0) return;
   1167 		bn_mul_add_words(&(r[4]),a,n,b[4]);
   1168 		r+=4;
   1169 		b+=4;
   1170 		}
   1171 	}
   1172