1 /* Copyright (C) 1995-1998 Eric Young (eay (at) cryptsoft.com) 2 * All rights reserved. 3 * 4 * This package is an SSL implementation written 5 * by Eric Young (eay (at) cryptsoft.com). 6 * The implementation was written so as to conform with Netscapes SSL. 7 * 8 * This library is free for commercial and non-commercial use as long as 9 * the following conditions are aheared to. The following conditions 10 * apply to all code found in this distribution, be it the RC4, RSA, 11 * lhash, DES, etc., code; not just the SSL code. The SSL documentation 12 * included with this distribution is covered by the same copyright terms 13 * except that the holder is Tim Hudson (tjh (at) cryptsoft.com). 14 * 15 * Copyright remains Eric Young's, and as such any Copyright notices in 16 * the code are not to be removed. 17 * If this package is used in a product, Eric Young should be given attribution 18 * as the author of the parts of the library used. 19 * This can be in the form of a textual message at program startup or 20 * in documentation (online or textual) provided with the package. 21 * 22 * Redistribution and use in source and binary forms, with or without 23 * modification, are permitted provided that the following conditions 24 * are met: 25 * 1. Redistributions of source code must retain the copyright 26 * notice, this list of conditions and the following disclaimer. 27 * 2. Redistributions in binary form must reproduce the above copyright 28 * notice, this list of conditions and the following disclaimer in the 29 * documentation and/or other materials provided with the distribution. 30 * 3. All advertising materials mentioning features or use of this software 31 * must display the following acknowledgement: 32 * "This product includes cryptographic software written by 33 * Eric Young (eay (at) cryptsoft.com)" 34 * The word 'cryptographic' can be left out if the rouines from the library 35 * being used are not cryptographic related :-). 36 * 4. If you include any Windows specific code (or a derivative thereof) from 37 * the apps directory (application code) you must include an acknowledgement: 38 * "This product includes software written by Tim Hudson (tjh (at) cryptsoft.com)" 39 * 40 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND 41 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 42 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 43 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 44 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 45 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 46 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 47 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 48 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 49 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 50 * SUCH DAMAGE. 51 * 52 * The licence and distribution terms for any publically available version or 53 * derivative of this code cannot be changed. i.e. this code cannot simply be 54 * copied and put under another distribution licence 55 * [including the GNU Public Licence.] */ 56 57 #include <openssl/bn.h> 58 59 #include <assert.h> 60 61 #include "internal.h" 62 63 64 void bn_mul_normal(BN_ULONG *r, BN_ULONG *a, int na, BN_ULONG *b, int nb) { 65 BN_ULONG *rr; 66 67 if (na < nb) { 68 int itmp; 69 BN_ULONG *ltmp; 70 71 itmp = na; 72 na = nb; 73 nb = itmp; 74 ltmp = a; 75 a = b; 76 b = ltmp; 77 } 78 rr = &(r[na]); 79 if (nb <= 0) { 80 (void)bn_mul_words(r, a, na, 0); 81 return; 82 } else { 83 rr[0] = bn_mul_words(r, a, na, b[0]); 84 } 85 86 for (;;) { 87 if (--nb <= 0) { 88 return; 89 } 90 rr[1] = bn_mul_add_words(&(r[1]), a, na, b[1]); 91 if (--nb <= 0) { 92 return; 93 } 94 rr[2] = bn_mul_add_words(&(r[2]), a, na, b[2]); 95 if (--nb <= 0) { 96 return; 97 } 98 rr[3] = bn_mul_add_words(&(r[3]), a, na, b[3]); 99 if (--nb <= 0) { 100 return; 101 } 102 rr[4] = bn_mul_add_words(&(r[4]), a, na, b[4]); 103 rr += 4; 104 r += 4; 105 b += 4; 106 } 107 } 108 109 void bn_mul_low_normal(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n) { 110 bn_mul_words(r, a, n, b[0]); 111 112 for (;;) { 113 if (--n <= 0) { 114 return; 115 } 116 bn_mul_add_words(&(r[1]), a, n, b[1]); 117 if (--n <= 0) { 118 return; 119 } 120 bn_mul_add_words(&(r[2]), a, n, b[2]); 121 if (--n <= 0) { 122 return; 123 } 124 bn_mul_add_words(&(r[3]), a, n, b[3]); 125 if (--n <= 0) { 126 return; 127 } 128 bn_mul_add_words(&(r[4]), a, n, b[4]); 129 r += 4; 130 b += 4; 131 } 132 } 133 134 #if !defined(OPENSSL_X86) || defined(OPENSSL_NO_ASM) 135 /* Here follows specialised variants of bn_add_words() and bn_sub_words(). They 136 * have the property performing operations on arrays of different sizes. The 137 * sizes of those arrays is expressed through cl, which is the common length ( 138 * basicall, min(len(a),len(b)) ), and dl, which is the delta between the two 139 * lengths, calculated as len(a)-len(b). All lengths are the number of 140 * BN_ULONGs... For the operations that require a result array as parameter, 141 * it must have the length cl+abs(dl). These functions should probably end up 142 * in bn_asm.c as soon as there are assembler counterparts for the systems that 143 * use assembler files. */ 144 145 static BN_ULONG bn_sub_part_words(BN_ULONG *r, const BN_ULONG *a, 146 const BN_ULONG *b, int cl, int dl) { 147 BN_ULONG c, t; 148 149 assert(cl >= 0); 150 c = bn_sub_words(r, a, b, cl); 151 152 if (dl == 0) 153 return c; 154 155 r += cl; 156 a += cl; 157 b += cl; 158 159 if (dl < 0) { 160 for (;;) { 161 t = b[0]; 162 r[0] = (0 - t - c) & BN_MASK2; 163 if (t != 0) { 164 c = 1; 165 } 166 if (++dl >= 0) { 167 break; 168 } 169 170 t = b[1]; 171 r[1] = (0 - t - c) & BN_MASK2; 172 if (t != 0) { 173 c = 1; 174 } 175 if (++dl >= 0) { 176 break; 177 } 178 179 t = b[2]; 180 r[2] = (0 - t - c) & BN_MASK2; 181 if (t != 0) { 182 c = 1; 183 } 184 if (++dl >= 0) { 185 break; 186 } 187 188 t = b[3]; 189 r[3] = (0 - t - c) & BN_MASK2; 190 if (t != 0) { 191 c = 1; 192 } 193 if (++dl >= 0) { 194 break; 195 } 196 197 b += 4; 198 r += 4; 199 } 200 } else { 201 int save_dl = dl; 202 while (c) { 203 t = a[0]; 204 r[0] = (t - c) & BN_MASK2; 205 if (t != 0) { 206 c = 0; 207 } 208 if (--dl <= 0) { 209 break; 210 } 211 212 t = a[1]; 213 r[1] = (t - c) & BN_MASK2; 214 if (t != 0) { 215 c = 0; 216 } 217 if (--dl <= 0) { 218 break; 219 } 220 221 t = a[2]; 222 r[2] = (t - c) & BN_MASK2; 223 if (t != 0) { 224 c = 0; 225 } 226 if (--dl <= 0) { 227 break; 228 } 229 230 t = a[3]; 231 r[3] = (t - c) & BN_MASK2; 232 if (t != 0) { 233 c = 0; 234 } 235 if (--dl <= 0) { 236 break; 237 } 238 239 save_dl = dl; 240 a += 4; 241 r += 4; 242 } 243 if (dl > 0) { 244 if (save_dl > dl) { 245 switch (save_dl - dl) { 246 case 1: 247 r[1] = a[1]; 248 if (--dl <= 0) { 249 break; 250 } 251 case 2: 252 r[2] = a[2]; 253 if (--dl <= 0) { 254 break; 255 } 256 case 3: 257 r[3] = a[3]; 258 if (--dl <= 0) { 259 break; 260 } 261 } 262 a += 4; 263 r += 4; 264 } 265 } 266 267 if (dl > 0) { 268 for (;;) { 269 r[0] = a[0]; 270 if (--dl <= 0) { 271 break; 272 } 273 r[1] = a[1]; 274 if (--dl <= 0) { 275 break; 276 } 277 r[2] = a[2]; 278 if (--dl <= 0) { 279 break; 280 } 281 r[3] = a[3]; 282 if (--dl <= 0) { 283 break; 284 } 285 286 a += 4; 287 r += 4; 288 } 289 } 290 } 291 292 return c; 293 } 294 #else 295 /* On other platforms the function is defined in asm. */ 296 BN_ULONG bn_sub_part_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, 297 int cl, int dl); 298 #endif 299 300 /* Karatsuba recursive multiplication algorithm 301 * (cf. Knuth, The Art of Computer Programming, Vol. 2) */ 302 303 /* r is 2*n2 words in size, 304 * a and b are both n2 words in size. 305 * n2 must be a power of 2. 306 * We multiply and return the result. 307 * t must be 2*n2 words in size 308 * We calculate 309 * a[0]*b[0] 310 * a[0]*b[0]+a[1]*b[1]+(a[0]-a[1])*(b[1]-b[0]) 311 * a[1]*b[1] 312 */ 313 /* dnX may not be positive, but n2/2+dnX has to be */ 314 static void bn_mul_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2, 315 int dna, int dnb, BN_ULONG *t) { 316 int n = n2 / 2, c1, c2; 317 int tna = n + dna, tnb = n + dnb; 318 unsigned int neg, zero; 319 BN_ULONG ln, lo, *p; 320 321 /* Only call bn_mul_comba 8 if n2 == 8 and the 322 * two arrays are complete [steve] 323 */ 324 if (n2 == 8 && dna == 0 && dnb == 0) { 325 bn_mul_comba8(r, a, b); 326 return; 327 } 328 329 /* Else do normal multiply */ 330 if (n2 < BN_MUL_RECURSIVE_SIZE_NORMAL) { 331 bn_mul_normal(r, a, n2 + dna, b, n2 + dnb); 332 if ((dna + dnb) < 0) 333 memset(&r[2 * n2 + dna + dnb], 0, sizeof(BN_ULONG) * -(dna + dnb)); 334 return; 335 } 336 337 /* r=(a[0]-a[1])*(b[1]-b[0]) */ 338 c1 = bn_cmp_part_words(a, &(a[n]), tna, n - tna); 339 c2 = bn_cmp_part_words(&(b[n]), b, tnb, tnb - n); 340 zero = neg = 0; 341 switch (c1 * 3 + c2) { 342 case -4: 343 bn_sub_part_words(t, &(a[n]), a, tna, tna - n); /* - */ 344 bn_sub_part_words(&(t[n]), b, &(b[n]), tnb, n - tnb); /* - */ 345 break; 346 case -3: 347 zero = 1; 348 break; 349 case -2: 350 bn_sub_part_words(t, &(a[n]), a, tna, tna - n); /* - */ 351 bn_sub_part_words(&(t[n]), &(b[n]), b, tnb, tnb - n); /* + */ 352 neg = 1; 353 break; 354 case -1: 355 case 0: 356 case 1: 357 zero = 1; 358 break; 359 case 2: 360 bn_sub_part_words(t, a, &(a[n]), tna, n - tna); /* + */ 361 bn_sub_part_words(&(t[n]), b, &(b[n]), tnb, n - tnb); /* - */ 362 neg = 1; 363 break; 364 case 3: 365 zero = 1; 366 break; 367 case 4: 368 bn_sub_part_words(t, a, &(a[n]), tna, n - tna); 369 bn_sub_part_words(&(t[n]), &(b[n]), b, tnb, tnb - n); 370 break; 371 } 372 373 if (n == 4 && dna == 0 && dnb == 0) { 374 /* XXX: bn_mul_comba4 could take extra args to do this well */ 375 if (!zero) { 376 bn_mul_comba4(&(t[n2]), t, &(t[n])); 377 } else { 378 memset(&(t[n2]), 0, 8 * sizeof(BN_ULONG)); 379 } 380 381 bn_mul_comba4(r, a, b); 382 bn_mul_comba4(&(r[n2]), &(a[n]), &(b[n])); 383 } else if (n == 8 && dna == 0 && dnb == 0) { 384 /* XXX: bn_mul_comba8 could take extra args to do this well */ 385 if (!zero) { 386 bn_mul_comba8(&(t[n2]), t, &(t[n])); 387 } else { 388 memset(&(t[n2]), 0, 16 * sizeof(BN_ULONG)); 389 } 390 391 bn_mul_comba8(r, a, b); 392 bn_mul_comba8(&(r[n2]), &(a[n]), &(b[n])); 393 } else { 394 p = &(t[n2 * 2]); 395 if (!zero) { 396 bn_mul_recursive(&(t[n2]), t, &(t[n]), n, 0, 0, p); 397 } else { 398 memset(&(t[n2]), 0, n2 * sizeof(BN_ULONG)); 399 } 400 bn_mul_recursive(r, a, b, n, 0, 0, p); 401 bn_mul_recursive(&(r[n2]), &(a[n]), &(b[n]), n, dna, dnb, p); 402 } 403 404 /* t[32] holds (a[0]-a[1])*(b[1]-b[0]), c1 is the sign 405 * r[10] holds (a[0]*b[0]) 406 * r[32] holds (b[1]*b[1]) */ 407 408 c1 = (int)(bn_add_words(t, r, &(r[n2]), n2)); 409 410 if (neg) { 411 /* if t[32] is negative */ 412 c1 -= (int)(bn_sub_words(&(t[n2]), t, &(t[n2]), n2)); 413 } else { 414 /* Might have a carry */ 415 c1 += (int)(bn_add_words(&(t[n2]), &(t[n2]), t, n2)); 416 } 417 418 /* t[32] holds (a[0]-a[1])*(b[1]-b[0])+(a[0]*b[0])+(a[1]*b[1]) 419 * r[10] holds (a[0]*b[0]) 420 * r[32] holds (b[1]*b[1]) 421 * c1 holds the carry bits */ 422 c1 += (int)(bn_add_words(&(r[n]), &(r[n]), &(t[n2]), n2)); 423 if (c1) { 424 p = &(r[n + n2]); 425 lo = *p; 426 ln = (lo + c1) & BN_MASK2; 427 *p = ln; 428 429 /* The overflow will stop before we over write 430 * words we should not overwrite */ 431 if (ln < (BN_ULONG)c1) { 432 do { 433 p++; 434 lo = *p; 435 ln = (lo + 1) & BN_MASK2; 436 *p = ln; 437 } while (ln == 0); 438 } 439 } 440 } 441 442 /* n+tn is the word length 443 * t needs to be n*4 is size, as does r */ 444 /* tnX may not be negative but less than n */ 445 static void bn_mul_part_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n, 446 int tna, int tnb, BN_ULONG *t) { 447 int i, j, n2 = n * 2; 448 int c1, c2, neg; 449 BN_ULONG ln, lo, *p; 450 451 if (n < 8) { 452 bn_mul_normal(r, a, n + tna, b, n + tnb); 453 return; 454 } 455 456 /* r=(a[0]-a[1])*(b[1]-b[0]) */ 457 c1 = bn_cmp_part_words(a, &(a[n]), tna, n - tna); 458 c2 = bn_cmp_part_words(&(b[n]), b, tnb, tnb - n); 459 neg = 0; 460 switch (c1 * 3 + c2) { 461 case -4: 462 bn_sub_part_words(t, &(a[n]), a, tna, tna - n); /* - */ 463 bn_sub_part_words(&(t[n]), b, &(b[n]), tnb, n - tnb); /* - */ 464 break; 465 case -3: 466 /* break; */ 467 case -2: 468 bn_sub_part_words(t, &(a[n]), a, tna, tna - n); /* - */ 469 bn_sub_part_words(&(t[n]), &(b[n]), b, tnb, tnb - n); /* + */ 470 neg = 1; 471 break; 472 case -1: 473 case 0: 474 case 1: 475 /* break; */ 476 case 2: 477 bn_sub_part_words(t, a, &(a[n]), tna, n - tna); /* + */ 478 bn_sub_part_words(&(t[n]), b, &(b[n]), tnb, n - tnb); /* - */ 479 neg = 1; 480 break; 481 case 3: 482 /* break; */ 483 case 4: 484 bn_sub_part_words(t, a, &(a[n]), tna, n - tna); 485 bn_sub_part_words(&(t[n]), &(b[n]), b, tnb, tnb - n); 486 break; 487 } 488 489 if (n == 8) { 490 bn_mul_comba8(&(t[n2]), t, &(t[n])); 491 bn_mul_comba8(r, a, b); 492 bn_mul_normal(&(r[n2]), &(a[n]), tna, &(b[n]), tnb); 493 memset(&(r[n2 + tna + tnb]), 0, sizeof(BN_ULONG) * (n2 - tna - tnb)); 494 } else { 495 p = &(t[n2 * 2]); 496 bn_mul_recursive(&(t[n2]), t, &(t[n]), n, 0, 0, p); 497 bn_mul_recursive(r, a, b, n, 0, 0, p); 498 i = n / 2; 499 /* If there is only a bottom half to the number, 500 * just do it */ 501 if (tna > tnb) { 502 j = tna - i; 503 } else { 504 j = tnb - i; 505 } 506 507 if (j == 0) { 508 bn_mul_recursive(&(r[n2]), &(a[n]), &(b[n]), i, tna - i, tnb - i, p); 509 memset(&(r[n2 + i * 2]), 0, sizeof(BN_ULONG) * (n2 - i * 2)); 510 } else if (j > 0) { 511 /* eg, n == 16, i == 8 and tn == 11 */ 512 bn_mul_part_recursive(&(r[n2]), &(a[n]), &(b[n]), i, tna - i, tnb - i, p); 513 memset(&(r[n2 + tna + tnb]), 0, sizeof(BN_ULONG) * (n2 - tna - tnb)); 514 } else { 515 /* (j < 0) eg, n == 16, i == 8 and tn == 5 */ 516 memset(&(r[n2]), 0, sizeof(BN_ULONG) * n2); 517 if (tna < BN_MUL_RECURSIVE_SIZE_NORMAL && 518 tnb < BN_MUL_RECURSIVE_SIZE_NORMAL) { 519 bn_mul_normal(&(r[n2]), &(a[n]), tna, &(b[n]), tnb); 520 } else { 521 for (;;) { 522 i /= 2; 523 /* these simplified conditions work 524 * exclusively because difference 525 * between tna and tnb is 1 or 0 */ 526 if (i < tna || i < tnb) { 527 bn_mul_part_recursive(&(r[n2]), &(a[n]), &(b[n]), i, tna - i, 528 tnb - i, p); 529 break; 530 } else if (i == tna || i == tnb) { 531 bn_mul_recursive(&(r[n2]), &(a[n]), &(b[n]), i, tna - i, tnb - i, 532 p); 533 break; 534 } 535 } 536 } 537 } 538 } 539 540 /* t[32] holds (a[0]-a[1])*(b[1]-b[0]), c1 is the sign 541 * r[10] holds (a[0]*b[0]) 542 * r[32] holds (b[1]*b[1]) 543 */ 544 545 c1 = (int)(bn_add_words(t, r, &(r[n2]), n2)); 546 547 if (neg) { 548 /* if t[32] is negative */ 549 c1 -= (int)(bn_sub_words(&(t[n2]), t, &(t[n2]), n2)); 550 } else { 551 /* Might have a carry */ 552 c1 += (int)(bn_add_words(&(t[n2]), &(t[n2]), t, n2)); 553 } 554 555 /* t[32] holds (a[0]-a[1])*(b[1]-b[0])+(a[0]*b[0])+(a[1]*b[1]) 556 * r[10] holds (a[0]*b[0]) 557 * r[32] holds (b[1]*b[1]) 558 * c1 holds the carry bits */ 559 c1 += (int)(bn_add_words(&(r[n]), &(r[n]), &(t[n2]), n2)); 560 if (c1) { 561 p = &(r[n + n2]); 562 lo = *p; 563 ln = (lo + c1) & BN_MASK2; 564 *p = ln; 565 566 /* The overflow will stop before we over write 567 * words we should not overwrite */ 568 if (ln < (BN_ULONG)c1) { 569 do { 570 p++; 571 lo = *p; 572 ln = (lo + 1) & BN_MASK2; 573 *p = ln; 574 } while (ln == 0); 575 } 576 } 577 } 578 579 int BN_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx) { 580 int ret = 0; 581 int top, al, bl; 582 BIGNUM *rr; 583 int i; 584 BIGNUM *t = NULL; 585 int j = 0, k; 586 587 al = a->top; 588 bl = b->top; 589 590 if ((al == 0) || (bl == 0)) { 591 BN_zero(r); 592 return 1; 593 } 594 top = al + bl; 595 596 BN_CTX_start(ctx); 597 if ((r == a) || (r == b)) { 598 if ((rr = BN_CTX_get(ctx)) == NULL) { 599 goto err; 600 } 601 } else { 602 rr = r; 603 } 604 rr->neg = a->neg ^ b->neg; 605 606 i = al - bl; 607 if (i == 0) { 608 if (al == 8) { 609 if (bn_wexpand(rr, 16) == NULL) { 610 goto err; 611 } 612 rr->top = 16; 613 bn_mul_comba8(rr->d, a->d, b->d); 614 goto end; 615 } 616 } 617 618 if ((al >= BN_MULL_SIZE_NORMAL) && (bl >= BN_MULL_SIZE_NORMAL)) { 619 if (i >= -1 && i <= 1) { 620 /* Find out the power of two lower or equal 621 to the longest of the two numbers */ 622 if (i >= 0) { 623 j = BN_num_bits_word((BN_ULONG)al); 624 } 625 if (i == -1) { 626 j = BN_num_bits_word((BN_ULONG)bl); 627 } 628 j = 1 << (j - 1); 629 assert(j <= al || j <= bl); 630 k = j + j; 631 t = BN_CTX_get(ctx); 632 if (t == NULL) { 633 goto err; 634 } 635 if (al > j || bl > j) { 636 if (bn_wexpand(t, k * 4) == NULL) { 637 goto err; 638 } 639 if (bn_wexpand(rr, k * 4) == NULL) { 640 goto err; 641 } 642 bn_mul_part_recursive(rr->d, a->d, b->d, j, al - j, bl - j, t->d); 643 } else { 644 /* al <= j || bl <= j */ 645 if (bn_wexpand(t, k * 2) == NULL) { 646 goto err; 647 } 648 if (bn_wexpand(rr, k * 2) == NULL) { 649 goto err; 650 } 651 bn_mul_recursive(rr->d, a->d, b->d, j, al - j, bl - j, t->d); 652 } 653 rr->top = top; 654 goto end; 655 } 656 } 657 658 if (bn_wexpand(rr, top) == NULL) { 659 goto err; 660 } 661 rr->top = top; 662 bn_mul_normal(rr->d, a->d, al, b->d, bl); 663 664 end: 665 bn_correct_top(rr); 666 if (r != rr) { 667 BN_copy(r, rr); 668 } 669 ret = 1; 670 671 err: 672 BN_CTX_end(ctx); 673 return ret; 674 } 675 676 /* tmp must have 2*n words */ 677 static void bn_sqr_normal(BN_ULONG *r, const BN_ULONG *a, int n, BN_ULONG *tmp) { 678 int i, j, max; 679 const BN_ULONG *ap; 680 BN_ULONG *rp; 681 682 max = n * 2; 683 ap = a; 684 rp = r; 685 rp[0] = rp[max - 1] = 0; 686 rp++; 687 j = n; 688 689 if (--j > 0) { 690 ap++; 691 rp[j] = bn_mul_words(rp, ap, j, ap[-1]); 692 rp += 2; 693 } 694 695 for (i = n - 2; i > 0; i--) { 696 j--; 697 ap++; 698 rp[j] = bn_mul_add_words(rp, ap, j, ap[-1]); 699 rp += 2; 700 } 701 702 bn_add_words(r, r, r, max); 703 704 /* There will not be a carry */ 705 706 bn_sqr_words(tmp, a, n); 707 708 bn_add_words(r, r, tmp, max); 709 } 710 711 /* r is 2*n words in size, 712 * a and b are both n words in size. (There's not actually a 'b' here ...) 713 * n must be a power of 2. 714 * We multiply and return the result. 715 * t must be 2*n words in size 716 * We calculate 717 * a[0]*b[0] 718 * a[0]*b[0]+a[1]*b[1]+(a[0]-a[1])*(b[1]-b[0]) 719 * a[1]*b[1] 720 */ 721 static void bn_sqr_recursive(BN_ULONG *r, const BN_ULONG *a, int n2, BN_ULONG *t) { 722 int n = n2 / 2; 723 int zero, c1; 724 BN_ULONG ln, lo, *p; 725 726 if (n2 == 4) { 727 bn_sqr_comba4(r, a); 728 return; 729 } else if (n2 == 8) { 730 bn_sqr_comba8(r, a); 731 return; 732 } 733 if (n2 < BN_SQR_RECURSIVE_SIZE_NORMAL) { 734 bn_sqr_normal(r, a, n2, t); 735 return; 736 } 737 /* r=(a[0]-a[1])*(a[1]-a[0]) */ 738 c1 = bn_cmp_words(a, &(a[n]), n); 739 zero = 0; 740 if (c1 > 0) { 741 bn_sub_words(t, a, &(a[n]), n); 742 } else if (c1 < 0) { 743 bn_sub_words(t, &(a[n]), a, n); 744 } else { 745 zero = 1; 746 } 747 748 /* The result will always be negative unless it is zero */ 749 p = &(t[n2 * 2]); 750 751 if (!zero) { 752 bn_sqr_recursive(&(t[n2]), t, n, p); 753 } else { 754 memset(&(t[n2]), 0, n2 * sizeof(BN_ULONG)); 755 } 756 bn_sqr_recursive(r, a, n, p); 757 bn_sqr_recursive(&(r[n2]), &(a[n]), n, p); 758 759 /* t[32] holds (a[0]-a[1])*(a[1]-a[0]), it is negative or zero 760 * r[10] holds (a[0]*b[0]) 761 * r[32] holds (b[1]*b[1]) */ 762 763 c1 = (int)(bn_add_words(t, r, &(r[n2]), n2)); 764 765 /* t[32] is negative */ 766 c1 -= (int)(bn_sub_words(&(t[n2]), t, &(t[n2]), n2)); 767 768 /* t[32] holds (a[0]-a[1])*(a[1]-a[0])+(a[0]*a[0])+(a[1]*a[1]) 769 * r[10] holds (a[0]*a[0]) 770 * r[32] holds (a[1]*a[1]) 771 * c1 holds the carry bits */ 772 c1 += (int)(bn_add_words(&(r[n]), &(r[n]), &(t[n2]), n2)); 773 if (c1) { 774 p = &(r[n + n2]); 775 lo = *p; 776 ln = (lo + c1) & BN_MASK2; 777 *p = ln; 778 779 /* The overflow will stop before we over write 780 * words we should not overwrite */ 781 if (ln < (BN_ULONG)c1) { 782 do { 783 p++; 784 lo = *p; 785 ln = (lo + 1) & BN_MASK2; 786 *p = ln; 787 } while (ln == 0); 788 } 789 } 790 } 791 792 int BN_mul_word(BIGNUM *bn, BN_ULONG w) { 793 BN_ULONG ll; 794 795 w &= BN_MASK2; 796 if (!bn->top) { 797 return 1; 798 } 799 800 if (w == 0) { 801 BN_zero(bn); 802 return 1; 803 } 804 805 ll = bn_mul_words(bn->d, bn->d, bn->top, w); 806 if (ll) { 807 if (bn_wexpand(bn, bn->top + 1) == NULL) { 808 return 0; 809 } 810 bn->d[bn->top++] = ll; 811 } 812 813 return 1; 814 } 815 816 int BN_sqr(BIGNUM *r, const BIGNUM *a, BN_CTX *ctx) { 817 int max, al; 818 int ret = 0; 819 BIGNUM *tmp, *rr; 820 821 al = a->top; 822 if (al <= 0) { 823 r->top = 0; 824 r->neg = 0; 825 return 1; 826 } 827 828 BN_CTX_start(ctx); 829 rr = (a != r) ? r : BN_CTX_get(ctx); 830 tmp = BN_CTX_get(ctx); 831 if (!rr || !tmp) { 832 goto err; 833 } 834 835 max = 2 * al; /* Non-zero (from above) */ 836 if (bn_wexpand(rr, max) == NULL) { 837 goto err; 838 } 839 840 if (al == 4) { 841 bn_sqr_comba4(rr->d, a->d); 842 } else if (al == 8) { 843 bn_sqr_comba8(rr->d, a->d); 844 } else { 845 if (al < BN_SQR_RECURSIVE_SIZE_NORMAL) { 846 BN_ULONG t[BN_SQR_RECURSIVE_SIZE_NORMAL * 2]; 847 bn_sqr_normal(rr->d, a->d, al, t); 848 } else { 849 int j, k; 850 851 j = BN_num_bits_word((BN_ULONG)al); 852 j = 1 << (j - 1); 853 k = j + j; 854 if (al == j) { 855 if (bn_wexpand(tmp, k * 2) == NULL) { 856 goto err; 857 } 858 bn_sqr_recursive(rr->d, a->d, al, tmp->d); 859 } else { 860 if (bn_wexpand(tmp, max) == NULL) { 861 goto err; 862 } 863 bn_sqr_normal(rr->d, a->d, al, tmp->d); 864 } 865 } 866 } 867 868 rr->neg = 0; 869 /* If the most-significant half of the top word of 'a' is zero, then 870 * the square of 'a' will max-1 words. */ 871 if (a->d[al - 1] == (a->d[al - 1] & BN_MASK2l)) { 872 rr->top = max - 1; 873 } else { 874 rr->top = max; 875 } 876 877 if (rr != r) { 878 BN_copy(r, rr); 879 } 880 ret = 1; 881 882 err: 883 BN_CTX_end(ctx); 884 return ret; 885 } 886