1 /* Copyright (C) 1995-1998 Eric Young (eay (at) cryptsoft.com) 2 * All rights reserved. 3 * 4 * This package is an SSL implementation written 5 * by Eric Young (eay (at) cryptsoft.com). 6 * The implementation was written so as to conform with Netscapes SSL. 7 * 8 * This library is free for commercial and non-commercial use as long as 9 * the following conditions are aheared to. The following conditions 10 * apply to all code found in this distribution, be it the RC4, RSA, 11 * lhash, DES, etc., code; not just the SSL code. The SSL documentation 12 * included with this distribution is covered by the same copyright terms 13 * except that the holder is Tim Hudson (tjh (at) cryptsoft.com). 14 * 15 * Copyright remains Eric Young's, and as such any Copyright notices in 16 * the code are not to be removed. 17 * If this package is used in a product, Eric Young should be given attribution 18 * as the author of the parts of the library used. 19 * This can be in the form of a textual message at program startup or 20 * in documentation (online or textual) provided with the package. 21 * 22 * Redistribution and use in source and binary forms, with or without 23 * modification, are permitted provided that the following conditions 24 * are met: 25 * 1. Redistributions of source code must retain the copyright 26 * notice, this list of conditions and the following disclaimer. 27 * 2. Redistributions in binary form must reproduce the above copyright 28 * notice, this list of conditions and the following disclaimer in the 29 * documentation and/or other materials provided with the distribution. 30 * 3. All advertising materials mentioning features or use of this software 31 * must display the following acknowledgement: 32 * "This product includes cryptographic software written by 33 * Eric Young (eay (at) cryptsoft.com)" 34 * The word 'cryptographic' can be left out if the rouines from the library 35 * being used are not cryptographic related :-). 36 * 4. If you include any Windows specific code (or a derivative thereof) from 37 * the apps directory (application code) you must include an acknowledgement: 38 * "This product includes software written by Tim Hudson (tjh (at) cryptsoft.com)" 39 * 40 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND 41 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 42 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 43 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 44 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 45 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 46 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 47 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 48 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 49 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 50 * SUCH DAMAGE. 51 * 52 * The licence and distribution terms for any publically available version or 53 * derivative of this code cannot be changed. i.e. this code cannot simply be 54 * copied and put under another distribution licence 55 * [including the GNU Public Licence.] */ 56 57 #include <openssl/bn.h> 58 59 #include <assert.h> 60 61 #include "internal.h" 62 63 64 /* This file has two other implementations: x86 assembly language in 65 * asm/bn-586.pl and x86_64 inline assembly in asm/x86_64-gcc.c. */ 66 #if defined(OPENSSL_NO_ASM) || \ 67 !(defined(OPENSSL_X86) || (defined(OPENSSL_X86_64) && defined(__GNUC__))) 68 69 #ifdef BN_ULLONG 70 #define mul_add(r, a, w, c) \ 71 do { \ 72 BN_ULLONG t; \ 73 t = (BN_ULLONG)(w) * (a) + (r) + (c); \ 74 (r) = Lw(t); \ 75 (c) = Hw(t); \ 76 } while (0) 77 78 #define mul(r, a, w, c) \ 79 do { \ 80 BN_ULLONG t; \ 81 t = (BN_ULLONG)(w) * (a) + (c); \ 82 (r) = Lw(t); \ 83 (c) = Hw(t); \ 84 } while (0) 85 86 #define sqr(r0, r1, a) \ 87 do { \ 88 BN_ULLONG t; \ 89 t = (BN_ULLONG)(a) * (a); \ 90 (r0) = Lw(t); \ 91 (r1) = Hw(t); \ 92 } while (0) 93 94 #else 95 96 #define mul_add(r, a, w, c) \ 97 do { \ 98 BN_ULONG high, low, ret, tmp = (a); \ 99 ret = (r); \ 100 BN_UMULT_LOHI(low, high, w, tmp); \ 101 ret += (c); \ 102 (c) = (ret < (c)) ? 1 : 0; \ 103 (c) += high; \ 104 ret += low; \ 105 (c) += (ret < low) ? 1 : 0; \ 106 (r) = ret; \ 107 } while (0) 108 109 #define mul(r, a, w, c) \ 110 do { \ 111 BN_ULONG high, low, ret, ta = (a); \ 112 BN_UMULT_LOHI(low, high, w, ta); \ 113 ret = low + (c); \ 114 (c) = high; \ 115 (c) += (ret < low) ? 1 : 0; \ 116 (r) = ret; \ 117 } while (0) 118 119 #define sqr(r0, r1, a) \ 120 do { \ 121 BN_ULONG tmp = (a); \ 122 BN_UMULT_LOHI(r0, r1, tmp, tmp); \ 123 } while (0) 124 125 #endif /* !BN_ULLONG */ 126 127 BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num, 128 BN_ULONG w) { 129 BN_ULONG c1 = 0; 130 131 assert(num >= 0); 132 if (num <= 0) { 133 return c1; 134 } 135 136 while (num & ~3) { 137 mul_add(rp[0], ap[0], w, c1); 138 mul_add(rp[1], ap[1], w, c1); 139 mul_add(rp[2], ap[2], w, c1); 140 mul_add(rp[3], ap[3], w, c1); 141 ap += 4; 142 rp += 4; 143 num -= 4; 144 } 145 146 while (num) { 147 mul_add(rp[0], ap[0], w, c1); 148 ap++; 149 rp++; 150 num--; 151 } 152 153 return c1; 154 } 155 156 BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w) { 157 BN_ULONG c1 = 0; 158 159 assert(num >= 0); 160 if (num <= 0) { 161 return c1; 162 } 163 164 while (num & ~3) { 165 mul(rp[0], ap[0], w, c1); 166 mul(rp[1], ap[1], w, c1); 167 mul(rp[2], ap[2], w, c1); 168 mul(rp[3], ap[3], w, c1); 169 ap += 4; 170 rp += 4; 171 num -= 4; 172 } 173 while (num) { 174 mul(rp[0], ap[0], w, c1); 175 ap++; 176 rp++; 177 num--; 178 } 179 return c1; 180 } 181 182 void bn_sqr_words(BN_ULONG *r, const BN_ULONG *a, int n) { 183 assert(n >= 0); 184 if (n <= 0) { 185 return; 186 } 187 188 while (n & ~3) { 189 sqr(r[0], r[1], a[0]); 190 sqr(r[2], r[3], a[1]); 191 sqr(r[4], r[5], a[2]); 192 sqr(r[6], r[7], a[3]); 193 a += 4; 194 r += 8; 195 n -= 4; 196 } 197 while (n) { 198 sqr(r[0], r[1], a[0]); 199 a++; 200 r += 2; 201 n--; 202 } 203 } 204 205 #ifdef BN_ULLONG 206 BN_ULONG bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, 207 int n) { 208 BN_ULLONG ll = 0; 209 210 assert(n >= 0); 211 if (n <= 0) { 212 return (BN_ULONG)0; 213 } 214 215 while (n & ~3) { 216 ll += (BN_ULLONG)a[0] + b[0]; 217 r[0] = (BN_ULONG)ll & BN_MASK2; 218 ll >>= BN_BITS2; 219 ll += (BN_ULLONG)a[1] + b[1]; 220 r[1] = (BN_ULONG)ll & BN_MASK2; 221 ll >>= BN_BITS2; 222 ll += (BN_ULLONG)a[2] + b[2]; 223 r[2] = (BN_ULONG)ll & BN_MASK2; 224 ll >>= BN_BITS2; 225 ll += (BN_ULLONG)a[3] + b[3]; 226 r[3] = (BN_ULONG)ll & BN_MASK2; 227 ll >>= BN_BITS2; 228 a += 4; 229 b += 4; 230 r += 4; 231 n -= 4; 232 } 233 while (n) { 234 ll += (BN_ULLONG)a[0] + b[0]; 235 r[0] = (BN_ULONG)ll & BN_MASK2; 236 ll >>= BN_BITS2; 237 a++; 238 b++; 239 r++; 240 n--; 241 } 242 return (BN_ULONG)ll; 243 } 244 245 #else /* !BN_ULLONG */ 246 247 BN_ULONG bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, 248 int n) { 249 BN_ULONG c, l, t; 250 251 assert(n >= 0); 252 if (n <= 0) { 253 return (BN_ULONG)0; 254 } 255 256 c = 0; 257 while (n & ~3) { 258 t = a[0]; 259 t = (t + c) & BN_MASK2; 260 c = (t < c); 261 l = (t + b[0]) & BN_MASK2; 262 c += (l < t); 263 r[0] = l; 264 t = a[1]; 265 t = (t + c) & BN_MASK2; 266 c = (t < c); 267 l = (t + b[1]) & BN_MASK2; 268 c += (l < t); 269 r[1] = l; 270 t = a[2]; 271 t = (t + c) & BN_MASK2; 272 c = (t < c); 273 l = (t + b[2]) & BN_MASK2; 274 c += (l < t); 275 r[2] = l; 276 t = a[3]; 277 t = (t + c) & BN_MASK2; 278 c = (t < c); 279 l = (t + b[3]) & BN_MASK2; 280 c += (l < t); 281 r[3] = l; 282 a += 4; 283 b += 4; 284 r += 4; 285 n -= 4; 286 } 287 while (n) { 288 t = a[0]; 289 t = (t + c) & BN_MASK2; 290 c = (t < c); 291 l = (t + b[0]) & BN_MASK2; 292 c += (l < t); 293 r[0] = l; 294 a++; 295 b++; 296 r++; 297 n--; 298 } 299 return (BN_ULONG)c; 300 } 301 302 #endif /* !BN_ULLONG */ 303 304 BN_ULONG bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, 305 int n) { 306 BN_ULONG t1, t2; 307 int c = 0; 308 309 assert(n >= 0); 310 if (n <= 0) { 311 return (BN_ULONG)0; 312 } 313 314 while (n & ~3) { 315 t1 = a[0]; 316 t2 = b[0]; 317 r[0] = (t1 - t2 - c) & BN_MASK2; 318 if (t1 != t2) { 319 c = (t1 < t2); 320 } 321 t1 = a[1]; 322 t2 = b[1]; 323 r[1] = (t1 - t2 - c) & BN_MASK2; 324 if (t1 != t2) { 325 c = (t1 < t2); 326 } 327 t1 = a[2]; 328 t2 = b[2]; 329 r[2] = (t1 - t2 - c) & BN_MASK2; 330 if (t1 != t2) { 331 c = (t1 < t2); 332 } 333 t1 = a[3]; 334 t2 = b[3]; 335 r[3] = (t1 - t2 - c) & BN_MASK2; 336 if (t1 != t2) { 337 c = (t1 < t2); 338 } 339 a += 4; 340 b += 4; 341 r += 4; 342 n -= 4; 343 } 344 while (n) { 345 t1 = a[0]; 346 t2 = b[0]; 347 r[0] = (t1 - t2 - c) & BN_MASK2; 348 if (t1 != t2) { 349 c = (t1 < t2); 350 } 351 a++; 352 b++; 353 r++; 354 n--; 355 } 356 return c; 357 } 358 359 /* mul_add_c(a,b,c0,c1,c2) -- c+=a*b for three word number c=(c2,c1,c0) */ 360 /* mul_add_c2(a,b,c0,c1,c2) -- c+=2*a*b for three word number c=(c2,c1,c0) */ 361 /* sqr_add_c(a,i,c0,c1,c2) -- c+=a[i]^2 for three word number c=(c2,c1,c0) */ 362 /* sqr_add_c2(a,i,c0,c1,c2) -- c+=2*a[i]*a[j] for three word number c=(c2,c1,c0) */ 363 364 #ifdef BN_ULLONG 365 366 /* Keep in mind that additions to multiplication result can not overflow, 367 * because its high half cannot be all-ones. */ 368 #define mul_add_c(a, b, c0, c1, c2) \ 369 do { \ 370 BN_ULONG hi; \ 371 BN_ULLONG t = (BN_ULLONG)(a) * (b); \ 372 t += (c0); /* no carry */ \ 373 (c0) = (BN_ULONG)Lw(t); \ 374 hi = (BN_ULONG)Hw(t); \ 375 (c1) = ((c1) + (hi)) & BN_MASK2; \ 376 if ((c1) < hi) { \ 377 (c2)++; \ 378 } \ 379 } while (0) 380 381 #define mul_add_c2(a, b, c0, c1, c2) \ 382 do { \ 383 BN_ULONG hi; \ 384 BN_ULLONG t = (BN_ULLONG)(a) * (b); \ 385 BN_ULLONG tt = t + (c0); /* no carry */ \ 386 (c0) = (BN_ULONG)Lw(tt); \ 387 hi = (BN_ULONG)Hw(tt); \ 388 (c1) = ((c1) + hi) & BN_MASK2; \ 389 if ((c1) < hi) { \ 390 (c2)++; \ 391 } \ 392 t += (c0); /* no carry */ \ 393 (c0) = (BN_ULONG)Lw(t); \ 394 hi = (BN_ULONG)Hw(t); \ 395 (c1) = ((c1) + hi) & BN_MASK2; \ 396 if ((c1) < hi) { \ 397 (c2)++; \ 398 } \ 399 } while (0) 400 401 #define sqr_add_c(a, i, c0, c1, c2) \ 402 do { \ 403 BN_ULONG hi; \ 404 BN_ULLONG t = (BN_ULLONG)(a)[i] * (a)[i]; \ 405 t += (c0); /* no carry */ \ 406 (c0) = (BN_ULONG)Lw(t); \ 407 hi = (BN_ULONG)Hw(t); \ 408 (c1) = ((c1) + hi) & BN_MASK2; \ 409 if ((c1) < hi) { \ 410 (c2)++; \ 411 } \ 412 } while (0) 413 414 #define sqr_add_c2(a, i, j, c0, c1, c2) mul_add_c2((a)[i], (a)[j], c0, c1, c2) 415 416 #else 417 418 /* Keep in mind that additions to hi can not overflow, because the high word of 419 * a multiplication result cannot be all-ones. */ 420 #define mul_add_c(a, b, c0, c1, c2) \ 421 do { \ 422 BN_ULONG ta = (a), tb = (b); \ 423 BN_ULONG lo, hi; \ 424 BN_UMULT_LOHI(lo, hi, ta, tb); \ 425 (c0) += lo; \ 426 hi += ((c0) < lo) ? 1 : 0; \ 427 (c1) += hi; \ 428 (c2) += ((c1) < hi) ? 1 : 0; \ 429 } while (0) 430 431 #define mul_add_c2(a, b, c0, c1, c2) \ 432 do { \ 433 BN_ULONG ta = (a), tb = (b); \ 434 BN_ULONG lo, hi, tt; \ 435 BN_UMULT_LOHI(lo, hi, ta, tb); \ 436 (c0) += lo; \ 437 tt = hi + (((c0) < lo) ? 1 : 0); \ 438 (c1) += tt; \ 439 (c2) += ((c1) < tt) ? 1 : 0; \ 440 (c0) += lo; \ 441 hi += (c0 < lo) ? 1 : 0; \ 442 (c1) += hi; \ 443 (c2) += ((c1) < hi) ? 1 : 0; \ 444 } while (0) 445 446 #define sqr_add_c(a, i, c0, c1, c2) \ 447 do { \ 448 BN_ULONG ta = (a)[i]; \ 449 BN_ULONG lo, hi; \ 450 BN_UMULT_LOHI(lo, hi, ta, ta); \ 451 (c0) += lo; \ 452 hi += (c0 < lo) ? 1 : 0; \ 453 (c1) += hi; \ 454 (c2) += ((c1) < hi) ? 1 : 0; \ 455 } while (0) 456 457 #define sqr_add_c2(a, i, j, c0, c1, c2) mul_add_c2((a)[i], (a)[j], c0, c1, c2) 458 459 #endif /* !BN_ULLONG */ 460 461 void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) { 462 BN_ULONG c1, c2, c3; 463 464 c1 = 0; 465 c2 = 0; 466 c3 = 0; 467 mul_add_c(a[0], b[0], c1, c2, c3); 468 r[0] = c1; 469 c1 = 0; 470 mul_add_c(a[0], b[1], c2, c3, c1); 471 mul_add_c(a[1], b[0], c2, c3, c1); 472 r[1] = c2; 473 c2 = 0; 474 mul_add_c(a[2], b[0], c3, c1, c2); 475 mul_add_c(a[1], b[1], c3, c1, c2); 476 mul_add_c(a[0], b[2], c3, c1, c2); 477 r[2] = c3; 478 c3 = 0; 479 mul_add_c(a[0], b[3], c1, c2, c3); 480 mul_add_c(a[1], b[2], c1, c2, c3); 481 mul_add_c(a[2], b[1], c1, c2, c3); 482 mul_add_c(a[3], b[0], c1, c2, c3); 483 r[3] = c1; 484 c1 = 0; 485 mul_add_c(a[4], b[0], c2, c3, c1); 486 mul_add_c(a[3], b[1], c2, c3, c1); 487 mul_add_c(a[2], b[2], c2, c3, c1); 488 mul_add_c(a[1], b[3], c2, c3, c1); 489 mul_add_c(a[0], b[4], c2, c3, c1); 490 r[4] = c2; 491 c2 = 0; 492 mul_add_c(a[0], b[5], c3, c1, c2); 493 mul_add_c(a[1], b[4], c3, c1, c2); 494 mul_add_c(a[2], b[3], c3, c1, c2); 495 mul_add_c(a[3], b[2], c3, c1, c2); 496 mul_add_c(a[4], b[1], c3, c1, c2); 497 mul_add_c(a[5], b[0], c3, c1, c2); 498 r[5] = c3; 499 c3 = 0; 500 mul_add_c(a[6], b[0], c1, c2, c3); 501 mul_add_c(a[5], b[1], c1, c2, c3); 502 mul_add_c(a[4], b[2], c1, c2, c3); 503 mul_add_c(a[3], b[3], c1, c2, c3); 504 mul_add_c(a[2], b[4], c1, c2, c3); 505 mul_add_c(a[1], b[5], c1, c2, c3); 506 mul_add_c(a[0], b[6], c1, c2, c3); 507 r[6] = c1; 508 c1 = 0; 509 mul_add_c(a[0], b[7], c2, c3, c1); 510 mul_add_c(a[1], b[6], c2, c3, c1); 511 mul_add_c(a[2], b[5], c2, c3, c1); 512 mul_add_c(a[3], b[4], c2, c3, c1); 513 mul_add_c(a[4], b[3], c2, c3, c1); 514 mul_add_c(a[5], b[2], c2, c3, c1); 515 mul_add_c(a[6], b[1], c2, c3, c1); 516 mul_add_c(a[7], b[0], c2, c3, c1); 517 r[7] = c2; 518 c2 = 0; 519 mul_add_c(a[7], b[1], c3, c1, c2); 520 mul_add_c(a[6], b[2], c3, c1, c2); 521 mul_add_c(a[5], b[3], c3, c1, c2); 522 mul_add_c(a[4], b[4], c3, c1, c2); 523 mul_add_c(a[3], b[5], c3, c1, c2); 524 mul_add_c(a[2], b[6], c3, c1, c2); 525 mul_add_c(a[1], b[7], c3, c1, c2); 526 r[8] = c3; 527 c3 = 0; 528 mul_add_c(a[2], b[7], c1, c2, c3); 529 mul_add_c(a[3], b[6], c1, c2, c3); 530 mul_add_c(a[4], b[5], c1, c2, c3); 531 mul_add_c(a[5], b[4], c1, c2, c3); 532 mul_add_c(a[6], b[3], c1, c2, c3); 533 mul_add_c(a[7], b[2], c1, c2, c3); 534 r[9] = c1; 535 c1 = 0; 536 mul_add_c(a[7], b[3], c2, c3, c1); 537 mul_add_c(a[6], b[4], c2, c3, c1); 538 mul_add_c(a[5], b[5], c2, c3, c1); 539 mul_add_c(a[4], b[6], c2, c3, c1); 540 mul_add_c(a[3], b[7], c2, c3, c1); 541 r[10] = c2; 542 c2 = 0; 543 mul_add_c(a[4], b[7], c3, c1, c2); 544 mul_add_c(a[5], b[6], c3, c1, c2); 545 mul_add_c(a[6], b[5], c3, c1, c2); 546 mul_add_c(a[7], b[4], c3, c1, c2); 547 r[11] = c3; 548 c3 = 0; 549 mul_add_c(a[7], b[5], c1, c2, c3); 550 mul_add_c(a[6], b[6], c1, c2, c3); 551 mul_add_c(a[5], b[7], c1, c2, c3); 552 r[12] = c1; 553 c1 = 0; 554 mul_add_c(a[6], b[7], c2, c3, c1); 555 mul_add_c(a[7], b[6], c2, c3, c1); 556 r[13] = c2; 557 c2 = 0; 558 mul_add_c(a[7], b[7], c3, c1, c2); 559 r[14] = c3; 560 r[15] = c1; 561 } 562 563 void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) { 564 BN_ULONG c1, c2, c3; 565 566 c1 = 0; 567 c2 = 0; 568 c3 = 0; 569 mul_add_c(a[0], b[0], c1, c2, c3); 570 r[0] = c1; 571 c1 = 0; 572 mul_add_c(a[0], b[1], c2, c3, c1); 573 mul_add_c(a[1], b[0], c2, c3, c1); 574 r[1] = c2; 575 c2 = 0; 576 mul_add_c(a[2], b[0], c3, c1, c2); 577 mul_add_c(a[1], b[1], c3, c1, c2); 578 mul_add_c(a[0], b[2], c3, c1, c2); 579 r[2] = c3; 580 c3 = 0; 581 mul_add_c(a[0], b[3], c1, c2, c3); 582 mul_add_c(a[1], b[2], c1, c2, c3); 583 mul_add_c(a[2], b[1], c1, c2, c3); 584 mul_add_c(a[3], b[0], c1, c2, c3); 585 r[3] = c1; 586 c1 = 0; 587 mul_add_c(a[3], b[1], c2, c3, c1); 588 mul_add_c(a[2], b[2], c2, c3, c1); 589 mul_add_c(a[1], b[3], c2, c3, c1); 590 r[4] = c2; 591 c2 = 0; 592 mul_add_c(a[2], b[3], c3, c1, c2); 593 mul_add_c(a[3], b[2], c3, c1, c2); 594 r[5] = c3; 595 c3 = 0; 596 mul_add_c(a[3], b[3], c1, c2, c3); 597 r[6] = c1; 598 r[7] = c2; 599 } 600 601 void bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a) { 602 BN_ULONG c1, c2, c3; 603 604 c1 = 0; 605 c2 = 0; 606 c3 = 0; 607 sqr_add_c(a, 0, c1, c2, c3); 608 r[0] = c1; 609 c1 = 0; 610 sqr_add_c2(a, 1, 0, c2, c3, c1); 611 r[1] = c2; 612 c2 = 0; 613 sqr_add_c(a, 1, c3, c1, c2); 614 sqr_add_c2(a, 2, 0, c3, c1, c2); 615 r[2] = c3; 616 c3 = 0; 617 sqr_add_c2(a, 3, 0, c1, c2, c3); 618 sqr_add_c2(a, 2, 1, c1, c2, c3); 619 r[3] = c1; 620 c1 = 0; 621 sqr_add_c(a, 2, c2, c3, c1); 622 sqr_add_c2(a, 3, 1, c2, c3, c1); 623 sqr_add_c2(a, 4, 0, c2, c3, c1); 624 r[4] = c2; 625 c2 = 0; 626 sqr_add_c2(a, 5, 0, c3, c1, c2); 627 sqr_add_c2(a, 4, 1, c3, c1, c2); 628 sqr_add_c2(a, 3, 2, c3, c1, c2); 629 r[5] = c3; 630 c3 = 0; 631 sqr_add_c(a, 3, c1, c2, c3); 632 sqr_add_c2(a, 4, 2, c1, c2, c3); 633 sqr_add_c2(a, 5, 1, c1, c2, c3); 634 sqr_add_c2(a, 6, 0, c1, c2, c3); 635 r[6] = c1; 636 c1 = 0; 637 sqr_add_c2(a, 7, 0, c2, c3, c1); 638 sqr_add_c2(a, 6, 1, c2, c3, c1); 639 sqr_add_c2(a, 5, 2, c2, c3, c1); 640 sqr_add_c2(a, 4, 3, c2, c3, c1); 641 r[7] = c2; 642 c2 = 0; 643 sqr_add_c(a, 4, c3, c1, c2); 644 sqr_add_c2(a, 5, 3, c3, c1, c2); 645 sqr_add_c2(a, 6, 2, c3, c1, c2); 646 sqr_add_c2(a, 7, 1, c3, c1, c2); 647 r[8] = c3; 648 c3 = 0; 649 sqr_add_c2(a, 7, 2, c1, c2, c3); 650 sqr_add_c2(a, 6, 3, c1, c2, c3); 651 sqr_add_c2(a, 5, 4, c1, c2, c3); 652 r[9] = c1; 653 c1 = 0; 654 sqr_add_c(a, 5, c2, c3, c1); 655 sqr_add_c2(a, 6, 4, c2, c3, c1); 656 sqr_add_c2(a, 7, 3, c2, c3, c1); 657 r[10] = c2; 658 c2 = 0; 659 sqr_add_c2(a, 7, 4, c3, c1, c2); 660 sqr_add_c2(a, 6, 5, c3, c1, c2); 661 r[11] = c3; 662 c3 = 0; 663 sqr_add_c(a, 6, c1, c2, c3); 664 sqr_add_c2(a, 7, 5, c1, c2, c3); 665 r[12] = c1; 666 c1 = 0; 667 sqr_add_c2(a, 7, 6, c2, c3, c1); 668 r[13] = c2; 669 c2 = 0; 670 sqr_add_c(a, 7, c3, c1, c2); 671 r[14] = c3; 672 r[15] = c1; 673 } 674 675 void bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a) { 676 BN_ULONG c1, c2, c3; 677 678 c1 = 0; 679 c2 = 0; 680 c3 = 0; 681 sqr_add_c(a, 0, c1, c2, c3); 682 r[0] = c1; 683 c1 = 0; 684 sqr_add_c2(a, 1, 0, c2, c3, c1); 685 r[1] = c2; 686 c2 = 0; 687 sqr_add_c(a, 1, c3, c1, c2); 688 sqr_add_c2(a, 2, 0, c3, c1, c2); 689 r[2] = c3; 690 c3 = 0; 691 sqr_add_c2(a, 3, 0, c1, c2, c3); 692 sqr_add_c2(a, 2, 1, c1, c2, c3); 693 r[3] = c1; 694 c1 = 0; 695 sqr_add_c(a, 2, c2, c3, c1); 696 sqr_add_c2(a, 3, 1, c2, c3, c1); 697 r[4] = c2; 698 c2 = 0; 699 sqr_add_c2(a, 3, 2, c3, c1, c2); 700 r[5] = c3; 701 c3 = 0; 702 sqr_add_c(a, 3, c1, c2, c3); 703 r[6] = c1; 704 r[7] = c2; 705 } 706 707 #undef mul_add 708 #undef mul 709 #undef sqr 710 #undef mul_add_c 711 #undef mul_add_c2 712 #undef sqr_add_c 713 #undef sqr_add_c2 714 715 #endif 716