1 /* Copyright (C) 1995-1998 Eric Young (eay (at) cryptsoft.com) 2 * All rights reserved. 3 * 4 * This package is an SSL implementation written 5 * by Eric Young (eay (at) cryptsoft.com). 6 * The implementation was written so as to conform with Netscapes SSL. 7 * 8 * This library is free for commercial and non-commercial use as long as 9 * the following conditions are aheared to. The following conditions 10 * apply to all code found in this distribution, be it the RC4, RSA, 11 * lhash, DES, etc., code; not just the SSL code. The SSL documentation 12 * included with this distribution is covered by the same copyright terms 13 * except that the holder is Tim Hudson (tjh (at) cryptsoft.com). 14 * 15 * Copyright remains Eric Young's, and as such any Copyright notices in 16 * the code are not to be removed. 17 * If this package is used in a product, Eric Young should be given attribution 18 * as the author of the parts of the library used. 19 * This can be in the form of a textual message at program startup or 20 * in documentation (online or textual) provided with the package. 21 * 22 * Redistribution and use in source and binary forms, with or without 23 * modification, are permitted provided that the following conditions 24 * are met: 25 * 1. Redistributions of source code must retain the copyright 26 * notice, this list of conditions and the following disclaimer. 27 * 2. Redistributions in binary form must reproduce the above copyright 28 * notice, this list of conditions and the following disclaimer in the 29 * documentation and/or other materials provided with the distribution. 30 * 3. All advertising materials mentioning features or use of this software 31 * must display the following acknowledgement: 32 * "This product includes cryptographic software written by 33 * Eric Young (eay (at) cryptsoft.com)" 34 * The word 'cryptographic' can be left out if the rouines from the library 35 * being used are not cryptographic related :-). 36 * 4. If you include any Windows specific code (or a derivative thereof) from 37 * the apps directory (application code) you must include an acknowledgement: 38 * "This product includes software written by Tim Hudson (tjh (at) cryptsoft.com)" 39 * 40 * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND 41 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 42 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 43 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 44 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 45 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 46 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 47 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 48 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 49 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 50 * SUCH DAMAGE. 51 * 52 * The licence and distribution terms for any publically available version or 53 * derivative of this code cannot be changed. i.e. this code cannot simply be 54 * copied and put under another distribution licence 55 * [including the GNU Public Licence.] */ 56 57 #include <openssl/bn.h> 58 59 #include <assert.h> 60 61 #include "internal.h" 62 63 64 // This file has two other implementations: x86 assembly language in 65 // asm/bn-586.pl and x86_64 inline assembly in asm/x86_64-gcc.c. 66 #if defined(OPENSSL_NO_ASM) || \ 67 !(defined(OPENSSL_X86) || \ 68 (defined(OPENSSL_X86_64) && (defined(__GNUC__) || defined(__clang__)))) 69 70 #ifdef BN_ULLONG 71 #define mul_add(r, a, w, c) \ 72 do { \ 73 BN_ULLONG t; \ 74 t = (BN_ULLONG)(w) * (a) + (r) + (c); \ 75 (r) = Lw(t); \ 76 (c) = Hw(t); \ 77 } while (0) 78 79 #define mul(r, a, w, c) \ 80 do { \ 81 BN_ULLONG t; \ 82 t = (BN_ULLONG)(w) * (a) + (c); \ 83 (r) = Lw(t); \ 84 (c) = Hw(t); \ 85 } while (0) 86 87 #define sqr(r0, r1, a) \ 88 do { \ 89 BN_ULLONG t; \ 90 t = (BN_ULLONG)(a) * (a); \ 91 (r0) = Lw(t); \ 92 (r1) = Hw(t); \ 93 } while (0) 94 95 #else 96 97 #define mul_add(r, a, w, c) \ 98 do { \ 99 BN_ULONG high, low, ret, tmp = (a); \ 100 ret = (r); \ 101 BN_UMULT_LOHI(low, high, w, tmp); \ 102 ret += (c); \ 103 (c) = (ret < (c)) ? 1 : 0; \ 104 (c) += high; \ 105 ret += low; \ 106 (c) += (ret < low) ? 1 : 0; \ 107 (r) = ret; \ 108 } while (0) 109 110 #define mul(r, a, w, c) \ 111 do { \ 112 BN_ULONG high, low, ret, ta = (a); \ 113 BN_UMULT_LOHI(low, high, w, ta); \ 114 ret = low + (c); \ 115 (c) = high; \ 116 (c) += (ret < low) ? 1 : 0; \ 117 (r) = ret; \ 118 } while (0) 119 120 #define sqr(r0, r1, a) \ 121 do { \ 122 BN_ULONG tmp = (a); \ 123 BN_UMULT_LOHI(r0, r1, tmp, tmp); \ 124 } while (0) 125 126 #endif // !BN_ULLONG 127 128 BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, size_t num, 129 BN_ULONG w) { 130 BN_ULONG c1 = 0; 131 132 if (num == 0) { 133 return c1; 134 } 135 136 while (num & ~3) { 137 mul_add(rp[0], ap[0], w, c1); 138 mul_add(rp[1], ap[1], w, c1); 139 mul_add(rp[2], ap[2], w, c1); 140 mul_add(rp[3], ap[3], w, c1); 141 ap += 4; 142 rp += 4; 143 num -= 4; 144 } 145 146 while (num) { 147 mul_add(rp[0], ap[0], w, c1); 148 ap++; 149 rp++; 150 num--; 151 } 152 153 return c1; 154 } 155 156 BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, size_t num, 157 BN_ULONG w) { 158 BN_ULONG c1 = 0; 159 160 if (num == 0) { 161 return c1; 162 } 163 164 while (num & ~3) { 165 mul(rp[0], ap[0], w, c1); 166 mul(rp[1], ap[1], w, c1); 167 mul(rp[2], ap[2], w, c1); 168 mul(rp[3], ap[3], w, c1); 169 ap += 4; 170 rp += 4; 171 num -= 4; 172 } 173 while (num) { 174 mul(rp[0], ap[0], w, c1); 175 ap++; 176 rp++; 177 num--; 178 } 179 return c1; 180 } 181 182 void bn_sqr_words(BN_ULONG *r, const BN_ULONG *a, size_t n) { 183 if (n == 0) { 184 return; 185 } 186 187 while (n & ~3) { 188 sqr(r[0], r[1], a[0]); 189 sqr(r[2], r[3], a[1]); 190 sqr(r[4], r[5], a[2]); 191 sqr(r[6], r[7], a[3]); 192 a += 4; 193 r += 8; 194 n -= 4; 195 } 196 while (n) { 197 sqr(r[0], r[1], a[0]); 198 a++; 199 r += 2; 200 n--; 201 } 202 } 203 204 #ifdef BN_ULLONG 205 BN_ULONG bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, 206 size_t n) { 207 BN_ULLONG ll = 0; 208 209 if (n == 0) { 210 return 0; 211 } 212 213 while (n & ~3) { 214 ll += (BN_ULLONG)a[0] + b[0]; 215 r[0] = (BN_ULONG)ll; 216 ll >>= BN_BITS2; 217 ll += (BN_ULLONG)a[1] + b[1]; 218 r[1] = (BN_ULONG)ll; 219 ll >>= BN_BITS2; 220 ll += (BN_ULLONG)a[2] + b[2]; 221 r[2] = (BN_ULONG)ll; 222 ll >>= BN_BITS2; 223 ll += (BN_ULLONG)a[3] + b[3]; 224 r[3] = (BN_ULONG)ll; 225 ll >>= BN_BITS2; 226 a += 4; 227 b += 4; 228 r += 4; 229 n -= 4; 230 } 231 while (n) { 232 ll += (BN_ULLONG)a[0] + b[0]; 233 r[0] = (BN_ULONG)ll; 234 ll >>= BN_BITS2; 235 a++; 236 b++; 237 r++; 238 n--; 239 } 240 return (BN_ULONG)ll; 241 } 242 243 #else // !BN_ULLONG 244 245 BN_ULONG bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, 246 size_t n) { 247 BN_ULONG c, l, t; 248 249 if (n == 0) { 250 return (BN_ULONG)0; 251 } 252 253 c = 0; 254 while (n & ~3) { 255 t = a[0]; 256 t += c; 257 c = (t < c); 258 l = t + b[0]; 259 c += (l < t); 260 r[0] = l; 261 t = a[1]; 262 t += c; 263 c = (t < c); 264 l = t + b[1]; 265 c += (l < t); 266 r[1] = l; 267 t = a[2]; 268 t += c; 269 c = (t < c); 270 l = t + b[2]; 271 c += (l < t); 272 r[2] = l; 273 t = a[3]; 274 t += c; 275 c = (t < c); 276 l = t + b[3]; 277 c += (l < t); 278 r[3] = l; 279 a += 4; 280 b += 4; 281 r += 4; 282 n -= 4; 283 } 284 while (n) { 285 t = a[0]; 286 t += c; 287 c = (t < c); 288 l = t + b[0]; 289 c += (l < t); 290 r[0] = l; 291 a++; 292 b++; 293 r++; 294 n--; 295 } 296 return (BN_ULONG)c; 297 } 298 299 #endif // !BN_ULLONG 300 301 BN_ULONG bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b, 302 size_t n) { 303 BN_ULONG t1, t2; 304 int c = 0; 305 306 if (n == 0) { 307 return (BN_ULONG)0; 308 } 309 310 while (n & ~3) { 311 t1 = a[0]; 312 t2 = b[0]; 313 r[0] = t1 - t2 - c; 314 if (t1 != t2) { 315 c = (t1 < t2); 316 } 317 t1 = a[1]; 318 t2 = b[1]; 319 r[1] = t1 - t2 - c; 320 if (t1 != t2) { 321 c = (t1 < t2); 322 } 323 t1 = a[2]; 324 t2 = b[2]; 325 r[2] = t1 - t2 - c; 326 if (t1 != t2) { 327 c = (t1 < t2); 328 } 329 t1 = a[3]; 330 t2 = b[3]; 331 r[3] = t1 - t2 - c; 332 if (t1 != t2) { 333 c = (t1 < t2); 334 } 335 a += 4; 336 b += 4; 337 r += 4; 338 n -= 4; 339 } 340 while (n) { 341 t1 = a[0]; 342 t2 = b[0]; 343 r[0] = t1 - t2 - c; 344 if (t1 != t2) { 345 c = (t1 < t2); 346 } 347 a++; 348 b++; 349 r++; 350 n--; 351 } 352 return c; 353 } 354 355 // mul_add_c(a,b,c0,c1,c2) -- c+=a*b for three word number c=(c2,c1,c0) 356 // mul_add_c2(a,b,c0,c1,c2) -- c+=2*a*b for three word number c=(c2,c1,c0) 357 // sqr_add_c(a,i,c0,c1,c2) -- c+=a[i]^2 for three word number c=(c2,c1,c0) 358 // sqr_add_c2(a,i,c0,c1,c2) -- c+=2*a[i]*a[j] for three word number c=(c2,c1,c0) 359 360 #ifdef BN_ULLONG 361 362 // Keep in mind that additions to multiplication result can not overflow, 363 // because its high half cannot be all-ones. 364 #define mul_add_c(a, b, c0, c1, c2) \ 365 do { \ 366 BN_ULONG hi; \ 367 BN_ULLONG t = (BN_ULLONG)(a) * (b); \ 368 t += (c0); /* no carry */ \ 369 (c0) = (BN_ULONG)Lw(t); \ 370 hi = (BN_ULONG)Hw(t); \ 371 (c1) += (hi); \ 372 if ((c1) < hi) { \ 373 (c2)++; \ 374 } \ 375 } while (0) 376 377 #define mul_add_c2(a, b, c0, c1, c2) \ 378 do { \ 379 BN_ULONG hi; \ 380 BN_ULLONG t = (BN_ULLONG)(a) * (b); \ 381 BN_ULLONG tt = t + (c0); /* no carry */ \ 382 (c0) = (BN_ULONG)Lw(tt); \ 383 hi = (BN_ULONG)Hw(tt); \ 384 (c1) += hi; \ 385 if ((c1) < hi) { \ 386 (c2)++; \ 387 } \ 388 t += (c0); /* no carry */ \ 389 (c0) = (BN_ULONG)Lw(t); \ 390 hi = (BN_ULONG)Hw(t); \ 391 (c1) += hi; \ 392 if ((c1) < hi) { \ 393 (c2)++; \ 394 } \ 395 } while (0) 396 397 #define sqr_add_c(a, i, c0, c1, c2) \ 398 do { \ 399 BN_ULONG hi; \ 400 BN_ULLONG t = (BN_ULLONG)(a)[i] * (a)[i]; \ 401 t += (c0); /* no carry */ \ 402 (c0) = (BN_ULONG)Lw(t); \ 403 hi = (BN_ULONG)Hw(t); \ 404 (c1) += hi; \ 405 if ((c1) < hi) { \ 406 (c2)++; \ 407 } \ 408 } while (0) 409 410 #define sqr_add_c2(a, i, j, c0, c1, c2) mul_add_c2((a)[i], (a)[j], c0, c1, c2) 411 412 #else 413 414 // Keep in mind that additions to hi can not overflow, because the high word of 415 // a multiplication result cannot be all-ones. 416 #define mul_add_c(a, b, c0, c1, c2) \ 417 do { \ 418 BN_ULONG ta = (a), tb = (b); \ 419 BN_ULONG lo, hi; \ 420 BN_UMULT_LOHI(lo, hi, ta, tb); \ 421 (c0) += lo; \ 422 hi += ((c0) < lo) ? 1 : 0; \ 423 (c1) += hi; \ 424 (c2) += ((c1) < hi) ? 1 : 0; \ 425 } while (0) 426 427 #define mul_add_c2(a, b, c0, c1, c2) \ 428 do { \ 429 BN_ULONG ta = (a), tb = (b); \ 430 BN_ULONG lo, hi, tt; \ 431 BN_UMULT_LOHI(lo, hi, ta, tb); \ 432 (c0) += lo; \ 433 tt = hi + (((c0) < lo) ? 1 : 0); \ 434 (c1) += tt; \ 435 (c2) += ((c1) < tt) ? 1 : 0; \ 436 (c0) += lo; \ 437 hi += (c0 < lo) ? 1 : 0; \ 438 (c1) += hi; \ 439 (c2) += ((c1) < hi) ? 1 : 0; \ 440 } while (0) 441 442 #define sqr_add_c(a, i, c0, c1, c2) \ 443 do { \ 444 BN_ULONG ta = (a)[i]; \ 445 BN_ULONG lo, hi; \ 446 BN_UMULT_LOHI(lo, hi, ta, ta); \ 447 (c0) += lo; \ 448 hi += (c0 < lo) ? 1 : 0; \ 449 (c1) += hi; \ 450 (c2) += ((c1) < hi) ? 1 : 0; \ 451 } while (0) 452 453 #define sqr_add_c2(a, i, j, c0, c1, c2) mul_add_c2((a)[i], (a)[j], c0, c1, c2) 454 455 #endif // !BN_ULLONG 456 457 void bn_mul_comba8(BN_ULONG r[16], const BN_ULONG a[8], const BN_ULONG b[8]) { 458 BN_ULONG c1, c2, c3; 459 460 c1 = 0; 461 c2 = 0; 462 c3 = 0; 463 mul_add_c(a[0], b[0], c1, c2, c3); 464 r[0] = c1; 465 c1 = 0; 466 mul_add_c(a[0], b[1], c2, c3, c1); 467 mul_add_c(a[1], b[0], c2, c3, c1); 468 r[1] = c2; 469 c2 = 0; 470 mul_add_c(a[2], b[0], c3, c1, c2); 471 mul_add_c(a[1], b[1], c3, c1, c2); 472 mul_add_c(a[0], b[2], c3, c1, c2); 473 r[2] = c3; 474 c3 = 0; 475 mul_add_c(a[0], b[3], c1, c2, c3); 476 mul_add_c(a[1], b[2], c1, c2, c3); 477 mul_add_c(a[2], b[1], c1, c2, c3); 478 mul_add_c(a[3], b[0], c1, c2, c3); 479 r[3] = c1; 480 c1 = 0; 481 mul_add_c(a[4], b[0], c2, c3, c1); 482 mul_add_c(a[3], b[1], c2, c3, c1); 483 mul_add_c(a[2], b[2], c2, c3, c1); 484 mul_add_c(a[1], b[3], c2, c3, c1); 485 mul_add_c(a[0], b[4], c2, c3, c1); 486 r[4] = c2; 487 c2 = 0; 488 mul_add_c(a[0], b[5], c3, c1, c2); 489 mul_add_c(a[1], b[4], c3, c1, c2); 490 mul_add_c(a[2], b[3], c3, c1, c2); 491 mul_add_c(a[3], b[2], c3, c1, c2); 492 mul_add_c(a[4], b[1], c3, c1, c2); 493 mul_add_c(a[5], b[0], c3, c1, c2); 494 r[5] = c3; 495 c3 = 0; 496 mul_add_c(a[6], b[0], c1, c2, c3); 497 mul_add_c(a[5], b[1], c1, c2, c3); 498 mul_add_c(a[4], b[2], c1, c2, c3); 499 mul_add_c(a[3], b[3], c1, c2, c3); 500 mul_add_c(a[2], b[4], c1, c2, c3); 501 mul_add_c(a[1], b[5], c1, c2, c3); 502 mul_add_c(a[0], b[6], c1, c2, c3); 503 r[6] = c1; 504 c1 = 0; 505 mul_add_c(a[0], b[7], c2, c3, c1); 506 mul_add_c(a[1], b[6], c2, c3, c1); 507 mul_add_c(a[2], b[5], c2, c3, c1); 508 mul_add_c(a[3], b[4], c2, c3, c1); 509 mul_add_c(a[4], b[3], c2, c3, c1); 510 mul_add_c(a[5], b[2], c2, c3, c1); 511 mul_add_c(a[6], b[1], c2, c3, c1); 512 mul_add_c(a[7], b[0], c2, c3, c1); 513 r[7] = c2; 514 c2 = 0; 515 mul_add_c(a[7], b[1], c3, c1, c2); 516 mul_add_c(a[6], b[2], c3, c1, c2); 517 mul_add_c(a[5], b[3], c3, c1, c2); 518 mul_add_c(a[4], b[4], c3, c1, c2); 519 mul_add_c(a[3], b[5], c3, c1, c2); 520 mul_add_c(a[2], b[6], c3, c1, c2); 521 mul_add_c(a[1], b[7], c3, c1, c2); 522 r[8] = c3; 523 c3 = 0; 524 mul_add_c(a[2], b[7], c1, c2, c3); 525 mul_add_c(a[3], b[6], c1, c2, c3); 526 mul_add_c(a[4], b[5], c1, c2, c3); 527 mul_add_c(a[5], b[4], c1, c2, c3); 528 mul_add_c(a[6], b[3], c1, c2, c3); 529 mul_add_c(a[7], b[2], c1, c2, c3); 530 r[9] = c1; 531 c1 = 0; 532 mul_add_c(a[7], b[3], c2, c3, c1); 533 mul_add_c(a[6], b[4], c2, c3, c1); 534 mul_add_c(a[5], b[5], c2, c3, c1); 535 mul_add_c(a[4], b[6], c2, c3, c1); 536 mul_add_c(a[3], b[7], c2, c3, c1); 537 r[10] = c2; 538 c2 = 0; 539 mul_add_c(a[4], b[7], c3, c1, c2); 540 mul_add_c(a[5], b[6], c3, c1, c2); 541 mul_add_c(a[6], b[5], c3, c1, c2); 542 mul_add_c(a[7], b[4], c3, c1, c2); 543 r[11] = c3; 544 c3 = 0; 545 mul_add_c(a[7], b[5], c1, c2, c3); 546 mul_add_c(a[6], b[6], c1, c2, c3); 547 mul_add_c(a[5], b[7], c1, c2, c3); 548 r[12] = c1; 549 c1 = 0; 550 mul_add_c(a[6], b[7], c2, c3, c1); 551 mul_add_c(a[7], b[6], c2, c3, c1); 552 r[13] = c2; 553 c2 = 0; 554 mul_add_c(a[7], b[7], c3, c1, c2); 555 r[14] = c3; 556 r[15] = c1; 557 } 558 559 void bn_mul_comba4(BN_ULONG r[8], const BN_ULONG a[4], const BN_ULONG b[4]) { 560 BN_ULONG c1, c2, c3; 561 562 c1 = 0; 563 c2 = 0; 564 c3 = 0; 565 mul_add_c(a[0], b[0], c1, c2, c3); 566 r[0] = c1; 567 c1 = 0; 568 mul_add_c(a[0], b[1], c2, c3, c1); 569 mul_add_c(a[1], b[0], c2, c3, c1); 570 r[1] = c2; 571 c2 = 0; 572 mul_add_c(a[2], b[0], c3, c1, c2); 573 mul_add_c(a[1], b[1], c3, c1, c2); 574 mul_add_c(a[0], b[2], c3, c1, c2); 575 r[2] = c3; 576 c3 = 0; 577 mul_add_c(a[0], b[3], c1, c2, c3); 578 mul_add_c(a[1], b[2], c1, c2, c3); 579 mul_add_c(a[2], b[1], c1, c2, c3); 580 mul_add_c(a[3], b[0], c1, c2, c3); 581 r[3] = c1; 582 c1 = 0; 583 mul_add_c(a[3], b[1], c2, c3, c1); 584 mul_add_c(a[2], b[2], c2, c3, c1); 585 mul_add_c(a[1], b[3], c2, c3, c1); 586 r[4] = c2; 587 c2 = 0; 588 mul_add_c(a[2], b[3], c3, c1, c2); 589 mul_add_c(a[3], b[2], c3, c1, c2); 590 r[5] = c3; 591 c3 = 0; 592 mul_add_c(a[3], b[3], c1, c2, c3); 593 r[6] = c1; 594 r[7] = c2; 595 } 596 597 void bn_sqr_comba8(BN_ULONG r[16], const BN_ULONG a[8]) { 598 BN_ULONG c1, c2, c3; 599 600 c1 = 0; 601 c2 = 0; 602 c3 = 0; 603 sqr_add_c(a, 0, c1, c2, c3); 604 r[0] = c1; 605 c1 = 0; 606 sqr_add_c2(a, 1, 0, c2, c3, c1); 607 r[1] = c2; 608 c2 = 0; 609 sqr_add_c(a, 1, c3, c1, c2); 610 sqr_add_c2(a, 2, 0, c3, c1, c2); 611 r[2] = c3; 612 c3 = 0; 613 sqr_add_c2(a, 3, 0, c1, c2, c3); 614 sqr_add_c2(a, 2, 1, c1, c2, c3); 615 r[3] = c1; 616 c1 = 0; 617 sqr_add_c(a, 2, c2, c3, c1); 618 sqr_add_c2(a, 3, 1, c2, c3, c1); 619 sqr_add_c2(a, 4, 0, c2, c3, c1); 620 r[4] = c2; 621 c2 = 0; 622 sqr_add_c2(a, 5, 0, c3, c1, c2); 623 sqr_add_c2(a, 4, 1, c3, c1, c2); 624 sqr_add_c2(a, 3, 2, c3, c1, c2); 625 r[5] = c3; 626 c3 = 0; 627 sqr_add_c(a, 3, c1, c2, c3); 628 sqr_add_c2(a, 4, 2, c1, c2, c3); 629 sqr_add_c2(a, 5, 1, c1, c2, c3); 630 sqr_add_c2(a, 6, 0, c1, c2, c3); 631 r[6] = c1; 632 c1 = 0; 633 sqr_add_c2(a, 7, 0, c2, c3, c1); 634 sqr_add_c2(a, 6, 1, c2, c3, c1); 635 sqr_add_c2(a, 5, 2, c2, c3, c1); 636 sqr_add_c2(a, 4, 3, c2, c3, c1); 637 r[7] = c2; 638 c2 = 0; 639 sqr_add_c(a, 4, c3, c1, c2); 640 sqr_add_c2(a, 5, 3, c3, c1, c2); 641 sqr_add_c2(a, 6, 2, c3, c1, c2); 642 sqr_add_c2(a, 7, 1, c3, c1, c2); 643 r[8] = c3; 644 c3 = 0; 645 sqr_add_c2(a, 7, 2, c1, c2, c3); 646 sqr_add_c2(a, 6, 3, c1, c2, c3); 647 sqr_add_c2(a, 5, 4, c1, c2, c3); 648 r[9] = c1; 649 c1 = 0; 650 sqr_add_c(a, 5, c2, c3, c1); 651 sqr_add_c2(a, 6, 4, c2, c3, c1); 652 sqr_add_c2(a, 7, 3, c2, c3, c1); 653 r[10] = c2; 654 c2 = 0; 655 sqr_add_c2(a, 7, 4, c3, c1, c2); 656 sqr_add_c2(a, 6, 5, c3, c1, c2); 657 r[11] = c3; 658 c3 = 0; 659 sqr_add_c(a, 6, c1, c2, c3); 660 sqr_add_c2(a, 7, 5, c1, c2, c3); 661 r[12] = c1; 662 c1 = 0; 663 sqr_add_c2(a, 7, 6, c2, c3, c1); 664 r[13] = c2; 665 c2 = 0; 666 sqr_add_c(a, 7, c3, c1, c2); 667 r[14] = c3; 668 r[15] = c1; 669 } 670 671 void bn_sqr_comba4(BN_ULONG r[8], const BN_ULONG a[4]) { 672 BN_ULONG c1, c2, c3; 673 674 c1 = 0; 675 c2 = 0; 676 c3 = 0; 677 sqr_add_c(a, 0, c1, c2, c3); 678 r[0] = c1; 679 c1 = 0; 680 sqr_add_c2(a, 1, 0, c2, c3, c1); 681 r[1] = c2; 682 c2 = 0; 683 sqr_add_c(a, 1, c3, c1, c2); 684 sqr_add_c2(a, 2, 0, c3, c1, c2); 685 r[2] = c3; 686 c3 = 0; 687 sqr_add_c2(a, 3, 0, c1, c2, c3); 688 sqr_add_c2(a, 2, 1, c1, c2, c3); 689 r[3] = c1; 690 c1 = 0; 691 sqr_add_c(a, 2, c2, c3, c1); 692 sqr_add_c2(a, 3, 1, c2, c3, c1); 693 r[4] = c2; 694 c2 = 0; 695 sqr_add_c2(a, 3, 2, c3, c1, c2); 696 r[5] = c3; 697 c3 = 0; 698 sqr_add_c(a, 3, c1, c2, c3); 699 r[6] = c1; 700 r[7] = c2; 701 } 702 703 #undef mul_add 704 #undef mul 705 #undef sqr 706 #undef mul_add_c 707 #undef mul_add_c2 708 #undef sqr_add_c 709 #undef sqr_add_c2 710 711 #endif 712