Home | History | Annotate | Download | only in bn
      1 /* Copyright (C) 1995-1998 Eric Young (eay (at) cryptsoft.com)
      2  * All rights reserved.
      3  *
      4  * This package is an SSL implementation written
      5  * by Eric Young (eay (at) cryptsoft.com).
      6  * The implementation was written so as to conform with Netscapes SSL.
      7  *
      8  * This library is free for commercial and non-commercial use as long as
      9  * the following conditions are aheared to.  The following conditions
     10  * apply to all code found in this distribution, be it the RC4, RSA,
     11  * lhash, DES, etc., code; not just the SSL code.  The SSL documentation
     12  * included with this distribution is covered by the same copyright terms
     13  * except that the holder is Tim Hudson (tjh (at) cryptsoft.com).
     14  *
     15  * Copyright remains Eric Young's, and as such any Copyright notices in
     16  * the code are not to be removed.
     17  * If this package is used in a product, Eric Young should be given attribution
     18  * as the author of the parts of the library used.
     19  * This can be in the form of a textual message at program startup or
     20  * in documentation (online or textual) provided with the package.
     21  *
     22  * Redistribution and use in source and binary forms, with or without
     23  * modification, are permitted provided that the following conditions
     24  * are met:
     25  * 1. Redistributions of source code must retain the copyright
     26  *    notice, this list of conditions and the following disclaimer.
     27  * 2. Redistributions in binary form must reproduce the above copyright
     28  *    notice, this list of conditions and the following disclaimer in the
     29  *    documentation and/or other materials provided with the distribution.
     30  * 3. All advertising materials mentioning features or use of this software
     31  *    must display the following acknowledgement:
     32  *    "This product includes cryptographic software written by
     33  *     Eric Young (eay (at) cryptsoft.com)"
     34  *    The word 'cryptographic' can be left out if the rouines from the library
     35  *    being used are not cryptographic related :-).
     36  * 4. If you include any Windows specific code (or a derivative thereof) from
     37  *    the apps directory (application code) you must include an acknowledgement:
     38  *    "This product includes software written by Tim Hudson (tjh (at) cryptsoft.com)"
     39  *
     40  * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
     41  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     42  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     43  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
     44  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     45  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     46  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     47  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     48  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     49  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     50  * SUCH DAMAGE.
     51  *
     52  * The licence and distribution terms for any publically available version or
     53  * derivative of this code cannot be changed.  i.e. this code cannot simply be
     54  * copied and put under another distribution licence
     55  * [including the GNU Public Licence.] */
     56 
     57 #include <openssl/bn.h>
     58 
     59 #include <assert.h>
     60 
     61 #include "internal.h"
     62 
     63 
     64 // This file has two other implementations: x86 assembly language in
     65 // asm/bn-586.pl and x86_64 inline assembly in asm/x86_64-gcc.c.
     66 #if defined(OPENSSL_NO_ASM) || \
     67     !(defined(OPENSSL_X86) ||  \
     68       (defined(OPENSSL_X86_64) && (defined(__GNUC__) || defined(__clang__))))
     69 
     70 #ifdef BN_ULLONG
     71 #define mul_add(r, a, w, c)               \
     72   do {                                    \
     73     BN_ULLONG t;                          \
     74     t = (BN_ULLONG)(w) * (a) + (r) + (c); \
     75     (r) = Lw(t);                          \
     76     (c) = Hw(t);                          \
     77   } while (0)
     78 
     79 #define mul(r, a, w, c)             \
     80   do {                              \
     81     BN_ULLONG t;                    \
     82     t = (BN_ULLONG)(w) * (a) + (c); \
     83     (r) = Lw(t);                    \
     84     (c) = Hw(t);                    \
     85   } while (0)
     86 
     87 #define sqr(r0, r1, a)        \
     88   do {                        \
     89     BN_ULLONG t;              \
     90     t = (BN_ULLONG)(a) * (a); \
     91     (r0) = Lw(t);             \
     92     (r1) = Hw(t);             \
     93   } while (0)
     94 
     95 #else
     96 
     97 #define mul_add(r, a, w, c)             \
     98   do {                                  \
     99     BN_ULONG high, low, ret, tmp = (a); \
    100     ret = (r);                          \
    101     BN_UMULT_LOHI(low, high, w, tmp);   \
    102     ret += (c);                         \
    103     (c) = (ret < (c)) ? 1 : 0;          \
    104     (c) += high;                        \
    105     ret += low;                         \
    106     (c) += (ret < low) ? 1 : 0;         \
    107     (r) = ret;                          \
    108   } while (0)
    109 
    110 #define mul(r, a, w, c)                \
    111   do {                                 \
    112     BN_ULONG high, low, ret, ta = (a); \
    113     BN_UMULT_LOHI(low, high, w, ta);   \
    114     ret = low + (c);                   \
    115     (c) = high;                        \
    116     (c) += (ret < low) ? 1 : 0;        \
    117     (r) = ret;                         \
    118   } while (0)
    119 
    120 #define sqr(r0, r1, a)               \
    121   do {                               \
    122     BN_ULONG tmp = (a);              \
    123     BN_UMULT_LOHI(r0, r1, tmp, tmp); \
    124   } while (0)
    125 
    126 #endif  // !BN_ULLONG
    127 
    128 BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, size_t num,
    129                           BN_ULONG w) {
    130   BN_ULONG c1 = 0;
    131 
    132   if (num == 0) {
    133     return c1;
    134   }
    135 
    136   while (num & ~3) {
    137     mul_add(rp[0], ap[0], w, c1);
    138     mul_add(rp[1], ap[1], w, c1);
    139     mul_add(rp[2], ap[2], w, c1);
    140     mul_add(rp[3], ap[3], w, c1);
    141     ap += 4;
    142     rp += 4;
    143     num -= 4;
    144   }
    145 
    146   while (num) {
    147     mul_add(rp[0], ap[0], w, c1);
    148     ap++;
    149     rp++;
    150     num--;
    151   }
    152 
    153   return c1;
    154 }
    155 
    156 BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, size_t num,
    157                       BN_ULONG w) {
    158   BN_ULONG c1 = 0;
    159 
    160   if (num == 0) {
    161     return c1;
    162   }
    163 
    164   while (num & ~3) {
    165     mul(rp[0], ap[0], w, c1);
    166     mul(rp[1], ap[1], w, c1);
    167     mul(rp[2], ap[2], w, c1);
    168     mul(rp[3], ap[3], w, c1);
    169     ap += 4;
    170     rp += 4;
    171     num -= 4;
    172   }
    173   while (num) {
    174     mul(rp[0], ap[0], w, c1);
    175     ap++;
    176     rp++;
    177     num--;
    178   }
    179   return c1;
    180 }
    181 
    182 void bn_sqr_words(BN_ULONG *r, const BN_ULONG *a, size_t n) {
    183   if (n == 0) {
    184     return;
    185   }
    186 
    187   while (n & ~3) {
    188     sqr(r[0], r[1], a[0]);
    189     sqr(r[2], r[3], a[1]);
    190     sqr(r[4], r[5], a[2]);
    191     sqr(r[6], r[7], a[3]);
    192     a += 4;
    193     r += 8;
    194     n -= 4;
    195   }
    196   while (n) {
    197     sqr(r[0], r[1], a[0]);
    198     a++;
    199     r += 2;
    200     n--;
    201   }
    202 }
    203 
    204 #ifdef BN_ULLONG
    205 BN_ULONG bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
    206                       size_t n) {
    207   BN_ULLONG ll = 0;
    208 
    209   if (n == 0) {
    210     return 0;
    211   }
    212 
    213   while (n & ~3) {
    214     ll += (BN_ULLONG)a[0] + b[0];
    215     r[0] = (BN_ULONG)ll;
    216     ll >>= BN_BITS2;
    217     ll += (BN_ULLONG)a[1] + b[1];
    218     r[1] = (BN_ULONG)ll;
    219     ll >>= BN_BITS2;
    220     ll += (BN_ULLONG)a[2] + b[2];
    221     r[2] = (BN_ULONG)ll;
    222     ll >>= BN_BITS2;
    223     ll += (BN_ULLONG)a[3] + b[3];
    224     r[3] = (BN_ULONG)ll;
    225     ll >>= BN_BITS2;
    226     a += 4;
    227     b += 4;
    228     r += 4;
    229     n -= 4;
    230   }
    231   while (n) {
    232     ll += (BN_ULLONG)a[0] + b[0];
    233     r[0] = (BN_ULONG)ll;
    234     ll >>= BN_BITS2;
    235     a++;
    236     b++;
    237     r++;
    238     n--;
    239   }
    240   return (BN_ULONG)ll;
    241 }
    242 
    243 #else  // !BN_ULLONG
    244 
    245 BN_ULONG bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
    246                       size_t n) {
    247   BN_ULONG c, l, t;
    248 
    249   if (n == 0) {
    250     return (BN_ULONG)0;
    251   }
    252 
    253   c = 0;
    254   while (n & ~3) {
    255     t = a[0];
    256     t += c;
    257     c = (t < c);
    258     l = t + b[0];
    259     c += (l < t);
    260     r[0] = l;
    261     t = a[1];
    262     t += c;
    263     c = (t < c);
    264     l = t + b[1];
    265     c += (l < t);
    266     r[1] = l;
    267     t = a[2];
    268     t += c;
    269     c = (t < c);
    270     l = t + b[2];
    271     c += (l < t);
    272     r[2] = l;
    273     t = a[3];
    274     t += c;
    275     c = (t < c);
    276     l = t + b[3];
    277     c += (l < t);
    278     r[3] = l;
    279     a += 4;
    280     b += 4;
    281     r += 4;
    282     n -= 4;
    283   }
    284   while (n) {
    285     t = a[0];
    286     t += c;
    287     c = (t < c);
    288     l = t + b[0];
    289     c += (l < t);
    290     r[0] = l;
    291     a++;
    292     b++;
    293     r++;
    294     n--;
    295   }
    296   return (BN_ULONG)c;
    297 }
    298 
    299 #endif  // !BN_ULLONG
    300 
    301 BN_ULONG bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
    302                       size_t n) {
    303   BN_ULONG t1, t2;
    304   int c = 0;
    305 
    306   if (n == 0) {
    307     return (BN_ULONG)0;
    308   }
    309 
    310   while (n & ~3) {
    311     t1 = a[0];
    312     t2 = b[0];
    313     r[0] = t1 - t2 - c;
    314     if (t1 != t2) {
    315       c = (t1 < t2);
    316     }
    317     t1 = a[1];
    318     t2 = b[1];
    319     r[1] = t1 - t2 - c;
    320     if (t1 != t2) {
    321       c = (t1 < t2);
    322     }
    323     t1 = a[2];
    324     t2 = b[2];
    325     r[2] = t1 - t2 - c;
    326     if (t1 != t2) {
    327       c = (t1 < t2);
    328     }
    329     t1 = a[3];
    330     t2 = b[3];
    331     r[3] = t1 - t2 - c;
    332     if (t1 != t2) {
    333       c = (t1 < t2);
    334     }
    335     a += 4;
    336     b += 4;
    337     r += 4;
    338     n -= 4;
    339   }
    340   while (n) {
    341     t1 = a[0];
    342     t2 = b[0];
    343     r[0] = t1 - t2 - c;
    344     if (t1 != t2) {
    345       c = (t1 < t2);
    346     }
    347     a++;
    348     b++;
    349     r++;
    350     n--;
    351   }
    352   return c;
    353 }
    354 
    355 // mul_add_c(a,b,c0,c1,c2)  -- c+=a*b for three word number c=(c2,c1,c0)
    356 // mul_add_c2(a,b,c0,c1,c2) -- c+=2*a*b for three word number c=(c2,c1,c0)
    357 // sqr_add_c(a,i,c0,c1,c2)  -- c+=a[i]^2 for three word number c=(c2,c1,c0)
    358 // sqr_add_c2(a,i,j,c0,c1,c2) -- c+=2*a[i]*a[j] for three word number c=(c2,c1,c0)
    359 
    360 #ifdef BN_ULLONG
    361 
    362 // Keep in mind that additions to multiplication result can not overflow,
    363 // because its high half cannot be all-ones.
    364 #define mul_add_c(a, b, c0, c1, c2)     \
    365   do {                                  \
    366     BN_ULONG hi;                        \
    367     BN_ULLONG t = (BN_ULLONG)(a) * (b); \
    368     t += (c0); /* no carry */           \
    369     (c0) = (BN_ULONG)Lw(t);             \
    370     hi = (BN_ULONG)Hw(t);               \
    371     (c1) += (hi);                       \
    372     if ((c1) < hi) {                    \
    373       (c2)++;                           \
    374     }                                   \
    375   } while (0)
    376 
    377 #define mul_add_c2(a, b, c0, c1, c2)        \
    378   do {                                      \
    379     BN_ULONG hi;                            \
    380     BN_ULLONG t = (BN_ULLONG)(a) * (b);     \
    381     BN_ULLONG tt = t + (c0); /* no carry */ \
    382     (c0) = (BN_ULONG)Lw(tt);                \
    383     hi = (BN_ULONG)Hw(tt);                  \
    384     (c1) += hi;                             \
    385     if ((c1) < hi) {                        \
    386       (c2)++;                               \
    387     }                                       \
    388     t += (c0); /* no carry */               \
    389     (c0) = (BN_ULONG)Lw(t);                 \
    390     hi = (BN_ULONG)Hw(t);                   \
    391     (c1) += hi;                             \
    392     if ((c1) < hi) {                        \
    393       (c2)++;                               \
    394     }                                       \
    395   } while (0)
    396 
    397 #define sqr_add_c(a, i, c0, c1, c2)           \
    398   do {                                        \
    399     BN_ULONG hi;                              \
    400     BN_ULLONG t = (BN_ULLONG)(a)[i] * (a)[i]; \
    401     t += (c0); /* no carry */                 \
    402     (c0) = (BN_ULONG)Lw(t);                   \
    403     hi = (BN_ULONG)Hw(t);                     \
    404     (c1) += hi;                               \
    405     if ((c1) < hi) {                          \
    406       (c2)++;                                 \
    407     }                                         \
    408   } while (0)
    409 
    410 #define sqr_add_c2(a, i, j, c0, c1, c2) mul_add_c2((a)[i], (a)[j], c0, c1, c2)
    411 
    412 #else
    413 
    414 // Keep in mind that additions to hi can not overflow, because the high word of
    415 // a multiplication result cannot be all-ones.
    416 #define mul_add_c(a, b, c0, c1, c2) \
    417   do {                              \
    418     BN_ULONG ta = (a), tb = (b);    \
    419     BN_ULONG lo, hi;                \
    420     BN_UMULT_LOHI(lo, hi, ta, tb);  \
    421     (c0) += lo;                     \
    422     hi += ((c0) < lo) ? 1 : 0;      \
    423     (c1) += hi;                     \
    424     (c2) += ((c1) < hi) ? 1 : 0;    \
    425   } while (0)
    426 
    427 #define mul_add_c2(a, b, c0, c1, c2) \
    428   do {                               \
    429     BN_ULONG ta = (a), tb = (b);     \
    430     BN_ULONG lo, hi, tt;             \
    431     BN_UMULT_LOHI(lo, hi, ta, tb);   \
    432     (c0) += lo;                      \
    433     tt = hi + (((c0) < lo) ? 1 : 0); \
    434     (c1) += tt;                      \
    435     (c2) += ((c1) < tt) ? 1 : 0;     \
    436     (c0) += lo;                      \
    437     hi += (c0 < lo) ? 1 : 0;         \
    438     (c1) += hi;                      \
    439     (c2) += ((c1) < hi) ? 1 : 0;     \
    440   } while (0)
    441 
    442 #define sqr_add_c(a, i, c0, c1, c2) \
    443   do {                              \
    444     BN_ULONG ta = (a)[i];           \
    445     BN_ULONG lo, hi;                \
    446     BN_UMULT_LOHI(lo, hi, ta, ta);  \
    447     (c0) += lo;                     \
    448     hi += (c0 < lo) ? 1 : 0;        \
    449     (c1) += hi;                     \
    450     (c2) += ((c1) < hi) ? 1 : 0;    \
    451   } while (0)
    452 
    453 #define sqr_add_c2(a, i, j, c0, c1, c2) mul_add_c2((a)[i], (a)[j], c0, c1, c2)
    454 
    455 #endif  // !BN_ULLONG
    456 
    457 void bn_mul_comba8(BN_ULONG r[16], const BN_ULONG a[8], const BN_ULONG b[8]) {
    458   BN_ULONG c1, c2, c3;
    459 
    460   c1 = 0;
    461   c2 = 0;
    462   c3 = 0;
    463   mul_add_c(a[0], b[0], c1, c2, c3);
    464   r[0] = c1;
    465   c1 = 0;
    466   mul_add_c(a[0], b[1], c2, c3, c1);
    467   mul_add_c(a[1], b[0], c2, c3, c1);
    468   r[1] = c2;
    469   c2 = 0;
    470   mul_add_c(a[2], b[0], c3, c1, c2);
    471   mul_add_c(a[1], b[1], c3, c1, c2);
    472   mul_add_c(a[0], b[2], c3, c1, c2);
    473   r[2] = c3;
    474   c3 = 0;
    475   mul_add_c(a[0], b[3], c1, c2, c3);
    476   mul_add_c(a[1], b[2], c1, c2, c3);
    477   mul_add_c(a[2], b[1], c1, c2, c3);
    478   mul_add_c(a[3], b[0], c1, c2, c3);
    479   r[3] = c1;
    480   c1 = 0;
    481   mul_add_c(a[4], b[0], c2, c3, c1);
    482   mul_add_c(a[3], b[1], c2, c3, c1);
    483   mul_add_c(a[2], b[2], c2, c3, c1);
    484   mul_add_c(a[1], b[3], c2, c3, c1);
    485   mul_add_c(a[0], b[4], c2, c3, c1);
    486   r[4] = c2;
    487   c2 = 0;
    488   mul_add_c(a[0], b[5], c3, c1, c2);
    489   mul_add_c(a[1], b[4], c3, c1, c2);
    490   mul_add_c(a[2], b[3], c3, c1, c2);
    491   mul_add_c(a[3], b[2], c3, c1, c2);
    492   mul_add_c(a[4], b[1], c3, c1, c2);
    493   mul_add_c(a[5], b[0], c3, c1, c2);
    494   r[5] = c3;
    495   c3 = 0;
    496   mul_add_c(a[6], b[0], c1, c2, c3);
    497   mul_add_c(a[5], b[1], c1, c2, c3);
    498   mul_add_c(a[4], b[2], c1, c2, c3);
    499   mul_add_c(a[3], b[3], c1, c2, c3);
    500   mul_add_c(a[2], b[4], c1, c2, c3);
    501   mul_add_c(a[1], b[5], c1, c2, c3);
    502   mul_add_c(a[0], b[6], c1, c2, c3);
    503   r[6] = c1;
    504   c1 = 0;
    505   mul_add_c(a[0], b[7], c2, c3, c1);
    506   mul_add_c(a[1], b[6], c2, c3, c1);
    507   mul_add_c(a[2], b[5], c2, c3, c1);
    508   mul_add_c(a[3], b[4], c2, c3, c1);
    509   mul_add_c(a[4], b[3], c2, c3, c1);
    510   mul_add_c(a[5], b[2], c2, c3, c1);
    511   mul_add_c(a[6], b[1], c2, c3, c1);
    512   mul_add_c(a[7], b[0], c2, c3, c1);
    513   r[7] = c2;
    514   c2 = 0;
    515   mul_add_c(a[7], b[1], c3, c1, c2);
    516   mul_add_c(a[6], b[2], c3, c1, c2);
    517   mul_add_c(a[5], b[3], c3, c1, c2);
    518   mul_add_c(a[4], b[4], c3, c1, c2);
    519   mul_add_c(a[3], b[5], c3, c1, c2);
    520   mul_add_c(a[2], b[6], c3, c1, c2);
    521   mul_add_c(a[1], b[7], c3, c1, c2);
    522   r[8] = c3;
    523   c3 = 0;
    524   mul_add_c(a[2], b[7], c1, c2, c3);
    525   mul_add_c(a[3], b[6], c1, c2, c3);
    526   mul_add_c(a[4], b[5], c1, c2, c3);
    527   mul_add_c(a[5], b[4], c1, c2, c3);
    528   mul_add_c(a[6], b[3], c1, c2, c3);
    529   mul_add_c(a[7], b[2], c1, c2, c3);
    530   r[9] = c1;
    531   c1 = 0;
    532   mul_add_c(a[7], b[3], c2, c3, c1);
    533   mul_add_c(a[6], b[4], c2, c3, c1);
    534   mul_add_c(a[5], b[5], c2, c3, c1);
    535   mul_add_c(a[4], b[6], c2, c3, c1);
    536   mul_add_c(a[3], b[7], c2, c3, c1);
    537   r[10] = c2;
    538   c2 = 0;
    539   mul_add_c(a[4], b[7], c3, c1, c2);
    540   mul_add_c(a[5], b[6], c3, c1, c2);
    541   mul_add_c(a[6], b[5], c3, c1, c2);
    542   mul_add_c(a[7], b[4], c3, c1, c2);
    543   r[11] = c3;
    544   c3 = 0;
    545   mul_add_c(a[7], b[5], c1, c2, c3);
    546   mul_add_c(a[6], b[6], c1, c2, c3);
    547   mul_add_c(a[5], b[7], c1, c2, c3);
    548   r[12] = c1;
    549   c1 = 0;
    550   mul_add_c(a[6], b[7], c2, c3, c1);
    551   mul_add_c(a[7], b[6], c2, c3, c1);
    552   r[13] = c2;
    553   c2 = 0;
    554   mul_add_c(a[7], b[7], c3, c1, c2);
    555   r[14] = c3;
    556   r[15] = c1;
    557 }
    558 
    559 void bn_mul_comba4(BN_ULONG r[8], const BN_ULONG a[4], const BN_ULONG b[4]) {
    560   BN_ULONG c1, c2, c3;
    561 
    562   c1 = 0;
    563   c2 = 0;
    564   c3 = 0;
    565   mul_add_c(a[0], b[0], c1, c2, c3);
    566   r[0] = c1;
    567   c1 = 0;
    568   mul_add_c(a[0], b[1], c2, c3, c1);
    569   mul_add_c(a[1], b[0], c2, c3, c1);
    570   r[1] = c2;
    571   c2 = 0;
    572   mul_add_c(a[2], b[0], c3, c1, c2);
    573   mul_add_c(a[1], b[1], c3, c1, c2);
    574   mul_add_c(a[0], b[2], c3, c1, c2);
    575   r[2] = c3;
    576   c3 = 0;
    577   mul_add_c(a[0], b[3], c1, c2, c3);
    578   mul_add_c(a[1], b[2], c1, c2, c3);
    579   mul_add_c(a[2], b[1], c1, c2, c3);
    580   mul_add_c(a[3], b[0], c1, c2, c3);
    581   r[3] = c1;
    582   c1 = 0;
    583   mul_add_c(a[3], b[1], c2, c3, c1);
    584   mul_add_c(a[2], b[2], c2, c3, c1);
    585   mul_add_c(a[1], b[3], c2, c3, c1);
    586   r[4] = c2;
    587   c2 = 0;
    588   mul_add_c(a[2], b[3], c3, c1, c2);
    589   mul_add_c(a[3], b[2], c3, c1, c2);
    590   r[5] = c3;
    591   c3 = 0;
    592   mul_add_c(a[3], b[3], c1, c2, c3);
    593   r[6] = c1;
    594   r[7] = c2;
    595 }
    596 
    597 void bn_sqr_comba8(BN_ULONG r[16], const BN_ULONG a[8]) {
    598   BN_ULONG c1, c2, c3;
    599 
    600   c1 = 0;
    601   c2 = 0;
    602   c3 = 0;
    603   sqr_add_c(a, 0, c1, c2, c3);
    604   r[0] = c1;
    605   c1 = 0;
    606   sqr_add_c2(a, 1, 0, c2, c3, c1);
    607   r[1] = c2;
    608   c2 = 0;
    609   sqr_add_c(a, 1, c3, c1, c2);
    610   sqr_add_c2(a, 2, 0, c3, c1, c2);
    611   r[2] = c3;
    612   c3 = 0;
    613   sqr_add_c2(a, 3, 0, c1, c2, c3);
    614   sqr_add_c2(a, 2, 1, c1, c2, c3);
    615   r[3] = c1;
    616   c1 = 0;
    617   sqr_add_c(a, 2, c2, c3, c1);
    618   sqr_add_c2(a, 3, 1, c2, c3, c1);
    619   sqr_add_c2(a, 4, 0, c2, c3, c1);
    620   r[4] = c2;
    621   c2 = 0;
    622   sqr_add_c2(a, 5, 0, c3, c1, c2);
    623   sqr_add_c2(a, 4, 1, c3, c1, c2);
    624   sqr_add_c2(a, 3, 2, c3, c1, c2);
    625   r[5] = c3;
    626   c3 = 0;
    627   sqr_add_c(a, 3, c1, c2, c3);
    628   sqr_add_c2(a, 4, 2, c1, c2, c3);
    629   sqr_add_c2(a, 5, 1, c1, c2, c3);
    630   sqr_add_c2(a, 6, 0, c1, c2, c3);
    631   r[6] = c1;
    632   c1 = 0;
    633   sqr_add_c2(a, 7, 0, c2, c3, c1);
    634   sqr_add_c2(a, 6, 1, c2, c3, c1);
    635   sqr_add_c2(a, 5, 2, c2, c3, c1);
    636   sqr_add_c2(a, 4, 3, c2, c3, c1);
    637   r[7] = c2;
    638   c2 = 0;
    639   sqr_add_c(a, 4, c3, c1, c2);
    640   sqr_add_c2(a, 5, 3, c3, c1, c2);
    641   sqr_add_c2(a, 6, 2, c3, c1, c2);
    642   sqr_add_c2(a, 7, 1, c3, c1, c2);
    643   r[8] = c3;
    644   c3 = 0;
    645   sqr_add_c2(a, 7, 2, c1, c2, c3);
    646   sqr_add_c2(a, 6, 3, c1, c2, c3);
    647   sqr_add_c2(a, 5, 4, c1, c2, c3);
    648   r[9] = c1;
    649   c1 = 0;
    650   sqr_add_c(a, 5, c2, c3, c1);
    651   sqr_add_c2(a, 6, 4, c2, c3, c1);
    652   sqr_add_c2(a, 7, 3, c2, c3, c1);
    653   r[10] = c2;
    654   c2 = 0;
    655   sqr_add_c2(a, 7, 4, c3, c1, c2);
    656   sqr_add_c2(a, 6, 5, c3, c1, c2);
    657   r[11] = c3;
    658   c3 = 0;
    659   sqr_add_c(a, 6, c1, c2, c3);
    660   sqr_add_c2(a, 7, 5, c1, c2, c3);
    661   r[12] = c1;
    662   c1 = 0;
    663   sqr_add_c2(a, 7, 6, c2, c3, c1);
    664   r[13] = c2;
    665   c2 = 0;
    666   sqr_add_c(a, 7, c3, c1, c2);
    667   r[14] = c3;
    668   r[15] = c1;
    669 }
    670 
    671 void bn_sqr_comba4(BN_ULONG r[8], const BN_ULONG a[4]) {
    672   BN_ULONG c1, c2, c3;
    673 
    674   c1 = 0;
    675   c2 = 0;
    676   c3 = 0;
    677   sqr_add_c(a, 0, c1, c2, c3);
    678   r[0] = c1;
    679   c1 = 0;
    680   sqr_add_c2(a, 1, 0, c2, c3, c1);
    681   r[1] = c2;
    682   c2 = 0;
    683   sqr_add_c(a, 1, c3, c1, c2);
    684   sqr_add_c2(a, 2, 0, c3, c1, c2);
    685   r[2] = c3;
    686   c3 = 0;
    687   sqr_add_c2(a, 3, 0, c1, c2, c3);
    688   sqr_add_c2(a, 2, 1, c1, c2, c3);
    689   r[3] = c1;
    690   c1 = 0;
    691   sqr_add_c(a, 2, c2, c3, c1);
    692   sqr_add_c2(a, 3, 1, c2, c3, c1);
    693   r[4] = c2;
    694   c2 = 0;
    695   sqr_add_c2(a, 3, 2, c3, c1, c2);
    696   r[5] = c3;
    697   c3 = 0;
    698   sqr_add_c(a, 3, c1, c2, c3);
    699   r[6] = c1;
    700   r[7] = c2;
    701 }
    702 
    703 #undef mul_add
    704 #undef mul
    705 #undef sqr
    706 #undef mul_add_c
    707 #undef mul_add_c2
    708 #undef sqr_add_c
    709 #undef sqr_add_c2
    710 
    711 #endif
    712