Home | History | Annotate | Download | only in bn
      1 /* Copyright (C) 1995-1998 Eric Young (eay (at) cryptsoft.com)
      2  * All rights reserved.
      3  *
      4  * This package is an SSL implementation written
      5  * by Eric Young (eay (at) cryptsoft.com).
      6  * The implementation was written so as to conform with Netscapes SSL.
      7  *
      8  * This library is free for commercial and non-commercial use as long as
      9  * the following conditions are aheared to.  The following conditions
     10  * apply to all code found in this distribution, be it the RC4, RSA,
     11  * lhash, DES, etc., code; not just the SSL code.  The SSL documentation
     12  * included with this distribution is covered by the same copyright terms
     13  * except that the holder is Tim Hudson (tjh (at) cryptsoft.com).
     14  *
     15  * Copyright remains Eric Young's, and as such any Copyright notices in
     16  * the code are not to be removed.
     17  * If this package is used in a product, Eric Young should be given attribution
     18  * as the author of the parts of the library used.
     19  * This can be in the form of a textual message at program startup or
     20  * in documentation (online or textual) provided with the package.
     21  *
     22  * Redistribution and use in source and binary forms, with or without
     23  * modification, are permitted provided that the following conditions
     24  * are met:
     25  * 1. Redistributions of source code must retain the copyright
     26  *    notice, this list of conditions and the following disclaimer.
     27  * 2. Redistributions in binary form must reproduce the above copyright
     28  *    notice, this list of conditions and the following disclaimer in the
     29  *    documentation and/or other materials provided with the distribution.
     30  * 3. All advertising materials mentioning features or use of this software
     31  *    must display the following acknowledgement:
     32  *    "This product includes cryptographic software written by
     33  *     Eric Young (eay (at) cryptsoft.com)"
     34  *    The word 'cryptographic' can be left out if the rouines from the library
     35  *    being used are not cryptographic related :-).
     36  * 4. If you include any Windows specific code (or a derivative thereof) from
     37  *    the apps directory (application code) you must include an acknowledgement:
     38  *    "This product includes software written by Tim Hudson (tjh (at) cryptsoft.com)"
     39  *
     40  * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
     41  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     42  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     43  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
     44  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     45  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     46  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     47  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     48  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     49  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     50  * SUCH DAMAGE.
     51  *
     52  * The licence and distribution terms for any publically available version or
     53  * derivative of this code cannot be changed.  i.e. this code cannot simply be
     54  * copied and put under another distribution licence
     55  * [including the GNU Public Licence.] */
     56 
     57 #include <openssl/bn.h>
     58 
     59 #include <assert.h>
     60 
     61 #include "internal.h"
     62 
     63 
     64 /* This file has two other implementations: x86 assembly language in
     65  * asm/bn-586.pl and x86_64 inline assembly in asm/x86_64-gcc.c. */
     66 #if defined(OPENSSL_NO_ASM) || \
     67     !(defined(OPENSSL_X86) || (defined(OPENSSL_X86_64) && defined(__GNUC__)))
     68 
     69 #ifdef BN_ULLONG
     70 #define mul_add(r, a, w, c)               \
     71   do {                                    \
     72     BN_ULLONG t;                          \
     73     t = (BN_ULLONG)(w) * (a) + (r) + (c); \
     74     (r) = Lw(t);                          \
     75     (c) = Hw(t);                          \
     76   } while (0)
     77 
     78 #define mul(r, a, w, c)             \
     79   do {                              \
     80     BN_ULLONG t;                    \
     81     t = (BN_ULLONG)(w) * (a) + (c); \
     82     (r) = Lw(t);                    \
     83     (c) = Hw(t);                    \
     84   } while (0)
     85 
     86 #define sqr(r0, r1, a)        \
     87   do {                        \
     88     BN_ULLONG t;              \
     89     t = (BN_ULLONG)(a) * (a); \
     90     (r0) = Lw(t);             \
     91     (r1) = Hw(t);             \
     92   } while (0)
     93 
     94 #else
     95 
     96 #define mul_add(r, a, w, c)             \
     97   do {                                  \
     98     BN_ULONG high, low, ret, tmp = (a); \
     99     ret = (r);                          \
    100     BN_UMULT_LOHI(low, high, w, tmp);   \
    101     ret += (c);                         \
    102     (c) = (ret < (c)) ? 1 : 0;          \
    103     (c) += high;                        \
    104     ret += low;                         \
    105     (c) += (ret < low) ? 1 : 0;         \
    106     (r) = ret;                          \
    107   } while (0)
    108 
    109 #define mul(r, a, w, c)                \
    110   do {                                 \
    111     BN_ULONG high, low, ret, ta = (a); \
    112     BN_UMULT_LOHI(low, high, w, ta);   \
    113     ret = low + (c);                   \
    114     (c) = high;                        \
    115     (c) += (ret < low) ? 1 : 0;        \
    116     (r) = ret;                         \
    117   } while (0)
    118 
    119 #define sqr(r0, r1, a)               \
    120   do {                               \
    121     BN_ULONG tmp = (a);              \
    122     BN_UMULT_LOHI(r0, r1, tmp, tmp); \
    123   } while (0)
    124 
    125 #endif /* !BN_ULLONG */
    126 
    127 BN_ULONG bn_mul_add_words(BN_ULONG *rp, const BN_ULONG *ap, int num,
    128                           BN_ULONG w) {
    129   BN_ULONG c1 = 0;
    130 
    131   assert(num >= 0);
    132   if (num <= 0) {
    133     return c1;
    134   }
    135 
    136   while (num & ~3) {
    137     mul_add(rp[0], ap[0], w, c1);
    138     mul_add(rp[1], ap[1], w, c1);
    139     mul_add(rp[2], ap[2], w, c1);
    140     mul_add(rp[3], ap[3], w, c1);
    141     ap += 4;
    142     rp += 4;
    143     num -= 4;
    144   }
    145 
    146   while (num) {
    147     mul_add(rp[0], ap[0], w, c1);
    148     ap++;
    149     rp++;
    150     num--;
    151   }
    152 
    153   return c1;
    154 }
    155 
    156 BN_ULONG bn_mul_words(BN_ULONG *rp, const BN_ULONG *ap, int num, BN_ULONG w) {
    157   BN_ULONG c1 = 0;
    158 
    159   assert(num >= 0);
    160   if (num <= 0) {
    161     return c1;
    162   }
    163 
    164   while (num & ~3) {
    165     mul(rp[0], ap[0], w, c1);
    166     mul(rp[1], ap[1], w, c1);
    167     mul(rp[2], ap[2], w, c1);
    168     mul(rp[3], ap[3], w, c1);
    169     ap += 4;
    170     rp += 4;
    171     num -= 4;
    172   }
    173   while (num) {
    174     mul(rp[0], ap[0], w, c1);
    175     ap++;
    176     rp++;
    177     num--;
    178   }
    179   return c1;
    180 }
    181 
    182 void bn_sqr_words(BN_ULONG *r, const BN_ULONG *a, int n) {
    183   assert(n >= 0);
    184   if (n <= 0) {
    185     return;
    186   }
    187 
    188   while (n & ~3) {
    189     sqr(r[0], r[1], a[0]);
    190     sqr(r[2], r[3], a[1]);
    191     sqr(r[4], r[5], a[2]);
    192     sqr(r[6], r[7], a[3]);
    193     a += 4;
    194     r += 8;
    195     n -= 4;
    196   }
    197   while (n) {
    198     sqr(r[0], r[1], a[0]);
    199     a++;
    200     r += 2;
    201     n--;
    202   }
    203 }
    204 
    205 #ifdef BN_ULLONG
    206 BN_ULONG bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
    207                       int n) {
    208   BN_ULLONG ll = 0;
    209 
    210   assert(n >= 0);
    211   if (n <= 0) {
    212     return (BN_ULONG)0;
    213   }
    214 
    215   while (n & ~3) {
    216     ll += (BN_ULLONG)a[0] + b[0];
    217     r[0] = (BN_ULONG)ll & BN_MASK2;
    218     ll >>= BN_BITS2;
    219     ll += (BN_ULLONG)a[1] + b[1];
    220     r[1] = (BN_ULONG)ll & BN_MASK2;
    221     ll >>= BN_BITS2;
    222     ll += (BN_ULLONG)a[2] + b[2];
    223     r[2] = (BN_ULONG)ll & BN_MASK2;
    224     ll >>= BN_BITS2;
    225     ll += (BN_ULLONG)a[3] + b[3];
    226     r[3] = (BN_ULONG)ll & BN_MASK2;
    227     ll >>= BN_BITS2;
    228     a += 4;
    229     b += 4;
    230     r += 4;
    231     n -= 4;
    232   }
    233   while (n) {
    234     ll += (BN_ULLONG)a[0] + b[0];
    235     r[0] = (BN_ULONG)ll & BN_MASK2;
    236     ll >>= BN_BITS2;
    237     a++;
    238     b++;
    239     r++;
    240     n--;
    241   }
    242   return (BN_ULONG)ll;
    243 }
    244 
    245 #else /* !BN_ULLONG */
    246 
    247 BN_ULONG bn_add_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
    248                       int n) {
    249   BN_ULONG c, l, t;
    250 
    251   assert(n >= 0);
    252   if (n <= 0) {
    253     return (BN_ULONG)0;
    254   }
    255 
    256   c = 0;
    257   while (n & ~3) {
    258     t = a[0];
    259     t = (t + c) & BN_MASK2;
    260     c = (t < c);
    261     l = (t + b[0]) & BN_MASK2;
    262     c += (l < t);
    263     r[0] = l;
    264     t = a[1];
    265     t = (t + c) & BN_MASK2;
    266     c = (t < c);
    267     l = (t + b[1]) & BN_MASK2;
    268     c += (l < t);
    269     r[1] = l;
    270     t = a[2];
    271     t = (t + c) & BN_MASK2;
    272     c = (t < c);
    273     l = (t + b[2]) & BN_MASK2;
    274     c += (l < t);
    275     r[2] = l;
    276     t = a[3];
    277     t = (t + c) & BN_MASK2;
    278     c = (t < c);
    279     l = (t + b[3]) & BN_MASK2;
    280     c += (l < t);
    281     r[3] = l;
    282     a += 4;
    283     b += 4;
    284     r += 4;
    285     n -= 4;
    286   }
    287   while (n) {
    288     t = a[0];
    289     t = (t + c) & BN_MASK2;
    290     c = (t < c);
    291     l = (t + b[0]) & BN_MASK2;
    292     c += (l < t);
    293     r[0] = l;
    294     a++;
    295     b++;
    296     r++;
    297     n--;
    298   }
    299   return (BN_ULONG)c;
    300 }
    301 
    302 #endif /* !BN_ULLONG */
    303 
    304 BN_ULONG bn_sub_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
    305                       int n) {
    306   BN_ULONG t1, t2;
    307   int c = 0;
    308 
    309   assert(n >= 0);
    310   if (n <= 0) {
    311     return (BN_ULONG)0;
    312   }
    313 
    314   while (n & ~3) {
    315     t1 = a[0];
    316     t2 = b[0];
    317     r[0] = (t1 - t2 - c) & BN_MASK2;
    318     if (t1 != t2) {
    319       c = (t1 < t2);
    320     }
    321     t1 = a[1];
    322     t2 = b[1];
    323     r[1] = (t1 - t2 - c) & BN_MASK2;
    324     if (t1 != t2) {
    325       c = (t1 < t2);
    326     }
    327     t1 = a[2];
    328     t2 = b[2];
    329     r[2] = (t1 - t2 - c) & BN_MASK2;
    330     if (t1 != t2) {
    331       c = (t1 < t2);
    332     }
    333     t1 = a[3];
    334     t2 = b[3];
    335     r[3] = (t1 - t2 - c) & BN_MASK2;
    336     if (t1 != t2) {
    337       c = (t1 < t2);
    338     }
    339     a += 4;
    340     b += 4;
    341     r += 4;
    342     n -= 4;
    343   }
    344   while (n) {
    345     t1 = a[0];
    346     t2 = b[0];
    347     r[0] = (t1 - t2 - c) & BN_MASK2;
    348     if (t1 != t2) {
    349       c = (t1 < t2);
    350     }
    351     a++;
    352     b++;
    353     r++;
    354     n--;
    355   }
    356   return c;
    357 }
    358 
    359 /* mul_add_c(a,b,c0,c1,c2)  -- c+=a*b for three word number c=(c2,c1,c0) */
    360 /* mul_add_c2(a,b,c0,c1,c2) -- c+=2*a*b for three word number c=(c2,c1,c0) */
    361 /* sqr_add_c(a,i,c0,c1,c2)  -- c+=a[i]^2 for three word number c=(c2,c1,c0) */
    362 /* sqr_add_c2(a,i,j,c0,c1,c2) -- c+=2*a[i]*a[j] for three word number c=(c2,c1,c0) */
    363 
    364 #ifdef BN_ULLONG
    365 
    366 /* Keep in mind that additions to multiplication result can not overflow,
    367  * because its high half cannot be all-ones. */
    368 #define mul_add_c(a, b, c0, c1, c2)     \
    369   do {                                  \
    370     BN_ULONG hi;                        \
    371     BN_ULLONG t = (BN_ULLONG)(a) * (b); \
    372     t += (c0); /* no carry */           \
    373     (c0) = (BN_ULONG)Lw(t);             \
    374     hi = (BN_ULONG)Hw(t);               \
    375     (c1) = ((c1) + (hi)) & BN_MASK2;    \
    376     if ((c1) < hi) {                    \
    377       (c2)++;                           \
    378     }                                   \
    379   } while (0)
    380 
    381 #define mul_add_c2(a, b, c0, c1, c2)        \
    382   do {                                      \
    383     BN_ULONG hi;                            \
    384     BN_ULLONG t = (BN_ULLONG)(a) * (b);     \
    385     BN_ULLONG tt = t + (c0); /* no carry */ \
    386     (c0) = (BN_ULONG)Lw(tt);                \
    387     hi = (BN_ULONG)Hw(tt);                  \
    388     (c1) = ((c1) + hi) & BN_MASK2;          \
    389     if ((c1) < hi) {                        \
    390       (c2)++;                               \
    391     }                                       \
    392     t += (c0); /* no carry */               \
    393     (c0) = (BN_ULONG)Lw(t);                 \
    394     hi = (BN_ULONG)Hw(t);                   \
    395     (c1) = ((c1) + hi) & BN_MASK2;          \
    396     if ((c1) < hi) {                        \
    397       (c2)++;                               \
    398     }                                       \
    399   } while (0)
    400 
    401 #define sqr_add_c(a, i, c0, c1, c2)           \
    402   do {                                        \
    403     BN_ULONG hi;                              \
    404     BN_ULLONG t = (BN_ULLONG)(a)[i] * (a)[i]; \
    405     t += (c0); /* no carry */                 \
    406     (c0) = (BN_ULONG)Lw(t);                   \
    407     hi = (BN_ULONG)Hw(t);                     \
    408     (c1) = ((c1) + hi) & BN_MASK2;            \
    409     if ((c1) < hi) {                          \
    410       (c2)++;                                 \
    411     }                                         \
    412   } while (0)
    413 
    414 #define sqr_add_c2(a, i, j, c0, c1, c2) mul_add_c2((a)[i], (a)[j], c0, c1, c2)
    415 
    416 #else
    417 
    418 /* Keep in mind that additions to hi can not overflow, because the high word of
    419  * a multiplication result cannot be all-ones. */
    420 #define mul_add_c(a, b, c0, c1, c2) \
    421   do {                              \
    422     BN_ULONG ta = (a), tb = (b);    \
    423     BN_ULONG lo, hi;                \
    424     BN_UMULT_LOHI(lo, hi, ta, tb);  \
    425     (c0) += lo;                     \
    426     hi += ((c0) < lo) ? 1 : 0;      \
    427     (c1) += hi;                     \
    428     (c2) += ((c1) < hi) ? 1 : 0;    \
    429   } while (0)
    430 
    431 #define mul_add_c2(a, b, c0, c1, c2) \
    432   do {                               \
    433     BN_ULONG ta = (a), tb = (b);     \
    434     BN_ULONG lo, hi, tt;             \
    435     BN_UMULT_LOHI(lo, hi, ta, tb);   \
    436     (c0) += lo;                      \
    437     tt = hi + (((c0) < lo) ? 1 : 0); \
    438     (c1) += tt;                      \
    439     (c2) += ((c1) < tt) ? 1 : 0;     \
    440     (c0) += lo;                      \
    441     hi += (c0 < lo) ? 1 : 0;         \
    442     (c1) += hi;                      \
    443     (c2) += ((c1) < hi) ? 1 : 0;     \
    444   } while (0)
    445 
    446 #define sqr_add_c(a, i, c0, c1, c2) \
    447   do {                              \
    448     BN_ULONG ta = (a)[i];           \
    449     BN_ULONG lo, hi;                \
    450     BN_UMULT_LOHI(lo, hi, ta, ta);  \
    451     (c0) += lo;                     \
    452     hi += (c0 < lo) ? 1 : 0;        \
    453     (c1) += hi;                     \
    454     (c2) += ((c1) < hi) ? 1 : 0;    \
    455   } while (0)
    456 
    457 #define sqr_add_c2(a, i, j, c0, c1, c2) mul_add_c2((a)[i], (a)[j], c0, c1, c2)
    458 
    459 #endif /* !BN_ULLONG */
    460 
    461 void bn_mul_comba8(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) {
    462   BN_ULONG c1, c2, c3;
    463 
    464   c1 = 0;
    465   c2 = 0;
    466   c3 = 0;
    467   mul_add_c(a[0], b[0], c1, c2, c3);
    468   r[0] = c1;
    469   c1 = 0;
    470   mul_add_c(a[0], b[1], c2, c3, c1);
    471   mul_add_c(a[1], b[0], c2, c3, c1);
    472   r[1] = c2;
    473   c2 = 0;
    474   mul_add_c(a[2], b[0], c3, c1, c2);
    475   mul_add_c(a[1], b[1], c3, c1, c2);
    476   mul_add_c(a[0], b[2], c3, c1, c2);
    477   r[2] = c3;
    478   c3 = 0;
    479   mul_add_c(a[0], b[3], c1, c2, c3);
    480   mul_add_c(a[1], b[2], c1, c2, c3);
    481   mul_add_c(a[2], b[1], c1, c2, c3);
    482   mul_add_c(a[3], b[0], c1, c2, c3);
    483   r[3] = c1;
    484   c1 = 0;
    485   mul_add_c(a[4], b[0], c2, c3, c1);
    486   mul_add_c(a[3], b[1], c2, c3, c1);
    487   mul_add_c(a[2], b[2], c2, c3, c1);
    488   mul_add_c(a[1], b[3], c2, c3, c1);
    489   mul_add_c(a[0], b[4], c2, c3, c1);
    490   r[4] = c2;
    491   c2 = 0;
    492   mul_add_c(a[0], b[5], c3, c1, c2);
    493   mul_add_c(a[1], b[4], c3, c1, c2);
    494   mul_add_c(a[2], b[3], c3, c1, c2);
    495   mul_add_c(a[3], b[2], c3, c1, c2);
    496   mul_add_c(a[4], b[1], c3, c1, c2);
    497   mul_add_c(a[5], b[0], c3, c1, c2);
    498   r[5] = c3;
    499   c3 = 0;
    500   mul_add_c(a[6], b[0], c1, c2, c3);
    501   mul_add_c(a[5], b[1], c1, c2, c3);
    502   mul_add_c(a[4], b[2], c1, c2, c3);
    503   mul_add_c(a[3], b[3], c1, c2, c3);
    504   mul_add_c(a[2], b[4], c1, c2, c3);
    505   mul_add_c(a[1], b[5], c1, c2, c3);
    506   mul_add_c(a[0], b[6], c1, c2, c3);
    507   r[6] = c1;
    508   c1 = 0;
    509   mul_add_c(a[0], b[7], c2, c3, c1);
    510   mul_add_c(a[1], b[6], c2, c3, c1);
    511   mul_add_c(a[2], b[5], c2, c3, c1);
    512   mul_add_c(a[3], b[4], c2, c3, c1);
    513   mul_add_c(a[4], b[3], c2, c3, c1);
    514   mul_add_c(a[5], b[2], c2, c3, c1);
    515   mul_add_c(a[6], b[1], c2, c3, c1);
    516   mul_add_c(a[7], b[0], c2, c3, c1);
    517   r[7] = c2;
    518   c2 = 0;
    519   mul_add_c(a[7], b[1], c3, c1, c2);
    520   mul_add_c(a[6], b[2], c3, c1, c2);
    521   mul_add_c(a[5], b[3], c3, c1, c2);
    522   mul_add_c(a[4], b[4], c3, c1, c2);
    523   mul_add_c(a[3], b[5], c3, c1, c2);
    524   mul_add_c(a[2], b[6], c3, c1, c2);
    525   mul_add_c(a[1], b[7], c3, c1, c2);
    526   r[8] = c3;
    527   c3 = 0;
    528   mul_add_c(a[2], b[7], c1, c2, c3);
    529   mul_add_c(a[3], b[6], c1, c2, c3);
    530   mul_add_c(a[4], b[5], c1, c2, c3);
    531   mul_add_c(a[5], b[4], c1, c2, c3);
    532   mul_add_c(a[6], b[3], c1, c2, c3);
    533   mul_add_c(a[7], b[2], c1, c2, c3);
    534   r[9] = c1;
    535   c1 = 0;
    536   mul_add_c(a[7], b[3], c2, c3, c1);
    537   mul_add_c(a[6], b[4], c2, c3, c1);
    538   mul_add_c(a[5], b[5], c2, c3, c1);
    539   mul_add_c(a[4], b[6], c2, c3, c1);
    540   mul_add_c(a[3], b[7], c2, c3, c1);
    541   r[10] = c2;
    542   c2 = 0;
    543   mul_add_c(a[4], b[7], c3, c1, c2);
    544   mul_add_c(a[5], b[6], c3, c1, c2);
    545   mul_add_c(a[6], b[5], c3, c1, c2);
    546   mul_add_c(a[7], b[4], c3, c1, c2);
    547   r[11] = c3;
    548   c3 = 0;
    549   mul_add_c(a[7], b[5], c1, c2, c3);
    550   mul_add_c(a[6], b[6], c1, c2, c3);
    551   mul_add_c(a[5], b[7], c1, c2, c3);
    552   r[12] = c1;
    553   c1 = 0;
    554   mul_add_c(a[6], b[7], c2, c3, c1);
    555   mul_add_c(a[7], b[6], c2, c3, c1);
    556   r[13] = c2;
    557   c2 = 0;
    558   mul_add_c(a[7], b[7], c3, c1, c2);
    559   r[14] = c3;
    560   r[15] = c1;
    561 }
    562 
    563 void bn_mul_comba4(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b) {
    564   BN_ULONG c1, c2, c3;
    565 
    566   c1 = 0;
    567   c2 = 0;
    568   c3 = 0;
    569   mul_add_c(a[0], b[0], c1, c2, c3);
    570   r[0] = c1;
    571   c1 = 0;
    572   mul_add_c(a[0], b[1], c2, c3, c1);
    573   mul_add_c(a[1], b[0], c2, c3, c1);
    574   r[1] = c2;
    575   c2 = 0;
    576   mul_add_c(a[2], b[0], c3, c1, c2);
    577   mul_add_c(a[1], b[1], c3, c1, c2);
    578   mul_add_c(a[0], b[2], c3, c1, c2);
    579   r[2] = c3;
    580   c3 = 0;
    581   mul_add_c(a[0], b[3], c1, c2, c3);
    582   mul_add_c(a[1], b[2], c1, c2, c3);
    583   mul_add_c(a[2], b[1], c1, c2, c3);
    584   mul_add_c(a[3], b[0], c1, c2, c3);
    585   r[3] = c1;
    586   c1 = 0;
    587   mul_add_c(a[3], b[1], c2, c3, c1);
    588   mul_add_c(a[2], b[2], c2, c3, c1);
    589   mul_add_c(a[1], b[3], c2, c3, c1);
    590   r[4] = c2;
    591   c2 = 0;
    592   mul_add_c(a[2], b[3], c3, c1, c2);
    593   mul_add_c(a[3], b[2], c3, c1, c2);
    594   r[5] = c3;
    595   c3 = 0;
    596   mul_add_c(a[3], b[3], c1, c2, c3);
    597   r[6] = c1;
    598   r[7] = c2;
    599 }
    600 
    601 void bn_sqr_comba8(BN_ULONG *r, const BN_ULONG *a) {
    602   BN_ULONG c1, c2, c3;
    603 
    604   c1 = 0;
    605   c2 = 0;
    606   c3 = 0;
    607   sqr_add_c(a, 0, c1, c2, c3);
    608   r[0] = c1;
    609   c1 = 0;
    610   sqr_add_c2(a, 1, 0, c2, c3, c1);
    611   r[1] = c2;
    612   c2 = 0;
    613   sqr_add_c(a, 1, c3, c1, c2);
    614   sqr_add_c2(a, 2, 0, c3, c1, c2);
    615   r[2] = c3;
    616   c3 = 0;
    617   sqr_add_c2(a, 3, 0, c1, c2, c3);
    618   sqr_add_c2(a, 2, 1, c1, c2, c3);
    619   r[3] = c1;
    620   c1 = 0;
    621   sqr_add_c(a, 2, c2, c3, c1);
    622   sqr_add_c2(a, 3, 1, c2, c3, c1);
    623   sqr_add_c2(a, 4, 0, c2, c3, c1);
    624   r[4] = c2;
    625   c2 = 0;
    626   sqr_add_c2(a, 5, 0, c3, c1, c2);
    627   sqr_add_c2(a, 4, 1, c3, c1, c2);
    628   sqr_add_c2(a, 3, 2, c3, c1, c2);
    629   r[5] = c3;
    630   c3 = 0;
    631   sqr_add_c(a, 3, c1, c2, c3);
    632   sqr_add_c2(a, 4, 2, c1, c2, c3);
    633   sqr_add_c2(a, 5, 1, c1, c2, c3);
    634   sqr_add_c2(a, 6, 0, c1, c2, c3);
    635   r[6] = c1;
    636   c1 = 0;
    637   sqr_add_c2(a, 7, 0, c2, c3, c1);
    638   sqr_add_c2(a, 6, 1, c2, c3, c1);
    639   sqr_add_c2(a, 5, 2, c2, c3, c1);
    640   sqr_add_c2(a, 4, 3, c2, c3, c1);
    641   r[7] = c2;
    642   c2 = 0;
    643   sqr_add_c(a, 4, c3, c1, c2);
    644   sqr_add_c2(a, 5, 3, c3, c1, c2);
    645   sqr_add_c2(a, 6, 2, c3, c1, c2);
    646   sqr_add_c2(a, 7, 1, c3, c1, c2);
    647   r[8] = c3;
    648   c3 = 0;
    649   sqr_add_c2(a, 7, 2, c1, c2, c3);
    650   sqr_add_c2(a, 6, 3, c1, c2, c3);
    651   sqr_add_c2(a, 5, 4, c1, c2, c3);
    652   r[9] = c1;
    653   c1 = 0;
    654   sqr_add_c(a, 5, c2, c3, c1);
    655   sqr_add_c2(a, 6, 4, c2, c3, c1);
    656   sqr_add_c2(a, 7, 3, c2, c3, c1);
    657   r[10] = c2;
    658   c2 = 0;
    659   sqr_add_c2(a, 7, 4, c3, c1, c2);
    660   sqr_add_c2(a, 6, 5, c3, c1, c2);
    661   r[11] = c3;
    662   c3 = 0;
    663   sqr_add_c(a, 6, c1, c2, c3);
    664   sqr_add_c2(a, 7, 5, c1, c2, c3);
    665   r[12] = c1;
    666   c1 = 0;
    667   sqr_add_c2(a, 7, 6, c2, c3, c1);
    668   r[13] = c2;
    669   c2 = 0;
    670   sqr_add_c(a, 7, c3, c1, c2);
    671   r[14] = c3;
    672   r[15] = c1;
    673 }
    674 
    675 void bn_sqr_comba4(BN_ULONG *r, const BN_ULONG *a) {
    676   BN_ULONG c1, c2, c3;
    677 
    678   c1 = 0;
    679   c2 = 0;
    680   c3 = 0;
    681   sqr_add_c(a, 0, c1, c2, c3);
    682   r[0] = c1;
    683   c1 = 0;
    684   sqr_add_c2(a, 1, 0, c2, c3, c1);
    685   r[1] = c2;
    686   c2 = 0;
    687   sqr_add_c(a, 1, c3, c1, c2);
    688   sqr_add_c2(a, 2, 0, c3, c1, c2);
    689   r[2] = c3;
    690   c3 = 0;
    691   sqr_add_c2(a, 3, 0, c1, c2, c3);
    692   sqr_add_c2(a, 2, 1, c1, c2, c3);
    693   r[3] = c1;
    694   c1 = 0;
    695   sqr_add_c(a, 2, c2, c3, c1);
    696   sqr_add_c2(a, 3, 1, c2, c3, c1);
    697   r[4] = c2;
    698   c2 = 0;
    699   sqr_add_c2(a, 3, 2, c3, c1, c2);
    700   r[5] = c3;
    701   c3 = 0;
    702   sqr_add_c(a, 3, c1, c2, c3);
    703   r[6] = c1;
    704   r[7] = c2;
    705 }
    706 
    707 #undef mul_add
    708 #undef mul
    709 #undef sqr
    710 #undef mul_add_c
    711 #undef mul_add_c2
    712 #undef sqr_add_c
    713 #undef sqr_add_c2
    714 
    715 #endif
    716