Home | History | Annotate | Download | only in bn
      1 /* Copyright (C) 1995-1998 Eric Young (eay (at) cryptsoft.com)
      2  * All rights reserved.
      3  *
      4  * This package is an SSL implementation written
      5  * by Eric Young (eay (at) cryptsoft.com).
      6  * The implementation was written so as to conform with Netscapes SSL.
      7  *
      8  * This library is free for commercial and non-commercial use as long as
      9  * the following conditions are aheared to.  The following conditions
     10  * apply to all code found in this distribution, be it the RC4, RSA,
     11  * lhash, DES, etc., code; not just the SSL code.  The SSL documentation
     12  * included with this distribution is covered by the same copyright terms
     13  * except that the holder is Tim Hudson (tjh (at) cryptsoft.com).
     14  *
     15  * Copyright remains Eric Young's, and as such any Copyright notices in
     16  * the code are not to be removed.
     17  * If this package is used in a product, Eric Young should be given attribution
     18  * as the author of the parts of the library used.
     19  * This can be in the form of a textual message at program startup or
     20  * in documentation (online or textual) provided with the package.
     21  *
     22  * Redistribution and use in source and binary forms, with or without
     23  * modification, are permitted provided that the following conditions
     24  * are met:
     25  * 1. Redistributions of source code must retain the copyright
     26  *    notice, this list of conditions and the following disclaimer.
     27  * 2. Redistributions in binary form must reproduce the above copyright
     28  *    notice, this list of conditions and the following disclaimer in the
     29  *    documentation and/or other materials provided with the distribution.
     30  * 3. All advertising materials mentioning features or use of this software
     31  *    must display the following acknowledgement:
     32  *    "This product includes cryptographic software written by
     33  *     Eric Young (eay (at) cryptsoft.com)"
     34  *    The word 'cryptographic' can be left out if the rouines from the library
     35  *    being used are not cryptographic related :-).
     36  * 4. If you include any Windows specific code (or a derivative thereof) from
     37  *    the apps directory (application code) you must include an acknowledgement:
     38  *    "This product includes software written by Tim Hudson (tjh (at) cryptsoft.com)"
     39  *
     40  * THIS SOFTWARE IS PROVIDED BY ERIC YOUNG ``AS IS'' AND
     41  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
     42  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
     43  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
     44  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
     45  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
     46  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
     47  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
     48  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
     49  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
     50  * SUCH DAMAGE.
     51  *
     52  * The licence and distribution terms for any publically available version or
     53  * derivative of this code cannot be changed.  i.e. this code cannot simply be
     54  * copied and put under another distribution licence
     55  * [including the GNU Public Licence.] */
     56 
     57 #include <openssl/bn.h>
     58 
     59 #include <assert.h>
     60 #include <string.h>
     61 
     62 #include "internal.h"
     63 
     64 
     65 void bn_mul_normal(BN_ULONG *r, BN_ULONG *a, int na, BN_ULONG *b, int nb) {
     66   BN_ULONG *rr;
     67 
     68   if (na < nb) {
     69     int itmp;
     70     BN_ULONG *ltmp;
     71 
     72     itmp = na;
     73     na = nb;
     74     nb = itmp;
     75     ltmp = a;
     76     a = b;
     77     b = ltmp;
     78   }
     79   rr = &(r[na]);
     80   if (nb <= 0) {
     81     (void)bn_mul_words(r, a, na, 0);
     82     return;
     83   } else {
     84     rr[0] = bn_mul_words(r, a, na, b[0]);
     85   }
     86 
     87   for (;;) {
     88     if (--nb <= 0) {
     89       return;
     90     }
     91     rr[1] = bn_mul_add_words(&(r[1]), a, na, b[1]);
     92     if (--nb <= 0) {
     93       return;
     94     }
     95     rr[2] = bn_mul_add_words(&(r[2]), a, na, b[2]);
     96     if (--nb <= 0) {
     97       return;
     98     }
     99     rr[3] = bn_mul_add_words(&(r[3]), a, na, b[3]);
    100     if (--nb <= 0) {
    101       return;
    102     }
    103     rr[4] = bn_mul_add_words(&(r[4]), a, na, b[4]);
    104     rr += 4;
    105     r += 4;
    106     b += 4;
    107   }
    108 }
    109 
    110 void bn_mul_low_normal(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n) {
    111   bn_mul_words(r, a, n, b[0]);
    112 
    113   for (;;) {
    114     if (--n <= 0) {
    115       return;
    116     }
    117     bn_mul_add_words(&(r[1]), a, n, b[1]);
    118     if (--n <= 0) {
    119       return;
    120     }
    121     bn_mul_add_words(&(r[2]), a, n, b[2]);
    122     if (--n <= 0) {
    123       return;
    124     }
    125     bn_mul_add_words(&(r[3]), a, n, b[3]);
    126     if (--n <= 0) {
    127       return;
    128     }
    129     bn_mul_add_words(&(r[4]), a, n, b[4]);
    130     r += 4;
    131     b += 4;
    132   }
    133 }
    134 
    135 #if !defined(OPENSSL_X86) || defined(OPENSSL_NO_ASM)
    136 /* Here follows specialised variants of bn_add_words() and bn_sub_words(). They
    137  * have the property performing operations on arrays of different sizes. The
    138  * sizes of those arrays is expressed through cl, which is the common length (
    139  * basicall, min(len(a),len(b)) ), and dl, which is the delta between the two
    140  * lengths, calculated as len(a)-len(b). All lengths are the number of
    141  * BN_ULONGs...  For the operations that require a result array as parameter,
    142  * it must have the length cl+abs(dl). These functions should probably end up
    143  * in bn_asm.c as soon as there are assembler counterparts for the systems that
    144  * use assembler files.  */
    145 
    146 static BN_ULONG bn_sub_part_words(BN_ULONG *r, const BN_ULONG *a,
    147                                   const BN_ULONG *b, int cl, int dl) {
    148   BN_ULONG c, t;
    149 
    150   assert(cl >= 0);
    151   c = bn_sub_words(r, a, b, cl);
    152 
    153   if (dl == 0) {
    154     return c;
    155   }
    156 
    157   r += cl;
    158   a += cl;
    159   b += cl;
    160 
    161   if (dl < 0) {
    162     for (;;) {
    163       t = b[0];
    164       r[0] = (0 - t - c) & BN_MASK2;
    165       if (t != 0) {
    166         c = 1;
    167       }
    168       if (++dl >= 0) {
    169         break;
    170       }
    171 
    172       t = b[1];
    173       r[1] = (0 - t - c) & BN_MASK2;
    174       if (t != 0) {
    175         c = 1;
    176       }
    177       if (++dl >= 0) {
    178         break;
    179       }
    180 
    181       t = b[2];
    182       r[2] = (0 - t - c) & BN_MASK2;
    183       if (t != 0) {
    184         c = 1;
    185       }
    186       if (++dl >= 0) {
    187         break;
    188       }
    189 
    190       t = b[3];
    191       r[3] = (0 - t - c) & BN_MASK2;
    192       if (t != 0) {
    193         c = 1;
    194       }
    195       if (++dl >= 0) {
    196         break;
    197       }
    198 
    199       b += 4;
    200       r += 4;
    201     }
    202   } else {
    203     int save_dl = dl;
    204     while (c) {
    205       t = a[0];
    206       r[0] = (t - c) & BN_MASK2;
    207       if (t != 0) {
    208         c = 0;
    209       }
    210       if (--dl <= 0) {
    211         break;
    212       }
    213 
    214       t = a[1];
    215       r[1] = (t - c) & BN_MASK2;
    216       if (t != 0) {
    217         c = 0;
    218       }
    219       if (--dl <= 0) {
    220         break;
    221       }
    222 
    223       t = a[2];
    224       r[2] = (t - c) & BN_MASK2;
    225       if (t != 0) {
    226         c = 0;
    227       }
    228       if (--dl <= 0) {
    229         break;
    230       }
    231 
    232       t = a[3];
    233       r[3] = (t - c) & BN_MASK2;
    234       if (t != 0) {
    235         c = 0;
    236       }
    237       if (--dl <= 0) {
    238         break;
    239       }
    240 
    241       save_dl = dl;
    242       a += 4;
    243       r += 4;
    244     }
    245     if (dl > 0) {
    246       if (save_dl > dl) {
    247         switch (save_dl - dl) {
    248           case 1:
    249             r[1] = a[1];
    250             if (--dl <= 0) {
    251               break;
    252             }
    253           case 2:
    254             r[2] = a[2];
    255             if (--dl <= 0) {
    256               break;
    257             }
    258           case 3:
    259             r[3] = a[3];
    260             if (--dl <= 0) {
    261               break;
    262             }
    263         }
    264         a += 4;
    265         r += 4;
    266       }
    267     }
    268 
    269     if (dl > 0) {
    270       for (;;) {
    271         r[0] = a[0];
    272         if (--dl <= 0) {
    273           break;
    274         }
    275         r[1] = a[1];
    276         if (--dl <= 0) {
    277           break;
    278         }
    279         r[2] = a[2];
    280         if (--dl <= 0) {
    281           break;
    282         }
    283         r[3] = a[3];
    284         if (--dl <= 0) {
    285           break;
    286         }
    287 
    288         a += 4;
    289         r += 4;
    290       }
    291     }
    292   }
    293 
    294   return c;
    295 }
    296 #else
    297 /* On other platforms the function is defined in asm. */
    298 BN_ULONG bn_sub_part_words(BN_ULONG *r, const BN_ULONG *a, const BN_ULONG *b,
    299                            int cl, int dl);
    300 #endif
    301 
    302 /* Karatsuba recursive multiplication algorithm
    303  * (cf. Knuth, The Art of Computer Programming, Vol. 2) */
    304 
    305 /* r is 2*n2 words in size,
    306  * a and b are both n2 words in size.
    307  * n2 must be a power of 2.
    308  * We multiply and return the result.
    309  * t must be 2*n2 words in size
    310  * We calculate
    311  * a[0]*b[0]
    312  * a[0]*b[0]+a[1]*b[1]+(a[0]-a[1])*(b[1]-b[0])
    313  * a[1]*b[1]
    314  */
    315 /* dnX may not be positive, but n2/2+dnX has to be */
    316 static void bn_mul_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n2,
    317                              int dna, int dnb, BN_ULONG *t) {
    318   int n = n2 / 2, c1, c2;
    319   int tna = n + dna, tnb = n + dnb;
    320   unsigned int neg, zero;
    321   BN_ULONG ln, lo, *p;
    322 
    323   /* Only call bn_mul_comba 8 if n2 == 8 and the
    324    * two arrays are complete [steve]
    325    */
    326   if (n2 == 8 && dna == 0 && dnb == 0) {
    327     bn_mul_comba8(r, a, b);
    328     return;
    329   }
    330 
    331   /* Else do normal multiply */
    332   if (n2 < BN_MUL_RECURSIVE_SIZE_NORMAL) {
    333     bn_mul_normal(r, a, n2 + dna, b, n2 + dnb);
    334     if ((dna + dnb) < 0) {
    335       memset(&r[2 * n2 + dna + dnb], 0, sizeof(BN_ULONG) * -(dna + dnb));
    336     }
    337     return;
    338   }
    339 
    340   /* r=(a[0]-a[1])*(b[1]-b[0]) */
    341   c1 = bn_cmp_part_words(a, &(a[n]), tna, n - tna);
    342   c2 = bn_cmp_part_words(&(b[n]), b, tnb, tnb - n);
    343   zero = neg = 0;
    344   switch (c1 * 3 + c2) {
    345     case -4:
    346       bn_sub_part_words(t, &(a[n]), a, tna, tna - n);       /* - */
    347       bn_sub_part_words(&(t[n]), b, &(b[n]), tnb, n - tnb); /* - */
    348       break;
    349     case -3:
    350       zero = 1;
    351       break;
    352     case -2:
    353       bn_sub_part_words(t, &(a[n]), a, tna, tna - n);       /* - */
    354       bn_sub_part_words(&(t[n]), &(b[n]), b, tnb, tnb - n); /* + */
    355       neg = 1;
    356       break;
    357     case -1:
    358     case 0:
    359     case 1:
    360       zero = 1;
    361       break;
    362     case 2:
    363       bn_sub_part_words(t, a, &(a[n]), tna, n - tna);       /* + */
    364       bn_sub_part_words(&(t[n]), b, &(b[n]), tnb, n - tnb); /* - */
    365       neg = 1;
    366       break;
    367     case 3:
    368       zero = 1;
    369       break;
    370     case 4:
    371       bn_sub_part_words(t, a, &(a[n]), tna, n - tna);
    372       bn_sub_part_words(&(t[n]), &(b[n]), b, tnb, tnb - n);
    373       break;
    374   }
    375 
    376   if (n == 4 && dna == 0 && dnb == 0) {
    377     /* XXX: bn_mul_comba4 could take extra args to do this well */
    378     if (!zero) {
    379       bn_mul_comba4(&(t[n2]), t, &(t[n]));
    380     } else {
    381       memset(&(t[n2]), 0, 8 * sizeof(BN_ULONG));
    382     }
    383 
    384     bn_mul_comba4(r, a, b);
    385     bn_mul_comba4(&(r[n2]), &(a[n]), &(b[n]));
    386   } else if (n == 8 && dna == 0 && dnb == 0) {
    387     /* XXX: bn_mul_comba8 could take extra args to do this well */
    388     if (!zero) {
    389       bn_mul_comba8(&(t[n2]), t, &(t[n]));
    390     } else {
    391       memset(&(t[n2]), 0, 16 * sizeof(BN_ULONG));
    392     }
    393 
    394     bn_mul_comba8(r, a, b);
    395     bn_mul_comba8(&(r[n2]), &(a[n]), &(b[n]));
    396   } else {
    397     p = &(t[n2 * 2]);
    398     if (!zero) {
    399       bn_mul_recursive(&(t[n2]), t, &(t[n]), n, 0, 0, p);
    400     } else {
    401       memset(&(t[n2]), 0, n2 * sizeof(BN_ULONG));
    402     }
    403     bn_mul_recursive(r, a, b, n, 0, 0, p);
    404     bn_mul_recursive(&(r[n2]), &(a[n]), &(b[n]), n, dna, dnb, p);
    405   }
    406 
    407   /* t[32] holds (a[0]-a[1])*(b[1]-b[0]), c1 is the sign
    408    * r[10] holds (a[0]*b[0])
    409    * r[32] holds (b[1]*b[1]) */
    410 
    411   c1 = (int)(bn_add_words(t, r, &(r[n2]), n2));
    412 
    413   if (neg) {
    414     /* if t[32] is negative */
    415     c1 -= (int)(bn_sub_words(&(t[n2]), t, &(t[n2]), n2));
    416   } else {
    417     /* Might have a carry */
    418     c1 += (int)(bn_add_words(&(t[n2]), &(t[n2]), t, n2));
    419   }
    420 
    421   /* t[32] holds (a[0]-a[1])*(b[1]-b[0])+(a[0]*b[0])+(a[1]*b[1])
    422    * r[10] holds (a[0]*b[0])
    423    * r[32] holds (b[1]*b[1])
    424    * c1 holds the carry bits */
    425   c1 += (int)(bn_add_words(&(r[n]), &(r[n]), &(t[n2]), n2));
    426   if (c1) {
    427     p = &(r[n + n2]);
    428     lo = *p;
    429     ln = (lo + c1) & BN_MASK2;
    430     *p = ln;
    431 
    432     /* The overflow will stop before we over write
    433      * words we should not overwrite */
    434     if (ln < (BN_ULONG)c1) {
    435       do {
    436         p++;
    437         lo = *p;
    438         ln = (lo + 1) & BN_MASK2;
    439         *p = ln;
    440       } while (ln == 0);
    441     }
    442   }
    443 }
    444 
    445 /* n+tn is the word length
    446  * t needs to be n*4 is size, as does r */
    447 /* tnX may not be negative but less than n */
    448 static void bn_mul_part_recursive(BN_ULONG *r, BN_ULONG *a, BN_ULONG *b, int n,
    449                                   int tna, int tnb, BN_ULONG *t) {
    450   int i, j, n2 = n * 2;
    451   int c1, c2, neg;
    452   BN_ULONG ln, lo, *p;
    453 
    454   if (n < 8) {
    455     bn_mul_normal(r, a, n + tna, b, n + tnb);
    456     return;
    457   }
    458 
    459   /* r=(a[0]-a[1])*(b[1]-b[0]) */
    460   c1 = bn_cmp_part_words(a, &(a[n]), tna, n - tna);
    461   c2 = bn_cmp_part_words(&(b[n]), b, tnb, tnb - n);
    462   neg = 0;
    463   switch (c1 * 3 + c2) {
    464     case -4:
    465       bn_sub_part_words(t, &(a[n]), a, tna, tna - n);       /* - */
    466       bn_sub_part_words(&(t[n]), b, &(b[n]), tnb, n - tnb); /* - */
    467       break;
    468     case -3:
    469     /* break; */
    470     case -2:
    471       bn_sub_part_words(t, &(a[n]), a, tna, tna - n);       /* - */
    472       bn_sub_part_words(&(t[n]), &(b[n]), b, tnb, tnb - n); /* + */
    473       neg = 1;
    474       break;
    475     case -1:
    476     case 0:
    477     case 1:
    478     /* break; */
    479     case 2:
    480       bn_sub_part_words(t, a, &(a[n]), tna, n - tna);       /* + */
    481       bn_sub_part_words(&(t[n]), b, &(b[n]), tnb, n - tnb); /* - */
    482       neg = 1;
    483       break;
    484     case 3:
    485     /* break; */
    486     case 4:
    487       bn_sub_part_words(t, a, &(a[n]), tna, n - tna);
    488       bn_sub_part_words(&(t[n]), &(b[n]), b, tnb, tnb - n);
    489       break;
    490   }
    491 
    492   if (n == 8) {
    493     bn_mul_comba8(&(t[n2]), t, &(t[n]));
    494     bn_mul_comba8(r, a, b);
    495     bn_mul_normal(&(r[n2]), &(a[n]), tna, &(b[n]), tnb);
    496     memset(&(r[n2 + tna + tnb]), 0, sizeof(BN_ULONG) * (n2 - tna - tnb));
    497   } else {
    498     p = &(t[n2 * 2]);
    499     bn_mul_recursive(&(t[n2]), t, &(t[n]), n, 0, 0, p);
    500     bn_mul_recursive(r, a, b, n, 0, 0, p);
    501     i = n / 2;
    502     /* If there is only a bottom half to the number,
    503      * just do it */
    504     if (tna > tnb) {
    505       j = tna - i;
    506     } else {
    507       j = tnb - i;
    508     }
    509 
    510     if (j == 0) {
    511       bn_mul_recursive(&(r[n2]), &(a[n]), &(b[n]), i, tna - i, tnb - i, p);
    512       memset(&(r[n2 + i * 2]), 0, sizeof(BN_ULONG) * (n2 - i * 2));
    513     } else if (j > 0) {
    514       /* eg, n == 16, i == 8 and tn == 11 */
    515       bn_mul_part_recursive(&(r[n2]), &(a[n]), &(b[n]), i, tna - i, tnb - i, p);
    516       memset(&(r[n2 + tna + tnb]), 0, sizeof(BN_ULONG) * (n2 - tna - tnb));
    517     } else {
    518       /* (j < 0) eg, n == 16, i == 8 and tn == 5 */
    519       memset(&(r[n2]), 0, sizeof(BN_ULONG) * n2);
    520       if (tna < BN_MUL_RECURSIVE_SIZE_NORMAL &&
    521           tnb < BN_MUL_RECURSIVE_SIZE_NORMAL) {
    522         bn_mul_normal(&(r[n2]), &(a[n]), tna, &(b[n]), tnb);
    523       } else {
    524         for (;;) {
    525           i /= 2;
    526           /* these simplified conditions work
    527            * exclusively because difference
    528            * between tna and tnb is 1 or 0 */
    529           if (i < tna || i < tnb) {
    530             bn_mul_part_recursive(&(r[n2]), &(a[n]), &(b[n]), i, tna - i,
    531                                   tnb - i, p);
    532             break;
    533           } else if (i == tna || i == tnb) {
    534             bn_mul_recursive(&(r[n2]), &(a[n]), &(b[n]), i, tna - i, tnb - i,
    535                              p);
    536             break;
    537           }
    538         }
    539       }
    540     }
    541   }
    542 
    543   /* t[32] holds (a[0]-a[1])*(b[1]-b[0]), c1 is the sign
    544    * r[10] holds (a[0]*b[0])
    545    * r[32] holds (b[1]*b[1])
    546    */
    547 
    548   c1 = (int)(bn_add_words(t, r, &(r[n2]), n2));
    549 
    550   if (neg) {
    551     /* if t[32] is negative */
    552     c1 -= (int)(bn_sub_words(&(t[n2]), t, &(t[n2]), n2));
    553   } else {
    554     /* Might have a carry */
    555     c1 += (int)(bn_add_words(&(t[n2]), &(t[n2]), t, n2));
    556   }
    557 
    558   /* t[32] holds (a[0]-a[1])*(b[1]-b[0])+(a[0]*b[0])+(a[1]*b[1])
    559    * r[10] holds (a[0]*b[0])
    560    * r[32] holds (b[1]*b[1])
    561    * c1 holds the carry bits */
    562   c1 += (int)(bn_add_words(&(r[n]), &(r[n]), &(t[n2]), n2));
    563   if (c1) {
    564     p = &(r[n + n2]);
    565     lo = *p;
    566     ln = (lo + c1) & BN_MASK2;
    567     *p = ln;
    568 
    569     /* The overflow will stop before we over write
    570      * words we should not overwrite */
    571     if (ln < (BN_ULONG)c1) {
    572       do {
    573         p++;
    574         lo = *p;
    575         ln = (lo + 1) & BN_MASK2;
    576         *p = ln;
    577       } while (ln == 0);
    578     }
    579   }
    580 }
    581 
    582 int BN_mul(BIGNUM *r, const BIGNUM *a, const BIGNUM *b, BN_CTX *ctx) {
    583   int ret = 0;
    584   int top, al, bl;
    585   BIGNUM *rr;
    586   int i;
    587   BIGNUM *t = NULL;
    588   int j = 0, k;
    589 
    590   al = a->top;
    591   bl = b->top;
    592 
    593   if ((al == 0) || (bl == 0)) {
    594     BN_zero(r);
    595     return 1;
    596   }
    597   top = al + bl;
    598 
    599   BN_CTX_start(ctx);
    600   if ((r == a) || (r == b)) {
    601     if ((rr = BN_CTX_get(ctx)) == NULL) {
    602       goto err;
    603     }
    604   } else {
    605     rr = r;
    606   }
    607   rr->neg = a->neg ^ b->neg;
    608 
    609   i = al - bl;
    610   if (i == 0) {
    611     if (al == 8) {
    612       if (bn_wexpand(rr, 16) == NULL) {
    613         goto err;
    614       }
    615       rr->top = 16;
    616       bn_mul_comba8(rr->d, a->d, b->d);
    617       goto end;
    618     }
    619   }
    620 
    621   if ((al >= BN_MULL_SIZE_NORMAL) && (bl >= BN_MULL_SIZE_NORMAL)) {
    622     if (i >= -1 && i <= 1) {
    623       /* Find out the power of two lower or equal
    624          to the longest of the two numbers */
    625       if (i >= 0) {
    626         j = BN_num_bits_word((BN_ULONG)al);
    627       }
    628       if (i == -1) {
    629         j = BN_num_bits_word((BN_ULONG)bl);
    630       }
    631       j = 1 << (j - 1);
    632       assert(j <= al || j <= bl);
    633       k = j + j;
    634       t = BN_CTX_get(ctx);
    635       if (t == NULL) {
    636         goto err;
    637       }
    638       if (al > j || bl > j) {
    639         if (bn_wexpand(t, k * 4) == NULL) {
    640           goto err;
    641         }
    642         if (bn_wexpand(rr, k * 4) == NULL) {
    643           goto err;
    644         }
    645         bn_mul_part_recursive(rr->d, a->d, b->d, j, al - j, bl - j, t->d);
    646       } else {
    647         /* al <= j || bl <= j */
    648         if (bn_wexpand(t, k * 2) == NULL) {
    649           goto err;
    650         }
    651         if (bn_wexpand(rr, k * 2) == NULL) {
    652           goto err;
    653         }
    654         bn_mul_recursive(rr->d, a->d, b->d, j, al - j, bl - j, t->d);
    655       }
    656       rr->top = top;
    657       goto end;
    658     }
    659   }
    660 
    661   if (bn_wexpand(rr, top) == NULL) {
    662     goto err;
    663   }
    664   rr->top = top;
    665   bn_mul_normal(rr->d, a->d, al, b->d, bl);
    666 
    667 end:
    668   bn_correct_top(rr);
    669   if (r != rr && !BN_copy(r, rr)) {
    670     goto err;
    671   }
    672   ret = 1;
    673 
    674 err:
    675   BN_CTX_end(ctx);
    676   return ret;
    677 }
    678 
    679 /* tmp must have 2*n words */
    680 static void bn_sqr_normal(BN_ULONG *r, const BN_ULONG *a, int n, BN_ULONG *tmp) {
    681   int i, j, max;
    682   const BN_ULONG *ap;
    683   BN_ULONG *rp;
    684 
    685   max = n * 2;
    686   ap = a;
    687   rp = r;
    688   rp[0] = rp[max - 1] = 0;
    689   rp++;
    690   j = n;
    691 
    692   if (--j > 0) {
    693     ap++;
    694     rp[j] = bn_mul_words(rp, ap, j, ap[-1]);
    695     rp += 2;
    696   }
    697 
    698   for (i = n - 2; i > 0; i--) {
    699     j--;
    700     ap++;
    701     rp[j] = bn_mul_add_words(rp, ap, j, ap[-1]);
    702     rp += 2;
    703   }
    704 
    705   bn_add_words(r, r, r, max);
    706 
    707   /* There will not be a carry */
    708 
    709   bn_sqr_words(tmp, a, n);
    710 
    711   bn_add_words(r, r, tmp, max);
    712 }
    713 
    714 /* r is 2*n words in size,
    715  * a and b are both n words in size.    (There's not actually a 'b' here ...)
    716  * n must be a power of 2.
    717  * We multiply and return the result.
    718  * t must be 2*n words in size
    719  * We calculate
    720  * a[0]*b[0]
    721  * a[0]*b[0]+a[1]*b[1]+(a[0]-a[1])*(b[1]-b[0])
    722  * a[1]*b[1]
    723  */
    724 static void bn_sqr_recursive(BN_ULONG *r, const BN_ULONG *a, int n2, BN_ULONG *t) {
    725   int n = n2 / 2;
    726   int zero, c1;
    727   BN_ULONG ln, lo, *p;
    728 
    729   if (n2 == 4) {
    730     bn_sqr_comba4(r, a);
    731     return;
    732   } else if (n2 == 8) {
    733     bn_sqr_comba8(r, a);
    734     return;
    735   }
    736   if (n2 < BN_SQR_RECURSIVE_SIZE_NORMAL) {
    737     bn_sqr_normal(r, a, n2, t);
    738     return;
    739   }
    740   /* r=(a[0]-a[1])*(a[1]-a[0]) */
    741   c1 = bn_cmp_words(a, &(a[n]), n);
    742   zero = 0;
    743   if (c1 > 0) {
    744     bn_sub_words(t, a, &(a[n]), n);
    745   } else if (c1 < 0) {
    746     bn_sub_words(t, &(a[n]), a, n);
    747   } else {
    748     zero = 1;
    749   }
    750 
    751   /* The result will always be negative unless it is zero */
    752   p = &(t[n2 * 2]);
    753 
    754   if (!zero) {
    755     bn_sqr_recursive(&(t[n2]), t, n, p);
    756   } else {
    757     memset(&(t[n2]), 0, n2 * sizeof(BN_ULONG));
    758   }
    759   bn_sqr_recursive(r, a, n, p);
    760   bn_sqr_recursive(&(r[n2]), &(a[n]), n, p);
    761 
    762   /* t[32] holds (a[0]-a[1])*(a[1]-a[0]), it is negative or zero
    763    * r[10] holds (a[0]*b[0])
    764    * r[32] holds (b[1]*b[1]) */
    765 
    766   c1 = (int)(bn_add_words(t, r, &(r[n2]), n2));
    767 
    768   /* t[32] is negative */
    769   c1 -= (int)(bn_sub_words(&(t[n2]), t, &(t[n2]), n2));
    770 
    771   /* t[32] holds (a[0]-a[1])*(a[1]-a[0])+(a[0]*a[0])+(a[1]*a[1])
    772    * r[10] holds (a[0]*a[0])
    773    * r[32] holds (a[1]*a[1])
    774    * c1 holds the carry bits */
    775   c1 += (int)(bn_add_words(&(r[n]), &(r[n]), &(t[n2]), n2));
    776   if (c1) {
    777     p = &(r[n + n2]);
    778     lo = *p;
    779     ln = (lo + c1) & BN_MASK2;
    780     *p = ln;
    781 
    782     /* The overflow will stop before we over write
    783      * words we should not overwrite */
    784     if (ln < (BN_ULONG)c1) {
    785       do {
    786         p++;
    787         lo = *p;
    788         ln = (lo + 1) & BN_MASK2;
    789         *p = ln;
    790       } while (ln == 0);
    791     }
    792   }
    793 }
    794 
    795 int BN_mul_word(BIGNUM *bn, BN_ULONG w) {
    796   BN_ULONG ll;
    797 
    798   w &= BN_MASK2;
    799   if (!bn->top) {
    800     return 1;
    801   }
    802 
    803   if (w == 0) {
    804     BN_zero(bn);
    805     return 1;
    806   }
    807 
    808   ll = bn_mul_words(bn->d, bn->d, bn->top, w);
    809   if (ll) {
    810     if (bn_wexpand(bn, bn->top + 1) == NULL) {
    811       return 0;
    812     }
    813     bn->d[bn->top++] = ll;
    814   }
    815 
    816   return 1;
    817 }
    818 
    819 int BN_sqr(BIGNUM *r, const BIGNUM *a, BN_CTX *ctx) {
    820   int max, al;
    821   int ret = 0;
    822   BIGNUM *tmp, *rr;
    823 
    824   al = a->top;
    825   if (al <= 0) {
    826     r->top = 0;
    827     r->neg = 0;
    828     return 1;
    829   }
    830 
    831   BN_CTX_start(ctx);
    832   rr = (a != r) ? r : BN_CTX_get(ctx);
    833   tmp = BN_CTX_get(ctx);
    834   if (!rr || !tmp) {
    835     goto err;
    836   }
    837 
    838   max = 2 * al; /* Non-zero (from above) */
    839   if (bn_wexpand(rr, max) == NULL) {
    840     goto err;
    841   }
    842 
    843   if (al == 4) {
    844     bn_sqr_comba4(rr->d, a->d);
    845   } else if (al == 8) {
    846     bn_sqr_comba8(rr->d, a->d);
    847   } else {
    848     if (al < BN_SQR_RECURSIVE_SIZE_NORMAL) {
    849       BN_ULONG t[BN_SQR_RECURSIVE_SIZE_NORMAL * 2];
    850       bn_sqr_normal(rr->d, a->d, al, t);
    851     } else {
    852       int j, k;
    853 
    854       j = BN_num_bits_word((BN_ULONG)al);
    855       j = 1 << (j - 1);
    856       k = j + j;
    857       if (al == j) {
    858         if (bn_wexpand(tmp, k * 2) == NULL) {
    859           goto err;
    860         }
    861         bn_sqr_recursive(rr->d, a->d, al, tmp->d);
    862       } else {
    863         if (bn_wexpand(tmp, max) == NULL) {
    864           goto err;
    865         }
    866         bn_sqr_normal(rr->d, a->d, al, tmp->d);
    867       }
    868     }
    869   }
    870 
    871   rr->neg = 0;
    872   /* If the most-significant half of the top word of 'a' is zero, then
    873    * the square of 'a' will max-1 words. */
    874   if (a->d[al - 1] == (a->d[al - 1] & BN_MASK2l)) {
    875     rr->top = max - 1;
    876   } else {
    877     rr->top = max;
    878   }
    879 
    880   if (rr != r && !BN_copy(r, rr)) {
    881     goto err;
    882   }
    883   ret = 1;
    884 
    885 err:
    886   BN_CTX_end(ctx);
    887   return ret;
    888 }
    889