Home | History | Annotate | Download | only in wpa_supplicant
      1 /*
      2  * Minimal code for RSA support from LibTomMath 0.3.9
      3  * http://math.libtomcrypt.com/
      4  * http://math.libtomcrypt.com/files/ltm-0.39.tar.bz2
      5  * This library was released in public domain by Tom St Denis.
      6  *
      7  * The combination in this file is not using many of the optimized algorithms
      8  * (e.g., Montgomery reduction) and is considerable slower than the LibTomMath
      9  * with its default of SC_RSA_1 settins. The main purpose of having this
     10  * version here is to make it easier to build bignum.c wrapper without having
     11  * to install and build an external library. However, it is likely worth the
     12  * effort to use the full library with SC_RSA_1 instead of this minimized copy.
     13  * Including the optimized algorithms may increase the size requirements by
     14  * 15 kB or so (measured with x86 build).
     15  *
     16  * If CONFIG_INTERNAL_LIBTOMMATH is defined, bignum.c includes this
     17  * libtommath.c file instead of using the external LibTomMath library.
     18  */
     19 
     20 #ifndef CHAR_BIT
     21 #define CHAR_BIT 8
     22 #endif
     23 
     24 #define BN_MP_INVMOD_C
     25 #define BN_S_MP_EXPTMOD_C /* Note: #undef in tommath_superclass.h; this would
     26 			   * require BN_MP_EXPTMOD_FAST_C instead */
     27 #define BN_S_MP_MUL_DIGS_C
     28 #define BN_MP_INVMOD_SLOW_C
     29 #define BN_S_MP_SQR_C
     30 #define BN_S_MP_MUL_HIGH_DIGS_C /* Note: #undef in tommath_superclass.h; this
     31 				 * would require other than mp_reduce */
     32 
     33 
     34 /* from tommath.h */
     35 
     36 #ifndef MIN
     37    #define MIN(x,y) ((x)<(y)?(x):(y))
     38 #endif
     39 
     40 #ifndef MAX
     41    #define MAX(x,y) ((x)>(y)?(x):(y))
     42 #endif
     43 
     44 #define  OPT_CAST(x)
     45 
     46 typedef unsigned long mp_digit;
     47 typedef u64 mp_word;
     48 
     49 #define DIGIT_BIT          28
     50 #define MP_28BIT
     51 
     52 
     53 #define XMALLOC  os_malloc
     54 #define XFREE    os_free
     55 #define XREALLOC os_realloc
     56 
     57 
     58 #define MP_MASK          ((((mp_digit)1)<<((mp_digit)DIGIT_BIT))-((mp_digit)1))
     59 
     60 #define MP_LT        -1   /* less than */
     61 #define MP_EQ         0   /* equal to */
     62 #define MP_GT         1   /* greater than */
     63 
     64 #define MP_ZPOS       0   /* positive integer */
     65 #define MP_NEG        1   /* negative */
     66 
     67 #define MP_OKAY       0   /* ok result */
     68 #define MP_MEM        -2  /* out of mem */
     69 #define MP_VAL        -3  /* invalid input */
     70 
     71 #define MP_YES        1   /* yes response */
     72 #define MP_NO         0   /* no response */
     73 
     74 typedef int           mp_err;
     75 
     76 /* define this to use lower memory usage routines (exptmods mostly) */
     77 #define MP_LOW_MEM
     78 
     79 /* default precision */
     80 #ifndef MP_PREC
     81    #ifndef MP_LOW_MEM
     82       #define MP_PREC                 32     /* default digits of precision */
     83    #else
     84       #define MP_PREC                 8      /* default digits of precision */
     85    #endif
     86 #endif
     87 
     88 /* size of comba arrays, should be at least 2 * 2**(BITS_PER_WORD - BITS_PER_DIGIT*2) */
     89 #define MP_WARRAY               (1 << (sizeof(mp_word) * CHAR_BIT - 2 * DIGIT_BIT + 1))
     90 
     91 /* the infamous mp_int structure */
     92 typedef struct  {
     93     int used, alloc, sign;
     94     mp_digit *dp;
     95 } mp_int;
     96 
     97 
     98 /* ---> Basic Manipulations <--- */
     99 #define mp_iszero(a) (((a)->used == 0) ? MP_YES : MP_NO)
    100 #define mp_iseven(a) (((a)->used > 0 && (((a)->dp[0] & 1) == 0)) ? MP_YES : MP_NO)
    101 #define mp_isodd(a)  (((a)->used > 0 && (((a)->dp[0] & 1) == 1)) ? MP_YES : MP_NO)
    102 
    103 
    104 /* prototypes for copied functions */
    105 #define s_mp_mul(a, b, c) s_mp_mul_digs(a, b, c, (a)->used + (b)->used + 1)
    106 static int s_mp_exptmod(mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode);
    107 static int s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs);
    108 static int s_mp_sqr(mp_int * a, mp_int * b);
    109 static int s_mp_mul_high_digs(mp_int * a, mp_int * b, mp_int * c, int digs);
    110 
    111 static int fast_s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs);
    112 
    113 static int mp_init_multi(mp_int *mp, ...);
    114 static void mp_clear_multi(mp_int *mp, ...);
    115 static int mp_lshd(mp_int * a, int b);
    116 static void mp_set(mp_int * a, mp_digit b);
    117 static void mp_clamp(mp_int * a);
    118 static void mp_exch(mp_int * a, mp_int * b);
    119 static void mp_rshd(mp_int * a, int b);
    120 static void mp_zero(mp_int * a);
    121 static int mp_mod_2d(mp_int * a, int b, mp_int * c);
    122 static int mp_div_2d(mp_int * a, int b, mp_int * c, mp_int * d);
    123 static int mp_init_copy(mp_int * a, mp_int * b);
    124 static int mp_mul_2d(mp_int * a, int b, mp_int * c);
    125 static int mp_div_2(mp_int * a, mp_int * b);
    126 static int mp_copy(mp_int * a, mp_int * b);
    127 static int mp_count_bits(mp_int * a);
    128 static int mp_div(mp_int * a, mp_int * b, mp_int * c, mp_int * d);
    129 static int mp_mod(mp_int * a, mp_int * b, mp_int * c);
    130 static int mp_grow(mp_int * a, int size);
    131 static int mp_cmp_mag(mp_int * a, mp_int * b);
    132 static int mp_invmod(mp_int * a, mp_int * b, mp_int * c);
    133 static int mp_abs(mp_int * a, mp_int * b);
    134 static int mp_invmod_slow(mp_int * a, mp_int * b, mp_int * c);
    135 static int mp_sqr(mp_int * a, mp_int * b);
    136 static int mp_reduce_2k_l(mp_int *a, mp_int *n, mp_int *d);
    137 static int mp_reduce_2k_setup_l(mp_int *a, mp_int *d);
    138 static int mp_2expt(mp_int * a, int b);
    139 static int mp_reduce_setup(mp_int * a, mp_int * b);
    140 static int mp_reduce(mp_int * x, mp_int * m, mp_int * mu);
    141 static int mp_init_size(mp_int * a, int size);
    142 
    143 
    144 
    145 /* functions from bn_<func name>.c */
    146 
    147 
    148 /* reverse an array, used for radix code */
    149 static void bn_reverse (unsigned char *s, int len)
    150 {
    151   int     ix, iy;
    152   unsigned char t;
    153 
    154   ix = 0;
    155   iy = len - 1;
    156   while (ix < iy) {
    157     t     = s[ix];
    158     s[ix] = s[iy];
    159     s[iy] = t;
    160     ++ix;
    161     --iy;
    162   }
    163 }
    164 
    165 
    166 /* low level addition, based on HAC pp.594, Algorithm 14.7 */
    167 static int s_mp_add (mp_int * a, mp_int * b, mp_int * c)
    168 {
    169   mp_int *x;
    170   int     olduse, res, min, max;
    171 
    172   /* find sizes, we let |a| <= |b| which means we have to sort
    173    * them.  "x" will point to the input with the most digits
    174    */
    175   if (a->used > b->used) {
    176     min = b->used;
    177     max = a->used;
    178     x = a;
    179   } else {
    180     min = a->used;
    181     max = b->used;
    182     x = b;
    183   }
    184 
    185   /* init result */
    186   if (c->alloc < max + 1) {
    187     if ((res = mp_grow (c, max + 1)) != MP_OKAY) {
    188       return res;
    189     }
    190   }
    191 
    192   /* get old used digit count and set new one */
    193   olduse = c->used;
    194   c->used = max + 1;
    195 
    196   {
    197     register mp_digit u, *tmpa, *tmpb, *tmpc;
    198     register int i;
    199 
    200     /* alias for digit pointers */
    201 
    202     /* first input */
    203     tmpa = a->dp;
    204 
    205     /* second input */
    206     tmpb = b->dp;
    207 
    208     /* destination */
    209     tmpc = c->dp;
    210 
    211     /* zero the carry */
    212     u = 0;
    213     for (i = 0; i < min; i++) {
    214       /* Compute the sum at one digit, T[i] = A[i] + B[i] + U */
    215       *tmpc = *tmpa++ + *tmpb++ + u;
    216 
    217       /* U = carry bit of T[i] */
    218       u = *tmpc >> ((mp_digit)DIGIT_BIT);
    219 
    220       /* take away carry bit from T[i] */
    221       *tmpc++ &= MP_MASK;
    222     }
    223 
    224     /* now copy higher words if any, that is in A+B
    225      * if A or B has more digits add those in
    226      */
    227     if (min != max) {
    228       for (; i < max; i++) {
    229         /* T[i] = X[i] + U */
    230         *tmpc = x->dp[i] + u;
    231 
    232         /* U = carry bit of T[i] */
    233         u = *tmpc >> ((mp_digit)DIGIT_BIT);
    234 
    235         /* take away carry bit from T[i] */
    236         *tmpc++ &= MP_MASK;
    237       }
    238     }
    239 
    240     /* add carry */
    241     *tmpc++ = u;
    242 
    243     /* clear digits above oldused */
    244     for (i = c->used; i < olduse; i++) {
    245       *tmpc++ = 0;
    246     }
    247   }
    248 
    249   mp_clamp (c);
    250   return MP_OKAY;
    251 }
    252 
    253 
    254 /* low level subtraction (assumes |a| > |b|), HAC pp.595 Algorithm 14.9 */
    255 static int s_mp_sub (mp_int * a, mp_int * b, mp_int * c)
    256 {
    257   int     olduse, res, min, max;
    258 
    259   /* find sizes */
    260   min = b->used;
    261   max = a->used;
    262 
    263   /* init result */
    264   if (c->alloc < max) {
    265     if ((res = mp_grow (c, max)) != MP_OKAY) {
    266       return res;
    267     }
    268   }
    269   olduse = c->used;
    270   c->used = max;
    271 
    272   {
    273     register mp_digit u, *tmpa, *tmpb, *tmpc;
    274     register int i;
    275 
    276     /* alias for digit pointers */
    277     tmpa = a->dp;
    278     tmpb = b->dp;
    279     tmpc = c->dp;
    280 
    281     /* set carry to zero */
    282     u = 0;
    283     for (i = 0; i < min; i++) {
    284       /* T[i] = A[i] - B[i] - U */
    285       *tmpc = *tmpa++ - *tmpb++ - u;
    286 
    287       /* U = carry bit of T[i]
    288        * Note this saves performing an AND operation since
    289        * if a carry does occur it will propagate all the way to the
    290        * MSB.  As a result a single shift is enough to get the carry
    291        */
    292       u = *tmpc >> ((mp_digit)(CHAR_BIT * sizeof (mp_digit) - 1));
    293 
    294       /* Clear carry from T[i] */
    295       *tmpc++ &= MP_MASK;
    296     }
    297 
    298     /* now copy higher words if any, e.g. if A has more digits than B  */
    299     for (; i < max; i++) {
    300       /* T[i] = A[i] - U */
    301       *tmpc = *tmpa++ - u;
    302 
    303       /* U = carry bit of T[i] */
    304       u = *tmpc >> ((mp_digit)(CHAR_BIT * sizeof (mp_digit) - 1));
    305 
    306       /* Clear carry from T[i] */
    307       *tmpc++ &= MP_MASK;
    308     }
    309 
    310     /* clear digits above used (since we may not have grown result above) */
    311     for (i = c->used; i < olduse; i++) {
    312       *tmpc++ = 0;
    313     }
    314   }
    315 
    316   mp_clamp (c);
    317   return MP_OKAY;
    318 }
    319 
    320 
    321 /* init a new mp_int */
    322 static int mp_init (mp_int * a)
    323 {
    324   int i;
    325 
    326   /* allocate memory required and clear it */
    327   a->dp = OPT_CAST(mp_digit) XMALLOC (sizeof (mp_digit) * MP_PREC);
    328   if (a->dp == NULL) {
    329     return MP_MEM;
    330   }
    331 
    332   /* set the digits to zero */
    333   for (i = 0; i < MP_PREC; i++) {
    334       a->dp[i] = 0;
    335   }
    336 
    337   /* set the used to zero, allocated digits to the default precision
    338    * and sign to positive */
    339   a->used  = 0;
    340   a->alloc = MP_PREC;
    341   a->sign  = MP_ZPOS;
    342 
    343   return MP_OKAY;
    344 }
    345 
    346 
    347 /* clear one (frees)  */
    348 static void mp_clear (mp_int * a)
    349 {
    350   int i;
    351 
    352   /* only do anything if a hasn't been freed previously */
    353   if (a->dp != NULL) {
    354     /* first zero the digits */
    355     for (i = 0; i < a->used; i++) {
    356         a->dp[i] = 0;
    357     }
    358 
    359     /* free ram */
    360     XFREE(a->dp);
    361 
    362     /* reset members to make debugging easier */
    363     a->dp    = NULL;
    364     a->alloc = a->used = 0;
    365     a->sign  = MP_ZPOS;
    366   }
    367 }
    368 
    369 
    370 /* high level addition (handles signs) */
    371 static int mp_add (mp_int * a, mp_int * b, mp_int * c)
    372 {
    373   int     sa, sb, res;
    374 
    375   /* get sign of both inputs */
    376   sa = a->sign;
    377   sb = b->sign;
    378 
    379   /* handle two cases, not four */
    380   if (sa == sb) {
    381     /* both positive or both negative */
    382     /* add their magnitudes, copy the sign */
    383     c->sign = sa;
    384     res = s_mp_add (a, b, c);
    385   } else {
    386     /* one positive, the other negative */
    387     /* subtract the one with the greater magnitude from */
    388     /* the one of the lesser magnitude.  The result gets */
    389     /* the sign of the one with the greater magnitude. */
    390     if (mp_cmp_mag (a, b) == MP_LT) {
    391       c->sign = sb;
    392       res = s_mp_sub (b, a, c);
    393     } else {
    394       c->sign = sa;
    395       res = s_mp_sub (a, b, c);
    396     }
    397   }
    398   return res;
    399 }
    400 
    401 
    402 /* high level subtraction (handles signs) */
    403 static int mp_sub (mp_int * a, mp_int * b, mp_int * c)
    404 {
    405   int     sa, sb, res;
    406 
    407   sa = a->sign;
    408   sb = b->sign;
    409 
    410   if (sa != sb) {
    411     /* subtract a negative from a positive, OR */
    412     /* subtract a positive from a negative. */
    413     /* In either case, ADD their magnitudes, */
    414     /* and use the sign of the first number. */
    415     c->sign = sa;
    416     res = s_mp_add (a, b, c);
    417   } else {
    418     /* subtract a positive from a positive, OR */
    419     /* subtract a negative from a negative. */
    420     /* First, take the difference between their */
    421     /* magnitudes, then... */
    422     if (mp_cmp_mag (a, b) != MP_LT) {
    423       /* Copy the sign from the first */
    424       c->sign = sa;
    425       /* The first has a larger or equal magnitude */
    426       res = s_mp_sub (a, b, c);
    427     } else {
    428       /* The result has the *opposite* sign from */
    429       /* the first number. */
    430       c->sign = (sa == MP_ZPOS) ? MP_NEG : MP_ZPOS;
    431       /* The second has a larger magnitude */
    432       res = s_mp_sub (b, a, c);
    433     }
    434   }
    435   return res;
    436 }
    437 
    438 
    439 /* high level multiplication (handles sign) */
    440 static int mp_mul (mp_int * a, mp_int * b, mp_int * c)
    441 {
    442   int     res, neg;
    443   neg = (a->sign == b->sign) ? MP_ZPOS : MP_NEG;
    444 
    445   /* use Toom-Cook? */
    446 #ifdef BN_MP_TOOM_MUL_C
    447   if (MIN (a->used, b->used) >= TOOM_MUL_CUTOFF) {
    448     res = mp_toom_mul(a, b, c);
    449   } else
    450 #endif
    451 #ifdef BN_MP_KARATSUBA_MUL_C
    452   /* use Karatsuba? */
    453   if (MIN (a->used, b->used) >= KARATSUBA_MUL_CUTOFF) {
    454     res = mp_karatsuba_mul (a, b, c);
    455   } else
    456 #endif
    457   {
    458     /* can we use the fast multiplier?
    459      *
    460      * The fast multiplier can be used if the output will
    461      * have less than MP_WARRAY digits and the number of
    462      * digits won't affect carry propagation
    463      */
    464 #ifdef BN_FAST_S_MP_MUL_DIGS_C
    465     int     digs = a->used + b->used + 1;
    466 
    467     if ((digs < MP_WARRAY) &&
    468         MIN(a->used, b->used) <=
    469         (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
    470       res = fast_s_mp_mul_digs (a, b, c, digs);
    471     } else
    472 #endif
    473 #ifdef BN_S_MP_MUL_DIGS_C
    474       res = s_mp_mul (a, b, c); /* uses s_mp_mul_digs */
    475 #else
    476 #error mp_mul could fail
    477       res = MP_VAL;
    478 #endif
    479 
    480   }
    481   c->sign = (c->used > 0) ? neg : MP_ZPOS;
    482   return res;
    483 }
    484 
    485 
    486 /* d = a * b (mod c) */
    487 static int mp_mulmod (mp_int * a, mp_int * b, mp_int * c, mp_int * d)
    488 {
    489   int     res;
    490   mp_int  t;
    491 
    492   if ((res = mp_init (&t)) != MP_OKAY) {
    493     return res;
    494   }
    495 
    496   if ((res = mp_mul (a, b, &t)) != MP_OKAY) {
    497     mp_clear (&t);
    498     return res;
    499   }
    500   res = mp_mod (&t, c, d);
    501   mp_clear (&t);
    502   return res;
    503 }
    504 
    505 
    506 /* c = a mod b, 0 <= c < b */
    507 static int mp_mod (mp_int * a, mp_int * b, mp_int * c)
    508 {
    509   mp_int  t;
    510   int     res;
    511 
    512   if ((res = mp_init (&t)) != MP_OKAY) {
    513     return res;
    514   }
    515 
    516   if ((res = mp_div (a, b, NULL, &t)) != MP_OKAY) {
    517     mp_clear (&t);
    518     return res;
    519   }
    520 
    521   if (t.sign != b->sign) {
    522     res = mp_add (b, &t, c);
    523   } else {
    524     res = MP_OKAY;
    525     mp_exch (&t, c);
    526   }
    527 
    528   mp_clear (&t);
    529   return res;
    530 }
    531 
    532 
    533 /* this is a shell function that calls either the normal or Montgomery
    534  * exptmod functions.  Originally the call to the montgomery code was
    535  * embedded in the normal function but that wasted alot of stack space
    536  * for nothing (since 99% of the time the Montgomery code would be called)
    537  */
    538 static int mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y)
    539 {
    540   int dr;
    541 
    542   /* modulus P must be positive */
    543   if (P->sign == MP_NEG) {
    544      return MP_VAL;
    545   }
    546 
    547   /* if exponent X is negative we have to recurse */
    548   if (X->sign == MP_NEG) {
    549 #ifdef BN_MP_INVMOD_C
    550      mp_int tmpG, tmpX;
    551      int err;
    552 
    553      /* first compute 1/G mod P */
    554      if ((err = mp_init(&tmpG)) != MP_OKAY) {
    555         return err;
    556      }
    557      if ((err = mp_invmod(G, P, &tmpG)) != MP_OKAY) {
    558         mp_clear(&tmpG);
    559         return err;
    560      }
    561 
    562      /* now get |X| */
    563      if ((err = mp_init(&tmpX)) != MP_OKAY) {
    564         mp_clear(&tmpG);
    565         return err;
    566      }
    567      if ((err = mp_abs(X, &tmpX)) != MP_OKAY) {
    568         mp_clear_multi(&tmpG, &tmpX, NULL);
    569         return err;
    570      }
    571 
    572      /* and now compute (1/G)**|X| instead of G**X [X < 0] */
    573      err = mp_exptmod(&tmpG, &tmpX, P, Y);
    574      mp_clear_multi(&tmpG, &tmpX, NULL);
    575      return err;
    576 #else
    577 #error mp_exptmod would always fail
    578      /* no invmod */
    579      return MP_VAL;
    580 #endif
    581   }
    582 
    583 /* modified diminished radix reduction */
    584 #if defined(BN_MP_REDUCE_IS_2K_L_C) && defined(BN_MP_REDUCE_2K_L_C) && defined(BN_S_MP_EXPTMOD_C)
    585   if (mp_reduce_is_2k_l(P) == MP_YES) {
    586      return s_mp_exptmod(G, X, P, Y, 1);
    587   }
    588 #endif
    589 
    590 #ifdef BN_MP_DR_IS_MODULUS_C
    591   /* is it a DR modulus? */
    592   dr = mp_dr_is_modulus(P);
    593 #else
    594   /* default to no */
    595   dr = 0;
    596 #endif
    597 
    598 #ifdef BN_MP_REDUCE_IS_2K_C
    599   /* if not, is it a unrestricted DR modulus? */
    600   if (dr == 0) {
    601      dr = mp_reduce_is_2k(P) << 1;
    602   }
    603 #endif
    604 
    605   /* if the modulus is odd or dr != 0 use the montgomery method */
    606 #ifdef BN_MP_EXPTMOD_FAST_C
    607   if (mp_isodd (P) == 1 || dr !=  0) {
    608     return mp_exptmod_fast (G, X, P, Y, dr);
    609   } else {
    610 #endif
    611 #ifdef BN_S_MP_EXPTMOD_C
    612     /* otherwise use the generic Barrett reduction technique */
    613     return s_mp_exptmod (G, X, P, Y, 0);
    614 #else
    615 #error mp_exptmod could fail
    616     /* no exptmod for evens */
    617     return MP_VAL;
    618 #endif
    619 #ifdef BN_MP_EXPTMOD_FAST_C
    620   }
    621 #endif
    622 }
    623 
    624 
    625 /* compare two ints (signed)*/
    626 static int mp_cmp (mp_int * a, mp_int * b)
    627 {
    628   /* compare based on sign */
    629   if (a->sign != b->sign) {
    630      if (a->sign == MP_NEG) {
    631         return MP_LT;
    632      } else {
    633         return MP_GT;
    634      }
    635   }
    636 
    637   /* compare digits */
    638   if (a->sign == MP_NEG) {
    639      /* if negative compare opposite direction */
    640      return mp_cmp_mag(b, a);
    641   } else {
    642      return mp_cmp_mag(a, b);
    643   }
    644 }
    645 
    646 
    647 /* compare a digit */
    648 static int mp_cmp_d(mp_int * a, mp_digit b)
    649 {
    650   /* compare based on sign */
    651   if (a->sign == MP_NEG) {
    652     return MP_LT;
    653   }
    654 
    655   /* compare based on magnitude */
    656   if (a->used > 1) {
    657     return MP_GT;
    658   }
    659 
    660   /* compare the only digit of a to b */
    661   if (a->dp[0] > b) {
    662     return MP_GT;
    663   } else if (a->dp[0] < b) {
    664     return MP_LT;
    665   } else {
    666     return MP_EQ;
    667   }
    668 }
    669 
    670 
    671 /* hac 14.61, pp608 */
    672 static int mp_invmod (mp_int * a, mp_int * b, mp_int * c)
    673 {
    674   /* b cannot be negative */
    675   if (b->sign == MP_NEG || mp_iszero(b) == 1) {
    676     return MP_VAL;
    677   }
    678 
    679 #ifdef BN_FAST_MP_INVMOD_C
    680   /* if the modulus is odd we can use a faster routine instead */
    681   if (mp_isodd (b) == 1) {
    682     return fast_mp_invmod (a, b, c);
    683   }
    684 #endif
    685 
    686 #ifdef BN_MP_INVMOD_SLOW_C
    687   return mp_invmod_slow(a, b, c);
    688 #endif
    689 
    690 #ifndef BN_FAST_MP_INVMOD_C
    691 #ifndef BN_MP_INVMOD_SLOW_C
    692 #error mp_invmod would always fail
    693 #endif
    694 #endif
    695   return MP_VAL;
    696 }
    697 
    698 
    699 /* get the size for an unsigned equivalent */
    700 static int mp_unsigned_bin_size (mp_int * a)
    701 {
    702   int     size = mp_count_bits (a);
    703   return (size / 8 + ((size & 7) != 0 ? 1 : 0));
    704 }
    705 
    706 
    707 /* hac 14.61, pp608 */
    708 static int mp_invmod_slow (mp_int * a, mp_int * b, mp_int * c)
    709 {
    710   mp_int  x, y, u, v, A, B, C, D;
    711   int     res;
    712 
    713   /* b cannot be negative */
    714   if (b->sign == MP_NEG || mp_iszero(b) == 1) {
    715     return MP_VAL;
    716   }
    717 
    718   /* init temps */
    719   if ((res = mp_init_multi(&x, &y, &u, &v,
    720                            &A, &B, &C, &D, NULL)) != MP_OKAY) {
    721      return res;
    722   }
    723 
    724   /* x = a, y = b */
    725   if ((res = mp_mod(a, b, &x)) != MP_OKAY) {
    726       goto LBL_ERR;
    727   }
    728   if ((res = mp_copy (b, &y)) != MP_OKAY) {
    729     goto LBL_ERR;
    730   }
    731 
    732   /* 2. [modified] if x,y are both even then return an error! */
    733   if (mp_iseven (&x) == 1 && mp_iseven (&y) == 1) {
    734     res = MP_VAL;
    735     goto LBL_ERR;
    736   }
    737 
    738   /* 3. u=x, v=y, A=1, B=0, C=0,D=1 */
    739   if ((res = mp_copy (&x, &u)) != MP_OKAY) {
    740     goto LBL_ERR;
    741   }
    742   if ((res = mp_copy (&y, &v)) != MP_OKAY) {
    743     goto LBL_ERR;
    744   }
    745   mp_set (&A, 1);
    746   mp_set (&D, 1);
    747 
    748 top:
    749   /* 4.  while u is even do */
    750   while (mp_iseven (&u) == 1) {
    751     /* 4.1 u = u/2 */
    752     if ((res = mp_div_2 (&u, &u)) != MP_OKAY) {
    753       goto LBL_ERR;
    754     }
    755     /* 4.2 if A or B is odd then */
    756     if (mp_isodd (&A) == 1 || mp_isodd (&B) == 1) {
    757       /* A = (A+y)/2, B = (B-x)/2 */
    758       if ((res = mp_add (&A, &y, &A)) != MP_OKAY) {
    759          goto LBL_ERR;
    760       }
    761       if ((res = mp_sub (&B, &x, &B)) != MP_OKAY) {
    762          goto LBL_ERR;
    763       }
    764     }
    765     /* A = A/2, B = B/2 */
    766     if ((res = mp_div_2 (&A, &A)) != MP_OKAY) {
    767       goto LBL_ERR;
    768     }
    769     if ((res = mp_div_2 (&B, &B)) != MP_OKAY) {
    770       goto LBL_ERR;
    771     }
    772   }
    773 
    774   /* 5.  while v is even do */
    775   while (mp_iseven (&v) == 1) {
    776     /* 5.1 v = v/2 */
    777     if ((res = mp_div_2 (&v, &v)) != MP_OKAY) {
    778       goto LBL_ERR;
    779     }
    780     /* 5.2 if C or D is odd then */
    781     if (mp_isodd (&C) == 1 || mp_isodd (&D) == 1) {
    782       /* C = (C+y)/2, D = (D-x)/2 */
    783       if ((res = mp_add (&C, &y, &C)) != MP_OKAY) {
    784          goto LBL_ERR;
    785       }
    786       if ((res = mp_sub (&D, &x, &D)) != MP_OKAY) {
    787          goto LBL_ERR;
    788       }
    789     }
    790     /* C = C/2, D = D/2 */
    791     if ((res = mp_div_2 (&C, &C)) != MP_OKAY) {
    792       goto LBL_ERR;
    793     }
    794     if ((res = mp_div_2 (&D, &D)) != MP_OKAY) {
    795       goto LBL_ERR;
    796     }
    797   }
    798 
    799   /* 6.  if u >= v then */
    800   if (mp_cmp (&u, &v) != MP_LT) {
    801     /* u = u - v, A = A - C, B = B - D */
    802     if ((res = mp_sub (&u, &v, &u)) != MP_OKAY) {
    803       goto LBL_ERR;
    804     }
    805 
    806     if ((res = mp_sub (&A, &C, &A)) != MP_OKAY) {
    807       goto LBL_ERR;
    808     }
    809 
    810     if ((res = mp_sub (&B, &D, &B)) != MP_OKAY) {
    811       goto LBL_ERR;
    812     }
    813   } else {
    814     /* v - v - u, C = C - A, D = D - B */
    815     if ((res = mp_sub (&v, &u, &v)) != MP_OKAY) {
    816       goto LBL_ERR;
    817     }
    818 
    819     if ((res = mp_sub (&C, &A, &C)) != MP_OKAY) {
    820       goto LBL_ERR;
    821     }
    822 
    823     if ((res = mp_sub (&D, &B, &D)) != MP_OKAY) {
    824       goto LBL_ERR;
    825     }
    826   }
    827 
    828   /* if not zero goto step 4 */
    829   if (mp_iszero (&u) == 0)
    830     goto top;
    831 
    832   /* now a = C, b = D, gcd == g*v */
    833 
    834   /* if v != 1 then there is no inverse */
    835   if (mp_cmp_d (&v, 1) != MP_EQ) {
    836     res = MP_VAL;
    837     goto LBL_ERR;
    838   }
    839 
    840   /* if its too low */
    841   while (mp_cmp_d(&C, 0) == MP_LT) {
    842       if ((res = mp_add(&C, b, &C)) != MP_OKAY) {
    843          goto LBL_ERR;
    844       }
    845   }
    846 
    847   /* too big */
    848   while (mp_cmp_mag(&C, b) != MP_LT) {
    849       if ((res = mp_sub(&C, b, &C)) != MP_OKAY) {
    850          goto LBL_ERR;
    851       }
    852   }
    853 
    854   /* C is now the inverse */
    855   mp_exch (&C, c);
    856   res = MP_OKAY;
    857 LBL_ERR:mp_clear_multi (&x, &y, &u, &v, &A, &B, &C, &D, NULL);
    858   return res;
    859 }
    860 
    861 
    862 /* compare maginitude of two ints (unsigned) */
    863 static int mp_cmp_mag (mp_int * a, mp_int * b)
    864 {
    865   int     n;
    866   mp_digit *tmpa, *tmpb;
    867 
    868   /* compare based on # of non-zero digits */
    869   if (a->used > b->used) {
    870     return MP_GT;
    871   }
    872 
    873   if (a->used < b->used) {
    874     return MP_LT;
    875   }
    876 
    877   /* alias for a */
    878   tmpa = a->dp + (a->used - 1);
    879 
    880   /* alias for b */
    881   tmpb = b->dp + (a->used - 1);
    882 
    883   /* compare based on digits  */
    884   for (n = 0; n < a->used; ++n, --tmpa, --tmpb) {
    885     if (*tmpa > *tmpb) {
    886       return MP_GT;
    887     }
    888 
    889     if (*tmpa < *tmpb) {
    890       return MP_LT;
    891     }
    892   }
    893   return MP_EQ;
    894 }
    895 
    896 
    897 /* reads a unsigned char array, assumes the msb is stored first [big endian] */
    898 static int mp_read_unsigned_bin (mp_int * a, const unsigned char *b, int c)
    899 {
    900   int     res;
    901 
    902   /* make sure there are at least two digits */
    903   if (a->alloc < 2) {
    904      if ((res = mp_grow(a, 2)) != MP_OKAY) {
    905         return res;
    906      }
    907   }
    908 
    909   /* zero the int */
    910   mp_zero (a);
    911 
    912   /* read the bytes in */
    913   while (c-- > 0) {
    914     if ((res = mp_mul_2d (a, 8, a)) != MP_OKAY) {
    915       return res;
    916     }
    917 
    918 #ifndef MP_8BIT
    919       a->dp[0] |= *b++;
    920       a->used += 1;
    921 #else
    922       a->dp[0] = (*b & MP_MASK);
    923       a->dp[1] |= ((*b++ >> 7U) & 1);
    924       a->used += 2;
    925 #endif
    926   }
    927   mp_clamp (a);
    928   return MP_OKAY;
    929 }
    930 
    931 
    932 /* store in unsigned [big endian] format */
    933 static int mp_to_unsigned_bin (mp_int * a, unsigned char *b)
    934 {
    935   int     x, res;
    936   mp_int  t;
    937 
    938   if ((res = mp_init_copy (&t, a)) != MP_OKAY) {
    939     return res;
    940   }
    941 
    942   x = 0;
    943   while (mp_iszero (&t) == 0) {
    944 #ifndef MP_8BIT
    945       b[x++] = (unsigned char) (t.dp[0] & 255);
    946 #else
    947       b[x++] = (unsigned char) (t.dp[0] | ((t.dp[1] & 0x01) << 7));
    948 #endif
    949     if ((res = mp_div_2d (&t, 8, &t, NULL)) != MP_OKAY) {
    950       mp_clear (&t);
    951       return res;
    952     }
    953   }
    954   bn_reverse (b, x);
    955   mp_clear (&t);
    956   return MP_OKAY;
    957 }
    958 
    959 
    960 /* shift right by a certain bit count (store quotient in c, optional remainder in d) */
    961 static int mp_div_2d (mp_int * a, int b, mp_int * c, mp_int * d)
    962 {
    963   mp_digit D, r, rr;
    964   int     x, res;
    965   mp_int  t;
    966 
    967 
    968   /* if the shift count is <= 0 then we do no work */
    969   if (b <= 0) {
    970     res = mp_copy (a, c);
    971     if (d != NULL) {
    972       mp_zero (d);
    973     }
    974     return res;
    975   }
    976 
    977   if ((res = mp_init (&t)) != MP_OKAY) {
    978     return res;
    979   }
    980 
    981   /* get the remainder */
    982   if (d != NULL) {
    983     if ((res = mp_mod_2d (a, b, &t)) != MP_OKAY) {
    984       mp_clear (&t);
    985       return res;
    986     }
    987   }
    988 
    989   /* copy */
    990   if ((res = mp_copy (a, c)) != MP_OKAY) {
    991     mp_clear (&t);
    992     return res;
    993   }
    994 
    995   /* shift by as many digits in the bit count */
    996   if (b >= (int)DIGIT_BIT) {
    997     mp_rshd (c, b / DIGIT_BIT);
    998   }
    999 
   1000   /* shift any bit count < DIGIT_BIT */
   1001   D = (mp_digit) (b % DIGIT_BIT);
   1002   if (D != 0) {
   1003     register mp_digit *tmpc, mask, shift;
   1004 
   1005     /* mask */
   1006     mask = (((mp_digit)1) << D) - 1;
   1007 
   1008     /* shift for lsb */
   1009     shift = DIGIT_BIT - D;
   1010 
   1011     /* alias */
   1012     tmpc = c->dp + (c->used - 1);
   1013 
   1014     /* carry */
   1015     r = 0;
   1016     for (x = c->used - 1; x >= 0; x--) {
   1017       /* get the lower  bits of this word in a temp */
   1018       rr = *tmpc & mask;
   1019 
   1020       /* shift the current word and mix in the carry bits from the previous word */
   1021       *tmpc = (*tmpc >> D) | (r << shift);
   1022       --tmpc;
   1023 
   1024       /* set the carry to the carry bits of the current word found above */
   1025       r = rr;
   1026     }
   1027   }
   1028   mp_clamp (c);
   1029   if (d != NULL) {
   1030     mp_exch (&t, d);
   1031   }
   1032   mp_clear (&t);
   1033   return MP_OKAY;
   1034 }
   1035 
   1036 
   1037 static int mp_init_copy (mp_int * a, mp_int * b)
   1038 {
   1039   int     res;
   1040 
   1041   if ((res = mp_init (a)) != MP_OKAY) {
   1042     return res;
   1043   }
   1044   return mp_copy (b, a);
   1045 }
   1046 
   1047 
   1048 /* set to zero */
   1049 static void mp_zero (mp_int * a)
   1050 {
   1051   int       n;
   1052   mp_digit *tmp;
   1053 
   1054   a->sign = MP_ZPOS;
   1055   a->used = 0;
   1056 
   1057   tmp = a->dp;
   1058   for (n = 0; n < a->alloc; n++) {
   1059      *tmp++ = 0;
   1060   }
   1061 }
   1062 
   1063 
   1064 /* copy, b = a */
   1065 static int mp_copy (mp_int * a, mp_int * b)
   1066 {
   1067   int     res, n;
   1068 
   1069   /* if dst == src do nothing */
   1070   if (a == b) {
   1071     return MP_OKAY;
   1072   }
   1073 
   1074   /* grow dest */
   1075   if (b->alloc < a->used) {
   1076      if ((res = mp_grow (b, a->used)) != MP_OKAY) {
   1077         return res;
   1078      }
   1079   }
   1080 
   1081   /* zero b and copy the parameters over */
   1082   {
   1083     register mp_digit *tmpa, *tmpb;
   1084 
   1085     /* pointer aliases */
   1086 
   1087     /* source */
   1088     tmpa = a->dp;
   1089 
   1090     /* destination */
   1091     tmpb = b->dp;
   1092 
   1093     /* copy all the digits */
   1094     for (n = 0; n < a->used; n++) {
   1095       *tmpb++ = *tmpa++;
   1096     }
   1097 
   1098     /* clear high digits */
   1099     for (; n < b->used; n++) {
   1100       *tmpb++ = 0;
   1101     }
   1102   }
   1103 
   1104   /* copy used count and sign */
   1105   b->used = a->used;
   1106   b->sign = a->sign;
   1107   return MP_OKAY;
   1108 }
   1109 
   1110 
   1111 /* shift right a certain amount of digits */
   1112 static void mp_rshd (mp_int * a, int b)
   1113 {
   1114   int     x;
   1115 
   1116   /* if b <= 0 then ignore it */
   1117   if (b <= 0) {
   1118     return;
   1119   }
   1120 
   1121   /* if b > used then simply zero it and return */
   1122   if (a->used <= b) {
   1123     mp_zero (a);
   1124     return;
   1125   }
   1126 
   1127   {
   1128     register mp_digit *bottom, *top;
   1129 
   1130     /* shift the digits down */
   1131 
   1132     /* bottom */
   1133     bottom = a->dp;
   1134 
   1135     /* top [offset into digits] */
   1136     top = a->dp + b;
   1137 
   1138     /* this is implemented as a sliding window where
   1139      * the window is b-digits long and digits from
   1140      * the top of the window are copied to the bottom
   1141      *
   1142      * e.g.
   1143 
   1144      b-2 | b-1 | b0 | b1 | b2 | ... | bb |   ---->
   1145                  /\                   |      ---->
   1146                   \-------------------/      ---->
   1147      */
   1148     for (x = 0; x < (a->used - b); x++) {
   1149       *bottom++ = *top++;
   1150     }
   1151 
   1152     /* zero the top digits */
   1153     for (; x < a->used; x++) {
   1154       *bottom++ = 0;
   1155     }
   1156   }
   1157 
   1158   /* remove excess digits */
   1159   a->used -= b;
   1160 }
   1161 
   1162 
   1163 /* swap the elements of two integers, for cases where you can't simply swap the
   1164  * mp_int pointers around
   1165  */
   1166 static void mp_exch (mp_int * a, mp_int * b)
   1167 {
   1168   mp_int  t;
   1169 
   1170   t  = *a;
   1171   *a = *b;
   1172   *b = t;
   1173 }
   1174 
   1175 
   1176 /* trim unused digits
   1177  *
   1178  * This is used to ensure that leading zero digits are
   1179  * trimed and the leading "used" digit will be non-zero
   1180  * Typically very fast.  Also fixes the sign if there
   1181  * are no more leading digits
   1182  */
   1183 static void mp_clamp (mp_int * a)
   1184 {
   1185   /* decrease used while the most significant digit is
   1186    * zero.
   1187    */
   1188   while (a->used > 0 && a->dp[a->used - 1] == 0) {
   1189     --(a->used);
   1190   }
   1191 
   1192   /* reset the sign flag if used == 0 */
   1193   if (a->used == 0) {
   1194     a->sign = MP_ZPOS;
   1195   }
   1196 }
   1197 
   1198 
   1199 /* grow as required */
   1200 static int mp_grow (mp_int * a, int size)
   1201 {
   1202   int     i;
   1203   mp_digit *tmp;
   1204 
   1205   /* if the alloc size is smaller alloc more ram */
   1206   if (a->alloc < size) {
   1207     /* ensure there are always at least MP_PREC digits extra on top */
   1208     size += (MP_PREC * 2) - (size % MP_PREC);
   1209 
   1210     /* reallocate the array a->dp
   1211      *
   1212      * We store the return in a temporary variable
   1213      * in case the operation failed we don't want
   1214      * to overwrite the dp member of a.
   1215      */
   1216     tmp = OPT_CAST(mp_digit) XREALLOC (a->dp, sizeof (mp_digit) * size);
   1217     if (tmp == NULL) {
   1218       /* reallocation failed but "a" is still valid [can be freed] */
   1219       return MP_MEM;
   1220     }
   1221 
   1222     /* reallocation succeeded so set a->dp */
   1223     a->dp = tmp;
   1224 
   1225     /* zero excess digits */
   1226     i        = a->alloc;
   1227     a->alloc = size;
   1228     for (; i < a->alloc; i++) {
   1229       a->dp[i] = 0;
   1230     }
   1231   }
   1232   return MP_OKAY;
   1233 }
   1234 
   1235 
   1236 /* b = |a|
   1237  *
   1238  * Simple function copies the input and fixes the sign to positive
   1239  */
   1240 static int mp_abs (mp_int * a, mp_int * b)
   1241 {
   1242   int     res;
   1243 
   1244   /* copy a to b */
   1245   if (a != b) {
   1246      if ((res = mp_copy (a, b)) != MP_OKAY) {
   1247        return res;
   1248      }
   1249   }
   1250 
   1251   /* force the sign of b to positive */
   1252   b->sign = MP_ZPOS;
   1253 
   1254   return MP_OKAY;
   1255 }
   1256 
   1257 
   1258 /* set to a digit */
   1259 static void mp_set (mp_int * a, mp_digit b)
   1260 {
   1261   mp_zero (a);
   1262   a->dp[0] = b & MP_MASK;
   1263   a->used  = (a->dp[0] != 0) ? 1 : 0;
   1264 }
   1265 
   1266 
   1267 /* b = a/2 */
   1268 static int mp_div_2(mp_int * a, mp_int * b)
   1269 {
   1270   int     x, res, oldused;
   1271 
   1272   /* copy */
   1273   if (b->alloc < a->used) {
   1274     if ((res = mp_grow (b, a->used)) != MP_OKAY) {
   1275       return res;
   1276     }
   1277   }
   1278 
   1279   oldused = b->used;
   1280   b->used = a->used;
   1281   {
   1282     register mp_digit r, rr, *tmpa, *tmpb;
   1283 
   1284     /* source alias */
   1285     tmpa = a->dp + b->used - 1;
   1286 
   1287     /* dest alias */
   1288     tmpb = b->dp + b->used - 1;
   1289 
   1290     /* carry */
   1291     r = 0;
   1292     for (x = b->used - 1; x >= 0; x--) {
   1293       /* get the carry for the next iteration */
   1294       rr = *tmpa & 1;
   1295 
   1296       /* shift the current digit, add in carry and store */
   1297       *tmpb-- = (*tmpa-- >> 1) | (r << (DIGIT_BIT - 1));
   1298 
   1299       /* forward carry to next iteration */
   1300       r = rr;
   1301     }
   1302 
   1303     /* zero excess digits */
   1304     tmpb = b->dp + b->used;
   1305     for (x = b->used; x < oldused; x++) {
   1306       *tmpb++ = 0;
   1307     }
   1308   }
   1309   b->sign = a->sign;
   1310   mp_clamp (b);
   1311   return MP_OKAY;
   1312 }
   1313 
   1314 
   1315 /* shift left by a certain bit count */
   1316 static int mp_mul_2d (mp_int * a, int b, mp_int * c)
   1317 {
   1318   mp_digit d;
   1319   int      res;
   1320 
   1321   /* copy */
   1322   if (a != c) {
   1323      if ((res = mp_copy (a, c)) != MP_OKAY) {
   1324        return res;
   1325      }
   1326   }
   1327 
   1328   if (c->alloc < (int)(c->used + b/DIGIT_BIT + 1)) {
   1329      if ((res = mp_grow (c, c->used + b / DIGIT_BIT + 1)) != MP_OKAY) {
   1330        return res;
   1331      }
   1332   }
   1333 
   1334   /* shift by as many digits in the bit count */
   1335   if (b >= (int)DIGIT_BIT) {
   1336     if ((res = mp_lshd (c, b / DIGIT_BIT)) != MP_OKAY) {
   1337       return res;
   1338     }
   1339   }
   1340 
   1341   /* shift any bit count < DIGIT_BIT */
   1342   d = (mp_digit) (b % DIGIT_BIT);
   1343   if (d != 0) {
   1344     register mp_digit *tmpc, shift, mask, r, rr;
   1345     register int x;
   1346 
   1347     /* bitmask for carries */
   1348     mask = (((mp_digit)1) << d) - 1;
   1349 
   1350     /* shift for msbs */
   1351     shift = DIGIT_BIT - d;
   1352 
   1353     /* alias */
   1354     tmpc = c->dp;
   1355 
   1356     /* carry */
   1357     r    = 0;
   1358     for (x = 0; x < c->used; x++) {
   1359       /* get the higher bits of the current word */
   1360       rr = (*tmpc >> shift) & mask;
   1361 
   1362       /* shift the current word and OR in the carry */
   1363       *tmpc = ((*tmpc << d) | r) & MP_MASK;
   1364       ++tmpc;
   1365 
   1366       /* set the carry to the carry bits of the current word */
   1367       r = rr;
   1368     }
   1369 
   1370     /* set final carry */
   1371     if (r != 0) {
   1372        c->dp[(c->used)++] = r;
   1373     }
   1374   }
   1375   mp_clamp (c);
   1376   return MP_OKAY;
   1377 }
   1378 
   1379 
   1380 static int mp_init_multi(mp_int *mp, ...)
   1381 {
   1382     mp_err res = MP_OKAY;      /* Assume ok until proven otherwise */
   1383     int n = 0;                 /* Number of ok inits */
   1384     mp_int* cur_arg = mp;
   1385     va_list args;
   1386 
   1387     va_start(args, mp);        /* init args to next argument from caller */
   1388     while (cur_arg != NULL) {
   1389         if (mp_init(cur_arg) != MP_OKAY) {
   1390             /* Oops - error! Back-track and mp_clear what we already
   1391                succeeded in init-ing, then return error.
   1392             */
   1393             va_list clean_args;
   1394 
   1395             /* end the current list */
   1396             va_end(args);
   1397 
   1398             /* now start cleaning up */
   1399             cur_arg = mp;
   1400             va_start(clean_args, mp);
   1401             while (n--) {
   1402                 mp_clear(cur_arg);
   1403                 cur_arg = va_arg(clean_args, mp_int*);
   1404             }
   1405             va_end(clean_args);
   1406             res = MP_MEM;
   1407             break;
   1408         }
   1409         n++;
   1410         cur_arg = va_arg(args, mp_int*);
   1411     }
   1412     va_end(args);
   1413     return res;                /* Assumed ok, if error flagged above. */
   1414 }
   1415 
   1416 
   1417 static void mp_clear_multi(mp_int *mp, ...)
   1418 {
   1419     mp_int* next_mp = mp;
   1420     va_list args;
   1421     va_start(args, mp);
   1422     while (next_mp != NULL) {
   1423         mp_clear(next_mp);
   1424         next_mp = va_arg(args, mp_int*);
   1425     }
   1426     va_end(args);
   1427 }
   1428 
   1429 
   1430 /* shift left a certain amount of digits */
   1431 static int mp_lshd (mp_int * a, int b)
   1432 {
   1433   int     x, res;
   1434 
   1435   /* if its less than zero return */
   1436   if (b <= 0) {
   1437     return MP_OKAY;
   1438   }
   1439 
   1440   /* grow to fit the new digits */
   1441   if (a->alloc < a->used + b) {
   1442      if ((res = mp_grow (a, a->used + b)) != MP_OKAY) {
   1443        return res;
   1444      }
   1445   }
   1446 
   1447   {
   1448     register mp_digit *top, *bottom;
   1449 
   1450     /* increment the used by the shift amount then copy upwards */
   1451     a->used += b;
   1452 
   1453     /* top */
   1454     top = a->dp + a->used - 1;
   1455 
   1456     /* base */
   1457     bottom = a->dp + a->used - 1 - b;
   1458 
   1459     /* much like mp_rshd this is implemented using a sliding window
   1460      * except the window goes the otherway around.  Copying from
   1461      * the bottom to the top.  see bn_mp_rshd.c for more info.
   1462      */
   1463     for (x = a->used - 1; x >= b; x--) {
   1464       *top-- = *bottom--;
   1465     }
   1466 
   1467     /* zero the lower digits */
   1468     top = a->dp;
   1469     for (x = 0; x < b; x++) {
   1470       *top++ = 0;
   1471     }
   1472   }
   1473   return MP_OKAY;
   1474 }
   1475 
   1476 
   1477 /* returns the number of bits in an int */
   1478 static int mp_count_bits (mp_int * a)
   1479 {
   1480   int     r;
   1481   mp_digit q;
   1482 
   1483   /* shortcut */
   1484   if (a->used == 0) {
   1485     return 0;
   1486   }
   1487 
   1488   /* get number of digits and add that */
   1489   r = (a->used - 1) * DIGIT_BIT;
   1490 
   1491   /* take the last digit and count the bits in it */
   1492   q = a->dp[a->used - 1];
   1493   while (q > ((mp_digit) 0)) {
   1494     ++r;
   1495     q >>= ((mp_digit) 1);
   1496   }
   1497   return r;
   1498 }
   1499 
   1500 
   1501 /* calc a value mod 2**b */
   1502 static int mp_mod_2d (mp_int * a, int b, mp_int * c)
   1503 {
   1504   int     x, res;
   1505 
   1506   /* if b is <= 0 then zero the int */
   1507   if (b <= 0) {
   1508     mp_zero (c);
   1509     return MP_OKAY;
   1510   }
   1511 
   1512   /* if the modulus is larger than the value than return */
   1513   if (b >= (int) (a->used * DIGIT_BIT)) {
   1514     res = mp_copy (a, c);
   1515     return res;
   1516   }
   1517 
   1518   /* copy */
   1519   if ((res = mp_copy (a, c)) != MP_OKAY) {
   1520     return res;
   1521   }
   1522 
   1523   /* zero digits above the last digit of the modulus */
   1524   for (x = (b / DIGIT_BIT) + ((b % DIGIT_BIT) == 0 ? 0 : 1); x < c->used; x++) {
   1525     c->dp[x] = 0;
   1526   }
   1527   /* clear the digit that is not completely outside/inside the modulus */
   1528   c->dp[b / DIGIT_BIT] &=
   1529     (mp_digit) ((((mp_digit) 1) << (((mp_digit) b) % DIGIT_BIT)) - ((mp_digit) 1));
   1530   mp_clamp (c);
   1531   return MP_OKAY;
   1532 }
   1533 
   1534 
   1535 /* slower bit-bang division... also smaller */
   1536 static int mp_div(mp_int * a, mp_int * b, mp_int * c, mp_int * d)
   1537 {
   1538    mp_int ta, tb, tq, q;
   1539    int    res, n, n2;
   1540 
   1541   /* is divisor zero ? */
   1542   if (mp_iszero (b) == 1) {
   1543     return MP_VAL;
   1544   }
   1545 
   1546   /* if a < b then q=0, r = a */
   1547   if (mp_cmp_mag (a, b) == MP_LT) {
   1548     if (d != NULL) {
   1549       res = mp_copy (a, d);
   1550     } else {
   1551       res = MP_OKAY;
   1552     }
   1553     if (c != NULL) {
   1554       mp_zero (c);
   1555     }
   1556     return res;
   1557   }
   1558 
   1559   /* init our temps */
   1560   if ((res = mp_init_multi(&ta, &tb, &tq, &q, NULL) != MP_OKAY)) {
   1561      return res;
   1562   }
   1563 
   1564 
   1565   mp_set(&tq, 1);
   1566   n = mp_count_bits(a) - mp_count_bits(b);
   1567   if (((res = mp_abs(a, &ta)) != MP_OKAY) ||
   1568       ((res = mp_abs(b, &tb)) != MP_OKAY) ||
   1569       ((res = mp_mul_2d(&tb, n, &tb)) != MP_OKAY) ||
   1570       ((res = mp_mul_2d(&tq, n, &tq)) != MP_OKAY)) {
   1571       goto LBL_ERR;
   1572   }
   1573 
   1574   while (n-- >= 0) {
   1575      if (mp_cmp(&tb, &ta) != MP_GT) {
   1576         if (((res = mp_sub(&ta, &tb, &ta)) != MP_OKAY) ||
   1577             ((res = mp_add(&q, &tq, &q)) != MP_OKAY)) {
   1578            goto LBL_ERR;
   1579         }
   1580      }
   1581      if (((res = mp_div_2d(&tb, 1, &tb, NULL)) != MP_OKAY) ||
   1582          ((res = mp_div_2d(&tq, 1, &tq, NULL)) != MP_OKAY)) {
   1583            goto LBL_ERR;
   1584      }
   1585   }
   1586 
   1587   /* now q == quotient and ta == remainder */
   1588   n  = a->sign;
   1589   n2 = (a->sign == b->sign ? MP_ZPOS : MP_NEG);
   1590   if (c != NULL) {
   1591      mp_exch(c, &q);
   1592      c->sign  = (mp_iszero(c) == MP_YES) ? MP_ZPOS : n2;
   1593   }
   1594   if (d != NULL) {
   1595      mp_exch(d, &ta);
   1596      d->sign = (mp_iszero(d) == MP_YES) ? MP_ZPOS : n;
   1597   }
   1598 LBL_ERR:
   1599    mp_clear_multi(&ta, &tb, &tq, &q, NULL);
   1600    return res;
   1601 }
   1602 
   1603 
   1604 #ifdef MP_LOW_MEM
   1605    #define TAB_SIZE 32
   1606 #else
   1607    #define TAB_SIZE 256
   1608 #endif
   1609 
   1610 static int s_mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode)
   1611 {
   1612   mp_int  M[TAB_SIZE], res, mu;
   1613   mp_digit buf;
   1614   int     err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize;
   1615   int (*redux)(mp_int*,mp_int*,mp_int*);
   1616 
   1617   /* find window size */
   1618   x = mp_count_bits (X);
   1619   if (x <= 7) {
   1620     winsize = 2;
   1621   } else if (x <= 36) {
   1622     winsize = 3;
   1623   } else if (x <= 140) {
   1624     winsize = 4;
   1625   } else if (x <= 450) {
   1626     winsize = 5;
   1627   } else if (x <= 1303) {
   1628     winsize = 6;
   1629   } else if (x <= 3529) {
   1630     winsize = 7;
   1631   } else {
   1632     winsize = 8;
   1633   }
   1634 
   1635 #ifdef MP_LOW_MEM
   1636     if (winsize > 5) {
   1637        winsize = 5;
   1638     }
   1639 #endif
   1640 
   1641   /* init M array */
   1642   /* init first cell */
   1643   if ((err = mp_init(&M[1])) != MP_OKAY) {
   1644      return err;
   1645   }
   1646 
   1647   /* now init the second half of the array */
   1648   for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
   1649     if ((err = mp_init(&M[x])) != MP_OKAY) {
   1650       for (y = 1<<(winsize-1); y < x; y++) {
   1651         mp_clear (&M[y]);
   1652       }
   1653       mp_clear(&M[1]);
   1654       return err;
   1655     }
   1656   }
   1657 
   1658   /* create mu, used for Barrett reduction */
   1659   if ((err = mp_init (&mu)) != MP_OKAY) {
   1660     goto LBL_M;
   1661   }
   1662 
   1663   if (redmode == 0) {
   1664      if ((err = mp_reduce_setup (&mu, P)) != MP_OKAY) {
   1665         goto LBL_MU;
   1666      }
   1667      redux = mp_reduce;
   1668   } else {
   1669      if ((err = mp_reduce_2k_setup_l (P, &mu)) != MP_OKAY) {
   1670         goto LBL_MU;
   1671      }
   1672      redux = mp_reduce_2k_l;
   1673   }
   1674 
   1675   /* create M table
   1676    *
   1677    * The M table contains powers of the base,
   1678    * e.g. M[x] = G**x mod P
   1679    *
   1680    * The first half of the table is not
   1681    * computed though accept for M[0] and M[1]
   1682    */
   1683   if ((err = mp_mod (G, P, &M[1])) != MP_OKAY) {
   1684     goto LBL_MU;
   1685   }
   1686 
   1687   /* compute the value at M[1<<(winsize-1)] by squaring
   1688    * M[1] (winsize-1) times
   1689    */
   1690   if ((err = mp_copy (&M[1], &M[1 << (winsize - 1)])) != MP_OKAY) {
   1691     goto LBL_MU;
   1692   }
   1693 
   1694   for (x = 0; x < (winsize - 1); x++) {
   1695     /* square it */
   1696     if ((err = mp_sqr (&M[1 << (winsize - 1)],
   1697                        &M[1 << (winsize - 1)])) != MP_OKAY) {
   1698       goto LBL_MU;
   1699     }
   1700 
   1701     /* reduce modulo P */
   1702     if ((err = redux (&M[1 << (winsize - 1)], P, &mu)) != MP_OKAY) {
   1703       goto LBL_MU;
   1704     }
   1705   }
   1706 
   1707   /* create upper table, that is M[x] = M[x-1] * M[1] (mod P)
   1708    * for x = (2**(winsize - 1) + 1) to (2**winsize - 1)
   1709    */
   1710   for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) {
   1711     if ((err = mp_mul (&M[x - 1], &M[1], &M[x])) != MP_OKAY) {
   1712       goto LBL_MU;
   1713     }
   1714     if ((err = redux (&M[x], P, &mu)) != MP_OKAY) {
   1715       goto LBL_MU;
   1716     }
   1717   }
   1718 
   1719   /* setup result */
   1720   if ((err = mp_init (&res)) != MP_OKAY) {
   1721     goto LBL_MU;
   1722   }
   1723   mp_set (&res, 1);
   1724 
   1725   /* set initial mode and bit cnt */
   1726   mode   = 0;
   1727   bitcnt = 1;
   1728   buf    = 0;
   1729   digidx = X->used - 1;
   1730   bitcpy = 0;
   1731   bitbuf = 0;
   1732 
   1733   for (;;) {
   1734     /* grab next digit as required */
   1735     if (--bitcnt == 0) {
   1736       /* if digidx == -1 we are out of digits */
   1737       if (digidx == -1) {
   1738         break;
   1739       }
   1740       /* read next digit and reset the bitcnt */
   1741       buf    = X->dp[digidx--];
   1742       bitcnt = (int) DIGIT_BIT;
   1743     }
   1744 
   1745     /* grab the next msb from the exponent */
   1746     y     = (buf >> (mp_digit)(DIGIT_BIT - 1)) & 1;
   1747     buf <<= (mp_digit)1;
   1748 
   1749     /* if the bit is zero and mode == 0 then we ignore it
   1750      * These represent the leading zero bits before the first 1 bit
   1751      * in the exponent.  Technically this opt is not required but it
   1752      * does lower the # of trivial squaring/reductions used
   1753      */
   1754     if (mode == 0 && y == 0) {
   1755       continue;
   1756     }
   1757 
   1758     /* if the bit is zero and mode == 1 then we square */
   1759     if (mode == 1 && y == 0) {
   1760       if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
   1761         goto LBL_RES;
   1762       }
   1763       if ((err = redux (&res, P, &mu)) != MP_OKAY) {
   1764         goto LBL_RES;
   1765       }
   1766       continue;
   1767     }
   1768 
   1769     /* else we add it to the window */
   1770     bitbuf |= (y << (winsize - ++bitcpy));
   1771     mode    = 2;
   1772 
   1773     if (bitcpy == winsize) {
   1774       /* ok window is filled so square as required and multiply  */
   1775       /* square first */
   1776       for (x = 0; x < winsize; x++) {
   1777         if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
   1778           goto LBL_RES;
   1779         }
   1780         if ((err = redux (&res, P, &mu)) != MP_OKAY) {
   1781           goto LBL_RES;
   1782         }
   1783       }
   1784 
   1785       /* then multiply */
   1786       if ((err = mp_mul (&res, &M[bitbuf], &res)) != MP_OKAY) {
   1787         goto LBL_RES;
   1788       }
   1789       if ((err = redux (&res, P, &mu)) != MP_OKAY) {
   1790         goto LBL_RES;
   1791       }
   1792 
   1793       /* empty window and reset */
   1794       bitcpy = 0;
   1795       bitbuf = 0;
   1796       mode   = 1;
   1797     }
   1798   }
   1799 
   1800   /* if bits remain then square/multiply */
   1801   if (mode == 2 && bitcpy > 0) {
   1802     /* square then multiply if the bit is set */
   1803     for (x = 0; x < bitcpy; x++) {
   1804       if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
   1805         goto LBL_RES;
   1806       }
   1807       if ((err = redux (&res, P, &mu)) != MP_OKAY) {
   1808         goto LBL_RES;
   1809       }
   1810 
   1811       bitbuf <<= 1;
   1812       if ((bitbuf & (1 << winsize)) != 0) {
   1813         /* then multiply */
   1814         if ((err = mp_mul (&res, &M[1], &res)) != MP_OKAY) {
   1815           goto LBL_RES;
   1816         }
   1817         if ((err = redux (&res, P, &mu)) != MP_OKAY) {
   1818           goto LBL_RES;
   1819         }
   1820       }
   1821     }
   1822   }
   1823 
   1824   mp_exch (&res, Y);
   1825   err = MP_OKAY;
   1826 LBL_RES:mp_clear (&res);
   1827 LBL_MU:mp_clear (&mu);
   1828 LBL_M:
   1829   mp_clear(&M[1]);
   1830   for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
   1831     mp_clear (&M[x]);
   1832   }
   1833   return err;
   1834 }
   1835 
   1836 
   1837 /* computes b = a*a */
   1838 static int mp_sqr (mp_int * a, mp_int * b)
   1839 {
   1840   int     res;
   1841 
   1842 #ifdef BN_MP_TOOM_SQR_C
   1843   /* use Toom-Cook? */
   1844   if (a->used >= TOOM_SQR_CUTOFF) {
   1845     res = mp_toom_sqr(a, b);
   1846   /* Karatsuba? */
   1847   } else
   1848 #endif
   1849 #ifdef BN_MP_KARATSUBA_SQR_C
   1850 if (a->used >= KARATSUBA_SQR_CUTOFF) {
   1851     res = mp_karatsuba_sqr (a, b);
   1852   } else
   1853 #endif
   1854   {
   1855 #ifdef BN_FAST_S_MP_SQR_C
   1856     /* can we use the fast comba multiplier? */
   1857     if ((a->used * 2 + 1) < MP_WARRAY &&
   1858          a->used <
   1859          (1 << (sizeof(mp_word) * CHAR_BIT - 2*DIGIT_BIT - 1))) {
   1860       res = fast_s_mp_sqr (a, b);
   1861     } else
   1862 #endif
   1863 #ifdef BN_S_MP_SQR_C
   1864       res = s_mp_sqr (a, b);
   1865 #else
   1866 #error mp_sqr could fail
   1867       res = MP_VAL;
   1868 #endif
   1869   }
   1870   b->sign = MP_ZPOS;
   1871   return res;
   1872 }
   1873 
   1874 
   1875 /* reduces a modulo n where n is of the form 2**p - d
   1876    This differs from reduce_2k since "d" can be larger
   1877    than a single digit.
   1878 */
   1879 static int mp_reduce_2k_l(mp_int *a, mp_int *n, mp_int *d)
   1880 {
   1881    mp_int q;
   1882    int    p, res;
   1883 
   1884    if ((res = mp_init(&q)) != MP_OKAY) {
   1885       return res;
   1886    }
   1887 
   1888    p = mp_count_bits(n);
   1889 top:
   1890    /* q = a/2**p, a = a mod 2**p */
   1891    if ((res = mp_div_2d(a, p, &q, a)) != MP_OKAY) {
   1892       goto ERR;
   1893    }
   1894 
   1895    /* q = q * d */
   1896    if ((res = mp_mul(&q, d, &q)) != MP_OKAY) {
   1897       goto ERR;
   1898    }
   1899 
   1900    /* a = a + q */
   1901    if ((res = s_mp_add(a, &q, a)) != MP_OKAY) {
   1902       goto ERR;
   1903    }
   1904 
   1905    if (mp_cmp_mag(a, n) != MP_LT) {
   1906       s_mp_sub(a, n, a);
   1907       goto top;
   1908    }
   1909 
   1910 ERR:
   1911    mp_clear(&q);
   1912    return res;
   1913 }
   1914 
   1915 
   1916 /* determines the setup value */
   1917 static int mp_reduce_2k_setup_l(mp_int *a, mp_int *d)
   1918 {
   1919    int    res;
   1920    mp_int tmp;
   1921 
   1922    if ((res = mp_init(&tmp)) != MP_OKAY) {
   1923       return res;
   1924    }
   1925 
   1926    if ((res = mp_2expt(&tmp, mp_count_bits(a))) != MP_OKAY) {
   1927       goto ERR;
   1928    }
   1929 
   1930    if ((res = s_mp_sub(&tmp, a, d)) != MP_OKAY) {
   1931       goto ERR;
   1932    }
   1933 
   1934 ERR:
   1935    mp_clear(&tmp);
   1936    return res;
   1937 }
   1938 
   1939 
   1940 /* computes a = 2**b
   1941  *
   1942  * Simple algorithm which zeroes the int, grows it then just sets one bit
   1943  * as required.
   1944  */
   1945 static int mp_2expt (mp_int * a, int b)
   1946 {
   1947   int     res;
   1948 
   1949   /* zero a as per default */
   1950   mp_zero (a);
   1951 
   1952   /* grow a to accomodate the single bit */
   1953   if ((res = mp_grow (a, b / DIGIT_BIT + 1)) != MP_OKAY) {
   1954     return res;
   1955   }
   1956 
   1957   /* set the used count of where the bit will go */
   1958   a->used = b / DIGIT_BIT + 1;
   1959 
   1960   /* put the single bit in its place */
   1961   a->dp[b / DIGIT_BIT] = ((mp_digit)1) << (b % DIGIT_BIT);
   1962 
   1963   return MP_OKAY;
   1964 }
   1965 
   1966 
   1967 /* pre-calculate the value required for Barrett reduction
   1968  * For a given modulus "b" it calulates the value required in "a"
   1969  */
   1970 static int mp_reduce_setup (mp_int * a, mp_int * b)
   1971 {
   1972   int     res;
   1973 
   1974   if ((res = mp_2expt (a, b->used * 2 * DIGIT_BIT)) != MP_OKAY) {
   1975     return res;
   1976   }
   1977   return mp_div (a, b, a, NULL);
   1978 }
   1979 
   1980 
   1981 /* reduces x mod m, assumes 0 < x < m**2, mu is
   1982  * precomputed via mp_reduce_setup.
   1983  * From HAC pp.604 Algorithm 14.42
   1984  */
   1985 static int mp_reduce (mp_int * x, mp_int * m, mp_int * mu)
   1986 {
   1987   mp_int  q;
   1988   int     res, um = m->used;
   1989 
   1990   /* q = x */
   1991   if ((res = mp_init_copy (&q, x)) != MP_OKAY) {
   1992     return res;
   1993   }
   1994 
   1995   /* q1 = x / b**(k-1)  */
   1996   mp_rshd (&q, um - 1);
   1997 
   1998   /* according to HAC this optimization is ok */
   1999   if (((unsigned long) um) > (((mp_digit)1) << (DIGIT_BIT - 1))) {
   2000     if ((res = mp_mul (&q, mu, &q)) != MP_OKAY) {
   2001       goto CLEANUP;
   2002     }
   2003   } else {
   2004 #ifdef BN_S_MP_MUL_HIGH_DIGS_C
   2005     if ((res = s_mp_mul_high_digs (&q, mu, &q, um)) != MP_OKAY) {
   2006       goto CLEANUP;
   2007     }
   2008 #elif defined(BN_FAST_S_MP_MUL_HIGH_DIGS_C)
   2009     if ((res = fast_s_mp_mul_high_digs (&q, mu, &q, um)) != MP_OKAY) {
   2010       goto CLEANUP;
   2011     }
   2012 #else
   2013     {
   2014 #error mp_reduce would always fail
   2015       res = MP_VAL;
   2016       goto CLEANUP;
   2017     }
   2018 #endif
   2019   }
   2020 
   2021   /* q3 = q2 / b**(k+1) */
   2022   mp_rshd (&q, um + 1);
   2023 
   2024   /* x = x mod b**(k+1), quick (no division) */
   2025   if ((res = mp_mod_2d (x, DIGIT_BIT * (um + 1), x)) != MP_OKAY) {
   2026     goto CLEANUP;
   2027   }
   2028 
   2029   /* q = q * m mod b**(k+1), quick (no division) */
   2030   if ((res = s_mp_mul_digs (&q, m, &q, um + 1)) != MP_OKAY) {
   2031     goto CLEANUP;
   2032   }
   2033 
   2034   /* x = x - q */
   2035   if ((res = mp_sub (x, &q, x)) != MP_OKAY) {
   2036     goto CLEANUP;
   2037   }
   2038 
   2039   /* If x < 0, add b**(k+1) to it */
   2040   if (mp_cmp_d (x, 0) == MP_LT) {
   2041     mp_set (&q, 1);
   2042     if ((res = mp_lshd (&q, um + 1)) != MP_OKAY) {
   2043       goto CLEANUP;
   2044     }
   2045     if ((res = mp_add (x, &q, x)) != MP_OKAY) {
   2046       goto CLEANUP;
   2047     }
   2048   }
   2049 
   2050   /* Back off if it's too big */
   2051   while (mp_cmp (x, m) != MP_LT) {
   2052     if ((res = s_mp_sub (x, m, x)) != MP_OKAY) {
   2053       goto CLEANUP;
   2054     }
   2055   }
   2056 
   2057 CLEANUP:
   2058   mp_clear (&q);
   2059 
   2060   return res;
   2061 }
   2062 
   2063 
   2064 /* multiplies |a| * |b| and only computes upto digs digits of result
   2065  * HAC pp. 595, Algorithm 14.12  Modified so you can control how
   2066  * many digits of output are created.
   2067  */
   2068 static int s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
   2069 {
   2070   mp_int  t;
   2071   int     res, pa, pb, ix, iy;
   2072   mp_digit u;
   2073   mp_word r;
   2074   mp_digit tmpx, *tmpt, *tmpy;
   2075 
   2076   /* can we use the fast multiplier? */
   2077   if (((digs) < MP_WARRAY) &&
   2078       MIN (a->used, b->used) <
   2079           (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
   2080     return fast_s_mp_mul_digs (a, b, c, digs);
   2081   }
   2082 
   2083   if ((res = mp_init_size (&t, digs)) != MP_OKAY) {
   2084     return res;
   2085   }
   2086   t.used = digs;
   2087 
   2088   /* compute the digits of the product directly */
   2089   pa = a->used;
   2090   for (ix = 0; ix < pa; ix++) {
   2091     /* set the carry to zero */
   2092     u = 0;
   2093 
   2094     /* limit ourselves to making digs digits of output */
   2095     pb = MIN (b->used, digs - ix);
   2096 
   2097     /* setup some aliases */
   2098     /* copy of the digit from a used within the nested loop */
   2099     tmpx = a->dp[ix];
   2100 
   2101     /* an alias for the destination shifted ix places */
   2102     tmpt = t.dp + ix;
   2103 
   2104     /* an alias for the digits of b */
   2105     tmpy = b->dp;
   2106 
   2107     /* compute the columns of the output and propagate the carry */
   2108     for (iy = 0; iy < pb; iy++) {
   2109       /* compute the column as a mp_word */
   2110       r       = ((mp_word)*tmpt) +
   2111                 ((mp_word)tmpx) * ((mp_word)*tmpy++) +
   2112                 ((mp_word) u);
   2113 
   2114       /* the new column is the lower part of the result */
   2115       *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
   2116 
   2117       /* get the carry word from the result */
   2118       u       = (mp_digit) (r >> ((mp_word) DIGIT_BIT));
   2119     }
   2120     /* set carry if it is placed below digs */
   2121     if (ix + iy < digs) {
   2122       *tmpt = u;
   2123     }
   2124   }
   2125 
   2126   mp_clamp (&t);
   2127   mp_exch (&t, c);
   2128 
   2129   mp_clear (&t);
   2130   return MP_OKAY;
   2131 }
   2132 
   2133 
   2134 /* Fast (comba) multiplier
   2135  *
   2136  * This is the fast column-array [comba] multiplier.  It is
   2137  * designed to compute the columns of the product first
   2138  * then handle the carries afterwards.  This has the effect
   2139  * of making the nested loops that compute the columns very
   2140  * simple and schedulable on super-scalar processors.
   2141  *
   2142  * This has been modified to produce a variable number of
   2143  * digits of output so if say only a half-product is required
   2144  * you don't have to compute the upper half (a feature
   2145  * required for fast Barrett reduction).
   2146  *
   2147  * Based on Algorithm 14.12 on pp.595 of HAC.
   2148  *
   2149  */
   2150 static int fast_s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
   2151 {
   2152   int     olduse, res, pa, ix, iz;
   2153   mp_digit W[MP_WARRAY];
   2154   register mp_word  _W;
   2155 
   2156   /* grow the destination as required */
   2157   if (c->alloc < digs) {
   2158     if ((res = mp_grow (c, digs)) != MP_OKAY) {
   2159       return res;
   2160     }
   2161   }
   2162 
   2163   /* number of output digits to produce */
   2164   pa = MIN(digs, a->used + b->used);
   2165 
   2166   /* clear the carry */
   2167   _W = 0;
   2168   for (ix = 0; ix < pa; ix++) {
   2169       int      tx, ty;
   2170       int      iy;
   2171       mp_digit *tmpx, *tmpy;
   2172 
   2173       /* get offsets into the two bignums */
   2174       ty = MIN(b->used-1, ix);
   2175       tx = ix - ty;
   2176 
   2177       /* setup temp aliases */
   2178       tmpx = a->dp + tx;
   2179       tmpy = b->dp + ty;
   2180 
   2181       /* this is the number of times the loop will iterrate, essentially
   2182          while (tx++ < a->used && ty-- >= 0) { ... }
   2183        */
   2184       iy = MIN(a->used-tx, ty+1);
   2185 
   2186       /* execute loop */
   2187       for (iz = 0; iz < iy; ++iz) {
   2188          _W += ((mp_word)*tmpx++)*((mp_word)*tmpy--);
   2189 
   2190       }
   2191 
   2192       /* store term */
   2193       W[ix] = ((mp_digit)_W) & MP_MASK;
   2194 
   2195       /* make next carry */
   2196       _W = _W >> ((mp_word)DIGIT_BIT);
   2197  }
   2198 
   2199   /* setup dest */
   2200   olduse  = c->used;
   2201   c->used = pa;
   2202 
   2203   {
   2204     register mp_digit *tmpc;
   2205     tmpc = c->dp;
   2206     for (ix = 0; ix < pa+1; ix++) {
   2207       /* now extract the previous digit [below the carry] */
   2208       *tmpc++ = W[ix];
   2209     }
   2210 
   2211     /* clear unused digits [that existed in the old copy of c] */
   2212     for (; ix < olduse; ix++) {
   2213       *tmpc++ = 0;
   2214     }
   2215   }
   2216   mp_clamp (c);
   2217   return MP_OKAY;
   2218 }
   2219 
   2220 
   2221 /* init an mp_init for a given size */
   2222 static int mp_init_size (mp_int * a, int size)
   2223 {
   2224   int x;
   2225 
   2226   /* pad size so there are always extra digits */
   2227   size += (MP_PREC * 2) - (size % MP_PREC);
   2228 
   2229   /* alloc mem */
   2230   a->dp = OPT_CAST(mp_digit) XMALLOC (sizeof (mp_digit) * size);
   2231   if (a->dp == NULL) {
   2232     return MP_MEM;
   2233   }
   2234 
   2235   /* set the members */
   2236   a->used  = 0;
   2237   a->alloc = size;
   2238   a->sign  = MP_ZPOS;
   2239 
   2240   /* zero the digits */
   2241   for (x = 0; x < size; x++) {
   2242       a->dp[x] = 0;
   2243   }
   2244 
   2245   return MP_OKAY;
   2246 }
   2247 
   2248 
   2249 /* low level squaring, b = a*a, HAC pp.596-597, Algorithm 14.16 */
   2250 static int s_mp_sqr (mp_int * a, mp_int * b)
   2251 {
   2252   mp_int  t;
   2253   int     res, ix, iy, pa;
   2254   mp_word r;
   2255   mp_digit u, tmpx, *tmpt;
   2256 
   2257   pa = a->used;
   2258   if ((res = mp_init_size (&t, 2*pa + 1)) != MP_OKAY) {
   2259     return res;
   2260   }
   2261 
   2262   /* default used is maximum possible size */
   2263   t.used = 2*pa + 1;
   2264 
   2265   for (ix = 0; ix < pa; ix++) {
   2266     /* first calculate the digit at 2*ix */
   2267     /* calculate double precision result */
   2268     r = ((mp_word) t.dp[2*ix]) +
   2269         ((mp_word)a->dp[ix])*((mp_word)a->dp[ix]);
   2270 
   2271     /* store lower part in result */
   2272     t.dp[ix+ix] = (mp_digit) (r & ((mp_word) MP_MASK));
   2273 
   2274     /* get the carry */
   2275     u           = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
   2276 
   2277     /* left hand side of A[ix] * A[iy] */
   2278     tmpx        = a->dp[ix];
   2279 
   2280     /* alias for where to store the results */
   2281     tmpt        = t.dp + (2*ix + 1);
   2282 
   2283     for (iy = ix + 1; iy < pa; iy++) {
   2284       /* first calculate the product */
   2285       r       = ((mp_word)tmpx) * ((mp_word)a->dp[iy]);
   2286 
   2287       /* now calculate the double precision result, note we use
   2288        * addition instead of *2 since it's easier to optimize
   2289        */
   2290       r       = ((mp_word) *tmpt) + r + r + ((mp_word) u);
   2291 
   2292       /* store lower part */
   2293       *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
   2294 
   2295       /* get carry */
   2296       u       = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
   2297     }
   2298     /* propagate upwards */
   2299     while (u != ((mp_digit) 0)) {
   2300       r       = ((mp_word) *tmpt) + ((mp_word) u);
   2301       *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
   2302       u       = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
   2303     }
   2304   }
   2305 
   2306   mp_clamp (&t);
   2307   mp_exch (&t, b);
   2308   mp_clear (&t);
   2309   return MP_OKAY;
   2310 }
   2311 
   2312 
   2313 /* multiplies |a| * |b| and does not compute the lower digs digits
   2314  * [meant to get the higher part of the product]
   2315  */
   2316 static int s_mp_mul_high_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
   2317 {
   2318   mp_int  t;
   2319   int     res, pa, pb, ix, iy;
   2320   mp_digit u;
   2321   mp_word r;
   2322   mp_digit tmpx, *tmpt, *tmpy;
   2323 
   2324   /* can we use the fast multiplier? */
   2325 #ifdef BN_FAST_S_MP_MUL_HIGH_DIGS_C
   2326   if (((a->used + b->used + 1) < MP_WARRAY)
   2327       && MIN (a->used, b->used) < (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
   2328     return fast_s_mp_mul_high_digs (a, b, c, digs);
   2329   }
   2330 #endif
   2331 
   2332   if ((res = mp_init_size (&t, a->used + b->used + 1)) != MP_OKAY) {
   2333     return res;
   2334   }
   2335   t.used = a->used + b->used + 1;
   2336 
   2337   pa = a->used;
   2338   pb = b->used;
   2339   for (ix = 0; ix < pa; ix++) {
   2340     /* clear the carry */
   2341     u = 0;
   2342 
   2343     /* left hand side of A[ix] * B[iy] */
   2344     tmpx = a->dp[ix];
   2345 
   2346     /* alias to the address of where the digits will be stored */
   2347     tmpt = &(t.dp[digs]);
   2348 
   2349     /* alias for where to read the right hand side from */
   2350     tmpy = b->dp + (digs - ix);
   2351 
   2352     for (iy = digs - ix; iy < pb; iy++) {
   2353       /* calculate the double precision result */
   2354       r       = ((mp_word)*tmpt) +
   2355                 ((mp_word)tmpx) * ((mp_word)*tmpy++) +
   2356                 ((mp_word) u);
   2357 
   2358       /* get the lower part */
   2359       *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
   2360 
   2361       /* carry the carry */
   2362       u       = (mp_digit) (r >> ((mp_word) DIGIT_BIT));
   2363     }
   2364     *tmpt = u;
   2365   }
   2366   mp_clamp (&t);
   2367   mp_exch (&t, c);
   2368   mp_clear (&t);
   2369   return MP_OKAY;
   2370 }
   2371