Home | History | Annotate | Download | only in tls
      1 /*
      2  * Minimal code for RSA support from LibTomMath 0.41
      3  * http://libtom.org/
      4  * http://libtom.org/files/ltm-0.41.tar.bz2
      5  * This library was released in public domain by Tom St Denis.
      6  *
      7  * The combination in this file may not use all of the optimized algorithms
      8  * from LibTomMath and may be considerable slower than the LibTomMath with its
      9  * default settings. The main purpose of having this version here is to make it
     10  * easier to build bignum.c wrapper without having to install and build an
     11  * external library.
     12  *
     13  * If CONFIG_INTERNAL_LIBTOMMATH is defined, bignum.c includes this
     14  * libtommath.c file instead of using the external LibTomMath library.
     15  */
     16 
     17 #ifndef CHAR_BIT
     18 #define CHAR_BIT 8
     19 #endif
     20 
     21 #define BN_MP_INVMOD_C
     22 #define BN_S_MP_EXPTMOD_C /* Note: #undef in tommath_superclass.h; this would
     23 			   * require BN_MP_EXPTMOD_FAST_C instead */
     24 #define BN_S_MP_MUL_DIGS_C
     25 #define BN_MP_INVMOD_SLOW_C
     26 #define BN_S_MP_SQR_C
     27 #define BN_S_MP_MUL_HIGH_DIGS_C /* Note: #undef in tommath_superclass.h; this
     28 				 * would require other than mp_reduce */
     29 
     30 #ifdef LTM_FAST
     31 
     32 /* Use faster div at the cost of about 1 kB */
     33 #define BN_MP_MUL_D_C
     34 
     35 /* Include faster exptmod (Montgomery) at the cost of about 2.5 kB in code */
     36 #define BN_MP_EXPTMOD_FAST_C
     37 #define BN_MP_MONTGOMERY_SETUP_C
     38 #define BN_FAST_MP_MONTGOMERY_REDUCE_C
     39 #define BN_MP_MONTGOMERY_CALC_NORMALIZATION_C
     40 #define BN_MP_MUL_2_C
     41 
     42 /* Include faster sqr at the cost of about 0.5 kB in code */
     43 #define BN_FAST_S_MP_SQR_C
     44 
     45 #else /* LTM_FAST */
     46 
     47 #define BN_MP_DIV_SMALL
     48 #define BN_MP_INIT_MULTI_C
     49 #define BN_MP_CLEAR_MULTI_C
     50 #define BN_MP_ABS_C
     51 #endif /* LTM_FAST */
     52 
     53 /* Current uses do not require support for negative exponent in exptmod, so we
     54  * can save about 1.5 kB in leaving out invmod. */
     55 #define LTM_NO_NEG_EXP
     56 
     57 /* from tommath.h */
     58 
     59 #ifndef MIN
     60    #define MIN(x,y) ((x)<(y)?(x):(y))
     61 #endif
     62 
     63 #ifndef MAX
     64    #define MAX(x,y) ((x)>(y)?(x):(y))
     65 #endif
     66 
     67 #define  OPT_CAST(x)
     68 
     69 typedef unsigned long mp_digit;
     70 typedef u64 mp_word;
     71 
     72 #define DIGIT_BIT          28
     73 #define MP_28BIT
     74 
     75 
     76 #define XMALLOC  os_malloc
     77 #define XFREE    os_free
     78 #define XREALLOC os_realloc
     79 
     80 
     81 #define MP_MASK          ((((mp_digit)1)<<((mp_digit)DIGIT_BIT))-((mp_digit)1))
     82 
     83 #define MP_LT        -1   /* less than */
     84 #define MP_EQ         0   /* equal to */
     85 #define MP_GT         1   /* greater than */
     86 
     87 #define MP_ZPOS       0   /* positive integer */
     88 #define MP_NEG        1   /* negative */
     89 
     90 #define MP_OKAY       0   /* ok result */
     91 #define MP_MEM        -2  /* out of mem */
     92 #define MP_VAL        -3  /* invalid input */
     93 
     94 #define MP_YES        1   /* yes response */
     95 #define MP_NO         0   /* no response */
     96 
     97 typedef int           mp_err;
     98 
     99 /* define this to use lower memory usage routines (exptmods mostly) */
    100 #define MP_LOW_MEM
    101 
    102 /* default precision */
    103 #ifndef MP_PREC
    104    #ifndef MP_LOW_MEM
    105       #define MP_PREC                 32     /* default digits of precision */
    106    #else
    107       #define MP_PREC                 8      /* default digits of precision */
    108    #endif
    109 #endif
    110 
    111 /* size of comba arrays, should be at least 2 * 2**(BITS_PER_WORD - BITS_PER_DIGIT*2) */
    112 #define MP_WARRAY               (1 << (sizeof(mp_word) * CHAR_BIT - 2 * DIGIT_BIT + 1))
    113 
    114 /* the infamous mp_int structure */
    115 typedef struct  {
    116     int used, alloc, sign;
    117     mp_digit *dp;
    118 } mp_int;
    119 
    120 
    121 /* ---> Basic Manipulations <--- */
    122 #define mp_iszero(a) (((a)->used == 0) ? MP_YES : MP_NO)
    123 #define mp_iseven(a) (((a)->used > 0 && (((a)->dp[0] & 1) == 0)) ? MP_YES : MP_NO)
    124 #define mp_isodd(a)  (((a)->used > 0 && (((a)->dp[0] & 1) == 1)) ? MP_YES : MP_NO)
    125 
    126 
    127 /* prototypes for copied functions */
    128 #define s_mp_mul(a, b, c) s_mp_mul_digs(a, b, c, (a)->used + (b)->used + 1)
    129 static int s_mp_exptmod(mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode);
    130 static int s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs);
    131 static int s_mp_sqr(mp_int * a, mp_int * b);
    132 static int s_mp_mul_high_digs(mp_int * a, mp_int * b, mp_int * c, int digs);
    133 
    134 static int fast_s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs);
    135 
    136 #ifdef BN_MP_INIT_MULTI_C
    137 static int mp_init_multi(mp_int *mp, ...);
    138 #endif
    139 #ifdef BN_MP_CLEAR_MULTI_C
    140 static void mp_clear_multi(mp_int *mp, ...);
    141 #endif
    142 static int mp_lshd(mp_int * a, int b);
    143 static void mp_set(mp_int * a, mp_digit b);
    144 static void mp_clamp(mp_int * a);
    145 static void mp_exch(mp_int * a, mp_int * b);
    146 static void mp_rshd(mp_int * a, int b);
    147 static void mp_zero(mp_int * a);
    148 static int mp_mod_2d(mp_int * a, int b, mp_int * c);
    149 static int mp_div_2d(mp_int * a, int b, mp_int * c, mp_int * d);
    150 static int mp_init_copy(mp_int * a, mp_int * b);
    151 static int mp_mul_2d(mp_int * a, int b, mp_int * c);
    152 #ifndef LTM_NO_NEG_EXP
    153 static int mp_div_2(mp_int * a, mp_int * b);
    154 static int mp_invmod(mp_int * a, mp_int * b, mp_int * c);
    155 static int mp_invmod_slow(mp_int * a, mp_int * b, mp_int * c);
    156 #endif /* LTM_NO_NEG_EXP */
    157 static int mp_copy(mp_int * a, mp_int * b);
    158 static int mp_count_bits(mp_int * a);
    159 static int mp_div(mp_int * a, mp_int * b, mp_int * c, mp_int * d);
    160 static int mp_mod(mp_int * a, mp_int * b, mp_int * c);
    161 static int mp_grow(mp_int * a, int size);
    162 static int mp_cmp_mag(mp_int * a, mp_int * b);
    163 #ifdef BN_MP_ABS_C
    164 static int mp_abs(mp_int * a, mp_int * b);
    165 #endif
    166 static int mp_sqr(mp_int * a, mp_int * b);
    167 static int mp_reduce_2k_l(mp_int *a, mp_int *n, mp_int *d);
    168 static int mp_reduce_2k_setup_l(mp_int *a, mp_int *d);
    169 static int mp_2expt(mp_int * a, int b);
    170 static int mp_reduce_setup(mp_int * a, mp_int * b);
    171 static int mp_reduce(mp_int * x, mp_int * m, mp_int * mu);
    172 static int mp_init_size(mp_int * a, int size);
    173 #ifdef BN_MP_EXPTMOD_FAST_C
    174 static int mp_exptmod_fast (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode);
    175 #endif /* BN_MP_EXPTMOD_FAST_C */
    176 #ifdef BN_FAST_S_MP_SQR_C
    177 static int fast_s_mp_sqr (mp_int * a, mp_int * b);
    178 #endif /* BN_FAST_S_MP_SQR_C */
    179 #ifdef BN_MP_MUL_D_C
    180 static int mp_mul_d (mp_int * a, mp_digit b, mp_int * c);
    181 #endif /* BN_MP_MUL_D_C */
    182 
    183 
    184 
    185 /* functions from bn_<func name>.c */
    186 
    187 
    188 /* reverse an array, used for radix code */
    189 static void bn_reverse (unsigned char *s, int len)
    190 {
    191   int     ix, iy;
    192   unsigned char t;
    193 
    194   ix = 0;
    195   iy = len - 1;
    196   while (ix < iy) {
    197     t     = s[ix];
    198     s[ix] = s[iy];
    199     s[iy] = t;
    200     ++ix;
    201     --iy;
    202   }
    203 }
    204 
    205 
    206 /* low level addition, based on HAC pp.594, Algorithm 14.7 */
    207 static int s_mp_add (mp_int * a, mp_int * b, mp_int * c)
    208 {
    209   mp_int *x;
    210   int     olduse, res, min, max;
    211 
    212   /* find sizes, we let |a| <= |b| which means we have to sort
    213    * them.  "x" will point to the input with the most digits
    214    */
    215   if (a->used > b->used) {
    216     min = b->used;
    217     max = a->used;
    218     x = a;
    219   } else {
    220     min = a->used;
    221     max = b->used;
    222     x = b;
    223   }
    224 
    225   /* init result */
    226   if (c->alloc < max + 1) {
    227     if ((res = mp_grow (c, max + 1)) != MP_OKAY) {
    228       return res;
    229     }
    230   }
    231 
    232   /* get old used digit count and set new one */
    233   olduse = c->used;
    234   c->used = max + 1;
    235 
    236   {
    237     register mp_digit u, *tmpa, *tmpb, *tmpc;
    238     register int i;
    239 
    240     /* alias for digit pointers */
    241 
    242     /* first input */
    243     tmpa = a->dp;
    244 
    245     /* second input */
    246     tmpb = b->dp;
    247 
    248     /* destination */
    249     tmpc = c->dp;
    250 
    251     /* zero the carry */
    252     u = 0;
    253     for (i = 0; i < min; i++) {
    254       /* Compute the sum at one digit, T[i] = A[i] + B[i] + U */
    255       *tmpc = *tmpa++ + *tmpb++ + u;
    256 
    257       /* U = carry bit of T[i] */
    258       u = *tmpc >> ((mp_digit)DIGIT_BIT);
    259 
    260       /* take away carry bit from T[i] */
    261       *tmpc++ &= MP_MASK;
    262     }
    263 
    264     /* now copy higher words if any, that is in A+B
    265      * if A or B has more digits add those in
    266      */
    267     if (min != max) {
    268       for (; i < max; i++) {
    269         /* T[i] = X[i] + U */
    270         *tmpc = x->dp[i] + u;
    271 
    272         /* U = carry bit of T[i] */
    273         u = *tmpc >> ((mp_digit)DIGIT_BIT);
    274 
    275         /* take away carry bit from T[i] */
    276         *tmpc++ &= MP_MASK;
    277       }
    278     }
    279 
    280     /* add carry */
    281     *tmpc++ = u;
    282 
    283     /* clear digits above oldused */
    284     for (i = c->used; i < olduse; i++) {
    285       *tmpc++ = 0;
    286     }
    287   }
    288 
    289   mp_clamp (c);
    290   return MP_OKAY;
    291 }
    292 
    293 
    294 /* low level subtraction (assumes |a| > |b|), HAC pp.595 Algorithm 14.9 */
    295 static int s_mp_sub (mp_int * a, mp_int * b, mp_int * c)
    296 {
    297   int     olduse, res, min, max;
    298 
    299   /* find sizes */
    300   min = b->used;
    301   max = a->used;
    302 
    303   /* init result */
    304   if (c->alloc < max) {
    305     if ((res = mp_grow (c, max)) != MP_OKAY) {
    306       return res;
    307     }
    308   }
    309   olduse = c->used;
    310   c->used = max;
    311 
    312   {
    313     register mp_digit u, *tmpa, *tmpb, *tmpc;
    314     register int i;
    315 
    316     /* alias for digit pointers */
    317     tmpa = a->dp;
    318     tmpb = b->dp;
    319     tmpc = c->dp;
    320 
    321     /* set carry to zero */
    322     u = 0;
    323     for (i = 0; i < min; i++) {
    324       /* T[i] = A[i] - B[i] - U */
    325       *tmpc = *tmpa++ - *tmpb++ - u;
    326 
    327       /* U = carry bit of T[i]
    328        * Note this saves performing an AND operation since
    329        * if a carry does occur it will propagate all the way to the
    330        * MSB.  As a result a single shift is enough to get the carry
    331        */
    332       u = *tmpc >> ((mp_digit)(CHAR_BIT * sizeof (mp_digit) - 1));
    333 
    334       /* Clear carry from T[i] */
    335       *tmpc++ &= MP_MASK;
    336     }
    337 
    338     /* now copy higher words if any, e.g. if A has more digits than B  */
    339     for (; i < max; i++) {
    340       /* T[i] = A[i] - U */
    341       *tmpc = *tmpa++ - u;
    342 
    343       /* U = carry bit of T[i] */
    344       u = *tmpc >> ((mp_digit)(CHAR_BIT * sizeof (mp_digit) - 1));
    345 
    346       /* Clear carry from T[i] */
    347       *tmpc++ &= MP_MASK;
    348     }
    349 
    350     /* clear digits above used (since we may not have grown result above) */
    351     for (i = c->used; i < olduse; i++) {
    352       *tmpc++ = 0;
    353     }
    354   }
    355 
    356   mp_clamp (c);
    357   return MP_OKAY;
    358 }
    359 
    360 
    361 /* init a new mp_int */
    362 static int mp_init (mp_int * a)
    363 {
    364   int i;
    365 
    366   /* allocate memory required and clear it */
    367   a->dp = OPT_CAST(mp_digit) XMALLOC (sizeof (mp_digit) * MP_PREC);
    368   if (a->dp == NULL) {
    369     return MP_MEM;
    370   }
    371 
    372   /* set the digits to zero */
    373   for (i = 0; i < MP_PREC; i++) {
    374       a->dp[i] = 0;
    375   }
    376 
    377   /* set the used to zero, allocated digits to the default precision
    378    * and sign to positive */
    379   a->used  = 0;
    380   a->alloc = MP_PREC;
    381   a->sign  = MP_ZPOS;
    382 
    383   return MP_OKAY;
    384 }
    385 
    386 
    387 /* clear one (frees)  */
    388 static void mp_clear (mp_int * a)
    389 {
    390   int i;
    391 
    392   /* only do anything if a hasn't been freed previously */
    393   if (a->dp != NULL) {
    394     /* first zero the digits */
    395     for (i = 0; i < a->used; i++) {
    396         a->dp[i] = 0;
    397     }
    398 
    399     /* free ram */
    400     XFREE(a->dp);
    401 
    402     /* reset members to make debugging easier */
    403     a->dp    = NULL;
    404     a->alloc = a->used = 0;
    405     a->sign  = MP_ZPOS;
    406   }
    407 }
    408 
    409 
    410 /* high level addition (handles signs) */
    411 static int mp_add (mp_int * a, mp_int * b, mp_int * c)
    412 {
    413   int     sa, sb, res;
    414 
    415   /* get sign of both inputs */
    416   sa = a->sign;
    417   sb = b->sign;
    418 
    419   /* handle two cases, not four */
    420   if (sa == sb) {
    421     /* both positive or both negative */
    422     /* add their magnitudes, copy the sign */
    423     c->sign = sa;
    424     res = s_mp_add (a, b, c);
    425   } else {
    426     /* one positive, the other negative */
    427     /* subtract the one with the greater magnitude from */
    428     /* the one of the lesser magnitude.  The result gets */
    429     /* the sign of the one with the greater magnitude. */
    430     if (mp_cmp_mag (a, b) == MP_LT) {
    431       c->sign = sb;
    432       res = s_mp_sub (b, a, c);
    433     } else {
    434       c->sign = sa;
    435       res = s_mp_sub (a, b, c);
    436     }
    437   }
    438   return res;
    439 }
    440 
    441 
    442 /* high level subtraction (handles signs) */
    443 static int mp_sub (mp_int * a, mp_int * b, mp_int * c)
    444 {
    445   int     sa, sb, res;
    446 
    447   sa = a->sign;
    448   sb = b->sign;
    449 
    450   if (sa != sb) {
    451     /* subtract a negative from a positive, OR */
    452     /* subtract a positive from a negative. */
    453     /* In either case, ADD their magnitudes, */
    454     /* and use the sign of the first number. */
    455     c->sign = sa;
    456     res = s_mp_add (a, b, c);
    457   } else {
    458     /* subtract a positive from a positive, OR */
    459     /* subtract a negative from a negative. */
    460     /* First, take the difference between their */
    461     /* magnitudes, then... */
    462     if (mp_cmp_mag (a, b) != MP_LT) {
    463       /* Copy the sign from the first */
    464       c->sign = sa;
    465       /* The first has a larger or equal magnitude */
    466       res = s_mp_sub (a, b, c);
    467     } else {
    468       /* The result has the *opposite* sign from */
    469       /* the first number. */
    470       c->sign = (sa == MP_ZPOS) ? MP_NEG : MP_ZPOS;
    471       /* The second has a larger magnitude */
    472       res = s_mp_sub (b, a, c);
    473     }
    474   }
    475   return res;
    476 }
    477 
    478 
    479 /* high level multiplication (handles sign) */
    480 static int mp_mul (mp_int * a, mp_int * b, mp_int * c)
    481 {
    482   int     res, neg;
    483   neg = (a->sign == b->sign) ? MP_ZPOS : MP_NEG;
    484 
    485   /* use Toom-Cook? */
    486 #ifdef BN_MP_TOOM_MUL_C
    487   if (MIN (a->used, b->used) >= TOOM_MUL_CUTOFF) {
    488     res = mp_toom_mul(a, b, c);
    489   } else
    490 #endif
    491 #ifdef BN_MP_KARATSUBA_MUL_C
    492   /* use Karatsuba? */
    493   if (MIN (a->used, b->used) >= KARATSUBA_MUL_CUTOFF) {
    494     res = mp_karatsuba_mul (a, b, c);
    495   } else
    496 #endif
    497   {
    498     /* can we use the fast multiplier?
    499      *
    500      * The fast multiplier can be used if the output will
    501      * have less than MP_WARRAY digits and the number of
    502      * digits won't affect carry propagation
    503      */
    504 #ifdef BN_FAST_S_MP_MUL_DIGS_C
    505     int     digs = a->used + b->used + 1;
    506 
    507     if ((digs < MP_WARRAY) &&
    508         MIN(a->used, b->used) <=
    509         (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
    510       res = fast_s_mp_mul_digs (a, b, c, digs);
    511     } else
    512 #endif
    513 #ifdef BN_S_MP_MUL_DIGS_C
    514       res = s_mp_mul (a, b, c); /* uses s_mp_mul_digs */
    515 #else
    516 #error mp_mul could fail
    517       res = MP_VAL;
    518 #endif
    519 
    520   }
    521   c->sign = (c->used > 0) ? neg : MP_ZPOS;
    522   return res;
    523 }
    524 
    525 
    526 /* d = a * b (mod c) */
    527 static int mp_mulmod (mp_int * a, mp_int * b, mp_int * c, mp_int * d)
    528 {
    529   int     res;
    530   mp_int  t;
    531 
    532   if ((res = mp_init (&t)) != MP_OKAY) {
    533     return res;
    534   }
    535 
    536   if ((res = mp_mul (a, b, &t)) != MP_OKAY) {
    537     mp_clear (&t);
    538     return res;
    539   }
    540   res = mp_mod (&t, c, d);
    541   mp_clear (&t);
    542   return res;
    543 }
    544 
    545 
    546 /* c = a mod b, 0 <= c < b */
    547 static int mp_mod (mp_int * a, mp_int * b, mp_int * c)
    548 {
    549   mp_int  t;
    550   int     res;
    551 
    552   if ((res = mp_init (&t)) != MP_OKAY) {
    553     return res;
    554   }
    555 
    556   if ((res = mp_div (a, b, NULL, &t)) != MP_OKAY) {
    557     mp_clear (&t);
    558     return res;
    559   }
    560 
    561   if (t.sign != b->sign) {
    562     res = mp_add (b, &t, c);
    563   } else {
    564     res = MP_OKAY;
    565     mp_exch (&t, c);
    566   }
    567 
    568   mp_clear (&t);
    569   return res;
    570 }
    571 
    572 
    573 /* this is a shell function that calls either the normal or Montgomery
    574  * exptmod functions.  Originally the call to the montgomery code was
    575  * embedded in the normal function but that wasted a lot of stack space
    576  * for nothing (since 99% of the time the Montgomery code would be called)
    577  */
    578 static int mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y)
    579 {
    580   int dr;
    581 
    582   /* modulus P must be positive */
    583   if (P->sign == MP_NEG) {
    584      return MP_VAL;
    585   }
    586 
    587   /* if exponent X is negative we have to recurse */
    588   if (X->sign == MP_NEG) {
    589 #ifdef LTM_NO_NEG_EXP
    590         return MP_VAL;
    591 #else /* LTM_NO_NEG_EXP */
    592 #ifdef BN_MP_INVMOD_C
    593      mp_int tmpG, tmpX;
    594      int err;
    595 
    596      /* first compute 1/G mod P */
    597      if ((err = mp_init(&tmpG)) != MP_OKAY) {
    598         return err;
    599      }
    600      if ((err = mp_invmod(G, P, &tmpG)) != MP_OKAY) {
    601         mp_clear(&tmpG);
    602         return err;
    603      }
    604 
    605      /* now get |X| */
    606      if ((err = mp_init(&tmpX)) != MP_OKAY) {
    607         mp_clear(&tmpG);
    608         return err;
    609      }
    610      if ((err = mp_abs(X, &tmpX)) != MP_OKAY) {
    611         mp_clear_multi(&tmpG, &tmpX, NULL);
    612         return err;
    613      }
    614 
    615      /* and now compute (1/G)**|X| instead of G**X [X < 0] */
    616      err = mp_exptmod(&tmpG, &tmpX, P, Y);
    617      mp_clear_multi(&tmpG, &tmpX, NULL);
    618      return err;
    619 #else
    620 #error mp_exptmod would always fail
    621      /* no invmod */
    622      return MP_VAL;
    623 #endif
    624 #endif /* LTM_NO_NEG_EXP */
    625   }
    626 
    627 /* modified diminished radix reduction */
    628 #if defined(BN_MP_REDUCE_IS_2K_L_C) && defined(BN_MP_REDUCE_2K_L_C) && defined(BN_S_MP_EXPTMOD_C)
    629   if (mp_reduce_is_2k_l(P) == MP_YES) {
    630      return s_mp_exptmod(G, X, P, Y, 1);
    631   }
    632 #endif
    633 
    634 #ifdef BN_MP_DR_IS_MODULUS_C
    635   /* is it a DR modulus? */
    636   dr = mp_dr_is_modulus(P);
    637 #else
    638   /* default to no */
    639   dr = 0;
    640 #endif
    641 
    642 #ifdef BN_MP_REDUCE_IS_2K_C
    643   /* if not, is it a unrestricted DR modulus? */
    644   if (dr == 0) {
    645      dr = mp_reduce_is_2k(P) << 1;
    646   }
    647 #endif
    648 
    649   /* if the modulus is odd or dr != 0 use the montgomery method */
    650 #ifdef BN_MP_EXPTMOD_FAST_C
    651   if (mp_isodd (P) == 1 || dr !=  0) {
    652     return mp_exptmod_fast (G, X, P, Y, dr);
    653   } else {
    654 #endif
    655 #ifdef BN_S_MP_EXPTMOD_C
    656     /* otherwise use the generic Barrett reduction technique */
    657     return s_mp_exptmod (G, X, P, Y, 0);
    658 #else
    659 #error mp_exptmod could fail
    660     /* no exptmod for evens */
    661     return MP_VAL;
    662 #endif
    663 #ifdef BN_MP_EXPTMOD_FAST_C
    664   }
    665 #endif
    666 }
    667 
    668 
    669 /* compare two ints (signed)*/
    670 static int mp_cmp (mp_int * a, mp_int * b)
    671 {
    672   /* compare based on sign */
    673   if (a->sign != b->sign) {
    674      if (a->sign == MP_NEG) {
    675         return MP_LT;
    676      } else {
    677         return MP_GT;
    678      }
    679   }
    680 
    681   /* compare digits */
    682   if (a->sign == MP_NEG) {
    683      /* if negative compare opposite direction */
    684      return mp_cmp_mag(b, a);
    685   } else {
    686      return mp_cmp_mag(a, b);
    687   }
    688 }
    689 
    690 
    691 /* compare a digit */
    692 static int mp_cmp_d(mp_int * a, mp_digit b)
    693 {
    694   /* compare based on sign */
    695   if (a->sign == MP_NEG) {
    696     return MP_LT;
    697   }
    698 
    699   /* compare based on magnitude */
    700   if (a->used > 1) {
    701     return MP_GT;
    702   }
    703 
    704   /* compare the only digit of a to b */
    705   if (a->dp[0] > b) {
    706     return MP_GT;
    707   } else if (a->dp[0] < b) {
    708     return MP_LT;
    709   } else {
    710     return MP_EQ;
    711   }
    712 }
    713 
    714 
    715 #ifndef LTM_NO_NEG_EXP
    716 /* hac 14.61, pp608 */
    717 static int mp_invmod (mp_int * a, mp_int * b, mp_int * c)
    718 {
    719   /* b cannot be negative */
    720   if (b->sign == MP_NEG || mp_iszero(b) == 1) {
    721     return MP_VAL;
    722   }
    723 
    724 #ifdef BN_FAST_MP_INVMOD_C
    725   /* if the modulus is odd we can use a faster routine instead */
    726   if (mp_isodd (b) == 1) {
    727     return fast_mp_invmod (a, b, c);
    728   }
    729 #endif
    730 
    731 #ifdef BN_MP_INVMOD_SLOW_C
    732   return mp_invmod_slow(a, b, c);
    733 #endif
    734 
    735 #ifndef BN_FAST_MP_INVMOD_C
    736 #ifndef BN_MP_INVMOD_SLOW_C
    737 #error mp_invmod would always fail
    738 #endif
    739 #endif
    740   return MP_VAL;
    741 }
    742 #endif /* LTM_NO_NEG_EXP */
    743 
    744 
    745 /* get the size for an unsigned equivalent */
    746 static int mp_unsigned_bin_size (mp_int * a)
    747 {
    748   int     size = mp_count_bits (a);
    749   return (size / 8 + ((size & 7) != 0 ? 1 : 0));
    750 }
    751 
    752 
    753 #ifndef LTM_NO_NEG_EXP
    754 /* hac 14.61, pp608 */
    755 static int mp_invmod_slow (mp_int * a, mp_int * b, mp_int * c)
    756 {
    757   mp_int  x, y, u, v, A, B, C, D;
    758   int     res;
    759 
    760   /* b cannot be negative */
    761   if (b->sign == MP_NEG || mp_iszero(b) == 1) {
    762     return MP_VAL;
    763   }
    764 
    765   /* init temps */
    766   if ((res = mp_init_multi(&x, &y, &u, &v,
    767                            &A, &B, &C, &D, NULL)) != MP_OKAY) {
    768      return res;
    769   }
    770 
    771   /* x = a, y = b */
    772   if ((res = mp_mod(a, b, &x)) != MP_OKAY) {
    773       goto LBL_ERR;
    774   }
    775   if ((res = mp_copy (b, &y)) != MP_OKAY) {
    776     goto LBL_ERR;
    777   }
    778 
    779   /* 2. [modified] if x,y are both even then return an error! */
    780   if (mp_iseven (&x) == 1 && mp_iseven (&y) == 1) {
    781     res = MP_VAL;
    782     goto LBL_ERR;
    783   }
    784 
    785   /* 3. u=x, v=y, A=1, B=0, C=0,D=1 */
    786   if ((res = mp_copy (&x, &u)) != MP_OKAY) {
    787     goto LBL_ERR;
    788   }
    789   if ((res = mp_copy (&y, &v)) != MP_OKAY) {
    790     goto LBL_ERR;
    791   }
    792   mp_set (&A, 1);
    793   mp_set (&D, 1);
    794 
    795 top:
    796   /* 4.  while u is even do */
    797   while (mp_iseven (&u) == 1) {
    798     /* 4.1 u = u/2 */
    799     if ((res = mp_div_2 (&u, &u)) != MP_OKAY) {
    800       goto LBL_ERR;
    801     }
    802     /* 4.2 if A or B is odd then */
    803     if (mp_isodd (&A) == 1 || mp_isodd (&B) == 1) {
    804       /* A = (A+y)/2, B = (B-x)/2 */
    805       if ((res = mp_add (&A, &y, &A)) != MP_OKAY) {
    806          goto LBL_ERR;
    807       }
    808       if ((res = mp_sub (&B, &x, &B)) != MP_OKAY) {
    809          goto LBL_ERR;
    810       }
    811     }
    812     /* A = A/2, B = B/2 */
    813     if ((res = mp_div_2 (&A, &A)) != MP_OKAY) {
    814       goto LBL_ERR;
    815     }
    816     if ((res = mp_div_2 (&B, &B)) != MP_OKAY) {
    817       goto LBL_ERR;
    818     }
    819   }
    820 
    821   /* 5.  while v is even do */
    822   while (mp_iseven (&v) == 1) {
    823     /* 5.1 v = v/2 */
    824     if ((res = mp_div_2 (&v, &v)) != MP_OKAY) {
    825       goto LBL_ERR;
    826     }
    827     /* 5.2 if C or D is odd then */
    828     if (mp_isodd (&C) == 1 || mp_isodd (&D) == 1) {
    829       /* C = (C+y)/2, D = (D-x)/2 */
    830       if ((res = mp_add (&C, &y, &C)) != MP_OKAY) {
    831          goto LBL_ERR;
    832       }
    833       if ((res = mp_sub (&D, &x, &D)) != MP_OKAY) {
    834          goto LBL_ERR;
    835       }
    836     }
    837     /* C = C/2, D = D/2 */
    838     if ((res = mp_div_2 (&C, &C)) != MP_OKAY) {
    839       goto LBL_ERR;
    840     }
    841     if ((res = mp_div_2 (&D, &D)) != MP_OKAY) {
    842       goto LBL_ERR;
    843     }
    844   }
    845 
    846   /* 6.  if u >= v then */
    847   if (mp_cmp (&u, &v) != MP_LT) {
    848     /* u = u - v, A = A - C, B = B - D */
    849     if ((res = mp_sub (&u, &v, &u)) != MP_OKAY) {
    850       goto LBL_ERR;
    851     }
    852 
    853     if ((res = mp_sub (&A, &C, &A)) != MP_OKAY) {
    854       goto LBL_ERR;
    855     }
    856 
    857     if ((res = mp_sub (&B, &D, &B)) != MP_OKAY) {
    858       goto LBL_ERR;
    859     }
    860   } else {
    861     /* v - v - u, C = C - A, D = D - B */
    862     if ((res = mp_sub (&v, &u, &v)) != MP_OKAY) {
    863       goto LBL_ERR;
    864     }
    865 
    866     if ((res = mp_sub (&C, &A, &C)) != MP_OKAY) {
    867       goto LBL_ERR;
    868     }
    869 
    870     if ((res = mp_sub (&D, &B, &D)) != MP_OKAY) {
    871       goto LBL_ERR;
    872     }
    873   }
    874 
    875   /* if not zero goto step 4 */
    876   if (mp_iszero (&u) == 0)
    877     goto top;
    878 
    879   /* now a = C, b = D, gcd == g*v */
    880 
    881   /* if v != 1 then there is no inverse */
    882   if (mp_cmp_d (&v, 1) != MP_EQ) {
    883     res = MP_VAL;
    884     goto LBL_ERR;
    885   }
    886 
    887   /* if its too low */
    888   while (mp_cmp_d(&C, 0) == MP_LT) {
    889       if ((res = mp_add(&C, b, &C)) != MP_OKAY) {
    890          goto LBL_ERR;
    891       }
    892   }
    893 
    894   /* too big */
    895   while (mp_cmp_mag(&C, b) != MP_LT) {
    896       if ((res = mp_sub(&C, b, &C)) != MP_OKAY) {
    897          goto LBL_ERR;
    898       }
    899   }
    900 
    901   /* C is now the inverse */
    902   mp_exch (&C, c);
    903   res = MP_OKAY;
    904 LBL_ERR:mp_clear_multi (&x, &y, &u, &v, &A, &B, &C, &D, NULL);
    905   return res;
    906 }
    907 #endif /* LTM_NO_NEG_EXP */
    908 
    909 
    910 /* compare maginitude of two ints (unsigned) */
    911 static int mp_cmp_mag (mp_int * a, mp_int * b)
    912 {
    913   int     n;
    914   mp_digit *tmpa, *tmpb;
    915 
    916   /* compare based on # of non-zero digits */
    917   if (a->used > b->used) {
    918     return MP_GT;
    919   }
    920 
    921   if (a->used < b->used) {
    922     return MP_LT;
    923   }
    924 
    925   /* alias for a */
    926   tmpa = a->dp + (a->used - 1);
    927 
    928   /* alias for b */
    929   tmpb = b->dp + (a->used - 1);
    930 
    931   /* compare based on digits  */
    932   for (n = 0; n < a->used; ++n, --tmpa, --tmpb) {
    933     if (*tmpa > *tmpb) {
    934       return MP_GT;
    935     }
    936 
    937     if (*tmpa < *tmpb) {
    938       return MP_LT;
    939     }
    940   }
    941   return MP_EQ;
    942 }
    943 
    944 
    945 /* reads a unsigned char array, assumes the msb is stored first [big endian] */
    946 static int mp_read_unsigned_bin (mp_int * a, const unsigned char *b, int c)
    947 {
    948   int     res;
    949 
    950   /* make sure there are at least two digits */
    951   if (a->alloc < 2) {
    952      if ((res = mp_grow(a, 2)) != MP_OKAY) {
    953         return res;
    954      }
    955   }
    956 
    957   /* zero the int */
    958   mp_zero (a);
    959 
    960   /* read the bytes in */
    961   while (c-- > 0) {
    962     if ((res = mp_mul_2d (a, 8, a)) != MP_OKAY) {
    963       return res;
    964     }
    965 
    966 #ifndef MP_8BIT
    967       a->dp[0] |= *b++;
    968       a->used += 1;
    969 #else
    970       a->dp[0] = (*b & MP_MASK);
    971       a->dp[1] |= ((*b++ >> 7U) & 1);
    972       a->used += 2;
    973 #endif
    974   }
    975   mp_clamp (a);
    976   return MP_OKAY;
    977 }
    978 
    979 
    980 /* store in unsigned [big endian] format */
    981 static int mp_to_unsigned_bin (mp_int * a, unsigned char *b)
    982 {
    983   int     x, res;
    984   mp_int  t;
    985 
    986   if ((res = mp_init_copy (&t, a)) != MP_OKAY) {
    987     return res;
    988   }
    989 
    990   x = 0;
    991   while (mp_iszero (&t) == 0) {
    992 #ifndef MP_8BIT
    993       b[x++] = (unsigned char) (t.dp[0] & 255);
    994 #else
    995       b[x++] = (unsigned char) (t.dp[0] | ((t.dp[1] & 0x01) << 7));
    996 #endif
    997     if ((res = mp_div_2d (&t, 8, &t, NULL)) != MP_OKAY) {
    998       mp_clear (&t);
    999       return res;
   1000     }
   1001   }
   1002   bn_reverse (b, x);
   1003   mp_clear (&t);
   1004   return MP_OKAY;
   1005 }
   1006 
   1007 
   1008 /* shift right by a certain bit count (store quotient in c, optional remainder in d) */
   1009 static int mp_div_2d (mp_int * a, int b, mp_int * c, mp_int * d)
   1010 {
   1011   mp_digit D, r, rr;
   1012   int     x, res;
   1013   mp_int  t;
   1014 
   1015 
   1016   /* if the shift count is <= 0 then we do no work */
   1017   if (b <= 0) {
   1018     res = mp_copy (a, c);
   1019     if (d != NULL) {
   1020       mp_zero (d);
   1021     }
   1022     return res;
   1023   }
   1024 
   1025   if ((res = mp_init (&t)) != MP_OKAY) {
   1026     return res;
   1027   }
   1028 
   1029   /* get the remainder */
   1030   if (d != NULL) {
   1031     if ((res = mp_mod_2d (a, b, &t)) != MP_OKAY) {
   1032       mp_clear (&t);
   1033       return res;
   1034     }
   1035   }
   1036 
   1037   /* copy */
   1038   if ((res = mp_copy (a, c)) != MP_OKAY) {
   1039     mp_clear (&t);
   1040     return res;
   1041   }
   1042 
   1043   /* shift by as many digits in the bit count */
   1044   if (b >= (int)DIGIT_BIT) {
   1045     mp_rshd (c, b / DIGIT_BIT);
   1046   }
   1047 
   1048   /* shift any bit count < DIGIT_BIT */
   1049   D = (mp_digit) (b % DIGIT_BIT);
   1050   if (D != 0) {
   1051     register mp_digit *tmpc, mask, shift;
   1052 
   1053     /* mask */
   1054     mask = (((mp_digit)1) << D) - 1;
   1055 
   1056     /* shift for lsb */
   1057     shift = DIGIT_BIT - D;
   1058 
   1059     /* alias */
   1060     tmpc = c->dp + (c->used - 1);
   1061 
   1062     /* carry */
   1063     r = 0;
   1064     for (x = c->used - 1; x >= 0; x--) {
   1065       /* get the lower  bits of this word in a temp */
   1066       rr = *tmpc & mask;
   1067 
   1068       /* shift the current word and mix in the carry bits from the previous word */
   1069       *tmpc = (*tmpc >> D) | (r << shift);
   1070       --tmpc;
   1071 
   1072       /* set the carry to the carry bits of the current word found above */
   1073       r = rr;
   1074     }
   1075   }
   1076   mp_clamp (c);
   1077   if (d != NULL) {
   1078     mp_exch (&t, d);
   1079   }
   1080   mp_clear (&t);
   1081   return MP_OKAY;
   1082 }
   1083 
   1084 
   1085 static int mp_init_copy (mp_int * a, mp_int * b)
   1086 {
   1087   int     res;
   1088 
   1089   if ((res = mp_init (a)) != MP_OKAY) {
   1090     return res;
   1091   }
   1092   return mp_copy (b, a);
   1093 }
   1094 
   1095 
   1096 /* set to zero */
   1097 static void mp_zero (mp_int * a)
   1098 {
   1099   int       n;
   1100   mp_digit *tmp;
   1101 
   1102   a->sign = MP_ZPOS;
   1103   a->used = 0;
   1104 
   1105   tmp = a->dp;
   1106   for (n = 0; n < a->alloc; n++) {
   1107      *tmp++ = 0;
   1108   }
   1109 }
   1110 
   1111 
   1112 /* copy, b = a */
   1113 static int mp_copy (mp_int * a, mp_int * b)
   1114 {
   1115   int     res, n;
   1116 
   1117   /* if dst == src do nothing */
   1118   if (a == b) {
   1119     return MP_OKAY;
   1120   }
   1121 
   1122   /* grow dest */
   1123   if (b->alloc < a->used) {
   1124      if ((res = mp_grow (b, a->used)) != MP_OKAY) {
   1125         return res;
   1126      }
   1127   }
   1128 
   1129   /* zero b and copy the parameters over */
   1130   {
   1131     register mp_digit *tmpa, *tmpb;
   1132 
   1133     /* pointer aliases */
   1134 
   1135     /* source */
   1136     tmpa = a->dp;
   1137 
   1138     /* destination */
   1139     tmpb = b->dp;
   1140 
   1141     /* copy all the digits */
   1142     for (n = 0; n < a->used; n++) {
   1143       *tmpb++ = *tmpa++;
   1144     }
   1145 
   1146     /* clear high digits */
   1147     for (; n < b->used; n++) {
   1148       *tmpb++ = 0;
   1149     }
   1150   }
   1151 
   1152   /* copy used count and sign */
   1153   b->used = a->used;
   1154   b->sign = a->sign;
   1155   return MP_OKAY;
   1156 }
   1157 
   1158 
   1159 /* shift right a certain amount of digits */
   1160 static void mp_rshd (mp_int * a, int b)
   1161 {
   1162   int     x;
   1163 
   1164   /* if b <= 0 then ignore it */
   1165   if (b <= 0) {
   1166     return;
   1167   }
   1168 
   1169   /* if b > used then simply zero it and return */
   1170   if (a->used <= b) {
   1171     mp_zero (a);
   1172     return;
   1173   }
   1174 
   1175   {
   1176     register mp_digit *bottom, *top;
   1177 
   1178     /* shift the digits down */
   1179 
   1180     /* bottom */
   1181     bottom = a->dp;
   1182 
   1183     /* top [offset into digits] */
   1184     top = a->dp + b;
   1185 
   1186     /* this is implemented as a sliding window where
   1187      * the window is b-digits long and digits from
   1188      * the top of the window are copied to the bottom
   1189      *
   1190      * e.g.
   1191 
   1192      b-2 | b-1 | b0 | b1 | b2 | ... | bb |   ---->
   1193                  /\                   |      ---->
   1194                   \-------------------/      ---->
   1195      */
   1196     for (x = 0; x < (a->used - b); x++) {
   1197       *bottom++ = *top++;
   1198     }
   1199 
   1200     /* zero the top digits */
   1201     for (; x < a->used; x++) {
   1202       *bottom++ = 0;
   1203     }
   1204   }
   1205 
   1206   /* remove excess digits */
   1207   a->used -= b;
   1208 }
   1209 
   1210 
   1211 /* swap the elements of two integers, for cases where you can't simply swap the
   1212  * mp_int pointers around
   1213  */
   1214 static void mp_exch (mp_int * a, mp_int * b)
   1215 {
   1216   mp_int  t;
   1217 
   1218   t  = *a;
   1219   *a = *b;
   1220   *b = t;
   1221 }
   1222 
   1223 
   1224 /* trim unused digits
   1225  *
   1226  * This is used to ensure that leading zero digits are
   1227  * trimed and the leading "used" digit will be non-zero
   1228  * Typically very fast.  Also fixes the sign if there
   1229  * are no more leading digits
   1230  */
   1231 static void mp_clamp (mp_int * a)
   1232 {
   1233   /* decrease used while the most significant digit is
   1234    * zero.
   1235    */
   1236   while (a->used > 0 && a->dp[a->used - 1] == 0) {
   1237     --(a->used);
   1238   }
   1239 
   1240   /* reset the sign flag if used == 0 */
   1241   if (a->used == 0) {
   1242     a->sign = MP_ZPOS;
   1243   }
   1244 }
   1245 
   1246 
   1247 /* grow as required */
   1248 static int mp_grow (mp_int * a, int size)
   1249 {
   1250   int     i;
   1251   mp_digit *tmp;
   1252 
   1253   /* if the alloc size is smaller alloc more ram */
   1254   if (a->alloc < size) {
   1255     /* ensure there are always at least MP_PREC digits extra on top */
   1256     size += (MP_PREC * 2) - (size % MP_PREC);
   1257 
   1258     /* reallocate the array a->dp
   1259      *
   1260      * We store the return in a temporary variable
   1261      * in case the operation failed we don't want
   1262      * to overwrite the dp member of a.
   1263      */
   1264     tmp = OPT_CAST(mp_digit) XREALLOC (a->dp, sizeof (mp_digit) * size);
   1265     if (tmp == NULL) {
   1266       /* reallocation failed but "a" is still valid [can be freed] */
   1267       return MP_MEM;
   1268     }
   1269 
   1270     /* reallocation succeeded so set a->dp */
   1271     a->dp = tmp;
   1272 
   1273     /* zero excess digits */
   1274     i        = a->alloc;
   1275     a->alloc = size;
   1276     for (; i < a->alloc; i++) {
   1277       a->dp[i] = 0;
   1278     }
   1279   }
   1280   return MP_OKAY;
   1281 }
   1282 
   1283 
   1284 #ifdef BN_MP_ABS_C
   1285 /* b = |a|
   1286  *
   1287  * Simple function copies the input and fixes the sign to positive
   1288  */
   1289 static int mp_abs (mp_int * a, mp_int * b)
   1290 {
   1291   int     res;
   1292 
   1293   /* copy a to b */
   1294   if (a != b) {
   1295      if ((res = mp_copy (a, b)) != MP_OKAY) {
   1296        return res;
   1297      }
   1298   }
   1299 
   1300   /* force the sign of b to positive */
   1301   b->sign = MP_ZPOS;
   1302 
   1303   return MP_OKAY;
   1304 }
   1305 #endif
   1306 
   1307 
   1308 /* set to a digit */
   1309 static void mp_set (mp_int * a, mp_digit b)
   1310 {
   1311   mp_zero (a);
   1312   a->dp[0] = b & MP_MASK;
   1313   a->used  = (a->dp[0] != 0) ? 1 : 0;
   1314 }
   1315 
   1316 
   1317 #ifndef LTM_NO_NEG_EXP
   1318 /* b = a/2 */
   1319 static int mp_div_2(mp_int * a, mp_int * b)
   1320 {
   1321   int     x, res, oldused;
   1322 
   1323   /* copy */
   1324   if (b->alloc < a->used) {
   1325     if ((res = mp_grow (b, a->used)) != MP_OKAY) {
   1326       return res;
   1327     }
   1328   }
   1329 
   1330   oldused = b->used;
   1331   b->used = a->used;
   1332   {
   1333     register mp_digit r, rr, *tmpa, *tmpb;
   1334 
   1335     /* source alias */
   1336     tmpa = a->dp + b->used - 1;
   1337 
   1338     /* dest alias */
   1339     tmpb = b->dp + b->used - 1;
   1340 
   1341     /* carry */
   1342     r = 0;
   1343     for (x = b->used - 1; x >= 0; x--) {
   1344       /* get the carry for the next iteration */
   1345       rr = *tmpa & 1;
   1346 
   1347       /* shift the current digit, add in carry and store */
   1348       *tmpb-- = (*tmpa-- >> 1) | (r << (DIGIT_BIT - 1));
   1349 
   1350       /* forward carry to next iteration */
   1351       r = rr;
   1352     }
   1353 
   1354     /* zero excess digits */
   1355     tmpb = b->dp + b->used;
   1356     for (x = b->used; x < oldused; x++) {
   1357       *tmpb++ = 0;
   1358     }
   1359   }
   1360   b->sign = a->sign;
   1361   mp_clamp (b);
   1362   return MP_OKAY;
   1363 }
   1364 #endif /* LTM_NO_NEG_EXP */
   1365 
   1366 
   1367 /* shift left by a certain bit count */
   1368 static int mp_mul_2d (mp_int * a, int b, mp_int * c)
   1369 {
   1370   mp_digit d;
   1371   int      res;
   1372 
   1373   /* copy */
   1374   if (a != c) {
   1375      if ((res = mp_copy (a, c)) != MP_OKAY) {
   1376        return res;
   1377      }
   1378   }
   1379 
   1380   if (c->alloc < (int)(c->used + b/DIGIT_BIT + 1)) {
   1381      if ((res = mp_grow (c, c->used + b / DIGIT_BIT + 1)) != MP_OKAY) {
   1382        return res;
   1383      }
   1384   }
   1385 
   1386   /* shift by as many digits in the bit count */
   1387   if (b >= (int)DIGIT_BIT) {
   1388     if ((res = mp_lshd (c, b / DIGIT_BIT)) != MP_OKAY) {
   1389       return res;
   1390     }
   1391   }
   1392 
   1393   /* shift any bit count < DIGIT_BIT */
   1394   d = (mp_digit) (b % DIGIT_BIT);
   1395   if (d != 0) {
   1396     register mp_digit *tmpc, shift, mask, r, rr;
   1397     register int x;
   1398 
   1399     /* bitmask for carries */
   1400     mask = (((mp_digit)1) << d) - 1;
   1401 
   1402     /* shift for msbs */
   1403     shift = DIGIT_BIT - d;
   1404 
   1405     /* alias */
   1406     tmpc = c->dp;
   1407 
   1408     /* carry */
   1409     r    = 0;
   1410     for (x = 0; x < c->used; x++) {
   1411       /* get the higher bits of the current word */
   1412       rr = (*tmpc >> shift) & mask;
   1413 
   1414       /* shift the current word and OR in the carry */
   1415       *tmpc = ((*tmpc << d) | r) & MP_MASK;
   1416       ++tmpc;
   1417 
   1418       /* set the carry to the carry bits of the current word */
   1419       r = rr;
   1420     }
   1421 
   1422     /* set final carry */
   1423     if (r != 0) {
   1424        c->dp[(c->used)++] = r;
   1425     }
   1426   }
   1427   mp_clamp (c);
   1428   return MP_OKAY;
   1429 }
   1430 
   1431 
   1432 #ifdef BN_MP_INIT_MULTI_C
   1433 static int mp_init_multi(mp_int *mp, ...)
   1434 {
   1435     mp_err res = MP_OKAY;      /* Assume ok until proven otherwise */
   1436     int n = 0;                 /* Number of ok inits */
   1437     mp_int* cur_arg = mp;
   1438     va_list args;
   1439 
   1440     va_start(args, mp);        /* init args to next argument from caller */
   1441     while (cur_arg != NULL) {
   1442         if (mp_init(cur_arg) != MP_OKAY) {
   1443             /* Oops - error! Back-track and mp_clear what we already
   1444                succeeded in init-ing, then return error.
   1445             */
   1446             va_list clean_args;
   1447 
   1448             /* end the current list */
   1449             va_end(args);
   1450 
   1451             /* now start cleaning up */
   1452             cur_arg = mp;
   1453             va_start(clean_args, mp);
   1454             while (n--) {
   1455                 mp_clear(cur_arg);
   1456                 cur_arg = va_arg(clean_args, mp_int*);
   1457             }
   1458             va_end(clean_args);
   1459             res = MP_MEM;
   1460             break;
   1461         }
   1462         n++;
   1463         cur_arg = va_arg(args, mp_int*);
   1464     }
   1465     va_end(args);
   1466     return res;                /* Assumed ok, if error flagged above. */
   1467 }
   1468 #endif
   1469 
   1470 
   1471 #ifdef BN_MP_CLEAR_MULTI_C
   1472 static void mp_clear_multi(mp_int *mp, ...)
   1473 {
   1474     mp_int* next_mp = mp;
   1475     va_list args;
   1476     va_start(args, mp);
   1477     while (next_mp != NULL) {
   1478         mp_clear(next_mp);
   1479         next_mp = va_arg(args, mp_int*);
   1480     }
   1481     va_end(args);
   1482 }
   1483 #endif
   1484 
   1485 
   1486 /* shift left a certain amount of digits */
   1487 static int mp_lshd (mp_int * a, int b)
   1488 {
   1489   int     x, res;
   1490 
   1491   /* if its less than zero return */
   1492   if (b <= 0) {
   1493     return MP_OKAY;
   1494   }
   1495 
   1496   /* grow to fit the new digits */
   1497   if (a->alloc < a->used + b) {
   1498      if ((res = mp_grow (a, a->used + b)) != MP_OKAY) {
   1499        return res;
   1500      }
   1501   }
   1502 
   1503   {
   1504     register mp_digit *top, *bottom;
   1505 
   1506     /* increment the used by the shift amount then copy upwards */
   1507     a->used += b;
   1508 
   1509     /* top */
   1510     top = a->dp + a->used - 1;
   1511 
   1512     /* base */
   1513     bottom = a->dp + a->used - 1 - b;
   1514 
   1515     /* much like mp_rshd this is implemented using a sliding window
   1516      * except the window goes the otherway around.  Copying from
   1517      * the bottom to the top.  see bn_mp_rshd.c for more info.
   1518      */
   1519     for (x = a->used - 1; x >= b; x--) {
   1520       *top-- = *bottom--;
   1521     }
   1522 
   1523     /* zero the lower digits */
   1524     top = a->dp;
   1525     for (x = 0; x < b; x++) {
   1526       *top++ = 0;
   1527     }
   1528   }
   1529   return MP_OKAY;
   1530 }
   1531 
   1532 
   1533 /* returns the number of bits in an int */
   1534 static int mp_count_bits (mp_int * a)
   1535 {
   1536   int     r;
   1537   mp_digit q;
   1538 
   1539   /* shortcut */
   1540   if (a->used == 0) {
   1541     return 0;
   1542   }
   1543 
   1544   /* get number of digits and add that */
   1545   r = (a->used - 1) * DIGIT_BIT;
   1546 
   1547   /* take the last digit and count the bits in it */
   1548   q = a->dp[a->used - 1];
   1549   while (q > ((mp_digit) 0)) {
   1550     ++r;
   1551     q >>= ((mp_digit) 1);
   1552   }
   1553   return r;
   1554 }
   1555 
   1556 
   1557 /* calc a value mod 2**b */
   1558 static int mp_mod_2d (mp_int * a, int b, mp_int * c)
   1559 {
   1560   int     x, res;
   1561 
   1562   /* if b is <= 0 then zero the int */
   1563   if (b <= 0) {
   1564     mp_zero (c);
   1565     return MP_OKAY;
   1566   }
   1567 
   1568   /* if the modulus is larger than the value than return */
   1569   if (b >= (int) (a->used * DIGIT_BIT)) {
   1570     res = mp_copy (a, c);
   1571     return res;
   1572   }
   1573 
   1574   /* copy */
   1575   if ((res = mp_copy (a, c)) != MP_OKAY) {
   1576     return res;
   1577   }
   1578 
   1579   /* zero digits above the last digit of the modulus */
   1580   for (x = (b / DIGIT_BIT) + ((b % DIGIT_BIT) == 0 ? 0 : 1); x < c->used; x++) {
   1581     c->dp[x] = 0;
   1582   }
   1583   /* clear the digit that is not completely outside/inside the modulus */
   1584   c->dp[b / DIGIT_BIT] &=
   1585     (mp_digit) ((((mp_digit) 1) << (((mp_digit) b) % DIGIT_BIT)) - ((mp_digit) 1));
   1586   mp_clamp (c);
   1587   return MP_OKAY;
   1588 }
   1589 
   1590 
   1591 #ifdef BN_MP_DIV_SMALL
   1592 
   1593 /* slower bit-bang division... also smaller */
   1594 static int mp_div(mp_int * a, mp_int * b, mp_int * c, mp_int * d)
   1595 {
   1596    mp_int ta, tb, tq, q;
   1597    int    res, n, n2;
   1598 
   1599   /* is divisor zero ? */
   1600   if (mp_iszero (b) == 1) {
   1601     return MP_VAL;
   1602   }
   1603 
   1604   /* if a < b then q=0, r = a */
   1605   if (mp_cmp_mag (a, b) == MP_LT) {
   1606     if (d != NULL) {
   1607       res = mp_copy (a, d);
   1608     } else {
   1609       res = MP_OKAY;
   1610     }
   1611     if (c != NULL) {
   1612       mp_zero (c);
   1613     }
   1614     return res;
   1615   }
   1616 
   1617   /* init our temps */
   1618   if ((res = mp_init_multi(&ta, &tb, &tq, &q, NULL) != MP_OKAY)) {
   1619      return res;
   1620   }
   1621 
   1622 
   1623   mp_set(&tq, 1);
   1624   n = mp_count_bits(a) - mp_count_bits(b);
   1625   if (((res = mp_abs(a, &ta)) != MP_OKAY) ||
   1626       ((res = mp_abs(b, &tb)) != MP_OKAY) ||
   1627       ((res = mp_mul_2d(&tb, n, &tb)) != MP_OKAY) ||
   1628       ((res = mp_mul_2d(&tq, n, &tq)) != MP_OKAY)) {
   1629       goto LBL_ERR;
   1630   }
   1631 
   1632   while (n-- >= 0) {
   1633      if (mp_cmp(&tb, &ta) != MP_GT) {
   1634         if (((res = mp_sub(&ta, &tb, &ta)) != MP_OKAY) ||
   1635             ((res = mp_add(&q, &tq, &q)) != MP_OKAY)) {
   1636            goto LBL_ERR;
   1637         }
   1638      }
   1639      if (((res = mp_div_2d(&tb, 1, &tb, NULL)) != MP_OKAY) ||
   1640          ((res = mp_div_2d(&tq, 1, &tq, NULL)) != MP_OKAY)) {
   1641            goto LBL_ERR;
   1642      }
   1643   }
   1644 
   1645   /* now q == quotient and ta == remainder */
   1646   n  = a->sign;
   1647   n2 = (a->sign == b->sign ? MP_ZPOS : MP_NEG);
   1648   if (c != NULL) {
   1649      mp_exch(c, &q);
   1650      c->sign  = (mp_iszero(c) == MP_YES) ? MP_ZPOS : n2;
   1651   }
   1652   if (d != NULL) {
   1653      mp_exch(d, &ta);
   1654      d->sign = (mp_iszero(d) == MP_YES) ? MP_ZPOS : n;
   1655   }
   1656 LBL_ERR:
   1657    mp_clear_multi(&ta, &tb, &tq, &q, NULL);
   1658    return res;
   1659 }
   1660 
   1661 #else
   1662 
   1663 /* integer signed division.
   1664  * c*b + d == a [e.g. a/b, c=quotient, d=remainder]
   1665  * HAC pp.598 Algorithm 14.20
   1666  *
   1667  * Note that the description in HAC is horribly
   1668  * incomplete.  For example, it doesn't consider
   1669  * the case where digits are removed from 'x' in
   1670  * the inner loop.  It also doesn't consider the
   1671  * case that y has fewer than three digits, etc..
   1672  *
   1673  * The overall algorithm is as described as
   1674  * 14.20 from HAC but fixed to treat these cases.
   1675 */
   1676 static int mp_div (mp_int * a, mp_int * b, mp_int * c, mp_int * d)
   1677 {
   1678   mp_int  q, x, y, t1, t2;
   1679   int     res, n, t, i, norm, neg;
   1680 
   1681   /* is divisor zero ? */
   1682   if (mp_iszero (b) == 1) {
   1683     return MP_VAL;
   1684   }
   1685 
   1686   /* if a < b then q=0, r = a */
   1687   if (mp_cmp_mag (a, b) == MP_LT) {
   1688     if (d != NULL) {
   1689       res = mp_copy (a, d);
   1690     } else {
   1691       res = MP_OKAY;
   1692     }
   1693     if (c != NULL) {
   1694       mp_zero (c);
   1695     }
   1696     return res;
   1697   }
   1698 
   1699   if ((res = mp_init_size (&q, a->used + 2)) != MP_OKAY) {
   1700     return res;
   1701   }
   1702   q.used = a->used + 2;
   1703 
   1704   if ((res = mp_init (&t1)) != MP_OKAY) {
   1705     goto LBL_Q;
   1706   }
   1707 
   1708   if ((res = mp_init (&t2)) != MP_OKAY) {
   1709     goto LBL_T1;
   1710   }
   1711 
   1712   if ((res = mp_init_copy (&x, a)) != MP_OKAY) {
   1713     goto LBL_T2;
   1714   }
   1715 
   1716   if ((res = mp_init_copy (&y, b)) != MP_OKAY) {
   1717     goto LBL_X;
   1718   }
   1719 
   1720   /* fix the sign */
   1721   neg = (a->sign == b->sign) ? MP_ZPOS : MP_NEG;
   1722   x.sign = y.sign = MP_ZPOS;
   1723 
   1724   /* normalize both x and y, ensure that y >= b/2, [b == 2**DIGIT_BIT] */
   1725   norm = mp_count_bits(&y) % DIGIT_BIT;
   1726   if (norm < (int)(DIGIT_BIT-1)) {
   1727      norm = (DIGIT_BIT-1) - norm;
   1728      if ((res = mp_mul_2d (&x, norm, &x)) != MP_OKAY) {
   1729        goto LBL_Y;
   1730      }
   1731      if ((res = mp_mul_2d (&y, norm, &y)) != MP_OKAY) {
   1732        goto LBL_Y;
   1733      }
   1734   } else {
   1735      norm = 0;
   1736   }
   1737 
   1738   /* note hac does 0 based, so if used==5 then its 0,1,2,3,4, e.g. use 4 */
   1739   n = x.used - 1;
   1740   t = y.used - 1;
   1741 
   1742   /* while (x >= y*b**n-t) do { q[n-t] += 1; x -= y*b**{n-t} } */
   1743   if ((res = mp_lshd (&y, n - t)) != MP_OKAY) { /* y = y*b**{n-t} */
   1744     goto LBL_Y;
   1745   }
   1746 
   1747   while (mp_cmp (&x, &y) != MP_LT) {
   1748     ++(q.dp[n - t]);
   1749     if ((res = mp_sub (&x, &y, &x)) != MP_OKAY) {
   1750       goto LBL_Y;
   1751     }
   1752   }
   1753 
   1754   /* reset y by shifting it back down */
   1755   mp_rshd (&y, n - t);
   1756 
   1757   /* step 3. for i from n down to (t + 1) */
   1758   for (i = n; i >= (t + 1); i--) {
   1759     if (i > x.used) {
   1760       continue;
   1761     }
   1762 
   1763     /* step 3.1 if xi == yt then set q{i-t-1} to b-1,
   1764      * otherwise set q{i-t-1} to (xi*b + x{i-1})/yt */
   1765     if (x.dp[i] == y.dp[t]) {
   1766       q.dp[i - t - 1] = ((((mp_digit)1) << DIGIT_BIT) - 1);
   1767     } else {
   1768       mp_word tmp;
   1769       tmp = ((mp_word) x.dp[i]) << ((mp_word) DIGIT_BIT);
   1770       tmp |= ((mp_word) x.dp[i - 1]);
   1771       tmp /= ((mp_word) y.dp[t]);
   1772       if (tmp > (mp_word) MP_MASK)
   1773         tmp = MP_MASK;
   1774       q.dp[i - t - 1] = (mp_digit) (tmp & (mp_word) (MP_MASK));
   1775     }
   1776 
   1777     /* while (q{i-t-1} * (yt * b + y{t-1})) >
   1778              xi * b**2 + xi-1 * b + xi-2
   1779 
   1780        do q{i-t-1} -= 1;
   1781     */
   1782     q.dp[i - t - 1] = (q.dp[i - t - 1] + 1) & MP_MASK;
   1783     do {
   1784       q.dp[i - t - 1] = (q.dp[i - t - 1] - 1) & MP_MASK;
   1785 
   1786       /* find left hand */
   1787       mp_zero (&t1);
   1788       t1.dp[0] = (t - 1 < 0) ? 0 : y.dp[t - 1];
   1789       t1.dp[1] = y.dp[t];
   1790       t1.used = 2;
   1791       if ((res = mp_mul_d (&t1, q.dp[i - t - 1], &t1)) != MP_OKAY) {
   1792         goto LBL_Y;
   1793       }
   1794 
   1795       /* find right hand */
   1796       t2.dp[0] = (i - 2 < 0) ? 0 : x.dp[i - 2];
   1797       t2.dp[1] = (i - 1 < 0) ? 0 : x.dp[i - 1];
   1798       t2.dp[2] = x.dp[i];
   1799       t2.used = 3;
   1800     } while (mp_cmp_mag(&t1, &t2) == MP_GT);
   1801 
   1802     /* step 3.3 x = x - q{i-t-1} * y * b**{i-t-1} */
   1803     if ((res = mp_mul_d (&y, q.dp[i - t - 1], &t1)) != MP_OKAY) {
   1804       goto LBL_Y;
   1805     }
   1806 
   1807     if ((res = mp_lshd (&t1, i - t - 1)) != MP_OKAY) {
   1808       goto LBL_Y;
   1809     }
   1810 
   1811     if ((res = mp_sub (&x, &t1, &x)) != MP_OKAY) {
   1812       goto LBL_Y;
   1813     }
   1814 
   1815     /* if x < 0 then { x = x + y*b**{i-t-1}; q{i-t-1} -= 1; } */
   1816     if (x.sign == MP_NEG) {
   1817       if ((res = mp_copy (&y, &t1)) != MP_OKAY) {
   1818         goto LBL_Y;
   1819       }
   1820       if ((res = mp_lshd (&t1, i - t - 1)) != MP_OKAY) {
   1821         goto LBL_Y;
   1822       }
   1823       if ((res = mp_add (&x, &t1, &x)) != MP_OKAY) {
   1824         goto LBL_Y;
   1825       }
   1826 
   1827       q.dp[i - t - 1] = (q.dp[i - t - 1] - 1UL) & MP_MASK;
   1828     }
   1829   }
   1830 
   1831   /* now q is the quotient and x is the remainder
   1832    * [which we have to normalize]
   1833    */
   1834 
   1835   /* get sign before writing to c */
   1836   x.sign = x.used == 0 ? MP_ZPOS : a->sign;
   1837 
   1838   if (c != NULL) {
   1839     mp_clamp (&q);
   1840     mp_exch (&q, c);
   1841     c->sign = neg;
   1842   }
   1843 
   1844   if (d != NULL) {
   1845     mp_div_2d (&x, norm, &x, NULL);
   1846     mp_exch (&x, d);
   1847   }
   1848 
   1849   res = MP_OKAY;
   1850 
   1851 LBL_Y:mp_clear (&y);
   1852 LBL_X:mp_clear (&x);
   1853 LBL_T2:mp_clear (&t2);
   1854 LBL_T1:mp_clear (&t1);
   1855 LBL_Q:mp_clear (&q);
   1856   return res;
   1857 }
   1858 
   1859 #endif
   1860 
   1861 
   1862 #ifdef MP_LOW_MEM
   1863    #define TAB_SIZE 32
   1864 #else
   1865    #define TAB_SIZE 256
   1866 #endif
   1867 
   1868 static int s_mp_exptmod (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode)
   1869 {
   1870   mp_int  M[TAB_SIZE], res, mu;
   1871   mp_digit buf;
   1872   int     err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize;
   1873   int (*redux)(mp_int*,mp_int*,mp_int*);
   1874 
   1875   /* find window size */
   1876   x = mp_count_bits (X);
   1877   if (x <= 7) {
   1878     winsize = 2;
   1879   } else if (x <= 36) {
   1880     winsize = 3;
   1881   } else if (x <= 140) {
   1882     winsize = 4;
   1883   } else if (x <= 450) {
   1884     winsize = 5;
   1885   } else if (x <= 1303) {
   1886     winsize = 6;
   1887   } else if (x <= 3529) {
   1888     winsize = 7;
   1889   } else {
   1890     winsize = 8;
   1891   }
   1892 
   1893 #ifdef MP_LOW_MEM
   1894     if (winsize > 5) {
   1895        winsize = 5;
   1896     }
   1897 #endif
   1898 
   1899   /* init M array */
   1900   /* init first cell */
   1901   if ((err = mp_init(&M[1])) != MP_OKAY) {
   1902      return err;
   1903   }
   1904 
   1905   /* now init the second half of the array */
   1906   for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
   1907     if ((err = mp_init(&M[x])) != MP_OKAY) {
   1908       for (y = 1<<(winsize-1); y < x; y++) {
   1909         mp_clear (&M[y]);
   1910       }
   1911       mp_clear(&M[1]);
   1912       return err;
   1913     }
   1914   }
   1915 
   1916   /* create mu, used for Barrett reduction */
   1917   if ((err = mp_init (&mu)) != MP_OKAY) {
   1918     goto LBL_M;
   1919   }
   1920 
   1921   if (redmode == 0) {
   1922      if ((err = mp_reduce_setup (&mu, P)) != MP_OKAY) {
   1923         goto LBL_MU;
   1924      }
   1925      redux = mp_reduce;
   1926   } else {
   1927      if ((err = mp_reduce_2k_setup_l (P, &mu)) != MP_OKAY) {
   1928         goto LBL_MU;
   1929      }
   1930      redux = mp_reduce_2k_l;
   1931   }
   1932 
   1933   /* create M table
   1934    *
   1935    * The M table contains powers of the base,
   1936    * e.g. M[x] = G**x mod P
   1937    *
   1938    * The first half of the table is not
   1939    * computed though accept for M[0] and M[1]
   1940    */
   1941   if ((err = mp_mod (G, P, &M[1])) != MP_OKAY) {
   1942     goto LBL_MU;
   1943   }
   1944 
   1945   /* compute the value at M[1<<(winsize-1)] by squaring
   1946    * M[1] (winsize-1) times
   1947    */
   1948   if ((err = mp_copy (&M[1], &M[1 << (winsize - 1)])) != MP_OKAY) {
   1949     goto LBL_MU;
   1950   }
   1951 
   1952   for (x = 0; x < (winsize - 1); x++) {
   1953     /* square it */
   1954     if ((err = mp_sqr (&M[1 << (winsize - 1)],
   1955                        &M[1 << (winsize - 1)])) != MP_OKAY) {
   1956       goto LBL_MU;
   1957     }
   1958 
   1959     /* reduce modulo P */
   1960     if ((err = redux (&M[1 << (winsize - 1)], P, &mu)) != MP_OKAY) {
   1961       goto LBL_MU;
   1962     }
   1963   }
   1964 
   1965   /* create upper table, that is M[x] = M[x-1] * M[1] (mod P)
   1966    * for x = (2**(winsize - 1) + 1) to (2**winsize - 1)
   1967    */
   1968   for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) {
   1969     if ((err = mp_mul (&M[x - 1], &M[1], &M[x])) != MP_OKAY) {
   1970       goto LBL_MU;
   1971     }
   1972     if ((err = redux (&M[x], P, &mu)) != MP_OKAY) {
   1973       goto LBL_MU;
   1974     }
   1975   }
   1976 
   1977   /* setup result */
   1978   if ((err = mp_init (&res)) != MP_OKAY) {
   1979     goto LBL_MU;
   1980   }
   1981   mp_set (&res, 1);
   1982 
   1983   /* set initial mode and bit cnt */
   1984   mode   = 0;
   1985   bitcnt = 1;
   1986   buf    = 0;
   1987   digidx = X->used - 1;
   1988   bitcpy = 0;
   1989   bitbuf = 0;
   1990 
   1991   for (;;) {
   1992     /* grab next digit as required */
   1993     if (--bitcnt == 0) {
   1994       /* if digidx == -1 we are out of digits */
   1995       if (digidx == -1) {
   1996         break;
   1997       }
   1998       /* read next digit and reset the bitcnt */
   1999       buf    = X->dp[digidx--];
   2000       bitcnt = (int) DIGIT_BIT;
   2001     }
   2002 
   2003     /* grab the next msb from the exponent */
   2004     y     = (buf >> (mp_digit)(DIGIT_BIT - 1)) & 1;
   2005     buf <<= (mp_digit)1;
   2006 
   2007     /* if the bit is zero and mode == 0 then we ignore it
   2008      * These represent the leading zero bits before the first 1 bit
   2009      * in the exponent.  Technically this opt is not required but it
   2010      * does lower the # of trivial squaring/reductions used
   2011      */
   2012     if (mode == 0 && y == 0) {
   2013       continue;
   2014     }
   2015 
   2016     /* if the bit is zero and mode == 1 then we square */
   2017     if (mode == 1 && y == 0) {
   2018       if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
   2019         goto LBL_RES;
   2020       }
   2021       if ((err = redux (&res, P, &mu)) != MP_OKAY) {
   2022         goto LBL_RES;
   2023       }
   2024       continue;
   2025     }
   2026 
   2027     /* else we add it to the window */
   2028     bitbuf |= (y << (winsize - ++bitcpy));
   2029     mode    = 2;
   2030 
   2031     if (bitcpy == winsize) {
   2032       /* ok window is filled so square as required and multiply  */
   2033       /* square first */
   2034       for (x = 0; x < winsize; x++) {
   2035         if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
   2036           goto LBL_RES;
   2037         }
   2038         if ((err = redux (&res, P, &mu)) != MP_OKAY) {
   2039           goto LBL_RES;
   2040         }
   2041       }
   2042 
   2043       /* then multiply */
   2044       if ((err = mp_mul (&res, &M[bitbuf], &res)) != MP_OKAY) {
   2045         goto LBL_RES;
   2046       }
   2047       if ((err = redux (&res, P, &mu)) != MP_OKAY) {
   2048         goto LBL_RES;
   2049       }
   2050 
   2051       /* empty window and reset */
   2052       bitcpy = 0;
   2053       bitbuf = 0;
   2054       mode   = 1;
   2055     }
   2056   }
   2057 
   2058   /* if bits remain then square/multiply */
   2059   if (mode == 2 && bitcpy > 0) {
   2060     /* square then multiply if the bit is set */
   2061     for (x = 0; x < bitcpy; x++) {
   2062       if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
   2063         goto LBL_RES;
   2064       }
   2065       if ((err = redux (&res, P, &mu)) != MP_OKAY) {
   2066         goto LBL_RES;
   2067       }
   2068 
   2069       bitbuf <<= 1;
   2070       if ((bitbuf & (1 << winsize)) != 0) {
   2071         /* then multiply */
   2072         if ((err = mp_mul (&res, &M[1], &res)) != MP_OKAY) {
   2073           goto LBL_RES;
   2074         }
   2075         if ((err = redux (&res, P, &mu)) != MP_OKAY) {
   2076           goto LBL_RES;
   2077         }
   2078       }
   2079     }
   2080   }
   2081 
   2082   mp_exch (&res, Y);
   2083   err = MP_OKAY;
   2084 LBL_RES:mp_clear (&res);
   2085 LBL_MU:mp_clear (&mu);
   2086 LBL_M:
   2087   mp_clear(&M[1]);
   2088   for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
   2089     mp_clear (&M[x]);
   2090   }
   2091   return err;
   2092 }
   2093 
   2094 
   2095 /* computes b = a*a */
   2096 static int mp_sqr (mp_int * a, mp_int * b)
   2097 {
   2098   int     res;
   2099 
   2100 #ifdef BN_MP_TOOM_SQR_C
   2101   /* use Toom-Cook? */
   2102   if (a->used >= TOOM_SQR_CUTOFF) {
   2103     res = mp_toom_sqr(a, b);
   2104   /* Karatsuba? */
   2105   } else
   2106 #endif
   2107 #ifdef BN_MP_KARATSUBA_SQR_C
   2108 if (a->used >= KARATSUBA_SQR_CUTOFF) {
   2109     res = mp_karatsuba_sqr (a, b);
   2110   } else
   2111 #endif
   2112   {
   2113 #ifdef BN_FAST_S_MP_SQR_C
   2114     /* can we use the fast comba multiplier? */
   2115     if ((a->used * 2 + 1) < MP_WARRAY &&
   2116          a->used <
   2117          (1 << (sizeof(mp_word) * CHAR_BIT - 2*DIGIT_BIT - 1))) {
   2118       res = fast_s_mp_sqr (a, b);
   2119     } else
   2120 #endif
   2121 #ifdef BN_S_MP_SQR_C
   2122       res = s_mp_sqr (a, b);
   2123 #else
   2124 #error mp_sqr could fail
   2125       res = MP_VAL;
   2126 #endif
   2127   }
   2128   b->sign = MP_ZPOS;
   2129   return res;
   2130 }
   2131 
   2132 
   2133 /* reduces a modulo n where n is of the form 2**p - d
   2134    This differs from reduce_2k since "d" can be larger
   2135    than a single digit.
   2136 */
   2137 static int mp_reduce_2k_l(mp_int *a, mp_int *n, mp_int *d)
   2138 {
   2139    mp_int q;
   2140    int    p, res;
   2141 
   2142    if ((res = mp_init(&q)) != MP_OKAY) {
   2143       return res;
   2144    }
   2145 
   2146    p = mp_count_bits(n);
   2147 top:
   2148    /* q = a/2**p, a = a mod 2**p */
   2149    if ((res = mp_div_2d(a, p, &q, a)) != MP_OKAY) {
   2150       goto ERR;
   2151    }
   2152 
   2153    /* q = q * d */
   2154    if ((res = mp_mul(&q, d, &q)) != MP_OKAY) {
   2155       goto ERR;
   2156    }
   2157 
   2158    /* a = a + q */
   2159    if ((res = s_mp_add(a, &q, a)) != MP_OKAY) {
   2160       goto ERR;
   2161    }
   2162 
   2163    if (mp_cmp_mag(a, n) != MP_LT) {
   2164       s_mp_sub(a, n, a);
   2165       goto top;
   2166    }
   2167 
   2168 ERR:
   2169    mp_clear(&q);
   2170    return res;
   2171 }
   2172 
   2173 
   2174 /* determines the setup value */
   2175 static int mp_reduce_2k_setup_l(mp_int *a, mp_int *d)
   2176 {
   2177    int    res;
   2178    mp_int tmp;
   2179 
   2180    if ((res = mp_init(&tmp)) != MP_OKAY) {
   2181       return res;
   2182    }
   2183 
   2184    if ((res = mp_2expt(&tmp, mp_count_bits(a))) != MP_OKAY) {
   2185       goto ERR;
   2186    }
   2187 
   2188    if ((res = s_mp_sub(&tmp, a, d)) != MP_OKAY) {
   2189       goto ERR;
   2190    }
   2191 
   2192 ERR:
   2193    mp_clear(&tmp);
   2194    return res;
   2195 }
   2196 
   2197 
   2198 /* computes a = 2**b
   2199  *
   2200  * Simple algorithm which zeroes the int, grows it then just sets one bit
   2201  * as required.
   2202  */
   2203 static int mp_2expt (mp_int * a, int b)
   2204 {
   2205   int     res;
   2206 
   2207   /* zero a as per default */
   2208   mp_zero (a);
   2209 
   2210   /* grow a to accommodate the single bit */
   2211   if ((res = mp_grow (a, b / DIGIT_BIT + 1)) != MP_OKAY) {
   2212     return res;
   2213   }
   2214 
   2215   /* set the used count of where the bit will go */
   2216   a->used = b / DIGIT_BIT + 1;
   2217 
   2218   /* put the single bit in its place */
   2219   a->dp[b / DIGIT_BIT] = ((mp_digit)1) << (b % DIGIT_BIT);
   2220 
   2221   return MP_OKAY;
   2222 }
   2223 
   2224 
   2225 /* pre-calculate the value required for Barrett reduction
   2226  * For a given modulus "b" it calulates the value required in "a"
   2227  */
   2228 static int mp_reduce_setup (mp_int * a, mp_int * b)
   2229 {
   2230   int     res;
   2231 
   2232   if ((res = mp_2expt (a, b->used * 2 * DIGIT_BIT)) != MP_OKAY) {
   2233     return res;
   2234   }
   2235   return mp_div (a, b, a, NULL);
   2236 }
   2237 
   2238 
   2239 /* reduces x mod m, assumes 0 < x < m**2, mu is
   2240  * precomputed via mp_reduce_setup.
   2241  * From HAC pp.604 Algorithm 14.42
   2242  */
   2243 static int mp_reduce (mp_int * x, mp_int * m, mp_int * mu)
   2244 {
   2245   mp_int  q;
   2246   int     res, um = m->used;
   2247 
   2248   /* q = x */
   2249   if ((res = mp_init_copy (&q, x)) != MP_OKAY) {
   2250     return res;
   2251   }
   2252 
   2253   /* q1 = x / b**(k-1)  */
   2254   mp_rshd (&q, um - 1);
   2255 
   2256   /* according to HAC this optimization is ok */
   2257   if (((unsigned long) um) > (((mp_digit)1) << (DIGIT_BIT - 1))) {
   2258     if ((res = mp_mul (&q, mu, &q)) != MP_OKAY) {
   2259       goto CLEANUP;
   2260     }
   2261   } else {
   2262 #ifdef BN_S_MP_MUL_HIGH_DIGS_C
   2263     if ((res = s_mp_mul_high_digs (&q, mu, &q, um)) != MP_OKAY) {
   2264       goto CLEANUP;
   2265     }
   2266 #elif defined(BN_FAST_S_MP_MUL_HIGH_DIGS_C)
   2267     if ((res = fast_s_mp_mul_high_digs (&q, mu, &q, um)) != MP_OKAY) {
   2268       goto CLEANUP;
   2269     }
   2270 #else
   2271     {
   2272 #error mp_reduce would always fail
   2273       res = MP_VAL;
   2274       goto CLEANUP;
   2275     }
   2276 #endif
   2277   }
   2278 
   2279   /* q3 = q2 / b**(k+1) */
   2280   mp_rshd (&q, um + 1);
   2281 
   2282   /* x = x mod b**(k+1), quick (no division) */
   2283   if ((res = mp_mod_2d (x, DIGIT_BIT * (um + 1), x)) != MP_OKAY) {
   2284     goto CLEANUP;
   2285   }
   2286 
   2287   /* q = q * m mod b**(k+1), quick (no division) */
   2288   if ((res = s_mp_mul_digs (&q, m, &q, um + 1)) != MP_OKAY) {
   2289     goto CLEANUP;
   2290   }
   2291 
   2292   /* x = x - q */
   2293   if ((res = mp_sub (x, &q, x)) != MP_OKAY) {
   2294     goto CLEANUP;
   2295   }
   2296 
   2297   /* If x < 0, add b**(k+1) to it */
   2298   if (mp_cmp_d (x, 0) == MP_LT) {
   2299     mp_set (&q, 1);
   2300     if ((res = mp_lshd (&q, um + 1)) != MP_OKAY) {
   2301       goto CLEANUP;
   2302     }
   2303     if ((res = mp_add (x, &q, x)) != MP_OKAY) {
   2304       goto CLEANUP;
   2305     }
   2306   }
   2307 
   2308   /* Back off if it's too big */
   2309   while (mp_cmp (x, m) != MP_LT) {
   2310     if ((res = s_mp_sub (x, m, x)) != MP_OKAY) {
   2311       goto CLEANUP;
   2312     }
   2313   }
   2314 
   2315 CLEANUP:
   2316   mp_clear (&q);
   2317 
   2318   return res;
   2319 }
   2320 
   2321 
   2322 /* multiplies |a| * |b| and only computes up to digs digits of result
   2323  * HAC pp. 595, Algorithm 14.12  Modified so you can control how
   2324  * many digits of output are created.
   2325  */
   2326 static int s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
   2327 {
   2328   mp_int  t;
   2329   int     res, pa, pb, ix, iy;
   2330   mp_digit u;
   2331   mp_word r;
   2332   mp_digit tmpx, *tmpt, *tmpy;
   2333 
   2334   /* can we use the fast multiplier? */
   2335   if (((digs) < MP_WARRAY) &&
   2336       MIN (a->used, b->used) <
   2337           (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
   2338     return fast_s_mp_mul_digs (a, b, c, digs);
   2339   }
   2340 
   2341   if ((res = mp_init_size (&t, digs)) != MP_OKAY) {
   2342     return res;
   2343   }
   2344   t.used = digs;
   2345 
   2346   /* compute the digits of the product directly */
   2347   pa = a->used;
   2348   for (ix = 0; ix < pa; ix++) {
   2349     /* set the carry to zero */
   2350     u = 0;
   2351 
   2352     /* limit ourselves to making digs digits of output */
   2353     pb = MIN (b->used, digs - ix);
   2354 
   2355     /* setup some aliases */
   2356     /* copy of the digit from a used within the nested loop */
   2357     tmpx = a->dp[ix];
   2358 
   2359     /* an alias for the destination shifted ix places */
   2360     tmpt = t.dp + ix;
   2361 
   2362     /* an alias for the digits of b */
   2363     tmpy = b->dp;
   2364 
   2365     /* compute the columns of the output and propagate the carry */
   2366     for (iy = 0; iy < pb; iy++) {
   2367       /* compute the column as a mp_word */
   2368       r       = ((mp_word)*tmpt) +
   2369                 ((mp_word)tmpx) * ((mp_word)*tmpy++) +
   2370                 ((mp_word) u);
   2371 
   2372       /* the new column is the lower part of the result */
   2373       *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
   2374 
   2375       /* get the carry word from the result */
   2376       u       = (mp_digit) (r >> ((mp_word) DIGIT_BIT));
   2377     }
   2378     /* set carry if it is placed below digs */
   2379     if (ix + iy < digs) {
   2380       *tmpt = u;
   2381     }
   2382   }
   2383 
   2384   mp_clamp (&t);
   2385   mp_exch (&t, c);
   2386 
   2387   mp_clear (&t);
   2388   return MP_OKAY;
   2389 }
   2390 
   2391 
   2392 /* Fast (comba) multiplier
   2393  *
   2394  * This is the fast column-array [comba] multiplier.  It is
   2395  * designed to compute the columns of the product first
   2396  * then handle the carries afterwards.  This has the effect
   2397  * of making the nested loops that compute the columns very
   2398  * simple and schedulable on super-scalar processors.
   2399  *
   2400  * This has been modified to produce a variable number of
   2401  * digits of output so if say only a half-product is required
   2402  * you don't have to compute the upper half (a feature
   2403  * required for fast Barrett reduction).
   2404  *
   2405  * Based on Algorithm 14.12 on pp.595 of HAC.
   2406  *
   2407  */
   2408 static int fast_s_mp_mul_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
   2409 {
   2410   int     olduse, res, pa, ix, iz;
   2411   mp_digit W[MP_WARRAY];
   2412   register mp_word  _W;
   2413 
   2414   /* grow the destination as required */
   2415   if (c->alloc < digs) {
   2416     if ((res = mp_grow (c, digs)) != MP_OKAY) {
   2417       return res;
   2418     }
   2419   }
   2420 
   2421   /* number of output digits to produce */
   2422   pa = MIN(digs, a->used + b->used);
   2423 
   2424   /* clear the carry */
   2425   _W = 0;
   2426   for (ix = 0; ix < pa; ix++) {
   2427       int      tx, ty;
   2428       int      iy;
   2429       mp_digit *tmpx, *tmpy;
   2430 
   2431       /* get offsets into the two bignums */
   2432       ty = MIN(b->used-1, ix);
   2433       tx = ix - ty;
   2434 
   2435       /* setup temp aliases */
   2436       tmpx = a->dp + tx;
   2437       tmpy = b->dp + ty;
   2438 
   2439       /* this is the number of times the loop will iterrate, essentially
   2440          while (tx++ < a->used && ty-- >= 0) { ... }
   2441        */
   2442       iy = MIN(a->used-tx, ty+1);
   2443 
   2444       /* execute loop */
   2445       for (iz = 0; iz < iy; ++iz) {
   2446          _W += ((mp_word)*tmpx++)*((mp_word)*tmpy--);
   2447 
   2448       }
   2449 
   2450       /* store term */
   2451       W[ix] = ((mp_digit)_W) & MP_MASK;
   2452 
   2453       /* make next carry */
   2454       _W = _W >> ((mp_word)DIGIT_BIT);
   2455  }
   2456 
   2457   /* setup dest */
   2458   olduse  = c->used;
   2459   c->used = pa;
   2460 
   2461   {
   2462     register mp_digit *tmpc;
   2463     tmpc = c->dp;
   2464     for (ix = 0; ix < pa+1; ix++) {
   2465       /* now extract the previous digit [below the carry] */
   2466       *tmpc++ = W[ix];
   2467     }
   2468 
   2469     /* clear unused digits [that existed in the old copy of c] */
   2470     for (; ix < olduse; ix++) {
   2471       *tmpc++ = 0;
   2472     }
   2473   }
   2474   mp_clamp (c);
   2475   return MP_OKAY;
   2476 }
   2477 
   2478 
   2479 /* init an mp_init for a given size */
   2480 static int mp_init_size (mp_int * a, int size)
   2481 {
   2482   int x;
   2483 
   2484   /* pad size so there are always extra digits */
   2485   size += (MP_PREC * 2) - (size % MP_PREC);
   2486 
   2487   /* alloc mem */
   2488   a->dp = OPT_CAST(mp_digit) XMALLOC (sizeof (mp_digit) * size);
   2489   if (a->dp == NULL) {
   2490     return MP_MEM;
   2491   }
   2492 
   2493   /* set the members */
   2494   a->used  = 0;
   2495   a->alloc = size;
   2496   a->sign  = MP_ZPOS;
   2497 
   2498   /* zero the digits */
   2499   for (x = 0; x < size; x++) {
   2500       a->dp[x] = 0;
   2501   }
   2502 
   2503   return MP_OKAY;
   2504 }
   2505 
   2506 
   2507 /* low level squaring, b = a*a, HAC pp.596-597, Algorithm 14.16 */
   2508 static int s_mp_sqr (mp_int * a, mp_int * b)
   2509 {
   2510   mp_int  t;
   2511   int     res, ix, iy, pa;
   2512   mp_word r;
   2513   mp_digit u, tmpx, *tmpt;
   2514 
   2515   pa = a->used;
   2516   if ((res = mp_init_size (&t, 2*pa + 1)) != MP_OKAY) {
   2517     return res;
   2518   }
   2519 
   2520   /* default used is maximum possible size */
   2521   t.used = 2*pa + 1;
   2522 
   2523   for (ix = 0; ix < pa; ix++) {
   2524     /* first calculate the digit at 2*ix */
   2525     /* calculate double precision result */
   2526     r = ((mp_word) t.dp[2*ix]) +
   2527         ((mp_word)a->dp[ix])*((mp_word)a->dp[ix]);
   2528 
   2529     /* store lower part in result */
   2530     t.dp[ix+ix] = (mp_digit) (r & ((mp_word) MP_MASK));
   2531 
   2532     /* get the carry */
   2533     u           = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
   2534 
   2535     /* left hand side of A[ix] * A[iy] */
   2536     tmpx        = a->dp[ix];
   2537 
   2538     /* alias for where to store the results */
   2539     tmpt        = t.dp + (2*ix + 1);
   2540 
   2541     for (iy = ix + 1; iy < pa; iy++) {
   2542       /* first calculate the product */
   2543       r       = ((mp_word)tmpx) * ((mp_word)a->dp[iy]);
   2544 
   2545       /* now calculate the double precision result, note we use
   2546        * addition instead of *2 since it's easier to optimize
   2547        */
   2548       r       = ((mp_word) *tmpt) + r + r + ((mp_word) u);
   2549 
   2550       /* store lower part */
   2551       *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
   2552 
   2553       /* get carry */
   2554       u       = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
   2555     }
   2556     /* propagate upwards */
   2557     while (u != ((mp_digit) 0)) {
   2558       r       = ((mp_word) *tmpt) + ((mp_word) u);
   2559       *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
   2560       u       = (mp_digit)(r >> ((mp_word) DIGIT_BIT));
   2561     }
   2562   }
   2563 
   2564   mp_clamp (&t);
   2565   mp_exch (&t, b);
   2566   mp_clear (&t);
   2567   return MP_OKAY;
   2568 }
   2569 
   2570 
   2571 /* multiplies |a| * |b| and does not compute the lower digs digits
   2572  * [meant to get the higher part of the product]
   2573  */
   2574 static int s_mp_mul_high_digs (mp_int * a, mp_int * b, mp_int * c, int digs)
   2575 {
   2576   mp_int  t;
   2577   int     res, pa, pb, ix, iy;
   2578   mp_digit u;
   2579   mp_word r;
   2580   mp_digit tmpx, *tmpt, *tmpy;
   2581 
   2582   /* can we use the fast multiplier? */
   2583 #ifdef BN_FAST_S_MP_MUL_HIGH_DIGS_C
   2584   if (((a->used + b->used + 1) < MP_WARRAY)
   2585       && MIN (a->used, b->used) < (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
   2586     return fast_s_mp_mul_high_digs (a, b, c, digs);
   2587   }
   2588 #endif
   2589 
   2590   if ((res = mp_init_size (&t, a->used + b->used + 1)) != MP_OKAY) {
   2591     return res;
   2592   }
   2593   t.used = a->used + b->used + 1;
   2594 
   2595   pa = a->used;
   2596   pb = b->used;
   2597   for (ix = 0; ix < pa; ix++) {
   2598     /* clear the carry */
   2599     u = 0;
   2600 
   2601     /* left hand side of A[ix] * B[iy] */
   2602     tmpx = a->dp[ix];
   2603 
   2604     /* alias to the address of where the digits will be stored */
   2605     tmpt = &(t.dp[digs]);
   2606 
   2607     /* alias for where to read the right hand side from */
   2608     tmpy = b->dp + (digs - ix);
   2609 
   2610     for (iy = digs - ix; iy < pb; iy++) {
   2611       /* calculate the double precision result */
   2612       r       = ((mp_word)*tmpt) +
   2613                 ((mp_word)tmpx) * ((mp_word)*tmpy++) +
   2614                 ((mp_word) u);
   2615 
   2616       /* get the lower part */
   2617       *tmpt++ = (mp_digit) (r & ((mp_word) MP_MASK));
   2618 
   2619       /* carry the carry */
   2620       u       = (mp_digit) (r >> ((mp_word) DIGIT_BIT));
   2621     }
   2622     *tmpt = u;
   2623   }
   2624   mp_clamp (&t);
   2625   mp_exch (&t, c);
   2626   mp_clear (&t);
   2627   return MP_OKAY;
   2628 }
   2629 
   2630 
   2631 #ifdef BN_MP_MONTGOMERY_SETUP_C
   2632 /* setups the montgomery reduction stuff */
   2633 static int
   2634 mp_montgomery_setup (mp_int * n, mp_digit * rho)
   2635 {
   2636   mp_digit x, b;
   2637 
   2638 /* fast inversion mod 2**k
   2639  *
   2640  * Based on the fact that
   2641  *
   2642  * XA = 1 (mod 2**n)  =>  (X(2-XA)) A = 1 (mod 2**2n)
   2643  *                    =>  2*X*A - X*X*A*A = 1
   2644  *                    =>  2*(1) - (1)     = 1
   2645  */
   2646   b = n->dp[0];
   2647 
   2648   if ((b & 1) == 0) {
   2649     return MP_VAL;
   2650   }
   2651 
   2652   x = (((b + 2) & 4) << 1) + b; /* here x*a==1 mod 2**4 */
   2653   x *= 2 - b * x;               /* here x*a==1 mod 2**8 */
   2654 #if !defined(MP_8BIT)
   2655   x *= 2 - b * x;               /* here x*a==1 mod 2**16 */
   2656 #endif
   2657 #if defined(MP_64BIT) || !(defined(MP_8BIT) || defined(MP_16BIT))
   2658   x *= 2 - b * x;               /* here x*a==1 mod 2**32 */
   2659 #endif
   2660 #ifdef MP_64BIT
   2661   x *= 2 - b * x;               /* here x*a==1 mod 2**64 */
   2662 #endif
   2663 
   2664   /* rho = -1/m mod b */
   2665   *rho = (unsigned long)(((mp_word)1 << ((mp_word) DIGIT_BIT)) - x) & MP_MASK;
   2666 
   2667   return MP_OKAY;
   2668 }
   2669 #endif
   2670 
   2671 
   2672 #ifdef BN_FAST_MP_MONTGOMERY_REDUCE_C
   2673 /* computes xR**-1 == x (mod N) via Montgomery Reduction
   2674  *
   2675  * This is an optimized implementation of montgomery_reduce
   2676  * which uses the comba method to quickly calculate the columns of the
   2677  * reduction.
   2678  *
   2679  * Based on Algorithm 14.32 on pp.601 of HAC.
   2680 */
   2681 static int fast_mp_montgomery_reduce (mp_int * x, mp_int * n, mp_digit rho)
   2682 {
   2683   int     ix, res, olduse;
   2684   mp_word W[MP_WARRAY];
   2685 
   2686   /* get old used count */
   2687   olduse = x->used;
   2688 
   2689   /* grow a as required */
   2690   if (x->alloc < n->used + 1) {
   2691     if ((res = mp_grow (x, n->used + 1)) != MP_OKAY) {
   2692       return res;
   2693     }
   2694   }
   2695 
   2696   /* first we have to get the digits of the input into
   2697    * an array of double precision words W[...]
   2698    */
   2699   {
   2700     register mp_word *_W;
   2701     register mp_digit *tmpx;
   2702 
   2703     /* alias for the W[] array */
   2704     _W   = W;
   2705 
   2706     /* alias for the digits of  x*/
   2707     tmpx = x->dp;
   2708 
   2709     /* copy the digits of a into W[0..a->used-1] */
   2710     for (ix = 0; ix < x->used; ix++) {
   2711       *_W++ = *tmpx++;
   2712     }
   2713 
   2714     /* zero the high words of W[a->used..m->used*2] */
   2715     for (; ix < n->used * 2 + 1; ix++) {
   2716       *_W++ = 0;
   2717     }
   2718   }
   2719 
   2720   /* now we proceed to zero successive digits
   2721    * from the least significant upwards
   2722    */
   2723   for (ix = 0; ix < n->used; ix++) {
   2724     /* mu = ai * m' mod b
   2725      *
   2726      * We avoid a double precision multiplication (which isn't required)
   2727      * by casting the value down to a mp_digit.  Note this requires
   2728      * that W[ix-1] have  the carry cleared (see after the inner loop)
   2729      */
   2730     register mp_digit mu;
   2731     mu = (mp_digit) (((W[ix] & MP_MASK) * rho) & MP_MASK);
   2732 
   2733     /* a = a + mu * m * b**i
   2734      *
   2735      * This is computed in place and on the fly.  The multiplication
   2736      * by b**i is handled by offseting which columns the results
   2737      * are added to.
   2738      *
   2739      * Note the comba method normally doesn't handle carries in the
   2740      * inner loop In this case we fix the carry from the previous
   2741      * column since the Montgomery reduction requires digits of the
   2742      * result (so far) [see above] to work.  This is
   2743      * handled by fixing up one carry after the inner loop.  The
   2744      * carry fixups are done in order so after these loops the
   2745      * first m->used words of W[] have the carries fixed
   2746      */
   2747     {
   2748       register int iy;
   2749       register mp_digit *tmpn;
   2750       register mp_word *_W;
   2751 
   2752       /* alias for the digits of the modulus */
   2753       tmpn = n->dp;
   2754 
   2755       /* Alias for the columns set by an offset of ix */
   2756       _W = W + ix;
   2757 
   2758       /* inner loop */
   2759       for (iy = 0; iy < n->used; iy++) {
   2760           *_W++ += ((mp_word)mu) * ((mp_word)*tmpn++);
   2761       }
   2762     }
   2763 
   2764     /* now fix carry for next digit, W[ix+1] */
   2765     W[ix + 1] += W[ix] >> ((mp_word) DIGIT_BIT);
   2766   }
   2767 
   2768   /* now we have to propagate the carries and
   2769    * shift the words downward [all those least
   2770    * significant digits we zeroed].
   2771    */
   2772   {
   2773     register mp_digit *tmpx;
   2774     register mp_word *_W, *_W1;
   2775 
   2776     /* nox fix rest of carries */
   2777 
   2778     /* alias for current word */
   2779     _W1 = W + ix;
   2780 
   2781     /* alias for next word, where the carry goes */
   2782     _W = W + ++ix;
   2783 
   2784     for (; ix <= n->used * 2 + 1; ix++) {
   2785       *_W++ += *_W1++ >> ((mp_word) DIGIT_BIT);
   2786     }
   2787 
   2788     /* copy out, A = A/b**n
   2789      *
   2790      * The result is A/b**n but instead of converting from an
   2791      * array of mp_word to mp_digit than calling mp_rshd
   2792      * we just copy them in the right order
   2793      */
   2794 
   2795     /* alias for destination word */
   2796     tmpx = x->dp;
   2797 
   2798     /* alias for shifted double precision result */
   2799     _W = W + n->used;
   2800 
   2801     for (ix = 0; ix < n->used + 1; ix++) {
   2802       *tmpx++ = (mp_digit)(*_W++ & ((mp_word) MP_MASK));
   2803     }
   2804 
   2805     /* zero oldused digits, if the input a was larger than
   2806      * m->used+1 we'll have to clear the digits
   2807      */
   2808     for (; ix < olduse; ix++) {
   2809       *tmpx++ = 0;
   2810     }
   2811   }
   2812 
   2813   /* set the max used and clamp */
   2814   x->used = n->used + 1;
   2815   mp_clamp (x);
   2816 
   2817   /* if A >= m then A = A - m */
   2818   if (mp_cmp_mag (x, n) != MP_LT) {
   2819     return s_mp_sub (x, n, x);
   2820   }
   2821   return MP_OKAY;
   2822 }
   2823 #endif
   2824 
   2825 
   2826 #ifdef BN_MP_MUL_2_C
   2827 /* b = a*2 */
   2828 static int mp_mul_2(mp_int * a, mp_int * b)
   2829 {
   2830   int     x, res, oldused;
   2831 
   2832   /* grow to accommodate result */
   2833   if (b->alloc < a->used + 1) {
   2834     if ((res = mp_grow (b, a->used + 1)) != MP_OKAY) {
   2835       return res;
   2836     }
   2837   }
   2838 
   2839   oldused = b->used;
   2840   b->used = a->used;
   2841 
   2842   {
   2843     register mp_digit r, rr, *tmpa, *tmpb;
   2844 
   2845     /* alias for source */
   2846     tmpa = a->dp;
   2847 
   2848     /* alias for dest */
   2849     tmpb = b->dp;
   2850 
   2851     /* carry */
   2852     r = 0;
   2853     for (x = 0; x < a->used; x++) {
   2854 
   2855       /* get what will be the *next* carry bit from the
   2856        * MSB of the current digit
   2857        */
   2858       rr = *tmpa >> ((mp_digit)(DIGIT_BIT - 1));
   2859 
   2860       /* now shift up this digit, add in the carry [from the previous] */
   2861       *tmpb++ = ((*tmpa++ << ((mp_digit)1)) | r) & MP_MASK;
   2862 
   2863       /* copy the carry that would be from the source
   2864        * digit into the next iteration
   2865        */
   2866       r = rr;
   2867     }
   2868 
   2869     /* new leading digit? */
   2870     if (r != 0) {
   2871       /* add a MSB which is always 1 at this point */
   2872       *tmpb = 1;
   2873       ++(b->used);
   2874     }
   2875 
   2876     /* now zero any excess digits on the destination
   2877      * that we didn't write to
   2878      */
   2879     tmpb = b->dp + b->used;
   2880     for (x = b->used; x < oldused; x++) {
   2881       *tmpb++ = 0;
   2882     }
   2883   }
   2884   b->sign = a->sign;
   2885   return MP_OKAY;
   2886 }
   2887 #endif
   2888 
   2889 
   2890 #ifdef BN_MP_MONTGOMERY_CALC_NORMALIZATION_C
   2891 /*
   2892  * shifts with subtractions when the result is greater than b.
   2893  *
   2894  * The method is slightly modified to shift B unconditionally up to just under
   2895  * the leading bit of b.  This saves a lot of multiple precision shifting.
   2896  */
   2897 static int mp_montgomery_calc_normalization (mp_int * a, mp_int * b)
   2898 {
   2899   int     x, bits, res;
   2900 
   2901   /* how many bits of last digit does b use */
   2902   bits = mp_count_bits (b) % DIGIT_BIT;
   2903 
   2904   if (b->used > 1) {
   2905      if ((res = mp_2expt (a, (b->used - 1) * DIGIT_BIT + bits - 1)) != MP_OKAY) {
   2906         return res;
   2907      }
   2908   } else {
   2909      mp_set(a, 1);
   2910      bits = 1;
   2911   }
   2912 
   2913 
   2914   /* now compute C = A * B mod b */
   2915   for (x = bits - 1; x < (int)DIGIT_BIT; x++) {
   2916     if ((res = mp_mul_2 (a, a)) != MP_OKAY) {
   2917       return res;
   2918     }
   2919     if (mp_cmp_mag (a, b) != MP_LT) {
   2920       if ((res = s_mp_sub (a, b, a)) != MP_OKAY) {
   2921         return res;
   2922       }
   2923     }
   2924   }
   2925 
   2926   return MP_OKAY;
   2927 }
   2928 #endif
   2929 
   2930 
   2931 #ifdef BN_MP_EXPTMOD_FAST_C
   2932 /* computes Y == G**X mod P, HAC pp.616, Algorithm 14.85
   2933  *
   2934  * Uses a left-to-right k-ary sliding window to compute the modular exponentiation.
   2935  * The value of k changes based on the size of the exponent.
   2936  *
   2937  * Uses Montgomery or Diminished Radix reduction [whichever appropriate]
   2938  */
   2939 
   2940 static int mp_exptmod_fast (mp_int * G, mp_int * X, mp_int * P, mp_int * Y, int redmode)
   2941 {
   2942   mp_int  M[TAB_SIZE], res;
   2943   mp_digit buf, mp;
   2944   int     err, bitbuf, bitcpy, bitcnt, mode, digidx, x, y, winsize;
   2945 
   2946   /* use a pointer to the reduction algorithm.  This allows us to use
   2947    * one of many reduction algorithms without modding the guts of
   2948    * the code with if statements everywhere.
   2949    */
   2950   int     (*redux)(mp_int*,mp_int*,mp_digit);
   2951 
   2952   /* find window size */
   2953   x = mp_count_bits (X);
   2954   if (x <= 7) {
   2955     winsize = 2;
   2956   } else if (x <= 36) {
   2957     winsize = 3;
   2958   } else if (x <= 140) {
   2959     winsize = 4;
   2960   } else if (x <= 450) {
   2961     winsize = 5;
   2962   } else if (x <= 1303) {
   2963     winsize = 6;
   2964   } else if (x <= 3529) {
   2965     winsize = 7;
   2966   } else {
   2967     winsize = 8;
   2968   }
   2969 
   2970 #ifdef MP_LOW_MEM
   2971   if (winsize > 5) {
   2972      winsize = 5;
   2973   }
   2974 #endif
   2975 
   2976   /* init M array */
   2977   /* init first cell */
   2978   if ((err = mp_init(&M[1])) != MP_OKAY) {
   2979      return err;
   2980   }
   2981 
   2982   /* now init the second half of the array */
   2983   for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
   2984     if ((err = mp_init(&M[x])) != MP_OKAY) {
   2985       for (y = 1<<(winsize-1); y < x; y++) {
   2986         mp_clear (&M[y]);
   2987       }
   2988       mp_clear(&M[1]);
   2989       return err;
   2990     }
   2991   }
   2992 
   2993   /* determine and setup reduction code */
   2994   if (redmode == 0) {
   2995 #ifdef BN_MP_MONTGOMERY_SETUP_C
   2996      /* now setup montgomery  */
   2997      if ((err = mp_montgomery_setup (P, &mp)) != MP_OKAY) {
   2998         goto LBL_M;
   2999      }
   3000 #else
   3001      err = MP_VAL;
   3002      goto LBL_M;
   3003 #endif
   3004 
   3005      /* automatically pick the comba one if available (saves quite a few calls/ifs) */
   3006 #ifdef BN_FAST_MP_MONTGOMERY_REDUCE_C
   3007      if (((P->used * 2 + 1) < MP_WARRAY) &&
   3008           P->used < (1 << ((CHAR_BIT * sizeof (mp_word)) - (2 * DIGIT_BIT)))) {
   3009         redux = fast_mp_montgomery_reduce;
   3010      } else
   3011 #endif
   3012      {
   3013 #ifdef BN_MP_MONTGOMERY_REDUCE_C
   3014         /* use slower baseline Montgomery method */
   3015         redux = mp_montgomery_reduce;
   3016 #else
   3017         err = MP_VAL;
   3018         goto LBL_M;
   3019 #endif
   3020      }
   3021   } else if (redmode == 1) {
   3022 #if defined(BN_MP_DR_SETUP_C) && defined(BN_MP_DR_REDUCE_C)
   3023      /* setup DR reduction for moduli of the form B**k - b */
   3024      mp_dr_setup(P, &mp);
   3025      redux = mp_dr_reduce;
   3026 #else
   3027      err = MP_VAL;
   3028      goto LBL_M;
   3029 #endif
   3030   } else {
   3031 #if defined(BN_MP_REDUCE_2K_SETUP_C) && defined(BN_MP_REDUCE_2K_C)
   3032      /* setup DR reduction for moduli of the form 2**k - b */
   3033      if ((err = mp_reduce_2k_setup(P, &mp)) != MP_OKAY) {
   3034         goto LBL_M;
   3035      }
   3036      redux = mp_reduce_2k;
   3037 #else
   3038      err = MP_VAL;
   3039      goto LBL_M;
   3040 #endif
   3041   }
   3042 
   3043   /* setup result */
   3044   if ((err = mp_init (&res)) != MP_OKAY) {
   3045     goto LBL_M;
   3046   }
   3047 
   3048   /* create M table
   3049    *
   3050 
   3051    *
   3052    * The first half of the table is not computed though accept for M[0] and M[1]
   3053    */
   3054 
   3055   if (redmode == 0) {
   3056 #ifdef BN_MP_MONTGOMERY_CALC_NORMALIZATION_C
   3057      /* now we need R mod m */
   3058      if ((err = mp_montgomery_calc_normalization (&res, P)) != MP_OKAY) {
   3059        goto LBL_RES;
   3060      }
   3061 #else
   3062      err = MP_VAL;
   3063      goto LBL_RES;
   3064 #endif
   3065 
   3066      /* now set M[1] to G * R mod m */
   3067      if ((err = mp_mulmod (G, &res, P, &M[1])) != MP_OKAY) {
   3068        goto LBL_RES;
   3069      }
   3070   } else {
   3071      mp_set(&res, 1);
   3072      if ((err = mp_mod(G, P, &M[1])) != MP_OKAY) {
   3073         goto LBL_RES;
   3074      }
   3075   }
   3076 
   3077   /* compute the value at M[1<<(winsize-1)] by squaring M[1] (winsize-1) times */
   3078   if ((err = mp_copy (&M[1], &M[1 << (winsize - 1)])) != MP_OKAY) {
   3079     goto LBL_RES;
   3080   }
   3081 
   3082   for (x = 0; x < (winsize - 1); x++) {
   3083     if ((err = mp_sqr (&M[1 << (winsize - 1)], &M[1 << (winsize - 1)])) != MP_OKAY) {
   3084       goto LBL_RES;
   3085     }
   3086     if ((err = redux (&M[1 << (winsize - 1)], P, mp)) != MP_OKAY) {
   3087       goto LBL_RES;
   3088     }
   3089   }
   3090 
   3091   /* create upper table */
   3092   for (x = (1 << (winsize - 1)) + 1; x < (1 << winsize); x++) {
   3093     if ((err = mp_mul (&M[x - 1], &M[1], &M[x])) != MP_OKAY) {
   3094       goto LBL_RES;
   3095     }
   3096     if ((err = redux (&M[x], P, mp)) != MP_OKAY) {
   3097       goto LBL_RES;
   3098     }
   3099   }
   3100 
   3101   /* set initial mode and bit cnt */
   3102   mode   = 0;
   3103   bitcnt = 1;
   3104   buf    = 0;
   3105   digidx = X->used - 1;
   3106   bitcpy = 0;
   3107   bitbuf = 0;
   3108 
   3109   for (;;) {
   3110     /* grab next digit as required */
   3111     if (--bitcnt == 0) {
   3112       /* if digidx == -1 we are out of digits so break */
   3113       if (digidx == -1) {
   3114         break;
   3115       }
   3116       /* read next digit and reset bitcnt */
   3117       buf    = X->dp[digidx--];
   3118       bitcnt = (int)DIGIT_BIT;
   3119     }
   3120 
   3121     /* grab the next msb from the exponent */
   3122     y     = (mp_digit)(buf >> (DIGIT_BIT - 1)) & 1;
   3123     buf <<= (mp_digit)1;
   3124 
   3125     /* if the bit is zero and mode == 0 then we ignore it
   3126      * These represent the leading zero bits before the first 1 bit
   3127      * in the exponent.  Technically this opt is not required but it
   3128      * does lower the # of trivial squaring/reductions used
   3129      */
   3130     if (mode == 0 && y == 0) {
   3131       continue;
   3132     }
   3133 
   3134     /* if the bit is zero and mode == 1 then we square */
   3135     if (mode == 1 && y == 0) {
   3136       if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
   3137         goto LBL_RES;
   3138       }
   3139       if ((err = redux (&res, P, mp)) != MP_OKAY) {
   3140         goto LBL_RES;
   3141       }
   3142       continue;
   3143     }
   3144 
   3145     /* else we add it to the window */
   3146     bitbuf |= (y << (winsize - ++bitcpy));
   3147     mode    = 2;
   3148 
   3149     if (bitcpy == winsize) {
   3150       /* ok window is filled so square as required and multiply  */
   3151       /* square first */
   3152       for (x = 0; x < winsize; x++) {
   3153         if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
   3154           goto LBL_RES;
   3155         }
   3156         if ((err = redux (&res, P, mp)) != MP_OKAY) {
   3157           goto LBL_RES;
   3158         }
   3159       }
   3160 
   3161       /* then multiply */
   3162       if ((err = mp_mul (&res, &M[bitbuf], &res)) != MP_OKAY) {
   3163         goto LBL_RES;
   3164       }
   3165       if ((err = redux (&res, P, mp)) != MP_OKAY) {
   3166         goto LBL_RES;
   3167       }
   3168 
   3169       /* empty window and reset */
   3170       bitcpy = 0;
   3171       bitbuf = 0;
   3172       mode   = 1;
   3173     }
   3174   }
   3175 
   3176   /* if bits remain then square/multiply */
   3177   if (mode == 2 && bitcpy > 0) {
   3178     /* square then multiply if the bit is set */
   3179     for (x = 0; x < bitcpy; x++) {
   3180       if ((err = mp_sqr (&res, &res)) != MP_OKAY) {
   3181         goto LBL_RES;
   3182       }
   3183       if ((err = redux (&res, P, mp)) != MP_OKAY) {
   3184         goto LBL_RES;
   3185       }
   3186 
   3187       /* get next bit of the window */
   3188       bitbuf <<= 1;
   3189       if ((bitbuf & (1 << winsize)) != 0) {
   3190         /* then multiply */
   3191         if ((err = mp_mul (&res, &M[1], &res)) != MP_OKAY) {
   3192           goto LBL_RES;
   3193         }
   3194         if ((err = redux (&res, P, mp)) != MP_OKAY) {
   3195           goto LBL_RES;
   3196         }
   3197       }
   3198     }
   3199   }
   3200 
   3201   if (redmode == 0) {
   3202      /* fixup result if Montgomery reduction is used
   3203       * recall that any value in a Montgomery system is
   3204       * actually multiplied by R mod n.  So we have
   3205       * to reduce one more time to cancel out the factor
   3206       * of R.
   3207       */
   3208      if ((err = redux(&res, P, mp)) != MP_OKAY) {
   3209        goto LBL_RES;
   3210      }
   3211   }
   3212 
   3213   /* swap res with Y */
   3214   mp_exch (&res, Y);
   3215   err = MP_OKAY;
   3216 LBL_RES:mp_clear (&res);
   3217 LBL_M:
   3218   mp_clear(&M[1]);
   3219   for (x = 1<<(winsize-1); x < (1 << winsize); x++) {
   3220     mp_clear (&M[x]);
   3221   }
   3222   return err;
   3223 }
   3224 #endif
   3225 
   3226 
   3227 #ifdef BN_FAST_S_MP_SQR_C
   3228 /* the jist of squaring...
   3229  * you do like mult except the offset of the tmpx [one that
   3230  * starts closer to zero] can't equal the offset of tmpy.
   3231  * So basically you set up iy like before then you min it with
   3232  * (ty-tx) so that it never happens.  You double all those
   3233  * you add in the inner loop
   3234 
   3235 After that loop you do the squares and add them in.
   3236 */
   3237 
   3238 static int fast_s_mp_sqr (mp_int * a, mp_int * b)
   3239 {
   3240   int       olduse, res, pa, ix, iz;
   3241   mp_digit   W[MP_WARRAY], *tmpx;
   3242   mp_word   W1;
   3243 
   3244   /* grow the destination as required */
   3245   pa = a->used + a->used;
   3246   if (b->alloc < pa) {
   3247     if ((res = mp_grow (b, pa)) != MP_OKAY) {
   3248       return res;
   3249     }
   3250   }
   3251 
   3252   /* number of output digits to produce */
   3253   W1 = 0;
   3254   for (ix = 0; ix < pa; ix++) {
   3255       int      tx, ty, iy;
   3256       mp_word  _W;
   3257       mp_digit *tmpy;
   3258 
   3259       /* clear counter */
   3260       _W = 0;
   3261 
   3262       /* get offsets into the two bignums */
   3263       ty = MIN(a->used-1, ix);
   3264       tx = ix - ty;
   3265 
   3266       /* setup temp aliases */
   3267       tmpx = a->dp + tx;
   3268       tmpy = a->dp + ty;
   3269 
   3270       /* this is the number of times the loop will iterrate, essentially
   3271          while (tx++ < a->used && ty-- >= 0) { ... }
   3272        */
   3273       iy = MIN(a->used-tx, ty+1);
   3274 
   3275       /* now for squaring tx can never equal ty
   3276        * we halve the distance since they approach at a rate of 2x
   3277        * and we have to round because odd cases need to be executed
   3278        */
   3279       iy = MIN(iy, (ty-tx+1)>>1);
   3280 
   3281       /* execute loop */
   3282       for (iz = 0; iz < iy; iz++) {
   3283          _W += ((mp_word)*tmpx++)*((mp_word)*tmpy--);
   3284       }
   3285 
   3286       /* double the inner product and add carry */
   3287       _W = _W + _W + W1;
   3288 
   3289       /* even columns have the square term in them */
   3290       if ((ix&1) == 0) {
   3291          _W += ((mp_word)a->dp[ix>>1])*((mp_word)a->dp[ix>>1]);
   3292       }
   3293 
   3294       /* store it */
   3295       W[ix] = (mp_digit)(_W & MP_MASK);
   3296 
   3297       /* make next carry */
   3298       W1 = _W >> ((mp_word)DIGIT_BIT);
   3299   }
   3300 
   3301   /* setup dest */
   3302   olduse  = b->used;
   3303   b->used = a->used+a->used;
   3304 
   3305   {
   3306     mp_digit *tmpb;
   3307     tmpb = b->dp;
   3308     for (ix = 0; ix < pa; ix++) {
   3309       *tmpb++ = W[ix] & MP_MASK;
   3310     }
   3311 
   3312     /* clear unused digits [that existed in the old copy of c] */
   3313     for (; ix < olduse; ix++) {
   3314       *tmpb++ = 0;
   3315     }
   3316   }
   3317   mp_clamp (b);
   3318   return MP_OKAY;
   3319 }
   3320 #endif
   3321 
   3322 
   3323 #ifdef BN_MP_MUL_D_C
   3324 /* multiply by a digit */
   3325 static int
   3326 mp_mul_d (mp_int * a, mp_digit b, mp_int * c)
   3327 {
   3328   mp_digit u, *tmpa, *tmpc;
   3329   mp_word  r;
   3330   int      ix, res, olduse;
   3331 
   3332   /* make sure c is big enough to hold a*b */
   3333   if (c->alloc < a->used + 1) {
   3334     if ((res = mp_grow (c, a->used + 1)) != MP_OKAY) {
   3335       return res;
   3336     }
   3337   }
   3338 
   3339   /* get the original destinations used count */
   3340   olduse = c->used;
   3341 
   3342   /* set the sign */
   3343   c->sign = a->sign;
   3344 
   3345   /* alias for a->dp [source] */
   3346   tmpa = a->dp;
   3347 
   3348   /* alias for c->dp [dest] */
   3349   tmpc = c->dp;
   3350 
   3351   /* zero carry */
   3352   u = 0;
   3353 
   3354   /* compute columns */
   3355   for (ix = 0; ix < a->used; ix++) {
   3356     /* compute product and carry sum for this term */
   3357     r       = ((mp_word) u) + ((mp_word)*tmpa++) * ((mp_word)b);
   3358 
   3359     /* mask off higher bits to get a single digit */
   3360     *tmpc++ = (mp_digit) (r & ((mp_word) MP_MASK));
   3361 
   3362     /* send carry into next iteration */
   3363     u       = (mp_digit) (r >> ((mp_word) DIGIT_BIT));
   3364   }
   3365 
   3366   /* store final carry [if any] and increment ix offset  */
   3367   *tmpc++ = u;
   3368   ++ix;
   3369 
   3370   /* now zero digits above the top */
   3371   while (ix++ < olduse) {
   3372      *tmpc++ = 0;
   3373   }
   3374 
   3375   /* set used count */
   3376   c->used = a->used + 1;
   3377   mp_clamp(c);
   3378 
   3379   return MP_OKAY;
   3380 }
   3381 #endif
   3382