Home | History | Annotate | Download | only in mtest
      1 /*
      2     mpi.c
      3 
      4     by Michael J. Fromberger <sting (at) linguist.dartmouth.edu>
      5     Copyright (C) 1998 Michael J. Fromberger, All Rights Reserved
      6 
      7     Arbitrary precision integer arithmetic library
      8 
      9     $Id: mpi.c,v 1.2 2005/05/05 14:38:47 tom Exp $
     10  */
     11 
     12 #include "mpi.h"
     13 #include <stdlib.h>
     14 #include <string.h>
     15 #include <ctype.h>
     16 
     17 #if MP_DEBUG
     18 #include <stdio.h>
     19 
     20 #define DIAG(T,V) {fprintf(stderr,T);mp_print(V,stderr);fputc('\n',stderr);}
     21 #else
     22 #define DIAG(T,V)
     23 #endif
     24 
     25 /*
     26    If MP_LOGTAB is not defined, use the math library to compute the
     27    logarithms on the fly.  Otherwise, use the static table below.
     28    Pick which works best for your system.
     29  */
     30 #if MP_LOGTAB
     31 
     32 /* {{{ s_logv_2[] - log table for 2 in various bases */
     33 
     34 /*
     35   A table of the logs of 2 for various bases (the 0 and 1 entries of
     36   this table are meaningless and should not be referenced).
     37 
     38   This table is used to compute output lengths for the mp_toradix()
     39   function.  Since a number n in radix r takes up about log_r(n)
     40   digits, we estimate the output size by taking the least integer
     41   greater than log_r(n), where:
     42 
     43   log_r(n) = log_2(n) * log_r(2)
     44 
     45   This table, therefore, is a table of log_r(2) for 2 <= r <= 36,
     46   which are the output bases supported.
     47  */
     48 
     49 #include "logtab.h"
     50 
     51 /* }}} */
     52 #define LOG_V_2(R)  s_logv_2[(R)]
     53 
     54 #else
     55 
     56 #include <math.h>
     57 #define LOG_V_2(R)  (log(2.0)/log(R))
     58 
     59 #endif
     60 
     61 /* Default precision for newly created mp_int's      */
     62 static unsigned int s_mp_defprec = MP_DEFPREC;
     63 
     64 /* {{{ Digit arithmetic macros */
     65 
     66 /*
     67   When adding and multiplying digits, the results can be larger than
     68   can be contained in an mp_digit.  Thus, an mp_word is used.  These
     69   macros mask off the upper and lower digits of the mp_word (the
     70   mp_word may be more than 2 mp_digits wide, but we only concern
     71   ourselves with the low-order 2 mp_digits)
     72 
     73   If your mp_word DOES have more than 2 mp_digits, you need to
     74   uncomment the first line, and comment out the second.
     75  */
     76 
     77 /* #define  CARRYOUT(W)  (((W)>>DIGIT_BIT)&MP_DIGIT_MAX) */
     78 #define  CARRYOUT(W)  ((W)>>DIGIT_BIT)
     79 #define  ACCUM(W)     ((W)&MP_DIGIT_MAX)
     80 
     81 /* }}} */
     82 
     83 /* {{{ Comparison constants */
     84 
     85 #define  MP_LT       -1
     86 #define  MP_EQ        0
     87 #define  MP_GT        1
     88 
     89 /* }}} */
     90 
     91 /* {{{ Constant strings */
     92 
     93 /* Constant strings returned by mp_strerror() */
     94 static const char *mp_err_string[] = {
     95   "unknown result code",     /* say what?            */
     96   "boolean true",            /* MP_OKAY, MP_YES      */
     97   "boolean false",           /* MP_NO                */
     98   "out of memory",           /* MP_MEM               */
     99   "argument out of range",   /* MP_RANGE             */
    100   "invalid input parameter", /* MP_BADARG            */
    101   "result is undefined"      /* MP_UNDEF             */
    102 };
    103 
    104 /* Value to digit maps for radix conversion   */
    105 
    106 /* s_dmap_1 - standard digits and letters */
    107 static const char *s_dmap_1 =
    108   "0123456789ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz+/";
    109 
    110 #if 0
    111 /* s_dmap_2 - base64 ordering for digits  */
    112 static const char *s_dmap_2 =
    113   "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
    114 #endif
    115 
    116 /* }}} */
    117 
    118 /* {{{ Static function declarations */
    119 
    120 /*
    121    If MP_MACRO is false, these will be defined as actual functions;
    122    otherwise, suitable macro definitions will be used.  This works
    123    around the fact that ANSI C89 doesn't support an 'inline' keyword
    124    (although I hear C9x will ... about bloody time).  At present, the
    125    macro definitions are identical to the function bodies, but they'll
    126    expand in place, instead of generating a function call.
    127 
    128    I chose these particular functions to be made into macros because
    129    some profiling showed they are called a lot on a typical workload,
    130    and yet they are primarily housekeeping.
    131  */
    132 #if MP_MACRO == 0
    133  void     s_mp_setz(mp_digit *dp, mp_size count); /* zero digits           */
    134  void     s_mp_copy(mp_digit *sp, mp_digit *dp, mp_size count); /* copy    */
    135  void    *s_mp_alloc(size_t nb, size_t ni);       /* general allocator     */
    136  void     s_mp_free(void *ptr);                   /* general free function */
    137 #else
    138 
    139  /* Even if these are defined as macros, we need to respect the settings
    140     of the MP_MEMSET and MP_MEMCPY configuration options...
    141   */
    142  #if MP_MEMSET == 0
    143   #define  s_mp_setz(dp, count) \
    144        {int ix;for(ix=0;ix<(count);ix++)(dp)[ix]=0;}
    145  #else
    146   #define  s_mp_setz(dp, count) memset(dp, 0, (count) * sizeof(mp_digit))
    147  #endif /* MP_MEMSET */
    148 
    149  #if MP_MEMCPY == 0
    150   #define  s_mp_copy(sp, dp, count) \
    151        {int ix;for(ix=0;ix<(count);ix++)(dp)[ix]=(sp)[ix];}
    152  #else
    153   #define  s_mp_copy(sp, dp, count) memcpy(dp, sp, (count) * sizeof(mp_digit))
    154  #endif /* MP_MEMCPY */
    155 
    156  #define  s_mp_alloc(nb, ni)  calloc(nb, ni)
    157  #define  s_mp_free(ptr) {if(ptr) free(ptr);}
    158 #endif /* MP_MACRO */
    159 
    160 mp_err   s_mp_grow(mp_int *mp, mp_size min);   /* increase allocated size */
    161 mp_err   s_mp_pad(mp_int *mp, mp_size min);    /* left pad with zeroes    */
    162 
    163 void     s_mp_clamp(mp_int *mp);               /* clip leading zeroes     */
    164 
    165 void     s_mp_exch(mp_int *a, mp_int *b);      /* swap a and b in place   */
    166 
    167 mp_err   s_mp_lshd(mp_int *mp, mp_size p);     /* left-shift by p digits  */
    168 void     s_mp_rshd(mp_int *mp, mp_size p);     /* right-shift by p digits */
    169 void     s_mp_div_2d(mp_int *mp, mp_digit d);  /* divide by 2^d in place  */
    170 void     s_mp_mod_2d(mp_int *mp, mp_digit d);  /* modulo 2^d in place     */
    171 mp_err   s_mp_mul_2d(mp_int *mp, mp_digit d);  /* multiply by 2^d in place*/
    172 void     s_mp_div_2(mp_int *mp);               /* divide by 2 in place    */
    173 mp_err   s_mp_mul_2(mp_int *mp);               /* multiply by 2 in place  */
    174 mp_digit s_mp_norm(mp_int *a, mp_int *b);      /* normalize for division  */
    175 mp_err   s_mp_add_d(mp_int *mp, mp_digit d);   /* unsigned digit addition */
    176 mp_err   s_mp_sub_d(mp_int *mp, mp_digit d);   /* unsigned digit subtract */
    177 mp_err   s_mp_mul_d(mp_int *mp, mp_digit d);   /* unsigned digit multiply */
    178 mp_err   s_mp_div_d(mp_int *mp, mp_digit d, mp_digit *r);
    179 		                               /* unsigned digit divide   */
    180 mp_err   s_mp_reduce(mp_int *x, mp_int *m, mp_int *mu);
    181                                                /* Barrett reduction       */
    182 mp_err   s_mp_add(mp_int *a, mp_int *b);       /* magnitude addition      */
    183 mp_err   s_mp_sub(mp_int *a, mp_int *b);       /* magnitude subtract      */
    184 mp_err   s_mp_mul(mp_int *a, mp_int *b);       /* magnitude multiply      */
    185 #if 0
    186 void     s_mp_kmul(mp_digit *a, mp_digit *b, mp_digit *out, mp_size len);
    187                                                /* multiply buffers in place */
    188 #endif
    189 #if MP_SQUARE
    190 mp_err   s_mp_sqr(mp_int *a);                  /* magnitude square        */
    191 #else
    192 #define  s_mp_sqr(a) s_mp_mul(a, a)
    193 #endif
    194 mp_err   s_mp_div(mp_int *a, mp_int *b);       /* magnitude divide        */
    195 mp_err   s_mp_2expt(mp_int *a, mp_digit k);    /* a = 2^k                 */
    196 int      s_mp_cmp(mp_int *a, mp_int *b);       /* magnitude comparison    */
    197 int      s_mp_cmp_d(mp_int *a, mp_digit d);    /* magnitude digit compare */
    198 int      s_mp_ispow2(mp_int *v);               /* is v a power of 2?      */
    199 int      s_mp_ispow2d(mp_digit d);             /* is d a power of 2?      */
    200 
    201 int      s_mp_tovalue(char ch, int r);          /* convert ch to value    */
    202 char     s_mp_todigit(int val, int r, int low); /* convert val to digit   */
    203 int      s_mp_outlen(int bits, int r);          /* output length in bytes */
    204 
    205 /* }}} */
    206 
    207 /* {{{ Default precision manipulation */
    208 
    209 unsigned int mp_get_prec(void)
    210 {
    211   return s_mp_defprec;
    212 
    213 } /* end mp_get_prec() */
    214 
    215 void         mp_set_prec(unsigned int prec)
    216 {
    217   if(prec == 0)
    218     s_mp_defprec = MP_DEFPREC;
    219   else
    220     s_mp_defprec = prec;
    221 
    222 } /* end mp_set_prec() */
    223 
    224 /* }}} */
    225 
    226 /*------------------------------------------------------------------------*/
    227 /* {{{ mp_init(mp) */
    228 
    229 /*
    230   mp_init(mp)
    231 
    232   Initialize a new zero-valued mp_int.  Returns MP_OKAY if successful,
    233   MP_MEM if memory could not be allocated for the structure.
    234  */
    235 
    236 mp_err mp_init(mp_int *mp)
    237 {
    238   return mp_init_size(mp, s_mp_defprec);
    239 
    240 } /* end mp_init() */
    241 
    242 /* }}} */
    243 
    244 /* {{{ mp_init_array(mp[], count) */
    245 
    246 mp_err mp_init_array(mp_int mp[], int count)
    247 {
    248   mp_err  res;
    249   int     pos;
    250 
    251   ARGCHK(mp !=NULL && count > 0, MP_BADARG);
    252 
    253   for(pos = 0; pos < count; ++pos) {
    254     if((res = mp_init(&mp[pos])) != MP_OKAY)
    255       goto CLEANUP;
    256   }
    257 
    258   return MP_OKAY;
    259 
    260  CLEANUP:
    261   while(--pos >= 0)
    262     mp_clear(&mp[pos]);
    263 
    264   return res;
    265 
    266 } /* end mp_init_array() */
    267 
    268 /* }}} */
    269 
    270 /* {{{ mp_init_size(mp, prec) */
    271 
    272 /*
    273   mp_init_size(mp, prec)
    274 
    275   Initialize a new zero-valued mp_int with at least the given
    276   precision; returns MP_OKAY if successful, or MP_MEM if memory could
    277   not be allocated for the structure.
    278  */
    279 
    280 mp_err mp_init_size(mp_int *mp, mp_size prec)
    281 {
    282   ARGCHK(mp != NULL && prec > 0, MP_BADARG);
    283 
    284   if((DIGITS(mp) = s_mp_alloc(prec, sizeof(mp_digit))) == NULL)
    285     return MP_MEM;
    286 
    287   SIGN(mp) = MP_ZPOS;
    288   USED(mp) = 1;
    289   ALLOC(mp) = prec;
    290 
    291   return MP_OKAY;
    292 
    293 } /* end mp_init_size() */
    294 
    295 /* }}} */
    296 
    297 /* {{{ mp_init_copy(mp, from) */
    298 
    299 /*
    300   mp_init_copy(mp, from)
    301 
    302   Initialize mp as an exact copy of from.  Returns MP_OKAY if
    303   successful, MP_MEM if memory could not be allocated for the new
    304   structure.
    305  */
    306 
    307 mp_err mp_init_copy(mp_int *mp, mp_int *from)
    308 {
    309   ARGCHK(mp != NULL && from != NULL, MP_BADARG);
    310 
    311   if(mp == from)
    312     return MP_OKAY;
    313 
    314   if((DIGITS(mp) = s_mp_alloc(USED(from), sizeof(mp_digit))) == NULL)
    315     return MP_MEM;
    316 
    317   s_mp_copy(DIGITS(from), DIGITS(mp), USED(from));
    318   USED(mp) = USED(from);
    319   ALLOC(mp) = USED(from);
    320   SIGN(mp) = SIGN(from);
    321 
    322   return MP_OKAY;
    323 
    324 } /* end mp_init_copy() */
    325 
    326 /* }}} */
    327 
    328 /* {{{ mp_copy(from, to) */
    329 
    330 /*
    331   mp_copy(from, to)
    332 
    333   Copies the mp_int 'from' to the mp_int 'to'.  It is presumed that
    334   'to' has already been initialized (if not, use mp_init_copy()
    335   instead). If 'from' and 'to' are identical, nothing happens.
    336  */
    337 
    338 mp_err mp_copy(mp_int *from, mp_int *to)
    339 {
    340   ARGCHK(from != NULL && to != NULL, MP_BADARG);
    341 
    342   if(from == to)
    343     return MP_OKAY;
    344 
    345   { /* copy */
    346     mp_digit   *tmp;
    347 
    348     /*
    349       If the allocated buffer in 'to' already has enough space to hold
    350       all the used digits of 'from', we'll re-use it to avoid hitting
    351       the memory allocater more than necessary; otherwise, we'd have
    352       to grow anyway, so we just allocate a hunk and make the copy as
    353       usual
    354      */
    355     if(ALLOC(to) >= USED(from)) {
    356       s_mp_setz(DIGITS(to) + USED(from), ALLOC(to) - USED(from));
    357       s_mp_copy(DIGITS(from), DIGITS(to), USED(from));
    358 
    359     } else {
    360       if((tmp = s_mp_alloc(USED(from), sizeof(mp_digit))) == NULL)
    361 	return MP_MEM;
    362 
    363       s_mp_copy(DIGITS(from), tmp, USED(from));
    364 
    365       if(DIGITS(to) != NULL) {
    366 #if MP_CRYPTO
    367 	s_mp_setz(DIGITS(to), ALLOC(to));
    368 #endif
    369 	s_mp_free(DIGITS(to));
    370       }
    371 
    372       DIGITS(to) = tmp;
    373       ALLOC(to) = USED(from);
    374     }
    375 
    376     /* Copy the precision and sign from the original */
    377     USED(to) = USED(from);
    378     SIGN(to) = SIGN(from);
    379   } /* end copy */
    380 
    381   return MP_OKAY;
    382 
    383 } /* end mp_copy() */
    384 
    385 /* }}} */
    386 
    387 /* {{{ mp_exch(mp1, mp2) */
    388 
    389 /*
    390   mp_exch(mp1, mp2)
    391 
    392   Exchange mp1 and mp2 without allocating any intermediate memory
    393   (well, unless you count the stack space needed for this call and the
    394   locals it creates...).  This cannot fail.
    395  */
    396 
    397 void mp_exch(mp_int *mp1, mp_int *mp2)
    398 {
    399 #if MP_ARGCHK == 2
    400   assert(mp1 != NULL && mp2 != NULL);
    401 #else
    402   if(mp1 == NULL || mp2 == NULL)
    403     return;
    404 #endif
    405 
    406   s_mp_exch(mp1, mp2);
    407 
    408 } /* end mp_exch() */
    409 
    410 /* }}} */
    411 
    412 /* {{{ mp_clear(mp) */
    413 
    414 /*
    415   mp_clear(mp)
    416 
    417   Release the storage used by an mp_int, and void its fields so that
    418   if someone calls mp_clear() again for the same int later, we won't
    419   get tollchocked.
    420  */
    421 
    422 void   mp_clear(mp_int *mp)
    423 {
    424   if(mp == NULL)
    425     return;
    426 
    427   if(DIGITS(mp) != NULL) {
    428 #if MP_CRYPTO
    429     s_mp_setz(DIGITS(mp), ALLOC(mp));
    430 #endif
    431     s_mp_free(DIGITS(mp));
    432     DIGITS(mp) = NULL;
    433   }
    434 
    435   USED(mp) = 0;
    436   ALLOC(mp) = 0;
    437 
    438 } /* end mp_clear() */
    439 
    440 /* }}} */
    441 
    442 /* {{{ mp_clear_array(mp[], count) */
    443 
    444 void   mp_clear_array(mp_int mp[], int count)
    445 {
    446   ARGCHK(mp != NULL && count > 0, MP_BADARG);
    447 
    448   while(--count >= 0)
    449     mp_clear(&mp[count]);
    450 
    451 } /* end mp_clear_array() */
    452 
    453 /* }}} */
    454 
    455 /* {{{ mp_zero(mp) */
    456 
    457 /*
    458   mp_zero(mp)
    459 
    460   Set mp to zero.  Does not change the allocated size of the structure,
    461   and therefore cannot fail (except on a bad argument, which we ignore)
    462  */
    463 void   mp_zero(mp_int *mp)
    464 {
    465   if(mp == NULL)
    466     return;
    467 
    468   s_mp_setz(DIGITS(mp), ALLOC(mp));
    469   USED(mp) = 1;
    470   SIGN(mp) = MP_ZPOS;
    471 
    472 } /* end mp_zero() */
    473 
    474 /* }}} */
    475 
    476 /* {{{ mp_set(mp, d) */
    477 
    478 void   mp_set(mp_int *mp, mp_digit d)
    479 {
    480   if(mp == NULL)
    481     return;
    482 
    483   mp_zero(mp);
    484   DIGIT(mp, 0) = d;
    485 
    486 } /* end mp_set() */
    487 
    488 /* }}} */
    489 
    490 /* {{{ mp_set_int(mp, z) */
    491 
    492 mp_err mp_set_int(mp_int *mp, long z)
    493 {
    494   int            ix;
    495   unsigned long  v = abs(z);
    496   mp_err         res;
    497 
    498   ARGCHK(mp != NULL, MP_BADARG);
    499 
    500   mp_zero(mp);
    501   if(z == 0)
    502     return MP_OKAY;  /* shortcut for zero */
    503 
    504   for(ix = sizeof(long) - 1; ix >= 0; ix--) {
    505 
    506     if((res = s_mp_mul_2d(mp, CHAR_BIT)) != MP_OKAY)
    507       return res;
    508 
    509     res = s_mp_add_d(mp,
    510 		     (mp_digit)((v >> (ix * CHAR_BIT)) & UCHAR_MAX));
    511     if(res != MP_OKAY)
    512       return res;
    513 
    514   }
    515 
    516   if(z < 0)
    517     SIGN(mp) = MP_NEG;
    518 
    519   return MP_OKAY;
    520 
    521 } /* end mp_set_int() */
    522 
    523 /* }}} */
    524 
    525 /*------------------------------------------------------------------------*/
    526 /* {{{ Digit arithmetic */
    527 
    528 /* {{{ mp_add_d(a, d, b) */
    529 
    530 /*
    531   mp_add_d(a, d, b)
    532 
    533   Compute the sum b = a + d, for a single digit d.  Respects the sign of
    534   its primary addend (single digits are unsigned anyway).
    535  */
    536 
    537 mp_err mp_add_d(mp_int *a, mp_digit d, mp_int *b)
    538 {
    539   mp_err   res = MP_OKAY;
    540 
    541   ARGCHK(a != NULL && b != NULL, MP_BADARG);
    542 
    543   if((res = mp_copy(a, b)) != MP_OKAY)
    544     return res;
    545 
    546   if(SIGN(b) == MP_ZPOS) {
    547     res = s_mp_add_d(b, d);
    548   } else if(s_mp_cmp_d(b, d) >= 0) {
    549     res = s_mp_sub_d(b, d);
    550   } else {
    551     SIGN(b) = MP_ZPOS;
    552 
    553     DIGIT(b, 0) = d - DIGIT(b, 0);
    554   }
    555 
    556   return res;
    557 
    558 } /* end mp_add_d() */
    559 
    560 /* }}} */
    561 
    562 /* {{{ mp_sub_d(a, d, b) */
    563 
    564 /*
    565   mp_sub_d(a, d, b)
    566 
    567   Compute the difference b = a - d, for a single digit d.  Respects the
    568   sign of its subtrahend (single digits are unsigned anyway).
    569  */
    570 
    571 mp_err mp_sub_d(mp_int *a, mp_digit d, mp_int *b)
    572 {
    573   mp_err   res;
    574 
    575   ARGCHK(a != NULL && b != NULL, MP_BADARG);
    576 
    577   if((res = mp_copy(a, b)) != MP_OKAY)
    578     return res;
    579 
    580   if(SIGN(b) == MP_NEG) {
    581     if((res = s_mp_add_d(b, d)) != MP_OKAY)
    582       return res;
    583 
    584   } else if(s_mp_cmp_d(b, d) >= 0) {
    585     if((res = s_mp_sub_d(b, d)) != MP_OKAY)
    586       return res;
    587 
    588   } else {
    589     mp_neg(b, b);
    590 
    591     DIGIT(b, 0) = d - DIGIT(b, 0);
    592     SIGN(b) = MP_NEG;
    593   }
    594 
    595   if(s_mp_cmp_d(b, 0) == 0)
    596     SIGN(b) = MP_ZPOS;
    597 
    598   return MP_OKAY;
    599 
    600 } /* end mp_sub_d() */
    601 
    602 /* }}} */
    603 
    604 /* {{{ mp_mul_d(a, d, b) */
    605 
    606 /*
    607   mp_mul_d(a, d, b)
    608 
    609   Compute the product b = a * d, for a single digit d.  Respects the sign
    610   of its multiplicand (single digits are unsigned anyway)
    611  */
    612 
    613 mp_err mp_mul_d(mp_int *a, mp_digit d, mp_int *b)
    614 {
    615   mp_err  res;
    616 
    617   ARGCHK(a != NULL && b != NULL, MP_BADARG);
    618 
    619   if(d == 0) {
    620     mp_zero(b);
    621     return MP_OKAY;
    622   }
    623 
    624   if((res = mp_copy(a, b)) != MP_OKAY)
    625     return res;
    626 
    627   res = s_mp_mul_d(b, d);
    628 
    629   return res;
    630 
    631 } /* end mp_mul_d() */
    632 
    633 /* }}} */
    634 
    635 /* {{{ mp_mul_2(a, c) */
    636 
    637 mp_err mp_mul_2(mp_int *a, mp_int *c)
    638 {
    639   mp_err  res;
    640 
    641   ARGCHK(a != NULL && c != NULL, MP_BADARG);
    642 
    643   if((res = mp_copy(a, c)) != MP_OKAY)
    644     return res;
    645 
    646   return s_mp_mul_2(c);
    647 
    648 } /* end mp_mul_2() */
    649 
    650 /* }}} */
    651 
    652 /* {{{ mp_div_d(a, d, q, r) */
    653 
    654 /*
    655   mp_div_d(a, d, q, r)
    656 
    657   Compute the quotient q = a / d and remainder r = a mod d, for a
    658   single digit d.  Respects the sign of its divisor (single digits are
    659   unsigned anyway).
    660  */
    661 
    662 mp_err mp_div_d(mp_int *a, mp_digit d, mp_int *q, mp_digit *r)
    663 {
    664   mp_err   res;
    665   mp_digit rem;
    666   int      pow;
    667 
    668   ARGCHK(a != NULL, MP_BADARG);
    669 
    670   if(d == 0)
    671     return MP_RANGE;
    672 
    673   /* Shortcut for powers of two ... */
    674   if((pow = s_mp_ispow2d(d)) >= 0) {
    675     mp_digit  mask;
    676 
    677     mask = (1 << pow) - 1;
    678     rem = DIGIT(a, 0) & mask;
    679 
    680     if(q) {
    681       mp_copy(a, q);
    682       s_mp_div_2d(q, pow);
    683     }
    684 
    685     if(r)
    686       *r = rem;
    687 
    688     return MP_OKAY;
    689   }
    690 
    691   /*
    692     If the quotient is actually going to be returned, we'll try to
    693     avoid hitting the memory allocator by copying the dividend into it
    694     and doing the division there.  This can't be any _worse_ than
    695     always copying, and will sometimes be better (since it won't make
    696     another copy)
    697 
    698     If it's not going to be returned, we need to allocate a temporary
    699     to hold the quotient, which will just be discarded.
    700    */
    701   if(q) {
    702     if((res = mp_copy(a, q)) != MP_OKAY)
    703       return res;
    704 
    705     res = s_mp_div_d(q, d, &rem);
    706     if(s_mp_cmp_d(q, 0) == MP_EQ)
    707       SIGN(q) = MP_ZPOS;
    708 
    709   } else {
    710     mp_int  qp;
    711 
    712     if((res = mp_init_copy(&qp, a)) != MP_OKAY)
    713       return res;
    714 
    715     res = s_mp_div_d(&qp, d, &rem);
    716     if(s_mp_cmp_d(&qp, 0) == 0)
    717       SIGN(&qp) = MP_ZPOS;
    718 
    719     mp_clear(&qp);
    720   }
    721 
    722   if(r)
    723     *r = rem;
    724 
    725   return res;
    726 
    727 } /* end mp_div_d() */
    728 
    729 /* }}} */
    730 
    731 /* {{{ mp_div_2(a, c) */
    732 
    733 /*
    734   mp_div_2(a, c)
    735 
    736   Compute c = a / 2, disregarding the remainder.
    737  */
    738 
    739 mp_err mp_div_2(mp_int *a, mp_int *c)
    740 {
    741   mp_err  res;
    742 
    743   ARGCHK(a != NULL && c != NULL, MP_BADARG);
    744 
    745   if((res = mp_copy(a, c)) != MP_OKAY)
    746     return res;
    747 
    748   s_mp_div_2(c);
    749 
    750   return MP_OKAY;
    751 
    752 } /* end mp_div_2() */
    753 
    754 /* }}} */
    755 
    756 /* {{{ mp_expt_d(a, d, b) */
    757 
    758 mp_err mp_expt_d(mp_int *a, mp_digit d, mp_int *c)
    759 {
    760   mp_int   s, x;
    761   mp_err   res;
    762 
    763   ARGCHK(a != NULL && c != NULL, MP_BADARG);
    764 
    765   if((res = mp_init(&s)) != MP_OKAY)
    766     return res;
    767   if((res = mp_init_copy(&x, a)) != MP_OKAY)
    768     goto X;
    769 
    770   DIGIT(&s, 0) = 1;
    771 
    772   while(d != 0) {
    773     if(d & 1) {
    774       if((res = s_mp_mul(&s, &x)) != MP_OKAY)
    775 	goto CLEANUP;
    776     }
    777 
    778     d >>= 1;
    779 
    780     if((res = s_mp_sqr(&x)) != MP_OKAY)
    781       goto CLEANUP;
    782   }
    783 
    784   s_mp_exch(&s, c);
    785 
    786 CLEANUP:
    787   mp_clear(&x);
    788 X:
    789   mp_clear(&s);
    790 
    791   return res;
    792 
    793 } /* end mp_expt_d() */
    794 
    795 /* }}} */
    796 
    797 /* }}} */
    798 
    799 /*------------------------------------------------------------------------*/
    800 /* {{{ Full arithmetic */
    801 
    802 /* {{{ mp_abs(a, b) */
    803 
    804 /*
    805   mp_abs(a, b)
    806 
    807   Compute b = |a|.  'a' and 'b' may be identical.
    808  */
    809 
    810 mp_err mp_abs(mp_int *a, mp_int *b)
    811 {
    812   mp_err   res;
    813 
    814   ARGCHK(a != NULL && b != NULL, MP_BADARG);
    815 
    816   if((res = mp_copy(a, b)) != MP_OKAY)
    817     return res;
    818 
    819   SIGN(b) = MP_ZPOS;
    820 
    821   return MP_OKAY;
    822 
    823 } /* end mp_abs() */
    824 
    825 /* }}} */
    826 
    827 /* {{{ mp_neg(a, b) */
    828 
    829 /*
    830   mp_neg(a, b)
    831 
    832   Compute b = -a.  'a' and 'b' may be identical.
    833  */
    834 
    835 mp_err mp_neg(mp_int *a, mp_int *b)
    836 {
    837   mp_err   res;
    838 
    839   ARGCHK(a != NULL && b != NULL, MP_BADARG);
    840 
    841   if((res = mp_copy(a, b)) != MP_OKAY)
    842     return res;
    843 
    844   if(s_mp_cmp_d(b, 0) == MP_EQ)
    845     SIGN(b) = MP_ZPOS;
    846   else
    847     SIGN(b) = (SIGN(b) == MP_NEG) ? MP_ZPOS : MP_NEG;
    848 
    849   return MP_OKAY;
    850 
    851 } /* end mp_neg() */
    852 
    853 /* }}} */
    854 
    855 /* {{{ mp_add(a, b, c) */
    856 
    857 /*
    858   mp_add(a, b, c)
    859 
    860   Compute c = a + b.  All parameters may be identical.
    861  */
    862 
    863 mp_err mp_add(mp_int *a, mp_int *b, mp_int *c)
    864 {
    865   mp_err  res;
    866   int     cmp;
    867 
    868   ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
    869 
    870   if(SIGN(a) == SIGN(b)) { /* same sign:  add values, keep sign */
    871 
    872     /* Commutativity of addition lets us do this in either order,
    873        so we avoid having to use a temporary even if the result
    874        is supposed to replace the output
    875      */
    876     if(c == b) {
    877       if((res = s_mp_add(c, a)) != MP_OKAY)
    878 	return res;
    879     } else {
    880       if(c != a && (res = mp_copy(a, c)) != MP_OKAY)
    881 	return res;
    882 
    883       if((res = s_mp_add(c, b)) != MP_OKAY)
    884 	return res;
    885     }
    886 
    887   } else if((cmp = s_mp_cmp(a, b)) > 0) {  /* different sign: a > b   */
    888 
    889     /* If the output is going to be clobbered, we will use a temporary
    890        variable; otherwise, we'll do it without touching the memory
    891        allocator at all, if possible
    892      */
    893     if(c == b) {
    894       mp_int  tmp;
    895 
    896       if((res = mp_init_copy(&tmp, a)) != MP_OKAY)
    897 	return res;
    898       if((res = s_mp_sub(&tmp, b)) != MP_OKAY) {
    899 	mp_clear(&tmp);
    900 	return res;
    901       }
    902 
    903       s_mp_exch(&tmp, c);
    904       mp_clear(&tmp);
    905 
    906     } else {
    907 
    908       if(c != a && (res = mp_copy(a, c)) != MP_OKAY)
    909 	return res;
    910       if((res = s_mp_sub(c, b)) != MP_OKAY)
    911 	return res;
    912 
    913     }
    914 
    915   } else if(cmp == 0) {             /* different sign, a == b   */
    916 
    917     mp_zero(c);
    918     return MP_OKAY;
    919 
    920   } else {                          /* different sign: a < b    */
    921 
    922     /* See above... */
    923     if(c == a) {
    924       mp_int  tmp;
    925 
    926       if((res = mp_init_copy(&tmp, b)) != MP_OKAY)
    927 	return res;
    928       if((res = s_mp_sub(&tmp, a)) != MP_OKAY) {
    929 	mp_clear(&tmp);
    930 	return res;
    931       }
    932 
    933       s_mp_exch(&tmp, c);
    934       mp_clear(&tmp);
    935 
    936     } else {
    937 
    938       if(c != b && (res = mp_copy(b, c)) != MP_OKAY)
    939 	return res;
    940       if((res = s_mp_sub(c, a)) != MP_OKAY)
    941 	return res;
    942 
    943     }
    944   }
    945 
    946   if(USED(c) == 1 && DIGIT(c, 0) == 0)
    947     SIGN(c) = MP_ZPOS;
    948 
    949   return MP_OKAY;
    950 
    951 } /* end mp_add() */
    952 
    953 /* }}} */
    954 
    955 /* {{{ mp_sub(a, b, c) */
    956 
    957 /*
    958   mp_sub(a, b, c)
    959 
    960   Compute c = a - b.  All parameters may be identical.
    961  */
    962 
    963 mp_err mp_sub(mp_int *a, mp_int *b, mp_int *c)
    964 {
    965   mp_err  res;
    966   int     cmp;
    967 
    968   ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
    969 
    970   if(SIGN(a) != SIGN(b)) {
    971     if(c == a) {
    972       if((res = s_mp_add(c, b)) != MP_OKAY)
    973 	return res;
    974     } else {
    975       if(c != b && ((res = mp_copy(b, c)) != MP_OKAY))
    976 	return res;
    977       if((res = s_mp_add(c, a)) != MP_OKAY)
    978 	return res;
    979       SIGN(c) = SIGN(a);
    980     }
    981 
    982   } else if((cmp = s_mp_cmp(a, b)) > 0) { /* Same sign, a > b */
    983     if(c == b) {
    984       mp_int  tmp;
    985 
    986       if((res = mp_init_copy(&tmp, a)) != MP_OKAY)
    987 	return res;
    988       if((res = s_mp_sub(&tmp, b)) != MP_OKAY) {
    989 	mp_clear(&tmp);
    990 	return res;
    991       }
    992       s_mp_exch(&tmp, c);
    993       mp_clear(&tmp);
    994 
    995     } else {
    996       if(c != a && ((res = mp_copy(a, c)) != MP_OKAY))
    997 	return res;
    998 
    999       if((res = s_mp_sub(c, b)) != MP_OKAY)
   1000 	return res;
   1001     }
   1002 
   1003   } else if(cmp == 0) {  /* Same sign, equal magnitude */
   1004     mp_zero(c);
   1005     return MP_OKAY;
   1006 
   1007   } else {               /* Same sign, b > a */
   1008     if(c == a) {
   1009       mp_int  tmp;
   1010 
   1011       if((res = mp_init_copy(&tmp, b)) != MP_OKAY)
   1012 	return res;
   1013 
   1014       if((res = s_mp_sub(&tmp, a)) != MP_OKAY) {
   1015 	mp_clear(&tmp);
   1016 	return res;
   1017       }
   1018       s_mp_exch(&tmp, c);
   1019       mp_clear(&tmp);
   1020 
   1021     } else {
   1022       if(c != b && ((res = mp_copy(b, c)) != MP_OKAY))
   1023 	return res;
   1024 
   1025       if((res = s_mp_sub(c, a)) != MP_OKAY)
   1026 	return res;
   1027     }
   1028 
   1029     SIGN(c) = !SIGN(b);
   1030   }
   1031 
   1032   if(USED(c) == 1 && DIGIT(c, 0) == 0)
   1033     SIGN(c) = MP_ZPOS;
   1034 
   1035   return MP_OKAY;
   1036 
   1037 } /* end mp_sub() */
   1038 
   1039 /* }}} */
   1040 
   1041 /* {{{ mp_mul(a, b, c) */
   1042 
   1043 /*
   1044   mp_mul(a, b, c)
   1045 
   1046   Compute c = a * b.  All parameters may be identical.
   1047  */
   1048 
   1049 mp_err mp_mul(mp_int *a, mp_int *b, mp_int *c)
   1050 {
   1051   mp_err   res;
   1052   mp_sign  sgn;
   1053 
   1054   ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
   1055 
   1056   sgn = (SIGN(a) == SIGN(b)) ? MP_ZPOS : MP_NEG;
   1057 
   1058   if(c == b) {
   1059     if((res = s_mp_mul(c, a)) != MP_OKAY)
   1060       return res;
   1061 
   1062   } else {
   1063     if((res = mp_copy(a, c)) != MP_OKAY)
   1064       return res;
   1065 
   1066     if((res = s_mp_mul(c, b)) != MP_OKAY)
   1067       return res;
   1068   }
   1069 
   1070   if(sgn == MP_ZPOS || s_mp_cmp_d(c, 0) == MP_EQ)
   1071     SIGN(c) = MP_ZPOS;
   1072   else
   1073     SIGN(c) = sgn;
   1074 
   1075   return MP_OKAY;
   1076 
   1077 } /* end mp_mul() */
   1078 
   1079 /* }}} */
   1080 
   1081 /* {{{ mp_mul_2d(a, d, c) */
   1082 
   1083 /*
   1084   mp_mul_2d(a, d, c)
   1085 
   1086   Compute c = a * 2^d.  a may be the same as c.
   1087  */
   1088 
   1089 mp_err mp_mul_2d(mp_int *a, mp_digit d, mp_int *c)
   1090 {
   1091   mp_err   res;
   1092 
   1093   ARGCHK(a != NULL && c != NULL, MP_BADARG);
   1094 
   1095   if((res = mp_copy(a, c)) != MP_OKAY)
   1096     return res;
   1097 
   1098   if(d == 0)
   1099     return MP_OKAY;
   1100 
   1101   return s_mp_mul_2d(c, d);
   1102 
   1103 } /* end mp_mul() */
   1104 
   1105 /* }}} */
   1106 
   1107 /* {{{ mp_sqr(a, b) */
   1108 
   1109 #if MP_SQUARE
   1110 mp_err mp_sqr(mp_int *a, mp_int *b)
   1111 {
   1112   mp_err   res;
   1113 
   1114   ARGCHK(a != NULL && b != NULL, MP_BADARG);
   1115 
   1116   if((res = mp_copy(a, b)) != MP_OKAY)
   1117     return res;
   1118 
   1119   if((res = s_mp_sqr(b)) != MP_OKAY)
   1120     return res;
   1121 
   1122   SIGN(b) = MP_ZPOS;
   1123 
   1124   return MP_OKAY;
   1125 
   1126 } /* end mp_sqr() */
   1127 #endif
   1128 
   1129 /* }}} */
   1130 
   1131 /* {{{ mp_div(a, b, q, r) */
   1132 
   1133 /*
   1134   mp_div(a, b, q, r)
   1135 
   1136   Compute q = a / b and r = a mod b.  Input parameters may be re-used
   1137   as output parameters.  If q or r is NULL, that portion of the
   1138   computation will be discarded (although it will still be computed)
   1139 
   1140   Pay no attention to the hacker behind the curtain.
   1141  */
   1142 
   1143 mp_err mp_div(mp_int *a, mp_int *b, mp_int *q, mp_int *r)
   1144 {
   1145   mp_err   res;
   1146   mp_int   qtmp, rtmp;
   1147   int      cmp;
   1148 
   1149   ARGCHK(a != NULL && b != NULL, MP_BADARG);
   1150 
   1151   if(mp_cmp_z(b) == MP_EQ)
   1152     return MP_RANGE;
   1153 
   1154   /* If a <= b, we can compute the solution without division, and
   1155      avoid any memory allocation
   1156    */
   1157   if((cmp = s_mp_cmp(a, b)) < 0) {
   1158     if(r) {
   1159       if((res = mp_copy(a, r)) != MP_OKAY)
   1160 	return res;
   1161     }
   1162 
   1163     if(q)
   1164       mp_zero(q);
   1165 
   1166     return MP_OKAY;
   1167 
   1168   } else if(cmp == 0) {
   1169 
   1170     /* Set quotient to 1, with appropriate sign */
   1171     if(q) {
   1172       int qneg = (SIGN(a) != SIGN(b));
   1173 
   1174       mp_set(q, 1);
   1175       if(qneg)
   1176 	SIGN(q) = MP_NEG;
   1177     }
   1178 
   1179     if(r)
   1180       mp_zero(r);
   1181 
   1182     return MP_OKAY;
   1183   }
   1184 
   1185   /* If we get here, it means we actually have to do some division */
   1186 
   1187   /* Set up some temporaries... */
   1188   if((res = mp_init_copy(&qtmp, a)) != MP_OKAY)
   1189     return res;
   1190   if((res = mp_init_copy(&rtmp, b)) != MP_OKAY)
   1191     goto CLEANUP;
   1192 
   1193   if((res = s_mp_div(&qtmp, &rtmp)) != MP_OKAY)
   1194     goto CLEANUP;
   1195 
   1196   /* Compute the signs for the output  */
   1197   SIGN(&rtmp) = SIGN(a); /* Sr = Sa              */
   1198   if(SIGN(a) == SIGN(b))
   1199     SIGN(&qtmp) = MP_ZPOS;  /* Sq = MP_ZPOS if Sa = Sb */
   1200   else
   1201     SIGN(&qtmp) = MP_NEG;   /* Sq = MP_NEG if Sa != Sb */
   1202 
   1203   if(s_mp_cmp_d(&qtmp, 0) == MP_EQ)
   1204     SIGN(&qtmp) = MP_ZPOS;
   1205   if(s_mp_cmp_d(&rtmp, 0) == MP_EQ)
   1206     SIGN(&rtmp) = MP_ZPOS;
   1207 
   1208   /* Copy output, if it is needed      */
   1209   if(q)
   1210     s_mp_exch(&qtmp, q);
   1211 
   1212   if(r)
   1213     s_mp_exch(&rtmp, r);
   1214 
   1215 CLEANUP:
   1216   mp_clear(&rtmp);
   1217   mp_clear(&qtmp);
   1218 
   1219   return res;
   1220 
   1221 } /* end mp_div() */
   1222 
   1223 /* }}} */
   1224 
   1225 /* {{{ mp_div_2d(a, d, q, r) */
   1226 
   1227 mp_err mp_div_2d(mp_int *a, mp_digit d, mp_int *q, mp_int *r)
   1228 {
   1229   mp_err  res;
   1230 
   1231   ARGCHK(a != NULL, MP_BADARG);
   1232 
   1233   if(q) {
   1234     if((res = mp_copy(a, q)) != MP_OKAY)
   1235       return res;
   1236 
   1237     s_mp_div_2d(q, d);
   1238   }
   1239 
   1240   if(r) {
   1241     if((res = mp_copy(a, r)) != MP_OKAY)
   1242       return res;
   1243 
   1244     s_mp_mod_2d(r, d);
   1245   }
   1246 
   1247   return MP_OKAY;
   1248 
   1249 } /* end mp_div_2d() */
   1250 
   1251 /* }}} */
   1252 
   1253 /* {{{ mp_expt(a, b, c) */
   1254 
   1255 /*
   1256   mp_expt(a, b, c)
   1257 
   1258   Compute c = a ** b, that is, raise a to the b power.  Uses a
   1259   standard iterative square-and-multiply technique.
   1260  */
   1261 
   1262 mp_err mp_expt(mp_int *a, mp_int *b, mp_int *c)
   1263 {
   1264   mp_int   s, x;
   1265   mp_err   res;
   1266   mp_digit d;
   1267   int      dig, bit;
   1268 
   1269   ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
   1270 
   1271   if(mp_cmp_z(b) < 0)
   1272     return MP_RANGE;
   1273 
   1274   if((res = mp_init(&s)) != MP_OKAY)
   1275     return res;
   1276 
   1277   mp_set(&s, 1);
   1278 
   1279   if((res = mp_init_copy(&x, a)) != MP_OKAY)
   1280     goto X;
   1281 
   1282   /* Loop over low-order digits in ascending order */
   1283   for(dig = 0; dig < (USED(b) - 1); dig++) {
   1284     d = DIGIT(b, dig);
   1285 
   1286     /* Loop over bits of each non-maximal digit */
   1287     for(bit = 0; bit < DIGIT_BIT; bit++) {
   1288       if(d & 1) {
   1289 	if((res = s_mp_mul(&s, &x)) != MP_OKAY)
   1290 	  goto CLEANUP;
   1291       }
   1292 
   1293       d >>= 1;
   1294 
   1295       if((res = s_mp_sqr(&x)) != MP_OKAY)
   1296 	goto CLEANUP;
   1297     }
   1298   }
   1299 
   1300   /* Consider now the last digit... */
   1301   d = DIGIT(b, dig);
   1302 
   1303   while(d) {
   1304     if(d & 1) {
   1305       if((res = s_mp_mul(&s, &x)) != MP_OKAY)
   1306 	goto CLEANUP;
   1307     }
   1308 
   1309     d >>= 1;
   1310 
   1311     if((res = s_mp_sqr(&x)) != MP_OKAY)
   1312       goto CLEANUP;
   1313   }
   1314 
   1315   if(mp_iseven(b))
   1316     SIGN(&s) = SIGN(a);
   1317 
   1318   res = mp_copy(&s, c);
   1319 
   1320 CLEANUP:
   1321   mp_clear(&x);
   1322 X:
   1323   mp_clear(&s);
   1324 
   1325   return res;
   1326 
   1327 } /* end mp_expt() */
   1328 
   1329 /* }}} */
   1330 
   1331 /* {{{ mp_2expt(a, k) */
   1332 
   1333 /* Compute a = 2^k */
   1334 
   1335 mp_err mp_2expt(mp_int *a, mp_digit k)
   1336 {
   1337   ARGCHK(a != NULL, MP_BADARG);
   1338 
   1339   return s_mp_2expt(a, k);
   1340 
   1341 } /* end mp_2expt() */
   1342 
   1343 /* }}} */
   1344 
   1345 /* {{{ mp_mod(a, m, c) */
   1346 
   1347 /*
   1348   mp_mod(a, m, c)
   1349 
   1350   Compute c = a (mod m).  Result will always be 0 <= c < m.
   1351  */
   1352 
   1353 mp_err mp_mod(mp_int *a, mp_int *m, mp_int *c)
   1354 {
   1355   mp_err  res;
   1356   int     mag;
   1357 
   1358   ARGCHK(a != NULL && m != NULL && c != NULL, MP_BADARG);
   1359 
   1360   if(SIGN(m) == MP_NEG)
   1361     return MP_RANGE;
   1362 
   1363   /*
   1364      If |a| > m, we need to divide to get the remainder and take the
   1365      absolute value.
   1366 
   1367      If |a| < m, we don't need to do any division, just copy and adjust
   1368      the sign (if a is negative).
   1369 
   1370      If |a| == m, we can simply set the result to zero.
   1371 
   1372      This order is intended to minimize the average path length of the
   1373      comparison chain on common workloads -- the most frequent cases are
   1374      that |a| != m, so we do those first.
   1375    */
   1376   if((mag = s_mp_cmp(a, m)) > 0) {
   1377     if((res = mp_div(a, m, NULL, c)) != MP_OKAY)
   1378       return res;
   1379 
   1380     if(SIGN(c) == MP_NEG) {
   1381       if((res = mp_add(c, m, c)) != MP_OKAY)
   1382 	return res;
   1383     }
   1384 
   1385   } else if(mag < 0) {
   1386     if((res = mp_copy(a, c)) != MP_OKAY)
   1387       return res;
   1388 
   1389     if(mp_cmp_z(a) < 0) {
   1390       if((res = mp_add(c, m, c)) != MP_OKAY)
   1391 	return res;
   1392 
   1393     }
   1394 
   1395   } else {
   1396     mp_zero(c);
   1397 
   1398   }
   1399 
   1400   return MP_OKAY;
   1401 
   1402 } /* end mp_mod() */
   1403 
   1404 /* }}} */
   1405 
   1406 /* {{{ mp_mod_d(a, d, c) */
   1407 
   1408 /*
   1409   mp_mod_d(a, d, c)
   1410 
   1411   Compute c = a (mod d).  Result will always be 0 <= c < d
   1412  */
   1413 mp_err mp_mod_d(mp_int *a, mp_digit d, mp_digit *c)
   1414 {
   1415   mp_err   res;
   1416   mp_digit rem;
   1417 
   1418   ARGCHK(a != NULL && c != NULL, MP_BADARG);
   1419 
   1420   if(s_mp_cmp_d(a, d) > 0) {
   1421     if((res = mp_div_d(a, d, NULL, &rem)) != MP_OKAY)
   1422       return res;
   1423 
   1424   } else {
   1425     if(SIGN(a) == MP_NEG)
   1426       rem = d - DIGIT(a, 0);
   1427     else
   1428       rem = DIGIT(a, 0);
   1429   }
   1430 
   1431   if(c)
   1432     *c = rem;
   1433 
   1434   return MP_OKAY;
   1435 
   1436 } /* end mp_mod_d() */
   1437 
   1438 /* }}} */
   1439 
   1440 /* {{{ mp_sqrt(a, b) */
   1441 
   1442 /*
   1443   mp_sqrt(a, b)
   1444 
   1445   Compute the integer square root of a, and store the result in b.
   1446   Uses an integer-arithmetic version of Newton's iterative linear
   1447   approximation technique to determine this value; the result has the
   1448   following two properties:
   1449 
   1450      b^2 <= a
   1451      (b+1)^2 >= a
   1452 
   1453   It is a range error to pass a negative value.
   1454  */
   1455 mp_err mp_sqrt(mp_int *a, mp_int *b)
   1456 {
   1457   mp_int   x, t;
   1458   mp_err   res;
   1459 
   1460   ARGCHK(a != NULL && b != NULL, MP_BADARG);
   1461 
   1462   /* Cannot take square root of a negative value */
   1463   if(SIGN(a) == MP_NEG)
   1464     return MP_RANGE;
   1465 
   1466   /* Special cases for zero and one, trivial     */
   1467   if(mp_cmp_d(a, 0) == MP_EQ || mp_cmp_d(a, 1) == MP_EQ)
   1468     return mp_copy(a, b);
   1469 
   1470   /* Initialize the temporaries we'll use below  */
   1471   if((res = mp_init_size(&t, USED(a))) != MP_OKAY)
   1472     return res;
   1473 
   1474   /* Compute an initial guess for the iteration as a itself */
   1475   if((res = mp_init_copy(&x, a)) != MP_OKAY)
   1476     goto X;
   1477 
   1478 s_mp_rshd(&x, (USED(&x)/2)+1);
   1479 mp_add_d(&x, 1, &x);
   1480 
   1481   for(;;) {
   1482     /* t = (x * x) - a */
   1483     mp_copy(&x, &t);      /* can't fail, t is big enough for original x */
   1484     if((res = mp_sqr(&t, &t)) != MP_OKAY ||
   1485        (res = mp_sub(&t, a, &t)) != MP_OKAY)
   1486       goto CLEANUP;
   1487 
   1488     /* t = t / 2x       */
   1489     s_mp_mul_2(&x);
   1490     if((res = mp_div(&t, &x, &t, NULL)) != MP_OKAY)
   1491       goto CLEANUP;
   1492     s_mp_div_2(&x);
   1493 
   1494     /* Terminate the loop, if the quotient is zero */
   1495     if(mp_cmp_z(&t) == MP_EQ)
   1496       break;
   1497 
   1498     /* x = x - t       */
   1499     if((res = mp_sub(&x, &t, &x)) != MP_OKAY)
   1500       goto CLEANUP;
   1501 
   1502   }
   1503 
   1504   /* Copy result to output parameter */
   1505   mp_sub_d(&x, 1, &x);
   1506   s_mp_exch(&x, b);
   1507 
   1508  CLEANUP:
   1509   mp_clear(&x);
   1510  X:
   1511   mp_clear(&t);
   1512 
   1513   return res;
   1514 
   1515 } /* end mp_sqrt() */
   1516 
   1517 /* }}} */
   1518 
   1519 /* }}} */
   1520 
   1521 /*------------------------------------------------------------------------*/
   1522 /* {{{ Modular arithmetic */
   1523 
   1524 #if MP_MODARITH
   1525 /* {{{ mp_addmod(a, b, m, c) */
   1526 
   1527 /*
   1528   mp_addmod(a, b, m, c)
   1529 
   1530   Compute c = (a + b) mod m
   1531  */
   1532 
   1533 mp_err mp_addmod(mp_int *a, mp_int *b, mp_int *m, mp_int *c)
   1534 {
   1535   mp_err  res;
   1536 
   1537   ARGCHK(a != NULL && b != NULL && m != NULL && c != NULL, MP_BADARG);
   1538 
   1539   if((res = mp_add(a, b, c)) != MP_OKAY)
   1540     return res;
   1541   if((res = mp_mod(c, m, c)) != MP_OKAY)
   1542     return res;
   1543 
   1544   return MP_OKAY;
   1545 
   1546 }
   1547 
   1548 /* }}} */
   1549 
   1550 /* {{{ mp_submod(a, b, m, c) */
   1551 
   1552 /*
   1553   mp_submod(a, b, m, c)
   1554 
   1555   Compute c = (a - b) mod m
   1556  */
   1557 
   1558 mp_err mp_submod(mp_int *a, mp_int *b, mp_int *m, mp_int *c)
   1559 {
   1560   mp_err  res;
   1561 
   1562   ARGCHK(a != NULL && b != NULL && m != NULL && c != NULL, MP_BADARG);
   1563 
   1564   if((res = mp_sub(a, b, c)) != MP_OKAY)
   1565     return res;
   1566   if((res = mp_mod(c, m, c)) != MP_OKAY)
   1567     return res;
   1568 
   1569   return MP_OKAY;
   1570 
   1571 }
   1572 
   1573 /* }}} */
   1574 
   1575 /* {{{ mp_mulmod(a, b, m, c) */
   1576 
   1577 /*
   1578   mp_mulmod(a, b, m, c)
   1579 
   1580   Compute c = (a * b) mod m
   1581  */
   1582 
   1583 mp_err mp_mulmod(mp_int *a, mp_int *b, mp_int *m, mp_int *c)
   1584 {
   1585   mp_err  res;
   1586 
   1587   ARGCHK(a != NULL && b != NULL && m != NULL && c != NULL, MP_BADARG);
   1588 
   1589   if((res = mp_mul(a, b, c)) != MP_OKAY)
   1590     return res;
   1591   if((res = mp_mod(c, m, c)) != MP_OKAY)
   1592     return res;
   1593 
   1594   return MP_OKAY;
   1595 
   1596 }
   1597 
   1598 /* }}} */
   1599 
   1600 /* {{{ mp_sqrmod(a, m, c) */
   1601 
   1602 #if MP_SQUARE
   1603 mp_err mp_sqrmod(mp_int *a, mp_int *m, mp_int *c)
   1604 {
   1605   mp_err  res;
   1606 
   1607   ARGCHK(a != NULL && m != NULL && c != NULL, MP_BADARG);
   1608 
   1609   if((res = mp_sqr(a, c)) != MP_OKAY)
   1610     return res;
   1611   if((res = mp_mod(c, m, c)) != MP_OKAY)
   1612     return res;
   1613 
   1614   return MP_OKAY;
   1615 
   1616 } /* end mp_sqrmod() */
   1617 #endif
   1618 
   1619 /* }}} */
   1620 
   1621 /* {{{ mp_exptmod(a, b, m, c) */
   1622 
   1623 /*
   1624   mp_exptmod(a, b, m, c)
   1625 
   1626   Compute c = (a ** b) mod m.  Uses a standard square-and-multiply
   1627   method with modular reductions at each step. (This is basically the
   1628   same code as mp_expt(), except for the addition of the reductions)
   1629 
   1630   The modular reductions are done using Barrett's algorithm (see
   1631   s_mp_reduce() below for details)
   1632  */
   1633 
   1634 mp_err mp_exptmod(mp_int *a, mp_int *b, mp_int *m, mp_int *c)
   1635 {
   1636   mp_int   s, x, mu;
   1637   mp_err   res;
   1638   mp_digit d, *db = DIGITS(b);
   1639   mp_size  ub = USED(b);
   1640   int      dig, bit;
   1641 
   1642   ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
   1643 
   1644   if(mp_cmp_z(b) < 0 || mp_cmp_z(m) <= 0)
   1645     return MP_RANGE;
   1646 
   1647   if((res = mp_init(&s)) != MP_OKAY)
   1648     return res;
   1649   if((res = mp_init_copy(&x, a)) != MP_OKAY)
   1650     goto X;
   1651   if((res = mp_mod(&x, m, &x)) != MP_OKAY ||
   1652      (res = mp_init(&mu)) != MP_OKAY)
   1653     goto MU;
   1654 
   1655   mp_set(&s, 1);
   1656 
   1657   /* mu = b^2k / m */
   1658   s_mp_add_d(&mu, 1);
   1659   s_mp_lshd(&mu, 2 * USED(m));
   1660   if((res = mp_div(&mu, m, &mu, NULL)) != MP_OKAY)
   1661     goto CLEANUP;
   1662 
   1663   /* Loop over digits of b in ascending order, except highest order */
   1664   for(dig = 0; dig < (ub - 1); dig++) {
   1665     d = *db++;
   1666 
   1667     /* Loop over the bits of the lower-order digits */
   1668     for(bit = 0; bit < DIGIT_BIT; bit++) {
   1669       if(d & 1) {
   1670 	if((res = s_mp_mul(&s, &x)) != MP_OKAY)
   1671 	  goto CLEANUP;
   1672 	if((res = s_mp_reduce(&s, m, &mu)) != MP_OKAY)
   1673 	  goto CLEANUP;
   1674       }
   1675 
   1676       d >>= 1;
   1677 
   1678       if((res = s_mp_sqr(&x)) != MP_OKAY)
   1679 	goto CLEANUP;
   1680       if((res = s_mp_reduce(&x, m, &mu)) != MP_OKAY)
   1681 	goto CLEANUP;
   1682     }
   1683   }
   1684 
   1685   /* Now do the last digit... */
   1686   d = *db;
   1687 
   1688   while(d) {
   1689     if(d & 1) {
   1690       if((res = s_mp_mul(&s, &x)) != MP_OKAY)
   1691 	goto CLEANUP;
   1692       if((res = s_mp_reduce(&s, m, &mu)) != MP_OKAY)
   1693 	goto CLEANUP;
   1694     }
   1695 
   1696     d >>= 1;
   1697 
   1698     if((res = s_mp_sqr(&x)) != MP_OKAY)
   1699       goto CLEANUP;
   1700     if((res = s_mp_reduce(&x, m, &mu)) != MP_OKAY)
   1701       goto CLEANUP;
   1702   }
   1703 
   1704   s_mp_exch(&s, c);
   1705 
   1706  CLEANUP:
   1707   mp_clear(&mu);
   1708  MU:
   1709   mp_clear(&x);
   1710  X:
   1711   mp_clear(&s);
   1712 
   1713   return res;
   1714 
   1715 } /* end mp_exptmod() */
   1716 
   1717 /* }}} */
   1718 
   1719 /* {{{ mp_exptmod_d(a, d, m, c) */
   1720 
   1721 mp_err mp_exptmod_d(mp_int *a, mp_digit d, mp_int *m, mp_int *c)
   1722 {
   1723   mp_int   s, x;
   1724   mp_err   res;
   1725 
   1726   ARGCHK(a != NULL && c != NULL, MP_BADARG);
   1727 
   1728   if((res = mp_init(&s)) != MP_OKAY)
   1729     return res;
   1730   if((res = mp_init_copy(&x, a)) != MP_OKAY)
   1731     goto X;
   1732 
   1733   mp_set(&s, 1);
   1734 
   1735   while(d != 0) {
   1736     if(d & 1) {
   1737       if((res = s_mp_mul(&s, &x)) != MP_OKAY ||
   1738 	 (res = mp_mod(&s, m, &s)) != MP_OKAY)
   1739 	goto CLEANUP;
   1740     }
   1741 
   1742     d /= 2;
   1743 
   1744     if((res = s_mp_sqr(&x)) != MP_OKAY ||
   1745        (res = mp_mod(&x, m, &x)) != MP_OKAY)
   1746       goto CLEANUP;
   1747   }
   1748 
   1749   s_mp_exch(&s, c);
   1750 
   1751 CLEANUP:
   1752   mp_clear(&x);
   1753 X:
   1754   mp_clear(&s);
   1755 
   1756   return res;
   1757 
   1758 } /* end mp_exptmod_d() */
   1759 
   1760 /* }}} */
   1761 #endif /* if MP_MODARITH */
   1762 
   1763 /* }}} */
   1764 
   1765 /*------------------------------------------------------------------------*/
   1766 /* {{{ Comparison functions */
   1767 
   1768 /* {{{ mp_cmp_z(a) */
   1769 
   1770 /*
   1771   mp_cmp_z(a)
   1772 
   1773   Compare a <=> 0.  Returns <0 if a<0, 0 if a=0, >0 if a>0.
   1774  */
   1775 
   1776 int    mp_cmp_z(mp_int *a)
   1777 {
   1778   if(SIGN(a) == MP_NEG)
   1779     return MP_LT;
   1780   else if(USED(a) == 1 && DIGIT(a, 0) == 0)
   1781     return MP_EQ;
   1782   else
   1783     return MP_GT;
   1784 
   1785 } /* end mp_cmp_z() */
   1786 
   1787 /* }}} */
   1788 
   1789 /* {{{ mp_cmp_d(a, d) */
   1790 
   1791 /*
   1792   mp_cmp_d(a, d)
   1793 
   1794   Compare a <=> d.  Returns <0 if a<d, 0 if a=d, >0 if a>d
   1795  */
   1796 
   1797 int    mp_cmp_d(mp_int *a, mp_digit d)
   1798 {
   1799   ARGCHK(a != NULL, MP_EQ);
   1800 
   1801   if(SIGN(a) == MP_NEG)
   1802     return MP_LT;
   1803 
   1804   return s_mp_cmp_d(a, d);
   1805 
   1806 } /* end mp_cmp_d() */
   1807 
   1808 /* }}} */
   1809 
   1810 /* {{{ mp_cmp(a, b) */
   1811 
   1812 int    mp_cmp(mp_int *a, mp_int *b)
   1813 {
   1814   ARGCHK(a != NULL && b != NULL, MP_EQ);
   1815 
   1816   if(SIGN(a) == SIGN(b)) {
   1817     int  mag;
   1818 
   1819     if((mag = s_mp_cmp(a, b)) == MP_EQ)
   1820       return MP_EQ;
   1821 
   1822     if(SIGN(a) == MP_ZPOS)
   1823       return mag;
   1824     else
   1825       return -mag;
   1826 
   1827   } else if(SIGN(a) == MP_ZPOS) {
   1828     return MP_GT;
   1829   } else {
   1830     return MP_LT;
   1831   }
   1832 
   1833 } /* end mp_cmp() */
   1834 
   1835 /* }}} */
   1836 
   1837 /* {{{ mp_cmp_mag(a, b) */
   1838 
   1839 /*
   1840   mp_cmp_mag(a, b)
   1841 
   1842   Compares |a| <=> |b|, and returns an appropriate comparison result
   1843  */
   1844 
   1845 int    mp_cmp_mag(mp_int *a, mp_int *b)
   1846 {
   1847   ARGCHK(a != NULL && b != NULL, MP_EQ);
   1848 
   1849   return s_mp_cmp(a, b);
   1850 
   1851 } /* end mp_cmp_mag() */
   1852 
   1853 /* }}} */
   1854 
   1855 /* {{{ mp_cmp_int(a, z) */
   1856 
   1857 /*
   1858   This just converts z to an mp_int, and uses the existing comparison
   1859   routines.  This is sort of inefficient, but it's not clear to me how
   1860   frequently this wil get used anyway.  For small positive constants,
   1861   you can always use mp_cmp_d(), and for zero, there is mp_cmp_z().
   1862  */
   1863 int    mp_cmp_int(mp_int *a, long z)
   1864 {
   1865   mp_int  tmp;
   1866   int     out;
   1867 
   1868   ARGCHK(a != NULL, MP_EQ);
   1869 
   1870   mp_init(&tmp); mp_set_int(&tmp, z);
   1871   out = mp_cmp(a, &tmp);
   1872   mp_clear(&tmp);
   1873 
   1874   return out;
   1875 
   1876 } /* end mp_cmp_int() */
   1877 
   1878 /* }}} */
   1879 
   1880 /* {{{ mp_isodd(a) */
   1881 
   1882 /*
   1883   mp_isodd(a)
   1884 
   1885   Returns a true (non-zero) value if a is odd, false (zero) otherwise.
   1886  */
   1887 int    mp_isodd(mp_int *a)
   1888 {
   1889   ARGCHK(a != NULL, 0);
   1890 
   1891   return (DIGIT(a, 0) & 1);
   1892 
   1893 } /* end mp_isodd() */
   1894 
   1895 /* }}} */
   1896 
   1897 /* {{{ mp_iseven(a) */
   1898 
   1899 int    mp_iseven(mp_int *a)
   1900 {
   1901   return !mp_isodd(a);
   1902 
   1903 } /* end mp_iseven() */
   1904 
   1905 /* }}} */
   1906 
   1907 /* }}} */
   1908 
   1909 /*------------------------------------------------------------------------*/
   1910 /* {{{ Number theoretic functions */
   1911 
   1912 #if MP_NUMTH
   1913 /* {{{ mp_gcd(a, b, c) */
   1914 
   1915 /*
   1916   Like the old mp_gcd() function, except computes the GCD using the
   1917   binary algorithm due to Josef Stein in 1961 (via Knuth).
   1918  */
   1919 mp_err mp_gcd(mp_int *a, mp_int *b, mp_int *c)
   1920 {
   1921   mp_err   res;
   1922   mp_int   u, v, t;
   1923   mp_size  k = 0;
   1924 
   1925   ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
   1926 
   1927   if(mp_cmp_z(a) == MP_EQ && mp_cmp_z(b) == MP_EQ)
   1928       return MP_RANGE;
   1929   if(mp_cmp_z(a) == MP_EQ) {
   1930     return mp_copy(b, c);
   1931   } else if(mp_cmp_z(b) == MP_EQ) {
   1932     return mp_copy(a, c);
   1933   }
   1934 
   1935   if((res = mp_init(&t)) != MP_OKAY)
   1936     return res;
   1937   if((res = mp_init_copy(&u, a)) != MP_OKAY)
   1938     goto U;
   1939   if((res = mp_init_copy(&v, b)) != MP_OKAY)
   1940     goto V;
   1941 
   1942   SIGN(&u) = MP_ZPOS;
   1943   SIGN(&v) = MP_ZPOS;
   1944 
   1945   /* Divide out common factors of 2 until at least 1 of a, b is even */
   1946   while(mp_iseven(&u) && mp_iseven(&v)) {
   1947     s_mp_div_2(&u);
   1948     s_mp_div_2(&v);
   1949     ++k;
   1950   }
   1951 
   1952   /* Initialize t */
   1953   if(mp_isodd(&u)) {
   1954     if((res = mp_copy(&v, &t)) != MP_OKAY)
   1955       goto CLEANUP;
   1956 
   1957     /* t = -v */
   1958     if(SIGN(&v) == MP_ZPOS)
   1959       SIGN(&t) = MP_NEG;
   1960     else
   1961       SIGN(&t) = MP_ZPOS;
   1962 
   1963   } else {
   1964     if((res = mp_copy(&u, &t)) != MP_OKAY)
   1965       goto CLEANUP;
   1966 
   1967   }
   1968 
   1969   for(;;) {
   1970     while(mp_iseven(&t)) {
   1971       s_mp_div_2(&t);
   1972     }
   1973 
   1974     if(mp_cmp_z(&t) == MP_GT) {
   1975       if((res = mp_copy(&t, &u)) != MP_OKAY)
   1976 	goto CLEANUP;
   1977 
   1978     } else {
   1979       if((res = mp_copy(&t, &v)) != MP_OKAY)
   1980 	goto CLEANUP;
   1981 
   1982       /* v = -t */
   1983       if(SIGN(&t) == MP_ZPOS)
   1984 	SIGN(&v) = MP_NEG;
   1985       else
   1986 	SIGN(&v) = MP_ZPOS;
   1987     }
   1988 
   1989     if((res = mp_sub(&u, &v, &t)) != MP_OKAY)
   1990       goto CLEANUP;
   1991 
   1992     if(s_mp_cmp_d(&t, 0) == MP_EQ)
   1993       break;
   1994   }
   1995 
   1996   s_mp_2expt(&v, k);       /* v = 2^k   */
   1997   res = mp_mul(&u, &v, c); /* c = u * v */
   1998 
   1999  CLEANUP:
   2000   mp_clear(&v);
   2001  V:
   2002   mp_clear(&u);
   2003  U:
   2004   mp_clear(&t);
   2005 
   2006   return res;
   2007 
   2008 } /* end mp_bgcd() */
   2009 
   2010 /* }}} */
   2011 
   2012 /* {{{ mp_lcm(a, b, c) */
   2013 
   2014 /* We compute the least common multiple using the rule:
   2015 
   2016    ab = [a, b](a, b)
   2017 
   2018    ... by computing the product, and dividing out the gcd.
   2019  */
   2020 
   2021 mp_err mp_lcm(mp_int *a, mp_int *b, mp_int *c)
   2022 {
   2023   mp_int  gcd, prod;
   2024   mp_err  res;
   2025 
   2026   ARGCHK(a != NULL && b != NULL && c != NULL, MP_BADARG);
   2027 
   2028   /* Set up temporaries */
   2029   if((res = mp_init(&gcd)) != MP_OKAY)
   2030     return res;
   2031   if((res = mp_init(&prod)) != MP_OKAY)
   2032     goto GCD;
   2033 
   2034   if((res = mp_mul(a, b, &prod)) != MP_OKAY)
   2035     goto CLEANUP;
   2036   if((res = mp_gcd(a, b, &gcd)) != MP_OKAY)
   2037     goto CLEANUP;
   2038 
   2039   res = mp_div(&prod, &gcd, c, NULL);
   2040 
   2041  CLEANUP:
   2042   mp_clear(&prod);
   2043  GCD:
   2044   mp_clear(&gcd);
   2045 
   2046   return res;
   2047 
   2048 } /* end mp_lcm() */
   2049 
   2050 /* }}} */
   2051 
   2052 /* {{{ mp_xgcd(a, b, g, x, y) */
   2053 
   2054 /*
   2055   mp_xgcd(a, b, g, x, y)
   2056 
   2057   Compute g = (a, b) and values x and y satisfying Bezout's identity
   2058   (that is, ax + by = g).  This uses the extended binary GCD algorithm
   2059   based on the Stein algorithm used for mp_gcd()
   2060  */
   2061 
   2062 mp_err mp_xgcd(mp_int *a, mp_int *b, mp_int *g, mp_int *x, mp_int *y)
   2063 {
   2064   mp_int   gx, xc, yc, u, v, A, B, C, D;
   2065   mp_int  *clean[9];
   2066   mp_err   res;
   2067   int      last = -1;
   2068 
   2069   if(mp_cmp_z(b) == 0)
   2070     return MP_RANGE;
   2071 
   2072   /* Initialize all these variables we need */
   2073   if((res = mp_init(&u)) != MP_OKAY) goto CLEANUP;
   2074   clean[++last] = &u;
   2075   if((res = mp_init(&v)) != MP_OKAY) goto CLEANUP;
   2076   clean[++last] = &v;
   2077   if((res = mp_init(&gx)) != MP_OKAY) goto CLEANUP;
   2078   clean[++last] = &gx;
   2079   if((res = mp_init(&A)) != MP_OKAY) goto CLEANUP;
   2080   clean[++last] = &A;
   2081   if((res = mp_init(&B)) != MP_OKAY) goto CLEANUP;
   2082   clean[++last] = &B;
   2083   if((res = mp_init(&C)) != MP_OKAY) goto CLEANUP;
   2084   clean[++last] = &C;
   2085   if((res = mp_init(&D)) != MP_OKAY) goto CLEANUP;
   2086   clean[++last] = &D;
   2087   if((res = mp_init_copy(&xc, a)) != MP_OKAY) goto CLEANUP;
   2088   clean[++last] = &xc;
   2089   mp_abs(&xc, &xc);
   2090   if((res = mp_init_copy(&yc, b)) != MP_OKAY) goto CLEANUP;
   2091   clean[++last] = &yc;
   2092   mp_abs(&yc, &yc);
   2093 
   2094   mp_set(&gx, 1);
   2095 
   2096   /* Divide by two until at least one of them is even */
   2097   while(mp_iseven(&xc) && mp_iseven(&yc)) {
   2098     s_mp_div_2(&xc);
   2099     s_mp_div_2(&yc);
   2100     if((res = s_mp_mul_2(&gx)) != MP_OKAY)
   2101       goto CLEANUP;
   2102   }
   2103 
   2104   mp_copy(&xc, &u);
   2105   mp_copy(&yc, &v);
   2106   mp_set(&A, 1); mp_set(&D, 1);
   2107 
   2108   /* Loop through binary GCD algorithm */
   2109   for(;;) {
   2110     while(mp_iseven(&u)) {
   2111       s_mp_div_2(&u);
   2112 
   2113       if(mp_iseven(&A) && mp_iseven(&B)) {
   2114 	s_mp_div_2(&A); s_mp_div_2(&B);
   2115       } else {
   2116 	if((res = mp_add(&A, &yc, &A)) != MP_OKAY) goto CLEANUP;
   2117 	s_mp_div_2(&A);
   2118 	if((res = mp_sub(&B, &xc, &B)) != MP_OKAY) goto CLEANUP;
   2119 	s_mp_div_2(&B);
   2120       }
   2121     }
   2122 
   2123     while(mp_iseven(&v)) {
   2124       s_mp_div_2(&v);
   2125 
   2126       if(mp_iseven(&C) && mp_iseven(&D)) {
   2127 	s_mp_div_2(&C); s_mp_div_2(&D);
   2128       } else {
   2129 	if((res = mp_add(&C, &yc, &C)) != MP_OKAY) goto CLEANUP;
   2130 	s_mp_div_2(&C);
   2131 	if((res = mp_sub(&D, &xc, &D)) != MP_OKAY) goto CLEANUP;
   2132 	s_mp_div_2(&D);
   2133       }
   2134     }
   2135 
   2136     if(mp_cmp(&u, &v) >= 0) {
   2137       if((res = mp_sub(&u, &v, &u)) != MP_OKAY) goto CLEANUP;
   2138       if((res = mp_sub(&A, &C, &A)) != MP_OKAY) goto CLEANUP;
   2139       if((res = mp_sub(&B, &D, &B)) != MP_OKAY) goto CLEANUP;
   2140 
   2141     } else {
   2142       if((res = mp_sub(&v, &u, &v)) != MP_OKAY) goto CLEANUP;
   2143       if((res = mp_sub(&C, &A, &C)) != MP_OKAY) goto CLEANUP;
   2144       if((res = mp_sub(&D, &B, &D)) != MP_OKAY) goto CLEANUP;
   2145 
   2146     }
   2147 
   2148     /* If we're done, copy results to output */
   2149     if(mp_cmp_z(&u) == 0) {
   2150       if(x)
   2151 	if((res = mp_copy(&C, x)) != MP_OKAY) goto CLEANUP;
   2152 
   2153       if(y)
   2154 	if((res = mp_copy(&D, y)) != MP_OKAY) goto CLEANUP;
   2155 
   2156       if(g)
   2157 	if((res = mp_mul(&gx, &v, g)) != MP_OKAY) goto CLEANUP;
   2158 
   2159       break;
   2160     }
   2161   }
   2162 
   2163  CLEANUP:
   2164   while(last >= 0)
   2165     mp_clear(clean[last--]);
   2166 
   2167   return res;
   2168 
   2169 } /* end mp_xgcd() */
   2170 
   2171 /* }}} */
   2172 
   2173 /* {{{ mp_invmod(a, m, c) */
   2174 
   2175 /*
   2176   mp_invmod(a, m, c)
   2177 
   2178   Compute c = a^-1 (mod m), if there is an inverse for a (mod m).
   2179   This is equivalent to the question of whether (a, m) = 1.  If not,
   2180   MP_UNDEF is returned, and there is no inverse.
   2181  */
   2182 
   2183 mp_err mp_invmod(mp_int *a, mp_int *m, mp_int *c)
   2184 {
   2185   mp_int  g, x;
   2186   mp_err  res;
   2187 
   2188   ARGCHK(a && m && c, MP_BADARG);
   2189 
   2190   if(mp_cmp_z(a) == 0 || mp_cmp_z(m) == 0)
   2191     return MP_RANGE;
   2192 
   2193   if((res = mp_init(&g)) != MP_OKAY)
   2194     return res;
   2195   if((res = mp_init(&x)) != MP_OKAY)
   2196     goto X;
   2197 
   2198   if((res = mp_xgcd(a, m, &g, &x, NULL)) != MP_OKAY)
   2199     goto CLEANUP;
   2200 
   2201   if(mp_cmp_d(&g, 1) != MP_EQ) {
   2202     res = MP_UNDEF;
   2203     goto CLEANUP;
   2204   }
   2205 
   2206   res = mp_mod(&x, m, c);
   2207   SIGN(c) = SIGN(a);
   2208 
   2209 CLEANUP:
   2210   mp_clear(&x);
   2211 X:
   2212   mp_clear(&g);
   2213 
   2214   return res;
   2215 
   2216 } /* end mp_invmod() */
   2217 
   2218 /* }}} */
   2219 #endif /* if MP_NUMTH */
   2220 
   2221 /* }}} */
   2222 
   2223 /*------------------------------------------------------------------------*/
   2224 /* {{{ mp_print(mp, ofp) */
   2225 
   2226 #if MP_IOFUNC
   2227 /*
   2228   mp_print(mp, ofp)
   2229 
   2230   Print a textual representation of the given mp_int on the output
   2231   stream 'ofp'.  Output is generated using the internal radix.
   2232  */
   2233 
   2234 void   mp_print(mp_int *mp, FILE *ofp)
   2235 {
   2236   int   ix;
   2237 
   2238   if(mp == NULL || ofp == NULL)
   2239     return;
   2240 
   2241   fputc((SIGN(mp) == MP_NEG) ? '-' : '+', ofp);
   2242 
   2243   for(ix = USED(mp) - 1; ix >= 0; ix--) {
   2244     fprintf(ofp, DIGIT_FMT, DIGIT(mp, ix));
   2245   }
   2246 
   2247 } /* end mp_print() */
   2248 
   2249 #endif /* if MP_IOFUNC */
   2250 
   2251 /* }}} */
   2252 
   2253 /*------------------------------------------------------------------------*/
   2254 /* {{{ More I/O Functions */
   2255 
   2256 /* {{{ mp_read_signed_bin(mp, str, len) */
   2257 
   2258 /*
   2259    mp_read_signed_bin(mp, str, len)
   2260 
   2261    Read in a raw value (base 256) into the given mp_int
   2262  */
   2263 
   2264 mp_err  mp_read_signed_bin(mp_int *mp, unsigned char *str, int len)
   2265 {
   2266   mp_err         res;
   2267 
   2268   ARGCHK(mp != NULL && str != NULL && len > 0, MP_BADARG);
   2269 
   2270   if((res = mp_read_unsigned_bin(mp, str + 1, len - 1)) == MP_OKAY) {
   2271     /* Get sign from first byte */
   2272     if(str[0])
   2273       SIGN(mp) = MP_NEG;
   2274     else
   2275       SIGN(mp) = MP_ZPOS;
   2276   }
   2277 
   2278   return res;
   2279 
   2280 } /* end mp_read_signed_bin() */
   2281 
   2282 /* }}} */
   2283 
   2284 /* {{{ mp_signed_bin_size(mp) */
   2285 
   2286 int    mp_signed_bin_size(mp_int *mp)
   2287 {
   2288   ARGCHK(mp != NULL, 0);
   2289 
   2290   return mp_unsigned_bin_size(mp) + 1;
   2291 
   2292 } /* end mp_signed_bin_size() */
   2293 
   2294 /* }}} */
   2295 
   2296 /* {{{ mp_to_signed_bin(mp, str) */
   2297 
   2298 mp_err mp_to_signed_bin(mp_int *mp, unsigned char *str)
   2299 {
   2300   ARGCHK(mp != NULL && str != NULL, MP_BADARG);
   2301 
   2302   /* Caller responsible for allocating enough memory (use mp_raw_size(mp)) */
   2303   str[0] = (char)SIGN(mp);
   2304 
   2305   return mp_to_unsigned_bin(mp, str + 1);
   2306 
   2307 } /* end mp_to_signed_bin() */
   2308 
   2309 /* }}} */
   2310 
   2311 /* {{{ mp_read_unsigned_bin(mp, str, len) */
   2312 
   2313 /*
   2314   mp_read_unsigned_bin(mp, str, len)
   2315 
   2316   Read in an unsigned value (base 256) into the given mp_int
   2317  */
   2318 
   2319 mp_err  mp_read_unsigned_bin(mp_int *mp, unsigned char *str, int len)
   2320 {
   2321   int     ix;
   2322   mp_err  res;
   2323 
   2324   ARGCHK(mp != NULL && str != NULL && len > 0, MP_BADARG);
   2325 
   2326   mp_zero(mp);
   2327 
   2328   for(ix = 0; ix < len; ix++) {
   2329     if((res = s_mp_mul_2d(mp, CHAR_BIT)) != MP_OKAY)
   2330       return res;
   2331 
   2332     if((res = mp_add_d(mp, str[ix], mp)) != MP_OKAY)
   2333       return res;
   2334   }
   2335 
   2336   return MP_OKAY;
   2337 
   2338 } /* end mp_read_unsigned_bin() */
   2339 
   2340 /* }}} */
   2341 
   2342 /* {{{ mp_unsigned_bin_size(mp) */
   2343 
   2344 int     mp_unsigned_bin_size(mp_int *mp)
   2345 {
   2346   mp_digit   topdig;
   2347   int        count;
   2348 
   2349   ARGCHK(mp != NULL, 0);
   2350 
   2351   /* Special case for the value zero */
   2352   if(USED(mp) == 1 && DIGIT(mp, 0) == 0)
   2353     return 1;
   2354 
   2355   count = (USED(mp) - 1) * sizeof(mp_digit);
   2356   topdig = DIGIT(mp, USED(mp) - 1);
   2357 
   2358   while(topdig != 0) {
   2359     ++count;
   2360     topdig >>= CHAR_BIT;
   2361   }
   2362 
   2363   return count;
   2364 
   2365 } /* end mp_unsigned_bin_size() */
   2366 
   2367 /* }}} */
   2368 
   2369 /* {{{ mp_to_unsigned_bin(mp, str) */
   2370 
   2371 mp_err mp_to_unsigned_bin(mp_int *mp, unsigned char *str)
   2372 {
   2373   mp_digit      *dp, *end, d;
   2374   unsigned char *spos;
   2375 
   2376   ARGCHK(mp != NULL && str != NULL, MP_BADARG);
   2377 
   2378   dp = DIGITS(mp);
   2379   end = dp + USED(mp) - 1;
   2380   spos = str;
   2381 
   2382   /* Special case for zero, quick test */
   2383   if(dp == end && *dp == 0) {
   2384     *str = '\0';
   2385     return MP_OKAY;
   2386   }
   2387 
   2388   /* Generate digits in reverse order */
   2389   while(dp < end) {
   2390     int      ix;
   2391 
   2392     d = *dp;
   2393     for(ix = 0; ix < sizeof(mp_digit); ++ix) {
   2394       *spos = d & UCHAR_MAX;
   2395       d >>= CHAR_BIT;
   2396       ++spos;
   2397     }
   2398 
   2399     ++dp;
   2400   }
   2401 
   2402   /* Now handle last digit specially, high order zeroes are not written */
   2403   d = *end;
   2404   while(d != 0) {
   2405     *spos = d & UCHAR_MAX;
   2406     d >>= CHAR_BIT;
   2407     ++spos;
   2408   }
   2409 
   2410   /* Reverse everything to get digits in the correct order */
   2411   while(--spos > str) {
   2412     unsigned char t = *str;
   2413     *str = *spos;
   2414     *spos = t;
   2415 
   2416     ++str;
   2417   }
   2418 
   2419   return MP_OKAY;
   2420 
   2421 } /* end mp_to_unsigned_bin() */
   2422 
   2423 /* }}} */
   2424 
   2425 /* {{{ mp_count_bits(mp) */
   2426 
   2427 int    mp_count_bits(mp_int *mp)
   2428 {
   2429   int      len;
   2430   mp_digit d;
   2431 
   2432   ARGCHK(mp != NULL, MP_BADARG);
   2433 
   2434   len = DIGIT_BIT * (USED(mp) - 1);
   2435   d = DIGIT(mp, USED(mp) - 1);
   2436 
   2437   while(d != 0) {
   2438     ++len;
   2439     d >>= 1;
   2440   }
   2441 
   2442   return len;
   2443 
   2444 } /* end mp_count_bits() */
   2445 
   2446 /* }}} */
   2447 
   2448 /* {{{ mp_read_radix(mp, str, radix) */
   2449 
   2450 /*
   2451   mp_read_radix(mp, str, radix)
   2452 
   2453   Read an integer from the given string, and set mp to the resulting
   2454   value.  The input is presumed to be in base 10.  Leading non-digit
   2455   characters are ignored, and the function reads until a non-digit
   2456   character or the end of the string.
   2457  */
   2458 
   2459 mp_err  mp_read_radix(mp_int *mp, unsigned char *str, int radix)
   2460 {
   2461   int     ix = 0, val = 0;
   2462   mp_err  res;
   2463   mp_sign sig = MP_ZPOS;
   2464 
   2465   ARGCHK(mp != NULL && str != NULL && radix >= 2 && radix <= MAX_RADIX,
   2466 	 MP_BADARG);
   2467 
   2468   mp_zero(mp);
   2469 
   2470   /* Skip leading non-digit characters until a digit or '-' or '+' */
   2471   while(str[ix] &&
   2472 	(s_mp_tovalue(str[ix], radix) < 0) &&
   2473 	str[ix] != '-' &&
   2474 	str[ix] != '+') {
   2475     ++ix;
   2476   }
   2477 
   2478   if(str[ix] == '-') {
   2479     sig = MP_NEG;
   2480     ++ix;
   2481   } else if(str[ix] == '+') {
   2482     sig = MP_ZPOS; /* this is the default anyway... */
   2483     ++ix;
   2484   }
   2485 
   2486   while((val = s_mp_tovalue(str[ix], radix)) >= 0) {
   2487     if((res = s_mp_mul_d(mp, radix)) != MP_OKAY)
   2488       return res;
   2489     if((res = s_mp_add_d(mp, val)) != MP_OKAY)
   2490       return res;
   2491     ++ix;
   2492   }
   2493 
   2494   if(s_mp_cmp_d(mp, 0) == MP_EQ)
   2495     SIGN(mp) = MP_ZPOS;
   2496   else
   2497     SIGN(mp) = sig;
   2498 
   2499   return MP_OKAY;
   2500 
   2501 } /* end mp_read_radix() */
   2502 
   2503 /* }}} */
   2504 
   2505 /* {{{ mp_radix_size(mp, radix) */
   2506 
   2507 int    mp_radix_size(mp_int *mp, int radix)
   2508 {
   2509   int  len;
   2510   ARGCHK(mp != NULL, 0);
   2511 
   2512   len = s_mp_outlen(mp_count_bits(mp), radix) + 1; /* for NUL terminator */
   2513 
   2514   if(mp_cmp_z(mp) < 0)
   2515     ++len; /* for sign */
   2516 
   2517   return len;
   2518 
   2519 } /* end mp_radix_size() */
   2520 
   2521 /* }}} */
   2522 
   2523 /* {{{ mp_value_radix_size(num, qty, radix) */
   2524 
   2525 /* num = number of digits
   2526    qty = number of bits per digit
   2527    radix = target base
   2528 
   2529    Return the number of digits in the specified radix that would be
   2530    needed to express 'num' digits of 'qty' bits each.
   2531  */
   2532 int    mp_value_radix_size(int num, int qty, int radix)
   2533 {
   2534   ARGCHK(num >= 0 && qty > 0 && radix >= 2 && radix <= MAX_RADIX, 0);
   2535 
   2536   return s_mp_outlen(num * qty, radix);
   2537 
   2538 } /* end mp_value_radix_size() */
   2539 
   2540 /* }}} */
   2541 
   2542 /* {{{ mp_toradix(mp, str, radix) */
   2543 
   2544 mp_err mp_toradix(mp_int *mp, unsigned char *str, int radix)
   2545 {
   2546   int  ix, pos = 0;
   2547 
   2548   ARGCHK(mp != NULL && str != NULL, MP_BADARG);
   2549   ARGCHK(radix > 1 && radix <= MAX_RADIX, MP_RANGE);
   2550 
   2551   if(mp_cmp_z(mp) == MP_EQ) {
   2552     str[0] = '0';
   2553     str[1] = '\0';
   2554   } else {
   2555     mp_err   res;
   2556     mp_int   tmp;
   2557     mp_sign  sgn;
   2558     mp_digit rem, rdx = (mp_digit)radix;
   2559     char     ch;
   2560 
   2561     if((res = mp_init_copy(&tmp, mp)) != MP_OKAY)
   2562       return res;
   2563 
   2564     /* Save sign for later, and take absolute value */
   2565     sgn = SIGN(&tmp); SIGN(&tmp) = MP_ZPOS;
   2566 
   2567     /* Generate output digits in reverse order      */
   2568     while(mp_cmp_z(&tmp) != 0) {
   2569       if((res = s_mp_div_d(&tmp, rdx, &rem)) != MP_OKAY) {
   2570 	mp_clear(&tmp);
   2571 	return res;
   2572       }
   2573 
   2574       /* Generate digits, use capital letters */
   2575       ch = s_mp_todigit(rem, radix, 0);
   2576 
   2577       str[pos++] = ch;
   2578     }
   2579 
   2580     /* Add - sign if original value was negative */
   2581     if(sgn == MP_NEG)
   2582       str[pos++] = '-';
   2583 
   2584     /* Add trailing NUL to end the string        */
   2585     str[pos--] = '\0';
   2586 
   2587     /* Reverse the digits and sign indicator     */
   2588     ix = 0;
   2589     while(ix < pos) {
   2590       char tmp = str[ix];
   2591 
   2592       str[ix] = str[pos];
   2593       str[pos] = tmp;
   2594       ++ix;
   2595       --pos;
   2596     }
   2597 
   2598     mp_clear(&tmp);
   2599   }
   2600 
   2601   return MP_OKAY;
   2602 
   2603 } /* end mp_toradix() */
   2604 
   2605 /* }}} */
   2606 
   2607 /* {{{ mp_char2value(ch, r) */
   2608 
   2609 int    mp_char2value(char ch, int r)
   2610 {
   2611   return s_mp_tovalue(ch, r);
   2612 
   2613 } /* end mp_tovalue() */
   2614 
   2615 /* }}} */
   2616 
   2617 /* }}} */
   2618 
   2619 /* {{{ mp_strerror(ec) */
   2620 
   2621 /*
   2622   mp_strerror(ec)
   2623 
   2624   Return a string describing the meaning of error code 'ec'.  The
   2625   string returned is allocated in static memory, so the caller should
   2626   not attempt to modify or free the memory associated with this
   2627   string.
   2628  */
   2629 const char  *mp_strerror(mp_err ec)
   2630 {
   2631   int   aec = (ec < 0) ? -ec : ec;
   2632 
   2633   /* Code values are negative, so the senses of these comparisons
   2634      are accurate */
   2635   if(ec < MP_LAST_CODE || ec > MP_OKAY) {
   2636     return mp_err_string[0];  /* unknown error code */
   2637   } else {
   2638     return mp_err_string[aec + 1];
   2639   }
   2640 
   2641 } /* end mp_strerror() */
   2642 
   2643 /* }}} */
   2644 
   2645 /*========================================================================*/
   2646 /*------------------------------------------------------------------------*/
   2647 /* Static function definitions (internal use only)                        */
   2648 
   2649 /* {{{ Memory management */
   2650 
   2651 /* {{{ s_mp_grow(mp, min) */
   2652 
   2653 /* Make sure there are at least 'min' digits allocated to mp              */
   2654 mp_err   s_mp_grow(mp_int *mp, mp_size min)
   2655 {
   2656   if(min > ALLOC(mp)) {
   2657     mp_digit   *tmp;
   2658 
   2659     /* Set min to next nearest default precision block size */
   2660     min = ((min + (s_mp_defprec - 1)) / s_mp_defprec) * s_mp_defprec;
   2661 
   2662     if((tmp = s_mp_alloc(min, sizeof(mp_digit))) == NULL)
   2663       return MP_MEM;
   2664 
   2665     s_mp_copy(DIGITS(mp), tmp, USED(mp));
   2666 
   2667 #if MP_CRYPTO
   2668     s_mp_setz(DIGITS(mp), ALLOC(mp));
   2669 #endif
   2670     s_mp_free(DIGITS(mp));
   2671     DIGITS(mp) = tmp;
   2672     ALLOC(mp) = min;
   2673   }
   2674 
   2675   return MP_OKAY;
   2676 
   2677 } /* end s_mp_grow() */
   2678 
   2679 /* }}} */
   2680 
   2681 /* {{{ s_mp_pad(mp, min) */
   2682 
   2683 /* Make sure the used size of mp is at least 'min', growing if needed     */
   2684 mp_err   s_mp_pad(mp_int *mp, mp_size min)
   2685 {
   2686   if(min > USED(mp)) {
   2687     mp_err  res;
   2688 
   2689     /* Make sure there is room to increase precision  */
   2690     if(min > ALLOC(mp) && (res = s_mp_grow(mp, min)) != MP_OKAY)
   2691       return res;
   2692 
   2693     /* Increase precision; should already be 0-filled */
   2694     USED(mp) = min;
   2695   }
   2696 
   2697   return MP_OKAY;
   2698 
   2699 } /* end s_mp_pad() */
   2700 
   2701 /* }}} */
   2702 
   2703 /* {{{ s_mp_setz(dp, count) */
   2704 
   2705 #if MP_MACRO == 0
   2706 /* Set 'count' digits pointed to by dp to be zeroes                       */
   2707 void s_mp_setz(mp_digit *dp, mp_size count)
   2708 {
   2709 #if MP_MEMSET == 0
   2710   int  ix;
   2711 
   2712   for(ix = 0; ix < count; ix++)
   2713     dp[ix] = 0;
   2714 #else
   2715   memset(dp, 0, count * sizeof(mp_digit));
   2716 #endif
   2717 
   2718 } /* end s_mp_setz() */
   2719 #endif
   2720 
   2721 /* }}} */
   2722 
   2723 /* {{{ s_mp_copy(sp, dp, count) */
   2724 
   2725 #if MP_MACRO == 0
   2726 /* Copy 'count' digits from sp to dp                                      */
   2727 void s_mp_copy(mp_digit *sp, mp_digit *dp, mp_size count)
   2728 {
   2729 #if MP_MEMCPY == 0
   2730   int  ix;
   2731 
   2732   for(ix = 0; ix < count; ix++)
   2733     dp[ix] = sp[ix];
   2734 #else
   2735   memcpy(dp, sp, count * sizeof(mp_digit));
   2736 #endif
   2737 
   2738 } /* end s_mp_copy() */
   2739 #endif
   2740 
   2741 /* }}} */
   2742 
   2743 /* {{{ s_mp_alloc(nb, ni) */
   2744 
   2745 #if MP_MACRO == 0
   2746 /* Allocate ni records of nb bytes each, and return a pointer to that     */
   2747 void    *s_mp_alloc(size_t nb, size_t ni)
   2748 {
   2749   return calloc(nb, ni);
   2750 
   2751 } /* end s_mp_alloc() */
   2752 #endif
   2753 
   2754 /* }}} */
   2755 
   2756 /* {{{ s_mp_free(ptr) */
   2757 
   2758 #if MP_MACRO == 0
   2759 /* Free the memory pointed to by ptr                                      */
   2760 void     s_mp_free(void *ptr)
   2761 {
   2762   if(ptr)
   2763     free(ptr);
   2764 
   2765 } /* end s_mp_free() */
   2766 #endif
   2767 
   2768 /* }}} */
   2769 
   2770 /* {{{ s_mp_clamp(mp) */
   2771 
   2772 /* Remove leading zeroes from the given value                             */
   2773 void     s_mp_clamp(mp_int *mp)
   2774 {
   2775   mp_size   du = USED(mp);
   2776   mp_digit *zp = DIGITS(mp) + du - 1;
   2777 
   2778   while(du > 1 && !*zp--)
   2779     --du;
   2780 
   2781   USED(mp) = du;
   2782 
   2783 } /* end s_mp_clamp() */
   2784 
   2785 
   2786 /* }}} */
   2787 
   2788 /* {{{ s_mp_exch(a, b) */
   2789 
   2790 /* Exchange the data for a and b; (b, a) = (a, b)                         */
   2791 void     s_mp_exch(mp_int *a, mp_int *b)
   2792 {
   2793   mp_int   tmp;
   2794 
   2795   tmp = *a;
   2796   *a = *b;
   2797   *b = tmp;
   2798 
   2799 } /* end s_mp_exch() */
   2800 
   2801 /* }}} */
   2802 
   2803 /* }}} */
   2804 
   2805 /* {{{ Arithmetic helpers */
   2806 
   2807 /* {{{ s_mp_lshd(mp, p) */
   2808 
   2809 /*
   2810    Shift mp leftward by p digits, growing if needed, and zero-filling
   2811    the in-shifted digits at the right end.  This is a convenient
   2812    alternative to multiplication by powers of the radix
   2813  */
   2814 
   2815 mp_err   s_mp_lshd(mp_int *mp, mp_size p)
   2816 {
   2817   mp_err   res;
   2818   mp_size  pos;
   2819   mp_digit *dp;
   2820   int     ix;
   2821 
   2822   if(p == 0)
   2823     return MP_OKAY;
   2824 
   2825   if((res = s_mp_pad(mp, USED(mp) + p)) != MP_OKAY)
   2826     return res;
   2827 
   2828   pos = USED(mp) - 1;
   2829   dp = DIGITS(mp);
   2830 
   2831   /* Shift all the significant figures over as needed */
   2832   for(ix = pos - p; ix >= 0; ix--)
   2833     dp[ix + p] = dp[ix];
   2834 
   2835   /* Fill the bottom digits with zeroes */
   2836   for(ix = 0; ix < p; ix++)
   2837     dp[ix] = 0;
   2838 
   2839   return MP_OKAY;
   2840 
   2841 } /* end s_mp_lshd() */
   2842 
   2843 /* }}} */
   2844 
   2845 /* {{{ s_mp_rshd(mp, p) */
   2846 
   2847 /*
   2848    Shift mp rightward by p digits.  Maintains the invariant that
   2849    digits above the precision are all zero.  Digits shifted off the
   2850    end are lost.  Cannot fail.
   2851  */
   2852 
   2853 void     s_mp_rshd(mp_int *mp, mp_size p)
   2854 {
   2855   mp_size  ix;
   2856   mp_digit *dp;
   2857 
   2858   if(p == 0)
   2859     return;
   2860 
   2861   /* Shortcut when all digits are to be shifted off */
   2862   if(p >= USED(mp)) {
   2863     s_mp_setz(DIGITS(mp), ALLOC(mp));
   2864     USED(mp) = 1;
   2865     SIGN(mp) = MP_ZPOS;
   2866     return;
   2867   }
   2868 
   2869   /* Shift all the significant figures over as needed */
   2870   dp = DIGITS(mp);
   2871   for(ix = p; ix < USED(mp); ix++)
   2872     dp[ix - p] = dp[ix];
   2873 
   2874   /* Fill the top digits with zeroes */
   2875   ix -= p;
   2876   while(ix < USED(mp))
   2877     dp[ix++] = 0;
   2878 
   2879   /* Strip off any leading zeroes    */
   2880   s_mp_clamp(mp);
   2881 
   2882 } /* end s_mp_rshd() */
   2883 
   2884 /* }}} */
   2885 
   2886 /* {{{ s_mp_div_2(mp) */
   2887 
   2888 /* Divide by two -- take advantage of radix properties to do it fast      */
   2889 void     s_mp_div_2(mp_int *mp)
   2890 {
   2891   s_mp_div_2d(mp, 1);
   2892 
   2893 } /* end s_mp_div_2() */
   2894 
   2895 /* }}} */
   2896 
   2897 /* {{{ s_mp_mul_2(mp) */
   2898 
   2899 mp_err s_mp_mul_2(mp_int *mp)
   2900 {
   2901   int      ix;
   2902   mp_digit kin = 0, kout, *dp = DIGITS(mp);
   2903   mp_err   res;
   2904 
   2905   /* Shift digits leftward by 1 bit */
   2906   for(ix = 0; ix < USED(mp); ix++) {
   2907     kout = (dp[ix] >> (DIGIT_BIT - 1)) & 1;
   2908     dp[ix] = (dp[ix] << 1) | kin;
   2909 
   2910     kin = kout;
   2911   }
   2912 
   2913   /* Deal with rollover from last digit */
   2914   if(kin) {
   2915     if(ix >= ALLOC(mp)) {
   2916       if((res = s_mp_grow(mp, ALLOC(mp) + 1)) != MP_OKAY)
   2917 	return res;
   2918       dp = DIGITS(mp);
   2919     }
   2920 
   2921     dp[ix] = kin;
   2922     USED(mp) += 1;
   2923   }
   2924 
   2925   return MP_OKAY;
   2926 
   2927 } /* end s_mp_mul_2() */
   2928 
   2929 /* }}} */
   2930 
   2931 /* {{{ s_mp_mod_2d(mp, d) */
   2932 
   2933 /*
   2934   Remainder the integer by 2^d, where d is a number of bits.  This
   2935   amounts to a bitwise AND of the value, and does not require the full
   2936   division code
   2937  */
   2938 void     s_mp_mod_2d(mp_int *mp, mp_digit d)
   2939 {
   2940   unsigned int  ndig = (d / DIGIT_BIT), nbit = (d % DIGIT_BIT);
   2941   unsigned int  ix;
   2942   mp_digit      dmask, *dp = DIGITS(mp);
   2943 
   2944   if(ndig >= USED(mp))
   2945     return;
   2946 
   2947   /* Flush all the bits above 2^d in its digit */
   2948   dmask = (1 << nbit) - 1;
   2949   dp[ndig] &= dmask;
   2950 
   2951   /* Flush all digits above the one with 2^d in it */
   2952   for(ix = ndig + 1; ix < USED(mp); ix++)
   2953     dp[ix] = 0;
   2954 
   2955   s_mp_clamp(mp);
   2956 
   2957 } /* end s_mp_mod_2d() */
   2958 
   2959 /* }}} */
   2960 
   2961 /* {{{ s_mp_mul_2d(mp, d) */
   2962 
   2963 /*
   2964   Multiply by the integer 2^d, where d is a number of bits.  This
   2965   amounts to a bitwise shift of the value, and does not require the
   2966   full multiplication code.
   2967  */
   2968 mp_err    s_mp_mul_2d(mp_int *mp, mp_digit d)
   2969 {
   2970   mp_err   res;
   2971   mp_digit save, next, mask, *dp;
   2972   mp_size  used;
   2973   int      ix;
   2974 
   2975   if((res = s_mp_lshd(mp, d / DIGIT_BIT)) != MP_OKAY)
   2976     return res;
   2977 
   2978   dp = DIGITS(mp); used = USED(mp);
   2979   d %= DIGIT_BIT;
   2980 
   2981   mask = (1 << d) - 1;
   2982 
   2983   /* If the shift requires another digit, make sure we've got one to
   2984      work with */
   2985   if((dp[used - 1] >> (DIGIT_BIT - d)) & mask) {
   2986     if((res = s_mp_grow(mp, used + 1)) != MP_OKAY)
   2987       return res;
   2988     dp = DIGITS(mp);
   2989   }
   2990 
   2991   /* Do the shifting... */
   2992   save = 0;
   2993   for(ix = 0; ix < used; ix++) {
   2994     next = (dp[ix] >> (DIGIT_BIT - d)) & mask;
   2995     dp[ix] = (dp[ix] << d) | save;
   2996     save = next;
   2997   }
   2998 
   2999   /* If, at this point, we have a nonzero carryout into the next
   3000      digit, we'll increase the size by one digit, and store it...
   3001    */
   3002   if(save) {
   3003     dp[used] = save;
   3004     USED(mp) += 1;
   3005   }
   3006 
   3007   s_mp_clamp(mp);
   3008   return MP_OKAY;
   3009 
   3010 } /* end s_mp_mul_2d() */
   3011 
   3012 /* }}} */
   3013 
   3014 /* {{{ s_mp_div_2d(mp, d) */
   3015 
   3016 /*
   3017   Divide the integer by 2^d, where d is a number of bits.  This
   3018   amounts to a bitwise shift of the value, and does not require the
   3019   full division code (used in Barrett reduction, see below)
   3020  */
   3021 void     s_mp_div_2d(mp_int *mp, mp_digit d)
   3022 {
   3023   int       ix;
   3024   mp_digit  save, next, mask, *dp = DIGITS(mp);
   3025 
   3026   s_mp_rshd(mp, d / DIGIT_BIT);
   3027   d %= DIGIT_BIT;
   3028 
   3029   mask = (1 << d) - 1;
   3030 
   3031   save = 0;
   3032   for(ix = USED(mp) - 1; ix >= 0; ix--) {
   3033     next = dp[ix] & mask;
   3034     dp[ix] = (dp[ix] >> d) | (save << (DIGIT_BIT - d));
   3035     save = next;
   3036   }
   3037 
   3038   s_mp_clamp(mp);
   3039 
   3040 } /* end s_mp_div_2d() */
   3041 
   3042 /* }}} */
   3043 
   3044 /* {{{ s_mp_norm(a, b) */
   3045 
   3046 /*
   3047   s_mp_norm(a, b)
   3048 
   3049   Normalize a and b for division, where b is the divisor.  In order
   3050   that we might make good guesses for quotient digits, we want the
   3051   leading digit of b to be at least half the radix, which we
   3052   accomplish by multiplying a and b by a constant.  This constant is
   3053   returned (so that it can be divided back out of the remainder at the
   3054   end of the division process).
   3055 
   3056   We multiply by the smallest power of 2 that gives us a leading digit
   3057   at least half the radix.  By choosing a power of 2, we simplify the
   3058   multiplication and division steps to simple shifts.
   3059  */
   3060 mp_digit s_mp_norm(mp_int *a, mp_int *b)
   3061 {
   3062   mp_digit  t, d = 0;
   3063 
   3064   t = DIGIT(b, USED(b) - 1);
   3065   while(t < (RADIX / 2)) {
   3066     t <<= 1;
   3067     ++d;
   3068   }
   3069 
   3070   if(d != 0) {
   3071     s_mp_mul_2d(a, d);
   3072     s_mp_mul_2d(b, d);
   3073   }
   3074 
   3075   return d;
   3076 
   3077 } /* end s_mp_norm() */
   3078 
   3079 /* }}} */
   3080 
   3081 /* }}} */
   3082 
   3083 /* {{{ Primitive digit arithmetic */
   3084 
   3085 /* {{{ s_mp_add_d(mp, d) */
   3086 
   3087 /* Add d to |mp| in place                                                 */
   3088 mp_err   s_mp_add_d(mp_int *mp, mp_digit d)    /* unsigned digit addition */
   3089 {
   3090   mp_word   w, k = 0;
   3091   mp_size   ix = 1, used = USED(mp);
   3092   mp_digit *dp = DIGITS(mp);
   3093 
   3094   w = dp[0] + d;
   3095   dp[0] = ACCUM(w);
   3096   k = CARRYOUT(w);
   3097 
   3098   while(ix < used && k) {
   3099     w = dp[ix] + k;
   3100     dp[ix] = ACCUM(w);
   3101     k = CARRYOUT(w);
   3102     ++ix;
   3103   }
   3104 
   3105   if(k != 0) {
   3106     mp_err  res;
   3107 
   3108     if((res = s_mp_pad(mp, USED(mp) + 1)) != MP_OKAY)
   3109       return res;
   3110 
   3111     DIGIT(mp, ix) = k;
   3112   }
   3113 
   3114   return MP_OKAY;
   3115 
   3116 } /* end s_mp_add_d() */
   3117 
   3118 /* }}} */
   3119 
   3120 /* {{{ s_mp_sub_d(mp, d) */
   3121 
   3122 /* Subtract d from |mp| in place, assumes |mp| > d                        */
   3123 mp_err   s_mp_sub_d(mp_int *mp, mp_digit d)    /* unsigned digit subtract */
   3124 {
   3125   mp_word   w, b = 0;
   3126   mp_size   ix = 1, used = USED(mp);
   3127   mp_digit *dp = DIGITS(mp);
   3128 
   3129   /* Compute initial subtraction    */
   3130   w = (RADIX + dp[0]) - d;
   3131   b = CARRYOUT(w) ? 0 : 1;
   3132   dp[0] = ACCUM(w);
   3133 
   3134   /* Propagate borrows leftward     */
   3135   while(b && ix < used) {
   3136     w = (RADIX + dp[ix]) - b;
   3137     b = CARRYOUT(w) ? 0 : 1;
   3138     dp[ix] = ACCUM(w);
   3139     ++ix;
   3140   }
   3141 
   3142   /* Remove leading zeroes          */
   3143   s_mp_clamp(mp);
   3144 
   3145   /* If we have a borrow out, it's a violation of the input invariant */
   3146   if(b)
   3147     return MP_RANGE;
   3148   else
   3149     return MP_OKAY;
   3150 
   3151 } /* end s_mp_sub_d() */
   3152 
   3153 /* }}} */
   3154 
   3155 /* {{{ s_mp_mul_d(a, d) */
   3156 
   3157 /* Compute a = a * d, single digit multiplication                         */
   3158 mp_err   s_mp_mul_d(mp_int *a, mp_digit d)
   3159 {
   3160   mp_word w, k = 0;
   3161   mp_size ix, max;
   3162   mp_err  res;
   3163   mp_digit *dp = DIGITS(a);
   3164 
   3165   /*
   3166     Single-digit multiplication will increase the precision of the
   3167     output by at most one digit.  However, we can detect when this
   3168     will happen -- if the high-order digit of a, times d, gives a
   3169     two-digit result, then the precision of the result will increase;
   3170     otherwise it won't.  We use this fact to avoid calling s_mp_pad()
   3171     unless absolutely necessary.
   3172    */
   3173   max = USED(a);
   3174   w = dp[max - 1] * d;
   3175   if(CARRYOUT(w) != 0) {
   3176     if((res = s_mp_pad(a, max + 1)) != MP_OKAY)
   3177       return res;
   3178     dp = DIGITS(a);
   3179   }
   3180 
   3181   for(ix = 0; ix < max; ix++) {
   3182     w = (dp[ix] * d) + k;
   3183     dp[ix] = ACCUM(w);
   3184     k = CARRYOUT(w);
   3185   }
   3186 
   3187   /* If there is a precision increase, take care of it here; the above
   3188      test guarantees we have enough storage to do this safely.
   3189    */
   3190   if(k) {
   3191     dp[max] = k;
   3192     USED(a) = max + 1;
   3193   }
   3194 
   3195   s_mp_clamp(a);
   3196 
   3197   return MP_OKAY;
   3198 
   3199 } /* end s_mp_mul_d() */
   3200 
   3201 /* }}} */
   3202 
   3203 /* {{{ s_mp_div_d(mp, d, r) */
   3204 
   3205 /*
   3206   s_mp_div_d(mp, d, r)
   3207 
   3208   Compute the quotient mp = mp / d and remainder r = mp mod d, for a
   3209   single digit d.  If r is null, the remainder will be discarded.
   3210  */
   3211 
   3212 mp_err   s_mp_div_d(mp_int *mp, mp_digit d, mp_digit *r)
   3213 {
   3214   mp_word   w = 0, t;
   3215   mp_int    quot;
   3216   mp_err    res;
   3217   mp_digit *dp = DIGITS(mp), *qp;
   3218   int       ix;
   3219 
   3220   if(d == 0)
   3221     return MP_RANGE;
   3222 
   3223   /* Make room for the quotient */
   3224   if((res = mp_init_size(&quot, USED(mp))) != MP_OKAY)
   3225     return res;
   3226 
   3227   USED(&quot) = USED(mp); /* so clamping will work below */
   3228   qp = DIGITS(&quot);
   3229 
   3230   /* Divide without subtraction */
   3231   for(ix = USED(mp) - 1; ix >= 0; ix--) {
   3232     w = (w << DIGIT_BIT) | dp[ix];
   3233 
   3234     if(w >= d) {
   3235       t = w / d;
   3236       w = w % d;
   3237     } else {
   3238       t = 0;
   3239     }
   3240 
   3241     qp[ix] = t;
   3242   }
   3243 
   3244   /* Deliver the remainder, if desired */
   3245   if(r)
   3246     *r = w;
   3247 
   3248   s_mp_clamp(&quot);
   3249   mp_exch(&quot, mp);
   3250   mp_clear(&quot);
   3251 
   3252   return MP_OKAY;
   3253 
   3254 } /* end s_mp_div_d() */
   3255 
   3256 /* }}} */
   3257 
   3258 /* }}} */
   3259 
   3260 /* {{{ Primitive full arithmetic */
   3261 
   3262 /* {{{ s_mp_add(a, b) */
   3263 
   3264 /* Compute a = |a| + |b|                                                  */
   3265 mp_err   s_mp_add(mp_int *a, mp_int *b)        /* magnitude addition      */
   3266 {
   3267   mp_word   w = 0;
   3268   mp_digit *pa, *pb;
   3269   mp_size   ix, used = USED(b);
   3270   mp_err    res;
   3271 
   3272   /* Make sure a has enough precision for the output value */
   3273   if((used > USED(a)) && (res = s_mp_pad(a, used)) != MP_OKAY)
   3274     return res;
   3275 
   3276   /*
   3277     Add up all digits up to the precision of b.  If b had initially
   3278     the same precision as a, or greater, we took care of it by the
   3279     padding step above, so there is no problem.  If b had initially
   3280     less precision, we'll have to make sure the carry out is duly
   3281     propagated upward among the higher-order digits of the sum.
   3282    */
   3283   pa = DIGITS(a);
   3284   pb = DIGITS(b);
   3285   for(ix = 0; ix < used; ++ix) {
   3286     w += *pa + *pb++;
   3287     *pa++ = ACCUM(w);
   3288     w = CARRYOUT(w);
   3289   }
   3290 
   3291   /* If we run out of 'b' digits before we're actually done, make
   3292      sure the carries get propagated upward...
   3293    */
   3294   used = USED(a);
   3295   while(w && ix < used) {
   3296     w += *pa;
   3297     *pa++ = ACCUM(w);
   3298     w = CARRYOUT(w);
   3299     ++ix;
   3300   }
   3301 
   3302   /* If there's an overall carry out, increase precision and include
   3303      it.  We could have done this initially, but why touch the memory
   3304      allocator unless we're sure we have to?
   3305    */
   3306   if(w) {
   3307     if((res = s_mp_pad(a, used + 1)) != MP_OKAY)
   3308       return res;
   3309 
   3310     DIGIT(a, ix) = w;  /* pa may not be valid after s_mp_pad() call */
   3311   }
   3312 
   3313   return MP_OKAY;
   3314 
   3315 } /* end s_mp_add() */
   3316 
   3317 /* }}} */
   3318 
   3319 /* {{{ s_mp_sub(a, b) */
   3320 
   3321 /* Compute a = |a| - |b|, assumes |a| >= |b|                              */
   3322 mp_err   s_mp_sub(mp_int *a, mp_int *b)        /* magnitude subtract      */
   3323 {
   3324   mp_word   w = 0;
   3325   mp_digit *pa, *pb;
   3326   mp_size   ix, used = USED(b);
   3327 
   3328   /*
   3329     Subtract and propagate borrow.  Up to the precision of b, this
   3330     accounts for the digits of b; after that, we just make sure the
   3331     carries get to the right place.  This saves having to pad b out to
   3332     the precision of a just to make the loops work right...
   3333    */
   3334   pa = DIGITS(a);
   3335   pb = DIGITS(b);
   3336 
   3337   for(ix = 0; ix < used; ++ix) {
   3338     w = (RADIX + *pa) - w - *pb++;
   3339     *pa++ = ACCUM(w);
   3340     w = CARRYOUT(w) ? 0 : 1;
   3341   }
   3342 
   3343   used = USED(a);
   3344   while(ix < used) {
   3345     w = RADIX + *pa - w;
   3346     *pa++ = ACCUM(w);
   3347     w = CARRYOUT(w) ? 0 : 1;
   3348     ++ix;
   3349   }
   3350 
   3351   /* Clobber any leading zeroes we created    */
   3352   s_mp_clamp(a);
   3353 
   3354   /*
   3355      If there was a borrow out, then |b| > |a| in violation
   3356      of our input invariant.  We've already done the work,
   3357      but we'll at least complain about it...
   3358    */
   3359   if(w)
   3360     return MP_RANGE;
   3361   else
   3362     return MP_OKAY;
   3363 
   3364 } /* end s_mp_sub() */
   3365 
   3366 /* }}} */
   3367 
   3368 mp_err   s_mp_reduce(mp_int *x, mp_int *m, mp_int *mu)
   3369 {
   3370   mp_int   q;
   3371   mp_err   res;
   3372   mp_size  um = USED(m);
   3373 
   3374   if((res = mp_init_copy(&q, x)) != MP_OKAY)
   3375     return res;
   3376 
   3377   s_mp_rshd(&q, um - 1);       /* q1 = x / b^(k-1)  */
   3378   s_mp_mul(&q, mu);            /* q2 = q1 * mu      */
   3379   s_mp_rshd(&q, um + 1);       /* q3 = q2 / b^(k+1) */
   3380 
   3381   /* x = x mod b^(k+1), quick (no division) */
   3382   s_mp_mod_2d(x, (mp_digit)(DIGIT_BIT * (um + 1)));
   3383 
   3384   /* q = q * m mod b^(k+1), quick (no division), uses the short multiplier */
   3385 #ifndef SHRT_MUL
   3386   s_mp_mul(&q, m);
   3387   s_mp_mod_2d(&q, (mp_digit)(DIGIT_BIT * (um + 1)));
   3388 #else
   3389   s_mp_mul_dig(&q, m, um + 1);
   3390 #endif
   3391 
   3392   /* x = x - q */
   3393   if((res = mp_sub(x, &q, x)) != MP_OKAY)
   3394     goto CLEANUP;
   3395 
   3396   /* If x < 0, add b^(k+1) to it */
   3397   if(mp_cmp_z(x) < 0) {
   3398     mp_set(&q, 1);
   3399     if((res = s_mp_lshd(&q, um + 1)) != MP_OKAY)
   3400       goto CLEANUP;
   3401     if((res = mp_add(x, &q, x)) != MP_OKAY)
   3402       goto CLEANUP;
   3403   }
   3404 
   3405   /* Back off if it's too big */
   3406   while(mp_cmp(x, m) >= 0) {
   3407     if((res = s_mp_sub(x, m)) != MP_OKAY)
   3408       break;
   3409   }
   3410 
   3411  CLEANUP:
   3412   mp_clear(&q);
   3413 
   3414   return res;
   3415 
   3416 } /* end s_mp_reduce() */
   3417 
   3418 
   3419 
   3420 /* {{{ s_mp_mul(a, b) */
   3421 
   3422 /* Compute a = |a| * |b|                                                  */
   3423 mp_err   s_mp_mul(mp_int *a, mp_int *b)
   3424 {
   3425   mp_word   w, k = 0;
   3426   mp_int    tmp;
   3427   mp_err    res;
   3428   mp_size   ix, jx, ua = USED(a), ub = USED(b);
   3429   mp_digit *pa, *pb, *pt, *pbt;
   3430 
   3431   if((res = mp_init_size(&tmp, ua + ub)) != MP_OKAY)
   3432     return res;
   3433 
   3434   /* This has the effect of left-padding with zeroes... */
   3435   USED(&tmp) = ua + ub;
   3436 
   3437   /* We're going to need the base value each iteration */
   3438   pbt = DIGITS(&tmp);
   3439 
   3440   /* Outer loop:  Digits of b */
   3441 
   3442   pb = DIGITS(b);
   3443   for(ix = 0; ix < ub; ++ix, ++pb) {
   3444     if(*pb == 0)
   3445       continue;
   3446 
   3447     /* Inner product:  Digits of a */
   3448     pa = DIGITS(a);
   3449     for(jx = 0; jx < ua; ++jx, ++pa) {
   3450       pt = pbt + ix + jx;
   3451       w = *pb * *pa + k + *pt;
   3452       *pt = ACCUM(w);
   3453       k = CARRYOUT(w);
   3454     }
   3455 
   3456     pbt[ix + jx] = k;
   3457     k = 0;
   3458   }
   3459 
   3460   s_mp_clamp(&tmp);
   3461   s_mp_exch(&tmp, a);
   3462 
   3463   mp_clear(&tmp);
   3464 
   3465   return MP_OKAY;
   3466 
   3467 } /* end s_mp_mul() */
   3468 
   3469 /* }}} */
   3470 
   3471 /* {{{ s_mp_kmul(a, b, out, len) */
   3472 
   3473 #if 0
   3474 void   s_mp_kmul(mp_digit *a, mp_digit *b, mp_digit *out, mp_size len)
   3475 {
   3476   mp_word   w, k = 0;
   3477   mp_size   ix, jx;
   3478   mp_digit *pa, *pt;
   3479 
   3480   for(ix = 0; ix < len; ++ix, ++b) {
   3481     if(*b == 0)
   3482       continue;
   3483 
   3484     pa = a;
   3485     for(jx = 0; jx < len; ++jx, ++pa) {
   3486       pt = out + ix + jx;
   3487       w = *b * *pa + k + *pt;
   3488       *pt = ACCUM(w);
   3489       k = CARRYOUT(w);
   3490     }
   3491 
   3492     out[ix + jx] = k;
   3493     k = 0;
   3494   }
   3495 
   3496 } /* end s_mp_kmul() */
   3497 #endif
   3498 
   3499 /* }}} */
   3500 
   3501 /* {{{ s_mp_sqr(a) */
   3502 
   3503 /*
   3504   Computes the square of a, in place.  This can be done more
   3505   efficiently than a general multiplication, because many of the
   3506   computation steps are redundant when squaring.  The inner product
   3507   step is a bit more complicated, but we save a fair number of
   3508   iterations of the multiplication loop.
   3509  */
   3510 #if MP_SQUARE
   3511 mp_err   s_mp_sqr(mp_int *a)
   3512 {
   3513   mp_word  w, k = 0;
   3514   mp_int   tmp;
   3515   mp_err   res;
   3516   mp_size  ix, jx, kx, used = USED(a);
   3517   mp_digit *pa1, *pa2, *pt, *pbt;
   3518 
   3519   if((res = mp_init_size(&tmp, 2 * used)) != MP_OKAY)
   3520     return res;
   3521 
   3522   /* Left-pad with zeroes */
   3523   USED(&tmp) = 2 * used;
   3524 
   3525   /* We need the base value each time through the loop */
   3526   pbt = DIGITS(&tmp);
   3527 
   3528   pa1 = DIGITS(a);
   3529   for(ix = 0; ix < used; ++ix, ++pa1) {
   3530     if(*pa1 == 0)
   3531       continue;
   3532 
   3533     w = DIGIT(&tmp, ix + ix) + (*pa1 * *pa1);
   3534 
   3535     pbt[ix + ix] = ACCUM(w);
   3536     k = CARRYOUT(w);
   3537 
   3538     /*
   3539       The inner product is computed as:
   3540 
   3541          (C, S) = t[i,j] + 2 a[i] a[j] + C
   3542 
   3543       This can overflow what can be represented in an mp_word, and
   3544       since C arithmetic does not provide any way to check for
   3545       overflow, we have to check explicitly for overflow conditions
   3546       before they happen.
   3547      */
   3548     for(jx = ix + 1, pa2 = DIGITS(a) + jx; jx < used; ++jx, ++pa2) {
   3549       mp_word  u = 0, v;
   3550 
   3551       /* Store this in a temporary to avoid indirections later */
   3552       pt = pbt + ix + jx;
   3553 
   3554       /* Compute the multiplicative step */
   3555       w = *pa1 * *pa2;
   3556 
   3557       /* If w is more than half MP_WORD_MAX, the doubling will
   3558 	 overflow, and we need to record a carry out into the next
   3559 	 word */
   3560       u = (w >> (MP_WORD_BIT - 1)) & 1;
   3561 
   3562       /* Double what we've got, overflow will be ignored as defined
   3563 	 for C arithmetic (we've already noted if it is to occur)
   3564        */
   3565       w *= 2;
   3566 
   3567       /* Compute the additive step */
   3568       v = *pt + k;
   3569 
   3570       /* If we do not already have an overflow carry, check to see
   3571 	 if the addition will cause one, and set the carry out if so
   3572        */
   3573       u |= ((MP_WORD_MAX - v) < w);
   3574 
   3575       /* Add in the rest, again ignoring overflow */
   3576       w += v;
   3577 
   3578       /* Set the i,j digit of the output */
   3579       *pt = ACCUM(w);
   3580 
   3581       /* Save carry information for the next iteration of the loop.
   3582 	 This is why k must be an mp_word, instead of an mp_digit */
   3583       k = CARRYOUT(w) | (u << DIGIT_BIT);
   3584 
   3585     } /* for(jx ...) */
   3586 
   3587     /* Set the last digit in the cycle and reset the carry */
   3588     k = DIGIT(&tmp, ix + jx) + k;
   3589     pbt[ix + jx] = ACCUM(k);
   3590     k = CARRYOUT(k);
   3591 
   3592     /* If we are carrying out, propagate the carry to the next digit
   3593        in the output.  This may cascade, so we have to be somewhat
   3594        circumspect -- but we will have enough precision in the output
   3595        that we won't overflow
   3596      */
   3597     kx = 1;
   3598     while(k) {
   3599       k = pbt[ix + jx + kx] + 1;
   3600       pbt[ix + jx + kx] = ACCUM(k);
   3601       k = CARRYOUT(k);
   3602       ++kx;
   3603     }
   3604   } /* for(ix ...) */
   3605 
   3606   s_mp_clamp(&tmp);
   3607   s_mp_exch(&tmp, a);
   3608 
   3609   mp_clear(&tmp);
   3610 
   3611   return MP_OKAY;
   3612 
   3613 } /* end s_mp_sqr() */
   3614 #endif
   3615 
   3616 /* }}} */
   3617 
   3618 /* {{{ s_mp_div(a, b) */
   3619 
   3620 /*
   3621   s_mp_div(a, b)
   3622 
   3623   Compute a = a / b and b = a mod b.  Assumes b > a.
   3624  */
   3625 
   3626 mp_err   s_mp_div(mp_int *a, mp_int *b)
   3627 {
   3628   mp_int   quot, rem, t;
   3629   mp_word  q;
   3630   mp_err   res;
   3631   mp_digit d;
   3632   int      ix;
   3633 
   3634   if(mp_cmp_z(b) == 0)
   3635     return MP_RANGE;
   3636 
   3637   /* Shortcut if b is power of two */
   3638   if((ix = s_mp_ispow2(b)) >= 0) {
   3639     mp_copy(a, b);  /* need this for remainder */
   3640     s_mp_div_2d(a, (mp_digit)ix);
   3641     s_mp_mod_2d(b, (mp_digit)ix);
   3642 
   3643     return MP_OKAY;
   3644   }
   3645 
   3646   /* Allocate space to store the quotient */
   3647   if((res = mp_init_size(&quot, USED(a))) != MP_OKAY)
   3648     return res;
   3649 
   3650   /* A working temporary for division     */
   3651   if((res = mp_init_size(&t, USED(a))) != MP_OKAY)
   3652     goto T;
   3653 
   3654   /* Allocate space for the remainder     */
   3655   if((res = mp_init_size(&rem, USED(a))) != MP_OKAY)
   3656     goto REM;
   3657 
   3658   /* Normalize to optimize guessing       */
   3659   d = s_mp_norm(a, b);
   3660 
   3661   /* Perform the division itself...woo!   */
   3662   ix = USED(a) - 1;
   3663 
   3664   while(ix >= 0) {
   3665     /* Find a partial substring of a which is at least b */
   3666     while(s_mp_cmp(&rem, b) < 0 && ix >= 0) {
   3667       if((res = s_mp_lshd(&rem, 1)) != MP_OKAY)
   3668 	goto CLEANUP;
   3669 
   3670       if((res = s_mp_lshd(&quot, 1)) != MP_OKAY)
   3671 	goto CLEANUP;
   3672 
   3673       DIGIT(&rem, 0) = DIGIT(a, ix);
   3674       s_mp_clamp(&rem);
   3675       --ix;
   3676     }
   3677 
   3678     /* If we didn't find one, we're finished dividing    */
   3679     if(s_mp_cmp(&rem, b) < 0)
   3680       break;
   3681 
   3682     /* Compute a guess for the next quotient digit       */
   3683     q = DIGIT(&rem, USED(&rem) - 1);
   3684     if(q <= DIGIT(b, USED(b) - 1) && USED(&rem) > 1)
   3685       q = (q << DIGIT_BIT) | DIGIT(&rem, USED(&rem) - 2);
   3686 
   3687     q /= DIGIT(b, USED(b) - 1);
   3688 
   3689     /* The guess can be as much as RADIX + 1 */
   3690     if(q >= RADIX)
   3691       q = RADIX - 1;
   3692 
   3693     /* See what that multiplies out to                   */
   3694     mp_copy(b, &t);
   3695     if((res = s_mp_mul_d(&t, q)) != MP_OKAY)
   3696       goto CLEANUP;
   3697 
   3698     /*
   3699        If it's too big, back it off.  We should not have to do this
   3700        more than once, or, in rare cases, twice.  Knuth describes a
   3701        method by which this could be reduced to a maximum of once, but
   3702        I didn't implement that here.
   3703      */
   3704     while(s_mp_cmp(&t, &rem) > 0) {
   3705       --q;
   3706       s_mp_sub(&t, b);
   3707     }
   3708 
   3709     /* At this point, q should be the right next digit   */
   3710     if((res = s_mp_sub(&rem, &t)) != MP_OKAY)
   3711       goto CLEANUP;
   3712 
   3713     /*
   3714       Include the digit in the quotient.  We allocated enough memory
   3715       for any quotient we could ever possibly get, so we should not
   3716       have to check for failures here
   3717      */
   3718     DIGIT(&quot, 0) = q;
   3719   }
   3720 
   3721   /* Denormalize remainder                */
   3722   if(d != 0)
   3723     s_mp_div_2d(&rem, d);
   3724 
   3725   s_mp_clamp(&quot);
   3726   s_mp_clamp(&rem);
   3727 
   3728   /* Copy quotient back to output         */
   3729   s_mp_exch(&quot, a);
   3730 
   3731   /* Copy remainder back to output        */
   3732   s_mp_exch(&rem, b);
   3733 
   3734 CLEANUP:
   3735   mp_clear(&rem);
   3736 REM:
   3737   mp_clear(&t);
   3738 T:
   3739   mp_clear(&quot);
   3740 
   3741   return res;
   3742 
   3743 } /* end s_mp_div() */
   3744 
   3745 /* }}} */
   3746 
   3747 /* {{{ s_mp_2expt(a, k) */
   3748 
   3749 mp_err   s_mp_2expt(mp_int *a, mp_digit k)
   3750 {
   3751   mp_err    res;
   3752   mp_size   dig, bit;
   3753 
   3754   dig = k / DIGIT_BIT;
   3755   bit = k % DIGIT_BIT;
   3756 
   3757   mp_zero(a);
   3758   if((res = s_mp_pad(a, dig + 1)) != MP_OKAY)
   3759     return res;
   3760 
   3761   DIGIT(a, dig) |= (1 << bit);
   3762 
   3763   return MP_OKAY;
   3764 
   3765 } /* end s_mp_2expt() */
   3766 
   3767 /* }}} */
   3768 
   3769 
   3770 /* }}} */
   3771 
   3772 /* }}} */
   3773 
   3774 /* {{{ Primitive comparisons */
   3775 
   3776 /* {{{ s_mp_cmp(a, b) */
   3777 
   3778 /* Compare |a| <=> |b|, return 0 if equal, <0 if a<b, >0 if a>b           */
   3779 int      s_mp_cmp(mp_int *a, mp_int *b)
   3780 {
   3781   mp_size   ua = USED(a), ub = USED(b);
   3782 
   3783   if(ua > ub)
   3784     return MP_GT;
   3785   else if(ua < ub)
   3786     return MP_LT;
   3787   else {
   3788     int      ix = ua - 1;
   3789     mp_digit *ap = DIGITS(a) + ix, *bp = DIGITS(b) + ix;
   3790 
   3791     while(ix >= 0) {
   3792       if(*ap > *bp)
   3793 	return MP_GT;
   3794       else if(*ap < *bp)
   3795 	return MP_LT;
   3796 
   3797       --ap; --bp; --ix;
   3798     }
   3799 
   3800     return MP_EQ;
   3801   }
   3802 
   3803 } /* end s_mp_cmp() */
   3804 
   3805 /* }}} */
   3806 
   3807 /* {{{ s_mp_cmp_d(a, d) */
   3808 
   3809 /* Compare |a| <=> d, return 0 if equal, <0 if a<d, >0 if a>d             */
   3810 int      s_mp_cmp_d(mp_int *a, mp_digit d)
   3811 {
   3812   mp_size  ua = USED(a);
   3813   mp_digit *ap = DIGITS(a);
   3814 
   3815   if(ua > 1)
   3816     return MP_GT;
   3817 
   3818   if(*ap < d)
   3819     return MP_LT;
   3820   else if(*ap > d)
   3821     return MP_GT;
   3822   else
   3823     return MP_EQ;
   3824 
   3825 } /* end s_mp_cmp_d() */
   3826 
   3827 /* }}} */
   3828 
   3829 /* {{{ s_mp_ispow2(v) */
   3830 
   3831 /*
   3832   Returns -1 if the value is not a power of two; otherwise, it returns
   3833   k such that v = 2^k, i.e. lg(v).
   3834  */
   3835 int      s_mp_ispow2(mp_int *v)
   3836 {
   3837   mp_digit d, *dp;
   3838   mp_size  uv = USED(v);
   3839   int      extra = 0, ix;
   3840 
   3841   d = DIGIT(v, uv - 1); /* most significant digit of v */
   3842 
   3843   while(d && ((d & 1) == 0)) {
   3844     d >>= 1;
   3845     ++extra;
   3846   }
   3847 
   3848   if(d == 1) {
   3849     ix = uv - 2;
   3850     dp = DIGITS(v) + ix;
   3851 
   3852     while(ix >= 0) {
   3853       if(*dp)
   3854 	return -1; /* not a power of two */
   3855 
   3856       --dp; --ix;
   3857     }
   3858 
   3859     return ((uv - 1) * DIGIT_BIT) + extra;
   3860   }
   3861 
   3862   return -1;
   3863 
   3864 } /* end s_mp_ispow2() */
   3865 
   3866 /* }}} */
   3867 
   3868 /* {{{ s_mp_ispow2d(d) */
   3869 
   3870 int      s_mp_ispow2d(mp_digit d)
   3871 {
   3872   int   pow = 0;
   3873 
   3874   while((d & 1) == 0) {
   3875     ++pow; d >>= 1;
   3876   }
   3877 
   3878   if(d == 1)
   3879     return pow;
   3880 
   3881   return -1;
   3882 
   3883 } /* end s_mp_ispow2d() */
   3884 
   3885 /* }}} */
   3886 
   3887 /* }}} */
   3888 
   3889 /* {{{ Primitive I/O helpers */
   3890 
   3891 /* {{{ s_mp_tovalue(ch, r) */
   3892 
   3893 /*
   3894   Convert the given character to its digit value, in the given radix.
   3895   If the given character is not understood in the given radix, -1 is
   3896   returned.  Otherwise the digit's numeric value is returned.
   3897 
   3898   The results will be odd if you use a radix < 2 or > 62, you are
   3899   expected to know what you're up to.
   3900  */
   3901 int      s_mp_tovalue(char ch, int r)
   3902 {
   3903   int    val, xch;
   3904 
   3905   if(r > 36)
   3906     xch = ch;
   3907   else
   3908     xch = toupper(ch);
   3909 
   3910   if(isdigit(xch))
   3911     val = xch - '0';
   3912   else if(isupper(xch))
   3913     val = xch - 'A' + 10;
   3914   else if(islower(xch))
   3915     val = xch - 'a' + 36;
   3916   else if(xch == '+')
   3917     val = 62;
   3918   else if(xch == '/')
   3919     val = 63;
   3920   else
   3921     return -1;
   3922 
   3923   if(val < 0 || val >= r)
   3924     return -1;
   3925 
   3926   return val;
   3927 
   3928 } /* end s_mp_tovalue() */
   3929 
   3930 /* }}} */
   3931 
   3932 /* {{{ s_mp_todigit(val, r, low) */
   3933 
   3934 /*
   3935   Convert val to a radix-r digit, if possible.  If val is out of range
   3936   for r, returns zero.  Otherwise, returns an ASCII character denoting
   3937   the value in the given radix.
   3938 
   3939   The results may be odd if you use a radix < 2 or > 64, you are
   3940   expected to know what you're doing.
   3941  */
   3942 
   3943 char     s_mp_todigit(int val, int r, int low)
   3944 {
   3945   char   ch;
   3946 
   3947   if(val < 0 || val >= r)
   3948     return 0;
   3949 
   3950   ch = s_dmap_1[val];
   3951 
   3952   if(r <= 36 && low)
   3953     ch = tolower(ch);
   3954 
   3955   return ch;
   3956 
   3957 } /* end s_mp_todigit() */
   3958 
   3959 /* }}} */
   3960 
   3961 /* {{{ s_mp_outlen(bits, radix) */
   3962 
   3963 /*
   3964    Return an estimate for how long a string is needed to hold a radix
   3965    r representation of a number with 'bits' significant bits.
   3966 
   3967    Does not include space for a sign or a NUL terminator.
   3968  */
   3969 int      s_mp_outlen(int bits, int r)
   3970 {
   3971   return (int)((double)bits * LOG_V_2(r));
   3972 
   3973 } /* end s_mp_outlen() */
   3974 
   3975 /* }}} */
   3976 
   3977 /* }}} */
   3978 
   3979 /*------------------------------------------------------------------------*/
   3980 /* HERE THERE BE DRAGONS                                                  */
   3981 /* crc==4242132123, version==2, Sat Feb 02 06:43:52 2002 */
   3982 
   3983 /* $Source: /cvs/libtom/libtommath/mtest/mpi.c,v $ */
   3984 /* $Revision: 1.2 $ */
   3985 /* $Date: 2005/05/05 14:38:47 $ */
   3986