Home | History | Annotate | Download | only in lib
      1 /* ===-- udivmoddi4.c - Implement __udivmoddi4 -----------------------------===
      2  *
      3  *                     The LLVM Compiler Infrastructure
      4  *
      5  * This file is distributed under the University of Illinois Open Source
      6  * License. See LICENSE.TXT for details.
      7  *
      8  * ===----------------------------------------------------------------------===
      9  *
     10  * This file implements __udivmoddi4 for the compiler_rt library.
     11  *
     12  * ===----------------------------------------------------------------------===
     13  */
     14 
     15 #include "int_lib.h"
     16 
     17 /* Effects: if rem != 0, *rem = a % b
     18  * Returns: a / b
     19  */
     20 
     21 /* Translated from Figure 3-40 of The PowerPC Compiler Writer's Guide */
     22 
     23 du_int
     24 __udivmoddi4(du_int a, du_int b, du_int* rem)
     25 {
     26     const unsigned n_uword_bits = sizeof(su_int) * CHAR_BIT;
     27     const unsigned n_udword_bits = sizeof(du_int) * CHAR_BIT;
     28     udwords n;
     29     n.all = a;
     30     udwords d;
     31     d.all = b;
     32     udwords q;
     33     udwords r;
     34     unsigned sr;
     35     /* special cases, X is unknown, K != 0 */
     36     if (n.s.high == 0)
     37     {
     38         if (d.s.high == 0)
     39         {
     40             /* 0 X
     41              * ---
     42              * 0 X
     43              */
     44             if (rem)
     45                 *rem = n.s.low % d.s.low;
     46             return n.s.low / d.s.low;
     47         }
     48         /* 0 X
     49          * ---
     50          * K X
     51          */
     52         if (rem)
     53             *rem = n.s.low;
     54         return 0;
     55     }
     56     /* n.s.high != 0 */
     57     if (d.s.low == 0)
     58     {
     59         if (d.s.high == 0)
     60         {
     61             /* K X
     62              * ---
     63              * 0 0
     64              */
     65             if (rem)
     66                 *rem = n.s.high % d.s.low;
     67             return n.s.high / d.s.low;
     68         }
     69         /* d.s.high != 0 */
     70         if (n.s.low == 0)
     71         {
     72             /* K 0
     73              * ---
     74              * K 0
     75              */
     76             if (rem)
     77             {
     78                 r.s.high = n.s.high % d.s.high;
     79                 r.s.low = 0;
     80                 *rem = r.all;
     81             }
     82             return n.s.high / d.s.high;
     83         }
     84         /* K K
     85          * ---
     86          * K 0
     87          */
     88         if ((d.s.high & (d.s.high - 1)) == 0)     /* if d is a power of 2 */
     89         {
     90             if (rem)
     91             {
     92                 r.s.low = n.s.low;
     93                 r.s.high = n.s.high & (d.s.high - 1);
     94                 *rem = r.all;
     95             }
     96             return n.s.high >> __builtin_ctz(d.s.high);
     97         }
     98         /* K K
     99          * ---
    100          * K 0
    101          */
    102         sr = __builtin_clz(d.s.high) - __builtin_clz(n.s.high);
    103         /* 0 <= sr <= n_uword_bits - 2 or sr large */
    104         if (sr > n_uword_bits - 2)
    105         {
    106            if (rem)
    107                 *rem = n.all;
    108             return 0;
    109         }
    110         ++sr;
    111         /* 1 <= sr <= n_uword_bits - 1 */
    112         /* q.all = n.all << (n_udword_bits - sr); */
    113         q.s.low = 0;
    114         q.s.high = n.s.low << (n_uword_bits - sr);
    115         /* r.all = n.all >> sr; */
    116         r.s.high = n.s.high >> sr;
    117         r.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr);
    118     }
    119     else  /* d.s.low != 0 */
    120     {
    121         if (d.s.high == 0)
    122         {
    123             /* K X
    124              * ---
    125              * 0 K
    126              */
    127             if ((d.s.low & (d.s.low - 1)) == 0)     /* if d is a power of 2 */
    128             {
    129                 if (rem)
    130                     *rem = n.s.low & (d.s.low - 1);
    131                 if (d.s.low == 1)
    132                     return n.all;
    133                 unsigned sr = __builtin_ctz(d.s.low);
    134                 q.s.high = n.s.high >> sr;
    135                 q.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr);
    136                 return q.all;
    137             }
    138             /* K X
    139              * ---
    140              *0 K
    141              */
    142             sr = 1 + n_uword_bits + __builtin_clz(d.s.low) - __builtin_clz(n.s.high);
    143             /* 2 <= sr <= n_udword_bits - 1
    144              * q.all = n.all << (n_udword_bits - sr);
    145              * r.all = n.all >> sr;
    146              * if (sr == n_uword_bits)
    147              * {
    148              *     q.s.low = 0;
    149              *     q.s.high = n.s.low;
    150              *     r.s.high = 0;
    151              *     r.s.low = n.s.high;
    152              * }
    153              * else if (sr < n_uword_bits)  // 2 <= sr <= n_uword_bits - 1
    154              * {
    155              *     q.s.low = 0;
    156              *     q.s.high = n.s.low << (n_uword_bits - sr);
    157              *     r.s.high = n.s.high >> sr;
    158              *     r.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr);
    159              * }
    160              * else              // n_uword_bits + 1 <= sr <= n_udword_bits - 1
    161              * {
    162              *     q.s.low = n.s.low << (n_udword_bits - sr);
    163              *     q.s.high = (n.s.high << (n_udword_bits - sr)) |
    164              *              (n.s.low >> (sr - n_uword_bits));
    165              *     r.s.high = 0;
    166              *     r.s.low = n.s.high >> (sr - n_uword_bits);
    167              * }
    168              */
    169             q.s.low =  (n.s.low << (n_udword_bits - sr)) &
    170                      ((si_int)(n_uword_bits - sr) >> (n_uword_bits-1));
    171             q.s.high = ((n.s.low << ( n_uword_bits - sr))                       &
    172                      ((si_int)(sr - n_uword_bits - 1) >> (n_uword_bits-1))) |
    173                      (((n.s.high << (n_udword_bits - sr))                     |
    174                      (n.s.low >> (sr - n_uword_bits)))                        &
    175                      ((si_int)(n_uword_bits - sr) >> (n_uword_bits-1)));
    176             r.s.high = (n.s.high >> sr) &
    177                      ((si_int)(sr - n_uword_bits) >> (n_uword_bits-1));
    178             r.s.low =  ((n.s.high >> (sr - n_uword_bits))                       &
    179                      ((si_int)(n_uword_bits - sr - 1) >> (n_uword_bits-1))) |
    180                      (((n.s.high << (n_uword_bits - sr))                      |
    181                      (n.s.low >> sr))                                         &
    182                      ((si_int)(sr - n_uword_bits) >> (n_uword_bits-1)));
    183         }
    184         else
    185         {
    186             /* K X
    187              * ---
    188              * K K
    189              */
    190             sr = __builtin_clz(d.s.high) - __builtin_clz(n.s.high);
    191             /* 0 <= sr <= n_uword_bits - 1 or sr large */
    192             if (sr > n_uword_bits - 1)
    193             {
    194                if (rem)
    195                     *rem = n.all;
    196                 return 0;
    197             }
    198             ++sr;
    199             /* 1 <= sr <= n_uword_bits */
    200             /*  q.all = n.all << (n_udword_bits - sr); */
    201             q.s.low = 0;
    202             q.s.high = n.s.low << (n_uword_bits - sr);
    203             /* r.all = n.all >> sr;
    204              * if (sr < n_uword_bits)
    205              * {
    206              *     r.s.high = n.s.high >> sr;
    207              *     r.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr);
    208              * }
    209              * else
    210              * {
    211              *     r.s.high = 0;
    212              *     r.s.low = n.s.high;
    213              * }
    214              */
    215             r.s.high = (n.s.high >> sr) &
    216                      ((si_int)(sr - n_uword_bits) >> (n_uword_bits-1));
    217             r.s.low = (n.s.high << (n_uword_bits - sr)) |
    218                     ((n.s.low >> sr)                  &
    219                     ((si_int)(sr - n_uword_bits) >> (n_uword_bits-1)));
    220         }
    221     }
    222     /* Not a special case
    223      * q and r are initialized with:
    224      * q.all = n.all << (n_udword_bits - sr);
    225      * r.all = n.all >> sr;
    226      * 1 <= sr <= n_udword_bits - 1
    227      */
    228     su_int carry = 0;
    229     for (; sr > 0; --sr)
    230     {
    231         /* r:q = ((r:q)  << 1) | carry */
    232         r.s.high = (r.s.high << 1) | (r.s.low  >> (n_uword_bits - 1));
    233         r.s.low  = (r.s.low  << 1) | (q.s.high >> (n_uword_bits - 1));
    234         q.s.high = (q.s.high << 1) | (q.s.low  >> (n_uword_bits - 1));
    235         q.s.low  = (q.s.low  << 1) | carry;
    236         /* carry = 0;
    237          * if (r.all >= d.all)
    238          * {
    239          *      r.all -= d.all;
    240          *      carry = 1;
    241          * }
    242          */
    243         const di_int s = (di_int)(d.all - r.all - 1) >> (n_udword_bits - 1);
    244         carry = s & 1;
    245         r.all -= d.all & s;
    246     }
    247     q.all = (q.all << 1) | carry;
    248     if (rem)
    249         *rem = r.all;
    250     return q.all;
    251 }
    252