Home | History | Annotate | Download | only in builtins
      1 /* ===-- udivmoddi4.c - Implement __udivmoddi4 -----------------------------===
      2  *
      3  *                     The LLVM Compiler Infrastructure
      4  *
      5  * This file is dual licensed under the MIT and the University of Illinois Open
      6  * Source Licenses. See LICENSE.TXT for details.
      7  *
      8  * ===----------------------------------------------------------------------===
      9  *
     10  * This file implements __udivmoddi4 for the compiler_rt library.
     11  *
     12  * ===----------------------------------------------------------------------===
     13  */
     14 
     15 #include "int_lib.h"
     16 
     17 /* Effects: if rem != 0, *rem = a % b
     18  * Returns: a / b
     19  */
     20 
     21 /* Translated from Figure 3-40 of The PowerPC Compiler Writer's Guide */
     22 
     23 COMPILER_RT_ABI du_int
     24 __udivmoddi4(du_int a, du_int b, du_int* rem)
     25 {
     26     const unsigned n_uword_bits = sizeof(su_int) * CHAR_BIT;
     27     const unsigned n_udword_bits = sizeof(du_int) * CHAR_BIT;
     28     udwords n;
     29     n.all = a;
     30     udwords d;
     31     d.all = b;
     32     udwords q;
     33     udwords r;
     34     unsigned sr;
     35     /* special cases, X is unknown, K != 0 */
     36     if (n.s.high == 0)
     37     {
     38         if (d.s.high == 0)
     39         {
     40             /* 0 X
     41              * ---
     42              * 0 X
     43              */
     44             if (rem)
     45                 *rem = n.s.low % d.s.low;
     46             return n.s.low / d.s.low;
     47         }
     48         /* 0 X
     49          * ---
     50          * K X
     51          */
     52         if (rem)
     53             *rem = n.s.low;
     54         return 0;
     55     }
     56     /* n.s.high != 0 */
     57     if (d.s.low == 0)
     58     {
     59         if (d.s.high == 0)
     60         {
     61             /* K X
     62              * ---
     63              * 0 0
     64              */
     65             if (rem)
     66                 *rem = n.s.high % d.s.low;
     67             return n.s.high / d.s.low;
     68         }
     69         /* d.s.high != 0 */
     70         if (n.s.low == 0)
     71         {
     72             /* K 0
     73              * ---
     74              * K 0
     75              */
     76             if (rem)
     77             {
     78                 r.s.high = n.s.high % d.s.high;
     79                 r.s.low = 0;
     80                 *rem = r.all;
     81             }
     82             return n.s.high / d.s.high;
     83         }
     84         /* K K
     85          * ---
     86          * K 0
     87          */
     88         if ((d.s.high & (d.s.high - 1)) == 0)     /* if d is a power of 2 */
     89         {
     90             if (rem)
     91             {
     92                 r.s.low = n.s.low;
     93                 r.s.high = n.s.high & (d.s.high - 1);
     94                 *rem = r.all;
     95             }
     96             return n.s.high >> __builtin_ctz(d.s.high);
     97         }
     98         /* K K
     99          * ---
    100          * K 0
    101          */
    102         sr = __builtin_clz(d.s.high) - __builtin_clz(n.s.high);
    103         /* 0 <= sr <= n_uword_bits - 2 or sr large */
    104         if (sr > n_uword_bits - 2)
    105         {
    106            if (rem)
    107                 *rem = n.all;
    108             return 0;
    109         }
    110         ++sr;
    111         /* 1 <= sr <= n_uword_bits - 1 */
    112         /* q.all = n.all << (n_udword_bits - sr); */
    113         q.s.low = 0;
    114         q.s.high = n.s.low << (n_uword_bits - sr);
    115         /* r.all = n.all >> sr; */
    116         r.s.high = n.s.high >> sr;
    117         r.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr);
    118     }
    119     else  /* d.s.low != 0 */
    120     {
    121         if (d.s.high == 0)
    122         {
    123             /* K X
    124              * ---
    125              * 0 K
    126              */
    127             if ((d.s.low & (d.s.low - 1)) == 0)     /* if d is a power of 2 */
    128             {
    129                 if (rem)
    130                     *rem = n.s.low & (d.s.low - 1);
    131                 if (d.s.low == 1)
    132                     return n.all;
    133                 sr = __builtin_ctz(d.s.low);
    134                 q.s.high = n.s.high >> sr;
    135                 q.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr);
    136                 return q.all;
    137             }
    138             /* K X
    139              * ---
    140              * 0 K
    141              */
    142             sr = 1 + n_uword_bits + __builtin_clz(d.s.low) - __builtin_clz(n.s.high);
    143             /* 2 <= sr <= n_udword_bits - 1
    144              * q.all = n.all << (n_udword_bits - sr);
    145              * r.all = n.all >> sr;
    146              */
    147             if (sr == n_uword_bits)
    148             {
    149                 q.s.low = 0;
    150                 q.s.high = n.s.low;
    151                 r.s.high = 0;
    152                 r.s.low = n.s.high;
    153             }
    154             else if (sr < n_uword_bits)  // 2 <= sr <= n_uword_bits - 1
    155             {
    156                 q.s.low = 0;
    157                 q.s.high = n.s.low << (n_uword_bits - sr);
    158                 r.s.high = n.s.high >> sr;
    159                 r.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr);
    160             }
    161             else              // n_uword_bits + 1 <= sr <= n_udword_bits - 1
    162             {
    163                 q.s.low = n.s.low << (n_udword_bits - sr);
    164                 q.s.high = (n.s.high << (n_udword_bits - sr)) |
    165                            (n.s.low >> (sr - n_uword_bits));
    166                 r.s.high = 0;
    167                 r.s.low = n.s.high >> (sr - n_uword_bits);
    168             }
    169         }
    170         else
    171         {
    172             /* K X
    173              * ---
    174              * K K
    175              */
    176             sr = __builtin_clz(d.s.high) - __builtin_clz(n.s.high);
    177             /* 0 <= sr <= n_uword_bits - 1 or sr large */
    178             if (sr > n_uword_bits - 1)
    179             {
    180                 if (rem)
    181                     *rem = n.all;
    182                 return 0;
    183             }
    184             ++sr;
    185             /* 1 <= sr <= n_uword_bits */
    186             /*  q.all = n.all << (n_udword_bits - sr); */
    187             q.s.low = 0;
    188             if (sr == n_uword_bits)
    189             {
    190                 q.s.high = n.s.low;
    191                 r.s.high = 0;
    192                 r.s.low = n.s.high;
    193             }
    194             else
    195             {
    196                 q.s.high = n.s.low << (n_uword_bits - sr);
    197                 r.s.high = n.s.high >> sr;
    198                 r.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr);
    199             }
    200         }
    201     }
    202     /* Not a special case
    203      * q and r are initialized with:
    204      * q.all = n.all << (n_udword_bits - sr);
    205      * r.all = n.all >> sr;
    206      * 1 <= sr <= n_udword_bits - 1
    207      */
    208     su_int carry = 0;
    209     for (; sr > 0; --sr)
    210     {
    211         /* r:q = ((r:q)  << 1) | carry */
    212         r.s.high = (r.s.high << 1) | (r.s.low  >> (n_uword_bits - 1));
    213         r.s.low  = (r.s.low  << 1) | (q.s.high >> (n_uword_bits - 1));
    214         q.s.high = (q.s.high << 1) | (q.s.low  >> (n_uword_bits - 1));
    215         q.s.low  = (q.s.low  << 1) | carry;
    216         /* carry = 0;
    217          * if (r.all >= d.all)
    218          * {
    219          *      r.all -= d.all;
    220          *      carry = 1;
    221          * }
    222          */
    223         const di_int s = (di_int)(d.all - r.all - 1) >> (n_udword_bits - 1);
    224         carry = s & 1;
    225         r.all -= d.all & s;
    226     }
    227     q.all = (q.all << 1) | carry;
    228     if (rem)
    229         *rem = r.all;
    230     return q.all;
    231 }
    232