Home | History | Annotate | Download | only in lib
      1 /* ===-- udivmodti4.c - Implement __udivmodti4 -----------------------------===
      2  *
      3  *                    The LLVM Compiler Infrastructure
      4  *
      5  * This file is distributed under the University of Illinois Open Source
      6  * License. See LICENSE.TXT for details.
      7  *
      8  * ===----------------------------------------------------------------------===
      9  *
     10  * This file implements __udivmodti4 for the compiler_rt library.
     11  *
     12  * ===----------------------------------------------------------------------===
     13  */
     14 
     15 #if __x86_64
     16 
     17 #include "int_lib.h"
     18 
     19 /* Effects: if rem != 0, *rem = a % b
     20  * Returns: a / b
     21  */
     22 
     23 /* Translated from Figure 3-40 of The PowerPC Compiler Writer's Guide */
     24 
     25 tu_int
     26 __udivmodti4(tu_int a, tu_int b, tu_int* rem)
     27 {
     28     const unsigned n_udword_bits = sizeof(du_int) * CHAR_BIT;
     29     const unsigned n_utword_bits = sizeof(tu_int) * CHAR_BIT;
     30     utwords n;
     31     n.all = a;
     32     utwords d;
     33     d.all = b;
     34     utwords q;
     35     utwords r;
     36     unsigned sr;
     37     /* special cases, X is unknown, K != 0 */
     38     if (n.s.high == 0)
     39     {
     40         if (d.s.high == 0)
     41         {
     42             /* 0 X
     43              * ---
     44              * 0 X
     45              */
     46             if (rem)
     47                 *rem = n.s.low % d.s.low;
     48             return n.s.low / d.s.low;
     49         }
     50         /* 0 X
     51          * ---
     52          * K X
     53          */
     54         if (rem)
     55             *rem = n.s.low;
     56         return 0;
     57     }
     58     /* n.s.high != 0 */
     59     if (d.s.low == 0)
     60     {
     61         if (d.s.high == 0)
     62         {
     63             /* K X
     64              * ---
     65              * 0 0
     66              */
     67             if (rem)
     68                 *rem = n.s.high % d.s.low;
     69             return n.s.high / d.s.low;
     70         }
     71         /* d.s.high != 0 */
     72         if (n.s.low == 0)
     73         {
     74             /* K 0
     75              * ---
     76              * K 0
     77              */
     78             if (rem)
     79             {
     80                 r.s.high = n.s.high % d.s.high;
     81                 r.s.low = 0;
     82                 *rem = r.all;
     83             }
     84             return n.s.high / d.s.high;
     85         }
     86         /* K K
     87          * ---
     88          * K 0
     89          */
     90         if ((d.s.high & (d.s.high - 1)) == 0)     /* if d is a power of 2 */
     91         {
     92             if (rem)
     93             {
     94                 r.s.low = n.s.low;
     95                 r.s.high = n.s.high & (d.s.high - 1);
     96                 *rem = r.all;
     97             }
     98             return n.s.high >> __builtin_ctzll(d.s.high);
     99         }
    100         /* K K
    101          * ---
    102          * K 0
    103          */
    104         sr = __builtin_clzll(d.s.high) - __builtin_clzll(n.s.high);
    105         /* 0 <= sr <= n_udword_bits - 2 or sr large */
    106         if (sr > n_udword_bits - 2)
    107         {
    108            if (rem)
    109                 *rem = n.all;
    110             return 0;
    111         }
    112         ++sr;
    113         /* 1 <= sr <= n_udword_bits - 1 */
    114         /* q.all = n.all << (n_utword_bits - sr); */
    115         q.s.low = 0;
    116         q.s.high = n.s.low << (n_udword_bits - sr);
    117         /* r.all = n.all >> sr; */
    118         r.s.high = n.s.high >> sr;
    119         r.s.low = (n.s.high << (n_udword_bits - sr)) | (n.s.low >> sr);
    120     }
    121     else  /* d.s.low != 0 */
    122     {
    123         if (d.s.high == 0)
    124         {
    125             /* K X
    126              * ---
    127              * 0 K
    128              */
    129             if ((d.s.low & (d.s.low - 1)) == 0)     /* if d is a power of 2 */
    130             {
    131                 if (rem)
    132                     *rem = n.s.low & (d.s.low - 1);
    133                 if (d.s.low == 1)
    134                     return n.all;
    135                 unsigned sr = __builtin_ctzll(d.s.low);
    136                 q.s.high = n.s.high >> sr;
    137                 q.s.low = (n.s.high << (n_udword_bits - sr)) | (n.s.low >> sr);
    138                 return q.all;
    139             }
    140             /* K X
    141              * ---
    142              * 0 K
    143              */
    144             sr = 1 + n_udword_bits + __builtin_clzll(d.s.low)
    145                                    - __builtin_clzll(n.s.high);
    146             /* 2 <= sr <= n_utword_bits - 1
    147              * q.all = n.all << (n_utword_bits - sr);
    148              * r.all = n.all >> sr;
    149              * if (sr == n_udword_bits)
    150              * {
    151              *     q.s.low = 0;
    152              *     q.s.high = n.s.low;
    153              *     r.s.high = 0;
    154              *     r.s.low = n.s.high;
    155              * }
    156              * else if (sr < n_udword_bits)  // 2 <= sr <= n_udword_bits - 1
    157              * {
    158              *     q.s.low = 0;
    159              *     q.s.high = n.s.low << (n_udword_bits - sr);
    160              *     r.s.high = n.s.high >> sr;
    161              *     r.s.low = (n.s.high << (n_udword_bits - sr)) | (n.s.low >> sr);
    162              * }
    163              * else              // n_udword_bits + 1 <= sr <= n_utword_bits - 1
    164              * {
    165              *     q.s.low = n.s.low << (n_utword_bits - sr);
    166              *     q.s.high = (n.s.high << (n_utword_bits - sr)) |
    167              *              (n.s.low >> (sr - n_udword_bits));
    168              *     r.s.high = 0;
    169              *     r.s.low = n.s.high >> (sr - n_udword_bits);
    170              * }
    171              */
    172             q.s.low =  (n.s.low << (n_utword_bits - sr)) &
    173                      ((di_int)(int)(n_udword_bits - sr) >> (n_udword_bits-1));
    174             q.s.high = ((n.s.low << ( n_udword_bits - sr))                        &
    175                      ((di_int)(int)(sr - n_udword_bits - 1) >> (n_udword_bits-1))) |
    176                      (((n.s.high << (n_utword_bits - sr))                       |
    177                      (n.s.low >> (sr - n_udword_bits)))                         &
    178                      ((di_int)(int)(n_udword_bits - sr) >> (n_udword_bits-1)));
    179             r.s.high = (n.s.high >> sr) &
    180                      ((di_int)(int)(sr - n_udword_bits) >> (n_udword_bits-1));
    181             r.s.low =  ((n.s.high >> (sr - n_udword_bits))                        &
    182                      ((di_int)(int)(n_udword_bits - sr - 1) >> (n_udword_bits-1))) |
    183                      (((n.s.high << (n_udword_bits - sr))                       |
    184                      (n.s.low >> sr))                                           &
    185                      ((di_int)(int)(sr - n_udword_bits) >> (n_udword_bits-1)));
    186         }
    187         else
    188         {
    189             /* K X
    190              * ---
    191              * K K
    192              */
    193             sr = __builtin_clzll(d.s.high) - __builtin_clzll(n.s.high);
    194             /*0 <= sr <= n_udword_bits - 1 or sr large */
    195             if (sr > n_udword_bits - 1)
    196             {
    197                if (rem)
    198                     *rem = n.all;
    199                 return 0;
    200             }
    201             ++sr;
    202             /* 1 <= sr <= n_udword_bits */
    203             /* q.all = n.all << (n_utword_bits - sr); */
    204             q.s.low = 0;
    205             q.s.high = n.s.low << (n_udword_bits - sr);
    206             /* r.all = n.all >> sr;
    207              * if (sr < n_udword_bits)
    208              * {
    209              *     r.s.high = n.s.high >> sr;
    210              *     r.s.low = (n.s.high << (n_udword_bits - sr)) | (n.s.low >> sr);
    211              * }
    212              * else
    213              * {
    214              *     r.s.high = 0;
    215              *     r.s.low = n.s.high;
    216              * }
    217              */
    218             r.s.high = (n.s.high >> sr) &
    219                      ((di_int)(int)(sr - n_udword_bits) >> (n_udword_bits-1));
    220             r.s.low = (n.s.high << (n_udword_bits - sr)) |
    221                     ((n.s.low >> sr)                   &
    222                     ((di_int)(int)(sr - n_udword_bits) >> (n_udword_bits-1)));
    223         }
    224     }
    225     /* Not a special case
    226      * q and r are initialized with:
    227      * q.all = n.all << (n_utword_bits - sr);
    228      * r.all = n.all >> sr;
    229      * 1 <= sr <= n_utword_bits - 1
    230      */
    231     su_int carry = 0;
    232     for (; sr > 0; --sr)
    233     {
    234         /* r:q = ((r:q)  << 1) | carry */
    235         r.s.high = (r.s.high << 1) | (r.s.low  >> (n_udword_bits - 1));
    236         r.s.low  = (r.s.low  << 1) | (q.s.high >> (n_udword_bits - 1));
    237         q.s.high = (q.s.high << 1) | (q.s.low  >> (n_udword_bits - 1));
    238         q.s.low  = (q.s.low  << 1) | carry;
    239         /* carry = 0;
    240          * if (r.all >= d.all)
    241          * {
    242          *     r.all -= d.all;
    243          *      carry = 1;
    244          * }
    245          */
    246         const ti_int s = (ti_int)(d.all - r.all - 1) >> (n_utword_bits - 1);
    247         carry = s & 1;
    248         r.all -= d.all & s;
    249     }
    250     q.all = (q.all << 1) | carry;
    251     if (rem)
    252         *rem = r.all;
    253     return q.all;
    254 }
    255 
    256 #endif /* __x86_64 */
    257