1 /* ===-- udivmoddi4.c - Implement __udivmoddi4 -----------------------------=== 2 * 3 * The LLVM Compiler Infrastructure 4 * 5 * This file is dual licensed under the MIT and the University of Illinois Open 6 * Source Licenses. See LICENSE.TXT for details. 7 * 8 * ===----------------------------------------------------------------------=== 9 * 10 * This file implements __udivmoddi4 for the compiler_rt library. 11 * 12 * ===----------------------------------------------------------------------=== 13 */ 14 15 #include "int_lib.h" 16 17 /* Effects: if rem != 0, *rem = a % b 18 * Returns: a / b 19 */ 20 21 /* Translated from Figure 3-40 of The PowerPC Compiler Writer's Guide */ 22 23 COMPILER_RT_ABI du_int 24 __udivmoddi4(du_int a, du_int b, du_int* rem) 25 { 26 const unsigned n_uword_bits = sizeof(su_int) * CHAR_BIT; 27 const unsigned n_udword_bits = sizeof(du_int) * CHAR_BIT; 28 udwords n; 29 n.all = a; 30 udwords d; 31 d.all = b; 32 udwords q; 33 udwords r; 34 unsigned sr; 35 /* special cases, X is unknown, K != 0 */ 36 if (n.s.high == 0) 37 { 38 if (d.s.high == 0) 39 { 40 /* 0 X 41 * --- 42 * 0 X 43 */ 44 if (rem) 45 *rem = n.s.low % d.s.low; 46 return n.s.low / d.s.low; 47 } 48 /* 0 X 49 * --- 50 * K X 51 */ 52 if (rem) 53 *rem = n.s.low; 54 return 0; 55 } 56 /* n.s.high != 0 */ 57 if (d.s.low == 0) 58 { 59 if (d.s.high == 0) 60 { 61 /* K X 62 * --- 63 * 0 0 64 */ 65 if (rem) 66 *rem = n.s.high % d.s.low; 67 return n.s.high / d.s.low; 68 } 69 /* d.s.high != 0 */ 70 if (n.s.low == 0) 71 { 72 /* K 0 73 * --- 74 * K 0 75 */ 76 if (rem) 77 { 78 r.s.high = n.s.high % d.s.high; 79 r.s.low = 0; 80 *rem = r.all; 81 } 82 return n.s.high / d.s.high; 83 } 84 /* K K 85 * --- 86 * K 0 87 */ 88 if ((d.s.high & (d.s.high - 1)) == 0) /* if d is a power of 2 */ 89 { 90 if (rem) 91 { 92 r.s.low = n.s.low; 93 r.s.high = n.s.high & (d.s.high - 1); 94 *rem = r.all; 95 } 96 return n.s.high >> __builtin_ctz(d.s.high); 97 } 98 /* K K 99 * --- 100 * K 0 101 */ 102 sr = __builtin_clz(d.s.high) - __builtin_clz(n.s.high); 103 /* 0 <= sr <= n_uword_bits - 2 or sr large */ 104 if (sr > n_uword_bits - 2) 105 { 106 if (rem) 107 *rem = n.all; 108 return 0; 109 } 110 ++sr; 111 /* 1 <= sr <= n_uword_bits - 1 */ 112 /* q.all = n.all << (n_udword_bits - sr); */ 113 q.s.low = 0; 114 q.s.high = n.s.low << (n_uword_bits - sr); 115 /* r.all = n.all >> sr; */ 116 r.s.high = n.s.high >> sr; 117 r.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr); 118 } 119 else /* d.s.low != 0 */ 120 { 121 if (d.s.high == 0) 122 { 123 /* K X 124 * --- 125 * 0 K 126 */ 127 if ((d.s.low & (d.s.low - 1)) == 0) /* if d is a power of 2 */ 128 { 129 if (rem) 130 *rem = n.s.low & (d.s.low - 1); 131 if (d.s.low == 1) 132 return n.all; 133 sr = __builtin_ctz(d.s.low); 134 q.s.high = n.s.high >> sr; 135 q.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr); 136 return q.all; 137 } 138 /* K X 139 * --- 140 * 0 K 141 */ 142 sr = 1 + n_uword_bits + __builtin_clz(d.s.low) - __builtin_clz(n.s.high); 143 /* 2 <= sr <= n_udword_bits - 1 144 * q.all = n.all << (n_udword_bits - sr); 145 * r.all = n.all >> sr; 146 */ 147 if (sr == n_uword_bits) 148 { 149 q.s.low = 0; 150 q.s.high = n.s.low; 151 r.s.high = 0; 152 r.s.low = n.s.high; 153 } 154 else if (sr < n_uword_bits) // 2 <= sr <= n_uword_bits - 1 155 { 156 q.s.low = 0; 157 q.s.high = n.s.low << (n_uword_bits - sr); 158 r.s.high = n.s.high >> sr; 159 r.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr); 160 } 161 else // n_uword_bits + 1 <= sr <= n_udword_bits - 1 162 { 163 q.s.low = n.s.low << (n_udword_bits - sr); 164 q.s.high = (n.s.high << (n_udword_bits - sr)) | 165 (n.s.low >> (sr - n_uword_bits)); 166 r.s.high = 0; 167 r.s.low = n.s.high >> (sr - n_uword_bits); 168 } 169 } 170 else 171 { 172 /* K X 173 * --- 174 * K K 175 */ 176 sr = __builtin_clz(d.s.high) - __builtin_clz(n.s.high); 177 /* 0 <= sr <= n_uword_bits - 1 or sr large */ 178 if (sr > n_uword_bits - 1) 179 { 180 if (rem) 181 *rem = n.all; 182 return 0; 183 } 184 ++sr; 185 /* 1 <= sr <= n_uword_bits */ 186 /* q.all = n.all << (n_udword_bits - sr); */ 187 q.s.low = 0; 188 if (sr == n_uword_bits) 189 { 190 q.s.high = n.s.low; 191 r.s.high = 0; 192 r.s.low = n.s.high; 193 } 194 else 195 { 196 q.s.high = n.s.low << (n_uword_bits - sr); 197 r.s.high = n.s.high >> sr; 198 r.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr); 199 } 200 } 201 } 202 /* Not a special case 203 * q and r are initialized with: 204 * q.all = n.all << (n_udword_bits - sr); 205 * r.all = n.all >> sr; 206 * 1 <= sr <= n_udword_bits - 1 207 */ 208 su_int carry = 0; 209 for (; sr > 0; --sr) 210 { 211 /* r:q = ((r:q) << 1) | carry */ 212 r.s.high = (r.s.high << 1) | (r.s.low >> (n_uword_bits - 1)); 213 r.s.low = (r.s.low << 1) | (q.s.high >> (n_uword_bits - 1)); 214 q.s.high = (q.s.high << 1) | (q.s.low >> (n_uword_bits - 1)); 215 q.s.low = (q.s.low << 1) | carry; 216 /* carry = 0; 217 * if (r.all >= d.all) 218 * { 219 * r.all -= d.all; 220 * carry = 1; 221 * } 222 */ 223 const di_int s = (di_int)(d.all - r.all - 1) >> (n_udword_bits - 1); 224 carry = s & 1; 225 r.all -= d.all & s; 226 } 227 q.all = (q.all << 1) | carry; 228 if (rem) 229 *rem = r.all; 230 return q.all; 231 } 232