1 /* ===-- udivmoddi4.c - Implement __udivmoddi4 -----------------------------=== 2 * 3 * The LLVM Compiler Infrastructure 4 * 5 * This file is dual licensed under the MIT and the University of Illinois Open 6 * Source Licenses. See LICENSE.TXT for details. 7 * 8 * ===----------------------------------------------------------------------=== 9 * 10 * This file implements __udivmoddi4 for the compiler_rt library. 11 * 12 * ===----------------------------------------------------------------------=== 13 */ 14 15 #include "int_lib.h" 16 17 /* Effects: if rem != 0, *rem = a % b 18 * Returns: a / b 19 */ 20 21 /* Translated from Figure 3-40 of The PowerPC Compiler Writer's Guide */ 22 23 COMPILER_RT_ABI du_int 24 __udivmoddi4(du_int a, du_int b, du_int* rem) 25 { 26 const unsigned n_uword_bits = sizeof(su_int) * CHAR_BIT; 27 const unsigned n_udword_bits = sizeof(du_int) * CHAR_BIT; 28 udwords n; 29 n.all = a; 30 udwords d; 31 d.all = b; 32 udwords q; 33 udwords r; 34 unsigned sr; 35 /* special cases, X is unknown, K != 0 */ 36 if (n.s.high == 0) 37 { 38 if (d.s.high == 0) 39 { 40 /* 0 X 41 * --- 42 * 0 X 43 */ 44 if (rem) 45 *rem = n.s.low % d.s.low; 46 return n.s.low / d.s.low; 47 } 48 /* 0 X 49 * --- 50 * K X 51 */ 52 if (rem) 53 *rem = n.s.low; 54 return 0; 55 } 56 /* n.s.high != 0 */ 57 if (d.s.low == 0) 58 { 59 if (d.s.high == 0) 60 { 61 /* K X 62 * --- 63 * 0 0 64 */ 65 if (rem) 66 *rem = n.s.high % d.s.low; 67 return n.s.high / d.s.low; 68 } 69 /* d.s.high != 0 */ 70 if (n.s.low == 0) 71 { 72 /* K 0 73 * --- 74 * K 0 75 */ 76 if (rem) 77 { 78 r.s.high = n.s.high % d.s.high; 79 r.s.low = 0; 80 *rem = r.all; 81 } 82 return n.s.high / d.s.high; 83 } 84 /* K K 85 * --- 86 * K 0 87 */ 88 if ((d.s.high & (d.s.high - 1)) == 0) /* if d is a power of 2 */ 89 { 90 if (rem) 91 { 92 r.s.low = n.s.low; 93 r.s.high = n.s.high & (d.s.high - 1); 94 *rem = r.all; 95 } 96 return n.s.high >> __builtin_ctz(d.s.high); 97 } 98 /* K K 99 * --- 100 * K 0 101 */ 102 sr = __builtin_clz(d.s.high) - __builtin_clz(n.s.high); 103 /* 0 <= sr <= n_uword_bits - 2 or sr large */ 104 if (sr > n_uword_bits - 2) 105 { 106 if (rem) 107 *rem = n.all; 108 return 0; 109 } 110 ++sr; 111 /* 1 <= sr <= n_uword_bits - 1 */ 112 /* q.all = n.all << (n_udword_bits - sr); */ 113 q.s.low = 0; 114 q.s.high = n.s.low << (n_uword_bits - sr); 115 /* r.all = n.all >> sr; */ 116 r.s.high = n.s.high >> sr; 117 r.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr); 118 } 119 else /* d.s.low != 0 */ 120 { 121 if (d.s.high == 0) 122 { 123 /* K X 124 * --- 125 * 0 K 126 */ 127 if ((d.s.low & (d.s.low - 1)) == 0) /* if d is a power of 2 */ 128 { 129 if (rem) 130 *rem = n.s.low & (d.s.low - 1); 131 if (d.s.low == 1) 132 return n.all; 133 sr = __builtin_ctz(d.s.low); 134 q.s.high = n.s.high >> sr; 135 q.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr); 136 return q.all; 137 } 138 /* K X 139 * --- 140 *0 K 141 */ 142 sr = 1 + n_uword_bits + __builtin_clz(d.s.low) - __builtin_clz(n.s.high); 143 /* 2 <= sr <= n_udword_bits - 1 144 * q.all = n.all << (n_udword_bits - sr); 145 * r.all = n.all >> sr; 146 * if (sr == n_uword_bits) 147 * { 148 * q.s.low = 0; 149 * q.s.high = n.s.low; 150 * r.s.high = 0; 151 * r.s.low = n.s.high; 152 * } 153 * else if (sr < n_uword_bits) // 2 <= sr <= n_uword_bits - 1 154 * { 155 * q.s.low = 0; 156 * q.s.high = n.s.low << (n_uword_bits - sr); 157 * r.s.high = n.s.high >> sr; 158 * r.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr); 159 * } 160 * else // n_uword_bits + 1 <= sr <= n_udword_bits - 1 161 * { 162 * q.s.low = n.s.low << (n_udword_bits - sr); 163 * q.s.high = (n.s.high << (n_udword_bits - sr)) | 164 * (n.s.low >> (sr - n_uword_bits)); 165 * r.s.high = 0; 166 * r.s.low = n.s.high >> (sr - n_uword_bits); 167 * } 168 */ 169 q.s.low = (n.s.low << (n_udword_bits - sr)) & 170 ((si_int)(n_uword_bits - sr) >> (n_uword_bits-1)); 171 q.s.high = ((n.s.low << ( n_uword_bits - sr)) & 172 ((si_int)(sr - n_uword_bits - 1) >> (n_uword_bits-1))) | 173 (((n.s.high << (n_udword_bits - sr)) | 174 (n.s.low >> (sr - n_uword_bits))) & 175 ((si_int)(n_uword_bits - sr) >> (n_uword_bits-1))); 176 r.s.high = (n.s.high >> sr) & 177 ((si_int)(sr - n_uword_bits) >> (n_uword_bits-1)); 178 r.s.low = ((n.s.high >> (sr - n_uword_bits)) & 179 ((si_int)(n_uword_bits - sr - 1) >> (n_uword_bits-1))) | 180 (((n.s.high << (n_uword_bits - sr)) | 181 (n.s.low >> sr)) & 182 ((si_int)(sr - n_uword_bits) >> (n_uword_bits-1))); 183 } 184 else 185 { 186 /* K X 187 * --- 188 * K K 189 */ 190 sr = __builtin_clz(d.s.high) - __builtin_clz(n.s.high); 191 /* 0 <= sr <= n_uword_bits - 1 or sr large */ 192 if (sr > n_uword_bits - 1) 193 { 194 if (rem) 195 *rem = n.all; 196 return 0; 197 } 198 ++sr; 199 /* 1 <= sr <= n_uword_bits */ 200 /* q.all = n.all << (n_udword_bits - sr); */ 201 q.s.low = 0; 202 q.s.high = n.s.low << (n_uword_bits - sr); 203 /* r.all = n.all >> sr; 204 * if (sr < n_uword_bits) 205 * { 206 * r.s.high = n.s.high >> sr; 207 * r.s.low = (n.s.high << (n_uword_bits - sr)) | (n.s.low >> sr); 208 * } 209 * else 210 * { 211 * r.s.high = 0; 212 * r.s.low = n.s.high; 213 * } 214 */ 215 r.s.high = (n.s.high >> sr) & 216 ((si_int)(sr - n_uword_bits) >> (n_uword_bits-1)); 217 r.s.low = (n.s.high << (n_uword_bits - sr)) | 218 ((n.s.low >> sr) & 219 ((si_int)(sr - n_uword_bits) >> (n_uword_bits-1))); 220 } 221 } 222 /* Not a special case 223 * q and r are initialized with: 224 * q.all = n.all << (n_udword_bits - sr); 225 * r.all = n.all >> sr; 226 * 1 <= sr <= n_udword_bits - 1 227 */ 228 su_int carry = 0; 229 for (; sr > 0; --sr) 230 { 231 /* r:q = ((r:q) << 1) | carry */ 232 r.s.high = (r.s.high << 1) | (r.s.low >> (n_uword_bits - 1)); 233 r.s.low = (r.s.low << 1) | (q.s.high >> (n_uword_bits - 1)); 234 q.s.high = (q.s.high << 1) | (q.s.low >> (n_uword_bits - 1)); 235 q.s.low = (q.s.low << 1) | carry; 236 /* carry = 0; 237 * if (r.all >= d.all) 238 * { 239 * r.all -= d.all; 240 * carry = 1; 241 * } 242 */ 243 const di_int s = (di_int)(d.all - r.all - 1) >> (n_udword_bits - 1); 244 carry = s & 1; 245 r.all -= d.all & s; 246 } 247 q.all = (q.all << 1) | carry; 248 if (rem) 249 *rem = r.all; 250 return q.all; 251 } 252