1 /* 2 * Copyright (c) 2008-2016 Stefan Krah. All rights reserved. 3 * 4 * Redistribution and use in source and binary forms, with or without 5 * modification, are permitted provided that the following conditions 6 * are met: 7 * 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS "AS IS" AND 16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 19 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 25 * SUCH DAMAGE. 26 */ 27 28 29 #ifndef TYPEARITH_H 30 #define TYPEARITH_H 31 32 33 #include "mpdecimal.h" 34 35 36 /*****************************************************************************/ 37 /* Low level native arithmetic on basic types */ 38 /*****************************************************************************/ 39 40 41 /** ------------------------------------------------------------ 42 ** Double width multiplication and division 43 ** ------------------------------------------------------------ 44 */ 45 46 #if defined(CONFIG_64) 47 #if defined(ANSI) 48 #if defined(HAVE_UINT128_T) 49 static inline void 50 _mpd_mul_words(mpd_uint_t *hi, mpd_uint_t *lo, mpd_uint_t a, mpd_uint_t b) 51 { 52 __uint128_t hl; 53 54 hl = (__uint128_t)a * b; 55 56 *hi = hl >> 64; 57 *lo = (mpd_uint_t)hl; 58 } 59 60 static inline void 61 _mpd_div_words(mpd_uint_t *q, mpd_uint_t *r, mpd_uint_t hi, mpd_uint_t lo, 62 mpd_uint_t d) 63 { 64 __uint128_t hl; 65 66 hl = ((__uint128_t)hi<<64) + lo; 67 *q = (mpd_uint_t)(hl / d); /* quotient is known to fit */ 68 *r = (mpd_uint_t)(hl - (__uint128_t)(*q) * d); 69 } 70 #else 71 static inline void 72 _mpd_mul_words(mpd_uint_t *hi, mpd_uint_t *lo, mpd_uint_t a, mpd_uint_t b) 73 { 74 uint32_t w[4], carry; 75 uint32_t ah, al, bh, bl; 76 uint64_t hl; 77 78 ah = (uint32_t)(a>>32); al = (uint32_t)a; 79 bh = (uint32_t)(b>>32); bl = (uint32_t)b; 80 81 hl = (uint64_t)al * bl; 82 w[0] = (uint32_t)hl; 83 carry = (uint32_t)(hl>>32); 84 85 hl = (uint64_t)ah * bl + carry; 86 w[1] = (uint32_t)hl; 87 w[2] = (uint32_t)(hl>>32); 88 89 hl = (uint64_t)al * bh + w[1]; 90 w[1] = (uint32_t)hl; 91 carry = (uint32_t)(hl>>32); 92 93 hl = ((uint64_t)ah * bh + w[2]) + carry; 94 w[2] = (uint32_t)hl; 95 w[3] = (uint32_t)(hl>>32); 96 97 *hi = ((uint64_t)w[3]<<32) + w[2]; 98 *lo = ((uint64_t)w[1]<<32) + w[0]; 99 } 100 101 /* 102 * By Henry S. Warren: http://www.hackersdelight.org/HDcode/divlu.c.txt 103 * http://www.hackersdelight.org/permissions.htm: 104 * "You are free to use, copy, and distribute any of the code on this web 105 * site, whether modified by you or not. You need not give attribution." 106 * 107 * Slightly modified, comments are mine. 108 */ 109 static inline int 110 nlz(uint64_t x) 111 { 112 int n; 113 114 if (x == 0) return(64); 115 116 n = 0; 117 if (x <= 0x00000000FFFFFFFF) {n = n +32; x = x <<32;} 118 if (x <= 0x0000FFFFFFFFFFFF) {n = n +16; x = x <<16;} 119 if (x <= 0x00FFFFFFFFFFFFFF) {n = n + 8; x = x << 8;} 120 if (x <= 0x0FFFFFFFFFFFFFFF) {n = n + 4; x = x << 4;} 121 if (x <= 0x3FFFFFFFFFFFFFFF) {n = n + 2; x = x << 2;} 122 if (x <= 0x7FFFFFFFFFFFFFFF) {n = n + 1;} 123 124 return n; 125 } 126 127 static inline void 128 _mpd_div_words(mpd_uint_t *q, mpd_uint_t *r, mpd_uint_t u1, mpd_uint_t u0, 129 mpd_uint_t v) 130 { 131 const mpd_uint_t b = 4294967296; 132 mpd_uint_t un1, un0, 133 vn1, vn0, 134 q1, q0, 135 un32, un21, un10, 136 rhat, t; 137 int s; 138 139 assert(u1 < v); 140 141 s = nlz(v); 142 v = v << s; 143 vn1 = v >> 32; 144 vn0 = v & 0xFFFFFFFF; 145 146 t = (s == 0) ? 0 : u0 >> (64 - s); 147 un32 = (u1 << s) | t; 148 un10 = u0 << s; 149 150 un1 = un10 >> 32; 151 un0 = un10 & 0xFFFFFFFF; 152 153 q1 = un32 / vn1; 154 rhat = un32 - q1*vn1; 155 again1: 156 if (q1 >= b || q1*vn0 > b*rhat + un1) { 157 q1 = q1 - 1; 158 rhat = rhat + vn1; 159 if (rhat < b) goto again1; 160 } 161 162 /* 163 * Before again1 we had: 164 * (1) q1*vn1 + rhat = un32 165 * (2) q1*vn1*b + rhat*b + un1 = un32*b + un1 166 * 167 * The statements inside the if-clause do not change the value 168 * of the left-hand side of (2), and the loop is only exited 169 * if q1*vn0 <= rhat*b + un1, so: 170 * 171 * (3) q1*vn1*b + q1*vn0 <= un32*b + un1 172 * (4) q1*v <= un32*b + un1 173 * (5) 0 <= un32*b + un1 - q1*v 174 * 175 * By (5) we are certain that the possible add-back step from 176 * Knuth's algorithm D is never required. 177 * 178 * Since the final quotient is less than 2**64, the following 179 * must be true: 180 * 181 * (6) un32*b + un1 - q1*v <= UINT64_MAX 182 * 183 * This means that in the following line, the high words 184 * of un32*b and q1*v can be discarded without any effect 185 * on the result. 186 */ 187 un21 = un32*b + un1 - q1*v; 188 189 q0 = un21 / vn1; 190 rhat = un21 - q0*vn1; 191 again2: 192 if (q0 >= b || q0*vn0 > b*rhat + un0) { 193 q0 = q0 - 1; 194 rhat = rhat + vn1; 195 if (rhat < b) goto again2; 196 } 197 198 *q = q1*b + q0; 199 *r = (un21*b + un0 - q0*v) >> s; 200 } 201 #endif 202 203 /* END ANSI */ 204 #elif defined(ASM) 205 static inline void 206 _mpd_mul_words(mpd_uint_t *hi, mpd_uint_t *lo, mpd_uint_t a, mpd_uint_t b) 207 { 208 mpd_uint_t h, l; 209 210 __asm__ ( "mulq %3\n\t" 211 : "=d" (h), "=a" (l) 212 : "%a" (a), "rm" (b) 213 : "cc" 214 ); 215 216 *hi = h; 217 *lo = l; 218 } 219 220 static inline void 221 _mpd_div_words(mpd_uint_t *q, mpd_uint_t *r, mpd_uint_t hi, mpd_uint_t lo, 222 mpd_uint_t d) 223 { 224 mpd_uint_t qq, rr; 225 226 __asm__ ( "divq %4\n\t" 227 : "=a" (qq), "=d" (rr) 228 : "a" (lo), "d" (hi), "rm" (d) 229 : "cc" 230 ); 231 232 *q = qq; 233 *r = rr; 234 } 235 /* END GCC ASM */ 236 #elif defined(MASM) 237 #include <intrin.h> 238 #pragma intrinsic(_umul128) 239 240 static inline void 241 _mpd_mul_words(mpd_uint_t *hi, mpd_uint_t *lo, mpd_uint_t a, mpd_uint_t b) 242 { 243 *lo = _umul128(a, b, hi); 244 } 245 246 void _mpd_div_words(mpd_uint_t *q, mpd_uint_t *r, mpd_uint_t hi, mpd_uint_t lo, 247 mpd_uint_t d); 248 249 /* END MASM (_MSC_VER) */ 250 #else 251 #error "need platform specific 128 bit multiplication and division" 252 #endif 253 254 #define DIVMOD(q, r, v, d) *q = v / d; *r = v - *q * d 255 static inline void 256 _mpd_divmod_pow10(mpd_uint_t *q, mpd_uint_t *r, mpd_uint_t v, mpd_uint_t exp) 257 { 258 assert(exp <= 19); 259 260 if (exp <= 9) { 261 if (exp <= 4) { 262 switch (exp) { 263 case 0: *q = v; *r = 0; break; 264 case 1: DIVMOD(q, r, v, 10UL); break; 265 case 2: DIVMOD(q, r, v, 100UL); break; 266 case 3: DIVMOD(q, r, v, 1000UL); break; 267 case 4: DIVMOD(q, r, v, 10000UL); break; 268 } 269 } 270 else { 271 switch (exp) { 272 case 5: DIVMOD(q, r, v, 100000UL); break; 273 case 6: DIVMOD(q, r, v, 1000000UL); break; 274 case 7: DIVMOD(q, r, v, 10000000UL); break; 275 case 8: DIVMOD(q, r, v, 100000000UL); break; 276 case 9: DIVMOD(q, r, v, 1000000000UL); break; 277 } 278 } 279 } 280 else { 281 if (exp <= 14) { 282 switch (exp) { 283 case 10: DIVMOD(q, r, v, 10000000000ULL); break; 284 case 11: DIVMOD(q, r, v, 100000000000ULL); break; 285 case 12: DIVMOD(q, r, v, 1000000000000ULL); break; 286 case 13: DIVMOD(q, r, v, 10000000000000ULL); break; 287 case 14: DIVMOD(q, r, v, 100000000000000ULL); break; 288 } 289 } 290 else { 291 switch (exp) { 292 case 15: DIVMOD(q, r, v, 1000000000000000ULL); break; 293 case 16: DIVMOD(q, r, v, 10000000000000000ULL); break; 294 case 17: DIVMOD(q, r, v, 100000000000000000ULL); break; 295 case 18: DIVMOD(q, r, v, 1000000000000000000ULL); break; 296 case 19: DIVMOD(q, r, v, 10000000000000000000ULL); break; /* GCOV_NOT_REACHED */ 297 } 298 } 299 } 300 } 301 302 /* END CONFIG_64 */ 303 #elif defined(CONFIG_32) 304 #if defined(ANSI) 305 #if !defined(LEGACY_COMPILER) 306 static inline void 307 _mpd_mul_words(mpd_uint_t *hi, mpd_uint_t *lo, mpd_uint_t a, mpd_uint_t b) 308 { 309 mpd_uuint_t hl; 310 311 hl = (mpd_uuint_t)a * b; 312 313 *hi = hl >> 32; 314 *lo = (mpd_uint_t)hl; 315 } 316 317 static inline void 318 _mpd_div_words(mpd_uint_t *q, mpd_uint_t *r, mpd_uint_t hi, mpd_uint_t lo, 319 mpd_uint_t d) 320 { 321 mpd_uuint_t hl; 322 323 hl = ((mpd_uuint_t)hi<<32) + lo; 324 *q = (mpd_uint_t)(hl / d); /* quotient is known to fit */ 325 *r = (mpd_uint_t)(hl - (mpd_uuint_t)(*q) * d); 326 } 327 /* END ANSI + uint64_t */ 328 #else 329 static inline void 330 _mpd_mul_words(mpd_uint_t *hi, mpd_uint_t *lo, mpd_uint_t a, mpd_uint_t b) 331 { 332 uint16_t w[4], carry; 333 uint16_t ah, al, bh, bl; 334 uint32_t hl; 335 336 ah = (uint16_t)(a>>16); al = (uint16_t)a; 337 bh = (uint16_t)(b>>16); bl = (uint16_t)b; 338 339 hl = (uint32_t)al * bl; 340 w[0] = (uint16_t)hl; 341 carry = (uint16_t)(hl>>16); 342 343 hl = (uint32_t)ah * bl + carry; 344 w[1] = (uint16_t)hl; 345 w[2] = (uint16_t)(hl>>16); 346 347 hl = (uint32_t)al * bh + w[1]; 348 w[1] = (uint16_t)hl; 349 carry = (uint16_t)(hl>>16); 350 351 hl = ((uint32_t)ah * bh + w[2]) + carry; 352 w[2] = (uint16_t)hl; 353 w[3] = (uint16_t)(hl>>16); 354 355 *hi = ((uint32_t)w[3]<<16) + w[2]; 356 *lo = ((uint32_t)w[1]<<16) + w[0]; 357 } 358 359 /* 360 * By Henry S. Warren: http://www.hackersdelight.org/HDcode/divlu.c.txt 361 * http://www.hackersdelight.org/permissions.htm: 362 * "You are free to use, copy, and distribute any of the code on this web 363 * site, whether modified by you or not. You need not give attribution." 364 * 365 * Slightly modified, comments are mine. 366 */ 367 static inline int 368 nlz(uint32_t x) 369 { 370 int n; 371 372 if (x == 0) return(32); 373 374 n = 0; 375 if (x <= 0x0000FFFF) {n = n +16; x = x <<16;} 376 if (x <= 0x00FFFFFF) {n = n + 8; x = x << 8;} 377 if (x <= 0x0FFFFFFF) {n = n + 4; x = x << 4;} 378 if (x <= 0x3FFFFFFF) {n = n + 2; x = x << 2;} 379 if (x <= 0x7FFFFFFF) {n = n + 1;} 380 381 return n; 382 } 383 384 static inline void 385 _mpd_div_words(mpd_uint_t *q, mpd_uint_t *r, mpd_uint_t u1, mpd_uint_t u0, 386 mpd_uint_t v) 387 { 388 const mpd_uint_t b = 65536; 389 mpd_uint_t un1, un0, 390 vn1, vn0, 391 q1, q0, 392 un32, un21, un10, 393 rhat, t; 394 int s; 395 396 assert(u1 < v); 397 398 s = nlz(v); 399 v = v << s; 400 vn1 = v >> 16; 401 vn0 = v & 0xFFFF; 402 403 t = (s == 0) ? 0 : u0 >> (32 - s); 404 un32 = (u1 << s) | t; 405 un10 = u0 << s; 406 407 un1 = un10 >> 16; 408 un0 = un10 & 0xFFFF; 409 410 q1 = un32 / vn1; 411 rhat = un32 - q1*vn1; 412 again1: 413 if (q1 >= b || q1*vn0 > b*rhat + un1) { 414 q1 = q1 - 1; 415 rhat = rhat + vn1; 416 if (rhat < b) goto again1; 417 } 418 419 /* 420 * Before again1 we had: 421 * (1) q1*vn1 + rhat = un32 422 * (2) q1*vn1*b + rhat*b + un1 = un32*b + un1 423 * 424 * The statements inside the if-clause do not change the value 425 * of the left-hand side of (2), and the loop is only exited 426 * if q1*vn0 <= rhat*b + un1, so: 427 * 428 * (3) q1*vn1*b + q1*vn0 <= un32*b + un1 429 * (4) q1*v <= un32*b + un1 430 * (5) 0 <= un32*b + un1 - q1*v 431 * 432 * By (5) we are certain that the possible add-back step from 433 * Knuth's algorithm D is never required. 434 * 435 * Since the final quotient is less than 2**32, the following 436 * must be true: 437 * 438 * (6) un32*b + un1 - q1*v <= UINT32_MAX 439 * 440 * This means that in the following line, the high words 441 * of un32*b and q1*v can be discarded without any effect 442 * on the result. 443 */ 444 un21 = un32*b + un1 - q1*v; 445 446 q0 = un21 / vn1; 447 rhat = un21 - q0*vn1; 448 again2: 449 if (q0 >= b || q0*vn0 > b*rhat + un0) { 450 q0 = q0 - 1; 451 rhat = rhat + vn1; 452 if (rhat < b) goto again2; 453 } 454 455 *q = q1*b + q0; 456 *r = (un21*b + un0 - q0*v) >> s; 457 } 458 #endif /* END ANSI + LEGACY_COMPILER */ 459 460 /* END ANSI */ 461 #elif defined(ASM) 462 static inline void 463 _mpd_mul_words(mpd_uint_t *hi, mpd_uint_t *lo, mpd_uint_t a, mpd_uint_t b) 464 { 465 mpd_uint_t h, l; 466 467 __asm__ ( "mull %3\n\t" 468 : "=d" (h), "=a" (l) 469 : "%a" (a), "rm" (b) 470 : "cc" 471 ); 472 473 *hi = h; 474 *lo = l; 475 } 476 477 static inline void 478 _mpd_div_words(mpd_uint_t *q, mpd_uint_t *r, mpd_uint_t hi, mpd_uint_t lo, 479 mpd_uint_t d) 480 { 481 mpd_uint_t qq, rr; 482 483 __asm__ ( "divl %4\n\t" 484 : "=a" (qq), "=d" (rr) 485 : "a" (lo), "d" (hi), "rm" (d) 486 : "cc" 487 ); 488 489 *q = qq; 490 *r = rr; 491 } 492 /* END GCC ASM */ 493 #elif defined(MASM) 494 static inline void __cdecl 495 _mpd_mul_words(mpd_uint_t *hi, mpd_uint_t *lo, mpd_uint_t a, mpd_uint_t b) 496 { 497 mpd_uint_t h, l; 498 499 __asm { 500 mov eax, a 501 mul b 502 mov h, edx 503 mov l, eax 504 } 505 506 *hi = h; 507 *lo = l; 508 } 509 510 static inline void __cdecl 511 _mpd_div_words(mpd_uint_t *q, mpd_uint_t *r, mpd_uint_t hi, mpd_uint_t lo, 512 mpd_uint_t d) 513 { 514 mpd_uint_t qq, rr; 515 516 __asm { 517 mov eax, lo 518 mov edx, hi 519 div d 520 mov qq, eax 521 mov rr, edx 522 } 523 524 *q = qq; 525 *r = rr; 526 } 527 /* END MASM (_MSC_VER) */ 528 #else 529 #error "need platform specific 64 bit multiplication and division" 530 #endif 531 532 #define DIVMOD(q, r, v, d) *q = v / d; *r = v - *q * d 533 static inline void 534 _mpd_divmod_pow10(mpd_uint_t *q, mpd_uint_t *r, mpd_uint_t v, mpd_uint_t exp) 535 { 536 assert(exp <= 9); 537 538 if (exp <= 4) { 539 switch (exp) { 540 case 0: *q = v; *r = 0; break; 541 case 1: DIVMOD(q, r, v, 10UL); break; 542 case 2: DIVMOD(q, r, v, 100UL); break; 543 case 3: DIVMOD(q, r, v, 1000UL); break; 544 case 4: DIVMOD(q, r, v, 10000UL); break; 545 } 546 } 547 else { 548 switch (exp) { 549 case 5: DIVMOD(q, r, v, 100000UL); break; 550 case 6: DIVMOD(q, r, v, 1000000UL); break; 551 case 7: DIVMOD(q, r, v, 10000000UL); break; 552 case 8: DIVMOD(q, r, v, 100000000UL); break; 553 case 9: DIVMOD(q, r, v, 1000000000UL); break; /* GCOV_NOT_REACHED */ 554 } 555 } 556 } 557 /* END CONFIG_32 */ 558 559 /* NO CONFIG */ 560 #else 561 #error "define CONFIG_64 or CONFIG_32" 562 #endif /* CONFIG */ 563 564 565 static inline void 566 _mpd_div_word(mpd_uint_t *q, mpd_uint_t *r, mpd_uint_t v, mpd_uint_t d) 567 { 568 *q = v / d; 569 *r = v - *q * d; 570 } 571 572 static inline void 573 _mpd_idiv_word(mpd_ssize_t *q, mpd_ssize_t *r, mpd_ssize_t v, mpd_ssize_t d) 574 { 575 *q = v / d; 576 *r = v - *q * d; 577 } 578 579 580 /** ------------------------------------------------------------ 581 ** Arithmetic with overflow checking 582 ** ------------------------------------------------------------ 583 */ 584 585 /* The following macros do call exit() in case of an overflow. 586 If the library is used correctly (i.e. with valid context 587 parameters), such overflows cannot occur. The macros are used 588 as sanity checks in a couple of strategic places and should 589 be viewed as a handwritten version of gcc's -ftrapv option. */ 590 591 static inline mpd_size_t 592 add_size_t(mpd_size_t a, mpd_size_t b) 593 { 594 if (a > MPD_SIZE_MAX - b) { 595 mpd_err_fatal("add_size_t(): overflow: check the context"); /* GCOV_NOT_REACHED */ 596 } 597 return a + b; 598 } 599 600 static inline mpd_size_t 601 sub_size_t(mpd_size_t a, mpd_size_t b) 602 { 603 if (b > a) { 604 mpd_err_fatal("sub_size_t(): overflow: check the context"); /* GCOV_NOT_REACHED */ 605 } 606 return a - b; 607 } 608 609 #if MPD_SIZE_MAX != MPD_UINT_MAX 610 #error "adapt mul_size_t() and mulmod_size_t()" 611 #endif 612 613 static inline mpd_size_t 614 mul_size_t(mpd_size_t a, mpd_size_t b) 615 { 616 mpd_uint_t hi, lo; 617 618 _mpd_mul_words(&hi, &lo, (mpd_uint_t)a, (mpd_uint_t)b); 619 if (hi) { 620 mpd_err_fatal("mul_size_t(): overflow: check the context"); /* GCOV_NOT_REACHED */ 621 } 622 return lo; 623 } 624 625 static inline mpd_size_t 626 add_size_t_overflow(mpd_size_t a, mpd_size_t b, mpd_size_t *overflow) 627 { 628 mpd_size_t ret; 629 630 *overflow = 0; 631 ret = a + b; 632 if (ret < a) *overflow = 1; 633 return ret; 634 } 635 636 static inline mpd_size_t 637 mul_size_t_overflow(mpd_size_t a, mpd_size_t b, mpd_size_t *overflow) 638 { 639 mpd_uint_t lo; 640 641 _mpd_mul_words((mpd_uint_t *)overflow, &lo, (mpd_uint_t)a, 642 (mpd_uint_t)b); 643 return lo; 644 } 645 646 static inline mpd_ssize_t 647 mod_mpd_ssize_t(mpd_ssize_t a, mpd_ssize_t m) 648 { 649 mpd_ssize_t r = a % m; 650 return (r < 0) ? r + m : r; 651 } 652 653 static inline mpd_size_t 654 mulmod_size_t(mpd_size_t a, mpd_size_t b, mpd_size_t m) 655 { 656 mpd_uint_t hi, lo; 657 mpd_uint_t q, r; 658 659 _mpd_mul_words(&hi, &lo, (mpd_uint_t)a, (mpd_uint_t)b); 660 _mpd_div_words(&q, &r, hi, lo, (mpd_uint_t)m); 661 662 return r; 663 } 664 665 666 #endif /* TYPEARITH_H */ 667 668 669 670