// Copyright 2015, VIXL authors
// All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions are met:
//
//   * Redistributions of source code must retain the above copyright notice,
//     this list of conditions and the following disclaimer.
//   * Redistributions in binary form must reproduce the above copyright
//     notice, this list of conditions and the following disclaimer in the
//     documentation and/or other materials provided with the distribution.
//   * Neither the name of ARM Limited nor the names of its contributors may be
//     used to endorse or promote products derived from this software without
//     specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
// AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
// ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
// LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
// CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
// SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
// INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
// CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
// ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
// POSSIBILITY OF SUCH DAMAGE.

#ifndef VIXL_UTILS_H
#define VIXL_UTILS_H

#include <cmath>
#include <cstring>
#include <limits>
#include <vector>

#include "compiler-intrinsics-vixl.h"
#include "globals-vixl.h"

namespace vixl {

// Macros for compile-time format checking.
#if GCC_VERSION_OR_NEWER(4, 4, 0)
#define PRINTF_CHECK(format_index, varargs_index) \
  __attribute__((format(gnu_printf, format_index, varargs_index)))
#else
#define PRINTF_CHECK(format_index, varargs_index)
#endif
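// For example, a diagnostic helper can have its arguments checked against its
// format string at compile time (an illustrative declaration only, not a VIXL
// API):
//   void Trace(const char* format, ...) PRINTF_CHECK(1, 2);
// With the attribute in place, Trace("%s", 42) triggers a -Wformat warning.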
#ifdef __GNUC__
#define VIXL_HAS_DEPRECATED_WITH_MSG
#elif defined(__clang__)
#if __has_extension(attribute_deprecated_with_message)
#define VIXL_HAS_DEPRECATED_WITH_MSG
#endif
#endif

#ifdef VIXL_HAS_DEPRECATED_WITH_MSG
#define VIXL_DEPRECATED(replaced_by, declarator) \
  __attribute__((deprecated("Use \"" replaced_by "\" instead"))) declarator
#else
#define VIXL_DEPRECATED(replaced_by, declarator) declarator
#endif

#ifdef VIXL_DEBUG
#define VIXL_UNREACHABLE_OR_FALLTHROUGH() VIXL_UNREACHABLE()
#else
#define VIXL_UNREACHABLE_OR_FALLTHROUGH() VIXL_FALLTHROUGH()
#endif

template <typename T, size_t n>
size_t ArrayLength(const T (&)[n]) {
  return n;
}

// Check number width.
// TODO: Refactor these using templates.
inline bool IsIntN(unsigned n, uint32_t x) {
  VIXL_ASSERT((0 < n) && (n < 32));
  uint32_t limit = UINT32_C(1) << (n - 1);
  return x < limit;
}
inline bool IsIntN(unsigned n, int32_t x) {
  VIXL_ASSERT((0 < n) && (n < 32));
  int32_t limit = INT32_C(1) << (n - 1);
  return (-limit <= x) && (x < limit);
}
inline bool IsIntN(unsigned n, uint64_t x) {
  VIXL_ASSERT((0 < n) && (n < 64));
  uint64_t limit = UINT64_C(1) << (n - 1);
  return x < limit;
}
inline bool IsIntN(unsigned n, int64_t x) {
  VIXL_ASSERT((0 < n) && (n < 64));
  int64_t limit = INT64_C(1) << (n - 1);
  return (-limit <= x) && (x < limit);
}
VIXL_DEPRECATED("IsIntN", inline bool is_intn(unsigned n, int64_t x)) {
  return IsIntN(n, x);
}

inline bool IsUintN(unsigned n, uint32_t x) {
  VIXL_ASSERT((0 < n) && (n < 32));
  return !(x >> n);
}
inline bool IsUintN(unsigned n, int32_t x) {
  VIXL_ASSERT((0 < n) && (n < 32));
  // Convert to an unsigned integer to avoid implementation-defined behavior.
  return !(static_cast<uint32_t>(x) >> n);
}
inline bool IsUintN(unsigned n, uint64_t x) {
  VIXL_ASSERT((0 < n) && (n < 64));
  return !(x >> n);
}
inline bool IsUintN(unsigned n, int64_t x) {
  VIXL_ASSERT((0 < n) && (n < 64));
  // Convert to an unsigned integer to avoid implementation-defined behavior.
  return !(static_cast<uint64_t>(x) >> n);
}
VIXL_DEPRECATED("IsUintN", inline bool is_uintn(unsigned n, int64_t x)) {
  return IsUintN(n, x);
}

inline uint64_t TruncateToUintN(unsigned n, uint64_t x) {
  VIXL_ASSERT((0 < n) && (n < 64));
  return static_cast<uint64_t>(x) & ((UINT64_C(1) << n) - 1);
}
VIXL_DEPRECATED("TruncateToUintN",
                inline uint64_t truncate_to_intn(unsigned n, int64_t x)) {
  return TruncateToUintN(n, x);
}

// clang-format off
#define INT_1_TO_32_LIST(V)                       \
V(1)  V(2)  V(3)  V(4)  V(5)  V(6)  V(7)  V(8)    \
V(9)  V(10) V(11) V(12) V(13) V(14) V(15) V(16)   \
V(17) V(18) V(19) V(20) V(21) V(22) V(23) V(24)   \
V(25) V(26) V(27) V(28) V(29) V(30) V(31) V(32)

#define INT_33_TO_63_LIST(V)                      \
V(33) V(34) V(35) V(36) V(37) V(38) V(39) V(40)   \
V(41) V(42) V(43) V(44) V(45) V(46) V(47) V(48)   \
V(49) V(50) V(51) V(52) V(53) V(54) V(55) V(56)   \
V(57) V(58) V(59) V(60) V(61) V(62) V(63)

#define INT_1_TO_63_LIST(V) INT_1_TO_32_LIST(V) INT_33_TO_63_LIST(V)

// clang-format on

#define DECLARE_IS_INT_N(N)                                       \
  inline bool IsInt##N(int64_t x) { return IsIntN(N, x); }        \
  VIXL_DEPRECATED("IsInt" #N, inline bool is_int##N(int64_t x)) { \
    return IsIntN(N, x);                                          \
  }

#define DECLARE_IS_UINT_N(N)                                        \
  inline bool IsUint##N(int64_t x) { return IsUintN(N, x); }        \
  VIXL_DEPRECATED("IsUint" #N, inline bool is_uint##N(int64_t x)) { \
    return IsUintN(N, x);                                           \
  }

#define DECLARE_TRUNCATE_TO_UINT_32(N)                             \
  inline uint32_t TruncateToUint##N(uint64_t x) {                  \
    return static_cast<uint32_t>(TruncateToUintN(N, x));           \
  }                                                                \
  VIXL_DEPRECATED("TruncateToUint" #N,                             \
                  inline uint32_t truncate_to_int##N(int64_t x)) { \
    return TruncateToUint##N(x);                                   \
  }

INT_1_TO_63_LIST(DECLARE_IS_INT_N)
INT_1_TO_63_LIST(DECLARE_IS_UINT_N)
INT_1_TO_32_LIST(DECLARE_TRUNCATE_TO_UINT_32)

#undef DECLARE_IS_INT_N
#undef DECLARE_IS_UINT_N
#undef DECLARE_TRUNCATE_TO_UINT_32
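// For example (illustrative values):
//   IsInt8(127)            -> true   (fits in a signed 8-bit field)
//   IsInt8(128)            -> false
//   IsInt8(-128)           -> true
//   IsUint4(15)            -> true   (fits in an unsigned 4-bit field)
//   IsUint4(16)            -> false
//   TruncateToUint8(0x1ff) -> 0xff   (keeps only the low 8 bits)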
// Bit field extraction.
inline uint64_t ExtractUnsignedBitfield64(int msb, int lsb, uint64_t x) {
  VIXL_ASSERT((static_cast<size_t>(msb) < sizeof(x) * 8) && (lsb >= 0) &&
              (msb >= lsb));
  if ((msb == 63) && (lsb == 0)) return x;
  return (x >> lsb) & ((static_cast<uint64_t>(1) << (1 + msb - lsb)) - 1);
}


inline uint32_t ExtractUnsignedBitfield32(int msb, int lsb, uint32_t x) {
  VIXL_ASSERT((static_cast<size_t>(msb) < sizeof(x) * 8) && (lsb >= 0) &&
              (msb >= lsb));
  return TruncateToUint32(ExtractUnsignedBitfield64(msb, lsb, x));
}


inline int64_t ExtractSignedBitfield64(int msb, int lsb, int64_t x) {
  VIXL_ASSERT((static_cast<size_t>(msb) < sizeof(x) * 8) && (lsb >= 0) &&
              (msb >= lsb));
  uint64_t temp = ExtractUnsignedBitfield64(msb, lsb, x);
  // If the highest extracted bit is set, sign extend.
  if ((temp >> (msb - lsb)) == 1) {
    temp |= ~UINT64_C(0) << (msb - lsb);
  }
  int64_t result;
  memcpy(&result, &temp, sizeof(result));
  return result;
}


inline int32_t ExtractSignedBitfield32(int msb, int lsb, int32_t x) {
  VIXL_ASSERT((static_cast<size_t>(msb) < sizeof(x) * 8) && (lsb >= 0) &&
              (msb >= lsb));
  uint32_t temp = TruncateToUint32(ExtractSignedBitfield64(msb, lsb, x));
  int32_t result;
  memcpy(&result, &temp, sizeof(result));
  return result;
}


inline uint64_t RotateRight(uint64_t value,
                            unsigned int rotate,
                            unsigned int width) {
  VIXL_ASSERT((width > 0) && (width <= 64));
  uint64_t width_mask = ~UINT64_C(0) >> (64 - width);
  rotate &= 63;
  if (rotate > 0) {
    value &= width_mask;
    value = (value << (width - rotate)) | (value >> rotate);
  }
  return value & width_mask;
}
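// Worked examples (illustrative values):
//   ExtractUnsignedBitfield64(7, 4, 0xab) -> 0xa  (bits [7:4] of 0xab)
//   ExtractSignedBitfield64(7, 4, 0xab)   -> -6   (0xa sign-extended from 4 bits)
//   RotateRight(0x1, 1, 4)                -> 0x8  (bit 0 wraps around to bit 3)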
// Wrapper class for passing FP16 values through the assembler.
// This is purely to aid with type checking/casting.
class Float16 {
 public:
  explicit Float16(double dvalue);
  Float16() : rawbits_(0x0) {}
  friend uint16_t Float16ToRawbits(Float16 value);
  friend Float16 RawbitsToFloat16(uint16_t bits);

 protected:
  uint16_t rawbits_;
};

// Floating point representation.
uint16_t Float16ToRawbits(Float16 value);


uint32_t FloatToRawbits(float value);
VIXL_DEPRECATED("FloatToRawbits",
                inline uint32_t float_to_rawbits(float value)) {
  return FloatToRawbits(value);
}

uint64_t DoubleToRawbits(double value);
VIXL_DEPRECATED("DoubleToRawbits",
                inline uint64_t double_to_rawbits(double value)) {
  return DoubleToRawbits(value);
}

Float16 RawbitsToFloat16(uint16_t bits);

float RawbitsToFloat(uint32_t bits);
VIXL_DEPRECATED("RawbitsToFloat",
                inline float rawbits_to_float(uint32_t bits)) {
  return RawbitsToFloat(bits);
}

double RawbitsToDouble(uint64_t bits);
VIXL_DEPRECATED("RawbitsToDouble",
                inline double rawbits_to_double(uint64_t bits)) {
  return RawbitsToDouble(bits);
}

namespace internal {

// Internal simulation class used solely by the simulator to
// provide an abstraction layer for any half-precision arithmetic.
class SimFloat16 : public Float16 {
 public:
  // TODO: We should investigate making this constructor explicit.
  // This is currently difficult to do due to a number of templated
  // functions in the simulator which rely on returning double values.
  SimFloat16(double dvalue) : Float16(dvalue) {}  // NOLINT(runtime/explicit)
  SimFloat16(Float16 f) {  // NOLINT(runtime/explicit)
    this->rawbits_ = Float16ToRawbits(f);
  }
  SimFloat16() : Float16() {}
  SimFloat16 operator-() const;
  SimFloat16 operator+(SimFloat16 rhs) const;
  SimFloat16 operator-(SimFloat16 rhs) const;
  SimFloat16 operator*(SimFloat16 rhs) const;
  SimFloat16 operator/(SimFloat16 rhs) const;
  bool operator<(SimFloat16 rhs) const;
  bool operator>(SimFloat16 rhs) const;
  bool operator==(SimFloat16 rhs) const;
  bool operator!=(SimFloat16 rhs) const;
  // This is necessary for conversions performed in (macro asm) Fmov.
  bool operator==(double rhs) const;
  operator double() const;
};
}  // namespace internal

uint32_t Float16Sign(internal::SimFloat16 value);

uint32_t Float16Exp(internal::SimFloat16 value);

uint32_t Float16Mantissa(internal::SimFloat16 value);

uint32_t FloatSign(float value);
VIXL_DEPRECATED("FloatSign", inline uint32_t float_sign(float value)) {
  return FloatSign(value);
}

uint32_t FloatExp(float value);
VIXL_DEPRECATED("FloatExp", inline uint32_t float_exp(float value)) {
  return FloatExp(value);
}

uint32_t FloatMantissa(float value);
VIXL_DEPRECATED("FloatMantissa", inline uint32_t float_mantissa(float value)) {
  return FloatMantissa(value);
}

uint32_t DoubleSign(double value);
VIXL_DEPRECATED("DoubleSign", inline uint32_t double_sign(double value)) {
  return DoubleSign(value);
}

uint32_t DoubleExp(double value);
VIXL_DEPRECATED("DoubleExp", inline uint32_t double_exp(double value)) {
  return DoubleExp(value);
}

uint64_t DoubleMantissa(double value);
VIXL_DEPRECATED("DoubleMantissa",
                inline uint64_t double_mantissa(double value)) {
  return DoubleMantissa(value);
}

internal::SimFloat16 Float16Pack(uint16_t sign,
                                 uint16_t exp,
                                 uint16_t mantissa);

float FloatPack(uint32_t sign, uint32_t exp, uint32_t mantissa);
VIXL_DEPRECATED("FloatPack",
                inline float float_pack(uint32_t sign,
                                        uint32_t exp,
                                        uint32_t mantissa)) {
  return FloatPack(sign, exp, mantissa);
}

double DoublePack(uint64_t sign, uint64_t exp, uint64_t mantissa);
VIXL_DEPRECATED("DoublePack",
                inline double double_pack(uint32_t sign,
                                          uint32_t exp,
                                          uint64_t mantissa)) {
  return DoublePack(sign, exp, mantissa);
}

// An fpclassify() function for 16-bit half-precision floats.
int Float16Classify(Float16 value);
VIXL_DEPRECATED("Float16Classify", inline int float16classify(uint16_t value)) {
  return Float16Classify(RawbitsToFloat16(value));
}

bool IsZero(Float16 value);

inline bool IsNaN(float value) { return std::isnan(value); }

inline bool IsNaN(double value) { return std::isnan(value); }

inline bool IsNaN(Float16 value) { return Float16Classify(value) == FP_NAN; }

inline bool IsInf(float value) { return std::isinf(value); }

inline bool IsInf(double value) { return std::isinf(value); }

inline bool IsInf(Float16 value) {
  return Float16Classify(value) == FP_INFINITE;
}
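// Classification examples for standard IEEE-754 binary16 bit patterns
// (illustrative values):
//   Float16Classify(RawbitsToFloat16(0x3c00)) -> FP_NORMAL    (1.0)
//   Float16Classify(RawbitsToFloat16(0x7c00)) -> FP_INFINITE  (+infinity)
//   Float16Classify(RawbitsToFloat16(0x7e00)) -> FP_NAN       (a quiet NaN)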
// NaN tests.
inline bool IsSignallingNaN(double num) {
  const uint64_t kFP64QuietNaNMask = UINT64_C(0x0008000000000000);
  uint64_t raw = DoubleToRawbits(num);
  if (IsNaN(num) && ((raw & kFP64QuietNaNMask) == 0)) {
    return true;
  }
  return false;
}


inline bool IsSignallingNaN(float num) {
  const uint32_t kFP32QuietNaNMask = 0x00400000;
  uint32_t raw = FloatToRawbits(num);
  if (IsNaN(num) && ((raw & kFP32QuietNaNMask) == 0)) {
    return true;
  }
  return false;
}


inline bool IsSignallingNaN(Float16 num) {
  const uint16_t kFP16QuietNaNMask = 0x0200;
  return IsNaN(num) && ((Float16ToRawbits(num) & kFP16QuietNaNMask) == 0);
}


template <typename T>
inline bool IsQuietNaN(T num) {
  return IsNaN(num) && !IsSignallingNaN(num);
}


// Convert the NaN in 'num' to a quiet NaN.
inline double ToQuietNaN(double num) {
  const uint64_t kFP64QuietNaNMask = UINT64_C(0x0008000000000000);
  VIXL_ASSERT(IsNaN(num));
  return RawbitsToDouble(DoubleToRawbits(num) | kFP64QuietNaNMask);
}


inline float ToQuietNaN(float num) {
  const uint32_t kFP32QuietNaNMask = 0x00400000;
  VIXL_ASSERT(IsNaN(num));
  return RawbitsToFloat(FloatToRawbits(num) | kFP32QuietNaNMask);
}


inline internal::SimFloat16 ToQuietNaN(internal::SimFloat16 num) {
  const uint16_t kFP16QuietNaNMask = 0x0200;
  VIXL_ASSERT(IsNaN(num));
  return internal::SimFloat16(
      RawbitsToFloat16(Float16ToRawbits(num) | kFP16QuietNaNMask));
}


// Fused multiply-add.
inline double FusedMultiplyAdd(double op1, double op2, double a) {
  return fma(op1, op2, a);
}


inline float FusedMultiplyAdd(float op1, float op2, float a) {
  return fmaf(op1, op2, a);
}


inline uint64_t LowestSetBit(uint64_t value) { return value & -value; }


template <typename T>
inline int HighestSetBitPosition(T value) {
  VIXL_ASSERT(value != 0);
  return (sizeof(value) * 8 - 1) - CountLeadingZeros(value);
}


template <typename V>
inline int WhichPowerOf2(V value) {
  VIXL_ASSERT(IsPowerOf2(value));
  return CountTrailingZeros(value);
}


unsigned CountClearHalfWords(uint64_t imm, unsigned reg_size);


int BitCount(uint64_t value);


template <typename T>
T ReverseBits(T value) {
  VIXL_ASSERT((sizeof(value) == 1) || (sizeof(value) == 2) ||
              (sizeof(value) == 4) || (sizeof(value) == 8));
  T result = 0;
  for (unsigned i = 0; i < (sizeof(value) * 8); i++) {
    result = (result << 1) | (value & 1);
    value >>= 1;
  }
  return result;
}


template <typename T>
inline T SignExtend(T val, int bitSize) {
  VIXL_ASSERT(bitSize > 0);
  T mask = (T(2) << (bitSize - 1)) - T(1);
  val &= mask;
  T sign_bits = -((val >> (bitSize - 1)) << bitSize);
  val |= sign_bits;
  return val;
}
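// Worked examples (illustrative values):
//   LowestSetBit(0b10100)                    -> 0b00100
//   HighestSetBitPosition(UINT32_C(0b10100)) -> 4
//   ReverseBits(uint8_t(0b10110000))         -> uint8_t(0b00001101)
//   SignExtend(int32_t(0xf), 4)              -> -1  (0b1111 as a 4-bit value)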
template <typename T>
T ReverseBytes(T value, int block_bytes_log2) {
  VIXL_ASSERT((sizeof(value) == 4) || (sizeof(value) == 8));
  VIXL_ASSERT((1U << block_bytes_log2) <= sizeof(value));
  // Split the 64-bit value into an 8-bit array, where b[0] is the least
  // significant byte, and b[7] is the most significant.
  uint8_t bytes[8];
  uint64_t mask = UINT64_C(0xff00000000000000);
  for (int i = 7; i >= 0; i--) {
    bytes[i] = (static_cast<uint64_t>(value) & mask) >> (i * 8);
    mask >>= 8;
  }

  // Permutation tables for REV instructions.
  //   permute_table[0] is used by REV16_x, REV16_w
  //   permute_table[1] is used by REV32_x, REV_w
  //   permute_table[2] is used by REV_x
  VIXL_ASSERT((0 < block_bytes_log2) && (block_bytes_log2 < 4));
  static const uint8_t permute_table[3][8] = {{6, 7, 4, 5, 2, 3, 0, 1},
                                              {4, 5, 6, 7, 0, 1, 2, 3},
                                              {0, 1, 2, 3, 4, 5, 6, 7}};
  uint64_t temp = 0;
  for (int i = 0; i < 8; i++) {
    temp <<= 8;
    temp |= bytes[permute_table[block_bytes_log2 - 1][i]];
  }

  T result;
  VIXL_STATIC_ASSERT(sizeof(result) <= sizeof(temp));
  memcpy(&result, &temp, sizeof(result));
  return result;
}

template <unsigned MULTIPLE, typename T>
inline bool IsMultiple(T value) {
  VIXL_ASSERT(IsPowerOf2(MULTIPLE));
  return (value & (MULTIPLE - 1)) == 0;
}

template <typename T>
inline bool IsMultiple(T value, unsigned multiple) {
  VIXL_ASSERT(IsPowerOf2(multiple));
  return (value & (multiple - 1)) == 0;
}

template <typename T>
inline bool IsAligned(T pointer, int alignment) {
  VIXL_ASSERT(IsPowerOf2(alignment));
  return (pointer & (alignment - 1)) == 0;
}

// Pointer alignment
// TODO: rename/refactor to make it specific to instructions.
template <unsigned ALIGN, typename T>
inline bool IsAligned(T pointer) {
  VIXL_ASSERT(sizeof(pointer) == sizeof(intptr_t));  // NOLINT(runtime/sizeof)
  // Use C-style casts to get static_cast behaviour for integral types (T), and
  // reinterpret_cast behaviour for other types.
  return IsAligned((intptr_t)(pointer), ALIGN);
}

template <typename T>
bool IsWordAligned(T pointer) {
  return IsAligned<4>(pointer);
}

// Increment a pointer until it has the specified alignment. The alignment must
// be a power of two.
template <class T>
T AlignUp(T pointer,
          typename Unsigned<sizeof(T) * kBitsPerByte>::type alignment) {
  VIXL_ASSERT(IsPowerOf2(alignment));
  // Use C-style casts to get static_cast behaviour for integral types (T), and
  // reinterpret_cast behaviour for other types.

  typename Unsigned<sizeof(T) * kBitsPerByte>::type pointer_raw =
      (typename Unsigned<sizeof(T) * kBitsPerByte>::type)pointer;
  VIXL_STATIC_ASSERT(sizeof(pointer) <= sizeof(pointer_raw));

  size_t mask = alignment - 1;
  T result = (T)((pointer_raw + mask) & ~mask);
  VIXL_ASSERT(result >= pointer);

  return result;
}

// Decrement a pointer until it has the specified alignment. The alignment must
// be a power of two.
template <class T>
T AlignDown(T pointer,
            typename Unsigned<sizeof(T) * kBitsPerByte>::type alignment) {
  VIXL_ASSERT(IsPowerOf2(alignment));
  // Use C-style casts to get static_cast behaviour for integral types (T), and
  // reinterpret_cast behaviour for other types.

  typename Unsigned<sizeof(T) * kBitsPerByte>::type pointer_raw =
      (typename Unsigned<sizeof(T) * kBitsPerByte>::type)pointer;
  VIXL_STATIC_ASSERT(sizeof(pointer) <= sizeof(pointer_raw));

  size_t mask = alignment - 1;
  return (T)(pointer_raw & ~mask);
}
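// Worked examples, where p is a pointer holding the address 0x1004
// (illustrative values):
//   IsMultiple<16>(48) -> true
//   IsAligned<8>(p)    -> false  (0x1004 is only 4-byte aligned)
//   IsWordAligned(p)   -> true
//   AlignUp(p, 16)     -> a pointer holding 0x1010
//   AlignDown(p, 16)   -> a pointer holding 0x1000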
template <typename T>
inline T ExtractBit(T value, unsigned bit) {
  return (value >> bit) & T(1);
}

template <typename Ts, typename Td>
inline Td ExtractBits(Ts value, int least_significant_bit, Td mask) {
  return Td((value >> least_significant_bit) & Ts(mask));
}

template <typename Ts, typename Td>
inline void AssignBit(Td& dst,  // NOLINT(runtime/references)
                      int bit,
                      Ts value) {
  VIXL_ASSERT((value == Ts(0)) || (value == Ts(1)));
  VIXL_ASSERT(bit >= 0);
  VIXL_ASSERT(bit < static_cast<int>(sizeof(Td) * 8));
  Td mask(1);
  dst &= ~(mask << bit);
  dst |= Td(value) << bit;
}

template <typename Td, typename Ts>
inline void AssignBits(Td& dst,  // NOLINT(runtime/references)
                       int least_significant_bit,
                       Ts mask,
                       Ts value) {
  VIXL_ASSERT(least_significant_bit >= 0);
  VIXL_ASSERT(least_significant_bit < static_cast<int>(sizeof(Td) * 8));
  VIXL_ASSERT(((Td(mask) << least_significant_bit) >> least_significant_bit) ==
              Td(mask));
  VIXL_ASSERT((value & mask) == value);
  dst &= ~(Td(mask) << least_significant_bit);
  dst |= Td(value) << least_significant_bit;
}

class VFP {
 public:
  static uint32_t FP32ToImm8(float imm) {
    // bits: aBbb.bbbc.defg.h000.0000.0000.0000.0000
    uint32_t bits = FloatToRawbits(imm);
    // bit7: a000.0000
    uint32_t bit7 = ((bits >> 31) & 0x1) << 7;
    // bit6: 0b00.0000
    uint32_t bit6 = ((bits >> 29) & 0x1) << 6;
    // bit5_to_0: 00cd.efgh
    uint32_t bit5_to_0 = (bits >> 19) & 0x3f;
    return static_cast<uint32_t>(bit7 | bit6 | bit5_to_0);
  }
  static uint32_t FP64ToImm8(double imm) {
    // bits: aBbb.bbbb.bbcd.efgh.0000.0000.0000.0000
    //       0000.0000.0000.0000.0000.0000.0000.0000
    uint64_t bits = DoubleToRawbits(imm);
    // bit7: a000.0000
    uint64_t bit7 = ((bits >> 63) & 0x1) << 7;
    // bit6: 0b00.0000
    uint64_t bit6 = ((bits >> 61) & 0x1) << 6;
    // bit5_to_0: 00cd.efgh
    uint64_t bit5_to_0 = (bits >> 48) & 0x3f;

    return static_cast<uint32_t>(bit7 | bit6 | bit5_to_0);
  }
  static float Imm8ToFP32(uint32_t imm8) {
    //   Imm8: abcdefgh (8 bits)
    // Single: aBbb.bbbc.defg.h000.0000.0000.0000.0000 (32 bits)
    // where B is b ^ 1
    uint32_t bits = imm8;
    uint32_t bit7 = (bits >> 7) & 0x1;
    uint32_t bit6 = (bits >> 6) & 0x1;
    uint32_t bit5_to_0 = bits & 0x3f;
    uint32_t result = (bit7 << 31) | ((32 - bit6) << 25) | (bit5_to_0 << 19);

    return RawbitsToFloat(result);
  }
  static double Imm8ToFP64(uint32_t imm8) {
    //   Imm8: abcdefgh (8 bits)
    // Double: aBbb.bbbb.bbcd.efgh.0000.0000.0000.0000
    //         0000.0000.0000.0000.0000.0000.0000.0000 (64 bits)
    // where B is b ^ 1
    uint32_t bits = imm8;
    uint64_t bit7 = (bits >> 7) & 0x1;
    uint64_t bit6 = (bits >> 6) & 0x1;
    uint64_t bit5_to_0 = bits & 0x3f;
    uint64_t result = (bit7 << 63) | ((256 - bit6) << 54) | (bit5_to_0 << 48);
    return RawbitsToDouble(result);
  }
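  // For example, 1.0f (raw bits 0x3f800000) round-trips through the AArch64
  // 8-bit floating-point immediate encoding (illustrative values):
  //   FP32ToImm8(1.0f) -> 0x70
  //   Imm8ToFP32(0x70) -> 1.0f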
  static bool IsImmFP32(float imm) {
    // Valid values will have the form:
    //   aBbb.bbbc.defg.h000.0000.0000.0000.0000
    uint32_t bits = FloatToRawbits(imm);
    // bits[18..0] are cleared.
    if ((bits & 0x7ffff) != 0) {
      return false;
    }

    // bits[29..25] are all set or all cleared.
    uint32_t b_pattern = (bits >> 16) & 0x3e00;
    if (b_pattern != 0 && b_pattern != 0x3e00) {
      return false;
    }
    // bit[30] and bit[29] are opposite.
    if (((bits ^ (bits << 1)) & 0x40000000) == 0) {
      return false;
    }
    return true;
  }
  static bool IsImmFP64(double imm) {
    // Valid values will have the form:
    //   aBbb.bbbb.bbcd.efgh.0000.0000.0000.0000
    //   0000.0000.0000.0000.0000.0000.0000.0000
    uint64_t bits = DoubleToRawbits(imm);
    // bits[47..0] are cleared.
    if ((bits & UINT64_C(0x0000ffffffffffff)) != 0) {
      return false;
    }
    // bits[61..54] are all set or all cleared.
    uint32_t b_pattern = (bits >> 48) & 0x3fc0;
    if ((b_pattern != 0) && (b_pattern != 0x3fc0)) {
      return false;
    }
    // bit[62] and bit[61] are opposite.
    if (((bits ^ (bits << 1)) & (UINT64_C(1) << 62)) == 0) {
      return false;
    }
    return true;
  }
};

class BitField {
  // ForEachBitHelper is a functor that will call
  //   bool ForEachBitHelper::execute(ElementType id) const
  // and expects a boolean in return whether to continue (if true)
  // or stop (if false).
  // check_set will check if the bits are on (true) or off (false).
  template <typename ForEachBitHelper, bool check_set>
  bool ForEachBit(const ForEachBitHelper& helper) {
    for (int i = 0; static_cast<size_t>(i) < bitfield_.size(); i++) {
      if (bitfield_[i] == check_set)
        if (!helper.execute(i)) return false;
    }
    return true;
  }

 public:
  explicit BitField(unsigned size) : bitfield_(size, 0) {}

  void Set(int i) {
    VIXL_ASSERT((i >= 0) && (static_cast<size_t>(i) < bitfield_.size()));
    bitfield_[i] = true;
  }

  void Unset(int i) {
    VIXL_ASSERT((i >= 0) && (static_cast<size_t>(i) < bitfield_.size()));
    bitfield_[i] = false;
  }

  bool IsSet(int i) const { return bitfield_[i]; }

  // For each bit not set in the bitfield call the execute functor
  // execute.
  // ForEachBitSetHelper::execute returns true if the iteration through
  // the bits can continue, otherwise it will stop.
  //   struct ForEachBitSetHelper {
  //     bool execute(int /*id*/) { return false; }
  //   };
  template <typename ForEachBitNotSetHelper>
  bool ForEachBitNotSet(const ForEachBitNotSetHelper& helper) {
    return ForEachBit<ForEachBitNotSetHelper, false>(helper);
  }

  // For each bit set in the bitfield call the execute functor
  // execute.
  template <typename ForEachBitSetHelper>
  bool ForEachBitSet(const ForEachBitSetHelper& helper) {
    return ForEachBit<ForEachBitSetHelper, true>(helper);
  }

 private:
  std::vector<bool> bitfield_;
};
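// A minimal usage sketch (the helper type and values here are illustrative,
// not part of VIXL):
//   struct PrintSetBits {
//     bool execute(int id) const {
//       printf("bit %d is set\n", id);
//       return true;  // Keep iterating.
//     }
//   };
//   BitField bits(32);
//   bits.Set(16);
//   bits.Set(17);
//   bits.ForEachBitSet(PrintSetBits());  // Visits bit 16, then bit 17.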
namespace internal {

typedef int64_t Int64;
class Uint64;
class Uint128;

class Uint32 {
  uint32_t data_;

 public:
  // Unlike uint32_t, Uint32 has a default constructor.
  Uint32() { data_ = 0; }
  explicit Uint32(uint32_t data) : data_(data) {}
  inline explicit Uint32(Uint64 data);
  uint32_t Get() const { return data_; }
  template <int N>
  int32_t GetSigned() const {
    return ExtractSignedBitfield32(N - 1, 0, data_);
  }
  int32_t GetSigned() const { return data_; }
  Uint32 operator~() const { return Uint32(~data_); }
  Uint32 operator-() const { return Uint32(-data_); }
  bool operator==(Uint32 value) const { return data_ == value.data_; }
  bool operator!=(Uint32 value) const { return data_ != value.data_; }
  bool operator>(Uint32 value) const { return data_ > value.data_; }
  Uint32 operator+(Uint32 value) const { return Uint32(data_ + value.data_); }
  Uint32 operator-(Uint32 value) const { return Uint32(data_ - value.data_); }
  Uint32 operator&(Uint32 value) const { return Uint32(data_ & value.data_); }
  Uint32 operator&=(Uint32 value) {
    data_ &= value.data_;
    return *this;
  }
  Uint32 operator^(Uint32 value) const { return Uint32(data_ ^ value.data_); }
  Uint32 operator^=(Uint32 value) {
    data_ ^= value.data_;
    return *this;
  }
  Uint32 operator|(Uint32 value) const { return Uint32(data_ | value.data_); }
  Uint32 operator|=(Uint32 value) {
    data_ |= value.data_;
    return *this;
  }
  // Unlike uint32_t, the shift functions can accept negative shift and
  // return 0 when the shift is too big.
  Uint32 operator>>(int shift) const {
    if (shift == 0) return *this;
    if (shift < 0) {
      int tmp = -shift;
      if (tmp >= 32) return Uint32(0);
      return Uint32(data_ << tmp);
    }
    int tmp = shift;
    if (tmp >= 32) return Uint32(0);
    return Uint32(data_ >> tmp);
  }
  Uint32 operator<<(int shift) const {
    if (shift == 0) return *this;
    if (shift < 0) {
      int tmp = -shift;
      if (tmp >= 32) return Uint32(0);
      return Uint32(data_ >> tmp);
    }
    int tmp = shift;
    if (tmp >= 32) return Uint32(0);
    return Uint32(data_ << tmp);
  }
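  // For example (a negative shift reverses the direction, and an out-of-range
  // shift yields zero rather than undefined behaviour):
  //   Uint32(0x80000000) >> 31 -> Uint32(1)
  //   Uint32(1) << -1          -> Uint32(0)  (shifted right by 1)
  //   Uint32(1) >> 40          -> Uint32(0)  (uint32_t >> 40 would be UB)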
};

class Uint64 {
  uint64_t data_;

 public:
  // Unlike uint64_t, Uint64 has a default constructor.
  Uint64() { data_ = 0; }
  explicit Uint64(uint64_t data) : data_(data) {}
  explicit Uint64(Uint32 data) : data_(data.Get()) {}
  inline explicit Uint64(Uint128 data);
  uint64_t Get() const { return data_; }
  int64_t GetSigned(int N) const {
    return ExtractSignedBitfield64(N - 1, 0, data_);
  }
  int64_t GetSigned() const { return data_; }
  Uint32 ToUint32() const {
    VIXL_ASSERT((data_ >> 32) == 0);
    return Uint32(static_cast<uint32_t>(data_));
  }
  Uint32 GetHigh32() const { return Uint32(data_ >> 32); }
  Uint32 GetLow32() const { return Uint32(data_ & 0xffffffff); }
  Uint64 operator~() const { return Uint64(~data_); }
  Uint64 operator-() const { return Uint64(-data_); }
  bool operator==(Uint64 value) const { return data_ == value.data_; }
  bool operator!=(Uint64 value) const { return data_ != value.data_; }
  Uint64 operator+(Uint64 value) const { return Uint64(data_ + value.data_); }
  Uint64 operator-(Uint64 value) const { return Uint64(data_ - value.data_); }
  Uint64 operator&(Uint64 value) const { return Uint64(data_ & value.data_); }
  Uint64 operator&=(Uint64 value) {
    data_ &= value.data_;
    return *this;
  }
  Uint64 operator^(Uint64 value) const { return Uint64(data_ ^ value.data_); }
  Uint64 operator^=(Uint64 value) {
    data_ ^= value.data_;
    return *this;
  }
  Uint64 operator|(Uint64 value) const { return Uint64(data_ | value.data_); }
  Uint64 operator|=(Uint64 value) {
    data_ |= value.data_;
    return *this;
  }
  // Unlike uint64_t, the shift functions can accept negative shift and
  // return 0 when the shift is too big.
  Uint64 operator>>(int shift) const {
    if (shift == 0) return *this;
    if (shift < 0) {
      int tmp = -shift;
      if (tmp >= 64) return Uint64(0);
      return Uint64(data_ << tmp);
    }
    int tmp = shift;
    if (tmp >= 64) return Uint64(0);
    return Uint64(data_ >> tmp);
  }
  Uint64 operator<<(int shift) const {
    if (shift == 0) return *this;
    if (shift < 0) {
      int tmp = -shift;
      if (tmp >= 64) return Uint64(0);
      return Uint64(data_ >> tmp);
    }
    int tmp = shift;
    if (tmp >= 64) return Uint64(0);
    return Uint64(data_ << tmp);
  }
};

class Uint128 {
  uint64_t data_high_;
  uint64_t data_low_;

 public:
  Uint128() : data_high_(0), data_low_(0) {}
  explicit Uint128(uint64_t data_low) : data_high_(0), data_low_(data_low) {}
  explicit Uint128(Uint64 data_low)
      : data_high_(0), data_low_(data_low.Get()) {}
  Uint128(uint64_t data_high, uint64_t data_low)
      : data_high_(data_high), data_low_(data_low) {}
  Uint64 ToUint64() const {
    VIXL_ASSERT(data_high_ == 0);
    return Uint64(data_low_);
  }
  Uint64 GetHigh64() const { return Uint64(data_high_); }
  Uint64 GetLow64() const { return Uint64(data_low_); }
  Uint128 operator~() const { return Uint128(~data_high_, ~data_low_); }
  bool operator==(Uint128 value) const {
    return (data_high_ == value.data_high_) && (data_low_ == value.data_low_);
  }
  Uint128 operator&(Uint128 value) const {
    return Uint128(data_high_ & value.data_high_, data_low_ & value.data_low_);
  }
  Uint128 operator&=(Uint128 value) {
    data_high_ &= value.data_high_;
    data_low_ &= value.data_low_;
    return *this;
  }
  Uint128 operator|=(Uint128 value) {
    data_high_ |= value.data_high_;
    data_low_ |= value.data_low_;
    return *this;
  }
  Uint128 operator>>(int shift) const {
    VIXL_ASSERT((shift >= 0) && (shift < 128));
    if (shift == 0) return *this;
    if (shift >= 64) {
      return Uint128(0, data_high_ >> (shift - 64));
    }
    uint64_t tmp = (data_high_ << (64 - shift)) | (data_low_ >> shift);
    return Uint128(data_high_ >> shift, tmp);
  }
  Uint128 operator<<(int shift) const {
    VIXL_ASSERT((shift >= 0) && (shift < 128));
    if (shift == 0) return *this;
    if (shift >= 64) {
      return Uint128(data_low_ << (shift - 64), 0);
    }
    uint64_t tmp = (data_high_ << shift) | (data_low_ >> (64 - shift));
    return Uint128(tmp, data_low_ << shift);
  }
};
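// For example, shifts move bits across the 64-bit halves (illustrative
// values):
//   Uint128(UINT64_C(1)) << 64 -> Uint128(1, 0)
//   Uint128(1, 0) >> 1         -> Uint128(0, UINT64_C(1) << 63)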
Uint32::Uint32(Uint64 data) : data_(data.ToUint32().Get()) {}
Uint64::Uint64(Uint128 data) : data_(data.ToUint64().Get()) {}

Int64 BitCount(Uint32 value);

}  // namespace internal

// The default NaN values (for FPCR.DN=1).
extern const double kFP64DefaultNaN;
extern const float kFP32DefaultNaN;
extern const Float16 kFP16DefaultNaN;

// Floating-point infinity values.
extern const Float16 kFP16PositiveInfinity;
extern const Float16 kFP16NegativeInfinity;
extern const float kFP32PositiveInfinity;
extern const float kFP32NegativeInfinity;
extern const double kFP64PositiveInfinity;
extern const double kFP64NegativeInfinity;

// Floating-point zero values.
extern const Float16 kFP16PositiveZero;
extern const Float16 kFP16NegativeZero;

// AArch64 floating-point specifics. These match IEEE-754.
const unsigned kDoubleMantissaBits = 52;
const unsigned kDoubleExponentBits = 11;
const unsigned kFloatMantissaBits = 23;
const unsigned kFloatExponentBits = 8;
const unsigned kFloat16MantissaBits = 10;
const unsigned kFloat16ExponentBits = 5;

enum FPRounding {
  // The first four values are encodable directly by FPCR<RMode>.
  FPTieEven = 0x0,
  FPPositiveInfinity = 0x1,
  FPNegativeInfinity = 0x2,
  FPZero = 0x3,

  // The final rounding modes are only available when explicitly specified by
  // the instruction (such as with fcvta). They cannot be set in FPCR.
  FPTieAway,
  FPRoundOdd
};

enum UseDefaultNaN { kUseDefaultNaN, kIgnoreDefaultNaN };

// Assemble the specified IEEE-754 components into the target type and apply
// appropriate rounding.
//  sign:     0 = positive, 1 = negative
//  exponent: Unbiased IEEE-754 exponent.
//  mantissa: The mantissa of the input. The top bit (which is not encoded for
//            normal IEEE-754 values) must not be omitted. This bit has the
//            value 'pow(2, exponent)'.
//
// The input value is assumed to be a normalized value. That is, the input may
// not be infinity or NaN. If the source value is subnormal, it must be
// normalized before calling this function such that the highest set bit in the
// mantissa has the value 'pow(2, exponent)'.
//
// Callers should use FPRoundToFloat or FPRoundToDouble directly, rather than
// calling a templated FPRound.
template <class T, int ebits, int mbits>
T FPRound(int64_t sign,
          int64_t exponent,
          uint64_t mantissa,
          FPRounding round_mode) {
  VIXL_ASSERT((sign == 0) || (sign == 1));

  // Only FPTieEven and FPRoundOdd rounding modes are implemented.
  VIXL_ASSERT((round_mode == FPTieEven) || (round_mode == FPRoundOdd));

  // Rounding can promote subnormals to normals, and normals to infinities. For
  // example, a double with exponent 127 (FLT_MAX_EXP) would appear to be
  // encodable as a float, but rounding based on the low-order mantissa bits
  // could make it overflow. With ties-to-even rounding, this value would
  // become an infinity.

  //  ---- Rounding Method ----
  //
  //  The exponent is irrelevant in the rounding operation, so we treat the
  //  lowest-order bit that will fit into the result ('onebit') as having
  //  the value '1'. Similarly, the highest-order bit that won't fit into
  //  the result ('halfbit') has the value '0.5'. The 'point' sits between
  //  'onebit' and 'halfbit':
  //
  //            These bits fit into the result.
  //               |---------------------|
  //  mantissa = 0bxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
  //                                     ||
  //                                    / |
  //                                   /  halfbit
  //                               onebit
  //
  //  For subnormal outputs, the range of representable bits is smaller and
  //  the position of onebit and halfbit depends on the exponent of the
  //  input, but the method is otherwise similar.
  //
  //  onebit(frac)
  //    |
  //    | halfbit(frac)          halfbit(adjusted)
  //    | /                      /
  //    | |                      |
  //   0b00.0 (exact)       -> 0b00.0 (exact)       -> 0b00
  //   0b00.0...            -> 0b00.0...            -> 0b00
  //   0b00.1 (exact)       -> 0b00.0111..111       -> 0b00
  //   0b00.1...            -> 0b00.1...            -> 0b01
  //   0b01.0 (exact)       -> 0b01.0 (exact)       -> 0b01
  //   0b01.0...            -> 0b01.0...            -> 0b01
  //   0b01.1 (exact)       -> 0b01.1 (exact)       -> 0b10
  //   0b01.1...            -> 0b01.1...            -> 0b10
  //   0b10.0 (exact)       -> 0b10.0 (exact)       -> 0b10
  //   0b10.0...            -> 0b10.0...            -> 0b10
  //   0b10.1 (exact)       -> 0b10.0111..111       -> 0b10
  //   0b10.1...            -> 0b10.1...            -> 0b11
  //   0b11.0 (exact)       -> 0b11.0 (exact)       -> 0b11
  //   ...                  /              |                 /     |
  //                       /               |                /      |
  //                                       /                       |
  //   adjusted = frac - (halfbit(mantissa) & ~onebit(frac));      |
  //                                                               |
  //   mantissa = (mantissa >> shift) + halfbit(adjusted);

  static const int mantissa_offset = 0;
  static const int exponent_offset = mantissa_offset + mbits;
  static const int sign_offset = exponent_offset + ebits;
  VIXL_ASSERT(sign_offset == (sizeof(T) * 8 - 1));

  // Bail out early for zero inputs.
  if (mantissa == 0) {
    return static_cast<T>(sign << sign_offset);
  }

  // If all bits in the exponent are set, the value is infinite or NaN.
  // This is true for all binary IEEE-754 formats.
  static const int infinite_exponent = (1 << ebits) - 1;
  static const int max_normal_exponent = infinite_exponent - 1;

  // Apply the exponent bias to encode it for the result. Doing this early
  // makes it easy to detect values that will be infinite or subnormal.
  exponent += max_normal_exponent >> 1;

  if (exponent > max_normal_exponent) {
    // Overflow: the input is too large for the result type to represent.
    if (round_mode == FPTieEven) {
      // FPTieEven rounding mode handles overflows using infinities.
      exponent = infinite_exponent;
      mantissa = 0;
    } else {
      VIXL_ASSERT(round_mode == FPRoundOdd);
      // FPRoundOdd rounding mode handles overflows using the largest magnitude
      // normal number.
      exponent = max_normal_exponent;
      mantissa = (UINT64_C(1) << exponent_offset) - 1;
    }
    return static_cast<T>((sign << sign_offset) |
                          (exponent << exponent_offset) |
                          (mantissa << mantissa_offset));
  }

  // Calculate the shift required to move the top mantissa bit to the proper
  // place in the destination type.
  const int highest_significant_bit = 63 - CountLeadingZeros(mantissa);
  int shift = highest_significant_bit - mbits;

  if (exponent <= 0) {
    // The output will be subnormal (before rounding).
    // For subnormal outputs, the shift must be adjusted by the exponent. The
    // +1 is necessary because the exponent of a subnormal value (encoded as
    // 0) is the same as the exponent of the smallest normal value (encoded
    // as 1).
    shift += -exponent + 1;

    // Handle inputs that would produce a zero output.
    //
    // Shifts higher than highest_significant_bit+1 will always produce a zero
    // result. A shift of exactly highest_significant_bit+1 might produce a
    // non-zero result after rounding.
    if (shift > (highest_significant_bit + 1)) {
      if (round_mode == FPTieEven) {
        // The result will always be +/-0.0.
        return static_cast<T>(sign << sign_offset);
      } else {
        VIXL_ASSERT(round_mode == FPRoundOdd);
        VIXL_ASSERT(mantissa != 0);
        // For FPRoundOdd, if the mantissa is too small to represent and
        // non-zero return the next "odd" value.
        return static_cast<T>((sign << sign_offset) | 1);
      }
    }

    // Properly encode the exponent for a subnormal output.
    exponent = 0;
  } else {
    // Clear the topmost mantissa bit, since this is not encoded in IEEE-754
    // normal values.
    mantissa &= ~(UINT64_C(1) << highest_significant_bit);
  }

  // The casts below are only well-defined for unsigned integers.
  VIXL_STATIC_ASSERT(std::numeric_limits<T>::is_integer);
  VIXL_STATIC_ASSERT(!std::numeric_limits<T>::is_signed);

  if (shift > 0) {
    if (round_mode == FPTieEven) {
      // We have to shift the mantissa to the right. Some precision is lost,
      // so we need to apply rounding.
      uint64_t onebit_mantissa = (mantissa >> (shift)) & 1;
      uint64_t halfbit_mantissa = (mantissa >> (shift - 1)) & 1;
      uint64_t adjustment = (halfbit_mantissa & ~onebit_mantissa);
      uint64_t adjusted = mantissa - adjustment;
      T halfbit_adjusted = (adjusted >> (shift - 1)) & 1;

      T result = static_cast<T>((sign << sign_offset) |
                                (exponent << exponent_offset) |
                                ((mantissa >> shift) << mantissa_offset));

      // A very large mantissa can overflow during rounding. If this happens,
      // the exponent should be incremented and the mantissa set to 1.0
      // (encoded as 0). Applying halfbit_adjusted after assembling the float
      // has the nice side-effect that this case is handled for free.
      //
      // This also handles cases where a very large finite value overflows to
      // infinity, or where a very large subnormal value overflows to become
      // normal.
      return result + halfbit_adjusted;
    } else {
      VIXL_ASSERT(round_mode == FPRoundOdd);
      // If any bits at position halfbit or below are set, onebit (ie. the
      // bottom bit of the resulting mantissa) must be set.
      uint64_t fractional_bits = mantissa & ((UINT64_C(1) << shift) - 1);
      if (fractional_bits != 0) {
        mantissa |= UINT64_C(1) << shift;
      }

      return static_cast<T>((sign << sign_offset) |
                            (exponent << exponent_offset) |
                            ((mantissa >> shift) << mantissa_offset));
    }
  } else {
    // We have to shift the mantissa to the left (or not at all). The input
    // mantissa is exactly representable in the output mantissa, so apply no
    // rounding correction.
    return static_cast<T>((sign << sign_offset) |
                          (exponent << exponent_offset) |
                          ((mantissa << -shift) << mantissa_offset));
  }
}
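
// For example, assembling 1.0f (sign 0, unbiased exponent 0, and a mantissa
// whose top set bit represents pow(2, 0)) under ties-to-even rounding
// (illustrative values):
//   FPRound<uint32_t, kFloatExponentBits, kFloatMantissaBits>(
//       0, 0, UINT64_C(1) << 63, FPTieEven) -> 0x3f800000 (the bits of 1.0f)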

// See FPRound for a description of this function.
inline double FPRoundToDouble(int64_t sign,
                              int64_t exponent,
                              uint64_t mantissa,
                              FPRounding round_mode) {
  uint64_t bits =
      FPRound<uint64_t, kDoubleExponentBits, kDoubleMantissaBits>(sign,
                                                                  exponent,
                                                                  mantissa,
                                                                  round_mode);
  return RawbitsToDouble(bits);
}


// See FPRound for a description of this function.
inline Float16 FPRoundToFloat16(int64_t sign,
                                int64_t exponent,
                                uint64_t mantissa,
                                FPRounding round_mode) {
  return RawbitsToFloat16(
      FPRound<uint16_t,
              kFloat16ExponentBits,
              kFloat16MantissaBits>(sign, exponent, mantissa, round_mode));
}


// See FPRound for a description of this function.
static inline float FPRoundToFloat(int64_t sign,
                                   int64_t exponent,
                                   uint64_t mantissa,
                                   FPRounding round_mode) {
  uint32_t bits =
      FPRound<uint32_t, kFloatExponentBits, kFloatMantissaBits>(sign,
                                                                exponent,
                                                                mantissa,
                                                                round_mode);
  return RawbitsToFloat(bits);
}


float FPToFloat(Float16 value, UseDefaultNaN DN, bool* exception = NULL);
float FPToFloat(double value,
                FPRounding round_mode,
                UseDefaultNaN DN,
                bool* exception = NULL);

double FPToDouble(Float16 value, UseDefaultNaN DN, bool* exception = NULL);
double FPToDouble(float value, UseDefaultNaN DN, bool* exception = NULL);

Float16 FPToFloat16(float value,
                    FPRounding round_mode,
                    UseDefaultNaN DN,
                    bool* exception = NULL);

Float16 FPToFloat16(double value,
                    FPRounding round_mode,
                    UseDefaultNaN DN,
                    bool* exception = NULL);

}  // namespace vixl

#endif  // VIXL_UTILS_H