Home | History | Annotate | Download | only in arm64

Lines Matching full:mantissa

2529 //  mantissa: The mantissa of the input. The top bit (which is not encoded for
2536 // mantissa has the value 'pow(2, exponent)'.
2541 static T FPRound(int64_t sign, int64_t exponent, uint64_t mantissa,
2551 // encodable as a float, but rounding based on the low-order mantissa bits
2565 // mantissa = 0bxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx
2596 // adjusted = frac - (halfbit(mantissa) & ~onebit(frac)); / |
2598 // mantissa = (mantissa >> shift) + halfbit(adjusted);
2606 if (mantissa == 0) {
2623 mantissa = 0;
2626 (mantissa << mantissa_offset));
2629 // Calculate the shift required to move the top mantissa bit to the proper
2631 const int highest_significant_bit = 63 - CountLeadingZeros(mantissa, 64);
2655 // Clear the topmost mantissa bit, since this is not encoded in IEEE-754
2657 mantissa &= ~(1UL << highest_significant_bit);
2661 // We have to shift the mantissa to the right. Some precision is lost, so we
2663 uint64_t onebit_mantissa = (mantissa >> (shift)) & 1;
2664 uint64_t halfbit_mantissa = (mantissa >> (shift-1)) & 1;
2665 uint64_t adjusted = mantissa - (halfbit_mantissa & ~onebit_mantissa);
2670 ((mantissa >> shift) << mantissa_offset));
2672 // A very large mantissa can overflow during rounding. If this happens, the
2673 // exponent should be incremented and the mantissa set to 1.0 (encoded as
2682 // We have to shift the mantissa to the left (or not at all). The input
2683 // mantissa is exactly representable in the output mantissa, so apply no
2687 ((mantissa << -shift) << mantissa_offset));
2694 uint64_t mantissa, FPRounding round_mode) {
2698 mantissa,
2706 uint64_t mantissa, FPRounding round_mode) {
2710 mantissa,
2835 // - The payload (mantissa) is transferred entirely, except that the top
2876 // - The payload (mantissa) is transferred as much as possible, except
2906 // Extract the mantissa and add the implicit '1' bit.
2907 uint64_t mantissa = unsigned_bitextract_64(51, 0, raw);
2909 mantissa |= (1UL << 52);
2911 return FPRoundToFloat(sign, exponent, mantissa, round_mode);