1 /* 2 * Copyright 2014 Google Inc. 3 * 4 * Use of this source code is governed by a BSD-style license that can be 5 * found in the LICENSE file. 6 */ 7 8 #ifndef SkHalf_DEFINED 9 #define SkHalf_DEFINED 10 11 #include "SkNx.h" 12 #include "SkTypes.h" 13 14 #if !defined(_MSC_VER) && SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_AVX2 15 #include <x86intrin.h> 16 #endif 17 18 // 16-bit floating point value 19 // format is 1 bit sign, 5 bits exponent, 10 bits mantissa 20 // only used for storage 21 typedef uint16_t SkHalf; 22 23 static constexpr uint16_t SK_HalfMin = 0x0400; // 2^-24 (minimum positive normal value) 24 static constexpr uint16_t SK_HalfMax = 0x7bff; // 65504 25 static constexpr uint16_t SK_HalfEpsilon = 0x1400; // 2^-10 26 static constexpr uint16_t SK_Half1 = 0x3C00; // 1 27 28 // convert between half and single precision floating point 29 float SkHalfToFloat(SkHalf h); 30 SkHalf SkFloatToHalf(float f); 31 32 // Convert between half and single precision floating point, 33 // assuming inputs and outputs are both finite, and may 34 // flush values which would be denormal half floats to zero. 35 static inline Sk4f SkHalfToFloat_finite_ftz(uint64_t); 36 static inline Sk4h SkFloatToHalf_finite_ftz(const Sk4f&); 37 38 // ~~~~~~~~~~~ impl ~~~~~~~~~~~~~~ // 39 40 // Like the serial versions in SkHalf.cpp, these are based on 41 // https://fgiesen.wordpress.com/2012/03/28/half-to-float-done-quic/ 42 43 // GCC 4.9 lacks the intrinsics to use ARMv8 f16<->f32 instructions, so we use inline assembly. 44 45 static inline Sk4f SkHalfToFloat_finite_ftz(const Sk4h& hs) { 46 #if !defined(SKNX_NO_SIMD) && defined(SK_CPU_ARM64) 47 float32x4_t fs; 48 asm ("fcvtl %[fs].4s, %[hs].4h \n" // vcvt_f32_f16(...) 49 : [fs] "=w" (fs) // =w: write-only NEON register 50 : [hs] "w" (hs.fVec)); // w: read-only NEON register 51 return fs; 52 #else 53 Sk4i bits = SkNx_cast<int>(hs), // Expand to 32 bit. 54 sign = bits & 0x00008000, // Save the sign bit for later... 55 positive = bits ^ sign, // ...but strip it off for now. 56 is_norm = 0x03ff < positive; // Exponent > 0? 57 58 // For normal half floats, extend the mantissa by 13 zero bits, 59 // then adjust the exponent from 15 bias to 127 bias. 60 Sk4i norm = (positive << 13) + ((127 - 15) << 23); 61 62 Sk4i merged = (sign << 16) | (norm & is_norm); 63 return Sk4f::Load(&merged); 64 #endif 65 } 66 67 static inline Sk4f SkHalfToFloat_finite_ftz(uint64_t hs) { 68 return SkHalfToFloat_finite_ftz(Sk4h::Load(&hs)); 69 } 70 71 static inline Sk4h SkFloatToHalf_finite_ftz(const Sk4f& fs) { 72 #if !defined(SKNX_NO_SIMD) && defined(SK_CPU_ARM64) 73 float32x4_t vec = fs.fVec; 74 asm ("fcvtn %[vec].4h, %[vec].4s \n" // vcvt_f16_f32(vec) 75 : [vec] "+w" (vec)); // +w: read-write NEON register 76 return vreinterpret_u16_f32(vget_low_f32(vec)); 77 #else 78 Sk4i bits = Sk4i::Load(&fs), 79 sign = bits & 0x80000000, // Save the sign bit for later... 80 positive = bits ^ sign, // ...but strip it off for now. 81 will_be_norm = 0x387fdfff < positive; // greater than largest denorm half? 82 83 // For normal half floats, adjust the exponent from 127 bias to 15 bias, 84 // then drop the bottom 13 mantissa bits. 85 Sk4i norm = (positive - ((127 - 15) << 23)) >> 13; 86 87 Sk4i merged = (sign >> 16) | (will_be_norm & norm); 88 return SkNx_cast<uint16_t>(merged); 89 #endif 90 } 91 92 static inline Sk8f SkHalfToFloat_finite_ftz(const Sk8h& hs) { 93 #if !defined(SKNX_NO_SIMD) && SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_AVX2 94 return _mm256_cvtph_ps(hs.fVec); 95 96 #else 97 uint64_t parts[2]; 98 hs.store(parts); 99 return SkNx_join(SkHalfToFloat_finite_ftz(parts[0]), 100 SkHalfToFloat_finite_ftz(parts[1])); 101 102 #endif 103 } 104 105 static inline Sk8h SkFloatToHalf_finite_ftz(const Sk8f& fs) { 106 #if !defined(SKNX_NO_SIMD) && SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_AVX2 107 return _mm256_cvtps_ph(fs.fVec, _MM_FROUND_CUR_DIRECTION); 108 109 #else 110 uint64_t parts[2]; 111 SkFloatToHalf_finite_ftz(fs.fLo).store(parts+0); 112 SkFloatToHalf_finite_ftz(fs.fHi).store(parts+1); 113 return Sk8h::Load(parts); 114 #endif 115 } 116 117 #endif 118