Home | History | Annotate | Download | only in core
      1 /*
      2  * Copyright 2014 Google Inc.
      3  *
      4  * Use of this source code is governed by a BSD-style license that can be
      5  * found in the LICENSE file.
      6  */
      7 
      8 #ifndef SkHalf_DEFINED
      9 #define SkHalf_DEFINED
     10 
     11 #include "SkNx.h"
     12 #include "SkTypes.h"
     13 
     14 #if !defined(_MSC_VER) && SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_AVX2
     15     #include <x86intrin.h>
     16 #endif
     17 
     18 // 16-bit floating point value
     19 // format is 1 bit sign, 5 bits exponent, 10 bits mantissa
     20 // only used for storage
     21 typedef uint16_t SkHalf;
     22 
     23 static constexpr uint16_t SK_HalfMin     = 0x0400; // 2^-24  (minimum positive normal value)
     24 static constexpr uint16_t SK_HalfMax     = 0x7bff; // 65504
     25 static constexpr uint16_t SK_HalfEpsilon = 0x1400; // 2^-10
     26 static constexpr uint16_t SK_Half1       = 0x3C00; // 1
     27 
// Convert a single value between half and single precision floating point.
// Only declared in this header; presumably these are the "serial versions in
// SkHalf.cpp" that the implementation notes further down refer to.
//   SkHalfToFloat: takes the 16-bit half pattern, returns the float value.
//   SkFloatToHalf: takes a float, returns the 16-bit half pattern.
float SkHalfToFloat(SkHalf h);
SkHalf SkFloatToHalf(float f);
     31 
// Vectorized conversions between half and single precision floating point.
// Contract: callers must only pass finite values (and only finite results are
// expected), and any value that would be a denormal half float may be flushed
// to zero ("ftz") instead of being represented exactly.
// The uint64_t overload takes four half floats packed into one 64-bit integer.
// These are forward declarations; the definitions follow in the impl section.
static inline Sk4f SkHalfToFloat_finite_ftz(uint64_t);
static inline Sk4h SkFloatToHalf_finite_ftz(const Sk4f&);
     37 
     38 // ~~~~~~~~~~~ impl ~~~~~~~~~~~~~~ //
     39 
     40 // Like the serial versions in SkHalf.cpp, these are based on
     41 // https://fgiesen.wordpress.com/2012/03/28/half-to-float-done-quic/
     42 
     43 // GCC 4.9 lacks the intrinsics to use ARMv8 f16<->f32 instructions, so we use inline assembly.
     44 
     45 static inline Sk4f SkHalfToFloat_finite_ftz(const Sk4h& hs) {
     46 #if !defined(SKNX_NO_SIMD) && defined(SK_CPU_ARM64)
     47     float32x4_t fs;
     48     asm ("fcvtl %[fs].4s, %[hs].4h   \n"   // vcvt_f32_f16(...)
     49         : [fs] "=w" (fs)                   // =w: write-only NEON register
     50         : [hs] "w" (hs.fVec));             //  w: read-only NEON register
     51     return fs;
     52 #else
     53     Sk4i bits     = SkNx_cast<int>(hs),  // Expand to 32 bit.
     54          sign     = bits & 0x00008000,   // Save the sign bit for later...
     55          positive = bits ^ sign,         // ...but strip it off for now.
     56          is_norm  = 0x03ff < positive;   // Exponent > 0?
     57 
     58     // For normal half floats, extend the mantissa by 13 zero bits,
     59     // then adjust the exponent from 15 bias to 127 bias.
     60     Sk4i norm = (positive << 13) + ((127 - 15) << 23);
     61 
     62     Sk4i merged = (sign << 16) | (norm & is_norm);
     63     return Sk4f::Load(&merged);
     64 #endif
     65 }
     66 
     67 static inline Sk4f SkHalfToFloat_finite_ftz(uint64_t hs) {
     68     return SkHalfToFloat_finite_ftz(Sk4h::Load(&hs));
     69 }
     70 
     71 static inline Sk4h SkFloatToHalf_finite_ftz(const Sk4f& fs) {
     72 #if !defined(SKNX_NO_SIMD) && defined(SK_CPU_ARM64)
     73     float32x4_t vec = fs.fVec;
     74     asm ("fcvtn %[vec].4h, %[vec].4s  \n"   // vcvt_f16_f32(vec)
     75         : [vec] "+w" (vec));                // +w: read-write NEON register
     76     return vreinterpret_u16_f32(vget_low_f32(vec));
     77 #else
     78     Sk4i bits         = Sk4i::Load(&fs),
     79          sign         = bits & 0x80000000,      // Save the sign bit for later...
     80          positive     = bits ^ sign,            // ...but strip it off for now.
     81          will_be_norm = 0x387fdfff < positive;  // greater than largest denorm half?
     82 
     83     // For normal half floats, adjust the exponent from 127 bias to 15 bias,
     84     // then drop the bottom 13 mantissa bits.
     85     Sk4i norm = (positive - ((127 - 15) << 23)) >> 13;
     86 
     87     Sk4i merged = (sign >> 16) | (will_be_norm & norm);
     88     return SkNx_cast<uint16_t>(merged);
     89 #endif
     90 }
     91 
     92 static inline Sk8f SkHalfToFloat_finite_ftz(const Sk8h& hs) {
     93 #if !defined(SKNX_NO_SIMD) && SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_AVX2
     94     return _mm256_cvtph_ps(hs.fVec);
     95 
     96 #else
     97     uint64_t parts[2];
     98     hs.store(parts);
     99     return SkNx_join(SkHalfToFloat_finite_ftz(parts[0]),
    100                      SkHalfToFloat_finite_ftz(parts[1]));
    101 
    102 #endif
    103 }
    104 
    105 static inline Sk8h SkFloatToHalf_finite_ftz(const Sk8f& fs) {
    106 #if !defined(SKNX_NO_SIMD) && SK_CPU_SSE_LEVEL >= SK_CPU_SSE_LEVEL_AVX2
    107     return _mm256_cvtps_ph(fs.fVec, _MM_FROUND_CUR_DIRECTION);
    108 
    109 #else
    110     uint64_t parts[2];
    111     SkFloatToHalf_finite_ftz(fs.fLo).store(parts+0);
    112     SkFloatToHalf_finite_ftz(fs.fHi).store(parts+1);
    113     return Sk8h::Load(parts);
    114 #endif
    115 }
    116 
    117 #endif
    118