1 // This file is part of Eigen, a lightweight C++ template library 2 // for linear algebra. 3 // 4 // Copyright (C) 2016 Benoit Steiner <benoit.steiner.goog (at) gmail.com> 5 // 6 // This Source Code Form is subject to the terms of the Mozilla 7 // Public License v. 2.0. If a copy of the MPL was not distributed 8 // with this file, You can obtain one at http://mozilla.org/MPL/2.0/. 9 10 #ifndef EIGEN_TYPE_CASTING_CUDA_H 11 #define EIGEN_TYPE_CASTING_CUDA_H 12 13 namespace Eigen { 14 15 namespace internal { 16 17 template<> 18 struct scalar_cast_op<float, Eigen::half> { 19 EIGEN_EMPTY_STRUCT_CTOR(scalar_cast_op) 20 typedef Eigen::half result_type; 21 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half operator() (const float& a) const { 22 #if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300 23 return __float2half(a); 24 #else 25 return Eigen::half(a); 26 #endif 27 } 28 }; 29 30 template<> 31 struct functor_traits<scalar_cast_op<float, Eigen::half> > 32 { enum { Cost = NumTraits<float>::AddCost, PacketAccess = false }; }; 33 34 35 template<> 36 struct scalar_cast_op<int, Eigen::half> { 37 EIGEN_EMPTY_STRUCT_CTOR(scalar_cast_op) 38 typedef Eigen::half result_type; 39 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE Eigen::half operator() (const int& a) const { 40 #if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300 41 return __float2half(static_cast<float>(a)); 42 #else 43 return Eigen::half(static_cast<float>(a)); 44 #endif 45 } 46 }; 47 48 template<> 49 struct functor_traits<scalar_cast_op<int, Eigen::half> > 50 { enum { Cost = NumTraits<float>::AddCost, PacketAccess = false }; }; 51 52 53 template<> 54 struct scalar_cast_op<Eigen::half, float> { 55 EIGEN_EMPTY_STRUCT_CTOR(scalar_cast_op) 56 typedef float result_type; 57 EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float operator() (const Eigen::half& a) const { 58 #if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300 59 return __half2float(a); 60 #else 61 return static_cast<float>(a); 62 #endif 63 } 64 }; 65 66 template<> 67 struct functor_traits<scalar_cast_op<Eigen::half, float> > 68 { enum { Cost = NumTraits<float>::AddCost, PacketAccess = false }; }; 69 70 71 72 #if defined(EIGEN_HAS_CUDA_FP16) && defined(__CUDA_ARCH__) && __CUDA_ARCH__ >= 300 73 74 template <> 75 struct type_casting_traits<Eigen::half, float> { 76 enum { 77 VectorizedCast = 1, 78 SrcCoeffRatio = 2, 79 TgtCoeffRatio = 1 80 }; 81 }; 82 83 template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE float4 pcast<half2, float4>(const half2& a, const half2& b) { 84 float2 r1 = __half22float2(a); 85 float2 r2 = __half22float2(b); 86 return make_float4(r1.x, r1.y, r2.x, r2.y); 87 } 88 89 template <> 90 struct type_casting_traits<float, Eigen::half> { 91 enum { 92 VectorizedCast = 1, 93 SrcCoeffRatio = 1, 94 TgtCoeffRatio = 2 95 }; 96 }; 97 98 template<> EIGEN_DEVICE_FUNC EIGEN_STRONG_INLINE half2 pcast<float4, half2>(const float4& a) { 99 // Simply discard the second half of the input 100 return __floats2half2_rn(a.x, a.y); 101 } 102 103 #elif defined EIGEN_VECTORIZE_AVX512 104 template <> 105 struct type_casting_traits<half, float> { 106 enum { 107 VectorizedCast = 1, 108 SrcCoeffRatio = 1, 109 TgtCoeffRatio = 1 110 }; 111 }; 112 113 template<> EIGEN_STRONG_INLINE Packet16f pcast<Packet16h, Packet16f>(const Packet16h& a) { 114 return half2float(a); 115 } 116 117 template <> 118 struct type_casting_traits<float, half> { 119 enum { 120 VectorizedCast = 1, 121 SrcCoeffRatio = 1, 122 TgtCoeffRatio = 1 123 }; 124 }; 125 126 template<> EIGEN_STRONG_INLINE Packet16h pcast<Packet16f, Packet16h>(const Packet16f& a) { 127 return float2half(a); 128 } 129 130 #elif defined EIGEN_VECTORIZE_AVX 131 132 template <> 133 struct type_casting_traits<Eigen::half, float> { 134 enum { 135 VectorizedCast = 1, 136 SrcCoeffRatio = 1, 137 TgtCoeffRatio = 1 138 }; 139 }; 140 141 template<> EIGEN_STRONG_INLINE Packet8f pcast<Packet8h, Packet8f>(const Packet8h& a) { 142 return half2float(a); 143 } 144 145 template <> 146 struct type_casting_traits<float, Eigen::half> { 147 enum { 148 VectorizedCast = 1, 149 SrcCoeffRatio = 1, 150 TgtCoeffRatio = 1 151 }; 152 }; 153 154 template<> EIGEN_STRONG_INLINE Packet8h pcast<Packet8f, Packet8h>(const Packet8f& a) { 155 return float2half(a); 156 } 157 158 // Disable the following code since it's broken on too many platforms / compilers. 159 //#elif defined(EIGEN_VECTORIZE_SSE) && (!EIGEN_ARCH_x86_64) && (!EIGEN_COMP_MSVC) 160 #elif 0 161 162 template <> 163 struct type_casting_traits<Eigen::half, float> { 164 enum { 165 VectorizedCast = 1, 166 SrcCoeffRatio = 1, 167 TgtCoeffRatio = 1 168 }; 169 }; 170 171 template<> EIGEN_STRONG_INLINE Packet4f pcast<Packet4h, Packet4f>(const Packet4h& a) { 172 __int64_t a64 = _mm_cvtm64_si64(a.x); 173 Eigen::half h = raw_uint16_to_half(static_cast<unsigned short>(a64)); 174 float f1 = static_cast<float>(h); 175 h = raw_uint16_to_half(static_cast<unsigned short>(a64 >> 16)); 176 float f2 = static_cast<float>(h); 177 h = raw_uint16_to_half(static_cast<unsigned short>(a64 >> 32)); 178 float f3 = static_cast<float>(h); 179 h = raw_uint16_to_half(static_cast<unsigned short>(a64 >> 48)); 180 float f4 = static_cast<float>(h); 181 return _mm_set_ps(f4, f3, f2, f1); 182 } 183 184 template <> 185 struct type_casting_traits<float, Eigen::half> { 186 enum { 187 VectorizedCast = 1, 188 SrcCoeffRatio = 1, 189 TgtCoeffRatio = 1 190 }; 191 }; 192 193 template<> EIGEN_STRONG_INLINE Packet4h pcast<Packet4f, Packet4h>(const Packet4f& a) { 194 EIGEN_ALIGN16 float aux[4]; 195 pstore(aux, a); 196 Eigen::half h0(aux[0]); 197 Eigen::half h1(aux[1]); 198 Eigen::half h2(aux[2]); 199 Eigen::half h3(aux[3]); 200 201 Packet4h result; 202 result.x = _mm_set_pi16(h3.x, h2.x, h1.x, h0.x); 203 return result; 204 } 205 206 #endif 207 208 } // end namespace internal 209 210 } // end namespace Eigen 211 212 #endif // EIGEN_TYPE_CASTING_CUDA_H 213