// Copyright 2016 The SwiftShader Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//    http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

#ifndef sw_Half_hpp
#define sw_Half_hpp

#include <algorithm>
#include <cmath>
#include <cstring>

namespace sw
{
    // Half-precision value held as raw bits in fp16i. The conversions to and
    // from float are only declared here; their definitions live elsewhere.
    class half
    {
    public:
        half() = default;
        explicit half(float f);

        operator float() const;

        half &operator=(half h);
        half &operator=(float f);

    private:
        unsigned short fp16i;
    };

    // Returns a half whose storage holds the bits of 's' (no numeric conversion).
    inline half shortAsHalf(short s)
    {
        union
        {
            half h;
            short s;
        } hs;

        hs.s = s;

        return hs.h;
    }

    // Packed shared-exponent color: three 9-bit mantissas and one 5-bit exponent.
    class RGB9E5
    {
        unsigned int R : 9;
        unsigned int G : 9;
        unsigned int B : 9;
        unsigned int E : 5;

    public:
        // Encodes rgb[0..2] into the shared-exponent representation.
        // Components are clamped to [0, g_sharedexp_max] before encoding.
        RGB9E5(const float rgb[3])
        {
            // B is the exponent bias (15)
            constexpr int g_sharedexp_bias = 15;

            // N is the number of mantissa bits per component (9)
            constexpr int g_sharedexp_mantissabits = 9;

            // Emax is the maximum allowed biased exponent value (31)
            constexpr int g_sharedexp_maxexponent = 31;

            constexpr float g_sharedexp_max =
                ((static_cast<float>(1 << g_sharedexp_mantissabits) - 1) /
                 static_cast<float>(1 << g_sharedexp_mantissabits)) *
                static_cast<float>(1 << (g_sharedexp_maxexponent - g_sharedexp_bias));

            // NOTE: std::min returns its first argument when the comparison is
            // false, so a NaN component clamps to g_sharedexp_max here.
            const float red_c = std::max<float>(0, std::min(g_sharedexp_max, rgb[0]));
            const float green_c = std::max<float>(0, std::min(g_sharedexp_max, rgb[1]));
            const float blue_c = std::max<float>(0, std::min(g_sharedexp_max, rgb[2]));

            const float max_c = std::max(std::max(red_c, green_c), blue_c);

            // Preliminary shared exponent. This must be the base-2 logarithm: a
            // natural logarithm underestimates the exponent, which makes the
            // mantissas overflow their 9 bits for larger inputs (e.g. 16.0).
            // For max_c == 0, log2 yields -infinity and the max() clamps it.
            const int exp_p =
                static_cast<int>(std::max<float>(-g_sharedexp_bias - 1, std::floor(std::log2(max_c)))) +
                1 + g_sharedexp_bias;

            // Mantissa of the largest component at the preliminary exponent.
            // Rounding is done in double so that adding 0.5 is exact.
            const int max_s = static_cast<int>(std::floor(
                max_c / std::ldexp(1.0, exp_p - g_sharedexp_bias - g_sharedexp_mantissabits) + 0.5));

            // If rounding pushed the mantissa to 2^N, bump the exponent by one.
            const int exp_s = (max_s < (1 << g_sharedexp_mantissabits)) ? exp_p : exp_p + 1;

            // Value of one mantissa step at the final shared exponent.
            const double scale = std::ldexp(1.0, exp_s - g_sharedexp_bias - g_sharedexp_mantissabits);

            R = static_cast<unsigned int>(std::floor(red_c / scale + 0.5));
            G = static_cast<unsigned int>(std::floor(green_c / scale + 0.5));
            B = static_cast<unsigned int>(std::floor(blue_c / scale + 0.5));
            E = static_cast<unsigned int>(exp_s);
        }

        // Returns the object representation of the packed bit-fields.
        operator unsigned int() const
        {
            static_assert(sizeof(RGB9E5) == sizeof(unsigned int), "RGB9E5 must occupy exactly one unsigned int");

            // memcpy instead of dereferencing a reinterpret_cast'ed pointer,
            // which would violate the strict aliasing rules.
            unsigned int bits;
            std::memcpy(&bits, this, sizeof(bits));
            return bits;
        }

        // Decodes the three components into rgb[0..2] as half values.
        void toRGB16F(half rgb[3]) const
        {
            constexpr int offset = 24;   // Exponent bias (15) + number of mantissa bits per component (9) = 24

            // factor == 2^(E - offset)
            const float factor = (1u << E) * (1.0f / (1 << offset));
            rgb[0] = half(R * factor);
            rgb[1] = half(G * factor);
            rgb[2] = half(B * factor);
        }
    };

    // Packed color with two unsigned 11-bit floats (R, G) and one unsigned
    // 10-bit float (B).
    class R11G11B10F
    {
        unsigned int R : 11;
        unsigned int G : 11;
        unsigned int B : 10;

        // Widens an 11-bit float to half bits by shifting it into place.
        static inline half float11ToFloat16(unsigned short fp11)
        {
            return shortAsHalf(fp11 << 4);   // Sign bit 0
        }

        // Widens a 10-bit float to half bits by shifting it into place.
        static inline half float10ToFloat16(unsigned short fp10)
        {
            return shortAsHalf(fp10 << 5);   // Sign bit 0
        }

        // Converts a 32-bit float to an unsigned 11-bit float (6 mantissa bits).
        // Negative values (including -INF) become 0, values above the largest
        // finite float11 become that maximum, NaN stays NaN, +INF stays INF.
        static inline unsigned short float32ToFloat11(float fp32)
        {
            const unsigned int float32MantissaMask = 0x7FFFFF;
            const unsigned int float32ExponentMask = 0x7F800000;
            const unsigned int float32SignMask = 0x80000000;
            const unsigned int float32ValueMask = ~float32SignMask;
            const unsigned int float32ExponentFirstBit = 23;
            const unsigned int float32ExponentBias = 127;

            const unsigned short float11Max = 0x7BF;
            const unsigned short float11MantissaMask = 0x3F;
            const unsigned short float11ExponentMask = 0x7C0;
            const unsigned short float11BitMask = 0x7FF;
            const unsigned int float11ExponentBias = 14;

            const unsigned int float32Maxfloat11 = 0x477E0000;
            const unsigned int float32Minfloat11 = 0x38800000;

            // Read the bit pattern with memcpy; dereferencing a
            // reinterpret_cast'ed pointer violates strict aliasing.
            static_assert(sizeof(unsigned int) == sizeof(float), "float must be 32 bits wide");
            unsigned int float32Bits;
            std::memcpy(&float32Bits, &fp32, sizeof(float32Bits));

            const bool float32Sign = (float32Bits & float32SignMask) == float32SignMask;

            unsigned int float32Val = float32Bits & float32ValueMask;

            if((float32Val & float32ExponentMask) == float32ExponentMask)
            {
                // INF or NAN
                if((float32Val & float32MantissaMask) != 0)
                {
                    // Fold the float32 mantissa bits down to the available
                    // mantissa bits of the result.
                    return float11ExponentMask |
                           (((float32Val >> 17) | (float32Val >> 11) | (float32Val >> 6) | (float32Val)) &
                            float11MantissaMask);
                }
                else if(float32Sign)
                {
                    // -INF is clamped to 0 since float11 is positive only
                    return 0;
                }
                else
                {
                    return float11ExponentMask;
                }
            }
            else if(float32Sign)
            {
                // float11 is positive only, so clamp to zero
                return 0;
            }
            else if(float32Val > float32Maxfloat11)
            {
                // The number is too large to be represented as a float11, set to max
                return float11Max;
            }
            else
            {
                if(float32Val < float32Minfloat11)
                {
                    // The number is too small to be represented as a normalized float11
                    // Convert it to a denormalized value.
                    const unsigned int shift = (float32ExponentBias - float11ExponentBias) -
                                               (float32Val >> float32ExponentFirstBit);

                    // 'shift' can be as large as 113. Shifting a 32-bit value by
                    // 32 or more is undefined, and the 24-bit significand is
                    // shifted out entirely anyway, so the result is zero.
                    float32Val = (shift < 32)
                                     ? (((1u << float32ExponentFirstBit) | (float32Val & float32MantissaMask)) >> shift)
                                     : 0;
                }
                else
                {
                    // Rebias the exponent to represent the value as a normalized float11
                    float32Val += 0xC8000000;
                }

                // Drop the low 17 bits, rounding to nearest with ties to even.
                return ((float32Val + 0xFFFF + ((float32Val >> 17) & 1)) >> 17) & float11BitMask;
            }
        }

        // Converts a 32-bit float to an unsigned 10-bit float (5 mantissa bits).
        // Negative values (including -INF) become 0, values above the largest
        // finite float10 become that maximum, NaN stays NaN, +INF stays INF.
        static inline unsigned short float32ToFloat10(float fp32)
        {
            const unsigned int float32MantissaMask = 0x7FFFFF;
            const unsigned int float32ExponentMask = 0x7F800000;
            const unsigned int float32SignMask = 0x80000000;
            const unsigned int float32ValueMask = ~float32SignMask;
            const unsigned int float32ExponentFirstBit = 23;
            const unsigned int float32ExponentBias = 127;

            const unsigned short float10Max = 0x3DF;
            const unsigned short float10MantissaMask = 0x1F;
            const unsigned short float10ExponentMask = 0x3E0;
            const unsigned short float10BitMask = 0x3FF;
            const unsigned int float10ExponentBias = 14;

            const unsigned int float32Maxfloat10 = 0x477C0000;
            const unsigned int float32Minfloat10 = 0x38800000;

            // Read the bit pattern with memcpy; dereferencing a
            // reinterpret_cast'ed pointer violates strict aliasing.
            static_assert(sizeof(unsigned int) == sizeof(float), "float must be 32 bits wide");
            unsigned int float32Bits;
            std::memcpy(&float32Bits, &fp32, sizeof(float32Bits));

            const bool float32Sign = (float32Bits & float32SignMask) == float32SignMask;

            unsigned int float32Val = float32Bits & float32ValueMask;

            if((float32Val & float32ExponentMask) == float32ExponentMask)
            {
                // INF or NAN
                if((float32Val & float32MantissaMask) != 0)
                {
                    // Fold the float32 mantissa bits down to the available
                    // mantissa bits of the result.
                    return float10ExponentMask |
                           (((float32Val >> 18) | (float32Val >> 13) | (float32Val >> 3) | (float32Val)) &
                            float10MantissaMask);
                }
                else if(float32Sign)
                {
                    // -INF is clamped to 0 since float10 is positive only
                    return 0;
                }
                else
                {
                    return float10ExponentMask;
                }
            }
            else if(float32Sign)
            {
                // float10 is positive only, so clamp to zero
                return 0;
            }
            else if(float32Val > float32Maxfloat10)
            {
                // The number is too large to be represented as a float10, set to max
                return float10Max;
            }
            else
            {
                if(float32Val < float32Minfloat10)
                {
                    // The number is too small to be represented as a normalized float10
                    // Convert it to a denormalized value.
                    const unsigned int shift = (float32ExponentBias - float10ExponentBias) -
                                               (float32Val >> float32ExponentFirstBit);

                    // 'shift' can be as large as 113. Shifting a 32-bit value by
                    // 32 or more is undefined, and the 24-bit significand is
                    // shifted out entirely anyway, so the result is zero.
                    float32Val = (shift < 32)
                                     ? (((1u << float32ExponentFirstBit) | (float32Val & float32MantissaMask)) >> shift)
                                     : 0;
                }
                else
                {
                    // Rebias the exponent to represent the value as a normalized float10
                    float32Val += 0xC8000000;
                }

                // Drop the low 18 bits, rounding to nearest with ties to even.
                return ((float32Val + 0x1FFFF + ((float32Val >> 18) & 1)) >> 18) & float10BitMask;
            }
        }

    public:
        // Encodes rgb[0] and rgb[1] as 11-bit floats and rgb[2] as a 10-bit float.
        R11G11B10F(const float rgb[3])
        {
            R = float32ToFloat11(rgb[0]);
            G = float32ToFloat11(rgb[1]);
            B = float32ToFloat10(rgb[2]);
        }

        // Returns the object representation of the packed bit-fields.
        operator unsigned int() const
        {
            static_assert(sizeof(R11G11B10F) == sizeof(unsigned int), "R11G11B10F must occupy exactly one unsigned int");

            // memcpy instead of dereferencing a reinterpret_cast'ed pointer,
            // which would violate the strict aliasing rules.
            unsigned int bits;
            std::memcpy(&bits, this, sizeof(bits));
            return bits;
        }

        // Decodes the three components into rgb[0..2] as half values.
        void toRGB16F(half rgb[3]) const
        {
            rgb[0] = float11ToFloat16(R);
            rgb[1] = float11ToFloat16(G);
            rgb[2] = float10ToFloat16(B);
        }
    };
}

#endif   // sw_Half_hpp