Home | History | Annotate | Download | only in System
      1 // Copyright 2016 The SwiftShader Authors. All Rights Reserved.
      2 //
      3 // Licensed under the Apache License, Version 2.0 (the "License");
      4 // you may not use this file except in compliance with the License.
      5 // You may obtain a copy of the License at
      6 //
      7 //    http://www.apache.org/licenses/LICENSE-2.0
      8 //
      9 // Unless required by applicable law or agreed to in writing, software
     10 // distributed under the License is distributed on an "AS IS" BASIS,
     11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 // See the License for the specific language governing permissions and
     13 // limitations under the License.
     14 
     15 #ifndef sw_Half_hpp
     16 #define sw_Half_hpp
     17 
#include <algorithm>
#include <cmath>
#include <cstring>
     20 
     21 namespace sw
     22 {
     23 	class half
     24 	{
     25 	public:
     26 		half() = default;
     27 		explicit half(float f);
     28 
     29 		operator float() const;
     30 
     31 		half &operator=(half h);
     32 		half &operator=(float f);
     33 
     34 	private:
     35 		unsigned short fp16i;
     36 	};
     37 
     38 	inline half shortAsHalf(short s)
     39 	{
     40 		union
     41 		{
     42 			half h;
     43 			short s;
     44 		} hs;
     45 
     46 		hs.s = s;
     47 
     48 		return hs.h;
     49 	}
     50 
     51 	class RGB9E5
     52 	{
     53 		unsigned int R : 9;
     54 		unsigned int G : 9;
     55 		unsigned int B : 9;
     56 		unsigned int E : 5;
     57 
     58 	public:
     59 		RGB9E5(float rgb[3])
     60 		{
     61 			// B is the exponent bias (15)
     62 			constexpr int g_sharedexp_bias = 15;
     63 
     64 			// N is the number of mantissa bits per component (9)
     65 			constexpr int g_sharedexp_mantissabits = 9;
     66 
     67 			// Emax is the maximum allowed biased exponent value (31)
     68 			constexpr int g_sharedexp_maxexponent = 31;
     69 
     70 			constexpr float g_sharedexp_max =
     71 				((static_cast<float>(1 << g_sharedexp_mantissabits) - 1) /
     72 					static_cast<float>(1 << g_sharedexp_mantissabits)) *
     73 				static_cast<float>(1 << (g_sharedexp_maxexponent - g_sharedexp_bias));
     74 
     75 			const float red_c = std::max<float>(0, std::min(g_sharedexp_max, rgb[0]));
     76 			const float green_c = std::max<float>(0, std::min(g_sharedexp_max, rgb[1]));
     77 			const float blue_c = std::max<float>(0, std::min(g_sharedexp_max, rgb[2]));
     78 
     79 			const float max_c = std::max<float>(std::max<float>(red_c, green_c), blue_c);
     80 			const float exp_p =
     81 				std::max<float>(-g_sharedexp_bias - 1, floor(log(max_c))) + 1 + g_sharedexp_bias;
     82 			const int max_s = static_cast<int>(
     83 				floor((max_c / (pow(2.0f, exp_p - g_sharedexp_bias - g_sharedexp_mantissabits))) + 0.5f));
     84 			const int exp_s =
     85 				static_cast<int>((max_s < pow(2.0f, g_sharedexp_mantissabits)) ? exp_p : exp_p + 1);
     86 
     87 			R = static_cast<unsigned int>(
     88 				floor((red_c / (pow(2.0f, exp_s - g_sharedexp_bias - g_sharedexp_mantissabits))) + 0.5f));
     89 			G = static_cast<unsigned int>(
     90 				floor((green_c / (pow(2.0f, exp_s - g_sharedexp_bias - g_sharedexp_mantissabits))) + 0.5f));
     91 			B = static_cast<unsigned int>(
     92 				floor((blue_c / (pow(2.0f, exp_s - g_sharedexp_bias - g_sharedexp_mantissabits))) + 0.5f));
     93 			E = exp_s;
     94 		}
     95 
     96 		operator unsigned int() const
     97 		{
     98 			return *reinterpret_cast<const unsigned int*>(this);
     99 		}
    100 
    101 		void toRGB16F(half rgb[3]) const
    102 		{
    103 			constexpr int offset = 24;   // Exponent bias (15) + number of mantissa bits per component (9) = 24
    104 
    105 			const float factor = (1u << E) * (1.0f / (1 << offset));
    106 			rgb[0] = half(R * factor);
    107 			rgb[1] = half(G * factor);
    108 			rgb[2] = half(B * factor);
    109 		}
    110 	};
    111 
    112 	class R11G11B10F
    113 	{
    114 		unsigned int R : 11;
    115 		unsigned int G : 11;
    116 		unsigned int B : 10;
    117 
    118 		static inline half float11ToFloat16(unsigned short fp11)
    119 		{
    120 			return shortAsHalf(fp11 << 4);   // Sign bit 0
    121 		}
    122 
    123 		static inline half float10ToFloat16(unsigned short fp10)
    124 		{
    125 			return shortAsHalf(fp10 << 5);   // Sign bit 0
    126 		}
    127 
    128 		inline unsigned short float32ToFloat11(float fp32)
    129 		{
    130 			const unsigned int float32MantissaMask = 0x7FFFFF;
    131 			const unsigned int float32ExponentMask = 0x7F800000;
    132 			const unsigned int float32SignMask = 0x80000000;
    133 			const unsigned int float32ValueMask = ~float32SignMask;
    134 			const unsigned int float32ExponentFirstBit = 23;
    135 			const unsigned int float32ExponentBias = 127;
    136 
    137 			const unsigned short float11Max = 0x7BF;
    138 			const unsigned short float11MantissaMask = 0x3F;
    139 			const unsigned short float11ExponentMask = 0x7C0;
    140 			const unsigned short float11BitMask = 0x7FF;
    141 			const unsigned int float11ExponentBias = 14;
    142 
    143 			const unsigned int float32Maxfloat11 = 0x477E0000;
    144 			const unsigned int float32Minfloat11 = 0x38800000;
    145 
    146 			const unsigned int float32Bits = *reinterpret_cast<unsigned int*>(&fp32);
    147 			const bool float32Sign = (float32Bits & float32SignMask) == float32SignMask;
    148 
    149 			unsigned int float32Val = float32Bits & float32ValueMask;
    150 
    151 			if((float32Val & float32ExponentMask) == float32ExponentMask)
    152 			{
    153 				// INF or NAN
    154 				if((float32Val & float32MantissaMask) != 0)
    155 				{
    156 					return float11ExponentMask |
    157 						(((float32Val >> 17) | (float32Val >> 11) | (float32Val >> 6) | (float32Val)) &
    158 							float11MantissaMask);
    159 				}
    160 				else if(float32Sign)
    161 				{
    162 					// -INF is clamped to 0 since float11 is positive only
    163 					return 0;
    164 				}
    165 				else
    166 				{
    167 					return float11ExponentMask;
    168 				}
    169 			}
    170 			else if(float32Sign)
    171 			{
    172 				// float11 is positive only, so clamp to zero
    173 				return 0;
    174 			}
    175 			else if(float32Val > float32Maxfloat11)
    176 			{
    177 				// The number is too large to be represented as a float11, set to max
    178 				return float11Max;
    179 			}
    180 			else
    181 			{
    182 				if(float32Val < float32Minfloat11)
    183 				{
    184 					// The number is too small to be represented as a normalized float11
    185 					// Convert it to a denormalized value.
    186 					const unsigned int shift = (float32ExponentBias - float11ExponentBias) -
    187 						(float32Val >> float32ExponentFirstBit);
    188 					float32Val =
    189 						((1 << float32ExponentFirstBit) | (float32Val & float32MantissaMask)) >> shift;
    190 				}
    191 				else
    192 				{
    193 					// Rebias the exponent to represent the value as a normalized float11
    194 					float32Val += 0xC8000000;
    195 				}
    196 
    197 				return ((float32Val + 0xFFFF + ((float32Val >> 17) & 1)) >> 17) & float11BitMask;
    198 			}
    199 		}
    200 
    201 		inline unsigned short float32ToFloat10(float fp32)
    202 		{
    203 			const unsigned int float32MantissaMask = 0x7FFFFF;
    204 			const unsigned int float32ExponentMask = 0x7F800000;
    205 			const unsigned int float32SignMask = 0x80000000;
    206 			const unsigned int float32ValueMask = ~float32SignMask;
    207 			const unsigned int float32ExponentFirstBit = 23;
    208 			const unsigned int float32ExponentBias = 127;
    209 
    210 			const unsigned short float10Max = 0x3DF;
    211 			const unsigned short float10MantissaMask = 0x1F;
    212 			const unsigned short float10ExponentMask = 0x3E0;
    213 			const unsigned short float10BitMask = 0x3FF;
    214 			const unsigned int float10ExponentBias = 14;
    215 
    216 			const unsigned int float32Maxfloat10 = 0x477C0000;
    217 			const unsigned int float32Minfloat10 = 0x38800000;
    218 
    219 			const unsigned int float32Bits = *reinterpret_cast<unsigned int*>(&fp32);
    220 			const bool float32Sign = (float32Bits & float32SignMask) == float32SignMask;
    221 
    222 			unsigned int float32Val = float32Bits & float32ValueMask;
    223 
    224 			if((float32Val & float32ExponentMask) == float32ExponentMask)
    225 			{
    226 				// INF or NAN
    227 				if((float32Val & float32MantissaMask) != 0)
    228 				{
    229 					return float10ExponentMask |
    230 						(((float32Val >> 18) | (float32Val >> 13) | (float32Val >> 3) | (float32Val)) &
    231 							float10MantissaMask);
    232 				}
    233 				else if(float32Sign)
    234 				{
    235 					// -INF is clamped to 0 since float11 is positive only
    236 					return 0;
    237 				}
    238 				else
    239 				{
    240 					return float10ExponentMask;
    241 				}
    242 			}
    243 			else if(float32Sign)
    244 			{
    245 				// float10 is positive only, so clamp to zero
    246 				return 0;
    247 			}
    248 			else if(float32Val > float32Maxfloat10)
    249 			{
    250 				// The number is too large to be represented as a float11, set to max
    251 				return float10Max;
    252 			}
    253 			else
    254 			{
    255 				if(float32Val < float32Minfloat10)
    256 				{
    257 					// The number is too small to be represented as a normalized float11
    258 					// Convert it to a denormalized value.
    259 					const unsigned int shift = (float32ExponentBias - float10ExponentBias) -
    260 						(float32Val >> float32ExponentFirstBit);
    261 					float32Val =
    262 						((1 << float32ExponentFirstBit) | (float32Val & float32MantissaMask)) >> shift;
    263 				}
    264 				else
    265 				{
    266 					// Rebias the exponent to represent the value as a normalized float11
    267 					float32Val += 0xC8000000;
    268 				}
    269 
    270 				return ((float32Val + 0x1FFFF + ((float32Val >> 18) & 1)) >> 18) & float10BitMask;
    271 			}
    272 		}
    273 
    274 	public:
    275 		R11G11B10F(float rgb[3])
    276 		{
    277 			R = float32ToFloat11(rgb[0]);
    278 			G = float32ToFloat11(rgb[1]);
    279 			B = float32ToFloat10(rgb[2]);
    280 		}
    281 
    282 		operator unsigned int() const
    283 		{
    284 			return *reinterpret_cast<const unsigned int*>(this);
    285 		}
    286 
    287 		void toRGB16F(half rgb[3]) const
    288 		{
    289 			rgb[0] = float11ToFloat16(R);
    290 			rgb[1] = float11ToFloat16(G);
    291 			rgb[2] = float10ToFloat16(B);
    292 		}
    293 	};
    294 }
    295 
    296 #endif   // sw_Half_hpp
    297