Home | History | Annotate | Download | only in Shader
      1 // Copyright 2016 The SwiftShader Authors. All Rights Reserved.
      2 //
      3 // Licensed under the Apache License, Version 2.0 (the "License");
      4 // you may not use this file except in compliance with the License.
      5 // You may obtain a copy of the License at
      6 //
      7 //    http://www.apache.org/licenses/LICENSE-2.0
      8 //
      9 // Unless required by applicable law or agreed to in writing, software
     10 // distributed under the License is distributed on an "AS IS" BASIS,
     11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 // See the License for the specific language governing permissions and
     13 // limitations under the License.
     14 
     15 #include "SamplerCore.hpp"
     16 
     17 #include "Constants.hpp"
     18 #include "Debug.hpp"
     19 
     20 namespace
     21 {
     22 	void applySwizzle(sw::SwizzleType swizzle, sw::Short4& s, const sw::Vector4s& c)
     23 	{
     24 		switch(swizzle)
     25 		{
     26 		case sw::SWIZZLE_RED:	s = c.x; break;
     27 		case sw::SWIZZLE_GREEN: s = c.y; break;
     28 		case sw::SWIZZLE_BLUE:  s = c.z; break;
     29 		case sw::SWIZZLE_ALPHA: s = c.w; break;
     30 		case sw::SWIZZLE_ZERO:  s = sw::Short4(0x0000, 0x0000, 0x0000, 0x0000); break;
     31 		case sw::SWIZZLE_ONE:   s = sw::Short4(0x1000, 0x1000, 0x1000, 0x1000); break;
     32 		default: ASSERT(false);
     33 		}
     34 	}
     35 
     36 	void applySwizzle(sw::SwizzleType swizzle, sw::Float4& f, const sw::Vector4f& c)
     37 	{
     38 		switch(swizzle)
     39 		{
     40 		case sw::SWIZZLE_RED:	f = c.x; break;
     41 		case sw::SWIZZLE_GREEN: f = c.y; break;
     42 		case sw::SWIZZLE_BLUE:  f = c.z; break;
     43 		case sw::SWIZZLE_ALPHA: f = c.w; break;
     44 		case sw::SWIZZLE_ZERO:  f = sw::Float4(0.0f, 0.0f, 0.0f, 0.0f); break;
     45 		case sw::SWIZZLE_ONE:   f = sw::Float4(1.0f, 1.0f, 1.0f, 1.0f); break;
     46 		default: ASSERT(false);
     47 		}
     48 	}
     49 }
     50 
     51 namespace sw
     52 {
     53 	SamplerCore::SamplerCore(Pointer<Byte> &constants, const Sampler::State &state) : constants(constants), state(state)
     54 	{
     55 	}
     56 
     57 	void SamplerCore::sampleTexture(Pointer<Byte> &texture, Vector4s &c, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &dsx, Vector4f &dsy, SamplerMethod method)
     58 	{
     59 		sampleTexture(texture, c, u, v, w, q, dsx, dsy, method, true);
     60 	}
     61 
     62 	void SamplerCore::sampleTexture(Pointer<Byte> &texture, Vector4s &c, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &dsx, Vector4f &dsy, SamplerMethod method, bool fixed12)
     63 	{
     64 		#if PERF_PROFILE
     65 			AddAtomic(Pointer<Long>(&profiler.texOperations), 4);
     66 
     67 			if(state.compressedFormat)
     68 			{
     69 				AddAtomic(Pointer<Long>(&profiler.compressedTex), 4);
     70 			}
     71 		#endif
     72 
     73 		Float4 uuuu = u;
     74 		Float4 vvvv = v;
     75 		Float4 wwww = w;
     76 
     77 		if(state.textureType == TEXTURE_NULL)
     78 		{
     79 			c.x = Short4(0x0000, 0x0000, 0x0000, 0x0000);
     80 			c.y = Short4(0x0000, 0x0000, 0x0000, 0x0000);
     81 			c.z = Short4(0x0000, 0x0000, 0x0000, 0x0000);
     82 
     83 			if(fixed12)   // FIXME: Convert to fixed12 at higher level, when required
     84 			{
     85 				c.w = Short4(0x1000, 0x1000, 0x1000, 0x1000);
     86 			}
     87 			else
     88 			{
     89 				c.w = Short4((short)0xFFFF, (short)0xFFFF, (short)0xFFFF, (short)0xFFFF);   // FIXME
     90 			}
     91 		}
     92 		else
     93 		{
     94 			Int face[4];
     95 			Float4 lodX;
     96 			Float4 lodY;
     97 			Float4 lodZ;
     98 
     99 			if(state.textureType == TEXTURE_CUBE)
    100 			{
    101 				cubeFace(face, uuuu, vvvv, lodX, lodY, lodZ, u, v, w);
    102 			}
    103 
    104 			Float lod;
    105 			Float anisotropy;
    106 			Float4 uDelta;
    107 			Float4 vDelta;
    108 
    109 			if(state.textureType != TEXTURE_3D)
    110 			{
    111 				if(state.textureType != TEXTURE_CUBE)
    112 				{
    113 					computeLod(texture, lod, anisotropy, uDelta, vDelta, uuuu, vvvv, q.x, dsx, dsy, method);
    114 				}
    115 				else
    116 				{
    117 					computeLodCube(texture, lod, lodX, lodY, lodZ, q.x, dsx, dsy, method);
    118 				}
    119 			}
    120 			else
    121 			{
    122 				computeLod3D(texture, lod, uuuu, vvvv, wwww, q.x, dsx, dsy, method);
    123 			}
    124 
    125 			if(!hasFloatTexture())
    126 			{
    127 				sampleFilter(texture, c, uuuu, vvvv, wwww, lod, anisotropy, uDelta, vDelta, face, method);
    128 			}
    129 			else
    130 			{
    131 				Vector4f cf;
    132 
    133 				sampleFloatFilter(texture, cf, uuuu, vvvv, wwww, lod, anisotropy, uDelta, vDelta, face, method);
    134 
    135 				convertFixed12(c, cf);
    136 			}
    137 
    138 			if(fixed12 && !hasFloatTexture())
    139 			{
    140 				if(has16bitTextureFormat())
    141 				{
    142 					switch(state.textureFormat)
    143 					{
    144 					case FORMAT_R5G6B5:
    145 						if(state.sRGB)
    146 						{
    147 							sRGBtoLinear16_5_12(c.x);
    148 							sRGBtoLinear16_6_12(c.y);
    149 							sRGBtoLinear16_5_12(c.z);
    150 						}
    151 						else
    152 						{
    153 							c.x = MulHigh(As<UShort4>(c.x), UShort4(0x10000000 / 0xF800));
    154 							c.y = MulHigh(As<UShort4>(c.y), UShort4(0x10000000 / 0xFC00));
    155 							c.z = MulHigh(As<UShort4>(c.z), UShort4(0x10000000 / 0xF800));
    156 						}
    157 						break;
    158 					default:
    159 						ASSERT(false);
    160 					}
    161 				}
    162 				else
    163 				{
    164 					for(int component = 0; component < textureComponentCount(); component++)
    165 					{
    166 						if(state.sRGB && isRGBComponent(component))
    167 						{
    168 							sRGBtoLinear16_8_12(c[component]);   // FIXME: Perform linearization at surface level for read-only textures
    169 						}
    170 						else
    171 						{
    172 							if(hasUnsignedTextureComponent(component))
    173 							{
    174 								c[component] = As<UShort4>(c[component]) >> 4;
    175 							}
    176 							else
    177 							{
    178 								c[component] = c[component] >> 3;
    179 							}
    180 						}
    181 					}
    182 				}
    183 			}
    184 
    185 			if(fixed12 && state.textureFilter != FILTER_GATHER)
    186 			{
    187 				int componentCount = textureComponentCount();
    188 
    189 				switch(state.textureFormat)
    190 				{
    191 				case FORMAT_R8I_SNORM:
    192 				case FORMAT_G8R8I_SNORM:
    193 				case FORMAT_X8B8G8R8I_SNORM:
    194 				case FORMAT_A8B8G8R8I_SNORM:
    195 				case FORMAT_R8:
    196 				case FORMAT_R5G6B5:
    197 				case FORMAT_G8R8:
    198 				case FORMAT_R8I:
    199 				case FORMAT_R8UI:
    200 				case FORMAT_G8R8I:
    201 				case FORMAT_G8R8UI:
    202 				case FORMAT_X8B8G8R8I:
    203 				case FORMAT_X8B8G8R8UI:
    204 				case FORMAT_A8B8G8R8I:
    205 				case FORMAT_A8B8G8R8UI:
    206 				case FORMAT_R16I:
    207 				case FORMAT_R16UI:
    208 				case FORMAT_G16R16:
    209 				case FORMAT_G16R16I:
    210 				case FORMAT_G16R16UI:
    211 				case FORMAT_X16B16G16R16I:
    212 				case FORMAT_X16B16G16R16UI:
    213 				case FORMAT_A16B16G16R16:
    214 				case FORMAT_A16B16G16R16I:
    215 				case FORMAT_A16B16G16R16UI:
    216 				case FORMAT_R32I:
    217 				case FORMAT_R32UI:
    218 				case FORMAT_G32R32I:
    219 				case FORMAT_G32R32UI:
    220 				case FORMAT_X32B32G32R32I:
    221 				case FORMAT_X32B32G32R32UI:
    222 				case FORMAT_A32B32G32R32I:
    223 				case FORMAT_A32B32G32R32UI:
    224 				case FORMAT_X8R8G8B8:
    225 				case FORMAT_X8B8G8R8:
    226 				case FORMAT_A8R8G8B8:
    227 				case FORMAT_A8B8G8R8:
    228 				case FORMAT_SRGB8_X8:
    229 				case FORMAT_SRGB8_A8:
    230 				case FORMAT_V8U8:
    231 				case FORMAT_Q8W8V8U8:
    232 				case FORMAT_X8L8V8U8:
    233 				case FORMAT_V16U16:
    234 				case FORMAT_A16W16V16U16:
    235 				case FORMAT_Q16W16V16U16:
    236 				case FORMAT_YV12_BT601:
    237 				case FORMAT_YV12_BT709:
    238 				case FORMAT_YV12_JFIF:
    239 					if(componentCount < 2) c.y = Short4(0x1000, 0x1000, 0x1000, 0x1000);
    240 					if(componentCount < 3) c.z = Short4(0x1000, 0x1000, 0x1000, 0x1000);
    241 					if(componentCount < 4) c.w = Short4(0x1000, 0x1000, 0x1000, 0x1000);
    242 					break;
    243 				case FORMAT_A8:
    244 					c.w = c.x;
    245 					c.x = Short4(0x0000, 0x0000, 0x0000, 0x0000);
    246 					c.y = Short4(0x0000, 0x0000, 0x0000, 0x0000);
    247 					c.z = Short4(0x0000, 0x0000, 0x0000, 0x0000);
    248 					break;
    249 				case FORMAT_L8:
    250 				case FORMAT_L16:
    251 					c.y = c.x;
    252 					c.z = c.x;
    253 					c.w = Short4(0x1000, 0x1000, 0x1000, 0x1000);
    254 					break;
    255 				case FORMAT_A8L8:
    256 					c.w = c.y;
    257 					c.y = c.x;
    258 					c.z = c.x;
    259 					break;
    260 				case FORMAT_R32F:
    261 					c.y = Short4(0x1000, 0x1000, 0x1000, 0x1000);
    262 				case FORMAT_G32R32F:
    263 					c.z = Short4(0x1000, 0x1000, 0x1000, 0x1000);
    264 				case FORMAT_X32B32G32R32F:
    265 					c.w = Short4(0x1000, 0x1000, 0x1000, 0x1000);
    266 				case FORMAT_A32B32G32R32F:
    267 					break;
    268 				case FORMAT_D32F:
    269 				case FORMAT_D32F_LOCKABLE:
    270 				case FORMAT_D32FS8_TEXTURE:
    271 				case FORMAT_D32FS8_SHADOW:
    272 					c.y = c.x;
    273 					c.z = c.x;
    274 					c.w = c.x;
    275 					break;
    276 				default:
    277 					ASSERT(false);
    278 				}
    279 			}
    280 		}
    281 
    282 		if(fixed12 &&
    283 		   ((state.swizzleR != SWIZZLE_RED) ||
    284 		    (state.swizzleG != SWIZZLE_GREEN) ||
    285 		    (state.swizzleB != SWIZZLE_BLUE) ||
    286 		    (state.swizzleA != SWIZZLE_ALPHA)))
    287 		{
    288 			const Vector4s col(c);
    289 			applySwizzle(state.swizzleR, c.x, col);
    290 			applySwizzle(state.swizzleG, c.y, col);
    291 			applySwizzle(state.swizzleB, c.z, col);
    292 			applySwizzle(state.swizzleA, c.w, col);
    293 		}
    294 	}
    295 
    296 	void SamplerCore::sampleTexture(Pointer<Byte> &texture, Vector4f &c, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &dsx, Vector4f &dsy, SamplerMethod method)
    297 	{
    298 		#if PERF_PROFILE
    299 			AddAtomic(Pointer<Long>(&profiler.texOperations), 4);
    300 
    301 			if(state.compressedFormat)
    302 			{
    303 				AddAtomic(Pointer<Long>(&profiler.compressedTex), 4);
    304 			}
    305 		#endif
    306 
    307 		if(state.textureType == TEXTURE_NULL)
    308 		{
    309 			c.x = Float4(0.0f);
    310 			c.y = Float4(0.0f);
    311 			c.z = Float4(0.0f);
    312 			c.w = Float4(1.0f);
    313 		}
    314 		else
    315 		{
    316 			if(hasFloatTexture())   // FIXME: Mostly identical to integer sampling
    317 			{
    318 				Float4 uuuu = u;
    319 				Float4 vvvv = v;
    320 				Float4 wwww = w;
    321 
    322 				Int face[4];
    323 				Float4 lodX;
    324 				Float4 lodY;
    325 				Float4 lodZ;
    326 
    327 				if(state.textureType == TEXTURE_CUBE)
    328 				{
    329 					cubeFace(face, uuuu, vvvv, lodX, lodY, lodZ, u, v, w);
    330 				}
    331 
    332 				Float lod;
    333 				Float anisotropy;
    334 				Float4 uDelta;
    335 				Float4 vDelta;
    336 
    337 				if(state.textureType != TEXTURE_3D)
    338 				{
    339 					if(state.textureType != TEXTURE_CUBE)
    340 					{
    341 						computeLod(texture, lod, anisotropy, uDelta, vDelta, uuuu, vvvv, q.x, dsx, dsy, method);
    342 					}
    343 					else
    344 					{
    345 						computeLodCube(texture, lod, lodX, lodY, lodZ, q.x, dsx, dsy, method);
    346 					}
    347 				}
    348 				else
    349 				{
    350 					computeLod3D(texture, lod, uuuu, vvvv, wwww, q.x, dsx, dsy, method);
    351 				}
    352 
    353 				sampleFloatFilter(texture, c, uuuu, vvvv, wwww, lod, anisotropy, uDelta, vDelta, face, method);
    354 			}
    355 			else
    356 			{
    357 				Vector4s cs;
    358 
    359 				sampleTexture(texture, cs, u, v, w, q, dsx, dsy, method, false);
    360 
    361 				for(int component = 0; component < textureComponentCount(); component++)
    362 				{
    363 					if(has16bitTextureFormat())
    364 					{
    365 						switch(state.textureFormat)
    366 						{
    367 						case FORMAT_R5G6B5:
    368 							if(state.sRGB)
    369 							{
    370 								sRGBtoLinear16_5_12(cs.x);
    371 								sRGBtoLinear16_6_12(cs.y);
    372 								sRGBtoLinear16_5_12(cs.z);
    373 
    374 								convertSigned12(c.x, cs.x);
    375 								convertSigned12(c.y, cs.y);
    376 								convertSigned12(c.z, cs.z);
    377 							}
    378 							else
    379 							{
    380 								c.x = Float4(As<UShort4>(cs.x)) * Float4(1.0f / 0xF800);
    381 								c.y = Float4(As<UShort4>(cs.y)) * Float4(1.0f / 0xFC00);
    382 								c.z = Float4(As<UShort4>(cs.z)) * Float4(1.0f / 0xF800);
    383 							}
    384 							break;
    385 						default:
    386 							ASSERT(false);
    387 						}
    388 					}
    389 					else
    390 					{
    391 						switch(state.textureFormat)
    392 						{
    393 						case FORMAT_R8I:
    394 						case FORMAT_G8R8I:
    395 						case FORMAT_X8B8G8R8I:
    396 						case FORMAT_A8B8G8R8I:
    397 							c[component] = As<Float4>(Int4(cs[component]) >> 8);
    398 							break;
    399 						case FORMAT_R8UI:
    400 						case FORMAT_G8R8UI:
    401 						case FORMAT_X8B8G8R8UI:
    402 						case FORMAT_A8B8G8R8UI:
    403 							c[component] = As<Float4>(Int4(As<UShort4>(cs[component]) >> 8));
    404 							break;
    405 						case FORMAT_R16I:
    406 						case FORMAT_G16R16I:
    407 						case FORMAT_X16B16G16R16I:
    408 						case FORMAT_A16B16G16R16I:
    409 							c[component] = As<Float4>(Int4(cs[component]));
    410 							break;
    411 						case FORMAT_R16UI:
    412 						case FORMAT_G16R16UI:
    413 						case FORMAT_X16B16G16R16UI:
    414 						case FORMAT_A16B16G16R16UI:
    415 							c[component] = As<Float4>(Int4(As<UShort4>(cs[component])));
    416 							break;
    417 						default:
    418 							// Normalized integer formats
    419 							if(state.sRGB && isRGBComponent(component))
    420 							{
    421 								sRGBtoLinear16_8_12(cs[component]);   // FIXME: Perform linearization at surface level for read-only textures
    422 								convertSigned12(c[component], cs[component]);
    423 							}
    424 							else
    425 							{
    426 								if(hasUnsignedTextureComponent(component))
    427 								{
    428 									convertUnsigned16(c[component], cs[component]);
    429 								}
    430 								else
    431 								{
    432 									convertSigned15(c[component], cs[component]);
    433 								}
    434 							}
    435 							break;
    436 						}
    437 					}
    438 				}
    439 			}
    440 
    441 			int componentCount = textureComponentCount();
    442 
    443 			if(state.textureFilter != FILTER_GATHER)
    444 			{
    445 				switch(state.textureFormat)
    446 				{
    447 				case FORMAT_R8I:
    448 				case FORMAT_R8UI:
    449 				case FORMAT_R16I:
    450 				case FORMAT_R16UI:
    451 				case FORMAT_R32I:
    452 				case FORMAT_R32UI:
    453 					c.y = As<Float4>(UInt4(0));
    454 				case FORMAT_G8R8I:
    455 				case FORMAT_G8R8UI:
    456 				case FORMAT_G16R16I:
    457 				case FORMAT_G16R16UI:
    458 				case FORMAT_G32R32I:
    459 				case FORMAT_G32R32UI:
    460 					c.z = As<Float4>(UInt4(0));
    461 				case FORMAT_X8B8G8R8I:
    462 				case FORMAT_X8B8G8R8UI:
    463 				case FORMAT_X16B16G16R16I:
    464 				case FORMAT_X16B16G16R16UI:
    465 				case FORMAT_X32B32G32R32I:
    466 				case FORMAT_X32B32G32R32UI:
    467 					c.w = As<Float4>(UInt4(1));
    468 				case FORMAT_A8B8G8R8I:
    469 				case FORMAT_A8B8G8R8UI:
    470 				case FORMAT_A16B16G16R16I:
    471 				case FORMAT_A16B16G16R16UI:
    472 				case FORMAT_A32B32G32R32I:
    473 				case FORMAT_A32B32G32R32UI:
    474 					break;
    475 				case FORMAT_R8I_SNORM:
    476 				case FORMAT_G8R8I_SNORM:
    477 				case FORMAT_X8B8G8R8I_SNORM:
    478 				case FORMAT_A8B8G8R8I_SNORM:
    479 				case FORMAT_R8:
    480 				case FORMAT_R5G6B5:
    481 				case FORMAT_G8R8:
    482 				case FORMAT_G16R16:
    483 				case FORMAT_A16B16G16R16:
    484 				case FORMAT_X8R8G8B8:
    485 				case FORMAT_X8B8G8R8:
    486 				case FORMAT_A8R8G8B8:
    487 				case FORMAT_A8B8G8R8:
    488 				case FORMAT_SRGB8_X8:
    489 				case FORMAT_SRGB8_A8:
    490 				case FORMAT_V8U8:
    491 				case FORMAT_Q8W8V8U8:
    492 				case FORMAT_X8L8V8U8:
    493 				case FORMAT_V16U16:
    494 				case FORMAT_A16W16V16U16:
    495 				case FORMAT_Q16W16V16U16:
    496 					if(componentCount < 2) c.y = Float4(1.0f);
    497 					if(componentCount < 3) c.z = Float4(1.0f);
    498 					if(componentCount < 4) c.w = Float4(1.0f);
    499 					break;
    500 				case FORMAT_A8:
    501 					c.w = c.x;
    502 					c.x = Float4(0.0f);
    503 					c.y = Float4(0.0f);
    504 					c.z = Float4(0.0f);
    505 					break;
    506 				case FORMAT_L8:
    507 				case FORMAT_L16:
    508 					c.y = c.x;
    509 					c.z = c.x;
    510 					c.w = Float4(1.0f);
    511 					break;
    512 				case FORMAT_A8L8:
    513 					c.w = c.y;
    514 					c.y = c.x;
    515 					c.z = c.x;
    516 					break;
    517 				case FORMAT_R32F:
    518 					c.y = Float4(1.0f);
    519 				case FORMAT_G32R32F:
    520 					c.z = Float4(1.0f);
    521 				case FORMAT_X32B32G32R32F:
    522 					c.w = Float4(1.0f);
    523 				case FORMAT_A32B32G32R32F:
    524 					break;
    525 				case FORMAT_D32F:
    526 				case FORMAT_D32F_LOCKABLE:
    527 				case FORMAT_D32FS8_TEXTURE:
    528 				case FORMAT_D32FS8_SHADOW:
    529 					c.y = c.x;
    530 					c.z = c.x;
    531 					c.w = c.x;
    532 					break;
    533 				default:
    534 					ASSERT(false);
    535 				}
    536 			}
    537 		}
    538 
    539 		if((state.swizzleR != SWIZZLE_RED) ||
    540 		   (state.swizzleG != SWIZZLE_GREEN) ||
    541 		   (state.swizzleB != SWIZZLE_BLUE) ||
    542 		   (state.swizzleA != SWIZZLE_ALPHA))
    543 		{
    544 			const Vector4f col(c);
    545 			applySwizzle(state.swizzleR, c.x, col);
    546 			applySwizzle(state.swizzleG, c.y, col);
    547 			applySwizzle(state.swizzleB, c.z, col);
    548 			applySwizzle(state.swizzleA, c.w, col);
    549 		}
    550 	}
    551 
    552 	void SamplerCore::border(Short4 &mask, Float4 &coordinates)
    553 	{
    554 		Int4 border = As<Int4>(CmpLT(Abs(coordinates - Float4(0.5f)), Float4(0.5f)));
    555 		mask = As<Short4>(Int2(As<Int4>(Pack(border, border))));
    556 	}
    557 
    558 	void SamplerCore::border(Int4 &mask, Float4 &coordinates)
    559 	{
    560 		mask = As<Int4>(CmpLT(Abs(coordinates - Float4(0.5f)), Float4(0.5f)));
    561 	}
    562 
    563 	Short4 SamplerCore::offsetSample(Short4 &uvw, Pointer<Byte> &mipmap, int halfOffset, bool wrap, int count, Float &lod)
    564 	{
    565 		Short4 offset = *Pointer<Short4>(mipmap + halfOffset);
    566 
    567 		if(state.textureFilter == FILTER_MIN_LINEAR_MAG_POINT)
    568 		{
    569 			offset &= Short4(CmpNLE(Float4(lod), Float4(0.0f)));
    570 		}
    571 		else if(state.textureFilter == FILTER_MIN_POINT_MAG_LINEAR)
    572 		{
    573 			offset &= Short4(CmpLE(Float4(lod), Float4(0.0f)));
    574 		}
    575 
    576 		if(wrap)
    577 		{
    578 			switch(count)
    579 			{
    580 			case -1: return uvw - offset;
    581 			case  0: return uvw;
    582 			case +1: return uvw + offset;
    583 			case  2: return uvw + offset + offset;
    584 			}
    585 		}
    586 		else   // Clamp or mirror
    587 		{
    588 			switch(count)
    589 			{
    590 			case -1: return SubSat(As<UShort4>(uvw), As<UShort4>(offset));
    591 			case  0: return uvw;
    592 			case +1: return AddSat(As<UShort4>(uvw), As<UShort4>(offset));
    593 			case  2: return AddSat(AddSat(As<UShort4>(uvw), As<UShort4>(offset)), As<UShort4>(offset));
    594 			}
    595 		}
    596 
    597 		return uvw;
    598 	}
    599 
    600 	void SamplerCore::sampleFilter(Pointer<Byte> &texture, Vector4s &c, Float4 &u, Float4 &v, Float4 &w, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, Int face[4], SamplerMethod method)
    601 	{
    602 		sampleAniso(texture, c, u, v, w, lod, anisotropy, uDelta, vDelta, face, false, method);
    603 
    604 		if(state.mipmapFilter > MIPMAP_POINT)
    605 		{
    606 			Vector4s cc;
    607 
    608 			sampleAniso(texture, cc, u, v, w, lod, anisotropy, uDelta, vDelta, face, true, method);
    609 
    610 			lod *= Float(1 << 16);
    611 
    612 			UShort4 utri = UShort4(Float4(lod));   // FIXME: Optimize
    613 			Short4 stri = utri >> 1;   // FIXME: Optimize
    614 
    615 			if(hasUnsignedTextureComponent(0)) cc.x = MulHigh(As<UShort4>(cc.x), utri); else cc.x = MulHigh(cc.x, stri);
    616 			if(hasUnsignedTextureComponent(1)) cc.y = MulHigh(As<UShort4>(cc.y), utri); else cc.y = MulHigh(cc.y, stri);
    617 			if(hasUnsignedTextureComponent(2)) cc.z = MulHigh(As<UShort4>(cc.z), utri); else cc.z = MulHigh(cc.z, stri);
    618 			if(hasUnsignedTextureComponent(3)) cc.w = MulHigh(As<UShort4>(cc.w), utri); else cc.w = MulHigh(cc.w, stri);
    619 
    620 			utri = ~utri;
    621 			stri = Short4(0x7FFF, 0x7FFF, 0x7FFF, 0x7FFF) - stri;
    622 
    623 			if(hasUnsignedTextureComponent(0)) c.x = MulHigh(As<UShort4>(c.x), utri); else c.x = MulHigh(c.x, stri);
    624 			if(hasUnsignedTextureComponent(1)) c.y = MulHigh(As<UShort4>(c.y), utri); else c.y = MulHigh(c.y, stri);
    625 			if(hasUnsignedTextureComponent(2)) c.z = MulHigh(As<UShort4>(c.z), utri); else c.z = MulHigh(c.z, stri);
    626 			if(hasUnsignedTextureComponent(3)) c.w = MulHigh(As<UShort4>(c.w), utri); else c.w = MulHigh(c.w, stri);
    627 
    628 			c.x += cc.x;
    629 			c.y += cc.y;
    630 			c.z += cc.z;
    631 			c.w += cc.w;
    632 
    633 			if(!hasUnsignedTextureComponent(0)) c.x += c.x;
    634 			if(!hasUnsignedTextureComponent(1)) c.y += c.y;
    635 			if(!hasUnsignedTextureComponent(2)) c.z += c.z;
    636 			if(!hasUnsignedTextureComponent(3)) c.w += c.w;
    637 		}
    638 
    639 		Short4 borderMask;
    640 
    641 		if(state.addressingModeU == ADDRESSING_BORDER)
    642 		{
    643 			Short4 u0;
    644 
    645 			border(u0, u);
    646 
    647 			borderMask = u0;
    648 		}
    649 
    650 		if(state.addressingModeV == ADDRESSING_BORDER)
    651 		{
    652 			Short4 v0;
    653 
    654 			border(v0, v);
    655 
    656 			if(state.addressingModeU == ADDRESSING_BORDER)
    657 			{
    658 				borderMask &= v0;
    659 			}
    660 			else
    661 			{
    662 				borderMask = v0;
    663 			}
    664 		}
    665 
    666 		if(state.addressingModeW == ADDRESSING_BORDER && state.textureType == TEXTURE_3D)
    667 		{
    668 			Short4 s0;
    669 
    670 			border(s0, w);
    671 
    672 			if(state.addressingModeU == ADDRESSING_BORDER ||
    673 			   state.addressingModeV == ADDRESSING_BORDER)
    674 			{
    675 				borderMask &= s0;
    676 			}
    677 			else
    678 			{
    679 				borderMask = s0;
    680 			}
    681 		}
    682 
    683 		if(state.addressingModeU == ADDRESSING_BORDER ||
    684 		   state.addressingModeV == ADDRESSING_BORDER ||
    685 		   (state.addressingModeW == ADDRESSING_BORDER && state.textureType == TEXTURE_3D))
    686 		{
    687 			Short4 b;
    688 
    689 			c.x = borderMask & c.x | ~borderMask & (*Pointer<Short4>(texture + OFFSET(Texture,borderColor4[0])) >> (hasUnsignedTextureComponent(0) ? 0 : 1));
    690 			c.y = borderMask & c.y | ~borderMask & (*Pointer<Short4>(texture + OFFSET(Texture,borderColor4[1])) >> (hasUnsignedTextureComponent(1) ? 0 : 1));
    691 			c.z = borderMask & c.z | ~borderMask & (*Pointer<Short4>(texture + OFFSET(Texture,borderColor4[2])) >> (hasUnsignedTextureComponent(2) ? 0 : 1));
    692 			c.w = borderMask & c.w | ~borderMask & (*Pointer<Short4>(texture + OFFSET(Texture,borderColor4[3])) >> (hasUnsignedTextureComponent(3) ? 0 : 1));
    693 		}
    694 	}
    695 
    696 	void SamplerCore::sampleAniso(Pointer<Byte> &texture, Vector4s &c, Float4 &u, Float4 &v, Float4 &w, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, Int face[4], bool secondLOD, SamplerMethod method)
    697 	{
    698 		if(state.textureFilter != FILTER_ANISOTROPIC || method == Lod)
    699 		{
    700 			sampleQuad(texture, c, u, v, w, lod, face, secondLOD);
    701 		}
    702 		else
    703 		{
    704 			Int a = RoundInt(anisotropy);
    705 
    706 			Vector4s cSum;
    707 
    708 			cSum.x = Short4(0, 0, 0, 0);
    709 			cSum.y = Short4(0, 0, 0, 0);
    710 			cSum.z = Short4(0, 0, 0, 0);
    711 			cSum.w = Short4(0, 0, 0, 0);
    712 
    713 			Float4 A = *Pointer<Float4>(constants + OFFSET(Constants,uvWeight) + 16 * a);
    714 			Float4 B = *Pointer<Float4>(constants + OFFSET(Constants,uvStart) + 16 * a);
    715 			UShort4 cw = *Pointer<UShort4>(constants + OFFSET(Constants,cWeight) + 8 * a);
    716 			Short4 sw = Short4(cw >> 1);
    717 
    718 			Float4 du = uDelta;
    719 			Float4 dv = vDelta;
    720 
    721 			Float4 u0 = u + B * du;
    722 			Float4 v0 = v + B * dv;
    723 
    724 			du *= A;
    725 			dv *= A;
    726 
    727 			Int i = 0;
    728 
    729 			Do
    730 			{
    731 				sampleQuad(texture, c, u0, v0, w, lod, face, secondLOD);
    732 
    733 				u0 += du;
    734 				v0 += dv;
    735 
    736 				if(hasUnsignedTextureComponent(0)) cSum.x += As<Short4>(MulHigh(As<UShort4>(c.x), cw)); else cSum.x += MulHigh(c.x, sw);
    737 				if(hasUnsignedTextureComponent(1)) cSum.y += As<Short4>(MulHigh(As<UShort4>(c.y), cw)); else cSum.y += MulHigh(c.y, sw);
    738 				if(hasUnsignedTextureComponent(2)) cSum.z += As<Short4>(MulHigh(As<UShort4>(c.z), cw)); else cSum.z += MulHigh(c.z, sw);
    739 				if(hasUnsignedTextureComponent(3)) cSum.w += As<Short4>(MulHigh(As<UShort4>(c.w), cw)); else cSum.w += MulHigh(c.w, sw);
    740 
    741 				i++;
    742 			}
    743 			Until(i >= a)
    744 
    745 			if(hasUnsignedTextureComponent(0)) c.x = cSum.x; else c.x = AddSat(cSum.x, cSum.x);
    746 			if(hasUnsignedTextureComponent(1)) c.y = cSum.y; else c.y = AddSat(cSum.y, cSum.y);
    747 			if(hasUnsignedTextureComponent(2)) c.z = cSum.z; else c.z = AddSat(cSum.z, cSum.z);
    748 			if(hasUnsignedTextureComponent(3)) c.w = cSum.w; else c.w = AddSat(cSum.w, cSum.w);
    749 		}
    750 	}
    751 
    752 	void SamplerCore::sampleQuad(Pointer<Byte> &texture, Vector4s &c, Float4 &u, Float4 &v, Float4 &w, Float &lod, Int face[4], bool secondLOD)
    753 	{
    754 		if(state.textureType != TEXTURE_3D)
    755 		{
    756 			sampleQuad2D(texture, c, u, v, w, lod, face, secondLOD);
    757 		}
    758 		else
    759 		{
    760 			sample3D(texture, c, u, v, w, lod, secondLOD);
    761 		}
    762 	}
    763 
    764 	void SamplerCore::sampleQuad2D(Pointer<Byte> &texture, Vector4s &c, Float4 &u, Float4 &v, Float4 &w, Float &lod, Int face[4], bool secondLOD)
    765 	{
    766 		int componentCount = textureComponentCount();
    767 		bool gather = state.textureFilter == FILTER_GATHER;
    768 
    769 		Pointer<Byte> mipmap;
    770 		Pointer<Byte> buffer[4];
    771 
    772 		selectMipmap(texture, buffer, mipmap, lod, face, secondLOD);
    773 
    774 		Short4 uuuu = address(u, state.addressingModeU, mipmap);
    775 		Short4 vvvv = address(v, state.addressingModeV, mipmap);
    776 		Short4 wwww = address(w, state.addressingModeW, mipmap);
    777 
    778 		if(state.textureFilter == FILTER_POINT)
    779 		{
    780 			sampleTexel(c, uuuu, vvvv, wwww, mipmap, buffer);
    781 		}
    782 		else
    783 		{
    784 			Vector4s c0;
    785 			Vector4s c1;
    786 			Vector4s c2;
    787 			Vector4s c3;
    788 
    789 			Short4 uuuu0 = offsetSample(uuuu, mipmap, OFFSET(Mipmap,uHalf), state.addressingModeU == ADDRESSING_WRAP, gather ? 0 : -1, lod);
    790 			Short4 vvvv0 = offsetSample(vvvv, mipmap, OFFSET(Mipmap,vHalf), state.addressingModeV == ADDRESSING_WRAP, gather ? 0 : -1, lod);
    791 			Short4 uuuu1 = offsetSample(uuuu, mipmap, OFFSET(Mipmap,uHalf), state.addressingModeU == ADDRESSING_WRAP, gather ? 2 : +1, lod);
    792 			Short4 vvvv1 = offsetSample(vvvv, mipmap, OFFSET(Mipmap,vHalf), state.addressingModeV == ADDRESSING_WRAP, gather ? 2 : +1, lod);
    793 
    794 			sampleTexel(c0, uuuu0, vvvv0, wwww, mipmap, buffer);
    795 			sampleTexel(c1, uuuu1, vvvv0, wwww, mipmap, buffer);
    796 			sampleTexel(c2, uuuu0, vvvv1, wwww, mipmap, buffer);
    797 			sampleTexel(c3, uuuu1, vvvv1, wwww, mipmap, buffer);
    798 
    799 			if(!gather)   // Blend
    800 			{
    801 				// Fractions
    802 				UShort4 f0u = uuuu0;
    803 				UShort4 f0v = vvvv0;
    804 
    805 				if(!state.hasNPOTTexture)
    806 				{
    807 					f0u = f0u << *Pointer<Long1>(mipmap + OFFSET(Mipmap,uInt));   // .u
    808 					f0v = f0v << *Pointer<Long1>(mipmap + OFFSET(Mipmap,vInt));   // .v
    809 				}
    810 				else
    811 				{
    812 					f0u = f0u * *Pointer<UShort4>(mipmap + OFFSET(Mipmap,width));
    813 					f0v = f0v * *Pointer<UShort4>(mipmap + OFFSET(Mipmap,height));
    814 				}
    815 
    816 				UShort4 f1u = ~f0u;
    817 				UShort4 f1v = ~f0v;
    818 
    819 				UShort4 f0u0v = MulHigh(f0u, f0v);
    820 				UShort4 f1u0v = MulHigh(f1u, f0v);
    821 				UShort4 f0u1v = MulHigh(f0u, f1v);
    822 				UShort4 f1u1v = MulHigh(f1u, f1v);
    823 
    824 				// Signed fractions
    825 				Short4 f1u1vs;
    826 				Short4 f0u1vs;
    827 				Short4 f1u0vs;
    828 				Short4 f0u0vs;
    829 
    830 				if(!hasUnsignedTextureComponent(0) || !hasUnsignedTextureComponent(1) || !hasUnsignedTextureComponent(2) || !hasUnsignedTextureComponent(3))
    831 				{
    832 					f1u1vs = f1u1v >> 1;
    833 					f0u1vs = f0u1v >> 1;
    834 					f1u0vs = f1u0v >> 1;
    835 					f0u0vs = f0u0v >> 1;
    836 				}
    837 
    838 				// Bilinear interpolation
    839 				if(componentCount >= 1)
    840 				{
    841 					if(has16bitTextureComponents() && hasUnsignedTextureComponent(0))
    842 					{
    843 						c0.x = As<UShort4>(c0.x) - MulHigh(As<UShort4>(c0.x), f0u) + MulHigh(As<UShort4>(c1.x), f0u);
    844 						c2.x = As<UShort4>(c2.x) - MulHigh(As<UShort4>(c2.x), f0u) + MulHigh(As<UShort4>(c3.x), f0u);
    845 						c.x  = As<UShort4>(c0.x) - MulHigh(As<UShort4>(c0.x), f0v) + MulHigh(As<UShort4>(c2.x), f0v);
    846 					}
    847 					else
    848 					{
    849 						if(hasUnsignedTextureComponent(0))
    850 						{
    851 							c0.x = MulHigh(As<UShort4>(c0.x), f1u1v);
    852 							c1.x = MulHigh(As<UShort4>(c1.x), f0u1v);
    853 							c2.x = MulHigh(As<UShort4>(c2.x), f1u0v);
    854 							c3.x = MulHigh(As<UShort4>(c3.x), f0u0v);
    855 						}
    856 						else
    857 						{
    858 							c0.x = MulHigh(c0.x, f1u1vs);
    859 							c1.x = MulHigh(c1.x, f0u1vs);
    860 							c2.x = MulHigh(c2.x, f1u0vs);
    861 							c3.x = MulHigh(c3.x, f0u0vs);
    862 						}
    863 
    864 						c.x = (c0.x + c1.x) + (c2.x + c3.x);
    865 						if(!hasUnsignedTextureComponent(0)) c.x = AddSat(c.x, c.x);   // Correct for signed fractions
    866 					}
    867 				}
    868 
    869 				if(componentCount >= 2)
    870 				{
    871 					if(has16bitTextureComponents() && hasUnsignedTextureComponent(1))
    872 					{
    873 						c0.y = As<UShort4>(c0.y) - MulHigh(As<UShort4>(c0.y), f0u) + MulHigh(As<UShort4>(c1.y), f0u);
    874 						c2.y = As<UShort4>(c2.y) - MulHigh(As<UShort4>(c2.y), f0u) + MulHigh(As<UShort4>(c3.y), f0u);
    875 						c.y  = As<UShort4>(c0.y) - MulHigh(As<UShort4>(c0.y), f0v) + MulHigh(As<UShort4>(c2.y), f0v);
    876 					}
    877 					else
    878 					{
    879 						if(hasUnsignedTextureComponent(1))
    880 						{
    881 							c0.y = MulHigh(As<UShort4>(c0.y), f1u1v);
    882 							c1.y = MulHigh(As<UShort4>(c1.y), f0u1v);
    883 							c2.y = MulHigh(As<UShort4>(c2.y), f1u0v);
    884 							c3.y = MulHigh(As<UShort4>(c3.y), f0u0v);
    885 						}
    886 						else
    887 						{
    888 							c0.y = MulHigh(c0.y, f1u1vs);
    889 							c1.y = MulHigh(c1.y, f0u1vs);
    890 							c2.y = MulHigh(c2.y, f1u0vs);
    891 							c3.y = MulHigh(c3.y, f0u0vs);
    892 						}
    893 
    894 						c.y = (c0.y + c1.y) + (c2.y + c3.y);
    895 						if(!hasUnsignedTextureComponent(1)) c.y = AddSat(c.y, c.y);   // Correct for signed fractions
    896 					}
    897 				}
    898 
    899 				if(componentCount >= 3)
    900 				{
    901 					if(has16bitTextureComponents() && hasUnsignedTextureComponent(2))
    902 					{
    903 						c0.z = As<UShort4>(c0.z) - MulHigh(As<UShort4>(c0.z), f0u) + MulHigh(As<UShort4>(c1.z), f0u);
    904 						c2.z = As<UShort4>(c2.z) - MulHigh(As<UShort4>(c2.z), f0u) + MulHigh(As<UShort4>(c3.z), f0u);
    905 						c.z  = As<UShort4>(c0.z) - MulHigh(As<UShort4>(c0.z), f0v) + MulHigh(As<UShort4>(c2.z), f0v);
    906 					}
    907 					else
    908 					{
    909 						if(hasUnsignedTextureComponent(2))
    910 						{
    911 							c0.z = MulHigh(As<UShort4>(c0.z), f1u1v);
    912 							c1.z = MulHigh(As<UShort4>(c1.z), f0u1v);
    913 							c2.z = MulHigh(As<UShort4>(c2.z), f1u0v);
    914 							c3.z = MulHigh(As<UShort4>(c3.z), f0u0v);
    915 						}
    916 						else
    917 						{
    918 							c0.z = MulHigh(c0.z, f1u1vs);
    919 							c1.z = MulHigh(c1.z, f0u1vs);
    920 							c2.z = MulHigh(c2.z, f1u0vs);
    921 							c3.z = MulHigh(c3.z, f0u0vs);
    922 						}
    923 
    924 						c.z = (c0.z + c1.z) + (c2.z + c3.z);
    925 						if(!hasUnsignedTextureComponent(2)) c.z = AddSat(c.z, c.z);   // Correct for signed fractions
    926 					}
    927 				}
    928 
    929 				if(componentCount >= 4)
    930 				{
    931 					if(has16bitTextureComponents() && hasUnsignedTextureComponent(3))
    932 					{
    933 						c0.w = As<UShort4>(c0.w) - MulHigh(As<UShort4>(c0.w), f0u) + MulHigh(As<UShort4>(c1.w), f0u);
    934 						c2.w = As<UShort4>(c2.w) - MulHigh(As<UShort4>(c2.w), f0u) + MulHigh(As<UShort4>(c3.w), f0u);
    935 						c.w  = As<UShort4>(c0.w) - MulHigh(As<UShort4>(c0.w), f0v) + MulHigh(As<UShort4>(c2.w), f0v);
    936 					}
    937 					else
    938 					{
    939 						if(hasUnsignedTextureComponent(3))
    940 						{
    941 							c0.w = MulHigh(As<UShort4>(c0.w), f1u1v);
    942 							c1.w = MulHigh(As<UShort4>(c1.w), f0u1v);
    943 							c2.w = MulHigh(As<UShort4>(c2.w), f1u0v);
    944 							c3.w = MulHigh(As<UShort4>(c3.w), f0u0v);
    945 						}
    946 						else
    947 						{
    948 							c0.w = MulHigh(c0.w, f1u1vs);
    949 							c1.w = MulHigh(c1.w, f0u1vs);
    950 							c2.w = MulHigh(c2.w, f1u0vs);
    951 							c3.w = MulHigh(c3.w, f0u0vs);
    952 						}
    953 
    954 						c.w = (c0.w + c1.w) + (c2.w + c3.w);
    955 						if(!hasUnsignedTextureComponent(3)) c.w = AddSat(c.w, c.w);   // Correct for signed fractions
    956 					}
    957 				}
    958 			}
    959 			else
    960 			{
    961 				c.x = c1.x;
    962 				c.y = c2.x;
    963 				c.z = c3.x;
    964 				c.w = c0.x;
    965 			}
    966 		}
    967 	}
    968 
    969 	void SamplerCore::sample3D(Pointer<Byte> &texture, Vector4s &c_, Float4 &u_, Float4 &v_, Float4 &w_, Float &lod, bool secondLOD)
    970 	{
        		// Samples a 3D texture at one mipmap level and writes the result to the
        		// 16-bit vector c_. With FILTER_POINT a single texel is fetched; with any
        		// other filter the 2x2x2 texel neighbourhood is fetched and blended using
        		// per-corner weights built from the fractional u/v/w positions.
        		// 'secondLOD' is only forwarded to selectMipmap().
    971 		int componentCount = textureComponentCount();
    972 
    973 		Pointer<Byte> mipmap;
    974 		Pointer<Byte> buffer[4];
        		// 'face' is only passed to selectMipmap(); it is not read anywhere else here.
    975 		Int face[4];
    976 
    977 		selectMipmap(texture, buffer, mipmap, lod, face, secondLOD);
    978 
    979 		Short4 uuuu = address(u_, state.addressingModeU, mipmap);
    980 		Short4 vvvv = address(v_, state.addressingModeV, mipmap);
    981 		Short4 wwww = address(w_, state.addressingModeW, mipmap);
    982 
    983 		if(state.textureFilter == FILTER_POINT)
    984 		{
    985 			sampleTexel(c_, uuuu, vvvv, wwww, mipmap, buffer);
    986 		}
    987 		else
    988 		{
    989 			Vector4s c[2][2][2];
    990 
    991 			Short4 u[2][2][2];
    992 			Short4 v[2][2][2];
    993 			Short4 s[2][2][2];
    994 
        			// Corner coordinates: index 0 uses offset -1 and index 1 uses offset +1
        			// (i * 2 - 1 etc.). u depends only on i, v only on j and s only on k.
    995 			for(int i = 0; i < 2; i++)
    996 			{
    997 				for(int j = 0; j < 2; j++)
    998 				{
    999 					for(int k = 0; k < 2; k++)
   1000 					{
   1001 						u[i][j][k] = offsetSample(uuuu, mipmap, OFFSET(Mipmap,uHalf), state.addressingModeU == ADDRESSING_WRAP, i * 2 - 1, lod);
   1002 						v[i][j][k] = offsetSample(vvvv, mipmap, OFFSET(Mipmap,vHalf), state.addressingModeV == ADDRESSING_WRAP, j * 2 - 1, lod);
   1003 						s[i][j][k] = offsetSample(wwww, mipmap, OFFSET(Mipmap,wHalf), state.addressingModeW == ADDRESSING_WRAP, k * 2 - 1, lod);
   1004 					}
   1005 				}
   1006 			}
   1007 
   1008 			// Fractions
        			// f:  unsigned per-corner weights; fs: halved copies used for signed components.
        			// f0u/f0v/f0s: per-axis fractions derived from the -1 offset corner [0][0][0].
   1009 			UShort4 f[2][2][2];
   1010 			Short4 fs[2][2][2];
   1011 			UShort4 f0u;
   1012 			UShort4 f0v;
   1013 			UShort4 f0s;
   1014 
   1015 			if(!state.hasNPOTTexture)
   1016 			{
        				// Power-of-two path: shift left by the mipmap's uInt/vInt/wInt amounts.
   1017 				f0u = As<UShort4>(u[0][0][0]) << *Pointer<Long1>(mipmap + OFFSET(Mipmap,uInt));
   1018 				f0v = As<UShort4>(v[0][0][0]) << *Pointer<Long1>(mipmap + OFFSET(Mipmap,vInt));
   1019 				f0s = As<UShort4>(s[0][0][0]) << *Pointer<Long1>(mipmap + OFFSET(Mipmap,wInt));
   1020 			}
   1021 			else
   1022 			{
        				// Non-power-of-two path: 16-bit multiply by the mipmap's width/height/depth.
   1023 				f0u = As<UShort4>(u[0][0][0]) * *Pointer<UShort4>(mipmap + OFFSET(Mipmap,width));
   1024 				f0v = As<UShort4>(v[0][0][0]) * *Pointer<UShort4>(mipmap + OFFSET(Mipmap,height));
   1025 				f0s = As<UShort4>(s[0][0][0]) * *Pointer<UShort4>(mipmap + OFFSET(Mipmap,depth));
   1026 			}
   1027 
        			// Complementary fractions (bitwise NOT of the 16-bit values).
   1028 			UShort4 f1u = ~f0u;
   1029 			UShort4 f1v = ~f0v;
   1030 			UShort4 f1s = ~f0s;
   1031 
        			// First pass: u * v factor. The [..][..][1] and [..][..][0] entries receive
        			// the same u/v product here and only diverge in the second pass below.
   1032 			f[1][1][1] = MulHigh(f1u, f1v);
   1033 			f[0][1][1] = MulHigh(f0u, f1v);
   1034 			f[1][0][1] = MulHigh(f1u, f0v);
   1035 			f[0][0][1] = MulHigh(f0u, f0v);
   1036 			f[1][1][0] = MulHigh(f1u, f1v);
   1037 			f[0][1][0] = MulHigh(f0u, f1v);
   1038 			f[1][0][0] = MulHigh(f1u, f0v);
   1039 			f[0][0][0] = MulHigh(f0u, f0v);
   1040 
        			// Second pass: multiply in the w factor (f1s for last index 1, f0s for 0).
   1041 			f[1][1][1] = MulHigh(f[1][1][1], f1s);
   1042 			f[0][1][1] = MulHigh(f[0][1][1], f1s);
   1043 			f[1][0][1] = MulHigh(f[1][0][1], f1s);
   1044 			f[0][0][1] = MulHigh(f[0][0][1], f1s);
   1045 			f[1][1][0] = MulHigh(f[1][1][0], f0s);
   1046 			f[0][1][0] = MulHigh(f[0][1][0], f0s);
   1047 			f[1][0][0] = MulHigh(f[1][0][0], f0s);
   1048 			f[0][0][0] = MulHigh(f[0][0][0], f0s);
   1049 
   1050 			// Signed fractions
        			// Only needed when at least one component is not unsigned. The halving is
        			// compensated by the AddSat(c, c) doubling at the end of this function.
   1051 			if(!hasUnsignedTextureComponent(0) || !hasUnsignedTextureComponent(1) || !hasUnsignedTextureComponent(2) || !hasUnsignedTextureComponent(3))
   1052 			{
   1053 				fs[0][0][0] = f[0][0][0] >> 1;
   1054 				fs[0][0][1] = f[0][0][1] >> 1;
   1055 				fs[0][1][0] = f[0][1][0] >> 1;
   1056 				fs[0][1][1] = f[0][1][1] >> 1;
   1057 				fs[1][0][0] = f[1][0][0] >> 1;
   1058 				fs[1][0][1] = f[1][0][1] >> 1;
   1059 				fs[1][1][0] = f[1][1][0] >> 1;
   1060 				fs[1][1][1] = f[1][1][1] >> 1;
   1061 			}
   1062 
        			// Fetch each corner, scale it by its weight and accumulate into c[0][0][0].
   1063 			for(int i = 0; i < 2; i++)
   1064 			{
   1065 				for(int j = 0; j < 2; j++)
   1066 				{
   1067 					for(int k = 0; k < 2; k++)
   1068 					{
   1069 						sampleTexel(c[i][j][k], u[i][j][k], v[i][j][k], s[i][j][k], mipmap, buffer);
   1070 
        						// Corner (i, j, k) uses the weight stored at the mirrored index
        						// [1 - i][1 - j][1 - k]; unsigned components use f, the others fs.
   1071 						if(componentCount >= 1) { if(hasUnsignedTextureComponent(0)) c[i][j][k].x = MulHigh(As<UShort4>(c[i][j][k].x), f[1 - i][1 - j][1 - k]); else c[i][j][k].x = MulHigh(c[i][j][k].x, fs[1 - i][1 - j][1 - k]); }
   1072 						if(componentCount >= 2) { if(hasUnsignedTextureComponent(1)) c[i][j][k].y = MulHigh(As<UShort4>(c[i][j][k].y), f[1 - i][1 - j][1 - k]); else c[i][j][k].y = MulHigh(c[i][j][k].y, fs[1 - i][1 - j][1 - k]); }
   1073 						if(componentCount >= 3) { if(hasUnsignedTextureComponent(2)) c[i][j][k].z = MulHigh(As<UShort4>(c[i][j][k].z), f[1 - i][1 - j][1 - k]); else c[i][j][k].z = MulHigh(c[i][j][k].z, fs[1 - i][1 - j][1 - k]); }
   1074 						if(componentCount >= 4) { if(hasUnsignedTextureComponent(3)) c[i][j][k].w = MulHigh(As<UShort4>(c[i][j][k].w), f[1 - i][1 - j][1 - k]); else c[i][j][k].w = MulHigh(c[i][j][k].w, fs[1 - i][1 - j][1 - k]); }
   1075 
        						// c[0][0][0] is the accumulator, so it is not added to itself.
   1076 						if(i != 0 || j != 0 || k != 0)
   1077 						{
   1078 							if(componentCount >= 1) c[0][0][0].x += c[i][j][k].x;
   1079 							if(componentCount >= 2) c[0][0][0].y += c[i][j][k].y;
   1080 							if(componentCount >= 3) c[0][0][0].z += c[i][j][k].z;
   1081 							if(componentCount >= 4) c[0][0][0].w += c[i][j][k].w;
   1082 						}
   1083 					}
   1084 				}
   1085 			}
   1086 
        			// Components beyond componentCount are left untouched in c_.
   1087 			if(componentCount >= 1) c_.x = c[0][0][0].x;
   1088 			if(componentCount >= 2) c_.y = c[0][0][0].y;
   1089 			if(componentCount >= 3) c_.z = c[0][0][0].z;
   1090 			if(componentCount >= 4) c_.w = c[0][0][0].w;
   1091 
   1092 			// Correct for signed fractions
   1093 			if(componentCount >= 1) if(!hasUnsignedTextureComponent(0)) c_.x = AddSat(c_.x, c_.x);
   1094 			if(componentCount >= 2) if(!hasUnsignedTextureComponent(1)) c_.y = AddSat(c_.y, c_.y);
   1095 			if(componentCount >= 3) if(!hasUnsignedTextureComponent(2)) c_.z = AddSat(c_.z, c_.z);
   1096 			if(componentCount >= 4) if(!hasUnsignedTextureComponent(3)) c_.w = AddSat(c_.w, c_.w);
   1097 		}
   1098 	}
   1099 
   1100 	void SamplerCore::sampleFloatFilter(Pointer<Byte> &texture, Vector4f &c, Float4 &u, Float4 &v, Float4 &w, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, Int face[4], SamplerMethod method)
   1101 	{
        		// Floating-point sampling entry point for one texture lookup:
        		//  1. sample the first mipmap level,
        		//  2. if the mipmap filter is above MIPMAP_POINT, sample the second level
        		//     and interpolate between the two with the fractional part of 'lod',
        		//  3. for every axis using ADDRESSING_BORDER, replace the lanes rejected by
        		//     border() with the texture's borderColorF.
        		// The result is written to 'c'.
   1102 		sampleFloatAniso(texture, c, u, v, w, lod, anisotropy, uDelta, vDelta, face, false, method);
   1103 
   1104 		if(state.mipmapFilter > MIPMAP_POINT)
   1105 		{
   1106 			Vector4f cc;
   1107 
   1108 			sampleFloatAniso(texture, cc, u, v, w, lod, anisotropy, uDelta, vDelta, face, true, method);
   1109 
   1110 			Float4 lod4 = Float4(Frac(lod));
   1111 
        			// c = c + (cc - c) * frac(lod)
   1112 			c.x = (cc.x - c.x) * lod4 + c.x;
   1113 			c.y = (cc.y - c.y) * lod4 + c.y;
   1114 			c.z = (cc.z - c.z) * lod4 + c.z;
   1115 			c.w = (cc.w - c.w) * lod4 + c.w;
   1116 		}
   1117 
        		// Combined per-lane mask over all border-addressed axes. It is only assigned
        		// (and only read below) when at least one axis uses ADDRESSING_BORDER.
   1118 		Int4 borderMask;
   1119 
   1120 		if(state.addressingModeU == ADDRESSING_BORDER)
   1121 		{
   1122 			Int4 u0;
   1123 
   1124 			border(u0, u);
   1125 
   1126 			borderMask = u0;
   1127 		}
   1128 
   1129 		if(state.addressingModeV == ADDRESSING_BORDER)
   1130 		{
   1131 			Int4 v0;
   1132 
   1133 			border(v0, v);
   1134 
        			// AND with the u mask if there is one, otherwise start the mask here.
   1135 			if(state.addressingModeU == ADDRESSING_BORDER)
   1136 			{
   1137 				borderMask &= v0;
   1138 			}
   1139 			else
   1140 			{
   1141 				borderMask = v0;
   1142 			}
   1143 		}
   1144 
        		// The w axis only takes part for 3D textures.
   1145 		if(state.addressingModeW == ADDRESSING_BORDER && state.textureType == TEXTURE_3D)
   1146 		{
   1147 			Int4 s0;
   1148 
   1149 			border(s0, w);
   1150 
   1151 			if(state.addressingModeU == ADDRESSING_BORDER ||
   1152 			   state.addressingModeV == ADDRESSING_BORDER)
   1153 			{
   1154 				borderMask &= s0;
   1155 			}
   1156 			else
   1157 			{
   1158 				borderMask = s0;
   1159 			}
   1160 		}
   1161 
   1162 		if(state.addressingModeU == ADDRESSING_BORDER ||
   1163 		   state.addressingModeV == ADDRESSING_BORDER ||
   1164 		   (state.addressingModeW == ADDRESSING_BORDER && state.textureType == TEXTURE_3D))
   1165 		{
        			// Bitwise select: lanes with the mask set keep the sampled value, the
        			// remaining lanes take the corresponding borderColorF channel.
   1168 			c.x = As<Float4>((borderMask & As<Int4>(c.x)) | (~borderMask & *Pointer<Int4>(texture + OFFSET(Texture,borderColorF[0]))));
   1169 			c.y = As<Float4>((borderMask & As<Int4>(c.y)) | (~borderMask & *Pointer<Int4>(texture + OFFSET(Texture,borderColorF[1]))));
   1170 			c.z = As<Float4>((borderMask & As<Int4>(c.z)) | (~borderMask & *Pointer<Int4>(texture + OFFSET(Texture,borderColorF[2]))));
   1171 			c.w = As<Float4>((borderMask & As<Int4>(c.w)) | (~borderMask & *Pointer<Int4>(texture + OFFSET(Texture,borderColorF[3]))));
   1172 		}
   1173 	}
   1174 
   1175 	void SamplerCore::sampleFloatAniso(Pointer<Byte> &texture, Vector4f &c, Float4 &u, Float4 &v, Float4 &w, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, Int face[4], bool secondLOD, SamplerMethod method)
   1176 	{
        		// Anisotropic path: take RoundInt(anisotropy) taps along (uDelta, vDelta)
        		// and return their weighted sum in 'c'. Any other filter, or an explicit
        		// Lod lookup, falls through to a single sampleFloat() call.
   1177 		if(state.textureFilter == FILTER_ANISOTROPIC && method != Lod)
   1178 		{
   1179 			Int taps = RoundInt(anisotropy);
   1180 
   1181 			Vector4f total;
   1182 
   1183 			total.x = Float4(0.0f);
   1184 			total.y = Float4(0.0f);
   1185 			total.z = Float4(0.0f);
   1186 			total.w = Float4(0.0f);
   1187 
        			// Per-tap weight and start offset, read from constant tables that are
        			// indexed by the tap count (16 bytes per entry).
   1188 			Float4 weight = *Pointer<Float4>(constants + OFFSET(Constants,uvWeight) + 16 * taps);
   1189 			Float4 start = *Pointer<Float4>(constants + OFFSET(Constants,uvStart) + 16 * taps);
   1190 
   1191 			Float4 stepU = uDelta;
   1192 			Float4 stepV = vDelta;
   1193 
        			// The first tap position uses the unscaled deltas ...
   1194 			Float4 tapU = u + start * stepU;
   1195 			Float4 tapV = v + start * stepV;
   1196 
        			// ... and the per-tap step is the delta scaled by the weight.
   1197 			stepU *= weight;
   1198 			stepV *= weight;
   1199 
   1200 			Int done = 0;
   1201 
        			// Do/Until: the body is emitted before the exit test.
   1202 			Do
   1203 			{
   1204 				sampleFloat(texture, c, tapU, tapV, w, lod, face, secondLOD);
   1205 
   1206 				tapU += stepU;
   1207 				tapV += stepV;
   1208 
   1209 				total.x += c.x * weight;
   1210 				total.y += c.y * weight;
   1211 				total.z += c.z * weight;
   1212 				total.w += c.w * weight;
   1213 
   1214 				done++;
   1215 			}
   1216 			Until(done >= taps)
   1217 
   1218 			c.x = total.x;
   1219 			c.y = total.y;
   1220 			c.z = total.z;
   1221 			c.w = total.w;
   1222 		}
   1223 		else
   1224 		{
   1225 			sampleFloat(texture, c, u, v, w, lod, face, secondLOD);
   1226 		}
   1227 	}
   1228 
   1229 	void SamplerCore::sampleFloat(Pointer<Byte> &texture, Vector4f &c, Float4 &u, Float4 &v, Float4 &w, Float &lod, Int face[4], bool secondLOD)
   1230 	{
        		// Dispatch on the texture type: 3D textures use the volume sampler (which
        		// takes no 'face' argument), every other type uses the 2D sampler.
   1231 		if(state.textureType == TEXTURE_3D)
   1232 		{
   1233 			sampleFloat3D(texture, c, u, v, w, lod, secondLOD);
   1234 		}
   1235 		else
   1236 		{
   1237 			sampleFloat2D(texture, c, u, v, w, lod, face, secondLOD);
   1238 		}
   1239 	}
   1240 
   1241 	void SamplerCore::sampleFloat2D(Pointer<Byte> &texture, Vector4f &c, Float4 &u, Float4 &v, Float4 &w, Float &lod, Int face[4], bool secondLOD)
   1242 	{
        		// Floating-point sampling of a non-3D texture at one mipmap level.
        		//  - FILTER_POINT:  one texel per lane.
        		//  - FILTER_GATHER: four texels are fetched and their x channels are
        		//                   returned in c.x..c.w without blending.
        		//  - otherwise:     four texels are fetched and blended bilinearly.
        		// 'face' and 'secondLOD' are only forwarded to selectMipmap().
   1243 		int componentCount = textureComponentCount();
   1244 		bool gather = state.textureFilter == FILTER_GATHER;
   1245 
   1246 		Pointer<Byte> mipmap;
   1247 		Pointer<Byte> buffer[4];
   1248 
   1249 		selectMipmap(texture, buffer, mipmap, lod, face, secondLOD);
   1250 
   1251 		Short4 uuuu = address(u, state.addressingModeU, mipmap);
   1252 		Short4 vvvv = address(v, state.addressingModeV, mipmap);
   1253 		Short4 wwww = address(w, state.addressingModeW, mipmap);
   1254 
   1255 		if(state.textureFilter == FILTER_POINT)
   1256 		{
   1257 			sampleTexel(c, uuuu, vvvv, wwww, w, mipmap, buffer);
   1258 		}
   1259 		else
   1260 		{
        			// c0: (u0, v0), c1: (u1, v0), c2: (u0, v1), c3: (u1, v1)
   1261 			Vector4f c0;
   1262 			Vector4f c1;
   1263 			Vector4f c2;
   1264 			Vector4f c3;
   1265 
        			// Gather uses offsets 0 and 2; filtering uses offsets -1 and +1.
   1266 			Short4 uuuu0 = offsetSample(uuuu, mipmap, OFFSET(Mipmap,uHalf), state.addressingModeU == ADDRESSING_WRAP, gather ? 0 : -1, lod);
   1267 			Short4 vvvv0 = offsetSample(vvvv, mipmap, OFFSET(Mipmap,vHalf), state.addressingModeV == ADDRESSING_WRAP, gather ? 0 : -1, lod);
   1268 			Short4 uuuu1 = offsetSample(uuuu, mipmap, OFFSET(Mipmap,uHalf), state.addressingModeU == ADDRESSING_WRAP, gather ? 2 : +1, lod);
   1269 			Short4 vvvv1 = offsetSample(vvvv, mipmap, OFFSET(Mipmap,vHalf), state.addressingModeV == ADDRESSING_WRAP, gather ? 2 : +1, lod);
   1270 
   1271 			sampleTexel(c0, uuuu0, vvvv0, wwww, w, mipmap, buffer);
   1272 			sampleTexel(c1, uuuu1, vvvv0, wwww, w, mipmap, buffer);
   1273 			sampleTexel(c2, uuuu0, vvvv1, wwww, w, mipmap, buffer);
   1274 			sampleTexel(c3, uuuu1, vvvv1, wwww, w, mipmap, buffer);
   1275 
   1276 			if(!gather)   // Blend
   1277 			{
   1278 				// Fractions
        				// Taken from the lower (offset -1) coordinate, scaled by the
        				// mipmap's fWidth / fHeight.
   1279 				Float4 fu = Frac(Float4(As<UShort4>(uuuu0)) * *Pointer<Float4>(mipmap + OFFSET(Mipmap,fWidth)));
   1280 				Float4 fv = Frac(Float4(As<UShort4>(vvvv0)) * *Pointer<Float4>(mipmap + OFFSET(Mipmap,fHeight)));
   1281 
        				// Interpolate along u within the v0 row ...
   1282 				if(componentCount >= 1) c0.x = c0.x + fu * (c1.x - c0.x);
   1283 				if(componentCount >= 2) c0.y = c0.y + fu * (c1.y - c0.y);
   1284 				if(componentCount >= 3) c0.z = c0.z + fu * (c1.z - c0.z);
   1285 				if(componentCount >= 4) c0.w = c0.w + fu * (c1.w - c0.w);
   1286 
        				// ... and within the v1 row ...
   1287 				if(componentCount >= 1) c2.x = c2.x + fu * (c3.x - c2.x);
   1288 				if(componentCount >= 2) c2.y = c2.y + fu * (c3.y - c2.y);
   1289 				if(componentCount >= 3) c2.z = c2.z + fu * (c3.z - c2.z);
   1290 				if(componentCount >= 4) c2.w = c2.w + fu * (c3.w - c2.w);
   1291 
        				// ... then along v between the two rows. Components beyond
        				// componentCount are not written to c here.
   1292 				if(componentCount >= 1) c.x = c0.x + fv * (c2.x - c0.x);
   1293 				if(componentCount >= 2) c.y = c0.y + fv * (c2.y - c0.y);
   1294 				if(componentCount >= 3) c.z = c0.z + fv * (c2.z - c0.z);
   1295 				if(componentCount >= 4) c.w = c0.w + fv * (c2.w - c0.w);
   1296 			}
   1297 			else
   1298 			{
        				// Gather: x channel of the four texels, in the order c1, c2, c3, c0.
   1299 				c.x = c1.x;
   1300 				c.y = c2.x;
   1301 				c.z = c3.x;
   1302 				c.w = c0.x;
   1303 			}
   1304 		}
   1305 	}
   1306 
   1307 	void SamplerCore::sampleFloat3D(Pointer<Byte> &texture, Vector4f &c, Float4 &u, Float4 &v, Float4 &w, Float &lod, bool secondLOD)
   1308 	{
        		// Floating-point sampling of a 3D texture at one mipmap level.
        		// FILTER_POINT fetches one texel per lane; any other filter fetches the
        		// eight surrounding texels and blends them along u, then v, then w.
   1309 		int componentCount = textureComponentCount();
   1310 
   1311 		Pointer<Byte> mipmap;
   1312 		Pointer<Byte> buffer[4];
        		// 'face' is only passed to selectMipmap(); it is not read anywhere else here.
   1313 		Int face[4];
   1314 
   1315 		selectMipmap(texture, buffer, mipmap, lod, face, secondLOD);
   1316 
   1317 		Short4 uuuu = address(u, state.addressingModeU, mipmap);
   1318 		Short4 vvvv = address(v, state.addressingModeV, mipmap);
   1319 		Short4 wwww = address(w, state.addressingModeW, mipmap);
   1320 
   1321 		if(state.textureFilter == FILTER_POINT)
   1322 		{
   1323 			sampleTexel(c, uuuu, vvvv, wwww, w, mipmap, buffer);
   1324 		}
   1325 		else
   1326 		{
        			// c0 aliases the output, so the final blend result lands directly in c.
        			// c0..c3 are the w0 slice, c4..c7 the w1 slice.
   1327 			Vector4f &c0 = c;
   1328 			Vector4f c1;
   1329 			Vector4f c2;
   1330 			Vector4f c3;
   1331 			Vector4f c4;
   1332 			Vector4f c5;
   1333 			Vector4f c6;
   1334 			Vector4f c7;
   1335 
        			// Coordinates offset by -1 (suffix 0) and +1 (suffix 1) along each axis.
   1336 			Short4 uuuu0 = offsetSample(uuuu, mipmap, OFFSET(Mipmap,uHalf), state.addressingModeU == ADDRESSING_WRAP, -1, lod);
   1337 			Short4 vvvv0 = offsetSample(vvvv, mipmap, OFFSET(Mipmap,vHalf), state.addressingModeV == ADDRESSING_WRAP, -1, lod);
   1338 			Short4 wwww0 = offsetSample(wwww, mipmap, OFFSET(Mipmap,wHalf), state.addressingModeW == ADDRESSING_WRAP, -1, lod);
   1339 			Short4 uuuu1 = offsetSample(uuuu, mipmap, OFFSET(Mipmap,uHalf), state.addressingModeU == ADDRESSING_WRAP, +1, lod);
   1340 			Short4 vvvv1 = offsetSample(vvvv, mipmap, OFFSET(Mipmap,vHalf), state.addressingModeV == ADDRESSING_WRAP, +1, lod);
   1341 			Short4 wwww1 = offsetSample(wwww, mipmap, OFFSET(Mipmap,wHalf), state.addressingModeW == ADDRESSING_WRAP, +1, lod);
   1342 
   1343 			sampleTexel(c0, uuuu0, vvvv0, wwww0, w, mipmap, buffer);
   1344 			sampleTexel(c1, uuuu1, vvvv0, wwww0, w, mipmap, buffer);
   1345 			sampleTexel(c2, uuuu0, vvvv1, wwww0, w, mipmap, buffer);
   1346 			sampleTexel(c3, uuuu1, vvvv1, wwww0, w, mipmap, buffer);
   1347 			sampleTexel(c4, uuuu0, vvvv0, wwww1, w, mipmap, buffer);
   1348 			sampleTexel(c5, uuuu1, vvvv0, wwww1, w, mipmap, buffer);
   1349 			sampleTexel(c6, uuuu0, vvvv1, wwww1, w, mipmap, buffer);
   1350 			sampleTexel(c7, uuuu1, vvvv1, wwww1, w, mipmap, buffer);
   1351 
   1352 			// Fractions
        			// Taken from the lower (offset -1) coordinates, scaled by the mipmap's
        			// fWidth / fHeight / fDepth.
   1353 			Float4 fu = Frac(Float4(As<UShort4>(uuuu0)) * *Pointer<Float4>(mipmap + OFFSET(Mipmap,fWidth)));
   1354 			Float4 fv = Frac(Float4(As<UShort4>(vvvv0)) * *Pointer<Float4>(mipmap + OFFSET(Mipmap,fHeight)));
   1355 			Float4 fw = Frac(Float4(As<UShort4>(wwww0)) * *Pointer<Float4>(mipmap + OFFSET(Mipmap,fDepth)));
   1356 
   1357 			// Blend first slice
   1358 			if(componentCount >= 1) c0.x = c0.x + fu * (c1.x - c0.x);
   1359 			if(componentCount >= 2) c0.y = c0.y + fu * (c1.y - c0.y);
   1360 			if(componentCount >= 3) c0.z = c0.z + fu * (c1.z - c0.z);
   1361 			if(componentCount >= 4) c0.w = c0.w + fu * (c1.w - c0.w);
   1362 
   1363 			if(componentCount >= 1) c2.x = c2.x + fu * (c3.x - c2.x);
   1364 			if(componentCount >= 2) c2.y = c2.y + fu * (c3.y - c2.y);
   1365 			if(componentCount >= 3) c2.z = c2.z + fu * (c3.z - c2.z);
   1366 			if(componentCount >= 4) c2.w = c2.w + fu * (c3.w - c2.w);
   1367 
   1368 			if(componentCount >= 1) c0.x = c0.x + fv * (c2.x - c0.x);
   1369 			if(componentCount >= 2) c0.y = c0.y + fv * (c2.y - c0.y);
   1370 			if(componentCount >= 3) c0.z = c0.z + fv * (c2.z - c0.z);
   1371 			if(componentCount >= 4) c0.w = c0.w + fv * (c2.w - c0.w);
   1372 
   1373 			// Blend second slice
   1374 			if(componentCount >= 1) c4.x = c4.x + fu * (c5.x - c4.x);
   1375 			if(componentCount >= 2) c4.y = c4.y + fu * (c5.y - c4.y);
   1376 			if(componentCount >= 3) c4.z = c4.z + fu * (c5.z - c4.z);
   1377 			if(componentCount >= 4) c4.w = c4.w + fu * (c5.w - c4.w);
   1378 
   1379 			if(componentCount >= 1) c6.x = c6.x + fu * (c7.x - c6.x);
   1380 			if(componentCount >= 2) c6.y = c6.y + fu * (c7.y - c6.y);
   1381 			if(componentCount >= 3) c6.z = c6.z + fu * (c7.z - c6.z);
   1382 			if(componentCount >= 4) c6.w = c6.w + fu * (c7.w - c6.w);
   1383 
   1384 			if(componentCount >= 1) c4.x = c4.x + fv * (c6.x - c4.x);
   1385 			if(componentCount >= 2) c4.y = c4.y + fv * (c6.y - c4.y);
   1386 			if(componentCount >= 3) c4.z = c4.z + fv * (c6.z - c4.z);
   1387 			if(componentCount >= 4) c4.w = c4.w + fv * (c6.w - c4.w);
   1388 
   1389 			// Blend slices
   1390 			if(componentCount >= 1) c0.x = c0.x + fw * (c4.x - c0.x);
   1391 			if(componentCount >= 2) c0.y = c0.y + fw * (c4.y - c0.y);
   1392 			if(componentCount >= 3) c0.z = c0.z + fw * (c4.z - c0.z);
   1393 			if(componentCount >= 4) c0.w = c0.w + fw * (c4.w - c0.w);
   1394 		}
   1395 	}
   1396 
   1397 	void SamplerCore::computeLod(Pointer<Byte> &texture, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, Float4 &uuuu, Float4 &vvvv, const Float &lodBias, Vector4f &dsx, Vector4f &dsy, SamplerMethod method)
   1398 	{
        		// Computes the mipmap level of detail for a (u, v) lookup and stores it in
        		// 'lod', clamped to [0, MIPMAP_LEVELS - 2].
        		//  - method == Lod:  lod = lodBias + the texture's LOD field.
        		//  - method == Grad: derivatives are taken from dsx / dsy.
        		//  - otherwise:      derivatives are lane differences of uuuu / vvvv, and
        		//                    lodBias is added only when method == Bias.
        		// With FILTER_ANISOTROPIC (and method != Lod) it also writes 'anisotropy',
        		// 'uDelta' and 'vDelta'.
   1399 		if(method != Lod)
   1400 		{
        			// Layout: (du/dx, du/dy, dv/dx, dv/dy), as named in the anisotropic block below.
   1401 			Float4 duvdxy;
   1402 
   1403 			if(method != Grad)
   1404 			{
        				// (u.y - u.x, u.z - u.x, v.y - v.x, v.z - v.x)
        				// NOTE(review): presumably lanes x, y, z are neighbouring pixels of a 2x2 quad - confirm.
   1405 				duvdxy = Float4(uuuu.yz, vvvv.yz) - Float4(uuuu.xx, vvvv.xx);
   1406 			}
   1407 			else
   1408 			{
        				// Explicit gradients: only lane x of each dsx / dsy component is used.
   1409 				Float4 dudxy = Float4(dsx.x.xx, dsy.x.xx);
   1410 				Float4 dvdxy = Float4(dsx.y.xx, dsy.y.xx);
   1411 
   1412 				duvdxy = Float4(dudxy.xz, dvdxy.xz);
   1413 			}
   1414 
   1415 			// Scale by texture dimensions and LOD
   1416 			Float4 dUVdxy = duvdxy * *Pointer<Float4>(texture + OFFSET(Texture,widthHeightLOD));
   1417 
        			// dUV2 = (dUdx^2 + dVdx^2, dUdy^2 + dVdy^2): squared footprint lengths along x and y.
   1418 			Float4 dUV2dxy = dUVdxy * dUVdxy;
   1419 			Float4 dUV2 = dUV2dxy.xy + dUV2dxy.zw;
   1420 
   1421 			lod = Max(Float(dUV2.x), Float(dUV2.y));   // Square length of major axis
   1422 
   1423 			if(state.textureFilter == FILTER_ANISOTROPIC)
   1424 			{
        				// |dUdx * dVdy - dUdy * dVdx|
   1425 				Float det = Abs(Float(dUVdxy.x) * Float(dUVdxy.w) - Float(dUVdxy.y) * Float(dUVdxy.z));
   1426 
        				// Unscaled derivatives, broadcast to all lanes.
   1427 				Float4 dudx = duvdxy.xxxx;
   1428 				Float4 dudy = duvdxy.yyyy;
   1429 				Float4 dvdx = duvdxy.zzzz;
   1430 				Float4 dvdy = duvdxy.wwww;
   1431 
        				// Pick the x derivatives when the x footprint is not shorter than the
        				// y footprint, otherwise the y derivatives.
   1432 				Int4 mask = As<Int4>(CmpNLT(dUV2.x, dUV2.y));
   1433 				uDelta = As<Float4>(As<Int4>(dudx) & mask | As<Int4>(dudy) & ~mask);
   1434 				vDelta = As<Float4>(As<Int4>(dvdx) & mask | As<Int4>(dvdy) & ~mask);
   1435 
        				// Squared major length divided by the determinant, limited to the
        				// texture's maxAnisotropy.
   1436 				anisotropy = lod * Rcp_pp(det);
   1437 				anisotropy = Min(anisotropy, *Pointer<Float>(texture + OFFSET(Texture,maxAnisotropy)));
   1438 
        				// Reduce the squared length by anisotropy^2 before taking the logarithm.
   1439 				lod *= Rcp_pp(anisotropy * anisotropy);
   1440 			}
   1441 
   1442 			// log2(sqrt(lod))
        			// Approximation using the float bit pattern: reinterpret the bits as an
        			// integer, subtract the bit pattern of 1.0f (0x3F800000) and scale by
        			// 2^-24 (the float whose bit pattern is 0x33800000): 2^-23 to turn the
        			// exponent/mantissa bits into a base-2 logarithm, times 1/2 for the sqrt.
   1443 			lod = Float(As<Int>(lod));
   1444 			lod -= Float(0x3F800000);
   1445 			lod *= As<Float>(Int(0x33800000));
   1446 
   1447 			if(method == Bias)
   1448 			{
   1449 				lod += lodBias;
   1450 			}
   1451 		}
   1452 		else
   1453 		{
   1454 			lod = lodBias + *Pointer<Float>(texture + OFFSET(Texture,LOD));
   1455 		}
   1456 
   1457 		lod = Max(lod, 0.0f);
   1458 		lod = Min(lod, Float(MIPMAP_LEVELS - 2));   // Trilinear accesses lod+1
   1459 	}
   1460 
   1461 	void SamplerCore::computeLodCube(Pointer<Byte> &texture, Float &lod, Float4 &u, Float4 &v, Float4 &s, const Float &lodBias, Vector4f &dsx, Vector4f &dsy, SamplerMethod method)
   1462 	{
        		// Computes the mipmap level of detail for a cube map lookup and stores it in
        		// 'lod', clamped to [0, MIPMAP_LEVELS - 2].
        		//  - method == Lod:  lod = lodBias + the texture's LOD field.
        		//  - method == Grad: derivatives are taken from dsx / dsy; u, v, s are unused.
        		//  - otherwise:      derivatives are lane differences of u, v and s, and
        		//                    lodBias is added only when method == Bias.
   1463 		if(method != Lod)
   1464 		{
   1465 			if(method != Grad)
   1466 			{
        				// Lane differences: (y - x, w - y, y - z, 0). Only lanes x and y are read below.
   1467 				Float4 dudxy = u.ywyw - u;
   1468 				Float4 dvdxy = v.ywyw - v;
   1469 				Float4 dsdxy = s.ywyw - s;
   1470 
   1471 				// Scale by texture dimensions and LOD
        				// All three axes use widthLOD.
   1472 				dudxy *= *Pointer<Float4>(texture + OFFSET(Texture,widthLOD));
   1473 				dvdxy *= *Pointer<Float4>(texture + OFFSET(Texture,widthLOD));
   1474 				dsdxy *= *Pointer<Float4>(texture + OFFSET(Texture,widthLOD));
   1475 
        				// Sum of squares per lane; the result is accumulated in dudxy.
   1476 				dudxy *= dudxy;
   1477 				dvdxy *= dvdxy;
   1478 				dsdxy *= dsdxy;
   1479 
   1480 				dudxy += dvdxy;
   1481 				dudxy += dsdxy;
   1482 
   1483 				lod = Max(Float(dudxy.x), Float(dudxy.y));   // FIXME: Max(dudxy.x, dudxy.y);
   1484 			}
   1485 			else
   1486 			{
        				// NOTE(review): only the x and y components of dsx / dsy are read here;
        				// dsx.z / dsy.z are not used, unlike in computeLod3D - confirm this is intended.
   1487 				Float4 dudxy = Float4(dsx.x.xx, dsy.x.xx);
   1488 				Float4 dvdxy = Float4(dsx.y.xx, dsy.y.xx);
   1489 
        				// (du/dx, du/dy, dv/dx, dv/dy)
   1490 				Float4 duvdxy = Float4(dudxy.xz, dvdxy.xz);
   1491 
   1492 				// Scale by texture dimensions and LOD
   1493 				Float4 dUVdxy = duvdxy * *Pointer<Float4>(texture + OFFSET(Texture,widthLOD));
   1494 
   1495 				Float4 dUV2dxy = dUVdxy * dUVdxy;
   1496 				Float4 dUV2 = dUV2dxy.xy + dUV2dxy.zw;
   1497 
   1498 				lod = Max(Float(dUV2.x), Float(dUV2.y));   // Square length of major axis
   1499 			}
   1500 
   1501 			// log2(sqrt(lod))
        			// Bit-pattern approximation: 0x3F800000 is the bit pattern of 1.0f and
        			// 0x33800000 is the bit pattern of 2^-24.
   1502 			lod = Float(As<Int>(lod));
   1503 			lod -= Float(0x3F800000);
   1504 			lod *= As<Float>(Int(0x33800000));
   1505 
   1506 			if(method == Bias)
   1507 			{
   1508 				lod += lodBias;
   1509 			}
   1510 		}
   1511 		else
   1512 		{
   1513 			lod = lodBias + *Pointer<Float>(texture + OFFSET(Texture,LOD));
   1514 		}
   1515 
   1516 		lod = Max(lod, 0.0f);
   1517 		lod = Min(lod, Float(MIPMAP_LEVELS - 2));   // Trilinear accesses lod+1
   1518 	}
   1519 
   1520 	void SamplerCore::computeLod3D(Pointer<Byte> &texture, Float &lod, Float4 &uuuu, Float4 &vvvv, Float4 &wwww, const Float &lodBias, Vector4f &dsx, Vector4f &dsy, SamplerMethod method)
   1521 	{
        		// Computes the mipmap level of detail for a 3D texture lookup and stores it
        		// in 'lod', clamped to [0, MIPMAP_LEVELS - 2]. When the mipmap filter is
        		// MIPMAP_NONE nothing is emitted and 'lod' is left untouched.
   1522 		if(state.mipmapFilter == MIPMAP_NONE)
   1523 		{
   1524 		}
   1525 		else   // Point and linear filter
   1526 		{
   1527 			if(method != Lod)
   1528 			{
   1529 				Float4 dudxy;
   1530 				Float4 dvdxy;
   1531 				Float4 dsdxy;
   1532 
   1533 				if(method != Grad)
   1534 				{
        					// Lane differences: (y - x, w - y, y - z, 0). Only lanes x and y are read below.
   1535 					dudxy = uuuu.ywyw - uuuu;
   1536 					dvdxy = vvvv.ywyw - vvvv;
   1537 					dsdxy = wwww.ywyw - wwww;
   1538 				}
   1539 				else
   1540 				{
        					// Explicit gradients: start from the x derivatives ...
   1541 					dudxy = dsx.x;
   1542 					dvdxy = dsx.y;
   1543 					dsdxy = dsx.z;
   1544 
        					// ... pair lane x of each with lane x of the matching y derivative ...
   1545 					dudxy = Float4(dudxy.xx, dsy.x.xx);
   1546 					dvdxy = Float4(dvdxy.xx, dsy.y.xx);
   1547 					dsdxy = Float4(dsdxy.xx, dsy.z.xx);
   1548 
        					// ... and arrange as (d/dx, d/dy, d/dx, d/dy) so that lanes x and y
        					// hold the two derivatives compared below.
   1549 					dudxy = Float4(dudxy.xz, dudxy.xz);
   1550 					dvdxy = Float4(dvdxy.xz, dvdxy.xz);
   1551 					dsdxy = Float4(dsdxy.xz, dsdxy.xz);
   1552 				}
   1553 
   1554 				// Scale by texture dimensions and LOD
   1555 				dudxy *= *Pointer<Float4>(texture + OFFSET(Texture,widthLOD));
   1556 				dvdxy *= *Pointer<Float4>(texture + OFFSET(Texture,heightLOD));
   1557 				dsdxy *= *Pointer<Float4>(texture + OFFSET(Texture,depthLOD));
   1558 
        				// Sum of squares per lane; the result is accumulated in dudxy.
   1559 				dudxy *= dudxy;
   1560 				dvdxy *= dvdxy;
   1561 				dsdxy *= dsdxy;
   1562 
   1563 				dudxy += dvdxy;
   1564 				dudxy += dsdxy;
   1565 
   1566 				lod = Max(Float(dudxy.x), Float(dudxy.y));   // FIXME: Max(dudxy.x, dudxy.y);
   1567 
   1568 				// log2(sqrt(lod))
        				// Bit-pattern approximation: 0x3F800000 is the bit pattern of 1.0f and
        				// 0x33800000 is the bit pattern of 2^-24.
   1569 				lod = Float(As<Int>(lod));
   1570 				lod -= Float(0x3F800000);
   1571 				lod *= As<Float>(Int(0x33800000));
   1572 
   1573 				if(method == Bias)
   1574 				{
   1575 					lod += lodBias;
   1576 				}
   1577 			}
   1578 			else
   1579 			{
   1580 				lod = lodBias + *Pointer<Float>(texture + OFFSET(Texture,LOD));
   1581 			}
   1582 
   1583 			lod = Max(lod, Float(0.0f));    // FIXME
   1584 			lod = Min(lod, Float(MIPMAP_LEVELS - 2));   // Trilinear accesses lod+1
   1585 		}
   1586 	}
   1587 
   1588 	void SamplerCore::cubeFace(Int face[4], Float4 &U, Float4 &V, Float4 &lodX, Float4 &lodY, Float4 &lodZ, Float4 &x, Float4 &y, Float4 &z)
   1589 	{
        		// Cube map face selection for four direction vectors (x, y, z), one per lane.
        		// Outputs:
        		//   face[0..3]        - face index of each lane (encoding listed below)
        		//   U, V              - face-local coordinates: selected component * (0.5 / M) + 0.5
        		//   lodX, lodY, lodZ  - the direction scaled by 0.5 / M
        		// where M is the largest absolute component of the direction.
   1590 		Int4 xn = CmpLT(x, Float4(0.0f));   // x < 0
   1591 		Int4 yn = CmpLT(y, Float4(0.0f));   // y < 0
   1592 		Int4 zn = CmpLT(z, Float4(0.0f));   // z < 0
   1593 
   1594 		Float4 absX = Abs(x);
   1595 		Float4 absY = Abs(y);
   1596 		Float4 absZ = Abs(z);
   1597 
   1598 		Int4 xy = CmpNLE(absX, absY);   // abs(x) > abs(y)
   1599 		Int4 yz = CmpNLE(absY, absZ);   // abs(y) > abs(z)
   1600 		Int4 zx = CmpNLE(absZ, absX);   // abs(z) > abs(x)
   1601 		Int4 xMajor = xy & ~zx;   // abs(x) > abs(y) && abs(x) > abs(z)
   1602 		Int4 yMajor = yz & ~xy;   // abs(y) > abs(z) && abs(y) > abs(x)
   1603 		Int4 zMajor = zx & ~yz;   // abs(z) > abs(x) && abs(z) > abs(y)
   1604 
   1605 		// FACE_POSITIVE_X = 000b
   1606 		// FACE_NEGATIVE_X = 001b
   1607 		// FACE_POSITIVE_Y = 010b
   1608 		// FACE_NEGATIVE_Y = 011b
   1609 		// FACE_POSITIVE_Z = 100b
   1610 		// FACE_NEGATIVE_Z = 101b
   1611 
        		// NOTE(review): SignMask presumably packs one sign bit per lane into an
        		// integer; the results are used as table indices below - confirm.
   1612 		Int yAxis = SignMask(yMajor);
   1613 		Int zAxis = SignMask(zMajor);
   1614 
        		// Sign bit only, set in lanes whose major-axis component is negative.
   1615 		Int4 n = ((xn & xMajor) | (yn & yMajor) | (zn & zMajor)) & Int4(0x80000000);
   1616 		Int negative = SignMask(n);
   1617 
        		// Table lookups (4-byte entries) supply bit 0 (negative), bit 1 (y major) and
        		// bit 2 (z major) of the face index, with the four lanes packed 4 bits apart.
   1618 		face[0] = *Pointer<Int>(constants + OFFSET(Constants,transposeBit0) + negative * 4);
   1619 		face[0] |= *Pointer<Int>(constants + OFFSET(Constants,transposeBit1) + yAxis * 4);
   1620 		face[0] |= *Pointer<Int>(constants + OFFSET(Constants,transposeBit2) + zAxis * 4);
        		// Unpack the 3-bit face index of lanes 1..3, then mask lane 0 in place.
   1621 		face[1] = (face[0] >> 4)  & 0x7;
   1622 		face[2] = (face[0] >> 8)  & 0x7;
   1623 		face[3] = (face[0] >> 12) & 0x7;
   1624 		face[0] &= 0x7;
   1625 
   1626 		Float4 M = Max(Max(absX, absY), absZ);
   1627 
        		// XOR with n flips the float sign bit in lanes where the major axis is negative.
   1628 		// U = xMajor ? (neg ^ -z) : ((zMajor & neg) ^ x)
   1629 		U = As<Float4>((xMajor & (n ^ As<Int4>(-z))) | (~xMajor & ((zMajor & n) ^ As<Int4>(x))));
   1630 
   1631 		// V = !yMajor ? -y : (n ^ z)
   1632 		V = As<Float4>((~yMajor & As<Int4>(-y)) | (yMajor & (n ^ As<Int4>(z))));
   1633 
        		// From here on M holds 0.5 / max(|x|, |y|, |z|).
   1634 		M = reciprocal(M) * Float4(0.5f);
   1635 		U = U * M + Float4(0.5f);
   1636 		V = V * M + Float4(0.5f);
   1637 
   1638 		lodX = x * M;
   1639 		lodY = y * M;
   1640 		lodZ = z * M;
   1641 	}
   1642 
   1643 	void SamplerCore::computeIndices(Int index[4], Short4 uuuu, Short4 vvvv, Short4 wwww, const Pointer<Byte> &mipmap)
   1644 	{
        		// Converts four sets of 16-bit texture coordinates into four 32-bit texel
        		// indices, index[0..3]. uuuu / vvvv / wwww are taken by value and are used
        		// as scratch registers; uuu2 / www2 hold the upper two lanes after the
        		// 16-bit values have been interleaved into 32-bit pairs.
   1645 		Short4 uuu2;
   1646 
   1647 		if(!state.hasNPOTTexture && !hasFloatTexture())
   1648 		{
        			// Power-of-two, non-float path: shift-based index. v is shifted right by
        			// vFrac, interleaved with u into 32-bit (u, v) pairs, and each pair is
        			// then shifted right by uFrac.
   1649 			vvvv = As<UShort4>(vvvv) >> *Pointer<Long1>(mipmap + OFFSET(Mipmap,vFrac));
   1650 			uuu2 = uuuu;
   1651 			uuuu = As<Short4>(UnpackLow(uuuu, vvvv));
   1652 			uuu2 = As<Short4>(UnpackHigh(uuu2, vvvv));
   1653 			uuuu = As<Short4>(As<UInt2>(uuuu) >> *Pointer<Long1>(mipmap + OFFSET(Mipmap,uFrac)));
   1654 			uuu2 = As<Short4>(As<UInt2>(uuu2) >> *Pointer<Long1>(mipmap + OFFSET(Mipmap,uFrac)));
   1655 		}
   1656 		else
   1657 		{
        			// General path: scale the coordinates by width / height (high 16 bits of
        			// the product), interleave them, then multiply-add each (u, v) pair with
        			// the mipmap's onePitchP constants.
        			// NOTE(review): onePitchP presumably holds (1, pitch) pairs, giving u + v * pitch - confirm.
   1658 			uuuu = MulHigh(As<UShort4>(uuuu), *Pointer<UShort4>(mipmap + OFFSET(Mipmap,width)));
   1659 			vvvv = MulHigh(As<UShort4>(vvvv), *Pointer<UShort4>(mipmap + OFFSET(Mipmap,height)));
   1660 			uuu2 = uuuu;
   1661 			uuuu = As<Short4>(UnpackLow(uuuu, vvvv));
   1662 			uuu2 = As<Short4>(UnpackHigh(uuu2, vvvv));
   1663 			uuuu = As<Short4>(MulAdd(uuuu, *Pointer<Short4>(mipmap + OFFSET(Mipmap,onePitchP))));
   1664 			uuu2 = As<Short4>(MulAdd(uuu2, *Pointer<Short4>(mipmap + OFFSET(Mipmap,onePitchP))));
   1665 		}
   1666 
   1667 		if((state.textureType == TEXTURE_3D) || (state.textureType == TEXTURE_2D_ARRAY))
   1668 		{
        			// Add the slice offset. For 2D arrays wwww is used as-is; for 3D textures
        			// it is first scaled by the mipmap depth.
   1669 			if(state.textureType != TEXTURE_2D_ARRAY)
   1670 			{
   1671 				wwww = MulHigh(As<UShort4>(wwww), *Pointer<UShort4>(mipmap + OFFSET(Mipmap, depth)));
   1672 			}
   1673 			Short4 www2 = wwww;
        			// Interleave with zeros, multiply-add with sliceP, then add to the 2D index.
   1674 			wwww = As<Short4>(UnpackLow(wwww, Short4(0x0000, 0x0000, 0x0000, 0x0000)));
   1675 			www2 = As<Short4>(UnpackHigh(www2, Short4(0x0000, 0x0000, 0x0000, 0x0000)));
   1676 			wwww = As<Short4>(MulAdd(wwww, *Pointer<Short4>(mipmap + OFFSET(Mipmap,sliceP))));
   1677 			www2 = As<Short4>(MulAdd(www2, *Pointer<Short4>(mipmap + OFFSET(Mipmap,sliceP))));
   1678 			uuuu = As<Short4>(As<Int2>(uuuu) + As<Int2>(wwww));
   1679 			uuu2 = As<Short4>(As<Int2>(uuu2) + As<Int2>(www2));
   1680 		}
   1681 
        		// uuuu holds the indices of lanes 0 and 1, uuu2 those of lanes 2 and 3.
   1682 		index[0] = Extract(As<Int2>(uuuu), 0);
   1683 		index[1] = Extract(As<Int2>(uuuu), 1);
   1684 		index[2] = Extract(As<Int2>(uuu2), 0);
   1685 		index[3] = Extract(As<Int2>(uuu2), 1);
   1686 	}
   1687 
   1688 	void SamplerCore::sampleTexel(Vector4s &c, Short4 &uuuu, Short4 &vvvv, Short4 &wwww, Pointer<Byte> &mipmap, Pointer<Byte> buffer[4])
   1689 	{
        		// Fetches the four texels addressed by (uuuu, vvvv, wwww) and unpacks them
        		// into 'c' as 16-bit lanes. The layout read from 'buffer' depends on
        		// state.textureFormat; formats that match none of the branches below assert.
   1690 		Int index[4];
   1691 
   1692 		computeIndices(index, uuuu, vvvv, wwww, mipmap);
   1693 
        		// Cube textures read texel i from buffer[i]; every other texture type reads
        		// all four texels from buffer[0]. f0 is 0 in both cases.
   1694 		int f0 = state.textureType == TEXTURE_CUBE ? 0 : 0;
   1695 		int f1 = state.textureType == TEXTURE_CUBE ? 1 : 0;
   1696 		int f2 = state.textureType == TEXTURE_CUBE ? 2 : 0;
   1697 		int f3 = state.textureType == TEXTURE_CUBE ? 3 : 0;
   1698 
   1699 		if(has16bitTextureFormat())
   1700 		{
        			// Gather four 2-byte texels into the four lanes of c.x.
   1701 			c.x = Insert(c.x, *Pointer<Short>(buffer[f0] + 2 * index[0]), 0);
   1702 			c.x = Insert(c.x, *Pointer<Short>(buffer[f1] + 2 * index[1]), 1);
   1703 			c.x = Insert(c.x, *Pointer<Short>(buffer[f2] + 2 * index[2]), 2);
   1704 			c.x = Insert(c.x, *Pointer<Short>(buffer[f3] + 2 * index[3]), 3);
   1705 
   1706 			switch(state.textureFormat)
   1707 			{
   1708 			case FORMAT_R5G6B5:
        				// Mask out each bit field and move it to the top of its 16-bit lane.
        				// c.x is overwritten last because the other two read from it.
   1709 				c.z = (c.x & Short4(0x001Fu)) << 11;
   1710 				c.y = (c.x & Short4(0x07E0u)) << 5;
   1711 				c.x = (c.x & Short4(0xF800u));
   1712 				break;
   1713 			default:
   1714 				ASSERT(false);
   1715 			}
   1716 		}
   1717 		else if(has8bitTextureComponents())
   1718 		{
   1719 			switch(textureComponentCount())
   1720 			{
   1721 			case 4:
   1722 				{
        					// Texels are 4 bytes apart; each load is 8 bytes wide.
        					// The unpack sequence below regroups the bytes by component and
        					// ends by interleaving each register with itself.
   1723 					Byte8 c0 = *Pointer<Byte8>(buffer[f0] + 4 * index[0]);
   1724 					Byte8 c1 = *Pointer<Byte8>(buffer[f1] + 4 * index[1]);
   1725 					Byte8 c2 = *Pointer<Byte8>(buffer[f2] + 4 * index[2]);
   1726 					Byte8 c3 = *Pointer<Byte8>(buffer[f3] + 4 * index[3]);
   1727 					c.x = UnpackLow(c0, c1);
   1728 					c.y = UnpackLow(c2, c3);
   1729 
   1730 					switch(state.textureFormat)
   1731 					{
   1732 					case FORMAT_A8R8G8B8:
        						// Same shuffle as the A8B8G8R8 group below with the roles of c.x and c.z swapped.
   1733 						c.z = c.x;
   1734 						c.z = As<Short4>(UnpackLow(c.z, c.y));
   1735 						c.x = As<Short4>(UnpackHigh(c.x, c.y));
   1736 						c.y = c.z;
   1737 						c.w = c.x;
   1738 						c.z = UnpackLow(As<Byte8>(c.z), As<Byte8>(c.z));
   1739 						c.y = UnpackHigh(As<Byte8>(c.y), As<Byte8>(c.y));
   1740 						c.x = UnpackLow(As<Byte8>(c.x), As<Byte8>(c.x));
   1741 						c.w = UnpackHigh(As<Byte8>(c.w), As<Byte8>(c.w));
   1742 						break;
   1743 					case FORMAT_A8B8G8R8:
   1744 					case FORMAT_A8B8G8R8I:
   1745 					case FORMAT_A8B8G8R8UI:
   1746 					case FORMAT_A8B8G8R8I_SNORM:
   1747 					case FORMAT_Q8W8V8U8:
   1748 					case FORMAT_SRGB8_A8:
   1749 						c.z = c.x;
   1750 						c.x = As<Short4>(UnpackLow(c.x, c.y));
   1751 						c.z = As<Short4>(UnpackHigh(c.z, c.y));
   1752 						c.y = c.x;
   1753 						c.w = c.z;
   1754 						c.x = UnpackLow(As<Byte8>(c.x), As<Byte8>(c.x));
   1755 						c.y = UnpackHigh(As<Byte8>(c.y), As<Byte8>(c.y));
   1756 						c.z = UnpackLow(As<Byte8>(c.z), As<Byte8>(c.z));
   1757 						c.w = UnpackHigh(As<Byte8>(c.w), As<Byte8>(c.w));
   1758 						break;
   1759 					default:
   1760 						ASSERT(false);
   1761 					}
   1762 				}
   1763 				break;
   1764 			case 3:
   1765 				{
        					// Same loads and shuffles as the 4-component case, except that c.w is never written.
   1766 					Byte8 c0 = *Pointer<Byte8>(buffer[f0] + 4 * index[0]);
   1767 					Byte8 c1 = *Pointer<Byte8>(buffer[f1] + 4 * index[1]);
   1768 					Byte8 c2 = *Pointer<Byte8>(buffer[f2] + 4 * index[2]);
   1769 					Byte8 c3 = *Pointer<Byte8>(buffer[f3] + 4 * index[3]);
   1770 					c.x = UnpackLow(c0, c1);
   1771 					c.y = UnpackLow(c2, c3);
   1772 
   1773 					switch(state.textureFormat)
   1774 					{
   1775 					case FORMAT_X8R8G8B8:
   1776 						c.z = c.x;
   1777 						c.z = As<Short4>(UnpackLow(c.z, c.y));
   1778 						c.x = As<Short4>(UnpackHigh(c.x, c.y));
   1779 						c.y = c.z;
   1780 						c.z = UnpackLow(As<Byte8>(c.z), As<Byte8>(c.z));
   1781 						c.y = UnpackHigh(As<Byte8>(c.y), As<Byte8>(c.y));
   1782 						c.x = UnpackLow(As<Byte8>(c.x), As<Byte8>(c.x));
   1783 						break;
   1784 					case FORMAT_X8B8G8R8I_SNORM:
   1785 					case FORMAT_X8B8G8R8UI:
   1786 					case FORMAT_X8B8G8R8I:
   1787 					case FORMAT_X8B8G8R8:
   1788 					case FORMAT_X8L8V8U8:
   1789 					case FORMAT_SRGB8_X8:
   1790 						c.z = c.x;
   1791 						c.x = As<Short4>(UnpackLow(c.x, c.y));
   1792 						c.z = As<Short4>(UnpackHigh(c.z, c.y));
   1793 						c.y = c.x;
   1794 						c.x = UnpackLow(As<Byte8>(c.x), As<Byte8>(c.x));
   1795 						c.y = UnpackHigh(As<Byte8>(c.y), As<Byte8>(c.y));
   1796 						c.z = UnpackLow(As<Byte8>(c.z), As<Byte8>(c.z));
   1797 						break;
   1798 					default:
   1799 						ASSERT(false);
   1800 					}
   1801 				}
   1802 				break;
   1803 			case 2:
        				// Gather four 2-byte texels into c.x, then split the two bytes.
   1804 				c.x = Insert(c.x, *Pointer<Short>(buffer[f0] + 2 * index[0]), 0);
   1805 				c.x = Insert(c.x, *Pointer<Short>(buffer[f1] + 2 * index[1]), 1);
   1806 				c.x = Insert(c.x, *Pointer<Short>(buffer[f2] + 2 * index[2]), 2);
   1807 				c.x = Insert(c.x, *Pointer<Short>(buffer[f3] + 2 * index[3]), 3);
   1808 
   1809 				switch(state.textureFormat)
   1810 				{
   1811 				case FORMAT_G8R8:
   1812 				case FORMAT_G8R8I:
   1813 				case FORMAT_G8R8UI:
   1814 				case FORMAT_G8R8I_SNORM:
   1815 				case FORMAT_V8U8:
   1816 				case FORMAT_A8L8:
        					// c.y: high byte of each lane copied into both bytes.
        					// c.x: low byte of each lane copied into both bytes.
        					// c.y must be computed first since it reads the original c.x.
   1817 					c.y = (c.x & Short4(0xFF00u, 0xFF00u, 0xFF00u, 0xFF00u)) | As<Short4>(As<UShort4>(c.x) >> 8);
   1818 					c.x = (c.x & Short4(0x00FFu, 0x00FFu, 0x00FFu, 0x00FFu)) | (c.x << 8);
   1819 					break;
   1820 				default:
   1821 					ASSERT(false);
   1822 				}
   1823 				break;
   1824 			case 1:
   1825 				{
        					// Pack the four single-byte texels into one 32-bit value and unpack it into c.x.
   1826 					Int c0 = Int(*Pointer<Byte>(buffer[f0] + index[0]));
   1827 					Int c1 = Int(*Pointer<Byte>(buffer[f1] + index[1]));
   1828 					Int c2 = Int(*Pointer<Byte>(buffer[f2] + index[2]));
   1829 					Int c3 = Int(*Pointer<Byte>(buffer[f3] + index[3]));
   1830 					c0 = c0 | (c1 << 8) | (c2 << 16) | (c3 << 24);
   1831 					c.x = Unpack(As<Byte4>(c0));
   1832 				}
   1833 				break;
   1834 			default:
   1835 				ASSERT(false);
   1836 			}
   1837 		}
   1838 		else if(has16bitTextureComponents())
   1839 		{
   1840 			switch(textureComponentCount())
   1841 			{
   1842 			case 4:
        				// One 8-byte texel per register, then transpose to one component per register.
   1843 				c.x = *Pointer<Short4>(buffer[f0] + 8 * index[0]);
   1844 				c.y = *Pointer<Short4>(buffer[f1] + 8 * index[1]);
   1845 				c.z = *Pointer<Short4>(buffer[f2] + 8 * index[2]);
   1846 				c.w = *Pointer<Short4>(buffer[f3] + 8 * index[3]);
   1847 				transpose4x4(c.x, c.y, c.z, c.w);
   1848 				break;
   1849 			case 2:
        				// Texels are 4 bytes apart but each load is a full Short4 (8 bytes).
        				// The unpacks leave the first component in c.x and the second in c.y.
   1850 				c.x = *Pointer<Short4>(buffer[f0] + 4 * index[0]);
   1851 				c.x = As<Short4>(UnpackLow(c.x, *Pointer<Short4>(buffer[f1] + 4 * index[1])));
   1852 				c.z = *Pointer<Short4>(buffer[f2] + 4 * index[2]);
   1853 				c.z = As<Short4>(UnpackLow(c.z, *Pointer<Short4>(buffer[f3] + 4 * index[3])));
   1854 				c.y = c.x;
   1855 				c.x = As<Short4>(UnpackLow(As<Int2>(c.x), As<Int2>(c.z)));
   1856 				c.y = As<Short4>(UnpackHigh(As<Int2>(c.y), As<Int2>(c.z)));
   1857 				break;
   1858 			case 1:
   1859 				c.x = Insert(c.x, *Pointer<Short>(buffer[f0] + 2 * index[0]), 0);
   1860 				c.x = Insert(c.x, *Pointer<Short>(buffer[f1] + 2 * index[1]), 1);
   1861 				c.x = Insert(c.x, *Pointer<Short>(buffer[f2] + 2 * index[2]), 2);
   1862 				c.x = Insert(c.x, *Pointer<Short>(buffer[f3] + 2 * index[3]), 3);
   1863 				break;
   1864 			default:
   1865 				ASSERT(false);
   1866 			}
   1867 		}
   1868 		else if(hasYuvFormat())
   1869 		{
   1870 			// Generic YPbPr to RGB transformation
   1871 			// R = Y                               +           2 * (1 - Kr) * Pr
   1872 			// G = Y - 2 * Kb * (1 - Kb) / Kg * Pb - 2 * Kr * (1 - Kr) / Kg * Pr
   1873 			// B = Y +           2 * (1 - Kb) * Pb
   1874 
        			// The initial values equal the FORMAT_YV12_BT601 settings chosen below.
   1875 			float Kb = 0.114f;
   1876 			float Kr = 0.299f;
   1877 			int studioSwing = 1;
   1878 
   1879 			switch(state.textureFormat)
   1880 			{
   1881 			case FORMAT_YV12_BT601:
   1882 				Kb = 0.114f;
   1883 				Kr = 0.299f;
   1884 				studioSwing = 1;
   1885 				break;
   1886 			case FORMAT_YV12_BT709:
   1887 				Kb = 0.0722f;
   1888 				Kr = 0.2126f;
   1889 				studioSwing = 1;
   1890 				break;
   1891 			case FORMAT_YV12_JFIF:
   1892 				Kb = 0.114f;
   1893 				Kr = 0.299f;
   1894 				studioSwing = 0;
   1895 				break;
   1896 			default:
   1897 				ASSERT(false);
   1898 			}
   1899 
   1900 			const float Kg = 1.0f - Kr - Kb;
   1901 
        			// Matrix coefficients from the formulas above.
   1902 			const float Rr = 2 * (1 - Kr);
   1903 			const float Gb = -2 * Kb * (1 - Kb) / Kg;
   1904 			const float Gr = -2 * Kr * (1 - Kr) / Kg;
   1905 			const float Bb = 2 * (1 - Kb);
   1906 
   1907 			// Scaling and bias for studio-swing range: Y = [16 .. 235], U/V = [16 .. 240]
   1908 			const float Yy = studioSwing ? 255.0f / (235 - 16) : 1.0f;
   1909 			const float Uu = studioSwing ? 255.0f / (240 - 16) : 1.0f;
   1910 			const float Vv = studioSwing ? 255.0f / (240 - 16) : 1.0f;
   1911 
        			// Fold the range scaling into the matrix coefficients.
   1912 			const float Rv = Vv *  Rr;
   1913 			const float Gu = Uu *  Gb;
   1914 			const float Gv = Vv *  Gr;
   1915 			const float Bu = Uu *  Bb;
   1916 
        			// Constant terms: the luma offset of 16 (studio swing only) and the chroma offset of 128.
   1917 			const float R0 = (studioSwing * -16 * Yy - 128 * Rv) / 255;
   1918 			const float G0 = (studioSwing * -16 * Yy - 128 * Gu - 128 * Gv) / 255;
   1919 			const float B0 = (studioSwing * -16 * Yy - 128 * Bu) / 255;
   1920 
        			// Luma: four bytes from buffer[0], using the indices computed for 'mipmap'.
   1921 			Int c0 = Int(*Pointer<Byte>(buffer[0] + index[0]));
   1922 			Int c1 = Int(*Pointer<Byte>(buffer[0] + index[1]));
   1923 			Int c2 = Int(*Pointer<Byte>(buffer[0] + index[2]));
   1924 			Int c3 = Int(*Pointer<Byte>(buffer[0] + index[3]));
   1925 			c0 = c0 | (c1 << 8) | (c2 << 16) | (c3 << 24);
   1926 			UShort4 Y = As<UShort4>(Unpack(As<Byte4>(c0)));
   1927 
        			// Chroma: indices are recomputed against the Mipmap entry that follows
        			// 'mipmap'. V is read from buffer[1] and U from buffer[2].
   1928 			computeIndices(index, uuuu, vvvv, wwww, mipmap + sizeof(Mipmap));
   1929 			c0 = Int(*Pointer<Byte>(buffer[1] + index[0]));
   1930 			c1 = Int(*Pointer<Byte>(buffer[1] + index[1]));
   1931 			c2 = Int(*Pointer<Byte>(buffer[1] + index[2]));
   1932 			c3 = Int(*Pointer<Byte>(buffer[1] + index[3]));
   1933 			c0 = c0 | (c1 << 8) | (c2 << 16) | (c3 << 24);
   1934 			UShort4 V = As<UShort4>(Unpack(As<Byte4>(c0)));
   1935 
   1936 			c0 = Int(*Pointer<Byte>(buffer[2] + index[0]));
   1937 			c1 = Int(*Pointer<Byte>(buffer[2] + index[1]));
   1938 			c2 = Int(*Pointer<Byte>(buffer[2] + index[2]));
   1939 			c3 = Int(*Pointer<Byte>(buffer[2] + index[3]));
   1940 			c0 = c0 | (c1 << 8) | (c2 << 16) | (c3 << 24);
   1941 			UShort4 U = As<UShort4>(Unpack(As<Byte4>(c0)));
   1942 
        			// Fixed-point coefficients (scaled by 0x4000). Gu, Gv, R0 and B0 are
        			// negated here and applied through SubSat below.
   1943 			const UShort4 yY = UShort4(iround(Yy * 0x4000));
   1944 			const UShort4 rV = UShort4(iround(Rv * 0x4000));
   1945 			const UShort4 gU = UShort4(iround(-Gu * 0x4000));
   1946 			const UShort4 gV = UShort4(iround(-Gv * 0x4000));
   1947 			const UShort4 bU = UShort4(iround(Bu * 0x4000));
   1948 
   1949 			const UShort4 r0 = UShort4(iround(-R0 * 0x4000));
   1950 			const UShort4 g0 = UShort4(iround(G0 * 0x4000));
   1951 			const UShort4 b0 = UShort4(iround(-B0 * 0x4000));
   1952 
   1953 			UShort4 y = MulHigh(Y, yY);
   1954 			UShort4 r = SubSat(y + MulHigh(V, rV), r0);
   1955 			UShort4 g = SubSat(y + g0, MulHigh(U, gU) + MulHigh(V, gV));
   1956 			UShort4 b = SubSat(y + MulHigh(U, bU), b0);
   1957 
        			// Clamp to 0x3FFF, then shift left by two bits. c.w is not written on this path.
   1958 			c.x = Min(r, UShort4(0x3FFF)) << 2;
   1959 			c.y = Min(g, UShort4(0x3FFF)) << 2;
   1960 			c.z = Min(b, UShort4(0x3FFF)) << 2;
   1961 		}
   1962 		else ASSERT(false);
   1963 	}
   1964 
   1965 	void SamplerCore::sampleTexel(Vector4f &c, Short4 &uuuu, Short4 &vvvv, Short4 &wwww, Float4 &z, Pointer<Byte> &mipmap, Pointer<Byte> buffer[4])
   1966 	{
   1967 		Int index[4];
   1968 
   1969 		computeIndices(index, uuuu, vvvv, wwww, mipmap);
   1970 
   1971 		int f0 = state.textureType == TEXTURE_CUBE ? 0 : 0;
   1972 		int f1 = state.textureType == TEXTURE_CUBE ? 1 : 0;
   1973 		int f2 = state.textureType == TEXTURE_CUBE ? 2 : 0;
   1974 		int f3 = state.textureType == TEXTURE_CUBE ? 3 : 0;
   1975 
   1976 		// Read texels
   1977 		switch(textureComponentCount())
   1978 		{
   1979 		case 4:
   1980 			c.x = *Pointer<Float4>(buffer[f0] + index[0] * 16, 16);
   1981 			c.y = *Pointer<Float4>(buffer[f1] + index[1] * 16, 16);
   1982 			c.z = *Pointer<Float4>(buffer[f2] + index[2] * 16, 16);
   1983 			c.w = *Pointer<Float4>(buffer[f3] + index[3] * 16, 16);
   1984 			transpose4x4(c.x, c.y, c.z, c.w);
   1985 			break;
   1986 		case 3:
   1987 			ASSERT(state.textureFormat == FORMAT_X32B32G32R32F);
   1988 			c.x = *Pointer<Float4>(buffer[f0] + index[0] * 16, 16);
   1989 			c.y = *Pointer<Float4>(buffer[f1] + index[1] * 16, 16);
   1990 			c.z = *Pointer<Float4>(buffer[f2] + index[2] * 16, 16);
   1991 			c.w = *Pointer<Float4>(buffer[f3] + index[3] * 16, 16);
   1992 			transpose4x3(c.x, c.y, c.z, c.w);
   1993 			c.w = Float4(1.0f);
   1994 			break;
   1995 		case 2:
   1996 			// FIXME: Optimal shuffling?
   1997 			c.x.xy = *Pointer<Float4>(buffer[f0] + index[0] * 8);
   1998 			c.x.zw = *Pointer<Float4>(buffer[f1] + index[1] * 8 - 8);
   1999 			c.z.xy = *Pointer<Float4>(buffer[f2] + index[2] * 8);
   2000 			c.z.zw = *Pointer<Float4>(buffer[f3] + index[3] * 8 - 8);
   2001 			c.y = c.x;
   2002 			c.x = Float4(c.x.xz, c.z.xz);
   2003 			c.y = Float4(c.y.yw, c.z.yw);
   2004 			break;
   2005 		case 1:
   2006 			// FIXME: Optimal shuffling?
   2007 			c.x.x = *Pointer<Float>(buffer[f0] + index[0] * 4);
   2008 			c.x.y = *Pointer<Float>(buffer[f1] + index[1] * 4);
   2009 			c.x.z = *Pointer<Float>(buffer[f2] + index[2] * 4);
   2010 			c.x.w = *Pointer<Float>(buffer[f3] + index[3] * 4);
   2011 
   2012 			if(state.textureFormat == FORMAT_D32FS8_SHADOW && state.textureFilter != FILTER_GATHER)
   2013 			{
   2014 				Float4 d = Min(Max(z, Float4(0.0f)), Float4(1.0f));
   2015 
   2016 				c.x = As<Float4>(As<Int4>(CmpNLT(c.x, d)) & As<Int4>(Float4(1.0f)));   // FIXME: Only less-equal?
   2017 			}
   2018 			break;
   2019 		default:
   2020 			ASSERT(false);
   2021 		}
   2022 	}
   2023 
   2024 	void SamplerCore::selectMipmap(Pointer<Byte> &texture, Pointer<Byte> buffer[4], Pointer<Byte> &mipmap, Float &lod, Int face[4], bool secondLOD)
   2025 	{
   2026 		if(state.mipmapFilter < MIPMAP_POINT)
   2027 		{
   2028 			mipmap = texture + OFFSET(Texture,mipmap[0]);
   2029 		}
   2030 		else
   2031 		{
   2032 			Int ilod;
   2033 
   2034 			if(state.mipmapFilter == MIPMAP_POINT)
   2035 			{
   2036 				ilod = RoundInt(lod);
   2037 			}
   2038 			else   // Linear
   2039 			{
   2040 				ilod = Int(lod);
   2041 			}
   2042 
   2043 			mipmap = texture + OFFSET(Texture,mipmap) + ilod * sizeof(Mipmap) + secondLOD * sizeof(Mipmap);
   2044 		}
   2045 
   2046 		if(state.textureType != TEXTURE_CUBE)
   2047 		{
   2048 			buffer[0] = *Pointer<Pointer<Byte> >(mipmap + OFFSET(Mipmap,buffer[0]));
   2049 
   2050 			if(hasYuvFormat())
   2051 			{
   2052 				buffer[1] = *Pointer<Pointer<Byte> >(mipmap + OFFSET(Mipmap,buffer[1]));
   2053 				buffer[2] = *Pointer<Pointer<Byte> >(mipmap + OFFSET(Mipmap,buffer[2]));
   2054 			}
   2055 		}
   2056 		else
   2057 		{
   2058 			for(int i = 0; i < 4; i++)
   2059 			{
   2060 				buffer[i] = *Pointer<Pointer<Byte> >(mipmap + OFFSET(Mipmap,buffer) + face[i] * sizeof(void*));
   2061 			}
   2062 		}
   2063 	}
   2064 
   2065 	Short4 SamplerCore::address(Float4 &uw, AddressingMode addressingMode, Pointer<Byte>& mipmap)
   2066 	{
   2067 		if(addressingMode == ADDRESSING_LAYER && state.textureType != TEXTURE_2D_ARRAY)
   2068 		{
   2069 			return Short4();   // Unused
   2070 		}
   2071 		else if(addressingMode == ADDRESSING_LAYER && state.textureType == TEXTURE_2D_ARRAY)
   2072 		{
   2073 			return Min(Max(Short4(RoundInt(uw)), Short4(0)), *Pointer<Short4>(mipmap + OFFSET(Mipmap, depth)) - Short4(1));
   2074 		}
   2075 		else if(addressingMode == ADDRESSING_CLAMP)
   2076 		{
   2077 			Float4 clamp = Min(Max(uw, Float4(0.0f)), Float4(65535.0f / 65536.0f));
   2078 
   2079 			return Short4(Int4(clamp * Float4(1 << 16)));
   2080 		}
   2081 		else if(addressingMode == ADDRESSING_MIRROR)
   2082 		{
   2083 			Int4 convert = Int4(uw * Float4(1 << 16));
   2084 			Int4 mirror = (convert << 15) >> 31;
   2085 
   2086 			convert ^= mirror;
   2087 
   2088 			return Short4(convert);
   2089 		}
   2090 		else if(addressingMode == ADDRESSING_MIRRORONCE)
   2091 		{
   2092 			// Absolute value
   2093 			Int4 convert = Int4(Abs(uw * Float4(1 << 16)));
   2094 
   2095 			// Clamp
   2096 			convert -= Int4(0x00008000, 0x00008000, 0x00008000, 0x00008000);
   2097 			convert = As<Int4>(Pack(convert, convert));
   2098 
   2099 			return As<Short4>(Int2(convert)) + Short4((short)0x8000, (short)0x8000, (short)0x8000, (short)0x8000);
   2100 		}
   2101 		else   // Wrap (or border)
   2102 		{
   2103 			return Short4(Int4(uw * Float4(1 << 16)));
   2104 		}
   2105 	}
   2106 
   2107 	void SamplerCore::convertFixed12(Short4 &cs, Float4 &cf)
   2108 	{
   2109 		cs = RoundShort4(cf * Float4(0x1000));
   2110 	}
   2111 
   2112 	void SamplerCore::convertFixed12(Vector4s &cs, Vector4f &cf)
   2113 	{
   2114 		convertFixed12(cs.x, cf.x);
   2115 		convertFixed12(cs.y, cf.y);
   2116 		convertFixed12(cs.z, cf.z);
   2117 		convertFixed12(cs.w, cf.w);
   2118 	}
   2119 
   2120 	void SamplerCore::convertSigned12(Float4 &cf, Short4 &cs)
   2121 	{
   2122 		cf = Float4(cs) * Float4(1.0f / 0x0FFE);
   2123 	}
   2124 
   2125 //	void SamplerCore::convertSigned12(Vector4f &cf, Vector4s &cs)
   2126 //	{
   2127 //		convertSigned12(cf.x, cs.x);
   2128 //		convertSigned12(cf.y, cs.y);
   2129 //		convertSigned12(cf.z, cs.z);
   2130 //		convertSigned12(cf.w, cs.w);
   2131 //	}
   2132 
   2133 	void SamplerCore::convertSigned15(Float4 &cf, Short4 &cs)
   2134 	{
   2135 		cf = Float4(cs) * Float4(1.0f / 0x7FFF);
   2136 	}
   2137 
   2138 	void SamplerCore::convertUnsigned16(Float4 &cf, Short4 &cs)
   2139 	{
   2140 		cf = Float4(As<UShort4>(cs)) * Float4(1.0f / 0xFFFF);
   2141 	}
   2142 
   2143 	void SamplerCore::sRGBtoLinear16_8_12(Short4 &c)
   2144 	{
   2145 		c = As<UShort4>(c) >> 8;
   2146 
   2147 		Pointer<Byte> LUT = Pointer<Byte>(constants + OFFSET(Constants,sRGBtoLinear8_12));
   2148 
   2149 		c = Insert(c, *Pointer<Short>(LUT + 2 * Int(Extract(c, 0))), 0);
   2150 		c = Insert(c, *Pointer<Short>(LUT + 2 * Int(Extract(c, 1))), 1);
   2151 		c = Insert(c, *Pointer<Short>(LUT + 2 * Int(Extract(c, 2))), 2);
   2152 		c = Insert(c, *Pointer<Short>(LUT + 2 * Int(Extract(c, 3))), 3);
   2153 	}
   2154 
   2155 	void SamplerCore::sRGBtoLinear16_6_12(Short4 &c)
   2156 	{
   2157 		c = As<UShort4>(c) >> 10;
   2158 
   2159 		Pointer<Byte> LUT = Pointer<Byte>(constants + OFFSET(Constants,sRGBtoLinear6_12));
   2160 
   2161 		c = Insert(c, *Pointer<Short>(LUT + 2 * Int(Extract(c, 0))), 0);
   2162 		c = Insert(c, *Pointer<Short>(LUT + 2 * Int(Extract(c, 1))), 1);
   2163 		c = Insert(c, *Pointer<Short>(LUT + 2 * Int(Extract(c, 2))), 2);
   2164 		c = Insert(c, *Pointer<Short>(LUT + 2 * Int(Extract(c, 3))), 3);
   2165 	}
   2166 
   2167 	void SamplerCore::sRGBtoLinear16_5_12(Short4 &c)
   2168 	{
   2169 		c = As<UShort4>(c) >> 11;
   2170 
   2171 		Pointer<Byte> LUT = Pointer<Byte>(constants + OFFSET(Constants,sRGBtoLinear5_12));
   2172 
   2173 		c = Insert(c, *Pointer<Short>(LUT + 2 * Int(Extract(c, 0))), 0);
   2174 		c = Insert(c, *Pointer<Short>(LUT + 2 * Int(Extract(c, 1))), 1);
   2175 		c = Insert(c, *Pointer<Short>(LUT + 2 * Int(Extract(c, 2))), 2);
   2176 		c = Insert(c, *Pointer<Short>(LUT + 2 * Int(Extract(c, 3))), 3);
   2177 	}
   2178 
   2179 	bool SamplerCore::hasFloatTexture() const
   2180 	{
   2181 		return Surface::isFloatFormat(state.textureFormat);
   2182 	}
   2183 
   2184 	bool SamplerCore::hasUnsignedTextureComponent(int component) const
   2185 	{
   2186 		return Surface::isUnsignedComponent(state.textureFormat, component);
   2187 	}
   2188 
   2189 	int SamplerCore::textureComponentCount() const
   2190 	{
   2191 		return Surface::componentCount(state.textureFormat);
   2192 	}
   2193 
   2194 	bool SamplerCore::has16bitTextureFormat() const
   2195 	{
   2196 		switch(state.textureFormat)
   2197 		{
   2198 		case FORMAT_R5G6B5:
   2199 			return true;
   2200 		case FORMAT_R8I_SNORM:
   2201 		case FORMAT_G8R8I_SNORM:
   2202 		case FORMAT_X8B8G8R8I_SNORM:
   2203 		case FORMAT_A8B8G8R8I_SNORM:
   2204 		case FORMAT_R8I:
   2205 		case FORMAT_R8UI:
   2206 		case FORMAT_G8R8I:
   2207 		case FORMAT_G8R8UI:
   2208 		case FORMAT_X8B8G8R8I:
   2209 		case FORMAT_X8B8G8R8UI:
   2210 		case FORMAT_A8B8G8R8I:
   2211 		case FORMAT_A8B8G8R8UI:
   2212 		case FORMAT_R32I:
   2213 		case FORMAT_R32UI:
   2214 		case FORMAT_G32R32I:
   2215 		case FORMAT_G32R32UI:
   2216 		case FORMAT_X32B32G32R32I:
   2217 		case FORMAT_X32B32G32R32UI:
   2218 		case FORMAT_A32B32G32R32I:
   2219 		case FORMAT_A32B32G32R32UI:
   2220 		case FORMAT_G8R8:
   2221 		case FORMAT_X8R8G8B8:
   2222 		case FORMAT_X8B8G8R8:
   2223 		case FORMAT_A8R8G8B8:
   2224 		case FORMAT_A8B8G8R8:
   2225 		case FORMAT_SRGB8_X8:
   2226 		case FORMAT_SRGB8_A8:
   2227 		case FORMAT_V8U8:
   2228 		case FORMAT_Q8W8V8U8:
   2229 		case FORMAT_X8L8V8U8:
   2230 		case FORMAT_R32F:
   2231 		case FORMAT_G32R32F:
   2232 		case FORMAT_X32B32G32R32F:
   2233 		case FORMAT_A32B32G32R32F:
   2234 		case FORMAT_A8:
   2235 		case FORMAT_R8:
   2236 		case FORMAT_L8:
   2237 		case FORMAT_A8L8:
   2238 		case FORMAT_D32F:
   2239 		case FORMAT_D32F_LOCKABLE:
   2240 		case FORMAT_D32FS8_TEXTURE:
   2241 		case FORMAT_D32FS8_SHADOW:
   2242 		case FORMAT_L16:
   2243 		case FORMAT_G16R16:
   2244 		case FORMAT_A16B16G16R16:
   2245 		case FORMAT_V16U16:
   2246 		case FORMAT_A16W16V16U16:
   2247 		case FORMAT_Q16W16V16U16:
   2248 		case FORMAT_R16I:
   2249 		case FORMAT_R16UI:
   2250 		case FORMAT_G16R16I:
   2251 		case FORMAT_G16R16UI:
   2252 		case FORMAT_X16B16G16R16I:
   2253 		case FORMAT_X16B16G16R16UI:
   2254 		case FORMAT_A16B16G16R16I:
   2255 		case FORMAT_A16B16G16R16UI:
   2256 		case FORMAT_YV12_BT601:
   2257 		case FORMAT_YV12_BT709:
   2258 		case FORMAT_YV12_JFIF:
   2259 			return false;
   2260 		default:
   2261 			ASSERT(false);
   2262 		}
   2263 
   2264 		return false;
   2265 	}
   2266 
   2267 	bool SamplerCore::has8bitTextureComponents() const
   2268 	{
   2269 		switch(state.textureFormat)
   2270 		{
   2271 		case FORMAT_G8R8:
   2272 		case FORMAT_X8R8G8B8:
   2273 		case FORMAT_X8B8G8R8:
   2274 		case FORMAT_A8R8G8B8:
   2275 		case FORMAT_A8B8G8R8:
   2276 		case FORMAT_SRGB8_X8:
   2277 		case FORMAT_SRGB8_A8:
   2278 		case FORMAT_V8U8:
   2279 		case FORMAT_Q8W8V8U8:
   2280 		case FORMAT_X8L8V8U8:
   2281 		case FORMAT_A8:
   2282 		case FORMAT_R8:
   2283 		case FORMAT_L8:
   2284 		case FORMAT_A8L8:
   2285 		case FORMAT_R8I_SNORM:
   2286 		case FORMAT_G8R8I_SNORM:
   2287 		case FORMAT_X8B8G8R8I_SNORM:
   2288 		case FORMAT_A8B8G8R8I_SNORM:
   2289 		case FORMAT_R8I:
   2290 		case FORMAT_R8UI:
   2291 		case FORMAT_G8R8I:
   2292 		case FORMAT_G8R8UI:
   2293 		case FORMAT_X8B8G8R8I:
   2294 		case FORMAT_X8B8G8R8UI:
   2295 		case FORMAT_A8B8G8R8I:
   2296 		case FORMAT_A8B8G8R8UI:
   2297 			return true;
   2298 		case FORMAT_R5G6B5:
   2299 		case FORMAT_R32F:
   2300 		case FORMAT_G32R32F:
   2301 		case FORMAT_X32B32G32R32F:
   2302 		case FORMAT_A32B32G32R32F:
   2303 		case FORMAT_D32F:
   2304 		case FORMAT_D32F_LOCKABLE:
   2305 		case FORMAT_D32FS8_TEXTURE:
   2306 		case FORMAT_D32FS8_SHADOW:
   2307 		case FORMAT_L16:
   2308 		case FORMAT_G16R16:
   2309 		case FORMAT_A16B16G16R16:
   2310 		case FORMAT_V16U16:
   2311 		case FORMAT_A16W16V16U16:
   2312 		case FORMAT_Q16W16V16U16:
   2313 		case FORMAT_R32I:
   2314 		case FORMAT_R32UI:
   2315 		case FORMAT_G32R32I:
   2316 		case FORMAT_G32R32UI:
   2317 		case FORMAT_X32B32G32R32I:
   2318 		case FORMAT_X32B32G32R32UI:
   2319 		case FORMAT_A32B32G32R32I:
   2320 		case FORMAT_A32B32G32R32UI:
   2321 		case FORMAT_R16I:
   2322 		case FORMAT_R16UI:
   2323 		case FORMAT_G16R16I:
   2324 		case FORMAT_G16R16UI:
   2325 		case FORMAT_X16B16G16R16I:
   2326 		case FORMAT_X16B16G16R16UI:
   2327 		case FORMAT_A16B16G16R16I:
   2328 		case FORMAT_A16B16G16R16UI:
   2329 		case FORMAT_YV12_BT601:
   2330 		case FORMAT_YV12_BT709:
   2331 		case FORMAT_YV12_JFIF:
   2332 			return false;
   2333 		default:
   2334 			ASSERT(false);
   2335 		}
   2336 
   2337 		return false;
   2338 	}
   2339 
   2340 	bool SamplerCore::has16bitTextureComponents() const
   2341 	{
   2342 		switch(state.textureFormat)
   2343 		{
   2344 		case FORMAT_R5G6B5:
   2345 		case FORMAT_R8I_SNORM:
   2346 		case FORMAT_G8R8I_SNORM:
   2347 		case FORMAT_X8B8G8R8I_SNORM:
   2348 		case FORMAT_A8B8G8R8I_SNORM:
   2349 		case FORMAT_R8I:
   2350 		case FORMAT_R8UI:
   2351 		case FORMAT_G8R8I:
   2352 		case FORMAT_G8R8UI:
   2353 		case FORMAT_X8B8G8R8I:
   2354 		case FORMAT_X8B8G8R8UI:
   2355 		case FORMAT_A8B8G8R8I:
   2356 		case FORMAT_A8B8G8R8UI:
   2357 		case FORMAT_R32I:
   2358 		case FORMAT_R32UI:
   2359 		case FORMAT_G32R32I:
   2360 		case FORMAT_G32R32UI:
   2361 		case FORMAT_X32B32G32R32I:
   2362 		case FORMAT_X32B32G32R32UI:
   2363 		case FORMAT_A32B32G32R32I:
   2364 		case FORMAT_A32B32G32R32UI:
   2365 		case FORMAT_G8R8:
   2366 		case FORMAT_X8R8G8B8:
   2367 		case FORMAT_X8B8G8R8:
   2368 		case FORMAT_A8R8G8B8:
   2369 		case FORMAT_A8B8G8R8:
   2370 		case FORMAT_SRGB8_X8:
   2371 		case FORMAT_SRGB8_A8:
   2372 		case FORMAT_V8U8:
   2373 		case FORMAT_Q8W8V8U8:
   2374 		case FORMAT_X8L8V8U8:
   2375 		case FORMAT_R32F:
   2376 		case FORMAT_G32R32F:
   2377 		case FORMAT_X32B32G32R32F:
   2378 		case FORMAT_A32B32G32R32F:
   2379 		case FORMAT_A8:
   2380 		case FORMAT_R8:
   2381 		case FORMAT_L8:
   2382 		case FORMAT_A8L8:
   2383 		case FORMAT_D32F:
   2384 		case FORMAT_D32F_LOCKABLE:
   2385 		case FORMAT_D32FS8_TEXTURE:
   2386 		case FORMAT_D32FS8_SHADOW:
   2387 		case FORMAT_YV12_BT601:
   2388 		case FORMAT_YV12_BT709:
   2389 		case FORMAT_YV12_JFIF:
   2390 			return false;
   2391 		case FORMAT_L16:
   2392 		case FORMAT_G16R16:
   2393 		case FORMAT_A16B16G16R16:
   2394 		case FORMAT_R16I:
   2395 		case FORMAT_R16UI:
   2396 		case FORMAT_G16R16I:
   2397 		case FORMAT_G16R16UI:
   2398 		case FORMAT_X16B16G16R16I:
   2399 		case FORMAT_X16B16G16R16UI:
   2400 		case FORMAT_A16B16G16R16I:
   2401 		case FORMAT_A16B16G16R16UI:
   2402 		case FORMAT_V16U16:
   2403 		case FORMAT_A16W16V16U16:
   2404 		case FORMAT_Q16W16V16U16:
   2405 			return true;
   2406 		default:
   2407 			ASSERT(false);
   2408 		}
   2409 
   2410 		return false;
   2411 	}
   2412 
   2413 	bool SamplerCore::hasYuvFormat() const
   2414 	{
   2415 		switch(state.textureFormat)
   2416 		{
   2417 		case FORMAT_YV12_BT601:
   2418 		case FORMAT_YV12_BT709:
   2419 		case FORMAT_YV12_JFIF:
   2420 			return true;
   2421 		case FORMAT_R5G6B5:
   2422 		case FORMAT_R8I_SNORM:
   2423 		case FORMAT_G8R8I_SNORM:
   2424 		case FORMAT_X8B8G8R8I_SNORM:
   2425 		case FORMAT_A8B8G8R8I_SNORM:
   2426 		case FORMAT_R8I:
   2427 		case FORMAT_R8UI:
   2428 		case FORMAT_G8R8I:
   2429 		case FORMAT_G8R8UI:
   2430 		case FORMAT_X8B8G8R8I:
   2431 		case FORMAT_X8B8G8R8UI:
   2432 		case FORMAT_A8B8G8R8I:
   2433 		case FORMAT_A8B8G8R8UI:
   2434 		case FORMAT_R32I:
   2435 		case FORMAT_R32UI:
   2436 		case FORMAT_G32R32I:
   2437 		case FORMAT_G32R32UI:
   2438 		case FORMAT_X32B32G32R32I:
   2439 		case FORMAT_X32B32G32R32UI:
   2440 		case FORMAT_A32B32G32R32I:
   2441 		case FORMAT_A32B32G32R32UI:
   2442 		case FORMAT_G8R8:
   2443 		case FORMAT_X8R8G8B8:
   2444 		case FORMAT_X8B8G8R8:
   2445 		case FORMAT_A8R8G8B8:
   2446 		case FORMAT_A8B8G8R8:
   2447 		case FORMAT_SRGB8_X8:
   2448 		case FORMAT_SRGB8_A8:
   2449 		case FORMAT_V8U8:
   2450 		case FORMAT_Q8W8V8U8:
   2451 		case FORMAT_X8L8V8U8:
   2452 		case FORMAT_R32F:
   2453 		case FORMAT_G32R32F:
   2454 		case FORMAT_X32B32G32R32F:
   2455 		case FORMAT_A32B32G32R32F:
   2456 		case FORMAT_A8:
   2457 		case FORMAT_R8:
   2458 		case FORMAT_L8:
   2459 		case FORMAT_A8L8:
   2460 		case FORMAT_D32F:
   2461 		case FORMAT_D32F_LOCKABLE:
   2462 		case FORMAT_D32FS8_TEXTURE:
   2463 		case FORMAT_D32FS8_SHADOW:
   2464 		case FORMAT_L16:
   2465 		case FORMAT_G16R16:
   2466 		case FORMAT_A16B16G16R16:
   2467 		case FORMAT_R16I:
   2468 		case FORMAT_R16UI:
   2469 		case FORMAT_G16R16I:
   2470 		case FORMAT_G16R16UI:
   2471 		case FORMAT_X16B16G16R16I:
   2472 		case FORMAT_X16B16G16R16UI:
   2473 		case FORMAT_A16B16G16R16I:
   2474 		case FORMAT_A16B16G16R16UI:
   2475 		case FORMAT_V16U16:
   2476 		case FORMAT_A16W16V16U16:
   2477 		case FORMAT_Q16W16V16U16:
   2478 			return false;
   2479 		default:
   2480 			ASSERT(false);
   2481 		}
   2482 
   2483 		return false;
   2484 	}
   2485 
   2486 	bool SamplerCore::isRGBComponent(int component) const
   2487 	{
   2488 		switch(state.textureFormat)
   2489 		{
   2490 		case FORMAT_R5G6B5:         return component < 3;
   2491 		case FORMAT_R8I_SNORM:      return component < 1;
   2492 		case FORMAT_G8R8I_SNORM:    return component < 2;
   2493 		case FORMAT_X8B8G8R8I_SNORM: return component < 3;
   2494 		case FORMAT_A8B8G8R8I_SNORM: return component < 3;
   2495 		case FORMAT_R8I:            return component < 1;
   2496 		case FORMAT_R8UI:           return component < 1;
   2497 		case FORMAT_G8R8I:          return component < 2;
   2498 		case FORMAT_G8R8UI:         return component < 2;
   2499 		case FORMAT_X8B8G8R8I:      return component < 3;
   2500 		case FORMAT_X8B8G8R8UI:     return component < 3;
   2501 		case FORMAT_A8B8G8R8I:      return component < 3;
   2502 		case FORMAT_A8B8G8R8UI:     return component < 3;
   2503 		case FORMAT_R32I:           return component < 1;
   2504 		case FORMAT_R32UI:          return component < 1;
   2505 		case FORMAT_G32R32I:        return component < 2;
   2506 		case FORMAT_G32R32UI:       return component < 2;
   2507 		case FORMAT_X32B32G32R32I:  return component < 3;
   2508 		case FORMAT_X32B32G32R32UI: return component < 3;
   2509 		case FORMAT_A32B32G32R32I:  return component < 3;
   2510 		case FORMAT_A32B32G32R32UI: return component < 3;
   2511 		case FORMAT_G8R8:           return component < 2;
   2512 		case FORMAT_X8R8G8B8:       return component < 3;
   2513 		case FORMAT_X8B8G8R8:       return component < 3;
   2514 		case FORMAT_A8R8G8B8:       return component < 3;
   2515 		case FORMAT_A8B8G8R8:       return component < 3;
   2516 		case FORMAT_SRGB8_X8:       return component < 3;
   2517 		case FORMAT_SRGB8_A8:       return component < 3;
   2518 		case FORMAT_V8U8:           return false;
   2519 		case FORMAT_Q8W8V8U8:       return false;
   2520 		case FORMAT_X8L8V8U8:       return false;
   2521 		case FORMAT_R32F:           return component < 1;
   2522 		case FORMAT_G32R32F:        return component < 2;
   2523 		case FORMAT_X32B32G32R32F:  return component < 3;
   2524 		case FORMAT_A32B32G32R32F:  return component < 3;
   2525 		case FORMAT_A8:             return false;
   2526 		case FORMAT_R8:             return component < 1;
   2527 		case FORMAT_L8:             return component < 1;
   2528 		case FORMAT_A8L8:           return component < 1;
   2529 		case FORMAT_D32F:           return false;
   2530 		case FORMAT_D32F_LOCKABLE:  return false;
   2531 		case FORMAT_D32FS8_TEXTURE: return false;
   2532 		case FORMAT_D32FS8_SHADOW:  return false;
   2533 		case FORMAT_L16:            return component < 1;
   2534 		case FORMAT_G16R16:         return component < 2;
   2535 		case FORMAT_A16B16G16R16:   return component < 3;
   2536 		case FORMAT_R16I:           return component < 1;
   2537 		case FORMAT_R16UI:          return component < 1;
   2538 		case FORMAT_G16R16I:        return component < 2;
   2539 		case FORMAT_G16R16UI:       return component < 2;
   2540 		case FORMAT_X16B16G16R16I:  return component < 3;
   2541 		case FORMAT_X16B16G16R16UI: return component < 3;
   2542 		case FORMAT_A16B16G16R16I:  return component < 3;
   2543 		case FORMAT_A16B16G16R16UI: return component < 3;
   2544 		case FORMAT_V16U16:         return false;
   2545 		case FORMAT_A16W16V16U16:   return false;
   2546 		case FORMAT_Q16W16V16U16:   return false;
   2547 		case FORMAT_YV12_BT601:     return component < 3;
   2548 		case FORMAT_YV12_BT709:     return component < 3;
   2549 		case FORMAT_YV12_JFIF:      return component < 3;
   2550 		default:
   2551 			ASSERT(false);
   2552 		}
   2553 
   2554 		return false;
   2555 	}
   2556 }
   2557