Home | History | Annotate | Download | only in Shader
      1 // Copyright 2016 The SwiftShader Authors. All Rights Reserved.
      2 //
      3 // Licensed under the Apache License, Version 2.0 (the "License");
      4 // you may not use this file except in compliance with the License.
      5 // You may obtain a copy of the License at
      6 //
      7 //    http://www.apache.org/licenses/LICENSE-2.0
      8 //
      9 // Unless required by applicable law or agreed to in writing, software
     10 // distributed under the License is distributed on an "AS IS" BASIS,
     11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 // See the License for the specific language governing permissions and
     13 // limitations under the License.
     14 
     15 #include "SamplerCore.hpp"
     16 
     17 #include "Constants.hpp"
     18 #include "Debug.hpp"
     19 
     20 namespace
     21 {
     22 	void applySwizzle(sw::SwizzleType swizzle, sw::Short4& s, const sw::Vector4s& c)
     23 	{
     24 		switch(swizzle)
     25 		{
     26 		case sw::SWIZZLE_RED:	s = c.x; break;
     27 		case sw::SWIZZLE_GREEN: s = c.y; break;
     28 		case sw::SWIZZLE_BLUE:  s = c.z; break;
     29 		case sw::SWIZZLE_ALPHA: s = c.w; break;
     30 		case sw::SWIZZLE_ZERO:  s = sw::Short4(0x0000); break;
     31 		case sw::SWIZZLE_ONE:   s = sw::Short4(0x1000); break;
     32 		default: ASSERT(false);
     33 		}
     34 	}
     35 
     36 	void applySwizzle(sw::SwizzleType swizzle, sw::Float4& f, const sw::Vector4f& c)
     37 	{
     38 		switch(swizzle)
     39 		{
     40 		case sw::SWIZZLE_RED:	f = c.x; break;
     41 		case sw::SWIZZLE_GREEN: f = c.y; break;
     42 		case sw::SWIZZLE_BLUE:  f = c.z; break;
     43 		case sw::SWIZZLE_ALPHA: f = c.w; break;
     44 		case sw::SWIZZLE_ZERO:  f = sw::Float4(0.0f, 0.0f, 0.0f, 0.0f); break;
     45 		case sw::SWIZZLE_ONE:   f = sw::Float4(1.0f, 1.0f, 1.0f, 1.0f); break;
     46 		default: ASSERT(false);
     47 		}
     48 	}
     49 }
     50 
     51 namespace sw
     52 {
     53 	extern bool colorsDefaultToZero;
     54 
     55 	SamplerCore::SamplerCore(Pointer<Byte> &constants, const Sampler::State &state) : constants(constants), state(state)
     56 	{
     57 	}
     58 
     59 	void SamplerCore::sampleTexture(Pointer<Byte> &texture, Vector4s &c, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &dsx, Vector4f &dsy)
     60 	{
     61 		sampleTexture(texture, c, u, v, w, q, dsx, dsy, dsx, Implicit, true);
     62 	}
     63 
     64 	void SamplerCore::sampleTexture(Pointer<Byte> &texture, Vector4s &c, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &dsx, Vector4f &dsy, Vector4f &offset, SamplerFunction function, bool fixed12)
     65 	{
     66 		#if PERF_PROFILE
     67 			AddAtomic(Pointer<Long>(&profiler.texOperations), 4);
     68 
     69 			if(state.compressedFormat)
     70 			{
     71 				AddAtomic(Pointer<Long>(&profiler.compressedTex), 4);
     72 			}
     73 		#endif
     74 
     75 		Float4 uuuu = u;
     76 		Float4 vvvv = v;
     77 		Float4 wwww = w;
     78 
     79 		if(state.textureType == TEXTURE_NULL)
     80 		{
     81 			c.x = Short4(0x0000);
     82 			c.y = Short4(0x0000);
     83 			c.z = Short4(0x0000);
     84 
     85 			if(fixed12)   // FIXME: Convert to fixed12 at higher level, when required
     86 			{
     87 				c.w = Short4(0x1000);
     88 			}
     89 			else
     90 			{
     91 				c.w = Short4(0xFFFFu);   // FIXME
     92 			}
     93 		}
     94 		else
     95 		{
     96 			Int face[4];
     97 			Float4 lodX;
     98 			Float4 lodY;
     99 			Float4 lodZ;
    100 
    101 			if(state.textureType == TEXTURE_CUBE)
    102 			{
    103 				cubeFace(face, uuuu, vvvv, lodX, lodY, lodZ, u, v, w);
    104 			}
    105 
    106 			Float lod;
    107 			Float anisotropy;
    108 			Float4 uDelta;
    109 			Float4 vDelta;
    110 			Float lodBias = (function == Fetch) ? Float4(As<Int4>(q)).x : q.x;
    111 
    112 			if(state.textureType != TEXTURE_3D)
    113 			{
    114 				if(state.textureType != TEXTURE_CUBE)
    115 				{
    116 					computeLod(texture, lod, anisotropy, uDelta, vDelta, uuuu, vvvv, lodBias, dsx, dsy, function);
    117 				}
    118 				else
    119 				{
    120 					computeLodCube(texture, lod, lodX, lodY, lodZ, lodBias, dsx, dsy, function);
    121 				}
    122 			}
    123 			else
    124 			{
    125 				computeLod3D(texture, lod, uuuu, vvvv, wwww, lodBias, dsx, dsy, function);
    126 			}
    127 
    128 			if(!hasFloatTexture())
    129 			{
    130 				sampleFilter(texture, c, uuuu, vvvv, wwww, offset, lod, anisotropy, uDelta, vDelta, face, function);
    131 			}
    132 			else
    133 			{
    134 				Vector4f cf;
    135 
    136 				sampleFloatFilter(texture, cf, uuuu, vvvv, wwww, offset, lod, anisotropy, uDelta, vDelta, face, function);
    137 
    138 				convertFixed12(c, cf);
    139 			}
    140 
    141 			if(fixed12 && !hasFloatTexture())
    142 			{
    143 				if(has16bitTextureFormat())
    144 				{
    145 					switch(state.textureFormat)
    146 					{
    147 					case FORMAT_R5G6B5:
    148 						if(state.sRGB)
    149 						{
    150 							sRGBtoLinear16_5_12(c.x);
    151 							sRGBtoLinear16_6_12(c.y);
    152 							sRGBtoLinear16_5_12(c.z);
    153 						}
    154 						else
    155 						{
    156 							c.x = MulHigh(As<UShort4>(c.x), UShort4(0x10000000 / 0xF800));
    157 							c.y = MulHigh(As<UShort4>(c.y), UShort4(0x10000000 / 0xFC00));
    158 							c.z = MulHigh(As<UShort4>(c.z), UShort4(0x10000000 / 0xF800));
    159 						}
    160 						break;
    161 					default:
    162 						ASSERT(false);
    163 					}
    164 				}
    165 				else
    166 				{
    167 					for(int component = 0; component < textureComponentCount(); component++)
    168 					{
    169 						if(state.sRGB && isRGBComponent(component))
    170 						{
    171 							sRGBtoLinear16_8_12(c[component]);   // FIXME: Perform linearization at surface level for read-only textures
    172 						}
    173 						else
    174 						{
    175 							if(hasUnsignedTextureComponent(component))
    176 							{
    177 								c[component] = As<UShort4>(c[component]) >> 4;
    178 							}
    179 							else
    180 							{
    181 								c[component] = c[component] >> 3;
    182 							}
    183 						}
    184 					}
    185 				}
    186 			}
    187 
    188 			if(fixed12 && state.textureFilter != FILTER_GATHER)
    189 			{
    190 				int componentCount = textureComponentCount();
    191 				short defaultColorValue = colorsDefaultToZero ? 0x0000 : 0x1000;
    192 
    193 				switch(state.textureFormat)
    194 				{
    195 				case FORMAT_R8I_SNORM:
    196 				case FORMAT_G8R8I_SNORM:
    197 				case FORMAT_X8B8G8R8I_SNORM:
    198 				case FORMAT_A8B8G8R8I_SNORM:
    199 				case FORMAT_R8:
    200 				case FORMAT_R5G6B5:
    201 				case FORMAT_G8R8:
    202 				case FORMAT_R8I:
    203 				case FORMAT_R8UI:
    204 				case FORMAT_G8R8I:
    205 				case FORMAT_G8R8UI:
    206 				case FORMAT_X8B8G8R8I:
    207 				case FORMAT_X8B8G8R8UI:
    208 				case FORMAT_A8B8G8R8I:
    209 				case FORMAT_A8B8G8R8UI:
    210 				case FORMAT_R16I:
    211 				case FORMAT_R16UI:
    212 				case FORMAT_G16R16:
    213 				case FORMAT_G16R16I:
    214 				case FORMAT_G16R16UI:
    215 				case FORMAT_X16B16G16R16I:
    216 				case FORMAT_X16B16G16R16UI:
    217 				case FORMAT_A16B16G16R16:
    218 				case FORMAT_A16B16G16R16I:
    219 				case FORMAT_A16B16G16R16UI:
    220 				case FORMAT_R32I:
    221 				case FORMAT_R32UI:
    222 				case FORMAT_G32R32I:
    223 				case FORMAT_G32R32UI:
    224 				case FORMAT_X32B32G32R32I:
    225 				case FORMAT_X32B32G32R32UI:
    226 				case FORMAT_A32B32G32R32I:
    227 				case FORMAT_A32B32G32R32UI:
    228 				case FORMAT_X8R8G8B8:
    229 				case FORMAT_X8B8G8R8:
    230 				case FORMAT_A8R8G8B8:
    231 				case FORMAT_A8B8G8R8:
    232 				case FORMAT_SRGB8_X8:
    233 				case FORMAT_SRGB8_A8:
    234 				case FORMAT_V8U8:
    235 				case FORMAT_Q8W8V8U8:
    236 				case FORMAT_X8L8V8U8:
    237 				case FORMAT_V16U16:
    238 				case FORMAT_A16W16V16U16:
    239 				case FORMAT_Q16W16V16U16:
    240 				case FORMAT_YV12_BT601:
    241 				case FORMAT_YV12_BT709:
    242 				case FORMAT_YV12_JFIF:
    243 					if(componentCount < 2) c.y = Short4(defaultColorValue);
    244 					if(componentCount < 3) c.z = Short4(defaultColorValue);
    245 					if(componentCount < 4) c.w = Short4(0x1000);
    246 					break;
    247 				case FORMAT_A8:
    248 					c.w = c.x;
    249 					c.x = Short4(0x0000);
    250 					c.y = Short4(0x0000);
    251 					c.z = Short4(0x0000);
    252 					break;
    253 				case FORMAT_L8:
    254 				case FORMAT_L16:
    255 					c.y = c.x;
    256 					c.z = c.x;
    257 					c.w = Short4(0x1000);
    258 					break;
    259 				case FORMAT_A8L8:
    260 					c.w = c.y;
    261 					c.y = c.x;
    262 					c.z = c.x;
    263 					break;
    264 				case FORMAT_R32F:
    265 					c.y = Short4(defaultColorValue);
    266 				case FORMAT_G32R32F:
    267 					c.z = Short4(defaultColorValue);
    268 				case FORMAT_X32B32G32R32F:
    269 					c.w = Short4(0x1000);
    270 				case FORMAT_A32B32G32R32F:
    271 					break;
    272 				case FORMAT_D32F:
    273 				case FORMAT_D32F_LOCKABLE:
    274 				case FORMAT_D32FS8_TEXTURE:
    275 				case FORMAT_D32FS8_SHADOW:
    276 					c.y = c.x;
    277 					c.z = c.x;
    278 					c.w = c.x;
    279 					break;
    280 				default:
    281 					ASSERT(false);
    282 				}
    283 			}
    284 		}
    285 
    286 		if(fixed12 &&
    287 		   ((state.swizzleR != SWIZZLE_RED) ||
    288 		    (state.swizzleG != SWIZZLE_GREEN) ||
    289 		    (state.swizzleB != SWIZZLE_BLUE) ||
    290 		    (state.swizzleA != SWIZZLE_ALPHA)))
    291 		{
    292 			const Vector4s col(c);
    293 			applySwizzle(state.swizzleR, c.x, col);
    294 			applySwizzle(state.swizzleG, c.y, col);
    295 			applySwizzle(state.swizzleB, c.z, col);
    296 			applySwizzle(state.swizzleA, c.w, col);
    297 		}
    298 	}
    299 
    300 	void SamplerCore::sampleTexture(Pointer<Byte> &texture, Vector4f &c, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &dsx, Vector4f &dsy, Vector4f &offset, SamplerFunction function)
    301 	{
    302 		#if PERF_PROFILE
    303 			AddAtomic(Pointer<Long>(&profiler.texOperations), 4);
    304 
    305 			if(state.compressedFormat)
    306 			{
    307 				AddAtomic(Pointer<Long>(&profiler.compressedTex), 4);
    308 			}
    309 		#endif
    310 
    311 		if(state.textureType == TEXTURE_NULL)
    312 		{
    313 			c.x = Float4(0.0f);
    314 			c.y = Float4(0.0f);
    315 			c.z = Float4(0.0f);
    316 			c.w = Float4(1.0f);
    317 		}
    318 		else
    319 		{
    320 			// FIXME: YUV and sRGB are not supported by the floating point path
    321 			bool forceFloatFiltering = state.highPrecisionFiltering && !state.sRGB && !hasYuvFormat() && (state.textureFilter != FILTER_POINT);
    322 			if(hasFloatTexture() || hasUnnormalizedIntegerTexture() || forceFloatFiltering)   // FIXME: Mostly identical to integer sampling
    323 			{
    324 				Float4 uuuu = u;
    325 				Float4 vvvv = v;
    326 				Float4 wwww = w;
    327 
    328 				Int face[4];
    329 				Float4 lodX;
    330 				Float4 lodY;
    331 				Float4 lodZ;
    332 
    333 				if(state.textureType == TEXTURE_CUBE)
    334 				{
    335 					cubeFace(face, uuuu, vvvv, lodX, lodY, lodZ, u, v, w);
    336 				}
    337 
    338 				Float lod;
    339 				Float anisotropy;
    340 				Float4 uDelta;
    341 				Float4 vDelta;
    342 				Float lodBias = (function == Fetch) ? Float4(As<Int4>(q)).x : q.x;
    343 
    344 				if(state.textureType != TEXTURE_3D)
    345 				{
    346 					if(state.textureType != TEXTURE_CUBE)
    347 					{
    348 						computeLod(texture, lod, anisotropy, uDelta, vDelta, uuuu, vvvv, lodBias, dsx, dsy, function);
    349 					}
    350 					else
    351 					{
    352 						computeLodCube(texture, lod, lodX, lodY, lodZ, lodBias, dsx, dsy, function);
    353 					}
    354 				}
    355 				else
    356 				{
    357 					computeLod3D(texture, lod, uuuu, vvvv, wwww, lodBias, dsx, dsy, function);
    358 				}
    359 
    360 				sampleFloatFilter(texture, c, uuuu, vvvv, wwww, offset, lod, anisotropy, uDelta, vDelta, face, function);
    361 
    362 				if(!hasFloatTexture() && !hasUnnormalizedIntegerTexture())
    363 				{
    364 					if(has16bitTextureFormat())
    365 					{
    366 						switch(state.textureFormat)
    367 						{
    368 						case FORMAT_R5G6B5:
    369 							c.x *= Float4(1.0f / 0xF800);
    370 							c.y *= Float4(1.0f / 0xFC00);
    371 							c.z *= Float4(1.0f / 0xF800);
    372 							break;
    373 						default:
    374 							ASSERT(false);
    375 						}
    376 					}
    377 					else
    378 					{
    379 						for(int component = 0; component < textureComponentCount(); component++)
    380 						{
    381 							c[component] *= Float4(hasUnsignedTextureComponent(component) ? 1.0f / 0xFFFF : 1.0f / 0x7FFF);
    382 						}
    383 					}
    384 				}
    385 			}
    386 			else
    387 			{
    388 				Vector4s cs;
    389 
    390 				sampleTexture(texture, cs, u, v, w, q, dsx, dsy, offset, function, false);
    391 
    392 				if(has16bitTextureFormat())
    393 				{
    394 					switch(state.textureFormat)
    395 					{
    396 					case FORMAT_R5G6B5:
    397 						if(state.sRGB)
    398 						{
    399 							sRGBtoLinear16_5_12(cs.x);
    400 							sRGBtoLinear16_6_12(cs.y);
    401 							sRGBtoLinear16_5_12(cs.z);
    402 
    403 							convertSigned12(c.x, cs.x);
    404 							convertSigned12(c.y, cs.y);
    405 							convertSigned12(c.z, cs.z);
    406 						}
    407 						else
    408 						{
    409 							c.x = Float4(As<UShort4>(cs.x)) * Float4(1.0f / 0xF800);
    410 							c.y = Float4(As<UShort4>(cs.y)) * Float4(1.0f / 0xFC00);
    411 							c.z = Float4(As<UShort4>(cs.z)) * Float4(1.0f / 0xF800);
    412 						}
    413 						break;
    414 					default:
    415 						ASSERT(false);
    416 					}
    417 				}
    418 				else
    419 				{
    420 					for(int component = 0; component < textureComponentCount(); component++)
    421 					{
    422 						// Normalized integer formats
    423 						if(state.sRGB && isRGBComponent(component))
    424 						{
    425 							sRGBtoLinear16_8_12(cs[component]);   // FIXME: Perform linearization at surface level for read-only textures
    426 							convertSigned12(c[component], cs[component]);
    427 						}
    428 						else
    429 						{
    430 							if(hasUnsignedTextureComponent(component))
    431 							{
    432 								convertUnsigned16(c[component], cs[component]);
    433 							}
    434 							else
    435 							{
    436 								convertSigned15(c[component], cs[component]);
    437 							}
    438 						}
    439 					}
    440 				}
    441 			}
    442 
    443 			int componentCount = textureComponentCount();
    444 			float defaultColorValue = colorsDefaultToZero ? 0.0f : 1.0f;
    445 
    446 			if(state.textureFilter != FILTER_GATHER)
    447 			{
    448 				switch(state.textureFormat)
    449 				{
    450 				case FORMAT_R8I:
    451 				case FORMAT_R8UI:
    452 				case FORMAT_R16I:
    453 				case FORMAT_R16UI:
    454 				case FORMAT_R32I:
    455 				case FORMAT_R32UI:
    456 					c.y = As<Float4>(UInt4(0));
    457 				case FORMAT_G8R8I:
    458 				case FORMAT_G8R8UI:
    459 				case FORMAT_G16R16I:
    460 				case FORMAT_G16R16UI:
    461 				case FORMAT_G32R32I:
    462 				case FORMAT_G32R32UI:
    463 					c.z = As<Float4>(UInt4(0));
    464 				case FORMAT_X8B8G8R8I:
    465 				case FORMAT_X8B8G8R8UI:
    466 				case FORMAT_X16B16G16R16I:
    467 				case FORMAT_X16B16G16R16UI:
    468 				case FORMAT_X32B32G32R32I:
    469 				case FORMAT_X32B32G32R32UI:
    470 					c.w = As<Float4>(UInt4(1));
    471 				case FORMAT_A8B8G8R8I:
    472 				case FORMAT_A8B8G8R8UI:
    473 				case FORMAT_A16B16G16R16I:
    474 				case FORMAT_A16B16G16R16UI:
    475 				case FORMAT_A32B32G32R32I:
    476 				case FORMAT_A32B32G32R32UI:
    477 					break;
    478 				case FORMAT_R8I_SNORM:
    479 				case FORMAT_G8R8I_SNORM:
    480 				case FORMAT_X8B8G8R8I_SNORM:
    481 				case FORMAT_A8B8G8R8I_SNORM:
    482 				case FORMAT_R8:
    483 				case FORMAT_R5G6B5:
    484 				case FORMAT_G8R8:
    485 				case FORMAT_G16R16:
    486 				case FORMAT_A16B16G16R16:
    487 				case FORMAT_X8R8G8B8:
    488 				case FORMAT_X8B8G8R8:
    489 				case FORMAT_A8R8G8B8:
    490 				case FORMAT_A8B8G8R8:
    491 				case FORMAT_SRGB8_X8:
    492 				case FORMAT_SRGB8_A8:
    493 				case FORMAT_V8U8:
    494 				case FORMAT_Q8W8V8U8:
    495 				case FORMAT_X8L8V8U8:
    496 				case FORMAT_V16U16:
    497 				case FORMAT_A16W16V16U16:
    498 				case FORMAT_Q16W16V16U16:
    499 				case FORMAT_YV12_BT601:
    500 				case FORMAT_YV12_BT709:
    501 				case FORMAT_YV12_JFIF:
    502 					if(componentCount < 2) c.y = Float4(defaultColorValue);
    503 					if(componentCount < 3) c.z = Float4(defaultColorValue);
    504 					if(componentCount < 4) c.w = Float4(1.0f);
    505 					break;
    506 				case FORMAT_A8:
    507 					c.w = c.x;
    508 					c.x = Float4(0.0f);
    509 					c.y = Float4(0.0f);
    510 					c.z = Float4(0.0f);
    511 					break;
    512 				case FORMAT_L8:
    513 				case FORMAT_L16:
    514 					c.y = c.x;
    515 					c.z = c.x;
    516 					c.w = Float4(1.0f);
    517 					break;
    518 				case FORMAT_A8L8:
    519 					c.w = c.y;
    520 					c.y = c.x;
    521 					c.z = c.x;
    522 					break;
    523 				case FORMAT_R32F:
    524 					c.y = Float4(defaultColorValue);
    525 				case FORMAT_G32R32F:
    526 					c.z = Float4(defaultColorValue);
    527 				case FORMAT_X32B32G32R32F:
    528 					c.w = Float4(1.0f);
    529 				case FORMAT_A32B32G32R32F:
    530 					break;
    531 				case FORMAT_D32F:
    532 				case FORMAT_D32F_LOCKABLE:
    533 				case FORMAT_D32FS8_TEXTURE:
    534 				case FORMAT_D32FS8_SHADOW:
    535 					c.y = c.x;
    536 					c.z = c.x;
    537 					c.w = c.x;
    538 					break;
    539 				default:
    540 					ASSERT(false);
    541 				}
    542 			}
    543 		}
    544 
    545 		if((state.swizzleR != SWIZZLE_RED) ||
    546 		   (state.swizzleG != SWIZZLE_GREEN) ||
    547 		   (state.swizzleB != SWIZZLE_BLUE) ||
    548 		   (state.swizzleA != SWIZZLE_ALPHA))
    549 		{
    550 			const Vector4f col(c);
    551 			applySwizzle(state.swizzleR, c.x, col);
    552 			applySwizzle(state.swizzleG, c.y, col);
    553 			applySwizzle(state.swizzleB, c.z, col);
    554 			applySwizzle(state.swizzleA, c.w, col);
    555 		}
    556 	}
    557 
    558 	void SamplerCore::textureSize(Pointer<Byte> &texture, Vector4f &size, Float4 &lod)
    559 	{
    560 		for(int i = 0; i < 4; ++i)
    561 		{
    562 			Int baseLevel = *Pointer<Int>(texture + OFFSET(Texture, baseLevel));
    563 			Pointer<Byte> mipmap = texture + OFFSET(Texture, mipmap) + (As<Int>(Extract(lod, i)) + baseLevel) * sizeof(Mipmap);
    564 			size.x = Insert(size.x, As<Float>(Int(*Pointer<Short>(mipmap + OFFSET(Mipmap, width)))), i);
    565 			size.y = Insert(size.y, As<Float>(Int(*Pointer<Short>(mipmap + OFFSET(Mipmap, height)))), i);
    566 			size.z = Insert(size.z, As<Float>(Int(*Pointer<Short>(mipmap + OFFSET(Mipmap, depth)))), i);
    567 		}
    568 	}
    569 
    570 	void SamplerCore::border(Short4 &mask, Float4 &coordinates)
    571 	{
    572 		Int4 border = As<Int4>(CmpLT(Abs(coordinates - Float4(0.5f)), Float4(0.5f)));
    573 		mask = As<Short4>(Int2(As<Int4>(Pack(border, border))));
    574 	}
    575 
    576 	void SamplerCore::border(Int4 &mask, Float4 &coordinates)
    577 	{
    578 		mask = As<Int4>(CmpLT(Abs(coordinates - Float4(0.5f)), Float4(0.5f)));
    579 	}
    580 
    581 	Short4 SamplerCore::offsetSample(Short4 &uvw, Pointer<Byte> &mipmap, int halfOffset, bool wrap, int count, Float &lod)
    582 	{
    583 		Short4 offset = *Pointer<Short4>(mipmap + halfOffset);
    584 
    585 		if(state.textureFilter == FILTER_MIN_LINEAR_MAG_POINT)
    586 		{
    587 			offset &= Short4(CmpNLE(Float4(lod), Float4(0.0f)));
    588 		}
    589 		else if(state.textureFilter == FILTER_MIN_POINT_MAG_LINEAR)
    590 		{
    591 			offset &= Short4(CmpLE(Float4(lod), Float4(0.0f)));
    592 		}
    593 
    594 		if(wrap)
    595 		{
    596 			switch(count)
    597 			{
    598 			case -1: return uvw - offset;
    599 			case  0: return uvw;
    600 			case +1: return uvw + offset;
    601 			case  2: return uvw + offset + offset;
    602 			}
    603 		}
    604 		else   // Clamp or mirror
    605 		{
    606 			switch(count)
    607 			{
    608 			case -1: return SubSat(As<UShort4>(uvw), As<UShort4>(offset));
    609 			case  0: return uvw;
    610 			case +1: return AddSat(As<UShort4>(uvw), As<UShort4>(offset));
    611 			case  2: return AddSat(AddSat(As<UShort4>(uvw), As<UShort4>(offset)), As<UShort4>(offset));
    612 			}
    613 		}
    614 
    615 		return uvw;
    616 	}
    617 
    618 	void SamplerCore::sampleFilter(Pointer<Byte> &texture, Vector4s &c, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, Int face[4], SamplerFunction function)
    619 	{
    620 		sampleAniso(texture, c, u, v, w, offset, lod, anisotropy, uDelta, vDelta, face, false, function);
    621 
    622 		if(function == Fetch)
    623 		{
    624 			return;
    625 		}
    626 
    627 		if(state.mipmapFilter > MIPMAP_POINT)
    628 		{
    629 			Vector4s cc;
    630 
    631 			sampleAniso(texture, cc, u, v, w, offset, lod, anisotropy, uDelta, vDelta, face, true, function);
    632 
    633 			lod *= Float(1 << 16);
    634 
    635 			UShort4 utri = UShort4(Float4(lod));   // FIXME: Optimize
    636 			Short4 stri = utri >> 1;   // FIXME: Optimize
    637 
    638 			if(hasUnsignedTextureComponent(0)) cc.x = MulHigh(As<UShort4>(cc.x), utri); else cc.x = MulHigh(cc.x, stri);
    639 			if(hasUnsignedTextureComponent(1)) cc.y = MulHigh(As<UShort4>(cc.y), utri); else cc.y = MulHigh(cc.y, stri);
    640 			if(hasUnsignedTextureComponent(2)) cc.z = MulHigh(As<UShort4>(cc.z), utri); else cc.z = MulHigh(cc.z, stri);
    641 			if(hasUnsignedTextureComponent(3)) cc.w = MulHigh(As<UShort4>(cc.w), utri); else cc.w = MulHigh(cc.w, stri);
    642 
    643 			utri = ~utri;
    644 			stri = Short4(0x7FFF) - stri;
    645 
    646 			if(hasUnsignedTextureComponent(0)) c.x = MulHigh(As<UShort4>(c.x), utri); else c.x = MulHigh(c.x, stri);
    647 			if(hasUnsignedTextureComponent(1)) c.y = MulHigh(As<UShort4>(c.y), utri); else c.y = MulHigh(c.y, stri);
    648 			if(hasUnsignedTextureComponent(2)) c.z = MulHigh(As<UShort4>(c.z), utri); else c.z = MulHigh(c.z, stri);
    649 			if(hasUnsignedTextureComponent(3)) c.w = MulHigh(As<UShort4>(c.w), utri); else c.w = MulHigh(c.w, stri);
    650 
    651 			c.x += cc.x;
    652 			c.y += cc.y;
    653 			c.z += cc.z;
    654 			c.w += cc.w;
    655 
    656 			if(!hasUnsignedTextureComponent(0)) c.x += c.x;
    657 			if(!hasUnsignedTextureComponent(1)) c.y += c.y;
    658 			if(!hasUnsignedTextureComponent(2)) c.z += c.z;
    659 			if(!hasUnsignedTextureComponent(3)) c.w += c.w;
    660 		}
    661 
    662 		Short4 borderMask;
    663 
    664 		if(state.addressingModeU == ADDRESSING_BORDER)
    665 		{
    666 			Short4 u0;
    667 
    668 			border(u0, u);
    669 
    670 			borderMask = u0;
    671 		}
    672 
    673 		if(state.addressingModeV == ADDRESSING_BORDER)
    674 		{
    675 			Short4 v0;
    676 
    677 			border(v0, v);
    678 
    679 			if(state.addressingModeU == ADDRESSING_BORDER)
    680 			{
    681 				borderMask &= v0;
    682 			}
    683 			else
    684 			{
    685 				borderMask = v0;
    686 			}
    687 		}
    688 
    689 		if(state.addressingModeW == ADDRESSING_BORDER && state.textureType == TEXTURE_3D)
    690 		{
    691 			Short4 s0;
    692 
    693 			border(s0, w);
    694 
    695 			if(state.addressingModeU == ADDRESSING_BORDER ||
    696 			   state.addressingModeV == ADDRESSING_BORDER)
    697 			{
    698 				borderMask &= s0;
    699 			}
    700 			else
    701 			{
    702 				borderMask = s0;
    703 			}
    704 		}
    705 
    706 		if(state.addressingModeU == ADDRESSING_BORDER ||
    707 		   state.addressingModeV == ADDRESSING_BORDER ||
    708 		   (state.addressingModeW == ADDRESSING_BORDER && state.textureType == TEXTURE_3D))
    709 		{
    710 			Short4 b;
    711 
    712 			c.x = (borderMask & c.x) | (~borderMask & (*Pointer<Short4>(texture + OFFSET(Texture,borderColor4[0])) >> (hasUnsignedTextureComponent(0) ? 0 : 1)));
    713 			c.y = (borderMask & c.y) | (~borderMask & (*Pointer<Short4>(texture + OFFSET(Texture,borderColor4[1])) >> (hasUnsignedTextureComponent(1) ? 0 : 1)));
    714 			c.z = (borderMask & c.z) | (~borderMask & (*Pointer<Short4>(texture + OFFSET(Texture,borderColor4[2])) >> (hasUnsignedTextureComponent(2) ? 0 : 1)));
    715 			c.w = (borderMask & c.w) | (~borderMask & (*Pointer<Short4>(texture + OFFSET(Texture,borderColor4[3])) >> (hasUnsignedTextureComponent(3) ? 0 : 1)));
    716 		}
    717 	}
    718 
    719 	void SamplerCore::sampleAniso(Pointer<Byte> &texture, Vector4s &c, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, Int face[4], bool secondLOD, SamplerFunction function)
    720 	{
    721 		if(state.textureFilter != FILTER_ANISOTROPIC || function == Lod || function == Fetch)
    722 		{
    723 			sampleQuad(texture, c, u, v, w, offset, lod, face, secondLOD, function);
    724 		}
    725 		else
    726 		{
    727 			Int a = RoundInt(anisotropy);
    728 
    729 			Vector4s cSum;
    730 
    731 			cSum.x = Short4(0);
    732 			cSum.y = Short4(0);
    733 			cSum.z = Short4(0);
    734 			cSum.w = Short4(0);
    735 
    736 			Float4 A = *Pointer<Float4>(constants + OFFSET(Constants,uvWeight) + 16 * a);
    737 			Float4 B = *Pointer<Float4>(constants + OFFSET(Constants,uvStart) + 16 * a);
    738 			UShort4 cw = *Pointer<UShort4>(constants + OFFSET(Constants,cWeight) + 8 * a);
    739 			Short4 sw = Short4(cw >> 1);
    740 
    741 			Float4 du = uDelta;
    742 			Float4 dv = vDelta;
    743 
    744 			Float4 u0 = u + B * du;
    745 			Float4 v0 = v + B * dv;
    746 
    747 			du *= A;
    748 			dv *= A;
    749 
    750 			Int i = 0;
    751 
    752 			Do
    753 			{
    754 				sampleQuad(texture, c, u0, v0, w, offset, lod, face, secondLOD, function);
    755 
    756 				u0 += du;
    757 				v0 += dv;
    758 
    759 				if(hasUnsignedTextureComponent(0)) cSum.x += As<Short4>(MulHigh(As<UShort4>(c.x), cw)); else cSum.x += MulHigh(c.x, sw);
    760 				if(hasUnsignedTextureComponent(1)) cSum.y += As<Short4>(MulHigh(As<UShort4>(c.y), cw)); else cSum.y += MulHigh(c.y, sw);
    761 				if(hasUnsignedTextureComponent(2)) cSum.z += As<Short4>(MulHigh(As<UShort4>(c.z), cw)); else cSum.z += MulHigh(c.z, sw);
    762 				if(hasUnsignedTextureComponent(3)) cSum.w += As<Short4>(MulHigh(As<UShort4>(c.w), cw)); else cSum.w += MulHigh(c.w, sw);
    763 
    764 				i++;
    765 			}
    766 			Until(i >= a)
    767 
    768 			if(hasUnsignedTextureComponent(0)) c.x = cSum.x; else c.x = AddSat(cSum.x, cSum.x);
    769 			if(hasUnsignedTextureComponent(1)) c.y = cSum.y; else c.y = AddSat(cSum.y, cSum.y);
    770 			if(hasUnsignedTextureComponent(2)) c.z = cSum.z; else c.z = AddSat(cSum.z, cSum.z);
    771 			if(hasUnsignedTextureComponent(3)) c.w = cSum.w; else c.w = AddSat(cSum.w, cSum.w);
    772 		}
    773 	}
    774 
    775 	void SamplerCore::sampleQuad(Pointer<Byte> &texture, Vector4s &c, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, Float &lod, Int face[4], bool secondLOD, SamplerFunction function)
    776 	{
    777 		if(state.textureType != TEXTURE_3D)
    778 		{
    779 			sampleQuad2D(texture, c, u, v, w, offset, lod, face, secondLOD, function);
    780 		}
    781 		else
    782 		{
    783 			sample3D(texture, c, u, v, w, offset, lod, secondLOD, function);
    784 		}
    785 	}
    786 
    787 	void SamplerCore::sampleQuad2D(Pointer<Byte> &texture, Vector4s &c, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, Float &lod, Int face[4], bool secondLOD, SamplerFunction function)
    788 	{
    789 		int componentCount = textureComponentCount();
    790 		bool gather = state.textureFilter == FILTER_GATHER;
    791 
    792 		Pointer<Byte> mipmap;
    793 		Pointer<Byte> buffer[4];
    794 
    795 		selectMipmap(texture, buffer, mipmap, lod, face, secondLOD);
    796 
    797 		bool texelFetch = (function == Fetch);
    798 
    799 		Short4 uuuu = texelFetch ? Short4(As<Int4>(u)) : address(u, state.addressingModeU, mipmap);
    800 		Short4 vvvv = texelFetch ? Short4(As<Int4>(v)) : address(v, state.addressingModeV, mipmap);
    801 		Short4 wwww = texelFetch ? Short4(As<Int4>(w)) : address(w, state.addressingModeW, mipmap);
    802 
    803 		if(state.textureFilter == FILTER_POINT || texelFetch)
    804 		{
    805 			c = sampleTexel(uuuu, vvvv, wwww, offset, mipmap, buffer, function);
    806 		}
    807 		else
    808 		{
    809 			Short4 uuuu0 = offsetSample(uuuu, mipmap, OFFSET(Mipmap,uHalf), state.addressingModeU == ADDRESSING_WRAP, gather ? 0 : -1, lod);
    810 			Short4 vvvv0 = offsetSample(vvvv, mipmap, OFFSET(Mipmap,vHalf), state.addressingModeV == ADDRESSING_WRAP, gather ? 0 : -1, lod);
    811 			Short4 uuuu1 = offsetSample(uuuu, mipmap, OFFSET(Mipmap,uHalf), state.addressingModeU == ADDRESSING_WRAP, gather ? 2 : +1, lod);
    812 			Short4 vvvv1 = offsetSample(vvvv, mipmap, OFFSET(Mipmap,vHalf), state.addressingModeV == ADDRESSING_WRAP, gather ? 2 : +1, lod);
    813 
    814 			Vector4s c0 = sampleTexel(uuuu0, vvvv0, wwww, offset, mipmap, buffer, function);
    815 			Vector4s c1 = sampleTexel(uuuu1, vvvv0, wwww, offset, mipmap, buffer, function);
    816 			Vector4s c2 = sampleTexel(uuuu0, vvvv1, wwww, offset, mipmap, buffer, function);
    817 			Vector4s c3 = sampleTexel(uuuu1, vvvv1, wwww, offset, mipmap, buffer, function);
    818 
    819 			if(!gather)   // Blend
    820 			{
    821 				// Fractions
    822 				UShort4 f0u = As<UShort4>(uuuu0) * *Pointer<UShort4>(mipmap + OFFSET(Mipmap,width));
    823 				UShort4 f0v = As<UShort4>(vvvv0) * *Pointer<UShort4>(mipmap + OFFSET(Mipmap,height));
    824 
    825 				UShort4 f1u = ~f0u;
    826 				UShort4 f1v = ~f0v;
    827 
    828 				UShort4 f0u0v = MulHigh(f0u, f0v);
    829 				UShort4 f1u0v = MulHigh(f1u, f0v);
    830 				UShort4 f0u1v = MulHigh(f0u, f1v);
    831 				UShort4 f1u1v = MulHigh(f1u, f1v);
    832 
    833 				// Signed fractions
    834 				Short4 f1u1vs;
    835 				Short4 f0u1vs;
    836 				Short4 f1u0vs;
    837 				Short4 f0u0vs;
    838 
    839 				if(!hasUnsignedTextureComponent(0) || !hasUnsignedTextureComponent(1) || !hasUnsignedTextureComponent(2) || !hasUnsignedTextureComponent(3))
    840 				{
    841 					f1u1vs = f1u1v >> 1;
    842 					f0u1vs = f0u1v >> 1;
    843 					f1u0vs = f1u0v >> 1;
    844 					f0u0vs = f0u0v >> 1;
    845 				}
    846 
    847 				// Bilinear interpolation
    848 				if(componentCount >= 1)
    849 				{
    850 					if(has16bitTextureComponents() && hasUnsignedTextureComponent(0))
    851 					{
    852 						c0.x = As<UShort4>(c0.x) - MulHigh(As<UShort4>(c0.x), f0u) + MulHigh(As<UShort4>(c1.x), f0u);
    853 						c2.x = As<UShort4>(c2.x) - MulHigh(As<UShort4>(c2.x), f0u) + MulHigh(As<UShort4>(c3.x), f0u);
    854 						c.x  = As<UShort4>(c0.x) - MulHigh(As<UShort4>(c0.x), f0v) + MulHigh(As<UShort4>(c2.x), f0v);
    855 					}
    856 					else
    857 					{
    858 						if(hasUnsignedTextureComponent(0))
    859 						{
    860 							c0.x = MulHigh(As<UShort4>(c0.x), f1u1v);
    861 							c1.x = MulHigh(As<UShort4>(c1.x), f0u1v);
    862 							c2.x = MulHigh(As<UShort4>(c2.x), f1u0v);
    863 							c3.x = MulHigh(As<UShort4>(c3.x), f0u0v);
    864 						}
    865 						else
    866 						{
    867 							c0.x = MulHigh(c0.x, f1u1vs);
    868 							c1.x = MulHigh(c1.x, f0u1vs);
    869 							c2.x = MulHigh(c2.x, f1u0vs);
    870 							c3.x = MulHigh(c3.x, f0u0vs);
    871 						}
    872 
    873 						c.x = (c0.x + c1.x) + (c2.x + c3.x);
    874 						if(!hasUnsignedTextureComponent(0)) c.x = AddSat(c.x, c.x);   // Correct for signed fractions
    875 					}
    876 				}
    877 
    878 				if(componentCount >= 2)
    879 				{
    880 					if(has16bitTextureComponents() && hasUnsignedTextureComponent(1))
    881 					{
    882 						c0.y = As<UShort4>(c0.y) - MulHigh(As<UShort4>(c0.y), f0u) + MulHigh(As<UShort4>(c1.y), f0u);
    883 						c2.y = As<UShort4>(c2.y) - MulHigh(As<UShort4>(c2.y), f0u) + MulHigh(As<UShort4>(c3.y), f0u);
    884 						c.y  = As<UShort4>(c0.y) - MulHigh(As<UShort4>(c0.y), f0v) + MulHigh(As<UShort4>(c2.y), f0v);
    885 					}
    886 					else
    887 					{
    888 						if(hasUnsignedTextureComponent(1))
    889 						{
    890 							c0.y = MulHigh(As<UShort4>(c0.y), f1u1v);
    891 							c1.y = MulHigh(As<UShort4>(c1.y), f0u1v);
    892 							c2.y = MulHigh(As<UShort4>(c2.y), f1u0v);
    893 							c3.y = MulHigh(As<UShort4>(c3.y), f0u0v);
    894 						}
    895 						else
    896 						{
    897 							c0.y = MulHigh(c0.y, f1u1vs);
    898 							c1.y = MulHigh(c1.y, f0u1vs);
    899 							c2.y = MulHigh(c2.y, f1u0vs);
    900 							c3.y = MulHigh(c3.y, f0u0vs);
    901 						}
    902 
    903 						c.y = (c0.y + c1.y) + (c2.y + c3.y);
    904 						if(!hasUnsignedTextureComponent(1)) c.y = AddSat(c.y, c.y);   // Correct for signed fractions
    905 					}
    906 				}
    907 
    908 				if(componentCount >= 3)
    909 				{
    910 					if(has16bitTextureComponents() && hasUnsignedTextureComponent(2))
    911 					{
    912 						c0.z = As<UShort4>(c0.z) - MulHigh(As<UShort4>(c0.z), f0u) + MulHigh(As<UShort4>(c1.z), f0u);
    913 						c2.z = As<UShort4>(c2.z) - MulHigh(As<UShort4>(c2.z), f0u) + MulHigh(As<UShort4>(c3.z), f0u);
    914 						c.z  = As<UShort4>(c0.z) - MulHigh(As<UShort4>(c0.z), f0v) + MulHigh(As<UShort4>(c2.z), f0v);
    915 					}
    916 					else
    917 					{
    918 						if(hasUnsignedTextureComponent(2))
    919 						{
    920 							c0.z = MulHigh(As<UShort4>(c0.z), f1u1v);
    921 							c1.z = MulHigh(As<UShort4>(c1.z), f0u1v);
    922 							c2.z = MulHigh(As<UShort4>(c2.z), f1u0v);
    923 							c3.z = MulHigh(As<UShort4>(c3.z), f0u0v);
    924 						}
    925 						else
    926 						{
    927 							c0.z = MulHigh(c0.z, f1u1vs);
    928 							c1.z = MulHigh(c1.z, f0u1vs);
    929 							c2.z = MulHigh(c2.z, f1u0vs);
    930 							c3.z = MulHigh(c3.z, f0u0vs);
    931 						}
    932 
    933 						c.z = (c0.z + c1.z) + (c2.z + c3.z);
    934 						if(!hasUnsignedTextureComponent(2)) c.z = AddSat(c.z, c.z);   // Correct for signed fractions
    935 					}
    936 				}
    937 
    938 				if(componentCount >= 4)
    939 				{
    940 					if(has16bitTextureComponents() && hasUnsignedTextureComponent(3))
    941 					{
    942 						c0.w = As<UShort4>(c0.w) - MulHigh(As<UShort4>(c0.w), f0u) + MulHigh(As<UShort4>(c1.w), f0u);
    943 						c2.w = As<UShort4>(c2.w) - MulHigh(As<UShort4>(c2.w), f0u) + MulHigh(As<UShort4>(c3.w), f0u);
    944 						c.w  = As<UShort4>(c0.w) - MulHigh(As<UShort4>(c0.w), f0v) + MulHigh(As<UShort4>(c2.w), f0v);
    945 					}
    946 					else
    947 					{
    948 						if(hasUnsignedTextureComponent(3))
    949 						{
    950 							c0.w = MulHigh(As<UShort4>(c0.w), f1u1v);
    951 							c1.w = MulHigh(As<UShort4>(c1.w), f0u1v);
    952 							c2.w = MulHigh(As<UShort4>(c2.w), f1u0v);
    953 							c3.w = MulHigh(As<UShort4>(c3.w), f0u0v);
    954 						}
    955 						else
    956 						{
    957 							c0.w = MulHigh(c0.w, f1u1vs);
    958 							c1.w = MulHigh(c1.w, f0u1vs);
    959 							c2.w = MulHigh(c2.w, f1u0vs);
    960 							c3.w = MulHigh(c3.w, f0u0vs);
    961 						}
    962 
    963 						c.w = (c0.w + c1.w) + (c2.w + c3.w);
    964 						if(!hasUnsignedTextureComponent(3)) c.w = AddSat(c.w, c.w);   // Correct for signed fractions
    965 					}
    966 				}
    967 			}
    968 			else
    969 			{
    970 				c.x = c1.x;
    971 				c.y = c2.x;
    972 				c.z = c3.x;
    973 				c.w = c0.x;
    974 			}
    975 		}
    976 	}
    977 
    978 	void SamplerCore::sample3D(Pointer<Byte> &texture, Vector4s &c_, Float4 &u_, Float4 &v_, Float4 &w_, Vector4f &offset, Float &lod, bool secondLOD, SamplerFunction function)
    979 	{
        		// Samples a 3D texture using 16-bit integer arithmetic and writes the result to c_.
        		// With FILTER_POINT or Fetch a single texel is read; otherwise eight neighbouring
        		// texels are weighted by products of the per-axis fractions and accumulated.
    980 		int componentCount = textureComponentCount();
    981
    982 		Pointer<Byte> mipmap;
    983 		Pointer<Byte> buffer[4];
    984 		Int face[4];
    985
    986 		selectMipmap(texture, buffer, mipmap, lod, face, secondLOD);
    987
    988 		bool texelFetch = (function == Fetch);
    989
        		// Fetch takes the coordinate bits as integers; otherwise address() applies the addressing mode.
    990 		Short4 uuuu = texelFetch ? Short4(As<Int4>(u_)) : address(u_, state.addressingModeU, mipmap);
    991 		Short4 vvvv = texelFetch ? Short4(As<Int4>(v_)) : address(v_, state.addressingModeV, mipmap);
    992 		Short4 wwww = texelFetch ? Short4(As<Int4>(w_)) : address(w_, state.addressingModeW, mipmap);
    993
    994 		if(state.textureFilter == FILTER_POINT || texelFetch)
    995 		{
    996 			c_ = sampleTexel(uuuu, vvvv, wwww, offset, mipmap, buffer, function);
    997 		}
    998 		else
    999 		{
   1000 			Vector4s c[2][2][2];
   1001
   1002 			Short4 u[2][2][2];
   1003 			Short4 v[2][2][2];
   1004 			Short4 s[2][2][2];
   1005
        			// Neighbour coordinates: index 0 uses an offset of -1 and index 1 an offset of +1
        			// (index * 2 - 1). u varies with i only, v with j only and s with k only.
   1006 			for(int i = 0; i < 2; i++)
   1007 			{
   1008 				for(int j = 0; j < 2; j++)
   1009 				{
   1010 					for(int k = 0; k < 2; k++)
   1011 					{
   1012 						u[i][j][k] = offsetSample(uuuu, mipmap, OFFSET(Mipmap,uHalf), state.addressingModeU == ADDRESSING_WRAP, i * 2 - 1, lod);
   1013 						v[i][j][k] = offsetSample(vvvv, mipmap, OFFSET(Mipmap,vHalf), state.addressingModeV == ADDRESSING_WRAP, j * 2 - 1, lod);
   1014 						s[i][j][k] = offsetSample(wwww, mipmap, OFFSET(Mipmap,wHalf), state.addressingModeW == ADDRESSING_WRAP, k * 2 - 1, lod);
   1015 					}
   1016 				}
   1017 			}
   1018
   1019 			// Fractions
        			// NOTE(review): presumably the 16-bit product wraps and leaves the fractional texel position - confirm.
   1020 			UShort4 f0u = As<UShort4>(u[0][0][0]) * *Pointer<UShort4>(mipmap + OFFSET(Mipmap,width));
   1021 			UShort4 f0v = As<UShort4>(v[0][0][0]) * *Pointer<UShort4>(mipmap + OFFSET(Mipmap,height));
   1022 			UShort4 f0s = As<UShort4>(s[0][0][0]) * *Pointer<UShort4>(mipmap + OFFSET(Mipmap,depth));
   1023
        			// Bitwise complement of each fraction, used as the weight of the opposite neighbour.
   1024 			UShort4 f1u = ~f0u;
   1025 			UShort4 f1v = ~f0v;
   1026 			UShort4 f1s = ~f0s;
   1027
        			// f[a][b][c] is the product of the u weight (a), v weight (b) and w weight (c),
        			// where index 0 selects the fraction and index 1 its complement.
   1028 			UShort4 f[2][2][2];
   1029 			Short4 fs[2][2][2];
   1030
   1031 			f[1][1][1] = MulHigh(f1u, f1v);
   1032 			f[0][1][1] = MulHigh(f0u, f1v);
   1033 			f[1][0][1] = MulHigh(f1u, f0v);
   1034 			f[0][0][1] = MulHigh(f0u, f0v);
   1035 			f[1][1][0] = MulHigh(f1u, f1v);
   1036 			f[0][1][0] = MulHigh(f0u, f1v);
   1037 			f[1][0][0] = MulHigh(f1u, f0v);
   1038 			f[0][0][0] = MulHigh(f0u, f0v);
   1039
   1040 			f[1][1][1] = MulHigh(f[1][1][1], f1s);
   1041 			f[0][1][1] = MulHigh(f[0][1][1], f1s);
   1042 			f[1][0][1] = MulHigh(f[1][0][1], f1s);
   1043 			f[0][0][1] = MulHigh(f[0][0][1], f1s);
   1044 			f[1][1][0] = MulHigh(f[1][1][0], f0s);
   1045 			f[0][1][0] = MulHigh(f[0][1][0], f0s);
   1046 			f[1][0][0] = MulHigh(f[1][0][0], f0s);
   1047 			f[0][0][0] = MulHigh(f[0][0][0], f0s);
   1048
   1049 			// Signed fractions
        			// Only computed when at least one component is signed: the weights are halved for use
        			// with the signed MulHigh below (presumably to keep them within the positive Short4 range).
   1050 			if(!hasUnsignedTextureComponent(0) || !hasUnsignedTextureComponent(1) || !hasUnsignedTextureComponent(2) || !hasUnsignedTextureComponent(3))
   1051 			{
   1052 				fs[0][0][0] = f[0][0][0] >> 1;
   1053 				fs[0][0][1] = f[0][0][1] >> 1;
   1054 				fs[0][1][0] = f[0][1][0] >> 1;
   1055 				fs[0][1][1] = f[0][1][1] >> 1;
   1056 				fs[1][0][0] = f[1][0][0] >> 1;
   1057 				fs[1][0][1] = f[1][0][1] >> 1;
   1058 				fs[1][1][0] = f[1][1][0] >> 1;
   1059 				fs[1][1][1] = f[1][1][1] >> 1;
   1060 			}
   1061
   1062 			for(int i = 0; i < 2; i++)
   1063 			{
   1064 				for(int j = 0; j < 2; j++)
   1065 				{
   1066 					for(int k = 0; k < 2; k++)
   1067 					{
   1068 						c[i][j][k] = sampleTexel(u[i][j][k], v[i][j][k], s[i][j][k], offset, mipmap, buffer, function);
   1069
        						// Scale each present component by the weight at the mirrored index [1 - i][1 - j][1 - k];
        						// unsigned components use f, signed components use the halved fs.
   1070 						if(componentCount >= 1) { if(hasUnsignedTextureComponent(0)) c[i][j][k].x = MulHigh(As<UShort4>(c[i][j][k].x), f[1 - i][1 - j][1 - k]); else c[i][j][k].x = MulHigh(c[i][j][k].x, fs[1 - i][1 - j][1 - k]); }
   1071 						if(componentCount >= 2) { if(hasUnsignedTextureComponent(1)) c[i][j][k].y = MulHigh(As<UShort4>(c[i][j][k].y), f[1 - i][1 - j][1 - k]); else c[i][j][k].y = MulHigh(c[i][j][k].y, fs[1 - i][1 - j][1 - k]); }
   1072 						if(componentCount >= 3) { if(hasUnsignedTextureComponent(2)) c[i][j][k].z = MulHigh(As<UShort4>(c[i][j][k].z), f[1 - i][1 - j][1 - k]); else c[i][j][k].z = MulHigh(c[i][j][k].z, fs[1 - i][1 - j][1 - k]); }
   1073 						if(componentCount >= 4) { if(hasUnsignedTextureComponent(3)) c[i][j][k].w = MulHigh(As<UShort4>(c[i][j][k].w), f[1 - i][1 - j][1 - k]); else c[i][j][k].w = MulHigh(c[i][j][k].w, fs[1 - i][1 - j][1 - k]); }
   1074
        						// Accumulate every weighted texel other than the first into c[0][0][0].
   1075 						if(i != 0 || j != 0 || k != 0)
   1076 						{
   1077 							if(componentCount >= 1) c[0][0][0].x += c[i][j][k].x;
   1078 							if(componentCount >= 2) c[0][0][0].y += c[i][j][k].y;
   1079 							if(componentCount >= 3) c[0][0][0].z += c[i][j][k].z;
   1080 							if(componentCount >= 4) c[0][0][0].w += c[i][j][k].w;
   1081 						}
   1082 					}
   1083 				}
   1084 			}
   1085
        			// Only the components present in the texture format are written to the output.
   1086 			if(componentCount >= 1) c_.x = c[0][0][0].x;
   1087 			if(componentCount >= 2) c_.y = c[0][0][0].y;
   1088 			if(componentCount >= 3) c_.z = c[0][0][0].z;
   1089 			if(componentCount >= 4) c_.w = c[0][0][0].w;
   1090
   1091 			// Correct for signed fractions
        			// Signed components were scaled by halved weights, so the sum is doubled with saturation.
   1092 			if(componentCount >= 1) if(!hasUnsignedTextureComponent(0)) c_.x = AddSat(c_.x, c_.x);
   1093 			if(componentCount >= 2) if(!hasUnsignedTextureComponent(1)) c_.y = AddSat(c_.y, c_.y);
   1094 			if(componentCount >= 3) if(!hasUnsignedTextureComponent(2)) c_.z = AddSat(c_.z, c_.z);
   1095 			if(componentCount >= 4) if(!hasUnsignedTextureComponent(3)) c_.w = AddSat(c_.w, c_.w);
   1096 		}
   1097 	}
   1098 
   1099 	void SamplerCore::sampleFloatFilter(Pointer<Byte> &texture, Vector4f &c, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, Int face[4], SamplerFunction function)
   1100 	{
   1101 		sampleFloatAniso(texture, c, u, v, w, offset, lod, anisotropy, uDelta, vDelta, face, false, function);
   1102 
   1103 		if(function == Fetch)
   1104 		{
   1105 			return;
   1106 		}
   1107 
   1108 		if(state.mipmapFilter > MIPMAP_POINT)
   1109 		{
   1110 			Vector4f cc;
   1111 
   1112 			sampleFloatAniso(texture, cc, u, v, w, offset, lod, anisotropy, uDelta, vDelta, face, true, function);
   1113 
   1114 			Float4 lod4 = Float4(Frac(lod));
   1115 
   1116 			c.x = (cc.x - c.x) * lod4 + c.x;
   1117 			c.y = (cc.y - c.y) * lod4 + c.y;
   1118 			c.z = (cc.z - c.z) * lod4 + c.z;
   1119 			c.w = (cc.w - c.w) * lod4 + c.w;
   1120 		}
   1121 
   1122 		Int4 borderMask;
   1123 
   1124 		if(state.addressingModeU == ADDRESSING_BORDER)
   1125 		{
   1126 			Int4 u0;
   1127 
   1128 			border(u0, u);
   1129 
   1130 			borderMask = u0;
   1131 		}
   1132 
   1133 		if(state.addressingModeV == ADDRESSING_BORDER)
   1134 		{
   1135 			Int4 v0;
   1136 
   1137 			border(v0, v);
   1138 
   1139 			if(state.addressingModeU == ADDRESSING_BORDER)
   1140 			{
   1141 				borderMask &= v0;
   1142 			}
   1143 			else
   1144 			{
   1145 				borderMask = v0;
   1146 			}
   1147 		}
   1148 
   1149 		if(state.addressingModeW == ADDRESSING_BORDER && state.textureType == TEXTURE_3D)
   1150 		{
   1151 			Int4 s0;
   1152 
   1153 			border(s0, w);
   1154 
   1155 			if(state.addressingModeU == ADDRESSING_BORDER ||
   1156 			   state.addressingModeV == ADDRESSING_BORDER)
   1157 			{
   1158 				borderMask &= s0;
   1159 			}
   1160 			else
   1161 			{
   1162 				borderMask = s0;
   1163 			}
   1164 		}
   1165 
   1166 		if(state.addressingModeU == ADDRESSING_BORDER ||
   1167 		   state.addressingModeV == ADDRESSING_BORDER ||
   1168 		   (state.addressingModeW == ADDRESSING_BORDER && state.textureType == TEXTURE_3D))
   1169 		{
   1170 			Int4 b;
   1171 
   1172 			c.x = As<Float4>((borderMask & As<Int4>(c.x)) | (~borderMask & *Pointer<Int4>(texture + OFFSET(Texture,borderColorF[0]))));
   1173 			c.y = As<Float4>((borderMask & As<Int4>(c.y)) | (~borderMask & *Pointer<Int4>(texture + OFFSET(Texture,borderColorF[1]))));
   1174 			c.z = As<Float4>((borderMask & As<Int4>(c.z)) | (~borderMask & *Pointer<Int4>(texture + OFFSET(Texture,borderColorF[2]))));
   1175 			c.w = As<Float4>((borderMask & As<Int4>(c.w)) | (~borderMask & *Pointer<Int4>(texture + OFFSET(Texture,borderColorF[3]))));
   1176 		}
   1177 	}
   1178 
   1179 	void SamplerCore::sampleFloatAniso(Pointer<Byte> &texture, Vector4f &c, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, Int face[4], bool secondLOD, SamplerFunction function)
   1180 	{
   1181 		if(state.textureFilter != FILTER_ANISOTROPIC || function == Lod || function == Fetch)
   1182 		{
   1183 			sampleFloat(texture, c, u, v, w, offset, lod, face, secondLOD, function);
   1184 		}
   1185 		else
   1186 		{
   1187 			Int a = RoundInt(anisotropy);
   1188 
   1189 			Vector4f cSum;
   1190 
   1191 			cSum.x = Float4(0.0f);
   1192 			cSum.y = Float4(0.0f);
   1193 			cSum.z = Float4(0.0f);
   1194 			cSum.w = Float4(0.0f);
   1195 
   1196 			Float4 A = *Pointer<Float4>(constants + OFFSET(Constants,uvWeight) + 16 * a);
   1197 			Float4 B = *Pointer<Float4>(constants + OFFSET(Constants,uvStart) + 16 * a);
   1198 
   1199 			Float4 du = uDelta;
   1200 			Float4 dv = vDelta;
   1201 
   1202 			Float4 u0 = u + B * du;
   1203 			Float4 v0 = v + B * dv;
   1204 
   1205 			du *= A;
   1206 			dv *= A;
   1207 
   1208 			Int i = 0;
   1209 
   1210 			Do
   1211 			{
   1212 				sampleFloat(texture, c, u0, v0, w, offset, lod, face, secondLOD, function);
   1213 
   1214 				u0 += du;
   1215 				v0 += dv;
   1216 
   1217 				cSum.x += c.x * A;
   1218 				cSum.y += c.y * A;
   1219 				cSum.z += c.z * A;
   1220 				cSum.w += c.w * A;
   1221 
   1222 				i++;
   1223 			}
   1224 			Until(i >= a)
   1225 
   1226 			c.x = cSum.x;
   1227 			c.y = cSum.y;
   1228 			c.z = cSum.z;
   1229 			c.w = cSum.w;
   1230 		}
   1231 	}
   1232 
   1233 	void SamplerCore::sampleFloat(Pointer<Byte> &texture, Vector4f &c, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, Float &lod, Int face[4], bool secondLOD, SamplerFunction function)
   1234 	{
   1235 		if(state.textureType != TEXTURE_3D)
   1236 		{
   1237 			sampleFloat2D(texture, c, u, v, w, offset, lod, face, secondLOD, function);
   1238 		}
   1239 		else
   1240 		{
   1241 			sampleFloat3D(texture, c, u, v, w, offset, lod, secondLOD, function);
   1242 		}
   1243 	}
   1244 
   1245 	void SamplerCore::sampleFloat2D(Pointer<Byte> &texture, Vector4f &c, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, Float &lod, Int face[4], bool secondLOD, SamplerFunction function)
   1246 	{
        		// Floating-point sampling for non-3D textures. Reads one texel for FILTER_POINT or Fetch,
        		// otherwise reads a 2x2 footprint that is either blended bilinearly or, for
        		// FILTER_GATHER, returned as the x component of the four texels.
   1247 		int componentCount = textureComponentCount();
   1248 		bool gather = state.textureFilter == FILTER_GATHER;
   1249
   1250 		Pointer<Byte> mipmap;
   1251 		Pointer<Byte> buffer[4];
   1252
   1253 		selectMipmap(texture, buffer, mipmap, lod, face, secondLOD);
   1254
   1255 		Int4 x0, x1, y0, y1, z0;
   1256 		Float4 fu, fv;
   1257 		Int4 filter = computeFilterOffset(lod);
        		// The w call passes z0 for both index arguments and fv as its fraction argument; fv is
        		// passed again to the v call below (assuming address() writes it, only z0 is kept from this call).
   1258 		address(w, z0, z0, fv, mipmap, offset.z, filter, OFFSET(Mipmap, depth), state.addressingModeW, function);
   1259 		address(v, y0, y1, fv, mipmap, offset.y, filter, OFFSET(Mipmap, height), state.addressingModeV, function);
   1260 		address(u, x0, x1, fu, mipmap, offset.x, filter, OFFSET(Mipmap, width), state.addressingModeU, function);
   1261
        		// Scale the row index by pitchP and, when a third coordinate exists, the slice index by sliceP.
   1262 		Int4 pitchP = *Pointer<Int4>(mipmap + OFFSET(Mipmap, pitchP), 16);
   1263 		y0 *= pitchP;
   1264 		if(hasThirdCoordinate())
   1265 		{
   1266 			Int4 sliceP = *Pointer<Int4>(mipmap + OFFSET(Mipmap, sliceP), 16);
   1267 			z0 *= sliceP;
   1268 		}
   1269
   1270 		if(state.textureFilter == FILTER_POINT || (function == Fetch))
   1271 		{
   1272 			c = sampleTexel(x0, y0, z0, w, mipmap, buffer, function);
   1273 		}
   1274 		else
   1275 		{
   1276 			y1 *= pitchP;
   1277
        			// 2x2 footprint: c0 = (x0, y0), c1 = (x1, y0), c2 = (x0, y1), c3 = (x1, y1).
   1278 			Vector4f c0 = sampleTexel(x0, y0, z0, w, mipmap, buffer, function);
   1279 			Vector4f c1 = sampleTexel(x1, y0, z0, w, mipmap, buffer, function);
   1280 			Vector4f c2 = sampleTexel(x0, y1, z0, w, mipmap, buffer, function);
   1281 			Vector4f c3 = sampleTexel(x1, y1, z0, w, mipmap, buffer, function);
   1282
   1283 			if(!gather)   // Blend
   1284 			{
        				// Interpolate along u within each row (results stored in c0 and c2), then along v.
   1285 				if(componentCount >= 1) c0.x = c0.x + fu * (c1.x - c0.x);
   1286 				if(componentCount >= 2) c0.y = c0.y + fu * (c1.y - c0.y);
   1287 				if(componentCount >= 3) c0.z = c0.z + fu * (c1.z - c0.z);
   1288 				if(componentCount >= 4) c0.w = c0.w + fu * (c1.w - c0.w);
   1289
   1290 				if(componentCount >= 1) c2.x = c2.x + fu * (c3.x - c2.x);
   1291 				if(componentCount >= 2) c2.y = c2.y + fu * (c3.y - c2.y);
   1292 				if(componentCount >= 3) c2.z = c2.z + fu * (c3.z - c2.z);
   1293 				if(componentCount >= 4) c2.w = c2.w + fu * (c3.w - c2.w);
   1294
   1295 				if(componentCount >= 1) c.x = c0.x + fv * (c2.x - c0.x);
   1296 				if(componentCount >= 2) c.y = c0.y + fv * (c2.y - c0.y);
   1297 				if(componentCount >= 3) c.z = c0.z + fv * (c2.z - c0.z);
   1298 				if(componentCount >= 4) c.w = c0.w + fv * (c2.w - c0.w);
   1299 			}
   1300 			else
   1301 			{
        				// Gather: return the x component of the four texels in the order c1, c2, c3, c0.
   1302 				c.x = c1.x;
   1303 				c.y = c2.x;
   1304 				c.z = c3.x;
   1305 				c.w = c0.x;
   1306 			}
   1307 		}
   1308 	}
   1309 
   1310 	void SamplerCore::sampleFloat3D(Pointer<Byte> &texture, Vector4f &c, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, Float &lod, bool secondLOD, SamplerFunction function)
   1311 	{
        		// Floating-point sampling of a 3D texture. Reads one texel for FILTER_POINT or Fetch;
        		// otherwise reads a 2x2x2 footprint and interpolates along u, then v, then w.
   1312 		int componentCount = textureComponentCount();
   1313
   1314 		Pointer<Byte> mipmap;
   1315 		Pointer<Byte> buffer[4];
   1316 		Int face[4];
   1317
   1318 		selectMipmap(texture, buffer, mipmap, lod, face, secondLOD);
   1319
   1320 		Int4 x0, x1, y0, y1, z0, z1;
   1321 		Float4 fu, fv, fw;
   1322 		Int4 filter = computeFilterOffset(lod);
   1323 		address(u, x0, x1, fu, mipmap, offset.x, filter, OFFSET(Mipmap, width), state.addressingModeU, function);
   1324 		address(v, y0, y1, fv, mipmap, offset.y, filter, OFFSET(Mipmap, height), state.addressingModeV, function);
   1325 		address(w, z0, z1, fw, mipmap, offset.z, filter, OFFSET(Mipmap, depth), state.addressingModeW, function);
   1326
        		// Scale the row and slice indices by pitchP and sliceP respectively.
   1327 		Int4 pitchP = *Pointer<Int4>(mipmap + OFFSET(Mipmap, pitchP), 16);
   1328 		Int4 sliceP = *Pointer<Int4>(mipmap + OFFSET(Mipmap, sliceP), 16);
   1329 		y0 *= pitchP;
   1330 		z0 *= sliceP;
   1331
   1332 		if(state.textureFilter == FILTER_POINT || (function == Fetch))
   1333 		{
   1334 			c = sampleTexel(x0, y0, z0, w, mipmap, buffer, function);
   1335 		}
   1336 		else
   1337 		{
   1338 			y1 *= pitchP;
   1339 			z1 *= sliceP;
   1340
        			// c0..c3 come from slice z0 and c4..c7 from slice z1, each in the order
        			// (x0, y0), (x1, y0), (x0, y1), (x1, y1).
   1341 			Vector4f c0 = sampleTexel(x0, y0, z0, w, mipmap, buffer, function);
   1342 			Vector4f c1 = sampleTexel(x1, y0, z0, w, mipmap, buffer, function);
   1343 			Vector4f c2 = sampleTexel(x0, y1, z0, w, mipmap, buffer, function);
   1344 			Vector4f c3 = sampleTexel(x1, y1, z0, w, mipmap, buffer, function);
   1345 			Vector4f c4 = sampleTexel(x0, y0, z1, w, mipmap, buffer, function);
   1346 			Vector4f c5 = sampleTexel(x1, y0, z1, w, mipmap, buffer, function);
   1347 			Vector4f c6 = sampleTexel(x0, y1, z1, w, mipmap, buffer, function);
   1348 			Vector4f c7 = sampleTexel(x1, y1, z1, w, mipmap, buffer, function);
   1349
   1350 			// Blend first slice
        			// The result of the u and v interpolation ends up in c0.
   1351 			if(componentCount >= 1) c0.x = c0.x + fu * (c1.x - c0.x);
   1352 			if(componentCount >= 2) c0.y = c0.y + fu * (c1.y - c0.y);
   1353 			if(componentCount >= 3) c0.z = c0.z + fu * (c1.z - c0.z);
   1354 			if(componentCount >= 4) c0.w = c0.w + fu * (c1.w - c0.w);
   1355
   1356 			if(componentCount >= 1) c2.x = c2.x + fu * (c3.x - c2.x);
   1357 			if(componentCount >= 2) c2.y = c2.y + fu * (c3.y - c2.y);
   1358 			if(componentCount >= 3) c2.z = c2.z + fu * (c3.z - c2.z);
   1359 			if(componentCount >= 4) c2.w = c2.w + fu * (c3.w - c2.w);
   1360
   1361 			if(componentCount >= 1) c0.x = c0.x + fv * (c2.x - c0.x);
   1362 			if(componentCount >= 2) c0.y = c0.y + fv * (c2.y - c0.y);
   1363 			if(componentCount >= 3) c0.z = c0.z + fv * (c2.z - c0.z);
   1364 			if(componentCount >= 4) c0.w = c0.w + fv * (c2.w - c0.w);
   1365
   1366 			// Blend second slice
        			// The result of the u and v interpolation ends up in c4.
   1367 			if(componentCount >= 1) c4.x = c4.x + fu * (c5.x - c4.x);
   1368 			if(componentCount >= 2) c4.y = c4.y + fu * (c5.y - c4.y);
   1369 			if(componentCount >= 3) c4.z = c4.z + fu * (c5.z - c4.z);
   1370 			if(componentCount >= 4) c4.w = c4.w + fu * (c5.w - c4.w);
   1371
   1372 			if(componentCount >= 1) c6.x = c6.x + fu * (c7.x - c6.x);
   1373 			if(componentCount >= 2) c6.y = c6.y + fu * (c7.y - c6.y);
   1374 			if(componentCount >= 3) c6.z = c6.z + fu * (c7.z - c6.z);
   1375 			if(componentCount >= 4) c6.w = c6.w + fu * (c7.w - c6.w);
   1376
   1377 			if(componentCount >= 1) c4.x = c4.x + fv * (c6.x - c4.x);
   1378 			if(componentCount >= 2) c4.y = c4.y + fv * (c6.y - c4.y);
   1379 			if(componentCount >= 3) c4.z = c4.z + fv * (c6.z - c4.z);
   1380 			if(componentCount >= 4) c4.w = c4.w + fv * (c6.w - c4.w);
   1381
   1382 			// Blend slices
        			// Interpolate between the two slice results along w and write the output.
   1383 			if(componentCount >= 1) c.x = c0.x + fw * (c4.x - c0.x);
   1384 			if(componentCount >= 2) c.y = c0.y + fw * (c4.y - c0.y);
   1385 			if(componentCount >= 3) c.z = c0.z + fw * (c4.z - c0.z);
   1386 			if(componentCount >= 4) c.w = c0.w + fw * (c4.w - c0.w);
   1387 		}
   1388 	}
   1389 
   1390 	Float SamplerCore::log2sqrt(Float lod)
   1391 	{
   1392 		// log2(sqrt(lod))                               // Equals 0.25 * log2(lod^2).
   1393 		lod *= lod;                                      // Squaring doubles the exponent and produces an extra bit of precision.
   1394 		lod = Float(As<Int>(lod)) - Float(0x3F800000);   // Interpret as integer and subtract the exponent bias.
   1395 		lod *= As<Float>(Int(0x33000000));               // Scale by 0.25 * 2^-23 (mantissa length).
   1396 
   1397 		return lod;
   1398 	}
   1399 
   1400 	void SamplerCore::computeLod(Pointer<Byte> &texture, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, Float4 &uuuu, Float4 &vvvv, const Float &lodBias, Vector4f &dsx, Vector4f &dsy, SamplerFunction function)
   1401 	{
        		// Computes the level of detail from the u/v coordinates (or from dsx/dsy for Grad) and
        		// clamps it to [minLod, maxLod]. With FILTER_ANISOTROPIC it also outputs the anisotropy
        		// ratio and the derivative pair (uDelta, vDelta) of the longer axis.
        		// For Lod and Fetch the result is lodBias plus the texture's baseLevel.
   1402 		if(function != Lod && function != Fetch)
   1403 		{
   1404 			Float4 duvdxy;
   1405
   1406 			if(function != Grad)
   1407 			{
        				// Lanes: (u.y - u.x, u.z - u.x, v.y - v.x, v.z - v.x).
        				// NOTE(review): presumably the four lanes form a 2x2 pixel quad, making these the x and y derivatives - confirm.
   1408 				duvdxy = Float4(uuuu.yz, vvvv.yz) - Float4(uuuu.xx, vvvv.xx);
   1409 			}
   1410 			else
   1411 			{
        				// Explicit gradients: only lane x of dsx/dsy is used, giving
        				// (dsx.x, dsy.x, dsx.y, dsy.y) in duvdxy.
   1412 				Float4 dudxy = Float4(dsx.x.xx, dsy.x.xx);
   1413 				Float4 dvdxy = Float4(dsx.y.xx, dsy.y.xx);
   1414
   1415 				duvdxy = Float4(dudxy.xz, dvdxy.xz);
   1416 			}
   1417
   1418 			// Scale by texture dimensions and LOD
   1419 			Float4 dUVdxy = duvdxy * *Pointer<Float4>(texture + OFFSET(Texture,widthHeightLOD));
   1420
        			// dUV2.x and dUV2.y hold the squared lengths of the two derivative vectors.
   1421 			Float4 dUV2dxy = dUVdxy * dUVdxy;
   1422 			Float4 dUV2 = dUV2dxy.xy + dUV2dxy.zw;
   1423
   1424 			lod = Max(Float(dUV2.x), Float(dUV2.y));   // Square length of major axis
   1425
   1426 			if(state.textureFilter == FILTER_ANISOTROPIC)
   1427 			{
        				// Absolute determinant of the scaled 2x2 derivative matrix.
   1428 				Float det = Abs(Float(dUVdxy.x) * Float(dUVdxy.w) - Float(dUVdxy.y) * Float(dUVdxy.z));
   1429
   1430 				Float4 dudx = duvdxy.xxxx;
   1431 				Float4 dudy = duvdxy.yyyy;
   1432 				Float4 dvdx = duvdxy.zzzz;
   1433 				Float4 dvdy = duvdxy.wwww;
   1434
        				// Select the unscaled x-derivatives where dUV2.x is not less than dUV2.y, else the y-derivatives.
   1435 				Int4 mask = As<Int4>(CmpNLT(dUV2.x, dUV2.y));
   1436 				uDelta = As<Float4>((As<Int4>(dudx) & mask) | ((As<Int4>(dudy) & ~mask)));
   1437 				vDelta = As<Float4>((As<Int4>(dvdx) & mask) | ((As<Int4>(dvdy) & ~mask)));
   1438
        				// Ratio of the major axis squared length to the determinant, limited by maxAnisotropy.
   1439 				anisotropy = lod * Rcp_pp(det);
   1440 				anisotropy = Min(anisotropy, *Pointer<Float>(texture + OFFSET(Texture,maxAnisotropy)));
   1441
        				// Reduce the squared length by the square of the anisotropy before taking the logarithm.
   1442 				lod *= Rcp_pp(anisotropy * anisotropy);
   1443 			}
   1444
   1445 			lod = log2sqrt(lod);   // log2(sqrt(lod))
   1446
   1447 			if(function == Bias)
   1448 			{
   1449 				lod += lodBias;
   1450 			}
   1451 		}
   1452 		else
   1453 		{
   1454 			lod = lodBias + Float(*Pointer<Int>(texture + OFFSET(Texture,baseLevel)));
   1455 		}
   1456
        		// Clamp to the texture's LOD range.
   1457 		lod = Max(lod, *Pointer<Float>(texture + OFFSET(Texture, minLod)));
   1458 		lod = Min(lod, *Pointer<Float>(texture + OFFSET(Texture, maxLod)));
   1459 	}
   1460 
   1461 	void SamplerCore::computeLodCube(Pointer<Byte> &texture, Float &lod, Float4 &u, Float4 &v, Float4 &s, const Float &lodBias, Vector4f &dsx, Vector4f &dsy, SamplerFunction function)
   1462 	{
        		// Computes the level of detail for cube map sampling from the three coordinate
        		// registers (or from dsx/dsy for Grad) and clamps it to [minLod, maxLod].
        		// For Lod and Fetch the result is lodBias plus the texture's baseLevel.
   1463 		if(function != Lod && function != Fetch)
   1464 		{
   1465 			if(function != Grad)
   1466 			{
        				// Lane x holds lane1 - lane0 and lane y holds lane3 - lane1; only these two lanes are read below.
   1467 				Float4 dudxy = u.ywyw - u;
   1468 				Float4 dvdxy = v.ywyw - v;
   1469 				Float4 dsdxy = s.ywyw - s;
   1470
   1471 				// Scale by texture dimensions and LOD
        				// All three axes use widthLOD.
   1472 				dudxy *= *Pointer<Float4>(texture + OFFSET(Texture,widthLOD));
   1473 				dvdxy *= *Pointer<Float4>(texture + OFFSET(Texture,widthLOD));
   1474 				dsdxy *= *Pointer<Float4>(texture + OFFSET(Texture,widthLOD));
   1475
        				// Sum of squares over the three axes, accumulated in dudxy.
   1476 				dudxy *= dudxy;
   1477 				dvdxy *= dvdxy;
   1478 				dsdxy *= dsdxy;
   1479
   1480 				dudxy += dvdxy;
   1481 				dudxy += dsdxy;
   1482
   1483 				lod = Max(Float(dudxy.x), Float(dudxy.y));   // FIXME: Max(dudxy.x, dudxy.y);
   1484 			}
   1485 			else
   1486 			{
        				// Explicit gradients: only lane x of dsx.x, dsx.y, dsy.x and dsy.y is used.
        				// NOTE(review): the z components of dsx/dsy are not read here - confirm this is intended.
   1487 				Float4 dudxy = Float4(dsx.x.xx, dsy.x.xx);
   1488 				Float4 dvdxy = Float4(dsx.y.xx, dsy.y.xx);
   1489
   1490 				Float4 duvdxy = Float4(dudxy.xz, dvdxy.xz);
   1491
   1492 				// Scale by texture dimensions and LOD
   1493 				Float4 dUVdxy = duvdxy * *Pointer<Float4>(texture + OFFSET(Texture,widthLOD));
   1494
   1495 				Float4 dUV2dxy = dUVdxy * dUVdxy;
   1496 				Float4 dUV2 = dUV2dxy.xy + dUV2dxy.zw;
   1497
   1498 				lod = Max(Float(dUV2.x), Float(dUV2.y));   // Square length of major axis
   1499 			}
   1500
   1501 			lod = log2sqrt(lod);   // log2(sqrt(lod))
   1502
   1503 			if(function == Bias)
   1504 			{
   1505 				lod += lodBias;
   1506 			}
   1507 		}
   1508 		else
   1509 		{
   1510 			lod = lodBias + Float(*Pointer<Int>(texture + OFFSET(Texture,baseLevel)));
   1511 		}
   1512
        		// Clamp to the texture's LOD range.
   1513 		lod = Max(lod, *Pointer<Float>(texture + OFFSET(Texture, minLod)));
   1514 		lod = Min(lod, *Pointer<Float>(texture + OFFSET(Texture, maxLod)));
   1515 	}
   1516 
   1517 	void SamplerCore::computeLod3D(Pointer<Byte> &texture, Float &lod, Float4 &uuuu, Float4 &vvvv, Float4 &wwww, const Float &lodBias, Vector4f &dsx, Vector4f &dsy, SamplerFunction function)
   1518 	{
   1519 		if(state.mipmapFilter == MIPMAP_NONE)
   1520 		{
   1521 		}
   1522 		else   // Point and linear filter
   1523 		{
   1524 			if(function != Lod && function != Fetch)
   1525 			{
   1526 				Float4 dudxy;
   1527 				Float4 dvdxy;
   1528 				Float4 dsdxy;
   1529 
   1530 				if(function != Grad)
   1531 				{
   1532 					dudxy = uuuu.ywyw - uuuu;
   1533 					dvdxy = vvvv.ywyw - vvvv;
   1534 					dsdxy = wwww.ywyw - wwww;
   1535 				}
   1536 				else
   1537 				{
   1538 					dudxy = dsx.x;
   1539 					dvdxy = dsx.y;
   1540 					dsdxy = dsx.z;
   1541 
   1542 					dudxy = Float4(dudxy.xx, dsy.x.xx);
   1543 					dvdxy = Float4(dvdxy.xx, dsy.y.xx);
   1544 					dsdxy = Float4(dsdxy.xx, dsy.z.xx);
   1545 
   1546 					dudxy = Float4(dudxy.xz, dudxy.xz);
   1547 					dvdxy = Float4(dvdxy.xz, dvdxy.xz);
   1548 					dsdxy = Float4(dsdxy.xz, dsdxy.xz);
   1549 				}
   1550 
   1551 				// Scale by texture dimensions and LOD
   1552 				dudxy *= *Pointer<Float4>(texture + OFFSET(Texture,widthLOD));
   1553 				dvdxy *= *Pointer<Float4>(texture + OFFSET(Texture,heightLOD));
   1554 				dsdxy *= *Pointer<Float4>(texture + OFFSET(Texture,depthLOD));
   1555 
   1556 				dudxy *= dudxy;
   1557 				dvdxy *= dvdxy;
   1558 				dsdxy *= dsdxy;
   1559 
   1560 				dudxy += dvdxy;
   1561 				dudxy += dsdxy;
   1562 
   1563 				lod = Max(Float(dudxy.x), Float(dudxy.y));   // FIXME: Max(dudxy.x, dudxy.y);
   1564 
   1565 				lod = log2sqrt(lod);   // log2(sqrt(lod))
   1566 
   1567 				if(function == Bias)
   1568 				{
   1569 					lod += lodBias;
   1570 				}
   1571 			}
   1572 			else
   1573 			{
   1574 				lod = lodBias + Float(*Pointer<Int>(texture + OFFSET(Texture,baseLevel)));
   1575 			}
   1576 
   1577 			lod = Max(lod, *Pointer<Float>(texture + OFFSET(Texture, minLod)));
   1578 			lod = Min(lod, *Pointer<Float>(texture + OFFSET(Texture, maxLod)));
   1579 		}
   1580 	}
   1581 
   1582 	void SamplerCore::cubeFace(Int face[4], Float4 &U, Float4 &V, Float4 &lodX, Float4 &lodY, Float4 &lodZ, Float4 &x, Float4 &y, Float4 &z)
   1583 	{
        		// Selects a cube map face per lane from the direction (x, y, z). Outputs the face
        		// indices in face[0..3], the face-local coordinates in U and V, and the direction
        		// scaled by 0.5 / max(|x|, |y|, |z|) in lodX, lodY and lodZ.
   1584 		Int4 xn = CmpLT(x, Float4(0.0f));   // x < 0
   1585 		Int4 yn = CmpLT(y, Float4(0.0f));   // y < 0
   1586 		Int4 zn = CmpLT(z, Float4(0.0f));   // z < 0
   1587
   1588 		Float4 absX = Abs(x);
   1589 		Float4 absY = Abs(y);
   1590 		Float4 absZ = Abs(z);
   1591
   1592 		Int4 xy = CmpNLE(absX, absY);   // abs(x) > abs(y)
   1593 		Int4 yz = CmpNLE(absY, absZ);   // abs(y) > abs(z)
   1594 		Int4 zx = CmpNLE(absZ, absX);   // abs(z) > abs(x)
   1595 		Int4 xMajor = xy & ~zx;   // abs(x) > abs(y) && !(abs(z) > abs(x))
   1596 		Int4 yMajor = yz & ~xy;   // abs(y) > abs(z) && !(abs(x) > abs(y))
   1597 		Int4 zMajor = zx & ~yz;   // abs(z) > abs(x) && !(abs(y) > abs(z))
   1598
   1599 		// FACE_POSITIVE_X = 000b
   1600 		// FACE_NEGATIVE_X = 001b
   1601 		// FACE_POSITIVE_Y = 010b
   1602 		// FACE_NEGATIVE_Y = 011b
   1603 		// FACE_POSITIVE_Z = 100b
   1604 		// FACE_NEGATIVE_Z = 101b
   1605
        		// NOTE(review): SignMask presumably packs the sign bit of each lane into an integer - confirm.
   1606 		Int yAxis = SignMask(yMajor);
   1607 		Int zAxis = SignMask(zMajor);
   1608
        		// Only the sign bit is kept, set in lanes where the major-axis component is negative.
   1609 		Int4 n = ((xn & xMajor) | (yn & yMajor) | (zn & zMajor)) & Int4(0x80000000);
   1610 		Int negative = SignMask(n);
   1611
        		// The three table lookups are ORed into one word holding a 3-bit face index per
        		// lane in 4-bit fields, which is then split into face[0..3].
   1612 		face[0] = *Pointer<Int>(constants + OFFSET(Constants,transposeBit0) + negative * 4);
   1613 		face[0] |= *Pointer<Int>(constants + OFFSET(Constants,transposeBit1) + yAxis * 4);
   1614 		face[0] |= *Pointer<Int>(constants + OFFSET(Constants,transposeBit2) + zAxis * 4);
   1615 		face[1] = (face[0] >> 4)  & 0x7;
   1616 		face[2] = (face[0] >> 8)  & 0x7;
   1617 		face[3] = (face[0] >> 12) & 0x7;
   1618 		face[0] &= 0x7;
   1619
   1620 		Float4 M = Max(Max(absX, absY), absZ);
   1621
   1622 		// U = xMajor ? (n ^ -z) : ((zMajor & n) ^ x)
        		// XOR with the sign-bit mask n flips the sign of the float in the selected lanes.
   1623 		U = As<Float4>((xMajor & (n ^ As<Int4>(-z))) | (~xMajor & ((zMajor & n) ^ As<Int4>(x))));
   1624
   1625 		// V = !yMajor ? -y : (n ^ z)
   1626 		V = As<Float4>((~yMajor & As<Int4>(-y)) | (yMajor & (n ^ As<Int4>(z))));
   1627
        		// M becomes 0.5 / max(|x|, |y|, |z|); U and V are then scaled by it and offset by 0.5.
   1628 		M = reciprocal(M) * Float4(0.5f);
   1629 		U = U * M + Float4(0.5f);
   1630 		V = V * M + Float4(0.5f);
   1631
   1632 		lodX = x * M;
   1633 		lodY = y * M;
   1634 		lodZ = z * M;
   1635 	}
   1636 
   1637 	Short4 SamplerCore::applyOffset(Short4 &uvw, Float4 &offset, const Int4 &whd, AddressingMode mode)
   1638 	{
   1639 		Int4 tmp = Int4(As<UShort4>(uvw));
   1640 		tmp = tmp + As<Int4>(offset);
   1641 
   1642 		switch (mode)
   1643 		{
   1644 		case AddressingMode::ADDRESSING_WRAP:
   1645 			tmp = (tmp + whd * Int4(-MIN_PROGRAM_TEXEL_OFFSET)) % whd;
   1646 			break;
   1647 		case AddressingMode::ADDRESSING_CLAMP:
   1648 		case AddressingMode::ADDRESSING_MIRROR:
   1649 		case AddressingMode::ADDRESSING_MIRRORONCE:
   1650 		case AddressingMode::ADDRESSING_BORDER: // FIXME: Implement and test ADDRESSING_MIRROR, ADDRESSING_MIRRORONCE, ADDRESSING_BORDER
   1651 			tmp = Min(Max(tmp, Int4(0)), whd - Int4(1));
   1652 			break;
   1653 		case ADDRESSING_TEXELFETCH:
   1654 			break;
   1655 		default:
   1656 			ASSERT(false);
   1657 		}
   1658 
   1659 		return As<Short4>(UShort4(tmp));
   1660 	}
   1661 
   1662 	void SamplerCore::computeIndices(UInt index[4], Short4 uuuu, Short4 vvvv, Short4 wwww, Vector4f &offset, const Pointer<Byte> &mipmap, SamplerFunction function)
   1663 	{
   1664 		bool texelFetch = (function == Fetch);
   1665 		bool hasOffset = (function.option == Offset);
   1666 
   1667 		if(!texelFetch)
   1668 		{
   1669 			uuuu = MulHigh(As<UShort4>(uuuu), *Pointer<UShort4>(mipmap + OFFSET(Mipmap, width)));
   1670 			vvvv = MulHigh(As<UShort4>(vvvv), *Pointer<UShort4>(mipmap + OFFSET(Mipmap, height)));
   1671 		}
   1672 
   1673 		if(hasOffset)
   1674 		{
   1675 			uuuu = applyOffset(uuuu, offset.x, Int4(*Pointer<UShort4>(mipmap + OFFSET(Mipmap, width))), texelFetch ? ADDRESSING_TEXELFETCH : state.addressingModeU);
   1676 			vvvv = applyOffset(vvvv, offset.y, Int4(*Pointer<UShort4>(mipmap + OFFSET(Mipmap, height))), texelFetch ? ADDRESSING_TEXELFETCH : state.addressingModeV);
   1677 		}
   1678 
   1679 		Short4 uuu2 = uuuu;
   1680 		uuuu = As<Short4>(UnpackLow(uuuu, vvvv));
   1681 		uuu2 = As<Short4>(UnpackHigh(uuu2, vvvv));
   1682 		uuuu = As<Short4>(MulAdd(uuuu, *Pointer<Short4>(mipmap + OFFSET(Mipmap,onePitchP))));
   1683 		uuu2 = As<Short4>(MulAdd(uuu2, *Pointer<Short4>(mipmap + OFFSET(Mipmap,onePitchP))));
   1684 
   1685 		if(hasThirdCoordinate())
   1686 		{
   1687 			if(state.textureType != TEXTURE_2D_ARRAY)
   1688 			{
   1689 				if(!texelFetch)
   1690 				{
   1691 					wwww = MulHigh(As<UShort4>(wwww), *Pointer<UShort4>(mipmap + OFFSET(Mipmap, depth)));
   1692 				}
   1693 				if(hasOffset)
   1694 				{
   1695 					wwww = applyOffset(wwww, offset.z, Int4(*Pointer<UShort4>(mipmap + OFFSET(Mipmap, depth))), texelFetch ? ADDRESSING_TEXELFETCH : state.addressingModeW);
   1696 				}
   1697 			}
   1698 			Short4 www2 = wwww;
   1699 			wwww = As<Short4>(UnpackLow(wwww, Short4(0x0000)));
   1700 			www2 = As<Short4>(UnpackHigh(www2, Short4(0x0000)));
   1701 			wwww = As<Short4>(MulAdd(wwww, *Pointer<Short4>(mipmap + OFFSET(Mipmap,sliceP))));
   1702 			www2 = As<Short4>(MulAdd(www2, *Pointer<Short4>(mipmap + OFFSET(Mipmap,sliceP))));
   1703 			uuuu = As<Short4>(As<Int2>(uuuu) + As<Int2>(wwww));
   1704 			uuu2 = As<Short4>(As<Int2>(uuu2) + As<Int2>(www2));
   1705 		}
   1706 
   1707 		index[0] = Extract(As<Int2>(uuuu), 0);
   1708 		index[1] = Extract(As<Int2>(uuuu), 1);
   1709 		index[2] = Extract(As<Int2>(uuu2), 0);
   1710 		index[3] = Extract(As<Int2>(uuu2), 1);
   1711 
   1712 		if(texelFetch)
   1713 		{
   1714 			Int size = Int(*Pointer<Int>(mipmap + OFFSET(Mipmap, sliceP)));
   1715 			if(hasThirdCoordinate())
   1716 			{
   1717 				size *= Int(*Pointer<Short>(mipmap + OFFSET(Mipmap, depth)));
   1718 			}
   1719 			UInt min = 0;
   1720 			UInt max = size - 1;
   1721 
   1722 			for(int i = 0; i < 4; i++)
   1723 			{
   1724 				index[i] = Min(Max(index[i], min), max);
   1725 			}
   1726 		}
   1727 	}
   1728 
   1729 	void SamplerCore::computeIndices(UInt index[4], Int4& uuuu, Int4& vvvv, Int4& wwww, const Pointer<Byte> &mipmap, SamplerFunction function)
   1730 	{
   1731 		UInt4 indices = uuuu + vvvv;
   1732 
   1733 		if(hasThirdCoordinate())
   1734 		{
   1735 			indices += As<UInt4>(wwww);
   1736 		}
   1737 
   1738 		for(int i = 0; i < 4; i++)
   1739 		{
   1740 			index[i] = Extract(As<Int4>(indices), i);
   1741 		}
   1742 	}
   1743 
   1744 	Vector4s SamplerCore::sampleTexel(UInt index[4], Pointer<Byte> buffer[4])
   1745 	{
   1746 		Vector4s c;
   1747 
   1748 		int f0 = state.textureType == TEXTURE_CUBE ? 0 : 0;
   1749 		int f1 = state.textureType == TEXTURE_CUBE ? 1 : 0;
   1750 		int f2 = state.textureType == TEXTURE_CUBE ? 2 : 0;
   1751 		int f3 = state.textureType == TEXTURE_CUBE ? 3 : 0;
   1752 
   1753 		if(has16bitTextureFormat())
   1754 		{
   1755 			c.x = Insert(c.x, Pointer<Short>(buffer[f0])[index[0]], 0);
   1756 			c.x = Insert(c.x, Pointer<Short>(buffer[f1])[index[1]], 1);
   1757 			c.x = Insert(c.x, Pointer<Short>(buffer[f2])[index[2]], 2);
   1758 			c.x = Insert(c.x, Pointer<Short>(buffer[f3])[index[3]], 3);
   1759 
   1760 			switch(state.textureFormat)
   1761 			{
   1762 			case FORMAT_R5G6B5:
   1763 				c.z = (c.x & Short4(0x001Fu)) << 11;
   1764 				c.y = (c.x & Short4(0x07E0u)) << 5;
   1765 				c.x = (c.x & Short4(0xF800u));
   1766 				break;
   1767 			default:
   1768 				ASSERT(false);
   1769 			}
   1770 		}
   1771 		else if(has8bitTextureComponents())
   1772 		{
   1773 			switch(textureComponentCount())
   1774 			{
   1775 			case 4:
   1776 				{
   1777 					Byte4 c0 = Pointer<Byte4>(buffer[f0])[index[0]];
   1778 					Byte4 c1 = Pointer<Byte4>(buffer[f1])[index[1]];
   1779 					Byte4 c2 = Pointer<Byte4>(buffer[f2])[index[2]];
   1780 					Byte4 c3 = Pointer<Byte4>(buffer[f3])[index[3]];
   1781 					c.x = Unpack(c0, c1);
   1782 					c.y = Unpack(c2, c3);
   1783 
   1784 					switch(state.textureFormat)
   1785 					{
   1786 					case FORMAT_A8R8G8B8:
   1787 						c.z = As<Short4>(UnpackLow(c.x, c.y));
   1788 						c.x = As<Short4>(UnpackHigh(c.x, c.y));
   1789 						c.y = c.z;
   1790 						c.w = c.x;
   1791 						c.z = UnpackLow(As<Byte8>(c.z), As<Byte8>(c.z));
   1792 						c.y = UnpackHigh(As<Byte8>(c.y), As<Byte8>(c.y));
   1793 						c.x = UnpackLow(As<Byte8>(c.x), As<Byte8>(c.x));
   1794 						c.w = UnpackHigh(As<Byte8>(c.w), As<Byte8>(c.w));
   1795 						break;
   1796 					case FORMAT_A8B8G8R8:
   1797 					case FORMAT_A8B8G8R8I:
   1798 					case FORMAT_A8B8G8R8I_SNORM:
   1799 					case FORMAT_Q8W8V8U8:
   1800 					case FORMAT_SRGB8_A8:
   1801 						c.z = As<Short4>(UnpackHigh(c.x, c.y));
   1802 						c.x = As<Short4>(UnpackLow(c.x, c.y));
   1803 						c.y = c.x;
   1804 						c.w = c.z;
   1805 						c.x = UnpackLow(As<Byte8>(c.x), As<Byte8>(c.x));
   1806 						c.y = UnpackHigh(As<Byte8>(c.y), As<Byte8>(c.y));
   1807 						c.z = UnpackLow(As<Byte8>(c.z), As<Byte8>(c.z));
   1808 						c.w = UnpackHigh(As<Byte8>(c.w), As<Byte8>(c.w));
   1809 						// Propagate sign bit
   1810 						if(state.textureFormat == FORMAT_A8B8G8R8I)
   1811 						{
   1812 							c.x >>= 8;
   1813 							c.y >>= 8;
   1814 							c.z >>= 8;
   1815 							c.w >>= 8;
   1816 						}
   1817 						break;
   1818 					case FORMAT_A8B8G8R8UI:
   1819 						c.z = As<Short4>(UnpackHigh(c.x, c.y));
   1820 						c.x = As<Short4>(UnpackLow(c.x, c.y));
   1821 						c.y = c.x;
   1822 						c.w = c.z;
   1823 						c.x = UnpackLow(As<Byte8>(c.x), As<Byte8>(Short4(0)));
   1824 						c.y = UnpackHigh(As<Byte8>(c.y), As<Byte8>(Short4(0)));
   1825 						c.z = UnpackLow(As<Byte8>(c.z), As<Byte8>(Short4(0)));
   1826 						c.w = UnpackHigh(As<Byte8>(c.w), As<Byte8>(Short4(0)));
   1827 						break;
   1828 					default:
   1829 						ASSERT(false);
   1830 					}
   1831 				}
   1832 				break;
   1833 			case 3:
   1834 				{
   1835 					Byte4 c0 = Pointer<Byte4>(buffer[f0])[index[0]];
   1836 					Byte4 c1 = Pointer<Byte4>(buffer[f1])[index[1]];
   1837 					Byte4 c2 = Pointer<Byte4>(buffer[f2])[index[2]];
   1838 					Byte4 c3 = Pointer<Byte4>(buffer[f3])[index[3]];
   1839 					c.x = Unpack(c0, c1);
   1840 					c.y = Unpack(c2, c3);
   1841 
   1842 					switch(state.textureFormat)
   1843 					{
   1844 					case FORMAT_X8R8G8B8:
   1845 						c.z = As<Short4>(UnpackLow(c.x, c.y));
   1846 						c.x = As<Short4>(UnpackHigh(c.x, c.y));
   1847 						c.y = c.z;
   1848 						c.z = UnpackLow(As<Byte8>(c.z), As<Byte8>(c.z));
   1849 						c.y = UnpackHigh(As<Byte8>(c.y), As<Byte8>(c.y));
   1850 						c.x = UnpackLow(As<Byte8>(c.x), As<Byte8>(c.x));
   1851 						break;
   1852 					case FORMAT_X8B8G8R8I_SNORM:
   1853 					case FORMAT_X8B8G8R8I:
   1854 					case FORMAT_X8B8G8R8:
   1855 					case FORMAT_X8L8V8U8:
   1856 					case FORMAT_SRGB8_X8:
   1857 						c.z = As<Short4>(UnpackHigh(c.x, c.y));
   1858 						c.x = As<Short4>(UnpackLow(c.x, c.y));
   1859 						c.y = c.x;
   1860 						c.x = UnpackLow(As<Byte8>(c.x), As<Byte8>(c.x));
   1861 						c.y = UnpackHigh(As<Byte8>(c.y), As<Byte8>(c.y));
   1862 						c.z = UnpackLow(As<Byte8>(c.z), As<Byte8>(c.z));
   1863 						// Propagate sign bit
   1864 						if(state.textureFormat == FORMAT_X8B8G8R8I)
   1865 						{
   1866 							c.x >>= 8;
   1867 							c.y >>= 8;
   1868 							c.z >>= 8;
   1869 						}
   1870 						break;
   1871 					case FORMAT_X8B8G8R8UI:
   1872 						c.z = As<Short4>(UnpackHigh(c.x, c.y));
   1873 						c.x = As<Short4>(UnpackLow(c.x, c.y));
   1874 						c.y = c.x;
   1875 						c.x = UnpackLow(As<Byte8>(c.x), As<Byte8>(Short4(0)));
   1876 						c.y = UnpackHigh(As<Byte8>(c.y), As<Byte8>(Short4(0)));
   1877 						c.z = UnpackLow(As<Byte8>(c.z), As<Byte8>(Short4(0)));
   1878 						break;
   1879 					default:
   1880 						ASSERT(false);
   1881 					}
   1882 				}
   1883 				break;
   1884 			case 2:
   1885 				c.x = Insert(c.x, Pointer<Short>(buffer[f0])[index[0]], 0);
   1886 				c.x = Insert(c.x, Pointer<Short>(buffer[f1])[index[1]], 1);
   1887 				c.x = Insert(c.x, Pointer<Short>(buffer[f2])[index[2]], 2);
   1888 				c.x = Insert(c.x, Pointer<Short>(buffer[f3])[index[3]], 3);
   1889 
   1890 				switch(state.textureFormat)
   1891 				{
   1892 				case FORMAT_G8R8:
   1893 				case FORMAT_G8R8I_SNORM:
   1894 				case FORMAT_V8U8:
   1895 				case FORMAT_A8L8:
   1896 					c.y = (c.x & Short4(0xFF00u)) | As<Short4>(As<UShort4>(c.x) >> 8);
   1897 					c.x = (c.x & Short4(0x00FFu)) | (c.x << 8);
   1898 					break;
   1899 				case FORMAT_G8R8I:
   1900 					c.y = c.x >> 8;
   1901 					c.x = (c.x << 8) >> 8; // Propagate sign bit
   1902 					break;
   1903 				case FORMAT_G8R8UI:
   1904 					c.y = As<Short4>(As<UShort4>(c.x) >> 8);
   1905 					c.x &= Short4(0x00FFu);
   1906 					break;
   1907 				default:
   1908 					ASSERT(false);
   1909 				}
   1910 				break;
   1911 			case 1:
   1912 				{
   1913 					Int c0 = Int(*Pointer<Byte>(buffer[f0] + index[0]));
   1914 					Int c1 = Int(*Pointer<Byte>(buffer[f1] + index[1]));
   1915 					Int c2 = Int(*Pointer<Byte>(buffer[f2] + index[2]));
   1916 					Int c3 = Int(*Pointer<Byte>(buffer[f3] + index[3]));
   1917 					c0 = c0 | (c1 << 8) | (c2 << 16) | (c3 << 24);
   1918 
   1919 					switch(state.textureFormat)
   1920 					{
   1921 					case FORMAT_R8I:
   1922 					case FORMAT_R8UI:
   1923 						{
   1924 							Int zero(0);
   1925 							c.x = Unpack(As<Byte4>(c0), As<Byte4>(zero));
   1926 							// Propagate sign bit
   1927 							if(state.textureFormat == FORMAT_R8I)
   1928 							{
   1929 								c.x = (c.x << 8) >> 8;
   1930 							}
   1931 						}
   1932 						break;
   1933 					default:
   1934 						c.x = Unpack(As<Byte4>(c0));
   1935 						break;
   1936 					}
   1937 				}
   1938 				break;
   1939 			default:
   1940 				ASSERT(false);
   1941 			}
   1942 		}
   1943 		else if(has16bitTextureComponents())
   1944 		{
   1945 			switch(textureComponentCount())
   1946 			{
   1947 			case 4:
   1948 				c.x = Pointer<Short4>(buffer[f0])[index[0]];
   1949 				c.y = Pointer<Short4>(buffer[f1])[index[1]];
   1950 				c.z = Pointer<Short4>(buffer[f2])[index[2]];
   1951 				c.w = Pointer<Short4>(buffer[f3])[index[3]];
   1952 				transpose4x4(c.x, c.y, c.z, c.w);
   1953 				break;
   1954 			case 2:
   1955 				c.x = *Pointer<Short4>(buffer[f0] + 4 * index[0]);
   1956 				c.x = As<Short4>(UnpackLow(c.x, *Pointer<Short4>(buffer[f1] + 4 * index[1])));
   1957 				c.z = *Pointer<Short4>(buffer[f2] + 4 * index[2]);
   1958 				c.z = As<Short4>(UnpackLow(c.z, *Pointer<Short4>(buffer[f3] + 4 * index[3])));
   1959 				c.y = c.x;
   1960 				c.x = UnpackLow(As<Int2>(c.x), As<Int2>(c.z));
   1961 				c.y = UnpackHigh(As<Int2>(c.y), As<Int2>(c.z));
   1962 				break;
   1963 			case 1:
   1964 				c.x = Insert(c.x, Pointer<Short>(buffer[f0])[index[0]], 0);
   1965 				c.x = Insert(c.x, Pointer<Short>(buffer[f1])[index[1]], 1);
   1966 				c.x = Insert(c.x, Pointer<Short>(buffer[f2])[index[2]], 2);
   1967 				c.x = Insert(c.x, Pointer<Short>(buffer[f3])[index[3]], 3);
   1968 				break;
   1969 			default:
   1970 				ASSERT(false);
   1971 			}
   1972 		}
   1973 		else ASSERT(false);
   1974 
   1975 		return c;
   1976 	}
   1977 
   1978 	Vector4s SamplerCore::sampleTexel(Short4 &uuuu, Short4 &vvvv, Short4 &wwww, Vector4f &offset, Pointer<Byte> &mipmap, Pointer<Byte> buffer[4], SamplerFunction function)
   1979 	{
   1980 		Vector4s c;
   1981 
   1982 		UInt index[4];
   1983 		computeIndices(index, uuuu, vvvv, wwww, offset, mipmap, function);
   1984 
   1985 		if(hasYuvFormat())
   1986 		{
   1987 			// Generic YPbPr to RGB transformation
   1988 			// R = Y                               +           2 * (1 - Kr) * Pr
   1989 			// G = Y - 2 * Kb * (1 - Kb) / Kg * Pb - 2 * Kr * (1 - Kr) / Kg * Pr
   1990 			// B = Y +           2 * (1 - Kb) * Pb
   1991 
   1992 			float Kb = 0.114f;
   1993 			float Kr = 0.299f;
   1994 			int studioSwing = 1;
   1995 
   1996 			switch(state.textureFormat)
   1997 			{
   1998 			case FORMAT_YV12_BT601:
   1999 				Kb = 0.114f;
   2000 				Kr = 0.299f;
   2001 				studioSwing = 1;
   2002 				break;
   2003 			case FORMAT_YV12_BT709:
   2004 				Kb = 0.0722f;
   2005 				Kr = 0.2126f;
   2006 				studioSwing = 1;
   2007 				break;
   2008 			case FORMAT_YV12_JFIF:
   2009 				Kb = 0.114f;
   2010 				Kr = 0.299f;
   2011 				studioSwing = 0;
   2012 				break;
   2013 			default:
   2014 				ASSERT(false);
   2015 			}
   2016 
   2017 			const float Kg = 1.0f - Kr - Kb;
   2018 
   2019 			const float Rr = 2 * (1 - Kr);
   2020 			const float Gb = -2 * Kb * (1 - Kb) / Kg;
   2021 			const float Gr = -2 * Kr * (1 - Kr) / Kg;
   2022 			const float Bb = 2 * (1 - Kb);
   2023 
   2024 			// Scaling and bias for studio-swing range: Y = [16 .. 235], U/V = [16 .. 240]
   2025 			const float Yy = studioSwing ? 255.0f / (235 - 16) : 1.0f;
   2026 			const float Uu = studioSwing ? 255.0f / (240 - 16) : 1.0f;
   2027 			const float Vv = studioSwing ? 255.0f / (240 - 16) : 1.0f;
   2028 
   2029 			const float Rv = Vv *  Rr;
   2030 			const float Gu = Uu *  Gb;
   2031 			const float Gv = Vv *  Gr;
   2032 			const float Bu = Uu *  Bb;
   2033 
   2034 			const float R0 = (studioSwing * -16 * Yy - 128 * Rv) / 255;
   2035 			const float G0 = (studioSwing * -16 * Yy - 128 * Gu - 128 * Gv) / 255;
   2036 			const float B0 = (studioSwing * -16 * Yy - 128 * Bu) / 255;
   2037 
   2038 			Int c0 = Int(buffer[0][index[0]]);
   2039 			Int c1 = Int(buffer[0][index[1]]);
   2040 			Int c2 = Int(buffer[0][index[2]]);
   2041 			Int c3 = Int(buffer[0][index[3]]);
   2042 			c0 = c0 | (c1 << 8) | (c2 << 16) | (c3 << 24);
   2043 			UShort4 Y = As<UShort4>(Unpack(As<Byte4>(c0)));
   2044 
   2045 			computeIndices(index, uuuu, vvvv, wwww, offset, mipmap + sizeof(Mipmap), function);
   2046 			c0 = Int(buffer[1][index[0]]);
   2047 			c1 = Int(buffer[1][index[1]]);
   2048 			c2 = Int(buffer[1][index[2]]);
   2049 			c3 = Int(buffer[1][index[3]]);
   2050 			c0 = c0 | (c1 << 8) | (c2 << 16) | (c3 << 24);
   2051 			UShort4 V = As<UShort4>(Unpack(As<Byte4>(c0)));
   2052 
   2053 			c0 = Int(buffer[2][index[0]]);
   2054 			c1 = Int(buffer[2][index[1]]);
   2055 			c2 = Int(buffer[2][index[2]]);
   2056 			c3 = Int(buffer[2][index[3]]);
   2057 			c0 = c0 | (c1 << 8) | (c2 << 16) | (c3 << 24);
   2058 			UShort4 U = As<UShort4>(Unpack(As<Byte4>(c0)));
   2059 
   2060 			const UShort4 yY = UShort4(iround(Yy * 0x4000));
   2061 			const UShort4 rV = UShort4(iround(Rv * 0x4000));
   2062 			const UShort4 gU = UShort4(iround(-Gu * 0x4000));
   2063 			const UShort4 gV = UShort4(iround(-Gv * 0x4000));
   2064 			const UShort4 bU = UShort4(iround(Bu * 0x4000));
   2065 
   2066 			const UShort4 r0 = UShort4(iround(-R0 * 0x4000));
   2067 			const UShort4 g0 = UShort4(iround(G0 * 0x4000));
   2068 			const UShort4 b0 = UShort4(iround(-B0 * 0x4000));
   2069 
   2070 			UShort4 y = MulHigh(Y, yY);
   2071 			UShort4 r = SubSat(y + MulHigh(V, rV), r0);
   2072 			UShort4 g = SubSat(y + g0, MulHigh(U, gU) + MulHigh(V, gV));
   2073 			UShort4 b = SubSat(y + MulHigh(U, bU), b0);
   2074 
   2075 			c.x = Min(r, UShort4(0x3FFF)) << 2;
   2076 			c.y = Min(g, UShort4(0x3FFF)) << 2;
   2077 			c.z = Min(b, UShort4(0x3FFF)) << 2;
   2078 		}
   2079 		else
   2080 		{
   2081 			return sampleTexel(index, buffer);
   2082 		}
   2083 
   2084 		return c;
   2085 	}
   2086 
   2087 	Vector4f SamplerCore::sampleTexel(Int4 &uuuu, Int4 &vvvv, Int4 &wwww, Float4 &z, Pointer<Byte> &mipmap, Pointer<Byte> buffer[4], SamplerFunction function)
   2088 	{
   2089 		Vector4f c;
   2090 
   2091 		UInt index[4];
   2092 		computeIndices(index, uuuu, vvvv, wwww, mipmap, function);
   2093 
   2094 		if(hasFloatTexture() || has32bitIntegerTextureComponents())
   2095 		{
   2096 			int f0 = state.textureType == TEXTURE_CUBE ? 0 : 0;
   2097 			int f1 = state.textureType == TEXTURE_CUBE ? 1 : 0;
   2098 			int f2 = state.textureType == TEXTURE_CUBE ? 2 : 0;
   2099 			int f3 = state.textureType == TEXTURE_CUBE ? 3 : 0;
   2100 
   2101 			// Read texels
   2102 			switch(textureComponentCount())
   2103 			{
   2104 			case 4:
   2105 				c.x = *Pointer<Float4>(buffer[f0] + index[0] * 16, 16);
   2106 				c.y = *Pointer<Float4>(buffer[f1] + index[1] * 16, 16);
   2107 				c.z = *Pointer<Float4>(buffer[f2] + index[2] * 16, 16);
   2108 				c.w = *Pointer<Float4>(buffer[f3] + index[3] * 16, 16);
   2109 				transpose4x4(c.x, c.y, c.z, c.w);
   2110 				break;
   2111 			case 3:
   2112 				ASSERT(state.textureFormat == FORMAT_X32B32G32R32F);
   2113 				c.x = *Pointer<Float4>(buffer[f0] + index[0] * 16, 16);
   2114 				c.y = *Pointer<Float4>(buffer[f1] + index[1] * 16, 16);
   2115 				c.z = *Pointer<Float4>(buffer[f2] + index[2] * 16, 16);
   2116 				c.w = *Pointer<Float4>(buffer[f3] + index[3] * 16, 16);
   2117 				transpose4x3(c.x, c.y, c.z, c.w);
   2118 				c.w = Float4(1.0f);
   2119 				break;
   2120 			case 2:
   2121 				// FIXME: Optimal shuffling?
   2122 				c.x.xy = *Pointer<Float4>(buffer[f0] + index[0] * 8);
   2123 				c.x.zw = *Pointer<Float4>(buffer[f1] + index[1] * 8 - 8);
   2124 				c.z.xy = *Pointer<Float4>(buffer[f2] + index[2] * 8);
   2125 				c.z.zw = *Pointer<Float4>(buffer[f3] + index[3] * 8 - 8);
   2126 				c.y = c.x;
   2127 				c.x = Float4(c.x.xz, c.z.xz);
   2128 				c.y = Float4(c.y.yw, c.z.yw);
   2129 				break;
   2130 			case 1:
   2131 				// FIXME: Optimal shuffling?
   2132 				c.x.x = *Pointer<Float>(buffer[f0] + index[0] * 4);
   2133 				c.x.y = *Pointer<Float>(buffer[f1] + index[1] * 4);
   2134 				c.x.z = *Pointer<Float>(buffer[f2] + index[2] * 4);
   2135 				c.x.w = *Pointer<Float>(buffer[f3] + index[3] * 4);
   2136 
   2137 				if(state.textureFormat == FORMAT_D32FS8_SHADOW && state.textureFilter != FILTER_GATHER)
   2138 				{
   2139 					Float4 d = Min(Max(z, Float4(0.0f)), Float4(1.0f));
   2140 
   2141 					c.x = As<Float4>(As<Int4>(CmpNLT(c.x, d)) & As<Int4>(Float4(1.0f)));   // FIXME: Only less-equal?
   2142 				}
   2143 				break;
   2144 			default:
   2145 				ASSERT(false);
   2146 			}
   2147 		}
   2148 		else
   2149 		{
   2150 			ASSERT(!hasYuvFormat());
   2151 
   2152 			Vector4s cs = sampleTexel(index, buffer);
   2153 
   2154 			bool isInteger = Surface::isNonNormalizedInteger(state.textureFormat);
   2155 			int componentCount = textureComponentCount();
   2156 			for(int n = 0; n < componentCount; ++n)
   2157 			{
   2158 				if(hasUnsignedTextureComponent(n))
   2159 				{
   2160 					if(isInteger)
   2161 					{
   2162 						c[n] = As<Float4>(Int4(As<UShort4>(cs[n])));
   2163 					}
   2164 					else
   2165 					{
   2166 						c[n] = Float4(As<UShort4>(cs[n]));
   2167 					}
   2168 				}
   2169 				else
   2170 				{
   2171 					if(isInteger)
   2172 					{
   2173 						c[n] = As<Float4>(Int4(cs[n]));
   2174 					}
   2175 					else
   2176 					{
   2177 						c[n] = Float4(cs[n]);
   2178 					}
   2179 				}
   2180 			}
   2181 		}
   2182 
   2183 		return c;
   2184 	}
   2185 
   2186 	void SamplerCore::selectMipmap(Pointer<Byte> &texture, Pointer<Byte> buffer[4], Pointer<Byte> &mipmap, Float &lod, Int face[4], bool secondLOD)
   2187 	{
   2188 		if(state.mipmapFilter < MIPMAP_POINT)
   2189 		{
   2190 			mipmap = texture + OFFSET(Texture,mipmap[0]);
   2191 		}
   2192 		else
   2193 		{
   2194 			Int ilod;
   2195 
   2196 			if(state.mipmapFilter == MIPMAP_POINT)
   2197 			{
   2198 				ilod = RoundInt(lod);
   2199 			}
   2200 			else   // Linear
   2201 			{
   2202 				ilod = Int(lod);
   2203 			}
   2204 
   2205 			mipmap = texture + OFFSET(Texture,mipmap) + ilod * sizeof(Mipmap) + secondLOD * sizeof(Mipmap);
   2206 		}
   2207 
   2208 		if(state.textureType != TEXTURE_CUBE)
   2209 		{
   2210 			buffer[0] = *Pointer<Pointer<Byte> >(mipmap + OFFSET(Mipmap,buffer[0]));
   2211 
   2212 			if(hasYuvFormat())
   2213 			{
   2214 				buffer[1] = *Pointer<Pointer<Byte> >(mipmap + OFFSET(Mipmap,buffer[1]));
   2215 				buffer[2] = *Pointer<Pointer<Byte> >(mipmap + OFFSET(Mipmap,buffer[2]));
   2216 			}
   2217 		}
   2218 		else
   2219 		{
   2220 			for(int i = 0; i < 4; i++)
   2221 			{
   2222 				buffer[i] = *Pointer<Pointer<Byte> >(mipmap + OFFSET(Mipmap,buffer) + face[i] * sizeof(void*));
   2223 			}
   2224 		}
   2225 	}
   2226 
   2227 	Int4 SamplerCore::computeFilterOffset(Float &lod)
   2228 	{
   2229 		Int4 filtering((state.textureFilter == FILTER_POINT) ? 0 : 1);
   2230 		if(state.textureFilter == FILTER_MIN_LINEAR_MAG_POINT)
   2231 		{
   2232 			filtering &= CmpNLE(Float4(lod), Float4(0.0f));
   2233 		}
   2234 		else if(state.textureFilter == FILTER_MIN_POINT_MAG_LINEAR)
   2235 		{
   2236 			filtering &= CmpLE(Float4(lod), Float4(0.0f));
   2237 		}
   2238 
   2239 		return filtering;
   2240 	}
   2241 
   2242 	Short4 SamplerCore::address(Float4 &uw, AddressingMode addressingMode, Pointer<Byte>& mipmap)
   2243 	{
   2244 		if(addressingMode == ADDRESSING_LAYER && state.textureType != TEXTURE_2D_ARRAY)
   2245 		{
   2246 			return Short4();   // Unused
   2247 		}
   2248 		else if(addressingMode == ADDRESSING_LAYER && state.textureType == TEXTURE_2D_ARRAY)
   2249 		{
   2250 			return Min(Max(Short4(RoundInt(uw)), Short4(0)), *Pointer<Short4>(mipmap + OFFSET(Mipmap, depth)) - Short4(1));
   2251 		}
   2252 		else if(addressingMode == ADDRESSING_CLAMP)
   2253 		{
   2254 			Float4 clamp = Min(Max(uw, Float4(0.0f)), Float4(65535.0f / 65536.0f));
   2255 
   2256 			return Short4(Int4(clamp * Float4(1 << 16)));
   2257 		}
   2258 		else if(addressingMode == ADDRESSING_MIRROR)
   2259 		{
   2260 			Int4 convert = Int4(uw * Float4(1 << 16));
   2261 			Int4 mirror = (convert << 15) >> 31;
   2262 
   2263 			convert ^= mirror;
   2264 
   2265 			return Short4(convert);
   2266 		}
   2267 		else if(addressingMode == ADDRESSING_MIRRORONCE)
   2268 		{
   2269 			// Absolute value
   2270 			Int4 convert = Int4(Abs(uw * Float4(1 << 16)));
   2271 
   2272 			// Clamp
   2273 			convert -= Int4(0x00008000, 0x00008000, 0x00008000, 0x00008000);
   2274 			convert = As<Int4>(Pack(convert, convert));
   2275 
   2276 			return As<Short4>(Int2(convert)) + Short4(0x8000u);
   2277 		}
   2278 		else   // Wrap (or border)
   2279 		{
   2280 			return Short4(Int4(uw * Float4(1 << 16)));
   2281 		}
   2282 	}
   2283 
   2284 	void SamplerCore::address(Float4 &uvw, Int4& xyz0, Int4& xyz1, Float4& f, Pointer<Byte>& mipmap, Float4 &texOffset, Int4 &filter, int whd, AddressingMode addressingMode, SamplerFunction function)
   2285 	{
   2286 		if(addressingMode == ADDRESSING_LAYER && state.textureType != TEXTURE_2D_ARRAY)
   2287 		{
   2288 			return; // Unused
   2289 		}
   2290 
   2291 		Int4 dim = Int4(*Pointer<Short4>(mipmap + whd, 16));
   2292 		Int4 maxXYZ = dim - Int4(1);
   2293 
   2294 		if(function == Fetch)
   2295 		{
   2296 			xyz0 = Min(Max(((function.option == Offset) && (addressingMode != ADDRESSING_LAYER)) ? As<Int4>(uvw) + As<Int4>(texOffset) : As<Int4>(uvw), Int4(0)), maxXYZ);
   2297 		}
   2298 		else if(addressingMode == ADDRESSING_LAYER && state.textureType == TEXTURE_2D_ARRAY) // Note: Offset does not apply to array layers
   2299 		{
   2300 			xyz0 = Min(Max(RoundInt(uvw), Int4(0)), maxXYZ);
   2301 		}
   2302 		else
   2303 		{
   2304 			const int halfBits = 0x3effffff; // Value just under 0.5f
   2305 			const int oneBits  = 0x3f7fffff; // Value just under 1.0f
   2306 			const int twoBits  = 0x3fffffff; // Value just under 2.0f
   2307 
   2308 			Float4 coord = Float4(dim);
   2309 			switch(addressingMode)
   2310 			{
   2311 			case ADDRESSING_CLAMP:
   2312 				{
   2313 					Float4 one = As<Float4>(Int4(oneBits));
   2314 					coord *= Min(Max(uvw, Float4(0.0f)), one);
   2315 				}
   2316 				break;
   2317 			case ADDRESSING_MIRROR:
   2318 				{
   2319 					Float4 half = As<Float4>(Int4(halfBits));
   2320 					Float4 one = As<Float4>(Int4(oneBits));
   2321 					Float4 two = As<Float4>(Int4(twoBits));
   2322 					coord *= one - Abs(two * Frac(uvw * half) - one);
   2323 				}
   2324 				break;
   2325 			case ADDRESSING_MIRRORONCE:
   2326 				{
   2327 					Float4 half = As<Float4>(Int4(halfBits));
   2328 					Float4 one = As<Float4>(Int4(oneBits));
   2329 					Float4 two = As<Float4>(Int4(twoBits));
   2330 					coord *= one - Abs(two * Frac(Min(Max(uvw, -one), two) * half) - one);
   2331 				}
   2332 				break;
   2333 			default:   // Wrap (or border)
   2334 				coord *= Frac(uvw);
   2335 				break;
   2336 			}
   2337 
   2338 			xyz0 = Int4(coord);
   2339 
   2340 			if(function.option == Offset)
   2341 			{
   2342 				xyz0 += As<Int4>(texOffset);
   2343 				switch(addressingMode)
   2344 				{
   2345 				case ADDRESSING_MIRROR:
   2346 				case ADDRESSING_MIRRORONCE:
   2347 				case ADDRESSING_BORDER:
   2348 					// FIXME: Implement ADDRESSING_MIRROR, ADDRESSING_MIRRORONCE and ADDRESSING_BORDER. Fall through to Clamp.
   2349 				case ADDRESSING_CLAMP:
   2350 					xyz0 = Min(Max(xyz0, Int4(0)), maxXYZ);
   2351 					break;
   2352 				default:   // Wrap
   2353 					xyz0 = (xyz0 + dim * Int4(-MIN_PROGRAM_TEXEL_OFFSET)) % dim;
   2354 					break;
   2355 				}
   2356 			}
   2357 
   2358 			if(state.textureFilter != FILTER_POINT) // Compute 2nd coordinate, if needed
   2359 			{
   2360 				bool gather = state.textureFilter == FILTER_GATHER;
   2361 
   2362 				xyz1 = xyz0 + filter; // Increment
   2363 
   2364 				if(!gather)
   2365 				{
   2366 					Float4 frac = Frac(coord);
   2367 					f = Abs(frac - Float4(0.5f));
   2368 					xyz1 -= CmpLT(frac, Float4(0.5f)) & (filter + filter); // Decrement xyz if necessary
   2369 				}
   2370 
   2371 				switch(addressingMode)
   2372 				{
   2373 				case ADDRESSING_MIRROR:
   2374 				case ADDRESSING_MIRRORONCE:
   2375 				case ADDRESSING_BORDER:
   2376 					// FIXME: Implement ADDRESSING_MIRROR, ADDRESSING_MIRRORONCE and ADDRESSING_BORDER. Fall through to Clamp.
   2377 				case ADDRESSING_CLAMP:
   2378 					xyz1 = gather ? Min(xyz1, maxXYZ) : Min(Max(xyz1, Int4(0)), maxXYZ);
   2379 					break;
   2380 				default:   // Wrap
   2381 					{
   2382 						// The coordinates overflow or underflow by at most 1
   2383 						Int4 over = CmpNLT(xyz1, dim);
   2384 						xyz1 = (over & Int4(0)) | (~over & xyz1); // xyz >= dim ? 0 : xyz
   2385 						if(!gather)
   2386 						{
   2387 							Int4 under = CmpLT(xyz1, Int4(0));
   2388 							xyz1 = (under & maxXYZ) | (~under & xyz1); // xyz < 0 ? dim - 1 : xyz
   2389 						}
   2390 					}
   2391 					break;
   2392 				}
   2393 			}
   2394 		}
   2395 	}
   2396 
   2397 	void SamplerCore::convertFixed12(Short4 &cs, Float4 &cf)
   2398 	{
   2399 		cs = RoundShort4(cf * Float4(0x1000));
   2400 	}
   2401 
   2402 	void SamplerCore::convertFixed12(Vector4s &cs, Vector4f &cf)
   2403 	{
   2404 		convertFixed12(cs.x, cf.x);
   2405 		convertFixed12(cs.y, cf.y);
   2406 		convertFixed12(cs.z, cf.z);
   2407 		convertFixed12(cs.w, cf.w);
   2408 	}
   2409 
   2410 	void SamplerCore::convertSigned12(Float4 &cf, Short4 &cs)
   2411 	{
   2412 		cf = Float4(cs) * Float4(1.0f / 0x0FFE);
   2413 	}
   2414 
   2415 //	void SamplerCore::convertSigned12(Vector4f &cf, Vector4s &cs)
   2416 //	{
   2417 //		convertSigned12(cf.x, cs.x);
   2418 //		convertSigned12(cf.y, cs.y);
   2419 //		convertSigned12(cf.z, cs.z);
   2420 //		convertSigned12(cf.w, cs.w);
   2421 //	}
   2422 
   2423 	void SamplerCore::convertSigned15(Float4 &cf, Short4 &cs)
   2424 	{
   2425 		cf = Float4(cs) * Float4(1.0f / 0x7FFF);
   2426 	}
   2427 
   2428 	void SamplerCore::convertUnsigned16(Float4 &cf, Short4 &cs)
   2429 	{
   2430 		cf = Float4(As<UShort4>(cs)) * Float4(1.0f / 0xFFFF);
   2431 	}
   2432 
   2433 	void SamplerCore::sRGBtoLinear16_8_12(Short4 &c)
   2434 	{
   2435 		c = As<UShort4>(c) >> 8;
   2436 
   2437 		Pointer<Byte> LUT = Pointer<Byte>(constants + OFFSET(Constants,sRGBtoLinear8_12));
   2438 
   2439 		c = Insert(c, *Pointer<Short>(LUT + 2 * Int(Extract(c, 0))), 0);
   2440 		c = Insert(c, *Pointer<Short>(LUT + 2 * Int(Extract(c, 1))), 1);
   2441 		c = Insert(c, *Pointer<Short>(LUT + 2 * Int(Extract(c, 2))), 2);
   2442 		c = Insert(c, *Pointer<Short>(LUT + 2 * Int(Extract(c, 3))), 3);
   2443 	}
   2444 
   2445 	void SamplerCore::sRGBtoLinear16_6_12(Short4 &c)
   2446 	{
   2447 		c = As<UShort4>(c) >> 10;
   2448 
   2449 		Pointer<Byte> LUT = Pointer<Byte>(constants + OFFSET(Constants,sRGBtoLinear6_12));
   2450 
   2451 		c = Insert(c, *Pointer<Short>(LUT + 2 * Int(Extract(c, 0))), 0);
   2452 		c = Insert(c, *Pointer<Short>(LUT + 2 * Int(Extract(c, 1))), 1);
   2453 		c = Insert(c, *Pointer<Short>(LUT + 2 * Int(Extract(c, 2))), 2);
   2454 		c = Insert(c, *Pointer<Short>(LUT + 2 * Int(Extract(c, 3))), 3);
   2455 	}
   2456 
   2457 	void SamplerCore::sRGBtoLinear16_5_12(Short4 &c)
   2458 	{
   2459 		c = As<UShort4>(c) >> 11;
   2460 
   2461 		Pointer<Byte> LUT = Pointer<Byte>(constants + OFFSET(Constants,sRGBtoLinear5_12));
   2462 
   2463 		c = Insert(c, *Pointer<Short>(LUT + 2 * Int(Extract(c, 0))), 0);
   2464 		c = Insert(c, *Pointer<Short>(LUT + 2 * Int(Extract(c, 1))), 1);
   2465 		c = Insert(c, *Pointer<Short>(LUT + 2 * Int(Extract(c, 2))), 2);
   2466 		c = Insert(c, *Pointer<Short>(LUT + 2 * Int(Extract(c, 3))), 3);
   2467 	}
   2468 
   2469 	bool SamplerCore::hasFloatTexture() const
   2470 	{
   2471 		return Surface::isFloatFormat(state.textureFormat);
   2472 	}
   2473 
   2474 	bool SamplerCore::hasUnnormalizedIntegerTexture() const
   2475 	{
   2476 		return Surface::isNonNormalizedInteger(state.textureFormat);
   2477 	}
   2478 
   2479 	bool SamplerCore::hasUnsignedTextureComponent(int component) const
   2480 	{
   2481 		return Surface::isUnsignedComponent(state.textureFormat, component);
   2482 	}
   2483 
   2484 	int SamplerCore::textureComponentCount() const
   2485 	{
   2486 		return Surface::componentCount(state.textureFormat);
   2487 	}
   2488 
   2489 	bool SamplerCore::hasThirdCoordinate() const
   2490 	{
   2491 		return (state.textureType == TEXTURE_3D) || (state.textureType == TEXTURE_2D_ARRAY);
   2492 	}
   2493 
   2494 	bool SamplerCore::has16bitTextureFormat() const
   2495 	{
   2496 		switch(state.textureFormat)
   2497 		{
   2498 		case FORMAT_R5G6B5:
   2499 			return true;
   2500 		case FORMAT_R8I_SNORM:
   2501 		case FORMAT_G8R8I_SNORM:
   2502 		case FORMAT_X8B8G8R8I_SNORM:
   2503 		case FORMAT_A8B8G8R8I_SNORM:
   2504 		case FORMAT_R8I:
   2505 		case FORMAT_R8UI:
   2506 		case FORMAT_G8R8I:
   2507 		case FORMAT_G8R8UI:
   2508 		case FORMAT_X8B8G8R8I:
   2509 		case FORMAT_X8B8G8R8UI:
   2510 		case FORMAT_A8B8G8R8I:
   2511 		case FORMAT_A8B8G8R8UI:
   2512 		case FORMAT_R32I:
   2513 		case FORMAT_R32UI:
   2514 		case FORMAT_G32R32I:
   2515 		case FORMAT_G32R32UI:
   2516 		case FORMAT_X32B32G32R32I:
   2517 		case FORMAT_X32B32G32R32UI:
   2518 		case FORMAT_A32B32G32R32I:
   2519 		case FORMAT_A32B32G32R32UI:
   2520 		case FORMAT_G8R8:
   2521 		case FORMAT_X8R8G8B8:
   2522 		case FORMAT_X8B8G8R8:
   2523 		case FORMAT_A8R8G8B8:
   2524 		case FORMAT_A8B8G8R8:
   2525 		case FORMAT_SRGB8_X8:
   2526 		case FORMAT_SRGB8_A8:
   2527 		case FORMAT_V8U8:
   2528 		case FORMAT_Q8W8V8U8:
   2529 		case FORMAT_X8L8V8U8:
   2530 		case FORMAT_R32F:
   2531 		case FORMAT_G32R32F:
   2532 		case FORMAT_X32B32G32R32F:
   2533 		case FORMAT_A32B32G32R32F:
   2534 		case FORMAT_A8:
   2535 		case FORMAT_R8:
   2536 		case FORMAT_L8:
   2537 		case FORMAT_A8L8:
   2538 		case FORMAT_D32F:
   2539 		case FORMAT_D32F_LOCKABLE:
   2540 		case FORMAT_D32FS8_TEXTURE:
   2541 		case FORMAT_D32FS8_SHADOW:
   2542 		case FORMAT_L16:
   2543 		case FORMAT_G16R16:
   2544 		case FORMAT_A16B16G16R16:
   2545 		case FORMAT_V16U16:
   2546 		case FORMAT_A16W16V16U16:
   2547 		case FORMAT_Q16W16V16U16:
   2548 		case FORMAT_R16I:
   2549 		case FORMAT_R16UI:
   2550 		case FORMAT_G16R16I:
   2551 		case FORMAT_G16R16UI:
   2552 		case FORMAT_X16B16G16R16I:
   2553 		case FORMAT_X16B16G16R16UI:
   2554 		case FORMAT_A16B16G16R16I:
   2555 		case FORMAT_A16B16G16R16UI:
   2556 		case FORMAT_YV12_BT601:
   2557 		case FORMAT_YV12_BT709:
   2558 		case FORMAT_YV12_JFIF:
   2559 			return false;
   2560 		default:
   2561 			ASSERT(false);
   2562 		}
   2563 
   2564 		return false;
   2565 	}
   2566 
   2567 	bool SamplerCore::has8bitTextureComponents() const
   2568 	{
   2569 		switch(state.textureFormat)
   2570 		{
   2571 		case FORMAT_G8R8:
   2572 		case FORMAT_X8R8G8B8:
   2573 		case FORMAT_X8B8G8R8:
   2574 		case FORMAT_A8R8G8B8:
   2575 		case FORMAT_A8B8G8R8:
   2576 		case FORMAT_SRGB8_X8:
   2577 		case FORMAT_SRGB8_A8:
   2578 		case FORMAT_V8U8:
   2579 		case FORMAT_Q8W8V8U8:
   2580 		case FORMAT_X8L8V8U8:
   2581 		case FORMAT_A8:
   2582 		case FORMAT_R8:
   2583 		case FORMAT_L8:
   2584 		case FORMAT_A8L8:
   2585 		case FORMAT_R8I_SNORM:
   2586 		case FORMAT_G8R8I_SNORM:
   2587 		case FORMAT_X8B8G8R8I_SNORM:
   2588 		case FORMAT_A8B8G8R8I_SNORM:
   2589 		case FORMAT_R8I:
   2590 		case FORMAT_R8UI:
   2591 		case FORMAT_G8R8I:
   2592 		case FORMAT_G8R8UI:
   2593 		case FORMAT_X8B8G8R8I:
   2594 		case FORMAT_X8B8G8R8UI:
   2595 		case FORMAT_A8B8G8R8I:
   2596 		case FORMAT_A8B8G8R8UI:
   2597 			return true;
   2598 		case FORMAT_R5G6B5:
   2599 		case FORMAT_R32F:
   2600 		case FORMAT_G32R32F:
   2601 		case FORMAT_X32B32G32R32F:
   2602 		case FORMAT_A32B32G32R32F:
   2603 		case FORMAT_D32F:
   2604 		case FORMAT_D32F_LOCKABLE:
   2605 		case FORMAT_D32FS8_TEXTURE:
   2606 		case FORMAT_D32FS8_SHADOW:
   2607 		case FORMAT_L16:
   2608 		case FORMAT_G16R16:
   2609 		case FORMAT_A16B16G16R16:
   2610 		case FORMAT_V16U16:
   2611 		case FORMAT_A16W16V16U16:
   2612 		case FORMAT_Q16W16V16U16:
   2613 		case FORMAT_R32I:
   2614 		case FORMAT_R32UI:
   2615 		case FORMAT_G32R32I:
   2616 		case FORMAT_G32R32UI:
   2617 		case FORMAT_X32B32G32R32I:
   2618 		case FORMAT_X32B32G32R32UI:
   2619 		case FORMAT_A32B32G32R32I:
   2620 		case FORMAT_A32B32G32R32UI:
   2621 		case FORMAT_R16I:
   2622 		case FORMAT_R16UI:
   2623 		case FORMAT_G16R16I:
   2624 		case FORMAT_G16R16UI:
   2625 		case FORMAT_X16B16G16R16I:
   2626 		case FORMAT_X16B16G16R16UI:
   2627 		case FORMAT_A16B16G16R16I:
   2628 		case FORMAT_A16B16G16R16UI:
   2629 		case FORMAT_YV12_BT601:
   2630 		case FORMAT_YV12_BT709:
   2631 		case FORMAT_YV12_JFIF:
   2632 			return false;
   2633 		default:
   2634 			ASSERT(false);
   2635 		}
   2636 
   2637 		return false;
   2638 	}
   2639 
   2640 	bool SamplerCore::has16bitTextureComponents() const
   2641 	{
   2642 		switch(state.textureFormat)
   2643 		{
   2644 		case FORMAT_R5G6B5:
   2645 		case FORMAT_R8I_SNORM:
   2646 		case FORMAT_G8R8I_SNORM:
   2647 		case FORMAT_X8B8G8R8I_SNORM:
   2648 		case FORMAT_A8B8G8R8I_SNORM:
   2649 		case FORMAT_R8I:
   2650 		case FORMAT_R8UI:
   2651 		case FORMAT_G8R8I:
   2652 		case FORMAT_G8R8UI:
   2653 		case FORMAT_X8B8G8R8I:
   2654 		case FORMAT_X8B8G8R8UI:
   2655 		case FORMAT_A8B8G8R8I:
   2656 		case FORMAT_A8B8G8R8UI:
   2657 		case FORMAT_R32I:
   2658 		case FORMAT_R32UI:
   2659 		case FORMAT_G32R32I:
   2660 		case FORMAT_G32R32UI:
   2661 		case FORMAT_X32B32G32R32I:
   2662 		case FORMAT_X32B32G32R32UI:
   2663 		case FORMAT_A32B32G32R32I:
   2664 		case FORMAT_A32B32G32R32UI:
   2665 		case FORMAT_G8R8:
   2666 		case FORMAT_X8R8G8B8:
   2667 		case FORMAT_X8B8G8R8:
   2668 		case FORMAT_A8R8G8B8:
   2669 		case FORMAT_A8B8G8R8:
   2670 		case FORMAT_SRGB8_X8:
   2671 		case FORMAT_SRGB8_A8:
   2672 		case FORMAT_V8U8:
   2673 		case FORMAT_Q8W8V8U8:
   2674 		case FORMAT_X8L8V8U8:
   2675 		case FORMAT_R32F:
   2676 		case FORMAT_G32R32F:
   2677 		case FORMAT_X32B32G32R32F:
   2678 		case FORMAT_A32B32G32R32F:
   2679 		case FORMAT_A8:
   2680 		case FORMAT_R8:
   2681 		case FORMAT_L8:
   2682 		case FORMAT_A8L8:
   2683 		case FORMAT_D32F:
   2684 		case FORMAT_D32F_LOCKABLE:
   2685 		case FORMAT_D32FS8_TEXTURE:
   2686 		case FORMAT_D32FS8_SHADOW:
   2687 		case FORMAT_YV12_BT601:
   2688 		case FORMAT_YV12_BT709:
   2689 		case FORMAT_YV12_JFIF:
   2690 			return false;
   2691 		case FORMAT_L16:
   2692 		case FORMAT_G16R16:
   2693 		case FORMAT_A16B16G16R16:
   2694 		case FORMAT_R16I:
   2695 		case FORMAT_R16UI:
   2696 		case FORMAT_G16R16I:
   2697 		case FORMAT_G16R16UI:
   2698 		case FORMAT_X16B16G16R16I:
   2699 		case FORMAT_X16B16G16R16UI:
   2700 		case FORMAT_A16B16G16R16I:
   2701 		case FORMAT_A16B16G16R16UI:
   2702 		case FORMAT_V16U16:
   2703 		case FORMAT_A16W16V16U16:
   2704 		case FORMAT_Q16W16V16U16:
   2705 			return true;
   2706 		default:
   2707 			ASSERT(false);
   2708 		}
   2709 
   2710 		return false;
   2711 	}
   2712 
   2713 	bool SamplerCore::has32bitIntegerTextureComponents() const
   2714 	{
   2715 		switch(state.textureFormat)
   2716 		{
   2717 		case FORMAT_R5G6B5:
   2718 		case FORMAT_R8I_SNORM:
   2719 		case FORMAT_G8R8I_SNORM:
   2720 		case FORMAT_X8B8G8R8I_SNORM:
   2721 		case FORMAT_A8B8G8R8I_SNORM:
   2722 		case FORMAT_R8I:
   2723 		case FORMAT_R8UI:
   2724 		case FORMAT_G8R8I:
   2725 		case FORMAT_G8R8UI:
   2726 		case FORMAT_X8B8G8R8I:
   2727 		case FORMAT_X8B8G8R8UI:
   2728 		case FORMAT_A8B8G8R8I:
   2729 		case FORMAT_A8B8G8R8UI:
   2730 		case FORMAT_G8R8:
   2731 		case FORMAT_X8R8G8B8:
   2732 		case FORMAT_X8B8G8R8:
   2733 		case FORMAT_A8R8G8B8:
   2734 		case FORMAT_A8B8G8R8:
   2735 		case FORMAT_SRGB8_X8:
   2736 		case FORMAT_SRGB8_A8:
   2737 		case FORMAT_V8U8:
   2738 		case FORMAT_Q8W8V8U8:
   2739 		case FORMAT_X8L8V8U8:
   2740 		case FORMAT_L16:
   2741 		case FORMAT_G16R16:
   2742 		case FORMAT_A16B16G16R16:
   2743 		case FORMAT_R16I:
   2744 		case FORMAT_R16UI:
   2745 		case FORMAT_G16R16I:
   2746 		case FORMAT_G16R16UI:
   2747 		case FORMAT_X16B16G16R16I:
   2748 		case FORMAT_X16B16G16R16UI:
   2749 		case FORMAT_A16B16G16R16I:
   2750 		case FORMAT_A16B16G16R16UI:
   2751 		case FORMAT_V16U16:
   2752 		case FORMAT_A16W16V16U16:
   2753 		case FORMAT_Q16W16V16U16:
   2754 		case FORMAT_R32F:
   2755 		case FORMAT_G32R32F:
   2756 		case FORMAT_X32B32G32R32F:
   2757 		case FORMAT_A32B32G32R32F:
   2758 		case FORMAT_A8:
   2759 		case FORMAT_R8:
   2760 		case FORMAT_L8:
   2761 		case FORMAT_A8L8:
   2762 		case FORMAT_D32F:
   2763 		case FORMAT_D32F_LOCKABLE:
   2764 		case FORMAT_D32FS8_TEXTURE:
   2765 		case FORMAT_D32FS8_SHADOW:
   2766 		case FORMAT_YV12_BT601:
   2767 		case FORMAT_YV12_BT709:
   2768 		case FORMAT_YV12_JFIF:
   2769 			return false;
   2770 		case FORMAT_R32I:
   2771 		case FORMAT_R32UI:
   2772 		case FORMAT_G32R32I:
   2773 		case FORMAT_G32R32UI:
   2774 		case FORMAT_X32B32G32R32I:
   2775 		case FORMAT_X32B32G32R32UI:
   2776 		case FORMAT_A32B32G32R32I:
   2777 		case FORMAT_A32B32G32R32UI:
   2778 			return true;
   2779 		default:
   2780 			ASSERT(false);
   2781 		}
   2782 
   2783 		return false;
   2784 	}
   2785 
   2786 	bool SamplerCore::hasYuvFormat() const
   2787 	{
   2788 		switch(state.textureFormat)
   2789 		{
   2790 		case FORMAT_YV12_BT601:
   2791 		case FORMAT_YV12_BT709:
   2792 		case FORMAT_YV12_JFIF:
   2793 			return true;
   2794 		case FORMAT_R5G6B5:
   2795 		case FORMAT_R8I_SNORM:
   2796 		case FORMAT_G8R8I_SNORM:
   2797 		case FORMAT_X8B8G8R8I_SNORM:
   2798 		case FORMAT_A8B8G8R8I_SNORM:
   2799 		case FORMAT_R8I:
   2800 		case FORMAT_R8UI:
   2801 		case FORMAT_G8R8I:
   2802 		case FORMAT_G8R8UI:
   2803 		case FORMAT_X8B8G8R8I:
   2804 		case FORMAT_X8B8G8R8UI:
   2805 		case FORMAT_A8B8G8R8I:
   2806 		case FORMAT_A8B8G8R8UI:
   2807 		case FORMAT_R32I:
   2808 		case FORMAT_R32UI:
   2809 		case FORMAT_G32R32I:
   2810 		case FORMAT_G32R32UI:
   2811 		case FORMAT_X32B32G32R32I:
   2812 		case FORMAT_X32B32G32R32UI:
   2813 		case FORMAT_A32B32G32R32I:
   2814 		case FORMAT_A32B32G32R32UI:
   2815 		case FORMAT_G8R8:
   2816 		case FORMAT_X8R8G8B8:
   2817 		case FORMAT_X8B8G8R8:
   2818 		case FORMAT_A8R8G8B8:
   2819 		case FORMAT_A8B8G8R8:
   2820 		case FORMAT_SRGB8_X8:
   2821 		case FORMAT_SRGB8_A8:
   2822 		case FORMAT_V8U8:
   2823 		case FORMAT_Q8W8V8U8:
   2824 		case FORMAT_X8L8V8U8:
   2825 		case FORMAT_R32F:
   2826 		case FORMAT_G32R32F:
   2827 		case FORMAT_X32B32G32R32F:
   2828 		case FORMAT_A32B32G32R32F:
   2829 		case FORMAT_A8:
   2830 		case FORMAT_R8:
   2831 		case FORMAT_L8:
   2832 		case FORMAT_A8L8:
   2833 		case FORMAT_D32F:
   2834 		case FORMAT_D32F_LOCKABLE:
   2835 		case FORMAT_D32FS8_TEXTURE:
   2836 		case FORMAT_D32FS8_SHADOW:
   2837 		case FORMAT_L16:
   2838 		case FORMAT_G16R16:
   2839 		case FORMAT_A16B16G16R16:
   2840 		case FORMAT_R16I:
   2841 		case FORMAT_R16UI:
   2842 		case FORMAT_G16R16I:
   2843 		case FORMAT_G16R16UI:
   2844 		case FORMAT_X16B16G16R16I:
   2845 		case FORMAT_X16B16G16R16UI:
   2846 		case FORMAT_A16B16G16R16I:
   2847 		case FORMAT_A16B16G16R16UI:
   2848 		case FORMAT_V16U16:
   2849 		case FORMAT_A16W16V16U16:
   2850 		case FORMAT_Q16W16V16U16:
   2851 			return false;
   2852 		default:
   2853 			ASSERT(false);
   2854 		}
   2855 
   2856 		return false;
   2857 	}
   2858 
   2859 	bool SamplerCore::isRGBComponent(int component) const
   2860 	{
   2861 		switch(state.textureFormat)
   2862 		{
   2863 		case FORMAT_R5G6B5:         return component < 3;
   2864 		case FORMAT_R8I_SNORM:      return component < 1;
   2865 		case FORMAT_G8R8I_SNORM:    return component < 2;
   2866 		case FORMAT_X8B8G8R8I_SNORM: return component < 3;
   2867 		case FORMAT_A8B8G8R8I_SNORM: return component < 3;
   2868 		case FORMAT_R8I:            return component < 1;
   2869 		case FORMAT_R8UI:           return component < 1;
   2870 		case FORMAT_G8R8I:          return component < 2;
   2871 		case FORMAT_G8R8UI:         return component < 2;
   2872 		case FORMAT_X8B8G8R8I:      return component < 3;
   2873 		case FORMAT_X8B8G8R8UI:     return component < 3;
   2874 		case FORMAT_A8B8G8R8I:      return component < 3;
   2875 		case FORMAT_A8B8G8R8UI:     return component < 3;
   2876 		case FORMAT_R32I:           return component < 1;
   2877 		case FORMAT_R32UI:          return component < 1;
   2878 		case FORMAT_G32R32I:        return component < 2;
   2879 		case FORMAT_G32R32UI:       return component < 2;
   2880 		case FORMAT_X32B32G32R32I:  return component < 3;
   2881 		case FORMAT_X32B32G32R32UI: return component < 3;
   2882 		case FORMAT_A32B32G32R32I:  return component < 3;
   2883 		case FORMAT_A32B32G32R32UI: return component < 3;
   2884 		case FORMAT_G8R8:           return component < 2;
   2885 		case FORMAT_X8R8G8B8:       return component < 3;
   2886 		case FORMAT_X8B8G8R8:       return component < 3;
   2887 		case FORMAT_A8R8G8B8:       return component < 3;
   2888 		case FORMAT_A8B8G8R8:       return component < 3;
   2889 		case FORMAT_SRGB8_X8:       return component < 3;
   2890 		case FORMAT_SRGB8_A8:       return component < 3;
   2891 		case FORMAT_V8U8:           return false;
   2892 		case FORMAT_Q8W8V8U8:       return false;
   2893 		case FORMAT_X8L8V8U8:       return false;
   2894 		case FORMAT_R32F:           return component < 1;
   2895 		case FORMAT_G32R32F:        return component < 2;
   2896 		case FORMAT_X32B32G32R32F:  return component < 3;
   2897 		case FORMAT_A32B32G32R32F:  return component < 3;
   2898 		case FORMAT_A8:             return false;
   2899 		case FORMAT_R8:             return component < 1;
   2900 		case FORMAT_L8:             return component < 1;
   2901 		case FORMAT_A8L8:           return component < 1;
   2902 		case FORMAT_D32F:           return false;
   2903 		case FORMAT_D32F_LOCKABLE:  return false;
   2904 		case FORMAT_D32FS8_TEXTURE: return false;
   2905 		case FORMAT_D32FS8_SHADOW:  return false;
   2906 		case FORMAT_L16:            return component < 1;
   2907 		case FORMAT_G16R16:         return component < 2;
   2908 		case FORMAT_A16B16G16R16:   return component < 3;
   2909 		case FORMAT_R16I:           return component < 1;
   2910 		case FORMAT_R16UI:          return component < 1;
   2911 		case FORMAT_G16R16I:        return component < 2;
   2912 		case FORMAT_G16R16UI:       return component < 2;
   2913 		case FORMAT_X16B16G16R16I:  return component < 3;
   2914 		case FORMAT_X16B16G16R16UI: return component < 3;
   2915 		case FORMAT_A16B16G16R16I:  return component < 3;
   2916 		case FORMAT_A16B16G16R16UI: return component < 3;
   2917 		case FORMAT_V16U16:         return false;
   2918 		case FORMAT_A16W16V16U16:   return false;
   2919 		case FORMAT_Q16W16V16U16:   return false;
   2920 		case FORMAT_YV12_BT601:     return component < 3;
   2921 		case FORMAT_YV12_BT709:     return component < 3;
   2922 		case FORMAT_YV12_JFIF:      return component < 3;
   2923 		default:
   2924 			ASSERT(false);
   2925 		}
   2926 
   2927 		return false;
   2928 	}
   2929 }
   2930