Home | History | Annotate | Download | only in Shader
      1 // Copyright 2016 The SwiftShader Authors. All Rights Reserved.
      2 //
      3 // Licensed under the Apache License, Version 2.0 (the "License");
      4 // you may not use this file except in compliance with the License.
      5 // You may obtain a copy of the License at
      6 //
      7 //    http://www.apache.org/licenses/LICENSE-2.0
      8 //
      9 // Unless required by applicable law or agreed to in writing, software
     10 // distributed under the License is distributed on an "AS IS" BASIS,
     11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     12 // See the License for the specific language governing permissions and
     13 // limitations under the License.
     14 
     15 #include "SamplerCore.hpp"
     16 
     17 #include "Constants.hpp"
     18 #include "Common/Debug.hpp"
     19 
     20 namespace
     21 {
     22 	void applySwizzle(sw::SwizzleType swizzle, sw::Short4& s, const sw::Vector4s& c)
     23 	{
     24 		switch(swizzle)
     25 		{
     26 		case sw::SWIZZLE_RED:	s = c.x; break;
     27 		case sw::SWIZZLE_GREEN: s = c.y; break;
     28 		case sw::SWIZZLE_BLUE:  s = c.z; break;
     29 		case sw::SWIZZLE_ALPHA: s = c.w; break;
     30 		case sw::SWIZZLE_ZERO:  s = sw::Short4(0x0000); break;
     31 		case sw::SWIZZLE_ONE:   s = sw::Short4(0x1000); break;
     32 		default: ASSERT(false);
     33 		}
     34 	}
     35 
     36 	void applySwizzle(sw::SwizzleType swizzle, sw::Float4& f, const sw::Vector4f& c)
     37 	{
     38 		switch(swizzle)
     39 		{
     40 		case sw::SWIZZLE_RED:	f = c.x; break;
     41 		case sw::SWIZZLE_GREEN: f = c.y; break;
     42 		case sw::SWIZZLE_BLUE:  f = c.z; break;
     43 		case sw::SWIZZLE_ALPHA: f = c.w; break;
     44 		case sw::SWIZZLE_ZERO:  f = sw::Float4(0.0f, 0.0f, 0.0f, 0.0f); break;
     45 		case sw::SWIZZLE_ONE:   f = sw::Float4(1.0f, 1.0f, 1.0f, 1.0f); break;
     46 		default: ASSERT(false);
     47 		}
     48 	}
     49 }
     50 
     51 namespace sw
     52 {
	// Defined in another translation unit. The sampling routines below read it
	// to pick the value given to green/blue components that the texture format
	// does not provide: zero when true, one when false.
	extern bool colorsDefaultToZero;
     54 
     55 	SamplerCore::SamplerCore(Pointer<Byte> &constants, const Sampler::State &state) : constants(constants), state(state)
     56 	{
     57 	}
     58 
     59 	Vector4s SamplerCore::sampleTexture(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Float4 &bias, Vector4f &dsx, Vector4f &dsy)
     60 	{
     61 		return sampleTexture(texture, u, v, w, q, q, dsx, dsy, (dsx), Implicit, true);
     62 	}
     63 
	// Samples 'texture' and returns the result as four Short4 channels.
	//
	// Steps, in order:
	//  1. A TEXTURE_NULL sampler short-circuits to (0, 0, 0, alpha).
	//  2. The LOD is computed with the routine matching the texture type
	//     (2D, cube or 3D).
	//  3. The texel is filtered, either directly in integer form or through
	//     the float filter followed by convertFixed12().
	//  4. Only when 'fixed12' is true: the channels are rescaled so that
	//     0x1000 stands for one, components missing from the format get
	//     defaults, and the sampler state's swizzle is applied.
	//
	// When 'fixed12' is false the filtered value is returned as-is, without
	// rescaling, default components or swizzling; the Vector4f overload relies
	// on this and does that work itself.
	//
	// u, v, w, q : texture coordinates (copied locally before use).
	// bias       : only bias.x is forwarded to the LOD computation.
	// dsx, dsy   : forwarded to the LOD computation.
	// offset     : forwarded to the filter routines.
	// function   : sampler function, forwarded to LOD computation and filtering.
	Vector4s SamplerCore::sampleTexture(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Float4 &bias, Vector4f &dsx, Vector4f &dsy, Vector4f &offset, SamplerFunction function, bool fixed12)
	{
		Vector4s c;

		#if PERF_PROFILE
			// Count four texture operations per call.
			AddAtomic(Pointer<Long>(&profiler.texOperations), 4);

			if(state.compressedFormat)
			{
				AddAtomic(Pointer<Long>(&profiler.compressedTex), 4);
			}
		#endif

		if(state.textureType == TEXTURE_NULL)
		{
			// No texture bound: black with a "one" alpha.
			c.x = Short4(0x0000);
			c.y = Short4(0x0000);
			c.z = Short4(0x0000);

			if(fixed12)   // FIXME: Convert to fixed12 at higher level, when required
			{
				c.w = Short4(0x1000);
			}
			else
			{
				c.w = Short4(0xFFFFu);   // FIXME
			}
		}
		else
		{
			// Local copies of the coordinates; cubeFace() below writes the
			// per-face coordinates into uuuu/vvvv.
			Float4 uuuu = u;
			Float4 vvvv = v;
			Float4 wwww = w;
			Float4 qqqq = q;

			Int face[4];
			Float lod;
			Float anisotropy;
			Float4 uDelta;
			Float4 vDelta;

			// Note: anisotropy, uDelta and vDelta are only passed to
			// computeLod() (the plain 2D path); face is only passed to
			// cubeFace() (the cube path).
			if(state.textureType != TEXTURE_3D)
			{
				if(state.textureType != TEXTURE_CUBE)
				{
					computeLod(texture, lod, anisotropy, uDelta, vDelta, uuuu, vvvv, bias.x, dsx, dsy, function);
				}
				else
				{
					Float4 M;
					cubeFace(face, uuuu, vvvv, u, v, w, M);
					computeLodCube(texture, lod, u, v, w, bias.x, dsx, dsy, M, function);
				}
			}
			else
			{
				computeLod3D(texture, lod, uuuu, vvvv, wwww, bias.x, dsx, dsy, function);
			}

			if(!hasFloatTexture())
			{
				c = sampleFilter(texture, uuuu, vvvv, wwww, offset, lod, anisotropy, uDelta, vDelta, face, function);
			}
			else
			{
				// Float formats are filtered in floating point and then
				// converted to the fixed-point representation.
				Vector4f cf = sampleFloatFilter(texture, uuuu, vvvv, wwww, qqqq, offset, lod, anisotropy, uDelta, vDelta, face, function);

				convertFixed12(c, cf);
			}

			if(fixed12)
			{
				if(!hasFloatTexture())
				{
					if(state.textureFormat == FORMAT_R5G6B5)
					{
						// Each factor is 0x10000000 divided by the channel's
						// full-scale value (0xF800 for 5 bits, 0xFC00 for
						// 6 bits). Assuming MulHigh returns the upper 16 bits
						// of the product, full scale lands at about 0x1000.
						c.x = MulHigh(As<UShort4>(c.x), UShort4(0x10000000 / 0xF800));
						c.y = MulHigh(As<UShort4>(c.y), UShort4(0x10000000 / 0xFC00));
						c.z = MulHigh(As<UShort4>(c.z), UShort4(0x10000000 / 0xF800));
					}
					else
					{
						for(int component = 0; component < textureComponentCount(); component++)
						{
							if(hasUnsignedTextureComponent(component))
							{
								// Unsigned 16-bit range -> 12 fractional bits.
								c[component] = As<UShort4>(c[component]) >> 4;
							}
							else
							{
								// Signed components have one bit less of
								// magnitude, hence the smaller shift.
								c[component] = c[component] >> 3;
							}
						}
					}
				}

				if(state.textureFilter != FILTER_GATHER)
				{
					// Fill in the channels the format does not store.
					int componentCount = textureComponentCount();
					short defaultColorValue = colorsDefaultToZero ? 0x0000 : 0x1000;

					switch(state.textureFormat)
					{
					case FORMAT_R8_SNORM:
					case FORMAT_G8R8_SNORM:
					case FORMAT_X8B8G8R8_SNORM:
					case FORMAT_A8B8G8R8_SNORM:
					case FORMAT_R8:
					case FORMAT_R5G6B5:
					case FORMAT_G8R8:
					case FORMAT_R8I:
					case FORMAT_R8UI:
					case FORMAT_G8R8I:
					case FORMAT_G8R8UI:
					case FORMAT_X8B8G8R8I:
					case FORMAT_X8B8G8R8UI:
					case FORMAT_A8B8G8R8I:
					case FORMAT_A8B8G8R8UI:
					case FORMAT_R16I:
					case FORMAT_R16UI:
					case FORMAT_G16R16:
					case FORMAT_G16R16I:
					case FORMAT_G16R16UI:
					case FORMAT_X16B16G16R16I:
					case FORMAT_X16B16G16R16UI:
					case FORMAT_A16B16G16R16:
					case FORMAT_A16B16G16R16I:
					case FORMAT_A16B16G16R16UI:
					case FORMAT_R32I:
					case FORMAT_R32UI:
					case FORMAT_G32R32I:
					case FORMAT_G32R32UI:
					case FORMAT_X32B32G32R32I:
					case FORMAT_X32B32G32R32UI:
					case FORMAT_A32B32G32R32I:
					case FORMAT_A32B32G32R32UI:
					case FORMAT_X8R8G8B8:
					case FORMAT_X8B8G8R8:
					case FORMAT_A8R8G8B8:
					case FORMAT_A8B8G8R8:
					case FORMAT_SRGB8_X8:
					case FORMAT_SRGB8_A8:
					case FORMAT_V8U8:
					case FORMAT_Q8W8V8U8:
					case FORMAT_X8L8V8U8:
					case FORMAT_V16U16:
					case FORMAT_A16W16V16U16:
					case FORMAT_Q16W16V16U16:
					case FORMAT_YV12_BT601:
					case FORMAT_YV12_BT709:
					case FORMAT_YV12_JFIF:
						// Missing green/blue take the configurable default;
						// missing alpha is always one.
						if(componentCount < 2) c.y = Short4(defaultColorValue);
						if(componentCount < 3) c.z = Short4(defaultColorValue);
						if(componentCount < 4) c.w = Short4(0x1000);
						break;
					case FORMAT_A8:
						// Alpha-only: the sampled value arrives in x.
						c.w = c.x;
						c.x = Short4(0x0000);
						c.y = Short4(0x0000);
						c.z = Short4(0x0000);
						break;
					case FORMAT_L8:
					case FORMAT_L16:
						// Luminance is replicated to RGB.
						c.y = c.x;
						c.z = c.x;
						c.w = Short4(0x1000);
						break;
					case FORMAT_A8L8:
						// Luminance in x, alpha in y. Alpha must be moved
						// before y is overwritten with the luminance.
						c.w = c.y;
						c.y = c.x;
						c.z = c.x;
						break;
					case FORMAT_R32F:
						c.y = Short4(defaultColorValue);
						// Intentional fall-through
					case FORMAT_G32R32F:
						c.z = Short4(defaultColorValue);
						// Intentional fall-through
					case FORMAT_X32B32G32R32F:
					case FORMAT_X32B32G32R32F_UNSIGNED:
						c.w = Short4(0x1000);
						// Intentional fall-through
					case FORMAT_A32B32G32R32F:
						break;
					case FORMAT_D32F:
					case FORMAT_D32FS8:
					case FORMAT_D32F_LOCKABLE:
					case FORMAT_D32FS8_TEXTURE:
					case FORMAT_D32F_SHADOW:
					case FORMAT_D32FS8_SHADOW:
						// Depth value is replicated to all four channels.
						c.y = c.x;
						c.z = c.x;
						c.w = c.x;
						break;
					default:
						ASSERT(false);
					}
				}

				// Skip the swizzle entirely when it is the identity mapping.
				if((state.swizzleR != SWIZZLE_RED) ||
				   (state.swizzleG != SWIZZLE_GREEN) ||
				   (state.swizzleB != SWIZZLE_BLUE) ||
				   (state.swizzleA != SWIZZLE_ALPHA))
				{
					// Swizzle from a snapshot so that already-written
					// channels do not feed later ones.
					const Vector4s col(c);
					applySwizzle(state.swizzleR, c.x, col);
					applySwizzle(state.swizzleG, c.y, col);
					applySwizzle(state.swizzleB, c.z, col);
					applySwizzle(state.swizzleA, c.w, col);
				}
			}
		}

		return c;
	}
    276 
	// Samples 'texture' and returns the result as four Float4 channels.
	//
	// A TEXTURE_NULL sampler yields (0, 0, 0, 1). Otherwise one of two paths
	// is taken:
	//  - the float path (sampleFloatFilter) for float and unnormalized-integer
	//    formats, when high-precision filtering is forced, for seamless cube
	//    addressing, and for rectangle textures;
	//  - the integer path, which calls the Vector4s overload with
	//    fixed12 = false and converts its result to float.
	// Afterwards (for both paths) components missing from the format receive
	// defaults (skipped for FILTER_GATHER) and the sampler state's swizzle is
	// applied.
	//
	// u, v, w, q : texture coordinates.
	// bias       : only bias.x is forwarded to the LOD computation.
	// dsx, dsy   : forwarded to the LOD computation.
	// offset     : forwarded to the filter routines.
	// function   : sampler function, forwarded to LOD computation and filtering.
	Vector4f SamplerCore::sampleTexture(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Float4 &bias, Vector4f &dsx, Vector4f &dsy, Vector4f &offset, SamplerFunction function)
	{
		Vector4f c;

		#if PERF_PROFILE
			// Count four texture operations per call.
			AddAtomic(Pointer<Long>(&profiler.texOperations), 4);

			if(state.compressedFormat)
			{
				AddAtomic(Pointer<Long>(&profiler.compressedTex), 4);
			}
		#endif

		if(state.textureType == TEXTURE_NULL)
		{
			// No texture bound: opaque black.
			c.x = Float4(0.0f);
			c.y = Float4(0.0f);
			c.z = Float4(0.0f);
			c.w = Float4(1.0f);
		}
		else
		{
			// FIXME: YUV is not supported by the floating point path
			bool forceFloatFiltering = state.highPrecisionFiltering && !hasYuvFormat() && (state.textureFilter != FILTER_POINT);
			bool seamlessCube = (state.addressingModeU == ADDRESSING_SEAMLESS);
			bool rectangleTexture = (state.textureType == TEXTURE_RECTANGLE);
			if(hasFloatTexture() || hasUnnormalizedIntegerTexture() || forceFloatFiltering || seamlessCube || rectangleTexture)   // FIXME: Mostly identical to integer sampling
			{
				// Local copies of the coordinates; cubeFace() below writes
				// the per-face coordinates into uuuu/vvvv.
				Float4 uuuu = u;
				Float4 vvvv = v;
				Float4 wwww = w;
				Float4 qqqq = q;

				Int face[4];
				Float lod;
				Float anisotropy;
				Float4 uDelta;
				Float4 vDelta;

				if(state.textureType != TEXTURE_3D)
				{
					if(state.textureType != TEXTURE_CUBE)
					{
						computeLod(texture, lod, anisotropy, uDelta, vDelta, uuuu, vvvv, bias.x, dsx, dsy, function);
					}
					else
					{
						Float4 M;
						cubeFace(face, uuuu, vvvv, u, v, w, M);
						computeLodCube(texture, lod, u, v, w, bias.x, dsx, dsy, M, function);
					}
				}
				else
				{
					computeLod3D(texture, lod, uuuu, vvvv, wwww, bias.x, dsx, dsy, function);
				}

				c = sampleFloatFilter(texture, uuuu, vvvv, wwww, qqqq, offset, lod, anisotropy, uDelta, vDelta, face, function);

				// Normalized integer formats that went through the float
				// filter still carry integer-range values; scale them down.
				if(!hasFloatTexture() && !hasUnnormalizedIntegerTexture())
				{
					if(has16bitTextureFormat())
					{
						switch(state.textureFormat)
						{
						case FORMAT_R5G6B5:
							// Divide by each channel's full-scale value.
							c.x *= Float4(1.0f / 0xF800);
							c.y *= Float4(1.0f / 0xFC00);
							c.z *= Float4(1.0f / 0xF800);
							break;
						default:
							ASSERT(false);
						}
					}
					else
					{
						for(int component = 0; component < textureComponentCount(); component++)
						{
							// Unsigned components span 0..0xFFFF, signed ones
							// use 0x7FFF as full scale.
							c[component] *= Float4(hasUnsignedTextureComponent(component) ? 1.0f / 0xFFFF : 1.0f / 0x7FFF);
						}
					}
				}
			}
			else
			{
				// Integer path: sample without the fixed12 post-processing
				// and convert the raw 16-bit result to float here.
				Vector4s cs = sampleTexture(texture, u, v, w, q, bias, dsx, dsy, offset, function, false);

				if(state.textureFormat ==  FORMAT_R5G6B5)
				{
					c.x = Float4(As<UShort4>(cs.x)) * Float4(1.0f / 0xF800);
					c.y = Float4(As<UShort4>(cs.y)) * Float4(1.0f / 0xFC00);
					c.z = Float4(As<UShort4>(cs.z)) * Float4(1.0f / 0xF800);
				}
				else
				{
					for(int component = 0; component < textureComponentCount(); component++)
					{
						if(hasUnsignedTextureComponent(component))
						{
							convertUnsigned16(c[component], cs[component]);
						}
						else
						{
							convertSigned15(c[component], cs[component]);
						}
					}
				}
			}

			int componentCount = textureComponentCount();
			float defaultColorValue = colorsDefaultToZero ? 0.0f : 1.0f;

			if(state.textureFilter != FILTER_GATHER)
			{
				// Fill in the channels the format does not store.
				switch(state.textureFormat)
				{
				// Unnormalized integer formats: the defaults are written as
				// integer 0 / 1 passed through As<Float4> (presumably a bit
				// reinterpretation rather than a numeric conversion), not as
				// 0.0f / 1.0f. The groups below cascade from one component
				// to four.
				case FORMAT_R8I:
				case FORMAT_R8UI:
				case FORMAT_R16I:
				case FORMAT_R16UI:
				case FORMAT_R32I:
				case FORMAT_R32UI:
					c.y = As<Float4>(UInt4(0));
					// Intentional fall-through
				case FORMAT_G8R8I:
				case FORMAT_G8R8UI:
				case FORMAT_G16R16I:
				case FORMAT_G16R16UI:
				case FORMAT_G32R32I:
				case FORMAT_G32R32UI:
					c.z = As<Float4>(UInt4(0));
					// Intentional fall-through
				case FORMAT_X8B8G8R8I:
				case FORMAT_X8B8G8R8UI:
				case FORMAT_X16B16G16R16I:
				case FORMAT_X16B16G16R16UI:
				case FORMAT_X32B32G32R32I:
				case FORMAT_X32B32G32R32UI:
					c.w = As<Float4>(UInt4(1));
					// Intentional fall-through
				case FORMAT_A8B8G8R8I:
				case FORMAT_A8B8G8R8UI:
				case FORMAT_A16B16G16R16I:
				case FORMAT_A16B16G16R16UI:
				case FORMAT_A32B32G32R32I:
				case FORMAT_A32B32G32R32UI:
					break;
				case FORMAT_R8_SNORM:
				case FORMAT_G8R8_SNORM:
				case FORMAT_X8B8G8R8_SNORM:
				case FORMAT_A8B8G8R8_SNORM:
				case FORMAT_R8:
				case FORMAT_R5G6B5:
				case FORMAT_G8R8:
				case FORMAT_G16R16:
				case FORMAT_A16B16G16R16:
				case FORMAT_X8R8G8B8:
				case FORMAT_X8B8G8R8:
				case FORMAT_A8R8G8B8:
				case FORMAT_A8B8G8R8:
				case FORMAT_SRGB8_X8:
				case FORMAT_SRGB8_A8:
				case FORMAT_V8U8:
				case FORMAT_Q8W8V8U8:
				case FORMAT_X8L8V8U8:
				case FORMAT_V16U16:
				case FORMAT_A16W16V16U16:
				case FORMAT_Q16W16V16U16:
				case FORMAT_YV12_BT601:
				case FORMAT_YV12_BT709:
				case FORMAT_YV12_JFIF:
					// Missing green/blue take the configurable default;
					// missing alpha is always one.
					if(componentCount < 2) c.y = Float4(defaultColorValue);
					if(componentCount < 3) c.z = Float4(defaultColorValue);
					if(componentCount < 4) c.w = Float4(1.0f);
					break;
				case FORMAT_A8:
					// Alpha-only: the sampled value arrives in x.
					c.w = c.x;
					c.x = Float4(0.0f);
					c.y = Float4(0.0f);
					c.z = Float4(0.0f);
					break;
				case FORMAT_L8:
				case FORMAT_L16:
					// Luminance is replicated to RGB.
					c.y = c.x;
					c.z = c.x;
					c.w = Float4(1.0f);
					break;
				case FORMAT_A8L8:
					// Luminance in x, alpha in y. Alpha must be moved before
					// y is overwritten with the luminance.
					c.w = c.y;
					c.y = c.x;
					c.z = c.x;
					break;
				case FORMAT_R32F:
					c.y = Float4(defaultColorValue);
					// Intentional fall-through
				case FORMAT_G32R32F:
					c.z = Float4(defaultColorValue);
					// Intentional fall-through
				case FORMAT_X32B32G32R32F:
				case FORMAT_X32B32G32R32F_UNSIGNED:
					c.w = Float4(1.0f);
					// Intentional fall-through
				case FORMAT_A32B32G32R32F:
					break;
				case FORMAT_D32F:
				case FORMAT_D32FS8:
				case FORMAT_D32F_LOCKABLE:
				case FORMAT_D32FS8_TEXTURE:
				case FORMAT_D32F_SHADOW:
				case FORMAT_D32FS8_SHADOW:
					// Depth stays in x; the result is (depth, 0, 0, 1).
					// NOTE(review): the Vector4s overload replicates depth to
					// all four channels instead - confirm the difference is
					// intended.
					c.y = Float4(0.0f);
					c.z = Float4(0.0f);
					c.w = Float4(1.0f);
					break;
				default:
					ASSERT(false);
				}
			}

			// Skip the swizzle entirely when it is the identity mapping.
			if((state.swizzleR != SWIZZLE_RED) ||
			   (state.swizzleG != SWIZZLE_GREEN) ||
			   (state.swizzleB != SWIZZLE_BLUE) ||
			   (state.swizzleA != SWIZZLE_ALPHA))
			{
				// Swizzle from a snapshot so that already-written channels do
				// not feed later ones.
				const Vector4f col(c);
				applySwizzle(state.swizzleR, c.x, col);
				applySwizzle(state.swizzleG, c.y, col);
				applySwizzle(state.swizzleB, c.z, col);
				applySwizzle(state.swizzleA, c.w, col);
			}
		}

		return c;
	}
    505 
    506 	Vector4f SamplerCore::textureSize(Pointer<Byte> &texture, Float4 &lod)
    507 	{
    508 		Vector4f size;
    509 
    510 		for(int i = 0; i < 4; ++i)
    511 		{
    512 			Int baseLevel = *Pointer<Int>(texture + OFFSET(Texture, baseLevel));
    513 			Pointer<Byte> mipmap = texture + OFFSET(Texture, mipmap) + (As<Int>(Extract(lod, i)) + baseLevel) * sizeof(Mipmap);
    514 			size.x = Insert(size.x, As<Float>(Int(*Pointer<Short>(mipmap + OFFSET(Mipmap, width)))), i);
    515 			size.y = Insert(size.y, As<Float>(Int(*Pointer<Short>(mipmap + OFFSET(Mipmap, height)))), i);
    516 			size.z = Insert(size.z, As<Float>(Int(*Pointer<Short>(mipmap + OFFSET(Mipmap, depth)))), i);
    517 		}
    518 
    519 		return size;
    520 	}
    521 
    522 	void SamplerCore::border(Short4 &mask, Float4 &coordinates)
    523 	{
    524 		Int4 border = As<Int4>(CmpLT(Abs(coordinates - Float4(0.5f)), Float4(0.5f)));
    525 		mask = As<Short4>(Int2(As<Int4>(PackSigned(border, border))));
    526 	}
    527 
    528 	void SamplerCore::border(Int4 &mask, Float4 &coordinates)
    529 	{
    530 		mask = As<Int4>(CmpLT(Abs(coordinates - Float4(0.5f)), Float4(0.5f)));
    531 	}
    532 
    533 	Short4 SamplerCore::offsetSample(Short4 &uvw, Pointer<Byte> &mipmap, int halfOffset, bool wrap, int count, Float &lod)
    534 	{
    535 		Short4 offset = *Pointer<Short4>(mipmap + halfOffset);
    536 
    537 		if(state.textureFilter == FILTER_MIN_LINEAR_MAG_POINT)
    538 		{
    539 			offset &= Short4(CmpNLE(Float4(lod), Float4(0.0f)));
    540 		}
    541 		else if(state.textureFilter == FILTER_MIN_POINT_MAG_LINEAR)
    542 		{
    543 			offset &= Short4(CmpLE(Float4(lod), Float4(0.0f)));
    544 		}
    545 
    546 		if(wrap)
    547 		{
    548 			switch(count)
    549 			{
    550 			case -1: return uvw - offset;
    551 			case  0: return uvw;
    552 			case +1: return uvw + offset;
    553 			case  2: return uvw + offset + offset;
    554 			}
    555 		}
    556 		else   // Clamp or mirror
    557 		{
    558 			switch(count)
    559 			{
    560 			case -1: return SubSat(As<UShort4>(uvw), As<UShort4>(offset));
    561 			case  0: return uvw;
    562 			case +1: return AddSat(As<UShort4>(uvw), As<UShort4>(offset));
    563 			case  2: return AddSat(AddSat(As<UShort4>(uvw), As<UShort4>(offset)), As<UShort4>(offset));
    564 			}
    565 		}
    566 
    567 		return uvw;
    568 	}
    569 
    570 	Vector4s SamplerCore::sampleFilter(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, Int face[4], SamplerFunction function)
    571 	{
    572 		Vector4s c = sampleAniso(texture, u, v, w, offset, lod, anisotropy, uDelta, vDelta, face, false, function);
    573 
    574 		if(function == Fetch)
    575 		{
    576 			return c;
    577 		}
    578 
    579 		if(state.mipmapFilter == MIPMAP_LINEAR)
    580 		{
    581 			Vector4s cc = sampleAniso(texture, u, v, w, offset, lod, anisotropy, uDelta, vDelta, face, true, function);
    582 
    583 			lod *= Float(1 << 16);
    584 
    585 			UShort4 utri = UShort4(Float4(lod));   // FIXME: Optimize
    586 			Short4 stri = utri >> 1;   // FIXME: Optimize
    587 
    588 			if(hasUnsignedTextureComponent(0)) cc.x = MulHigh(As<UShort4>(cc.x), utri); else cc.x = MulHigh(cc.x, stri);
    589 			if(hasUnsignedTextureComponent(1)) cc.y = MulHigh(As<UShort4>(cc.y), utri); else cc.y = MulHigh(cc.y, stri);
    590 			if(hasUnsignedTextureComponent(2)) cc.z = MulHigh(As<UShort4>(cc.z), utri); else cc.z = MulHigh(cc.z, stri);
    591 			if(hasUnsignedTextureComponent(3)) cc.w = MulHigh(As<UShort4>(cc.w), utri); else cc.w = MulHigh(cc.w, stri);
    592 
    593 			utri = ~utri;
    594 			stri = Short4(0x7FFF) - stri;
    595 
    596 			if(hasUnsignedTextureComponent(0)) c.x = MulHigh(As<UShort4>(c.x), utri); else c.x = MulHigh(c.x, stri);
    597 			if(hasUnsignedTextureComponent(1)) c.y = MulHigh(As<UShort4>(c.y), utri); else c.y = MulHigh(c.y, stri);
    598 			if(hasUnsignedTextureComponent(2)) c.z = MulHigh(As<UShort4>(c.z), utri); else c.z = MulHigh(c.z, stri);
    599 			if(hasUnsignedTextureComponent(3)) c.w = MulHigh(As<UShort4>(c.w), utri); else c.w = MulHigh(c.w, stri);
    600 
    601 			c.x += cc.x;
    602 			c.y += cc.y;
    603 			c.z += cc.z;
    604 			c.w += cc.w;
    605 
    606 			if(!hasUnsignedTextureComponent(0)) c.x += c.x;
    607 			if(!hasUnsignedTextureComponent(1)) c.y += c.y;
    608 			if(!hasUnsignedTextureComponent(2)) c.z += c.z;
    609 			if(!hasUnsignedTextureComponent(3)) c.w += c.w;
    610 		}
    611 
    612 		Short4 borderMask;
    613 
    614 		if(state.addressingModeU == ADDRESSING_BORDER)
    615 		{
    616 			Short4 u0;
    617 
    618 			border(u0, u);
    619 
    620 			borderMask = u0;
    621 		}
    622 
    623 		if(state.addressingModeV == ADDRESSING_BORDER)
    624 		{
    625 			Short4 v0;
    626 
    627 			border(v0, v);
    628 
    629 			if(state.addressingModeU == ADDRESSING_BORDER)
    630 			{
    631 				borderMask &= v0;
    632 			}
    633 			else
    634 			{
    635 				borderMask = v0;
    636 			}
    637 		}
    638 
    639 		if(state.addressingModeW == ADDRESSING_BORDER && state.textureType == TEXTURE_3D)
    640 		{
    641 			Short4 s0;
    642 
    643 			border(s0, w);
    644 
    645 			if(state.addressingModeU == ADDRESSING_BORDER ||
    646 			   state.addressingModeV == ADDRESSING_BORDER)
    647 			{
    648 				borderMask &= s0;
    649 			}
    650 			else
    651 			{
    652 				borderMask = s0;
    653 			}
    654 		}
    655 
    656 		if(state.addressingModeU == ADDRESSING_BORDER ||
    657 		   state.addressingModeV == ADDRESSING_BORDER ||
    658 		   (state.addressingModeW == ADDRESSING_BORDER && state.textureType == TEXTURE_3D))
    659 		{
    660 			Short4 b;
    661 
    662 			c.x = (borderMask & c.x) | (~borderMask & (*Pointer<Short4>(texture + OFFSET(Texture,borderColor4[0])) >> (hasUnsignedTextureComponent(0) ? 0 : 1)));
    663 			c.y = (borderMask & c.y) | (~borderMask & (*Pointer<Short4>(texture + OFFSET(Texture,borderColor4[1])) >> (hasUnsignedTextureComponent(1) ? 0 : 1)));
    664 			c.z = (borderMask & c.z) | (~borderMask & (*Pointer<Short4>(texture + OFFSET(Texture,borderColor4[2])) >> (hasUnsignedTextureComponent(2) ? 0 : 1)));
    665 			c.w = (borderMask & c.w) | (~borderMask & (*Pointer<Short4>(texture + OFFSET(Texture,borderColor4[3])) >> (hasUnsignedTextureComponent(3) ? 0 : 1)));
    666 		}
    667 
    668 		return c;
    669 	}
    670 
    671 	Vector4s SamplerCore::sampleAniso(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, Int face[4], bool secondLOD, SamplerFunction function)
    672 	{
    673 		Vector4s c;
    674 
    675 		if(state.textureFilter != FILTER_ANISOTROPIC || function == Lod || function == Fetch)
    676 		{
    677 			c = sampleQuad(texture, u, v, w, offset, lod, face, secondLOD, function);
    678 		}
    679 		else
    680 		{
    681 			Int a = RoundInt(anisotropy);
    682 
    683 			Vector4s cSum;
    684 
    685 			cSum.x = Short4(0);
    686 			cSum.y = Short4(0);
    687 			cSum.z = Short4(0);
    688 			cSum.w = Short4(0);
    689 
    690 			Float4 A = *Pointer<Float4>(constants + OFFSET(Constants,uvWeight) + 16 * a);
    691 			Float4 B = *Pointer<Float4>(constants + OFFSET(Constants,uvStart) + 16 * a);
    692 			UShort4 cw = *Pointer<UShort4>(constants + OFFSET(Constants,cWeight) + 8 * a);
    693 			Short4 sw = Short4(cw >> 1);
    694 
    695 			Float4 du = uDelta;
    696 			Float4 dv = vDelta;
    697 
    698 			Float4 u0 = u + B * du;
    699 			Float4 v0 = v + B * dv;
    700 
    701 			du *= A;
    702 			dv *= A;
    703 
    704 			Int i = 0;
    705 
    706 			Do
    707 			{
    708 				c = sampleQuad(texture, u0, v0, w, offset, lod, face, secondLOD, function);
    709 
    710 				u0 += du;
    711 				v0 += dv;
    712 
    713 				if(hasUnsignedTextureComponent(0)) cSum.x += As<Short4>(MulHigh(As<UShort4>(c.x), cw)); else cSum.x += MulHigh(c.x, sw);
    714 				if(hasUnsignedTextureComponent(1)) cSum.y += As<Short4>(MulHigh(As<UShort4>(c.y), cw)); else cSum.y += MulHigh(c.y, sw);
    715 				if(hasUnsignedTextureComponent(2)) cSum.z += As<Short4>(MulHigh(As<UShort4>(c.z), cw)); else cSum.z += MulHigh(c.z, sw);
    716 				if(hasUnsignedTextureComponent(3)) cSum.w += As<Short4>(MulHigh(As<UShort4>(c.w), cw)); else cSum.w += MulHigh(c.w, sw);
    717 
    718 				i++;
    719 			}
    720 			Until(i >= a)
    721 
    722 			if(hasUnsignedTextureComponent(0)) c.x = cSum.x; else c.x = AddSat(cSum.x, cSum.x);
    723 			if(hasUnsignedTextureComponent(1)) c.y = cSum.y; else c.y = AddSat(cSum.y, cSum.y);
    724 			if(hasUnsignedTextureComponent(2)) c.z = cSum.z; else c.z = AddSat(cSum.z, cSum.z);
    725 			if(hasUnsignedTextureComponent(3)) c.w = cSum.w; else c.w = AddSat(cSum.w, cSum.w);
    726 		}
    727 
    728 		return c;
    729 	}
    730 
    731 	Vector4s SamplerCore::sampleQuad(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, Float &lod, Int face[4], bool secondLOD, SamplerFunction function)
    732 	{
    733 		if(state.textureType != TEXTURE_3D)
    734 		{
    735 			return sampleQuad2D(texture, u, v, w, offset, lod, face, secondLOD, function);
    736 		}
    737 		else
    738 		{
    739 			return sample3D(texture, u, v, w, offset, lod, secondLOD, function);
    740 		}
    741 	}
    742 
    743 	Vector4s SamplerCore::sampleQuad2D(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, Float &lod, Int face[4], bool secondLOD, SamplerFunction function)
    744 	{
    745 		Vector4s c;
    746 
    747 		int componentCount = textureComponentCount();
    748 		bool gather = state.textureFilter == FILTER_GATHER;
    749 
    750 		Pointer<Byte> mipmap;
    751 		Pointer<Byte> buffer[4];
    752 
    753 		selectMipmap(texture, buffer, mipmap, lod, face, secondLOD);
    754 
    755 		bool texelFetch = (function == Fetch);
    756 
    757 		Short4 uuuu = texelFetch ? Short4(As<Int4>(u)) : address(u, state.addressingModeU, mipmap);
    758 		Short4 vvvv = texelFetch ? Short4(As<Int4>(v)) : address(v, state.addressingModeV, mipmap);
    759 		Short4 wwww = texelFetch ? Short4(As<Int4>(w)) : address(w, state.addressingModeW, mipmap);
    760 
    761 		if(state.textureFilter == FILTER_POINT || texelFetch)
    762 		{
    763 			c = sampleTexel(uuuu, vvvv, wwww, offset, mipmap, buffer, function);
    764 		}
    765 		else
    766 		{
    767 			Short4 uuuu0 = offsetSample(uuuu, mipmap, OFFSET(Mipmap,uHalf), state.addressingModeU == ADDRESSING_WRAP, gather ? 0 : -1, lod);
    768 			Short4 vvvv0 = offsetSample(vvvv, mipmap, OFFSET(Mipmap,vHalf), state.addressingModeV == ADDRESSING_WRAP, gather ? 0 : -1, lod);
    769 			Short4 uuuu1 = offsetSample(uuuu, mipmap, OFFSET(Mipmap,uHalf), state.addressingModeU == ADDRESSING_WRAP, gather ? 2 : +1, lod);
    770 			Short4 vvvv1 = offsetSample(vvvv, mipmap, OFFSET(Mipmap,vHalf), state.addressingModeV == ADDRESSING_WRAP, gather ? 2 : +1, lod);
    771 
    772 			Vector4s c0 = sampleTexel(uuuu0, vvvv0, wwww, offset, mipmap, buffer, function);
    773 			Vector4s c1 = sampleTexel(uuuu1, vvvv0, wwww, offset, mipmap, buffer, function);
    774 			Vector4s c2 = sampleTexel(uuuu0, vvvv1, wwww, offset, mipmap, buffer, function);
    775 			Vector4s c3 = sampleTexel(uuuu1, vvvv1, wwww, offset, mipmap, buffer, function);
    776 
    777 			if(!gather)   // Blend
    778 			{
    779 				// Fractions
    780 				UShort4 f0u = As<UShort4>(uuuu0) * *Pointer<UShort4>(mipmap + OFFSET(Mipmap,width));
    781 				UShort4 f0v = As<UShort4>(vvvv0) * *Pointer<UShort4>(mipmap + OFFSET(Mipmap,height));
    782 
    783 				UShort4 f1u = ~f0u;
    784 				UShort4 f1v = ~f0v;
    785 
    786 				UShort4 f0u0v = MulHigh(f0u, f0v);
    787 				UShort4 f1u0v = MulHigh(f1u, f0v);
    788 				UShort4 f0u1v = MulHigh(f0u, f1v);
    789 				UShort4 f1u1v = MulHigh(f1u, f1v);
    790 
    791 				// Signed fractions
    792 				Short4 f1u1vs;
    793 				Short4 f0u1vs;
    794 				Short4 f1u0vs;
    795 				Short4 f0u0vs;
    796 
    797 				if(!hasUnsignedTextureComponent(0) || !hasUnsignedTextureComponent(1) || !hasUnsignedTextureComponent(2) || !hasUnsignedTextureComponent(3))
    798 				{
    799 					f1u1vs = f1u1v >> 1;
    800 					f0u1vs = f0u1v >> 1;
    801 					f1u0vs = f1u0v >> 1;
    802 					f0u0vs = f0u0v >> 1;
    803 				}
    804 
    805 				// Bilinear interpolation
    806 				if(componentCount >= 1)
    807 				{
    808 					if(has16bitTextureComponents() && hasUnsignedTextureComponent(0))
    809 					{
    810 						c0.x = As<UShort4>(c0.x) - MulHigh(As<UShort4>(c0.x), f0u) + MulHigh(As<UShort4>(c1.x), f0u);
    811 						c2.x = As<UShort4>(c2.x) - MulHigh(As<UShort4>(c2.x), f0u) + MulHigh(As<UShort4>(c3.x), f0u);
    812 						c.x  = As<UShort4>(c0.x) - MulHigh(As<UShort4>(c0.x), f0v) + MulHigh(As<UShort4>(c2.x), f0v);
    813 					}
    814 					else
    815 					{
    816 						if(hasUnsignedTextureComponent(0))
    817 						{
    818 							c0.x = MulHigh(As<UShort4>(c0.x), f1u1v);
    819 							c1.x = MulHigh(As<UShort4>(c1.x), f0u1v);
    820 							c2.x = MulHigh(As<UShort4>(c2.x), f1u0v);
    821 							c3.x = MulHigh(As<UShort4>(c3.x), f0u0v);
    822 						}
    823 						else
    824 						{
    825 							c0.x = MulHigh(c0.x, f1u1vs);
    826 							c1.x = MulHigh(c1.x, f0u1vs);
    827 							c2.x = MulHigh(c2.x, f1u0vs);
    828 							c3.x = MulHigh(c3.x, f0u0vs);
    829 						}
    830 
    831 						c.x = (c0.x + c1.x) + (c2.x + c3.x);
    832 						if(!hasUnsignedTextureComponent(0)) c.x = AddSat(c.x, c.x);   // Correct for signed fractions
    833 					}
    834 				}
    835 
    836 				if(componentCount >= 2)
    837 				{
    838 					if(has16bitTextureComponents() && hasUnsignedTextureComponent(1))
    839 					{
    840 						c0.y = As<UShort4>(c0.y) - MulHigh(As<UShort4>(c0.y), f0u) + MulHigh(As<UShort4>(c1.y), f0u);
    841 						c2.y = As<UShort4>(c2.y) - MulHigh(As<UShort4>(c2.y), f0u) + MulHigh(As<UShort4>(c3.y), f0u);
    842 						c.y  = As<UShort4>(c0.y) - MulHigh(As<UShort4>(c0.y), f0v) + MulHigh(As<UShort4>(c2.y), f0v);
    843 					}
    844 					else
    845 					{
    846 						if(hasUnsignedTextureComponent(1))
    847 						{
    848 							c0.y = MulHigh(As<UShort4>(c0.y), f1u1v);
    849 							c1.y = MulHigh(As<UShort4>(c1.y), f0u1v);
    850 							c2.y = MulHigh(As<UShort4>(c2.y), f1u0v);
    851 							c3.y = MulHigh(As<UShort4>(c3.y), f0u0v);
    852 						}
    853 						else
    854 						{
    855 							c0.y = MulHigh(c0.y, f1u1vs);
    856 							c1.y = MulHigh(c1.y, f0u1vs);
    857 							c2.y = MulHigh(c2.y, f1u0vs);
    858 							c3.y = MulHigh(c3.y, f0u0vs);
    859 						}
    860 
    861 						c.y = (c0.y + c1.y) + (c2.y + c3.y);
    862 						if(!hasUnsignedTextureComponent(1)) c.y = AddSat(c.y, c.y);   // Correct for signed fractions
    863 					}
    864 				}
    865 
    866 				if(componentCount >= 3)
    867 				{
    868 					if(has16bitTextureComponents() && hasUnsignedTextureComponent(2))
    869 					{
    870 						c0.z = As<UShort4>(c0.z) - MulHigh(As<UShort4>(c0.z), f0u) + MulHigh(As<UShort4>(c1.z), f0u);
    871 						c2.z = As<UShort4>(c2.z) - MulHigh(As<UShort4>(c2.z), f0u) + MulHigh(As<UShort4>(c3.z), f0u);
    872 						c.z  = As<UShort4>(c0.z) - MulHigh(As<UShort4>(c0.z), f0v) + MulHigh(As<UShort4>(c2.z), f0v);
    873 					}
    874 					else
    875 					{
    876 						if(hasUnsignedTextureComponent(2))
    877 						{
    878 							c0.z = MulHigh(As<UShort4>(c0.z), f1u1v);
    879 							c1.z = MulHigh(As<UShort4>(c1.z), f0u1v);
    880 							c2.z = MulHigh(As<UShort4>(c2.z), f1u0v);
    881 							c3.z = MulHigh(As<UShort4>(c3.z), f0u0v);
    882 						}
    883 						else
    884 						{
    885 							c0.z = MulHigh(c0.z, f1u1vs);
    886 							c1.z = MulHigh(c1.z, f0u1vs);
    887 							c2.z = MulHigh(c2.z, f1u0vs);
    888 							c3.z = MulHigh(c3.z, f0u0vs);
    889 						}
    890 
    891 						c.z = (c0.z + c1.z) + (c2.z + c3.z);
    892 						if(!hasUnsignedTextureComponent(2)) c.z = AddSat(c.z, c.z);   // Correct for signed fractions
    893 					}
    894 				}
    895 
    896 				if(componentCount >= 4)
    897 				{
    898 					if(has16bitTextureComponents() && hasUnsignedTextureComponent(3))
    899 					{
    900 						c0.w = As<UShort4>(c0.w) - MulHigh(As<UShort4>(c0.w), f0u) + MulHigh(As<UShort4>(c1.w), f0u);
    901 						c2.w = As<UShort4>(c2.w) - MulHigh(As<UShort4>(c2.w), f0u) + MulHigh(As<UShort4>(c3.w), f0u);
    902 						c.w  = As<UShort4>(c0.w) - MulHigh(As<UShort4>(c0.w), f0v) + MulHigh(As<UShort4>(c2.w), f0v);
    903 					}
    904 					else
    905 					{
    906 						if(hasUnsignedTextureComponent(3))
    907 						{
    908 							c0.w = MulHigh(As<UShort4>(c0.w), f1u1v);
    909 							c1.w = MulHigh(As<UShort4>(c1.w), f0u1v);
    910 							c2.w = MulHigh(As<UShort4>(c2.w), f1u0v);
    911 							c3.w = MulHigh(As<UShort4>(c3.w), f0u0v);
    912 						}
    913 						else
    914 						{
    915 							c0.w = MulHigh(c0.w, f1u1vs);
    916 							c1.w = MulHigh(c1.w, f0u1vs);
    917 							c2.w = MulHigh(c2.w, f1u0vs);
    918 							c3.w = MulHigh(c3.w, f0u0vs);
    919 						}
    920 
    921 						c.w = (c0.w + c1.w) + (c2.w + c3.w);
    922 						if(!hasUnsignedTextureComponent(3)) c.w = AddSat(c.w, c.w);   // Correct for signed fractions
    923 					}
    924 				}
    925 			}
    926 			else
    927 			{
    928 				c.x = c1.x;
    929 				c.y = c2.x;
    930 				c.z = c3.x;
    931 				c.w = c0.x;
    932 			}
    933 		}
    934 
    935 		return c;
    936 	}
    937 
    938 	Vector4s SamplerCore::sample3D(Pointer<Byte> &texture, Float4 &u_, Float4 &v_, Float4 &w_, Vector4f &offset, Float &lod, bool secondLOD, SamplerFunction function)
    939 	{
    940 		Vector4s c_;
    941 
    942 		int componentCount = textureComponentCount();
    943 
    944 		Pointer<Byte> mipmap;
    945 		Pointer<Byte> buffer[4];
    946 		Int face[4];
    947 
    948 		selectMipmap(texture, buffer, mipmap, lod, face, secondLOD);
    949 
    950 		bool texelFetch = (function == Fetch);
    951 
    952 		Short4 uuuu = texelFetch ? Short4(As<Int4>(u_)) : address(u_, state.addressingModeU, mipmap);
    953 		Short4 vvvv = texelFetch ? Short4(As<Int4>(v_)) : address(v_, state.addressingModeV, mipmap);
    954 		Short4 wwww = texelFetch ? Short4(As<Int4>(w_)) : address(w_, state.addressingModeW, mipmap);
    955 
    956 		if(state.textureFilter == FILTER_POINT || texelFetch)
    957 		{
    958 			c_ = sampleTexel(uuuu, vvvv, wwww, offset, mipmap, buffer, function);
    959 		}
    960 		else
    961 		{
    962 			Vector4s c[2][2][2];
    963 
    964 			Short4 u[2][2][2];
    965 			Short4 v[2][2][2];
    966 			Short4 s[2][2][2];
    967 
    968 			for(int i = 0; i < 2; i++)
    969 			{
    970 				for(int j = 0; j < 2; j++)
    971 				{
    972 					for(int k = 0; k < 2; k++)
    973 					{
    974 						u[i][j][k] = offsetSample(uuuu, mipmap, OFFSET(Mipmap,uHalf), state.addressingModeU == ADDRESSING_WRAP, i * 2 - 1, lod);
    975 						v[i][j][k] = offsetSample(vvvv, mipmap, OFFSET(Mipmap,vHalf), state.addressingModeV == ADDRESSING_WRAP, j * 2 - 1, lod);
    976 						s[i][j][k] = offsetSample(wwww, mipmap, OFFSET(Mipmap,wHalf), state.addressingModeW == ADDRESSING_WRAP, k * 2 - 1, lod);
    977 					}
    978 				}
    979 			}
    980 
    981 			// Fractions
    982 			UShort4 f0u = As<UShort4>(u[0][0][0]) * *Pointer<UShort4>(mipmap + OFFSET(Mipmap,width));
    983 			UShort4 f0v = As<UShort4>(v[0][0][0]) * *Pointer<UShort4>(mipmap + OFFSET(Mipmap,height));
    984 			UShort4 f0s = As<UShort4>(s[0][0][0]) * *Pointer<UShort4>(mipmap + OFFSET(Mipmap,depth));
    985 
    986 			UShort4 f1u = ~f0u;
    987 			UShort4 f1v = ~f0v;
    988 			UShort4 f1s = ~f0s;
    989 
    990 			UShort4 f[2][2][2];
    991 			Short4 fs[2][2][2];
    992 
    993 			f[1][1][1] = MulHigh(f1u, f1v);
    994 			f[0][1][1] = MulHigh(f0u, f1v);
    995 			f[1][0][1] = MulHigh(f1u, f0v);
    996 			f[0][0][1] = MulHigh(f0u, f0v);
    997 			f[1][1][0] = MulHigh(f1u, f1v);
    998 			f[0][1][0] = MulHigh(f0u, f1v);
    999 			f[1][0][0] = MulHigh(f1u, f0v);
   1000 			f[0][0][0] = MulHigh(f0u, f0v);
   1001 
   1002 			f[1][1][1] = MulHigh(f[1][1][1], f1s);
   1003 			f[0][1][1] = MulHigh(f[0][1][1], f1s);
   1004 			f[1][0][1] = MulHigh(f[1][0][1], f1s);
   1005 			f[0][0][1] = MulHigh(f[0][0][1], f1s);
   1006 			f[1][1][0] = MulHigh(f[1][1][0], f0s);
   1007 			f[0][1][0] = MulHigh(f[0][1][0], f0s);
   1008 			f[1][0][0] = MulHigh(f[1][0][0], f0s);
   1009 			f[0][0][0] = MulHigh(f[0][0][0], f0s);
   1010 
   1011 			// Signed fractions
   1012 			if(!hasUnsignedTextureComponent(0) || !hasUnsignedTextureComponent(1) || !hasUnsignedTextureComponent(2) || !hasUnsignedTextureComponent(3))
   1013 			{
   1014 				fs[0][0][0] = f[0][0][0] >> 1;
   1015 				fs[0][0][1] = f[0][0][1] >> 1;
   1016 				fs[0][1][0] = f[0][1][0] >> 1;
   1017 				fs[0][1][1] = f[0][1][1] >> 1;
   1018 				fs[1][0][0] = f[1][0][0] >> 1;
   1019 				fs[1][0][1] = f[1][0][1] >> 1;
   1020 				fs[1][1][0] = f[1][1][0] >> 1;
   1021 				fs[1][1][1] = f[1][1][1] >> 1;
   1022 			}
   1023 
   1024 			for(int i = 0; i < 2; i++)
   1025 			{
   1026 				for(int j = 0; j < 2; j++)
   1027 				{
   1028 					for(int k = 0; k < 2; k++)
   1029 					{
   1030 						c[i][j][k] = sampleTexel(u[i][j][k], v[i][j][k], s[i][j][k], offset, mipmap, buffer, function);
   1031 
   1032 						if(componentCount >= 1) { if(hasUnsignedTextureComponent(0)) c[i][j][k].x = MulHigh(As<UShort4>(c[i][j][k].x), f[1 - i][1 - j][1 - k]); else c[i][j][k].x = MulHigh(c[i][j][k].x, fs[1 - i][1 - j][1 - k]); }
   1033 						if(componentCount >= 2) { if(hasUnsignedTextureComponent(1)) c[i][j][k].y = MulHigh(As<UShort4>(c[i][j][k].y), f[1 - i][1 - j][1 - k]); else c[i][j][k].y = MulHigh(c[i][j][k].y, fs[1 - i][1 - j][1 - k]); }
   1034 						if(componentCount >= 3) { if(hasUnsignedTextureComponent(2)) c[i][j][k].z = MulHigh(As<UShort4>(c[i][j][k].z), f[1 - i][1 - j][1 - k]); else c[i][j][k].z = MulHigh(c[i][j][k].z, fs[1 - i][1 - j][1 - k]); }
   1035 						if(componentCount >= 4) { if(hasUnsignedTextureComponent(3)) c[i][j][k].w = MulHigh(As<UShort4>(c[i][j][k].w), f[1 - i][1 - j][1 - k]); else c[i][j][k].w = MulHigh(c[i][j][k].w, fs[1 - i][1 - j][1 - k]); }
   1036 
   1037 						if(i != 0 || j != 0 || k != 0)
   1038 						{
   1039 							if(componentCount >= 1) c[0][0][0].x += c[i][j][k].x;
   1040 							if(componentCount >= 2) c[0][0][0].y += c[i][j][k].y;
   1041 							if(componentCount >= 3) c[0][0][0].z += c[i][j][k].z;
   1042 							if(componentCount >= 4) c[0][0][0].w += c[i][j][k].w;
   1043 						}
   1044 					}
   1045 				}
   1046 			}
   1047 
   1048 			if(componentCount >= 1) c_.x = c[0][0][0].x;
   1049 			if(componentCount >= 2) c_.y = c[0][0][0].y;
   1050 			if(componentCount >= 3) c_.z = c[0][0][0].z;
   1051 			if(componentCount >= 4) c_.w = c[0][0][0].w;
   1052 
   1053 			// Correct for signed fractions
   1054 			if(componentCount >= 1) if(!hasUnsignedTextureComponent(0)) c_.x = AddSat(c_.x, c_.x);
   1055 			if(componentCount >= 2) if(!hasUnsignedTextureComponent(1)) c_.y = AddSat(c_.y, c_.y);
   1056 			if(componentCount >= 3) if(!hasUnsignedTextureComponent(2)) c_.z = AddSat(c_.z, c_.z);
   1057 			if(componentCount >= 4) if(!hasUnsignedTextureComponent(3)) c_.w = AddSat(c_.w, c_.w);
   1058 		}
   1059 
   1060 		return c_;
   1061 	}
   1062 
   1063 	Vector4f SamplerCore::sampleFloatFilter(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &offset, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, Int face[4], SamplerFunction function)
   1064 	{
   1065 		Vector4f c = sampleFloatAniso(texture, u, v, w, q, offset, lod, anisotropy, uDelta, vDelta, face, false, function);
   1066 
   1067 		if(function == Fetch)
   1068 		{
   1069 			return c;
   1070 		}
   1071 
   1072 		if(state.mipmapFilter == MIPMAP_LINEAR)
   1073 		{
   1074 			Vector4f cc = sampleFloatAniso(texture, u, v, w, q, offset, lod, anisotropy, uDelta, vDelta, face, true, function);
   1075 
   1076 			Float4 lod4 = Float4(Frac(lod));
   1077 
   1078 			c.x = (cc.x - c.x) * lod4 + c.x;
   1079 			c.y = (cc.y - c.y) * lod4 + c.y;
   1080 			c.z = (cc.z - c.z) * lod4 + c.z;
   1081 			c.w = (cc.w - c.w) * lod4 + c.w;
   1082 		}
   1083 
   1084 		Int4 borderMask;
   1085 
   1086 		if(state.addressingModeU == ADDRESSING_BORDER)
   1087 		{
   1088 			Int4 u0;
   1089 
   1090 			border(u0, u);
   1091 
   1092 			borderMask = u0;
   1093 		}
   1094 
   1095 		if(state.addressingModeV == ADDRESSING_BORDER)
   1096 		{
   1097 			Int4 v0;
   1098 
   1099 			border(v0, v);
   1100 
   1101 			if(state.addressingModeU == ADDRESSING_BORDER)
   1102 			{
   1103 				borderMask &= v0;
   1104 			}
   1105 			else
   1106 			{
   1107 				borderMask = v0;
   1108 			}
   1109 		}
   1110 
   1111 		if(state.addressingModeW == ADDRESSING_BORDER && state.textureType == TEXTURE_3D)
   1112 		{
   1113 			Int4 s0;
   1114 
   1115 			border(s0, w);
   1116 
   1117 			if(state.addressingModeU == ADDRESSING_BORDER ||
   1118 			   state.addressingModeV == ADDRESSING_BORDER)
   1119 			{
   1120 				borderMask &= s0;
   1121 			}
   1122 			else
   1123 			{
   1124 				borderMask = s0;
   1125 			}
   1126 		}
   1127 
   1128 		if(state.addressingModeU == ADDRESSING_BORDER ||
   1129 		   state.addressingModeV == ADDRESSING_BORDER ||
   1130 		   (state.addressingModeW == ADDRESSING_BORDER && state.textureType == TEXTURE_3D))
   1131 		{
   1132 			Int4 b;
   1133 
   1134 			c.x = As<Float4>((borderMask & As<Int4>(c.x)) | (~borderMask & *Pointer<Int4>(texture + OFFSET(Texture,borderColorF[0]))));
   1135 			c.y = As<Float4>((borderMask & As<Int4>(c.y)) | (~borderMask & *Pointer<Int4>(texture + OFFSET(Texture,borderColorF[1]))));
   1136 			c.z = As<Float4>((borderMask & As<Int4>(c.z)) | (~borderMask & *Pointer<Int4>(texture + OFFSET(Texture,borderColorF[2]))));
   1137 			c.w = As<Float4>((borderMask & As<Int4>(c.w)) | (~borderMask & *Pointer<Int4>(texture + OFFSET(Texture,borderColorF[3]))));
   1138 		}
   1139 
   1140 		return c;
   1141 	}
   1142 
   1143 	Vector4f SamplerCore::sampleFloatAniso(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &offset, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, Int face[4], bool secondLOD, SamplerFunction function)
   1144 	{
   1145 		Vector4f c;
   1146 
   1147 		if(state.textureFilter != FILTER_ANISOTROPIC || function == Lod || function == Fetch)
   1148 		{
   1149 			c = sampleFloat(texture, u, v, w, q, offset, lod, face, secondLOD, function);
   1150 		}
   1151 		else
   1152 		{
   1153 			Int a = RoundInt(anisotropy);
   1154 
   1155 			Vector4f cSum;
   1156 
   1157 			cSum.x = Float4(0.0f);
   1158 			cSum.y = Float4(0.0f);
   1159 			cSum.z = Float4(0.0f);
   1160 			cSum.w = Float4(0.0f);
   1161 
   1162 			Float4 A = *Pointer<Float4>(constants + OFFSET(Constants,uvWeight) + 16 * a);
   1163 			Float4 B = *Pointer<Float4>(constants + OFFSET(Constants,uvStart) + 16 * a);
   1164 
   1165 			Float4 du = uDelta;
   1166 			Float4 dv = vDelta;
   1167 
   1168 			Float4 u0 = u + B * du;
   1169 			Float4 v0 = v + B * dv;
   1170 
   1171 			du *= A;
   1172 			dv *= A;
   1173 
   1174 			Int i = 0;
   1175 
   1176 			Do
   1177 			{
   1178 				c = sampleFloat(texture, u0, v0, w, q, offset, lod, face, secondLOD, function);
   1179 
   1180 				u0 += du;
   1181 				v0 += dv;
   1182 
   1183 				cSum.x += c.x * A;
   1184 				cSum.y += c.y * A;
   1185 				cSum.z += c.z * A;
   1186 				cSum.w += c.w * A;
   1187 
   1188 				i++;
   1189 			}
   1190 			Until(i >= a)
   1191 
   1192 			c.x = cSum.x;
   1193 			c.y = cSum.y;
   1194 			c.z = cSum.z;
   1195 			c.w = cSum.w;
   1196 		}
   1197 
   1198 		return c;
   1199 	}
   1200 
   1201 	Vector4f SamplerCore::sampleFloat(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &offset, Float &lod, Int face[4], bool secondLOD, SamplerFunction function)
   1202 	{
   1203 		if(state.textureType != TEXTURE_3D)
   1204 		{
   1205 			return sampleFloat2D(texture, u, v, w, q, offset, lod, face, secondLOD, function);
   1206 		}
   1207 		else
   1208 		{
   1209 			return sampleFloat3D(texture, u, v, w, offset, lod, secondLOD, function);
   1210 		}
   1211 	}
   1212 
   1213 	Vector4f SamplerCore::sampleFloat2D(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &offset, Float &lod, Int face[4], bool secondLOD, SamplerFunction function)
   1214 	{
   1215 		Vector4f c;
   1216 
   1217 		int componentCount = textureComponentCount();
   1218 		bool gather = state.textureFilter == FILTER_GATHER;
   1219 
   1220 		Pointer<Byte> mipmap;
   1221 		Pointer<Byte> buffer[4];
   1222 
   1223 		selectMipmap(texture, buffer, mipmap, lod, face, secondLOD);
   1224 
   1225 		Int4 x0, x1, y0, y1, z0;
   1226 		Float4 fu, fv;
   1227 		Int4 filter = computeFilterOffset(lod);
   1228 		address(u, x0, x1, fu, mipmap, offset.x, filter, OFFSET(Mipmap, width), state.addressingModeU, function);
   1229 		address(v, y0, y1, fv, mipmap, offset.y, filter, OFFSET(Mipmap, height), state.addressingModeV, function);
   1230 		address(w, z0, z0, fv, mipmap, offset.z, filter, OFFSET(Mipmap, depth), state.addressingModeW, function);
   1231 
   1232 		Int4 pitchP = *Pointer<Int4>(mipmap + OFFSET(Mipmap, pitchP), 16);
   1233 		y0 *= pitchP;
   1234 		if(hasThirdCoordinate())
   1235 		{
   1236 			Int4 sliceP = *Pointer<Int4>(mipmap + OFFSET(Mipmap, sliceP), 16);
   1237 			z0 *= sliceP;
   1238 		}
   1239 
   1240 		if(state.textureFilter == FILTER_POINT || (function == Fetch))
   1241 		{
   1242 			c = sampleTexel(x0, y0, z0, q, mipmap, buffer, function);
   1243 		}
   1244 		else
   1245 		{
   1246 			y1 *= pitchP;
   1247 
   1248 			Vector4f c0 = sampleTexel(x0, y0, z0, q, mipmap, buffer, function);
   1249 			Vector4f c1 = sampleTexel(x1, y0, z0, q, mipmap, buffer, function);
   1250 			Vector4f c2 = sampleTexel(x0, y1, z0, q, mipmap, buffer, function);
   1251 			Vector4f c3 = sampleTexel(x1, y1, z0, q, mipmap, buffer, function);
   1252 
   1253 			if(!gather)   // Blend
   1254 			{
   1255 				if(componentCount >= 1) c0.x = c0.x + fu * (c1.x - c0.x);
   1256 				if(componentCount >= 2) c0.y = c0.y + fu * (c1.y - c0.y);
   1257 				if(componentCount >= 3) c0.z = c0.z + fu * (c1.z - c0.z);
   1258 				if(componentCount >= 4) c0.w = c0.w + fu * (c1.w - c0.w);
   1259 
   1260 				if(componentCount >= 1) c2.x = c2.x + fu * (c3.x - c2.x);
   1261 				if(componentCount >= 2) c2.y = c2.y + fu * (c3.y - c2.y);
   1262 				if(componentCount >= 3) c2.z = c2.z + fu * (c3.z - c2.z);
   1263 				if(componentCount >= 4) c2.w = c2.w + fu * (c3.w - c2.w);
   1264 
   1265 				if(componentCount >= 1) c.x = c0.x + fv * (c2.x - c0.x);
   1266 				if(componentCount >= 2) c.y = c0.y + fv * (c2.y - c0.y);
   1267 				if(componentCount >= 3) c.z = c0.z + fv * (c2.z - c0.z);
   1268 				if(componentCount >= 4) c.w = c0.w + fv * (c2.w - c0.w);
   1269 			}
   1270 			else
   1271 			{
   1272 				c.x = c1.x;
   1273 				c.y = c2.x;
   1274 				c.z = c3.x;
   1275 				c.w = c0.x;
   1276 			}
   1277 		}
   1278 
   1279 		return c;
   1280 	}
   1281 
   1282 	Vector4f SamplerCore::sampleFloat3D(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, Float &lod, bool secondLOD, SamplerFunction function)
   1283 	{
   1284 		Vector4f c;
   1285 
   1286 		int componentCount = textureComponentCount();
   1287 
   1288 		Pointer<Byte> mipmap;
   1289 		Pointer<Byte> buffer[4];
   1290 		Int face[4];
   1291 
   1292 		selectMipmap(texture, buffer, mipmap, lod, face, secondLOD);
   1293 
   1294 		Int4 x0, x1, y0, y1, z0, z1;
   1295 		Float4 fu, fv, fw;
   1296 		Int4 filter = computeFilterOffset(lod);
   1297 		address(u, x0, x1, fu, mipmap, offset.x, filter, OFFSET(Mipmap, width), state.addressingModeU, function);
   1298 		address(v, y0, y1, fv, mipmap, offset.y, filter, OFFSET(Mipmap, height), state.addressingModeV, function);
   1299 		address(w, z0, z1, fw, mipmap, offset.z, filter, OFFSET(Mipmap, depth), state.addressingModeW, function);
   1300 
   1301 		Int4 pitchP = *Pointer<Int4>(mipmap + OFFSET(Mipmap, pitchP), 16);
   1302 		Int4 sliceP = *Pointer<Int4>(mipmap + OFFSET(Mipmap, sliceP), 16);
   1303 		y0 *= pitchP;
   1304 		z0 *= sliceP;
   1305 
   1306 		if(state.textureFilter == FILTER_POINT || (function == Fetch))
   1307 		{
   1308 			c = sampleTexel(x0, y0, z0, w, mipmap, buffer, function);
   1309 		}
   1310 		else
   1311 		{
   1312 			y1 *= pitchP;
   1313 			z1 *= sliceP;
   1314 
   1315 			Vector4f c0 = sampleTexel(x0, y0, z0, w, mipmap, buffer, function);
   1316 			Vector4f c1 = sampleTexel(x1, y0, z0, w, mipmap, buffer, function);
   1317 			Vector4f c2 = sampleTexel(x0, y1, z0, w, mipmap, buffer, function);
   1318 			Vector4f c3 = sampleTexel(x1, y1, z0, w, mipmap, buffer, function);
   1319 			Vector4f c4 = sampleTexel(x0, y0, z1, w, mipmap, buffer, function);
   1320 			Vector4f c5 = sampleTexel(x1, y0, z1, w, mipmap, buffer, function);
   1321 			Vector4f c6 = sampleTexel(x0, y1, z1, w, mipmap, buffer, function);
   1322 			Vector4f c7 = sampleTexel(x1, y1, z1, w, mipmap, buffer, function);
   1323 
   1324 			// Blend first slice
   1325 			if(componentCount >= 1) c0.x = c0.x + fu * (c1.x - c0.x);
   1326 			if(componentCount >= 2) c0.y = c0.y + fu * (c1.y - c0.y);
   1327 			if(componentCount >= 3) c0.z = c0.z + fu * (c1.z - c0.z);
   1328 			if(componentCount >= 4) c0.w = c0.w + fu * (c1.w - c0.w);
   1329 
   1330 			if(componentCount >= 1) c2.x = c2.x + fu * (c3.x - c2.x);
   1331 			if(componentCount >= 2) c2.y = c2.y + fu * (c3.y - c2.y);
   1332 			if(componentCount >= 3) c2.z = c2.z + fu * (c3.z - c2.z);
   1333 			if(componentCount >= 4) c2.w = c2.w + fu * (c3.w - c2.w);
   1334 
   1335 			if(componentCount >= 1) c0.x = c0.x + fv * (c2.x - c0.x);
   1336 			if(componentCount >= 2) c0.y = c0.y + fv * (c2.y - c0.y);
   1337 			if(componentCount >= 3) c0.z = c0.z + fv * (c2.z - c0.z);
   1338 			if(componentCount >= 4) c0.w = c0.w + fv * (c2.w - c0.w);
   1339 
   1340 			// Blend second slice
   1341 			if(componentCount >= 1) c4.x = c4.x + fu * (c5.x - c4.x);
   1342 			if(componentCount >= 2) c4.y = c4.y + fu * (c5.y - c4.y);
   1343 			if(componentCount >= 3) c4.z = c4.z + fu * (c5.z - c4.z);
   1344 			if(componentCount >= 4) c4.w = c4.w + fu * (c5.w - c4.w);
   1345 
   1346 			if(componentCount >= 1) c6.x = c6.x + fu * (c7.x - c6.x);
   1347 			if(componentCount >= 2) c6.y = c6.y + fu * (c7.y - c6.y);
   1348 			if(componentCount >= 3) c6.z = c6.z + fu * (c7.z - c6.z);
   1349 			if(componentCount >= 4) c6.w = c6.w + fu * (c7.w - c6.w);
   1350 
   1351 			if(componentCount >= 1) c4.x = c4.x + fv * (c6.x - c4.x);
   1352 			if(componentCount >= 2) c4.y = c4.y + fv * (c6.y - c4.y);
   1353 			if(componentCount >= 3) c4.z = c4.z + fv * (c6.z - c4.z);
   1354 			if(componentCount >= 4) c4.w = c4.w + fv * (c6.w - c4.w);
   1355 
   1356 			// Blend slices
   1357 			if(componentCount >= 1) c.x = c0.x + fw * (c4.x - c0.x);
   1358 			if(componentCount >= 2) c.y = c0.y + fw * (c4.y - c0.y);
   1359 			if(componentCount >= 3) c.z = c0.z + fw * (c4.z - c0.z);
   1360 			if(componentCount >= 4) c.w = c0.w + fw * (c4.w - c0.w);
   1361 		}
   1362 
   1363 		return c;
   1364 	}
   1365 
   1366 	Float SamplerCore::log2sqrt(Float lod)
   1367 	{
   1368 		// log2(sqrt(lod))                               // Equals 0.25 * log2(lod^2).
   1369 		lod *= lod;                                      // Squaring doubles the exponent and produces an extra bit of precision.
   1370 		lod = Float(As<Int>(lod)) - Float(0x3F800000);   // Interpret as integer and subtract the exponent bias.
   1371 		lod *= As<Float>(Int(0x33000000));               // Scale by 0.25 * 2^-23 (mantissa length).
   1372 
   1373 		return lod;
   1374 	}
   1375 
   1376 	Float SamplerCore::log2(Float lod)
   1377 	{
   1378 		lod *= lod;                                      // Squaring doubles the exponent and produces an extra bit of precision.
   1379 		lod = Float(As<Int>(lod)) - Float(0x3F800000);   // Interpret as integer and subtract the exponent bias.
   1380 		lod *= As<Float>(Int(0x33800000));               // Scale by 0.5 * 2^-23 (mantissa length).
   1381 
   1382 		return lod;
   1383 	}
   1384 
   1385 	void SamplerCore::computeLod(Pointer<Byte> &texture, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, Float4 &uuuu, Float4 &vvvv, const Float &lodBias, Vector4f &dsx, Vector4f &dsy, SamplerFunction function)
   1386 	{
   1387 		if(function != Lod && function != Fetch)
   1388 		{
   1389 			Float4 duvdxy;
   1390 
   1391 			if(function != Grad)   // Implicit
   1392 			{
   1393 				duvdxy = Float4(uuuu.yz, vvvv.yz) - Float4(uuuu.xx, vvvv.xx);
   1394 			}
   1395 			else
   1396 			{
   1397 				Float4 dudxy = Float4(dsx.x.xx, dsy.x.xx);
   1398 				Float4 dvdxy = Float4(dsx.y.xx, dsy.y.xx);
   1399 
   1400 				duvdxy = Float4(dudxy.xz, dvdxy.xz);
   1401 			}
   1402 
   1403 			// Scale by texture dimensions and global LOD.
   1404 			Float4 dUVdxy = duvdxy * *Pointer<Float4>(texture + OFFSET(Texture,widthHeightLOD));
   1405 
   1406 			Float4 dUV2dxy = dUVdxy * dUVdxy;
   1407 			Float4 dUV2 = dUV2dxy.xy + dUV2dxy.zw;
   1408 
   1409 			lod = Max(Float(dUV2.x), Float(dUV2.y));   // Square length of major axis
   1410 
   1411 			if(state.textureFilter == FILTER_ANISOTROPIC)
   1412 			{
   1413 				Float det = Abs(Float(dUVdxy.x) * Float(dUVdxy.w) - Float(dUVdxy.y) * Float(dUVdxy.z));
   1414 
   1415 				Float4 dudx = duvdxy.xxxx;
   1416 				Float4 dudy = duvdxy.yyyy;
   1417 				Float4 dvdx = duvdxy.zzzz;
   1418 				Float4 dvdy = duvdxy.wwww;
   1419 
   1420 				Int4 mask = As<Int4>(CmpNLT(dUV2.x, dUV2.y));
   1421 				uDelta = As<Float4>((As<Int4>(dudx) & mask) | ((As<Int4>(dudy) & ~mask)));
   1422 				vDelta = As<Float4>((As<Int4>(dvdx) & mask) | ((As<Int4>(dvdy) & ~mask)));
   1423 
   1424 				anisotropy = lod * Rcp_pp(det);
   1425 				anisotropy = Min(anisotropy, *Pointer<Float>(texture + OFFSET(Texture,maxAnisotropy)));
   1426 
   1427 				lod *= Rcp_pp(anisotropy * anisotropy);
   1428 			}
   1429 
   1430 			lod = log2sqrt(lod);   // log2(sqrt(lod))
   1431 
   1432 			if(function == Bias)
   1433 			{
   1434 				lod += lodBias;
   1435 			}
   1436 		}
   1437 		else if(function == Lod)
   1438 		{
   1439 			lod = lodBias;
   1440 		}
   1441 		else if(function == Fetch)
   1442 		{
   1443 			// TODO: Eliminate int-float-int conversion.
   1444 			lod = Float(As<Int>(lodBias));
   1445 		}
   1446 		else if(function == Base)
   1447 		{
   1448 			lod = Float(0);
   1449 		}
   1450 		else assert(false);
   1451 
   1452 		lod = Max(lod, *Pointer<Float>(texture + OFFSET(Texture, minLod)));
   1453 		lod = Min(lod, *Pointer<Float>(texture + OFFSET(Texture, maxLod)));
   1454 	}
   1455 
   1456 	void SamplerCore::computeLodCube(Pointer<Byte> &texture, Float &lod, Float4 &u, Float4 &v, Float4 &w, const Float &lodBias, Vector4f &dsx, Vector4f &dsy, Float4 &M, SamplerFunction function)
   1457 	{
   1458 		if(function != Lod && function != Fetch)
   1459 		{
   1460 			Float4 dudxy, dvdxy, dsdxy;
   1461 
   1462 			if(function != Grad)  // Implicit
   1463 			{
   1464 				Float4 U = u * M;
   1465 				Float4 V = v * M;
   1466 				Float4 W = w * M;
   1467 
   1468 				dudxy = Abs(U - U.xxxx);
   1469 				dvdxy = Abs(V - V.xxxx);
   1470 				dsdxy = Abs(W - W.xxxx);
   1471 			}
   1472 			else
   1473 			{
   1474 				dudxy = Float4(dsx.x.xx, dsy.x.xx);
   1475 				dvdxy = Float4(dsx.y.xx, dsy.y.xx);
   1476 				dsdxy = Float4(dsx.z.xx, dsy.z.xx);
   1477 
   1478 				dudxy = Abs(dudxy * Float4(M.x));
   1479 				dvdxy = Abs(dvdxy * Float4(M.x));
   1480 				dsdxy = Abs(dsdxy * Float4(M.x));
   1481 			}
   1482 
   1483 			// Compute the largest Manhattan distance in two dimensions.
   1484 			// This takes the footprint across adjacent faces into account.
   1485 			Float4 duvdxy = dudxy + dvdxy;
   1486 			Float4 dusdxy = dudxy + dsdxy;
   1487 			Float4 dvsdxy = dvdxy + dsdxy;
   1488 
   1489 			dudxy = Max(Max(duvdxy, dusdxy), dvsdxy);
   1490 
   1491 			lod = Max(Float(dudxy.y), Float(dudxy.z));   // FIXME: Max(dudxy.y, dudxy.z);
   1492 
   1493 			// Scale by texture dimension and global LOD.
   1494 			lod *= *Pointer<Float>(texture + OFFSET(Texture,widthLOD));
   1495 
   1496 			lod = log2(lod);
   1497 
   1498 			if(function == Bias)
   1499 			{
   1500 				lod += lodBias;
   1501 			}
   1502 		}
   1503 		else if(function == Lod)
   1504 		{
   1505 			lod = lodBias;
   1506 		}
   1507 		else if(function == Fetch)
   1508 		{
   1509 			// TODO: Eliminate int-float-int conversion.
   1510 			lod = Float(As<Int>(lodBias));
   1511 		}
   1512 		else if(function == Base)
   1513 		{
   1514 			lod = Float(0);
   1515 		}
   1516 		else assert(false);
   1517 
   1518 		lod = Max(lod, *Pointer<Float>(texture + OFFSET(Texture, minLod)));
   1519 		lod = Min(lod, *Pointer<Float>(texture + OFFSET(Texture, maxLod)));
   1520 	}
   1521 
   1522 	void SamplerCore::computeLod3D(Pointer<Byte> &texture, Float &lod, Float4 &uuuu, Float4 &vvvv, Float4 &wwww, const Float &lodBias, Vector4f &dsx, Vector4f &dsy, SamplerFunction function)
   1523 	{
   1524 		if(function != Lod && function != Fetch)
   1525 		{
   1526 			Float4 dudxy, dvdxy, dsdxy;
   1527 
   1528 			if(function != Grad)   // Implicit
   1529 			{
   1530 				dudxy = uuuu - uuuu.xxxx;
   1531 				dvdxy = vvvv - vvvv.xxxx;
   1532 				dsdxy = wwww - wwww.xxxx;
   1533 			}
   1534 			else
   1535 			{
   1536 				dudxy = Float4(dsx.x.xx, dsy.x.xx);
   1537 				dvdxy = Float4(dsx.y.xx, dsy.y.xx);
   1538 				dsdxy = Float4(dsx.z.xx, dsy.z.xx);
   1539 			}
   1540 
   1541 			// Scale by texture dimensions and global LOD.
   1542 			dudxy *= *Pointer<Float4>(texture + OFFSET(Texture,widthLOD));
   1543 			dvdxy *= *Pointer<Float4>(texture + OFFSET(Texture,heightLOD));
   1544 			dsdxy *= *Pointer<Float4>(texture + OFFSET(Texture,depthLOD));
   1545 
   1546 			dudxy *= dudxy;
   1547 			dvdxy *= dvdxy;
   1548 			dsdxy *= dsdxy;
   1549 
   1550 			dudxy += dvdxy;
   1551 			dudxy += dsdxy;
   1552 
   1553 			lod = Max(Float(dudxy.y), Float(dudxy.z));   // FIXME: Max(dudxy.y, dudxy.z);
   1554 
   1555 			lod = log2sqrt(lod);   // log2(sqrt(lod))
   1556 
   1557 			if(function == Bias)
   1558 			{
   1559 				lod += lodBias;
   1560 			}
   1561 		}
   1562 		else if(function == Lod)
   1563 		{
   1564 			lod = lodBias;
   1565 		}
   1566 		else if(function == Fetch)
   1567 		{
   1568 			// TODO: Eliminate int-float-int conversion.
   1569 			lod = Float(As<Int>(lodBias));
   1570 		}
   1571 		else if(function == Base)
   1572 		{
   1573 			lod = Float(0);
   1574 		}
   1575 		else assert(false);
   1576 
   1577 		lod = Max(lod, *Pointer<Float>(texture + OFFSET(Texture, minLod)));
   1578 		lod = Min(lod, *Pointer<Float>(texture + OFFSET(Texture, maxLod)));
   1579 	}
   1580 
   1581 	void SamplerCore::cubeFace(Int face[4], Float4 &U, Float4 &V, Float4 &x, Float4 &y, Float4 &z, Float4 &M)
   1582 	{
   1583 		Int4 xn = CmpLT(x, Float4(0.0f));   // x < 0
   1584 		Int4 yn = CmpLT(y, Float4(0.0f));   // y < 0
   1585 		Int4 zn = CmpLT(z, Float4(0.0f));   // z < 0
   1586 
   1587 		Float4 absX = Abs(x);
   1588 		Float4 absY = Abs(y);
   1589 		Float4 absZ = Abs(z);
   1590 
   1591 		Int4 xy = CmpNLE(absX, absY);   // abs(x) > abs(y)
   1592 		Int4 yz = CmpNLE(absY, absZ);   // abs(y) > abs(z)
   1593 		Int4 zx = CmpNLE(absZ, absX);   // abs(z) > abs(x)
   1594 		Int4 xMajor = xy & ~zx;   // abs(x) > abs(y) && abs(x) > abs(z)
   1595 		Int4 yMajor = yz & ~xy;   // abs(y) > abs(z) && abs(y) > abs(x)
   1596 		Int4 zMajor = zx & ~yz;   // abs(z) > abs(x) && abs(z) > abs(y)
   1597 
   1598 		// FACE_POSITIVE_X = 000b
   1599 		// FACE_NEGATIVE_X = 001b
   1600 		// FACE_POSITIVE_Y = 010b
   1601 		// FACE_NEGATIVE_Y = 011b
   1602 		// FACE_POSITIVE_Z = 100b
   1603 		// FACE_NEGATIVE_Z = 101b
   1604 
   1605 		Int yAxis = SignMask(yMajor);
   1606 		Int zAxis = SignMask(zMajor);
   1607 
   1608 		Int4 n = ((xn & xMajor) | (yn & yMajor) | (zn & zMajor)) & Int4(0x80000000);
   1609 		Int negative = SignMask(n);
   1610 
   1611 		face[0] = *Pointer<Int>(constants + OFFSET(Constants,transposeBit0) + negative * 4);
   1612 		face[0] |= *Pointer<Int>(constants + OFFSET(Constants,transposeBit1) + yAxis * 4);
   1613 		face[0] |= *Pointer<Int>(constants + OFFSET(Constants,transposeBit2) + zAxis * 4);
   1614 		face[1] = (face[0] >> 4)  & 0x7;
   1615 		face[2] = (face[0] >> 8)  & 0x7;
   1616 		face[3] = (face[0] >> 12) & 0x7;
   1617 		face[0] &= 0x7;
   1618 
   1619 		M = Max(Max(absX, absY), absZ);
   1620 
   1621 		// U = xMajor ? (neg ^ -z) : ((zMajor & neg) ^ x)
   1622 		U = As<Float4>((xMajor & (n ^ As<Int4>(-z))) | (~xMajor & ((zMajor & n) ^ As<Int4>(x))));
   1623 
   1624 		// V = !yMajor ? -y : (n ^ z)
   1625 		V = As<Float4>((~yMajor & As<Int4>(-y)) | (yMajor & (n ^ As<Int4>(z))));
   1626 
   1627 		M = reciprocal(M) * Float4(0.5f);
   1628 		U = U * M + Float4(0.5f);
   1629 		V = V * M + Float4(0.5f);
   1630 	}
   1631 
   1632 	Short4 SamplerCore::applyOffset(Short4 &uvw, Float4 &offset, const Int4 &whd, AddressingMode mode)
   1633 	{
   1634 		Int4 tmp = Int4(As<UShort4>(uvw));
   1635 		tmp = tmp + As<Int4>(offset);
   1636 
   1637 		switch(mode)
   1638 		{
   1639 		case AddressingMode::ADDRESSING_WRAP:
   1640 			tmp = (tmp + whd * Int4(-MIN_PROGRAM_TEXEL_OFFSET)) % whd;
   1641 			break;
   1642 		case AddressingMode::ADDRESSING_CLAMP:
   1643 		case AddressingMode::ADDRESSING_MIRROR:
   1644 		case AddressingMode::ADDRESSING_MIRRORONCE:
   1645 		case AddressingMode::ADDRESSING_BORDER: // FIXME: Implement and test ADDRESSING_MIRROR, ADDRESSING_MIRRORONCE, ADDRESSING_BORDER
   1646 			tmp = Min(Max(tmp, Int4(0)), whd - Int4(1));
   1647 			break;
   1648 		case ADDRESSING_TEXELFETCH:
   1649 			break;
   1650 		case AddressingMode::ADDRESSING_SEAMLESS:
   1651 			ASSERT(false);   // Cube sampling doesn't support offset.
   1652 		default:
   1653 			ASSERT(false);
   1654 		}
   1655 
   1656 		return As<Short4>(UShort4(tmp));
   1657 	}
   1658 
   1659 	void SamplerCore::computeIndices(UInt index[4], Short4 uuuu, Short4 vvvv, Short4 wwww, Vector4f &offset, const Pointer<Byte> &mipmap, SamplerFunction function)
   1660 	{
   1661 		bool texelFetch = (function == Fetch);
   1662 		bool hasOffset = (function.option == Offset);
   1663 
   1664 		if(!texelFetch)
   1665 		{
   1666 			uuuu = MulHigh(As<UShort4>(uuuu), *Pointer<UShort4>(mipmap + OFFSET(Mipmap, width)));
   1667 			vvvv = MulHigh(As<UShort4>(vvvv), *Pointer<UShort4>(mipmap + OFFSET(Mipmap, height)));
   1668 		}
   1669 
   1670 		if(hasOffset)
   1671 		{
   1672 			UShort4 w = *Pointer<UShort4>(mipmap + OFFSET(Mipmap, width));
   1673 			uuuu = applyOffset(uuuu, offset.x, Int4(w), texelFetch ? ADDRESSING_TEXELFETCH : state.addressingModeU);
   1674 			UShort4 h = *Pointer<UShort4>(mipmap + OFFSET(Mipmap, height));
   1675 			vvvv = applyOffset(vvvv, offset.y, Int4(h), texelFetch ? ADDRESSING_TEXELFETCH : state.addressingModeV);
   1676 		}
   1677 
   1678 		Short4 uuu2 = uuuu;
   1679 		uuuu = As<Short4>(UnpackLow(uuuu, vvvv));
   1680 		uuu2 = As<Short4>(UnpackHigh(uuu2, vvvv));
   1681 		uuuu = As<Short4>(MulAdd(uuuu, *Pointer<Short4>(mipmap + OFFSET(Mipmap,onePitchP))));
   1682 		uuu2 = As<Short4>(MulAdd(uuu2, *Pointer<Short4>(mipmap + OFFSET(Mipmap,onePitchP))));
   1683 
   1684 		if(hasThirdCoordinate())
   1685 		{
   1686 			if(state.textureType != TEXTURE_2D_ARRAY)
   1687 			{
   1688 				if(!texelFetch)
   1689 				{
   1690 					wwww = MulHigh(As<UShort4>(wwww), *Pointer<UShort4>(mipmap + OFFSET(Mipmap, depth)));
   1691 				}
   1692 
   1693 				if(hasOffset)
   1694 				{
   1695 					UShort4 d = *Pointer<UShort4>(mipmap + OFFSET(Mipmap, depth));
   1696 					wwww = applyOffset(wwww, offset.z, Int4(d), texelFetch ? ADDRESSING_TEXELFETCH : state.addressingModeW);
   1697 				}
   1698 			}
   1699 
   1700 			UInt4 uv(As<UInt2>(uuuu), As<UInt2>(uuu2));
   1701 			uv += As<UInt4>(Int4(As<UShort4>(wwww))) * *Pointer<UInt4>(mipmap + OFFSET(Mipmap, sliceP));
   1702 
   1703 			index[0] = Extract(As<Int4>(uv), 0);
   1704 			index[1] = Extract(As<Int4>(uv), 1);
   1705 			index[2] = Extract(As<Int4>(uv), 2);
   1706 			index[3] = Extract(As<Int4>(uv), 3);
   1707 		}
   1708 		else
   1709 		{
   1710 			index[0] = Extract(As<Int2>(uuuu), 0);
   1711 			index[1] = Extract(As<Int2>(uuuu), 1);
   1712 			index[2] = Extract(As<Int2>(uuu2), 0);
   1713 			index[3] = Extract(As<Int2>(uuu2), 1);
   1714 		}
   1715 
   1716 		if(texelFetch)
   1717 		{
   1718 			Int size = Int(*Pointer<Int>(mipmap + OFFSET(Mipmap, sliceP)));
   1719 			if(hasThirdCoordinate())
   1720 			{
   1721 				size *= Int(*Pointer<Short>(mipmap + OFFSET(Mipmap, depth)));
   1722 			}
   1723 			UInt min = 0;
   1724 			UInt max = size - 1;
   1725 
   1726 			for(int i = 0; i < 4; i++)
   1727 			{
   1728 				index[i] = Min(Max(index[i], min), max);
   1729 			}
   1730 		}
   1731 	}
   1732 
   1733 	void SamplerCore::computeIndices(UInt index[4], Int4& uuuu, Int4& vvvv, Int4& wwww, const Pointer<Byte> &mipmap, SamplerFunction function)
   1734 	{
   1735 		UInt4 indices = uuuu + vvvv;
   1736 
   1737 		if(hasThirdCoordinate())
   1738 		{
   1739 			indices += As<UInt4>(wwww);
   1740 		}
   1741 
   1742 		for(int i = 0; i < 4; i++)
   1743 		{
   1744 			index[i] = Extract(As<Int4>(indices), i);
   1745 		}
   1746 	}
   1747 
   1748 	Vector4s SamplerCore::sampleTexel(UInt index[4], Pointer<Byte> buffer[4])
   1749 	{
   1750 		Vector4s c;
   1751 
   1752 		int f0 = state.textureType == TEXTURE_CUBE ? 0 : 0;
   1753 		int f1 = state.textureType == TEXTURE_CUBE ? 1 : 0;
   1754 		int f2 = state.textureType == TEXTURE_CUBE ? 2 : 0;
   1755 		int f3 = state.textureType == TEXTURE_CUBE ? 3 : 0;
   1756 
   1757 		if(has16bitTextureFormat())
   1758 		{
   1759 			c.x = Insert(c.x, Pointer<Short>(buffer[f0])[index[0]], 0);
   1760 			c.x = Insert(c.x, Pointer<Short>(buffer[f1])[index[1]], 1);
   1761 			c.x = Insert(c.x, Pointer<Short>(buffer[f2])[index[2]], 2);
   1762 			c.x = Insert(c.x, Pointer<Short>(buffer[f3])[index[3]], 3);
   1763 
   1764 			switch(state.textureFormat)
   1765 			{
   1766 			case FORMAT_R5G6B5:
   1767 				c.z = (c.x & Short4(0x001Fu)) << 11;
   1768 				c.y = (c.x & Short4(0x07E0u)) << 5;
   1769 				c.x = (c.x & Short4(0xF800u));
   1770 				break;
   1771 			default:
   1772 				ASSERT(false);
   1773 			}
   1774 		}
   1775 		else if(has8bitTextureComponents())
   1776 		{
   1777 			switch(textureComponentCount())
   1778 			{
   1779 			case 4:
   1780 				{
   1781 					Byte4 c0 = Pointer<Byte4>(buffer[f0])[index[0]];
   1782 					Byte4 c1 = Pointer<Byte4>(buffer[f1])[index[1]];
   1783 					Byte4 c2 = Pointer<Byte4>(buffer[f2])[index[2]];
   1784 					Byte4 c3 = Pointer<Byte4>(buffer[f3])[index[3]];
   1785 					c.x = Unpack(c0, c1);
   1786 					c.y = Unpack(c2, c3);
   1787 
   1788 					switch(state.textureFormat)
   1789 					{
   1790 					case FORMAT_A8R8G8B8:
   1791 						c.z = As<Short4>(UnpackLow(c.x, c.y));
   1792 						c.x = As<Short4>(UnpackHigh(c.x, c.y));
   1793 						c.y = c.z;
   1794 						c.w = c.x;
   1795 						c.z = UnpackLow(As<Byte8>(c.z), As<Byte8>(c.z));
   1796 						c.y = UnpackHigh(As<Byte8>(c.y), As<Byte8>(c.y));
   1797 						c.x = UnpackLow(As<Byte8>(c.x), As<Byte8>(c.x));
   1798 						c.w = UnpackHigh(As<Byte8>(c.w), As<Byte8>(c.w));
   1799 						break;
   1800 					case FORMAT_A8B8G8R8:
   1801 					case FORMAT_A8B8G8R8I:
   1802 					case FORMAT_A8B8G8R8_SNORM:
   1803 					case FORMAT_Q8W8V8U8:
   1804 					case FORMAT_SRGB8_A8:
   1805 						c.z = As<Short4>(UnpackHigh(c.x, c.y));
   1806 						c.x = As<Short4>(UnpackLow(c.x, c.y));
   1807 						c.y = c.x;
   1808 						c.w = c.z;
   1809 						c.x = UnpackLow(As<Byte8>(c.x), As<Byte8>(c.x));
   1810 						c.y = UnpackHigh(As<Byte8>(c.y), As<Byte8>(c.y));
   1811 						c.z = UnpackLow(As<Byte8>(c.z), As<Byte8>(c.z));
   1812 						c.w = UnpackHigh(As<Byte8>(c.w), As<Byte8>(c.w));
   1813 						// Propagate sign bit
   1814 						if(state.textureFormat == FORMAT_A8B8G8R8I)
   1815 						{
   1816 							c.x >>= 8;
   1817 							c.y >>= 8;
   1818 							c.z >>= 8;
   1819 							c.w >>= 8;
   1820 						}
   1821 						break;
   1822 					case FORMAT_A8B8G8R8UI:
   1823 						c.z = As<Short4>(UnpackHigh(c.x, c.y));
   1824 						c.x = As<Short4>(UnpackLow(c.x, c.y));
   1825 						c.y = c.x;
   1826 						c.w = c.z;
   1827 						c.x = UnpackLow(As<Byte8>(c.x), As<Byte8>(Short4(0)));
   1828 						c.y = UnpackHigh(As<Byte8>(c.y), As<Byte8>(Short4(0)));
   1829 						c.z = UnpackLow(As<Byte8>(c.z), As<Byte8>(Short4(0)));
   1830 						c.w = UnpackHigh(As<Byte8>(c.w), As<Byte8>(Short4(0)));
   1831 						break;
   1832 					default:
   1833 						ASSERT(false);
   1834 					}
   1835 				}
   1836 				break;
   1837 			case 3:
   1838 				{
   1839 					Byte4 c0 = Pointer<Byte4>(buffer[f0])[index[0]];
   1840 					Byte4 c1 = Pointer<Byte4>(buffer[f1])[index[1]];
   1841 					Byte4 c2 = Pointer<Byte4>(buffer[f2])[index[2]];
   1842 					Byte4 c3 = Pointer<Byte4>(buffer[f3])[index[3]];
   1843 					c.x = Unpack(c0, c1);
   1844 					c.y = Unpack(c2, c3);
   1845 
   1846 					switch(state.textureFormat)
   1847 					{
   1848 					case FORMAT_X8R8G8B8:
   1849 						c.z = As<Short4>(UnpackLow(c.x, c.y));
   1850 						c.x = As<Short4>(UnpackHigh(c.x, c.y));
   1851 						c.y = c.z;
   1852 						c.z = UnpackLow(As<Byte8>(c.z), As<Byte8>(c.z));
   1853 						c.y = UnpackHigh(As<Byte8>(c.y), As<Byte8>(c.y));
   1854 						c.x = UnpackLow(As<Byte8>(c.x), As<Byte8>(c.x));
   1855 						break;
   1856 					case FORMAT_X8B8G8R8_SNORM:
   1857 					case FORMAT_X8B8G8R8I:
   1858 					case FORMAT_X8B8G8R8:
   1859 					case FORMAT_X8L8V8U8:
   1860 					case FORMAT_SRGB8_X8:
   1861 						c.z = As<Short4>(UnpackHigh(c.x, c.y));
   1862 						c.x = As<Short4>(UnpackLow(c.x, c.y));
   1863 						c.y = c.x;
   1864 						c.x = UnpackLow(As<Byte8>(c.x), As<Byte8>(c.x));
   1865 						c.y = UnpackHigh(As<Byte8>(c.y), As<Byte8>(c.y));
   1866 						c.z = UnpackLow(As<Byte8>(c.z), As<Byte8>(c.z));
   1867 						// Propagate sign bit
   1868 						if(state.textureFormat == FORMAT_X8B8G8R8I)
   1869 						{
   1870 							c.x >>= 8;
   1871 							c.y >>= 8;
   1872 							c.z >>= 8;
   1873 						}
   1874 						break;
   1875 					case FORMAT_X8B8G8R8UI:
   1876 						c.z = As<Short4>(UnpackHigh(c.x, c.y));
   1877 						c.x = As<Short4>(UnpackLow(c.x, c.y));
   1878 						c.y = c.x;
   1879 						c.x = UnpackLow(As<Byte8>(c.x), As<Byte8>(Short4(0)));
   1880 						c.y = UnpackHigh(As<Byte8>(c.y), As<Byte8>(Short4(0)));
   1881 						c.z = UnpackLow(As<Byte8>(c.z), As<Byte8>(Short4(0)));
   1882 						break;
   1883 					default:
   1884 						ASSERT(false);
   1885 					}
   1886 				}
   1887 				break;
   1888 			case 2:
   1889 				c.x = Insert(c.x, Pointer<Short>(buffer[f0])[index[0]], 0);
   1890 				c.x = Insert(c.x, Pointer<Short>(buffer[f1])[index[1]], 1);
   1891 				c.x = Insert(c.x, Pointer<Short>(buffer[f2])[index[2]], 2);
   1892 				c.x = Insert(c.x, Pointer<Short>(buffer[f3])[index[3]], 3);
   1893 
   1894 				switch(state.textureFormat)
   1895 				{
   1896 				case FORMAT_G8R8:
   1897 				case FORMAT_G8R8_SNORM:
   1898 				case FORMAT_V8U8:
   1899 				case FORMAT_A8L8:
   1900 					c.y = (c.x & Short4(0xFF00u)) | As<Short4>(As<UShort4>(c.x) >> 8);
   1901 					c.x = (c.x & Short4(0x00FFu)) | (c.x << 8);
   1902 					break;
   1903 				case FORMAT_G8R8I:
   1904 					c.y = c.x >> 8;
   1905 					c.x = (c.x << 8) >> 8; // Propagate sign bit
   1906 					break;
   1907 				case FORMAT_G8R8UI:
   1908 					c.y = As<Short4>(As<UShort4>(c.x) >> 8);
   1909 					c.x &= Short4(0x00FFu);
   1910 					break;
   1911 				default:
   1912 					ASSERT(false);
   1913 				}
   1914 				break;
   1915 			case 1:
   1916 				{
   1917 					Int c0 = Int(*Pointer<Byte>(buffer[f0] + index[0]));
   1918 					Int c1 = Int(*Pointer<Byte>(buffer[f1] + index[1]));
   1919 					Int c2 = Int(*Pointer<Byte>(buffer[f2] + index[2]));
   1920 					Int c3 = Int(*Pointer<Byte>(buffer[f3] + index[3]));
   1921 					c0 = c0 | (c1 << 8) | (c2 << 16) | (c3 << 24);
   1922 
   1923 					switch(state.textureFormat)
   1924 					{
   1925 					case FORMAT_R8I:
   1926 					case FORMAT_R8UI:
   1927 						{
   1928 							Int zero(0);
   1929 							c.x = Unpack(As<Byte4>(c0), As<Byte4>(zero));
   1930 							// Propagate sign bit
   1931 							if(state.textureFormat == FORMAT_R8I)
   1932 							{
   1933 								c.x = (c.x << 8) >> 8;
   1934 							}
   1935 						}
   1936 						break;
   1937 					default:
   1938 						c.x = Unpack(As<Byte4>(c0));
   1939 						break;
   1940 					}
   1941 				}
   1942 				break;
   1943 			default:
   1944 				ASSERT(false);
   1945 			}
   1946 		}
   1947 		else if(has16bitTextureComponents())
   1948 		{
   1949 			switch(textureComponentCount())
   1950 			{
   1951 			case 4:
   1952 				c.x = Pointer<Short4>(buffer[f0])[index[0]];
   1953 				c.y = Pointer<Short4>(buffer[f1])[index[1]];
   1954 				c.z = Pointer<Short4>(buffer[f2])[index[2]];
   1955 				c.w = Pointer<Short4>(buffer[f3])[index[3]];
   1956 				transpose4x4(c.x, c.y, c.z, c.w);
   1957 				break;
   1958 			case 3:
   1959 				c.x = Pointer<Short4>(buffer[f0])[index[0]];
   1960 				c.y = Pointer<Short4>(buffer[f1])[index[1]];
   1961 				c.z = Pointer<Short4>(buffer[f2])[index[2]];
   1962 				c.w = Pointer<Short4>(buffer[f3])[index[3]];
   1963 				transpose4x3(c.x, c.y, c.z, c.w);
   1964 				break;
   1965 			case 2:
   1966 				c.x = *Pointer<Short4>(buffer[f0] + 4 * index[0]);
   1967 				c.x = As<Short4>(UnpackLow(c.x, *Pointer<Short4>(buffer[f1] + 4 * index[1])));
   1968 				c.z = *Pointer<Short4>(buffer[f2] + 4 * index[2]);
   1969 				c.z = As<Short4>(UnpackLow(c.z, *Pointer<Short4>(buffer[f3] + 4 * index[3])));
   1970 				c.y = c.x;
   1971 				c.x = UnpackLow(As<Int2>(c.x), As<Int2>(c.z));
   1972 				c.y = UnpackHigh(As<Int2>(c.y), As<Int2>(c.z));
   1973 				break;
   1974 			case 1:
   1975 				c.x = Insert(c.x, Pointer<Short>(buffer[f0])[index[0]], 0);
   1976 				c.x = Insert(c.x, Pointer<Short>(buffer[f1])[index[1]], 1);
   1977 				c.x = Insert(c.x, Pointer<Short>(buffer[f2])[index[2]], 2);
   1978 				c.x = Insert(c.x, Pointer<Short>(buffer[f3])[index[3]], 3);
   1979 				break;
   1980 			default:
   1981 				ASSERT(false);
   1982 			}
   1983 		}
   1984 		else ASSERT(false);
   1985 
   1986 		if(state.sRGB)
   1987 		{
   1988 			if(state.textureFormat == FORMAT_R5G6B5)
   1989 			{
   1990 				sRGBtoLinear16_5_16(c.x);
   1991 				sRGBtoLinear16_6_16(c.y);
   1992 				sRGBtoLinear16_5_16(c.z);
   1993 			}
   1994 			else
   1995 			{
   1996 				for(int i = 0; i < textureComponentCount(); i++)
   1997 				{
   1998 					if(isRGBComponent(i))
   1999 					{
   2000 						sRGBtoLinear16_8_16(c[i]);
   2001 					}
   2002 				}
   2003 			}
   2004 		}
   2005 
   2006 		return c;
   2007 	}
   2008 
   2009 	Vector4s SamplerCore::sampleTexel(Short4 &uuuu, Short4 &vvvv, Short4 &wwww, Vector4f &offset, Pointer<Byte> &mipmap, Pointer<Byte> buffer[4], SamplerFunction function)
   2010 	{
   2011 		Vector4s c;
   2012 
   2013 		UInt index[4];
   2014 		computeIndices(index, uuuu, vvvv, wwww, offset, mipmap, function);
   2015 
   2016 		if(hasYuvFormat())
   2017 		{
   2018 			// Generic YPbPr to RGB transformation
   2019 			// R = Y                               +           2 * (1 - Kr) * Pr
   2020 			// G = Y - 2 * Kb * (1 - Kb) / Kg * Pb - 2 * Kr * (1 - Kr) / Kg * Pr
   2021 			// B = Y +           2 * (1 - Kb) * Pb
   2022 
   2023 			float Kb = 0.114f;
   2024 			float Kr = 0.299f;
   2025 			int studioSwing = 1;
   2026 
   2027 			switch(state.textureFormat)
   2028 			{
   2029 			case FORMAT_YV12_BT601:
   2030 				Kb = 0.114f;
   2031 				Kr = 0.299f;
   2032 				studioSwing = 1;
   2033 				break;
   2034 			case FORMAT_YV12_BT709:
   2035 				Kb = 0.0722f;
   2036 				Kr = 0.2126f;
   2037 				studioSwing = 1;
   2038 				break;
   2039 			case FORMAT_YV12_JFIF:
   2040 				Kb = 0.114f;
   2041 				Kr = 0.299f;
   2042 				studioSwing = 0;
   2043 				break;
   2044 			default:
   2045 				ASSERT(false);
   2046 			}
   2047 
   2048 			const float Kg = 1.0f - Kr - Kb;
   2049 
   2050 			const float Rr = 2 * (1 - Kr);
   2051 			const float Gb = -2 * Kb * (1 - Kb) / Kg;
   2052 			const float Gr = -2 * Kr * (1 - Kr) / Kg;
   2053 			const float Bb = 2 * (1 - Kb);
   2054 
   2055 			// Scaling and bias for studio-swing range: Y = [16 .. 235], U/V = [16 .. 240]
   2056 			const float Yy = studioSwing ? 255.0f / (235 - 16) : 1.0f;
   2057 			const float Uu = studioSwing ? 255.0f / (240 - 16) : 1.0f;
   2058 			const float Vv = studioSwing ? 255.0f / (240 - 16) : 1.0f;
   2059 
   2060 			const float Rv = Vv *  Rr;
   2061 			const float Gu = Uu *  Gb;
   2062 			const float Gv = Vv *  Gr;
   2063 			const float Bu = Uu *  Bb;
   2064 
   2065 			const float R0 = (studioSwing * -16 * Yy - 128 * Rv) / 255;
   2066 			const float G0 = (studioSwing * -16 * Yy - 128 * Gu - 128 * Gv) / 255;
   2067 			const float B0 = (studioSwing * -16 * Yy - 128 * Bu) / 255;
   2068 
   2069 			Int c0 = Int(buffer[0][index[0]]);
   2070 			Int c1 = Int(buffer[0][index[1]]);
   2071 			Int c2 = Int(buffer[0][index[2]]);
   2072 			Int c3 = Int(buffer[0][index[3]]);
   2073 			c0 = c0 | (c1 << 8) | (c2 << 16) | (c3 << 24);
   2074 			UShort4 Y = As<UShort4>(Unpack(As<Byte4>(c0)));
   2075 
   2076 			computeIndices(index, uuuu, vvvv, wwww, offset, mipmap + sizeof(Mipmap), function);
   2077 			c0 = Int(buffer[1][index[0]]);
   2078 			c1 = Int(buffer[1][index[1]]);
   2079 			c2 = Int(buffer[1][index[2]]);
   2080 			c3 = Int(buffer[1][index[3]]);
   2081 			c0 = c0 | (c1 << 8) | (c2 << 16) | (c3 << 24);
   2082 			UShort4 V = As<UShort4>(Unpack(As<Byte4>(c0)));
   2083 
   2084 			c0 = Int(buffer[2][index[0]]);
   2085 			c1 = Int(buffer[2][index[1]]);
   2086 			c2 = Int(buffer[2][index[2]]);
   2087 			c3 = Int(buffer[2][index[3]]);
   2088 			c0 = c0 | (c1 << 8) | (c2 << 16) | (c3 << 24);
   2089 			UShort4 U = As<UShort4>(Unpack(As<Byte4>(c0)));
   2090 
   2091 			const UShort4 yY = UShort4(iround(Yy * 0x4000));
   2092 			const UShort4 rV = UShort4(iround(Rv * 0x4000));
   2093 			const UShort4 gU = UShort4(iround(-Gu * 0x4000));
   2094 			const UShort4 gV = UShort4(iround(-Gv * 0x4000));
   2095 			const UShort4 bU = UShort4(iround(Bu * 0x4000));
   2096 
   2097 			const UShort4 r0 = UShort4(iround(-R0 * 0x4000));
   2098 			const UShort4 g0 = UShort4(iround(G0 * 0x4000));
   2099 			const UShort4 b0 = UShort4(iround(-B0 * 0x4000));
   2100 
   2101 			UShort4 y = MulHigh(Y, yY);
   2102 			UShort4 r = SubSat(y + MulHigh(V, rV), r0);
   2103 			UShort4 g = SubSat(y + g0, MulHigh(U, gU) + MulHigh(V, gV));
   2104 			UShort4 b = SubSat(y + MulHigh(U, bU), b0);
   2105 
   2106 			c.x = Min(r, UShort4(0x3FFF)) << 2;
   2107 			c.y = Min(g, UShort4(0x3FFF)) << 2;
   2108 			c.z = Min(b, UShort4(0x3FFF)) << 2;
   2109 		}
   2110 		else
   2111 		{
   2112 			return sampleTexel(index, buffer);
   2113 		}
   2114 
   2115 		return c;
   2116 	}
   2117 
   2118 	Vector4f SamplerCore::sampleTexel(Int4 &uuuu, Int4 &vvvv, Int4 &wwww, Float4 &z, Pointer<Byte> &mipmap, Pointer<Byte> buffer[4], SamplerFunction function)
   2119 	{
   2120 		Vector4f c;
   2121 
   2122 		UInt index[4];
   2123 		computeIndices(index, uuuu, vvvv, wwww, mipmap, function);
   2124 
   2125 		if(hasFloatTexture() || has32bitIntegerTextureComponents())
   2126 		{
   2127 			int f0 = state.textureType == TEXTURE_CUBE ? 0 : 0;
   2128 			int f1 = state.textureType == TEXTURE_CUBE ? 1 : 0;
   2129 			int f2 = state.textureType == TEXTURE_CUBE ? 2 : 0;
   2130 			int f3 = state.textureType == TEXTURE_CUBE ? 3 : 0;
   2131 
   2132 			// Read texels
   2133 			switch(textureComponentCount())
   2134 			{
   2135 			case 4:
   2136 				c.x = *Pointer<Float4>(buffer[f0] + index[0] * 16, 16);
   2137 				c.y = *Pointer<Float4>(buffer[f1] + index[1] * 16, 16);
   2138 				c.z = *Pointer<Float4>(buffer[f2] + index[2] * 16, 16);
   2139 				c.w = *Pointer<Float4>(buffer[f3] + index[3] * 16, 16);
   2140 				transpose4x4(c.x, c.y, c.z, c.w);
   2141 				break;
   2142 			case 3:
   2143 				c.x = *Pointer<Float4>(buffer[f0] + index[0] * 16, 16);
   2144 				c.y = *Pointer<Float4>(buffer[f1] + index[1] * 16, 16);
   2145 				c.z = *Pointer<Float4>(buffer[f2] + index[2] * 16, 16);
   2146 				c.w = *Pointer<Float4>(buffer[f3] + index[3] * 16, 16);
   2147 				transpose4x3(c.x, c.y, c.z, c.w);
   2148 				break;
   2149 			case 2:
   2150 				// FIXME: Optimal shuffling?
   2151 				c.x.xy = *Pointer<Float4>(buffer[f0] + index[0] * 8);
   2152 				c.x.zw = *Pointer<Float4>(buffer[f1] + index[1] * 8 - 8);
   2153 				c.z.xy = *Pointer<Float4>(buffer[f2] + index[2] * 8);
   2154 				c.z.zw = *Pointer<Float4>(buffer[f3] + index[3] * 8 - 8);
   2155 				c.y = c.x;
   2156 				c.x = Float4(c.x.xz, c.z.xz);
   2157 				c.y = Float4(c.y.yw, c.z.yw);
   2158 				break;
   2159 			case 1:
   2160 				// FIXME: Optimal shuffling?
   2161 				c.x.x = *Pointer<Float>(buffer[f0] + index[0] * 4);
   2162 				c.x.y = *Pointer<Float>(buffer[f1] + index[1] * 4);
   2163 				c.x.z = *Pointer<Float>(buffer[f2] + index[2] * 4);
   2164 				c.x.w = *Pointer<Float>(buffer[f3] + index[3] * 4);
   2165 				break;
   2166 			default:
   2167 				ASSERT(false);
   2168 			}
   2169 
   2170 			if(state.compare != COMPARE_BYPASS)
   2171 			{
   2172 				Float4 ref = z;
   2173 
   2174 				if(!hasFloatTexture())
   2175 				{
   2176 					ref = Min(Max(ref, Float4(0.0f)), Float4(1.0f));
   2177 				}
   2178 
   2179 				Int4 boolean;
   2180 
   2181 				switch(state.compare)
   2182 				{
   2183 				case COMPARE_LESSEQUAL:    boolean = CmpLE(ref, c.x);  break;
   2184 				case COMPARE_GREATEREQUAL: boolean = CmpNLT(ref, c.x); break;
   2185 				case COMPARE_LESS:         boolean = CmpLT(ref, c.x);  break;
   2186 				case COMPARE_GREATER:      boolean = CmpNLE(ref, c.x); break;
   2187 				case COMPARE_EQUAL:        boolean = CmpEQ(ref, c.x);  break;
   2188 				case COMPARE_NOTEQUAL:     boolean = CmpNEQ(ref, c.x); break;
   2189 				case COMPARE_ALWAYS:       boolean = Int4(-1);         break;
   2190 				case COMPARE_NEVER:        boolean = Int4(0);          break;
   2191 				default:                   ASSERT(false);
   2192 				}
   2193 
   2194 				c.x = As<Float4>(boolean & As<Int4>(Float4(1.0f)));
   2195 				c.y = Float4(0.0f);
   2196 				c.z = Float4(0.0f);
   2197 				c.w = Float4(1.0f);
   2198 			}
   2199 		}
   2200 		else
   2201 		{
   2202 			ASSERT(!hasYuvFormat());
   2203 
   2204 			Vector4s cs = sampleTexel(index, buffer);
   2205 
   2206 			bool isInteger = Surface::isNonNormalizedInteger(state.textureFormat);
   2207 			int componentCount = textureComponentCount();
   2208 			for(int n = 0; n < componentCount; n++)
   2209 			{
   2210 				if(hasUnsignedTextureComponent(n))
   2211 				{
   2212 					if(isInteger)
   2213 					{
   2214 						c[n] = As<Float4>(Int4(As<UShort4>(cs[n])));
   2215 					}
   2216 					else
   2217 					{
   2218 						c[n] = Float4(As<UShort4>(cs[n]));
   2219 					}
   2220 				}
   2221 				else
   2222 				{
   2223 					if(isInteger)
   2224 					{
   2225 						c[n] = As<Float4>(Int4(cs[n]));
   2226 					}
   2227 					else
   2228 					{
   2229 						c[n] = Float4(cs[n]);
   2230 					}
   2231 				}
   2232 			}
   2233 		}
   2234 
   2235 		return c;
   2236 	}
   2237 
   2238 	void SamplerCore::selectMipmap(Pointer<Byte> &texture, Pointer<Byte> buffer[4], Pointer<Byte> &mipmap, Float &lod, Int face[4], bool secondLOD)
   2239 	{
   2240 		if(state.mipmapFilter == MIPMAP_NONE)
   2241 		{
   2242 			mipmap = texture + OFFSET(Texture,mipmap[0]);
   2243 		}
   2244 		else
   2245 		{
   2246 			Int ilod;
   2247 
   2248 			if(state.mipmapFilter == MIPMAP_POINT)
   2249 			{
   2250 				ilod = RoundInt(lod);
   2251 			}
   2252 			else   // MIPMAP_LINEAR
   2253 			{
   2254 				ilod = Int(lod);
   2255 			}
   2256 
   2257 			mipmap = texture + OFFSET(Texture,mipmap) + ilod * sizeof(Mipmap) + secondLOD * sizeof(Mipmap);
   2258 		}
   2259 
   2260 		if(state.textureType != TEXTURE_CUBE)
   2261 		{
   2262 			buffer[0] = *Pointer<Pointer<Byte> >(mipmap + OFFSET(Mipmap,buffer[0]));
   2263 
   2264 			if(hasYuvFormat())
   2265 			{
   2266 				buffer[1] = *Pointer<Pointer<Byte> >(mipmap + OFFSET(Mipmap,buffer[1]));
   2267 				buffer[2] = *Pointer<Pointer<Byte> >(mipmap + OFFSET(Mipmap,buffer[2]));
   2268 			}
   2269 		}
   2270 		else
   2271 		{
   2272 			for(int i = 0; i < 4; i++)
   2273 			{
   2274 				buffer[i] = *Pointer<Pointer<Byte> >(mipmap + OFFSET(Mipmap,buffer) + face[i] * sizeof(void*));
   2275 			}
   2276 		}
   2277 	}
   2278 
   2279 	Int4 SamplerCore::computeFilterOffset(Float &lod)
   2280 	{
   2281 		Int4 filter = -1;
   2282 
   2283 		if(state.textureFilter == FILTER_POINT)
   2284 		{
   2285 			filter = 0;
   2286 		}
   2287 		else if(state.textureFilter == FILTER_MIN_LINEAR_MAG_POINT)
   2288 		{
   2289 			filter = CmpNLE(Float4(lod), Float4(0.0f));
   2290 		}
   2291 		else if(state.textureFilter == FILTER_MIN_POINT_MAG_LINEAR)
   2292 		{
   2293 			filter = CmpLE(Float4(lod), Float4(0.0f));
   2294 		}
   2295 
   2296 		return filter;
   2297 	}
   2298 
   2299 	Short4 SamplerCore::address(Float4 &uw, AddressingMode addressingMode, Pointer<Byte> &mipmap)
   2300 	{
   2301 		if(addressingMode == ADDRESSING_LAYER && state.textureType != TEXTURE_2D_ARRAY)
   2302 		{
   2303 			return Short4();   // Unused
   2304 		}
   2305 		else if(addressingMode == ADDRESSING_LAYER && state.textureType == TEXTURE_2D_ARRAY)
   2306 		{
   2307 			return Min(Max(Short4(RoundInt(uw)), Short4(0)), *Pointer<Short4>(mipmap + OFFSET(Mipmap, depth)) - Short4(1));
   2308 		}
   2309 		else if(addressingMode == ADDRESSING_CLAMP || addressingMode == ADDRESSING_BORDER)
   2310 		{
   2311 			Float4 clamp = Min(Max(uw, Float4(0.0f)), Float4(65535.0f / 65536.0f));
   2312 
   2313 			return Short4(Int4(clamp * Float4(1 << 16)));
   2314 		}
   2315 		else if(addressingMode == ADDRESSING_MIRROR)
   2316 		{
   2317 			Int4 convert = Int4(uw * Float4(1 << 16));
   2318 			Int4 mirror = (convert << 15) >> 31;
   2319 
   2320 			convert ^= mirror;
   2321 
   2322 			return Short4(convert);
   2323 		}
   2324 		else if(addressingMode == ADDRESSING_MIRRORONCE)
   2325 		{
   2326 			// Absolute value
   2327 			Int4 convert = Int4(Abs(uw * Float4(1 << 16)));
   2328 
   2329 			// Clamp
   2330 			convert -= Int4(0x00008000, 0x00008000, 0x00008000, 0x00008000);
   2331 			convert = As<Int4>(PackSigned(convert, convert));
   2332 
   2333 			return As<Short4>(Int2(convert)) + Short4(0x8000u);
   2334 		}
   2335 		else   // Wrap
   2336 		{
   2337 			return Short4(Int4(uw * Float4(1 << 16)));
   2338 		}
   2339 	}
   2340 
   2341 	void SamplerCore::address(Float4 &uvw, Int4 &xyz0, Int4 &xyz1, Float4 &f, Pointer<Byte> &mipmap, Float4 &texOffset, Int4 &filter, int whd, AddressingMode addressingMode, SamplerFunction function)
   2342 	{
   2343 		if(addressingMode == ADDRESSING_LAYER && state.textureType != TEXTURE_2D_ARRAY)
   2344 		{
   2345 			return;   // Unused
   2346 		}
   2347 
   2348 		Int4 dim = Int4(*Pointer<Short4>(mipmap + whd, 16));
   2349 		Int4 maxXYZ = dim - Int4(1);
   2350 
   2351 		if(function == Fetch)
   2352 		{
   2353 			xyz0 = Min(Max(((function.option == Offset) && (addressingMode != ADDRESSING_LAYER)) ? As<Int4>(uvw) + As<Int4>(texOffset) : As<Int4>(uvw), Int4(0)), maxXYZ);
   2354 		}
   2355 		else if(addressingMode == ADDRESSING_LAYER && state.textureType == TEXTURE_2D_ARRAY)   // Note: Offset does not apply to array layers
   2356 		{
   2357 			xyz0 = Min(Max(RoundInt(uvw), Int4(0)), maxXYZ);
   2358 		}
   2359 		else
   2360 		{
   2361 			const int halfBits = 0x3EFFFFFF;   // Value just under 0.5f
   2362 			const int oneBits  = 0x3F7FFFFF;   // Value just under 1.0f
   2363 			const int twoBits  = 0x3FFFFFFF;   // Value just under 2.0f
   2364 
   2365 			bool pointFilter = state.textureFilter == FILTER_POINT ||
   2366 			                   state.textureFilter == FILTER_MIN_POINT_MAG_LINEAR ||
   2367 			                   state.textureFilter == FILTER_MIN_LINEAR_MAG_POINT;
   2368 
   2369 			Float4 coord = uvw;
   2370 
   2371 			if(state.textureType == TEXTURE_RECTANGLE)
   2372 			{
   2373 				coord = Min(Max(coord, Float4(0.0f)), Float4(dim - Int4(1)));
   2374 			}
   2375 			else
   2376 			{
   2377 				switch(addressingMode)
   2378 				{
   2379 				case ADDRESSING_CLAMP:
   2380 				case ADDRESSING_BORDER:
   2381 				case ADDRESSING_SEAMLESS:
   2382 					// Linear filtering of cube doesn't require clamping because the coordinates
   2383 					// are already in [0, 1] range and numerical imprecision is tolerated.
   2384 					if(addressingMode != ADDRESSING_SEAMLESS || pointFilter)
   2385 					{
   2386 						Float4 one = As<Float4>(Int4(oneBits));
   2387 						coord = Min(Max(coord, Float4(0.0f)), one);
   2388 					}
   2389 					break;
   2390 				case ADDRESSING_MIRROR:
   2391 				{
   2392 					Float4 half = As<Float4>(Int4(halfBits));
   2393 					Float4 one = As<Float4>(Int4(oneBits));
   2394 					Float4 two = As<Float4>(Int4(twoBits));
   2395 					coord = one - Abs(two * Frac(coord * half) - one);
   2396 				}
   2397 				break;
   2398 				case ADDRESSING_MIRRORONCE:
   2399 				{
   2400 					Float4 half = As<Float4>(Int4(halfBits));
   2401 					Float4 one = As<Float4>(Int4(oneBits));
   2402 					Float4 two = As<Float4>(Int4(twoBits));
   2403 					coord = one - Abs(two * Frac(Min(Max(coord, -one), two) * half) - one);
   2404 				}
   2405 				break;
   2406 				default:   // Wrap
   2407 					coord = Frac(coord);
   2408 					break;
   2409 				}
   2410 
   2411 				coord = coord * Float4(dim);
   2412 			}
   2413 
   2414 			if(state.textureFilter == FILTER_POINT ||
   2415 			   state.textureFilter == FILTER_GATHER)
   2416 			{
   2417 				xyz0 = Int4(coord);
   2418 			}
   2419 			else
   2420 			{
   2421 				if(state.textureFilter == FILTER_MIN_POINT_MAG_LINEAR ||
   2422 				   state.textureFilter == FILTER_MIN_LINEAR_MAG_POINT)
   2423 				{
   2424 					coord -= As<Float4>(As<Int4>(Float4(0.5f)) & filter);
   2425 				}
   2426 				else
   2427 				{
   2428 					coord -= Float4(0.5f);
   2429 				}
   2430 
   2431 				Float4 floor = Floor(coord);
   2432 				xyz0 = Int4(floor);
   2433 				f = coord - floor;
   2434 			}
   2435 
   2436 			if(function.option == Offset)
   2437 			{
   2438 				xyz0 += As<Int4>(texOffset);
   2439 			}
   2440 
   2441 			if(addressingMode == ADDRESSING_SEAMLESS)
   2442 			{
   2443 				xyz0 += Int4(1);
   2444 			}
   2445 
   2446 			xyz1 = xyz0 - filter;   // Increment
   2447 
   2448 			if(function.option == Offset)
   2449 			{
   2450 				switch(addressingMode)
   2451 				{
   2452 				case ADDRESSING_SEAMLESS:
   2453 					ASSERT(false);   // Cube sampling doesn't support offset.
   2454 				case ADDRESSING_MIRROR:
   2455 				case ADDRESSING_MIRRORONCE:
   2456 				case ADDRESSING_BORDER:
   2457 					// FIXME: Implement ADDRESSING_MIRROR, ADDRESSING_MIRRORONCE, and ADDRESSING_BORDER.
   2458 					// Fall through to Clamp.
   2459 				case ADDRESSING_CLAMP:
   2460 					xyz0 = Min(Max(xyz0, Int4(0)), maxXYZ);
   2461 					xyz1 = Min(Max(xyz1, Int4(0)), maxXYZ);
   2462 					break;
   2463 				default:   // Wrap
   2464 					xyz0 = (xyz0 + dim * Int4(-MIN_PROGRAM_TEXEL_OFFSET)) % dim;
   2465 					xyz1 = (xyz1 + dim * Int4(-MIN_PROGRAM_TEXEL_OFFSET)) % dim;
   2466 					break;
   2467 				}
   2468 			}
   2469 			else if(state.textureFilter != FILTER_POINT)
   2470 			{
   2471 				switch(addressingMode)
   2472 				{
   2473 				case ADDRESSING_SEAMLESS:
   2474 					break;
   2475 				case ADDRESSING_MIRROR:
   2476 				case ADDRESSING_MIRRORONCE:
   2477 				case ADDRESSING_BORDER:
   2478 				case ADDRESSING_CLAMP:
   2479 					xyz0 = Max(xyz0, Int4(0));
   2480 					xyz1 = Min(xyz1, maxXYZ);
   2481 					break;
   2482 				default:   // Wrap
   2483 					{
   2484 						Int4 under = CmpLT(xyz0, Int4(0));
   2485 						xyz0 = (under & maxXYZ) | (~under & xyz0);   // xyz < 0 ? dim - 1 : xyz   // FIXME: IfThenElse()
   2486 
   2487 						Int4 nover = CmpLT(xyz1, dim);
   2488 						xyz1 = nover & xyz1;   // xyz >= dim ? 0 : xyz
   2489 					}
   2490 					break;
   2491 				}
   2492 			}
   2493 		}
   2494 	}
   2495 
   2496 	void SamplerCore::convertFixed12(Short4 &cs, Float4 &cf)
   2497 	{
   2498 		cs = RoundShort4(cf * Float4(0x1000));
   2499 	}
   2500 
   2501 	void SamplerCore::convertFixed12(Vector4s &cs, Vector4f &cf)
   2502 	{
   2503 		convertFixed12(cs.x, cf.x);
   2504 		convertFixed12(cs.y, cf.y);
   2505 		convertFixed12(cs.z, cf.z);
   2506 		convertFixed12(cs.w, cf.w);
   2507 	}
   2508 
   2509 	void SamplerCore::convertSigned12(Float4 &cf, Short4 &cs)
   2510 	{
   2511 		cf = Float4(cs) * Float4(1.0f / 0x0FFE);
   2512 	}
   2513 
   2514 //	void SamplerCore::convertSigned12(Vector4f &cf, Vector4s &cs)
   2515 //	{
   2516 //		convertSigned12(cf.x, cs.x);
   2517 //		convertSigned12(cf.y, cs.y);
   2518 //		convertSigned12(cf.z, cs.z);
   2519 //		convertSigned12(cf.w, cs.w);
   2520 //	}
   2521 
   2522 	void SamplerCore::convertSigned15(Float4 &cf, Short4 &cs)
   2523 	{
   2524 		cf = Float4(cs) * Float4(1.0f / 0x7FFF);
   2525 	}
   2526 
   2527 	void SamplerCore::convertUnsigned16(Float4 &cf, Short4 &cs)
   2528 	{
   2529 		cf = Float4(As<UShort4>(cs)) * Float4(1.0f / 0xFFFF);
   2530 	}
   2531 
   2532 	void SamplerCore::sRGBtoLinear16_8_16(Short4 &c)
   2533 	{
   2534 		c = As<UShort4>(c) >> 8;
   2535 
   2536 		Pointer<Byte> LUT = Pointer<Byte>(constants + OFFSET(Constants,sRGBtoLinear8_16));
   2537 
   2538 		c = Insert(c, *Pointer<Short>(LUT + 2 * Int(Extract(c, 0))), 0);
   2539 		c = Insert(c, *Pointer<Short>(LUT + 2 * Int(Extract(c, 1))), 1);
   2540 		c = Insert(c, *Pointer<Short>(LUT + 2 * Int(Extract(c, 2))), 2);
   2541 		c = Insert(c, *Pointer<Short>(LUT + 2 * Int(Extract(c, 3))), 3);
   2542 	}
   2543 
   2544 	void SamplerCore::sRGBtoLinear16_6_16(Short4 &c)
   2545 	{
   2546 		c = As<UShort4>(c) >> 10;
   2547 
   2548 		Pointer<Byte> LUT = Pointer<Byte>(constants + OFFSET(Constants,sRGBtoLinear6_16));
   2549 
   2550 		c = Insert(c, *Pointer<Short>(LUT + 2 * Int(Extract(c, 0))), 0);
   2551 		c = Insert(c, *Pointer<Short>(LUT + 2 * Int(Extract(c, 1))), 1);
   2552 		c = Insert(c, *Pointer<Short>(LUT + 2 * Int(Extract(c, 2))), 2);
   2553 		c = Insert(c, *Pointer<Short>(LUT + 2 * Int(Extract(c, 3))), 3);
   2554 	}
   2555 
   2556 	void SamplerCore::sRGBtoLinear16_5_16(Short4 &c)
   2557 	{
   2558 		c = As<UShort4>(c) >> 11;
   2559 
   2560 		Pointer<Byte> LUT = Pointer<Byte>(constants + OFFSET(Constants,sRGBtoLinear5_16));
   2561 
   2562 		c = Insert(c, *Pointer<Short>(LUT + 2 * Int(Extract(c, 0))), 0);
   2563 		c = Insert(c, *Pointer<Short>(LUT + 2 * Int(Extract(c, 1))), 1);
   2564 		c = Insert(c, *Pointer<Short>(LUT + 2 * Int(Extract(c, 2))), 2);
   2565 		c = Insert(c, *Pointer<Short>(LUT + 2 * Int(Extract(c, 3))), 3);
   2566 	}
   2567 
   2568 	bool SamplerCore::hasFloatTexture() const
   2569 	{
   2570 		return Surface::isFloatFormat(state.textureFormat);
   2571 	}
   2572 
   2573 	bool SamplerCore::hasUnnormalizedIntegerTexture() const
   2574 	{
   2575 		return Surface::isNonNormalizedInteger(state.textureFormat);
   2576 	}
   2577 
   2578 	bool SamplerCore::hasUnsignedTextureComponent(int component) const
   2579 	{
   2580 		return Surface::isUnsignedComponent(state.textureFormat, component);
   2581 	}
   2582 
   2583 	int SamplerCore::textureComponentCount() const
   2584 	{
   2585 		return Surface::componentCount(state.textureFormat);
   2586 	}
   2587 
   2588 	bool SamplerCore::hasThirdCoordinate() const
   2589 	{
   2590 		return (state.textureType == TEXTURE_3D) || (state.textureType == TEXTURE_2D_ARRAY);
   2591 	}
   2592 
   2593 	bool SamplerCore::has16bitTextureFormat() const
   2594 	{
   2595 		switch(state.textureFormat)
   2596 		{
   2597 		case FORMAT_R5G6B5:
   2598 			return true;
   2599 		case FORMAT_R8_SNORM:
   2600 		case FORMAT_G8R8_SNORM:
   2601 		case FORMAT_X8B8G8R8_SNORM:
   2602 		case FORMAT_A8B8G8R8_SNORM:
   2603 		case FORMAT_R8I:
   2604 		case FORMAT_R8UI:
   2605 		case FORMAT_G8R8I:
   2606 		case FORMAT_G8R8UI:
   2607 		case FORMAT_X8B8G8R8I:
   2608 		case FORMAT_X8B8G8R8UI:
   2609 		case FORMAT_A8B8G8R8I:
   2610 		case FORMAT_A8B8G8R8UI:
   2611 		case FORMAT_R32I:
   2612 		case FORMAT_R32UI:
   2613 		case FORMAT_G32R32I:
   2614 		case FORMAT_G32R32UI:
   2615 		case FORMAT_X32B32G32R32I:
   2616 		case FORMAT_X32B32G32R32UI:
   2617 		case FORMAT_A32B32G32R32I:
   2618 		case FORMAT_A32B32G32R32UI:
   2619 		case FORMAT_G8R8:
   2620 		case FORMAT_X8R8G8B8:
   2621 		case FORMAT_X8B8G8R8:
   2622 		case FORMAT_A8R8G8B8:
   2623 		case FORMAT_A8B8G8R8:
   2624 		case FORMAT_SRGB8_X8:
   2625 		case FORMAT_SRGB8_A8:
   2626 		case FORMAT_V8U8:
   2627 		case FORMAT_Q8W8V8U8:
   2628 		case FORMAT_X8L8V8U8:
   2629 		case FORMAT_R32F:
   2630 		case FORMAT_G32R32F:
   2631 		case FORMAT_X32B32G32R32F:
   2632 		case FORMAT_A32B32G32R32F:
   2633 		case FORMAT_X32B32G32R32F_UNSIGNED:
   2634 		case FORMAT_A8:
   2635 		case FORMAT_R8:
   2636 		case FORMAT_L8:
   2637 		case FORMAT_A8L8:
   2638 		case FORMAT_D32F:
   2639 		case FORMAT_D32FS8:
   2640 		case FORMAT_D32F_LOCKABLE:
   2641 		case FORMAT_D32FS8_TEXTURE:
   2642 		case FORMAT_D32F_SHADOW:
   2643 		case FORMAT_D32FS8_SHADOW:
   2644 		case FORMAT_L16:
   2645 		case FORMAT_G16R16:
   2646 		case FORMAT_A16B16G16R16:
   2647 		case FORMAT_V16U16:
   2648 		case FORMAT_A16W16V16U16:
   2649 		case FORMAT_Q16W16V16U16:
   2650 		case FORMAT_R16I:
   2651 		case FORMAT_R16UI:
   2652 		case FORMAT_G16R16I:
   2653 		case FORMAT_G16R16UI:
   2654 		case FORMAT_X16B16G16R16I:
   2655 		case FORMAT_X16B16G16R16UI:
   2656 		case FORMAT_A16B16G16R16I:
   2657 		case FORMAT_A16B16G16R16UI:
   2658 		case FORMAT_YV12_BT601:
   2659 		case FORMAT_YV12_BT709:
   2660 		case FORMAT_YV12_JFIF:
   2661 			return false;
   2662 		default:
   2663 			ASSERT(false);
   2664 		}
   2665 
   2666 		return false;
   2667 	}
   2668 
   2669 	bool SamplerCore::has8bitTextureComponents() const
   2670 	{
   2671 		switch(state.textureFormat)
   2672 		{
   2673 		case FORMAT_G8R8:
   2674 		case FORMAT_X8R8G8B8:
   2675 		case FORMAT_X8B8G8R8:
   2676 		case FORMAT_A8R8G8B8:
   2677 		case FORMAT_A8B8G8R8:
   2678 		case FORMAT_SRGB8_X8:
   2679 		case FORMAT_SRGB8_A8:
   2680 		case FORMAT_V8U8:
   2681 		case FORMAT_Q8W8V8U8:
   2682 		case FORMAT_X8L8V8U8:
   2683 		case FORMAT_A8:
   2684 		case FORMAT_R8:
   2685 		case FORMAT_L8:
   2686 		case FORMAT_A8L8:
   2687 		case FORMAT_R8_SNORM:
   2688 		case FORMAT_G8R8_SNORM:
   2689 		case FORMAT_X8B8G8R8_SNORM:
   2690 		case FORMAT_A8B8G8R8_SNORM:
   2691 		case FORMAT_R8I:
   2692 		case FORMAT_R8UI:
   2693 		case FORMAT_G8R8I:
   2694 		case FORMAT_G8R8UI:
   2695 		case FORMAT_X8B8G8R8I:
   2696 		case FORMAT_X8B8G8R8UI:
   2697 		case FORMAT_A8B8G8R8I:
   2698 		case FORMAT_A8B8G8R8UI:
   2699 			return true;
   2700 		case FORMAT_R5G6B5:
   2701 		case FORMAT_R32F:
   2702 		case FORMAT_G32R32F:
   2703 		case FORMAT_X32B32G32R32F:
   2704 		case FORMAT_A32B32G32R32F:
   2705 		case FORMAT_X32B32G32R32F_UNSIGNED:
   2706 		case FORMAT_D32F:
   2707 		case FORMAT_D32FS8:
   2708 		case FORMAT_D32F_LOCKABLE:
   2709 		case FORMAT_D32FS8_TEXTURE:
   2710 		case FORMAT_D32F_SHADOW:
   2711 		case FORMAT_D32FS8_SHADOW:
   2712 		case FORMAT_L16:
   2713 		case FORMAT_G16R16:
   2714 		case FORMAT_A16B16G16R16:
   2715 		case FORMAT_V16U16:
   2716 		case FORMAT_A16W16V16U16:
   2717 		case FORMAT_Q16W16V16U16:
   2718 		case FORMAT_R32I:
   2719 		case FORMAT_R32UI:
   2720 		case FORMAT_G32R32I:
   2721 		case FORMAT_G32R32UI:
   2722 		case FORMAT_X32B32G32R32I:
   2723 		case FORMAT_X32B32G32R32UI:
   2724 		case FORMAT_A32B32G32R32I:
   2725 		case FORMAT_A32B32G32R32UI:
   2726 		case FORMAT_R16I:
   2727 		case FORMAT_R16UI:
   2728 		case FORMAT_G16R16I:
   2729 		case FORMAT_G16R16UI:
   2730 		case FORMAT_X16B16G16R16I:
   2731 		case FORMAT_X16B16G16R16UI:
   2732 		case FORMAT_A16B16G16R16I:
   2733 		case FORMAT_A16B16G16R16UI:
   2734 		case FORMAT_YV12_BT601:
   2735 		case FORMAT_YV12_BT709:
   2736 		case FORMAT_YV12_JFIF:
   2737 			return false;
   2738 		default:
   2739 			ASSERT(false);
   2740 		}
   2741 
   2742 		return false;
   2743 	}
   2744 
   2745 	bool SamplerCore::has16bitTextureComponents() const
   2746 	{
   2747 		switch(state.textureFormat)
   2748 		{
   2749 		case FORMAT_R5G6B5:
   2750 		case FORMAT_R8_SNORM:
   2751 		case FORMAT_G8R8_SNORM:
   2752 		case FORMAT_X8B8G8R8_SNORM:
   2753 		case FORMAT_A8B8G8R8_SNORM:
   2754 		case FORMAT_R8I:
   2755 		case FORMAT_R8UI:
   2756 		case FORMAT_G8R8I:
   2757 		case FORMAT_G8R8UI:
   2758 		case FORMAT_X8B8G8R8I:
   2759 		case FORMAT_X8B8G8R8UI:
   2760 		case FORMAT_A8B8G8R8I:
   2761 		case FORMAT_A8B8G8R8UI:
   2762 		case FORMAT_R32I:
   2763 		case FORMAT_R32UI:
   2764 		case FORMAT_G32R32I:
   2765 		case FORMAT_G32R32UI:
   2766 		case FORMAT_X32B32G32R32I:
   2767 		case FORMAT_X32B32G32R32UI:
   2768 		case FORMAT_A32B32G32R32I:
   2769 		case FORMAT_A32B32G32R32UI:
   2770 		case FORMAT_G8R8:
   2771 		case FORMAT_X8R8G8B8:
   2772 		case FORMAT_X8B8G8R8:
   2773 		case FORMAT_A8R8G8B8:
   2774 		case FORMAT_A8B8G8R8:
   2775 		case FORMAT_SRGB8_X8:
   2776 		case FORMAT_SRGB8_A8:
   2777 		case FORMAT_V8U8:
   2778 		case FORMAT_Q8W8V8U8:
   2779 		case FORMAT_X8L8V8U8:
   2780 		case FORMAT_R32F:
   2781 		case FORMAT_G32R32F:
   2782 		case FORMAT_X32B32G32R32F:
   2783 		case FORMAT_A32B32G32R32F:
   2784 		case FORMAT_X32B32G32R32F_UNSIGNED:
   2785 		case FORMAT_A8:
   2786 		case FORMAT_R8:
   2787 		case FORMAT_L8:
   2788 		case FORMAT_A8L8:
   2789 		case FORMAT_D32F:
   2790 		case FORMAT_D32FS8:
   2791 		case FORMAT_D32F_LOCKABLE:
   2792 		case FORMAT_D32FS8_TEXTURE:
   2793 		case FORMAT_D32F_SHADOW:
   2794 		case FORMAT_D32FS8_SHADOW:
   2795 		case FORMAT_YV12_BT601:
   2796 		case FORMAT_YV12_BT709:
   2797 		case FORMAT_YV12_JFIF:
   2798 			return false;
   2799 		case FORMAT_L16:
   2800 		case FORMAT_G16R16:
   2801 		case FORMAT_A16B16G16R16:
   2802 		case FORMAT_R16I:
   2803 		case FORMAT_R16UI:
   2804 		case FORMAT_G16R16I:
   2805 		case FORMAT_G16R16UI:
   2806 		case FORMAT_X16B16G16R16I:
   2807 		case FORMAT_X16B16G16R16UI:
   2808 		case FORMAT_A16B16G16R16I:
   2809 		case FORMAT_A16B16G16R16UI:
   2810 		case FORMAT_V16U16:
   2811 		case FORMAT_A16W16V16U16:
   2812 		case FORMAT_Q16W16V16U16:
   2813 			return true;
   2814 		default:
   2815 			ASSERT(false);
   2816 		}
   2817 
   2818 		return false;
   2819 	}
   2820 
   2821 	bool SamplerCore::has32bitIntegerTextureComponents() const
   2822 	{
   2823 		switch(state.textureFormat)
   2824 		{
   2825 		case FORMAT_R5G6B5:
   2826 		case FORMAT_R8_SNORM:
   2827 		case FORMAT_G8R8_SNORM:
   2828 		case FORMAT_X8B8G8R8_SNORM:
   2829 		case FORMAT_A8B8G8R8_SNORM:
   2830 		case FORMAT_R8I:
   2831 		case FORMAT_R8UI:
   2832 		case FORMAT_G8R8I:
   2833 		case FORMAT_G8R8UI:
   2834 		case FORMAT_X8B8G8R8I:
   2835 		case FORMAT_X8B8G8R8UI:
   2836 		case FORMAT_A8B8G8R8I:
   2837 		case FORMAT_A8B8G8R8UI:
   2838 		case FORMAT_G8R8:
   2839 		case FORMAT_X8R8G8B8:
   2840 		case FORMAT_X8B8G8R8:
   2841 		case FORMAT_A8R8G8B8:
   2842 		case FORMAT_A8B8G8R8:
   2843 		case FORMAT_SRGB8_X8:
   2844 		case FORMAT_SRGB8_A8:
   2845 		case FORMAT_V8U8:
   2846 		case FORMAT_Q8W8V8U8:
   2847 		case FORMAT_X8L8V8U8:
   2848 		case FORMAT_L16:
   2849 		case FORMAT_G16R16:
   2850 		case FORMAT_A16B16G16R16:
   2851 		case FORMAT_R16I:
   2852 		case FORMAT_R16UI:
   2853 		case FORMAT_G16R16I:
   2854 		case FORMAT_G16R16UI:
   2855 		case FORMAT_X16B16G16R16I:
   2856 		case FORMAT_X16B16G16R16UI:
   2857 		case FORMAT_A16B16G16R16I:
   2858 		case FORMAT_A16B16G16R16UI:
   2859 		case FORMAT_V16U16:
   2860 		case FORMAT_A16W16V16U16:
   2861 		case FORMAT_Q16W16V16U16:
   2862 		case FORMAT_R32F:
   2863 		case FORMAT_G32R32F:
   2864 		case FORMAT_X32B32G32R32F:
   2865 		case FORMAT_A32B32G32R32F:
   2866 		case FORMAT_X32B32G32R32F_UNSIGNED:
   2867 		case FORMAT_A8:
   2868 		case FORMAT_R8:
   2869 		case FORMAT_L8:
   2870 		case FORMAT_A8L8:
   2871 		case FORMAT_D32F:
   2872 		case FORMAT_D32FS8:
   2873 		case FORMAT_D32F_LOCKABLE:
   2874 		case FORMAT_D32FS8_TEXTURE:
   2875 		case FORMAT_D32F_SHADOW:
   2876 		case FORMAT_D32FS8_SHADOW:
   2877 		case FORMAT_YV12_BT601:
   2878 		case FORMAT_YV12_BT709:
   2879 		case FORMAT_YV12_JFIF:
   2880 			return false;
   2881 		case FORMAT_R32I:
   2882 		case FORMAT_R32UI:
   2883 		case FORMAT_G32R32I:
   2884 		case FORMAT_G32R32UI:
   2885 		case FORMAT_X32B32G32R32I:
   2886 		case FORMAT_X32B32G32R32UI:
   2887 		case FORMAT_A32B32G32R32I:
   2888 		case FORMAT_A32B32G32R32UI:
   2889 			return true;
   2890 		default:
   2891 			ASSERT(false);
   2892 		}
   2893 
   2894 		return false;
   2895 	}
   2896 
   2897 	bool SamplerCore::hasYuvFormat() const
   2898 	{
   2899 		switch(state.textureFormat)
   2900 		{
   2901 		case FORMAT_YV12_BT601:
   2902 		case FORMAT_YV12_BT709:
   2903 		case FORMAT_YV12_JFIF:
   2904 			return true;
   2905 		case FORMAT_R5G6B5:
   2906 		case FORMAT_R8_SNORM:
   2907 		case FORMAT_G8R8_SNORM:
   2908 		case FORMAT_X8B8G8R8_SNORM:
   2909 		case FORMAT_A8B8G8R8_SNORM:
   2910 		case FORMAT_R8I:
   2911 		case FORMAT_R8UI:
   2912 		case FORMAT_G8R8I:
   2913 		case FORMAT_G8R8UI:
   2914 		case FORMAT_X8B8G8R8I:
   2915 		case FORMAT_X8B8G8R8UI:
   2916 		case FORMAT_A8B8G8R8I:
   2917 		case FORMAT_A8B8G8R8UI:
   2918 		case FORMAT_R32I:
   2919 		case FORMAT_R32UI:
   2920 		case FORMAT_G32R32I:
   2921 		case FORMAT_G32R32UI:
   2922 		case FORMAT_X32B32G32R32I:
   2923 		case FORMAT_X32B32G32R32UI:
   2924 		case FORMAT_A32B32G32R32I:
   2925 		case FORMAT_A32B32G32R32UI:
   2926 		case FORMAT_G8R8:
   2927 		case FORMAT_X8R8G8B8:
   2928 		case FORMAT_X8B8G8R8:
   2929 		case FORMAT_A8R8G8B8:
   2930 		case FORMAT_A8B8G8R8:
   2931 		case FORMAT_SRGB8_X8:
   2932 		case FORMAT_SRGB8_A8:
   2933 		case FORMAT_V8U8:
   2934 		case FORMAT_Q8W8V8U8:
   2935 		case FORMAT_X8L8V8U8:
   2936 		case FORMAT_R32F:
   2937 		case FORMAT_G32R32F:
   2938 		case FORMAT_X32B32G32R32F:
   2939 		case FORMAT_A32B32G32R32F:
   2940 		case FORMAT_X32B32G32R32F_UNSIGNED:
   2941 		case FORMAT_A8:
   2942 		case FORMAT_R8:
   2943 		case FORMAT_L8:
   2944 		case FORMAT_A8L8:
   2945 		case FORMAT_D32F:
   2946 		case FORMAT_D32FS8:
   2947 		case FORMAT_D32F_LOCKABLE:
   2948 		case FORMAT_D32FS8_TEXTURE:
   2949 		case FORMAT_D32F_SHADOW:
   2950 		case FORMAT_D32FS8_SHADOW:
   2951 		case FORMAT_L16:
   2952 		case FORMAT_G16R16:
   2953 		case FORMAT_A16B16G16R16:
   2954 		case FORMAT_R16I:
   2955 		case FORMAT_R16UI:
   2956 		case FORMAT_G16R16I:
   2957 		case FORMAT_G16R16UI:
   2958 		case FORMAT_X16B16G16R16I:
   2959 		case FORMAT_X16B16G16R16UI:
   2960 		case FORMAT_A16B16G16R16I:
   2961 		case FORMAT_A16B16G16R16UI:
   2962 		case FORMAT_V16U16:
   2963 		case FORMAT_A16W16V16U16:
   2964 		case FORMAT_Q16W16V16U16:
   2965 			return false;
   2966 		default:
   2967 			ASSERT(false);
   2968 		}
   2969 
   2970 		return false;
   2971 	}
   2972 
   2973 	bool SamplerCore::isRGBComponent(int component) const
   2974 	{
   2975 		switch(state.textureFormat)
   2976 		{
   2977 		case FORMAT_R5G6B5:         return component < 3;
   2978 		case FORMAT_R8_SNORM:      return component < 1;
   2979 		case FORMAT_G8R8_SNORM:    return component < 2;
   2980 		case FORMAT_X8B8G8R8_SNORM: return component < 3;
   2981 		case FORMAT_A8B8G8R8_SNORM: return component < 3;
   2982 		case FORMAT_R8I:            return component < 1;
   2983 		case FORMAT_R8UI:           return component < 1;
   2984 		case FORMAT_G8R8I:          return component < 2;
   2985 		case FORMAT_G8R8UI:         return component < 2;
   2986 		case FORMAT_X8B8G8R8I:      return component < 3;
   2987 		case FORMAT_X8B8G8R8UI:     return component < 3;
   2988 		case FORMAT_A8B8G8R8I:      return component < 3;
   2989 		case FORMAT_A8B8G8R8UI:     return component < 3;
   2990 		case FORMAT_R32I:           return component < 1;
   2991 		case FORMAT_R32UI:          return component < 1;
   2992 		case FORMAT_G32R32I:        return component < 2;
   2993 		case FORMAT_G32R32UI:       return component < 2;
   2994 		case FORMAT_X32B32G32R32I:  return component < 3;
   2995 		case FORMAT_X32B32G32R32UI: return component < 3;
   2996 		case FORMAT_A32B32G32R32I:  return component < 3;
   2997 		case FORMAT_A32B32G32R32UI: return component < 3;
   2998 		case FORMAT_G8R8:           return component < 2;
   2999 		case FORMAT_X8R8G8B8:       return component < 3;
   3000 		case FORMAT_X8B8G8R8:       return component < 3;
   3001 		case FORMAT_A8R8G8B8:       return component < 3;
   3002 		case FORMAT_A8B8G8R8:       return component < 3;
   3003 		case FORMAT_SRGB8_X8:       return component < 3;
   3004 		case FORMAT_SRGB8_A8:       return component < 3;
   3005 		case FORMAT_V8U8:           return false;
   3006 		case FORMAT_Q8W8V8U8:       return false;
   3007 		case FORMAT_X8L8V8U8:       return false;
   3008 		case FORMAT_R32F:           return component < 1;
   3009 		case FORMAT_G32R32F:        return component < 2;
   3010 		case FORMAT_X32B32G32R32F:  return component < 3;
   3011 		case FORMAT_A32B32G32R32F:  return component < 3;
   3012 		case FORMAT_X32B32G32R32F_UNSIGNED: return component < 3;
   3013 		case FORMAT_A8:             return false;
   3014 		case FORMAT_R8:             return component < 1;
   3015 		case FORMAT_L8:             return component < 1;
   3016 		case FORMAT_A8L8:           return component < 1;
   3017 		case FORMAT_D32F:           return false;
   3018 		case FORMAT_D32FS8:         return false;
   3019 		case FORMAT_D32F_LOCKABLE:  return false;
   3020 		case FORMAT_D32FS8_TEXTURE: return false;
   3021 		case FORMAT_D32F_SHADOW:    return false;
   3022 		case FORMAT_D32FS8_SHADOW:  return false;
   3023 		case FORMAT_L16:            return component < 1;
   3024 		case FORMAT_G16R16:         return component < 2;
   3025 		case FORMAT_A16B16G16R16:   return component < 3;
   3026 		case FORMAT_R16I:           return component < 1;
   3027 		case FORMAT_R16UI:          return component < 1;
   3028 		case FORMAT_G16R16I:        return component < 2;
   3029 		case FORMAT_G16R16UI:       return component < 2;
   3030 		case FORMAT_X16B16G16R16I:  return component < 3;
   3031 		case FORMAT_X16B16G16R16UI: return component < 3;
   3032 		case FORMAT_A16B16G16R16I:  return component < 3;
   3033 		case FORMAT_A16B16G16R16UI: return component < 3;
   3034 		case FORMAT_V16U16:         return false;
   3035 		case FORMAT_A16W16V16U16:   return false;
   3036 		case FORMAT_Q16W16V16U16:   return false;
   3037 		case FORMAT_YV12_BT601:     return component < 3;
   3038 		case FORMAT_YV12_BT709:     return component < 3;
   3039 		case FORMAT_YV12_JFIF:      return component < 3;
   3040 		default:
   3041 			ASSERT(false);
   3042 		}
   3043 
   3044 		return false;
   3045 	}
   3046 }
   3047