1 // Copyright 2016 The SwiftShader Authors. All Rights Reserved. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 #include "SamplerCore.hpp" 16 17 #include "Constants.hpp" 18 #include "Common/Debug.hpp" 19 20 namespace 21 { 22 void applySwizzle(sw::SwizzleType swizzle, sw::Short4& s, const sw::Vector4s& c) 23 { 24 switch(swizzle) 25 { 26 case sw::SWIZZLE_RED: s = c.x; break; 27 case sw::SWIZZLE_GREEN: s = c.y; break; 28 case sw::SWIZZLE_BLUE: s = c.z; break; 29 case sw::SWIZZLE_ALPHA: s = c.w; break; 30 case sw::SWIZZLE_ZERO: s = sw::Short4(0x0000); break; 31 case sw::SWIZZLE_ONE: s = sw::Short4(0x1000); break; 32 default: ASSERT(false); 33 } 34 } 35 36 void applySwizzle(sw::SwizzleType swizzle, sw::Float4& f, const sw::Vector4f& c) 37 { 38 switch(swizzle) 39 { 40 case sw::SWIZZLE_RED: f = c.x; break; 41 case sw::SWIZZLE_GREEN: f = c.y; break; 42 case sw::SWIZZLE_BLUE: f = c.z; break; 43 case sw::SWIZZLE_ALPHA: f = c.w; break; 44 case sw::SWIZZLE_ZERO: f = sw::Float4(0.0f, 0.0f, 0.0f, 0.0f); break; 45 case sw::SWIZZLE_ONE: f = sw::Float4(1.0f, 1.0f, 1.0f, 1.0f); break; 46 default: ASSERT(false); 47 } 48 } 49 } 50 51 namespace sw 52 { 53 extern bool colorsDefaultToZero; 54 55 SamplerCore::SamplerCore(Pointer<Byte> &constants, const Sampler::State &state) : constants(constants), state(state) 56 { 57 } 58 59 Vector4s SamplerCore::sampleTexture(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Float4 &bias, Vector4f &dsx, 
Vector4f &dsy) 60 { 61 return sampleTexture(texture, u, v, w, q, q, dsx, dsy, (dsx), Implicit, true); 62 } 63 64 Vector4s SamplerCore::sampleTexture(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Float4 &bias, Vector4f &dsx, Vector4f &dsy, Vector4f &offset, SamplerFunction function, bool fixed12) 65 { 66 Vector4s c; 67 68 #if PERF_PROFILE 69 AddAtomic(Pointer<Long>(&profiler.texOperations), 4); 70 71 if(state.compressedFormat) 72 { 73 AddAtomic(Pointer<Long>(&profiler.compressedTex), 4); 74 } 75 #endif 76 77 if(state.textureType == TEXTURE_NULL) 78 { 79 c.x = Short4(0x0000); 80 c.y = Short4(0x0000); 81 c.z = Short4(0x0000); 82 83 if(fixed12) // FIXME: Convert to fixed12 at higher level, when required 84 { 85 c.w = Short4(0x1000); 86 } 87 else 88 { 89 c.w = Short4(0xFFFFu); // FIXME 90 } 91 } 92 else 93 { 94 Float4 uuuu = u; 95 Float4 vvvv = v; 96 Float4 wwww = w; 97 Float4 qqqq = q; 98 99 Int face[4]; 100 Float lod; 101 Float anisotropy; 102 Float4 uDelta; 103 Float4 vDelta; 104 105 if(state.textureType != TEXTURE_3D) 106 { 107 if(state.textureType != TEXTURE_CUBE) 108 { 109 computeLod(texture, lod, anisotropy, uDelta, vDelta, uuuu, vvvv, bias.x, dsx, dsy, function); 110 } 111 else 112 { 113 Float4 M; 114 cubeFace(face, uuuu, vvvv, u, v, w, M); 115 computeLodCube(texture, lod, u, v, w, bias.x, dsx, dsy, M, function); 116 } 117 } 118 else 119 { 120 computeLod3D(texture, lod, uuuu, vvvv, wwww, bias.x, dsx, dsy, function); 121 } 122 123 if(!hasFloatTexture()) 124 { 125 c = sampleFilter(texture, uuuu, vvvv, wwww, offset, lod, anisotropy, uDelta, vDelta, face, function); 126 } 127 else 128 { 129 Vector4f cf = sampleFloatFilter(texture, uuuu, vvvv, wwww, qqqq, offset, lod, anisotropy, uDelta, vDelta, face, function); 130 131 convertFixed12(c, cf); 132 } 133 134 if(fixed12) 135 { 136 if(!hasFloatTexture()) 137 { 138 if(state.textureFormat == FORMAT_R5G6B5) 139 { 140 c.x = MulHigh(As<UShort4>(c.x), UShort4(0x10000000 / 0xF800)); 141 c.y = 
MulHigh(As<UShort4>(c.y), UShort4(0x10000000 / 0xFC00)); 142 c.z = MulHigh(As<UShort4>(c.z), UShort4(0x10000000 / 0xF800)); 143 } 144 else 145 { 146 for(int component = 0; component < textureComponentCount(); component++) 147 { 148 if(hasUnsignedTextureComponent(component)) 149 { 150 c[component] = As<UShort4>(c[component]) >> 4; 151 } 152 else 153 { 154 c[component] = c[component] >> 3; 155 } 156 } 157 } 158 } 159 160 if(state.textureFilter != FILTER_GATHER) 161 { 162 int componentCount = textureComponentCount(); 163 short defaultColorValue = colorsDefaultToZero ? 0x0000 : 0x1000; 164 165 switch(state.textureFormat) 166 { 167 case FORMAT_R8_SNORM: 168 case FORMAT_G8R8_SNORM: 169 case FORMAT_X8B8G8R8_SNORM: 170 case FORMAT_A8B8G8R8_SNORM: 171 case FORMAT_R8: 172 case FORMAT_R5G6B5: 173 case FORMAT_G8R8: 174 case FORMAT_R8I: 175 case FORMAT_R8UI: 176 case FORMAT_G8R8I: 177 case FORMAT_G8R8UI: 178 case FORMAT_X8B8G8R8I: 179 case FORMAT_X8B8G8R8UI: 180 case FORMAT_A8B8G8R8I: 181 case FORMAT_A8B8G8R8UI: 182 case FORMAT_R16I: 183 case FORMAT_R16UI: 184 case FORMAT_G16R16: 185 case FORMAT_G16R16I: 186 case FORMAT_G16R16UI: 187 case FORMAT_X16B16G16R16I: 188 case FORMAT_X16B16G16R16UI: 189 case FORMAT_A16B16G16R16: 190 case FORMAT_A16B16G16R16I: 191 case FORMAT_A16B16G16R16UI: 192 case FORMAT_R32I: 193 case FORMAT_R32UI: 194 case FORMAT_G32R32I: 195 case FORMAT_G32R32UI: 196 case FORMAT_X32B32G32R32I: 197 case FORMAT_X32B32G32R32UI: 198 case FORMAT_A32B32G32R32I: 199 case FORMAT_A32B32G32R32UI: 200 case FORMAT_X8R8G8B8: 201 case FORMAT_X8B8G8R8: 202 case FORMAT_A8R8G8B8: 203 case FORMAT_A8B8G8R8: 204 case FORMAT_SRGB8_X8: 205 case FORMAT_SRGB8_A8: 206 case FORMAT_V8U8: 207 case FORMAT_Q8W8V8U8: 208 case FORMAT_X8L8V8U8: 209 case FORMAT_V16U16: 210 case FORMAT_A16W16V16U16: 211 case FORMAT_Q16W16V16U16: 212 case FORMAT_YV12_BT601: 213 case FORMAT_YV12_BT709: 214 case FORMAT_YV12_JFIF: 215 if(componentCount < 2) c.y = Short4(defaultColorValue); 216 if(componentCount < 3) 
c.z = Short4(defaultColorValue); 217 if(componentCount < 4) c.w = Short4(0x1000); 218 break; 219 case FORMAT_A8: 220 c.w = c.x; 221 c.x = Short4(0x0000); 222 c.y = Short4(0x0000); 223 c.z = Short4(0x0000); 224 break; 225 case FORMAT_L8: 226 case FORMAT_L16: 227 c.y = c.x; 228 c.z = c.x; 229 c.w = Short4(0x1000); 230 break; 231 case FORMAT_A8L8: 232 c.w = c.y; 233 c.y = c.x; 234 c.z = c.x; 235 break; 236 case FORMAT_R32F: 237 c.y = Short4(defaultColorValue); 238 case FORMAT_G32R32F: 239 c.z = Short4(defaultColorValue); 240 case FORMAT_X32B32G32R32F: 241 case FORMAT_X32B32G32R32F_UNSIGNED: 242 c.w = Short4(0x1000); 243 case FORMAT_A32B32G32R32F: 244 break; 245 case FORMAT_D32F: 246 case FORMAT_D32FS8: 247 case FORMAT_D32F_LOCKABLE: 248 case FORMAT_D32FS8_TEXTURE: 249 case FORMAT_D32F_SHADOW: 250 case FORMAT_D32FS8_SHADOW: 251 c.y = c.x; 252 c.z = c.x; 253 c.w = c.x; 254 break; 255 default: 256 ASSERT(false); 257 } 258 } 259 260 if((state.swizzleR != SWIZZLE_RED) || 261 (state.swizzleG != SWIZZLE_GREEN) || 262 (state.swizzleB != SWIZZLE_BLUE) || 263 (state.swizzleA != SWIZZLE_ALPHA)) 264 { 265 const Vector4s col(c); 266 applySwizzle(state.swizzleR, c.x, col); 267 applySwizzle(state.swizzleG, c.y, col); 268 applySwizzle(state.swizzleB, c.z, col); 269 applySwizzle(state.swizzleA, c.w, col); 270 } 271 } 272 } 273 274 return c; 275 } 276 277 Vector4f SamplerCore::sampleTexture(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Float4 &bias, Vector4f &dsx, Vector4f &dsy, Vector4f &offset, SamplerFunction function) 278 { 279 Vector4f c; 280 281 #if PERF_PROFILE 282 AddAtomic(Pointer<Long>(&profiler.texOperations), 4); 283 284 if(state.compressedFormat) 285 { 286 AddAtomic(Pointer<Long>(&profiler.compressedTex), 4); 287 } 288 #endif 289 290 if(state.textureType == TEXTURE_NULL) 291 { 292 c.x = Float4(0.0f); 293 c.y = Float4(0.0f); 294 c.z = Float4(0.0f); 295 c.w = Float4(1.0f); 296 } 297 else 298 { 299 // FIXME: YUV is not supported by the floating point 
path 300 bool forceFloatFiltering = state.highPrecisionFiltering && !hasYuvFormat() && (state.textureFilter != FILTER_POINT); 301 bool seamlessCube = (state.addressingModeU == ADDRESSING_SEAMLESS); 302 bool rectangleTexture = (state.textureType == TEXTURE_RECTANGLE); 303 if(hasFloatTexture() || hasUnnormalizedIntegerTexture() || forceFloatFiltering || seamlessCube || rectangleTexture) // FIXME: Mostly identical to integer sampling 304 { 305 Float4 uuuu = u; 306 Float4 vvvv = v; 307 Float4 wwww = w; 308 Float4 qqqq = q; 309 310 Int face[4]; 311 Float lod; 312 Float anisotropy; 313 Float4 uDelta; 314 Float4 vDelta; 315 316 if(state.textureType != TEXTURE_3D) 317 { 318 if(state.textureType != TEXTURE_CUBE) 319 { 320 computeLod(texture, lod, anisotropy, uDelta, vDelta, uuuu, vvvv, bias.x, dsx, dsy, function); 321 } 322 else 323 { 324 Float4 M; 325 cubeFace(face, uuuu, vvvv, u, v, w, M); 326 computeLodCube(texture, lod, u, v, w, bias.x, dsx, dsy, M, function); 327 } 328 } 329 else 330 { 331 computeLod3D(texture, lod, uuuu, vvvv, wwww, bias.x, dsx, dsy, function); 332 } 333 334 c = sampleFloatFilter(texture, uuuu, vvvv, wwww, qqqq, offset, lod, anisotropy, uDelta, vDelta, face, function); 335 336 if(!hasFloatTexture() && !hasUnnormalizedIntegerTexture()) 337 { 338 if(has16bitTextureFormat()) 339 { 340 switch(state.textureFormat) 341 { 342 case FORMAT_R5G6B5: 343 c.x *= Float4(1.0f / 0xF800); 344 c.y *= Float4(1.0f / 0xFC00); 345 c.z *= Float4(1.0f / 0xF800); 346 break; 347 default: 348 ASSERT(false); 349 } 350 } 351 else 352 { 353 for(int component = 0; component < textureComponentCount(); component++) 354 { 355 c[component] *= Float4(hasUnsignedTextureComponent(component) ? 
1.0f / 0xFFFF : 1.0f / 0x7FFF); 356 } 357 } 358 } 359 } 360 else 361 { 362 Vector4s cs = sampleTexture(texture, u, v, w, q, bias, dsx, dsy, offset, function, false); 363 364 if(state.textureFormat == FORMAT_R5G6B5) 365 { 366 c.x = Float4(As<UShort4>(cs.x)) * Float4(1.0f / 0xF800); 367 c.y = Float4(As<UShort4>(cs.y)) * Float4(1.0f / 0xFC00); 368 c.z = Float4(As<UShort4>(cs.z)) * Float4(1.0f / 0xF800); 369 } 370 else 371 { 372 for(int component = 0; component < textureComponentCount(); component++) 373 { 374 if(hasUnsignedTextureComponent(component)) 375 { 376 convertUnsigned16(c[component], cs[component]); 377 } 378 else 379 { 380 convertSigned15(c[component], cs[component]); 381 } 382 } 383 } 384 } 385 386 int componentCount = textureComponentCount(); 387 float defaultColorValue = colorsDefaultToZero ? 0.0f : 1.0f; 388 389 if(state.textureFilter != FILTER_GATHER) 390 { 391 switch(state.textureFormat) 392 { 393 case FORMAT_R8I: 394 case FORMAT_R8UI: 395 case FORMAT_R16I: 396 case FORMAT_R16UI: 397 case FORMAT_R32I: 398 case FORMAT_R32UI: 399 c.y = As<Float4>(UInt4(0)); 400 case FORMAT_G8R8I: 401 case FORMAT_G8R8UI: 402 case FORMAT_G16R16I: 403 case FORMAT_G16R16UI: 404 case FORMAT_G32R32I: 405 case FORMAT_G32R32UI: 406 c.z = As<Float4>(UInt4(0)); 407 case FORMAT_X8B8G8R8I: 408 case FORMAT_X8B8G8R8UI: 409 case FORMAT_X16B16G16R16I: 410 case FORMAT_X16B16G16R16UI: 411 case FORMAT_X32B32G32R32I: 412 case FORMAT_X32B32G32R32UI: 413 c.w = As<Float4>(UInt4(1)); 414 case FORMAT_A8B8G8R8I: 415 case FORMAT_A8B8G8R8UI: 416 case FORMAT_A16B16G16R16I: 417 case FORMAT_A16B16G16R16UI: 418 case FORMAT_A32B32G32R32I: 419 case FORMAT_A32B32G32R32UI: 420 break; 421 case FORMAT_R8_SNORM: 422 case FORMAT_G8R8_SNORM: 423 case FORMAT_X8B8G8R8_SNORM: 424 case FORMAT_A8B8G8R8_SNORM: 425 case FORMAT_R8: 426 case FORMAT_R5G6B5: 427 case FORMAT_G8R8: 428 case FORMAT_G16R16: 429 case FORMAT_A16B16G16R16: 430 case FORMAT_X8R8G8B8: 431 case FORMAT_X8B8G8R8: 432 case FORMAT_A8R8G8B8: 433 case 
FORMAT_A8B8G8R8: 434 case FORMAT_SRGB8_X8: 435 case FORMAT_SRGB8_A8: 436 case FORMAT_V8U8: 437 case FORMAT_Q8W8V8U8: 438 case FORMAT_X8L8V8U8: 439 case FORMAT_V16U16: 440 case FORMAT_A16W16V16U16: 441 case FORMAT_Q16W16V16U16: 442 case FORMAT_YV12_BT601: 443 case FORMAT_YV12_BT709: 444 case FORMAT_YV12_JFIF: 445 if(componentCount < 2) c.y = Float4(defaultColorValue); 446 if(componentCount < 3) c.z = Float4(defaultColorValue); 447 if(componentCount < 4) c.w = Float4(1.0f); 448 break; 449 case FORMAT_A8: 450 c.w = c.x; 451 c.x = Float4(0.0f); 452 c.y = Float4(0.0f); 453 c.z = Float4(0.0f); 454 break; 455 case FORMAT_L8: 456 case FORMAT_L16: 457 c.y = c.x; 458 c.z = c.x; 459 c.w = Float4(1.0f); 460 break; 461 case FORMAT_A8L8: 462 c.w = c.y; 463 c.y = c.x; 464 c.z = c.x; 465 break; 466 case FORMAT_R32F: 467 c.y = Float4(defaultColorValue); 468 case FORMAT_G32R32F: 469 c.z = Float4(defaultColorValue); 470 case FORMAT_X32B32G32R32F: 471 case FORMAT_X32B32G32R32F_UNSIGNED: 472 c.w = Float4(1.0f); 473 case FORMAT_A32B32G32R32F: 474 break; 475 case FORMAT_D32F: 476 case FORMAT_D32FS8: 477 case FORMAT_D32F_LOCKABLE: 478 case FORMAT_D32FS8_TEXTURE: 479 case FORMAT_D32F_SHADOW: 480 case FORMAT_D32FS8_SHADOW: 481 c.y = Float4(0.0f); 482 c.z = Float4(0.0f); 483 c.w = Float4(1.0f); 484 break; 485 default: 486 ASSERT(false); 487 } 488 } 489 490 if((state.swizzleR != SWIZZLE_RED) || 491 (state.swizzleG != SWIZZLE_GREEN) || 492 (state.swizzleB != SWIZZLE_BLUE) || 493 (state.swizzleA != SWIZZLE_ALPHA)) 494 { 495 const Vector4f col(c); 496 applySwizzle(state.swizzleR, c.x, col); 497 applySwizzle(state.swizzleG, c.y, col); 498 applySwizzle(state.swizzleB, c.z, col); 499 applySwizzle(state.swizzleA, c.w, col); 500 } 501 } 502 503 return c; 504 } 505 506 Vector4f SamplerCore::textureSize(Pointer<Byte> &texture, Float4 &lod) 507 { 508 Vector4f size; 509 510 for(int i = 0; i < 4; ++i) 511 { 512 Int baseLevel = *Pointer<Int>(texture + OFFSET(Texture, baseLevel)); 513 Pointer<Byte> mipmap = 
texture + OFFSET(Texture, mipmap) + (As<Int>(Extract(lod, i)) + baseLevel) * sizeof(Mipmap); 514 size.x = Insert(size.x, As<Float>(Int(*Pointer<Short>(mipmap + OFFSET(Mipmap, width)))), i); 515 size.y = Insert(size.y, As<Float>(Int(*Pointer<Short>(mipmap + OFFSET(Mipmap, height)))), i); 516 size.z = Insert(size.z, As<Float>(Int(*Pointer<Short>(mipmap + OFFSET(Mipmap, depth)))), i); 517 } 518 519 return size; 520 } 521 522 void SamplerCore::border(Short4 &mask, Float4 &coordinates) 523 { 524 Int4 border = As<Int4>(CmpLT(Abs(coordinates - Float4(0.5f)), Float4(0.5f))); 525 mask = As<Short4>(Int2(As<Int4>(PackSigned(border, border)))); 526 } 527 528 void SamplerCore::border(Int4 &mask, Float4 &coordinates) 529 { 530 mask = As<Int4>(CmpLT(Abs(coordinates - Float4(0.5f)), Float4(0.5f))); 531 } 532 533 Short4 SamplerCore::offsetSample(Short4 &uvw, Pointer<Byte> &mipmap, int halfOffset, bool wrap, int count, Float &lod) 534 { 535 Short4 offset = *Pointer<Short4>(mipmap + halfOffset); 536 537 if(state.textureFilter == FILTER_MIN_LINEAR_MAG_POINT) 538 { 539 offset &= Short4(CmpNLE(Float4(lod), Float4(0.0f))); 540 } 541 else if(state.textureFilter == FILTER_MIN_POINT_MAG_LINEAR) 542 { 543 offset &= Short4(CmpLE(Float4(lod), Float4(0.0f))); 544 } 545 546 if(wrap) 547 { 548 switch(count) 549 { 550 case -1: return uvw - offset; 551 case 0: return uvw; 552 case +1: return uvw + offset; 553 case 2: return uvw + offset + offset; 554 } 555 } 556 else // Clamp or mirror 557 { 558 switch(count) 559 { 560 case -1: return SubSat(As<UShort4>(uvw), As<UShort4>(offset)); 561 case 0: return uvw; 562 case +1: return AddSat(As<UShort4>(uvw), As<UShort4>(offset)); 563 case 2: return AddSat(AddSat(As<UShort4>(uvw), As<UShort4>(offset)), As<UShort4>(offset)); 564 } 565 } 566 567 return uvw; 568 } 569 570 Vector4s SamplerCore::sampleFilter(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, Int face[4], 
SamplerFunction function) 571 { 572 Vector4s c = sampleAniso(texture, u, v, w, offset, lod, anisotropy, uDelta, vDelta, face, false, function); 573 574 if(function == Fetch) 575 { 576 return c; 577 } 578 579 if(state.mipmapFilter == MIPMAP_LINEAR) 580 { 581 Vector4s cc = sampleAniso(texture, u, v, w, offset, lod, anisotropy, uDelta, vDelta, face, true, function); 582 583 lod *= Float(1 << 16); 584 585 UShort4 utri = UShort4(Float4(lod)); // FIXME: Optimize 586 Short4 stri = utri >> 1; // FIXME: Optimize 587 588 if(hasUnsignedTextureComponent(0)) cc.x = MulHigh(As<UShort4>(cc.x), utri); else cc.x = MulHigh(cc.x, stri); 589 if(hasUnsignedTextureComponent(1)) cc.y = MulHigh(As<UShort4>(cc.y), utri); else cc.y = MulHigh(cc.y, stri); 590 if(hasUnsignedTextureComponent(2)) cc.z = MulHigh(As<UShort4>(cc.z), utri); else cc.z = MulHigh(cc.z, stri); 591 if(hasUnsignedTextureComponent(3)) cc.w = MulHigh(As<UShort4>(cc.w), utri); else cc.w = MulHigh(cc.w, stri); 592 593 utri = ~utri; 594 stri = Short4(0x7FFF) - stri; 595 596 if(hasUnsignedTextureComponent(0)) c.x = MulHigh(As<UShort4>(c.x), utri); else c.x = MulHigh(c.x, stri); 597 if(hasUnsignedTextureComponent(1)) c.y = MulHigh(As<UShort4>(c.y), utri); else c.y = MulHigh(c.y, stri); 598 if(hasUnsignedTextureComponent(2)) c.z = MulHigh(As<UShort4>(c.z), utri); else c.z = MulHigh(c.z, stri); 599 if(hasUnsignedTextureComponent(3)) c.w = MulHigh(As<UShort4>(c.w), utri); else c.w = MulHigh(c.w, stri); 600 601 c.x += cc.x; 602 c.y += cc.y; 603 c.z += cc.z; 604 c.w += cc.w; 605 606 if(!hasUnsignedTextureComponent(0)) c.x += c.x; 607 if(!hasUnsignedTextureComponent(1)) c.y += c.y; 608 if(!hasUnsignedTextureComponent(2)) c.z += c.z; 609 if(!hasUnsignedTextureComponent(3)) c.w += c.w; 610 } 611 612 Short4 borderMask; 613 614 if(state.addressingModeU == ADDRESSING_BORDER) 615 { 616 Short4 u0; 617 618 border(u0, u); 619 620 borderMask = u0; 621 } 622 623 if(state.addressingModeV == ADDRESSING_BORDER) 624 { 625 Short4 v0; 626 627 
border(v0, v); 628 629 if(state.addressingModeU == ADDRESSING_BORDER) 630 { 631 borderMask &= v0; 632 } 633 else 634 { 635 borderMask = v0; 636 } 637 } 638 639 if(state.addressingModeW == ADDRESSING_BORDER && state.textureType == TEXTURE_3D) 640 { 641 Short4 s0; 642 643 border(s0, w); 644 645 if(state.addressingModeU == ADDRESSING_BORDER || 646 state.addressingModeV == ADDRESSING_BORDER) 647 { 648 borderMask &= s0; 649 } 650 else 651 { 652 borderMask = s0; 653 } 654 } 655 656 if(state.addressingModeU == ADDRESSING_BORDER || 657 state.addressingModeV == ADDRESSING_BORDER || 658 (state.addressingModeW == ADDRESSING_BORDER && state.textureType == TEXTURE_3D)) 659 { 660 Short4 b; 661 662 c.x = (borderMask & c.x) | (~borderMask & (*Pointer<Short4>(texture + OFFSET(Texture,borderColor4[0])) >> (hasUnsignedTextureComponent(0) ? 0 : 1))); 663 c.y = (borderMask & c.y) | (~borderMask & (*Pointer<Short4>(texture + OFFSET(Texture,borderColor4[1])) >> (hasUnsignedTextureComponent(1) ? 0 : 1))); 664 c.z = (borderMask & c.z) | (~borderMask & (*Pointer<Short4>(texture + OFFSET(Texture,borderColor4[2])) >> (hasUnsignedTextureComponent(2) ? 0 : 1))); 665 c.w = (borderMask & c.w) | (~borderMask & (*Pointer<Short4>(texture + OFFSET(Texture,borderColor4[3])) >> (hasUnsignedTextureComponent(3) ? 
0 : 1))); 666 } 667 668 return c; 669 } 670 671 Vector4s SamplerCore::sampleAniso(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, Int face[4], bool secondLOD, SamplerFunction function) 672 { 673 Vector4s c; 674 675 if(state.textureFilter != FILTER_ANISOTROPIC || function == Lod || function == Fetch) 676 { 677 c = sampleQuad(texture, u, v, w, offset, lod, face, secondLOD, function); 678 } 679 else 680 { 681 Int a = RoundInt(anisotropy); 682 683 Vector4s cSum; 684 685 cSum.x = Short4(0); 686 cSum.y = Short4(0); 687 cSum.z = Short4(0); 688 cSum.w = Short4(0); 689 690 Float4 A = *Pointer<Float4>(constants + OFFSET(Constants,uvWeight) + 16 * a); 691 Float4 B = *Pointer<Float4>(constants + OFFSET(Constants,uvStart) + 16 * a); 692 UShort4 cw = *Pointer<UShort4>(constants + OFFSET(Constants,cWeight) + 8 * a); 693 Short4 sw = Short4(cw >> 1); 694 695 Float4 du = uDelta; 696 Float4 dv = vDelta; 697 698 Float4 u0 = u + B * du; 699 Float4 v0 = v + B * dv; 700 701 du *= A; 702 dv *= A; 703 704 Int i = 0; 705 706 Do 707 { 708 c = sampleQuad(texture, u0, v0, w, offset, lod, face, secondLOD, function); 709 710 u0 += du; 711 v0 += dv; 712 713 if(hasUnsignedTextureComponent(0)) cSum.x += As<Short4>(MulHigh(As<UShort4>(c.x), cw)); else cSum.x += MulHigh(c.x, sw); 714 if(hasUnsignedTextureComponent(1)) cSum.y += As<Short4>(MulHigh(As<UShort4>(c.y), cw)); else cSum.y += MulHigh(c.y, sw); 715 if(hasUnsignedTextureComponent(2)) cSum.z += As<Short4>(MulHigh(As<UShort4>(c.z), cw)); else cSum.z += MulHigh(c.z, sw); 716 if(hasUnsignedTextureComponent(3)) cSum.w += As<Short4>(MulHigh(As<UShort4>(c.w), cw)); else cSum.w += MulHigh(c.w, sw); 717 718 i++; 719 } 720 Until(i >= a) 721 722 if(hasUnsignedTextureComponent(0)) c.x = cSum.x; else c.x = AddSat(cSum.x, cSum.x); 723 if(hasUnsignedTextureComponent(1)) c.y = cSum.y; else c.y = AddSat(cSum.y, cSum.y); 724 if(hasUnsignedTextureComponent(2)) c.z = cSum.z; 
else c.z = AddSat(cSum.z, cSum.z); 725 if(hasUnsignedTextureComponent(3)) c.w = cSum.w; else c.w = AddSat(cSum.w, cSum.w); 726 } 727 728 return c; 729 } 730 731 Vector4s SamplerCore::sampleQuad(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, Float &lod, Int face[4], bool secondLOD, SamplerFunction function) 732 { 733 if(state.textureType != TEXTURE_3D) 734 { 735 return sampleQuad2D(texture, u, v, w, offset, lod, face, secondLOD, function); 736 } 737 else 738 { 739 return sample3D(texture, u, v, w, offset, lod, secondLOD, function); 740 } 741 } 742 743 Vector4s SamplerCore::sampleQuad2D(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, Float &lod, Int face[4], bool secondLOD, SamplerFunction function) 744 { 745 Vector4s c; 746 747 int componentCount = textureComponentCount(); 748 bool gather = state.textureFilter == FILTER_GATHER; 749 750 Pointer<Byte> mipmap; 751 Pointer<Byte> buffer[4]; 752 753 selectMipmap(texture, buffer, mipmap, lod, face, secondLOD); 754 755 bool texelFetch = (function == Fetch); 756 757 Short4 uuuu = texelFetch ? Short4(As<Int4>(u)) : address(u, state.addressingModeU, mipmap); 758 Short4 vvvv = texelFetch ? Short4(As<Int4>(v)) : address(v, state.addressingModeV, mipmap); 759 Short4 wwww = texelFetch ? Short4(As<Int4>(w)) : address(w, state.addressingModeW, mipmap); 760 761 if(state.textureFilter == FILTER_POINT || texelFetch) 762 { 763 c = sampleTexel(uuuu, vvvv, wwww, offset, mipmap, buffer, function); 764 } 765 else 766 { 767 Short4 uuuu0 = offsetSample(uuuu, mipmap, OFFSET(Mipmap,uHalf), state.addressingModeU == ADDRESSING_WRAP, gather ? 0 : -1, lod); 768 Short4 vvvv0 = offsetSample(vvvv, mipmap, OFFSET(Mipmap,vHalf), state.addressingModeV == ADDRESSING_WRAP, gather ? 0 : -1, lod); 769 Short4 uuuu1 = offsetSample(uuuu, mipmap, OFFSET(Mipmap,uHalf), state.addressingModeU == ADDRESSING_WRAP, gather ? 
2 : +1, lod); 770 Short4 vvvv1 = offsetSample(vvvv, mipmap, OFFSET(Mipmap,vHalf), state.addressingModeV == ADDRESSING_WRAP, gather ? 2 : +1, lod); 771 772 Vector4s c0 = sampleTexel(uuuu0, vvvv0, wwww, offset, mipmap, buffer, function); 773 Vector4s c1 = sampleTexel(uuuu1, vvvv0, wwww, offset, mipmap, buffer, function); 774 Vector4s c2 = sampleTexel(uuuu0, vvvv1, wwww, offset, mipmap, buffer, function); 775 Vector4s c3 = sampleTexel(uuuu1, vvvv1, wwww, offset, mipmap, buffer, function); 776 777 if(!gather) // Blend 778 { 779 // Fractions 780 UShort4 f0u = As<UShort4>(uuuu0) * *Pointer<UShort4>(mipmap + OFFSET(Mipmap,width)); 781 UShort4 f0v = As<UShort4>(vvvv0) * *Pointer<UShort4>(mipmap + OFFSET(Mipmap,height)); 782 783 UShort4 f1u = ~f0u; 784 UShort4 f1v = ~f0v; 785 786 UShort4 f0u0v = MulHigh(f0u, f0v); 787 UShort4 f1u0v = MulHigh(f1u, f0v); 788 UShort4 f0u1v = MulHigh(f0u, f1v); 789 UShort4 f1u1v = MulHigh(f1u, f1v); 790 791 // Signed fractions 792 Short4 f1u1vs; 793 Short4 f0u1vs; 794 Short4 f1u0vs; 795 Short4 f0u0vs; 796 797 if(!hasUnsignedTextureComponent(0) || !hasUnsignedTextureComponent(1) || !hasUnsignedTextureComponent(2) || !hasUnsignedTextureComponent(3)) 798 { 799 f1u1vs = f1u1v >> 1; 800 f0u1vs = f0u1v >> 1; 801 f1u0vs = f1u0v >> 1; 802 f0u0vs = f0u0v >> 1; 803 } 804 805 // Bilinear interpolation 806 if(componentCount >= 1) 807 { 808 if(has16bitTextureComponents() && hasUnsignedTextureComponent(0)) 809 { 810 c0.x = As<UShort4>(c0.x) - MulHigh(As<UShort4>(c0.x), f0u) + MulHigh(As<UShort4>(c1.x), f0u); 811 c2.x = As<UShort4>(c2.x) - MulHigh(As<UShort4>(c2.x), f0u) + MulHigh(As<UShort4>(c3.x), f0u); 812 c.x = As<UShort4>(c0.x) - MulHigh(As<UShort4>(c0.x), f0v) + MulHigh(As<UShort4>(c2.x), f0v); 813 } 814 else 815 { 816 if(hasUnsignedTextureComponent(0)) 817 { 818 c0.x = MulHigh(As<UShort4>(c0.x), f1u1v); 819 c1.x = MulHigh(As<UShort4>(c1.x), f0u1v); 820 c2.x = MulHigh(As<UShort4>(c2.x), f1u0v); 821 c3.x = MulHigh(As<UShort4>(c3.x), f0u0v); 822 } 823 
else 824 { 825 c0.x = MulHigh(c0.x, f1u1vs); 826 c1.x = MulHigh(c1.x, f0u1vs); 827 c2.x = MulHigh(c2.x, f1u0vs); 828 c3.x = MulHigh(c3.x, f0u0vs); 829 } 830 831 c.x = (c0.x + c1.x) + (c2.x + c3.x); 832 if(!hasUnsignedTextureComponent(0)) c.x = AddSat(c.x, c.x); // Correct for signed fractions 833 } 834 } 835 836 if(componentCount >= 2) 837 { 838 if(has16bitTextureComponents() && hasUnsignedTextureComponent(1)) 839 { 840 c0.y = As<UShort4>(c0.y) - MulHigh(As<UShort4>(c0.y), f0u) + MulHigh(As<UShort4>(c1.y), f0u); 841 c2.y = As<UShort4>(c2.y) - MulHigh(As<UShort4>(c2.y), f0u) + MulHigh(As<UShort4>(c3.y), f0u); 842 c.y = As<UShort4>(c0.y) - MulHigh(As<UShort4>(c0.y), f0v) + MulHigh(As<UShort4>(c2.y), f0v); 843 } 844 else 845 { 846 if(hasUnsignedTextureComponent(1)) 847 { 848 c0.y = MulHigh(As<UShort4>(c0.y), f1u1v); 849 c1.y = MulHigh(As<UShort4>(c1.y), f0u1v); 850 c2.y = MulHigh(As<UShort4>(c2.y), f1u0v); 851 c3.y = MulHigh(As<UShort4>(c3.y), f0u0v); 852 } 853 else 854 { 855 c0.y = MulHigh(c0.y, f1u1vs); 856 c1.y = MulHigh(c1.y, f0u1vs); 857 c2.y = MulHigh(c2.y, f1u0vs); 858 c3.y = MulHigh(c3.y, f0u0vs); 859 } 860 861 c.y = (c0.y + c1.y) + (c2.y + c3.y); 862 if(!hasUnsignedTextureComponent(1)) c.y = AddSat(c.y, c.y); // Correct for signed fractions 863 } 864 } 865 866 if(componentCount >= 3) 867 { 868 if(has16bitTextureComponents() && hasUnsignedTextureComponent(2)) 869 { 870 c0.z = As<UShort4>(c0.z) - MulHigh(As<UShort4>(c0.z), f0u) + MulHigh(As<UShort4>(c1.z), f0u); 871 c2.z = As<UShort4>(c2.z) - MulHigh(As<UShort4>(c2.z), f0u) + MulHigh(As<UShort4>(c3.z), f0u); 872 c.z = As<UShort4>(c0.z) - MulHigh(As<UShort4>(c0.z), f0v) + MulHigh(As<UShort4>(c2.z), f0v); 873 } 874 else 875 { 876 if(hasUnsignedTextureComponent(2)) 877 { 878 c0.z = MulHigh(As<UShort4>(c0.z), f1u1v); 879 c1.z = MulHigh(As<UShort4>(c1.z), f0u1v); 880 c2.z = MulHigh(As<UShort4>(c2.z), f1u0v); 881 c3.z = MulHigh(As<UShort4>(c3.z), f0u0v); 882 } 883 else 884 { 885 c0.z = MulHigh(c0.z, f1u1vs); 886 c1.z 
= MulHigh(c1.z, f0u1vs); 887 c2.z = MulHigh(c2.z, f1u0vs); 888 c3.z = MulHigh(c3.z, f0u0vs); 889 } 890 891 c.z = (c0.z + c1.z) + (c2.z + c3.z); 892 if(!hasUnsignedTextureComponent(2)) c.z = AddSat(c.z, c.z); // Correct for signed fractions 893 } 894 } 895 896 if(componentCount >= 4) 897 { 898 if(has16bitTextureComponents() && hasUnsignedTextureComponent(3)) 899 { 900 c0.w = As<UShort4>(c0.w) - MulHigh(As<UShort4>(c0.w), f0u) + MulHigh(As<UShort4>(c1.w), f0u); 901 c2.w = As<UShort4>(c2.w) - MulHigh(As<UShort4>(c2.w), f0u) + MulHigh(As<UShort4>(c3.w), f0u); 902 c.w = As<UShort4>(c0.w) - MulHigh(As<UShort4>(c0.w), f0v) + MulHigh(As<UShort4>(c2.w), f0v); 903 } 904 else 905 { 906 if(hasUnsignedTextureComponent(3)) 907 { 908 c0.w = MulHigh(As<UShort4>(c0.w), f1u1v); 909 c1.w = MulHigh(As<UShort4>(c1.w), f0u1v); 910 c2.w = MulHigh(As<UShort4>(c2.w), f1u0v); 911 c3.w = MulHigh(As<UShort4>(c3.w), f0u0v); 912 } 913 else 914 { 915 c0.w = MulHigh(c0.w, f1u1vs); 916 c1.w = MulHigh(c1.w, f0u1vs); 917 c2.w = MulHigh(c2.w, f1u0vs); 918 c3.w = MulHigh(c3.w, f0u0vs); 919 } 920 921 c.w = (c0.w + c1.w) + (c2.w + c3.w); 922 if(!hasUnsignedTextureComponent(3)) c.w = AddSat(c.w, c.w); // Correct for signed fractions 923 } 924 } 925 } 926 else 927 { 928 c.x = c1.x; 929 c.y = c2.x; 930 c.z = c3.x; 931 c.w = c0.x; 932 } 933 } 934 935 return c; 936 } 937 938 Vector4s SamplerCore::sample3D(Pointer<Byte> &texture, Float4 &u_, Float4 &v_, Float4 &w_, Vector4f &offset, Float &lod, bool secondLOD, SamplerFunction function) 939 { 940 Vector4s c_; 941 942 int componentCount = textureComponentCount(); 943 944 Pointer<Byte> mipmap; 945 Pointer<Byte> buffer[4]; 946 Int face[4]; 947 948 selectMipmap(texture, buffer, mipmap, lod, face, secondLOD); 949 950 bool texelFetch = (function == Fetch); 951 952 Short4 uuuu = texelFetch ? Short4(As<Int4>(u_)) : address(u_, state.addressingModeU, mipmap); 953 Short4 vvvv = texelFetch ? 
Short4(As<Int4>(v_)) : address(v_, state.addressingModeV, mipmap); 954 Short4 wwww = texelFetch ? Short4(As<Int4>(w_)) : address(w_, state.addressingModeW, mipmap); 955 956 if(state.textureFilter == FILTER_POINT || texelFetch) 957 { 958 c_ = sampleTexel(uuuu, vvvv, wwww, offset, mipmap, buffer, function); 959 } 960 else 961 { 962 Vector4s c[2][2][2]; 963 964 Short4 u[2][2][2]; 965 Short4 v[2][2][2]; 966 Short4 s[2][2][2]; 967 968 for(int i = 0; i < 2; i++) 969 { 970 for(int j = 0; j < 2; j++) 971 { 972 for(int k = 0; k < 2; k++) 973 { 974 u[i][j][k] = offsetSample(uuuu, mipmap, OFFSET(Mipmap,uHalf), state.addressingModeU == ADDRESSING_WRAP, i * 2 - 1, lod); 975 v[i][j][k] = offsetSample(vvvv, mipmap, OFFSET(Mipmap,vHalf), state.addressingModeV == ADDRESSING_WRAP, j * 2 - 1, lod); 976 s[i][j][k] = offsetSample(wwww, mipmap, OFFSET(Mipmap,wHalf), state.addressingModeW == ADDRESSING_WRAP, k * 2 - 1, lod); 977 } 978 } 979 } 980 981 // Fractions 982 UShort4 f0u = As<UShort4>(u[0][0][0]) * *Pointer<UShort4>(mipmap + OFFSET(Mipmap,width)); 983 UShort4 f0v = As<UShort4>(v[0][0][0]) * *Pointer<UShort4>(mipmap + OFFSET(Mipmap,height)); 984 UShort4 f0s = As<UShort4>(s[0][0][0]) * *Pointer<UShort4>(mipmap + OFFSET(Mipmap,depth)); 985 986 UShort4 f1u = ~f0u; 987 UShort4 f1v = ~f0v; 988 UShort4 f1s = ~f0s; 989 990 UShort4 f[2][2][2]; 991 Short4 fs[2][2][2]; 992 993 f[1][1][1] = MulHigh(f1u, f1v); 994 f[0][1][1] = MulHigh(f0u, f1v); 995 f[1][0][1] = MulHigh(f1u, f0v); 996 f[0][0][1] = MulHigh(f0u, f0v); 997 f[1][1][0] = MulHigh(f1u, f1v); 998 f[0][1][0] = MulHigh(f0u, f1v); 999 f[1][0][0] = MulHigh(f1u, f0v); 1000 f[0][0][0] = MulHigh(f0u, f0v); 1001 1002 f[1][1][1] = MulHigh(f[1][1][1], f1s); 1003 f[0][1][1] = MulHigh(f[0][1][1], f1s); 1004 f[1][0][1] = MulHigh(f[1][0][1], f1s); 1005 f[0][0][1] = MulHigh(f[0][0][1], f1s); 1006 f[1][1][0] = MulHigh(f[1][1][0], f0s); 1007 f[0][1][0] = MulHigh(f[0][1][0], f0s); 1008 f[1][0][0] = MulHigh(f[1][0][0], f0s); 1009 f[0][0][0] = 
MulHigh(f[0][0][0], f0s); 1010 1011 // Signed fractions 1012 if(!hasUnsignedTextureComponent(0) || !hasUnsignedTextureComponent(1) || !hasUnsignedTextureComponent(2) || !hasUnsignedTextureComponent(3)) 1013 { 1014 fs[0][0][0] = f[0][0][0] >> 1; 1015 fs[0][0][1] = f[0][0][1] >> 1; 1016 fs[0][1][0] = f[0][1][0] >> 1; 1017 fs[0][1][1] = f[0][1][1] >> 1; 1018 fs[1][0][0] = f[1][0][0] >> 1; 1019 fs[1][0][1] = f[1][0][1] >> 1; 1020 fs[1][1][0] = f[1][1][0] >> 1; 1021 fs[1][1][1] = f[1][1][1] >> 1; 1022 } 1023 1024 for(int i = 0; i < 2; i++) 1025 { 1026 for(int j = 0; j < 2; j++) 1027 { 1028 for(int k = 0; k < 2; k++) 1029 { 1030 c[i][j][k] = sampleTexel(u[i][j][k], v[i][j][k], s[i][j][k], offset, mipmap, buffer, function); 1031 1032 if(componentCount >= 1) { if(hasUnsignedTextureComponent(0)) c[i][j][k].x = MulHigh(As<UShort4>(c[i][j][k].x), f[1 - i][1 - j][1 - k]); else c[i][j][k].x = MulHigh(c[i][j][k].x, fs[1 - i][1 - j][1 - k]); } 1033 if(componentCount >= 2) { if(hasUnsignedTextureComponent(1)) c[i][j][k].y = MulHigh(As<UShort4>(c[i][j][k].y), f[1 - i][1 - j][1 - k]); else c[i][j][k].y = MulHigh(c[i][j][k].y, fs[1 - i][1 - j][1 - k]); } 1034 if(componentCount >= 3) { if(hasUnsignedTextureComponent(2)) c[i][j][k].z = MulHigh(As<UShort4>(c[i][j][k].z), f[1 - i][1 - j][1 - k]); else c[i][j][k].z = MulHigh(c[i][j][k].z, fs[1 - i][1 - j][1 - k]); } 1035 if(componentCount >= 4) { if(hasUnsignedTextureComponent(3)) c[i][j][k].w = MulHigh(As<UShort4>(c[i][j][k].w), f[1 - i][1 - j][1 - k]); else c[i][j][k].w = MulHigh(c[i][j][k].w, fs[1 - i][1 - j][1 - k]); } 1036 1037 if(i != 0 || j != 0 || k != 0) 1038 { 1039 if(componentCount >= 1) c[0][0][0].x += c[i][j][k].x; 1040 if(componentCount >= 2) c[0][0][0].y += c[i][j][k].y; 1041 if(componentCount >= 3) c[0][0][0].z += c[i][j][k].z; 1042 if(componentCount >= 4) c[0][0][0].w += c[i][j][k].w; 1043 } 1044 } 1045 } 1046 } 1047 1048 if(componentCount >= 1) c_.x = c[0][0][0].x; 1049 if(componentCount >= 2) c_.y = c[0][0][0].y; 1050 
if(componentCount >= 3) c_.z = c[0][0][0].z; 1051 if(componentCount >= 4) c_.w = c[0][0][0].w; 1052 1053 // Correct for signed fractions 1054 if(componentCount >= 1) if(!hasUnsignedTextureComponent(0)) c_.x = AddSat(c_.x, c_.x); 1055 if(componentCount >= 2) if(!hasUnsignedTextureComponent(1)) c_.y = AddSat(c_.y, c_.y); 1056 if(componentCount >= 3) if(!hasUnsignedTextureComponent(2)) c_.z = AddSat(c_.z, c_.z); 1057 if(componentCount >= 4) if(!hasUnsignedTextureComponent(3)) c_.w = AddSat(c_.w, c_.w); 1058 } 1059 1060 return c_; 1061 } 1062 1063 Vector4f SamplerCore::sampleFloatFilter(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &offset, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, Int face[4], SamplerFunction function) 1064 { 1065 Vector4f c = sampleFloatAniso(texture, u, v, w, q, offset, lod, anisotropy, uDelta, vDelta, face, false, function); 1066 1067 if(function == Fetch) 1068 { 1069 return c; 1070 } 1071 1072 if(state.mipmapFilter == MIPMAP_LINEAR) 1073 { 1074 Vector4f cc = sampleFloatAniso(texture, u, v, w, q, offset, lod, anisotropy, uDelta, vDelta, face, true, function); 1075 1076 Float4 lod4 = Float4(Frac(lod)); 1077 1078 c.x = (cc.x - c.x) * lod4 + c.x; 1079 c.y = (cc.y - c.y) * lod4 + c.y; 1080 c.z = (cc.z - c.z) * lod4 + c.z; 1081 c.w = (cc.w - c.w) * lod4 + c.w; 1082 } 1083 1084 Int4 borderMask; 1085 1086 if(state.addressingModeU == ADDRESSING_BORDER) 1087 { 1088 Int4 u0; 1089 1090 border(u0, u); 1091 1092 borderMask = u0; 1093 } 1094 1095 if(state.addressingModeV == ADDRESSING_BORDER) 1096 { 1097 Int4 v0; 1098 1099 border(v0, v); 1100 1101 if(state.addressingModeU == ADDRESSING_BORDER) 1102 { 1103 borderMask &= v0; 1104 } 1105 else 1106 { 1107 borderMask = v0; 1108 } 1109 } 1110 1111 if(state.addressingModeW == ADDRESSING_BORDER && state.textureType == TEXTURE_3D) 1112 { 1113 Int4 s0; 1114 1115 border(s0, w); 1116 1117 if(state.addressingModeU == ADDRESSING_BORDER || 1118 state.addressingModeV == 
ADDRESSING_BORDER) 1119 { 1120 borderMask &= s0; 1121 } 1122 else 1123 { 1124 borderMask = s0; 1125 } 1126 } 1127 1128 if(state.addressingModeU == ADDRESSING_BORDER || 1129 state.addressingModeV == ADDRESSING_BORDER || 1130 (state.addressingModeW == ADDRESSING_BORDER && state.textureType == TEXTURE_3D)) 1131 { 1132 Int4 b; 1133 1134 c.x = As<Float4>((borderMask & As<Int4>(c.x)) | (~borderMask & *Pointer<Int4>(texture + OFFSET(Texture,borderColorF[0])))); 1135 c.y = As<Float4>((borderMask & As<Int4>(c.y)) | (~borderMask & *Pointer<Int4>(texture + OFFSET(Texture,borderColorF[1])))); 1136 c.z = As<Float4>((borderMask & As<Int4>(c.z)) | (~borderMask & *Pointer<Int4>(texture + OFFSET(Texture,borderColorF[2])))); 1137 c.w = As<Float4>((borderMask & As<Int4>(c.w)) | (~borderMask & *Pointer<Int4>(texture + OFFSET(Texture,borderColorF[3])))); 1138 } 1139 1140 return c; 1141 } 1142 1143 Vector4f SamplerCore::sampleFloatAniso(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &offset, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, Int face[4], bool secondLOD, SamplerFunction function) 1144 { 1145 Vector4f c; 1146 1147 if(state.textureFilter != FILTER_ANISOTROPIC || function == Lod || function == Fetch) 1148 { 1149 c = sampleFloat(texture, u, v, w, q, offset, lod, face, secondLOD, function); 1150 } 1151 else 1152 { 1153 Int a = RoundInt(anisotropy); 1154 1155 Vector4f cSum; 1156 1157 cSum.x = Float4(0.0f); 1158 cSum.y = Float4(0.0f); 1159 cSum.z = Float4(0.0f); 1160 cSum.w = Float4(0.0f); 1161 1162 Float4 A = *Pointer<Float4>(constants + OFFSET(Constants,uvWeight) + 16 * a); 1163 Float4 B = *Pointer<Float4>(constants + OFFSET(Constants,uvStart) + 16 * a); 1164 1165 Float4 du = uDelta; 1166 Float4 dv = vDelta; 1167 1168 Float4 u0 = u + B * du; 1169 Float4 v0 = v + B * dv; 1170 1171 du *= A; 1172 dv *= A; 1173 1174 Int i = 0; 1175 1176 Do 1177 { 1178 c = sampleFloat(texture, u0, v0, w, q, offset, lod, face, secondLOD, function); 1179 
// Continuation of the anisotropic sample loop of SamplerCore::sampleFloatAniso:
// advance to the next sample position and accumulate the weighted color.
			u0 += du;
			v0 += dv;

			cSum.x += c.x * A;
			cSum.y += c.y * A;
			cSum.z += c.z * A;
			cSum.w += c.w * A;

			i++;
		}
		Until(i >= a)

		c.x = cSum.x;
		c.y = cSum.y;
		c.z = cSum.z;
		c.w = cSum.w;
	}

	return c;
}

// Dispatches a floating-point sample to the 3D path for TEXTURE_3D and to the 2D path
// for every other texture type. Note that the 3D path receives neither 'q' nor 'face'.
Vector4f SamplerCore::sampleFloat(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &offset, Float &lod, Int face[4], bool secondLOD, SamplerFunction function)
{
	if(state.textureType != TEXTURE_3D)
	{
		return sampleFloat2D(texture, u, v, w, q, offset, lod, face, secondLOD, function);
	}
	else
	{
		return sampleFloat3D(texture, u, v, w, offset, lod, secondLOD, function);
	}
}

// Samples a non-3D texture at one mip level.
//
// After selecting the mipmap, address() yields the two texel coordinates and the blend
// fraction per axis. y (and z, when a third coordinate exists) are pre-multiplied by the row
// and slice pitch. FILTER_POINT or Fetch reads a single texel; otherwise four texels are read
// and either bilinearly blended or, for FILTER_GATHER, their x components are returned as
// (c1, c2, c3, c0).
Vector4f SamplerCore::sampleFloat2D(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Float4 &q, Vector4f &offset, Float &lod, Int face[4], bool secondLOD, SamplerFunction function)
{
	Vector4f c;

	int componentCount = textureComponentCount();
	bool gather = state.textureFilter == FILTER_GATHER;

	Pointer<Byte> mipmap;
	Pointer<Byte> buffer[4];

	selectMipmap(texture, buffer, mipmap, lod, face, secondLOD);

	Int4 x0, x1, y0, y1, z0;
	Float4 fu, fv;
	Int4 filter = computeFilterOffset(lod);
	address(u, x0, x1, fu, mipmap, offset.x, filter, OFFSET(Mipmap, width), state.addressingModeU, function);
	address(v, y0, y1, fv, mipmap, offset.y, filter, OFFSET(Mipmap, height), state.addressingModeV, function);
	// NOTE(review): z0 is passed for both coordinate outputs and fv is reused as the fraction
	// output. Presumably address() does not write the fraction for the W addressing modes used
	// with non-3D textures; otherwise the v fraction would be clobbered - confirm in address().
	address(w, z0, z0, fv, mipmap, offset.z, filter, OFFSET(Mipmap, depth), state.addressingModeW, function);

	Int4 pitchP = *Pointer<Int4>(mipmap + OFFSET(Mipmap, pitchP), 16);
	y0 *= pitchP;
	if(hasThirdCoordinate())
	{
		Int4 sliceP = *Pointer<Int4>(mipmap + OFFSET(Mipmap, sliceP), 16);
		z0 *= sliceP;
	}

	if(state.textureFilter == FILTER_POINT || (function == Fetch))
	{
		c = sampleTexel(x0, y0, z0, q, mipmap, buffer, function);
	}
	else
	{
		y1 *= pitchP;

		Vector4f c0 = sampleTexel(x0, y0, z0, q, mipmap, buffer, function);
		Vector4f c1 = sampleTexel(x1, y0, z0, q, mipmap, buffer, function);
		Vector4f c2 = sampleTexel(x0, y1, z0, q, mipmap, buffer, function);
		Vector4f c3 = sampleTexel(x1, y1, z0, q, mipmap, buffer, function);

		if(!gather)   // Blend
		{
			// Horizontal blend of the top row.
			if(componentCount >= 1) c0.x = c0.x + fu * (c1.x - c0.x);
			if(componentCount >= 2) c0.y = c0.y + fu * (c1.y - c0.y);
			if(componentCount >= 3) c0.z = c0.z + fu * (c1.z - c0.z);
			if(componentCount >= 4) c0.w = c0.w + fu * (c1.w - c0.w);

			// Horizontal blend of the bottom row.
			if(componentCount >= 1) c2.x = c2.x + fu * (c3.x - c2.x);
			if(componentCount >= 2) c2.y = c2.y + fu * (c3.y - c2.y);
			if(componentCount >= 3) c2.z = c2.z + fu * (c3.z - c2.z);
			if(componentCount >= 4) c2.w = c2.w + fu * (c3.w - c2.w);

			// Vertical blend of the two rows.
			if(componentCount >= 1) c.x = c0.x + fv * (c2.x - c0.x);
			if(componentCount >= 2) c.y = c0.y + fv * (c2.y - c0.y);
			if(componentCount >= 3) c.z = c0.z + fv * (c2.z - c0.z);
			if(componentCount >= 4) c.w = c0.w + fv * (c2.w - c0.w);
		}
		else
		{
			c.x = c1.x;
			c.y = c2.x;
			c.z = c3.x;
			c.w = c0.x;
		}
	}

	return c;
}

// Samples a 3D texture at one mip level.
//
// Same structure as the 2D path, with a second slice coordinate z1 and fraction fw:
// FILTER_POINT or Fetch reads one texel, otherwise eight texels are read and blended
// along u, then v, then w. 'face' is a local that is only handed to selectMipmap().
Vector4f SamplerCore::sampleFloat3D(Pointer<Byte> &texture, Float4 &u, Float4 &v, Float4 &w, Vector4f &offset, Float &lod, bool secondLOD, SamplerFunction function)
{
	Vector4f c;

	int componentCount = textureComponentCount();

	Pointer<Byte> mipmap;
	Pointer<Byte> buffer[4];
	Int face[4];

	selectMipmap(texture, buffer, mipmap, lod, face, secondLOD);

	Int4 x0, x1, y0, y1, z0, z1;
	Float4 fu, fv, fw;
	Int4 filter = computeFilterOffset(lod);
	address(u, x0, x1, fu, mipmap, offset.x, filter, OFFSET(Mipmap, width), state.addressingModeU, function);
	address(v, y0, y1, fv, mipmap, offset.y, filter, OFFSET(Mipmap, height), state.addressingModeV, function);
	address(w, z0, z1, fw, mipmap, offset.z, filter, OFFSET(Mipmap, depth), state.addressingModeW, function);

	Int4 pitchP = *Pointer<Int4>(mipmap + OFFSET(Mipmap, pitchP), 16);
	Int4 sliceP = *Pointer<Int4>(mipmap + OFFSET(Mipmap, sliceP), 16);
	y0 *= pitchP;
	z0 *= sliceP;

	if(state.textureFilter == FILTER_POINT || (function == Fetch))
	{
		c = sampleTexel(x0, y0, z0, w, mipmap, buffer, function);
	}
	else
	{
		y1 *= pitchP;
		z1 *= sliceP;

		Vector4f c0 = sampleTexel(x0, y0, z0, w, mipmap, buffer, function);
		Vector4f c1 = sampleTexel(x1, y0, z0, w, mipmap, buffer, function);
		Vector4f c2 = sampleTexel(x0, y1, z0, w, mipmap, buffer, function);
		Vector4f c3 = sampleTexel(x1, y1, z0, w, mipmap, buffer, function);
		Vector4f c4 = sampleTexel(x0, y0, z1, w, mipmap, buffer, function);
		Vector4f c5 = sampleTexel(x1, y0, z1, w, mipmap, buffer, function);
		Vector4f c6 = sampleTexel(x0, y1, z1, w, mipmap, buffer, function);
		Vector4f c7 = sampleTexel(x1, y1, z1, w, mipmap, buffer, function);

		// Blend first slice
		if(componentCount >= 1) c0.x = c0.x + fu * (c1.x - c0.x);
		if(componentCount >= 2) c0.y = c0.y + fu * (c1.y - c0.y);
		if(componentCount >= 3) c0.z = c0.z + fu * (c1.z - c0.z);
		if(componentCount >= 4) c0.w = c0.w + fu * (c1.w - c0.w);

		if(componentCount >= 1) c2.x = c2.x + fu * (c3.x - c2.x);
		if(componentCount >= 2) c2.y = c2.y + fu * (c3.y - c2.y);
		if(componentCount >= 3) c2.z = c2.z + fu * (c3.z - c2.z);
		if(componentCount >= 4) c2.w = c2.w + fu * (c3.w - c2.w);

		if(componentCount >= 1) c0.x = c0.x + fv * (c2.x - c0.x);
		if(componentCount >= 2) c0.y = c0.y + fv * (c2.y - c0.y);
		if(componentCount >= 3) c0.z = c0.z + fv * (c2.z - c0.z);
		if(componentCount >= 4) c0.w = c0.w + fv * (c2.w - c0.w);

		// Blend second slice
		if(componentCount >= 1) c4.x = c4.x + fu * (c5.x - c4.x);
		if(componentCount >= 2) c4.y = c4.y + fu * (c5.y - c4.y);
		if(componentCount >= 3) c4.z = c4.z + fu * (c5.z - c4.z);
		if(componentCount >= 4) c4.w = c4.w + fu * (c5.w - c4.w);

		if(componentCount >= 1) c6.x = c6.x + fu * (c7.x - c6.x);
		if(componentCount >= 2) c6.y = c6.y + fu * (c7.y - c6.y);
		if(componentCount >= 3) c6.z = c6.z + fu * (c7.z - c6.z);
		if(componentCount >= 4) c6.w = c6.w + fu * (c7.w - c6.w);

		if(componentCount >= 1) c4.x = c4.x + fv * (c6.x - c4.x);
		if(componentCount >= 2) c4.y = c4.y + fv * (c6.y - c4.y);
		if(componentCount >= 3) c4.z = c4.z + fv * (c6.z - c4.z);
		if(componentCount >= 4) c4.w = c4.w + fv * (c6.w - c4.w);

		// Blend slices
		if(componentCount >= 1) c.x = c0.x + fw * (c4.x - c0.x);
		if(componentCount >= 2) c.y = c0.y + fw * (c4.y - c0.y);
		if(componentCount >= 3) c.z = c0.z + fw * (c4.z - c0.z);
		if(componentCount >= 4) c.w = c0.w + fw * (c4.w - c0.w);
	}

	return c;
}

// Approximates log2(sqrt(lod)) using the IEEE-754 bit pattern of lod^2 instead of a real
// logarithm: the float's bits, read as an integer, grow linearly with the exponent.
Float SamplerCore::log2sqrt(Float lod)
{
	// log2(sqrt(lod))                               // Equals 0.25 * log2(lod^2).
	lod *= lod;                                      // Squaring doubles the exponent and produces an extra bit of precision.
	lod = Float(As<Int>(lod)) - Float(0x3F800000);   // Interpret as integer and subtract the exponent bias.
	lod *= As<Float>(Int(0x33000000));               // Scale by 0.25 * 2^-23 (mantissa length).

	return lod;
}

// Approximates log2(lod) with the same bit-pattern technique as log2sqrt(), but scaling
// the result of the squared input by 0.5 instead of 0.25.
Float SamplerCore::log2(Float lod)
{
	lod *= lod;                                      // Squaring doubles the exponent and produces an extra bit of precision.
	lod = Float(As<Int>(lod)) - Float(0x3F800000);   // Interpret as integer and subtract the exponent bias.
	lod *= As<Float>(Int(0x33800000));               // Scale by 0.5 * 2^-23 (mantissa length).
// End of SamplerCore::log2.

	return lod;
}

// Computes the level-of-detail (and, for anisotropic filtering, the anisotropy and the
// major-axis step uDelta/vDelta) for 2D sampling.
//
// For every function except Lod and Fetch, coordinate derivatives are taken either from the
// differences between lanes y/z and lane x of uuuu/vvvv (implicit) or from dsx/dsy (Grad),
// scaled by Texture::widthHeightLOD, and the larger squared gradient length becomes 'lod'
// before log2sqrt() is applied. Bias adds lodBias afterwards. Lod uses lodBias directly and
// Fetch reinterprets lodBias's bits as an integer level. The result is clamped to
// [Texture::minLod, Texture::maxLod].
void SamplerCore::computeLod(Pointer<Byte> &texture, Float &lod, Float &anisotropy, Float4 &uDelta, Float4 &vDelta, Float4 &uuuu, Float4 &vvvv, const Float &lodBias, Vector4f &dsx, Vector4f &dsy, SamplerFunction function)
{
	if(function != Lod && function != Fetch)
	{
		Float4 duvdxy;

		if(function != Grad)   // Implicit
		{
			// (du/dx, du/dy, dv/dx, dv/dy) from neighboring lanes relative to lane x.
			duvdxy = Float4(uuuu.yz, vvvv.yz) - Float4(uuuu.xx, vvvv.xx);
		}
		else
		{
			Float4 dudxy = Float4(dsx.x.xx, dsy.x.xx);
			Float4 dvdxy = Float4(dsx.y.xx, dsy.y.xx);

			duvdxy = Float4(dudxy.xz, dvdxy.xz);
		}

		// Scale by texture dimensions and global LOD.
		Float4 dUVdxy = duvdxy * *Pointer<Float4>(texture + OFFSET(Texture,widthHeightLOD));

		Float4 dUV2dxy = dUVdxy * dUVdxy;
		Float4 dUV2 = dUV2dxy.xy + dUV2dxy.zw;

		lod = Max(Float(dUV2.x), Float(dUV2.y));   // Square length of major axis

		if(state.textureFilter == FILTER_ANISOTROPIC)
		{
			// |determinant| of the scaled 2x2 derivative matrix.
			Float det = Abs(Float(dUVdxy.x) * Float(dUVdxy.w) - Float(dUVdxy.y) * Float(dUVdxy.z));

			Float4 dudx = duvdxy.xxxx;
			Float4 dudy = duvdxy.yyyy;
			Float4 dvdx = duvdxy.zzzz;
			Float4 dvdy = duvdxy.wwww;

			// Select the x derivatives when the x gradient is not shorter than the y gradient,
			// the y derivatives otherwise.
			Int4 mask = As<Int4>(CmpNLT(dUV2.x, dUV2.y));
			uDelta = As<Float4>((As<Int4>(dudx) & mask) | ((As<Int4>(dudy) & ~mask)));
			vDelta = As<Float4>((As<Int4>(dvdx) & mask) | ((As<Int4>(dvdy) & ~mask)));

			anisotropy = lod * Rcp_pp(det);
			anisotropy = Min(anisotropy, *Pointer<Float>(texture + OFFSET(Texture,maxAnisotropy)));

			// lod still holds a squared length here, hence the division by anisotropy^2.
			lod *= Rcp_pp(anisotropy * anisotropy);
		}

		lod = log2sqrt(lod);   // log2(sqrt(lod))

		if(function == Bias)
		{
			lod += lodBias;
		}
	}
	else if(function == Lod)
	{
		lod = lodBias;
	}
	else if(function == Fetch)
	{
		// TODO: Eliminate int-float-int conversion.
// End of SamplerCore::computeLod: Fetch branch, remaining branches and the final LOD clamp.
		lod = Float(As<Int>(lodBias));   // lodBias carries integer bits for Fetch.
	}
	// NOTE(review): Base is neither Lod nor Fetch, so it appears to be handled by the first
	// branch of this chain; this branch and the assert below look unreachable - confirm intent.
	else if(function == Base)
	{
		lod = Float(0);
	}
	else assert(false);

	lod = Max(lod, *Pointer<Float>(texture + OFFSET(Texture, minLod)));
	lod = Min(lod, *Pointer<Float>(texture + OFFSET(Texture, maxLod)));
}

// Computes the level-of-detail for cube map sampling.
//
// For every function except Lod and Fetch, absolute derivatives of the coordinates scaled by M
// are formed (from lane differences, or from dsx/dsy for Grad), combined into the largest
// pairwise sum, scaled by Texture::widthLOD and passed through log2(). Bias adds lodBias.
// Lod uses lodBias directly, Fetch reinterprets its bits as an integer level. The result is
// clamped to [Texture::minLod, Texture::maxLod].
void SamplerCore::computeLodCube(Pointer<Byte> &texture, Float &lod, Float4 &u, Float4 &v, Float4 &w, const Float &lodBias, Vector4f &dsx, Vector4f &dsy, Float4 &M, SamplerFunction function)
{
	if(function != Lod && function != Fetch)
	{
		Float4 dudxy, dvdxy, dsdxy;

		if(function != Grad)   // Implicit
		{
			Float4 U = u * M;
			Float4 V = v * M;
			Float4 W = w * M;

			// Differences of every lane relative to lane x.
			dudxy = Abs(U - U.xxxx);
			dvdxy = Abs(V - V.xxxx);
			dsdxy = Abs(W - W.xxxx);
		}
		else
		{
			dudxy = Float4(dsx.x.xx, dsy.x.xx);
			dvdxy = Float4(dsx.y.xx, dsy.y.xx);
			dsdxy = Float4(dsx.z.xx, dsy.z.xx);

			dudxy = Abs(dudxy * Float4(M.x));
			dvdxy = Abs(dvdxy * Float4(M.x));
			dsdxy = Abs(dsdxy * Float4(M.x));
		}

		// Compute the largest Manhattan distance in two dimensions.
		// This takes the footprint across adjacent faces into account.
		Float4 duvdxy = dudxy + dvdxy;
		Float4 dusdxy = dudxy + dsdxy;
		Float4 dvsdxy = dvdxy + dsdxy;

		dudxy = Max(Max(duvdxy, dusdxy), dvsdxy);

		lod = Max(Float(dudxy.y), Float(dudxy.z));   // FIXME: Max(dudxy.y, dudxy.z);

		// Scale by texture dimension and global LOD.
		lod *= *Pointer<Float>(texture + OFFSET(Texture,widthLOD));

		lod = log2(lod);

		if(function == Bias)
		{
			lod += lodBias;
		}
	}
	else if(function == Lod)
	{
		lod = lodBias;
	}
	else if(function == Fetch)
	{
		// TODO: Eliminate int-float-int conversion.
// End of SamplerCore::computeLodCube: Fetch branch, remaining branches and the final LOD clamp.
		lod = Float(As<Int>(lodBias));   // lodBias carries integer bits for Fetch.
	}
	// NOTE(review): Base is neither Lod nor Fetch, so it appears to be handled by the first
	// branch of this chain; this branch and the assert below look unreachable - confirm intent.
	else if(function == Base)
	{
		lod = Float(0);
	}
	else assert(false);

	lod = Max(lod, *Pointer<Float>(texture + OFFSET(Texture, minLod)));
	lod = Min(lod, *Pointer<Float>(texture + OFFSET(Texture, maxLod)));
}

// Computes the level-of-detail for 3D texture sampling.
//
// For every function except Lod and Fetch, per-axis derivatives (lane differences relative to
// lane x, or dsx/dsy for Grad) are scaled by Texture::widthLOD/heightLOD/depthLOD, squared and
// summed; the larger of lanes y and z goes through log2sqrt(). Bias adds lodBias. Lod uses
// lodBias directly, Fetch reinterprets its bits as an integer level. The result is clamped to
// [Texture::minLod, Texture::maxLod].
void SamplerCore::computeLod3D(Pointer<Byte> &texture, Float &lod, Float4 &uuuu, Float4 &vvvv, Float4 &wwww, const Float &lodBias, Vector4f &dsx, Vector4f &dsy, SamplerFunction function)
{
	if(function != Lod && function != Fetch)
	{
		Float4 dudxy, dvdxy, dsdxy;

		if(function != Grad)   // Implicit
		{
			dudxy = uuuu - uuuu.xxxx;
			dvdxy = vvvv - vvvv.xxxx;
			dsdxy = wwww - wwww.xxxx;
		}
		else
		{
			dudxy = Float4(dsx.x.xx, dsy.x.xx);
			dvdxy = Float4(dsx.y.xx, dsy.y.xx);
			dsdxy = Float4(dsx.z.xx, dsy.z.xx);
		}

		// Scale by texture dimensions and global LOD.
		dudxy *= *Pointer<Float4>(texture + OFFSET(Texture,widthLOD));
		dvdxy *= *Pointer<Float4>(texture + OFFSET(Texture,heightLOD));
		dsdxy *= *Pointer<Float4>(texture + OFFSET(Texture,depthLOD));

		// Squared gradient length per lane, accumulated in dudxy.
		dudxy *= dudxy;
		dvdxy *= dvdxy;
		dsdxy *= dsdxy;

		dudxy += dvdxy;
		dudxy += dsdxy;

		lod = Max(Float(dudxy.y), Float(dudxy.z));   // FIXME: Max(dudxy.y, dudxy.z);

		lod = log2sqrt(lod);   // log2(sqrt(lod))

		if(function == Bias)
		{
			lod += lodBias;
		}
	}
	else if(function == Lod)
	{
		lod = lodBias;
	}
	else if(function == Fetch)
	{
		// TODO: Eliminate int-float-int conversion.
1569 lod = Float(As<Int>(lodBias)); 1570 } 1571 else if(function == Base) 1572 { 1573 lod = Float(0); 1574 } 1575 else assert(false); 1576 1577 lod = Max(lod, *Pointer<Float>(texture + OFFSET(Texture, minLod))); 1578 lod = Min(lod, *Pointer<Float>(texture + OFFSET(Texture, maxLod))); 1579 } 1580 1581 void SamplerCore::cubeFace(Int face[4], Float4 &U, Float4 &V, Float4 &x, Float4 &y, Float4 &z, Float4 &M) 1582 { 1583 Int4 xn = CmpLT(x, Float4(0.0f)); // x < 0 1584 Int4 yn = CmpLT(y, Float4(0.0f)); // y < 0 1585 Int4 zn = CmpLT(z, Float4(0.0f)); // z < 0 1586 1587 Float4 absX = Abs(x); 1588 Float4 absY = Abs(y); 1589 Float4 absZ = Abs(z); 1590 1591 Int4 xy = CmpNLE(absX, absY); // abs(x) > abs(y) 1592 Int4 yz = CmpNLE(absY, absZ); // abs(y) > abs(z) 1593 Int4 zx = CmpNLE(absZ, absX); // abs(z) > abs(x) 1594 Int4 xMajor = xy & ~zx; // abs(x) > abs(y) && abs(x) > abs(z) 1595 Int4 yMajor = yz & ~xy; // abs(y) > abs(z) && abs(y) > abs(x) 1596 Int4 zMajor = zx & ~yz; // abs(z) > abs(x) && abs(z) > abs(y) 1597 1598 // FACE_POSITIVE_X = 000b 1599 // FACE_NEGATIVE_X = 001b 1600 // FACE_POSITIVE_Y = 010b 1601 // FACE_NEGATIVE_Y = 011b 1602 // FACE_POSITIVE_Z = 100b 1603 // FACE_NEGATIVE_Z = 101b 1604 1605 Int yAxis = SignMask(yMajor); 1606 Int zAxis = SignMask(zMajor); 1607 1608 Int4 n = ((xn & xMajor) | (yn & yMajor) | (zn & zMajor)) & Int4(0x80000000); 1609 Int negative = SignMask(n); 1610 1611 face[0] = *Pointer<Int>(constants + OFFSET(Constants,transposeBit0) + negative * 4); 1612 face[0] |= *Pointer<Int>(constants + OFFSET(Constants,transposeBit1) + yAxis * 4); 1613 face[0] |= *Pointer<Int>(constants + OFFSET(Constants,transposeBit2) + zAxis * 4); 1614 face[1] = (face[0] >> 4) & 0x7; 1615 face[2] = (face[0] >> 8) & 0x7; 1616 face[3] = (face[0] >> 12) & 0x7; 1617 face[0] &= 0x7; 1618 1619 M = Max(Max(absX, absY), absZ); 1620 1621 // U = xMajor ? 
(neg ^ -z) : ((zMajor & neg) ^ x) 1622 U = As<Float4>((xMajor & (n ^ As<Int4>(-z))) | (~xMajor & ((zMajor & n) ^ As<Int4>(x)))); 1623 1624 // V = !yMajor ? -y : (n ^ z) 1625 V = As<Float4>((~yMajor & As<Int4>(-y)) | (yMajor & (n ^ As<Int4>(z)))); 1626 1627 M = reciprocal(M) * Float4(0.5f); 1628 U = U * M + Float4(0.5f); 1629 V = V * M + Float4(0.5f); 1630 } 1631 1632 Short4 SamplerCore::applyOffset(Short4 &uvw, Float4 &offset, const Int4 &whd, AddressingMode mode) 1633 { 1634 Int4 tmp = Int4(As<UShort4>(uvw)); 1635 tmp = tmp + As<Int4>(offset); 1636 1637 switch(mode) 1638 { 1639 case AddressingMode::ADDRESSING_WRAP: 1640 tmp = (tmp + whd * Int4(-MIN_PROGRAM_TEXEL_OFFSET)) % whd; 1641 break; 1642 case AddressingMode::ADDRESSING_CLAMP: 1643 case AddressingMode::ADDRESSING_MIRROR: 1644 case AddressingMode::ADDRESSING_MIRRORONCE: 1645 case AddressingMode::ADDRESSING_BORDER: // FIXME: Implement and test ADDRESSING_MIRROR, ADDRESSING_MIRRORONCE, ADDRESSING_BORDER 1646 tmp = Min(Max(tmp, Int4(0)), whd - Int4(1)); 1647 break; 1648 case ADDRESSING_TEXELFETCH: 1649 break; 1650 case AddressingMode::ADDRESSING_SEAMLESS: 1651 ASSERT(false); // Cube sampling doesn't support offset. 1652 default: 1653 ASSERT(false); 1654 } 1655 1656 return As<Short4>(UShort4(tmp)); 1657 } 1658 1659 void SamplerCore::computeIndices(UInt index[4], Short4 uuuu, Short4 vvvv, Short4 wwww, Vector4f &offset, const Pointer<Byte> &mipmap, SamplerFunction function) 1660 { 1661 bool texelFetch = (function == Fetch); 1662 bool hasOffset = (function.option == Offset); 1663 1664 if(!texelFetch) 1665 { 1666 uuuu = MulHigh(As<UShort4>(uuuu), *Pointer<UShort4>(mipmap + OFFSET(Mipmap, width))); 1667 vvvv = MulHigh(As<UShort4>(vvvv), *Pointer<UShort4>(mipmap + OFFSET(Mipmap, height))); 1668 } 1669 1670 if(hasOffset) 1671 { 1672 UShort4 w = *Pointer<UShort4>(mipmap + OFFSET(Mipmap, width)); 1673 uuuu = applyOffset(uuuu, offset.x, Int4(w), texelFetch ? 
ADDRESSING_TEXELFETCH : state.addressingModeU); 1674 UShort4 h = *Pointer<UShort4>(mipmap + OFFSET(Mipmap, height)); 1675 vvvv = applyOffset(vvvv, offset.y, Int4(h), texelFetch ? ADDRESSING_TEXELFETCH : state.addressingModeV); 1676 } 1677 1678 Short4 uuu2 = uuuu; 1679 uuuu = As<Short4>(UnpackLow(uuuu, vvvv)); 1680 uuu2 = As<Short4>(UnpackHigh(uuu2, vvvv)); 1681 uuuu = As<Short4>(MulAdd(uuuu, *Pointer<Short4>(mipmap + OFFSET(Mipmap,onePitchP)))); 1682 uuu2 = As<Short4>(MulAdd(uuu2, *Pointer<Short4>(mipmap + OFFSET(Mipmap,onePitchP)))); 1683 1684 if(hasThirdCoordinate()) 1685 { 1686 if(state.textureType != TEXTURE_2D_ARRAY) 1687 { 1688 if(!texelFetch) 1689 { 1690 wwww = MulHigh(As<UShort4>(wwww), *Pointer<UShort4>(mipmap + OFFSET(Mipmap, depth))); 1691 } 1692 1693 if(hasOffset) 1694 { 1695 UShort4 d = *Pointer<UShort4>(mipmap + OFFSET(Mipmap, depth)); 1696 wwww = applyOffset(wwww, offset.z, Int4(d), texelFetch ? ADDRESSING_TEXELFETCH : state.addressingModeW); 1697 } 1698 } 1699 1700 UInt4 uv(As<UInt2>(uuuu), As<UInt2>(uuu2)); 1701 uv += As<UInt4>(Int4(As<UShort4>(wwww))) * *Pointer<UInt4>(mipmap + OFFSET(Mipmap, sliceP)); 1702 1703 index[0] = Extract(As<Int4>(uv), 0); 1704 index[1] = Extract(As<Int4>(uv), 1); 1705 index[2] = Extract(As<Int4>(uv), 2); 1706 index[3] = Extract(As<Int4>(uv), 3); 1707 } 1708 else 1709 { 1710 index[0] = Extract(As<Int2>(uuuu), 0); 1711 index[1] = Extract(As<Int2>(uuuu), 1); 1712 index[2] = Extract(As<Int2>(uuu2), 0); 1713 index[3] = Extract(As<Int2>(uuu2), 1); 1714 } 1715 1716 if(texelFetch) 1717 { 1718 Int size = Int(*Pointer<Int>(mipmap + OFFSET(Mipmap, sliceP))); 1719 if(hasThirdCoordinate()) 1720 { 1721 size *= Int(*Pointer<Short>(mipmap + OFFSET(Mipmap, depth))); 1722 } 1723 UInt min = 0; 1724 UInt max = size - 1; 1725 1726 for(int i = 0; i < 4; i++) 1727 { 1728 index[i] = Min(Max(index[i], min), max); 1729 } 1730 } 1731 } 1732 1733 void SamplerCore::computeIndices(UInt index[4], Int4& uuuu, Int4& vvvv, Int4& wwww, const Pointer<Byte> 
&mipmap, SamplerFunction function) 1734 { 1735 UInt4 indices = uuuu + vvvv; 1736 1737 if(hasThirdCoordinate()) 1738 { 1739 indices += As<UInt4>(wwww); 1740 } 1741 1742 for(int i = 0; i < 4; i++) 1743 { 1744 index[i] = Extract(As<Int4>(indices), i); 1745 } 1746 } 1747 1748 Vector4s SamplerCore::sampleTexel(UInt index[4], Pointer<Byte> buffer[4]) 1749 { 1750 Vector4s c; 1751 1752 int f0 = state.textureType == TEXTURE_CUBE ? 0 : 0; 1753 int f1 = state.textureType == TEXTURE_CUBE ? 1 : 0; 1754 int f2 = state.textureType == TEXTURE_CUBE ? 2 : 0; 1755 int f3 = state.textureType == TEXTURE_CUBE ? 3 : 0; 1756 1757 if(has16bitTextureFormat()) 1758 { 1759 c.x = Insert(c.x, Pointer<Short>(buffer[f0])[index[0]], 0); 1760 c.x = Insert(c.x, Pointer<Short>(buffer[f1])[index[1]], 1); 1761 c.x = Insert(c.x, Pointer<Short>(buffer[f2])[index[2]], 2); 1762 c.x = Insert(c.x, Pointer<Short>(buffer[f3])[index[3]], 3); 1763 1764 switch(state.textureFormat) 1765 { 1766 case FORMAT_R5G6B5: 1767 c.z = (c.x & Short4(0x001Fu)) << 11; 1768 c.y = (c.x & Short4(0x07E0u)) << 5; 1769 c.x = (c.x & Short4(0xF800u)); 1770 break; 1771 default: 1772 ASSERT(false); 1773 } 1774 } 1775 else if(has8bitTextureComponents()) 1776 { 1777 switch(textureComponentCount()) 1778 { 1779 case 4: 1780 { 1781 Byte4 c0 = Pointer<Byte4>(buffer[f0])[index[0]]; 1782 Byte4 c1 = Pointer<Byte4>(buffer[f1])[index[1]]; 1783 Byte4 c2 = Pointer<Byte4>(buffer[f2])[index[2]]; 1784 Byte4 c3 = Pointer<Byte4>(buffer[f3])[index[3]]; 1785 c.x = Unpack(c0, c1); 1786 c.y = Unpack(c2, c3); 1787 1788 switch(state.textureFormat) 1789 { 1790 case FORMAT_A8R8G8B8: 1791 c.z = As<Short4>(UnpackLow(c.x, c.y)); 1792 c.x = As<Short4>(UnpackHigh(c.x, c.y)); 1793 c.y = c.z; 1794 c.w = c.x; 1795 c.z = UnpackLow(As<Byte8>(c.z), As<Byte8>(c.z)); 1796 c.y = UnpackHigh(As<Byte8>(c.y), As<Byte8>(c.y)); 1797 c.x = UnpackLow(As<Byte8>(c.x), As<Byte8>(c.x)); 1798 c.w = UnpackHigh(As<Byte8>(c.w), As<Byte8>(c.w)); 1799 break; 1800 case FORMAT_A8B8G8R8: 1801 case 
FORMAT_A8B8G8R8I: 1802 case FORMAT_A8B8G8R8_SNORM: 1803 case FORMAT_Q8W8V8U8: 1804 case FORMAT_SRGB8_A8: 1805 c.z = As<Short4>(UnpackHigh(c.x, c.y)); 1806 c.x = As<Short4>(UnpackLow(c.x, c.y)); 1807 c.y = c.x; 1808 c.w = c.z; 1809 c.x = UnpackLow(As<Byte8>(c.x), As<Byte8>(c.x)); 1810 c.y = UnpackHigh(As<Byte8>(c.y), As<Byte8>(c.y)); 1811 c.z = UnpackLow(As<Byte8>(c.z), As<Byte8>(c.z)); 1812 c.w = UnpackHigh(As<Byte8>(c.w), As<Byte8>(c.w)); 1813 // Propagate sign bit 1814 if(state.textureFormat == FORMAT_A8B8G8R8I) 1815 { 1816 c.x >>= 8; 1817 c.y >>= 8; 1818 c.z >>= 8; 1819 c.w >>= 8; 1820 } 1821 break; 1822 case FORMAT_A8B8G8R8UI: 1823 c.z = As<Short4>(UnpackHigh(c.x, c.y)); 1824 c.x = As<Short4>(UnpackLow(c.x, c.y)); 1825 c.y = c.x; 1826 c.w = c.z; 1827 c.x = UnpackLow(As<Byte8>(c.x), As<Byte8>(Short4(0))); 1828 c.y = UnpackHigh(As<Byte8>(c.y), As<Byte8>(Short4(0))); 1829 c.z = UnpackLow(As<Byte8>(c.z), As<Byte8>(Short4(0))); 1830 c.w = UnpackHigh(As<Byte8>(c.w), As<Byte8>(Short4(0))); 1831 break; 1832 default: 1833 ASSERT(false); 1834 } 1835 } 1836 break; 1837 case 3: 1838 { 1839 Byte4 c0 = Pointer<Byte4>(buffer[f0])[index[0]]; 1840 Byte4 c1 = Pointer<Byte4>(buffer[f1])[index[1]]; 1841 Byte4 c2 = Pointer<Byte4>(buffer[f2])[index[2]]; 1842 Byte4 c3 = Pointer<Byte4>(buffer[f3])[index[3]]; 1843 c.x = Unpack(c0, c1); 1844 c.y = Unpack(c2, c3); 1845 1846 switch(state.textureFormat) 1847 { 1848 case FORMAT_X8R8G8B8: 1849 c.z = As<Short4>(UnpackLow(c.x, c.y)); 1850 c.x = As<Short4>(UnpackHigh(c.x, c.y)); 1851 c.y = c.z; 1852 c.z = UnpackLow(As<Byte8>(c.z), As<Byte8>(c.z)); 1853 c.y = UnpackHigh(As<Byte8>(c.y), As<Byte8>(c.y)); 1854 c.x = UnpackLow(As<Byte8>(c.x), As<Byte8>(c.x)); 1855 break; 1856 case FORMAT_X8B8G8R8_SNORM: 1857 case FORMAT_X8B8G8R8I: 1858 case FORMAT_X8B8G8R8: 1859 case FORMAT_X8L8V8U8: 1860 case FORMAT_SRGB8_X8: 1861 c.z = As<Short4>(UnpackHigh(c.x, c.y)); 1862 c.x = As<Short4>(UnpackLow(c.x, c.y)); 1863 c.y = c.x; 1864 c.x = UnpackLow(As<Byte8>(c.x), 
As<Byte8>(c.x)); 1865 c.y = UnpackHigh(As<Byte8>(c.y), As<Byte8>(c.y)); 1866 c.z = UnpackLow(As<Byte8>(c.z), As<Byte8>(c.z)); 1867 // Propagate sign bit 1868 if(state.textureFormat == FORMAT_X8B8G8R8I) 1869 { 1870 c.x >>= 8; 1871 c.y >>= 8; 1872 c.z >>= 8; 1873 } 1874 break; 1875 case FORMAT_X8B8G8R8UI: 1876 c.z = As<Short4>(UnpackHigh(c.x, c.y)); 1877 c.x = As<Short4>(UnpackLow(c.x, c.y)); 1878 c.y = c.x; 1879 c.x = UnpackLow(As<Byte8>(c.x), As<Byte8>(Short4(0))); 1880 c.y = UnpackHigh(As<Byte8>(c.y), As<Byte8>(Short4(0))); 1881 c.z = UnpackLow(As<Byte8>(c.z), As<Byte8>(Short4(0))); 1882 break; 1883 default: 1884 ASSERT(false); 1885 } 1886 } 1887 break; 1888 case 2: 1889 c.x = Insert(c.x, Pointer<Short>(buffer[f0])[index[0]], 0); 1890 c.x = Insert(c.x, Pointer<Short>(buffer[f1])[index[1]], 1); 1891 c.x = Insert(c.x, Pointer<Short>(buffer[f2])[index[2]], 2); 1892 c.x = Insert(c.x, Pointer<Short>(buffer[f3])[index[3]], 3); 1893 1894 switch(state.textureFormat) 1895 { 1896 case FORMAT_G8R8: 1897 case FORMAT_G8R8_SNORM: 1898 case FORMAT_V8U8: 1899 case FORMAT_A8L8: 1900 c.y = (c.x & Short4(0xFF00u)) | As<Short4>(As<UShort4>(c.x) >> 8); 1901 c.x = (c.x & Short4(0x00FFu)) | (c.x << 8); 1902 break; 1903 case FORMAT_G8R8I: 1904 c.y = c.x >> 8; 1905 c.x = (c.x << 8) >> 8; // Propagate sign bit 1906 break; 1907 case FORMAT_G8R8UI: 1908 c.y = As<Short4>(As<UShort4>(c.x) >> 8); 1909 c.x &= Short4(0x00FFu); 1910 break; 1911 default: 1912 ASSERT(false); 1913 } 1914 break; 1915 case 1: 1916 { 1917 Int c0 = Int(*Pointer<Byte>(buffer[f0] + index[0])); 1918 Int c1 = Int(*Pointer<Byte>(buffer[f1] + index[1])); 1919 Int c2 = Int(*Pointer<Byte>(buffer[f2] + index[2])); 1920 Int c3 = Int(*Pointer<Byte>(buffer[f3] + index[3])); 1921 c0 = c0 | (c1 << 8) | (c2 << 16) | (c3 << 24); 1922 1923 switch(state.textureFormat) 1924 { 1925 case FORMAT_R8I: 1926 case FORMAT_R8UI: 1927 { 1928 Int zero(0); 1929 c.x = Unpack(As<Byte4>(c0), As<Byte4>(zero)); 1930 // Propagate sign bit 1931 
// Interior of SamplerCore::sampleTexel(UInt index[4], Pointer<Byte> buffer[4]):
// rest of the single-component 8-bit case, the 16-bit component formats and sRGB decoding.
						if(state.textureFormat == FORMAT_R8I)
						{
							c.x = (c.x << 8) >> 8;
						}
					}
					break;
				default:
					c.x = Unpack(As<Byte4>(c0));
					break;
				}
			}
			break;
		default:
			ASSERT(false);
		}
	}
	else if(has16bitTextureComponents())
	{
		switch(textureComponentCount())
		{
		case 4:
			// One texel (four 16-bit components) per row, then transpose to one component per row.
			c.x = Pointer<Short4>(buffer[f0])[index[0]];
			c.y = Pointer<Short4>(buffer[f1])[index[1]];
			c.z = Pointer<Short4>(buffer[f2])[index[2]];
			c.w = Pointer<Short4>(buffer[f3])[index[3]];
			transpose4x4(c.x, c.y, c.z, c.w);
			break;
		case 3:
			c.x = Pointer<Short4>(buffer[f0])[index[0]];
			c.y = Pointer<Short4>(buffer[f1])[index[1]];
			c.z = Pointer<Short4>(buffer[f2])[index[2]];
			c.w = Pointer<Short4>(buffer[f3])[index[3]];
			transpose4x3(c.x, c.y, c.z, c.w);
			break;
		case 2:
			// Texels are 4 bytes apart; interleave the pairs so x holds the first components
			// and y the second components of all four texels.
			c.x = *Pointer<Short4>(buffer[f0] + 4 * index[0]);
			c.x = As<Short4>(UnpackLow(c.x, *Pointer<Short4>(buffer[f1] + 4 * index[1])));
			c.z = *Pointer<Short4>(buffer[f2] + 4 * index[2]);
			c.z = As<Short4>(UnpackLow(c.z, *Pointer<Short4>(buffer[f3] + 4 * index[3])));
			c.y = c.x;
			c.x = UnpackLow(As<Int2>(c.x), As<Int2>(c.z));
			c.y = UnpackHigh(As<Int2>(c.y), As<Int2>(c.z));
			break;
		case 1:
			c.x = Insert(c.x, Pointer<Short>(buffer[f0])[index[0]], 0);
			c.x = Insert(c.x, Pointer<Short>(buffer[f1])[index[1]], 1);
			c.x = Insert(c.x, Pointer<Short>(buffer[f2])[index[2]], 2);
			c.x = Insert(c.x, Pointer<Short>(buffer[f3])[index[3]], 3);
			break;
		default:
			ASSERT(false);
		}
	}
	else ASSERT(false);

	if(state.sRGB)
	{
		// R5G6B5 uses conversions matching its 5/6/5-bit components; all other formats
		// convert only the components reported as RGB by isRGBComponent().
		if(state.textureFormat == FORMAT_R5G6B5)
		{
			sRGBtoLinear16_5_16(c.x);
			sRGBtoLinear16_6_16(c.y);
			sRGBtoLinear16_5_16(c.z);
		}
		else
		{
			for(int i = 0; i < textureComponentCount(); i++)
			{
				if(isRGBComponent(i))
				{
					sRGBtoLinear16_8_16(c[i]);
				}
			}
		}
	}

// End of SamplerCore::sampleTexel(UInt index[4], Pointer<Byte> buffer[4]).
	return c;
}

// Reads four texels addressed by fixed-point coordinates.
//
// Indices are derived with computeIndices(). Non-YUV formats are forwarded to the index-based
// sampleTexel() overload. For YUV formats the Y plane is read from buffer[0] and the two chroma
// planes from buffer[1] (V) and buffer[2] (U), using indices recomputed against the mipmap
// record that follows 'mipmap'; the values are converted to RGB in fixed point (coefficients
// scaled by 0x4000) and written to c.x/c.y/c.z. c.w is not assigned on the YUV path.
Vector4s SamplerCore::sampleTexel(Short4 &uuuu, Short4 &vvvv, Short4 &wwww, Vector4f &offset, Pointer<Byte> &mipmap, Pointer<Byte> buffer[4], SamplerFunction function)
{
	Vector4s c;

	UInt index[4];
	computeIndices(index, uuuu, vvvv, wwww, offset, mipmap, function);

	if(hasYuvFormat())
	{
		// Generic YPbPr to RGB transformation
		// R = Y                               + 2 * (1 - Kr) * Pr
		// G = Y - 2 * Kb * (1 - Kb) / Kg * Pb - 2 * Kr * (1 - Kr) / Kg * Pr
		// B = Y + 2 * (1 - Kb) * Pb

		float Kb = 0.114f;
		float Kr = 0.299f;
		int studioSwing = 1;

		switch(state.textureFormat)
		{
		case FORMAT_YV12_BT601:
			Kb = 0.114f;
			Kr = 0.299f;
			studioSwing = 1;
			break;
		case FORMAT_YV12_BT709:
			Kb = 0.0722f;
			Kr = 0.2126f;
			studioSwing = 1;
			break;
		case FORMAT_YV12_JFIF:
			Kb = 0.114f;
			Kr = 0.299f;
			studioSwing = 0;
			break;
		default:
			ASSERT(false);
		}

		const float Kg = 1.0f - Kr - Kb;

		const float Rr = 2 * (1 - Kr);
		const float Gb = -2 * Kb * (1 - Kb) / Kg;
		const float Gr = -2 * Kr * (1 - Kr) / Kg;
		const float Bb = 2 * (1 - Kb);

		// Scaling and bias for studio-swing range: Y = [16 .. 235], U/V = [16 .. 240]
		const float Yy = studioSwing ? 255.0f / (235 - 16) : 1.0f;
		const float Uu = studioSwing ? 255.0f / (240 - 16) : 1.0f;
		const float Vv = studioSwing ? 255.0f / (240 - 16) : 1.0f;

		const float Rv = Vv * Rr;
		const float Gu = Uu * Gb;
		const float Gv = Vv * Gr;
		const float Bu = Uu * Bb;

		// Constant terms: luma bias (studio swing only) and the 128 chroma bias folded together.
		const float R0 = (studioSwing * -16 * Yy - 128 * Rv) / 255;
		const float G0 = (studioSwing * -16 * Yy - 128 * Gu - 128 * Gv) / 255;
		const float B0 = (studioSwing * -16 * Yy - 128 * Bu) / 255;

		// Y plane: pack four bytes and expand them to 16-bit lanes.
		Int c0 = Int(buffer[0][index[0]]);
		Int c1 = Int(buffer[0][index[1]]);
		Int c2 = Int(buffer[0][index[2]]);
		Int c3 = Int(buffer[0][index[3]]);
		c0 = c0 | (c1 << 8) | (c2 << 16) | (c3 << 24);
		UShort4 Y = As<UShort4>(Unpack(As<Byte4>(c0)));

		// Chroma planes are indexed through the next Mipmap record.
		computeIndices(index, uuuu, vvvv, wwww, offset, mipmap + sizeof(Mipmap), function);
		c0 = Int(buffer[1][index[0]]);
		c1 = Int(buffer[1][index[1]]);
		c2 = Int(buffer[1][index[2]]);
		c3 = Int(buffer[1][index[3]]);
		c0 = c0 | (c1 << 8) | (c2 << 16) | (c3 << 24);
		UShort4 V = As<UShort4>(Unpack(As<Byte4>(c0)));

		c0 = Int(buffer[2][index[0]]);
		c1 = Int(buffer[2][index[1]]);
		c2 = Int(buffer[2][index[2]]);
		c3 = Int(buffer[2][index[3]]);
		c0 = c0 | (c1 << 8) | (c2 << 16) | (c3 << 24);
		UShort4 U = As<UShort4>(Unpack(As<Byte4>(c0)));

		// Coefficients as unsigned fixed point (scale 0x4000); negative ones are negated here
		// and applied with saturating subtraction below.
		const UShort4 yY = UShort4(iround(Yy * 0x4000));
		const UShort4 rV = UShort4(iround(Rv * 0x4000));
		const UShort4 gU = UShort4(iround(-Gu * 0x4000));
		const UShort4 gV = UShort4(iround(-Gv * 0x4000));
		const UShort4 bU = UShort4(iround(Bu * 0x4000));

		const UShort4 r0 = UShort4(iround(-R0 * 0x4000));
		const UShort4 g0 = UShort4(iround(G0 * 0x4000));
		const UShort4 b0 = UShort4(iround(-B0 * 0x4000));

		UShort4 y = MulHigh(Y, yY);
		UShort4 r = SubSat(y + MulHigh(V, rV), r0);
		UShort4 g = SubSat(y + g0, MulHigh(U, gU) + MulHigh(V, gV));
		UShort4 b = SubSat(y + MulHigh(U, bU), b0);

		// Clamp to 14 bits, then shift up to the full 16-bit range.
		c.x = Min(r, UShort4(0x3FFF)) << 2;
		c.y = Min(g, UShort4(0x3FFF)) << 2;
		c.z = Min(b, UShort4(0x3FFF)) << 2;
	}
	else
	{
		return sampleTexel(index, buffer);
	}

	return c;
}

// Reads four texels addressed by integer coordinates and returns them as floating-point
// vectors. (The remainder of this function lies beyond this span.)
Vector4f SamplerCore::sampleTexel(Int4 &uuuu, Int4 &vvvv, Int4 &wwww, Float4 &z, Pointer<Byte> &mipmap, Pointer<Byte> buffer[4], SamplerFunction function)
{
	Vector4f c;

	UInt index[4];
	computeIndices(index, uuuu, vvvv, wwww, mipmap, function);

	if(hasFloatTexture() || has32bitIntegerTextureComponents())
	{
		int f0 = state.textureType == TEXTURE_CUBE ? 0 : 0;
		int f1 = state.textureType == TEXTURE_CUBE ? 1 : 0;
		int f2 = state.textureType == TEXTURE_CUBE ? 2 : 0;
		int f3 = state.textureType == TEXTURE_CUBE ? 3 : 0;

		// Read texels
		switch(textureComponentCount())
		{
		case 4:
			c.x = *Pointer<Float4>(buffer[f0] + index[0] * 16, 16);
			c.y = *Pointer<Float4>(buffer[f1] + index[1] * 16, 16);
			c.z = *Pointer<Float4>(buffer[f2] + index[2] * 16, 16);
			c.w = *Pointer<Float4>(buffer[f3] + index[3] * 16, 16);
			transpose4x4(c.x, c.y, c.z, c.w);
			break;
		case 3:
			c.x = *Pointer<Float4>(buffer[f0] + index[0] * 16, 16);
			c.y = *Pointer<Float4>(buffer[f1] + index[1] * 16, 16);
			c.z = *Pointer<Float4>(buffer[f2] + index[2] * 16, 16);
			c.w = *Pointer<Float4>(buffer[f3] + index[3] * 16, 16);
			transpose4x3(c.x, c.y, c.z, c.w);
			break;
		case 2:
			// FIXME: Optimal shuffling?
			c.x.xy = *Pointer<Float4>(buffer[f0] + index[0] * 8);
			c.x.zw = *Pointer<Float4>(buffer[f1] + index[1] * 8 - 8);
			c.z.xy = *Pointer<Float4>(buffer[f2] + index[2] * 8);
			c.z.zw = *Pointer<Float4>(buffer[f3] + index[3] * 8 - 8);
			c.y = c.x;
			c.x = Float4(c.x.xz, c.z.xz);
			c.y = Float4(c.y.yw, c.z.yw);
			break;
		case 1:
			// FIXME: Optimal shuffling?
2161 c.x.x = *Pointer<Float>(buffer[f0] + index[0] * 4); 2162 c.x.y = *Pointer<Float>(buffer[f1] + index[1] * 4); 2163 c.x.z = *Pointer<Float>(buffer[f2] + index[2] * 4); 2164 c.x.w = *Pointer<Float>(buffer[f3] + index[3] * 4); 2165 break; 2166 default: 2167 ASSERT(false); 2168 } 2169 2170 if(state.compare != COMPARE_BYPASS) 2171 { 2172 Float4 ref = z; 2173 2174 if(!hasFloatTexture()) 2175 { 2176 ref = Min(Max(ref, Float4(0.0f)), Float4(1.0f)); 2177 } 2178 2179 Int4 boolean; 2180 2181 switch(state.compare) 2182 { 2183 case COMPARE_LESSEQUAL: boolean = CmpLE(ref, c.x); break; 2184 case COMPARE_GREATEREQUAL: boolean = CmpNLT(ref, c.x); break; 2185 case COMPARE_LESS: boolean = CmpLT(ref, c.x); break; 2186 case COMPARE_GREATER: boolean = CmpNLE(ref, c.x); break; 2187 case COMPARE_EQUAL: boolean = CmpEQ(ref, c.x); break; 2188 case COMPARE_NOTEQUAL: boolean = CmpNEQ(ref, c.x); break; 2189 case COMPARE_ALWAYS: boolean = Int4(-1); break; 2190 case COMPARE_NEVER: boolean = Int4(0); break; 2191 default: ASSERT(false); 2192 } 2193 2194 c.x = As<Float4>(boolean & As<Int4>(Float4(1.0f))); 2195 c.y = Float4(0.0f); 2196 c.z = Float4(0.0f); 2197 c.w = Float4(1.0f); 2198 } 2199 } 2200 else 2201 { 2202 ASSERT(!hasYuvFormat()); 2203 2204 Vector4s cs = sampleTexel(index, buffer); 2205 2206 bool isInteger = Surface::isNonNormalizedInteger(state.textureFormat); 2207 int componentCount = textureComponentCount(); 2208 for(int n = 0; n < componentCount; n++) 2209 { 2210 if(hasUnsignedTextureComponent(n)) 2211 { 2212 if(isInteger) 2213 { 2214 c[n] = As<Float4>(Int4(As<UShort4>(cs[n]))); 2215 } 2216 else 2217 { 2218 c[n] = Float4(As<UShort4>(cs[n])); 2219 } 2220 } 2221 else 2222 { 2223 if(isInteger) 2224 { 2225 c[n] = As<Float4>(Int4(cs[n])); 2226 } 2227 else 2228 { 2229 c[n] = Float4(cs[n]); 2230 } 2231 } 2232 } 2233 } 2234 2235 return c; 2236 } 2237 2238 void SamplerCore::selectMipmap(Pointer<Byte> &texture, Pointer<Byte> buffer[4], Pointer<Byte> &mipmap, Float &lod, Int face[4], bool 
secondLOD) 2239 { 2240 if(state.mipmapFilter == MIPMAP_NONE) 2241 { 2242 mipmap = texture + OFFSET(Texture,mipmap[0]); 2243 } 2244 else 2245 { 2246 Int ilod; 2247 2248 if(state.mipmapFilter == MIPMAP_POINT) 2249 { 2250 ilod = RoundInt(lod); 2251 } 2252 else // MIPMAP_LINEAR 2253 { 2254 ilod = Int(lod); 2255 } 2256 2257 mipmap = texture + OFFSET(Texture,mipmap) + ilod * sizeof(Mipmap) + secondLOD * sizeof(Mipmap); 2258 } 2259 2260 if(state.textureType != TEXTURE_CUBE) 2261 { 2262 buffer[0] = *Pointer<Pointer<Byte> >(mipmap + OFFSET(Mipmap,buffer[0])); 2263 2264 if(hasYuvFormat()) 2265 { 2266 buffer[1] = *Pointer<Pointer<Byte> >(mipmap + OFFSET(Mipmap,buffer[1])); 2267 buffer[2] = *Pointer<Pointer<Byte> >(mipmap + OFFSET(Mipmap,buffer[2])); 2268 } 2269 } 2270 else 2271 { 2272 for(int i = 0; i < 4; i++) 2273 { 2274 buffer[i] = *Pointer<Pointer<Byte> >(mipmap + OFFSET(Mipmap,buffer) + face[i] * sizeof(void*)); 2275 } 2276 } 2277 } 2278 2279 Int4 SamplerCore::computeFilterOffset(Float &lod) 2280 { 2281 Int4 filter = -1; 2282 2283 if(state.textureFilter == FILTER_POINT) 2284 { 2285 filter = 0; 2286 } 2287 else if(state.textureFilter == FILTER_MIN_LINEAR_MAG_POINT) 2288 { 2289 filter = CmpNLE(Float4(lod), Float4(0.0f)); 2290 } 2291 else if(state.textureFilter == FILTER_MIN_POINT_MAG_LINEAR) 2292 { 2293 filter = CmpLE(Float4(lod), Float4(0.0f)); 2294 } 2295 2296 return filter; 2297 } 2298 2299 Short4 SamplerCore::address(Float4 &uw, AddressingMode addressingMode, Pointer<Byte> &mipmap) 2300 { 2301 if(addressingMode == ADDRESSING_LAYER && state.textureType != TEXTURE_2D_ARRAY) 2302 { 2303 return Short4(); // Unused 2304 } 2305 else if(addressingMode == ADDRESSING_LAYER && state.textureType == TEXTURE_2D_ARRAY) 2306 { 2307 return Min(Max(Short4(RoundInt(uw)), Short4(0)), *Pointer<Short4>(mipmap + OFFSET(Mipmap, depth)) - Short4(1)); 2308 } 2309 else if(addressingMode == ADDRESSING_CLAMP || addressingMode == ADDRESSING_BORDER) 2310 { 2311 Float4 clamp = Min(Max(uw, 
Float4(0.0f)), Float4(65535.0f / 65536.0f)); 2312 2313 return Short4(Int4(clamp * Float4(1 << 16))); 2314 } 2315 else if(addressingMode == ADDRESSING_MIRROR) 2316 { 2317 Int4 convert = Int4(uw * Float4(1 << 16)); 2318 Int4 mirror = (convert << 15) >> 31; 2319 2320 convert ^= mirror; 2321 2322 return Short4(convert); 2323 } 2324 else if(addressingMode == ADDRESSING_MIRRORONCE) 2325 { 2326 // Absolute value 2327 Int4 convert = Int4(Abs(uw * Float4(1 << 16))); 2328 2329 // Clamp 2330 convert -= Int4(0x00008000, 0x00008000, 0x00008000, 0x00008000); 2331 convert = As<Int4>(PackSigned(convert, convert)); 2332 2333 return As<Short4>(Int2(convert)) + Short4(0x8000u); 2334 } 2335 else // Wrap 2336 { 2337 return Short4(Int4(uw * Float4(1 << 16))); 2338 } 2339 } 2340 2341 void SamplerCore::address(Float4 &uvw, Int4 &xyz0, Int4 &xyz1, Float4 &f, Pointer<Byte> &mipmap, Float4 &texOffset, Int4 &filter, int whd, AddressingMode addressingMode, SamplerFunction function) 2342 { 2343 if(addressingMode == ADDRESSING_LAYER && state.textureType != TEXTURE_2D_ARRAY) 2344 { 2345 return; // Unused 2346 } 2347 2348 Int4 dim = Int4(*Pointer<Short4>(mipmap + whd, 16)); 2349 Int4 maxXYZ = dim - Int4(1); 2350 2351 if(function == Fetch) 2352 { 2353 xyz0 = Min(Max(((function.option == Offset) && (addressingMode != ADDRESSING_LAYER)) ? 
As<Int4>(uvw) + As<Int4>(texOffset) : As<Int4>(uvw), Int4(0)), maxXYZ); 2354 } 2355 else if(addressingMode == ADDRESSING_LAYER && state.textureType == TEXTURE_2D_ARRAY) // Note: Offset does not apply to array layers 2356 { 2357 xyz0 = Min(Max(RoundInt(uvw), Int4(0)), maxXYZ); 2358 } 2359 else 2360 { 2361 const int halfBits = 0x3EFFFFFF; // Value just under 0.5f 2362 const int oneBits = 0x3F7FFFFF; // Value just under 1.0f 2363 const int twoBits = 0x3FFFFFFF; // Value just under 2.0f 2364 2365 bool pointFilter = state.textureFilter == FILTER_POINT || 2366 state.textureFilter == FILTER_MIN_POINT_MAG_LINEAR || 2367 state.textureFilter == FILTER_MIN_LINEAR_MAG_POINT; 2368 2369 Float4 coord = uvw; 2370 2371 if(state.textureType == TEXTURE_RECTANGLE) 2372 { 2373 coord = Min(Max(coord, Float4(0.0f)), Float4(dim - Int4(1))); 2374 } 2375 else 2376 { 2377 switch(addressingMode) 2378 { 2379 case ADDRESSING_CLAMP: 2380 case ADDRESSING_BORDER: 2381 case ADDRESSING_SEAMLESS: 2382 // Linear filtering of cube doesn't require clamping because the coordinates 2383 // are already in [0, 1] range and numerical imprecision is tolerated. 
2384 if(addressingMode != ADDRESSING_SEAMLESS || pointFilter) 2385 { 2386 Float4 one = As<Float4>(Int4(oneBits)); 2387 coord = Min(Max(coord, Float4(0.0f)), one); 2388 } 2389 break; 2390 case ADDRESSING_MIRROR: 2391 { 2392 Float4 half = As<Float4>(Int4(halfBits)); 2393 Float4 one = As<Float4>(Int4(oneBits)); 2394 Float4 two = As<Float4>(Int4(twoBits)); 2395 coord = one - Abs(two * Frac(coord * half) - one); 2396 } 2397 break; 2398 case ADDRESSING_MIRRORONCE: 2399 { 2400 Float4 half = As<Float4>(Int4(halfBits)); 2401 Float4 one = As<Float4>(Int4(oneBits)); 2402 Float4 two = As<Float4>(Int4(twoBits)); 2403 coord = one - Abs(two * Frac(Min(Max(coord, -one), two) * half) - one); 2404 } 2405 break; 2406 default: // Wrap 2407 coord = Frac(coord); 2408 break; 2409 } 2410 2411 coord = coord * Float4(dim); 2412 } 2413 2414 if(state.textureFilter == FILTER_POINT || 2415 state.textureFilter == FILTER_GATHER) 2416 { 2417 xyz0 = Int4(coord); 2418 } 2419 else 2420 { 2421 if(state.textureFilter == FILTER_MIN_POINT_MAG_LINEAR || 2422 state.textureFilter == FILTER_MIN_LINEAR_MAG_POINT) 2423 { 2424 coord -= As<Float4>(As<Int4>(Float4(0.5f)) & filter); 2425 } 2426 else 2427 { 2428 coord -= Float4(0.5f); 2429 } 2430 2431 Float4 floor = Floor(coord); 2432 xyz0 = Int4(floor); 2433 f = coord - floor; 2434 } 2435 2436 if(function.option == Offset) 2437 { 2438 xyz0 += As<Int4>(texOffset); 2439 } 2440 2441 if(addressingMode == ADDRESSING_SEAMLESS) 2442 { 2443 xyz0 += Int4(1); 2444 } 2445 2446 xyz1 = xyz0 - filter; // Increment 2447 2448 if(function.option == Offset) 2449 { 2450 switch(addressingMode) 2451 { 2452 case ADDRESSING_SEAMLESS: 2453 ASSERT(false); // Cube sampling doesn't support offset. 2454 case ADDRESSING_MIRROR: 2455 case ADDRESSING_MIRRORONCE: 2456 case ADDRESSING_BORDER: 2457 // FIXME: Implement ADDRESSING_MIRROR, ADDRESSING_MIRRORONCE, and ADDRESSING_BORDER. 2458 // Fall through to Clamp. 
2459 case ADDRESSING_CLAMP: 2460 xyz0 = Min(Max(xyz0, Int4(0)), maxXYZ); 2461 xyz1 = Min(Max(xyz1, Int4(0)), maxXYZ); 2462 break; 2463 default: // Wrap 2464 xyz0 = (xyz0 + dim * Int4(-MIN_PROGRAM_TEXEL_OFFSET)) % dim; 2465 xyz1 = (xyz1 + dim * Int4(-MIN_PROGRAM_TEXEL_OFFSET)) % dim; 2466 break; 2467 } 2468 } 2469 else if(state.textureFilter != FILTER_POINT) 2470 { 2471 switch(addressingMode) 2472 { 2473 case ADDRESSING_SEAMLESS: 2474 break; 2475 case ADDRESSING_MIRROR: 2476 case ADDRESSING_MIRRORONCE: 2477 case ADDRESSING_BORDER: 2478 case ADDRESSING_CLAMP: 2479 xyz0 = Max(xyz0, Int4(0)); 2480 xyz1 = Min(xyz1, maxXYZ); 2481 break; 2482 default: // Wrap 2483 { 2484 Int4 under = CmpLT(xyz0, Int4(0)); 2485 xyz0 = (under & maxXYZ) | (~under & xyz0); // xyz < 0 ? dim - 1 : xyz // FIXME: IfThenElse() 2486 2487 Int4 nover = CmpLT(xyz1, dim); 2488 xyz1 = nover & xyz1; // xyz >= dim ? 0 : xyz 2489 } 2490 break; 2491 } 2492 } 2493 } 2494 } 2495 2496 void SamplerCore::convertFixed12(Short4 &cs, Float4 &cf) 2497 { 2498 cs = RoundShort4(cf * Float4(0x1000)); 2499 } 2500 2501 void SamplerCore::convertFixed12(Vector4s &cs, Vector4f &cf) 2502 { 2503 convertFixed12(cs.x, cf.x); 2504 convertFixed12(cs.y, cf.y); 2505 convertFixed12(cs.z, cf.z); 2506 convertFixed12(cs.w, cf.w); 2507 } 2508 2509 void SamplerCore::convertSigned12(Float4 &cf, Short4 &cs) 2510 { 2511 cf = Float4(cs) * Float4(1.0f / 0x0FFE); 2512 } 2513 2514 // void SamplerCore::convertSigned12(Vector4f &cf, Vector4s &cs) 2515 // { 2516 // convertSigned12(cf.x, cs.x); 2517 // convertSigned12(cf.y, cs.y); 2518 // convertSigned12(cf.z, cs.z); 2519 // convertSigned12(cf.w, cs.w); 2520 // } 2521 2522 void SamplerCore::convertSigned15(Float4 &cf, Short4 &cs) 2523 { 2524 cf = Float4(cs) * Float4(1.0f / 0x7FFF); 2525 } 2526 2527 void SamplerCore::convertUnsigned16(Float4 &cf, Short4 &cs) 2528 { 2529 cf = Float4(As<UShort4>(cs)) * Float4(1.0f / 0xFFFF); 2530 } 2531 2532 void SamplerCore::sRGBtoLinear16_8_16(Short4 &c) 2533 { 2534 
c = As<UShort4>(c) >> 8; 2535 2536 Pointer<Byte> LUT = Pointer<Byte>(constants + OFFSET(Constants,sRGBtoLinear8_16)); 2537 2538 c = Insert(c, *Pointer<Short>(LUT + 2 * Int(Extract(c, 0))), 0); 2539 c = Insert(c, *Pointer<Short>(LUT + 2 * Int(Extract(c, 1))), 1); 2540 c = Insert(c, *Pointer<Short>(LUT + 2 * Int(Extract(c, 2))), 2); 2541 c = Insert(c, *Pointer<Short>(LUT + 2 * Int(Extract(c, 3))), 3); 2542 } 2543 2544 void SamplerCore::sRGBtoLinear16_6_16(Short4 &c) 2545 { 2546 c = As<UShort4>(c) >> 10; 2547 2548 Pointer<Byte> LUT = Pointer<Byte>(constants + OFFSET(Constants,sRGBtoLinear6_16)); 2549 2550 c = Insert(c, *Pointer<Short>(LUT + 2 * Int(Extract(c, 0))), 0); 2551 c = Insert(c, *Pointer<Short>(LUT + 2 * Int(Extract(c, 1))), 1); 2552 c = Insert(c, *Pointer<Short>(LUT + 2 * Int(Extract(c, 2))), 2); 2553 c = Insert(c, *Pointer<Short>(LUT + 2 * Int(Extract(c, 3))), 3); 2554 } 2555 2556 void SamplerCore::sRGBtoLinear16_5_16(Short4 &c) 2557 { 2558 c = As<UShort4>(c) >> 11; 2559 2560 Pointer<Byte> LUT = Pointer<Byte>(constants + OFFSET(Constants,sRGBtoLinear5_16)); 2561 2562 c = Insert(c, *Pointer<Short>(LUT + 2 * Int(Extract(c, 0))), 0); 2563 c = Insert(c, *Pointer<Short>(LUT + 2 * Int(Extract(c, 1))), 1); 2564 c = Insert(c, *Pointer<Short>(LUT + 2 * Int(Extract(c, 2))), 2); 2565 c = Insert(c, *Pointer<Short>(LUT + 2 * Int(Extract(c, 3))), 3); 2566 } 2567 2568 bool SamplerCore::hasFloatTexture() const 2569 { 2570 return Surface::isFloatFormat(state.textureFormat); 2571 } 2572 2573 bool SamplerCore::hasUnnormalizedIntegerTexture() const 2574 { 2575 return Surface::isNonNormalizedInteger(state.textureFormat); 2576 } 2577 2578 bool SamplerCore::hasUnsignedTextureComponent(int component) const 2579 { 2580 return Surface::isUnsignedComponent(state.textureFormat, component); 2581 } 2582 2583 int SamplerCore::textureComponentCount() const 2584 { 2585 return Surface::componentCount(state.textureFormat); 2586 } 2587 2588 bool SamplerCore::hasThirdCoordinate() const 2589 { 
2590 return (state.textureType == TEXTURE_3D) || (state.textureType == TEXTURE_2D_ARRAY); 2591 } 2592 2593 bool SamplerCore::has16bitTextureFormat() const 2594 { 2595 switch(state.textureFormat) 2596 { 2597 case FORMAT_R5G6B5: 2598 return true; 2599 case FORMAT_R8_SNORM: 2600 case FORMAT_G8R8_SNORM: 2601 case FORMAT_X8B8G8R8_SNORM: 2602 case FORMAT_A8B8G8R8_SNORM: 2603 case FORMAT_R8I: 2604 case FORMAT_R8UI: 2605 case FORMAT_G8R8I: 2606 case FORMAT_G8R8UI: 2607 case FORMAT_X8B8G8R8I: 2608 case FORMAT_X8B8G8R8UI: 2609 case FORMAT_A8B8G8R8I: 2610 case FORMAT_A8B8G8R8UI: 2611 case FORMAT_R32I: 2612 case FORMAT_R32UI: 2613 case FORMAT_G32R32I: 2614 case FORMAT_G32R32UI: 2615 case FORMAT_X32B32G32R32I: 2616 case FORMAT_X32B32G32R32UI: 2617 case FORMAT_A32B32G32R32I: 2618 case FORMAT_A32B32G32R32UI: 2619 case FORMAT_G8R8: 2620 case FORMAT_X8R8G8B8: 2621 case FORMAT_X8B8G8R8: 2622 case FORMAT_A8R8G8B8: 2623 case FORMAT_A8B8G8R8: 2624 case FORMAT_SRGB8_X8: 2625 case FORMAT_SRGB8_A8: 2626 case FORMAT_V8U8: 2627 case FORMAT_Q8W8V8U8: 2628 case FORMAT_X8L8V8U8: 2629 case FORMAT_R32F: 2630 case FORMAT_G32R32F: 2631 case FORMAT_X32B32G32R32F: 2632 case FORMAT_A32B32G32R32F: 2633 case FORMAT_X32B32G32R32F_UNSIGNED: 2634 case FORMAT_A8: 2635 case FORMAT_R8: 2636 case FORMAT_L8: 2637 case FORMAT_A8L8: 2638 case FORMAT_D32F: 2639 case FORMAT_D32FS8: 2640 case FORMAT_D32F_LOCKABLE: 2641 case FORMAT_D32FS8_TEXTURE: 2642 case FORMAT_D32F_SHADOW: 2643 case FORMAT_D32FS8_SHADOW: 2644 case FORMAT_L16: 2645 case FORMAT_G16R16: 2646 case FORMAT_A16B16G16R16: 2647 case FORMAT_V16U16: 2648 case FORMAT_A16W16V16U16: 2649 case FORMAT_Q16W16V16U16: 2650 case FORMAT_R16I: 2651 case FORMAT_R16UI: 2652 case FORMAT_G16R16I: 2653 case FORMAT_G16R16UI: 2654 case FORMAT_X16B16G16R16I: 2655 case FORMAT_X16B16G16R16UI: 2656 case FORMAT_A16B16G16R16I: 2657 case FORMAT_A16B16G16R16UI: 2658 case FORMAT_YV12_BT601: 2659 case FORMAT_YV12_BT709: 2660 case FORMAT_YV12_JFIF: 2661 return false; 2662 default: 
2663 ASSERT(false); 2664 } 2665 2666 return false; 2667 } 2668 2669 bool SamplerCore::has8bitTextureComponents() const 2670 { 2671 switch(state.textureFormat) 2672 { 2673 case FORMAT_G8R8: 2674 case FORMAT_X8R8G8B8: 2675 case FORMAT_X8B8G8R8: 2676 case FORMAT_A8R8G8B8: 2677 case FORMAT_A8B8G8R8: 2678 case FORMAT_SRGB8_X8: 2679 case FORMAT_SRGB8_A8: 2680 case FORMAT_V8U8: 2681 case FORMAT_Q8W8V8U8: 2682 case FORMAT_X8L8V8U8: 2683 case FORMAT_A8: 2684 case FORMAT_R8: 2685 case FORMAT_L8: 2686 case FORMAT_A8L8: 2687 case FORMAT_R8_SNORM: 2688 case FORMAT_G8R8_SNORM: 2689 case FORMAT_X8B8G8R8_SNORM: 2690 case FORMAT_A8B8G8R8_SNORM: 2691 case FORMAT_R8I: 2692 case FORMAT_R8UI: 2693 case FORMAT_G8R8I: 2694 case FORMAT_G8R8UI: 2695 case FORMAT_X8B8G8R8I: 2696 case FORMAT_X8B8G8R8UI: 2697 case FORMAT_A8B8G8R8I: 2698 case FORMAT_A8B8G8R8UI: 2699 return true; 2700 case FORMAT_R5G6B5: 2701 case FORMAT_R32F: 2702 case FORMAT_G32R32F: 2703 case FORMAT_X32B32G32R32F: 2704 case FORMAT_A32B32G32R32F: 2705 case FORMAT_X32B32G32R32F_UNSIGNED: 2706 case FORMAT_D32F: 2707 case FORMAT_D32FS8: 2708 case FORMAT_D32F_LOCKABLE: 2709 case FORMAT_D32FS8_TEXTURE: 2710 case FORMAT_D32F_SHADOW: 2711 case FORMAT_D32FS8_SHADOW: 2712 case FORMAT_L16: 2713 case FORMAT_G16R16: 2714 case FORMAT_A16B16G16R16: 2715 case FORMAT_V16U16: 2716 case FORMAT_A16W16V16U16: 2717 case FORMAT_Q16W16V16U16: 2718 case FORMAT_R32I: 2719 case FORMAT_R32UI: 2720 case FORMAT_G32R32I: 2721 case FORMAT_G32R32UI: 2722 case FORMAT_X32B32G32R32I: 2723 case FORMAT_X32B32G32R32UI: 2724 case FORMAT_A32B32G32R32I: 2725 case FORMAT_A32B32G32R32UI: 2726 case FORMAT_R16I: 2727 case FORMAT_R16UI: 2728 case FORMAT_G16R16I: 2729 case FORMAT_G16R16UI: 2730 case FORMAT_X16B16G16R16I: 2731 case FORMAT_X16B16G16R16UI: 2732 case FORMAT_A16B16G16R16I: 2733 case FORMAT_A16B16G16R16UI: 2734 case FORMAT_YV12_BT601: 2735 case FORMAT_YV12_BT709: 2736 case FORMAT_YV12_JFIF: 2737 return false; 2738 default: 2739 ASSERT(false); 2740 } 2741 2742 
return false; 2743 } 2744 2745 bool SamplerCore::has16bitTextureComponents() const 2746 { 2747 switch(state.textureFormat) 2748 { 2749 case FORMAT_R5G6B5: 2750 case FORMAT_R8_SNORM: 2751 case FORMAT_G8R8_SNORM: 2752 case FORMAT_X8B8G8R8_SNORM: 2753 case FORMAT_A8B8G8R8_SNORM: 2754 case FORMAT_R8I: 2755 case FORMAT_R8UI: 2756 case FORMAT_G8R8I: 2757 case FORMAT_G8R8UI: 2758 case FORMAT_X8B8G8R8I: 2759 case FORMAT_X8B8G8R8UI: 2760 case FORMAT_A8B8G8R8I: 2761 case FORMAT_A8B8G8R8UI: 2762 case FORMAT_R32I: 2763 case FORMAT_R32UI: 2764 case FORMAT_G32R32I: 2765 case FORMAT_G32R32UI: 2766 case FORMAT_X32B32G32R32I: 2767 case FORMAT_X32B32G32R32UI: 2768 case FORMAT_A32B32G32R32I: 2769 case FORMAT_A32B32G32R32UI: 2770 case FORMAT_G8R8: 2771 case FORMAT_X8R8G8B8: 2772 case FORMAT_X8B8G8R8: 2773 case FORMAT_A8R8G8B8: 2774 case FORMAT_A8B8G8R8: 2775 case FORMAT_SRGB8_X8: 2776 case FORMAT_SRGB8_A8: 2777 case FORMAT_V8U8: 2778 case FORMAT_Q8W8V8U8: 2779 case FORMAT_X8L8V8U8: 2780 case FORMAT_R32F: 2781 case FORMAT_G32R32F: 2782 case FORMAT_X32B32G32R32F: 2783 case FORMAT_A32B32G32R32F: 2784 case FORMAT_X32B32G32R32F_UNSIGNED: 2785 case FORMAT_A8: 2786 case FORMAT_R8: 2787 case FORMAT_L8: 2788 case FORMAT_A8L8: 2789 case FORMAT_D32F: 2790 case FORMAT_D32FS8: 2791 case FORMAT_D32F_LOCKABLE: 2792 case FORMAT_D32FS8_TEXTURE: 2793 case FORMAT_D32F_SHADOW: 2794 case FORMAT_D32FS8_SHADOW: 2795 case FORMAT_YV12_BT601: 2796 case FORMAT_YV12_BT709: 2797 case FORMAT_YV12_JFIF: 2798 return false; 2799 case FORMAT_L16: 2800 case FORMAT_G16R16: 2801 case FORMAT_A16B16G16R16: 2802 case FORMAT_R16I: 2803 case FORMAT_R16UI: 2804 case FORMAT_G16R16I: 2805 case FORMAT_G16R16UI: 2806 case FORMAT_X16B16G16R16I: 2807 case FORMAT_X16B16G16R16UI: 2808 case FORMAT_A16B16G16R16I: 2809 case FORMAT_A16B16G16R16UI: 2810 case FORMAT_V16U16: 2811 case FORMAT_A16W16V16U16: 2812 case FORMAT_Q16W16V16U16: 2813 return true; 2814 default: 2815 ASSERT(false); 2816 } 2817 2818 return false; 2819 } 2820 2821 bool 
SamplerCore::has32bitIntegerTextureComponents() const 2822 { 2823 switch(state.textureFormat) 2824 { 2825 case FORMAT_R5G6B5: 2826 case FORMAT_R8_SNORM: 2827 case FORMAT_G8R8_SNORM: 2828 case FORMAT_X8B8G8R8_SNORM: 2829 case FORMAT_A8B8G8R8_SNORM: 2830 case FORMAT_R8I: 2831 case FORMAT_R8UI: 2832 case FORMAT_G8R8I: 2833 case FORMAT_G8R8UI: 2834 case FORMAT_X8B8G8R8I: 2835 case FORMAT_X8B8G8R8UI: 2836 case FORMAT_A8B8G8R8I: 2837 case FORMAT_A8B8G8R8UI: 2838 case FORMAT_G8R8: 2839 case FORMAT_X8R8G8B8: 2840 case FORMAT_X8B8G8R8: 2841 case FORMAT_A8R8G8B8: 2842 case FORMAT_A8B8G8R8: 2843 case FORMAT_SRGB8_X8: 2844 case FORMAT_SRGB8_A8: 2845 case FORMAT_V8U8: 2846 case FORMAT_Q8W8V8U8: 2847 case FORMAT_X8L8V8U8: 2848 case FORMAT_L16: 2849 case FORMAT_G16R16: 2850 case FORMAT_A16B16G16R16: 2851 case FORMAT_R16I: 2852 case FORMAT_R16UI: 2853 case FORMAT_G16R16I: 2854 case FORMAT_G16R16UI: 2855 case FORMAT_X16B16G16R16I: 2856 case FORMAT_X16B16G16R16UI: 2857 case FORMAT_A16B16G16R16I: 2858 case FORMAT_A16B16G16R16UI: 2859 case FORMAT_V16U16: 2860 case FORMAT_A16W16V16U16: 2861 case FORMAT_Q16W16V16U16: 2862 case FORMAT_R32F: 2863 case FORMAT_G32R32F: 2864 case FORMAT_X32B32G32R32F: 2865 case FORMAT_A32B32G32R32F: 2866 case FORMAT_X32B32G32R32F_UNSIGNED: 2867 case FORMAT_A8: 2868 case FORMAT_R8: 2869 case FORMAT_L8: 2870 case FORMAT_A8L8: 2871 case FORMAT_D32F: 2872 case FORMAT_D32FS8: 2873 case FORMAT_D32F_LOCKABLE: 2874 case FORMAT_D32FS8_TEXTURE: 2875 case FORMAT_D32F_SHADOW: 2876 case FORMAT_D32FS8_SHADOW: 2877 case FORMAT_YV12_BT601: 2878 case FORMAT_YV12_BT709: 2879 case FORMAT_YV12_JFIF: 2880 return false; 2881 case FORMAT_R32I: 2882 case FORMAT_R32UI: 2883 case FORMAT_G32R32I: 2884 case FORMAT_G32R32UI: 2885 case FORMAT_X32B32G32R32I: 2886 case FORMAT_X32B32G32R32UI: 2887 case FORMAT_A32B32G32R32I: 2888 case FORMAT_A32B32G32R32UI: 2889 return true; 2890 default: 2891 ASSERT(false); 2892 } 2893 2894 return false; 2895 } 2896 2897 bool SamplerCore::hasYuvFormat() 
const 2898 { 2899 switch(state.textureFormat) 2900 { 2901 case FORMAT_YV12_BT601: 2902 case FORMAT_YV12_BT709: 2903 case FORMAT_YV12_JFIF: 2904 return true; 2905 case FORMAT_R5G6B5: 2906 case FORMAT_R8_SNORM: 2907 case FORMAT_G8R8_SNORM: 2908 case FORMAT_X8B8G8R8_SNORM: 2909 case FORMAT_A8B8G8R8_SNORM: 2910 case FORMAT_R8I: 2911 case FORMAT_R8UI: 2912 case FORMAT_G8R8I: 2913 case FORMAT_G8R8UI: 2914 case FORMAT_X8B8G8R8I: 2915 case FORMAT_X8B8G8R8UI: 2916 case FORMAT_A8B8G8R8I: 2917 case FORMAT_A8B8G8R8UI: 2918 case FORMAT_R32I: 2919 case FORMAT_R32UI: 2920 case FORMAT_G32R32I: 2921 case FORMAT_G32R32UI: 2922 case FORMAT_X32B32G32R32I: 2923 case FORMAT_X32B32G32R32UI: 2924 case FORMAT_A32B32G32R32I: 2925 case FORMAT_A32B32G32R32UI: 2926 case FORMAT_G8R8: 2927 case FORMAT_X8R8G8B8: 2928 case FORMAT_X8B8G8R8: 2929 case FORMAT_A8R8G8B8: 2930 case FORMAT_A8B8G8R8: 2931 case FORMAT_SRGB8_X8: 2932 case FORMAT_SRGB8_A8: 2933 case FORMAT_V8U8: 2934 case FORMAT_Q8W8V8U8: 2935 case FORMAT_X8L8V8U8: 2936 case FORMAT_R32F: 2937 case FORMAT_G32R32F: 2938 case FORMAT_X32B32G32R32F: 2939 case FORMAT_A32B32G32R32F: 2940 case FORMAT_X32B32G32R32F_UNSIGNED: 2941 case FORMAT_A8: 2942 case FORMAT_R8: 2943 case FORMAT_L8: 2944 case FORMAT_A8L8: 2945 case FORMAT_D32F: 2946 case FORMAT_D32FS8: 2947 case FORMAT_D32F_LOCKABLE: 2948 case FORMAT_D32FS8_TEXTURE: 2949 case FORMAT_D32F_SHADOW: 2950 case FORMAT_D32FS8_SHADOW: 2951 case FORMAT_L16: 2952 case FORMAT_G16R16: 2953 case FORMAT_A16B16G16R16: 2954 case FORMAT_R16I: 2955 case FORMAT_R16UI: 2956 case FORMAT_G16R16I: 2957 case FORMAT_G16R16UI: 2958 case FORMAT_X16B16G16R16I: 2959 case FORMAT_X16B16G16R16UI: 2960 case FORMAT_A16B16G16R16I: 2961 case FORMAT_A16B16G16R16UI: 2962 case FORMAT_V16U16: 2963 case FORMAT_A16W16V16U16: 2964 case FORMAT_Q16W16V16U16: 2965 return false; 2966 default: 2967 ASSERT(false); 2968 } 2969 2970 return false; 2971 } 2972 2973 bool SamplerCore::isRGBComponent(int component) const 2974 { 2975 
switch(state.textureFormat) 2976 { 2977 case FORMAT_R5G6B5: return component < 3; 2978 case FORMAT_R8_SNORM: return component < 1; 2979 case FORMAT_G8R8_SNORM: return component < 2; 2980 case FORMAT_X8B8G8R8_SNORM: return component < 3; 2981 case FORMAT_A8B8G8R8_SNORM: return component < 3; 2982 case FORMAT_R8I: return component < 1; 2983 case FORMAT_R8UI: return component < 1; 2984 case FORMAT_G8R8I: return component < 2; 2985 case FORMAT_G8R8UI: return component < 2; 2986 case FORMAT_X8B8G8R8I: return component < 3; 2987 case FORMAT_X8B8G8R8UI: return component < 3; 2988 case FORMAT_A8B8G8R8I: return component < 3; 2989 case FORMAT_A8B8G8R8UI: return component < 3; 2990 case FORMAT_R32I: return component < 1; 2991 case FORMAT_R32UI: return component < 1; 2992 case FORMAT_G32R32I: return component < 2; 2993 case FORMAT_G32R32UI: return component < 2; 2994 case FORMAT_X32B32G32R32I: return component < 3; 2995 case FORMAT_X32B32G32R32UI: return component < 3; 2996 case FORMAT_A32B32G32R32I: return component < 3; 2997 case FORMAT_A32B32G32R32UI: return component < 3; 2998 case FORMAT_G8R8: return component < 2; 2999 case FORMAT_X8R8G8B8: return component < 3; 3000 case FORMAT_X8B8G8R8: return component < 3; 3001 case FORMAT_A8R8G8B8: return component < 3; 3002 case FORMAT_A8B8G8R8: return component < 3; 3003 case FORMAT_SRGB8_X8: return component < 3; 3004 case FORMAT_SRGB8_A8: return component < 3; 3005 case FORMAT_V8U8: return false; 3006 case FORMAT_Q8W8V8U8: return false; 3007 case FORMAT_X8L8V8U8: return false; 3008 case FORMAT_R32F: return component < 1; 3009 case FORMAT_G32R32F: return component < 2; 3010 case FORMAT_X32B32G32R32F: return component < 3; 3011 case FORMAT_A32B32G32R32F: return component < 3; 3012 case FORMAT_X32B32G32R32F_UNSIGNED: return component < 3; 3013 case FORMAT_A8: return false; 3014 case FORMAT_R8: return component < 1; 3015 case FORMAT_L8: return component < 1; 3016 case FORMAT_A8L8: return component < 1; 3017 case FORMAT_D32F: return 
false; 3018 case FORMAT_D32FS8: return false; 3019 case FORMAT_D32F_LOCKABLE: return false; 3020 case FORMAT_D32FS8_TEXTURE: return false; 3021 case FORMAT_D32F_SHADOW: return false; 3022 case FORMAT_D32FS8_SHADOW: return false; 3023 case FORMAT_L16: return component < 1; 3024 case FORMAT_G16R16: return component < 2; 3025 case FORMAT_A16B16G16R16: return component < 3; 3026 case FORMAT_R16I: return component < 1; 3027 case FORMAT_R16UI: return component < 1; 3028 case FORMAT_G16R16I: return component < 2; 3029 case FORMAT_G16R16UI: return component < 2; 3030 case FORMAT_X16B16G16R16I: return component < 3; 3031 case FORMAT_X16B16G16R16UI: return component < 3; 3032 case FORMAT_A16B16G16R16I: return component < 3; 3033 case FORMAT_A16B16G16R16UI: return component < 3; 3034 case FORMAT_V16U16: return false; 3035 case FORMAT_A16W16V16U16: return false; 3036 case FORMAT_Q16W16V16U16: return false; 3037 case FORMAT_YV12_BT601: return component < 3; 3038 case FORMAT_YV12_BT709: return component < 3; 3039 case FORMAT_YV12_JFIF: return component < 3; 3040 default: 3041 ASSERT(false); 3042 } 3043 3044 return false; 3045 } 3046 } 3047