1 /*------------------------------------------------------------------------- 2 * drawElements Quality Program Reference Renderer 3 * ----------------------------------------------- 4 * 5 * Copyright 2014 The Android Open Source Project 6 * 7 * Licensed under the Apache License, Version 2.0 (the "License"); 8 * you may not use this file except in compliance with the License. 9 * You may obtain a copy of the License at 10 * 11 * http://www.apache.org/licenses/LICENSE-2.0 12 * 13 * Unless required by applicable law or agreed to in writing, software 14 * distributed under the License is distributed on an "AS IS" BASIS, 15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 * See the License for the specific language governing permissions and 17 * limitations under the License. 18 * 19 *//*! 20 * \file 21 * \brief Reference implementation for per-fragment operations. 22 *//*--------------------------------------------------------------------*/ 23 24 #include "rrFragmentOperations.hpp" 25 #include "tcuVectorUtil.hpp" 26 #include "tcuTextureUtil.hpp" 27 #include <limits> 28 29 using tcu::IVec2; 30 using tcu::Vec3; 31 using tcu::Vec4; 32 using tcu::IVec4; 33 using tcu::UVec4; 34 using tcu::min; 35 using tcu::max; 36 using tcu::clamp; 37 using de::min; 38 using de::max; 39 using de::clamp; 40 41 namespace rr 42 { 43 44 // Return oldValue with the bits indicated by mask replaced by corresponding bits of newValue. 
45 static inline int maskedBitReplace (int oldValue, int newValue, deUint32 mask) 46 { 47 return (oldValue & ~mask) | (newValue & mask); 48 } 49 50 static inline bool isInsideRect (const IVec2& point, const WindowRectangle& rect) 51 { 52 return de::inBounds(point.x(), rect.left, rect.left + rect.width) && 53 de::inBounds(point.y(), rect.bottom, rect.bottom + rect.height); 54 } 55 56 static inline Vec4 unpremultiply (const Vec4& v) 57 { 58 if (v.w() > 0.0f) 59 return Vec4(v.x()/v.w(), v.y()/v.w(), v.z()/v.w(), v.w()); 60 else 61 { 62 DE_ASSERT(v.x() == 0.0f && v.y() == 0.0f && v.z() == 0.0f); 63 return Vec4(0.0f, 0.0f, 0.0f, 0.0f); 64 } 65 } 66 67 void clearMultisampleColorBuffer (const tcu::PixelBufferAccess& dst, const Vec4& v, const WindowRectangle& r) { tcu::clear(tcu::getSubregion(dst, 0, r.left, r.bottom, dst.getWidth(), r.width, r.height), v); } 68 void clearMultisampleColorBuffer (const tcu::PixelBufferAccess& dst, const IVec4& v, const WindowRectangle& r) { tcu::clear(tcu::getSubregion(dst, 0, r.left, r.bottom, dst.getWidth(), r.width, r.height), v); } 69 void clearMultisampleColorBuffer (const tcu::PixelBufferAccess& dst, const UVec4& v, const WindowRectangle& r) { tcu::clear(tcu::getSubregion(dst, 0, r.left, r.bottom, dst.getWidth(), r.width, r.height), v.cast<int>()); } 70 void clearMultisampleDepthBuffer (const tcu::PixelBufferAccess& dst, float v, const WindowRectangle& r) { tcu::clearDepth(tcu::getSubregion(dst, 0, r.left, r.bottom, dst.getWidth(), r.width, r.height), v); } 71 void clearMultisampleStencilBuffer (const tcu::PixelBufferAccess& dst, int v, const WindowRectangle& r) { tcu::clearStencil(tcu::getSubregion(dst, 0, r.left, r.bottom, dst.getWidth(), r.width, r.height), v); } 72 73 FragmentProcessor::FragmentProcessor (void) 74 : m_sampleRegister() 75 { 76 } 77 78 void FragmentProcessor::executeScissorTest (int fragNdxOffset, int numSamplesPerFragment, const Fragment* inputFragments, const WindowRectangle& scissorRect) 79 { 80 for (int 
regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++) 81 { 82 if (m_sampleRegister[regSampleNdx].isAlive) 83 { 84 int fragNdx = fragNdxOffset + regSampleNdx/numSamplesPerFragment; 85 86 if (!isInsideRect(inputFragments[fragNdx].pixelCoord, scissorRect)) 87 m_sampleRegister[regSampleNdx].isAlive = false; 88 } 89 } 90 } 91 92 void FragmentProcessor::executeStencilCompare (int fragNdxOffset, int numSamplesPerFragment, const Fragment* inputFragments, const StencilState& stencilState, int numStencilBits, const tcu::ConstPixelBufferAccess& stencilBuffer) 93 { 94 #define SAMPLE_REGISTER_STENCIL_COMPARE(COMPARE_EXPRESSION) \ 95 for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++) \ 96 { \ 97 if (m_sampleRegister[regSampleNdx].isAlive) \ 98 { \ 99 int fragSampleNdx = regSampleNdx % numSamplesPerFragment; \ 100 const Fragment& frag = inputFragments[fragNdxOffset + regSampleNdx/numSamplesPerFragment]; \ 101 int stencilBufferValue = stencilBuffer.getPixStencil(fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y()); \ 102 int maskedRef = stencilState.compMask & clampedStencilRef; \ 103 int maskedBuf = stencilState.compMask & stencilBufferValue; \ 104 DE_UNREF(maskedRef); \ 105 DE_UNREF(maskedBuf); \ 106 \ 107 m_sampleRegister[regSampleNdx].stencilPassed = (COMPARE_EXPRESSION); \ 108 } \ 109 } 110 111 int clampedStencilRef = de::clamp(stencilState.ref, 0, (1<<numStencilBits)-1); 112 113 switch (stencilState.func) 114 { 115 case TESTFUNC_NEVER: SAMPLE_REGISTER_STENCIL_COMPARE(false) break; 116 case TESTFUNC_ALWAYS: SAMPLE_REGISTER_STENCIL_COMPARE(true) break; 117 case TESTFUNC_LESS: SAMPLE_REGISTER_STENCIL_COMPARE(maskedRef < maskedBuf) break; 118 case TESTFUNC_LEQUAL: SAMPLE_REGISTER_STENCIL_COMPARE(maskedRef <= maskedBuf) break; 119 case TESTFUNC_GREATER: SAMPLE_REGISTER_STENCIL_COMPARE(maskedRef > maskedBuf) break; 120 case TESTFUNC_GEQUAL: SAMPLE_REGISTER_STENCIL_COMPARE(maskedRef >= maskedBuf) break; 121 case TESTFUNC_EQUAL: 
SAMPLE_REGISTER_STENCIL_COMPARE(maskedRef == maskedBuf) break; 122 case TESTFUNC_NOTEQUAL: SAMPLE_REGISTER_STENCIL_COMPARE(maskedRef != maskedBuf) break; 123 default: 124 DE_ASSERT(false); 125 } 126 127 #undef SAMPLE_REGISTER_STENCIL_COMPARE 128 } 129 130 void FragmentProcessor::executeStencilSFail (int fragNdxOffset, int numSamplesPerFragment, const Fragment* inputFragments, const StencilState& stencilState, int numStencilBits, const tcu::PixelBufferAccess& stencilBuffer) 131 { 132 #define SAMPLE_REGISTER_SFAIL(SFAIL_EXPRESSION) \ 133 for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++) \ 134 { \ 135 if (m_sampleRegister[regSampleNdx].isAlive && !m_sampleRegister[regSampleNdx].stencilPassed) \ 136 { \ 137 int fragSampleNdx = regSampleNdx % numSamplesPerFragment; \ 138 const Fragment& frag = inputFragments[fragNdxOffset + regSampleNdx/numSamplesPerFragment]; \ 139 int stencilBufferValue = stencilBuffer.getPixStencil(fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y()); \ 140 \ 141 stencilBuffer.setPixStencil(maskedBitReplace(stencilBufferValue, (SFAIL_EXPRESSION), stencilState.writeMask), fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y()); \ 142 m_sampleRegister[regSampleNdx].isAlive = false; \ 143 } \ 144 } 145 146 int clampedStencilRef = de::clamp(stencilState.ref, 0, (1<<numStencilBits)-1); 147 148 switch (stencilState.sFail) 149 { 150 case STENCILOP_KEEP: SAMPLE_REGISTER_SFAIL(stencilBufferValue) break; 151 case STENCILOP_ZERO: SAMPLE_REGISTER_SFAIL(0) break; 152 case STENCILOP_REPLACE: SAMPLE_REGISTER_SFAIL(clampedStencilRef) break; 153 case STENCILOP_INCR: SAMPLE_REGISTER_SFAIL(de::clamp(stencilBufferValue+1, 0, (1<<numStencilBits) - 1)) break; 154 case STENCILOP_DECR: SAMPLE_REGISTER_SFAIL(de::clamp(stencilBufferValue-1, 0, (1<<numStencilBits) - 1)) break; 155 case STENCILOP_INCR_WRAP: SAMPLE_REGISTER_SFAIL((stencilBufferValue + 1) & ((1<<numStencilBits) - 1)) break; 156 case STENCILOP_DECR_WRAP: 
SAMPLE_REGISTER_SFAIL((stencilBufferValue - 1) & ((1<<numStencilBits) - 1)) break; 157 case STENCILOP_INVERT: SAMPLE_REGISTER_SFAIL((~stencilBufferValue) & ((1<<numStencilBits) - 1)) break; 158 default: 159 DE_ASSERT(false); 160 } 161 162 #undef SAMPLE_REGISTER_SFAIL 163 } 164 165 166 void FragmentProcessor::executeDepthBoundsTest (int fragNdxOffset, int numSamplesPerFragment, const Fragment* inputFragments, const float minDepthBound, const float maxDepthBound, const tcu::ConstPixelBufferAccess& depthBuffer) 167 { 168 if (depthBuffer.getFormat().type == tcu::TextureFormat::FLOAT || depthBuffer.getFormat().type == tcu::TextureFormat::FLOAT_UNSIGNED_INT_24_8_REV) 169 { 170 for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; ++regSampleNdx) 171 { 172 if (m_sampleRegister[regSampleNdx].isAlive) 173 { 174 const int fragSampleNdx = regSampleNdx % numSamplesPerFragment; 175 const Fragment& frag = inputFragments[fragNdxOffset + regSampleNdx/numSamplesPerFragment]; 176 const float depthBufferValue = depthBuffer.getPixDepth(fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y()); 177 178 if (!de::inRange(depthBufferValue, minDepthBound, maxDepthBound)) 179 m_sampleRegister[regSampleNdx].isAlive = false; 180 } 181 } 182 } 183 else 184 { 185 /* Convert float bounds to target buffer format for comparison */ 186 187 deUint32 minDepthBoundUint, maxDepthBoundUint; 188 { 189 deUint32 buffer[2]; 190 DE_ASSERT(sizeof(buffer) >= (size_t)depthBuffer.getFormat().getPixelSize()); 191 192 tcu::PixelBufferAccess access(depthBuffer.getFormat(), 1, 1, 1, &buffer); 193 access.setPixDepth(minDepthBound, 0, 0, 0); 194 minDepthBoundUint = access.getPixelUint(0, 0, 0).x(); 195 } 196 { 197 deUint32 buffer[2]; 198 199 tcu::PixelBufferAccess access(depthBuffer.getFormat(), 1, 1, 1, &buffer); 200 access.setPixDepth(maxDepthBound, 0, 0, 0); 201 maxDepthBoundUint = access.getPixelUint(0, 0, 0).x(); 202 } 203 204 for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; 
++regSampleNdx) 205 { 206 if (m_sampleRegister[regSampleNdx].isAlive) 207 { 208 const int fragSampleNdx = regSampleNdx % numSamplesPerFragment; 209 const Fragment& frag = inputFragments[fragNdxOffset + regSampleNdx / numSamplesPerFragment]; 210 const deUint32 depthBufferValue = depthBuffer.getPixelUint(fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y()).x(); 211 212 if (!de::inRange(depthBufferValue, minDepthBoundUint, maxDepthBoundUint)) 213 m_sampleRegister[regSampleNdx].isAlive = false; 214 } 215 } 216 } 217 } 218 219 void FragmentProcessor::executeDepthCompare (int fragNdxOffset, int numSamplesPerFragment, const Fragment* inputFragments, TestFunc depthFunc, const tcu::ConstPixelBufferAccess& depthBuffer) 220 { 221 #define SAMPLE_REGISTER_DEPTH_COMPARE_F(COMPARE_EXPRESSION) \ 222 for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++) \ 223 { \ 224 if (m_sampleRegister[regSampleNdx].isAlive) \ 225 { \ 226 int fragSampleNdx = regSampleNdx % numSamplesPerFragment; \ 227 const Fragment& frag = inputFragments[fragNdxOffset + regSampleNdx/numSamplesPerFragment]; \ 228 float depthBufferValue = depthBuffer.getPixDepth(fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y()); \ 229 float sampleDepthFloat = frag.sampleDepths[fragSampleNdx]; \ 230 float sampleDepth = de::clamp(sampleDepthFloat, 0.0f, 1.0f); \ 231 \ 232 m_sampleRegister[regSampleNdx].depthPassed = (COMPARE_EXPRESSION); \ 233 \ 234 DE_UNREF(depthBufferValue); \ 235 DE_UNREF(sampleDepth); \ 236 } \ 237 } 238 239 #define SAMPLE_REGISTER_DEPTH_COMPARE_UI(COMPARE_EXPRESSION) \ 240 for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++) \ 241 { \ 242 if (m_sampleRegister[regSampleNdx].isAlive) \ 243 { \ 244 int fragSampleNdx = regSampleNdx % numSamplesPerFragment; \ 245 const Fragment& frag = inputFragments[fragNdxOffset + regSampleNdx/numSamplesPerFragment]; \ 246 deUint32 depthBufferValue = depthBuffer.getPixelUint(fragSampleNdx, frag.pixelCoord.x(), 
frag.pixelCoord.y()).x(); \ 247 float sampleDepthFloat = frag.sampleDepths[fragSampleNdx]; \ 248 \ 249 /* Convert input float to target buffer format for comparison */ \ 250 \ 251 deUint32 buffer[2]; \ 252 \ 253 DE_ASSERT(sizeof(buffer) >= (size_t)depthBuffer.getFormat().getPixelSize()); \ 254 \ 255 tcu::PixelBufferAccess access(depthBuffer.getFormat(), 1, 1, 1, &buffer); \ 256 access.setPixDepth(sampleDepthFloat, 0, 0, 0); \ 257 deUint32 sampleDepth = access.getPixelUint(0, 0, 0).x(); \ 258 \ 259 m_sampleRegister[regSampleNdx].depthPassed = (COMPARE_EXPRESSION); \ 260 \ 261 DE_UNREF(depthBufferValue); \ 262 DE_UNREF(sampleDepth); \ 263 } \ 264 } 265 266 if (depthBuffer.getFormat().type == tcu::TextureFormat::FLOAT || depthBuffer.getFormat().type == tcu::TextureFormat::FLOAT_UNSIGNED_INT_24_8_REV) 267 { 268 269 switch (depthFunc) 270 { 271 case TESTFUNC_NEVER: SAMPLE_REGISTER_DEPTH_COMPARE_F(false) break; 272 case TESTFUNC_ALWAYS: SAMPLE_REGISTER_DEPTH_COMPARE_F(true) break; 273 case TESTFUNC_LESS: SAMPLE_REGISTER_DEPTH_COMPARE_F(sampleDepth < depthBufferValue) break; 274 case TESTFUNC_LEQUAL: SAMPLE_REGISTER_DEPTH_COMPARE_F(sampleDepth <= depthBufferValue) break; 275 case TESTFUNC_GREATER: SAMPLE_REGISTER_DEPTH_COMPARE_F(sampleDepth > depthBufferValue) break; 276 case TESTFUNC_GEQUAL: SAMPLE_REGISTER_DEPTH_COMPARE_F(sampleDepth >= depthBufferValue) break; 277 case TESTFUNC_EQUAL: SAMPLE_REGISTER_DEPTH_COMPARE_F(sampleDepth == depthBufferValue) break; 278 case TESTFUNC_NOTEQUAL: SAMPLE_REGISTER_DEPTH_COMPARE_F(sampleDepth != depthBufferValue) break; 279 default: 280 DE_ASSERT(false); 281 } 282 283 } 284 else 285 { 286 switch (depthFunc) 287 { 288 case TESTFUNC_NEVER: SAMPLE_REGISTER_DEPTH_COMPARE_UI(false) break; 289 case TESTFUNC_ALWAYS: SAMPLE_REGISTER_DEPTH_COMPARE_UI(true) break; 290 case TESTFUNC_LESS: SAMPLE_REGISTER_DEPTH_COMPARE_UI(sampleDepth < depthBufferValue) break; 291 case TESTFUNC_LEQUAL: SAMPLE_REGISTER_DEPTH_COMPARE_UI(sampleDepth <= 
depthBufferValue) break; 292 case TESTFUNC_GREATER: SAMPLE_REGISTER_DEPTH_COMPARE_UI(sampleDepth > depthBufferValue) break; 293 case TESTFUNC_GEQUAL: SAMPLE_REGISTER_DEPTH_COMPARE_UI(sampleDepth >= depthBufferValue) break; 294 case TESTFUNC_EQUAL: SAMPLE_REGISTER_DEPTH_COMPARE_UI(sampleDepth == depthBufferValue) break; 295 case TESTFUNC_NOTEQUAL: SAMPLE_REGISTER_DEPTH_COMPARE_UI(sampleDepth != depthBufferValue) break; 296 default: 297 DE_ASSERT(false); 298 } 299 } 300 301 #undef SAMPLE_REGISTER_DEPTH_COMPARE_F 302 #undef SAMPLE_REGISTER_DEPTH_COMPARE_UI 303 } 304 305 void FragmentProcessor::executeDepthWrite (int fragNdxOffset, int numSamplesPerFragment, const Fragment* inputFragments, const tcu::PixelBufferAccess& depthBuffer) 306 { 307 for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++) 308 { 309 if (m_sampleRegister[regSampleNdx].isAlive && m_sampleRegister[regSampleNdx].depthPassed) 310 { 311 int fragSampleNdx = regSampleNdx % numSamplesPerFragment; 312 const Fragment& frag = inputFragments[fragNdxOffset + regSampleNdx/numSamplesPerFragment]; 313 const float clampedDepth = de::clamp(frag.sampleDepths[fragSampleNdx], 0.0f, 1.0f); 314 315 depthBuffer.setPixDepth(clampedDepth, fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y()); 316 } 317 } 318 } 319 320 void FragmentProcessor::executeStencilDpFailAndPass (int fragNdxOffset, int numSamplesPerFragment, const Fragment* inputFragments, const StencilState& stencilState, int numStencilBits, const tcu::PixelBufferAccess& stencilBuffer) 321 { 322 #define SAMPLE_REGISTER_DPFAIL_OR_DPPASS(CONDITION, EXPRESSION) \ 323 for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++) \ 324 { \ 325 if (m_sampleRegister[regSampleNdx].isAlive && (CONDITION)) \ 326 { \ 327 int fragSampleNdx = regSampleNdx % numSamplesPerFragment; \ 328 const Fragment& frag = inputFragments[fragNdxOffset + regSampleNdx/numSamplesPerFragment]; \ 329 int stencilBufferValue = 
stencilBuffer.getPixStencil(fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y()); \ 330 \ 331 stencilBuffer.setPixStencil(maskedBitReplace(stencilBufferValue, (EXPRESSION), stencilState.writeMask), fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y()); \ 332 } \ 333 } 334 335 #define SWITCH_DPFAIL_OR_DPPASS(OP_NAME, CONDITION) \ 336 switch (stencilState.OP_NAME) \ 337 { \ 338 case STENCILOP_KEEP: SAMPLE_REGISTER_DPFAIL_OR_DPPASS(CONDITION, stencilBufferValue) break; \ 339 case STENCILOP_ZERO: SAMPLE_REGISTER_DPFAIL_OR_DPPASS(CONDITION, 0) break; \ 340 case STENCILOP_REPLACE: SAMPLE_REGISTER_DPFAIL_OR_DPPASS(CONDITION, clampedStencilRef) break; \ 341 case STENCILOP_INCR: SAMPLE_REGISTER_DPFAIL_OR_DPPASS(CONDITION, de::clamp(stencilBufferValue+1, 0, (1<<numStencilBits) - 1)) break; \ 342 case STENCILOP_DECR: SAMPLE_REGISTER_DPFAIL_OR_DPPASS(CONDITION, de::clamp(stencilBufferValue-1, 0, (1<<numStencilBits) - 1)) break; \ 343 case STENCILOP_INCR_WRAP: SAMPLE_REGISTER_DPFAIL_OR_DPPASS(CONDITION, (stencilBufferValue + 1) & ((1<<numStencilBits) - 1)) break; \ 344 case STENCILOP_DECR_WRAP: SAMPLE_REGISTER_DPFAIL_OR_DPPASS(CONDITION, (stencilBufferValue - 1) & ((1<<numStencilBits) - 1)) break; \ 345 case STENCILOP_INVERT: SAMPLE_REGISTER_DPFAIL_OR_DPPASS(CONDITION, (~stencilBufferValue) & ((1<<numStencilBits) - 1)) break; \ 346 default: \ 347 DE_ASSERT(false); \ 348 } 349 350 int clampedStencilRef = de::clamp(stencilState.ref, 0, (1<<numStencilBits)-1); 351 352 SWITCH_DPFAIL_OR_DPPASS(dpFail, !m_sampleRegister[regSampleNdx].depthPassed) 353 SWITCH_DPFAIL_OR_DPPASS(dpPass, m_sampleRegister[regSampleNdx].depthPassed) 354 355 #undef SWITCH_DPFAIL_OR_DPPASS 356 #undef SAMPLE_REGISTER_DPFAIL_OR_DPPASS 357 } 358 359 void FragmentProcessor::executeBlendFactorComputeRGB (const Vec4& blendColor, const BlendState& blendRGBState) 360 { 361 #define SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, FACTOR_EXPRESSION) \ 362 for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; 
regSampleNdx++) \ 363 { \ 364 if (m_sampleRegister[regSampleNdx].isAlive) \ 365 { \ 366 const Vec4& src = m_sampleRegister[regSampleNdx].clampedBlendSrcColor; \ 367 const Vec4& src1 = m_sampleRegister[regSampleNdx].clampedBlendSrc1Color; \ 368 const Vec4& dst = m_sampleRegister[regSampleNdx].clampedBlendDstColor; \ 369 DE_UNREF(src); \ 370 DE_UNREF(src1); \ 371 DE_UNREF(dst); \ 372 \ 373 m_sampleRegister[regSampleNdx].FACTOR_NAME = (FACTOR_EXPRESSION); \ 374 } \ 375 } 376 377 #define SWITCH_SRC_OR_DST_FACTOR_RGB(FUNC_NAME, FACTOR_NAME) \ 378 switch (blendRGBState.FUNC_NAME) \ 379 { \ 380 case BLENDFUNC_ZERO: SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(0.0f)) break; \ 381 case BLENDFUNC_ONE: SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(1.0f)) break; \ 382 case BLENDFUNC_SRC_COLOR: SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, src.swizzle(0,1,2)) break; \ 383 case BLENDFUNC_ONE_MINUS_SRC_COLOR: SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(1.0f) - src.swizzle(0,1,2)) break; \ 384 case BLENDFUNC_DST_COLOR: SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, dst.swizzle(0,1,2)) break; \ 385 case BLENDFUNC_ONE_MINUS_DST_COLOR: SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(1.0f) - dst.swizzle(0,1,2)) break; \ 386 case BLENDFUNC_SRC_ALPHA: SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(src.w())) break; \ 387 case BLENDFUNC_ONE_MINUS_SRC_ALPHA: SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(1.0f - src.w())) break; \ 388 case BLENDFUNC_DST_ALPHA: SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(dst.w())) break; \ 389 case BLENDFUNC_ONE_MINUS_DST_ALPHA: SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(1.0f - dst.w())) break; \ 390 case BLENDFUNC_CONSTANT_COLOR: SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, blendColor.swizzle(0,1,2)) break; \ 391 case BLENDFUNC_ONE_MINUS_CONSTANT_COLOR: SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(1.0f) - blendColor.swizzle(0,1,2)) break; \ 392 case BLENDFUNC_CONSTANT_ALPHA: SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(blendColor.w())) break; \ 393 case 
BLENDFUNC_ONE_MINUS_CONSTANT_ALPHA: SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(1.0f - blendColor.w())) break; \ 394 case BLENDFUNC_SRC_ALPHA_SATURATE: SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(de::min(src.w(), 1.0f - dst.w()))) break; \ 395 case BLENDFUNC_SRC1_COLOR: SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, src1.swizzle(0,1,2)) break; \ 396 case BLENDFUNC_ONE_MINUS_SRC1_COLOR: SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(1.0f) - src1.swizzle(0,1,2)) break; \ 397 case BLENDFUNC_SRC1_ALPHA: SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(src1.w())) break; \ 398 case BLENDFUNC_ONE_MINUS_SRC1_ALPHA: SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(1.0f - src1.w())) break; \ 399 default: \ 400 DE_ASSERT(false); \ 401 } 402 403 SWITCH_SRC_OR_DST_FACTOR_RGB(srcFunc, blendSrcFactorRGB) 404 SWITCH_SRC_OR_DST_FACTOR_RGB(dstFunc, blendDstFactorRGB) 405 406 #undef SWITCH_SRC_OR_DST_FACTOR_RGB 407 #undef SAMPLE_REGISTER_BLEND_FACTOR 408 } 409 410 void FragmentProcessor::executeBlendFactorComputeA (const Vec4& blendColor, const BlendState& blendAState) 411 { 412 #define SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, FACTOR_EXPRESSION) \ 413 for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++) \ 414 { \ 415 if (m_sampleRegister[regSampleNdx].isAlive) \ 416 { \ 417 const Vec4& src = m_sampleRegister[regSampleNdx].clampedBlendSrcColor; \ 418 const Vec4& src1 = m_sampleRegister[regSampleNdx].clampedBlendSrc1Color; \ 419 const Vec4& dst = m_sampleRegister[regSampleNdx].clampedBlendDstColor; \ 420 DE_UNREF(src); \ 421 DE_UNREF(src1); \ 422 DE_UNREF(dst); \ 423 \ 424 m_sampleRegister[regSampleNdx].FACTOR_NAME = (FACTOR_EXPRESSION); \ 425 } \ 426 } 427 428 #define SWITCH_SRC_OR_DST_FACTOR_A(FUNC_NAME, FACTOR_NAME) \ 429 switch (blendAState.FUNC_NAME) \ 430 { \ 431 case BLENDFUNC_ZERO: SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, 0.0f) break; \ 432 case BLENDFUNC_ONE: SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, 1.0f) break; \ 433 case BLENDFUNC_SRC_COLOR: 
SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, src.w()) break; \ 434 case BLENDFUNC_ONE_MINUS_SRC_COLOR: SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, 1.0f - src.w()) break; \ 435 case BLENDFUNC_DST_COLOR: SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, dst.w()) break; \ 436 case BLENDFUNC_ONE_MINUS_DST_COLOR: SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, 1.0f - dst.w()) break; \ 437 case BLENDFUNC_SRC_ALPHA: SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, src.w()) break; \ 438 case BLENDFUNC_ONE_MINUS_SRC_ALPHA: SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, 1.0f - src.w()) break; \ 439 case BLENDFUNC_DST_ALPHA: SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, dst.w()) break; \ 440 case BLENDFUNC_ONE_MINUS_DST_ALPHA: SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, 1.0f - dst.w()) break; \ 441 case BLENDFUNC_CONSTANT_COLOR: SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, blendColor.w()) break; \ 442 case BLENDFUNC_ONE_MINUS_CONSTANT_COLOR: SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, 1.0f - blendColor.w()) break; \ 443 case BLENDFUNC_CONSTANT_ALPHA: SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, blendColor.w()) break; \ 444 case BLENDFUNC_ONE_MINUS_CONSTANT_ALPHA: SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, 1.0f - blendColor.w()) break; \ 445 case BLENDFUNC_SRC_ALPHA_SATURATE: SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, 1.0f) break; \ 446 case BLENDFUNC_SRC1_COLOR: SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, src1.w()) break; \ 447 case BLENDFUNC_ONE_MINUS_SRC1_COLOR: SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, 1.0f - src1.w()) break; \ 448 case BLENDFUNC_SRC1_ALPHA: SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, src1.w()) break; \ 449 case BLENDFUNC_ONE_MINUS_SRC1_ALPHA: SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, 1.0f - src1.w()) break; \ 450 default: \ 451 DE_ASSERT(false); \ 452 } 453 454 SWITCH_SRC_OR_DST_FACTOR_A(srcFunc, blendSrcFactorA) 455 SWITCH_SRC_OR_DST_FACTOR_A(dstFunc, blendDstFactorA) 456 457 #undef SWITCH_SRC_OR_DST_FACTOR_A 458 #undef SAMPLE_REGISTER_BLEND_FACTOR 459 } 460 461 void FragmentProcessor::executeBlend 
(const BlendState& blendRGBState, const BlendState& blendAState) 462 { 463 #define SAMPLE_REGISTER_BLENDED_COLOR(COLOR_NAME, COLOR_EXPRESSION) \ 464 for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++) \ 465 { \ 466 if (m_sampleRegister[regSampleNdx].isAlive) \ 467 { \ 468 SampleData& sample = m_sampleRegister[regSampleNdx]; \ 469 const Vec4& srcColor = sample.clampedBlendSrcColor; \ 470 const Vec4& dstColor = sample.clampedBlendDstColor; \ 471 \ 472 sample.COLOR_NAME = (COLOR_EXPRESSION); \ 473 } \ 474 } 475 476 switch (blendRGBState.equation) 477 { 478 case BLENDEQUATION_ADD: SAMPLE_REGISTER_BLENDED_COLOR(blendedRGB, srcColor.swizzle(0,1,2)*sample.blendSrcFactorRGB + dstColor.swizzle(0,1,2)*sample.blendDstFactorRGB) break; 479 case BLENDEQUATION_SUBTRACT: SAMPLE_REGISTER_BLENDED_COLOR(blendedRGB, srcColor.swizzle(0,1,2)*sample.blendSrcFactorRGB - dstColor.swizzle(0,1,2)*sample.blendDstFactorRGB) break; 480 case BLENDEQUATION_REVERSE_SUBTRACT: SAMPLE_REGISTER_BLENDED_COLOR(blendedRGB, dstColor.swizzle(0,1,2)*sample.blendDstFactorRGB - srcColor.swizzle(0,1,2)*sample.blendSrcFactorRGB) break; 481 case BLENDEQUATION_MIN: SAMPLE_REGISTER_BLENDED_COLOR(blendedRGB, min(srcColor.swizzle(0,1,2), dstColor.swizzle(0,1,2))) break; 482 case BLENDEQUATION_MAX: SAMPLE_REGISTER_BLENDED_COLOR(blendedRGB, max(srcColor.swizzle(0,1,2), dstColor.swizzle(0,1,2))) break; 483 default: 484 DE_ASSERT(false); 485 } 486 487 switch (blendAState.equation) 488 { 489 case BLENDEQUATION_ADD: SAMPLE_REGISTER_BLENDED_COLOR(blendedA, srcColor.w()*sample.blendSrcFactorA + dstColor.w()*sample.blendDstFactorA) break; 490 case BLENDEQUATION_SUBTRACT: SAMPLE_REGISTER_BLENDED_COLOR(blendedA, srcColor.w()*sample.blendSrcFactorA - dstColor.w()*sample.blendDstFactorA) break; 491 case BLENDEQUATION_REVERSE_SUBTRACT: SAMPLE_REGISTER_BLENDED_COLOR(blendedA, dstColor.w()*sample.blendDstFactorA - srcColor.w()*sample.blendSrcFactorA) break; 492 case BLENDEQUATION_MIN: 
SAMPLE_REGISTER_BLENDED_COLOR(blendedA, min(srcColor.w(), dstColor.w())) break; 493 case BLENDEQUATION_MAX: SAMPLE_REGISTER_BLENDED_COLOR(blendedA, max(srcColor.w(), dstColor.w())) break; 494 default: 495 DE_ASSERT(false); 496 } 497 #undef SAMPLE_REGISTER_BLENDED_COLOR 498 } 499 500 namespace advblend 501 { 502 503 inline float multiply (float src, float dst) { return src*dst; } 504 inline float screen (float src, float dst) { return src + dst - src*dst; } 505 inline float darken (float src, float dst) { return de::min(src, dst); } 506 inline float lighten (float src, float dst) { return de::max(src, dst); } 507 inline float difference (float src, float dst) { return de::abs(dst-src); } 508 inline float exclusion (float src, float dst) { return src + dst - 2.0f*src*dst; } 509 510 inline float overlay (float src, float dst) 511 { 512 if (dst <= 0.5f) 513 return 2.0f*src*dst; 514 else 515 return 1.0f - 2.0f*(1.0f-src)*(1.0f-dst); 516 } 517 518 inline float colordodge (float src, float dst) 519 { 520 if (dst <= 0.0f) 521 return 0.0f; 522 else if (src < 1.0f) 523 return de::min(1.0f, dst/(1.0f-src)); 524 else 525 return 1.0f; 526 } 527 528 inline float colorburn (float src, float dst) 529 { 530 if (dst >= 1.0f) 531 return 1.0f; 532 else if (src > 0.0f) 533 return 1.0f - de::min(1.0f, (1.0f-dst)/src); 534 else 535 return 0.0f; 536 } 537 538 inline float hardlight (float src, float dst) 539 { 540 if (src <= 0.5f) 541 return 2.0f*src*dst; 542 else 543 return 1.0f - 2.0f*(1.0f-src)*(1.0f-dst); 544 } 545 546 inline float softlight (float src, float dst) 547 { 548 if (src <= 0.5f) 549 return dst - (1.0f - 2.0f*src)*dst*(1.0f-dst); 550 else if (dst <= 0.25f) 551 return dst + (2.0f*src - 1.0f)*dst*((16.0f*dst - 12.0f)*dst + 3.0f); 552 else 553 return dst + (2.0f*src - 1.0f)*(deFloatSqrt(dst)-dst); 554 } 555 556 inline float minComp (const Vec3& v) 557 { 558 return de::min(de::min(v.x(), v.y()), v.z()); 559 } 560 561 inline float maxComp (const Vec3& v) 562 { 563 return 
de::max(de::max(v.x(), v.y()), v.z()); 564 } 565 566 inline float luminosity (const Vec3& rgb) 567 { 568 return dot(rgb, Vec3(0.3f, 0.59f, 0.11f)); 569 } 570 571 inline float saturation (const Vec3& rgb) 572 { 573 return maxComp(rgb) - minComp(rgb); 574 } 575 576 Vec3 setLum (const Vec3& cbase, const Vec3& clum) 577 { 578 const float lbase = luminosity(cbase); 579 const float llum = luminosity(clum); 580 const float ldiff = llum - lbase; 581 const Vec3 color = cbase + Vec3(ldiff); 582 const float minC = minComp(color); 583 const float maxC = maxComp(color); 584 585 if (minC < 0.0f) 586 return llum + ((color-llum)*llum / (llum != minC ? (llum-minC) : 1.0f)); 587 else if (maxC > 1.0f) 588 return llum + ((color-llum)*(1.0f-llum) / (llum != maxC ? (maxC-llum) : 1.0f)); 589 else 590 return color; 591 } 592 593 Vec3 setLumSat (const Vec3& cbase, const Vec3& csat, const Vec3& clum) 594 { 595 const float minbase = minComp(cbase); 596 const float sbase = saturation(cbase); 597 const float ssat = saturation(csat); 598 Vec3 color = Vec3(0.0f); 599 600 if (sbase > 0.0f) 601 color = (cbase - minbase) * ssat / sbase; 602 603 return setLum(color, clum); 604 } 605 606 } // advblend 607 608 void FragmentProcessor::executeAdvancedBlend (BlendEquationAdvanced equation) 609 { 610 using namespace advblend; 611 612 #define SAMPLE_REGISTER_ADV_BLEND(FUNCTION_NAME) \ 613 for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++) \ 614 { \ 615 if (m_sampleRegister[regSampleNdx].isAlive) \ 616 { \ 617 SampleData& sample = m_sampleRegister[regSampleNdx]; \ 618 const Vec4& srcColor = sample.clampedBlendSrcColor; \ 619 const Vec4& dstColor = sample.clampedBlendDstColor; \ 620 const Vec3& bias = sample.blendSrcFactorRGB; \ 621 const float p0 = sample.blendSrcFactorA; \ 622 const float r = FUNCTION_NAME(srcColor[0], dstColor[0])*p0 + bias[0]; \ 623 const float g = FUNCTION_NAME(srcColor[1], dstColor[1])*p0 + bias[1]; \ 624 const float b = FUNCTION_NAME(srcColor[2], 
dstColor[2])*p0 + bias[2]; \ 625 \ 626 sample.blendedRGB = Vec3(r, g, b); \ 627 } \ 628 } 629 630 #define SAMPLE_REGISTER_ADV_BLEND_HSL(COLOR_EXPRESSION) \ 631 for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++) \ 632 { \ 633 if (m_sampleRegister[regSampleNdx].isAlive) \ 634 { \ 635 SampleData& sample = m_sampleRegister[regSampleNdx]; \ 636 const Vec3 srcColor = sample.clampedBlendSrcColor.swizzle(0,1,2); \ 637 const Vec3 dstColor = sample.clampedBlendDstColor.swizzle(0,1,2); \ 638 const Vec3& bias = sample.blendSrcFactorRGB; \ 639 const float p0 = sample.blendSrcFactorA; \ 640 \ 641 sample.blendedRGB = (COLOR_EXPRESSION)*p0 + bias; \ 642 } \ 643 } 644 645 // Pre-compute factors & compute alpha \todo [2014-03-18 pyry] Re-using variable names. 646 // \note clampedBlend*Color contains clamped & unpremultiplied colors 647 for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++) 648 { 649 if (m_sampleRegister[regSampleNdx].isAlive) 650 { 651 SampleData& sample = m_sampleRegister[regSampleNdx]; 652 const Vec4& srcColor = sample.clampedBlendSrcColor; 653 const Vec4& dstColor = sample.clampedBlendDstColor; 654 const float srcA = srcColor.w(); 655 const float dstA = dstColor.w(); 656 const float p0 = srcA*dstA; 657 const float p1 = srcA*(1.0f-dstA); 658 const float p2 = dstA*(1.0f-srcA); 659 const Vec3 bias (srcColor[0]*p1 + dstColor[0]*p2, 660 srcColor[1]*p1 + dstColor[1]*p2, 661 srcColor[2]*p1 + dstColor[2]*p2); 662 663 sample.blendSrcFactorRGB = bias; 664 sample.blendSrcFactorA = p0; 665 sample.blendedA = p0 + p1 + p2; 666 } 667 } 668 669 switch (equation) 670 { 671 case BLENDEQUATION_ADVANCED_MULTIPLY: SAMPLE_REGISTER_ADV_BLEND(multiply); break; 672 case BLENDEQUATION_ADVANCED_SCREEN: SAMPLE_REGISTER_ADV_BLEND(screen); break; 673 case BLENDEQUATION_ADVANCED_OVERLAY: SAMPLE_REGISTER_ADV_BLEND(overlay); break; 674 case BLENDEQUATION_ADVANCED_DARKEN: SAMPLE_REGISTER_ADV_BLEND(darken); break; 675 case 
BLENDEQUATION_ADVANCED_LIGHTEN:		SAMPLE_REGISTER_ADV_BLEND(lighten);		break;
		case BLENDEQUATION_ADVANCED_COLORDODGE:		SAMPLE_REGISTER_ADV_BLEND(colordodge);	break;
		case BLENDEQUATION_ADVANCED_COLORBURN:		SAMPLE_REGISTER_ADV_BLEND(colorburn);	break;
		case BLENDEQUATION_ADVANCED_HARDLIGHT:		SAMPLE_REGISTER_ADV_BLEND(hardlight);	break;
		case BLENDEQUATION_ADVANCED_SOFTLIGHT:		SAMPLE_REGISTER_ADV_BLEND(softlight);	break;
		case BLENDEQUATION_ADVANCED_DIFFERENCE:		SAMPLE_REGISTER_ADV_BLEND(difference);	break;
		case BLENDEQUATION_ADVANCED_EXCLUSION:		SAMPLE_REGISTER_ADV_BLEND(exclusion);	break;
		case BLENDEQUATION_ADVANCED_HSL_HUE:		SAMPLE_REGISTER_ADV_BLEND_HSL(setLumSat(srcColor, dstColor, dstColor));	break;
		case BLENDEQUATION_ADVANCED_HSL_SATURATION:	SAMPLE_REGISTER_ADV_BLEND_HSL(setLumSat(dstColor, srcColor, dstColor));	break;
		case BLENDEQUATION_ADVANCED_HSL_COLOR:		SAMPLE_REGISTER_ADV_BLEND_HSL(setLum(srcColor, dstColor));				break;
		case BLENDEQUATION_ADVANCED_HSL_LUMINOSITY:	SAMPLE_REGISTER_ADV_BLEND_HSL(setLum(dstColor, srcColor));				break;
		default:
			DE_ASSERT(false);
	}

	// The blend helper macros are not needed past this point.
#undef SAMPLE_REGISTER_ADV_BLEND
#undef SAMPLE_REGISTER_ADV_BLEND_HSL
}

// Write the blended color of every live sample in the sample register to the color buffer.
//
// Register slot i belongs to fragment inputFragments[fragNdxOffset + i/numSamplesPerFragment]
// and to sample (i % numSamplesPerFragment) within that fragment.
//
// \param fragNdxOffset			Index of the fragment that register slot 0 belongs to.
// \param numSamplesPerFragment	Number of consecutive register slots used per fragment.
// \param inputFragments		Fragment array; only pixelCoord is read here.
// \param isSRGB				If true, the color is passed through tcu::linearToSRGB() before being stored.
// \param colorBuffer			Destination, addressed as (sample index, pixel x, pixel y).
void FragmentProcessor::executeColorWrite (int fragNdxOffset, int numSamplesPerFragment, const Fragment* inputFragments, bool isSRGB, const tcu::PixelBufferAccess& colorBuffer)
{
	for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
	{
		// Dead samples (killed by coverage or an earlier test) leave the buffer untouched.
		if (m_sampleRegister[regSampleNdx].isAlive)
		{
			int					fragSampleNdx	= regSampleNdx % numSamplesPerFragment;
			const Fragment&		frag			= inputFragments[fragNdxOffset + regSampleNdx/numSamplesPerFragment];
			Vec4				combinedColor;

			// RGB and alpha are kept separately in the register; recombine them.
			combinedColor.xyz()	= m_sampleRegister[regSampleNdx].blendedRGB;
			combinedColor.w()	= m_sampleRegister[regSampleNdx].blendedA;

			if (isSRGB)
				combinedColor = tcu::linearToSRGB(combinedColor);

			colorBuffer.setPixel(combinedColor, fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y());
		}
	}
}

// Variant of executeColorWrite() that stores bytes straight into the buffer memory
// instead of going through setPixel().
//
// Each live sample is written as four consecutive bytes (R, G, B, A), each produced
// by tcu::floatToU8(). No sRGB conversion is done in this function. render() calls it
// only when the color buffer format equals RGBA / UNORM_INT8 and all four color mask
// channels are enabled.
//
// \param fragNdxOffset			Index of the fragment that register slot 0 belongs to.
// \param numSamplesPerFragment	Number of consecutive register slots used per fragment.
// \param inputFragments		Fragment array; only pixelCoord is read here.
// \param colorBuffer			Destination; its data pointer, row pitch and slice pitch are used directly.
void FragmentProcessor::executeRGBA8ColorWrite (int fragNdxOffset, int numSamplesPerFragment, const Fragment* inputFragments, const tcu::PixelBufferAccess& colorBuffer)
{
	// Byte strides: 4 bytes between consecutive samples (presumably one byte per RGBA
	// channel - the caller guarantees the RGBA/UNORM_INT8 format), one row pitch per
	// pixel x step and one slice pitch per pixel y step. This mirrors the
	// (sample, x, y) coordinate order used by the setPixel()-based writers.
	const int		fragStride	= 4;
	const int		xStride		= colorBuffer.getRowPitch();
	const int		yStride		= colorBuffer.getSlicePitch();
	deUint8* const	basePtr		= (deUint8*)colorBuffer.getDataPtr();

	for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
	{
		if (m_sampleRegister[regSampleNdx].isAlive)
		{
			const int			fragSampleNdx	= regSampleNdx % numSamplesPerFragment;
			const Fragment&		frag			= inputFragments[fragNdxOffset + regSampleNdx/numSamplesPerFragment];
			deUint8*			dstPtr			= basePtr + fragSampleNdx*fragStride + frag.pixelCoord.x()*xStride + frag.pixelCoord.y()*yStride;

			dstPtr[0] = tcu::floatToU8(m_sampleRegister[regSampleNdx].blendedRGB.x());
			dstPtr[1] = tcu::floatToU8(m_sampleRegister[regSampleNdx].blendedRGB.y());
			dstPtr[2] = tcu::floatToU8(m_sampleRegister[regSampleNdx].blendedRGB.z());
			dstPtr[3] = tcu::floatToU8(m_sampleRegister[regSampleNdx].blendedA);
		}
	}
}

// Write the blended color of every live sample, mixing it per channel with the value
// already stored in the color buffer.
//
// The stored result is colorMaskFactor*newColor + colorMaskNegationFactor*originalColor.
// render() passes factors of 1/0 for enabled channels and 0/1 for disabled ones, so
// enabled channels take the new value and disabled channels keep the old one.
//
// \param fragNdxOffset				Index of the fragment that register slot 0 belongs to.
// \param numSamplesPerFragment		Number of consecutive register slots used per fragment.
// \param inputFragments			Fragment array; only pixelCoord is read here.
// \param colorMaskFactor			Per-channel weight of the new color.
// \param colorMaskNegationFactor	Per-channel weight of the existing buffer value.
// \param isSRGB					If true, the new color is passed through tcu::linearToSRGB() before mixing.
// \param colorBuffer				Buffer that is both read (getPixel) and written (setPixel).
void FragmentProcessor::executeMaskedColorWrite (int fragNdxOffset, int numSamplesPerFragment, const Fragment* inputFragments, const Vec4& colorMaskFactor, const Vec4& colorMaskNegationFactor, bool isSRGB, const tcu::PixelBufferAccess& colorBuffer)
{
	for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
	{
		if (m_sampleRegister[regSampleNdx].isAlive)
		{
			int					fragSampleNdx	= regSampleNdx % numSamplesPerFragment;
			const Fragment&		frag			= inputFragments[fragNdxOffset + regSampleNdx/numSamplesPerFragment];
			Vec4				originalColor	= colorBuffer.getPixel(fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y());
			Vec4				newColor;

			newColor.xyz()	= m_sampleRegister[regSampleNdx].blendedRGB;
			newColor.w()	= m_sampleRegister[regSampleNdx].blendedA;

			// Only the new color is converted; originalColor is used as returned by getPixel().
			if (isSRGB)
				newColor = tcu::linearToSRGB(newColor);

			newColor = colorMaskFactor*newColor + colorMaskNegationFactor*originalColor;

			colorBuffer.setPixel(newColor, fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y());
		}
	}
}

// Write the signed integer value of every live sample to the color buffer, honoring the color mask.
//
// The stored value is tcu::select(signedValue, originalValue, colorMask), where
// originalValue is the current buffer content read with getPixelInt().
// NOTE(review): tcu::select() is presumably component-wise (new value where the mask
// is true, original value otherwise) - confirm against tcuVectorUtil.hpp.
//
// \param fragNdxOffset			Index of the fragment that register slot 0 belongs to.
// \param numSamplesPerFragment	Number of consecutive register slots used per fragment.
// \param inputFragments		Fragment array; only pixelCoord is read here.
// \param colorMask				Per-channel write mask.
// \param colorBuffer			Buffer that is both read and written.
void FragmentProcessor::executeSignedValueWrite (int fragNdxOffset, int numSamplesPerFragment, const Fragment* inputFragments, const tcu::BVec4& colorMask, const tcu::PixelBufferAccess& colorBuffer)
{
	for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
	{
		if (m_sampleRegister[regSampleNdx].isAlive)
		{
			int					fragSampleNdx	= regSampleNdx % numSamplesPerFragment;
			const Fragment&		frag			= inputFragments[fragNdxOffset + regSampleNdx/numSamplesPerFragment];
			const IVec4			originalValue	= colorBuffer.getPixelInt(fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y());

			colorBuffer.setPixel(tcu::select(m_sampleRegister[regSampleNdx].signedValue, originalValue, colorMask), fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y());
		}
	}
}

// Unsigned counterpart of executeSignedValueWrite(): stores
// tcu::select(unsignedValue, originalValue, colorMask) for every live sample, where
// originalValue is the current buffer content read with getPixelUint().
//
// \param fragNdxOffset			Index of the fragment that register slot 0 belongs to.
// \param numSamplesPerFragment	Number of consecutive register slots used per fragment.
// \param inputFragments		Fragment array; only pixelCoord is read here.
// \param colorMask				Per-channel write mask.
// \param colorBuffer			Buffer that is both read and written.
void FragmentProcessor::executeUnsignedValueWrite (int fragNdxOffset, int numSamplesPerFragment, const Fragment* inputFragments, const tcu::BVec4& colorMask, const tcu::PixelBufferAccess& colorBuffer)
{
	for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
	{
		if (m_sampleRegister[regSampleNdx].isAlive)
		{
			int					fragSampleNdx	= regSampleNdx % numSamplesPerFragment;
			const Fragment&		frag			= inputFragments[fragNdxOffset + regSampleNdx/numSamplesPerFragment];
			const UVec4			originalValue	= colorBuffer.getPixelUint(fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y());

			colorBuffer.setPixel(tcu::select(m_sampleRegister[regSampleNdx].unsignedValue, originalValue, colorMask), fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y());
		}
	}
}

// Run the per-fragment operations for a batch of fragments and write the surviving
// samples to the color, depth and stencil buffers.
//
// The fragments' samples are processed in groups of SAMPLE_REGISTER_SIZE. For each
// group the steps run in this order:
//   1. initialize isAlive from the fragment coverage mask
//   2. scissor test (if enabled)
//   3. depth bounds test (if enabled and a depth buffer exists)
//   4. stencil compare and stencil-fail write (if enabled and a stencil buffer exists)
//   5. depth compare, plus depth write when state.depthMask is set
//   6. stencil depth-fail / depth-pass writes
//   7. kill samples that failed the depth test
//   8. blend (float targets only), clamp and color write; integer targets are
//      written without blending
//
// \param msColorBuffer		Color target. The width of its raw() access is used as the number of samples per fragment.
// \param msDepthBuffer		Depth target; treated as absent if any dimension of its raw() access is zero.
// \param msStencilBuffer	Stencil target; treated as absent if any dimension of its raw() access is zero.
// \param inputFragments	Array of at least numFragments fragments.
// \param numFragments		Number of fragments to process.
// \param fragmentFacing	Selects which entry of state.stencilStates is used.
// \param state				Test, mask and blend state applied to all fragments of the batch.
void FragmentProcessor::render (const rr::MultisamplePixelBufferAccess&	msColorBuffer,
								const rr::MultisamplePixelBufferAccess&	msDepthBuffer,
								const rr::MultisamplePixelBufferAccess&	msStencilBuffer,
								const Fragment*							inputFragments,
								int										numFragments,
								FaceType								fragmentFacing,
								const FragmentOperationState&			state)
{
	DE_ASSERT(fragmentFacing < FACETYPE_LAST);
	DE_ASSERT(state.numStencilBits < 32); // code bitshifts numStencilBits, avoid undefined behavior

	const tcu::PixelBufferAccess&	colorBuffer			= msColorBuffer.raw();
	const tcu::PixelBufferAccess&	depthBuffer			= msDepthBuffer.raw();
	const tcu::PixelBufferAccess&	stencilBuffer		= msStencilBuffer.raw();

	// A depth or stencil test only runs when it is enabled in the state AND the
	// corresponding buffer is non-empty.
	bool							hasDepth			= depthBuffer.getWidth() > 0	&& depthBuffer.getHeight() > 0		&& depthBuffer.getDepth() > 0;
	bool							hasStencil			= stencilBuffer.getWidth() > 0	&& stencilBuffer.getHeight() > 0	&& stencilBuffer.getDepth() > 0;
	bool							doDepthBoundsTest	= hasDepth		&& state.depthBoundsTestEnabled;
	bool							doDepthTest			= hasDepth		&& state.depthTestEnabled;
	bool							doStencilTest		= hasStencil	&& state.stencilTestEnabled;

	// The channel class of the color format decides how fragment values are
	// interpreted: signed integer, unsigned integer, or float for everything else.
	tcu::TextureChannelClass		colorbufferClass	= tcu::getTextureChannelClass(msColorBuffer.raw().getFormat().type);
	rr::GenericVecType				fragmentDataType	= (colorbufferClass == tcu::TEXTURECHANNELCLASS_SIGNED_INTEGER) ? (rr::GENERICVECTYPE_INT32) : ((colorbufferClass == tcu::TEXTURECHANNELCLASS_UNSIGNED_INTEGER) ? (rr::GENERICVECTYPE_UINT32) : (rr::GENERICVECTYPE_FLOAT));

	// Depth and stencil buffers, when present, must have the same dimensions as the color buffer.
	DE_ASSERT((!hasDepth || colorBuffer.getWidth() == depthBuffer.getWidth())	&& (!hasStencil || colorBuffer.getWidth() == stencilBuffer.getWidth()));
	DE_ASSERT((!hasDepth || colorBuffer.getHeight() == depthBuffer.getHeight())	&& (!hasStencil || colorBuffer.getHeight() == stencilBuffer.getHeight()));
	DE_ASSERT((!hasDepth || colorBuffer.getDepth() == depthBuffer.getDepth())	&& (!hasStencil || colorBuffer.getDepth() == stencilBuffer.getDepth()));

	// Combined formats must be separated beforehand
	DE_ASSERT(!hasDepth || (!tcu::isCombinedDepthStencilType(depthBuffer.getFormat().type) && depthBuffer.getFormat().order == tcu::TextureFormat::D));
	DE_ASSERT(!hasStencil || (!tcu::isCombinedDepthStencilType(stencilBuffer.getFormat().type) && stencilBuffer.getFormat().order == tcu::TextureFormat::S));

	// NOTE(review): numSamplesPerFragment is used as a divisor below (including inside
	// the DE_ASSERT), so a zero-width color buffer would divide by zero - callers are
	// presumably required to pass a non-empty color buffer.
	// NOTE(review): for numFragments == 0 the group count evaluates to 1 rather than 0
	// (assuming SAMPLE_REGISTER_SIZE > 1, since (0 - 1)/SAMPLE_REGISTER_SIZE truncates
	// to 0); that single group has every sample marked dead during initialization.
	int								numSamplesPerFragment		= colorBuffer.getWidth();
	int								totalNumSamples				= numFragments*numSamplesPerFragment;
	int								numSampleGroups				= (totalNumSamples - 1) / SAMPLE_REGISTER_SIZE + 1; // \note totalNumSamples/SAMPLE_REGISTER_SIZE rounded up.
	const StencilState&				stencilState				= state.stencilStates[fragmentFacing];
	// Per-channel weights for executeMaskedColorWrite(): 1/0 for enabled channels, 0/1 for disabled ones.
	Vec4							colorMaskFactor				(state.colorMask[0] ? 1.0f : 0.0f, state.colorMask[1] ? 1.0f : 0.0f, state.colorMask[2] ? 1.0f : 0.0f, state.colorMask[3] ? 1.0f : 0.0f);
	Vec4							colorMaskNegationFactor		(state.colorMask[0] ? 0.0f : 1.0f, state.colorMask[1] ? 0.0f : 1.0f, state.colorMask[2] ? 0.0f : 1.0f, state.colorMask[3] ? 0.0f : 1.0f);
	bool							sRGBTarget					= state.sRGBEnabled && tcu::isSRGB(colorBuffer.getFormat());

	// Every group must hold a whole number of fragments, otherwise the
	// slot -> (fragment, sample) mapping used throughout would break.
	DE_ASSERT(SAMPLE_REGISTER_SIZE % numSamplesPerFragment == 0);

	// Divide the fragments' samples into groups of size SAMPLE_REGISTER_SIZE, and perform
	// the per-sample operations for one group at a time.

	for (int sampleGroupNdx = 0; sampleGroupNdx < numSampleGroups; sampleGroupNdx++)
	{
		// The index of the fragment of the sample at the beginning of m_sampleRegister.
		int groupFirstFragNdx = (sampleGroupNdx*SAMPLE_REGISTER_SIZE) / numSamplesPerFragment;

		// Initialize sample data in the sample register.

		for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
		{
			int fragNdx			= groupFirstFragNdx + regSampleNdx/numSamplesPerFragment;
			int fragSampleNdx	= regSampleNdx % numSamplesPerFragment;

			if (fragNdx < numFragments)
			{
				// A sample starts alive only if its bit is set in the fragment's coverage mask.
				// NOTE(review): the mask is built from the unsigned int literal 1u, so this
				// relies on numSamplesPerFragment not exceeding the bit width of unsigned int -
				// confirm against the type of Fragment::coverage.
				m_sampleRegister[regSampleNdx].isAlive		= (inputFragments[fragNdx].coverage & (1u << fragSampleNdx)) != 0;
				m_sampleRegister[regSampleNdx].depthPassed	= true; // \note This will stay true if depth test is disabled.
			}
			else
				m_sampleRegister[regSampleNdx].isAlive = false; // Slot past the last fragment (tail of the final group).
		}

		// Scissor test.

		if (state.scissorTestEnabled)
			executeScissorTest(groupFirstFragNdx, numSamplesPerFragment, inputFragments, state.scissorRectangle);

		// Depth bounds test.

		if (doDepthBoundsTest)
			executeDepthBoundsTest(groupFirstFragNdx, numSamplesPerFragment, inputFragments, state.minDepthBound, state.maxDepthBound, depthBuffer);

		// Stencil test.

		if (doStencilTest)
		{
			executeStencilCompare(groupFirstFragNdx, numSamplesPerFragment, inputFragments, stencilState, state.numStencilBits, stencilBuffer);
			executeStencilSFail(groupFirstFragNdx, numSamplesPerFragment, inputFragments, stencilState, state.numStencilBits, stencilBuffer);
		}

		// Depth test.
		// \note Current value of isAlive is needed for dpPass and dpFail, so it's only updated after them and not right after depth test.

		if (doDepthTest)
		{
			executeDepthCompare(groupFirstFragNdx, numSamplesPerFragment, inputFragments, state.depthFunc, depthBuffer);

			if (state.depthMask)
				executeDepthWrite(groupFirstFragNdx, numSamplesPerFragment, inputFragments, depthBuffer);
		}

		// Do dpFail and dpPass stencil writes.

		if (doStencilTest)
			executeStencilDpFailAndPass(groupFirstFragNdx, numSamplesPerFragment, inputFragments, stencilState, state.numStencilBits, stencilBuffer);

		// Kill the samples that failed depth test.

		if (doDepthTest)
		{
			for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
				m_sampleRegister[regSampleNdx].isAlive = m_sampleRegister[regSampleNdx].isAlive && m_sampleRegister[regSampleNdx].depthPassed;
		}

		// Paint fragments to target

		switch (fragmentDataType)
		{
			case rr::GENERICVECTYPE_FLOAT:
			{
				// Select min/max clamping values for blending factors and operands
				Vec4 minClampValue;
				Vec4 maxClampValue;

				if (colorbufferClass == tcu::TEXTURECHANNELCLASS_UNSIGNED_FIXED_POINT)
				{
					minClampValue = Vec4(0.0f);
					maxClampValue = Vec4(1.0f);
				}
				else if (colorbufferClass == tcu::TEXTURECHANNELCLASS_SIGNED_FIXED_POINT)
				{
					minClampValue = Vec4(-1.0f);
					maxClampValue = Vec4(1.0f);
				}
				else
				{
					// No clamping
					minClampValue = Vec4(-std::numeric_limits<float>::infinity());
					maxClampValue = Vec4(std::numeric_limits<float>::infinity());
				}

				// Blend calculation - only if using blend.
				if (state.blendMode == BLENDMODE_STANDARD)
				{
					// Put dst color to register, doing srgb-to-linear conversion if needed.
					for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
					{
						if (m_sampleRegister[regSampleNdx].isAlive)
						{
							int					fragSampleNdx	= regSampleNdx % numSamplesPerFragment;
							const Fragment&		frag			= inputFragments[groupFirstFragNdx + regSampleNdx/numSamplesPerFragment];
							Vec4				dstColor		= colorBuffer.getPixel(fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y());

							// Both fragment outputs (value and value1) and the destination color are clamped to the format's range.
							m_sampleRegister[regSampleNdx].clampedBlendSrcColor		= clamp(frag.value.get<float>(), minClampValue, maxClampValue);
							m_sampleRegister[regSampleNdx].clampedBlendSrc1Color	= clamp(frag.value1.get<float>(), minClampValue, maxClampValue);
							m_sampleRegister[regSampleNdx].clampedBlendDstColor		= clamp(sRGBTarget ? tcu::sRGBToLinear(dstColor) : dstColor, minClampValue, maxClampValue);
						}
					}

					// Calculate blend factors to register.
					executeBlendFactorComputeRGB(state.blendColor, state.blendRGBState);
					executeBlendFactorComputeA(state.blendColor, state.blendAState);

					// Compute blended color.
					executeBlend(state.blendRGBState, state.blendAState);
				}
				else if (state.blendMode == BLENDMODE_ADVANCED)
				{
					// Unpremultiply colors for blending, and do sRGB->linear if necessary
					// \todo [2014-03-17 pyry] Re-consider clampedBlend*Color var names
					for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
					{
						if (m_sampleRegister[regSampleNdx].isAlive)
						{
							int					fragSampleNdx	= regSampleNdx % numSamplesPerFragment;
							const Fragment&		frag			= inputFragments[groupFirstFragNdx + regSampleNdx/numSamplesPerFragment];
							const Vec4			srcColor		= frag.value.get<float>();
							const Vec4			dstColor		= colorBuffer.getPixel(fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y());

							// Clamp first, then unpremultiply, so the register holds clamped, unpremultiplied colors.
							m_sampleRegister[regSampleNdx].clampedBlendSrcColor		= unpremultiply(clamp(srcColor, minClampValue, maxClampValue));
							m_sampleRegister[regSampleNdx].clampedBlendDstColor		= unpremultiply(clamp(sRGBTarget ? tcu::sRGBToLinear(dstColor) : dstColor, minClampValue, maxClampValue));
						}
					}

					executeAdvancedBlend(state.blendEquationAdvaced);
				}
				else
				{
					// Not using blend - just put values to register as-is.
					DE_ASSERT(state.blendMode == BLENDMODE_NONE);

					for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
					{
						if (m_sampleRegister[regSampleNdx].isAlive)
						{
							const Fragment& frag = inputFragments[groupFirstFragNdx + regSampleNdx/numSamplesPerFragment];

							m_sampleRegister[regSampleNdx].blendedRGB	= frag.value.get<float>().xyz();
							m_sampleRegister[regSampleNdx].blendedA		= frag.value.get<float>().w();
						}
					}
				}

				// Clamp result values in sample register
				// (skipped for floating-point targets, whose clamp range above is infinite anyway).
				if (colorbufferClass != tcu::TEXTURECHANNELCLASS_FLOATING_POINT)
				{
					for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
					{
						if (m_sampleRegister[regSampleNdx].isAlive)
						{
							m_sampleRegister[regSampleNdx].blendedRGB	= clamp(m_sampleRegister[regSampleNdx].blendedRGB, minClampValue.swizzle(0, 1, 2), maxClampValue.swizzle(0, 1, 2));
							m_sampleRegister[regSampleNdx].blendedA		= clamp(m_sampleRegister[regSampleNdx].blendedA, minClampValue.w(), maxClampValue.w());
						}
					}
				}

				// Finally, write the colors to the color buffer.
				// All channels enabled: unmasked write, with a direct-memory path for RGBA/UNORM_INT8.
				// Some channels enabled: masked write. No channels enabled: nothing is written.

				if (state.colorMask[0] && state.colorMask[1] && state.colorMask[2] && state.colorMask[3])
				{
					if (colorBuffer.getFormat() == tcu::TextureFormat(tcu::TextureFormat::RGBA, tcu::TextureFormat::UNORM_INT8))
						executeRGBA8ColorWrite(groupFirstFragNdx, numSamplesPerFragment, inputFragments, colorBuffer);
					else
						executeColorWrite(groupFirstFragNdx, numSamplesPerFragment, inputFragments, sRGBTarget, colorBuffer);
				}
				else if (state.colorMask[0] || state.colorMask[1] || state.colorMask[2] || state.colorMask[3])
					executeMaskedColorWrite(groupFirstFragNdx, numSamplesPerFragment, inputFragments, colorMaskFactor, colorMaskNegationFactor, sRGBTarget, colorBuffer);
				break;
			}
			case rr::GENERICVECTYPE_INT32:
				// Write fragments
				// Integer targets are not blended: the fragment value is copied to the register as-is.
				for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
				{
					if (m_sampleRegister[regSampleNdx].isAlive)
					{
						const Fragment& frag = inputFragments[groupFirstFragNdx + regSampleNdx/numSamplesPerFragment];

						m_sampleRegister[regSampleNdx].signedValue = frag.value.get<deInt32>();
					}
				}

				// Skip the write entirely when every channel is masked out.
				if (state.colorMask[0] || state.colorMask[1] || state.colorMask[2] || state.colorMask[3])
					executeSignedValueWrite(groupFirstFragNdx, numSamplesPerFragment, inputFragments, state.colorMask, colorBuffer);
				break;

			case rr::GENERICVECTYPE_UINT32:
				// Write fragments
				// Same as the signed case, using the unsigned view of the fragment value.
				for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
				{
					if (m_sampleRegister[regSampleNdx].isAlive)
					{
						const Fragment& frag = inputFragments[groupFirstFragNdx + regSampleNdx/numSamplesPerFragment];

						m_sampleRegister[regSampleNdx].unsignedValue = frag.value.get<deUint32>();
					}
				}

				// Skip the write entirely when every channel is masked out.
				if (state.colorMask[0] || state.colorMask[1] || state.colorMask[2] || state.colorMask[3])
					executeUnsignedValueWrite(groupFirstFragNdx, numSamplesPerFragment, inputFragments, state.colorMask, colorBuffer);
				break;

			default:
				DE_ASSERT(DE_FALSE);
		}
	}
}

} // rr