Home | History | Annotate | Download | only in referencerenderer
      1 /*-------------------------------------------------------------------------
      2  * drawElements Quality Program Reference Renderer
      3  * -----------------------------------------------
      4  *
      5  * Copyright 2014 The Android Open Source Project
      6  *
      7  * Licensed under the Apache License, Version 2.0 (the "License");
      8  * you may not use this file except in compliance with the License.
      9  * You may obtain a copy of the License at
     10  *
     11  *      http://www.apache.org/licenses/LICENSE-2.0
     12  *
     13  * Unless required by applicable law or agreed to in writing, software
     14  * distributed under the License is distributed on an "AS IS" BASIS,
     15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     16  * See the License for the specific language governing permissions and
     17  * limitations under the License.
     18  *
     19  *//*!
     20  * \file
     21  * \brief Reference implementation for per-fragment operations.
     22  *//*--------------------------------------------------------------------*/
     23 
     24 #include "rrFragmentOperations.hpp"
     25 #include "tcuVectorUtil.hpp"
     26 #include "tcuTextureUtil.hpp"
     27 #include <limits>
     28 
     29 using tcu::IVec2;
     30 using tcu::Vec3;
     31 using tcu::Vec4;
     32 using tcu::IVec4;
     33 using tcu::UVec4;
     34 using tcu::min;
     35 using tcu::max;
     36 using tcu::clamp;
     37 using de::min;
     38 using de::max;
     39 using de::clamp;
     40 
     41 namespace rr
     42 {
     43 
     44 // Return oldValue with the bits indicated by mask replaced by corresponding bits of newValue.
     45 static inline int maskedBitReplace (int oldValue, int newValue, deUint32 mask)
     46 {
     47 	return (oldValue & ~mask) | (newValue & mask);
     48 }
     49 
     50 static inline bool isInsideRect (const IVec2& point, const WindowRectangle& rect)
     51 {
     52 	return de::inBounds(point.x(), rect.left,		rect.left + rect.width) &&
     53 		   de::inBounds(point.y(), rect.bottom,		rect.bottom + rect.height);
     54 }
     55 
     56 static inline Vec4 unpremultiply (const Vec4& v)
     57 {
     58 	if (v.w() > 0.0f)
     59 		return Vec4(v.x()/v.w(), v.y()/v.w(), v.z()/v.w(), v.w());
     60 	else
     61 	{
     62 		DE_ASSERT(v.x() == 0.0f && v.y() == 0.0f && v.z() == 0.0f);
     63 		return Vec4(0.0f, 0.0f, 0.0f, 0.0f);
     64 	}
     65 }
     66 
     67 void clearMultisampleColorBuffer	(const tcu::PixelBufferAccess& dst, const Vec4& v,	const WindowRectangle& r)	{ tcu::clear(tcu::getSubregion(dst, 0, r.left, r.bottom, dst.getWidth(), r.width, r.height), v);				}
     68 void clearMultisampleColorBuffer	(const tcu::PixelBufferAccess& dst, const IVec4& v,	const WindowRectangle& r)	{ tcu::clear(tcu::getSubregion(dst, 0, r.left, r.bottom, dst.getWidth(), r.width, r.height), v);				}
     69 void clearMultisampleColorBuffer	(const tcu::PixelBufferAccess& dst, const UVec4& v,	const WindowRectangle& r)	{ tcu::clear(tcu::getSubregion(dst, 0, r.left, r.bottom, dst.getWidth(), r.width, r.height), v.cast<int>());	}
     70 void clearMultisampleDepthBuffer	(const tcu::PixelBufferAccess& dst, float v,		const WindowRectangle& r)	{ tcu::clearDepth(tcu::getSubregion(dst, 0, r.left, r.bottom, dst.getWidth(), r.width, r.height), v);			}
     71 void clearMultisampleStencilBuffer	(const tcu::PixelBufferAccess& dst, int v,			const WindowRectangle& r)	{ tcu::clearStencil(tcu::getSubregion(dst, 0, r.left, r.bottom, dst.getWidth(), r.width, r.height), v);			}
     72 
     73 FragmentProcessor::FragmentProcessor (void)
     74 	: m_sampleRegister()
     75 {
     76 }
     77 
     78 void FragmentProcessor::executeScissorTest (int fragNdxOffset, int numSamplesPerFragment, const Fragment* inputFragments, const WindowRectangle& scissorRect)
     79 {
     80 	for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
     81 	{
     82 		if (m_sampleRegister[regSampleNdx].isAlive)
     83 		{
     84 			int fragNdx = fragNdxOffset + regSampleNdx/numSamplesPerFragment;
     85 
     86 			if (!isInsideRect(inputFragments[fragNdx].pixelCoord, scissorRect))
     87 				m_sampleRegister[regSampleNdx].isAlive = false;
     88 		}
     89 	}
     90 }
     91 
     92 void FragmentProcessor::executeStencilCompare (int fragNdxOffset, int numSamplesPerFragment, const Fragment* inputFragments, const StencilState& stencilState, int numStencilBits, const tcu::ConstPixelBufferAccess& stencilBuffer)
     93 {
     94 #define SAMPLE_REGISTER_STENCIL_COMPARE(COMPARE_EXPRESSION)																					\
     95 	for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)															\
     96 	{																																		\
     97 		if (m_sampleRegister[regSampleNdx].isAlive)																							\
     98 		{																																	\
     99 			int					fragSampleNdx		= regSampleNdx % numSamplesPerFragment;													\
    100 			const Fragment&		frag				= inputFragments[fragNdxOffset + regSampleNdx/numSamplesPerFragment];					\
    101 			int					stencilBufferValue	= stencilBuffer.getPixStencil(fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y());	\
    102 			int					maskedRef			= stencilState.compMask & clampedStencilRef;											\
    103 			int					maskedBuf			= stencilState.compMask & stencilBufferValue;											\
    104 			DE_UNREF(maskedRef);																											\
    105 			DE_UNREF(maskedBuf);																											\
    106 																																			\
    107 			m_sampleRegister[regSampleNdx].stencilPassed = (COMPARE_EXPRESSION);															\
    108 		}																																	\
    109 	}
    110 
    111 	int clampedStencilRef = de::clamp(stencilState.ref, 0, (1<<numStencilBits)-1);
    112 
    113 	switch (stencilState.func)
    114 	{
    115 		case TESTFUNC_NEVER:	SAMPLE_REGISTER_STENCIL_COMPARE(false)						break;
    116 		case TESTFUNC_ALWAYS:	SAMPLE_REGISTER_STENCIL_COMPARE(true)						break;
    117 		case TESTFUNC_LESS:		SAMPLE_REGISTER_STENCIL_COMPARE(maskedRef <  maskedBuf)		break;
    118 		case TESTFUNC_LEQUAL:	SAMPLE_REGISTER_STENCIL_COMPARE(maskedRef <= maskedBuf)		break;
    119 		case TESTFUNC_GREATER:	SAMPLE_REGISTER_STENCIL_COMPARE(maskedRef >  maskedBuf)		break;
    120 		case TESTFUNC_GEQUAL:	SAMPLE_REGISTER_STENCIL_COMPARE(maskedRef >= maskedBuf)		break;
    121 		case TESTFUNC_EQUAL:	SAMPLE_REGISTER_STENCIL_COMPARE(maskedRef == maskedBuf)		break;
    122 		case TESTFUNC_NOTEQUAL:	SAMPLE_REGISTER_STENCIL_COMPARE(maskedRef != maskedBuf)		break;
    123 		default:
    124 			DE_ASSERT(false);
    125 	}
    126 
    127 #undef SAMPLE_REGISTER_STENCIL_COMPARE
    128 }
    129 
    130 void FragmentProcessor::executeStencilSFail (int fragNdxOffset, int numSamplesPerFragment, const Fragment* inputFragments, const StencilState& stencilState, int numStencilBits, const tcu::PixelBufferAccess& stencilBuffer)
    131 {
    132 #define SAMPLE_REGISTER_SFAIL(SFAIL_EXPRESSION)																																		\
    133 	for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)																									\
    134 	{																																												\
    135 		if (m_sampleRegister[regSampleNdx].isAlive && !m_sampleRegister[regSampleNdx].stencilPassed)																				\
    136 		{																																											\
    137 			int					fragSampleNdx		= regSampleNdx % numSamplesPerFragment;																							\
    138 			const Fragment&		frag				= inputFragments[fragNdxOffset + regSampleNdx/numSamplesPerFragment];															\
    139 			int					stencilBufferValue	= stencilBuffer.getPixStencil(fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y());											\
    140 																																													\
    141 			stencilBuffer.setPixStencil(maskedBitReplace(stencilBufferValue, (SFAIL_EXPRESSION), stencilState.writeMask), fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y());	\
    142 			m_sampleRegister[regSampleNdx].isAlive = false;																															\
    143 		}																																											\
    144 	}
    145 
    146 	int clampedStencilRef = de::clamp(stencilState.ref, 0, (1<<numStencilBits)-1);
    147 
    148 	switch (stencilState.sFail)
    149 	{
    150 		case STENCILOP_KEEP:		SAMPLE_REGISTER_SFAIL(stencilBufferValue)												break;
    151 		case STENCILOP_ZERO:		SAMPLE_REGISTER_SFAIL(0)																break;
    152 		case STENCILOP_REPLACE:		SAMPLE_REGISTER_SFAIL(clampedStencilRef)												break;
    153 		case STENCILOP_INCR:		SAMPLE_REGISTER_SFAIL(de::clamp(stencilBufferValue+1, 0, (1<<numStencilBits) - 1))		break;
    154 		case STENCILOP_DECR:		SAMPLE_REGISTER_SFAIL(de::clamp(stencilBufferValue-1, 0, (1<<numStencilBits) - 1))		break;
    155 		case STENCILOP_INCR_WRAP:	SAMPLE_REGISTER_SFAIL((stencilBufferValue + 1) & ((1<<numStencilBits) - 1))				break;
    156 		case STENCILOP_DECR_WRAP:	SAMPLE_REGISTER_SFAIL((stencilBufferValue - 1) & ((1<<numStencilBits) - 1))				break;
    157 		case STENCILOP_INVERT:		SAMPLE_REGISTER_SFAIL((~stencilBufferValue) & ((1<<numStencilBits) - 1))				break;
    158 		default:
    159 			DE_ASSERT(false);
    160 	}
    161 
    162 #undef SAMPLE_REGISTER_SFAIL
    163 }
    164 
    165 
    166 void FragmentProcessor::executeDepthBoundsTest (int fragNdxOffset, int numSamplesPerFragment, const Fragment* inputFragments, const float minDepthBound, const float maxDepthBound, const tcu::ConstPixelBufferAccess& depthBuffer)
    167 {
    168 	if (depthBuffer.getFormat().type == tcu::TextureFormat::FLOAT || depthBuffer.getFormat().type == tcu::TextureFormat::FLOAT_UNSIGNED_INT_24_8_REV)
    169 	{
    170 		for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; ++regSampleNdx)
    171 		{
    172 			if (m_sampleRegister[regSampleNdx].isAlive)
    173 			{
    174 				const int			fragSampleNdx		= regSampleNdx % numSamplesPerFragment;
    175 				const Fragment&		frag				= inputFragments[fragNdxOffset + regSampleNdx/numSamplesPerFragment];
    176 				const float			depthBufferValue	= depthBuffer.getPixDepth(fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y());
    177 
    178 				if (!de::inRange(depthBufferValue, minDepthBound, maxDepthBound))
    179 					m_sampleRegister[regSampleNdx].isAlive = false;
    180 			}
    181 		}
    182 	}
    183 	else
    184 	{
    185 		/* Convert float bounds to target buffer format for comparison */
    186 
    187 		deUint32 minDepthBoundUint, maxDepthBoundUint;
    188 		{
    189 			deUint32 buffer[2];
    190 			DE_ASSERT(sizeof(buffer) >= (size_t)depthBuffer.getFormat().getPixelSize());
    191 
    192 			tcu::PixelBufferAccess access(depthBuffer.getFormat(), 1, 1, 1, &buffer);
    193 			access.setPixDepth(minDepthBound, 0, 0, 0);
    194 			minDepthBoundUint = access.getPixelUint(0, 0, 0).x();
    195 		}
    196 		{
    197 			deUint32 buffer[2];
    198 
    199 			tcu::PixelBufferAccess access(depthBuffer.getFormat(), 1, 1, 1, &buffer);
    200 			access.setPixDepth(maxDepthBound, 0, 0, 0);
    201 			maxDepthBoundUint = access.getPixelUint(0, 0, 0).x();
    202 		}
    203 
    204 		for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; ++regSampleNdx)
    205 		{
    206 			if (m_sampleRegister[regSampleNdx].isAlive)
    207 			{
    208 				const int			fragSampleNdx		= regSampleNdx % numSamplesPerFragment;
    209 				const Fragment&		frag				= inputFragments[fragNdxOffset + regSampleNdx / numSamplesPerFragment];
    210 				const deUint32		depthBufferValue	= depthBuffer.getPixelUint(fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y()).x();
    211 
    212 				if (!de::inRange(depthBufferValue, minDepthBoundUint, maxDepthBoundUint))
    213 					m_sampleRegister[regSampleNdx].isAlive = false;
    214 			}
    215 		}
    216 	}
    217 }
    218 
    219 void FragmentProcessor::executeDepthCompare (int fragNdxOffset, int numSamplesPerFragment, const Fragment* inputFragments, TestFunc depthFunc, const tcu::ConstPixelBufferAccess& depthBuffer)
    220 {
    221 #define SAMPLE_REGISTER_DEPTH_COMPARE_F(COMPARE_EXPRESSION)																						\
    222 	for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)																\
    223 	{																																			\
    224 		if (m_sampleRegister[regSampleNdx].isAlive)																								\
    225 		{																																		\
    226 			int					fragSampleNdx		= regSampleNdx % numSamplesPerFragment;														\
    227 			const Fragment&		frag				= inputFragments[fragNdxOffset + regSampleNdx/numSamplesPerFragment];						\
    228 			float				depthBufferValue	= depthBuffer.getPixDepth(fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y());			\
    229 			float				sampleDepthFloat	= frag.sampleDepths[fragSampleNdx];															\
    230 			float				sampleDepth			= de::clamp(sampleDepthFloat, 0.0f, 1.0f);													\
    231 																																				\
    232 			m_sampleRegister[regSampleNdx].depthPassed = (COMPARE_EXPRESSION);																	\
    233 																																				\
    234 			DE_UNREF(depthBufferValue);																											\
    235 			DE_UNREF(sampleDepth);																												\
    236 		}																																		\
    237 	}
    238 
    239 #define SAMPLE_REGISTER_DEPTH_COMPARE_UI(COMPARE_EXPRESSION)																					\
    240 	for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)																\
    241 	{																																			\
    242 		if (m_sampleRegister[regSampleNdx].isAlive)																								\
    243 		{																																		\
    244 			int					fragSampleNdx		= regSampleNdx % numSamplesPerFragment;														\
    245 			const Fragment&		frag				= inputFragments[fragNdxOffset + regSampleNdx/numSamplesPerFragment];						\
    246 			deUint32			depthBufferValue	= depthBuffer.getPixelUint(fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y()).x();	\
    247 			float				sampleDepthFloat	= frag.sampleDepths[fragSampleNdx];															\
    248 																																				\
    249 			/* Convert input float to target buffer format for comparison */																	\
    250 																																				\
    251 			deUint32 buffer[2];																													\
    252 																																				\
    253 			DE_ASSERT(sizeof(buffer) >= (size_t)depthBuffer.getFormat().getPixelSize());														\
    254 																																				\
    255 			tcu::PixelBufferAccess access(depthBuffer.getFormat(), 1, 1, 1, &buffer);															\
    256 			access.setPixDepth(sampleDepthFloat, 0, 0, 0);																						\
    257 			deUint32 sampleDepth = access.getPixelUint(0, 0, 0).x();																			\
    258 																																				\
    259 			m_sampleRegister[regSampleNdx].depthPassed = (COMPARE_EXPRESSION);																	\
    260 																																				\
    261 			DE_UNREF(depthBufferValue);																											\
    262 			DE_UNREF(sampleDepth);																												\
    263 		}																																		\
    264 	}
    265 
    266 	if (depthBuffer.getFormat().type == tcu::TextureFormat::FLOAT || depthBuffer.getFormat().type == tcu::TextureFormat::FLOAT_UNSIGNED_INT_24_8_REV)
    267 	{
    268 
    269 		switch (depthFunc)
    270 		{
    271 			case TESTFUNC_NEVER:	SAMPLE_REGISTER_DEPTH_COMPARE_F(false)							break;
    272 			case TESTFUNC_ALWAYS:	SAMPLE_REGISTER_DEPTH_COMPARE_F(true)								break;
    273 			case TESTFUNC_LESS:		SAMPLE_REGISTER_DEPTH_COMPARE_F(sampleDepth <  depthBufferValue)	break;
    274 			case TESTFUNC_LEQUAL:	SAMPLE_REGISTER_DEPTH_COMPARE_F(sampleDepth <= depthBufferValue)	break;
    275 			case TESTFUNC_GREATER:	SAMPLE_REGISTER_DEPTH_COMPARE_F(sampleDepth >  depthBufferValue)	break;
    276 			case TESTFUNC_GEQUAL:	SAMPLE_REGISTER_DEPTH_COMPARE_F(sampleDepth >= depthBufferValue)	break;
    277 			case TESTFUNC_EQUAL:	SAMPLE_REGISTER_DEPTH_COMPARE_F(sampleDepth == depthBufferValue)	break;
    278 			case TESTFUNC_NOTEQUAL:	SAMPLE_REGISTER_DEPTH_COMPARE_F(sampleDepth != depthBufferValue)	break;
    279 			default:
    280 				DE_ASSERT(false);
    281 		}
    282 
    283 	}
    284 	else
    285 	{
    286 		switch (depthFunc)
    287 		{
    288 			case TESTFUNC_NEVER:	SAMPLE_REGISTER_DEPTH_COMPARE_UI(false)							break;
    289 			case TESTFUNC_ALWAYS:	SAMPLE_REGISTER_DEPTH_COMPARE_UI(true)								break;
    290 			case TESTFUNC_LESS:		SAMPLE_REGISTER_DEPTH_COMPARE_UI(sampleDepth <  depthBufferValue)	break;
    291 			case TESTFUNC_LEQUAL:	SAMPLE_REGISTER_DEPTH_COMPARE_UI(sampleDepth <= depthBufferValue)	break;
    292 			case TESTFUNC_GREATER:	SAMPLE_REGISTER_DEPTH_COMPARE_UI(sampleDepth >  depthBufferValue)	break;
    293 			case TESTFUNC_GEQUAL:	SAMPLE_REGISTER_DEPTH_COMPARE_UI(sampleDepth >= depthBufferValue)	break;
    294 			case TESTFUNC_EQUAL:	SAMPLE_REGISTER_DEPTH_COMPARE_UI(sampleDepth == depthBufferValue)	break;
    295 			case TESTFUNC_NOTEQUAL:	SAMPLE_REGISTER_DEPTH_COMPARE_UI(sampleDepth != depthBufferValue)	break;
    296 			default:
    297 				DE_ASSERT(false);
    298 		}
    299 	}
    300 
    301 #undef SAMPLE_REGISTER_DEPTH_COMPARE_F
    302 #undef SAMPLE_REGISTER_DEPTH_COMPARE_UI
    303 }
    304 
    305 void FragmentProcessor::executeDepthWrite (int fragNdxOffset, int numSamplesPerFragment, const Fragment* inputFragments, const tcu::PixelBufferAccess& depthBuffer)
    306 {
    307 	for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
    308 	{
    309 		if (m_sampleRegister[regSampleNdx].isAlive && m_sampleRegister[regSampleNdx].depthPassed)
    310 		{
    311 			int					fragSampleNdx	= regSampleNdx % numSamplesPerFragment;
    312 			const Fragment&		frag			= inputFragments[fragNdxOffset + regSampleNdx/numSamplesPerFragment];
    313 			const float			clampedDepth	= de::clamp(frag.sampleDepths[fragSampleNdx], 0.0f, 1.0f);
    314 
    315 			depthBuffer.setPixDepth(clampedDepth, fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y());
    316 		}
    317 	}
    318 }
    319 
    320 void FragmentProcessor::executeStencilDpFailAndPass (int fragNdxOffset, int numSamplesPerFragment, const Fragment* inputFragments, const StencilState& stencilState, int numStencilBits, const tcu::PixelBufferAccess& stencilBuffer)
    321 {
    322 #define SAMPLE_REGISTER_DPFAIL_OR_DPPASS(CONDITION, EXPRESSION)																													\
    323 	for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)																								\
    324 	{																																											\
    325 		if (m_sampleRegister[regSampleNdx].isAlive && (CONDITION))																												\
    326 		{																																										\
    327 			int					fragSampleNdx		= regSampleNdx % numSamplesPerFragment;																						\
    328 			const Fragment&		frag				= inputFragments[fragNdxOffset + regSampleNdx/numSamplesPerFragment];														\
    329 			int					stencilBufferValue	= stencilBuffer.getPixStencil(fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y());										\
    330 																																												\
    331 			stencilBuffer.setPixStencil(maskedBitReplace(stencilBufferValue, (EXPRESSION), stencilState.writeMask), fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y());	\
    332 		}																																										\
    333 	}
    334 
    335 #define SWITCH_DPFAIL_OR_DPPASS(OP_NAME, CONDITION)																											\
    336 		switch (stencilState.OP_NAME)																														\
    337 		{																																					\
    338 			case STENCILOP_KEEP:		SAMPLE_REGISTER_DPFAIL_OR_DPPASS(CONDITION, stencilBufferValue)												break;	\
    339 			case STENCILOP_ZERO:		SAMPLE_REGISTER_DPFAIL_OR_DPPASS(CONDITION, 0)																break;	\
    340 			case STENCILOP_REPLACE:		SAMPLE_REGISTER_DPFAIL_OR_DPPASS(CONDITION, clampedStencilRef)												break;	\
    341 			case STENCILOP_INCR:		SAMPLE_REGISTER_DPFAIL_OR_DPPASS(CONDITION, de::clamp(stencilBufferValue+1, 0, (1<<numStencilBits) - 1))	break;	\
    342 			case STENCILOP_DECR:		SAMPLE_REGISTER_DPFAIL_OR_DPPASS(CONDITION, de::clamp(stencilBufferValue-1, 0, (1<<numStencilBits) - 1))	break;	\
    343 			case STENCILOP_INCR_WRAP:	SAMPLE_REGISTER_DPFAIL_OR_DPPASS(CONDITION, (stencilBufferValue + 1) & ((1<<numStencilBits) - 1))			break;	\
    344 			case STENCILOP_DECR_WRAP:	SAMPLE_REGISTER_DPFAIL_OR_DPPASS(CONDITION, (stencilBufferValue - 1) & ((1<<numStencilBits) - 1))			break;	\
    345 			case STENCILOP_INVERT:		SAMPLE_REGISTER_DPFAIL_OR_DPPASS(CONDITION, (~stencilBufferValue) & ((1<<numStencilBits) - 1))				break;	\
    346 			default:																																		\
    347 				DE_ASSERT(false);																															\
    348 		}
    349 
    350 	int clampedStencilRef = de::clamp(stencilState.ref, 0, (1<<numStencilBits)-1);
    351 
    352 	SWITCH_DPFAIL_OR_DPPASS(dpFail, !m_sampleRegister[regSampleNdx].depthPassed)
    353 	SWITCH_DPFAIL_OR_DPPASS(dpPass, m_sampleRegister[regSampleNdx].depthPassed)
    354 
    355 #undef SWITCH_DPFAIL_OR_DPPASS
    356 #undef SAMPLE_REGISTER_DPFAIL_OR_DPPASS
    357 }
    358 
    359 void FragmentProcessor::executeBlendFactorComputeRGB (const Vec4& blendColor, const BlendState& blendRGBState)
    360 {
    361 #define SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, FACTOR_EXPRESSION)																				\
    362 	for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)																	\
    363 	{																																				\
    364 		if (m_sampleRegister[regSampleNdx].isAlive)																									\
    365 		{																																			\
    366 			const Vec4& src		= m_sampleRegister[regSampleNdx].clampedBlendSrcColor;																\
    367 			const Vec4& src1	= m_sampleRegister[regSampleNdx].clampedBlendSrc1Color;																\
    368 			const Vec4& dst		= m_sampleRegister[regSampleNdx].clampedBlendDstColor;																\
    369 			DE_UNREF(src);																															\
    370 			DE_UNREF(src1);																															\
    371 			DE_UNREF(dst);																															\
    372 																																					\
    373 			m_sampleRegister[regSampleNdx].FACTOR_NAME = (FACTOR_EXPRESSION);																		\
    374 		}																																			\
    375 	}
    376 
    377 #define SWITCH_SRC_OR_DST_FACTOR_RGB(FUNC_NAME, FACTOR_NAME)																					\
    378 	switch (blendRGBState.FUNC_NAME)																											\
    379 	{																																			\
    380 		case BLENDFUNC_ZERO:						SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(0.0f))								break;	\
    381 		case BLENDFUNC_ONE:							SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(1.0f))								break;	\
    382 		case BLENDFUNC_SRC_COLOR:					SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, src.swizzle(0,1,2))						break;	\
    383 		case BLENDFUNC_ONE_MINUS_SRC_COLOR:			SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(1.0f) - src.swizzle(0,1,2))			break;	\
    384 		case BLENDFUNC_DST_COLOR:					SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, dst.swizzle(0,1,2))						break;	\
    385 		case BLENDFUNC_ONE_MINUS_DST_COLOR:			SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(1.0f) - dst.swizzle(0,1,2))			break;	\
    386 		case BLENDFUNC_SRC_ALPHA:					SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(src.w()))							break;	\
    387 		case BLENDFUNC_ONE_MINUS_SRC_ALPHA:			SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(1.0f - src.w()))						break;	\
    388 		case BLENDFUNC_DST_ALPHA:					SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(dst.w()))							break;	\
    389 		case BLENDFUNC_ONE_MINUS_DST_ALPHA:			SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(1.0f - dst.w()))						break;	\
    390 		case BLENDFUNC_CONSTANT_COLOR:				SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, blendColor.swizzle(0,1,2))				break;	\
    391 		case BLENDFUNC_ONE_MINUS_CONSTANT_COLOR:	SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(1.0f) - blendColor.swizzle(0,1,2))	break;	\
    392 		case BLENDFUNC_CONSTANT_ALPHA:				SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(blendColor.w()))						break;	\
    393 		case BLENDFUNC_ONE_MINUS_CONSTANT_ALPHA:	SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(1.0f - blendColor.w()))				break;	\
    394 		case BLENDFUNC_SRC_ALPHA_SATURATE:			SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(de::min(src.w(), 1.0f - dst.w())))	break;	\
    395 		case BLENDFUNC_SRC1_COLOR:					SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, src1.swizzle(0,1,2))						break;	\
    396 		case BLENDFUNC_ONE_MINUS_SRC1_COLOR:		SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(1.0f) - src1.swizzle(0,1,2))			break;	\
    397 		case BLENDFUNC_SRC1_ALPHA:					SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(src1.w()))							break;	\
    398 		case BLENDFUNC_ONE_MINUS_SRC1_ALPHA:		SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(1.0f - src1.w()))					break;	\
    399 		default:																																\
    400 			DE_ASSERT(false);																													\
    401 	}
    402 
    403 	SWITCH_SRC_OR_DST_FACTOR_RGB(srcFunc, blendSrcFactorRGB)
    404 	SWITCH_SRC_OR_DST_FACTOR_RGB(dstFunc, blendDstFactorRGB)
    405 
    406 #undef SWITCH_SRC_OR_DST_FACTOR_RGB
    407 #undef SAMPLE_REGISTER_BLEND_FACTOR
    408 }
    409 
    410 void FragmentProcessor::executeBlendFactorComputeA (const Vec4& blendColor, const BlendState& blendAState)
    411 {
    412 #define SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, FACTOR_EXPRESSION)														\
    413 	for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)											\
    414 	{																														\
    415 		if (m_sampleRegister[regSampleNdx].isAlive)																			\
    416 		{																													\
    417 			const Vec4& src		= m_sampleRegister[regSampleNdx].clampedBlendSrcColor;										\
    418 			const Vec4& src1	= m_sampleRegister[regSampleNdx].clampedBlendSrc1Color;										\
    419 			const Vec4& dst		= m_sampleRegister[regSampleNdx].clampedBlendDstColor;										\
    420 			DE_UNREF(src);																									\
    421 			DE_UNREF(src1);																									\
    422 			DE_UNREF(dst);																									\
    423 																															\
    424 			m_sampleRegister[regSampleNdx].FACTOR_NAME = (FACTOR_EXPRESSION);												\
    425 		}																													\
    426 	}
    427 
    428 #define SWITCH_SRC_OR_DST_FACTOR_A(FUNC_NAME, FACTOR_NAME)																		\
    429 	switch (blendAState.FUNC_NAME)																								\
    430 	{																															\
    431 		case BLENDFUNC_ZERO:						SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, 0.0f)						break;	\
    432 		case BLENDFUNC_ONE:							SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, 1.0f)						break;	\
    433 		case BLENDFUNC_SRC_COLOR:					SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, src.w())					break;	\
    434 		case BLENDFUNC_ONE_MINUS_SRC_COLOR:			SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, 1.0f - src.w())			break;	\
    435 		case BLENDFUNC_DST_COLOR:					SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, dst.w())					break;	\
    436 		case BLENDFUNC_ONE_MINUS_DST_COLOR:			SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, 1.0f - dst.w())			break;	\
    437 		case BLENDFUNC_SRC_ALPHA:					SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, src.w())					break;	\
    438 		case BLENDFUNC_ONE_MINUS_SRC_ALPHA:			SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, 1.0f - src.w())			break;	\
    439 		case BLENDFUNC_DST_ALPHA:					SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, dst.w())					break;	\
    440 		case BLENDFUNC_ONE_MINUS_DST_ALPHA:			SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, 1.0f - dst.w())			break;	\
    441 		case BLENDFUNC_CONSTANT_COLOR:				SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, blendColor.w())			break;	\
    442 		case BLENDFUNC_ONE_MINUS_CONSTANT_COLOR:	SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, 1.0f - blendColor.w())	break;	\
    443 		case BLENDFUNC_CONSTANT_ALPHA:				SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, blendColor.w())			break;	\
    444 		case BLENDFUNC_ONE_MINUS_CONSTANT_ALPHA:	SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, 1.0f - blendColor.w())	break;	\
    445 		case BLENDFUNC_SRC_ALPHA_SATURATE:			SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, 1.0f)						break;	\
    446 		case BLENDFUNC_SRC1_COLOR:					SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, src1.w())					break;	\
    447 		case BLENDFUNC_ONE_MINUS_SRC1_COLOR:		SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, 1.0f - src1.w())			break;	\
    448 		case BLENDFUNC_SRC1_ALPHA:					SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, src1.w())					break;	\
    449 		case BLENDFUNC_ONE_MINUS_SRC1_ALPHA:		SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, 1.0f - src1.w())			break;	\
    450 		default:																												\
    451 			DE_ASSERT(false);																									\
    452 	}
    453 
    454 	SWITCH_SRC_OR_DST_FACTOR_A(srcFunc, blendSrcFactorA)
    455 	SWITCH_SRC_OR_DST_FACTOR_A(dstFunc, blendDstFactorA)
    456 
    457 #undef SWITCH_SRC_OR_DST_FACTOR_A
    458 #undef SAMPLE_REGISTER_BLEND_FACTOR
    459 }
    460 
    461 void FragmentProcessor::executeBlend (const BlendState& blendRGBState, const BlendState& blendAState)
    462 {
    463 #define SAMPLE_REGISTER_BLENDED_COLOR(COLOR_NAME, COLOR_EXPRESSION)						\
    464 	for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)		\
    465 	{																					\
    466 		if (m_sampleRegister[regSampleNdx].isAlive)										\
    467 		{																				\
    468 			SampleData& sample		= m_sampleRegister[regSampleNdx];					\
    469 			const Vec4& srcColor	= sample.clampedBlendSrcColor;						\
    470 			const Vec4& dstColor	= sample.clampedBlendDstColor;						\
    471 																						\
    472 			sample.COLOR_NAME = (COLOR_EXPRESSION);										\
    473 		}																				\
    474 	}
    475 
    476 	switch (blendRGBState.equation)
    477 	{
    478 		case BLENDEQUATION_ADD:					SAMPLE_REGISTER_BLENDED_COLOR(blendedRGB, srcColor.swizzle(0,1,2)*sample.blendSrcFactorRGB + dstColor.swizzle(0,1,2)*sample.blendDstFactorRGB)	break;
    479 		case BLENDEQUATION_SUBTRACT:			SAMPLE_REGISTER_BLENDED_COLOR(blendedRGB, srcColor.swizzle(0,1,2)*sample.blendSrcFactorRGB - dstColor.swizzle(0,1,2)*sample.blendDstFactorRGB)	break;
    480 		case BLENDEQUATION_REVERSE_SUBTRACT:	SAMPLE_REGISTER_BLENDED_COLOR(blendedRGB, dstColor.swizzle(0,1,2)*sample.blendDstFactorRGB - srcColor.swizzle(0,1,2)*sample.blendSrcFactorRGB)	break;
    481 		case BLENDEQUATION_MIN:					SAMPLE_REGISTER_BLENDED_COLOR(blendedRGB, min(srcColor.swizzle(0,1,2), dstColor.swizzle(0,1,2)))												break;
    482 		case BLENDEQUATION_MAX:					SAMPLE_REGISTER_BLENDED_COLOR(blendedRGB, max(srcColor.swizzle(0,1,2), dstColor.swizzle(0,1,2)))												break;
    483 		default:
    484 			DE_ASSERT(false);
    485 	}
    486 
    487 	switch (blendAState.equation)
    488 	{
    489 		case BLENDEQUATION_ADD:					SAMPLE_REGISTER_BLENDED_COLOR(blendedA, srcColor.w()*sample.blendSrcFactorA + dstColor.w()*sample.blendDstFactorA)	break;
    490 		case BLENDEQUATION_SUBTRACT:			SAMPLE_REGISTER_BLENDED_COLOR(blendedA, srcColor.w()*sample.blendSrcFactorA - dstColor.w()*sample.blendDstFactorA)	break;
    491 		case BLENDEQUATION_REVERSE_SUBTRACT:	SAMPLE_REGISTER_BLENDED_COLOR(blendedA, dstColor.w()*sample.blendDstFactorA - srcColor.w()*sample.blendSrcFactorA)	break;
    492 		case BLENDEQUATION_MIN:					SAMPLE_REGISTER_BLENDED_COLOR(blendedA, min(srcColor.w(), dstColor.w()))											break;
    493 		case BLENDEQUATION_MAX:					SAMPLE_REGISTER_BLENDED_COLOR(blendedA, max(srcColor.w(), dstColor.w()))											break;
    494 		default:
    495 			DE_ASSERT(false);
    496 	}
    497 #undef SAMPLE_REGISTER_BLENDED_COLOR
    498 }
    499 
    500 namespace advblend
    501 {
    502 
    503 inline float	multiply	(float src, float dst) { return src*dst;					}
    504 inline float	screen		(float src, float dst) { return src + dst - src*dst;		}
    505 inline float	darken		(float src, float dst) { return de::min(src, dst);			}
    506 inline float	lighten		(float src, float dst) { return de::max(src, dst);			}
    507 inline float	difference	(float src, float dst) { return de::abs(dst-src);			}
    508 inline float	exclusion	(float src, float dst) { return src + dst - 2.0f*src*dst;	}
    509 
    510 inline float overlay (float src, float dst)
    511 {
    512 	if (dst <= 0.5f)
    513 		return 2.0f*src*dst;
    514 	else
    515 		return 1.0f - 2.0f*(1.0f-src)*(1.0f-dst);
    516 }
    517 
    518 inline float colordodge (float src, float dst)
    519 {
    520 	if (dst <= 0.0f)
    521 		return 0.0f;
    522 	else if (src < 1.0f)
    523 		return de::min(1.0f, dst/(1.0f-src));
    524 	else
    525 		return 1.0f;
    526 }
    527 
    528 inline float colorburn (float src, float dst)
    529 {
    530 	if (dst >= 1.0f)
    531 		return 1.0f;
    532 	else if (src > 0.0f)
    533 		return 1.0f - de::min(1.0f, (1.0f-dst)/src);
    534 	else
    535 		return 0.0f;
    536 }
    537 
    538 inline float hardlight (float src, float dst)
    539 {
    540 	if (src <= 0.5f)
    541 		return 2.0f*src*dst;
    542 	else
    543 		return 1.0f - 2.0f*(1.0f-src)*(1.0f-dst);
    544 }
    545 
    546 inline float softlight (float src, float dst)
    547 {
    548 	if (src <= 0.5f)
    549 		return dst - (1.0f - 2.0f*src)*dst*(1.0f-dst);
    550 	else if (dst <= 0.25f)
    551 		return dst + (2.0f*src - 1.0f)*dst*((16.0f*dst - 12.0f)*dst + 3.0f);
    552 	else
    553 		return dst + (2.0f*src - 1.0f)*(deFloatSqrt(dst)-dst);
    554 }
    555 
    556 inline float minComp (const Vec3& v)
    557 {
    558 	return de::min(de::min(v.x(), v.y()), v.z());
    559 }
    560 
    561 inline float maxComp (const Vec3& v)
    562 {
    563 	return de::max(de::max(v.x(), v.y()), v.z());
    564 }
    565 
    566 inline float luminosity (const Vec3& rgb)
    567 {
    568 	return dot(rgb, Vec3(0.3f, 0.59f, 0.11f));
    569 }
    570 
    571 inline float saturation (const Vec3& rgb)
    572 {
    573 	return maxComp(rgb) - minComp(rgb);
    574 }
    575 
    576 Vec3 setLum (const Vec3& cbase, const Vec3& clum)
    577 {
    578 	const float		lbase	= luminosity(cbase);
    579 	const float		llum	= luminosity(clum);
    580 	const float		ldiff	= llum - lbase;
    581 	const Vec3		color	= cbase + Vec3(ldiff);
    582 	const float		minC	= minComp(color);
    583 	const float		maxC	= maxComp(color);
    584 
    585 	if (minC < 0.0f)
    586 		return llum + ((color-llum)*llum / (llum != minC ? (llum-minC) : 1.0f));
    587 	else if (maxC > 1.0f)
    588 		return llum + ((color-llum)*(1.0f-llum) / (llum != maxC ? (maxC-llum) : 1.0f));
    589 	else
    590 		return color;
    591 }
    592 
    593 Vec3 setLumSat (const Vec3& cbase, const Vec3& csat, const Vec3& clum)
    594 {
    595 	const float		minbase	= minComp(cbase);
    596 	const float		sbase	= saturation(cbase);
    597 	const float		ssat	= saturation(csat);
    598 	Vec3			color	= Vec3(0.0f);
    599 
    600 	if (sbase > 0.0f)
    601 		color = (cbase - minbase) * ssat / sbase;
    602 
    603 	return setLum(color, clum);
    604 }
    605 
    606 } // advblend
    607 
    608 void FragmentProcessor::executeAdvancedBlend (BlendEquationAdvanced equation)
    609 {
    610 	using namespace advblend;
    611 
    612 #define SAMPLE_REGISTER_ADV_BLEND(FUNCTION_NAME)											\
    613 	for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)			\
    614 	{																						\
    615 		if (m_sampleRegister[regSampleNdx].isAlive)											\
    616 		{																					\
    617 			SampleData&	sample		= m_sampleRegister[regSampleNdx];						\
    618 			const Vec4&	srcColor	= sample.clampedBlendSrcColor;							\
    619 			const Vec4&	dstColor	= sample.clampedBlendDstColor;							\
    620 			const Vec3&	bias		= sample.blendSrcFactorRGB;								\
    621 			const float	p0			= sample.blendSrcFactorA;								\
    622 			const float	r			= FUNCTION_NAME(srcColor[0], dstColor[0])*p0 + bias[0];	\
    623 			const float	g			= FUNCTION_NAME(srcColor[1], dstColor[1])*p0 + bias[1];	\
    624 			const float	b			= FUNCTION_NAME(srcColor[2], dstColor[2])*p0 + bias[2];	\
    625 																							\
    626 			sample.blendedRGB = Vec3(r, g, b);												\
    627 		}																					\
    628 	}
    629 
    630 #define SAMPLE_REGISTER_ADV_BLEND_HSL(COLOR_EXPRESSION)										\
    631 	for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)			\
    632 	{																						\
    633 		if (m_sampleRegister[regSampleNdx].isAlive)											\
    634 		{																					\
    635 			SampleData&	sample		= m_sampleRegister[regSampleNdx];						\
    636 			const Vec3	srcColor	= sample.clampedBlendSrcColor.swizzle(0,1,2);			\
    637 			const Vec3	dstColor	= sample.clampedBlendDstColor.swizzle(0,1,2);			\
    638 			const Vec3&	bias		= sample.blendSrcFactorRGB;								\
    639 			const float	p0			= sample.blendSrcFactorA;								\
    640 																							\
    641 			sample.blendedRGB = (COLOR_EXPRESSION)*p0 + bias;								\
    642 		}																					\
    643 	}
    644 
    645 	// Pre-compute factors & compute alpha \todo [2014-03-18 pyry] Re-using variable names.
    646 	// \note clampedBlend*Color contains clamped & unpremultiplied colors
    647 	for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
    648 	{
    649 		if (m_sampleRegister[regSampleNdx].isAlive)
    650 		{
    651 			SampleData&	sample		= m_sampleRegister[regSampleNdx];
    652 			const Vec4&	srcColor	= sample.clampedBlendSrcColor;
    653 			const Vec4&	dstColor	= sample.clampedBlendDstColor;
    654 			const float	srcA		= srcColor.w();
    655 			const float	dstA		= dstColor.w();
    656 			const float	p0			= srcA*dstA;
    657 			const float p1			= srcA*(1.0f-dstA);
    658 			const float p2			= dstA*(1.0f-srcA);
    659 			const Vec3	bias		(srcColor[0]*p1 + dstColor[0]*p2,
    660 									 srcColor[1]*p1 + dstColor[1]*p2,
    661 									 srcColor[2]*p1 + dstColor[2]*p2);
    662 
    663 			sample.blendSrcFactorRGB	= bias;
    664 			sample.blendSrcFactorA		= p0;
    665 			sample.blendedA				= p0 + p1 + p2;
    666 		}
    667 	}
    668 
    669 	switch (equation)
    670 	{
    671 		case BLENDEQUATION_ADVANCED_MULTIPLY:		SAMPLE_REGISTER_ADV_BLEND(multiply);									break;
    672 		case BLENDEQUATION_ADVANCED_SCREEN:			SAMPLE_REGISTER_ADV_BLEND(screen);										break;
    673 		case BLENDEQUATION_ADVANCED_OVERLAY:		SAMPLE_REGISTER_ADV_BLEND(overlay);										break;
    674 		case BLENDEQUATION_ADVANCED_DARKEN:			SAMPLE_REGISTER_ADV_BLEND(darken);										break;
    675 		case BLENDEQUATION_ADVANCED_LIGHTEN:		SAMPLE_REGISTER_ADV_BLEND(lighten);										break;
    676 		case BLENDEQUATION_ADVANCED_COLORDODGE:		SAMPLE_REGISTER_ADV_BLEND(colordodge);									break;
    677 		case BLENDEQUATION_ADVANCED_COLORBURN:		SAMPLE_REGISTER_ADV_BLEND(colorburn);									break;
    678 		case BLENDEQUATION_ADVANCED_HARDLIGHT:		SAMPLE_REGISTER_ADV_BLEND(hardlight);									break;
    679 		case BLENDEQUATION_ADVANCED_SOFTLIGHT:		SAMPLE_REGISTER_ADV_BLEND(softlight);									break;
    680 		case BLENDEQUATION_ADVANCED_DIFFERENCE:		SAMPLE_REGISTER_ADV_BLEND(difference);									break;
    681 		case BLENDEQUATION_ADVANCED_EXCLUSION:		SAMPLE_REGISTER_ADV_BLEND(exclusion);									break;
    682 		case BLENDEQUATION_ADVANCED_HSL_HUE:		SAMPLE_REGISTER_ADV_BLEND_HSL(setLumSat(srcColor, dstColor, dstColor));	break;
    683 		case BLENDEQUATION_ADVANCED_HSL_SATURATION:	SAMPLE_REGISTER_ADV_BLEND_HSL(setLumSat(dstColor, srcColor, dstColor));	break;
    684 		case BLENDEQUATION_ADVANCED_HSL_COLOR:		SAMPLE_REGISTER_ADV_BLEND_HSL(setLum(srcColor, dstColor));				break;
    685 		case BLENDEQUATION_ADVANCED_HSL_LUMINOSITY:	SAMPLE_REGISTER_ADV_BLEND_HSL(setLum(dstColor, srcColor));				break;
    686 		default:
    687 			DE_ASSERT(false);
    688 	}
    689 
    690 #undef SAMPLE_REGISTER_ADV_BLEND
    691 #undef SAMPLE_REGISTER_ADV_BLEND_HSL
    692 }
    693 
    694 void FragmentProcessor::executeColorWrite (int fragNdxOffset, int numSamplesPerFragment, const Fragment* inputFragments, bool isSRGB, const tcu::PixelBufferAccess& colorBuffer)
    695 {
    696 	for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
    697 	{
    698 		if (m_sampleRegister[regSampleNdx].isAlive)
    699 		{
    700 			int					fragSampleNdx	= regSampleNdx % numSamplesPerFragment;
    701 			const Fragment&		frag			= inputFragments[fragNdxOffset + regSampleNdx/numSamplesPerFragment];
    702 			Vec4				combinedColor;
    703 
    704 			combinedColor.xyz()	= m_sampleRegister[regSampleNdx].blendedRGB;
    705 			combinedColor.w()	= m_sampleRegister[regSampleNdx].blendedA;
    706 
    707 			if (isSRGB)
    708 				combinedColor = tcu::linearToSRGB(combinedColor);
    709 
    710 			colorBuffer.setPixel(combinedColor, fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y());
    711 		}
    712 	}
    713 }
    714 
    715 void FragmentProcessor::executeRGBA8ColorWrite (int fragNdxOffset, int numSamplesPerFragment, const Fragment* inputFragments, const tcu::PixelBufferAccess& colorBuffer)
    716 {
    717 	const int		fragStride	= 4;
    718 	const int		xStride		= colorBuffer.getRowPitch();
    719 	const int		yStride		= colorBuffer.getSlicePitch();
    720 	deUint8* const	basePtr		= (deUint8*)colorBuffer.getDataPtr();
    721 
    722 	for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
    723 	{
    724 		if (m_sampleRegister[regSampleNdx].isAlive)
    725 		{
    726 			const int			fragSampleNdx	= regSampleNdx % numSamplesPerFragment;
    727 			const Fragment&		frag			= inputFragments[fragNdxOffset + regSampleNdx/numSamplesPerFragment];
    728 			deUint8*			dstPtr			= basePtr + fragSampleNdx*fragStride + frag.pixelCoord.x()*xStride + frag.pixelCoord.y()*yStride;
    729 
    730 			dstPtr[0] = tcu::floatToU8(m_sampleRegister[regSampleNdx].blendedRGB.x());
    731 			dstPtr[1] = tcu::floatToU8(m_sampleRegister[regSampleNdx].blendedRGB.y());
    732 			dstPtr[2] = tcu::floatToU8(m_sampleRegister[regSampleNdx].blendedRGB.z());
    733 			dstPtr[3] = tcu::floatToU8(m_sampleRegister[regSampleNdx].blendedA);
    734 		}
    735 	}
    736 }
    737 
    738 void FragmentProcessor::executeMaskedColorWrite (int fragNdxOffset, int numSamplesPerFragment, const Fragment* inputFragments, const Vec4& colorMaskFactor, const Vec4& colorMaskNegationFactor, bool isSRGB, const tcu::PixelBufferAccess& colorBuffer)
    739 {
    740 	for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
    741 	{
    742 		if (m_sampleRegister[regSampleNdx].isAlive)
    743 		{
    744 			int					fragSampleNdx	= regSampleNdx % numSamplesPerFragment;
    745 			const Fragment&		frag			= inputFragments[fragNdxOffset + regSampleNdx/numSamplesPerFragment];
    746 			Vec4				originalColor	= colorBuffer.getPixel(fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y());
    747 			Vec4				newColor;
    748 
    749 			newColor.xyz()	= m_sampleRegister[regSampleNdx].blendedRGB;
    750 			newColor.w()	= m_sampleRegister[regSampleNdx].blendedA;
    751 
    752 			if (isSRGB)
    753 				newColor = tcu::linearToSRGB(newColor);
    754 
    755 			newColor = colorMaskFactor*newColor + colorMaskNegationFactor*originalColor;
    756 
    757 			colorBuffer.setPixel(newColor, fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y());
    758 		}
    759 	}
    760 }
    761 
    762 void FragmentProcessor::executeSignedValueWrite (int fragNdxOffset, int numSamplesPerFragment, const Fragment* inputFragments, const tcu::BVec4& colorMask, const tcu::PixelBufferAccess& colorBuffer)
    763 {
    764 	for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
    765 	{
    766 		if (m_sampleRegister[regSampleNdx].isAlive)
    767 		{
    768 			int					fragSampleNdx	= regSampleNdx % numSamplesPerFragment;
    769 			const Fragment&		frag			= inputFragments[fragNdxOffset + regSampleNdx/numSamplesPerFragment];
    770 			const IVec4			originalValue	= colorBuffer.getPixelInt(fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y());
    771 
    772 			colorBuffer.setPixel(tcu::select(m_sampleRegister[regSampleNdx].signedValue, originalValue, colorMask), fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y());
    773 		}
    774 	}
    775 }
    776 
    777 void FragmentProcessor::executeUnsignedValueWrite (int fragNdxOffset, int numSamplesPerFragment, const Fragment* inputFragments, const tcu::BVec4& colorMask, const tcu::PixelBufferAccess& colorBuffer)
    778 {
    779 	for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
    780 	{
    781 		if (m_sampleRegister[regSampleNdx].isAlive)
    782 		{
    783 			int					fragSampleNdx	= regSampleNdx % numSamplesPerFragment;
    784 			const Fragment&		frag			= inputFragments[fragNdxOffset + regSampleNdx/numSamplesPerFragment];
    785 			const UVec4			originalValue	= colorBuffer.getPixelUint(fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y());
    786 
    787 			colorBuffer.setPixel(tcu::select(m_sampleRegister[regSampleNdx].unsignedValue, originalValue, colorMask), fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y());
    788 		}
    789 	}
    790 }
    791 
    792 void FragmentProcessor::render (const rr::MultisamplePixelBufferAccess&		msColorBuffer,
    793 								const rr::MultisamplePixelBufferAccess&		msDepthBuffer,
    794 								const rr::MultisamplePixelBufferAccess&		msStencilBuffer,
    795 								const Fragment*								inputFragments,
    796 								int											numFragments,
    797 								FaceType									fragmentFacing,
    798 								const FragmentOperationState&				state)
    799 {
    800 	DE_ASSERT(fragmentFacing < FACETYPE_LAST);
    801 	DE_ASSERT(state.numStencilBits < 32); // code bitshifts numStencilBits, avoid undefined behavior
    802 
    803 	const tcu::PixelBufferAccess&	colorBuffer			= msColorBuffer.raw();
    804 	const tcu::PixelBufferAccess&	depthBuffer			= msDepthBuffer.raw();
    805 	const tcu::PixelBufferAccess&	stencilBuffer		= msStencilBuffer.raw();
    806 
    807 	bool							hasDepth			= depthBuffer.getWidth() > 0	&& depthBuffer.getHeight() > 0		&& depthBuffer.getDepth() > 0;
    808 	bool							hasStencil			= stencilBuffer.getWidth() > 0	&& stencilBuffer.getHeight() > 0	&& stencilBuffer.getDepth() > 0;
    809 	bool							doDepthBoundsTest	= hasDepth		&& state.depthBoundsTestEnabled;
    810 	bool							doDepthTest			= hasDepth		&& state.depthTestEnabled;
    811 	bool							doStencilTest		= hasStencil	&& state.stencilTestEnabled;
    812 
    813 	tcu::TextureChannelClass		colorbufferClass	= tcu::getTextureChannelClass(msColorBuffer.raw().getFormat().type);
    814 	rr::GenericVecType				fragmentDataType	= (colorbufferClass == tcu::TEXTURECHANNELCLASS_SIGNED_INTEGER) ? (rr::GENERICVECTYPE_INT32) : ((colorbufferClass == tcu::TEXTURECHANNELCLASS_UNSIGNED_INTEGER) ? (rr::GENERICVECTYPE_UINT32) : (rr::GENERICVECTYPE_FLOAT));
    815 
    816 	DE_ASSERT((!hasDepth || colorBuffer.getWidth() == depthBuffer.getWidth())	&& (!hasStencil || colorBuffer.getWidth() == stencilBuffer.getWidth()));
    817 	DE_ASSERT((!hasDepth || colorBuffer.getHeight() == depthBuffer.getHeight())	&& (!hasStencil || colorBuffer.getHeight() == stencilBuffer.getHeight()));
    818 	DE_ASSERT((!hasDepth || colorBuffer.getDepth() == depthBuffer.getDepth())	&& (!hasStencil || colorBuffer.getDepth() == stencilBuffer.getDepth()));
    819 
    820 	// Combined formats must be separated beforehand
    821 	DE_ASSERT(!hasDepth || (!tcu::isCombinedDepthStencilType(depthBuffer.getFormat().type) && depthBuffer.getFormat().order == tcu::TextureFormat::D));
    822 	DE_ASSERT(!hasStencil || (!tcu::isCombinedDepthStencilType(stencilBuffer.getFormat().type) && stencilBuffer.getFormat().order == tcu::TextureFormat::S));
    823 
    824 	int						numSamplesPerFragment		= colorBuffer.getWidth();
    825 	int						totalNumSamples				= numFragments*numSamplesPerFragment;
    826 	int						numSampleGroups				= (totalNumSamples - 1) / SAMPLE_REGISTER_SIZE + 1; // \note totalNumSamples/SAMPLE_REGISTER_SIZE rounded up.
    827 	const StencilState&		stencilState				= state.stencilStates[fragmentFacing];
    828 	Vec4					colorMaskFactor				(state.colorMask[0] ? 1.0f : 0.0f, state.colorMask[1] ? 1.0f : 0.0f, state.colorMask[2] ? 1.0f : 0.0f, state.colorMask[3] ? 1.0f : 0.0f);
    829 	Vec4					colorMaskNegationFactor		(state.colorMask[0] ? 0.0f : 1.0f, state.colorMask[1] ? 0.0f : 1.0f, state.colorMask[2] ? 0.0f : 1.0f, state.colorMask[3] ? 0.0f : 1.0f);
    830 	bool					sRGBTarget					= state.sRGBEnabled && tcu::isSRGB(colorBuffer.getFormat());
    831 
    832 	DE_ASSERT(SAMPLE_REGISTER_SIZE % numSamplesPerFragment == 0);
    833 
    834 	// Divide the fragments' samples into groups of size SAMPLE_REGISTER_SIZE, and perform
    835 	// the per-sample operations for one group at a time.
    836 
    837 	for (int sampleGroupNdx = 0; sampleGroupNdx < numSampleGroups; sampleGroupNdx++)
    838 	{
    839 		// The index of the fragment of the sample at the beginning of m_sampleRegisters.
    840 		int groupFirstFragNdx = (sampleGroupNdx*SAMPLE_REGISTER_SIZE) / numSamplesPerFragment;
    841 
    842 		// Initialize sample data in the sample register.
    843 
    844 		for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
    845 		{
    846 			int fragNdx			= groupFirstFragNdx + regSampleNdx/numSamplesPerFragment;
    847 			int fragSampleNdx	= regSampleNdx % numSamplesPerFragment;
    848 
    849 			if (fragNdx < numFragments)
    850 			{
    851 				m_sampleRegister[regSampleNdx].isAlive		= (inputFragments[fragNdx].coverage & (1u << fragSampleNdx)) != 0;
    852 				m_sampleRegister[regSampleNdx].depthPassed	= true; // \note This will stay true if depth test is disabled.
    853 			}
    854 			else
    855 				m_sampleRegister[regSampleNdx].isAlive = false;
    856 		}
    857 
    858 		// Scissor test.
    859 
    860 		if (state.scissorTestEnabled)
    861 			executeScissorTest(groupFirstFragNdx, numSamplesPerFragment, inputFragments, state.scissorRectangle);
    862 
    863 		// Depth bounds test.
    864 
    865 		if (doDepthBoundsTest)
    866 			executeDepthBoundsTest(groupFirstFragNdx, numSamplesPerFragment, inputFragments, state.minDepthBound, state.maxDepthBound, depthBuffer);
    867 
    868 		// Stencil test.
    869 
    870 		if (doStencilTest)
    871 		{
    872 			executeStencilCompare(groupFirstFragNdx, numSamplesPerFragment, inputFragments, stencilState, state.numStencilBits, stencilBuffer);
    873 			executeStencilSFail(groupFirstFragNdx, numSamplesPerFragment, inputFragments, stencilState, state.numStencilBits, stencilBuffer);
    874 		}
    875 
    876 		// Depth test.
    877 		// \note Current value of isAlive is needed for dpPass and dpFail, so it's only updated after them and not right after depth test.
    878 
    879 		if (doDepthTest)
    880 		{
    881 			executeDepthCompare(groupFirstFragNdx, numSamplesPerFragment, inputFragments, state.depthFunc, depthBuffer);
    882 
    883 			if (state.depthMask)
    884 				executeDepthWrite(groupFirstFragNdx, numSamplesPerFragment, inputFragments, depthBuffer);
    885 		}
    886 
    887 		// Do dpFail and dpPass stencil writes.
    888 
    889 		if (doStencilTest)
    890 			executeStencilDpFailAndPass(groupFirstFragNdx, numSamplesPerFragment, inputFragments, stencilState, state.numStencilBits, stencilBuffer);
    891 
    892 		// Kill the samples that failed depth test.
    893 
    894 		if (doDepthTest)
    895 		{
    896 			for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
    897 				m_sampleRegister[regSampleNdx].isAlive = m_sampleRegister[regSampleNdx].isAlive && m_sampleRegister[regSampleNdx].depthPassed;
    898 		}
    899 
    900 		// Paint fragments to target
    901 
    902 		switch (fragmentDataType)
    903 		{
    904 			case rr::GENERICVECTYPE_FLOAT:
    905 			{
    906 				// Select min/max clamping values for blending factors and operands
    907 				Vec4 minClampValue;
    908 				Vec4 maxClampValue;
    909 
    910 				if (colorbufferClass == tcu::TEXTURECHANNELCLASS_UNSIGNED_FIXED_POINT)
    911 				{
    912 					minClampValue = Vec4(0.0f);
    913 					maxClampValue = Vec4(1.0f);
    914 				}
    915 				else if (colorbufferClass == tcu::TEXTURECHANNELCLASS_SIGNED_FIXED_POINT)
    916 				{
    917 					minClampValue = Vec4(-1.0f);
    918 					maxClampValue = Vec4(1.0f);
    919 				}
    920 				else
    921 				{
    922 					// No clamping
    923 					minClampValue = Vec4(-std::numeric_limits<float>::infinity());
    924 					maxClampValue = Vec4(std::numeric_limits<float>::infinity());
    925 				}
    926 
    927 				// Blend calculation - only if using blend.
    928 				if (state.blendMode == BLENDMODE_STANDARD)
    929 				{
    930 					// Put dst color to register, doing srgb-to-linear conversion if needed.
    931 					for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
    932 					{
    933 						if (m_sampleRegister[regSampleNdx].isAlive)
    934 						{
    935 							int					fragSampleNdx	= regSampleNdx % numSamplesPerFragment;
    936 							const Fragment&		frag			= inputFragments[groupFirstFragNdx + regSampleNdx/numSamplesPerFragment];
    937 							Vec4				dstColor		= colorBuffer.getPixel(fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y());
    938 
    939 							m_sampleRegister[regSampleNdx].clampedBlendSrcColor		= clamp(frag.value.get<float>(), minClampValue, maxClampValue);
    940 							m_sampleRegister[regSampleNdx].clampedBlendSrc1Color	= clamp(frag.value1.get<float>(), minClampValue, maxClampValue);
    941 							m_sampleRegister[regSampleNdx].clampedBlendDstColor		= clamp(sRGBTarget ? tcu::sRGBToLinear(dstColor) : dstColor, minClampValue, maxClampValue);
    942 						}
    943 					}
    944 
    945 					// Calculate blend factors to register.
    946 					executeBlendFactorComputeRGB(state.blendColor, state.blendRGBState);
    947 					executeBlendFactorComputeA(state.blendColor, state.blendAState);
    948 
    949 					// Compute blended color.
    950 					executeBlend(state.blendRGBState, state.blendAState);
    951 				}
    952 				else if (state.blendMode == BLENDMODE_ADVANCED)
    953 				{
    954 					// Unpremultiply colors for blending, and do sRGB->linear if necessary
    955 					// \todo [2014-03-17 pyry] Re-consider clampedBlend*Color var names
    956 					for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
    957 					{
    958 						if (m_sampleRegister[regSampleNdx].isAlive)
    959 						{
    960 							int					fragSampleNdx	= regSampleNdx % numSamplesPerFragment;
    961 							const Fragment&		frag			= inputFragments[groupFirstFragNdx + regSampleNdx/numSamplesPerFragment];
    962 							const Vec4			srcColor		= frag.value.get<float>();
    963 							const Vec4			dstColor		= colorBuffer.getPixel(fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y());
    964 
    965 							m_sampleRegister[regSampleNdx].clampedBlendSrcColor		= unpremultiply(clamp(srcColor, minClampValue, maxClampValue));
    966 							m_sampleRegister[regSampleNdx].clampedBlendDstColor		= unpremultiply(clamp(sRGBTarget ? tcu::sRGBToLinear(dstColor) : dstColor, minClampValue, maxClampValue));
    967 						}
    968 					}
    969 
    970 					executeAdvancedBlend(state.blendEquationAdvaced);
    971 				}
    972 				else
    973 				{
    974 					// Not using blend - just put values to register as-is.
    975 					DE_ASSERT(state.blendMode == BLENDMODE_NONE);
    976 
    977 					for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
    978 					{
    979 						if (m_sampleRegister[regSampleNdx].isAlive)
    980 						{
    981 							const Fragment& frag = inputFragments[groupFirstFragNdx + regSampleNdx/numSamplesPerFragment];
    982 
    983 							m_sampleRegister[regSampleNdx].blendedRGB	= frag.value.get<float>().xyz();
    984 							m_sampleRegister[regSampleNdx].blendedA		= frag.value.get<float>().w();
    985 						}
    986 					}
    987 				}
    988 
    989 				// Clamp result values in sample register
    990 				if (colorbufferClass != tcu::TEXTURECHANNELCLASS_FLOATING_POINT)
    991 				{
    992 					for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
    993 					{
    994 						if (m_sampleRegister[regSampleNdx].isAlive)
    995 						{
    996 							m_sampleRegister[regSampleNdx].blendedRGB	= clamp(m_sampleRegister[regSampleNdx].blendedRGB, minClampValue.swizzle(0, 1, 2), maxClampValue.swizzle(0, 1, 2));
    997 							m_sampleRegister[regSampleNdx].blendedA		= clamp(m_sampleRegister[regSampleNdx].blendedA, minClampValue.w(), maxClampValue.w());
    998 						}
    999 					}
   1000 				}
   1001 
   1002 				// Finally, write the colors to the color buffer.
   1003 
   1004 				if (state.colorMask[0] && state.colorMask[1] && state.colorMask[2] && state.colorMask[3])
   1005 				{
   1006 					if (colorBuffer.getFormat() == tcu::TextureFormat(tcu::TextureFormat::RGBA, tcu::TextureFormat::UNORM_INT8))
   1007 						executeRGBA8ColorWrite(groupFirstFragNdx, numSamplesPerFragment, inputFragments, colorBuffer);
   1008 					else
   1009 						executeColorWrite(groupFirstFragNdx, numSamplesPerFragment, inputFragments, sRGBTarget, colorBuffer);
   1010 				}
   1011 				else if (state.colorMask[0] || state.colorMask[1] || state.colorMask[2] || state.colorMask[3])
   1012 					executeMaskedColorWrite(groupFirstFragNdx, numSamplesPerFragment, inputFragments, colorMaskFactor, colorMaskNegationFactor, sRGBTarget, colorBuffer);
   1013 				break;
   1014 			}
   1015 			case rr::GENERICVECTYPE_INT32:
   1016 				// Write fragments
   1017 				for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
   1018 				{
   1019 					if (m_sampleRegister[regSampleNdx].isAlive)
   1020 					{
   1021 						const Fragment& frag = inputFragments[groupFirstFragNdx + regSampleNdx/numSamplesPerFragment];
   1022 
   1023 						m_sampleRegister[regSampleNdx].signedValue = frag.value.get<deInt32>();
   1024 					}
   1025 				}
   1026 
   1027 				if (state.colorMask[0] || state.colorMask[1] || state.colorMask[2] || state.colorMask[3])
   1028 					executeSignedValueWrite(groupFirstFragNdx, numSamplesPerFragment, inputFragments, state.colorMask, colorBuffer);
   1029 				break;
   1030 
   1031 			case rr::GENERICVECTYPE_UINT32:
   1032 				// Write fragments
   1033 				for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
   1034 				{
   1035 					if (m_sampleRegister[regSampleNdx].isAlive)
   1036 					{
   1037 						const Fragment& frag = inputFragments[groupFirstFragNdx + regSampleNdx/numSamplesPerFragment];
   1038 
   1039 						m_sampleRegister[regSampleNdx].unsignedValue = frag.value.get<deUint32>();
   1040 					}
   1041 				}
   1042 
   1043 				if (state.colorMask[0] || state.colorMask[1] || state.colorMask[2] || state.colorMask[3])
   1044 					executeUnsignedValueWrite(groupFirstFragNdx, numSamplesPerFragment, inputFragments, state.colorMask, colorBuffer);
   1045 				break;
   1046 
   1047 			default:
   1048 				DE_ASSERT(DE_FALSE);
   1049 		}
   1050 	}
   1051 }
   1052 
   1053 } // rr
   1054