Home | History | Annotate | Download | only in referencerenderer
      1 /*-------------------------------------------------------------------------
      2  * drawElements Quality Program Reference Renderer
      3  * -----------------------------------------------
      4  *
      5  * Copyright 2014 The Android Open Source Project
      6  *
      7  * Licensed under the Apache License, Version 2.0 (the "License");
      8  * you may not use this file except in compliance with the License.
      9  * You may obtain a copy of the License at
     10  *
     11  *      http://www.apache.org/licenses/LICENSE-2.0
     12  *
     13  * Unless required by applicable law or agreed to in writing, software
     14  * distributed under the License is distributed on an "AS IS" BASIS,
     15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     16  * See the License for the specific language governing permissions and
     17  * limitations under the License.
     18  *
     19  *//*!
     20  * \file
     21  * \brief Reference implementation for per-fragment operations.
     22  *//*--------------------------------------------------------------------*/
     23 
     24 #include "rrFragmentOperations.hpp"
     25 #include "tcuVectorUtil.hpp"
     26 #include "tcuTextureUtil.hpp"
     27 #include <limits>
     28 
     29 using tcu::IVec2;
     30 using tcu::Vec3;
     31 using tcu::Vec4;
     32 using tcu::IVec4;
     33 using tcu::UVec4;
     34 using tcu::min;
     35 using tcu::max;
     36 using tcu::clamp;
     37 using de::min;
     38 using de::max;
     39 using de::clamp;
     40 
     41 namespace rr
     42 {
     43 
     44 // Return oldValue with the bits indicated by mask replaced by corresponding bits of newValue.
     45 static inline int maskedBitReplace (int oldValue, int newValue, deUint32 mask)
     46 {
     47 	return (oldValue & ~mask) | (newValue & mask);
     48 }
     49 
     50 static inline bool isInsideRect (const IVec2& point, const WindowRectangle& rect)
     51 {
     52 	return de::inBounds(point.x(), rect.left,		rect.left + rect.width) &&
     53 		   de::inBounds(point.y(), rect.bottom,		rect.bottom + rect.height);
     54 }
     55 
     56 static inline Vec4 unpremultiply (const Vec4& v)
     57 {
     58 	if (v.w() > 0.0f)
     59 		return Vec4(v.x()/v.w(), v.y()/v.w(), v.z()/v.w(), v.w());
     60 	else
     61 	{
     62 		DE_ASSERT(v.x() == 0.0f && v.y() == 0.0f && v.z() == 0.0f);
     63 		return Vec4(0.0f, 0.0f, 0.0f, 0.0f);
     64 	}
     65 }
     66 
     67 void clearMultisampleColorBuffer	(const tcu::PixelBufferAccess& dst, const Vec4& v,	const WindowRectangle& r)	{ tcu::clear(tcu::getSubregion(dst, 0, r.left, r.bottom, dst.getWidth(), r.width, r.height), v);				}
     68 void clearMultisampleColorBuffer	(const tcu::PixelBufferAccess& dst, const IVec4& v,	const WindowRectangle& r)	{ tcu::clear(tcu::getSubregion(dst, 0, r.left, r.bottom, dst.getWidth(), r.width, r.height), v);				}
     69 void clearMultisampleColorBuffer	(const tcu::PixelBufferAccess& dst, const UVec4& v,	const WindowRectangle& r)	{ tcu::clear(tcu::getSubregion(dst, 0, r.left, r.bottom, dst.getWidth(), r.width, r.height), v.cast<int>());	}
     70 void clearMultisampleDepthBuffer	(const tcu::PixelBufferAccess& dst, float v,		const WindowRectangle& r)	{ tcu::clearDepth(tcu::getSubregion(dst, 0, r.left, r.bottom, dst.getWidth(), r.width, r.height), v);			}
     71 void clearMultisampleStencilBuffer	(const tcu::PixelBufferAccess& dst, int v,			const WindowRectangle& r)	{ tcu::clearStencil(tcu::getSubregion(dst, 0, r.left, r.bottom, dst.getWidth(), r.width, r.height), v);			}
     72 
     73 FragmentProcessor::FragmentProcessor (void)
     74 	: m_sampleRegister()
     75 {
     76 }
     77 
     78 void FragmentProcessor::executeScissorTest (int fragNdxOffset, int numSamplesPerFragment, const Fragment* inputFragments, const WindowRectangle& scissorRect)
     79 {
     80 	for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
     81 	{
     82 		if (m_sampleRegister[regSampleNdx].isAlive)
     83 		{
     84 			int fragNdx = fragNdxOffset + regSampleNdx/numSamplesPerFragment;
     85 
     86 			if (!isInsideRect(inputFragments[fragNdx].pixelCoord, scissorRect))
     87 				m_sampleRegister[regSampleNdx].isAlive = false;
     88 		}
     89 	}
     90 }
     91 
     92 void FragmentProcessor::executeStencilCompare (int fragNdxOffset, int numSamplesPerFragment, const Fragment* inputFragments, const StencilState& stencilState, int numStencilBits, const tcu::ConstPixelBufferAccess& stencilBuffer)
     93 {
     94 #define SAMPLE_REGISTER_STENCIL_COMPARE(COMPARE_EXPRESSION)																					\
     95 	for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)															\
     96 	{																																		\
     97 		if (m_sampleRegister[regSampleNdx].isAlive)																							\
     98 		{																																	\
     99 			int					fragSampleNdx		= regSampleNdx % numSamplesPerFragment;													\
    100 			const Fragment&		frag				= inputFragments[fragNdxOffset + regSampleNdx/numSamplesPerFragment];					\
    101 			int					stencilBufferValue	= stencilBuffer.getPixStencil(fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y());	\
    102 			int					maskedRef			= stencilState.compMask & clampedStencilRef;											\
    103 			int					maskedBuf			= stencilState.compMask & stencilBufferValue;											\
    104 			DE_UNREF(maskedRef);																											\
    105 			DE_UNREF(maskedBuf);																											\
    106 																																			\
    107 			m_sampleRegister[regSampleNdx].stencilPassed = (COMPARE_EXPRESSION);															\
    108 		}																																	\
    109 	}
    110 
    111 	int clampedStencilRef = de::clamp(stencilState.ref, 0, (1<<numStencilBits)-1);
    112 
    113 	switch (stencilState.func)
    114 	{
    115 		case TESTFUNC_NEVER:	SAMPLE_REGISTER_STENCIL_COMPARE(false)						break;
    116 		case TESTFUNC_ALWAYS:	SAMPLE_REGISTER_STENCIL_COMPARE(true)						break;
    117 		case TESTFUNC_LESS:		SAMPLE_REGISTER_STENCIL_COMPARE(maskedRef <  maskedBuf)		break;
    118 		case TESTFUNC_LEQUAL:	SAMPLE_REGISTER_STENCIL_COMPARE(maskedRef <= maskedBuf)		break;
    119 		case TESTFUNC_GREATER:	SAMPLE_REGISTER_STENCIL_COMPARE(maskedRef >  maskedBuf)		break;
    120 		case TESTFUNC_GEQUAL:	SAMPLE_REGISTER_STENCIL_COMPARE(maskedRef >= maskedBuf)		break;
    121 		case TESTFUNC_EQUAL:	SAMPLE_REGISTER_STENCIL_COMPARE(maskedRef == maskedBuf)		break;
    122 		case TESTFUNC_NOTEQUAL:	SAMPLE_REGISTER_STENCIL_COMPARE(maskedRef != maskedBuf)		break;
    123 		default:
    124 			DE_ASSERT(false);
    125 	}
    126 
    127 #undef SAMPLE_REGISTER_STENCIL_COMPARE
    128 }
    129 
    130 void FragmentProcessor::executeStencilSFail (int fragNdxOffset, int numSamplesPerFragment, const Fragment* inputFragments, const StencilState& stencilState, int numStencilBits, const tcu::PixelBufferAccess& stencilBuffer)
    131 {
    132 #define SAMPLE_REGISTER_SFAIL(SFAIL_EXPRESSION)																																		\
    133 	for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)																									\
    134 	{																																												\
    135 		if (m_sampleRegister[regSampleNdx].isAlive && !m_sampleRegister[regSampleNdx].stencilPassed)																				\
    136 		{																																											\
    137 			int					fragSampleNdx		= regSampleNdx % numSamplesPerFragment;																							\
    138 			const Fragment&		frag				= inputFragments[fragNdxOffset + regSampleNdx/numSamplesPerFragment];															\
    139 			int					stencilBufferValue	= stencilBuffer.getPixStencil(fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y());											\
    140 																																													\
    141 			stencilBuffer.setPixStencil(maskedBitReplace(stencilBufferValue, (SFAIL_EXPRESSION), stencilState.writeMask), fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y());	\
    142 			m_sampleRegister[regSampleNdx].isAlive = false;																															\
    143 		}																																											\
    144 	}
    145 
    146 	int clampedStencilRef = de::clamp(stencilState.ref, 0, (1<<numStencilBits)-1);
    147 
    148 	switch (stencilState.sFail)
    149 	{
    150 		case STENCILOP_KEEP:		SAMPLE_REGISTER_SFAIL(stencilBufferValue)												break;
    151 		case STENCILOP_ZERO:		SAMPLE_REGISTER_SFAIL(0)																break;
    152 		case STENCILOP_REPLACE:		SAMPLE_REGISTER_SFAIL(clampedStencilRef)												break;
    153 		case STENCILOP_INCR:		SAMPLE_REGISTER_SFAIL(de::clamp(stencilBufferValue+1, 0, (1<<numStencilBits) - 1))		break;
    154 		case STENCILOP_DECR:		SAMPLE_REGISTER_SFAIL(de::clamp(stencilBufferValue-1, 0, (1<<numStencilBits) - 1))		break;
    155 		case STENCILOP_INCR_WRAP:	SAMPLE_REGISTER_SFAIL((stencilBufferValue + 1) & ((1<<numStencilBits) - 1))				break;
    156 		case STENCILOP_DECR_WRAP:	SAMPLE_REGISTER_SFAIL((stencilBufferValue - 1) & ((1<<numStencilBits) - 1))				break;
    157 		case STENCILOP_INVERT:		SAMPLE_REGISTER_SFAIL((~stencilBufferValue) & ((1<<numStencilBits) - 1))				break;
    158 		default:
    159 			DE_ASSERT(false);
    160 	}
    161 
    162 #undef SAMPLE_REGISTER_SFAIL
    163 }
    164 
    165 void FragmentProcessor::executeDepthCompare (int fragNdxOffset, int numSamplesPerFragment, const Fragment* inputFragments, TestFunc depthFunc, const tcu::ConstPixelBufferAccess& depthBuffer)
    166 {
    167 #define SAMPLE_REGISTER_DEPTH_COMPARE_F(COMPARE_EXPRESSION)																						\
    168 	for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)																\
    169 	{																																			\
    170 		if (m_sampleRegister[regSampleNdx].isAlive)																								\
    171 		{																																		\
    172 			int					fragSampleNdx		= regSampleNdx % numSamplesPerFragment;														\
    173 			const Fragment&		frag				= inputFragments[fragNdxOffset + regSampleNdx/numSamplesPerFragment];						\
    174 			float				depthBufferValue	= depthBuffer.getPixDepth(fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y());			\
    175 			float				sampleDepthFloat	= frag.sampleDepths[fragSampleNdx];															\
    176 			float				sampleDepth			= de::clamp(sampleDepthFloat, 0.0f, 1.0f);													\
    177 																																				\
    178 			m_sampleRegister[regSampleNdx].depthPassed = (COMPARE_EXPRESSION);																	\
    179 																																				\
    180 			DE_UNREF(depthBufferValue);																											\
    181 			DE_UNREF(sampleDepth);																												\
    182 		}																																		\
    183 	}
    184 
    185 #define SAMPLE_REGISTER_DEPTH_COMPARE_UI(COMPARE_EXPRESSION)																					\
    186 	for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)																\
    187 	{																																			\
    188 		if (m_sampleRegister[regSampleNdx].isAlive)																								\
    189 		{																																		\
    190 			int					fragSampleNdx		= regSampleNdx % numSamplesPerFragment;														\
    191 			const Fragment&		frag				= inputFragments[fragNdxOffset + regSampleNdx/numSamplesPerFragment];						\
    192 			deUint32			depthBufferValue	= depthBuffer.getPixelUint(fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y()).x();	\
    193 			float				sampleDepthFloat	= frag.sampleDepths[fragSampleNdx];															\
    194 																																				\
    195 			/* Convert input float to target buffer format for comparison */																	\
    196 																																				\
    197 			deUint32 buffer[2];																													\
    198 																																				\
    199 			DE_ASSERT(sizeof(buffer) >= (size_t)depthBuffer.getFormat().getPixelSize());														\
    200 																																				\
    201 			tcu::PixelBufferAccess access(depthBuffer.getFormat(), 1, 1, 1, &buffer);															\
    202 			access.setPixDepth(sampleDepthFloat, 0, 0, 0);																						\
    203 			deUint32 sampleDepth = access.getPixelUint(0, 0, 0).x();																			\
    204 																																				\
    205 			m_sampleRegister[regSampleNdx].depthPassed = (COMPARE_EXPRESSION);																	\
    206 																																				\
    207 			DE_UNREF(depthBufferValue);																											\
    208 			DE_UNREF(sampleDepth);																												\
    209 		}																																		\
    210 	}
    211 
    212 	if (depthBuffer.getFormat().type == tcu::TextureFormat::FLOAT || depthBuffer.getFormat().type == tcu::TextureFormat::FLOAT_UNSIGNED_INT_24_8_REV)
    213 	{
    214 
    215 		switch (depthFunc)
    216 		{
    217 			case TESTFUNC_NEVER:	SAMPLE_REGISTER_DEPTH_COMPARE_F(false)							break;
    218 			case TESTFUNC_ALWAYS:	SAMPLE_REGISTER_DEPTH_COMPARE_F(true)								break;
    219 			case TESTFUNC_LESS:		SAMPLE_REGISTER_DEPTH_COMPARE_F(sampleDepth <  depthBufferValue)	break;
    220 			case TESTFUNC_LEQUAL:	SAMPLE_REGISTER_DEPTH_COMPARE_F(sampleDepth <= depthBufferValue)	break;
    221 			case TESTFUNC_GREATER:	SAMPLE_REGISTER_DEPTH_COMPARE_F(sampleDepth >  depthBufferValue)	break;
    222 			case TESTFUNC_GEQUAL:	SAMPLE_REGISTER_DEPTH_COMPARE_F(sampleDepth >= depthBufferValue)	break;
    223 			case TESTFUNC_EQUAL:	SAMPLE_REGISTER_DEPTH_COMPARE_F(sampleDepth == depthBufferValue)	break;
    224 			case TESTFUNC_NOTEQUAL:	SAMPLE_REGISTER_DEPTH_COMPARE_F(sampleDepth != depthBufferValue)	break;
    225 			default:
    226 				DE_ASSERT(false);
    227 		}
    228 
    229 	}
    230 	else
    231 	{
    232 		switch (depthFunc)
    233 		{
    234 			case TESTFUNC_NEVER:	SAMPLE_REGISTER_DEPTH_COMPARE_UI(false)							break;
    235 			case TESTFUNC_ALWAYS:	SAMPLE_REGISTER_DEPTH_COMPARE_UI(true)								break;
    236 			case TESTFUNC_LESS:		SAMPLE_REGISTER_DEPTH_COMPARE_UI(sampleDepth <  depthBufferValue)	break;
    237 			case TESTFUNC_LEQUAL:	SAMPLE_REGISTER_DEPTH_COMPARE_UI(sampleDepth <= depthBufferValue)	break;
    238 			case TESTFUNC_GREATER:	SAMPLE_REGISTER_DEPTH_COMPARE_UI(sampleDepth >  depthBufferValue)	break;
    239 			case TESTFUNC_GEQUAL:	SAMPLE_REGISTER_DEPTH_COMPARE_UI(sampleDepth >= depthBufferValue)	break;
    240 			case TESTFUNC_EQUAL:	SAMPLE_REGISTER_DEPTH_COMPARE_UI(sampleDepth == depthBufferValue)	break;
    241 			case TESTFUNC_NOTEQUAL:	SAMPLE_REGISTER_DEPTH_COMPARE_UI(sampleDepth != depthBufferValue)	break;
    242 			default:
    243 				DE_ASSERT(false);
    244 		}
    245 	}
    246 
    247 #undef SAMPLE_REGISTER_DEPTH_COMPARE_F
    248 #undef SAMPLE_REGISTER_DEPTH_COMPARE_UI
    249 }
    250 
    251 void FragmentProcessor::executeDepthWrite (int fragNdxOffset, int numSamplesPerFragment, const Fragment* inputFragments, const tcu::PixelBufferAccess& depthBuffer)
    252 {
    253 	for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
    254 	{
    255 		if (m_sampleRegister[regSampleNdx].isAlive && m_sampleRegister[regSampleNdx].depthPassed)
    256 		{
    257 			int					fragSampleNdx	= regSampleNdx % numSamplesPerFragment;
    258 			const Fragment&		frag			= inputFragments[fragNdxOffset + regSampleNdx/numSamplesPerFragment];
    259 			const float			clampedDepth	= de::clamp(frag.sampleDepths[fragSampleNdx], 0.0f, 1.0f);
    260 
    261 			depthBuffer.setPixDepth(clampedDepth, fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y());
    262 		}
    263 	}
    264 }
    265 
    266 void FragmentProcessor::executeStencilDpFailAndPass (int fragNdxOffset, int numSamplesPerFragment, const Fragment* inputFragments, const StencilState& stencilState, int numStencilBits, const tcu::PixelBufferAccess& stencilBuffer)
    267 {
    268 #define SAMPLE_REGISTER_DPFAIL_OR_DPPASS(CONDITION, EXPRESSION)																													\
    269 	for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)																								\
    270 	{																																											\
    271 		if (m_sampleRegister[regSampleNdx].isAlive && (CONDITION))																												\
    272 		{																																										\
    273 			int					fragSampleNdx		= regSampleNdx % numSamplesPerFragment;																						\
    274 			const Fragment&		frag				= inputFragments[fragNdxOffset + regSampleNdx/numSamplesPerFragment];														\
    275 			int					stencilBufferValue	= stencilBuffer.getPixStencil(fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y());										\
    276 																																												\
    277 			stencilBuffer.setPixStencil(maskedBitReplace(stencilBufferValue, (EXPRESSION), stencilState.writeMask), fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y());	\
    278 		}																																										\
    279 	}
    280 
    281 #define SWITCH_DPFAIL_OR_DPPASS(OP_NAME, CONDITION)																											\
    282 		switch (stencilState.OP_NAME)																														\
    283 		{																																					\
    284 			case STENCILOP_KEEP:		SAMPLE_REGISTER_DPFAIL_OR_DPPASS(CONDITION, stencilBufferValue)												break;	\
    285 			case STENCILOP_ZERO:		SAMPLE_REGISTER_DPFAIL_OR_DPPASS(CONDITION, 0)																break;	\
    286 			case STENCILOP_REPLACE:		SAMPLE_REGISTER_DPFAIL_OR_DPPASS(CONDITION, clampedStencilRef)												break;	\
    287 			case STENCILOP_INCR:		SAMPLE_REGISTER_DPFAIL_OR_DPPASS(CONDITION, de::clamp(stencilBufferValue+1, 0, (1<<numStencilBits) - 1))	break;	\
    288 			case STENCILOP_DECR:		SAMPLE_REGISTER_DPFAIL_OR_DPPASS(CONDITION, de::clamp(stencilBufferValue-1, 0, (1<<numStencilBits) - 1))	break;	\
    289 			case STENCILOP_INCR_WRAP:	SAMPLE_REGISTER_DPFAIL_OR_DPPASS(CONDITION, (stencilBufferValue + 1) & ((1<<numStencilBits) - 1))			break;	\
    290 			case STENCILOP_DECR_WRAP:	SAMPLE_REGISTER_DPFAIL_OR_DPPASS(CONDITION, (stencilBufferValue - 1) & ((1<<numStencilBits) - 1))			break;	\
    291 			case STENCILOP_INVERT:		SAMPLE_REGISTER_DPFAIL_OR_DPPASS(CONDITION, (~stencilBufferValue) & ((1<<numStencilBits) - 1))				break;	\
    292 			default:																																		\
    293 				DE_ASSERT(false);																															\
    294 		}
    295 
    296 	int clampedStencilRef = de::clamp(stencilState.ref, 0, (1<<numStencilBits)-1);
    297 
    298 	SWITCH_DPFAIL_OR_DPPASS(dpFail, !m_sampleRegister[regSampleNdx].depthPassed)
    299 	SWITCH_DPFAIL_OR_DPPASS(dpPass, m_sampleRegister[regSampleNdx].depthPassed)
    300 
    301 #undef SWITCH_DPFAIL_OR_DPPASS
    302 #undef SAMPLE_REGISTER_DPFAIL_OR_DPPASS
    303 }
    304 
    305 void FragmentProcessor::executeBlendFactorComputeRGB (const Vec4& blendColor, const BlendState& blendRGBState)
    306 {
    307 #define SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, FACTOR_EXPRESSION)																				\
    308 	for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)																	\
    309 	{																																				\
    310 		if (m_sampleRegister[regSampleNdx].isAlive)																									\
    311 		{																																			\
    312 			const Vec4& src		= m_sampleRegister[regSampleNdx].clampedBlendSrcColor;																\
    313 			const Vec4& src1	= m_sampleRegister[regSampleNdx].clampedBlendSrc1Color;																\
    314 			const Vec4& dst		= m_sampleRegister[regSampleNdx].clampedBlendDstColor;																\
    315 			DE_UNREF(src);																															\
    316 			DE_UNREF(src1);																															\
    317 			DE_UNREF(dst);																															\
    318 																																					\
    319 			m_sampleRegister[regSampleNdx].FACTOR_NAME = (FACTOR_EXPRESSION);																		\
    320 		}																																			\
    321 	}
    322 
    323 #define SWITCH_SRC_OR_DST_FACTOR_RGB(FUNC_NAME, FACTOR_NAME)																					\
    324 	switch (blendRGBState.FUNC_NAME)																											\
    325 	{																																			\
    326 		case BLENDFUNC_ZERO:						SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(0.0f))								break;	\
    327 		case BLENDFUNC_ONE:							SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(1.0f))								break;	\
    328 		case BLENDFUNC_SRC_COLOR:					SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, src.swizzle(0,1,2))						break;	\
    329 		case BLENDFUNC_ONE_MINUS_SRC_COLOR:			SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(1.0f) - src.swizzle(0,1,2))			break;	\
    330 		case BLENDFUNC_DST_COLOR:					SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, dst.swizzle(0,1,2))						break;	\
    331 		case BLENDFUNC_ONE_MINUS_DST_COLOR:			SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(1.0f) - dst.swizzle(0,1,2))			break;	\
    332 		case BLENDFUNC_SRC_ALPHA:					SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(src.w()))							break;	\
    333 		case BLENDFUNC_ONE_MINUS_SRC_ALPHA:			SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(1.0f - src.w()))						break;	\
    334 		case BLENDFUNC_DST_ALPHA:					SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(dst.w()))							break;	\
    335 		case BLENDFUNC_ONE_MINUS_DST_ALPHA:			SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(1.0f - dst.w()))						break;	\
    336 		case BLENDFUNC_CONSTANT_COLOR:				SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, blendColor.swizzle(0,1,2))				break;	\
    337 		case BLENDFUNC_ONE_MINUS_CONSTANT_COLOR:	SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(1.0f) - blendColor.swizzle(0,1,2))	break;	\
    338 		case BLENDFUNC_CONSTANT_ALPHA:				SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(blendColor.w()))						break;	\
    339 		case BLENDFUNC_ONE_MINUS_CONSTANT_ALPHA:	SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(1.0f - blendColor.w()))				break;	\
    340 		case BLENDFUNC_SRC_ALPHA_SATURATE:			SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(de::min(src.w(), 1.0f - dst.w())))	break;	\
    341 		case BLENDFUNC_SRC1_COLOR:					SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, src1.swizzle(0,1,2))						break;	\
    342 		case BLENDFUNC_ONE_MINUS_SRC1_COLOR:		SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(1.0f) - src1.swizzle(0,1,2))			break;	\
    343 		case BLENDFUNC_SRC1_ALPHA:					SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(src1.w()))							break;	\
    344 		case BLENDFUNC_ONE_MINUS_SRC1_ALPHA:		SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, Vec3(1.0f - src1.w()))					break;	\
    345 		default:																																\
    346 			DE_ASSERT(false);																													\
    347 	}
    348 
    349 	SWITCH_SRC_OR_DST_FACTOR_RGB(srcFunc, blendSrcFactorRGB)
    350 	SWITCH_SRC_OR_DST_FACTOR_RGB(dstFunc, blendDstFactorRGB)
    351 
    352 #undef SWITCH_SRC_OR_DST_FACTOR_RGB
    353 #undef SAMPLE_REGISTER_BLEND_FACTOR
    354 }
    355 
    356 void FragmentProcessor::executeBlendFactorComputeA (const Vec4& blendColor, const BlendState& blendAState)
    357 {
    358 #define SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, FACTOR_EXPRESSION)														\
    359 	for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)											\
    360 	{																														\
    361 		if (m_sampleRegister[regSampleNdx].isAlive)																			\
    362 		{																													\
    363 			const Vec4& src		= m_sampleRegister[regSampleNdx].clampedBlendSrcColor;										\
    364 			const Vec4& src1	= m_sampleRegister[regSampleNdx].clampedBlendSrc1Color;										\
    365 			const Vec4& dst		= m_sampleRegister[regSampleNdx].clampedBlendDstColor;										\
    366 			DE_UNREF(src);																									\
    367 			DE_UNREF(src1);																									\
    368 			DE_UNREF(dst);																									\
    369 																															\
    370 			m_sampleRegister[regSampleNdx].FACTOR_NAME = (FACTOR_EXPRESSION);												\
    371 		}																													\
    372 	}
    373 
    374 #define SWITCH_SRC_OR_DST_FACTOR_A(FUNC_NAME, FACTOR_NAME)																		\
    375 	switch (blendAState.FUNC_NAME)																								\
    376 	{																															\
    377 		case BLENDFUNC_ZERO:						SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, 0.0f)						break;	\
    378 		case BLENDFUNC_ONE:							SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, 1.0f)						break;	\
    379 		case BLENDFUNC_SRC_COLOR:					SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, src.w())					break;	\
    380 		case BLENDFUNC_ONE_MINUS_SRC_COLOR:			SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, 1.0f - src.w())			break;	\
    381 		case BLENDFUNC_DST_COLOR:					SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, dst.w())					break;	\
    382 		case BLENDFUNC_ONE_MINUS_DST_COLOR:			SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, 1.0f - dst.w())			break;	\
    383 		case BLENDFUNC_SRC_ALPHA:					SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, src.w())					break;	\
    384 		case BLENDFUNC_ONE_MINUS_SRC_ALPHA:			SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, 1.0f - src.w())			break;	\
    385 		case BLENDFUNC_DST_ALPHA:					SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, dst.w())					break;	\
    386 		case BLENDFUNC_ONE_MINUS_DST_ALPHA:			SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, 1.0f - dst.w())			break;	\
    387 		case BLENDFUNC_CONSTANT_COLOR:				SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, blendColor.w())			break;	\
    388 		case BLENDFUNC_ONE_MINUS_CONSTANT_COLOR:	SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, 1.0f - blendColor.w())	break;	\
    389 		case BLENDFUNC_CONSTANT_ALPHA:				SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, blendColor.w())			break;	\
    390 		case BLENDFUNC_ONE_MINUS_CONSTANT_ALPHA:	SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, 1.0f - blendColor.w())	break;	\
    391 		case BLENDFUNC_SRC_ALPHA_SATURATE:			SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, 1.0f)						break;	\
    392 		case BLENDFUNC_SRC1_COLOR:					SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, src1.w())					break;	\
    393 		case BLENDFUNC_ONE_MINUS_SRC1_COLOR:		SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, 1.0f - src1.w())			break;	\
    394 		case BLENDFUNC_SRC1_ALPHA:					SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, src1.w())					break;	\
    395 		case BLENDFUNC_ONE_MINUS_SRC1_ALPHA:		SAMPLE_REGISTER_BLEND_FACTOR(FACTOR_NAME, 1.0f - src1.w())			break;	\
    396 		default:																												\
    397 			DE_ASSERT(false);																									\
    398 	}
    399 
    400 	SWITCH_SRC_OR_DST_FACTOR_A(srcFunc, blendSrcFactorA)
    401 	SWITCH_SRC_OR_DST_FACTOR_A(dstFunc, blendDstFactorA)
    402 
    403 #undef SWITCH_SRC_OR_DST_FACTOR_A
    404 #undef SAMPLE_REGISTER_BLEND_FACTOR
    405 }
    406 
    407 void FragmentProcessor::executeBlend (const BlendState& blendRGBState, const BlendState& blendAState)
    408 {
    409 #define SAMPLE_REGISTER_BLENDED_COLOR(COLOR_NAME, COLOR_EXPRESSION)						\
    410 	for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)		\
    411 	{																					\
    412 		if (m_sampleRegister[regSampleNdx].isAlive)										\
    413 		{																				\
    414 			SampleData& sample		= m_sampleRegister[regSampleNdx];					\
    415 			const Vec4& srcColor	= sample.clampedBlendSrcColor;						\
    416 			const Vec4& dstColor	= sample.clampedBlendDstColor;						\
    417 																						\
    418 			sample.COLOR_NAME = (COLOR_EXPRESSION);										\
    419 		}																				\
    420 	}
    421 
    422 	switch (blendRGBState.equation)
    423 	{
    424 		case BLENDEQUATION_ADD:					SAMPLE_REGISTER_BLENDED_COLOR(blendedRGB, srcColor.swizzle(0,1,2)*sample.blendSrcFactorRGB + dstColor.swizzle(0,1,2)*sample.blendDstFactorRGB)	break;
    425 		case BLENDEQUATION_SUBTRACT:			SAMPLE_REGISTER_BLENDED_COLOR(blendedRGB, srcColor.swizzle(0,1,2)*sample.blendSrcFactorRGB - dstColor.swizzle(0,1,2)*sample.blendDstFactorRGB)	break;
    426 		case BLENDEQUATION_REVERSE_SUBTRACT:	SAMPLE_REGISTER_BLENDED_COLOR(blendedRGB, dstColor.swizzle(0,1,2)*sample.blendDstFactorRGB - srcColor.swizzle(0,1,2)*sample.blendSrcFactorRGB)	break;
    427 		case BLENDEQUATION_MIN:					SAMPLE_REGISTER_BLENDED_COLOR(blendedRGB, min(srcColor.swizzle(0,1,2), dstColor.swizzle(0,1,2)))												break;
    428 		case BLENDEQUATION_MAX:					SAMPLE_REGISTER_BLENDED_COLOR(blendedRGB, max(srcColor.swizzle(0,1,2), dstColor.swizzle(0,1,2)))												break;
    429 		default:
    430 			DE_ASSERT(false);
    431 	}
    432 
    433 	switch (blendAState.equation)
    434 	{
    435 		case BLENDEQUATION_ADD:					SAMPLE_REGISTER_BLENDED_COLOR(blendedA, srcColor.w()*sample.blendSrcFactorA + dstColor.w()*sample.blendDstFactorA)	break;
    436 		case BLENDEQUATION_SUBTRACT:			SAMPLE_REGISTER_BLENDED_COLOR(blendedA, srcColor.w()*sample.blendSrcFactorA - dstColor.w()*sample.blendDstFactorA)	break;
    437 		case BLENDEQUATION_REVERSE_SUBTRACT:	SAMPLE_REGISTER_BLENDED_COLOR(blendedA, dstColor.w()*sample.blendDstFactorA - srcColor.w()*sample.blendSrcFactorA)	break;
    438 		case BLENDEQUATION_MIN:					SAMPLE_REGISTER_BLENDED_COLOR(blendedA, min(srcColor.w(), dstColor.w()))											break;
    439 		case BLENDEQUATION_MAX:					SAMPLE_REGISTER_BLENDED_COLOR(blendedA, max(srcColor.w(), dstColor.w()))											break;
    440 		default:
    441 			DE_ASSERT(false);
    442 	}
    443 #undef SAMPLE_REGISTER_BLENDED_COLOR
    444 }
    445 
    446 namespace advblend
    447 {
    448 
    449 inline float	multiply	(float src, float dst) { return src*dst;					}
    450 inline float	screen		(float src, float dst) { return src + dst - src*dst;		}
    451 inline float	darken		(float src, float dst) { return de::min(src, dst);			}
    452 inline float	lighten		(float src, float dst) { return de::max(src, dst);			}
    453 inline float	difference	(float src, float dst) { return de::abs(dst-src);			}
    454 inline float	exclusion	(float src, float dst) { return src + dst - 2.0f*src*dst;	}
    455 
    456 inline float overlay (float src, float dst)
    457 {
    458 	if (dst <= 0.5f)
    459 		return 2.0f*src*dst;
    460 	else
    461 		return 1.0f - 2.0f*(1.0f-src)*(1.0f-dst);
    462 }
    463 
    464 inline float colordodge (float src, float dst)
    465 {
    466 	if (dst <= 0.0f)
    467 		return 0.0f;
    468 	else if (src < 1.0f)
    469 		return de::min(1.0f, dst/(1.0f-src));
    470 	else
    471 		return 1.0f;
    472 }
    473 
    474 inline float colorburn (float src, float dst)
    475 {
    476 	if (dst >= 1.0f)
    477 		return 1.0f;
    478 	else if (src > 0.0f)
    479 		return 1.0f - de::min(1.0f, (1.0f-dst)/src);
    480 	else
    481 		return 0.0f;
    482 }
    483 
    484 inline float hardlight (float src, float dst)
    485 {
    486 	if (src <= 0.5f)
    487 		return 2.0f*src*dst;
    488 	else
    489 		return 1.0f - 2.0f*(1.0f-src)*(1.0f-dst);
    490 }
    491 
    492 inline float softlight (float src, float dst)
    493 {
    494 	if (src <= 0.5f)
    495 		return dst - (1.0f - 2.0f*src)*dst*(1.0f-dst);
    496 	else if (dst <= 0.25f)
    497 		return dst + (2.0f*src - 1.0f)*dst*((16.0f*dst - 12.0f)*dst + 3.0f);
    498 	else
    499 		return dst + (2.0f*src - 1.0f)*(deFloatSqrt(dst)-dst);
    500 }
    501 
    502 inline float minComp (const Vec3& v)
    503 {
    504 	return de::min(de::min(v.x(), v.y()), v.z());
    505 }
    506 
    507 inline float maxComp (const Vec3& v)
    508 {
    509 	return de::max(de::max(v.x(), v.y()), v.z());
    510 }
    511 
    512 inline float luminosity (const Vec3& rgb)
    513 {
    514 	return dot(rgb, Vec3(0.3f, 0.59f, 0.11f));
    515 }
    516 
    517 inline float saturation (const Vec3& rgb)
    518 {
    519 	return maxComp(rgb) - minComp(rgb);
    520 }
    521 
    522 Vec3 setLum (const Vec3& cbase, const Vec3& clum)
    523 {
    524 	const float		lbase	= luminosity(cbase);
    525 	const float		llum	= luminosity(clum);
    526 	const float		ldiff	= llum - lbase;
    527 	const Vec3		color	= cbase + Vec3(ldiff);
    528 	const float		minC	= minComp(color);
    529 	const float		maxC	= maxComp(color);
    530 
    531 	if (minC < 0.0f)
    532 		return llum + ((color-llum)*llum / (llum != minC ? (llum-minC) : 1.0f));
    533 	else if (maxC > 1.0f)
    534 		return llum + ((color-llum)*(1.0f-llum) / (llum != maxC ? (maxC-llum) : 1.0f));
    535 	else
    536 		return color;
    537 }
    538 
    539 Vec3 setLumSat (const Vec3& cbase, const Vec3& csat, const Vec3& clum)
    540 {
    541 	const float		minbase	= minComp(cbase);
    542 	const float		sbase	= saturation(cbase);
    543 	const float		ssat	= saturation(csat);
    544 	Vec3			color	= Vec3(0.0f);
    545 
    546 	if (sbase > 0.0f)
    547 		color = (cbase - minbase) * ssat / sbase;
    548 	else
    549 		color = color;
    550 
    551 	return setLum(color, clum);
    552 }
    553 
    554 } // advblend
    555 
    556 void FragmentProcessor::executeAdvancedBlend (BlendEquationAdvanced equation)
    557 {
    558 	using namespace advblend;
    559 
    560 #define SAMPLE_REGISTER_ADV_BLEND(FUNCTION_NAME)											\
    561 	for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)			\
    562 	{																						\
    563 		if (m_sampleRegister[regSampleNdx].isAlive)											\
    564 		{																					\
    565 			SampleData&	sample		= m_sampleRegister[regSampleNdx];						\
    566 			const Vec4&	srcColor	= sample.clampedBlendSrcColor;							\
    567 			const Vec4&	dstColor	= sample.clampedBlendDstColor;							\
    568 			const Vec3&	bias		= sample.blendSrcFactorRGB;								\
    569 			const float	p0			= sample.blendSrcFactorA;								\
    570 			const float	r			= FUNCTION_NAME(srcColor[0], dstColor[0])*p0 + bias[0];	\
    571 			const float	g			= FUNCTION_NAME(srcColor[1], dstColor[1])*p0 + bias[1];	\
    572 			const float	b			= FUNCTION_NAME(srcColor[2], dstColor[2])*p0 + bias[2];	\
    573 																							\
    574 			sample.blendedRGB = Vec3(r, g, b);												\
    575 		}																					\
    576 	}
    577 
    578 #define SAMPLE_REGISTER_ADV_BLEND_HSL(COLOR_EXPRESSION)										\
    579 	for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)			\
    580 	{																						\
    581 		if (m_sampleRegister[regSampleNdx].isAlive)											\
    582 		{																					\
    583 			SampleData&	sample		= m_sampleRegister[regSampleNdx];						\
    584 			const Vec3	srcColor	= sample.clampedBlendSrcColor.swizzle(0,1,2);			\
    585 			const Vec3	dstColor	= sample.clampedBlendDstColor.swizzle(0,1,2);			\
    586 			const Vec3&	bias		= sample.blendSrcFactorRGB;								\
    587 			const float	p0			= sample.blendSrcFactorA;								\
    588 																							\
    589 			sample.blendedRGB = (COLOR_EXPRESSION)*p0 + bias;								\
    590 		}																					\
    591 	}
    592 
    593 	// Pre-compute factors & compute alpha \todo [2014-03-18 pyry] Re-using variable names.
    594 	// \note clampedBlend*Color contains clamped & unpremultiplied colors
    595 	for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
    596 	{
    597 		if (m_sampleRegister[regSampleNdx].isAlive)
    598 		{
    599 			SampleData&	sample		= m_sampleRegister[regSampleNdx];
    600 			const Vec4&	srcColor	= sample.clampedBlendSrcColor;
    601 			const Vec4&	dstColor	= sample.clampedBlendDstColor;
    602 			const float	srcA		= srcColor.w();
    603 			const float	dstA		= dstColor.w();
    604 			const float	p0			= srcA*dstA;
    605 			const float p1			= srcA*(1.0f-dstA);
    606 			const float p2			= dstA*(1.0f-srcA);
    607 			const Vec3	bias		(srcColor[0]*p1 + dstColor[0]*p2,
    608 									 srcColor[1]*p1 + dstColor[1]*p2,
    609 									 srcColor[2]*p1 + dstColor[2]*p2);
    610 
    611 			sample.blendSrcFactorRGB	= bias;
    612 			sample.blendSrcFactorA		= p0;
    613 			sample.blendedA				= p0 + p1 + p2;
    614 		}
    615 	}
    616 
    617 	switch (equation)
    618 	{
    619 		case BLENDEQUATION_ADVANCED_MULTIPLY:		SAMPLE_REGISTER_ADV_BLEND(multiply);									break;
    620 		case BLENDEQUATION_ADVANCED_SCREEN:			SAMPLE_REGISTER_ADV_BLEND(screen);										break;
    621 		case BLENDEQUATION_ADVANCED_OVERLAY:		SAMPLE_REGISTER_ADV_BLEND(overlay);										break;
    622 		case BLENDEQUATION_ADVANCED_DARKEN:			SAMPLE_REGISTER_ADV_BLEND(darken);										break;
    623 		case BLENDEQUATION_ADVANCED_LIGHTEN:		SAMPLE_REGISTER_ADV_BLEND(lighten);										break;
    624 		case BLENDEQUATION_ADVANCED_COLORDODGE:		SAMPLE_REGISTER_ADV_BLEND(colordodge);									break;
    625 		case BLENDEQUATION_ADVANCED_COLORBURN:		SAMPLE_REGISTER_ADV_BLEND(colorburn);									break;
    626 		case BLENDEQUATION_ADVANCED_HARDLIGHT:		SAMPLE_REGISTER_ADV_BLEND(hardlight);									break;
    627 		case BLENDEQUATION_ADVANCED_SOFTLIGHT:		SAMPLE_REGISTER_ADV_BLEND(softlight);									break;
    628 		case BLENDEQUATION_ADVANCED_DIFFERENCE:		SAMPLE_REGISTER_ADV_BLEND(difference);									break;
    629 		case BLENDEQUATION_ADVANCED_EXCLUSION:		SAMPLE_REGISTER_ADV_BLEND(exclusion);									break;
    630 		case BLENDEQUATION_ADVANCED_HSL_HUE:		SAMPLE_REGISTER_ADV_BLEND_HSL(setLumSat(srcColor, dstColor, dstColor));	break;
    631 		case BLENDEQUATION_ADVANCED_HSL_SATURATION:	SAMPLE_REGISTER_ADV_BLEND_HSL(setLumSat(dstColor, srcColor, dstColor));	break;
    632 		case BLENDEQUATION_ADVANCED_HSL_COLOR:		SAMPLE_REGISTER_ADV_BLEND_HSL(setLum(srcColor, dstColor));				break;
    633 		case BLENDEQUATION_ADVANCED_HSL_LUMINOSITY:	SAMPLE_REGISTER_ADV_BLEND_HSL(setLum(dstColor, srcColor));				break;
    634 		default:
    635 			DE_ASSERT(false);
    636 	}
    637 
    638 #undef SAMPLE_REGISTER_ADV_BLEND
    639 #undef SAMPLE_REGISTER_ADV_BLEND_HSL
    640 }
    641 
    642 void FragmentProcessor::executeColorWrite (int fragNdxOffset, int numSamplesPerFragment, const Fragment* inputFragments, bool isSRGB, const tcu::PixelBufferAccess& colorBuffer)
    643 {
    644 	for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
    645 	{
    646 		if (m_sampleRegister[regSampleNdx].isAlive)
    647 		{
    648 			int					fragSampleNdx	= regSampleNdx % numSamplesPerFragment;
    649 			const Fragment&		frag			= inputFragments[fragNdxOffset + regSampleNdx/numSamplesPerFragment];
    650 			Vec4				combinedColor;
    651 
    652 			combinedColor.xyz()	= m_sampleRegister[regSampleNdx].blendedRGB;
    653 			combinedColor.w()	= m_sampleRegister[regSampleNdx].blendedA;
    654 
    655 			if (isSRGB)
    656 				combinedColor = tcu::linearToSRGB(combinedColor);
    657 
    658 			colorBuffer.setPixel(combinedColor, fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y());
    659 		}
    660 	}
    661 }
    662 
    663 void FragmentProcessor::executeRGBA8ColorWrite (int fragNdxOffset, int numSamplesPerFragment, const Fragment* inputFragments, const tcu::PixelBufferAccess& colorBuffer)
    664 {
    665 	const int		fragStride	= 4;
    666 	const int		xStride		= colorBuffer.getRowPitch();
    667 	const int		yStride		= colorBuffer.getSlicePitch();
    668 	deUint8* const	basePtr		= (deUint8*)colorBuffer.getDataPtr();
    669 
    670 	for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
    671 	{
    672 		if (m_sampleRegister[regSampleNdx].isAlive)
    673 		{
    674 			const int			fragSampleNdx	= regSampleNdx % numSamplesPerFragment;
    675 			const Fragment&		frag			= inputFragments[fragNdxOffset + regSampleNdx/numSamplesPerFragment];
    676 			deUint8*			dstPtr			= basePtr + fragSampleNdx*fragStride + frag.pixelCoord.x()*xStride + frag.pixelCoord.y()*yStride;
    677 
    678 			dstPtr[0] = tcu::floatToU8(m_sampleRegister[regSampleNdx].blendedRGB.x());
    679 			dstPtr[1] = tcu::floatToU8(m_sampleRegister[regSampleNdx].blendedRGB.y());
    680 			dstPtr[2] = tcu::floatToU8(m_sampleRegister[regSampleNdx].blendedRGB.z());
    681 			dstPtr[3] = tcu::floatToU8(m_sampleRegister[regSampleNdx].blendedA);
    682 		}
    683 	}
    684 }
    685 
    686 void FragmentProcessor::executeMaskedColorWrite (int fragNdxOffset, int numSamplesPerFragment, const Fragment* inputFragments, const Vec4& colorMaskFactor, const Vec4& colorMaskNegationFactor, bool isSRGB, const tcu::PixelBufferAccess& colorBuffer)
    687 {
    688 	for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
    689 	{
    690 		if (m_sampleRegister[regSampleNdx].isAlive)
    691 		{
    692 			int					fragSampleNdx	= regSampleNdx % numSamplesPerFragment;
    693 			const Fragment&		frag			= inputFragments[fragNdxOffset + regSampleNdx/numSamplesPerFragment];
    694 			Vec4				originalColor	= colorBuffer.getPixel(fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y());
    695 			Vec4				newColor;
    696 
    697 			newColor.xyz()	= m_sampleRegister[regSampleNdx].blendedRGB;
    698 			newColor.w()	= m_sampleRegister[regSampleNdx].blendedA;
    699 
    700 			if (isSRGB)
    701 				newColor = tcu::linearToSRGB(newColor);
    702 
    703 			newColor = colorMaskFactor*newColor + colorMaskNegationFactor*originalColor;
    704 
    705 			colorBuffer.setPixel(newColor, fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y());
    706 		}
    707 	}
    708 }
    709 
    710 void FragmentProcessor::executeSignedValueWrite (int fragNdxOffset, int numSamplesPerFragment, const Fragment* inputFragments, const tcu::BVec4& colorMask, const tcu::PixelBufferAccess& colorBuffer)
    711 {
    712 	for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
    713 	{
    714 		if (m_sampleRegister[regSampleNdx].isAlive)
    715 		{
    716 			int					fragSampleNdx	= regSampleNdx % numSamplesPerFragment;
    717 			const Fragment&		frag			= inputFragments[fragNdxOffset + regSampleNdx/numSamplesPerFragment];
    718 			const IVec4			originalValue	= colorBuffer.getPixelInt(fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y());
    719 
    720 			colorBuffer.setPixel(tcu::select(m_sampleRegister[regSampleNdx].signedValue, originalValue, colorMask), fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y());
    721 		}
    722 	}
    723 }
    724 
    725 void FragmentProcessor::executeUnsignedValueWrite (int fragNdxOffset, int numSamplesPerFragment, const Fragment* inputFragments, const tcu::BVec4& colorMask, const tcu::PixelBufferAccess& colorBuffer)
    726 {
    727 	for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
    728 	{
    729 		if (m_sampleRegister[regSampleNdx].isAlive)
    730 		{
    731 			int					fragSampleNdx	= regSampleNdx % numSamplesPerFragment;
    732 			const Fragment&		frag			= inputFragments[fragNdxOffset + regSampleNdx/numSamplesPerFragment];
    733 			const UVec4			originalValue	= colorBuffer.getPixelUint(fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y());
    734 
    735 			colorBuffer.setPixel(tcu::select(m_sampleRegister[regSampleNdx].unsignedValue, originalValue, colorMask), fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y());
    736 		}
    737 	}
    738 }
    739 
    740 void FragmentProcessor::render (const rr::MultisamplePixelBufferAccess&		msColorBuffer,
    741 								const rr::MultisamplePixelBufferAccess&		msDepthBuffer,
    742 								const rr::MultisamplePixelBufferAccess&		msStencilBuffer,
    743 								const Fragment*								inputFragments,
    744 								int											numFragments,
    745 								FaceType									fragmentFacing,
    746 								const FragmentOperationState&				state)
    747 {
    748 	DE_ASSERT(fragmentFacing < FACETYPE_LAST);
    749 	DE_ASSERT(state.numStencilBits < 32); // code bitshifts numStencilBits, avoid undefined behavior
    750 
    751 	const tcu::PixelBufferAccess&	colorBuffer			= msColorBuffer.raw();
    752 	const tcu::PixelBufferAccess&	depthBuffer			= msDepthBuffer.raw();
    753 	const tcu::PixelBufferAccess&	stencilBuffer		= msStencilBuffer.raw();
    754 
    755 	bool							hasDepth			= depthBuffer.getWidth() > 0	&& depthBuffer.getHeight() > 0		&& depthBuffer.getDepth() > 0;
    756 	bool							hasStencil			= stencilBuffer.getWidth() > 0	&& stencilBuffer.getHeight() > 0	&& stencilBuffer.getDepth() > 0;
    757 	bool							doDepthTest			= hasDepth && state.depthTestEnabled;
    758 	bool							doStencilTest		= hasStencil && state.stencilTestEnabled;
    759 
    760 	tcu::TextureChannelClass		colorbufferClass	= tcu::getTextureChannelClass(msColorBuffer.raw().getFormat().type);
    761 	rr::GenericVecType				fragmentDataType	= (colorbufferClass == tcu::TEXTURECHANNELCLASS_SIGNED_INTEGER) ? (rr::GENERICVECTYPE_INT32) : ((colorbufferClass == tcu::TEXTURECHANNELCLASS_UNSIGNED_INTEGER) ? (rr::GENERICVECTYPE_UINT32) : (rr::GENERICVECTYPE_FLOAT));
    762 
    763 	DE_ASSERT((!hasDepth || colorBuffer.getWidth() == depthBuffer.getWidth())	&& (!hasStencil || colorBuffer.getWidth() == stencilBuffer.getWidth()));
    764 	DE_ASSERT((!hasDepth || colorBuffer.getHeight() == depthBuffer.getHeight())	&& (!hasStencil || colorBuffer.getHeight() == stencilBuffer.getHeight()));
    765 	DE_ASSERT((!hasDepth || colorBuffer.getDepth() == depthBuffer.getDepth())	&& (!hasStencil || colorBuffer.getDepth() == stencilBuffer.getDepth()));
    766 
    767 	// Combined formats must be separated beforehand
    768 	DE_ASSERT(!hasDepth || (!tcu::isCombinedDepthStencilType(depthBuffer.getFormat().type) && depthBuffer.getFormat().order == tcu::TextureFormat::D));
    769 	DE_ASSERT(!hasStencil || (!tcu::isCombinedDepthStencilType(stencilBuffer.getFormat().type) && stencilBuffer.getFormat().order == tcu::TextureFormat::S));
    770 
    771 	int						numSamplesPerFragment		= colorBuffer.getWidth();
    772 	int						totalNumSamples				= numFragments*numSamplesPerFragment;
    773 	int						numSampleGroups				= (totalNumSamples - 1) / SAMPLE_REGISTER_SIZE + 1; // \note totalNumSamples/SAMPLE_REGISTER_SIZE rounded up.
    774 	const StencilState&		stencilState				= state.stencilStates[fragmentFacing];
    775 	Vec4					colorMaskFactor				(state.colorMask[0] ? 1.0f : 0.0f, state.colorMask[1] ? 1.0f : 0.0f, state.colorMask[2] ? 1.0f : 0.0f, state.colorMask[3] ? 1.0f : 0.0f);
    776 	Vec4					colorMaskNegationFactor		(state.colorMask[0] ? 0.0f : 1.0f, state.colorMask[1] ? 0.0f : 1.0f, state.colorMask[2] ? 0.0f : 1.0f, state.colorMask[3] ? 0.0f : 1.0f);
    777 	bool					sRGBTarget					= state.sRGBEnabled && tcu::isSRGB(colorBuffer.getFormat());
    778 
    779 	DE_ASSERT(SAMPLE_REGISTER_SIZE % numSamplesPerFragment == 0);
    780 
    781 	// Divide the fragments' samples into groups of size SAMPLE_REGISTER_SIZE, and perform
    782 	// the per-sample operations for one group at a time.
    783 
    784 	for (int sampleGroupNdx = 0; sampleGroupNdx < numSampleGroups; sampleGroupNdx++)
    785 	{
    786 		// The index of the fragment of the sample at the beginning of m_sampleRegisters.
    787 		int groupFirstFragNdx = (sampleGroupNdx*SAMPLE_REGISTER_SIZE) / numSamplesPerFragment;
    788 
    789 		// Initialize sample data in the sample register.
    790 
    791 		for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
    792 		{
    793 			int fragNdx			= groupFirstFragNdx + regSampleNdx/numSamplesPerFragment;
    794 			int fragSampleNdx	= regSampleNdx % numSamplesPerFragment;
    795 
    796 			if (fragNdx < numFragments)
    797 			{
    798 				m_sampleRegister[regSampleNdx].isAlive		= (inputFragments[fragNdx].coverage & (1u << fragSampleNdx)) != 0;
    799 				m_sampleRegister[regSampleNdx].depthPassed	= true; // \note This will stay true if depth test is disabled.
    800 			}
    801 			else
    802 				m_sampleRegister[regSampleNdx].isAlive = false;
    803 		}
    804 
    805 		// Scissor test.
    806 
    807 		if (state.scissorTestEnabled)
    808 			executeScissorTest(groupFirstFragNdx, numSamplesPerFragment, inputFragments, state.scissorRectangle);
    809 
    810 		// Stencil test.
    811 
    812 		if (doStencilTest)
    813 		{
    814 			executeStencilCompare(groupFirstFragNdx, numSamplesPerFragment, inputFragments, stencilState, state.numStencilBits, stencilBuffer);
    815 			executeStencilSFail(groupFirstFragNdx, numSamplesPerFragment, inputFragments, stencilState, state.numStencilBits, stencilBuffer);
    816 		}
    817 
    818 		// Depth test.
    819 		// \note Current value of isAlive is needed for dpPass and dpFail, so it's only updated after them and not right after depth test.
    820 
    821 		if (doDepthTest)
    822 		{
    823 			executeDepthCompare(groupFirstFragNdx, numSamplesPerFragment, inputFragments, state.depthFunc, depthBuffer);
    824 
    825 			if (state.depthMask)
    826 				executeDepthWrite(groupFirstFragNdx, numSamplesPerFragment, inputFragments, depthBuffer);
    827 		}
    828 
    829 		// Do dpFail and dpPass stencil writes.
    830 
    831 		if (doStencilTest)
    832 			executeStencilDpFailAndPass(groupFirstFragNdx, numSamplesPerFragment, inputFragments, stencilState, state.numStencilBits, stencilBuffer);
    833 
    834 		// Kill the samples that failed depth test.
    835 
    836 		if (doDepthTest)
    837 		{
    838 			for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
    839 				m_sampleRegister[regSampleNdx].isAlive = m_sampleRegister[regSampleNdx].isAlive && m_sampleRegister[regSampleNdx].depthPassed;
    840 		}
    841 
    842 		// Paint fragments to target
    843 
    844 		switch (fragmentDataType)
    845 		{
    846 			case rr::GENERICVECTYPE_FLOAT:
    847 			{
    848 				// Select min/max clamping values for blending factors and operands
    849 				Vec4 minClampValue;
    850 				Vec4 maxClampValue;
    851 
    852 				if (colorbufferClass == tcu::TEXTURECHANNELCLASS_UNSIGNED_FIXED_POINT)
    853 				{
    854 					minClampValue = Vec4(0.0f);
    855 					maxClampValue = Vec4(1.0f);
    856 				}
    857 				else if (colorbufferClass == tcu::TEXTURECHANNELCLASS_SIGNED_FIXED_POINT)
    858 				{
    859 					minClampValue = Vec4(-1.0f);
    860 					maxClampValue = Vec4(1.0f);
    861 				}
    862 				else
    863 				{
    864 					// No clamping
    865 					minClampValue = Vec4(-std::numeric_limits<float>::infinity());
    866 					maxClampValue = Vec4(std::numeric_limits<float>::infinity());
    867 				}
    868 
    869 				// Blend calculation - only if using blend.
    870 				if (state.blendMode == BLENDMODE_STANDARD)
    871 				{
    872 					// Put dst color to register, doing srgb-to-linear conversion if needed.
    873 					for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
    874 					{
    875 						if (m_sampleRegister[regSampleNdx].isAlive)
    876 						{
    877 							int					fragSampleNdx	= regSampleNdx % numSamplesPerFragment;
    878 							const Fragment&		frag			= inputFragments[groupFirstFragNdx + regSampleNdx/numSamplesPerFragment];
    879 							Vec4				dstColor		= colorBuffer.getPixel(fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y());
    880 
    881 							m_sampleRegister[regSampleNdx].clampedBlendSrcColor		= clamp(frag.value.get<float>(), minClampValue, maxClampValue);
    882 							m_sampleRegister[regSampleNdx].clampedBlendSrc1Color	= clamp(frag.value1.get<float>(), minClampValue, maxClampValue);
    883 							m_sampleRegister[regSampleNdx].clampedBlendDstColor		= clamp(sRGBTarget ? tcu::sRGBToLinear(dstColor) : dstColor, minClampValue, maxClampValue);
    884 						}
    885 					}
    886 
    887 					// Calculate blend factors to register.
    888 					executeBlendFactorComputeRGB(state.blendColor, state.blendRGBState);
    889 					executeBlendFactorComputeA(state.blendColor, state.blendAState);
    890 
    891 					// Compute blended color.
    892 					executeBlend(state.blendRGBState, state.blendAState);
    893 				}
    894 				else if (state.blendMode == BLENDMODE_ADVANCED)
    895 				{
    896 					// Unpremultiply colors for blending, and do sRGB->linear if necessary
    897 					// \todo [2014-03-17 pyry] Re-consider clampedBlend*Color var names
    898 					for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
    899 					{
    900 						if (m_sampleRegister[regSampleNdx].isAlive)
    901 						{
    902 							int					fragSampleNdx	= regSampleNdx % numSamplesPerFragment;
    903 							const Fragment&		frag			= inputFragments[groupFirstFragNdx + regSampleNdx/numSamplesPerFragment];
    904 							const Vec4			srcColor		= frag.value.get<float>();
    905 							const Vec4			dstColor		= colorBuffer.getPixel(fragSampleNdx, frag.pixelCoord.x(), frag.pixelCoord.y());
    906 
    907 							m_sampleRegister[regSampleNdx].clampedBlendSrcColor		= unpremultiply(clamp(srcColor, minClampValue, maxClampValue));
    908 							m_sampleRegister[regSampleNdx].clampedBlendDstColor		= unpremultiply(clamp(sRGBTarget ? tcu::sRGBToLinear(dstColor) : dstColor, minClampValue, maxClampValue));
    909 						}
    910 					}
    911 
    912 					executeAdvancedBlend(state.blendEquationAdvaced);
    913 				}
    914 				else
    915 				{
    916 					// Not using blend - just put values to register as-is.
    917 					DE_ASSERT(state.blendMode == BLENDMODE_NONE);
    918 
    919 					for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
    920 					{
    921 						if (m_sampleRegister[regSampleNdx].isAlive)
    922 						{
    923 							const Fragment& frag = inputFragments[groupFirstFragNdx + regSampleNdx/numSamplesPerFragment];
    924 
    925 							m_sampleRegister[regSampleNdx].blendedRGB	= frag.value.get<float>().xyz();
    926 							m_sampleRegister[regSampleNdx].blendedA		= frag.value.get<float>().w();
    927 						}
    928 					}
    929 				}
    930 
    931 				// Clamp result values in sample register
    932 				if (colorbufferClass != tcu::TEXTURECHANNELCLASS_FLOATING_POINT)
    933 				{
    934 					for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
    935 					{
    936 						if (m_sampleRegister[regSampleNdx].isAlive)
    937 						{
    938 							m_sampleRegister[regSampleNdx].blendedRGB	= clamp(m_sampleRegister[regSampleNdx].blendedRGB, minClampValue.swizzle(0, 1, 2), maxClampValue.swizzle(0, 1, 2));
    939 							m_sampleRegister[regSampleNdx].blendedA		= clamp(m_sampleRegister[regSampleNdx].blendedA, minClampValue.w(), maxClampValue.w());
    940 						}
    941 					}
    942 				}
    943 
    944 				// Finally, write the colors to the color buffer.
    945 
    946 				if (state.colorMask[0] && state.colorMask[1] && state.colorMask[2] && state.colorMask[3])
    947 				{
    948 					if (colorBuffer.getFormat() == tcu::TextureFormat(tcu::TextureFormat::RGBA, tcu::TextureFormat::UNORM_INT8))
    949 						executeRGBA8ColorWrite(groupFirstFragNdx, numSamplesPerFragment, inputFragments, colorBuffer);
    950 					else
    951 						executeColorWrite(groupFirstFragNdx, numSamplesPerFragment, inputFragments, sRGBTarget, colorBuffer);
    952 				}
    953 				else if (state.colorMask[0] || state.colorMask[1] || state.colorMask[2] || state.colorMask[3])
    954 					executeMaskedColorWrite(groupFirstFragNdx, numSamplesPerFragment, inputFragments, colorMaskFactor, colorMaskNegationFactor, sRGBTarget, colorBuffer);
    955 				break;
    956 			}
    957 			case rr::GENERICVECTYPE_INT32:
    958 				// Write fragments
    959 				for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
    960 				{
    961 					if (m_sampleRegister[regSampleNdx].isAlive)
    962 					{
    963 						const Fragment& frag = inputFragments[groupFirstFragNdx + regSampleNdx/numSamplesPerFragment];
    964 
    965 						m_sampleRegister[regSampleNdx].signedValue = frag.value.get<deInt32>();
    966 					}
    967 				}
    968 
    969 				if (state.colorMask[0] || state.colorMask[1] || state.colorMask[2] || state.colorMask[3])
    970 					executeSignedValueWrite(groupFirstFragNdx, numSamplesPerFragment, inputFragments, state.colorMask, colorBuffer);
    971 				break;
    972 
    973 			case rr::GENERICVECTYPE_UINT32:
    974 				// Write fragments
    975 				for (int regSampleNdx = 0; regSampleNdx < SAMPLE_REGISTER_SIZE; regSampleNdx++)
    976 				{
    977 					if (m_sampleRegister[regSampleNdx].isAlive)
    978 					{
    979 						const Fragment& frag = inputFragments[groupFirstFragNdx + regSampleNdx/numSamplesPerFragment];
    980 
    981 						m_sampleRegister[regSampleNdx].unsignedValue = frag.value.get<deUint32>();
    982 					}
    983 				}
    984 
    985 				if (state.colorMask[0] || state.colorMask[1] || state.colorMask[2] || state.colorMask[3])
    986 					executeUnsignedValueWrite(groupFirstFragNdx, numSamplesPerFragment, inputFragments, state.colorMask, colorBuffer);
    987 				break;
    988 
    989 			default:
    990 				DE_ASSERT(DE_FALSE);
    991 		}
    992 	}
    993 }
    994 
    995 } // rr
    996