Home | History | Annotate | Download | only in functional
      1 /*-------------------------------------------------------------------------
      2  * drawElements Quality Program OpenGL ES 3.1 Module
      3  * -------------------------------------------------
      4  *
      5  * Copyright 2014 The Android Open Source Project
      6  *
      7  * Licensed under the Apache License, Version 2.0 (the "License");
      8  * you may not use this file except in compliance with the License.
      9  * You may obtain a copy of the License at
     10  *
     11  *      http://www.apache.org/licenses/LICENSE-2.0
     12  *
     13  * Unless required by applicable law or agreed to in writing, software
     14  * distributed under the License is distributed on an "AS IS" BASIS,
     15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     16  * See the License for the specific language governing permissions and
     17  * limitations under the License.
     18  *
     19  *//*!
     20  * \file
     21  * \brief Shader atomic operation tests.
     22  *//*--------------------------------------------------------------------*/
     23 
     24 #include "es31fShaderAtomicOpTests.hpp"
     25 #include "gluShaderProgram.hpp"
     26 #include "gluShaderUtil.hpp"
     27 #include "gluRenderContext.hpp"
     28 #include "gluObjectWrapper.hpp"
     29 #include "gluProgramInterfaceQuery.hpp"
     30 #include "tcuVector.hpp"
     31 #include "tcuTestLog.hpp"
     32 #include "tcuVectorUtil.hpp"
     33 #include "tcuFormatUtil.hpp"
     34 #include "deStringUtil.hpp"
     35 #include "deRandom.hpp"
     36 #include "glwFunctions.hpp"
     37 #include "glwEnums.hpp"
     38 
     39 #include <algorithm>
     40 #include <set>
     41 
     42 namespace deqp
     43 {
     44 namespace gles31
     45 {
     46 namespace Functional
     47 {
     48 
     49 using std::string;
     50 using std::vector;
     51 using tcu::TestLog;
     52 using tcu::UVec3;
     53 using std::set;
     54 using namespace glu;
     55 
     56 template<typename T, int Size>
     57 static inline T product (const tcu::Vector<T, Size>& v)
     58 {
     59 	T res = v[0];
     60 	for (int ndx = 1; ndx < Size; ndx++)
     61 		res *= v[ndx];
     62 	return res;
     63 }
     64 
     65 class ShaderAtomicOpCase : public TestCase
     66 {
     67 public:
     68 							ShaderAtomicOpCase	(Context& context, const char* name, const char* funcName, AtomicOperandType operandType, DataType type, Precision precision, const UVec3& workGroupSize);
     69 							~ShaderAtomicOpCase	(void);
     70 
     71 	void					init				(void);
     72 	void					deinit				(void);
     73 	IterateResult			iterate				(void);
     74 
     75 protected:
     76 	virtual void			getInputs			(int numValues, int stride, void* inputs) const = 0;
     77 	virtual bool			verify				(int numValues, int inputStride, const void* inputs, int outputStride, const void* outputs, int groupStride, const void* groupOutputs) const = 0;
     78 
     79 	const string			m_funcName;
     80 	const AtomicOperandType	m_operandType;
     81 	const DataType			m_type;
     82 	const Precision			m_precision;
     83 
     84 	const UVec3				m_workGroupSize;
     85 	const UVec3				m_numWorkGroups;
     86 
     87 	deUint32				m_initialValue;
     88 
     89 private:
     90 							ShaderAtomicOpCase	(const ShaderAtomicOpCase& other);
     91 	ShaderAtomicOpCase&		operator=			(const ShaderAtomicOpCase& other);
     92 
     93 	ShaderProgram*			m_program;
     94 };
     95 
     96 ShaderAtomicOpCase::ShaderAtomicOpCase (Context& context, const char* name, const char* funcName, AtomicOperandType operandType, DataType type, Precision precision, const UVec3& workGroupSize)
     97 	: TestCase			(context, name, funcName)
     98 	, m_funcName		(funcName)
     99 	, m_operandType		(operandType)
    100 	, m_type			(type)
    101 	, m_precision		(precision)
    102 	, m_workGroupSize	(workGroupSize)
    103 	, m_numWorkGroups	(4,4,4)
    104 	, m_initialValue	(0)
    105 	, m_program			(DE_NULL)
    106 {
    107 }
    108 
    109 ShaderAtomicOpCase::~ShaderAtomicOpCase (void)
    110 {
    111 	ShaderAtomicOpCase::deinit();
    112 }
    113 
    114 void ShaderAtomicOpCase::init (void)
    115 {
    116 	const bool			isSSBO		= m_operandType == ATOMIC_OPERAND_BUFFER_VARIABLE;
    117 	const char*			precName	= getPrecisionName(m_precision);
    118 	const char*			typeName	= getDataTypeName(m_type);
    119 
    120 	const DataType		outType		= isSSBO ? m_type : glu::TYPE_UINT;
    121 	const char*			outTypeName	= getDataTypeName(outType);
    122 
    123 	const deUint32		numValues	= product(m_workGroupSize)*product(m_numWorkGroups);
    124 	std::ostringstream	src;
    125 
    126 	src << "#version 310 es\n"
    127 		<< "layout(local_size_x = " << m_workGroupSize.x()
    128 		<< ", local_size_y = " << m_workGroupSize.y()
    129 		<< ", local_size_z = " << m_workGroupSize.z() << ") in;\n"
    130 		<< "layout(binding = 0) buffer InOut\n"
    131 		<< "{\n"
    132 		<< "	" << precName << " " << typeName << " inputValues[" << numValues << "];\n"
    133 		<< "	" << precName << " " << outTypeName << " outputValues[" << numValues << "];\n"
    134 		<< "	" << (isSSBO ? "coherent " : "") << precName << " " << outTypeName << " groupValues[" << product(m_numWorkGroups) << "];\n"
    135 		<< "} sb_inout;\n";
    136 
    137 	if (!isSSBO)
    138 		src << "shared " << precName << " " << typeName << " s_var;\n";
    139 
    140 	src << "\n"
    141 		<< "void main (void)\n"
    142 		<< "{\n"
    143 		<< "	uint localSize  = gl_WorkGroupSize.x*gl_WorkGroupSize.y*gl_WorkGroupSize.z;\n"
    144 		<< "	uint globalNdx  = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
    145 		<< "	uint globalOffs = localSize*globalNdx;\n"
    146 		<< "	uint offset     = globalOffs + gl_LocalInvocationIndex;\n"
    147 		<< "\n";
    148 
    149 	if (isSSBO)
    150 	{
    151 		DE_ASSERT(outType == m_type);
    152 		src << "	sb_inout.outputValues[offset] = " << m_funcName << "(sb_inout.groupValues[globalNdx], sb_inout.inputValues[offset]);\n";
    153 	}
    154 	else
    155 	{
    156 		const string		castBeg	= outType != m_type ? (string(outTypeName) + "(") : string("");
    157 		const char* const	castEnd	= outType != m_type ? ")" : "";
    158 
    159 		src << "	if (gl_LocalInvocationIndex == 0u)\n"
    160 			<< "		s_var = " << typeName << "(" << tcu::toHex(m_initialValue) << "u);\n"
    161 			<< "	barrier();\n"
    162 			<< "	" << precName << " " << typeName << " res = " << m_funcName << "(s_var, sb_inout.inputValues[offset]);\n"
    163 			<< "	sb_inout.outputValues[offset] = " << castBeg << "res" << castEnd << ";\n"
    164 			<< "	barrier();\n"
    165 			<< "	if (gl_LocalInvocationIndex == 0u)\n"
    166 			<< "		sb_inout.groupValues[globalNdx] = " << castBeg << "s_var" << castEnd << ";\n";
    167 	}
    168 
    169 	src << "}\n";
    170 
    171 	DE_ASSERT(!m_program);
    172 	m_program = new ShaderProgram(m_context.getRenderContext(), ProgramSources() << ComputeSource(src.str()));
    173 
    174 	m_testCtx.getLog() << *m_program;
    175 
    176 	if (!m_program->isOk())
    177 	{
    178 		delete m_program;
    179 		m_program = DE_NULL;
    180 		throw tcu::TestError("Compile failed");
    181 	}
    182 }
    183 
    184 void ShaderAtomicOpCase::deinit (void)
    185 {
    186 	delete m_program;
    187 	m_program = DE_NULL;
    188 }
    189 
    190 ShaderAtomicOpCase::IterateResult ShaderAtomicOpCase::iterate (void)
    191 {
    192 	const glw::Functions&		gl				= m_context.getRenderContext().getFunctions();
    193 	const deUint32				program			= m_program->getProgram();
    194 	const Buffer				inoutBuffer		(m_context.getRenderContext());
    195 	const deUint32				blockNdx		= gl.getProgramResourceIndex(program, GL_SHADER_STORAGE_BLOCK, "InOut");
    196 	const InterfaceBlockInfo	blockInfo		= getProgramInterfaceBlockInfo(gl, program, GL_SHADER_STORAGE_BLOCK, blockNdx);
    197 	const deUint32				inVarNdx		= gl.getProgramResourceIndex(program, GL_BUFFER_VARIABLE, "InOut.inputValues[0]");
    198 	const InterfaceVariableInfo	inVarInfo		= getProgramInterfaceVariableInfo(gl, program, GL_BUFFER_VARIABLE, inVarNdx);
    199 	const deUint32				outVarNdx		= gl.getProgramResourceIndex(program, GL_BUFFER_VARIABLE, "InOut.outputValues[0]");
    200 	const InterfaceVariableInfo	outVarInfo		= getProgramInterfaceVariableInfo(gl, program, GL_BUFFER_VARIABLE, outVarNdx);
    201 	const deUint32				groupVarNdx		= gl.getProgramResourceIndex(program, GL_BUFFER_VARIABLE, "InOut.groupValues[0]");
    202 	const InterfaceVariableInfo	groupVarInfo	= getProgramInterfaceVariableInfo(gl, program, GL_BUFFER_VARIABLE, groupVarNdx);
    203 	const deUint32				numValues		= product(m_workGroupSize)*product(m_numWorkGroups);
    204 
    205 	TCU_CHECK(inVarInfo.arraySize == numValues &&
    206 			  outVarInfo.arraySize == numValues &&
    207 			  groupVarInfo.arraySize == product(m_numWorkGroups));
    208 
    209 	gl.useProgram(program);
    210 
    211 	// Setup buffer.
    212 	{
    213 		vector<deUint8> bufData(blockInfo.dataSize);
    214 		std::fill(bufData.begin(), bufData.end(), 0);
    215 
    216 		getInputs((int)numValues, (int)inVarInfo.arrayStride, &bufData[0] + inVarInfo.offset);
    217 
    218 		if (m_operandType == ATOMIC_OPERAND_BUFFER_VARIABLE)
    219 		{
    220 			for (deUint32 valNdx = 0; valNdx < product(m_numWorkGroups); valNdx++)
    221 				*(deUint32*)(&bufData[0] + groupVarInfo.offset + groupVarInfo.arrayStride*valNdx) = m_initialValue;
    222 		}
    223 
    224 		gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *inoutBuffer);
    225 		gl.bufferData(GL_SHADER_STORAGE_BUFFER, blockInfo.dataSize, &bufData[0], GL_STATIC_READ);
    226 		gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, *inoutBuffer);
    227 		GLU_EXPECT_NO_ERROR(gl.getError(), "Output buffer setup failed");
    228 	}
    229 
    230 	gl.dispatchCompute(m_numWorkGroups.x(), m_numWorkGroups.y(), m_numWorkGroups.z());
    231 
    232 	// Read back and compare
    233 	{
    234 		const void*		resPtr		= gl.mapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, blockInfo.dataSize, GL_MAP_READ_BIT);
    235 		bool			isOk		= true;
    236 
    237 		GLU_EXPECT_NO_ERROR(gl.getError(), "glMapBufferRange()");
    238 		TCU_CHECK(resPtr);
    239 
    240 		isOk = verify((int)numValues,
    241 					  (int)inVarInfo.arrayStride, (const deUint8*)resPtr + inVarInfo.offset,
    242 					  (int)outVarInfo.arrayStride, (const deUint8*)resPtr + outVarInfo.offset,
    243 					  (int)groupVarInfo.arrayStride, (const deUint8*)resPtr + groupVarInfo.offset);
    244 
    245 		gl.unmapBuffer(GL_SHADER_STORAGE_BUFFER);
    246 		GLU_EXPECT_NO_ERROR(gl.getError(), "glUnmapBuffer()");
    247 
    248 		m_testCtx.setTestResult(isOk ? QP_TEST_RESULT_PASS	: QP_TEST_RESULT_FAIL,
    249 								isOk ? "Pass"				: "Comparison failed");
    250 	}
    251 
    252 	return STOP;
    253 }
    254 
    255 class ShaderAtomicAddCase : public ShaderAtomicOpCase
    256 {
    257 public:
    258 	ShaderAtomicAddCase (Context& context, const char* name, AtomicOperandType operandType, DataType type, Precision precision)
    259 		: ShaderAtomicOpCase(context, name, "atomicAdd", operandType, type, precision, UVec3(3,2,1))
    260 	{
    261 		m_initialValue = 1;
    262 	}
    263 
    264 protected:
    265 	void getInputs (int numValues, int stride, void* inputs) const
    266 	{
    267 		de::Random	rnd			(deStringHash(getName()));
    268 		const int	maxVal		= m_precision == PRECISION_LOWP ? 2 : 32;
    269 		const int	minVal		= 1;
    270 
    271 		// \todo [2013-09-04 pyry] Negative values!
    272 
    273 		for (int valNdx = 0; valNdx < numValues; valNdx++)
    274 			*(int*)((deUint8*)inputs + stride*valNdx) = rnd.getInt(minVal, maxVal);
    275 	}
    276 
    277 	bool verify (int numValues, int inputStride, const void* inputs, int outputStride, const void* outputs, int groupStride, const void* groupOutputs) const
    278 	{
    279 		const int	workGroupSize	= (int)product(m_workGroupSize);
    280 		const int	numWorkGroups	= numValues/workGroupSize;
    281 
    282 		for (int groupNdx = 0; groupNdx < numWorkGroups; groupNdx++)
    283 		{
    284 			const int	groupOffset		= groupNdx*workGroupSize;
    285 			const int	groupOutput		= *(const deInt32*)((const deUint8*)groupOutputs + groupNdx*groupStride);
    286 			set<int>	outValues;
    287 			bool		maxFound		= false;
    288 			int			valueSum		= (int)m_initialValue;
    289 
    290 			for (int localNdx = 0; localNdx < workGroupSize; localNdx++)
    291 			{
    292 				const int inputValue = *(const deInt32*)((const deUint8*)inputs + inputStride*(groupOffset+localNdx));
    293 				valueSum += inputValue;
    294 			}
    295 
    296 			if (groupOutput != valueSum)
    297 			{
    298 				m_testCtx.getLog() << TestLog::Message << "ERROR: at group " << groupNdx << ": expected sum " << valueSum << ", got " << groupOutput << TestLog::EndMessage;
    299 				return false;
    300 			}
    301 
    302 			for (int localNdx = 0; localNdx < workGroupSize; localNdx++)
    303 			{
    304 				const int	inputValue		= *(const deInt32*)((const deUint8*)inputs + inputStride*(groupOffset+localNdx));
    305 				const int	outputValue		= *(const deInt32*)((const deUint8*)outputs + outputStride*(groupOffset+localNdx));
    306 
    307 				if (!de::inRange(outputValue, (int)m_initialValue, valueSum-inputValue))
    308 				{
    309 					m_testCtx.getLog() << TestLog::Message << "ERROR: at group " << groupNdx << ", invocation " << localNdx
    310 														   << ": expected value in range [" << m_initialValue << ", " << (valueSum-inputValue)
    311 														   << "], got " << outputValue
    312 									   << TestLog::EndMessage;
    313 					return false;
    314 				}
    315 
    316 				if (outValues.find(outputValue) != outValues.end())
    317 				{
    318 					m_testCtx.getLog() << TestLog::Message << "ERROR: at group " << groupNdx << ", invocation " << localNdx
    319 														   << ": found duplicate value " << outputValue
    320 									   << TestLog::EndMessage;
    321 					return false;
    322 				}
    323 
    324 				outValues.insert(outputValue);
    325 				if (outputValue == valueSum-inputValue)
    326 					maxFound = true;
    327 			}
    328 
    329 			if (!maxFound)
    330 			{
    331 				m_testCtx.getLog() << TestLog::Message << "ERROR: could not find maximum expected value from group " << groupNdx << TestLog::EndMessage;
    332 				return false;
    333 			}
    334 
    335 			if (outValues.find((int)m_initialValue) == outValues.end())
    336 			{
    337 				m_testCtx.getLog() << TestLog::Message << "ERROR: could not find initial value from group " << groupNdx << TestLog::EndMessage;
    338 				return false;
    339 			}
    340 		}
    341 
    342 		return true;
    343 	}
    344 };
    345 
    346 
    347 static int getPrecisionNumIntegerBits (glu::Precision precision)
    348 {
    349 	switch (precision)
    350 	{
    351 		case glu::PRECISION_HIGHP:		return 32;
    352 		case glu::PRECISION_MEDIUMP:	return 16;
    353 		case glu::PRECISION_LOWP:		return 9;
    354 		default:
    355 			DE_ASSERT(false);
    356 			return 0;
    357 	}
    358 }
    359 
    360 static deUint32 getPrecisionMask (int numPreciseBits)
    361 {
    362 	// \note: bit shift with larger or equal than var length is undefined, use 64 bit ints
    363 	return (deUint32)((((deUint64)1u) << numPreciseBits) - 1) ;
    364 }
    365 
    366 static bool intEqualsAfterUintCast (deInt32 value, deUint32 casted, glu::Precision precision)
    367 {
    368 	// Bit format of 'casted' = [ uint -> highp uint promotion bits (0) ] [ sign extend bits (s) ] [ value bits ]
    369 	//                                                                                             |--min len---|
    370 	//                                                                    |---------------signed length---------|
    371 	//                          |-------------------------------- highp uint length ----------------------------|
    372 
    373 	const deUint32	reference		= (deUint32)value;
    374 	const int		signBitOn		= value < 0;
    375 	const int		numPreciseBits	= getPrecisionNumIntegerBits(precision);
    376 	const deUint32	preciseMask		= getPrecisionMask(numPreciseBits);
    377 
    378 	// Lowest N bits must match, N = minimum precision
    379 	if ((reference & preciseMask) != (casted & preciseMask))
    380 		return false;
    381 
    382 	// Other lowest bits must match the sign and the remaining (topmost) if any must be 0
    383 	for (int signedIntegerLength = numPreciseBits; signedIntegerLength <= 32; ++signedIntegerLength)
    384 	{
    385 		const deUint32 signBits = (signBitOn) ? (getPrecisionMask(signedIntegerLength)) : (0u);
    386 
    387 		if ((signBits & ~preciseMask) == (casted & ~preciseMask))
    388 			return true;
    389 	}
    390 	return false;
    391 }
    392 
    393 static bool containsAfterUintCast (const std::set<deInt32>& haystack, deUint32 needle, glu::Precision precision)
    394 {
    395 	for (std::set<deInt32>::const_iterator it = haystack.begin(); it != haystack.end(); ++it)
    396 		if (intEqualsAfterUintCast(*it, needle, precision))
    397 			return true;
    398 	return false;
    399 }
    400 
    401 static bool containsAfterUintCast (const std::set<deUint32>& haystack, deInt32 needle, glu::Precision precision)
    402 {
    403 	for (std::set<deUint32>::const_iterator it = haystack.begin(); it != haystack.end(); ++it)
    404 		if (intEqualsAfterUintCast(needle, *it, precision))
    405 			return true;
    406 	return false;
    407 }
    408 
    409 class ShaderAtomicMinCase : public ShaderAtomicOpCase
    410 {
    411 public:
    412 	ShaderAtomicMinCase (Context& context, const char* name, AtomicOperandType operandType, DataType type, Precision precision)
    413 		: ShaderAtomicOpCase(context, name, "atomicMin", operandType, type, precision, UVec3(3,2,1))
    414 	{
    415 		m_initialValue = m_precision == PRECISION_LOWP ? 100 : 1000;
    416 	}
    417 
    418 protected:
    419 	void getInputs (int numValues, int stride, void* inputs) const
    420 	{
    421 		de::Random	rnd			(deStringHash(getName()));
    422 		const bool	isSigned	= m_type == TYPE_INT;
    423 		const int	maxVal		= m_precision == PRECISION_LOWP ? 100 : 1000;
    424 		const int	minVal		= isSigned ? -maxVal : 0;
    425 
    426 		for (int valNdx = 0; valNdx < numValues; valNdx++)
    427 			*(int*)((deUint8*)inputs + stride*valNdx) = rnd.getInt(minVal, maxVal);
    428 	}
    429 
    430 	bool verify (int numValues, int inputStride, const void* inputs, int outputStride, const void* outputs, int groupStride, const void* groupOutputs) const
    431 	{
    432 		const int	workGroupSize	= (int)product(m_workGroupSize);
    433 		const int	numWorkGroups	= numValues/workGroupSize;
    434 		bool		anyError		= false;
    435 
    436 		for (int groupNdx = 0; groupNdx < numWorkGroups; groupNdx++)
    437 		{
    438 			const int		groupOffset		= groupNdx*workGroupSize;
    439 			const deUint32	groupOutput		= *(const deUint32*)((const deUint8*)groupOutputs + groupNdx*groupStride);
    440 			set<deInt32>	inValues;
    441 			set<deUint32>	outValues;
    442 			int				minValue		= (int)m_initialValue;
    443 
    444 			for (int localNdx = 0; localNdx < workGroupSize; localNdx++)
    445 			{
    446 				const deInt32 inputValue = *(const deInt32*)((const deUint8*)inputs + inputStride*(groupOffset+localNdx));
    447 				inValues.insert(inputValue);
    448 				minValue = de::min(inputValue, minValue);
    449 			}
    450 
    451 			if (!intEqualsAfterUintCast(minValue, groupOutput, m_precision))
    452 			{
    453 				m_testCtx.getLog()
    454 					<< TestLog::Message
    455 					<< "ERROR: at group " << groupNdx
    456 					<< ": expected minimum " << minValue << " (" << tcu::Format::Hex<8>((deUint32)minValue) << ")"
    457 					<< ", got " << groupOutput << " (" << tcu::Format::Hex<8>(groupOutput) << ")"
    458 					<< TestLog::EndMessage;
    459 				anyError = true;
    460 			}
    461 
    462 			for (int localNdx = 0; localNdx < workGroupSize; localNdx++)
    463 			{
    464 				const deUint32 outputValue = *(const deUint32*)((const deUint8*)outputs + outputStride*(groupOffset+localNdx));
    465 
    466 				if (!containsAfterUintCast(inValues, outputValue, m_precision) &&
    467 					!intEqualsAfterUintCast((deInt32)m_initialValue, outputValue, m_precision))
    468 				{
    469 					m_testCtx.getLog() << TestLog::Message << "ERROR: at group " << groupNdx << ", invocation " << localNdx
    470 														   << ": found unexpected value " << outputValue
    471 														   << " (" << tcu::Format::Hex<8>(outputValue) << ")"
    472 									   << TestLog::EndMessage;
    473 					anyError = true;
    474 				}
    475 
    476 				outValues.insert(outputValue);
    477 			}
    478 
    479 			if (!containsAfterUintCast(outValues, (int)m_initialValue, m_precision))
    480 			{
    481 				m_testCtx.getLog() << TestLog::Message << "ERROR: could not find initial value from group " << groupNdx << TestLog::EndMessage;
    482 				anyError = true;
    483 			}
    484 		}
    485 
    486 		return !anyError;
    487 	}
    488 };
    489 
    490 class ShaderAtomicMaxCase : public ShaderAtomicOpCase
    491 {
    492 public:
    493 	ShaderAtomicMaxCase (Context& context, const char* name, AtomicOperandType operandType, DataType type, Precision precision)
    494 		: ShaderAtomicOpCase(context, name, "atomicMax", operandType, type, precision, UVec3(3,2,1))
    495 	{
    496 		const bool isSigned = m_type == TYPE_INT;
    497 		m_initialValue = isSigned ? (m_precision == PRECISION_LOWP ? -100 : -1000) : 0;
    498 	}
    499 
    500 protected:
    501 	void getInputs (int numValues, int stride, void* inputs) const
    502 	{
    503 		de::Random	rnd			(deStringHash(getName()));
    504 		const bool	isSigned	= m_type == TYPE_INT;
    505 		const int	maxVal		= m_precision == PRECISION_LOWP ? 100 : 1000;
    506 		const int	minVal		= isSigned ? -maxVal : 0;
    507 
    508 		for (int valNdx = 0; valNdx < numValues; valNdx++)
    509 			*(int*)((deUint8*)inputs + stride*valNdx) = rnd.getInt(minVal, maxVal);
    510 	}
    511 
    512 	bool verify (int numValues, int inputStride, const void* inputs, int outputStride, const void* outputs, int groupStride, const void* groupOutputs) const
    513 	{
    514 		const int	workGroupSize	= (int)product(m_workGroupSize);
    515 		const int	numWorkGroups	= numValues/workGroupSize;
    516 		bool		anyError		= false;
    517 
    518 		for (int groupNdx = 0; groupNdx < numWorkGroups; groupNdx++)
    519 		{
    520 			const int		groupOffset		= groupNdx*workGroupSize;
    521 			const deUint32	groupOutput		= *(const deUint32*)((const deUint8*)groupOutputs + groupNdx*groupStride);
    522 			set<int>		inValues;
    523 			set<deUint32>	outValues;
    524 			int				maxValue		= (int)m_initialValue;
    525 
    526 			for (int localNdx = 0; localNdx < workGroupSize; localNdx++)
    527 			{
    528 				const deInt32 inputValue = *(const deInt32*)((const deUint8*)inputs + inputStride*(groupOffset+localNdx));
    529 				inValues.insert(inputValue);
    530 				maxValue = de::max(maxValue, inputValue);
    531 			}
    532 
    533 			if (!intEqualsAfterUintCast(maxValue, groupOutput, m_precision))
    534 			{
    535 				m_testCtx.getLog()
    536 					<< TestLog::Message
    537 					<< "ERROR: at group " << groupNdx
    538 					<< ": expected maximum " << maxValue << " (" << tcu::Format::Hex<8>((deUint32)maxValue) << ")"
    539 					<< ", got " << groupOutput << " (" << tcu::Format::Hex<8>(groupOutput) << ")"
    540 					<< TestLog::EndMessage;
    541 				anyError = true;
    542 			}
    543 
    544 			for (int localNdx = 0; localNdx < workGroupSize; localNdx++)
    545 			{
    546 				const deUint32 outputValue = *(const deUint32*)((const deUint8*)outputs + outputStride*(groupOffset+localNdx));
    547 
    548 				if (!containsAfterUintCast(inValues, outputValue, m_precision) &&
    549 					!intEqualsAfterUintCast((deInt32)m_initialValue, outputValue, m_precision))
    550 				{
    551 					m_testCtx.getLog() << TestLog::Message << "ERROR: at group " << groupNdx << ", invocation " << localNdx
    552 														   << ": found unexpected value " << outputValue
    553 														   << " (" << tcu::Format::Hex<8>(outputValue) << ")"
    554 									   << TestLog::EndMessage;
    555 					anyError = true;
    556 				}
    557 
    558 				outValues.insert(outputValue);
    559 			}
    560 
    561 			if (!containsAfterUintCast(outValues, (int)m_initialValue, m_precision))
    562 			{
    563 				m_testCtx.getLog() << TestLog::Message << "ERROR: could not find initial value from group " << groupNdx << TestLog::EndMessage;
    564 				anyError = true;
    565 			}
    566 		}
    567 
    568 		return !anyError;
    569 	}
    570 };
    571 
    572 class ShaderAtomicAndCase : public ShaderAtomicOpCase
    573 {
    574 public:
    575 	ShaderAtomicAndCase (Context& context, const char* name, AtomicOperandType operandType, DataType type, Precision precision)
    576 		: ShaderAtomicOpCase(context, name, "atomicAnd", operandType, type, precision, UVec3(3,2,1))
    577 	{
    578 		const int		numBits		= m_precision == PRECISION_HIGHP ? 32 :
    579 									  m_precision == PRECISION_MEDIUMP ? 16 : 8;
    580 		const deUint32	valueMask	= numBits == 32 ? ~0u : (1u<<numBits)-1u;
    581 		m_initialValue = ~((1u<<(numBits-1u)) | 1u) & valueMask; // All bits except lowest and highest set.
    582 	}
    583 
    584 protected:
    585 	void getInputs (int numValues, int stride, void* inputs) const
    586 	{
    587 		de::Random		rnd				(deStringHash(getName()));
    588 		const int		workGroupSize	= (int)product(m_workGroupSize);
    589 		const int		numWorkGroups	= numValues/workGroupSize;
    590 		const int		numBits			= m_precision == PRECISION_HIGHP ? 32 :
    591 										  m_precision == PRECISION_MEDIUMP ? 16 : 8;
    592 		const deUint32	valueMask		= numBits == 32 ? ~0u : (1u<<numBits)-1u;
    593 
    594 		for (int groupNdx = 0; groupNdx < numWorkGroups; groupNdx++)
    595 		{
    596 			const int		groupOffset		= groupNdx*workGroupSize;
    597 			const deUint32	groupMask		= 1<<rnd.getInt(0, numBits-2); // One bit is always set.
    598 
    599 			for (int localNdx = 0; localNdx < workGroupSize; localNdx++)
    600 				*(deUint32*)((deUint8*)inputs + stride*(groupOffset+localNdx)) = (rnd.getUint32() & valueMask) | groupMask;
    601 		}
    602 	}
    603 
    604 	bool verify (int numValues, int inputStride, const void* inputs, int outputStride, const void* outputs, int groupStride, const void* groupOutputs) const
    605 	{
    606 		const int		workGroupSize	= (int)product(m_workGroupSize);
    607 		const int		numWorkGroups	= numValues/workGroupSize;
    608 		const int		numBits			= m_precision == PRECISION_HIGHP ? 32 :
    609 										  m_precision == PRECISION_MEDIUMP ? 16 : 8;
    610 		const deUint32	compareMask		= (m_type == TYPE_UINT || numBits == 32) ? ~0u : (1u<<numBits)-1u;
    611 
    612 		for (int groupNdx = 0; groupNdx < numWorkGroups; groupNdx++)
    613 		{
    614 			const int		groupOffset		= groupNdx*workGroupSize;
    615 			const deUint32	groupOutput		= *(const deUint32*)((const deUint8*)groupOutputs + groupNdx*groupStride);
    616 			deUint32		expectedValue	= m_initialValue;
    617 
    618 			for (int localNdx = 0; localNdx < workGroupSize; localNdx++)
    619 			{
    620 				const deUint32 inputValue = *(const deUint32*)((const deUint8*)inputs + inputStride*(groupOffset+localNdx));
    621 				expectedValue &= inputValue;
    622 			}
    623 
    624 			if ((groupOutput & compareMask) != (expectedValue & compareMask))
    625 			{
    626 				m_testCtx.getLog() << TestLog::Message << "ERROR: at group " << groupNdx << ": expected " << tcu::toHex(expectedValue) << ", got " << tcu::toHex(groupOutput) << TestLog::EndMessage;
    627 				return false;
    628 			}
    629 
    630 			for (int localNdx = 0; localNdx < workGroupSize; localNdx++)
    631 			{
    632 				const deUint32 outputValue = *(const deUint32*)((const deUint8*)outputs + outputStride*(groupOffset+localNdx));
    633 
    634 				if ((compareMask & (outputValue & ~m_initialValue)) != 0)
    635 				{
    636 					m_testCtx.getLog() << TestLog::Message << "ERROR: at group " << groupNdx << ", invocation " << localNdx
    637 														   << ": found unexpected value " << tcu::toHex(outputValue)
    638 									   << TestLog::EndMessage;
    639 					return false;
    640 				}
    641 			}
    642 		}
    643 
    644 		return true;
    645 	}
    646 };
    647 
    648 class ShaderAtomicOrCase : public ShaderAtomicOpCase
    649 {
    650 public:
    651 	ShaderAtomicOrCase (Context& context, const char* name, AtomicOperandType operandType, DataType type, Precision precision)
    652 		: ShaderAtomicOpCase(context, name, "atomicOr", operandType, type, precision, UVec3(3,2,1))
    653 	{
    654 		m_initialValue = 1u; // Lowest bit set.
    655 	}
    656 
    657 protected:
    658 	void getInputs (int numValues, int stride, void* inputs) const
    659 	{
    660 		de::Random		rnd				(deStringHash(getName()));
    661 		const int		workGroupSize	= (int)product(m_workGroupSize);
    662 		const int		numWorkGroups	= numValues/workGroupSize;
    663 		const int		numBits			= m_precision == PRECISION_HIGHP ? 32 :
    664 										  m_precision == PRECISION_MEDIUMP ? 16 : 8;
    665 
    666 		for (int groupNdx = 0; groupNdx < numWorkGroups; groupNdx++)
    667 		{
    668 			const int groupOffset = groupNdx*workGroupSize;
    669 
    670 			for (int localNdx = 0; localNdx < workGroupSize; localNdx++)
    671 				*(deUint32*)((deUint8*)inputs + stride*(groupOffset+localNdx)) = 1u<<rnd.getInt(0, numBits-1);
    672 		}
    673 	}
    674 
    675 	bool verify (int numValues, int inputStride, const void* inputs, int outputStride, const void* outputs, int groupStride, const void* groupOutputs) const
    676 	{
    677 		const int		workGroupSize	= (int)product(m_workGroupSize);
    678 		const int		numWorkGroups	= numValues/workGroupSize;
    679 		const int		numBits			= m_precision == PRECISION_HIGHP ? 32 :
    680 										  m_precision == PRECISION_MEDIUMP ? 16 : 8;
    681 		const deUint32	compareMask		= (m_type == TYPE_UINT || numBits == 32) ? ~0u : (1u<<numBits)-1u;
    682 
    683 		for (int groupNdx = 0; groupNdx < numWorkGroups; groupNdx++)
    684 		{
    685 			const int		groupOffset		= groupNdx*workGroupSize;
    686 			const deUint32	groupOutput		= *(const deUint32*)((const deUint8*)groupOutputs + groupNdx*groupStride);
    687 			deUint32		expectedValue	= m_initialValue;
    688 
    689 			for (int localNdx = 0; localNdx < workGroupSize; localNdx++)
    690 			{
    691 				const deUint32 inputValue = *(const deUint32*)((const deUint8*)inputs + inputStride*(groupOffset+localNdx));
    692 				expectedValue |= inputValue;
    693 			}
    694 
    695 			if ((groupOutput & compareMask) != (expectedValue & compareMask))
    696 			{
    697 				m_testCtx.getLog() << TestLog::Message << "ERROR: at group " << groupNdx << ": expected " << tcu::toHex(expectedValue) << ", got " << tcu::toHex(groupOutput) << TestLog::EndMessage;
    698 				return false;
    699 			}
    700 
    701 			for (int localNdx = 0; localNdx < workGroupSize; localNdx++)
    702 			{
    703 				const deUint32 outputValue = *(const deUint32*)((const deUint8*)outputs + outputStride*(groupOffset+localNdx));
    704 
    705 				if ((compareMask & (outputValue & m_initialValue)) == 0)
    706 				{
    707 					m_testCtx.getLog() << TestLog::Message << "ERROR: at group " << groupNdx << ", invocation " << localNdx
    708 														   << ": found unexpected value " << tcu::toHex(outputValue)
    709 									   << TestLog::EndMessage;
    710 					return false;
    711 				}
    712 			}
    713 		}
    714 
    715 		return true;
    716 	}
    717 };
    718 
    719 class ShaderAtomicXorCase : public ShaderAtomicOpCase
    720 {
    721 public:
    722 	ShaderAtomicXorCase (Context& context, const char* name, AtomicOperandType operandType, DataType type, Precision precision)
    723 		: ShaderAtomicOpCase(context, name, "atomicXor", operandType, type, precision, UVec3(3,2,1))
    724 	{
    725 		m_initialValue = 0;
    726 	}
    727 
    728 protected:
    729 	void getInputs (int numValues, int stride, void* inputs) const
    730 	{
    731 		de::Random		rnd				(deStringHash(getName()));
    732 		const int		workGroupSize	= (int)product(m_workGroupSize);
    733 		const int		numWorkGroups	= numValues/workGroupSize;
    734 
    735 		for (int groupNdx = 0; groupNdx < numWorkGroups; groupNdx++)
    736 		{
    737 			const int groupOffset = groupNdx*workGroupSize;
    738 
    739 			// First uses random bit-pattern.
    740 			*(deUint32*)((deUint8*)inputs + stride*(groupOffset)) = rnd.getUint32();
    741 
    742 			// Rest have either all or no bits set.
    743 			for (int localNdx = 1; localNdx < workGroupSize; localNdx++)
    744 				*(deUint32*)((deUint8*)inputs + stride*(groupOffset+localNdx)) = rnd.getBool() ? ~0u : 0u;
    745 		}
    746 	}
    747 
    748 	bool verify (int numValues, int inputStride, const void* inputs, int outputStride, const void* outputs, int groupStride, const void* groupOutputs) const
    749 	{
    750 		const int		workGroupSize	= (int)product(m_workGroupSize);
    751 		const int		numWorkGroups	= numValues/workGroupSize;
    752 		const int		numBits			= m_precision == PRECISION_HIGHP ? 32 :
    753 										  m_precision == PRECISION_MEDIUMP ? 16 : 8;
    754 		const deUint32	compareMask		= numBits == 32 ? ~0u : (1u<<numBits)-1u;
    755 
    756 		for (int groupNdx = 0; groupNdx < numWorkGroups; groupNdx++)
    757 		{
    758 			const int		groupOffset		= groupNdx*workGroupSize;
    759 			const deUint32	groupOutput		= *(const deUint32*)((const deUint8*)groupOutputs + groupNdx*groupStride);
    760 			const deUint32	randomValue		= *(const deInt32*)((const deUint8*)inputs + inputStride*groupOffset);
    761 			const deUint32	expected0		= randomValue ^ 0u;
    762 			const deUint32	expected1		= randomValue ^ ~0u;
    763 			int				numXorZeros		= (m_initialValue == 0) ? 1 : 0;
    764 
    765 			for (int localNdx = 1; localNdx < workGroupSize; localNdx++)
    766 			{
    767 				const deUint32 inputValue = *(const deUint32*)((const deUint8*)inputs + inputStride*(groupOffset+localNdx));
    768 				if (inputValue == 0)
    769 					numXorZeros += 1;
    770 			}
    771 
    772 			const deUint32 expected = (numXorZeros%2 == 0) ? expected0 : expected1;
    773 
    774 			if ((groupOutput & compareMask) != (expected & compareMask))
    775 			{
    776 				m_testCtx.getLog() << TestLog::Message << "ERROR: at group " << groupNdx << ": expected " << tcu::toHex(expected0)
    777 													   << " or " << tcu::toHex(expected1) << " (compare mask " << tcu::toHex(compareMask)
    778 													   << "), got " << tcu::toHex(groupOutput) << TestLog::EndMessage;
    779 				return false;
    780 			}
    781 
    782 			for (int localNdx = 0; localNdx < workGroupSize; localNdx++)
    783 			{
    784 				const deUint32 outputValue = *(const deUint32*)((const deUint8*)outputs + outputStride*(groupOffset+localNdx));
    785 
    786 				if ((outputValue & compareMask) != 0 &&
    787 					(outputValue & compareMask) != compareMask &&
    788 					(outputValue & compareMask) != (expected0&compareMask) &&
    789 					(outputValue & compareMask) != (expected1&compareMask))
    790 				{
    791 					m_testCtx.getLog() << TestLog::Message << "ERROR: at group " << groupNdx << ", invocation " << localNdx
    792 														   << ": found unexpected value " << tcu::toHex(outputValue)
    793 									   << TestLog::EndMessage;
    794 					return false;
    795 				}
    796 			}
    797 		}
    798 
    799 		return true;
    800 	}
    801 };
    802 
    803 class ShaderAtomicExchangeCase : public ShaderAtomicOpCase
    804 {
    805 public:
    806 	ShaderAtomicExchangeCase (Context& context, const char* name, AtomicOperandType operandType, DataType type, Precision precision)
    807 		: ShaderAtomicOpCase(context, name, "atomicExchange", operandType, type, precision, UVec3(3,2,1))
    808 	{
    809 		m_initialValue = 0;
    810 	}
    811 
    812 protected:
    813 	void getInputs (int numValues, int stride, void* inputs) const
    814 	{
    815 		const int	workGroupSize	= (int)product(m_workGroupSize);
    816 		const int	numWorkGroups	= numValues/workGroupSize;
    817 
    818 		for (int groupNdx = 0; groupNdx < numWorkGroups; groupNdx++)
    819 		{
    820 			const int groupOffset = groupNdx*workGroupSize;
    821 
    822 			for (int localNdx = 0; localNdx < workGroupSize; localNdx++)
    823 				*(int*)((deUint8*)inputs + stride*(groupOffset+localNdx)) = localNdx+1;
    824 		}
    825 	}
    826 
    827 	bool verify (int numValues, int inputStride, const void* inputs, int outputStride, const void* outputs, int groupStride, const void* groupOutputs) const
    828 	{
    829 		const int	workGroupSize	= (int)product(m_workGroupSize);
    830 		const int	numWorkGroups	= numValues/workGroupSize;
    831 
    832 		DE_UNREF(inputStride && inputs);
    833 
    834 		for (int groupNdx = 0; groupNdx < numWorkGroups; groupNdx++)
    835 		{
    836 			const int	groupOffset		= groupNdx*workGroupSize;
    837 			const int	groupOutput		= *(const deInt32*)((const deUint8*)groupOutputs + groupNdx*groupStride);
    838 			set<int>	usedValues;
    839 
    840 			for (int localNdx = 0; localNdx < workGroupSize; localNdx++)
    841 			{
    842 				const int outputValue = *(const deInt32*)((const deUint8*)outputs + outputStride*(groupOffset+localNdx));
    843 
    844 				if (!de::inRange(outputValue, 0, workGroupSize) || usedValues.find(outputValue) != usedValues.end())
    845 				{
    846 					m_testCtx.getLog() << TestLog::Message << "ERROR: at group " << groupNdx << ", invocation " << localNdx
    847 														   << ": found unexpected value " << outputValue
    848 									   << TestLog::EndMessage;
    849 					return false;
    850 				}
    851 				usedValues.insert(outputValue);
    852 			}
    853 
    854 			if (!de::inRange(groupOutput, 0, workGroupSize) || usedValues.find(groupOutput) != usedValues.end())
    855 			{
    856 				m_testCtx.getLog() << TestLog::Message << "ERROR: at group " << groupNdx << ": unexpected final value" << groupOutput << TestLog::EndMessage;
    857 				return false;
    858 			}
    859 		}
    860 
    861 		return true;
    862 	}
    863 };
    864 
    865 class ShaderAtomicCompSwapCase : public TestCase
    866 {
    867 public:
    868 									ShaderAtomicCompSwapCase	(Context& context, const char* name, AtomicOperandType operandType, DataType type, Precision precision);
    869 									~ShaderAtomicCompSwapCase	(void);
    870 
    871 	void							init						(void);
    872 	void							deinit						(void);
    873 	IterateResult					iterate						(void);
    874 
    875 protected:
    876 
    877 private:
    878 									ShaderAtomicCompSwapCase	(const ShaderAtomicCompSwapCase& other);
    879 	ShaderAtomicCompSwapCase&		operator=					(const ShaderAtomicCompSwapCase& other);
    880 
    881 	const AtomicOperandType			m_operandType;
    882 	const DataType					m_type;
    883 	const Precision					m_precision;
    884 
    885 	const UVec3						m_workGroupSize;
    886 	const UVec3						m_numWorkGroups;
    887 
    888 	ShaderProgram*					m_program;
    889 };
    890 
    891 ShaderAtomicCompSwapCase::ShaderAtomicCompSwapCase (Context& context, const char* name, AtomicOperandType operandType, DataType type, Precision precision)
    892 	: TestCase			(context, name, "atomicCompSwap() Test")
    893 	, m_operandType		(operandType)
    894 	, m_type			(type)
    895 	, m_precision		(precision)
    896 	, m_workGroupSize	(3,2,1)
    897 	, m_numWorkGroups	(4,4,4)
    898 	, m_program			(DE_NULL)
    899 {
    900 }
    901 
    902 ShaderAtomicCompSwapCase::~ShaderAtomicCompSwapCase (void)
    903 {
    904 	ShaderAtomicCompSwapCase::deinit();
    905 }
    906 
    907 void ShaderAtomicCompSwapCase::init (void)
    908 {
    909 	const bool			isSSBO		= m_operandType == ATOMIC_OPERAND_BUFFER_VARIABLE;
    910 	const char*			precName	= getPrecisionName(m_precision);
    911 	const char*			typeName	= getDataTypeName(m_type);
    912 	const deUint32		numValues	= product(m_workGroupSize)*product(m_numWorkGroups);
    913 	std::ostringstream	src;
    914 
    915 	src << "#version 310 es\n"
    916 		<< "layout(local_size_x = " << m_workGroupSize.x()
    917 		<< ", local_size_y = " << m_workGroupSize.y()
    918 		<< ", local_size_z = " << m_workGroupSize.z() << ") in;\n"
    919 		<< "layout(binding = 0) buffer InOut\n"
    920 		<< "{\n"
    921 		<< "	" << precName << " " << typeName << " compareValues[" << numValues << "];\n"
    922 		<< "	" << precName << " " << typeName << " exchangeValues[" << numValues << "];\n"
    923 		<< "	" << precName << " " << typeName << " outputValues[" << numValues << "];\n"
    924 		<< "	" << (isSSBO ? "coherent " : "") << precName << " " << typeName << " groupValues[" << product(m_numWorkGroups) << "];\n"
    925 		<< "} sb_inout;\n";
    926 
    927 	if (!isSSBO)
    928 		src << "shared " << precName << " " << typeName << " s_var;\n";
    929 
    930 	src << "\n"
    931 		<< "void main (void)\n"
    932 		<< "{\n"
    933 		<< "	uint localSize  = gl_WorkGroupSize.x*gl_WorkGroupSize.y*gl_WorkGroupSize.z;\n"
    934 		<< "	uint globalNdx  = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
    935 		<< "	uint globalOffs = localSize*globalNdx;\n"
    936 		<< "	uint offset     = globalOffs + gl_LocalInvocationIndex;\n"
    937 		<< "\n";
    938 
    939 	if (!isSSBO)
    940 	{
    941 		src << "	if (gl_LocalInvocationIndex == 0u)\n"
    942 			<< "		s_var = " << typeName << "(" << 0 << ");\n"
    943 			<< "\n";
    944 	}
    945 
    946 	src << "	" << precName << " " << typeName << " compare = sb_inout.compareValues[offset];\n"
    947 		<< "	" << precName << " " << typeName << " exchange = sb_inout.exchangeValues[offset];\n"
    948 		<< "	" << precName << " " << typeName << " result;\n"
    949 		<< "	bool swapDone = false;\n"
    950 		<< "\n"
    951 		<< "	for (uint ndx = 0u; ndx < localSize; ndx++)\n"
    952 		<< "	{\n"
    953 		<< "		barrier();\n"
    954 		<< "		if (!swapDone)\n"
    955 		<< "		{\n"
    956 		<< "			result = atomicCompSwap(" << (isSSBO ? "sb_inout.groupValues[globalNdx]" : "s_var") << ", compare, exchange);\n"
    957 		<< "			if (result == compare)\n"
    958 		<< "				swapDone = true;\n"
    959 		<< "		}\n"
    960 		<< "	}\n"
    961 		<< "\n"
    962 		<< "	sb_inout.outputValues[offset] = result;\n";
    963 
    964 	if (!isSSBO)
    965 	{
    966 		src << "	barrier();\n"
    967 			<< "	if (gl_LocalInvocationIndex == 0u)\n"
    968 			<< "		sb_inout.groupValues[globalNdx] = s_var;\n";
    969 	}
    970 
    971 	src << "}\n";
    972 
    973 	DE_ASSERT(!m_program);
    974 	m_program = new ShaderProgram(m_context.getRenderContext(), ProgramSources() << ComputeSource(src.str()));
    975 
    976 	m_testCtx.getLog() << *m_program;
    977 
    978 	if (!m_program->isOk())
    979 	{
    980 		delete m_program;
    981 		m_program = DE_NULL;
    982 		throw tcu::TestError("Compile failed");
    983 	}
    984 }
    985 
    986 void ShaderAtomicCompSwapCase::deinit (void)
    987 {
    988 	delete m_program;
    989 	m_program = DE_NULL;
    990 }
    991 
    992 ShaderAtomicOpCase::IterateResult ShaderAtomicCompSwapCase::iterate (void)
    993 {
    994 	const glw::Functions&		gl				= m_context.getRenderContext().getFunctions();
    995 	const deUint32				program			= m_program->getProgram();
    996 	const Buffer				inoutBuffer		(m_context.getRenderContext());
    997 	const deUint32				blockNdx		= gl.getProgramResourceIndex(program, GL_SHADER_STORAGE_BLOCK, "InOut");
    998 	const InterfaceBlockInfo	blockInfo		= getProgramInterfaceBlockInfo(gl, program, GL_SHADER_STORAGE_BLOCK, blockNdx);
    999 	const deUint32				cmpVarNdx		= gl.getProgramResourceIndex(program, GL_BUFFER_VARIABLE, "InOut.compareValues[0]");
   1000 	const InterfaceVariableInfo	cmpVarInfo		= getProgramInterfaceVariableInfo(gl, program, GL_BUFFER_VARIABLE, cmpVarNdx);
   1001 	const deUint32				exhVarNdx		= gl.getProgramResourceIndex(program, GL_BUFFER_VARIABLE, "InOut.exchangeValues[0]");
   1002 	const InterfaceVariableInfo	exhVarInfo		= getProgramInterfaceVariableInfo(gl, program, GL_BUFFER_VARIABLE, exhVarNdx);
   1003 	const deUint32				outVarNdx		= gl.getProgramResourceIndex(program, GL_BUFFER_VARIABLE, "InOut.outputValues[0]");
   1004 	const InterfaceVariableInfo	outVarInfo		= getProgramInterfaceVariableInfo(gl, program, GL_BUFFER_VARIABLE, outVarNdx);
   1005 	const deUint32				groupVarNdx		= gl.getProgramResourceIndex(program, GL_BUFFER_VARIABLE, "InOut.groupValues[0]");
   1006 	const InterfaceVariableInfo	groupVarInfo	= getProgramInterfaceVariableInfo(gl, program, GL_BUFFER_VARIABLE, groupVarNdx);
   1007 	const deUint32				numValues		= product(m_workGroupSize)*product(m_numWorkGroups);
   1008 
   1009 	TCU_CHECK(cmpVarInfo.arraySize == numValues &&
   1010 			  exhVarInfo.arraySize == numValues &&
   1011 			  outVarInfo.arraySize == numValues &&
   1012 			  groupVarInfo.arraySize == product(m_numWorkGroups));
   1013 
   1014 	gl.useProgram(program);
   1015 
   1016 	// \todo [2013-09-05 pyry] Use randomized input values!
   1017 
   1018 	// Setup buffer.
   1019 	{
   1020 		const deUint32	workGroupSize	= product(m_workGroupSize);
   1021 		vector<deUint8>	bufData			(blockInfo.dataSize);
   1022 
   1023 		std::fill(bufData.begin(), bufData.end(), 0);
   1024 
   1025 		for (deUint32 ndx = 0; ndx < numValues; ndx++)
   1026 			*(deUint32*)(&bufData[0] + cmpVarInfo.offset + cmpVarInfo.arrayStride*ndx) = ndx%workGroupSize;
   1027 
   1028 		for (deUint32 ndx = 0; ndx < numValues; ndx++)
   1029 			*(deUint32*)(&bufData[0] + exhVarInfo.offset + exhVarInfo.arrayStride*ndx) = (ndx%workGroupSize)+1;
   1030 
   1031 		gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *inoutBuffer);
   1032 		gl.bufferData(GL_SHADER_STORAGE_BUFFER, blockInfo.dataSize, &bufData[0], GL_STATIC_READ);
   1033 		gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, *inoutBuffer);
   1034 		GLU_EXPECT_NO_ERROR(gl.getError(), "Output buffer setup failed");
   1035 	}
   1036 
   1037 	gl.dispatchCompute(m_numWorkGroups.x(), m_numWorkGroups.y(), m_numWorkGroups.z());
   1038 
   1039 	// Read back and compare
   1040 	{
   1041 		const void*		resPtr			= gl.mapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, blockInfo.dataSize, GL_MAP_READ_BIT);
   1042 		const int		numWorkGroups	= (int)product(m_numWorkGroups);
   1043 		const int		workGroupSize	= (int)product(m_workGroupSize);
   1044 		bool			isOk			= true;
   1045 
   1046 		GLU_EXPECT_NO_ERROR(gl.getError(), "glMapBufferRange()");
   1047 		TCU_CHECK(resPtr);
   1048 
   1049 		for (int groupNdx = 0; groupNdx < numWorkGroups; groupNdx++)
   1050 		{
   1051 			const int	groupOffset		= groupNdx*workGroupSize;
   1052 			const int	groupOutput		= *(const deInt32*)((const deUint8*)resPtr + groupVarInfo.offset + groupNdx*groupVarInfo.arrayStride);
   1053 
   1054 			for (int localNdx = 0; localNdx < workGroupSize; localNdx++)
   1055 			{
   1056 				const int	refValue		= localNdx;
   1057 				const int	outputValue		= *(const deInt32*)((const deUint8*)resPtr + outVarInfo.offset + outVarInfo.arrayStride*(groupOffset+localNdx));
   1058 
   1059 				if (outputValue != refValue)
   1060 				{
   1061 					m_testCtx.getLog() << TestLog::Message << "ERROR: at group " << groupNdx << ", invocation " << localNdx
   1062 														   << ": expected " << refValue << ", got " << outputValue
   1063 									   << TestLog::EndMessage;
   1064 					isOk = false;
   1065 					break;
   1066 				}
   1067 			}
   1068 
   1069 			if (groupOutput != workGroupSize)
   1070 			{
   1071 				m_testCtx.getLog() << TestLog::Message << "ERROR: at group " << groupNdx << ": expected" << workGroupSize << ", got " << groupOutput << TestLog::EndMessage;
   1072 				isOk = false;
   1073 				break;
   1074 			}
   1075 		}
   1076 
   1077 		gl.unmapBuffer(GL_SHADER_STORAGE_BUFFER);
   1078 		GLU_EXPECT_NO_ERROR(gl.getError(), "glUnmapBuffer()");
   1079 
   1080 		m_testCtx.setTestResult(isOk ? QP_TEST_RESULT_PASS	: QP_TEST_RESULT_FAIL,
   1081 								isOk ? "Pass"				: "Comparison failed");
   1082 	}
   1083 
   1084 	return STOP;
   1085 }
   1086 
   1087 ShaderAtomicOpTests::ShaderAtomicOpTests (Context& context, const char* name, AtomicOperandType operandType)
   1088 	: TestCaseGroup	(context, name, "Atomic Operation Tests")
   1089 	, m_operandType	(operandType)
   1090 {
   1091 }
   1092 
   1093 ShaderAtomicOpTests::~ShaderAtomicOpTests (void)
   1094 {
   1095 }
   1096 
   1097 template<typename T>
   1098 static tcu::TestCaseGroup* createAtomicOpGroup (Context& context, AtomicOperandType operandType, const char* groupName)
   1099 {
   1100 	tcu::TestCaseGroup *const group = new tcu::TestCaseGroup(context.getTestContext(), groupName, (string("Atomic ") + groupName).c_str());
   1101 	try
   1102 	{
   1103 		for (int precNdx = 0; precNdx < PRECISION_LAST; precNdx++)
   1104 		{
   1105 			for (int typeNdx = 0; typeNdx < 2; typeNdx++)
   1106 			{
   1107 				const Precision		precision		= Precision(precNdx);
   1108 				const DataType		type			= typeNdx > 0 ? TYPE_INT : TYPE_UINT;
   1109 				const string		caseName		= string(getPrecisionName(precision)) + "_" + getDataTypeName(type);
   1110 
   1111 				group->addChild(new T(context, caseName.c_str(), operandType, type, precision));
   1112 			}
   1113 		}
   1114 
   1115 		return group;
   1116 	}
   1117 	catch (...)
   1118 	{
   1119 		delete group;
   1120 		throw;
   1121 	}
   1122 }
   1123 
   1124 void ShaderAtomicOpTests::init (void)
   1125 {
   1126 	addChild(createAtomicOpGroup<ShaderAtomicAddCase>		(m_context, m_operandType, "add"));
   1127 	addChild(createAtomicOpGroup<ShaderAtomicMinCase>		(m_context, m_operandType, "min"));
   1128 	addChild(createAtomicOpGroup<ShaderAtomicMaxCase>		(m_context, m_operandType, "max"));
   1129 	addChild(createAtomicOpGroup<ShaderAtomicAndCase>		(m_context, m_operandType, "and"));
   1130 	addChild(createAtomicOpGroup<ShaderAtomicOrCase>		(m_context, m_operandType, "or"));
   1131 	addChild(createAtomicOpGroup<ShaderAtomicXorCase>		(m_context, m_operandType, "xor"));
   1132 	addChild(createAtomicOpGroup<ShaderAtomicExchangeCase>	(m_context, m_operandType, "exchange"));
   1133 	addChild(createAtomicOpGroup<ShaderAtomicCompSwapCase>	(m_context, m_operandType, "compswap"));
   1134 }
   1135 
   1136 } // Functional
   1137 } // gles31
   1138 } // deqp
   1139