Home | History | Annotate | Download | only in functional
      1 /*-------------------------------------------------------------------------
      2  * drawElements Quality Program OpenGL ES 3.1 Module
      3  * -------------------------------------------------
      4  *
      5  * Copyright 2014 The Android Open Source Project
      6  *
      7  * Licensed under the Apache License, Version 2.0 (the "License");
      8  * you may not use this file except in compliance with the License.
      9  * You may obtain a copy of the License at
     10  *
     11  *      http://www.apache.org/licenses/LICENSE-2.0
     12  *
     13  * Unless required by applicable law or agreed to in writing, software
     14  * distributed under the License is distributed on an "AS IS" BASIS,
     15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     16  * See the License for the specific language governing permissions and
     17  * limitations under the License.
     18  *
     19  *//*!
     20  * \file
     21  * \brief Basic Compute Shader Tests.
     22  *//*--------------------------------------------------------------------*/
     23 
     24 #include "es31fBasicComputeShaderTests.hpp"
     25 #include "gluShaderProgram.hpp"
     26 #include "gluObjectWrapper.hpp"
     27 #include "gluRenderContext.hpp"
     28 #include "gluProgramInterfaceQuery.hpp"
     29 #include "gluContextInfo.hpp"
     30 #include "glwFunctions.hpp"
     31 #include "glwEnums.hpp"
     32 #include "tcuTestLog.hpp"
     33 #include "deRandom.hpp"
     34 #include "deStringUtil.hpp"
     35 #include "deMemory.h"
     36 
     37 namespace deqp
     38 {
     39 namespace gles31
     40 {
     41 namespace Functional
     42 {
     43 
     44 using std::string;
     45 using std::vector;
     46 using tcu::TestLog;
     47 using namespace glu;
     48 
     49 //! Utility for mapping buffers.
     50 class BufferMemMap
     51 {
     52 public:
     53 	BufferMemMap (const glw::Functions& gl, deUint32 target, int offset, int size, deUint32 access)
     54 		: m_gl		(gl)
     55 		, m_target	(target)
     56 		, m_ptr		(DE_NULL)
     57 	{
     58 		m_ptr = gl.mapBufferRange(target, offset, size, access);
     59 		GLU_EXPECT_NO_ERROR(gl.getError(), "glMapBufferRange()");
     60 		TCU_CHECK(m_ptr);
     61 	}
     62 
     63 	~BufferMemMap (void)
     64 	{
     65 		m_gl.unmapBuffer(m_target);
     66 	}
     67 
     68 	void*	getPtr		(void) const { return m_ptr; }
     69 	void*	operator*	(void) const { return m_ptr; }
     70 
     71 private:
     72 							BufferMemMap			(const BufferMemMap& other);
     73 	BufferMemMap&			operator=				(const BufferMemMap& other);
     74 
     75 	const glw::Functions&	m_gl;
     76 	const deUint32			m_target;
     77 	void*					m_ptr;
     78 };
     79 
     80 namespace
     81 {
     82 
     83 class EmptyComputeShaderCase : public TestCase
     84 {
     85 public:
     86 	EmptyComputeShaderCase (Context& context)
     87 		: TestCase(context, "empty", "Empty shader")
     88 	{
     89 	}
     90 
     91 	IterateResult iterate (void)
     92 	{
     93 		const ShaderProgram program(m_context.getRenderContext(),
     94 			ProgramSources() << ShaderSource(SHADERTYPE_COMPUTE,
     95 				"#version 310 es\n"
     96 				"layout (local_size_x = 1) in;\n"
     97 				"void main (void) {}\n"
     98 				));
     99 
    100 		const glw::Functions& gl = m_context.getRenderContext().getFunctions();
    101 
    102 		m_testCtx.getLog() << program;
    103 		if (!program.isOk())
    104 			TCU_FAIL("Compile failed");
    105 
    106 		gl.useProgram(program.getProgram());
    107 		gl.dispatchCompute(1, 1, 1);
    108 		GLU_EXPECT_NO_ERROR(gl.getError(), "glDispatchCompute()");
    109 
    110 		m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
    111 		return STOP;
    112 	}
    113 };
    114 
    115 class UBOToSSBOInvertCase : public TestCase
    116 {
    117 public:
    118 	UBOToSSBOInvertCase (Context& context, const char* name, const char* description, int numValues, const tcu::IVec3& localSize, const tcu::IVec3& workSize)
    119 		: TestCase		(context, name, description)
    120 		, m_numValues	(numValues)
    121 		, m_localSize	(localSize)
    122 		, m_workSize	(workSize)
    123 	{
    124 		DE_ASSERT(m_numValues % (m_workSize[0]*m_workSize[1]*m_workSize[2]*m_localSize[0]*m_localSize[1]*m_localSize[2]) == 0);
    125 	}
    126 
    127 	IterateResult iterate (void)
    128 	{
    129 		std::ostringstream src;
    130 		src << "#version 310 es\n"
    131 			<< "layout (local_size_x = " << m_localSize[0] << ", local_size_y = " << m_localSize[1] << ", local_size_z = " << m_localSize[2] << ") in;\n"
    132 			<< "uniform Input {\n"
    133 			<< "    uint values[" << m_numValues << "];\n"
    134 			<< "} ub_in;\n"
    135 			<< "layout(binding = 1) buffer Output {\n"
    136 			<< "    uint values[" << m_numValues << "];\n"
    137 			<< "} sb_out;\n"
    138 			<< "void main (void) {\n"
    139 			<< "    uvec3 size           = gl_NumWorkGroups * gl_WorkGroupSize;\n"
    140 			<< "    uint numValuesPerInv = uint(ub_in.values.length()) / (size.x*size.y*size.z);\n"
    141 			<< "    uint groupNdx        = size.x*size.y*gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x;\n"
    142 			<< "    uint offset          = numValuesPerInv*groupNdx;\n"
    143 			<< "\n"
    144 			<< "    for (uint ndx = 0u; ndx < numValuesPerInv; ndx++)\n"
    145 			<< "        sb_out.values[offset + ndx] = ~ub_in.values[offset + ndx];\n"
    146 			<< "}\n";
    147 
    148 		const glw::Functions&		gl				= m_context.getRenderContext().getFunctions();
    149 		const ShaderProgram			program			(m_context.getRenderContext(), ProgramSources() << ShaderSource(SHADERTYPE_COMPUTE, src.str()));
    150 		const Buffer				inputBuffer		(m_context.getRenderContext());
    151 		const Buffer				outputBuffer	(m_context.getRenderContext());
    152 		std::vector<deUint32>		inputValues		(m_numValues);
    153 
    154 		// Compute input values.
    155 		{
    156 			de::Random rnd(0x111223f);
    157 			for (int ndx = 0; ndx < (int)inputValues.size(); ndx++)
    158 				inputValues[ndx] = rnd.getUint32();
    159 		}
    160 
    161 		m_testCtx.getLog() << program;
    162 		if (!program.isOk())
    163 			TCU_FAIL("Compile failed");
    164 
    165 		m_testCtx.getLog() << TestLog::Message << "Work groups: " << m_workSize << TestLog::EndMessage;
    166 
    167 		gl.useProgram(program.getProgram());
    168 
    169 		// Input buffer setup
    170 		{
    171 			const deUint32				blockIndex	= gl.getProgramResourceIndex(program.getProgram(), GL_UNIFORM_BLOCK, "Input");
    172 			const InterfaceBlockInfo	blockInfo	= getProgramInterfaceBlockInfo(gl, program.getProgram(), GL_UNIFORM_BLOCK, blockIndex);
    173 			const deUint32				valueIndex	= gl.getProgramResourceIndex(program.getProgram(), GL_UNIFORM, "Input.values");
    174 			const InterfaceVariableInfo	valueInfo	= getProgramInterfaceVariableInfo(gl, program.getProgram(), GL_UNIFORM, valueIndex);
    175 
    176 			gl.bindBuffer(GL_UNIFORM_BUFFER, *inputBuffer);
    177 			gl.bufferData(GL_UNIFORM_BUFFER, (glw::GLsizeiptr)blockInfo.dataSize, DE_NULL, GL_STATIC_DRAW);
    178 
    179 			{
    180 				const BufferMemMap bufMap(gl, GL_UNIFORM_BUFFER, 0, (int)blockInfo.dataSize, GL_MAP_WRITE_BIT);
    181 
    182 				for (deUint32 ndx = 0; ndx < de::min(valueInfo.arraySize, (deUint32)inputValues.size()); ndx++)
    183 					*(deUint32*)((deUint8*)bufMap.getPtr() + valueInfo.offset + ndx*valueInfo.arrayStride) = inputValues[ndx];
    184 			}
    185 
    186 			gl.uniformBlockBinding(program.getProgram(), blockIndex, 0);
    187 			gl.bindBufferBase(GL_UNIFORM_BUFFER, 0, *inputBuffer);
    188 			GLU_EXPECT_NO_ERROR(gl.getError(), "Input buffer setup failed");
    189 		}
    190 
    191 		// Output buffer setup
    192 		{
    193 			const deUint32		blockIndex		= gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output");
    194 			const int			blockSize		= getProgramResourceInt(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex, GL_BUFFER_DATA_SIZE);
    195 
    196 			gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *outputBuffer);
    197 			gl.bufferData(GL_SHADER_STORAGE_BUFFER, blockSize, DE_NULL, GL_STREAM_READ);
    198 			gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, *outputBuffer);
    199 			GLU_EXPECT_NO_ERROR(gl.getError(), "Output buffer setup failed");
    200 		}
    201 
    202 		// Dispatch compute workload
    203 		gl.dispatchCompute(m_workSize[0], m_workSize[1], m_workSize[2]);
    204 		GLU_EXPECT_NO_ERROR(gl.getError(), "glDispatchCompute()");
    205 
    206 		// Read back and compare
    207 		{
    208 			const deUint32				blockIndex	= gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output");
    209 			const int					blockSize	= getProgramResourceInt(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex, GL_BUFFER_DATA_SIZE);
    210 			const deUint32				valueIndex	= gl.getProgramResourceIndex(program.getProgram(), GL_BUFFER_VARIABLE, "Output.values");
    211 			const InterfaceVariableInfo	valueInfo	= getProgramInterfaceVariableInfo(gl, program.getProgram(), GL_BUFFER_VARIABLE, valueIndex);
    212 			const BufferMemMap			bufMap		(gl, GL_SHADER_STORAGE_BUFFER, 0, blockSize, GL_MAP_READ_BIT);
    213 
    214 			TCU_CHECK(valueInfo.arraySize == (deUint32)inputValues.size());
    215 			for (deUint32 ndx = 0; ndx < valueInfo.arraySize; ndx++)
    216 			{
    217 				const deUint32	res		= *((const deUint32*)((const deUint8*)bufMap.getPtr() + valueInfo.offset + valueInfo.arrayStride*ndx));
    218 				const deUint32	ref		= ~inputValues[ndx];
    219 
    220 				if (res != ref)
    221 					throw tcu::TestError(string("Comparison failed for Output.values[") + de::toString(ndx) + "]");
    222 			}
    223 		}
    224 
    225 		m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
    226 		return STOP;
    227 	}
    228 
    229 private:
    230 	const int			m_numValues;
    231 	const tcu::IVec3	m_localSize;
    232 	const tcu::IVec3	m_workSize;
    233 };
    234 
    235 class CopyInvertSSBOCase : public TestCase
    236 {
    237 public:
    238 	CopyInvertSSBOCase (Context& context, const char* name, const char* description, int numValues, const tcu::IVec3& localSize, const tcu::IVec3& workSize)
    239 		: TestCase		(context, name, description)
    240 		, m_numValues	(numValues)
    241 		, m_localSize	(localSize)
    242 		, m_workSize	(workSize)
    243 	{
    244 		DE_ASSERT(m_numValues % (m_workSize[0]*m_workSize[1]*m_workSize[2]*m_localSize[0]*m_localSize[1]*m_localSize[2]) == 0);
    245 	}
    246 
    247 	IterateResult iterate (void)
    248 	{
    249 		std::ostringstream src;
    250 		src << "#version 310 es\n"
    251 			<< "layout (local_size_x = " << m_localSize[0] << ", local_size_y = " << m_localSize[1] << ", local_size_z = " << m_localSize[2] << ") in;\n"
    252 			<< "layout(binding = 0) buffer Input {\n"
    253 			<< "    uint values[" << m_numValues << "];\n"
    254 			<< "} sb_in;\n"
    255 			<< "layout (binding = 1) buffer Output {\n"
    256 			<< "    uint values[" << m_numValues << "];\n"
    257 			<< "} sb_out;\n"
    258 			<< "void main (void) {\n"
    259 			<< "    uvec3 size           = gl_NumWorkGroups * gl_WorkGroupSize;\n"
    260 			<< "    uint numValuesPerInv = uint(sb_in.values.length()) / (size.x*size.y*size.z);\n"
    261 			<< "    uint groupNdx        = size.x*size.y*gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x;\n"
    262 			<< "    uint offset          = numValuesPerInv*groupNdx;\n"
    263 			<< "\n"
    264 			<< "    for (uint ndx = 0u; ndx < numValuesPerInv; ndx++)\n"
    265 			<< "        sb_out.values[offset + ndx] = ~sb_in.values[offset + ndx];\n"
    266 			<< "}\n";
    267 
    268 		const glw::Functions&		gl				= m_context.getRenderContext().getFunctions();
    269 		const ShaderProgram			program			(m_context.getRenderContext(), ProgramSources() << ShaderSource(SHADERTYPE_COMPUTE, src.str()));
    270 		const Buffer				inputBuffer		(m_context.getRenderContext());
    271 		const Buffer				outputBuffer	(m_context.getRenderContext());
    272 		std::vector<deUint32>		inputValues		(m_numValues);
    273 
    274 		// Compute input values.
    275 		{
    276 			de::Random rnd(0x124fef);
    277 			for (int ndx = 0; ndx < (int)inputValues.size(); ndx++)
    278 				inputValues[ndx] = rnd.getUint32();
    279 		}
    280 
    281 		m_testCtx.getLog() << program;
    282 		if (!program.isOk())
    283 			TCU_FAIL("Compile failed");
    284 
    285 		m_testCtx.getLog() << TestLog::Message << "Work groups: " << m_workSize << TestLog::EndMessage;
    286 
    287 		gl.useProgram(program.getProgram());
    288 
    289 		// Input buffer setup
    290 		{
    291 			const deUint32				blockIndex	= gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Input");
    292 			const InterfaceBlockInfo	blockInfo	= getProgramInterfaceBlockInfo(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex);
    293 			const deUint32				valueIndex	= gl.getProgramResourceIndex(program.getProgram(), GL_BUFFER_VARIABLE, "Input.values");
    294 			const InterfaceVariableInfo	valueInfo	= getProgramInterfaceVariableInfo(gl, program.getProgram(), GL_BUFFER_VARIABLE, valueIndex);
    295 
    296 			gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *inputBuffer);
    297 			gl.bufferData(GL_SHADER_STORAGE_BUFFER, (glw::GLsizeiptr)blockInfo.dataSize, DE_NULL, GL_STATIC_DRAW);
    298 
    299 			TCU_CHECK(valueInfo.arraySize == (deUint32)inputValues.size());
    300 
    301 			{
    302 				const BufferMemMap bufMap(gl, GL_SHADER_STORAGE_BUFFER, 0, (int)blockInfo.dataSize, GL_MAP_WRITE_BIT);
    303 
    304 				for (deUint32 ndx = 0; ndx < (deUint32)inputValues.size(); ndx++)
    305 					*(deUint32*)((deUint8*)bufMap.getPtr() + valueInfo.offset + ndx*valueInfo.arrayStride) = inputValues[ndx];
    306 			}
    307 
    308 			gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, blockInfo.bufferBinding, *inputBuffer);
    309 			GLU_EXPECT_NO_ERROR(gl.getError(), "Input buffer setup failed");
    310 		}
    311 
    312 		// Output buffer setup
    313 		{
    314 			const deUint32				blockIndex	= gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output");
    315 			const InterfaceBlockInfo	blockInfo	= getProgramInterfaceBlockInfo(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex);
    316 
    317 			gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *outputBuffer);
    318 			gl.bufferData(GL_SHADER_STORAGE_BUFFER, blockInfo.dataSize, DE_NULL, GL_STREAM_READ);
    319 			gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, blockInfo.bufferBinding, *outputBuffer);
    320 			GLU_EXPECT_NO_ERROR(gl.getError(), "Output buffer setup failed");
    321 		}
    322 
    323 		// Dispatch compute workload
    324 		gl.dispatchCompute(m_workSize[0], m_workSize[1], m_workSize[2]);
    325 		GLU_EXPECT_NO_ERROR(gl.getError(), "glDispatchCompute()");
    326 
    327 		// Read back and compare
    328 		{
    329 			const deUint32				blockIndex	= gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output");
    330 			const int					blockSize	= getProgramResourceInt(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex, GL_BUFFER_DATA_SIZE);
    331 			const deUint32				valueIndex	= gl.getProgramResourceIndex(program.getProgram(), GL_BUFFER_VARIABLE, "Output.values");
    332 			const InterfaceVariableInfo	valueInfo	= getProgramInterfaceVariableInfo(gl, program.getProgram(), GL_BUFFER_VARIABLE, valueIndex);
    333 			const BufferMemMap			bufMap		(gl, GL_SHADER_STORAGE_BUFFER, 0, blockSize, GL_MAP_READ_BIT);
    334 
    335 			TCU_CHECK(valueInfo.arraySize == (deUint32)inputValues.size());
    336 			for (deUint32 ndx = 0; ndx < valueInfo.arraySize; ndx++)
    337 			{
    338 				const deUint32	res		= *((const deUint32*)((const deUint8*)bufMap.getPtr() + valueInfo.offset + valueInfo.arrayStride*ndx));
    339 				const deUint32	ref		= ~inputValues[ndx];
    340 
    341 				if (res != ref)
    342 					throw tcu::TestError(string("Comparison failed for Output.values[") + de::toString(ndx) + "]");
    343 			}
    344 		}
    345 
    346 		m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
    347 		return STOP;
    348 	}
    349 
    350 private:
    351 	const int			m_numValues;
    352 	const tcu::IVec3	m_localSize;
    353 	const tcu::IVec3	m_workSize;
    354 };
    355 
    356 class InvertSSBOInPlaceCase : public TestCase
    357 {
    358 public:
    359 	InvertSSBOInPlaceCase (Context& context, const char* name, const char* description, int numValues, bool isSized, const tcu::IVec3& localSize, const tcu::IVec3& workSize)
    360 		: TestCase		(context, name, description)
    361 		, m_numValues	(numValues)
    362 		, m_isSized		(isSized)
    363 		, m_localSize	(localSize)
    364 		, m_workSize	(workSize)
    365 	{
    366 		DE_ASSERT(m_numValues % (m_workSize[0]*m_workSize[1]*m_workSize[2]*m_localSize[0]*m_localSize[1]*m_localSize[2]) == 0);
    367 	}
    368 
    369 	IterateResult iterate (void)
    370 	{
    371 		std::ostringstream src;
    372 		src << "#version 310 es\n"
    373 			<< "layout (local_size_x = " << m_localSize[0] << ", local_size_y = " << m_localSize[1] << ", local_size_z = " << m_localSize[2] << ") in;\n"
    374 			<< "layout(binding = 0) buffer InOut {\n"
    375 			<< "    uint values[" << (m_isSized ? de::toString(m_numValues) : string("")) << "];\n"
    376 			<< "} sb_inout;\n"
    377 			<< "void main (void) {\n"
    378 			<< "    uvec3 size           = gl_NumWorkGroups * gl_WorkGroupSize;\n"
    379 			<< "    uint numValuesPerInv = uint(sb_inout.values.length()) / (size.x*size.y*size.z);\n"
    380 			<< "    uint groupNdx        = size.x*size.y*gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x;\n"
    381 			<< "    uint offset          = numValuesPerInv*groupNdx;\n"
    382 			<< "\n"
    383 			<< "    for (uint ndx = 0u; ndx < numValuesPerInv; ndx++)\n"
    384 			<< "        sb_inout.values[offset + ndx] = ~sb_inout.values[offset + ndx];\n"
    385 			<< "}\n";
    386 
    387 		const glw::Functions&		gl				= m_context.getRenderContext().getFunctions();
    388 		const ShaderProgram			program			(m_context.getRenderContext(), ProgramSources() << ShaderSource(SHADERTYPE_COMPUTE, src.str()));
    389 
    390 		m_testCtx.getLog() << program;
    391 		if (!program.isOk())
    392 			TCU_FAIL("Compile failed");
    393 
    394 		const Buffer				outputBuffer	(m_context.getRenderContext());
    395 		const deUint32				valueIndex		= gl.getProgramResourceIndex(program.getProgram(), GL_BUFFER_VARIABLE, "InOut.values");
    396 		const InterfaceVariableInfo	valueInfo		= getProgramInterfaceVariableInfo(gl, program.getProgram(), GL_BUFFER_VARIABLE, valueIndex);
    397 		const deUint32				blockSize		= valueInfo.arrayStride*(deUint32)m_numValues;
    398 		std::vector<deUint32>		inputValues		(m_numValues);
    399 
    400 		// Compute input values.
    401 		{
    402 			de::Random rnd(0x82ce7f);
    403 			for (int ndx = 0; ndx < (int)inputValues.size(); ndx++)
    404 				inputValues[ndx] = rnd.getUint32();
    405 		}
    406 
    407 		TCU_CHECK(valueInfo.arraySize == (deUint32)(m_isSized ? m_numValues : 0));
    408 
    409 		m_testCtx.getLog() << TestLog::Message << "Work groups: " << m_workSize << TestLog::EndMessage;
    410 
    411 		gl.useProgram(program.getProgram());
    412 
    413 		// Output buffer setup
    414 		{
    415 			gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *outputBuffer);
    416 			gl.bufferData(GL_SHADER_STORAGE_BUFFER, blockSize, DE_NULL, GL_STREAM_DRAW);
    417 
    418 			{
    419 				const BufferMemMap bufMap(gl, GL_SHADER_STORAGE_BUFFER, 0, (int)blockSize, GL_MAP_WRITE_BIT);
    420 
    421 				for (deUint32 ndx = 0; ndx < (deUint32)inputValues.size(); ndx++)
    422 					*(deUint32*)((deUint8*)bufMap.getPtr() + valueInfo.offset + ndx*valueInfo.arrayStride) = inputValues[ndx];
    423 			}
    424 
    425 			gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, *outputBuffer);
    426 			GLU_EXPECT_NO_ERROR(gl.getError(), "Buffer setup failed");
    427 		}
    428 
    429 		// Dispatch compute workload
    430 		gl.dispatchCompute(m_workSize[0], m_workSize[1], m_workSize[2]);
    431 		GLU_EXPECT_NO_ERROR(gl.getError(), "glDispatchCompute()");
    432 
    433 		// Read back and compare
    434 		{
    435 			const BufferMemMap bufMap(gl, GL_SHADER_STORAGE_BUFFER, 0, blockSize, GL_MAP_READ_BIT);
    436 
    437 			for (deUint32 ndx = 0; ndx < (deUint32)inputValues.size(); ndx++)
    438 			{
    439 				const deUint32	res		= *((const deUint32*)((const deUint8*)bufMap.getPtr() + valueInfo.offset + valueInfo.arrayStride*ndx));
    440 				const deUint32	ref		= ~inputValues[ndx];
    441 
    442 				if (res != ref)
    443 					throw tcu::TestError(string("Comparison failed for InOut.values[") + de::toString(ndx) + "]");
    444 			}
    445 		}
    446 
    447 		m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
    448 		return STOP;
    449 	}
    450 
    451 private:
    452 	const int			m_numValues;
    453 	const bool			m_isSized;
    454 	const tcu::IVec3	m_localSize;
    455 	const tcu::IVec3	m_workSize;
    456 };
    457 
    458 class WriteToMultipleSSBOCase : public TestCase
    459 {
    460 public:
    461 	WriteToMultipleSSBOCase (Context& context, const char* name, const char* description, int numValues, bool isSized, const tcu::IVec3& localSize, const tcu::IVec3& workSize)
    462 		: TestCase		(context, name, description)
    463 		, m_numValues	(numValues)
    464 		, m_isSized		(isSized)
    465 		, m_localSize	(localSize)
    466 		, m_workSize	(workSize)
    467 	{
    468 		DE_ASSERT(m_numValues % (m_workSize[0]*m_workSize[1]*m_workSize[2]*m_localSize[0]*m_localSize[1]*m_localSize[2]) == 0);
    469 	}
    470 
    471 	IterateResult iterate (void)
    472 	{
    473 		std::ostringstream src;
    474 		src << "#version 310 es\n"
    475 			<< "layout (local_size_x = " << m_localSize[0] << ", local_size_y = " << m_localSize[1] << ", local_size_z = " << m_localSize[2] << ") in;\n"
    476 			<< "layout(binding = 0) buffer Out0 {\n"
    477 			<< "    uint values[" << (m_isSized ? de::toString(m_numValues) : string("")) << "];\n"
    478 			<< "} sb_out0;\n"
    479 			<< "layout(binding = 1) buffer Out1 {\n"
    480 			<< "    uint values[" << (m_isSized ? de::toString(m_numValues) : string("")) << "];\n"
    481 			<< "} sb_out1;\n"
    482 			<< "void main (void) {\n"
    483 			<< "    uvec3 size      = gl_NumWorkGroups * gl_WorkGroupSize;\n"
    484 			<< "    uint groupNdx   = size.x*size.y*gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x;\n"
    485 			<< "\n"
    486 			<< "    {\n"
    487 			<< "        uint numValuesPerInv = uint(sb_out0.values.length()) / (size.x*size.y*size.z);\n"
    488 			<< "        uint offset          = numValuesPerInv*groupNdx;\n"
    489 			<< "\n"
    490 			<< "        for (uint ndx = 0u; ndx < numValuesPerInv; ndx++)\n"
    491 			<< "            sb_out0.values[offset + ndx] = offset + ndx;\n"
    492 			<< "    }\n"
    493 			<< "    {\n"
    494 			<< "        uint numValuesPerInv = uint(sb_out1.values.length()) / (size.x*size.y*size.z);\n"
    495 			<< "        uint offset          = numValuesPerInv*groupNdx;\n"
    496 			<< "\n"
    497 			<< "        for (uint ndx = 0u; ndx < numValuesPerInv; ndx++)\n"
    498 			<< "            sb_out1.values[offset + ndx] = uint(sb_out1.values.length()) - offset - ndx;\n"
    499 			<< "    }\n"
    500 			<< "}\n";
    501 
    502 		const glw::Functions&		gl				= m_context.getRenderContext().getFunctions();
    503 		const ShaderProgram			program			(m_context.getRenderContext(), ProgramSources() << ShaderSource(SHADERTYPE_COMPUTE, src.str()));
    504 
    505 		m_testCtx.getLog() << program;
    506 		if (!program.isOk())
    507 			TCU_FAIL("Compile failed");
    508 
    509 		const Buffer				outputBuffer0	(m_context.getRenderContext());
    510 		const deUint32				value0Index		= gl.getProgramResourceIndex(program.getProgram(), GL_BUFFER_VARIABLE, "Out0.values");
    511 		const InterfaceVariableInfo	value0Info		= getProgramInterfaceVariableInfo(gl, program.getProgram(), GL_BUFFER_VARIABLE, value0Index);
    512 		const deUint32				block0Size		= value0Info.arrayStride*(deUint32)m_numValues;
    513 
    514 		const Buffer				outputBuffer1	(m_context.getRenderContext());
    515 		const deUint32				value1Index		= gl.getProgramResourceIndex(program.getProgram(), GL_BUFFER_VARIABLE, "Out1.values");
    516 		const InterfaceVariableInfo	value1Info		= getProgramInterfaceVariableInfo(gl, program.getProgram(), GL_BUFFER_VARIABLE, value1Index);
    517 		const deUint32				block1Size		= value1Info.arrayStride*(deUint32)m_numValues;
    518 
    519 		TCU_CHECK(value0Info.arraySize == (deUint32)(m_isSized ? m_numValues : 0));
    520 		TCU_CHECK(value1Info.arraySize == (deUint32)(m_isSized ? m_numValues : 0));
    521 
    522 		m_testCtx.getLog() << TestLog::Message << "Work groups: " << m_workSize << TestLog::EndMessage;
    523 
    524 		gl.useProgram(program.getProgram());
    525 
    526 		// Output buffer setup
    527 		{
    528 			gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *outputBuffer0);
    529 			gl.bufferData(GL_SHADER_STORAGE_BUFFER, block0Size, DE_NULL, GL_STREAM_DRAW);
    530 
    531 			gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, *outputBuffer0);
    532 			GLU_EXPECT_NO_ERROR(gl.getError(), "Buffer setup failed");
    533 		}
    534 		{
    535 			gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *outputBuffer1);
    536 			gl.bufferData(GL_SHADER_STORAGE_BUFFER, block1Size, DE_NULL, GL_STREAM_DRAW);
    537 
    538 			gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, *outputBuffer1);
    539 			GLU_EXPECT_NO_ERROR(gl.getError(), "Buffer setup failed");
    540 		}
    541 
    542 		// Dispatch compute workload
    543 		gl.dispatchCompute(m_workSize[0], m_workSize[1], m_workSize[2]);
    544 		GLU_EXPECT_NO_ERROR(gl.getError(), "glDispatchCompute()");
    545 
    546 		// Read back and compare
    547 		gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *outputBuffer0);
    548 		{
    549 			const BufferMemMap bufMap(gl, GL_SHADER_STORAGE_BUFFER, 0, block0Size, GL_MAP_READ_BIT);
    550 
    551 			for (deUint32 ndx = 0; ndx < (deUint32)m_numValues; ndx++)
    552 			{
    553 				const deUint32	res		= *((const deUint32*)((const deUint8*)bufMap.getPtr() + value0Info.offset + value0Info.arrayStride*ndx));
    554 				const deUint32	ref		= ndx;
    555 
    556 				if (res != ref)
    557 					throw tcu::TestError(string("Comparison failed for Out0.values[") + de::toString(ndx) + "] res=" + de::toString(res) + " ref=" + de::toString(ref));
    558 			}
    559 		}
    560 		gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *outputBuffer1);
    561 		{
    562 			const BufferMemMap bufMap(gl, GL_SHADER_STORAGE_BUFFER, 0, block1Size, GL_MAP_READ_BIT);
    563 
    564 			for (deUint32 ndx = 0; ndx < (deUint32)m_numValues; ndx++)
    565 			{
    566 				const deUint32	res		= *((const deUint32*)((const deUint8*)bufMap.getPtr() + value1Info.offset + value1Info.arrayStride*ndx));
    567 				const deUint32	ref		= m_numValues - ndx;
    568 
    569 				if (res != ref)
    570 					throw tcu::TestError(string("Comparison failed for Out1.values[") + de::toString(ndx) + "] res=" + de::toString(res) + " ref=" + de::toString(ref));
    571 			}
    572 		}
    573 		m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
    574 		return STOP;
    575 	}
    576 
    577 private:
    578 	const int			m_numValues;
    579 	const bool			m_isSized;
    580 	const tcu::IVec3	m_localSize;
    581 	const tcu::IVec3	m_workSize;
    582 };
    583 
    584 class SSBOLocalBarrierCase : public TestCase
    585 {
    586 public:
    587 	SSBOLocalBarrierCase (Context& context, const char* name, const char* description, const tcu::IVec3& localSize, const tcu::IVec3& workSize)
    588 		: TestCase		(context, name, description)
    589 		, m_localSize	(localSize)
    590 		, m_workSize	(workSize)
    591 	{
    592 	}
    593 
    594 	IterateResult iterate (void)
    595 	{
    596 		const glw::Functions&		gl				= m_context.getRenderContext().getFunctions();
    597 		const Buffer				outputBuffer	(m_context.getRenderContext());
    598 		const int					workGroupSize	= m_localSize[0]*m_localSize[1]*m_localSize[2];
    599 		const int					workGroupCount	= m_workSize[0]*m_workSize[1]*m_workSize[2];
    600 		const int					numValues		= workGroupSize*workGroupCount;
    601 
    602 		std::ostringstream src;
    603 		src << "#version 310 es\n"
    604 			<< "layout (local_size_x = " << m_localSize[0] << ", local_size_y = " << m_localSize[1] << ", local_size_z = " << m_localSize[2] << ") in;\n"
    605 			<< "layout(binding = 0) buffer Output {\n"
    606 			<< "    coherent uint values[" << numValues << "];\n"
    607 			<< "} sb_out;\n\n"
    608 			<< "shared uint offsets[" << workGroupSize << "];\n\n"
    609 			<< "void main (void) {\n"
    610 			<< "    uint localSize  = gl_WorkGroupSize.x*gl_WorkGroupSize.y*gl_WorkGroupSize.z;\n"
    611 			<< "    uint globalNdx  = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
    612 			<< "    uint globalOffs = localSize*globalNdx;\n"
    613 			<< "    uint localOffs  = gl_WorkGroupSize.x*gl_WorkGroupSize.y*gl_LocalInvocationID.z + gl_WorkGroupSize.x*gl_LocalInvocationID.y + gl_LocalInvocationID.x;\n"
    614 			<< "\n"
    615 			<< "    sb_out.values[globalOffs + localOffs] = globalOffs;\n"
    616 			<< "    memoryBarrierBuffer();\n"
    617 			<< "    barrier();\n"
    618 			<< "    sb_out.values[globalOffs + ((localOffs+1u)%localSize)] += localOffs;\n"
    619 			<< "    memoryBarrierBuffer();\n"
    620 			<< "    barrier();\n"
    621 			<< "    sb_out.values[globalOffs + ((localOffs+2u)%localSize)] += localOffs;\n"
    622 			<< "}\n";
    623 
    624 		const ShaderProgram			program			(m_context.getRenderContext(), ProgramSources() << ComputeSource(src.str()));
    625 
    626 		m_testCtx.getLog() << program;
    627 		if (!program.isOk())
    628 			TCU_FAIL("Compile failed");
    629 
    630 		m_testCtx.getLog() << TestLog::Message << "Work groups: " << m_workSize << TestLog::EndMessage;
    631 
    632 		gl.useProgram(program.getProgram());
    633 
    634 		// Output buffer setup
    635 		{
    636 			const deUint32		blockIndex		= gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output");
    637 			const int			blockSize		= getProgramResourceInt(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex, GL_BUFFER_DATA_SIZE);
    638 
    639 			gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *outputBuffer);
    640 			gl.bufferData(GL_SHADER_STORAGE_BUFFER, blockSize, DE_NULL, GL_STREAM_READ);
    641 			gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, *outputBuffer);
    642 			GLU_EXPECT_NO_ERROR(gl.getError(), "Output buffer setup failed");
    643 		}
    644 
    645 		// Dispatch compute workload
    646 		gl.dispatchCompute(m_workSize[0], m_workSize[1], m_workSize[2]);
    647 		GLU_EXPECT_NO_ERROR(gl.getError(), "glDispatchCompute()");
    648 
    649 		// Read back and compare
    650 		{
    651 			const deUint32				blockIndex	= gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output");
    652 			const int					blockSize	= getProgramResourceInt(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex, GL_BUFFER_DATA_SIZE);
    653 			const deUint32				valueIndex	= gl.getProgramResourceIndex(program.getProgram(), GL_BUFFER_VARIABLE, "Output.values");
    654 			const InterfaceVariableInfo	valueInfo	= getProgramInterfaceVariableInfo(gl, program.getProgram(), GL_BUFFER_VARIABLE, valueIndex);
    655 			const BufferMemMap			bufMap		(gl, GL_SHADER_STORAGE_BUFFER, 0, blockSize, GL_MAP_READ_BIT);
    656 
    657 			for (int groupNdx = 0; groupNdx < workGroupCount; groupNdx++)
    658 			{
    659 				for (int localOffs = 0; localOffs < workGroupSize; localOffs++)
    660 				{
    661 					const int		globalOffs	= groupNdx*workGroupSize;
    662 					const deUint32	res			= *((const deUint32*)((const deUint8*)bufMap.getPtr() + valueInfo.offset + valueInfo.arrayStride*(globalOffs + localOffs)));
    663 					const int		offs0		= localOffs-1 < 0 ? ((localOffs+workGroupSize-1)%workGroupSize) : ((localOffs-1)%workGroupSize);
    664 					const int		offs1		= localOffs-2 < 0 ? ((localOffs+workGroupSize-2)%workGroupSize) : ((localOffs-2)%workGroupSize);
    665 					const deUint32	ref			= (deUint32)(globalOffs + offs0 + offs1);
    666 
    667 					if (res != ref)
    668 						throw tcu::TestError(string("Comparison failed for Output.values[") + de::toString(globalOffs + localOffs) + "]");
    669 				}
    670 			}
    671 		}
    672 
    673 		m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
    674 		return STOP;
    675 	}
    676 
    677 private:
    678 	const tcu::IVec3	m_localSize;
    679 	const tcu::IVec3	m_workSize;
    680 };
    681 
    682 class SSBOBarrierCase : public TestCase
    683 {
    684 public:
    685 	SSBOBarrierCase (Context& context, const char* name, const char* description, const tcu::IVec3& workSize)
    686 		: TestCase		(context, name, description)
    687 		, m_workSize	(workSize)
    688 	{
    689 	}
    690 
    691 	IterateResult iterate (void)
    692 	{
    693 		const ShaderProgram program0(m_context.getRenderContext(), ProgramSources() <<
    694 			ComputeSource("#version 310 es\n"
    695 						  "layout (local_size_x = 1) in;\n"
    696 						  "uniform uint u_baseVal;\n"
    697 						  "layout(binding = 1) buffer Output {\n"
    698 						  "    uint values[];\n"
    699 						  "};\n"
    700 						  "void main (void) {\n"
    701 						  "    uint offset = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
    702 						  "    values[offset] = u_baseVal+offset;\n"
    703 						  "}\n"));
    704 		const ShaderProgram program1(m_context.getRenderContext(), ProgramSources() <<
    705 			ComputeSource("#version 310 es\n"
    706 						  "layout (local_size_x = 1) in;\n"
    707 						  "uniform uint u_baseVal;\n"
    708 						  "layout(binding = 1) buffer Input {\n"
    709 						  "    uint values[];\n"
    710 						  "};\n"
    711 						  "layout(binding = 0) buffer Output {\n"
    712 						  "    coherent uint sum;\n"
    713 						  "};\n"
    714 						  "void main (void) {\n"
    715 						  "    uint offset = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
    716 						  "    uint value  = values[offset];\n"
    717 						  "    atomicAdd(sum, value);\n"
    718 						  "}\n"));
    719 
    720 		const glw::Functions&		gl				= m_context.getRenderContext().getFunctions();
    721 		const Buffer				tempBuffer		(m_context.getRenderContext());
    722 		const Buffer				outputBuffer	(m_context.getRenderContext());
    723 		const deUint32				baseValue		= 127;
    724 
    725 		m_testCtx.getLog() << program0 << program1;
    726 		if (!program0.isOk() || !program1.isOk())
    727 			TCU_FAIL("Compile failed");
    728 
    729 		m_testCtx.getLog() << TestLog::Message << "Work groups: " << m_workSize << TestLog::EndMessage;
    730 
    731 		// Temp buffer setup
    732 		{
    733 			const deUint32				valueIndex		= gl.getProgramResourceIndex(program0.getProgram(), GL_BUFFER_VARIABLE, "values[0]");
    734 			const InterfaceVariableInfo	valueInfo		= getProgramInterfaceVariableInfo(gl, program0.getProgram(), GL_BUFFER_VARIABLE, valueIndex);
    735 			const deUint32				bufferSize		= valueInfo.arrayStride*m_workSize[0]*m_workSize[1]*m_workSize[2];
    736 
    737 			gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *tempBuffer);
    738 			gl.bufferData(GL_SHADER_STORAGE_BUFFER, (glw::GLsizeiptr)bufferSize, DE_NULL, GL_STATIC_DRAW);
    739 			gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, *tempBuffer);
    740 			GLU_EXPECT_NO_ERROR(gl.getError(), "Temp buffer setup failed");
    741 		}
    742 
    743 		// Output buffer setup
    744 		{
    745 			const deUint32		blockIndex		= gl.getProgramResourceIndex(program1.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output");
    746 			const int			blockSize		= getProgramResourceInt(gl, program1.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex, GL_BUFFER_DATA_SIZE);
    747 
    748 			gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *outputBuffer);
    749 			gl.bufferData(GL_SHADER_STORAGE_BUFFER, blockSize, DE_NULL, GL_STREAM_READ);
    750 
    751 			{
    752 				const BufferMemMap bufMap(gl, GL_SHADER_STORAGE_BUFFER, 0, blockSize, GL_MAP_WRITE_BIT);
    753 				deMemset(bufMap.getPtr(), 0, blockSize);
    754 			}
    755 
    756 			gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, *outputBuffer);
    757 			GLU_EXPECT_NO_ERROR(gl.getError(), "Output buffer setup failed");
    758 		}
    759 
    760 		// Dispatch compute workload
    761 		gl.useProgram(program0.getProgram());
    762 		gl.uniform1ui(gl.getUniformLocation(program0.getProgram(), "u_baseVal"), baseValue);
    763 		gl.dispatchCompute(m_workSize[0], m_workSize[1], m_workSize[2]);
    764 		gl.memoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
    765 		gl.useProgram(program1.getProgram());
    766 		gl.dispatchCompute(m_workSize[0], m_workSize[1], m_workSize[2]);
    767 		GLU_EXPECT_NO_ERROR(gl.getError(), "Failed to dispatch commands");
    768 
    769 		// Read back and compare
    770 		{
    771 			const deUint32				blockIndex	= gl.getProgramResourceIndex(program1.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output");
    772 			const int					blockSize	= getProgramResourceInt(gl, program1.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex, GL_BUFFER_DATA_SIZE);
    773 			const deUint32				valueIndex	= gl.getProgramResourceIndex(program1.getProgram(), GL_BUFFER_VARIABLE, "sum");
    774 			const InterfaceVariableInfo	valueInfo	= getProgramInterfaceVariableInfo(gl, program1.getProgram(), GL_BUFFER_VARIABLE, valueIndex);
    775 			const BufferMemMap			bufMap		(gl, GL_SHADER_STORAGE_BUFFER, 0, blockSize, GL_MAP_READ_BIT);
    776 
    777 			const deUint32				res			= *((const deUint32*)((const deUint8*)bufMap.getPtr() + valueInfo.offset));
    778 			deUint32					ref			= 0;
    779 
    780 			for (int ndx = 0; ndx < m_workSize[0]*m_workSize[1]*m_workSize[2]; ndx++)
    781 				ref += baseValue + (deUint32)ndx;
    782 
    783 			if (res != ref)
    784 			{
    785 				m_testCtx.getLog() << TestLog::Message << "ERROR: comparison failed, expected " << ref << ", got " << res << TestLog::EndMessage;
    786 				throw tcu::TestError("Comparison failed");
    787 			}
    788 		}
    789 
    790 		m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
    791 		return STOP;
    792 	}
    793 
    794 private:
    795 	const tcu::IVec3	m_workSize;
    796 };
    797 
    798 class BasicSharedVarCase : public TestCase
    799 {
    800 public:
    801 	BasicSharedVarCase (Context& context, const char* name, const char* description, const tcu::IVec3& localSize, const tcu::IVec3& workSize)
    802 		: TestCase		(context, name, description)
    803 		, m_localSize	(localSize)
    804 		, m_workSize	(workSize)
    805 	{
    806 	}
    807 
    808 	IterateResult iterate (void)
    809 	{
    810 		const glw::Functions&		gl				= m_context.getRenderContext().getFunctions();
    811 		const Buffer				outputBuffer	(m_context.getRenderContext());
    812 		const int					workGroupSize	= m_localSize[0]*m_localSize[1]*m_localSize[2];
    813 		const int					workGroupCount	= m_workSize[0]*m_workSize[1]*m_workSize[2];
    814 		const int					numValues		= workGroupSize*workGroupCount;
    815 
    816 		std::ostringstream src;
    817 		src << "#version 310 es\n"
    818 			<< "layout (local_size_x = " << m_localSize[0] << ", local_size_y = " << m_localSize[1] << ", local_size_z = " << m_localSize[2] << ") in;\n"
    819 			<< "layout(binding = 0) buffer Output {\n"
    820 			<< "    uint values[" << numValues << "];\n"
    821 			<< "} sb_out;\n\n"
    822 			<< "shared uint offsets[" << workGroupSize << "];\n\n"
    823 			<< "void main (void) {\n"
    824 			<< "    uint localSize  = gl_WorkGroupSize.x*gl_WorkGroupSize.y*gl_WorkGroupSize.z;\n"
    825 			<< "    uint globalNdx  = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
    826 			<< "    uint globalOffs = localSize*globalNdx;\n"
    827 			<< "    uint localOffs  = gl_WorkGroupSize.x*gl_WorkGroupSize.y*gl_LocalInvocationID.z + gl_WorkGroupSize.x*gl_LocalInvocationID.y + gl_LocalInvocationID.x;\n"
    828 			<< "\n"
    829 			<< "    offsets[localSize-localOffs-1u] = globalOffs + localOffs*localOffs;\n"
    830 			<< "    barrier();\n"
    831 			<< "    sb_out.values[globalOffs + localOffs] = offsets[localOffs];\n"
    832 			<< "}\n";
    833 
    834 		const ShaderProgram			program			(m_context.getRenderContext(), ProgramSources() << ShaderSource(SHADERTYPE_COMPUTE, src.str()));
    835 
    836 		m_testCtx.getLog() << program;
    837 		if (!program.isOk())
    838 			TCU_FAIL("Compile failed");
    839 
    840 		m_testCtx.getLog() << TestLog::Message << "Work groups: " << m_workSize << TestLog::EndMessage;
    841 
    842 		gl.useProgram(program.getProgram());
    843 
    844 		// Output buffer setup
    845 		{
    846 			const deUint32		blockIndex		= gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output");
    847 			const int			blockSize		= getProgramResourceInt(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex, GL_BUFFER_DATA_SIZE);
    848 
    849 			gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *outputBuffer);
    850 			gl.bufferData(GL_SHADER_STORAGE_BUFFER, blockSize, DE_NULL, GL_STREAM_READ);
    851 			gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, *outputBuffer);
    852 			GLU_EXPECT_NO_ERROR(gl.getError(), "Output buffer setup failed");
    853 		}
    854 
    855 		// Dispatch compute workload
    856 		gl.dispatchCompute(m_workSize[0], m_workSize[1], m_workSize[2]);
    857 		GLU_EXPECT_NO_ERROR(gl.getError(), "glDispatchCompute()");
    858 
    859 		// Read back and compare
    860 		{
    861 			const deUint32				blockIndex	= gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output");
    862 			const int					blockSize	= getProgramResourceInt(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex, GL_BUFFER_DATA_SIZE);
    863 			const deUint32				valueIndex	= gl.getProgramResourceIndex(program.getProgram(), GL_BUFFER_VARIABLE, "Output.values");
    864 			const InterfaceVariableInfo	valueInfo	= getProgramInterfaceVariableInfo(gl, program.getProgram(), GL_BUFFER_VARIABLE, valueIndex);
    865 			const BufferMemMap			bufMap		(gl, GL_SHADER_STORAGE_BUFFER, 0, blockSize, GL_MAP_READ_BIT);
    866 
    867 			for (int groupNdx = 0; groupNdx < workGroupCount; groupNdx++)
    868 			{
    869 				for (int localOffs = 0; localOffs < workGroupSize; localOffs++)
    870 				{
    871 					const int		globalOffs	= groupNdx*workGroupSize;
    872 					const deUint32	res			= *((const deUint32*)((const deUint8*)bufMap.getPtr() + valueInfo.offset + valueInfo.arrayStride*(globalOffs + localOffs)));
    873 					const deUint32	ref			= (deUint32)(globalOffs + (workGroupSize-localOffs-1)*(workGroupSize-localOffs-1));
    874 
    875 					if (res != ref)
    876 						throw tcu::TestError(string("Comparison failed for Output.values[") + de::toString(globalOffs + localOffs) + "]");
    877 				}
    878 			}
    879 		}
    880 
    881 		m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
    882 		return STOP;
    883 	}
    884 
    885 private:
    886 	const tcu::IVec3	m_localSize;
    887 	const tcu::IVec3	m_workSize;
    888 };
    889 
    890 class SharedVarAtomicOpCase : public TestCase
    891 {
    892 public:
    893 	SharedVarAtomicOpCase (Context& context, const char* name, const char* description, const tcu::IVec3& localSize, const tcu::IVec3& workSize)
    894 		: TestCase		(context, name, description)
    895 		, m_localSize	(localSize)
    896 		, m_workSize	(workSize)
    897 	{
    898 	}
    899 
    900 	IterateResult iterate (void)
    901 	{
    902 		const glw::Functions&		gl				= m_context.getRenderContext().getFunctions();
    903 		const Buffer				outputBuffer	(m_context.getRenderContext());
    904 		const int					workGroupSize	= m_localSize[0]*m_localSize[1]*m_localSize[2];
    905 		const int					workGroupCount	= m_workSize[0]*m_workSize[1]*m_workSize[2];
    906 		const int					numValues		= workGroupSize*workGroupCount;
    907 
    908 		std::ostringstream src;
    909 		src << "#version 310 es\n"
    910 			<< "layout (local_size_x = " << m_localSize[0] << ", local_size_y = " << m_localSize[1] << ", local_size_z = " << m_localSize[2] << ") in;\n"
    911 			<< "layout(binding = 0) buffer Output {\n"
    912 			<< "    uint values[" << numValues << "];\n"
    913 			<< "} sb_out;\n\n"
    914 			<< "shared uint count;\n\n"
    915 			<< "void main (void) {\n"
    916 			<< "    uint localSize  = gl_WorkGroupSize.x*gl_WorkGroupSize.y*gl_WorkGroupSize.z;\n"
    917 			<< "    uint globalNdx  = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
    918 			<< "    uint globalOffs = localSize*globalNdx;\n"
    919 			<< "\n"
    920 			<< "    count = 0u;\n"
    921 			<< "    barrier();\n"
    922 			<< "    uint oldVal = atomicAdd(count, 1u);\n"
    923 			<< "    sb_out.values[globalOffs+oldVal] = oldVal+1u;\n"
    924 			<< "}\n";
    925 
    926 		const ShaderProgram			program			(m_context.getRenderContext(), ProgramSources() << ShaderSource(SHADERTYPE_COMPUTE, src.str()));
    927 
    928 		m_testCtx.getLog() << program;
    929 		if (!program.isOk())
    930 			TCU_FAIL("Compile failed");
    931 
    932 		m_testCtx.getLog() << TestLog::Message << "Work groups: " << m_workSize << TestLog::EndMessage;
    933 
    934 		gl.useProgram(program.getProgram());
    935 
    936 		// Output buffer setup
    937 		{
    938 			const deUint32		blockIndex		= gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output");
    939 			const int			blockSize		= getProgramResourceInt(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex, GL_BUFFER_DATA_SIZE);
    940 
    941 			gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *outputBuffer);
    942 			gl.bufferData(GL_SHADER_STORAGE_BUFFER, blockSize, DE_NULL, GL_STREAM_READ);
    943 			gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, *outputBuffer);
    944 			GLU_EXPECT_NO_ERROR(gl.getError(), "Output buffer setup failed");
    945 		}
    946 
    947 		// Dispatch compute workload
    948 		gl.dispatchCompute(m_workSize[0], m_workSize[1], m_workSize[2]);
    949 		GLU_EXPECT_NO_ERROR(gl.getError(), "glDispatchCompute()");
    950 
    951 		// Read back and compare
    952 		{
    953 			const deUint32				blockIndex	= gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output");
    954 			const int					blockSize	= getProgramResourceInt(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex, GL_BUFFER_DATA_SIZE);
    955 			const deUint32				valueIndex	= gl.getProgramResourceIndex(program.getProgram(), GL_BUFFER_VARIABLE, "Output.values");
    956 			const InterfaceVariableInfo	valueInfo	= getProgramInterfaceVariableInfo(gl, program.getProgram(), GL_BUFFER_VARIABLE, valueIndex);
    957 			const BufferMemMap			bufMap		(gl, GL_SHADER_STORAGE_BUFFER, 0, blockSize, GL_MAP_READ_BIT);
    958 
    959 			for (int groupNdx = 0; groupNdx < workGroupCount; groupNdx++)
    960 			{
    961 				for (int localOffs = 0; localOffs < workGroupSize; localOffs++)
    962 				{
    963 					const int		globalOffs	= groupNdx*workGroupSize;
    964 					const deUint32	res			= *((const deUint32*)((const deUint8*)bufMap.getPtr() + valueInfo.offset + valueInfo.arrayStride*(globalOffs + localOffs)));
    965 					const deUint32	ref			= (deUint32)(localOffs+1);
    966 
    967 					if (res != ref)
    968 						throw tcu::TestError(string("Comparison failed for Output.values[") + de::toString(globalOffs + localOffs) + "]");
    969 				}
    970 			}
    971 		}
    972 
    973 		m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
    974 		return STOP;
    975 	}
    976 
    977 private:
    978 	const tcu::IVec3	m_localSize;
    979 	const tcu::IVec3	m_workSize;
    980 };
    981 
    982 class CopyImageToSSBOCase : public TestCase
    983 {
    984 public:
    985 	CopyImageToSSBOCase (Context& context, const char* name, const char* description, const tcu::IVec2& localSize, const tcu::IVec2& imageSize)
    986 		: TestCase		(context, name, description)
    987 		, m_localSize	(localSize)
    988 		, m_imageSize	(imageSize)
    989 	{
    990 		DE_ASSERT(m_imageSize[0] % m_localSize[0] == 0);
    991 		DE_ASSERT(m_imageSize[1] % m_localSize[1] == 0);
    992 	}
    993 
    994 	IterateResult iterate (void)
    995 	{
    996 
    997 		std::ostringstream src;
    998 		src << "#version 310 es\n"
    999 			<< "layout (local_size_x = " << m_localSize[0] << ", local_size_y = " << m_localSize[1] << ") in;\n"
   1000 			<< "layout(r32ui, binding = 1) readonly uniform highp uimage2D u_srcImg;\n"
   1001 			<< "layout(binding = 0) buffer Output {\n"
   1002 			<< "    uint values[" << (m_imageSize[0]*m_imageSize[1]) << "];\n"
   1003 			<< "} sb_out;\n\n"
   1004 			<< "void main (void) {\n"
   1005 			<< "    uint stride = gl_NumWorkGroups.x*gl_WorkGroupSize.x;\n"
   1006 			<< "    uint value  = imageLoad(u_srcImg, ivec2(gl_GlobalInvocationID.xy)).x;\n"
   1007 			<< "    sb_out.values[gl_GlobalInvocationID.y*stride + gl_GlobalInvocationID.x] = value;\n"
   1008 			<< "}\n";
   1009 
   1010 		const glw::Functions&		gl				= m_context.getRenderContext().getFunctions();
   1011 		const Buffer				outputBuffer	(m_context.getRenderContext());
   1012 		const Texture				inputTexture	(m_context.getRenderContext());
   1013 		const ShaderProgram			program			(m_context.getRenderContext(), ProgramSources() << ShaderSource(SHADERTYPE_COMPUTE, src.str()));
   1014 		const tcu::IVec2			workSize		= m_imageSize / m_localSize;
   1015 		de::Random					rnd				(0xab2c7);
   1016 		vector<deUint32>			inputValues		(m_imageSize[0]*m_imageSize[1]);
   1017 
   1018 		m_testCtx.getLog() << program;
   1019 		if (!program.isOk())
   1020 			TCU_FAIL("Compile failed");
   1021 
   1022 		m_testCtx.getLog() << TestLog::Message << "Work groups: " << workSize << TestLog::EndMessage;
   1023 
   1024 		gl.useProgram(program.getProgram());
   1025 
   1026 		// Input values
   1027 		for (vector<deUint32>::iterator i = inputValues.begin(); i != inputValues.end(); ++i)
   1028 			*i = rnd.getUint32();
   1029 
   1030 		// Input image setup
   1031 		gl.bindTexture(GL_TEXTURE_2D, *inputTexture);
   1032 		gl.texStorage2D(GL_TEXTURE_2D, 1, GL_R32UI, m_imageSize[0], m_imageSize[1]);
   1033 		gl.texSubImage2D(GL_TEXTURE_2D, 0, 0, 0, m_imageSize[0], m_imageSize[1], GL_RED_INTEGER, GL_UNSIGNED_INT, &inputValues[0]);
   1034 		gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
   1035 		gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
   1036 		GLU_EXPECT_NO_ERROR(gl.getError(), "Uploading image data failed");
   1037 
   1038 		// Bind to unit 1
   1039 		gl.bindImageTexture(1, *inputTexture, 0, GL_FALSE, 0, GL_READ_ONLY, GL_R32UI);
   1040 		GLU_EXPECT_NO_ERROR(gl.getError(), "Image setup failed");
   1041 
   1042 		// Output buffer setup
   1043 		{
   1044 			const deUint32		blockIndex		= gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output");
   1045 			const int			blockSize		= getProgramResourceInt(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex, GL_BUFFER_DATA_SIZE);
   1046 
   1047 			gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *outputBuffer);
   1048 			gl.bufferData(GL_SHADER_STORAGE_BUFFER, blockSize, DE_NULL, GL_STREAM_READ);
   1049 			gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, *outputBuffer);
   1050 			GLU_EXPECT_NO_ERROR(gl.getError(), "Output buffer setup failed");
   1051 		}
   1052 
   1053 		// Dispatch compute workload
   1054 		gl.dispatchCompute(workSize[0], workSize[1], 1);
   1055 		GLU_EXPECT_NO_ERROR(gl.getError(), "glDispatchCompute()");
   1056 
   1057 		// Read back and compare
   1058 		{
   1059 			const deUint32				blockIndex	= gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output");
   1060 			const int					blockSize	= getProgramResourceInt(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex, GL_BUFFER_DATA_SIZE);
   1061 			const deUint32				valueIndex	= gl.getProgramResourceIndex(program.getProgram(), GL_BUFFER_VARIABLE, "Output.values");
   1062 			const InterfaceVariableInfo	valueInfo	= getProgramInterfaceVariableInfo(gl, program.getProgram(), GL_BUFFER_VARIABLE, valueIndex);
   1063 			const BufferMemMap			bufMap		(gl, GL_SHADER_STORAGE_BUFFER, 0, blockSize, GL_MAP_READ_BIT);
   1064 
   1065 			TCU_CHECK(valueInfo.arraySize == (deUint32)inputValues.size());
   1066 
   1067 			for (deUint32 ndx = 0; ndx < valueInfo.arraySize; ndx++)
   1068 			{
   1069 				const deUint32	res		= *((const deUint32*)((const deUint8*)bufMap.getPtr() + valueInfo.offset + valueInfo.arrayStride*ndx));
   1070 				const deUint32	ref		= inputValues[ndx];
   1071 
   1072 				if (res != ref)
   1073 					throw tcu::TestError(string("Comparison failed for Output.values[") + de::toString(ndx) + "]");
   1074 			}
   1075 		}
   1076 
   1077 		m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
   1078 		return STOP;
   1079 	}
   1080 
   1081 private:
   1082 	const tcu::IVec2	m_localSize;
   1083 	const tcu::IVec2	m_imageSize;
   1084 };
   1085 
   1086 class CopySSBOToImageCase : public TestCase
   1087 {
   1088 public:
   1089 	CopySSBOToImageCase (Context& context, const char* name, const char* description, const tcu::IVec2& localSize, const tcu::IVec2& imageSize)
   1090 		: TestCase		(context, name, description)
   1091 		, m_localSize	(localSize)
   1092 		, m_imageSize	(imageSize)
   1093 	{
   1094 		DE_ASSERT(m_imageSize[0] % m_localSize[0] == 0);
   1095 		DE_ASSERT(m_imageSize[1] % m_localSize[1] == 0);
   1096 	}
   1097 
   1098 	IterateResult iterate (void)
   1099 	{
   1100 
   1101 		std::ostringstream src;
   1102 		src << "#version 310 es\n"
   1103 			<< "layout (local_size_x = " << m_localSize[0] << ", local_size_y = " << m_localSize[1] << ") in;\n"
   1104 			<< "layout(r32ui, binding = 1) writeonly uniform highp uimage2D u_dstImg;\n"
   1105 			<< "buffer Input {\n"
   1106 			<< "    uint values[" << (m_imageSize[0]*m_imageSize[1]) << "];\n"
   1107 			<< "} sb_in;\n\n"
   1108 			<< "void main (void) {\n"
   1109 			<< "    uint stride = gl_NumWorkGroups.x*gl_WorkGroupSize.x;\n"
   1110 			<< "    uint value  = sb_in.values[gl_GlobalInvocationID.y*stride + gl_GlobalInvocationID.x];\n"
   1111 			<< "    imageStore(u_dstImg, ivec2(gl_GlobalInvocationID.xy), uvec4(value, 0, 0, 0));\n"
   1112 			<< "}\n";
   1113 
   1114 		const glw::Functions&		gl				= m_context.getRenderContext().getFunctions();
   1115 		const Buffer				inputBuffer		(m_context.getRenderContext());
   1116 		const Texture				outputTexture	(m_context.getRenderContext());
   1117 		const ShaderProgram			program			(m_context.getRenderContext(), ProgramSources() << ShaderSource(SHADERTYPE_COMPUTE, src.str()));
   1118 		const tcu::IVec2			workSize		= m_imageSize / m_localSize;
   1119 		de::Random					rnd				(0x77238ac2);
   1120 		vector<deUint32>			inputValues		(m_imageSize[0]*m_imageSize[1]);
   1121 
   1122 		m_testCtx.getLog() << program;
   1123 		if (!program.isOk())
   1124 			TCU_FAIL("Compile failed");
   1125 
   1126 		m_testCtx.getLog() << TestLog::Message << "Work groups: " << workSize << TestLog::EndMessage;
   1127 
   1128 		gl.useProgram(program.getProgram());
   1129 
   1130 		// Input values
   1131 		for (vector<deUint32>::iterator i = inputValues.begin(); i != inputValues.end(); ++i)
   1132 			*i = rnd.getUint32();
   1133 
   1134 		// Input buffer setup
   1135 		{
   1136 			const deUint32				blockIndex	= gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Input");
   1137 			const InterfaceBlockInfo	blockInfo	= getProgramInterfaceBlockInfo(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex);
   1138 			const deUint32				valueIndex	= gl.getProgramResourceIndex(program.getProgram(), GL_BUFFER_VARIABLE, "Input.values");
   1139 			const InterfaceVariableInfo	valueInfo	= getProgramInterfaceVariableInfo(gl, program.getProgram(), GL_BUFFER_VARIABLE, valueIndex);
   1140 
   1141 			gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *inputBuffer);
   1142 			gl.bufferData(GL_SHADER_STORAGE_BUFFER, (glw::GLsizeiptr)blockInfo.dataSize, DE_NULL, GL_STATIC_DRAW);
   1143 
   1144 			TCU_CHECK(valueInfo.arraySize == (deUint32)inputValues.size());
   1145 
   1146 			{
   1147 				const BufferMemMap bufMap(gl, GL_SHADER_STORAGE_BUFFER, 0, (int)blockInfo.dataSize, GL_MAP_WRITE_BIT);
   1148 
   1149 				for (deUint32 ndx = 0; ndx < (deUint32)inputValues.size(); ndx++)
   1150 					*(deUint32*)((deUint8*)bufMap.getPtr() + valueInfo.offset + ndx*valueInfo.arrayStride) = inputValues[ndx];
   1151 			}
   1152 
   1153 			gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, blockInfo.bufferBinding, *inputBuffer);
   1154 			GLU_EXPECT_NO_ERROR(gl.getError(), "Input buffer setup failed");
   1155 		}
   1156 
   1157 		// Output image setup
   1158 		gl.bindTexture(GL_TEXTURE_2D, *outputTexture);
   1159 		gl.texStorage2D(GL_TEXTURE_2D, 1, GL_R32UI, m_imageSize[0], m_imageSize[1]);
   1160 		gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
   1161 		gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
   1162 		GLU_EXPECT_NO_ERROR(gl.getError(), "Uploading image data failed");
   1163 
   1164 		// Bind to unit 1
   1165 		gl.bindImageTexture(1, *outputTexture, 0, GL_FALSE, 0, GL_WRITE_ONLY, GL_R32UI);
   1166 		GLU_EXPECT_NO_ERROR(gl.getError(), "Image setup failed");
   1167 
   1168 		// Dispatch compute workload
   1169 		gl.dispatchCompute(workSize[0], workSize[1], 1);
   1170 		GLU_EXPECT_NO_ERROR(gl.getError(), "glDispatchCompute()");
   1171 
   1172 		// Read back and compare
   1173 		{
   1174 			Framebuffer			fbo			(m_context.getRenderContext());
   1175 			vector<deUint32>	pixels		(inputValues.size()*4);
   1176 
   1177 			gl.bindFramebuffer(GL_FRAMEBUFFER, *fbo);
   1178 			gl.framebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, *outputTexture, 0);
   1179 			TCU_CHECK(gl.checkFramebufferStatus(GL_FRAMEBUFFER) == GL_FRAMEBUFFER_COMPLETE);
   1180 
   1181 			// \note In ES3 we have to use GL_RGBA_INTEGER
   1182 			gl.readBuffer(GL_COLOR_ATTACHMENT0);
   1183 			gl.readPixels(0, 0, m_imageSize[0], m_imageSize[1], GL_RGBA_INTEGER, GL_UNSIGNED_INT, &pixels[0]);
   1184 			GLU_EXPECT_NO_ERROR(gl.getError(), "Reading pixels failed");
   1185 
   1186 			for (deUint32 ndx = 0; ndx < (deUint32)inputValues.size(); ndx++)
   1187 			{
   1188 				const deUint32	res		= pixels[ndx*4];
   1189 				const deUint32	ref		= inputValues[ndx];
   1190 
   1191 				if (res != ref)
   1192 					throw tcu::TestError(string("Comparison failed for pixel ") + de::toString(ndx));
   1193 			}
   1194 		}
   1195 
   1196 		m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
   1197 		return STOP;
   1198 	}
   1199 
   1200 private:
   1201 	const tcu::IVec2	m_localSize;
   1202 	const tcu::IVec2	m_imageSize;
   1203 };
   1204 
   1205 class ImageAtomicOpCase : public TestCase
   1206 {
   1207 public:
   1208 	ImageAtomicOpCase (Context& context, const char* name, const char* description, int localSize, const tcu::IVec2& imageSize)
   1209 		: TestCase		(context, name, description)
   1210 		, m_localSize	(localSize)
   1211 		, m_imageSize	(imageSize)
   1212 	{
   1213 	}
   1214 
   1215 	void init (void)
   1216 	{
   1217 		if (!m_context.getContextInfo().isExtensionSupported("GL_OES_shader_image_atomic"))
   1218 			throw tcu::NotSupportedError("Test requires OES_shader_image_atomic extension");
   1219 	}
   1220 
   1221 	IterateResult iterate (void)
   1222 	{
   1223 
   1224 		std::ostringstream src;
   1225 		src << "#version 310 es\n"
   1226 			<< "#extension GL_OES_shader_image_atomic : require\n"
   1227 			<< "layout (local_size_x = " << m_localSize << ") in;\n"
   1228 			<< "layout(r32ui, binding = 1) uniform highp uimage2D u_dstImg;\n"
   1229 			<< "buffer Input {\n"
   1230 			<< "    uint values[" << (m_imageSize[0]*m_imageSize[1]*m_localSize) << "];\n"
   1231 			<< "} sb_in;\n\n"
   1232 			<< "void main (void) {\n"
   1233 			<< "    uint stride = gl_NumWorkGroups.x*gl_WorkGroupSize.x;\n"
   1234 			<< "    uint value  = sb_in.values[gl_GlobalInvocationID.y*stride + gl_GlobalInvocationID.x];\n"
   1235 			<< "\n"
   1236 			<< "    if (gl_LocalInvocationIndex == 0u)\n"
   1237 			<< "        imageStore(u_dstImg, ivec2(gl_WorkGroupID.xy), uvec4(0));\n"
   1238 			<< "    barrier();\n"
   1239 			<< "    imageAtomicAdd(u_dstImg, ivec2(gl_WorkGroupID.xy), value);\n"
   1240 			<< "}\n";
   1241 
   1242 		const glw::Functions&		gl				= m_context.getRenderContext().getFunctions();
   1243 		const Buffer				inputBuffer		(m_context.getRenderContext());
   1244 		const Texture				outputTexture	(m_context.getRenderContext());
   1245 		const ShaderProgram			program			(m_context.getRenderContext(), ProgramSources() << ShaderSource(SHADERTYPE_COMPUTE, src.str()));
   1246 		de::Random					rnd				(0x77238ac2);
   1247 		vector<deUint32>			inputValues		(m_imageSize[0]*m_imageSize[1]*m_localSize);
   1248 
   1249 		m_testCtx.getLog() << program;
   1250 		if (!program.isOk())
   1251 			TCU_FAIL("Compile failed");
   1252 
   1253 		m_testCtx.getLog() << TestLog::Message << "Work groups: " << m_imageSize << TestLog::EndMessage;
   1254 
   1255 		gl.useProgram(program.getProgram());
   1256 
   1257 		// Input values
   1258 		for (vector<deUint32>::iterator i = inputValues.begin(); i != inputValues.end(); ++i)
   1259 			*i = rnd.getUint32();
   1260 
   1261 		// Input buffer setup
   1262 		{
   1263 			const deUint32				blockIndex	= gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Input");
   1264 			const InterfaceBlockInfo	blockInfo	= getProgramInterfaceBlockInfo(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex);
   1265 			const deUint32				valueIndex	= gl.getProgramResourceIndex(program.getProgram(), GL_BUFFER_VARIABLE, "Input.values");
   1266 			const InterfaceVariableInfo	valueInfo	= getProgramInterfaceVariableInfo(gl, program.getProgram(), GL_BUFFER_VARIABLE, valueIndex);
   1267 
   1268 			gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *inputBuffer);
   1269 			gl.bufferData(GL_SHADER_STORAGE_BUFFER, (glw::GLsizeiptr)blockInfo.dataSize, DE_NULL, GL_STATIC_DRAW);
   1270 
   1271 			TCU_CHECK(valueInfo.arraySize == (deUint32)inputValues.size());
   1272 
   1273 			{
   1274 				const BufferMemMap bufMap(gl, GL_SHADER_STORAGE_BUFFER, 0, (int)blockInfo.dataSize, GL_MAP_WRITE_BIT);
   1275 
   1276 				for (deUint32 ndx = 0; ndx < (deUint32)inputValues.size(); ndx++)
   1277 					*(deUint32*)((deUint8*)bufMap.getPtr() + valueInfo.offset + ndx*valueInfo.arrayStride) = inputValues[ndx];
   1278 			}
   1279 
   1280 			gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, blockInfo.bufferBinding, *inputBuffer);
   1281 			GLU_EXPECT_NO_ERROR(gl.getError(), "Input buffer setup failed");
   1282 		}
   1283 
   1284 		// Output image setup
   1285 		gl.bindTexture(GL_TEXTURE_2D, *outputTexture);
   1286 		gl.texStorage2D(GL_TEXTURE_2D, 1, GL_R32UI, m_imageSize[0], m_imageSize[1]);
   1287 		gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
   1288 		gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
   1289 		GLU_EXPECT_NO_ERROR(gl.getError(), "Uploading image data failed");
   1290 
   1291 		// Bind to unit 1
   1292 		gl.bindImageTexture(1, *outputTexture, 0, GL_FALSE, 0, GL_READ_WRITE, GL_R32UI);
   1293 		GLU_EXPECT_NO_ERROR(gl.getError(), "Image setup failed");
   1294 
   1295 		// Dispatch compute workload
   1296 		gl.dispatchCompute(m_imageSize[0], m_imageSize[1], 1);
   1297 		GLU_EXPECT_NO_ERROR(gl.getError(), "glDispatchCompute()");
   1298 
   1299 		// Read back and compare
   1300 		{
   1301 			Framebuffer			fbo			(m_context.getRenderContext());
   1302 			vector<deUint32>	pixels		(m_imageSize[0]*m_imageSize[1]*4);
   1303 
   1304 			gl.bindFramebuffer(GL_FRAMEBUFFER, *fbo);
   1305 			gl.framebufferTexture2D(GL_FRAMEBUFFER, GL_COLOR_ATTACHMENT0, GL_TEXTURE_2D, *outputTexture, 0);
   1306 			TCU_CHECK(gl.checkFramebufferStatus(GL_FRAMEBUFFER) == GL_FRAMEBUFFER_COMPLETE);
   1307 
   1308 			// \note In ES3 we have to use GL_RGBA_INTEGER
   1309 			gl.readBuffer(GL_COLOR_ATTACHMENT0);
   1310 			gl.readPixels(0, 0, m_imageSize[0], m_imageSize[1], GL_RGBA_INTEGER, GL_UNSIGNED_INT, &pixels[0]);
   1311 			GLU_EXPECT_NO_ERROR(gl.getError(), "Reading pixels failed");
   1312 
   1313 			for (int pixelNdx = 0; pixelNdx < (int)inputValues.size()/m_localSize; pixelNdx++)
   1314 			{
   1315 				const deUint32	res		= pixels[pixelNdx*4];
   1316 				deUint32		ref		= 0;
   1317 
   1318 				for (int offs = 0; offs < m_localSize; offs++)
   1319 					ref += inputValues[pixelNdx*m_localSize + offs];
   1320 
   1321 				if (res != ref)
   1322 					throw tcu::TestError(string("Comparison failed for pixel ") + de::toString(pixelNdx));
   1323 			}
   1324 		}
   1325 
   1326 		m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
   1327 		return STOP;
   1328 	}
   1329 
   1330 private:
   1331 	const int			m_localSize;
   1332 	const tcu::IVec2	m_imageSize;
   1333 };
   1334 
   1335 class ImageBarrierCase : public TestCase
   1336 {
   1337 public:
   1338 	ImageBarrierCase (Context& context, const char* name, const char* description, const tcu::IVec2& workSize)
   1339 		: TestCase		(context, name, description)
   1340 		, m_workSize	(workSize)
   1341 	{
   1342 	}
   1343 
   1344 	IterateResult iterate (void)
   1345 	{
   1346 		const ShaderProgram program0(m_context.getRenderContext(), ProgramSources() <<
   1347 			ComputeSource("#version 310 es\n"
   1348 						  "layout (local_size_x = 1) in;\n"
   1349 						  "uniform uint u_baseVal;\n"
   1350 						  "layout(r32ui, binding = 2) writeonly uniform highp uimage2D u_img;\n"
   1351 						  "void main (void) {\n"
   1352 						  "    uint offset = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
   1353 						  "    imageStore(u_img, ivec2(gl_WorkGroupID.xy), uvec4(offset+u_baseVal, 0, 0, 0));\n"
   1354 						  "}\n"));
   1355 		const ShaderProgram program1(m_context.getRenderContext(), ProgramSources() <<
   1356 			ComputeSource("#version 310 es\n"
   1357 						  "layout (local_size_x = 1) in;\n"
   1358 						  "layout(r32ui, binding = 2) readonly uniform highp uimage2D u_img;\n"
   1359 						  "layout(binding = 0) buffer Output {\n"
   1360 						  "    coherent uint sum;\n"
   1361 						  "};\n"
   1362 						  "void main (void) {\n"
   1363 						  "    uint value = imageLoad(u_img, ivec2(gl_WorkGroupID.xy)).x;\n"
   1364 						  "    atomicAdd(sum, value);\n"
   1365 						  "}\n"));
   1366 
   1367 		const glw::Functions&		gl				= m_context.getRenderContext().getFunctions();
   1368 		const Texture				tempTexture		(m_context.getRenderContext());
   1369 		const Buffer				outputBuffer	(m_context.getRenderContext());
   1370 		const deUint32				baseValue		= 127;
   1371 
   1372 		m_testCtx.getLog() << program0 << program1;
   1373 		if (!program0.isOk() || !program1.isOk())
   1374 			TCU_FAIL("Compile failed");
   1375 
   1376 		m_testCtx.getLog() << TestLog::Message << "Work groups: " << m_workSize << TestLog::EndMessage;
   1377 
   1378 		// Temp texture setup
   1379 		gl.bindTexture(GL_TEXTURE_2D, *tempTexture);
   1380 		gl.texStorage2D(GL_TEXTURE_2D, 1, GL_R32UI, m_workSize[0], m_workSize[1]);
   1381 		gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
   1382 		gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
   1383 		GLU_EXPECT_NO_ERROR(gl.getError(), "Uploading image data failed");
   1384 
   1385 		// Bind to unit 2
   1386 		gl.bindImageTexture(2, *tempTexture, 0, GL_FALSE, 0, GL_READ_WRITE, GL_R32UI);
   1387 		GLU_EXPECT_NO_ERROR(gl.getError(), "Image setup failed");
   1388 
   1389 		// Output buffer setup
   1390 		{
   1391 			const deUint32		blockIndex		= gl.getProgramResourceIndex(program1.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output");
   1392 			const int			blockSize		= getProgramResourceInt(gl, program1.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex, GL_BUFFER_DATA_SIZE);
   1393 
   1394 			gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *outputBuffer);
   1395 			gl.bufferData(GL_SHADER_STORAGE_BUFFER, blockSize, DE_NULL, GL_STREAM_READ);
   1396 
   1397 			{
   1398 				const BufferMemMap bufMap(gl, GL_SHADER_STORAGE_BUFFER, 0, blockSize, GL_MAP_WRITE_BIT);
   1399 				deMemset(bufMap.getPtr(), 0, blockSize);
   1400 			}
   1401 
   1402 			gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, *outputBuffer);
   1403 			GLU_EXPECT_NO_ERROR(gl.getError(), "Output buffer setup failed");
   1404 		}
   1405 
   1406 		// Dispatch compute workload
   1407 		gl.useProgram(program0.getProgram());
   1408 		gl.uniform1ui(gl.getUniformLocation(program0.getProgram(), "u_baseVal"), baseValue);
   1409 		gl.dispatchCompute(m_workSize[0], m_workSize[1], 1);
   1410 		gl.memoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);
   1411 		gl.useProgram(program1.getProgram());
   1412 		gl.dispatchCompute(m_workSize[0], m_workSize[1], 1);
   1413 		GLU_EXPECT_NO_ERROR(gl.getError(), "Failed to dispatch commands");
   1414 
   1415 		// Read back and compare
   1416 		{
   1417 			const deUint32				blockIndex	= gl.getProgramResourceIndex(program1.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output");
   1418 			const int					blockSize	= getProgramResourceInt(gl, program1.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex, GL_BUFFER_DATA_SIZE);
   1419 			const deUint32				valueIndex	= gl.getProgramResourceIndex(program1.getProgram(), GL_BUFFER_VARIABLE, "sum");
   1420 			const InterfaceVariableInfo	valueInfo	= getProgramInterfaceVariableInfo(gl, program1.getProgram(), GL_BUFFER_VARIABLE, valueIndex);
   1421 			const BufferMemMap			bufMap		(gl, GL_SHADER_STORAGE_BUFFER, 0, blockSize, GL_MAP_READ_BIT);
   1422 
   1423 			const deUint32				res			= *((const deUint32*)((const deUint8*)bufMap.getPtr() + valueInfo.offset));
   1424 			deUint32					ref			= 0;
   1425 
   1426 			for (int ndx = 0; ndx < m_workSize[0]*m_workSize[1]; ndx++)
   1427 				ref += baseValue + (deUint32)ndx;
   1428 
   1429 			if (res != ref)
   1430 			{
   1431 				m_testCtx.getLog() << TestLog::Message << "ERROR: comparison failed, expected " << ref << ", got " << res << TestLog::EndMessage;
   1432 				throw tcu::TestError("Comparison failed");
   1433 			}
   1434 		}
   1435 
   1436 		m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
   1437 		return STOP;
   1438 	}
   1439 
   1440 private:
   1441 	const tcu::IVec2	m_workSize;
   1442 };
   1443 
   1444 class AtomicCounterCase : public TestCase
   1445 {
   1446 public:
   1447 	AtomicCounterCase (Context& context, const char* name, const char* description, const tcu::IVec3& localSize, const tcu::IVec3& workSize)
   1448 		: TestCase		(context, name, description)
   1449 		, m_localSize	(localSize)
   1450 		, m_workSize	(workSize)
   1451 	{
   1452 	}
   1453 
   1454 	IterateResult iterate (void)
   1455 	{
   1456 		const glw::Functions&		gl				= m_context.getRenderContext().getFunctions();
   1457 		const Buffer				outputBuffer	(m_context.getRenderContext());
   1458 		const Buffer				counterBuffer	(m_context.getRenderContext());
   1459 		const int					workGroupSize	= m_localSize[0]*m_localSize[1]*m_localSize[2];
   1460 		const int					workGroupCount	= m_workSize[0]*m_workSize[1]*m_workSize[2];
   1461 		const int					numValues		= workGroupSize*workGroupCount;
   1462 
   1463 		std::ostringstream src;
   1464 		src << "#version 310 es\n"
   1465 			<< "layout (local_size_x = " << m_localSize[0] << ", local_size_y = " << m_localSize[1] << ", local_size_z = " << m_localSize[2] << ") in;\n"
   1466 			<< "layout(binding = 0) buffer Output {\n"
   1467 			<< "    uint values[" << numValues << "];\n"
   1468 			<< "} sb_out;\n\n"
   1469 			<< "layout(binding = 0, offset = 0) uniform atomic_uint u_count;\n\n"
   1470 			<< "void main (void) {\n"
   1471 			<< "    uint localSize  = gl_WorkGroupSize.x*gl_WorkGroupSize.y*gl_WorkGroupSize.z;\n"
   1472 			<< "    uint globalNdx  = gl_NumWorkGroups.x*gl_NumWorkGroups.y*gl_WorkGroupID.z + gl_NumWorkGroups.x*gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
   1473 			<< "    uint globalOffs = localSize*globalNdx;\n"
   1474 			<< "    uint localOffs  = gl_WorkGroupSize.x*gl_WorkGroupSize.y*gl_LocalInvocationID.z + gl_WorkGroupSize.x*gl_LocalInvocationID.y + gl_LocalInvocationID.x;\n"
   1475 			<< "\n"
   1476 			<< "    uint oldVal = atomicCounterIncrement(u_count);\n"
   1477 			<< "    sb_out.values[globalOffs+localOffs] = oldVal;\n"
   1478 			<< "}\n";
   1479 
   1480 		const ShaderProgram			program			(m_context.getRenderContext(), ProgramSources() << ComputeSource(src.str()));
   1481 
   1482 		m_testCtx.getLog() << program;
   1483 		if (!program.isOk())
   1484 			TCU_FAIL("Compile failed");
   1485 
   1486 		m_testCtx.getLog() << TestLog::Message << "Work groups: " << m_workSize << TestLog::EndMessage;
   1487 
   1488 		gl.useProgram(program.getProgram());
   1489 
   1490 		// Atomic counter buffer setup
   1491 		{
   1492 			const deUint32	uniformIndex	= gl.getProgramResourceIndex(program.getProgram(), GL_UNIFORM, "u_count");
   1493 			const deUint32	bufferIndex		= getProgramResourceUint(gl, program.getProgram(), GL_UNIFORM, uniformIndex, GL_ATOMIC_COUNTER_BUFFER_INDEX);
   1494 			const deUint32	bufferSize		= getProgramResourceUint(gl, program.getProgram(), GL_ATOMIC_COUNTER_BUFFER, bufferIndex, GL_BUFFER_DATA_SIZE);
   1495 
   1496 			gl.bindBuffer(GL_ATOMIC_COUNTER_BUFFER, *counterBuffer);
   1497 			gl.bufferData(GL_ATOMIC_COUNTER_BUFFER, bufferSize, DE_NULL, GL_STREAM_READ);
   1498 
   1499 			{
   1500 				const BufferMemMap memMap(gl, GL_ATOMIC_COUNTER_BUFFER, 0, bufferSize, GL_MAP_WRITE_BIT);
   1501 				deMemset(memMap.getPtr(), 0, (int)bufferSize);
   1502 			}
   1503 
   1504 			gl.bindBufferBase(GL_ATOMIC_COUNTER_BUFFER, 0, *counterBuffer);
   1505 			GLU_EXPECT_NO_ERROR(gl.getError(), "Atomic counter buffer setup failed");
   1506 		}
   1507 
   1508 		// Output buffer setup
   1509 		{
   1510 			const deUint32		blockIndex		= gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output");
   1511 			const int			blockSize		= getProgramResourceInt(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex, GL_BUFFER_DATA_SIZE);
   1512 
   1513 			gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *outputBuffer);
   1514 			gl.bufferData(GL_SHADER_STORAGE_BUFFER, blockSize, DE_NULL, GL_STREAM_READ);
   1515 			gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, *outputBuffer);
   1516 			GLU_EXPECT_NO_ERROR(gl.getError(), "Output buffer setup failed");
   1517 		}
   1518 
   1519 		// Dispatch compute workload
   1520 		gl.dispatchCompute(m_workSize[0], m_workSize[1], m_workSize[2]);
   1521 		GLU_EXPECT_NO_ERROR(gl.getError(), "glDispatchCompute()");
   1522 
   1523 		// Read back and compare atomic counter
   1524 		{
   1525 			const deUint32		uniformIndex	= gl.getProgramResourceIndex(program.getProgram(), GL_UNIFORM, "u_count");
   1526 			const deUint32		uniformOffset	= getProgramResourceUint(gl, program.getProgram(), GL_UNIFORM, uniformIndex, GL_OFFSET);
   1527 			const deUint32		bufferIndex		= getProgramResourceUint(gl, program.getProgram(), GL_UNIFORM, uniformIndex, GL_ATOMIC_COUNTER_BUFFER_INDEX);
   1528 			const deUint32		bufferSize		= getProgramResourceUint(gl, program.getProgram(), GL_ATOMIC_COUNTER_BUFFER, bufferIndex, GL_BUFFER_DATA_SIZE);
   1529 			const BufferMemMap	bufMap			(gl, GL_ATOMIC_COUNTER_BUFFER, 0, bufferSize, GL_MAP_READ_BIT);
   1530 
   1531 			const deUint32		resVal			= *((const deUint32*)((const deUint8*)bufMap.getPtr() + uniformOffset));
   1532 
   1533 			if (resVal != (deUint32)numValues)
   1534 				throw tcu::TestError("Invalid atomic counter value");
   1535 		}
   1536 
   1537 		// Read back and compare SSBO
   1538 		{
   1539 			const deUint32				blockIndex	= gl.getProgramResourceIndex(program.getProgram(), GL_SHADER_STORAGE_BLOCK, "Output");
   1540 			const int					blockSize	= getProgramResourceInt(gl, program.getProgram(), GL_SHADER_STORAGE_BLOCK, blockIndex, GL_BUFFER_DATA_SIZE);
   1541 			const deUint32				valueIndex	= gl.getProgramResourceIndex(program.getProgram(), GL_BUFFER_VARIABLE, "Output.values");
   1542 			const InterfaceVariableInfo	valueInfo	= getProgramInterfaceVariableInfo(gl, program.getProgram(), GL_BUFFER_VARIABLE, valueIndex);
   1543 			const BufferMemMap			bufMap		(gl, GL_SHADER_STORAGE_BUFFER, 0, blockSize, GL_MAP_READ_BIT);
   1544 			deUint32					valSum		= 0;
   1545 			deUint32					refSum		= 0;
   1546 
   1547 			for (int valNdx = 0; valNdx < numValues; valNdx++)
   1548 			{
   1549 				const deUint32 res = *((const deUint32*)((const deUint8*)bufMap.getPtr() + valueInfo.offset + valueInfo.arrayStride*valNdx));
   1550 
   1551 				valSum += res;
   1552 				refSum += (deUint32)valNdx;
   1553 
   1554 				if (!de::inBounds<deUint32>(res, 0, (deUint32)numValues))
   1555 					throw tcu::TestError(string("Comparison failed for Output.values[") + de::toString(valNdx) + "]");
   1556 			}
   1557 
   1558 			if (valSum != refSum)
   1559 				throw tcu::TestError("Total sum of values in Output.values doesn't match");
   1560 		}
   1561 
   1562 		m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
   1563 		return STOP;
   1564 	}
   1565 
   1566 private:
   1567 	const tcu::IVec3	m_localSize;
   1568 	const tcu::IVec3	m_workSize;
   1569 };
   1570 
   1571 } // anonymous
   1572 
   1573 BasicComputeShaderTests::BasicComputeShaderTests (Context& context)
   1574 	: TestCaseGroup(context, "basic", "Basic Compute Shader Tests")
   1575 {
   1576 }
   1577 
   1578 BasicComputeShaderTests::~BasicComputeShaderTests (void)
   1579 {
   1580 }
   1581 
   1582 void BasicComputeShaderTests::init (void)
   1583 {
   1584 	addChild(new EmptyComputeShaderCase(m_context));
   1585 
   1586 	addChild(new UBOToSSBOInvertCase	(m_context, "ubo_to_ssbo_single_invocation",			"Copy from UBO to SSBO, inverting bits",	256,	tcu::IVec3(1,1,1),	tcu::IVec3(1,1,1)));
   1587 	addChild(new UBOToSSBOInvertCase	(m_context, "ubo_to_ssbo_single_group",					"Copy from UBO to SSBO, inverting bits",	1024,	tcu::IVec3(2,1,4),	tcu::IVec3(1,1,1)));
   1588 	addChild(new UBOToSSBOInvertCase	(m_context, "ubo_to_ssbo_multiple_invocations",			"Copy from UBO to SSBO, inverting bits",	1024,	tcu::IVec3(1,1,1),	tcu::IVec3(2,4,1)));
   1589 	addChild(new UBOToSSBOInvertCase	(m_context, "ubo_to_ssbo_multiple_groups",				"Copy from UBO to SSBO, inverting bits",	1024,	tcu::IVec3(1,4,2),	tcu::IVec3(2,2,4)));
   1590 
   1591 	addChild(new CopyInvertSSBOCase		(m_context, "copy_ssbo_single_invocation",				"Copy between SSBOs, inverting bits",	256,	tcu::IVec3(1,1,1),	tcu::IVec3(1,1,1)));
   1592 	addChild(new CopyInvertSSBOCase		(m_context, "copy_ssbo_multiple_invocations",			"Copy between SSBOs, inverting bits",	1024,	tcu::IVec3(1,1,1),	tcu::IVec3(2,4,1)));
   1593 	addChild(new CopyInvertSSBOCase		(m_context, "copy_ssbo_multiple_groups",				"Copy between SSBOs, inverting bits",	1024,	tcu::IVec3(1,4,2),	tcu::IVec3(2,2,4)));
   1594 
   1595 	addChild(new InvertSSBOInPlaceCase	(m_context, "ssbo_rw_single_invocation",				"Read and write same SSBO",				256,	true,	tcu::IVec3(1,1,1),	tcu::IVec3(1,1,1)));
   1596 	addChild(new InvertSSBOInPlaceCase	(m_context, "ssbo_rw_multiple_groups",					"Read and write same SSBO",				1024,	true,	tcu::IVec3(1,4,2),	tcu::IVec3(2,2,4)));
   1597 
   1598 	addChild(new InvertSSBOInPlaceCase	(m_context, "ssbo_unsized_arr_single_invocation",		"Read and write same SSBO",				256,	false,	tcu::IVec3(1,1,1),	tcu::IVec3(1,1,1)));
   1599 	addChild(new InvertSSBOInPlaceCase	(m_context, "ssbo_unsized_arr_multiple_groups",			"Read and write same SSBO",				1024,	false,	tcu::IVec3(1,4,2),	tcu::IVec3(2,2,4)));
   1600 
   1601 	addChild(new WriteToMultipleSSBOCase(m_context, "write_multiple_arr_single_invocation",		"Write to multiple SSBOs",				256,	true,	tcu::IVec3(1,1,1),	tcu::IVec3(1,1,1)));
   1602 	addChild(new WriteToMultipleSSBOCase(m_context, "write_multiple_arr_multiple_groups",		"Write to multiple SSBOs",				1024,	true,	tcu::IVec3(1,4,2),	tcu::IVec3(2,2,4)));
   1603 
   1604 	addChild(new WriteToMultipleSSBOCase(m_context, "write_multiple_unsized_arr_single_invocation",	"Write to multiple SSBOs",			256,	false,	tcu::IVec3(1,1,1),	tcu::IVec3(1,1,1)));
   1605 	addChild(new WriteToMultipleSSBOCase(m_context, "write_multiple_unsized_arr_multiple_groups",	"Write to multiple SSBOs",			1024,	false,	tcu::IVec3(1,4,2),	tcu::IVec3(2,2,4)));
   1606 
   1607 	addChild(new SSBOLocalBarrierCase	(m_context, "ssbo_local_barrier_single_invocation",		"SSBO local barrier usage",				tcu::IVec3(1,1,1),	tcu::IVec3(1,1,1)));
   1608 	addChild(new SSBOLocalBarrierCase	(m_context, "ssbo_local_barrier_single_group",			"SSBO local barrier usage",				tcu::IVec3(3,2,5),	tcu::IVec3(1,1,1)));
   1609 	addChild(new SSBOLocalBarrierCase	(m_context, "ssbo_local_barrier_multiple_groups",		"SSBO local barrier usage",				tcu::IVec3(3,4,1),	tcu::IVec3(2,7,3)));
   1610 
   1611 	addChild(new SSBOBarrierCase		(m_context, "ssbo_cmd_barrier_single",					"SSBO memory barrier usage",			tcu::IVec3(1,1,1)));
   1612 	addChild(new SSBOBarrierCase		(m_context, "ssbo_cmd_barrier_multiple",				"SSBO memory barrier usage",			tcu::IVec3(11,5,7)));
   1613 
   1614 	addChild(new BasicSharedVarCase		(m_context, "shared_var_single_invocation",				"Basic shared variable usage",			tcu::IVec3(1,1,1),	tcu::IVec3(1,1,1)));
   1615 	addChild(new BasicSharedVarCase		(m_context, "shared_var_single_group",					"Basic shared variable usage",			tcu::IVec3(3,2,5),	tcu::IVec3(1,1,1)));
   1616 	addChild(new BasicSharedVarCase		(m_context, "shared_var_multiple_invocations",			"Basic shared variable usage",			tcu::IVec3(1,1,1),	tcu::IVec3(2,5,4)));
   1617 	addChild(new BasicSharedVarCase		(m_context, "shared_var_multiple_groups",				"Basic shared variable usage",			tcu::IVec3(3,4,1),	tcu::IVec3(2,7,3)));
   1618 
   1619 	addChild(new SharedVarAtomicOpCase	(m_context, "shared_atomic_op_single_invocation",		"Atomic operation with shared var",		tcu::IVec3(1,1,1),	tcu::IVec3(1,1,1)));
   1620 	addChild(new SharedVarAtomicOpCase	(m_context, "shared_atomic_op_single_group",			"Atomic operation with shared var",		tcu::IVec3(3,2,5),	tcu::IVec3(1,1,1)));
   1621 	addChild(new SharedVarAtomicOpCase	(m_context, "shared_atomic_op_multiple_invocations",	"Atomic operation with shared var",		tcu::IVec3(1,1,1),	tcu::IVec3(2,5,4)));
   1622 	addChild(new SharedVarAtomicOpCase	(m_context, "shared_atomic_op_multiple_groups",			"Atomic operation with shared var",		tcu::IVec3(3,4,1),	tcu::IVec3(2,7,3)));
   1623 
   1624 	addChild(new CopyImageToSSBOCase	(m_context, "copy_image_to_ssbo_small",					"Image to SSBO copy",					tcu::IVec2(1,1),	tcu::IVec2(64,64)));
   1625 	addChild(new CopyImageToSSBOCase	(m_context, "copy_image_to_ssbo_large",					"Image to SSBO copy",					tcu::IVec2(2,4),	tcu::IVec2(512,512)));
   1626 
   1627 	addChild(new CopySSBOToImageCase	(m_context, "copy_ssbo_to_image_small",					"SSBO to image copy",					tcu::IVec2(1,1),	tcu::IVec2(64,64)));
   1628 	addChild(new CopySSBOToImageCase	(m_context, "copy_ssbo_to_image_large",					"SSBO to image copy",					tcu::IVec2(2,4),	tcu::IVec2(512,512)));
   1629 
   1630 	addChild(new ImageAtomicOpCase		(m_context, "image_atomic_op_local_size_1",				"Atomic operation with image",			1,	tcu::IVec2(64,64)));
   1631 	addChild(new ImageAtomicOpCase		(m_context, "image_atomic_op_local_size_8",				"Atomic operation with image",			8,	tcu::IVec2(64,64)));
   1632 
   1633 	addChild(new ImageBarrierCase		(m_context, "image_barrier_single",						"Image barrier",						tcu::IVec2(1,1)));
   1634 	addChild(new ImageBarrierCase		(m_context, "image_barrier_multiple",					"Image barrier",						tcu::IVec2(64,64)));
   1635 
   1636 	addChild(new AtomicCounterCase		(m_context, "atomic_counter_single_invocation",			"Basic atomic counter test",			tcu::IVec3(1,1,1),	tcu::IVec3(1,1,1)));
   1637 	addChild(new AtomicCounterCase		(m_context, "atomic_counter_single_group",				"Basic atomic counter test",			tcu::IVec3(3,2,5),	tcu::IVec3(1,1,1)));
   1638 	addChild(new AtomicCounterCase		(m_context, "atomic_counter_multiple_invocations",		"Basic atomic counter test",			tcu::IVec3(1,1,1),	tcu::IVec3(2,5,4)));
   1639 	addChild(new AtomicCounterCase		(m_context, "atomic_counter_multiple_groups",			"Basic atomic counter test",			tcu::IVec3(3,4,1),	tcu::IVec3(2,7,3)));
   1640 }
   1641 
   1642 } // Functional
   1643 } // gles31
   1644 } // deqp
   1645