Home | History | Annotate | Download | only in functional
      1 /*-------------------------------------------------------------------------
      2  * drawElements Quality Program OpenGL ES 3.1 Module
      3  * -------------------------------------------------
      4  *
      5  * Copyright 2014 The Android Open Source Project
      6  *
      7  * Licensed under the Apache License, Version 2.0 (the "License");
      8  * you may not use this file except in compliance with the License.
      9  * You may obtain a copy of the License at
     10  *
     11  *      http://www.apache.org/licenses/LICENSE-2.0
     12  *
     13  * Unless required by applicable law or agreed to in writing, software
     14  * distributed under the License is distributed on an "AS IS" BASIS,
     15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     16  * See the License for the specific language governing permissions and
     17  * limitations under the License.
     18  *
     19  *//*!
     20  * \file
     21  * \brief Synchronization Tests
     22  *//*--------------------------------------------------------------------*/
     23 
     24 #include "es31fSynchronizationTests.hpp"
     25 #include "tcuTestLog.hpp"
     26 #include "tcuSurface.hpp"
     27 #include "tcuRenderTarget.hpp"
     28 #include "gluRenderContext.hpp"
     29 #include "gluShaderProgram.hpp"
     30 #include "gluObjectWrapper.hpp"
     31 #include "gluPixelTransfer.hpp"
     32 #include "gluContextInfo.hpp"
     33 #include "glwFunctions.hpp"
     34 #include "glwEnums.hpp"
     35 #include "deStringUtil.hpp"
     36 #include "deSharedPtr.hpp"
     37 #include "deMemory.h"
     38 #include "deRandom.hpp"
     39 
     40 #include <map>
     41 
     42 namespace deqp
     43 {
     44 namespace gles31
     45 {
     46 namespace Functional
     47 {
     48 namespace
     49 {
     50 
     51 
     52 static bool validateSortedAtomicRampAdditionValueChain (const std::vector<deUint32>& valueChain, deUint32 sumValue, int& invalidOperationNdx, deUint32& errorDelta, deUint32& errorExpected)
     53 {
     54 	std::vector<deUint32> chainDelta(valueChain.size());
     55 
     56 	for (int callNdx = 0; callNdx < (int)valueChain.size(); ++callNdx)
     57 		chainDelta[callNdx] = ((callNdx + 1 == (int)valueChain.size()) ? (sumValue) : (valueChain[callNdx+1])) - valueChain[callNdx];
     58 
     59 	// chainDelta contains now the actual additions applied to the value
     60 	// check there exists an addition ramp form 1 to ...
     61 	std::sort(chainDelta.begin(), chainDelta.end());
     62 
     63 	for (int callNdx = 0; callNdx < (int)valueChain.size(); ++callNdx)
     64 	{
     65 		if ((int)chainDelta[callNdx] != callNdx+1)
     66 		{
     67 			invalidOperationNdx = callNdx;
     68 			errorDelta = chainDelta[callNdx];
     69 			errorExpected = callNdx+1;
     70 
     71 			return false;
     72 		}
     73 	}
     74 
     75 	return true;
     76 }
     77 
     78 static void readBuffer (const glw::Functions& gl, deUint32 target, int numElements, std::vector<deUint32>& result)
     79 {
     80 	const void* ptr = gl.mapBufferRange(target, 0, (int)(sizeof(deUint32) * numElements), GL_MAP_READ_BIT);
     81 	GLU_EXPECT_NO_ERROR(gl.getError(), "map");
     82 
     83 	if (!ptr)
     84 		throw tcu::TestError("mapBufferRange returned NULL");
     85 
     86 	result.resize(numElements);
     87 	memcpy(&result[0], ptr, sizeof(deUint32) * numElements);
     88 
     89 	if (gl.unmapBuffer(target) == GL_FALSE)
     90 		throw tcu::TestError("unmapBuffer returned false");
     91 }
     92 
     93 static deUint32 readBufferUint32 (const glw::Functions& gl, deUint32 target)
     94 {
     95 	std::vector<deUint32> vec;
     96 
     97 	readBuffer(gl, target, 1, vec);
     98 
     99 	return vec[0];
    100 }
    101 
    102 //! Generate a ramp of values from 1 to numElements, and shuffle it
    103 void generateShuffledRamp (int numElements, std::vector<int>& ramp)
    104 {
    105 	de::Random rng(0xabcd);
    106 
    107 	// some positive (non-zero) unique values
    108 	ramp.resize(numElements);
    109 	for (int callNdx = 0; callNdx < numElements; ++callNdx)
    110 		ramp[callNdx] = callNdx + 1;
    111 
    112 	rng.shuffle(ramp.begin(), ramp.end());
    113 }
    114 
    115 class InterInvocationTestCase : public TestCase
    116 {
    117 public:
    118 	enum StorageType
    119 	{
    120 		STORAGE_BUFFER = 0,
    121 		STORAGE_IMAGE,
    122 
    123 		STORAGE_LAST
    124 	};
    125 	enum CaseFlags
    126 	{
    127 		FLAG_ATOMIC				= 0x1,
    128 		FLAG_ALIASING_STORAGES	= 0x2,
    129 		FLAG_IN_GROUP			= 0x4,
    130 	};
    131 
    132 						InterInvocationTestCase		(Context& context, const char* name, const char* desc, StorageType storage, int flags = 0);
    133 						~InterInvocationTestCase	(void);
    134 
    135 private:
    136 	void				init						(void);
    137 	void				deinit						(void);
    138 	IterateResult		iterate						(void);
    139 
    140 	void				runCompute					(void);
    141 	bool				verifyResults				(void);
    142 	virtual std::string	genShaderSource				(void) const = 0;
    143 
    144 protected:
    145 	std::string			genBarrierSource			(void) const;
    146 
    147 	const StorageType	m_storage;
    148 	const bool			m_useAtomic;
    149 	const bool			m_aliasingStorages;
    150 	const bool			m_syncWithGroup;
    151 	const int			m_workWidth;				// !< total work width
    152 	const int			m_workHeight;				// !<     ...    height
    153 	const int			m_localWidth;				// !< group width
    154 	const int			m_localHeight;				// !< group height
    155 	const int			m_elementsPerInvocation;	// !< elements accessed by a single invocation
    156 
    157 private:
    158 	glw::GLuint			m_storageBuf;
    159 	glw::GLuint			m_storageTex;
    160 	glw::GLuint			m_resultBuf;
    161 	glu::ShaderProgram*	m_program;
    162 };
    163 
    164 InterInvocationTestCase::InterInvocationTestCase (Context& context, const char* name, const char* desc, StorageType storage, int flags)
    165 	: TestCase					(context, name, desc)
    166 	, m_storage					(storage)
    167 	, m_useAtomic				((flags & FLAG_ATOMIC) != 0)
    168 	, m_aliasingStorages		((flags & FLAG_ALIASING_STORAGES) != 0)
    169 	, m_syncWithGroup			((flags & FLAG_IN_GROUP) != 0)
    170 	, m_workWidth				(256)
    171 	, m_workHeight				(256)
    172 	, m_localWidth				(16)
    173 	, m_localHeight				(8)
    174 	, m_elementsPerInvocation	(8)
    175 	, m_storageBuf				(0)
    176 	, m_storageTex				(0)
    177 	, m_resultBuf				(0)
    178 	, m_program					(DE_NULL)
    179 {
    180 	DE_ASSERT(m_storage < STORAGE_LAST);
    181 	DE_ASSERT(m_localWidth*m_localHeight <= 128); // minimum MAX_COMPUTE_WORK_GROUP_INVOCATIONS value
    182 }
    183 
    184 InterInvocationTestCase::~InterInvocationTestCase (void)
    185 {
    186 	deinit();
    187 }
    188 
    189 void InterInvocationTestCase::init (void)
    190 {
    191 	const glw::Functions& gl = m_context.getRenderContext().getFunctions();
    192 
    193 	// requirements
    194 
    195 	if (m_useAtomic && m_storage == STORAGE_IMAGE && !m_context.getContextInfo().isExtensionSupported("GL_OES_shader_image_atomic"))
    196 		throw tcu::NotSupportedError("Test requires GL_OES_shader_image_atomic extension");
    197 
    198 	// program
    199 
    200 	m_program = new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(genShaderSource()));
    201 	m_testCtx.getLog() << *m_program;
    202 	if (!m_program->isOk())
    203 		throw tcu::TestError("could not build program");
    204 
    205 	// source
    206 
    207 	if (m_storage == STORAGE_BUFFER)
    208 	{
    209 		const int				bufferElements	= m_workWidth * m_workHeight * m_elementsPerInvocation;
    210 		const int				bufferSize		= bufferElements * sizeof(deUint32);
    211 		std::vector<deUint32>	zeroBuffer		(bufferElements, 0);
    212 
    213 		m_testCtx.getLog() << tcu::TestLog::Message << "Allocating zero-filled buffer for storage, size " << bufferElements << " elements, " << bufferSize << " bytes." << tcu::TestLog::EndMessage;
    214 
    215 		gl.genBuffers(1, &m_storageBuf);
    216 		gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_storageBuf);
    217 		gl.bufferData(GL_SHADER_STORAGE_BUFFER, bufferSize, &zeroBuffer[0], GL_STATIC_DRAW);
    218 		GLU_EXPECT_NO_ERROR(gl.getError(), "gen storage buf");
    219 	}
    220 	else if (m_storage == STORAGE_IMAGE)
    221 	{
    222 		const int				bufferElements	= m_workWidth * m_workHeight * m_elementsPerInvocation;
    223 		const int				bufferSize		= bufferElements * sizeof(deUint32);
    224 
    225 		m_testCtx.getLog() << tcu::TestLog::Message << "Allocating image for storage, size " << m_workWidth << "x" << m_workHeight * m_elementsPerInvocation << ", " << bufferSize << " bytes." << tcu::TestLog::EndMessage;
    226 
    227 		gl.genTextures(1, &m_storageTex);
    228 		gl.bindTexture(GL_TEXTURE_2D, m_storageTex);
    229 		gl.texStorage2D(GL_TEXTURE_2D, 1, GL_R32I, m_workWidth, m_workHeight * m_elementsPerInvocation);
    230 		gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
    231 		gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
    232 		GLU_EXPECT_NO_ERROR(gl.getError(), "gen storage image");
    233 
    234 		// Zero-fill
    235 		m_testCtx.getLog() << tcu::TestLog::Message << "Filling image with 0." << tcu::TestLog::EndMessage;
    236 
    237 		{
    238 			const std::vector<deInt32> zeroBuffer(m_workWidth * m_workHeight * m_elementsPerInvocation, 0);
    239 			gl.texSubImage2D(GL_TEXTURE_2D, 0, 0, 0, m_workWidth, m_workHeight * m_elementsPerInvocation, GL_RED_INTEGER, GL_INT, &zeroBuffer[0]);
    240 			GLU_EXPECT_NO_ERROR(gl.getError(), "specify image contents");
    241 		}
    242 	}
    243 	else
    244 		DE_ASSERT(DE_FALSE);
    245 
    246 	// destination
    247 
    248 	{
    249 		const int				bufferElements	= m_workWidth * m_workHeight;
    250 		const int				bufferSize		= bufferElements * sizeof(deUint32);
    251 		std::vector<deInt32>	negativeBuffer	(bufferElements, -1);
    252 
    253 		m_testCtx.getLog() << tcu::TestLog::Message << "Allocating -1 filled buffer for results, size " << bufferElements << " elements, " << bufferSize << " bytes." << tcu::TestLog::EndMessage;
    254 
    255 		gl.genBuffers(1, &m_resultBuf);
    256 		gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_resultBuf);
    257 		gl.bufferData(GL_SHADER_STORAGE_BUFFER, bufferSize, &negativeBuffer[0], GL_STATIC_DRAW);
    258 		GLU_EXPECT_NO_ERROR(gl.getError(), "gen storage buf");
    259 	}
    260 }
    261 
    262 void InterInvocationTestCase::deinit (void)
    263 {
    264 	if (m_storageBuf)
    265 	{
    266 		m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_storageBuf);
    267 		m_storageBuf = DE_NULL;
    268 	}
    269 
    270 	if (m_storageTex)
    271 	{
    272 		m_context.getRenderContext().getFunctions().deleteTextures(1, &m_storageTex);
    273 		m_storageTex = DE_NULL;
    274 	}
    275 
    276 	if (m_resultBuf)
    277 	{
    278 		m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_resultBuf);
    279 		m_resultBuf = DE_NULL;
    280 	}
    281 
    282 	delete m_program;
    283 	m_program = DE_NULL;
    284 }
    285 
    286 InterInvocationTestCase::IterateResult InterInvocationTestCase::iterate (void)
    287 {
    288 	// Dispatch
    289 	runCompute();
    290 
    291 	// Verify buffer contents
    292 	if (verifyResults())
    293 		m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
    294 	else
    295 		m_testCtx.setTestResult(QP_TEST_RESULT_FAIL, (std::string((m_storage == STORAGE_BUFFER) ? ("buffer") : ("image")) + " content verification failed").c_str());
    296 
    297 	return STOP;
    298 }
    299 
    300 void InterInvocationTestCase::runCompute (void)
    301 {
    302 	const glw::Functions&	gl		= m_context.getRenderContext().getFunctions();
    303 	const int				groupsX	= m_workWidth / m_localWidth;
    304 	const int				groupsY	= m_workHeight / m_localHeight;
    305 
    306 	DE_ASSERT((m_workWidth % m_localWidth) == 0);
    307 	DE_ASSERT((m_workHeight % m_localHeight) == 0);
    308 
    309 	m_testCtx.getLog()
    310 		<< tcu::TestLog::Message
    311 		<< "Dispatching compute.\n"
    312 		<< "	group size: " << m_localWidth << "x" << m_localHeight << "\n"
    313 		<< "	dispatch size: " << groupsX << "x" << groupsY << "\n"
    314 		<< "	total work size: " << m_workWidth << "x" << m_workHeight << "\n"
    315 		<< tcu::TestLog::EndMessage;
    316 
    317 	gl.useProgram(m_program->getProgram());
    318 
    319 	// source
    320 	if (m_storage == STORAGE_BUFFER && !m_aliasingStorages)
    321 	{
    322 		gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, m_storageBuf);
    323 		GLU_EXPECT_NO_ERROR(gl.getError(), "bind source buf");
    324 	}
    325 	else if (m_storage == STORAGE_BUFFER && m_aliasingStorages)
    326 	{
    327 		gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, m_storageBuf);
    328 		gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, m_storageBuf);
    329 		GLU_EXPECT_NO_ERROR(gl.getError(), "bind source buf");
    330 
    331 		m_testCtx.getLog() << tcu::TestLog::Message << "Binding same buffer object to buffer storages." << tcu::TestLog::EndMessage;
    332 	}
    333 	else if (m_storage == STORAGE_IMAGE && !m_aliasingStorages)
    334 	{
    335 		gl.bindImageTexture(1, m_storageTex, 0, GL_FALSE, 0, GL_READ_WRITE, GL_R32I);
    336 		GLU_EXPECT_NO_ERROR(gl.getError(), "bind result buf");
    337 	}
    338 	else if (m_storage == STORAGE_IMAGE && m_aliasingStorages)
    339 	{
    340 		gl.bindImageTexture(1, m_storageTex, 0, GL_FALSE, 0, GL_READ_WRITE, GL_R32I);
    341 		gl.bindImageTexture(2, m_storageTex, 0, GL_FALSE, 0, GL_READ_WRITE, GL_R32I);
    342 
    343 		GLU_EXPECT_NO_ERROR(gl.getError(), "bind result buf");
    344 
    345 		m_testCtx.getLog() << tcu::TestLog::Message << "Binding same texture level to image storages." << tcu::TestLog::EndMessage;
    346 	}
    347 	else
    348 		DE_ASSERT(DE_FALSE);
    349 
    350 	// destination
    351 	gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_resultBuf);
    352 	GLU_EXPECT_NO_ERROR(gl.getError(), "bind result buf");
    353 
    354 	// dispatch
    355 	gl.dispatchCompute(groupsX, groupsY, 1);
    356 	GLU_EXPECT_NO_ERROR(gl.getError(), "dispatchCompute");
    357 }
    358 
    359 bool InterInvocationTestCase::verifyResults (void)
    360 {
    361 	const glw::Functions&	gl					= m_context.getRenderContext().getFunctions();
    362 	const int				errorFloodThreshold	= 5;
    363 	int						numErrorsLogged		= 0;
    364 	const void*				mapped				= DE_NULL;
    365 	std::vector<deInt32>	results				(m_workWidth * m_workHeight);
    366 	bool					error				= false;
    367 
    368 	gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_resultBuf);
    369 	mapped = gl.mapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, m_workWidth * m_workHeight * sizeof(deInt32), GL_MAP_READ_BIT);
    370 	GLU_EXPECT_NO_ERROR(gl.getError(), "map buffer");
    371 
    372 	// copy to properly aligned array
    373 	deMemcpy(&results[0], mapped, m_workWidth * m_workHeight * sizeof(deUint32));
    374 
    375 	if (gl.unmapBuffer(GL_SHADER_STORAGE_BUFFER) != GL_TRUE)
    376 		throw tcu::TestError("memory map store corrupted");
    377 
    378 	// check the results
    379 	for (int ndx = 0; ndx < (int)results.size(); ++ndx)
    380 	{
    381 		if (results[ndx] != 1)
    382 		{
    383 			error = true;
    384 
    385 			if (numErrorsLogged == 0)
    386 				m_testCtx.getLog() << tcu::TestLog::Message << "Result buffer failed, got unexpected values.\n" << tcu::TestLog::EndMessage;
    387 			if (numErrorsLogged++ < errorFloodThreshold)
    388 				m_testCtx.getLog() << tcu::TestLog::Message << "	Error at index " << ndx << ": expected 1, got " << results[ndx] << ".\n" << tcu::TestLog::EndMessage;
    389 			else
    390 			{
    391 				// after N errors, no point continuing verification
    392 				m_testCtx.getLog() << tcu::TestLog::Message << "	-- too many errors, skipping verification --\n" << tcu::TestLog::EndMessage;
    393 				break;
    394 			}
    395 		}
    396 	}
    397 
    398 	if (!error)
    399 		m_testCtx.getLog() << tcu::TestLog::Message << "Result buffer ok." << tcu::TestLog::EndMessage;
    400 	return !error;
    401 }
    402 
    403 std::string InterInvocationTestCase::genBarrierSource (void) const
    404 {
    405 	std::ostringstream buf;
    406 
    407 	if (m_syncWithGroup)
    408 	{
    409 		// Wait until all invocations in this work group have their texture/buffer read/write operations complete
    410 		// \note We could also use memoryBarrierBuffer() or memoryBarrierImage() in place of groupMemoryBarrier() but
    411 		//       we only require intra-workgroup synchronization.
    412 		buf << "\n"
    413 			<< "	groupMemoryBarrier();\n"
    414 			<< "	barrier();\n"
    415 			<< "\n";
    416 	}
    417 	else if (m_storage == STORAGE_BUFFER)
    418 	{
    419 		DE_ASSERT(!m_syncWithGroup);
    420 
    421 		// Waiting only for data written by this invocation. Since all buffer reads and writes are
    422 		// processed in order (within a single invocation), we don't have to do anything.
    423 		buf << "\n";
    424 	}
    425 	else if (m_storage == STORAGE_IMAGE)
    426 	{
    427 		DE_ASSERT(!m_syncWithGroup);
    428 
    429 		// Waiting only for data written by this invocation. But since operations complete in undefined
    430 		// order, we have to wait for them to complete.
    431 		buf << "\n"
    432 			<< "	memoryBarrierImage();\n"
    433 			<< "\n";
    434 	}
    435 	else
    436 		DE_ASSERT(DE_FALSE);
    437 
    438 	return buf.str();
    439 }
    440 
    441 class InvocationBasicCase : public InterInvocationTestCase
    442 {
    443 public:
    444 							InvocationBasicCase		(Context& context, const char* name, const char* desc, StorageType storage, int flags);
    445 private:
    446 	std::string				genShaderSource			(void) const;
    447 	virtual std::string		genShaderMainBlock		(void) const = 0;
    448 };
    449 
    450 InvocationBasicCase::InvocationBasicCase (Context& context, const char* name, const char* desc, StorageType storage, int flags)
    451 	: InterInvocationTestCase(context, name, desc, storage, flags)
    452 {
    453 }
    454 
    455 std::string InvocationBasicCase::genShaderSource (void) const
    456 {
    457 	const bool			useImageAtomics = m_useAtomic && m_storage == STORAGE_IMAGE;
    458 	std::ostringstream	buf;
    459 
    460 	buf << "#version 310 es\n"
    461 		<< ((useImageAtomics) ? ("#extension GL_OES_shader_image_atomic : require\n") : (""))
    462 		<< "layout (local_size_x=" << m_localWidth << ", local_size_y=" << m_localHeight << ") in;\n"
    463 		<< "layout(binding=0, std430) buffer Output\n"
    464 		<< "{\n"
    465 		<< "	highp int values[];\n"
    466 		<< "} sb_result;\n";
    467 
    468 	if (m_storage == STORAGE_BUFFER)
    469 		buf << "layout(binding=1, std430) coherent buffer Storage\n"
    470 			<< "{\n"
    471 			<< "	highp int values[];\n"
    472 			<< "} sb_store;\n"
    473 			<< "\n"
    474 			<< "highp int getIndex (in highp uvec2 localID, in highp int element)\n"
    475 			<< "{\n"
    476 			<< "	highp uint groupNdx = gl_NumWorkGroups.x * gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
    477 			<< "	return int((localID.y * gl_NumWorkGroups.x * gl_NumWorkGroups.y * gl_WorkGroupSize.x) + (groupNdx * gl_WorkGroupSize.x) + localID.x) * " << m_elementsPerInvocation << " + element;\n"
    478 			<< "}\n";
    479 	else if (m_storage == STORAGE_IMAGE)
    480 		buf << "layout(r32i, binding=1) coherent uniform highp iimage2D u_image;\n"
    481 			<< "\n"
    482 			<< "highp ivec2 getCoord (in highp uvec2 localID, in highp int element)\n"
    483 			<< "{\n"
    484 			<< "	return ivec2(int(gl_WorkGroupID.x * gl_WorkGroupSize.x + localID.x), int(gl_WorkGroupID.y * gl_WorkGroupSize.y + localID.y) + element * " << m_workHeight << ");\n"
    485 			<< "}\n";
    486 	else
    487 		DE_ASSERT(DE_FALSE);
    488 
    489 	buf << "\n"
    490 		<< "void main (void)\n"
    491 		<< "{\n"
    492 		<< "	int resultNdx   = int(gl_GlobalInvocationID.y * gl_NumWorkGroups.x * gl_WorkGroupSize.x + gl_GlobalInvocationID.x);\n"
    493 		<< "	int groupNdx    = int(gl_NumWorkGroups.x * gl_WorkGroupID.y + gl_WorkGroupID.x);\n"
    494 		<< "	bool allOk      = true;\n"
    495 		<< "\n"
    496 		<< genShaderMainBlock()
    497 		<< "\n"
    498 		<< "	sb_result.values[resultNdx] = (allOk) ? (1) : (0);\n"
    499 		<< "}\n";
    500 
    501 	return buf.str();
    502 }
    503 
    504 class InvocationWriteReadCase : public InvocationBasicCase
    505 {
    506 public:
    507 					InvocationWriteReadCase		(Context& context, const char* name, const char* desc, StorageType storage, int flags);
    508 private:
    509 	std::string		genShaderMainBlock			(void) const;
    510 };
    511 
    512 InvocationWriteReadCase::InvocationWriteReadCase (Context& context, const char* name, const char* desc, StorageType storage, int flags)
    513 	: InvocationBasicCase(context, name, desc, storage, flags)
    514 {
    515 }
    516 
    517 std::string InvocationWriteReadCase::genShaderMainBlock (void) const
    518 {
    519 	std::ostringstream buf;
    520 
    521 	// write
    522 
    523 	for (int ndx = 0; ndx < m_elementsPerInvocation; ++ndx)
    524 	{
    525 		if (m_storage == STORAGE_BUFFER && m_useAtomic)
    526 			buf << "\tatomicAdd(sb_store.values[getIndex(gl_LocalInvocationID.xy, " << ndx << ")], groupNdx);\n";
    527 		else if (m_storage == STORAGE_BUFFER && !m_useAtomic)
    528 			buf << "\tsb_store.values[getIndex(gl_LocalInvocationID.xy, " << ndx << ")] = groupNdx;\n";
    529 		else if (m_storage == STORAGE_IMAGE && m_useAtomic)
    530 			buf << "\timageAtomicAdd(u_image, getCoord(gl_LocalInvocationID.xy, " << ndx << "), int(groupNdx));\n";
    531 		else if (m_storage == STORAGE_IMAGE && !m_useAtomic)
    532 			buf << "\timageStore(u_image, getCoord(gl_LocalInvocationID.xy, " << ndx << "), ivec4(int(groupNdx), 0, 0, 0));\n";
    533 		else
    534 			DE_ASSERT(DE_FALSE);
    535 	}
    536 
    537 	// barrier
    538 
    539 	buf << genBarrierSource();
    540 
    541 	// read
    542 
    543 	for (int ndx = 0; ndx < m_elementsPerInvocation; ++ndx)
    544 	{
    545 		const std::string localID = (m_syncWithGroup) ? ("(gl_LocalInvocationID.xy + uvec2(" + de::toString(ndx+1) + ", " + de::toString(2*ndx) + ")) % gl_WorkGroupSize.xy") : ("gl_LocalInvocationID.xy");
    546 
    547 		if (m_storage == STORAGE_BUFFER && m_useAtomic)
    548 			buf << "\tallOk = allOk && (atomicExchange(sb_store.values[getIndex(" << localID << ", " << ndx << ")], 0) == groupNdx);\n";
    549 		else if (m_storage == STORAGE_BUFFER && !m_useAtomic)
    550 			buf << "\tallOk = allOk && (sb_store.values[getIndex(" << localID << ", " << ndx << ")] == groupNdx);\n";
    551 		else if (m_storage == STORAGE_IMAGE && m_useAtomic)
    552 			buf << "\tallOk = allOk && (imageAtomicExchange(u_image, getCoord(" << localID << ", " << ndx << "), 0) == groupNdx);\n";
    553 		else if (m_storage == STORAGE_IMAGE && !m_useAtomic)
    554 			buf << "\tallOk = allOk && (imageLoad(u_image, getCoord(" << localID << ", " << ndx << ")).x == groupNdx);\n";
    555 		else
    556 			DE_ASSERT(DE_FALSE);
    557 	}
    558 
    559 	return buf.str();
    560 }
    561 
    562 class InvocationReadWriteCase : public InvocationBasicCase
    563 {
    564 public:
    565 					InvocationReadWriteCase		(Context& context, const char* name, const char* desc, StorageType storage, int flags);
    566 private:
    567 	std::string		genShaderMainBlock			(void) const;
    568 };
    569 
    570 InvocationReadWriteCase::InvocationReadWriteCase (Context& context, const char* name, const char* desc, StorageType storage, int flags)
    571 	: InvocationBasicCase(context, name, desc, storage, flags)
    572 {
    573 }
    574 
    575 std::string InvocationReadWriteCase::genShaderMainBlock (void) const
    576 {
    577 	std::ostringstream buf;
    578 
    579 	// read
    580 
    581 	for (int ndx = 0; ndx < m_elementsPerInvocation; ++ndx)
    582 	{
    583 		const std::string localID = (m_syncWithGroup) ? ("(gl_LocalInvocationID.xy + uvec2(" + de::toString(ndx+1) + ", " + de::toString(2*ndx) + ")) % gl_WorkGroupSize.xy") : ("gl_LocalInvocationID.xy");
    584 
    585 		if (m_storage == STORAGE_BUFFER && m_useAtomic)
    586 			buf << "\tallOk = allOk && (atomicExchange(sb_store.values[getIndex(" << localID << ", " << ndx << ")], 123) == 0);\n";
    587 		else if (m_storage == STORAGE_BUFFER && !m_useAtomic)
    588 			buf << "\tallOk = allOk && (sb_store.values[getIndex(" << localID << ", " << ndx << ")] == 0);\n";
    589 		else if (m_storage == STORAGE_IMAGE && m_useAtomic)
    590 			buf << "\tallOk = allOk && (imageAtomicExchange(u_image, getCoord(" << localID << ", " << ndx << "), 123) == 0);\n";
    591 		else if (m_storage == STORAGE_IMAGE && !m_useAtomic)
    592 			buf << "\tallOk = allOk && (imageLoad(u_image, getCoord(" << localID << ", " << ndx << ")).x == 0);\n";
    593 		else
    594 			DE_ASSERT(DE_FALSE);
    595 	}
    596 
    597 	// barrier
    598 
    599 	buf << genBarrierSource();
    600 
    601 	// write
    602 
    603 	for (int ndx = 0; ndx < m_elementsPerInvocation; ++ndx)
    604 	{
    605 		if (m_storage == STORAGE_BUFFER && m_useAtomic)
    606 			buf << "\tatomicAdd(sb_store.values[getIndex(gl_LocalInvocationID.xy, " << ndx << ")], groupNdx);\n";
    607 		else if (m_storage == STORAGE_BUFFER && !m_useAtomic)
    608 			buf << "\tsb_store.values[getIndex(gl_LocalInvocationID.xy, " << ndx << ")] = groupNdx;\n";
    609 		else if (m_storage == STORAGE_IMAGE && m_useAtomic)
    610 			buf << "\timageAtomicAdd(u_image, getCoord(gl_LocalInvocationID.xy, " << ndx << "), int(groupNdx));\n";
    611 		else if (m_storage == STORAGE_IMAGE && !m_useAtomic)
    612 			buf << "\timageStore(u_image, getCoord(gl_LocalInvocationID.xy, " << ndx << "), ivec4(int(groupNdx), 0, 0, 0));\n";
    613 		else
    614 			DE_ASSERT(DE_FALSE);
    615 	}
    616 
    617 	return buf.str();
    618 }
    619 
    620 class InvocationOverWriteCase : public InvocationBasicCase
    621 {
    622 public:
    623 					InvocationOverWriteCase		(Context& context, const char* name, const char* desc, StorageType storage, int flags);
    624 private:
    625 	std::string		genShaderMainBlock			(void) const;
    626 };
    627 
    628 InvocationOverWriteCase::InvocationOverWriteCase (Context& context, const char* name, const char* desc, StorageType storage, int flags)
    629 	: InvocationBasicCase(context, name, desc, storage, flags)
    630 {
    631 }
    632 
    633 std::string InvocationOverWriteCase::genShaderMainBlock (void) const
    634 {
    635 	std::ostringstream buf;
    636 
    637 	// write
    638 
    639 	for (int ndx = 0; ndx < m_elementsPerInvocation; ++ndx)
    640 	{
    641 		if (m_storage == STORAGE_BUFFER && m_useAtomic)
    642 			buf << "\tatomicAdd(sb_store.values[getIndex(gl_LocalInvocationID.xy, " << ndx << ")], 456);\n";
    643 		else if (m_storage == STORAGE_BUFFER && !m_useAtomic)
    644 			buf << "\tsb_store.values[getIndex(gl_LocalInvocationID.xy, " << ndx << ")] = 456;\n";
    645 		else if (m_storage == STORAGE_IMAGE && m_useAtomic)
    646 			buf << "\timageAtomicAdd(u_image, getCoord(gl_LocalInvocationID.xy, " << ndx << "), 456);\n";
    647 		else if (m_storage == STORAGE_IMAGE && !m_useAtomic)
    648 			buf << "\timageStore(u_image, getCoord(gl_LocalInvocationID.xy, " << ndx << "), ivec4(456, 0, 0, 0));\n";
    649 		else
    650 			DE_ASSERT(DE_FALSE);
    651 	}
    652 
    653 	// barrier
    654 
    655 	buf << genBarrierSource();
    656 
    657 	// write over
    658 
    659 	for (int ndx = 0; ndx < m_elementsPerInvocation; ++ndx)
    660 	{
    661 		// write another invocation's value or our own value depending on test type
    662 		const std::string localID = (m_syncWithGroup) ? ("(gl_LocalInvocationID.xy + uvec2(" + de::toString(ndx+4) + ", " + de::toString(3*ndx) + ")) % gl_WorkGroupSize.xy") : ("gl_LocalInvocationID.xy");
    663 
    664 		if (m_storage == STORAGE_BUFFER && m_useAtomic)
    665 			buf << "\tatomicExchange(sb_store.values[getIndex(" << localID << ", " << ndx << ")], groupNdx);\n";
    666 		else if (m_storage == STORAGE_BUFFER && !m_useAtomic)
    667 			buf << "\tsb_store.values[getIndex(" << localID << ", " << ndx << ")] = groupNdx;\n";
    668 		else if (m_storage == STORAGE_IMAGE && m_useAtomic)
    669 			buf << "\timageAtomicExchange(u_image, getCoord(" << localID << ", " << ndx << "), groupNdx);\n";
    670 		else if (m_storage == STORAGE_IMAGE && !m_useAtomic)
    671 			buf << "\timageStore(u_image, getCoord(" << localID << ", " << ndx << "), ivec4(groupNdx, 0, 0, 0));\n";
    672 		else
    673 			DE_ASSERT(DE_FALSE);
    674 	}
    675 
    676 	// barrier
    677 
    678 	buf << genBarrierSource();
    679 
    680 	// read
    681 
    682 	for (int ndx = 0; ndx < m_elementsPerInvocation; ++ndx)
    683 	{
    684 		// check another invocation's value or our own value depending on test type
    685 		const std::string localID = (m_syncWithGroup) ? ("(gl_LocalInvocationID.xy + uvec2(" + de::toString(ndx+1) + ", " + de::toString(2*ndx) + ")) % gl_WorkGroupSize.xy") : ("gl_LocalInvocationID.xy");
    686 
    687 		if (m_storage == STORAGE_BUFFER && m_useAtomic)
    688 			buf << "\tallOk = allOk && (atomicExchange(sb_store.values[getIndex(" << localID << ", " << ndx << ")], 123) == groupNdx);\n";
    689 		else if (m_storage == STORAGE_BUFFER && !m_useAtomic)
    690 			buf << "\tallOk = allOk && (sb_store.values[getIndex(" << localID << ", " << ndx << ")] == groupNdx);\n";
    691 		else if (m_storage == STORAGE_IMAGE && m_useAtomic)
    692 			buf << "\tallOk = allOk && (imageAtomicExchange(u_image, getCoord(" << localID << ", " << ndx << "), 123) == groupNdx);\n";
    693 		else if (m_storage == STORAGE_IMAGE && !m_useAtomic)
    694 			buf << "\tallOk = allOk && (imageLoad(u_image, getCoord(" << localID << ", " << ndx << ")).x == groupNdx);\n";
    695 		else
    696 			DE_ASSERT(DE_FALSE);
    697 	}
    698 
    699 	return buf.str();
    700 }
    701 
    702 class InvocationAliasWriteCase : public InterInvocationTestCase
    703 {
    704 public:
    705 	enum TestType
    706 	{
    707 		TYPE_WRITE = 0,
    708 		TYPE_OVERWRITE,
    709 
    710 		TYPE_LAST
    711 	};
    712 
    713 					InvocationAliasWriteCase	(Context& context, const char* name, const char* desc, TestType type, StorageType storage, int flags);
    714 private:
    715 	std::string		genShaderSource				(void) const;
    716 
    717 	const TestType	m_type;
    718 };
    719 
    720 InvocationAliasWriteCase::InvocationAliasWriteCase (Context& context, const char* name, const char* desc, TestType type, StorageType storage, int flags)
    721 	: InterInvocationTestCase	(context, name, desc, storage, flags | FLAG_ALIASING_STORAGES)
    722 	, m_type					(type)
    723 {
    724 	DE_ASSERT(type < TYPE_LAST);
    725 }
    726 
    727 std::string InvocationAliasWriteCase::genShaderSource (void) const
    728 {
    729 	const bool			useImageAtomics = m_useAtomic && m_storage == STORAGE_IMAGE;
    730 	std::ostringstream	buf;
    731 
    732 	buf << "#version 310 es\n"
    733 		<< ((useImageAtomics) ? ("#extension GL_OES_shader_image_atomic : require\n") : (""))
    734 		<< "layout (local_size_x=" << m_localWidth << ", local_size_y=" << m_localHeight << ") in;\n"
    735 		<< "layout(binding=0, std430) buffer Output\n"
    736 		<< "{\n"
    737 		<< "	highp int values[];\n"
    738 		<< "} sb_result;\n";
    739 
    740 	if (m_storage == STORAGE_BUFFER)
    741 		buf << "layout(binding=1, std430) coherent buffer Storage0\n"
    742 			<< "{\n"
    743 			<< "	highp int values[];\n"
    744 			<< "} sb_store0;\n"
    745 			<< "layout(binding=2, std430) coherent buffer Storage1\n"
    746 			<< "{\n"
    747 			<< "	highp int values[];\n"
    748 			<< "} sb_store1;\n"
    749 			<< "\n"
    750 			<< "highp int getIndex (in highp uvec2 localID, in highp int element)\n"
    751 			<< "{\n"
    752 			<< "	highp uint groupNdx = gl_NumWorkGroups.x * gl_WorkGroupID.y + gl_WorkGroupID.x;\n"
    753 			<< "	return int((localID.y * gl_NumWorkGroups.x * gl_NumWorkGroups.y * gl_WorkGroupSize.x) + (groupNdx * gl_WorkGroupSize.x) + localID.x) * " << m_elementsPerInvocation << " + element;\n"
    754 			<< "}\n";
    755 	else if (m_storage == STORAGE_IMAGE)
    756 		buf << "layout(r32i, binding=1) coherent uniform highp iimage2D u_image0;\n"
    757 			<< "layout(r32i, binding=2) coherent uniform highp iimage2D u_image1;\n"
    758 			<< "\n"
    759 			<< "highp ivec2 getCoord (in highp uvec2 localID, in highp int element)\n"
    760 			<< "{\n"
    761 			<< "	return ivec2(int(gl_WorkGroupID.x * gl_WorkGroupSize.x + localID.x), int(gl_WorkGroupID.y * gl_WorkGroupSize.y + localID.y) + element * " << m_workHeight << ");\n"
    762 			<< "}\n";
    763 	else
    764 		DE_ASSERT(DE_FALSE);
    765 
    766 	buf << "\n"
    767 		<< "void main (void)\n"
    768 		<< "{\n"
    769 		<< "	int resultNdx   = int(gl_GlobalInvocationID.y * gl_NumWorkGroups.x * gl_WorkGroupSize.x + gl_GlobalInvocationID.x);\n"
    770 		<< "	int groupNdx    = int(gl_NumWorkGroups.x * gl_WorkGroupID.y + gl_WorkGroupID.x);\n"
    771 		<< "	bool allOk      = true;\n"
    772 		<< "\n";
    773 
    774 	if (m_type == TYPE_OVERWRITE)
    775 	{
    776 		// write
    777 
    778 		for (int ndx = 0; ndx < m_elementsPerInvocation; ++ndx)
    779 		{
    780 			if (m_storage == STORAGE_BUFFER && m_useAtomic)
    781 				buf << "\tatomicAdd(sb_store0.values[getIndex(gl_LocalInvocationID.xy, " << ndx << ")], 456);\n";
    782 			else if (m_storage == STORAGE_BUFFER && !m_useAtomic)
    783 				buf << "\tsb_store0.values[getIndex(gl_LocalInvocationID.xy, " << ndx << ")] = 456;\n";
    784 			else if (m_storage == STORAGE_IMAGE && m_useAtomic)
    785 				buf << "\timageAtomicAdd(u_image0, getCoord(gl_LocalInvocationID.xy, " << ndx << "), 456);\n";
    786 			else if (m_storage == STORAGE_IMAGE && !m_useAtomic)
    787 				buf << "\timageStore(u_image0, getCoord(gl_LocalInvocationID.xy, " << ndx << "), ivec4(456, 0, 0, 0));\n";
    788 			else
    789 				DE_ASSERT(DE_FALSE);
    790 		}
    791 
    792 		// barrier
    793 
    794 		buf << genBarrierSource();
    795 	}
    796 	else
    797 		DE_ASSERT(m_type == TYPE_WRITE);
    798 
    799 	// write (again)
    800 
    801 	for (int ndx = 0; ndx < m_elementsPerInvocation; ++ndx)
    802 	{
    803 		const std::string localID = (m_syncWithGroup) ? ("(gl_LocalInvocationID.xy + uvec2(" + de::toString(ndx+2) + ", " + de::toString(2*ndx) + ")) % gl_WorkGroupSize.xy") : ("gl_LocalInvocationID.xy");
    804 
    805 		if (m_storage == STORAGE_BUFFER && m_useAtomic)
    806 			buf << "\tatomicExchange(sb_store1.values[getIndex(" << localID << ", " << ndx << ")], groupNdx);\n";
    807 		else if (m_storage == STORAGE_BUFFER && !m_useAtomic)
    808 			buf << "\tsb_store1.values[getIndex(" << localID << ", " << ndx << ")] = groupNdx;\n";
    809 		else if (m_storage == STORAGE_IMAGE && m_useAtomic)
    810 			buf << "\timageAtomicExchange(u_image1, getCoord(" << localID << ", " << ndx << "), groupNdx);\n";
    811 		else if (m_storage == STORAGE_IMAGE && !m_useAtomic)
    812 			buf << "\timageStore(u_image1, getCoord(" << localID << ", " << ndx << "), ivec4(groupNdx, 0, 0, 0));\n";
    813 		else
    814 			DE_ASSERT(DE_FALSE);
    815 	}
    816 
    817 	// barrier
    818 
    819 	buf << genBarrierSource();
    820 
    821 	// read
    822 
    823 	for (int ndx = 0; ndx < m_elementsPerInvocation; ++ndx)
    824 	{
    825 		if (m_storage == STORAGE_BUFFER && m_useAtomic)
    826 			buf << "\tallOk = allOk && (atomicExchange(sb_store0.values[getIndex(gl_LocalInvocationID.xy, " << ndx << ")], 123) == groupNdx);\n";
    827 		else if (m_storage == STORAGE_BUFFER && !m_useAtomic)
    828 			buf << "\tallOk = allOk && (sb_store0.values[getIndex(gl_LocalInvocationID.xy, " << ndx << ")] == groupNdx);\n";
    829 		else if (m_storage == STORAGE_IMAGE && m_useAtomic)
    830 			buf << "\tallOk = allOk && (imageAtomicExchange(u_image0, getCoord(gl_LocalInvocationID.xy, " << ndx << "), 123) == groupNdx);\n";
    831 		else if (m_storage == STORAGE_IMAGE && !m_useAtomic)
    832 			buf << "\tallOk = allOk && (imageLoad(u_image0, getCoord(gl_LocalInvocationID.xy, " << ndx << ")).x == groupNdx);\n";
    833 		else
    834 			DE_ASSERT(DE_FALSE);
    835 	}
    836 
    837 	// return result
    838 
    839 	buf << "\n"
    840 		<< "	sb_result.values[resultNdx] = (allOk) ? (1) : (0);\n"
    841 		<< "}\n";
    842 
    843 	return buf.str();
    844 }
    845 
    // Plain-data descriptions of the operations that can be streamed into InterCallOperations.
    // All types stay aggregates so that they can be stored inside a union.
    namespace op
    {

    // Write with the given seed to the storage identified by targetHandle.
    struct WriteData
    {
    	int targetHandle;
    	int seed;

    	static WriteData Generate(int targetHandle, int seed)
    	{
    		const WriteData data = { targetHandle, seed };
    		return data;
    	}
    };

    // Read the storage identified by targetHandle, using the given seed.
    struct ReadData
    {
    	int targetHandle;
    	int seed;

    	static ReadData Generate(int targetHandle, int seed)
    	{
    		const ReadData data = { targetHandle, seed };
    		return data;
    	}
    };

    // Marker operation without any payload.
    struct Barrier
    {
    };

    // Interleaved write to one storage; evenOdd selects which half is written.
    struct WriteDataInterleaved
    {
    	int		targetHandle;
    	int		seed;
    	bool	evenOdd;

    	static WriteDataInterleaved Generate(int targetHandle, int seed, bool evenOdd)
    	{
    		const WriteDataInterleaved data = { targetHandle, seed, evenOdd };
    		return data;
    	}
    };

    // Read of a single storage that carries two seeds.
    struct ReadDataInterleaved
    {
    	int targetHandle;
    	int seed0;
    	int seed1;

    	static ReadDataInterleaved Generate(int targetHandle, int seed0, int seed1)
    	{
    		const ReadDataInterleaved data = { targetHandle, seed0, seed1 };
    		return data;
    	}
    };

    // Read of two storages, each paired with its own seed.
    struct ReadMultipleData
    {
    	int targetHandle0;
    	int seed0;
    	int targetHandle1;
    	int seed1;

    	static ReadMultipleData Generate(int targetHandle0, int seed0, int targetHandle1, int seed1)
    	{
    		const ReadMultipleData data = { targetHandle0, seed0, targetHandle1, seed1 };
    		return data;
    	}
    };

    // Read of a storage that takes no seed.
    struct ReadZeroData
    {
    	int targetHandle;

    	static ReadZeroData Generate(int targetHandle)
    	{
    		const ReadZeroData data = { targetHandle };
    		return data;
    	}
    };

    } // namespace op
    956 
    957 class InterCallTestCase;
    958 
    959 class InterCallOperations
    960 {
    961 public:
    962 	InterCallOperations& operator<< (const op::WriteData&);
    963 	InterCallOperations& operator<< (const op::ReadData&);
    964 	InterCallOperations& operator<< (const op::Barrier&);
    965 	InterCallOperations& operator<< (const op::ReadMultipleData&);
    966 	InterCallOperations& operator<< (const op::WriteDataInterleaved&);
    967 	InterCallOperations& operator<< (const op::ReadDataInterleaved&);
    968 	InterCallOperations& operator<< (const op::ReadZeroData&);
    969 
    970 private:
    971 	struct Command
    972 	{
    973 		enum CommandType
    974 		{
    975 			TYPE_WRITE = 0,
    976 			TYPE_READ,
    977 			TYPE_BARRIER,
    978 			TYPE_READ_MULTIPLE,
    979 			TYPE_WRITE_INTERLEAVE,
    980 			TYPE_READ_INTERLEAVE,
    981 			TYPE_READ_ZERO,
    982 
    983 			TYPE_LAST
    984 		};
    985 
    986 		CommandType type;
    987 
    988 		union CommandUnion
    989 		{
    990 			op::WriteData				write;
    991 			op::ReadData				read;
    992 			op::Barrier					barrier;
    993 			op::ReadMultipleData		readMulti;
    994 			op::WriteDataInterleaved	writeInterleave;
    995 			op::ReadDataInterleaved		readInterleave;
    996 			op::ReadZeroData			readZero;
    997 		} u_cmd;
    998 	};
    999 
   1000 	friend class InterCallTestCase;
   1001 
   1002 	std::vector<Command> m_cmds;
   1003 };
   1004 
   1005 InterCallOperations& InterCallOperations::operator<< (const op::WriteData& cmd)
   1006 {
   1007 	m_cmds.push_back(Command());
   1008 	m_cmds.back().type = Command::TYPE_WRITE;
   1009 	m_cmds.back().u_cmd.write = cmd;
   1010 
   1011 	return *this;
   1012 }
   1013 
   1014 InterCallOperations& InterCallOperations::operator<< (const op::ReadData& cmd)
   1015 {
   1016 	m_cmds.push_back(Command());
   1017 	m_cmds.back().type = Command::TYPE_READ;
   1018 	m_cmds.back().u_cmd.read = cmd;
   1019 
   1020 	return *this;
   1021 }
   1022 
   1023 InterCallOperations& InterCallOperations::operator<< (const op::Barrier& cmd)
   1024 {
   1025 	m_cmds.push_back(Command());
   1026 	m_cmds.back().type = Command::TYPE_BARRIER;
   1027 	m_cmds.back().u_cmd.barrier = cmd;
   1028 
   1029 	return *this;
   1030 }
   1031 
   1032 InterCallOperations& InterCallOperations::operator<< (const op::ReadMultipleData& cmd)
   1033 {
   1034 	m_cmds.push_back(Command());
   1035 	m_cmds.back().type = Command::TYPE_READ_MULTIPLE;
   1036 	m_cmds.back().u_cmd.readMulti = cmd;
   1037 
   1038 	return *this;
   1039 }
   1040 
   1041 InterCallOperations& InterCallOperations::operator<< (const op::WriteDataInterleaved& cmd)
   1042 {
   1043 	m_cmds.push_back(Command());
   1044 	m_cmds.back().type = Command::TYPE_WRITE_INTERLEAVE;
   1045 	m_cmds.back().u_cmd.writeInterleave = cmd;
   1046 
   1047 	return *this;
   1048 }
   1049 
   1050 InterCallOperations& InterCallOperations::operator<< (const op::ReadDataInterleaved& cmd)
   1051 {
   1052 	m_cmds.push_back(Command());
   1053 	m_cmds.back().type = Command::TYPE_READ_INTERLEAVE;
   1054 	m_cmds.back().u_cmd.readInterleave = cmd;
   1055 
   1056 	return *this;
   1057 }
   1058 
   1059 InterCallOperations& InterCallOperations::operator<< (const op::ReadZeroData& cmd)
   1060 {
   1061 	m_cmds.push_back(Command());
   1062 	m_cmds.back().type = Command::TYPE_READ_ZERO;
   1063 	m_cmds.back().u_cmd.readZero = cmd;
   1064 
   1065 	return *this;
   1066 }
   1067 
   1068 class InterCallTestCase : public TestCase
   1069 {
   1070 public:
   1071 	enum StorageType
   1072 	{
   1073 		STORAGE_BUFFER = 0,
   1074 		STORAGE_IMAGE,
   1075 
   1076 		STORAGE_LAST
   1077 	};
   1078 	enum Flags
   1079 	{
   1080 		FLAG_USE_ATOMIC	= 1,
   1081 		FLAG_USE_INT	= 2,
   1082 	};
   1083 													InterCallTestCase			(Context& context, const char* name, const char* desc, StorageType storage, int flags, const InterCallOperations& ops);
   1084 													~InterCallTestCase			(void);
   1085 
   1086 private:
   1087 	void											init						(void);
   1088 	void											deinit						(void);
   1089 	IterateResult									iterate						(void);
   1090 	bool											verifyResults				(void);
   1091 
   1092 	void											runCommand					(const op::WriteData& cmd, int stepNdx, int& programFriendlyName);
   1093 	void											runCommand					(const op::ReadData& cmd, int stepNdx, int& programFriendlyName, int& resultStorageFriendlyName);
   1094 	void											runCommand					(const op::Barrier&);
   1095 	void											runCommand					(const op::ReadMultipleData& cmd, int stepNdx, int& programFriendlyName, int& resultStorageFriendlyName);
   1096 	void											runCommand					(const op::WriteDataInterleaved& cmd, int stepNdx, int& programFriendlyName);
   1097 	void											runCommand					(const op::ReadDataInterleaved& cmd, int stepNdx, int& programFriendlyName, int& resultStorageFriendlyName);
   1098 	void											runCommand					(const op::ReadZeroData& cmd, int stepNdx, int& programFriendlyName, int& resultStorageFriendlyName);
   1099 	void											runSingleRead				(int targetHandle, int stepNdx, int& programFriendlyName, int& resultStorageFriendlyName);
   1100 
   1101 	glw::GLuint										genStorage					(int friendlyName);
   1102 	glw::GLuint										genResultStorage			(void);
   1103 	glu::ShaderProgram*								genWriteProgram				(int seed);
   1104 	glu::ShaderProgram*								genReadProgram				(int seed);
   1105 	glu::ShaderProgram*								genReadMultipleProgram		(int seed0, int seed1);
   1106 	glu::ShaderProgram*								genWriteInterleavedProgram	(int seed, bool evenOdd);
   1107 	glu::ShaderProgram*								genReadInterleavedProgram	(int seed0, int seed1);
   1108 	glu::ShaderProgram*								genReadZeroProgram			(void);
   1109 
   1110 	const StorageType								m_storage;
   1111 	const int										m_invocationGridSize;	// !< width and height of the two dimensional work dispatch
   1112 	const int										m_perInvocationSize;	// !< number of elements accessed in single invocation
   1113 	const std::vector<InterCallOperations::Command>	m_cmds;
   1114 	const bool										m_useAtomic;
   1115 	const bool										m_formatInteger;
   1116 
   1117 	std::vector<glu::ShaderProgram*>				m_operationPrograms;
   1118 	std::vector<glw::GLuint>						m_operationResultStorages;
   1119 	std::map<int, glw::GLuint>						m_storageIDs;
   1120 };
   1121 
   1122 InterCallTestCase::InterCallTestCase (Context& context, const char* name, const char* desc, StorageType storage, int flags, const InterCallOperations& ops)
   1123 	: TestCase					(context, name, desc)
   1124 	, m_storage					(storage)
   1125 	, m_invocationGridSize		(512)
   1126 	, m_perInvocationSize		(2)
   1127 	, m_cmds					(ops.m_cmds)
   1128 	, m_useAtomic				((flags & FLAG_USE_ATOMIC) != 0)
   1129 	, m_formatInteger			((flags & FLAG_USE_INT) != 0)
   1130 {
   1131 }
   1132 
   1133 InterCallTestCase::~InterCallTestCase (void)
   1134 {
   1135 	deinit();
   1136 }
   1137 
   1138 void InterCallTestCase::init (void)
   1139 {
   1140 	int programFriendlyName = 0;
   1141 
   1142 	// requirements
   1143 
   1144 	if (m_useAtomic && m_storage == STORAGE_IMAGE && !m_context.getContextInfo().isExtensionSupported("GL_OES_shader_image_atomic"))
   1145 		throw tcu::NotSupportedError("Test requires GL_OES_shader_image_atomic extension");
   1146 
   1147 	// generate resources and validate command list
   1148 
   1149 	m_operationPrograms.resize(m_cmds.size(), DE_NULL);
   1150 	m_operationResultStorages.resize(m_cmds.size(), 0);
   1151 
   1152 	for (int step = 0; step < (int)m_cmds.size(); ++step)
   1153 	{
   1154 		switch (m_cmds[step].type)
   1155 		{
   1156 			case InterCallOperations::Command::TYPE_WRITE:
   1157 			{
   1158 				const op::WriteData& cmd = m_cmds[step].u_cmd.write;
   1159 
   1160 				// new storage handle?
   1161 				if (m_storageIDs.find(cmd.targetHandle) == m_storageIDs.end())
   1162 					m_storageIDs[cmd.targetHandle] = genStorage(cmd.targetHandle);
   1163 
   1164 				// program
   1165 				{
   1166 					glu::ShaderProgram* program = genWriteProgram(cmd.seed);
   1167 
   1168 					m_testCtx.getLog() << tcu::TestLog::Message << "Program #" << ++programFriendlyName << tcu::TestLog::EndMessage;
   1169 					m_testCtx.getLog() << *program;
   1170 
   1171 					if (!program->isOk())
   1172 						throw tcu::TestError("could not build program");
   1173 
   1174 					m_operationPrograms[step] = program;
   1175 				}
   1176 				break;
   1177 			}
   1178 
   1179 			case InterCallOperations::Command::TYPE_READ:
   1180 			{
   1181 				const op::ReadData& cmd = m_cmds[step].u_cmd.read;
   1182 				DE_ASSERT(m_storageIDs.find(cmd.targetHandle) != m_storageIDs.end());
   1183 
   1184 				// program and result storage
   1185 				{
   1186 					glu::ShaderProgram* program = genReadProgram(cmd.seed);
   1187 
   1188 					m_testCtx.getLog() << tcu::TestLog::Message << "Program #" << ++programFriendlyName << tcu::TestLog::EndMessage;
   1189 					m_testCtx.getLog() << *program;
   1190 
   1191 					if (!program->isOk())
   1192 						throw tcu::TestError("could not build program");
   1193 
   1194 					m_operationPrograms[step] = program;
   1195 					m_operationResultStorages[step] = genResultStorage();
   1196 				}
   1197 				break;
   1198 			}
   1199 
   1200 			case InterCallOperations::Command::TYPE_BARRIER:
   1201 			{
   1202 				break;
   1203 			}
   1204 
   1205 			case InterCallOperations::Command::TYPE_READ_MULTIPLE:
   1206 			{
   1207 				const op::ReadMultipleData& cmd = m_cmds[step].u_cmd.readMulti;
   1208 				DE_ASSERT(m_storageIDs.find(cmd.targetHandle0) != m_storageIDs.end());
   1209 				DE_ASSERT(m_storageIDs.find(cmd.targetHandle1) != m_storageIDs.end());
   1210 
   1211 				// program
   1212 				{
   1213 					glu::ShaderProgram* program = genReadMultipleProgram(cmd.seed0, cmd.seed1);
   1214 
   1215 					m_testCtx.getLog() << tcu::TestLog::Message << "Program #" << ++programFriendlyName << tcu::TestLog::EndMessage;
   1216 					m_testCtx.getLog() << *program;
   1217 
   1218 					if (!program->isOk())
   1219 						throw tcu::TestError("could not build program");
   1220 
   1221 					m_operationPrograms[step] = program;
   1222 					m_operationResultStorages[step] = genResultStorage();
   1223 				}
   1224 				break;
   1225 			}
   1226 
   1227 			case InterCallOperations::Command::TYPE_WRITE_INTERLEAVE:
   1228 			{
   1229 				const op::WriteDataInterleaved& cmd = m_cmds[step].u_cmd.writeInterleave;
   1230 
   1231 				// new storage handle?
   1232 				if (m_storageIDs.find(cmd.targetHandle) == m_storageIDs.end())
   1233 					m_storageIDs[cmd.targetHandle] = genStorage(cmd.targetHandle);
   1234 
   1235 				// program
   1236 				{
   1237 					glu::ShaderProgram* program = genWriteInterleavedProgram(cmd.seed, cmd.evenOdd);
   1238 
   1239 					m_testCtx.getLog() << tcu::TestLog::Message << "Program #" << ++programFriendlyName << tcu::TestLog::EndMessage;
   1240 					m_testCtx.getLog() << *program;
   1241 
   1242 					if (!program->isOk())
   1243 						throw tcu::TestError("could not build program");
   1244 
   1245 					m_operationPrograms[step] = program;
   1246 				}
   1247 				break;
   1248 			}
   1249 
   1250 			case InterCallOperations::Command::TYPE_READ_INTERLEAVE:
   1251 			{
   1252 				const op::ReadDataInterleaved& cmd = m_cmds[step].u_cmd.readInterleave;
   1253 				DE_ASSERT(m_storageIDs.find(cmd.targetHandle) != m_storageIDs.end());
   1254 
   1255 				// program
   1256 				{
   1257 					glu::ShaderProgram* program = genReadInterleavedProgram(cmd.seed0, cmd.seed1);
   1258 
   1259 					m_testCtx.getLog() << tcu::TestLog::Message << "Program #" << ++programFriendlyName << tcu::TestLog::EndMessage;
   1260 					m_testCtx.getLog() << *program;
   1261 
   1262 					if (!program->isOk())
   1263 						throw tcu::TestError("could not build program");
   1264 
   1265 					m_operationPrograms[step] = program;
   1266 					m_operationResultStorages[step] = genResultStorage();
   1267 				}
   1268 				break;
   1269 			}
   1270 
   1271 			case InterCallOperations::Command::TYPE_READ_ZERO:
   1272 			{
   1273 				const op::ReadZeroData& cmd = m_cmds[step].u_cmd.readZero;
   1274 
   1275 				// new storage handle?
   1276 				if (m_storageIDs.find(cmd.targetHandle) == m_storageIDs.end())
   1277 					m_storageIDs[cmd.targetHandle] = genStorage(cmd.targetHandle);
   1278 
   1279 				// program
   1280 				{
   1281 					glu::ShaderProgram* program = genReadZeroProgram();
   1282 
   1283 					m_testCtx.getLog() << tcu::TestLog::Message << "Program #" << ++programFriendlyName << tcu::TestLog::EndMessage;
   1284 					m_testCtx.getLog() << *program;
   1285 
   1286 					if (!program->isOk())
   1287 						throw tcu::TestError("could not build program");
   1288 
   1289 					m_operationPrograms[step] = program;
   1290 					m_operationResultStorages[step] = genResultStorage();
   1291 				}
   1292 				break;
   1293 			}
   1294 
   1295 			default:
   1296 				DE_ASSERT(DE_FALSE);
   1297 		}
   1298 	}
   1299 }
   1300 
   1301 void InterCallTestCase::deinit (void)
   1302 {
   1303 	// programs
   1304 	for (int ndx = 0; ndx < (int)m_operationPrograms.size(); ++ndx)
   1305 		delete m_operationPrograms[ndx];
   1306 	m_operationPrograms.clear();
   1307 
   1308 	// result storages
   1309 	for (int ndx = 0; ndx < (int)m_operationResultStorages.size(); ++ndx)
   1310 	{
   1311 		if (m_operationResultStorages[ndx])
   1312 			m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_operationResultStorages[ndx]);
   1313 	}
   1314 	m_operationResultStorages.clear();
   1315 
   1316 	// storage
   1317 	for (std::map<int, glw::GLuint>::const_iterator it = m_storageIDs.begin(); it != m_storageIDs.end(); ++it)
   1318 	{
   1319 		const glw::Functions& gl = m_context.getRenderContext().getFunctions();
   1320 
   1321 		if (m_storage == STORAGE_BUFFER)
   1322 			gl.deleteBuffers(1, &it->second);
   1323 		else if (m_storage == STORAGE_IMAGE)
   1324 			gl.deleteTextures(1, &it->second);
   1325 		else
   1326 			DE_ASSERT(DE_FALSE);
   1327 	}
   1328 	m_storageIDs.clear();
   1329 }
   1330 
   1331 InterCallTestCase::IterateResult InterCallTestCase::iterate (void)
   1332 {
   1333 	int programFriendlyName			= 0;
   1334 	int resultStorageFriendlyName	= 0;
   1335 
   1336 	m_testCtx.getLog() << tcu::TestLog::Message << "Running operations:" << tcu::TestLog::EndMessage;
   1337 
   1338 	// run steps
   1339 
   1340 	for (int step = 0; step < (int)m_cmds.size(); ++step)
   1341 	{
   1342 		switch (m_cmds[step].type)
   1343 		{
   1344 			case InterCallOperations::Command::TYPE_WRITE:				runCommand(m_cmds[step].u_cmd.write,			step,	programFriendlyName);								break;
   1345 			case InterCallOperations::Command::TYPE_READ:				runCommand(m_cmds[step].u_cmd.read,				step,	programFriendlyName, resultStorageFriendlyName);	break;
   1346 			case InterCallOperations::Command::TYPE_BARRIER:			runCommand(m_cmds[step].u_cmd.barrier);																		break;
   1347 			case InterCallOperations::Command::TYPE_READ_MULTIPLE:		runCommand(m_cmds[step].u_cmd.readMulti,		step,	programFriendlyName, resultStorageFriendlyName);	break;
   1348 			case InterCallOperations::Command::TYPE_WRITE_INTERLEAVE:	runCommand(m_cmds[step].u_cmd.writeInterleave,	step,	programFriendlyName);								break;
   1349 			case InterCallOperations::Command::TYPE_READ_INTERLEAVE:	runCommand(m_cmds[step].u_cmd.readInterleave,	step,	programFriendlyName, resultStorageFriendlyName);	break;
   1350 			case InterCallOperations::Command::TYPE_READ_ZERO:			runCommand(m_cmds[step].u_cmd.readZero,			step,	programFriendlyName, resultStorageFriendlyName);	break;
   1351 			default:
   1352 				DE_ASSERT(DE_FALSE);
   1353 		}
   1354 	}
   1355 
   1356 	// read results from result buffers
   1357 	if (verifyResults())
   1358 		m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
   1359 	else
   1360 		m_testCtx.setTestResult(QP_TEST_RESULT_FAIL, (std::string((m_storage == STORAGE_BUFFER) ? ("buffer") : ("image")) + " content verification failed").c_str());
   1361 
   1362 	return STOP;
   1363 }
   1364 
   1365 bool InterCallTestCase::verifyResults (void)
   1366 {
   1367 	int		resultBufferFriendlyName	= 0;
   1368 	bool	allResultsOk				= true;
   1369 	bool	anyResult					= false;
   1370 
   1371 	m_testCtx.getLog() << tcu::TestLog::Message << "Reading verifier program results" << tcu::TestLog::EndMessage;
   1372 
   1373 	for (int step = 0; step < (int)m_cmds.size(); ++step)
   1374 	{
   1375 		const int	errorFloodThreshold	= 5;
   1376 		int			numErrorsLogged		= 0;
   1377 
   1378 		if (m_operationResultStorages[step])
   1379 		{
   1380 			const glw::Functions&	gl		= m_context.getRenderContext().getFunctions();
   1381 			const void*				mapped	= DE_NULL;
   1382 			std::vector<deInt32>	results	(m_invocationGridSize * m_invocationGridSize);
   1383 			bool					error	= false;
   1384 
   1385 			anyResult = true;
   1386 
   1387 			gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_operationResultStorages[step]);
   1388 			mapped = gl.mapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, m_invocationGridSize * m_invocationGridSize * sizeof(deUint32), GL_MAP_READ_BIT);
   1389 			GLU_EXPECT_NO_ERROR(gl.getError(), "map buffer");
   1390 
   1391 			// copy to properly aligned array
   1392 			deMemcpy(&results[0], mapped, m_invocationGridSize * m_invocationGridSize * sizeof(deUint32));
   1393 
   1394 			if (gl.unmapBuffer(GL_SHADER_STORAGE_BUFFER) != GL_TRUE)
   1395 				throw tcu::TestError("memory map store corrupted");
   1396 
   1397 			// check the results
   1398 			for (int ndx = 0; ndx < (int)results.size(); ++ndx)
   1399 			{
   1400 				if (results[ndx] != 1)
   1401 				{
   1402 					error = true;
   1403 
   1404 					if (numErrorsLogged == 0)
   1405 						m_testCtx.getLog() << tcu::TestLog::Message << "Result storage #" << ++resultBufferFriendlyName << " failed, got unexpected values.\n" << tcu::TestLog::EndMessage;
   1406 					if (numErrorsLogged++ < errorFloodThreshold)
   1407 						m_testCtx.getLog() << tcu::TestLog::Message << "	Error at index " << ndx << ": expected 1, got " << results[ndx] << ".\n" << tcu::TestLog::EndMessage;
   1408 					else
   1409 					{
   1410 						// after N errors, no point continuing verification
   1411 						m_testCtx.getLog() << tcu::TestLog::Message << "	-- too many errors, skipping verification --\n" << tcu::TestLog::EndMessage;
   1412 						break;
   1413 					}
   1414 				}
   1415 			}
   1416 
   1417 			if (error)
   1418 			{
   1419 				allResultsOk = false;
   1420 			}
   1421 			else
   1422 				m_testCtx.getLog() << tcu::TestLog::Message << "Result storage #" << ++resultBufferFriendlyName << " ok." << tcu::TestLog::EndMessage;
   1423 		}
   1424 	}
   1425 
   1426 	DE_ASSERT(anyResult);
   1427 	DE_UNREF(anyResult);
   1428 
   1429 	return allResultsOk;
   1430 }
   1431 
   1432 void InterCallTestCase::runCommand (const op::WriteData& cmd, int stepNdx, int& programFriendlyName)
   1433 {
   1434 	const glw::Functions& gl = m_context.getRenderContext().getFunctions();
   1435 
   1436 	m_testCtx.getLog()
   1437 		<< tcu::TestLog::Message
   1438 		<< "Running program #" << ++programFriendlyName << " to write " << ((m_storage == STORAGE_BUFFER) ? ("buffer") : ("image")) << " #" << cmd.targetHandle << ".\n"
   1439 		<< "	Dispatch size: " << m_invocationGridSize << "x" << m_invocationGridSize << "."
   1440 		<< tcu::TestLog::EndMessage;
   1441 
   1442 	gl.useProgram(m_operationPrograms[stepNdx]->getProgram());
   1443 
   1444 	// set destination
   1445 	if (m_storage == STORAGE_BUFFER)
   1446 	{
   1447 		DE_ASSERT(m_storageIDs[cmd.targetHandle]);
   1448 
   1449 		gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_storageIDs[cmd.targetHandle]);
   1450 		GLU_EXPECT_NO_ERROR(gl.getError(), "bind destination buffer");
   1451 	}
   1452 	else if (m_storage == STORAGE_IMAGE)
   1453 	{
   1454 		DE_ASSERT(m_storageIDs[cmd.targetHandle]);
   1455 
   1456 		gl.bindImageTexture(0, m_storageIDs[cmd.targetHandle], 0, GL_FALSE, 0, (m_useAtomic) ? (GL_READ_WRITE) : (GL_WRITE_ONLY), (m_formatInteger) ? (GL_R32I) : (GL_R32F));
   1457 		GLU_EXPECT_NO_ERROR(gl.getError(), "bind destination image");
   1458 	}
   1459 	else
   1460 		DE_ASSERT(DE_FALSE);
   1461 
   1462 	// calc
   1463 	gl.dispatchCompute(m_invocationGridSize, m_invocationGridSize, 1);
   1464 	GLU_EXPECT_NO_ERROR(gl.getError(), "dispatch write");
   1465 }
   1466 
   1467 void InterCallTestCase::runCommand (const op::ReadData& cmd, int stepNdx, int& programFriendlyName, int& resultStorageFriendlyName)
   1468 {
   1469 	runSingleRead(cmd.targetHandle, stepNdx, programFriendlyName, resultStorageFriendlyName);
   1470 }
   1471 
   1472 void InterCallTestCase::runCommand (const op::Barrier& cmd)
   1473 {
   1474 	const glw::Functions& gl = m_context.getRenderContext().getFunctions();
   1475 
   1476 	DE_UNREF(cmd);
   1477 
   1478 	if (m_storage == STORAGE_BUFFER)
   1479 	{
   1480 		m_testCtx.getLog() << tcu::TestLog::Message << "Memory Barrier\n\tbits = GL_SHADER_STORAGE_BARRIER_BIT" << tcu::TestLog::EndMessage;
   1481 		gl.memoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);
   1482 	}
   1483 	else if (m_storage == STORAGE_IMAGE)
   1484 	{
   1485 		m_testCtx.getLog() << tcu::TestLog::Message << "Memory Barrier\n\tbits = GL_SHADER_IMAGE_ACCESS_BARRIER_BIT" << tcu::TestLog::EndMessage;
   1486 		gl.memoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);
   1487 	}
   1488 	else
   1489 		DE_ASSERT(DE_FALSE);
   1490 }
   1491 
   1492 void InterCallTestCase::runCommand (const op::ReadMultipleData& cmd, int stepNdx, int& programFriendlyName, int& resultStorageFriendlyName)
   1493 {
   1494 	const glw::Functions& gl = m_context.getRenderContext().getFunctions();
   1495 
   1496 	m_testCtx.getLog()
   1497 		<< tcu::TestLog::Message
   1498 		<< "Running program #" << ++programFriendlyName << " to verify " << ((m_storage == STORAGE_BUFFER) ? ("buffers") : ("images")) << " #" << cmd.targetHandle0 << " and #" << cmd.targetHandle1 << ".\n"
   1499 		<< "	Writing results to result storage #" << ++resultStorageFriendlyName << ".\n"
   1500 		<< "	Dispatch size: " << m_invocationGridSize << "x" << m_invocationGridSize << "."
   1501 		<< tcu::TestLog::EndMessage;
   1502 
   1503 	gl.useProgram(m_operationPrograms[stepNdx]->getProgram());
   1504 
   1505 	// set sources
   1506 	if (m_storage == STORAGE_BUFFER)
   1507 	{
   1508 		DE_ASSERT(m_storageIDs[cmd.targetHandle0]);
   1509 		DE_ASSERT(m_storageIDs[cmd.targetHandle1]);
   1510 
   1511 		gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, m_storageIDs[cmd.targetHandle0]);
   1512 		gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, m_storageIDs[cmd.targetHandle1]);
   1513 		GLU_EXPECT_NO_ERROR(gl.getError(), "bind source buffers");
   1514 	}
   1515 	else if (m_storage == STORAGE_IMAGE)
   1516 	{
   1517 		DE_ASSERT(m_storageIDs[cmd.targetHandle0]);
   1518 		DE_ASSERT(m_storageIDs[cmd.targetHandle1]);
   1519 
   1520 		gl.bindImageTexture(1, m_storageIDs[cmd.targetHandle0], 0, GL_FALSE, 0, (m_useAtomic) ? (GL_READ_WRITE) : (GL_READ_ONLY), (m_formatInteger) ? (GL_R32I) : (GL_R32F));
   1521 		gl.bindImageTexture(2, m_storageIDs[cmd.targetHandle1], 0, GL_FALSE, 0, (m_useAtomic) ? (GL_READ_WRITE) : (GL_READ_ONLY), (m_formatInteger) ? (GL_R32I) : (GL_R32F));
   1522 		GLU_EXPECT_NO_ERROR(gl.getError(), "bind source images");
   1523 	}
   1524 	else
   1525 		DE_ASSERT(DE_FALSE);
   1526 
   1527 	// set destination
   1528 	DE_ASSERT(m_operationResultStorages[stepNdx]);
   1529 	gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, m_operationResultStorages[stepNdx]);
   1530 	GLU_EXPECT_NO_ERROR(gl.getError(), "bind result buffer");
   1531 
   1532 	// calc
   1533 	gl.dispatchCompute(m_invocationGridSize, m_invocationGridSize, 1);
   1534 	GLU_EXPECT_NO_ERROR(gl.getError(), "dispatch read multi");
   1535 }
   1536 
   // Runs one interleaved write step.
   //
   // Logs the step, activates the program stored for stepNdx, binds the target
   // storage (buffer binding 0 or image unit 0) and dispatches a grid that is
   // half as wide as the regular invocation grid.
   //
   // \param cmd                  Supplies targetHandle (key into m_storageIDs) and
   //                             evenOdd (only used for the log text here).
   // \param stepNdx              Index into m_operationPrograms.
   // \param programFriendlyName  Running program counter; incremented before it is logged.
   1537 void InterCallTestCase::runCommand (const op::WriteDataInterleaved& cmd, int stepNdx, int& programFriendlyName)
   1538 {
   1539 	const glw::Functions&	gl			= m_context.getRenderContext().getFunctions();
   1540 	const bool				isBuffer	= (m_storage == STORAGE_BUFFER);
   1541 	const int				programName	= ++programFriendlyName;
   1542 
   1543 	m_testCtx.getLog()
   1544 		<< tcu::TestLog::Message
   1545 		<< "Running program #" << programName << " to write " << ((isBuffer) ? ("buffer") : ("image")) << " #" << cmd.targetHandle << ".\n"
   1546 		<< "	Writing to every " << ((cmd.evenOdd) ? ("even") : ("odd")) << " " << ((isBuffer) ? ("element") : ("column")) << ".\n"
   1547 		<< "	Dispatch size: " << m_invocationGridSize / 2 << "x" << m_invocationGridSize << "."
   1548 		<< tcu::TestLog::EndMessage;
   1549 
   1550 	gl.useProgram(m_operationPrograms[stepNdx]->getProgram());
   1551 
        	// Destination: the storage object goes to binding point / image unit 0.
   1552 	if (isBuffer)
   1553 	{
   1554 		const glw::GLuint dstBuffer = m_storageIDs[cmd.targetHandle];
   1555 		DE_ASSERT(dstBuffer);
   1556 		gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, dstBuffer);
   1557 		GLU_EXPECT_NO_ERROR(gl.getError(), "bind destination buffer");
   1558 	}
   1559 	else if (m_storage == STORAGE_IMAGE)
   1560 	{
   1561 		const glw::GLuint dstImage = m_storageIDs[cmd.targetHandle];
   1562 		DE_ASSERT(dstImage);
   1563 		gl.bindImageTexture(0, dstImage, 0, GL_FALSE, 0, (m_useAtomic) ? (GL_READ_WRITE) : (GL_WRITE_ONLY), (m_formatInteger) ? (GL_R32I) : (GL_R32F));
   1564 		GLU_EXPECT_NO_ERROR(gl.getError(), "bind destination image");
   1565 	}
   1566 	else
   1567 		DE_ASSERT(DE_FALSE);
   1568 
        	// Only half of the columns are dispatched; the other half belongs to the opposite parity.
   1569 	gl.dispatchCompute(m_invocationGridSize / 2, m_invocationGridSize, 1);
   1570 	GLU_EXPECT_NO_ERROR(gl.getError(), "dispatch write");
   1571 }
   1572 
   // Runs the verification step for an interleaved read command by forwarding
   // the command's targetHandle and all counters to runSingleRead().
   1573 void InterCallTestCase::runCommand (const op::ReadDataInterleaved& cmd, int stepNdx, int& programFriendlyName, int& resultStorageFriendlyName)
   1574 {
   1575 	runSingleRead(cmd.targetHandle, stepNdx, programFriendlyName, resultStorageFriendlyName);
   1576 }
   1577 
   // Runs the verification step for a read-zero command by forwarding the
   // command's targetHandle and all counters to runSingleRead().
   1578 void InterCallTestCase::runCommand (const op::ReadZeroData& cmd, int stepNdx, int& programFriendlyName, int& resultStorageFriendlyName)
   1579 {
   1580 	runSingleRead(cmd.targetHandle, stepNdx, programFriendlyName, resultStorageFriendlyName);
   1581 }
   1582 
   // Runs a verification step that reads a single storage object.
   //
   // Logs the step, activates the program stored for stepNdx, binds the source
   // storage to binding point / image unit 1 and the step's result buffer to
   // shader storage binding 0, then dispatches the full invocation grid.
   //
   // \param targetHandle               Key into m_storageIDs selecting the source storage.
   // \param stepNdx                    Index into m_operationPrograms and m_operationResultStorages.
   // \param programFriendlyName        Running program counter; incremented before it is logged.
   // \param resultStorageFriendlyName  Running result storage counter; incremented before it is logged.
   1583 void InterCallTestCase::runSingleRead (int targetHandle, int stepNdx, int& programFriendlyName, int& resultStorageFriendlyName)
   1584 {
   1585 	const glw::Functions&	gl			= m_context.getRenderContext().getFunctions();
   1586 	const bool				isBuffer	= (m_storage == STORAGE_BUFFER);
   1587 	const int				programName	= ++programFriendlyName;
   1588 	const int				resultName	= ++resultStorageFriendlyName;
   1589 
   1590 	m_testCtx.getLog()
   1591 		<< tcu::TestLog::Message
   1592 		<< "Running program #" << programName << " to verify " << ((isBuffer) ? ("buffer") : ("image")) << " #" << targetHandle << ".\n"
   1593 		<< "	Writing results to result storage #" << resultName << ".\n"
   1594 		<< "	Dispatch size: " << m_invocationGridSize << "x" << m_invocationGridSize << "."
   1595 		<< tcu::TestLog::EndMessage;
   1596 
   1597 	gl.useProgram(m_operationPrograms[stepNdx]->getProgram());
   1598 
        	// Source: the storage under test goes to binding point / image unit 1.
   1599 	if (isBuffer)
   1600 	{
   1601 		const glw::GLuint srcBuffer = m_storageIDs[targetHandle];
   1602 		DE_ASSERT(srcBuffer);
   1603 		gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, srcBuffer);
   1604 		GLU_EXPECT_NO_ERROR(gl.getError(), "bind source buffer");
   1605 	}
   1606 	else if (m_storage == STORAGE_IMAGE)
   1607 	{
   1608 		const glw::GLuint srcImage = m_storageIDs[targetHandle];
   1609 		DE_ASSERT(srcImage);
   1610 		gl.bindImageTexture(1, srcImage, 0, GL_FALSE, 0, (m_useAtomic) ? (GL_READ_WRITE) : (GL_READ_ONLY), (m_formatInteger) ? (GL_R32I) : (GL_R32F));
   1611 		GLU_EXPECT_NO_ERROR(gl.getError(), "bind source image");
   1612 	}
   1613 	else
   1614 		DE_ASSERT(DE_FALSE);
   1615 
        	// Destination: the per-step result buffer always sits at shader storage binding 0.
   1616 	const glw::GLuint resultBuffer = m_operationResultStorages[stepNdx];
   1617 	DE_ASSERT(resultBuffer);
   1618 	gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 0, resultBuffer);
   1619 	GLU_EXPECT_NO_ERROR(gl.getError(), "bind result buffer");
   1620 
   1621 	gl.dispatchCompute(m_invocationGridSize, m_invocationGridSize, 1);
   1622 	GLU_EXPECT_NO_ERROR(gl.getError(), "dispatch read");
   1623 }
   1623 
   // Creates one zero-filled storage object of the kind selected by m_storage.
   //
   // Buffer storage: a shader storage buffer holding
   // gridSize * gridSize * perInvocationSize 32-bit elements.
   // Image storage: an immutable single-level 2D texture (R32I or R32F) of
   // gridSize x (gridSize * perInvocationSize) texels with nearest filtering.
   //
   // \param friendlyName  Number used only in the log messages.
   // \return GL name of the created object, or 0 for an unknown storage type.
   1624 glw::GLuint InterCallTestCase::genStorage (int friendlyName)
   1625 {
   1626 	const glw::Functions&	gl			= m_context.getRenderContext().getFunctions();
   1627 	tcu::TestLog&			log			= m_testCtx.getLog();
   1628 	glw::GLuint				storageID	= 0;
   1629 
   1630 	if (m_storage == STORAGE_BUFFER)
   1631 	{
   1632 		const int	numElements	= m_invocationGridSize * m_invocationGridSize * m_perInvocationSize;
   1633 		const int	bufferSize	= numElements * ((m_formatInteger) ? (sizeof(deInt32)) : (sizeof(glw::GLfloat)));
   1634 
   1635 		log << tcu::TestLog::Message << "Creating buffer #" << friendlyName << ", size " << bufferSize << " bytes." << tcu::TestLog::EndMessage;
   1636 
   1637 		gl.genBuffers(1, &storageID);
   1638 		gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, storageID);
   1639 
        		// The initial contents are zeros of the element type in use.
   1640 		if (m_formatInteger)
   1641 		{
   1642 			const std::vector<deUint32> intZeros(numElements, 0);
   1643 			gl.bufferData(GL_SHADER_STORAGE_BUFFER, bufferSize, &intZeros[0], GL_STATIC_DRAW);
   1644 		}
   1645 		else
   1646 		{
   1647 			const std::vector<float> floatZeros(numElements, 0.0f);
   1648 			gl.bufferData(GL_SHADER_STORAGE_BUFFER, bufferSize, &floatZeros[0], GL_STATIC_DRAW);
   1649 		}
   1650 
   1651 		GLU_EXPECT_NO_ERROR(gl.getError(), "gen buffer");
   1652 	}
   1653 	else if (m_storage == STORAGE_IMAGE)
   1654 	{
   1655 		const int			width			= m_invocationGridSize;
   1656 		const int			height			= m_invocationGridSize * m_perInvocationSize;
   1657 		const glw::GLenum	internalFormat	= (m_formatInteger) ? (GL_R32I) : (GL_R32F);
   1658 
   1659 		log
   1660 			<< tcu::TestLog::Message
   1661 			<< "Creating image #" << friendlyName << ", size " << width << "x" << height
   1662 			<< ", internalformat = " << ((m_formatInteger) ? ("r32i") : ("r32f"))
   1663 			<< ", size = " << (width*height*sizeof(deUint32)) << " bytes."
   1664 			<< tcu::TestLog::EndMessage;
   1665 
   1666 		gl.genTextures(1, &storageID);
   1667 		gl.bindTexture(GL_TEXTURE_2D, storageID);
   1668 		gl.texStorage2D(GL_TEXTURE_2D, 1, internalFormat, width, height);
   1669 		gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
   1670 		gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
   1671 		GLU_EXPECT_NO_ERROR(gl.getError(), "gen image");
   1672 
   1673 		log
   1674 			<< tcu::TestLog::Message
   1675 			<< "Filling image with 0"
   1676 			<< tcu::TestLog::EndMessage;
   1677 
        		// Upload format/type must match the internal format chosen above.
   1678 		if (m_formatInteger)
   1679 		{
   1680 			const std::vector<deInt32> intZeros(width * height, 0);
   1681 			gl.texSubImage2D(GL_TEXTURE_2D, 0, 0, 0, width, height, GL_RED_INTEGER, GL_INT, &intZeros[0]);
   1682 		}
   1683 		else
   1684 		{
   1685 			const std::vector<float> floatZeros(width * height, 0.0f);
   1686 			gl.texSubImage2D(GL_TEXTURE_2D, 0, 0, 0, width, height, GL_RED, GL_FLOAT, &floatZeros[0]);
   1687 		}
   1688 
   1689 		GLU_EXPECT_NO_ERROR(gl.getError(), "specify image contents");
   1690 	}
   1691 	else
   1692 	{
        		// Unknown storage type: storageID is still 0 and is returned as such.
   1693 		DE_ASSERT(DE_FALSE);
   1694 	}
   1695 
   1696 	return storageID;
   1697 }
   1704 
   // Creates the shader storage buffer a verification program writes its
   // per-invocation verdicts to: one deUint32-sized slot per grid invocation.
   // The data store is allocated without initial contents.
   //
   // \return GL name of the created buffer.
   1705 glw::GLuint InterCallTestCase::genResultStorage (void)
   1706 {
   1707 	const glw::Functions&	gl				= m_context.getRenderContext().getFunctions();
   1708 	const int				numInvocations	= m_invocationGridSize * m_invocationGridSize;
   1709 	glw::GLuint				resultBuffer	= 0;
   1710 
   1711 	gl.genBuffers(1, &resultBuffer);
   1712 	gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, resultBuffer);
   1713 	gl.bufferData(GL_SHADER_STORAGE_BUFFER, numInvocations * sizeof(deUint32), DE_NULL, GL_STATIC_DRAW);
   1714 	GLU_EXPECT_NO_ERROR(gl.getError(), "gen buffer");
   1715 	return resultBuffer;
   1716 }
   1717 
   // Generates the compute program for a plain write step.
   //
   // The generated shader has a 1x1 local size. Every invocation derives a
   // linear index groupNdx from its global invocation ID and stores that value
   // m_perInvocationSize times into the storage bound at binding 0:
   //  - buffer: sb_out.values[(groupNdx + seed + writeNdx*grid*grid) % (grid*grid*perInvocationSize)]
   //  - image:  texel ((x + seed + writeNdx*100) % grid, y + writeNdx*grid)
   // where grid is m_invocationGridSize. When m_useAtomic is set the stores are
   // emitted as atomicExchange / imageAtomicExchange and the storage is
   // declared coherent.
   //
   // \param seed  Offset baked into the generated addressing expressions.
   // \return Program object allocated with new; this function does not release it.
   1718 glu::ShaderProgram* InterCallTestCase::genWriteProgram (int seed)
   1719 {
   1720 	const bool			useImageAtomics = m_useAtomic && m_storage == STORAGE_IMAGE;
   1721 	std::ostringstream	buf;
   1722 
        	// The extension directive is only emitted when image atomics are used.
   1723 	buf << "#version 310 es\n"
   1724 		<< ((useImageAtomics) ? ("#extension GL_OES_shader_image_atomic : require\n") : (""))
   1725 		<< "layout (local_size_x = 1, local_size_y = 1) in;\n";
   1726 
        	// Destination declaration at binding 0.
   1727 	if (m_storage == STORAGE_BUFFER)
   1728 		buf << "layout(binding=0, std430) " << ((m_useAtomic) ? ("coherent ") : ("")) << "buffer Buffer\n"
   1729 			<< "{\n"
   1730 			<< "	highp " << ((m_formatInteger) ? ("int") : ("float")) << " values[];\n"
   1731 			<< "} sb_out;\n";
   1732 	else if (m_storage == STORAGE_IMAGE)
   1733 		buf << "layout(" << ((m_formatInteger) ? ("r32i") : ("r32f")) << ", binding=0) " << ((m_useAtomic) ? ("coherent ") : ("writeonly ")) << "uniform highp " << ((m_formatInteger) ? ("iimage2D") : ("image2D")) << " u_imageOut;\n";
   1734 	else
   1735 		DE_ASSERT(DE_FALSE);
   1736 
   1737 	buf << "\n"
   1738 		<< "void main (void)\n"
   1739 		<< "{\n"
   1740 		<< "	uvec3 size    = gl_NumWorkGroups * gl_WorkGroupSize;\n"
   1741 		<< "	int groupNdx  = int(size.x * size.y * gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x);\n"
   1742 		<< "\n";
   1743 
   1744 	// Write to buffer/image m_perInvocationSize elements
   1745 	if (m_storage == STORAGE_BUFFER)
   1746 	{
   1747 		for (int writeNdx = 0; writeNdx < m_perInvocationSize; ++writeNdx)
   1748 		{
        			// Statement prefix: atomic call opener or plain indentation.
   1749 			if (m_useAtomic)
   1750 				buf << "	atomicExchange(";
   1751 			else
   1752 				buf << "	";
   1753 
   1754 			buf << "sb_out.values[(groupNdx + " << seed + writeNdx*m_invocationGridSize*m_invocationGridSize << ") % " << m_invocationGridSize*m_invocationGridSize*m_perInvocationSize << "]";
   1755 
        			// Statement suffix: second atomic argument or plain assignment.
   1756 			if (m_useAtomic)
   1757 				buf << ", " << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n";
   1758 			else
   1759 				buf << " = " << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx);\n";
   1760 		}
   1761 	}
   1762 	else if (m_storage == STORAGE_IMAGE)
   1763 	{
   1764 		for (int writeNdx = 0; writeNdx < m_perInvocationSize; ++writeNdx)
   1765 		{
   1766 			if (m_useAtomic)
   1767 				buf << "	imageAtomicExchange";
   1768 			else
   1769 				buf << "	imageStore";
   1770 
   1771 			buf << "(u_imageOut, ivec2((int(gl_GlobalInvocationID.x) + " << (seed + writeNdx*100) << ") % " << m_invocationGridSize << ", int(gl_GlobalInvocationID.y) + " << writeNdx*m_invocationGridSize << "), ";
   1772 
        			// imageAtomicExchange takes a scalar, imageStore a 4-component vector.
   1773 			if (m_useAtomic)
   1774 				buf << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n";
   1775 			else
   1776 				buf << ((m_formatInteger) ? ("ivec4(int(groupNdx), 0, 0, 0)") : ("vec4(float(groupNdx), 0.0, 0.0, 0.0)")) << ");\n";
   1777 		}
   1778 	}
   1779 	else
   1780 		DE_ASSERT(DE_FALSE);
   1781 
   1782 	buf << "}\n";
   1783 
   1784 	return new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(buf.str()));
   1785 }
   1786 
   // Generates the compute program that verifies data from a single storage.
   //
   // Every invocation (1x1 local size) reads m_perInvocationSize values from the
   // storage bound at binding 1, using the same seed-based addressing
   // expressions as genWriteProgram(), and compares each with its own linear
   // index groupNdx. The verdict (1 = all equal, 0 otherwise) is written to
   // sb_result.resultOk[groupNdx] in the buffer bound at binding 0.
   // When m_useAtomic is set the reads are emitted as atomicExchange /
   // imageAtomicExchange with a zero operand, so the generated shader also
   // overwrites each element it reads with zero.
   //
   // \param seed  Offset baked into the generated addressing expressions.
   // \return Program object allocated with new; this function does not release it.
   1787 glu::ShaderProgram* InterCallTestCase::genReadProgram (int seed)
   1788 {
   1789 	const bool			useImageAtomics = m_useAtomic && m_storage == STORAGE_IMAGE;
   1790 	std::ostringstream	buf;
   1791 
   1792 	buf << "#version 310 es\n"
   1793 		<< ((useImageAtomics) ? ("#extension GL_OES_shader_image_atomic : require\n") : (""))
   1794 		<< "layout (local_size_x = 1, local_size_y = 1) in;\n";
   1795 
        	// Source declaration at binding 1.
   1796 	if (m_storage == STORAGE_BUFFER)
   1797 		buf << "layout(binding=1, std430) " << ((m_useAtomic) ? ("coherent ") : ("")) << "buffer Buffer\n"
   1798 			<< "{\n"
   1799 			<< "	highp " << ((m_formatInteger) ? ("int") : ("float")) << " values[];\n"
   1800 			<< "} sb_in;\n";
   1801 	else if (m_storage == STORAGE_IMAGE)
   1802 		buf << "layout(" << ((m_formatInteger) ? ("r32i") : ("r32f")) << ", binding=1) " << ((m_useAtomic) ? ("coherent ") : ("readonly ")) << "uniform highp " << ((m_formatInteger) ? ("iimage2D") : ("image2D")) << " u_imageIn;\n";
   1803 	else
   1804 		DE_ASSERT(DE_FALSE);
   1805 
        	// Result buffer declaration and the common prologue of main().
   1806 	buf << "layout(binding=0, std430) buffer ResultBuffer\n"
   1807 		<< "{\n"
   1808 		<< "	highp int resultOk[];\n"
   1809 		<< "} sb_result;\n"
   1810 		<< "\n"
   1811 		<< "void main (void)\n"
   1812 		<< "{\n"
   1813 		<< "	uvec3 size = gl_NumWorkGroups * gl_WorkGroupSize;\n"
   1814 		<< "	int groupNdx = int(size.x * size.y * gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x);\n"
   1815 		<< "	" << ((m_formatInteger) ? ("int") : ("float")) << " zero = " << ((m_formatInteger) ? ("0") : ("0.0")) << ";\n"
   1816 		<< "	bool allOk = true;\n"
   1817 		<< "\n";
   1818 
   1819 	// Verify data
   1820 
   1821 	if (m_storage == STORAGE_BUFFER)
   1822 	{
   1823 		for (int readNdx = 0; readNdx < m_perInvocationSize; ++readNdx)
   1824 		{
   1825 			if (!m_useAtomic)
   1826 				buf << "	allOk = allOk && (sb_in.values[(groupNdx + "
   1827 					<< seed + readNdx*m_invocationGridSize*m_invocationGridSize << ") % " << m_invocationGridSize*m_invocationGridSize*m_perInvocationSize << "] == "
   1828 					<< ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n";
   1829 			else
   1830 				buf << "	allOk = allOk && (atomicExchange(sb_in.values[(groupNdx + "
   1831 					<< seed + readNdx*m_invocationGridSize*m_invocationGridSize << ") % " << m_invocationGridSize*m_invocationGridSize*m_perInvocationSize << "], zero) == "
   1832 					<< ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n";
   1833 		}
   1834 	}
   1835 	else if (m_storage == STORAGE_IMAGE)
   1836 	{
   1837 		for (int readNdx = 0; readNdx < m_perInvocationSize; ++readNdx)
   1838 		{
        			// Coordinates are built from unsigned terms here (note the "u" suffixes).
   1839 			if (!m_useAtomic)
   1840 				buf	<< "	allOk = allOk && (imageLoad(u_imageIn, ivec2((gl_GlobalInvocationID.x + "
   1841 					<< (seed + readNdx*100) << "u) % " << m_invocationGridSize << "u, gl_GlobalInvocationID.y + " << readNdx*m_invocationGridSize << "u)).x == "
   1842 					<< ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n";
   1843 			else
   1844 				buf << "	allOk = allOk && (imageAtomicExchange(u_imageIn, ivec2((gl_GlobalInvocationID.x + "
   1845 					<< (seed + readNdx*100) << "u) % " << m_invocationGridSize << "u, gl_GlobalInvocationID.y + " << readNdx*m_invocationGridSize << "u), zero) == "
   1846 					<< ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n";
   1847 		}
   1848 	}
   1849 	else
   1850 		DE_ASSERT(DE_FALSE);
   1851 
   1852 	buf << "	sb_result.resultOk[groupNdx] = (allOk) ? (1) : (0);\n"
   1853 		<< "}\n";
   1854 
   1855 	return new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(buf.str()));
   1856 }
   1857 
   // Generates the compute program that verifies two storages in one dispatch.
   //
   // The shader declares two sources (bindings 1 and 2) and, for every
   // readNdx in [0, m_perInvocationSize), checks one value from each source
   // against the invocation's linear index groupNdx. Source 0 is addressed with
   // seed0 and source 1 with seed1. The verdict goes to
   // sb_result.resultOk[groupNdx] (binding 0). With m_useAtomic the reads are
   // emitted as exchanges with a zero operand.
   //
   // \param seed0  Offset baked into the addressing of the source at binding 1.
   // \param seed1  Offset baked into the addressing of the source at binding 2.
   // \return Program object allocated with new; this function does not release it.
   1858 glu::ShaderProgram* InterCallTestCase::genReadMultipleProgram (int seed0, int seed1)
   1859 {
   1860 	const bool			useImageAtomics = m_useAtomic && m_storage == STORAGE_IMAGE;
   1861 	std::ostringstream	buf;
   1862 
   1863 	buf << "#version 310 es\n"
   1864 		<< ((useImageAtomics) ? ("#extension GL_OES_shader_image_atomic : require\n") : (""))
   1865 		<< "layout (local_size_x = 1, local_size_y = 1) in;\n";
   1866 
        	// Two source declarations: binding 1 and binding 2.
   1867 	if (m_storage == STORAGE_BUFFER)
   1868 		buf << "layout(binding=1, std430) " << ((m_useAtomic) ? ("coherent ") : ("")) << "buffer Buffer0\n"
   1869 			<< "{\n"
   1870 			<< "	highp " << ((m_formatInteger) ? ("int") : ("float")) << " values[];\n"
   1871 			<< "} sb_in0;\n"
   1872 			<< "layout(binding=2, std430) " << ((m_useAtomic) ? ("coherent ") : ("")) << "buffer Buffer1\n"
   1873 			<< "{\n"
   1874 			<< "	highp " << ((m_formatInteger) ? ("int") : ("float")) << " values[];\n"
   1875 			<< "} sb_in1;\n";
   1876 	else if (m_storage == STORAGE_IMAGE)
   1877 		buf << "layout(" << ((m_formatInteger) ? ("r32i") : ("r32f")) << ", binding=1) " << ((m_useAtomic) ? ("coherent ") : ("readonly ")) << "uniform highp " << ((m_formatInteger) ? ("iimage2D") : ("image2D")) << " u_imageIn0;\n"
   1878 			<< "layout(" << ((m_formatInteger) ? ("r32i") : ("r32f")) << ", binding=2) " << ((m_useAtomic) ? ("coherent ") : ("readonly ")) << "uniform highp " << ((m_formatInteger) ? ("iimage2D") : ("image2D")) << " u_imageIn1;\n";
   1879 	else
   1880 		DE_ASSERT(DE_FALSE);
   1881 
   1882 	buf << "layout(binding=0, std430) buffer ResultBuffer\n"
   1883 		<< "{\n"
   1884 		<< "	highp int resultOk[];\n"
   1885 		<< "} sb_result;\n"
   1886 		<< "\n"
   1887 		<< "void main (void)\n"
   1888 		<< "{\n"
   1889 		<< "	uvec3 size = gl_NumWorkGroups * gl_WorkGroupSize;\n"
   1890 		<< "	int groupNdx = int(size.x * size.y * gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x);\n"
   1891 		<< "	" << ((m_formatInteger) ? ("int") : ("float")) << " zero = " << ((m_formatInteger) ? ("0") : ("0.0")) << ";\n"
   1892 		<< "	bool allOk = true;\n"
   1893 		<< "\n";
   1894 
   1895 	// Verify data
   1896 
        	// Each loop iteration emits two checks: one against source 0, one against source 1.
   1897 	if (m_storage == STORAGE_BUFFER)
   1898 	{
   1899 		for (int readNdx = 0; readNdx < m_perInvocationSize; ++readNdx)
   1900 			buf << "	allOk = allOk && (" << ((m_useAtomic) ? ("atomicExchange(") : ("")) << "sb_in0.values[(groupNdx + " << seed0 + readNdx*m_invocationGridSize*m_invocationGridSize << ") % " << m_invocationGridSize*m_invocationGridSize*m_perInvocationSize << "]" << ((m_useAtomic) ? (", zero)") : ("")) << " == " << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n"
   1901 				<< "	allOk = allOk && (" << ((m_useAtomic) ? ("atomicExchange(") : ("")) << "sb_in1.values[(groupNdx + " << seed1 + readNdx*m_invocationGridSize*m_invocationGridSize << ") % " << m_invocationGridSize*m_invocationGridSize*m_perInvocationSize << "]" << ((m_useAtomic) ? (", zero)") : ("")) << " == " << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n";
   1902 	}
   1903 	else if (m_storage == STORAGE_IMAGE)
   1904 	{
   1905 		for (int readNdx = 0; readNdx < m_perInvocationSize; ++readNdx)
   1906 			buf << "	allOk = allOk && (" << ((m_useAtomic) ? ("imageAtomicExchange") : ("imageLoad")) << "(u_imageIn0, ivec2((gl_GlobalInvocationID.x + " << (seed0 + readNdx*100) << "u) % " << m_invocationGridSize << "u, gl_GlobalInvocationID.y + " << readNdx*m_invocationGridSize << "u)" << ((m_useAtomic) ? (", zero)") : (").x")) << " == " << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n"
   1907 				<< "	allOk = allOk && (" << ((m_useAtomic) ? ("imageAtomicExchange") : ("imageLoad")) << "(u_imageIn1, ivec2((gl_GlobalInvocationID.x + " << (seed1 + readNdx*100) << "u) % " << m_invocationGridSize << "u, gl_GlobalInvocationID.y + " << readNdx*m_invocationGridSize << "u)" << ((m_useAtomic) ? (", zero)") : (").x")) << " == " << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n";
   1908 	}
   1909 	else
   1910 		DE_ASSERT(DE_FALSE);
   1911 
   1912 	buf << "	sb_result.resultOk[groupNdx] = (allOk) ? (1) : (0);\n"
   1913 		<< "}\n";
   1914 
   1915 	return new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(buf.str()));
   1916 }
   1917 
   // Generates the compute program for an interleaved write step.
   //
   // Same layout as the program from genWriteProgram(), except that the
   // generated addressing only touches every second slot:
   //  - buffer: index ((groupNdx + seed + writeNdx*grid*grid/2) % (grid*grid/2*perInvocationSize)) * 2 + parity
   //  - image:  column ((x + seed + writeNdx*100) % (grid/2)) * 2 + parity, row y + writeNdx*grid
   // where grid is m_invocationGridSize and parity is 0 when evenOdd is true,
   // 1 otherwise. The value stored is the invocation's linear index groupNdx.
   //
   // \param seed     Offset baked into the generated addressing expressions.
   // \param evenOdd  true selects the "+ 0" slots, false the "+ 1" slots.
   // \return Program object allocated with new; this function does not release it.
   1918 glu::ShaderProgram* InterCallTestCase::genWriteInterleavedProgram (int seed, bool evenOdd)
   1919 {
   1920 	const bool			useImageAtomics = m_useAtomic && m_storage == STORAGE_IMAGE;
   1921 	std::ostringstream	buf;
   1922 
   1923 	buf << "#version 310 es\n"
   1924 		<< ((useImageAtomics) ? ("#extension GL_OES_shader_image_atomic : require\n") : (""))
   1925 		<< "layout (local_size_x = 1, local_size_y = 1) in;\n";
   1926 
   1927 	if (m_storage == STORAGE_BUFFER)
   1928 		buf << "layout(binding=0, std430) " << ((m_useAtomic) ? ("coherent ") : ("")) << "buffer Buffer\n"
   1929 			<< "{\n"
   1930 			<< "	highp " << ((m_formatInteger) ? ("int") : ("float")) << " values[];\n"
   1931 			<< "} sb_out;\n";
   1932 	else if (m_storage == STORAGE_IMAGE)
   1933 		buf << "layout(" << ((m_formatInteger) ? ("r32i") : ("r32f")) << ", binding=0) " << ((m_useAtomic) ? ("coherent ") : ("writeonly ")) << "uniform highp " << ((m_formatInteger) ? ("iimage2D") : ("image2D")) << " u_imageOut;\n";
   1934 	else
   1935 		DE_ASSERT(DE_FALSE);
   1936 
   1937 	buf << "\n"
   1938 		<< "void main (void)\n"
   1939 		<< "{\n"
   1940 		<< "	uvec3 size    = gl_NumWorkGroups * gl_WorkGroupSize;\n"
   1941 		<< "	int groupNdx  = int(size.x * size.y * gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x);\n"
   1942 		<< "\n";
   1943 
   1944 	// Write to buffer/image m_perInvocationSize elements
   1945 	if (m_storage == STORAGE_BUFFER)
   1946 	{
   1947 		for (int writeNdx = 0; writeNdx < m_perInvocationSize; ++writeNdx)
   1948 		{
   1949 			if (m_useAtomic)
   1950 				buf << "	atomicExchange(";
   1951 			else
   1952 				buf << "	";
   1953 
        			// "* 2 + parity" spreads the writes over every second element.
   1954 			buf << "sb_out.values[((groupNdx + " << seed + writeNdx*m_invocationGridSize*m_invocationGridSize / 2 << ") % " << m_invocationGridSize*m_invocationGridSize / 2 * m_perInvocationSize  << ") * 2 + " << ((evenOdd) ? (0) : (1)) << "]";
   1955 
   1956 			if (m_useAtomic)
   1957 				buf << ", " << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n";
   1958 			else
   1959 				buf << "= " << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx);\n";
   1960 		}
   1961 	}
   1962 	else if (m_storage == STORAGE_IMAGE)
   1963 	{
   1964 		for (int writeNdx = 0; writeNdx < m_perInvocationSize; ++writeNdx)
   1965 		{
   1966 			if (m_useAtomic)
   1967 				buf << "	imageAtomicExchange";
   1968 			else
   1969 				buf << "	imageStore";
   1970 
        			// "* 2 + parity" spreads the writes over every second column.
   1971 			buf << "(u_imageOut, ivec2(((int(gl_GlobalInvocationID.x) + " << (seed + writeNdx*100) << ") % " << m_invocationGridSize / 2 << ") * 2 + " << ((evenOdd) ? (0) : (1)) << ", int(gl_GlobalInvocationID.y) + " << writeNdx*m_invocationGridSize << "), ";
   1972 
   1973 			if (m_useAtomic)
   1974 				buf << ((m_formatInteger) ? ("int") : ("float")) << "(groupNdx));\n";
   1975 			else
   1976 				buf << ((m_formatInteger) ? ("ivec4(int(groupNdx), 0, 0, 0)") : ("vec4(float(groupNdx), 0.0, 0.0, 0.0)")) << ");\n";
   1977 		}
   1978 	}
   1979 	else
   1980 		DE_ASSERT(DE_FALSE);
   1981 
   1982 	buf << "}\n";
   1983 
   1984 	return new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(buf.str()));
   1985 }
   1986 
   // Generates the compute program that verifies interleaved data.
   //
   // Besides groupNdx the shader computes interleavedGroupNdx, the linear index
   // obtained when the grid's x extent and the invocation's x coordinate are
   // both halved (>> 1). Invocations with an even groupNdx check the "+ 0"
   // slots using seed0; the others check the "+ 1" slots using seed1. The
   // expected value in both cases is interleavedGroupNdx. The verdict goes to
   // sb_result.resultOk[groupNdx] (binding 0). With m_useAtomic the reads are
   // emitted as exchanges with a zero operand.
   //
   // \param seed0  Offset baked into the addressing of the "+ 0" slots.
   // \param seed1  Offset baked into the addressing of the "+ 1" slots.
   // \return Program object allocated with new; this function does not release it.
   1987 glu::ShaderProgram* InterCallTestCase::genReadInterleavedProgram (int seed0, int seed1)
   1988 {
   1989 	const bool			useImageAtomics = m_useAtomic && m_storage == STORAGE_IMAGE;
   1990 	std::ostringstream	buf;
   1991 
   1992 	buf << "#version 310 es\n"
   1993 		<< ((useImageAtomics) ? ("#extension GL_OES_shader_image_atomic : require\n") : (""))
   1994 		<< "layout (local_size_x = 1, local_size_y = 1) in;\n";
   1995 
   1996 	if (m_storage == STORAGE_BUFFER)
   1997 		buf << "layout(binding=1, std430) " << ((m_useAtomic) ? ("coherent ") : ("")) << "buffer Buffer\n"
   1998 			<< "{\n"
   1999 			<< "	highp " << ((m_formatInteger) ? ("int") : ("float")) << " values[];\n"
   2000 			<< "} sb_in;\n";
   2001 	else if (m_storage == STORAGE_IMAGE)
   2002 		buf << "layout(" << ((m_formatInteger) ? ("r32i") : ("r32f")) << ", binding=1) " << ((m_useAtomic) ? ("coherent ") : ("readonly ")) << "uniform highp " << ((m_formatInteger) ? ("iimage2D") : ("image2D")) << " u_imageIn;\n";
   2003 	else
   2004 		DE_ASSERT(DE_FALSE);
   2005 
   2006 	buf << "layout(binding=0, std430) buffer ResultBuffer\n"
   2007 		<< "{\n"
   2008 		<< "	highp int resultOk[];\n"
   2009 		<< "} sb_result;\n"
   2010 		<< "\n"
   2011 		<< "void main (void)\n"
   2012 		<< "{\n"
   2013 		<< "	uvec3 size = gl_NumWorkGroups * gl_WorkGroupSize;\n"
   2014 		<< "	int groupNdx = int(size.x * size.y * gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x);\n"
   2015 		<< "	int interleavedGroupNdx = int((size.x >> 1U) * size.y * gl_GlobalInvocationID.z + (size.x >> 1U) * gl_GlobalInvocationID.y + (gl_GlobalInvocationID.x >> 1U));\n"
   2016 		<< "	" << ((m_formatInteger) ? ("int") : ("float")) << " zero = " << ((m_formatInteger) ? ("0") : ("0.0")) << ";\n"
   2017 		<< "	bool allOk = true;\n"
   2018 		<< "\n";
   2019 
   2020 	// Verify data
   2021 
   2022 	if (m_storage == STORAGE_BUFFER)
   2023 	{
        		// Even invocations: "+ 0" slots addressed with seed0.
   2024 		buf << "	if (groupNdx % 2 == 0)\n"
   2025 			<< "	{\n";
   2026 		for (int readNdx = 0; readNdx < m_perInvocationSize; ++readNdx)
   2027 			buf << "		allOk = allOk && ("
   2028 				<< ((m_useAtomic) ? ("atomicExchange(") : ("")) << "sb_in.values[((interleavedGroupNdx + " << seed0 + readNdx*m_invocationGridSize*m_invocationGridSize / 2 << ") % " << m_invocationGridSize*m_invocationGridSize*m_perInvocationSize / 2 << ") * 2 + 0]"
   2029 				<< ((m_useAtomic) ? (", zero)") : ("")) << " == " << ((m_formatInteger) ? ("int") : ("float")) << "(interleavedGroupNdx));\n";
        		// Odd invocations: "+ 1" slots addressed with seed1.
   2030 		buf << "	}\n"
   2031 			<< "	else\n"
   2032 			<< "	{\n";
   2033 		for (int readNdx = 0; readNdx < m_perInvocationSize; ++readNdx)
   2034 			buf << "		allOk = allOk && ("
   2035 				<< ((m_useAtomic) ? ("atomicExchange(") : ("")) << "sb_in.values[((interleavedGroupNdx + " << seed1 + readNdx*m_invocationGridSize*m_invocationGridSize / 2 << ") % " << m_invocationGridSize*m_invocationGridSize*m_perInvocationSize / 2 << ") * 2 + 1]"
   2036 				<< ((m_useAtomic) ? (", zero)") : ("")) << " == " << ((m_formatInteger) ? ("int") : ("float")) << "(interleavedGroupNdx));\n";
   2037 		buf << "	}\n";
   2038 	}
   2039 	else if (m_storage == STORAGE_IMAGE)
   2040 	{
        		// Even invocations: "+ 0" columns addressed with seed0.
   2041 		buf << "	if (groupNdx % 2 == 0)\n"
   2042 			<< "	{\n";
   2043 		for (int readNdx = 0; readNdx < m_perInvocationSize; ++readNdx)
   2044 			buf << "		allOk = allOk && ("
   2045 				<< ((m_useAtomic) ? ("imageAtomicExchange") : ("imageLoad"))
   2046 				<< "(u_imageIn, ivec2(((int(gl_GlobalInvocationID.x >> 1U) + " << (seed0 + readNdx*100) << ") % " << m_invocationGridSize / 2 << ") * 2 + 0, int(gl_GlobalInvocationID.y) + " << readNdx*m_invocationGridSize << ")"
   2047 				<< ((m_useAtomic) ? (", zero)") : (").x")) << " == " << ((m_formatInteger) ? ("int") : ("float")) << "(interleavedGroupNdx));\n";
        		// Odd invocations: "+ 1" columns addressed with seed1.
   2048 		buf << "	}\n"
   2049 			<< "	else\n"
   2050 			<< "	{\n";
   2051 		for (int readNdx = 0; readNdx < m_perInvocationSize; ++readNdx)
   2052 			buf << "		allOk = allOk && ("
   2053 				<< ((m_useAtomic) ? ("imageAtomicExchange") : ("imageLoad"))
   2054 				<< "(u_imageIn, ivec2(((int(gl_GlobalInvocationID.x >> 1U) + " << (seed1 + readNdx*100) << ") % " << m_invocationGridSize / 2 << ") * 2 + 1, int(gl_GlobalInvocationID.y) + " << readNdx*m_invocationGridSize << ")"
   2055 				<< ((m_useAtomic) ? (", zero)") : (").x")) << " == " << ((m_formatInteger) ? ("int") : ("float")) << "(interleavedGroupNdx));\n";
   2056 		buf << "	}\n";
   2057 	}
   2058 	else
   2059 		DE_ASSERT(DE_FALSE);
   2060 
   2061 	buf << "	sb_result.resultOk[groupNdx] = (allOk) ? (1) : (0);\n"
   2062 		<< "}\n";
   2063 
   2064 	return new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(buf.str()));
   2065 }
   2066 
   // Generates the compute program that verifies a storage contains only zeros.
   //
   // Every invocation checks m_perInvocationSize values against zero:
   //  - buffer: sb_in.values[groupNdx * perInvocationSize + readNdx]
   //  - image:  texel (x, y + readNdx*grid), grid being m_invocationGridSize
   // and writes the verdict to sb_result.resultOk[groupNdx] (binding 0).
   // With m_useAtomic the reads are emitted as exchanges whose operand is the
   // shader variable "anything" (5 / 5.0), so the generated shader replaces
   // each element it reads with that value.
   //
   // \return Program object allocated with new; this function does not release it.
   2067 glu::ShaderProgram*	InterCallTestCase::genReadZeroProgram (void)
   2068 {
   2069 	const bool			useImageAtomics = m_useAtomic && m_storage == STORAGE_IMAGE;
   2070 	std::ostringstream	buf;
   2071 
   2072 	buf << "#version 310 es\n"
   2073 		<< ((useImageAtomics) ? ("#extension GL_OES_shader_image_atomic : require\n") : (""))
   2074 		<< "layout (local_size_x = 1, local_size_y = 1) in;\n";
   2075 
   2076 	if (m_storage == STORAGE_BUFFER)
   2077 		buf << "layout(binding=1, std430) " << ((m_useAtomic) ? ("coherent ") : ("")) << "buffer Buffer\n"
   2078 			<< "{\n"
   2079 			<< "	highp " << ((m_formatInteger) ? ("int") : ("float")) << " values[];\n"
   2080 			<< "} sb_in;\n";
   2081 	else if (m_storage == STORAGE_IMAGE)
   2082 		buf << "layout(" << ((m_formatInteger) ? ("r32i") : ("r32f")) << ", binding=1) " << ((m_useAtomic) ? ("coherent ") : ("readonly ")) << "uniform highp " << ((m_formatInteger) ? ("iimage2D") : ("image2D")) << " u_imageIn;\n";
   2083 	else
   2084 		DE_ASSERT(DE_FALSE);
   2085 
   2086 	buf << "layout(binding=0, std430) buffer ResultBuffer\n"
   2087 		<< "{\n"
   2088 		<< "	highp int resultOk[];\n"
   2089 		<< "} sb_result;\n"
   2090 		<< "\n"
   2091 		<< "void main (void)\n"
   2092 		<< "{\n"
   2093 		<< "	uvec3 size = gl_NumWorkGroups * gl_WorkGroupSize;\n"
   2094 		<< "	int groupNdx = int(size.x * size.y * gl_GlobalInvocationID.z + size.x*gl_GlobalInvocationID.y + gl_GlobalInvocationID.x);\n"
   2095 		<< "	" << ((m_formatInteger) ? ("int") : ("float")) << " anything = " << ((m_formatInteger) ? ("5") : ("5.0")) << ";\n"
   2096 		<< "	bool allOk = true;\n"
   2097 		<< "\n";
   2098 
   2099 	// Verify data
   2100 
   2101 	if (m_storage == STORAGE_BUFFER)
   2102 	{
        		// No seed here: each invocation checks its own consecutive run of elements.
   2103 		for (int readNdx = 0; readNdx < m_perInvocationSize; ++readNdx)
   2104 			buf << "	allOk = allOk && ("
   2105 				<< ((m_useAtomic) ? ("atomicExchange(") : ("")) << "sb_in.values[groupNdx * " << m_perInvocationSize << " + " << readNdx << "]"
   2106 				<< ((m_useAtomic) ? (", anything)") : ("")) << " == " << ((m_formatInteger) ? ("0") : ("0.0")) << ");\n";
   2107 	}
   2108 	else if (m_storage == STORAGE_IMAGE)
   2109 	{
   2110 		for (int readNdx = 0; readNdx < m_perInvocationSize; ++readNdx)
   2111 			buf << "	allOk = allOk && ("
   2112 			<< ((m_useAtomic) ? ("imageAtomicExchange") : ("imageLoad")) << "(u_imageIn, ivec2(gl_GlobalInvocationID.x, gl_GlobalInvocationID.y + " << (readNdx*m_invocationGridSize) << "u)"
   2113 			<< ((m_useAtomic) ? (", anything)") : (").x")) << " == " << ((m_formatInteger) ? ("0") : ("0.0")) << ");\n";
   2114 	}
   2115 	else
   2116 		DE_ASSERT(DE_FALSE);
   2117 
   2118 	buf << "	sb_result.resultOk[groupNdx] = (allOk) ? (1) : (0);\n"
   2119 		<< "}\n";
   2120 
   2121 	return new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(buf.str()));
   2122 }
   2123 
   // Test case that runs one compute program m_numCalls times over a work size
   // of m_workSize, giving each call a unique u_atomicDelta uniform value.
   // init() creates one work buffer plus one intermediate result buffer per
   // call and builds the program; deinit() releases all of them.
   2124 class SSBOConcurrentAtomicCase : public TestCase
   2125 {
   2126 public:
   2127 
   2128 							SSBOConcurrentAtomicCase	(Context& context, const char* name, const char* description, int numCalls, int workSize);
   2129 							~SSBOConcurrentAtomicCase	(void);
   2130 
   2131 	void					init						(void);
   2132 	void					deinit						(void);
   2133 	IterateResult			iterate						(void);
   2134 
   2135 private:
        	// Returns the source of the compute shader built in init().
   2136 	std::string				genComputeSource			(void) const;
   2137 
        	// Number of dispatches; also the number of intermediate result buffers.
   2138 	const int				m_numCalls;
        	// Element count of every buffer and the x group count logged by iterate().
   2139 	const int				m_workSize;
        	// Created with new in init(), deleted in deinit().
   2140 	glu::ShaderProgram*		m_program;
        	// GL name of the work buffer; 0 when not allocated.
   2141 	deUint32				m_bufferID;
        	// GL names of the per-call buffers created in init().
   2142 	std::vector<deUint32>	m_intermediateResultBuffers;
   2143 };
   2144 
   // Stores the test parameters; no GL objects are created until init().
   //
   // \param context      Test context forwarded to TestCase.
   // \param name         Case name forwarded to TestCase.
   // \param description  Case description forwarded to TestCase.
   // \param numCalls     Stored in m_numCalls.
   // \param workSize     Stored in m_workSize.
   2145 SSBOConcurrentAtomicCase::SSBOConcurrentAtomicCase (Context& context, const char* name, const char* description, int numCalls, int workSize)
   2146 	: TestCase		(context, name, description)
   2147 	, m_numCalls	(numCalls)
   2148 	, m_workSize	(workSize)
   2149 	, m_program		(DE_NULL)
        	// m_bufferID is an integer GL object name, not a pointer: use 0 ("no
        	// buffer", as tested by deinit()) instead of the null-pointer macro.
   2150 	, m_bufferID	(0)
   2151 {
   2152 }
   2153 
   // Releases any remaining GL buffers and the program by calling deinit().
   2154 SSBOConcurrentAtomicCase::~SSBOConcurrentAtomicCase (void)
   2155 {
   2156 	deinit();
   2157 }
   2158 
   // Allocates the GL resources for the case.
   //
   // Creates the work buffer (m_bufferID) and m_numCalls intermediate result
   // buffers, each holding m_workSize zero-initialised deUint32 values, then
   // builds and logs the compute program.
   //
   // Throws tcu::TestError when the program does not build.
   2159 void SSBOConcurrentAtomicCase::init (void)
   2160 {
   2161 	const glw::Functions&		gl			= m_context.getRenderContext().getFunctions();
   2162 	const std::vector<deUint32>	zeros		(m_workSize, 0);
   2163 	const deUint32* const		zerosPtr	= &zeros[0];
   2164 
        	// Work buffer.
   2165 	gl.genBuffers(1, &m_bufferID);
   2166 	gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_bufferID);
   2167 	gl.bufferData(GL_SHADER_STORAGE_BUFFER, sizeof(deUint32) * m_workSize, zerosPtr, GL_DYNAMIC_COPY);
   2168 
        	// One intermediate result buffer per call.
   2169 	for (int callNdx = 0; callNdx < m_numCalls; ++callNdx)
   2170 	{
   2171 		deUint32 intermediateBuffer = 0;
   2172 
   2173 		gl.genBuffers(1, &intermediateBuffer);
   2174 		gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, intermediateBuffer);
   2175 		gl.bufferData(GL_SHADER_STORAGE_BUFFER, sizeof(deUint32) * m_workSize, zerosPtr, GL_DYNAMIC_COPY);
   2176 
        		// Record the name before the error check so deinit() can delete the buffer if the check throws.
   2177 		m_intermediateResultBuffers.push_back(intermediateBuffer);
   2178 		GLU_EXPECT_NO_ERROR(gl.getError(), "gen buffers");
   2179 	}
   2180 
        	// Program.
   2181 	{
   2182 		const std::string computeSource = genComputeSource();
   2183 		m_program = new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(computeSource));
   2184 		m_testCtx.getLog() << *m_program;
   2185 	}
   2186 	if (!m_program->isOk())
   2187 		throw tcu::TestError("could not build program");
   2188 }
   2189 
   2190 void SSBOConcurrentAtomicCase::deinit (void)
   2191 {
   2192 	if (m_bufferID)
   2193 	{
   2194 		m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_bufferID);
   2195 		m_bufferID = 0;
   2196 	}
   2197 
   2198 	for (int ndx = 0; ndx < (int)m_intermediateResultBuffers.size(); ++ndx)
   2199 		m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_intermediateResultBuffers[ndx]);
   2200 	m_intermediateResultBuffers.clear();
   2201 
   2202 	delete m_program;
   2203 	m_program = DE_NULL;
   2204 }
   2205 
   2206 TestCase::IterateResult SSBOConcurrentAtomicCase::iterate (void)
   2207 {
   2208 	const glw::Functions&	gl				= m_context.getRenderContext().getFunctions();
   2209 	const deUint32			sumValue		= (deUint32)(m_numCalls * (m_numCalls + 1) / 2);
   2210 	std::vector<int>		deltas;
   2211 
   2212 	// generate unique deltas
   2213 	generateShuffledRamp(m_numCalls, deltas);
   2214 
   2215 	// invoke program N times, each with a different delta
   2216 	{
   2217 		const int deltaLocation = gl.getUniformLocation(m_program->getProgram(), "u_atomicDelta");
   2218 
   2219 		m_testCtx.getLog()
   2220 			<< tcu::TestLog::Message
   2221 			<< "Running shader " << m_numCalls << " times.\n"
   2222 			<< "Num groups = (" << m_workSize << ", 1, 1)\n"
   2223 			<< "Setting u_atomicDelta to a unique value for each call.\n"
   2224 			<< tcu::TestLog::EndMessage;
   2225 
   2226 		if (deltaLocation == -1)
   2227 			throw tcu::TestError("u_atomicDelta location was -1");
   2228 
   2229 		gl.useProgram(m_program->getProgram());
   2230 		gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 2, m_bufferID);
   2231 
   2232 		for (int callNdx = 0; callNdx < m_numCalls; ++callNdx)
   2233 		{
   2234 			m_testCtx.getLog()
   2235 				<< tcu::TestLog::Message
   2236 				<< "Call " << callNdx << ": u_atomicDelta = " << deltas[callNdx]
   2237 				<< tcu::TestLog::EndMessage;
   2238 
   2239 			gl.uniform1ui(deltaLocation, deltas[callNdx]);
   2240 			gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, m_intermediateResultBuffers[callNdx]);
   2241 			gl.dispatchCompute(m_workSize, 1, 1);
   2242 		}
   2243 
   2244 		GLU_EXPECT_NO_ERROR(gl.getError(), "post dispatch");
   2245 	}
   2246 
   2247 	// Verify result
   2248 	{
   2249 		std::vector<deUint32> result;
   2250 
   2251 		m_testCtx.getLog() << tcu::TestLog::Message << "Verifying work buffer, it should be filled with value " << sumValue << tcu::TestLog::EndMessage;
   2252 
   2253 		gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_bufferID);
   2254 		readBuffer(gl, GL_SHADER_STORAGE_BUFFER, m_workSize, result);
   2255 
   2256 		for (int ndx = 0; ndx < m_workSize; ++ndx)
   2257 		{
   2258 			if (result[ndx] != sumValue)
   2259 			{
   2260 				m_testCtx.getLog()
   2261 					<< tcu::TestLog::Message
   2262 					<< "Work buffer error, at index " << ndx << " expected value " << (sumValue) << ", got " << result[ndx] << "\n"
   2263 					<< "Work buffer contains invalid values."
   2264 					<< tcu::TestLog::EndMessage;
   2265 
   2266 				m_testCtx.setTestResult(QP_TEST_RESULT_FAIL, "Buffer contents invalid");
   2267 				return STOP;
   2268 			}
   2269 		}
   2270 
   2271 		m_testCtx.getLog() << tcu::TestLog::Message << "Work buffer contents are valid." << tcu::TestLog::EndMessage;
   2272 	}
   2273 
   2274 	// verify steps
   2275 	{
   2276 		std::vector<std::vector<deUint32> >	intermediateResults	(m_numCalls);
   2277 		std::vector<deUint32>				valueChain			(m_numCalls);
   2278 
   2279 		m_testCtx.getLog() << tcu::TestLog::Message << "Verifying intermediate results. " << tcu::TestLog::EndMessage;
   2280 
   2281 		// collect results
   2282 
   2283 		for (int callNdx = 0; callNdx < m_numCalls; ++callNdx)
   2284 		{
   2285 			gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_intermediateResultBuffers[callNdx]);
   2286 			readBuffer(gl, GL_SHADER_STORAGE_BUFFER, m_workSize, intermediateResults[callNdx]);
   2287 		}
   2288 
   2289 		// verify values
   2290 
   2291 		for (int valueNdx = 0; valueNdx < m_workSize; ++valueNdx)
   2292 		{
   2293 			int			invalidOperationNdx;
   2294 			deUint32	errorDelta;
   2295 			deUint32	errorExpected;
   2296 
   2297 			// collect result chain for each element
   2298 			for (int callNdx = 0; callNdx < m_numCalls; ++callNdx)
   2299 				valueChain[callNdx] = intermediateResults[callNdx][valueNdx];
   2300 
   2301 			// check there exists a path from 0 to sumValue using each addition once
   2302 			// decompose cumulative results to addition operations (all additions positive => this works)
   2303 
   2304 			std::sort(valueChain.begin(), valueChain.end());
   2305 
   2306 			// validate chain
   2307 			if (!validateSortedAtomicRampAdditionValueChain(valueChain, sumValue, invalidOperationNdx, errorDelta, errorExpected))
   2308 			{
   2309 				m_testCtx.getLog()
   2310 					<< tcu::TestLog::Message
   2311 					<< "Intermediate buffer error, at value index " << valueNdx << ", applied operation index " << invalidOperationNdx << ", value was increased by " << errorDelta << ", but expected " << errorExpected << ".\n"
   2312 					<< "Intermediate buffer contains invalid values. Values at index " << valueNdx << "\n"
   2313 					<< tcu::TestLog::EndMessage;
   2314 
   2315 				for (int logCallNdx = 0; logCallNdx < m_numCalls; ++logCallNdx)
   2316 					m_testCtx.getLog() << tcu::TestLog::Message << "Value[" << logCallNdx << "] = " << intermediateResults[logCallNdx][valueNdx] << tcu::TestLog::EndMessage;
   2317 				m_testCtx.getLog() << tcu::TestLog::Message << "Result = " << sumValue << tcu::TestLog::EndMessage;
   2318 
   2319 				m_testCtx.setTestResult(QP_TEST_RESULT_FAIL, "Buffer contents invalid");
   2320 				return STOP;
   2321 			}
   2322 		}
   2323 
   2324 		m_testCtx.getLog() << tcu::TestLog::Message << "Intermediate buffers are valid." << tcu::TestLog::EndMessage;
   2325 	}
   2326 
   2327 	m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
   2328 	return STOP;
   2329 }
   2330 
   2331 std::string SSBOConcurrentAtomicCase::genComputeSource (void) const
   2332 {
   2333 	std::ostringstream buf;
   2334 
   2335 	buf	<< "#version 310 es\n"
   2336 		<< "layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
   2337 		<< "layout (binding = 1, std430) writeonly buffer IntermediateResults\n"
   2338 		<< "{\n"
   2339 		<< "	highp uint values[" << m_workSize << "];\n"
   2340 		<< "} sb_ires;\n"
   2341 		<< "\n"
   2342 		<< "layout (binding = 2, std430) volatile buffer WorkBuffer\n"
   2343 		<< "{\n"
   2344 		<< "	highp uint values[" << m_workSize << "];\n"
   2345 		<< "} sb_work;\n"
   2346 		<< "uniform highp uint u_atomicDelta;\n"
   2347 		<< "\n"
   2348 		<< "void main ()\n"
   2349 		<< "{\n"
   2350 		<< "	highp uint invocationIndex = gl_GlobalInvocationID.x;\n"
   2351 		<< "	sb_ires.values[invocationIndex] = atomicAdd(sb_work.values[invocationIndex], u_atomicDelta);\n"
   2352 		<< "}";
   2353 
   2354 	return buf.str();
   2355 }
   2356 
   2357 class ConcurrentAtomicCounterCase : public TestCase
   2358 {
   2359 public:
   2360 
   2361 							ConcurrentAtomicCounterCase		(Context& context, const char* name, const char* description, int numCalls, int workSize);
   2362 							~ConcurrentAtomicCounterCase	(void);
   2363 
   2364 	void					init							(void);
   2365 	void					deinit							(void);
   2366 	IterateResult			iterate							(void);
   2367 
   2368 private:
   2369 	std::string				genComputeSource				(bool evenOdd) const;
   2370 
   2371 	const int				m_numCalls;
   2372 	const int				m_workSize;
   2373 	glu::ShaderProgram*		m_evenProgram;
   2374 	glu::ShaderProgram*		m_oddProgram;
   2375 	deUint32				m_counterBuffer;
   2376 	deUint32				m_intermediateResultBuffer;
   2377 };
   2378 
   2379 ConcurrentAtomicCounterCase::ConcurrentAtomicCounterCase (Context& context, const char* name, const char* description, int numCalls, int workSize)
   2380 	: TestCase					(context, name, description)
   2381 	, m_numCalls				(numCalls)
   2382 	, m_workSize				(workSize)
   2383 	, m_evenProgram				(DE_NULL)
   2384 	, m_oddProgram				(DE_NULL)
   2385 	, m_counterBuffer			(DE_NULL)
   2386 	, m_intermediateResultBuffer(DE_NULL)
   2387 {
   2388 }
   2389 
   2390 ConcurrentAtomicCounterCase::~ConcurrentAtomicCounterCase (void)
   2391 {
   2392 	deinit();
   2393 }
   2394 
   2395 void ConcurrentAtomicCounterCase::init (void)
   2396 {
   2397 	const glw::Functions&		gl			= m_context.getRenderContext().getFunctions();
   2398 	const std::vector<deUint32>	zeroData	(m_numCalls * m_workSize, 0);
   2399 
   2400 	// gen buffer
   2401 
   2402 	gl.genBuffers(1, &m_counterBuffer);
   2403 	gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_counterBuffer);
   2404 	gl.bufferData(GL_SHADER_STORAGE_BUFFER, sizeof(deUint32), &zeroData[0], GL_DYNAMIC_COPY);
   2405 
   2406 	gl.genBuffers(1, &m_intermediateResultBuffer);
   2407 	gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_intermediateResultBuffer);
   2408 	gl.bufferData(GL_SHADER_STORAGE_BUFFER, sizeof(deUint32) * m_numCalls * m_workSize, &zeroData[0], GL_DYNAMIC_COPY);
   2409 
   2410 	GLU_EXPECT_NO_ERROR(gl.getError(), "gen buffers");
   2411 
   2412 	// gen programs
   2413 
   2414 	{
   2415 		const tcu::ScopedLogSection section(m_testCtx.getLog(), "EvenProgram", "Even program");
   2416 
   2417 		m_evenProgram = new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(genComputeSource(true)));
   2418 		m_testCtx.getLog() << *m_evenProgram;
   2419 		if (!m_evenProgram->isOk())
   2420 			throw tcu::TestError("could not build program");
   2421 	}
   2422 	{
   2423 		const tcu::ScopedLogSection section(m_testCtx.getLog(), "OddProgram", "Odd program");
   2424 
   2425 		m_oddProgram = new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(genComputeSource(false)));
   2426 		m_testCtx.getLog() << *m_oddProgram;
   2427 		if (!m_oddProgram->isOk())
   2428 			throw tcu::TestError("could not build program");
   2429 	}
   2430 }
   2431 
   2432 void ConcurrentAtomicCounterCase::deinit (void)
   2433 {
   2434 	if (m_counterBuffer)
   2435 	{
   2436 		m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_counterBuffer);
   2437 		m_counterBuffer = 0;
   2438 	}
   2439 	if (m_intermediateResultBuffer)
   2440 	{
   2441 		m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_intermediateResultBuffer);
   2442 		m_intermediateResultBuffer = 0;
   2443 	}
   2444 
   2445 	delete m_evenProgram;
   2446 	m_evenProgram = DE_NULL;
   2447 
   2448 	delete m_oddProgram;
   2449 	m_oddProgram = DE_NULL;
   2450 }
   2451 
   2452 TestCase::IterateResult ConcurrentAtomicCounterCase::iterate (void)
   2453 {
   2454 	const glw::Functions& gl = m_context.getRenderContext().getFunctions();
   2455 
   2456 	// invoke program N times, each with a different delta
   2457 	{
   2458 		const int evenCallNdxLocation	= gl.getUniformLocation(m_evenProgram->getProgram(), "u_callNdx");
   2459 		const int oddCallNdxLocation	= gl.getUniformLocation(m_oddProgram->getProgram(), "u_callNdx");
   2460 
   2461 		m_testCtx.getLog()
   2462 			<< tcu::TestLog::Message
   2463 			<< "Running shader pair (even & odd) " << m_numCalls << " times.\n"
   2464 			<< "Num groups = (" << m_workSize << ", 1, 1)\n"
   2465 			<< tcu::TestLog::EndMessage;
   2466 
   2467 		if (evenCallNdxLocation == -1)
   2468 			throw tcu::TestError("u_callNdx location was -1");
   2469 		if (oddCallNdxLocation == -1)
   2470 			throw tcu::TestError("u_callNdx location was -1");
   2471 
   2472 		gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, m_intermediateResultBuffer);
   2473 		gl.bindBufferBase(GL_ATOMIC_COUNTER_BUFFER, 2, m_counterBuffer);
   2474 
   2475 		for (int callNdx = 0; callNdx < m_numCalls; ++callNdx)
   2476 		{
   2477 			gl.useProgram(m_evenProgram->getProgram());
   2478 			gl.uniform1ui(evenCallNdxLocation, (deUint32)callNdx);
   2479 			gl.dispatchCompute(m_workSize, 1, 1);
   2480 
   2481 			gl.useProgram(m_oddProgram->getProgram());
   2482 			gl.uniform1ui(oddCallNdxLocation, (deUint32)callNdx);
   2483 			gl.dispatchCompute(m_workSize, 1, 1);
   2484 		}
   2485 
   2486 		GLU_EXPECT_NO_ERROR(gl.getError(), "post dispatch");
   2487 	}
   2488 
   2489 	// Verify result
   2490 	{
   2491 		deUint32 result;
   2492 
   2493 		m_testCtx.getLog() << tcu::TestLog::Message << "Verifying work buffer, it should be " << m_numCalls*m_workSize << tcu::TestLog::EndMessage;
   2494 
   2495 		gl.bindBuffer(GL_ATOMIC_COUNTER_BUFFER, m_counterBuffer);
   2496 		result = readBufferUint32(gl, GL_ATOMIC_COUNTER_BUFFER);
   2497 
   2498 		if ((int)result != m_numCalls*m_workSize)
   2499 		{
   2500 			m_testCtx.getLog()
   2501 				<< tcu::TestLog::Message
   2502 				<< "Counter buffer error, expected value " << (m_numCalls*m_workSize) << ", got " << result << "\n"
   2503 				<< tcu::TestLog::EndMessage;
   2504 
   2505 			m_testCtx.setTestResult(QP_TEST_RESULT_FAIL, "Buffer contents invalid");
   2506 			return STOP;
   2507 		}
   2508 
   2509 		m_testCtx.getLog() << tcu::TestLog::Message << "Counter buffer is valid." << tcu::TestLog::EndMessage;
   2510 	}
   2511 
   2512 	// verify steps
   2513 	{
   2514 		std::vector<deUint32> intermediateResults;
   2515 
   2516 		m_testCtx.getLog() << tcu::TestLog::Message << "Verifying intermediate results. " << tcu::TestLog::EndMessage;
   2517 
   2518 		// collect results
   2519 
   2520 		gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_intermediateResultBuffer);
   2521 		readBuffer(gl, GL_SHADER_STORAGE_BUFFER, m_numCalls * m_workSize, intermediateResults);
   2522 
   2523 		// verify values
   2524 
   2525 		std::sort(intermediateResults.begin(), intermediateResults.end());
   2526 
   2527 		for (int valueNdx = 0; valueNdx < m_workSize * m_numCalls; ++valueNdx)
   2528 		{
   2529 			if ((int)intermediateResults[valueNdx] != valueNdx)
   2530 			{
   2531 				m_testCtx.getLog()
   2532 					<< tcu::TestLog::Message
   2533 					<< "Intermediate buffer error, at value index " << valueNdx << ", expected " << valueNdx << ", got " << intermediateResults[valueNdx] << ".\n"
   2534 					<< "Intermediate buffer contains invalid values. Intermediate results:\n"
   2535 					<< tcu::TestLog::EndMessage;
   2536 
   2537 				for (int logCallNdx = 0; logCallNdx < m_workSize * m_numCalls; ++logCallNdx)
   2538 					m_testCtx.getLog() << tcu::TestLog::Message << "Value[" << logCallNdx << "] = " << intermediateResults[logCallNdx] << tcu::TestLog::EndMessage;
   2539 
   2540 				m_testCtx.setTestResult(QP_TEST_RESULT_FAIL, "Buffer contents invalid");
   2541 				return STOP;
   2542 			}
   2543 		}
   2544 
   2545 		m_testCtx.getLog() << tcu::TestLog::Message << "Intermediate buffers are valid." << tcu::TestLog::EndMessage;
   2546 	}
   2547 
   2548 	m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
   2549 	return STOP;
   2550 }
   2551 
   2552 std::string ConcurrentAtomicCounterCase::genComputeSource (bool evenOdd) const
   2553 {
   2554 	std::ostringstream buf;
   2555 
   2556 	buf	<< "#version 310 es\n"
   2557 		<< "layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
   2558 		<< "layout (binding = 1, std430) writeonly buffer IntermediateResults\n"
   2559 		<< "{\n"
   2560 		<< "	highp uint values[" << m_workSize * m_numCalls << "];\n"
   2561 		<< "} sb_ires;\n"
   2562 		<< "\n"
   2563 		<< "layout (binding = 2, offset = 0) uniform atomic_uint u_counter;\n"
   2564 		<< "uniform highp uint u_callNdx;\n"
   2565 		<< "\n"
   2566 		<< "void main ()\n"
   2567 		<< "{\n"
   2568 		<< "	highp uint dataNdx = u_callNdx * " << m_workSize << "u + gl_GlobalInvocationID.x;\n"
   2569 		<< "	if ((dataNdx % 2u) == " << ((evenOdd) ? (0) : (1)) << "u)\n"
   2570 		<< "		sb_ires.values[dataNdx] = atomicCounterIncrement(u_counter);\n"
   2571 		<< "}";
   2572 
   2573 	return buf.str();
   2574 }
   2575 
   2576 class ConcurrentImageAtomicCase : public TestCase
   2577 {
   2578 public:
   2579 
   2580 							ConcurrentImageAtomicCase	(Context& context, const char* name, const char* description, int numCalls, int workSize);
   2581 							~ConcurrentImageAtomicCase	(void);
   2582 
   2583 	void					init						(void);
   2584 	void					deinit						(void);
   2585 	IterateResult			iterate						(void);
   2586 
   2587 private:
   2588 	void					readWorkImage				(std::vector<deUint32>& result);
   2589 
   2590 	std::string				genComputeSource			(void) const;
   2591 	std::string				genImageReadSource			(void) const;
   2592 	std::string				genImageClearSource			(void) const;
   2593 
   2594 	const int				m_numCalls;
   2595 	const int				m_workSize;
   2596 	glu::ShaderProgram*		m_program;
   2597 	glu::ShaderProgram*		m_imageReadProgram;
   2598 	glu::ShaderProgram*		m_imageClearProgram;
   2599 	deUint32				m_imageID;
   2600 	std::vector<deUint32>	m_intermediateResultBuffers;
   2601 };
   2602 
   2603 ConcurrentImageAtomicCase::ConcurrentImageAtomicCase (Context& context, const char* name, const char* description, int numCalls, int workSize)
   2604 	: TestCase				(context, name, description)
   2605 	, m_numCalls			(numCalls)
   2606 	, m_workSize			(workSize)
   2607 	, m_program				(DE_NULL)
   2608 	, m_imageReadProgram	(DE_NULL)
   2609 	, m_imageClearProgram	(DE_NULL)
   2610 	, m_imageID				(DE_NULL)
   2611 {
   2612 }
   2613 
   2614 ConcurrentImageAtomicCase::~ConcurrentImageAtomicCase (void)
   2615 {
   2616 	deinit();
   2617 }
   2618 
   2619 void ConcurrentImageAtomicCase::init (void)
   2620 {
   2621 	const glw::Functions&	gl					= m_context.getRenderContext().getFunctions();
   2622 	std::vector<deUint32>	zeroData			(m_workSize * m_workSize, 0);
   2623 
   2624 	if (!m_context.getContextInfo().isExtensionSupported("GL_OES_shader_image_atomic"))
   2625 		throw tcu::NotSupportedError("Test requires GL_OES_shader_image_atomic");
   2626 
   2627 	// gen image
   2628 
   2629 	gl.genTextures(1, &m_imageID);
   2630 	gl.bindTexture(GL_TEXTURE_2D, m_imageID);
   2631 	gl.texStorage2D(GL_TEXTURE_2D, 1, GL_R32UI, m_workSize, m_workSize);
   2632 	gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_NEAREST);
   2633 	gl.texParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_NEAREST);
   2634 	GLU_EXPECT_NO_ERROR(gl.getError(), "gen tex");
   2635 
   2636 	// gen buffers
   2637 
   2638 	for (int ndx = 0; ndx < m_numCalls; ++ndx)
   2639 	{
   2640 		deUint32 buffer = 0;
   2641 
   2642 		gl.genBuffers(1, &buffer);
   2643 		gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, buffer);
   2644 		gl.bufferData(GL_SHADER_STORAGE_BUFFER, sizeof(deUint32) * m_workSize * m_workSize, &zeroData[0], GL_DYNAMIC_COPY);
   2645 
   2646 		m_intermediateResultBuffers.push_back(buffer);
   2647 		GLU_EXPECT_NO_ERROR(gl.getError(), "gen buffers");
   2648 	}
   2649 
   2650 	// gen programs
   2651 
   2652 	m_program = new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(genComputeSource()));
   2653 	m_testCtx.getLog() << *m_program;
   2654 	if (!m_program->isOk())
   2655 		throw tcu::TestError("could not build program");
   2656 
   2657 	m_imageReadProgram = new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(genImageReadSource()));
   2658 	if (!m_imageReadProgram->isOk())
   2659 	{
   2660 		const tcu::ScopedLogSection section(m_testCtx.getLog(), "ImageReadProgram", "Image read program");
   2661 
   2662 		m_testCtx.getLog() << *m_imageReadProgram;
   2663 		throw tcu::TestError("could not build program");
   2664 	}
   2665 
   2666 	m_imageClearProgram = new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(genImageClearSource()));
   2667 	if (!m_imageClearProgram->isOk())
   2668 	{
   2669 		const tcu::ScopedLogSection section(m_testCtx.getLog(), "ImageClearProgram", "Image read program");
   2670 
   2671 		m_testCtx.getLog() << *m_imageClearProgram;
   2672 		throw tcu::TestError("could not build program");
   2673 	}
   2674 }
   2675 
   2676 void ConcurrentImageAtomicCase::deinit (void)
   2677 {
   2678 	if (m_imageID)
   2679 	{
   2680 		m_context.getRenderContext().getFunctions().deleteTextures(1, &m_imageID);
   2681 		m_imageID = 0;
   2682 	}
   2683 
   2684 	for (int ndx = 0; ndx < (int)m_intermediateResultBuffers.size(); ++ndx)
   2685 		m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_intermediateResultBuffers[ndx]);
   2686 	m_intermediateResultBuffers.clear();
   2687 
   2688 	delete m_program;
   2689 	m_program = DE_NULL;
   2690 
   2691 	delete m_imageReadProgram;
   2692 	m_imageReadProgram = DE_NULL;
   2693 
   2694 	delete m_imageClearProgram;
   2695 	m_imageClearProgram = DE_NULL;
   2696 }
   2697 
   2698 TestCase::IterateResult ConcurrentImageAtomicCase::iterate (void)
   2699 {
   2700 	const glw::Functions&	gl				= m_context.getRenderContext().getFunctions();
   2701 	const deUint32			sumValue		= (deUint32)(m_numCalls * (m_numCalls + 1) / 2);
   2702 	std::vector<int>		deltas;
   2703 
   2704 	// generate unique deltas
   2705 	generateShuffledRamp(m_numCalls, deltas);
   2706 
   2707 	// clear image
   2708 	{
   2709 		m_testCtx.getLog() << tcu::TestLog::Message << "Clearing image contents" << tcu::TestLog::EndMessage;
   2710 
   2711 		gl.useProgram(m_imageClearProgram->getProgram());
   2712 		gl.bindImageTexture(2, m_imageID, 0, GL_FALSE, 0, GL_WRITE_ONLY, GL_R32UI);
   2713 		gl.dispatchCompute(m_workSize, m_workSize, 1);
   2714 		gl.memoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);
   2715 
   2716 		GLU_EXPECT_NO_ERROR(gl.getError(), "clear");
   2717 	}
   2718 
   2719 	// invoke program N times, each with a different delta
   2720 	{
   2721 		const int deltaLocation = gl.getUniformLocation(m_program->getProgram(), "u_atomicDelta");
   2722 
   2723 		m_testCtx.getLog()
   2724 			<< tcu::TestLog::Message
   2725 			<< "Running shader " << m_numCalls << " times.\n"
   2726 			<< "Num groups = (" << m_workSize << ", " << m_workSize << ", 1)\n"
   2727 			<< "Setting u_atomicDelta to a unique value for each call.\n"
   2728 			<< tcu::TestLog::EndMessage;
   2729 
   2730 		if (deltaLocation == -1)
   2731 			throw tcu::TestError("u_atomicDelta location was -1");
   2732 
   2733 		gl.useProgram(m_program->getProgram());
   2734 		gl.bindImageTexture(2, m_imageID, 0, GL_FALSE, 0, GL_READ_WRITE, GL_R32UI);
   2735 
   2736 		for (int callNdx = 0; callNdx < m_numCalls; ++callNdx)
   2737 		{
   2738 			m_testCtx.getLog()
   2739 				<< tcu::TestLog::Message
   2740 				<< "Call " << callNdx << ": u_atomicDelta = " << deltas[callNdx]
   2741 				<< tcu::TestLog::EndMessage;
   2742 
   2743 			gl.uniform1ui(deltaLocation, deltas[callNdx]);
   2744 			gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, m_intermediateResultBuffers[callNdx]);
   2745 			gl.dispatchCompute(m_workSize, m_workSize, 1);
   2746 		}
   2747 
   2748 		GLU_EXPECT_NO_ERROR(gl.getError(), "post dispatch");
   2749 	}
   2750 
   2751 	// Verify result
   2752 	{
   2753 		std::vector<deUint32> result;
   2754 
   2755 		m_testCtx.getLog() << tcu::TestLog::Message << "Verifying work image, it should be filled with value " << sumValue << tcu::TestLog::EndMessage;
   2756 
   2757 		readWorkImage(result);
   2758 
   2759 		for (int ndx = 0; ndx < m_workSize * m_workSize; ++ndx)
   2760 		{
   2761 			if (result[ndx] != sumValue)
   2762 			{
   2763 				m_testCtx.getLog()
   2764 					<< tcu::TestLog::Message
   2765 					<< "Work image error, at index (" << ndx % m_workSize << ", " << ndx / m_workSize << ") expected value " << (sumValue) << ", got " << result[ndx] << "\n"
   2766 					<< "Work image contains invalid values."
   2767 					<< tcu::TestLog::EndMessage;
   2768 
   2769 				m_testCtx.setTestResult(QP_TEST_RESULT_FAIL, "Image contents invalid");
   2770 				return STOP;
   2771 			}
   2772 		}
   2773 
   2774 		m_testCtx.getLog() << tcu::TestLog::Message << "Work image contents are valid." << tcu::TestLog::EndMessage;
   2775 	}
   2776 
   2777 	// verify steps
   2778 	{
   2779 		std::vector<std::vector<deUint32> >	intermediateResults	(m_numCalls);
   2780 		std::vector<deUint32>				valueChain			(m_numCalls);
   2781 		std::vector<deUint32>				chainDelta			(m_numCalls);
   2782 
   2783 		m_testCtx.getLog() << tcu::TestLog::Message << "Verifying intermediate results. " << tcu::TestLog::EndMessage;
   2784 
   2785 		// collect results
   2786 
   2787 		for (int callNdx = 0; callNdx < m_numCalls; ++callNdx)
   2788 		{
   2789 			gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_intermediateResultBuffers[callNdx]);
   2790 			readBuffer(gl, GL_SHADER_STORAGE_BUFFER, m_workSize * m_workSize, intermediateResults[callNdx]);
   2791 		}
   2792 
   2793 		// verify values
   2794 
   2795 		for (int valueNdx = 0; valueNdx < m_workSize; ++valueNdx)
   2796 		{
   2797 			int			invalidOperationNdx;
   2798 			deUint32	errorDelta;
   2799 			deUint32	errorExpected;
   2800 
   2801 			// collect result chain for each element
   2802 			for (int callNdx = 0; callNdx < m_numCalls; ++callNdx)
   2803 				valueChain[callNdx] = intermediateResults[callNdx][valueNdx];
   2804 
   2805 			// check there exists a path from 0 to sumValue using each addition once
   2806 			// decompose cumulative results to addition operations (all additions positive => this works)
   2807 
   2808 			std::sort(valueChain.begin(), valueChain.end());
   2809 
   2810 			for (int callNdx = 0; callNdx < m_numCalls; ++callNdx)
   2811 				chainDelta[callNdx] = ((callNdx + 1 == m_numCalls) ? (sumValue) : (valueChain[callNdx+1])) - valueChain[callNdx];
   2812 
   2813 			// chainDelta contains now the actual additions applied to the value
   2814 			std::sort(chainDelta.begin(), chainDelta.end());
   2815 
   2816 			// validate chain
   2817 			if (!validateSortedAtomicRampAdditionValueChain(valueChain, sumValue, invalidOperationNdx, errorDelta, errorExpected))
   2818 			{
   2819 				m_testCtx.getLog()
   2820 					<< tcu::TestLog::Message
   2821 					<< "Intermediate buffer error, at index (" << valueNdx % m_workSize << ", " << valueNdx / m_workSize << "), applied operation index "
   2822 					<< invalidOperationNdx << ", value was increased by " << errorDelta << ", but expected " << errorExpected << ".\n"
   2823 					<< "Intermediate buffer contains invalid values. Values at index (" << valueNdx % m_workSize << ", " << valueNdx / m_workSize << ")\n"
   2824 					<< tcu::TestLog::EndMessage;
   2825 
   2826 				for (int logCallNdx = 0; logCallNdx < m_numCalls; ++logCallNdx)
   2827 					m_testCtx.getLog() << tcu::TestLog::Message << "Value[" << logCallNdx << "] = " << intermediateResults[logCallNdx][valueNdx] << tcu::TestLog::EndMessage;
   2828 				m_testCtx.getLog() << tcu::TestLog::Message << "Result = " << sumValue << tcu::TestLog::EndMessage;
   2829 
   2830 				m_testCtx.setTestResult(QP_TEST_RESULT_FAIL, "Buffer contents invalid");
   2831 				return STOP;
   2832 			}
   2833 		}
   2834 
   2835 		m_testCtx.getLog() << tcu::TestLog::Message << "Intermediate buffers are valid." << tcu::TestLog::EndMessage;
   2836 	}
   2837 
   2838 	m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
   2839 	return STOP;
   2840 }
   2841 
   2842 void ConcurrentImageAtomicCase::readWorkImage (std::vector<deUint32>& result)
   2843 {
   2844 	const glw::Functions&	gl				= m_context.getRenderContext().getFunctions();
   2845 	glu::Buffer				resultBuffer	(m_context.getRenderContext());
   2846 
   2847 	// Read image to an ssbo
   2848 
   2849 	{
   2850 		const std::vector<deUint32> zeroData(m_workSize*m_workSize, 0);
   2851 
   2852 		gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, *resultBuffer);
   2853 		gl.bufferData(GL_SHADER_STORAGE_BUFFER, (int)(sizeof(deUint32) * m_workSize * m_workSize), &zeroData[0], GL_DYNAMIC_COPY);
   2854 
   2855 		gl.memoryBarrier(GL_SHADER_IMAGE_ACCESS_BARRIER_BIT);
   2856 		gl.useProgram(m_imageReadProgram->getProgram());
   2857 
   2858 		gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, *resultBuffer);
   2859 		gl.bindImageTexture(2, m_imageID, 0, GL_FALSE, 0, GL_READ_ONLY, GL_R32UI);
   2860 		gl.dispatchCompute(m_workSize, m_workSize, 1);
   2861 
   2862 		GLU_EXPECT_NO_ERROR(gl.getError(), "read");
   2863 	}
   2864 
   2865 	// Read ssbo
   2866 	{
   2867 		const void* ptr = gl.mapBufferRange(GL_SHADER_STORAGE_BUFFER, 0, (int)(sizeof(deUint32) * m_workSize * m_workSize), GL_MAP_READ_BIT);
   2868 		GLU_EXPECT_NO_ERROR(gl.getError(), "map");
   2869 
   2870 		if (!ptr)
   2871 			throw tcu::TestError("mapBufferRange returned NULL");
   2872 
   2873 		result.resize(m_workSize * m_workSize);
   2874 		memcpy(&result[0], ptr, sizeof(deUint32) * m_workSize * m_workSize);
   2875 
   2876 		if (gl.unmapBuffer(GL_SHADER_STORAGE_BUFFER) == GL_FALSE)
   2877 			throw tcu::TestError("unmapBuffer returned false");
   2878 	}
   2879 }
   2880 
   2881 std::string ConcurrentImageAtomicCase::genComputeSource (void) const
   2882 {
   2883 	std::ostringstream buf;
   2884 
   2885 	buf	<< "#version 310 es\n"
   2886 		<< "#extension GL_OES_shader_image_atomic : require\n"
   2887 		<< "\n"
   2888 		<< "layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
   2889 		<< "layout (binding = 1, std430) writeonly buffer IntermediateResults\n"
   2890 		<< "{\n"
   2891 		<< "	highp uint values[" << m_workSize * m_workSize << "];\n"
   2892 		<< "} sb_ires;\n"
   2893 		<< "\n"
   2894 		<< "layout (binding = 2, r32ui) volatile uniform highp uimage2D u_workImage;\n"
   2895 		<< "uniform highp uint u_atomicDelta;\n"
   2896 		<< "\n"
   2897 		<< "void main ()\n"
   2898 		<< "{\n"
   2899 		<< "	highp uint invocationIndex = gl_GlobalInvocationID.x + gl_GlobalInvocationID.y * uint(" << m_workSize <<");\n"
   2900 		<< "	sb_ires.values[invocationIndex] = imageAtomicAdd(u_workImage, ivec2(gl_GlobalInvocationID.xy), u_atomicDelta);\n"
   2901 		<< "}";
   2902 
   2903 	return buf.str();
   2904 }
   2905 
   2906 std::string ConcurrentImageAtomicCase::genImageReadSource (void) const
   2907 {
   2908 	std::ostringstream buf;
   2909 
   2910 	buf	<< "#version 310 es\n"
   2911 		<< "\n"
   2912 		<< "layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
   2913 		<< "layout (binding = 1, std430) writeonly buffer ImageValues\n"
   2914 		<< "{\n"
   2915 		<< "	highp uint values[" << m_workSize * m_workSize << "];\n"
   2916 		<< "} sb_res;\n"
   2917 		<< "\n"
   2918 		<< "layout (binding = 2, r32ui) readonly uniform highp uimage2D u_workImage;\n"
   2919 		<< "\n"
   2920 		<< "void main ()\n"
   2921 		<< "{\n"
   2922 		<< "	highp uint invocationIndex = gl_GlobalInvocationID.x + gl_GlobalInvocationID.y * uint(" << m_workSize <<");\n"
   2923 		<< "	sb_res.values[invocationIndex] = imageLoad(u_workImage, ivec2(gl_GlobalInvocationID.xy)).x;\n"
   2924 		<< "}";
   2925 
   2926 	return buf.str();
   2927 }
   2928 
   2929 std::string ConcurrentImageAtomicCase::genImageClearSource (void) const
   2930 {
   2931 	std::ostringstream buf;
   2932 
   2933 	buf	<< "#version 310 es\n"
   2934 		<< "\n"
   2935 		<< "layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
   2936 		<< "layout (binding = 2, r32ui) writeonly uniform highp uimage2D u_workImage;\n"
   2937 		<< "\n"
   2938 		<< "void main ()\n"
   2939 		<< "{\n"
   2940 		<< "	imageStore(u_workImage, ivec2(gl_GlobalInvocationID.xy), uvec4(0, 0, 0, 0));\n"
   2941 		<< "}";
   2942 
   2943 	return buf.str();
   2944 }
   2945 
// Test case that dispatches an atomic counter program and an SSBO atomic
// program in an interleaved fashion, with both backed by the same buffer
// object, and then verifies the final buffer value.
class ConcurrentSSBOAtomicCounterMixedCase : public TestCase
{
public:
							ConcurrentSSBOAtomicCounterMixedCase	(Context& context, const char* name, const char* description, int numCalls, int workSize);
							~ConcurrentSSBOAtomicCounterMixedCase	(void);

	// Creates the shared buffer and builds both compute programs.
	void					init									(void);
	// Deletes the buffer and both programs.
	void					deinit									(void);
	// Runs the interleaved dispatches and checks the resulting buffer value.
	IterateResult			iterate									(void);

private:
	// Source of the compute shader that performs atomicXor on the SSBO.
	std::string				genSSBOComputeSource					(void) const;
	// Source of the compute shader that increments the atomic counter.
	std::string				genAtomicCounterComputeSource			(void) const;

	const int				m_numCalls;					// number of times each program is dispatched
	const int				m_workSize;					// work group count in x for every dispatch
	deUint32				m_bufferID;					// buffer bound as both SSBO and atomic counter buffer
	glu::ShaderProgram*		m_ssboAtomicProgram;		// owned; created in init(), deleted in deinit()
	glu::ShaderProgram*		m_atomicCounterProgram;		// owned; created in init(), deleted in deinit()
};
   2966 
   2967 ConcurrentSSBOAtomicCounterMixedCase::ConcurrentSSBOAtomicCounterMixedCase (Context& context, const char* name, const char* description, int numCalls, int workSize)
   2968 	: TestCase					(context, name, description)
   2969 	, m_numCalls				(numCalls)
   2970 	, m_workSize				(workSize)
   2971 	, m_bufferID				(DE_NULL)
   2972 	, m_ssboAtomicProgram		(DE_NULL)
   2973 	, m_atomicCounterProgram	(DE_NULL)
   2974 {
   2975 	// SSBO atomic XORs cancel out
   2976 	DE_ASSERT((workSize * numCalls) % (16 * 2) == 0);
   2977 }
   2978 
   2979 ConcurrentSSBOAtomicCounterMixedCase::~ConcurrentSSBOAtomicCounterMixedCase (void)
   2980 {
   2981 	deinit();
   2982 }
   2983 
   2984 void ConcurrentSSBOAtomicCounterMixedCase::init (void)
   2985 {
   2986 	const glw::Functions&		gl			= m_context.getRenderContext().getFunctions();
   2987 	const deUint32				zeroBuf[2]	= { 0, 0 };
   2988 
   2989 	// gen buffer
   2990 
   2991 	gl.genBuffers(1, &m_bufferID);
   2992 	gl.bindBuffer(GL_SHADER_STORAGE_BUFFER, m_bufferID);
   2993 	gl.bufferData(GL_SHADER_STORAGE_BUFFER, (int)(sizeof(deUint32) * 2), zeroBuf, GL_DYNAMIC_COPY);
   2994 
   2995 	GLU_EXPECT_NO_ERROR(gl.getError(), "gen buffers");
   2996 
   2997 	// gen programs
   2998 
   2999 	{
   3000 		const tcu::ScopedLogSection section(m_testCtx.getLog(), "SSBOProgram", "SSBO atomic program");
   3001 
   3002 		m_ssboAtomicProgram = new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(genSSBOComputeSource()));
   3003 		m_testCtx.getLog() << *m_ssboAtomicProgram;
   3004 		if (!m_ssboAtomicProgram->isOk())
   3005 			throw tcu::TestError("could not build program");
   3006 	}
   3007 	{
   3008 		const tcu::ScopedLogSection section(m_testCtx.getLog(), "AtomicCounterProgram", "Atomic counter program");
   3009 
   3010 		m_atomicCounterProgram = new glu::ShaderProgram(m_context.getRenderContext(), glu::ProgramSources() << glu::ComputeSource(genAtomicCounterComputeSource()));
   3011 		m_testCtx.getLog() << *m_atomicCounterProgram;
   3012 		if (!m_atomicCounterProgram->isOk())
   3013 			throw tcu::TestError("could not build program");
   3014 	}
   3015 }
   3016 
   3017 void ConcurrentSSBOAtomicCounterMixedCase::deinit (void)
   3018 {
   3019 	if (m_bufferID)
   3020 	{
   3021 		m_context.getRenderContext().getFunctions().deleteBuffers(1, &m_bufferID);
   3022 		m_bufferID = 0;
   3023 	}
   3024 
   3025 	delete m_ssboAtomicProgram;
   3026 	m_ssboAtomicProgram = DE_NULL;
   3027 
   3028 	delete m_atomicCounterProgram;
   3029 	m_atomicCounterProgram = DE_NULL;
   3030 }
   3031 
   3032 TestCase::IterateResult ConcurrentSSBOAtomicCounterMixedCase::iterate (void)
   3033 {
   3034 	const glw::Functions& gl = m_context.getRenderContext().getFunctions();
   3035 
   3036 	m_testCtx.getLog() << tcu::TestLog::Message << "Testing atomic counters and SSBO atomic operations with both backed by the same buffer." << tcu::TestLog::EndMessage;
   3037 
   3038 	// invoke programs N times
   3039 	{
   3040 		m_testCtx.getLog()
   3041 			<< tcu::TestLog::Message
   3042 			<< "Running SSBO atomic program and atomic counter program " << m_numCalls << " times. (interleaved)\n"
   3043 			<< "Num groups = (" << m_workSize << ", 1, 1)\n"
   3044 			<< tcu::TestLog::EndMessage;
   3045 
   3046 		gl.bindBufferBase(GL_SHADER_STORAGE_BUFFER, 1, m_bufferID);
   3047 		gl.bindBufferBase(GL_ATOMIC_COUNTER_BUFFER, 2, m_bufferID);
   3048 
   3049 		for (int callNdx = 0; callNdx < m_numCalls; ++callNdx)
   3050 		{
   3051 			gl.useProgram(m_atomicCounterProgram->getProgram());
   3052 			gl.dispatchCompute(m_workSize, 1, 1);
   3053 
   3054 			gl.useProgram(m_ssboAtomicProgram->getProgram());
   3055 			gl.dispatchCompute(m_workSize, 1, 1);
   3056 		}
   3057 
   3058 		GLU_EXPECT_NO_ERROR(gl.getError(), "post dispatch");
   3059 	}
   3060 
   3061 	// Verify result
   3062 	{
   3063 		deUint32 result;
   3064 
   3065 		// XORs cancel out, only addition is left
   3066 		m_testCtx.getLog() << tcu::TestLog::Message << "Verifying work buffer, it should be " << m_numCalls*m_workSize << tcu::TestLog::EndMessage;
   3067 
   3068 		gl.bindBuffer(GL_ATOMIC_COUNTER_BUFFER, m_bufferID);
   3069 		result = readBufferUint32(gl, GL_ATOMIC_COUNTER_BUFFER);
   3070 
   3071 		if ((int)result != m_numCalls*m_workSize)
   3072 		{
   3073 			m_testCtx.getLog()
   3074 				<< tcu::TestLog::Message
   3075 				<< "Buffer value error, expected value " << (m_numCalls*m_workSize) << ", got " << result << "\n"
   3076 				<< tcu::TestLog::EndMessage;
   3077 
   3078 			m_testCtx.setTestResult(QP_TEST_RESULT_FAIL, "Buffer contents invalid");
   3079 			return STOP;
   3080 		}
   3081 
   3082 		m_testCtx.getLog() << tcu::TestLog::Message << "Buffer is valid." << tcu::TestLog::EndMessage;
   3083 	}
   3084 
   3085 	m_testCtx.setTestResult(QP_TEST_RESULT_PASS, "Pass");
   3086 	return STOP;
   3087 }
   3088 
   3089 std::string ConcurrentSSBOAtomicCounterMixedCase::genSSBOComputeSource (void) const
   3090 {
   3091 	std::ostringstream buf;
   3092 
   3093 	buf	<< "#version 310 es\n"
   3094 		<< "layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
   3095 		<< "layout (binding = 1, std430) volatile buffer WorkBuffer\n"
   3096 		<< "{\n"
   3097 		<< "	highp uint targetValue;\n"
   3098 		<< "	highp uint dummy;\n"
   3099 		<< "} sb_work;\n"
   3100 		<< "\n"
   3101 		<< "void main ()\n"
   3102 		<< "{\n"
   3103 		<< "	// flip high bits\n"
   3104 		<< "	highp uint mask = uint(1) << (16u + (gl_GlobalInvocationID.x % 16u));\n"
   3105 		<< "	sb_work.dummy = atomicXor(sb_work.targetValue, mask);\n"
   3106 		<< "}";
   3107 
   3108 	return buf.str();
   3109 }
   3110 
   3111 std::string ConcurrentSSBOAtomicCounterMixedCase::genAtomicCounterComputeSource (void) const
   3112 {
   3113 	std::ostringstream buf;
   3114 
   3115 	buf	<< "#version 310 es\n"
   3116 		<< "layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
   3117 		<< "\n"
   3118 		<< "layout (binding = 2, offset = 0) uniform atomic_uint u_counter;\n"
   3119 		<< "\n"
   3120 		<< "void main ()\n"
   3121 		<< "{\n"
   3122 		<< "	atomicCounterIncrement(u_counter);\n"
   3123 		<< "}";
   3124 
   3125 	return buf.str();
   3126 }
   3127 
   3128 } // anonymous
   3129 
   3130 SynchronizationTests::SynchronizationTests (Context& context)
   3131 	: TestCaseGroup(context, "synchronization", "Synchronization tests")
   3132 {
   3133 }
   3134 
   3135 SynchronizationTests::~SynchronizationTests (void)
   3136 {
   3137 }
   3138 
   3139 void SynchronizationTests::init (void)
   3140 {
   3141 	tcu::TestCaseGroup* const inInvocationGroup		= new tcu::TestCaseGroup(m_testCtx, "in_invocation",	"Test intra-invocation synchronization");
   3142 	tcu::TestCaseGroup* const interInvocationGroup	= new tcu::TestCaseGroup(m_testCtx, "inter_invocation", "Test inter-invocation synchronization");
   3143 	tcu::TestCaseGroup* const interCallGroup		= new tcu::TestCaseGroup(m_testCtx, "inter_call",       "Test inter-call synchronization");
   3144 
   3145 	addChild(inInvocationGroup);
   3146 	addChild(interInvocationGroup);
   3147 	addChild(interCallGroup);
   3148 
   3149 	// .in_invocation & .inter_invocation
   3150 	{
   3151 		static const struct CaseConfig
   3152 		{
   3153 			const char*									namePrefix;
   3154 			const InterInvocationTestCase::StorageType	storage;
   3155 			const int									flags;
   3156 		} configs[] =
   3157 		{
   3158 			{ "image",			InterInvocationTestCase::STORAGE_IMAGE,		0										},
   3159 			{ "image_atomic",	InterInvocationTestCase::STORAGE_IMAGE,		InterInvocationTestCase::FLAG_ATOMIC	},
   3160 			{ "ssbo",			InterInvocationTestCase::STORAGE_BUFFER,	0										},
   3161 			{ "ssbo_atomic",	InterInvocationTestCase::STORAGE_BUFFER,	InterInvocationTestCase::FLAG_ATOMIC	},
   3162 		};
   3163 
   3164 		for (int groupNdx = 0; groupNdx < 2; ++groupNdx)
   3165 		{
   3166 			tcu::TestCaseGroup* const	targetGroup	= (groupNdx == 0) ? (inInvocationGroup) : (interInvocationGroup);
   3167 			const int					extraFlags	= (groupNdx == 0) ? (0) : (InterInvocationTestCase::FLAG_IN_GROUP);
   3168 
   3169 			for (int configNdx = 0; configNdx < DE_LENGTH_OF_ARRAY(configs); ++configNdx)
   3170 			{
   3171 				const char* const target = (configs[configNdx].storage == InterInvocationTestCase::STORAGE_BUFFER) ? ("buffer") : ("image");
   3172 
   3173 				targetGroup->addChild(new InvocationWriteReadCase(m_context,
   3174 																  (std::string(configs[configNdx].namePrefix) + "_write_read").c_str(),
   3175 																  (std::string("Write to ") + target + " and read it").c_str(),
   3176 																  configs[configNdx].storage,
   3177 																  configs[configNdx].flags | extraFlags));
   3178 
   3179 				targetGroup->addChild(new InvocationReadWriteCase(m_context,
   3180 																  (std::string(configs[configNdx].namePrefix) + "_read_write").c_str(),
   3181 																  (std::string("Read form ") + target + " and then write to it").c_str(),
   3182 																  configs[configNdx].storage,
   3183 																  configs[configNdx].flags | extraFlags));
   3184 
   3185 				targetGroup->addChild(new InvocationOverWriteCase(m_context,
   3186 																  (std::string(configs[configNdx].namePrefix) + "_overwrite").c_str(),
   3187 																  (std::string("Write to ") + target + " twice and read it").c_str(),
   3188 																  configs[configNdx].storage,
   3189 																  configs[configNdx].flags | extraFlags));
   3190 
   3191 				targetGroup->addChild(new InvocationAliasWriteCase(m_context,
   3192 																   (std::string(configs[configNdx].namePrefix) + "_alias_write").c_str(),
   3193 																   (std::string("Write to aliasing ") + target + " and read it").c_str(),
   3194 																   InvocationAliasWriteCase::TYPE_WRITE,
   3195 																   configs[configNdx].storage,
   3196 																   configs[configNdx].flags | extraFlags));
   3197 
   3198 				targetGroup->addChild(new InvocationAliasWriteCase(m_context,
   3199 																   (std::string(configs[configNdx].namePrefix) + "_alias_overwrite").c_str(),
   3200 																   (std::string("Write to aliasing ") + target + "s and read it").c_str(),
   3201 																   InvocationAliasWriteCase::TYPE_OVERWRITE,
   3202 																   configs[configNdx].storage,
   3203 																   configs[configNdx].flags | extraFlags));
   3204 			}
   3205 		}
   3206 	}
   3207 
   3208 	// .inter_call
   3209 	{
   3210 		tcu::TestCaseGroup* const withBarrierGroup		= new tcu::TestCaseGroup(m_testCtx, "with_memory_barrier", "Synchronize with memory barrier");
   3211 		tcu::TestCaseGroup* const withoutBarrierGroup	= new tcu::TestCaseGroup(m_testCtx, "without_memory_barrier", "Synchronize without memory barrier");
   3212 
   3213 		interCallGroup->addChild(withBarrierGroup);
   3214 		interCallGroup->addChild(withoutBarrierGroup);
   3215 
   3216 		// .with_memory_barrier
   3217 		{
   3218 			static const struct CaseConfig
   3219 			{
   3220 				const char*								namePrefix;
   3221 				const InterCallTestCase::StorageType	storage;
   3222 				const int								flags;
   3223 			} configs[] =
   3224 			{
   3225 				{ "image",			InterCallTestCase::STORAGE_IMAGE,	0																		},
   3226 				{ "image_atomic",	InterCallTestCase::STORAGE_IMAGE,	InterCallTestCase::FLAG_USE_ATOMIC | InterCallTestCase::FLAG_USE_INT	},
   3227 				{ "ssbo",			InterCallTestCase::STORAGE_BUFFER,	0																		},
   3228 				{ "ssbo_atomic",	InterCallTestCase::STORAGE_BUFFER,	InterCallTestCase::FLAG_USE_ATOMIC | InterCallTestCase::FLAG_USE_INT	},
   3229 			};
   3230 
   3231 			const int seed0 = 123;
   3232 			const int seed1 = 457;
   3233 
   3234 			for (int configNdx = 0; configNdx < DE_LENGTH_OF_ARRAY(configs); ++configNdx)
   3235 			{
   3236 				const char* const target = (configs[configNdx].storage == InterCallTestCase::STORAGE_BUFFER) ? ("buffer") : ("image");
   3237 
   3238 				withBarrierGroup->addChild(new InterCallTestCase(m_context,
   3239 																 (std::string(configs[configNdx].namePrefix) + "_write_read").c_str(),
   3240 																 (std::string("Write to ") + target + " and read it").c_str(),
   3241 																 configs[configNdx].storage,
   3242 																 configs[configNdx].flags,
   3243 																 InterCallOperations()
   3244 																	<< op::WriteData::Generate(1, seed0)
   3245 																	<< op::Barrier()
   3246 																	<< op::ReadData::Generate(1, seed0)));
   3247 
   3248 				withBarrierGroup->addChild(new InterCallTestCase(m_context,
   3249 																 (std::string(configs[configNdx].namePrefix) + "_read_write").c_str(),
   3250 																 (std::string("Read from ") + target + " and then write to it").c_str(),
   3251 																 configs[configNdx].storage,
   3252 																 configs[configNdx].flags,
   3253 																 InterCallOperations()
   3254 																	<< op::ReadZeroData::Generate(1)
   3255 																	<< op::Barrier()
   3256 																	<< op::WriteData::Generate(1, seed0)));
   3257 
   3258 				withBarrierGroup->addChild(new InterCallTestCase(m_context,
   3259 																 (std::string(configs[configNdx].namePrefix) + "_overwrite").c_str(),
   3260 																 (std::string("Write to ") + target + " twice and read it").c_str(),
   3261 																 configs[configNdx].storage,
   3262 																 configs[configNdx].flags,
   3263 																 InterCallOperations()
   3264 																	<< op::WriteData::Generate(1, seed0)
   3265 																	<< op::Barrier()
   3266 																	<< op::WriteData::Generate(1, seed1)
   3267 																	<< op::Barrier()
   3268 																	<< op::ReadData::Generate(1, seed1)));
   3269 
   3270 				withBarrierGroup->addChild(new InterCallTestCase(m_context,
   3271 																 (std::string(configs[configNdx].namePrefix) + "_multiple_write_read").c_str(),
   3272 																 (std::string("Write to multiple ") + target + "s and read them").c_str(),
   3273 																 configs[configNdx].storage,
   3274 																 configs[configNdx].flags,
   3275 																 InterCallOperations()
   3276 																	<< op::WriteData::Generate(1, seed0)
   3277 																	<< op::WriteData::Generate(2, seed1)
   3278 																	<< op::Barrier()
   3279 																	<< op::ReadMultipleData::Generate(1, seed0, 2, seed1)));
   3280 
   3281 				withBarrierGroup->addChild(new InterCallTestCase(m_context,
   3282 																 (std::string(configs[configNdx].namePrefix) + "_multiple_interleaved_write_read").c_str(),
   3283 																 (std::string("Write to same ") + target + " in multiple calls and read it").c_str(),
   3284 																 configs[configNdx].storage,
   3285 																 configs[configNdx].flags,
   3286 																 InterCallOperations()
   3287 																	<< op::WriteDataInterleaved::Generate(1, seed0, true)
   3288 																	<< op::WriteDataInterleaved::Generate(1, seed1, false)
   3289 																	<< op::Barrier()
   3290 																	<< op::ReadDataInterleaved::Generate(1, seed0, seed1)));
   3291 
   3292 				withBarrierGroup->addChild(new InterCallTestCase(m_context,
   3293 																 (std::string(configs[configNdx].namePrefix) + "_multiple_unrelated_write_read_ordered").c_str(),
   3294 																 (std::string("Two unrelated ") + target + " write-reads").c_str(),
   3295 																 configs[configNdx].storage,
   3296 																 configs[configNdx].flags,
   3297 																 InterCallOperations()
   3298 																	<< op::WriteData::Generate(1, seed0)
   3299 																	<< op::WriteData::Generate(2, seed1)
   3300 																	<< op::Barrier()
   3301 																	<< op::ReadData::Generate(1, seed0)
   3302 																	<< op::ReadData::Generate(2, seed1)));
   3303 
   3304 				withBarrierGroup->addChild(new InterCallTestCase(m_context,
   3305 																 (std::string(configs[configNdx].namePrefix) + "_multiple_unrelated_write_read_non_ordered").c_str(),
   3306 																 (std::string("Two unrelated ") + target + " write-reads").c_str(),
   3307 																 configs[configNdx].storage,
   3308 																 configs[configNdx].flags,
   3309 																 InterCallOperations()
   3310 																	<< op::WriteData::Generate(1, seed0)
   3311 																	<< op::WriteData::Generate(2, seed1)
   3312 																	<< op::Barrier()
   3313 																	<< op::ReadData::Generate(2, seed1)
   3314 																	<< op::ReadData::Generate(1, seed0)));
   3315 			}
   3316 
   3317 			// .without_memory_barrier
   3318 			{
   3319 				struct InvocationConfig
   3320 				{
   3321 					const char*	name;
   3322 					int			count;
   3323 				};
   3324 
   3325 				static const InvocationConfig ssboInvocations[] =
   3326 				{
   3327 					{ "1k",		1024	},
   3328 					{ "4k",		4096	},
   3329 					{ "32k",	32768	},
   3330 				};
   3331 				static const InvocationConfig imageInvocations[] =
   3332 				{
   3333 					{ "8x8",		8	},
   3334 					{ "32x32",		32	},
   3335 					{ "128x128",	128	},
   3336 				};
   3337 				static const InvocationConfig counterInvocations[] =
   3338 				{
   3339 					{ "32",		32		},
   3340 					{ "128",	128		},
   3341 					{ "1k",		1024	},
   3342 				};
   3343 				static const int callCounts[] = { 2, 5, 100 };
   3344 
   3345 				for (int invocationNdx = 0; invocationNdx < DE_LENGTH_OF_ARRAY(ssboInvocations); ++invocationNdx)
   3346 					for (int callCountNdx = 0; callCountNdx < DE_LENGTH_OF_ARRAY(callCounts); ++callCountNdx)
   3347 						withoutBarrierGroup->addChild(new SSBOConcurrentAtomicCase(m_context, (std::string("ssbo_atomic_dispatch_") + de::toString(callCounts[callCountNdx]) + "_calls_" + ssboInvocations[invocationNdx].name + "_invocations").c_str(),	"", callCounts[callCountNdx], ssboInvocations[invocationNdx].count));
   3348 
   3349 				for (int invocationNdx = 0; invocationNdx < DE_LENGTH_OF_ARRAY(imageInvocations); ++invocationNdx)
   3350 					for (int callCountNdx = 0; callCountNdx < DE_LENGTH_OF_ARRAY(callCounts); ++callCountNdx)
   3351 						withoutBarrierGroup->addChild(new ConcurrentImageAtomicCase(m_context, (std::string("image_atomic_dispatch_") + de::toString(callCounts[callCountNdx]) + "_calls_" + imageInvocations[invocationNdx].name + "_invocations").c_str(),	"", callCounts[callCountNdx], imageInvocations[invocationNdx].count));
   3352 
   3353 				for (int invocationNdx = 0; invocationNdx < DE_LENGTH_OF_ARRAY(counterInvocations); ++invocationNdx)
   3354 					for (int callCountNdx = 0; callCountNdx < DE_LENGTH_OF_ARRAY(callCounts); ++callCountNdx)
   3355 						withoutBarrierGroup->addChild(new ConcurrentAtomicCounterCase(m_context, (std::string("atomic_counter_dispatch_") + de::toString(callCounts[callCountNdx]) + "_calls_" + counterInvocations[invocationNdx].name + "_invocations").c_str(),	"", callCounts[callCountNdx], counterInvocations[invocationNdx].count));
   3356 
   3357 				for (int invocationNdx = 0; invocationNdx < DE_LENGTH_OF_ARRAY(counterInvocations); ++invocationNdx)
   3358 					for (int callCountNdx = 0; callCountNdx < DE_LENGTH_OF_ARRAY(callCounts); ++callCountNdx)
   3359 						withoutBarrierGroup->addChild(new ConcurrentSSBOAtomicCounterMixedCase(m_context, (std::string("ssbo_atomic_counter_mixed_dispatch_") + de::toString(callCounts[callCountNdx]) + "_calls_" + counterInvocations[invocationNdx].name + "_invocations").c_str(),	"", callCounts[callCountNdx], counterInvocations[invocationNdx].count));
   3360 			}
   3361 		}
   3362 	}
   3363 }
   3364 
   3365 } // Functional
   3366 } // gles31
   3367 } // deqp
   3368