Home | History | Annotate | Download | only in image
      1 /*------------------------------------------------------------------------
      2  * Vulkan Conformance Tests
      3  * ------------------------
      4  *
      5  * Copyright (c) 2016 The Khronos Group Inc.
      6  *
      7  * Licensed under the Apache License, Version 2.0 (the "License");
      8  * you may not use this file except in compliance with the License.
      9  * You may obtain a copy of the License at
     10  *
     11  *      http://www.apache.org/licenses/LICENSE-2.0
     12  *
     13  * Unless required by applicable law or agreed to in writing, software
     14  * distributed under the License is distributed on an "AS IS" BASIS,
     15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     16  * See the License for the specific language governing permissions and
     17  * limitations under the License.
     18  *
     19  *//*!
     20  * \file  vktImageAtomicOperationTests.cpp
     21  * \brief Image atomic operation tests
     22  *//*--------------------------------------------------------------------*/
     23 
     24 #include "vktImageAtomicOperationTests.hpp"
     25 
     26 #include "deUniquePtr.hpp"
     27 #include "deStringUtil.hpp"
     28 
     29 #include "vktTestCaseUtil.hpp"
     30 #include "vkPrograms.hpp"
     31 #include "vkImageUtil.hpp"
     32 #include "vktImageTestsUtil.hpp"
     33 #include "vkBuilderUtil.hpp"
     34 #include "vkRef.hpp"
     35 #include "vkRefUtil.hpp"
     36 #include "vkTypeUtil.hpp"
     37 
     38 #include "tcuTextureUtil.hpp"
     39 #include "tcuTexture.hpp"
     40 #include "tcuVectorType.hpp"
     41 
     42 namespace vkt
     43 {
     44 namespace image
     45 {
     46 namespace
     47 {
     48 
     49 using namespace vk;
     50 using namespace std;
     51 using de::toString;
     52 
     53 using tcu::TextureFormat;
     54 using tcu::IVec2;
     55 using tcu::IVec3;
     56 using tcu::UVec3;
     57 using tcu::Vec4;
     58 using tcu::IVec4;
     59 using tcu::UVec4;
     60 using tcu::CubeFace;
     61 using tcu::Texture1D;
     62 using tcu::Texture2D;
     63 using tcu::Texture3D;
     64 using tcu::Texture2DArray;
     65 using tcu::TextureCube;
     66 using tcu::PixelBufferAccess;
     67 using tcu::ConstPixelBufferAccess;
     68 using tcu::Vector;
     69 using tcu::TestContext;
     70 
enum
{
	// Number of compute invocations that target each pixel of the result image;
	// the dispatch is widened by this factor (see BinaryAtomicInstanceBase::iterate)
	// so that atomics on the same texel actually contend with each other.
	NUM_INVOCATIONS_PER_PIXEL = 5u
};
     75 
//! Atomic image operations exercised by these tests; each value maps to one
//! imageAtomic* GLSL built-in (see getAtomicOperationShaderFuncName).
enum AtomicOperation
{
	ATOMIC_OPERATION_ADD = 0,
	ATOMIC_OPERATION_MIN,
	ATOMIC_OPERATION_MAX,
	ATOMIC_OPERATION_AND,
	ATOMIC_OPERATION_OR,
	ATOMIC_OPERATION_XOR,
	ATOMIC_OPERATION_EXCHANGE,

	ATOMIC_OPERATION_LAST	// Number of operations; not a valid operation itself.
};
     88 
     89 static string getCoordStr (const ImageType		imageType,
     90 						   const std::string&	x,
     91 						   const std::string&	y,
     92 						   const std::string&	z)
     93 {
     94 	switch (imageType)
     95 	{
     96 		case IMAGE_TYPE_1D:
     97 		case IMAGE_TYPE_BUFFER:
     98 			return x;
     99 		case IMAGE_TYPE_1D_ARRAY:
    100 		case IMAGE_TYPE_2D:
    101 			return string("ivec2(" + x + "," + y + ")");
    102 		case IMAGE_TYPE_2D_ARRAY:
    103 		case IMAGE_TYPE_3D:
    104 		case IMAGE_TYPE_CUBE:
    105 		case IMAGE_TYPE_CUBE_ARRAY:
    106 			return string("ivec3(" + x + "," + y + "," + z + ")");
    107 		default:
    108 			DE_ASSERT(false);
    109 			return DE_NULL;
    110 	}
    111 }
    112 
    113 static string getAtomicFuncArgumentShaderStr (const AtomicOperation	op,
    114 											  const string&			x,
    115 											  const string&			y,
    116 											  const string&			z,
    117 											  const IVec3&			gridSize)
    118 {
    119 	switch (op)
    120 	{
    121 		case ATOMIC_OPERATION_ADD:
    122 		case ATOMIC_OPERATION_MIN:
    123 		case ATOMIC_OPERATION_MAX:
    124 		case ATOMIC_OPERATION_AND:
    125 		case ATOMIC_OPERATION_OR:
    126 		case ATOMIC_OPERATION_XOR:
    127 			return string("(" + x + "*" + x + " + " + y + "*" + y + " + " + z + "*" + z + ")");
    128 		case ATOMIC_OPERATION_EXCHANGE:
    129 			return string("((" + z + "*" + toString(gridSize.x()) + " + " + x + ")*" + toString(gridSize.y()) + " + " + y + ")");
    130 		default:
    131 			DE_ASSERT(false);
    132 			return DE_NULL;
    133 	}
    134 }
    135 
    136 static string getAtomicOperationCaseName (const AtomicOperation op)
    137 {
    138 	switch (op)
    139 	{
    140 		case ATOMIC_OPERATION_ADD:			return string("add");
    141 		case ATOMIC_OPERATION_MIN:			return string("min");
    142 		case ATOMIC_OPERATION_MAX:			return string("max");
    143 		case ATOMIC_OPERATION_AND:			return string("and");
    144 		case ATOMIC_OPERATION_OR:			return string("or");
    145 		case ATOMIC_OPERATION_XOR:			return string("xor");
    146 		case ATOMIC_OPERATION_EXCHANGE:		return string("exchange");
    147 		default:
    148 			DE_ASSERT(false);
    149 			return DE_NULL;
    150 	}
    151 }
    152 
    153 static string getAtomicOperationShaderFuncName (const AtomicOperation op)
    154 {
    155 	switch (op)
    156 	{
    157 		case ATOMIC_OPERATION_ADD:			return string("imageAtomicAdd");
    158 		case ATOMIC_OPERATION_MIN:			return string("imageAtomicMin");
    159 		case ATOMIC_OPERATION_MAX:			return string("imageAtomicMax");
    160 		case ATOMIC_OPERATION_AND:			return string("imageAtomicAnd");
    161 		case ATOMIC_OPERATION_OR:			return string("imageAtomicOr");
    162 		case ATOMIC_OPERATION_XOR:			return string("imageAtomicXor");
    163 		case ATOMIC_OPERATION_EXCHANGE:		return string("imageAtomicExchange");
    164 		default:
    165 			DE_ASSERT(false);
    166 			return DE_NULL;
    167 	}
    168 }
    169 
    170 static deInt32 getOperationInitialValue (const AtomicOperation op)
    171 {
    172 	switch (op)
    173 	{
    174 		// \note 18 is just an arbitrary small nonzero value.
    175 		case ATOMIC_OPERATION_ADD:			return 18;
    176 		case ATOMIC_OPERATION_MIN:			return (1 << 15) - 1;
    177 		case ATOMIC_OPERATION_MAX:			return 18;
    178 		case ATOMIC_OPERATION_AND:			return (1 << 15) - 1;
    179 		case ATOMIC_OPERATION_OR:			return 18;
    180 		case ATOMIC_OPERATION_XOR:			return 18;
    181 		case ATOMIC_OPERATION_EXCHANGE:		return 18;
    182 		default:
    183 			DE_ASSERT(false);
    184 			return -1;
    185 	}
    186 }
    187 
    188 static deInt32 getAtomicFuncArgument (const AtomicOperation op, const IVec3& invocationID, const IVec3& gridSize)
    189 {
    190 	const int x = invocationID.x();
    191 	const int y = invocationID.y();
    192 	const int z = invocationID.z();
    193 
    194 	switch (op)
    195 	{
    196 		// \note Fall-throughs.
    197 		case ATOMIC_OPERATION_ADD:
    198 		case ATOMIC_OPERATION_MIN:
    199 		case ATOMIC_OPERATION_MAX:
    200 		case ATOMIC_OPERATION_AND:
    201 		case ATOMIC_OPERATION_OR:
    202 		case ATOMIC_OPERATION_XOR:
    203 			return x*x + y*y + z*z;
    204 		case ATOMIC_OPERATION_EXCHANGE:
    205 			return (z*gridSize.x() + x)*gridSize.y() + y;
    206 		default:
    207 			DE_ASSERT(false);
    208 			return -1;
    209 	}
    210 }
    211 
    212 //! An order-independent operation is one for which the end result doesn't depend on the order in which the operations are carried (i.e. is both commutative and associative).
    213 static bool isOrderIndependentAtomicOperation (const AtomicOperation op)
    214 {
    215 	return	op == ATOMIC_OPERATION_ADD ||
    216 			op == ATOMIC_OPERATION_MIN ||
    217 			op == ATOMIC_OPERATION_MAX ||
    218 			op == ATOMIC_OPERATION_AND ||
    219 			op == ATOMIC_OPERATION_OR ||
    220 			op == ATOMIC_OPERATION_XOR;
    221 }
    222 
    223 //! Computes the result of an atomic operation where "a" is the data operated on and "b" is the parameter to the atomic function.
    224 static deInt32 computeBinaryAtomicOperationResult (const AtomicOperation op, const deInt32 a, const deInt32 b)
    225 {
    226 	switch (op)
    227 	{
    228 		case ATOMIC_OPERATION_ADD:			return a + b;
    229 		case ATOMIC_OPERATION_MIN:			return de::min(a, b);
    230 		case ATOMIC_OPERATION_MAX:			return de::max(a, b);
    231 		case ATOMIC_OPERATION_AND:			return a & b;
    232 		case ATOMIC_OPERATION_OR:			return a | b;
    233 		case ATOMIC_OPERATION_XOR:			return a ^ b;
    234 		case ATOMIC_OPERATION_EXCHANGE:		return b;
    235 		default:
    236 			DE_ASSERT(false);
    237 			return -1;
    238 	}
    239 }
    240 
//! Test case that checks only the final value left in each texel of the result
//! image after all atomic invocations have completed. The compute shader is
//! generated in initPrograms(); verification is done by the matching instance.
class BinaryAtomicEndResultCase : public vkt::TestCase
{
public:
								BinaryAtomicEndResultCase  (tcu::TestContext&			testCtx,
															const string&				name,
															const string&				description,
															const ImageType				imageType,
															const tcu::UVec3&			imageSize,
															const tcu::TextureFormat&	format,
															const AtomicOperation		operation,
															const glu::GLSLVersion		glslVersion);

	void						initPrograms			   (SourceCollections&			sourceCollections) const;
	TestInstance*				createInstance			   (Context&					context) const;
private:

	const ImageType				m_imageType;	// Image dimensionality/arrayness under test.
	const tcu::UVec3			m_imageSize;
	const tcu::TextureFormat	m_format;		// Texel format of the atomic target image.
	const AtomicOperation		m_operation;
	const glu::GLSLVersion		m_glslVersion;
};
    263 
// Stores the test parameters; no Vulkan work happens at case construction time.
BinaryAtomicEndResultCase::BinaryAtomicEndResultCase (tcu::TestContext&			testCtx,
													  const string&				name,
													  const string&				description,
													  const ImageType			imageType,
													  const tcu::UVec3&			imageSize,
													  const tcu::TextureFormat&	format,
													  const AtomicOperation		operation,
													  const glu::GLSLVersion	glslVersion)
	: TestCase		(testCtx, name, description)
	, m_imageType	(imageType)
	, m_imageSize	(imageSize)
	, m_format		(format)
	, m_operation	(operation)
	, m_glslVersion	(glslVersion)
{
}
    280 
// Generates the compute shader source: one invocation per (gx, gy, gz), each
// performing a single atomic on the result image. The dispatch in iterate() is
// NUM_INVOCATIONS_PER_PIXEL times wider than the grid, and "gx % gridSize.x"
// folds the extra invocations back onto the same texel so the atomics contend.
void BinaryAtomicEndResultCase::initPrograms (SourceCollections& sourceCollections) const
{
	const string	versionDecl				= glu::getGLSLVersionDeclaration(m_glslVersion);

	const bool		uintFormat				= isUintFormat(mapTextureFormat(m_format));
	const bool		intFormat				= isIntFormat(mapTextureFormat(m_format));
	const UVec3		gridSize				= getShaderGridSize(m_imageType, m_imageSize);
	const string	atomicCoord				= getCoordStr(m_imageType, "gx % " + toString(gridSize.x()), "gy", "gz");

	// Cast the argument to the component type matching the image format.
	const string	atomicArgExpr			= (uintFormat ? "uint" : intFormat ? "int" : "float")
											+ getAtomicFuncArgumentShaderStr(m_operation, "gx", "gy", "gz", IVec3(NUM_INVOCATIONS_PER_PIXEL*gridSize.x(), gridSize.y(), gridSize.z()));

	const string	atomicInvocation		= getAtomicOperationShaderFuncName(m_operation) + "(u_resultImage, " + atomicCoord + ", " + atomicArgExpr + ")";
	const string	shaderImageFormatStr	= getShaderImageFormatQualifier(m_format);
	const string	shaderImageTypeStr		= getShaderImageType(m_format, m_imageType);

	// The atomic's return value is discarded here; only the image end result matters.
	string source = versionDecl + "\n"
					"precision highp " + shaderImageTypeStr + ";\n"
					"\n"
					"layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
					"layout (" + shaderImageFormatStr + ", binding=0) coherent uniform " + shaderImageTypeStr + " u_resultImage;\n"
					"\n"
					"void main (void)\n"
					"{\n"
					"	int gx = int(gl_GlobalInvocationID.x);\n"
					"	int gy = int(gl_GlobalInvocationID.y);\n"
					"	int gz = int(gl_GlobalInvocationID.z);\n"
					"	" + atomicInvocation + ";\n"
					"}\n";

	sourceCollections.glslSources.add(m_name) << glu::ComputeSource(source.c_str());
}
    313 
//! Test case that additionally records the value returned by each atomic
//! invocation into a second ("intermediate values") image, so the per-invocation
//! return values can be validated, not just the end result.
class BinaryAtomicIntermValuesCase : public vkt::TestCase
{
public:
								BinaryAtomicIntermValuesCase   (tcu::TestContext&			testCtx,
																const string&				name,
																const string&				description,
																const ImageType				imageType,
																const tcu::UVec3&			imageSize,
																const tcu::TextureFormat&	format,
																const AtomicOperation		operation,
																const glu::GLSLVersion		glslVersion);

	void						initPrograms				   (SourceCollections&			sourceCollections) const;
	TestInstance*				createInstance				   (Context&					context) const;
private:

	const ImageType				m_imageType;	// Image dimensionality/arrayness under test.
	const tcu::UVec3			m_imageSize;
	const tcu::TextureFormat	m_format;		// Texel format of the atomic target image.
	const AtomicOperation		m_operation;
	const glu::GLSLVersion		m_glslVersion;
};
    336 
// Stores the test parameters; no Vulkan work happens at case construction time.
BinaryAtomicIntermValuesCase::BinaryAtomicIntermValuesCase (TestContext&			testCtx,
															const string&			name,
															const string&			description,
															const ImageType			imageType,
															const tcu::UVec3&		imageSize,
															const TextureFormat&	format,
															const AtomicOperation	operation,
															const glu::GLSLVersion	glslVersion)
	: TestCase		(testCtx, name, description)
	, m_imageType	(imageType)
	, m_imageSize	(imageSize)
	, m_format		(format)
	, m_operation	(operation)
	, m_glslVersion	(glslVersion)
{
}
    353 
// Generates the compute shader source. Like the end-result variant, each
// invocation performs one atomic on u_resultImage (with "gx % gridSize.x"
// folding NUM_INVOCATIONS_PER_PIXEL invocations onto the same texel), but here
// the atomic's return value is also stored, per invocation, into the
// write-only u_intermValuesImage at the unfolded coordinate.
void BinaryAtomicIntermValuesCase::initPrograms (SourceCollections& sourceCollections) const
{
	const string	versionDecl				= glu::getGLSLVersionDeclaration(m_glslVersion);

	const bool		uintFormat				= isUintFormat(mapTextureFormat(m_format));
	const bool		intFormat				= isIntFormat(mapTextureFormat(m_format));
	const string	colorVecTypeName		= string(uintFormat ? "u" : intFormat ? "i" : "") + "vec4";
	const UVec3		gridSize				= getShaderGridSize(m_imageType, m_imageSize);
	const string	atomicCoord				= getCoordStr(m_imageType, "gx % " + toString(gridSize.x()), "gy", "gz");
	const string	invocationCoord			= getCoordStr(m_imageType, "gx", "gy", "gz");
	// Cast the argument to the component type matching the image format.
	const string	atomicArgExpr			= (uintFormat ? "uint" : intFormat ? "int" : "float")
											+ getAtomicFuncArgumentShaderStr(m_operation, "gx", "gy", "gz", IVec3(NUM_INVOCATIONS_PER_PIXEL*gridSize.x(), gridSize.y(), gridSize.z()));

	const string	atomicInvocation		= getAtomicOperationShaderFuncName(m_operation) + "(u_resultImage, " + atomicCoord + ", " + atomicArgExpr + ")";
	const string	shaderImageFormatStr	= getShaderImageFormatQualifier(m_format);
	const string	shaderImageTypeStr		= getShaderImageType(m_format, m_imageType);

	string source = versionDecl + "\n"
					"precision highp " + shaderImageTypeStr + ";\n"
					"\n"
					"layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
					"layout (" + shaderImageFormatStr + ", binding=0) coherent uniform " + shaderImageTypeStr + " u_resultImage;\n"
					"layout (" + shaderImageFormatStr + ", binding=1) writeonly uniform " + shaderImageTypeStr + " u_intermValuesImage;\n"
					"\n"
					"void main (void)\n"
					"{\n"
					"	int gx = int(gl_GlobalInvocationID.x);\n"
					"	int gy = int(gl_GlobalInvocationID.y);\n"
					"	int gz = int(gl_GlobalInvocationID.z);\n"
					"	imageStore(u_intermValuesImage, " + invocationCoord + ", " + colorVecTypeName + "(" + atomicInvocation + "));\n"
					"}\n";

	sourceCollections.glslSources.add(m_name) << glu::ComputeSource(source.c_str());
}
    388 
//! Common scaffolding shared by both atomic test instances: iterate() creates
//! the result image, uploads the initial data, dispatches the compute shader
//! and reads the output buffer back to the host. Subclasses supply extra
//! resources, descriptor setup, around-dispatch commands and verification.
class BinaryAtomicInstanceBase : public vkt::TestInstance
{
public:

								BinaryAtomicInstanceBase (Context&						context,
														  const string&					name,
														  const ImageType				imageType,
														  const tcu::UVec3&				imageSize,
														  const TextureFormat&			format,
														  const AtomicOperation			operation);

	tcu::TestStatus				iterate					 (void);

	//! Size in bytes of the host-visible buffer the results are copied into.
	virtual deUint32			getOutputBufferSize		 (void) const = 0;

	//! Create any additional images/views the subclass needs (called before prepareDescriptors).
	virtual void				prepareResources		 (void) = 0;
	//! Build descriptor set layout/pool/set for the shader's bindings.
	virtual void				prepareDescriptors		 (void) = 0;

	//! Hooks for extra commands recorded just before / after the dispatch
	//! (layout transitions, copies into the output buffer, ...).
	virtual void				commandsBeforeCompute	 (const VkCommandBuffer			cmdBuffer) const = 0;
	virtual void				commandsAfterCompute	 (const VkCommandBuffer			cmdBuffer) const = 0;

	//! Check the read-back output buffer contents; true on pass.
	virtual bool				verifyResult			 (Allocation&					outputBufferAllocation) const = 0;

protected:
	const string				m_name;			// Also the key of the shader binary in the collection.
	const ImageType				m_imageType;
	const tcu::UVec3			m_imageSize;
	const TextureFormat			m_format;
	const AtomicOperation		m_operation;

	de::MovePtr<Buffer>			m_outputBuffer;			// Host-visible readback buffer.
	Move<VkDescriptorPool>		m_descriptorPool;
	Move<VkDescriptorSetLayout>	m_descriptorSetLayout;
	Move<VkDescriptorSet>		m_descriptorSet;
	de::MovePtr<Image>			m_resultImage;			// Image the atomics operate on.
	Move<VkImageView>			m_resultImageView;
};
    426 
// Stores the test parameters; Vulkan objects are created lazily in iterate().
BinaryAtomicInstanceBase::BinaryAtomicInstanceBase (Context&				context,
													const string&			name,
													const ImageType			imageType,
													const tcu::UVec3&		imageSize,
													const TextureFormat&	format,
													const AtomicOperation	operation)
	: vkt::TestInstance	(context)
	, m_name			(name)
	, m_imageType		(imageType)
	, m_imageSize		(imageSize)
	, m_format			(format)
	, m_operation		(operation)
{
}
    441 
// Runs the whole test: create resources, upload the initial image contents,
// dispatch the atomic compute shader, copy the results back and verify them.
// Pipeline barriers keep the host write -> transfer -> shader -> transfer ->
// host read chain ordered; the statement order below is load-bearing.
tcu::TestStatus	BinaryAtomicInstanceBase::iterate (void)
{
	const VkDevice			device				= m_context.getDevice();
	const DeviceInterface&	deviceInterface		= m_context.getDeviceInterface();
	const VkQueue			queue				= m_context.getUniversalQueue();
	const deUint32			queueFamilyIndex	= m_context.getUniversalQueueFamilyIndex();
	Allocator&				allocator			= m_context.getDefaultAllocator();
	const VkDeviceSize		imageSizeInBytes	= tcu::getPixelSize(m_format) * getNumPixels(m_imageType, m_imageSize);
	const VkDeviceSize		outBuffSizeInBytes	= getOutputBufferSize();

	const VkImageCreateInfo imageParams	=
	{
		VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,					// VkStructureType			sType;
		DE_NULL,												// const void*				pNext;
		(m_imageType == IMAGE_TYPE_CUBE ||
		 m_imageType == IMAGE_TYPE_CUBE_ARRAY ?
		 (VkImageCreateFlags)VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT :
		 (VkImageCreateFlags)0u),								// VkImageCreateFlags		flags;
		mapImageType(m_imageType),								// VkImageType				imageType;
		mapTextureFormat(m_format),								// VkFormat					format;
		makeExtent3D(getLayerSize(m_imageType, m_imageSize)),	// VkExtent3D				extent;
		1u,														// deUint32					mipLevels;
		getNumLayers(m_imageType, m_imageSize),					// deUint32					arrayLayers;
		VK_SAMPLE_COUNT_1_BIT,									// VkSampleCountFlagBits	samples;
		VK_IMAGE_TILING_OPTIMAL,								// VkImageTiling			tiling;
		VK_IMAGE_USAGE_STORAGE_BIT |
		VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
		VK_IMAGE_USAGE_TRANSFER_DST_BIT,						// VkImageUsageFlags		usage;
		VK_SHARING_MODE_EXCLUSIVE,								// VkSharingMode			sharingMode;
		0u,														// deUint32					queueFamilyIndexCount;
		DE_NULL,												// const deUint32*			pQueueFamilyIndices;
		VK_IMAGE_LAYOUT_UNDEFINED,								// VkImageLayout			initialLayout;
	};

	//Create the image that is going to store results of atomic operations
	m_resultImage = de::MovePtr<Image>(new Image(deviceInterface, device, allocator, imageParams, MemoryRequirement::Any));

	const VkImageSubresourceRange subresourceRange = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, getNumLayers(m_imageType, m_imageSize));

	m_resultImageView = makeImageView(deviceInterface, device, m_resultImage->get(), mapImageViewType(m_imageType), mapTextureFormat(m_format), subresourceRange);

	//Prepare the buffer with the initial data for the image
	const Buffer inputBuffer(deviceInterface, device, allocator, makeBufferCreateInfo(imageSizeInBytes, VK_BUFFER_USAGE_TRANSFER_SRC_BIT), MemoryRequirement::HostVisible);

	Allocation& inputBufferAllocation = inputBuffer.getAllocation();

	//Prepare the initial data for the image
	const tcu::IVec4 initialValue(getOperationInitialValue(m_operation));

	// Fill every texel with the operation-specific initial value.
	tcu::UVec3 gridSize = getShaderGridSize(m_imageType, m_imageSize);
	tcu::PixelBufferAccess inputPixelBuffer(m_format, gridSize.x(), gridSize.y(), gridSize.z(), inputBufferAllocation.getHostPtr());

	for (deUint32 z = 0; z < gridSize.z(); z++)
	for (deUint32 y = 0; y < gridSize.y(); y++)
	for (deUint32 x = 0; x < gridSize.x(); x++)
	{
		inputPixelBuffer.setPixel(initialValue, x, y, z);
	}

	// Make the host writes visible to the device before the copy below.
	flushMappedMemoryRange(deviceInterface, device, inputBufferAllocation.getMemory(), inputBufferAllocation.getOffset(), imageSizeInBytes);

	// Create a buffer to store shader output copied from result image
	m_outputBuffer = de::MovePtr<Buffer>(new Buffer(deviceInterface, device, allocator, makeBufferCreateInfo(outBuffSizeInBytes, VK_BUFFER_USAGE_TRANSFER_DST_BIT), MemoryRequirement::HostVisible));

	prepareResources();

	prepareDescriptors();

	// Create pipeline
	const Unique<VkShaderModule>	shaderModule(createShaderModule(deviceInterface, device, m_context.getBinaryCollection().get(m_name), 0));
	const Unique<VkPipelineLayout>	pipelineLayout(makePipelineLayout(deviceInterface, device, *m_descriptorSetLayout));
	const Unique<VkPipeline>		pipeline(makeComputePipeline(deviceInterface, device, *pipelineLayout, *shaderModule));

	// Create command buffer
	const Unique<VkCommandPool>		cmdPool(createCommandPool(deviceInterface, device, VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, queueFamilyIndex));
	const Unique<VkCommandBuffer>	cmdBuffer(allocateCommandBuffer(deviceInterface, device, *cmdPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));

	beginCommandBuffer(deviceInterface, *cmdBuffer);

	deviceInterface.cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipeline);
	deviceInterface.cmdBindDescriptorSets(*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &m_descriptorSet.get(), 0u, DE_NULL);

	// Order host writes to the input buffer before the transfer read, and
	// transition the result image to TRANSFER_DST for the upload copy.
	const VkBufferMemoryBarrier inputBufferPostHostWriteBarrier	=
		makeBufferMemoryBarrier(VK_ACCESS_HOST_WRITE_BIT,
								VK_ACCESS_TRANSFER_READ_BIT,
								*inputBuffer,
								0ull,
								imageSizeInBytes);

	const VkImageMemoryBarrier	resultImagePreCopyBarrier =
		makeImageMemoryBarrier(	0u,
								VK_ACCESS_TRANSFER_WRITE_BIT,
								VK_IMAGE_LAYOUT_UNDEFINED,
								VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
								m_resultImage->get(),
								subresourceRange);

	deviceInterface.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, DE_FALSE, 0u, DE_NULL, 1u, &inputBufferPostHostWriteBarrier, 1u, &resultImagePreCopyBarrier);

	const VkBufferImageCopy		bufferImageCopyParams = makeBufferImageCopy(makeExtent3D(getLayerSize(m_imageType, m_imageSize)), getNumLayers(m_imageType, m_imageSize));

	// Upload the initial texel values into the result image.
	deviceInterface.cmdCopyBufferToImage(*cmdBuffer, *inputBuffer, m_resultImage->get(), VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL, 1u, &bufferImageCopyParams);

	// Make the upload visible to the compute shader and switch the image to
	// GENERAL layout, as required for storage image access.
	const VkImageMemoryBarrier	resultImagePostCopyBarrier	=
		makeImageMemoryBarrier(	VK_ACCESS_TRANSFER_WRITE_BIT,
								VK_ACCESS_SHADER_READ_BIT,
								VK_IMAGE_LAYOUT_TRANSFER_DST_OPTIMAL,
								VK_IMAGE_LAYOUT_GENERAL,
								m_resultImage->get(),
								subresourceRange);

	deviceInterface.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, DE_FALSE, 0u, DE_NULL, 0u, DE_NULL, 1u, &resultImagePostCopyBarrier);

	commandsBeforeCompute(*cmdBuffer);

	// Widen the dispatch so each pixel is hit NUM_INVOCATIONS_PER_PIXEL times
	// (the shader folds the x coordinate back with a modulo).
	deviceInterface.cmdDispatch(*cmdBuffer, NUM_INVOCATIONS_PER_PIXEL*gridSize.x(), gridSize.y(), gridSize.z());

	commandsAfterCompute(*cmdBuffer);

	// Make the transfer writes into the output buffer visible to the host.
	const VkBufferMemoryBarrier	outputBufferPreHostReadBarrier
		= makeBufferMemoryBarrier(	VK_ACCESS_TRANSFER_WRITE_BIT,
									VK_ACCESS_HOST_READ_BIT,
									m_outputBuffer->get(),
									0ull,
									outBuffSizeInBytes);

	deviceInterface.cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT, DE_FALSE, 0u, DE_NULL, 1u, &outputBufferPreHostReadBarrier, 0u, DE_NULL);

	endCommandBuffer(deviceInterface, *cmdBuffer);

	submitCommandsAndWait(deviceInterface, device, queue, *cmdBuffer);

	Allocation& outputBufferAllocation = m_outputBuffer->getAllocation();

	invalidateMappedMemoryRange(deviceInterface, device, outputBufferAllocation.getMemory(), outputBufferAllocation.getOffset(), outBuffSizeInBytes);

	if (verifyResult(outputBufferAllocation))
		return tcu::TestStatus::pass("Comparison succeeded");
	else
		return tcu::TestStatus::fail("Comparison failed");
}
    583 
//! Instance that verifies only the end result image: it needs no extra
//! resources, binds just the result image, and after the dispatch copies the
//! result image into the output buffer for host-side verification.
class BinaryAtomicEndResultInstance : public BinaryAtomicInstanceBase
{
public:

						BinaryAtomicEndResultInstance  (Context&				context,
														const string&			name,
														const ImageType			imageType,
														const tcu::UVec3&		imageSize,
														const TextureFormat&	format,
														const AtomicOperation	operation)
							: BinaryAtomicInstanceBase(context, name, imageType, imageSize, format, operation) {}

	virtual deUint32	getOutputBufferSize			   (void) const;

	virtual void		prepareResources			   (void) {}	// No extra resources needed.
	virtual void		prepareDescriptors			   (void);

	virtual void		commandsBeforeCompute		   (const VkCommandBuffer) const {}	// Nothing to record before dispatch.
	virtual void		commandsAfterCompute		   (const VkCommandBuffer	cmdBuffer) const;

	virtual bool		verifyResult				   (Allocation&				outputBufferAllocation) const;
};
    606 
    607 deUint32 BinaryAtomicEndResultInstance::getOutputBufferSize (void) const
    608 {
    609 	return tcu::getPixelSize(m_format) * getNumPixels(m_imageType, m_imageSize);
    610 }
    611 
// Builds a descriptor set with a single storage-image binding (binding 0)
// pointing at the result image view, matching the shader's u_resultImage.
void BinaryAtomicEndResultInstance::prepareDescriptors (void)
{
	const VkDevice			device			= m_context.getDevice();
	const DeviceInterface&	deviceInterface = m_context.getDeviceInterface();

	m_descriptorSetLayout =
		DescriptorSetLayoutBuilder()
		.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, VK_SHADER_STAGE_COMPUTE_BIT)
		.build(deviceInterface, device);

	m_descriptorPool =
		DescriptorPoolBuilder()
		.addType(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE)
		.build(deviceInterface, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);

	m_descriptorSet = makeDescriptorSet(deviceInterface, device, *m_descriptorPool, *m_descriptorSetLayout);

	// Storage images are accessed in GENERAL layout; no sampler is needed.
	const VkDescriptorImageInfo	descResultImageInfo = makeDescriptorImageInfo(DE_NULL, *m_resultImageView, VK_IMAGE_LAYOUT_GENERAL);

	DescriptorSetUpdateBuilder()
		.writeSingle(*m_descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &descResultImageInfo)
		.update(deviceInterface, device);
}
    635 
// After the dispatch: make the shader's atomic writes visible, transition the
// result image to TRANSFER_SRC and copy it into the host-readable output buffer.
void BinaryAtomicEndResultInstance::commandsAfterCompute (const VkCommandBuffer	cmdBuffer) const
{
	const DeviceInterface&			deviceInterface		= m_context.getDeviceInterface();
	const VkImageSubresourceRange	subresourceRange = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, getNumLayers(m_imageType, m_imageSize));

	const VkImageMemoryBarrier	resultImagePostDispatchBarrier =
		makeImageMemoryBarrier(	VK_ACCESS_SHADER_WRITE_BIT,
								VK_ACCESS_TRANSFER_READ_BIT,
								VK_IMAGE_LAYOUT_GENERAL,
								VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
								m_resultImage->get(),
								subresourceRange);

	deviceInterface.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, DE_FALSE, 0u, DE_NULL, 0u, DE_NULL, 1u, &resultImagePostDispatchBarrier);

	const VkBufferImageCopy		bufferImageCopyParams = makeBufferImageCopy(makeExtent3D(getLayerSize(m_imageType, m_imageSize)), getNumLayers(m_imageType, m_imageSize));

	deviceInterface.cmdCopyImageToBuffer(cmdBuffer, m_resultImage->get(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, m_outputBuffer->get(), 1u, &bufferImageCopyParams);
}
    655 
// Verifies the read-back end result on the host.
// - Order-independent operations: the exact end value is recomputed by folding
//   the arguments of all NUM_INVOCATIONS_PER_PIXEL invocations into the
//   initial value; the order of folding does not matter by definition.
// - EXCHANGE: the final value must equal the argument of (exactly) one of the
//   invocations that targeted the pixel; which one won is implementation order.
bool BinaryAtomicEndResultInstance::verifyResult (Allocation& outputBufferAllocation) const
{
	const UVec3	gridSize			= getShaderGridSize(m_imageType, m_imageSize);
	// Invocation-space size: NUM_INVOCATIONS_PER_PIXEL times wider in x than the pixel grid.
	const IVec3 extendedGridSize	= IVec3(NUM_INVOCATIONS_PER_PIXEL*gridSize.x(), gridSize.y(), gridSize.z());

	tcu::ConstPixelBufferAccess resultBuffer(m_format, gridSize.x(), gridSize.y(), gridSize.z(), outputBufferAllocation.getHostPtr());

	for (deInt32 z = 0; z < resultBuffer.getDepth();  z++)
	for (deInt32 y = 0; y < resultBuffer.getHeight(); y++)
	for (deInt32 x = 0; x < resultBuffer.getWidth();  x++)
	{
		deInt32 resultValue = resultBuffer.getPixelInt(x, y, z).x();

		if (isOrderIndependentAtomicOperation(m_operation))
		{
			deInt32 reference = getOperationInitialValue(m_operation);

			for (deInt32 i = 0; i < static_cast<deInt32>(NUM_INVOCATIONS_PER_PIXEL); i++)
			{
				// Invocation i targeting this pixel has x-coordinate x + i*gridSize.x().
				const IVec3 gid(x + i*gridSize.x(), y, z);
				reference = computeBinaryAtomicOperationResult(m_operation, reference, getAtomicFuncArgument(m_operation, gid, extendedGridSize));
			}

			if (resultValue != reference)
				return false;
		}
		else if (m_operation == ATOMIC_OPERATION_EXCHANGE)
		{
			// Check if the end result equals one of the atomic args.
			bool matchFound = false;

			for (deInt32 i = 0; i < static_cast<deInt32>(NUM_INVOCATIONS_PER_PIXEL) && !matchFound; i++)
			{
				const IVec3 gid(x + i*gridSize.x(), y, z);
				matchFound = (resultValue == getAtomicFuncArgument(m_operation, gid, extendedGridSize));
			}

			if (!matchFound)
				return false;
		}
		else
			DE_ASSERT(false);
	}
	return true;
}
    701 
    702 TestInstance* BinaryAtomicEndResultCase::createInstance (Context& context) const
    703 {
    704 	return new BinaryAtomicEndResultInstance(context, m_name, m_imageType, m_imageSize, m_format, m_operation);
    705 }
    706 
// Test instance that verifies the intermediate values returned by the atomic
// function calls (as opposed to the final image contents). Each invocation
// stores the value returned by its atomic call into a dedicated texel of an
// extended-size "intermediate results" image, which is then read back and
// checked for consistency with some serial ordering of the atomics.
class BinaryAtomicIntermValuesInstance : public BinaryAtomicInstanceBase
{
public:

						BinaryAtomicIntermValuesInstance   (Context&				context,
															const string&			name,
															const ImageType			imageType,
															const tcu::UVec3&		imageSize,
															const TextureFormat&	format,
															const AtomicOperation	operation)
							: BinaryAtomicInstanceBase(context, name, imageType, imageSize, format, operation) {}

	// Size of the readback buffer: one texel per invocation, not per pixel.
	virtual deUint32	getOutputBufferSize				   (void) const;

	virtual void		prepareResources				   (void);
	virtual void		prepareDescriptors				   (void);

	virtual void		commandsBeforeCompute			   (const VkCommandBuffer	cmdBuffer) const;
	virtual void		commandsAfterCompute			   (const VkCommandBuffer	cmdBuffer) const;

	virtual bool		verifyResult					   (Allocation&				outputBufferAllocation) const;

protected:

	// Backtracking search: checks whether resultValues[] can be arranged into
	// some serial order of the atomic calls, consuming each atomicArgs[] entry
	// exactly once. argsUsed[] is scratch state mutated during the search.
	bool				verifyRecursive					   (const deInt32			index,
															const deInt32			valueSoFar,
															bool					argsUsed[NUM_INVOCATIONS_PER_PIXEL],
															const deInt32			atomicArgs[NUM_INVOCATIONS_PER_PIXEL],
															const deInt32			resultValues[NUM_INVOCATIONS_PER_PIXEL]) const;
	de::MovePtr<Image>	m_intermResultsImage;		// one texel per invocation's atomic return value
	Move<VkImageView>	m_intermResultsImageView;
};
    739 
    740 deUint32 BinaryAtomicIntermValuesInstance::getOutputBufferSize (void) const
    741 {
    742 	return NUM_INVOCATIONS_PER_PIXEL * tcu::getPixelSize(m_format) * getNumPixels(m_imageType, m_imageSize);
    743 }
    744 
// Creates the extended-size intermediate-results image and its view. Each
// invocation gets its own texel in this image to store the atomic return value.
void BinaryAtomicIntermValuesInstance::prepareResources (void)
{
	const VkDevice			device			= m_context.getDevice();
	const DeviceInterface&	deviceInterface = m_context.getDeviceInterface();
	Allocator&				allocator		= m_context.getDefaultAllocator();

	// Each layer is widened by NUM_INVOCATIONS_PER_PIXEL. Cube-based images
	// must stay square, hence their height is scaled by the same factor.
	const UVec3 layerSize			= getLayerSize(m_imageType, m_imageSize);
	const bool  isCubeBasedImage	= (m_imageType == IMAGE_TYPE_CUBE || m_imageType == IMAGE_TYPE_CUBE_ARRAY);
	const UVec3 extendedLayerSize	= isCubeBasedImage	? UVec3(NUM_INVOCATIONS_PER_PIXEL * layerSize.x(), NUM_INVOCATIONS_PER_PIXEL * layerSize.y(), layerSize.z())
														: UVec3(NUM_INVOCATIONS_PER_PIXEL * layerSize.x(), layerSize.y(), layerSize.z());

	const VkImageCreateInfo imageParams =
	{
		VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO,		// VkStructureType			sType;
		DE_NULL,									// const void*				pNext;
		(m_imageType == IMAGE_TYPE_CUBE ||
		 m_imageType == IMAGE_TYPE_CUBE_ARRAY ?
		 (VkImageCreateFlags)VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT :
		 (VkImageCreateFlags)0u),					// VkImageCreateFlags		flags;
		mapImageType(m_imageType),					// VkImageType				imageType;
		mapTextureFormat(m_format),					// VkFormat					format;
		makeExtent3D(extendedLayerSize),			// VkExtent3D				extent;
		1u,											// deUint32					mipLevels;
		getNumLayers(m_imageType, m_imageSize),		// deUint32					arrayLayers;
		VK_SAMPLE_COUNT_1_BIT,						// VkSampleCountFlagBits	samples;
		VK_IMAGE_TILING_OPTIMAL,					// VkImageTiling			tiling;
		VK_IMAGE_USAGE_STORAGE_BIT |				// VkImageUsageFlags		usage;
		VK_IMAGE_USAGE_TRANSFER_SRC_BIT,
		VK_SHARING_MODE_EXCLUSIVE,					// VkSharingMode			sharingMode;
		0u,											// deUint32					queueFamilyIndexCount;
		DE_NULL,									// const deUint32*			pQueueFamilyIndices;
		VK_IMAGE_LAYOUT_UNDEFINED,					// VkImageLayout			initialLayout;
	};

	m_intermResultsImage = de::MovePtr<Image>(new Image(deviceInterface, device, allocator, imageParams, MemoryRequirement::Any));

	// Single-mip view covering all layers; the shader writes through this view.
	const VkImageSubresourceRange subresourceRange = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, getNumLayers(m_imageType, m_imageSize));

	m_intermResultsImageView = makeImageView(deviceInterface, device, m_intermResultsImage->get(), mapImageViewType(m_imageType), mapTextureFormat(m_format), subresourceRange);
}
    785 
    786 void BinaryAtomicIntermValuesInstance::prepareDescriptors (void)
    787 {
    788 	const VkDevice			device			= m_context.getDevice();
    789 	const DeviceInterface&	deviceInterface = m_context.getDeviceInterface();
    790 
    791 	m_descriptorSetLayout =
    792 		DescriptorSetLayoutBuilder()
    793 		.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, VK_SHADER_STAGE_COMPUTE_BIT)
    794 		.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, VK_SHADER_STAGE_COMPUTE_BIT)
    795 		.build(deviceInterface, device);
    796 
    797 	m_descriptorPool =
    798 		DescriptorPoolBuilder()
    799 		.addType(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 2u)
    800 		.build(deviceInterface, device, VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
    801 
    802 	m_descriptorSet = makeDescriptorSet(deviceInterface, device, *m_descriptorPool, *m_descriptorSetLayout);
    803 
    804 	const VkDescriptorImageInfo	descResultImageInfo			= makeDescriptorImageInfo(DE_NULL, *m_resultImageView, VK_IMAGE_LAYOUT_GENERAL);
    805 	const VkDescriptorImageInfo	descIntermResultsImageInfo	= makeDescriptorImageInfo(DE_NULL, *m_intermResultsImageView, VK_IMAGE_LAYOUT_GENERAL);
    806 
    807 	DescriptorSetUpdateBuilder()
    808 		.writeSingle(*m_descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &descResultImageInfo)
    809 		.writeSingle(*m_descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &descIntermResultsImageInfo)
    810 		.update(deviceInterface, device);
    811 }
    812 
    813 void BinaryAtomicIntermValuesInstance::commandsBeforeCompute (const VkCommandBuffer cmdBuffer) const
    814 {
    815 	const DeviceInterface&			deviceInterface		= m_context.getDeviceInterface();
    816 	const VkImageSubresourceRange	subresourceRange	= makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, getNumLayers(m_imageType, m_imageSize));
    817 
    818 	const VkImageMemoryBarrier	imagePreDispatchBarrier =
    819 		makeImageMemoryBarrier(	0u,
    820 								VK_ACCESS_SHADER_WRITE_BIT,
    821 								VK_IMAGE_LAYOUT_UNDEFINED,
    822 								VK_IMAGE_LAYOUT_GENERAL,
    823 								m_intermResultsImage->get(),
    824 								subresourceRange);
    825 
    826 	deviceInterface.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, DE_FALSE, 0u, DE_NULL, 0u, DE_NULL, 1u, &imagePreDispatchBarrier);
    827 }
    828 
    829 void BinaryAtomicIntermValuesInstance::commandsAfterCompute (const VkCommandBuffer cmdBuffer) const
    830 {
    831 	const DeviceInterface&			deviceInterface		= m_context.getDeviceInterface();
    832 	const VkImageSubresourceRange	subresourceRange	= makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, getNumLayers(m_imageType, m_imageSize));
    833 
    834 	const VkImageMemoryBarrier	imagePostDispatchBarrier =
    835 		makeImageMemoryBarrier(	VK_ACCESS_SHADER_WRITE_BIT,
    836 								VK_ACCESS_TRANSFER_READ_BIT,
    837 								VK_IMAGE_LAYOUT_GENERAL,
    838 								VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
    839 								m_intermResultsImage->get(),
    840 								subresourceRange);
    841 
    842 	deviceInterface.cmdPipelineBarrier(cmdBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, DE_FALSE, 0u, DE_NULL, 0u, DE_NULL, 1u, &imagePostDispatchBarrier);
    843 
    844 	const UVec3					layerSize				= getLayerSize(m_imageType, m_imageSize);
    845 	const UVec3					extendedLayerSize		= UVec3(NUM_INVOCATIONS_PER_PIXEL * layerSize.x(), layerSize.y(), layerSize.z());
    846 	const VkBufferImageCopy		bufferImageCopyParams	= makeBufferImageCopy(makeExtent3D(extendedLayerSize), getNumLayers(m_imageType, m_imageSize));
    847 
    848 	deviceInterface.cmdCopyImageToBuffer(cmdBuffer, m_intermResultsImage->get(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, m_outputBuffer->get(), 1u, &bufferImageCopyParams);
    849 }
    850 
    851 bool BinaryAtomicIntermValuesInstance::verifyResult (Allocation&	outputBufferAllocation) const
    852 {
    853 	const UVec3	gridSize		 = getShaderGridSize(m_imageType, m_imageSize);
    854 	const IVec3 extendedGridSize = IVec3(NUM_INVOCATIONS_PER_PIXEL*gridSize.x(), gridSize.y(), gridSize.z());
    855 
    856 	tcu::ConstPixelBufferAccess resultBuffer(m_format, extendedGridSize.x(), extendedGridSize.y(), extendedGridSize.z(), outputBufferAllocation.getHostPtr());
    857 
    858 	for (deInt32 z = 0; z < resultBuffer.getDepth(); z++)
    859 	for (deInt32 y = 0; y < resultBuffer.getHeight(); y++)
    860 	for (deUint32 x = 0; x < gridSize.x(); x++)
    861 	{
    862 		deInt32 resultValues[NUM_INVOCATIONS_PER_PIXEL];
    863 		deInt32 atomicArgs[NUM_INVOCATIONS_PER_PIXEL];
    864 		bool	argsUsed[NUM_INVOCATIONS_PER_PIXEL];
    865 
    866 		for (deInt32 i = 0; i < static_cast<deInt32>(NUM_INVOCATIONS_PER_PIXEL); i++)
    867 		{
    868 			IVec3 gid(x + i*gridSize.x(), y, z);
    869 
    870 			resultValues[i] = resultBuffer.getPixelInt(gid.x(), gid.y(), gid.z()).x();
    871 			atomicArgs[i]	= getAtomicFuncArgument(m_operation, gid, extendedGridSize);
    872 			argsUsed[i]		= false;
    873 		}
    874 
    875 		// Verify that the return values form a valid sequence.
    876 		if (!verifyRecursive(0, getOperationInitialValue(m_operation), argsUsed, atomicArgs, resultValues))
    877 		{
    878 			return false;
    879 		}
    880 	}
    881 
    882 	return true;
    883 }
    884 
    885 bool BinaryAtomicIntermValuesInstance::verifyRecursive (const deInt32	index,
    886 														const deInt32	valueSoFar,
    887 														bool			argsUsed[NUM_INVOCATIONS_PER_PIXEL],
    888 														const deInt32	atomicArgs[NUM_INVOCATIONS_PER_PIXEL],
    889 														const deInt32	resultValues[NUM_INVOCATIONS_PER_PIXEL]) const
    890 {
    891 	if (index >= static_cast<deInt32>(NUM_INVOCATIONS_PER_PIXEL))
    892 		return true;
    893 
    894 	for (deInt32 i = 0; i < static_cast<deInt32>(NUM_INVOCATIONS_PER_PIXEL); i++)
    895 	{
    896 		if (!argsUsed[i] && resultValues[i] == valueSoFar)
    897 		{
    898 			argsUsed[i] = true;
    899 
    900 			if (verifyRecursive(index + 1, computeBinaryAtomicOperationResult(m_operation, valueSoFar, atomicArgs[i]), argsUsed, atomicArgs, resultValues))
    901 			{
    902 				return true;
    903 			}
    904 
    905 			argsUsed[i] = false;
    906 		}
    907 	}
    908 
    909 	return false;
    910 }
    911 
    912 TestInstance* BinaryAtomicIntermValuesCase::createInstance (Context& context) const
    913 {
    914 	return new BinaryAtomicIntermValuesInstance(context, m_name, m_imageType, m_imageSize, m_format, m_operation);
    915 }
    916 
    917 } // anonymous ns
    918 
    919 tcu::TestCaseGroup* createImageAtomicOperationTests (tcu::TestContext& testCtx)
    920 {
    921 	de::MovePtr<tcu::TestCaseGroup> imageAtomicOperationsTests(new tcu::TestCaseGroup(testCtx, "atomic_operations", "Atomic image operations cases"));
    922 
    923 	struct ImageParams
    924 	{
    925 		ImageParams(const ImageType imageType, const tcu::UVec3& imageSize)
    926 			: m_imageType	(imageType)
    927 			, m_imageSize	(imageSize)
    928 		{
    929 		}
    930 		const ImageType		m_imageType;
    931 		const tcu::UVec3	m_imageSize;
    932 	};
    933 
    934 	static const ImageParams imageParamsArray[] =
    935 	{
    936 		ImageParams(IMAGE_TYPE_1D,			tcu::UVec3(64u, 1u, 1u)),
    937 		ImageParams(IMAGE_TYPE_1D_ARRAY,	tcu::UVec3(64u, 1u, 8u)),
    938 		ImageParams(IMAGE_TYPE_2D,			tcu::UVec3(64u, 64u, 1u)),
    939 		ImageParams(IMAGE_TYPE_2D_ARRAY,	tcu::UVec3(64u, 64u, 8u)),
    940 		ImageParams(IMAGE_TYPE_3D,			tcu::UVec3(64u, 64u, 8u)),
    941 		ImageParams(IMAGE_TYPE_CUBE,		tcu::UVec3(64u, 64u, 1u)),
    942 		ImageParams(IMAGE_TYPE_CUBE_ARRAY,	tcu::UVec3(64u, 64u, 2u))
    943 	};
    944 
    945 	static const tcu::TextureFormat formats[] =
    946 	{
    947 		tcu::TextureFormat(tcu::TextureFormat::R, tcu::TextureFormat::UNSIGNED_INT32),
    948 		tcu::TextureFormat(tcu::TextureFormat::R, tcu::TextureFormat::SIGNED_INT32)
    949 	};
    950 
    951 	for (deUint32 operationI = 0; operationI < ATOMIC_OPERATION_LAST; operationI++)
    952 	{
    953 		const AtomicOperation operation = (AtomicOperation)operationI;
    954 
    955 		de::MovePtr<tcu::TestCaseGroup> operationGroup(new tcu::TestCaseGroup(testCtx, getAtomicOperationCaseName(operation).c_str(), ""));
    956 
    957 		for (deUint32 imageTypeNdx = 0; imageTypeNdx < DE_LENGTH_OF_ARRAY(imageParamsArray); imageTypeNdx++)
    958 		{
    959 			const ImageType	 imageType = imageParamsArray[imageTypeNdx].m_imageType;
    960 			const tcu::UVec3 imageSize = imageParamsArray[imageTypeNdx].m_imageSize;
    961 
    962 			de::MovePtr<tcu::TestCaseGroup> imageTypeGroup(new tcu::TestCaseGroup(testCtx, getImageTypeName(imageType).c_str(), ""));
    963 
    964 			for (deUint32 formatNdx = 0; formatNdx < DE_LENGTH_OF_ARRAY(formats); formatNdx++)
    965 			{
    966 				const TextureFormat&	format		= formats[formatNdx];
    967 				const std::string		formatName	= getShaderImageFormatQualifier(format);
    968 
    969 				//!< Atomic case checks the end result of the operations, and not the intermediate return values
    970 				const string caseEndResult = formatName + "_end_result";
    971 				imageTypeGroup->addChild(new BinaryAtomicEndResultCase(testCtx, caseEndResult, "", imageType, imageSize, format, operation, glu::GLSL_VERSION_440));
    972 
    973 				//!< Atomic case checks the return values of the atomic function and not the end result.
    974 				const string caseIntermValues = formatName + "_intermediate_values";
    975 				imageTypeGroup->addChild(new BinaryAtomicIntermValuesCase(testCtx, caseIntermValues, "", imageType, imageSize, format, operation, glu::GLSL_VERSION_440));
    976 			}
    977 
    978 			operationGroup->addChild(imageTypeGroup.release());
    979 		}
    980 
    981 		imageAtomicOperationsTests->addChild(operationGroup.release());
    982 	}
    983 
    984 	return imageAtomicOperationsTests.release();
    985 }
    986 
    987 } // image
    988 } // vkt
    989