Home | History | Annotate | Download | only in subgroups
      1 /*------------------------------------------------------------------------
      2  * Vulkan Conformance Tests
      3  * ------------------------
      4  *
      5  * Copyright (c) 2017 The Khronos Group Inc.
      6  * Copyright (c) 2017 Codeplay Software Ltd.
      7  *
      8  * Licensed under the Apache License, Version 2.0 (the "License");
      9  * you may not use this file except in compliance with the License.
     10  * You may obtain a copy of the License at
     11  *
     12  *      http://www.apache.org/licenses/LICENSE-2.0
     13  *
     14  * Unless required by applicable law or agreed to in writing, software
     15  * distributed under the License is distributed on an "AS IS" BASIS,
     16  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     17  * See the License for the specific language governing permissions and
     18  * limitations under the License.
     19  *
     20  */ /*!
     21  * \file
     22  * \brief Subgroups Tests Utils
     23  */ /*--------------------------------------------------------------------*/
     24 
     25 #include "vktSubgroupsTestsUtils.hpp"
     26 #include "deRandom.hpp"
     27 #include "tcuCommandLine.hpp"
     28 #include "tcuStringTemplate.hpp"
     29 #include "vkBarrierUtil.hpp"
     30 #include "vkImageUtil.hpp"
     31 #include "vkTypeUtil.hpp"
     32 #include "vkCmdUtil.hpp"
     33 #include "vkObjUtil.hpp"
     34 
     35 using namespace tcu;
     36 using namespace std;
     37 using namespace vk;
     38 using namespace vkt;
     39 
     40 namespace
     41 {
     42 deUint32 getFormatSizeInBytes(const VkFormat format)
     43 {
     44 	switch (format)
     45 	{
     46 		default:
     47 			DE_FATAL("Unhandled format!");
     48 			return 0;
     49 		case VK_FORMAT_R32_SINT:
     50 		case VK_FORMAT_R32_UINT:
     51 			return sizeof(deInt32);
     52 		case VK_FORMAT_R32G32_SINT:
     53 		case VK_FORMAT_R32G32_UINT:
     54 			return static_cast<deUint32>(sizeof(deInt32) * 2);
     55 		case VK_FORMAT_R32G32B32_SINT:
     56 		case VK_FORMAT_R32G32B32_UINT:
     57 		case VK_FORMAT_R32G32B32A32_SINT:
     58 		case VK_FORMAT_R32G32B32A32_UINT:
     59 			return static_cast<deUint32>(sizeof(deInt32) * 4);
     60 		case VK_FORMAT_R32_SFLOAT:
     61 			return 4;
     62 		case VK_FORMAT_R32G32_SFLOAT:
     63 			return 8;
     64 		case VK_FORMAT_R32G32B32_SFLOAT:
     65 			return 16;
     66 		case VK_FORMAT_R32G32B32A32_SFLOAT:
     67 			return 16;
     68 		case VK_FORMAT_R64_SFLOAT:
     69 			return 8;
     70 		case VK_FORMAT_R64G64_SFLOAT:
     71 			return 16;
     72 		case VK_FORMAT_R64G64B64_SFLOAT:
     73 			return 32;
     74 		case VK_FORMAT_R64G64B64A64_SFLOAT:
     75 			return 32;
     76 		// The below formats are used to represent bool and bvec* types. These
     77 		// types are passed to the shader as int and ivec* types, before the
     78 		// calculations are done as booleans. We need a distinct type here so
     79 		// that the shader generators can switch on it and generate the correct
     80 		// shader source for testing.
     81 		case VK_FORMAT_R8_USCALED:
     82 			return sizeof(deInt32);
     83 		case VK_FORMAT_R8G8_USCALED:
     84 			return static_cast<deUint32>(sizeof(deInt32) * 2);
     85 		case VK_FORMAT_R8G8B8_USCALED:
     86 		case VK_FORMAT_R8G8B8A8_USCALED:
     87 			return static_cast<deUint32>(sizeof(deInt32) * 4);
     88 	}
     89 }
     90 
     91 deUint32 getElementSizeInBytes(
     92 	const VkFormat format,
     93 	const subgroups::SSBOData::InputDataLayoutType layout)
     94 {
     95 	deUint32 bytes = getFormatSizeInBytes(format);
     96 	if (layout == subgroups::SSBOData::LayoutStd140)
     97 		return bytes < 16 ? 16 : bytes;
     98 	else
     99 		return bytes;
    100 }
    101 
    102 Move<VkPipelineLayout> makePipelineLayout(
    103 	Context& context, const VkDescriptorSetLayout descriptorSetLayout)
    104 {
    105 	const vk::VkPipelineLayoutCreateInfo pipelineLayoutParams = {
    106 		VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, // VkStructureType sType;
    107 		DE_NULL,			  // const void*            pNext;
    108 		0u,					  // VkPipelineLayoutCreateFlags    flags;
    109 		1u,					  // deUint32             setLayoutCount;
    110 		&descriptorSetLayout, // const VkDescriptorSetLayout*   pSetLayouts;
    111 		0u,					  // deUint32             pushConstantRangeCount;
    112 		DE_NULL, // const VkPushConstantRange*   pPushConstantRanges;
    113 	};
    114 	return createPipelineLayout(context.getDeviceInterface(),
    115 								context.getDevice(), &pipelineLayoutParams);
    116 }
    117 
    118 Move<VkRenderPass> makeRenderPass(Context& context, VkFormat format)
    119 {
    120 	VkAttachmentReference colorReference = {
    121 		0, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL
    122 	};
    123 
    124 	const VkSubpassDescription subpassDescription = {0u,
    125 													 VK_PIPELINE_BIND_POINT_GRAPHICS, 0, DE_NULL, 1, &colorReference,
    126 													 DE_NULL, DE_NULL, 0, DE_NULL
    127 													};
    128 
    129 	const VkSubpassDependency subpassDependencies[2] = {
    130 		{   VK_SUBPASS_EXTERNAL, 0u, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
    131 			VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
    132 			VK_ACCESS_MEMORY_READ_BIT, VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
    133 			VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
    134 			VK_DEPENDENCY_BY_REGION_BIT
    135 		},
    136 		{   0u, VK_SUBPASS_EXTERNAL, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
    137 			VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
    138 			VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
    139 			VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
    140 			VK_ACCESS_MEMORY_READ_BIT, VK_DEPENDENCY_BY_REGION_BIT
    141 		},
    142 	};
    143 
    144 	VkAttachmentDescription attachmentDescription = {0u, format,
    145 													 VK_SAMPLE_COUNT_1_BIT, VK_ATTACHMENT_LOAD_OP_CLEAR,
    146 													 VK_ATTACHMENT_STORE_OP_STORE, VK_ATTACHMENT_LOAD_OP_DONT_CARE,
    147 													 VK_ATTACHMENT_STORE_OP_DONT_CARE, VK_IMAGE_LAYOUT_UNDEFINED,
    148 													 VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL
    149 													};
    150 
    151 	const VkRenderPassCreateInfo renderPassCreateInfo = {
    152 		VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, DE_NULL, 0u, 1,
    153 		&attachmentDescription, 1, &subpassDescription, 2, subpassDependencies
    154 	};
    155 
    156 	return createRenderPass(context.getDeviceInterface(), context.getDevice(),
    157 							&renderPassCreateInfo);
    158 }
    159 
    160 Move<VkFramebuffer> makeFramebuffer(Context& context,
    161 									const VkRenderPass renderPass, const VkImageView imageView, deUint32 width,
    162 									deUint32 height)
    163 {
    164 	const VkFramebufferCreateInfo framebufferCreateInfo = {
    165 		VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, DE_NULL, 0u, renderPass, 1,
    166 		&imageView, width, height, 1
    167 	};
    168 
    169 	return createFramebuffer(context.getDeviceInterface(), context.getDevice(),
    170 							 &framebufferCreateInfo);
    171 }
    172 
    173 Move<VkPipeline> makeGraphicsPipeline(Context&									context,
    174 									  const VkPipelineLayout					pipelineLayout,
    175 									  const VkShaderStageFlags					stages,
    176 									  const VkShaderModule						vertexShaderModule,
    177 									  const VkShaderModule						fragmentShaderModule,
    178 									  const VkShaderModule						geometryShaderModule,
    179 									  const VkShaderModule						tessellationControlModule,
    180 									  const VkShaderModule						tessellationEvaluationModule,
    181 									  const VkRenderPass						renderPass,
    182 									  const VkPrimitiveTopology					topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST,
    183 									  const VkVertexInputBindingDescription*	vertexInputBindingDescription = DE_NULL,
    184 									  const VkVertexInputAttributeDescription*	vertexInputAttributeDescriptions = DE_NULL,
    185 									  const bool								frameBufferTests = false,
    186 									  const vk::VkFormat						attachmentFormat = VK_FORMAT_R32G32B32A32_SFLOAT)
    187 {
    188 	std::vector<VkViewport>	noViewports;
    189 	std::vector<VkRect2D>	noScissors;
    190 
    191 	const VkPipelineVertexInputStateCreateInfo vertexInputStateCreateInfo =
    192 	{
    193 		VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,	// VkStructureType								sType;
    194 		DE_NULL,													// const void*									pNext;
    195 		0u,															// VkPipelineVertexInputStateCreateFlags		flags;
    196 		vertexInputBindingDescription == DE_NULL ? 0u : 1u,			// deUint32										vertexBindingDescriptionCount;
    197 		vertexInputBindingDescription,								// const VkVertexInputBindingDescription*		pVertexBindingDescriptions;
    198 		vertexInputAttributeDescriptions == DE_NULL ? 0u : 1u,		// deUint32										vertexAttributeDescriptionCount;
    199 		vertexInputAttributeDescriptions,							// const VkVertexInputAttributeDescription*		pVertexAttributeDescriptions;
    200 	};
    201 
    202 	const deUint32 numChannels = getNumUsedChannels(mapVkFormat(attachmentFormat).order);
    203 	const VkColorComponentFlags colorComponent =
    204 												numChannels == 1 ? VK_COLOR_COMPONENT_R_BIT :
    205 												numChannels == 2 ? VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT :
    206 												numChannels == 3 ? VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT :
    207 												VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT;
    208 
    209 	const VkPipelineColorBlendAttachmentState colorBlendAttachmentState =
    210 	{
    211 		VK_FALSE, VK_BLEND_FACTOR_ZERO, VK_BLEND_FACTOR_ZERO, VK_BLEND_OP_ADD,
    212 		VK_BLEND_FACTOR_ZERO, VK_BLEND_FACTOR_ZERO, VK_BLEND_OP_ADD,
    213 		colorComponent
    214 	};
    215 
    216 	const VkPipelineColorBlendStateCreateInfo colorBlendStateCreateInfo =
    217 	{
    218 		VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, DE_NULL, 0u,
    219 		VK_FALSE, VK_LOGIC_OP_CLEAR, 1, &colorBlendAttachmentState,
    220 		{ 0.0f, 0.0f, 0.0f, 0.0f }
    221 	};
    222 
    223 	const deUint32 patchControlPoints = (VK_SHADER_STAGE_FRAGMENT_BIT & stages && frameBufferTests) ? 2u : 1u;
    224 
    225 	return vk::makeGraphicsPipeline(context.getDeviceInterface(),	// const DeviceInterface&                        vk
    226 									context.getDevice(),			// const VkDevice                                device
    227 									pipelineLayout,					// const VkPipelineLayout                        pipelineLayout
    228 									vertexShaderModule,				// const VkShaderModule                          vertexShaderModule
    229 									tessellationControlModule,		// const VkShaderModule                          tessellationControlShaderModule
    230 									tessellationEvaluationModule,	// const VkShaderModule                          tessellationEvalShaderModule
    231 									geometryShaderModule,			// const VkShaderModule                          geometryShaderModule
    232 									fragmentShaderModule,			// const VkShaderModule                          fragmentShaderModule
    233 									renderPass,						// const VkRenderPass                            renderPass
    234 									noViewports,					// const std::vector<VkViewport>&                viewports
    235 									noScissors,						// const std::vector<VkRect2D>&                  scissors
    236 									topology,						// const VkPrimitiveTopology                     topology
    237 									0u,								// const deUint32                                subpass
    238 									patchControlPoints,				// const deUint32                                patchControlPoints
    239 									&vertexInputStateCreateInfo,	// const VkPipelineVertexInputStateCreateInfo*   vertexInputStateCreateInfo
    240 									DE_NULL,						// const VkPipelineRasterizationStateCreateInfo* rasterizationStateCreateInfo
    241 									DE_NULL,						// const VkPipelineMultisampleStateCreateInfo*   multisampleStateCreateInfo
    242 									DE_NULL,						// const VkPipelineDepthStencilStateCreateInfo*  depthStencilStateCreateInfo
    243 									&colorBlendStateCreateInfo);	// const VkPipelineColorBlendStateCreateInfo*    colorBlendStateCreateInfo
    244 }
    245 
    246 Move<VkPipeline> makeComputePipeline(Context& context,
    247 									 const VkPipelineLayout pipelineLayout, const VkShaderModule shaderModule,
    248 									 deUint32 localSizeX, deUint32 localSizeY, deUint32 localSizeZ)
    249 {
    250 	const deUint32 localSize[3] = {localSizeX, localSizeY, localSizeZ};
    251 
    252 	const vk::VkSpecializationMapEntry entries[3] =
    253 	{
    254 		{0, sizeof(deUint32) * 0, sizeof(deUint32)},
    255 		{1, sizeof(deUint32) * 1, sizeof(deUint32)},
    256 		{2, static_cast<deUint32>(sizeof(deUint32) * 2), sizeof(deUint32)},
    257 	};
    258 
    259 	const vk::VkSpecializationInfo info =
    260 	{
    261 		/* mapEntryCount = */ 3,
    262 		/* pMapEntries   = */ entries,
    263 		/* dataSize      = */ sizeof(localSize),
    264 		/* pData         = */ localSize
    265 	};
    266 
    267 	const vk::VkPipelineShaderStageCreateInfo pipelineShaderStageParams =
    268 	{
    269 		VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,	// VkStructureType					sType;
    270 		DE_NULL,												// const void*						pNext;
    271 		0u,														// VkPipelineShaderStageCreateFlags	flags;
    272 		VK_SHADER_STAGE_COMPUTE_BIT,							// VkShaderStageFlagBits			stage;
    273 		shaderModule,											// VkShaderModule					module;
    274 		"main",													// const char*						pName;
    275 		&info,													// const VkSpecializationInfo*		pSpecializationInfo;
    276 	};
    277 
    278 	const vk::VkComputePipelineCreateInfo pipelineCreateInfo =
    279 	{
    280 		VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,	// VkStructureType	sType;
    281 		DE_NULL,										// const void*						pNext;
    282 		0u,												// VkPipelineCreateFlags			flags;
    283 		pipelineShaderStageParams,						// VkPipelineShaderStageCreateInfo	stage;
    284 		pipelineLayout,									// VkPipelineLayout					layout;
    285 		DE_NULL,										// VkPipeline						basePipelineHandle;
    286 		0,												// deInt32							basePipelineIndex;
    287 	};
    288 
    289 	return createComputePipeline(context.getDeviceInterface(),
    290 								 context.getDevice(), DE_NULL, &pipelineCreateInfo);
    291 }
    292 
    293 Move<VkDescriptorSet> makeDescriptorSet(Context& context,
    294 										const VkDescriptorPool descriptorPool,
    295 										const VkDescriptorSetLayout setLayout)
    296 {
    297 	const VkDescriptorSetAllocateInfo allocateParams =
    298 	{
    299 		VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, // VkStructureType
    300 		// sType;
    301 		DE_NULL,		// const void*          pNext;
    302 		descriptorPool, // VkDescriptorPool       descriptorPool;
    303 		1u,				// deUint32           setLayoutCount;
    304 		&setLayout,		// const VkDescriptorSetLayout* pSetLayouts;
    305 	};
    306 	return allocateDescriptorSet(
    307 			   context.getDeviceInterface(), context.getDevice(), &allocateParams);
    308 }
    309 
    310 Move<VkCommandPool> makeCommandPool(Context& context)
    311 {
    312 	const VkCommandPoolCreateInfo commandPoolParams =
    313 	{
    314 		VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO, // VkStructureType sType;
    315 		DE_NULL,									// const void*        pNext;
    316 		VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, // VkCommandPoolCreateFlags
    317 		// flags;
    318 		context.getUniversalQueueFamilyIndex(), // deUint32 queueFamilyIndex;
    319 	};
    320 
    321 	return createCommandPool(
    322 			   context.getDeviceInterface(), context.getDevice(), &commandPoolParams);
    323 }
    324 
    325 Move<VkCommandBuffer> makeCommandBuffer(
    326 	Context& context, const VkCommandPool commandPool)
    327 {
    328 	const VkCommandBufferAllocateInfo bufferAllocateParams =
    329 	{
    330 		VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,	// VkStructureType		sType;
    331 		DE_NULL,										// const void*			pNext;
    332 		commandPool,									// VkCommandPool		commandPool;
    333 		VK_COMMAND_BUFFER_LEVEL_PRIMARY,				// VkCommandBufferLevel	level;
    334 		1u,												// deUint32				bufferCount;
    335 	};
    336 	return allocateCommandBuffer(context.getDeviceInterface(),
    337 								 context.getDevice(), &bufferAllocateParams);
    338 }
    339 
    340 Move<VkFence> submitCommandBuffer(
    341 	Context& context, const VkCommandBuffer commandBuffer)
    342 {
    343 	const VkFenceCreateInfo fenceParams =
    344 	{
    345 		VK_STRUCTURE_TYPE_FENCE_CREATE_INFO, // VkStructureType    sType;
    346 		DE_NULL,							 // const void*      pNext;
    347 		0u,									 // VkFenceCreateFlags flags;
    348 	};
    349 
    350 	Move<VkFence> fence(createFence(
    351 							context.getDeviceInterface(), context.getDevice(), &fenceParams));
    352 
    353 	const VkSubmitInfo submitInfo =
    354 	{
    355 		VK_STRUCTURE_TYPE_SUBMIT_INFO, // VkStructureType      sType;
    356 		DE_NULL,					   // const void*        pNext;
    357 		0u,							   // deUint32         waitSemaphoreCount;
    358 		DE_NULL,					   // const VkSemaphore*   pWaitSemaphores;
    359 		(const VkPipelineStageFlags*)DE_NULL,
    360 		1u,				// deUint32         commandBufferCount;
    361 		&commandBuffer, // const VkCommandBuffer* pCommandBuffers;
    362 		0u,				// deUint32         signalSemaphoreCount;
    363 		DE_NULL,		// const VkSemaphore*   pSignalSemaphores;
    364 	};
    365 
    366 	vk::VkResult result = (context.getDeviceInterface().queueSubmit(
    367 							   context.getUniversalQueue(), 1u, &submitInfo, *fence));
    368 	VK_CHECK(result);
    369 
    370 	return Move<VkFence>(fence);
    371 }
    372 
    373 void waitFence(Context& context, Move<VkFence> fence)
    374 {
    375 	VK_CHECK(context.getDeviceInterface().waitForFences(
    376 				 context.getDevice(), 1u, &fence.get(), DE_TRUE, ~0ull));
    377 }
    378 
    379 struct Buffer;
    380 struct Image;
    381 
    382 struct BufferOrImage
    383 {
    384 	bool isImage() const
    385 	{
    386 		return m_isImage;
    387 	}
    388 
    389 	Buffer* getAsBuffer()
    390 	{
    391 		if (m_isImage) DE_FATAL("Trying to get a buffer as an image!");
    392 		return reinterpret_cast<Buffer* >(this);
    393 	}
    394 
    395 	Image* getAsImage()
    396 	{
    397 		if (!m_isImage) DE_FATAL("Trying to get an image as a buffer!");
    398 		return reinterpret_cast<Image*>(this);
    399 	}
    400 
    401 	virtual VkDescriptorType getType() const
    402 	{
    403 		if (m_isImage)
    404 		{
    405 			return VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
    406 		}
    407 		else
    408 		{
    409 			return VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
    410 		}
    411 	}
    412 
    413 	Allocation& getAllocation() const
    414 	{
    415 		return *m_allocation;
    416 	}
    417 
    418 	virtual ~BufferOrImage() {}
    419 
    420 protected:
    421 	explicit BufferOrImage(bool image) : m_isImage(image) {}
    422 
    423 	bool m_isImage;
    424 	de::details::MovePtr<Allocation> m_allocation;
    425 };
    426 
    427 struct Buffer : public BufferOrImage
    428 {
    429 	explicit Buffer(
    430 		Context& context, VkDeviceSize sizeInBytes, VkBufferUsageFlags usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT)
    431 		: BufferOrImage		(false)
    432 		, m_sizeInBytes		(sizeInBytes)
    433 		, m_usage			(usage)
    434 	{
    435 		const vk::VkBufferCreateInfo bufferCreateInfo =
    436 		{
    437 			VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
    438 			DE_NULL,
    439 			0u,
    440 			m_sizeInBytes,
    441 			m_usage,
    442 			VK_SHARING_MODE_EXCLUSIVE,
    443 			0u,
    444 			DE_NULL,
    445 		};
    446 		m_buffer = createBuffer(context.getDeviceInterface(),
    447 								context.getDevice(), &bufferCreateInfo);
    448 		vk::VkMemoryRequirements req = getBufferMemoryRequirements(
    449 										   context.getDeviceInterface(), context.getDevice(), *m_buffer);
    450 		m_allocation = context.getDefaultAllocator().allocate(
    451 						   req, MemoryRequirement::HostVisible);
    452 		VK_CHECK(context.getDeviceInterface().bindBufferMemory(
    453 					 context.getDevice(), *m_buffer, m_allocation->getMemory(),
    454 					 m_allocation->getOffset()));
    455 	}
    456 
    457 	virtual VkDescriptorType getType() const
    458 	{
    459 		if (VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT == m_usage)
    460 		{
    461 			return VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
    462 		}
    463 		return VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
    464 	}
    465 
    466 	VkBuffer getBuffer() const {
    467 		return *m_buffer;
    468 	}
    469 
    470 	const VkBuffer* getBufferPtr() const {
    471 		return &(*m_buffer);
    472 	}
    473 
    474 	VkDeviceSize getSize() const {
    475 		return m_sizeInBytes;
    476 	}
    477 
    478 private:
    479 	Move<VkBuffer>				m_buffer;
    480 	VkDeviceSize				m_sizeInBytes;
    481 	const VkBufferUsageFlags	m_usage;
    482 };
    483 
    484 struct Image : public BufferOrImage
    485 {
    486 	explicit Image(Context& context, deUint32 width, deUint32 height,
    487 				   VkFormat format, VkImageUsageFlags usage = VK_IMAGE_USAGE_STORAGE_BIT)
    488 		: BufferOrImage(true)
    489 	{
    490 		const VkImageCreateInfo imageCreateInfo =
    491 		{
    492 			VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, DE_NULL, 0, VK_IMAGE_TYPE_2D,
    493 			format, {width, height, 1}, 1, 1, VK_SAMPLE_COUNT_1_BIT,
    494 			VK_IMAGE_TILING_OPTIMAL, usage,
    495 			VK_SHARING_MODE_EXCLUSIVE, 0u, DE_NULL,
    496 			VK_IMAGE_LAYOUT_UNDEFINED
    497 		};
    498 		m_image = createImage(context.getDeviceInterface(), context.getDevice(),
    499 							  &imageCreateInfo);
    500 		vk::VkMemoryRequirements req = getImageMemoryRequirements(
    501 										   context.getDeviceInterface(), context.getDevice(), *m_image);
    502 		req.size *= 2;
    503 		m_allocation =
    504 			context.getDefaultAllocator().allocate(req, MemoryRequirement::Any);
    505 		VK_CHECK(context.getDeviceInterface().bindImageMemory(
    506 					 context.getDevice(), *m_image, m_allocation->getMemory(),
    507 					 m_allocation->getOffset()));
    508 
    509 		const VkComponentMapping componentMapping =
    510 		{
    511 			VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY,
    512 			VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY
    513 		};
    514 
    515 		const VkImageViewCreateInfo imageViewCreateInfo =
    516 		{
    517 			VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, DE_NULL, 0, *m_image,
    518 			VK_IMAGE_VIEW_TYPE_2D, imageCreateInfo.format, componentMapping,
    519 			{
    520 				VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1,
    521 			}
    522 		};
    523 
    524 		m_imageView = createImageView(context.getDeviceInterface(),
    525 									  context.getDevice(), &imageViewCreateInfo);
    526 
    527 		const struct VkSamplerCreateInfo samplerCreateInfo =
    528 		{
    529 			VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
    530 			DE_NULL,
    531 			0u,
    532 			VK_FILTER_NEAREST,
    533 			VK_FILTER_NEAREST,
    534 			VK_SAMPLER_MIPMAP_MODE_NEAREST,
    535 			VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
    536 			VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
    537 			VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
    538 			0.0f,
    539 			VK_FALSE,
    540 			1.0f,
    541 			DE_FALSE,
    542 			VK_COMPARE_OP_ALWAYS,
    543 			0.0f,
    544 			0.0f,
    545 			VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK,
    546 			VK_FALSE,
    547 		};
    548 
    549 		m_sampler = createSampler(context.getDeviceInterface(), context.getDevice(), &samplerCreateInfo);
    550 	}
    551 
    552 	VkImage getImage() const {
    553 		return *m_image;
    554 	}
    555 
    556 	VkImageView getImageView() const {
    557 		return *m_imageView;
    558 	}
    559 
    560 	VkSampler getSampler() const {
    561 		return *m_sampler;
    562 	}
    563 
    564 private:
    565 	Move<VkImage> m_image;
    566 	Move<VkImageView> m_imageView;
    567 	Move<VkSampler> m_sampler;
    568 };
    569 }
    570 
    571 std::string vkt::subgroups::getSharedMemoryBallotHelper()
    572 {
    573 	return	"shared uvec4 superSecretComputeShaderHelper[gl_WorkGroupSize.x * gl_WorkGroupSize.y * gl_WorkGroupSize.z];\n"
    574 			"uvec4 sharedMemoryBallot(bool vote)\n"
    575 			"{\n"
    576 			"  uint groupOffset = gl_SubgroupID;\n"
    577 			"  // One invocation in the group 0's the whole group's data\n"
    578 			"  if (subgroupElect())\n"
    579 			"  {\n"
    580 			"    superSecretComputeShaderHelper[groupOffset] = uvec4(0);\n"
    581 			"  }\n"
    582 			"  subgroupMemoryBarrierShared();\n"
    583 			"  if (vote)\n"
    584 			"  {\n"
    585 			"    const highp uint invocationId = gl_SubgroupInvocationID % 32;\n"
    586 			"    const highp uint bitToSet = 1u << invocationId;\n"
    587 			"    switch (gl_SubgroupInvocationID / 32)\n"
    588 			"    {\n"
    589 			"    case 0: atomicOr(superSecretComputeShaderHelper[groupOffset].x, bitToSet); break;\n"
    590 			"    case 1: atomicOr(superSecretComputeShaderHelper[groupOffset].y, bitToSet); break;\n"
    591 			"    case 2: atomicOr(superSecretComputeShaderHelper[groupOffset].z, bitToSet); break;\n"
    592 			"    case 3: atomicOr(superSecretComputeShaderHelper[groupOffset].w, bitToSet); break;\n"
    593 			"    }\n"
    594 			"  }\n"
    595 			"  subgroupMemoryBarrierShared();\n"
    596 			"  return superSecretComputeShaderHelper[groupOffset];\n"
    597 			"}\n";
    598 }
    599 
    600 deUint32 vkt::subgroups::getSubgroupSize(Context& context)
    601 {
    602 	VkPhysicalDeviceSubgroupProperties subgroupProperties;
    603 	subgroupProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES;
    604 	subgroupProperties.pNext = DE_NULL;
    605 
    606 	VkPhysicalDeviceProperties2 properties;
    607 	properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
    608 	properties.pNext = &subgroupProperties;
    609 
    610 	context.getInstanceInterface().getPhysicalDeviceProperties2(context.getPhysicalDevice(), &properties);
    611 
    612 	return subgroupProperties.subgroupSize;
    613 }
    614 
    615 VkDeviceSize vkt::subgroups::maxSupportedSubgroupSize() {
    616 	return 128u;
    617 }
    618 
    619 std::string vkt::subgroups::getShaderStageName(VkShaderStageFlags stage)
    620 {
    621 	switch (stage)
    622 	{
    623 		default:
    624 			DE_FATAL("Unhandled stage!");
    625 			return "";
    626 		case VK_SHADER_STAGE_COMPUTE_BIT:
    627 			return "compute";
    628 		case VK_SHADER_STAGE_FRAGMENT_BIT:
    629 			return "fragment";
    630 		case VK_SHADER_STAGE_VERTEX_BIT:
    631 			return "vertex";
    632 		case VK_SHADER_STAGE_GEOMETRY_BIT:
    633 			return "geometry";
    634 		case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT:
    635 			return "tess_control";
    636 		case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT:
    637 			return "tess_eval";
    638 	}
    639 }
    640 
    641 std::string vkt::subgroups::getSubgroupFeatureName(vk::VkSubgroupFeatureFlagBits bit)
    642 {
    643 	switch (bit)
    644 	{
    645 		default:
    646 			DE_FATAL("Unknown subgroup feature category!");
    647 			return "";
    648 		case VK_SUBGROUP_FEATURE_BASIC_BIT:
    649 			return "VK_SUBGROUP_FEATURE_BASIC_BIT";
    650 		case VK_SUBGROUP_FEATURE_VOTE_BIT:
    651 			return "VK_SUBGROUP_FEATURE_VOTE_BIT";
    652 		case VK_SUBGROUP_FEATURE_ARITHMETIC_BIT:
    653 			return "VK_SUBGROUP_FEATURE_ARITHMETIC_BIT";
    654 		case VK_SUBGROUP_FEATURE_BALLOT_BIT:
    655 			return "VK_SUBGROUP_FEATURE_BALLOT_BIT";
    656 		case VK_SUBGROUP_FEATURE_SHUFFLE_BIT:
    657 			return "VK_SUBGROUP_FEATURE_SHUFFLE_BIT";
    658 		case VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT:
    659 			return "VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT";
    660 		case VK_SUBGROUP_FEATURE_CLUSTERED_BIT:
    661 			return "VK_SUBGROUP_FEATURE_CLUSTERED_BIT";
    662 		case VK_SUBGROUP_FEATURE_QUAD_BIT:
    663 			return "VK_SUBGROUP_FEATURE_QUAD_BIT";
    664 	}
    665 }
    666 
    667 void vkt::subgroups::addNoSubgroupShader (SourceCollections& programCollection)
    668 {
    669 	{
    670 	/*
    671 		"#version 450\n"
    672 		"void main (void)\n"
    673 		"{\n"
    674 		"  float pixelSize = 2.0f/1024.0f;\n"
    675 		"   float pixelPosition = pixelSize/2.0f - 1.0f;\n"
    676 		"  gl_Position = vec4(float(gl_VertexIndex) * pixelSize + pixelPosition, 0.0f, 0.0f, 1.0f);\n"
    677 		"  gl_PointSize = 1.0f;\n"
    678 		"}\n"
    679 	*/
    680 		const std::string vertNoSubgroup =
    681 			"; SPIR-V\n"
    682 			"; Version: 1.3\n"
    683 			"; Generator: Khronos Glslang Reference Front End; 1\n"
    684 			"; Bound: 37\n"
    685 			"; Schema: 0\n"
    686 			"OpCapability Shader\n"
    687 			"%1 = OpExtInstImport \"GLSL.std.450\"\n"
    688 			"OpMemoryModel Logical GLSL450\n"
    689 			"OpEntryPoint Vertex %4 \"main\" %22 %26\n"
    690 			"OpMemberDecorate %20 0 BuiltIn Position\n"
    691 			"OpMemberDecorate %20 1 BuiltIn PointSize\n"
    692 			"OpMemberDecorate %20 2 BuiltIn ClipDistance\n"
    693 			"OpMemberDecorate %20 3 BuiltIn CullDistance\n"
    694 			"OpDecorate %20 Block\n"
    695 			"OpDecorate %26 BuiltIn VertexIndex\n"
    696 			"%2 = OpTypeVoid\n"
    697 			"%3 = OpTypeFunction %2\n"
    698 			"%6 = OpTypeFloat 32\n"
    699 			"%7 = OpTypePointer Function %6\n"
    700 			"%9 = OpConstant %6 0.00195313\n"
    701 			"%12 = OpConstant %6 2\n"
    702 			"%14 = OpConstant %6 1\n"
    703 			"%16 = OpTypeVector %6 4\n"
    704 			"%17 = OpTypeInt 32 0\n"
    705 			"%18 = OpConstant %17 1\n"
    706 			"%19 = OpTypeArray %6 %18\n"
    707 			"%20 = OpTypeStruct %16 %6 %19 %19\n"
    708 			"%21 = OpTypePointer Output %20\n"
    709 			"%22 = OpVariable %21 Output\n"
    710 			"%23 = OpTypeInt 32 1\n"
    711 			"%24 = OpConstant %23 0\n"
    712 			"%25 = OpTypePointer Input %23\n"
    713 			"%26 = OpVariable %25 Input\n"
    714 			"%33 = OpConstant %6 0\n"
    715 			"%35 = OpTypePointer Output %16\n"
    716 			"%37 = OpConstant %23 1\n"
    717 			"%38 = OpTypePointer Output %6\n"
    718 			"%4 = OpFunction %2 None %3\n"
    719 			"%5 = OpLabel\n"
    720 			"%8 = OpVariable %7 Function\n"
    721 			"%10 = OpVariable %7 Function\n"
    722 			"OpStore %8 %9\n"
    723 			"%11 = OpLoad %6 %8\n"
    724 			"%13 = OpFDiv %6 %11 %12\n"
    725 			"%15 = OpFSub %6 %13 %14\n"
    726 			"OpStore %10 %15\n"
    727 			"%27 = OpLoad %23 %26\n"
    728 			"%28 = OpConvertSToF %6 %27\n"
    729 			"%29 = OpLoad %6 %8\n"
    730 			"%30 = OpFMul %6 %28 %29\n"
    731 			"%31 = OpLoad %6 %10\n"
    732 			"%32 = OpFAdd %6 %30 %31\n"
    733 			"%34 = OpCompositeConstruct %16 %32 %33 %33 %14\n"
    734 			"%36 = OpAccessChain %35 %22 %24\n"
    735 			"OpStore %36 %34\n"
    736 			"%39 = OpAccessChain %38 %22 %37\n"
    737 			"OpStore %39 %14\n"
    738 			"OpReturn\n"
    739 			"OpFunctionEnd\n";
    740 		programCollection.spirvAsmSources.add("vert_noSubgroup") << vertNoSubgroup;
    741 	}
    742 
    743 	{
    744 	/*
    745 		"#version 450\n"
    746 		"layout(vertices=1) out;\n"
    747 		"\n"
    748 		"void main (void)\n"
    749 		"{\n"
    750 		"  if (gl_InvocationID == 0)\n"
    751 		"  {\n"
    752 		"    gl_TessLevelOuter[0] = 1.0f;\n"
    753 		"    gl_TessLevelOuter[1] = 1.0f;\n"
    754 		"  }\n"
    755 		"  gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
    756 		"}\n"
    757 	*/
    758 		const std::string tescNoSubgroup =
    759 			"; SPIR-V\n"
    760 			"; Version: 1.3\n"
    761 			"; Generator: Khronos Glslang Reference Front End; 1\n"
    762 			"; Bound: 45\n"
    763 			"; Schema: 0\n"
    764 			"OpCapability Tessellation\n"
    765 			"%1 = OpExtInstImport \"GLSL.std.450\"\n"
    766 			"OpMemoryModel Logical GLSL450\n"
    767 			"OpEntryPoint TessellationControl %4 \"main\" %8 %20 %32 %38\n"
    768 			"OpExecutionMode %4 OutputVertices 1\n"
    769 			"OpDecorate %8 BuiltIn InvocationId\n"
    770 			"OpDecorate %20 Patch\n"
    771 			"OpDecorate %20 BuiltIn TessLevelOuter\n"
    772 			"OpMemberDecorate %29 0 BuiltIn Position\n"
    773 			"OpMemberDecorate %29 1 BuiltIn PointSize\n"
    774 			"OpMemberDecorate %29 2 BuiltIn ClipDistance\n"
    775 			"OpMemberDecorate %29 3 BuiltIn CullDistance\n"
    776 			"OpDecorate %29 Block\n"
    777 			"OpMemberDecorate %34 0 BuiltIn Position\n"
    778 			"OpMemberDecorate %34 1 BuiltIn PointSize\n"
    779 			"OpMemberDecorate %34 2 BuiltIn ClipDistance\n"
    780 			"OpMemberDecorate %34 3 BuiltIn CullDistance\n"
    781 			"OpDecorate %34 Block\n"
    782 			"%2 = OpTypeVoid\n"
    783 			"%3 = OpTypeFunction %2\n"
    784 			"%6 = OpTypeInt 32 1\n"
    785 			"%7 = OpTypePointer Input %6\n"
    786 			"%8 = OpVariable %7 Input\n"
    787 			"%10 = OpConstant %6 0\n"
    788 			"%11 = OpTypeBool\n"
    789 			"%15 = OpTypeFloat 32\n"
    790 			"%16 = OpTypeInt 32 0\n"
    791 			"%17 = OpConstant %16 4\n"
    792 			"%18 = OpTypeArray %15 %17\n"
    793 			"%19 = OpTypePointer Output %18\n"
    794 			"%20 = OpVariable %19 Output\n"
    795 			"%21 = OpConstant %15 1\n"
    796 			"%22 = OpTypePointer Output %15\n"
    797 			"%24 = OpConstant %6 1\n"
    798 			"%26 = OpTypeVector %15 4\n"
    799 			"%27 = OpConstant %16 1\n"
    800 			"%28 = OpTypeArray %15 %27\n"
    801 			"%29 = OpTypeStruct %26 %15 %28 %28\n"
    802 			"%30 = OpTypeArray %29 %27\n"
    803 			"%31 = OpTypePointer Output %30\n"
    804 			"%32 = OpVariable %31 Output\n"
    805 			"%34 = OpTypeStruct %26 %15 %28 %28\n"
    806 			"%35 = OpConstant %16 32\n"
    807 			"%36 = OpTypeArray %34 %35\n"
    808 			"%37 = OpTypePointer Input %36\n"
    809 			"%38 = OpVariable %37 Input\n"
    810 			"%40 = OpTypePointer Input %26\n"
    811 			"%43 = OpTypePointer Output %26\n"
    812 			"%4 = OpFunction %2 None %3\n"
    813 			"%5 = OpLabel\n"
    814 			"%9 = OpLoad %6 %8\n"
    815 			"%12 = OpIEqual %11 %9 %10\n"
    816 			"OpSelectionMerge %14 None\n"
    817 			"OpBranchConditional %12 %13 %14\n"
    818 			"%13 = OpLabel\n"
    819 			"%23 = OpAccessChain %22 %20 %10\n"
    820 			"OpStore %23 %21\n"
    821 			"%25 = OpAccessChain %22 %20 %24\n"
    822 			"OpStore %25 %21\n"
    823 			"OpBranch %14\n"
    824 			"%14 = OpLabel\n"
    825 			"%33 = OpLoad %6 %8\n"
    826 			"%39 = OpLoad %6 %8\n"
    827 			"%41 = OpAccessChain %40 %38 %39 %10\n"
    828 			"%42 = OpLoad %26 %41\n"
    829 			"%44 = OpAccessChain %43 %32 %33 %10\n"
    830 			"OpStore %44 %42\n"
    831 			"OpReturn\n"
    832 			"OpFunctionEnd\n";
    833 		programCollection.spirvAsmSources.add("tesc_noSubgroup") << tescNoSubgroup;
    834 	}
    835 
    836 	{
    837 	/*
    838 		"#version 450\n"
    839 		"layout(isolines) in;\n"
    840 		"\n"
    841 		"void main (void)\n"
    842 		"{\n"
    843 		"  float pixelSize = 2.0f/1024.0f;\n"
    844 		"  gl_Position = gl_in[0].gl_Position + gl_TessCoord.x * pixelSize / 2.0f;\n"
    845 		"}\n";
    846 	*/
    847 		const std::string teseNoSubgroup =
    848 			"; SPIR-V\n"
    849 			"; Version: 1.3\n"
    850 			"; Generator: Khronos Glslang Reference Front End; 2\n"
    851 			"; Bound: 42\n"
    852 			"; Schema: 0\n"
    853 			"OpCapability Tessellation\n"
    854 			"%1 = OpExtInstImport \"GLSL.std.450\"\n"
    855 			"OpMemoryModel Logical GLSL450\n"
    856 			"OpEntryPoint TessellationEvaluation %4 \"main\" %16 %23 %29\n"
    857 			"OpExecutionMode %4 Isolines\n"
    858 			"OpExecutionMode %4 SpacingEqual\n"
    859 			"OpExecutionMode %4 VertexOrderCcw\n"
    860 			"OpMemberDecorate %14 0 BuiltIn Position\n"
    861 			"OpMemberDecorate %14 1 BuiltIn PointSize\n"
    862 			"OpMemberDecorate %14 2 BuiltIn ClipDistance\n"
    863 			"OpMemberDecorate %14 3 BuiltIn CullDistance\n"
    864 			"OpDecorate %14 Block\n"
    865 			"OpMemberDecorate %19 0 BuiltIn Position\n"
    866 			"OpMemberDecorate %19 1 BuiltIn PointSize\n"
    867 			"OpMemberDecorate %19 2 BuiltIn ClipDistance\n"
    868 			"OpMemberDecorate %19 3 BuiltIn CullDistance\n"
    869 			"OpDecorate %19 Block\n"
    870 			"OpDecorate %29 BuiltIn TessCoord\n"
    871 			"%2 = OpTypeVoid\n"
    872 			"%3 = OpTypeFunction %2\n"
    873 			"%6 = OpTypeFloat 32\n"
    874 			"%7 = OpTypePointer Function %6\n"
    875 			"%9 = OpConstant %6 0.00195313\n"
    876 			"%10 = OpTypeVector %6 4\n"
    877 			"%11 = OpTypeInt 32 0\n"
    878 			"%12 = OpConstant %11 1\n"
    879 			"%13 = OpTypeArray %6 %12\n"
    880 			"%14 = OpTypeStruct %10 %6 %13 %13\n"
    881 			"%15 = OpTypePointer Output %14\n"
    882 			"%16 = OpVariable %15 Output\n"
    883 			"%17 = OpTypeInt 32 1\n"
    884 			"%18 = OpConstant %17 0\n"
    885 			"%19 = OpTypeStruct %10 %6 %13 %13\n"
    886 			"%20 = OpConstant %11 32\n"
    887 			"%21 = OpTypeArray %19 %20\n"
    888 			"%22 = OpTypePointer Input %21\n"
    889 			"%23 = OpVariable %22 Input\n"
    890 			"%24 = OpTypePointer Input %10\n"
    891 			"%27 = OpTypeVector %6 3\n"
    892 			"%28 = OpTypePointer Input %27\n"
    893 			"%29 = OpVariable %28 Input\n"
    894 			"%30 = OpConstant %11 0\n"
    895 			"%31 = OpTypePointer Input %6\n"
    896 			"%36 = OpConstant %6 2\n"
    897 			"%40 = OpTypePointer Output %10\n"
    898 			"%4 = OpFunction %2 None %3\n"
    899 			"%5 = OpLabel\n"
    900 			"%8 = OpVariable %7 Function\n"
    901 			"OpStore %8 %9\n"
    902 			"%25 = OpAccessChain %24 %23 %18 %18\n"
    903 			"%26 = OpLoad %10 %25\n"
    904 			"%32 = OpAccessChain %31 %29 %30\n"
    905 			"%33 = OpLoad %6 %32\n"
    906 			"%34 = OpLoad %6 %8\n"
    907 			"%35 = OpFMul %6 %33 %34\n"
    908 			"%37 = OpFDiv %6 %35 %36\n"
    909 			"%38 = OpCompositeConstruct %10 %37 %37 %37 %37\n"
    910 			"%39 = OpFAdd %10 %26 %38\n"
    911 			"%41 = OpAccessChain %40 %16 %18\n"
    912 			"OpStore %41 %39\n"
    913 			"OpReturn\n"
    914 			"OpFunctionEnd\n";
    915 		programCollection.spirvAsmSources.add("tese_noSubgroup") << teseNoSubgroup;
    916 	}
    917 
    918 }
    919 
    920 
    921 std::string vkt::subgroups::getVertShaderForStage(vk::VkShaderStageFlags stage)
    922 {
    923 	switch (stage)
    924 	{
    925 		default:
    926 			DE_FATAL("Unhandled stage!");
    927 			return "";
    928 		case VK_SHADER_STAGE_FRAGMENT_BIT:
    929 			return
    930 				"#version 450\n"
    931 				"void main (void)\n"
    932 				"{\n"
    933 				"  float pixelSize = 2.0f/1024.0f;\n"
    934 				"   float pixelPosition = pixelSize/2.0f - 1.0f;\n"
    935 				"  gl_Position = vec4(float(gl_VertexIndex) * pixelSize + pixelPosition, 0.0f, 0.0f, 1.0f);\n"
    936 				"}\n";
    937 		case VK_SHADER_STAGE_GEOMETRY_BIT:
    938 			return
    939 				"#version 450\n"
    940 				"void main (void)\n"
    941 				"{\n"
    942 				"}\n";
    943 		case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT:
    944 		case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT:
    945 			return
    946 				"#version 450\n"
    947 				"void main (void)\n"
    948 				"{\n"
    949 				"}\n";
    950 	}
    951 }
    952 
    953 bool vkt::subgroups::isSubgroupSupported(Context& context)
    954 {
    955 	return context.contextSupports(vk::ApiVersion(1, 1, 0));
    956 }
    957 
    958 bool vkt::subgroups::areSubgroupOperationsSupportedForStage(
    959 	Context& context, const VkShaderStageFlags stage)
    960 {
    961 	VkPhysicalDeviceSubgroupProperties subgroupProperties;
    962 	subgroupProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES;
    963 	subgroupProperties.pNext = DE_NULL;
    964 
    965 	VkPhysicalDeviceProperties2 properties;
    966 	properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
    967 	properties.pNext = &subgroupProperties;
    968 
    969 	context.getInstanceInterface().getPhysicalDeviceProperties2(context.getPhysicalDevice(), &properties);
    970 
    971 	return (stage & subgroupProperties.supportedStages) ? true : false;
    972 }
    973 
    974 bool vkt::subgroups::areSubgroupOperationsRequiredForStage(
    975 	VkShaderStageFlags stage)
    976 {
    977 	switch (stage)
    978 	{
    979 		default:
    980 			return false;
    981 		case VK_SHADER_STAGE_COMPUTE_BIT:
    982 			return true;
    983 	}
    984 }
    985 
    986 bool vkt::subgroups::isSubgroupFeatureSupportedForDevice(
    987 	Context& context,
    988 	VkSubgroupFeatureFlagBits bit) {
    989 	VkPhysicalDeviceSubgroupProperties subgroupProperties;
    990 	subgroupProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES;
    991 	subgroupProperties.pNext = DE_NULL;
    992 
    993 	VkPhysicalDeviceProperties2 properties;
    994 	properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
    995 	properties.pNext = &subgroupProperties;
    996 
    997 	context.getInstanceInterface().getPhysicalDeviceProperties2(context.getPhysicalDevice(), &properties);
    998 
    999 	return (bit & subgroupProperties.supportedOperations) ? true : false;
   1000 }
   1001 
   1002 bool vkt::subgroups::isFragmentSSBOSupportedForDevice(Context& context)
   1003 {
   1004 	const VkPhysicalDeviceFeatures features = getPhysicalDeviceFeatures(
   1005 				context.getInstanceInterface(), context.getPhysicalDevice());
   1006 	return features.fragmentStoresAndAtomics ? true : false;
   1007 }
   1008 
   1009 bool vkt::subgroups::isVertexSSBOSupportedForDevice(Context& context)
   1010 {
   1011 	const VkPhysicalDeviceFeatures features = getPhysicalDeviceFeatures(
   1012 				context.getInstanceInterface(), context.getPhysicalDevice());
   1013 	return features.vertexPipelineStoresAndAtomics ? true : false;
   1014 }
   1015 
   1016 bool vkt::subgroups::isDoubleSupportedForDevice(Context& context)
   1017 {
   1018 	const VkPhysicalDeviceFeatures features = getPhysicalDeviceFeatures(
   1019 				context.getInstanceInterface(), context.getPhysicalDevice());
   1020 	return features.shaderFloat64 ? true : false;
   1021 }
   1022 
   1023 bool vkt::subgroups::isDoubleFormat(VkFormat format)
   1024 {
   1025 	switch (format)
   1026 	{
   1027 		default:
   1028 			return false;
   1029 		case VK_FORMAT_R64_SFLOAT:
   1030 		case VK_FORMAT_R64G64_SFLOAT:
   1031 		case VK_FORMAT_R64G64B64_SFLOAT:
   1032 		case VK_FORMAT_R64G64B64A64_SFLOAT:
   1033 			return true;
   1034 	}
   1035 }
   1036 
   1037 std::string vkt::subgroups::getFormatNameForGLSL (VkFormat format)
   1038 {
   1039 	switch (format)
   1040 	{
   1041 		default:
   1042 			DE_FATAL("Unhandled format!");
   1043 			return "";
   1044 		case VK_FORMAT_R32_SINT:
   1045 			return "int";
   1046 		case VK_FORMAT_R32G32_SINT:
   1047 			return "ivec2";
   1048 		case VK_FORMAT_R32G32B32_SINT:
   1049 			return "ivec3";
   1050 		case VK_FORMAT_R32G32B32A32_SINT:
   1051 			return "ivec4";
   1052 		case VK_FORMAT_R32_UINT:
   1053 			return "uint";
   1054 		case VK_FORMAT_R32G32_UINT:
   1055 			return "uvec2";
   1056 		case VK_FORMAT_R32G32B32_UINT:
   1057 			return "uvec3";
   1058 		case VK_FORMAT_R32G32B32A32_UINT:
   1059 			return "uvec4";
   1060 		case VK_FORMAT_R32_SFLOAT:
   1061 			return "float";
   1062 		case VK_FORMAT_R32G32_SFLOAT:
   1063 			return "vec2";
   1064 		case VK_FORMAT_R32G32B32_SFLOAT:
   1065 			return "vec3";
   1066 		case VK_FORMAT_R32G32B32A32_SFLOAT:
   1067 			return "vec4";
   1068 		case VK_FORMAT_R64_SFLOAT:
   1069 			return "double";
   1070 		case VK_FORMAT_R64G64_SFLOAT:
   1071 			return "dvec2";
   1072 		case VK_FORMAT_R64G64B64_SFLOAT:
   1073 			return "dvec3";
   1074 		case VK_FORMAT_R64G64B64A64_SFLOAT:
   1075 			return "dvec4";
   1076 		case VK_FORMAT_R8_USCALED:
   1077 			return "bool";
   1078 		case VK_FORMAT_R8G8_USCALED:
   1079 			return "bvec2";
   1080 		case VK_FORMAT_R8G8B8_USCALED:
   1081 			return "bvec3";
   1082 		case VK_FORMAT_R8G8B8A8_USCALED:
   1083 			return "bvec4";
   1084 	}
   1085 }
   1086 
   1087 void vkt::subgroups::setVertexShaderFrameBuffer (SourceCollections& programCollection)
   1088 {
   1089 	/*
   1090 		"layout(location = 0) in highp vec4 in_position;\n"
   1091 		"void main (void)\n"
   1092 		"{\n"
   1093 		"  gl_Position = in_position;\n"
   1094 		"}\n";
   1095 	*/
   1096 	programCollection.spirvAsmSources.add("vert") <<
   1097 		"; SPIR-V\n"
   1098 		"; Version: 1.3\n"
   1099 		"; Generator: Khronos Glslang Reference Front End; 2\n"
   1100 		"; Bound: 21\n"
   1101 		"; Schema: 0\n"
   1102 		"OpCapability Shader\n"
   1103 		"%1 = OpExtInstImport \"GLSL.std.450\"\n"
   1104 		"OpMemoryModel Logical GLSL450\n"
   1105 		"OpEntryPoint Vertex %4 \"main\" %13 %17\n"
   1106 		"OpMemberDecorate %11 0 BuiltIn Position\n"
   1107 		"OpMemberDecorate %11 1 BuiltIn PointSize\n"
   1108 		"OpMemberDecorate %11 2 BuiltIn ClipDistance\n"
   1109 		"OpMemberDecorate %11 3 BuiltIn CullDistance\n"
   1110 		"OpDecorate %11 Block\n"
   1111 		"OpDecorate %17 Location 0\n"
   1112 		"%2 = OpTypeVoid\n"
   1113 		"%3 = OpTypeFunction %2\n"
   1114 		"%6 = OpTypeFloat 32\n"
   1115 		"%7 = OpTypeVector %6 4\n"
   1116 		"%8 = OpTypeInt 32 0\n"
   1117 		"%9 = OpConstant %8 1\n"
   1118 		"%10 = OpTypeArray %6 %9\n"
   1119 		"%11 = OpTypeStruct %7 %6 %10 %10\n"
   1120 		"%12 = OpTypePointer Output %11\n"
   1121 		"%13 = OpVariable %12 Output\n"
   1122 		"%14 = OpTypeInt 32 1\n"
   1123 		"%15 = OpConstant %14 0\n"
   1124 		"%16 = OpTypePointer Input %7\n"
   1125 		"%17 = OpVariable %16 Input\n"
   1126 		"%19 = OpTypePointer Output %7\n"
   1127 		"%4 = OpFunction %2 None %3\n"
   1128 		"%5 = OpLabel\n"
   1129 		"%18 = OpLoad %7 %17\n"
   1130 		"%20 = OpAccessChain %19 %13 %15\n"
   1131 		"OpStore %20 %18\n"
   1132 		"OpReturn\n"
   1133 		"OpFunctionEnd\n";
   1134 }
   1135 
   1136 void vkt::subgroups::setFragmentShaderFrameBuffer (vk::SourceCollections& programCollection)
   1137 {
   1138 	/*
   1139 		"layout(location = 0) in float in_color;\n"
   1140 		"layout(location = 0) out uint out_color;\n"
   1141 		"void main()\n"
   1142 		{\n"
   1143 		"	out_color = uint(in_color);\n"
   1144 		"}\n";
   1145 	*/
   1146 	programCollection.spirvAsmSources.add("fragment") <<
   1147 		"; SPIR-V\n"
   1148 		"; Version: 1.3\n"
   1149 		"; Generator: Khronos Glslang Reference Front End; 2\n"
   1150 		"; Bound: 14\n"
   1151 		"; Schema: 0\n"
   1152 		"OpCapability Shader\n"
   1153 		"%1 = OpExtInstImport \"GLSL.std.450\"\n"
   1154 		"OpMemoryModel Logical GLSL450\n"
   1155 		"OpEntryPoint Fragment %4 \"main\" %8 %11\n"
   1156 		"OpExecutionMode %4 OriginUpperLeft\n"
   1157 		"OpDecorate %8 Location 0\n"
   1158 		"OpDecorate %11 Location 0\n"
   1159 		"%2 = OpTypeVoid\n"
   1160 		"%3 = OpTypeFunction %2\n"
   1161 		"%6 = OpTypeInt 32 0\n"
   1162 		"%7 = OpTypePointer Output %6\n"
   1163 		"%8 = OpVariable %7 Output\n"
   1164 		"%9 = OpTypeFloat 32\n"
   1165 		"%10 = OpTypePointer Input %9\n"
   1166 		"%11 = OpVariable %10 Input\n"
   1167 		"%4 = OpFunction %2 None %3\n"
   1168 		"%5 = OpLabel\n"
   1169 		"%12 = OpLoad %9 %11\n"
   1170 		"%13 = OpConvertFToU %6 %12\n"
   1171 		"OpStore %8 %13\n"
   1172 		"OpReturn\n"
   1173 		"OpFunctionEnd\n";
   1174 }
   1175 
   1176 void vkt::subgroups::setTesCtrlShaderFrameBuffer (vk::SourceCollections& programCollection)
   1177 {
   1178 	/*
   1179 		"#extension GL_KHR_shader_subgroup_basic: enable\n"
   1180 		"#extension GL_EXT_tessellation_shader : require\n"
   1181 		"layout(vertices = 2) out;\n"
   1182 		"void main (void)\n"
   1183 		"{\n"
   1184 		"  if (gl_InvocationID == 0)\n"
   1185 		  {\n"
   1186 		"    gl_TessLevelOuter[0] = 1.0f;\n"
   1187 		"    gl_TessLevelOuter[1] = 1.0f;\n"
   1188 		"  }\n"
   1189 		"  gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
   1190 		"}\n";
   1191 	*/
   1192 	programCollection.spirvAsmSources.add("tesc") <<
   1193 		"; SPIR-V\n"
   1194 		"; Version: 1.3\n"
   1195 		"; Generator: Khronos Glslang Reference Front End; 2\n"
   1196 		"; Bound: 46\n"
   1197 		"; Schema: 0\n"
   1198 		"OpCapability Tessellation\n"
   1199 		"%1 = OpExtInstImport \"GLSL.std.450\"\n"
   1200 		"OpMemoryModel Logical GLSL450\n"
   1201 		"OpEntryPoint TessellationControl %4 \"main\" %8 %20 %33 %39\n"
   1202 		"OpExecutionMode %4 OutputVertices 2\n"
   1203 		"OpDecorate %8 BuiltIn InvocationId\n"
   1204 		"OpDecorate %20 Patch\n"
   1205 		"OpDecorate %20 BuiltIn TessLevelOuter\n"
   1206 		"OpMemberDecorate %29 0 BuiltIn Position\n"
   1207 		"OpMemberDecorate %29 1 BuiltIn PointSize\n"
   1208 		"OpMemberDecorate %29 2 BuiltIn ClipDistance\n"
   1209 		"OpMemberDecorate %29 3 BuiltIn CullDistance\n"
   1210 		"OpDecorate %29 Block\n"
   1211 		"OpMemberDecorate %35 0 BuiltIn Position\n"
   1212 		"OpMemberDecorate %35 1 BuiltIn PointSize\n"
   1213 		"OpMemberDecorate %35 2 BuiltIn ClipDistance\n"
   1214 		"OpMemberDecorate %35 3 BuiltIn CullDistance\n"
   1215 		"OpDecorate %35 Block\n"
   1216 		"%2 = OpTypeVoid\n"
   1217 		"%3 = OpTypeFunction %2\n"
   1218 		"%6 = OpTypeInt 32 1\n"
   1219 		"%7 = OpTypePointer Input %6\n"
   1220 		"%8 = OpVariable %7 Input\n"
   1221 		"%10 = OpConstant %6 0\n"
   1222 		"%11 = OpTypeBool\n"
   1223 		"%15 = OpTypeFloat 32\n"
   1224 		"%16 = OpTypeInt 32 0\n"
   1225 		"%17 = OpConstant %16 4\n"
   1226 		"%18 = OpTypeArray %15 %17\n"
   1227 		"%19 = OpTypePointer Output %18\n"
   1228 		"%20 = OpVariable %19 Output\n"
   1229 		"%21 = OpConstant %15 1\n"
   1230 		"%22 = OpTypePointer Output %15\n"
   1231 		"%24 = OpConstant %6 1\n"
   1232 		"%26 = OpTypeVector %15 4\n"
   1233 		"%27 = OpConstant %16 1\n"
   1234 		"%28 = OpTypeArray %15 %27\n"
   1235 		"%29 = OpTypeStruct %26 %15 %28 %28\n"
   1236 		"%30 = OpConstant %16 2\n"
   1237 		"%31 = OpTypeArray %29 %30\n"
   1238 		"%32 = OpTypePointer Output %31\n"
   1239 		"%33 = OpVariable %32 Output\n"
   1240 		"%35 = OpTypeStruct %26 %15 %28 %28\n"
   1241 		"%36 = OpConstant %16 32\n"
   1242 		"%37 = OpTypeArray %35 %36\n"
   1243 		"%38 = OpTypePointer Input %37\n"
   1244 		"%39 = OpVariable %38 Input\n"
   1245 		"%41 = OpTypePointer Input %26\n"
   1246 		"%44 = OpTypePointer Output %26\n"
   1247 		"%4 = OpFunction %2 None %3\n"
   1248 		"%5 = OpLabel\n"
   1249 		"%9 = OpLoad %6 %8\n"
   1250 		"%12 = OpIEqual %11 %9 %10\n"
   1251 		"OpSelectionMerge %14 None\n"
   1252 		"OpBranchConditional %12 %13 %14\n"
   1253 		"%13 = OpLabel\n"
   1254 		"%23 = OpAccessChain %22 %20 %10\n"
   1255 		"OpStore %23 %21\n"
   1256 		"%25 = OpAccessChain %22 %20 %24\n"
   1257 		"OpStore %25 %21\n"
   1258 		"OpBranch %14\n"
   1259 		"%14 = OpLabel\n"
   1260 		"%34 = OpLoad %6 %8\n"
   1261 		"%40 = OpLoad %6 %8\n"
   1262 		"%42 = OpAccessChain %41 %39 %40 %10\n"
   1263 		"%43 = OpLoad %26 %42\n"
   1264 		"%45 = OpAccessChain %44 %33 %34 %10\n"
   1265 		"OpStore %45 %43\n"
   1266 		"OpReturn\n"
   1267 		"OpFunctionEnd\n";
   1268 }
   1269 
   1270 void vkt::subgroups::setTesEvalShaderFrameBuffer (vk::SourceCollections& programCollection)
   1271 {
   1272 	/*
   1273 		"#extension GL_KHR_shader_subgroup_ballot: enable\n"
   1274 		"#extension GL_EXT_tessellation_shader : require\n"
   1275 		"layout(isolines, equal_spacing, ccw ) in;\n"
   1276 		"layout(location = 0) in float in_color[];\n"
   1277 		"layout(location = 0) out float out_color;\n"
   1278 		"\n"
   1279 		"void main (void)\n"
   1280 		"{\n"
   1281 		"  gl_Position = mix(gl_in[0].gl_Position, gl_in[1].gl_Position, gl_TessCoord.x);\n"
   1282 		"  out_color = in_color[0];\n"
   1283 		"}\n";
   1284 	*/
   1285 	programCollection.spirvAsmSources.add("tese") <<
   1286 		"; SPIR-V\n"
   1287 		"; Version: 1.3\n"
   1288 		"; Generator: Khronos Glslang Reference Front End; 2\n"
   1289 		"; Bound: 45\n"
   1290 		"; Schema: 0\n"
   1291 		"OpCapability Tessellation\n"
   1292 		"%1 = OpExtInstImport \"GLSL.std.450\"\n"
   1293 		"OpMemoryModel Logical GLSL450\n"
   1294 		"OpEntryPoint TessellationEvaluation %4 \"main\" %13 %20 %29 %39 %42\n"
   1295 		"OpExecutionMode %4 Isolines\n"
   1296 		"OpExecutionMode %4 SpacingEqual\n"
   1297 		"OpExecutionMode %4 VertexOrderCcw\n"
   1298 		"OpMemberDecorate %11 0 BuiltIn Position\n"
   1299 		"OpMemberDecorate %11 1 BuiltIn PointSize\n"
   1300 		"OpMemberDecorate %11 2 BuiltIn ClipDistance\n"
   1301 		"OpMemberDecorate %11 3 BuiltIn CullDistance\n"
   1302 		"OpDecorate %11 Block\n"
   1303 		"OpMemberDecorate %16 0 BuiltIn Position\n"
   1304 		"OpMemberDecorate %16 1 BuiltIn PointSize\n"
   1305 		"OpMemberDecorate %16 2 BuiltIn ClipDistance\n"
   1306 		"OpMemberDecorate %16 3 BuiltIn CullDistance\n"
   1307 		"OpDecorate %16 Block\n"
   1308 		"OpDecorate %29 BuiltIn TessCoord\n"
   1309 		"OpDecorate %39 Location 0\n"
   1310 		"OpDecorate %42 Location 0\n"
   1311 		"%2 = OpTypeVoid\n"
   1312 		"%3 = OpTypeFunction %2\n"
   1313 		"%6 = OpTypeFloat 32\n"
   1314 		"%7 = OpTypeVector %6 4\n"
   1315 		"%8 = OpTypeInt 32 0\n"
   1316 		"%9 = OpConstant %8 1\n"
   1317 		"%10 = OpTypeArray %6 %9\n"
   1318 		"%11 = OpTypeStruct %7 %6 %10 %10\n"
   1319 		"%12 = OpTypePointer Output %11\n"
   1320 		"%13 = OpVariable %12 Output\n"
   1321 		"%14 = OpTypeInt 32 1\n"
   1322 		"%15 = OpConstant %14 0\n"
   1323 		"%16 = OpTypeStruct %7 %6 %10 %10\n"
   1324 		"%17 = OpConstant %8 32\n"
   1325 		"%18 = OpTypeArray %16 %17\n"
   1326 		"%19 = OpTypePointer Input %18\n"
   1327 		"%20 = OpVariable %19 Input\n"
   1328 		"%21 = OpTypePointer Input %7\n"
   1329 		"%24 = OpConstant %14 1\n"
   1330 		"%27 = OpTypeVector %6 3\n"
   1331 		"%28 = OpTypePointer Input %27\n"
   1332 		"%29 = OpVariable %28 Input\n"
   1333 		"%30 = OpConstant %8 0\n"
   1334 		"%31 = OpTypePointer Input %6\n"
   1335 		"%36 = OpTypePointer Output %7\n"
   1336 		"%38 = OpTypePointer Output %6\n"
   1337 		"%39 = OpVariable %38 Output\n"
   1338 		"%40 = OpTypeArray %6 %17\n"
   1339 		"%41 = OpTypePointer Input %40\n"
   1340 		"%42 = OpVariable %41 Input\n"
   1341 		"%4 = OpFunction %2 None %3\n"
   1342 		"%5 = OpLabel\n"
   1343 		"%22 = OpAccessChain %21 %20 %15 %15\n"
   1344 		"%23 = OpLoad %7 %22\n"
   1345 		"%25 = OpAccessChain %21 %20 %24 %15\n"
   1346 		"%26 = OpLoad %7 %25\n"
   1347 		"%32 = OpAccessChain %31 %29 %30\n"
   1348 		"%33 = OpLoad %6 %32\n"
   1349 		"%34 = OpCompositeConstruct %7 %33 %33 %33 %33\n"
   1350 		"%35 = OpExtInst %7 %1 FMix %23 %26 %34\n"
   1351 		"%37 = OpAccessChain %36 %13 %15\n"
   1352 		"OpStore %37 %35\n"
   1353 		"%43 = OpAccessChain %31 %42 %15\n"
   1354 		"%44 = OpLoad %6 %43\n"
   1355 		"OpStore %39 %44\n"
   1356 		"OpReturn\n"
   1357 		"OpFunctionEnd\n";
   1358 }
   1359 
   1360 void vkt::subgroups::addGeometryShadersFromTemplate (const std::string& glslTemplate, const vk::ShaderBuildOptions& options,  vk::GlslSourceCollection& collection)
   1361 {
   1362 	tcu::StringTemplate geometryTemplate(glslTemplate);
   1363 
   1364 	map<string, string>		linesParams;
   1365 	linesParams.insert(pair<string, string>("TOPOLOGY", "lines"));
   1366 
   1367 	map<string, string>		pointsParams;
   1368 	pointsParams.insert(pair<string, string>("TOPOLOGY", "points"));
   1369 
   1370 	collection.add("geometry_lines")	<< glu::GeometrySource(geometryTemplate.specialize(linesParams))	<< options;
   1371 	collection.add("geometry_points")	<< glu::GeometrySource(geometryTemplate.specialize(pointsParams))	<< options;
   1372 }
   1373 
   1374 void vkt::subgroups::addGeometryShadersFromTemplate (const std::string& spirvTemplate, const vk::SpirVAsmBuildOptions& options, vk::SpirVAsmCollection& collection)
   1375 {
   1376 	tcu::StringTemplate geometryTemplate(spirvTemplate);
   1377 
   1378 	map<string, string>		linesParams;
   1379 	linesParams.insert(pair<string, string>("TOPOLOGY", "InputLines"));
   1380 
   1381 	map<string, string>		pointsParams;
   1382 	pointsParams.insert(pair<string, string>("TOPOLOGY", "InputPoints"));
   1383 
   1384 	collection.add("geometry_lines")	<< geometryTemplate.specialize(linesParams)		<< options;
   1385 	collection.add("geometry_points")	<< geometryTemplate.specialize(pointsParams)	<< options;
   1386 }
   1387 
   1388 void initializeMemory(Context& context, const Allocation& alloc, subgroups::SSBOData& data)
   1389 {
   1390 	const vk::VkFormat format = data.format;
   1391 	const vk::VkDeviceSize size = data.numElements *
   1392 		(data.isImage ? getFormatSizeInBytes(format) : getElementSizeInBytes(format, data.layout));
   1393 	if (subgroups::SSBOData::InitializeNonZero == data.initializeType)
   1394 	{
   1395 		de::Random rnd(context.getTestContext().getCommandLine().getBaseSeed());
   1396 
   1397 		switch (format)
   1398 		{
   1399 			default:
   1400 				DE_FATAL("Illegal buffer format");
   1401 				break;
   1402 			case VK_FORMAT_R8_USCALED:
   1403 			case VK_FORMAT_R8G8_USCALED:
   1404 			case VK_FORMAT_R8G8B8_USCALED:
   1405 			case VK_FORMAT_R8G8B8A8_USCALED:
   1406 			case VK_FORMAT_R32_SINT:
   1407 			case VK_FORMAT_R32G32_SINT:
   1408 			case VK_FORMAT_R32G32B32_SINT:
   1409 			case VK_FORMAT_R32G32B32A32_SINT:
   1410 			case VK_FORMAT_R32_UINT:
   1411 			case VK_FORMAT_R32G32_UINT:
   1412 			case VK_FORMAT_R32G32B32_UINT:
   1413 			case VK_FORMAT_R32G32B32A32_UINT:
   1414 			{
   1415 				deUint32* ptr = reinterpret_cast<deUint32*>(alloc.getHostPtr());
   1416 
   1417 				for (vk::VkDeviceSize k = 0; k < (size / sizeof(deUint32)); k++)
   1418 				{
   1419 					ptr[k] = rnd.getUint32();
   1420 				}
   1421 			}
   1422 			break;
   1423 			case VK_FORMAT_R32_SFLOAT:
   1424 			case VK_FORMAT_R32G32_SFLOAT:
   1425 			case VK_FORMAT_R32G32B32_SFLOAT:
   1426 			case VK_FORMAT_R32G32B32A32_SFLOAT:
   1427 			{
   1428 				float* ptr = reinterpret_cast<float*>(alloc.getHostPtr());
   1429 
   1430 				for (vk::VkDeviceSize k = 0; k < (size / sizeof(float)); k++)
   1431 				{
   1432 					ptr[k] = rnd.getFloat();
   1433 				}
   1434 			}
   1435 			break;
   1436 			case VK_FORMAT_R64_SFLOAT:
   1437 			case VK_FORMAT_R64G64_SFLOAT:
   1438 			case VK_FORMAT_R64G64B64_SFLOAT:
   1439 			case VK_FORMAT_R64G64B64A64_SFLOAT:
   1440 			{
   1441 				double* ptr = reinterpret_cast<double*>(alloc.getHostPtr());
   1442 
   1443 				for (vk::VkDeviceSize k = 0; k < (size / sizeof(double)); k++)
   1444 				{
   1445 					ptr[k] = rnd.getDouble();
   1446 				}
   1447 			}
   1448 			break;
   1449 		}
   1450 	}
   1451 	else if (subgroups::SSBOData::InitializeZero == data.initializeType)
   1452 	{
   1453 		deUint32* ptr = reinterpret_cast<deUint32*>(alloc.getHostPtr());
   1454 
   1455 		for (vk::VkDeviceSize k = 0; k < size / 4; k++)
   1456 		{
   1457 			ptr[k] = 0;
   1458 		}
   1459 	}
   1460 
   1461 	if (subgroups::SSBOData::InitializeNone != data.initializeType)
   1462 	{
   1463 		flushAlloc(context.getDeviceInterface(), context.getDevice(), alloc);
   1464 	}
   1465 }
   1466 
   1467 deUint32 getResultBinding (const VkShaderStageFlagBits shaderStage)
   1468 {
   1469 	switch(shaderStage)
   1470 	{
   1471 		case VK_SHADER_STAGE_VERTEX_BIT:
   1472 			return 0u;
   1473 			break;
   1474 		case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT:
   1475 			return 1u;
   1476 			break;
   1477 		case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT:
   1478 			return 2u;
   1479 			break;
   1480 		case VK_SHADER_STAGE_GEOMETRY_BIT:
   1481 			return 3u;
   1482 			break;
   1483 		default:
   1484 			DE_ASSERT(0);
   1485 			return -1;
   1486 	}
   1487 	DE_ASSERT(0);
   1488 	return -1;
   1489 }
   1490 
   1491 tcu::TestStatus vkt::subgroups::makeTessellationEvaluationFrameBufferTest(
   1492 	Context& context, VkFormat format, SSBOData* extraData,
   1493 	deUint32 extraDataCount,
   1494 	bool (*checkResult)(std::vector<const void*> datas, deUint32 width, deUint32 subgroupSize),
   1495 	const VkShaderStageFlags shaderStage)
   1496 {
   1497 	const deUint32							maxWidth				= 1024u;
   1498 	vector<de::SharedPtr<BufferOrImage> >	inputBuffers			(extraDataCount);
   1499 	DescriptorSetLayoutBuilder				layoutBuilder;
   1500 	DescriptorPoolBuilder					poolBuilder;
   1501 	DescriptorSetUpdateBuilder				updateBuilder;
   1502 	Move <VkDescriptorPool>					descriptorPool;
   1503 	Move <VkDescriptorSet>					descriptorSet;
   1504 
   1505 	const Unique<VkShaderModule>			vertexShaderModule		(createShaderModule(context.getDeviceInterface(), context.getDevice(),
   1506 																		context.getBinaryCollection().get("vert"), 0u));
   1507 	const Unique<VkShaderModule>			teCtrlShaderModule		(createShaderModule(context.getDeviceInterface(), context.getDevice(),
   1508 																		context.getBinaryCollection().get("tesc"), 0u));
   1509 	const Unique<VkShaderModule>			teEvalShaderModule		(createShaderModule(context.getDeviceInterface(), context.getDevice(),
   1510 																		context.getBinaryCollection().get("tese"), 0u));
   1511 	const Unique<VkShaderModule>			fragmentShaderModule	(createShaderModule(context.getDeviceInterface(), context.getDevice(),
   1512 																	context.getBinaryCollection().get("fragment"), 0u));
   1513 	const Unique<VkRenderPass>				renderPass				(makeRenderPass(context, format));
   1514 
   1515 	const VkVertexInputBindingDescription	vertexInputBinding		=
   1516 	{
   1517 		0u,											// binding;
   1518 		static_cast<deUint32>(sizeof(tcu::Vec4)),	// stride;
   1519 		VK_VERTEX_INPUT_RATE_VERTEX					// inputRate
   1520 	};
   1521 
   1522 	const VkVertexInputAttributeDescription	vertexInputAttribute	=
   1523 	{
   1524 		0u,
   1525 		0u,
   1526 		VK_FORMAT_R32G32B32A32_SFLOAT,
   1527 		0u
   1528 	};
   1529 
   1530 	for (deUint32 i = 0u; i < extraDataCount; i++)
   1531 	{
   1532 		if (extraData[i].isImage)
   1533 		{
   1534 			inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Image(context, static_cast<deUint32>(extraData[i].numElements), 1u, extraData[i].format));
   1535 		}
   1536 		else
   1537 		{
   1538 			vk::VkDeviceSize size = getElementSizeInBytes(extraData[i].format, extraData[i].layout) * extraData[i].numElements;
   1539 			inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Buffer(context, size, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT));
   1540 		}
   1541 		const Allocation& alloc = inputBuffers[i]->getAllocation();
   1542 		initializeMemory(context, alloc, extraData[i]);
   1543 	}
   1544 
   1545 	for (deUint32 ndx = 0u; ndx < extraDataCount; ndx++)
   1546 		layoutBuilder.addBinding(inputBuffers[ndx]->getType(), 1u, shaderStage, DE_NULL);
   1547 
   1548 	const Unique<VkDescriptorSetLayout>		descriptorSetLayout		(layoutBuilder.build(context.getDeviceInterface(), context.getDevice()));
   1549 
   1550 	const Unique<VkPipelineLayout>			pipelineLayout			(makePipelineLayout(context, *descriptorSetLayout));
   1551 
   1552 	const Unique<VkPipeline>				pipeline				(makeGraphicsPipeline(context, *pipelineLayout,
   1553 																	VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT |
   1554 																	VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT | VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT,
   1555 																	*vertexShaderModule, *fragmentShaderModule, DE_NULL, *teCtrlShaderModule, *teEvalShaderModule,
   1556 																	*renderPass, VK_PRIMITIVE_TOPOLOGY_PATCH_LIST, &vertexInputBinding, &vertexInputAttribute, true, format));
   1557 
   1558 	for (deUint32 ndx = 0u; ndx < extraDataCount; ndx++)
   1559 		poolBuilder.addType(inputBuffers[ndx]->getType());
   1560 
   1561 	if (extraDataCount > 0)
   1562 	{
   1563 		descriptorPool = poolBuilder.build(context.getDeviceInterface(), context.getDevice(),
   1564 							VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
   1565 		descriptorSet = makeDescriptorSet(context, *descriptorPool, *descriptorSetLayout);
   1566 	}
   1567 
   1568 	for (deUint32 buffersNdx = 0u; buffersNdx < inputBuffers.size(); buffersNdx++)
   1569 	{
   1570 		if (inputBuffers[buffersNdx]->isImage())
   1571 		{
   1572 			VkDescriptorImageInfo info =
   1573 				makeDescriptorImageInfo(inputBuffers[buffersNdx]->getAsImage()->getSampler(),
   1574 										inputBuffers[buffersNdx]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
   1575 
   1576 			updateBuilder.writeSingle(*descriptorSet,
   1577 										DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
   1578 										inputBuffers[buffersNdx]->getType(), &info);
   1579 		}
   1580 		else
   1581 		{
   1582 			VkDescriptorBufferInfo info =
   1583 				makeDescriptorBufferInfo(inputBuffers[buffersNdx]->getAsBuffer()->getBuffer(),
   1584 										0ull, inputBuffers[buffersNdx]->getAsBuffer()->getSize());
   1585 
   1586 			updateBuilder.writeSingle(*descriptorSet,
   1587 										DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
   1588 										inputBuffers[buffersNdx]->getType(), &info);
   1589 		}
   1590 	}
   1591 
   1592 	updateBuilder.update(context.getDeviceInterface(), context.getDevice());
   1593 
   1594 	const Unique<VkCommandPool>				cmdPool					(makeCommandPool(context));
   1595 	const deUint32							subgroupSize			= getSubgroupSize(context);
   1596 	const Unique<VkCommandBuffer>			cmdBuffer				(makeCommandBuffer(context, *cmdPool));
   1597 	const vk::VkDeviceSize					vertexBufferSize		= 2ull * maxWidth * sizeof(tcu::Vec4);
   1598 	Buffer									vertexBuffer			(context, vertexBufferSize, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT);
   1599 	unsigned								totalIterations			= 0u;
   1600 	unsigned								failedIterations		= 0u;
   1601 	Image									discardableImage		(context, maxWidth, 1u, format, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
   1602 
   1603 	{
   1604 		const Allocation&		alloc				= vertexBuffer.getAllocation();
   1605 		std::vector<tcu::Vec4>	data				(2u * maxWidth, Vec4(1.0f, 0.0f, 1.0f, 1.0f));
   1606 		const float				pixelSize			= 2.0f / static_cast<float>(maxWidth);
   1607 		float					leftHandPosition	= -1.0f;
   1608 
   1609 		for(deUint32 ndx = 0u; ndx < data.size(); ndx+=2u)
   1610 		{
   1611 			data[ndx][0] = leftHandPosition;
   1612 			leftHandPosition += pixelSize;
   1613 			data[ndx+1][0] = leftHandPosition;
   1614 		}
   1615 
   1616 		deMemcpy(alloc.getHostPtr(), &data[0], data.size() * sizeof(tcu::Vec4));
   1617 		flushAlloc(context.getDeviceInterface(), context.getDevice(), alloc);
   1618 	}
   1619 
   1620 	for (deUint32 width = 1u; width < maxWidth; ++width)
   1621 	{
   1622 		const Unique<VkFramebuffer>	framebuffer			(makeFramebuffer(context, *renderPass, discardableImage.getImageView(), maxWidth, 1));
   1623 		const VkViewport			viewport			= makeViewport(maxWidth, 1u);
   1624 		const VkRect2D				scissor				= makeRect2D(maxWidth, 1u);
   1625 		const vk::VkDeviceSize		imageResultSize		= tcu::getPixelSize(vk::mapVkFormat(format)) * maxWidth;
   1626 		Buffer						imageBufferResult	(context, imageResultSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
   1627 		const VkDeviceSize			vertexBufferOffset	= 0u;
   1628 
   1629 		totalIterations++;
   1630 
   1631 		beginCommandBuffer(context.getDeviceInterface(), *cmdBuffer);
   1632 		{
   1633 
   1634 			context.getDeviceInterface().cmdSetViewport(*cmdBuffer, 0, 1, &viewport);
   1635 			context.getDeviceInterface().cmdSetScissor(*cmdBuffer, 0, 1, &scissor);
   1636 
   1637 			beginRenderPass(context.getDeviceInterface(), *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, maxWidth, 1u), tcu::Vec4(0.0f));
   1638 
   1639 			context.getDeviceInterface().cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
   1640 
   1641 			if (extraDataCount > 0)
   1642 			{
   1643 				context.getDeviceInterface().cmdBindDescriptorSets(*cmdBuffer,
   1644 					VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, 1u,
   1645 					&descriptorSet.get(), 0u, DE_NULL);
   1646 			}
   1647 
   1648 			context.getDeviceInterface().cmdBindVertexBuffers(*cmdBuffer, 0u, 1u, vertexBuffer.getBufferPtr(), &vertexBufferOffset);
   1649 			context.getDeviceInterface().cmdDraw(*cmdBuffer, 2 * width, 1, 0, 0);
   1650 
   1651 			endRenderPass(context.getDeviceInterface(), *cmdBuffer);
   1652 
   1653 			copyImageToBuffer(context.getDeviceInterface(), *cmdBuffer, discardableImage.getImage(), imageBufferResult.getBuffer(), tcu::IVec2(maxWidth, 1), VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
   1654 			endCommandBuffer(context.getDeviceInterface(), *cmdBuffer);
   1655 
   1656 			Move<VkFence> fence(submitCommandBuffer(context, *cmdBuffer));
   1657 			waitFence(context, fence);
   1658 		}
   1659 
   1660 		{
   1661 			const Allocation& allocResult = imageBufferResult.getAllocation();
   1662 			invalidateAlloc(context.getDeviceInterface(), context.getDevice(), allocResult);
   1663 
   1664 			std::vector<const void*> datas;
   1665 			datas.push_back(allocResult.getHostPtr());
   1666 			if (!checkResult(datas, width/2u, subgroupSize))
   1667 				failedIterations++;
   1668 		}
   1669 	}
   1670 
   1671 	if (0 < failedIterations)
   1672 	{
   1673 		context.getTestContext().getLog()
   1674 				<< TestLog::Message << (totalIterations - failedIterations) << " / "
   1675 				<< totalIterations << " values passed" << TestLog::EndMessage;
   1676 		return tcu::TestStatus::fail("Failed!");
   1677 	}
   1678 
   1679 	return tcu::TestStatus::pass("OK");
   1680 }
   1681 
   1682 bool vkt::subgroups::check(std::vector<const void*> datas,
   1683 	deUint32 width, deUint32 ref)
   1684 {
   1685 	const deUint32* data = reinterpret_cast<const deUint32*>(datas[0]);
   1686 
   1687 	for (deUint32 n = 0; n < width; ++n)
   1688 	{
   1689 		if (data[n] != ref)
   1690 		{
   1691 			return false;
   1692 		}
   1693 	}
   1694 
   1695 	return true;
   1696 }
   1697 
   1698 bool vkt::subgroups::checkCompute(std::vector<const void*> datas,
   1699 	const deUint32 numWorkgroups[3], const deUint32 localSize[3],
   1700 	deUint32 ref)
   1701 {
   1702 	const deUint32 globalSizeX = numWorkgroups[0] * localSize[0];
   1703 	const deUint32 globalSizeY = numWorkgroups[1] * localSize[1];
   1704 	const deUint32 globalSizeZ = numWorkgroups[2] * localSize[2];
   1705 
   1706 	return check(datas, globalSizeX * globalSizeY * globalSizeZ, ref);
   1707 }
   1708 
   1709 tcu::TestStatus vkt::subgroups::makeGeometryFrameBufferTest(
   1710 	Context& context, VkFormat format, SSBOData* extraData,
   1711 	deUint32 extraDataCount,
   1712 	bool (*checkResult)(std::vector<const void*> datas, deUint32 width, deUint32 subgroupSize))
   1713 {
   1714 	const deUint32							maxWidth				= 1024u;
   1715 	vector<de::SharedPtr<BufferOrImage> >	inputBuffers			(extraDataCount);
   1716 	DescriptorSetLayoutBuilder				layoutBuilder;
   1717 	DescriptorPoolBuilder					poolBuilder;
   1718 	DescriptorSetUpdateBuilder				updateBuilder;
   1719 	Move <VkDescriptorPool>					descriptorPool;
   1720 	Move <VkDescriptorSet>					descriptorSet;
   1721 
   1722 	const Unique<VkShaderModule>			vertexShaderModule		(createShaderModule(context.getDeviceInterface(), context.getDevice(),
   1723 																		context.getBinaryCollection().get("vert"), 0u));
   1724 	const Unique<VkShaderModule>			geometryShaderModule	(createShaderModule(context.getDeviceInterface(), context.getDevice(),
   1725 																		context.getBinaryCollection().get("geometry"), 0u));
   1726 	const Unique<VkShaderModule>			fragmentShaderModule	(createShaderModule(context.getDeviceInterface(), context.getDevice(),
   1727 																	context.getBinaryCollection().get("fragment"), 0u));
   1728 	const Unique<VkRenderPass>				renderPass				(makeRenderPass(context, format));
   1729 	const VkVertexInputBindingDescription	vertexInputBinding		=
   1730 	{
   1731 		0u,											// binding;
   1732 		static_cast<deUint32>(sizeof(tcu::Vec4)),	// stride;
   1733 		VK_VERTEX_INPUT_RATE_VERTEX					// inputRate
   1734 	};
   1735 
   1736 	const VkVertexInputAttributeDescription	vertexInputAttribute	=
   1737 	{
   1738 		0u,
   1739 		0u,
   1740 		VK_FORMAT_R32G32B32A32_SFLOAT,
   1741 		0u
   1742 	};
   1743 
   1744 	for (deUint32 i = 0u; i < extraDataCount; i++)
   1745 	{
   1746 		if (extraData[i].isImage)
   1747 		{
   1748 			inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Image(context, static_cast<deUint32>(extraData[i].numElements), 1u, extraData[i].format));
   1749 		}
   1750 		else
   1751 		{
   1752 			vk::VkDeviceSize size = getElementSizeInBytes(extraData[i].format, extraData[i].layout) * extraData[i].numElements;
   1753 			inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Buffer(context, size, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT));
   1754 		}
   1755 		const Allocation& alloc = inputBuffers[i]->getAllocation();
   1756 		initializeMemory(context, alloc, extraData[i]);
   1757 	}
   1758 
   1759 	for (deUint32 ndx = 0u; ndx < extraDataCount; ndx++)
   1760 		layoutBuilder.addBinding(inputBuffers[ndx]->getType(), 1u, VK_SHADER_STAGE_GEOMETRY_BIT, DE_NULL);
   1761 
   1762 	const Unique<VkDescriptorSetLayout>		descriptorSetLayout		(layoutBuilder.build(context.getDeviceInterface(), context.getDevice()));
   1763 
   1764 	const Unique<VkPipelineLayout>			pipelineLayout			(makePipelineLayout(context, *descriptorSetLayout));
   1765 
   1766 	const Unique<VkPipeline>				pipeline				(makeGraphicsPipeline(context, *pipelineLayout,
   1767 																	VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT | VK_SHADER_STAGE_GEOMETRY_BIT,
   1768 																	*vertexShaderModule, *fragmentShaderModule, *geometryShaderModule, DE_NULL, DE_NULL,
   1769 																	*renderPass, VK_PRIMITIVE_TOPOLOGY_POINT_LIST, &vertexInputBinding, &vertexInputAttribute, true, format));
   1770 
   1771 	for (deUint32 ndx = 0u; ndx < extraDataCount; ndx++)
   1772 		poolBuilder.addType(inputBuffers[ndx]->getType());
   1773 
   1774 	if (extraDataCount > 0)
   1775 	{
   1776 		descriptorPool = poolBuilder.build(context.getDeviceInterface(), context.getDevice(),
   1777 							VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
   1778 		descriptorSet = makeDescriptorSet(context, *descriptorPool, *descriptorSetLayout);
   1779 	}
   1780 
   1781 	for (deUint32 buffersNdx = 0u; buffersNdx < inputBuffers.size(); buffersNdx++)
   1782 	{
   1783 		if (inputBuffers[buffersNdx]->isImage())
   1784 		{
   1785 			VkDescriptorImageInfo info =
   1786 				makeDescriptorImageInfo(inputBuffers[buffersNdx]->getAsImage()->getSampler(),
   1787 										inputBuffers[buffersNdx]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
   1788 
   1789 			updateBuilder.writeSingle(*descriptorSet,
   1790 										DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
   1791 										inputBuffers[buffersNdx]->getType(), &info);
   1792 		}
   1793 		else
   1794 		{
   1795 			VkDescriptorBufferInfo info =
   1796 				makeDescriptorBufferInfo(inputBuffers[buffersNdx]->getAsBuffer()->getBuffer(),
   1797 										0ull, inputBuffers[buffersNdx]->getAsBuffer()->getSize());
   1798 
   1799 			updateBuilder.writeSingle(*descriptorSet,
   1800 										DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
   1801 										inputBuffers[buffersNdx]->getType(), &info);
   1802 		}
   1803 	}
   1804 
   1805 	updateBuilder.update(context.getDeviceInterface(), context.getDevice());
   1806 
   1807 	const Unique<VkCommandPool>				cmdPool					(makeCommandPool(context));
   1808 	const deUint32							subgroupSize			= getSubgroupSize(context);
   1809 	const Unique<VkCommandBuffer>			cmdBuffer				(makeCommandBuffer(context, *cmdPool));
   1810 	const vk::VkDeviceSize					vertexBufferSize		= maxWidth * sizeof(tcu::Vec4);
   1811 	Buffer									vertexBuffer			(context, vertexBufferSize, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT);
   1812 	unsigned								totalIterations			= 0u;
   1813 	unsigned								failedIterations		= 0u;
   1814 	Image									discardableImage		(context, maxWidth, 1u, format, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
   1815 
   1816 	{
   1817 		const Allocation&		alloc				= vertexBuffer.getAllocation();
   1818 		std::vector<tcu::Vec4>	data				(maxWidth, Vec4(1.0f, 1.0f, 1.0f, 1.0f));
   1819 		const float				pixelSize			= 2.0f / static_cast<float>(maxWidth);
   1820 		float					leftHandPosition	= -1.0f;
   1821 
   1822 		for(deUint32 ndx = 0u; ndx < maxWidth; ++ndx)
   1823 		{
   1824 			data[ndx][0] = leftHandPosition + pixelSize / 2.0f;
   1825 			leftHandPosition += pixelSize;
   1826 		}
   1827 
   1828 		deMemcpy(alloc.getHostPtr(), &data[0], maxWidth * sizeof(tcu::Vec4));
   1829 		flushAlloc(context.getDeviceInterface(), context.getDevice(), alloc);
   1830 	}
   1831 
   1832 	for (deUint32 width = 1u; width < maxWidth; width++)
   1833 	{
   1834 		totalIterations++;
   1835 		const Unique<VkFramebuffer>	framebuffer			(makeFramebuffer(context, *renderPass, discardableImage.getImageView(), maxWidth, 1));
   1836 		const VkViewport			viewport			= makeViewport(maxWidth, 1u);
   1837 		const VkRect2D				scissor				= makeRect2D(maxWidth, 1u);
   1838 		const vk::VkDeviceSize		imageResultSize		= tcu::getPixelSize(vk::mapVkFormat(format)) * maxWidth;
   1839 		Buffer						imageBufferResult	(context, imageResultSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
   1840 		const VkDeviceSize			vertexBufferOffset	= 0u;
   1841 
   1842 		for (deUint32 ndx = 0u; ndx < inputBuffers.size(); ndx++)
   1843 		{
   1844 			const Allocation& alloc = inputBuffers[ndx]->getAllocation();
   1845 			initializeMemory(context, alloc, extraData[ndx]);
   1846 		}
   1847 
   1848 		beginCommandBuffer(context.getDeviceInterface(), *cmdBuffer);
   1849 		{
   1850 			context.getDeviceInterface().cmdSetViewport(
   1851 				*cmdBuffer, 0, 1, &viewport);
   1852 
   1853 			context.getDeviceInterface().cmdSetScissor(
   1854 				*cmdBuffer, 0, 1, &scissor);
   1855 
   1856 			beginRenderPass(context.getDeviceInterface(), *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, maxWidth, 1u), tcu::Vec4(0.0f));
   1857 
   1858 			context.getDeviceInterface().cmdBindPipeline(
   1859 				*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
   1860 
   1861 			if (extraDataCount > 0)
   1862 			{
   1863 				context.getDeviceInterface().cmdBindDescriptorSets(*cmdBuffer,
   1864 					VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, 1u,
   1865 					&descriptorSet.get(), 0u, DE_NULL);
   1866 			}
   1867 
   1868 			context.getDeviceInterface().cmdBindVertexBuffers(*cmdBuffer, 0u, 1u, vertexBuffer.getBufferPtr(), &vertexBufferOffset);
   1869 
   1870 			context.getDeviceInterface().cmdDraw(*cmdBuffer, width, 1u, 0u, 0u);
   1871 
   1872 			endRenderPass(context.getDeviceInterface(), *cmdBuffer);
   1873 
   1874 			copyImageToBuffer(context.getDeviceInterface(), *cmdBuffer, discardableImage.getImage(), imageBufferResult.getBuffer(), tcu::IVec2(maxWidth, 1), VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
   1875 
   1876 			endCommandBuffer(context.getDeviceInterface(), *cmdBuffer);
   1877 			Move<VkFence> fence(submitCommandBuffer(context, *cmdBuffer));
   1878 			waitFence(context, fence);
   1879 		}
   1880 
   1881 		{
   1882 			const Allocation& allocResult = imageBufferResult.getAllocation();
   1883 			invalidateAlloc(context.getDeviceInterface(), context.getDevice(), allocResult);
   1884 
   1885 			std::vector<const void*> datas;
   1886 			datas.push_back(allocResult.getHostPtr());
   1887 			if (!checkResult(datas, width, subgroupSize))
   1888 				failedIterations++;
   1889 		}
   1890 	}
   1891 
   1892 	if (0 < failedIterations)
   1893 	{
   1894 		context.getTestContext().getLog()
   1895 				<< TestLog::Message << (totalIterations - failedIterations) << " / "
   1896 				<< totalIterations << " values passed" << TestLog::EndMessage;
   1897 		return tcu::TestStatus::fail("Failed!");
   1898 	}
   1899 
   1900 	return tcu::TestStatus::pass("OK");
   1901 }
   1902 
   1903 
   1904 tcu::TestStatus vkt::subgroups::allStages(
   1905 	Context& context, VkFormat format, SSBOData* extraDatas,
   1906 	deUint32 extraDatasCount,
   1907 	bool (*checkResult)(std::vector<const void*> datas, deUint32 width, deUint32 subgroupSize),
   1908 	const VkShaderStageFlags shaderStageTested)
   1909 {
   1910 	const deUint32					maxWidth			= 1024u;
   1911 	vector<VkShaderStageFlagBits>	stagesVector;
   1912 	VkShaderStageFlags				shaderStageRequired	= (VkShaderStageFlags)0ull;
   1913 
   1914 	Move<VkShaderModule>			vertexShaderModule;
   1915 	Move<VkShaderModule>			teCtrlShaderModule;
   1916 	Move<VkShaderModule>			teEvalShaderModule;
   1917 	Move<VkShaderModule>			geometryShaderModule;
   1918 	Move<VkShaderModule>			fragmentShaderModule;
   1919 
   1920 	if (shaderStageTested & VK_SHADER_STAGE_VERTEX_BIT)
   1921 	{
   1922 		stagesVector.push_back(VK_SHADER_STAGE_VERTEX_BIT);
   1923 	}
   1924 	if (shaderStageTested & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT)
   1925 	{
   1926 		stagesVector.push_back(VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT);
   1927 		shaderStageRequired |= (shaderStageTested & (VkShaderStageFlags)VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) ? (VkShaderStageFlags) 0u : (VkShaderStageFlags)VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT;
   1928 		shaderStageRequired |= (shaderStageTested & (VkShaderStageFlags)VK_SHADER_STAGE_VERTEX_BIT) ? (VkShaderStageFlags) 0u : (VkShaderStageFlags)VK_SHADER_STAGE_VERTEX_BIT;
   1929 	}
   1930 	if (shaderStageTested & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)
   1931 	{
   1932 		stagesVector.push_back(VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT);
   1933 		shaderStageRequired |= (shaderStageTested & (VkShaderStageFlags)VK_SHADER_STAGE_VERTEX_BIT) ? (VkShaderStageFlags) 0u : (VkShaderStageFlags)VK_SHADER_STAGE_VERTEX_BIT;
   1934 		shaderStageRequired |= (shaderStageTested & (VkShaderStageFlags)VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) ? (VkShaderStageFlags) 0u : (VkShaderStageFlags)VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT;
   1935 	}
   1936 	if (shaderStageTested & VK_SHADER_STAGE_GEOMETRY_BIT)
   1937 	{
   1938 		stagesVector.push_back(VK_SHADER_STAGE_GEOMETRY_BIT);
   1939 		const VkShaderStageFlags required = VK_SHADER_STAGE_VERTEX_BIT;
   1940 		shaderStageRequired |=  (shaderStageTested & required) ? (VkShaderStageFlags) 0 : required;
   1941 	}
   1942 	if (shaderStageTested & VK_SHADER_STAGE_FRAGMENT_BIT)
   1943 	{
   1944 		const VkShaderStageFlags required = VK_SHADER_STAGE_VERTEX_BIT;
   1945 		shaderStageRequired |=  (shaderStageTested & required) ? (VkShaderStageFlags) 0 : required;
   1946 	}
   1947 
   1948 	const deUint32	stagesCount	= static_cast<deUint32>(stagesVector.size());
   1949 	const string	vert		= (shaderStageRequired & VK_SHADER_STAGE_VERTEX_BIT)					? "vert_noSubgroup"		: "vert";
   1950 	const string	tesc		= (shaderStageRequired & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT)		? "tesc_noSubgroup"		: "tesc";
   1951 	const string	tese		= (shaderStageRequired & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)	? "tese_noSubgroup"		: "tese";
   1952 
   1953 	shaderStageRequired = shaderStageTested | shaderStageRequired;
   1954 
   1955 	vertexShaderModule = createShaderModule(context.getDeviceInterface(), context.getDevice(), context.getBinaryCollection().get(vert), 0u);
   1956 	if (shaderStageRequired & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT)
   1957 	{
   1958 		teCtrlShaderModule = createShaderModule(context.getDeviceInterface(), context.getDevice(), context.getBinaryCollection().get(tesc), 0u);
   1959 		teEvalShaderModule = createShaderModule(context.getDeviceInterface(), context.getDevice(), context.getBinaryCollection().get(tese), 0u);
   1960 	}
   1961 	if (shaderStageRequired & VK_SHADER_STAGE_GEOMETRY_BIT)
   1962 	{
   1963 		if (shaderStageRequired & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT)
   1964 		{
   1965 			// tessellation shaders output line primitives
   1966 			geometryShaderModule = createShaderModule(context.getDeviceInterface(), context.getDevice(), context.getBinaryCollection().get("geometry_lines"), 0u);
   1967 		}
   1968 		else
   1969 		{
   1970 			// otherwise points are processed by geometry shader
   1971 			geometryShaderModule = createShaderModule(context.getDeviceInterface(), context.getDevice(), context.getBinaryCollection().get("geometry_points"), 0u);
   1972 		}
   1973 	}
   1974 	if (shaderStageRequired & VK_SHADER_STAGE_FRAGMENT_BIT)
   1975 		fragmentShaderModule = createShaderModule(context.getDeviceInterface(), context.getDevice(), context.getBinaryCollection().get("fragment"), 0u);
   1976 
   1977 	std::vector< de::SharedPtr<BufferOrImage> > inputBuffers(stagesCount + extraDatasCount);
   1978 
   1979 	DescriptorSetLayoutBuilder layoutBuilder;
   1980 	// The implicit result SSBO we use to store our outputs from the shader
   1981 	for (deUint32 ndx = 0u; ndx < stagesCount; ++ndx)
   1982 	{
   1983 		const VkDeviceSize shaderSize = (stagesVector[ndx] == VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) ? maxWidth * 2 : maxWidth;
   1984 		const VkDeviceSize size = getElementSizeInBytes(format, SSBOData::LayoutStd430) * shaderSize;
   1985 		inputBuffers[ndx] = de::SharedPtr<BufferOrImage>(new Buffer(context, size));
   1986 
   1987 		layoutBuilder.addIndexedBinding(inputBuffers[ndx]->getType(), 1, stagesVector[ndx], getResultBinding(stagesVector[ndx]), DE_NULL);
   1988 	}
   1989 
   1990 	for (deUint32 ndx = stagesCount; ndx < stagesCount + extraDatasCount; ++ndx)
   1991 	{
   1992 		const deUint32 datasNdx = ndx - stagesCount;
   1993 		if (extraDatas[datasNdx].isImage)
   1994 		{
   1995 			inputBuffers[ndx] = de::SharedPtr<BufferOrImage>(new Image(context, static_cast<deUint32>(extraDatas[datasNdx].numElements), 1, extraDatas[datasNdx].format));
   1996 		}
   1997 		else
   1998 		{
   1999 			const vk::VkDeviceSize size = getElementSizeInBytes(extraDatas[datasNdx].format, extraDatas[datasNdx].layout) * extraDatas[datasNdx].numElements;
   2000 			inputBuffers[ndx] = de::SharedPtr<BufferOrImage>(new Buffer(context, size));
   2001 		}
   2002 
   2003 		const Allocation& alloc = inputBuffers[ndx]->getAllocation();
   2004 		initializeMemory(context, alloc, extraDatas[datasNdx]);
   2005 
   2006 		layoutBuilder.addIndexedBinding(inputBuffers[ndx]->getType(), 1,
   2007 								extraDatas[datasNdx].stages, extraDatas[datasNdx].binding, DE_NULL);
   2008 	}
   2009 
   2010 	const Unique<VkDescriptorSetLayout> descriptorSetLayout(
   2011 		layoutBuilder.build(context.getDeviceInterface(), context.getDevice()));
   2012 
   2013 	const Unique<VkPipelineLayout> pipelineLayout(
   2014 		makePipelineLayout(context, *descriptorSetLayout));
   2015 
   2016 	const Unique<VkRenderPass> renderPass(makeRenderPass(context, format));
   2017 	const Unique<VkPipeline> pipeline(makeGraphicsPipeline(context, *pipelineLayout,
   2018 										shaderStageRequired,
   2019 										*vertexShaderModule, *fragmentShaderModule, *geometryShaderModule, *teCtrlShaderModule, *teEvalShaderModule,
   2020 										*renderPass,
   2021 										(shaderStageRequired & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) ? VK_PRIMITIVE_TOPOLOGY_PATCH_LIST : VK_PRIMITIVE_TOPOLOGY_POINT_LIST));
   2022 
   2023 	DescriptorPoolBuilder poolBuilder;
   2024 
   2025 	for (deUint32 ndx = 0u; ndx < static_cast<deUint32>(inputBuffers.size()); ndx++)
   2026 	{
   2027 		poolBuilder.addType(inputBuffers[ndx]->getType());
   2028 	}
   2029 
   2030 	const Unique<VkDescriptorPool> descriptorPool(
   2031 		poolBuilder.build(context.getDeviceInterface(), context.getDevice(),
   2032 						  VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
   2033 
   2034 	// Create descriptor set
   2035 	const Unique<VkDescriptorSet> descriptorSet(
   2036 		makeDescriptorSet(context, *descriptorPool, *descriptorSetLayout));
   2037 
   2038 	DescriptorSetUpdateBuilder updateBuilder;
   2039 
   2040 	for (deUint32 ndx = 0u; ndx < stagesCount; ndx++)
   2041 	{
   2042 		if (inputBuffers[ndx]->isImage())
   2043 		{
   2044 			VkDescriptorImageInfo info =
   2045 				makeDescriptorImageInfo(inputBuffers[ndx]->getAsImage()->getSampler(),
   2046 										inputBuffers[ndx]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
   2047 
   2048 			updateBuilder.writeSingle(*descriptorSet,
   2049 									  DescriptorSetUpdateBuilder::Location::binding(getResultBinding(stagesVector[ndx])),
   2050 									  inputBuffers[ndx]->getType(), &info);
   2051 		}
   2052 		else
   2053 		{
   2054 			VkDescriptorBufferInfo info =
   2055 				makeDescriptorBufferInfo(inputBuffers[ndx]->getAsBuffer()->getBuffer(),
   2056 										 0ull, inputBuffers[ndx]->getAsBuffer()->getSize());
   2057 
   2058 			updateBuilder.writeSingle(*descriptorSet,
   2059 									  DescriptorSetUpdateBuilder::Location::binding(getResultBinding(stagesVector[ndx])),
   2060 									  inputBuffers[ndx]->getType(), &info);
   2061 		}
   2062 	}
   2063 
   2064 	for (deUint32 ndx = stagesCount; ndx < stagesCount + extraDatasCount; ndx++)
   2065 	{
   2066 		if (inputBuffers[ndx]->isImage())
   2067 		{
   2068 			VkDescriptorImageInfo info =
   2069 				makeDescriptorImageInfo(inputBuffers[ndx]->getAsImage()->getSampler(),
   2070 										inputBuffers[ndx]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
   2071 
   2072 			updateBuilder.writeSingle(*descriptorSet,
   2073 									  DescriptorSetUpdateBuilder::Location::binding(extraDatas[ndx -stagesCount].binding),
   2074 									  inputBuffers[ndx]->getType(), &info);
   2075 		}
   2076 		else
   2077 		{
   2078 			VkDescriptorBufferInfo info =
   2079 				makeDescriptorBufferInfo(inputBuffers[ndx]->getAsBuffer()->getBuffer(),
   2080 										 0ull, inputBuffers[ndx]->getAsBuffer()->getSize());
   2081 
   2082 			updateBuilder.writeSingle(*descriptorSet,
   2083 									  DescriptorSetUpdateBuilder::Location::binding(extraDatas[ndx - stagesCount].binding),
   2084 									  inputBuffers[ndx]->getType(), &info);
   2085 		}
   2086 	}
   2087 	updateBuilder.update(context.getDeviceInterface(), context.getDevice());
   2088 
   2089 	{
   2090 		const Unique<VkCommandPool>		cmdPool					(makeCommandPool(context));
   2091 		const deUint32					subgroupSize			= getSubgroupSize(context);
   2092 		const Unique<VkCommandBuffer>	cmdBuffer				(makeCommandBuffer(context, *cmdPool));
   2093 		unsigned						totalIterations			= 0u;
   2094 		unsigned						failedIterations		= 0u;
   2095 		Image							resultImage				(context, maxWidth, 1, format, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
   2096 		const Unique<VkFramebuffer>		framebuffer				(makeFramebuffer(context, *renderPass, resultImage.getImageView(), maxWidth, 1));
   2097 		const VkViewport				viewport				= makeViewport(maxWidth, 1u);
   2098 		const VkRect2D					scissor					= makeRect2D(maxWidth, 1u);
   2099 		const vk::VkDeviceSize			imageResultSize			= tcu::getPixelSize(vk::mapVkFormat(format)) * maxWidth;
   2100 		Buffer							imageBufferResult		(context, imageResultSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
   2101 		const VkImageSubresourceRange	subresourceRange		=
   2102 		{
   2103 			VK_IMAGE_ASPECT_COLOR_BIT,											//VkImageAspectFlags	aspectMask
   2104 			0u,																	//deUint32				baseMipLevel
   2105 			1u,																	//deUint32				levelCount
   2106 			0u,																	//deUint32				baseArrayLayer
   2107 			1u																	//deUint32				layerCount
   2108 		};
   2109 
   2110 		const VkImageMemoryBarrier		colorAttachmentBarrier	= makeImageMemoryBarrier(
   2111 			(VkAccessFlags)0u, VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
   2112 			VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL,
   2113 			resultImage.getImage(), subresourceRange);
   2114 
   2115 		for (deUint32 width = 1u; width < maxWidth; width++)
   2116 		{
   2117 			for (deUint32 ndx = stagesCount; ndx < stagesCount + extraDatasCount; ++ndx)
   2118 			{
   2119 				// re-init the data
   2120 				const Allocation& alloc = inputBuffers[ndx]->getAllocation();
   2121 				initializeMemory(context, alloc, extraDatas[ndx - stagesCount]);
   2122 			}
   2123 
   2124 			totalIterations++;
   2125 
   2126 			beginCommandBuffer(context.getDeviceInterface(), *cmdBuffer);
   2127 
   2128 			context.getDeviceInterface().cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, (VkDependencyFlags)0, 0u, (const VkMemoryBarrier*)DE_NULL, 0u, (const VkBufferMemoryBarrier*)DE_NULL, 1u, &colorAttachmentBarrier);
   2129 
   2130 			context.getDeviceInterface().cmdSetViewport(*cmdBuffer, 0, 1, &viewport);
   2131 
   2132 			context.getDeviceInterface().cmdSetScissor(*cmdBuffer, 0, 1, &scissor);
   2133 
   2134 			beginRenderPass(context.getDeviceInterface(), *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, maxWidth, 1u), tcu::Vec4(0.0f));
   2135 
   2136 			context.getDeviceInterface().cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
   2137 
   2138 			context.getDeviceInterface().cmdBindDescriptorSets(*cmdBuffer,
   2139 					VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, 1u,
   2140 					&descriptorSet.get(), 0u, DE_NULL);
   2141 
   2142 			context.getDeviceInterface().cmdDraw(*cmdBuffer, width, 1, 0, 0);
   2143 
   2144 			endRenderPass(context.getDeviceInterface(), *cmdBuffer);
   2145 
   2146 			copyImageToBuffer(context.getDeviceInterface(), *cmdBuffer, resultImage.getImage(), imageBufferResult.getBuffer(), tcu::IVec2(width, 1), VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
   2147 
   2148 			endCommandBuffer(context.getDeviceInterface(), *cmdBuffer);
   2149 
   2150 			Move<VkFence> fence(submitCommandBuffer(context, *cmdBuffer));
   2151 			waitFence(context, fence);
   2152 
   2153 			for (deUint32 ndx = 0u; ndx < stagesCount; ++ndx)
   2154 			{
   2155 				std::vector<const void*> datas;
   2156 				if (!inputBuffers[ndx]->isImage())
   2157 				{
   2158 					const Allocation& resultAlloc = inputBuffers[ndx]->getAllocation();
   2159 					invalidateAlloc(context.getDeviceInterface(), context.getDevice(), resultAlloc);
   2160 					// we always have our result data first
   2161 					datas.push_back(resultAlloc.getHostPtr());
   2162 				}
   2163 
   2164 				for (deUint32 index = stagesCount; index < stagesCount + extraDatasCount; ++index)
   2165 				{
   2166 					const deUint32 datasNdx = index - stagesCount;
   2167 					if ((stagesVector[ndx] & extraDatas[datasNdx].stages) && (!inputBuffers[index]->isImage()))
   2168 					{
   2169 						const Allocation& resultAlloc = inputBuffers[index]->getAllocation();
   2170 						invalidateAlloc(context.getDeviceInterface(), context.getDevice(), resultAlloc);
   2171 						// we always have our result data first
   2172 						datas.push_back(resultAlloc.getHostPtr());
   2173 					}
   2174 				}
   2175 
   2176 				if (!checkResult(datas, (stagesVector[ndx] == VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) ? width * 2 : width , subgroupSize))
   2177 					failedIterations++;
   2178 			}
   2179 			if (shaderStageTested & VK_SHADER_STAGE_FRAGMENT_BIT)
   2180 			{
   2181 				std::vector<const void*> datas;
   2182 				const Allocation& resultAlloc = imageBufferResult.getAllocation();
   2183 				invalidateAlloc(context.getDeviceInterface(), context.getDevice(), resultAlloc);
   2184 
   2185 				// we always have our result data first
   2186 				datas.push_back(resultAlloc.getHostPtr());
   2187 
   2188 				for (deUint32 index = stagesCount; index < stagesCount + extraDatasCount; ++index)
   2189 				{
   2190 					const deUint32 datasNdx = index - stagesCount;
   2191 					if (VK_SHADER_STAGE_FRAGMENT_BIT & extraDatas[datasNdx].stages && (!inputBuffers[index]->isImage()))
   2192 					{
   2193 						const Allocation& alloc = inputBuffers[index]->getAllocation();
   2194 						invalidateAlloc(context.getDeviceInterface(), context.getDevice(), alloc);
   2195 						// we always have our result data first
   2196 						datas.push_back(alloc.getHostPtr());
   2197 					}
   2198 				}
   2199 
   2200 				if (!checkResult(datas, width , subgroupSize))
   2201 					failedIterations++;
   2202 			}
   2203 
   2204 			context.getDeviceInterface().resetCommandBuffer(*cmdBuffer, 0);
   2205 		}
   2206 
   2207 		if (0 < failedIterations)
   2208 		{
   2209 			context.getTestContext().getLog()
   2210 					<< TestLog::Message << (totalIterations - failedIterations) << " / "
   2211 					<< totalIterations << " values passed" << TestLog::EndMessage;
   2212 			return tcu::TestStatus::fail("Failed!");
   2213 		}
   2214 	}
   2215 
   2216 	return tcu::TestStatus::pass("OK");
   2217 }
   2218 
   2219 tcu::TestStatus vkt::subgroups::makeVertexFrameBufferTest(Context& context, vk::VkFormat format,
   2220 	SSBOData* extraData, deUint32 extraDataCount,
   2221 	bool (*checkResult)(std::vector<const void*> datas, deUint32 width, deUint32 subgroupSize))
   2222 {
   2223 	const deUint32							maxWidth				= 1024u;
   2224 	vector<de::SharedPtr<BufferOrImage> >	inputBuffers			(extraDataCount);
   2225 	DescriptorSetLayoutBuilder				layoutBuilder;
   2226 	const Unique<VkShaderModule>			vertexShaderModule		(createShaderModule
   2227 																		(context.getDeviceInterface(), context.getDevice(), context.getBinaryCollection().get("vert"), 0u));
   2228 	const Unique<VkShaderModule>			fragmentShaderModule	(createShaderModule
   2229 																		(context.getDeviceInterface(), context.getDevice(), context.getBinaryCollection().get("fragment"), 0u));
   2230 	const Unique<VkRenderPass>				renderPass				(makeRenderPass(context, format));
   2231 
   2232 	const VkVertexInputBindingDescription	vertexInputBinding		=
   2233 	{
   2234 		0u,											// binding;
   2235 		static_cast<deUint32>(sizeof(tcu::Vec4)),	// stride;
   2236 		VK_VERTEX_INPUT_RATE_VERTEX					// inputRate
   2237 	};
   2238 
   2239 	const VkVertexInputAttributeDescription	vertexInputAttribute	=
   2240 	{
   2241 		0u,
   2242 		0u,
   2243 		VK_FORMAT_R32G32B32A32_SFLOAT,
   2244 		0u
   2245 	};
   2246 
   2247 	for (deUint32 i = 0u; i < extraDataCount; i++)
   2248 	{
   2249 		if (extraData[i].isImage)
   2250 		{
   2251 			inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Image(context, static_cast<deUint32>(extraData[i].numElements), 1u, extraData[i].format));
   2252 		}
   2253 		else
   2254 		{
   2255 			vk::VkDeviceSize size = getElementSizeInBytes(extraData[i].format, extraData[i].layout) * extraData[i].numElements;
   2256 			inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Buffer(context, size, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT));
   2257 		}
   2258 		const Allocation& alloc = inputBuffers[i]->getAllocation();
   2259 		initializeMemory(context, alloc, extraData[i]);
   2260 	}
   2261 
   2262 	for (deUint32 ndx = 0u; ndx < extraDataCount; ndx++)
   2263 		layoutBuilder.addBinding(inputBuffers[ndx]->getType(), 1u, VK_SHADER_STAGE_VERTEX_BIT, DE_NULL);
   2264 
   2265 	const Unique<VkDescriptorSetLayout>		descriptorSetLayout		(layoutBuilder.build(context.getDeviceInterface(), context.getDevice()));
   2266 
   2267 	const Unique<VkPipelineLayout>			pipelineLayout			(makePipelineLayout(context, *descriptorSetLayout));
   2268 
   2269 	const Unique<VkPipeline>				pipeline				(makeGraphicsPipeline(context, *pipelineLayout,
   2270 																		VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT,
   2271 																		*vertexShaderModule, *fragmentShaderModule,
   2272 																		DE_NULL, DE_NULL, DE_NULL,
   2273 																		*renderPass, VK_PRIMITIVE_TOPOLOGY_POINT_LIST,
   2274 																		&vertexInputBinding, &vertexInputAttribute, true, format));
   2275 	DescriptorPoolBuilder					poolBuilder;
   2276 	DescriptorSetUpdateBuilder				updateBuilder;
   2277 
   2278 
   2279 	for (deUint32 ndx = 0u; ndx < inputBuffers.size(); ndx++)
   2280 		poolBuilder.addType(inputBuffers[ndx]->getType());
   2281 
   2282 	Move <VkDescriptorPool>					descriptorPool;
   2283 	Move <VkDescriptorSet>					descriptorSet;
   2284 
   2285 	if (extraDataCount > 0)
   2286 	{
   2287 		descriptorPool = poolBuilder.build(context.getDeviceInterface(), context.getDevice(),
   2288 							VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
   2289 		descriptorSet = makeDescriptorSet(context, *descriptorPool, *descriptorSetLayout);
   2290 	}
   2291 
   2292 	for (deUint32 ndx = 0u; ndx < extraDataCount; ndx++)
   2293 	{
   2294 		const Allocation& alloc = inputBuffers[ndx]->getAllocation();
   2295 		initializeMemory(context, alloc, extraData[ndx]);
   2296 	}
   2297 
   2298 	for (deUint32 buffersNdx = 0u; buffersNdx < inputBuffers.size(); buffersNdx++)
   2299 	{
   2300 		if (inputBuffers[buffersNdx]->isImage())
   2301 		{
   2302 			VkDescriptorImageInfo info =
   2303 				makeDescriptorImageInfo(inputBuffers[buffersNdx]->getAsImage()->getSampler(),
   2304 										inputBuffers[buffersNdx]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
   2305 
   2306 			updateBuilder.writeSingle(*descriptorSet,
   2307 										DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
   2308 										inputBuffers[buffersNdx]->getType(), &info);
   2309 		}
   2310 		else
   2311 		{
   2312 			VkDescriptorBufferInfo info =
   2313 				makeDescriptorBufferInfo(inputBuffers[buffersNdx]->getAsBuffer()->getBuffer(),
   2314 										0ull, inputBuffers[buffersNdx]->getAsBuffer()->getSize());
   2315 
   2316 			updateBuilder.writeSingle(*descriptorSet,
   2317 										DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
   2318 										inputBuffers[buffersNdx]->getType(), &info);
   2319 		}
   2320 	}
   2321 	updateBuilder.update(context.getDeviceInterface(), context.getDevice());
   2322 
   2323 	const Unique<VkCommandPool>				cmdPool					(makeCommandPool(context));
   2324 
   2325 	const deUint32							subgroupSize			= getSubgroupSize(context);
   2326 
   2327 	const Unique<VkCommandBuffer>			cmdBuffer				(makeCommandBuffer(context, *cmdPool));
   2328 
   2329 	const vk::VkDeviceSize					vertexBufferSize		= maxWidth * sizeof(tcu::Vec4);
   2330 	Buffer									vertexBuffer			(context, vertexBufferSize, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT);
   2331 
   2332 	unsigned								totalIterations			= 0u;
   2333 	unsigned								failedIterations		= 0u;
   2334 
   2335 	Image									discardableImage		(context, maxWidth, 1u, format, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
   2336 
   2337 	{
   2338 		const Allocation&		alloc				= vertexBuffer.getAllocation();
   2339 		std::vector<tcu::Vec4>	data				(maxWidth, Vec4(1.0f, 1.0f, 1.0f, 1.0f));
   2340 		const float				pixelSize			= 2.0f / static_cast<float>(maxWidth);
   2341 		float					leftHandPosition	= -1.0f;
   2342 
   2343 		for(deUint32 ndx = 0u; ndx < maxWidth; ++ndx)
   2344 		{
   2345 			data[ndx][0] = leftHandPosition + pixelSize / 2.0f;
   2346 			leftHandPosition += pixelSize;
   2347 		}
   2348 
   2349 		deMemcpy(alloc.getHostPtr(), &data[0], maxWidth * sizeof(tcu::Vec4));
   2350 		flushAlloc(context.getDeviceInterface(), context.getDevice(), alloc);
   2351 	}
   2352 
   2353 	for (deUint32 width = 1u; width < maxWidth; width++)
   2354 	{
   2355 		totalIterations++;
   2356 		const Unique<VkFramebuffer>	framebuffer			(makeFramebuffer(context, *renderPass, discardableImage.getImageView(), maxWidth, 1));
   2357 		const VkViewport			viewport			= makeViewport(maxWidth, 1u);
   2358 		const VkRect2D				scissor				= makeRect2D(maxWidth, 1u);
   2359 		const vk::VkDeviceSize		imageResultSize		= tcu::getPixelSize(vk::mapVkFormat(format)) * maxWidth;
   2360 		Buffer						imageBufferResult	(context, imageResultSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
   2361 		const VkDeviceSize			vertexBufferOffset	= 0u;
   2362 
   2363 		for (deUint32 ndx = 0u; ndx < inputBuffers.size(); ndx++)
   2364 		{
   2365 			const Allocation& alloc = inputBuffers[ndx]->getAllocation();
   2366 			initializeMemory(context, alloc, extraData[ndx]);
   2367 		}
   2368 
   2369 		beginCommandBuffer(context.getDeviceInterface(), *cmdBuffer);
   2370 		{
   2371 			context.getDeviceInterface().cmdSetViewport(
   2372 				*cmdBuffer, 0, 1, &viewport);
   2373 
   2374 			context.getDeviceInterface().cmdSetScissor(
   2375 				*cmdBuffer, 0, 1, &scissor);
   2376 
   2377 			beginRenderPass(context.getDeviceInterface(), *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, maxWidth, 1u), tcu::Vec4(0.0f));
   2378 
   2379 			context.getDeviceInterface().cmdBindPipeline(
   2380 				*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
   2381 
   2382 			if (extraDataCount > 0)
   2383 			{
   2384 				context.getDeviceInterface().cmdBindDescriptorSets(*cmdBuffer,
   2385 					VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, 1u,
   2386 					&descriptorSet.get(), 0u, DE_NULL);
   2387 			}
   2388 
   2389 			context.getDeviceInterface().cmdBindVertexBuffers(*cmdBuffer, 0u, 1u, vertexBuffer.getBufferPtr(), &vertexBufferOffset);
   2390 
   2391 			context.getDeviceInterface().cmdDraw(*cmdBuffer, width, 1u, 0u, 0u);
   2392 
   2393 			endRenderPass(context.getDeviceInterface(), *cmdBuffer);
   2394 
   2395 			copyImageToBuffer(context.getDeviceInterface(), *cmdBuffer, discardableImage.getImage(), imageBufferResult.getBuffer(), tcu::IVec2(maxWidth, 1), VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
   2396 
   2397 			endCommandBuffer(context.getDeviceInterface(), *cmdBuffer);
   2398 			Move<VkFence> fence(submitCommandBuffer(context, *cmdBuffer));
   2399 			waitFence(context, fence);
   2400 		}
   2401 
   2402 		{
   2403 			const Allocation& allocResult = imageBufferResult.getAllocation();
   2404 			invalidateAlloc(context.getDeviceInterface(), context.getDevice(), allocResult);
   2405 
   2406 			std::vector<const void*> datas;
   2407 			datas.push_back(allocResult.getHostPtr());
   2408 			if (!checkResult(datas, width, subgroupSize))
   2409 				failedIterations++;
   2410 		}
   2411 	}
   2412 
   2413 	if (0 < failedIterations)
   2414 	{
   2415 		context.getTestContext().getLog()
   2416 				<< TestLog::Message << (totalIterations - failedIterations) << " / "
   2417 				<< totalIterations << " values passed" << TestLog::EndMessage;
   2418 		return tcu::TestStatus::fail("Failed!");
   2419 	}
   2420 
   2421 	return tcu::TestStatus::pass("OK");
   2422 }
   2423 
   2424 
   2425 tcu::TestStatus vkt::subgroups::makeFragmentFrameBufferTest	(Context& context, VkFormat format, SSBOData* extraDatas,
   2426 	deUint32 extraDatasCount,
   2427 	bool (*checkResult)(std::vector<const void*> datas, deUint32 width,
   2428 						deUint32 height, deUint32 subgroupSize))
   2429 {
   2430 	const Unique<VkShaderModule>			vertexShaderModule		(createShaderModule
   2431 																		(context.getDeviceInterface(), context.getDevice(), context.getBinaryCollection().get("vert"), 0u));
   2432 	const Unique<VkShaderModule>			fragmentShaderModule	(createShaderModule
   2433 																		(context.getDeviceInterface(), context.getDevice(), context.getBinaryCollection().get("fragment"), 0u));
   2434 
   2435 	std::vector< de::SharedPtr<BufferOrImage> > inputBuffers(extraDatasCount);
   2436 
   2437 	for (deUint32 i = 0; i < extraDatasCount; i++)
   2438 	{
   2439 		if (extraDatas[i].isImage)
   2440 		{
   2441 			inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Image(context,
   2442 										static_cast<deUint32>(extraDatas[i].numElements), 1, extraDatas[i].format));
   2443 		}
   2444 		else
   2445 		{
   2446 			vk::VkDeviceSize size =
   2447 				getElementSizeInBytes(extraDatas[i].format, extraDatas[i].layout) * extraDatas[i].numElements;
   2448 			inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Buffer(context, size, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT));
   2449 		}
   2450 
   2451 		const Allocation& alloc = inputBuffers[i]->getAllocation();
   2452 		initializeMemory(context, alloc, extraDatas[i]);
   2453 	}
   2454 
   2455 	DescriptorSetLayoutBuilder layoutBuilder;
   2456 
   2457 	for (deUint32 i = 0; i < extraDatasCount; i++)
   2458 	{
   2459 		layoutBuilder.addBinding(inputBuffers[i]->getType(), 1,
   2460 								 VK_SHADER_STAGE_FRAGMENT_BIT, DE_NULL);
   2461 	}
   2462 
   2463 	const Unique<VkDescriptorSetLayout> descriptorSetLayout(
   2464 		layoutBuilder.build(context.getDeviceInterface(), context.getDevice()));
   2465 
   2466 	const Unique<VkPipelineLayout> pipelineLayout(
   2467 		makePipelineLayout(context, *descriptorSetLayout));
   2468 
   2469 	const Unique<VkRenderPass> renderPass(makeRenderPass(context, format));
   2470 	const Unique<VkPipeline> pipeline(makeGraphicsPipeline(context, *pipelineLayout,
   2471 									  VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT,
   2472 									  *vertexShaderModule, *fragmentShaderModule, DE_NULL, DE_NULL, DE_NULL, *renderPass, VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP,
   2473 									  DE_NULL, DE_NULL, true));
   2474 
   2475 	DescriptorPoolBuilder poolBuilder;
   2476 
   2477 	// To stop validation complaining, always add at least one type to pool.
   2478 	poolBuilder.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
   2479 	for (deUint32 i = 0; i < extraDatasCount; i++)
   2480 	{
   2481 		poolBuilder.addType(inputBuffers[i]->getType());
   2482 	}
   2483 
   2484 	Move<VkDescriptorPool> descriptorPool;
   2485 	// Create descriptor set
   2486 	Move<VkDescriptorSet> descriptorSet;
   2487 
   2488 	if (extraDatasCount > 0)
   2489 	{
   2490 		descriptorPool = poolBuilder.build(context.getDeviceInterface(), context.getDevice(),
   2491 													VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
   2492 
   2493 		descriptorSet	= makeDescriptorSet(context, *descriptorPool, *descriptorSetLayout);
   2494 	}
   2495 
   2496 	DescriptorSetUpdateBuilder updateBuilder;
   2497 
   2498 	for (deUint32 i = 0; i < extraDatasCount; i++)
   2499 	{
   2500 		if (inputBuffers[i]->isImage())
   2501 		{
   2502 			VkDescriptorImageInfo info =
   2503 				makeDescriptorImageInfo(inputBuffers[i]->getAsImage()->getSampler(),
   2504 										inputBuffers[i]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
   2505 
   2506 			updateBuilder.writeSingle(*descriptorSet,
   2507 									  DescriptorSetUpdateBuilder::Location::binding(i),
   2508 									  inputBuffers[i]->getType(), &info);
   2509 		}
   2510 		else
   2511 		{
   2512 			VkDescriptorBufferInfo info =
   2513 				makeDescriptorBufferInfo(inputBuffers[i]->getAsBuffer()->getBuffer(),
   2514 										 0ull, inputBuffers[i]->getAsBuffer()->getSize());
   2515 
   2516 			updateBuilder.writeSingle(*descriptorSet,
   2517 									  DescriptorSetUpdateBuilder::Location::binding(i),
   2518 									  inputBuffers[i]->getType(), &info);
   2519 		}
   2520 	}
   2521 
   2522 	if (extraDatasCount > 0)
   2523 		updateBuilder.update(context.getDeviceInterface(), context.getDevice());
   2524 
   2525 	const Unique<VkCommandPool> cmdPool(makeCommandPool(context));
   2526 
   2527 	const deUint32 subgroupSize = getSubgroupSize(context);
   2528 
   2529 	const Unique<VkCommandBuffer> cmdBuffer(
   2530 		makeCommandBuffer(context, *cmdPool));
   2531 
   2532 	unsigned totalIterations = 0;
   2533 	unsigned failedIterations = 0;
   2534 
   2535 	for (deUint32 width = 8; width <= subgroupSize; width *= 2)
   2536 	{
   2537 		for (deUint32 height = 8; height <= subgroupSize; height *= 2)
   2538 		{
   2539 			totalIterations++;
   2540 
   2541 			// re-init the data
   2542 			for (deUint32 i = 0; i < extraDatasCount; i++)
   2543 			{
   2544 				const Allocation& alloc = inputBuffers[i]->getAllocation();
   2545 				initializeMemory(context, alloc, extraDatas[i]);
   2546 			}
   2547 
   2548 			VkDeviceSize formatSize = getFormatSizeInBytes(format);
   2549 			const VkDeviceSize resultImageSizeInBytes =
   2550 				width * height * formatSize;
   2551 
   2552 			Image resultImage(context, width, height, format,
   2553 							  VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT |
   2554 							  VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
   2555 
   2556 			Buffer resultBuffer(context, resultImageSizeInBytes,
   2557 								VK_IMAGE_USAGE_TRANSFER_DST_BIT);
   2558 
   2559 			const Unique<VkFramebuffer> framebuffer(makeFramebuffer(context,
   2560 													*renderPass, resultImage.getImageView(), width, height));
   2561 
   2562 			beginCommandBuffer(context.getDeviceInterface(), *cmdBuffer);
   2563 
   2564 			VkViewport viewport = makeViewport(width, height);
   2565 
   2566 			context.getDeviceInterface().cmdSetViewport(
   2567 				*cmdBuffer, 0, 1, &viewport);
   2568 
   2569 			VkRect2D scissor = {{0, 0}, {width, height}};
   2570 
   2571 			context.getDeviceInterface().cmdSetScissor(
   2572 				*cmdBuffer, 0, 1, &scissor);
   2573 
   2574 			beginRenderPass(context.getDeviceInterface(), *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, width, height), tcu::Vec4(0.0f));
   2575 
   2576 			context.getDeviceInterface().cmdBindPipeline(
   2577 				*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
   2578 
   2579 			if (extraDatasCount > 0)
   2580 			{
   2581 				context.getDeviceInterface().cmdBindDescriptorSets(*cmdBuffer,
   2582 						VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, 1u,
   2583 						&descriptorSet.get(), 0u, DE_NULL);
   2584 			}
   2585 
   2586 			context.getDeviceInterface().cmdDraw(*cmdBuffer, 4, 1, 0, 0);
   2587 
   2588 			endRenderPass(context.getDeviceInterface(), *cmdBuffer);
   2589 
   2590 			copyImageToBuffer(context.getDeviceInterface(), *cmdBuffer, resultImage.getImage(), resultBuffer.getBuffer(), tcu::IVec2(width, height), VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
   2591 
   2592 			endCommandBuffer(context.getDeviceInterface(), *cmdBuffer);
   2593 
   2594 			Move<VkFence> fence(submitCommandBuffer(context, *cmdBuffer));
   2595 
   2596 			waitFence(context, fence);
   2597 
   2598 			std::vector<const void*> datas;
   2599 			{
   2600 				const Allocation& resultAlloc = resultBuffer.getAllocation();
   2601 				invalidateAlloc(context.getDeviceInterface(), context.getDevice(), resultAlloc);
   2602 
   2603 				// we always have our result data first
   2604 				datas.push_back(resultAlloc.getHostPtr());
   2605 			}
   2606 
   2607 			if (!checkResult(datas, width, height, subgroupSize))
   2608 			{
   2609 				failedIterations++;
   2610 			}
   2611 
   2612 			context.getDeviceInterface().resetCommandBuffer(*cmdBuffer, 0);
   2613 		}
   2614 	}
   2615 
   2616 	if (0 < failedIterations)
   2617 	{
   2618 		context.getTestContext().getLog()
   2619 				<< TestLog::Message << (totalIterations - failedIterations) << " / "
   2620 				<< totalIterations << " values passed" << TestLog::EndMessage;
   2621 		return tcu::TestStatus::fail("Failed!");
   2622 	}
   2623 
   2624 	return tcu::TestStatus::pass("OK");
   2625 }
   2626 
   2627 tcu::TestStatus vkt::subgroups::makeComputeTest(
   2628 	Context& context, VkFormat format, SSBOData* inputs, deUint32 inputsCount,
   2629 	bool (*checkResult)(std::vector<const void*> datas,
   2630 						const deUint32 numWorkgroups[3], const deUint32 localSize[3],
   2631 						deUint32 subgroupSize))
   2632 {
   2633 	VkDeviceSize elementSize = getFormatSizeInBytes(format);
   2634 
   2635 	const VkDeviceSize resultBufferSize = maxSupportedSubgroupSize() *
   2636 										  maxSupportedSubgroupSize() *
   2637 										  maxSupportedSubgroupSize();
   2638 	const VkDeviceSize resultBufferSizeInBytes = resultBufferSize * elementSize;
   2639 
   2640 	Buffer resultBuffer(
   2641 		context, resultBufferSizeInBytes);
   2642 
   2643 	std::vector< de::SharedPtr<BufferOrImage> > inputBuffers(inputsCount);
   2644 
   2645 	for (deUint32 i = 0; i < inputsCount; i++)
   2646 	{
   2647 		if (inputs[i].isImage)
   2648 		{
   2649 			inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Image(context,
   2650 										static_cast<deUint32>(inputs[i].numElements), 1, inputs[i].format));
   2651 		}
   2652 		else
   2653 		{
   2654 			vk::VkDeviceSize size =
   2655 				getElementSizeInBytes(inputs[i].format, inputs[i].layout) * inputs[i].numElements;
   2656 			inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Buffer(context, size));
   2657 		}
   2658 
   2659 		const Allocation& alloc = inputBuffers[i]->getAllocation();
   2660 		initializeMemory(context, alloc, inputs[i]);
   2661 	}
   2662 
   2663 	DescriptorSetLayoutBuilder layoutBuilder;
   2664 	layoutBuilder.addBinding(
   2665 		resultBuffer.getType(), 1, VK_SHADER_STAGE_COMPUTE_BIT, DE_NULL);
   2666 
   2667 	for (deUint32 i = 0; i < inputsCount; i++)
   2668 	{
   2669 		layoutBuilder.addBinding(
   2670 			inputBuffers[i]->getType(), 1, VK_SHADER_STAGE_COMPUTE_BIT, DE_NULL);
   2671 	}
   2672 
   2673 	const Unique<VkDescriptorSetLayout> descriptorSetLayout(
   2674 		layoutBuilder.build(context.getDeviceInterface(), context.getDevice()));
   2675 
   2676 	const Unique<VkShaderModule> shaderModule(
   2677 		createShaderModule(context.getDeviceInterface(), context.getDevice(),
   2678 						   context.getBinaryCollection().get("comp"), 0u));
   2679 	const Unique<VkPipelineLayout> pipelineLayout(
   2680 		makePipelineLayout(context, *descriptorSetLayout));
   2681 
   2682 	DescriptorPoolBuilder poolBuilder;
   2683 
   2684 	poolBuilder.addType(resultBuffer.getType());
   2685 
   2686 	for (deUint32 i = 0; i < inputsCount; i++)
   2687 	{
   2688 		poolBuilder.addType(inputBuffers[i]->getType());
   2689 	}
   2690 
   2691 	const Unique<VkDescriptorPool> descriptorPool(
   2692 		poolBuilder.build(context.getDeviceInterface(), context.getDevice(),
   2693 						  VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
   2694 
   2695 	// Create descriptor set
   2696 	const Unique<VkDescriptorSet> descriptorSet(
   2697 		makeDescriptorSet(context, *descriptorPool, *descriptorSetLayout));
   2698 
   2699 	DescriptorSetUpdateBuilder updateBuilder;
   2700 
   2701 	const VkDescriptorBufferInfo resultDescriptorInfo =
   2702 		makeDescriptorBufferInfo(
   2703 			resultBuffer.getBuffer(), 0ull, resultBufferSizeInBytes);
   2704 
   2705 	updateBuilder.writeSingle(*descriptorSet,
   2706 							  DescriptorSetUpdateBuilder::Location::binding(0u),
   2707 							  VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &resultDescriptorInfo);
   2708 
   2709 	for (deUint32 i = 0; i < inputsCount; i++)
   2710 	{
   2711 		if (inputBuffers[i]->isImage())
   2712 		{
   2713 			VkDescriptorImageInfo info =
   2714 				makeDescriptorImageInfo(inputBuffers[i]->getAsImage()->getSampler(),
   2715 										inputBuffers[i]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
   2716 
   2717 			updateBuilder.writeSingle(*descriptorSet,
   2718 									  DescriptorSetUpdateBuilder::Location::binding(i + 1),
   2719 									  inputBuffers[i]->getType(), &info);
   2720 		}
   2721 		else
   2722 		{
   2723 			vk::VkDeviceSize size =
   2724 				getElementSizeInBytes(inputs[i].format, inputs[i].layout) * inputs[i].numElements;
   2725 			VkDescriptorBufferInfo info =
   2726 				makeDescriptorBufferInfo(inputBuffers[i]->getAsBuffer()->getBuffer(), 0ull, size);
   2727 
   2728 			updateBuilder.writeSingle(*descriptorSet,
   2729 									  DescriptorSetUpdateBuilder::Location::binding(i + 1),
   2730 									  inputBuffers[i]->getType(), &info);
   2731 		}
   2732 	}
   2733 
   2734 	updateBuilder.update(context.getDeviceInterface(), context.getDevice());
   2735 
   2736 	const Unique<VkCommandPool> cmdPool(makeCommandPool(context));
   2737 
   2738 	unsigned totalIterations = 0;
   2739 	unsigned failedIterations = 0;
   2740 
   2741 	const deUint32 subgroupSize = getSubgroupSize(context);
   2742 
   2743 	const Unique<VkCommandBuffer> cmdBuffer(
   2744 		makeCommandBuffer(context, *cmdPool));
   2745 
   2746 	const deUint32 numWorkgroups[3] = {4, 2, 2};
   2747 
   2748 	const deUint32 localSizesToTestCount = 15;
   2749 	deUint32 localSizesToTest[localSizesToTestCount][3] =
   2750 	{
   2751 		{1, 1, 1},
   2752 		{32, 4, 1},
   2753 		{32, 1, 4},
   2754 		{1, 32, 4},
   2755 		{1, 4, 32},
   2756 		{4, 1, 32},
   2757 		{4, 32, 1},
   2758 		{subgroupSize, 1, 1},
   2759 		{1, subgroupSize, 1},
   2760 		{1, 1, subgroupSize},
   2761 		{3, 5, 7},
   2762 		{128, 1, 1},
   2763 		{1, 128, 1},
   2764 		{1, 1, 64},
   2765 		{1, 1, 1} // Isn't used, just here to make double buffering checks easier
   2766 	};
   2767 
   2768 	Move<VkPipeline> lastPipeline(
   2769 		makeComputePipeline(context, *pipelineLayout, *shaderModule,
   2770 							localSizesToTest[0][0], localSizesToTest[0][1], localSizesToTest[0][2]));
   2771 
   2772 	for (deUint32 index = 0; index < (localSizesToTestCount - 1); index++)
   2773 	{
   2774 		const deUint32 nextX = localSizesToTest[index + 1][0];
   2775 		const deUint32 nextY = localSizesToTest[index + 1][1];
   2776 		const deUint32 nextZ = localSizesToTest[index + 1][2];
   2777 
   2778 		// we are running one test
   2779 		totalIterations++;
   2780 
   2781 		beginCommandBuffer(context.getDeviceInterface(), *cmdBuffer);
   2782 
   2783 		context.getDeviceInterface().cmdBindPipeline(
   2784 			*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *lastPipeline);
   2785 
   2786 		context.getDeviceInterface().cmdBindDescriptorSets(*cmdBuffer,
   2787 				VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u,
   2788 				&descriptorSet.get(), 0u, DE_NULL);
   2789 
   2790 		context.getDeviceInterface().cmdDispatch(*cmdBuffer,
   2791 				numWorkgroups[0], numWorkgroups[1], numWorkgroups[2]);
   2792 
   2793 		endCommandBuffer(context.getDeviceInterface(), *cmdBuffer);
   2794 
   2795 		Move<VkFence> fence(submitCommandBuffer(context, *cmdBuffer));
   2796 
   2797 		Move<VkPipeline> nextPipeline(
   2798 			makeComputePipeline(context, *pipelineLayout, *shaderModule,
   2799 								nextX, nextY, nextZ));
   2800 
   2801 		waitFence(context, fence);
   2802 
   2803 		std::vector<const void*> datas;
   2804 
   2805 		{
   2806 			const Allocation& resultAlloc = resultBuffer.getAllocation();
   2807 			invalidateAlloc(context.getDeviceInterface(), context.getDevice(), resultAlloc);
   2808 
   2809 			// we always have our result data first
   2810 			datas.push_back(resultAlloc.getHostPtr());
   2811 		}
   2812 
   2813 		for (deUint32 i = 0; i < inputsCount; i++)
   2814 		{
   2815 			if (!inputBuffers[i]->isImage())
   2816 			{
   2817 				const Allocation& resultAlloc = inputBuffers[i]->getAllocation();
   2818 				invalidateAlloc(context.getDeviceInterface(), context.getDevice(), resultAlloc);
   2819 
   2820 				// we always have our result data first
   2821 				datas.push_back(resultAlloc.getHostPtr());
   2822 			}
   2823 		}
   2824 
   2825 		if (!checkResult(datas, numWorkgroups, localSizesToTest[index], subgroupSize))
   2826 		{
   2827 			failedIterations++;
   2828 		}
   2829 
   2830 		context.getDeviceInterface().resetCommandBuffer(*cmdBuffer, 0);
   2831 
   2832 		lastPipeline = nextPipeline;
   2833 	}
   2834 
   2835 	if (0 < failedIterations)
   2836 	{
   2837 		context.getTestContext().getLog()
   2838 				<< TestLog::Message << (totalIterations - failedIterations) << " / "
   2839 				<< totalIterations << " values passed" << TestLog::EndMessage;
   2840 		return tcu::TestStatus::fail("Failed!");
   2841 	}
   2842 
   2843 	return tcu::TestStatus::pass("OK");
   2844 }
   2845