Home | History | Annotate | Download | only in subgroups
      1 /*------------------------------------------------------------------------
      2  * Vulkan Conformance Tests
      3  * ------------------------
      4  *
      5  * Copyright (c) 2017 The Khronos Group Inc.
      6  * Copyright (c) 2017 Codeplay Software Ltd.
      7  *
      8  * Licensed under the Apache License, Version 2.0 (the "License");
      9  * you may not use this file except in compliance with the License.
     10  * You may obtain a copy of the License at
     11  *
     12  *      http://www.apache.org/licenses/LICENSE-2.0
     13  *
     14  * Unless required by applicable law or agreed to in writing, software
     15  * distributed under the License is distributed on an "AS IS" BASIS,
     16  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     17  * See the License for the specific language governing permissions and
     18  * limitations under the License.
     19  *
     20  */ /*!
     21  * \file
     22  * \brief Subgroups Tests Utils
     23  */ /*--------------------------------------------------------------------*/
     24 
     25 #include "vktSubgroupsTestsUtils.hpp"
     26 #include "deRandom.hpp"
     27 #include "tcuCommandLine.hpp"
     28 #include "vkImageUtil.hpp"
     29 
     30 using namespace tcu;
     31 using namespace std;
     32 using namespace vk;
     33 using namespace vkt;
     34 
     35 namespace
     36 {
     37 deUint32 getFormatSizeInBytes(const VkFormat format)
     38 {
     39 	switch (format)
     40 	{
     41 		default:
     42 			DE_FATAL("Unhandled format!");
     43 		case VK_FORMAT_R32_SINT:
     44 		case VK_FORMAT_R32_UINT:
     45 			return sizeof(deInt32);
     46 		case VK_FORMAT_R32G32_SINT:
     47 		case VK_FORMAT_R32G32_UINT:
     48 			return static_cast<deUint32>(sizeof(deInt32) * 2);
     49 		case VK_FORMAT_R32G32B32_SINT:
     50 		case VK_FORMAT_R32G32B32_UINT:
     51 		case VK_FORMAT_R32G32B32A32_SINT:
     52 		case VK_FORMAT_R32G32B32A32_UINT:
     53 			return static_cast<deUint32>(sizeof(deInt32) * 4);
     54 		case VK_FORMAT_R32_SFLOAT:
     55 			return 4;
     56 		case VK_FORMAT_R32G32_SFLOAT:
     57 			return 8;
     58 		case VK_FORMAT_R32G32B32_SFLOAT:
     59 			return 16;
     60 		case VK_FORMAT_R32G32B32A32_SFLOAT:
     61 			return 16;
     62 		case VK_FORMAT_R64_SFLOAT:
     63 			return 8;
     64 		case VK_FORMAT_R64G64_SFLOAT:
     65 			return 16;
     66 		case VK_FORMAT_R64G64B64_SFLOAT:
     67 			return 32;
     68 		case VK_FORMAT_R64G64B64A64_SFLOAT:
     69 			return 32;
     70 		// The below formats are used to represent bool and bvec* types. These
     71 		// types are passed to the shader as int and ivec* types, before the
     72 		// calculations are done as booleans. We need a distinct type here so
     73 		// that the shader generators can switch on it and generate the correct
     74 		// shader source for testing.
     75 		case VK_FORMAT_R8_USCALED:
     76 			return sizeof(deInt32);
     77 		case VK_FORMAT_R8G8_USCALED:
     78 			return static_cast<deUint32>(sizeof(deInt32) * 2);
     79 		case VK_FORMAT_R8G8B8_USCALED:
     80 		case VK_FORMAT_R8G8B8A8_USCALED:
     81 			return static_cast<deUint32>(sizeof(deInt32) * 4);
     82 	}
     83 }
     84 
     85 Move<VkPipelineLayout> makePipelineLayout(
     86 	Context& context, const VkDescriptorSetLayout descriptorSetLayout)
     87 {
     88 	const vk::VkPipelineLayoutCreateInfo pipelineLayoutParams = {
     89 		VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, // VkStructureType sType;
     90 		DE_NULL,			  // const void*            pNext;
     91 		0u,					  // VkPipelineLayoutCreateFlags    flags;
     92 		1u,					  // deUint32             setLayoutCount;
     93 		&descriptorSetLayout, // const VkDescriptorSetLayout*   pSetLayouts;
     94 		0u,					  // deUint32             pushConstantRangeCount;
     95 		DE_NULL, // const VkPushConstantRange*   pPushConstantRanges;
     96 	};
     97 	return createPipelineLayout(context.getDeviceInterface(),
     98 								context.getDevice(), &pipelineLayoutParams);
     99 }
    100 
    101 Move<VkRenderPass> makeRenderPass(Context& context, VkFormat format)
    102 {
    103 	VkAttachmentReference colorReference = {
    104 		0, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL
    105 	};
    106 
    107 	const VkSubpassDescription subpassDescription = {0u,
    108 													 VK_PIPELINE_BIND_POINT_GRAPHICS, 0, DE_NULL, 1, &colorReference,
    109 													 DE_NULL, DE_NULL, 0, DE_NULL
    110 													};
    111 
    112 	const VkSubpassDependency subpassDependencies[2] = {
    113 		{   VK_SUBPASS_EXTERNAL, 0u, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
    114 			VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
    115 			VK_ACCESS_MEMORY_READ_BIT, VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
    116 			VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
    117 			VK_DEPENDENCY_BY_REGION_BIT
    118 		},
    119 		{   0u, VK_SUBPASS_EXTERNAL, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
    120 			VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
    121 			VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
    122 			VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
    123 			VK_ACCESS_MEMORY_READ_BIT, VK_DEPENDENCY_BY_REGION_BIT
    124 		},
    125 	};
    126 
    127 	VkAttachmentDescription attachmentDescription = {0u, format,
    128 													 VK_SAMPLE_COUNT_1_BIT, VK_ATTACHMENT_LOAD_OP_CLEAR,
    129 													 VK_ATTACHMENT_STORE_OP_STORE, VK_ATTACHMENT_LOAD_OP_DONT_CARE,
    130 													 VK_ATTACHMENT_STORE_OP_DONT_CARE, VK_IMAGE_LAYOUT_UNDEFINED,
    131 													 VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL
    132 													};
    133 
    134 	const VkRenderPassCreateInfo renderPassCreateInfo = {
    135 		VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, DE_NULL, 0u, 1,
    136 		&attachmentDescription, 1, &subpassDescription, 2, subpassDependencies
    137 	};
    138 
    139 	return createRenderPass(context.getDeviceInterface(), context.getDevice(),
    140 							&renderPassCreateInfo);
    141 }
    142 
    143 Move<VkFramebuffer> makeFramebuffer(Context& context,
    144 									const VkRenderPass renderPass, const VkImageView imageView, deUint32 width,
    145 									deUint32 height)
    146 {
    147 	const VkFramebufferCreateInfo framebufferCreateInfo = {
    148 		VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, DE_NULL, 0u, renderPass, 1,
    149 		&imageView, width, height, 1
    150 	};
    151 
    152 	return createFramebuffer(context.getDeviceInterface(), context.getDevice(),
    153 							 &framebufferCreateInfo);
    154 }
    155 
    156 Move<VkPipeline> makeGraphicsPipeline(Context&									context,
    157 									  const VkPipelineLayout					pipelineLayout,
    158 									  const VkShaderStageFlags					stages,
    159 									  const VkShaderModule						vertexShaderModule,
    160 									  const VkShaderModule						fragmentShaderModule,
    161 									  const VkShaderModule						geometryShaderModule,
    162 									  const VkShaderModule						tessellationControlModule,
    163 									  const VkShaderModule						tessellationEvaluationModule,
    164 									  const VkRenderPass						renderPass,
    165 									  const VkPrimitiveTopology					topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST,
    166 									  const VkVertexInputBindingDescription*	vertexInputBindingDescription = DE_NULL,
    167 									  const VkVertexInputAttributeDescription*	vertexInputAttributeDescriptions = DE_NULL,
    168 									  const vk::VkFormat						attachmentFormat = VK_FORMAT_R32G32B32A32_SFLOAT
    169 									  )
    170 {
    171 	const VkBool32 disableRasterization = !(VK_SHADER_STAGE_FRAGMENT_BIT & stages);
    172 	std::vector<vk::VkPipelineShaderStageCreateInfo> pipelineShaderStageParams;
    173 	{
    174 		const vk::VkPipelineShaderStageCreateInfo	stageCreateInfo =
    175 		{
    176 			VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,	// VkStructureType					sType
    177 				DE_NULL,											// const void*						pNext
    178 				0u,													// VkPipelineShaderStageCreateFlags	flags
    179 				VK_SHADER_STAGE_VERTEX_BIT,							// VkShaderStageFlagBits			stage
    180 				vertexShaderModule,									// VkShaderModule					module
    181 				"main",												// const char*						pName
    182 				DE_NULL												// const VkSpecializationInfo*		pSpecializationInfo
    183 		};
    184 		pipelineShaderStageParams.push_back(stageCreateInfo);
    185 	}
    186 
    187 	if (VK_SHADER_STAGE_FRAGMENT_BIT & stages)
    188 	{
    189 		const vk::VkPipelineShaderStageCreateInfo	stageCreateInfo =
    190 		{
    191 			VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,	// VkStructureType						sType
    192 			DE_NULL,												// const void*							pNext
    193 			0u,														// VkPipelineShaderStageCreateFlags		flags
    194 			VK_SHADER_STAGE_FRAGMENT_BIT,							// VkShaderStageFlagBits				stage
    195 			fragmentShaderModule,									// VkShaderModule						module
    196 			"main",													// const char*							pName
    197 			DE_NULL													// const VkSpecializationInfo*			pSpecializationInfo
    198 		};
    199 		pipelineShaderStageParams.push_back(stageCreateInfo);
    200 	}
    201 
    202 	if (VK_SHADER_STAGE_GEOMETRY_BIT & stages)
    203 	{
    204 		const vk::VkPipelineShaderStageCreateInfo	stageCreateInfo =
    205 		{
    206 			VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,	// VkStructureType						sType
    207 			DE_NULL,												// const void*							pNext
    208 			0u,														// VkPipelineShaderStageCreateFlags		flags
    209 			VK_SHADER_STAGE_GEOMETRY_BIT,							// VkShaderStageFlagBits				stage
    210 			geometryShaderModule,									// VkShaderModule						module
    211 			"main",													// const char*							pName
    212 			DE_NULL,												// const VkSpecializationInfo*			pSpecializationInfo
    213 		};
    214 		pipelineShaderStageParams.push_back(stageCreateInfo);
    215 	}
    216 
    217 	if (VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT & stages)
    218 	{
    219 		const vk::VkPipelineShaderStageCreateInfo	stageCreateInfo =
    220 		{
    221 			VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,	// VkStructureType						sType
    222 			DE_NULL,												// const void*							pNext
    223 			0u,														// VkPipelineShaderStageCreateFlags		flags
    224 			VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT,				// VkShaderStageFlagBits				stage
    225 			tessellationControlModule,								// VkShaderModule						module
    226 			"main",													// const char*							pName
    227 			DE_NULL													// const VkSpecializationInfo*			pSpecializationInfo
    228 		};
    229 		pipelineShaderStageParams.push_back(stageCreateInfo);
    230 	}
    231 
    232 	if (VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT & stages)
    233 	{
    234 		const vk::VkPipelineShaderStageCreateInfo	stageCreateInfo =
    235 		{
    236 			VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,	// VkStructureType						sType
    237 			DE_NULL,												// const void*							pNext
    238 			0u,														// VkPipelineShaderStageCreateFlags		flags
    239 			VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT,			// VkShaderStageFlagBits				stage
    240 			tessellationEvaluationModule,							// VkShaderModule						module
    241 			"main",													// const char*							pName
    242 			DE_NULL													// const VkSpecializationInfo*			pSpecializationInfo
    243 		};
    244 		pipelineShaderStageParams.push_back(stageCreateInfo);
    245 	}
    246 
    247 	const VkPipelineVertexInputStateCreateInfo vertexInputStateCreateInfo =
    248 	{
    249 		VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO,	// VkStructureType								sType;
    250 		DE_NULL,													// const void*									pNext;
    251 		0u,															// VkPipelineVertexInputStateCreateFlags		flags;
    252 		vertexInputBindingDescription == DE_NULL ? 0u : 1u,			// deUint32										vertexBindingDescriptionCount;
    253 		vertexInputBindingDescription,								// const VkVertexInputBindingDescription*		pVertexBindingDescriptions;
    254 		vertexInputAttributeDescriptions == DE_NULL ? 0u : 1u,		// deUint32										vertexAttributeDescriptionCount;
    255 		vertexInputAttributeDescriptions,							// const VkVertexInputAttributeDescription*		pVertexAttributeDescriptions;
    256 	};
    257 
    258 	const VkPipelineTessellationStateCreateInfo tessellationStateCreateInfo =
    259 	{
    260 		VK_STRUCTURE_TYPE_PIPELINE_TESSELLATION_STATE_CREATE_INFO,
    261 		DE_NULL,
    262 		0,
    263 		1
    264 	};
    265 
    266 	const VkPipelineInputAssemblyStateCreateInfo inputAssemblyStateCreateInfo =
    267 	{
    268 		VK_STRUCTURE_TYPE_PIPELINE_INPUT_ASSEMBLY_STATE_CREATE_INFO, DE_NULL,
    269 		0u, topology, VK_FALSE
    270 	};
    271 
    272 	const VkPipelineViewportStateCreateInfo viewportStateCreateInfo =
    273 	{
    274 		VK_STRUCTURE_TYPE_PIPELINE_VIEWPORT_STATE_CREATE_INFO, DE_NULL, 0u, 1u,
    275 		DE_NULL, 1u, DE_NULL,
    276 	};
    277 
    278 	const VkPipelineRasterizationStateCreateInfo rasterizationStateCreateInfo =
    279 	{
    280 		VK_STRUCTURE_TYPE_PIPELINE_RASTERIZATION_STATE_CREATE_INFO, DE_NULL,
    281 		0u, VK_FALSE, disableRasterization, VK_POLYGON_MODE_FILL, VK_CULL_MODE_NONE,
    282 		VK_FRONT_FACE_COUNTER_CLOCKWISE, VK_FALSE, 0.0f, 0.0f, 0.0f, 1.0f
    283 	};
    284 
    285 	const VkPipelineMultisampleStateCreateInfo multisampleStateCreateInfo =
    286 	{
    287 		VK_STRUCTURE_TYPE_PIPELINE_MULTISAMPLE_STATE_CREATE_INFO, DE_NULL, 0u,
    288 		VK_SAMPLE_COUNT_1_BIT, VK_FALSE, 0.0f, DE_NULL, VK_FALSE, VK_FALSE
    289 	};
    290 
    291 	const VkStencilOpState stencilOpState =
    292 	{
    293 		VK_STENCIL_OP_KEEP, VK_STENCIL_OP_KEEP, VK_STENCIL_OP_KEEP, VK_COMPARE_OP_NEVER,
    294 		0, 0, 0
    295 	};
    296 
    297 	const VkPipelineDepthStencilStateCreateInfo depthStencilStateCreateInfo =
    298 	{
    299 		VK_STRUCTURE_TYPE_PIPELINE_DEPTH_STENCIL_STATE_CREATE_INFO, DE_NULL, 0u,
    300 		VK_FALSE, VK_FALSE, VK_COMPARE_OP_NEVER, VK_FALSE, VK_FALSE, stencilOpState,
    301 		stencilOpState, 0.0f, 0.0f
    302 	};
    303 
    304 	const deUint32 numChannels = getNumUsedChannels(mapVkFormat(attachmentFormat).order);
    305 	const VkColorComponentFlags colorComponent =
    306 												numChannels == 1 ? VK_COLOR_COMPONENT_R_BIT :
    307 												numChannels == 2 ? VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT :
    308 												numChannels == 3 ? VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT :
    309 												VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT;
    310 
    311 	const VkPipelineColorBlendAttachmentState colorBlendAttachmentState =
    312 	{
    313 		VK_FALSE, VK_BLEND_FACTOR_ZERO, VK_BLEND_FACTOR_ZERO, VK_BLEND_OP_ADD,
    314 		VK_BLEND_FACTOR_ZERO, VK_BLEND_FACTOR_ZERO, VK_BLEND_OP_ADD,
    315 		colorComponent
    316 	};
    317 
    318 	const VkPipelineColorBlendStateCreateInfo colorBlendStateCreateInfo =
    319 	{
    320 		VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, DE_NULL, 0u,
    321 		VK_FALSE, VK_LOGIC_OP_CLEAR, 1, &colorBlendAttachmentState,
    322 		{ 0.0f, 0.0f, 0.0f, 0.0f }
    323 	};
    324 
    325 	const VkDynamicState dynamicState[2] =
    326 	{
    327 		VK_DYNAMIC_STATE_VIEWPORT, VK_DYNAMIC_STATE_SCISSOR
    328 	};
    329 
    330 	const VkPipelineDynamicStateCreateInfo dynamicStateCreateInfo =
    331 	{
    332 		VK_STRUCTURE_TYPE_PIPELINE_DYNAMIC_STATE_CREATE_INFO, DE_NULL, 0u, 2,
    333 		dynamicState,
    334 	};
    335 
    336 	const bool usingTessellation = (VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT & stages)
    337 								   || (VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT & stages);
    338 
    339 	const VkGraphicsPipelineCreateInfo pipelineCreateInfo =
    340 	{
    341 		VK_STRUCTURE_TYPE_GRAPHICS_PIPELINE_CREATE_INFO, DE_NULL, 0u,
    342 		static_cast<deUint32>(pipelineShaderStageParams.size()),
    343 		&pipelineShaderStageParams[0], &vertexInputStateCreateInfo,
    344 		&inputAssemblyStateCreateInfo, usingTessellation ? &tessellationStateCreateInfo : DE_NULL, &viewportStateCreateInfo,
    345 		&rasterizationStateCreateInfo, &multisampleStateCreateInfo,
    346 		&depthStencilStateCreateInfo, &colorBlendStateCreateInfo,
    347 		&dynamicStateCreateInfo, pipelineLayout, renderPass, 0, DE_NULL, 0
    348 	};
    349 
    350 	return createGraphicsPipeline(context.getDeviceInterface(),
    351 								  context.getDevice(), DE_NULL, &pipelineCreateInfo);
    352 }
    353 
    354 Move<VkPipeline> makeComputePipeline(Context& context,
    355 									 const VkPipelineLayout pipelineLayout, const VkShaderModule shaderModule,
    356 									 deUint32 localSizeX, deUint32 localSizeY, deUint32 localSizeZ)
    357 {
    358 	const deUint32 localSize[3] = {localSizeX, localSizeY, localSizeZ};
    359 
    360 	const vk::VkSpecializationMapEntry entries[3] =
    361 	{
    362 		{0, sizeof(deUint32) * 0, sizeof(deUint32)},
    363 		{1, sizeof(deUint32) * 1, sizeof(deUint32)},
    364 		{2, static_cast<deUint32>(sizeof(deUint32) * 2), sizeof(deUint32)},
    365 	};
    366 
    367 	const vk::VkSpecializationInfo info =
    368 	{
    369 		/* mapEntryCount = */ 3,
    370 		/* pMapEntries   = */ entries,
    371 		/* dataSize      = */ sizeof(localSize),
    372 		/* pData         = */ localSize
    373 	};
    374 
    375 	const vk::VkPipelineShaderStageCreateInfo pipelineShaderStageParams =
    376 	{
    377 		VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO,	// VkStructureType					sType;
    378 		DE_NULL,												// const void*						pNext;
    379 		0u,														// VkPipelineShaderStageCreateFlags	flags;
    380 		VK_SHADER_STAGE_COMPUTE_BIT,							// VkShaderStageFlagBits			stage;
    381 		shaderModule,											// VkShaderModule					module;
    382 		"main",													// const char*						pName;
    383 		&info,													// const VkSpecializationInfo*		pSpecializationInfo;
    384 	};
    385 
    386 	const vk::VkComputePipelineCreateInfo pipelineCreateInfo =
    387 	{
    388 		VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO,	// VkStructureType	sType;
    389 		DE_NULL,										// const void*						pNext;
    390 		0u,												// VkPipelineCreateFlags			flags;
    391 		pipelineShaderStageParams,						// VkPipelineShaderStageCreateInfo	stage;
    392 		pipelineLayout,									// VkPipelineLayout					layout;
    393 		DE_NULL,										// VkPipeline						basePipelineHandle;
    394 		0,												// deInt32							basePipelineIndex;
    395 	};
    396 
    397 	return createComputePipeline(context.getDeviceInterface(),
    398 								 context.getDevice(), DE_NULL, &pipelineCreateInfo);
    399 }
    400 
    401 Move<VkDescriptorSet> makeDescriptorSet(Context& context,
    402 										const VkDescriptorPool descriptorPool,
    403 										const VkDescriptorSetLayout setLayout)
    404 {
    405 	const VkDescriptorSetAllocateInfo allocateParams =
    406 	{
    407 		VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, // VkStructureType
    408 		// sType;
    409 		DE_NULL,		// const void*          pNext;
    410 		descriptorPool, // VkDescriptorPool       descriptorPool;
    411 		1u,				// deUint32           setLayoutCount;
    412 		&setLayout,		// const VkDescriptorSetLayout* pSetLayouts;
    413 	};
    414 	return allocateDescriptorSet(
    415 			   context.getDeviceInterface(), context.getDevice(), &allocateParams);
    416 }
    417 
    418 Move<VkCommandPool> makeCommandPool(Context& context)
    419 {
    420 	const VkCommandPoolCreateInfo commandPoolParams =
    421 	{
    422 		VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO, // VkStructureType sType;
    423 		DE_NULL,									// const void*        pNext;
    424 		VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, // VkCommandPoolCreateFlags
    425 		// flags;
    426 		context.getUniversalQueueFamilyIndex(), // deUint32 queueFamilyIndex;
    427 	};
    428 
    429 	return createCommandPool(
    430 			   context.getDeviceInterface(), context.getDevice(), &commandPoolParams);
    431 }
    432 
    433 Move<VkCommandBuffer> makeCommandBuffer(
    434 	Context& context, const VkCommandPool commandPool)
    435 {
    436 	const VkCommandBufferAllocateInfo bufferAllocateParams =
    437 	{
    438 		VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO,	// VkStructureType		sType;
    439 		DE_NULL,										// const void*			pNext;
    440 		commandPool,									// VkCommandPool		commandPool;
    441 		VK_COMMAND_BUFFER_LEVEL_PRIMARY,				// VkCommandBufferLevel	level;
    442 		1u,												// deUint32				bufferCount;
    443 	};
    444 	return allocateCommandBuffer(context.getDeviceInterface(),
    445 								 context.getDevice(), &bufferAllocateParams);
    446 }
    447 
    448 void beginCommandBuffer(Context& context, const VkCommandBuffer commandBuffer)
    449 {
    450 	const VkCommandBufferBeginInfo commandBufBeginParams =
    451 	{
    452 		VK_STRUCTURE_TYPE_COMMAND_BUFFER_BEGIN_INFO,		// VkStructureType				sType;
    453 		DE_NULL,											// const void*					pNext;
    454 		0u,													// VkCommandBufferUsageFlags	flags;
    455 		(const VkCommandBufferInheritanceInfo*)DE_NULL,
    456 	};
    457 	VK_CHECK(context.getDeviceInterface().beginCommandBuffer(
    458 				 commandBuffer, &commandBufBeginParams));
    459 }
    460 
    461 void endCommandBuffer(Context& context, const VkCommandBuffer commandBuffer)
    462 {
    463 	VK_CHECK(context.getDeviceInterface().endCommandBuffer(commandBuffer));
    464 }
    465 
    466 Move<VkFence> submitCommandBuffer(
    467 	Context& context, const VkCommandBuffer commandBuffer)
    468 {
    469 	const VkFenceCreateInfo fenceParams =
    470 	{
    471 		VK_STRUCTURE_TYPE_FENCE_CREATE_INFO, // VkStructureType    sType;
    472 		DE_NULL,							 // const void*      pNext;
    473 		0u,									 // VkFenceCreateFlags flags;
    474 	};
    475 
    476 	Move<VkFence> fence(createFence(
    477 							context.getDeviceInterface(), context.getDevice(), &fenceParams));
    478 
    479 	const VkSubmitInfo submitInfo =
    480 	{
    481 		VK_STRUCTURE_TYPE_SUBMIT_INFO, // VkStructureType      sType;
    482 		DE_NULL,					   // const void*        pNext;
    483 		0u,							   // deUint32         waitSemaphoreCount;
    484 		DE_NULL,					   // const VkSemaphore*   pWaitSemaphores;
    485 		(const VkPipelineStageFlags*)DE_NULL,
    486 		1u,				// deUint32         commandBufferCount;
    487 		&commandBuffer, // const VkCommandBuffer* pCommandBuffers;
    488 		0u,				// deUint32         signalSemaphoreCount;
    489 		DE_NULL,		// const VkSemaphore*   pSignalSemaphores;
    490 	};
    491 
    492 	vk::VkResult result = (context.getDeviceInterface().queueSubmit(
    493 							   context.getUniversalQueue(), 1u, &submitInfo, *fence));
    494 	VK_CHECK(result);
    495 
    496 	return Move<VkFence>(fence);
    497 }
    498 
    499 void waitFence(Context& context, Move<VkFence> fence)
    500 {
    501 	VK_CHECK(context.getDeviceInterface().waitForFences(
    502 				 context.getDevice(), 1u, &fence.get(), DE_TRUE, ~0ull));
    503 }
    504 
    505 struct Buffer;
    506 struct Image;
    507 
    508 struct BufferOrImage
    509 {
    510 	bool isImage() const
    511 	{
    512 		return m_isImage;
    513 	}
    514 
    515 	Buffer* getAsBuffer()
    516 	{
    517 		if (m_isImage) DE_FATAL("Trying to get a buffer as an image!");
    518 		return reinterpret_cast<Buffer* >(this);
    519 	}
    520 
    521 	Image* getAsImage()
    522 	{
    523 		if (!m_isImage) DE_FATAL("Trying to get an image as a buffer!");
    524 		return reinterpret_cast<Image*>(this);
    525 	}
    526 
    527 	virtual VkDescriptorType getType() const
    528 	{
    529 		if (m_isImage)
    530 		{
    531 			return VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
    532 		}
    533 		else
    534 		{
    535 			return VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
    536 		}
    537 	}
    538 
    539 	Allocation& getAllocation() const
    540 	{
    541 		return *m_allocation;
    542 	}
    543 
    544 	virtual ~BufferOrImage() {}
    545 
    546 protected:
    547 	explicit BufferOrImage(bool image) : m_isImage(image) {}
    548 
    549 	bool m_isImage;
    550 	de::details::MovePtr<Allocation> m_allocation;
    551 };
    552 
    553 struct Buffer : public BufferOrImage
    554 {
    555 	explicit Buffer(
    556 		Context& context, VkDeviceSize sizeInBytes, VkBufferUsageFlags usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT)
    557 		: BufferOrImage		(false)
    558 		, m_sizeInBytes		(sizeInBytes)
    559 		, m_usage			(usage)
    560 	{
    561 		const vk::VkBufferCreateInfo bufferCreateInfo =
    562 		{
    563 			VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
    564 			DE_NULL,
    565 			0u,
    566 			sizeInBytes,
    567 			m_usage,
    568 			VK_SHARING_MODE_EXCLUSIVE,
    569 			0u,
    570 			DE_NULL,
    571 		};
    572 		m_buffer = createBuffer(context.getDeviceInterface(),
    573 								context.getDevice(), &bufferCreateInfo);
    574 		vk::VkMemoryRequirements req = getBufferMemoryRequirements(
    575 										   context.getDeviceInterface(), context.getDevice(), *m_buffer);
    576 		req.size *= 2;
    577 		m_allocation = context.getDefaultAllocator().allocate(
    578 						   req, MemoryRequirement::HostVisible);
    579 		VK_CHECK(context.getDeviceInterface().bindBufferMemory(
    580 					 context.getDevice(), *m_buffer, m_allocation->getMemory(),
    581 					 m_allocation->getOffset()));
    582 	}
    583 
    584 	virtual VkDescriptorType getType() const
    585 	{
    586 		if (VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT == m_usage)
    587 		{
    588 			return VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
    589 		}
    590 		return VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
    591 	}
    592 
    593 	VkBuffer getBuffer() const {
    594 		return *m_buffer;
    595 	}
    596 
    597 	const VkBuffer* getBufferPtr() const {
    598 		return &(*m_buffer);
    599 	}
    600 
    601 	VkDeviceSize getSize() const {
    602 		return m_sizeInBytes;
    603 	}
    604 
    605 private:
    606 	Move<VkBuffer>				m_buffer;
    607 	VkDeviceSize				m_sizeInBytes;
    608 	const VkBufferUsageFlags	m_usage;
    609 };
    610 
    611 struct Image : public BufferOrImage
    612 {
    613 	explicit Image(Context& context, deUint32 width, deUint32 height,
    614 				   VkFormat format, VkImageUsageFlags usage = VK_IMAGE_USAGE_STORAGE_BIT)
    615 		: BufferOrImage(true)
    616 	{
    617 		const VkImageCreateInfo imageCreateInfo =
    618 		{
    619 			VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, DE_NULL, 0, VK_IMAGE_TYPE_2D,
    620 			format, {width, height, 1}, 1, 1, VK_SAMPLE_COUNT_1_BIT,
    621 			VK_IMAGE_TILING_OPTIMAL, usage,
    622 			VK_SHARING_MODE_EXCLUSIVE, 0u, DE_NULL,
    623 			VK_IMAGE_LAYOUT_UNDEFINED
    624 		};
    625 		m_image = createImage(context.getDeviceInterface(), context.getDevice(),
    626 							  &imageCreateInfo);
    627 		vk::VkMemoryRequirements req = getImageMemoryRequirements(
    628 										   context.getDeviceInterface(), context.getDevice(), *m_image);
    629 		req.size *= 2;
    630 		m_allocation =
    631 			context.getDefaultAllocator().allocate(req, MemoryRequirement::Any);
    632 		VK_CHECK(context.getDeviceInterface().bindImageMemory(
    633 					 context.getDevice(), *m_image, m_allocation->getMemory(),
    634 					 m_allocation->getOffset()));
    635 
    636 		const VkComponentMapping componentMapping =
    637 		{
    638 			VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY,
    639 			VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY
    640 		};
    641 
    642 		const VkImageViewCreateInfo imageViewCreateInfo =
    643 		{
    644 			VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, DE_NULL, 0, *m_image,
    645 			VK_IMAGE_VIEW_TYPE_2D, imageCreateInfo.format, componentMapping,
    646 			{
    647 				VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1,
    648 			}
    649 		};
    650 
    651 		m_imageView = createImageView(context.getDeviceInterface(),
    652 									  context.getDevice(), &imageViewCreateInfo);
    653 
    654 		const struct VkSamplerCreateInfo samplerCreateInfo =
    655 		{
    656 			VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO,
    657 			DE_NULL,
    658 			0u,
    659 			VK_FILTER_NEAREST,
    660 			VK_FILTER_NEAREST,
    661 			VK_SAMPLER_MIPMAP_MODE_NEAREST,
    662 			VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
    663 			VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
    664 			VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE,
    665 			0.0f,
    666 			VK_FALSE,
    667 			1.0f,
    668 			DE_FALSE,
    669 			VK_COMPARE_OP_ALWAYS,
    670 			0.0f,
    671 			0.0f,
    672 			VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK,
    673 			VK_FALSE,
    674 		};
    675 
    676 		m_sampler = createSampler(context.getDeviceInterface(), context.getDevice(), &samplerCreateInfo);
    677 	}
    678 
    679 	VkImage getImage() const {
    680 		return *m_image;
    681 	}
    682 
    683 	VkImageView getImageView() const {
    684 		return *m_imageView;
    685 	}
    686 
    687 	VkSampler getSampler() const {
    688 		return *m_sampler;
    689 	}
    690 
    691 private:
    692 	Move<VkImage> m_image;
    693 	Move<VkImageView> m_imageView;
    694 	Move<VkSampler> m_sampler;
    695 };
    696 }
    697 
    698 std::string vkt::subgroups::getSharedMemoryBallotHelper()
    699 {
    700 	return	"shared uvec4 superSecretComputeShaderHelper[gl_WorkGroupSize.x * gl_WorkGroupSize.y * gl_WorkGroupSize.z];\n"
    701 			"uvec4 sharedMemoryBallot(bool vote)\n"
    702 			"{\n"
    703 			"  uint groupOffset = gl_SubgroupID;\n"
    704 			"  // One invocation in the group 0's the whole group's data\n"
    705 			"  if (subgroupElect())\n"
    706 			"  {\n"
    707 			"    superSecretComputeShaderHelper[groupOffset] = uvec4(0);\n"
    708 			"  }\n"
    709 			"  subgroupMemoryBarrierShared();\n"
    710 			"  if (vote)\n"
    711 			"  {\n"
    712 			"    const highp uint invocationId = gl_SubgroupInvocationID % 32;\n"
    713 			"    const highp uint bitToSet = 1u << invocationId;\n"
    714 			"    switch (gl_SubgroupInvocationID / 32)\n"
    715 			"    {\n"
    716 			"    case 0: atomicOr(superSecretComputeShaderHelper[groupOffset].x, bitToSet); break;\n"
    717 			"    case 1: atomicOr(superSecretComputeShaderHelper[groupOffset].y, bitToSet); break;\n"
    718 			"    case 2: atomicOr(superSecretComputeShaderHelper[groupOffset].z, bitToSet); break;\n"
    719 			"    case 3: atomicOr(superSecretComputeShaderHelper[groupOffset].w, bitToSet); break;\n"
    720 			"    }\n"
    721 			"  }\n"
    722 			"  subgroupMemoryBarrierShared();\n"
    723 			"  return superSecretComputeShaderHelper[groupOffset];\n"
    724 			"}\n";
    725 }
    726 
    727 deUint32 vkt::subgroups::getSubgroupSize(Context& context)
    728 {
    729 	VkPhysicalDeviceSubgroupProperties subgroupProperties;
    730 	subgroupProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES;
    731 	subgroupProperties.pNext = DE_NULL;
    732 
    733 	VkPhysicalDeviceProperties2 properties;
    734 	properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
    735 	properties.pNext = &subgroupProperties;
    736 
    737 	context.getInstanceInterface().getPhysicalDeviceProperties2(context.getPhysicalDevice(), &properties);
    738 
    739 	return subgroupProperties.subgroupSize;
    740 }
    741 
    742 VkDeviceSize vkt::subgroups::maxSupportedSubgroupSize() {
    743 	return 128u;
    744 }
    745 
    746 std::string vkt::subgroups::getShaderStageName(VkShaderStageFlags stage)
    747 {
    748 	switch (stage)
    749 	{
    750 		default:
    751 			DE_FATAL("Unhandled stage!");
    752 		case VK_SHADER_STAGE_COMPUTE_BIT:
    753 			return "compute";
    754 		case VK_SHADER_STAGE_FRAGMENT_BIT:
    755 			return "fragment";
    756 		case VK_SHADER_STAGE_VERTEX_BIT:
    757 			return "vertex";
    758 		case VK_SHADER_STAGE_GEOMETRY_BIT:
    759 			return "geometry";
    760 		case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT:
    761 			return "tess_control";
    762 		case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT:
    763 			return "tess_eval";
    764 	}
    765 }
    766 
    767 std::string vkt::subgroups::getSubgroupFeatureName(vk::VkSubgroupFeatureFlagBits bit)
    768 {
    769 	switch (bit)
    770 	{
    771 		default:
    772 			DE_FATAL("Unknown subgroup feature category!");
    773 		case VK_SUBGROUP_FEATURE_BASIC_BIT:
    774 			return "VK_SUBGROUP_FEATURE_BASIC_BIT";
    775 		case VK_SUBGROUP_FEATURE_VOTE_BIT:
    776 			return "VK_SUBGROUP_FEATURE_VOTE_BIT";
    777 		case VK_SUBGROUP_FEATURE_ARITHMETIC_BIT:
    778 			return "VK_SUBGROUP_FEATURE_ARITHMETIC_BIT";
    779 		case VK_SUBGROUP_FEATURE_BALLOT_BIT:
    780 			return "VK_SUBGROUP_FEATURE_BALLOT_BIT";
    781 		case VK_SUBGROUP_FEATURE_SHUFFLE_BIT:
    782 			return "VK_SUBGROUP_FEATURE_SHUFFLE_BIT";
    783 		case VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT:
    784 			return "VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT";
    785 		case VK_SUBGROUP_FEATURE_CLUSTERED_BIT:
    786 			return "VK_SUBGROUP_FEATURE_CLUSTERED_BIT";
    787 		case VK_SUBGROUP_FEATURE_QUAD_BIT:
    788 			return "VK_SUBGROUP_FEATURE_QUAD_BIT";
    789 	}
    790 }
    791 
    792 std::string vkt::subgroups::getVertShaderForStage(vk::VkShaderStageFlags stage)
    793 {
    794 	switch (stage)
    795 	{
    796 		default:
    797 			DE_FATAL("Unhandled stage!");
    798 		case VK_SHADER_STAGE_FRAGMENT_BIT:
    799 			return
    800 				"#version 450\n"
    801 				"void main (void)\n"
    802 				"{\n"
    803 				"  vec2 uv = vec2((gl_VertexIndex << 1) & 2, gl_VertexIndex & 2);\n"
    804 				"  gl_Position = vec4(uv * 2.0f + -1.0f, 0.0f, 1.0f);\n"
    805 				"}\n";
    806 		case VK_SHADER_STAGE_GEOMETRY_BIT:
    807 			return
    808 				"#version 450\n"
    809 				"void main (void)\n"
    810 				"{\n"
    811 				"}\n";
    812 		case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT:
    813 		case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT:
    814 			return
    815 				"#version 450\n"
    816 				"void main (void)\n"
    817 				"{\n"
    818 				"}\n";
    819 	}
    820 }
    821 
    822 bool vkt::subgroups::isSubgroupSupported(Context& context)
    823 {
    824 	return context.contextSupports(vk::ApiVersion(1, 1, 0));
    825 }
    826 
    827 bool vkt::subgroups::areSubgroupOperationsSupportedForStage(
    828 	Context& context, const VkShaderStageFlags stage)
    829 {
    830 	VkPhysicalDeviceSubgroupProperties subgroupProperties;
    831 	subgroupProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES;
    832 	subgroupProperties.pNext = DE_NULL;
    833 
    834 	VkPhysicalDeviceProperties2 properties;
    835 	properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
    836 	properties.pNext = &subgroupProperties;
    837 
    838 	context.getInstanceInterface().getPhysicalDeviceProperties2(context.getPhysicalDevice(), &properties);
    839 
    840 	return (stage & subgroupProperties.supportedStages) ? true : false;
    841 }
    842 
    843 bool vkt::subgroups::areSubgroupOperationsRequiredForStage(
    844 	VkShaderStageFlags stage)
    845 {
    846 	switch (stage)
    847 	{
    848 		default:
    849 			return false;
    850 		case VK_SHADER_STAGE_COMPUTE_BIT:
    851 			return true;
    852 	}
    853 }
    854 
    855 bool vkt::subgroups::isSubgroupFeatureSupportedForDevice(
    856 	Context& context,
    857 	VkSubgroupFeatureFlagBits bit) {
    858 	VkPhysicalDeviceSubgroupProperties subgroupProperties;
    859 	subgroupProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES;
    860 	subgroupProperties.pNext = DE_NULL;
    861 
    862 	VkPhysicalDeviceProperties2 properties;
    863 	properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
    864 	properties.pNext = &subgroupProperties;
    865 
    866 	context.getInstanceInterface().getPhysicalDeviceProperties2(context.getPhysicalDevice(), &properties);
    867 
    868 	return (bit & subgroupProperties.supportedOperations) ? true : false;
    869 }
    870 
    871 bool vkt::subgroups::isFragmentSSBOSupportedForDevice(Context& context)
    872 {
    873 	const VkPhysicalDeviceFeatures features = getPhysicalDeviceFeatures(
    874 				context.getInstanceInterface(), context.getPhysicalDevice());
    875 	return features.fragmentStoresAndAtomics ? true : false;
    876 }
    877 
    878 bool vkt::subgroups::isVertexSSBOSupportedForDevice(Context& context)
    879 {
    880 	const VkPhysicalDeviceFeatures features = getPhysicalDeviceFeatures(
    881 				context.getInstanceInterface(), context.getPhysicalDevice());
    882 	return features.vertexPipelineStoresAndAtomics ? true : false;
    883 }
    884 
    885 bool vkt::subgroups::isDoubleSupportedForDevice(Context& context)
    886 {
    887 	const VkPhysicalDeviceFeatures features = getPhysicalDeviceFeatures(
    888 				context.getInstanceInterface(), context.getPhysicalDevice());
    889 	return features.shaderFloat64 ? true : false;
    890 }
    891 
    892 bool vkt::subgroups::isDoubleFormat(VkFormat format)
    893 {
    894 	switch (format)
    895 	{
    896 		default:
    897 			return false;
    898 		case VK_FORMAT_R64_SFLOAT:
    899 		case VK_FORMAT_R64G64_SFLOAT:
    900 		case VK_FORMAT_R64G64B64_SFLOAT:
    901 		case VK_FORMAT_R64G64B64A64_SFLOAT:
    902 			return true;
    903 	}
    904 }
    905 
    906 std::string vkt::subgroups::getFormatNameForGLSL(VkFormat format)
    907 {
    908 	switch (format)
    909 	{
    910 		default:
    911 			DE_FATAL("Unhandled format!");
    912 		case VK_FORMAT_R32_SINT:
    913 			return "int";
    914 		case VK_FORMAT_R32G32_SINT:
    915 			return "ivec2";
    916 		case VK_FORMAT_R32G32B32_SINT:
    917 			return "ivec3";
    918 		case VK_FORMAT_R32G32B32A32_SINT:
    919 			return "ivec4";
    920 		case VK_FORMAT_R32_UINT:
    921 			return "uint";
    922 		case VK_FORMAT_R32G32_UINT:
    923 			return "uvec2";
    924 		case VK_FORMAT_R32G32B32_UINT:
    925 			return "uvec3";
    926 		case VK_FORMAT_R32G32B32A32_UINT:
    927 			return "uvec4";
    928 		case VK_FORMAT_R32_SFLOAT:
    929 			return "float";
    930 		case VK_FORMAT_R32G32_SFLOAT:
    931 			return "vec2";
    932 		case VK_FORMAT_R32G32B32_SFLOAT:
    933 			return "vec3";
    934 		case VK_FORMAT_R32G32B32A32_SFLOAT:
    935 			return "vec4";
    936 		case VK_FORMAT_R64_SFLOAT:
    937 			return "double";
    938 		case VK_FORMAT_R64G64_SFLOAT:
    939 			return "dvec2";
    940 		case VK_FORMAT_R64G64B64_SFLOAT:
    941 			return "dvec3";
    942 		case VK_FORMAT_R64G64B64A64_SFLOAT:
    943 			return "dvec4";
    944 		case VK_FORMAT_R8_USCALED:
    945 			return "bool";
    946 		case VK_FORMAT_R8G8_USCALED:
    947 			return "bvec2";
    948 		case VK_FORMAT_R8G8B8_USCALED:
    949 			return "bvec3";
    950 		case VK_FORMAT_R8G8B8A8_USCALED:
    951 			return "bvec4";
    952 	}
    953 }
    954 
    955 void initializeMemory(Context& context, const Allocation& alloc, subgroups::SSBOData& data)
    956 {
    957 	const vk::VkFormat format = data.format;
    958 	const vk::VkDeviceSize size = getFormatSizeInBytes(format) * data.numElements;
    959 	if (subgroups::SSBOData::InitializeNonZero == data.initializeType)
    960 	{
    961 		de::Random rnd(context.getTestContext().getCommandLine().getBaseSeed());
    962 
    963 		switch (format)
    964 		{
    965 			default:
    966 				DE_FATAL("Illegal buffer format");
    967 			case VK_FORMAT_R8_USCALED:
    968 			case VK_FORMAT_R8G8_USCALED:
    969 			case VK_FORMAT_R8G8B8_USCALED:
    970 			case VK_FORMAT_R8G8B8A8_USCALED:
    971 			case VK_FORMAT_R32_SINT:
    972 			case VK_FORMAT_R32G32_SINT:
    973 			case VK_FORMAT_R32G32B32_SINT:
    974 			case VK_FORMAT_R32G32B32A32_SINT:
    975 			case VK_FORMAT_R32_UINT:
    976 			case VK_FORMAT_R32G32_UINT:
    977 			case VK_FORMAT_R32G32B32_UINT:
    978 			case VK_FORMAT_R32G32B32A32_UINT:
    979 			{
    980 				deUint32* ptr = reinterpret_cast<deUint32*>(alloc.getHostPtr());
    981 
    982 				for (vk::VkDeviceSize k = 0; k < (size / 4); k++)
    983 				{
    984 					ptr[k] = rnd.getUint32();
    985 				}
    986 			}
    987 			break;
    988 			case VK_FORMAT_R32_SFLOAT:
    989 			case VK_FORMAT_R32G32_SFLOAT:
    990 			case VK_FORMAT_R32G32B32_SFLOAT:
    991 			case VK_FORMAT_R32G32B32A32_SFLOAT:
    992 			{
    993 				float* ptr = reinterpret_cast<float*>(alloc.getHostPtr());
    994 
    995 				for (vk::VkDeviceSize k = 0; k < (size / 4); k++)
    996 				{
    997 					ptr[k] = rnd.getFloat();
    998 				}
    999 			}
   1000 			break;
   1001 			case VK_FORMAT_R64_SFLOAT:
   1002 			case VK_FORMAT_R64G64_SFLOAT:
   1003 			case VK_FORMAT_R64G64B64_SFLOAT:
   1004 			case VK_FORMAT_R64G64B64A64_SFLOAT:
   1005 			{
   1006 				double* ptr = reinterpret_cast<double*>(alloc.getHostPtr());
   1007 
   1008 				for (vk::VkDeviceSize k = 0; k < (size / 4); k++)
   1009 				{
   1010 					ptr[k] = rnd.getDouble();
   1011 				}
   1012 			}
   1013 			break;
   1014 		}
   1015 	}
   1016 	else if (subgroups::SSBOData::InitializeZero == data.initializeType)
   1017 	{
   1018 		deUint32* ptr = reinterpret_cast<deUint32*>(alloc.getHostPtr());
   1019 
   1020 		for (vk::VkDeviceSize k = 0; k < size / 4; k++)
   1021 		{
   1022 			ptr[k] = 0;
   1023 		}
   1024 	}
   1025 
   1026 	if (subgroups::SSBOData::InitializeNone != data.initializeType)
   1027 	{
   1028 		flushMappedMemoryRange(context.getDeviceInterface(),
   1029 							   context.getDevice(), alloc.getMemory(), alloc.getOffset(), size);
   1030 	}
   1031 }
   1032 
   1033 tcu::TestStatus vkt::subgroups::makeTessellationEvaluationTest(
   1034 	Context& context, VkFormat format, SSBOData* extraDatas,
   1035 	deUint32 extraDatasCount,
   1036 	bool (*checkResult)(std::vector<const void*> datas, deUint32 width, deUint32 subgroupSize))
   1037 {
   1038 	const deUint32 maxWidth = 1024;
   1039 
   1040 	const Unique<VkShaderModule> vertexShaderModule(
   1041 		createShaderModule(context.getDeviceInterface(), context.getDevice(),
   1042 						   context.getBinaryCollection().get("vert"), 0u));
   1043 	const Unique<VkShaderModule> tessellationControlShaderModule(
   1044 		createShaderModule(context.getDeviceInterface(), context.getDevice(),
   1045 						   context.getBinaryCollection().get("tesc"), 0u));
   1046 	const Unique<VkShaderModule> tessellationEvaluationShaderModule(
   1047 		createShaderModule(context.getDeviceInterface(), context.getDevice(),
   1048 						   context.getBinaryCollection().get("tese"), 0u));
   1049 
   1050 	std::vector< de::SharedPtr<BufferOrImage> > inputBuffers(extraDatasCount + 1);
   1051 
   1052 	// The implicit result SSBO we use to store our outputs from the shader
   1053 	{
   1054 		vk::VkDeviceSize size = getFormatSizeInBytes(format) * maxWidth * 2;
   1055 		inputBuffers[0] = de::SharedPtr<BufferOrImage>(new Buffer(context, size));
   1056 	}
   1057 
   1058 	for (deUint32 i = 0; i < (inputBuffers.size() - 1); i++)
   1059 	{
   1060 		if (extraDatas[i].isImage)
   1061 		{
   1062 			inputBuffers[i + 1] = de::SharedPtr<BufferOrImage>(new Image(context,
   1063 											static_cast<deUint32>(extraDatas[i].numElements), 1, extraDatas[i].format));
   1064 		}
   1065 		else
   1066 		{
   1067 			vk::VkDeviceSize size =
   1068 				getFormatSizeInBytes(extraDatas[i].format) * extraDatas[i].numElements;
   1069 			inputBuffers[i + 1] = de::SharedPtr<BufferOrImage>(new Buffer(context, size));
   1070 		}
   1071 
   1072 		const Allocation& alloc = inputBuffers[i + 1]->getAllocation();
   1073 		initializeMemory(context, alloc, extraDatas[i]);
   1074 	}
   1075 
   1076 	DescriptorSetLayoutBuilder layoutBuilder;
   1077 
   1078 	for (deUint32 i = 0; i < inputBuffers.size(); i++)
   1079 	{
   1080 		layoutBuilder.addBinding(inputBuffers[i]->getType(), 1,
   1081 								 VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT, DE_NULL);
   1082 	}
   1083 
   1084 	const Unique<VkDescriptorSetLayout> descriptorSetLayout(
   1085 		layoutBuilder.build(context.getDeviceInterface(), context.getDevice()));
   1086 
   1087 	const Unique<VkPipelineLayout> pipelineLayout(
   1088 		makePipelineLayout(context, *descriptorSetLayout));
   1089 
   1090 	const Unique<VkRenderPass> renderPass(makeRenderPass(context, VK_FORMAT_R32_SFLOAT));
   1091 	const Unique<VkPipeline> pipeline(makeGraphicsPipeline(context, *pipelineLayout,
   1092 									  VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT | VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT,
   1093 									  *vertexShaderModule, DE_NULL, DE_NULL, *tessellationControlShaderModule, *tessellationEvaluationShaderModule,
   1094 									  *renderPass, VK_PRIMITIVE_TOPOLOGY_PATCH_LIST));
   1095 
   1096 	DescriptorPoolBuilder poolBuilder;
   1097 
   1098 	for (deUint32 i = 0; i < inputBuffers.size(); i++)
   1099 	{
   1100 		poolBuilder.addType(inputBuffers[i]->getType());
   1101 	}
   1102 
   1103 	const Unique<VkDescriptorPool> descriptorPool(
   1104 		poolBuilder.build(context.getDeviceInterface(), context.getDevice(),
   1105 						  VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
   1106 
   1107 	// Create descriptor set
   1108 	const Unique<VkDescriptorSet> descriptorSet(
   1109 		makeDescriptorSet(context, *descriptorPool, *descriptorSetLayout));
   1110 
   1111 	DescriptorSetUpdateBuilder updateBuilder;
   1112 
   1113 	for (deUint32 i = 0; i < inputBuffers.size(); i++)
   1114 	{
   1115 		if (inputBuffers[i]->isImage())
   1116 		{
   1117 			VkDescriptorImageInfo info =
   1118 				makeDescriptorImageInfo(inputBuffers[i]->getAsImage()->getSampler(),
   1119 										inputBuffers[i]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
   1120 
   1121 			updateBuilder.writeSingle(*descriptorSet,
   1122 									  DescriptorSetUpdateBuilder::Location::binding(i),
   1123 									  inputBuffers[i]->getType(), &info);
   1124 		}
   1125 		else
   1126 		{
   1127 			VkDescriptorBufferInfo info =
   1128 				makeDescriptorBufferInfo(inputBuffers[i]->getAsBuffer()->getBuffer(),
   1129 										 0ull, inputBuffers[i]->getAsBuffer()->getSize());
   1130 
   1131 			updateBuilder.writeSingle(*descriptorSet,
   1132 									  DescriptorSetUpdateBuilder::Location::binding(i),
   1133 									  inputBuffers[i]->getType(), &info);
   1134 		}
   1135 	}
   1136 
   1137 	updateBuilder.update(context.getDeviceInterface(), context.getDevice());
   1138 
   1139 	const Unique<VkCommandPool> cmdPool(makeCommandPool(context));
   1140 
   1141 	const deUint32 subgroupSize = getSubgroupSize(context);
   1142 
   1143 	const Unique<VkCommandBuffer> cmdBuffer(
   1144 		makeCommandBuffer(context, *cmdPool));
   1145 
   1146 	unsigned totalIterations = 0;
   1147 	unsigned failedIterations = 0;
   1148 
   1149 	Image discardableImage(context, 1, 1, VK_FORMAT_R32_SFLOAT,
   1150 						   VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT |
   1151 						   VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
   1152 
   1153 	for (deUint32 width = 1; width < maxWidth; width++)
   1154 	{
   1155 		for (deUint32 i = 1; i < inputBuffers.size(); i++)
   1156 		{
   1157 			// re-init the data
   1158 			const Allocation& alloc = inputBuffers[i]->getAllocation();
   1159 			initializeMemory(context, alloc, extraDatas[i - 1]);
   1160 		}
   1161 
   1162 		totalIterations++;
   1163 
   1164 		const Unique<VkFramebuffer> framebuffer(makeFramebuffer(context,
   1165 												*renderPass, discardableImage.getImageView(), 1, 1));
   1166 
   1167 		const VkClearValue clearValue = {{{0.0f, 0.0f, 0.0f, 0.0f}}};
   1168 
   1169 		const VkRenderPassBeginInfo renderPassBeginInfo = {
   1170 			VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, DE_NULL, *renderPass,
   1171 			*framebuffer, {{0, 0}, {1, 1}}, 1, &clearValue,
   1172 		};
   1173 
   1174 		beginCommandBuffer(context, *cmdBuffer);
   1175 
   1176 		VkViewport viewport = {0.0f, 0.0f, 1.0f, 1.0f, 0.0f, 1.0f};
   1177 
   1178 		context.getDeviceInterface().cmdSetViewport(
   1179 			*cmdBuffer, 0, 1, &viewport);
   1180 
   1181 		VkRect2D scissor = {{0, 0}, {1, 1}};
   1182 
   1183 		context.getDeviceInterface().cmdSetScissor(
   1184 			*cmdBuffer, 0, 1, &scissor);
   1185 
   1186 		context.getDeviceInterface().cmdBeginRenderPass(
   1187 			*cmdBuffer, &renderPassBeginInfo, VK_SUBPASS_CONTENTS_INLINE);
   1188 
   1189 		context.getDeviceInterface().cmdBindPipeline(
   1190 			*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
   1191 
   1192 		context.getDeviceInterface().cmdBindDescriptorSets(*cmdBuffer,
   1193 				VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, 1u,
   1194 				&descriptorSet.get(), 0u, DE_NULL);
   1195 
   1196 		context.getDeviceInterface().cmdDraw(*cmdBuffer, width, 1, 0, 0);
   1197 
   1198 		context.getDeviceInterface().cmdEndRenderPass(*cmdBuffer);
   1199 
   1200 		endCommandBuffer(context, *cmdBuffer);
   1201 
   1202 		Move<VkFence> fence(submitCommandBuffer(context, *cmdBuffer));
   1203 
   1204 		waitFence(context, fence);
   1205 
   1206 		std::vector<const void*> datas;
   1207 
   1208 		for (deUint32 i = 0; i < inputBuffers.size(); i++)
   1209 		{
   1210 			if (!inputBuffers[i]->isImage())
   1211 			{
   1212 				const Allocation& resultAlloc = inputBuffers[i]->getAllocation();
   1213 				invalidateMappedMemoryRange(context.getDeviceInterface(),
   1214 											context.getDevice(), resultAlloc.getMemory(),
   1215 											resultAlloc.getOffset(), inputBuffers[i]->getAsBuffer()->getSize());
   1216 
   1217 				// we always have our result data first
   1218 				datas.push_back(resultAlloc.getHostPtr());
   1219 			}
   1220 		}
   1221 
   1222 		if (!checkResult(datas, width * 2, subgroupSize))
   1223 		{
   1224 			failedIterations++;
   1225 		}
   1226 
   1227 		context.getDeviceInterface().resetCommandBuffer(*cmdBuffer, 0);
   1228 	}
   1229 
   1230 	if (0 < failedIterations)
   1231 	{
   1232 		context.getTestContext().getLog()
   1233 				<< TestLog::Message << (totalIterations - failedIterations) << " / "
   1234 				<< totalIterations << " values passed" << TestLog::EndMessage;
   1235 		return tcu::TestStatus::fail("Failed!");
   1236 	}
   1237 
   1238 	return tcu::TestStatus::pass("OK");
   1239 }
   1240 
   1241 tcu::TestStatus vkt::subgroups::makeTessellationControlTest(
   1242 	Context& context, VkFormat format, SSBOData* extraDatas,
   1243 	deUint32 extraDatasCount,
   1244 	bool (*checkResult)(std::vector<const void*> datas, deUint32 width, deUint32 subgroupSize))
   1245 {
   1246 	const deUint32 maxWidth = 1024;
   1247 
   1248 	const Unique<VkShaderModule> vertexShaderModule(
   1249 		createShaderModule(context.getDeviceInterface(), context.getDevice(),
   1250 						   context.getBinaryCollection().get("vert"), 0u));
   1251 	const Unique<VkShaderModule> tessellationControlShaderModule(
   1252 		createShaderModule(context.getDeviceInterface(), context.getDevice(),
   1253 						   context.getBinaryCollection().get("tesc"), 0u));
   1254 	const Unique<VkShaderModule> tessellationEvaluationShaderModule(
   1255 		createShaderModule(context.getDeviceInterface(), context.getDevice(),
   1256 						   context.getBinaryCollection().get("tese"), 0u));
   1257 
   1258 	std::vector< de::SharedPtr<BufferOrImage> > inputBuffers(extraDatasCount + 1);
   1259 
   1260 	// The implicit result SSBO we use to store our outputs from the vertex shader
   1261 	{
   1262 		vk::VkDeviceSize size = getFormatSizeInBytes(format) * maxWidth;
   1263 		inputBuffers[0] = de::SharedPtr<BufferOrImage>(new Buffer(context, size));
   1264 	}
   1265 
   1266 	for (deUint32 i = 0; i < (inputBuffers.size() - 1); i++)
   1267 	{
   1268 		if (extraDatas[i].isImage)
   1269 		{
   1270 			inputBuffers[i + 1] = de::SharedPtr<BufferOrImage>(new Image(context,
   1271 											static_cast<deUint32>(extraDatas[i].numElements), 1, extraDatas[i].format));
   1272 		}
   1273 		else
   1274 		{
   1275 			vk::VkDeviceSize size =
   1276 				getFormatSizeInBytes(extraDatas[i].format) * extraDatas[i].numElements;
   1277 			inputBuffers[i + 1] = de::SharedPtr<BufferOrImage>(new Buffer(context, size));
   1278 		}
   1279 
   1280 		const Allocation& alloc = inputBuffers[i + 1]->getAllocation();
   1281 		initializeMemory(context, alloc, extraDatas[i]);
   1282 	}
   1283 
   1284 	DescriptorSetLayoutBuilder layoutBuilder;
   1285 
   1286 	for (deUint32 i = 0; i < inputBuffers.size(); i++)
   1287 	{
   1288 		layoutBuilder.addBinding(inputBuffers[i]->getType(), 1,
   1289 								 VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT, DE_NULL);
   1290 	}
   1291 
   1292 	const Unique<VkDescriptorSetLayout> descriptorSetLayout(
   1293 		layoutBuilder.build(context.getDeviceInterface(), context.getDevice()));
   1294 
   1295 	const Unique<VkPipelineLayout> pipelineLayout(
   1296 		makePipelineLayout(context, *descriptorSetLayout));
   1297 
   1298 	const Unique<VkRenderPass> renderPass(makeRenderPass(context, VK_FORMAT_R32_SFLOAT));
   1299 	const Unique<VkPipeline> pipeline(makeGraphicsPipeline(context, *pipelineLayout,
   1300 									  VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT | VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT,
   1301 									  *vertexShaderModule, DE_NULL, DE_NULL, *tessellationControlShaderModule, *tessellationEvaluationShaderModule,
   1302 									  *renderPass, VK_PRIMITIVE_TOPOLOGY_PATCH_LIST));
   1303 
   1304 	DescriptorPoolBuilder poolBuilder;
   1305 
   1306 	for (deUint32 i = 0; i < inputBuffers.size(); i++)
   1307 	{
   1308 		poolBuilder.addType(inputBuffers[i]->getType());
   1309 	}
   1310 
   1311 	const Unique<VkDescriptorPool> descriptorPool(
   1312 		poolBuilder.build(context.getDeviceInterface(), context.getDevice(),
   1313 						  VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
   1314 
   1315 	// Create descriptor set
   1316 	const Unique<VkDescriptorSet> descriptorSet(
   1317 		makeDescriptorSet(context, *descriptorPool, *descriptorSetLayout));
   1318 
   1319 	DescriptorSetUpdateBuilder updateBuilder;
   1320 
   1321 	for (deUint32 i = 0; i < inputBuffers.size(); i++)
   1322 	{
   1323 		if (inputBuffers[i]->isImage())
   1324 		{
   1325 			VkDescriptorImageInfo info =
   1326 				makeDescriptorImageInfo(inputBuffers[i]->getAsImage()->getSampler(),
   1327 										inputBuffers[i]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
   1328 
   1329 			updateBuilder.writeSingle(*descriptorSet,
   1330 									  DescriptorSetUpdateBuilder::Location::binding(i),
   1331 									  inputBuffers[i]->getType(), &info);
   1332 		}
   1333 		else
   1334 		{
   1335 			VkDescriptorBufferInfo info =
   1336 				makeDescriptorBufferInfo(inputBuffers[i]->getAsBuffer()->getBuffer(),
   1337 										 0ull, inputBuffers[i]->getAsBuffer()->getSize());
   1338 
   1339 			updateBuilder.writeSingle(*descriptorSet,
   1340 									  DescriptorSetUpdateBuilder::Location::binding(i),
   1341 									  inputBuffers[i]->getType(), &info);
   1342 		}
   1343 	}
   1344 
   1345 	updateBuilder.update(context.getDeviceInterface(), context.getDevice());
   1346 
   1347 	const Unique<VkCommandPool> cmdPool(makeCommandPool(context));
   1348 
   1349 	const deUint32 subgroupSize = getSubgroupSize(context);
   1350 
   1351 	const Unique<VkCommandBuffer> cmdBuffer(
   1352 		makeCommandBuffer(context, *cmdPool));
   1353 
   1354 	unsigned totalIterations = 0;
   1355 	unsigned failedIterations = 0;
   1356 
   1357 	Image discardableImage(context, 1, 1, VK_FORMAT_R32_SFLOAT,
   1358 						   VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT |
   1359 						   VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
   1360 
   1361 	for (deUint32 width = 1; width < maxWidth; width++)
   1362 	{
   1363 		for (deUint32 i = 1; i < inputBuffers.size(); i++)
   1364 		{
   1365 			// re-init the data
   1366 			const Allocation& alloc = inputBuffers[i]->getAllocation();
   1367 			initializeMemory(context, alloc, extraDatas[i - 1]);
   1368 		}
   1369 
   1370 		totalIterations++;
   1371 
   1372 		const Unique<VkFramebuffer> framebuffer(makeFramebuffer(context,
   1373 												*renderPass, discardableImage.getImageView(), 1, 1));
   1374 
   1375 		const VkClearValue clearValue = {{{0.0f, 0.0f, 0.0f, 0.0f}}};
   1376 
   1377 		const VkRenderPassBeginInfo renderPassBeginInfo = {
   1378 			VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, DE_NULL, *renderPass,
   1379 			*framebuffer, {{0, 0}, {1, 1}}, 1, &clearValue,
   1380 		};
   1381 
   1382 		beginCommandBuffer(context, *cmdBuffer);
   1383 
   1384 		VkViewport viewport = {0.0f, 0.0f, 1.0f, 1.0f, 0.0f, 1.0f};
   1385 
   1386 		context.getDeviceInterface().cmdSetViewport(
   1387 			*cmdBuffer, 0, 1, &viewport);
   1388 
   1389 		VkRect2D scissor = {{0, 0}, {1, 1}};
   1390 
   1391 		context.getDeviceInterface().cmdSetScissor(
   1392 			*cmdBuffer, 0, 1, &scissor);
   1393 
   1394 		context.getDeviceInterface().cmdBeginRenderPass(
   1395 			*cmdBuffer, &renderPassBeginInfo, VK_SUBPASS_CONTENTS_INLINE);
   1396 
   1397 		context.getDeviceInterface().cmdBindPipeline(
   1398 			*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
   1399 
   1400 		context.getDeviceInterface().cmdBindDescriptorSets(*cmdBuffer,
   1401 				VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, 1u,
   1402 				&descriptorSet.get(), 0u, DE_NULL);
   1403 
   1404 		context.getDeviceInterface().cmdDraw(*cmdBuffer, width, 1, 0, 0);
   1405 
   1406 		context.getDeviceInterface().cmdEndRenderPass(*cmdBuffer);
   1407 
   1408 		endCommandBuffer(context, *cmdBuffer);
   1409 
   1410 		Move<VkFence> fence(submitCommandBuffer(context, *cmdBuffer));
   1411 
   1412 		waitFence(context, fence);
   1413 
   1414 		std::vector<const void*> datas;
   1415 
   1416 		for (deUint32 i = 0; i < inputBuffers.size(); i++)
   1417 		{
   1418 			if (!inputBuffers[i]->isImage())
   1419 			{
   1420 				const Allocation& resultAlloc = inputBuffers[i]->getAllocation();
   1421 				invalidateMappedMemoryRange(context.getDeviceInterface(),
   1422 											context.getDevice(), resultAlloc.getMemory(),
   1423 											resultAlloc.getOffset(), inputBuffers[i]->getAsBuffer()->getSize());
   1424 
   1425 				// we always have our result data first
   1426 				datas.push_back(resultAlloc.getHostPtr());
   1427 			}
   1428 		}
   1429 
   1430 		if (!checkResult(datas, width, subgroupSize))
   1431 		{
   1432 			failedIterations++;
   1433 		}
   1434 
   1435 		context.getDeviceInterface().resetCommandBuffer(*cmdBuffer, 0);
   1436 	}
   1437 
   1438 	if (0 < failedIterations)
   1439 	{
   1440 		context.getTestContext().getLog()
   1441 				<< TestLog::Message << (totalIterations - failedIterations) << " / "
   1442 				<< totalIterations << " values passed" << TestLog::EndMessage;
   1443 		return tcu::TestStatus::fail("Failed!");
   1444 	}
   1445 
   1446 	return tcu::TestStatus::pass("OK");
   1447 }
   1448 
   1449 tcu::TestStatus vkt::subgroups::makeGeometryTest(
   1450 	Context& context, VkFormat format, SSBOData* extraDatas,
   1451 	deUint32 extraDatasCount,
   1452 	bool (*checkResult)(std::vector<const void*> datas, deUint32 width, deUint32 subgroupSize))
   1453 {
   1454 	const deUint32 maxWidth = 1024;
   1455 
   1456 	const Unique<VkShaderModule> vertexShaderModule(
   1457 		createShaderModule(context.getDeviceInterface(), context.getDevice(),
   1458 						   context.getBinaryCollection().get("vert"), 0u));
   1459 	const Unique<VkShaderModule> geometryShaderModule(
   1460 		createShaderModule(context.getDeviceInterface(), context.getDevice(),
   1461 						   context.getBinaryCollection().get("geom"), 0u));
   1462 
   1463 	std::vector< de::SharedPtr<BufferOrImage> > inputBuffers(extraDatasCount + 1);
   1464 
   1465 	// The implicit result SSBO we use to store our outputs from the vertex shader
   1466 	{
   1467 		vk::VkDeviceSize size = getFormatSizeInBytes(format) * maxWidth;
   1468 		inputBuffers[0] = de::SharedPtr<BufferOrImage>(new Buffer(context, size));
   1469 	}
   1470 
   1471 	for (deUint32 i = 0; i < (inputBuffers.size() - 1); i++)
   1472 	{
   1473 		if (extraDatas[i].isImage)
   1474 		{
   1475 			inputBuffers[i + 1] = de::SharedPtr<BufferOrImage>(new Image(context,
   1476 											static_cast<deUint32>(extraDatas[i].numElements), 1, extraDatas[i].format));
   1477 		}
   1478 		else
   1479 		{
   1480 			vk::VkDeviceSize size =
   1481 				getFormatSizeInBytes(extraDatas[i].format) * extraDatas[i].numElements;
   1482 			inputBuffers[i + 1] = de::SharedPtr<BufferOrImage>(new Buffer(context, size));
   1483 		}
   1484 
   1485 		const Allocation& alloc = inputBuffers[i + 1]->getAllocation();
   1486 		initializeMemory(context, alloc, extraDatas[i]);
   1487 	}
   1488 
   1489 	DescriptorSetLayoutBuilder layoutBuilder;
   1490 
   1491 	for (deUint32 i = 0; i < inputBuffers.size(); i++)
   1492 	{
   1493 		layoutBuilder.addBinding(inputBuffers[i]->getType(), 1,
   1494 								 VK_SHADER_STAGE_GEOMETRY_BIT, DE_NULL);
   1495 	}
   1496 
   1497 	const Unique<VkDescriptorSetLayout> descriptorSetLayout(
   1498 		layoutBuilder.build(context.getDeviceInterface(), context.getDevice()));
   1499 
   1500 	const Unique<VkPipelineLayout> pipelineLayout(
   1501 		makePipelineLayout(context, *descriptorSetLayout));
   1502 
   1503 	const Unique<VkRenderPass> renderPass(makeRenderPass(context, VK_FORMAT_R32_SFLOAT));
   1504 	const Unique<VkPipeline> pipeline(makeGraphicsPipeline(context, *pipelineLayout,
   1505 									  VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_GEOMETRY_BIT,
   1506 									  *vertexShaderModule, DE_NULL, *geometryShaderModule, DE_NULL, DE_NULL,
   1507 									  *renderPass, VK_PRIMITIVE_TOPOLOGY_POINT_LIST));
   1508 
   1509 	DescriptorPoolBuilder poolBuilder;
   1510 
   1511 	for (deUint32 i = 0; i < inputBuffers.size(); i++)
   1512 	{
   1513 		poolBuilder.addType(inputBuffers[i]->getType());
   1514 	}
   1515 
   1516 	const Unique<VkDescriptorPool> descriptorPool(
   1517 		poolBuilder.build(context.getDeviceInterface(), context.getDevice(),
   1518 						  VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
   1519 
   1520 	// Create descriptor set
   1521 	const Unique<VkDescriptorSet> descriptorSet(
   1522 		makeDescriptorSet(context, *descriptorPool, *descriptorSetLayout));
   1523 
   1524 	DescriptorSetUpdateBuilder updateBuilder;
   1525 
   1526 	for (deUint32 i = 0; i < inputBuffers.size(); i++)
   1527 	{
   1528 		if (inputBuffers[i]->isImage())
   1529 		{
   1530 			VkDescriptorImageInfo info =
   1531 				makeDescriptorImageInfo(inputBuffers[i]->getAsImage()->getSampler(),
   1532 										inputBuffers[i]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
   1533 
   1534 			updateBuilder.writeSingle(*descriptorSet,
   1535 									  DescriptorSetUpdateBuilder::Location::binding(i),
   1536 									  inputBuffers[i]->getType(), &info);
   1537 		}
   1538 		else
   1539 		{
   1540 			VkDescriptorBufferInfo info =
   1541 				makeDescriptorBufferInfo(inputBuffers[i]->getAsBuffer()->getBuffer(),
   1542 										 0ull, inputBuffers[i]->getAsBuffer()->getSize());
   1543 
   1544 			updateBuilder.writeSingle(*descriptorSet,
   1545 									  DescriptorSetUpdateBuilder::Location::binding(i),
   1546 									  inputBuffers[i]->getType(), &info);
   1547 		}
   1548 	}
   1549 
   1550 	updateBuilder.update(context.getDeviceInterface(), context.getDevice());
   1551 
   1552 	const Unique<VkCommandPool> cmdPool(makeCommandPool(context));
   1553 
   1554 	const deUint32 subgroupSize = getSubgroupSize(context);
   1555 
   1556 	const Unique<VkCommandBuffer> cmdBuffer(
   1557 		makeCommandBuffer(context, *cmdPool));
   1558 
   1559 	unsigned totalIterations = 0;
   1560 	unsigned failedIterations = 0;
   1561 
   1562 	Image discardableImage(context, 1, 1, VK_FORMAT_R32_SFLOAT,
   1563 						   VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT |
   1564 						   VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
   1565 
   1566 	for (deUint32 width = 1; width < maxWidth; width++)
   1567 	{
   1568 		for (deUint32 i = 1; i < inputBuffers.size(); i++)
   1569 		{
   1570 			// re-init the data
   1571 			const Allocation& alloc = inputBuffers[i]->getAllocation();
   1572 			initializeMemory(context, alloc, extraDatas[i - 1]);
   1573 		}
   1574 
   1575 		totalIterations++;
   1576 
   1577 		const Unique<VkFramebuffer> framebuffer(makeFramebuffer(context,
   1578 												*renderPass, discardableImage.getImageView(), 1, 1));
   1579 
   1580 		const VkClearValue clearValue = {{{0.0f, 0.0f, 0.0f, 0.0f}}};
   1581 
   1582 		const VkRenderPassBeginInfo renderPassBeginInfo = {
   1583 			VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, DE_NULL, *renderPass,
   1584 			*framebuffer, {{0, 0}, {1, 1}}, 1, &clearValue,
   1585 		};
   1586 
   1587 		beginCommandBuffer(context, *cmdBuffer);
   1588 
   1589 		VkViewport viewport = {0.0f, 0.0f, 1.0f, 1.0f, 0.0f, 1.0f};
   1590 
   1591 		context.getDeviceInterface().cmdSetViewport(
   1592 			*cmdBuffer, 0, 1, &viewport);
   1593 
   1594 		VkRect2D scissor = {{0, 0}, {1, 1}};
   1595 
   1596 		context.getDeviceInterface().cmdSetScissor(
   1597 			*cmdBuffer, 0, 1, &scissor);
   1598 
   1599 		context.getDeviceInterface().cmdBeginRenderPass(
   1600 			*cmdBuffer, &renderPassBeginInfo, VK_SUBPASS_CONTENTS_INLINE);
   1601 
   1602 		context.getDeviceInterface().cmdBindPipeline(
   1603 			*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
   1604 
   1605 		context.getDeviceInterface().cmdBindDescriptorSets(*cmdBuffer,
   1606 				VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, 1u,
   1607 				&descriptorSet.get(), 0u, DE_NULL);
   1608 
   1609 		context.getDeviceInterface().cmdDraw(*cmdBuffer, width, 1, 0, 0);
   1610 
   1611 		context.getDeviceInterface().cmdEndRenderPass(*cmdBuffer);
   1612 
   1613 		endCommandBuffer(context, *cmdBuffer);
   1614 
   1615 		Move<VkFence> fence(submitCommandBuffer(context, *cmdBuffer));
   1616 
   1617 		waitFence(context, fence);
   1618 
   1619 		std::vector<const void*> datas;
   1620 
   1621 		for (deUint32 i = 0; i < inputBuffers.size(); i++)
   1622 		{
   1623 			if (!inputBuffers[i]->isImage())
   1624 			{
   1625 				const Allocation& resultAlloc = inputBuffers[i]->getAllocation();
   1626 				invalidateMappedMemoryRange(context.getDeviceInterface(),
   1627 											context.getDevice(), resultAlloc.getMemory(),
   1628 											resultAlloc.getOffset(), inputBuffers[i]->getAsBuffer()->getSize());
   1629 
   1630 				// we always have our result data first
   1631 				datas.push_back(resultAlloc.getHostPtr());
   1632 			}
   1633 		}
   1634 
   1635 		if (!checkResult(datas, width, subgroupSize))
   1636 		{
   1637 			failedIterations++;
   1638 		}
   1639 
   1640 		context.getDeviceInterface().resetCommandBuffer(*cmdBuffer, 0);
   1641 	}
   1642 
   1643 	if (0 < failedIterations)
   1644 	{
   1645 		context.getTestContext().getLog()
   1646 				<< TestLog::Message << (totalIterations - failedIterations) << " / "
   1647 				<< totalIterations << " values passed" << TestLog::EndMessage;
   1648 		return tcu::TestStatus::fail("Failed!");
   1649 	}
   1650 
   1651 	return tcu::TestStatus::pass("OK");
   1652 }
   1653 
   1654 VkImageMemoryBarrier makeImageMemoryBarrier	(const VkAccessFlags			srcAccessMask,
   1655 											 const VkAccessFlags			dstAccessMask,
   1656 											 const VkImageLayout			oldLayout,
   1657 											 const VkImageLayout			newLayout,
   1658 											 const VkImage					image,
   1659 											 const VkImageSubresourceRange	subresourceRange)
   1660 {
   1661 	const VkImageMemoryBarrier barrier =
   1662 	{
   1663 		VK_STRUCTURE_TYPE_IMAGE_MEMORY_BARRIER,			// VkStructureType			sType;
   1664 		DE_NULL,										// const void*				pNext;
   1665 		srcAccessMask,									// VkAccessFlags			outputMask;
   1666 		dstAccessMask,									// VkAccessFlags			inputMask;
   1667 		oldLayout,										// VkImageLayout			oldLayout;
   1668 		newLayout,										// VkImageLayout			newLayout;
   1669 		VK_QUEUE_FAMILY_IGNORED,						// deUint32					srcQueueFamilyIndex;
   1670 		VK_QUEUE_FAMILY_IGNORED,						// deUint32					destQueueFamilyIndex;
   1671 		image,											// VkImage					image;
   1672 		subresourceRange,								// VkImageSubresourceRange	subresourceRange;
   1673 	};
   1674 	return barrier;
   1675 }
   1676 
   1677 VkBufferMemoryBarrier makeBufferMemoryBarrier (const VkAccessFlags	srcAccessMask,
   1678 											   const VkAccessFlags	dstAccessMask,
   1679 											   const VkBuffer		buffer,
   1680 											   const VkDeviceSize	offset,
   1681 											   const VkDeviceSize	bufferSizeBytes)
   1682 {
   1683 	const VkBufferMemoryBarrier barrier =
   1684 	{
   1685 		VK_STRUCTURE_TYPE_BUFFER_MEMORY_BARRIER,	// VkStructureType	sType;
   1686 		DE_NULL,									// const void*		pNext;
   1687 		srcAccessMask,								// VkAccessFlags	srcAccessMask;
   1688 		dstAccessMask,								// VkAccessFlags	dstAccessMask;
   1689 		VK_QUEUE_FAMILY_IGNORED,					// deUint32			srcQueueFamilyIndex;
   1690 		VK_QUEUE_FAMILY_IGNORED,					// deUint32			destQueueFamilyIndex;
   1691 		buffer,										// VkBuffer			buffer;
   1692 		offset,										// VkDeviceSize		offset;
   1693 		bufferSizeBytes,							// VkDeviceSize		size;
   1694 	};
   1695 	return barrier;
   1696 }
   1697 
   1698 tcu::TestStatus vkt::subgroups::makeVertexFrameBufferTest(Context& context, vk::VkFormat format,
   1699 	SSBOData* extraData, deUint32 extraDataCount,
   1700 	bool (*checkResult)(std::vector<const void*> datas, deUint32 width, deUint32 subgroupSize))
   1701 {
   1702 	const deUint32							maxWidth				= 1024u;
   1703 	vector<de::SharedPtr<BufferOrImage> >	inputBuffers			(extraDataCount);
   1704 	DescriptorSetLayoutBuilder				layoutBuilder;
   1705 	const Unique<VkShaderModule>			vertexShaderModule		(createShaderModule
   1706 																		(context.getDeviceInterface(), context.getDevice(), context.getBinaryCollection().get("vert"), 0u));
   1707 	const Unique<VkShaderModule>			fragmentShaderModule	(createShaderModule
   1708 																		(context.getDeviceInterface(), context.getDevice(), context.getBinaryCollection().get("fragment"), 0u));
   1709 	const Unique<VkRenderPass>				renderPass				(makeRenderPass(context, format));
   1710 
   1711 	const VkVertexInputBindingDescription	vertexInputBinding		=
   1712 	{
   1713 		0u,											// binding;
   1714 		static_cast<deUint32>(sizeof(tcu::Vec4)),	// stride;
   1715 		VK_VERTEX_INPUT_RATE_VERTEX					// inputRate
   1716 	};
   1717 
   1718 	const VkVertexInputAttributeDescription	vertexInputAttribute	=
   1719 	{
   1720 		0u,
   1721 		0u,
   1722 		VK_FORMAT_R32G32B32A32_SFLOAT,
   1723 		0u
   1724 	};
   1725 
   1726 	for (deUint32 i = 0u; i < extraDataCount; i++)
   1727 	{
   1728 		if (extraData[i].isImage)
   1729 		{
   1730 			inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Image(context, static_cast<deUint32>(extraData[i].numElements), 1u, extraData[i].format));
   1731 		}
   1732 		else
   1733 		{
   1734 			vk::VkDeviceSize size = getFormatSizeInBytes(extraData[i].format) * extraData[i].numElements;
   1735 			inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Buffer(context, size, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT));
   1736 		}
   1737 		const Allocation& alloc = inputBuffers[i]->getAllocation();
   1738 		initializeMemory(context, alloc, extraData[i]);
   1739 	}
   1740 
   1741 	for (deUint32 ndx = 0u; ndx < extraDataCount; ndx++)
   1742 		layoutBuilder.addBinding(inputBuffers[ndx]->getType(), 1u, VK_SHADER_STAGE_VERTEX_BIT, DE_NULL);
   1743 
   1744 	const Unique<VkDescriptorSetLayout>		descriptorSetLayout		(layoutBuilder.build(context.getDeviceInterface(), context.getDevice()));
   1745 
   1746 	const Unique<VkPipelineLayout>			pipelineLayout			(makePipelineLayout(context, *descriptorSetLayout));
   1747 
   1748 	const Unique<VkPipeline>				pipeline				(makeGraphicsPipeline(context, *pipelineLayout,
   1749 																		VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT,
   1750 																		*vertexShaderModule, *fragmentShaderModule,
   1751 																		DE_NULL, DE_NULL, DE_NULL,
   1752 																		*renderPass, VK_PRIMITIVE_TOPOLOGY_POINT_LIST,
   1753 																		&vertexInputBinding, &vertexInputAttribute, format));
   1754 	DescriptorPoolBuilder					poolBuilder;
   1755 	DescriptorSetUpdateBuilder				updateBuilder;
   1756 
   1757 
   1758 	for (deUint32 ndx = 0u; ndx < inputBuffers.size(); ndx++)
   1759 		poolBuilder.addType(inputBuffers[ndx]->getType());
   1760 
   1761 	Move <VkDescriptorPool>					descriptorPool;
   1762 	Move <VkDescriptorSet>					descriptorSet;
   1763 
   1764 	if (extraDataCount > 0)
   1765 	{
   1766 		descriptorPool = poolBuilder.build(context.getDeviceInterface(), context.getDevice(),
   1767 							VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
   1768 		descriptorSet = makeDescriptorSet(context, *descriptorPool, *descriptorSetLayout);
   1769 	}
   1770 
   1771 	for (deUint32 ndx = 0u; ndx < extraDataCount; ndx++)
   1772 	{
   1773 		const Allocation& alloc = inputBuffers[ndx]->getAllocation();
   1774 		initializeMemory(context, alloc, extraData[ndx]);
   1775 	}
   1776 
   1777 	for (deUint32 buffersNdx = 0u; buffersNdx < inputBuffers.size(); buffersNdx++)
   1778 	{
   1779 		if (inputBuffers[buffersNdx]->isImage())
   1780 		{
   1781 			VkDescriptorImageInfo info =
   1782 				makeDescriptorImageInfo(inputBuffers[buffersNdx]->getAsImage()->getSampler(),
   1783 										inputBuffers[buffersNdx]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
   1784 
   1785 			updateBuilder.writeSingle(*descriptorSet,
   1786 										DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
   1787 										inputBuffers[buffersNdx]->getType(), &info);
   1788 		}
   1789 		else
   1790 		{
   1791 			VkDescriptorBufferInfo info =
   1792 				makeDescriptorBufferInfo(inputBuffers[buffersNdx]->getAsBuffer()->getBuffer(),
   1793 										0ull, inputBuffers[buffersNdx]->getAsBuffer()->getSize());
   1794 
   1795 			updateBuilder.writeSingle(*descriptorSet,
   1796 										DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
   1797 										inputBuffers[buffersNdx]->getType(), &info);
   1798 		}
   1799 	}
   1800 	updateBuilder.update(context.getDeviceInterface(), context.getDevice());
   1801 
   1802 	const Unique<VkCommandPool>				cmdPool					(makeCommandPool(context));
   1803 
   1804 	const deUint32							subgroupSize			= getSubgroupSize(context);
   1805 
   1806 	const Unique<VkCommandBuffer>			cmdBuffer				(makeCommandBuffer(context, *cmdPool));
   1807 
   1808 	const vk::VkDeviceSize					vertexBufferSize		= maxWidth * sizeof(tcu::Vec4);
   1809 	Buffer									vertexBuffer			(context, vertexBufferSize, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT);
   1810 
   1811 	unsigned								totalIterations			= 0u;
   1812 	unsigned								failedIterations		= 0u;
   1813 
   1814 	Image									discardableImage		(context, maxWidth, 1u, format, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
   1815 
   1816 	{
   1817 		const Allocation&		alloc				= vertexBuffer.getAllocation();
   1818 		std::vector<tcu::Vec4>	data				(maxWidth, Vec4(1.0f, 1.0f, 1.0f, 1.0f));
   1819 		const float				pixelSize			= 2.0f / static_cast<float>(maxWidth);
   1820 		float					leftHandPosition	= -1.0f;
   1821 
   1822 		for(deUint32 ndx = 0u; ndx < maxWidth; ++ndx)
   1823 		{
   1824 			data[ndx][0] = leftHandPosition + pixelSize / 2.0f;
   1825 			leftHandPosition += pixelSize;
   1826 		}
   1827 
   1828 		deMemcpy(alloc.getHostPtr(), &data[0], maxWidth * sizeof(tcu::Vec4));
   1829 		vk::flushMappedMemoryRange(context.getDeviceInterface(), context.getDevice(), alloc.getMemory(), alloc.getOffset(), vertexBufferSize);
   1830 	}
   1831 
   1832 	for (deUint32 width = 1u; width < maxWidth; width++)
   1833 	{
   1834 		totalIterations++;
   1835 		const Unique<VkFramebuffer>	framebuffer			(makeFramebuffer(context, *renderPass, discardableImage.getImageView(), maxWidth, 1));
   1836 		const VkClearValue			clearValue			= {{{0.0f, 0.0f, 0.0f, 0.0f}}};
   1837 		const VkViewport			viewport			= {0.0f, 0.0f, static_cast<float>(maxWidth), 1.0f, 0.0f, 1.0f};
   1838 		const VkRect2D				scissor				= {{0, 0}, {maxWidth, 1}};
   1839 		const vk::VkDeviceSize		imageResultSize		= tcu::getPixelSize(vk::mapVkFormat(format)) * maxWidth;
   1840 		Buffer						imageBufferResult	(context, imageResultSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
   1841 		const VkDeviceSize			vertexBufferOffset	= 0u;
   1842 
   1843 		for (deUint32 ndx = 0u; ndx < inputBuffers.size(); ndx++)
   1844 		{
   1845 			const Allocation& alloc = inputBuffers[ndx]->getAllocation();
   1846 			initializeMemory(context, alloc, extraData[ndx]);
   1847 		}
   1848 
   1849 		const VkRenderPassBeginInfo	renderPassBeginInfo	=
   1850 		{
   1851 			VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, DE_NULL, *renderPass,
   1852 			*framebuffer, {{0, 0}, {maxWidth, 1}}, 1, &clearValue,
   1853 		};
   1854 
   1855 		beginCommandBuffer(context, *cmdBuffer);
   1856 		{
   1857 			context.getDeviceInterface().cmdSetViewport(
   1858 				*cmdBuffer, 0, 1, &viewport);
   1859 
   1860 			context.getDeviceInterface().cmdSetScissor(
   1861 				*cmdBuffer, 0, 1, &scissor);
   1862 
   1863 			context.getDeviceInterface().cmdBeginRenderPass(
   1864 				*cmdBuffer, &renderPassBeginInfo, VK_SUBPASS_CONTENTS_INLINE);
   1865 
   1866 			context.getDeviceInterface().cmdBindPipeline(
   1867 				*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
   1868 
   1869 			if (extraDataCount > 0)
   1870 			{
   1871 				context.getDeviceInterface().cmdBindDescriptorSets(*cmdBuffer,
   1872 					VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, 1u,
   1873 					&descriptorSet.get(), 0u, DE_NULL);
   1874 			}
   1875 
   1876 			context.getDeviceInterface().cmdBindVertexBuffers(*cmdBuffer, 0u, 1u, vertexBuffer.getBufferPtr(), &vertexBufferOffset);
   1877 
   1878 			context.getDeviceInterface().cmdDraw(*cmdBuffer, width, 1u, 0u, 0u);
   1879 
   1880 			context.getDeviceInterface().cmdEndRenderPass(*cmdBuffer);
   1881 
   1882 			const VkImageSubresourceRange	subresourceRange	=
   1883 			{
   1884 				VK_IMAGE_ASPECT_COLOR_BIT,											//VkImageAspectFlags	aspectMask
   1885 				0u,																	//deUint32				baseMipLevel
   1886 				1u,																	//deUint32				levelCount
   1887 				0u,																	//deUint32				baseArrayLayer
   1888 				1u																	//deUint32				layerCount
   1889 			};
   1890 
   1891 			const VkBufferImageCopy			copyRegion			=
   1892 			{
   1893 				0ull,																//	VkDeviceSize				bufferOffset;
   1894 				0u,																	//	deUint32					bufferRowLength;
   1895 				0u,																	//	deUint32					bufferImageHeight;
   1896 				makeImageSubresourceLayers(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 0u, 1u),	//	VkImageSubresourceLayers	imageSubresource;
   1897 				makeOffset3D(0, 0, 0),												//	VkOffset3D					imageOffset;
   1898 				makeExtent3D(IVec3(maxWidth,1,1)),									//	VkExtent3D					imageExtent;
   1899 			};
   1900 
   1901 			const VkImageMemoryBarrier prepareForTransferBarrier = makeImageMemoryBarrier(
   1902 																	VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT,
   1903 																	VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
   1904 																	discardableImage.getImage(), subresourceRange);
   1905 
   1906 			const VkBufferMemoryBarrier copyBarrier = makeBufferMemoryBarrier(
   1907 														VK_ACCESS_TRANSFER_WRITE_BIT, VK_ACCESS_HOST_READ_BIT,
   1908 														imageBufferResult.getBuffer(), 0ull, imageResultSize);
   1909 
   1910 			context.getDeviceInterface().cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, (VkDependencyFlags)0, 0u, (const VkMemoryBarrier*)DE_NULL, 0u, (const VkBufferMemoryBarrier*)DE_NULL, 1u, &prepareForTransferBarrier);
   1911 			context.getDeviceInterface().cmdCopyImageToBuffer(*cmdBuffer, discardableImage.getImage(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, imageBufferResult.getBuffer(), 1u, &copyRegion);
   1912 			context.getDeviceInterface().cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT, (VkDependencyFlags)0, 0u, (const VkMemoryBarrier*)DE_NULL, 1u, &copyBarrier, 0u, (const VkImageMemoryBarrier*)DE_NULL);
   1913 
   1914 			endCommandBuffer(context, *cmdBuffer);
   1915 			Move<VkFence> fence(submitCommandBuffer(context, *cmdBuffer));
   1916 			waitFence(context, fence);
   1917 		}
   1918 
   1919 		{
   1920 			const Allocation& allocResult = imageBufferResult.getAllocation();
   1921 			invalidateMappedMemoryRange(context.getDeviceInterface(), context.getDevice(), allocResult.getMemory(), allocResult.getOffset(), imageResultSize);
   1922 
   1923 			std::vector<const void*> datas;
   1924 			datas.push_back(allocResult.getHostPtr());
   1925 			if (!checkResult(datas, width, subgroupSize))
   1926 				failedIterations++;
   1927 		}
   1928 	}
   1929 
   1930 	if (0 < failedIterations)
   1931 	{
   1932 		context.getTestContext().getLog()
   1933 				<< TestLog::Message << (totalIterations - failedIterations) << " / "
   1934 				<< totalIterations << " values passed" << TestLog::EndMessage;
   1935 		return tcu::TestStatus::fail("Failed!");
   1936 	}
   1937 
   1938 	return tcu::TestStatus::pass("OK");
   1939 }
   1940 
   1941 tcu::TestStatus vkt::subgroups::makeVertexTest(
   1942 	Context& context, VkFormat format, SSBOData* extraDatas,
   1943 	deUint32 extraDatasCount,
   1944 	bool (*checkResult)(std::vector<const void*> datas, deUint32 width, deUint32 subgroupSize))
   1945 {
   1946 	const deUint32 maxWidth = 1024;
   1947 
   1948 	const Unique<VkShaderModule> vertexShaderModule(
   1949 		createShaderModule(context.getDeviceInterface(), context.getDevice(),
   1950 						   context.getBinaryCollection().get("vert"), 0u));
   1951 
   1952 	std::vector< de::SharedPtr<BufferOrImage> > inputBuffers(extraDatasCount + 1);
   1953 
   1954 	// The implicit result SSBO we use to store our outputs from the vertex shader
   1955 	{
   1956 		vk::VkDeviceSize size = getFormatSizeInBytes(format) * maxWidth;
   1957 		inputBuffers[0] = de::SharedPtr<BufferOrImage>(new Buffer(context, size));
   1958 	}
   1959 
   1960 	for (deUint32 i = 0; i < (inputBuffers.size() - 1); i++)
   1961 	{
   1962 		if (extraDatas[i].isImage)
   1963 		{
   1964 			inputBuffers[i + 1] = de::SharedPtr<BufferOrImage>(new Image(context,
   1965 											static_cast<deUint32>(extraDatas[i].numElements), 1, extraDatas[i].format));
   1966 		}
   1967 		else
   1968 		{
   1969 			vk::VkDeviceSize size =
   1970 				getFormatSizeInBytes(extraDatas[i].format) * extraDatas[i].numElements;
   1971 			inputBuffers[i + 1] = de::SharedPtr<BufferOrImage>(new Buffer(context, size));
   1972 		}
   1973 
   1974 		const Allocation& alloc = inputBuffers[i + 1]->getAllocation();
   1975 		initializeMemory(context, alloc, extraDatas[i]);
   1976 	}
   1977 
   1978 	DescriptorSetLayoutBuilder layoutBuilder;
   1979 
   1980 	for (deUint32 i = 0; i < inputBuffers.size(); i++)
   1981 	{
   1982 		layoutBuilder.addBinding(inputBuffers[i]->getType(), 1,
   1983 								 VK_SHADER_STAGE_VERTEX_BIT, DE_NULL);
   1984 	}
   1985 
   1986 	const Unique<VkDescriptorSetLayout> descriptorSetLayout(
   1987 		layoutBuilder.build(context.getDeviceInterface(), context.getDevice()));
   1988 
   1989 	const Unique<VkPipelineLayout> pipelineLayout(
   1990 		makePipelineLayout(context, *descriptorSetLayout));
   1991 
   1992 	const Unique<VkRenderPass> renderPass(makeRenderPass(context, VK_FORMAT_R32_SFLOAT));
   1993 	const Unique<VkPipeline> pipeline(makeGraphicsPipeline(context, *pipelineLayout,
   1994 									  VK_SHADER_STAGE_VERTEX_BIT, *vertexShaderModule, DE_NULL, DE_NULL, DE_NULL, DE_NULL,
   1995 									  *renderPass, VK_PRIMITIVE_TOPOLOGY_POINT_LIST));
   1996 
   1997 	DescriptorPoolBuilder poolBuilder;
   1998 
   1999 	for (deUint32 i = 0; i < inputBuffers.size(); i++)
   2000 	{
   2001 		poolBuilder.addType(inputBuffers[i]->getType());
   2002 	}
   2003 
   2004 	const Unique<VkDescriptorPool> descriptorPool(
   2005 		poolBuilder.build(context.getDeviceInterface(), context.getDevice(),
   2006 						  VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
   2007 
   2008 	// Create descriptor set
   2009 	const Unique<VkDescriptorSet> descriptorSet(
   2010 		makeDescriptorSet(context, *descriptorPool, *descriptorSetLayout));
   2011 
   2012 	DescriptorSetUpdateBuilder updateBuilder;
   2013 
   2014 	for (deUint32 i = 0; i < inputBuffers.size(); i++)
   2015 	{
   2016 		if (inputBuffers[i]->isImage())
   2017 		{
   2018 			VkDescriptorImageInfo info =
   2019 				makeDescriptorImageInfo(inputBuffers[i]->getAsImage()->getSampler(),
   2020 										inputBuffers[i]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
   2021 
   2022 			updateBuilder.writeSingle(*descriptorSet,
   2023 									  DescriptorSetUpdateBuilder::Location::binding(i),
   2024 									  inputBuffers[i]->getType(), &info);
   2025 		}
   2026 		else
   2027 		{
   2028 			VkDescriptorBufferInfo info =
   2029 				makeDescriptorBufferInfo(inputBuffers[i]->getAsBuffer()->getBuffer(),
   2030 										 0ull, inputBuffers[i]->getAsBuffer()->getSize());
   2031 
   2032 			updateBuilder.writeSingle(*descriptorSet,
   2033 									  DescriptorSetUpdateBuilder::Location::binding(i),
   2034 									  inputBuffers[i]->getType(), &info);
   2035 		}
   2036 	}
   2037 
   2038 	updateBuilder.update(context.getDeviceInterface(), context.getDevice());
   2039 
   2040 	const Unique<VkCommandPool> cmdPool(makeCommandPool(context));
   2041 
   2042 	const deUint32 subgroupSize = getSubgroupSize(context);
   2043 
   2044 	const Unique<VkCommandBuffer> cmdBuffer(
   2045 		makeCommandBuffer(context, *cmdPool));
   2046 
   2047 	unsigned totalIterations = 0;
   2048 	unsigned failedIterations = 0;
   2049 
   2050 	Image discardableImage(context, 1, 1, VK_FORMAT_R32_SFLOAT,
   2051 						   VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT |
   2052 						   VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
   2053 
   2054 	for (deUint32 width = 1; width < maxWidth; width++)
   2055 	{
   2056 		for (deUint32 i = 1; i < inputBuffers.size(); i++)
   2057 		{
   2058 			// re-init the data
   2059 			const Allocation& alloc = inputBuffers[i]->getAllocation();
   2060 			initializeMemory(context, alloc, extraDatas[i - 1]);
   2061 		}
   2062 
   2063 		totalIterations++;
   2064 
   2065 		const Unique<VkFramebuffer> framebuffer(makeFramebuffer(context,
   2066 												*renderPass, discardableImage.getImageView(), 1, 1));
   2067 
   2068 		const VkClearValue clearValue = {{{0.0f, 0.0f, 0.0f, 0.0f}}};
   2069 
   2070 		const VkRenderPassBeginInfo renderPassBeginInfo = {
   2071 			VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, DE_NULL, *renderPass,
   2072 			*framebuffer, {{0, 0}, {1, 1}}, 1, &clearValue,
   2073 		};
   2074 
   2075 		beginCommandBuffer(context, *cmdBuffer);
   2076 
   2077 		VkViewport viewport = {0.0f, 0.0f, 1.0f, 1.0f, 0.0f, 1.0f};
   2078 
   2079 		context.getDeviceInterface().cmdSetViewport(
   2080 			*cmdBuffer, 0, 1, &viewport);
   2081 
   2082 		VkRect2D scissor = {{0, 0}, {1, 1}};
   2083 
   2084 		context.getDeviceInterface().cmdSetScissor(
   2085 			*cmdBuffer, 0, 1, &scissor);
   2086 
   2087 		context.getDeviceInterface().cmdBeginRenderPass(
   2088 			*cmdBuffer, &renderPassBeginInfo, VK_SUBPASS_CONTENTS_INLINE);
   2089 
   2090 		context.getDeviceInterface().cmdBindPipeline(
   2091 			*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
   2092 
   2093 		context.getDeviceInterface().cmdBindDescriptorSets(*cmdBuffer,
   2094 				VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, 1u,
   2095 				&descriptorSet.get(), 0u, DE_NULL);
   2096 
   2097 		context.getDeviceInterface().cmdDraw(*cmdBuffer, width, 1, 0, 0);
   2098 
   2099 		context.getDeviceInterface().cmdEndRenderPass(*cmdBuffer);
   2100 
   2101 		endCommandBuffer(context, *cmdBuffer);
   2102 
   2103 		Move<VkFence> fence(submitCommandBuffer(context, *cmdBuffer));
   2104 
   2105 		waitFence(context, fence);
   2106 
   2107 		std::vector<const void*> datas;
   2108 
   2109 		for (deUint32 i = 0; i < inputBuffers.size(); i++)
   2110 		{
   2111 			if (!inputBuffers[i]->isImage())
   2112 			{
   2113 				const Allocation& resultAlloc = inputBuffers[i]->getAllocation();
   2114 				invalidateMappedMemoryRange(context.getDeviceInterface(),
   2115 											context.getDevice(), resultAlloc.getMemory(),
   2116 											resultAlloc.getOffset(), inputBuffers[i]->getAsBuffer()->getSize());
   2117 
   2118 				// we always have our result data first
   2119 				datas.push_back(resultAlloc.getHostPtr());
   2120 			}
   2121 		}
   2122 		if (!checkResult(datas, width, subgroupSize))
   2123 		{
   2124 			failedIterations++;
   2125 		}
   2126 
   2127 		context.getDeviceInterface().resetCommandBuffer(*cmdBuffer, 0);
   2128 	}
   2129 
   2130 	if (0 < failedIterations)
   2131 	{
   2132 		context.getTestContext().getLog()
   2133 				<< TestLog::Message << (totalIterations - failedIterations) << " / "
   2134 				<< totalIterations << " values passed" << TestLog::EndMessage;
   2135 		return tcu::TestStatus::fail("Failed!");
   2136 	}
   2137 
   2138 	return tcu::TestStatus::pass("OK");
   2139 }
   2140 
   2141 tcu::TestStatus vkt::subgroups::makeFragmentFrameBufferTest	(Context& context, VkFormat format, SSBOData* extraDatas,
   2142 	deUint32 extraDatasCount,
   2143 	bool (*checkResult)(std::vector<const void*> datas, deUint32 width,
   2144 						deUint32 height, deUint32 subgroupSize))
   2145 {
   2146 	const Unique<VkShaderModule>			vertexShaderModule		(createShaderModule
   2147 																		(context.getDeviceInterface(), context.getDevice(), context.getBinaryCollection().get("vert"), 0u));
   2148 	const Unique<VkShaderModule>			fragmentShaderModule	(createShaderModule
   2149 																		(context.getDeviceInterface(), context.getDevice(), context.getBinaryCollection().get("fragment"), 0u));
   2150 
   2151 	std::vector< de::SharedPtr<BufferOrImage> > inputBuffers(extraDatasCount);
   2152 
   2153 	for (deUint32 i = 0; i < extraDatasCount; i++)
   2154 	{
   2155 		if (extraDatas[i].isImage)
   2156 		{
   2157 			inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Image(context,
   2158 										static_cast<deUint32>(extraDatas[i].numElements), 1, extraDatas[i].format));
   2159 		}
   2160 		else
   2161 		{
   2162 			vk::VkDeviceSize size =
   2163 				getFormatSizeInBytes(extraDatas[i].format) * extraDatas[i].numElements;
   2164 			inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Buffer(context, size, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT));
   2165 		}
   2166 
   2167 		const Allocation& alloc = inputBuffers[i]->getAllocation();
   2168 		initializeMemory(context, alloc, extraDatas[i]);
   2169 	}
   2170 
   2171 	DescriptorSetLayoutBuilder layoutBuilder;
   2172 
   2173 	for (deUint32 i = 0; i < extraDatasCount; i++)
   2174 	{
   2175 		layoutBuilder.addBinding(inputBuffers[i]->getType(), 1,
   2176 								 VK_SHADER_STAGE_FRAGMENT_BIT, DE_NULL);
   2177 	}
   2178 
   2179 	const Unique<VkDescriptorSetLayout> descriptorSetLayout(
   2180 		layoutBuilder.build(context.getDeviceInterface(), context.getDevice()));
   2181 
   2182 	const Unique<VkPipelineLayout> pipelineLayout(
   2183 		makePipelineLayout(context, *descriptorSetLayout));
   2184 
   2185 	const Unique<VkRenderPass> renderPass(makeRenderPass(context, format));
   2186 	const Unique<VkPipeline> pipeline(makeGraphicsPipeline(context, *pipelineLayout,
   2187 									  VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT,
   2188 									  *vertexShaderModule, *fragmentShaderModule, DE_NULL, DE_NULL, DE_NULL, *renderPass));
   2189 
   2190 	DescriptorPoolBuilder poolBuilder;
   2191 
   2192 	// To stop validation complaining, always add at least one type to pool.
   2193 	poolBuilder.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
   2194 	for (deUint32 i = 0; i < extraDatasCount; i++)
   2195 	{
   2196 		poolBuilder.addType(inputBuffers[i]->getType());
   2197 	}
   2198 
   2199 	Move<VkDescriptorPool> descriptorPool;
   2200 	// Create descriptor set
   2201 	Move<VkDescriptorSet> descriptorSet;
   2202 
   2203 	if (extraDatasCount > 0)
   2204 	{
   2205 		descriptorPool = poolBuilder.build(context.getDeviceInterface(), context.getDevice(),
   2206 													VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
   2207 
   2208 		descriptorSet	= makeDescriptorSet(context, *descriptorPool, *descriptorSetLayout);
   2209 	}
   2210 
   2211 	DescriptorSetUpdateBuilder updateBuilder;
   2212 
   2213 	for (deUint32 i = 0; i < extraDatasCount; i++)
   2214 	{
   2215 		if (inputBuffers[i]->isImage())
   2216 		{
   2217 			VkDescriptorImageInfo info =
   2218 				makeDescriptorImageInfo(inputBuffers[i]->getAsImage()->getSampler(),
   2219 										inputBuffers[i]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
   2220 
   2221 			updateBuilder.writeSingle(*descriptorSet,
   2222 									  DescriptorSetUpdateBuilder::Location::binding(i),
   2223 									  inputBuffers[i]->getType(), &info);
   2224 		}
   2225 		else
   2226 		{
   2227 			VkDescriptorBufferInfo info =
   2228 				makeDescriptorBufferInfo(inputBuffers[i]->getAsBuffer()->getBuffer(),
   2229 										 0ull, inputBuffers[i]->getAsBuffer()->getSize());
   2230 
   2231 			updateBuilder.writeSingle(*descriptorSet,
   2232 									  DescriptorSetUpdateBuilder::Location::binding(i),
   2233 									  inputBuffers[i]->getType(), &info);
   2234 		}
   2235 	}
   2236 
   2237 	if (extraDatasCount > 0)
   2238 		updateBuilder.update(context.getDeviceInterface(), context.getDevice());
   2239 
   2240 	const Unique<VkCommandPool> cmdPool(makeCommandPool(context));
   2241 
   2242 	const deUint32 subgroupSize = getSubgroupSize(context);
   2243 
   2244 	const Unique<VkCommandBuffer> cmdBuffer(
   2245 		makeCommandBuffer(context, *cmdPool));
   2246 
   2247 	unsigned totalIterations = 0;
   2248 	unsigned failedIterations = 0;
   2249 
   2250 	for (deUint32 width = 8; width <= subgroupSize; width *= 2)
   2251 	{
   2252 		for (deUint32 height = 8; height <= subgroupSize; height *= 2)
   2253 		{
   2254 			totalIterations++;
   2255 
   2256 			// re-init the data
   2257 			for (deUint32 i = 0; i < extraDatasCount; i++)
   2258 			{
   2259 				const Allocation& alloc = inputBuffers[i]->getAllocation();
   2260 				initializeMemory(context, alloc, extraDatas[i]);
   2261 			}
   2262 
   2263 			VkDeviceSize formatSize = getFormatSizeInBytes(format);
   2264 			const VkDeviceSize resultImageSizeInBytes =
   2265 				width * height * formatSize;
   2266 
   2267 			Image resultImage(context, width, height, format,
   2268 							  VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT |
   2269 							  VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
   2270 
   2271 			Buffer resultBuffer(context, resultImageSizeInBytes,
   2272 								VK_IMAGE_USAGE_TRANSFER_DST_BIT);
   2273 
   2274 			const Unique<VkFramebuffer> framebuffer(makeFramebuffer(context,
   2275 													*renderPass, resultImage.getImageView(), width, height));
   2276 
   2277 			const VkClearValue clearValue = {{{0.0f, 0.0f, 0.0f, 0.0f}}};
   2278 
   2279 			const VkRenderPassBeginInfo renderPassBeginInfo = {
   2280 				VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, DE_NULL, *renderPass,
   2281 				*framebuffer, {{0, 0}, {width, height}}, 1, &clearValue,
   2282 			};
   2283 
   2284 			beginCommandBuffer(context, *cmdBuffer);
   2285 
   2286 			VkViewport viewport = {0.0f, 0.0f, static_cast<float>(width),
   2287 								   static_cast<float>(height), 0.0f, 1.0f
   2288 								  };
   2289 
   2290 			context.getDeviceInterface().cmdSetViewport(
   2291 				*cmdBuffer, 0, 1, &viewport);
   2292 
   2293 			VkRect2D scissor = {{0, 0}, {width, height}};
   2294 
   2295 			context.getDeviceInterface().cmdSetScissor(
   2296 				*cmdBuffer, 0, 1, &scissor);
   2297 
   2298 			context.getDeviceInterface().cmdBeginRenderPass(
   2299 				*cmdBuffer, &renderPassBeginInfo, VK_SUBPASS_CONTENTS_INLINE);
   2300 
   2301 			context.getDeviceInterface().cmdBindPipeline(
   2302 				*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
   2303 
   2304 			if (extraDatasCount > 0)
   2305 			{
   2306 				context.getDeviceInterface().cmdBindDescriptorSets(*cmdBuffer,
   2307 						VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, 1u,
   2308 						&descriptorSet.get(), 0u, DE_NULL);
   2309 			}
   2310 
   2311 			context.getDeviceInterface().cmdDraw(*cmdBuffer, 3, 1, 0, 0);
   2312 
   2313 			context.getDeviceInterface().cmdEndRenderPass(*cmdBuffer);
   2314 
   2315 			vk::VkBufferImageCopy region = {0, 0, 0,
   2316 				{VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, 1}, {0, 0, 0},
   2317 				{width, height, 1}
   2318 			};
   2319 			context.getDeviceInterface().cmdCopyImageToBuffer(*cmdBuffer,
   2320 					resultImage.getImage(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
   2321 					resultBuffer.getBuffer(), 1, &region);
   2322 
   2323 			endCommandBuffer(context, *cmdBuffer);
   2324 
   2325 			Move<VkFence> fence(submitCommandBuffer(context, *cmdBuffer));
   2326 
   2327 			waitFence(context, fence);
   2328 
   2329 			std::vector<const void*> datas;
   2330 			{
   2331 				const Allocation& resultAlloc = resultBuffer.getAllocation();
   2332 				invalidateMappedMemoryRange(context.getDeviceInterface(),
   2333 											context.getDevice(), resultAlloc.getMemory(),
   2334 											resultAlloc.getOffset(), resultImageSizeInBytes);
   2335 
   2336 				// we always have our result data first
   2337 				datas.push_back(resultAlloc.getHostPtr());
   2338 			}
   2339 
   2340 			if (!checkResult(datas, width, height, subgroupSize))
   2341 			{
   2342 				failedIterations++;
   2343 			}
   2344 
   2345 			context.getDeviceInterface().resetCommandBuffer(*cmdBuffer, 0);
   2346 		}
   2347 	}
   2348 
   2349 	if (0 < failedIterations)
   2350 	{
   2351 		context.getTestContext().getLog()
   2352 				<< TestLog::Message << (totalIterations - failedIterations) << " / "
   2353 				<< totalIterations << " values passed" << TestLog::EndMessage;
   2354 		return tcu::TestStatus::fail("Failed!");
   2355 	}
   2356 
   2357 	return tcu::TestStatus::pass("OK");
   2358 }
   2359 
   2360 tcu::TestStatus vkt::subgroups::makeFragmentTest(
   2361 	Context& context, VkFormat format, SSBOData* extraDatas,
   2362 	deUint32 extraDatasCount,
   2363 	bool (*checkResult)(std::vector<const void*> datas, deUint32 width,
   2364 						deUint32 height, deUint32 subgroupSize))
   2365 {
   2366 	const Unique<VkShaderModule> vertexShaderModule(
   2367 		createShaderModule(context.getDeviceInterface(), context.getDevice(),
   2368 						   context.getBinaryCollection().get("vert"), 0u));
   2369 	const Unique<VkShaderModule> fragmentShaderModule(
   2370 		createShaderModule(context.getDeviceInterface(), context.getDevice(),
   2371 						   context.getBinaryCollection().get("frag"), 0u));
   2372 
   2373 	std::vector< de::SharedPtr<BufferOrImage> > inputBuffers(extraDatasCount);
   2374 
   2375 	for (deUint32 i = 0; i < extraDatasCount; i++)
   2376 	{
   2377 		if (extraDatas[i].isImage)
   2378 		{
   2379 			inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Image(context,
   2380 										static_cast<deUint32>(extraDatas[i].numElements), 1, extraDatas[i].format));
   2381 		}
   2382 		else
   2383 		{
   2384 			vk::VkDeviceSize size =
   2385 				getFormatSizeInBytes(extraDatas[i].format) * extraDatas[i].numElements;
   2386 			inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Buffer(context, size));
   2387 		}
   2388 
   2389 		const Allocation& alloc = inputBuffers[i]->getAllocation();
   2390 		initializeMemory(context, alloc, extraDatas[i]);
   2391 	}
   2392 
   2393 	DescriptorSetLayoutBuilder layoutBuilder;
   2394 
   2395 	for (deUint32 i = 0; i < extraDatasCount; i++)
   2396 	{
   2397 		layoutBuilder.addBinding(inputBuffers[i]->getType(), 1,
   2398 								 VK_SHADER_STAGE_FRAGMENT_BIT, DE_NULL);
   2399 	}
   2400 
   2401 	const Unique<VkDescriptorSetLayout> descriptorSetLayout(
   2402 		layoutBuilder.build(context.getDeviceInterface(), context.getDevice()));
   2403 
   2404 	const Unique<VkPipelineLayout> pipelineLayout(
   2405 		makePipelineLayout(context, *descriptorSetLayout));
   2406 
   2407 	const Unique<VkRenderPass> renderPass(makeRenderPass(context, format));
   2408 	const Unique<VkPipeline> pipeline(makeGraphicsPipeline(context, *pipelineLayout,
   2409 									  VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT,
   2410 									  *vertexShaderModule, *fragmentShaderModule, DE_NULL, DE_NULL, DE_NULL, *renderPass));
   2411 
   2412 	DescriptorPoolBuilder poolBuilder;
   2413 
   2414 	// To stop validation complaining, always add at least one type to pool.
   2415 	poolBuilder.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
   2416 	for (deUint32 i = 0; i < extraDatasCount; i++)
   2417 	{
   2418 		poolBuilder.addType(inputBuffers[i]->getType());
   2419 	}
   2420 
   2421 	const Unique<VkDescriptorPool> descriptorPool(
   2422 		poolBuilder.build(context.getDeviceInterface(), context.getDevice(),
   2423 						  VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
   2424 
   2425 	// Create descriptor set
   2426 	const Unique<VkDescriptorSet> descriptorSet(
   2427 		makeDescriptorSet(context, *descriptorPool, *descriptorSetLayout));
   2428 
   2429 	DescriptorSetUpdateBuilder updateBuilder;
   2430 
   2431 	for (deUint32 i = 0; i < extraDatasCount; i++)
   2432 	{
   2433 		if (inputBuffers[i]->isImage())
   2434 		{
   2435 			VkDescriptorImageInfo info =
   2436 				makeDescriptorImageInfo(inputBuffers[i]->getAsImage()->getSampler(),
   2437 										inputBuffers[i]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
   2438 
   2439 			updateBuilder.writeSingle(*descriptorSet,
   2440 									  DescriptorSetUpdateBuilder::Location::binding(i),
   2441 									  inputBuffers[i]->getType(), &info);
   2442 		}
   2443 		else
   2444 		{
   2445 			VkDescriptorBufferInfo info =
   2446 				makeDescriptorBufferInfo(inputBuffers[i]->getAsBuffer()->getBuffer(),
   2447 										 0ull, inputBuffers[i]->getAsBuffer()->getSize());
   2448 
   2449 			updateBuilder.writeSingle(*descriptorSet,
   2450 									  DescriptorSetUpdateBuilder::Location::binding(i),
   2451 									  inputBuffers[i]->getType(), &info);
   2452 		}
   2453 	}
   2454 
   2455 	updateBuilder.update(context.getDeviceInterface(), context.getDevice());
   2456 
   2457 	const Unique<VkCommandPool> cmdPool(makeCommandPool(context));
   2458 
   2459 	const deUint32 subgroupSize = getSubgroupSize(context);
   2460 
   2461 	const Unique<VkCommandBuffer> cmdBuffer(
   2462 		makeCommandBuffer(context, *cmdPool));
   2463 
   2464 	unsigned totalIterations = 0;
   2465 	unsigned failedIterations = 0;
   2466 
   2467 	for (deUint32 width = 8; width <= subgroupSize; width *= 2)
   2468 	{
   2469 		for (deUint32 height = 8; height <= subgroupSize; height *= 2)
   2470 		{
   2471 			totalIterations++;
   2472 
   2473 			// re-init the data
   2474 			for (deUint32 i = 0; i < extraDatasCount; i++)
   2475 			{
   2476 				const Allocation& alloc = inputBuffers[i]->getAllocation();
   2477 				initializeMemory(context, alloc, extraDatas[i]);
   2478 			}
   2479 
   2480 			VkDeviceSize formatSize = getFormatSizeInBytes(format);
   2481 			const VkDeviceSize resultImageSizeInBytes =
   2482 				width * height * formatSize;
   2483 
   2484 			Image resultImage(context, width, height, format,
   2485 							  VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT |
   2486 							  VK_IMAGE_USAGE_TRANSFER_SRC_BIT);
   2487 
   2488 			Buffer resultBuffer(context, resultImageSizeInBytes,
   2489 								VK_IMAGE_USAGE_TRANSFER_DST_BIT);
   2490 
   2491 			const Unique<VkFramebuffer> framebuffer(makeFramebuffer(context,
   2492 													*renderPass, resultImage.getImageView(), width, height));
   2493 
   2494 			const VkClearValue clearValue = {{{0.0f, 0.0f, 0.0f, 0.0f}}};
   2495 
   2496 			const VkRenderPassBeginInfo renderPassBeginInfo = {
   2497 				VK_STRUCTURE_TYPE_RENDER_PASS_BEGIN_INFO, DE_NULL, *renderPass,
   2498 				*framebuffer, {{0, 0}, {width, height}}, 1, &clearValue,
   2499 			};
   2500 
   2501 			beginCommandBuffer(context, *cmdBuffer);
   2502 
   2503 			VkViewport viewport = {0.0f, 0.0f, static_cast<float>(width),
   2504 								   static_cast<float>(height), 0.0f, 1.0f
   2505 								  };
   2506 
   2507 			context.getDeviceInterface().cmdSetViewport(
   2508 				*cmdBuffer, 0, 1, &viewport);
   2509 
   2510 			VkRect2D scissor = {{0, 0}, {width, height}};
   2511 
   2512 			context.getDeviceInterface().cmdSetScissor(
   2513 				*cmdBuffer, 0, 1, &scissor);
   2514 
   2515 			context.getDeviceInterface().cmdBeginRenderPass(
   2516 				*cmdBuffer, &renderPassBeginInfo, VK_SUBPASS_CONTENTS_INLINE);
   2517 
   2518 			context.getDeviceInterface().cmdBindPipeline(
   2519 				*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);
   2520 
   2521 			context.getDeviceInterface().cmdBindDescriptorSets(*cmdBuffer,
   2522 					VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, 1u,
   2523 					&descriptorSet.get(), 0u, DE_NULL);
   2524 
   2525 			context.getDeviceInterface().cmdDraw(*cmdBuffer, 3, 1, 0, 0);
   2526 
   2527 			context.getDeviceInterface().cmdEndRenderPass(*cmdBuffer);
   2528 
   2529 			vk::VkBufferImageCopy region = {0, 0, 0,
   2530 				{VK_IMAGE_ASPECT_COLOR_BIT, 0, 0, 1}, {0, 0, 0},
   2531 				{width, height, 1}};
   2532 
   2533 			const vk::VkImageSubresourceRange subresourceRange = {
   2534 				VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, 1u};
   2535 
   2536 			const VkImageMemoryBarrier prepareForTransferBarrier = makeImageMemoryBarrier(
   2537 																	VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_ACCESS_TRANSFER_READ_BIT,
   2538 																	VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
   2539 																	resultImage.getImage(), subresourceRange);
   2540 
   2541 			context.getDeviceInterface().cmdPipelineBarrier(*cmdBuffer,
   2542 					VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT,
   2543 					(VkDependencyFlags)0, 0u, (const VkMemoryBarrier*)DE_NULL, 0u,
   2544 					(const VkBufferMemoryBarrier*)DE_NULL, 1u, &prepareForTransferBarrier);
   2545 
   2546 			context.getDeviceInterface().cmdCopyImageToBuffer(*cmdBuffer,
   2547 					resultImage.getImage(), VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
   2548 					resultBuffer.getBuffer(), 1, &region);
   2549 
   2550 			endCommandBuffer(context, *cmdBuffer);
   2551 
   2552 			Move<VkFence> fence(submitCommandBuffer(context, *cmdBuffer));
   2553 
   2554 			waitFence(context, fence);
   2555 
   2556 			std::vector<const void*> datas;
   2557 			{
   2558 				const Allocation& resultAlloc = resultBuffer.getAllocation();
   2559 				invalidateMappedMemoryRange(context.getDeviceInterface(),
   2560 											context.getDevice(), resultAlloc.getMemory(),
   2561 											resultAlloc.getOffset(), resultImageSizeInBytes);
   2562 
   2563 				// we always have our result data first
   2564 				datas.push_back(resultAlloc.getHostPtr());
   2565 			}
   2566 
   2567 			for (deUint32 i = 0; i < extraDatasCount; i++)
   2568 			{
   2569 				if (!inputBuffers[i]->isImage())
   2570 				{
   2571 					const Allocation& resultAlloc = inputBuffers[i]->getAllocation();
   2572 					invalidateMappedMemoryRange(context.getDeviceInterface(),
   2573 												context.getDevice(), resultAlloc.getMemory(),
   2574 												resultAlloc.getOffset(), inputBuffers[i]->getAsBuffer()->getSize());
   2575 
   2576 					// we always have our result data first
   2577 					datas.push_back(resultAlloc.getHostPtr());
   2578 				}
   2579 			}
   2580 
   2581 			if (!checkResult(datas, width, height, subgroupSize))
   2582 			{
   2583 				failedIterations++;
   2584 			}
   2585 
   2586 			context.getDeviceInterface().resetCommandBuffer(*cmdBuffer, 0);
   2587 		}
   2588 	}
   2589 
   2590 	if (0 < failedIterations)
   2591 	{
   2592 		context.getTestContext().getLog()
   2593 				<< TestLog::Message << (totalIterations - failedIterations) << " / "
   2594 				<< totalIterations << " values passed" << TestLog::EndMessage;
   2595 		return tcu::TestStatus::fail("Failed!");
   2596 	}
   2597 
   2598 	return tcu::TestStatus::pass("OK");
   2599 }
   2600 
   2601 tcu::TestStatus vkt::subgroups::makeComputeTest(
   2602 	Context& context, VkFormat format, SSBOData* inputs, deUint32 inputsCount,
   2603 	bool (*checkResult)(std::vector<const void*> datas,
   2604 						const deUint32 numWorkgroups[3], const deUint32 localSize[3],
   2605 						deUint32 subgroupSize))
   2606 {
   2607 	VkDeviceSize elementSize = getFormatSizeInBytes(format);
   2608 
   2609 	const VkDeviceSize resultBufferSize = maxSupportedSubgroupSize() *
   2610 										  maxSupportedSubgroupSize() *
   2611 										  maxSupportedSubgroupSize();
   2612 	const VkDeviceSize resultBufferSizeInBytes = resultBufferSize * elementSize;
   2613 
   2614 	Buffer resultBuffer(
   2615 		context, resultBufferSizeInBytes);
   2616 
   2617 	std::vector< de::SharedPtr<BufferOrImage> > inputBuffers(inputsCount);
   2618 
   2619 	for (deUint32 i = 0; i < inputsCount; i++)
   2620 	{
   2621 		if (inputs[i].isImage)
   2622 		{
   2623 			inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Image(context,
   2624 										static_cast<deUint32>(inputs[i].numElements), 1, inputs[i].format));
   2625 		}
   2626 		else
   2627 		{
   2628 			vk::VkDeviceSize size =
   2629 				getFormatSizeInBytes(inputs[i].format) * inputs[i].numElements;
   2630 			inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Buffer(context, size));
   2631 		}
   2632 
   2633 		const Allocation& alloc = inputBuffers[i]->getAllocation();
   2634 		initializeMemory(context, alloc, inputs[i]);
   2635 	}
   2636 
   2637 	DescriptorSetLayoutBuilder layoutBuilder;
   2638 	layoutBuilder.addBinding(
   2639 		resultBuffer.getType(), 1, VK_SHADER_STAGE_COMPUTE_BIT, DE_NULL);
   2640 
   2641 	for (deUint32 i = 0; i < inputsCount; i++)
   2642 	{
   2643 		layoutBuilder.addBinding(
   2644 			inputBuffers[i]->getType(), 1, VK_SHADER_STAGE_COMPUTE_BIT, DE_NULL);
   2645 	}
   2646 
   2647 	const Unique<VkDescriptorSetLayout> descriptorSetLayout(
   2648 		layoutBuilder.build(context.getDeviceInterface(), context.getDevice()));
   2649 
   2650 	const Unique<VkShaderModule> shaderModule(
   2651 		createShaderModule(context.getDeviceInterface(), context.getDevice(),
   2652 						   context.getBinaryCollection().get("comp"), 0u));
   2653 	const Unique<VkPipelineLayout> pipelineLayout(
   2654 		makePipelineLayout(context, *descriptorSetLayout));
   2655 
   2656 	DescriptorPoolBuilder poolBuilder;
   2657 
   2658 	poolBuilder.addType(resultBuffer.getType());
   2659 
   2660 	for (deUint32 i = 0; i < inputsCount; i++)
   2661 	{
   2662 		poolBuilder.addType(inputBuffers[i]->getType());
   2663 	}
   2664 
   2665 	const Unique<VkDescriptorPool> descriptorPool(
   2666 		poolBuilder.build(context.getDeviceInterface(), context.getDevice(),
   2667 						  VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
   2668 
   2669 	// Create descriptor set
   2670 	const Unique<VkDescriptorSet> descriptorSet(
   2671 		makeDescriptorSet(context, *descriptorPool, *descriptorSetLayout));
   2672 
   2673 	DescriptorSetUpdateBuilder updateBuilder;
   2674 
   2675 	const VkDescriptorBufferInfo resultDescriptorInfo =
   2676 		makeDescriptorBufferInfo(
   2677 			resultBuffer.getBuffer(), 0ull, resultBufferSizeInBytes);
   2678 
   2679 	updateBuilder.writeSingle(*descriptorSet,
   2680 							  DescriptorSetUpdateBuilder::Location::binding(0u),
   2681 							  VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &resultDescriptorInfo);
   2682 
   2683 	for (deUint32 i = 0; i < inputsCount; i++)
   2684 	{
   2685 		if (inputBuffers[i]->isImage())
   2686 		{
   2687 			VkDescriptorImageInfo info =
   2688 				makeDescriptorImageInfo(inputBuffers[i]->getAsImage()->getSampler(),
   2689 										inputBuffers[i]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);
   2690 
   2691 			updateBuilder.writeSingle(*descriptorSet,
   2692 									  DescriptorSetUpdateBuilder::Location::binding(i + 1),
   2693 									  inputBuffers[i]->getType(), &info);
   2694 		}
   2695 		else
   2696 		{
   2697 			vk::VkDeviceSize size =
   2698 				getFormatSizeInBytes(inputs[i].format) * inputs[i].numElements;
   2699 			VkDescriptorBufferInfo info =
   2700 				makeDescriptorBufferInfo(inputBuffers[i]->getAsBuffer()->getBuffer(), 0ull, size);
   2701 
   2702 			updateBuilder.writeSingle(*descriptorSet,
   2703 									  DescriptorSetUpdateBuilder::Location::binding(i + 1),
   2704 									  inputBuffers[i]->getType(), &info);
   2705 		}
   2706 	}
   2707 
   2708 	updateBuilder.update(context.getDeviceInterface(), context.getDevice());
   2709 
   2710 	const Unique<VkCommandPool> cmdPool(makeCommandPool(context));
   2711 
   2712 	unsigned totalIterations = 0;
   2713 	unsigned failedIterations = 0;
   2714 
   2715 	const deUint32 subgroupSize = getSubgroupSize(context);
   2716 
   2717 	const Unique<VkCommandBuffer> cmdBuffer(
   2718 		makeCommandBuffer(context, *cmdPool));
   2719 
   2720 	const deUint32 numWorkgroups[3] = {4, 2, 2};
   2721 
   2722 	const deUint32 localSizesToTestCount = 15;
   2723 	deUint32 localSizesToTest[localSizesToTestCount][3] =
   2724 	{
   2725 		{1, 1, 1},
   2726 		{32, 4, 1},
   2727 		{32, 1, 4},
   2728 		{1, 32, 4},
   2729 		{1, 4, 32},
   2730 		{4, 1, 32},
   2731 		{4, 32, 1},
   2732 		{subgroupSize, 1, 1},
   2733 		{1, subgroupSize, 1},
   2734 		{1, 1, subgroupSize},
   2735 		{3, 5, 7},
   2736 		{128, 1, 1},
   2737 		{1, 128, 1},
   2738 		{1, 1, 64},
   2739 		{1, 1, 1} // Isn't used, just here to make double buffering checks easier
   2740 	};
   2741 
   2742 	Move<VkPipeline> lastPipeline(
   2743 		makeComputePipeline(context, *pipelineLayout, *shaderModule,
   2744 							localSizesToTest[0][0], localSizesToTest[0][1], localSizesToTest[0][2]));
   2745 
   2746 	for (deUint32 index = 0; index < (localSizesToTestCount - 1); index++)
   2747 	{
   2748 		const deUint32 nextX = localSizesToTest[index + 1][0];
   2749 		const deUint32 nextY = localSizesToTest[index + 1][1];
   2750 		const deUint32 nextZ = localSizesToTest[index + 1][2];
   2751 
   2752 		// we are running one test
   2753 		totalIterations++;
   2754 
   2755 		beginCommandBuffer(context, *cmdBuffer);
   2756 
   2757 		context.getDeviceInterface().cmdBindPipeline(
   2758 			*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *lastPipeline);
   2759 
   2760 		context.getDeviceInterface().cmdBindDescriptorSets(*cmdBuffer,
   2761 				VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u,
   2762 				&descriptorSet.get(), 0u, DE_NULL);
   2763 
   2764 		context.getDeviceInterface().cmdDispatch(*cmdBuffer,
   2765 				numWorkgroups[0], numWorkgroups[1], numWorkgroups[2]);
   2766 
   2767 		endCommandBuffer(context, *cmdBuffer);
   2768 
   2769 		Move<VkFence> fence(submitCommandBuffer(context, *cmdBuffer));
   2770 
   2771 		Move<VkPipeline> nextPipeline(
   2772 			makeComputePipeline(context, *pipelineLayout, *shaderModule,
   2773 								nextX, nextY, nextZ));
   2774 
   2775 		waitFence(context, fence);
   2776 
   2777 		std::vector<const void*> datas;
   2778 
   2779 		{
   2780 			const Allocation& resultAlloc = resultBuffer.getAllocation();
   2781 			invalidateMappedMemoryRange(context.getDeviceInterface(),
   2782 										context.getDevice(), resultAlloc.getMemory(),
   2783 										resultAlloc.getOffset(), resultBufferSizeInBytes);
   2784 
   2785 			// we always have our result data first
   2786 			datas.push_back(resultAlloc.getHostPtr());
   2787 		}
   2788 
   2789 		for (deUint32 i = 0; i < inputsCount; i++)
   2790 		{
   2791 			if (!inputBuffers[i]->isImage())
   2792 			{
   2793 				vk::VkDeviceSize size =
   2794 					getFormatSizeInBytes(inputs[i].format) *
   2795 					inputs[i].numElements;
   2796 				const Allocation& resultAlloc = inputBuffers[i]->getAllocation();
   2797 				invalidateMappedMemoryRange(context.getDeviceInterface(),
   2798 											context.getDevice(), resultAlloc.getMemory(),
   2799 											resultAlloc.getOffset(), size);
   2800 
   2801 				// we always have our result data first
   2802 				datas.push_back(resultAlloc.getHostPtr());
   2803 			}
   2804 		}
   2805 
   2806 		if (!checkResult(datas, numWorkgroups, localSizesToTest[index], subgroupSize))
   2807 		{
   2808 			failedIterations++;
   2809 		}
   2810 
   2811 		context.getDeviceInterface().resetCommandBuffer(*cmdBuffer, 0);
   2812 
   2813 		lastPipeline = nextPipeline;
   2814 	}
   2815 
   2816 	if (0 < failedIterations)
   2817 	{
   2818 		context.getTestContext().getLog()
   2819 				<< TestLog::Message << (totalIterations - failedIterations) << " / "
   2820 				<< totalIterations << " values passed" << TestLog::EndMessage;
   2821 		return tcu::TestStatus::fail("Failed!");
   2822 	}
   2823 
   2824 	return tcu::TestStatus::pass("OK");
   2825 }
   2826