/*------------------------------------------------------------------------
 * Vulkan Conformance Tests
 * ------------------------
 *
 * Copyright (c) 2017 The Khronos Group Inc.
 * Copyright (c) 2017 Codeplay Software Ltd.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *      http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 *
 */ /*!
 * \file
 * \brief Subgroups Tests Utils
 */ /*--------------------------------------------------------------------*/

#include "vktSubgroupsTestsUtils.hpp"
#include "deRandom.hpp"
#include "tcuCommandLine.hpp"
#include "tcuStringTemplate.hpp"
#include "vkBarrierUtil.hpp"
#include "vkImageUtil.hpp"
#include "vkTypeUtil.hpp"
#include "vkCmdUtil.hpp"
#include "vkObjUtil.hpp"

using namespace tcu;
using namespace std;
using namespace vk;
using namespace vkt;

namespace
{
// Size in bytes of a single element of 'format' as the tests lay it out in a
// buffer. Note that the 3-component 32-bit and 64-bit formats deliberately
// report the 4-component size (vec3/dvec3 occupy a vec4/dvec4 slot), and the
// R8*_USCALED formats stand in for bool/bvec* data passed as int/ivec*.
deUint32 getFormatSizeInBytes(const VkFormat format)
{
    switch (format)
    {
        default:
            DE_FATAL("Unhandled format!");
            return 0;
        case VK_FORMAT_R32_SINT:
        case VK_FORMAT_R32_UINT:
            return sizeof(deInt32);
        case VK_FORMAT_R32G32_SINT:
        case VK_FORMAT_R32G32_UINT:
            return static_cast<deUint32>(sizeof(deInt32) * 2);
        // 3-component formats share the 4-component size (vec3 alignment).
        case VK_FORMAT_R32G32B32_SINT:
        case VK_FORMAT_R32G32B32_UINT:
        case VK_FORMAT_R32G32B32A32_SINT:
        case VK_FORMAT_R32G32B32A32_UINT:
            return static_cast<deUint32>(sizeof(deInt32) * 4);
        case VK_FORMAT_R32_SFLOAT:
            return 4;
        case VK_FORMAT_R32G32_SFLOAT:
            return 8;
        case VK_FORMAT_R32G32B32_SFLOAT:
            return 16;
        case VK_FORMAT_R32G32B32A32_SFLOAT:
            return 16;
        case VK_FORMAT_R64_SFLOAT:
            return 8;
        case VK_FORMAT_R64G64_SFLOAT:
            return 16;
        case VK_FORMAT_R64G64B64_SFLOAT:
            return 32;
        case VK_FORMAT_R64G64B64A64_SFLOAT:
            return 32;
        // The below formats are used to represent bool and bvec* types. These
        // types are passed to the shader as int and ivec* types, before the
        // calculations are done as booleans. We need a distinct type here so
        // that the shader generators can switch on it and generate the correct
        // shader source for testing.
        case VK_FORMAT_R8_USCALED:
            return sizeof(deInt32);
        case VK_FORMAT_R8G8_USCALED:
            return static_cast<deUint32>(sizeof(deInt32) * 2);
        case VK_FORMAT_R8G8B8_USCALED:
        case VK_FORMAT_R8G8B8A8_USCALED:
            return static_cast<deUint32>(sizeof(deInt32) * 4);
    }
}

// Size of one array element of 'format' under the given SSBO layout rules:
// std140 rounds the element stride up to 16 bytes, std430 uses the raw size.
deUint32 getElementSizeInBytes(
    const VkFormat format,
    const subgroups::SSBOData::InputDataLayoutType layout)
{
    deUint32 bytes = getFormatSizeInBytes(format);
    if (layout == subgroups::SSBOData::LayoutStd140)
        return bytes < 16 ?
16 : bytes;
    else
        return bytes;
}

// Creates a pipeline layout with a single descriptor set layout and no push
// constant ranges.
Move<VkPipelineLayout> makePipelineLayout(
    Context& context, const VkDescriptorSetLayout descriptorSetLayout)
{
    const vk::VkPipelineLayoutCreateInfo pipelineLayoutParams = {
        VK_STRUCTURE_TYPE_PIPELINE_LAYOUT_CREATE_INFO, // VkStructureType sType;
        DE_NULL,              // const void* pNext;
        0u,                   // VkPipelineLayoutCreateFlags flags;
        1u,                   // deUint32 setLayoutCount;
        &descriptorSetLayout, // const VkDescriptorSetLayout* pSetLayouts;
        0u,                   // deUint32 pushConstantRangeCount;
        DE_NULL,              // const VkPushConstantRange* pPushConstantRanges;
    };
    return createPipelineLayout(context.getDeviceInterface(),
        context.getDevice(), &pipelineLayoutParams);
}

// Creates a single-subpass render pass with one color attachment that is
// cleared on load and left in TRANSFER_SRC layout so the rendered result can
// be copied out for verification.
Move<VkRenderPass> makeRenderPass(Context& context, VkFormat format)
{
    VkAttachmentReference colorReference = {
        0, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL
    };

    const VkSubpassDescription subpassDescription = {0u,
        VK_PIPELINE_BIND_POINT_GRAPHICS, 0, DE_NULL, 1, &colorReference,
        DE_NULL, DE_NULL, 0, DE_NULL
    };

    // External->subpass and subpass->external dependencies guarding the color
    // attachment accesses.
    const VkSubpassDependency subpassDependencies[2] = {
        { VK_SUBPASS_EXTERNAL, 0u, VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
          VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
          VK_ACCESS_MEMORY_READ_BIT, VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
          VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
          VK_DEPENDENCY_BY_REGION_BIT
        },
        { 0u, VK_SUBPASS_EXTERNAL, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT,
          VK_PIPELINE_STAGE_BOTTOM_OF_PIPE_BIT,
          VK_ACCESS_COLOR_ATTACHMENT_READ_BIT |
          VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT,
          VK_ACCESS_MEMORY_READ_BIT, VK_DEPENDENCY_BY_REGION_BIT
        },
    };

    VkAttachmentDescription attachmentDescription = {0u, format,
        VK_SAMPLE_COUNT_1_BIT, VK_ATTACHMENT_LOAD_OP_CLEAR,
        VK_ATTACHMENT_STORE_OP_STORE, VK_ATTACHMENT_LOAD_OP_DONT_CARE,
        VK_ATTACHMENT_STORE_OP_DONT_CARE, VK_IMAGE_LAYOUT_UNDEFINED,
        VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL
    };

    const VkRenderPassCreateInfo renderPassCreateInfo = {
        VK_STRUCTURE_TYPE_RENDER_PASS_CREATE_INFO, DE_NULL, 0u, 1,
        &attachmentDescription, 1, &subpassDescription, 2, subpassDependencies
    };

    return createRenderPass(context.getDeviceInterface(), context.getDevice(),
        &renderPassCreateInfo);
}

// Creates a framebuffer with a single color attachment view.
Move<VkFramebuffer> makeFramebuffer(Context& context,
    const VkRenderPass renderPass, const VkImageView imageView, deUint32 width,
    deUint32 height)
{
    const VkFramebufferCreateInfo framebufferCreateInfo = {
        VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO, DE_NULL, 0u, renderPass, 1,
        &imageView, width, height, 1
    };

    return createFramebuffer(context.getDeviceInterface(), context.getDevice(),
        &framebufferCreateInfo);
}

// Builds a graphics pipeline for the subgroup tests. Empty viewport/scissor
// vectors and at most one vertex binding/attribute are forwarded to
// vk::makeGraphicsPipeline; stages not used by a test pass DE_NULL modules.
Move<VkPipeline> makeGraphicsPipeline(Context& context,
    const VkPipelineLayout pipelineLayout,
    const VkShaderStageFlags stages,
    const VkShaderModule vertexShaderModule,
    const VkShaderModule fragmentShaderModule,
    const VkShaderModule geometryShaderModule,
    const VkShaderModule tessellationControlModule,
    const VkShaderModule tessellationEvaluationModule,
    const VkRenderPass renderPass,
    const VkPrimitiveTopology topology = VK_PRIMITIVE_TOPOLOGY_TRIANGLE_LIST,
    const VkVertexInputBindingDescription* vertexInputBindingDescription = DE_NULL,
    const VkVertexInputAttributeDescription* vertexInputAttributeDescriptions = DE_NULL,
    const bool frameBufferTests = false,
    const vk::VkFormat attachmentFormat = VK_FORMAT_R32G32B32A32_SFLOAT)
{
    std::vector<VkViewport> noViewports;
    std::vector<VkRect2D> noScissors;

    const VkPipelineVertexInputStateCreateInfo vertexInputStateCreateInfo =
    {
        VK_STRUCTURE_TYPE_PIPELINE_VERTEX_INPUT_STATE_CREATE_INFO, // VkStructureType sType;
        DE_NULL, // const void* pNext;
        0u, // VkPipelineVertexInputStateCreateFlags flags;
        vertexInputBindingDescription == DE_NULL ?
0u : 1u, // deUint32 vertexBindingDescriptionCount;
        vertexInputBindingDescription, // const VkVertexInputBindingDescription* pVertexBindingDescriptions;
        vertexInputAttributeDescriptions == DE_NULL ? 0u : 1u, // deUint32 vertexAttributeDescriptionCount;
        vertexInputAttributeDescriptions, // const VkVertexInputAttributeDescription* pVertexAttributeDescriptions;
    };

    // Color write mask covering exactly the channels present in the
    // attachment format.
    const deUint32 numChannels = getNumUsedChannels(mapVkFormat(attachmentFormat).order);
    const VkColorComponentFlags colorComponent =
        numChannels == 1 ? VK_COLOR_COMPONENT_R_BIT :
        numChannels == 2 ? VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT :
        numChannels == 3 ? VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT :
        VK_COLOR_COMPONENT_R_BIT | VK_COLOR_COMPONENT_G_BIT | VK_COLOR_COMPONENT_B_BIT | VK_COLOR_COMPONENT_A_BIT;

    // Blending disabled; only the write mask above takes effect.
    const VkPipelineColorBlendAttachmentState colorBlendAttachmentState =
    {
        VK_FALSE, VK_BLEND_FACTOR_ZERO, VK_BLEND_FACTOR_ZERO, VK_BLEND_OP_ADD,
        VK_BLEND_FACTOR_ZERO, VK_BLEND_FACTOR_ZERO, VK_BLEND_OP_ADD,
        colorComponent
    };

    const VkPipelineColorBlendStateCreateInfo colorBlendStateCreateInfo =
    {
        VK_STRUCTURE_TYPE_PIPELINE_COLOR_BLEND_STATE_CREATE_INFO, DE_NULL, 0u,
        VK_FALSE, VK_LOGIC_OP_CLEAR, 1, &colorBlendAttachmentState,
        { 0.0f, 0.0f, 0.0f, 0.0f }
    };

    // NOTE(review): framebuffer-based fragment tests use 2 patch control
    // points; presumably this matches the patches those tests draw -- confirm
    // against the callers.
    const deUint32 patchControlPoints = (VK_SHADER_STAGE_FRAGMENT_BIT & stages && frameBufferTests) ?
2u : 1u;

    return vk::makeGraphicsPipeline(context.getDeviceInterface(), // const DeviceInterface& vk
        context.getDevice(), // const VkDevice device
        pipelineLayout, // const VkPipelineLayout pipelineLayout
        vertexShaderModule, // const VkShaderModule vertexShaderModule
        tessellationControlModule, // const VkShaderModule tessellationControlShaderModule
        tessellationEvaluationModule, // const VkShaderModule tessellationEvalShaderModule
        geometryShaderModule, // const VkShaderModule geometryShaderModule
        fragmentShaderModule, // const VkShaderModule fragmentShaderModule
        renderPass, // const VkRenderPass renderPass
        noViewports, // const std::vector<VkViewport>& viewports
        noScissors, // const std::vector<VkRect2D>& scissors
        topology, // const VkPrimitiveTopology topology
        0u, // const deUint32 subpass
        patchControlPoints, // const deUint32 patchControlPoints
        &vertexInputStateCreateInfo, // const VkPipelineVertexInputStateCreateInfo* vertexInputStateCreateInfo
        DE_NULL, // const VkPipelineRasterizationStateCreateInfo* rasterizationStateCreateInfo
        DE_NULL, // const VkPipelineMultisampleStateCreateInfo* multisampleStateCreateInfo
        DE_NULL, // const VkPipelineDepthStencilStateCreateInfo* depthStencilStateCreateInfo
        &colorBlendStateCreateInfo); // const VkPipelineColorBlendStateCreateInfo* colorBlendStateCreateInfo
}

// Creates a compute pipeline whose local workgroup size is supplied through
// specialization constants with IDs 0/1/2 (x, y, z).
Move<VkPipeline> makeComputePipeline(Context& context,
    const VkPipelineLayout pipelineLayout, const VkShaderModule shaderModule,
    deUint32 localSizeX, deUint32 localSizeY, deUint32 localSizeZ)
{
    const deUint32 localSize[3] = {localSizeX, localSizeY, localSizeZ};

    // Constant IDs 0..2 map onto the consecutive deUint32s of 'localSize'.
    const vk::VkSpecializationMapEntry entries[3] =
    {
        {0, sizeof(deUint32) * 0, sizeof(deUint32)},
        {1, sizeof(deUint32) * 1, sizeof(deUint32)},
        {2, static_cast<deUint32>(sizeof(deUint32) * 2), sizeof(deUint32)},
    };

    const vk::VkSpecializationInfo info =
    {
        /* mapEntryCount
= */ 3,
        /* pMapEntries = */ entries,
        /* dataSize = */ sizeof(localSize),
        /* pData = */ localSize
    };

    const vk::VkPipelineShaderStageCreateInfo pipelineShaderStageParams =
    {
        VK_STRUCTURE_TYPE_PIPELINE_SHADER_STAGE_CREATE_INFO, // VkStructureType sType;
        DE_NULL, // const void* pNext;
        0u, // VkPipelineShaderStageCreateFlags flags;
        VK_SHADER_STAGE_COMPUTE_BIT, // VkShaderStageFlagBits stage;
        shaderModule, // VkShaderModule module;
        "main", // const char* pName;
        &info, // const VkSpecializationInfo* pSpecializationInfo;
    };

    const vk::VkComputePipelineCreateInfo pipelineCreateInfo =
    {
        VK_STRUCTURE_TYPE_COMPUTE_PIPELINE_CREATE_INFO, // VkStructureType sType;
        DE_NULL, // const void* pNext;
        0u, // VkPipelineCreateFlags flags;
        pipelineShaderStageParams, // VkPipelineShaderStageCreateInfo stage;
        pipelineLayout, // VkPipelineLayout layout;
        DE_NULL, // VkPipeline basePipelineHandle;
        0, // deInt32 basePipelineIndex;
    };

    return createComputePipeline(context.getDeviceInterface(),
        context.getDevice(), DE_NULL, &pipelineCreateInfo);
}

// Allocates one descriptor set with the given layout from 'descriptorPool'.
Move<VkDescriptorSet> makeDescriptorSet(Context& context,
    const VkDescriptorPool descriptorPool,
    const VkDescriptorSetLayout setLayout)
{
    const VkDescriptorSetAllocateInfo allocateParams =
    {
        VK_STRUCTURE_TYPE_DESCRIPTOR_SET_ALLOCATE_INFO, // VkStructureType sType;
        DE_NULL, // const void* pNext;
        descriptorPool, // VkDescriptorPool descriptorPool;
        1u, // deUint32 setLayoutCount;
        &setLayout, // const VkDescriptorSetLayout* pSetLayouts;
    };
    return allocateDescriptorSet(
        context.getDeviceInterface(), context.getDevice(), &allocateParams);
}

// Creates a command pool on the universal queue family; buffers allocated
// from it may be individually reset.
Move<VkCommandPool> makeCommandPool(Context& context)
{
    const VkCommandPoolCreateInfo commandPoolParams =
    {
        VK_STRUCTURE_TYPE_COMMAND_POOL_CREATE_INFO, // VkStructureType sType;
        DE_NULL, // const void* pNext;
        VK_COMMAND_POOL_CREATE_RESET_COMMAND_BUFFER_BIT, // VkCommandPoolCreateFlags flags;
        context.getUniversalQueueFamilyIndex(), // deUint32 queueFamilyIndex;
    };

    return createCommandPool(
        context.getDeviceInterface(), context.getDevice(), &commandPoolParams);
}

// Allocates a single primary command buffer from 'commandPool'.
Move<VkCommandBuffer> makeCommandBuffer(
    Context& context, const VkCommandPool commandPool)
{
    const VkCommandBufferAllocateInfo bufferAllocateParams =
    {
        VK_STRUCTURE_TYPE_COMMAND_BUFFER_ALLOCATE_INFO, // VkStructureType sType;
        DE_NULL, // const void* pNext;
        commandPool, // VkCommandPool commandPool;
        VK_COMMAND_BUFFER_LEVEL_PRIMARY, // VkCommandBufferLevel level;
        1u, // deUint32 bufferCount;
    };
    return allocateCommandBuffer(context.getDeviceInterface(),
        context.getDevice(), &bufferAllocateParams);
}

// Submits 'commandBuffer' to the universal queue with no semaphores and
// returns a freshly created fence that signals when execution completes;
// pair with waitFence() below.
Move<VkFence> submitCommandBuffer(
    Context& context, const VkCommandBuffer commandBuffer)
{
    const VkFenceCreateInfo fenceParams =
    {
        VK_STRUCTURE_TYPE_FENCE_CREATE_INFO, // VkStructureType sType;
        DE_NULL, // const void* pNext;
        0u, // VkFenceCreateFlags flags;
    };

    Move<VkFence> fence(createFence(
        context.getDeviceInterface(), context.getDevice(), &fenceParams));

    const VkSubmitInfo submitInfo =
    {
        VK_STRUCTURE_TYPE_SUBMIT_INFO, // VkStructureType sType;
        DE_NULL, // const void* pNext;
        0u, // deUint32 waitSemaphoreCount;
        DE_NULL, // const VkSemaphore* pWaitSemaphores;
        (const VkPipelineStageFlags*)DE_NULL, // const VkPipelineStageFlags* pWaitDstStageMask;
        1u, // deUint32 commandBufferCount;
        &commandBuffer, // const VkCommandBuffer* pCommandBuffers;
        0u, // deUint32 signalSemaphoreCount;
        DE_NULL, // const VkSemaphore* pSignalSemaphores;
    };

    vk::VkResult result = (context.getDeviceInterface().queueSubmit(
        context.getUniversalQueue(), 1u, &submitInfo, *fence));
    VK_CHECK(result);

    return Move<VkFence>(fence);
}

// Blocks until 'fence' signals. Takes ownership of the fence, so it is
// destroyed on return.
void waitFence(Context& context,
Move<VkFence> fence)
{
    // Infinite timeout: the submitted work is expected to finish.
    VK_CHECK(context.getDeviceInterface().waitForFences(
        context.getDevice(), 1u, &fence.get(), DE_TRUE, ~0ull));
}

struct Buffer;
struct Image;

// Common base for the Buffer/Image resource helpers below. Holds the backing
// allocation and a tag used for checked downcasts via getAsBuffer()/getAsImage().
struct BufferOrImage
{
    bool isImage() const
    {
        return m_isImage;
    }

    Buffer* getAsBuffer()
    {
        if (m_isImage) DE_FATAL("Trying to get a buffer as an image!");
        return reinterpret_cast<Buffer* >(this);
    }

    Image* getAsImage()
    {
        if (!m_isImage) DE_FATAL("Trying to get an image as a buffer!");
        return reinterpret_cast<Image*>(this);
    }

    // Descriptor type this resource should be bound as; Buffer overrides this
    // to report UNIFORM_BUFFER for uniform-usage buffers.
    virtual VkDescriptorType getType() const
    {
        if (m_isImage)
        {
            return VK_DESCRIPTOR_TYPE_STORAGE_IMAGE;
        }
        else
        {
            return VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
        }
    }

    Allocation& getAllocation() const
    {
        return *m_allocation;
    }

    virtual ~BufferOrImage() {}

protected:
    explicit BufferOrImage(bool image) : m_isImage(image) {}

    bool m_isImage; // true for Image, false for Buffer
    de::details::MovePtr<Allocation> m_allocation; // backing device memory
};

// Host-visible VkBuffer with memory bound at construction time.
struct Buffer : public BufferOrImage
{
    explicit Buffer(
        Context& context, VkDeviceSize sizeInBytes, VkBufferUsageFlags usage = VK_BUFFER_USAGE_STORAGE_BUFFER_BIT)
        : BufferOrImage (false)
        , m_sizeInBytes (sizeInBytes)
        , m_usage (usage)
    {
        const vk::VkBufferCreateInfo bufferCreateInfo =
        {
            VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,
            DE_NULL,
            0u,
            m_sizeInBytes,
            m_usage,
            VK_SHARING_MODE_EXCLUSIVE,
            0u,
            DE_NULL,
        };
        m_buffer = createBuffer(context.getDeviceInterface(),
            context.getDevice(), &bufferCreateInfo);
        vk::VkMemoryRequirements req = getBufferMemoryRequirements(
            context.getDeviceInterface(), context.getDevice(), *m_buffer);
        // Host-visible so the tests can read/write the data directly.
        m_allocation = context.getDefaultAllocator().allocate(
            req, MemoryRequirement::HostVisible);
        VK_CHECK(context.getDeviceInterface().bindBufferMemory(
            context.getDevice(), *m_buffer,
m_allocation->getMemory(),
            m_allocation->getOffset()));
    }

    // Uniform-usage buffers bind as UNIFORM_BUFFER descriptors; everything
    // else binds as STORAGE_BUFFER.
    virtual VkDescriptorType getType() const
    {
        if (VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT == m_usage)
        {
            return VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER;
        }
        return VK_DESCRIPTOR_TYPE_STORAGE_BUFFER;
    }

    VkBuffer getBuffer() const {
        return *m_buffer;
    }

    const VkBuffer* getBufferPtr() const {
        return &(*m_buffer);
    }

    VkDeviceSize getSize() const {
        return m_sizeInBytes;
    }

private:
    Move<VkBuffer> m_buffer;
    VkDeviceSize m_sizeInBytes;
    const VkBufferUsageFlags m_usage; // drives getType()
};

// 2D single-mip, single-layer VkImage with bound device memory, plus a
// matching image view and a nearest-filtering sampler.
struct Image : public BufferOrImage
{
    explicit Image(Context& context, deUint32 width, deUint32 height,
        VkFormat format, VkImageUsageFlags usage = VK_IMAGE_USAGE_STORAGE_BIT)
        : BufferOrImage(true)
    {
        const VkImageCreateInfo imageCreateInfo =
        {
            VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO, DE_NULL, 0, VK_IMAGE_TYPE_2D,
            format, {width, height, 1}, 1, 1, VK_SAMPLE_COUNT_1_BIT,
            VK_IMAGE_TILING_OPTIMAL, usage,
            VK_SHARING_MODE_EXCLUSIVE, 0u, DE_NULL,
            VK_IMAGE_LAYOUT_UNDEFINED
        };
        m_image = createImage(context.getDeviceInterface(), context.getDevice(),
            &imageCreateInfo);
        vk::VkMemoryRequirements req = getImageMemoryRequirements(
            context.getDeviceInterface(), context.getDevice(), *m_image);
        // NOTE(review): allocates twice the reported requirement; the reason
        // is not visible here -- confirm whether this over-allocation is
        // still needed.
        req.size *= 2;
        m_allocation =
            context.getDefaultAllocator().allocate(req, MemoryRequirement::Any);
        VK_CHECK(context.getDeviceInterface().bindImageMemory(
            context.getDevice(), *m_image, m_allocation->getMemory(),
            m_allocation->getOffset()));

        const VkComponentMapping componentMapping =
        {
            VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY,
            VK_COMPONENT_SWIZZLE_IDENTITY, VK_COMPONENT_SWIZZLE_IDENTITY
        };

        const VkImageViewCreateInfo imageViewCreateInfo =
        {
            VK_STRUCTURE_TYPE_IMAGE_VIEW_CREATE_INFO, DE_NULL, 0, *m_image,
            VK_IMAGE_VIEW_TYPE_2D,
imageCreateInfo.format, componentMapping,
            {
                VK_IMAGE_ASPECT_COLOR_BIT, 0, 1, 0, 1,
            }
        };

        m_imageView = createImageView(context.getDeviceInterface(),
            context.getDevice(), &imageViewCreateInfo);

        // Nearest-filtering, clamp-to-edge sampler with no anisotropy and no
        // depth comparison.
        const struct VkSamplerCreateInfo samplerCreateInfo =
        {
            VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO, // VkStructureType sType;
            DE_NULL, // const void* pNext;
            0u, // VkSamplerCreateFlags flags;
            VK_FILTER_NEAREST, // VkFilter magFilter;
            VK_FILTER_NEAREST, // VkFilter minFilter;
            VK_SAMPLER_MIPMAP_MODE_NEAREST, // VkSamplerMipmapMode mipmapMode;
            VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, // VkSamplerAddressMode addressModeU;
            VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, // VkSamplerAddressMode addressModeV;
            VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE, // VkSamplerAddressMode addressModeW;
            0.0f, // float mipLodBias;
            VK_FALSE, // VkBool32 anisotropyEnable;
            1.0f, // float maxAnisotropy;
            DE_FALSE, // VkBool32 compareEnable;
            VK_COMPARE_OP_ALWAYS, // VkCompareOp compareOp;
            0.0f, // float minLod;
            0.0f, // float maxLod;
            VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK, // VkBorderColor borderColor;
            VK_FALSE, // VkBool32 unnormalizedCoordinates;
        };

        m_sampler = createSampler(context.getDeviceInterface(), context.getDevice(), &samplerCreateInfo);
    }

    VkImage getImage() const {
        return *m_image;
    }

    VkImageView getImageView() const {
        return *m_imageView;
    }

    VkSampler getSampler() const {
        return *m_sampler;
    }

private:
    Move<VkImage> m_image;
    Move<VkImageView> m_imageView;
    Move<VkSampler> m_sampler;
};
} // anonymous namespace

// GLSL helper that emulates a ballot across a subgroup via shared memory:
// every invocation that votes 'true' atomically ORs its bit into a
// per-subgroup uvec4 (32 invocations per component, up to 128 total).
std::string vkt::subgroups::getSharedMemoryBallotHelper()
{
    return "shared uvec4 superSecretComputeShaderHelper[gl_WorkGroupSize.x * gl_WorkGroupSize.y * gl_WorkGroupSize.z];\n"
        "uvec4 sharedMemoryBallot(bool vote)\n"
        "{\n"
        "  uint groupOffset = gl_SubgroupID;\n"
        "  // One invocation in the group 0's the whole group's data\n"
        "  if (subgroupElect())\n"
        "  {\n"
        "    superSecretComputeShaderHelper[groupOffset] = uvec4(0);\n"
        "  }\n"
        "  subgroupMemoryBarrierShared();\n"
        "  if (vote)\n"
        "  {\n"
        "    const highp uint invocationId = gl_SubgroupInvocationID % 32;\n"
        "    const highp uint bitToSet = 1u << invocationId;\n"
        "    switch (gl_SubgroupInvocationID / 32)\n"
        "    {\n"
        "    case 0: atomicOr(superSecretComputeShaderHelper[groupOffset].x, bitToSet); break;\n"
        "    case 1: atomicOr(superSecretComputeShaderHelper[groupOffset].y, bitToSet); break;\n"
        "    case 2: atomicOr(superSecretComputeShaderHelper[groupOffset].z, bitToSet); break;\n"
        "    case 3: atomicOr(superSecretComputeShaderHelper[groupOffset].w, bitToSet); break;\n"
        "    }\n"
        "  }\n"
        "  subgroupMemoryBarrierShared();\n"
        "  return superSecretComputeShaderHelper[groupOffset];\n"
        "}\n";
}

// Queries VkPhysicalDeviceSubgroupProperties::subgroupSize for the device.
deUint32 vkt::subgroups::getSubgroupSize(Context& context)
{
    VkPhysicalDeviceSubgroupProperties subgroupProperties;
    subgroupProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES;
    subgroupProperties.pNext = DE_NULL;

    VkPhysicalDeviceProperties2 properties;
    properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
    properties.pNext = &subgroupProperties;

    context.getInstanceInterface().getPhysicalDeviceProperties2(context.getPhysicalDevice(), &properties);

    return subgroupProperties.subgroupSize;
}

// Upper bound on subgroup size assumed by the tests (e.g. for sizing buffers).
VkDeviceSize vkt::subgroups::maxSupportedSubgroupSize() {
    return 128u;
}

// Lower-case name of a single shader-stage bit, used when building test names.
std::string vkt::subgroups::getShaderStageName(VkShaderStageFlags stage)
{
    switch (stage)
    {
        default:
            DE_FATAL("Unhandled stage!");
            return "";
        case VK_SHADER_STAGE_COMPUTE_BIT:
            return "compute";
        case VK_SHADER_STAGE_FRAGMENT_BIT:
            return "fragment";
        case VK_SHADER_STAGE_VERTEX_BIT:
            return "vertex";
        case VK_SHADER_STAGE_GEOMETRY_BIT:
            return "geometry";
        case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT:
            return "tess_control";
        case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT:
            return "tess_eval";
    }
}

// Spec enum name of a subgroup feature bit, used in diagnostic messages.
std::string vkt::subgroups::getSubgroupFeatureName(vk::VkSubgroupFeatureFlagBits bit)
{
    switch (bit)
    {
        default:
            DE_FATAL("Unknown subgroup feature category!");
            return "";
        case VK_SUBGROUP_FEATURE_BASIC_BIT:
            return "VK_SUBGROUP_FEATURE_BASIC_BIT";
        case VK_SUBGROUP_FEATURE_VOTE_BIT:
            return
"VK_SUBGROUP_FEATURE_VOTE_BIT";
        case VK_SUBGROUP_FEATURE_ARITHMETIC_BIT:
            return "VK_SUBGROUP_FEATURE_ARITHMETIC_BIT";
        case VK_SUBGROUP_FEATURE_BALLOT_BIT:
            return "VK_SUBGROUP_FEATURE_BALLOT_BIT";
        case VK_SUBGROUP_FEATURE_SHUFFLE_BIT:
            return "VK_SUBGROUP_FEATURE_SHUFFLE_BIT";
        case VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT:
            return "VK_SUBGROUP_FEATURE_SHUFFLE_RELATIVE_BIT";
        case VK_SUBGROUP_FEATURE_CLUSTERED_BIT:
            return "VK_SUBGROUP_FEATURE_CLUSTERED_BIT";
        case VK_SUBGROUP_FEATURE_QUAD_BIT:
            return "VK_SUBGROUP_FEATURE_QUAD_BIT";
    }
}

// Registers hand-written SPIR-V 1.3 passthrough vertex / tessellation-control
// / tessellation-evaluation modules ("*_noSubgroup") that contain no subgroup
// operations. The GLSL each module corresponds to is kept in the comment
// above its assembly.
void vkt::subgroups::addNoSubgroupShader (SourceCollections& programCollection)
{
    {
        /*
            "#version 450\n"
            "void main (void)\n"
            "{\n"
            "  float pixelSize = 2.0f/1024.0f;\n"
            "  float pixelPosition = pixelSize/2.0f - 1.0f;\n"
            "  gl_Position = vec4(float(gl_VertexIndex) * pixelSize + pixelPosition, 0.0f, 0.0f, 1.0f);\n"
            "  gl_PointSize = 1.0f;\n"
            "}\n"
        */
        const std::string vertNoSubgroup =
            "; SPIR-V\n"
            "; Version: 1.3\n"
            "; Generator: Khronos Glslang Reference Front End; 1\n"
            "; Bound: 37\n"
            "; Schema: 0\n"
            "OpCapability Shader\n"
            "%1 = OpExtInstImport \"GLSL.std.450\"\n"
            "OpMemoryModel Logical GLSL450\n"
            "OpEntryPoint Vertex %4 \"main\" %22 %26\n"
            "OpMemberDecorate %20 0 BuiltIn Position\n"
            "OpMemberDecorate %20 1 BuiltIn PointSize\n"
            "OpMemberDecorate %20 2 BuiltIn ClipDistance\n"
            "OpMemberDecorate %20 3 BuiltIn CullDistance\n"
            "OpDecorate %20 Block\n"
            "OpDecorate %26 BuiltIn VertexIndex\n"
            "%2 = OpTypeVoid\n"
            "%3 = OpTypeFunction %2\n"
            "%6 = OpTypeFloat 32\n"
            "%7 = OpTypePointer Function %6\n"
            "%9 = OpConstant %6 0.00195313\n"
            "%12 = OpConstant %6 2\n"
            "%14 = OpConstant %6 1\n"
            "%16 = OpTypeVector %6 4\n"
            "%17 = OpTypeInt 32 0\n"
            "%18 = OpConstant %17 1\n"
            "%19 = OpTypeArray %6 %18\n"
            "%20 = OpTypeStruct %16 %6 %19 %19\n"
            "%21 = OpTypePointer Output %20\n"
            "%22 = OpVariable %21 Output\n"
            "%23 = OpTypeInt 32 1\n"
            "%24 = OpConstant %23 0\n"
            "%25 = OpTypePointer Input %23\n"
            "%26 = OpVariable %25 Input\n"
            "%33 = OpConstant %6 0\n"
            "%35 = OpTypePointer Output %16\n"
            "%37 = OpConstant %23 1\n"
            "%38 = OpTypePointer Output %6\n"
            "%4 = OpFunction %2 None %3\n"
            "%5 = OpLabel\n"
            "%8 = OpVariable %7 Function\n"
            "%10 = OpVariable %7 Function\n"
            "OpStore %8 %9\n"
            "%11 = OpLoad %6 %8\n"
            "%13 = OpFDiv %6 %11 %12\n"
            "%15 = OpFSub %6 %13 %14\n"
            "OpStore %10 %15\n"
            "%27 = OpLoad %23 %26\n"
            "%28 = OpConvertSToF %6 %27\n"
            "%29 = OpLoad %6 %8\n"
            "%30 = OpFMul %6 %28 %29\n"
            "%31 = OpLoad %6 %10\n"
            "%32 = OpFAdd %6 %30 %31\n"
            "%34 = OpCompositeConstruct %16 %32 %33 %33 %14\n"
            "%36 = OpAccessChain %35 %22 %24\n"
            "OpStore %36 %34\n"
            "%39 = OpAccessChain %38 %22 %37\n"
            "OpStore %39 %14\n"
            "OpReturn\n"
            "OpFunctionEnd\n";
        programCollection.spirvAsmSources.add("vert_noSubgroup") << vertNoSubgroup;
    }

    {
        /*
            "#version 450\n"
            "layout(vertices=1) out;\n"
            "\n"
            "void main (void)\n"
            "{\n"
            "  if (gl_InvocationID == 0)\n"
            "  {\n"
            "    gl_TessLevelOuter[0] = 1.0f;\n"
            "    gl_TessLevelOuter[1] = 1.0f;\n"
            "  }\n"
            "  gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
            "}\n"
        */
        const std::string tescNoSubgroup =
            "; SPIR-V\n"
            "; Version: 1.3\n"
            "; Generator: Khronos Glslang Reference Front End; 1\n"
            "; Bound: 45\n"
            "; Schema: 0\n"
            "OpCapability Tessellation\n"
            "%1 = OpExtInstImport \"GLSL.std.450\"\n"
            "OpMemoryModel Logical GLSL450\n"
            "OpEntryPoint TessellationControl %4 \"main\" %8 %20 %32 %38\n"
            "OpExecutionMode %4 OutputVertices 1\n"
            "OpDecorate %8 BuiltIn InvocationId\n"
            "OpDecorate %20 Patch\n"
            "OpDecorate %20 BuiltIn TessLevelOuter\n"
            "OpMemberDecorate %29 0 BuiltIn Position\n"
            "OpMemberDecorate %29 1 BuiltIn PointSize\n"
            "OpMemberDecorate %29 2 BuiltIn ClipDistance\n"
            "OpMemberDecorate %29 3 BuiltIn CullDistance\n"
            "OpDecorate %29 Block\n"
            "OpMemberDecorate %34 0 BuiltIn Position\n"
            "OpMemberDecorate %34 1 BuiltIn PointSize\n"
            "OpMemberDecorate %34 2 BuiltIn ClipDistance\n"
            "OpMemberDecorate %34 3 BuiltIn CullDistance\n"
            "OpDecorate %34 Block\n"
            "%2 = OpTypeVoid\n"
            "%3 = OpTypeFunction %2\n"
            "%6 = OpTypeInt 32 1\n"
            "%7 = OpTypePointer Input %6\n"
            "%8 = OpVariable %7 Input\n"
            "%10 = OpConstant %6 0\n"
            "%11 = OpTypeBool\n"
            "%15 = OpTypeFloat 32\n"
            "%16 = OpTypeInt 32 0\n"
            "%17 = OpConstant %16 4\n"
            "%18 = OpTypeArray %15 %17\n"
            "%19 = OpTypePointer Output %18\n"
            "%20 = OpVariable %19 Output\n"
            "%21 = OpConstant %15 1\n"
            "%22 = OpTypePointer Output %15\n"
            "%24 = OpConstant %6 1\n"
            "%26 = OpTypeVector %15 4\n"
            "%27 = OpConstant %16 1\n"
            "%28 = OpTypeArray %15 %27\n"
            "%29 = OpTypeStruct %26 %15 %28 %28\n"
            "%30 = OpTypeArray %29 %27\n"
            "%31 = OpTypePointer Output %30\n"
            "%32 = OpVariable %31 Output\n"
            "%34 = OpTypeStruct %26 %15 %28 %28\n"
            "%35 = OpConstant %16 32\n"
            "%36 = OpTypeArray %34 %35\n"
            "%37 = OpTypePointer Input %36\n"
            "%38 = OpVariable %37 Input\n"
            "%40 = OpTypePointer Input %26\n"
            "%43 = OpTypePointer Output %26\n"
            "%4 = OpFunction %2 None %3\n"
            "%5 = OpLabel\n"
            "%9 = OpLoad %6 %8\n"
            "%12 = OpIEqual %11 %9 %10\n"
            "OpSelectionMerge %14 None\n"
            "OpBranchConditional %12 %13 %14\n"
            "%13 = OpLabel\n"
            "%23 = OpAccessChain %22 %20 %10\n"
            "OpStore %23 %21\n"
            "%25 = OpAccessChain %22 %20 %24\n"
            "OpStore %25 %21\n"
            "OpBranch %14\n"
            "%14 = OpLabel\n"
            "%33 = OpLoad %6 %8\n"
            "%39 = OpLoad %6 %8\n"
            "%41 = OpAccessChain %40 %38 %39 %10\n"
            "%42 = OpLoad %26 %41\n"
            "%44 = OpAccessChain %43 %32 %33 %10\n"
            "OpStore %44 %42\n"
            "OpReturn\n"
            "OpFunctionEnd\n";
        programCollection.spirvAsmSources.add("tesc_noSubgroup") << tescNoSubgroup;
    }

    {
        /*
            "#version 450\n"
            "layout(isolines) in;\n"
            "\n"
            "void main (void)\n"
            "{\n"
            "  float pixelSize = 2.0f/1024.0f;\n"
            "  gl_Position = gl_in[0].gl_Position + gl_TessCoord.x * pixelSize / 2.0f;\n"
            "}\n";
        */
        const std::string teseNoSubgroup =
            "; SPIR-V\n"
            "; Version: 1.3\n"
            "; Generator: Khronos Glslang Reference Front End; 2\n"
            "; Bound: 42\n"
            "; Schema: 0\n"
            "OpCapability Tessellation\n"
            "%1 = OpExtInstImport \"GLSL.std.450\"\n"
            "OpMemoryModel Logical GLSL450\n"
            "OpEntryPoint TessellationEvaluation %4 \"main\" %16 %23 %29\n"
            "OpExecutionMode %4 Isolines\n"
            "OpExecutionMode %4 SpacingEqual\n"
            "OpExecutionMode %4 VertexOrderCcw\n"
            "OpMemberDecorate %14 0 BuiltIn Position\n"
            "OpMemberDecorate %14 1 BuiltIn PointSize\n"
            "OpMemberDecorate %14 2 BuiltIn ClipDistance\n"
            "OpMemberDecorate %14 3 BuiltIn CullDistance\n"
            "OpDecorate %14 Block\n"
            "OpMemberDecorate %19 0 BuiltIn Position\n"
            "OpMemberDecorate %19 1 BuiltIn PointSize\n"
            "OpMemberDecorate %19 2 BuiltIn ClipDistance\n"
            "OpMemberDecorate %19 3 BuiltIn CullDistance\n"
            "OpDecorate %19 Block\n"
            "OpDecorate %29 BuiltIn TessCoord\n"
            "%2 = OpTypeVoid\n"
            "%3 = OpTypeFunction %2\n"
            "%6 = OpTypeFloat 32\n"
            "%7 = OpTypePointer Function %6\n"
            "%9 = OpConstant %6 0.00195313\n"
            "%10 = OpTypeVector %6 4\n"
            "%11 = OpTypeInt 32 0\n"
            "%12 = OpConstant %11 1\n"
            "%13 = OpTypeArray %6 %12\n"
            "%14 = OpTypeStruct %10 %6 %13 %13\n"
            "%15 = OpTypePointer Output %14\n"
            "%16 = OpVariable %15 Output\n"
            "%17 = OpTypeInt 32 1\n"
            "%18 = OpConstant %17 0\n"
            "%19 = OpTypeStruct %10 %6 %13 %13\n"
            "%20 = OpConstant %11 32\n"
            "%21 = OpTypeArray %19 %20\n"
            "%22 = OpTypePointer Input %21\n"
            "%23 = OpVariable %22 Input\n"
            "%24 = OpTypePointer Input %10\n"
            "%27 = OpTypeVector %6 3\n"
            "%28 = OpTypePointer Input %27\n"
            "%29 = OpVariable %28 Input\n"
            "%30 = OpConstant %11 0\n"
            "%31 = OpTypePointer Input %6\n"
            "%36 = OpConstant %6 2\n"
            "%40 = OpTypePointer Output %10\n"
            "%4 = OpFunction %2 None %3\n"
            "%5 = OpLabel\n"
            "%8 = OpVariable %7 Function\n"
            "OpStore %8 %9\n"
            "%25 = OpAccessChain %24 %23 %18 %18\n"
            "%26 = OpLoad %10 %25\n"
            "%32 = OpAccessChain %31 %29 %30\n"
            "%33 = OpLoad %6 %32\n"
            "%34 = OpLoad %6 %8\n"
            "%35 = OpFMul %6 %33 %34\n"
            "%37 = OpFDiv %6 %35 %36\n"
            "%38 = OpCompositeConstruct %10 %37 %37 %37 %37\n"
            "%39 = OpFAdd %10 %26 %38\n"
            "%41 = OpAccessChain %40 %16 %18\n"
            "OpStore %41 %39\n"
            "OpReturn\n"
            "OpFunctionEnd\n";
        programCollection.spirvAsmSources.add("tese_noSubgroup") << teseNoSubgroup;
    }

}


// GLSL vertex shader used as the companion stage when 'stage' is under test:
// fragment tests get a point-per-pixel position generator, geometry and
// tessellation tests get an empty main.
std::string vkt::subgroups::getVertShaderForStage(vk::VkShaderStageFlags stage)
{
    switch (stage)
    {
        default:
            DE_FATAL("Unhandled stage!");
            return "";
        case VK_SHADER_STAGE_FRAGMENT_BIT:
            return
                "#version 450\n"
                "void main (void)\n"
                "{\n"
                "  float pixelSize = 2.0f/1024.0f;\n"
                "  float pixelPosition = pixelSize/2.0f - 1.0f;\n"
                "  gl_Position = vec4(float(gl_VertexIndex) * pixelSize + pixelPosition, 0.0f, 0.0f, 1.0f);\n"
                "}\n";
        case VK_SHADER_STAGE_GEOMETRY_BIT:
            return
                "#version 450\n"
                "void main (void)\n"
                "{\n"
                "}\n";
        case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT:
        case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT:
            return
                "#version 450\n"
                "void main (void)\n"
                "{\n"
                "}\n";
    }
}

// Subgroup support is equivalent to a Vulkan 1.1 context.
bool vkt::subgroups::isSubgroupSupported(Context& context)
{
    return context.contextSupports(vk::ApiVersion(1, 1, 0));
}

// True when 'stage' is set in VkPhysicalDeviceSubgroupProperties::supportedStages.
bool vkt::subgroups::areSubgroupOperationsSupportedForStage(
    Context& context, const
VkShaderStageFlags stage)
{
	VkPhysicalDeviceSubgroupProperties subgroupProperties;
	subgroupProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES;
	subgroupProperties.pNext = DE_NULL;

	// Chain the subgroup properties into the properties2 query.
	VkPhysicalDeviceProperties2 properties;
	properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
	properties.pNext = &subgroupProperties;

	context.getInstanceInterface().getPhysicalDeviceProperties2(context.getPhysicalDevice(), &properties);

	return (stage & subgroupProperties.supportedStages) ? true : false;
}

// The Vulkan spec only mandates subgroup operation support in compute;
// all other stages are optional and must be queried at runtime.
bool vkt::subgroups::areSubgroupOperationsRequiredForStage(
	VkShaderStageFlags stage)
{
	switch (stage)
	{
		default:
			return false;
		case VK_SHADER_STAGE_COMPUTE_BIT:
			return true;
	}
}

// Queries VkPhysicalDeviceSubgroupProperties::supportedOperations and
// reports whether the given subgroup feature class (basic, vote, ballot,
// arithmetic, ...) is supported on this device.
bool vkt::subgroups::isSubgroupFeatureSupportedForDevice(
	Context& context,
	VkSubgroupFeatureFlagBits bit) {
	VkPhysicalDeviceSubgroupProperties subgroupProperties;
	subgroupProperties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_SUBGROUP_PROPERTIES;
	subgroupProperties.pNext = DE_NULL;

	VkPhysicalDeviceProperties2 properties;
	properties.sType = VK_STRUCTURE_TYPE_PHYSICAL_DEVICE_PROPERTIES_2;
	properties.pNext = &subgroupProperties;

	context.getInstanceInterface().getPhysicalDeviceProperties2(context.getPhysicalDevice(), &properties);

	return (bit & subgroupProperties.supportedOperations) ? true : false;
}

// SSBO writes from the fragment stage require the fragmentStoresAndAtomics
// device feature.
bool vkt::subgroups::isFragmentSSBOSupportedForDevice(Context& context)
{
	const VkPhysicalDeviceFeatures features = getPhysicalDeviceFeatures(
		context.getInstanceInterface(), context.getPhysicalDevice());
	return features.fragmentStoresAndAtomics ? true : false;
}

// SSBO writes from vertex-pipeline stages (vertex/tess/geometry) require the
// vertexPipelineStoresAndAtomics device feature.
bool vkt::subgroups::isVertexSSBOSupportedForDevice(Context& context)
{
	const VkPhysicalDeviceFeatures features = getPhysicalDeviceFeatures(
		context.getInstanceInterface(), context.getPhysicalDevice());
	return features.vertexPipelineStoresAndAtomics ? true : false;
}

// 64-bit float formats can only be tested when the shaderFloat64 feature is
// present.
bool vkt::subgroups::isDoubleSupportedForDevice(Context& context)
{
	const VkPhysicalDeviceFeatures features = getPhysicalDeviceFeatures(
		context.getInstanceInterface(), context.getPhysicalDevice());
	return features.shaderFloat64 ? true : false;
}

// True for the R64* float formats used to represent double/dvec* test data.
bool vkt::subgroups::isDoubleFormat(VkFormat format)
{
	switch (format)
	{
		default:
			return false;
		case VK_FORMAT_R64_SFLOAT:
		case VK_FORMAT_R64G64_SFLOAT:
		case VK_FORMAT_R64G64B64_SFLOAT:
		case VK_FORMAT_R64G64B64A64_SFLOAT:
			return true;
	}
}

// Maps the VkFormat used to describe a test's data type onto the matching
// GLSL type name used when generating shader source.
std::string vkt::subgroups::getFormatNameForGLSL (VkFormat format)
{
	switch (format)
	{
		default:
			DE_FATAL("Unhandled format!");
			return "";
		case VK_FORMAT_R32_SINT:
			return "int";
		case VK_FORMAT_R32G32_SINT:
			return "ivec2";
		case VK_FORMAT_R32G32B32_SINT:
			return "ivec3";
		case VK_FORMAT_R32G32B32A32_SINT:
			return "ivec4";
		case VK_FORMAT_R32_UINT:
			return "uint";
		case VK_FORMAT_R32G32_UINT:
			return "uvec2";
		case VK_FORMAT_R32G32B32_UINT:
			return "uvec3";
		case VK_FORMAT_R32G32B32A32_UINT:
			return "uvec4";
		case VK_FORMAT_R32_SFLOAT:
			return "float";
		case VK_FORMAT_R32G32_SFLOAT:
			return "vec2";
		case VK_FORMAT_R32G32B32_SFLOAT:
			return "vec3";
		case VK_FORMAT_R32G32B32A32_SFLOAT:
			return "vec4";
		case VK_FORMAT_R64_SFLOAT:
			return "double";
		case VK_FORMAT_R64G64_SFLOAT:
			return "dvec2";
		case VK_FORMAT_R64G64B64_SFLOAT:
			return "dvec3";
		case VK_FORMAT_R64G64B64A64_SFLOAT:
			return "dvec4";
		// The R8*_USCALED formats are placeholders for bool/bvec* test types
		// (see getFormatSizeInBytes above).
		case VK_FORMAT_R8_USCALED:
			return "bool";
		case VK_FORMAT_R8G8_USCALED:
			return "bvec2";
		case VK_FORMAT_R8G8B8_USCALED:
			return "bvec3";
		case VK_FORMAT_R8G8B8A8_USCALED:
			return "bvec4";
	}
}

// Adds the pass-through "vert" vertex shader (position in -> gl_Position out)
// used by the framebuffer-based tests. Supplied as hand-written SPIR-V so it
// is independent of glslang.
void vkt::subgroups::setVertexShaderFrameBuffer (SourceCollections& programCollection)
{
	// GLSL equivalent of the SPIR-V below:
	/*
		"layout(location = 0) in highp vec4 in_position;\n"
		"void main (void)\n"
		"{\n"
		"  gl_Position = in_position;\n"
		"}\n";
	*/
	programCollection.spirvAsmSources.add("vert") <<
		"; SPIR-V\n"
		"; Version: 1.3\n"
		"; Generator: Khronos Glslang Reference Front End; 2\n"
		"; Bound: 21\n"
		"; Schema: 0\n"
		"OpCapability Shader\n"
		"%1 = OpExtInstImport \"GLSL.std.450\"\n"
		"OpMemoryModel Logical GLSL450\n"
		"OpEntryPoint Vertex %4 \"main\" %13 %17\n"
		"OpMemberDecorate %11 0 BuiltIn Position\n"
		"OpMemberDecorate %11 1 BuiltIn PointSize\n"
		"OpMemberDecorate %11 2 BuiltIn ClipDistance\n"
		"OpMemberDecorate %11 3 BuiltIn CullDistance\n"
		"OpDecorate %11 Block\n"
		"OpDecorate %17 Location 0\n"
		"%2 = OpTypeVoid\n"
		"%3 = OpTypeFunction %2\n"
		"%6 = OpTypeFloat 32\n"
		"%7 = OpTypeVector %6 4\n"
		"%8 = OpTypeInt 32 0\n"
		"%9 = OpConstant %8 1\n"
		"%10 = OpTypeArray %6 %9\n"
		"%11 = OpTypeStruct %7 %6 %10 %10\n"
		"%12 = OpTypePointer Output %11\n"
		"%13 = OpVariable %12 Output\n"
		"%14 = OpTypeInt 32 1\n"
		"%15 = OpConstant %14 0\n"
		"%16 = OpTypePointer Input %7\n"
		"%17 = OpVariable %16 Input\n"
		"%19 = OpTypePointer Output %7\n"
		"%4 = OpFunction %2 None %3\n"
		"%5 = OpLabel\n"
		"%18 = OpLoad %7 %17\n"
		"%20 = OpAccessChain %19 %13 %15\n"
		"OpStore %20 %18\n"
		"OpReturn\n"
		"OpFunctionEnd\n";
}

// Adds the "fragment" shader that converts the interpolated float result to
// an unsigned integer color output for readback.
void vkt::subgroups::setFragmentShaderFrameBuffer (vk::SourceCollections& programCollection)
{
	// GLSL equivalent of the SPIR-V below:
	/*
		"layout(location = 0) in float in_color;\n"
		"layout(location = 0) out uint out_color;\n"
		"void main()\n"
		"{\n"
		"  out_color = uint(in_color);\n"
		"}\n";
	*/
	programCollection.spirvAsmSources.add("fragment") <<
		"; SPIR-V\n"
		"; Version: 1.3\n"
		"; Generator: Khronos Glslang Reference Front End; 2\n"
		"; Bound: 14\n"
		"; Schema: 0\n"
		"OpCapability Shader\n"
		"%1 = OpExtInstImport \"GLSL.std.450\"\n"
		"OpMemoryModel Logical GLSL450\n"
		"OpEntryPoint Fragment %4 \"main\" %8 %11\n"
		"OpExecutionMode %4 OriginUpperLeft\n"
		"OpDecorate %8 Location 0\n"
		"OpDecorate %11 Location 0\n"
		"%2 = OpTypeVoid\n"
		"%3 = OpTypeFunction %2\n"
		"%6 = OpTypeInt 32 0\n"
		"%7 = OpTypePointer Output %6\n"
		"%8 = OpVariable %7 Output\n"
		"%9 = OpTypeFloat 32\n"
		"%10 = OpTypePointer Input %9\n"
		"%11 = OpVariable %10 Input\n"
		"%4 = OpFunction %2 None %3\n"
		"%5 = OpLabel\n"
		"%12 = OpLoad %9 %11\n"
		"%13 = OpConvertFToU %6 %12\n"
		"OpStore %8 %13\n"
		"OpReturn\n"
		"OpFunctionEnd\n";
}

// Adds the "tesc" control shader: outputs a 2-vertex patch, sets both outer
// tessellation levels to 1 and passes positions through.
void vkt::subgroups::setTesCtrlShaderFrameBuffer (vk::SourceCollections& programCollection)
{
	// GLSL equivalent of the SPIR-V below:
	/*
		"#extension GL_KHR_shader_subgroup_basic: enable\n"
		"#extension GL_EXT_tessellation_shader : require\n"
		"layout(vertices = 2) out;\n"
		"void main (void)\n"
		"{\n"
		"  if (gl_InvocationID == 0)\n"
		"  {\n"
		"    gl_TessLevelOuter[0] = 1.0f;\n"
		"    gl_TessLevelOuter[1] = 1.0f;\n"
		"  }\n"
		"  gl_out[gl_InvocationID].gl_Position = gl_in[gl_InvocationID].gl_Position;\n"
		"}\n";
	*/
	programCollection.spirvAsmSources.add("tesc") <<
		"; SPIR-V\n"
		"; Version: 1.3\n"
		"; Generator: Khronos Glslang Reference Front End; 2\n"
		"; Bound: 46\n"
		"; Schema: 0\n"
		"OpCapability Tessellation\n"
		"%1 = OpExtInstImport \"GLSL.std.450\"\n"
		"OpMemoryModel Logical GLSL450\n"
		"OpEntryPoint TessellationControl %4 \"main\" %8 %20 %33 %39\n"
		"OpExecutionMode %4 OutputVertices 2\n"
		"OpDecorate %8 BuiltIn InvocationId\n"
		"OpDecorate %20 Patch\n"
		"OpDecorate %20 BuiltIn TessLevelOuter\n"
		"OpMemberDecorate %29 0 BuiltIn Position\n"
		"OpMemberDecorate %29 1 BuiltIn PointSize\n"
		"OpMemberDecorate %29 2 BuiltIn ClipDistance\n"
		"OpMemberDecorate %29 3 BuiltIn CullDistance\n"
		"OpDecorate %29 Block\n"
		"OpMemberDecorate %35 0 BuiltIn Position\n"
		"OpMemberDecorate %35 1 BuiltIn PointSize\n"
		"OpMemberDecorate %35 2 BuiltIn ClipDistance\n"
		"OpMemberDecorate %35 3 BuiltIn CullDistance\n"
		"OpDecorate %35 Block\n"
		"%2 = OpTypeVoid\n"
		"%3 = OpTypeFunction %2\n"
		"%6 = OpTypeInt 32 1\n"
		"%7 = OpTypePointer Input %6\n"
		"%8 = OpVariable %7 Input\n"
		"%10 = OpConstant %6 0\n"
		"%11 = OpTypeBool\n"
		"%15 = OpTypeFloat 32\n"
		"%16 = OpTypeInt 32 0\n"
		"%17 = OpConstant %16 4\n"
		"%18 = OpTypeArray %15 %17\n"
		"%19 = OpTypePointer Output %18\n"
		"%20 = OpVariable %19 Output\n"
		"%21 = OpConstant %15 1\n"
		"%22 = OpTypePointer Output %15\n"
		"%24 = OpConstant %6 1\n"
		"%26 = OpTypeVector %15 4\n"
		"%27 = OpConstant %16 1\n"
		"%28 = OpTypeArray %15 %27\n"
		"%29 = OpTypeStruct %26 %15 %28 %28\n"
		"%30 = OpConstant %16 2\n"
		"%31 = OpTypeArray %29 %30\n"
		"%32 = OpTypePointer Output %31\n"
		"%33 = OpVariable %32 Output\n"
		"%35 = OpTypeStruct %26 %15 %28 %28\n"
		"%36 = OpConstant %16 32\n"
		"%37 = OpTypeArray %35 %36\n"
		"%38 = OpTypePointer Input %37\n"
		"%39 = OpVariable %38 Input\n"
		"%41 = OpTypePointer Input %26\n"
		"%44 = OpTypePointer Output %26\n"
		"%4 = OpFunction %2 None %3\n"
		"%5 = OpLabel\n"
		"%9 = OpLoad %6 %8\n"
		"%12 = OpIEqual %11 %9 %10\n"
		"OpSelectionMerge %14 None\n"
		"OpBranchConditional %12 %13 %14\n"
		"%13 = OpLabel\n"
		"%23 = OpAccessChain %22 %20 %10\n"
		"OpStore %23 %21\n"
		"%25 = OpAccessChain %22 %20 %24\n"
		"OpStore %25 %21\n"
		"OpBranch %14\n"
		"%14 = OpLabel\n"
		"%34 = OpLoad %6 %8\n"
		"%40 = OpLoad %6 %8\n"
		"%42 = OpAccessChain %41 %39 %40 %10\n"
		"%43 = OpLoad %26 %42\n"
		"%45 = OpAccessChain %44 %33 %34 %10\n"
		"OpStore %45 %43\n"
		"OpReturn\n"
		"OpFunctionEnd\n";
}

// Adds the "tese" evaluation shader: interpolates the patch endpoints with
// gl_TessCoord.x and forwards the per-patch color to the fragment shader.
void vkt::subgroups::setTesEvalShaderFrameBuffer (vk::SourceCollections& programCollection)
{
	// GLSL equivalent of the SPIR-V below:
	/*
		"#extension GL_KHR_shader_subgroup_ballot: enable\n"
		"#extension GL_EXT_tessellation_shader : require\n"
		"layout(isolines, equal_spacing, ccw ) in;\n"
		"layout(location = 0) in float in_color[];\n"
		"layout(location = 0) out float out_color;\n"
		"\n"
		"void main (void)\n"
		"{\n"
		"  gl_Position = mix(gl_in[0].gl_Position, gl_in[1].gl_Position, gl_TessCoord.x);\n"
		"  out_color = in_color[0];\n"
		"}\n";
	*/
	programCollection.spirvAsmSources.add("tese") <<
		"; SPIR-V\n"
		"; Version: 1.3\n"
		"; Generator: Khronos Glslang Reference Front End; 2\n"
		"; Bound: 45\n"
		"; Schema: 0\n"
		"OpCapability Tessellation\n"
		"%1 = OpExtInstImport \"GLSL.std.450\"\n"
		"OpMemoryModel Logical GLSL450\n"
		"OpEntryPoint TessellationEvaluation %4 \"main\" %13 %20 %29 %39 %42\n"
		"OpExecutionMode %4 Isolines\n"
		"OpExecutionMode %4 SpacingEqual\n"
		"OpExecutionMode %4 VertexOrderCcw\n"
		"OpMemberDecorate %11 0 BuiltIn Position\n"
		"OpMemberDecorate %11 1 BuiltIn PointSize\n"
		"OpMemberDecorate %11 2 BuiltIn ClipDistance\n"
		"OpMemberDecorate %11 3 BuiltIn CullDistance\n"
		"OpDecorate %11 Block\n"
		"OpMemberDecorate %16 0 BuiltIn Position\n"
		"OpMemberDecorate %16 1 BuiltIn PointSize\n"
		"OpMemberDecorate %16 2 BuiltIn ClipDistance\n"
		"OpMemberDecorate %16 3 BuiltIn CullDistance\n"
		"OpDecorate %16 Block\n"
		"OpDecorate %29 BuiltIn TessCoord\n"
		"OpDecorate %39 Location 0\n"
		"OpDecorate %42 Location 0\n"
		"%2 = OpTypeVoid\n"
		"%3 = OpTypeFunction %2\n"
		"%6 = OpTypeFloat 32\n"
		"%7 = OpTypeVector %6 4\n"
		"%8 = OpTypeInt 32 0\n"
		"%9 = OpConstant %8 1\n"
		"%10 = OpTypeArray %6 %9\n"
		"%11 = OpTypeStruct %7 %6 %10 %10\n"
		"%12 = OpTypePointer Output %11\n"
		"%13 = OpVariable %12 Output\n"
		"%14 = OpTypeInt 32 1\n"
		"%15 = OpConstant %14 0\n"
		"%16 = OpTypeStruct %7 %6 %10 %10\n"
		"%17 = OpConstant %8 32\n"
		"%18 = OpTypeArray %16 %17\n"
		"%19 = OpTypePointer Input %18\n"
		"%20 = OpVariable %19 Input\n"
		"%21 = OpTypePointer Input %7\n"
		"%24 = OpConstant %14 1\n"
		"%27 = OpTypeVector %6 3\n"
		"%28 = OpTypePointer Input %27\n"
		"%29 = OpVariable %28 Input\n"
		"%30 = OpConstant %8 0\n"
		"%31 = OpTypePointer Input %6\n"
		"%36 = OpTypePointer Output %7\n"
		"%38 = OpTypePointer Output %6\n"
		"%39 = OpVariable %38 Output\n"
		"%40 = OpTypeArray %6 %17\n"
		"%41 = OpTypePointer Input %40\n"
		"%42 = OpVariable %41 Input\n"
		"%4 = OpFunction %2 None %3\n"
		"%5 = OpLabel\n"
		"%22 = OpAccessChain %21 %20 %15 %15\n"
		"%23 = OpLoad %7 %22\n"
		"%25 = OpAccessChain %21 %20 %24 %15\n"
		"%26 = OpLoad %7 %25\n"
		"%32 = OpAccessChain %31 %29 %30\n"
		"%33 = OpLoad %6 %32\n"
		"%34 = OpCompositeConstruct %7 %33 %33 %33 %33\n"
		"%35 = OpExtInst %7 %1 FMix %23 %26 %34\n"
		"%37 = OpAccessChain %36 %13 %15\n"
		"OpStore %37 %35\n"
		"%43 = OpAccessChain %31 %42 %15\n"
		"%44 = OpLoad %6 %43\n"
		"OpStore %39 %44\n"
		"OpReturn\n"
		"OpFunctionEnd\n";
}

// Instantiates the GLSL geometry-shader template twice — once for the
// "lines" topology and once for "points" — and registers both variants.
void vkt::subgroups::addGeometryShadersFromTemplate (const std::string& glslTemplate, const vk::ShaderBuildOptions& options, vk::GlslSourceCollection& collection)
{
	tcu::StringTemplate geometryTemplate(glslTemplate);

	map<string, string> linesParams;
	linesParams.insert(pair<string, string>("TOPOLOGY", "lines"));

	map<string, string> pointsParams;
	pointsParams.insert(pair<string, string>("TOPOLOGY", "points"));

	collection.add("geometry_lines") << glu::GeometrySource(geometryTemplate.specialize(linesParams)) << options;
	collection.add("geometry_points") << glu::GeometrySource(geometryTemplate.specialize(pointsParams)) << options;
}

// SPIR-V counterpart of the overload above: specializes the ${TOPOLOGY}
// placeholder with the SPIR-V execution-mode names instead of GLSL layouts.
void vkt::subgroups::addGeometryShadersFromTemplate (const std::string& spirvTemplate, const vk::SpirVAsmBuildOptions& options, vk::SpirVAsmCollection& collection)
{
	tcu::StringTemplate geometryTemplate(spirvTemplate);

	map<string, string> linesParams;
	linesParams.insert(pair<string, string>("TOPOLOGY", "InputLines"));

	map<string, string> pointsParams;
	pointsParams.insert(pair<string, string>("TOPOLOGY", "InputPoints"));

	collection.add("geometry_lines") << geometryTemplate.specialize(linesParams) << options;
	collection.add("geometry_points") << geometryTemplate.specialize(pointsParams) << options;
}

// Fills a host-visible allocation with the initial contents described by
// 'data' (random non-zero values, zeroes, or nothing) and flushes it so the
// device sees the data. The element size depends on whether the data backs
// an image or a buffer with a std140/std430 layout.
void initializeMemory(Context& context, const Allocation& alloc, subgroups::SSBOData& data)
{
	const vk::VkFormat format = data.format;
	const vk::VkDeviceSize size = data.numElements *
		(data.isImage ? getFormatSizeInBytes(format) : getElementSizeInBytes(format, data.layout));
	if (subgroups::SSBOData::InitializeNonZero == data.initializeType)
	{
		// Seeded from the command line so runs are reproducible.
		de::Random rnd(context.getTestContext().getCommandLine().getBaseSeed());

		switch (format)
		{
			default:
				DE_FATAL("Illegal buffer format");
				break;
			// bool and integer formats are both filled as raw 32-bit words.
			case VK_FORMAT_R8_USCALED:
			case VK_FORMAT_R8G8_USCALED:
			case VK_FORMAT_R8G8B8_USCALED:
			case VK_FORMAT_R8G8B8A8_USCALED:
			case VK_FORMAT_R32_SINT:
			case VK_FORMAT_R32G32_SINT:
			case VK_FORMAT_R32G32B32_SINT:
			case VK_FORMAT_R32G32B32A32_SINT:
			case VK_FORMAT_R32_UINT:
			case VK_FORMAT_R32G32_UINT:
			case VK_FORMAT_R32G32B32_UINT:
			case VK_FORMAT_R32G32B32A32_UINT:
			{
				deUint32* ptr = reinterpret_cast<deUint32*>(alloc.getHostPtr());

				for (vk::VkDeviceSize k = 0; k < (size / sizeof(deUint32)); k++)
				{
					ptr[k] = rnd.getUint32();
				}
			}
			break;
			case VK_FORMAT_R32_SFLOAT:
			case VK_FORMAT_R32G32_SFLOAT:
			case VK_FORMAT_R32G32B32_SFLOAT:
			case VK_FORMAT_R32G32B32A32_SFLOAT:
			{
				float* ptr = reinterpret_cast<float*>(alloc.getHostPtr());

				for (vk::VkDeviceSize k = 0; k < (size / sizeof(float)); k++)
				{
					ptr[k] = rnd.getFloat();
				}
			}
			break;
			case VK_FORMAT_R64_SFLOAT:
			case VK_FORMAT_R64G64_SFLOAT:
			case VK_FORMAT_R64G64B64_SFLOAT:
			case VK_FORMAT_R64G64B64A64_SFLOAT:
			{
				double* ptr = reinterpret_cast<double*>(alloc.getHostPtr());

				for (vk::VkDeviceSize k = 0; k < (size / sizeof(double)); k++)
				{
					ptr[k] = rnd.getDouble();
				}
			}
			break;
		}
	}
	else if (subgroups::SSBOData::InitializeZero == data.initializeType)
	{
		deUint32* ptr = reinterpret_cast<deUint32*>(alloc.getHostPtr());

		for (vk::VkDeviceSize k = 0; k < size / 4; k++)
		{
			ptr[k] = 0;
		}
	}

	// InitializeNone means the shader writes the memory itself, so no flush
	// is needed.
	if (subgroups::SSBOData::InitializeNone != data.initializeType)
	{
		flushAlloc(context.getDeviceInterface(), context.getDevice(), alloc);
	}
}

// Maps a graphics stage to the fixed SSBO binding index used for its result
// buffer. NOTE(review): the default path returns -1 through a deUint32,
// which wraps to 0xFFFFFFFF; the DE_ASSERT should fire first in debug builds.
deUint32 getResultBinding (const VkShaderStageFlagBits shaderStage)
{
	switch(shaderStage)
	{
		case VK_SHADER_STAGE_VERTEX_BIT:
			return 0u;
			break;
		case VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT:
			return 1u;
			break;
		case VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT:
			return 2u;
			break;
		case VK_SHADER_STAGE_GEOMETRY_BIT:
			return 3u;
			break;
		default:
			DE_ASSERT(0);
			return -1;
	}
	DE_ASSERT(0);
	return -1;
}

// Runs a tessellation framebuffer test: renders isoline patches of
// increasing count with the "vert"/"tesc"/"tese"/"fragment" shaders from the
// binary collection and verifies the rendered line with 'checkResult'.
// 'extraData' describes additional UBO/image inputs bound for 'shaderStage'.
tcu::TestStatus vkt::subgroups::makeTessellationEvaluationFrameBufferTest(
	Context& context, VkFormat format, SSBOData* extraData,
	deUint32 extraDataCount,
	bool (*checkResult)(std::vector<const void*> datas, deUint32 width, deUint32 subgroupSize),
	const VkShaderStageFlags shaderStage)
{
	const deUint32 maxWidth = 1024u;
	vector<de::SharedPtr<BufferOrImage> > inputBuffers (extraDataCount);
	DescriptorSetLayoutBuilder layoutBuilder;
	DescriptorPoolBuilder poolBuilder;
	DescriptorSetUpdateBuilder updateBuilder;
	Move <VkDescriptorPool> descriptorPool;
	Move <VkDescriptorSet> descriptorSet;

	const Unique<VkShaderModule> vertexShaderModule (createShaderModule(context.getDeviceInterface(), context.getDevice(),
		context.getBinaryCollection().get("vert"), 0u));
	const Unique<VkShaderModule> teCtrlShaderModule (createShaderModule(context.getDeviceInterface(), context.getDevice(),
		context.getBinaryCollection().get("tesc"), 0u));
	const Unique<VkShaderModule> teEvalShaderModule (createShaderModule(context.getDeviceInterface(), context.getDevice(),
		context.getBinaryCollection().get("tese"), 0u));
	const Unique<VkShaderModule> fragmentShaderModule (createShaderModule(context.getDeviceInterface(), context.getDevice(),
		context.getBinaryCollection().get("fragment"), 0u));
	const Unique<VkRenderPass> renderPass (makeRenderPass(context, format));

	// One vec4 position per vertex, consumed per-vertex.
	const VkVertexInputBindingDescription vertexInputBinding =
	{
		0u,											// binding;
		static_cast<deUint32>(sizeof(tcu::Vec4)),	// stride;
		VK_VERTEX_INPUT_RATE_VERTEX					// inputRate
	};

	const VkVertexInputAttributeDescription vertexInputAttribute =
	{
		0u,
		0u,
		VK_FORMAT_R32G32B32A32_SFLOAT,
		0u
	};

	// Create and initialize the extra input resources (images or UBOs).
	for (deUint32 i = 0u; i < extraDataCount; i++)
	{
		if (extraData[i].isImage)
		{
			inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Image(context, static_cast<deUint32>(extraData[i].numElements), 1u, extraData[i].format));
		}
		else
		{
			vk::VkDeviceSize size = getElementSizeInBytes(extraData[i].format, extraData[i].layout) * extraData[i].numElements;
			inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Buffer(context, size, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT));
		}
		const Allocation& alloc = inputBuffers[i]->getAllocation();
		initializeMemory(context, alloc, extraData[i]);
	}

	for (deUint32 ndx = 0u; ndx < extraDataCount; ndx++)
		layoutBuilder.addBinding(inputBuffers[ndx]->getType(), 1u, shaderStage, DE_NULL);

	const Unique<VkDescriptorSetLayout> descriptorSetLayout (layoutBuilder.build(context.getDeviceInterface(), context.getDevice()));

	const Unique<VkPipelineLayout> pipelineLayout (makePipelineLayout(context, *descriptorSetLayout));

	const Unique<VkPipeline> pipeline (makeGraphicsPipeline(context, *pipelineLayout,
		VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT |
		VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT | VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT,
		*vertexShaderModule, *fragmentShaderModule, DE_NULL, *teCtrlShaderModule, *teEvalShaderModule,
		*renderPass, VK_PRIMITIVE_TOPOLOGY_PATCH_LIST, &vertexInputBinding, &vertexInputAttribute, true, format));

	for (deUint32 ndx = 0u; ndx < extraDataCount; ndx++)
		poolBuilder.addType(inputBuffers[ndx]->getType());

	// Descriptor pool/set are only needed when there are extra inputs.
	if (extraDataCount > 0)
	{
		descriptorPool = poolBuilder.build(context.getDeviceInterface(), context.getDevice(),
			VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
		descriptorSet = makeDescriptorSet(context, *descriptorPool, *descriptorSetLayout);
	}

	// Point each binding at its image or buffer resource.
	for (deUint32 buffersNdx = 0u; buffersNdx < inputBuffers.size(); buffersNdx++)
	{
		if (inputBuffers[buffersNdx]->isImage())
		{
			VkDescriptorImageInfo info =
				makeDescriptorImageInfo(inputBuffers[buffersNdx]->getAsImage()->getSampler(),
					inputBuffers[buffersNdx]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);

			updateBuilder.writeSingle(*descriptorSet,
				DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
				inputBuffers[buffersNdx]->getType(), &info);
		}
		else
		{
			VkDescriptorBufferInfo info =
				makeDescriptorBufferInfo(inputBuffers[buffersNdx]->getAsBuffer()->getBuffer(),
					0ull, inputBuffers[buffersNdx]->getAsBuffer()->getSize());

			updateBuilder.writeSingle(*descriptorSet,
				DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
				inputBuffers[buffersNdx]->getType(), &info);
		}
	}

	updateBuilder.update(context.getDeviceInterface(), context.getDevice());

	const Unique<VkCommandPool> cmdPool (makeCommandPool(context));
	const deUint32 subgroupSize = getSubgroupSize(context);
	const Unique<VkCommandBuffer> cmdBuffer (makeCommandBuffer(context, *cmdPool));
	// Two vertices (patch endpoints) per rendered pixel column.
	const vk::VkDeviceSize vertexBufferSize = 2ull * maxWidth * sizeof(tcu::Vec4);
	Buffer vertexBuffer (context, vertexBufferSize, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT);
	unsigned totalIterations = 0u;
	unsigned failedIterations = 0u;
	Image discardableImage (context, maxWidth, 1u, format, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);

	{
		// Lay the patch endpoints out as adjacent one-pixel-wide segments
		// across the [-1, 1] clip-space line.
		const Allocation& alloc = vertexBuffer.getAllocation();
		std::vector<tcu::Vec4> data (2u * maxWidth, Vec4(1.0f, 0.0f, 1.0f, 1.0f));
		const float pixelSize = 2.0f / static_cast<float>(maxWidth);
		float leftHandPosition = -1.0f;

		for(deUint32 ndx = 0u; ndx < data.size(); ndx+=2u)
		{
			data[ndx][0] = leftHandPosition;
			leftHandPosition += pixelSize;
			data[ndx+1][0] = leftHandPosition;
		}

		deMemcpy(alloc.getHostPtr(), &data[0], data.size() * sizeof(tcu::Vec4));
		flushAlloc(context.getDeviceInterface(), context.getDevice(), alloc);
	}

	// Render with an increasing number of patches and verify every size.
	for (deUint32 width = 1u; width < maxWidth; ++width)
	{
		const Unique<VkFramebuffer> framebuffer (makeFramebuffer(context, *renderPass, discardableImage.getImageView(), maxWidth, 1));
		const VkViewport viewport = makeViewport(maxWidth, 1u);
		const VkRect2D scissor = makeRect2D(maxWidth, 1u);
		const vk::VkDeviceSize imageResultSize = tcu::getPixelSize(vk::mapVkFormat(format)) * maxWidth;
		Buffer imageBufferResult (context, imageResultSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
		const VkDeviceSize vertexBufferOffset = 0u;

		totalIterations++;

		beginCommandBuffer(context.getDeviceInterface(), *cmdBuffer);
		{

			context.getDeviceInterface().cmdSetViewport(*cmdBuffer, 0, 1, &viewport);
			context.getDeviceInterface().cmdSetScissor(*cmdBuffer, 0, 1, &scissor);

			beginRenderPass(context.getDeviceInterface(), *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, maxWidth, 1u), tcu::Vec4(0.0f));

			context.getDeviceInterface().cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);

			if (extraDataCount > 0)
			{
				context.getDeviceInterface().cmdBindDescriptorSets(*cmdBuffer,
					VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, 1u,
					&descriptorSet.get(), 0u, DE_NULL);
			}

			context.getDeviceInterface().cmdBindVertexBuffers(*cmdBuffer, 0u, 1u, vertexBuffer.getBufferPtr(), &vertexBufferOffset);
			// Two vertices per patch -> 'width' patches.
			context.getDeviceInterface().cmdDraw(*cmdBuffer, 2 * width, 1, 0, 0);

			endRenderPass(context.getDeviceInterface(), *cmdBuffer);

			copyImageToBuffer(context.getDeviceInterface(), *cmdBuffer, discardableImage.getImage(), imageBufferResult.getBuffer(), tcu::IVec2(maxWidth, 1), VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);
			endCommandBuffer(context.getDeviceInterface(), *cmdBuffer);

			Move<VkFence> fence(submitCommandBuffer(context, *cmdBuffer));
			waitFence(context, fence);
		}

		{
			const Allocation& allocResult = imageBufferResult.getAllocation();
			invalidateAlloc(context.getDeviceInterface(), context.getDevice(), allocResult);

			// NOTE(review): the checker is handed width/2u — presumably the
			// count of fully-covered pixels — confirm against the callers.
			std::vector<const void*> datas;
			datas.push_back(allocResult.getHostPtr());
			if (!checkResult(datas, width/2u, subgroupSize))
				failedIterations++;
		}
	}

	if (0 < failedIterations)
	{
		context.getTestContext().getLog()
			<< TestLog::Message << (totalIterations - failedIterations) << " / "
			<< totalIterations << " values passed" << TestLog::EndMessage;
		return tcu::TestStatus::fail("Failed!");
	}

	return tcu::TestStatus::pass("OK");
}

// Verifies that the first 'width' 32-bit words of datas[0] all equal 'ref'.
bool vkt::subgroups::check(std::vector<const void*> datas,
	deUint32 width, deUint32 ref)
{
	const deUint32* data = reinterpret_cast<const deUint32*>(datas[0]);

	for (deUint32 n = 0; n < width; ++n)
	{
		if (data[n] != ref)
		{
			return false;
		}
	}

	return true;
}

// Compute-dispatch variant of check(): validates one result word per global
// invocation (workgroup count x local size in each dimension).
bool vkt::subgroups::checkCompute(std::vector<const void*> datas,
	const deUint32 numWorkgroups[3], const deUint32 localSize[3],
	deUint32 ref)
{
	const deUint32 globalSizeX = numWorkgroups[0] * localSize[0];
	const deUint32 globalSizeY = numWorkgroups[1] * localSize[1];
	const deUint32 globalSizeZ = numWorkgroups[2] * localSize[2];

	return check(datas, globalSizeX * globalSizeY * globalSizeZ, ref);
}

// Runs a geometry-stage framebuffer test: renders an increasing number of
// points with the "vert"/"geometry"/"fragment" shaders and verifies each
// rendered line with 'checkResult'. 'extraData' describes additional
// UBO/image inputs bound to the geometry stage.
tcu::TestStatus vkt::subgroups::makeGeometryFrameBufferTest(
	Context& context, VkFormat format, SSBOData* extraData,
	deUint32 extraDataCount,
	bool (*checkResult)(std::vector<const void*> datas, deUint32 width, deUint32 subgroupSize))
{
	const deUint32 maxWidth = 1024u;
	vector<de::SharedPtr<BufferOrImage> > inputBuffers (extraDataCount);
	DescriptorSetLayoutBuilder layoutBuilder;
	DescriptorPoolBuilder poolBuilder;
	DescriptorSetUpdateBuilder updateBuilder;
	Move <VkDescriptorPool> descriptorPool;
	Move <VkDescriptorSet> descriptorSet;

	const Unique<VkShaderModule> vertexShaderModule (createShaderModule(context.getDeviceInterface(), context.getDevice(),
		context.getBinaryCollection().get("vert"), 0u));
	const Unique<VkShaderModule> geometryShaderModule (createShaderModule(context.getDeviceInterface(), context.getDevice(),
		context.getBinaryCollection().get("geometry"), 0u));
	const Unique<VkShaderModule> fragmentShaderModule (createShaderModule(context.getDeviceInterface(), context.getDevice(),
		context.getBinaryCollection().get("fragment"), 0u));
	const Unique<VkRenderPass> renderPass (makeRenderPass(context, format));
	// One vec4 position per vertex, consumed per-vertex.
	const VkVertexInputBindingDescription vertexInputBinding =
	{
		0u,											// binding;
		static_cast<deUint32>(sizeof(tcu::Vec4)),	// stride;
		VK_VERTEX_INPUT_RATE_VERTEX					// inputRate
	};

	const VkVertexInputAttributeDescription vertexInputAttribute =
	{
		0u,
		0u,
		VK_FORMAT_R32G32B32A32_SFLOAT,
		0u
	};

	// Create and initialize the extra input resources (images or UBOs).
	for (deUint32 i = 0u; i < extraDataCount; i++)
	{
		if (extraData[i].isImage)
		{
			inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Image(context, static_cast<deUint32>(extraData[i].numElements), 1u, extraData[i].format));
		}
		else
		{
			vk::VkDeviceSize size = getElementSizeInBytes(extraData[i].format, extraData[i].layout) * extraData[i].numElements;
			inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Buffer(context, size, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT));
		}
		const Allocation& alloc = inputBuffers[i]->getAllocation();
		initializeMemory(context, alloc, extraData[i]);
	}

	for (deUint32 ndx = 0u; ndx < extraDataCount; ndx++)
		layoutBuilder.addBinding(inputBuffers[ndx]->getType(), 1u, VK_SHADER_STAGE_GEOMETRY_BIT, DE_NULL);

	const Unique<VkDescriptorSetLayout> descriptorSetLayout (layoutBuilder.build(context.getDeviceInterface(), context.getDevice()));

	const Unique<VkPipelineLayout> pipelineLayout (makePipelineLayout(context, *descriptorSetLayout));

	const Unique<VkPipeline> pipeline (makeGraphicsPipeline(context, *pipelineLayout,
		VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT | VK_SHADER_STAGE_GEOMETRY_BIT,
		*vertexShaderModule, *fragmentShaderModule, *geometryShaderModule, DE_NULL, DE_NULL,
		*renderPass, VK_PRIMITIVE_TOPOLOGY_POINT_LIST, &vertexInputBinding, &vertexInputAttribute, true, format));

	for (deUint32 ndx = 0u; ndx < extraDataCount; ndx++)
		poolBuilder.addType(inputBuffers[ndx]->getType());

	// Descriptor pool/set are only needed when there are extra inputs.
	if (extraDataCount > 0)
	{
		descriptorPool = poolBuilder.build(context.getDeviceInterface(), context.getDevice(),
			VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);
		descriptorSet = makeDescriptorSet(context, *descriptorPool, *descriptorSetLayout);
	}

	// Point each binding at its image or buffer resource.
	for (deUint32 buffersNdx = 0u; buffersNdx < inputBuffers.size(); buffersNdx++)
	{
		if (inputBuffers[buffersNdx]->isImage())
		{
			VkDescriptorImageInfo info =
				makeDescriptorImageInfo(inputBuffers[buffersNdx]->getAsImage()->getSampler(),
					inputBuffers[buffersNdx]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);

			updateBuilder.writeSingle(*descriptorSet,
				DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
				inputBuffers[buffersNdx]->getType(), &info);
		}
		else
		{
			VkDescriptorBufferInfo info =
				makeDescriptorBufferInfo(inputBuffers[buffersNdx]->getAsBuffer()->getBuffer(),
					0ull, inputBuffers[buffersNdx]->getAsBuffer()->getSize());

			updateBuilder.writeSingle(*descriptorSet,
				DescriptorSetUpdateBuilder::Location::binding(buffersNdx),
				inputBuffers[buffersNdx]->getType(), &info);
		}
	}

	updateBuilder.update(context.getDeviceInterface(), context.getDevice());

	const Unique<VkCommandPool> cmdPool (makeCommandPool(context));
	const deUint32 subgroupSize = getSubgroupSize(context);
	const Unique<VkCommandBuffer> cmdBuffer (makeCommandBuffer(context, *cmdPool));
	const vk::VkDeviceSize vertexBufferSize = maxWidth * sizeof(tcu::Vec4);
	Buffer vertexBuffer (context, vertexBufferSize, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT);
	unsigned totalIterations = 0u;
	unsigned failedIterations = 0u;
	Image discardableImage (context, maxWidth, 1u, format, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT);

	{
		// One point per pixel column, centered on the pixel.
		const Allocation& alloc = vertexBuffer.getAllocation();
		std::vector<tcu::Vec4> data (maxWidth, Vec4(1.0f, 1.0f, 1.0f, 1.0f));
		const float pixelSize = 2.0f / static_cast<float>(maxWidth);
		float leftHandPosition = -1.0f;

		for(deUint32 ndx = 0u; ndx < maxWidth; ++ndx)
		{
			data[ndx][0] = leftHandPosition + pixelSize / 2.0f;
			leftHandPosition += pixelSize;
		}

		deMemcpy(alloc.getHostPtr(), &data[0], maxWidth * sizeof(tcu::Vec4));
		flushAlloc(context.getDeviceInterface(), context.getDevice(), alloc);
	}

	// Render with an increasing number of points and verify every size.
	for (deUint32 width = 1u; width < maxWidth; width++)
	{
		totalIterations++;
		const Unique<VkFramebuffer> framebuffer (makeFramebuffer(context, *renderPass, discardableImage.getImageView(), maxWidth, 1));
		const VkViewport viewport = makeViewport(maxWidth, 1u);
		const VkRect2D scissor = makeRect2D(maxWidth, 1u);
		const vk::VkDeviceSize imageResultSize = tcu::getPixelSize(vk::mapVkFormat(format)) * maxWidth;
		Buffer imageBufferResult (context, imageResultSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
		const VkDeviceSize vertexBufferOffset = 0u;

		// Re-randomize/reset the extra inputs for every iteration.
		for (deUint32 ndx = 0u; ndx < inputBuffers.size(); ndx++)
		{
			const Allocation& alloc = inputBuffers[ndx]->getAllocation();
			initializeMemory(context, alloc, extraData[ndx]);
		}

		beginCommandBuffer(context.getDeviceInterface(), *cmdBuffer);
		{
			context.getDeviceInterface().cmdSetViewport(
				*cmdBuffer, 0, 1, &viewport);

			context.getDeviceInterface().cmdSetScissor(
				*cmdBuffer, 0, 1, &scissor);

			beginRenderPass(context.getDeviceInterface(), *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, maxWidth, 1u), tcu::Vec4(0.0f));

			context.getDeviceInterface().cmdBindPipeline(
				*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);

			if (extraDataCount > 0)
			{
				context.getDeviceInterface().cmdBindDescriptorSets(*cmdBuffer,
					VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, 1u,
					&descriptorSet.get(), 0u, DE_NULL);
			}

			context.getDeviceInterface().cmdBindVertexBuffers(*cmdBuffer, 0u, 1u, vertexBuffer.getBufferPtr(), &vertexBufferOffset);

			context.getDeviceInterface().cmdDraw(*cmdBuffer, width, 1u, 0u, 0u);

			endRenderPass(context.getDeviceInterface(), *cmdBuffer);

			copyImageToBuffer(context.getDeviceInterface(), *cmdBuffer, discardableImage.getImage(), imageBufferResult.getBuffer(), tcu::IVec2(maxWidth, 1), VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);

			endCommandBuffer(context.getDeviceInterface(), *cmdBuffer);
			Move<VkFence> fence(submitCommandBuffer(context, *cmdBuffer));
			waitFence(context, fence);
		}

		{
			const Allocation& allocResult = imageBufferResult.getAllocation();
			invalidateAlloc(context.getDeviceInterface(), context.getDevice(), allocResult);

			std::vector<const void*> datas;
			datas.push_back(allocResult.getHostPtr());
			if (!checkResult(datas, width, subgroupSize))
				failedIterations++;
		}
	}

	if (0 < failedIterations)
	{
		context.getTestContext().getLog()
			<< TestLog::Message << (totalIterations - failedIterations) << " / "
			<< totalIterations << " values passed" << TestLog::EndMessage;
		return tcu::TestStatus::fail("Failed!");
	}

	return tcu::TestStatus::pass("OK");
}


// Tests subgroup operations simultaneously in every stage selected by
// 'shaderStageTested'. Stages that are not tested but are required to form a
// complete pipeline (e.g. a matching tess-eval for a tess-control test) are
// accumulated into shaderStageRequired and filled with pass-through shaders.
tcu::TestStatus vkt::subgroups::allStages(
	Context& context, VkFormat format, SSBOData* extraDatas,
	deUint32 extraDatasCount,
	bool (*checkResult)(std::vector<const void*> datas, deUint32 width, deUint32 subgroupSize),
	const VkShaderStageFlags shaderStageTested)
{
	const deUint32 maxWidth = 1024u;
	vector<VkShaderStageFlagBits> stagesVector;
	VkShaderStageFlags shaderStageRequired = (VkShaderStageFlags)0ull;

	Move<VkShaderModule> vertexShaderModule;
	Move<VkShaderModule> teCtrlShaderModule;
	Move<VkShaderModule> teEvalShaderModule;
	Move<VkShaderModule> geometryShaderModule;
	Move<VkShaderModule> fragmentShaderModule;

	if (shaderStageTested & VK_SHADER_STAGE_VERTEX_BIT)
	{
		stagesVector.push_back(VK_SHADER_STAGE_VERTEX_BIT);
	}
	if (shaderStageTested & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT)
	{
		stagesVector.push_back(VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT);
		// Tess-control cannot run without a tess-eval stage (and a vertex
		// stage); add whichever of those is not itself under test.
		shaderStageRequired |= (shaderStageTested & (VkShaderStageFlags)VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) ? (VkShaderStageFlags) 0u : (VkShaderStageFlags)VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT;
		shaderStageRequired |= (shaderStageTested & (VkShaderStageFlags)VK_SHADER_STAGE_VERTEX_BIT) ?
(VkShaderStageFlags) 0u : (VkShaderStageFlags)VK_SHADER_STAGE_VERTEX_BIT; 1929 } 1930 if (shaderStageTested & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) 1931 { 1932 stagesVector.push_back(VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT); 1933 shaderStageRequired |= (shaderStageTested & (VkShaderStageFlags)VK_SHADER_STAGE_VERTEX_BIT) ? (VkShaderStageFlags) 0u : (VkShaderStageFlags)VK_SHADER_STAGE_VERTEX_BIT; 1934 shaderStageRequired |= (shaderStageTested & (VkShaderStageFlags)VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) ? (VkShaderStageFlags) 0u : (VkShaderStageFlags)VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT; 1935 } 1936 if (shaderStageTested & VK_SHADER_STAGE_GEOMETRY_BIT) 1937 { 1938 stagesVector.push_back(VK_SHADER_STAGE_GEOMETRY_BIT); 1939 const VkShaderStageFlags required = VK_SHADER_STAGE_VERTEX_BIT; 1940 shaderStageRequired |= (shaderStageTested & required) ? (VkShaderStageFlags) 0 : required; 1941 } 1942 if (shaderStageTested & VK_SHADER_STAGE_FRAGMENT_BIT) 1943 { 1944 const VkShaderStageFlags required = VK_SHADER_STAGE_VERTEX_BIT; 1945 shaderStageRequired |= (shaderStageTested & required) ? (VkShaderStageFlags) 0 : required; 1946 } 1947 1948 const deUint32 stagesCount = static_cast<deUint32>(stagesVector.size()); 1949 const string vert = (shaderStageRequired & VK_SHADER_STAGE_VERTEX_BIT) ? "vert_noSubgroup" : "vert"; 1950 const string tesc = (shaderStageRequired & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) ? "tesc_noSubgroup" : "tesc"; 1951 const string tese = (shaderStageRequired & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) ? 
"tese_noSubgroup" : "tese"; 1952 1953 shaderStageRequired = shaderStageTested | shaderStageRequired; 1954 1955 vertexShaderModule = createShaderModule(context.getDeviceInterface(), context.getDevice(), context.getBinaryCollection().get(vert), 0u); 1956 if (shaderStageRequired & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) 1957 { 1958 teCtrlShaderModule = createShaderModule(context.getDeviceInterface(), context.getDevice(), context.getBinaryCollection().get(tesc), 0u); 1959 teEvalShaderModule = createShaderModule(context.getDeviceInterface(), context.getDevice(), context.getBinaryCollection().get(tese), 0u); 1960 } 1961 if (shaderStageRequired & VK_SHADER_STAGE_GEOMETRY_BIT) 1962 { 1963 if (shaderStageRequired & VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) 1964 { 1965 // tessellation shaders output line primitives 1966 geometryShaderModule = createShaderModule(context.getDeviceInterface(), context.getDevice(), context.getBinaryCollection().get("geometry_lines"), 0u); 1967 } 1968 else 1969 { 1970 // otherwise points are processed by geometry shader 1971 geometryShaderModule = createShaderModule(context.getDeviceInterface(), context.getDevice(), context.getBinaryCollection().get("geometry_points"), 0u); 1972 } 1973 } 1974 if (shaderStageRequired & VK_SHADER_STAGE_FRAGMENT_BIT) 1975 fragmentShaderModule = createShaderModule(context.getDeviceInterface(), context.getDevice(), context.getBinaryCollection().get("fragment"), 0u); 1976 1977 std::vector< de::SharedPtr<BufferOrImage> > inputBuffers(stagesCount + extraDatasCount); 1978 1979 DescriptorSetLayoutBuilder layoutBuilder; 1980 // The implicit result SSBO we use to store our outputs from the shader 1981 for (deUint32 ndx = 0u; ndx < stagesCount; ++ndx) 1982 { 1983 const VkDeviceSize shaderSize = (stagesVector[ndx] == VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) ? 
maxWidth * 2 : maxWidth; 1984 const VkDeviceSize size = getElementSizeInBytes(format, SSBOData::LayoutStd430) * shaderSize; 1985 inputBuffers[ndx] = de::SharedPtr<BufferOrImage>(new Buffer(context, size)); 1986 1987 layoutBuilder.addIndexedBinding(inputBuffers[ndx]->getType(), 1, stagesVector[ndx], getResultBinding(stagesVector[ndx]), DE_NULL); 1988 } 1989 1990 for (deUint32 ndx = stagesCount; ndx < stagesCount + extraDatasCount; ++ndx) 1991 { 1992 const deUint32 datasNdx = ndx - stagesCount; 1993 if (extraDatas[datasNdx].isImage) 1994 { 1995 inputBuffers[ndx] = de::SharedPtr<BufferOrImage>(new Image(context, static_cast<deUint32>(extraDatas[datasNdx].numElements), 1, extraDatas[datasNdx].format)); 1996 } 1997 else 1998 { 1999 const vk::VkDeviceSize size = getElementSizeInBytes(extraDatas[datasNdx].format, extraDatas[datasNdx].layout) * extraDatas[datasNdx].numElements; 2000 inputBuffers[ndx] = de::SharedPtr<BufferOrImage>(new Buffer(context, size)); 2001 } 2002 2003 const Allocation& alloc = inputBuffers[ndx]->getAllocation(); 2004 initializeMemory(context, alloc, extraDatas[datasNdx]); 2005 2006 layoutBuilder.addIndexedBinding(inputBuffers[ndx]->getType(), 1, 2007 extraDatas[datasNdx].stages, extraDatas[datasNdx].binding, DE_NULL); 2008 } 2009 2010 const Unique<VkDescriptorSetLayout> descriptorSetLayout( 2011 layoutBuilder.build(context.getDeviceInterface(), context.getDevice())); 2012 2013 const Unique<VkPipelineLayout> pipelineLayout( 2014 makePipelineLayout(context, *descriptorSetLayout)); 2015 2016 const Unique<VkRenderPass> renderPass(makeRenderPass(context, format)); 2017 const Unique<VkPipeline> pipeline(makeGraphicsPipeline(context, *pipelineLayout, 2018 shaderStageRequired, 2019 *vertexShaderModule, *fragmentShaderModule, *geometryShaderModule, *teCtrlShaderModule, *teEvalShaderModule, 2020 *renderPass, 2021 (shaderStageRequired & VK_SHADER_STAGE_TESSELLATION_CONTROL_BIT) ? 
VK_PRIMITIVE_TOPOLOGY_PATCH_LIST : VK_PRIMITIVE_TOPOLOGY_POINT_LIST)); 2022 2023 DescriptorPoolBuilder poolBuilder; 2024 2025 for (deUint32 ndx = 0u; ndx < static_cast<deUint32>(inputBuffers.size()); ndx++) 2026 { 2027 poolBuilder.addType(inputBuffers[ndx]->getType()); 2028 } 2029 2030 const Unique<VkDescriptorPool> descriptorPool( 2031 poolBuilder.build(context.getDeviceInterface(), context.getDevice(), 2032 VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u)); 2033 2034 // Create descriptor set 2035 const Unique<VkDescriptorSet> descriptorSet( 2036 makeDescriptorSet(context, *descriptorPool, *descriptorSetLayout)); 2037 2038 DescriptorSetUpdateBuilder updateBuilder; 2039 2040 for (deUint32 ndx = 0u; ndx < stagesCount; ndx++) 2041 { 2042 if (inputBuffers[ndx]->isImage()) 2043 { 2044 VkDescriptorImageInfo info = 2045 makeDescriptorImageInfo(inputBuffers[ndx]->getAsImage()->getSampler(), 2046 inputBuffers[ndx]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL); 2047 2048 updateBuilder.writeSingle(*descriptorSet, 2049 DescriptorSetUpdateBuilder::Location::binding(getResultBinding(stagesVector[ndx])), 2050 inputBuffers[ndx]->getType(), &info); 2051 } 2052 else 2053 { 2054 VkDescriptorBufferInfo info = 2055 makeDescriptorBufferInfo(inputBuffers[ndx]->getAsBuffer()->getBuffer(), 2056 0ull, inputBuffers[ndx]->getAsBuffer()->getSize()); 2057 2058 updateBuilder.writeSingle(*descriptorSet, 2059 DescriptorSetUpdateBuilder::Location::binding(getResultBinding(stagesVector[ndx])), 2060 inputBuffers[ndx]->getType(), &info); 2061 } 2062 } 2063 2064 for (deUint32 ndx = stagesCount; ndx < stagesCount + extraDatasCount; ndx++) 2065 { 2066 if (inputBuffers[ndx]->isImage()) 2067 { 2068 VkDescriptorImageInfo info = 2069 makeDescriptorImageInfo(inputBuffers[ndx]->getAsImage()->getSampler(), 2070 inputBuffers[ndx]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL); 2071 2072 updateBuilder.writeSingle(*descriptorSet, 2073 
DescriptorSetUpdateBuilder::Location::binding(extraDatas[ndx -stagesCount].binding), 2074 inputBuffers[ndx]->getType(), &info); 2075 } 2076 else 2077 { 2078 VkDescriptorBufferInfo info = 2079 makeDescriptorBufferInfo(inputBuffers[ndx]->getAsBuffer()->getBuffer(), 2080 0ull, inputBuffers[ndx]->getAsBuffer()->getSize()); 2081 2082 updateBuilder.writeSingle(*descriptorSet, 2083 DescriptorSetUpdateBuilder::Location::binding(extraDatas[ndx - stagesCount].binding), 2084 inputBuffers[ndx]->getType(), &info); 2085 } 2086 } 2087 updateBuilder.update(context.getDeviceInterface(), context.getDevice()); 2088 2089 { 2090 const Unique<VkCommandPool> cmdPool (makeCommandPool(context)); 2091 const deUint32 subgroupSize = getSubgroupSize(context); 2092 const Unique<VkCommandBuffer> cmdBuffer (makeCommandBuffer(context, *cmdPool)); 2093 unsigned totalIterations = 0u; 2094 unsigned failedIterations = 0u; 2095 Image resultImage (context, maxWidth, 1, format, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT); 2096 const Unique<VkFramebuffer> framebuffer (makeFramebuffer(context, *renderPass, resultImage.getImageView(), maxWidth, 1)); 2097 const VkViewport viewport = makeViewport(maxWidth, 1u); 2098 const VkRect2D scissor = makeRect2D(maxWidth, 1u); 2099 const vk::VkDeviceSize imageResultSize = tcu::getPixelSize(vk::mapVkFormat(format)) * maxWidth; 2100 Buffer imageBufferResult (context, imageResultSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT); 2101 const VkImageSubresourceRange subresourceRange = 2102 { 2103 VK_IMAGE_ASPECT_COLOR_BIT, //VkImageAspectFlags aspectMask 2104 0u, //deUint32 baseMipLevel 2105 1u, //deUint32 levelCount 2106 0u, //deUint32 baseArrayLayer 2107 1u //deUint32 layerCount 2108 }; 2109 2110 const VkImageMemoryBarrier colorAttachmentBarrier = makeImageMemoryBarrier( 2111 (VkAccessFlags)0u, VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, 2112 VK_IMAGE_LAYOUT_UNDEFINED, VK_IMAGE_LAYOUT_COLOR_ATTACHMENT_OPTIMAL, 2113 resultImage.getImage(), subresourceRange); 2114 
2115 for (deUint32 width = 1u; width < maxWidth; width++) 2116 { 2117 for (deUint32 ndx = stagesCount; ndx < stagesCount + extraDatasCount; ++ndx) 2118 { 2119 // re-init the data 2120 const Allocation& alloc = inputBuffers[ndx]->getAllocation(); 2121 initializeMemory(context, alloc, extraDatas[ndx - stagesCount]); 2122 } 2123 2124 totalIterations++; 2125 2126 beginCommandBuffer(context.getDeviceInterface(), *cmdBuffer); 2127 2128 context.getDeviceInterface().cmdPipelineBarrier(*cmdBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_COLOR_ATTACHMENT_OUTPUT_BIT, (VkDependencyFlags)0, 0u, (const VkMemoryBarrier*)DE_NULL, 0u, (const VkBufferMemoryBarrier*)DE_NULL, 1u, &colorAttachmentBarrier); 2129 2130 context.getDeviceInterface().cmdSetViewport(*cmdBuffer, 0, 1, &viewport); 2131 2132 context.getDeviceInterface().cmdSetScissor(*cmdBuffer, 0, 1, &scissor); 2133 2134 beginRenderPass(context.getDeviceInterface(), *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, maxWidth, 1u), tcu::Vec4(0.0f)); 2135 2136 context.getDeviceInterface().cmdBindPipeline(*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline); 2137 2138 context.getDeviceInterface().cmdBindDescriptorSets(*cmdBuffer, 2139 VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, 1u, 2140 &descriptorSet.get(), 0u, DE_NULL); 2141 2142 context.getDeviceInterface().cmdDraw(*cmdBuffer, width, 1, 0, 0); 2143 2144 endRenderPass(context.getDeviceInterface(), *cmdBuffer); 2145 2146 copyImageToBuffer(context.getDeviceInterface(), *cmdBuffer, resultImage.getImage(), imageBufferResult.getBuffer(), tcu::IVec2(width, 1), VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL); 2147 2148 endCommandBuffer(context.getDeviceInterface(), *cmdBuffer); 2149 2150 Move<VkFence> fence(submitCommandBuffer(context, *cmdBuffer)); 2151 waitFence(context, fence); 2152 2153 for (deUint32 ndx = 0u; ndx < stagesCount; ++ndx) 2154 { 2155 std::vector<const void*> datas; 2156 if (!inputBuffers[ndx]->isImage()) 2157 
{ 2158 const Allocation& resultAlloc = inputBuffers[ndx]->getAllocation(); 2159 invalidateAlloc(context.getDeviceInterface(), context.getDevice(), resultAlloc); 2160 // we always have our result data first 2161 datas.push_back(resultAlloc.getHostPtr()); 2162 } 2163 2164 for (deUint32 index = stagesCount; index < stagesCount + extraDatasCount; ++index) 2165 { 2166 const deUint32 datasNdx = index - stagesCount; 2167 if ((stagesVector[ndx] & extraDatas[datasNdx].stages) && (!inputBuffers[index]->isImage())) 2168 { 2169 const Allocation& resultAlloc = inputBuffers[index]->getAllocation(); 2170 invalidateAlloc(context.getDeviceInterface(), context.getDevice(), resultAlloc); 2171 // we always have our result data first 2172 datas.push_back(resultAlloc.getHostPtr()); 2173 } 2174 } 2175 2176 if (!checkResult(datas, (stagesVector[ndx] == VK_SHADER_STAGE_TESSELLATION_EVALUATION_BIT) ? width * 2 : width , subgroupSize)) 2177 failedIterations++; 2178 } 2179 if (shaderStageTested & VK_SHADER_STAGE_FRAGMENT_BIT) 2180 { 2181 std::vector<const void*> datas; 2182 const Allocation& resultAlloc = imageBufferResult.getAllocation(); 2183 invalidateAlloc(context.getDeviceInterface(), context.getDevice(), resultAlloc); 2184 2185 // we always have our result data first 2186 datas.push_back(resultAlloc.getHostPtr()); 2187 2188 for (deUint32 index = stagesCount; index < stagesCount + extraDatasCount; ++index) 2189 { 2190 const deUint32 datasNdx = index - stagesCount; 2191 if (VK_SHADER_STAGE_FRAGMENT_BIT & extraDatas[datasNdx].stages && (!inputBuffers[index]->isImage())) 2192 { 2193 const Allocation& alloc = inputBuffers[index]->getAllocation(); 2194 invalidateAlloc(context.getDeviceInterface(), context.getDevice(), alloc); 2195 // we always have our result data first 2196 datas.push_back(alloc.getHostPtr()); 2197 } 2198 } 2199 2200 if (!checkResult(datas, width , subgroupSize)) 2201 failedIterations++; 2202 } 2203 2204 context.getDeviceInterface().resetCommandBuffer(*cmdBuffer, 0); 2205 } 
2206 2207 if (0 < failedIterations) 2208 { 2209 context.getTestContext().getLog() 2210 << TestLog::Message << (totalIterations - failedIterations) << " / " 2211 << totalIterations << " values passed" << TestLog::EndMessage; 2212 return tcu::TestStatus::fail("Failed!"); 2213 } 2214 } 2215 2216 return tcu::TestStatus::pass("OK"); 2217 } 2218 2219 tcu::TestStatus vkt::subgroups::makeVertexFrameBufferTest(Context& context, vk::VkFormat format, 2220 SSBOData* extraData, deUint32 extraDataCount, 2221 bool (*checkResult)(std::vector<const void*> datas, deUint32 width, deUint32 subgroupSize)) 2222 { 2223 const deUint32 maxWidth = 1024u; 2224 vector<de::SharedPtr<BufferOrImage> > inputBuffers (extraDataCount); 2225 DescriptorSetLayoutBuilder layoutBuilder; 2226 const Unique<VkShaderModule> vertexShaderModule (createShaderModule 2227 (context.getDeviceInterface(), context.getDevice(), context.getBinaryCollection().get("vert"), 0u)); 2228 const Unique<VkShaderModule> fragmentShaderModule (createShaderModule 2229 (context.getDeviceInterface(), context.getDevice(), context.getBinaryCollection().get("fragment"), 0u)); 2230 const Unique<VkRenderPass> renderPass (makeRenderPass(context, format)); 2231 2232 const VkVertexInputBindingDescription vertexInputBinding = 2233 { 2234 0u, // binding; 2235 static_cast<deUint32>(sizeof(tcu::Vec4)), // stride; 2236 VK_VERTEX_INPUT_RATE_VERTEX // inputRate 2237 }; 2238 2239 const VkVertexInputAttributeDescription vertexInputAttribute = 2240 { 2241 0u, 2242 0u, 2243 VK_FORMAT_R32G32B32A32_SFLOAT, 2244 0u 2245 }; 2246 2247 for (deUint32 i = 0u; i < extraDataCount; i++) 2248 { 2249 if (extraData[i].isImage) 2250 { 2251 inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Image(context, static_cast<deUint32>(extraData[i].numElements), 1u, extraData[i].format)); 2252 } 2253 else 2254 { 2255 vk::VkDeviceSize size = getElementSizeInBytes(extraData[i].format, extraData[i].layout) * extraData[i].numElements; 2256 inputBuffers[i] = 
de::SharedPtr<BufferOrImage>(new Buffer(context, size, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT)); 2257 } 2258 const Allocation& alloc = inputBuffers[i]->getAllocation(); 2259 initializeMemory(context, alloc, extraData[i]); 2260 } 2261 2262 for (deUint32 ndx = 0u; ndx < extraDataCount; ndx++) 2263 layoutBuilder.addBinding(inputBuffers[ndx]->getType(), 1u, VK_SHADER_STAGE_VERTEX_BIT, DE_NULL); 2264 2265 const Unique<VkDescriptorSetLayout> descriptorSetLayout (layoutBuilder.build(context.getDeviceInterface(), context.getDevice())); 2266 2267 const Unique<VkPipelineLayout> pipelineLayout (makePipelineLayout(context, *descriptorSetLayout)); 2268 2269 const Unique<VkPipeline> pipeline (makeGraphicsPipeline(context, *pipelineLayout, 2270 VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT, 2271 *vertexShaderModule, *fragmentShaderModule, 2272 DE_NULL, DE_NULL, DE_NULL, 2273 *renderPass, VK_PRIMITIVE_TOPOLOGY_POINT_LIST, 2274 &vertexInputBinding, &vertexInputAttribute, true, format)); 2275 DescriptorPoolBuilder poolBuilder; 2276 DescriptorSetUpdateBuilder updateBuilder; 2277 2278 2279 for (deUint32 ndx = 0u; ndx < inputBuffers.size(); ndx++) 2280 poolBuilder.addType(inputBuffers[ndx]->getType()); 2281 2282 Move <VkDescriptorPool> descriptorPool; 2283 Move <VkDescriptorSet> descriptorSet; 2284 2285 if (extraDataCount > 0) 2286 { 2287 descriptorPool = poolBuilder.build(context.getDeviceInterface(), context.getDevice(), 2288 VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u); 2289 descriptorSet = makeDescriptorSet(context, *descriptorPool, *descriptorSetLayout); 2290 } 2291 2292 for (deUint32 ndx = 0u; ndx < extraDataCount; ndx++) 2293 { 2294 const Allocation& alloc = inputBuffers[ndx]->getAllocation(); 2295 initializeMemory(context, alloc, extraData[ndx]); 2296 } 2297 2298 for (deUint32 buffersNdx = 0u; buffersNdx < inputBuffers.size(); buffersNdx++) 2299 { 2300 if (inputBuffers[buffersNdx]->isImage()) 2301 { 2302 VkDescriptorImageInfo info = 2303 
makeDescriptorImageInfo(inputBuffers[buffersNdx]->getAsImage()->getSampler(), 2304 inputBuffers[buffersNdx]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL); 2305 2306 updateBuilder.writeSingle(*descriptorSet, 2307 DescriptorSetUpdateBuilder::Location::binding(buffersNdx), 2308 inputBuffers[buffersNdx]->getType(), &info); 2309 } 2310 else 2311 { 2312 VkDescriptorBufferInfo info = 2313 makeDescriptorBufferInfo(inputBuffers[buffersNdx]->getAsBuffer()->getBuffer(), 2314 0ull, inputBuffers[buffersNdx]->getAsBuffer()->getSize()); 2315 2316 updateBuilder.writeSingle(*descriptorSet, 2317 DescriptorSetUpdateBuilder::Location::binding(buffersNdx), 2318 inputBuffers[buffersNdx]->getType(), &info); 2319 } 2320 } 2321 updateBuilder.update(context.getDeviceInterface(), context.getDevice()); 2322 2323 const Unique<VkCommandPool> cmdPool (makeCommandPool(context)); 2324 2325 const deUint32 subgroupSize = getSubgroupSize(context); 2326 2327 const Unique<VkCommandBuffer> cmdBuffer (makeCommandBuffer(context, *cmdPool)); 2328 2329 const vk::VkDeviceSize vertexBufferSize = maxWidth * sizeof(tcu::Vec4); 2330 Buffer vertexBuffer (context, vertexBufferSize, VK_BUFFER_USAGE_VERTEX_BUFFER_BIT); 2331 2332 unsigned totalIterations = 0u; 2333 unsigned failedIterations = 0u; 2334 2335 Image discardableImage (context, maxWidth, 1u, format, VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_TRANSFER_SRC_BIT); 2336 2337 { 2338 const Allocation& alloc = vertexBuffer.getAllocation(); 2339 std::vector<tcu::Vec4> data (maxWidth, Vec4(1.0f, 1.0f, 1.0f, 1.0f)); 2340 const float pixelSize = 2.0f / static_cast<float>(maxWidth); 2341 float leftHandPosition = -1.0f; 2342 2343 for(deUint32 ndx = 0u; ndx < maxWidth; ++ndx) 2344 { 2345 data[ndx][0] = leftHandPosition + pixelSize / 2.0f; 2346 leftHandPosition += pixelSize; 2347 } 2348 2349 deMemcpy(alloc.getHostPtr(), &data[0], maxWidth * sizeof(tcu::Vec4)); 2350 flushAlloc(context.getDeviceInterface(), context.getDevice(), alloc); 2351 } 2352 2353 
for (deUint32 width = 1u; width < maxWidth; width++) 2354 { 2355 totalIterations++; 2356 const Unique<VkFramebuffer> framebuffer (makeFramebuffer(context, *renderPass, discardableImage.getImageView(), maxWidth, 1)); 2357 const VkViewport viewport = makeViewport(maxWidth, 1u); 2358 const VkRect2D scissor = makeRect2D(maxWidth, 1u); 2359 const vk::VkDeviceSize imageResultSize = tcu::getPixelSize(vk::mapVkFormat(format)) * maxWidth; 2360 Buffer imageBufferResult (context, imageResultSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT); 2361 const VkDeviceSize vertexBufferOffset = 0u; 2362 2363 for (deUint32 ndx = 0u; ndx < inputBuffers.size(); ndx++) 2364 { 2365 const Allocation& alloc = inputBuffers[ndx]->getAllocation(); 2366 initializeMemory(context, alloc, extraData[ndx]); 2367 } 2368 2369 beginCommandBuffer(context.getDeviceInterface(), *cmdBuffer); 2370 { 2371 context.getDeviceInterface().cmdSetViewport( 2372 *cmdBuffer, 0, 1, &viewport); 2373 2374 context.getDeviceInterface().cmdSetScissor( 2375 *cmdBuffer, 0, 1, &scissor); 2376 2377 beginRenderPass(context.getDeviceInterface(), *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, maxWidth, 1u), tcu::Vec4(0.0f)); 2378 2379 context.getDeviceInterface().cmdBindPipeline( 2380 *cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline); 2381 2382 if (extraDataCount > 0) 2383 { 2384 context.getDeviceInterface().cmdBindDescriptorSets(*cmdBuffer, 2385 VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, 1u, 2386 &descriptorSet.get(), 0u, DE_NULL); 2387 } 2388 2389 context.getDeviceInterface().cmdBindVertexBuffers(*cmdBuffer, 0u, 1u, vertexBuffer.getBufferPtr(), &vertexBufferOffset); 2390 2391 context.getDeviceInterface().cmdDraw(*cmdBuffer, width, 1u, 0u, 0u); 2392 2393 endRenderPass(context.getDeviceInterface(), *cmdBuffer); 2394 2395 copyImageToBuffer(context.getDeviceInterface(), *cmdBuffer, discardableImage.getImage(), imageBufferResult.getBuffer(), tcu::IVec2(maxWidth, 1), VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, 
VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL); 2396 2397 endCommandBuffer(context.getDeviceInterface(), *cmdBuffer); 2398 Move<VkFence> fence(submitCommandBuffer(context, *cmdBuffer)); 2399 waitFence(context, fence); 2400 } 2401 2402 { 2403 const Allocation& allocResult = imageBufferResult.getAllocation(); 2404 invalidateAlloc(context.getDeviceInterface(), context.getDevice(), allocResult); 2405 2406 std::vector<const void*> datas; 2407 datas.push_back(allocResult.getHostPtr()); 2408 if (!checkResult(datas, width, subgroupSize)) 2409 failedIterations++; 2410 } 2411 } 2412 2413 if (0 < failedIterations) 2414 { 2415 context.getTestContext().getLog() 2416 << TestLog::Message << (totalIterations - failedIterations) << " / " 2417 << totalIterations << " values passed" << TestLog::EndMessage; 2418 return tcu::TestStatus::fail("Failed!"); 2419 } 2420 2421 return tcu::TestStatus::pass("OK"); 2422 } 2423 2424 2425 tcu::TestStatus vkt::subgroups::makeFragmentFrameBufferTest (Context& context, VkFormat format, SSBOData* extraDatas, 2426 deUint32 extraDatasCount, 2427 bool (*checkResult)(std::vector<const void*> datas, deUint32 width, 2428 deUint32 height, deUint32 subgroupSize)) 2429 { 2430 const Unique<VkShaderModule> vertexShaderModule (createShaderModule 2431 (context.getDeviceInterface(), context.getDevice(), context.getBinaryCollection().get("vert"), 0u)); 2432 const Unique<VkShaderModule> fragmentShaderModule (createShaderModule 2433 (context.getDeviceInterface(), context.getDevice(), context.getBinaryCollection().get("fragment"), 0u)); 2434 2435 std::vector< de::SharedPtr<BufferOrImage> > inputBuffers(extraDatasCount); 2436 2437 for (deUint32 i = 0; i < extraDatasCount; i++) 2438 { 2439 if (extraDatas[i].isImage) 2440 { 2441 inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Image(context, 2442 static_cast<deUint32>(extraDatas[i].numElements), 1, extraDatas[i].format)); 2443 } 2444 else 2445 { 2446 vk::VkDeviceSize size = 2447 getElementSizeInBytes(extraDatas[i].format, 
extraDatas[i].layout) * extraDatas[i].numElements; 2448 inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Buffer(context, size, VK_BUFFER_USAGE_UNIFORM_BUFFER_BIT)); 2449 } 2450 2451 const Allocation& alloc = inputBuffers[i]->getAllocation(); 2452 initializeMemory(context, alloc, extraDatas[i]); 2453 } 2454 2455 DescriptorSetLayoutBuilder layoutBuilder; 2456 2457 for (deUint32 i = 0; i < extraDatasCount; i++) 2458 { 2459 layoutBuilder.addBinding(inputBuffers[i]->getType(), 1, 2460 VK_SHADER_STAGE_FRAGMENT_BIT, DE_NULL); 2461 } 2462 2463 const Unique<VkDescriptorSetLayout> descriptorSetLayout( 2464 layoutBuilder.build(context.getDeviceInterface(), context.getDevice())); 2465 2466 const Unique<VkPipelineLayout> pipelineLayout( 2467 makePipelineLayout(context, *descriptorSetLayout)); 2468 2469 const Unique<VkRenderPass> renderPass(makeRenderPass(context, format)); 2470 const Unique<VkPipeline> pipeline(makeGraphicsPipeline(context, *pipelineLayout, 2471 VK_SHADER_STAGE_VERTEX_BIT | VK_SHADER_STAGE_FRAGMENT_BIT, 2472 *vertexShaderModule, *fragmentShaderModule, DE_NULL, DE_NULL, DE_NULL, *renderPass, VK_PRIMITIVE_TOPOLOGY_TRIANGLE_STRIP, 2473 DE_NULL, DE_NULL, true)); 2474 2475 DescriptorPoolBuilder poolBuilder; 2476 2477 // To stop validation complaining, always add at least one type to pool. 
	// (continuation of a graphics-stage subgroup test whose opening is above
	// this chunk) Pool gets one slot for the result SSBO plus one per extra
	// input resource.
	poolBuilder.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER);
	for (deUint32 i = 0; i < extraDatasCount; i++)
	{
		poolBuilder.addType(inputBuffers[i]->getType());
	}

	Move<VkDescriptorPool> descriptorPool;
	// Create descriptor set
	Move<VkDescriptorSet> descriptorSet;

	// Pool and set are only created when there is at least one extra input;
	// otherwise both handles stay null and no descriptors are bound below.
	if (extraDatasCount > 0)
	{
		descriptorPool = poolBuilder.build(context.getDeviceInterface(), context.getDevice(),
						VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u);

		descriptorSet = makeDescriptorSet(context, *descriptorPool, *descriptorSetLayout);
	}

	DescriptorSetUpdateBuilder updateBuilder;

	// Point binding i at extra input i: image inputs are bound in GENERAL
	// layout with their sampler/view, buffer inputs with their full size.
	for (deUint32 i = 0; i < extraDatasCount; i++)
	{
		if (inputBuffers[i]->isImage())
		{
			VkDescriptorImageInfo info =
				makeDescriptorImageInfo(inputBuffers[i]->getAsImage()->getSampler(),
										inputBuffers[i]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);

			updateBuilder.writeSingle(*descriptorSet,
									  DescriptorSetUpdateBuilder::Location::binding(i),
									  inputBuffers[i]->getType(), &info);
		}
		else
		{
			VkDescriptorBufferInfo info =
				makeDescriptorBufferInfo(inputBuffers[i]->getAsBuffer()->getBuffer(),
										 0ull, inputBuffers[i]->getAsBuffer()->getSize());

			updateBuilder.writeSingle(*descriptorSet,
									  DescriptorSetUpdateBuilder::Location::binding(i),
									  inputBuffers[i]->getType(), &info);
		}
	}

	if (extraDatasCount > 0)
		updateBuilder.update(context.getDeviceInterface(), context.getDevice());

	const Unique<VkCommandPool> cmdPool(makeCommandPool(context));

	const deUint32 subgroupSize = getSubgroupSize(context);

	const Unique<VkCommandBuffer> cmdBuffer(
		makeCommandBuffer(context, *cmdPool));

	unsigned totalIterations = 0;
	unsigned failedIterations = 0;

	// Render at every power-of-two framebuffer size from 8x8 up to
	// subgroupSize x subgroupSize; each (width, height) pair is one iteration.
	for (deUint32 width = 8; width <= subgroupSize; width *= 2)
	{
		for (deUint32 height = 8; height <= subgroupSize; height *= 2)
		{
			totalIterations++;

			// re-init the data
			for (deUint32 i = 0; i < extraDatasCount; i++)
			{
				const Allocation& alloc = inputBuffers[i]->getAllocation();
				initializeMemory(context, alloc, extraDatas[i]);
			}

			VkDeviceSize formatSize = getFormatSizeInBytes(format);
			const VkDeviceSize resultImageSizeInBytes =
				width * height * formatSize;

			// Color attachment that is also a transfer source so the result
			// can be copied back to a host-readable buffer below.
			Image resultImage(context, width, height, format,
							  VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT |
							  VK_IMAGE_USAGE_TRANSFER_SRC_BIT);

			// NOTE(review): this passes VK_IMAGE_USAGE_TRANSFER_DST_BIT to a
			// Buffer, whose constructor presumably takes VkBufferUsageFlags.
			// The numeric value matches VK_BUFFER_USAGE_TRANSFER_DST_BIT
			// (both 0x2) so behavior is unaffected, but the enum type looks
			// wrong — confirm against the Buffer constructor declaration.
			Buffer resultBuffer(context, resultImageSizeInBytes,
								VK_IMAGE_USAGE_TRANSFER_DST_BIT);

			const Unique<VkFramebuffer> framebuffer(makeFramebuffer(context,
				*renderPass, resultImage.getImageView(), width, height));

			beginCommandBuffer(context.getDeviceInterface(), *cmdBuffer);

			// Viewport/scissor are dynamic state: set to cover the whole
			// framebuffer for this iteration's size.
			VkViewport viewport = makeViewport(width, height);

			context.getDeviceInterface().cmdSetViewport(
				*cmdBuffer, 0, 1, &viewport);

			VkRect2D scissor = {{0, 0}, {width, height}};

			context.getDeviceInterface().cmdSetScissor(
				*cmdBuffer, 0, 1, &scissor);

			beginRenderPass(context.getDeviceInterface(), *cmdBuffer, *renderPass, *framebuffer, makeRect2D(0, 0, width, height), tcu::Vec4(0.0f));

			context.getDeviceInterface().cmdBindPipeline(
				*cmdBuffer, VK_PIPELINE_BIND_POINT_GRAPHICS, *pipeline);

			if (extraDatasCount > 0)
			{
				context.getDeviceInterface().cmdBindDescriptorSets(*cmdBuffer,
					VK_PIPELINE_BIND_POINT_GRAPHICS, *pipelineLayout, 0u, 1u,
					&descriptorSet.get(), 0u, DE_NULL);
			}

			// 4 vertices, 1 instance — presumably a full-screen quad/strip;
			// the vertex topology is set up with the pipeline above this chunk.
			context.getDeviceInterface().cmdDraw(*cmdBuffer, 4, 1, 0, 0);

			endRenderPass(context.getDeviceInterface(), *cmdBuffer);

			// Copy the rendered attachment into the host-visible buffer
			// (helper also handles the layout transition / barriers).
			copyImageToBuffer(context.getDeviceInterface(), *cmdBuffer, resultImage.getImage(), resultBuffer.getBuffer(), tcu::IVec2(width, height), VK_ACCESS_COLOR_ATTACHMENT_WRITE_BIT, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL);

			endCommandBuffer(context.getDeviceInterface(), *cmdBuffer);

			Move<VkFence> fence(submitCommandBuffer(context, *cmdBuffer));

			waitFence(context, fence);

			std::vector<const void*> datas;
			{
				const Allocation& resultAlloc = resultBuffer.getAllocation();
				// Make GPU writes visible to the host before reading.
				invalidateAlloc(context.getDeviceInterface(), context.getDevice(), resultAlloc);

				// we always have our result data first
				datas.push_back(resultAlloc.getHostPtr());
			}

			if (!checkResult(datas, width, height, subgroupSize))
			{
				failedIterations++;
			}

			// Command buffer is reused for the next (width, height) pair.
			context.getDeviceInterface().resetCommandBuffer(*cmdBuffer, 0);
		}
	}

	if (0 < failedIterations)
	{
		context.getTestContext().getLog()
			<< TestLog::Message << (totalIterations - failedIterations) << " / "
			<< totalIterations << " values passed" << TestLog::EndMessage;
		return tcu::TestStatus::fail("Failed!");
	}

	return tcu::TestStatus::pass("OK");
}

// Runs a compute-stage subgroup test.
//
// Builds one result SSBO (sized for the largest supported subgroup in each
// dimension) plus an optional array of extra input buffers/images, binds them
// all in a single descriptor set (result at binding 0, inputs at i + 1), then
// dispatches the precompiled "comp" shader once for each entry in a fixed
// list of local workgroup sizes. After every dispatch the result buffer (and
// every non-image input buffer) is invalidated and handed to checkResult
// together with the workgroup counts, the local size used, and the device's
// subgroup size.
//
// \param context      Test context providing device, allocator and binaries.
// \param format       Element format of the result buffer; its byte size is
//                     taken from getFormatSizeInBytes().
// \param inputs       Array of inputsCount SSBOData descriptions, uploaded
//                     via initializeMemory() before the dispatch loop.
// \param inputsCount  Number of entries in inputs (0 is valid).
// \param checkResult  Verification callback; a false return marks that
//                     iteration failed.
// \return pass("OK") when every iteration passes, otherwise fail("Failed!").
tcu::TestStatus vkt::subgroups::makeComputeTest(
	Context& context, VkFormat format, SSBOData* inputs, deUint32 inputsCount,
	bool (*checkResult)(std::vector<const void*> datas,
		const deUint32 numWorkgroups[3], const deUint32 localSize[3],
		deUint32 subgroupSize))
{
	VkDeviceSize elementSize = getFormatSizeInBytes(format);

	// Element count sized as maxSubgroupSize^3 so the buffer is big enough
	// for any of the local sizes tested below (max local size here is 128
	// in one dimension, well under the cube).
	const VkDeviceSize resultBufferSize = maxSupportedSubgroupSize() *
										  maxSupportedSubgroupSize() *
										  maxSupportedSubgroupSize();
	const VkDeviceSize resultBufferSizeInBytes = resultBufferSize * elementSize;

	Buffer resultBuffer(
		context, resultBufferSizeInBytes);

	std::vector< de::SharedPtr<BufferOrImage> > inputBuffers(inputsCount);

	// Create and initialize each extra input as either a 1-row image or a
	// buffer, depending on the SSBOData description.
	for (deUint32 i = 0; i < inputsCount; i++)
	{
		if (inputs[i].isImage)
		{
			inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Image(context,
				static_cast<deUint32>(inputs[i].numElements), 1, inputs[i].format));
		}
		else
		{
			vk::VkDeviceSize size =
				getElementSizeInBytes(inputs[i].format, inputs[i].layout) * inputs[i].numElements;
			inputBuffers[i] = de::SharedPtr<BufferOrImage>(new Buffer(context, size));
		}

		const Allocation& alloc = inputBuffers[i]->getAllocation();
		initializeMemory(context, alloc, inputs[i]);
	}

	// Layout: binding 0 = result buffer, binding i + 1 = input i, all
	// visible to the compute stage only.
	DescriptorSetLayoutBuilder layoutBuilder;
	layoutBuilder.addBinding(
		resultBuffer.getType(), 1, VK_SHADER_STAGE_COMPUTE_BIT, DE_NULL);

	for (deUint32 i = 0; i < inputsCount; i++)
	{
		layoutBuilder.addBinding(
			inputBuffers[i]->getType(), 1, VK_SHADER_STAGE_COMPUTE_BIT, DE_NULL);
	}

	const Unique<VkDescriptorSetLayout> descriptorSetLayout(
		layoutBuilder.build(context.getDeviceInterface(), context.getDevice()));

	// Compute shader was pre-built into the binary collection under "comp".
	const Unique<VkShaderModule> shaderModule(
		createShaderModule(context.getDeviceInterface(), context.getDevice(),
						   context.getBinaryCollection().get("comp"), 0u));
	const Unique<VkPipelineLayout> pipelineLayout(
		makePipelineLayout(context, *descriptorSetLayout));

	DescriptorPoolBuilder poolBuilder;

	poolBuilder.addType(resultBuffer.getType());

	for (deUint32 i = 0; i < inputsCount; i++)
	{
		poolBuilder.addType(inputBuffers[i]->getType());
	}

	const Unique<VkDescriptorPool> descriptorPool(
		poolBuilder.build(context.getDeviceInterface(), context.getDevice(),
						  VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));

	// Create descriptor set
	const Unique<VkDescriptorSet> descriptorSet(
		makeDescriptorSet(context, *descriptorPool, *descriptorSetLayout));

	DescriptorSetUpdateBuilder updateBuilder;

	const VkDescriptorBufferInfo resultDescriptorInfo =
		makeDescriptorBufferInfo(
			resultBuffer.getBuffer(), 0ull, resultBufferSizeInBytes);

	updateBuilder.writeSingle(*descriptorSet,
							  DescriptorSetUpdateBuilder::Location::binding(0u),
							  VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &resultDescriptorInfo);

	// Inputs start at binding 1, since binding 0 is the result buffer.
	for (deUint32 i = 0; i < inputsCount; i++)
	{
		if (inputBuffers[i]->isImage())
		{
			VkDescriptorImageInfo info =
				makeDescriptorImageInfo(inputBuffers[i]->getAsImage()->getSampler(),
										inputBuffers[i]->getAsImage()->getImageView(), VK_IMAGE_LAYOUT_GENERAL);

			updateBuilder.writeSingle(*descriptorSet,
									  DescriptorSetUpdateBuilder::Location::binding(i + 1),
									  inputBuffers[i]->getType(), &info);
		}
		else
		{
			vk::VkDeviceSize size =
				getElementSizeInBytes(inputs[i].format, inputs[i].layout) * inputs[i].numElements;
			VkDescriptorBufferInfo info =
				makeDescriptorBufferInfo(inputBuffers[i]->getAsBuffer()->getBuffer(), 0ull, size);

			updateBuilder.writeSingle(*descriptorSet,
									  DescriptorSetUpdateBuilder::Location::binding(i + 1),
									  inputBuffers[i]->getType(), &info);
		}
	}

	updateBuilder.update(context.getDeviceInterface(), context.getDevice());

	const Unique<VkCommandPool> cmdPool(makeCommandPool(context));

	unsigned totalIterations = 0;
	unsigned failedIterations = 0;

	const deUint32 subgroupSize = getSubgroupSize(context);

	const Unique<VkCommandBuffer> cmdBuffer(
		makeCommandBuffer(context, *cmdPool));

	const deUint32 numWorkgroups[3] = {4, 2, 2};

	// Local workgroup sizes exercised, covering each axis individually,
	// mixed shapes, odd sizes, and the device's own subgroup size.
	const deUint32 localSizesToTestCount = 15;
	deUint32 localSizesToTest[localSizesToTestCount][3] =
	{
		{1, 1, 1},
		{32, 4, 1},
		{32, 1, 4},
		{1, 32, 4},
		{1, 4, 32},
		{4, 1, 32},
		{4, 32, 1},
		{subgroupSize, 1, 1},
		{1, subgroupSize, 1},
		{1, 1, subgroupSize},
		{3, 5, 7},
		{128, 1, 1},
		{1, 128, 1},
		{1, 1, 64},
		{1, 1, 1} // Isn't used, just here to make double buffering checks easier
	};

	// Pipelines are double-buffered: the pipeline for iteration index + 1 is
	// created while the GPU executes iteration index (see loop body). The
	// dummy last table entry keeps index + 1 in bounds on the final pass.
	Move<VkPipeline> lastPipeline(
		makeComputePipeline(context, *pipelineLayout, *shaderModule,
							localSizesToTest[0][0], localSizesToTest[0][1], localSizesToTest[0][2]));

	for (deUint32 index = 0; index < (localSizesToTestCount - 1); index++)
	{
		const deUint32 nextX = localSizesToTest[index + 1][0];
		const deUint32 nextY = localSizesToTest[index + 1][1];
		const deUint32 nextZ = localSizesToTest[index + 1][2];

		// we are running one test
		totalIterations++;

		beginCommandBuffer(context.getDeviceInterface(), *cmdBuffer);

		context.getDeviceInterface().cmdBindPipeline(
			*cmdBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *lastPipeline);

		context.getDeviceInterface().cmdBindDescriptorSets(*cmdBuffer,
			VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u,
			&descriptorSet.get(), 0u, DE_NULL);

		context.getDeviceInterface().cmdDispatch(*cmdBuffer,
			numWorkgroups[0], numWorkgroups[1], numWorkgroups[2]);

		endCommandBuffer(context.getDeviceInterface(), *cmdBuffer);

		Move<VkFence> fence(submitCommandBuffer(context, *cmdBuffer));

		// Overlap CPU-side pipeline creation with the GPU work submitted
		// above; the fence is waited on only afterwards.
		Move<VkPipeline> nextPipeline(
			makeComputePipeline(context, *pipelineLayout, *shaderModule,
								nextX, nextY, nextZ));

		waitFence(context, fence);

		std::vector<const void*> datas;

		{
			const Allocation& resultAlloc = resultBuffer.getAllocation();
			// Make GPU writes visible to the host before reading.
			invalidateAlloc(context.getDeviceInterface(), context.getDevice(), resultAlloc);

			// we always have our result data first
			datas.push_back(resultAlloc.getHostPtr());
		}

		// Buffer inputs (not images) are also handed to the checker, in
		// binding order after the result data.
		for (deUint32 i = 0; i < inputsCount; i++)
		{
			if (!inputBuffers[i]->isImage())
			{
				const Allocation& resultAlloc = inputBuffers[i]->getAllocation();
				invalidateAlloc(context.getDeviceInterface(), context.getDevice(), resultAlloc);

				// we always have our result data first
				datas.push_back(resultAlloc.getHostPtr());
			}
		}

		if (!checkResult(datas, numWorkgroups, localSizesToTest[index], subgroupSize))
		{
			failedIterations++;
		}

		// Reuse the command buffer; hand the pre-built pipeline to the next
		// iteration (Move assignment transfers ownership).
		context.getDeviceInterface().resetCommandBuffer(*cmdBuffer, 0);

		lastPipeline = nextPipeline;
	}

	if (0 < failedIterations)
	{
		context.getTestContext().getLog()
			<< TestLog::Message << (totalIterations - failedIterations) << " / "
			<< totalIterations << " values passed" << TestLog::EndMessage;
		return tcu::TestStatus::fail("Failed!");
	}

	return tcu::TestStatus::pass("OK");
}