1 /*------------------------------------------------------------------------ 2 * Vulkan Conformance Tests 3 * ------------------------ 4 * 5 * Copyright (c) 2016 The Khronos Group Inc. 6 * 7 * Licensed under the Apache License, Version 2.0 (the "License"); 8 * you may not use this file except in compliance with the License. 9 * You may obtain a copy of the License at 10 * 11 * http://www.apache.org/licenses/LICENSE-2.0 12 * 13 * Unless required by applicable law or agreed to in writing, software 14 * distributed under the License is distributed on an "AS IS" BASIS, 15 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 16 * See the License for the specific language governing permissions and 17 * limitations under the License. 18 * 19 *//*! 20 * \file vktSparseResourcesBufferSparseResidency.cpp 21 * \brief Sparse partially resident buffers tests 22 *//*--------------------------------------------------------------------*/ 23 24 #include "vktSparseResourcesBufferSparseResidency.hpp" 25 #include "vktSparseResourcesTestsUtil.hpp" 26 #include "vktSparseResourcesBase.hpp" 27 #include "vktTestCaseUtil.hpp" 28 29 #include "vkDefs.hpp" 30 #include "vkRef.hpp" 31 #include "vkRefUtil.hpp" 32 #include "vkPlatform.hpp" 33 #include "vkPrograms.hpp" 34 #include "vkRefUtil.hpp" 35 #include "vkMemUtil.hpp" 36 #include "vkQueryUtil.hpp" 37 #include "vkBuilderUtil.hpp" 38 #include "vkTypeUtil.hpp" 39 40 #include "deStringUtil.hpp" 41 #include "deUniquePtr.hpp" 42 43 #include <string> 44 #include <vector> 45 46 using namespace vk; 47 48 namespace vkt 49 { 50 namespace sparse 51 { 52 namespace 53 { 54 55 enum ShaderParameters 56 { 57 SIZE_OF_UINT_IN_SHADER = 4u, 58 }; 59 60 class BufferSparseResidencyCase : public TestCase 61 { 62 public: 63 BufferSparseResidencyCase (tcu::TestContext& testCtx, 64 const std::string& name, 65 const std::string& description, 66 const deUint32 bufferSize, 67 const glu::GLSLVersion glslVersion); 68 69 void initPrograms 
(SourceCollections& sourceCollections) const; 70 TestInstance* createInstance (Context& context) const; 71 72 private: 73 const deUint32 m_bufferSize; 74 const glu::GLSLVersion m_glslVersion; 75 }; 76 77 BufferSparseResidencyCase::BufferSparseResidencyCase (tcu::TestContext& testCtx, 78 const std::string& name, 79 const std::string& description, 80 const deUint32 bufferSize, 81 const glu::GLSLVersion glslVersion) 82 : TestCase (testCtx, name, description) 83 , m_bufferSize (bufferSize) 84 , m_glslVersion (glslVersion) 85 { 86 } 87 88 void BufferSparseResidencyCase::initPrograms (SourceCollections& sourceCollections) const 89 { 90 const char* const versionDecl = glu::getGLSLVersionDeclaration(m_glslVersion); 91 const deUint32 iterationsCount = m_bufferSize / SIZE_OF_UINT_IN_SHADER; 92 93 std::ostringstream src; 94 95 src << versionDecl << "\n" 96 << "layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n" 97 << "layout(set = 0, binding = 0, std430) readonly buffer Input\n" 98 << "{\n" 99 << " uint data[];\n" 100 << "} sb_in;\n" 101 << "\n" 102 << "layout(set = 0, binding = 1, std430) writeonly buffer Output\n" 103 << "{\n" 104 << " uint result[];\n" 105 << "} sb_out;\n" 106 << "\n" 107 << "void main (void)\n" 108 << "{\n" 109 << " for(int i=0; i<" << iterationsCount << "; ++i) \n" 110 << " {\n" 111 << " sb_out.result[i] = sb_in.data[i];" 112 << " }\n" 113 << "}\n"; 114 115 sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str()); 116 } 117 118 class BufferSparseResidencyInstance : public SparseResourcesBaseInstance 119 { 120 public: 121 BufferSparseResidencyInstance (Context& context, 122 const deUint32 bufferSize); 123 124 tcu::TestStatus iterate (void); 125 126 private: 127 const deUint32 m_bufferSize; 128 }; 129 130 BufferSparseResidencyInstance::BufferSparseResidencyInstance (Context& context, 131 const deUint32 bufferSize) 132 : SparseResourcesBaseInstance (context) 133 , m_bufferSize (bufferSize) 134 { 135 } 136 137 
//! Run the sparse-residency test:
//!  1. Create a sparse buffer and bind device memory to every EVEN alignment-sized slot only.
//!  2. Fill an ordinary input buffer with reference data.
//!  3. Run the "comp" shader to copy input -> sparse buffer, then copy sparse -> output buffer.
//!  4. Verify resident (even) slots match the reference; non-resident (odd) slots must read
//!     zeros when the device reports residencyNonResidentStrict.
tcu::TestStatus BufferSparseResidencyInstance::iterate (void)
{
	const InstanceInterface&			instance					= m_context.getInstanceInterface();
	const VkPhysicalDevice				physicalDevice				= m_context.getPhysicalDevice();
	const VkPhysicalDeviceProperties	physicalDeviceProperties	= getPhysicalDeviceProperties(instance, physicalDevice);

	// The whole test hinges on the sparseResidencyBuffer feature; bail out early without it.
	if (!getPhysicalDeviceFeatures(instance, physicalDevice).sparseResidencyBuffer)
		TCU_THROW(NotSupportedError, "Sparse partially resident buffers not supported");

	{
		// Create logical device supporting both sparse and compute operations
		QueueRequirementsVec queueRequirements;
		queueRequirements.push_back(QueueRequirements(VK_QUEUE_SPARSE_BINDING_BIT, 1u));
		queueRequirements.push_back(QueueRequirements(VK_QUEUE_COMPUTE_BIT, 1u));

		createDeviceSupportingQueues(queueRequirements);
	}

	const DeviceInterface&	deviceInterface	= getDeviceInterface();
	const Queue&			sparseQueue		= getQueue(VK_QUEUE_SPARSE_BINDING_BIT, 0);
	const Queue&			computeQueue	= getQueue(VK_QUEUE_COMPUTE_BIT, 0);

	// Non-const: sharing mode fields are patched below when the sparse and
	// compute queues come from different families.
	VkBufferCreateInfo bufferCreateInfo =
	{
		VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,		// VkStructureType		sType;
		DE_NULL,									// const void*			pNext;
		VK_BUFFER_CREATE_SPARSE_BINDING_BIT |
		VK_BUFFER_CREATE_SPARSE_RESIDENCY_BIT,		// VkBufferCreateFlags	flags;
		m_bufferSize,								// VkDeviceSize			size;
		VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
		VK_BUFFER_USAGE_TRANSFER_SRC_BIT,			// VkBufferUsageFlags	usage;
		VK_SHARING_MODE_EXCLUSIVE,					// VkSharingMode		sharingMode;
		0u,											// deUint32				queueFamilyIndexCount;
		DE_NULL										// const deUint32*		pQueueFamilyIndices;
	};

	const deUint32 queueFamilyIndices[] = { sparseQueue.queueFamilyIndex, computeQueue.queueFamilyIndex };

	// The sparse buffer is bound on the sparse queue but read/written on the
	// compute queue; use concurrent sharing when those are distinct families.
	if (sparseQueue.queueFamilyIndex != computeQueue.queueFamilyIndex)
	{
		bufferCreateInfo.sharingMode			= VK_SHARING_MODE_CONCURRENT;
		bufferCreateInfo.queueFamilyIndexCount	= 2u;
		bufferCreateInfo.pQueueFamilyIndices	= queueFamilyIndices;
	}

	// Create sparse buffer
	const Unique<VkBuffer> sparseBuffer(createBuffer(deviceInterface, getDevice(), &bufferCreateInfo));

	// Create sparse buffer memory bind semaphore
	const Unique<VkSemaphore> bufferMemoryBindSemaphore(makeSemaphore(deviceInterface, getDevice()));

	const VkMemoryRequirements bufferMemRequirements = getBufferMemoryRequirements(deviceInterface, getDevice(), *sparseBuffer);

	if (bufferMemRequirements.size > physicalDeviceProperties.limits.sparseAddressSpaceSize)
		TCU_THROW(NotSupportedError, "Required memory size for sparse resources exceeds device limits");

	// Sparse buffer sizes are required to be a multiple of the sparse block alignment.
	DE_ASSERT((bufferMemRequirements.size % bufferMemRequirements.alignment) == 0);

	// One "slot" == one alignment-sized sparse block of the buffer.
	const deUint32				numSparseSlots = static_cast<deUint32>(bufferMemRequirements.size / bufferMemRequirements.alignment);
	std::vector<DeviceMemorySp>	deviceMemUniquePtrVec;

	{
		std::vector<VkSparseMemoryBind> sparseMemoryBinds;
		const deUint32					memoryType = findMatchingMemoryType(instance, physicalDevice, bufferMemRequirements, MemoryRequirement::Any);

		if (memoryType == NO_MATCH_FOUND)
			return tcu::TestStatus::fail("No matching memory type found");

		// Bind device memory to EVEN slots only (step 2); odd slots stay
		// unbound, making the buffer only partially resident.
		for (deUint32 sparseBindNdx = 0; sparseBindNdx < numSparseSlots; sparseBindNdx += 2)
		{
			const VkSparseMemoryBind sparseMemoryBind = makeSparseMemoryBind(deviceInterface, getDevice(), bufferMemRequirements.alignment, memoryType, bufferMemRequirements.alignment * sparseBindNdx);

			// Wrap the allocation so it is freed automatically when the vector goes out of scope.
			deviceMemUniquePtrVec.push_back(makeVkSharedPtr(Move<VkDeviceMemory>(check<VkDeviceMemory>(sparseMemoryBind.memory), Deleter<VkDeviceMemory>(deviceInterface, getDevice(), DE_NULL))));

			sparseMemoryBinds.push_back(sparseMemoryBind);
		}

		const VkSparseBufferMemoryBindInfo sparseBufferBindInfo = makeSparseBufferMemoryBindInfo(*sparseBuffer, static_cast<deUint32>(sparseMemoryBinds.size()), &sparseMemoryBinds[0]);

		const VkBindSparseInfo bindSparseInfo =
		{
			VK_STRUCTURE_TYPE_BIND_SPARSE_INFO,			//VkStructureType							sType;
			DE_NULL,									//const void*								pNext;
			0u,											//deUint32									waitSemaphoreCount;
			DE_NULL,									//const VkSemaphore*						pWaitSemaphores;
			1u,											//deUint32									bufferBindCount;
			&sparseBufferBindInfo,						//const VkSparseBufferMemoryBindInfo*		pBufferBinds;
			0u,											//deUint32									imageOpaqueBindCount;
			DE_NULL,									//const VkSparseImageOpaqueMemoryBindInfo*	pImageOpaqueBinds;
			0u,											//deUint32									imageBindCount;
			DE_NULL,									//const VkSparseImageMemoryBindInfo*		pImageBinds;
			1u,											//deUint32									signalSemaphoreCount;
			&bufferMemoryBindSemaphore.get()			//const VkSemaphore*						pSignalSemaphores;
		};

		// The signaled semaphore is waited on by the compute/transfer submit
		// below, ordering the binds before any use of the sparse buffer.
		VK_CHECK(deviceInterface.queueBindSparse(sparseQueue.queueHandle, 1u, &bindSparseInfo, DE_NULL));
	}

	// Create input buffer
	const VkBufferCreateInfo		inputBufferCreateInfo	= makeBufferCreateInfo(m_bufferSize, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT);
	const Unique<VkBuffer>			inputBuffer				(createBuffer(deviceInterface, getDevice(), &inputBufferCreateInfo));
	const de::UniquePtr<Allocation>	inputBufferAlloc		(bindBuffer(deviceInterface, getDevice(), getAllocator(), *inputBuffer, MemoryRequirement::HostVisible));

	// Reference pattern: byte value cycles with the sparse block alignment,
	// offset by one so it never equals the 0 expected from non-resident reads.
	std::vector<deUint8> referenceData;
	referenceData.resize(m_bufferSize);

	for (deUint32 valueNdx = 0; valueNdx < m_bufferSize; ++valueNdx)
	{
		referenceData[valueNdx] = static_cast<deUint8>((valueNdx % bufferMemRequirements.alignment) + 1u);
	}

	deMemcpy(inputBufferAlloc->getHostPtr(), &referenceData[0], m_bufferSize);

	// Make the host writes visible to the device before the shader reads them.
	flushMappedMemoryRange(deviceInterface, getDevice(), inputBufferAlloc->getMemory(), inputBufferAlloc->getOffset(), m_bufferSize);

	// Create output buffer
	const VkBufferCreateInfo		outputBufferCreateInfo	= makeBufferCreateInfo(m_bufferSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
	const Unique<VkBuffer>			outputBuffer			(createBuffer(deviceInterface, getDevice(), &outputBufferCreateInfo));
	const de::UniquePtr<Allocation>	outputBufferAlloc		(bindBuffer(deviceInterface, getDevice(), getAllocator(), *outputBuffer, MemoryRequirement::HostVisible));

	// Create command buffer for compute and data transfer oparations
	const Unique<VkCommandPool>		commandPool(makeCommandPool(deviceInterface, getDevice(), computeQueue.queueFamilyIndex));
	const Unique<VkCommandBuffer>	commandBuffer(makeCommandBuffer(deviceInterface, getDevice(), *commandPool));

	// Start recording compute and transfer commands
	beginCommandBuffer(deviceInterface, *commandBuffer);

	// Create descriptor set layout: binding 0 = input SSBO, binding 1 = sparse (output of shader) SSBO.
	const Unique<VkDescriptorSetLayout> descriptorSetLayout(
		DescriptorSetLayoutBuilder()
		.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
		.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
		.build(deviceInterface, getDevice()));

	// Create compute pipeline
	const Unique<VkShaderModule>	shaderModule(createShaderModule(deviceInterface, getDevice(), m_context.getBinaryCollection().get("comp"), DE_NULL));
	const Unique<VkPipelineLayout>	pipelineLayout(makePipelineLayout(deviceInterface, getDevice(), *descriptorSetLayout));
	const Unique<VkPipeline>		computePipeline(makeComputePipeline(deviceInterface, getDevice(), *pipelineLayout, *shaderModule));

	deviceInterface.cmdBindPipeline(*commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *computePipeline);

	const Unique<VkDescriptorPool> descriptorPool(
		DescriptorPoolBuilder()
		.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 2u)
		.build(deviceInterface, getDevice(), VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));

	const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(deviceInterface, getDevice(), *descriptorPool, *descriptorSetLayout));

	{
		const VkDescriptorBufferInfo inputBufferInfo	= makeDescriptorBufferInfo(*inputBuffer, 0ull, m_bufferSize);
		const VkDescriptorBufferInfo sparseBufferInfo	= makeDescriptorBufferInfo(*sparseBuffer, 0ull, m_bufferSize);

		DescriptorSetUpdateBuilder()
			.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &inputBufferInfo)
			.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &sparseBufferInfo)
			.update(deviceInterface, getDevice());
	}

	deviceInterface.cmdBindDescriptorSets(*commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);

	{
		// Host writes to the input buffer must be visible before the shader reads it.
		const VkBufferMemoryBarrier inputBufferBarrier
			= makeBufferMemoryBarrier(	VK_ACCESS_HOST_WRITE_BIT,
										VK_ACCESS_SHADER_READ_BIT,
										*inputBuffer,
										0ull,
										m_bufferSize);

		deviceInterface.cmdPipelineBarrier(*commandBuffer, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0u, 0u, DE_NULL, 1u, &inputBufferBarrier, 0u, DE_NULL);
	}

	// Single invocation; the shader itself loops over all elements.
	deviceInterface.cmdDispatch(*commandBuffer, 1u, 1u, 1u);

	{
		// Shader writes to the sparse buffer must finish before the transfer reads it.
		const VkBufferMemoryBarrier sparseBufferBarrier
			= makeBufferMemoryBarrier(	VK_ACCESS_SHADER_WRITE_BIT,
										VK_ACCESS_TRANSFER_READ_BIT,
										*sparseBuffer,
										0ull,
										m_bufferSize);

		deviceInterface.cmdPipelineBarrier(*commandBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0u, 0u, DE_NULL, 1u, &sparseBufferBarrier, 0u, DE_NULL);
	}

	{
		// Read back the sparse buffer through a copy into a host-visible buffer.
		const VkBufferCopy bufferCopy = makeBufferCopy(0u, 0u, m_bufferSize);

		deviceInterface.cmdCopyBuffer(*commandBuffer, *sparseBuffer, *outputBuffer, 1u, &bufferCopy);
	}

	{
		// Transfer writes must be visible to the host before it inspects the output.
		const VkBufferMemoryBarrier outputBufferBarrier
			= makeBufferMemoryBarrier(	VK_ACCESS_TRANSFER_WRITE_BIT,
										VK_ACCESS_HOST_READ_BIT,
										*outputBuffer,
										0ull,
										m_bufferSize);

		deviceInterface.cmdPipelineBarrier(*commandBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT, 0u, 0u, DE_NULL, 1u, &outputBufferBarrier, 0u, DE_NULL);
	}

	// End recording compute and transfer commands
	endCommandBuffer(deviceInterface, *commandBuffer);

	const VkPipelineStageFlags waitStageBits[] = { VK_PIPELINE_STAGE_TRANSFER_BIT };

	// Submit transfer commands for execution and wait for completion.
	// Waits on the sparse-bind semaphore so memory is bound before first use.
	submitCommandsAndWait(deviceInterface, getDevice(), computeQueue.queueHandle, *commandBuffer, 1u, &bufferMemoryBindSemaphore.get(), waitStageBits);

	// Retrieve data from output buffer to host memory
	invalidateMappedMemoryRange(deviceInterface, getDevice(), outputBufferAlloc->getMemory(), outputBufferAlloc->getOffset(), m_bufferSize);

	const deUint8* outputData = static_cast<const deUint8*>(outputBufferAlloc->getHostPtr());

	// Wait for sparse queue to become idle
	deviceInterface.queueWaitIdle(sparseQueue.queueHandle);

	// Compare output data with reference data, one alignment-sized slot at a time.
	for (deUint32 sparseBindNdx = 0; sparseBindNdx < numSparseSlots; ++sparseBindNdx)
	{
		const deUint32 alignment	= static_cast<deUint32>(bufferMemRequirements.alignment);
		const deUint32 offset		= alignment * sparseBindNdx;
		// NOTE(review): when m_bufferSize is an exact multiple of alignment,
		// m_bufferSize % alignment is 0 and the last slot is compared over 0
		// bytes, i.e. effectively skipped — confirm this is intended.
		const deUint32 size			= sparseBindNdx == (numSparseSlots - 1) ? m_bufferSize % alignment : alignment;

		if (sparseBindNdx % 2u == 0u)
		{
			// Even slots have memory bound: data must round-trip intact.
			if (deMemCmp(&referenceData[offset], outputData + offset, size) != 0)
				return tcu::TestStatus::fail("Failed");
		}
		else if (physicalDeviceProperties.sparseProperties.residencyNonResidentStrict)
		{
			// Odd slots are unbound: with strict non-resident behavior, reads
			// must return zeros, so zero the reference range before comparing.
			deMemset(&referenceData[offset], 0u, size);

			if (deMemCmp(&referenceData[offset], outputData + offset, size) != 0)
				return tcu::TestStatus::fail("Failed");
		}
	}

	return tcu::TestStatus::pass("Passed");
}

//! Create the per-run instance, forwarding the configured buffer size.
TestInstance* BufferSparseResidencyCase::createInstance (Context& context) const
{
	return new BufferSparseResidencyInstance(context, m_bufferSize);
}

} // anonymous ns

//! Register one test per buffer size (powers of two from 2^10 to 2^24 bytes).
void addBufferSparseResidencyTests(tcu::TestCaseGroup* group)
{
	group->addChild(new BufferSparseResidencyCase(group->getTestContext(), "buffer_size_2_10", "", 1 << 10, glu::GLSL_VERSION_440));
	group->addChild(new BufferSparseResidencyCase(group->getTestContext(), "buffer_size_2_12", "", 1 << 12, glu::GLSL_VERSION_440));
	group->addChild(new BufferSparseResidencyCase(group->getTestContext(), "buffer_size_2_16", "", 1 << 16, glu::GLSL_VERSION_440));
	group->addChild(new BufferSparseResidencyCase(group->getTestContext(), "buffer_size_2_17", "", 1 << 17, glu::GLSL_VERSION_440));
	group->addChild(new BufferSparseResidencyCase(group->getTestContext(), "buffer_size_2_20", "", 1 << 20, glu::GLSL_VERSION_440));
	group->addChild(new BufferSparseResidencyCase(group->getTestContext(), "buffer_size_2_24", "", 1 << 24, glu::GLSL_VERSION_440));
}

} // sparse
} // vkt