Home | History | Annotate | Download | only in sparse_resources
      1 /*------------------------------------------------------------------------
      2  * Vulkan Conformance Tests
      3  * ------------------------
      4  *
      5  * Copyright (c) 2016 The Khronos Group Inc.
      6  *
      7  * Licensed under the Apache License, Version 2.0 (the "License");
      8  * you may not use this file except in compliance with the License.
      9  * You may obtain a copy of the License at
     10  *
     11  *      http://www.apache.org/licenses/LICENSE-2.0
     12  *
     13  * Unless required by applicable law or agreed to in writing, software
     14  * distributed under the License is distributed on an "AS IS" BASIS,
     15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     16  * See the License for the specific language governing permissions and
     17  * limitations under the License.
     18  *
     19  *//*!
     20  * \file  vktSparseResourcesBufferSparseResidency.cpp
     21  * \brief Sparse partially resident buffers tests
     22  *//*--------------------------------------------------------------------*/
     23 
     24 #include "vktSparseResourcesBufferSparseResidency.hpp"
     25 #include "vktSparseResourcesTestsUtil.hpp"
     26 #include "vktSparseResourcesBase.hpp"
     27 #include "vktTestCaseUtil.hpp"
     28 
     29 #include "vkDefs.hpp"
     30 #include "vkRef.hpp"
     31 #include "vkRefUtil.hpp"
     32 #include "vkPlatform.hpp"
     33 #include "vkPrograms.hpp"
     34 #include "vkRefUtil.hpp"
     35 #include "vkMemUtil.hpp"
     36 #include "vkQueryUtil.hpp"
     37 #include "vkBuilderUtil.hpp"
     38 #include "vkTypeUtil.hpp"
     39 
     40 #include "deStringUtil.hpp"
     41 #include "deUniquePtr.hpp"
     42 
     43 #include <string>
     44 #include <vector>
     45 
     46 using namespace vk;
     47 
     48 namespace vkt
     49 {
     50 namespace sparse
     51 {
     52 namespace
     53 {
     54 
// Constants shared between host code and the generated compute shader.
enum ShaderParameters
{
	SIZE_OF_UINT_IN_SHADER = 4u,	// Size in bytes of a GLSL uint in an std430 buffer
};
     59 
// Test case: a compute shader copies the contents of a fully-resident input
// buffer into a sparse, partially-resident buffer, which is then read back
// and verified (resident chunks must match, non-resident chunks must read
// as zeros on strict implementations).
class BufferSparseResidencyCase : public TestCase
{
public:
					BufferSparseResidencyCase	(tcu::TestContext&		testCtx,
												 const std::string&		name,
												 const std::string&		description,
												 const deUint32			bufferSize,
												 const glu::GLSLVersion	glslVersion);

	// Registers the compute shader copying the input SSBO to the output SSBO.
	void			initPrograms				(SourceCollections&		sourceCollections) const;
	TestInstance*	createInstance				(Context&				context) const;

private:
	const deUint32			m_bufferSize;	// Size in bytes of the sparse buffer under test
	const glu::GLSLVersion	m_glslVersion;	// GLSL version declaration used for the shader
};
     76 
// Stores the per-case parameters; no work is done until createInstance().
BufferSparseResidencyCase::BufferSparseResidencyCase (tcu::TestContext&			testCtx,
													  const std::string&		name,
													  const std::string&		description,
													  const deUint32			bufferSize,
													  const glu::GLSLVersion	glslVersion)
	: TestCase			(testCtx, name, description)
	, m_bufferSize		(bufferSize)
	, m_glslVersion		(glslVersion)
{
}
     87 
     88 void BufferSparseResidencyCase::initPrograms (SourceCollections& sourceCollections) const
     89 {
     90 	const char* const	versionDecl		= glu::getGLSLVersionDeclaration(m_glslVersion);
     91 	const deUint32		iterationsCount = m_bufferSize / SIZE_OF_UINT_IN_SHADER;
     92 
     93 	std::ostringstream src;
     94 
     95 	src << versionDecl << "\n"
     96 		<< "layout (local_size_x = 1, local_size_y = 1, local_size_z = 1) in;\n"
     97 		<< "layout(set = 0, binding = 0, std430) readonly buffer Input\n"
     98 		<< "{\n"
     99 		<< "	uint data[];\n"
    100 		<< "} sb_in;\n"
    101 		<< "\n"
    102 		<< "layout(set = 0, binding = 1, std430) writeonly buffer Output\n"
    103 		<< "{\n"
    104 		<< "	uint result[];\n"
    105 		<< "} sb_out;\n"
    106 		<< "\n"
    107 		<< "void main (void)\n"
    108 		<< "{\n"
    109 		<< "	for(int i=0; i<" << iterationsCount << "; ++i) \n"
    110 		<< "	{\n"
    111 		<< "		sb_out.result[i] = sb_in.data[i];"
    112 		<< "	}\n"
    113 		<< "}\n";
    114 
    115 	sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
    116 }
    117 
// Runtime instance: performs device setup, sparse binding, the compute copy
// and the readback verification for a single buffer size.
class BufferSparseResidencyInstance : public SparseResourcesBaseInstance
{
public:
					BufferSparseResidencyInstance	(Context&			context,
													 const deUint32		bufferSize);

	tcu::TestStatus	iterate							(void);

private:
	const deUint32	m_bufferSize;	// Size in bytes of the sparse buffer under test
};
    129 
// Stores the buffer size; all Vulkan work happens in iterate().
BufferSparseResidencyInstance::BufferSparseResidencyInstance (Context&			context,
														      const deUint32	bufferSize)
	: SparseResourcesBaseInstance	(context)
	, m_bufferSize					(bufferSize)
{
}
    136 
    137 tcu::TestStatus BufferSparseResidencyInstance::iterate (void)
    138 {
    139 	const InstanceInterface&		 instance					= m_context.getInstanceInterface();
    140 	const VkPhysicalDevice			 physicalDevice				= m_context.getPhysicalDevice();
    141 	const VkPhysicalDeviceProperties physicalDeviceProperties	= getPhysicalDeviceProperties(instance, physicalDevice);
    142 
    143 	if (!getPhysicalDeviceFeatures(instance, physicalDevice).sparseResidencyBuffer)
    144 		TCU_THROW(NotSupportedError, "Sparse partially resident buffers not supported");
    145 
    146 	{
    147 		// Create logical device supporting both sparse and compute operations
    148 		QueueRequirementsVec queueRequirements;
    149 		queueRequirements.push_back(QueueRequirements(VK_QUEUE_SPARSE_BINDING_BIT, 1u));
    150 		queueRequirements.push_back(QueueRequirements(VK_QUEUE_COMPUTE_BIT, 1u));
    151 
    152 		createDeviceSupportingQueues(queueRequirements);
    153 	}
    154 
    155 	const DeviceInterface&	deviceInterface	= getDeviceInterface();
    156 	const Queue&			sparseQueue		= getQueue(VK_QUEUE_SPARSE_BINDING_BIT, 0);
    157 	const Queue&			computeQueue	= getQueue(VK_QUEUE_COMPUTE_BIT, 0);
    158 
    159 	VkBufferCreateInfo bufferCreateInfo =
    160 	{
    161 		VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO,	// VkStructureType		sType;
    162 		DE_NULL,								// const void*			pNext;
    163 		VK_BUFFER_CREATE_SPARSE_BINDING_BIT |
    164 		VK_BUFFER_CREATE_SPARSE_RESIDENCY_BIT,	// VkBufferCreateFlags	flags;
    165 		m_bufferSize,							// VkDeviceSize			size;
    166 		VK_BUFFER_USAGE_STORAGE_BUFFER_BIT |
    167 		VK_BUFFER_USAGE_TRANSFER_SRC_BIT,		// VkBufferUsageFlags	usage;
    168 		VK_SHARING_MODE_EXCLUSIVE,				// VkSharingMode		sharingMode;
    169 		0u,										// deUint32				queueFamilyIndexCount;
    170 		DE_NULL									// const deUint32*		pQueueFamilyIndices;
    171 	};
    172 
    173 	const deUint32 queueFamilyIndices[] = { sparseQueue.queueFamilyIndex, computeQueue.queueFamilyIndex };
    174 
    175 	if (sparseQueue.queueFamilyIndex != computeQueue.queueFamilyIndex)
    176 	{
    177 		bufferCreateInfo.sharingMode			= VK_SHARING_MODE_CONCURRENT;
    178 		bufferCreateInfo.queueFamilyIndexCount	= 2u;
    179 		bufferCreateInfo.pQueueFamilyIndices	= queueFamilyIndices;
    180 	}
    181 
    182 	// Create sparse buffer
    183 	const Unique<VkBuffer> sparseBuffer(createBuffer(deviceInterface, getDevice(), &bufferCreateInfo));
    184 
    185 	// Create sparse buffer memory bind semaphore
    186 	const Unique<VkSemaphore> bufferMemoryBindSemaphore(makeSemaphore(deviceInterface, getDevice()));
    187 
    188 	const VkMemoryRequirements bufferMemRequirements = getBufferMemoryRequirements(deviceInterface, getDevice(), *sparseBuffer);
    189 
    190 	if (bufferMemRequirements.size > physicalDeviceProperties.limits.sparseAddressSpaceSize)
    191 		TCU_THROW(NotSupportedError, "Required memory size for sparse resources exceeds device limits");
    192 
    193 	DE_ASSERT((bufferMemRequirements.size % bufferMemRequirements.alignment) == 0);
    194 
    195 	const deUint32				numSparseSlots = static_cast<deUint32>(bufferMemRequirements.size / bufferMemRequirements.alignment);
    196 	std::vector<DeviceMemorySp>	deviceMemUniquePtrVec;
    197 
    198 	{
    199 		std::vector<VkSparseMemoryBind>		sparseMemoryBinds;
    200 		const deUint32						memoryType		= findMatchingMemoryType(instance, physicalDevice, bufferMemRequirements, MemoryRequirement::Any);
    201 
    202 		if (memoryType == NO_MATCH_FOUND)
    203 			return tcu::TestStatus::fail("No matching memory type found");
    204 
    205 		for (deUint32 sparseBindNdx = 0; sparseBindNdx < numSparseSlots; sparseBindNdx += 2)
    206 		{
    207 			const VkSparseMemoryBind sparseMemoryBind = makeSparseMemoryBind(deviceInterface, getDevice(), bufferMemRequirements.alignment, memoryType, bufferMemRequirements.alignment * sparseBindNdx);
    208 
    209 			deviceMemUniquePtrVec.push_back(makeVkSharedPtr(Move<VkDeviceMemory>(check<VkDeviceMemory>(sparseMemoryBind.memory), Deleter<VkDeviceMemory>(deviceInterface, getDevice(), DE_NULL))));
    210 
    211 			sparseMemoryBinds.push_back(sparseMemoryBind);
    212 		}
    213 
    214 		const VkSparseBufferMemoryBindInfo sparseBufferBindInfo = makeSparseBufferMemoryBindInfo(*sparseBuffer, static_cast<deUint32>(sparseMemoryBinds.size()), &sparseMemoryBinds[0]);
    215 
    216 		const VkBindSparseInfo bindSparseInfo =
    217 		{
    218 			VK_STRUCTURE_TYPE_BIND_SPARSE_INFO,			//VkStructureType							sType;
    219 			DE_NULL,									//const void*								pNext;
    220 			0u,											//deUint32									waitSemaphoreCount;
    221 			DE_NULL,									//const VkSemaphore*						pWaitSemaphores;
    222 			1u,											//deUint32									bufferBindCount;
    223 			&sparseBufferBindInfo,						//const VkSparseBufferMemoryBindInfo*		pBufferBinds;
    224 			0u,											//deUint32									imageOpaqueBindCount;
    225 			DE_NULL,									//const VkSparseImageOpaqueMemoryBindInfo*	pImageOpaqueBinds;
    226 			0u,											//deUint32									imageBindCount;
    227 			DE_NULL,									//const VkSparseImageMemoryBindInfo*		pImageBinds;
    228 			1u,											//deUint32									signalSemaphoreCount;
    229 			&bufferMemoryBindSemaphore.get()			//const VkSemaphore*						pSignalSemaphores;
    230 		};
    231 
    232 		VK_CHECK(deviceInterface.queueBindSparse(sparseQueue.queueHandle, 1u, &bindSparseInfo, DE_NULL));
    233 	}
    234 
    235 	// Create input buffer
    236 	const VkBufferCreateInfo		inputBufferCreateInfo	= makeBufferCreateInfo(m_bufferSize, VK_BUFFER_USAGE_STORAGE_BUFFER_BIT);
    237 	const Unique<VkBuffer>			inputBuffer				(createBuffer(deviceInterface, getDevice(), &inputBufferCreateInfo));
    238 	const de::UniquePtr<Allocation>	inputBufferAlloc		(bindBuffer(deviceInterface, getDevice(), getAllocator(), *inputBuffer, MemoryRequirement::HostVisible));
    239 
    240 
    241 	std::vector<deUint8> referenceData;
    242 	referenceData.resize(m_bufferSize);
    243 
    244 	for (deUint32 valueNdx = 0; valueNdx < m_bufferSize; ++valueNdx)
    245 	{
    246 		referenceData[valueNdx] = static_cast<deUint8>((valueNdx % bufferMemRequirements.alignment) + 1u);
    247 	}
    248 
    249 	deMemcpy(inputBufferAlloc->getHostPtr(), &referenceData[0], m_bufferSize);
    250 
    251 	flushMappedMemoryRange(deviceInterface, getDevice(), inputBufferAlloc->getMemory(), inputBufferAlloc->getOffset(), m_bufferSize);
    252 
    253 	// Create output buffer
    254 	const VkBufferCreateInfo		outputBufferCreateInfo	= makeBufferCreateInfo(m_bufferSize, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
    255 	const Unique<VkBuffer>			outputBuffer			(createBuffer(deviceInterface, getDevice(), &outputBufferCreateInfo));
    256 	const de::UniquePtr<Allocation>	outputBufferAlloc		(bindBuffer(deviceInterface, getDevice(), getAllocator(), *outputBuffer, MemoryRequirement::HostVisible));
    257 
    258 	// Create command buffer for compute and data transfer oparations
    259 	const Unique<VkCommandPool>	  commandPool(makeCommandPool(deviceInterface, getDevice(), computeQueue.queueFamilyIndex));
    260 	const Unique<VkCommandBuffer> commandBuffer(makeCommandBuffer(deviceInterface, getDevice(), *commandPool));
    261 
    262 	// Start recording compute and transfer commands
    263 	beginCommandBuffer(deviceInterface, *commandBuffer);
    264 
    265 	// Create descriptor set
    266 	const Unique<VkDescriptorSetLayout> descriptorSetLayout(
    267 		DescriptorSetLayoutBuilder()
    268 		.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
    269 		.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, VK_SHADER_STAGE_COMPUTE_BIT)
    270 		.build(deviceInterface, getDevice()));
    271 
    272 	// Create compute pipeline
    273 	const Unique<VkShaderModule>	shaderModule(createShaderModule(deviceInterface, getDevice(), m_context.getBinaryCollection().get("comp"), DE_NULL));
    274 	const Unique<VkPipelineLayout>	pipelineLayout(makePipelineLayout(deviceInterface, getDevice(), *descriptorSetLayout));
    275 	const Unique<VkPipeline>		computePipeline(makeComputePipeline(deviceInterface, getDevice(), *pipelineLayout, *shaderModule));
    276 
    277 	deviceInterface.cmdBindPipeline(*commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *computePipeline);
    278 
    279 	const Unique<VkDescriptorPool> descriptorPool(
    280 		DescriptorPoolBuilder()
    281 		.addType(VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, 2u)
    282 		.build(deviceInterface, getDevice(), VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
    283 
    284 	const Unique<VkDescriptorSet> descriptorSet(makeDescriptorSet(deviceInterface, getDevice(), *descriptorPool, *descriptorSetLayout));
    285 
    286 	{
    287 		const VkDescriptorBufferInfo inputBufferInfo = makeDescriptorBufferInfo(*inputBuffer, 0ull, m_bufferSize);
    288 		const VkDescriptorBufferInfo sparseBufferInfo = makeDescriptorBufferInfo(*sparseBuffer, 0ull, m_bufferSize);
    289 
    290 		DescriptorSetUpdateBuilder()
    291 			.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &inputBufferInfo)
    292 			.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(1u), VK_DESCRIPTOR_TYPE_STORAGE_BUFFER, &sparseBufferInfo)
    293 			.update(deviceInterface, getDevice());
    294 	}
    295 
    296 	deviceInterface.cmdBindDescriptorSets(*commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);
    297 
    298 	{
    299 		const VkBufferMemoryBarrier inputBufferBarrier
    300 			= makeBufferMemoryBarrier(	VK_ACCESS_HOST_WRITE_BIT,
    301 										VK_ACCESS_SHADER_READ_BIT,
    302 										*inputBuffer,
    303 										0ull,
    304 										m_bufferSize);
    305 
    306 		deviceInterface.cmdPipelineBarrier(*commandBuffer, VK_PIPELINE_STAGE_HOST_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0u, 0u, DE_NULL, 1u, &inputBufferBarrier, 0u, DE_NULL);
    307 	}
    308 
    309 	deviceInterface.cmdDispatch(*commandBuffer, 1u, 1u, 1u);
    310 
    311 	{
    312 		const VkBufferMemoryBarrier sparseBufferBarrier
    313 			= makeBufferMemoryBarrier(	VK_ACCESS_SHADER_WRITE_BIT,
    314 										VK_ACCESS_TRANSFER_READ_BIT,
    315 										*sparseBuffer,
    316 										0ull,
    317 										m_bufferSize);
    318 
    319 		deviceInterface.cmdPipelineBarrier(*commandBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0u, 0u, DE_NULL, 1u, &sparseBufferBarrier, 0u, DE_NULL);
    320 	}
    321 
    322 	{
    323 		const VkBufferCopy bufferCopy = makeBufferCopy(0u, 0u, m_bufferSize);
    324 
    325 		deviceInterface.cmdCopyBuffer(*commandBuffer, *sparseBuffer, *outputBuffer, 1u, &bufferCopy);
    326 	}
    327 
    328 	{
    329 		const VkBufferMemoryBarrier outputBufferBarrier
    330 			= makeBufferMemoryBarrier(	VK_ACCESS_TRANSFER_WRITE_BIT,
    331 										VK_ACCESS_HOST_READ_BIT,
    332 										*outputBuffer,
    333 										0ull,
    334 										m_bufferSize);
    335 
    336 		deviceInterface.cmdPipelineBarrier(*commandBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT, 0u, 0u, DE_NULL, 1u, &outputBufferBarrier, 0u, DE_NULL);
    337 	}
    338 
    339 	// End recording compute and transfer commands
    340 	endCommandBuffer(deviceInterface, *commandBuffer);
    341 
    342 	const VkPipelineStageFlags waitStageBits[] = { VK_PIPELINE_STAGE_TRANSFER_BIT };
    343 
    344 	// Submit transfer commands for execution and wait for completion
    345 	submitCommandsAndWait(deviceInterface, getDevice(), computeQueue.queueHandle, *commandBuffer, 1u, &bufferMemoryBindSemaphore.get(), waitStageBits);
    346 
    347 	// Retrieve data from output buffer to host memory
    348 	invalidateMappedMemoryRange(deviceInterface, getDevice(), outputBufferAlloc->getMemory(), outputBufferAlloc->getOffset(), m_bufferSize);
    349 
    350 	const deUint8* outputData = static_cast<const deUint8*>(outputBufferAlloc->getHostPtr());
    351 
    352 	// Wait for sparse queue to become idle
    353 	deviceInterface.queueWaitIdle(sparseQueue.queueHandle);
    354 
    355 	// Compare output data with reference data
    356 	for (deUint32 sparseBindNdx = 0; sparseBindNdx < numSparseSlots; ++sparseBindNdx)
    357 	{
    358 		const deUint32 alignment = static_cast<deUint32>(bufferMemRequirements.alignment);
    359 		const deUint32 offset	 = alignment * sparseBindNdx;
    360 		const deUint32 size		 = sparseBindNdx == (numSparseSlots - 1) ? m_bufferSize % alignment : alignment;
    361 
    362 		if (sparseBindNdx % 2u == 0u)
    363 		{
    364 			if (deMemCmp(&referenceData[offset], outputData + offset, size) != 0)
    365 				return tcu::TestStatus::fail("Failed");
    366 		}
    367 		else if (physicalDeviceProperties.sparseProperties.residencyNonResidentStrict)
    368 		{
    369 			deMemset(&referenceData[offset], 0u, size);
    370 
    371 			if (deMemCmp(&referenceData[offset], outputData + offset, size) != 0)
    372 				return tcu::TestStatus::fail("Failed");
    373 		}
    374 	}
    375 
    376 	return tcu::TestStatus::pass("Passed");
    377 }
    378 
// Creates the runtime instance that executes the test for this case's buffer size.
TestInstance* BufferSparseResidencyCase::createInstance (Context& context) const
{
	return new BufferSparseResidencyInstance(context, m_bufferSize);
}
    383 
    384 } // anonymous ns
    385 
    386 void addBufferSparseResidencyTests(tcu::TestCaseGroup* group)
    387 {
    388 	group->addChild(new BufferSparseResidencyCase(group->getTestContext(), "buffer_size_2_10", "", 1 << 10, glu::GLSL_VERSION_440));
    389 	group->addChild(new BufferSparseResidencyCase(group->getTestContext(), "buffer_size_2_12", "", 1 << 12, glu::GLSL_VERSION_440));
    390 	group->addChild(new BufferSparseResidencyCase(group->getTestContext(), "buffer_size_2_16", "", 1 << 16, glu::GLSL_VERSION_440));
    391 	group->addChild(new BufferSparseResidencyCase(group->getTestContext(), "buffer_size_2_17", "", 1 << 17, glu::GLSL_VERSION_440));
    392 	group->addChild(new BufferSparseResidencyCase(group->getTestContext(), "buffer_size_2_20", "", 1 << 20, glu::GLSL_VERSION_440));
    393 	group->addChild(new BufferSparseResidencyCase(group->getTestContext(), "buffer_size_2_24", "", 1 << 24, glu::GLSL_VERSION_440));
    394 }
    395 
    396 } // sparse
    397 } // vkt
    398