Home | History | Annotate | Download | only in sparse_resources
      1 /*------------------------------------------------------------------------
      2  * Vulkan Conformance Tests
      3  * ------------------------
      4  *
      5  * Copyright (c) 2016 The Khronos Group Inc.
      6  *
      7  * Licensed under the Apache License, Version 2.0 (the "License");
      8  * you may not use this file except in compliance with the License.
      9  * You may obtain a copy of the License at
     10  *
     11  *      http://www.apache.org/licenses/LICENSE-2.0
     12  *
     13  * Unless required by applicable law or agreed to in writing, software
     14  * distributed under the License is distributed on an "AS IS" BASIS,
     15  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
     16  * See the License for the specific language governing permissions and
     17  * limitations under the License.
     18  *
     19  *//*!
     20  * \file  vktSparseResourcesImageSparseResidency.cpp
     21  * \brief Sparse partially resident images tests
     22  *//*--------------------------------------------------------------------*/
     23 
     24 #include "vktSparseResourcesBufferSparseBinding.hpp"
     25 #include "vktSparseResourcesTestsUtil.hpp"
     26 #include "vktSparseResourcesBase.hpp"
     27 #include "vktTestCaseUtil.hpp"
     28 
     29 #include "vkDefs.hpp"
     30 #include "vkRef.hpp"
     31 #include "vkRefUtil.hpp"
     32 #include "vkPlatform.hpp"
     33 #include "vkPrograms.hpp"
     34 #include "vkMemUtil.hpp"
     35 #include "vkBuilderUtil.hpp"
     36 #include "vkImageUtil.hpp"
     37 #include "vkQueryUtil.hpp"
     38 #include "vkTypeUtil.hpp"
     39 
     40 #include "deUniquePtr.hpp"
     41 #include "deStringUtil.hpp"
     42 
     43 #include <string>
     44 #include <vector>
     45 
     46 using namespace vk;
     47 
     48 namespace vkt
     49 {
     50 namespace sparse
     51 {
     52 namespace
     53 {
     54 
     55 const std::string getCoordStr  (const ImageType		imageType,
     56 								const std::string&	x,
     57 								const std::string&	y,
     58 								const std::string&	z)
     59 {
     60 	switch (imageType)
     61 	{
     62 		case IMAGE_TYPE_1D:
     63 		case IMAGE_TYPE_BUFFER:
     64 			return x;
     65 
     66 		case IMAGE_TYPE_1D_ARRAY:
     67 		case IMAGE_TYPE_2D:
     68 			return "ivec2(" + x + "," + y + ")";
     69 
     70 		case IMAGE_TYPE_2D_ARRAY:
     71 		case IMAGE_TYPE_3D:
     72 		case IMAGE_TYPE_CUBE:
     73 		case IMAGE_TYPE_CUBE_ARRAY:
     74 			return "ivec3(" + x + "," + y + "," + z + ")";
     75 
     76 		default:
     77 			DE_ASSERT(false);
     78 			return "";
     79 	}
     80 }
     81 
     82 tcu::UVec3 alignedDivide (const VkExtent3D& extent, const VkExtent3D& divisor)
     83 {
     84 	tcu::UVec3 result;
     85 
     86 	result.x() = extent.width  / divisor.width  + ((extent.width  % divisor.width)  ? 1u : 0u);
     87 	result.y() = extent.height / divisor.height + ((extent.height % divisor.height) ? 1u : 0u);
     88 	result.z() = extent.depth  / divisor.depth  + ((extent.depth  % divisor.depth)  ? 1u : 0u);
     89 
     90 	return result;
     91 }
     92 
     93 tcu::UVec3 computeWorkGroupSize (const tcu::UVec3& gridSize)
     94 {
     95 	const deUint32		maxComputeWorkGroupInvocations	= 128u;
     96 	const tcu::UVec3	maxComputeWorkGroupSize			= tcu::UVec3(128u, 128u, 64u);
     97 
     98 	const deUint32 xWorkGroupSize = std::min(std::min(gridSize.x(), maxComputeWorkGroupSize.x()), maxComputeWorkGroupInvocations);
     99 	const deUint32 yWorkGroupSize = std::min(std::min(gridSize.y(), maxComputeWorkGroupSize.y()), maxComputeWorkGroupInvocations /  xWorkGroupSize);
    100 	const deUint32 zWorkGroupSize = std::min(std::min(gridSize.z(), maxComputeWorkGroupSize.z()), maxComputeWorkGroupInvocations / (xWorkGroupSize*yWorkGroupSize));
    101 
    102 	return tcu::UVec3(xWorkGroupSize, yWorkGroupSize, zWorkGroupSize);
    103 }
    104 
    105 class ImageSparseResidencyCase : public TestCase
    106 {
    107 public:
    108 					ImageSparseResidencyCase	(tcu::TestContext&			testCtx,
    109 												 const std::string&			name,
    110 												 const std::string&			description,
    111 												 const ImageType			imageType,
    112 												 const tcu::UVec3&			imageSize,
    113 												 const tcu::TextureFormat&	format,
    114 												 const glu::GLSLVersion		glslVersion,
    115 												 const bool					useDeviceGroups);
    116 
    117 	void			initPrograms				(SourceCollections&			sourceCollections) const;
    118 	TestInstance*	createInstance				(Context&					context) const;
    119 
    120 private:
    121 	const bool					m_useDeviceGroups;
    122 	const ImageType				m_imageType;
    123 	const tcu::UVec3			m_imageSize;
    124 	const tcu::TextureFormat	m_format;
    125 	const glu::GLSLVersion		m_glslVersion;
    126 };
    127 
    128 ImageSparseResidencyCase::ImageSparseResidencyCase (tcu::TestContext&			testCtx,
    129 													const std::string&			name,
    130 													const std::string&			description,
    131 													const ImageType				imageType,
    132 													const tcu::UVec3&			imageSize,
    133 													const tcu::TextureFormat&	format,
    134 													const glu::GLSLVersion		glslVersion,
    135 													const bool					useDeviceGroups)
    136 	: TestCase				(testCtx, name, description)
    137 	, m_useDeviceGroups		(useDeviceGroups)
    138 	, m_imageType			(imageType)
    139 	, m_imageSize			(imageSize)
    140 	, m_format				(format)
    141 	, m_glslVersion			(glslVersion)
    142 {
    143 }
    144 
    145 void ImageSparseResidencyCase::initPrograms (SourceCollections&	sourceCollections) const
    146 {
    147 	// Create compute program
    148 	const char* const versionDecl			= glu::getGLSLVersionDeclaration(m_glslVersion);
    149 	const std::string imageTypeStr			= getShaderImageType(m_format, m_imageType);
    150 	const std::string formatQualifierStr	= getShaderImageFormatQualifier(m_format);
    151 	const std::string formatDataStr			= getShaderImageDataType(m_format);
    152 	const tcu::UVec3  gridSize				= getShaderGridSize(m_imageType, m_imageSize);
    153 	const tcu::UVec3  workGroupSize			= computeWorkGroupSize(gridSize);
    154 
    155 	std::ostringstream src;
    156 	src << versionDecl << "\n"
    157 		<< "layout (local_size_x = " << workGroupSize.x() << ", local_size_y = " << workGroupSize.y() << ", local_size_z = " << workGroupSize.z() << ") in; \n"
    158 		<< "layout (binding = 0, " << formatQualifierStr << ") writeonly uniform highp " << imageTypeStr << " u_image;\n"
    159 		<< "void main (void)\n"
    160 		<< "{\n"
    161 		<< "	if( gl_GlobalInvocationID.x < " << gridSize.x() << " ) \n"
    162 		<< "	if( gl_GlobalInvocationID.y < " << gridSize.y() << " ) \n"
    163 		<< "	if( gl_GlobalInvocationID.z < " << gridSize.z() << " ) \n"
    164 		<< "	{\n"
    165 		<< "		imageStore(u_image, " << getCoordStr(m_imageType, "gl_GlobalInvocationID.x", "gl_GlobalInvocationID.y", "gl_GlobalInvocationID.z") << ","
    166 		<< formatDataStr << "( int(gl_GlobalInvocationID.x) % 127, int(gl_GlobalInvocationID.y) % 127, int(gl_GlobalInvocationID.z) % 127, 1));\n"
    167 		<< "	}\n"
    168 		<< "}\n";
    169 
    170 	sourceCollections.glslSources.add("comp") << glu::ComputeSource(src.str());
    171 }
    172 
    173 class ImageSparseResidencyInstance : public SparseResourcesBaseInstance
    174 {
    175 public:
    176 					ImageSparseResidencyInstance(Context&									 context,
    177 												 const ImageType							 imageType,
    178 												 const tcu::UVec3&							 imageSize,
    179 												 const tcu::TextureFormat&					 format,
    180 												 const bool									 useDeviceGroups);
    181 
    182 
    183 	tcu::TestStatus	iterate						(void);
    184 
    185 private:
    186 	const bool					m_useDeviceGroups;
    187 	const ImageType				m_imageType;
    188 	const tcu::UVec3			m_imageSize;
    189 	const tcu::TextureFormat	m_format;
    190 };
    191 
    192 ImageSparseResidencyInstance::ImageSparseResidencyInstance (Context&					context,
    193 															const ImageType				imageType,
    194 															const tcu::UVec3&			imageSize,
    195 															const tcu::TextureFormat&	format,
    196 															const bool					useDeviceGroups)
    197 	: SparseResourcesBaseInstance	(context, useDeviceGroups)
    198 	, m_useDeviceGroups				(useDeviceGroups)
    199 	, m_imageType					(imageType)
    200 	, m_imageSize					(imageSize)
    201 	, m_format						(format)
    202 {
    203 }
    204 
    205 tcu::TestStatus ImageSparseResidencyInstance::iterate (void)
    206 {
    207 	const InstanceInterface&			instance = m_context.getInstanceInterface();
    208 
    209 	{
    210 		// Create logical device supporting both sparse and compute queues
    211 		QueueRequirementsVec queueRequirements;
    212 		queueRequirements.push_back(QueueRequirements(VK_QUEUE_SPARSE_BINDING_BIT, 1u));
    213 		queueRequirements.push_back(QueueRequirements(VK_QUEUE_COMPUTE_BIT, 1u));
    214 
    215 		createDeviceSupportingQueues(queueRequirements);
    216 	}
    217 
    218 	VkImageCreateInfo					imageCreateInfo;
    219 	VkSparseImageMemoryRequirements		aspectRequirements;
    220 	VkExtent3D							imageGranularity;
    221 	std::vector<DeviceMemorySp>			deviceMemUniquePtrVec;
    222 
    223 	const DeviceInterface&	deviceInterface	= getDeviceInterface();
    224 	const Queue&			sparseQueue		= getQueue(VK_QUEUE_SPARSE_BINDING_BIT, 0);
    225 	const Queue&			computeQueue	= getQueue(VK_QUEUE_COMPUTE_BIT, 0);
    226 
    227 	// Go through all physical devices
    228 	for (deUint32 physDevID = 0; physDevID < m_numPhysicalDevices; physDevID++)
    229 	{
    230 		const deUint32						firstDeviceID				= physDevID;
    231 		const deUint32						secondDeviceID				= (firstDeviceID + 1) % m_numPhysicalDevices;
    232 
    233 		const VkPhysicalDevice				physicalDevice				= getPhysicalDevice(firstDeviceID);
    234 		const VkPhysicalDeviceProperties	physicalDeviceProperties	= getPhysicalDeviceProperties(instance, physicalDevice);
    235 
    236 		// Check if image size does not exceed device limits
    237 		if (!isImageSizeSupported(instance, physicalDevice, m_imageType, m_imageSize))
    238 			TCU_THROW(NotSupportedError, "Image size not supported for device");
    239 
    240 		// Check if device supports sparse operations for image type
    241 		if (!checkSparseSupportForImageType(instance, physicalDevice, m_imageType))
    242 			TCU_THROW(NotSupportedError, "Sparse residency for image type is not supported");
    243 
    244 		imageCreateInfo.sType					= VK_STRUCTURE_TYPE_IMAGE_CREATE_INFO;
    245 		imageCreateInfo.pNext					= DE_NULL;
    246 		imageCreateInfo.flags					= VK_IMAGE_CREATE_SPARSE_RESIDENCY_BIT | VK_IMAGE_CREATE_SPARSE_BINDING_BIT;
    247 		imageCreateInfo.imageType				= mapImageType(m_imageType);
    248 		imageCreateInfo.format					= mapTextureFormat(m_format);
    249 		imageCreateInfo.extent					= makeExtent3D(getLayerSize(m_imageType, m_imageSize));
    250 		imageCreateInfo.mipLevels				= 1u;
    251 		imageCreateInfo.arrayLayers				= getNumLayers(m_imageType, m_imageSize);
    252 		imageCreateInfo.samples					= VK_SAMPLE_COUNT_1_BIT;
    253 		imageCreateInfo.tiling					= VK_IMAGE_TILING_OPTIMAL;
    254 		imageCreateInfo.initialLayout			= VK_IMAGE_LAYOUT_UNDEFINED;
    255 		imageCreateInfo.usage					= VK_IMAGE_USAGE_TRANSFER_SRC_BIT |
    256 												  VK_IMAGE_USAGE_STORAGE_BIT;
    257 		imageCreateInfo.sharingMode				= VK_SHARING_MODE_EXCLUSIVE;
    258 		imageCreateInfo.queueFamilyIndexCount	= 0u;
    259 		imageCreateInfo.pQueueFamilyIndices		= DE_NULL;
    260 
    261 		if (m_imageType == IMAGE_TYPE_CUBE || m_imageType == IMAGE_TYPE_CUBE_ARRAY)
    262 		{
    263 			imageCreateInfo.flags |= VK_IMAGE_CREATE_CUBE_COMPATIBLE_BIT;
    264 		}
    265 
    266 		// Check if device supports sparse operations for image format
    267 		if (!checkSparseSupportForImageFormat(instance, physicalDevice, imageCreateInfo))
    268 			TCU_THROW(NotSupportedError, "The image format does not support sparse operations");
    269 
    270 		// Create sparse image
    271 		const Unique<VkImage> sparseImage(createImage(deviceInterface, getDevice(), &imageCreateInfo));
    272 
    273 		// Create sparse image memory bind semaphore
    274 		const Unique<VkSemaphore> imageMemoryBindSemaphore(createSemaphore(deviceInterface, getDevice()));
    275 
    276 		{
    277 			// Get image general memory requirements
    278 			const VkMemoryRequirements imageMemoryRequirements = getImageMemoryRequirements(deviceInterface, getDevice(), *sparseImage);
    279 
    280 			if (imageMemoryRequirements.size > physicalDeviceProperties.limits.sparseAddressSpaceSize)
    281 				TCU_THROW(NotSupportedError, "Required memory size for sparse resource exceeds device limits");
    282 
    283 			DE_ASSERT((imageMemoryRequirements.size % imageMemoryRequirements.alignment) == 0);
    284 
    285 			// Get sparse image sparse memory requirements
    286 			const std::vector<VkSparseImageMemoryRequirements> sparseMemoryRequirements = getImageSparseMemoryRequirements(deviceInterface, getDevice(), *sparseImage);
    287 
    288 			DE_ASSERT(sparseMemoryRequirements.size() != 0);
    289 
    290 			const deUint32 colorAspectIndex		= getSparseAspectRequirementsIndex(sparseMemoryRequirements, VK_IMAGE_ASPECT_COLOR_BIT);
    291 			const deUint32 metadataAspectIndex	= getSparseAspectRequirementsIndex(sparseMemoryRequirements, VK_IMAGE_ASPECT_METADATA_BIT);
    292 
    293 			if (colorAspectIndex == NO_MATCH_FOUND)
    294 				TCU_THROW(NotSupportedError, "Not supported image aspect - the test supports currently only VK_IMAGE_ASPECT_COLOR_BIT");
    295 
    296 			aspectRequirements	= sparseMemoryRequirements[colorAspectIndex];
    297 			imageGranularity	= aspectRequirements.formatProperties.imageGranularity;
    298 
    299 			const VkImageAspectFlags aspectMask = aspectRequirements.formatProperties.aspectMask;
    300 
    301 			DE_ASSERT((aspectRequirements.imageMipTailSize % imageMemoryRequirements.alignment) == 0);
    302 
    303 			std::vector<VkSparseImageMemoryBind> imageResidencyMemoryBinds;
    304 			std::vector<VkSparseMemoryBind>		 imageMipTailMemoryBinds;
    305 
    306 			const deUint32						 memoryType = findMatchingMemoryType(instance, physicalDevice, imageMemoryRequirements, MemoryRequirement::Any);
    307 
    308 			if (memoryType == NO_MATCH_FOUND)
    309 				return tcu::TestStatus::fail("No matching memory type found");
    310 
    311 			// Bind device memory for each aspect
    312 			for (deUint32 layerNdx = 0; layerNdx < imageCreateInfo.arrayLayers; ++layerNdx)
    313 			{
    314 				for (deUint32 mipLevelNdx = 0; mipLevelNdx < aspectRequirements.imageMipTailFirstLod; ++mipLevelNdx)
    315 				{
    316 					const VkImageSubresource subresource		= { aspectMask, mipLevelNdx, layerNdx };
    317 					const VkExtent3D		 mipExtent			= mipLevelExtents(imageCreateInfo.extent, mipLevelNdx);
    318 					const tcu::UVec3		 numSparseBinds		= alignedDivide(mipExtent, imageGranularity);
    319 					const tcu::UVec3		 lastBlockExtent	= tcu::UVec3(mipExtent.width  % imageGranularity.width  ? mipExtent.width   % imageGranularity.width  : imageGranularity.width,
    320 																			 mipExtent.height % imageGranularity.height ? mipExtent.height  % imageGranularity.height : imageGranularity.height,
    321 																			 mipExtent.depth  % imageGranularity.depth  ? mipExtent.depth   % imageGranularity.depth  : imageGranularity.depth);
    322 					for (deUint32 z = 0; z < numSparseBinds.z(); ++z)
    323 					for (deUint32 y = 0; y < numSparseBinds.y(); ++y)
    324 					for (deUint32 x = 0; x < numSparseBinds.x(); ++x)
    325 					{
    326 						const deUint32 linearIndex = x + y*numSparseBinds.x() + z*numSparseBinds.x()*numSparseBinds.y() + layerNdx*numSparseBinds.x()*numSparseBinds.y()*numSparseBinds.z();
    327 
    328 						if (linearIndex % 2u == 1u)
    329 						{
    330 							continue;
    331 						}
    332 
    333 						VkOffset3D offset;
    334 						offset.x = x*imageGranularity.width;
    335 						offset.y = y*imageGranularity.height;
    336 						offset.z = z*imageGranularity.depth;
    337 
    338 						VkExtent3D extent;
    339 						extent.width  = (x == numSparseBinds.x() - 1) ? lastBlockExtent.x() : imageGranularity.width;
    340 						extent.height = (y == numSparseBinds.y() - 1) ? lastBlockExtent.y() : imageGranularity.height;
    341 						extent.depth  = (z == numSparseBinds.z() - 1) ? lastBlockExtent.z() : imageGranularity.depth;
    342 
    343 						const VkSparseImageMemoryBind imageMemoryBind = makeSparseImageMemoryBind(deviceInterface, getDevice(),
    344 							imageMemoryRequirements.alignment, memoryType, subresource, offset, extent);
    345 
    346 						deviceMemUniquePtrVec.push_back(makeVkSharedPtr(Move<VkDeviceMemory>(check<VkDeviceMemory>(imageMemoryBind.memory), Deleter<VkDeviceMemory>(deviceInterface, getDevice(), DE_NULL))));
    347 
    348 						imageResidencyMemoryBinds.push_back(imageMemoryBind);
    349 					}
    350 				}
    351 
    352 				if (!(aspectRequirements.formatProperties.flags & VK_SPARSE_IMAGE_FORMAT_SINGLE_MIPTAIL_BIT) && aspectRequirements.imageMipTailFirstLod < imageCreateInfo.mipLevels)
    353 				{
    354 					const VkSparseMemoryBind imageMipTailMemoryBind = makeSparseMemoryBind(deviceInterface, getDevice(),
    355 						aspectRequirements.imageMipTailSize, memoryType, aspectRequirements.imageMipTailOffset + layerNdx * aspectRequirements.imageMipTailStride);
    356 
    357 					deviceMemUniquePtrVec.push_back(makeVkSharedPtr(Move<VkDeviceMemory>(check<VkDeviceMemory>(imageMipTailMemoryBind.memory), Deleter<VkDeviceMemory>(deviceInterface, getDevice(), DE_NULL))));
    358 
    359 					imageMipTailMemoryBinds.push_back(imageMipTailMemoryBind);
    360 				}
    361 
    362 				// Metadata
    363 				if (metadataAspectIndex != NO_MATCH_FOUND)
    364 				{
    365 					const VkSparseImageMemoryRequirements metadataAspectRequirements = sparseMemoryRequirements[metadataAspectIndex];
    366 
    367 					if (!(metadataAspectRequirements.formatProperties.flags & VK_SPARSE_IMAGE_FORMAT_SINGLE_MIPTAIL_BIT))
    368 					{
    369 						const VkSparseMemoryBind imageMipTailMemoryBind = makeSparseMemoryBind(deviceInterface, getDevice(),
    370 							metadataAspectRequirements.imageMipTailSize, memoryType,
    371 							metadataAspectRequirements.imageMipTailOffset + layerNdx * metadataAspectRequirements.imageMipTailStride,
    372 							VK_SPARSE_MEMORY_BIND_METADATA_BIT);
    373 
    374 						deviceMemUniquePtrVec.push_back(makeVkSharedPtr(Move<VkDeviceMemory>(check<VkDeviceMemory>(imageMipTailMemoryBind.memory), Deleter<VkDeviceMemory>(deviceInterface, getDevice(), DE_NULL))));
    375 
    376 						imageMipTailMemoryBinds.push_back(imageMipTailMemoryBind);
    377 					}
    378 				}
    379 			}
    380 
    381 			if ((aspectRequirements.formatProperties.flags & VK_SPARSE_IMAGE_FORMAT_SINGLE_MIPTAIL_BIT) && aspectRequirements.imageMipTailFirstLod < imageCreateInfo.mipLevels)
    382 			{
    383 				const VkSparseMemoryBind imageMipTailMemoryBind = makeSparseMemoryBind(deviceInterface, getDevice(),
    384 					aspectRequirements.imageMipTailSize, memoryType, aspectRequirements.imageMipTailOffset);
    385 
    386 				deviceMemUniquePtrVec.push_back(makeVkSharedPtr(Move<VkDeviceMemory>(check<VkDeviceMemory>(imageMipTailMemoryBind.memory), Deleter<VkDeviceMemory>(deviceInterface, getDevice(), DE_NULL))));
    387 
    388 				imageMipTailMemoryBinds.push_back(imageMipTailMemoryBind);
    389 			}
    390 
    391 			// Metadata
    392 			if (metadataAspectIndex != NO_MATCH_FOUND)
    393 			{
    394 				const VkSparseImageMemoryRequirements metadataAspectRequirements = sparseMemoryRequirements[metadataAspectIndex];
    395 
    396 				if ((metadataAspectRequirements.formatProperties.flags & VK_SPARSE_IMAGE_FORMAT_SINGLE_MIPTAIL_BIT))
    397 				{
    398 					const VkSparseMemoryBind imageMipTailMemoryBind = makeSparseMemoryBind(deviceInterface, getDevice(),
    399 						metadataAspectRequirements.imageMipTailSize, memoryType, metadataAspectRequirements.imageMipTailOffset,
    400 						VK_SPARSE_MEMORY_BIND_METADATA_BIT);
    401 
    402 					deviceMemUniquePtrVec.push_back(makeVkSharedPtr(Move<VkDeviceMemory>(check<VkDeviceMemory>(imageMipTailMemoryBind.memory), Deleter<VkDeviceMemory>(deviceInterface, getDevice(), DE_NULL))));
    403 
    404 					imageMipTailMemoryBinds.push_back(imageMipTailMemoryBind);
    405 				}
    406 			}
    407 
    408 			const VkDeviceGroupBindSparseInfo devGroupBindSparseInfo =
    409 			{
    410 				VK_STRUCTURE_TYPE_DEVICE_GROUP_BIND_SPARSE_INFO_KHR,	//VkStructureType							sType;
    411 				DE_NULL,												//const void*								pNext;
    412 				firstDeviceID,											//deUint32									resourceDeviceIndex;
    413 				secondDeviceID,											//deUint32									memoryDeviceIndex;
    414 			};
    415 
    416 			VkBindSparseInfo bindSparseInfo =
    417 			{
    418 				VK_STRUCTURE_TYPE_BIND_SPARSE_INFO,						//VkStructureType							sType;
    419 				m_useDeviceGroups ? &devGroupBindSparseInfo : DE_NULL,	//const void*								pNext;
    420 				0u,														//deUint32									waitSemaphoreCount;
    421 				DE_NULL,												//const VkSemaphore*						pWaitSemaphores;
    422 				0u,														//deUint32									bufferBindCount;
    423 				DE_NULL,												//const VkSparseBufferMemoryBindInfo*		pBufferBinds;
    424 				0u,														//deUint32									imageOpaqueBindCount;
    425 				DE_NULL,												//const VkSparseImageOpaqueMemoryBindInfo*	pImageOpaqueBinds;
    426 				0u,														//deUint32									imageBindCount;
    427 				DE_NULL,												//const VkSparseImageMemoryBindInfo*		pImageBinds;
    428 				1u,														//deUint32									signalSemaphoreCount;
    429 				&imageMemoryBindSemaphore.get()							//const VkSemaphore*						pSignalSemaphores;
    430 			};
    431 
    432 			VkSparseImageMemoryBindInfo		  imageResidencyBindInfo;
    433 			VkSparseImageOpaqueMemoryBindInfo imageMipTailBindInfo;
    434 
    435 			if (imageResidencyMemoryBinds.size() > 0)
    436 			{
    437 				imageResidencyBindInfo.image		= *sparseImage;
    438 				imageResidencyBindInfo.bindCount	= static_cast<deUint32>(imageResidencyMemoryBinds.size());
    439 				imageResidencyBindInfo.pBinds		= &imageResidencyMemoryBinds[0];
    440 
    441 				bindSparseInfo.imageBindCount		= 1u;
    442 				bindSparseInfo.pImageBinds			= &imageResidencyBindInfo;
    443 			}
    444 
    445 			if (imageMipTailMemoryBinds.size() > 0)
    446 			{
    447 				imageMipTailBindInfo.image			= *sparseImage;
    448 				imageMipTailBindInfo.bindCount		= static_cast<deUint32>(imageMipTailMemoryBinds.size());
    449 				imageMipTailBindInfo.pBinds			= &imageMipTailMemoryBinds[0];
    450 
    451 				bindSparseInfo.imageOpaqueBindCount = 1u;
    452 				bindSparseInfo.pImageOpaqueBinds	= &imageMipTailBindInfo;
    453 			}
    454 
    455 			// Submit sparse bind commands for execution
    456 			VK_CHECK(deviceInterface.queueBindSparse(sparseQueue.queueHandle, 1u, &bindSparseInfo, DE_NULL));
    457 		}
    458 
     459 		// Create command buffer for compute and transfer operations
    460 		const Unique<VkCommandPool>	  commandPool(makeCommandPool(deviceInterface, getDevice(), computeQueue.queueFamilyIndex));
    461 		const Unique<VkCommandBuffer> commandBuffer(allocateCommandBuffer(deviceInterface, getDevice(), *commandPool, VK_COMMAND_BUFFER_LEVEL_PRIMARY));
    462 
    463 		// Start recording commands
    464 		beginCommandBuffer(deviceInterface, *commandBuffer);
    465 
    466 		// Create descriptor set layout
    467 		const Unique<VkDescriptorSetLayout> descriptorSetLayout(
    468 			DescriptorSetLayoutBuilder()
    469 			.addSingleBinding(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, VK_SHADER_STAGE_COMPUTE_BIT)
    470 			.build(deviceInterface, getDevice()));
    471 
    472 		// Create and bind compute pipeline
    473 		const Unique<VkShaderModule>	shaderModule(createShaderModule(deviceInterface, getDevice(), m_context.getBinaryCollection().get("comp"), DE_NULL));
    474 		const Unique<VkPipelineLayout>	pipelineLayout(makePipelineLayout(deviceInterface, getDevice(), *descriptorSetLayout));
    475 		const Unique<VkPipeline>		computePipeline(makeComputePipeline(deviceInterface, getDevice(), *pipelineLayout, *shaderModule));
    476 
    477 		deviceInterface.cmdBindPipeline(*commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *computePipeline);
    478 
    479 		// Create and bind descriptor set
    480 		const Unique<VkDescriptorPool> descriptorPool(
    481 			DescriptorPoolBuilder()
    482 			.addType(VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, 1u)
    483 			.build(deviceInterface, getDevice(), VK_DESCRIPTOR_POOL_CREATE_FREE_DESCRIPTOR_SET_BIT, 1u));
    484 
    485 		const Unique<VkDescriptorSet>	descriptorSet(makeDescriptorSet(deviceInterface, getDevice(), *descriptorPool, *descriptorSetLayout));
    486 
    487 		const VkImageSubresourceRange	subresourceRange = makeImageSubresourceRange(VK_IMAGE_ASPECT_COLOR_BIT, 0u, 1u, 0u, getNumLayers(m_imageType, m_imageSize));
    488 		const Unique<VkImageView>		imageView(makeImageView(deviceInterface, getDevice(), *sparseImage, mapImageViewType(m_imageType), mapTextureFormat(m_format), subresourceRange));
    489 		const VkDescriptorImageInfo		sparseImageInfo  = makeDescriptorImageInfo(DE_NULL, *imageView, VK_IMAGE_LAYOUT_GENERAL);
    490 
    491 		DescriptorSetUpdateBuilder()
    492 			.writeSingle(*descriptorSet, DescriptorSetUpdateBuilder::Location::binding(0u), VK_DESCRIPTOR_TYPE_STORAGE_IMAGE, &sparseImageInfo)
    493 			.update(deviceInterface, getDevice());
    494 
    495 		deviceInterface.cmdBindDescriptorSets(*commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, *pipelineLayout, 0u, 1u, &descriptorSet.get(), 0u, DE_NULL);
    496 
    497 		{
    498 			const VkImageMemoryBarrier sparseImageLayoutChangeBarrier = makeImageMemoryBarrier
    499 			(
    500 				0u,
    501 				VK_ACCESS_SHADER_WRITE_BIT,
    502 				VK_IMAGE_LAYOUT_UNDEFINED,
    503 				VK_IMAGE_LAYOUT_GENERAL,
    504 				sparseQueue.queueFamilyIndex != computeQueue.queueFamilyIndex ? sparseQueue.queueFamilyIndex : VK_QUEUE_FAMILY_IGNORED,
    505 				sparseQueue.queueFamilyIndex != computeQueue.queueFamilyIndex ? computeQueue.queueFamilyIndex : VK_QUEUE_FAMILY_IGNORED,
    506 				*sparseImage,
    507 				subresourceRange
    508 			);
    509 
    510 			deviceInterface.cmdPipelineBarrier(*commandBuffer, VK_PIPELINE_STAGE_TOP_OF_PIPE_BIT, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, 0u, 0u, DE_NULL, 0u, DE_NULL, 1u, &sparseImageLayoutChangeBarrier);
    511 		}
    512 
    513 		const tcu::UVec3  gridSize = getShaderGridSize(m_imageType, m_imageSize);
    514 
    515 		{
    516 			const tcu::UVec3  workGroupSize = computeWorkGroupSize(gridSize);
    517 
    518 			const deUint32 xWorkGroupCount = gridSize.x() / workGroupSize.x() + (gridSize.x() % workGroupSize.x() ? 1u : 0u);
    519 			const deUint32 yWorkGroupCount = gridSize.y() / workGroupSize.y() + (gridSize.y() % workGroupSize.y() ? 1u : 0u);
    520 			const deUint32 zWorkGroupCount = gridSize.z() / workGroupSize.z() + (gridSize.z() % workGroupSize.z() ? 1u : 0u);
    521 
    522 			const tcu::UVec3 maxComputeWorkGroupCount = tcu::UVec3(65535u, 65535u, 65535u);
    523 
    524 			if (maxComputeWorkGroupCount.x() < xWorkGroupCount ||
    525 				maxComputeWorkGroupCount.y() < yWorkGroupCount ||
    526 				maxComputeWorkGroupCount.z() < zWorkGroupCount)
    527 			{
    528 				TCU_THROW(NotSupportedError, "Image size is not supported");
    529 			}
    530 
    531 			deviceInterface.cmdDispatch(*commandBuffer, xWorkGroupCount, yWorkGroupCount, zWorkGroupCount);
    532 		}
    533 
    534 		{
    535 			const VkImageMemoryBarrier sparseImageTrasferBarrier = makeImageMemoryBarrier
    536 			(
    537 				VK_ACCESS_SHADER_WRITE_BIT,
    538 				VK_ACCESS_TRANSFER_READ_BIT,
    539 				VK_IMAGE_LAYOUT_GENERAL,
    540 				VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL,
    541 				*sparseImage,
    542 				subresourceRange
    543 			);
    544 
    545 			deviceInterface.cmdPipelineBarrier(*commandBuffer, VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_PIPELINE_STAGE_TRANSFER_BIT, 0u, 0u, DE_NULL, 0u, DE_NULL, 1u, &sparseImageTrasferBarrier);
    546 		}
    547 
    548 		const deUint32					imageSizeInBytes		= getNumPixels(m_imageType, m_imageSize) * tcu::getPixelSize(m_format);
    549 		const VkBufferCreateInfo		outputBufferCreateInfo	= makeBufferCreateInfo(imageSizeInBytes, VK_BUFFER_USAGE_TRANSFER_DST_BIT);
    550 		const Unique<VkBuffer>			outputBuffer			(createBuffer(deviceInterface, getDevice(), &outputBufferCreateInfo));
    551 		const de::UniquePtr<Allocation>	outputBufferAlloc		(bindBuffer(deviceInterface, getDevice(), getAllocator(), *outputBuffer, MemoryRequirement::HostVisible));
    552 
    553 		{
    554 			const VkBufferImageCopy bufferImageCopy = makeBufferImageCopy(imageCreateInfo.extent, imageCreateInfo.arrayLayers);
    555 
    556 			deviceInterface.cmdCopyImageToBuffer(*commandBuffer, *sparseImage, VK_IMAGE_LAYOUT_TRANSFER_SRC_OPTIMAL, *outputBuffer, 1u, &bufferImageCopy);
    557 		}
    558 
    559 		{
    560 			const VkBufferMemoryBarrier outputBufferHostReadBarrier = makeBufferMemoryBarrier
    561 			(
    562 				VK_ACCESS_TRANSFER_WRITE_BIT,
    563 				VK_ACCESS_HOST_READ_BIT,
    564 				*outputBuffer,
    565 				0u,
    566 				imageSizeInBytes
    567 			);
    568 
    569 			deviceInterface.cmdPipelineBarrier(*commandBuffer, VK_PIPELINE_STAGE_TRANSFER_BIT, VK_PIPELINE_STAGE_HOST_BIT, 0u, 0u, DE_NULL, 1u, &outputBufferHostReadBarrier, 0u, DE_NULL);
    570 		}
    571 
    572 		// End recording commands
    573 		endCommandBuffer(deviceInterface, *commandBuffer);
    574 
    575 		// The stage at which execution is going to wait for finish of sparse binding operations
    576 		const VkPipelineStageFlags stageBits[] = { VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT };
    577 
    578 		// Submit commands for execution and wait for completion
    579 		submitCommandsAndWait(deviceInterface, getDevice(), computeQueue.queueHandle, *commandBuffer, 1u, &imageMemoryBindSemaphore.get(), stageBits,
    580 			0, DE_NULL, m_useDeviceGroups, firstDeviceID);
    581 
    582 		// Retrieve data from buffer to host memory
    583 		invalidateMappedMemoryRange(deviceInterface, getDevice(), outputBufferAlloc->getMemory(), outputBufferAlloc->getOffset(), imageSizeInBytes);
    584 
    585 		const deUint8* outputData = static_cast<const deUint8*>(outputBufferAlloc->getHostPtr());
    586 		const tcu::ConstPixelBufferAccess pixelBuffer = tcu::ConstPixelBufferAccess(m_format, gridSize.x(), gridSize.y(), gridSize.z(), outputData);
    587 
    588 		// Wait for sparse queue to become idle
    589 		//vsk fails:
    590 		deviceInterface.queueWaitIdle(sparseQueue.queueHandle);
    591 
    592 		// Validate results
    593 		if( aspectRequirements.imageMipTailFirstLod > 0u )
    594 		{
    595 			const VkExtent3D		 mipExtent		 = mipLevelExtents(imageCreateInfo.extent, 0u);
    596 			const tcu::UVec3		 numSparseBinds  = alignedDivide(mipExtent, imageGranularity);
    597 			const tcu::UVec3		 lastBlockExtent = tcu::UVec3(	mipExtent.width  % imageGranularity.width  ? mipExtent.width  % imageGranularity.width  : imageGranularity.width,
    598 																	mipExtent.height % imageGranularity.height ? mipExtent.height % imageGranularity.height : imageGranularity.height,
    599 																	mipExtent.depth  % imageGranularity.depth  ? mipExtent.depth  % imageGranularity.depth  : imageGranularity.depth);
    600 
    601 			for (deUint32 layerNdx = 0; layerNdx < imageCreateInfo.arrayLayers; ++layerNdx)
    602 			{
    603 				for (deUint32 z = 0; z < numSparseBinds.z(); ++z)
    604 				for (deUint32 y = 0; y < numSparseBinds.y(); ++y)
    605 				for (deUint32 x = 0; x < numSparseBinds.x(); ++x)
    606 				{
    607 					VkExtent3D offset;
    608 					offset.width  = x*imageGranularity.width;
    609 					offset.height = y*imageGranularity.height;
    610 					offset.depth  = z*imageGranularity.depth + layerNdx*numSparseBinds.z()*imageGranularity.depth;
    611 
    612 					VkExtent3D extent;
    613 					extent.width  = (x == numSparseBinds.x() - 1) ? lastBlockExtent.x() : imageGranularity.width;
    614 					extent.height = (y == numSparseBinds.y() - 1) ? lastBlockExtent.y() : imageGranularity.height;
    615 					extent.depth  = (z == numSparseBinds.z() - 1) ? lastBlockExtent.z() : imageGranularity.depth;
    616 
    617 					const deUint32 linearIndex = x + y*numSparseBinds.x() + z*numSparseBinds.x()*numSparseBinds.y() + layerNdx*numSparseBinds.x()*numSparseBinds.y()*numSparseBinds.z();
    618 
    619 					if (linearIndex % 2u == 0u)
    620 					{
    621 						for (deUint32 offsetZ = offset.depth;  offsetZ < offset.depth  + extent.depth;  ++offsetZ)
    622 						for (deUint32 offsetY = offset.height; offsetY < offset.height + extent.height; ++offsetY)
    623 						for (deUint32 offsetX = offset.width;  offsetX < offset.width  + extent.width;  ++offsetX)
    624 						{
    625 							const tcu::UVec4 referenceValue = tcu::UVec4(offsetX % 127u, offsetY % 127u, offsetZ % 127u, 1u);
    626 							const tcu::UVec4 outputValue	= pixelBuffer.getPixelUint(offsetX, offsetY, offsetZ);
    627 
    628 							if (deMemCmp(&outputValue, &referenceValue, sizeof(deUint32) * getNumUsedChannels(m_format.order)) != 0)
    629 								return tcu::TestStatus::fail("Failed");
    630 						}
    631 					}
    632 					else if (physicalDeviceProperties.sparseProperties.residencyNonResidentStrict)
    633 					{
    634 						for (deUint32 offsetZ = offset.depth;  offsetZ < offset.depth  + extent.depth;  ++offsetZ)
    635 						for (deUint32 offsetY = offset.height; offsetY < offset.height + extent.height; ++offsetY)
    636 						for (deUint32 offsetX = offset.width;  offsetX < offset.width  + extent.width;  ++offsetX)
    637 						{
    638 							const tcu::UVec4 referenceValue = tcu::UVec4(0u, 0u, 0u, 0u);
    639 							const tcu::UVec4 outputValue = pixelBuffer.getPixelUint(offsetX, offsetY, offsetZ);
    640 
    641 							if (deMemCmp(&outputValue, &referenceValue, sizeof(deUint32) * getNumUsedChannels(m_format.order)) != 0)
    642 								return tcu::TestStatus::fail("Failed");
    643 						}
    644 					}
    645 				}
    646 			}
    647 		}
    648 		else
    649 		{
    650 			const VkExtent3D mipExtent = mipLevelExtents(imageCreateInfo.extent, 0u);
    651 
    652 			for (deUint32 offsetZ = 0u; offsetZ < mipExtent.depth * imageCreateInfo.arrayLayers; ++offsetZ)
    653 			for (deUint32 offsetY = 0u; offsetY < mipExtent.height; ++offsetY)
    654 			for (deUint32 offsetX = 0u; offsetX < mipExtent.width;  ++offsetX)
    655 			{
    656 				const tcu::UVec4 referenceValue = tcu::UVec4(offsetX % 127u, offsetY % 127u, offsetZ % 127u, 1u);
    657 				const tcu::UVec4 outputValue	= pixelBuffer.getPixelUint(offsetX, offsetY, offsetZ);
    658 
    659 				if (deMemCmp(&outputValue, &referenceValue, sizeof(deUint32) * getNumUsedChannels(m_format.order)) != 0)
    660 					return tcu::TestStatus::fail("Failed");
    661 			}
    662 		}
    663 	}
    664 
    665 	return tcu::TestStatus::pass("Passed");
    666 }
    667 
    668 TestInstance* ImageSparseResidencyCase::createInstance (Context& context) const
    669 {
    670 	return new ImageSparseResidencyInstance(context, m_imageType, m_imageSize, m_format, m_useDeviceGroups);
    671 }
    672 
    673 } // anonymous ns
    674 
    675 tcu::TestCaseGroup* createImageSparseResidencyTestsCommon (tcu::TestContext& testCtx, de::MovePtr<tcu::TestCaseGroup> testGroup, const bool useDeviceGroup = false)
    676 {
    677 	static const deUint32 sizeCountPerImageType = 3u;
    678 
    679 	struct ImageParameters
    680 	{
    681 		ImageType	imageType;
    682 		tcu::UVec3	imageSizes[sizeCountPerImageType];
    683 	};
    684 
    685 	static const ImageParameters imageParametersArray[] =
    686 	{
    687 		{ IMAGE_TYPE_2D,		 { tcu::UVec3(512u, 256u, 1u),  tcu::UVec3(1024u, 128u, 1u), tcu::UVec3(11u,  137u, 1u) } },
    688 		{ IMAGE_TYPE_2D_ARRAY,	 { tcu::UVec3(512u, 256u, 6u),	tcu::UVec3(1024u, 128u, 8u), tcu::UVec3(11u,  137u, 3u) } },
    689 		{ IMAGE_TYPE_CUBE,		 { tcu::UVec3(256u, 256u, 1u),	tcu::UVec3(128u,  128u, 1u), tcu::UVec3(137u, 137u, 1u) } },
    690 		{ IMAGE_TYPE_CUBE_ARRAY, { tcu::UVec3(256u, 256u, 6u),	tcu::UVec3(128u,  128u, 8u), tcu::UVec3(137u, 137u, 3u) } },
    691 		{ IMAGE_TYPE_3D,		 { tcu::UVec3(512u, 256u, 16u), tcu::UVec3(1024u, 128u, 8u), tcu::UVec3(11u,  137u, 3u) } }
    692 	};
    693 
    694 	static const tcu::TextureFormat formats[] =
    695 	{
    696 		tcu::TextureFormat(tcu::TextureFormat::R,	 tcu::TextureFormat::SIGNED_INT32),
    697 		tcu::TextureFormat(tcu::TextureFormat::R,	 tcu::TextureFormat::SIGNED_INT16),
    698 		tcu::TextureFormat(tcu::TextureFormat::R,	 tcu::TextureFormat::SIGNED_INT8),
    699 		tcu::TextureFormat(tcu::TextureFormat::RG,	 tcu::TextureFormat::SIGNED_INT32),
    700 		tcu::TextureFormat(tcu::TextureFormat::RG,   tcu::TextureFormat::SIGNED_INT16),
    701 		tcu::TextureFormat(tcu::TextureFormat::RG,   tcu::TextureFormat::SIGNED_INT8),
    702 		tcu::TextureFormat(tcu::TextureFormat::RGBA, tcu::TextureFormat::UNSIGNED_INT32),
    703 		tcu::TextureFormat(tcu::TextureFormat::RGBA, tcu::TextureFormat::UNSIGNED_INT16),
    704 		tcu::TextureFormat(tcu::TextureFormat::RGBA, tcu::TextureFormat::UNSIGNED_INT8)
    705 	};
    706 
    707 	for (deInt32 imageTypeNdx = 0; imageTypeNdx < DE_LENGTH_OF_ARRAY(imageParametersArray); ++imageTypeNdx)
    708 	{
    709 		const ImageType					imageType = imageParametersArray[imageTypeNdx].imageType;
    710 		de::MovePtr<tcu::TestCaseGroup> imageTypeGroup(new tcu::TestCaseGroup(testCtx, getImageTypeName(imageType).c_str(), ""));
    711 
    712 		for (deInt32 formatNdx = 0; formatNdx < DE_LENGTH_OF_ARRAY(formats); ++formatNdx)
    713 		{
    714 			const tcu::TextureFormat&		format = formats[formatNdx];
    715 			de::MovePtr<tcu::TestCaseGroup> formatGroup(new tcu::TestCaseGroup(testCtx, getShaderImageFormatQualifier(format).c_str(), ""));
    716 
    717 			for (deInt32 imageSizeNdx = 0; imageSizeNdx < DE_LENGTH_OF_ARRAY(imageParametersArray[imageTypeNdx].imageSizes); ++imageSizeNdx)
    718 			{
    719 				const tcu::UVec3 imageSize = imageParametersArray[imageTypeNdx].imageSizes[imageSizeNdx];
    720 
    721 				std::ostringstream stream;
    722 				stream << imageSize.x() << "_" << imageSize.y() << "_" << imageSize.z();
    723 
    724 				formatGroup->addChild(new ImageSparseResidencyCase(testCtx, stream.str(), "", imageType, imageSize, format, glu::GLSL_VERSION_440, useDeviceGroup));
    725 			}
    726 			imageTypeGroup->addChild(formatGroup.release());
    727 		}
    728 		testGroup->addChild(imageTypeGroup.release());
    729 	}
    730 
    731 	return testGroup.release();
    732 }
    733 
    734 tcu::TestCaseGroup* createImageSparseResidencyTests (tcu::TestContext& testCtx)
    735 {
    736 	de::MovePtr<tcu::TestCaseGroup> testGroup(new tcu::TestCaseGroup(testCtx, "image_sparse_residency", "Buffer Sparse Residency"));
    737 	return createImageSparseResidencyTestsCommon(testCtx, testGroup);
    738 }
    739 
    740 tcu::TestCaseGroup* createDeviceGroupImageSparseResidencyTests (tcu::TestContext& testCtx)
    741 {
    742 	de::MovePtr<tcu::TestCaseGroup> testGroup(new tcu::TestCaseGroup(testCtx, "device_group_image_sparse_residency", "Buffer Sparse Residency"));
    743 	return createImageSparseResidencyTestsCommon(testCtx, testGroup, true);
    744 }
    745 
    746 } // sparse
    747 } // vkt
    748