1 /* 2 * Copyright 2016 Red Hat. 3 * Copyright 2016 Bas Nieuwenhuizen 4 * 5 * based in part on anv driver which is: 6 * Copyright 2015 Intel Corporation 7 * 8 * Permission is hereby granted, free of charge, to any person obtaining a 9 * copy of this software and associated documentation files (the "Software"), 10 * to deal in the Software without restriction, including without limitation 11 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 12 * and/or sell copies of the Software, and to permit persons to whom the 13 * Software is furnished to do so, subject to the following conditions: 14 * 15 * The above copyright notice and this permission notice (including the next 16 * paragraph) shall be included in all copies or substantial portions of the 17 * Software. 18 * 19 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 20 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 21 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 22 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 23 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 24 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 25 * IN THE SOFTWARE. 
26 */ 27 28 #include <dlfcn.h> 29 #include <stdbool.h> 30 #include <string.h> 31 #include <unistd.h> 32 #include <fcntl.h> 33 #include <sys/stat.h> 34 #include "radv_private.h" 35 #include "util/strtod.h" 36 37 #include <xf86drm.h> 38 #include <amdgpu.h> 39 #include <amdgpu_drm.h> 40 #include "amdgpu_id.h" 41 #include "winsys/amdgpu/radv_amdgpu_winsys_public.h" 42 #include "ac_llvm_util.h" 43 #include "vk_format.h" 44 #include "sid.h" 45 #include "util/debug.h" 46 struct radv_dispatch_table dtable; 47 48 static int 49 radv_get_function_timestamp(void *ptr, uint32_t* timestamp) 50 { 51 Dl_info info; 52 struct stat st; 53 if (!dladdr(ptr, &info) || !info.dli_fname) { 54 return -1; 55 } 56 if (stat(info.dli_fname, &st)) { 57 return -1; 58 } 59 *timestamp = st.st_mtim.tv_sec; 60 return 0; 61 } 62 63 static int 64 radv_device_get_cache_uuid(enum radeon_family family, void *uuid) 65 { 66 uint32_t mesa_timestamp, llvm_timestamp; 67 uint16_t f = family; 68 memset(uuid, 0, VK_UUID_SIZE); 69 if (radv_get_function_timestamp(radv_device_get_cache_uuid, &mesa_timestamp) || 70 radv_get_function_timestamp(LLVMInitializeAMDGPUTargetInfo, &llvm_timestamp)) 71 return -1; 72 73 memcpy(uuid, &mesa_timestamp, 4); 74 memcpy((char*)uuid + 4, &llvm_timestamp, 4); 75 memcpy((char*)uuid + 8, &f, 2); 76 snprintf((char*)uuid + 10, VK_UUID_SIZE - 10, "radv"); 77 return 0; 78 } 79 80 static const VkExtensionProperties instance_extensions[] = { 81 { 82 .extensionName = VK_KHR_SURFACE_EXTENSION_NAME, 83 .specVersion = 25, 84 }, 85 #ifdef VK_USE_PLATFORM_XCB_KHR 86 { 87 .extensionName = VK_KHR_XCB_SURFACE_EXTENSION_NAME, 88 .specVersion = 6, 89 }, 90 #endif 91 #ifdef VK_USE_PLATFORM_XLIB_KHR 92 { 93 .extensionName = VK_KHR_XLIB_SURFACE_EXTENSION_NAME, 94 .specVersion = 6, 95 }, 96 #endif 97 #ifdef VK_USE_PLATFORM_WAYLAND_KHR 98 { 99 .extensionName = VK_KHR_WAYLAND_SURFACE_EXTENSION_NAME, 100 .specVersion = 5, 101 }, 102 #endif 103 }; 104 105 static const VkExtensionProperties 
common_device_extensions[] = { 106 { 107 .extensionName = VK_KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE_EXTENSION_NAME, 108 .specVersion = 1, 109 }, 110 { 111 .extensionName = VK_KHR_SWAPCHAIN_EXTENSION_NAME, 112 .specVersion = 68, 113 }, 114 { 115 .extensionName = VK_AMD_DRAW_INDIRECT_COUNT_EXTENSION_NAME, 116 .specVersion = 1, 117 }, 118 { 119 .extensionName = VK_AMD_NEGATIVE_VIEWPORT_HEIGHT_EXTENSION_NAME, 120 .specVersion = 1, 121 }, 122 }; 123 124 static VkResult 125 radv_extensions_register(struct radv_instance *instance, 126 struct radv_extensions *extensions, 127 const VkExtensionProperties *new_ext, 128 uint32_t num_ext) 129 { 130 size_t new_size; 131 VkExtensionProperties *new_ptr; 132 133 assert(new_ext && num_ext > 0); 134 135 if (!new_ext) 136 return VK_ERROR_INITIALIZATION_FAILED; 137 138 new_size = (extensions->num_ext + num_ext) * sizeof(VkExtensionProperties); 139 new_ptr = vk_realloc(&instance->alloc, extensions->ext_array, 140 new_size, 8, VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE); 141 142 /* Old array continues to be valid, update nothing */ 143 if (!new_ptr) 144 return VK_ERROR_OUT_OF_HOST_MEMORY; 145 146 memcpy(&new_ptr[extensions->num_ext], new_ext, 147 num_ext * sizeof(VkExtensionProperties)); 148 extensions->ext_array = new_ptr; 149 extensions->num_ext += num_ext; 150 151 return VK_SUCCESS; 152 } 153 154 static void 155 radv_extensions_finish(struct radv_instance *instance, 156 struct radv_extensions *extensions) 157 { 158 assert(extensions); 159 160 if (!extensions) 161 radv_loge("Attemted to free invalid extension struct\n"); 162 163 if (extensions->ext_array) 164 vk_free(&instance->alloc, extensions->ext_array); 165 } 166 167 static bool 168 is_extension_enabled(const VkExtensionProperties *extensions, 169 size_t num_ext, 170 const char *name) 171 { 172 assert(extensions && name); 173 174 for (uint32_t i = 0; i < num_ext; i++) { 175 if (strcmp(name, extensions[i].extensionName) == 0) 176 return true; 177 } 178 179 return false; 180 } 181 182 static 
VkResult 183 radv_physical_device_init(struct radv_physical_device *device, 184 struct radv_instance *instance, 185 const char *path) 186 { 187 VkResult result; 188 drmVersionPtr version; 189 int fd; 190 191 fd = open(path, O_RDWR | O_CLOEXEC); 192 if (fd < 0) 193 return VK_ERROR_INCOMPATIBLE_DRIVER; 194 195 version = drmGetVersion(fd); 196 if (!version) { 197 close(fd); 198 return vk_errorf(VK_ERROR_INCOMPATIBLE_DRIVER, 199 "failed to get version %s: %m", path); 200 } 201 202 if (strcmp(version->name, "amdgpu")) { 203 drmFreeVersion(version); 204 close(fd); 205 return VK_ERROR_INCOMPATIBLE_DRIVER; 206 } 207 drmFreeVersion(version); 208 209 device->_loader_data.loaderMagic = ICD_LOADER_MAGIC; 210 device->instance = instance; 211 assert(strlen(path) < ARRAY_SIZE(device->path)); 212 strncpy(device->path, path, ARRAY_SIZE(device->path)); 213 214 device->ws = radv_amdgpu_winsys_create(fd); 215 if (!device->ws) { 216 result = VK_ERROR_INCOMPATIBLE_DRIVER; 217 goto fail; 218 } 219 device->ws->query_info(device->ws, &device->rad_info); 220 result = radv_init_wsi(device); 221 if (result != VK_SUCCESS) { 222 device->ws->destroy(device->ws); 223 goto fail; 224 } 225 226 if (radv_device_get_cache_uuid(device->rad_info.family, device->uuid)) { 227 radv_finish_wsi(device); 228 device->ws->destroy(device->ws); 229 result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED, 230 "cannot generate UUID"); 231 goto fail; 232 } 233 234 result = radv_extensions_register(instance, 235 &device->extensions, 236 common_device_extensions, 237 ARRAY_SIZE(common_device_extensions)); 238 if (result != VK_SUCCESS) 239 goto fail; 240 241 fprintf(stderr, "WARNING: radv is not a conformant vulkan implementation, testing use only.\n"); 242 device->name = device->rad_info.name; 243 close(fd); 244 return VK_SUCCESS; 245 246 fail: 247 close(fd); 248 return result; 249 } 250 251 static void 252 radv_physical_device_finish(struct radv_physical_device *device) 253 { 254 radv_extensions_finish(device->instance, 
&device->extensions); 255 radv_finish_wsi(device); 256 device->ws->destroy(device->ws); 257 } 258 259 260 static void * 261 default_alloc_func(void *pUserData, size_t size, size_t align, 262 VkSystemAllocationScope allocationScope) 263 { 264 return malloc(size); 265 } 266 267 static void * 268 default_realloc_func(void *pUserData, void *pOriginal, size_t size, 269 size_t align, VkSystemAllocationScope allocationScope) 270 { 271 return realloc(pOriginal, size); 272 } 273 274 static void 275 default_free_func(void *pUserData, void *pMemory) 276 { 277 free(pMemory); 278 } 279 280 static const VkAllocationCallbacks default_alloc = { 281 .pUserData = NULL, 282 .pfnAllocation = default_alloc_func, 283 .pfnReallocation = default_realloc_func, 284 .pfnFree = default_free_func, 285 }; 286 287 static const struct debug_control radv_debug_options[] = { 288 {"fastclears", RADV_DEBUG_FAST_CLEARS}, 289 {"nodcc", RADV_DEBUG_NO_DCC}, 290 {"shaders", RADV_DEBUG_DUMP_SHADERS}, 291 {"nocache", RADV_DEBUG_NO_CACHE}, 292 {"shaderstats", RADV_DEBUG_DUMP_SHADER_STATS}, 293 {"nohiz", RADV_DEBUG_NO_HIZ}, 294 {"nocompute", RADV_DEBUG_NO_COMPUTE_QUEUE}, 295 {"unsafemath", RADV_DEBUG_UNSAFE_MATH}, 296 {NULL, 0} 297 }; 298 299 VkResult radv_CreateInstance( 300 const VkInstanceCreateInfo* pCreateInfo, 301 const VkAllocationCallbacks* pAllocator, 302 VkInstance* pInstance) 303 { 304 struct radv_instance *instance; 305 306 assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO); 307 308 uint32_t client_version; 309 if (pCreateInfo->pApplicationInfo && 310 pCreateInfo->pApplicationInfo->apiVersion != 0) { 311 client_version = pCreateInfo->pApplicationInfo->apiVersion; 312 } else { 313 client_version = VK_MAKE_VERSION(1, 0, 0); 314 } 315 316 if (VK_MAKE_VERSION(1, 0, 0) > client_version || 317 client_version > VK_MAKE_VERSION(1, 0, 0xfff)) { 318 return vk_errorf(VK_ERROR_INCOMPATIBLE_DRIVER, 319 "Client requested version %d.%d.%d", 320 VK_VERSION_MAJOR(client_version), 321 
VK_VERSION_MINOR(client_version), 322 VK_VERSION_PATCH(client_version)); 323 } 324 325 for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) { 326 if (!is_extension_enabled(instance_extensions, 327 ARRAY_SIZE(instance_extensions), 328 pCreateInfo->ppEnabledExtensionNames[i])) 329 return vk_error(VK_ERROR_EXTENSION_NOT_PRESENT); 330 } 331 332 instance = vk_alloc2(&default_alloc, pAllocator, sizeof(*instance), 8, 333 VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE); 334 if (!instance) 335 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); 336 337 memset(instance, 0, sizeof(*instance)); 338 339 instance->_loader_data.loaderMagic = ICD_LOADER_MAGIC; 340 341 if (pAllocator) 342 instance->alloc = *pAllocator; 343 else 344 instance->alloc = default_alloc; 345 346 instance->apiVersion = client_version; 347 instance->physicalDeviceCount = -1; 348 349 _mesa_locale_init(); 350 351 VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false)); 352 353 instance->debug_flags = parse_debug_string(getenv("RADV_DEBUG"), 354 radv_debug_options); 355 356 *pInstance = radv_instance_to_handle(instance); 357 358 return VK_SUCCESS; 359 } 360 361 void radv_DestroyInstance( 362 VkInstance _instance, 363 const VkAllocationCallbacks* pAllocator) 364 { 365 RADV_FROM_HANDLE(radv_instance, instance, _instance); 366 367 for (int i = 0; i < instance->physicalDeviceCount; ++i) { 368 radv_physical_device_finish(instance->physicalDevices + i); 369 } 370 371 VG(VALGRIND_DESTROY_MEMPOOL(instance)); 372 373 _mesa_locale_fini(); 374 375 vk_free(&instance->alloc, instance); 376 } 377 378 VkResult radv_EnumeratePhysicalDevices( 379 VkInstance _instance, 380 uint32_t* pPhysicalDeviceCount, 381 VkPhysicalDevice* pPhysicalDevices) 382 { 383 RADV_FROM_HANDLE(radv_instance, instance, _instance); 384 VkResult result; 385 386 if (instance->physicalDeviceCount < 0) { 387 char path[20]; 388 instance->physicalDeviceCount = 0; 389 for (unsigned i = 0; i < RADV_MAX_DRM_DEVICES; i++) { 390 snprintf(path, sizeof(path), 
"/dev/dri/renderD%d", 128 + i); 391 result = radv_physical_device_init(instance->physicalDevices + 392 instance->physicalDeviceCount, 393 instance, path); 394 if (result == VK_SUCCESS) 395 ++instance->physicalDeviceCount; 396 else if (result != VK_ERROR_INCOMPATIBLE_DRIVER) 397 return result; 398 } 399 } 400 401 if (!pPhysicalDevices) { 402 *pPhysicalDeviceCount = instance->physicalDeviceCount; 403 } else { 404 *pPhysicalDeviceCount = MIN2(*pPhysicalDeviceCount, instance->physicalDeviceCount); 405 for (unsigned i = 0; i < *pPhysicalDeviceCount; ++i) 406 pPhysicalDevices[i] = radv_physical_device_to_handle(instance->physicalDevices + i); 407 } 408 409 return *pPhysicalDeviceCount < instance->physicalDeviceCount ? VK_INCOMPLETE 410 : VK_SUCCESS; 411 } 412 413 void radv_GetPhysicalDeviceFeatures( 414 VkPhysicalDevice physicalDevice, 415 VkPhysicalDeviceFeatures* pFeatures) 416 { 417 // RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice); 418 419 memset(pFeatures, 0, sizeof(*pFeatures)); 420 421 *pFeatures = (VkPhysicalDeviceFeatures) { 422 .robustBufferAccess = true, 423 .fullDrawIndexUint32 = true, 424 .imageCubeArray = true, 425 .independentBlend = true, 426 .geometryShader = false, 427 .tessellationShader = false, 428 .sampleRateShading = false, 429 .dualSrcBlend = true, 430 .logicOp = true, 431 .multiDrawIndirect = true, 432 .drawIndirectFirstInstance = true, 433 .depthClamp = true, 434 .depthBiasClamp = true, 435 .fillModeNonSolid = true, 436 .depthBounds = true, 437 .wideLines = true, 438 .largePoints = true, 439 .alphaToOne = true, 440 .multiViewport = false, 441 .samplerAnisotropy = true, 442 .textureCompressionETC2 = false, 443 .textureCompressionASTC_LDR = false, 444 .textureCompressionBC = true, 445 .occlusionQueryPrecise = true, 446 .pipelineStatisticsQuery = false, 447 .vertexPipelineStoresAndAtomics = true, 448 .fragmentStoresAndAtomics = true, 449 .shaderTessellationAndGeometryPointSize = true, 450 .shaderImageGatherExtended = true, 451 
.shaderStorageImageExtendedFormats = true, 452 .shaderStorageImageMultisample = false, 453 .shaderUniformBufferArrayDynamicIndexing = true, 454 .shaderSampledImageArrayDynamicIndexing = true, 455 .shaderStorageBufferArrayDynamicIndexing = true, 456 .shaderStorageImageArrayDynamicIndexing = true, 457 .shaderStorageImageReadWithoutFormat = false, 458 .shaderStorageImageWriteWithoutFormat = false, 459 .shaderClipDistance = true, 460 .shaderCullDistance = true, 461 .shaderFloat64 = false, 462 .shaderInt64 = false, 463 .shaderInt16 = false, 464 .alphaToOne = true, 465 .variableMultisampleRate = false, 466 .inheritedQueries = false, 467 }; 468 } 469 470 void radv_GetPhysicalDeviceProperties( 471 VkPhysicalDevice physicalDevice, 472 VkPhysicalDeviceProperties* pProperties) 473 { 474 RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice); 475 VkSampleCountFlags sample_counts = 0xf; 476 VkPhysicalDeviceLimits limits = { 477 .maxImageDimension1D = (1 << 14), 478 .maxImageDimension2D = (1 << 14), 479 .maxImageDimension3D = (1 << 11), 480 .maxImageDimensionCube = (1 << 14), 481 .maxImageArrayLayers = (1 << 11), 482 .maxTexelBufferElements = 128 * 1024 * 1024, 483 .maxUniformBufferRange = UINT32_MAX, 484 .maxStorageBufferRange = UINT32_MAX, 485 .maxPushConstantsSize = MAX_PUSH_CONSTANTS_SIZE, 486 .maxMemoryAllocationCount = UINT32_MAX, 487 .maxSamplerAllocationCount = 64 * 1024, 488 .bufferImageGranularity = 64, /* A cache line */ 489 .sparseAddressSpaceSize = 0, 490 .maxBoundDescriptorSets = MAX_SETS, 491 .maxPerStageDescriptorSamplers = 64, 492 .maxPerStageDescriptorUniformBuffers = 64, 493 .maxPerStageDescriptorStorageBuffers = 64, 494 .maxPerStageDescriptorSampledImages = 64, 495 .maxPerStageDescriptorStorageImages = 64, 496 .maxPerStageDescriptorInputAttachments = 64, 497 .maxPerStageResources = 128, 498 .maxDescriptorSetSamplers = 256, 499 .maxDescriptorSetUniformBuffers = 256, 500 .maxDescriptorSetUniformBuffersDynamic = 256, 501 .maxDescriptorSetStorageBuffers 
= 256, 502 .maxDescriptorSetStorageBuffersDynamic = 256, 503 .maxDescriptorSetSampledImages = 256, 504 .maxDescriptorSetStorageImages = 256, 505 .maxDescriptorSetInputAttachments = 256, 506 .maxVertexInputAttributes = 32, 507 .maxVertexInputBindings = 32, 508 .maxVertexInputAttributeOffset = 2047, 509 .maxVertexInputBindingStride = 2048, 510 .maxVertexOutputComponents = 128, 511 .maxTessellationGenerationLevel = 0, 512 .maxTessellationPatchSize = 0, 513 .maxTessellationControlPerVertexInputComponents = 0, 514 .maxTessellationControlPerVertexOutputComponents = 0, 515 .maxTessellationControlPerPatchOutputComponents = 0, 516 .maxTessellationControlTotalOutputComponents = 0, 517 .maxTessellationEvaluationInputComponents = 0, 518 .maxTessellationEvaluationOutputComponents = 0, 519 .maxGeometryShaderInvocations = 32, 520 .maxGeometryInputComponents = 64, 521 .maxGeometryOutputComponents = 128, 522 .maxGeometryOutputVertices = 256, 523 .maxGeometryTotalOutputComponents = 1024, 524 .maxFragmentInputComponents = 128, 525 .maxFragmentOutputAttachments = 8, 526 .maxFragmentDualSrcAttachments = 1, 527 .maxFragmentCombinedOutputResources = 8, 528 .maxComputeSharedMemorySize = 32768, 529 .maxComputeWorkGroupCount = { 65535, 65535, 65535 }, 530 .maxComputeWorkGroupInvocations = 2048, 531 .maxComputeWorkGroupSize = { 532 2048, 533 2048, 534 2048 535 }, 536 .subPixelPrecisionBits = 4 /* FIXME */, 537 .subTexelPrecisionBits = 4 /* FIXME */, 538 .mipmapPrecisionBits = 4 /* FIXME */, 539 .maxDrawIndexedIndexValue = UINT32_MAX, 540 .maxDrawIndirectCount = UINT32_MAX, 541 .maxSamplerLodBias = 16, 542 .maxSamplerAnisotropy = 16, 543 .maxViewports = MAX_VIEWPORTS, 544 .maxViewportDimensions = { (1 << 14), (1 << 14) }, 545 .viewportBoundsRange = { INT16_MIN, INT16_MAX }, 546 .viewportSubPixelBits = 13, /* We take a float? 
*/ 547 .minMemoryMapAlignment = 4096, /* A page */ 548 .minTexelBufferOffsetAlignment = 1, 549 .minUniformBufferOffsetAlignment = 4, 550 .minStorageBufferOffsetAlignment = 4, 551 .minTexelOffset = -32, 552 .maxTexelOffset = 31, 553 .minTexelGatherOffset = -32, 554 .maxTexelGatherOffset = 31, 555 .minInterpolationOffset = -2, 556 .maxInterpolationOffset = 2, 557 .subPixelInterpolationOffsetBits = 8, 558 .maxFramebufferWidth = (1 << 14), 559 .maxFramebufferHeight = (1 << 14), 560 .maxFramebufferLayers = (1 << 10), 561 .framebufferColorSampleCounts = sample_counts, 562 .framebufferDepthSampleCounts = sample_counts, 563 .framebufferStencilSampleCounts = sample_counts, 564 .framebufferNoAttachmentsSampleCounts = sample_counts, 565 .maxColorAttachments = MAX_RTS, 566 .sampledImageColorSampleCounts = sample_counts, 567 .sampledImageIntegerSampleCounts = VK_SAMPLE_COUNT_1_BIT, 568 .sampledImageDepthSampleCounts = sample_counts, 569 .sampledImageStencilSampleCounts = sample_counts, 570 .storageImageSampleCounts = VK_SAMPLE_COUNT_1_BIT, 571 .maxSampleMaskWords = 1, 572 .timestampComputeAndGraphics = false, 573 .timestampPeriod = 100000.0 / pdevice->rad_info.clock_crystal_freq, 574 .maxClipDistances = 8, 575 .maxCullDistances = 8, 576 .maxCombinedClipAndCullDistances = 8, 577 .discreteQueuePriorities = 1, 578 .pointSizeRange = { 0.125, 255.875 }, 579 .lineWidthRange = { 0.0, 7.9921875 }, 580 .pointSizeGranularity = (1.0 / 8.0), 581 .lineWidthGranularity = (1.0 / 128.0), 582 .strictLines = false, /* FINISHME */ 583 .standardSampleLocations = true, 584 .optimalBufferCopyOffsetAlignment = 128, 585 .optimalBufferCopyRowPitchAlignment = 128, 586 .nonCoherentAtomSize = 64, 587 }; 588 589 *pProperties = (VkPhysicalDeviceProperties) { 590 .apiVersion = VK_MAKE_VERSION(1, 0, 5), 591 .driverVersion = 1, 592 .vendorID = 0x1002, 593 .deviceID = pdevice->rad_info.pci_id, 594 .deviceType = VK_PHYSICAL_DEVICE_TYPE_DISCRETE_GPU, 595 .limits = limits, 596 .sparseProperties = {0}, /* Broadwell 
doesn't do sparse. */ 597 }; 598 599 strcpy(pProperties->deviceName, pdevice->name); 600 memcpy(pProperties->pipelineCacheUUID, pdevice->uuid, VK_UUID_SIZE); 601 } 602 603 void radv_GetPhysicalDeviceQueueFamilyProperties( 604 VkPhysicalDevice physicalDevice, 605 uint32_t* pCount, 606 VkQueueFamilyProperties* pQueueFamilyProperties) 607 { 608 RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice); 609 int num_queue_families = 1; 610 int idx; 611 if (pdevice->rad_info.compute_rings > 0 && 612 pdevice->rad_info.chip_class >= CIK && 613 !(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE)) 614 num_queue_families++; 615 616 if (pQueueFamilyProperties == NULL) { 617 *pCount = num_queue_families; 618 return; 619 } 620 621 if (!*pCount) 622 return; 623 624 idx = 0; 625 if (*pCount >= 1) { 626 pQueueFamilyProperties[idx] = (VkQueueFamilyProperties) { 627 .queueFlags = VK_QUEUE_GRAPHICS_BIT | 628 VK_QUEUE_COMPUTE_BIT | 629 VK_QUEUE_TRANSFER_BIT, 630 .queueCount = 1, 631 .timestampValidBits = 64, 632 .minImageTransferGranularity = (VkExtent3D) { 1, 1, 1 }, 633 }; 634 idx++; 635 } 636 637 if (pdevice->rad_info.compute_rings > 0 && 638 pdevice->rad_info.chip_class >= CIK && 639 !(pdevice->instance->debug_flags & RADV_DEBUG_NO_COMPUTE_QUEUE)) { 640 if (*pCount > idx) { 641 pQueueFamilyProperties[idx] = (VkQueueFamilyProperties) { 642 .queueFlags = VK_QUEUE_COMPUTE_BIT | VK_QUEUE_TRANSFER_BIT, 643 .queueCount = pdevice->rad_info.compute_rings, 644 .timestampValidBits = 64, 645 .minImageTransferGranularity = (VkExtent3D) { 1, 1, 1 }, 646 }; 647 idx++; 648 } 649 } 650 *pCount = idx; 651 } 652 653 void radv_GetPhysicalDeviceMemoryProperties( 654 VkPhysicalDevice physicalDevice, 655 VkPhysicalDeviceMemoryProperties* pMemoryProperties) 656 { 657 RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice); 658 659 STATIC_ASSERT(RADV_MEM_TYPE_COUNT <= VK_MAX_MEMORY_TYPES); 660 661 pMemoryProperties->memoryTypeCount = RADV_MEM_TYPE_COUNT; 662 
pMemoryProperties->memoryTypes[RADV_MEM_TYPE_VRAM] = (VkMemoryType) { 663 .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT, 664 .heapIndex = RADV_MEM_HEAP_VRAM, 665 }; 666 pMemoryProperties->memoryTypes[RADV_MEM_TYPE_GTT_WRITE_COMBINE] = (VkMemoryType) { 667 .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | 668 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, 669 .heapIndex = RADV_MEM_HEAP_GTT, 670 }; 671 pMemoryProperties->memoryTypes[RADV_MEM_TYPE_VRAM_CPU_ACCESS] = (VkMemoryType) { 672 .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT | 673 VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | 674 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT, 675 .heapIndex = RADV_MEM_HEAP_VRAM_CPU_ACCESS, 676 }; 677 pMemoryProperties->memoryTypes[RADV_MEM_TYPE_GTT_CACHED] = (VkMemoryType) { 678 .propertyFlags = VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT | 679 VK_MEMORY_PROPERTY_HOST_COHERENT_BIT | 680 VK_MEMORY_PROPERTY_HOST_CACHED_BIT, 681 .heapIndex = RADV_MEM_HEAP_GTT, 682 }; 683 684 STATIC_ASSERT(RADV_MEM_HEAP_COUNT <= VK_MAX_MEMORY_HEAPS); 685 686 pMemoryProperties->memoryHeapCount = RADV_MEM_HEAP_COUNT; 687 pMemoryProperties->memoryHeaps[RADV_MEM_HEAP_VRAM] = (VkMemoryHeap) { 688 .size = physical_device->rad_info.vram_size - 689 physical_device->rad_info.visible_vram_size, 690 .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT, 691 }; 692 pMemoryProperties->memoryHeaps[RADV_MEM_HEAP_VRAM_CPU_ACCESS] = (VkMemoryHeap) { 693 .size = physical_device->rad_info.visible_vram_size, 694 .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT, 695 }; 696 pMemoryProperties->memoryHeaps[RADV_MEM_HEAP_GTT] = (VkMemoryHeap) { 697 .size = physical_device->rad_info.gart_size, 698 .flags = 0, 699 }; 700 } 701 702 static int 703 radv_queue_init(struct radv_device *device, struct radv_queue *queue, 704 int queue_family_index, int idx) 705 { 706 queue->_loader_data.loaderMagic = ICD_LOADER_MAGIC; 707 queue->device = device; 708 queue->queue_family_index = queue_family_index; 709 queue->queue_idx = idx; 710 711 queue->hw_ctx = 
device->ws->ctx_create(device->ws); 712 if (!queue->hw_ctx) 713 return VK_ERROR_OUT_OF_HOST_MEMORY; 714 715 return VK_SUCCESS; 716 } 717 718 static void 719 radv_queue_finish(struct radv_queue *queue) 720 { 721 if (queue->hw_ctx) 722 queue->device->ws->ctx_destroy(queue->hw_ctx); 723 } 724 725 VkResult radv_CreateDevice( 726 VkPhysicalDevice physicalDevice, 727 const VkDeviceCreateInfo* pCreateInfo, 728 const VkAllocationCallbacks* pAllocator, 729 VkDevice* pDevice) 730 { 731 RADV_FROM_HANDLE(radv_physical_device, physical_device, physicalDevice); 732 VkResult result; 733 struct radv_device *device; 734 735 for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) { 736 if (!is_extension_enabled(physical_device->extensions.ext_array, 737 physical_device->extensions.num_ext, 738 pCreateInfo->ppEnabledExtensionNames[i])) 739 return vk_error(VK_ERROR_EXTENSION_NOT_PRESENT); 740 } 741 742 device = vk_alloc2(&physical_device->instance->alloc, pAllocator, 743 sizeof(*device), 8, 744 VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); 745 if (!device) 746 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); 747 748 memset(device, 0, sizeof(*device)); 749 750 device->_loader_data.loaderMagic = ICD_LOADER_MAGIC; 751 device->instance = physical_device->instance; 752 device->physical_device = physical_device; 753 754 device->debug_flags = device->instance->debug_flags; 755 756 device->ws = physical_device->ws; 757 if (pAllocator) 758 device->alloc = *pAllocator; 759 else 760 device->alloc = physical_device->instance->alloc; 761 762 for (unsigned i = 0; i < pCreateInfo->queueCreateInfoCount; i++) { 763 const VkDeviceQueueCreateInfo *queue_create = &pCreateInfo->pQueueCreateInfos[i]; 764 uint32_t qfi = queue_create->queueFamilyIndex; 765 766 device->queues[qfi] = vk_alloc(&device->alloc, 767 queue_create->queueCount * sizeof(struct radv_queue), 8, VK_SYSTEM_ALLOCATION_SCOPE_DEVICE); 768 if (!device->queues[qfi]) { 769 result = VK_ERROR_OUT_OF_HOST_MEMORY; 770 goto fail; 771 } 772 773 
memset(device->queues[qfi], 0, queue_create->queueCount * sizeof(struct radv_queue)); 774 775 device->queue_count[qfi] = queue_create->queueCount; 776 777 for (unsigned q = 0; q < queue_create->queueCount; q++) { 778 result = radv_queue_init(device, &device->queues[qfi][q], qfi, q); 779 if (result != VK_SUCCESS) 780 goto fail; 781 } 782 } 783 784 result = radv_device_init_meta(device); 785 if (result != VK_SUCCESS) 786 goto fail; 787 788 radv_device_init_msaa(device); 789 790 for (int family = 0; family < RADV_MAX_QUEUE_FAMILIES; ++family) { 791 device->empty_cs[family] = device->ws->cs_create(device->ws, family); 792 switch (family) { 793 case RADV_QUEUE_GENERAL: 794 radeon_emit(device->empty_cs[family], PKT3(PKT3_CONTEXT_CONTROL, 1, 0)); 795 radeon_emit(device->empty_cs[family], CONTEXT_CONTROL_LOAD_ENABLE(1)); 796 radeon_emit(device->empty_cs[family], CONTEXT_CONTROL_SHADOW_ENABLE(1)); 797 break; 798 case RADV_QUEUE_COMPUTE: 799 radeon_emit(device->empty_cs[family], PKT3(PKT3_NOP, 0, 0)); 800 radeon_emit(device->empty_cs[family], 0); 801 break; 802 } 803 device->ws->cs_finalize(device->empty_cs[family]); 804 } 805 806 if (getenv("RADV_TRACE_FILE")) { 807 device->trace_bo = device->ws->buffer_create(device->ws, 4096, 8, 808 RADEON_DOMAIN_VRAM, RADEON_FLAG_CPU_ACCESS); 809 if (!device->trace_bo) 810 goto fail; 811 812 device->trace_id_ptr = device->ws->buffer_map(device->trace_bo); 813 if (!device->trace_id_ptr) 814 goto fail; 815 } 816 817 *pDevice = radv_device_to_handle(device); 818 return VK_SUCCESS; 819 820 fail: 821 if (device->trace_bo) 822 device->ws->buffer_destroy(device->trace_bo); 823 824 for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) { 825 for (unsigned q = 0; q < device->queue_count[i]; q++) 826 radv_queue_finish(&device->queues[i][q]); 827 if (device->queue_count[i]) 828 vk_free(&device->alloc, device->queues[i]); 829 } 830 831 vk_free(&device->alloc, device); 832 return result; 833 } 834 835 void radv_DestroyDevice( 836 VkDevice _device, 
837 const VkAllocationCallbacks* pAllocator) 838 { 839 RADV_FROM_HANDLE(radv_device, device, _device); 840 841 if (device->trace_bo) 842 device->ws->buffer_destroy(device->trace_bo); 843 844 for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) { 845 for (unsigned q = 0; q < device->queue_count[i]; q++) 846 radv_queue_finish(&device->queues[i][q]); 847 if (device->queue_count[i]) 848 vk_free(&device->alloc, device->queues[i]); 849 } 850 radv_device_finish_meta(device); 851 852 vk_free(&device->alloc, device); 853 } 854 855 VkResult radv_EnumerateInstanceExtensionProperties( 856 const char* pLayerName, 857 uint32_t* pPropertyCount, 858 VkExtensionProperties* pProperties) 859 { 860 if (pProperties == NULL) { 861 *pPropertyCount = ARRAY_SIZE(instance_extensions); 862 return VK_SUCCESS; 863 } 864 865 *pPropertyCount = MIN2(*pPropertyCount, ARRAY_SIZE(instance_extensions)); 866 typed_memcpy(pProperties, instance_extensions, *pPropertyCount); 867 868 if (*pPropertyCount < ARRAY_SIZE(instance_extensions)) 869 return VK_INCOMPLETE; 870 871 return VK_SUCCESS; 872 } 873 874 VkResult radv_EnumerateDeviceExtensionProperties( 875 VkPhysicalDevice physicalDevice, 876 const char* pLayerName, 877 uint32_t* pPropertyCount, 878 VkExtensionProperties* pProperties) 879 { 880 RADV_FROM_HANDLE(radv_physical_device, pdevice, physicalDevice); 881 882 if (pProperties == NULL) { 883 *pPropertyCount = pdevice->extensions.num_ext; 884 return VK_SUCCESS; 885 } 886 887 *pPropertyCount = MIN2(*pPropertyCount, pdevice->extensions.num_ext); 888 typed_memcpy(pProperties, pdevice->extensions.ext_array, *pPropertyCount); 889 890 if (*pPropertyCount < pdevice->extensions.num_ext) 891 return VK_INCOMPLETE; 892 893 return VK_SUCCESS; 894 } 895 896 VkResult radv_EnumerateInstanceLayerProperties( 897 uint32_t* pPropertyCount, 898 VkLayerProperties* pProperties) 899 { 900 if (pProperties == NULL) { 901 *pPropertyCount = 0; 902 return VK_SUCCESS; 903 } 904 905 /* None supported at this time */ 906 return 
vk_error(VK_ERROR_LAYER_NOT_PRESENT); 907 } 908 909 VkResult radv_EnumerateDeviceLayerProperties( 910 VkPhysicalDevice physicalDevice, 911 uint32_t* pPropertyCount, 912 VkLayerProperties* pProperties) 913 { 914 if (pProperties == NULL) { 915 *pPropertyCount = 0; 916 return VK_SUCCESS; 917 } 918 919 /* None supported at this time */ 920 return vk_error(VK_ERROR_LAYER_NOT_PRESENT); 921 } 922 923 void radv_GetDeviceQueue( 924 VkDevice _device, 925 uint32_t queueFamilyIndex, 926 uint32_t queueIndex, 927 VkQueue* pQueue) 928 { 929 RADV_FROM_HANDLE(radv_device, device, _device); 930 931 *pQueue = radv_queue_to_handle(&device->queues[queueFamilyIndex][queueIndex]); 932 } 933 934 static void radv_dump_trace(struct radv_device *device, 935 struct radeon_winsys_cs *cs) 936 { 937 const char *filename = getenv("RADV_TRACE_FILE"); 938 FILE *f = fopen(filename, "w"); 939 if (!f) { 940 fprintf(stderr, "Failed to write trace dump to %s\n", filename); 941 return; 942 } 943 944 fprintf(f, "Trace ID: %x\n", *device->trace_id_ptr); 945 device->ws->cs_dump(cs, f, *device->trace_id_ptr); 946 fclose(f); 947 } 948 949 VkResult radv_QueueSubmit( 950 VkQueue _queue, 951 uint32_t submitCount, 952 const VkSubmitInfo* pSubmits, 953 VkFence _fence) 954 { 955 RADV_FROM_HANDLE(radv_queue, queue, _queue); 956 RADV_FROM_HANDLE(radv_fence, fence, _fence); 957 struct radeon_winsys_fence *base_fence = fence ? fence->fence : NULL; 958 struct radeon_winsys_ctx *ctx = queue->hw_ctx; 959 int ret; 960 uint32_t max_cs_submission = queue->device->trace_bo ? 
1 : UINT32_MAX; 961 962 for (uint32_t i = 0; i < submitCount; i++) { 963 struct radeon_winsys_cs **cs_array; 964 bool can_patch = true; 965 uint32_t advance; 966 967 if (!pSubmits[i].commandBufferCount) 968 continue; 969 970 cs_array = malloc(sizeof(struct radeon_winsys_cs *) * 971 pSubmits[i].commandBufferCount); 972 973 for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) { 974 RADV_FROM_HANDLE(radv_cmd_buffer, cmd_buffer, 975 pSubmits[i].pCommandBuffers[j]); 976 assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY); 977 978 cs_array[j] = cmd_buffer->cs; 979 if ((cmd_buffer->usage_flags & VK_COMMAND_BUFFER_USAGE_SIMULTANEOUS_USE_BIT)) 980 can_patch = false; 981 } 982 983 for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j += advance) { 984 advance = MIN2(max_cs_submission, 985 pSubmits[i].commandBufferCount - j); 986 bool b = j == 0; 987 bool e = j + advance == pSubmits[i].commandBufferCount; 988 989 if (queue->device->trace_bo) 990 *queue->device->trace_id_ptr = 0; 991 992 ret = queue->device->ws->cs_submit(ctx, queue->queue_idx, cs_array + j, advance, 993 (struct radeon_winsys_sem **)pSubmits[i].pWaitSemaphores, 994 b ? pSubmits[i].waitSemaphoreCount : 0, 995 (struct radeon_winsys_sem **)pSubmits[i].pSignalSemaphores, 996 e ? 
pSubmits[i].signalSemaphoreCount : 0, 997 can_patch, base_fence); 998 999 if (ret) { 1000 radv_loge("failed to submit CS %d\n", i); 1001 abort(); 1002 } 1003 if (queue->device->trace_bo) { 1004 bool success = queue->device->ws->ctx_wait_idle( 1005 queue->hw_ctx, 1006 radv_queue_family_to_ring( 1007 queue->queue_family_index), 1008 queue->queue_idx); 1009 1010 if (!success) { /* Hang */ 1011 radv_dump_trace(queue->device, cs_array[j]); 1012 abort(); 1013 } 1014 } 1015 } 1016 free(cs_array); 1017 } 1018 1019 if (fence) { 1020 if (!submitCount) 1021 ret = queue->device->ws->cs_submit(ctx, queue->queue_idx, 1022 &queue->device->empty_cs[queue->queue_family_index], 1023 1, NULL, 0, NULL, 0, false, base_fence); 1024 1025 fence->submitted = true; 1026 } 1027 1028 return VK_SUCCESS; 1029 } 1030 1031 VkResult radv_QueueWaitIdle( 1032 VkQueue _queue) 1033 { 1034 RADV_FROM_HANDLE(radv_queue, queue, _queue); 1035 1036 queue->device->ws->ctx_wait_idle(queue->hw_ctx, 1037 radv_queue_family_to_ring(queue->queue_family_index), 1038 queue->queue_idx); 1039 return VK_SUCCESS; 1040 } 1041 1042 VkResult radv_DeviceWaitIdle( 1043 VkDevice _device) 1044 { 1045 RADV_FROM_HANDLE(radv_device, device, _device); 1046 1047 for (unsigned i = 0; i < RADV_MAX_QUEUE_FAMILIES; i++) { 1048 for (unsigned q = 0; q < device->queue_count[i]; q++) { 1049 radv_QueueWaitIdle(radv_queue_to_handle(&device->queues[i][q])); 1050 } 1051 } 1052 return VK_SUCCESS; 1053 } 1054 1055 PFN_vkVoidFunction radv_GetInstanceProcAddr( 1056 VkInstance instance, 1057 const char* pName) 1058 { 1059 return radv_lookup_entrypoint(pName); 1060 } 1061 1062 /* The loader wants us to expose a second GetInstanceProcAddr function 1063 * to work around certain LD_PRELOAD issues seen in apps. 
1064 */ 1065 PUBLIC 1066 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr( 1067 VkInstance instance, 1068 const char* pName); 1069 1070 PUBLIC 1071 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr( 1072 VkInstance instance, 1073 const char* pName) 1074 { 1075 return radv_GetInstanceProcAddr(instance, pName); 1076 } 1077 1078 PFN_vkVoidFunction radv_GetDeviceProcAddr( 1079 VkDevice device, 1080 const char* pName) 1081 { 1082 return radv_lookup_entrypoint(pName); 1083 } 1084 1085 VkResult radv_AllocateMemory( 1086 VkDevice _device, 1087 const VkMemoryAllocateInfo* pAllocateInfo, 1088 const VkAllocationCallbacks* pAllocator, 1089 VkDeviceMemory* pMem) 1090 { 1091 RADV_FROM_HANDLE(radv_device, device, _device); 1092 struct radv_device_memory *mem; 1093 VkResult result; 1094 enum radeon_bo_domain domain; 1095 uint32_t flags = 0; 1096 assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO); 1097 1098 if (pAllocateInfo->allocationSize == 0) { 1099 /* Apparently, this is allowed */ 1100 *pMem = VK_NULL_HANDLE; 1101 return VK_SUCCESS; 1102 } 1103 1104 mem = vk_alloc2(&device->alloc, pAllocator, sizeof(*mem), 8, 1105 VK_SYSTEM_ALLOCATION_SCOPE_OBJECT); 1106 if (mem == NULL) 1107 return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY); 1108 1109 uint64_t alloc_size = align_u64(pAllocateInfo->allocationSize, 4096); 1110 if (pAllocateInfo->memoryTypeIndex == RADV_MEM_TYPE_GTT_WRITE_COMBINE || 1111 pAllocateInfo->memoryTypeIndex == RADV_MEM_TYPE_GTT_CACHED) 1112 domain = RADEON_DOMAIN_GTT; 1113 else 1114 domain = RADEON_DOMAIN_VRAM; 1115 1116 if (pAllocateInfo->memoryTypeIndex == RADV_MEM_TYPE_VRAM) 1117 flags |= RADEON_FLAG_NO_CPU_ACCESS; 1118 else 1119 flags |= RADEON_FLAG_CPU_ACCESS; 1120 1121 if (pAllocateInfo->memoryTypeIndex == RADV_MEM_TYPE_GTT_WRITE_COMBINE) 1122 flags |= RADEON_FLAG_GTT_WC; 1123 1124 mem->bo = device->ws->buffer_create(device->ws, alloc_size, 65536, 1125 domain, flags); 1126 1127 if (!mem->bo) { 1128 result = 
VK_ERROR_OUT_OF_DEVICE_MEMORY; 1129 goto fail; 1130 } 1131 mem->type_index = pAllocateInfo->memoryTypeIndex; 1132 1133 *pMem = radv_device_memory_to_handle(mem); 1134 1135 return VK_SUCCESS; 1136 1137 fail: 1138 vk_free2(&device->alloc, pAllocator, mem); 1139 1140 return result; 1141 } 1142 1143 void radv_FreeMemory( 1144 VkDevice _device, 1145 VkDeviceMemory _mem, 1146 const VkAllocationCallbacks* pAllocator) 1147 { 1148 RADV_FROM_HANDLE(radv_device, device, _device); 1149 RADV_FROM_HANDLE(radv_device_memory, mem, _mem); 1150 1151 if (mem == NULL) 1152 return; 1153 1154 device->ws->buffer_destroy(mem->bo); 1155 mem->bo = NULL; 1156 1157 vk_free2(&device->alloc, pAllocator, mem); 1158 } 1159 1160 VkResult radv_MapMemory( 1161 VkDevice _device, 1162 VkDeviceMemory _memory, 1163 VkDeviceSize offset, 1164 VkDeviceSize size, 1165 VkMemoryMapFlags flags, 1166 void** ppData) 1167 { 1168 RADV_FROM_HANDLE(radv_device, device, _device); 1169 RADV_FROM_HANDLE(radv_device_memory, mem, _memory); 1170 1171 if (mem == NULL) { 1172 *ppData = NULL; 1173 return VK_SUCCESS; 1174 } 1175 1176 *ppData = device->ws->buffer_map(mem->bo); 1177 if (*ppData) { 1178 *ppData += offset; 1179 return VK_SUCCESS; 1180 } 1181 1182 return VK_ERROR_MEMORY_MAP_FAILED; 1183 } 1184 1185 void radv_UnmapMemory( 1186 VkDevice _device, 1187 VkDeviceMemory _memory) 1188 { 1189 RADV_FROM_HANDLE(radv_device, device, _device); 1190 RADV_FROM_HANDLE(radv_device_memory, mem, _memory); 1191 1192 if (mem == NULL) 1193 return; 1194 1195 device->ws->buffer_unmap(mem->bo); 1196 } 1197 1198 VkResult radv_FlushMappedMemoryRanges( 1199 VkDevice _device, 1200 uint32_t memoryRangeCount, 1201 const VkMappedMemoryRange* pMemoryRanges) 1202 { 1203 return VK_SUCCESS; 1204 } 1205 1206 VkResult radv_InvalidateMappedMemoryRanges( 1207 VkDevice _device, 1208 uint32_t memoryRangeCount, 1209 const VkMappedMemoryRange* pMemoryRanges) 1210 { 1211 return VK_SUCCESS; 1212 } 1213 1214 void radv_GetBufferMemoryRequirements( 1215 VkDevice 
 device,
	VkBuffer                                    _buffer,
	VkMemoryRequirements*                       pMemoryRequirements)
{
	RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);

	/* Buffers can live in any of the driver's memory types. */
	pMemoryRequirements->memoryTypeBits = (1u << RADV_MEM_TYPE_COUNT) - 1;

	pMemoryRequirements->size = buffer->size;
	pMemoryRequirements->alignment = 16;
}

/* vkGetImageMemoryRequirements: size/alignment were computed at image
 * creation time and stored on the radv_image. */
void radv_GetImageMemoryRequirements(
	VkDevice                                    device,
	VkImage                                     _image,
	VkMemoryRequirements*                       pMemoryRequirements)
{
	RADV_FROM_HANDLE(radv_image, image, _image);

	/* Images can live in any of the driver's memory types. */
	pMemoryRequirements->memoryTypeBits = (1u << RADV_MEM_TYPE_COUNT) - 1;

	pMemoryRequirements->size = image->size;
	pMemoryRequirements->alignment = image->alignment;
}

/* Sparse images are not implemented. */
void radv_GetImageSparseMemoryRequirements(
	VkDevice                                    device,
	VkImage                                     image,
	uint32_t*                                   pSparseMemoryRequirementCount,
	VkSparseImageMemoryRequirements*            pSparseMemoryRequirements)
{
	stub();
}

/* Lazily-allocated memory is not used, so nothing is ever "committed". */
void radv_GetDeviceMemoryCommitment(
	VkDevice                                    device,
	VkDeviceMemory                              memory,
	VkDeviceSize*                               pCommittedMemoryInBytes)
{
	*pCommittedMemoryInBytes = 0;
}

/* vkBindBufferMemory: record the backing BO and offset on the buffer.
 * A NULL memory handle unbinds. */
VkResult radv_BindBufferMemory(
	VkDevice                                    device,
	VkBuffer                                    _buffer,
	VkDeviceMemory                              _memory,
	VkDeviceSize                                memoryOffset)
{
	RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
	RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);

	if (mem) {
		buffer->bo = mem->bo;
		buffer->offset = memoryOffset;
	} else {
		buffer->bo = NULL;
		buffer->offset = 0;
	}

	return VK_SUCCESS;
}

/* vkBindImageMemory: record the backing BO and offset on the image.
 * A NULL memory handle unbinds. */
VkResult radv_BindImageMemory(
	VkDevice                                    device,
	VkImage                                     _image,
	VkDeviceMemory                              _memory,
	VkDeviceSize                                memoryOffset)
{
	RADV_FROM_HANDLE(radv_device_memory, mem, _memory);
	RADV_FROM_HANDLE(radv_image, image, _image);

	if (mem) {
		image->bo = mem->bo;
		image->offset = memoryOffset;
	} else {
		image->bo = NULL;
		image->offset = 0;
	}

	return VK_SUCCESS;
}

/* Sparse binding is not implemented. */
VkResult radv_QueueBindSparse(
	VkQueue                                     queue,
	uint32_t                                    bindInfoCount,
	const VkBindSparseInfo*                     pBindInfo,
	VkFence                                     fence)
{
	stub_return(VK_ERROR_INCOMPATIBLE_DRIVER);
}

/* vkCreateFence: wraps a winsys fence plus submitted/signalled CPU-side
 * state used by WaitForFences/GetFenceStatus. */
VkResult radv_CreateFence(
	VkDevice                                    _device,
	const VkFenceCreateInfo*                    pCreateInfo,
	const VkAllocationCallbacks*                pAllocator,
	VkFence*                                    pFence)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	struct radv_fence *fence = vk_alloc2(&device->alloc, pAllocator,
					     sizeof(*fence), 8,
					     VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);

	if (!fence)
		return VK_ERROR_OUT_OF_HOST_MEMORY;

	memset(fence, 0, sizeof(*fence));
	fence->submitted = false;
	/* A fence created signaled is considered signalled without ever
	 * having been submitted. */
	fence->signalled = !!(pCreateInfo->flags & VK_FENCE_CREATE_SIGNALED_BIT);
	fence->fence = device->ws->create_fence();
	if (!fence->fence) {
		vk_free2(&device->alloc, pAllocator, fence);
		return VK_ERROR_OUT_OF_HOST_MEMORY;
	}

	*pFence = radv_fence_to_handle(fence);

	return VK_SUCCESS;
}

/* vkDestroyFence (no-op for VK_NULL_HANDLE). */
void radv_DestroyFence(
	VkDevice                                    _device,
	VkFence                                     _fence,
	const VkAllocationCallbacks*                pAllocator)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	RADV_FROM_HANDLE(radv_fence, fence, _fence);

	if (!fence)
		return;
	device->ws->destroy_fence(fence->fence);
	vk_free2(&device->alloc, pAllocator, fence);
}

/* Convert a relative timeout (ns) to an absolute CLOCK_MONOTONIC deadline,
 * clamping so the addition cannot overflow uint64_t. */
static uint64_t radv_get_absolute_timeout(uint64_t timeout)
{
	uint64_t current_time;
	struct timespec tv;

	clock_gettime(CLOCK_MONOTONIC, &tv);
	current_time = tv.tv_nsec + tv.tv_sec*1000000000ull;

	timeout = MIN2(UINT64_MAX - current_time, timeout);

	return current_time + timeout;
}

/* vkWaitForFences: wait until all (or, unimplemented: any) fences signal
 * or the timeout expires. */
VkResult radv_WaitForFences(
	VkDevice                                    _device,
	uint32_t                                    fenceCount,
	const VkFence*                              pFences,
	VkBool32                                    waitAll,
	uint64_t                                    timeout)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	timeout = radv_get_absolute_timeout(timeout);

	/* "Wait for any" semantics are not implemented; fall through and wait
	 * for all, which is stricter but spec-visible as a slowdown only. */
	if (!waitAll && fenceCount > 1) {
		fprintf(stderr, "radv: WaitForFences without waitAll not implemented yet\n");
	}

	for (uint32_t i = 0; i < fenceCount; ++i) {
		RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
		bool expired = false;

		if (fence->signalled)
			continue;

		/* NOTE(review): an unsubmitted fence immediately returns
		 * VK_TIMEOUT instead of waiting for a future submission —
		 * verify this is acceptable for the targeted apps. */
		if (!fence->submitted)
			return VK_TIMEOUT;

		expired = device->ws->fence_wait(device->ws, fence->fence, true, timeout);
		if (!expired)
			return VK_TIMEOUT;

		fence->signalled = true;
	}

	return VK_SUCCESS;
}

/* vkResetFences: clear the CPU-side submitted/signalled state. */
VkResult radv_ResetFences(VkDevice device,
			  uint32_t fenceCount,
			  const VkFence *pFences)
{
	for (unsigned i = 0; i < fenceCount; ++i) {
		RADV_FROM_HANDLE(radv_fence, fence, pFences[i]);
		fence->submitted = fence->signalled = false;
	}

	return VK_SUCCESS;
}

/* vkGetFenceStatus: non-blocking poll (fence_wait with zero timeout). */
VkResult radv_GetFenceStatus(VkDevice _device, VkFence _fence)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	RADV_FROM_HANDLE(radv_fence, fence, _fence);

	if (fence->signalled)
		return VK_SUCCESS;
	if (!fence->submitted)
		return VK_NOT_READY;

	if (!device->ws->fence_wait(device->ws, fence->fence, false, 0))
		return VK_NOT_READY;

	return VK_SUCCESS;
}


// Queue semaphore functions

/* vkCreateSemaphore: the handle is the winsys semaphore pointer itself —
 * no wrapper struct is allocated. */
VkResult radv_CreateSemaphore(
	VkDevice                                    _device,
	const VkSemaphoreCreateInfo*                pCreateInfo,
	const VkAllocationCallbacks*                pAllocator,
	VkSemaphore*                                pSemaphore)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	struct radeon_winsys_sem *sem;

	sem = device->ws->create_sem(device->ws);
	if (!sem)
		return VK_ERROR_OUT_OF_HOST_MEMORY;

	*pSemaphore = (VkSemaphore)sem;
	return VK_SUCCESS;
}

/* vkDestroySemaphore (no-op for VK_NULL_HANDLE). */
void radv_DestroySemaphore(
	VkDevice                                    _device,
	VkSemaphore                                 _semaphore,
	const VkAllocationCallbacks*                pAllocator)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	struct radeon_winsys_sem *sem;
	if (!_semaphore)
		return;

	sem = (struct radeon_winsys_sem *)_semaphore;
	device->ws->destroy_sem(sem);
}

/* vkCreateEvent: an event is an 8-byte GTT buffer whose first qword holds
 * the set(1)/reset(0) state, mapped persistently for CPU access. */
VkResult radv_CreateEvent(
	VkDevice                                    _device,
	const VkEventCreateInfo*                    pCreateInfo,
	const VkAllocationCallbacks*                pAllocator,
	VkEvent*                                    pEvent)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	struct radv_event *event = vk_alloc2(&device->alloc, pAllocator,
					     sizeof(*event), 8,
					     VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);

	if (!event)
		return VK_ERROR_OUT_OF_HOST_MEMORY;

	event->bo = device->ws->buffer_create(device->ws, 8, 8,
					      RADEON_DOMAIN_GTT,
					      RADEON_FLAG_CPU_ACCESS);
	if (!event->bo) {
		vk_free2(&device->alloc, pAllocator, event);
		return VK_ERROR_OUT_OF_DEVICE_MEMORY;
	}

	/* NOTE(review): buffer_map() result is not checked; a failed map
	 * would leave event->map NULL and crash in Get/Set/ResetEvent. */
	event->map = (uint64_t*)device->ws->buffer_map(event->bo);

	*pEvent = radv_event_to_handle(event);

	return VK_SUCCESS;
}

/* vkDestroyEvent (no-op for VK_NULL_HANDLE). */
void radv_DestroyEvent(
	VkDevice                                    _device,
	VkEvent                                     _event,
	const VkAllocationCallbacks*                pAllocator)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	RADV_FROM_HANDLE(radv_event, event, _event);

	if (!event)
		return;
	device->ws->buffer_destroy(event->bo);
	vk_free2(&device->alloc, pAllocator, event);
}

/* vkGetEventStatus: read the mapped status word. */
VkResult radv_GetEventStatus(
	VkDevice                                    _device,
	VkEvent                                     _event)
{
	RADV_FROM_HANDLE(radv_event, event, _event);

	if (*event->map == 1)
		return VK_EVENT_SET;
	return VK_EVENT_RESET;
}

/* vkSetEvent: CPU-side set of the status word. */
VkResult radv_SetEvent(
	VkDevice                                    _device,
	VkEvent                                     _event)
{
	RADV_FROM_HANDLE(radv_event, event, _event);
	*event->map = 1;

	return VK_SUCCESS;
}

/* vkResetEvent: CPU-side clear of the status word. */
VkResult radv_ResetEvent(
	VkDevice                                    _device,
	VkEvent
 _event)
{
	RADV_FROM_HANDLE(radv_event, event, _event);
	*event->map = 0;

	return VK_SUCCESS;
}

/* vkCreateBuffer: buffers carry only size/usage until memory is bound via
 * vkBindBufferMemory; no BO is allocated here. */
VkResult radv_CreateBuffer(
	VkDevice                                    _device,
	const VkBufferCreateInfo*                   pCreateInfo,
	const VkAllocationCallbacks*                pAllocator,
	VkBuffer*                                   pBuffer)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	struct radv_buffer *buffer;

	assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO);

	buffer = vk_alloc2(&device->alloc, pAllocator, sizeof(*buffer), 8,
			   VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
	if (buffer == NULL)
		return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

	buffer->size = pCreateInfo->size;
	buffer->usage = pCreateInfo->usage;
	buffer->bo = NULL;
	buffer->offset = 0;

	*pBuffer = radv_buffer_to_handle(buffer);

	return VK_SUCCESS;
}

/* vkDestroyBuffer: frees only the wrapper — the bound memory is owned by
 * its VkDeviceMemory.  No-op for VK_NULL_HANDLE. */
void radv_DestroyBuffer(
	VkDevice                                    _device,
	VkBuffer                                    _buffer,
	const VkAllocationCallbacks*                pAllocator)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	RADV_FROM_HANDLE(radv_buffer, buffer, _buffer);

	if (!buffer)
		return;

	vk_free2(&device->alloc, pAllocator, buffer);
}

/* Pick the per-level tiling index from the surface, using the stencil
 * table when the stencil aspect is addressed. */
static inline unsigned
si_tile_mode_index(const struct radv_image *image, unsigned level, bool stencil)
{
	if (stencil)
		return image->surface.stencil_tiling_index[level];
	else
		return image->surface.tiling_index[level];
}

/* Fill a radv_color_buffer_info with the CB_* register values describing
 * one color attachment view (base address, CMASK/FMASK/DCC metadata,
 * format/swap/number-type, tiling).  Register layout per AMD SI/CIK/VI
 * register specs. */
static void
radv_initialise_color_surface(struct radv_device *device,
			      struct radv_color_buffer_info *cb,
			      struct radv_image_view *iview)
{
	const struct vk_format_description *desc;
	unsigned ntype, format, swap, endian;
	unsigned blend_clamp = 0, blend_bypass = 0;
	unsigned pitch_tile_max, slice_tile_max, tile_mode_index;
	uint64_t va;
	const struct radeon_surf *surf = &iview->image->surface;
	const struct radeon_surf_level *level_info =
		&surf->level[iview->base_mip];

	desc = vk_format_description(iview->vk_format);

	memset(cb, 0, sizeof(*cb));

	/* Base address of the selected mip level, in 256-byte units. */
	va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;
	va += level_info->offset;
	cb->cb_color_base = va >> 8;

	/* CMASK variables */
	va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;
	va += iview->image->cmask.offset;
	cb->cb_color_cmask = va >> 8;
	cb->cb_color_cmask_slice = iview->image->cmask.slice_tile_max;

	/* DCC base (harmless when DCC is unused; enable bit is set below). */
	va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;
	va += iview->image->dcc_offset;
	cb->cb_dcc_base = va >> 8;

	cb->cb_color_view = S_028C6C_SLICE_START(iview->base_layer) |
		S_028C6C_SLICE_MAX(iview->base_layer + iview->extent.depth - 1);

	cb->micro_tile_mode = iview->image->surface.micro_tile_mode;
	/* Pitch/slice are expressed in 8x8 tile units, minus one. */
	pitch_tile_max = level_info->nblk_x / 8 - 1;
	slice_tile_max = (level_info->nblk_x * level_info->nblk_y) / 64 - 1;
	tile_mode_index = si_tile_mode_index(iview->image, iview->base_mip, false);

	cb->cb_color_pitch = S_028C64_TILE_MAX(pitch_tile_max);
	cb->cb_color_slice = S_028C68_TILE_MAX(slice_tile_max);

	/* Intensity is implemented as Red, so treat it that way. */
	cb->cb_color_attrib = S_028C74_FORCE_DST_ALPHA_1(desc->swizzle[3] == VK_SWIZZLE_1) |
		S_028C74_TILE_MODE_INDEX(tile_mode_index);

	if (iview->image->samples > 1) {
		unsigned log_samples = util_logbase2(iview->image->samples);

		cb->cb_color_attrib |= S_028C74_NUM_SAMPLES(log_samples) |
			S_028C74_NUM_FRAGMENTS(log_samples);
	}

	if (iview->image->fmask.size) {
		va = device->ws->buffer_get_va(iview->bo) + iview->image->offset + iview->image->fmask.offset;
		if (device->physical_device->rad_info.chip_class >= CIK)
			cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(iview->image->fmask.pitch_in_pixels / 8 - 1);
		cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(iview->image->fmask.tile_mode_index);
		cb->cb_color_fmask = va >> 8;
		cb->cb_color_fmask_slice = S_028C88_TILE_MAX(iview->image->fmask.slice_tile_max);
	} else {
		/* This must be set for fast clear to work without FMASK. */
		if (device->physical_device->rad_info.chip_class >= CIK)
			cb->cb_color_pitch |= S_028C64_FMASK_TILE_MAX(pitch_tile_max);
		cb->cb_color_attrib |= S_028C74_FMASK_TILE_MODE_INDEX(tile_mode_index);
		cb->cb_color_fmask = cb->cb_color_base;
		cb->cb_color_fmask_slice = S_028C88_TILE_MAX(slice_tile_max);
	}

	ntype = radv_translate_color_numformat(iview->vk_format,
					       desc,
					       vk_format_get_first_non_void_channel(iview->vk_format));
	format = radv_translate_colorformat(iview->vk_format);
	if (format == V_028C70_COLOR_INVALID || ntype == ~0u)
		radv_finishme("Illegal color\n");
	swap = radv_translate_colorswap(iview->vk_format, FALSE);
	endian = radv_colorformat_endian_swap(format);

	/* blend clamp should be set for all NORM/SRGB types */
	if (ntype == V_028C70_NUMBER_UNORM ||
	    ntype == V_028C70_NUMBER_SNORM ||
	    ntype == V_028C70_NUMBER_SRGB)
		blend_clamp = 1;

	/* set blend bypass according to docs if SINT/UINT or
	   8/24 COLOR variants */
	if (ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT ||
	    format == V_028C70_COLOR_8_24 || format == V_028C70_COLOR_24_8 ||
	    format == V_028C70_COLOR_X24_8_32_FLOAT) {
		blend_clamp = 0;
		blend_bypass = 1;
	}
#if 0
	if ((ntype == V_028C70_NUMBER_UINT || ntype == V_028C70_NUMBER_SINT) &&
	    (format == V_028C70_COLOR_8 ||
	     format == V_028C70_COLOR_8_8 ||
	     format == V_028C70_COLOR_8_8_8_8))
		->color_is_int8 = true;
#endif
	cb->cb_color_info = S_028C70_FORMAT(format) |
		S_028C70_COMP_SWAP(swap) |
		S_028C70_BLEND_CLAMP(blend_clamp) |
		S_028C70_BLEND_BYPASS(blend_bypass) |
		S_028C70_SIMPLE_FLOAT(1) |
		S_028C70_ROUND_MODE(ntype != V_028C70_NUMBER_UNORM &&
				    ntype != V_028C70_NUMBER_SNORM &&
				    ntype != V_028C70_NUMBER_SRGB &&
				    format != V_028C70_COLOR_8_24 &&
				    format != V_028C70_COLOR_24_8) |
		S_028C70_NUMBER_TYPE(ntype) |
		S_028C70_ENDIAN(endian);
	/* MSAA color compression requires FMASK to be present. */
	if (iview->image->samples > 1)
		if (iview->image->fmask.size)
			cb->cb_color_info |= S_028C70_COMPRESSION(1);

	/* Fast clears are gated behind a debug flag here. */
	if (iview->image->cmask.size &&
	    (device->debug_flags & RADV_DEBUG_FAST_CLEARS))
		cb->cb_color_info |= S_028C70_FAST_CLEAR(1);

	if (iview->image->surface.dcc_size && level_info->dcc_enabled)
		cb->cb_color_info |= S_028C70_DCC_ENABLE(1);

	if (device->physical_device->rad_info.chip_class >= VI) {
		/* Smaller uncompressed block sizes for small-bpe MSAA surfaces. */
		unsigned max_uncompressed_block_size = 2;
		if (iview->image->samples > 1) {
			if (iview->image->surface.bpe == 1)
				max_uncompressed_block_size = 0;
			else if (iview->image->surface.bpe == 2)
				max_uncompressed_block_size = 1;
		}

		cb->cb_dcc_control = S_028C78_MAX_UNCOMPRESSED_BLOCK_SIZE(max_uncompressed_block_size) |
			S_028C78_INDEPENDENT_64B_BLOCKS(1);
	}

	/* This must be set for fast clear to work without FMASK.
	 */
	if (!iview->image->fmask.size &&
	    device->physical_device->rad_info.chip_class == SI) {
		unsigned bankh = util_logbase2(iview->image->surface.bankh);
		cb->cb_color_attrib |= S_028C74_FMASK_BANK_HEIGHT(bankh);
	}
}

/* Fill a radv_ds_buffer_info with the DB_* register values describing a
 * depth/stencil attachment view (format, poly-offset scaling, tiling,
 * HTILE).  Register layout per AMD SI/CIK register specs. */
static void
radv_initialise_ds_surface(struct radv_device *device,
			   struct radv_ds_buffer_info *ds,
			   struct radv_image_view *iview)
{
	unsigned level = iview->base_mip;
	unsigned format;
	uint64_t va, s_offs, z_offs;
	const struct radeon_surf_level *level_info = &iview->image->surface.level[level];
	memset(ds, 0, sizeof(*ds));
	/* Depth-bias unit scaling depends on the depth format's bit depth. */
	switch (iview->vk_format) {
	case VK_FORMAT_D24_UNORM_S8_UINT:
	case VK_FORMAT_X8_D24_UNORM_PACK32:
		ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-24);
		ds->offset_scale = 2.0f;
		break;
	case VK_FORMAT_D16_UNORM:
	case VK_FORMAT_D16_UNORM_S8_UINT:
		ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-16);
		ds->offset_scale = 4.0f;
		break;
	case VK_FORMAT_D32_SFLOAT:
	case VK_FORMAT_D32_SFLOAT_S8_UINT:
		ds->pa_su_poly_offset_db_fmt_cntl = S_028B78_POLY_OFFSET_NEG_NUM_DB_BITS(-23) |
			S_028B78_POLY_OFFSET_DB_IS_FLOAT_FMT(1);
		ds->offset_scale = 1.0f;
		break;
	default:
		break;
	}

	format = radv_translate_dbformat(iview->vk_format);
	if (format == V_028040_Z_INVALID) {
		fprintf(stderr, "Invalid DB format: %d, disabling DB.\n", iview->vk_format);
	}

	/* Separate depth and stencil base addresses for this mip level. */
	va = device->ws->buffer_get_va(iview->bo) + iview->image->offset;
	s_offs = z_offs = va;
	z_offs += iview->image->surface.level[level].offset;
	s_offs += iview->image->surface.stencil_level[level].offset;

	ds->db_depth_view = S_028008_SLICE_START(iview->base_layer) |
		S_028008_SLICE_MAX(iview->base_layer + iview->extent.depth - 1);
	ds->db_depth_info = S_02803C_ADDR5_SWIZZLE_MASK(1);
	ds->db_z_info = S_028040_FORMAT(format) | S_028040_ZRANGE_PRECISION(1);

	if (iview->image->samples > 1)
		ds->db_z_info |= S_028040_NUM_SAMPLES(util_logbase2(iview->image->samples));

	if (iview->image->surface.flags & RADEON_SURF_SBUFFER)
		ds->db_stencil_info = S_028044_FORMAT(V_028044_STENCIL_8);
	else
		ds->db_stencil_info = S_028044_FORMAT(V_028044_STENCIL_INVALID);

	if (device->physical_device->rad_info.chip_class >= CIK) {
		/* CIK+: tiling parameters are decoded from the kernel-provided
		 * tile mode arrays rather than a single index. */
		struct radeon_info *info = &device->physical_device->rad_info;
		unsigned tiling_index = iview->image->surface.tiling_index[level];
		unsigned stencil_index = iview->image->surface.stencil_tiling_index[level];
		unsigned macro_index = iview->image->surface.macro_tile_index;
		unsigned tile_mode = info->si_tile_mode_array[tiling_index];
		unsigned stencil_tile_mode = info->si_tile_mode_array[stencil_index];
		unsigned macro_mode = info->cik_macrotile_mode_array[macro_index];

		ds->db_depth_info |=
			S_02803C_ARRAY_MODE(G_009910_ARRAY_MODE(tile_mode)) |
			S_02803C_PIPE_CONFIG(G_009910_PIPE_CONFIG(tile_mode)) |
			S_02803C_BANK_WIDTH(G_009990_BANK_WIDTH(macro_mode)) |
			S_02803C_BANK_HEIGHT(G_009990_BANK_HEIGHT(macro_mode)) |
			S_02803C_MACRO_TILE_ASPECT(G_009990_MACRO_TILE_ASPECT(macro_mode)) |
			S_02803C_NUM_BANKS(G_009990_NUM_BANKS(macro_mode));
		ds->db_z_info |= S_028040_TILE_SPLIT(G_009910_TILE_SPLIT(tile_mode));
		ds->db_stencil_info |= S_028044_TILE_SPLIT(G_009910_TILE_SPLIT(stencil_tile_mode));
	} else {
		/* SI: a plain tile mode index per aspect. */
		unsigned tile_mode_index = si_tile_mode_index(iview->image, level, false);
		ds->db_z_info |= S_028040_TILE_MODE_INDEX(tile_mode_index);
		tile_mode_index = si_tile_mode_index(iview->image, level, true);
		ds->db_stencil_info |= S_028044_TILE_MODE_INDEX(tile_mode_index);
	}

	/* HTILE is only used for the base mip level. */
	if (iview->image->htile.size && !level) {
		ds->db_z_info |= S_028040_TILE_SURFACE_ENABLE(1) |
			S_028040_ALLOW_EXPCLEAR(1);

		if (iview->image->surface.flags & RADEON_SURF_SBUFFER) {
			/* Workaround: For a not yet understood reason, the
			 * combination of MSAA, fast stencil clear and stencil
			 * decompress messes with subsequent stencil buffer
			 * uses. Problem was reproduced on Verde, Bonaire,
			 * Tonga, and Carrizo.
			 *
			 * Disabling EXPCLEAR works around the problem.
			 *
			 * Check piglit's arb_texture_multisample-stencil-clear
			 * test if you want to try changing this.
			 */
			if (iview->image->samples <= 1)
				ds->db_stencil_info |= S_028044_ALLOW_EXPCLEAR(1);
		} else
			/* Use all of the htile_buffer for depth if there's no stencil. */
			ds->db_stencil_info |= S_028044_TILE_STENCIL_DISABLE(1);

		va = device->ws->buffer_get_va(iview->bo) + iview->image->offset +
		     iview->image->htile.offset;
		ds->db_htile_data_base = va >> 8;
		ds->db_htile_surface = S_028ABC_FULL_CACHE(1);
	} else {
		ds->db_htile_data_base = 0;
		ds->db_htile_surface = 0;
	}

	ds->db_z_read_base = ds->db_z_write_base = z_offs >> 8;
	ds->db_stencil_read_base = ds->db_stencil_write_base = s_offs >> 8;

	ds->db_depth_size = S_028058_PITCH_TILE_MAX((level_info->nblk_x / 8) - 1) |
		S_028058_HEIGHT_TILE_MAX((level_info->nblk_y / 8) - 1);
	ds->db_depth_slice = S_02805C_SLICE_TILE_MAX((level_info->nblk_x * level_info->nblk_y) / 64 - 1);
}

/* vkCreateFramebuffer: store the attachment views and precompute the
 * CB/DB register state for each one. */
VkResult radv_CreateFramebuffer(
	VkDevice                                    _device,
	const VkFramebufferCreateInfo*              pCreateInfo,
	const VkAllocationCallbacks*                pAllocator,
	VkFramebuffer*                              pFramebuffer)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	struct radv_framebuffer *framebuffer;

	assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO);

	/* Attachment info array is allocated inline after the framebuffer. */
	size_t size = sizeof(*framebuffer) +
		sizeof(struct radv_attachment_info) * pCreateInfo->attachmentCount;
	framebuffer = vk_alloc2(&device->alloc, pAllocator, size, 8,
				VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
	if
 (framebuffer == NULL)
		return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

	framebuffer->attachment_count = pCreateInfo->attachmentCount;
	for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) {
		VkImageView _iview = pCreateInfo->pAttachments[i];
		struct radv_image_view *iview = radv_image_view_from_handle(_iview);
		framebuffer->attachments[i].attachment = iview;
		/* Precompute the register state per attachment aspect. */
		if (iview->aspect_mask & VK_IMAGE_ASPECT_COLOR_BIT) {
			radv_initialise_color_surface(device, &framebuffer->attachments[i].cb, iview);
		} else if (iview->aspect_mask & (VK_IMAGE_ASPECT_DEPTH_BIT | VK_IMAGE_ASPECT_STENCIL_BIT)) {
			radv_initialise_ds_surface(device, &framebuffer->attachments[i].ds, iview);
		}
	}

	framebuffer->width = pCreateInfo->width;
	framebuffer->height = pCreateInfo->height;
	framebuffer->layers = pCreateInfo->layers;

	*pFramebuffer = radv_framebuffer_to_handle(framebuffer);
	return VK_SUCCESS;
}

/* vkDestroyFramebuffer (no-op for VK_NULL_HANDLE). */
void radv_DestroyFramebuffer(
	VkDevice                                    _device,
	VkFramebuffer                               _fb,
	const VkAllocationCallbacks*                pAllocator)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	RADV_FROM_HANDLE(radv_framebuffer, fb, _fb);

	if (!fb)
		return;
	vk_free2(&device->alloc, pAllocator, fb);
}

/* Translate a Vulkan sampler address mode to the SQ_TEX clamp field. */
static unsigned radv_tex_wrap(VkSamplerAddressMode address_mode)
{
	switch (address_mode) {
	case VK_SAMPLER_ADDRESS_MODE_REPEAT:
		return V_008F30_SQ_TEX_WRAP;
	case VK_SAMPLER_ADDRESS_MODE_MIRRORED_REPEAT:
		return V_008F30_SQ_TEX_MIRROR;
	case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_EDGE:
		return V_008F30_SQ_TEX_CLAMP_LAST_TEXEL;
	case VK_SAMPLER_ADDRESS_MODE_CLAMP_TO_BORDER:
		return V_008F30_SQ_TEX_CLAMP_BORDER;
	case VK_SAMPLER_ADDRESS_MODE_MIRROR_CLAMP_TO_EDGE:
		return V_008F30_SQ_TEX_MIRROR_ONCE_LAST_TEXEL;
	default:
		unreachable("illegal tex wrap mode");
		break;
	}
}

/* Translate a Vulkan compare op to the SQ_TEX depth-compare field. */
static unsigned
radv_tex_compare(VkCompareOp op)
{
	switch (op) {
	case VK_COMPARE_OP_NEVER:
		return V_008F30_SQ_TEX_DEPTH_COMPARE_NEVER;
	case VK_COMPARE_OP_LESS:
		return V_008F30_SQ_TEX_DEPTH_COMPARE_LESS;
	case VK_COMPARE_OP_EQUAL:
		return V_008F30_SQ_TEX_DEPTH_COMPARE_EQUAL;
	case VK_COMPARE_OP_LESS_OR_EQUAL:
		return V_008F30_SQ_TEX_DEPTH_COMPARE_LESSEQUAL;
	case VK_COMPARE_OP_GREATER:
		return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATER;
	case VK_COMPARE_OP_NOT_EQUAL:
		return V_008F30_SQ_TEX_DEPTH_COMPARE_NOTEQUAL;
	case VK_COMPARE_OP_GREATER_OR_EQUAL:
		return V_008F30_SQ_TEX_DEPTH_COMPARE_GREATEREQUAL;
	case VK_COMPARE_OP_ALWAYS:
		return V_008F30_SQ_TEX_DEPTH_COMPARE_ALWAYS;
	default:
		unreachable("illegal compare mode");
		break;
	}
}

/* Translate a Vulkan min/mag filter to the SQ_TEX XY filter field,
 * selecting the anisotropic variant when anisotropy is enabled. */
static unsigned
radv_tex_filter(VkFilter filter, unsigned max_ansio)
{
	switch (filter) {
	case VK_FILTER_NEAREST:
		return (max_ansio > 1 ? V_008F38_SQ_TEX_XY_FILTER_ANISO_POINT :
			V_008F38_SQ_TEX_XY_FILTER_POINT);
	case VK_FILTER_LINEAR:
		return (max_ansio > 1 ?
			V_008F38_SQ_TEX_XY_FILTER_ANISO_BILINEAR :
			V_008F38_SQ_TEX_XY_FILTER_BILINEAR);
	case VK_FILTER_CUBIC_IMG:
	default:
		/* Cubic filtering is not supported by the hardware path here. */
		fprintf(stderr, "illegal texture filter");
		return 0;
	}
}

/* Translate a Vulkan mipmap mode to the SQ_TEX Z (mip) filter field. */
static unsigned
radv_tex_mipfilter(VkSamplerMipmapMode mode)
{
	switch (mode) {
	case VK_SAMPLER_MIPMAP_MODE_NEAREST:
		return V_008F38_SQ_TEX_Z_FILTER_POINT;
	case VK_SAMPLER_MIPMAP_MODE_LINEAR:
		return V_008F38_SQ_TEX_Z_FILTER_LINEAR;
	default:
		return V_008F38_SQ_TEX_Z_FILTER_NONE;
	}
}

/* Translate a Vulkan border color to the SQ_TEX border color field.
 * Custom border colors (BORDER_COLOR_PTR) are not handled here. */
static unsigned
radv_tex_bordercolor(VkBorderColor bcolor)
{
	switch (bcolor) {
	case VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK:
	case VK_BORDER_COLOR_INT_TRANSPARENT_BLACK:
		return V_008F3C_SQ_TEX_BORDER_COLOR_TRANS_BLACK;
	case VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK:
	case VK_BORDER_COLOR_INT_OPAQUE_BLACK:
		return V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_BLACK;
	case VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE:
	case VK_BORDER_COLOR_INT_OPAQUE_WHITE:
		return V_008F3C_SQ_TEX_BORDER_COLOR_OPAQUE_WHITE;
	default:
		break;
	}
	return 0;
}

/* Encode a max-anisotropy sample count into the hardware's log2-style
 * ratio field: 1x->0, 2x->1, 4x->2, 8x->3, 16x->4. */
static unsigned
radv_tex_aniso_filter(unsigned filter)
{
	if (filter < 2)
		return 0;
	if (filter < 4)
		return 1;
	if (filter < 8)
		return 2;
	if (filter < 16)
		return 3;
	return 4;
}

/* Pack a VkSamplerCreateInfo into the four SQ_IMG_SAMP_WORD* dwords. */
static void
radv_init_sampler(struct radv_device *device,
		  struct radv_sampler *sampler,
		  const VkSamplerCreateInfo *pCreateInfo)
{
	uint32_t max_aniso = pCreateInfo->anisotropyEnable && pCreateInfo->maxAnisotropy > 1.0 ?
					(uint32_t) pCreateInfo->maxAnisotropy : 0;
	uint32_t max_aniso_ratio = radv_tex_aniso_filter(max_aniso);
	bool is_vi = (device->physical_device->rad_info.chip_class >= VI);

	sampler->state[0] = (S_008F30_CLAMP_X(radv_tex_wrap(pCreateInfo->addressModeU)) |
			     S_008F30_CLAMP_Y(radv_tex_wrap(pCreateInfo->addressModeV)) |
			     S_008F30_CLAMP_Z(radv_tex_wrap(pCreateInfo->addressModeW)) |
			     S_008F30_MAX_ANISO_RATIO(max_aniso_ratio) |
			     S_008F30_DEPTH_COMPARE_FUNC(radv_tex_compare(pCreateInfo->compareOp)) |
			     S_008F30_FORCE_UNNORMALIZED(pCreateInfo->unnormalizedCoordinates ? 1 : 0) |
			     S_008F30_ANISO_THRESHOLD(max_aniso_ratio >> 1) |
			     S_008F30_ANISO_BIAS(max_aniso_ratio) |
			     S_008F30_DISABLE_CUBE_WRAP(0) |
			     S_008F30_COMPAT_MODE(is_vi));
	/* LOD values are unsigned 4.8 fixed point, bias is signed 5.8. */
	sampler->state[1] = (S_008F34_MIN_LOD(S_FIXED(CLAMP(pCreateInfo->minLod, 0, 15), 8)) |
			     S_008F34_MAX_LOD(S_FIXED(CLAMP(pCreateInfo->maxLod, 0, 15), 8)) |
			     S_008F34_PERF_MIP(max_aniso_ratio ? max_aniso_ratio + 6 : 0));
	sampler->state[2] = (S_008F38_LOD_BIAS(S_FIXED(CLAMP(pCreateInfo->mipLodBias, -16, 16), 8)) |
			     S_008F38_XY_MAG_FILTER(radv_tex_filter(pCreateInfo->magFilter, max_aniso)) |
			     S_008F38_XY_MIN_FILTER(radv_tex_filter(pCreateInfo->minFilter, max_aniso)) |
			     S_008F38_MIP_FILTER(radv_tex_mipfilter(pCreateInfo->mipmapMode)) |
			     S_008F38_MIP_POINT_PRECLAMP(0) |
			     S_008F38_DISABLE_LSB_CEIL(1) |
			     S_008F38_FILTER_PREC_FIX(1) |
			     S_008F38_ANISO_OVERRIDE(is_vi));
	sampler->state[3] = (S_008F3C_BORDER_COLOR_PTR(0) |
			     S_008F3C_BORDER_COLOR_TYPE(radv_tex_bordercolor(pCreateInfo->borderColor)));
}

/* vkCreateSampler: allocate the wrapper and pack the hardware state. */
VkResult radv_CreateSampler(
	VkDevice                                    _device,
	const VkSamplerCreateInfo*                  pCreateInfo,
	const VkAllocationCallbacks*                pAllocator,
	VkSampler*                                  pSampler)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	struct radv_sampler *sampler;

	assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_SAMPLER_CREATE_INFO);

	sampler = vk_alloc2(&device->alloc, pAllocator, sizeof(*sampler), 8,
			    VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
	if (!sampler)
		return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

	radv_init_sampler(device, sampler, pCreateInfo);
	*pSampler = radv_sampler_to_handle(sampler);

	return VK_SUCCESS;
}

/* vkDestroySampler (no-op for VK_NULL_HANDLE). */
void radv_DestroySampler(
	VkDevice                                    _device,
	VkSampler                                   _sampler,
	const VkAllocationCallbacks*                pAllocator)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	RADV_FROM_HANDLE(radv_sampler, sampler, _sampler);

	if (!sampler)
		return;
	vk_free2(&device->alloc, pAllocator, sampler);
}


/* vk_icd.h does not declare this function, so we declare it here to
 * suppress Wmissing-prototypes.
 */
PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t *pSupportedVersion);

/* Negotiate the loader/ICD interface version: clamp the loader's request
 * to the highest version this driver implements (v3). */
PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t *pSupportedVersion)
{
	/* For the full details on loader interface versioning, see
	 * <https://github.com/KhronosGroup/Vulkan-LoaderAndValidationLayers/blob/master/loader/LoaderAndLayerInterface.md>.
	 * What follows is a condensed summary, to help you navigate the large and
	 * confusing official doc.
	 *
	 *    - Loader interface v0 is incompatible with later versions. We don't
	 *      support it.
	 *
	 *    - In loader interface v1:
	 *        - The first ICD entrypoint called by the loader is
	 *          vk_icdGetInstanceProcAddr(). The ICD must statically expose this
	 *          entrypoint.
	 *        - The ICD must statically expose no other Vulkan symbol unless it is
	 *          linked with -Bsymbolic.
	 *        - Each dispatchable Vulkan handle created by the ICD must be
	 *          a pointer to a struct whose first member is VK_LOADER_DATA. The
	 *          ICD must initialize VK_LOADER_DATA.loadMagic to ICD_LOADER_MAGIC.
	 *        - The loader implements vkCreate{PLATFORM}SurfaceKHR() and
	 *          vkDestroySurfaceKHR(). The ICD must be capable of working with
	 *          such loader-managed surfaces.
	 *
	 *    - Loader interface v2 differs from v1 in:
	 *        - The first ICD entrypoint called by the loader is
	 *          vk_icdNegotiateLoaderICDInterfaceVersion(). The ICD must
	 *          statically expose this entrypoint.
	 *
	 *    - Loader interface v3 differs from v2 in:
	 *        - The ICD must implement vkCreate{PLATFORM}SurfaceKHR(),
	 *          vkDestroySurfaceKHR(), and other API which uses VKSurfaceKHR,
	 *          because the loader no longer does so.
	 */
	*pSupportedVersion = MIN2(*pSupportedVersion, 3u);
	return VK_SUCCESS;
}