/*
 * Copyright 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <dlfcn.h>
#include <assert.h>
#include <stdbool.h>
#include <string.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <unistd.h>
#include <fcntl.h>

#include "anv_private.h"
#include "util/strtod.h"
#include "util/debug.h"

#include "genxml/gen7_pack.h"

struct anv_dispatch_table dtable;

static void
compiler_debug_log(void *data, const char *fmt, ...)
{ }

static void
compiler_perf_log(void *data, const char *fmt, ...)
{
   va_list args;
   va_start(args, fmt);

   if (unlikely(INTEL_DEBUG & DEBUG_PERF))
      vfprintf(stderr, fmt, args);

   va_end(args);
}

static bool
anv_get_function_timestamp(void *ptr, uint32_t* timestamp)
{
   Dl_info info;
   struct stat st;

   if (!dladdr(ptr, &info) || !info.dli_fname)
      return false;

   if (stat(info.dli_fname, &st))
      return false;

   *timestamp = st.st_mtim.tv_sec;
   return true;
}

static bool
anv_device_get_cache_uuid(void *uuid)
{
   uint32_t timestamp;

   memset(uuid, 0, VK_UUID_SIZE);
   if (!anv_get_function_timestamp(anv_device_get_cache_uuid, &timestamp))
      return false;

   snprintf(uuid, VK_UUID_SIZE, "anv-%d", timestamp);
   return true;
}
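/* The pipeline cache UUID produced above is simply the modification time of
 * the shared object containing this driver, printed as "anv-<mtime>" into a
 * zeroed VK_UUID_SIZE buffer. For example (illustrative value only), an
 * mtime of 1470000000 yields the bytes "anv-1470000000" followed by zeros.
 * Rebuilding or reinstalling the driver changes the mtime and therefore
 * invalidates any pipeline caches keyed on this UUID.
 */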
static VkResult
anv_physical_device_init(struct anv_physical_device *device,
                         struct anv_instance *instance,
                         const char *path)
{
   VkResult result;
   int fd;

   fd = open(path, O_RDWR | O_CLOEXEC);
   if (fd < 0)
      return vk_error(VK_ERROR_INCOMPATIBLE_DRIVER);

   device->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
   device->instance = instance;

   assert(strlen(path) < ARRAY_SIZE(device->path));
   strncpy(device->path, path, ARRAY_SIZE(device->path));

   device->chipset_id = anv_gem_get_param(fd, I915_PARAM_CHIPSET_ID);
   if (!device->chipset_id) {
      result = vk_error(VK_ERROR_INCOMPATIBLE_DRIVER);
      goto fail;
   }

   device->name = gen_get_device_name(device->chipset_id);
   if (!gen_get_device_info(device->chipset_id, &device->info)) {
      result = vk_error(VK_ERROR_INCOMPATIBLE_DRIVER);
      goto fail;
   }

   if (device->info.is_haswell) {
      fprintf(stderr, "WARNING: Haswell Vulkan support is incomplete\n");
   } else if (device->info.gen == 7 && !device->info.is_baytrail) {
      fprintf(stderr, "WARNING: Ivy Bridge Vulkan support is incomplete\n");
   } else if (device->info.gen == 7 && device->info.is_baytrail) {
      fprintf(stderr, "WARNING: Bay Trail Vulkan support is incomplete\n");
   } else if (device->info.gen >= 8) {
      /* Broadwell, Cherryview, Skylake, Broxton, Kabylake are as fully
       * supported as anything */
   } else {
      result = vk_errorf(VK_ERROR_INCOMPATIBLE_DRIVER,
                         "Vulkan not yet supported on %s", device->name);
      goto fail;
   }

   device->cmd_parser_version = -1;
   if (device->info.gen == 7) {
      device->cmd_parser_version =
         anv_gem_get_param(fd, I915_PARAM_CMD_PARSER_VERSION);
      if (device->cmd_parser_version == -1) {
         result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED,
                            "failed to get command parser version");
         goto fail;
      }
   }

   if (anv_gem_get_aperture(fd, &device->aperture_size) == -1) {
      result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED,
                         "failed to get aperture size: %m");
      goto fail;
   }

   if (!anv_gem_get_param(fd, I915_PARAM_HAS_WAIT_TIMEOUT)) {
      result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED,
                         "kernel missing gem wait");
      goto fail;
   }

   if (!anv_gem_get_param(fd, I915_PARAM_HAS_EXECBUF2)) {
      result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED,
                         "kernel missing execbuf2");
      goto fail;
   }

   if (!device->info.has_llc &&
       anv_gem_get_param(fd, I915_PARAM_MMAP_VERSION) < 1) {
      result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED,
                         "kernel missing wc mmap");
      goto fail;
   }

   if (!anv_device_get_cache_uuid(device->uuid)) {
      result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED,
                         "cannot generate UUID");
      goto fail;
   }

   bool swizzled = anv_gem_get_bit6_swizzle(fd, I915_TILING_X);

   /* GENs prior to 8 do not support EU/Subslice info */
   if (device->info.gen >= 8) {
      device->subslice_total = anv_gem_get_param(fd, I915_PARAM_SUBSLICE_TOTAL);
      device->eu_total = anv_gem_get_param(fd, I915_PARAM_EU_TOTAL);

      /* Without this information, we cannot get the right Braswell
       * brandstrings, and we have to use conservative numbers for GPGPU on
       * many platforms, but otherwise, things will just work.
       */
      if (device->subslice_total < 1 || device->eu_total < 1) {
         fprintf(stderr, "WARNING: Kernel 4.1 required to properly"
                         " query GPU properties.\n");
      }
   } else if (device->info.gen == 7) {
      device->subslice_total = 1 << (device->info.gt - 1);
   }

   if (device->info.is_cherryview &&
       device->subslice_total > 0 && device->eu_total > 0) {
      /* Logical CS threads = EUs per subslice * 7 threads per EU */
      uint32_t max_cs_threads = device->eu_total / device->subslice_total * 7;

      /* Fuse configurations may give more threads than expected, never less. */
      if (max_cs_threads > device->info.max_cs_threads)
         device->info.max_cs_threads = max_cs_threads;
   }
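   /* Illustrative numbers (not queried from any real part): a Cherryview
    * fusing that reports 2 subslices and 16 EUs would give
    * 16 / 2 * 7 = 56 logical CS threads, overriding the devinfo value only
    * when the computed count is larger.
    */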
   brw_process_intel_debug_variable();

   device->compiler = brw_compiler_create(NULL, &device->info);
   if (device->compiler == NULL) {
      result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
      goto fail;
   }
   device->compiler->shader_debug_log = compiler_debug_log;
   device->compiler->shader_perf_log = compiler_perf_log;

   result = anv_init_wsi(device);
   if (result != VK_SUCCESS) {
      ralloc_free(device->compiler);
      goto fail;
   }

   isl_device_init(&device->isl_dev, &device->info, swizzled);

   close(fd);
   return VK_SUCCESS;

fail:
   close(fd);
   return result;
}

static void
anv_physical_device_finish(struct anv_physical_device *device)
{
   anv_finish_wsi(device);
   ralloc_free(device->compiler);
}

static const VkExtensionProperties global_extensions[] = {
   {
      .extensionName = VK_KHR_SURFACE_EXTENSION_NAME,
      .specVersion = 25,
   },
#ifdef VK_USE_PLATFORM_XCB_KHR
   {
      .extensionName = VK_KHR_XCB_SURFACE_EXTENSION_NAME,
      .specVersion = 6,
   },
#endif
#ifdef VK_USE_PLATFORM_XLIB_KHR
   {
      .extensionName = VK_KHR_XLIB_SURFACE_EXTENSION_NAME,
      .specVersion = 6,
   },
#endif
#ifdef VK_USE_PLATFORM_WAYLAND_KHR
   {
      .extensionName = VK_KHR_WAYLAND_SURFACE_EXTENSION_NAME,
      .specVersion = 5,
   },
#endif
};

static const VkExtensionProperties device_extensions[] = {
   {
      .extensionName = VK_KHR_SWAPCHAIN_EXTENSION_NAME,
      .specVersion = 68,
   },
   {
      .extensionName = VK_KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE_EXTENSION_NAME,
      .specVersion = 1,
   }
};

static void *
default_alloc_func(void *pUserData, size_t size, size_t align,
                   VkSystemAllocationScope allocationScope)
{
   return malloc(size);
}

static void *
default_realloc_func(void *pUserData, void *pOriginal, size_t size,
                     size_t align, VkSystemAllocationScope allocationScope)
{
   return realloc(pOriginal, size);
}

static void
default_free_func(void *pUserData, void *pMemory)
{
   free(pMemory);
}

static const VkAllocationCallbacks default_alloc = {
   .pUserData = NULL,
   .pfnAllocation = default_alloc_func,
   .pfnReallocation = default_realloc_func,
   .pfnFree = default_free_func,
};

VkResult anv_CreateInstance(
    const VkInstanceCreateInfo* pCreateInfo,
    const VkAllocationCallbacks* pAllocator,
    VkInstance* pInstance)
{
   struct anv_instance *instance;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO);

   uint32_t client_version;
   if (pCreateInfo->pApplicationInfo &&
       pCreateInfo->pApplicationInfo->apiVersion != 0) {
      client_version = pCreateInfo->pApplicationInfo->apiVersion;
   } else {
      client_version = VK_MAKE_VERSION(1, 0, 0);
   }

   if (VK_MAKE_VERSION(1, 0, 0) > client_version ||
       client_version > VK_MAKE_VERSION(1, 0, 0xfff)) {
      return vk_errorf(VK_ERROR_INCOMPATIBLE_DRIVER,
                       "Client requested version %d.%d.%d",
                       VK_VERSION_MAJOR(client_version),
                       VK_VERSION_MINOR(client_version),
                       VK_VERSION_PATCH(client_version));
   }
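   /* Reminder of the encoding being range-checked above: VK_MAKE_VERSION
    * packs (major << 22) | (minor << 12) | patch, so the accepted window is
    * 0x400000 (1.0.0) through 0x400fff (1.0.4095).
    */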
   for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) {
      bool found = false;
      for (uint32_t j = 0; j < ARRAY_SIZE(global_extensions); j++) {
         if (strcmp(pCreateInfo->ppEnabledExtensionNames[i],
                    global_extensions[j].extensionName) == 0) {
            found = true;
            break;
         }
      }
      if (!found)
         return vk_error(VK_ERROR_EXTENSION_NOT_PRESENT);
   }

   instance = vk_alloc2(&default_alloc, pAllocator, sizeof(*instance), 8,
                        VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
   if (!instance)
      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

   instance->_loader_data.loaderMagic = ICD_LOADER_MAGIC;

   if (pAllocator)
      instance->alloc = *pAllocator;
   else
      instance->alloc = default_alloc;

   instance->apiVersion = client_version;
   instance->physicalDeviceCount = -1;

   _mesa_locale_init();

   VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false));

   *pInstance = anv_instance_to_handle(instance);

   return VK_SUCCESS;
}

void anv_DestroyInstance(
    VkInstance _instance,
    const VkAllocationCallbacks* pAllocator)
{
   ANV_FROM_HANDLE(anv_instance, instance, _instance);

   if (!instance)
      return;

   if (instance->physicalDeviceCount > 0) {
      /* We support at most one physical device. */
      assert(instance->physicalDeviceCount == 1);
      anv_physical_device_finish(&instance->physicalDevice);
   }

   VG(VALGRIND_DESTROY_MEMPOOL(instance));

   _mesa_locale_fini();

   vk_free(&instance->alloc, instance);
}

VkResult anv_EnumeratePhysicalDevices(
    VkInstance _instance,
    uint32_t* pPhysicalDeviceCount,
    VkPhysicalDevice* pPhysicalDevices)
{
   ANV_FROM_HANDLE(anv_instance, instance, _instance);
   VkResult result;

   if (instance->physicalDeviceCount < 0) {
      char path[20];
      for (unsigned i = 0; i < 8; i++) {
         snprintf(path, sizeof(path), "/dev/dri/renderD%d", 128 + i);
         result = anv_physical_device_init(&instance->physicalDevice,
                                           instance, path);
         if (result != VK_ERROR_INCOMPATIBLE_DRIVER)
            break;
      }

      if (result == VK_ERROR_INCOMPATIBLE_DRIVER) {
         instance->physicalDeviceCount = 0;
      } else if (result == VK_SUCCESS) {
         instance->physicalDeviceCount = 1;
      } else {
         return result;
      }
   }

   /* pPhysicalDeviceCount is an out parameter if pPhysicalDevices is NULL;
    * otherwise it's an inout parameter.
    *
    * The Vulkan spec (git aaed022) says:
    *
    *    pPhysicalDeviceCount is a pointer to an unsigned integer variable
    *    that is initialized with the number of devices the application is
    *    prepared to receive handles to. pname:pPhysicalDevices is pointer to
    *    an array of at least this many VkPhysicalDevice handles [...].
    *
    * Upon success, if pPhysicalDevices is NULL, vkEnumeratePhysicalDevices
    * overwrites the contents of the variable pointed to by
    * pPhysicalDeviceCount with the number of physical devices in the
    * instance; otherwise, vkEnumeratePhysicalDevices overwrites
    * pPhysicalDeviceCount with the number of physical handles written to
    * pPhysicalDevices.
    */
   if (!pPhysicalDevices) {
      *pPhysicalDeviceCount = instance->physicalDeviceCount;
   } else if (*pPhysicalDeviceCount >= 1) {
      pPhysicalDevices[0] = anv_physical_device_to_handle(&instance->physicalDevice);
      *pPhysicalDeviceCount = 1;
   } else if (*pPhysicalDeviceCount < instance->physicalDeviceCount) {
      return VK_INCOMPLETE;
   } else {
      *pPhysicalDeviceCount = 0;
   }

   return VK_SUCCESS;
}
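/* Typical application-side use of the count/handle protocol implemented
 * above (illustrative only, not driver code):
 *
 *    uint32_t count = 0;
 *    vkEnumeratePhysicalDevices(instance, &count, NULL);   // query the count
 *    VkPhysicalDevice pdev;
 *    count = 1;
 *    vkEnumeratePhysicalDevices(instance, &count, &pdev);  // fetch the handle
 *
 * Since this driver exposes at most one physical device, count comes back as
 * either 0 or 1.
 */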
void anv_GetPhysicalDeviceFeatures(
    VkPhysicalDevice physicalDevice,
    VkPhysicalDeviceFeatures* pFeatures)
{
   ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice);

   *pFeatures = (VkPhysicalDeviceFeatures) {
      .robustBufferAccess = true,
      .fullDrawIndexUint32 = true,
      .imageCubeArray = true,
      .independentBlend = true,
      .geometryShader = true,
      .tessellationShader = true,
      .sampleRateShading = true,
      .dualSrcBlend = true,
      .logicOp = true,
      .multiDrawIndirect = false,
      .drawIndirectFirstInstance = true,
      .depthClamp = true,
      .depthBiasClamp = true,
      .fillModeNonSolid = true,
      .depthBounds = false,
      .wideLines = true,
      .largePoints = true,
      .alphaToOne = true,
      .multiViewport = true,
      .samplerAnisotropy = true,
      .textureCompressionETC2 = pdevice->info.gen >= 8 ||
                                pdevice->info.is_baytrail,
      .textureCompressionASTC_LDR = pdevice->info.gen >= 9, /* FINISHME CHV */
      .textureCompressionBC = true,
      .occlusionQueryPrecise = true,
      .pipelineStatisticsQuery = false,
      .fragmentStoresAndAtomics = true,
      .shaderTessellationAndGeometryPointSize = true,
      .shaderImageGatherExtended = true,
      .shaderStorageImageExtendedFormats = true,
      .shaderStorageImageMultisample = false,
      .shaderStorageImageReadWithoutFormat = false,
      .shaderStorageImageWriteWithoutFormat = false,
      .shaderUniformBufferArrayDynamicIndexing = true,
      .shaderSampledImageArrayDynamicIndexing = true,
      .shaderStorageBufferArrayDynamicIndexing = true,
      .shaderStorageImageArrayDynamicIndexing = true,
      .shaderClipDistance = true,
      .shaderCullDistance = true,
      .shaderFloat64 = pdevice->info.gen >= 8,
      .shaderInt64 = false,
      .shaderInt16 = false,
      .shaderResourceMinLod = false,
      .variableMultisampleRate = false,
      .inheritedQueries = false,
   };

   /* We can't do image stores in vec4 shaders */
   pFeatures->vertexPipelineStoresAndAtomics =
      pdevice->compiler->scalar_stage[MESA_SHADER_VERTEX] &&
      pdevice->compiler->scalar_stage[MESA_SHADER_GEOMETRY];
}

void anv_GetPhysicalDeviceProperties(
    VkPhysicalDevice physicalDevice,
    VkPhysicalDeviceProperties* pProperties)
{
   ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice);
   const struct gen_device_info *devinfo = &pdevice->info;

   const float time_stamp_base = devinfo->gen >= 9 ? 83.333 : 80.0;

   /* See assertions made when programming the buffer surface state. */
   const uint32_t max_raw_buffer_sz = devinfo->gen >= 7 ?
                                      (1ul << 30) : (1ul << 27);

   VkSampleCountFlags sample_counts =
      isl_device_get_sample_counts(&pdevice->isl_dev);

   VkPhysicalDeviceLimits limits = {
      .maxImageDimension1D = (1 << 14),
      .maxImageDimension2D = (1 << 14),
      .maxImageDimension3D = (1 << 11),
      .maxImageDimensionCube = (1 << 14),
      .maxImageArrayLayers = (1 << 11),
      .maxTexelBufferElements = 128 * 1024 * 1024,
      .maxUniformBufferRange = (1ul << 27),
      .maxStorageBufferRange = max_raw_buffer_sz,
      .maxPushConstantsSize = MAX_PUSH_CONSTANTS_SIZE,
      .maxMemoryAllocationCount = UINT32_MAX,
      .maxSamplerAllocationCount = 64 * 1024,
      .bufferImageGranularity = 64, /* A cache line */
      .sparseAddressSpaceSize = 0,
      .maxBoundDescriptorSets = MAX_SETS,
      .maxPerStageDescriptorSamplers = 64,
      .maxPerStageDescriptorUniformBuffers = 64,
      .maxPerStageDescriptorStorageBuffers = 64,
      .maxPerStageDescriptorSampledImages = 64,
      .maxPerStageDescriptorStorageImages = 64,
      .maxPerStageDescriptorInputAttachments = 64,
      .maxPerStageResources = 128,
      .maxDescriptorSetSamplers = 256,
      .maxDescriptorSetUniformBuffers = 256,
      .maxDescriptorSetUniformBuffersDynamic = MAX_DYNAMIC_BUFFERS / 2,
      .maxDescriptorSetStorageBuffers = 256,
      .maxDescriptorSetStorageBuffersDynamic = MAX_DYNAMIC_BUFFERS / 2,
      .maxDescriptorSetSampledImages = 256,
      .maxDescriptorSetStorageImages = 256,
      .maxDescriptorSetInputAttachments = 256,
      .maxVertexInputAttributes = 32,
      .maxVertexInputBindings = 32,
      .maxVertexInputAttributeOffset = 2047,
      .maxVertexInputBindingStride = 2048,
      .maxVertexOutputComponents = 128,
      .maxTessellationGenerationLevel = 64,
      .maxTessellationPatchSize = 32,
      .maxTessellationControlPerVertexInputComponents = 128,
      .maxTessellationControlPerVertexOutputComponents = 128,
      .maxTessellationControlPerPatchOutputComponents = 128,
      .maxTessellationControlTotalOutputComponents = 2048,
      .maxTessellationEvaluationInputComponents = 128,
      .maxTessellationEvaluationOutputComponents = 128,
      .maxGeometryShaderInvocations = 32,
      .maxGeometryInputComponents = 64,
      .maxGeometryOutputComponents = 128,
      .maxGeometryOutputVertices = 256,
      .maxGeometryTotalOutputComponents = 1024,
      .maxFragmentInputComponents = 128,
      .maxFragmentOutputAttachments = 8,
      .maxFragmentDualSrcAttachments = 1,
      .maxFragmentCombinedOutputResources = 8,
      .maxComputeSharedMemorySize = 32768,
      .maxComputeWorkGroupCount = { 65535, 65535, 65535 },
      .maxComputeWorkGroupInvocations = 16 * devinfo->max_cs_threads,
      .maxComputeWorkGroupSize = {
         16 * devinfo->max_cs_threads,
         16 * devinfo->max_cs_threads,
         16 * devinfo->max_cs_threads,
      },
      .subPixelPrecisionBits = 4 /* FIXME */,
      .subTexelPrecisionBits = 4 /* FIXME */,
      .mipmapPrecisionBits = 4 /* FIXME */,
      .maxDrawIndexedIndexValue = UINT32_MAX,
      .maxDrawIndirectCount = UINT32_MAX,
      .maxSamplerLodBias = 16,
      .maxSamplerAnisotropy = 16,
      .maxViewports = MAX_VIEWPORTS,
      .maxViewportDimensions = { (1 << 14), (1 << 14) },
      .viewportBoundsRange = { INT16_MIN, INT16_MAX },
      .viewportSubPixelBits = 13, /* We take a float? */
      .minMemoryMapAlignment = 4096, /* A page */
      .minTexelBufferOffsetAlignment = 1,
      .minUniformBufferOffsetAlignment = 16,
      .minStorageBufferOffsetAlignment = 4,
      .minTexelOffset = -8,
      .maxTexelOffset = 7,
      .minTexelGatherOffset = -32,
      .maxTexelGatherOffset = 31,
      .minInterpolationOffset = -0.5,
      .maxInterpolationOffset = 0.4375,
      .subPixelInterpolationOffsetBits = 4,
      .maxFramebufferWidth = (1 << 14),
      .maxFramebufferHeight = (1 << 14),
      .maxFramebufferLayers = (1 << 11),
      .framebufferColorSampleCounts = sample_counts,
      .framebufferDepthSampleCounts = sample_counts,
      .framebufferStencilSampleCounts = sample_counts,
      .framebufferNoAttachmentsSampleCounts = sample_counts,
      .maxColorAttachments = MAX_RTS,
      .sampledImageColorSampleCounts = sample_counts,
      .sampledImageIntegerSampleCounts = VK_SAMPLE_COUNT_1_BIT,
      .sampledImageDepthSampleCounts = sample_counts,
      .sampledImageStencilSampleCounts = sample_counts,
      .storageImageSampleCounts = VK_SAMPLE_COUNT_1_BIT,
      .maxSampleMaskWords = 1,
      .timestampComputeAndGraphics = false,
      .timestampPeriod = time_stamp_base,
      .maxClipDistances = 8,
      .maxCullDistances = 8,
      .maxCombinedClipAndCullDistances = 8,
      .discreteQueuePriorities = 1,
      .pointSizeRange = { 0.125, 255.875 },
      .lineWidthRange = { 0.0, 7.9921875 },
      .pointSizeGranularity = (1.0 / 8.0),
      .lineWidthGranularity = (1.0 / 128.0),
      .strictLines = false, /* FINISHME */
      .standardSampleLocations = true,
      .optimalBufferCopyOffsetAlignment = 128,
      .optimalBufferCopyRowPitchAlignment = 128,
      .nonCoherentAtomSize = 64,
   };

   *pProperties = (VkPhysicalDeviceProperties) {
      .apiVersion = VK_MAKE_VERSION(1, 0, 5),
      .driverVersion = 1,
      .vendorID = 0x8086,
      .deviceID = pdevice->chipset_id,
      .deviceType = VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU,
      .limits = limits,
      .sparseProperties = {0}, /* Broadwell doesn't do sparse. */
   };

   strcpy(pProperties->deviceName, pdevice->name);
   memcpy(pProperties->pipelineCacheUUID, pdevice->uuid, VK_UUID_SIZE);
}

void anv_GetPhysicalDeviceQueueFamilyProperties(
    VkPhysicalDevice physicalDevice,
    uint32_t* pCount,
    VkQueueFamilyProperties* pQueueFamilyProperties)
{
   if (pQueueFamilyProperties == NULL) {
      *pCount = 1;
      return;
   }

   /* The spec implicitly allows the incoming count to be 0. From the Vulkan
    * 1.0.38 spec, Section 4.1 Physical Devices:
    *
    *    If the value referenced by pQueueFamilyPropertyCount is not 0 [then
    *    do stuff].
    */
   if (*pCount == 0)
      return;

   *pQueueFamilyProperties = (VkQueueFamilyProperties) {
      .queueFlags = VK_QUEUE_GRAPHICS_BIT |
                    VK_QUEUE_COMPUTE_BIT |
                    VK_QUEUE_TRANSFER_BIT,
      .queueCount = 1,
      .timestampValidBits = 36, /* XXX: Real value here */
      .minImageTransferGranularity = (VkExtent3D) { 1, 1, 1 },
   };

   *pCount = 1;
}

void anv_GetPhysicalDeviceMemoryProperties(
    VkPhysicalDevice physicalDevice,
    VkPhysicalDeviceMemoryProperties* pMemoryProperties)
{
   ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice);
   VkDeviceSize heap_size;

   /* Reserve some wiggle room for the driver by exposing only 75% of the
    * aperture to the heap.
    */
   heap_size = 3 * physical_device->aperture_size / 4;

   if (physical_device->info.has_llc) {
      /* Big core GPUs share LLC with the CPU and thus one memory type can be
       * both cached and coherent at the same time.
       */
      pMemoryProperties->memoryTypeCount = 1;
      pMemoryProperties->memoryTypes[0] = (VkMemoryType) {
         .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
                          VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
                          VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
                          VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
         .heapIndex = 0,
      };
   } else {
      /* The spec requires that we expose a host-visible, coherent memory
       * type, but Atom GPUs don't share LLC. Thus we offer two memory types:
       * one that is cached but not coherent, and one that is coherent but
       * uncached (write-combined).
       */
      pMemoryProperties->memoryTypeCount = 2;
      pMemoryProperties->memoryTypes[0] = (VkMemoryType) {
         .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
                          VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
                          VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
         .heapIndex = 0,
      };
      pMemoryProperties->memoryTypes[1] = (VkMemoryType) {
         .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
                          VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
                          VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
         .heapIndex = 0,
      };
   }

   pMemoryProperties->memoryHeapCount = 1;
   pMemoryProperties->memoryHeaps[0] = (VkMemoryHeap) {
      .size = heap_size,
      .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
   };
}
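/* Worked example of the sizing above (illustrative numbers): a device with a
 * 4 GiB aperture advertises a single 3 GiB heap. On non-LLC parts, type 0
 * (HOST_COHERENT, mapped write-combined) is the natural choice for streaming
 * CPU writes, while type 1 (HOST_CACHED, not coherent) is faster for CPU
 * reads but requires vkFlushMappedMemoryRanges / vkInvalidateMappedMemoryRanges
 * around CPU access.
 */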
PFN_vkVoidFunction anv_GetInstanceProcAddr(
    VkInstance instance,
    const char* pName)
{
   return anv_lookup_entrypoint(NULL, pName);
}

/* With version 1+ of the loader interface the ICD should expose
 * vk_icdGetInstanceProcAddr to work around certain LD_PRELOAD issues seen in
 * apps.
 */
PUBLIC
VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(
    VkInstance instance,
    const char* pName);

PUBLIC
VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(
    VkInstance instance,
    const char* pName)
{
   return anv_GetInstanceProcAddr(instance, pName);
}

PFN_vkVoidFunction anv_GetDeviceProcAddr(
    VkDevice _device,
    const char* pName)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   return anv_lookup_entrypoint(&device->info, pName);
}

static void
anv_queue_init(struct anv_device *device, struct anv_queue *queue)
{
   queue->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
   queue->device = device;
   queue->pool = &device->surface_state_pool;
}

static void
anv_queue_finish(struct anv_queue *queue)
{
}

static struct anv_state
anv_state_pool_emit_data(struct anv_state_pool *pool, size_t size,
                         size_t align, const void *p)
{
   struct anv_state state;

   state = anv_state_pool_alloc(pool, size, align);
   memcpy(state.map, p, size);

   if (!pool->block_pool->device->info.has_llc)
      anv_state_clflush(state);

   return state;
}

struct gen8_border_color {
   union {
      float float32[4];
      uint32_t uint32[4];
   };
   /* Pad out to 64 bytes */
   uint32_t _pad[12];
};

static void
anv_device_init_border_colors(struct anv_device *device)
{
   static const struct gen8_border_color border_colors[] = {
      [VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK] = { .float32 = { 0.0, 0.0, 0.0, 0.0 } },
      [VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK] = { .float32 = { 0.0, 0.0, 0.0, 1.0 } },
      [VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE] = { .float32 = { 1.0, 1.0, 1.0, 1.0 } },
      [VK_BORDER_COLOR_INT_TRANSPARENT_BLACK] = { .uint32 = { 0, 0, 0, 0 } },
      [VK_BORDER_COLOR_INT_OPAQUE_BLACK] = { .uint32 = { 0, 0, 0, 1 } },
      [VK_BORDER_COLOR_INT_OPAQUE_WHITE] = { .uint32 = { 1, 1, 1, 1 } },
   };

   device->border_colors = anv_state_pool_emit_data(&device->dynamic_state_pool,
                                                    sizeof(border_colors), 64,
                                                    border_colors);
}
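/* Size check on the table above: six entries of 64 bytes each, i.e. 384
 * bytes, allocated with 64-byte alignment so every border color starts on
 * its own cache line (the padding in gen8_border_color is what rounds each
 * entry up to 64 bytes).
 */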
VkResult
anv_device_submit_simple_batch(struct anv_device *device,
                               struct anv_batch *batch)
{
   struct drm_i915_gem_execbuffer2 execbuf;
   struct drm_i915_gem_exec_object2 exec2_objects[1];
   struct anv_bo bo, *exec_bos[1];
   VkResult result = VK_SUCCESS;
   uint32_t size;
   int64_t timeout;
   int ret;

   /* Kernel driver requires 8 byte aligned batch length */
   size = align_u32(batch->next - batch->start, 8);
   result = anv_bo_pool_alloc(&device->batch_bo_pool, &bo, size);
   if (result != VK_SUCCESS)
      return result;

   memcpy(bo.map, batch->start, size);
   if (!device->info.has_llc)
      anv_clflush_range(bo.map, size);

   exec_bos[0] = &bo;
   exec2_objects[0].handle = bo.gem_handle;
   exec2_objects[0].relocation_count = 0;
   exec2_objects[0].relocs_ptr = 0;
   exec2_objects[0].alignment = 0;
   exec2_objects[0].offset = bo.offset;
   exec2_objects[0].flags = 0;
   exec2_objects[0].rsvd1 = 0;
   exec2_objects[0].rsvd2 = 0;

   execbuf.buffers_ptr = (uintptr_t) exec2_objects;
   execbuf.buffer_count = 1;
   execbuf.batch_start_offset = 0;
   execbuf.batch_len = size;
   execbuf.cliprects_ptr = 0;
   execbuf.num_cliprects = 0;
   execbuf.DR1 = 0;
   execbuf.DR4 = 0;

   execbuf.flags =
      I915_EXEC_HANDLE_LUT | I915_EXEC_NO_RELOC | I915_EXEC_RENDER;
   execbuf.rsvd1 = device->context_id;
   execbuf.rsvd2 = 0;

   result = anv_device_execbuf(device, &execbuf, exec_bos);
   if (result != VK_SUCCESS)
      goto fail;

   timeout = INT64_MAX;
   ret = anv_gem_wait(device, bo.gem_handle, &timeout);
   if (ret != 0) {
      /* We don't know the real error. */
      result = vk_errorf(VK_ERROR_DEVICE_LOST, "execbuf2 failed: %m");
      goto fail;
   }

fail:
   anv_bo_pool_free(&device->batch_bo_pool, &bo);

   return result;
}

VkResult anv_CreateDevice(
    VkPhysicalDevice physicalDevice,
    const VkDeviceCreateInfo* pCreateInfo,
    const VkAllocationCallbacks* pAllocator,
    VkDevice* pDevice)
{
   ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice);
   VkResult result;
   struct anv_device *device;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO);

   for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) {
      bool found = false;
      for (uint32_t j = 0; j < ARRAY_SIZE(device_extensions); j++) {
         if (strcmp(pCreateInfo->ppEnabledExtensionNames[i],
                    device_extensions[j].extensionName) == 0) {
            found = true;
            break;
         }
      }
      if (!found)
         return vk_error(VK_ERROR_EXTENSION_NOT_PRESENT);
   }

   device = vk_alloc2(&physical_device->instance->alloc, pAllocator,
                      sizeof(*device), 8,
                      VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
   if (!device)
      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

   device->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
   device->instance = physical_device->instance;
   device->chipset_id = physical_device->chipset_id;

   if (pAllocator)
      device->alloc = *pAllocator;
   else
      device->alloc = physical_device->instance->alloc;

   /* XXX(chadv): Can we dup() physicalDevice->fd here? */
   device->fd = open(physical_device->path, O_RDWR | O_CLOEXEC);
   if (device->fd == -1) {
      result = vk_error(VK_ERROR_INITIALIZATION_FAILED);
      goto fail_device;
   }

   device->context_id = anv_gem_create_context(device);
   if (device->context_id == -1) {
      result = vk_error(VK_ERROR_INITIALIZATION_FAILED);
      goto fail_fd;
   }

   device->info = physical_device->info;
   device->isl_dev = physical_device->isl_dev;

   /* On Broadwell and later, we can use batch chaining to more efficiently
    * implement growing command buffers. Prior to Haswell, the kernel
    * command parser gets in the way and we have to fall back to growing
    * the batch.
    */
   device->can_chain_batches = device->info.gen >= 8;

   device->robust_buffer_access = pCreateInfo->pEnabledFeatures &&
      pCreateInfo->pEnabledFeatures->robustBufferAccess;

   pthread_mutex_init(&device->mutex, NULL);

   pthread_condattr_t condattr;
   pthread_condattr_init(&condattr);
   pthread_condattr_setclock(&condattr, CLOCK_MONOTONIC);
   pthread_cond_init(&device->queue_submit, &condattr);
   pthread_condattr_destroy(&condattr);

   anv_bo_pool_init(&device->batch_bo_pool, device);

   anv_block_pool_init(&device->dynamic_state_block_pool, device, 16384);

   anv_state_pool_init(&device->dynamic_state_pool,
                       &device->dynamic_state_block_pool);

   anv_block_pool_init(&device->instruction_block_pool, device, 1024 * 1024);
   anv_state_pool_init(&device->instruction_state_pool,
                       &device->instruction_block_pool);

   anv_block_pool_init(&device->surface_state_block_pool, device, 4096);

   anv_state_pool_init(&device->surface_state_pool,
                       &device->surface_state_block_pool);

   anv_bo_init_new(&device->workaround_bo, device, 1024);

   anv_scratch_pool_init(device, &device->scratch_pool);

   anv_queue_init(device, &device->queue);

   switch (device->info.gen) {
   case 7:
      if (!device->info.is_haswell)
         result = gen7_init_device_state(device);
      else
         result = gen75_init_device_state(device);
      break;
   case 8:
      result = gen8_init_device_state(device);
      break;
   case 9:
      result = gen9_init_device_state(device);
      break;
   default:
      /* Shouldn't get here as we don't create physical devices for any other
       * gens. */
      unreachable("unhandled gen");
   }
   if (result != VK_SUCCESS)
      goto fail_fd;

   anv_device_init_blorp(device);

   anv_device_init_border_colors(device);

   *pDevice = anv_device_to_handle(device);

   return VK_SUCCESS;

fail_fd:
   close(device->fd);
fail_device:
   vk_free(&device->alloc, device);

   return result;
}

void anv_DestroyDevice(
    VkDevice _device,
    const VkAllocationCallbacks* pAllocator)
{
   ANV_FROM_HANDLE(anv_device, device, _device);

   if (!device)
      return;

   anv_device_finish_blorp(device);

   anv_queue_finish(&device->queue);

#ifdef HAVE_VALGRIND
   /* We only need to free these to prevent valgrind errors. The backing
    * BO will go away in a couple of lines so we don't actually leak.
    */
   anv_state_pool_free(&device->dynamic_state_pool, device->border_colors);
#endif

   anv_scratch_pool_finish(device, &device->scratch_pool);

   anv_gem_munmap(device->workaround_bo.map, device->workaround_bo.size);
   anv_gem_close(device, device->workaround_bo.gem_handle);

   anv_state_pool_finish(&device->surface_state_pool);
   anv_block_pool_finish(&device->surface_state_block_pool);
   anv_state_pool_finish(&device->instruction_state_pool);
   anv_block_pool_finish(&device->instruction_block_pool);
   anv_state_pool_finish(&device->dynamic_state_pool);
   anv_block_pool_finish(&device->dynamic_state_block_pool);

   anv_bo_pool_finish(&device->batch_bo_pool);

   pthread_cond_destroy(&device->queue_submit);
   pthread_mutex_destroy(&device->mutex);

   anv_gem_destroy_context(device, device->context_id);

   close(device->fd);

   vk_free(&device->alloc, device);
}

VkResult anv_EnumerateInstanceExtensionProperties(
    const char* pLayerName,
    uint32_t* pPropertyCount,
    VkExtensionProperties* pProperties)
{
   if (pProperties == NULL) {
      *pPropertyCount = ARRAY_SIZE(global_extensions);
      return VK_SUCCESS;
   }

   *pPropertyCount = MIN2(*pPropertyCount, ARRAY_SIZE(global_extensions));
   typed_memcpy(pProperties, global_extensions, *pPropertyCount);

   if (*pPropertyCount < ARRAY_SIZE(global_extensions))
      return VK_INCOMPLETE;

   return VK_SUCCESS;
}

VkResult anv_EnumerateDeviceExtensionProperties(
    VkPhysicalDevice physicalDevice,
    const char* pLayerName,
    uint32_t* pPropertyCount,
    VkExtensionProperties* pProperties)
{
   if (pProperties == NULL) {
      *pPropertyCount = ARRAY_SIZE(device_extensions);
      return VK_SUCCESS;
   }

   *pPropertyCount = MIN2(*pPropertyCount, ARRAY_SIZE(device_extensions));
   typed_memcpy(pProperties, device_extensions, *pPropertyCount);

   if (*pPropertyCount < ARRAY_SIZE(device_extensions))
      return VK_INCOMPLETE;

   return VK_SUCCESS;
}

VkResult anv_EnumerateInstanceLayerProperties(
    uint32_t* pPropertyCount,
    VkLayerProperties* pProperties)
{
   if (pProperties == NULL) {
      *pPropertyCount = 0;
      return VK_SUCCESS;
   }

   /* None supported at this time */
   return vk_error(VK_ERROR_LAYER_NOT_PRESENT);
}

VkResult anv_EnumerateDeviceLayerProperties(
    VkPhysicalDevice physicalDevice,
    uint32_t* pPropertyCount,
    VkLayerProperties* pProperties)
{
   if (pProperties == NULL) {
      *pPropertyCount = 0;
      return VK_SUCCESS;
   }

   /* None supported at this time */
   return vk_error(VK_ERROR_LAYER_NOT_PRESENT);
}

void anv_GetDeviceQueue(
    VkDevice _device,
    uint32_t queueNodeIndex,
    uint32_t queueIndex,
    VkQueue* pQueue)
{
   ANV_FROM_HANDLE(anv_device, device, _device);

   assert(queueIndex == 0);

   *pQueue = anv_queue_to_handle(&device->queue);
}
VkResult
anv_device_execbuf(struct anv_device *device,
                   struct drm_i915_gem_execbuffer2 *execbuf,
                   struct anv_bo **execbuf_bos)
{
   int ret = anv_gem_execbuffer(device, execbuf);
   if (ret != 0) {
      /* We don't know the real error. */
      return vk_errorf(VK_ERROR_DEVICE_LOST, "execbuf2 failed: %m");
   }

   struct drm_i915_gem_exec_object2 *objects =
      (void *)(uintptr_t)execbuf->buffers_ptr;
   for (uint32_t k = 0; k < execbuf->buffer_count; k++)
      execbuf_bos[k]->offset = objects[k].offset;

   return VK_SUCCESS;
}
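/* After a successful execbuf2 the kernel writes each buffer's final GPU
 * offset back into the exec objects; copying it into anv_bo::offset above
 * records it as the presumed offset for later submissions, which is what
 * allows flags like I915_EXEC_NO_RELOC to skip relocation work when nothing
 * has moved.
 */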
VkResult anv_QueueSubmit(
    VkQueue _queue,
    uint32_t submitCount,
    const VkSubmitInfo* pSubmits,
    VkFence _fence)
{
   ANV_FROM_HANDLE(anv_queue, queue, _queue);
   ANV_FROM_HANDLE(anv_fence, fence, _fence);
   struct anv_device *device = queue->device;
   VkResult result = VK_SUCCESS;

   /* We lock around QueueSubmit for three main reasons:
    *
    * 1) When a block pool is resized, we create a new gem handle with a
    *    different size and, in the case of surface states, possibly a
    *    different center offset but we re-use the same anv_bo struct when
    *    we do so. If this happens in the middle of setting up an execbuf,
    *    we could end up with our list of BOs out of sync with our list of
    *    gem handles.
    *
    * 2) The algorithm we use for building the list of unique buffers isn't
    *    thread-safe. While the client is supposed to synchronize around
    *    QueueSubmit, this would be extremely difficult to debug if it ever
    *    came up in the wild due to a broken app. It's better to play it
    *    safe and just lock around QueueSubmit.
    *
    * 3) The anv_cmd_buffer_execbuf function may perform relocations in
    *    userspace. Due to the fact that the surface state buffer is shared
    *    between batches, we can't afford to have that happen from multiple
    *    threads at the same time. Even though the user is supposed to
    *    ensure this doesn't happen, we play it safe as in (2) above.
    *
    * Since the only other things that ever take the device lock, such as
    * block pool resizes, happen only rarely, this lock will almost never be
    * contended, so taking it is not an expensive operation in this case.
    */
   pthread_mutex_lock(&device->mutex);

   for (uint32_t i = 0; i < submitCount; i++) {
      for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) {
         ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer,
                         pSubmits[i].pCommandBuffers[j]);
         assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY);

         result = anv_cmd_buffer_execbuf(device, cmd_buffer);
         if (result != VK_SUCCESS)
            goto out;
      }
   }

   if (fence) {
      struct anv_bo *fence_bo = &fence->bo;
      result = anv_device_execbuf(device, &fence->execbuf, &fence_bo);
      if (result != VK_SUCCESS)
         goto out;

      /* Update the fence and wake up any waiters */
      assert(fence->state == ANV_FENCE_STATE_RESET);
      fence->state = ANV_FENCE_STATE_SUBMITTED;
      pthread_cond_broadcast(&device->queue_submit);
   }

out:
   pthread_mutex_unlock(&device->mutex);

   return result;
}

VkResult anv_QueueWaitIdle(
    VkQueue _queue)
{
   ANV_FROM_HANDLE(anv_queue, queue, _queue);

   return anv_DeviceWaitIdle(anv_device_to_handle(queue->device));
}

VkResult anv_DeviceWaitIdle(
    VkDevice _device)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   struct anv_batch batch;

   uint32_t cmds[8];
   batch.start = batch.next = cmds;
   batch.end = (void *) cmds + sizeof(cmds);

   anv_batch_emit(&batch, GEN7_MI_BATCH_BUFFER_END, bbe);
   anv_batch_emit(&batch, GEN7_MI_NOOP, noop);

   return anv_device_submit_simple_batch(device, &batch);
}

VkResult
anv_bo_init_new(struct anv_bo *bo, struct anv_device *device, uint64_t size)
{
   uint32_t gem_handle = anv_gem_create(device, size);
   if (!gem_handle)
      return vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY);

   anv_bo_init(bo, gem_handle, size);

   return VK_SUCCESS;
}

VkResult anv_AllocateMemory(
    VkDevice _device,
    const VkMemoryAllocateInfo* pAllocateInfo,
    const VkAllocationCallbacks* pAllocator,
    VkDeviceMemory* pMem)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   struct anv_device_memory *mem;
   VkResult result;

   assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO);

   /* The Vulkan 1.0.33 spec says "allocationSize must be greater than 0". */
   assert(pAllocateInfo->allocationSize > 0);

   /* We support exactly one memory heap. */
   assert(pAllocateInfo->memoryTypeIndex == 0 ||
          (!device->info.has_llc && pAllocateInfo->memoryTypeIndex < 2));

   /* FINISHME: Fail if allocation request exceeds heap size. */
   mem = vk_alloc2(&device->alloc, pAllocator, sizeof(*mem), 8,
                   VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (mem == NULL)
      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

   /* The kernel is going to give us whole pages anyway */
   uint64_t alloc_size = align_u64(pAllocateInfo->allocationSize, 4096);

   result = anv_bo_init_new(&mem->bo, device, alloc_size);
   if (result != VK_SUCCESS)
      goto fail;

   mem->type_index = pAllocateInfo->memoryTypeIndex;

   mem->map = NULL;
   mem->map_size = 0;

   *pMem = anv_device_memory_to_handle(mem);

   return VK_SUCCESS;

fail:
   vk_free2(&device->alloc, pAllocator, mem);

   return result;
}

void anv_FreeMemory(
    VkDevice _device,
    VkDeviceMemory _mem,
    const VkAllocationCallbacks* pAllocator)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   ANV_FROM_HANDLE(anv_device_memory, mem, _mem);

   if (mem == NULL)
      return;

   if (mem->map)
      anv_UnmapMemory(_device, _mem);

   if (mem->bo.map)
      anv_gem_munmap(mem->bo.map, mem->bo.size);

   if (mem->bo.gem_handle != 0)
      anv_gem_close(device, mem->bo.gem_handle);

   vk_free2(&device->alloc, pAllocator, mem);
}

VkResult anv_MapMemory(
    VkDevice _device,
    VkDeviceMemory _memory,
    VkDeviceSize offset,
    VkDeviceSize size,
    VkMemoryMapFlags flags,
    void** ppData)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   ANV_FROM_HANDLE(anv_device_memory, mem, _memory);

   if (mem == NULL) {
      *ppData = NULL;
      return VK_SUCCESS;
   }

   if (size == VK_WHOLE_SIZE)
      size = mem->bo.size - offset;

   /* From the Vulkan spec version 1.0.32 docs for MapMemory:
    *
    *  * If size is not equal to VK_WHOLE_SIZE, size must be greater than 0
    *    assert(size != 0);
    *  * If size is not equal to VK_WHOLE_SIZE, size must be less than or
    *    equal to the size of the memory minus offset
    */
   assert(size > 0);
   assert(offset + size <= mem->bo.size);

   /* FIXME: Is this supposed to be thread safe? Since vkUnmapMemory() only
    * takes a VkDeviceMemory pointer, it seems like only one map of the memory
    * at a time is valid. We could just mmap up front and return an offset
    * pointer here, but that may exhaust virtual memory on 32 bit
    * userspace. */

   uint32_t gem_flags = 0;
   if (!device->info.has_llc && mem->type_index == 0)
      gem_flags |= I915_MMAP_WC;

   /* GEM will fail to map if the offset isn't 4k-aligned. Round down. */
   uint64_t map_offset = offset & ~4095ull;
   assert(offset >= map_offset);
   uint64_t map_size = (offset + size) - map_offset;

   /* Let's map whole pages */
   map_size = align_u64(map_size, 4096);

   void *map = anv_gem_mmap(device, mem->bo.gem_handle,
                            map_offset, map_size, gem_flags);
   if (map == MAP_FAILED)
      return vk_error(VK_ERROR_MEMORY_MAP_FAILED);

   mem->map = map;
   mem->map_size = map_size;

   *ppData = mem->map + (offset - map_offset);

   return VK_SUCCESS;
}
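/* Worked example of the rounding above (illustrative numbers): mapping
 * offset = 5000, size = 100 gives map_offset = 4096,
 * map_size = align(5000 + 100 - 4096, 4096) = 4096, and the pointer handed
 * back to the application is map + (5000 - 4096) = map + 904.
 */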
void anv_UnmapMemory(
    VkDevice _device,
    VkDeviceMemory _memory)
{
   ANV_FROM_HANDLE(anv_device_memory, mem, _memory);

   if (mem == NULL)
      return;

   anv_gem_munmap(mem->map, mem->map_size);

   mem->map = NULL;
   mem->map_size = 0;
}

static void
clflush_mapped_ranges(struct anv_device *device,
                      uint32_t count,
                      const VkMappedMemoryRange *ranges)
{
   for (uint32_t i = 0; i < count; i++) {
      ANV_FROM_HANDLE(anv_device_memory, mem, ranges[i].memory);
      void *p = mem->map + (ranges[i].offset & ~CACHELINE_MASK);
      void *end;

      if (ranges[i].offset + ranges[i].size > mem->map_size)
         end = mem->map + mem->map_size;
      else
         end = mem->map + ranges[i].offset + ranges[i].size;

      while (p < end) {
         __builtin_ia32_clflush(p);
         p += CACHELINE_SIZE;
      }
   }
}

VkResult anv_FlushMappedMemoryRanges(
    VkDevice _device,
    uint32_t memoryRangeCount,
    const VkMappedMemoryRange* pMemoryRanges)
{
   ANV_FROM_HANDLE(anv_device, device, _device);

   if (device->info.has_llc)
      return VK_SUCCESS;

   /* Make sure the writes we're flushing have landed. */
   __builtin_ia32_mfence();

   clflush_mapped_ranges(device, memoryRangeCount, pMemoryRanges);

   return VK_SUCCESS;
}

VkResult anv_InvalidateMappedMemoryRanges(
    VkDevice _device,
    uint32_t memoryRangeCount,
    const VkMappedMemoryRange* pMemoryRanges)
{
   ANV_FROM_HANDLE(anv_device, device, _device);

   if (device->info.has_llc)
      return VK_SUCCESS;

   clflush_mapped_ranges(device, memoryRangeCount, pMemoryRanges);

   /* Make sure no reads get moved up above the invalidate. */
   __builtin_ia32_mfence();

   return VK_SUCCESS;
}

void anv_GetBufferMemoryRequirements(
    VkDevice _device,
    VkBuffer _buffer,
    VkMemoryRequirements* pMemoryRequirements)
{
   ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);
   ANV_FROM_HANDLE(anv_device, device, _device);

   /* The Vulkan spec (git aaed022) says:
    *
    *    memoryTypeBits is a bitfield and contains one bit set for every
    *    supported memory type for the resource. The bit `1<<i` is set if and
    *    only if the memory type `i` in the VkPhysicalDeviceMemoryProperties
    *    structure for the physical device is supported.
    *
    * We support exactly one memory type on LLC, two on non-LLC.
    */
   pMemoryRequirements->memoryTypeBits = device->info.has_llc ? 1 : 3;

   pMemoryRequirements->size = buffer->size;
   pMemoryRequirements->alignment = 16;
}

void anv_GetImageMemoryRequirements(
    VkDevice _device,
    VkImage _image,
    VkMemoryRequirements* pMemoryRequirements)
{
   ANV_FROM_HANDLE(anv_image, image, _image);
   ANV_FROM_HANDLE(anv_device, device, _device);

   /* The Vulkan spec (git aaed022) says:
    *
    *    memoryTypeBits is a bitfield and contains one bit set for every
    *    supported memory type for the resource. The bit `1<<i` is set if and
    *    only if the memory type `i` in the VkPhysicalDeviceMemoryProperties
    *    structure for the physical device is supported.
    *
    * We support exactly one memory type on LLC, two on non-LLC.
    */
   pMemoryRequirements->memoryTypeBits = device->info.has_llc ? 1 : 3;

   pMemoryRequirements->size = image->size;
   pMemoryRequirements->alignment = image->alignment;
}

void anv_GetImageSparseMemoryRequirements(
    VkDevice device,
    VkImage image,
    uint32_t* pSparseMemoryRequirementCount,
    VkSparseImageMemoryRequirements* pSparseMemoryRequirements)
{
   stub();
}

void anv_GetDeviceMemoryCommitment(
    VkDevice device,
    VkDeviceMemory memory,
    VkDeviceSize* pCommittedMemoryInBytes)
{
   *pCommittedMemoryInBytes = 0;
}

VkResult anv_BindBufferMemory(
    VkDevice device,
    VkBuffer _buffer,
    VkDeviceMemory _memory,
    VkDeviceSize memoryOffset)
{
   ANV_FROM_HANDLE(anv_device_memory, mem, _memory);
   ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);

   if (mem) {
      buffer->bo = &mem->bo;
      buffer->offset = memoryOffset;
   } else {
      buffer->bo = NULL;
      buffer->offset = 0;
   }

   return VK_SUCCESS;
}

VkResult anv_QueueBindSparse(
    VkQueue queue,
    uint32_t bindInfoCount,
    const VkBindSparseInfo* pBindInfo,
    VkFence fence)
{
   stub_return(VK_ERROR_INCOMPATIBLE_DRIVER);
}

VkResult anv_CreateFence(
    VkDevice _device,
    const VkFenceCreateInfo* pCreateInfo,
    const VkAllocationCallbacks* pAllocator,
    VkFence* pFence)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   struct anv_bo fence_bo;
   struct anv_fence *fence;
   struct anv_batch batch;
   VkResult result;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FENCE_CREATE_INFO);

   result = anv_bo_pool_alloc(&device->batch_bo_pool, &fence_bo, 4096);
   if (result != VK_SUCCESS)
      return result;

   /* Fences are small. Just store the CPU data structure in the BO. */
   fence = fence_bo.map;
   fence->bo = fence_bo;

   /* Place the batch after the CPU data but on its own cache line. */
   const uint32_t batch_offset = align_u32(sizeof(*fence), CACHELINE_SIZE);
   batch.next = batch.start = fence->bo.map + batch_offset;
   batch.end = fence->bo.map + fence->bo.size;
   anv_batch_emit(&batch, GEN7_MI_BATCH_BUFFER_END, bbe);
   anv_batch_emit(&batch, GEN7_MI_NOOP, noop);

   if (!device->info.has_llc) {
      assert(((uintptr_t) batch.start & CACHELINE_MASK) == 0);
      assert(batch.next - batch.start <= CACHELINE_SIZE);
      __builtin_ia32_mfence();
      __builtin_ia32_clflush(batch.start);
   }

   fence->exec2_objects[0].handle = fence->bo.gem_handle;
   fence->exec2_objects[0].relocation_count = 0;
   fence->exec2_objects[0].relocs_ptr = 0;
   fence->exec2_objects[0].alignment = 0;
   fence->exec2_objects[0].offset = fence->bo.offset;
   fence->exec2_objects[0].flags = 0;
   fence->exec2_objects[0].rsvd1 = 0;
   fence->exec2_objects[0].rsvd2 = 0;

   fence->execbuf.buffers_ptr = (uintptr_t) fence->exec2_objects;
   fence->execbuf.buffer_count = 1;
   fence->execbuf.batch_start_offset = batch.start - fence->bo.map;
   fence->execbuf.batch_len = batch.next - batch.start;
   fence->execbuf.cliprects_ptr = 0;
   fence->execbuf.num_cliprects = 0;
   fence->execbuf.DR1 = 0;
   fence->execbuf.DR4 = 0;

   fence->execbuf.flags =
      I915_EXEC_HANDLE_LUT | I915_EXEC_NO_RELOC | I915_EXEC_RENDER;
   fence->execbuf.rsvd1 = device->context_id;
   fence->execbuf.rsvd2 = 0;

   if (pCreateInfo->flags & VK_FENCE_CREATE_SIGNALED_BIT) {
      fence->state = ANV_FENCE_STATE_SIGNALED;
   } else {
      fence->state = ANV_FENCE_STATE_RESET;
   }

   *pFence = anv_fence_to_handle(fence);

   return VK_SUCCESS;
}

void anv_DestroyFence(
    VkDevice _device,
    VkFence _fence,
    const VkAllocationCallbacks* pAllocator)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   ANV_FROM_HANDLE(anv_fence, fence, _fence);

   if (!fence)
      return;

   assert(fence->bo.map == fence);
   anv_bo_pool_free(&device->batch_bo_pool, &fence->bo);
}

VkResult anv_ResetFences(
    VkDevice _device,
    uint32_t fenceCount,
    const VkFence* pFences)
{
   for (uint32_t i = 0; i < fenceCount; i++) {
      ANV_FROM_HANDLE(anv_fence, fence, pFences[i]);
      fence->state = ANV_FENCE_STATE_RESET;
   }

   return VK_SUCCESS;
}
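/* Summary of the fence lifecycle implemented here: a fence starts (or is put
 * back) in ANV_FENCE_STATE_RESET, anv_QueueSubmit moves it to
 * ANV_FENCE_STATE_SUBMITTED after submitting its tiny batch and broadcasts
 * device->queue_submit, and a successful gem wait on the fence BO promotes
 * it to ANV_FENCE_STATE_SIGNALED in anv_GetFenceStatus / anv_WaitForFences.
 */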
VkResult anv_GetFenceStatus(
    VkDevice _device,
    VkFence _fence)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   ANV_FROM_HANDLE(anv_fence, fence, _fence);
   int64_t t = 0;
   int ret;

   switch (fence->state) {
   case ANV_FENCE_STATE_RESET:
      /* If it hasn't even been sent off to the GPU yet, it's not ready */
      return VK_NOT_READY;

   case ANV_FENCE_STATE_SIGNALED:
      /* It's been signaled, return success */
      return VK_SUCCESS;

   case ANV_FENCE_STATE_SUBMITTED:
      /* It's been submitted to the GPU but we don't know if it's done yet. */
      ret = anv_gem_wait(device, fence->bo.gem_handle, &t);
      if (ret == 0) {
         fence->state = ANV_FENCE_STATE_SIGNALED;
         return VK_SUCCESS;
      } else {
         return VK_NOT_READY;
      }
   default:
      unreachable("Invalid fence status");
   }
}

#define NSEC_PER_SEC 1000000000
#define INT_TYPE_MAX(type) ((1ull << (sizeof(type) * 8 - 1)) - 1)

VkResult anv_WaitForFences(
    VkDevice _device,
    uint32_t fenceCount,
    const VkFence* pFences,
    VkBool32 waitAll,
    uint64_t _timeout)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   int ret;

   /* DRM_IOCTL_I915_GEM_WAIT uses a signed 64 bit timeout and is supposed
    * to block indefinitely for timeouts <= 0. Unfortunately, this was broken
    * for a couple of kernel releases. Since there's no way to know
    * whether or not the kernel we're using is one of the broken ones, the
    * best we can do is to clamp the timeout to INT64_MAX. This limits the
    * maximum timeout from 584 years to 292 years - likely not a big deal.
    */
   int64_t timeout = MIN2(_timeout, INT64_MAX);

   uint32_t pending_fences = fenceCount;
   while (pending_fences) {
      pending_fences = 0;
      bool signaled_fences = false;
      for (uint32_t i = 0; i < fenceCount; i++) {
         ANV_FROM_HANDLE(anv_fence, fence, pFences[i]);
         switch (fence->state) {
         case ANV_FENCE_STATE_RESET:
            /* This fence hasn't been submitted yet, we'll catch it the next
             * time around. Yes, this may mean we dead-loop but, short of
             * lots of locking and a condition variable, there's not much that
             * we can do about that.
             */
            pending_fences++;
            continue;

         case ANV_FENCE_STATE_SIGNALED:
            /* This fence is not pending. If waitAll isn't set, we can return
             * early. Otherwise, we have to keep going.
             */
            if (!waitAll)
               return VK_SUCCESS;
            continue;

         case ANV_FENCE_STATE_SUBMITTED:
            /* These are the fences we really care about. Go ahead and wait
             * on each one until we hit a timeout.
             */
            ret = anv_gem_wait(device, fence->bo.gem_handle, &timeout);
            if (ret == -1 && errno == ETIME) {
               return VK_TIMEOUT;
            } else if (ret == -1) {
               /* We don't know the real error. */
               return vk_errorf(VK_ERROR_DEVICE_LOST, "gem wait failed: %m");
            } else {
               fence->state = ANV_FENCE_STATE_SIGNALED;
               signaled_fences = true;
               if (!waitAll)
                  return VK_SUCCESS;
               continue;
            }
         }
      }

      if (pending_fences && !signaled_fences) {
         /* If we've hit this then someone decided to vkWaitForFences before
          * they've actually submitted any of them to a queue. This is a
          * fairly pessimal case, so it's ok to lock here and use a standard
          * pthreads condition variable.
          */
         pthread_mutex_lock(&device->mutex);

         /* It's possible that some of the fences have changed state since the
          * last time we checked. Now that we have the lock, check for
          * pending fences again and don't wait if it's changed.
          */
         uint32_t now_pending_fences = 0;
         for (uint32_t i = 0; i < fenceCount; i++) {
            ANV_FROM_HANDLE(anv_fence, fence, pFences[i]);
            if (fence->state == ANV_FENCE_STATE_RESET)
               now_pending_fences++;
         }
         assert(now_pending_fences <= pending_fences);

         if (now_pending_fences == pending_fences) {
            struct timespec before;
            clock_gettime(CLOCK_MONOTONIC, &before);

            uint32_t abs_nsec = before.tv_nsec + timeout % NSEC_PER_SEC;
            uint64_t abs_sec = before.tv_sec + (abs_nsec / NSEC_PER_SEC) +
                               (timeout / NSEC_PER_SEC);
            abs_nsec %= NSEC_PER_SEC;

            /* Avoid roll-over in tv_sec on 32-bit systems if the user
             * provided timeout is UINT64_MAX
             */
            struct timespec abstime;
            abstime.tv_nsec = abs_nsec;
            abstime.tv_sec = MIN2(abs_sec, INT_TYPE_MAX(abstime.tv_sec));

            ret = pthread_cond_timedwait(&device->queue_submit,
                                         &device->mutex, &abstime);
            assert(ret != EINVAL);

            struct timespec after;
            clock_gettime(CLOCK_MONOTONIC, &after);
            uint64_t time_elapsed =
               ((uint64_t)after.tv_sec * NSEC_PER_SEC + after.tv_nsec) -
               ((uint64_t)before.tv_sec * NSEC_PER_SEC + before.tv_nsec);

            if (time_elapsed >= timeout) {
               pthread_mutex_unlock(&device->mutex);
               return VK_TIMEOUT;
            }

            timeout -= time_elapsed;
         }

         pthread_mutex_unlock(&device->mutex);
      }
   }

   return VK_SUCCESS;
}
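/* Illustrative deadline arithmetic for the pthread_cond_timedwait above:
 * with before = {100 s, 900000000 ns} and timeout = 1500000000 ns,
 * abs_nsec = 900000000 + 500000000 = 1400000000,
 * abs_sec = 100 + 1 + 1 = 102, and after abs_nsec %= NSEC_PER_SEC the
 * deadline is 102.4 s on the monotonic clock, i.e. exactly 1.5 s later.
 */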
       */
      __builtin_ia32_mfence();
      __builtin_ia32_clflush(event);
   }

   *pEvent = anv_event_to_handle(event);

   return VK_SUCCESS;
}

void anv_DestroyEvent(
    VkDevice                                    _device,
    VkEvent                                     _event,
    const VkAllocationCallbacks*                pAllocator)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   ANV_FROM_HANDLE(anv_event, event, _event);

   if (!event)
      return;

   anv_state_pool_free(&device->dynamic_state_pool, event->state);
}

VkResult anv_GetEventStatus(
    VkDevice                                    _device,
    VkEvent                                     _event)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   ANV_FROM_HANDLE(anv_event, event, _event);

   if (!device->info.has_llc) {
      /* Invalidate read cache before reading event written by GPU. */
      __builtin_ia32_clflush(event);
      __builtin_ia32_mfence();
   }

   return event->semaphore;
}

VkResult anv_SetEvent(
    VkDevice                                    _device,
    VkEvent                                     _event)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   ANV_FROM_HANDLE(anv_event, event, _event);

   event->semaphore = VK_EVENT_SET;

   if (!device->info.has_llc) {
      /* Make sure the writes we're flushing have landed. */
      __builtin_ia32_mfence();
      __builtin_ia32_clflush(event);
   }

   return VK_SUCCESS;
}

VkResult anv_ResetEvent(
    VkDevice                                    _device,
    VkEvent                                     _event)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   ANV_FROM_HANDLE(anv_event, event, _event);

   event->semaphore = VK_EVENT_RESET;

   if (!device->info.has_llc) {
      /* Make sure the writes we're flushing have landed. */
      __builtin_ia32_mfence();
      __builtin_ia32_clflush(event);
   }

   return VK_SUCCESS;
}

// Buffer functions

VkResult anv_CreateBuffer(
    VkDevice                                    _device,
    const VkBufferCreateInfo*                   pCreateInfo,
    const VkAllocationCallbacks*                pAllocator,
    VkBuffer*                                   pBuffer)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   struct anv_buffer *buffer;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO);

   buffer = vk_alloc2(&device->alloc, pAllocator, sizeof(*buffer), 8,
                      VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (buffer == NULL)
      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

   buffer->size = pCreateInfo->size;
   buffer->usage = pCreateInfo->usage;
   buffer->bo = NULL;
   buffer->offset = 0;

   *pBuffer = anv_buffer_to_handle(buffer);

   return VK_SUCCESS;
}

void anv_DestroyBuffer(
    VkDevice                                    _device,
    VkBuffer                                    _buffer,
    const VkAllocationCallbacks*                pAllocator)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);

   if (!buffer)
      return;

   vk_free2(&device->alloc, pAllocator, buffer);
}

void
anv_fill_buffer_surface_state(struct anv_device *device, struct anv_state state,
                              enum isl_format format,
                              uint32_t offset, uint32_t range, uint32_t stride)
{
   isl_buffer_fill_state(&device->isl_dev, state.map,
                         .address = offset,
                         .mocs = device->default_mocs,
                         .size = range,
                         .format = format,
                         .stride = stride);

   if (!device->info.has_llc)
      anv_state_clflush(state);
}

void anv_DestroySampler(
    VkDevice                                    _device,
    VkSampler                                   _sampler,
    const VkAllocationCallbacks*                pAllocator)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   ANV_FROM_HANDLE(anv_sampler, sampler, _sampler);

   if (!sampler)
      return;

   vk_free2(&device->alloc, pAllocator, sampler);
}

VkResult anv_CreateFramebuffer(
    VkDevice                                    _device,
    const VkFramebufferCreateInfo*              pCreateInfo,
    const VkAllocationCallbacks*                pAllocator,
    VkFramebuffer*                              pFramebuffer)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   struct anv_framebuffer *framebuffer;

   assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO);

   size_t size = sizeof(*framebuffer) +
                 sizeof(struct anv_image_view *) * pCreateInfo->attachmentCount;
   framebuffer = vk_alloc2(&device->alloc, pAllocator, size, 8,
                           VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   if (framebuffer == NULL)
      return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

   framebuffer->attachment_count = pCreateInfo->attachmentCount;
   for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) {
      VkImageView _iview = pCreateInfo->pAttachments[i];
      framebuffer->attachments[i] = anv_image_view_from_handle(_iview);
   }

   framebuffer->width = pCreateInfo->width;
   framebuffer->height = pCreateInfo->height;
   framebuffer->layers = pCreateInfo->layers;

   *pFramebuffer = anv_framebuffer_to_handle(framebuffer);

   return VK_SUCCESS;
}

void anv_DestroyFramebuffer(
    VkDevice                                    _device,
    VkFramebuffer                               _fb,
    const VkAllocationCallbacks*                pAllocator)
{
   ANV_FROM_HANDLE(anv_device, device, _device);
   ANV_FROM_HANDLE(anv_framebuffer, fb, _fb);

   if (!fb)
      return;

   vk_free2(&device->alloc, pAllocator, fb);
}

/* vk_icd.h does not declare this function, so we declare it here to
 * suppress Wmissing-prototypes.
 */
PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t* pSupportedVersion);

PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t* pSupportedVersion)
{
   /* For the full details on loader interface versioning, see
    * <https://github.com/KhronosGroup/Vulkan-LoaderAndValidationLayers/blob/master/loader/LoaderAndLayerInterface.md>.
    * What follows is a condensed summary, to help you navigate the large and
    * confusing official doc.
    *
    *    - Loader interface v0 is incompatible with later versions. We don't
    *      support it.
    *
    *    - In loader interface v1:
    *        - The first ICD entrypoint called by the loader is
    *          vk_icdGetInstanceProcAddr(). The ICD must statically expose
    *          this entrypoint.
    *        - The ICD must statically expose no other Vulkan symbol unless
    *          it is linked with -Bsymbolic.
    *        - Each dispatchable Vulkan handle created by the ICD must be
    *          a pointer to a struct whose first member is VK_LOADER_DATA.
    *          The ICD must initialize VK_LOADER_DATA.loaderMagic to
    *          ICD_LOADER_MAGIC.
    *        - The loader implements vkCreate{PLATFORM}SurfaceKHR() and
    *          vkDestroySurfaceKHR(). The ICD must be capable of working with
    *          such loader-managed surfaces.
    *
    *    - Loader interface v2 differs from v1 in:
    *        - The first ICD entrypoint called by the loader is
    *          vk_icdNegotiateLoaderICDInterfaceVersion(). The ICD must
    *          statically expose this entrypoint.
    *
    *    - Loader interface v3 differs from v2 in:
    *        - The ICD must implement vkCreate{PLATFORM}SurfaceKHR(),
    *          vkDestroySurfaceKHR(), and other API which uses VkSurfaceKHR,
    *          because the loader no longer does so.
    */
   *pSupportedVersion = MIN2(*pSupportedVersion, 3u);
   return VK_SUCCESS;
}
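
#if 0
/* Illustrative sketch only, never compiled and not part of the driver: one
 * way a hypothetical loader could drive the negotiation above. The loader
 * writes the highest interface version it supports into the variable, the
 * ICD lowers it to what it supports, and the surviving value is what both
 * sides use. The function name example_loader_negotiation and the loader
 * maximum of 4 are made up for this example.
 */
static VkResult
example_loader_negotiation(void)
{
   uint32_t version = 4; /* hypothetical loader maximum */

   VkResult result = vk_icdNegotiateLoaderICDInterfaceVersion(&version);
   if (result != VK_SUCCESS)
      return result;

   /* With this ICD the call clamps version to MIN2(4, 3) == 3, so the
    * loader would proceed using loader interface v3 semantics.
    */
   assert(version == 3);

   return VK_SUCCESS;
}
#endif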