      1 /*
      2  * Copyright © 2015 Intel Corporation
      3  *
      4  * Permission is hereby granted, free of charge, to any person obtaining a
      5  * copy of this software and associated documentation files (the "Software"),
      6  * to deal in the Software without restriction, including without limitation
      7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      8  * and/or sell copies of the Software, and to permit persons to whom the
      9  * Software is furnished to do so, subject to the following conditions:
     10  *
     11  * The above copyright notice and this permission notice (including the next
     12  * paragraph) shall be included in all copies or substantial portions of the
     13  * Software.
     14  *
     15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
     20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
     21  * IN THE SOFTWARE.
     22  */
     23 
     24 #include <dlfcn.h>
     25 #include <assert.h>
     26 #include <stdbool.h>
     27 #include <string.h>
     28 #include <sys/mman.h>
     29 #include <sys/stat.h>
     30 #include <unistd.h>
     31 #include <fcntl.h>
     32 
     33 #include "anv_private.h"
     34 #include "util/strtod.h"
     35 #include "util/debug.h"
     36 
     37 #include "genxml/gen7_pack.h"
     38 
     39 struct anv_dispatch_table dtable;
     40 
     41 static void
     42 compiler_debug_log(void *data, const char *fmt, ...)
     43 { }
     44 
     45 static void
     46 compiler_perf_log(void *data, const char *fmt, ...)
     47 {
     48    va_list args;
     49    va_start(args, fmt);
     50 
     51    if (unlikely(INTEL_DEBUG & DEBUG_PERF))
     52       vfprintf(stderr, fmt, args);
     53 
     54    va_end(args);
     55 }
     56 
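         /* Look up the file that contains |ptr| (via dladdr) and return its
          * mtime.  This gives us a rough "build timestamp" for the driver
          * binary itself.
          */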
     57 static bool
     58 anv_get_function_timestamp(void *ptr, uint32_t* timestamp)
     59 {
     60    Dl_info info;
     61    struct stat st;
     62    if (!dladdr(ptr, &info) || !info.dli_fname)
     63       return false;
     64 
     65    if (stat(info.dli_fname, &st))
     66       return false;
     67 
     68    *timestamp = st.st_mtim.tv_sec;
     69    return true;
     70 }
     71 
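         /* Derive the pipeline cache UUID from the driver binary's timestamp
          * so that caches created by an older build are invalidated after a
          * driver update.
          */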
     72 static bool
     73 anv_device_get_cache_uuid(void *uuid)
     74 {
     75    uint32_t timestamp;
     76 
     77    memset(uuid, 0, VK_UUID_SIZE);
     78    if (!anv_get_function_timestamp(anv_device_get_cache_uuid, &timestamp))
     79       return false;
     80 
      81    snprintf(uuid, VK_UUID_SIZE, "anv-%u", timestamp);
     82    return true;
     83 }
     84 
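         /* Open the DRM render node at |path|, verify that the device and the
          * running kernel support everything we need (chipset id, execbuf2,
          * wait-timeout, WC mmap on non-LLC parts), query topology, and set up
          * the backend compiler and WSI support for this physical device.
          */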
     85 static VkResult
     86 anv_physical_device_init(struct anv_physical_device *device,
     87                          struct anv_instance *instance,
     88                          const char *path)
     89 {
     90    VkResult result;
     91    int fd;
     92 
     93    fd = open(path, O_RDWR | O_CLOEXEC);
     94    if (fd < 0)
     95       return vk_error(VK_ERROR_INCOMPATIBLE_DRIVER);
     96 
     97    device->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
     98    device->instance = instance;
     99 
    100    assert(strlen(path) < ARRAY_SIZE(device->path));
    101    strncpy(device->path, path, ARRAY_SIZE(device->path));
    102 
    103    device->chipset_id = anv_gem_get_param(fd, I915_PARAM_CHIPSET_ID);
    104    if (!device->chipset_id) {
    105       result = vk_error(VK_ERROR_INCOMPATIBLE_DRIVER);
    106       goto fail;
    107    }
    108 
    109    device->name = gen_get_device_name(device->chipset_id);
    110    if (!gen_get_device_info(device->chipset_id, &device->info)) {
    111       result = vk_error(VK_ERROR_INCOMPATIBLE_DRIVER);
    112       goto fail;
    113    }
    114 
    115    if (device->info.is_haswell) {
    116       fprintf(stderr, "WARNING: Haswell Vulkan support is incomplete\n");
    117    } else if (device->info.gen == 7 && !device->info.is_baytrail) {
    118       fprintf(stderr, "WARNING: Ivy Bridge Vulkan support is incomplete\n");
    119    } else if (device->info.gen == 7 && device->info.is_baytrail) {
    120       fprintf(stderr, "WARNING: Bay Trail Vulkan support is incomplete\n");
    121    } else if (device->info.gen >= 8) {
     122       /* Broadwell, Cherryview, Skylake, Broxton, and Kaby Lake are as fully
    123        * supported as anything */
    124    } else {
    125       result = vk_errorf(VK_ERROR_INCOMPATIBLE_DRIVER,
    126                          "Vulkan not yet supported on %s", device->name);
    127       goto fail;
    128    }
    129 
    130    device->cmd_parser_version = -1;
    131    if (device->info.gen == 7) {
    132       device->cmd_parser_version =
    133          anv_gem_get_param(fd, I915_PARAM_CMD_PARSER_VERSION);
    134       if (device->cmd_parser_version == -1) {
    135          result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED,
    136                             "failed to get command parser version");
    137          goto fail;
    138       }
    139    }
    140 
    141    if (anv_gem_get_aperture(fd, &device->aperture_size) == -1) {
    142       result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED,
    143                          "failed to get aperture size: %m");
    144       goto fail;
    145    }
    146 
    147    if (!anv_gem_get_param(fd, I915_PARAM_HAS_WAIT_TIMEOUT)) {
    148       result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED,
    149                          "kernel missing gem wait");
    150       goto fail;
    151    }
    152 
    153    if (!anv_gem_get_param(fd, I915_PARAM_HAS_EXECBUF2)) {
    154       result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED,
    155                          "kernel missing execbuf2");
    156       goto fail;
    157    }
    158 
    159    if (!device->info.has_llc &&
    160        anv_gem_get_param(fd, I915_PARAM_MMAP_VERSION) < 1) {
    161       result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED,
    162                          "kernel missing wc mmap");
    163       goto fail;
    164    }
    165 
    166    if (!anv_device_get_cache_uuid(device->uuid)) {
    167       result = vk_errorf(VK_ERROR_INITIALIZATION_FAILED,
    168                          "cannot generate UUID");
    169       goto fail;
    170    }
    171    bool swizzled = anv_gem_get_bit6_swizzle(fd, I915_TILING_X);
    172 
    173    /* GENs prior to 8 do not support EU/Subslice info */
    174    if (device->info.gen >= 8) {
    175       device->subslice_total = anv_gem_get_param(fd, I915_PARAM_SUBSLICE_TOTAL);
    176       device->eu_total = anv_gem_get_param(fd, I915_PARAM_EU_TOTAL);
    177 
    178       /* Without this information, we cannot get the right Braswell
    179        * brandstrings, and we have to use conservative numbers for GPGPU on
    180        * many platforms, but otherwise, things will just work.
    181        */
    182       if (device->subslice_total < 1 || device->eu_total < 1) {
    183          fprintf(stderr, "WARNING: Kernel 4.1 required to properly"
    184                          " query GPU properties.\n");
    185       }
    186    } else if (device->info.gen == 7) {
    187       device->subslice_total = 1 << (device->info.gt - 1);
    188    }
    189 
    190    if (device->info.is_cherryview &&
    191        device->subslice_total > 0 && device->eu_total > 0) {
    192       /* Logical CS threads = EUs per subslice * 7 threads per EU */
    193       uint32_t max_cs_threads = device->eu_total / device->subslice_total * 7;
    194 
    195       /* Fuse configurations may give more threads than expected, never less. */
    196       if (max_cs_threads > device->info.max_cs_threads)
    197          device->info.max_cs_threads = max_cs_threads;
    198    }
    199 
    200    brw_process_intel_debug_variable();
    201 
    202    device->compiler = brw_compiler_create(NULL, &device->info);
    203    if (device->compiler == NULL) {
    204       result = vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
    205       goto fail;
    206    }
    207    device->compiler->shader_debug_log = compiler_debug_log;
    208    device->compiler->shader_perf_log = compiler_perf_log;
    209 
    210    result = anv_init_wsi(device);
    211    if (result != VK_SUCCESS) {
    212       ralloc_free(device->compiler);
    213       goto fail;
    214    }
    215 
    216    isl_device_init(&device->isl_dev, &device->info, swizzled);
    217 
    218    close(fd);
    219    return VK_SUCCESS;
    220 
    221 fail:
    222    close(fd);
    223    return result;
    224 }
    225 
    226 static void
    227 anv_physical_device_finish(struct anv_physical_device *device)
    228 {
    229    anv_finish_wsi(device);
    230    ralloc_free(device->compiler);
    231 }
    232 
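         /* Instance-level extensions we advertise.  The platform-specific
          * surface extensions are compiled in only when the corresponding WSI
          * platform is enabled at build time.
          */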
    233 static const VkExtensionProperties global_extensions[] = {
    234    {
    235       .extensionName = VK_KHR_SURFACE_EXTENSION_NAME,
    236       .specVersion = 25,
    237    },
    238 #ifdef VK_USE_PLATFORM_XCB_KHR
    239    {
    240       .extensionName = VK_KHR_XCB_SURFACE_EXTENSION_NAME,
    241       .specVersion = 6,
    242    },
    243 #endif
    244 #ifdef VK_USE_PLATFORM_XLIB_KHR
    245    {
    246       .extensionName = VK_KHR_XLIB_SURFACE_EXTENSION_NAME,
    247       .specVersion = 6,
    248    },
    249 #endif
    250 #ifdef VK_USE_PLATFORM_WAYLAND_KHR
    251    {
    252       .extensionName = VK_KHR_WAYLAND_SURFACE_EXTENSION_NAME,
    253       .specVersion = 5,
    254    },
    255 #endif
    256 };
    257 
    258 static const VkExtensionProperties device_extensions[] = {
    259    {
    260       .extensionName = VK_KHR_SWAPCHAIN_EXTENSION_NAME,
    261       .specVersion = 68,
    262    },
    263    {
    264       .extensionName = VK_KHR_SAMPLER_MIRROR_CLAMP_TO_EDGE_EXTENSION_NAME,
    265       .specVersion = 1,
    266    }
    267 };
    268 
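         /* Default CPU allocation callbacks, used whenever the application does
          * not provide its own VkAllocationCallbacks.  The requested alignment
          * is simply ignored; malloc()/realloc() alignment is assumed to be
          * sufficient for the driver's own allocations.
          */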
    269 static void *
    270 default_alloc_func(void *pUserData, size_t size, size_t align,
    271                    VkSystemAllocationScope allocationScope)
    272 {
    273    return malloc(size);
    274 }
    275 
    276 static void *
    277 default_realloc_func(void *pUserData, void *pOriginal, size_t size,
    278                      size_t align, VkSystemAllocationScope allocationScope)
    279 {
    280    return realloc(pOriginal, size);
    281 }
    282 
    283 static void
    284 default_free_func(void *pUserData, void *pMemory)
    285 {
    286    free(pMemory);
    287 }
    288 
    289 static const VkAllocationCallbacks default_alloc = {
    290    .pUserData = NULL,
    291    .pfnAllocation = default_alloc_func,
    292    .pfnReallocation = default_realloc_func,
    293    .pfnFree = default_free_func,
    294 };
    295 
    296 VkResult anv_CreateInstance(
    297     const VkInstanceCreateInfo*                 pCreateInfo,
    298     const VkAllocationCallbacks*                pAllocator,
    299     VkInstance*                                 pInstance)
    300 {
    301    struct anv_instance *instance;
    302 
    303    assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_INSTANCE_CREATE_INFO);
    304 
    305    uint32_t client_version;
    306    if (pCreateInfo->pApplicationInfo &&
    307        pCreateInfo->pApplicationInfo->apiVersion != 0) {
    308       client_version = pCreateInfo->pApplicationInfo->apiVersion;
    309    } else {
    310       client_version = VK_MAKE_VERSION(1, 0, 0);
    311    }
    312 
    313    if (VK_MAKE_VERSION(1, 0, 0) > client_version ||
    314        client_version > VK_MAKE_VERSION(1, 0, 0xfff)) {
    315       return vk_errorf(VK_ERROR_INCOMPATIBLE_DRIVER,
    316                        "Client requested version %d.%d.%d",
    317                        VK_VERSION_MAJOR(client_version),
    318                        VK_VERSION_MINOR(client_version),
    319                        VK_VERSION_PATCH(client_version));
    320    }
    321 
    322    for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) {
    323       bool found = false;
    324       for (uint32_t j = 0; j < ARRAY_SIZE(global_extensions); j++) {
    325          if (strcmp(pCreateInfo->ppEnabledExtensionNames[i],
    326                     global_extensions[j].extensionName) == 0) {
    327             found = true;
    328             break;
    329          }
    330       }
    331       if (!found)
    332          return vk_error(VK_ERROR_EXTENSION_NOT_PRESENT);
    333    }
    334 
    335    instance = vk_alloc2(&default_alloc, pAllocator, sizeof(*instance), 8,
    336                          VK_SYSTEM_ALLOCATION_SCOPE_INSTANCE);
    337    if (!instance)
    338       return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
    339 
    340    instance->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
    341 
    342    if (pAllocator)
    343       instance->alloc = *pAllocator;
    344    else
    345       instance->alloc = default_alloc;
    346 
    347    instance->apiVersion = client_version;
    348    instance->physicalDeviceCount = -1;
    349 
    350    _mesa_locale_init();
    351 
    352    VG(VALGRIND_CREATE_MEMPOOL(instance, 0, false));
    353 
    354    *pInstance = anv_instance_to_handle(instance);
    355 
    356    return VK_SUCCESS;
    357 }
    358 
    359 void anv_DestroyInstance(
    360     VkInstance                                  _instance,
    361     const VkAllocationCallbacks*                pAllocator)
    362 {
    363    ANV_FROM_HANDLE(anv_instance, instance, _instance);
    364 
    365    if (!instance)
    366       return;
    367 
    368    if (instance->physicalDeviceCount > 0) {
    369       /* We support at most one physical device. */
    370       assert(instance->physicalDeviceCount == 1);
    371       anv_physical_device_finish(&instance->physicalDevice);
    372    }
    373 
    374    VG(VALGRIND_DESTROY_MEMPOOL(instance));
    375 
    376    _mesa_locale_fini();
    377 
    378    vk_free(&instance->alloc, instance);
    379 }
    380 
    381 VkResult anv_EnumeratePhysicalDevices(
    382     VkInstance                                  _instance,
    383     uint32_t*                                   pPhysicalDeviceCount,
    384     VkPhysicalDevice*                           pPhysicalDevices)
    385 {
    386    ANV_FROM_HANDLE(anv_instance, instance, _instance);
    387    VkResult result;
    388 
    389    if (instance->physicalDeviceCount < 0) {
    390       char path[20];
    391       for (unsigned i = 0; i < 8; i++) {
    392          snprintf(path, sizeof(path), "/dev/dri/renderD%d", 128 + i);
    393          result = anv_physical_device_init(&instance->physicalDevice,
    394                                            instance, path);
    395          if (result != VK_ERROR_INCOMPATIBLE_DRIVER)
    396             break;
    397       }
    398 
    399       if (result == VK_ERROR_INCOMPATIBLE_DRIVER) {
    400          instance->physicalDeviceCount = 0;
    401       } else if (result == VK_SUCCESS) {
    402          instance->physicalDeviceCount = 1;
    403       } else {
    404          return result;
    405       }
    406    }
    407 
    408    /* pPhysicalDeviceCount is an out parameter if pPhysicalDevices is NULL;
    409     * otherwise it's an inout parameter.
    410     *
    411     * The Vulkan spec (git aaed022) says:
    412     *
    413     *    pPhysicalDeviceCount is a pointer to an unsigned integer variable
    414     *    that is initialized with the number of devices the application is
    415     *    prepared to receive handles to. pname:pPhysicalDevices is pointer to
    416     *    an array of at least this many VkPhysicalDevice handles [...].
    417     *
    418     *    Upon success, if pPhysicalDevices is NULL, vkEnumeratePhysicalDevices
    419     *    overwrites the contents of the variable pointed to by
     420     * pPhysicalDeviceCount with the number of physical devices in the
    421     *    instance; otherwise, vkEnumeratePhysicalDevices overwrites
    422     *    pPhysicalDeviceCount with the number of physical handles written to
    423     *    pPhysicalDevices.
    424     */
    425    if (!pPhysicalDevices) {
    426       *pPhysicalDeviceCount = instance->physicalDeviceCount;
     427    } else if (*pPhysicalDeviceCount >= 1 && instance->physicalDeviceCount == 1) {
    428       pPhysicalDevices[0] = anv_physical_device_to_handle(&instance->physicalDevice);
    429       *pPhysicalDeviceCount = 1;
    430    } else if (*pPhysicalDeviceCount < instance->physicalDeviceCount) {
    431       return VK_INCOMPLETE;
    432    } else {
    433       *pPhysicalDeviceCount = 0;
    434    }
    435 
    436    return VK_SUCCESS;
    437 }
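
         /* For reference, the client-side usage this implements is the usual
          * two-call idiom (a sketch, not driver code):
          *
          *    uint32_t count = 0;
          *    vkEnumeratePhysicalDevices(instance, &count, NULL);
          *    VkPhysicalDevice devices[count];
          *    vkEnumeratePhysicalDevices(instance, &count, devices);
          */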
    438 
    439 void anv_GetPhysicalDeviceFeatures(
    440     VkPhysicalDevice                            physicalDevice,
    441     VkPhysicalDeviceFeatures*                   pFeatures)
    442 {
    443    ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice);
    444 
    445    *pFeatures = (VkPhysicalDeviceFeatures) {
    446       .robustBufferAccess                       = true,
    447       .fullDrawIndexUint32                      = true,
    448       .imageCubeArray                           = true,
    449       .independentBlend                         = true,
    450       .geometryShader                           = true,
    451       .tessellationShader                       = true,
    452       .sampleRateShading                        = true,
    453       .dualSrcBlend                             = true,
    454       .logicOp                                  = true,
    455       .multiDrawIndirect                        = false,
    456       .drawIndirectFirstInstance                = true,
    457       .depthClamp                               = true,
    458       .depthBiasClamp                           = true,
    459       .fillModeNonSolid                         = true,
    460       .depthBounds                              = false,
    461       .wideLines                                = true,
    462       .largePoints                              = true,
    463       .alphaToOne                               = true,
    464       .multiViewport                            = true,
    465       .samplerAnisotropy                        = true,
    466       .textureCompressionETC2                   = pdevice->info.gen >= 8 ||
    467                                                   pdevice->info.is_baytrail,
    468       .textureCompressionASTC_LDR               = pdevice->info.gen >= 9, /* FINISHME CHV */
    469       .textureCompressionBC                     = true,
    470       .occlusionQueryPrecise                    = true,
    471       .pipelineStatisticsQuery                  = false,
    472       .fragmentStoresAndAtomics                 = true,
    473       .shaderTessellationAndGeometryPointSize   = true,
    474       .shaderImageGatherExtended                = true,
    475       .shaderStorageImageExtendedFormats        = true,
    476       .shaderStorageImageMultisample            = false,
    477       .shaderStorageImageReadWithoutFormat      = false,
    478       .shaderStorageImageWriteWithoutFormat     = false,
    479       .shaderUniformBufferArrayDynamicIndexing  = true,
    480       .shaderSampledImageArrayDynamicIndexing   = true,
    481       .shaderStorageBufferArrayDynamicIndexing  = true,
    482       .shaderStorageImageArrayDynamicIndexing   = true,
    483       .shaderClipDistance                       = true,
    484       .shaderCullDistance                       = true,
    485       .shaderFloat64                            = pdevice->info.gen >= 8,
    486       .shaderInt64                              = false,
    487       .shaderInt16                              = false,
    488       .shaderResourceMinLod                     = false,
    489       .variableMultisampleRate                  = false,
    490       .inheritedQueries                         = false,
    491    };
    492 
    493    /* We can't do image stores in vec4 shaders */
    494    pFeatures->vertexPipelineStoresAndAtomics =
    495       pdevice->compiler->scalar_stage[MESA_SHADER_VERTEX] &&
    496       pdevice->compiler->scalar_stage[MESA_SHADER_GEOMETRY];
    497 }
    498 
    499 void anv_GetPhysicalDeviceProperties(
    500     VkPhysicalDevice                            physicalDevice,
    501     VkPhysicalDeviceProperties*                 pProperties)
    502 {
    503    ANV_FROM_HANDLE(anv_physical_device, pdevice, physicalDevice);
    504    const struct gen_device_info *devinfo = &pdevice->info;
    505 
    506    const float time_stamp_base = devinfo->gen >= 9 ? 83.333 : 80.0;
    507 
    508    /* See assertions made when programming the buffer surface state. */
    509    const uint32_t max_raw_buffer_sz = devinfo->gen >= 7 ?
    510                                       (1ul << 30) : (1ul << 27);
    511 
    512    VkSampleCountFlags sample_counts =
    513       isl_device_get_sample_counts(&pdevice->isl_dev);
    514 
    515    VkPhysicalDeviceLimits limits = {
    516       .maxImageDimension1D                      = (1 << 14),
    517       .maxImageDimension2D                      = (1 << 14),
    518       .maxImageDimension3D                      = (1 << 11),
    519       .maxImageDimensionCube                    = (1 << 14),
    520       .maxImageArrayLayers                      = (1 << 11),
    521       .maxTexelBufferElements                   = 128 * 1024 * 1024,
    522       .maxUniformBufferRange                    = (1ul << 27),
    523       .maxStorageBufferRange                    = max_raw_buffer_sz,
    524       .maxPushConstantsSize                     = MAX_PUSH_CONSTANTS_SIZE,
    525       .maxMemoryAllocationCount                 = UINT32_MAX,
    526       .maxSamplerAllocationCount                = 64 * 1024,
    527       .bufferImageGranularity                   = 64, /* A cache line */
    528       .sparseAddressSpaceSize                   = 0,
    529       .maxBoundDescriptorSets                   = MAX_SETS,
    530       .maxPerStageDescriptorSamplers            = 64,
    531       .maxPerStageDescriptorUniformBuffers      = 64,
    532       .maxPerStageDescriptorStorageBuffers      = 64,
    533       .maxPerStageDescriptorSampledImages       = 64,
    534       .maxPerStageDescriptorStorageImages       = 64,
    535       .maxPerStageDescriptorInputAttachments    = 64,
    536       .maxPerStageResources                     = 128,
    537       .maxDescriptorSetSamplers                 = 256,
    538       .maxDescriptorSetUniformBuffers           = 256,
    539       .maxDescriptorSetUniformBuffersDynamic    = MAX_DYNAMIC_BUFFERS / 2,
    540       .maxDescriptorSetStorageBuffers           = 256,
    541       .maxDescriptorSetStorageBuffersDynamic    = MAX_DYNAMIC_BUFFERS / 2,
    542       .maxDescriptorSetSampledImages            = 256,
    543       .maxDescriptorSetStorageImages            = 256,
    544       .maxDescriptorSetInputAttachments         = 256,
    545       .maxVertexInputAttributes                 = 32,
    546       .maxVertexInputBindings                   = 32,
    547       .maxVertexInputAttributeOffset            = 2047,
    548       .maxVertexInputBindingStride              = 2048,
    549       .maxVertexOutputComponents                = 128,
    550       .maxTessellationGenerationLevel           = 64,
    551       .maxTessellationPatchSize                 = 32,
    552       .maxTessellationControlPerVertexInputComponents = 128,
    553       .maxTessellationControlPerVertexOutputComponents = 128,
    554       .maxTessellationControlPerPatchOutputComponents = 128,
    555       .maxTessellationControlTotalOutputComponents = 2048,
    556       .maxTessellationEvaluationInputComponents = 128,
    557       .maxTessellationEvaluationOutputComponents = 128,
    558       .maxGeometryShaderInvocations             = 32,
    559       .maxGeometryInputComponents               = 64,
    560       .maxGeometryOutputComponents              = 128,
    561       .maxGeometryOutputVertices                = 256,
    562       .maxGeometryTotalOutputComponents         = 1024,
    563       .maxFragmentInputComponents               = 128,
    564       .maxFragmentOutputAttachments             = 8,
    565       .maxFragmentDualSrcAttachments            = 1,
    566       .maxFragmentCombinedOutputResources       = 8,
    567       .maxComputeSharedMemorySize               = 32768,
    568       .maxComputeWorkGroupCount                 = { 65535, 65535, 65535 },
    569       .maxComputeWorkGroupInvocations           = 16 * devinfo->max_cs_threads,
    570       .maxComputeWorkGroupSize = {
    571          16 * devinfo->max_cs_threads,
    572          16 * devinfo->max_cs_threads,
    573          16 * devinfo->max_cs_threads,
    574       },
    575       .subPixelPrecisionBits                    = 4 /* FIXME */,
    576       .subTexelPrecisionBits                    = 4 /* FIXME */,
    577       .mipmapPrecisionBits                      = 4 /* FIXME */,
    578       .maxDrawIndexedIndexValue                 = UINT32_MAX,
    579       .maxDrawIndirectCount                     = UINT32_MAX,
    580       .maxSamplerLodBias                        = 16,
    581       .maxSamplerAnisotropy                     = 16,
    582       .maxViewports                             = MAX_VIEWPORTS,
    583       .maxViewportDimensions                    = { (1 << 14), (1 << 14) },
    584       .viewportBoundsRange                      = { INT16_MIN, INT16_MAX },
    585       .viewportSubPixelBits                     = 13, /* We take a float? */
    586       .minMemoryMapAlignment                    = 4096, /* A page */
    587       .minTexelBufferOffsetAlignment            = 1,
    588       .minUniformBufferOffsetAlignment          = 16,
    589       .minStorageBufferOffsetAlignment          = 4,
    590       .minTexelOffset                           = -8,
    591       .maxTexelOffset                           = 7,
    592       .minTexelGatherOffset                     = -32,
    593       .maxTexelGatherOffset                     = 31,
    594       .minInterpolationOffset                   = -0.5,
    595       .maxInterpolationOffset                   = 0.4375,
    596       .subPixelInterpolationOffsetBits          = 4,
    597       .maxFramebufferWidth                      = (1 << 14),
    598       .maxFramebufferHeight                     = (1 << 14),
    599       .maxFramebufferLayers                     = (1 << 11),
    600       .framebufferColorSampleCounts             = sample_counts,
    601       .framebufferDepthSampleCounts             = sample_counts,
    602       .framebufferStencilSampleCounts           = sample_counts,
    603       .framebufferNoAttachmentsSampleCounts     = sample_counts,
    604       .maxColorAttachments                      = MAX_RTS,
    605       .sampledImageColorSampleCounts            = sample_counts,
    606       .sampledImageIntegerSampleCounts          = VK_SAMPLE_COUNT_1_BIT,
    607       .sampledImageDepthSampleCounts            = sample_counts,
    608       .sampledImageStencilSampleCounts          = sample_counts,
    609       .storageImageSampleCounts                 = VK_SAMPLE_COUNT_1_BIT,
    610       .maxSampleMaskWords                       = 1,
    611       .timestampComputeAndGraphics              = false,
    612       .timestampPeriod                          = time_stamp_base,
    613       .maxClipDistances                         = 8,
    614       .maxCullDistances                         = 8,
    615       .maxCombinedClipAndCullDistances          = 8,
    616       .discreteQueuePriorities                  = 1,
    617       .pointSizeRange                           = { 0.125, 255.875 },
    618       .lineWidthRange                           = { 0.0, 7.9921875 },
    619       .pointSizeGranularity                     = (1.0 / 8.0),
    620       .lineWidthGranularity                     = (1.0 / 128.0),
    621       .strictLines                              = false, /* FINISHME */
    622       .standardSampleLocations                  = true,
    623       .optimalBufferCopyOffsetAlignment         = 128,
    624       .optimalBufferCopyRowPitchAlignment       = 128,
    625       .nonCoherentAtomSize                      = 64,
    626    };
    627 
    628    *pProperties = (VkPhysicalDeviceProperties) {
    629       .apiVersion = VK_MAKE_VERSION(1, 0, 5),
    630       .driverVersion = 1,
    631       .vendorID = 0x8086,
    632       .deviceID = pdevice->chipset_id,
    633       .deviceType = VK_PHYSICAL_DEVICE_TYPE_INTEGRATED_GPU,
    634       .limits = limits,
    635       .sparseProperties = {0}, /* Broadwell doesn't do sparse. */
    636    };
    637 
    638    strcpy(pProperties->deviceName, pdevice->name);
    639    memcpy(pProperties->pipelineCacheUUID, pdevice->uuid, VK_UUID_SIZE);
    640 }
    641 
    642 void anv_GetPhysicalDeviceQueueFamilyProperties(
    643     VkPhysicalDevice                            physicalDevice,
    644     uint32_t*                                   pCount,
    645     VkQueueFamilyProperties*                    pQueueFamilyProperties)
    646 {
    647    if (pQueueFamilyProperties == NULL) {
    648       *pCount = 1;
    649       return;
    650    }
    651 
    652    /* The spec implicitly allows the incoming count to be 0. From the Vulkan
    653     * 1.0.38 spec, Section 4.1 Physical Devices:
    654     *
    655     *     If the value referenced by pQueueFamilyPropertyCount is not 0 [then
    656     *     do stuff].
    657     */
    658    if (*pCount == 0)
    659       return;
    660 
    661    *pQueueFamilyProperties = (VkQueueFamilyProperties) {
    662       .queueFlags = VK_QUEUE_GRAPHICS_BIT |
    663                     VK_QUEUE_COMPUTE_BIT |
    664                     VK_QUEUE_TRANSFER_BIT,
    665       .queueCount = 1,
    666       .timestampValidBits = 36, /* XXX: Real value here */
    667       .minImageTransferGranularity = (VkExtent3D) { 1, 1, 1 },
    668    };
    669 
    670    *pCount = 1;
    671 }
    672 
    673 void anv_GetPhysicalDeviceMemoryProperties(
    674     VkPhysicalDevice                            physicalDevice,
    675     VkPhysicalDeviceMemoryProperties*           pMemoryProperties)
    676 {
    677    ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice);
    678    VkDeviceSize heap_size;
    679 
    680    /* Reserve some wiggle room for the driver by exposing only 75% of the
    681     * aperture to the heap.
    682     */
    683    heap_size = 3 * physical_device->aperture_size / 4;
    684 
    685    if (physical_device->info.has_llc) {
    686       /* Big core GPUs share LLC with the CPU and thus one memory type can be
    687        * both cached and coherent at the same time.
    688        */
    689       pMemoryProperties->memoryTypeCount = 1;
    690       pMemoryProperties->memoryTypes[0] = (VkMemoryType) {
    691          .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
    692                           VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
    693                           VK_MEMORY_PROPERTY_HOST_COHERENT_BIT |
    694                           VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
    695          .heapIndex = 0,
    696       };
    697    } else {
    698       /* The spec requires that we expose a host-visible, coherent memory
    699        * type, but Atom GPUs don't share LLC. Thus we offer two memory types
     700        * to give the application a choice between cached-but-not-coherent and
     701        * coherent-but-uncached (write-combined) memory.
    702        */
    703       pMemoryProperties->memoryTypeCount = 2;
    704       pMemoryProperties->memoryTypes[0] = (VkMemoryType) {
    705          .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
    706                           VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
    707                           VK_MEMORY_PROPERTY_HOST_COHERENT_BIT,
    708          .heapIndex = 0,
    709       };
    710       pMemoryProperties->memoryTypes[1] = (VkMemoryType) {
    711          .propertyFlags = VK_MEMORY_PROPERTY_DEVICE_LOCAL_BIT |
    712                           VK_MEMORY_PROPERTY_HOST_VISIBLE_BIT |
    713                           VK_MEMORY_PROPERTY_HOST_CACHED_BIT,
    714          .heapIndex = 0,
    715       };
    716    }
    717 
    718    pMemoryProperties->memoryHeapCount = 1;
    719    pMemoryProperties->memoryHeaps[0] = (VkMemoryHeap) {
    720       .size = heap_size,
    721       .flags = VK_MEMORY_HEAP_DEVICE_LOCAL_BIT,
    722    };
    723 }
    724 
    725 PFN_vkVoidFunction anv_GetInstanceProcAddr(
    726     VkInstance                                  instance,
    727     const char*                                 pName)
    728 {
    729    return anv_lookup_entrypoint(NULL, pName);
    730 }
    731 
    732 /* With version 1+ of the loader interface the ICD should expose
    733  * vk_icdGetInstanceProcAddr to work around certain LD_PRELOAD issues seen in apps.
    734  */
    735 PUBLIC
    736 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(
    737     VkInstance                                  instance,
    738     const char*                                 pName);
    739 
    740 PUBLIC
    741 VKAPI_ATTR PFN_vkVoidFunction VKAPI_CALL vk_icdGetInstanceProcAddr(
    742     VkInstance                                  instance,
    743     const char*                                 pName)
    744 {
    745    return anv_GetInstanceProcAddr(instance, pName);
    746 }
    747 
    748 PFN_vkVoidFunction anv_GetDeviceProcAddr(
    749     VkDevice                                    _device,
    750     const char*                                 pName)
    751 {
    752    ANV_FROM_HANDLE(anv_device, device, _device);
    753    return anv_lookup_entrypoint(&device->info, pName);
    754 }
    755 
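         /* anv exposes a single queue per device; the queue object only stores
          * the loader magic, a back-pointer to the device, and a pointer to the
          * device's surface state pool.
          */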
    756 static void
    757 anv_queue_init(struct anv_device *device, struct anv_queue *queue)
    758 {
    759    queue->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
    760    queue->device = device;
    761    queue->pool = &device->surface_state_pool;
    762 }
    763 
    764 static void
    765 anv_queue_finish(struct anv_queue *queue)
    766 {
    767 }
    768 
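         /* Allocate a state from |pool|, copy |p| into it and, on non-LLC
          * platforms, clflush it so the GPU sees the data.
          */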
    769 static struct anv_state
    770 anv_state_pool_emit_data(struct anv_state_pool *pool, size_t size, size_t align, const void *p)
    771 {
    772    struct anv_state state;
    773 
    774    state = anv_state_pool_alloc(pool, size, align);
    775    memcpy(state.map, p, size);
    776 
    777    if (!pool->block_pool->device->info.has_llc)
    778       anv_state_clflush(state);
    779 
    780    return state;
    781 }
    782 
    783 struct gen8_border_color {
    784    union {
    785       float float32[4];
    786       uint32_t uint32[4];
    787    };
    788    /* Pad out to 64 bytes */
    789    uint32_t _pad[12];
    790 };
    791 
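         /* Upload one 64-byte entry per VkBorderColor value into the dynamic
          * state pool; sampler state can then reference these by offset.
          */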
    792 static void
    793 anv_device_init_border_colors(struct anv_device *device)
    794 {
    795    static const struct gen8_border_color border_colors[] = {
    796       [VK_BORDER_COLOR_FLOAT_TRANSPARENT_BLACK] =  { .float32 = { 0.0, 0.0, 0.0, 0.0 } },
    797       [VK_BORDER_COLOR_FLOAT_OPAQUE_BLACK] =       { .float32 = { 0.0, 0.0, 0.0, 1.0 } },
    798       [VK_BORDER_COLOR_FLOAT_OPAQUE_WHITE] =       { .float32 = { 1.0, 1.0, 1.0, 1.0 } },
    799       [VK_BORDER_COLOR_INT_TRANSPARENT_BLACK] =    { .uint32 = { 0, 0, 0, 0 } },
    800       [VK_BORDER_COLOR_INT_OPAQUE_BLACK] =         { .uint32 = { 0, 0, 0, 1 } },
    801       [VK_BORDER_COLOR_INT_OPAQUE_WHITE] =         { .uint32 = { 1, 1, 1, 1 } },
    802    };
    803 
    804    device->border_colors = anv_state_pool_emit_data(&device->dynamic_state_pool,
    805                                                     sizeof(border_colors), 64,
    806                                                     border_colors);
    807 }
    808 
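         /* Submit a small, self-contained batch (no relocations) by copying it
          * into a pool BO, executing it with execbuf2 on the render ring, and
          * blocking until it completes.
          */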
    809 VkResult
    810 anv_device_submit_simple_batch(struct anv_device *device,
    811                                struct anv_batch *batch)
    812 {
    813    struct drm_i915_gem_execbuffer2 execbuf;
    814    struct drm_i915_gem_exec_object2 exec2_objects[1];
    815    struct anv_bo bo, *exec_bos[1];
    816    VkResult result = VK_SUCCESS;
    817    uint32_t size;
    818    int64_t timeout;
    819    int ret;
    820 
    821    /* Kernel driver requires 8 byte aligned batch length */
    822    size = align_u32(batch->next - batch->start, 8);
    823    result = anv_bo_pool_alloc(&device->batch_bo_pool, &bo, size);
    824    if (result != VK_SUCCESS)
    825       return result;
    826 
    827    memcpy(bo.map, batch->start, size);
    828    if (!device->info.has_llc)
    829       anv_clflush_range(bo.map, size);
    830 
    831    exec_bos[0] = &bo;
    832    exec2_objects[0].handle = bo.gem_handle;
    833    exec2_objects[0].relocation_count = 0;
    834    exec2_objects[0].relocs_ptr = 0;
    835    exec2_objects[0].alignment = 0;
    836    exec2_objects[0].offset = bo.offset;
    837    exec2_objects[0].flags = 0;
    838    exec2_objects[0].rsvd1 = 0;
    839    exec2_objects[0].rsvd2 = 0;
    840 
    841    execbuf.buffers_ptr = (uintptr_t) exec2_objects;
    842    execbuf.buffer_count = 1;
    843    execbuf.batch_start_offset = 0;
    844    execbuf.batch_len = size;
    845    execbuf.cliprects_ptr = 0;
    846    execbuf.num_cliprects = 0;
    847    execbuf.DR1 = 0;
    848    execbuf.DR4 = 0;
    849 
    850    execbuf.flags =
    851       I915_EXEC_HANDLE_LUT | I915_EXEC_NO_RELOC | I915_EXEC_RENDER;
    852    execbuf.rsvd1 = device->context_id;
    853    execbuf.rsvd2 = 0;
    854 
    855    result = anv_device_execbuf(device, &execbuf, exec_bos);
    856    if (result != VK_SUCCESS)
    857       goto fail;
    858 
    859    timeout = INT64_MAX;
    860    ret = anv_gem_wait(device, bo.gem_handle, &timeout);
    861    if (ret != 0) {
    862       /* We don't know the real error. */
    863       result = vk_errorf(VK_ERROR_DEVICE_LOST, "execbuf2 failed: %m");
    864       goto fail;
    865    }
    866 
    867  fail:
    868    anv_bo_pool_free(&device->batch_bo_pool, &bo);
    869 
    870    return result;
    871 }
    872 
    873 VkResult anv_CreateDevice(
    874     VkPhysicalDevice                            physicalDevice,
    875     const VkDeviceCreateInfo*                   pCreateInfo,
    876     const VkAllocationCallbacks*                pAllocator,
    877     VkDevice*                                   pDevice)
    878 {
    879    ANV_FROM_HANDLE(anv_physical_device, physical_device, physicalDevice);
    880    VkResult result;
    881    struct anv_device *device;
    882 
    883    assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_DEVICE_CREATE_INFO);
    884 
    885    for (uint32_t i = 0; i < pCreateInfo->enabledExtensionCount; i++) {
    886       bool found = false;
    887       for (uint32_t j = 0; j < ARRAY_SIZE(device_extensions); j++) {
    888          if (strcmp(pCreateInfo->ppEnabledExtensionNames[i],
    889                     device_extensions[j].extensionName) == 0) {
    890             found = true;
    891             break;
    892          }
    893       }
    894       if (!found)
    895          return vk_error(VK_ERROR_EXTENSION_NOT_PRESENT);
    896    }
    897 
    898    device = vk_alloc2(&physical_device->instance->alloc, pAllocator,
    899                        sizeof(*device), 8,
    900                        VK_SYSTEM_ALLOCATION_SCOPE_DEVICE);
    901    if (!device)
    902       return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
    903 
    904    device->_loader_data.loaderMagic = ICD_LOADER_MAGIC;
    905    device->instance = physical_device->instance;
    906    device->chipset_id = physical_device->chipset_id;
    907 
    908    if (pAllocator)
    909       device->alloc = *pAllocator;
    910    else
    911       device->alloc = physical_device->instance->alloc;
    912 
    913    /* XXX(chadv): Can we dup() physicalDevice->fd here? */
    914    device->fd = open(physical_device->path, O_RDWR | O_CLOEXEC);
    915    if (device->fd == -1) {
    916       result = vk_error(VK_ERROR_INITIALIZATION_FAILED);
    917       goto fail_device;
    918    }
    919 
    920    device->context_id = anv_gem_create_context(device);
    921    if (device->context_id == -1) {
    922       result = vk_error(VK_ERROR_INITIALIZATION_FAILED);
    923       goto fail_fd;
    924    }
    925 
    926    device->info = physical_device->info;
    927    device->isl_dev = physical_device->isl_dev;
    928 
    929    /* On Broadwell and later, we can use batch chaining to more efficiently
    930     * implement growing command buffers.  Prior to Haswell, the kernel
    931     * command parser gets in the way and we have to fall back to growing
    932     * the batch.
    933     */
    934    device->can_chain_batches = device->info.gen >= 8;
    935 
    936    device->robust_buffer_access = pCreateInfo->pEnabledFeatures &&
    937       pCreateInfo->pEnabledFeatures->robustBufferAccess;
    938 
    939    pthread_mutex_init(&device->mutex, NULL);
    940 
    941    pthread_condattr_t condattr;
    942    pthread_condattr_init(&condattr);
    943    pthread_condattr_setclock(&condattr, CLOCK_MONOTONIC);
     944    pthread_cond_init(&device->queue_submit, &condattr);
    945    pthread_condattr_destroy(&condattr);
    946 
    947    anv_bo_pool_init(&device->batch_bo_pool, device);
    948 
    949    anv_block_pool_init(&device->dynamic_state_block_pool, device, 16384);
    950 
    951    anv_state_pool_init(&device->dynamic_state_pool,
    952                        &device->dynamic_state_block_pool);
    953 
    954    anv_block_pool_init(&device->instruction_block_pool, device, 1024 * 1024);
    955    anv_state_pool_init(&device->instruction_state_pool,
    956                        &device->instruction_block_pool);
    957 
    958    anv_block_pool_init(&device->surface_state_block_pool, device, 4096);
    959 
    960    anv_state_pool_init(&device->surface_state_pool,
    961                        &device->surface_state_block_pool);
    962 
    963    anv_bo_init_new(&device->workaround_bo, device, 1024);
    964 
    965    anv_scratch_pool_init(device, &device->scratch_pool);
    966 
    967    anv_queue_init(device, &device->queue);
    968 
    969    switch (device->info.gen) {
    970    case 7:
    971       if (!device->info.is_haswell)
    972          result = gen7_init_device_state(device);
    973       else
    974          result = gen75_init_device_state(device);
    975       break;
    976    case 8:
    977       result = gen8_init_device_state(device);
    978       break;
    979    case 9:
    980       result = gen9_init_device_state(device);
    981       break;
    982    default:
    983       /* Shouldn't get here as we don't create physical devices for any other
    984        * gens. */
    985       unreachable("unhandled gen");
    986    }
    987    if (result != VK_SUCCESS)
    988       goto fail_fd;
    989 
    990    anv_device_init_blorp(device);
    991 
    992    anv_device_init_border_colors(device);
    993 
    994    *pDevice = anv_device_to_handle(device);
    995 
    996    return VK_SUCCESS;
    997 
    998  fail_fd:
    999    close(device->fd);
   1000  fail_device:
   1001    vk_free(&device->alloc, device);
   1002 
   1003    return result;
   1004 }
   1005 
   1006 void anv_DestroyDevice(
   1007     VkDevice                                    _device,
   1008     const VkAllocationCallbacks*                pAllocator)
   1009 {
   1010    ANV_FROM_HANDLE(anv_device, device, _device);
   1011 
   1012    if (!device)
   1013       return;
   1014 
   1015    anv_device_finish_blorp(device);
   1016 
   1017    anv_queue_finish(&device->queue);
   1018 
   1019 #ifdef HAVE_VALGRIND
   1020    /* We only need to free these to prevent valgrind errors.  The backing
   1021     * BO will go away in a couple of lines so we don't actually leak.
   1022     */
   1023    anv_state_pool_free(&device->dynamic_state_pool, device->border_colors);
   1024 #endif
   1025 
   1026    anv_scratch_pool_finish(device, &device->scratch_pool);
   1027 
   1028    anv_gem_munmap(device->workaround_bo.map, device->workaround_bo.size);
   1029    anv_gem_close(device, device->workaround_bo.gem_handle);
   1030 
   1031    anv_state_pool_finish(&device->surface_state_pool);
   1032    anv_block_pool_finish(&device->surface_state_block_pool);
   1033    anv_state_pool_finish(&device->instruction_state_pool);
   1034    anv_block_pool_finish(&device->instruction_block_pool);
   1035    anv_state_pool_finish(&device->dynamic_state_pool);
   1036    anv_block_pool_finish(&device->dynamic_state_block_pool);
   1037 
   1038    anv_bo_pool_finish(&device->batch_bo_pool);
   1039 
   1040    pthread_cond_destroy(&device->queue_submit);
   1041    pthread_mutex_destroy(&device->mutex);
   1042 
   1043    anv_gem_destroy_context(device, device->context_id);
   1044 
   1045    close(device->fd);
   1046 
   1047    vk_free(&device->alloc, device);
   1048 }
   1049 
   1050 VkResult anv_EnumerateInstanceExtensionProperties(
   1051     const char*                                 pLayerName,
   1052     uint32_t*                                   pPropertyCount,
   1053     VkExtensionProperties*                      pProperties)
   1054 {
   1055    if (pProperties == NULL) {
   1056       *pPropertyCount = ARRAY_SIZE(global_extensions);
   1057       return VK_SUCCESS;
   1058    }
   1059 
   1060    *pPropertyCount = MIN2(*pPropertyCount, ARRAY_SIZE(global_extensions));
   1061    typed_memcpy(pProperties, global_extensions, *pPropertyCount);
   1062 
   1063    if (*pPropertyCount < ARRAY_SIZE(global_extensions))
   1064       return VK_INCOMPLETE;
   1065 
   1066    return VK_SUCCESS;
   1067 }
   1068 
   1069 VkResult anv_EnumerateDeviceExtensionProperties(
   1070     VkPhysicalDevice                            physicalDevice,
   1071     const char*                                 pLayerName,
   1072     uint32_t*                                   pPropertyCount,
   1073     VkExtensionProperties*                      pProperties)
   1074 {
   1075    if (pProperties == NULL) {
   1076       *pPropertyCount = ARRAY_SIZE(device_extensions);
   1077       return VK_SUCCESS;
   1078    }
   1079 
   1080    *pPropertyCount = MIN2(*pPropertyCount, ARRAY_SIZE(device_extensions));
   1081    typed_memcpy(pProperties, device_extensions, *pPropertyCount);
   1082 
   1083    if (*pPropertyCount < ARRAY_SIZE(device_extensions))
   1084       return VK_INCOMPLETE;
   1085 
   1086    return VK_SUCCESS;
   1087 }
   1088 
   1089 VkResult anv_EnumerateInstanceLayerProperties(
   1090     uint32_t*                                   pPropertyCount,
   1091     VkLayerProperties*                          pProperties)
   1092 {
   1093    if (pProperties == NULL) {
   1094       *pPropertyCount = 0;
   1095       return VK_SUCCESS;
   1096    }
   1097 
   1098    /* None supported at this time */
   1099    return vk_error(VK_ERROR_LAYER_NOT_PRESENT);
   1100 }
   1101 
   1102 VkResult anv_EnumerateDeviceLayerProperties(
   1103     VkPhysicalDevice                            physicalDevice,
   1104     uint32_t*                                   pPropertyCount,
   1105     VkLayerProperties*                          pProperties)
   1106 {
   1107    if (pProperties == NULL) {
   1108       *pPropertyCount = 0;
   1109       return VK_SUCCESS;
   1110    }
   1111 
   1112    /* None supported at this time */
   1113    return vk_error(VK_ERROR_LAYER_NOT_PRESENT);
   1114 }
   1115 
   1116 void anv_GetDeviceQueue(
   1117     VkDevice                                    _device,
   1118     uint32_t                                    queueNodeIndex,
   1119     uint32_t                                    queueIndex,
   1120     VkQueue*                                    pQueue)
   1121 {
   1122    ANV_FROM_HANDLE(anv_device, device, _device);
   1123 
   1124    assert(queueIndex == 0);
   1125 
   1126    *pQueue = anv_queue_to_handle(&device->queue);
   1127 }
   1128 
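         /* Thin wrapper around the EXECBUFFER2 ioctl that also copies the
          * kernel's updated presumed offsets back into our anv_bo structs.
          */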
   1129 VkResult
   1130 anv_device_execbuf(struct anv_device *device,
   1131                    struct drm_i915_gem_execbuffer2 *execbuf,
   1132                    struct anv_bo **execbuf_bos)
   1133 {
   1134    int ret = anv_gem_execbuffer(device, execbuf);
   1135    if (ret != 0) {
   1136       /* We don't know the real error. */
   1137       return vk_errorf(VK_ERROR_DEVICE_LOST, "execbuf2 failed: %m");
   1138    }
   1139 
   1140    struct drm_i915_gem_exec_object2 *objects =
   1141       (void *)(uintptr_t)execbuf->buffers_ptr;
   1142    for (uint32_t k = 0; k < execbuf->buffer_count; k++)
   1143       execbuf_bos[k]->offset = objects[k].offset;
   1144 
   1145    return VK_SUCCESS;
   1146 }
   1147 
   1148 VkResult anv_QueueSubmit(
   1149     VkQueue                                     _queue,
   1150     uint32_t                                    submitCount,
   1151     const VkSubmitInfo*                         pSubmits,
   1152     VkFence                                     _fence)
   1153 {
   1154    ANV_FROM_HANDLE(anv_queue, queue, _queue);
   1155    ANV_FROM_HANDLE(anv_fence, fence, _fence);
   1156    struct anv_device *device = queue->device;
   1157    VkResult result = VK_SUCCESS;
   1158 
   1159    /* We lock around QueueSubmit for three main reasons:
   1160     *
   1161     *  1) When a block pool is resized, we create a new gem handle with a
   1162     *     different size and, in the case of surface states, possibly a
   1163     *     different center offset but we re-use the same anv_bo struct when
   1164     *     we do so.  If this happens in the middle of setting up an execbuf,
   1165     *     we could end up with our list of BOs out of sync with our list of
   1166     *     gem handles.
   1167     *
   1168     *  2) The algorithm we use for building the list of unique buffers isn't
    1169     *     thread-safe.  While the client is supposed to synchronize around
   1170     *     QueueSubmit, this would be extremely difficult to debug if it ever
   1171     *     came up in the wild due to a broken app.  It's better to play it
   1172     *     safe and just lock around QueueSubmit.
   1173     *
    1174     *  3) The anv_cmd_buffer_execbuf function may perform relocations in
    1175     *     userspace.  Because the surface state buffer is shared between
    1176     *     batches, we can't afford to have that happen from multiple
    1177     *     threads at the same time.  Even though the user is supposed to
    1178     *     ensure this doesn't happen, we play it safe as in (2) above.
    1179     *
    1180     * Since the only other things that ever take the device lock, such as
    1181     * block pool resizes, rarely happen, the lock will almost never be
    1182     * contended, so taking it isn't an expensive operation in this case.
   1183     */
   1184    pthread_mutex_lock(&device->mutex);
   1185 
   1186    for (uint32_t i = 0; i < submitCount; i++) {
   1187       for (uint32_t j = 0; j < pSubmits[i].commandBufferCount; j++) {
   1188          ANV_FROM_HANDLE(anv_cmd_buffer, cmd_buffer,
   1189                          pSubmits[i].pCommandBuffers[j]);
   1190          assert(cmd_buffer->level == VK_COMMAND_BUFFER_LEVEL_PRIMARY);
   1191 
   1192          result = anv_cmd_buffer_execbuf(device, cmd_buffer);
   1193          if (result != VK_SUCCESS)
   1194             goto out;
   1195       }
   1196    }
   1197 
   1198    if (fence) {
   1199       struct anv_bo *fence_bo = &fence->bo;
   1200       result = anv_device_execbuf(device, &fence->execbuf, &fence_bo);
   1201       if (result != VK_SUCCESS)
   1202          goto out;
   1203 
   1204       /* Update the fence and wake up any waiters */
   1205       assert(fence->state == ANV_FENCE_STATE_RESET);
   1206       fence->state = ANV_FENCE_STATE_SUBMITTED;
   1207       pthread_cond_broadcast(&device->queue_submit);
   1208    }
   1209 
   1210 out:
   1211    pthread_mutex_unlock(&device->mutex);
   1212 
   1213    return result;
   1214 }
   1215 
   1216 VkResult anv_QueueWaitIdle(
   1217     VkQueue                                     _queue)
   1218 {
   1219    ANV_FROM_HANDLE(anv_queue, queue, _queue);
   1220 
   1221    return anv_DeviceWaitIdle(anv_device_to_handle(queue->device));
   1222 }
   1223 
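         /* Wait for the device to idle by building a trivial batch
          * (MI_BATCH_BUFFER_END followed by MI_NOOP) and submitting it through
          * anv_device_submit_simple_batch(), which blocks until it completes.
          */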
   1224 VkResult anv_DeviceWaitIdle(
   1225     VkDevice                                    _device)
   1226 {
   1227    ANV_FROM_HANDLE(anv_device, device, _device);
   1228    struct anv_batch batch;
   1229 
   1230    uint32_t cmds[8];
   1231    batch.start = batch.next = cmds;
   1232    batch.end = (void *) cmds + sizeof(cmds);
   1233 
   1234    anv_batch_emit(&batch, GEN7_MI_BATCH_BUFFER_END, bbe);
   1235    anv_batch_emit(&batch, GEN7_MI_NOOP, noop);
   1236 
   1237    return anv_device_submit_simple_batch(device, &batch);
   1238 }
   1239 
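         /* Create a new GEM buffer object of |size| bytes and initialize the
          * anv_bo wrapper around it.
          */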
   1240 VkResult
   1241 anv_bo_init_new(struct anv_bo *bo, struct anv_device *device, uint64_t size)
   1242 {
   1243    uint32_t gem_handle = anv_gem_create(device, size);
   1244    if (!gem_handle)
   1245       return vk_error(VK_ERROR_OUT_OF_DEVICE_MEMORY);
   1246 
   1247    anv_bo_init(bo, gem_handle, size);
   1248 
   1249    return VK_SUCCESS;
   1250 }
   1251 
   1252 VkResult anv_AllocateMemory(
   1253     VkDevice                                    _device,
   1254     const VkMemoryAllocateInfo*                 pAllocateInfo,
   1255     const VkAllocationCallbacks*                pAllocator,
   1256     VkDeviceMemory*                             pMem)
   1257 {
   1258    ANV_FROM_HANDLE(anv_device, device, _device);
   1259    struct anv_device_memory *mem;
   1260    VkResult result;
   1261 
   1262    assert(pAllocateInfo->sType == VK_STRUCTURE_TYPE_MEMORY_ALLOCATE_INFO);
   1263 
   1264    /* The Vulkan 1.0.33 spec says "allocationSize must be greater than 0". */
   1265    assert(pAllocateInfo->allocationSize > 0);
   1266 
   1267    /* We support exactly one memory heap. */
   1268    assert(pAllocateInfo->memoryTypeIndex == 0 ||
   1269           (!device->info.has_llc && pAllocateInfo->memoryTypeIndex < 2));
   1270 
   1271    /* FINISHME: Fail if allocation request exceeds heap size. */
   1272 
   1273    mem = vk_alloc2(&device->alloc, pAllocator, sizeof(*mem), 8,
   1274                     VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   1275    if (mem == NULL)
   1276       return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
   1277 
   1278    /* The kernel is going to give us whole pages anyway */
   1279    uint64_t alloc_size = align_u64(pAllocateInfo->allocationSize, 4096);
   1280 
   1281    result = anv_bo_init_new(&mem->bo, device, alloc_size);
   1282    if (result != VK_SUCCESS)
   1283       goto fail;
   1284 
   1285    mem->type_index = pAllocateInfo->memoryTypeIndex;
   1286 
   1287    mem->map = NULL;
   1288    mem->map_size = 0;
   1289 
   1290    *pMem = anv_device_memory_to_handle(mem);
   1291 
   1292    return VK_SUCCESS;
   1293 
   1294  fail:
   1295    vk_free2(&device->alloc, pAllocator, mem);
   1296 
   1297    return result;
   1298 }
   1299 
   1300 void anv_FreeMemory(
   1301     VkDevice                                    _device,
   1302     VkDeviceMemory                              _mem,
   1303     const VkAllocationCallbacks*                pAllocator)
   1304 {
   1305    ANV_FROM_HANDLE(anv_device, device, _device);
   1306    ANV_FROM_HANDLE(anv_device_memory, mem, _mem);
   1307 
   1308    if (mem == NULL)
   1309       return;
   1310 
   1311    if (mem->map)
   1312       anv_UnmapMemory(_device, _mem);
   1313 
   1314    if (mem->bo.map)
   1315       anv_gem_munmap(mem->bo.map, mem->bo.size);
   1316 
   1317    if (mem->bo.gem_handle != 0)
   1318       anv_gem_close(device, mem->bo.gem_handle);
   1319 
   1320    vk_free2(&device->alloc, pAllocator, mem);
   1321 }
   1322 
   1323 VkResult anv_MapMemory(
   1324     VkDevice                                    _device,
   1325     VkDeviceMemory                              _memory,
   1326     VkDeviceSize                                offset,
   1327     VkDeviceSize                                size,
   1328     VkMemoryMapFlags                            flags,
   1329     void**                                      ppData)
   1330 {
   1331    ANV_FROM_HANDLE(anv_device, device, _device);
   1332    ANV_FROM_HANDLE(anv_device_memory, mem, _memory);
   1333 
   1334    if (mem == NULL) {
   1335       *ppData = NULL;
   1336       return VK_SUCCESS;
   1337    }
   1338 
   1339    if (size == VK_WHOLE_SIZE)
   1340       size = mem->bo.size - offset;
   1341 
   1342    /* From the Vulkan spec version 1.0.32 docs for MapMemory:
   1343     *
   1344     *  * If size is not equal to VK_WHOLE_SIZE, size must be greater than 0
   1346     *  * If size is not equal to VK_WHOLE_SIZE, size must be less than or
   1347     *    equal to the size of the memory minus offset
   1348     */
   1349    assert(size > 0);
   1350    assert(offset + size <= mem->bo.size);
   1351 
   1352    /* FIXME: Is this supposed to be thread safe? Since vkUnmapMemory() only
   1353     * takes a VkDeviceMemory pointer, it seems like only one map of the memory
   1354     * at a time is valid. We could just mmap up front and return an offset
   1355     * pointer here, but that may exhaust virtual memory on 32 bit
   1356     * userspace. */
   1357 
   1358    uint32_t gem_flags = 0;
   1359    if (!device->info.has_llc && mem->type_index == 0)
   1360       gem_flags |= I915_MMAP_WC;
   1361 
   1362    /* GEM will fail to map if the offset isn't 4k-aligned.  Round down. */
   1363    uint64_t map_offset = offset & ~4095ull;
   1364    assert(offset >= map_offset);
   1365    uint64_t map_size = (offset + size) - map_offset;
   1366 
   1367    /* Let's map whole pages */
   1368    map_size = align_u64(map_size, 4096);
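            /* For example, offset = 5000 and size = 2000 yield map_offset = 4096 and
             * map_size = 4096; *ppData below then points at map + 904.
             */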
   1369 
   1370    void *map = anv_gem_mmap(device, mem->bo.gem_handle,
   1371                             map_offset, map_size, gem_flags);
   1372    if (map == MAP_FAILED)
   1373       return vk_error(VK_ERROR_MEMORY_MAP_FAILED);
   1374 
   1375    mem->map = map;
   1376    mem->map_size = map_size;
   1377 
   1378    *ppData = mem->map + (offset - map_offset);
   1379 
   1380    return VK_SUCCESS;
   1381 }
   1382 
   1383 void anv_UnmapMemory(
   1384     VkDevice                                    _device,
   1385     VkDeviceMemory                              _memory)
   1386 {
   1387    ANV_FROM_HANDLE(anv_device_memory, mem, _memory);
   1388 
   1389    if (mem == NULL)
   1390       return;
   1391 
   1392    anv_gem_munmap(mem->map, mem->map_size);
   1393 
   1394    mem->map = NULL;
   1395    mem->map_size = 0;
   1396 }
   1397 
   1398 static void
   1399 clflush_mapped_ranges(struct anv_device         *device,
   1400                       uint32_t                   count,
   1401                       const VkMappedMemoryRange *ranges)
   1402 {
   1403    for (uint32_t i = 0; i < count; i++) {
   1404       ANV_FROM_HANDLE(anv_device_memory, mem, ranges[i].memory);
   1405       void *p = mem->map + (ranges[i].offset & ~CACHELINE_MASK);
   1406       void *end;
   1407 
   1408       if (ranges[i].offset + ranges[i].size > mem->map_size)
   1409          end = mem->map + mem->map_size;
   1410       else
   1411          end = mem->map + ranges[i].offset + ranges[i].size;
   1412 
   1413       while (p < end) {
   1414          __builtin_ia32_clflush(p);
   1415          p += CACHELINE_SIZE;
   1416       }
   1417    }
   1418 }
   1419 
   1420 VkResult anv_FlushMappedMemoryRanges(
   1421     VkDevice                                    _device,
   1422     uint32_t                                    memoryRangeCount,
   1423     const VkMappedMemoryRange*                  pMemoryRanges)
   1424 {
   1425    ANV_FROM_HANDLE(anv_device, device, _device);
   1426 
   1427    if (device->info.has_llc)
   1428       return VK_SUCCESS;
   1429 
   1430    /* Make sure the writes we're flushing have landed. */
   1431    __builtin_ia32_mfence();
   1432 
   1433    clflush_mapped_ranges(device, memoryRangeCount, pMemoryRanges);
   1434 
   1435    return VK_SUCCESS;
   1436 }
   1437 
   1438 VkResult anv_InvalidateMappedMemoryRanges(
   1439     VkDevice                                    _device,
   1440     uint32_t                                    memoryRangeCount,
   1441     const VkMappedMemoryRange*                  pMemoryRanges)
   1442 {
   1443    ANV_FROM_HANDLE(anv_device, device, _device);
   1444 
   1445    if (device->info.has_llc)
   1446       return VK_SUCCESS;
   1447 
   1448    clflush_mapped_ranges(device, memoryRangeCount, pMemoryRanges);
   1449 
   1450    /* Make sure no reads get moved up above the invalidate. */
   1451    __builtin_ia32_mfence();
   1452 
   1453    return VK_SUCCESS;
   1454 }
   1455 
   1456 void anv_GetBufferMemoryRequirements(
   1457     VkDevice                                    _device,
   1458     VkBuffer                                    _buffer,
   1459     VkMemoryRequirements*                       pMemoryRequirements)
   1460 {
   1461    ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);
   1462    ANV_FROM_HANDLE(anv_device, device, _device);
   1463 
   1464    /* The Vulkan spec (git aaed022) says:
   1465     *
   1466     *    memoryTypeBits is a bitfield and contains one bit set for every
   1467     *    supported memory type for the resource. The bit `1<<i` is set if and
   1468     *    only if the memory type `i` in the VkPhysicalDeviceMemoryProperties
   1469     *    structure for the physical device is supported.
   1470     *
   1471     * We support exactly one memory type on LLC, two on non-LLC.
   1472     */
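            /* Hence 1 (bit 0) exposes only memory type 0, while 3 exposes types 0 and 1. */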
   1473    pMemoryRequirements->memoryTypeBits = device->info.has_llc ? 1 : 3;
   1474 
   1475    pMemoryRequirements->size = buffer->size;
   1476    pMemoryRequirements->alignment = 16;
   1477 }
   1478 
   1479 void anv_GetImageMemoryRequirements(
   1480     VkDevice                                    _device,
   1481     VkImage                                     _image,
   1482     VkMemoryRequirements*                       pMemoryRequirements)
   1483 {
   1484    ANV_FROM_HANDLE(anv_image, image, _image);
   1485    ANV_FROM_HANDLE(anv_device, device, _device);
   1486 
   1487    /* The Vulkan spec (git aaed022) says:
   1488     *
   1489     *    memoryTypeBits is a bitfield and contains one bit set for every
   1490     *    supported memory type for the resource. The bit `1<<i` is set if and
   1491     *    only if the memory type `i` in the VkPhysicalDeviceMemoryProperties
   1492     *    structure for the physical device is supported.
   1493     *
   1494     * We support exactly one memory type on LLC, two on non-LLC.
   1495     */
   1496    pMemoryRequirements->memoryTypeBits = device->info.has_llc ? 1 : 3;
   1497 
   1498    pMemoryRequirements->size = image->size;
   1499    pMemoryRequirements->alignment = image->alignment;
   1500 }
   1501 
   1502 void anv_GetImageSparseMemoryRequirements(
   1503     VkDevice                                    device,
   1504     VkImage                                     image,
   1505     uint32_t*                                   pSparseMemoryRequirementCount,
   1506     VkSparseImageMemoryRequirements*            pSparseMemoryRequirements)
   1507 {
   1508    stub();
   1509 }
   1510 
   1511 void anv_GetDeviceMemoryCommitment(
   1512     VkDevice                                    device,
   1513     VkDeviceMemory                              memory,
   1514     VkDeviceSize*                               pCommittedMemoryInBytes)
   1515 {
   1516    *pCommittedMemoryInBytes = 0;
   1517 }
   1518 
   1519 VkResult anv_BindBufferMemory(
   1520     VkDevice                                    device,
   1521     VkBuffer                                    _buffer,
   1522     VkDeviceMemory                              _memory,
   1523     VkDeviceSize                                memoryOffset)
   1524 {
   1525    ANV_FROM_HANDLE(anv_device_memory, mem, _memory);
   1526    ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);
   1527 
   1528    if (mem) {
   1529       buffer->bo = &mem->bo;
   1530       buffer->offset = memoryOffset;
   1531    } else {
   1532       buffer->bo = NULL;
   1533       buffer->offset = 0;
   1534    }
   1535 
   1536    return VK_SUCCESS;
   1537 }
   1538 
   1539 VkResult anv_QueueBindSparse(
   1540     VkQueue                                     queue,
   1541     uint32_t                                    bindInfoCount,
   1542     const VkBindSparseInfo*                     pBindInfo,
   1543     VkFence                                     fence)
   1544 {
   1545    stub_return(VK_ERROR_INCOMPATIBLE_DRIVER);
   1546 }
   1547 
   1548 VkResult anv_CreateFence(
   1549     VkDevice                                    _device,
   1550     const VkFenceCreateInfo*                    pCreateInfo,
   1551     const VkAllocationCallbacks*                pAllocator,
   1552     VkFence*                                    pFence)
   1553 {
   1554    ANV_FROM_HANDLE(anv_device, device, _device);
   1555    struct anv_bo fence_bo;
   1556    struct anv_fence *fence;
   1557    struct anv_batch batch;
   1558    VkResult result;
   1559 
   1560    assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FENCE_CREATE_INFO);
   1561 
   1562    result = anv_bo_pool_alloc(&device->batch_bo_pool, &fence_bo, 4096);
   1563    if (result != VK_SUCCESS)
   1564       return result;
   1565 
   1566    /* Fences are small.  Just store the CPU data structure in the BO. */
   1567    fence = fence_bo.map;
   1568    fence->bo = fence_bo;
   1569 
   1570    /* Place the batch after the CPU data but on its own cache line. */
   1571    const uint32_t batch_offset = align_u32(sizeof(*fence), CACHELINE_SIZE);
   1572    batch.next = batch.start = fence->bo.map + batch_offset;
   1573    batch.end = fence->bo.map + fence->bo.size;
   1574    anv_batch_emit(&batch, GEN7_MI_BATCH_BUFFER_END, bbe);
   1575    anv_batch_emit(&batch, GEN7_MI_NOOP, noop);
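            /* The BO now contains the CPU-side fence struct, cache-line padding, and the
             * tiny batch (MI_BATCH_BUFFER_END followed by MI_NOOP) emitted above.
             */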
   1576 
   1577    if (!device->info.has_llc) {
   1578       assert(((uintptr_t) batch.start & CACHELINE_MASK) == 0);
   1579       assert(batch.next - batch.start <= CACHELINE_SIZE);
   1580       __builtin_ia32_mfence();
   1581       __builtin_ia32_clflush(batch.start);
   1582    }
   1583 
   1584    fence->exec2_objects[0].handle = fence->bo.gem_handle;
   1585    fence->exec2_objects[0].relocation_count = 0;
   1586    fence->exec2_objects[0].relocs_ptr = 0;
   1587    fence->exec2_objects[0].alignment = 0;
   1588    fence->exec2_objects[0].offset = fence->bo.offset;
   1589    fence->exec2_objects[0].flags = 0;
   1590    fence->exec2_objects[0].rsvd1 = 0;
   1591    fence->exec2_objects[0].rsvd2 = 0;
   1592 
   1593    fence->execbuf.buffers_ptr = (uintptr_t) fence->exec2_objects;
   1594    fence->execbuf.buffer_count = 1;
   1595    fence->execbuf.batch_start_offset = batch.start - fence->bo.map;
   1596    fence->execbuf.batch_len = batch.next - batch.start;
   1597    fence->execbuf.cliprects_ptr = 0;
   1598    fence->execbuf.num_cliprects = 0;
   1599    fence->execbuf.DR1 = 0;
   1600    fence->execbuf.DR4 = 0;
   1601 
   1602    fence->execbuf.flags =
   1603       I915_EXEC_HANDLE_LUT | I915_EXEC_NO_RELOC | I915_EXEC_RENDER;
   1604    fence->execbuf.rsvd1 = device->context_id;
   1605    fence->execbuf.rsvd2 = 0;
   1606 
   1607    if (pCreateInfo->flags & VK_FENCE_CREATE_SIGNALED_BIT) {
   1608       fence->state = ANV_FENCE_STATE_SIGNALED;
   1609    } else {
   1610       fence->state = ANV_FENCE_STATE_RESET;
   1611    }
   1612 
   1613    *pFence = anv_fence_to_handle(fence);
   1614 
   1615    return VK_SUCCESS;
   1616 }
   1617 
   1618 void anv_DestroyFence(
   1619     VkDevice                                    _device,
   1620     VkFence                                     _fence,
   1621     const VkAllocationCallbacks*                pAllocator)
   1622 {
   1623    ANV_FROM_HANDLE(anv_device, device, _device);
   1624    ANV_FROM_HANDLE(anv_fence, fence, _fence);
   1625 
   1626    if (!fence)
   1627       return;
   1628 
   1629    assert(fence->bo.map == fence);
   1630    anv_bo_pool_free(&device->batch_bo_pool, &fence->bo);
   1631 }
   1632 
   1633 VkResult anv_ResetFences(
   1634     VkDevice                                    _device,
   1635     uint32_t                                    fenceCount,
   1636     const VkFence*                              pFences)
   1637 {
   1638    for (uint32_t i = 0; i < fenceCount; i++) {
   1639       ANV_FROM_HANDLE(anv_fence, fence, pFences[i]);
   1640       fence->state = ANV_FENCE_STATE_RESET;
   1641    }
   1642 
   1643    return VK_SUCCESS;
   1644 }
   1645 
   1646 VkResult anv_GetFenceStatus(
   1647     VkDevice                                    _device,
   1648     VkFence                                     _fence)
   1649 {
   1650    ANV_FROM_HANDLE(anv_device, device, _device);
   1651    ANV_FROM_HANDLE(anv_fence, fence, _fence);
   1652    int64_t t = 0;
   1653    int ret;
   1654 
   1655    switch (fence->state) {
   1656    case ANV_FENCE_STATE_RESET:
   1657       /* If it hasn't even been sent off to the GPU yet, it's not ready */
   1658       return VK_NOT_READY;
   1659 
   1660    case ANV_FENCE_STATE_SIGNALED:
   1661       /* It's been signaled, return success */
   1662       return VK_SUCCESS;
   1663 
   1664    case ANV_FENCE_STATE_SUBMITTED:
   1665       /* It's been submitted to the GPU but we don't know if it's done yet. */
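               /* The timeout t is zero, so this wait is a non-blocking poll: a return of 0
                * means the BO is idle; any other return is treated as not ready.
                */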
   1666       ret = anv_gem_wait(device, fence->bo.gem_handle, &t);
   1667       if (ret == 0) {
   1668          fence->state = ANV_FENCE_STATE_SIGNALED;
   1669          return VK_SUCCESS;
   1670       } else {
   1671          return VK_NOT_READY;
   1672       }
   1673    default:
   1674       unreachable("Invalid fence status");
   1675    }
   1676 }
   1677 
   1678 #define NSEC_PER_SEC 1000000000
   1679 #define INT_TYPE_MAX(type) ((1ull << (sizeof(type) * 8 - 1)) - 1)
   1680 
   1681 VkResult anv_WaitForFences(
   1682     VkDevice                                    _device,
   1683     uint32_t                                    fenceCount,
   1684     const VkFence*                              pFences,
   1685     VkBool32                                    waitAll,
   1686     uint64_t                                    _timeout)
   1687 {
   1688    ANV_FROM_HANDLE(anv_device, device, _device);
   1689    int ret;
   1690 
   1691    /* DRM_IOCTL_I915_GEM_WAIT uses a signed 64 bit timeout and is supposed
    1692     * to block indefinitely for timeouts <= 0.  Unfortunately, this was broken
   1693     * for a couple of kernel releases.  Since there's no way to know
   1694     * whether or not the kernel we're using is one of the broken ones, the
   1695     * best we can do is to clamp the timeout to INT64_MAX.  This limits the
   1696     * maximum timeout from 584 years to 292 years - likely not a big deal.
   1697     */
   1698    int64_t timeout = MIN2(_timeout, INT64_MAX);
   1699 
   1700    uint32_t pending_fences = fenceCount;
   1701    while (pending_fences) {
   1702       pending_fences = 0;
   1703       bool signaled_fences = false;
   1704       for (uint32_t i = 0; i < fenceCount; i++) {
   1705          ANV_FROM_HANDLE(anv_fence, fence, pFences[i]);
   1706          switch (fence->state) {
   1707          case ANV_FENCE_STATE_RESET:
   1708             /* This fence hasn't been submitted yet, we'll catch it the next
   1709              * time around.  Yes, this may mean we dead-loop but, short of
   1710              * lots of locking and a condition variable, there's not much that
   1711              * we can do about that.
   1712              */
   1713             pending_fences++;
   1714             continue;
   1715 
   1716          case ANV_FENCE_STATE_SIGNALED:
   1717             /* This fence is not pending.  If waitAll isn't set, we can return
   1718              * early.  Otherwise, we have to keep going.
   1719              */
   1720             if (!waitAll)
   1721                return VK_SUCCESS;
   1722             continue;
   1723 
   1724          case ANV_FENCE_STATE_SUBMITTED:
   1725             /* These are the fences we really care about.  Go ahead and wait
   1726              * on it until we hit a timeout.
    1727              * on them until we hit a timeout.
   1728             ret = anv_gem_wait(device, fence->bo.gem_handle, &timeout);
   1729             if (ret == -1 && errno == ETIME) {
   1730                return VK_TIMEOUT;
   1731             } else if (ret == -1) {
   1732                /* We don't know the real error. */
   1733                return vk_errorf(VK_ERROR_DEVICE_LOST, "gem wait failed: %m");
   1734             } else {
   1735                fence->state = ANV_FENCE_STATE_SIGNALED;
   1736                signaled_fences = true;
   1737                if (!waitAll)
   1738                   return VK_SUCCESS;
   1739                continue;
   1740             }
   1741          }
   1742       }
   1743 
   1744       if (pending_fences && !signaled_fences) {
   1745          /* If we've hit this then someone decided to vkWaitForFences before
   1746           * they've actually submitted any of them to a queue.  This is a
   1747           * fairly pessimal case, so it's ok to lock here and use a standard
   1748           * pthreads condition variable.
   1749           */
   1750          pthread_mutex_lock(&device->mutex);
   1751 
   1752          /* It's possible that some of the fences have changed state since the
   1753           * last time we checked.  Now that we have the lock, check for
   1754           * pending fences again and don't wait if it's changed.
   1755           */
   1756          uint32_t now_pending_fences = 0;
   1757          for (uint32_t i = 0; i < fenceCount; i++) {
   1758             ANV_FROM_HANDLE(anv_fence, fence, pFences[i]);
   1759             if (fence->state == ANV_FENCE_STATE_RESET)
   1760                now_pending_fences++;
   1761          }
   1762          assert(now_pending_fences <= pending_fences);
   1763 
   1764          if (now_pending_fences == pending_fences) {
   1765             struct timespec before;
   1766             clock_gettime(CLOCK_MONOTONIC, &before);
   1767 
   1768             uint32_t abs_nsec = before.tv_nsec + timeout % NSEC_PER_SEC;
   1769             uint64_t abs_sec = before.tv_sec + (abs_nsec / NSEC_PER_SEC) +
   1770                                (timeout / NSEC_PER_SEC);
   1771             abs_nsec %= NSEC_PER_SEC;
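                     /* Both addends above are less than NSEC_PER_SEC, so the uint32_t
                      * sum cannot overflow before the modulo.
                      */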
   1772 
   1773             /* Avoid roll-over in tv_sec on 32-bit systems if the user
   1774              * provided timeout is UINT64_MAX
   1775              */
   1776             struct timespec abstime;
   1777             abstime.tv_nsec = abs_nsec;
   1778             abstime.tv_sec = MIN2(abs_sec, INT_TYPE_MAX(abstime.tv_sec));
   1779 
   1780             ret = pthread_cond_timedwait(&device->queue_submit,
   1781                                          &device->mutex, &abstime);
   1782             assert(ret != EINVAL);
   1783 
   1784             struct timespec after;
   1785             clock_gettime(CLOCK_MONOTONIC, &after);
   1786             uint64_t time_elapsed =
   1787                ((uint64_t)after.tv_sec * NSEC_PER_SEC + after.tv_nsec) -
   1788                ((uint64_t)before.tv_sec * NSEC_PER_SEC + before.tv_nsec);
   1789 
   1790             if (time_elapsed >= timeout) {
   1791                pthread_mutex_unlock(&device->mutex);
   1792                return VK_TIMEOUT;
   1793             }
   1794 
   1795             timeout -= time_elapsed;
   1796          }
   1797 
   1798          pthread_mutex_unlock(&device->mutex);
   1799       }
   1800    }
   1801 
   1802    return VK_SUCCESS;
   1803 }
   1804 
   1805 // Queue semaphore functions
   1806 
   1807 VkResult anv_CreateSemaphore(
   1808     VkDevice                                    device,
   1809     const VkSemaphoreCreateInfo*                pCreateInfo,
   1810     const VkAllocationCallbacks*                pAllocator,
   1811     VkSemaphore*                                pSemaphore)
   1812 {
   1813    /* The DRM execbuffer ioctl always executes in-order, even between different
   1814     * rings. As such, there's nothing to do for the user space semaphore.
   1815     */
   1816 
   1817    *pSemaphore = (VkSemaphore)1;
   1818 
   1819    return VK_SUCCESS;
   1820 }
   1821 
   1822 void anv_DestroySemaphore(
   1823     VkDevice                                    device,
   1824     VkSemaphore                                 semaphore,
   1825     const VkAllocationCallbacks*                pAllocator)
   1826 {
   1827 }
   1828 
   1829 // Event functions
   1830 
   1831 VkResult anv_CreateEvent(
   1832     VkDevice                                    _device,
   1833     const VkEventCreateInfo*                    pCreateInfo,
   1834     const VkAllocationCallbacks*                pAllocator,
   1835     VkEvent*                                    pEvent)
   1836 {
   1837    ANV_FROM_HANDLE(anv_device, device, _device);
   1838    struct anv_state state;
   1839    struct anv_event *event;
   1840 
   1841    assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_EVENT_CREATE_INFO);
   1842 
   1843    state = anv_state_pool_alloc(&device->dynamic_state_pool,
   1844                                 sizeof(*event), 8);
   1845    event = state.map;
   1846    event->state = state;
   1847    event->semaphore = VK_EVENT_RESET;
   1848 
   1849    if (!device->info.has_llc) {
   1850       /* Make sure the writes we're flushing have landed. */
   1851       __builtin_ia32_mfence();
   1852       __builtin_ia32_clflush(event);
   1853    }
   1854 
   1855    *pEvent = anv_event_to_handle(event);
   1856 
   1857    return VK_SUCCESS;
   1858 }
   1859 
   1860 void anv_DestroyEvent(
   1861     VkDevice                                    _device,
   1862     VkEvent                                     _event,
   1863     const VkAllocationCallbacks*                pAllocator)
   1864 {
   1865    ANV_FROM_HANDLE(anv_device, device, _device);
   1866    ANV_FROM_HANDLE(anv_event, event, _event);
   1867 
   1868    if (!event)
   1869       return;
   1870 
   1871    anv_state_pool_free(&device->dynamic_state_pool, event->state);
   1872 }
   1873 
   1874 VkResult anv_GetEventStatus(
   1875     VkDevice                                    _device,
   1876     VkEvent                                     _event)
   1877 {
   1878    ANV_FROM_HANDLE(anv_device, device, _device);
   1879    ANV_FROM_HANDLE(anv_event, event, _event);
   1880 
   1881    if (!device->info.has_llc) {
   1882       /* Invalidate read cache before reading event written by GPU. */
   1883       __builtin_ia32_clflush(event);
   1884       __builtin_ia32_mfence();
   1886    }
   1887 
   1888    return event->semaphore;
   1889 }
   1890 
   1891 VkResult anv_SetEvent(
   1892     VkDevice                                    _device,
   1893     VkEvent                                     _event)
   1894 {
   1895    ANV_FROM_HANDLE(anv_device, device, _device);
   1896    ANV_FROM_HANDLE(anv_event, event, _event);
   1897 
   1898    event->semaphore = VK_EVENT_SET;
   1899 
   1900    if (!device->info.has_llc) {
   1901       /* Make sure the writes we're flushing have landed. */
   1902       __builtin_ia32_mfence();
   1903       __builtin_ia32_clflush(event);
   1904    }
   1905 
   1906    return VK_SUCCESS;
   1907 }
   1908 
   1909 VkResult anv_ResetEvent(
   1910     VkDevice                                    _device,
   1911     VkEvent                                     _event)
   1912 {
   1913    ANV_FROM_HANDLE(anv_device, device, _device);
   1914    ANV_FROM_HANDLE(anv_event, event, _event);
   1915 
   1916    event->semaphore = VK_EVENT_RESET;
   1917 
   1918    if (!device->info.has_llc) {
   1919       /* Make sure the writes we're flushing have landed. */
   1920       __builtin_ia32_mfence();
   1921       __builtin_ia32_clflush(event);
   1922    }
   1923 
   1924    return VK_SUCCESS;
   1925 }
   1926 
   1927 // Buffer functions
   1928 
   1929 VkResult anv_CreateBuffer(
   1930     VkDevice                                    _device,
   1931     const VkBufferCreateInfo*                   pCreateInfo,
   1932     const VkAllocationCallbacks*                pAllocator,
   1933     VkBuffer*                                   pBuffer)
   1934 {
   1935    ANV_FROM_HANDLE(anv_device, device, _device);
   1936    struct anv_buffer *buffer;
   1937 
   1938    assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_BUFFER_CREATE_INFO);
   1939 
   1940    buffer = vk_alloc2(&device->alloc, pAllocator, sizeof(*buffer), 8,
   1941                        VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   1942    if (buffer == NULL)
   1943       return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
   1944 
   1945    buffer->size = pCreateInfo->size;
   1946    buffer->usage = pCreateInfo->usage;
   1947    buffer->bo = NULL;
   1948    buffer->offset = 0;
   1949 
   1950    *pBuffer = anv_buffer_to_handle(buffer);
   1951 
   1952    return VK_SUCCESS;
   1953 }
   1954 
   1955 void anv_DestroyBuffer(
   1956     VkDevice                                    _device,
   1957     VkBuffer                                    _buffer,
   1958     const VkAllocationCallbacks*                pAllocator)
   1959 {
   1960    ANV_FROM_HANDLE(anv_device, device, _device);
   1961    ANV_FROM_HANDLE(anv_buffer, buffer, _buffer);
   1962 
   1963    if (!buffer)
   1964       return;
   1965 
   1966    vk_free2(&device->alloc, pAllocator, buffer);
   1967 }
   1968 
   1969 void
   1970 anv_fill_buffer_surface_state(struct anv_device *device, struct anv_state state,
   1971                               enum isl_format format,
   1972                               uint32_t offset, uint32_t range, uint32_t stride)
   1973 {
   1974    isl_buffer_fill_state(&device->isl_dev, state.map,
   1975                          .address = offset,
   1976                          .mocs = device->default_mocs,
   1977                          .size = range,
   1978                          .format = format,
   1979                          .stride = stride);
   1980 
   1981    if (!device->info.has_llc)
   1982       anv_state_clflush(state);
   1983 }
   1984 
   1985 void anv_DestroySampler(
   1986     VkDevice                                    _device,
   1987     VkSampler                                   _sampler,
   1988     const VkAllocationCallbacks*                pAllocator)
   1989 {
   1990    ANV_FROM_HANDLE(anv_device, device, _device);
   1991    ANV_FROM_HANDLE(anv_sampler, sampler, _sampler);
   1992 
   1993    if (!sampler)
   1994       return;
   1995 
   1996    vk_free2(&device->alloc, pAllocator, sampler);
   1997 }
   1998 
   1999 VkResult anv_CreateFramebuffer(
   2000     VkDevice                                    _device,
   2001     const VkFramebufferCreateInfo*              pCreateInfo,
   2002     const VkAllocationCallbacks*                pAllocator,
   2003     VkFramebuffer*                              pFramebuffer)
   2004 {
   2005    ANV_FROM_HANDLE(anv_device, device, _device);
   2006    struct anv_framebuffer *framebuffer;
   2007 
   2008    assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_FRAMEBUFFER_CREATE_INFO);
   2009 
   2010    size_t size = sizeof(*framebuffer) +
   2011                  sizeof(struct anv_image_view *) * pCreateInfo->attachmentCount;
   2012    framebuffer = vk_alloc2(&device->alloc, pAllocator, size, 8,
   2013                             VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
   2014    if (framebuffer == NULL)
   2015       return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);
   2016 
   2017    framebuffer->attachment_count = pCreateInfo->attachmentCount;
   2018    for (uint32_t i = 0; i < pCreateInfo->attachmentCount; i++) {
   2019       VkImageView _iview = pCreateInfo->pAttachments[i];
   2020       framebuffer->attachments[i] = anv_image_view_from_handle(_iview);
   2021    }
   2022 
   2023    framebuffer->width = pCreateInfo->width;
   2024    framebuffer->height = pCreateInfo->height;
   2025    framebuffer->layers = pCreateInfo->layers;
   2026 
   2027    *pFramebuffer = anv_framebuffer_to_handle(framebuffer);
   2028 
   2029    return VK_SUCCESS;
   2030 }
   2031 
   2032 void anv_DestroyFramebuffer(
   2033     VkDevice                                    _device,
   2034     VkFramebuffer                               _fb,
   2035     const VkAllocationCallbacks*                pAllocator)
   2036 {
   2037    ANV_FROM_HANDLE(anv_device, device, _device);
   2038    ANV_FROM_HANDLE(anv_framebuffer, fb, _fb);
   2039 
   2040    if (!fb)
   2041       return;
   2042 
   2043    vk_free2(&device->alloc, pAllocator, fb);
   2044 }
   2045 
   2046 /* vk_icd.h does not declare this function, so we declare it here to
   2047  * suppress Wmissing-prototypes.
   2048  */
   2049 PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
   2050 vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t* pSupportedVersion);
   2051 
   2052 PUBLIC VKAPI_ATTR VkResult VKAPI_CALL
   2053 vk_icdNegotiateLoaderICDInterfaceVersion(uint32_t* pSupportedVersion)
   2054 {
   2055    /* For the full details on loader interface versioning, see
   2056     * <https://github.com/KhronosGroup/Vulkan-LoaderAndValidationLayers/blob/master/loader/LoaderAndLayerInterface.md>.
   2057     * What follows is a condensed summary, to help you navigate the large and
   2058     * confusing official doc.
   2059     *
   2060     *   - Loader interface v0 is incompatible with later versions. We don't
   2061     *     support it.
   2062     *
   2063     *   - In loader interface v1:
   2064     *       - The first ICD entrypoint called by the loader is
   2065     *         vk_icdGetInstanceProcAddr(). The ICD must statically expose this
   2066     *         entrypoint.
   2067     *       - The ICD must statically expose no other Vulkan symbol unless it is
   2068     *         linked with -Bsymbolic.
   2069     *       - Each dispatchable Vulkan handle created by the ICD must be
   2070     *         a pointer to a struct whose first member is VK_LOADER_DATA. The
    2071     *         ICD must initialize VK_LOADER_DATA.loaderMagic to ICD_LOADER_MAGIC.
   2072     *       - The loader implements vkCreate{PLATFORM}SurfaceKHR() and
   2073     *         vkDestroySurfaceKHR(). The ICD must be capable of working with
   2074     *         such loader-managed surfaces.
   2075     *
   2076     *    - Loader interface v2 differs from v1 in:
   2077     *       - The first ICD entrypoint called by the loader is
   2078     *         vk_icdNegotiateLoaderICDInterfaceVersion(). The ICD must
   2079     *         statically expose this entrypoint.
   2080     *
   2081     *    - Loader interface v3 differs from v2 in:
   2082     *        - The ICD must implement vkCreate{PLATFORM}SurfaceKHR(),
    2083     *          vkDestroySurfaceKHR(), and other APIs which use VkSurfaceKHR,
   2084     *          because the loader no longer does so.
   2085     */
   2086    *pSupportedVersion = MIN2(*pSupportedVersion, 3u);
   2087    return VK_SUCCESS;
   2088 }
   2089