/*
 * Copyright 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "util/mesa-sha1.h"
#include "util/debug.h"
#include "radv_private.h"

#include "ac_nir_to_llvm.h"

/* One hash-table slot: the shader's SHA-1 key plus everything needed to
 * rebuild a radv_shader_variant without recompiling. 'code' is a trailing
 * flexible array holding code_size bytes of GPU machine code.
 */
struct cache_entry {
	union {
		unsigned char sha1[20];
		uint32_t sha1_dw[5];
	};
	uint32_t code_size;
	struct ac_shader_variant_info variant_info;
	struct ac_shader_config config;
	uint32_t rsrc1, rsrc2;
	struct radv_shader_variant *variant;
	uint32_t code[0];
};

void
radv_pipeline_cache_init(struct radv_pipeline_cache *cache,
			 struct radv_device *device)
{
	cache->device = device;
	pthread_mutex_init(&cache->mutex, NULL);

	cache->modified = false;
	cache->kernel_count = 0;
	cache->total_size = 0;
	cache->table_size = 1024;
	const size_t byte_size = cache->table_size * sizeof(cache->hash_table[0]);
	cache->hash_table = malloc(byte_size);

	/* We don't consider allocation failure fatal, we just start with a
	 * 0-sized cache. */
	if (cache->hash_table == NULL ||
	    (device->debug_flags & RADV_DEBUG_NO_CACHE))
		cache->table_size = 0;
	else
		memset(cache->hash_table, 0, byte_size);
}
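/*
 * Note on sizing: table_size is always a power of two, so probe positions
 * can be reduced with a cheap mask instead of a modulo. A minimal sketch of
 * the index computation used throughout this file:
 *
 *   const uint32_t mask = cache->table_size - 1;   // table_size == 2^n
 *   const uint32_t index = (start + i) & mask;     // same as % table_size
 */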
void
radv_pipeline_cache_finish(struct radv_pipeline_cache *cache)
{
	for (unsigned i = 0; i < cache->table_size; ++i)
		if (cache->hash_table[i]) {
			if (cache->hash_table[i]->variant)
				radv_shader_variant_destroy(cache->device,
							    cache->hash_table[i]->variant);
			vk_free(&cache->alloc, cache->hash_table[i]);
		}
	pthread_mutex_destroy(&cache->mutex);
	free(cache->hash_table);
}

static uint32_t
entry_size(struct cache_entry *entry)
{
	return sizeof(*entry) + entry->code_size;
}

void
radv_hash_shader(unsigned char *hash, struct radv_shader_module *module,
		 const char *entrypoint,
		 const VkSpecializationInfo *spec_info,
		 const struct radv_pipeline_layout *layout,
		 const union ac_shader_variant_key *key)
{
	struct mesa_sha1 *ctx;

	ctx = _mesa_sha1_init();
	if (key)
		_mesa_sha1_update(ctx, key, sizeof(*key));
	_mesa_sha1_update(ctx, module->sha1, sizeof(module->sha1));
	_mesa_sha1_update(ctx, entrypoint, strlen(entrypoint));
	if (layout)
		_mesa_sha1_update(ctx, layout->sha1, sizeof(layout->sha1));
	if (spec_info) {
		_mesa_sha1_update(ctx, spec_info->pMapEntries,
				  spec_info->mapEntryCount * sizeof spec_info->pMapEntries[0]);
		_mesa_sha1_update(ctx, spec_info->pData, spec_info->dataSize);
	}
	_mesa_sha1_final(ctx, hash);
}

static struct cache_entry *
radv_pipeline_cache_search_unlocked(struct radv_pipeline_cache *cache,
				    const unsigned char *sha1)
{
	/* An empty cache (table_size == 0, e.g. RADV_DEBUG_NO_CACHE or a
	 * failed allocation in radv_pipeline_cache_init) never contains
	 * anything; without this check the loop below would fall through to
	 * unreachable(). */
	if (cache->table_size == 0)
		return NULL;

	const uint32_t mask = cache->table_size - 1;
	const uint32_t start = (*(uint32_t *) sha1);

	for (uint32_t i = 0; i < cache->table_size; i++) {
		const uint32_t index = (start + i) & mask;
		struct cache_entry *entry = cache->hash_table[index];

		if (!entry)
			return NULL;

		if (memcmp(entry->sha1, sha1, sizeof(entry->sha1)) == 0)
			return entry;
	}

	unreachable("hash table should never be full");
}

static struct cache_entry *
radv_pipeline_cache_search(struct radv_pipeline_cache *cache,
			   const unsigned char *sha1)
{
	struct cache_entry *entry;

	pthread_mutex_lock(&cache->mutex);

	entry = radv_pipeline_cache_search_unlocked(cache, sha1);

	pthread_mutex_unlock(&cache->mutex);

	return entry;
}

struct radv_shader_variant *
radv_create_shader_variant_from_pipeline_cache(struct radv_device *device,
					       struct radv_pipeline_cache *cache,
					       const unsigned char *sha1)
{
	/* Hold the mutex across the search and the lazy variant creation so
	 * two threads hitting the same entry can't both create (and one of
	 * them leak) a variant. */
	pthread_mutex_lock(&cache->mutex);
	struct cache_entry *entry =
		radv_pipeline_cache_search_unlocked(cache, sha1);

	if (!entry) {
		pthread_mutex_unlock(&cache->mutex);
		return NULL;
	}

	if (!entry->variant) {
		struct radv_shader_variant *variant;

		variant = calloc(1, sizeof(struct radv_shader_variant));
		if (!variant) {
			pthread_mutex_unlock(&cache->mutex);
			return NULL;
		}

		variant->config = entry->config;
		variant->info = entry->variant_info;
		variant->rsrc1 = entry->rsrc1;
		variant->rsrc2 = entry->rsrc2;
		variant->ref_count = 1;

		variant->bo = device->ws->buffer_create(device->ws, entry->code_size, 256,
							RADEON_DOMAIN_GTT, RADEON_FLAG_CPU_ACCESS);
		if (!variant->bo) {
			free(variant);
			pthread_mutex_unlock(&cache->mutex);
			return NULL;
		}

		void *ptr = device->ws->buffer_map(variant->bo);
		memcpy(ptr, entry->code, entry->code_size);
		device->ws->buffer_unmap(variant->bo);

		entry->variant = variant;
	}

	__sync_fetch_and_add(&entry->variant->ref_count, 1);
	pthread_mutex_unlock(&cache->mutex);
	return entry->variant;
}
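/*
 * Typical caller flow, sketched here for orientation (the real call sites
 * live in the pipeline creation code; compile_shader() is a hypothetical
 * stand-in for the actual NIR -> LLVM compile path):
 *
 *   unsigned char sha1[20];
 *   radv_hash_shader(sha1, module, entrypoint, spec_info, layout, key);
 *
 *   variant = radv_create_shader_variant_from_pipeline_cache(device, cache, sha1);
 *   if (!variant) {
 *           variant = compile_shader(...);
 *           variant = radv_pipeline_cache_insert_shader(cache, sha1, variant,
 *                                                       code, code_size);
 *   }
 */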
static void
radv_pipeline_cache_set_entry(struct radv_pipeline_cache *cache,
			      struct cache_entry *entry)
{
	const uint32_t mask = cache->table_size - 1;
	const uint32_t start = entry->sha1_dw[0];

	/* We'll always be able to insert when we get here. */
	assert(cache->kernel_count < cache->table_size / 2);

	for (uint32_t i = 0; i < cache->table_size; i++) {
		const uint32_t index = (start + i) & mask;
		if (!cache->hash_table[index]) {
			cache->hash_table[index] = entry;
			break;
		}
	}

	cache->total_size += entry_size(entry);
	cache->kernel_count++;
}

static VkResult
radv_pipeline_cache_grow(struct radv_pipeline_cache *cache)
{
	const uint32_t table_size = cache->table_size * 2;
	const uint32_t old_table_size = cache->table_size;
	const size_t byte_size = table_size * sizeof(cache->hash_table[0]);
	struct cache_entry **table;
	struct cache_entry **old_table = cache->hash_table;

	table = malloc(byte_size);
	if (table == NULL)
		return VK_ERROR_OUT_OF_HOST_MEMORY;

	cache->hash_table = table;
	cache->table_size = table_size;
	cache->kernel_count = 0;
	cache->total_size = 0;

	memset(cache->hash_table, 0, byte_size);
	for (uint32_t i = 0; i < old_table_size; i++) {
		struct cache_entry *entry = old_table[i];
		if (!entry)
			continue;

		radv_pipeline_cache_set_entry(cache, entry);
	}

	free(old_table);

	return VK_SUCCESS;
}

static void
radv_pipeline_cache_add_entry(struct radv_pipeline_cache *cache,
			      struct cache_entry *entry)
{
	if (cache->kernel_count == cache->table_size / 2)
		radv_pipeline_cache_grow(cache);

	/* Failing to grow the hash table isn't fatal, but may mean we don't
	 * have enough space to add this new kernel. Only add it if there's
	 * room. */
	if (cache->kernel_count < cache->table_size / 2)
		radv_pipeline_cache_set_entry(cache, entry);
}
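/*
 * Why the probe loops above always terminate: radv_pipeline_cache_add_entry
 * only inserts while kernel_count < table_size / 2, i.e. the load factor
 * never exceeds 50%, so linear probing is guaranteed to hit an empty slot
 * long before wrapping around. For example, with the initial table_size of
 * 1024, an insert that arrives when 512 entries are already present first
 * doubles the table to 2048 and rehashes every live entry into it.
 */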
struct radv_shader_variant *
radv_pipeline_cache_insert_shader(struct radv_pipeline_cache *cache,
				  const unsigned char *sha1,
				  struct radv_shader_variant *variant,
				  const void *code, unsigned code_size)
{
	pthread_mutex_lock(&cache->mutex);
	struct cache_entry *entry = radv_pipeline_cache_search_unlocked(cache, sha1);
	if (entry) {
		if (entry->variant) {
			/* Another thread won the race; drop the caller's
			 * variant and hand back the cached one. */
			radv_shader_variant_destroy(cache->device, variant);
			variant = entry->variant;
		} else {
			entry->variant = variant;
		}
		__sync_fetch_and_add(&variant->ref_count, 1);
		pthread_mutex_unlock(&cache->mutex);
		return variant;
	}

	entry = vk_alloc(&cache->alloc, sizeof(*entry) + code_size, 8,
			 VK_SYSTEM_ALLOCATION_SCOPE_CACHE);
	if (!entry) {
		pthread_mutex_unlock(&cache->mutex);
		return variant;
	}

	memcpy(entry->sha1, sha1, sizeof(entry->sha1));
	memcpy(entry->code, code, code_size);
	entry->config = variant->config;
	entry->variant_info = variant->info;
	entry->rsrc1 = variant->rsrc1;
	entry->rsrc2 = variant->rsrc2;
	entry->code_size = code_size;
	entry->variant = variant;
	__sync_fetch_and_add(&variant->ref_count, 1);

	radv_pipeline_cache_add_entry(cache, entry);

	cache->modified = true;
	pthread_mutex_unlock(&cache->mutex);
	return variant;
}

struct cache_header {
	uint32_t header_size;
	uint32_t header_version;
	uint32_t vendor_id;
	uint32_t device_id;
	uint8_t uuid[VK_UUID_SIZE];
};

void
radv_pipeline_cache_load(struct radv_pipeline_cache *cache,
			 const void *data, size_t size)
{
	struct radv_device *device = cache->device;
	struct cache_header header;

	if (size < sizeof(header))
		return;
	memcpy(&header, data, sizeof(header));
	if (header.header_size < sizeof(header))
		return;
	if (header.header_version != VK_PIPELINE_CACHE_HEADER_VERSION_ONE)
		return;
	if (header.vendor_id != 0x1002)
		return;
	if (header.device_id != device->physical_device->rad_info.pci_id)
		return;
	if (memcmp(header.uuid, device->physical_device->uuid, VK_UUID_SIZE) != 0)
		return;

	char *end = (char *) data + size;
	char *p = (char *) data + header.header_size;

	while (end - p >= (ptrdiff_t) sizeof(struct cache_entry)) {
		struct cache_entry *entry = (struct cache_entry *) p;
		struct cache_entry *dest_entry;

		if (end - p < (ptrdiff_t) (sizeof(*entry) + entry->code_size))
			break;

		dest_entry = vk_alloc(&cache->alloc, sizeof(*entry) + entry->code_size,
				      8, VK_SYSTEM_ALLOCATION_SCOPE_CACHE);
		if (dest_entry) {
			memcpy(dest_entry, entry, sizeof(*entry) + entry->code_size);
			/* The serialized 'variant' pointer is meaningless in
			 * this process; it is recreated lazily on lookup. */
			dest_entry->variant = NULL;
			radv_pipeline_cache_add_entry(cache, dest_entry);
		}
		p += sizeof(*entry) + entry->code_size;
	}
}

VkResult radv_CreatePipelineCache(
	VkDevice                                    _device,
	const VkPipelineCacheCreateInfo*            pCreateInfo,
	const VkAllocationCallbacks*                pAllocator,
	VkPipelineCache*                            pPipelineCache)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	struct radv_pipeline_cache *cache;

	assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO);
	assert(pCreateInfo->flags == 0);

	cache = vk_alloc2(&device->alloc, pAllocator,
			  sizeof(*cache), 8,
			  VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
	if (cache == NULL)
		return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

	if (pAllocator)
		cache->alloc = *pAllocator;
	else
		cache->alloc = device->alloc;

	radv_pipeline_cache_init(cache, device);

	if (pCreateInfo->initialDataSize > 0) {
		radv_pipeline_cache_load(cache,
					 pCreateInfo->pInitialData,
					 pCreateInfo->initialDataSize);
	}

	*pPipelineCache = radv_pipeline_cache_to_handle(cache);

	return VK_SUCCESS;
}
void radv_DestroyPipelineCache(
	VkDevice                                    _device,
	VkPipelineCache                             _cache,
	const VkAllocationCallbacks*                pAllocator)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	RADV_FROM_HANDLE(radv_pipeline_cache, cache, _cache);

	if (!cache)
		return;

	radv_pipeline_cache_finish(cache);
	vk_free2(&device->alloc, pAllocator, cache);
}

VkResult radv_GetPipelineCacheData(
	VkDevice                                    _device,
	VkPipelineCache                             _cache,
	size_t*                                     pDataSize,
	void*                                       pData)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	RADV_FROM_HANDLE(radv_pipeline_cache, cache, _cache);
	struct cache_header *header;
	VkResult result = VK_SUCCESS;

	/* The spec does not require external synchronization for this entry
	 * point, so serialize against concurrent inserts. */
	pthread_mutex_lock(&cache->mutex);

	const size_t size = sizeof(*header) + cache->total_size;
	if (pData == NULL) {
		pthread_mutex_unlock(&cache->mutex);
		*pDataSize = size;
		return VK_SUCCESS;
	}
	if (*pDataSize < sizeof(*header)) {
		pthread_mutex_unlock(&cache->mutex);
		*pDataSize = 0;
		return VK_INCOMPLETE;
	}

	char *p = pData, *end = (char *) pData + *pDataSize;
	header = (struct cache_header *) p;
	header->header_size = sizeof(*header);
	header->header_version = VK_PIPELINE_CACHE_HEADER_VERSION_ONE;
	header->vendor_id = 0x1002;
	header->device_id = device->physical_device->rad_info.pci_id;
	memcpy(header->uuid, device->physical_device->uuid, VK_UUID_SIZE);
	p += header->header_size;

	for (uint32_t i = 0; i < cache->table_size; i++) {
		struct cache_entry *entry = cache->hash_table[i];
		if (!entry)
			continue;

		const uint32_t sz = entry_size(entry);
		if (end < p + sz) {
			result = VK_INCOMPLETE;
			break;
		}

		memcpy(p, entry, sz);
		/* Host pointers must not leak into the serialized blob. */
		((struct cache_entry *) p)->variant = NULL;
		p += sz;
	}
	*pDataSize = p - pData;

	pthread_mutex_unlock(&cache->mutex);
	return result;
}

static void
radv_pipeline_cache_merge(struct radv_pipeline_cache *dst,
			  struct radv_pipeline_cache *src)
{
	for (uint32_t i = 0; i < src->table_size; i++) {
		struct cache_entry *entry = src->hash_table[i];
		if (!entry || radv_pipeline_cache_search(dst, entry->sha1))
			continue;

		radv_pipeline_cache_add_entry(dst, entry);

		/* The entry now belongs to dst; keep src's accounting in
		 * sync with its table. */
		src->hash_table[i] = NULL;
		src->kernel_count--;
		src->total_size -= entry_size(entry);
	}
}

VkResult radv_MergePipelineCaches(
	VkDevice                                    _device,
	VkPipelineCache                             destCache,
	uint32_t                                    srcCacheCount,
	const VkPipelineCache*                      pSrcCaches)
{
	RADV_FROM_HANDLE(radv_pipeline_cache, dst, destCache);

	for (uint32_t i = 0; i < srcCacheCount; i++) {
		RADV_FROM_HANDLE(radv_pipeline_cache, src, pSrcCaches[i]);

		radv_pipeline_cache_merge(dst, src);
	}

	return VK_SUCCESS;
}
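/*
 * Application-side usage example (a sketch of the standard Vulkan two-call
 * idiom that ends up in radv_GetPipelineCacheData above; error handling
 * omitted):
 *
 *   size_t size = 0;
 *   vkGetPipelineCacheData(device, cache, &size, NULL);    // query size
 *   void *data = malloc(size);
 *   vkGetPipelineCacheData(device, cache, &size, data);    // fill the blob
 *
 *   The application then persists 'data' and, on the next run, passes it
 *   back via VkPipelineCacheCreateInfo::pInitialData/initialDataSize so
 *   radv_pipeline_cache_load can repopulate the table.
 */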