/*
 * Copyright © 2015 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "util/mesa-sha1.h"
#include "util/debug.h"
#include "radv_private.h"

#include "ac_nir_to_llvm.h"

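/* A cache entry is keyed by the SHA-1 of everything that went into the
 * compile and stores the compiled code together with the metadata needed to
 * rebuild a radv_shader_variant without re-running the compiler. The variant
 * pointer caches an already-uploaded variant and is not part of the
 * serialized format.
 */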
struct cache_entry {
	union {
		unsigned char sha1[20];
		uint32_t sha1_dw[5];
	};
	uint32_t code_size;
	struct ac_shader_variant_info variant_info;
	struct ac_shader_config config;
	uint32_t rsrc1, rsrc2;
	struct radv_shader_variant *variant;
	uint32_t code[0];
};

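/* Initialize an in-memory cache. The hash table starts with 1024 slots and
 * is grown as needed so that it always stays at most half full.
 */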
void
radv_pipeline_cache_init(struct radv_pipeline_cache *cache,
			 struct radv_device *device)
{
	cache->device = device;
	pthread_mutex_init(&cache->mutex, NULL);

	cache->modified = false;
	cache->kernel_count = 0;
	cache->total_size = 0;
	cache->table_size = 1024;
	const size_t byte_size = cache->table_size * sizeof(cache->hash_table[0]);
	cache->hash_table = malloc(byte_size);

	/* We don't consider allocation failure fatal, we just start with a 0-sized
	 * cache. The same path is taken when caching is disabled with
	 * RADV_DEBUG_NO_CACHE. */
	if (cache->hash_table == NULL ||
	    (device->debug_flags & RADV_DEBUG_NO_CACHE))
		cache->table_size = 0;
	else
		memset(cache->hash_table, 0, byte_size);
}

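/* Tear down a cache: drop the reference each entry holds on its variant,
 * then free the entries and the hash table itself.
 */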
void
radv_pipeline_cache_finish(struct radv_pipeline_cache *cache)
{
	for (unsigned i = 0; i < cache->table_size; ++i)
		if (cache->hash_table[i]) {
			if (cache->hash_table[i]->variant)
				radv_shader_variant_destroy(cache->device,
							    cache->hash_table[i]->variant);
			vk_free(&cache->alloc, cache->hash_table[i]);
		}
	pthread_mutex_destroy(&cache->mutex);
	free(cache->hash_table);
}

static uint32_t
entry_size(struct cache_entry *entry)
{
	return sizeof(*entry) + entry->code_size;
}

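/* Hash everything that influences the generated code: the variant key, the
 * SPIR-V module, the entrypoint name, the pipeline layout and any
 * specialization constants.
 */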
void
radv_hash_shader(unsigned char *hash, struct radv_shader_module *module,
		 const char *entrypoint,
		 const VkSpecializationInfo *spec_info,
		 const struct radv_pipeline_layout *layout,
		 const union ac_shader_variant_key *key)
{
	struct mesa_sha1 *ctx;

	ctx = _mesa_sha1_init();
	if (key)
		_mesa_sha1_update(ctx, key, sizeof(*key));
	_mesa_sha1_update(ctx, module->sha1, sizeof(module->sha1));
	_mesa_sha1_update(ctx, entrypoint, strlen(entrypoint));
	if (layout)
		_mesa_sha1_update(ctx, layout->sha1, sizeof(layout->sha1));
	if (spec_info) {
		_mesa_sha1_update(ctx, spec_info->pMapEntries,
				  spec_info->mapEntryCount * sizeof spec_info->pMapEntries[0]);
		_mesa_sha1_update(ctx, spec_info->pData, spec_info->dataSize);
	}
	_mesa_sha1_final(ctx, hash);
}


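/* The table is an open-addressed hash table with linear probing, keyed by
 * the first dword of the SHA-1. Because it is kept at most half full, a
 * probe always terminates at either a match or an empty slot.
 */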
static struct cache_entry *
radv_pipeline_cache_search_unlocked(struct radv_pipeline_cache *cache,
				    const unsigned char *sha1)
{
	const uint32_t mask = cache->table_size - 1;
	const uint32_t start = (*(uint32_t *) sha1);

	/* A disabled cache (or one whose table allocation failed) has
	 * table_size == 0; treat every lookup as a miss instead of falling
	 * through to unreachable() below. */
	if (cache->table_size == 0)
		return NULL;

	for (uint32_t i = 0; i < cache->table_size; i++) {
		const uint32_t index = (start + i) & mask;
		struct cache_entry *entry = cache->hash_table[index];

		if (!entry)
			return NULL;

		if (memcmp(entry->sha1, sha1, sizeof(entry->sha1)) == 0) {
			return entry;
		}
	}

	unreachable("hash table should never be full");
}

static struct cache_entry *
radv_pipeline_cache_search(struct radv_pipeline_cache *cache,
			   const unsigned char *sha1)
{
	struct cache_entry *entry;

	pthread_mutex_lock(&cache->mutex);

	entry = radv_pipeline_cache_search_unlocked(cache, sha1);

	pthread_mutex_unlock(&cache->mutex);

	return entry;
}

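/* Look up a shader by hash. On the first hit the cached code is uploaded
 * into a CPU-visible GTT buffer and a radv_shader_variant is rebuilt from
 * the stored metadata; later hits reuse that variant. Returns a new
 * reference, or NULL on a cache miss.
 */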
struct radv_shader_variant *
radv_create_shader_variant_from_pipeline_cache(struct radv_device *device,
					       struct radv_pipeline_cache *cache,
					       const unsigned char *sha1)
{
	struct cache_entry *entry = radv_pipeline_cache_search(cache, sha1);

	if (!entry)
		return NULL;

	if (!entry->variant) {
		struct radv_shader_variant *variant;

		variant = calloc(1, sizeof(struct radv_shader_variant));
		if (!variant)
			return NULL;

		variant->config = entry->config;
		variant->info = entry->variant_info;
		variant->rsrc1 = entry->rsrc1;
		variant->rsrc2 = entry->rsrc2;
		variant->ref_count = 1;

		variant->bo = device->ws->buffer_create(device->ws, entry->code_size, 256,
							RADEON_DOMAIN_GTT, RADEON_FLAG_CPU_ACCESS);

		void *ptr = device->ws->buffer_map(variant->bo);
		memcpy(ptr, entry->code, entry->code_size);
		device->ws->buffer_unmap(variant->bo);

		entry->variant = variant;
	}

	__sync_fetch_and_add(&entry->variant->ref_count, 1);
	return entry->variant;
}


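/* Place an entry in the first free slot of its probe sequence. The caller
 * must hold the cache mutex and guarantee the table is less than half full.
 */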
static void
radv_pipeline_cache_set_entry(struct radv_pipeline_cache *cache,
			      struct cache_entry *entry)
{
	const uint32_t mask = cache->table_size - 1;
	const uint32_t start = entry->sha1_dw[0];

	/* We'll always be able to insert when we get here. */
	assert(cache->kernel_count < cache->table_size / 2);

	for (uint32_t i = 0; i < cache->table_size; i++) {
		const uint32_t index = (start + i) & mask;
		if (!cache->hash_table[index]) {
			cache->hash_table[index] = entry;
			break;
		}
	}

	cache->total_size += entry_size(entry);
	cache->kernel_count++;
}


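/* Double the hash table and rehash every existing entry into the new table. */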
static VkResult
radv_pipeline_cache_grow(struct radv_pipeline_cache *cache)
{
	const uint32_t table_size = cache->table_size * 2;
	const uint32_t old_table_size = cache->table_size;
	const size_t byte_size = table_size * sizeof(cache->hash_table[0]);
	struct cache_entry **table;
	struct cache_entry **old_table = cache->hash_table;

	table = malloc(byte_size);
	if (table == NULL)
		return VK_ERROR_OUT_OF_HOST_MEMORY;

	cache->hash_table = table;
	cache->table_size = table_size;
	cache->kernel_count = 0;
	cache->total_size = 0;

	memset(cache->hash_table, 0, byte_size);
	for (uint32_t i = 0; i < old_table_size; i++) {
		struct cache_entry *entry = old_table[i];
		if (!entry)
			continue;

		radv_pipeline_cache_set_entry(cache, entry);
	}

	free(old_table);

	return VK_SUCCESS;
}

static void
radv_pipeline_cache_add_entry(struct radv_pipeline_cache *cache,
			      struct cache_entry *entry)
{
	if (cache->kernel_count == cache->table_size / 2)
		radv_pipeline_cache_grow(cache);

	/* Failing to grow the hash table isn't fatal, but it may mean we don't
	 * have enough space to add this new kernel. Only add it if there's room.
	 */
	if (cache->kernel_count < cache->table_size / 2)
		radv_pipeline_cache_set_entry(cache, entry);
}

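/* Add a freshly compiled variant to the cache. If an entry with the same
 * hash already holds a variant (e.g. another thread raced us), that variant
 * wins and the caller's is destroyed; if the entry exists but has no variant
 * yet (it was loaded from serialized data), the caller's variant is attached
 * to it. The returned variant always carries an extra reference.
 */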
struct radv_shader_variant *
radv_pipeline_cache_insert_shader(struct radv_pipeline_cache *cache,
				  const unsigned char *sha1,
				  struct radv_shader_variant *variant,
				  const void *code, unsigned code_size)
{
	pthread_mutex_lock(&cache->mutex);
	struct cache_entry *entry = radv_pipeline_cache_search_unlocked(cache, sha1);
	if (entry) {
		if (entry->variant) {
			radv_shader_variant_destroy(cache->device, variant);
			variant = entry->variant;
		} else {
			entry->variant = variant;
		}
		__sync_fetch_and_add(&variant->ref_count, 1);
		pthread_mutex_unlock(&cache->mutex);
		return variant;
	}

	entry = vk_alloc(&cache->alloc, sizeof(*entry) + code_size, 8,
			 VK_SYSTEM_ALLOCATION_SCOPE_CACHE);
	if (!entry) {
		pthread_mutex_unlock(&cache->mutex);
		return variant;
	}

	memcpy(entry->sha1, sha1, sizeof(entry->sha1));
	memcpy(entry->code, code, code_size);
	entry->config = variant->config;
	entry->variant_info = variant->info;
	entry->rsrc1 = variant->rsrc1;
	entry->rsrc2 = variant->rsrc2;
	entry->code_size = code_size;
	entry->variant = variant;
	__sync_fetch_and_add(&variant->ref_count, 1);

	radv_pipeline_cache_add_entry(cache, entry);

	cache->modified = true;
	pthread_mutex_unlock(&cache->mutex);
	return variant;
}

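/* Serialized pipeline cache layout: the header below (the layout Vulkan
 * requires for VkPipelineCache data), followed by the raw cache_entry
 * records with their code appended. The variant pointer is cleared when
 * writing and rebuilt lazily after loading.
 */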
struct cache_header {
	uint32_t header_size;
	uint32_t header_version;
	uint32_t vendor_id;
	uint32_t device_id;
	uint8_t  uuid[VK_UUID_SIZE];
};

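/* Load serialized data produced by radv_GetPipelineCacheData, after checking
 * that the header matches this device. Truncated or malformed trailing data
 * is silently ignored.
 */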
void
radv_pipeline_cache_load(struct radv_pipeline_cache *cache,
			 const void *data, size_t size)
{
	struct radv_device *device = cache->device;
	struct cache_header header;

	if (size < sizeof(header))
		return;
	memcpy(&header, data, sizeof(header));
	if (header.header_size < sizeof(header))
		return;
	if (header.header_version != VK_PIPELINE_CACHE_HEADER_VERSION_ONE)
		return;
	if (header.vendor_id != 0x1002)
		return;
	if (header.device_id != device->physical_device->rad_info.pci_id)
		return;
	if (memcmp(header.uuid, device->physical_device->uuid, VK_UUID_SIZE) != 0)
		return;

	char *end = (char *) data + size;
	char *p = (char *) data + header.header_size;

	while (end - p >= sizeof(struct cache_entry)) {
		struct cache_entry *entry = (struct cache_entry *)p;
		struct cache_entry *dest_entry;
		if (end - p < sizeof(*entry) + entry->code_size)
			break;

		dest_entry = vk_alloc(&cache->alloc, sizeof(*entry) + entry->code_size,
				      8, VK_SYSTEM_ALLOCATION_SCOPE_CACHE);
		if (dest_entry) {
			memcpy(dest_entry, entry, sizeof(*entry) + entry->code_size);
			dest_entry->variant = NULL;
			radv_pipeline_cache_add_entry(cache, dest_entry);
		}
		p += sizeof(*entry) + entry->code_size;
	}
}

VkResult radv_CreatePipelineCache(
	VkDevice                                    _device,
	const VkPipelineCacheCreateInfo*            pCreateInfo,
	const VkAllocationCallbacks*                pAllocator,
	VkPipelineCache*                            pPipelineCache)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	struct radv_pipeline_cache *cache;

	assert(pCreateInfo->sType == VK_STRUCTURE_TYPE_PIPELINE_CACHE_CREATE_INFO);
	assert(pCreateInfo->flags == 0);

	cache = vk_alloc2(&device->alloc, pAllocator,
			  sizeof(*cache), 8,
			  VK_SYSTEM_ALLOCATION_SCOPE_OBJECT);
	if (cache == NULL)
		return vk_error(VK_ERROR_OUT_OF_HOST_MEMORY);

	if (pAllocator)
		cache->alloc = *pAllocator;
	else
		cache->alloc = device->alloc;

	radv_pipeline_cache_init(cache, device);

	if (pCreateInfo->initialDataSize > 0) {
		radv_pipeline_cache_load(cache,
					 pCreateInfo->pInitialData,
					 pCreateInfo->initialDataSize);
	}

	*pPipelineCache = radv_pipeline_cache_to_handle(cache);

	return VK_SUCCESS;
}

void radv_DestroyPipelineCache(
	VkDevice                                    _device,
	VkPipelineCache                             _cache,
	const VkAllocationCallbacks*                pAllocator)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	RADV_FROM_HANDLE(radv_pipeline_cache, cache, _cache);

	if (!cache)
		return;
	radv_pipeline_cache_finish(cache);

	vk_free2(&device->alloc, pAllocator, cache);
}

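/* Serialize the cache: write the Vulkan pipeline cache header, then append
 * as many entries as fit in the caller's buffer. Returns VK_INCOMPLETE when
 * the buffer is too small to hold everything.
 */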
VkResult radv_GetPipelineCacheData(
	VkDevice                                    _device,
	VkPipelineCache                             _cache,
	size_t*                                     pDataSize,
	void*                                       pData)
{
	RADV_FROM_HANDLE(radv_device, device, _device);
	RADV_FROM_HANDLE(radv_pipeline_cache, cache, _cache);
	struct cache_header *header;
	VkResult result = VK_SUCCESS;
	const size_t size = sizeof(*header) + cache->total_size;
	if (pData == NULL) {
		*pDataSize = size;
		return VK_SUCCESS;
	}
	if (*pDataSize < sizeof(*header)) {
		*pDataSize = 0;
		return VK_INCOMPLETE;
	}
	void *p = pData, *end = pData + *pDataSize;
	header = p;
	header->header_size = sizeof(*header);
	header->header_version = VK_PIPELINE_CACHE_HEADER_VERSION_ONE;
	header->vendor_id = 0x1002;
	header->device_id = device->physical_device->rad_info.pci_id;
	memcpy(header->uuid, device->physical_device->uuid, VK_UUID_SIZE);
	p += header->header_size;

	struct cache_entry *entry;
	for (uint32_t i = 0; i < cache->table_size; i++) {
		if (!cache->hash_table[i])
			continue;
		entry = cache->hash_table[i];
		const uint32_t entry_sz = entry_size(entry);
		if (end < p + entry_sz) {
			result = VK_INCOMPLETE;
			break;
		}

		memcpy(p, entry, entry_sz);
		((struct cache_entry *)p)->variant = NULL;
		p += entry_sz;
	}
	*pDataSize = p - pData;

	return result;
}

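/* Move every entry dst doesn't already have from src into dst; moved entries
 * (and the variants they reference) are now owned by dst.
 */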
static void
radv_pipeline_cache_merge(struct radv_pipeline_cache *dst,
			  struct radv_pipeline_cache *src)
{
	for (uint32_t i = 0; i < src->table_size; i++) {
		struct cache_entry *entry = src->hash_table[i];
		if (!entry || radv_pipeline_cache_search(dst, entry->sha1))
			continue;

		radv_pipeline_cache_add_entry(dst, entry);

		src->hash_table[i] = NULL;
	}
}

VkResult radv_MergePipelineCaches(
	VkDevice                                    _device,
	VkPipelineCache                             destCache,
	uint32_t                                    srcCacheCount,
	const VkPipelineCache*                      pSrcCaches)
{
	RADV_FROM_HANDLE(radv_pipeline_cache, dst, destCache);

	for (uint32_t i = 0; i < srcCacheCount; i++) {
		RADV_FROM_HANDLE(radv_pipeline_cache, src, pSrcCaches[i]);

		radv_pipeline_cache_merge(dst, src);
	}

	return VK_SUCCESS;
}
    477