/*
 * Copyright 2011 Marek Olšák <maraeo@gmail.com>
 * Copyright 2015 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
 * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 */
/*
 * Authors:
 *      Marek Olšák <maraeo@gmail.com>
 */

#include "amdgpu_cs.h"

#include "os/os_time.h"
#include "state_tracker/drm_driver.h"
#include <amdgpu_drm.h>
#include <xf86drm.h>
#include <stdio.h>
#include <inttypes.h>

static struct pb_buffer *
amdgpu_bo_create(struct radeon_winsys *rws,
                 uint64_t size,
                 unsigned alignment,
                 enum radeon_bo_domain domain,
                 enum radeon_bo_flag flags);

static bool amdgpu_bo_wait(struct pb_buffer *_buf, uint64_t timeout,
                           enum radeon_bo_usage usage)
{
   struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(_buf);
   struct amdgpu_winsys *ws = bo->ws;
   int64_t abs_timeout;

   if (timeout == 0) {
      if (p_atomic_read(&bo->num_active_ioctls))
         return false;

   } else {
      abs_timeout = os_time_get_absolute_timeout(timeout);

      /* Wait if any ioctl is being submitted with this buffer. */
      if (!os_wait_until_zero_abs_timeout(&bo->num_active_ioctls, abs_timeout))
         return false;
   }

   if (bo->is_shared) {
      /* We can't use user fences for shared buffers, because user fences
       * are local to this process only. If we want to wait for all buffer
       * uses in all processes, we have to use amdgpu_bo_wait_for_idle.
       */
      bool buffer_busy = true;
      int r;

      r = amdgpu_bo_wait_for_idle(bo->bo, timeout, &buffer_busy);
      if (r)
         fprintf(stderr, "%s: amdgpu_bo_wait_for_idle failed %i\n", __func__,
                 r);
      return !buffer_busy;
   }

   if (timeout == 0) {
      unsigned idle_fences;
      bool buffer_idle;

      pipe_mutex_lock(ws->bo_fence_lock);

      for (idle_fences = 0; idle_fences < bo->num_fences; ++idle_fences) {
         if (!amdgpu_fence_wait(bo->fences[idle_fences], 0, false))
            break;
      }

      /* Release the idle fences to avoid checking them again later. */
      for (unsigned i = 0; i < idle_fences; ++i)
         amdgpu_fence_reference(&bo->fences[i], NULL);

      memmove(&bo->fences[0], &bo->fences[idle_fences],
              (bo->num_fences - idle_fences) * sizeof(*bo->fences));
      bo->num_fences -= idle_fences;

      buffer_idle = !bo->num_fences;
      pipe_mutex_unlock(ws->bo_fence_lock);

      return buffer_idle;
   } else {
      bool buffer_idle = true;

      pipe_mutex_lock(ws->bo_fence_lock);
      while (bo->num_fences && buffer_idle) {
         struct pipe_fence_handle *fence = NULL;
         bool fence_idle = false;

         amdgpu_fence_reference(&fence, bo->fences[0]);

         /* Wait for the fence. */
         pipe_mutex_unlock(ws->bo_fence_lock);
         if (amdgpu_fence_wait(fence, abs_timeout, true))
            fence_idle = true;
         else
            buffer_idle = false;
         pipe_mutex_lock(ws->bo_fence_lock);

         /* Release an idle fence to avoid checking it again later, keeping in
          * mind that the fence array may have been modified by other threads.
          */
         if (fence_idle && bo->num_fences && bo->fences[0] == fence) {
            amdgpu_fence_reference(&bo->fences[0], NULL);
            memmove(&bo->fences[0], &bo->fences[1],
                    (bo->num_fences - 1) * sizeof(*bo->fences));
            bo->num_fences--;
         }

         amdgpu_fence_reference(&fence, NULL);
      }
      pipe_mutex_unlock(ws->bo_fence_lock);

      return buffer_idle;
   }
}

static enum radeon_bo_domain amdgpu_bo_get_initial_domain(
      struct pb_buffer *buf)
{
   return ((struct amdgpu_winsys_bo*)buf)->initial_domain;
}

static void amdgpu_bo_remove_fences(struct amdgpu_winsys_bo *bo)
{
   for (unsigned i = 0; i < bo->num_fences; ++i)
      amdgpu_fence_reference(&bo->fences[i], NULL);

   FREE(bo->fences);
   bo->num_fences = 0;
   bo->max_fences = 0;
}

void amdgpu_bo_destroy(struct pb_buffer *_buf)
{
   struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(_buf);

   assert(bo->bo && "must not be called for slab entries");

   pipe_mutex_lock(bo->ws->global_bo_list_lock);
   LIST_DEL(&bo->u.real.global_list_item);
   bo->ws->num_buffers--;
   pipe_mutex_unlock(bo->ws->global_bo_list_lock);

   amdgpu_bo_va_op(bo->bo, 0, bo->base.size, bo->va, 0, AMDGPU_VA_OP_UNMAP);
   amdgpu_va_range_free(bo->u.real.va_handle);
   amdgpu_bo_free(bo->bo);

   amdgpu_bo_remove_fences(bo);

   if (bo->initial_domain & RADEON_DOMAIN_VRAM)
      bo->ws->allocated_vram -= align64(bo->base.size, bo->ws->info.gart_page_size);
   else if (bo->initial_domain & RADEON_DOMAIN_GTT)
      bo->ws->allocated_gtt -= align64(bo->base.size, bo->ws->info.gart_page_size);

   if (bo->u.real.map_count >= 1) {
      if (bo->initial_domain & RADEON_DOMAIN_VRAM)
         bo->ws->mapped_vram -= bo->base.size;
      else if (bo->initial_domain & RADEON_DOMAIN_GTT)
         bo->ws->mapped_gtt -= bo->base.size;
   }

   FREE(bo);
}

static void amdgpu_bo_destroy_or_cache(struct pb_buffer *_buf)
{
   struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(_buf);

   assert(bo->bo); /* slab buffers have a separate vtbl */

   if (bo->u.real.use_reusable_pool)
      pb_cache_add_buffer(&bo->u.real.cache_entry);
   else
      amdgpu_bo_destroy(_buf);
}

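/* Map a buffer for CPU access.
 *
 * Unless PIPE_TRANSFER_UNSYNCHRONIZED is set, this flushes any command
 * stream that references the buffer and waits for the relevant GPU work
 * before returning (writes only for read mappings, all usage for write
 * mappings). With PIPE_TRANSFER_DONTBLOCK, a busy buffer triggers an
 * asynchronous flush and NULL is returned instead of waiting.
 */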
static void *amdgpu_bo_map(struct pb_buffer *buf,
                           struct radeon_winsys_cs *rcs,
                           enum pipe_transfer_usage usage)
{
   struct amdgpu_winsys_bo *bo = (struct amdgpu_winsys_bo*)buf;
   struct amdgpu_winsys_bo *real;
   struct amdgpu_cs *cs = (struct amdgpu_cs*)rcs;
   int r;
   void *cpu = NULL;
   uint64_t offset = 0;

   /* If it's not unsynchronized bo_map, flush CS if needed and then wait. */
   if (!(usage & PIPE_TRANSFER_UNSYNCHRONIZED)) {
      /* DONTBLOCK doesn't make sense with UNSYNCHRONIZED. */
      if (usage & PIPE_TRANSFER_DONTBLOCK) {
         if (!(usage & PIPE_TRANSFER_WRITE)) {
            /* Mapping for read.
             *
             * Since we are mapping for read, we don't need to wait
             * if the GPU is using the buffer for read too
             * (neither one is changing it).
             *
             * Only check whether the buffer is being used for write. */
            if (cs && amdgpu_bo_is_referenced_by_cs_with_usage(cs, bo,
                                                               RADEON_USAGE_WRITE)) {
               cs->flush_cs(cs->flush_data, RADEON_FLUSH_ASYNC, NULL);
               return NULL;
            }

            if (!amdgpu_bo_wait((struct pb_buffer*)bo, 0,
                                RADEON_USAGE_WRITE)) {
               return NULL;
            }
         } else {
            if (cs && amdgpu_bo_is_referenced_by_cs(cs, bo)) {
               cs->flush_cs(cs->flush_data, RADEON_FLUSH_ASYNC, NULL);
               return NULL;
            }

            if (!amdgpu_bo_wait((struct pb_buffer*)bo, 0,
                                RADEON_USAGE_READWRITE)) {
               return NULL;
            }
         }
      } else {
         uint64_t time = os_time_get_nano();

         if (!(usage & PIPE_TRANSFER_WRITE)) {
            /* Mapping for read.
             *
             * Since we are mapping for read, we don't need to wait
             * if the GPU is using the buffer for read too
             * (neither one is changing it).
             *
             * Only check whether the buffer is being used for write. */
            if (cs) {
               if (amdgpu_bo_is_referenced_by_cs_with_usage(cs, bo,
                                                            RADEON_USAGE_WRITE)) {
                  cs->flush_cs(cs->flush_data, 0, NULL);
               } else {
                  /* Try to avoid busy-waiting in amdgpu_bo_wait. */
                  if (p_atomic_read(&bo->num_active_ioctls))
                     amdgpu_cs_sync_flush(rcs);
               }
            }

            amdgpu_bo_wait((struct pb_buffer*)bo, PIPE_TIMEOUT_INFINITE,
                           RADEON_USAGE_WRITE);
         } else {
            /* Mapping for write. */
            if (cs) {
               if (amdgpu_bo_is_referenced_by_cs(cs, bo)) {
                  cs->flush_cs(cs->flush_data, 0, NULL);
               } else {
                  /* Try to avoid busy-waiting in amdgpu_bo_wait. */
                  if (p_atomic_read(&bo->num_active_ioctls))
                     amdgpu_cs_sync_flush(rcs);
               }
            }

            amdgpu_bo_wait((struct pb_buffer*)bo, PIPE_TIMEOUT_INFINITE,
                           RADEON_USAGE_READWRITE);
         }

         bo->ws->buffer_wait_time += os_time_get_nano() - time;
      }
   }

   /* If the buffer is created from user memory, return the user pointer. */
   if (bo->user_ptr)
      return bo->user_ptr;

   if (bo->bo) {
      real = bo;
   } else {
      real = bo->u.slab.real;
      offset = bo->va - real->va;
   }

   r = amdgpu_bo_cpu_map(real->bo, &cpu);
   if (r) {
      /* Clear the cache and try again. */
      pb_cache_release_all_buffers(&real->ws->bo_cache);
      r = amdgpu_bo_cpu_map(real->bo, &cpu);
      if (r)
         return NULL;
   }

   if (p_atomic_inc_return(&real->u.real.map_count) == 1) {
      if (real->initial_domain & RADEON_DOMAIN_VRAM)
         real->ws->mapped_vram += real->base.size;
      else if (real->initial_domain & RADEON_DOMAIN_GTT)
         real->ws->mapped_gtt += real->base.size;
   }
   return (uint8_t*)cpu + offset;
}

static void amdgpu_bo_unmap(struct pb_buffer *buf)
{
   struct amdgpu_winsys_bo *bo = (struct amdgpu_winsys_bo*)buf;
   struct amdgpu_winsys_bo *real;

   if (bo->user_ptr)
      return;

   real = bo->bo ? bo : bo->u.slab.real;

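   /* Slab entries are mapped through their backing buffer, so the map-count
    * and mapped-memory accounting below always operates on the real BO. */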
   if (p_atomic_dec_zero(&real->u.real.map_count)) {
      if (real->initial_domain & RADEON_DOMAIN_VRAM)
         real->ws->mapped_vram -= real->base.size;
      else if (real->initial_domain & RADEON_DOMAIN_GTT)
         real->ws->mapped_gtt -= real->base.size;
   }

   amdgpu_bo_cpu_unmap(real->bo);
}

static const struct pb_vtbl amdgpu_winsys_bo_vtbl = {
   amdgpu_bo_destroy_or_cache
   /* other functions are never called */
};

static void amdgpu_add_buffer_to_global_list(struct amdgpu_winsys_bo *bo)
{
   struct amdgpu_winsys *ws = bo->ws;

   assert(bo->bo);

   pipe_mutex_lock(ws->global_bo_list_lock);
   LIST_ADDTAIL(&bo->u.real.global_list_item, &ws->global_bo_list);
   ws->num_buffers++;
   pipe_mutex_unlock(ws->global_bo_list_lock);
}

static struct amdgpu_winsys_bo *amdgpu_create_bo(struct amdgpu_winsys *ws,
                                                 uint64_t size,
                                                 unsigned alignment,
                                                 unsigned usage,
                                                 enum radeon_bo_domain initial_domain,
                                                 unsigned flags,
                                                 unsigned pb_cache_bucket)
{
   struct amdgpu_bo_alloc_request request = {0};
   amdgpu_bo_handle buf_handle;
   uint64_t va = 0;
   struct amdgpu_winsys_bo *bo;
   amdgpu_va_handle va_handle;
   unsigned va_gap_size;
   int r;

   assert(initial_domain & RADEON_DOMAIN_VRAM_GTT);
   bo = CALLOC_STRUCT(amdgpu_winsys_bo);
   if (!bo) {
      return NULL;
   }

   pb_cache_init_entry(&ws->bo_cache, &bo->u.real.cache_entry, &bo->base,
                       pb_cache_bucket);
   request.alloc_size = size;
   request.phys_alignment = alignment;

   if (initial_domain & RADEON_DOMAIN_VRAM)
      request.preferred_heap |= AMDGPU_GEM_DOMAIN_VRAM;
   if (initial_domain & RADEON_DOMAIN_GTT)
      request.preferred_heap |= AMDGPU_GEM_DOMAIN_GTT;

   if (flags & RADEON_FLAG_CPU_ACCESS)
      request.flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
   if (flags & RADEON_FLAG_NO_CPU_ACCESS)
      request.flags |= AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
   if (flags & RADEON_FLAG_GTT_WC)
      request.flags |= AMDGPU_GEM_CREATE_CPU_GTT_USWC;

   r = amdgpu_bo_alloc(ws->dev, &request, &buf_handle);
   if (r) {
      fprintf(stderr, "amdgpu: Failed to allocate a buffer:\n");
      fprintf(stderr, "amdgpu: size      : %"PRIu64" bytes\n", size);
      fprintf(stderr, "amdgpu: alignment : %u bytes\n", alignment);
      fprintf(stderr, "amdgpu: domains   : %u\n", initial_domain);
      goto error_bo_alloc;
   }

   va_gap_size = ws->check_vm ? MAX2(4 * alignment, 64 * 1024) : 0;
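   /* With VM checking enabled, reserve extra (unmapped) address space behind
    * the buffer so that stray accesses past the end are more likely to fault
    * instead of silently hitting a neighbouring allocation. */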
   r = amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general,
                             size + va_gap_size, alignment, 0, &va, &va_handle, 0);
   if (r)
      goto error_va_alloc;

   r = amdgpu_bo_va_op(buf_handle, 0, size, va, 0, AMDGPU_VA_OP_MAP);
   if (r)
      goto error_va_map;

   pipe_reference_init(&bo->base.reference, 1);
   bo->base.alignment = alignment;
   bo->base.usage = usage;
   bo->base.size = size;
   bo->base.vtbl = &amdgpu_winsys_bo_vtbl;
   bo->ws = ws;
   bo->bo = buf_handle;
   bo->va = va;
   bo->u.real.va_handle = va_handle;
   bo->initial_domain = initial_domain;
   bo->unique_id = __sync_fetch_and_add(&ws->next_bo_unique_id, 1);

   if (initial_domain & RADEON_DOMAIN_VRAM)
      ws->allocated_vram += align64(size, ws->info.gart_page_size);
   else if (initial_domain & RADEON_DOMAIN_GTT)
      ws->allocated_gtt += align64(size, ws->info.gart_page_size);

   amdgpu_add_buffer_to_global_list(bo);

   return bo;

error_va_map:
   amdgpu_va_range_free(va_handle);

error_va_alloc:
   amdgpu_bo_free(buf_handle);

error_bo_alloc:
   FREE(bo);
   return NULL;
}

bool amdgpu_bo_can_reclaim(struct pb_buffer *_buf)
{
   struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(_buf);

   if (amdgpu_bo_is_referenced_by_any_cs(bo)) {
      return false;
   }

   return amdgpu_bo_wait(_buf, 0, RADEON_USAGE_READWRITE);
}

bool amdgpu_bo_can_reclaim_slab(void *priv, struct pb_slab_entry *entry)
{
   struct amdgpu_winsys_bo *bo = NULL; /* fix container_of */
   bo = container_of(entry, bo, u.slab.entry);

   return amdgpu_bo_can_reclaim(&bo->base);
}

static void amdgpu_bo_slab_destroy(struct pb_buffer *_buf)
{
   struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(_buf);

   assert(!bo->bo);

   pb_slab_free(&bo->ws->bo_slabs, &bo->u.slab.entry);
}

static const struct pb_vtbl amdgpu_winsys_bo_slab_vtbl = {
   amdgpu_bo_slab_destroy
   /* other functions are never called */
};

struct pb_slab *amdgpu_bo_slab_alloc(void *priv, unsigned heap,
                                     unsigned entry_size,
                                     unsigned group_index)
{
   struct amdgpu_winsys *ws = priv;
   struct amdgpu_slab *slab = CALLOC_STRUCT(amdgpu_slab);
   enum radeon_bo_domain domains;
   enum radeon_bo_flag flags = 0;
   uint32_t base_id;

   if (!slab)
      return NULL;

   if (heap & 1)
      flags |= RADEON_FLAG_GTT_WC;
   if (heap & 2)
      flags |= RADEON_FLAG_CPU_ACCESS;

   switch (heap >> 2) {
   case 0:
      domains = RADEON_DOMAIN_VRAM;
      break;
   default:
   case 1:
      domains = RADEON_DOMAIN_VRAM_GTT;
      break;
   case 2:
      domains = RADEON_DOMAIN_GTT;
      break;
   }

   slab->buffer = amdgpu_winsys_bo(amdgpu_bo_create(&ws->base,
                                                    64 * 1024, 64 * 1024,
                                                    domains, flags));
   if (!slab->buffer)
      goto fail;

   assert(slab->buffer->bo);

   slab->base.num_entries = slab->buffer->base.size / entry_size;
   slab->base.num_free = slab->base.num_entries;
   slab->entries = CALLOC(slab->base.num_entries, sizeof(*slab->entries));
   if (!slab->entries)
      goto fail_buffer;

   LIST_INITHEAD(&slab->base.free);

   base_id = __sync_fetch_and_add(&ws->next_bo_unique_id, slab->base.num_entries);

   for (unsigned i = 0; i < slab->base.num_entries; ++i) {
      struct amdgpu_winsys_bo *bo = &slab->entries[i];

      bo->base.alignment = entry_size;
      bo->base.usage = slab->buffer->base.usage;
      bo->base.size = entry_size;
      bo->base.vtbl = &amdgpu_winsys_bo_slab_vtbl;
      bo->ws = ws;
      bo->va = slab->buffer->va + i * entry_size;
      bo->initial_domain = domains;
      bo->unique_id = base_id + i;
      bo->u.slab.entry.slab = &slab->base;
      bo->u.slab.entry.group_index = group_index;
      bo->u.slab.real = slab->buffer;

      LIST_ADDTAIL(&bo->u.slab.entry.head, &slab->base.free);
   }

   return &slab->base;

fail_buffer:
   amdgpu_winsys_bo_reference(&slab->buffer, NULL);
fail:
   FREE(slab);
   return NULL;
}

void amdgpu_bo_slab_free(void *priv, struct pb_slab *pslab)
{
   struct amdgpu_slab *slab = amdgpu_slab(pslab);

   for (unsigned i = 0; i < slab->base.num_entries; ++i)
      amdgpu_bo_remove_fences(&slab->entries[i]);

   FREE(slab->entries);
   amdgpu_winsys_bo_reference(&slab->buffer, NULL);
   FREE(slab);
}

static unsigned eg_tile_split(unsigned tile_split)
{
   switch (tile_split) {
   case 0:     tile_split = 64;    break;
   case 1:     tile_split = 128;   break;
   case 2:     tile_split = 256;   break;
   case 3:     tile_split = 512;   break;
   default:
   case 4:     tile_split = 1024;  break;
   case 5:     tile_split = 2048;  break;
   case 6:     tile_split = 4096;  break;
   }
   return tile_split;
}

static unsigned eg_tile_split_rev(unsigned eg_tile_split)
{
   switch (eg_tile_split) {
   case 64:    return 0;
   case 128:   return 1;
   case 256:   return 2;
   case 512:   return 3;
   default:
   case 1024:  return 4;
   case 2048:  return 5;
   case 4096:  return 6;
   }
}

static void amdgpu_buffer_get_metadata(struct pb_buffer *_buf,
                                       struct radeon_bo_metadata *md)
{
   struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(_buf);
   struct amdgpu_bo_info info = {0};
   uint32_t tiling_flags;
   int r;

   assert(bo->bo && "must not be called for slab entries");

   r = amdgpu_bo_query_info(bo->bo, &info);
   if (r)
      return;

   tiling_flags = info.metadata.tiling_info;

   md->microtile = RADEON_LAYOUT_LINEAR;
   md->macrotile = RADEON_LAYOUT_LINEAR;

   if (AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE) == 4)  /* 2D_TILED_THIN1 */
      md->macrotile = RADEON_LAYOUT_TILED;
   else if (AMDGPU_TILING_GET(tiling_flags, ARRAY_MODE) == 2) /* 1D_TILED_THIN1 */
      md->microtile = RADEON_LAYOUT_TILED;

   md->pipe_config = AMDGPU_TILING_GET(tiling_flags, PIPE_CONFIG);
   md->bankw = 1 << AMDGPU_TILING_GET(tiling_flags, BANK_WIDTH);
   md->bankh = 1 << AMDGPU_TILING_GET(tiling_flags, BANK_HEIGHT);
   md->tile_split = eg_tile_split(AMDGPU_TILING_GET(tiling_flags, TILE_SPLIT));
   md->mtilea = 1 << AMDGPU_TILING_GET(tiling_flags, MACRO_TILE_ASPECT);
   md->num_banks = 2 << AMDGPU_TILING_GET(tiling_flags, NUM_BANKS);
   md->scanout = AMDGPU_TILING_GET(tiling_flags, MICRO_TILE_MODE) == 0; /* DISPLAY */

   md->size_metadata = info.metadata.size_metadata;
   memcpy(md->metadata, info.metadata.umd_metadata, sizeof(md->metadata));
}

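/* Pack the generic radeon_bo_metadata tiling description into the amdgpu
 * tiling_info bitfield and attach it to the BO, so that anyone importing a
 * shared handle to this buffer can recover the same layout through
 * amdgpu_buffer_get_metadata. */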
static void amdgpu_buffer_set_metadata(struct pb_buffer *_buf,
                                       struct radeon_bo_metadata *md)
{
   struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(_buf);
   struct amdgpu_bo_metadata metadata = {0};
   uint32_t tiling_flags = 0;

   assert(bo->bo && "must not be called for slab entries");

   if (md->macrotile == RADEON_LAYOUT_TILED)
      tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 4); /* 2D_TILED_THIN1 */
   else if (md->microtile == RADEON_LAYOUT_TILED)
      tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 2); /* 1D_TILED_THIN1 */
   else
      tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 1); /* LINEAR_ALIGNED */

   tiling_flags |= AMDGPU_TILING_SET(PIPE_CONFIG, md->pipe_config);
   tiling_flags |= AMDGPU_TILING_SET(BANK_WIDTH, util_logbase2(md->bankw));
   tiling_flags |= AMDGPU_TILING_SET(BANK_HEIGHT, util_logbase2(md->bankh));
   if (md->tile_split)
      tiling_flags |= AMDGPU_TILING_SET(TILE_SPLIT, eg_tile_split_rev(md->tile_split));
   tiling_flags |= AMDGPU_TILING_SET(MACRO_TILE_ASPECT, util_logbase2(md->mtilea));
   tiling_flags |= AMDGPU_TILING_SET(NUM_BANKS, util_logbase2(md->num_banks)-1);

   if (md->scanout)
      tiling_flags |= AMDGPU_TILING_SET(MICRO_TILE_MODE, 0); /* DISPLAY_MICRO_TILING */
   else
      tiling_flags |= AMDGPU_TILING_SET(MICRO_TILE_MODE, 1); /* THIN_MICRO_TILING */

   metadata.tiling_info = tiling_flags;
   metadata.size_metadata = md->size_metadata;
   memcpy(metadata.umd_metadata, md->metadata, sizeof(md->metadata));

   amdgpu_bo_set_metadata(bo->bo, &metadata);
}

static struct pb_buffer *
amdgpu_bo_create(struct radeon_winsys *rws,
                 uint64_t size,
                 unsigned alignment,
                 enum radeon_bo_domain domain,
                 enum radeon_bo_flag flags)
{
   struct amdgpu_winsys *ws = amdgpu_winsys(rws);
   struct amdgpu_winsys_bo *bo;
   unsigned usage = 0, pb_cache_bucket;

   /* Sub-allocate small buffers from slabs. */
   if (!(flags & RADEON_FLAG_HANDLE) &&
       size <= (1 << AMDGPU_SLAB_MAX_SIZE_LOG2) &&
       alignment <= MAX2(1 << AMDGPU_SLAB_MIN_SIZE_LOG2, util_next_power_of_two(size))) {
      struct pb_slab_entry *entry;
      unsigned heap = 0;

      if (flags & RADEON_FLAG_GTT_WC)
         heap |= 1;
      if (flags & RADEON_FLAG_CPU_ACCESS)
         heap |= 2;
      if (flags & ~(RADEON_FLAG_GTT_WC | RADEON_FLAG_CPU_ACCESS))
         goto no_slab;

      switch (domain) {
      case RADEON_DOMAIN_VRAM:
         heap |= 0 * 4;
         break;
      case RADEON_DOMAIN_VRAM_GTT:
         heap |= 1 * 4;
         break;
      case RADEON_DOMAIN_GTT:
         heap |= 2 * 4;
         break;
      default:
         goto no_slab;
      }

      entry = pb_slab_alloc(&ws->bo_slabs, size, heap);
      if (!entry) {
         /* Clear the cache and try again. */
         pb_cache_release_all_buffers(&ws->bo_cache);

         entry = pb_slab_alloc(&ws->bo_slabs, size, heap);
      }
      if (!entry)
         return NULL;

      bo = NULL;
      bo = container_of(entry, bo, u.slab.entry);

      pipe_reference_init(&bo->base.reference, 1);

      return &bo->base;
   }
no_slab:

   /* This flag is irrelevant for the cache. */
   flags &= ~RADEON_FLAG_HANDLE;

   /* Align size to page size. This is the minimum alignment for normal
    * BOs. Aligning this here helps the cached bufmgr. Especially small BOs,
    * like constant/uniform buffers, can benefit from better and more reuse.
    */
   size = align64(size, ws->info.gart_page_size);
   alignment = align(alignment, ws->info.gart_page_size);

   /* Only set one usage bit each for domains and flags, or the cache manager
    * might consider different sets of domains / flags compatible
    */
   if (domain == RADEON_DOMAIN_VRAM_GTT)
      usage = 1 << 2;
   else
      usage = domain >> 1;
   assert(flags < sizeof(usage) * 8 - 3);
   usage |= 1 << (flags + 3);

   /* Determine the pb_cache bucket for minimizing pb_cache misses. */
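   /* The resulting buckets are:
    *   0: GTT,            not write-combined
    *   1: VRAM (or +GTT), not write-combined
    *   2: GTT,            write-combined only
    *   3: VRAM (or +GTT), write-combined only
    */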
   pb_cache_bucket = 0;
   if (domain & RADEON_DOMAIN_VRAM) /* VRAM or VRAM+GTT */
      pb_cache_bucket += 1;
   if (flags == RADEON_FLAG_GTT_WC) /* WC */
      pb_cache_bucket += 2;
   assert(pb_cache_bucket < ARRAY_SIZE(ws->bo_cache.buckets));

   /* Get a buffer from the cache. */
   bo = (struct amdgpu_winsys_bo*)
        pb_cache_reclaim_buffer(&ws->bo_cache, size, alignment, usage,
                                pb_cache_bucket);
   if (bo)
      return &bo->base;

   /* Create a new one. */
   bo = amdgpu_create_bo(ws, size, alignment, usage, domain, flags,
                         pb_cache_bucket);
   if (!bo) {
      /* Clear the cache and try again. */
      pb_slabs_reclaim(&ws->bo_slabs);
      pb_cache_release_all_buffers(&ws->bo_cache);
      bo = amdgpu_create_bo(ws, size, alignment, usage, domain, flags,
                            pb_cache_bucket);
      if (!bo)
         return NULL;
   }

   bo->u.real.use_reusable_pool = true;
   return &bo->base;
}

static struct pb_buffer *amdgpu_bo_from_handle(struct radeon_winsys *rws,
                                               struct winsys_handle *whandle,
                                               unsigned *stride,
                                               unsigned *offset)
{
   struct amdgpu_winsys *ws = amdgpu_winsys(rws);
   struct amdgpu_winsys_bo *bo;
   enum amdgpu_bo_handle_type type;
   struct amdgpu_bo_import_result result = {0};
   uint64_t va;
   amdgpu_va_handle va_handle;
   struct amdgpu_bo_info info = {0};
   enum radeon_bo_domain initial = 0;
   int r;

   /* Initialize the structure. */
   bo = CALLOC_STRUCT(amdgpu_winsys_bo);
   if (!bo) {
      return NULL;
   }

   switch (whandle->type) {
   case DRM_API_HANDLE_TYPE_SHARED:
      type = amdgpu_bo_handle_type_gem_flink_name;
      break;
   case DRM_API_HANDLE_TYPE_FD:
      type = amdgpu_bo_handle_type_dma_buf_fd;
      break;
   default:
      goto error; /* unknown handle type; don't leak "bo" */
   }

   r = amdgpu_bo_import(ws->dev, type, whandle->handle, &result);
   if (r)
      goto error;

   /* Get initial domains. */
   r = amdgpu_bo_query_info(result.buf_handle, &info);
   if (r)
      goto error_query;

   r = amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general,
                             result.alloc_size, 1 << 20, 0, &va, &va_handle, 0);
   if (r)
      goto error_query;

   r = amdgpu_bo_va_op(result.buf_handle, 0, result.alloc_size, va, 0, AMDGPU_VA_OP_MAP);
   if (r)
      goto error_va_map;

   if (info.preferred_heap & AMDGPU_GEM_DOMAIN_VRAM)
      initial |= RADEON_DOMAIN_VRAM;
   if (info.preferred_heap & AMDGPU_GEM_DOMAIN_GTT)
      initial |= RADEON_DOMAIN_GTT;

   pipe_reference_init(&bo->base.reference, 1);
   bo->base.alignment = info.phys_alignment;
   bo->bo = result.buf_handle;
   bo->base.size = result.alloc_size;
   bo->base.vtbl = &amdgpu_winsys_bo_vtbl;
   bo->ws = ws;
   bo->va = va;
   bo->u.real.va_handle = va_handle;
   bo->initial_domain = initial;
   bo->unique_id = __sync_fetch_and_add(&ws->next_bo_unique_id, 1);
   bo->is_shared = true;

   if (stride)
      *stride = whandle->stride;
   if (offset)
      *offset = whandle->offset;

   if (bo->initial_domain & RADEON_DOMAIN_VRAM)
      ws->allocated_vram += align64(bo->base.size, ws->info.gart_page_size);
   else if (bo->initial_domain & RADEON_DOMAIN_GTT)
      ws->allocated_gtt += align64(bo->base.size, ws->info.gart_page_size);

   amdgpu_add_buffer_to_global_list(bo);

   return &bo->base;

error_va_map:
   amdgpu_va_range_free(va_handle);

error_query:
   amdgpu_bo_free(result.buf_handle);

error:
   FREE(bo);
   return NULL;
}

static bool amdgpu_bo_get_handle(struct pb_buffer *buffer,
                                 unsigned stride, unsigned offset,
                                 unsigned slice_size,
                                 struct winsys_handle *whandle)
{
   struct amdgpu_winsys_bo *bo = amdgpu_winsys_bo(buffer);
   enum amdgpu_bo_handle_type type;
   int r;

   if (!bo->bo) {
      offset += bo->va - bo->u.slab.real->va;
      bo = bo->u.slab.real;
   }

   bo->u.real.use_reusable_pool = false;

   switch (whandle->type) {
   case DRM_API_HANDLE_TYPE_SHARED:
      type = amdgpu_bo_handle_type_gem_flink_name;
      break;
   case DRM_API_HANDLE_TYPE_FD:
      type = amdgpu_bo_handle_type_dma_buf_fd;
      break;
   case DRM_API_HANDLE_TYPE_KMS:
      type = amdgpu_bo_handle_type_kms;
      break;
   default:
      return false;
   }

   r = amdgpu_bo_export(bo->bo, type, &whandle->handle);
   if (r)
      return false;

   whandle->stride = stride;
   whandle->offset = offset;
   whandle->offset += slice_size * whandle->layer;
   bo->is_shared = true;
   return true;
}

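/* Wrap application-provided memory (a userptr) in a winsys buffer. The
 * resulting BO lives in GTT, keeps bo->user_ptr set so CPU mappings return
 * the original pointer, and is never returned to the reusable cache. */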
static struct pb_buffer *amdgpu_bo_from_ptr(struct radeon_winsys *rws,
                                            void *pointer, uint64_t size)
{
   struct amdgpu_winsys *ws = amdgpu_winsys(rws);
   amdgpu_bo_handle buf_handle;
   struct amdgpu_winsys_bo *bo;
   uint64_t va;
   amdgpu_va_handle va_handle;

   bo = CALLOC_STRUCT(amdgpu_winsys_bo);
   if (!bo)
      return NULL;

   if (amdgpu_create_bo_from_user_mem(ws->dev, pointer, size, &buf_handle))
      goto error;

   if (amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general,
                             size, 1 << 12, 0, &va, &va_handle, 0))
      goto error_va_alloc;

   if (amdgpu_bo_va_op(buf_handle, 0, size, va, 0, AMDGPU_VA_OP_MAP))
      goto error_va_map;

   /* Initialize it. */
   pipe_reference_init(&bo->base.reference, 1);
   bo->bo = buf_handle;
   bo->base.alignment = 0;
   bo->base.size = size;
   bo->base.vtbl = &amdgpu_winsys_bo_vtbl;
   bo->ws = ws;
   bo->user_ptr = pointer;
   bo->va = va;
   bo->u.real.va_handle = va_handle;
   bo->initial_domain = RADEON_DOMAIN_GTT;
   bo->unique_id = __sync_fetch_and_add(&ws->next_bo_unique_id, 1);

   ws->allocated_gtt += align64(bo->base.size, ws->info.gart_page_size);

   amdgpu_add_buffer_to_global_list(bo);

   return (struct pb_buffer*)bo;

error_va_map:
   amdgpu_va_range_free(va_handle);

error_va_alloc:
   amdgpu_bo_free(buf_handle);

error:
   FREE(bo);
   return NULL;
}

static bool amdgpu_bo_is_user_ptr(struct pb_buffer *buf)
{
   return ((struct amdgpu_winsys_bo*)buf)->user_ptr != NULL;
}

static uint64_t amdgpu_bo_get_va(struct pb_buffer *buf)
{
   return ((struct amdgpu_winsys_bo*)buf)->va;
}

void amdgpu_bo_init_functions(struct amdgpu_winsys *ws)
{
   ws->base.buffer_set_metadata = amdgpu_buffer_set_metadata;
   ws->base.buffer_get_metadata = amdgpu_buffer_get_metadata;
   ws->base.buffer_map = amdgpu_bo_map;
   ws->base.buffer_unmap = amdgpu_bo_unmap;
   ws->base.buffer_wait = amdgpu_bo_wait;
   ws->base.buffer_create = amdgpu_bo_create;
   ws->base.buffer_from_handle = amdgpu_bo_from_handle;
   ws->base.buffer_from_ptr = amdgpu_bo_from_ptr;
   ws->base.buffer_is_user_ptr = amdgpu_bo_is_user_ptr;
   ws->base.buffer_get_handle = amdgpu_bo_get_handle;
   ws->base.buffer_get_virtual_address = amdgpu_bo_get_va;
   ws->base.buffer_get_initial_domain = amdgpu_bo_get_initial_domain;
}