/*
 * Copyright © 2016 Red Hat.
 * Copyright © 2016 Bas Nieuwenhuizen
 *
 * based on amdgpu winsys.
 * Copyright © 2011 Marek Olšák <maraeo@gmail.com>
 * Copyright © 2015 Advanced Micro Devices, Inc.
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include <stdio.h>
#include <unistd.h> /* for getpagesize() */

#include "radv_amdgpu_bo.h"

#include <amdgpu.h>
#include <amdgpu_drm.h>
#include <inttypes.h>

#include "util/u_atomic.h"

static void radv_amdgpu_winsys_bo_destroy(struct radeon_winsys_bo *_bo);

static int
radv_amdgpu_bo_va_op(struct radv_amdgpu_winsys *ws,
		     amdgpu_bo_handle bo,
		     uint64_t offset,
		     uint64_t size,
		     uint64_t addr,
		     uint32_t bo_flags,
		     uint32_t ops)
{
	uint64_t flags = AMDGPU_VM_PAGE_READABLE |
			 AMDGPU_VM_PAGE_EXECUTABLE;

	if ((bo_flags & RADEON_FLAG_VA_UNCACHED) && ws->info.chip_class >= GFX9)
		flags |= AMDGPU_VM_MTYPE_UC;

	if (!(bo_flags & RADEON_FLAG_READ_ONLY))
		flags |= AMDGPU_VM_PAGE_WRITEABLE;

	size = ALIGN(size, getpagesize());

	return amdgpu_bo_va_op_raw(ws->dev, bo, offset, size, addr,
				   flags, ops);
}

static void
radv_amdgpu_winsys_virtual_map(struct radv_amdgpu_winsys_bo *bo,
			       const struct radv_amdgpu_map_range *range)
{
	assert(range->size);

	if (!range->bo)
		return; /* TODO: PRT mapping */

	p_atomic_inc(&range->bo->ref_count);
	int r = radv_amdgpu_bo_va_op(bo->ws, range->bo->bo, range->bo_offset,
				     range->size, range->offset + bo->base.va,
				     0, AMDGPU_VA_OP_MAP);
	if (r)
		abort();
}

static void
radv_amdgpu_winsys_virtual_unmap(struct radv_amdgpu_winsys_bo *bo,
				 const struct radv_amdgpu_map_range *range)
{
	assert(range->size);

	if (!range->bo)
		return; /* TODO: PRT mapping */

	int r = radv_amdgpu_bo_va_op(bo->ws, range->bo->bo, range->bo_offset,
				     range->size, range->offset + bo->base.va,
				     0, AMDGPU_VA_OP_UNMAP);
	if (r)
		abort();
	radv_amdgpu_winsys_bo_destroy((struct radeon_winsys_bo *)range->bo);
}

/* Only pointer identity matters here; the sort just makes duplicates adjacent. */
static int bo_comparator(const void *ap, const void *bp) {
	struct radv_amdgpu_bo *a = *(struct radv_amdgpu_bo *const *)ap;
	struct radv_amdgpu_bo *b = *(struct radv_amdgpu_bo *const *)bp;
	return (a > b) ? 1 : (a < b) ? -1 : 0;
}
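
/* A virtual BO keeps, alongside its range list, a flat deduplicated array of
 * the backing BOs (bo->bos) so that command submission can put them on the
 * buffer list without walking the ranges. The helper below rebuilds that
 * array: it collects the BO of every backed range, sorts by pointer so that
 * duplicates become adjacent, and then compacts the result. */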
static void
radv_amdgpu_winsys_rebuild_bo_list(struct radv_amdgpu_winsys_bo *bo)
{
	if (bo->bo_capacity < bo->range_count) {
		uint32_t new_count = MAX2(bo->bo_capacity * 2, bo->range_count);
		bo->bos = realloc(bo->bos, new_count * sizeof(struct radv_amdgpu_winsys_bo *));
		bo->bo_capacity = new_count;
	}

	uint32_t temp_bo_count = 0;
	for (uint32_t i = 0; i < bo->range_count; ++i)
		if (bo->ranges[i].bo)
			bo->bos[temp_bo_count++] = bo->ranges[i].bo;

	qsort(bo->bos, temp_bo_count, sizeof(struct radv_amdgpu_winsys_bo *), &bo_comparator);

	/* If no range is backed there is nothing to keep; otherwise the first
	 * element always survives deduplication. */
	uint32_t final_bo_count = temp_bo_count ? 1 : 0;
	for (uint32_t i = 1; i < temp_bo_count; ++i)
		if (bo->bos[i] != bo->bos[i - 1])
			bo->bos[final_bo_count++] = bo->bos[i];

	bo->bo_count = final_bo_count;
}

static void
radv_amdgpu_winsys_bo_virtual_bind(struct radeon_winsys_bo *_parent,
				   uint64_t offset, uint64_t size,
				   struct radeon_winsys_bo *_bo, uint64_t bo_offset)
{
	struct radv_amdgpu_winsys_bo *parent = (struct radv_amdgpu_winsys_bo *)_parent;
	struct radv_amdgpu_winsys_bo *bo = (struct radv_amdgpu_winsys_bo *)_bo;
	int range_count_delta, new_idx;
	int first = 0, last;
	struct radv_amdgpu_map_range new_first, new_last;

	assert(parent->is_virtual);
	assert(!bo || !bo->is_virtual);

	if (!size)
		return;

	/* We have at most 2 new ranges (1 by the bind, and another one by
	 * splitting a range that contains the newly bound range). */
	if (parent->range_capacity - parent->range_count < 2) {
		parent->range_capacity += 2;
		parent->ranges = realloc(parent->ranges,
					 parent->range_capacity * sizeof(struct radv_amdgpu_map_range));
	}

	/*
	 * [first, last] is exactly the range of ranges that either overlap the
	 * new range, or are adjacent to it. This corresponds to the bind ranges
	 * that may change.
	 */
	while (first + 1 < parent->range_count &&
	       parent->ranges[first].offset + parent->ranges[first].size < offset)
		++first;

	last = first;
	while (last + 1 < parent->range_count &&
	       parent->ranges[last + 1].offset <= offset + size)
		++last;

	/* Whether the first or last range are going to be totally removed or just
	 * resized/left alone. Note that in the case of first == last, we will split
	 * this into a part before and after the new range. The remove flag is then
	 * whether to not create the corresponding split part. */
	bool remove_first = parent->ranges[first].offset == offset;
	bool remove_last = parent->ranges[last].offset + parent->ranges[last].size == offset + size;
	bool unmapped_first = false;

	assert(parent->ranges[first].offset <= offset);
	assert(parent->ranges[last].offset + parent->ranges[last].size >= offset + size);
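
	/* Worked example (values illustrative only): binding a BO at
	 * [4K, 8K) into a parent whose only range is unbacked [0, 64K)
	 * gives first == last == 0 with remove_first == remove_last == false,
	 * so the range is split into [0, 4K) unbacked, [4K, 8K) backed and
	 * [8K, 64K) unbacked, i.e. range_count_delta == 2. */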

	/* Try to merge the new range with the first range. */
	if (parent->ranges[first].bo == bo &&
	    (!bo || offset - bo_offset == parent->ranges[first].offset - parent->ranges[first].bo_offset)) {
		size += offset - parent->ranges[first].offset;
		offset = parent->ranges[first].offset;
		bo_offset = parent->ranges[first].bo_offset;
		remove_first = true;
	}

	/* Try to merge the new range with the last range. */
	if (parent->ranges[last].bo == bo &&
	    (!bo || offset - bo_offset == parent->ranges[last].offset - parent->ranges[last].bo_offset)) {
		size = parent->ranges[last].offset + parent->ranges[last].size - offset;
		remove_last = true;
	}

	range_count_delta = 1 - (last - first + 1) + !remove_first + !remove_last;
	new_idx = first + !remove_first;

	/* Any range between first and last is going to be entirely covered by
	 * the new range so just unmap them. */
	for (int i = first + 1; i < last; ++i)
		radv_amdgpu_winsys_virtual_unmap(parent, parent->ranges + i);

	/* If the first/last range are not left alone we unmap them and optionally
	 * map them again after modifications. Note that this implicitly can do the
	 * splitting if first == last. */
	new_first = parent->ranges[first];
	new_last = parent->ranges[last];

	if (parent->ranges[first].offset + parent->ranges[first].size > offset || remove_first) {
		radv_amdgpu_winsys_virtual_unmap(parent, parent->ranges + first);
		unmapped_first = true;

		if (!remove_first) {
			new_first.size = offset - new_first.offset;
			radv_amdgpu_winsys_virtual_map(parent, &new_first);
		}
	}

	if (parent->ranges[last].offset < offset + size || remove_last) {
		if (first != last || !unmapped_first)
			radv_amdgpu_winsys_virtual_unmap(parent, parent->ranges + last);

		if (!remove_last) {
			new_last.size -= offset + size - new_last.offset;
			new_last.offset = offset + size;
			radv_amdgpu_winsys_virtual_map(parent, &new_last);
		}
	}

	/* Moves the range list after last to account for the changed number of ranges. */
	memmove(parent->ranges + last + 1 + range_count_delta, parent->ranges + last + 1,
		sizeof(struct radv_amdgpu_map_range) * (parent->range_count - last - 1));

	if (!remove_first)
		parent->ranges[first] = new_first;

	if (!remove_last)
		parent->ranges[new_idx + 1] = new_last;

	/* Actually set up the new range. */
	parent->ranges[new_idx].offset = offset;
	parent->ranges[new_idx].size = size;
	parent->ranges[new_idx].bo = bo;
	parent->ranges[new_idx].bo_offset = bo_offset;

	radv_amdgpu_winsys_virtual_map(parent, parent->ranges + new_idx);

	parent->range_count += range_count_delta;

	radv_amdgpu_winsys_rebuild_bo_list(parent);
}
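
/* Drops one reference; the BO is only torn down once the last reference goes
 * away. For virtual BOs this unmaps (and thereby unreferences) every backing
 * range, for normal BOs it unmaps the VA range and frees the kernel handle. */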
static void radv_amdgpu_winsys_bo_destroy(struct radeon_winsys_bo *_bo)
{
	struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);

	if (p_atomic_dec_return(&bo->ref_count))
		return;
	if (bo->is_virtual) {
		for (uint32_t i = 0; i < bo->range_count; ++i) {
			radv_amdgpu_winsys_virtual_unmap(bo, bo->ranges + i);
		}
		free(bo->bos);
		free(bo->ranges);
	} else {
		if (bo->ws->debug_all_bos) {
			pthread_mutex_lock(&bo->ws->global_bo_list_lock);
			LIST_DEL(&bo->global_list_item);
			bo->ws->num_buffers--;
			pthread_mutex_unlock(&bo->ws->global_bo_list_lock);
		}
		radv_amdgpu_bo_va_op(bo->ws, bo->bo, 0, bo->size, bo->base.va,
				     0, AMDGPU_VA_OP_UNMAP);
		amdgpu_bo_free(bo->bo);
	}
	amdgpu_va_range_free(bo->va_handle);
	FREE(bo);
}

static void radv_amdgpu_add_buffer_to_global_list(struct radv_amdgpu_winsys_bo *bo)
{
	struct radv_amdgpu_winsys *ws = bo->ws;

	if (bo->ws->debug_all_bos) {
		pthread_mutex_lock(&ws->global_bo_list_lock);
		LIST_ADDTAIL(&bo->global_list_item, &ws->global_bo_list);
		ws->num_buffers++;
		pthread_mutex_unlock(&ws->global_bo_list_lock);
	}
}

static struct radeon_winsys_bo *
radv_amdgpu_winsys_bo_create(struct radeon_winsys *_ws,
			     uint64_t size,
			     unsigned alignment,
			     enum radeon_bo_domain initial_domain,
			     unsigned flags)
{
	struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
	struct radv_amdgpu_winsys_bo *bo;
	struct amdgpu_bo_alloc_request request = {0};
	amdgpu_bo_handle buf_handle;
	uint64_t va = 0;
	amdgpu_va_handle va_handle;
	int r;

	bo = CALLOC_STRUCT(radv_amdgpu_winsys_bo);
	if (!bo) {
		return NULL;
	}

	r = amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general,
				  size, alignment, 0, &va, &va_handle, 0);
	if (r)
		goto error_va_alloc;

	bo->base.va = va;
	bo->va_handle = va_handle;
	bo->size = size;
	bo->ws = ws;
	bo->is_virtual = !!(flags & RADEON_FLAG_VIRTUAL);
	bo->ref_count = 1;

	/* Virtual BOs only reserve a VA range; they start out fully unbacked. */
	if (flags & RADEON_FLAG_VIRTUAL) {
		bo->ranges = realloc(NULL, sizeof(struct radv_amdgpu_map_range));
		bo->range_count = 1;
		bo->range_capacity = 1;

		bo->ranges[0].offset = 0;
		bo->ranges[0].size = size;
		bo->ranges[0].bo = NULL;
		bo->ranges[0].bo_offset = 0;

		radv_amdgpu_winsys_virtual_map(bo, bo->ranges);
		return (struct radeon_winsys_bo *)bo;
	}

	request.alloc_size = size;
	request.phys_alignment = alignment;

	if (initial_domain & RADEON_DOMAIN_VRAM)
		request.preferred_heap |= AMDGPU_GEM_DOMAIN_VRAM;
	if (initial_domain & RADEON_DOMAIN_GTT)
		request.preferred_heap |= AMDGPU_GEM_DOMAIN_GTT;

	if (flags & RADEON_FLAG_CPU_ACCESS)
		request.flags |= AMDGPU_GEM_CREATE_CPU_ACCESS_REQUIRED;
	if (flags & RADEON_FLAG_NO_CPU_ACCESS)
		request.flags |= AMDGPU_GEM_CREATE_NO_CPU_ACCESS;
	if (flags & RADEON_FLAG_GTT_WC)
		request.flags |= AMDGPU_GEM_CREATE_CPU_GTT_USWC;
	if (!(flags & RADEON_FLAG_IMPLICIT_SYNC) && ws->info.drm_minor >= 22)
		request.flags |= AMDGPU_GEM_CREATE_EXPLICIT_SYNC;
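
	/* Buffers that are never shared between processes can be created as
	 * "local" BOs: AMDGPU_GEM_CREATE_VM_ALWAYS_VALID keeps them permanently
	 * valid in this process' VM, so they do not have to be put on the
	 * buffer list of every command submission (needs DRM 3.20+). */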
	if (flags & RADEON_FLAG_NO_INTERPROCESS_SHARING && ws->info.drm_minor >= 20 && ws->use_local_bos) {
		bo->base.is_local = true;
		request.flags |= AMDGPU_GEM_CREATE_VM_ALWAYS_VALID;
	}

	/* This won't do anything on pre-4.9 kernels. */
	if (ws->zero_all_vram_allocs && (initial_domain & RADEON_DOMAIN_VRAM))
		request.flags |= AMDGPU_GEM_CREATE_VRAM_CLEARED;

	r = amdgpu_bo_alloc(ws->dev, &request, &buf_handle);
	if (r) {
		fprintf(stderr, "amdgpu: Failed to allocate a buffer:\n");
		fprintf(stderr, "amdgpu:    size      : %"PRIu64" bytes\n", size);
		fprintf(stderr, "amdgpu:    alignment : %u bytes\n", alignment);
		fprintf(stderr, "amdgpu:    domains   : %u\n", initial_domain);
		goto error_bo_alloc;
	}

	r = radv_amdgpu_bo_va_op(ws, buf_handle, 0, size, va, flags,
				 AMDGPU_VA_OP_MAP);
	if (r)
		goto error_va_map;

	bo->bo = buf_handle;
	bo->initial_domain = initial_domain;
	bo->is_shared = false;
	radv_amdgpu_add_buffer_to_global_list(bo);
	return (struct radeon_winsys_bo *)bo;
error_va_map:
	amdgpu_bo_free(buf_handle);

error_bo_alloc:
	amdgpu_va_range_free(va_handle);

error_va_alloc:
	FREE(bo);
	return NULL;
}

static void *
radv_amdgpu_winsys_bo_map(struct radeon_winsys_bo *_bo)
{
	struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
	int ret;
	void *data;
	ret = amdgpu_bo_cpu_map(bo->bo, &data);
	if (ret)
		return NULL;
	return data;
}

static void
radv_amdgpu_winsys_bo_unmap(struct radeon_winsys_bo *_bo)
{
	struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
	amdgpu_bo_cpu_unmap(bo->bo);
}

static struct radeon_winsys_bo *
radv_amdgpu_winsys_bo_from_fd(struct radeon_winsys *_ws,
			      int fd, unsigned *stride,
			      unsigned *offset)
{
	struct radv_amdgpu_winsys *ws = radv_amdgpu_winsys(_ws);
	struct radv_amdgpu_winsys_bo *bo;
	uint64_t va;
	amdgpu_va_handle va_handle;
	enum amdgpu_bo_handle_type type = amdgpu_bo_handle_type_dma_buf_fd;
	struct amdgpu_bo_import_result result = {0};
	struct amdgpu_bo_info info = {0};
	enum radeon_bo_domain initial = 0;
	int r;

	bo = CALLOC_STRUCT(radv_amdgpu_winsys_bo);
	if (!bo)
		return NULL;

	r = amdgpu_bo_import(ws->dev, type, fd, &result);
	if (r)
		goto error;

	r = amdgpu_bo_query_info(result.buf_handle, &info);
	if (r)
		goto error_query;

	r = amdgpu_va_range_alloc(ws->dev, amdgpu_gpu_va_range_general,
				  result.alloc_size, 1 << 20, 0, &va, &va_handle, 0);
	if (r)
		goto error_query;

	r = radv_amdgpu_bo_va_op(ws, result.buf_handle, 0, result.alloc_size,
				 va, 0, AMDGPU_VA_OP_MAP);
	if (r)
		goto error_va_map;

	if (info.preferred_heap & AMDGPU_GEM_DOMAIN_VRAM)
		initial |= RADEON_DOMAIN_VRAM;
	if (info.preferred_heap & AMDGPU_GEM_DOMAIN_GTT)
		initial |= RADEON_DOMAIN_GTT;

	bo->bo = result.buf_handle;
	bo->base.va = va;
	bo->va_handle = va_handle;
	bo->initial_domain = initial;
	bo->size = result.alloc_size;
	bo->is_shared = true;
	bo->ws = ws;
	bo->ref_count = 1;
	radv_amdgpu_add_buffer_to_global_list(bo);
	return (struct radeon_winsys_bo *)bo;
error_va_map:
	amdgpu_va_range_free(va_handle);

error_query:
	amdgpu_bo_free(result.buf_handle);

error:
	FREE(bo);
	return NULL;
}
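
/* Exports the BO as a dma-buf fd so that other processes or devices can
 * import it; the BO is marked shared so later logic knows it may be
 * externally visible. */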
static bool
radv_amdgpu_winsys_get_fd(struct radeon_winsys *_ws,
			  struct radeon_winsys_bo *_bo,
			  int *fd)
{
	struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
	enum amdgpu_bo_handle_type type = amdgpu_bo_handle_type_dma_buf_fd;
	int r;
	unsigned handle;
	r = amdgpu_bo_export(bo->bo, type, &handle);
	if (r)
		return false;

	*fd = (int)handle;
	bo->is_shared = true;
	return true;
}

/* Maps an EG+ tile split value (in bytes) to the TILE_SPLIT field encoding;
 * out-of-range values fall back to the 1024-byte encoding. */
static unsigned radv_eg_tile_split_rev(unsigned eg_tile_split)
{
	switch (eg_tile_split) {
	case 64:   return 0;
	case 128:  return 1;
	case 256:  return 2;
	case 512:  return 3;
	default:
	case 1024: return 4;
	case 2048: return 5;
	case 4096: return 6;
	}
}

static void
radv_amdgpu_winsys_bo_set_metadata(struct radeon_winsys_bo *_bo,
				   struct radeon_bo_metadata *md)
{
	struct radv_amdgpu_winsys_bo *bo = radv_amdgpu_winsys_bo(_bo);
	struct amdgpu_bo_metadata metadata = {0};
	uint32_t tiling_flags = 0;

	if (bo->ws->info.chip_class >= GFX9) {
		tiling_flags |= AMDGPU_TILING_SET(SWIZZLE_MODE, md->u.gfx9.swizzle_mode);
	} else {
		if (md->u.legacy.macrotile == RADEON_LAYOUT_TILED)
			tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 4); /* 2D_TILED_THIN1 */
		else if (md->u.legacy.microtile == RADEON_LAYOUT_TILED)
			tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 2); /* 1D_TILED_THIN1 */
		else
			tiling_flags |= AMDGPU_TILING_SET(ARRAY_MODE, 1); /* LINEAR_ALIGNED */

		tiling_flags |= AMDGPU_TILING_SET(PIPE_CONFIG, md->u.legacy.pipe_config);
		tiling_flags |= AMDGPU_TILING_SET(BANK_WIDTH, util_logbase2(md->u.legacy.bankw));
		tiling_flags |= AMDGPU_TILING_SET(BANK_HEIGHT, util_logbase2(md->u.legacy.bankh));
		if (md->u.legacy.tile_split)
			tiling_flags |= AMDGPU_TILING_SET(TILE_SPLIT, radv_eg_tile_split_rev(md->u.legacy.tile_split));
		tiling_flags |= AMDGPU_TILING_SET(MACRO_TILE_ASPECT, util_logbase2(md->u.legacy.mtilea));
		tiling_flags |= AMDGPU_TILING_SET(NUM_BANKS, util_logbase2(md->u.legacy.num_banks) - 1);

		if (md->u.legacy.scanout)
			tiling_flags |= AMDGPU_TILING_SET(MICRO_TILE_MODE, 0); /* DISPLAY_MICRO_TILING */
		else
			tiling_flags |= AMDGPU_TILING_SET(MICRO_TILE_MODE, 1); /* THIN_MICRO_TILING */
	}

	metadata.tiling_info = tiling_flags;
	metadata.size_metadata = md->size_metadata;
	memcpy(metadata.umd_metadata, md->metadata, sizeof(md->metadata));

	amdgpu_bo_set_metadata(bo->bo, &metadata);
}

void radv_amdgpu_bo_init_functions(struct radv_amdgpu_winsys *ws)
{
	ws->base.buffer_create = radv_amdgpu_winsys_bo_create;
	ws->base.buffer_destroy = radv_amdgpu_winsys_bo_destroy;
	ws->base.buffer_map = radv_amdgpu_winsys_bo_map;
	ws->base.buffer_unmap = radv_amdgpu_winsys_bo_unmap;
	ws->base.buffer_from_fd = radv_amdgpu_winsys_bo_from_fd;
	ws->base.buffer_get_fd = radv_amdgpu_winsys_get_fd;
	ws->base.buffer_set_metadata = radv_amdgpu_winsys_bo_set_metadata;
	ws->base.buffer_virtual_bind = radv_amdgpu_winsys_bo_virtual_bind;
}
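
/*
 * Usage sketch (illustrative only, not part of the winsys): once
 * radv_amdgpu_bo_init_functions() has filled in the function table, sparse
 * binding is expected to go through the virtual-BO path roughly like this:
 *
 *	struct radeon_winsys_bo *vbo =
 *		ws->base.buffer_create(&ws->base, 64ull << 20, 65536,
 *				       0, RADEON_FLAG_VIRTUAL);
 *	struct radeon_winsys_bo *mem =
 *		ws->base.buffer_create(&ws->base, 1ull << 20, 65536,
 *				       RADEON_DOMAIN_VRAM, 0);
 *
 *	// Back the first MiB of the virtual range with 'mem' ...
 *	ws->base.buffer_virtual_bind(vbo, 0, 1ull << 20, mem, 0);
 *	// ... and unbind it again by rebinding with a NULL BO.
 *	ws->base.buffer_virtual_bind(vbo, 0, 1ull << 20, NULL, 0);
 */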