/*
 * Copyright © 2008 Jérôme Glisse
 * Copyright © 2010 Marek Olšák <maraeo@gmail.com>
 * Copyright © 2015 Advanced Micro Devices, Inc.
 * All Rights Reserved.
 *
 * Permission is hereby granted, free of charge, to any person obtaining
 * a copy of this software and associated documentation files (the
 * "Software"), to deal in the Software without restriction, including
 * without limitation the rights to use, copy, modify, merge, publish,
 * distribute, sub license, and/or sell copies of the Software, and to
 * permit persons to whom the Software is furnished to do so, subject to
 * the following conditions:
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES
 * OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
 * NON-INFRINGEMENT. IN NO EVENT SHALL THE COPYRIGHT HOLDERS, AUTHORS
 * AND/OR ITS SUPPLIERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE,
 * ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * The above copyright notice and this permission notice (including the
 * next paragraph) shall be included in all copies or substantial portions
 * of the Software.
 */
/*
 * Authors:
 *      Marek Olšák <maraeo@gmail.com>
 */

#include "amdgpu_cs.h"
#include "os/os_time.h"
#include <stdio.h>
#include <amdgpu_drm.h>

#include "amd/common/sid.h"

DEBUG_GET_ONCE_BOOL_OPTION(noop, "RADEON_NOOP", false)

/* FENCES */

static struct pipe_fence_handle *
amdgpu_fence_create(struct amdgpu_ctx *ctx, unsigned ip_type,
                    unsigned ip_instance, unsigned ring)
{
   struct amdgpu_fence *fence = CALLOC_STRUCT(amdgpu_fence);

   if (!fence)
      return NULL;

   fence->reference.count = 1;
   fence->ctx = ctx;
   fence->fence.context = ctx->ctx;
   fence->fence.ip_type = ip_type;
   fence->fence.ip_instance = ip_instance;
   fence->fence.ring = ring;
   fence->submission_in_progress = true;
   p_atomic_inc(&ctx->refcount);
   return (struct pipe_fence_handle *)fence;
}

static void amdgpu_fence_submitted(struct pipe_fence_handle *fence,
                                   struct amdgpu_cs_request* request,
                                   uint64_t *user_fence_cpu_address)
{
   struct amdgpu_fence *rfence = (struct amdgpu_fence*)fence;

   rfence->fence.fence = request->seq_no;
   rfence->user_fence_cpu_address = user_fence_cpu_address;
   rfence->submission_in_progress = false;
}

static void amdgpu_fence_signalled(struct pipe_fence_handle *fence)
{
   struct amdgpu_fence *rfence = (struct amdgpu_fence*)fence;

   rfence->signalled = true;
   rfence->submission_in_progress = false;
}

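/*
 * A fence progresses through up to three states: created (its IB may
 * still be in flight to the kernel on another thread), submitted (a
 * kernel sequence number and a user-fence CPU address are known), and
 * signalled. The cheap way to test completion is a single read of the
 * mapped user fence: the GPU writes the sequence number of the last
 * completed submission there. Illustrative sketch (not driver code;
 * "f" is an already-submitted amdgpu_fence):
 *
 *    uint64_t completed = *f->user_fence_cpu_address;
 *    bool done = completed >= f->fence.fence;
 *
 * amdgpu_fence_wait() below does exactly this before falling back to
 * the amdgpu_cs_query_fence_status() ioctl for real waits.
 */
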
bool amdgpu_fence_wait(struct pipe_fence_handle *fence, uint64_t timeout,
                       bool absolute)
{
   struct amdgpu_fence *rfence = (struct amdgpu_fence*)fence;
   uint32_t expired;
   int64_t abs_timeout;
   uint64_t *user_fence_cpu;
   int r;

   if (rfence->signalled)
      return true;

   if (absolute)
      abs_timeout = timeout;
   else
      abs_timeout = os_time_get_absolute_timeout(timeout);

   /* The fence might not have a number assigned if its IB is being
    * submitted in another thread right now. Wait until the submission
    * is done. */
   if (!os_wait_until_zero_abs_timeout(&rfence->submission_in_progress,
                                       abs_timeout))
      return false;

   user_fence_cpu = rfence->user_fence_cpu_address;
   if (user_fence_cpu) {
      if (*user_fence_cpu >= rfence->fence.fence) {
         rfence->signalled = true;
         return true;
      }

      /* No timeout, just query: no need for the ioctl. */
      if (!absolute && !timeout)
         return false;
   }

   /* Now use the libdrm query. */
   r = amdgpu_cs_query_fence_status(&rfence->fence,
                                    abs_timeout,
                                    AMDGPU_QUERY_FENCE_TIMEOUT_IS_ABSOLUTE,
                                    &expired);
   if (r) {
      fprintf(stderr, "amdgpu: amdgpu_cs_query_fence_status failed.\n");
      return false;
   }

   if (expired) {
      /* This variable can only transition from false to true, so it doesn't
       * matter if threads race for it. */
      rfence->signalled = true;
      return true;
   }
   return false;
}

static bool amdgpu_fence_wait_rel_timeout(struct radeon_winsys *rws,
                                          struct pipe_fence_handle *fence,
                                          uint64_t timeout)
{
   return amdgpu_fence_wait(fence, timeout, false);
}

static struct pipe_fence_handle *
amdgpu_cs_get_next_fence(struct radeon_winsys_cs *rcs)
{
   struct amdgpu_cs *cs = amdgpu_cs(rcs);
   struct pipe_fence_handle *fence = NULL;

   if (debug_get_option_noop())
      return NULL;

   if (cs->next_fence) {
      amdgpu_fence_reference(&fence, cs->next_fence);
      return fence;
   }

   fence = amdgpu_fence_create(cs->ctx,
                               cs->csc->request.ip_type,
                               cs->csc->request.ip_instance,
                               cs->csc->request.ring);
   if (!fence)
      return NULL;

   amdgpu_fence_reference(&cs->next_fence, fence);
   return fence;
}

/* CONTEXTS */

static struct radeon_winsys_ctx *amdgpu_ctx_create(struct radeon_winsys *ws)
{
   struct amdgpu_ctx *ctx = CALLOC_STRUCT(amdgpu_ctx);
   int r;
   struct amdgpu_bo_alloc_request alloc_buffer = {};
   amdgpu_bo_handle buf_handle;

   if (!ctx)
      return NULL;

   ctx->ws = amdgpu_winsys(ws);
   ctx->refcount = 1;

   r = amdgpu_cs_ctx_create(ctx->ws->dev, &ctx->ctx);
   if (r) {
      fprintf(stderr, "amdgpu: amdgpu_cs_ctx_create failed. (%i)\n", r);
      goto error_create;
   }

   alloc_buffer.alloc_size = ctx->ws->info.gart_page_size;
   alloc_buffer.phys_alignment = ctx->ws->info.gart_page_size;
   alloc_buffer.preferred_heap = AMDGPU_GEM_DOMAIN_GTT;

   r = amdgpu_bo_alloc(ctx->ws->dev, &alloc_buffer, &buf_handle);
   if (r) {
      fprintf(stderr, "amdgpu: amdgpu_bo_alloc failed. (%i)\n", r);
      goto error_user_fence_alloc;
   }

   r = amdgpu_bo_cpu_map(buf_handle, (void**)&ctx->user_fence_cpu_address_base);
   if (r) {
      fprintf(stderr, "amdgpu: amdgpu_bo_cpu_map failed. (%i)\n", r);
      goto error_user_fence_map;
   }

   memset(ctx->user_fence_cpu_address_base, 0, alloc_buffer.alloc_size);
   ctx->user_fence_bo = buf_handle;

   return (struct radeon_winsys_ctx*)ctx;

error_user_fence_map:
   amdgpu_bo_free(buf_handle);
error_user_fence_alloc:
   amdgpu_cs_ctx_free(ctx->ctx);
error_create:
   FREE(ctx);
   return NULL;
}

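/*
 * Layout of the user-fence buffer allocated above: one GTT page shared
 * by all rings of the context. Each ring type gets its own 64-bit slot
 * (amdgpu_cs_submit_ib passes the ring type as fence_info.offset), so
 * completion can be polled on the CPU without an ioctl. A sketch,
 * assuming the pointer arithmetic used in amdgpu_cs_submit_ib:
 *
 *    uint64_t *slot = ctx->user_fence_cpu_address_base + ring_type;
 *    // the GPU writes request->seq_no to *slot when the IB finishes
 */
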
static void amdgpu_ctx_destroy(struct radeon_winsys_ctx *rwctx)
{
   amdgpu_ctx_unref((struct amdgpu_ctx*)rwctx);
}

static enum pipe_reset_status
amdgpu_ctx_query_reset_status(struct radeon_winsys_ctx *rwctx)
{
   struct amdgpu_ctx *ctx = (struct amdgpu_ctx*)rwctx;
   uint32_t result, hangs;
   int r;

   r = amdgpu_cs_query_reset_state(ctx->ctx, &result, &hangs);
   if (r) {
      fprintf(stderr, "amdgpu: amdgpu_cs_query_reset_state failed. (%i)\n", r);
      return PIPE_NO_RESET;
   }

   switch (result) {
   case AMDGPU_CTX_GUILTY_RESET:
      return PIPE_GUILTY_CONTEXT_RESET;
   case AMDGPU_CTX_INNOCENT_RESET:
      return PIPE_INNOCENT_CONTEXT_RESET;
   case AMDGPU_CTX_UNKNOWN_RESET:
      return PIPE_UNKNOWN_CONTEXT_RESET;
   case AMDGPU_CTX_NO_RESET:
   default:
      return PIPE_NO_RESET;
   }
}

/* COMMAND SUBMISSION */

static bool amdgpu_cs_has_user_fence(struct amdgpu_cs_context *cs)
{
   return cs->request.ip_type != AMDGPU_HW_IP_UVD &&
          cs->request.ip_type != AMDGPU_HW_IP_VCE;
}

static bool amdgpu_cs_has_chaining(struct amdgpu_cs *cs)
{
   return cs->ctx->ws->info.chip_class >= CIK &&
          cs->ring_type == RING_GFX;
}

static unsigned amdgpu_cs_epilog_dws(enum ring_type ring_type)
{
   if (ring_type == RING_GFX)
      return 4; /* for chaining */

   return 0;
}

int amdgpu_lookup_buffer(struct amdgpu_cs_context *cs, struct amdgpu_winsys_bo *bo)
{
   unsigned hash = bo->unique_id & (ARRAY_SIZE(cs->buffer_indices_hashlist)-1);
   int i = cs->buffer_indices_hashlist[hash];
   struct amdgpu_cs_buffer *buffers;
   int num_buffers;

   if (bo->bo) {
      buffers = cs->real_buffers;
      num_buffers = cs->num_real_buffers;
   } else {
      buffers = cs->slab_buffers;
      num_buffers = cs->num_slab_buffers;
   }

   /* Fast path: the cached entry is either a guaranteed miss (-1) or
    * a hit for this exact BO. */
   if (i < 0 || (i < num_buffers && buffers[i].bo == bo))
      return i;

   /* Hash collision, look for the BO in the list of buffers linearly. */
   for (i = num_buffers - 1; i >= 0; i--) {
      if (buffers[i].bo == bo) {
         /* Put this buffer in the hash list.
          * This will prevent additional hash collisions if there are
          * several consecutive lookup_buffer calls for the same buffer.
          *
          * Example: Assuming buffers A,B,C collide in the hash list,
          * the following sequence of buffers:
          *         AAAAAAAAAAABBBBBBBBBBBBBBCCCCCCCC
          * will collide here: ^ and here:   ^,
          * meaning that we should get very few collisions in the end. */
         cs->buffer_indices_hashlist[hash] = i;
         return i;
      }
   }
   return -1;
}

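/*
 * Worked example for the cache above: with a hashlist of, say, 256
 * entries (the real size is ARRAY_SIZE(buffer_indices_hashlist)), a BO
 * with unique_id 0x1234 maps to slot 0x34. The slot caches the index of
 * the last buffer stored there, so a run of lookups of the same BO
 * costs one comparison each; only a collision with a different BO, or a
 * stale index, falls through to the linear search.
 */
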
static int
amdgpu_lookup_or_add_real_buffer(struct amdgpu_cs *acs, struct amdgpu_winsys_bo *bo)
{
   struct amdgpu_cs_context *cs = acs->csc;
   struct amdgpu_cs_buffer *buffer;
   unsigned hash;
   int idx = amdgpu_lookup_buffer(cs, bo);

   if (idx >= 0)
      return idx;

   /* New buffer, check if the backing array is large enough. */
   if (cs->num_real_buffers >= cs->max_real_buffers) {
      unsigned new_max =
         MAX2(cs->max_real_buffers + 16, (unsigned)(cs->max_real_buffers * 1.3));
      struct amdgpu_cs_buffer *new_buffers;
      amdgpu_bo_handle *new_handles;
      uint8_t *new_flags;

      new_buffers = MALLOC(new_max * sizeof(*new_buffers));
      new_handles = MALLOC(new_max * sizeof(*new_handles));
      new_flags = MALLOC(new_max * sizeof(*new_flags));

      if (!new_buffers || !new_handles || !new_flags) {
         fprintf(stderr, "amdgpu_lookup_or_add_real_buffer: allocation failed\n");
         FREE(new_buffers);
         FREE(new_handles);
         FREE(new_flags);
         return -1;
      }

      memcpy(new_buffers, cs->real_buffers, cs->num_real_buffers * sizeof(*new_buffers));
      memcpy(new_handles, cs->handles, cs->num_real_buffers * sizeof(*new_handles));
      memcpy(new_flags, cs->flags, cs->num_real_buffers * sizeof(*new_flags));

      FREE(cs->real_buffers);
      FREE(cs->handles);
      FREE(cs->flags);

      cs->max_real_buffers = new_max;
      cs->real_buffers = new_buffers;
      cs->handles = new_handles;
      cs->flags = new_flags;
   }

   idx = cs->num_real_buffers;
   buffer = &cs->real_buffers[idx];

   memset(buffer, 0, sizeof(*buffer));
   amdgpu_winsys_bo_reference(&buffer->bo, bo);
   cs->handles[idx] = bo->bo;
   cs->flags[idx] = 0;
   p_atomic_inc(&bo->num_cs_references);
   cs->num_real_buffers++;

   hash = bo->unique_id & (ARRAY_SIZE(cs->buffer_indices_hashlist)-1);
   cs->buffer_indices_hashlist[hash] = idx;

   if (bo->initial_domain & RADEON_DOMAIN_VRAM)
      acs->main.base.used_vram += bo->base.size;
   else if (bo->initial_domain & RADEON_DOMAIN_GTT)
      acs->main.base.used_gart += bo->base.size;

   return idx;
}

static int amdgpu_lookup_or_add_slab_buffer(struct amdgpu_cs *acs,
                                            struct amdgpu_winsys_bo *bo)
{
   struct amdgpu_cs_context *cs = acs->csc;
   struct amdgpu_cs_buffer *buffer;
   unsigned hash;
   int idx = amdgpu_lookup_buffer(cs, bo);
   int real_idx;

   if (idx >= 0)
      return idx;

   real_idx = amdgpu_lookup_or_add_real_buffer(acs, bo->u.slab.real);
   if (real_idx < 0)
      return -1;

   /* New buffer, check if the backing array is large enough. */
   if (cs->num_slab_buffers >= cs->max_slab_buffers) {
      unsigned new_max =
         MAX2(cs->max_slab_buffers + 16, (unsigned)(cs->max_slab_buffers * 1.3));
      struct amdgpu_cs_buffer *new_buffers;

      new_buffers = REALLOC(cs->slab_buffers,
                            cs->max_slab_buffers * sizeof(*new_buffers),
                            new_max * sizeof(*new_buffers));
      if (!new_buffers) {
         fprintf(stderr, "amdgpu_lookup_or_add_slab_buffer: allocation failed\n");
         return -1;
      }

      cs->max_slab_buffers = new_max;
      cs->slab_buffers = new_buffers;
   }

   idx = cs->num_slab_buffers;
   buffer = &cs->slab_buffers[idx];

   memset(buffer, 0, sizeof(*buffer));
   amdgpu_winsys_bo_reference(&buffer->bo, bo);
   buffer->u.slab.real_idx = real_idx;
   p_atomic_inc(&bo->num_cs_references);
   cs->num_slab_buffers++;

   hash = bo->unique_id & (ARRAY_SIZE(cs->buffer_indices_hashlist)-1);
   cs->buffer_indices_hashlist[hash] = idx;

   return idx;
}

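/*
 * Growth policy shared by the two functions above:
 * new_max = MAX2(max + 16, max * 1.3). The +16 linear step wins for
 * small arrays (0 -> 16 -> 32 -> 48 -> 64) and the 1.3x geometric step
 * takes over past ~54 entries (64 -> 83 -> 107 -> 139 -> ...), keeping
 * reallocation amortized for command streams that reference many
 * buffers.
 */
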
static unsigned amdgpu_cs_add_buffer(struct radeon_winsys_cs *rcs,
                                     struct pb_buffer *buf,
                                     enum radeon_bo_usage usage,
                                     enum radeon_bo_domain domains,
                                     enum radeon_bo_priority priority)
{
   /* Don't use the "domains" parameter. The amdgpu kernel driver doesn't
    * support changing the buffer placement during command submission.
    */
   struct amdgpu_cs *acs = amdgpu_cs(rcs);
   struct amdgpu_cs_context *cs = acs->csc;
   struct amdgpu_winsys_bo *bo = (struct amdgpu_winsys_bo*)buf;
   struct amdgpu_cs_buffer *buffer;
   int index;

   if (!bo->bo) {
      index = amdgpu_lookup_or_add_slab_buffer(acs, bo);
      if (index < 0)
         return 0;

      buffer = &cs->slab_buffers[index];
      buffer->usage |= usage;

      usage &= ~RADEON_USAGE_SYNCHRONIZED;
      index = buffer->u.slab.real_idx;
   } else {
      index = amdgpu_lookup_or_add_real_buffer(acs, bo);
      if (index < 0)
         return 0;
   }

   buffer = &cs->real_buffers[index];
   buffer->u.real.priority_usage |= 1llu << priority;
   buffer->usage |= usage;
   cs->flags[index] = MAX2(cs->flags[index], priority / 4);
   return index;
}

static bool amdgpu_ib_new_buffer(struct amdgpu_winsys *ws, struct amdgpu_ib *ib)
{
   struct pb_buffer *pb;
   uint8_t *mapped;
   unsigned buffer_size;

   /* Always create a buffer that is at least as large as the maximum seen IB
    * size, aligned to a power of two (and multiplied by 4 to reduce internal
    * fragmentation if chaining is not available). Limit to 512k dwords, which
    * is the largest power of two that fits into the size field of the
    * INDIRECT_BUFFER packet.
    */
   if (amdgpu_cs_has_chaining(amdgpu_cs_from_ib(ib)))
      buffer_size = 4 * util_next_power_of_two(ib->max_ib_size);
   else
      buffer_size = 4 * util_next_power_of_two(4 * ib->max_ib_size);

   buffer_size = MIN2(buffer_size, 4 * 512 * 1024);

   switch (ib->ib_type) {
   case IB_CONST_PREAMBLE:
      buffer_size = MAX2(buffer_size, 4 * 1024);
      break;
   case IB_CONST:
      buffer_size = MAX2(buffer_size, 16 * 1024 * 4);
      break;
   case IB_MAIN:
      buffer_size = MAX2(buffer_size, 8 * 1024 * 4);
      break;
   default:
      unreachable("unhandled IB type");
   }

   pb = ws->base.buffer_create(&ws->base, buffer_size,
                               ws->info.gart_page_size,
                               RADEON_DOMAIN_GTT,
                               RADEON_FLAG_CPU_ACCESS);
   if (!pb)
      return false;

   mapped = ws->base.buffer_map(pb, NULL, PIPE_TRANSFER_WRITE);
   if (!mapped) {
      pb_reference(&pb, NULL);
      return false;
   }

   pb_reference(&ib->big_ib_buffer, pb);
   pb_reference(&pb, NULL);

   ib->ib_mapped = mapped;
   ib->used_ib_space = 0;

   return true;
}

static unsigned amdgpu_ib_max_submit_dwords(enum ib_type ib_type)
{
   switch (ib_type) {
   case IB_MAIN:
      /* Smaller submits mean the GPU gets busy sooner and there is less
       * waiting for buffers and fences. Proof:
       *   http://www.phoronix.com/scan.php?page=article&item=mesa-111-si&num=1
       */
      return 20 * 1024;
   case IB_CONST_PREAMBLE:
   case IB_CONST:
      /* There isn't really any reason to limit CE IB size beyond the natural
       * limit implied by the main IB, except perhaps GTT size. Just return
       * an extremely large value that we never get anywhere close to.
       */
      return 16 * 1024 * 1024;
   default:
      unreachable("bad ib_type");
   }
}

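/*
 * Sizing example for amdgpu_ib_new_buffer() above: with a maximum seen
 * main-IB size of 5000 dwords and chaining available, the backing
 * buffer is 4 bytes * next_power_of_two(5000) = 4 * 8192 = 32 KB.
 * Without chaining, the size is first multiplied by 4 so that several
 * IBs fit in one buffer: 4 * next_power_of_two(4 * 5000) = 128 KB.
 * The cap of 4 * 512K = 2 MB corresponds to the largest size the
 * INDIRECT_BUFFER packet's size field can encode.
 */
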
static bool amdgpu_get_new_ib(struct radeon_winsys *ws, struct amdgpu_cs *cs,
                              enum ib_type ib_type)
{
   struct amdgpu_winsys *aws = (struct amdgpu_winsys*)ws;
   /* Small IBs are better than big IBs, because the GPU goes idle quicker
    * and there is less waiting for buffers and fences. Proof:
    *   http://www.phoronix.com/scan.php?page=article&item=mesa-111-si&num=1
    */
   struct amdgpu_ib *ib = NULL;
   struct amdgpu_cs_ib_info *info = &cs->csc->ib[ib_type];
   unsigned ib_size = 0;

   switch (ib_type) {
   case IB_CONST_PREAMBLE:
      ib = &cs->const_preamble_ib;
      ib_size = 256 * 4;
      break;
   case IB_CONST:
      ib = &cs->const_ib;
      ib_size = 8 * 1024 * 4;
      break;
   case IB_MAIN:
      ib = &cs->main;
      ib_size = 4 * 1024 * 4;
      break;
   default:
      unreachable("unhandled IB type");
   }

   if (!amdgpu_cs_has_chaining(cs)) {
      ib_size = MAX2(ib_size,
                     4 * MIN2(util_next_power_of_two(ib->max_ib_size),
                              amdgpu_ib_max_submit_dwords(ib_type)));
   }

   /* Gradually decay the maximum seen IB size, so that a one-off spike
    * doesn't keep future IB buffers large forever. */
   ib->max_ib_size = ib->max_ib_size - ib->max_ib_size / 32;

   ib->base.prev_dw = 0;
   ib->base.num_prev = 0;
   ib->base.current.cdw = 0;
   ib->base.current.buf = NULL;

   /* Allocate a new buffer for IBs if the current buffer is all used. */
   if (!ib->big_ib_buffer ||
       ib->used_ib_space + ib_size > ib->big_ib_buffer->size) {
      if (!amdgpu_ib_new_buffer(aws, ib))
         return false;
   }

   info->ib_mc_address = amdgpu_winsys_bo(ib->big_ib_buffer)->va +
                         ib->used_ib_space;
   info->size = 0;
   ib->ptr_ib_size = &info->size;

   amdgpu_cs_add_buffer(&cs->main.base, ib->big_ib_buffer,
                        RADEON_USAGE_READ, 0, RADEON_PRIO_IB1);

   ib->base.current.buf = (uint32_t*)(ib->ib_mapped + ib->used_ib_space);

   ib_size = ib->big_ib_buffer->size - ib->used_ib_space;
   ib->base.current.max_dw = ib_size / 4 - amdgpu_cs_epilog_dws(cs->ring_type);
   return true;
}

static void amdgpu_ib_finalize(struct amdgpu_ib *ib)
{
   *ib->ptr_ib_size |= ib->base.current.cdw;
   ib->used_ib_space += ib->base.current.cdw * 4;
   ib->max_ib_size = MAX2(ib->max_ib_size, ib->base.prev_dw + ib->base.current.cdw);
}

static bool amdgpu_init_cs_context(struct amdgpu_cs_context *cs,
                                   enum ring_type ring_type)
{
   int i;

   switch (ring_type) {
   case RING_DMA:
      cs->request.ip_type = AMDGPU_HW_IP_DMA;
      break;

   case RING_UVD:
      cs->request.ip_type = AMDGPU_HW_IP_UVD;
      break;

   case RING_VCE:
      cs->request.ip_type = AMDGPU_HW_IP_VCE;
      break;

   case RING_COMPUTE:
      cs->request.ip_type = AMDGPU_HW_IP_COMPUTE;
      break;

   default:
   case RING_GFX:
      cs->request.ip_type = AMDGPU_HW_IP_GFX;
      break;
   }

   for (i = 0; i < ARRAY_SIZE(cs->buffer_indices_hashlist); i++) {
      cs->buffer_indices_hashlist[i] = -1;
   }

   cs->request.number_of_ibs = 1;
   cs->request.ibs = &cs->ib[IB_MAIN];

   cs->ib[IB_CONST].flags = AMDGPU_IB_FLAG_CE;
   cs->ib[IB_CONST_PREAMBLE].flags = AMDGPU_IB_FLAG_CE |
                                     AMDGPU_IB_FLAG_PREAMBLE;

   return true;
}

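/*
 * Note on the request.ibs setup above: the kernel expects the IBs as a
 * contiguous array in submission order, so this relies on the ib_type
 * enum (declared in amdgpu_cs.h) laying out cs->ib[] as
 * IB_CONST_PREAMBLE, IB_CONST, IB_MAIN. The add_const_*_ib functions
 * below then only move the starting pointer and grow the count:
 *
 *    1 IB:  ibs = &ib[IB_MAIN]           -> { MAIN }
 *    2 IBs: ibs = &ib[IB_CONST]          -> { CONST, MAIN }
 *    3 IBs: ibs = &ib[IB_CONST_PREAMBLE] -> { PREAMBLE, CONST, MAIN }
 */
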
static void amdgpu_cs_context_cleanup(struct amdgpu_cs_context *cs)
{
   unsigned i;

   for (i = 0; i < cs->num_real_buffers; i++) {
      p_atomic_dec(&cs->real_buffers[i].bo->num_cs_references);
      amdgpu_winsys_bo_reference(&cs->real_buffers[i].bo, NULL);
   }
   for (i = 0; i < cs->num_slab_buffers; i++) {
      p_atomic_dec(&cs->slab_buffers[i].bo->num_cs_references);
      amdgpu_winsys_bo_reference(&cs->slab_buffers[i].bo, NULL);
   }

   cs->num_real_buffers = 0;
   cs->num_slab_buffers = 0;
   amdgpu_fence_reference(&cs->fence, NULL);

   for (i = 0; i < ARRAY_SIZE(cs->buffer_indices_hashlist); i++) {
      cs->buffer_indices_hashlist[i] = -1;
   }
}

static void amdgpu_destroy_cs_context(struct amdgpu_cs_context *cs)
{
   amdgpu_cs_context_cleanup(cs);
   FREE(cs->flags);
   FREE(cs->real_buffers);
   FREE(cs->handles);
   FREE(cs->slab_buffers);
   FREE(cs->request.dependencies);
}


static struct radeon_winsys_cs *
amdgpu_cs_create(struct radeon_winsys_ctx *rwctx,
                 enum ring_type ring_type,
                 void (*flush)(void *ctx, unsigned flags,
                               struct pipe_fence_handle **fence),
                 void *flush_ctx)
{
   struct amdgpu_ctx *ctx = (struct amdgpu_ctx*)rwctx;
   struct amdgpu_cs *cs;

   cs = CALLOC_STRUCT(amdgpu_cs);
   if (!cs) {
      return NULL;
   }

   util_queue_fence_init(&cs->flush_completed);

   cs->ctx = ctx;
   cs->flush_cs = flush;
   cs->flush_data = flush_ctx;
   cs->ring_type = ring_type;

   cs->main.ib_type = IB_MAIN;
   cs->const_ib.ib_type = IB_CONST;
   cs->const_preamble_ib.ib_type = IB_CONST_PREAMBLE;

   if (!amdgpu_init_cs_context(&cs->csc1, ring_type)) {
      FREE(cs);
      return NULL;
   }

   if (!amdgpu_init_cs_context(&cs->csc2, ring_type)) {
      amdgpu_destroy_cs_context(&cs->csc1);
      FREE(cs);
      return NULL;
   }

   /* Set the first submission context as current. */
   cs->csc = &cs->csc1;
   cs->cst = &cs->csc2;

   if (!amdgpu_get_new_ib(&ctx->ws->base, cs, IB_MAIN)) {
      amdgpu_destroy_cs_context(&cs->csc2);
      amdgpu_destroy_cs_context(&cs->csc1);
      FREE(cs);
      return NULL;
   }

   p_atomic_inc(&ctx->ws->num_cs);
   return &cs->main.base;
}

static struct radeon_winsys_cs *
amdgpu_cs_add_const_ib(struct radeon_winsys_cs *rcs)
{
   struct amdgpu_cs *cs = (struct amdgpu_cs*)rcs;
   struct amdgpu_winsys *ws = cs->ctx->ws;

   /* only one const IB can be added */
   if (cs->ring_type != RING_GFX || cs->const_ib.ib_mapped)
      return NULL;

   if (!amdgpu_get_new_ib(&ws->base, cs, IB_CONST))
      return NULL;

   cs->csc->request.number_of_ibs = 2;
   cs->csc->request.ibs = &cs->csc->ib[IB_CONST];

   cs->cst->request.number_of_ibs = 2;
   cs->cst->request.ibs = &cs->cst->ib[IB_CONST];

   return &cs->const_ib.base;
}

static struct radeon_winsys_cs *
amdgpu_cs_add_const_preamble_ib(struct radeon_winsys_cs *rcs)
{
   struct amdgpu_cs *cs = (struct amdgpu_cs*)rcs;
   struct amdgpu_winsys *ws = cs->ctx->ws;

   /* only one const preamble IB can be added, and only when the const IB has
    * also been mapped */
   if (cs->ring_type != RING_GFX || !cs->const_ib.ib_mapped ||
       cs->const_preamble_ib.ib_mapped)
      return NULL;

   if (!amdgpu_get_new_ib(&ws->base, cs, IB_CONST_PREAMBLE))
      return NULL;

   cs->csc->request.number_of_ibs = 3;
   cs->csc->request.ibs = &cs->csc->ib[IB_CONST_PREAMBLE];

   cs->cst->request.number_of_ibs = 3;
   cs->cst->request.ibs = &cs->cst->ib[IB_CONST_PREAMBLE];

   return &cs->const_preamble_ib.base;
}

static bool amdgpu_cs_validate(struct radeon_winsys_cs *rcs)
{
   return true;
}

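/*
 * IB chaining sketch: when a GFX IB runs out of space, the tail of the
 * current chunk is turned into an INDIRECT_BUFFER packet pointing at a
 * freshly allocated chunk, so the CP jumps there instead of ending the
 * IB. This is what the 4-dword epilog reserved by amdgpu_cs_epilog_dws()
 * is for:
 *
 *    <type-3 NOPs until cdw % 8 == 4>
 *    PKT3(PKT3_INDIRECT_BUFFER_CIK, 2, 0)
 *    va_lo, va_hi                 <- GPU address of the new chunk
 *    size | CHAIN=1 | VALID=1     <- size patched once the next chunk ends
 *
 * amdgpu_cs_check_space() below implements this.
 */
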
static bool amdgpu_cs_check_space(struct radeon_winsys_cs *rcs, unsigned dw)
{
   struct amdgpu_ib *ib = amdgpu_ib(rcs);
   struct amdgpu_cs *cs = amdgpu_cs_from_ib(ib);
   unsigned requested_size = rcs->prev_dw + rcs->current.cdw + dw;
   uint64_t va;
   uint32_t *new_ptr_ib_size;

   assert(rcs->current.cdw <= rcs->current.max_dw);

   if (requested_size > amdgpu_ib_max_submit_dwords(ib->ib_type))
      return false;

   ib->max_ib_size = MAX2(ib->max_ib_size, requested_size);

   if (rcs->current.max_dw - rcs->current.cdw >= dw)
      return true;

   if (!amdgpu_cs_has_chaining(cs))
      return false;

   /* Allocate a new chunk */
   if (rcs->num_prev >= rcs->max_prev) {
      unsigned new_max_prev = MAX2(1, 2 * rcs->max_prev);
      struct radeon_winsys_cs_chunk *new_prev;

      new_prev = REALLOC(rcs->prev,
                         sizeof(*new_prev) * rcs->max_prev,
                         sizeof(*new_prev) * new_max_prev);
      if (!new_prev)
         return false;

      rcs->prev = new_prev;
      rcs->max_prev = new_max_prev;
   }

   if (!amdgpu_ib_new_buffer(cs->ctx->ws, ib))
      return false;

   assert(ib->used_ib_space == 0);
   va = amdgpu_winsys_bo(ib->big_ib_buffer)->va;

   /* This space was originally reserved. */
   rcs->current.max_dw += 4;
   assert(ib->used_ib_space + 4 * rcs->current.max_dw <= ib->big_ib_buffer->size);

   /* Pad with NOPs and add INDIRECT_BUFFER packet */
   while ((rcs->current.cdw & 7) != 4)
      radeon_emit(rcs, 0xffff1000); /* type3 nop packet */

   radeon_emit(rcs, PKT3(ib->ib_type == IB_MAIN ? PKT3_INDIRECT_BUFFER_CIK
                                                : PKT3_INDIRECT_BUFFER_CONST, 2, 0));
   radeon_emit(rcs, va);
   radeon_emit(rcs, va >> 32);
   new_ptr_ib_size = &rcs->current.buf[rcs->current.cdw];
   radeon_emit(rcs, S_3F2_CHAIN(1) | S_3F2_VALID(1));

   assert((rcs->current.cdw & 7) == 0);
   assert(rcs->current.cdw <= rcs->current.max_dw);

   *ib->ptr_ib_size |= rcs->current.cdw;
   ib->ptr_ib_size = new_ptr_ib_size;

   /* Hook up the new chunk */
   rcs->prev[rcs->num_prev].buf = rcs->current.buf;
   rcs->prev[rcs->num_prev].cdw = rcs->current.cdw;
   rcs->prev[rcs->num_prev].max_dw = rcs->current.cdw; /* no modifications */
   rcs->num_prev++;

   ib->base.prev_dw += ib->base.current.cdw;
   ib->base.current.cdw = 0;

   ib->base.current.buf = (uint32_t*)(ib->ib_mapped + ib->used_ib_space);
   ib->base.current.max_dw = ib->big_ib_buffer->size / 4 - amdgpu_cs_epilog_dws(cs->ring_type);

   amdgpu_cs_add_buffer(&cs->main.base, ib->big_ib_buffer,
                        RADEON_USAGE_READ, 0, RADEON_PRIO_IB1);

   return true;
}

static unsigned amdgpu_cs_get_buffer_list(struct radeon_winsys_cs *rcs,
                                          struct radeon_bo_list_item *list)
{
   struct amdgpu_cs_context *cs = amdgpu_cs(rcs)->csc;
   int i;

   if (list) {
      for (i = 0; i < cs->num_real_buffers; i++) {
         list[i].bo_size = cs->real_buffers[i].bo->base.size;
         list[i].vm_address = cs->real_buffers[i].bo->va;
         list[i].priority_usage = cs->real_buffers[i].u.real.priority_usage;
      }
   }
   return cs->num_real_buffers;
}

DEBUG_GET_ONCE_BOOL_OPTION(all_bos, "RADEON_ALL_BOS", false)

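/*
 * Dependency rules implemented below: fences from the same context and
 * ring are pruned (submission order already serializes them), as are
 * fences that have already signalled. The remaining fences stay attached
 * to the BO, and if the buffer was added with RADEON_USAGE_SYNCHRONIZED,
 * they are also handed to the kernel as dependencies of this submission.
 */
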
static void amdgpu_add_fence_dependency(struct amdgpu_cs *acs,
                                        struct amdgpu_cs_buffer *buffer)
{
   struct amdgpu_cs_context *cs = acs->csc;
   struct amdgpu_winsys_bo *bo = buffer->bo;
   struct amdgpu_cs_fence *dep;
   unsigned new_num_fences = 0;

   for (unsigned j = 0; j < bo->num_fences; ++j) {
      struct amdgpu_fence *bo_fence = (void *)bo->fences[j];
      unsigned idx;

      if (bo_fence->ctx == acs->ctx &&
          bo_fence->fence.ip_type == cs->request.ip_type &&
          bo_fence->fence.ip_instance == cs->request.ip_instance &&
          bo_fence->fence.ring == cs->request.ring)
         continue;

      if (amdgpu_fence_wait((void *)bo_fence, 0, false))
         continue;

      amdgpu_fence_reference(&bo->fences[new_num_fences], bo->fences[j]);
      new_num_fences++;

      if (!(buffer->usage & RADEON_USAGE_SYNCHRONIZED))
         continue;

      if (bo_fence->submission_in_progress)
         os_wait_until_zero(&bo_fence->submission_in_progress,
                            PIPE_TIMEOUT_INFINITE);

      idx = cs->request.number_of_dependencies++;
      if (idx >= cs->max_dependencies) {
         unsigned size;

         cs->max_dependencies = idx + 8;
         size = cs->max_dependencies * sizeof(struct amdgpu_cs_fence);
         cs->request.dependencies = realloc(cs->request.dependencies, size);
      }

      dep = &cs->request.dependencies[idx];
      memcpy(dep, &bo_fence->fence, sizeof(*dep));
   }

   for (unsigned j = new_num_fences; j < bo->num_fences; ++j)
      amdgpu_fence_reference(&bo->fences[j], NULL);

   bo->num_fences = new_num_fences;
}

/* Since the kernel driver doesn't synchronize execution between different
 * rings automatically, we have to add fence dependencies manually.
 */
static void amdgpu_add_fence_dependencies(struct amdgpu_cs *acs)
{
   struct amdgpu_cs_context *cs = acs->csc;
   int i;

   cs->request.number_of_dependencies = 0;

   for (i = 0; i < cs->num_real_buffers; i++)
      amdgpu_add_fence_dependency(acs, &cs->real_buffers[i]);
   for (i = 0; i < cs->num_slab_buffers; i++)
      amdgpu_add_fence_dependency(acs, &cs->slab_buffers[i]);
}

static void amdgpu_add_fence(struct amdgpu_winsys_bo *bo,
                             struct pipe_fence_handle *fence)
{
   if (bo->num_fences >= bo->max_fences) {
      unsigned new_max_fences = MAX2(1, bo->max_fences * 2);
      struct pipe_fence_handle **new_fences =
         REALLOC(bo->fences,
                 bo->num_fences * sizeof(*new_fences),
                 new_max_fences * sizeof(*new_fences));
      if (new_fences) {
         bo->fences = new_fences;
         bo->max_fences = new_max_fences;
      } else {
         fprintf(stderr, "amdgpu_add_fence: allocation failure, dropping fence\n");
         if (!bo->num_fences)
            return;

         bo->num_fences--; /* prefer to keep a more recent fence if possible */
         amdgpu_fence_reference(&bo->fences[bo->num_fences], NULL);
      }
   }

   bo->fences[bo->num_fences] = NULL;
   amdgpu_fence_reference(&bo->fences[bo->num_fences], fence);
   bo->num_fences++;
}

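/*
 * The submission path below normally passes the kernel exactly the
 * buffers referenced by this CS. Setting the RADEON_ALL_BOS environment
 * variable switches to a list of every buffer the winsys has allocated;
 * slower, but handy when checking whether a GPU fault is caused by a
 * missing buffer-list entry, e.g.:
 *
 *    RADEON_ALL_BOS=true ./app
 */
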
void amdgpu_cs_submit_ib(void *job, int thread_index)
{
   struct amdgpu_cs *acs = (struct amdgpu_cs*)job;
   struct amdgpu_winsys *ws = acs->ctx->ws;
   struct amdgpu_cs_context *cs = acs->cst;
   int i, r;

   cs->request.fence_info.handle = NULL;
   if (amdgpu_cs_has_user_fence(cs)) {
      cs->request.fence_info.handle = acs->ctx->user_fence_bo;
      cs->request.fence_info.offset = acs->ring_type;
   }

   /* Create the buffer list.
    * Use a buffer list containing all allocated buffers if requested.
    */
   if (debug_get_option_all_bos()) {
      struct amdgpu_winsys_bo *bo;
      amdgpu_bo_handle *handles;
      unsigned num = 0;

      pipe_mutex_lock(ws->global_bo_list_lock);

      handles = malloc(sizeof(handles[0]) * ws->num_buffers);
      if (!handles) {
         pipe_mutex_unlock(ws->global_bo_list_lock);
         amdgpu_cs_context_cleanup(cs);
         cs->error_code = -ENOMEM;
         return;
      }

      LIST_FOR_EACH_ENTRY(bo, &ws->global_bo_list, u.real.global_list_item) {
         assert(num < ws->num_buffers);
         handles[num++] = bo->bo;
      }

      r = amdgpu_bo_list_create(ws->dev, ws->num_buffers,
                                handles, NULL,
                                &cs->request.resources);
      free(handles);
      pipe_mutex_unlock(ws->global_bo_list_lock);
   } else {
      r = amdgpu_bo_list_create(ws->dev, cs->num_real_buffers,
                                cs->handles, cs->flags,
                                &cs->request.resources);
   }

   if (r) {
      fprintf(stderr, "amdgpu: buffer list creation failed (%d)\n", r);
      cs->request.resources = NULL;
      amdgpu_fence_signalled(cs->fence);
      cs->error_code = r;
      goto cleanup;
   }

   r = amdgpu_cs_submit(acs->ctx->ctx, 0, &cs->request, 1);
   cs->error_code = r;
   if (r) {
      if (r == -ENOMEM)
         fprintf(stderr, "amdgpu: Not enough memory for command submission.\n");
      else
         fprintf(stderr, "amdgpu: The CS has been rejected, "
                 "see dmesg for more information (%i).\n", r);

      amdgpu_fence_signalled(cs->fence);
   } else {
      /* Success. */
      uint64_t *user_fence = NULL;
      if (amdgpu_cs_has_user_fence(cs))
         user_fence = acs->ctx->user_fence_cpu_address_base +
                      cs->request.fence_info.offset;
      amdgpu_fence_submitted(cs->fence, &cs->request, user_fence);
   }

   /* Cleanup. */
   if (cs->request.resources)
      amdgpu_bo_list_destroy(cs->request.resources);

cleanup:
   for (i = 0; i < cs->num_real_buffers; i++)
      p_atomic_dec(&cs->real_buffers[i].bo->num_active_ioctls);
   for (i = 0; i < cs->num_slab_buffers; i++)
      p_atomic_dec(&cs->slab_buffers[i].bo->num_active_ioctls);

   amdgpu_cs_context_cleanup(cs);
}

/* Make sure the previous submission is completed. */
void amdgpu_cs_sync_flush(struct radeon_winsys_cs *rcs)
{
   struct amdgpu_cs *cs = amdgpu_cs(rcs);

   /* Wait for any pending ioctl of this CS to complete. */
   util_queue_job_wait(&cs->flush_completed);
}

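/*
 * Padding example for the flush path below: rings require the final IB
 * size to be a multiple of 8 dwords (16 for UVD), so the tail is filled
 * with NOPs. E.g. a GFX IB with cdw == 13 gets three type-3 NOPs
 * (0xffff1000) to reach 16 dwords; a UVD IB with cdw == 20 gets twelve
 * type-2 NOPs (0x80000000) to reach 32.
 */
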
static int amdgpu_cs_flush(struct radeon_winsys_cs *rcs,
                           unsigned flags,
                           struct pipe_fence_handle **fence)
{
   struct amdgpu_cs *cs = amdgpu_cs(rcs);
   struct amdgpu_winsys *ws = cs->ctx->ws;
   int error_code = 0;

   rcs->current.max_dw += amdgpu_cs_epilog_dws(cs->ring_type);

   switch (cs->ring_type) {
   case RING_DMA:
      /* pad DMA ring to 8 DWs */
      if (ws->info.chip_class <= SI) {
         while (rcs->current.cdw & 7)
            radeon_emit(rcs, 0xf0000000); /* NOP packet */
      } else {
         while (rcs->current.cdw & 7)
            radeon_emit(rcs, 0x00000000); /* NOP packet */
      }
      break;
   case RING_GFX:
      /* pad GFX ring to 8 DWs to meet CP fetch alignment requirements */
      if (ws->info.gfx_ib_pad_with_type2) {
         while (rcs->current.cdw & 7)
            radeon_emit(rcs, 0x80000000); /* type2 nop packet */
      } else {
         while (rcs->current.cdw & 7)
            radeon_emit(rcs, 0xffff1000); /* type3 nop packet */
      }

      /* Also pad the const IBs. */
      if (cs->const_ib.ib_mapped)
         while (!cs->const_ib.base.current.cdw || (cs->const_ib.base.current.cdw & 7))
            radeon_emit(&cs->const_ib.base, 0xffff1000); /* type3 nop packet */

      if (cs->const_preamble_ib.ib_mapped)
         while (!cs->const_preamble_ib.base.current.cdw || (cs->const_preamble_ib.base.current.cdw & 7))
            radeon_emit(&cs->const_preamble_ib.base, 0xffff1000);
      break;
   case RING_UVD:
      while (rcs->current.cdw & 15)
         radeon_emit(rcs, 0x80000000); /* type2 nop packet */
      break;
   default:
      break;
   }

   if (rcs->current.cdw > rcs->current.max_dw) {
      fprintf(stderr, "amdgpu: command stream overflowed\n");
   }

   /* If the CS is not empty, hasn't overflowed, and no-op mode is off... */
   if (radeon_emitted(&cs->main.base, 0) &&
       cs->main.base.current.cdw <= cs->main.base.current.max_dw &&
       !debug_get_option_noop()) {
      struct amdgpu_cs_context *cur = cs->csc;
      unsigned i, num_buffers;

      /* Set IB sizes. */
      amdgpu_ib_finalize(&cs->main);

      if (cs->const_ib.ib_mapped)
         amdgpu_ib_finalize(&cs->const_ib);

      if (cs->const_preamble_ib.ib_mapped)
         amdgpu_ib_finalize(&cs->const_preamble_ib);

      /* Create a fence. */
      amdgpu_fence_reference(&cur->fence, NULL);
      if (cs->next_fence) {
         /* just move the reference */
         cur->fence = cs->next_fence;
         cs->next_fence = NULL;
      } else {
         cur->fence = amdgpu_fence_create(cs->ctx,
                                          cur->request.ip_type,
                                          cur->request.ip_instance,
                                          cur->request.ring);
      }
      if (fence)
         amdgpu_fence_reference(fence, cur->fence);

      amdgpu_cs_sync_flush(rcs);

      /* Prepare buffers.
       *
       * This lock must be held until the submission is queued to ensure
       * that the order of fence dependency updates matches the order of
       * submissions.
       */
      pipe_mutex_lock(ws->bo_fence_lock);
      amdgpu_add_fence_dependencies(cs);

      num_buffers = cur->num_real_buffers;
      for (i = 0; i < num_buffers; i++) {
         struct amdgpu_winsys_bo *bo = cur->real_buffers[i].bo;
         p_atomic_inc(&bo->num_active_ioctls);
         amdgpu_add_fence(bo, cur->fence);
      }

      num_buffers = cur->num_slab_buffers;
      for (i = 0; i < num_buffers; i++) {
         struct amdgpu_winsys_bo *bo = cur->slab_buffers[i].bo;
         p_atomic_inc(&bo->num_active_ioctls);
         amdgpu_add_fence(bo, cur->fence);
      }

      /* Swap command streams. "cst" is going to be submitted. */
      cs->csc = cs->cst;
      cs->cst = cur;

      /* Submit. */
      util_queue_add_job(&ws->cs_queue, cs, &cs->flush_completed,
                         amdgpu_cs_submit_ib, NULL);
      /* The submission has been queued, so the lock can be released now. */
      pipe_mutex_unlock(ws->bo_fence_lock);

      if (!(flags & RADEON_FLUSH_ASYNC)) {
         amdgpu_cs_sync_flush(rcs);
         error_code = cur->error_code;
      }
   } else {
      amdgpu_cs_context_cleanup(cs->csc);
   }

   amdgpu_get_new_ib(&ws->base, cs, IB_MAIN);
   if (cs->const_ib.ib_mapped)
      amdgpu_get_new_ib(&ws->base, cs, IB_CONST);
   if (cs->const_preamble_ib.ib_mapped)
      amdgpu_get_new_ib(&ws->base, cs, IB_CONST_PREAMBLE);

   cs->main.base.used_gart = 0;
   cs->main.base.used_vram = 0;

   if (cs->ring_type == RING_GFX)
      ws->num_gfx_IBs++;
   else if (cs->ring_type == RING_DMA)
      ws->num_sdma_IBs++;

   return error_code;
}

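/*
 * Note on the csc/cst swap in amdgpu_cs_flush() above: each CS owns two
 * submission contexts. "csc" is being recorded while "cst" may still be
 * consumed by the submission thread; flushing swaps them, so recording
 * the next IB proceeds in parallel with the kernel submission of the
 * previous one. amdgpu_cs_sync_flush() is the barrier that waits for
 * the in-flight half.
 */
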
static void amdgpu_cs_destroy(struct radeon_winsys_cs *rcs)
{
   struct amdgpu_cs *cs = amdgpu_cs(rcs);

   amdgpu_cs_sync_flush(rcs);
   util_queue_fence_destroy(&cs->flush_completed);
   p_atomic_dec(&cs->ctx->ws->num_cs);
   pb_reference(&cs->main.big_ib_buffer, NULL);
   FREE(cs->main.base.prev);
   pb_reference(&cs->const_ib.big_ib_buffer, NULL);
   FREE(cs->const_ib.base.prev);
   pb_reference(&cs->const_preamble_ib.big_ib_buffer, NULL);
   FREE(cs->const_preamble_ib.base.prev);
   amdgpu_destroy_cs_context(&cs->csc1);
   amdgpu_destroy_cs_context(&cs->csc2);
   amdgpu_fence_reference(&cs->next_fence, NULL);
   FREE(cs);
}

static bool amdgpu_bo_is_referenced(struct radeon_winsys_cs *rcs,
                                    struct pb_buffer *_buf,
                                    enum radeon_bo_usage usage)
{
   struct amdgpu_cs *cs = amdgpu_cs(rcs);
   struct amdgpu_winsys_bo *bo = (struct amdgpu_winsys_bo*)_buf;

   return amdgpu_bo_is_referenced_by_cs_with_usage(cs, bo, usage);
}

void amdgpu_cs_init_functions(struct amdgpu_winsys *ws)
{
   ws->base.ctx_create = amdgpu_ctx_create;
   ws->base.ctx_destroy = amdgpu_ctx_destroy;
   ws->base.ctx_query_reset_status = amdgpu_ctx_query_reset_status;
   ws->base.cs_create = amdgpu_cs_create;
   ws->base.cs_add_const_ib = amdgpu_cs_add_const_ib;
   ws->base.cs_add_const_preamble_ib = amdgpu_cs_add_const_preamble_ib;
   ws->base.cs_destroy = amdgpu_cs_destroy;
   ws->base.cs_add_buffer = amdgpu_cs_add_buffer;
   ws->base.cs_validate = amdgpu_cs_validate;
   ws->base.cs_check_space = amdgpu_cs_check_space;
   ws->base.cs_get_buffer_list = amdgpu_cs_get_buffer_list;
   ws->base.cs_flush = amdgpu_cs_flush;
   ws->base.cs_get_next_fence = amdgpu_cs_get_next_fence;
   ws->base.cs_is_buffer_referenced = amdgpu_bo_is_referenced;
   ws->base.cs_sync_flush = amdgpu_cs_sync_flush;
   ws->base.fence_wait = amdgpu_fence_wait_rel_timeout;
   ws->base.fence_reference = amdgpu_fence_reference;
}