1 /* 2 * Copyright 2013 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 * SOFTWARE. 22 * 23 * Authors: Marek Olk <maraeo (at) gmail.com> 24 * 25 */ 26 27 #include "r600_pipe_common.h" 28 #include "r600_cs.h" 29 #include "tgsi/tgsi_parse.h" 30 #include "util/list.h" 31 #include "util/u_draw_quad.h" 32 #include "util/u_memory.h" 33 #include "util/u_format_s3tc.h" 34 #include "util/u_upload_mgr.h" 35 #include "util/os_time.h" 36 #include "vl/vl_decoder.h" 37 #include "vl/vl_video_buffer.h" 38 #include "radeon_video.h" 39 #include <inttypes.h> 40 #include <sys/utsname.h> 41 42 #ifndef HAVE_LLVM 43 #define HAVE_LLVM 0 44 #endif 45 46 #if HAVE_LLVM 47 #include <llvm-c/TargetMachine.h> 48 #endif 49 50 #ifndef MESA_LLVM_VERSION_PATCH 51 #define MESA_LLVM_VERSION_PATCH 0 52 #endif 53 54 struct r600_multi_fence { 55 struct pipe_reference reference; 56 struct pipe_fence_handle *gfx; 57 struct pipe_fence_handle *sdma; 58 59 /* If the context wasn't flushed at fence creation, this is non-NULL. */ 60 struct { 61 struct r600_common_context *ctx; 62 unsigned ib_index; 63 } gfx_unflushed; 64 }; 65 66 /* 67 * shader binary helpers. 68 */ 69 void radeon_shader_binary_init(struct ac_shader_binary *b) 70 { 71 memset(b, 0, sizeof(*b)); 72 } 73 74 void radeon_shader_binary_clean(struct ac_shader_binary *b) 75 { 76 if (!b) 77 return; 78 FREE(b->code); 79 FREE(b->config); 80 FREE(b->rodata); 81 FREE(b->global_symbol_offsets); 82 FREE(b->relocs); 83 FREE(b->disasm_string); 84 FREE(b->llvm_ir_string); 85 } 86 87 /* 88 * pipe_context 89 */ 90 91 /** 92 * Write an EOP event. 93 * 94 * \param event EVENT_TYPE_* 95 * \param event_flags Optional cache flush flags (TC) 96 * \param data_sel 1 = fence, 3 = timestamp 97 * \param buf Buffer 98 * \param va GPU address 99 * \param old_value Previous fence value (for a bug workaround) 100 * \param new_value Fence value to write for this event. 101 */ 102 void r600_gfx_write_event_eop(struct r600_common_context *ctx, 103 unsigned event, unsigned event_flags, 104 unsigned data_sel, 105 struct r600_resource *buf, uint64_t va, 106 uint32_t new_fence, unsigned query_type) 107 { 108 struct radeon_winsys_cs *cs = ctx->gfx.cs; 109 unsigned op = EVENT_TYPE(event) | 110 EVENT_INDEX(5) | 111 event_flags; 112 unsigned sel = EOP_DATA_SEL(data_sel); 113 114 radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0)); 115 radeon_emit(cs, op); 116 radeon_emit(cs, va); 117 radeon_emit(cs, ((va >> 32) & 0xffff) | sel); 118 radeon_emit(cs, new_fence); /* immediate data */ 119 radeon_emit(cs, 0); /* unused */ 120 121 if (buf) 122 r600_emit_reloc(ctx, &ctx->gfx, buf, RADEON_USAGE_WRITE, 123 RADEON_PRIO_QUERY); 124 } 125 126 unsigned r600_gfx_write_fence_dwords(struct r600_common_screen *screen) 127 { 128 unsigned dwords = 6; 129 130 if (!screen->info.has_virtual_memory) 131 dwords += 2; 132 133 return dwords; 134 } 135 136 void r600_gfx_wait_fence(struct r600_common_context *ctx, 137 uint64_t va, uint32_t ref, uint32_t mask) 138 { 139 struct radeon_winsys_cs *cs = ctx->gfx.cs; 140 141 radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, 0)); 142 radeon_emit(cs, WAIT_REG_MEM_EQUAL | WAIT_REG_MEM_MEM_SPACE(1)); 143 radeon_emit(cs, va); 144 radeon_emit(cs, va >> 32); 145 radeon_emit(cs, ref); /* reference value */ 146 radeon_emit(cs, mask); /* mask */ 147 radeon_emit(cs, 4); /* poll interval */ 148 } 149 150 void r600_draw_rectangle(struct blitter_context *blitter, 151 void *vertex_elements_cso, 152 blitter_get_vs_func get_vs, 153 int x1, int y1, int x2, int y2, 154 float depth, unsigned num_instances, 155 enum blitter_attrib_type type, 156 const union blitter_attrib *attrib) 157 { 158 struct r600_common_context *rctx = 159 (struct r600_common_context*)util_blitter_get_pipe(blitter); 160 struct pipe_viewport_state viewport; 161 struct pipe_resource *buf = NULL; 162 unsigned offset = 0; 163 float *vb; 164 165 rctx->b.bind_vertex_elements_state(&rctx->b, vertex_elements_cso); 166 rctx->b.bind_vs_state(&rctx->b, get_vs(blitter)); 167 168 /* Some operations (like color resolve on r6xx) don't work 169 * with the conventional primitive types. 170 * One that works is PT_RECTLIST, which we use here. */ 171 172 /* setup viewport */ 173 viewport.scale[0] = 1.0f; 174 viewport.scale[1] = 1.0f; 175 viewport.scale[2] = 1.0f; 176 viewport.translate[0] = 0.0f; 177 viewport.translate[1] = 0.0f; 178 viewport.translate[2] = 0.0f; 179 rctx->b.set_viewport_states(&rctx->b, 0, 1, &viewport); 180 181 /* Upload vertices. The hw rectangle has only 3 vertices, 182 * The 4th one is derived from the first 3. 183 * The vertex specification should match u_blitter's vertex element state. */ 184 u_upload_alloc(rctx->b.stream_uploader, 0, sizeof(float) * 24, 185 rctx->screen->info.tcc_cache_line_size, 186 &offset, &buf, (void**)&vb); 187 if (!buf) 188 return; 189 190 vb[0] = x1; 191 vb[1] = y1; 192 vb[2] = depth; 193 vb[3] = 1; 194 195 vb[8] = x1; 196 vb[9] = y2; 197 vb[10] = depth; 198 vb[11] = 1; 199 200 vb[16] = x2; 201 vb[17] = y1; 202 vb[18] = depth; 203 vb[19] = 1; 204 205 switch (type) { 206 case UTIL_BLITTER_ATTRIB_COLOR: 207 memcpy(vb+4, attrib->color, sizeof(float)*4); 208 memcpy(vb+12, attrib->color, sizeof(float)*4); 209 memcpy(vb+20, attrib->color, sizeof(float)*4); 210 break; 211 case UTIL_BLITTER_ATTRIB_TEXCOORD_XYZW: 212 case UTIL_BLITTER_ATTRIB_TEXCOORD_XY: 213 vb[6] = vb[14] = vb[22] = attrib->texcoord.z; 214 vb[7] = vb[15] = vb[23] = attrib->texcoord.w; 215 /* fall through */ 216 vb[4] = attrib->texcoord.x1; 217 vb[5] = attrib->texcoord.y1; 218 vb[12] = attrib->texcoord.x1; 219 vb[13] = attrib->texcoord.y2; 220 vb[20] = attrib->texcoord.x2; 221 vb[21] = attrib->texcoord.y1; 222 break; 223 default:; /* Nothing to do. */ 224 } 225 226 /* draw */ 227 struct pipe_vertex_buffer vbuffer = {}; 228 vbuffer.buffer.resource = buf; 229 vbuffer.stride = 2 * 4 * sizeof(float); /* vertex size */ 230 vbuffer.buffer_offset = offset; 231 232 rctx->b.set_vertex_buffers(&rctx->b, blitter->vb_slot, 1, &vbuffer); 233 util_draw_arrays_instanced(&rctx->b, R600_PRIM_RECTANGLE_LIST, 0, 3, 234 0, num_instances); 235 pipe_resource_reference(&buf, NULL); 236 } 237 238 static void r600_dma_emit_wait_idle(struct r600_common_context *rctx) 239 { 240 struct radeon_winsys_cs *cs = rctx->dma.cs; 241 242 if (rctx->chip_class >= EVERGREEN) 243 radeon_emit(cs, 0xf0000000); /* NOP */ 244 else { 245 /* TODO: R600-R700 should use the FENCE packet. 246 * CS checker support is required. */ 247 } 248 } 249 250 void r600_need_dma_space(struct r600_common_context *ctx, unsigned num_dw, 251 struct r600_resource *dst, struct r600_resource *src) 252 { 253 uint64_t vram = ctx->dma.cs->used_vram; 254 uint64_t gtt = ctx->dma.cs->used_gart; 255 256 if (dst) { 257 vram += dst->vram_usage; 258 gtt += dst->gart_usage; 259 } 260 if (src) { 261 vram += src->vram_usage; 262 gtt += src->gart_usage; 263 } 264 265 /* Flush the GFX IB if DMA depends on it. */ 266 if (radeon_emitted(ctx->gfx.cs, ctx->initial_gfx_cs_size) && 267 ((dst && 268 ctx->ws->cs_is_buffer_referenced(ctx->gfx.cs, dst->buf, 269 RADEON_USAGE_READWRITE)) || 270 (src && 271 ctx->ws->cs_is_buffer_referenced(ctx->gfx.cs, src->buf, 272 RADEON_USAGE_WRITE)))) 273 ctx->gfx.flush(ctx, PIPE_FLUSH_ASYNC, NULL); 274 275 /* Flush if there's not enough space, or if the memory usage per IB 276 * is too large. 277 * 278 * IBs using too little memory are limited by the IB submission overhead. 279 * IBs using too much memory are limited by the kernel/TTM overhead. 280 * Too long IBs create CPU-GPU pipeline bubbles and add latency. 281 * 282 * This heuristic makes sure that DMA requests are executed 283 * very soon after the call is made and lowers memory usage. 284 * It improves texture upload performance by keeping the DMA 285 * engine busy while uploads are being submitted. 286 */ 287 num_dw++; /* for emit_wait_idle below */ 288 if (!ctx->ws->cs_check_space(ctx->dma.cs, num_dw) || 289 ctx->dma.cs->used_vram + ctx->dma.cs->used_gart > 64 * 1024 * 1024 || 290 !radeon_cs_memory_below_limit(ctx->screen, ctx->dma.cs, vram, gtt)) { 291 ctx->dma.flush(ctx, PIPE_FLUSH_ASYNC, NULL); 292 assert((num_dw + ctx->dma.cs->current.cdw) <= ctx->dma.cs->current.max_dw); 293 } 294 295 /* Wait for idle if either buffer has been used in the IB before to 296 * prevent read-after-write hazards. 297 */ 298 if ((dst && 299 ctx->ws->cs_is_buffer_referenced(ctx->dma.cs, dst->buf, 300 RADEON_USAGE_READWRITE)) || 301 (src && 302 ctx->ws->cs_is_buffer_referenced(ctx->dma.cs, src->buf, 303 RADEON_USAGE_WRITE))) 304 r600_dma_emit_wait_idle(ctx); 305 306 /* If GPUVM is not supported, the CS checker needs 2 entries 307 * in the buffer list per packet, which has to be done manually. 308 */ 309 if (ctx->screen->info.has_virtual_memory) { 310 if (dst) 311 radeon_add_to_buffer_list(ctx, &ctx->dma, dst, 312 RADEON_USAGE_WRITE, 313 RADEON_PRIO_SDMA_BUFFER); 314 if (src) 315 radeon_add_to_buffer_list(ctx, &ctx->dma, src, 316 RADEON_USAGE_READ, 317 RADEON_PRIO_SDMA_BUFFER); 318 } 319 320 /* this function is called before all DMA calls, so increment this. */ 321 ctx->num_dma_calls++; 322 } 323 324 void r600_preflush_suspend_features(struct r600_common_context *ctx) 325 { 326 /* suspend queries */ 327 if (!LIST_IS_EMPTY(&ctx->active_queries)) 328 r600_suspend_queries(ctx); 329 330 ctx->streamout.suspended = false; 331 if (ctx->streamout.begin_emitted) { 332 r600_emit_streamout_end(ctx); 333 ctx->streamout.suspended = true; 334 } 335 } 336 337 void r600_postflush_resume_features(struct r600_common_context *ctx) 338 { 339 if (ctx->streamout.suspended) { 340 ctx->streamout.append_bitmask = ctx->streamout.enabled_mask; 341 r600_streamout_buffers_dirty(ctx); 342 } 343 344 /* resume queries */ 345 if (!LIST_IS_EMPTY(&ctx->active_queries)) 346 r600_resume_queries(ctx); 347 } 348 349 static void r600_add_fence_dependency(struct r600_common_context *rctx, 350 struct pipe_fence_handle *fence) 351 { 352 struct radeon_winsys *ws = rctx->ws; 353 354 if (rctx->dma.cs) 355 ws->cs_add_fence_dependency(rctx->dma.cs, fence); 356 ws->cs_add_fence_dependency(rctx->gfx.cs, fence); 357 } 358 359 static void r600_fence_server_sync(struct pipe_context *ctx, 360 struct pipe_fence_handle *fence) 361 { 362 struct r600_common_context *rctx = (struct r600_common_context *)ctx; 363 struct r600_multi_fence *rfence = (struct r600_multi_fence *)fence; 364 365 /* Only amdgpu needs to handle fence dependencies (for fence imports). 366 * radeon synchronizes all rings by default and will not implement 367 * fence imports. 368 */ 369 if (rctx->screen->info.drm_major == 2) 370 return; 371 372 /* Only imported fences need to be handled by fence_server_sync, 373 * because the winsys handles synchronizations automatically for BOs 374 * within the process. 375 * 376 * Simply skip unflushed fences here, and the winsys will drop no-op 377 * dependencies (i.e. dependencies within the same ring). 378 */ 379 if (rfence->gfx_unflushed.ctx) 380 return; 381 382 /* All unflushed commands will not start execution before 383 * this fence dependency is signalled. 384 * 385 * Should we flush the context to allow more GPU parallelism? 386 */ 387 if (rfence->sdma) 388 r600_add_fence_dependency(rctx, rfence->sdma); 389 if (rfence->gfx) 390 r600_add_fence_dependency(rctx, rfence->gfx); 391 } 392 393 static void r600_flush_from_st(struct pipe_context *ctx, 394 struct pipe_fence_handle **fence, 395 unsigned flags) 396 { 397 struct pipe_screen *screen = ctx->screen; 398 struct r600_common_context *rctx = (struct r600_common_context *)ctx; 399 struct radeon_winsys *ws = rctx->ws; 400 struct pipe_fence_handle *gfx_fence = NULL; 401 struct pipe_fence_handle *sdma_fence = NULL; 402 bool deferred_fence = false; 403 unsigned rflags = PIPE_FLUSH_ASYNC; 404 405 if (flags & PIPE_FLUSH_END_OF_FRAME) 406 rflags |= PIPE_FLUSH_END_OF_FRAME; 407 408 /* DMA IBs are preambles to gfx IBs, therefore must be flushed first. */ 409 if (rctx->dma.cs) 410 rctx->dma.flush(rctx, rflags, fence ? &sdma_fence : NULL); 411 412 if (!radeon_emitted(rctx->gfx.cs, rctx->initial_gfx_cs_size)) { 413 if (fence) 414 ws->fence_reference(&gfx_fence, rctx->last_gfx_fence); 415 if (!(flags & PIPE_FLUSH_DEFERRED)) 416 ws->cs_sync_flush(rctx->gfx.cs); 417 } else { 418 /* Instead of flushing, create a deferred fence. Constraints: 419 * - The state tracker must allow a deferred flush. 420 * - The state tracker must request a fence. 421 * Thread safety in fence_finish must be ensured by the state tracker. 422 */ 423 if (flags & PIPE_FLUSH_DEFERRED && fence) { 424 gfx_fence = rctx->ws->cs_get_next_fence(rctx->gfx.cs); 425 deferred_fence = true; 426 } else { 427 rctx->gfx.flush(rctx, rflags, fence ? &gfx_fence : NULL); 428 } 429 } 430 431 /* Both engines can signal out of order, so we need to keep both fences. */ 432 if (fence) { 433 struct r600_multi_fence *multi_fence = 434 CALLOC_STRUCT(r600_multi_fence); 435 if (!multi_fence) { 436 ws->fence_reference(&sdma_fence, NULL); 437 ws->fence_reference(&gfx_fence, NULL); 438 goto finish; 439 } 440 441 multi_fence->reference.count = 1; 442 /* If both fences are NULL, fence_finish will always return true. */ 443 multi_fence->gfx = gfx_fence; 444 multi_fence->sdma = sdma_fence; 445 446 if (deferred_fence) { 447 multi_fence->gfx_unflushed.ctx = rctx; 448 multi_fence->gfx_unflushed.ib_index = rctx->num_gfx_cs_flushes; 449 } 450 451 screen->fence_reference(screen, fence, NULL); 452 *fence = (struct pipe_fence_handle*)multi_fence; 453 } 454 finish: 455 if (!(flags & PIPE_FLUSH_DEFERRED)) { 456 if (rctx->dma.cs) 457 ws->cs_sync_flush(rctx->dma.cs); 458 ws->cs_sync_flush(rctx->gfx.cs); 459 } 460 } 461 462 static void r600_flush_dma_ring(void *ctx, unsigned flags, 463 struct pipe_fence_handle **fence) 464 { 465 struct r600_common_context *rctx = (struct r600_common_context *)ctx; 466 struct radeon_winsys_cs *cs = rctx->dma.cs; 467 struct radeon_saved_cs saved; 468 bool check_vm = 469 (rctx->screen->debug_flags & DBG_CHECK_VM) && 470 rctx->check_vm_faults; 471 472 if (!radeon_emitted(cs, 0)) { 473 if (fence) 474 rctx->ws->fence_reference(fence, rctx->last_sdma_fence); 475 return; 476 } 477 478 if (check_vm) 479 radeon_save_cs(rctx->ws, cs, &saved, true); 480 481 rctx->ws->cs_flush(cs, flags, &rctx->last_sdma_fence); 482 if (fence) 483 rctx->ws->fence_reference(fence, rctx->last_sdma_fence); 484 485 if (check_vm) { 486 /* Use conservative timeout 800ms, after which we won't wait any 487 * longer and assume the GPU is hung. 488 */ 489 rctx->ws->fence_wait(rctx->ws, rctx->last_sdma_fence, 800*1000*1000); 490 491 rctx->check_vm_faults(rctx, &saved, RING_DMA); 492 radeon_clear_saved_cs(&saved); 493 } 494 } 495 496 /** 497 * Store a linearized copy of all chunks of \p cs together with the buffer 498 * list in \p saved. 499 */ 500 void radeon_save_cs(struct radeon_winsys *ws, struct radeon_winsys_cs *cs, 501 struct radeon_saved_cs *saved, bool get_buffer_list) 502 { 503 uint32_t *buf; 504 unsigned i; 505 506 /* Save the IB chunks. */ 507 saved->num_dw = cs->prev_dw + cs->current.cdw; 508 saved->ib = MALLOC(4 * saved->num_dw); 509 if (!saved->ib) 510 goto oom; 511 512 buf = saved->ib; 513 for (i = 0; i < cs->num_prev; ++i) { 514 memcpy(buf, cs->prev[i].buf, cs->prev[i].cdw * 4); 515 buf += cs->prev[i].cdw; 516 } 517 memcpy(buf, cs->current.buf, cs->current.cdw * 4); 518 519 if (!get_buffer_list) 520 return; 521 522 /* Save the buffer list. */ 523 saved->bo_count = ws->cs_get_buffer_list(cs, NULL); 524 saved->bo_list = CALLOC(saved->bo_count, 525 sizeof(saved->bo_list[0])); 526 if (!saved->bo_list) { 527 FREE(saved->ib); 528 goto oom; 529 } 530 ws->cs_get_buffer_list(cs, saved->bo_list); 531 532 return; 533 534 oom: 535 fprintf(stderr, "%s: out of memory\n", __func__); 536 memset(saved, 0, sizeof(*saved)); 537 } 538 539 void radeon_clear_saved_cs(struct radeon_saved_cs *saved) 540 { 541 FREE(saved->ib); 542 FREE(saved->bo_list); 543 544 memset(saved, 0, sizeof(*saved)); 545 } 546 547 static enum pipe_reset_status r600_get_reset_status(struct pipe_context *ctx) 548 { 549 struct r600_common_context *rctx = (struct r600_common_context *)ctx; 550 unsigned latest = rctx->ws->query_value(rctx->ws, 551 RADEON_GPU_RESET_COUNTER); 552 553 if (rctx->gpu_reset_counter == latest) 554 return PIPE_NO_RESET; 555 556 rctx->gpu_reset_counter = latest; 557 return PIPE_UNKNOWN_CONTEXT_RESET; 558 } 559 560 static void r600_set_debug_callback(struct pipe_context *ctx, 561 const struct pipe_debug_callback *cb) 562 { 563 struct r600_common_context *rctx = (struct r600_common_context *)ctx; 564 565 if (cb) 566 rctx->debug = *cb; 567 else 568 memset(&rctx->debug, 0, sizeof(rctx->debug)); 569 } 570 571 static void r600_set_device_reset_callback(struct pipe_context *ctx, 572 const struct pipe_device_reset_callback *cb) 573 { 574 struct r600_common_context *rctx = (struct r600_common_context *)ctx; 575 576 if (cb) 577 rctx->device_reset_callback = *cb; 578 else 579 memset(&rctx->device_reset_callback, 0, 580 sizeof(rctx->device_reset_callback)); 581 } 582 583 bool r600_check_device_reset(struct r600_common_context *rctx) 584 { 585 enum pipe_reset_status status; 586 587 if (!rctx->device_reset_callback.reset) 588 return false; 589 590 if (!rctx->b.get_device_reset_status) 591 return false; 592 593 status = rctx->b.get_device_reset_status(&rctx->b); 594 if (status == PIPE_NO_RESET) 595 return false; 596 597 rctx->device_reset_callback.reset(rctx->device_reset_callback.data, status); 598 return true; 599 } 600 601 static void r600_dma_clear_buffer_fallback(struct pipe_context *ctx, 602 struct pipe_resource *dst, 603 uint64_t offset, uint64_t size, 604 unsigned value) 605 { 606 struct r600_common_context *rctx = (struct r600_common_context *)ctx; 607 608 rctx->clear_buffer(ctx, dst, offset, size, value, R600_COHERENCY_NONE); 609 } 610 611 static bool r600_resource_commit(struct pipe_context *pctx, 612 struct pipe_resource *resource, 613 unsigned level, struct pipe_box *box, 614 bool commit) 615 { 616 struct r600_common_context *ctx = (struct r600_common_context *)pctx; 617 struct r600_resource *res = r600_resource(resource); 618 619 /* 620 * Since buffer commitment changes cannot be pipelined, we need to 621 * (a) flush any pending commands that refer to the buffer we're about 622 * to change, and 623 * (b) wait for threaded submit to finish, including those that were 624 * triggered by some other, earlier operation. 625 */ 626 if (radeon_emitted(ctx->gfx.cs, ctx->initial_gfx_cs_size) && 627 ctx->ws->cs_is_buffer_referenced(ctx->gfx.cs, 628 res->buf, RADEON_USAGE_READWRITE)) { 629 ctx->gfx.flush(ctx, PIPE_FLUSH_ASYNC, NULL); 630 } 631 if (radeon_emitted(ctx->dma.cs, 0) && 632 ctx->ws->cs_is_buffer_referenced(ctx->dma.cs, 633 res->buf, RADEON_USAGE_READWRITE)) { 634 ctx->dma.flush(ctx, PIPE_FLUSH_ASYNC, NULL); 635 } 636 637 ctx->ws->cs_sync_flush(ctx->dma.cs); 638 ctx->ws->cs_sync_flush(ctx->gfx.cs); 639 640 assert(resource->target == PIPE_BUFFER); 641 642 return ctx->ws->buffer_commit(res->buf, box->x, box->width, commit); 643 } 644 645 bool r600_common_context_init(struct r600_common_context *rctx, 646 struct r600_common_screen *rscreen, 647 unsigned context_flags) 648 { 649 slab_create_child(&rctx->pool_transfers, &rscreen->pool_transfers); 650 slab_create_child(&rctx->pool_transfers_unsync, &rscreen->pool_transfers); 651 652 rctx->screen = rscreen; 653 rctx->ws = rscreen->ws; 654 rctx->family = rscreen->family; 655 rctx->chip_class = rscreen->chip_class; 656 657 rctx->b.invalidate_resource = r600_invalidate_resource; 658 rctx->b.resource_commit = r600_resource_commit; 659 rctx->b.transfer_map = u_transfer_map_vtbl; 660 rctx->b.transfer_flush_region = u_transfer_flush_region_vtbl; 661 rctx->b.transfer_unmap = u_transfer_unmap_vtbl; 662 rctx->b.texture_subdata = u_default_texture_subdata; 663 rctx->b.flush = r600_flush_from_st; 664 rctx->b.set_debug_callback = r600_set_debug_callback; 665 rctx->b.fence_server_sync = r600_fence_server_sync; 666 rctx->dma_clear_buffer = r600_dma_clear_buffer_fallback; 667 668 /* evergreen_compute.c has a special codepath for global buffers. 669 * Everything else can use the direct path. 670 */ 671 if ((rscreen->chip_class == EVERGREEN || rscreen->chip_class == CAYMAN) && 672 (context_flags & PIPE_CONTEXT_COMPUTE_ONLY)) 673 rctx->b.buffer_subdata = u_default_buffer_subdata; 674 else 675 rctx->b.buffer_subdata = r600_buffer_subdata; 676 677 if (rscreen->info.drm_major == 2 && rscreen->info.drm_minor >= 43) { 678 rctx->b.get_device_reset_status = r600_get_reset_status; 679 rctx->gpu_reset_counter = 680 rctx->ws->query_value(rctx->ws, 681 RADEON_GPU_RESET_COUNTER); 682 } 683 684 rctx->b.set_device_reset_callback = r600_set_device_reset_callback; 685 686 r600_init_context_texture_functions(rctx); 687 r600_init_viewport_functions(rctx); 688 r600_streamout_init(rctx); 689 r600_query_init(rctx); 690 cayman_init_msaa(&rctx->b); 691 692 rctx->allocator_zeroed_memory = 693 u_suballocator_create(&rctx->b, rscreen->info.gart_page_size, 694 0, PIPE_USAGE_DEFAULT, 0, true); 695 if (!rctx->allocator_zeroed_memory) 696 return false; 697 698 rctx->b.stream_uploader = u_upload_create(&rctx->b, 1024 * 1024, 699 0, PIPE_USAGE_STREAM, 0); 700 if (!rctx->b.stream_uploader) 701 return false; 702 703 rctx->b.const_uploader = u_upload_create(&rctx->b, 128 * 1024, 704 0, PIPE_USAGE_DEFAULT, 0); 705 if (!rctx->b.const_uploader) 706 return false; 707 708 rctx->ctx = rctx->ws->ctx_create(rctx->ws); 709 if (!rctx->ctx) 710 return false; 711 712 if (rscreen->info.num_sdma_rings && !(rscreen->debug_flags & DBG_NO_ASYNC_DMA)) { 713 rctx->dma.cs = rctx->ws->cs_create(rctx->ctx, RING_DMA, 714 r600_flush_dma_ring, 715 rctx); 716 rctx->dma.flush = r600_flush_dma_ring; 717 } 718 719 return true; 720 } 721 722 void r600_common_context_cleanup(struct r600_common_context *rctx) 723 { 724 if (rctx->query_result_shader) 725 rctx->b.delete_compute_state(&rctx->b, rctx->query_result_shader); 726 727 if (rctx->gfx.cs) 728 rctx->ws->cs_destroy(rctx->gfx.cs); 729 if (rctx->dma.cs) 730 rctx->ws->cs_destroy(rctx->dma.cs); 731 if (rctx->ctx) 732 rctx->ws->ctx_destroy(rctx->ctx); 733 734 if (rctx->b.stream_uploader) 735 u_upload_destroy(rctx->b.stream_uploader); 736 if (rctx->b.const_uploader) 737 u_upload_destroy(rctx->b.const_uploader); 738 739 slab_destroy_child(&rctx->pool_transfers); 740 slab_destroy_child(&rctx->pool_transfers_unsync); 741 742 if (rctx->allocator_zeroed_memory) { 743 u_suballocator_destroy(rctx->allocator_zeroed_memory); 744 } 745 rctx->ws->fence_reference(&rctx->last_gfx_fence, NULL); 746 rctx->ws->fence_reference(&rctx->last_sdma_fence, NULL); 747 r600_resource_reference(&rctx->eop_bug_scratch, NULL); 748 } 749 750 /* 751 * pipe_screen 752 */ 753 754 static const struct debug_named_value common_debug_options[] = { 755 /* logging */ 756 { "tex", DBG_TEX, "Print texture info" }, 757 { "nir", DBG_NIR, "Enable experimental NIR shaders" }, 758 { "compute", DBG_COMPUTE, "Print compute info" }, 759 { "vm", DBG_VM, "Print virtual addresses when creating resources" }, 760 { "info", DBG_INFO, "Print driver information" }, 761 762 /* shaders */ 763 { "fs", DBG_FS, "Print fetch shaders" }, 764 { "vs", DBG_VS, "Print vertex shaders" }, 765 { "gs", DBG_GS, "Print geometry shaders" }, 766 { "ps", DBG_PS, "Print pixel shaders" }, 767 { "cs", DBG_CS, "Print compute shaders" }, 768 { "tcs", DBG_TCS, "Print tessellation control shaders" }, 769 { "tes", DBG_TES, "Print tessellation evaluation shaders" }, 770 { "noir", DBG_NO_IR, "Don't print the LLVM IR"}, 771 { "notgsi", DBG_NO_TGSI, "Don't print the TGSI"}, 772 { "noasm", DBG_NO_ASM, "Don't print disassembled shaders"}, 773 { "preoptir", DBG_PREOPT_IR, "Print the LLVM IR before initial optimizations" }, 774 { "checkir", DBG_CHECK_IR, "Enable additional sanity checks on shader IR" }, 775 { "nooptvariant", DBG_NO_OPT_VARIANT, "Disable compiling optimized shader variants." }, 776 777 { "testdma", DBG_TEST_DMA, "Invoke SDMA tests and exit." }, 778 { "testvmfaultcp", DBG_TEST_VMFAULT_CP, "Invoke a CP VM fault test and exit." }, 779 { "testvmfaultsdma", DBG_TEST_VMFAULT_SDMA, "Invoke a SDMA VM fault test and exit." }, 780 { "testvmfaultshader", DBG_TEST_VMFAULT_SHADER, "Invoke a shader VM fault test and exit." }, 781 782 /* features */ 783 { "nodma", DBG_NO_ASYNC_DMA, "Disable asynchronous DMA" }, 784 { "nohyperz", DBG_NO_HYPERZ, "Disable Hyper-Z" }, 785 /* GL uses the word INVALIDATE, gallium uses the word DISCARD */ 786 { "noinvalrange", DBG_NO_DISCARD_RANGE, "Disable handling of INVALIDATE_RANGE map flags" }, 787 { "no2d", DBG_NO_2D_TILING, "Disable 2D tiling" }, 788 { "notiling", DBG_NO_TILING, "Disable tiling" }, 789 { "switch_on_eop", DBG_SWITCH_ON_EOP, "Program WD/IA to switch on end-of-packet." }, 790 { "forcedma", DBG_FORCE_DMA, "Use asynchronous DMA for all operations when possible." }, 791 { "precompile", DBG_PRECOMPILE, "Compile one shader variant at shader creation." }, 792 { "nowc", DBG_NO_WC, "Disable GTT write combining" }, 793 { "check_vm", DBG_CHECK_VM, "Check VM faults and dump debug info." }, 794 { "unsafemath", DBG_UNSAFE_MATH, "Enable unsafe math shader optimizations" }, 795 796 DEBUG_NAMED_VALUE_END /* must be last */ 797 }; 798 799 static const char* r600_get_vendor(struct pipe_screen* pscreen) 800 { 801 return "X.Org"; 802 } 803 804 static const char* r600_get_device_vendor(struct pipe_screen* pscreen) 805 { 806 return "AMD"; 807 } 808 809 static const char *r600_get_marketing_name(struct radeon_winsys *ws) 810 { 811 if (!ws->get_chip_name) 812 return NULL; 813 return ws->get_chip_name(ws); 814 } 815 816 static const char *r600_get_family_name(const struct r600_common_screen *rscreen) 817 { 818 switch (rscreen->info.family) { 819 case CHIP_R600: return "AMD R600"; 820 case CHIP_RV610: return "AMD RV610"; 821 case CHIP_RV630: return "AMD RV630"; 822 case CHIP_RV670: return "AMD RV670"; 823 case CHIP_RV620: return "AMD RV620"; 824 case CHIP_RV635: return "AMD RV635"; 825 case CHIP_RS780: return "AMD RS780"; 826 case CHIP_RS880: return "AMD RS880"; 827 case CHIP_RV770: return "AMD RV770"; 828 case CHIP_RV730: return "AMD RV730"; 829 case CHIP_RV710: return "AMD RV710"; 830 case CHIP_RV740: return "AMD RV740"; 831 case CHIP_CEDAR: return "AMD CEDAR"; 832 case CHIP_REDWOOD: return "AMD REDWOOD"; 833 case CHIP_JUNIPER: return "AMD JUNIPER"; 834 case CHIP_CYPRESS: return "AMD CYPRESS"; 835 case CHIP_HEMLOCK: return "AMD HEMLOCK"; 836 case CHIP_PALM: return "AMD PALM"; 837 case CHIP_SUMO: return "AMD SUMO"; 838 case CHIP_SUMO2: return "AMD SUMO2"; 839 case CHIP_BARTS: return "AMD BARTS"; 840 case CHIP_TURKS: return "AMD TURKS"; 841 case CHIP_CAICOS: return "AMD CAICOS"; 842 case CHIP_CAYMAN: return "AMD CAYMAN"; 843 case CHIP_ARUBA: return "AMD ARUBA"; 844 default: return "AMD unknown"; 845 } 846 } 847 848 static void r600_disk_cache_create(struct r600_common_screen *rscreen) 849 { 850 /* Don't use the cache if shader dumping is enabled. */ 851 if (rscreen->debug_flags & DBG_ALL_SHADERS) 852 return; 853 854 uint32_t mesa_timestamp; 855 if (disk_cache_get_function_timestamp(r600_disk_cache_create, 856 &mesa_timestamp)) { 857 char *timestamp_str; 858 int res = -1; 859 860 res = asprintf(×tamp_str, "%u",mesa_timestamp); 861 if (res != -1) { 862 /* These flags affect shader compilation. */ 863 uint64_t shader_debug_flags = 864 rscreen->debug_flags & 865 (DBG_FS_CORRECT_DERIVS_AFTER_KILL | 866 DBG_UNSAFE_MATH); 867 868 rscreen->disk_shader_cache = 869 disk_cache_create(r600_get_family_name(rscreen), 870 timestamp_str, 871 shader_debug_flags); 872 free(timestamp_str); 873 } 874 } 875 } 876 877 static struct disk_cache *r600_get_disk_shader_cache(struct pipe_screen *pscreen) 878 { 879 struct r600_common_screen *rscreen = (struct r600_common_screen*)pscreen; 880 return rscreen->disk_shader_cache; 881 } 882 883 static const char* r600_get_name(struct pipe_screen* pscreen) 884 { 885 struct r600_common_screen *rscreen = (struct r600_common_screen*)pscreen; 886 887 return rscreen->renderer_string; 888 } 889 890 static float r600_get_paramf(struct pipe_screen* pscreen, 891 enum pipe_capf param) 892 { 893 struct r600_common_screen *rscreen = (struct r600_common_screen *)pscreen; 894 895 switch (param) { 896 case PIPE_CAPF_MAX_LINE_WIDTH: 897 case PIPE_CAPF_MAX_LINE_WIDTH_AA: 898 case PIPE_CAPF_MAX_POINT_WIDTH: 899 case PIPE_CAPF_MAX_POINT_WIDTH_AA: 900 if (rscreen->family >= CHIP_CEDAR) 901 return 16384.0f; 902 else 903 return 8192.0f; 904 case PIPE_CAPF_MAX_TEXTURE_ANISOTROPY: 905 return 16.0f; 906 case PIPE_CAPF_MAX_TEXTURE_LOD_BIAS: 907 return 16.0f; 908 case PIPE_CAPF_GUARD_BAND_LEFT: 909 case PIPE_CAPF_GUARD_BAND_TOP: 910 case PIPE_CAPF_GUARD_BAND_RIGHT: 911 case PIPE_CAPF_GUARD_BAND_BOTTOM: 912 return 0.0f; 913 } 914 return 0.0f; 915 } 916 917 static int r600_get_video_param(struct pipe_screen *screen, 918 enum pipe_video_profile profile, 919 enum pipe_video_entrypoint entrypoint, 920 enum pipe_video_cap param) 921 { 922 switch (param) { 923 case PIPE_VIDEO_CAP_SUPPORTED: 924 return vl_profile_supported(screen, profile, entrypoint); 925 case PIPE_VIDEO_CAP_NPOT_TEXTURES: 926 return 1; 927 case PIPE_VIDEO_CAP_MAX_WIDTH: 928 case PIPE_VIDEO_CAP_MAX_HEIGHT: 929 return vl_video_buffer_max_size(screen); 930 case PIPE_VIDEO_CAP_PREFERED_FORMAT: 931 return PIPE_FORMAT_NV12; 932 case PIPE_VIDEO_CAP_PREFERS_INTERLACED: 933 return false; 934 case PIPE_VIDEO_CAP_SUPPORTS_INTERLACED: 935 return false; 936 case PIPE_VIDEO_CAP_SUPPORTS_PROGRESSIVE: 937 return true; 938 case PIPE_VIDEO_CAP_MAX_LEVEL: 939 return vl_level_supported(screen, profile); 940 default: 941 return 0; 942 } 943 } 944 945 const char *r600_get_llvm_processor_name(enum radeon_family family) 946 { 947 switch (family) { 948 case CHIP_R600: 949 case CHIP_RV630: 950 case CHIP_RV635: 951 case CHIP_RV670: 952 return "r600"; 953 case CHIP_RV610: 954 case CHIP_RV620: 955 case CHIP_RS780: 956 case CHIP_RS880: 957 return "rs880"; 958 case CHIP_RV710: 959 return "rv710"; 960 case CHIP_RV730: 961 return "rv730"; 962 case CHIP_RV740: 963 case CHIP_RV770: 964 return "rv770"; 965 case CHIP_PALM: 966 case CHIP_CEDAR: 967 return "cedar"; 968 case CHIP_SUMO: 969 case CHIP_SUMO2: 970 return "sumo"; 971 case CHIP_REDWOOD: 972 return "redwood"; 973 case CHIP_JUNIPER: 974 return "juniper"; 975 case CHIP_HEMLOCK: 976 case CHIP_CYPRESS: 977 return "cypress"; 978 case CHIP_BARTS: 979 return "barts"; 980 case CHIP_TURKS: 981 return "turks"; 982 case CHIP_CAICOS: 983 return "caicos"; 984 case CHIP_CAYMAN: 985 case CHIP_ARUBA: 986 return "cayman"; 987 988 default: 989 return ""; 990 } 991 } 992 993 static unsigned get_max_threads_per_block(struct r600_common_screen *screen, 994 enum pipe_shader_ir ir_type) 995 { 996 if (ir_type != PIPE_SHADER_IR_TGSI) 997 return 256; 998 if (screen->chip_class >= EVERGREEN) 999 return 2048; 1000 return 256; 1001 } 1002 1003 static int r600_get_compute_param(struct pipe_screen *screen, 1004 enum pipe_shader_ir ir_type, 1005 enum pipe_compute_cap param, 1006 void *ret) 1007 { 1008 struct r600_common_screen *rscreen = (struct r600_common_screen *)screen; 1009 1010 //TODO: select these params by asic 1011 switch (param) { 1012 case PIPE_COMPUTE_CAP_IR_TARGET: { 1013 const char *gpu; 1014 const char *triple = "r600--"; 1015 gpu = r600_get_llvm_processor_name(rscreen->family); 1016 if (ret) { 1017 sprintf(ret, "%s-%s", gpu, triple); 1018 } 1019 /* +2 for dash and terminating NIL byte */ 1020 return (strlen(triple) + strlen(gpu) + 2) * sizeof(char); 1021 } 1022 case PIPE_COMPUTE_CAP_GRID_DIMENSION: 1023 if (ret) { 1024 uint64_t *grid_dimension = ret; 1025 grid_dimension[0] = 3; 1026 } 1027 return 1 * sizeof(uint64_t); 1028 1029 case PIPE_COMPUTE_CAP_MAX_GRID_SIZE: 1030 if (ret) { 1031 uint64_t *grid_size = ret; 1032 grid_size[0] = 65535; 1033 grid_size[1] = 65535; 1034 grid_size[2] = 65535; 1035 } 1036 return 3 * sizeof(uint64_t) ; 1037 1038 case PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE: 1039 if (ret) { 1040 uint64_t *block_size = ret; 1041 unsigned threads_per_block = get_max_threads_per_block(rscreen, ir_type); 1042 block_size[0] = threads_per_block; 1043 block_size[1] = threads_per_block; 1044 block_size[2] = threads_per_block; 1045 } 1046 return 3 * sizeof(uint64_t); 1047 1048 case PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK: 1049 if (ret) { 1050 uint64_t *max_threads_per_block = ret; 1051 *max_threads_per_block = get_max_threads_per_block(rscreen, ir_type); 1052 } 1053 return sizeof(uint64_t); 1054 case PIPE_COMPUTE_CAP_ADDRESS_BITS: 1055 if (ret) { 1056 uint32_t *address_bits = ret; 1057 address_bits[0] = 32; 1058 } 1059 return 1 * sizeof(uint32_t); 1060 1061 case PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE: 1062 if (ret) { 1063 uint64_t *max_global_size = ret; 1064 uint64_t max_mem_alloc_size; 1065 1066 r600_get_compute_param(screen, ir_type, 1067 PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE, 1068 &max_mem_alloc_size); 1069 1070 /* In OpenCL, the MAX_MEM_ALLOC_SIZE must be at least 1071 * 1/4 of the MAX_GLOBAL_SIZE. Since the 1072 * MAX_MEM_ALLOC_SIZE is fixed for older kernels, 1073 * make sure we never report more than 1074 * 4 * MAX_MEM_ALLOC_SIZE. 1075 */ 1076 *max_global_size = MIN2(4 * max_mem_alloc_size, 1077 MAX2(rscreen->info.gart_size, 1078 rscreen->info.vram_size)); 1079 } 1080 return sizeof(uint64_t); 1081 1082 case PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE: 1083 if (ret) { 1084 uint64_t *max_local_size = ret; 1085 /* Value reported by the closed source driver. */ 1086 *max_local_size = 32768; 1087 } 1088 return sizeof(uint64_t); 1089 1090 case PIPE_COMPUTE_CAP_MAX_INPUT_SIZE: 1091 if (ret) { 1092 uint64_t *max_input_size = ret; 1093 /* Value reported by the closed source driver. */ 1094 *max_input_size = 1024; 1095 } 1096 return sizeof(uint64_t); 1097 1098 case PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE: 1099 if (ret) { 1100 uint64_t *max_mem_alloc_size = ret; 1101 1102 *max_mem_alloc_size = rscreen->info.max_alloc_size; 1103 } 1104 return sizeof(uint64_t); 1105 1106 case PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY: 1107 if (ret) { 1108 uint32_t *max_clock_frequency = ret; 1109 *max_clock_frequency = rscreen->info.max_shader_clock; 1110 } 1111 return sizeof(uint32_t); 1112 1113 case PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS: 1114 if (ret) { 1115 uint32_t *max_compute_units = ret; 1116 *max_compute_units = rscreen->info.num_good_compute_units; 1117 } 1118 return sizeof(uint32_t); 1119 1120 case PIPE_COMPUTE_CAP_IMAGES_SUPPORTED: 1121 if (ret) { 1122 uint32_t *images_supported = ret; 1123 *images_supported = 0; 1124 } 1125 return sizeof(uint32_t); 1126 case PIPE_COMPUTE_CAP_MAX_PRIVATE_SIZE: 1127 break; /* unused */ 1128 case PIPE_COMPUTE_CAP_SUBGROUP_SIZE: 1129 if (ret) { 1130 uint32_t *subgroup_size = ret; 1131 *subgroup_size = r600_wavefront_size(rscreen->family); 1132 } 1133 return sizeof(uint32_t); 1134 case PIPE_COMPUTE_CAP_MAX_VARIABLE_THREADS_PER_BLOCK: 1135 if (ret) { 1136 uint64_t *max_variable_threads_per_block = ret; 1137 *max_variable_threads_per_block = 0; 1138 } 1139 return sizeof(uint64_t); 1140 } 1141 1142 fprintf(stderr, "unknown PIPE_COMPUTE_CAP %d\n", param); 1143 return 0; 1144 } 1145 1146 static uint64_t r600_get_timestamp(struct pipe_screen *screen) 1147 { 1148 struct r600_common_screen *rscreen = (struct r600_common_screen*)screen; 1149 1150 return 1000000 * rscreen->ws->query_value(rscreen->ws, RADEON_TIMESTAMP) / 1151 rscreen->info.clock_crystal_freq; 1152 } 1153 1154 static void r600_fence_reference(struct pipe_screen *screen, 1155 struct pipe_fence_handle **dst, 1156 struct pipe_fence_handle *src) 1157 { 1158 struct radeon_winsys *ws = ((struct r600_common_screen*)screen)->ws; 1159 struct r600_multi_fence **rdst = (struct r600_multi_fence **)dst; 1160 struct r600_multi_fence *rsrc = (struct r600_multi_fence *)src; 1161 1162 if (pipe_reference(&(*rdst)->reference, &rsrc->reference)) { 1163 ws->fence_reference(&(*rdst)->gfx, NULL); 1164 ws->fence_reference(&(*rdst)->sdma, NULL); 1165 FREE(*rdst); 1166 } 1167 *rdst = rsrc; 1168 } 1169 1170 static boolean r600_fence_finish(struct pipe_screen *screen, 1171 struct pipe_context *ctx, 1172 struct pipe_fence_handle *fence, 1173 uint64_t timeout) 1174 { 1175 struct radeon_winsys *rws = ((struct r600_common_screen*)screen)->ws; 1176 struct r600_multi_fence *rfence = (struct r600_multi_fence *)fence; 1177 struct r600_common_context *rctx; 1178 int64_t abs_timeout = os_time_get_absolute_timeout(timeout); 1179 1180 ctx = threaded_context_unwrap_sync(ctx); 1181 rctx = ctx ? (struct r600_common_context*)ctx : NULL; 1182 1183 if (rfence->sdma) { 1184 if (!rws->fence_wait(rws, rfence->sdma, timeout)) 1185 return false; 1186 1187 /* Recompute the timeout after waiting. */ 1188 if (timeout && timeout != PIPE_TIMEOUT_INFINITE) { 1189 int64_t time = os_time_get_nano(); 1190 timeout = abs_timeout > time ? abs_timeout - time : 0; 1191 } 1192 } 1193 1194 if (!rfence->gfx) 1195 return true; 1196 1197 /* Flush the gfx IB if it hasn't been flushed yet. */ 1198 if (rctx && 1199 rfence->gfx_unflushed.ctx == rctx && 1200 rfence->gfx_unflushed.ib_index == rctx->num_gfx_cs_flushes) { 1201 rctx->gfx.flush(rctx, timeout ? 0 : PIPE_FLUSH_ASYNC, NULL); 1202 rfence->gfx_unflushed.ctx = NULL; 1203 1204 if (!timeout) 1205 return false; 1206 1207 /* Recompute the timeout after all that. */ 1208 if (timeout && timeout != PIPE_TIMEOUT_INFINITE) { 1209 int64_t time = os_time_get_nano(); 1210 timeout = abs_timeout > time ? abs_timeout - time : 0; 1211 } 1212 } 1213 1214 return rws->fence_wait(rws, rfence->gfx, timeout); 1215 } 1216 1217 static void r600_query_memory_info(struct pipe_screen *screen, 1218 struct pipe_memory_info *info) 1219 { 1220 struct r600_common_screen *rscreen = (struct r600_common_screen*)screen; 1221 struct radeon_winsys *ws = rscreen->ws; 1222 unsigned vram_usage, gtt_usage; 1223 1224 info->total_device_memory = rscreen->info.vram_size / 1024; 1225 info->total_staging_memory = rscreen->info.gart_size / 1024; 1226 1227 /* The real TTM memory usage is somewhat random, because: 1228 * 1229 * 1) TTM delays freeing memory, because it can only free it after 1230 * fences expire. 1231 * 1232 * 2) The memory usage can be really low if big VRAM evictions are 1233 * taking place, but the real usage is well above the size of VRAM. 1234 * 1235 * Instead, return statistics of this process. 1236 */ 1237 vram_usage = ws->query_value(ws, RADEON_REQUESTED_VRAM_MEMORY) / 1024; 1238 gtt_usage = ws->query_value(ws, RADEON_REQUESTED_GTT_MEMORY) / 1024; 1239 1240 info->avail_device_memory = 1241 vram_usage <= info->total_device_memory ? 1242 info->total_device_memory - vram_usage : 0; 1243 info->avail_staging_memory = 1244 gtt_usage <= info->total_staging_memory ? 1245 info->total_staging_memory - gtt_usage : 0; 1246 1247 info->device_memory_evicted = 1248 ws->query_value(ws, RADEON_NUM_BYTES_MOVED) / 1024; 1249 1250 if (rscreen->info.drm_major == 3 && rscreen->info.drm_minor >= 4) 1251 info->nr_device_memory_evictions = 1252 ws->query_value(ws, RADEON_NUM_EVICTIONS); 1253 else 1254 /* Just return the number of evicted 64KB pages. */ 1255 info->nr_device_memory_evictions = info->device_memory_evicted / 64; 1256 } 1257 1258 struct pipe_resource *r600_resource_create_common(struct pipe_screen *screen, 1259 const struct pipe_resource *templ) 1260 { 1261 if (templ->target == PIPE_BUFFER) { 1262 return r600_buffer_create(screen, templ, 256); 1263 } else { 1264 return r600_texture_create(screen, templ); 1265 } 1266 } 1267 1268 bool r600_common_screen_init(struct r600_common_screen *rscreen, 1269 struct radeon_winsys *ws) 1270 { 1271 char family_name[32] = {}, llvm_string[32] = {}, kernel_version[128] = {}; 1272 struct utsname uname_data; 1273 const char *chip_name; 1274 1275 ws->query_info(ws, &rscreen->info); 1276 rscreen->ws = ws; 1277 1278 if ((chip_name = r600_get_marketing_name(ws))) 1279 snprintf(family_name, sizeof(family_name), "%s / ", 1280 r600_get_family_name(rscreen) + 4); 1281 else 1282 chip_name = r600_get_family_name(rscreen); 1283 1284 if (uname(&uname_data) == 0) 1285 snprintf(kernel_version, sizeof(kernel_version), 1286 " / %s", uname_data.release); 1287 1288 if (HAVE_LLVM > 0) { 1289 snprintf(llvm_string, sizeof(llvm_string), 1290 ", LLVM %i.%i.%i", (HAVE_LLVM >> 8) & 0xff, 1291 HAVE_LLVM & 0xff, MESA_LLVM_VERSION_PATCH); 1292 } 1293 1294 snprintf(rscreen->renderer_string, sizeof(rscreen->renderer_string), 1295 "%s (%sDRM %i.%i.%i%s%s)", 1296 chip_name, family_name, rscreen->info.drm_major, 1297 rscreen->info.drm_minor, rscreen->info.drm_patchlevel, 1298 kernel_version, llvm_string); 1299 1300 rscreen->b.get_name = r600_get_name; 1301 rscreen->b.get_vendor = r600_get_vendor; 1302 rscreen->b.get_device_vendor = r600_get_device_vendor; 1303 rscreen->b.get_disk_shader_cache = r600_get_disk_shader_cache; 1304 rscreen->b.get_compute_param = r600_get_compute_param; 1305 rscreen->b.get_paramf = r600_get_paramf; 1306 rscreen->b.get_timestamp = r600_get_timestamp; 1307 rscreen->b.fence_finish = r600_fence_finish; 1308 rscreen->b.fence_reference = r600_fence_reference; 1309 rscreen->b.resource_destroy = u_resource_destroy_vtbl; 1310 rscreen->b.resource_from_user_memory = r600_buffer_from_user_memory; 1311 rscreen->b.query_memory_info = r600_query_memory_info; 1312 1313 if (rscreen->info.has_hw_decode) { 1314 rscreen->b.get_video_param = rvid_get_video_param; 1315 rscreen->b.is_video_format_supported = rvid_is_format_supported; 1316 } else { 1317 rscreen->b.get_video_param = r600_get_video_param; 1318 rscreen->b.is_video_format_supported = vl_video_buffer_is_format_supported; 1319 } 1320 1321 r600_init_screen_texture_functions(rscreen); 1322 r600_init_screen_query_functions(rscreen); 1323 1324 rscreen->family = rscreen->info.family; 1325 rscreen->chip_class = rscreen->info.chip_class; 1326 rscreen->debug_flags |= debug_get_flags_option("R600_DEBUG", common_debug_options, 0); 1327 1328 r600_disk_cache_create(rscreen); 1329 1330 slab_create_parent(&rscreen->pool_transfers, sizeof(struct r600_transfer), 64); 1331 1332 rscreen->force_aniso = MIN2(16, debug_get_num_option("R600_TEX_ANISO", -1)); 1333 if (rscreen->force_aniso >= 0) { 1334 printf("radeon: Forcing anisotropy filter to %ix\n", 1335 /* round down to a power of two */ 1336 1 << util_logbase2(rscreen->force_aniso)); 1337 } 1338 1339 (void) mtx_init(&rscreen->aux_context_lock, mtx_plain); 1340 (void) mtx_init(&rscreen->gpu_load_mutex, mtx_plain); 1341 1342 if (rscreen->debug_flags & DBG_INFO) { 1343 printf("pci (domain:bus:dev.func): %04x:%02x:%02x.%x\n", 1344 rscreen->info.pci_domain, rscreen->info.pci_bus, 1345 rscreen->info.pci_dev, rscreen->info.pci_func); 1346 printf("pci_id = 0x%x\n", rscreen->info.pci_id); 1347 printf("family = %i (%s)\n", rscreen->info.family, 1348 r600_get_family_name(rscreen)); 1349 printf("chip_class = %i\n", rscreen->info.chip_class); 1350 printf("pte_fragment_size = %u\n", rscreen->info.pte_fragment_size); 1351 printf("gart_page_size = %u\n", rscreen->info.gart_page_size); 1352 printf("gart_size = %i MB\n", (int)DIV_ROUND_UP(rscreen->info.gart_size, 1024*1024)); 1353 printf("vram_size = %i MB\n", (int)DIV_ROUND_UP(rscreen->info.vram_size, 1024*1024)); 1354 printf("vram_vis_size = %i MB\n", (int)DIV_ROUND_UP(rscreen->info.vram_vis_size, 1024*1024)); 1355 printf("max_alloc_size = %i MB\n", 1356 (int)DIV_ROUND_UP(rscreen->info.max_alloc_size, 1024*1024)); 1357 printf("min_alloc_size = %u\n", rscreen->info.min_alloc_size); 1358 printf("has_dedicated_vram = %u\n", rscreen->info.has_dedicated_vram); 1359 printf("has_virtual_memory = %i\n", rscreen->info.has_virtual_memory); 1360 printf("gfx_ib_pad_with_type2 = %i\n", rscreen->info.gfx_ib_pad_with_type2); 1361 printf("has_hw_decode = %u\n", rscreen->info.has_hw_decode); 1362 printf("num_sdma_rings = %i\n", rscreen->info.num_sdma_rings); 1363 printf("num_compute_rings = %u\n", rscreen->info.num_compute_rings); 1364 printf("uvd_fw_version = %u\n", rscreen->info.uvd_fw_version); 1365 printf("vce_fw_version = %u\n", rscreen->info.vce_fw_version); 1366 printf("me_fw_version = %i\n", rscreen->info.me_fw_version); 1367 printf("pfp_fw_version = %i\n", rscreen->info.pfp_fw_version); 1368 printf("ce_fw_version = %i\n", rscreen->info.ce_fw_version); 1369 printf("vce_harvest_config = %i\n", rscreen->info.vce_harvest_config); 1370 printf("clock_crystal_freq = %i\n", rscreen->info.clock_crystal_freq); 1371 printf("tcc_cache_line_size = %u\n", rscreen->info.tcc_cache_line_size); 1372 printf("drm = %i.%i.%i\n", rscreen->info.drm_major, 1373 rscreen->info.drm_minor, rscreen->info.drm_patchlevel); 1374 printf("has_userptr = %i\n", rscreen->info.has_userptr); 1375 printf("has_syncobj = %u\n", rscreen->info.has_syncobj); 1376 1377 printf("r600_max_quad_pipes = %i\n", rscreen->info.r600_max_quad_pipes); 1378 printf("max_shader_clock = %i\n", rscreen->info.max_shader_clock); 1379 printf("num_good_compute_units = %i\n", rscreen->info.num_good_compute_units); 1380 printf("max_se = %i\n", rscreen->info.max_se); 1381 printf("max_sh_per_se = %i\n", rscreen->info.max_sh_per_se); 1382 1383 printf("r600_gb_backend_map = %i\n", rscreen->info.r600_gb_backend_map); 1384 printf("r600_gb_backend_map_valid = %i\n", rscreen->info.r600_gb_backend_map_valid); 1385 printf("r600_num_banks = %i\n", rscreen->info.r600_num_banks); 1386 printf("num_render_backends = %i\n", rscreen->info.num_render_backends); 1387 printf("num_tile_pipes = %i\n", rscreen->info.num_tile_pipes); 1388 printf("pipe_interleave_bytes = %i\n", rscreen->info.pipe_interleave_bytes); 1389 printf("enabled_rb_mask = 0x%x\n", rscreen->info.enabled_rb_mask); 1390 printf("max_alignment = %u\n", (unsigned)rscreen->info.max_alignment); 1391 } 1392 return true; 1393 } 1394 1395 void r600_destroy_common_screen(struct r600_common_screen *rscreen) 1396 { 1397 r600_perfcounters_destroy(rscreen); 1398 r600_gpu_load_kill_thread(rscreen); 1399 1400 mtx_destroy(&rscreen->gpu_load_mutex); 1401 mtx_destroy(&rscreen->aux_context_lock); 1402 rscreen->aux_context->destroy(rscreen->aux_context); 1403 1404 slab_destroy_parent(&rscreen->pool_transfers); 1405 1406 disk_cache_destroy(rscreen->disk_shader_cache); 1407 rscreen->ws->destroy(rscreen->ws); 1408 FREE(rscreen); 1409 } 1410 1411 bool r600_can_dump_shader(struct r600_common_screen *rscreen, 1412 unsigned processor) 1413 { 1414 return rscreen->debug_flags & (1 << processor); 1415 } 1416 1417 bool r600_extra_shader_checks(struct r600_common_screen *rscreen, unsigned processor) 1418 { 1419 return (rscreen->debug_flags & DBG_CHECK_IR) || 1420 r600_can_dump_shader(rscreen, processor); 1421 } 1422 1423 void r600_screen_clear_buffer(struct r600_common_screen *rscreen, struct pipe_resource *dst, 1424 uint64_t offset, uint64_t size, unsigned value) 1425 { 1426 struct r600_common_context *rctx = (struct r600_common_context*)rscreen->aux_context; 1427 1428 mtx_lock(&rscreen->aux_context_lock); 1429 rctx->dma_clear_buffer(&rctx->b, dst, offset, size, value); 1430 rscreen->aux_context->flush(rscreen->aux_context, NULL, 0); 1431 mtx_unlock(&rscreen->aux_context_lock); 1432 } 1433