1 /* 2 * Copyright 2013 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, 20 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 21 * SOFTWARE. 22 * 23 * Authors: Marek Olk <maraeo (at) gmail.com> 24 * 25 */ 26 27 #include "r600_pipe_common.h" 28 #include "r600_cs.h" 29 #include "tgsi/tgsi_parse.h" 30 #include "util/list.h" 31 #include "util/u_draw_quad.h" 32 #include "util/u_memory.h" 33 #include "util/u_format_s3tc.h" 34 #include "util/u_upload_mgr.h" 35 #include "os/os_time.h" 36 #include "vl/vl_decoder.h" 37 #include "vl/vl_video_buffer.h" 38 #include "radeon/radeon_video.h" 39 #include <inttypes.h> 40 #include <sys/utsname.h> 41 42 #ifndef HAVE_LLVM 43 #define HAVE_LLVM 0 44 #endif 45 46 #ifndef MESA_LLVM_VERSION_PATCH 47 #define MESA_LLVM_VERSION_PATCH 0 48 #endif 49 50 struct r600_multi_fence { 51 struct pipe_reference reference; 52 struct pipe_fence_handle *gfx; 53 struct pipe_fence_handle *sdma; 54 55 /* If the context wasn't flushed at fence creation, this is non-NULL. */ 56 struct { 57 struct r600_common_context *ctx; 58 unsigned ib_index; 59 } gfx_unflushed; 60 }; 61 62 /* 63 * shader binary helpers. 64 */ 65 void radeon_shader_binary_init(struct radeon_shader_binary *b) 66 { 67 memset(b, 0, sizeof(*b)); 68 } 69 70 void radeon_shader_binary_clean(struct radeon_shader_binary *b) 71 { 72 if (!b) 73 return; 74 FREE(b->code); 75 FREE(b->config); 76 FREE(b->rodata); 77 FREE(b->global_symbol_offsets); 78 FREE(b->relocs); 79 FREE(b->disasm_string); 80 FREE(b->llvm_ir_string); 81 } 82 83 /* 84 * pipe_context 85 */ 86 87 /** 88 * Write an EOP event. 89 * 90 * \param event EVENT_TYPE_* 91 * \param event_flags Optional cache flush flags (TC) 92 * \param data_sel 1 = fence, 3 = timestamp 93 * \param buf Buffer 94 * \param va GPU address 95 * \param old_value Previous fence value (for a bug workaround) 96 * \param new_value Fence value to write for this event. 97 */ 98 void r600_gfx_write_event_eop(struct r600_common_context *ctx, 99 unsigned event, unsigned event_flags, 100 unsigned data_sel, 101 struct r600_resource *buf, uint64_t va, 102 uint32_t old_fence, uint32_t new_fence) 103 { 104 struct radeon_winsys_cs *cs = ctx->gfx.cs; 105 unsigned op = EVENT_TYPE(event) | 106 EVENT_INDEX(5) | 107 event_flags; 108 109 if (ctx->chip_class == CIK || 110 ctx->chip_class == VI) { 111 /* Two EOP events are required to make all engines go idle 112 * (and optional cache flushes executed) before the timestamp 113 * is written. 114 */ 115 radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0)); 116 radeon_emit(cs, op); 117 radeon_emit(cs, va); 118 radeon_emit(cs, ((va >> 32) & 0xffff) | EOP_DATA_SEL(data_sel)); 119 radeon_emit(cs, old_fence); /* immediate data */ 120 radeon_emit(cs, 0); /* unused */ 121 } 122 123 radeon_emit(cs, PKT3(PKT3_EVENT_WRITE_EOP, 4, 0)); 124 radeon_emit(cs, op); 125 radeon_emit(cs, va); 126 radeon_emit(cs, ((va >> 32) & 0xffff) | EOP_DATA_SEL(data_sel)); 127 radeon_emit(cs, new_fence); /* immediate data */ 128 radeon_emit(cs, 0); /* unused */ 129 130 if (buf) 131 r600_emit_reloc(ctx, &ctx->gfx, buf, RADEON_USAGE_WRITE, 132 RADEON_PRIO_QUERY); 133 } 134 135 unsigned r600_gfx_write_fence_dwords(struct r600_common_screen *screen) 136 { 137 unsigned dwords = 6; 138 139 if (screen->chip_class == CIK || 140 screen->chip_class == VI) 141 dwords *= 2; 142 143 if (!screen->info.has_virtual_memory) 144 dwords += 2; 145 146 return dwords; 147 } 148 149 void r600_gfx_wait_fence(struct r600_common_context *ctx, 150 uint64_t va, uint32_t ref, uint32_t mask) 151 { 152 struct radeon_winsys_cs *cs = ctx->gfx.cs; 153 154 radeon_emit(cs, PKT3(PKT3_WAIT_REG_MEM, 5, 0)); 155 radeon_emit(cs, WAIT_REG_MEM_EQUAL | WAIT_REG_MEM_MEM_SPACE(1)); 156 radeon_emit(cs, va); 157 radeon_emit(cs, va >> 32); 158 radeon_emit(cs, ref); /* reference value */ 159 radeon_emit(cs, mask); /* mask */ 160 radeon_emit(cs, 4); /* poll interval */ 161 } 162 163 void r600_draw_rectangle(struct blitter_context *blitter, 164 int x1, int y1, int x2, int y2, float depth, 165 enum blitter_attrib_type type, 166 const union pipe_color_union *attrib) 167 { 168 struct r600_common_context *rctx = 169 (struct r600_common_context*)util_blitter_get_pipe(blitter); 170 struct pipe_viewport_state viewport; 171 struct pipe_resource *buf = NULL; 172 unsigned offset = 0; 173 float *vb; 174 175 if (type == UTIL_BLITTER_ATTRIB_TEXCOORD) { 176 util_blitter_draw_rectangle(blitter, x1, y1, x2, y2, depth, type, attrib); 177 return; 178 } 179 180 /* Some operations (like color resolve on r6xx) don't work 181 * with the conventional primitive types. 182 * One that works is PT_RECTLIST, which we use here. */ 183 184 /* setup viewport */ 185 viewport.scale[0] = 1.0f; 186 viewport.scale[1] = 1.0f; 187 viewport.scale[2] = 1.0f; 188 viewport.translate[0] = 0.0f; 189 viewport.translate[1] = 0.0f; 190 viewport.translate[2] = 0.0f; 191 rctx->b.set_viewport_states(&rctx->b, 0, 1, &viewport); 192 193 /* Upload vertices. The hw rectangle has only 3 vertices, 194 * I guess the 4th one is derived from the first 3. 195 * The vertex specification should match u_blitter's vertex element state. */ 196 u_upload_alloc(rctx->uploader, 0, sizeof(float) * 24, 256, &offset, &buf, (void**)&vb); 197 if (!buf) 198 return; 199 200 vb[0] = x1; 201 vb[1] = y1; 202 vb[2] = depth; 203 vb[3] = 1; 204 205 vb[8] = x1; 206 vb[9] = y2; 207 vb[10] = depth; 208 vb[11] = 1; 209 210 vb[16] = x2; 211 vb[17] = y1; 212 vb[18] = depth; 213 vb[19] = 1; 214 215 if (attrib) { 216 memcpy(vb+4, attrib->f, sizeof(float)*4); 217 memcpy(vb+12, attrib->f, sizeof(float)*4); 218 memcpy(vb+20, attrib->f, sizeof(float)*4); 219 } 220 221 /* draw */ 222 util_draw_vertex_buffer(&rctx->b, NULL, buf, blitter->vb_slot, offset, 223 R600_PRIM_RECTANGLE_LIST, 3, 2); 224 pipe_resource_reference(&buf, NULL); 225 } 226 227 static void r600_dma_emit_wait_idle(struct r600_common_context *rctx) 228 { 229 struct radeon_winsys_cs *cs = rctx->dma.cs; 230 231 /* NOP waits for idle on Evergreen and later. */ 232 if (rctx->chip_class >= CIK) 233 radeon_emit(cs, 0x00000000); /* NOP */ 234 else if (rctx->chip_class >= EVERGREEN) 235 radeon_emit(cs, 0xf0000000); /* NOP */ 236 else { 237 /* TODO: R600-R700 should use the FENCE packet. 238 * CS checker support is required. */ 239 } 240 } 241 242 void r600_need_dma_space(struct r600_common_context *ctx, unsigned num_dw, 243 struct r600_resource *dst, struct r600_resource *src) 244 { 245 uint64_t vram = ctx->dma.cs->used_vram; 246 uint64_t gtt = ctx->dma.cs->used_gart; 247 248 if (dst) { 249 vram += dst->vram_usage; 250 gtt += dst->gart_usage; 251 } 252 if (src) { 253 vram += src->vram_usage; 254 gtt += src->gart_usage; 255 } 256 257 /* Flush the GFX IB if DMA depends on it. */ 258 if (radeon_emitted(ctx->gfx.cs, ctx->initial_gfx_cs_size) && 259 ((dst && 260 ctx->ws->cs_is_buffer_referenced(ctx->gfx.cs, dst->buf, 261 RADEON_USAGE_READWRITE)) || 262 (src && 263 ctx->ws->cs_is_buffer_referenced(ctx->gfx.cs, src->buf, 264 RADEON_USAGE_WRITE)))) 265 ctx->gfx.flush(ctx, RADEON_FLUSH_ASYNC, NULL); 266 267 /* Flush if there's not enough space, or if the memory usage per IB 268 * is too large. 269 * 270 * IBs using too little memory are limited by the IB submission overhead. 271 * IBs using too much memory are limited by the kernel/TTM overhead. 272 * Too long IBs create CPU-GPU pipeline bubbles and add latency. 273 * 274 * This heuristic makes sure that DMA requests are executed 275 * very soon after the call is made and lowers memory usage. 276 * It improves texture upload performance by keeping the DMA 277 * engine busy while uploads are being submitted. 278 */ 279 num_dw++; /* for emit_wait_idle below */ 280 if (!ctx->ws->cs_check_space(ctx->dma.cs, num_dw) || 281 ctx->dma.cs->used_vram + ctx->dma.cs->used_gart > 64 * 1024 * 1024 || 282 !radeon_cs_memory_below_limit(ctx->screen, ctx->dma.cs, vram, gtt)) { 283 ctx->dma.flush(ctx, RADEON_FLUSH_ASYNC, NULL); 284 assert((num_dw + ctx->dma.cs->current.cdw) <= ctx->dma.cs->current.max_dw); 285 } 286 287 /* Wait for idle if either buffer has been used in the IB before to 288 * prevent read-after-write hazards. 289 */ 290 if ((dst && 291 ctx->ws->cs_is_buffer_referenced(ctx->dma.cs, dst->buf, 292 RADEON_USAGE_READWRITE)) || 293 (src && 294 ctx->ws->cs_is_buffer_referenced(ctx->dma.cs, src->buf, 295 RADEON_USAGE_WRITE))) 296 r600_dma_emit_wait_idle(ctx); 297 298 /* If GPUVM is not supported, the CS checker needs 2 entries 299 * in the buffer list per packet, which has to be done manually. 300 */ 301 if (ctx->screen->info.has_virtual_memory) { 302 if (dst) 303 radeon_add_to_buffer_list(ctx, &ctx->dma, dst, 304 RADEON_USAGE_WRITE, 305 RADEON_PRIO_SDMA_BUFFER); 306 if (src) 307 radeon_add_to_buffer_list(ctx, &ctx->dma, src, 308 RADEON_USAGE_READ, 309 RADEON_PRIO_SDMA_BUFFER); 310 } 311 312 /* this function is called before all DMA calls, so increment this. */ 313 ctx->num_dma_calls++; 314 } 315 316 static void r600_memory_barrier(struct pipe_context *ctx, unsigned flags) 317 { 318 } 319 320 void r600_preflush_suspend_features(struct r600_common_context *ctx) 321 { 322 /* suspend queries */ 323 if (!LIST_IS_EMPTY(&ctx->active_queries)) 324 r600_suspend_queries(ctx); 325 326 ctx->streamout.suspended = false; 327 if (ctx->streamout.begin_emitted) { 328 r600_emit_streamout_end(ctx); 329 ctx->streamout.suspended = true; 330 } 331 } 332 333 void r600_postflush_resume_features(struct r600_common_context *ctx) 334 { 335 if (ctx->streamout.suspended) { 336 ctx->streamout.append_bitmask = ctx->streamout.enabled_mask; 337 r600_streamout_buffers_dirty(ctx); 338 } 339 340 /* resume queries */ 341 if (!LIST_IS_EMPTY(&ctx->active_queries)) 342 r600_resume_queries(ctx); 343 } 344 345 static void r600_flush_from_st(struct pipe_context *ctx, 346 struct pipe_fence_handle **fence, 347 unsigned flags) 348 { 349 struct pipe_screen *screen = ctx->screen; 350 struct r600_common_context *rctx = (struct r600_common_context *)ctx; 351 struct radeon_winsys *ws = rctx->ws; 352 unsigned rflags = 0; 353 struct pipe_fence_handle *gfx_fence = NULL; 354 struct pipe_fence_handle *sdma_fence = NULL; 355 bool deferred_fence = false; 356 357 if (flags & PIPE_FLUSH_END_OF_FRAME) 358 rflags |= RADEON_FLUSH_END_OF_FRAME; 359 if (flags & PIPE_FLUSH_DEFERRED) 360 rflags |= RADEON_FLUSH_ASYNC; 361 362 /* DMA IBs are preambles to gfx IBs, therefore must be flushed first. */ 363 if (rctx->dma.cs) 364 rctx->dma.flush(rctx, rflags, fence ? &sdma_fence : NULL); 365 366 if (!radeon_emitted(rctx->gfx.cs, rctx->initial_gfx_cs_size)) { 367 if (fence) 368 ws->fence_reference(&gfx_fence, rctx->last_gfx_fence); 369 if (!(rflags & RADEON_FLUSH_ASYNC)) 370 ws->cs_sync_flush(rctx->gfx.cs); 371 } else { 372 /* Instead of flushing, create a deferred fence. Constraints: 373 * - The state tracker must allow a deferred flush. 374 * - The state tracker must request a fence. 375 * Thread safety in fence_finish must be ensured by the state tracker. 376 */ 377 if (flags & PIPE_FLUSH_DEFERRED && fence) { 378 gfx_fence = rctx->ws->cs_get_next_fence(rctx->gfx.cs); 379 deferred_fence = true; 380 } else { 381 rctx->gfx.flush(rctx, rflags, fence ? &gfx_fence : NULL); 382 } 383 } 384 385 /* Both engines can signal out of order, so we need to keep both fences. */ 386 if (fence) { 387 struct r600_multi_fence *multi_fence = 388 CALLOC_STRUCT(r600_multi_fence); 389 if (!multi_fence) 390 return; 391 392 multi_fence->reference.count = 1; 393 /* If both fences are NULL, fence_finish will always return true. */ 394 multi_fence->gfx = gfx_fence; 395 multi_fence->sdma = sdma_fence; 396 397 if (deferred_fence) { 398 multi_fence->gfx_unflushed.ctx = rctx; 399 multi_fence->gfx_unflushed.ib_index = rctx->num_gfx_cs_flushes; 400 } 401 402 screen->fence_reference(screen, fence, NULL); 403 *fence = (struct pipe_fence_handle*)multi_fence; 404 } 405 } 406 407 static void r600_flush_dma_ring(void *ctx, unsigned flags, 408 struct pipe_fence_handle **fence) 409 { 410 struct r600_common_context *rctx = (struct r600_common_context *)ctx; 411 struct radeon_winsys_cs *cs = rctx->dma.cs; 412 struct radeon_saved_cs saved; 413 bool check_vm = 414 (rctx->screen->debug_flags & DBG_CHECK_VM) && 415 rctx->check_vm_faults; 416 417 if (!radeon_emitted(cs, 0)) { 418 if (fence) 419 rctx->ws->fence_reference(fence, rctx->last_sdma_fence); 420 return; 421 } 422 423 if (check_vm) 424 radeon_save_cs(rctx->ws, cs, &saved); 425 426 rctx->ws->cs_flush(cs, flags, &rctx->last_sdma_fence); 427 if (fence) 428 rctx->ws->fence_reference(fence, rctx->last_sdma_fence); 429 430 if (check_vm) { 431 /* Use conservative timeout 800ms, after which we won't wait any 432 * longer and assume the GPU is hung. 433 */ 434 rctx->ws->fence_wait(rctx->ws, rctx->last_sdma_fence, 800*1000*1000); 435 436 rctx->check_vm_faults(rctx, &saved, RING_DMA); 437 radeon_clear_saved_cs(&saved); 438 } 439 } 440 441 /** 442 * Store a linearized copy of all chunks of \p cs together with the buffer 443 * list in \p saved. 444 */ 445 void radeon_save_cs(struct radeon_winsys *ws, struct radeon_winsys_cs *cs, 446 struct radeon_saved_cs *saved) 447 { 448 void *buf; 449 unsigned i; 450 451 /* Save the IB chunks. */ 452 saved->num_dw = cs->prev_dw + cs->current.cdw; 453 saved->ib = MALLOC(4 * saved->num_dw); 454 if (!saved->ib) 455 goto oom; 456 457 buf = saved->ib; 458 for (i = 0; i < cs->num_prev; ++i) { 459 memcpy(buf, cs->prev[i].buf, cs->prev[i].cdw * 4); 460 buf += cs->prev[i].cdw; 461 } 462 memcpy(buf, cs->current.buf, cs->current.cdw * 4); 463 464 /* Save the buffer list. */ 465 saved->bo_count = ws->cs_get_buffer_list(cs, NULL); 466 saved->bo_list = CALLOC(saved->bo_count, 467 sizeof(saved->bo_list[0])); 468 if (!saved->bo_list) { 469 FREE(saved->ib); 470 goto oom; 471 } 472 ws->cs_get_buffer_list(cs, saved->bo_list); 473 474 return; 475 476 oom: 477 fprintf(stderr, "%s: out of memory\n", __func__); 478 memset(saved, 0, sizeof(*saved)); 479 } 480 481 void radeon_clear_saved_cs(struct radeon_saved_cs *saved) 482 { 483 FREE(saved->ib); 484 FREE(saved->bo_list); 485 486 memset(saved, 0, sizeof(*saved)); 487 } 488 489 static enum pipe_reset_status r600_get_reset_status(struct pipe_context *ctx) 490 { 491 struct r600_common_context *rctx = (struct r600_common_context *)ctx; 492 unsigned latest = rctx->ws->query_value(rctx->ws, 493 RADEON_GPU_RESET_COUNTER); 494 495 if (rctx->gpu_reset_counter == latest) 496 return PIPE_NO_RESET; 497 498 rctx->gpu_reset_counter = latest; 499 return PIPE_UNKNOWN_CONTEXT_RESET; 500 } 501 502 static void r600_set_debug_callback(struct pipe_context *ctx, 503 const struct pipe_debug_callback *cb) 504 { 505 struct r600_common_context *rctx = (struct r600_common_context *)ctx; 506 507 if (cb) 508 rctx->debug = *cb; 509 else 510 memset(&rctx->debug, 0, sizeof(rctx->debug)); 511 } 512 513 static void r600_set_device_reset_callback(struct pipe_context *ctx, 514 const struct pipe_device_reset_callback *cb) 515 { 516 struct r600_common_context *rctx = (struct r600_common_context *)ctx; 517 518 if (cb) 519 rctx->device_reset_callback = *cb; 520 else 521 memset(&rctx->device_reset_callback, 0, 522 sizeof(rctx->device_reset_callback)); 523 } 524 525 bool r600_check_device_reset(struct r600_common_context *rctx) 526 { 527 enum pipe_reset_status status; 528 529 if (!rctx->device_reset_callback.reset) 530 return false; 531 532 if (!rctx->b.get_device_reset_status) 533 return false; 534 535 status = rctx->b.get_device_reset_status(&rctx->b); 536 if (status == PIPE_NO_RESET) 537 return false; 538 539 rctx->device_reset_callback.reset(rctx->device_reset_callback.data, status); 540 return true; 541 } 542 543 static void r600_dma_clear_buffer_fallback(struct pipe_context *ctx, 544 struct pipe_resource *dst, 545 uint64_t offset, uint64_t size, 546 unsigned value) 547 { 548 struct r600_common_context *rctx = (struct r600_common_context *)ctx; 549 550 rctx->clear_buffer(ctx, dst, offset, size, value, R600_COHERENCY_NONE); 551 } 552 553 bool r600_common_context_init(struct r600_common_context *rctx, 554 struct r600_common_screen *rscreen, 555 unsigned context_flags) 556 { 557 slab_create_child(&rctx->pool_transfers, &rscreen->pool_transfers); 558 559 rctx->screen = rscreen; 560 rctx->ws = rscreen->ws; 561 rctx->family = rscreen->family; 562 rctx->chip_class = rscreen->chip_class; 563 564 if (rscreen->chip_class >= CIK) 565 rctx->max_db = MAX2(8, rscreen->info.num_render_backends); 566 else if (rscreen->chip_class >= EVERGREEN) 567 rctx->max_db = 8; 568 else 569 rctx->max_db = 4; 570 571 rctx->b.invalidate_resource = r600_invalidate_resource; 572 rctx->b.transfer_map = u_transfer_map_vtbl; 573 rctx->b.transfer_flush_region = u_transfer_flush_region_vtbl; 574 rctx->b.transfer_unmap = u_transfer_unmap_vtbl; 575 rctx->b.texture_subdata = u_default_texture_subdata; 576 rctx->b.memory_barrier = r600_memory_barrier; 577 rctx->b.flush = r600_flush_from_st; 578 rctx->b.set_debug_callback = r600_set_debug_callback; 579 rctx->dma_clear_buffer = r600_dma_clear_buffer_fallback; 580 581 /* evergreen_compute.c has a special codepath for global buffers. 582 * Everything else can use the direct path. 583 */ 584 if ((rscreen->chip_class == EVERGREEN || rscreen->chip_class == CAYMAN) && 585 (context_flags & PIPE_CONTEXT_COMPUTE_ONLY)) 586 rctx->b.buffer_subdata = u_default_buffer_subdata; 587 else 588 rctx->b.buffer_subdata = r600_buffer_subdata; 589 590 if (rscreen->info.drm_major == 2 && rscreen->info.drm_minor >= 43) { 591 rctx->b.get_device_reset_status = r600_get_reset_status; 592 rctx->gpu_reset_counter = 593 rctx->ws->query_value(rctx->ws, 594 RADEON_GPU_RESET_COUNTER); 595 } 596 597 rctx->b.set_device_reset_callback = r600_set_device_reset_callback; 598 599 r600_init_context_texture_functions(rctx); 600 r600_init_viewport_functions(rctx); 601 r600_streamout_init(rctx); 602 r600_query_init(rctx); 603 cayman_init_msaa(&rctx->b); 604 605 rctx->allocator_zeroed_memory = 606 u_suballocator_create(&rctx->b, rscreen->info.gart_page_size, 607 0, PIPE_USAGE_DEFAULT, true); 608 if (!rctx->allocator_zeroed_memory) 609 return false; 610 611 rctx->uploader = u_upload_create(&rctx->b, 1024 * 1024, 612 PIPE_BIND_INDEX_BUFFER | 613 PIPE_BIND_CONSTANT_BUFFER, PIPE_USAGE_STREAM); 614 if (!rctx->uploader) 615 return false; 616 617 rctx->ctx = rctx->ws->ctx_create(rctx->ws); 618 if (!rctx->ctx) 619 return false; 620 621 if (rscreen->info.has_sdma && !(rscreen->debug_flags & DBG_NO_ASYNC_DMA)) { 622 rctx->dma.cs = rctx->ws->cs_create(rctx->ctx, RING_DMA, 623 r600_flush_dma_ring, 624 rctx); 625 rctx->dma.flush = r600_flush_dma_ring; 626 } 627 628 return true; 629 } 630 631 void r600_common_context_cleanup(struct r600_common_context *rctx) 632 { 633 unsigned i,j; 634 635 /* Release DCC stats. */ 636 for (i = 0; i < ARRAY_SIZE(rctx->dcc_stats); i++) { 637 assert(!rctx->dcc_stats[i].query_active); 638 639 for (j = 0; j < ARRAY_SIZE(rctx->dcc_stats[i].ps_stats); j++) 640 if (rctx->dcc_stats[i].ps_stats[j]) 641 rctx->b.destroy_query(&rctx->b, 642 rctx->dcc_stats[i].ps_stats[j]); 643 644 r600_texture_reference(&rctx->dcc_stats[i].tex, NULL); 645 } 646 647 if (rctx->query_result_shader) 648 rctx->b.delete_compute_state(&rctx->b, rctx->query_result_shader); 649 650 if (rctx->gfx.cs) 651 rctx->ws->cs_destroy(rctx->gfx.cs); 652 if (rctx->dma.cs) 653 rctx->ws->cs_destroy(rctx->dma.cs); 654 if (rctx->ctx) 655 rctx->ws->ctx_destroy(rctx->ctx); 656 657 if (rctx->uploader) { 658 u_upload_destroy(rctx->uploader); 659 } 660 661 slab_destroy_child(&rctx->pool_transfers); 662 663 if (rctx->allocator_zeroed_memory) { 664 u_suballocator_destroy(rctx->allocator_zeroed_memory); 665 } 666 rctx->ws->fence_reference(&rctx->last_gfx_fence, NULL); 667 rctx->ws->fence_reference(&rctx->last_sdma_fence, NULL); 668 } 669 670 /* 671 * pipe_screen 672 */ 673 674 static const struct debug_named_value common_debug_options[] = { 675 /* logging */ 676 { "tex", DBG_TEX, "Print texture info" }, 677 { "compute", DBG_COMPUTE, "Print compute info" }, 678 { "vm", DBG_VM, "Print virtual addresses when creating resources" }, 679 { "info", DBG_INFO, "Print driver information" }, 680 681 /* shaders */ 682 { "fs", DBG_FS, "Print fetch shaders" }, 683 { "vs", DBG_VS, "Print vertex shaders" }, 684 { "gs", DBG_GS, "Print geometry shaders" }, 685 { "ps", DBG_PS, "Print pixel shaders" }, 686 { "cs", DBG_CS, "Print compute shaders" }, 687 { "tcs", DBG_TCS, "Print tessellation control shaders" }, 688 { "tes", DBG_TES, "Print tessellation evaluation shaders" }, 689 { "noir", DBG_NO_IR, "Don't print the LLVM IR"}, 690 { "notgsi", DBG_NO_TGSI, "Don't print the TGSI"}, 691 { "noasm", DBG_NO_ASM, "Don't print disassembled shaders"}, 692 { "preoptir", DBG_PREOPT_IR, "Print the LLVM IR before initial optimizations" }, 693 { "checkir", DBG_CHECK_IR, "Enable additional sanity checks on shader IR" }, 694 { "nooptvariant", DBG_NO_OPT_VARIANT, "Disable compiling optimized shader variants." }, 695 696 { "testdma", DBG_TEST_DMA, "Invoke SDMA tests and exit." }, 697 698 /* features */ 699 { "nodma", DBG_NO_ASYNC_DMA, "Disable asynchronous DMA" }, 700 { "nohyperz", DBG_NO_HYPERZ, "Disable Hyper-Z" }, 701 /* GL uses the word INVALIDATE, gallium uses the word DISCARD */ 702 { "noinvalrange", DBG_NO_DISCARD_RANGE, "Disable handling of INVALIDATE_RANGE map flags" }, 703 { "no2d", DBG_NO_2D_TILING, "Disable 2D tiling" }, 704 { "notiling", DBG_NO_TILING, "Disable tiling" }, 705 { "switch_on_eop", DBG_SWITCH_ON_EOP, "Program WD/IA to switch on end-of-packet." }, 706 { "forcedma", DBG_FORCE_DMA, "Use asynchronous DMA for all operations when possible." }, 707 { "precompile", DBG_PRECOMPILE, "Compile one shader variant at shader creation." }, 708 { "nowc", DBG_NO_WC, "Disable GTT write combining" }, 709 { "check_vm", DBG_CHECK_VM, "Check VM faults and dump debug info." }, 710 { "nodcc", DBG_NO_DCC, "Disable DCC." }, 711 { "nodccclear", DBG_NO_DCC_CLEAR, "Disable DCC fast clear." }, 712 { "norbplus", DBG_NO_RB_PLUS, "Disable RB+ on Stoney." }, 713 { "sisched", DBG_SI_SCHED, "Enable LLVM SI Machine Instruction Scheduler." }, 714 { "mono", DBG_MONOLITHIC_SHADERS, "Use old-style monolithic shaders compiled on demand" }, 715 { "noce", DBG_NO_CE, "Disable the constant engine"}, 716 { "unsafemath", DBG_UNSAFE_MATH, "Enable unsafe math shader optimizations" }, 717 { "nodccfb", DBG_NO_DCC_FB, "Disable separate DCC on the main framebuffer" }, 718 719 DEBUG_NAMED_VALUE_END /* must be last */ 720 }; 721 722 static const char* r600_get_vendor(struct pipe_screen* pscreen) 723 { 724 return "X.Org"; 725 } 726 727 static const char* r600_get_device_vendor(struct pipe_screen* pscreen) 728 { 729 return "AMD"; 730 } 731 732 static const char* r600_get_chip_name(struct r600_common_screen *rscreen) 733 { 734 switch (rscreen->info.family) { 735 case CHIP_R600: return "AMD R600"; 736 case CHIP_RV610: return "AMD RV610"; 737 case CHIP_RV630: return "AMD RV630"; 738 case CHIP_RV670: return "AMD RV670"; 739 case CHIP_RV620: return "AMD RV620"; 740 case CHIP_RV635: return "AMD RV635"; 741 case CHIP_RS780: return "AMD RS780"; 742 case CHIP_RS880: return "AMD RS880"; 743 case CHIP_RV770: return "AMD RV770"; 744 case CHIP_RV730: return "AMD RV730"; 745 case CHIP_RV710: return "AMD RV710"; 746 case CHIP_RV740: return "AMD RV740"; 747 case CHIP_CEDAR: return "AMD CEDAR"; 748 case CHIP_REDWOOD: return "AMD REDWOOD"; 749 case CHIP_JUNIPER: return "AMD JUNIPER"; 750 case CHIP_CYPRESS: return "AMD CYPRESS"; 751 case CHIP_HEMLOCK: return "AMD HEMLOCK"; 752 case CHIP_PALM: return "AMD PALM"; 753 case CHIP_SUMO: return "AMD SUMO"; 754 case CHIP_SUMO2: return "AMD SUMO2"; 755 case CHIP_BARTS: return "AMD BARTS"; 756 case CHIP_TURKS: return "AMD TURKS"; 757 case CHIP_CAICOS: return "AMD CAICOS"; 758 case CHIP_CAYMAN: return "AMD CAYMAN"; 759 case CHIP_ARUBA: return "AMD ARUBA"; 760 case CHIP_TAHITI: return "AMD TAHITI"; 761 case CHIP_PITCAIRN: return "AMD PITCAIRN"; 762 case CHIP_VERDE: return "AMD CAPE VERDE"; 763 case CHIP_OLAND: return "AMD OLAND"; 764 case CHIP_HAINAN: return "AMD HAINAN"; 765 case CHIP_BONAIRE: return "AMD BONAIRE"; 766 case CHIP_KAVERI: return "AMD KAVERI"; 767 case CHIP_KABINI: return "AMD KABINI"; 768 case CHIP_HAWAII: return "AMD HAWAII"; 769 case CHIP_MULLINS: return "AMD MULLINS"; 770 case CHIP_TONGA: return "AMD TONGA"; 771 case CHIP_ICELAND: return "AMD ICELAND"; 772 case CHIP_CARRIZO: return "AMD CARRIZO"; 773 case CHIP_FIJI: return "AMD FIJI"; 774 case CHIP_POLARIS10: return "AMD POLARIS10"; 775 case CHIP_POLARIS11: return "AMD POLARIS11"; 776 case CHIP_POLARIS12: return "AMD POLARIS12"; 777 case CHIP_STONEY: return "AMD STONEY"; 778 default: return "AMD unknown"; 779 } 780 } 781 782 static const char* r600_get_name(struct pipe_screen* pscreen) 783 { 784 struct r600_common_screen *rscreen = (struct r600_common_screen*)pscreen; 785 786 return rscreen->renderer_string; 787 } 788 789 static float r600_get_paramf(struct pipe_screen* pscreen, 790 enum pipe_capf param) 791 { 792 struct r600_common_screen *rscreen = (struct r600_common_screen *)pscreen; 793 794 switch (param) { 795 case PIPE_CAPF_MAX_LINE_WIDTH: 796 case PIPE_CAPF_MAX_LINE_WIDTH_AA: 797 case PIPE_CAPF_MAX_POINT_WIDTH: 798 case PIPE_CAPF_MAX_POINT_WIDTH_AA: 799 if (rscreen->family >= CHIP_CEDAR) 800 return 16384.0f; 801 else 802 return 8192.0f; 803 case PIPE_CAPF_MAX_TEXTURE_ANISOTROPY: 804 return 16.0f; 805 case PIPE_CAPF_MAX_TEXTURE_LOD_BIAS: 806 return 16.0f; 807 case PIPE_CAPF_GUARD_BAND_LEFT: 808 case PIPE_CAPF_GUARD_BAND_TOP: 809 case PIPE_CAPF_GUARD_BAND_RIGHT: 810 case PIPE_CAPF_GUARD_BAND_BOTTOM: 811 return 0.0f; 812 } 813 return 0.0f; 814 } 815 816 static int r600_get_video_param(struct pipe_screen *screen, 817 enum pipe_video_profile profile, 818 enum pipe_video_entrypoint entrypoint, 819 enum pipe_video_cap param) 820 { 821 switch (param) { 822 case PIPE_VIDEO_CAP_SUPPORTED: 823 return vl_profile_supported(screen, profile, entrypoint); 824 case PIPE_VIDEO_CAP_NPOT_TEXTURES: 825 return 1; 826 case PIPE_VIDEO_CAP_MAX_WIDTH: 827 case PIPE_VIDEO_CAP_MAX_HEIGHT: 828 return vl_video_buffer_max_size(screen); 829 case PIPE_VIDEO_CAP_PREFERED_FORMAT: 830 return PIPE_FORMAT_NV12; 831 case PIPE_VIDEO_CAP_PREFERS_INTERLACED: 832 return false; 833 case PIPE_VIDEO_CAP_SUPPORTS_INTERLACED: 834 return false; 835 case PIPE_VIDEO_CAP_SUPPORTS_PROGRESSIVE: 836 return true; 837 case PIPE_VIDEO_CAP_MAX_LEVEL: 838 return vl_level_supported(screen, profile); 839 default: 840 return 0; 841 } 842 } 843 844 const char *r600_get_llvm_processor_name(enum radeon_family family) 845 { 846 switch (family) { 847 case CHIP_R600: 848 case CHIP_RV630: 849 case CHIP_RV635: 850 case CHIP_RV670: 851 return "r600"; 852 case CHIP_RV610: 853 case CHIP_RV620: 854 case CHIP_RS780: 855 case CHIP_RS880: 856 return "rs880"; 857 case CHIP_RV710: 858 return "rv710"; 859 case CHIP_RV730: 860 return "rv730"; 861 case CHIP_RV740: 862 case CHIP_RV770: 863 return "rv770"; 864 case CHIP_PALM: 865 case CHIP_CEDAR: 866 return "cedar"; 867 case CHIP_SUMO: 868 case CHIP_SUMO2: 869 return "sumo"; 870 case CHIP_REDWOOD: 871 return "redwood"; 872 case CHIP_JUNIPER: 873 return "juniper"; 874 case CHIP_HEMLOCK: 875 case CHIP_CYPRESS: 876 return "cypress"; 877 case CHIP_BARTS: 878 return "barts"; 879 case CHIP_TURKS: 880 return "turks"; 881 case CHIP_CAICOS: 882 return "caicos"; 883 case CHIP_CAYMAN: 884 case CHIP_ARUBA: 885 return "cayman"; 886 887 case CHIP_TAHITI: return "tahiti"; 888 case CHIP_PITCAIRN: return "pitcairn"; 889 case CHIP_VERDE: return "verde"; 890 case CHIP_OLAND: return "oland"; 891 case CHIP_HAINAN: return "hainan"; 892 case CHIP_BONAIRE: return "bonaire"; 893 case CHIP_KABINI: return "kabini"; 894 case CHIP_KAVERI: return "kaveri"; 895 case CHIP_HAWAII: return "hawaii"; 896 case CHIP_MULLINS: 897 return "mullins"; 898 case CHIP_TONGA: return "tonga"; 899 case CHIP_ICELAND: return "iceland"; 900 case CHIP_CARRIZO: return "carrizo"; 901 case CHIP_FIJI: 902 return HAVE_LLVM >= 0x0308 ? "fiji" : "carrizo"; 903 case CHIP_STONEY: 904 return HAVE_LLVM >= 0x0308 ? "stoney" : "carrizo"; 905 case CHIP_POLARIS10: 906 return HAVE_LLVM >= 0x0309 ? "polaris10" : "carrizo"; 907 case CHIP_POLARIS11: 908 case CHIP_POLARIS12: /* same as polaris11 */ 909 return HAVE_LLVM >= 0x0309 ? "polaris11" : "carrizo"; 910 default: 911 return ""; 912 } 913 } 914 915 static int r600_get_compute_param(struct pipe_screen *screen, 916 enum pipe_shader_ir ir_type, 917 enum pipe_compute_cap param, 918 void *ret) 919 { 920 struct r600_common_screen *rscreen = (struct r600_common_screen *)screen; 921 922 //TODO: select these params by asic 923 switch (param) { 924 case PIPE_COMPUTE_CAP_IR_TARGET: { 925 const char *gpu; 926 const char *triple; 927 if (rscreen->family <= CHIP_ARUBA) { 928 triple = "r600--"; 929 } else { 930 if (HAVE_LLVM < 0x0400) { 931 triple = "amdgcn--"; 932 } else { 933 triple = "amdgcn-mesa-mesa3d"; 934 } 935 } 936 switch(rscreen->family) { 937 /* Clang < 3.6 is missing Hainan in its list of 938 * GPUs, so we need to use the name of a similar GPU. 939 */ 940 default: 941 gpu = r600_get_llvm_processor_name(rscreen->family); 942 break; 943 } 944 if (ret) { 945 sprintf(ret, "%s-%s", gpu, triple); 946 } 947 /* +2 for dash and terminating NIL byte */ 948 return (strlen(triple) + strlen(gpu) + 2) * sizeof(char); 949 } 950 case PIPE_COMPUTE_CAP_GRID_DIMENSION: 951 if (ret) { 952 uint64_t *grid_dimension = ret; 953 grid_dimension[0] = 3; 954 } 955 return 1 * sizeof(uint64_t); 956 957 case PIPE_COMPUTE_CAP_MAX_GRID_SIZE: 958 if (ret) { 959 uint64_t *grid_size = ret; 960 grid_size[0] = 65535; 961 grid_size[1] = 65535; 962 grid_size[2] = 65535; 963 } 964 return 3 * sizeof(uint64_t) ; 965 966 case PIPE_COMPUTE_CAP_MAX_BLOCK_SIZE: 967 if (ret) { 968 uint64_t *block_size = ret; 969 if (rscreen->chip_class >= SI && HAVE_LLVM >= 0x309 && 970 ir_type == PIPE_SHADER_IR_TGSI) { 971 block_size[0] = 2048; 972 block_size[1] = 2048; 973 block_size[2] = 2048; 974 } else { 975 block_size[0] = 256; 976 block_size[1] = 256; 977 block_size[2] = 256; 978 } 979 } 980 return 3 * sizeof(uint64_t); 981 982 case PIPE_COMPUTE_CAP_MAX_THREADS_PER_BLOCK: 983 if (ret) { 984 uint64_t *max_threads_per_block = ret; 985 if (rscreen->chip_class >= SI && HAVE_LLVM >= 0x309 && 986 ir_type == PIPE_SHADER_IR_TGSI) 987 *max_threads_per_block = 2048; 988 else 989 *max_threads_per_block = 256; 990 } 991 return sizeof(uint64_t); 992 case PIPE_COMPUTE_CAP_ADDRESS_BITS: 993 if (ret) { 994 uint32_t *address_bits = ret; 995 address_bits[0] = 32; 996 if (rscreen->chip_class >= SI) 997 address_bits[0] = 64; 998 } 999 return 1 * sizeof(uint32_t); 1000 1001 case PIPE_COMPUTE_CAP_MAX_GLOBAL_SIZE: 1002 if (ret) { 1003 uint64_t *max_global_size = ret; 1004 uint64_t max_mem_alloc_size; 1005 1006 r600_get_compute_param(screen, ir_type, 1007 PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE, 1008 &max_mem_alloc_size); 1009 1010 /* In OpenCL, the MAX_MEM_ALLOC_SIZE must be at least 1011 * 1/4 of the MAX_GLOBAL_SIZE. Since the 1012 * MAX_MEM_ALLOC_SIZE is fixed for older kernels, 1013 * make sure we never report more than 1014 * 4 * MAX_MEM_ALLOC_SIZE. 1015 */ 1016 *max_global_size = MIN2(4 * max_mem_alloc_size, 1017 MAX2(rscreen->info.gart_size, 1018 rscreen->info.vram_size)); 1019 } 1020 return sizeof(uint64_t); 1021 1022 case PIPE_COMPUTE_CAP_MAX_LOCAL_SIZE: 1023 if (ret) { 1024 uint64_t *max_local_size = ret; 1025 /* Value reported by the closed source driver. */ 1026 *max_local_size = 32768; 1027 } 1028 return sizeof(uint64_t); 1029 1030 case PIPE_COMPUTE_CAP_MAX_INPUT_SIZE: 1031 if (ret) { 1032 uint64_t *max_input_size = ret; 1033 /* Value reported by the closed source driver. */ 1034 *max_input_size = 1024; 1035 } 1036 return sizeof(uint64_t); 1037 1038 case PIPE_COMPUTE_CAP_MAX_MEM_ALLOC_SIZE: 1039 if (ret) { 1040 uint64_t *max_mem_alloc_size = ret; 1041 1042 *max_mem_alloc_size = rscreen->info.max_alloc_size; 1043 } 1044 return sizeof(uint64_t); 1045 1046 case PIPE_COMPUTE_CAP_MAX_CLOCK_FREQUENCY: 1047 if (ret) { 1048 uint32_t *max_clock_frequency = ret; 1049 *max_clock_frequency = rscreen->info.max_shader_clock; 1050 } 1051 return sizeof(uint32_t); 1052 1053 case PIPE_COMPUTE_CAP_MAX_COMPUTE_UNITS: 1054 if (ret) { 1055 uint32_t *max_compute_units = ret; 1056 *max_compute_units = rscreen->info.num_good_compute_units; 1057 } 1058 return sizeof(uint32_t); 1059 1060 case PIPE_COMPUTE_CAP_IMAGES_SUPPORTED: 1061 if (ret) { 1062 uint32_t *images_supported = ret; 1063 *images_supported = 0; 1064 } 1065 return sizeof(uint32_t); 1066 case PIPE_COMPUTE_CAP_MAX_PRIVATE_SIZE: 1067 break; /* unused */ 1068 case PIPE_COMPUTE_CAP_SUBGROUP_SIZE: 1069 if (ret) { 1070 uint32_t *subgroup_size = ret; 1071 *subgroup_size = r600_wavefront_size(rscreen->family); 1072 } 1073 return sizeof(uint32_t); 1074 case PIPE_COMPUTE_CAP_MAX_VARIABLE_THREADS_PER_BLOCK: 1075 if (ret) { 1076 uint64_t *max_variable_threads_per_block = ret; 1077 if (rscreen->chip_class >= SI && HAVE_LLVM >= 0x309 && 1078 ir_type == PIPE_SHADER_IR_TGSI) 1079 *max_variable_threads_per_block = SI_MAX_VARIABLE_THREADS_PER_BLOCK; 1080 else 1081 *max_variable_threads_per_block = 0; 1082 } 1083 return sizeof(uint64_t); 1084 } 1085 1086 fprintf(stderr, "unknown PIPE_COMPUTE_CAP %d\n", param); 1087 return 0; 1088 } 1089 1090 static uint64_t r600_get_timestamp(struct pipe_screen *screen) 1091 { 1092 struct r600_common_screen *rscreen = (struct r600_common_screen*)screen; 1093 1094 return 1000000 * rscreen->ws->query_value(rscreen->ws, RADEON_TIMESTAMP) / 1095 rscreen->info.clock_crystal_freq; 1096 } 1097 1098 static void r600_fence_reference(struct pipe_screen *screen, 1099 struct pipe_fence_handle **dst, 1100 struct pipe_fence_handle *src) 1101 { 1102 struct radeon_winsys *ws = ((struct r600_common_screen*)screen)->ws; 1103 struct r600_multi_fence **rdst = (struct r600_multi_fence **)dst; 1104 struct r600_multi_fence *rsrc = (struct r600_multi_fence *)src; 1105 1106 if (pipe_reference(&(*rdst)->reference, &rsrc->reference)) { 1107 ws->fence_reference(&(*rdst)->gfx, NULL); 1108 ws->fence_reference(&(*rdst)->sdma, NULL); 1109 FREE(*rdst); 1110 } 1111 *rdst = rsrc; 1112 } 1113 1114 static boolean r600_fence_finish(struct pipe_screen *screen, 1115 struct pipe_context *ctx, 1116 struct pipe_fence_handle *fence, 1117 uint64_t timeout) 1118 { 1119 struct radeon_winsys *rws = ((struct r600_common_screen*)screen)->ws; 1120 struct r600_multi_fence *rfence = (struct r600_multi_fence *)fence; 1121 struct r600_common_context *rctx = 1122 ctx ? (struct r600_common_context*)ctx : NULL; 1123 int64_t abs_timeout = os_time_get_absolute_timeout(timeout); 1124 1125 if (rfence->sdma) { 1126 if (!rws->fence_wait(rws, rfence->sdma, timeout)) 1127 return false; 1128 1129 /* Recompute the timeout after waiting. */ 1130 if (timeout && timeout != PIPE_TIMEOUT_INFINITE) { 1131 int64_t time = os_time_get_nano(); 1132 timeout = abs_timeout > time ? abs_timeout - time : 0; 1133 } 1134 } 1135 1136 if (!rfence->gfx) 1137 return true; 1138 1139 /* Flush the gfx IB if it hasn't been flushed yet. */ 1140 if (rctx && 1141 rfence->gfx_unflushed.ctx == rctx && 1142 rfence->gfx_unflushed.ib_index == rctx->num_gfx_cs_flushes) { 1143 rctx->gfx.flush(rctx, timeout ? 0 : RADEON_FLUSH_ASYNC, NULL); 1144 rfence->gfx_unflushed.ctx = NULL; 1145 1146 if (!timeout) 1147 return false; 1148 1149 /* Recompute the timeout after all that. */ 1150 if (timeout && timeout != PIPE_TIMEOUT_INFINITE) { 1151 int64_t time = os_time_get_nano(); 1152 timeout = abs_timeout > time ? abs_timeout - time : 0; 1153 } 1154 } 1155 1156 return rws->fence_wait(rws, rfence->gfx, timeout); 1157 } 1158 1159 static void r600_query_memory_info(struct pipe_screen *screen, 1160 struct pipe_memory_info *info) 1161 { 1162 struct r600_common_screen *rscreen = (struct r600_common_screen*)screen; 1163 struct radeon_winsys *ws = rscreen->ws; 1164 unsigned vram_usage, gtt_usage; 1165 1166 info->total_device_memory = rscreen->info.vram_size / 1024; 1167 info->total_staging_memory = rscreen->info.gart_size / 1024; 1168 1169 /* The real TTM memory usage is somewhat random, because: 1170 * 1171 * 1) TTM delays freeing memory, because it can only free it after 1172 * fences expire. 1173 * 1174 * 2) The memory usage can be really low if big VRAM evictions are 1175 * taking place, but the real usage is well above the size of VRAM. 1176 * 1177 * Instead, return statistics of this process. 1178 */ 1179 vram_usage = ws->query_value(ws, RADEON_REQUESTED_VRAM_MEMORY) / 1024; 1180 gtt_usage = ws->query_value(ws, RADEON_REQUESTED_GTT_MEMORY) / 1024; 1181 1182 info->avail_device_memory = 1183 vram_usage <= info->total_device_memory ? 1184 info->total_device_memory - vram_usage : 0; 1185 info->avail_staging_memory = 1186 gtt_usage <= info->total_staging_memory ? 1187 info->total_staging_memory - gtt_usage : 0; 1188 1189 info->device_memory_evicted = 1190 ws->query_value(ws, RADEON_NUM_BYTES_MOVED) / 1024; 1191 1192 if (rscreen->info.drm_major == 3 && rscreen->info.drm_minor >= 4) 1193 info->nr_device_memory_evictions = 1194 ws->query_value(ws, RADEON_NUM_EVICTIONS); 1195 else 1196 /* Just return the number of evicted 64KB pages. */ 1197 info->nr_device_memory_evictions = info->device_memory_evicted / 64; 1198 } 1199 1200 struct pipe_resource *r600_resource_create_common(struct pipe_screen *screen, 1201 const struct pipe_resource *templ) 1202 { 1203 if (templ->target == PIPE_BUFFER) { 1204 return r600_buffer_create(screen, templ, 256); 1205 } else { 1206 return r600_texture_create(screen, templ); 1207 } 1208 } 1209 1210 bool r600_common_screen_init(struct r600_common_screen *rscreen, 1211 struct radeon_winsys *ws) 1212 { 1213 char llvm_string[32] = {}, kernel_version[128] = {}; 1214 struct utsname uname_data; 1215 1216 ws->query_info(ws, &rscreen->info); 1217 1218 if (uname(&uname_data) == 0) 1219 snprintf(kernel_version, sizeof(kernel_version), 1220 " / %s", uname_data.release); 1221 1222 if (HAVE_LLVM > 0) { 1223 snprintf(llvm_string, sizeof(llvm_string), 1224 ", LLVM %i.%i.%i", (HAVE_LLVM >> 8) & 0xff, 1225 HAVE_LLVM & 0xff, MESA_LLVM_VERSION_PATCH); 1226 } 1227 1228 snprintf(rscreen->renderer_string, sizeof(rscreen->renderer_string), 1229 "%s (DRM %i.%i.%i%s%s)", 1230 r600_get_chip_name(rscreen), rscreen->info.drm_major, 1231 rscreen->info.drm_minor, rscreen->info.drm_patchlevel, 1232 kernel_version, llvm_string); 1233 1234 rscreen->b.get_name = r600_get_name; 1235 rscreen->b.get_vendor = r600_get_vendor; 1236 rscreen->b.get_device_vendor = r600_get_device_vendor; 1237 rscreen->b.get_compute_param = r600_get_compute_param; 1238 rscreen->b.get_paramf = r600_get_paramf; 1239 rscreen->b.get_timestamp = r600_get_timestamp; 1240 rscreen->b.fence_finish = r600_fence_finish; 1241 rscreen->b.fence_reference = r600_fence_reference; 1242 rscreen->b.resource_destroy = u_resource_destroy_vtbl; 1243 rscreen->b.resource_from_user_memory = r600_buffer_from_user_memory; 1244 rscreen->b.query_memory_info = r600_query_memory_info; 1245 1246 if (rscreen->info.has_uvd) { 1247 rscreen->b.get_video_param = rvid_get_video_param; 1248 rscreen->b.is_video_format_supported = rvid_is_format_supported; 1249 } else { 1250 rscreen->b.get_video_param = r600_get_video_param; 1251 rscreen->b.is_video_format_supported = vl_video_buffer_is_format_supported; 1252 } 1253 1254 r600_init_screen_texture_functions(rscreen); 1255 r600_init_screen_query_functions(rscreen); 1256 1257 rscreen->ws = ws; 1258 rscreen->family = rscreen->info.family; 1259 rscreen->chip_class = rscreen->info.chip_class; 1260 rscreen->debug_flags = debug_get_flags_option("R600_DEBUG", common_debug_options, 0); 1261 1262 slab_create_parent(&rscreen->pool_transfers, sizeof(struct r600_transfer), 64); 1263 1264 rscreen->force_aniso = MIN2(16, debug_get_num_option("R600_TEX_ANISO", -1)); 1265 if (rscreen->force_aniso >= 0) { 1266 printf("radeon: Forcing anisotropy filter to %ix\n", 1267 /* round down to a power of two */ 1268 1 << util_logbase2(rscreen->force_aniso)); 1269 } 1270 1271 util_format_s3tc_init(); 1272 pipe_mutex_init(rscreen->aux_context_lock); 1273 pipe_mutex_init(rscreen->gpu_load_mutex); 1274 1275 if (rscreen->debug_flags & DBG_INFO) { 1276 printf("pci_id = 0x%x\n", rscreen->info.pci_id); 1277 printf("family = %i (%s)\n", rscreen->info.family, 1278 r600_get_chip_name(rscreen)); 1279 printf("chip_class = %i\n", rscreen->info.chip_class); 1280 printf("gart_size = %i MB\n", (int)DIV_ROUND_UP(rscreen->info.gart_size, 1024*1024)); 1281 printf("vram_size = %i MB\n", (int)DIV_ROUND_UP(rscreen->info.vram_size, 1024*1024)); 1282 printf("max_alloc_size = %i MB\n", 1283 (int)DIV_ROUND_UP(rscreen->info.max_alloc_size, 1024*1024)); 1284 printf("has_virtual_memory = %i\n", rscreen->info.has_virtual_memory); 1285 printf("gfx_ib_pad_with_type2 = %i\n", rscreen->info.gfx_ib_pad_with_type2); 1286 printf("has_sdma = %i\n", rscreen->info.has_sdma); 1287 printf("has_uvd = %i\n", rscreen->info.has_uvd); 1288 printf("me_fw_version = %i\n", rscreen->info.me_fw_version); 1289 printf("pfp_fw_version = %i\n", rscreen->info.pfp_fw_version); 1290 printf("ce_fw_version = %i\n", rscreen->info.ce_fw_version); 1291 printf("vce_fw_version = %i\n", rscreen->info.vce_fw_version); 1292 printf("vce_harvest_config = %i\n", rscreen->info.vce_harvest_config); 1293 printf("clock_crystal_freq = %i\n", rscreen->info.clock_crystal_freq); 1294 printf("drm = %i.%i.%i\n", rscreen->info.drm_major, 1295 rscreen->info.drm_minor, rscreen->info.drm_patchlevel); 1296 printf("has_userptr = %i\n", rscreen->info.has_userptr); 1297 1298 printf("r600_max_quad_pipes = %i\n", rscreen->info.r600_max_quad_pipes); 1299 printf("max_shader_clock = %i\n", rscreen->info.max_shader_clock); 1300 printf("num_good_compute_units = %i\n", rscreen->info.num_good_compute_units); 1301 printf("max_se = %i\n", rscreen->info.max_se); 1302 printf("max_sh_per_se = %i\n", rscreen->info.max_sh_per_se); 1303 1304 printf("r600_gb_backend_map = %i\n", rscreen->info.r600_gb_backend_map); 1305 printf("r600_gb_backend_map_valid = %i\n", rscreen->info.r600_gb_backend_map_valid); 1306 printf("r600_num_banks = %i\n", rscreen->info.r600_num_banks); 1307 printf("num_render_backends = %i\n", rscreen->info.num_render_backends); 1308 printf("num_tile_pipes = %i\n", rscreen->info.num_tile_pipes); 1309 printf("pipe_interleave_bytes = %i\n", rscreen->info.pipe_interleave_bytes); 1310 } 1311 return true; 1312 } 1313 1314 void r600_destroy_common_screen(struct r600_common_screen *rscreen) 1315 { 1316 r600_perfcounters_destroy(rscreen); 1317 r600_gpu_load_kill_thread(rscreen); 1318 1319 pipe_mutex_destroy(rscreen->gpu_load_mutex); 1320 pipe_mutex_destroy(rscreen->aux_context_lock); 1321 rscreen->aux_context->destroy(rscreen->aux_context); 1322 1323 slab_destroy_parent(&rscreen->pool_transfers); 1324 1325 rscreen->ws->destroy(rscreen->ws); 1326 FREE(rscreen); 1327 } 1328 1329 bool r600_can_dump_shader(struct r600_common_screen *rscreen, 1330 unsigned processor) 1331 { 1332 switch (processor) { 1333 case PIPE_SHADER_VERTEX: 1334 return (rscreen->debug_flags & DBG_VS) != 0; 1335 case PIPE_SHADER_TESS_CTRL: 1336 return (rscreen->debug_flags & DBG_TCS) != 0; 1337 case PIPE_SHADER_TESS_EVAL: 1338 return (rscreen->debug_flags & DBG_TES) != 0; 1339 case PIPE_SHADER_GEOMETRY: 1340 return (rscreen->debug_flags & DBG_GS) != 0; 1341 case PIPE_SHADER_FRAGMENT: 1342 return (rscreen->debug_flags & DBG_PS) != 0; 1343 case PIPE_SHADER_COMPUTE: 1344 return (rscreen->debug_flags & DBG_CS) != 0; 1345 default: 1346 return false; 1347 } 1348 } 1349 1350 bool r600_extra_shader_checks(struct r600_common_screen *rscreen, unsigned processor) 1351 { 1352 return (rscreen->debug_flags & DBG_CHECK_IR) || 1353 r600_can_dump_shader(rscreen, processor); 1354 } 1355 1356 void r600_screen_clear_buffer(struct r600_common_screen *rscreen, struct pipe_resource *dst, 1357 uint64_t offset, uint64_t size, unsigned value) 1358 { 1359 struct r600_common_context *rctx = (struct r600_common_context*)rscreen->aux_context; 1360 1361 pipe_mutex_lock(rscreen->aux_context_lock); 1362 rctx->dma_clear_buffer(&rctx->b, dst, offset, size, value); 1363 rscreen->aux_context->flush(rscreen->aux_context, NULL, 0); 1364 pipe_mutex_unlock(rscreen->aux_context_lock); 1365 } 1366