1 /* 2 * Copyright 2015 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * on the rights to use, copy, modify, merge, publish, distribute, sub 8 * license, and/or sell copies of the Software, and to permit persons to whom 9 * the Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 21 * USE OR OTHER DEALINGS IN THE SOFTWARE. 
 */

#include "si_pipe.h"
#include "si_compute.h"
#include "sid.h"
#include "gfx9d.h"
#include "sid_tables.h"
#include "ddebug/dd_util.h"
#include "util/u_dump.h"
#include "util/u_log.h"
#include "util/u_memory.h"
#include "ac_debug.h"

static void si_dump_bo_list(struct si_context *sctx,
			    const struct radeon_saved_cs *saved, FILE *f);

DEBUG_GET_ONCE_OPTION(replace_shaders, "RADEON_REPLACE_SHADERS", NULL)

/* Write one shader's disassembly/statistics to f.
 *
 * Prefers the precomputed shader_log blob when present; otherwise falls
 * back to the full si_shader_dump() path. */
static void si_dump_shader(struct si_screen *sscreen,
			   enum pipe_shader_type processor,
			   const struct si_shader *shader, FILE *f)
{
	if (shader->shader_log)
		fwrite(shader->shader_log, shader->shader_log_size, 1, f);
	else
		si_shader_dump(sscreen, shader, NULL, processor, f, false);
}

/* Deferred-log chunk that holds on to a shader so it can be printed (and
 * safely destroyed) when the log page is flushed. */
struct si_log_chunk_shader {
	/* The shader destroy code assumes a current context for unlinking of
	 * PM4 packets etc.
	 *
	 * While we should be able to destroy shaders without a context, doing
	 * so would happen only very rarely and be therefore likely to fail
	 * just when you're trying to debug something. Let's just remember the
	 * current context in the chunk.
58 */ 59 struct si_context *ctx; 60 struct si_shader *shader; 61 enum pipe_shader_type processor; 62 63 /* For keep-alive reference counts */ 64 struct si_shader_selector *sel; 65 struct si_compute *program; 66 }; 67 68 static void 69 si_log_chunk_shader_destroy(void *data) 70 { 71 struct si_log_chunk_shader *chunk = data; 72 si_shader_selector_reference(chunk->ctx, &chunk->sel, NULL); 73 si_compute_reference(&chunk->program, NULL); 74 FREE(chunk); 75 } 76 77 static void 78 si_log_chunk_shader_print(void *data, FILE *f) 79 { 80 struct si_log_chunk_shader *chunk = data; 81 struct si_screen *sscreen = chunk->ctx->screen; 82 si_dump_shader(sscreen, chunk->processor, 83 chunk->shader, f); 84 } 85 86 static struct u_log_chunk_type si_log_chunk_type_shader = { 87 .destroy = si_log_chunk_shader_destroy, 88 .print = si_log_chunk_shader_print, 89 }; 90 91 static void si_dump_gfx_shader(struct si_context *ctx, 92 const struct si_shader_ctx_state *state, 93 struct u_log_context *log) 94 { 95 struct si_shader *current = state->current; 96 97 if (!state->cso || !current) 98 return; 99 100 struct si_log_chunk_shader *chunk = CALLOC_STRUCT(si_log_chunk_shader); 101 chunk->ctx = ctx; 102 chunk->processor = state->cso->info.processor; 103 chunk->shader = current; 104 si_shader_selector_reference(ctx, &chunk->sel, current->selector); 105 u_log_chunk(log, &si_log_chunk_type_shader, chunk); 106 } 107 108 static void si_dump_compute_shader(struct si_context *ctx, 109 struct u_log_context *log) 110 { 111 const struct si_cs_shader_state *state = &ctx->cs_shader_state; 112 113 if (!state->program) 114 return; 115 116 struct si_log_chunk_shader *chunk = CALLOC_STRUCT(si_log_chunk_shader); 117 chunk->ctx = ctx; 118 chunk->processor = PIPE_SHADER_COMPUTE; 119 chunk->shader = &state->program->shader; 120 si_compute_reference(&chunk->program, state->program); 121 u_log_chunk(log, &si_log_chunk_type_shader, chunk); 122 } 123 124 /** 125 * Shader compiles can be overridden with arbitrary ELF 
objects by setting 126 * the environment variable RADEON_REPLACE_SHADERS=num1:filename1[;num2:filename2] 127 */ 128 bool si_replace_shader(unsigned num, struct ac_shader_binary *binary) 129 { 130 const char *p = debug_get_option_replace_shaders(); 131 const char *semicolon; 132 char *copy = NULL; 133 FILE *f; 134 long filesize, nread; 135 char *buf = NULL; 136 bool replaced = false; 137 138 if (!p) 139 return false; 140 141 while (*p) { 142 unsigned long i; 143 char *endp; 144 i = strtoul(p, &endp, 0); 145 146 p = endp; 147 if (*p != ':') { 148 fprintf(stderr, "RADEON_REPLACE_SHADERS formatted badly.\n"); 149 exit(1); 150 } 151 ++p; 152 153 if (i == num) 154 break; 155 156 p = strchr(p, ';'); 157 if (!p) 158 return false; 159 ++p; 160 } 161 if (!*p) 162 return false; 163 164 semicolon = strchr(p, ';'); 165 if (semicolon) { 166 p = copy = strndup(p, semicolon - p); 167 if (!copy) { 168 fprintf(stderr, "out of memory\n"); 169 return false; 170 } 171 } 172 173 fprintf(stderr, "radeonsi: replace shader %u by %s\n", num, p); 174 175 f = fopen(p, "r"); 176 if (!f) { 177 perror("radeonsi: failed to open file"); 178 goto out_free; 179 } 180 181 if (fseek(f, 0, SEEK_END) != 0) 182 goto file_error; 183 184 filesize = ftell(f); 185 if (filesize < 0) 186 goto file_error; 187 188 if (fseek(f, 0, SEEK_SET) != 0) 189 goto file_error; 190 191 buf = MALLOC(filesize); 192 if (!buf) { 193 fprintf(stderr, "out of memory\n"); 194 goto out_close; 195 } 196 197 nread = fread(buf, 1, filesize, f); 198 if (nread != filesize) 199 goto file_error; 200 201 ac_elf_read(buf, filesize, binary); 202 replaced = true; 203 204 out_close: 205 fclose(f); 206 out_free: 207 FREE(buf); 208 free(copy); 209 return replaced; 210 211 file_error: 212 perror("radeonsi: reading shader"); 213 goto out_close; 214 } 215 216 /* Parsed IBs are difficult to read without colors. Use "less -R file" to 217 * read them, or use "aha -b -f file" to convert them to html. 
 */
#define COLOR_RESET	"\033[0m"
#define COLOR_RED	"\033[31m"
#define COLOR_GREEN	"\033[1;32m"
#define COLOR_YELLOW	"\033[1;33m"
#define COLOR_CYAN	"\033[1;36m"

/* Read one MMIO register through the winsys and pretty-print it.
 * Silently does nothing if the register cannot be read. */
static void si_dump_mmapped_reg(struct si_context *sctx, FILE *f,
				unsigned offset)
{
	struct radeon_winsys *ws = sctx->b.ws;
	uint32_t value;

	if (ws->read_registers(ws, offset, 1, &value))
		ac_dump_reg(f, sctx->b.chip_class, offset, value, ~0);
}

/* Dump GRBM/SRBM/CP/SDMA status registers that are useful for diagnosing
 * GPU hangs.  Which registers are readable depends on the kernel DRM
 * version. */
static void si_dump_debug_registers(struct si_context *sctx, FILE *f)
{
	if (sctx->screen->info.drm_major == 2 &&
	    sctx->screen->info.drm_minor < 42)
		return; /* no radeon support */

	fprintf(f, "Memory-mapped registers:\n");
	si_dump_mmapped_reg(sctx, f, R_008010_GRBM_STATUS);

	/* No other registers can be read on DRM < 3.1.0. */
	if (sctx->screen->info.drm_major < 3 ||
	    sctx->screen->info.drm_minor < 1) {
		fprintf(f, "\n");
		return;
	}

	si_dump_mmapped_reg(sctx, f, R_008008_GRBM_STATUS2);
	si_dump_mmapped_reg(sctx, f, R_008014_GRBM_STATUS_SE0);
	si_dump_mmapped_reg(sctx, f, R_008018_GRBM_STATUS_SE1);
	si_dump_mmapped_reg(sctx, f, R_008038_GRBM_STATUS_SE2);
	si_dump_mmapped_reg(sctx, f, R_00803C_GRBM_STATUS_SE3);
	si_dump_mmapped_reg(sctx, f, R_00D034_SDMA0_STATUS_REG);
	si_dump_mmapped_reg(sctx, f, R_00D834_SDMA1_STATUS_REG);
	/* SRBM registers only exist up to VI. */
	if (sctx->b.chip_class <= VI) {
		si_dump_mmapped_reg(sctx, f, R_000E50_SRBM_STATUS);
		si_dump_mmapped_reg(sctx, f, R_000E4C_SRBM_STATUS2);
		si_dump_mmapped_reg(sctx, f, R_000E54_SRBM_STATUS3);
	}
	si_dump_mmapped_reg(sctx, f, R_008680_CP_STAT);
	si_dump_mmapped_reg(sctx, f, R_008674_CP_STALLED_STAT1);
	si_dump_mmapped_reg(sctx, f, R_008678_CP_STALLED_STAT2);
	si_dump_mmapped_reg(sctx, f, R_008670_CP_STALLED_STAT3);
	si_dump_mmapped_reg(sctx, f, R_008210_CP_CPC_STATUS);
	si_dump_mmapped_reg(sctx, f, R_008214_CP_CPC_BUSY_STAT);
	si_dump_mmapped_reg(sctx, f, R_008218_CP_CPC_STALLED_STAT1);
	si_dump_mmapped_reg(sctx, f, R_00821C_CP_CPF_STATUS);
	si_dump_mmapped_reg(sctx, f, R_008220_CP_CPF_BUSY_STAT);
	si_dump_mmapped_reg(sctx, f, R_008224_CP_CPF_STALLED_STAT1);
	fprintf(f, "\n");
}

/* Deferred-log chunk describing a dword range [gfx_begin, gfx_end) of the
 * graphics IB of a saved CS. */
struct si_log_chunk_cs {
	struct si_context *ctx;
	struct si_saved_cs *cs;
	bool dump_bo_list;
	unsigned gfx_begin, gfx_end;
};

static void si_log_chunk_type_cs_destroy(void *data)
{
	struct si_log_chunk_cs *chunk = data;
	si_saved_cs_reference(&chunk->cs, NULL);
	free(chunk);
}

/* Parse and print dwords [begin, end) of a command stream that has not
 * been flushed yet, walking the already-chained previous IB chunks before
 * the current one.
 *
 * Note: returns without printing the end marker when the range ends
 * inside a previous chunk (matches historical behavior). */
static void si_parse_current_ib(FILE *f, struct radeon_winsys_cs *cs,
				unsigned begin, unsigned end,
				int *last_trace_id, unsigned trace_id_count,
				const char *name, enum chip_class chip_class)
{
	unsigned orig_end = end;

	assert(begin <= end);

	fprintf(f, "------------------ %s begin (dw = %u) ------------------\n",
		name, begin);

	for (unsigned prev_idx = 0; prev_idx < cs->num_prev; ++prev_idx) {
		struct radeon_winsys_cs_chunk *chunk = &cs->prev[prev_idx];

		if (begin < chunk->cdw) {
			ac_parse_ib_chunk(f, chunk->buf + begin,
					  MIN2(end, chunk->cdw) - begin,
					  last_trace_id, trace_id_count,
					  chip_class, NULL, NULL);
		}

		if (end <= chunk->cdw)
			return;

		if (begin < chunk->cdw)
			fprintf(f, "\n---------- Next %s Chunk ----------\n\n",
				name);

		/* Rebase the remaining range onto the next chunk. */
		begin -= MIN2(begin, chunk->cdw);
		end -= chunk->cdw;
	}

	assert(end <= cs->current.cdw);

	ac_parse_ib_chunk(f, cs->current.buf + begin, end - begin, last_trace_id,
			  trace_id_count, chip_class, NULL, NULL);

	fprintf(f, "------------------- %s end (dw = %u) -------------------\n\n",
		name, orig_end);
}

/* Chunk printer: parse and print the recorded IB range, annotated with
 * the last trace ID reached by the GPU. */
static void si_log_chunk_type_cs_print(void *data, FILE *f)
{
	struct si_log_chunk_cs *chunk = data;
	struct si_context *ctx = chunk->ctx;
	struct si_saved_cs *scs = chunk->cs;
	int last_trace_id = -1;

	/* We are expecting that the ddebug pipe has already
	 * waited for the context, so this buffer should be idle.
	 * If the GPU is hung, there is no point in waiting for it.
	 */
	uint32_t *map = ctx->b.ws->buffer_map(scs->trace_buf->buf,
					      NULL,
					      PIPE_TRANSFER_UNSYNCHRONIZED |
					      PIPE_TRANSFER_READ);
	if (map)
		last_trace_id = map[0];

	if (chunk->gfx_end != chunk->gfx_begin) {
		/* The first chunk of a CS also covers the IB2 init state. */
		if (chunk->gfx_begin == 0) {
			if (ctx->init_config)
				ac_parse_ib(f, ctx->init_config->pm4, ctx->init_config->ndw,
					    NULL, 0, "IB2: Init config", ctx->b.chip_class,
					    NULL, NULL);

			if (ctx->init_config_gs_rings)
				ac_parse_ib(f, ctx->init_config_gs_rings->pm4,
					    ctx->init_config_gs_rings->ndw,
					    NULL, 0, "IB2: Init GS rings", ctx->b.chip_class,
					    NULL, NULL);
		}

		/* A flushed CS has a stable copy of the IB; otherwise walk
		 * the live, possibly chained command stream. */
		if (scs->flushed) {
			ac_parse_ib(f, scs->gfx.ib + chunk->gfx_begin,
				    chunk->gfx_end - chunk->gfx_begin,
				    &last_trace_id, map ? 1 : 0, "IB", ctx->b.chip_class,
				    NULL, NULL);
		} else {
			si_parse_current_ib(f, ctx->b.gfx.cs, chunk->gfx_begin,
					    chunk->gfx_end, &last_trace_id, map ? 1 : 0,
					    "IB", ctx->b.chip_class);
		}
	}

	if (chunk->dump_bo_list) {
		fprintf(f, "Flushing. Time: ");
		util_dump_ns(f, scs->time_flush);
		fprintf(f, "\n\n");
		si_dump_bo_list(ctx, &scs->gfx, f);
	}
}

static const struct u_log_chunk_type si_log_chunk_type_cs = {
	.destroy = si_log_chunk_type_cs_destroy,
	.print = si_log_chunk_type_cs_print,
};

/* Append a CS log chunk covering the IB range emitted since the previous
 * call for the current saved CS. */
static void si_log_cs(struct si_context *ctx, struct u_log_context *log,
		      bool dump_bo_list)
{
	assert(ctx->current_saved_cs);

	struct si_saved_cs *scs = ctx->current_saved_cs;
	unsigned gfx_cur = ctx->b.gfx.cs->prev_dw + ctx->b.gfx.cs->current.cdw;

	/* Skip empty ranges unless the BO list was explicitly requested. */
	if (!dump_bo_list &&
	    gfx_cur == scs->gfx_last_dw)
		return;

	struct si_log_chunk_cs *chunk = calloc(1, sizeof(*chunk));

	chunk->ctx = ctx;
	si_saved_cs_reference(&chunk->cs, scs);
	chunk->dump_bo_list = dump_bo_list;

	chunk->gfx_begin = scs->gfx_last_dw;
	chunk->gfx_end = gfx_cur;
	scs->gfx_last_dw = gfx_cur;

	u_log_chunk(log, &si_log_chunk_type_cs, chunk);
}

/* u_log auto-logger callback: record CS progress without the BO list. */
void si_auto_log_cs(void *data, struct u_log_context *log)
{
	struct si_context *ctx = (struct si_context *)data;
	si_log_cs(ctx, log, false);
}

/* Called at flush time: record the remaining IB range plus the BO list. */
void si_log_hw_flush(struct si_context *sctx)
{
	if (!sctx->b.log)
		return;

	si_log_cs(sctx, sctx->b.log, true);
}

/* Map a radeon_bo_priority value to its name.
 * NOTE(review): priorities without an ITEM() initializer map to NULL;
 * callers are expected to pass only the values listed below — confirm. */
static const char *priority_to_string(enum radeon_bo_priority priority)
{
#define ITEM(x) [RADEON_PRIO_##x] = #x
	static const char *table[64] = {
		ITEM(FENCE),
		ITEM(TRACE),
		ITEM(SO_FILLED_SIZE),
		ITEM(QUERY),
		ITEM(IB1),
		ITEM(IB2),
		ITEM(DRAW_INDIRECT),
		ITEM(INDEX_BUFFER),
		ITEM(VCE),
		ITEM(UVD),
		ITEM(SDMA_BUFFER),
		ITEM(SDMA_TEXTURE),
		ITEM(CP_DMA),
		ITEM(CONST_BUFFER),
		ITEM(DESCRIPTORS),
		ITEM(BORDER_COLORS),
		ITEM(SAMPLER_BUFFER),
		ITEM(VERTEX_BUFFER),
		ITEM(SHADER_RW_BUFFER),
		ITEM(COMPUTE_GLOBAL),
		ITEM(SAMPLER_TEXTURE),
		ITEM(SHADER_RW_IMAGE),
		ITEM(SAMPLER_TEXTURE_MSAA),
		ITEM(COLOR_BUFFER),
		ITEM(DEPTH_BUFFER),
		ITEM(COLOR_BUFFER_MSAA),
		ITEM(DEPTH_BUFFER_MSAA),
		ITEM(CMASK),
		ITEM(DCC),
		ITEM(HTILE),
		ITEM(SHADER_BINARY),
		ITEM(SHADER_RINGS),
		ITEM(SCRATCH_BUFFER),
	};
#undef ITEM

	assert(priority < ARRAY_SIZE(table));
	return table[priority];
}

/* qsort comparator: order BO list items by ascending VM address. */
static int bo_list_compare_va(const struct radeon_bo_list_item *a,
			      const struct radeon_bo_list_item *b)
{
	return a->vm_address < b->vm_address ? -1 :
	       a->vm_address > b->vm_address ? 1 : 0;
}

/* Print the buffer list of a saved CS sorted by VM address, including VM
 * holes between buffers and each buffer's priority-usage names. */
static void si_dump_bo_list(struct si_context *sctx,
			    const struct radeon_saved_cs *saved, FILE *f)
{
	unsigned i,j;

	if (!saved->bo_list)
		return;

	/* Sort the list according to VM adddresses first. */
	qsort(saved->bo_list, saved->bo_count,
	      sizeof(saved->bo_list[0]), (void*)bo_list_compare_va);

	fprintf(f, "Buffer list (in units of pages = 4kB):\n"
		COLOR_YELLOW "        Size    VM start page    "
		"VM end page           Usage" COLOR_RESET "\n");

	for (i = 0; i < saved->bo_count; i++) {
		/* Note: Buffer sizes are expected to be aligned to 4k by the winsys. */
		const unsigned page_size = sctx->b.screen->info.gart_page_size;
		uint64_t va = saved->bo_list[i].vm_address;
		uint64_t size = saved->bo_list[i].bo_size;
		bool hit = false;

		/* If there's unused virtual memory between 2 buffers, print it. */
		if (i) {
			uint64_t previous_va_end = saved->bo_list[i-1].vm_address +
						   saved->bo_list[i-1].bo_size;

			if (va > previous_va_end) {
				fprintf(f, "  %10"PRIu64"    -- hole --\n",
					(va - previous_va_end) / page_size);
			}
		}

		/* Print the buffer. */
		fprintf(f, "  %10"PRIu64"    0x%013"PRIX64"       0x%013"PRIX64"       ",
			size / page_size, va / page_size, (va + size) / page_size);

		/* Print the usage. */
		for (j = 0; j < 64; j++) {
			if (!(saved->bo_list[i].priority_usage & (1ull << j)))
				continue;

			fprintf(f, "%s%s", !hit ? "" : ", ", priority_to_string(j));
			hit = true;
		}
		fprintf(f, "\n");
	}
	fprintf(f, "\nNote: The holes represent memory not used by the IB.\n"
		   "      Other buffers can still be allocated there.\n\n");
}

/* Log texture info for every bound color buffer and the depth-stencil
 * buffer of the current framebuffer state. */
static void si_dump_framebuffer(struct si_context *sctx, struct u_log_context *log)
{
	struct pipe_framebuffer_state *state = &sctx->framebuffer.state;
	struct r600_texture *rtex;
	int i;

	for (i = 0; i < state->nr_cbufs; i++) {
		if (!state->cbufs[i])
			continue;

		rtex = (struct r600_texture*)state->cbufs[i]->texture;
		u_log_printf(log, COLOR_YELLOW "Color buffer %i:" COLOR_RESET "\n", i);
		si_print_texture_info(sctx->b.screen, rtex, log);
		u_log_printf(log, "\n");
	}

	if (state->zsbuf) {
		rtex = (struct r600_texture*)state->zsbuf->texture;
		u_log_printf(log, COLOR_YELLOW "Depth-stencil buffer:" COLOR_RESET "\n");
		si_print_texture_info(sctx->b.screen, rtex, log);
		u_log_printf(log, "\n");
	}
}

/* Maps a logical slot index to its position in a descriptor list. */
typedef unsigned (*slot_remap_func)(unsigned);

/* Deferred-log chunk holding a CPU snapshot of a descriptor list plus a
 * mapping of the uploaded GPU copy for corruption checks. */
struct si_log_chunk_desc_list {
	/** Pointer to memory map of buffer where the list is uploaded */
	uint32_t *gpu_list;
	/** Reference of buffer where the list is uploaded, so that gpu_list
	 * is kept live.
	 */
	struct r600_resource *buf;

	const char *shader_name;
	const char *elem_name;
	slot_remap_func slot_remap;
	enum chip_class chip_class;
	unsigned element_dw_size;
	unsigned num_elements;

	/* CPU-side snapshot of the descriptors, taken at record time. */
	uint32_t list[0];
};

static void
si_log_chunk_desc_list_destroy(void *data)
{
	struct si_log_chunk_desc_list *chunk = data;
	r600_resource_reference(&chunk->buf, NULL);
	FREE(chunk);
}

/* Print each recorded descriptor slot, decoded by element_dw_size:
 * 4 dwords = buffer resource, 8 = image (+ buffer view), 16 = image +
 * FMASK + sampler state.  Prefers the GPU copy when mapped, and flags
 * slots whose GPU contents differ from the CPU snapshot. */
static void
si_log_chunk_desc_list_print(void *data, FILE *f)
{
	struct si_log_chunk_desc_list *chunk = data;

	for (unsigned i = 0; i < chunk->num_elements; i++) {
		unsigned cpu_dw_offset = i * chunk->element_dw_size;
		unsigned gpu_dw_offset = chunk->slot_remap(i) * chunk->element_dw_size;
		const char *list_note = chunk->gpu_list ? "GPU list" : "CPU list";
		uint32_t *cpu_list = chunk->list + cpu_dw_offset;
		uint32_t *gpu_list = chunk->gpu_list ? chunk->gpu_list + gpu_dw_offset : cpu_list;

		fprintf(f, COLOR_GREEN "%s%s slot %u (%s):" COLOR_RESET "\n",
			chunk->shader_name, chunk->elem_name, i, list_note);

		switch (chunk->element_dw_size) {
		case 4:
			for (unsigned j = 0; j < 4; j++)
				ac_dump_reg(f, chunk->chip_class,
					    R_008F00_SQ_BUF_RSRC_WORD0 + j*4,
					    gpu_list[j], 0xffffffff);
			break;
		case 8:
			for (unsigned j = 0; j < 8; j++)
				ac_dump_reg(f, chunk->chip_class,
					    R_008F10_SQ_IMG_RSRC_WORD0 + j*4,
					    gpu_list[j], 0xffffffff);

			fprintf(f, COLOR_CYAN "    Buffer:" COLOR_RESET "\n");
			for (unsigned j = 0; j < 4; j++)
				ac_dump_reg(f, chunk->chip_class,
					    R_008F00_SQ_BUF_RSRC_WORD0 + j*4,
					    gpu_list[4+j], 0xffffffff);
			break;
		case 16:
			for (unsigned j = 0; j < 8; j++)
				ac_dump_reg(f, chunk->chip_class,
					    R_008F10_SQ_IMG_RSRC_WORD0 + j*4,
					    gpu_list[j], 0xffffffff);

			fprintf(f, COLOR_CYAN "    Buffer:" COLOR_RESET "\n");
			for (unsigned j = 0; j < 4; j++)
				ac_dump_reg(f, chunk->chip_class,
					    R_008F00_SQ_BUF_RSRC_WORD0 + j*4,
					    gpu_list[4+j], 0xffffffff);

			fprintf(f, COLOR_CYAN "    FMASK:" COLOR_RESET "\n");
			for (unsigned j = 0; j < 8; j++)
				ac_dump_reg(f, chunk->chip_class,
					    R_008F10_SQ_IMG_RSRC_WORD0 + j*4,
					    gpu_list[8+j], 0xffffffff);

			fprintf(f, COLOR_CYAN "    Sampler state:" COLOR_RESET "\n");
			for (unsigned j = 0; j < 4; j++)
				ac_dump_reg(f, chunk->chip_class,
					    R_008F30_SQ_IMG_SAMP_WORD0 + j*4,
					    gpu_list[12+j], 0xffffffff);
			break;
		}

		if (memcmp(gpu_list, cpu_list, chunk->element_dw_size * 4) != 0) {
			fprintf(f, COLOR_RED "!!!!! This slot was corrupted in GPU memory !!!!!"
				COLOR_RESET "\n");
		}

		fprintf(f, "\n");
	}

}

static const struct u_log_chunk_type si_log_chunk_type_descriptor_list = {
	.destroy = si_log_chunk_desc_list_destroy,
	.print = si_log_chunk_desc_list_print,
};

/* Snapshot a descriptor list into the log for deferred printing. */
static void si_dump_descriptor_list(struct si_screen *screen,
				    struct si_descriptors *desc,
				    const char *shader_name,
				    const char *elem_name,
				    unsigned element_dw_size,
				    unsigned num_elements,
				    slot_remap_func slot_remap,
				    struct u_log_context *log)
{
	if (!desc->list)
		return;

	/* In some cases, the caller doesn't know how many elements are really
	 * uploaded. Reduce num_elements to fit in the range of active slots.
	 */
	unsigned active_range_dw_begin =
		desc->first_active_slot * desc->element_dw_size;
	unsigned active_range_dw_end =
		active_range_dw_begin + desc->num_active_slots * desc->element_dw_size;

	/* Drop trailing elements that fall outside the active range. */
	while (num_elements > 0) {
		int i = slot_remap(num_elements - 1);
		unsigned dw_begin = i * element_dw_size;
		unsigned dw_end = dw_begin + element_dw_size;

		if (dw_begin >= active_range_dw_begin && dw_end <= active_range_dw_end)
			break;

		num_elements--;
	}

	struct si_log_chunk_desc_list *chunk =
		CALLOC_VARIANT_LENGTH_STRUCT(si_log_chunk_desc_list,
					     4 * element_dw_size * num_elements);
	chunk->shader_name = shader_name;
	chunk->elem_name = elem_name;
	chunk->element_dw_size = element_dw_size;
	chunk->num_elements = num_elements;
	chunk->slot_remap = slot_remap;
	chunk->chip_class = screen->info.chip_class;

	/* Keep the upload buffer alive so gpu_list stays mapped. */
	r600_resource_reference(&chunk->buf, desc->buffer);
	chunk->gpu_list = desc->gpu_list;

	/* Remember the current CPU-side contents for later comparison. */
	for (unsigned i = 0; i < num_elements; ++i) {
		memcpy(&chunk->list[i * element_dw_size],
		       &desc->list[slot_remap(i) * element_dw_size],
		       4 * element_dw_size);
	}

	u_log_chunk(log, &si_log_chunk_type_descriptor_list, chunk);
}

/* Identity slot mapping for descriptor lists that need no remapping. */
static unsigned si_identity(unsigned slot)
{
	return slot;
}

/* Dump all descriptor lists (constant/shader buffers, samplers, images)
 * of one shader stage.  "info" may be NULL only for compute. */
static void si_dump_descriptors(struct si_context *sctx,
				enum pipe_shader_type processor,
				const struct tgsi_shader_info *info,
				struct u_log_context *log)
{
	struct si_descriptors *descs =
		&sctx->descriptors[SI_DESCS_FIRST_SHADER +
				   processor * SI_NUM_SHADER_DESCS];
	static const char *shader_name[] = {"VS", "PS", "GS", "TCS", "TES", "CS"};
	const char *name = shader_name[processor];
	unsigned enabled_constbuf, enabled_shaderbuf, enabled_samplers;
	unsigned enabled_images;

	if (info) {
		enabled_constbuf = info->const_buffers_declared;
		enabled_shaderbuf = info->shader_buffers_declared;
		enabled_samplers = info->samplers_declared;
		enabled_images = info->images_declared;
	} else {
		/* No TGSI info: derive the enabled sets from the bound state. */
		enabled_constbuf = sctx->const_and_shader_buffers[processor].enabled_mask >>
				   SI_NUM_SHADER_BUFFERS;
		enabled_shaderbuf = sctx->const_and_shader_buffers[processor].enabled_mask &
				    u_bit_consecutive(0, SI_NUM_SHADER_BUFFERS);
		/* Shader buffer slots are stored in reverse order in the mask. */
		enabled_shaderbuf = util_bitreverse(enabled_shaderbuf) >>
				    (32 - SI_NUM_SHADER_BUFFERS);
		enabled_samplers = sctx->samplers[processor].enabled_mask;
		enabled_images = sctx->images[processor].enabled_mask;
	}

	if (processor == PIPE_SHADER_VERTEX) {
		assert(info); /* only CS may not have an info struct */

		si_dump_descriptor_list(sctx->screen, &sctx->vertex_buffers, name,
					" - Vertex buffer", 4, info->num_inputs,
					si_identity, log);
	}

	si_dump_descriptor_list(sctx->screen,
				&descs[SI_SHADER_DESCS_CONST_AND_SHADER_BUFFERS],
				name, " - Constant buffer", 4,
				util_last_bit(enabled_constbuf),
				si_get_constbuf_slot, log);
	si_dump_descriptor_list(sctx->screen,
				&descs[SI_SHADER_DESCS_CONST_AND_SHADER_BUFFERS],
				name, " - Shader buffer", 4,
				util_last_bit(enabled_shaderbuf),
				si_get_shaderbuf_slot, log);
	si_dump_descriptor_list(sctx->screen,
				&descs[SI_SHADER_DESCS_SAMPLERS_AND_IMAGES],
				name, " - Sampler", 16,
				util_last_bit(enabled_samplers),
				si_get_sampler_slot, log);
	si_dump_descriptor_list(sctx->screen,
				&descs[SI_SHADER_DESCS_SAMPLERS_AND_IMAGES],
				name, " - Image", 8,
				util_last_bit(enabled_images),
				si_get_image_slot, log);
}

/* Dump the descriptors of one bound graphics shader stage. */
static void si_dump_gfx_descriptors(struct si_context *sctx,
				    const struct si_shader_ctx_state *state,
				    struct u_log_context *log)
{
	if (!state->cso || !state->current)
		return;

	si_dump_descriptors(sctx, state->cso->type, &state->cso->info, log);
}

/* Dump the descriptors of the bound compute shader. */
static void si_dump_compute_descriptors(struct si_context *sctx,
					struct u_log_context *log)
{
	if (!sctx->cs_shader_state.program)
		return;

	si_dump_descriptors(sctx, PIPE_SHADER_COMPUTE, NULL, log);
}

struct si_shader_inst {
	char text[160];  /* one disasm line */
	unsigned offset; /* instruction offset */
	unsigned size;   /* instruction size = 4 or 8 */
};

/* Split a disassembly string into lines and add them to the array pointed
 * to by "instructions". */
static void si_add_split_disasm(const char *disasm,
				uint64_t start_addr,
				unsigned *num,
				struct si_shader_inst *instructions)
{
	struct si_shader_inst *last_inst = *num ? &instructions[*num - 1] : NULL;
	char *next;

	while ((next = strchr(disasm, '\n'))) {
		struct si_shader_inst *inst = &instructions[*num];
		unsigned len = next - disasm;

		assert(len < ARRAY_SIZE(inst->text));
		memcpy(inst->text, disasm, len);
		inst->text[len] = 0;
		/* Offsets accumulate across successive disasm parts. */
		inst->offset = last_inst ? last_inst->offset + last_inst->size : 0;

		const char *semicolon = strchr(disasm, ';');
		assert(semicolon);
		/* More than 16 chars after ";" means the instruction is 8 bytes long. */
		inst->size = next - semicolon > 16 ? 8 : 4;

		snprintf(inst->text + len, ARRAY_SIZE(inst->text) - len,
			 " [PC=0x%"PRIx64", off=%u, size=%u]",
			 start_addr + inst->offset, inst->offset, inst->size);

		last_inst = inst;
		(*num)++;
		disasm = next + 1;
	}
}

/* If the shader is being executed, print its asm instructions, and annotate
 * those that are being executed right now with information about waves that
 * execute them. This is most useful during a GPU hang.
 */
static void si_print_annotated_shader(struct si_shader *shader,
				      struct ac_wave_info *waves,
				      unsigned num_waves,
				      FILE *f)
{
	if (!shader || !shader->binary.disasm_string)
		return;

	uint64_t start_addr = shader->bo->gpu_address;
	uint64_t end_addr = start_addr + shader->bo->b.b.width0;
	unsigned i;

	/* See if any wave executes the shader. */
	for (i = 0; i < num_waves; i++) {
		if (start_addr <= waves[i].pc && waves[i].pc <= end_addr)
			break;
	}
	if (i == num_waves)
		return; /* the shader is not being executed */

	/* Remember the first found wave. The waves are sorted according to PC. */
	waves = &waves[i];
	num_waves -= i;

	/* Get the list of instructions.
	 * Buffer size / 4 is the upper bound of the instruction count.
	 */
	unsigned num_inst = 0;
	struct si_shader_inst *instructions =
		calloc(shader->bo->b.b.width0 / 4, sizeof(struct si_shader_inst));

	/* Gather the disassembly of all shader parts in execution order. */
	if (shader->prolog) {
		si_add_split_disasm(shader->prolog->binary.disasm_string,
				    start_addr, &num_inst, instructions);
	}
	if (shader->previous_stage) {
		si_add_split_disasm(shader->previous_stage->binary.disasm_string,
				    start_addr, &num_inst, instructions);
	}
	if (shader->prolog2) {
		si_add_split_disasm(shader->prolog2->binary.disasm_string,
				    start_addr, &num_inst, instructions);
	}
	si_add_split_disasm(shader->binary.disasm_string,
			    start_addr, &num_inst, instructions);
	if (shader->epilog) {
		si_add_split_disasm(shader->epilog->binary.disasm_string,
				    start_addr, &num_inst, instructions);
	}

	fprintf(f, COLOR_YELLOW "%s - annotated disassembly:" COLOR_RESET "\n",
		si_get_shader_name(shader, shader->selector->type));

	/* Print instructions with annotations. */
	for (i = 0; i < num_inst; i++) {
		struct si_shader_inst *inst = &instructions[i];

		fprintf(f, "%s\n", inst->text);

		/* Print which waves execute the instruction right now. */
		while (num_waves && start_addr + inst->offset == waves->pc) {
			fprintf(f,
				"          " COLOR_GREEN "^ SE%u SH%u CU%u "
				"SIMD%u WAVE%u  EXEC=%016"PRIx64 "  ",
				waves->se, waves->sh, waves->cu, waves->simd,
				waves->wave, waves->exec);

			if (inst->size == 4) {
				fprintf(f, "INST32=%08X" COLOR_RESET "\n",
					waves->inst_dw0);
			} else {
				fprintf(f, "INST64=%08X %08X" COLOR_RESET "\n",
					waves->inst_dw0, waves->inst_dw1);
			}

			waves->matched = true;
			waves = &waves[1];
			num_waves--;
		}
	}

	fprintf(f, "\n\n");
	free(instructions);
}

/* Dump annotated disassembly of all bound shaders, then waves executing
 * shaders that are not currently bound. */
static void si_dump_annotated_shaders(struct si_context *sctx, FILE *f)
{
	struct ac_wave_info waves[AC_MAX_WAVES_PER_CHIP];
	unsigned num_waves = ac_get_wave_info(waves);

	fprintf(f, COLOR_CYAN "The number of active waves = %u" COLOR_RESET
		"\n\n", num_waves);

	si_print_annotated_shader(sctx->vs_shader.current, waves, num_waves, f);
	si_print_annotated_shader(sctx->tcs_shader.current, waves, num_waves, f);
	si_print_annotated_shader(sctx->tes_shader.current, waves, num_waves, f);
	si_print_annotated_shader(sctx->gs_shader.current, waves, num_waves, f);
	si_print_annotated_shader(sctx->ps_shader.current, waves, num_waves, f);

	/* Print waves executing shaders that are not currently bound.
	 */
	unsigned i;
	bool found = false;
	for (i = 0; i < num_waves; i++) {
		if (waves[i].matched)
			continue;

		if (!found) {
			fprintf(f, COLOR_CYAN
				"Waves not executing currently-bound shaders:"
				COLOR_RESET "\n");
			found = true;
		}
		fprintf(f, "    SE%u SH%u CU%u SIMD%u WAVE%u  EXEC=%016"PRIx64
			"  INST=%08X %08X  PC=%"PRIx64"\n",
			waves[i].se, waves[i].sh, waves[i].cu, waves[i].simd,
			waves[i].wave, waves[i].exec, waves[i].inst_dw0,
			waves[i].inst_dw1, waves[i].pc);
	}
	if (found)
		fprintf(f, "\n\n");
}

/* Run a shell command and copy its standard output into f. */
static void si_dump_command(const char *title, const char *command, FILE *f)
{
	char line[2000];

	FILE *p = popen(command, "r");
	if (!p)
		return;

	fprintf(f, COLOR_YELLOW "%s: " COLOR_RESET "\n", title);
	while (fgets(line, sizeof(line), p))
		fputs(line, f);
	fprintf(f, "\n\n");
	pclose(p);
}

/* pipe_context::dump_debug_state implementation. */
static void si_dump_debug_state(struct pipe_context *ctx, FILE *f,
				unsigned flags)
{
	struct si_context *sctx = (struct si_context*)ctx;

	if (sctx->b.log)
		u_log_flush(sctx->b.log);

	if (flags & PIPE_DUMP_DEVICE_STATUS_REGISTERS) {
		si_dump_debug_registers(sctx, f);

		si_dump_annotated_shaders(sctx, f);
		/* "umr" is an external AMD register/wave dumping tool. */
		si_dump_command("Active waves (raw data)", "umr -wa | column -t", f);
		si_dump_command("Wave information", "umr -O bits -wa", f);
	}
}

/* Record the full draw state (framebuffer, shaders, descriptors). */
void si_log_draw_state(struct si_context *sctx, struct u_log_context *log)
{
	if (!log)
		return;

	si_dump_framebuffer(sctx, log);

	si_dump_gfx_shader(sctx, &sctx->vs_shader, log);
	si_dump_gfx_shader(sctx, &sctx->tcs_shader, log);
	si_dump_gfx_shader(sctx, &sctx->tes_shader, log);
	si_dump_gfx_shader(sctx, &sctx->gs_shader, log);
	si_dump_gfx_shader(sctx, &sctx->ps_shader, log);

	si_dump_descriptor_list(sctx->screen,
				&sctx->descriptors[SI_DESCS_RW_BUFFERS],
				"", "RW buffers", 4, SI_NUM_RW_BUFFERS,
				si_identity, log);
	si_dump_gfx_descriptors(sctx, &sctx->vs_shader, log);
	si_dump_gfx_descriptors(sctx, &sctx->tcs_shader, log);
	si_dump_gfx_descriptors(sctx, &sctx->tes_shader, log);
	si_dump_gfx_descriptors(sctx, &sctx->gs_shader, log);
	si_dump_gfx_descriptors(sctx, &sctx->ps_shader, log);
}

/* Record the full compute state (shader + descriptors). */
void si_log_compute_state(struct si_context *sctx, struct u_log_context *log)
{
	if (!log)
		return;

	si_dump_compute_shader(sctx, log);
	si_dump_compute_descriptors(sctx, log);
}

/* Dump the BO list and the raw dwords of a saved SDMA IB. */
static void si_dump_dma(struct si_context *sctx,
			struct radeon_saved_cs *saved, FILE *f)
{
	static const char ib_name[] = "sDMA IB";
	unsigned i;

	si_dump_bo_list(sctx, saved, f);

	fprintf(f, "------------------ %s begin ------------------\n", ib_name);

	for (i = 0; i < saved->num_dw; ++i) {
		fprintf(f, " %08x\n", saved->ib[i]);
	}

	fprintf(f, "------------------- %s end -------------------\n", ib_name);
	fprintf(f, "\n");

	fprintf(f, "SDMA Dump Done.\n");
}

/* Check dmesg for a new VM fault; if one occurred, write a full debug
 * report to a debug file and terminate the process. */
void si_check_vm_faults(struct r600_common_context *ctx,
			struct radeon_saved_cs *saved, enum ring_type ring)
{
	struct si_context *sctx = (struct si_context *)ctx;
	struct pipe_screen *screen = sctx->b.b.screen;
	FILE *f;
	uint64_t addr;
	char cmd_line[4096];

	if (!ac_vm_fault_occured(sctx->b.chip_class,
				 &sctx->dmesg_timestamp, &addr))
		return;

	f = dd_get_debug_file(false);
	if (!f)
		return;

	fprintf(f, "VM fault report.\n\n");
	if (os_get_command_line(cmd_line, sizeof(cmd_line)))
		fprintf(f, "Command: %s\n", cmd_line);
	fprintf(f, "Driver vendor: %s\n", screen->get_vendor(screen));
	fprintf(f, "Device vendor: %s\n", screen->get_device_vendor(screen));
	fprintf(f, "Device name: %s\n\n", screen->get_name(screen));
	fprintf(f, "Failing VM page: 0x%08"PRIx64"\n\n", addr);

	if (sctx->apitrace_call_number)
		fprintf(f, "Last apitrace call: %u\n\n",
			sctx->apitrace_call_number);

	switch (ring) {
	case RING_GFX: {
		struct u_log_context log;
		u_log_context_init(&log);

		si_log_draw_state(sctx, &log);
		si_log_compute_state(sctx, &log);
		si_log_cs(sctx, &log, true);

		u_log_new_page_print(&log, f);
		u_log_context_destroy(&log);
		break;
	}
	case RING_DMA:
		si_dump_dma(sctx, saved, f);
		break;

	default:
		break;
	}

	fclose(f);

	fprintf(stderr, "Detected a VM fault, exiting...\n");
	exit(0);
}

/* Install the debug callbacks on the context. */
void si_init_debug_functions(struct si_context *sctx)
{
	sctx->b.b.dump_debug_state = si_dump_debug_state;
	sctx->b.check_vm_faults = si_check_vm_faults;

	/* Set the initial dmesg timestamp for this context, so that
	 * only new messages will be checked for VM faults.
	 */
	if (sctx->screen->debug_flags & DBG(CHECK_VM))
		ac_vm_fault_occured(sctx->b.chip_class,
				    &sctx->dmesg_timestamp, NULL);
}