Home | History | Annotate | Download | only in radeonsi
      1 /*
      2  * Copyright 2015 Advanced Micro Devices, Inc.
      3  *
      4  * Permission is hereby granted, free of charge, to any person obtaining a
      5  * copy of this software and associated documentation files (the "Software"),
      6  * to deal in the Software without restriction, including without limitation
      7  * on the rights to use, copy, modify, merge, publish, distribute, sub
      8  * license, and/or sell copies of the Software, and to permit persons to whom
      9  * the Software is furnished to do so, subject to the following conditions:
     10  *
     11  * The above copyright notice and this permission notice (including the next
     12  * paragraph) shall be included in all copies or substantial portions of the
     13  * Software.
     14  *
     15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     17  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
     18  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
     19  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
     20  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
     21  * USE OR OTHER DEALINGS IN THE SOFTWARE.
     22  */
     23 
     24 #include "si_pipe.h"
     25 #include "si_compute.h"
     26 #include "sid.h"
     27 #include "gfx9d.h"
     28 #include "sid_tables.h"
     29 #include "ddebug/dd_util.h"
     30 #include "util/u_dump.h"
     31 #include "util/u_log.h"
     32 #include "util/u_memory.h"
     33 #include "ac_debug.h"
     34 
/* Forward declaration: si_dump_bo_list() is defined further down in this
 * file but is called earlier, from the command-stream chunk printer.
 */
static void si_dump_bo_list(struct si_context *sctx,
			    const struct radeon_saved_cs *saved, FILE *f);

/* Option accessor for RADEON_REPLACE_SHADERS (default NULL); it is queried
 * as debug_get_option_replace_shaders() in si_replace_shader().
 */
DEBUG_GET_ONCE_OPTION(replace_shaders, "RADEON_REPLACE_SHADERS", NULL)
     39 
     40 static void si_dump_shader(struct si_screen *sscreen,
     41 			   enum pipe_shader_type processor,
     42 			   const struct si_shader *shader, FILE *f)
     43 {
     44 	if (shader->shader_log)
     45 		fwrite(shader->shader_log, shader->shader_log_size, 1, f);
     46 	else
     47 		si_shader_dump(sscreen, shader, NULL, processor, f, false);
     48 }
     49 
/* Payload of a u_log chunk that dumps a single shader. Instances are created
 * by si_dump_gfx_shader() / si_dump_compute_shader() and released by
 * si_log_chunk_shader_destroy().
 */
struct si_log_chunk_shader {
	/* The shader destroy code assumes a current context for unlinking of
	 * PM4 packets etc.
	 *
	 * While we should be able to destroy shaders without a context, doing
	 * so would happen only very rarely and be therefore likely to fail
	 * just when you're trying to debug something. Let's just remember the
	 * current context in the chunk.
	 */
	struct si_context *ctx;
	/* Shader that the print callback hands to si_dump_shader(). */
	struct si_shader *shader;
	/* Shader stage forwarded to si_dump_shader(). */
	enum pipe_shader_type processor;

	/* For keep-alive reference counts */
	struct si_shader_selector *sel;
	struct si_compute *program;
};
     67 
     68 static void
     69 si_log_chunk_shader_destroy(void *data)
     70 {
     71 	struct si_log_chunk_shader *chunk = data;
     72 	si_shader_selector_reference(chunk->ctx, &chunk->sel, NULL);
     73 	si_compute_reference(&chunk->program, NULL);
     74 	FREE(chunk);
     75 }
     76 
     77 static void
     78 si_log_chunk_shader_print(void *data, FILE *f)
     79 {
     80 	struct si_log_chunk_shader *chunk = data;
     81 	struct si_screen *sscreen = chunk->ctx->screen;
     82 	si_dump_shader(sscreen, chunk->processor,
     83 		       chunk->shader, f);
     84 }
     85 
     86 static struct u_log_chunk_type si_log_chunk_type_shader = {
     87 	.destroy = si_log_chunk_shader_destroy,
     88 	.print = si_log_chunk_shader_print,
     89 };
     90 
     91 static void si_dump_gfx_shader(struct si_context *ctx,
     92 			       const struct si_shader_ctx_state *state,
     93 			       struct u_log_context *log)
     94 {
     95 	struct si_shader *current = state->current;
     96 
     97 	if (!state->cso || !current)
     98 		return;
     99 
    100 	struct si_log_chunk_shader *chunk = CALLOC_STRUCT(si_log_chunk_shader);
    101 	chunk->ctx = ctx;
    102 	chunk->processor = state->cso->info.processor;
    103 	chunk->shader = current;
    104 	si_shader_selector_reference(ctx, &chunk->sel, current->selector);
    105 	u_log_chunk(log, &si_log_chunk_type_shader, chunk);
    106 }
    107 
    108 static void si_dump_compute_shader(struct si_context *ctx,
    109 				   struct u_log_context *log)
    110 {
    111 	const struct si_cs_shader_state *state = &ctx->cs_shader_state;
    112 
    113 	if (!state->program)
    114 		return;
    115 
    116 	struct si_log_chunk_shader *chunk = CALLOC_STRUCT(si_log_chunk_shader);
    117 	chunk->ctx = ctx;
    118 	chunk->processor = PIPE_SHADER_COMPUTE;
    119 	chunk->shader = &state->program->shader;
    120 	si_compute_reference(&chunk->program, state->program);
    121 	u_log_chunk(log, &si_log_chunk_type_shader, chunk);
    122 }
    123 
    124 /**
    125  * Shader compiles can be overridden with arbitrary ELF objects by setting
    126  * the environment variable RADEON_REPLACE_SHADERS=num1:filename1[;num2:filename2]
    127  */
    128 bool si_replace_shader(unsigned num, struct ac_shader_binary *binary)
    129 {
    130 	const char *p = debug_get_option_replace_shaders();
    131 	const char *semicolon;
    132 	char *copy = NULL;
    133 	FILE *f;
    134 	long filesize, nread;
    135 	char *buf = NULL;
    136 	bool replaced = false;
    137 
    138 	if (!p)
    139 		return false;
    140 
    141 	while (*p) {
    142 		unsigned long i;
    143 		char *endp;
    144 		i = strtoul(p, &endp, 0);
    145 
    146 		p = endp;
    147 		if (*p != ':') {
    148 			fprintf(stderr, "RADEON_REPLACE_SHADERS formatted badly.\n");
    149 			exit(1);
    150 		}
    151 		++p;
    152 
    153 		if (i == num)
    154 			break;
    155 
    156 		p = strchr(p, ';');
    157 		if (!p)
    158 			return false;
    159 		++p;
    160 	}
    161 	if (!*p)
    162 		return false;
    163 
    164 	semicolon = strchr(p, ';');
    165 	if (semicolon) {
    166 		p = copy = strndup(p, semicolon - p);
    167 		if (!copy) {
    168 			fprintf(stderr, "out of memory\n");
    169 			return false;
    170 		}
    171 	}
    172 
    173 	fprintf(stderr, "radeonsi: replace shader %u by %s\n", num, p);
    174 
    175 	f = fopen(p, "r");
    176 	if (!f) {
    177 		perror("radeonsi: failed to open file");
    178 		goto out_free;
    179 	}
    180 
    181 	if (fseek(f, 0, SEEK_END) != 0)
    182 		goto file_error;
    183 
    184 	filesize = ftell(f);
    185 	if (filesize < 0)
    186 		goto file_error;
    187 
    188 	if (fseek(f, 0, SEEK_SET) != 0)
    189 		goto file_error;
    190 
    191 	buf = MALLOC(filesize);
    192 	if (!buf) {
    193 		fprintf(stderr, "out of memory\n");
    194 		goto out_close;
    195 	}
    196 
    197 	nread = fread(buf, 1, filesize, f);
    198 	if (nread != filesize)
    199 		goto file_error;
    200 
    201 	ac_elf_read(buf, filesize, binary);
    202 	replaced = true;
    203 
    204 out_close:
    205 	fclose(f);
    206 out_free:
    207 	FREE(buf);
    208 	free(copy);
    209 	return replaced;
    210 
    211 file_error:
    212 	perror("radeonsi: reading shader");
    213 	goto out_close;
    214 }
    215 
/* Parsed IBs are difficult to read without colors. Use "less -R file" to
 * read them, or use "aha -b -f file" to convert them to html.
 *
 * The macros below are ANSI escape sequences that are concatenated into the
 * format strings of the dump functions in this file.
 */
#define COLOR_RESET	"\033[0m"
#define COLOR_RED	"\033[31m"
#define COLOR_GREEN	"\033[1;32m"
#define COLOR_YELLOW	"\033[1;33m"
#define COLOR_CYAN	"\033[1;36m"
    224 
    225 static void si_dump_mmapped_reg(struct si_context *sctx, FILE *f,
    226 				unsigned offset)
    227 {
    228 	struct radeon_winsys *ws = sctx->b.ws;
    229 	uint32_t value;
    230 
    231 	if (ws->read_registers(ws, offset, 1, &value))
    232 		ac_dump_reg(f, sctx->b.chip_class, offset, value, ~0);
    233 }
    234 
    235 static void si_dump_debug_registers(struct si_context *sctx, FILE *f)
    236 {
    237 	if (sctx->screen->info.drm_major == 2 &&
    238 	    sctx->screen->info.drm_minor < 42)
    239 		return; /* no radeon support */
    240 
    241 	fprintf(f, "Memory-mapped registers:\n");
    242 	si_dump_mmapped_reg(sctx, f, R_008010_GRBM_STATUS);
    243 
    244 	/* No other registers can be read on DRM < 3.1.0. */
    245 	if (sctx->screen->info.drm_major < 3 ||
    246 	    sctx->screen->info.drm_minor < 1) {
    247 		fprintf(f, "\n");
    248 		return;
    249 	}
    250 
    251 	si_dump_mmapped_reg(sctx, f, R_008008_GRBM_STATUS2);
    252 	si_dump_mmapped_reg(sctx, f, R_008014_GRBM_STATUS_SE0);
    253 	si_dump_mmapped_reg(sctx, f, R_008018_GRBM_STATUS_SE1);
    254 	si_dump_mmapped_reg(sctx, f, R_008038_GRBM_STATUS_SE2);
    255 	si_dump_mmapped_reg(sctx, f, R_00803C_GRBM_STATUS_SE3);
    256 	si_dump_mmapped_reg(sctx, f, R_00D034_SDMA0_STATUS_REG);
    257 	si_dump_mmapped_reg(sctx, f, R_00D834_SDMA1_STATUS_REG);
    258 	if (sctx->b.chip_class <= VI) {
    259 		si_dump_mmapped_reg(sctx, f, R_000E50_SRBM_STATUS);
    260 		si_dump_mmapped_reg(sctx, f, R_000E4C_SRBM_STATUS2);
    261 		si_dump_mmapped_reg(sctx, f, R_000E54_SRBM_STATUS3);
    262 	}
    263 	si_dump_mmapped_reg(sctx, f, R_008680_CP_STAT);
    264 	si_dump_mmapped_reg(sctx, f, R_008674_CP_STALLED_STAT1);
    265 	si_dump_mmapped_reg(sctx, f, R_008678_CP_STALLED_STAT2);
    266 	si_dump_mmapped_reg(sctx, f, R_008670_CP_STALLED_STAT3);
    267 	si_dump_mmapped_reg(sctx, f, R_008210_CP_CPC_STATUS);
    268 	si_dump_mmapped_reg(sctx, f, R_008214_CP_CPC_BUSY_STAT);
    269 	si_dump_mmapped_reg(sctx, f, R_008218_CP_CPC_STALLED_STAT1);
    270 	si_dump_mmapped_reg(sctx, f, R_00821C_CP_CPF_STATUS);
    271 	si_dump_mmapped_reg(sctx, f, R_008220_CP_CPF_BUSY_STAT);
    272 	si_dump_mmapped_reg(sctx, f, R_008224_CP_CPF_STALLED_STAT1);
    273 	fprintf(f, "\n");
    274 }
    275 
/* Payload of a u_log chunk that covers a dword range of the gfx command
 * stream. Created by si_log_cs().
 */
struct si_log_chunk_cs {
	/* Context the chunk was recorded from. */
	struct si_context *ctx;
	/* Referenced saved CS; the reference is dropped by the destroy callback. */
	struct si_saved_cs *cs;
	/* When set, the print callback also emits the flush time and buffer list. */
	bool dump_bo_list;
	/* Dword range [gfx_begin, gfx_end) of the gfx IB covered by this chunk. */
	unsigned gfx_begin, gfx_end;
};
    282 
    283 static void si_log_chunk_type_cs_destroy(void *data)
    284 {
    285 	struct si_log_chunk_cs *chunk = data;
    286 	si_saved_cs_reference(&chunk->cs, NULL);
    287 	free(chunk);
    288 }
    289 
/* Parse and print the dword range [begin, end) of a command stream that is
 * still being recorded.
 *
 * "begin" and "end" are offsets into the logical concatenation of the
 * cs->prev[] chunks followed by cs->current. Each part of the range is
 * handed to ac_parse_ib_chunk() separately, with a separator line printed
 * between parts that belong to different chunks.
 *
 * \param last_trace_id   forwarded unchanged to ac_parse_ib_chunk()
 * \param trace_id_count  forwarded unchanged to ac_parse_ib_chunk()
 * \param name            label used in the begin/end/separator banners
 */
static void si_parse_current_ib(FILE *f, struct radeon_winsys_cs *cs,
				unsigned begin, unsigned end,
				int *last_trace_id, unsigned trace_id_count,
				const char *name, enum chip_class chip_class)
{
	/* "end" is rebased while walking the chunks; keep the caller's value
	 * for the closing banner. */
	unsigned orig_end = end;

	assert(begin <= end);

	fprintf(f, "------------------ %s begin (dw = %u) ------------------\n",
		name, begin);

	for (unsigned prev_idx = 0; prev_idx < cs->num_prev; ++prev_idx) {
		struct radeon_winsys_cs_chunk *chunk = &cs->prev[prev_idx];

		/* The range starts inside this chunk: print the part of it
		 * that lies within the chunk. */
		if (begin < chunk->cdw) {
			ac_parse_ib_chunk(f, chunk->buf + begin,
					  MIN2(end, chunk->cdw) - begin,
					  last_trace_id, trace_id_count,
				          chip_class, NULL, NULL);
		}

		/* The whole range has been consumed.
		 * NOTE(review): this path returns without printing the
		 * "end" banner emitted at the bottom of the function. */
		if (end <= chunk->cdw)
			return;

		if (begin < chunk->cdw)
			fprintf(f, "\n---------- Next %s Chunk ----------\n\n",
				name);

		/* Rebase the range so that it is relative to the next chunk;
		 * "begin" is clamped at 0. */
		begin -= MIN2(begin, chunk->cdw);
		end -= chunk->cdw;
	}

	assert(end <= cs->current.cdw);

	/* Whatever is left lies in the chunk currently being recorded. */
	ac_parse_ib_chunk(f, cs->current.buf + begin, end - begin, last_trace_id,
			  trace_id_count, chip_class, NULL, NULL);

	fprintf(f, "------------------- %s end (dw = %u) -------------------\n\n",
		name, orig_end);
}
    331 
    332 static void si_log_chunk_type_cs_print(void *data, FILE *f)
    333 {
    334 	struct si_log_chunk_cs *chunk = data;
    335 	struct si_context *ctx = chunk->ctx;
    336 	struct si_saved_cs *scs = chunk->cs;
    337 	int last_trace_id = -1;
    338 
    339 	/* We are expecting that the ddebug pipe has already
    340 	 * waited for the context, so this buffer should be idle.
    341 	 * If the GPU is hung, there is no point in waiting for it.
    342 	 */
    343 	uint32_t *map = ctx->b.ws->buffer_map(scs->trace_buf->buf,
    344 					      NULL,
    345 					      PIPE_TRANSFER_UNSYNCHRONIZED |
    346 					      PIPE_TRANSFER_READ);
    347 	if (map)
    348 		last_trace_id = map[0];
    349 
    350 	if (chunk->gfx_end != chunk->gfx_begin) {
    351 		if (chunk->gfx_begin == 0) {
    352 			if (ctx->init_config)
    353 				ac_parse_ib(f, ctx->init_config->pm4, ctx->init_config->ndw,
    354 					    NULL, 0, "IB2: Init config", ctx->b.chip_class,
    355 					    NULL, NULL);
    356 
    357 			if (ctx->init_config_gs_rings)
    358 				ac_parse_ib(f, ctx->init_config_gs_rings->pm4,
    359 					    ctx->init_config_gs_rings->ndw,
    360 					    NULL, 0, "IB2: Init GS rings", ctx->b.chip_class,
    361 					    NULL, NULL);
    362 		}
    363 
    364 		if (scs->flushed) {
    365 			ac_parse_ib(f, scs->gfx.ib + chunk->gfx_begin,
    366 				    chunk->gfx_end - chunk->gfx_begin,
    367 				    &last_trace_id, map ? 1 : 0, "IB", ctx->b.chip_class,
    368 				    NULL, NULL);
    369 		} else {
    370 			si_parse_current_ib(f, ctx->b.gfx.cs, chunk->gfx_begin,
    371 					    chunk->gfx_end, &last_trace_id, map ? 1 : 0,
    372 					    "IB", ctx->b.chip_class);
    373 		}
    374 	}
    375 
    376 	if (chunk->dump_bo_list) {
    377 		fprintf(f, "Flushing. Time: ");
    378 		util_dump_ns(f, scs->time_flush);
    379 		fprintf(f, "\n\n");
    380 		si_dump_bo_list(ctx, &scs->gfx, f);
    381 	}
    382 }
    383 
/* Callback table passed to u_log_chunk() for command-stream chunks
 * (struct si_log_chunk_cs).
 */
static const struct u_log_chunk_type si_log_chunk_type_cs = {
	.destroy = si_log_chunk_type_cs_destroy,
	.print = si_log_chunk_type_cs_print,
};
    388 
    389 static void si_log_cs(struct si_context *ctx, struct u_log_context *log,
    390 		      bool dump_bo_list)
    391 {
    392 	assert(ctx->current_saved_cs);
    393 
    394 	struct si_saved_cs *scs = ctx->current_saved_cs;
    395 	unsigned gfx_cur = ctx->b.gfx.cs->prev_dw + ctx->b.gfx.cs->current.cdw;
    396 
    397 	if (!dump_bo_list &&
    398 	    gfx_cur == scs->gfx_last_dw)
    399 		return;
    400 
    401 	struct si_log_chunk_cs *chunk = calloc(1, sizeof(*chunk));
    402 
    403 	chunk->ctx = ctx;
    404 	si_saved_cs_reference(&chunk->cs, scs);
    405 	chunk->dump_bo_list = dump_bo_list;
    406 
    407 	chunk->gfx_begin = scs->gfx_last_dw;
    408 	chunk->gfx_end = gfx_cur;
    409 	scs->gfx_last_dw = gfx_cur;
    410 
    411 	u_log_chunk(log, &si_log_chunk_type_cs, chunk);
    412 }
    413 
    414 void si_auto_log_cs(void *data, struct u_log_context *log)
    415 {
    416 	struct si_context *ctx = (struct si_context *)data;
    417 	si_log_cs(ctx, log, false);
    418 }
    419 
    420 void si_log_hw_flush(struct si_context *sctx)
    421 {
    422 	if (!sctx->b.log)
    423 		return;
    424 
    425 	si_log_cs(sctx, sctx->b.log, true);
    426 }
    427 
    428 static const char *priority_to_string(enum radeon_bo_priority priority)
    429 {
    430 #define ITEM(x) [RADEON_PRIO_##x] = #x
    431 	static const char *table[64] = {
    432 		ITEM(FENCE),
    433 	        ITEM(TRACE),
    434 	        ITEM(SO_FILLED_SIZE),
    435 	        ITEM(QUERY),
    436 	        ITEM(IB1),
    437 	        ITEM(IB2),
    438 	        ITEM(DRAW_INDIRECT),
    439 	        ITEM(INDEX_BUFFER),
    440 	        ITEM(VCE),
    441 	        ITEM(UVD),
    442 	        ITEM(SDMA_BUFFER),
    443 	        ITEM(SDMA_TEXTURE),
    444 		ITEM(CP_DMA),
    445 	        ITEM(CONST_BUFFER),
    446 	        ITEM(DESCRIPTORS),
    447 	        ITEM(BORDER_COLORS),
    448 	        ITEM(SAMPLER_BUFFER),
    449 	        ITEM(VERTEX_BUFFER),
    450 	        ITEM(SHADER_RW_BUFFER),
    451 	        ITEM(COMPUTE_GLOBAL),
    452 	        ITEM(SAMPLER_TEXTURE),
    453 	        ITEM(SHADER_RW_IMAGE),
    454 	        ITEM(SAMPLER_TEXTURE_MSAA),
    455 	        ITEM(COLOR_BUFFER),
    456 	        ITEM(DEPTH_BUFFER),
    457 	        ITEM(COLOR_BUFFER_MSAA),
    458 	        ITEM(DEPTH_BUFFER_MSAA),
    459 	        ITEM(CMASK),
    460 	        ITEM(DCC),
    461 	        ITEM(HTILE),
    462 		ITEM(SHADER_BINARY),
    463 		ITEM(SHADER_RINGS),
    464 		ITEM(SCRATCH_BUFFER),
    465 	};
    466 #undef ITEM
    467 
    468 	assert(priority < ARRAY_SIZE(table));
    469 	return table[priority];
    470 }
    471 
    472 static int bo_list_compare_va(const struct radeon_bo_list_item *a,
    473 				   const struct radeon_bo_list_item *b)
    474 {
    475 	return a->vm_address < b->vm_address ? -1 :
    476 	       a->vm_address > b->vm_address ? 1 : 0;
    477 }
    478 
    479 static void si_dump_bo_list(struct si_context *sctx,
    480 			    const struct radeon_saved_cs *saved, FILE *f)
    481 {
    482 	unsigned i,j;
    483 
    484 	if (!saved->bo_list)
    485 		return;
    486 
    487 	/* Sort the list according to VM adddresses first. */
    488 	qsort(saved->bo_list, saved->bo_count,
    489 	      sizeof(saved->bo_list[0]), (void*)bo_list_compare_va);
    490 
    491 	fprintf(f, "Buffer list (in units of pages = 4kB):\n"
    492 		COLOR_YELLOW "        Size    VM start page         "
    493 		"VM end page           Usage" COLOR_RESET "\n");
    494 
    495 	for (i = 0; i < saved->bo_count; i++) {
    496 		/* Note: Buffer sizes are expected to be aligned to 4k by the winsys. */
    497 		const unsigned page_size = sctx->b.screen->info.gart_page_size;
    498 		uint64_t va = saved->bo_list[i].vm_address;
    499 		uint64_t size = saved->bo_list[i].bo_size;
    500 		bool hit = false;
    501 
    502 		/* If there's unused virtual memory between 2 buffers, print it. */
    503 		if (i) {
    504 			uint64_t previous_va_end = saved->bo_list[i-1].vm_address +
    505 						   saved->bo_list[i-1].bo_size;
    506 
    507 			if (va > previous_va_end) {
    508 				fprintf(f, "  %10"PRIu64"    -- hole --\n",
    509 					(va - previous_va_end) / page_size);
    510 			}
    511 		}
    512 
    513 		/* Print the buffer. */
    514 		fprintf(f, "  %10"PRIu64"    0x%013"PRIX64"       0x%013"PRIX64"       ",
    515 			size / page_size, va / page_size, (va + size) / page_size);
    516 
    517 		/* Print the usage. */
    518 		for (j = 0; j < 64; j++) {
    519 			if (!(saved->bo_list[i].priority_usage & (1ull << j)))
    520 				continue;
    521 
    522 			fprintf(f, "%s%s", !hit ? "" : ", ", priority_to_string(j));
    523 			hit = true;
    524 		}
    525 		fprintf(f, "\n");
    526 	}
    527 	fprintf(f, "\nNote: The holes represent memory not used by the IB.\n"
    528 		   "      Other buffers can still be allocated there.\n\n");
    529 }
    530 
    531 static void si_dump_framebuffer(struct si_context *sctx, struct u_log_context *log)
    532 {
    533 	struct pipe_framebuffer_state *state = &sctx->framebuffer.state;
    534 	struct r600_texture *rtex;
    535 	int i;
    536 
    537 	for (i = 0; i < state->nr_cbufs; i++) {
    538 		if (!state->cbufs[i])
    539 			continue;
    540 
    541 		rtex = (struct r600_texture*)state->cbufs[i]->texture;
    542 		u_log_printf(log, COLOR_YELLOW "Color buffer %i:" COLOR_RESET "\n", i);
    543 		si_print_texture_info(sctx->b.screen, rtex, log);
    544 		u_log_printf(log, "\n");
    545 	}
    546 
    547 	if (state->zsbuf) {
    548 		rtex = (struct r600_texture*)state->zsbuf->texture;
    549 		u_log_printf(log, COLOR_YELLOW "Depth-stencil buffer:" COLOR_RESET "\n");
    550 		si_print_texture_info(sctx->b.screen, rtex, log);
    551 		u_log_printf(log, "\n");
    552 	}
    553 }
    554 
    555 typedef unsigned (*slot_remap_func)(unsigned);
    556 
    557 struct si_log_chunk_desc_list {
    558 	/** Pointer to memory map of buffer where the list is uploader */
    559 	uint32_t *gpu_list;
    560 	/** Reference of buffer where the list is uploaded, so that gpu_list
    561 	 * is kept live. */
    562 	struct r600_resource *buf;
    563 
    564 	const char *shader_name;
    565 	const char *elem_name;
    566 	slot_remap_func slot_remap;
    567 	enum chip_class chip_class;
    568 	unsigned element_dw_size;
    569 	unsigned num_elements;
    570 
    571 	uint32_t list[0];
    572 };
    573 
    574 static void
    575 si_log_chunk_desc_list_destroy(void *data)
    576 {
    577 	struct si_log_chunk_desc_list *chunk = data;
    578 	r600_resource_reference(&chunk->buf, NULL);
    579 	FREE(chunk);
    580 }
    581 
    582 static void
    583 si_log_chunk_desc_list_print(void *data, FILE *f)
    584 {
    585 	struct si_log_chunk_desc_list *chunk = data;
    586 
    587 	for (unsigned i = 0; i < chunk->num_elements; i++) {
    588 		unsigned cpu_dw_offset = i * chunk->element_dw_size;
    589 		unsigned gpu_dw_offset = chunk->slot_remap(i) * chunk->element_dw_size;
    590 		const char *list_note = chunk->gpu_list ? "GPU list" : "CPU list";
    591 		uint32_t *cpu_list = chunk->list + cpu_dw_offset;
    592 		uint32_t *gpu_list = chunk->gpu_list ? chunk->gpu_list + gpu_dw_offset : cpu_list;
    593 
    594 		fprintf(f, COLOR_GREEN "%s%s slot %u (%s):" COLOR_RESET "\n",
    595 			chunk->shader_name, chunk->elem_name, i, list_note);
    596 
    597 		switch (chunk->element_dw_size) {
    598 		case 4:
    599 			for (unsigned j = 0; j < 4; j++)
    600 				ac_dump_reg(f, chunk->chip_class,
    601 					    R_008F00_SQ_BUF_RSRC_WORD0 + j*4,
    602 					    gpu_list[j], 0xffffffff);
    603 			break;
    604 		case 8:
    605 			for (unsigned j = 0; j < 8; j++)
    606 				ac_dump_reg(f, chunk->chip_class,
    607 					    R_008F10_SQ_IMG_RSRC_WORD0 + j*4,
    608 					    gpu_list[j], 0xffffffff);
    609 
    610 			fprintf(f, COLOR_CYAN "    Buffer:" COLOR_RESET "\n");
    611 			for (unsigned j = 0; j < 4; j++)
    612 				ac_dump_reg(f, chunk->chip_class,
    613 					    R_008F00_SQ_BUF_RSRC_WORD0 + j*4,
    614 					    gpu_list[4+j], 0xffffffff);
    615 			break;
    616 		case 16:
    617 			for (unsigned j = 0; j < 8; j++)
    618 				ac_dump_reg(f, chunk->chip_class,
    619 					    R_008F10_SQ_IMG_RSRC_WORD0 + j*4,
    620 					    gpu_list[j], 0xffffffff);
    621 
    622 			fprintf(f, COLOR_CYAN "    Buffer:" COLOR_RESET "\n");
    623 			for (unsigned j = 0; j < 4; j++)
    624 				ac_dump_reg(f, chunk->chip_class,
    625 					    R_008F00_SQ_BUF_RSRC_WORD0 + j*4,
    626 					    gpu_list[4+j], 0xffffffff);
    627 
    628 			fprintf(f, COLOR_CYAN "    FMASK:" COLOR_RESET "\n");
    629 			for (unsigned j = 0; j < 8; j++)
    630 				ac_dump_reg(f, chunk->chip_class,
    631 					    R_008F10_SQ_IMG_RSRC_WORD0 + j*4,
    632 					    gpu_list[8+j], 0xffffffff);
    633 
    634 			fprintf(f, COLOR_CYAN "    Sampler state:" COLOR_RESET "\n");
    635 			for (unsigned j = 0; j < 4; j++)
    636 				ac_dump_reg(f, chunk->chip_class,
    637 					    R_008F30_SQ_IMG_SAMP_WORD0 + j*4,
    638 					    gpu_list[12+j], 0xffffffff);
    639 			break;
    640 		}
    641 
    642 		if (memcmp(gpu_list, cpu_list, chunk->element_dw_size * 4) != 0) {
    643 			fprintf(f, COLOR_RED "!!!!! This slot was corrupted in GPU memory !!!!!"
    644 				COLOR_RESET "\n");
    645 		}
    646 
    647 		fprintf(f, "\n");
    648 	}
    649 
    650 }
    651 
/* Callback table passed to u_log_chunk() for descriptor-list chunks
 * (struct si_log_chunk_desc_list).
 */
static const struct u_log_chunk_type si_log_chunk_type_descriptor_list = {
	.destroy = si_log_chunk_desc_list_destroy,
	.print = si_log_chunk_desc_list_print,
};
    656 
    657 static void si_dump_descriptor_list(struct si_screen *screen,
    658 				    struct si_descriptors *desc,
    659 				    const char *shader_name,
    660 				    const char *elem_name,
    661 				    unsigned element_dw_size,
    662 				    unsigned num_elements,
    663 				    slot_remap_func slot_remap,
    664 				    struct u_log_context *log)
    665 {
    666 	if (!desc->list)
    667 		return;
    668 
    669 	/* In some cases, the caller doesn't know how many elements are really
    670 	 * uploaded. Reduce num_elements to fit in the range of active slots. */
    671 	unsigned active_range_dw_begin =
    672 		desc->first_active_slot * desc->element_dw_size;
    673 	unsigned active_range_dw_end =
    674 		active_range_dw_begin + desc->num_active_slots * desc->element_dw_size;
    675 
    676 	while (num_elements > 0) {
    677 		int i = slot_remap(num_elements - 1);
    678 		unsigned dw_begin = i * element_dw_size;
    679 		unsigned dw_end = dw_begin + element_dw_size;
    680 
    681 		if (dw_begin >= active_range_dw_begin && dw_end <= active_range_dw_end)
    682 			break;
    683 
    684 		num_elements--;
    685 	}
    686 
    687 	struct si_log_chunk_desc_list *chunk =
    688 		CALLOC_VARIANT_LENGTH_STRUCT(si_log_chunk_desc_list,
    689 					     4 * element_dw_size * num_elements);
    690 	chunk->shader_name = shader_name;
    691 	chunk->elem_name = elem_name;
    692 	chunk->element_dw_size = element_dw_size;
    693 	chunk->num_elements = num_elements;
    694 	chunk->slot_remap = slot_remap;
    695 	chunk->chip_class = screen->info.chip_class;
    696 
    697 	r600_resource_reference(&chunk->buf, desc->buffer);
    698 	chunk->gpu_list = desc->gpu_list;
    699 
    700 	for (unsigned i = 0; i < num_elements; ++i) {
    701 		memcpy(&chunk->list[i * element_dw_size],
    702 		       &desc->list[slot_remap(i) * element_dw_size],
    703 		       4 * element_dw_size);
    704 	}
    705 
    706 	u_log_chunk(log, &si_log_chunk_type_descriptor_list, chunk);
    707 }
    708 
    709 static unsigned si_identity(unsigned slot)
    710 {
    711 	return slot;
    712 }
    713 
    714 static void si_dump_descriptors(struct si_context *sctx,
    715 				enum pipe_shader_type processor,
    716 				const struct tgsi_shader_info *info,
    717 				struct u_log_context *log)
    718 {
    719 	struct si_descriptors *descs =
    720 		&sctx->descriptors[SI_DESCS_FIRST_SHADER +
    721 				   processor * SI_NUM_SHADER_DESCS];
    722 	static const char *shader_name[] = {"VS", "PS", "GS", "TCS", "TES", "CS"};
    723 	const char *name = shader_name[processor];
    724 	unsigned enabled_constbuf, enabled_shaderbuf, enabled_samplers;
    725 	unsigned enabled_images;
    726 
    727 	if (info) {
    728 		enabled_constbuf = info->const_buffers_declared;
    729 		enabled_shaderbuf = info->shader_buffers_declared;
    730 		enabled_samplers = info->samplers_declared;
    731 		enabled_images = info->images_declared;
    732 	} else {
    733 		enabled_constbuf = sctx->const_and_shader_buffers[processor].enabled_mask >>
    734 				   SI_NUM_SHADER_BUFFERS;
    735 		enabled_shaderbuf = sctx->const_and_shader_buffers[processor].enabled_mask &
    736 				    u_bit_consecutive(0, SI_NUM_SHADER_BUFFERS);
    737 		enabled_shaderbuf = util_bitreverse(enabled_shaderbuf) >>
    738 				    (32 - SI_NUM_SHADER_BUFFERS);
    739 		enabled_samplers = sctx->samplers[processor].enabled_mask;
    740 		enabled_images = sctx->images[processor].enabled_mask;
    741 	}
    742 
    743 	if (processor == PIPE_SHADER_VERTEX) {
    744 		assert(info); /* only CS may not have an info struct */
    745 
    746 		si_dump_descriptor_list(sctx->screen, &sctx->vertex_buffers, name,
    747 					" - Vertex buffer", 4, info->num_inputs,
    748 					si_identity, log);
    749 	}
    750 
    751 	si_dump_descriptor_list(sctx->screen,
    752 				&descs[SI_SHADER_DESCS_CONST_AND_SHADER_BUFFERS],
    753 				name, " - Constant buffer", 4,
    754 				util_last_bit(enabled_constbuf),
    755 				si_get_constbuf_slot, log);
    756 	si_dump_descriptor_list(sctx->screen,
    757 				&descs[SI_SHADER_DESCS_CONST_AND_SHADER_BUFFERS],
    758 				name, " - Shader buffer", 4,
    759 				util_last_bit(enabled_shaderbuf),
    760 				si_get_shaderbuf_slot, log);
    761 	si_dump_descriptor_list(sctx->screen,
    762 				&descs[SI_SHADER_DESCS_SAMPLERS_AND_IMAGES],
    763 				name, " - Sampler", 16,
    764 				util_last_bit(enabled_samplers),
    765 				si_get_sampler_slot, log);
    766 	si_dump_descriptor_list(sctx->screen,
    767 				&descs[SI_SHADER_DESCS_SAMPLERS_AND_IMAGES],
    768 				name, " - Image", 8,
    769 				util_last_bit(enabled_images),
    770 				si_get_image_slot, log);
    771 }
    772 
    773 static void si_dump_gfx_descriptors(struct si_context *sctx,
    774 				    const struct si_shader_ctx_state *state,
    775 				    struct u_log_context *log)
    776 {
    777 	if (!state->cso || !state->current)
    778 		return;
    779 
    780 	si_dump_descriptors(sctx, state->cso->type, &state->cso->info, log);
    781 }
    782 
    783 static void si_dump_compute_descriptors(struct si_context *sctx,
    784 					struct u_log_context *log)
    785 {
    786 	if (!sctx->cs_shader_state.program)
    787 		return;
    788 
    789 	si_dump_descriptors(sctx, PIPE_SHADER_COMPUTE, NULL, log);
    790 }
    791 
    792 struct si_shader_inst {
    793 	char text[160];  /* one disasm line */
    794 	unsigned offset; /* instruction offset */
    795 	unsigned size;   /* instruction size = 4 or 8 */
    796 };
    797 
    798 /* Split a disassembly string into lines and add them to the array pointed
    799  * to by "instructions". */
    800 static void si_add_split_disasm(const char *disasm,
    801 				uint64_t start_addr,
    802 				unsigned *num,
    803 				struct si_shader_inst *instructions)
    804 {
    805 	struct si_shader_inst *last_inst = *num ? &instructions[*num - 1] : NULL;
    806 	char *next;
    807 
    808 	while ((next = strchr(disasm, '\n'))) {
    809 		struct si_shader_inst *inst = &instructions[*num];
    810 		unsigned len = next - disasm;
    811 
    812 		assert(len < ARRAY_SIZE(inst->text));
    813 		memcpy(inst->text, disasm, len);
    814 		inst->text[len] = 0;
    815 		inst->offset = last_inst ? last_inst->offset + last_inst->size : 0;
    816 
    817 		const char *semicolon = strchr(disasm, ';');
    818 		assert(semicolon);
    819 		/* More than 16 chars after ";" means the instruction is 8 bytes long. */
    820 		inst->size = next - semicolon > 16 ? 8 : 4;
    821 
    822 		snprintf(inst->text + len, ARRAY_SIZE(inst->text) - len,
    823 			" [PC=0x%"PRIx64", off=%u, size=%u]",
    824 			start_addr + inst->offset, inst->offset, inst->size);
    825 
    826 		last_inst = inst;
    827 		(*num)++;
    828 		disasm = next + 1;
    829 	}
    830 }
    831 
    832 /* If the shader is being executed, print its asm instructions, and annotate
    833  * those that are being executed right now with information about waves that
    834  * execute them. This is most useful during a GPU hang.
    835  */
    836 static void si_print_annotated_shader(struct si_shader *shader,
    837 				      struct ac_wave_info *waves,
    838 				      unsigned num_waves,
    839 				      FILE *f)
    840 {
    841 	if (!shader || !shader->binary.disasm_string)
    842 		return;
    843 
    844 	uint64_t start_addr = shader->bo->gpu_address;
    845 	uint64_t end_addr = start_addr + shader->bo->b.b.width0;
    846 	unsigned i;
    847 
    848 	/* See if any wave executes the shader. */
    849 	for (i = 0; i < num_waves; i++) {
    850 		if (start_addr <= waves[i].pc && waves[i].pc <= end_addr)
    851 			break;
    852 	}
    853 	if (i == num_waves)
    854 		return; /* the shader is not being executed */
    855 
    856 	/* Remember the first found wave. The waves are sorted according to PC. */
    857 	waves = &waves[i];
    858 	num_waves -= i;
    859 
    860 	/* Get the list of instructions.
    861 	 * Buffer size / 4 is the upper bound of the instruction count.
    862 	 */
    863 	unsigned num_inst = 0;
    864 	struct si_shader_inst *instructions =
    865 		calloc(shader->bo->b.b.width0 / 4, sizeof(struct si_shader_inst));
    866 
    867 	if (shader->prolog) {
    868 		si_add_split_disasm(shader->prolog->binary.disasm_string,
    869 				    start_addr, &num_inst, instructions);
    870 	}
    871 	if (shader->previous_stage) {
    872 		si_add_split_disasm(shader->previous_stage->binary.disasm_string,
    873 				    start_addr, &num_inst, instructions);
    874 	}
    875 	if (shader->prolog2) {
    876 		si_add_split_disasm(shader->prolog2->binary.disasm_string,
    877 				    start_addr, &num_inst, instructions);
    878 	}
    879 	si_add_split_disasm(shader->binary.disasm_string,
    880 			    start_addr, &num_inst, instructions);
    881 	if (shader->epilog) {
    882 		si_add_split_disasm(shader->epilog->binary.disasm_string,
    883 				    start_addr, &num_inst, instructions);
    884 	}
    885 
    886 	fprintf(f, COLOR_YELLOW "%s - annotated disassembly:" COLOR_RESET "\n",
    887 		si_get_shader_name(shader, shader->selector->type));
    888 
    889 	/* Print instructions with annotations. */
    890 	for (i = 0; i < num_inst; i++) {
    891 		struct si_shader_inst *inst = &instructions[i];
    892 
    893 		fprintf(f, "%s\n", inst->text);
    894 
    895 		/* Print which waves execute the instruction right now. */
    896 		while (num_waves && start_addr + inst->offset == waves->pc) {
    897 			fprintf(f,
    898 				"          " COLOR_GREEN "^ SE%u SH%u CU%u "
    899 				"SIMD%u WAVE%u  EXEC=%016"PRIx64 "  ",
    900 				waves->se, waves->sh, waves->cu, waves->simd,
    901 				waves->wave, waves->exec);
    902 
    903 			if (inst->size == 4) {
    904 				fprintf(f, "INST32=%08X" COLOR_RESET "\n",
    905 					waves->inst_dw0);
    906 			} else {
    907 				fprintf(f, "INST64=%08X %08X" COLOR_RESET "\n",
    908 					waves->inst_dw0, waves->inst_dw1);
    909 			}
    910 
    911 			waves->matched = true;
    912 			waves = &waves[1];
    913 			num_waves--;
    914 		}
    915 	}
    916 
    917 	fprintf(f, "\n\n");
    918 	free(instructions);
    919 }
    920 
    921 static void si_dump_annotated_shaders(struct si_context *sctx, FILE *f)
    922 {
    923 	struct ac_wave_info waves[AC_MAX_WAVES_PER_CHIP];
    924 	unsigned num_waves = ac_get_wave_info(waves);
    925 
    926 	fprintf(f, COLOR_CYAN "The number of active waves = %u" COLOR_RESET
    927 		"\n\n", num_waves);
    928 
    929 	si_print_annotated_shader(sctx->vs_shader.current, waves, num_waves, f);
    930 	si_print_annotated_shader(sctx->tcs_shader.current, waves, num_waves, f);
    931 	si_print_annotated_shader(sctx->tes_shader.current, waves, num_waves, f);
    932 	si_print_annotated_shader(sctx->gs_shader.current, waves, num_waves, f);
    933 	si_print_annotated_shader(sctx->ps_shader.current, waves, num_waves, f);
    934 
    935 	/* Print waves executing shaders that are not currently bound. */
    936 	unsigned i;
    937 	bool found = false;
    938 	for (i = 0; i < num_waves; i++) {
    939 		if (waves[i].matched)
    940 			continue;
    941 
    942 		if (!found) {
    943 			fprintf(f, COLOR_CYAN
    944 				"Waves not executing currently-bound shaders:"
    945 				COLOR_RESET "\n");
    946 			found = true;
    947 		}
    948 		fprintf(f, "    SE%u SH%u CU%u SIMD%u WAVE%u  EXEC=%016"PRIx64
    949 			"  INST=%08X %08X  PC=%"PRIx64"\n",
    950 			waves[i].se, waves[i].sh, waves[i].cu, waves[i].simd,
    951 			waves[i].wave, waves[i].exec, waves[i].inst_dw0,
    952 			waves[i].inst_dw1, waves[i].pc);
    953 	}
    954 	if (found)
    955 		fprintf(f, "\n\n");
    956 }
    957 
    958 static void si_dump_command(const char *title, const char *command, FILE *f)
    959 {
    960 	char line[2000];
    961 
    962 	FILE *p = popen(command, "r");
    963 	if (!p)
    964 		return;
    965 
    966 	fprintf(f, COLOR_YELLOW "%s: " COLOR_RESET "\n", title);
    967 	while (fgets(line, sizeof(line), p))
    968 		fputs(line, f);
    969 	fprintf(f, "\n\n");
    970 	pclose(p);
    971 }
    972 
    973 static void si_dump_debug_state(struct pipe_context *ctx, FILE *f,
    974 				unsigned flags)
    975 {
    976 	struct si_context *sctx = (struct si_context*)ctx;
    977 
    978 	if (sctx->b.log)
    979 		u_log_flush(sctx->b.log);
    980 
    981 	if (flags & PIPE_DUMP_DEVICE_STATUS_REGISTERS) {
    982 		si_dump_debug_registers(sctx, f);
    983 
    984 		si_dump_annotated_shaders(sctx, f);
    985 		si_dump_command("Active waves (raw data)", "umr -wa | column -t", f);
    986 		si_dump_command("Wave information", "umr -O bits -wa", f);
    987 	}
    988 }
    989 
    990 void si_log_draw_state(struct si_context *sctx, struct u_log_context *log)
    991 {
    992 	if (!log)
    993 		return;
    994 
    995 	si_dump_framebuffer(sctx, log);
    996 
    997 	si_dump_gfx_shader(sctx, &sctx->vs_shader, log);
    998 	si_dump_gfx_shader(sctx, &sctx->tcs_shader, log);
    999 	si_dump_gfx_shader(sctx, &sctx->tes_shader, log);
   1000 	si_dump_gfx_shader(sctx, &sctx->gs_shader, log);
   1001 	si_dump_gfx_shader(sctx, &sctx->ps_shader, log);
   1002 
   1003 	si_dump_descriptor_list(sctx->screen,
   1004 				&sctx->descriptors[SI_DESCS_RW_BUFFERS],
   1005 				"", "RW buffers", 4, SI_NUM_RW_BUFFERS,
   1006 				si_identity, log);
   1007 	si_dump_gfx_descriptors(sctx, &sctx->vs_shader, log);
   1008 	si_dump_gfx_descriptors(sctx, &sctx->tcs_shader, log);
   1009 	si_dump_gfx_descriptors(sctx, &sctx->tes_shader, log);
   1010 	si_dump_gfx_descriptors(sctx, &sctx->gs_shader, log);
   1011 	si_dump_gfx_descriptors(sctx, &sctx->ps_shader, log);
   1012 }
   1013 
   1014 void si_log_compute_state(struct si_context *sctx, struct u_log_context *log)
   1015 {
   1016 	if (!log)
   1017 		return;
   1018 
   1019 	si_dump_compute_shader(sctx, log);
   1020 	si_dump_compute_descriptors(sctx, log);
   1021 }
   1022 
   1023 static void si_dump_dma(struct si_context *sctx,
   1024 			struct radeon_saved_cs *saved, FILE *f)
   1025 {
   1026 	static const char ib_name[] = "sDMA IB";
   1027 	unsigned i;
   1028 
   1029 	si_dump_bo_list(sctx, saved, f);
   1030 
   1031 	fprintf(f, "------------------ %s begin ------------------\n", ib_name);
   1032 
   1033 	for (i = 0; i < saved->num_dw; ++i) {
   1034 		fprintf(f, " %08x\n", saved->ib[i]);
   1035 	}
   1036 
   1037 	fprintf(f, "------------------- %s end -------------------\n", ib_name);
   1038 	fprintf(f, "\n");
   1039 
   1040 	fprintf(f, "SDMA Dump Done.\n");
   1041 }
   1042 
   1043 void si_check_vm_faults(struct r600_common_context *ctx,
   1044 			struct radeon_saved_cs *saved, enum ring_type ring)
   1045 {
   1046 	struct si_context *sctx = (struct si_context *)ctx;
   1047 	struct pipe_screen *screen = sctx->b.b.screen;
   1048 	FILE *f;
   1049 	uint64_t addr;
   1050 	char cmd_line[4096];
   1051 
   1052 	if (!ac_vm_fault_occured(sctx->b.chip_class,
   1053 				 &sctx->dmesg_timestamp, &addr))
   1054 		return;
   1055 
   1056 	f = dd_get_debug_file(false);
   1057 	if (!f)
   1058 		return;
   1059 
   1060 	fprintf(f, "VM fault report.\n\n");
   1061 	if (os_get_command_line(cmd_line, sizeof(cmd_line)))
   1062 		fprintf(f, "Command: %s\n", cmd_line);
   1063 	fprintf(f, "Driver vendor: %s\n", screen->get_vendor(screen));
   1064 	fprintf(f, "Device vendor: %s\n", screen->get_device_vendor(screen));
   1065 	fprintf(f, "Device name: %s\n\n", screen->get_name(screen));
   1066 	fprintf(f, "Failing VM page: 0x%08"PRIx64"\n\n", addr);
   1067 
   1068 	if (sctx->apitrace_call_number)
   1069 		fprintf(f, "Last apitrace call: %u\n\n",
   1070 			sctx->apitrace_call_number);
   1071 
   1072 	switch (ring) {
   1073 	case RING_GFX: {
   1074 		struct u_log_context log;
   1075 		u_log_context_init(&log);
   1076 
   1077 		si_log_draw_state(sctx, &log);
   1078 		si_log_compute_state(sctx, &log);
   1079 		si_log_cs(sctx, &log, true);
   1080 
   1081 		u_log_new_page_print(&log, f);
   1082 		u_log_context_destroy(&log);
   1083 		break;
   1084 	}
   1085 	case RING_DMA:
   1086 		si_dump_dma(sctx, saved, f);
   1087 		break;
   1088 
   1089 	default:
   1090 		break;
   1091 	}
   1092 
   1093 	fclose(f);
   1094 
   1095 	fprintf(stderr, "Detected a VM fault, exiting...\n");
   1096 	exit(0);
   1097 }
   1098 
   1099 void si_init_debug_functions(struct si_context *sctx)
   1100 {
   1101 	sctx->b.b.dump_debug_state = si_dump_debug_state;
   1102 	sctx->b.check_vm_faults = si_check_vm_faults;
   1103 
   1104 	/* Set the initial dmesg timestamp for this context, so that
   1105 	 * only new messages will be checked for VM faults.
   1106 	 */
   1107 	if (sctx->screen->debug_flags & DBG(CHECK_VM))
   1108 		ac_vm_fault_occured(sctx->b.chip_class,
   1109 				    &sctx->dmesg_timestamp, NULL);
   1110 }
   1111