Home | History | Annotate | Download | only in common
      1 /*
      2  * Copyright 2015 Advanced Micro Devices, Inc.
      3  *
      4  * Permission is hereby granted, free of charge, to any person obtaining a
      5  * copy of this software and associated documentation files (the "Software"),
      6  * to deal in the Software without restriction, including without limitation
      7  * on the rights to use, copy, modify, merge, publish, distribute, sub
      8  * license, and/or sell copies of the Software, and to permit persons to whom
      9  * the Software is furnished to do so, subject to the following conditions:
     10  *
     11  * The above copyright notice and this permission notice (including the next
     12  * paragraph) shall be included in all copies or substantial portions of the
     13  * Software.
     14  *
     15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     17  * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
     18  * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
     19  * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
     20  * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
     21  * USE OR OTHER DEALINGS IN THE SOFTWARE.
     22  */
     23 
     24 #include "ac_debug.h"
     25 
     26 #ifdef HAVE_VALGRIND
     27 #include <valgrind.h>
     28 #include <memcheck.h>
     29 #define VG(x) x
     30 #else
     31 #define VG(x)
     32 #endif
     33 
     34 #include <inttypes.h>
     35 
     36 #include "sid.h"
     37 #include "gfx9d.h"
     38 #include "sid_tables.h"
     39 #include "util/u_math.h"
     40 #include "util/u_memory.h"
     41 #include "util/u_string.h"
     42 
     43 #include <assert.h>
     44 
     45 /* Parsed IBs are difficult to read without colors. Use "less -R file" to
     46  * read them, or use "aha -b -f file" to convert them to html.
     47  */
     48 #define COLOR_RESET	"\033[0m"
     49 #define COLOR_RED	"\033[31m"
     50 #define COLOR_GREEN	"\033[1;32m"
     51 #define COLOR_YELLOW	"\033[1;33m"
     52 #define COLOR_CYAN	"\033[1;36m"
     53 
     54 #define INDENT_PKT 8
     55 
/* State carried while walking and printing one indirect buffer (IB). */
struct ac_ib_parser {
	/* Stream that ac_ib_get() writes the raw dword markers to. */
	FILE *f;
	/* Dwords of the IB being parsed. */
	uint32_t *ib;
	/* Number of dwords in "ib". */
	unsigned num_dw;
	/* Last trace IDs known to have been reached by the CP. */
	const int *trace_ids;
	/* Entries remaining in "trace_ids"; 0 means tracing is disabled. */
	unsigned trace_id_count;
	/* Chip generation, used to pick register tables and packet layouts. */
	enum chip_class chip_class;
	/* Returns a mapped pointer for an IB address; may be NULL. */
	ac_debug_addr_callback addr_callback;
	/* User data passed as the first argument of addr_callback. */
	void *addr_callback_data;

	/* Index of the next dword that ac_ib_get() will return. */
	unsigned cur_dw;
};
     68 
     69 static void ac_do_parse_ib(FILE *f, struct ac_ib_parser *ib);
     70 
     71 static void print_spaces(FILE *f, unsigned num)
     72 {
     73 	fprintf(f, "%*s", num, "");
     74 }
     75 
     76 static void print_value(FILE *file, uint32_t value, int bits)
     77 {
     78 	/* Guess if it's int or float */
     79 	if (value <= (1 << 15)) {
     80 		if (value <= 9)
     81 			fprintf(file, "%u\n", value);
     82 		else
     83 			fprintf(file, "%u (0x%0*x)\n", value, bits / 4, value);
     84 	} else {
     85 		float f = uif(value);
     86 
     87 		if (fabs(f) < 100000 && f*10 == floor(f*10))
     88 			fprintf(file, "%.1ff (0x%0*x)\n", f, bits / 4, value);
     89 		else
     90 			/* Don't print more leading zeros than there are bits. */
     91 			fprintf(file, "0x%0*x\n", bits / 4, value);
     92 	}
     93 }
     94 
     95 static void print_named_value(FILE *file, const char *name, uint32_t value,
     96 			      int bits)
     97 {
     98 	print_spaces(file, INDENT_PKT);
     99 	fprintf(file, COLOR_YELLOW "%s" COLOR_RESET " <- ", name);
    100 	print_value(file, value, bits);
    101 }
    102 
    103 static const struct si_reg *find_register(const struct si_reg *table,
    104 					  unsigned table_size,
    105 					  unsigned offset)
    106 {
    107 	for (unsigned i = 0; i < table_size; i++) {
    108 		const struct si_reg *reg = &table[i];
    109 
    110 		if (reg->offset == offset)
    111 			return reg;
    112 	}
    113 
    114 	return NULL;
    115 }
    116 
    117 void ac_dump_reg(FILE *file, enum chip_class chip_class, unsigned offset,
    118 		 uint32_t value, uint32_t field_mask)
    119 {
    120 	const struct si_reg *reg = NULL;
    121 
    122 	if (chip_class >= GFX9)
    123 		reg = find_register(gfx9d_reg_table, ARRAY_SIZE(gfx9d_reg_table), offset);
    124 	if (!reg)
    125 		reg = find_register(sid_reg_table, ARRAY_SIZE(sid_reg_table), offset);
    126 
    127 	if (reg) {
    128 		const char *reg_name = sid_strings + reg->name_offset;
    129 		bool first_field = true;
    130 
    131 		print_spaces(file, INDENT_PKT);
    132 		fprintf(file, COLOR_YELLOW "%s" COLOR_RESET " <- ",
    133 			reg_name);
    134 
    135 		if (!reg->num_fields) {
    136 			print_value(file, value, 32);
    137 			return;
    138 		}
    139 
    140 		for (unsigned f = 0; f < reg->num_fields; f++) {
    141 			const struct si_field *field = sid_fields_table + reg->fields_offset + f;
    142 			const int *values_offsets = sid_strings_offsets + field->values_offset;
    143 			uint32_t val = (value & field->mask) >>
    144 				       (ffs(field->mask) - 1);
    145 
    146 			if (!(field->mask & field_mask))
    147 				continue;
    148 
    149 			/* Indent the field. */
    150 			if (!first_field)
    151 				print_spaces(file,
    152 					     INDENT_PKT + strlen(reg_name) + 4);
    153 
    154 			/* Print the field. */
    155 			fprintf(file, "%s = ", sid_strings + field->name_offset);
    156 
    157 			if (val < field->num_values && values_offsets[val] >= 0)
    158 				fprintf(file, "%s\n", sid_strings + values_offsets[val]);
    159 			else
    160 				print_value(file, val,
    161 					    util_bitcount(field->mask));
    162 
    163 			first_field = false;
    164 		}
    165 		return;
    166 	}
    167 
    168 	print_spaces(file, INDENT_PKT);
    169 	fprintf(file, COLOR_YELLOW "0x%05x" COLOR_RESET " <- 0x%08x\n", offset, value);
    170 }
    171 
    172 static uint32_t ac_ib_get(struct ac_ib_parser *ib)
    173 {
    174 	uint32_t v = 0;
    175 
    176 	if (ib->cur_dw < ib->num_dw) {
    177 		v = ib->ib[ib->cur_dw];
    178 #ifdef HAVE_VALGRIND
    179 		/* Help figure out where garbage data is written to IBs.
    180 		 *
    181 		 * Arguably we should do this already when the IBs are written,
    182 		 * see RADEON_VALGRIND. The problem is that client-requests to
    183 		 * Valgrind have an overhead even when Valgrind isn't running,
    184 		 * and radeon_emit is performance sensitive...
    185 		 */
    186 		if (VALGRIND_CHECK_VALUE_IS_DEFINED(v))
    187 			fprintf(ib->f, COLOR_RED "Valgrind: The next DWORD is garbage"
    188 				COLOR_RESET "\n");
    189 #endif
    190 		fprintf(ib->f, "\n\035#%08x ", v);
    191 	} else {
    192 		fprintf(ib->f, "\n\035#???????? ");
    193 	}
    194 
    195 	ib->cur_dw++;
    196 	return v;
    197 }
    198 
    199 static void ac_parse_set_reg_packet(FILE *f, unsigned count, unsigned reg_offset,
    200 				    struct ac_ib_parser *ib)
    201 {
    202 	unsigned reg_dw = ac_ib_get(ib);
    203 	unsigned reg = ((reg_dw & 0xFFFF) << 2) + reg_offset;
    204 	unsigned index = reg_dw >> 28;
    205 	int i;
    206 
    207 	if (index != 0) {
    208 		print_spaces(f, INDENT_PKT);
    209 		fprintf(f, "INDEX = %u\n", index);
    210 	}
    211 
    212 	for (i = 0; i < count; i++)
    213 		ac_dump_reg(f, ib->chip_class, reg + i*4, ac_ib_get(ib), ~0);
    214 }
    215 
    216 static void ac_parse_packet3(FILE *f, uint32_t header, struct ac_ib_parser *ib,
    217                              int *current_trace_id)
    218 {
    219 	unsigned first_dw = ib->cur_dw;
    220 	int count = PKT_COUNT_G(header);
    221 	unsigned op = PKT3_IT_OPCODE_G(header);
    222 	const char *predicate = PKT3_PREDICATE(header) ? "(predicate)" : "";
    223 	int i;
    224 
    225 	/* Print the name first. */
    226 	for (i = 0; i < ARRAY_SIZE(packet3_table); i++)
    227 		if (packet3_table[i].op == op)
    228 			break;
    229 
    230 	if (i < ARRAY_SIZE(packet3_table)) {
    231 		const char *name = sid_strings + packet3_table[i].name_offset;
    232 
    233 		if (op == PKT3_SET_CONTEXT_REG ||
    234 		    op == PKT3_SET_CONFIG_REG ||
    235 		    op == PKT3_SET_UCONFIG_REG ||
    236 		    op == PKT3_SET_SH_REG)
    237 			fprintf(f, COLOR_CYAN "%s%s" COLOR_CYAN ":\n",
    238 				name, predicate);
    239 		else
    240 			fprintf(f, COLOR_GREEN "%s%s" COLOR_RESET ":\n",
    241 				name, predicate);
    242 	} else
    243 		fprintf(f, COLOR_RED "PKT3_UNKNOWN 0x%x%s" COLOR_RESET ":\n",
    244 			op, predicate);
    245 
    246 	/* Print the contents. */
    247 	switch (op) {
    248 	case PKT3_SET_CONTEXT_REG:
    249 		ac_parse_set_reg_packet(f, count, SI_CONTEXT_REG_OFFSET, ib);
    250 		break;
    251 	case PKT3_SET_CONFIG_REG:
    252 		ac_parse_set_reg_packet(f, count, SI_CONFIG_REG_OFFSET, ib);
    253 		break;
    254 	case PKT3_SET_UCONFIG_REG:
    255 		ac_parse_set_reg_packet(f, count, CIK_UCONFIG_REG_OFFSET, ib);
    256 		break;
    257 	case PKT3_SET_SH_REG:
    258 		ac_parse_set_reg_packet(f, count, SI_SH_REG_OFFSET, ib);
    259 		break;
    260 	case PKT3_ACQUIRE_MEM:
    261 		ac_dump_reg(f, ib->chip_class, R_0301F0_CP_COHER_CNTL, ac_ib_get(ib), ~0);
    262 		ac_dump_reg(f, ib->chip_class, R_0301F4_CP_COHER_SIZE, ac_ib_get(ib), ~0);
    263 		ac_dump_reg(f, ib->chip_class, R_030230_CP_COHER_SIZE_HI, ac_ib_get(ib), ~0);
    264 		ac_dump_reg(f, ib->chip_class, R_0301F8_CP_COHER_BASE, ac_ib_get(ib), ~0);
    265 		ac_dump_reg(f, ib->chip_class, R_0301E4_CP_COHER_BASE_HI, ac_ib_get(ib), ~0);
    266 		print_named_value(f, "POLL_INTERVAL", ac_ib_get(ib), 16);
    267 		break;
    268 	case PKT3_SURFACE_SYNC:
    269 		if (ib->chip_class >= CIK) {
    270 			ac_dump_reg(f, ib->chip_class, R_0301F0_CP_COHER_CNTL, ac_ib_get(ib), ~0);
    271 			ac_dump_reg(f, ib->chip_class, R_0301F4_CP_COHER_SIZE, ac_ib_get(ib), ~0);
    272 			ac_dump_reg(f, ib->chip_class, R_0301F8_CP_COHER_BASE, ac_ib_get(ib), ~0);
    273 		} else {
    274 			ac_dump_reg(f, ib->chip_class, R_0085F0_CP_COHER_CNTL, ac_ib_get(ib), ~0);
    275 			ac_dump_reg(f, ib->chip_class, R_0085F4_CP_COHER_SIZE, ac_ib_get(ib), ~0);
    276 			ac_dump_reg(f, ib->chip_class, R_0085F8_CP_COHER_BASE, ac_ib_get(ib), ~0);
    277 		}
    278 		print_named_value(f, "POLL_INTERVAL", ac_ib_get(ib), 16);
    279 		break;
    280 	case PKT3_EVENT_WRITE: {
    281 		uint32_t event_dw = ac_ib_get(ib);
    282 		ac_dump_reg(f, ib->chip_class, R_028A90_VGT_EVENT_INITIATOR, event_dw,
    283 			    S_028A90_EVENT_TYPE(~0));
    284 		print_named_value(f, "EVENT_INDEX", (event_dw >> 8) & 0xf, 4);
    285 		print_named_value(f, "INV_L2", (event_dw >> 20) & 0x1, 1);
    286 		if (count > 0) {
    287 			print_named_value(f, "ADDRESS_LO", ac_ib_get(ib), 32);
    288 			print_named_value(f, "ADDRESS_HI", ac_ib_get(ib), 16);
    289 		}
    290 		break;
    291 	}
    292 	case PKT3_EVENT_WRITE_EOP: {
    293 		uint32_t event_dw = ac_ib_get(ib);
    294 		ac_dump_reg(f, ib->chip_class, R_028A90_VGT_EVENT_INITIATOR, event_dw,
    295 			    S_028A90_EVENT_TYPE(~0));
    296 		print_named_value(f, "EVENT_INDEX", (event_dw >> 8) & 0xf, 4);
    297 		print_named_value(f, "TCL1_VOL_ACTION_ENA", (event_dw >> 12) & 0x1, 1);
    298 		print_named_value(f, "TC_VOL_ACTION_ENA", (event_dw >> 13) & 0x1, 1);
    299 		print_named_value(f, "TC_WB_ACTION_ENA", (event_dw >> 15) & 0x1, 1);
    300 		print_named_value(f, "TCL1_ACTION_ENA", (event_dw >> 16) & 0x1, 1);
    301 		print_named_value(f, "TC_ACTION_ENA", (event_dw >> 17) & 0x1, 1);
    302 		print_named_value(f, "ADDRESS_LO", ac_ib_get(ib), 32);
    303 		uint32_t addr_hi_dw = ac_ib_get(ib);
    304 		print_named_value(f, "ADDRESS_HI", addr_hi_dw, 16);
    305 		print_named_value(f, "DST_SEL", (addr_hi_dw >> 16) & 0x3, 2);
    306 		print_named_value(f, "INT_SEL", (addr_hi_dw >> 24) & 0x7, 3);
    307 		print_named_value(f, "DATA_SEL", addr_hi_dw >> 29, 3);
    308 		print_named_value(f, "DATA_LO", ac_ib_get(ib), 32);
    309 		print_named_value(f, "DATA_HI", ac_ib_get(ib), 32);
    310 		break;
    311 	}
    312 	case PKT3_RELEASE_MEM: {
    313 		uint32_t event_dw = ac_ib_get(ib);
    314 		ac_dump_reg(f, ib->chip_class, R_028A90_VGT_EVENT_INITIATOR, event_dw,
    315 			    S_028A90_EVENT_TYPE(~0));
    316 		print_named_value(f, "EVENT_INDEX", (event_dw >> 8) & 0xf, 4);
    317 		print_named_value(f, "TCL1_VOL_ACTION_ENA", (event_dw >> 12) & 0x1, 1);
    318 		print_named_value(f, "TC_VOL_ACTION_ENA", (event_dw >> 13) & 0x1, 1);
    319 		print_named_value(f, "TC_WB_ACTION_ENA", (event_dw >> 15) & 0x1, 1);
    320 		print_named_value(f, "TCL1_ACTION_ENA", (event_dw >> 16) & 0x1, 1);
    321 		print_named_value(f, "TC_ACTION_ENA", (event_dw >> 17) & 0x1, 1);
    322 		print_named_value(f, "TC_NC_ACTION_ENA", (event_dw >> 19) & 0x1, 1);
    323 		print_named_value(f, "TC_WC_ACTION_ENA", (event_dw >> 20) & 0x1, 1);
    324 		print_named_value(f, "TC_MD_ACTION_ENA", (event_dw >> 21) & 0x1, 1);
    325 		uint32_t sel_dw = ac_ib_get(ib);
    326 		print_named_value(f, "DST_SEL", (sel_dw >> 16) & 0x3, 2);
    327 		print_named_value(f, "INT_SEL", (sel_dw >> 24) & 0x7, 3);
    328 		print_named_value(f, "DATA_SEL", sel_dw >> 29, 3);
    329 		print_named_value(f, "ADDRESS_LO", ac_ib_get(ib), 32);
    330 		print_named_value(f, "ADDRESS_HI", ac_ib_get(ib), 32);
    331 		print_named_value(f, "DATA_LO", ac_ib_get(ib), 32);
    332 		print_named_value(f, "DATA_HI", ac_ib_get(ib), 32);
    333 		print_named_value(f, "CTXID", ac_ib_get(ib), 32);
    334 		break;
    335 	}
    336 	case PKT3_WAIT_REG_MEM:
    337 		print_named_value(f, "OP", ac_ib_get(ib), 32);
    338 		print_named_value(f, "ADDRESS_LO", ac_ib_get(ib), 32);
    339 		print_named_value(f, "ADDRESS_HI", ac_ib_get(ib), 32);
    340 		print_named_value(f, "REF", ac_ib_get(ib), 32);
    341 		print_named_value(f, "MASK", ac_ib_get(ib), 32);
    342 		print_named_value(f, "POLL_INTERVAL", ac_ib_get(ib), 16);
    343 		break;
    344 	case PKT3_DRAW_INDEX_AUTO:
    345 		ac_dump_reg(f, ib->chip_class, R_030930_VGT_NUM_INDICES, ac_ib_get(ib), ~0);
    346 		ac_dump_reg(f, ib->chip_class, R_0287F0_VGT_DRAW_INITIATOR, ac_ib_get(ib), ~0);
    347 		break;
    348 	case PKT3_DRAW_INDEX_2:
    349 		ac_dump_reg(f, ib->chip_class, R_028A78_VGT_DMA_MAX_SIZE, ac_ib_get(ib), ~0);
    350 		ac_dump_reg(f, ib->chip_class, R_0287E8_VGT_DMA_BASE, ac_ib_get(ib), ~0);
    351 		ac_dump_reg(f, ib->chip_class, R_0287E4_VGT_DMA_BASE_HI, ac_ib_get(ib), ~0);
    352 		ac_dump_reg(f, ib->chip_class, R_030930_VGT_NUM_INDICES, ac_ib_get(ib), ~0);
    353 		ac_dump_reg(f, ib->chip_class, R_0287F0_VGT_DRAW_INITIATOR, ac_ib_get(ib), ~0);
    354 		break;
    355 	case PKT3_INDEX_TYPE:
    356 		ac_dump_reg(f, ib->chip_class, R_028A7C_VGT_DMA_INDEX_TYPE, ac_ib_get(ib), ~0);
    357 		break;
    358 	case PKT3_NUM_INSTANCES:
    359 		ac_dump_reg(f, ib->chip_class, R_030934_VGT_NUM_INSTANCES, ac_ib_get(ib), ~0);
    360 		break;
    361 	case PKT3_WRITE_DATA:
    362 		ac_dump_reg(f, ib->chip_class, R_370_CONTROL, ac_ib_get(ib), ~0);
    363 		ac_dump_reg(f, ib->chip_class, R_371_DST_ADDR_LO, ac_ib_get(ib), ~0);
    364 		ac_dump_reg(f, ib->chip_class, R_372_DST_ADDR_HI, ac_ib_get(ib), ~0);
    365 		/* The payload is written automatically */
    366 		break;
    367 	case PKT3_CP_DMA:
    368 		ac_dump_reg(f, ib->chip_class, R_410_CP_DMA_WORD0, ac_ib_get(ib), ~0);
    369 		ac_dump_reg(f, ib->chip_class, R_411_CP_DMA_WORD1, ac_ib_get(ib), ~0);
    370 		ac_dump_reg(f, ib->chip_class, R_412_CP_DMA_WORD2, ac_ib_get(ib), ~0);
    371 		ac_dump_reg(f, ib->chip_class, R_413_CP_DMA_WORD3, ac_ib_get(ib), ~0);
    372 		ac_dump_reg(f, ib->chip_class, R_414_COMMAND, ac_ib_get(ib), ~0);
    373 		break;
    374 	case PKT3_DMA_DATA:
    375 		ac_dump_reg(f, ib->chip_class, R_500_DMA_DATA_WORD0, ac_ib_get(ib), ~0);
    376 		ac_dump_reg(f, ib->chip_class, R_501_SRC_ADDR_LO, ac_ib_get(ib), ~0);
    377 		ac_dump_reg(f, ib->chip_class, R_502_SRC_ADDR_HI, ac_ib_get(ib), ~0);
    378 		ac_dump_reg(f, ib->chip_class, R_503_DST_ADDR_LO, ac_ib_get(ib), ~0);
    379 		ac_dump_reg(f, ib->chip_class, R_504_DST_ADDR_HI, ac_ib_get(ib), ~0);
    380 		ac_dump_reg(f, ib->chip_class, R_414_COMMAND, ac_ib_get(ib), ~0);
    381 		break;
    382 	case PKT3_INDIRECT_BUFFER_SI:
    383 	case PKT3_INDIRECT_BUFFER_CONST:
    384 	case PKT3_INDIRECT_BUFFER_CIK: {
    385 		uint32_t base_lo_dw = ac_ib_get(ib);
    386 		ac_dump_reg(f, ib->chip_class, R_3F0_IB_BASE_LO, base_lo_dw, ~0);
    387 		uint32_t base_hi_dw = ac_ib_get(ib);
    388 		ac_dump_reg(f, ib->chip_class, R_3F1_IB_BASE_HI, base_hi_dw, ~0);
    389 		uint32_t control_dw = ac_ib_get(ib);
    390 		ac_dump_reg(f, ib->chip_class, R_3F2_CONTROL, control_dw, ~0);
    391 
    392 		if (!ib->addr_callback)
    393 			break;
    394 
    395 		uint64_t addr = ((uint64_t)base_hi_dw << 32) | base_lo_dw;
    396 		void *data = ib->addr_callback(ib->addr_callback_data, addr);
    397 		if (!data)
    398 			break;
    399 
    400 		if (G_3F2_CHAIN(control_dw)) {
    401 			ib->ib = data;
    402 			ib->num_dw = G_3F2_IB_SIZE(control_dw);
    403 			ib->cur_dw = 0;
    404 			return;
    405 		}
    406 
    407 		struct ac_ib_parser ib_recurse;
    408 		memcpy(&ib_recurse, ib, sizeof(ib_recurse));
    409 		ib_recurse.ib = data;
    410 		ib_recurse.num_dw = G_3F2_IB_SIZE(control_dw);
    411 		ib_recurse.cur_dw = 0;
    412 		if(ib_recurse.trace_id_count) {
    413 			if (*current_trace_id == *ib->trace_ids) {
    414 				++ib_recurse.trace_ids;
    415 				--ib_recurse.trace_id_count;
    416 			} else {
    417 				ib_recurse.trace_id_count = 0;
    418 			}
    419 		}
    420 
    421 		fprintf(f, "\n\035>------------------ nested begin ------------------\n");
    422 		ac_do_parse_ib(f, &ib_recurse);
    423 		fprintf(f, "\n\035<------------------- nested end -------------------\n");
    424 		break;
    425 	}
    426 	case PKT3_CLEAR_STATE:
    427 	case PKT3_INCREMENT_DE_COUNTER:
    428 	case PKT3_PFP_SYNC_ME:
    429 		break;
    430 	case PKT3_NOP:
    431 		if (header == 0xffff1000) {
    432 			count = -1; /* One dword NOP. */
    433 		} else if (count == 0 && ib->cur_dw < ib->num_dw &&
    434 			   AC_IS_TRACE_POINT(ib->ib[ib->cur_dw])) {
    435 			unsigned packet_id = AC_GET_TRACE_POINT_ID(ib->ib[ib->cur_dw]);
    436 
    437 			print_spaces(f, INDENT_PKT);
    438 			fprintf(f, COLOR_RED "Trace point ID: %u\n", packet_id);
    439 
    440 			if (!ib->trace_id_count)
    441 				break; /* tracing was disabled */
    442 
    443 			*current_trace_id = packet_id;
    444 
    445 			print_spaces(f, INDENT_PKT);
    446 			if (packet_id < *ib->trace_ids)
    447 				fprintf(f, COLOR_RED
    448 					"This trace point was reached by the CP."
    449 					COLOR_RESET "\n");
    450 			else if (packet_id == *ib->trace_ids)
    451 				fprintf(f, COLOR_RED
    452 					"!!!!! This is the last trace point that "
    453 					"was reached by the CP !!!!!"
    454 					COLOR_RESET "\n");
    455 			else if (packet_id+1 == *ib->trace_ids)
    456 				fprintf(f, COLOR_RED
    457 					"!!!!! This is the first trace point that "
    458 					"was NOT been reached by the CP !!!!!"
    459 					COLOR_RESET "\n");
    460 			else
    461 				fprintf(f, COLOR_RED
    462 					"!!!!! This trace point was NOT reached "
    463 					"by the CP !!!!!"
    464 					COLOR_RESET "\n");
    465 			break;
    466 		}
    467 		break;
    468 	}
    469 
    470 	/* print additional dwords */
    471 	while (ib->cur_dw <= first_dw + count)
    472 		ac_ib_get(ib);
    473 
    474 	if (ib->cur_dw > first_dw + count + 1)
    475 		fprintf(f, COLOR_RED "\n!!!!! count in header too low !!!!!"
    476 			COLOR_RESET "\n");
    477 }
    478 
    479 /**
    480  * Parse and print an IB into a file.
    481  */
    482 static void ac_do_parse_ib(FILE *f, struct ac_ib_parser *ib)
    483 {
    484 	int current_trace_id = -1;
    485 
    486 	while (ib->cur_dw < ib->num_dw) {
    487 		uint32_t header = ac_ib_get(ib);
    488 		unsigned type = PKT_TYPE_G(header);
    489 
    490 		switch (type) {
    491 		case 3:
    492 			ac_parse_packet3(f, header, ib, &current_trace_id);
    493 			break;
    494 		case 2:
    495 			/* type-2 nop */
    496 			if (header == 0x80000000) {
    497 				fprintf(f, COLOR_GREEN "NOP (type 2)" COLOR_RESET "\n");
    498 				break;
    499 			}
    500 			/* fall through */
    501 		default:
    502 			fprintf(f, "Unknown packet type %i\n", type);
    503 			break;
    504 		}
    505 	}
    506 }
    507 
    508 static void format_ib_output(FILE *f, char *out)
    509 {
    510 	unsigned depth = 0;
    511 
    512 	for (;;) {
    513 		char op = 0;
    514 
    515 		if (out[0] == '\n' && out[1] == '\035')
    516 			out++;
    517 		if (out[0] == '\035') {
    518 			op = out[1];
    519 			out += 2;
    520 		}
    521 
    522 		if (op == '<')
    523 			depth--;
    524 
    525 		unsigned indent = 4 * depth;
    526 		if (op != '#')
    527 			indent += 9;
    528 
    529 		if (indent)
    530 			print_spaces(f, indent);
    531 
    532 		char *end = util_strchrnul(out, '\n');
    533 		fwrite(out, end - out, 1, f);
    534 		fputc('\n', f); /* always end with a new line */
    535 		if (!*end)
    536 			break;
    537 
    538 		out = end + 1;
    539 
    540 		if (op == '>')
    541 			depth++;
    542 	}
    543 }
    544 
    545 /**
    546  * Parse and print an IB into a file.
    547  *
    548  * \param f            file
    549  * \param ib_ptr       IB
    550  * \param num_dw       size of the IB
    551  * \param chip_class   chip class
    552  * \param trace_ids	the last trace IDs that are known to have been reached
    553  *			and executed by the CP, typically read from a buffer
    554  * \param trace_id_count The number of entries in the trace_ids array.
    555  * \param addr_callback Get a mapped pointer of the IB at a given address. Can
    556  *                      be NULL.
    557  * \param addr_callback_data user data for addr_callback
    558  */
    559 void ac_parse_ib_chunk(FILE *f, uint32_t *ib_ptr, int num_dw, const int *trace_ids,
    560 		       unsigned trace_id_count, enum chip_class chip_class,
    561                        ac_debug_addr_callback addr_callback, void *addr_callback_data)
    562 {
    563 	struct ac_ib_parser ib = {};
    564 	ib.ib = ib_ptr;
    565 	ib.num_dw = num_dw;
    566 	ib.trace_ids = trace_ids;
    567 	ib.trace_id_count = trace_id_count;
    568 	ib.chip_class = chip_class;
    569 	ib.addr_callback = addr_callback;
    570 	ib.addr_callback_data = addr_callback_data;
    571 
    572 	char *out;
    573 	size_t outsize;
    574 	FILE *memf = open_memstream(&out, &outsize);
    575 	ib.f = memf;
    576 	ac_do_parse_ib(memf, &ib);
    577 	fclose(memf);
    578 
    579 	if (out) {
    580 		format_ib_output(f, out);
    581 		free(out);
    582 	}
    583 
    584 	if (ib.cur_dw > ib.num_dw) {
    585 		printf("\nPacket ends after the end of IB.\n");
    586 		exit(1);
    587 	}
    588 }
    589 
    590 /**
    591  * Parse and print an IB into a file.
    592  *
    593  * \param f		file
    594  * \param ib		IB
    595  * \param num_dw	size of the IB
    596  * \param chip_class	chip class
    597  * \param trace_ids	the last trace IDs that are known to have been reached
    598  *			and executed by the CP, typically read from a buffer
    599  * \param trace_id_count The number of entries in the trace_ids array.
    600  * \param addr_callback Get a mapped pointer of the IB at a given address. Can
    601  *                      be NULL.
    602  * \param addr_callback_data user data for addr_callback
    603  */
    604 void ac_parse_ib(FILE *f, uint32_t *ib, int num_dw, const int *trace_ids,
    605 		 unsigned trace_id_count, const char *name,
    606 		 enum chip_class chip_class, ac_debug_addr_callback addr_callback,
    607 		 void *addr_callback_data)
    608 {
    609 	fprintf(f, "------------------ %s begin ------------------\n", name);
    610 
    611 	ac_parse_ib_chunk(f, ib, num_dw, trace_ids, trace_id_count,
    612 			  chip_class, addr_callback,  addr_callback_data);
    613 
    614 	fprintf(f, "------------------- %s end -------------------\n\n", name);
    615 }
    616 
    617 /**
    618  * Parse dmesg and return TRUE if a VM fault has been detected.
    619  *
    620  * \param chip_class		chip class
    621  * \param old_dmesg_timestamp	previous dmesg timestamp parsed at init time
    622  * \param out_addr		detected VM fault addr
    623  */
    624 bool ac_vm_fault_occured(enum chip_class chip_class,
    625 			 uint64_t *old_dmesg_timestamp, uint64_t *out_addr)
    626 {
    627 	char line[2000];
    628 	unsigned sec, usec;
    629 	int progress = 0;
    630 	uint64_t dmesg_timestamp = 0;
    631 	bool fault = false;
    632 
    633 	FILE *p = popen("dmesg", "r");
    634 	if (!p)
    635 		return false;
    636 
    637 	while (fgets(line, sizeof(line), p)) {
    638 		char *msg, len;
    639 
    640 		if (!line[0] || line[0] == '\n')
    641 			continue;
    642 
    643 		/* Get the timestamp. */
    644 		if (sscanf(line, "[%u.%u]", &sec, &usec) != 2) {
    645 			static bool hit = false;
    646 			if (!hit) {
    647 				fprintf(stderr, "%s: failed to parse line '%s'\n",
    648 					__func__, line);
    649 				hit = true;
    650 			}
    651 			continue;
    652 		}
    653 		dmesg_timestamp = sec * 1000000ull + usec;
    654 
    655 		/* If just updating the timestamp. */
    656 		if (!out_addr)
    657 			continue;
    658 
    659 		/* Process messages only if the timestamp is newer. */
    660 		if (dmesg_timestamp <= *old_dmesg_timestamp)
    661 			continue;
    662 
    663 		/* Only process the first VM fault. */
    664 		if (fault)
    665 			continue;
    666 
    667 		/* Remove trailing \n */
    668 		len = strlen(line);
    669 		if (len && line[len-1] == '\n')
    670 			line[len-1] = 0;
    671 
    672 		/* Get the message part. */
    673 		msg = strchr(line, ']');
    674 		if (!msg)
    675 			continue;
    676 		msg++;
    677 
    678 		const char *header_line, *addr_line_prefix, *addr_line_format;
    679 
    680 		if (chip_class >= GFX9) {
    681 			/* Match this:
    682 			 * ..: [gfxhub] VMC page fault (src_id:0 ring:158 vm_id:2 pas_id:0)
    683 			 * ..:   at page 0x0000000219f8f000 from 27
    684 			 * ..: VM_L2_PROTECTION_FAULT_STATUS:0x0020113C
    685 			 */
    686 			header_line = "VMC page fault";
    687 			addr_line_prefix = "   at page";
    688 			addr_line_format = "%"PRIx64;
    689 		} else {
    690 			header_line = "GPU fault detected:";
    691 			addr_line_prefix = "VM_CONTEXT1_PROTECTION_FAULT_ADDR";
    692 			addr_line_format = "%"PRIX64;
    693 		}
    694 
    695 		switch (progress) {
    696 		case 0:
    697 			if (strstr(msg, header_line))
    698 				progress = 1;
    699 			break;
    700 		case 1:
    701 			msg = strstr(msg, addr_line_prefix);
    702 			if (msg) {
    703 				msg = strstr(msg, "0x");
    704 				if (msg) {
    705 					msg += 2;
    706 					if (sscanf(msg, addr_line_format, out_addr) == 1)
    707 						fault = true;
    708 				}
    709 			}
    710 			progress = 0;
    711 			break;
    712 		default:
    713 			progress = 0;
    714 		}
    715 	}
    716 	pclose(p);
    717 
    718 	if (dmesg_timestamp > *old_dmesg_timestamp)
    719 		*old_dmesg_timestamp = dmesg_timestamp;
    720 
    721 	return fault;
    722 }
    723 
    724 static int compare_wave(const void *p1, const void *p2)
    725 {
    726 	struct ac_wave_info *w1 = (struct ac_wave_info *)p1;
    727 	struct ac_wave_info *w2 = (struct ac_wave_info *)p2;
    728 
    729 	/* Sort waves according to PC and then SE, SH, CU, etc. */
    730 	if (w1->pc < w2->pc)
    731 		return -1;
    732 	if (w1->pc > w2->pc)
    733 		return 1;
    734 	if (w1->se < w2->se)
    735 		return -1;
    736 	if (w1->se > w2->se)
    737 		return 1;
    738 	if (w1->sh < w2->sh)
    739 		return -1;
    740 	if (w1->sh > w2->sh)
    741 		return 1;
    742 	if (w1->cu < w2->cu)
    743 		return -1;
    744 	if (w1->cu > w2->cu)
    745 		return 1;
    746 	if (w1->simd < w2->simd)
    747 		return -1;
    748 	if (w1->simd > w2->simd)
    749 		return 1;
    750 	if (w1->wave < w2->wave)
    751 		return -1;
    752 	if (w1->wave > w2->wave)
    753 		return 1;
    754 
    755 	return 0;
    756 }
    757 
    758 /* Return wave information. "waves" should be a large enough array. */
    759 unsigned ac_get_wave_info(struct ac_wave_info waves[AC_MAX_WAVES_PER_CHIP])
    760 {
    761 	char line[2000];
    762 	unsigned num_waves = 0;
    763 
    764 	FILE *p = popen("umr -wa", "r");
    765 	if (!p)
    766 		return 0;
    767 
    768 	if (!fgets(line, sizeof(line), p) ||
    769 	    strncmp(line, "SE", 2) != 0) {
    770 		pclose(p);
    771 		return 0;
    772 	}
    773 
    774 	while (fgets(line, sizeof(line), p)) {
    775 		struct ac_wave_info *w;
    776 		uint32_t pc_hi, pc_lo, exec_hi, exec_lo;
    777 
    778 		assert(num_waves < AC_MAX_WAVES_PER_CHIP);
    779 		w = &waves[num_waves];
    780 
    781 		if (sscanf(line, "%u %u %u %u %u %x %x %x %x %x %x %x",
    782 			   &w->se, &w->sh, &w->cu, &w->simd, &w->wave,
    783 			   &w->status, &pc_hi, &pc_lo, &w->inst_dw0,
    784 			   &w->inst_dw1, &exec_hi, &exec_lo) == 12) {
    785 			w->pc = ((uint64_t)pc_hi << 32) | pc_lo;
    786 			w->exec = ((uint64_t)exec_hi << 32) | exec_lo;
    787 			w->matched = false;
    788 			num_waves++;
    789 		}
    790 	}
    791 
    792 	qsort(waves, num_waves, sizeof(struct ac_wave_info), compare_wave);
    793 
    794 	pclose(p);
    795 	return num_waves;
    796 }
    797