1 /* 2 * Copyright 2015 Advanced Micro Devices, Inc. 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * on the rights to use, copy, modify, merge, publish, distribute, sub 8 * license, and/or sell copies of the Software, and to permit persons to whom 9 * the Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 21 * USE OR OTHER DEALINGS IN THE SOFTWARE. 22 */ 23 24 #include "ac_debug.h" 25 26 #ifdef HAVE_VALGRIND 27 #include <valgrind.h> 28 #include <memcheck.h> 29 #define VG(x) x 30 #else 31 #define VG(x) 32 #endif 33 34 #include <inttypes.h> 35 36 #include "sid.h" 37 #include "gfx9d.h" 38 #include "sid_tables.h" 39 #include "util/u_math.h" 40 #include "util/u_memory.h" 41 #include "util/u_string.h" 42 43 #include <assert.h> 44 45 /* Parsed IBs are difficult to read without colors. Use "less -R file" to 46 * read them, or use "aha -b -f file" to convert them to html. 47 */ 48 #define COLOR_RESET "\033[0m" 49 #define COLOR_RED "\033[31m" 50 #define COLOR_GREEN "\033[1;32m" 51 #define COLOR_YELLOW "\033[1;33m" 52 #define COLOR_CYAN "\033[1;36m" 53 54 #define INDENT_PKT 8 55 56 struct ac_ib_parser { 57 FILE *f; 58 uint32_t *ib; 59 unsigned num_dw; 60 const int *trace_ids; 61 unsigned trace_id_count; 62 enum chip_class chip_class; 63 ac_debug_addr_callback addr_callback; 64 void *addr_callback_data; 65 66 unsigned cur_dw; 67 }; 68 69 static void ac_do_parse_ib(FILE *f, struct ac_ib_parser *ib); 70 71 static void print_spaces(FILE *f, unsigned num) 72 { 73 fprintf(f, "%*s", num, ""); 74 } 75 76 static void print_value(FILE *file, uint32_t value, int bits) 77 { 78 /* Guess if it's int or float */ 79 if (value <= (1 << 15)) { 80 if (value <= 9) 81 fprintf(file, "%u\n", value); 82 else 83 fprintf(file, "%u (0x%0*x)\n", value, bits / 4, value); 84 } else { 85 float f = uif(value); 86 87 if (fabs(f) < 100000 && f*10 == floor(f*10)) 88 fprintf(file, "%.1ff (0x%0*x)\n", f, bits / 4, value); 89 else 90 /* Don't print more leading zeros than there are bits. */ 91 fprintf(file, "0x%0*x\n", bits / 4, value); 92 } 93 } 94 95 static void print_named_value(FILE *file, const char *name, uint32_t value, 96 int bits) 97 { 98 print_spaces(file, INDENT_PKT); 99 fprintf(file, COLOR_YELLOW "%s" COLOR_RESET " <- ", name); 100 print_value(file, value, bits); 101 } 102 103 static const struct si_reg *find_register(const struct si_reg *table, 104 unsigned table_size, 105 unsigned offset) 106 { 107 for (unsigned i = 0; i < table_size; i++) { 108 const struct si_reg *reg = &table[i]; 109 110 if (reg->offset == offset) 111 return reg; 112 } 113 114 return NULL; 115 } 116 117 void ac_dump_reg(FILE *file, enum chip_class chip_class, unsigned offset, 118 uint32_t value, uint32_t field_mask) 119 { 120 const struct si_reg *reg = NULL; 121 122 if (chip_class >= GFX9) 123 reg = find_register(gfx9d_reg_table, ARRAY_SIZE(gfx9d_reg_table), offset); 124 if (!reg) 125 reg = find_register(sid_reg_table, ARRAY_SIZE(sid_reg_table), offset); 126 127 if (reg) { 128 const char *reg_name = sid_strings + reg->name_offset; 129 bool first_field = true; 130 131 print_spaces(file, INDENT_PKT); 132 fprintf(file, COLOR_YELLOW "%s" COLOR_RESET " <- ", 133 reg_name); 134 135 if (!reg->num_fields) { 136 print_value(file, value, 32); 137 return; 138 } 139 140 for (unsigned f = 0; f < reg->num_fields; f++) { 141 const struct si_field *field = sid_fields_table + reg->fields_offset + f; 142 const int *values_offsets = sid_strings_offsets + field->values_offset; 143 uint32_t val = (value & field->mask) >> 144 (ffs(field->mask) - 1); 145 146 if (!(field->mask & field_mask)) 147 continue; 148 149 /* Indent the field. */ 150 if (!first_field) 151 print_spaces(file, 152 INDENT_PKT + strlen(reg_name) + 4); 153 154 /* Print the field. */ 155 fprintf(file, "%s = ", sid_strings + field->name_offset); 156 157 if (val < field->num_values && values_offsets[val] >= 0) 158 fprintf(file, "%s\n", sid_strings + values_offsets[val]); 159 else 160 print_value(file, val, 161 util_bitcount(field->mask)); 162 163 first_field = false; 164 } 165 return; 166 } 167 168 print_spaces(file, INDENT_PKT); 169 fprintf(file, COLOR_YELLOW "0x%05x" COLOR_RESET " <- 0x%08x\n", offset, value); 170 } 171 172 static uint32_t ac_ib_get(struct ac_ib_parser *ib) 173 { 174 uint32_t v = 0; 175 176 if (ib->cur_dw < ib->num_dw) { 177 v = ib->ib[ib->cur_dw]; 178 #ifdef HAVE_VALGRIND 179 /* Help figure out where garbage data is written to IBs. 180 * 181 * Arguably we should do this already when the IBs are written, 182 * see RADEON_VALGRIND. The problem is that client-requests to 183 * Valgrind have an overhead even when Valgrind isn't running, 184 * and radeon_emit is performance sensitive... 185 */ 186 if (VALGRIND_CHECK_VALUE_IS_DEFINED(v)) 187 fprintf(ib->f, COLOR_RED "Valgrind: The next DWORD is garbage" 188 COLOR_RESET "\n"); 189 #endif 190 fprintf(ib->f, "\n\035#%08x ", v); 191 } else { 192 fprintf(ib->f, "\n\035#???????? "); 193 } 194 195 ib->cur_dw++; 196 return v; 197 } 198 199 static void ac_parse_set_reg_packet(FILE *f, unsigned count, unsigned reg_offset, 200 struct ac_ib_parser *ib) 201 { 202 unsigned reg_dw = ac_ib_get(ib); 203 unsigned reg = ((reg_dw & 0xFFFF) << 2) + reg_offset; 204 unsigned index = reg_dw >> 28; 205 int i; 206 207 if (index != 0) { 208 print_spaces(f, INDENT_PKT); 209 fprintf(f, "INDEX = %u\n", index); 210 } 211 212 for (i = 0; i < count; i++) 213 ac_dump_reg(f, ib->chip_class, reg + i*4, ac_ib_get(ib), ~0); 214 } 215 216 static void ac_parse_packet3(FILE *f, uint32_t header, struct ac_ib_parser *ib, 217 int *current_trace_id) 218 { 219 unsigned first_dw = ib->cur_dw; 220 int count = PKT_COUNT_G(header); 221 unsigned op = PKT3_IT_OPCODE_G(header); 222 const char *predicate = PKT3_PREDICATE(header) ? "(predicate)" : ""; 223 int i; 224 225 /* Print the name first. */ 226 for (i = 0; i < ARRAY_SIZE(packet3_table); i++) 227 if (packet3_table[i].op == op) 228 break; 229 230 if (i < ARRAY_SIZE(packet3_table)) { 231 const char *name = sid_strings + packet3_table[i].name_offset; 232 233 if (op == PKT3_SET_CONTEXT_REG || 234 op == PKT3_SET_CONFIG_REG || 235 op == PKT3_SET_UCONFIG_REG || 236 op == PKT3_SET_SH_REG) 237 fprintf(f, COLOR_CYAN "%s%s" COLOR_CYAN ":\n", 238 name, predicate); 239 else 240 fprintf(f, COLOR_GREEN "%s%s" COLOR_RESET ":\n", 241 name, predicate); 242 } else 243 fprintf(f, COLOR_RED "PKT3_UNKNOWN 0x%x%s" COLOR_RESET ":\n", 244 op, predicate); 245 246 /* Print the contents. */ 247 switch (op) { 248 case PKT3_SET_CONTEXT_REG: 249 ac_parse_set_reg_packet(f, count, SI_CONTEXT_REG_OFFSET, ib); 250 break; 251 case PKT3_SET_CONFIG_REG: 252 ac_parse_set_reg_packet(f, count, SI_CONFIG_REG_OFFSET, ib); 253 break; 254 case PKT3_SET_UCONFIG_REG: 255 ac_parse_set_reg_packet(f, count, CIK_UCONFIG_REG_OFFSET, ib); 256 break; 257 case PKT3_SET_SH_REG: 258 ac_parse_set_reg_packet(f, count, SI_SH_REG_OFFSET, ib); 259 break; 260 case PKT3_ACQUIRE_MEM: 261 ac_dump_reg(f, ib->chip_class, R_0301F0_CP_COHER_CNTL, ac_ib_get(ib), ~0); 262 ac_dump_reg(f, ib->chip_class, R_0301F4_CP_COHER_SIZE, ac_ib_get(ib), ~0); 263 ac_dump_reg(f, ib->chip_class, R_030230_CP_COHER_SIZE_HI, ac_ib_get(ib), ~0); 264 ac_dump_reg(f, ib->chip_class, R_0301F8_CP_COHER_BASE, ac_ib_get(ib), ~0); 265 ac_dump_reg(f, ib->chip_class, R_0301E4_CP_COHER_BASE_HI, ac_ib_get(ib), ~0); 266 print_named_value(f, "POLL_INTERVAL", ac_ib_get(ib), 16); 267 break; 268 case PKT3_SURFACE_SYNC: 269 if (ib->chip_class >= CIK) { 270 ac_dump_reg(f, ib->chip_class, R_0301F0_CP_COHER_CNTL, ac_ib_get(ib), ~0); 271 ac_dump_reg(f, ib->chip_class, R_0301F4_CP_COHER_SIZE, ac_ib_get(ib), ~0); 272 ac_dump_reg(f, ib->chip_class, R_0301F8_CP_COHER_BASE, ac_ib_get(ib), ~0); 273 } else { 274 ac_dump_reg(f, ib->chip_class, R_0085F0_CP_COHER_CNTL, ac_ib_get(ib), ~0); 275 ac_dump_reg(f, ib->chip_class, R_0085F4_CP_COHER_SIZE, ac_ib_get(ib), ~0); 276 ac_dump_reg(f, ib->chip_class, R_0085F8_CP_COHER_BASE, ac_ib_get(ib), ~0); 277 } 278 print_named_value(f, "POLL_INTERVAL", ac_ib_get(ib), 16); 279 break; 280 case PKT3_EVENT_WRITE: { 281 uint32_t event_dw = ac_ib_get(ib); 282 ac_dump_reg(f, ib->chip_class, R_028A90_VGT_EVENT_INITIATOR, event_dw, 283 S_028A90_EVENT_TYPE(~0)); 284 print_named_value(f, "EVENT_INDEX", (event_dw >> 8) & 0xf, 4); 285 print_named_value(f, "INV_L2", (event_dw >> 20) & 0x1, 1); 286 if (count > 0) { 287 print_named_value(f, "ADDRESS_LO", ac_ib_get(ib), 32); 288 print_named_value(f, "ADDRESS_HI", ac_ib_get(ib), 16); 289 } 290 break; 291 } 292 case PKT3_EVENT_WRITE_EOP: { 293 uint32_t event_dw = ac_ib_get(ib); 294 ac_dump_reg(f, ib->chip_class, R_028A90_VGT_EVENT_INITIATOR, event_dw, 295 S_028A90_EVENT_TYPE(~0)); 296 print_named_value(f, "EVENT_INDEX", (event_dw >> 8) & 0xf, 4); 297 print_named_value(f, "TCL1_VOL_ACTION_ENA", (event_dw >> 12) & 0x1, 1); 298 print_named_value(f, "TC_VOL_ACTION_ENA", (event_dw >> 13) & 0x1, 1); 299 print_named_value(f, "TC_WB_ACTION_ENA", (event_dw >> 15) & 0x1, 1); 300 print_named_value(f, "TCL1_ACTION_ENA", (event_dw >> 16) & 0x1, 1); 301 print_named_value(f, "TC_ACTION_ENA", (event_dw >> 17) & 0x1, 1); 302 print_named_value(f, "ADDRESS_LO", ac_ib_get(ib), 32); 303 uint32_t addr_hi_dw = ac_ib_get(ib); 304 print_named_value(f, "ADDRESS_HI", addr_hi_dw, 16); 305 print_named_value(f, "DST_SEL", (addr_hi_dw >> 16) & 0x3, 2); 306 print_named_value(f, "INT_SEL", (addr_hi_dw >> 24) & 0x7, 3); 307 print_named_value(f, "DATA_SEL", addr_hi_dw >> 29, 3); 308 print_named_value(f, "DATA_LO", ac_ib_get(ib), 32); 309 print_named_value(f, "DATA_HI", ac_ib_get(ib), 32); 310 break; 311 } 312 case PKT3_RELEASE_MEM: { 313 uint32_t event_dw = ac_ib_get(ib); 314 ac_dump_reg(f, ib->chip_class, R_028A90_VGT_EVENT_INITIATOR, event_dw, 315 S_028A90_EVENT_TYPE(~0)); 316 print_named_value(f, "EVENT_INDEX", (event_dw >> 8) & 0xf, 4); 317 print_named_value(f, "TCL1_VOL_ACTION_ENA", (event_dw >> 12) & 0x1, 1); 318 print_named_value(f, "TC_VOL_ACTION_ENA", (event_dw >> 13) & 0x1, 1); 319 print_named_value(f, "TC_WB_ACTION_ENA", (event_dw >> 15) & 0x1, 1); 320 print_named_value(f, "TCL1_ACTION_ENA", (event_dw >> 16) & 0x1, 1); 321 print_named_value(f, "TC_ACTION_ENA", (event_dw >> 17) & 0x1, 1); 322 print_named_value(f, "TC_NC_ACTION_ENA", (event_dw >> 19) & 0x1, 1); 323 print_named_value(f, "TC_WC_ACTION_ENA", (event_dw >> 20) & 0x1, 1); 324 print_named_value(f, "TC_MD_ACTION_ENA", (event_dw >> 21) & 0x1, 1); 325 uint32_t sel_dw = ac_ib_get(ib); 326 print_named_value(f, "DST_SEL", (sel_dw >> 16) & 0x3, 2); 327 print_named_value(f, "INT_SEL", (sel_dw >> 24) & 0x7, 3); 328 print_named_value(f, "DATA_SEL", sel_dw >> 29, 3); 329 print_named_value(f, "ADDRESS_LO", ac_ib_get(ib), 32); 330 print_named_value(f, "ADDRESS_HI", ac_ib_get(ib), 32); 331 print_named_value(f, "DATA_LO", ac_ib_get(ib), 32); 332 print_named_value(f, "DATA_HI", ac_ib_get(ib), 32); 333 print_named_value(f, "CTXID", ac_ib_get(ib), 32); 334 break; 335 } 336 case PKT3_WAIT_REG_MEM: 337 print_named_value(f, "OP", ac_ib_get(ib), 32); 338 print_named_value(f, "ADDRESS_LO", ac_ib_get(ib), 32); 339 print_named_value(f, "ADDRESS_HI", ac_ib_get(ib), 32); 340 print_named_value(f, "REF", ac_ib_get(ib), 32); 341 print_named_value(f, "MASK", ac_ib_get(ib), 32); 342 print_named_value(f, "POLL_INTERVAL", ac_ib_get(ib), 16); 343 break; 344 case PKT3_DRAW_INDEX_AUTO: 345 ac_dump_reg(f, ib->chip_class, R_030930_VGT_NUM_INDICES, ac_ib_get(ib), ~0); 346 ac_dump_reg(f, ib->chip_class, R_0287F0_VGT_DRAW_INITIATOR, ac_ib_get(ib), ~0); 347 break; 348 case PKT3_DRAW_INDEX_2: 349 ac_dump_reg(f, ib->chip_class, R_028A78_VGT_DMA_MAX_SIZE, ac_ib_get(ib), ~0); 350 ac_dump_reg(f, ib->chip_class, R_0287E8_VGT_DMA_BASE, ac_ib_get(ib), ~0); 351 ac_dump_reg(f, ib->chip_class, R_0287E4_VGT_DMA_BASE_HI, ac_ib_get(ib), ~0); 352 ac_dump_reg(f, ib->chip_class, R_030930_VGT_NUM_INDICES, ac_ib_get(ib), ~0); 353 ac_dump_reg(f, ib->chip_class, R_0287F0_VGT_DRAW_INITIATOR, ac_ib_get(ib), ~0); 354 break; 355 case PKT3_INDEX_TYPE: 356 ac_dump_reg(f, ib->chip_class, R_028A7C_VGT_DMA_INDEX_TYPE, ac_ib_get(ib), ~0); 357 break; 358 case PKT3_NUM_INSTANCES: 359 ac_dump_reg(f, ib->chip_class, R_030934_VGT_NUM_INSTANCES, ac_ib_get(ib), ~0); 360 break; 361 case PKT3_WRITE_DATA: 362 ac_dump_reg(f, ib->chip_class, R_370_CONTROL, ac_ib_get(ib), ~0); 363 ac_dump_reg(f, ib->chip_class, R_371_DST_ADDR_LO, ac_ib_get(ib), ~0); 364 ac_dump_reg(f, ib->chip_class, R_372_DST_ADDR_HI, ac_ib_get(ib), ~0); 365 /* The payload is written automatically */ 366 break; 367 case PKT3_CP_DMA: 368 ac_dump_reg(f, ib->chip_class, R_410_CP_DMA_WORD0, ac_ib_get(ib), ~0); 369 ac_dump_reg(f, ib->chip_class, R_411_CP_DMA_WORD1, ac_ib_get(ib), ~0); 370 ac_dump_reg(f, ib->chip_class, R_412_CP_DMA_WORD2, ac_ib_get(ib), ~0); 371 ac_dump_reg(f, ib->chip_class, R_413_CP_DMA_WORD3, ac_ib_get(ib), ~0); 372 ac_dump_reg(f, ib->chip_class, R_414_COMMAND, ac_ib_get(ib), ~0); 373 break; 374 case PKT3_DMA_DATA: 375 ac_dump_reg(f, ib->chip_class, R_500_DMA_DATA_WORD0, ac_ib_get(ib), ~0); 376 ac_dump_reg(f, ib->chip_class, R_501_SRC_ADDR_LO, ac_ib_get(ib), ~0); 377 ac_dump_reg(f, ib->chip_class, R_502_SRC_ADDR_HI, ac_ib_get(ib), ~0); 378 ac_dump_reg(f, ib->chip_class, R_503_DST_ADDR_LO, ac_ib_get(ib), ~0); 379 ac_dump_reg(f, ib->chip_class, R_504_DST_ADDR_HI, ac_ib_get(ib), ~0); 380 ac_dump_reg(f, ib->chip_class, R_414_COMMAND, ac_ib_get(ib), ~0); 381 break; 382 case PKT3_INDIRECT_BUFFER_SI: 383 case PKT3_INDIRECT_BUFFER_CONST: 384 case PKT3_INDIRECT_BUFFER_CIK: { 385 uint32_t base_lo_dw = ac_ib_get(ib); 386 ac_dump_reg(f, ib->chip_class, R_3F0_IB_BASE_LO, base_lo_dw, ~0); 387 uint32_t base_hi_dw = ac_ib_get(ib); 388 ac_dump_reg(f, ib->chip_class, R_3F1_IB_BASE_HI, base_hi_dw, ~0); 389 uint32_t control_dw = ac_ib_get(ib); 390 ac_dump_reg(f, ib->chip_class, R_3F2_CONTROL, control_dw, ~0); 391 392 if (!ib->addr_callback) 393 break; 394 395 uint64_t addr = ((uint64_t)base_hi_dw << 32) | base_lo_dw; 396 void *data = ib->addr_callback(ib->addr_callback_data, addr); 397 if (!data) 398 break; 399 400 if (G_3F2_CHAIN(control_dw)) { 401 ib->ib = data; 402 ib->num_dw = G_3F2_IB_SIZE(control_dw); 403 ib->cur_dw = 0; 404 return; 405 } 406 407 struct ac_ib_parser ib_recurse; 408 memcpy(&ib_recurse, ib, sizeof(ib_recurse)); 409 ib_recurse.ib = data; 410 ib_recurse.num_dw = G_3F2_IB_SIZE(control_dw); 411 ib_recurse.cur_dw = 0; 412 if(ib_recurse.trace_id_count) { 413 if (*current_trace_id == *ib->trace_ids) { 414 ++ib_recurse.trace_ids; 415 --ib_recurse.trace_id_count; 416 } else { 417 ib_recurse.trace_id_count = 0; 418 } 419 } 420 421 fprintf(f, "\n\035>------------------ nested begin ------------------\n"); 422 ac_do_parse_ib(f, &ib_recurse); 423 fprintf(f, "\n\035<------------------- nested end -------------------\n"); 424 break; 425 } 426 case PKT3_CLEAR_STATE: 427 case PKT3_INCREMENT_DE_COUNTER: 428 case PKT3_PFP_SYNC_ME: 429 break; 430 case PKT3_NOP: 431 if (header == 0xffff1000) { 432 count = -1; /* One dword NOP. */ 433 } else if (count == 0 && ib->cur_dw < ib->num_dw && 434 AC_IS_TRACE_POINT(ib->ib[ib->cur_dw])) { 435 unsigned packet_id = AC_GET_TRACE_POINT_ID(ib->ib[ib->cur_dw]); 436 437 print_spaces(f, INDENT_PKT); 438 fprintf(f, COLOR_RED "Trace point ID: %u\n", packet_id); 439 440 if (!ib->trace_id_count) 441 break; /* tracing was disabled */ 442 443 *current_trace_id = packet_id; 444 445 print_spaces(f, INDENT_PKT); 446 if (packet_id < *ib->trace_ids) 447 fprintf(f, COLOR_RED 448 "This trace point was reached by the CP." 449 COLOR_RESET "\n"); 450 else if (packet_id == *ib->trace_ids) 451 fprintf(f, COLOR_RED 452 "!!!!! This is the last trace point that " 453 "was reached by the CP !!!!!" 454 COLOR_RESET "\n"); 455 else if (packet_id+1 == *ib->trace_ids) 456 fprintf(f, COLOR_RED 457 "!!!!! This is the first trace point that " 458 "was NOT been reached by the CP !!!!!" 459 COLOR_RESET "\n"); 460 else 461 fprintf(f, COLOR_RED 462 "!!!!! This trace point was NOT reached " 463 "by the CP !!!!!" 464 COLOR_RESET "\n"); 465 break; 466 } 467 break; 468 } 469 470 /* print additional dwords */ 471 while (ib->cur_dw <= first_dw + count) 472 ac_ib_get(ib); 473 474 if (ib->cur_dw > first_dw + count + 1) 475 fprintf(f, COLOR_RED "\n!!!!! count in header too low !!!!!" 476 COLOR_RESET "\n"); 477 } 478 479 /** 480 * Parse and print an IB into a file. 481 */ 482 static void ac_do_parse_ib(FILE *f, struct ac_ib_parser *ib) 483 { 484 int current_trace_id = -1; 485 486 while (ib->cur_dw < ib->num_dw) { 487 uint32_t header = ac_ib_get(ib); 488 unsigned type = PKT_TYPE_G(header); 489 490 switch (type) { 491 case 3: 492 ac_parse_packet3(f, header, ib, ¤t_trace_id); 493 break; 494 case 2: 495 /* type-2 nop */ 496 if (header == 0x80000000) { 497 fprintf(f, COLOR_GREEN "NOP (type 2)" COLOR_RESET "\n"); 498 break; 499 } 500 /* fall through */ 501 default: 502 fprintf(f, "Unknown packet type %i\n", type); 503 break; 504 } 505 } 506 } 507 508 static void format_ib_output(FILE *f, char *out) 509 { 510 unsigned depth = 0; 511 512 for (;;) { 513 char op = 0; 514 515 if (out[0] == '\n' && out[1] == '\035') 516 out++; 517 if (out[0] == '\035') { 518 op = out[1]; 519 out += 2; 520 } 521 522 if (op == '<') 523 depth--; 524 525 unsigned indent = 4 * depth; 526 if (op != '#') 527 indent += 9; 528 529 if (indent) 530 print_spaces(f, indent); 531 532 char *end = util_strchrnul(out, '\n'); 533 fwrite(out, end - out, 1, f); 534 fputc('\n', f); /* always end with a new line */ 535 if (!*end) 536 break; 537 538 out = end + 1; 539 540 if (op == '>') 541 depth++; 542 } 543 } 544 545 /** 546 * Parse and print an IB into a file. 547 * 548 * \param f file 549 * \param ib_ptr IB 550 * \param num_dw size of the IB 551 * \param chip_class chip class 552 * \param trace_ids the last trace IDs that are known to have been reached 553 * and executed by the CP, typically read from a buffer 554 * \param trace_id_count The number of entries in the trace_ids array. 555 * \param addr_callback Get a mapped pointer of the IB at a given address. Can 556 * be NULL. 557 * \param addr_callback_data user data for addr_callback 558 */ 559 void ac_parse_ib_chunk(FILE *f, uint32_t *ib_ptr, int num_dw, const int *trace_ids, 560 unsigned trace_id_count, enum chip_class chip_class, 561 ac_debug_addr_callback addr_callback, void *addr_callback_data) 562 { 563 struct ac_ib_parser ib = {}; 564 ib.ib = ib_ptr; 565 ib.num_dw = num_dw; 566 ib.trace_ids = trace_ids; 567 ib.trace_id_count = trace_id_count; 568 ib.chip_class = chip_class; 569 ib.addr_callback = addr_callback; 570 ib.addr_callback_data = addr_callback_data; 571 572 char *out; 573 size_t outsize; 574 FILE *memf = open_memstream(&out, &outsize); 575 ib.f = memf; 576 ac_do_parse_ib(memf, &ib); 577 fclose(memf); 578 579 if (out) { 580 format_ib_output(f, out); 581 free(out); 582 } 583 584 if (ib.cur_dw > ib.num_dw) { 585 printf("\nPacket ends after the end of IB.\n"); 586 exit(1); 587 } 588 } 589 590 /** 591 * Parse and print an IB into a file. 592 * 593 * \param f file 594 * \param ib IB 595 * \param num_dw size of the IB 596 * \param chip_class chip class 597 * \param trace_ids the last trace IDs that are known to have been reached 598 * and executed by the CP, typically read from a buffer 599 * \param trace_id_count The number of entries in the trace_ids array. 600 * \param addr_callback Get a mapped pointer of the IB at a given address. Can 601 * be NULL. 602 * \param addr_callback_data user data for addr_callback 603 */ 604 void ac_parse_ib(FILE *f, uint32_t *ib, int num_dw, const int *trace_ids, 605 unsigned trace_id_count, const char *name, 606 enum chip_class chip_class, ac_debug_addr_callback addr_callback, 607 void *addr_callback_data) 608 { 609 fprintf(f, "------------------ %s begin ------------------\n", name); 610 611 ac_parse_ib_chunk(f, ib, num_dw, trace_ids, trace_id_count, 612 chip_class, addr_callback, addr_callback_data); 613 614 fprintf(f, "------------------- %s end -------------------\n\n", name); 615 } 616 617 /** 618 * Parse dmesg and return TRUE if a VM fault has been detected. 619 * 620 * \param chip_class chip class 621 * \param old_dmesg_timestamp previous dmesg timestamp parsed at init time 622 * \param out_addr detected VM fault addr 623 */ 624 bool ac_vm_fault_occured(enum chip_class chip_class, 625 uint64_t *old_dmesg_timestamp, uint64_t *out_addr) 626 { 627 char line[2000]; 628 unsigned sec, usec; 629 int progress = 0; 630 uint64_t dmesg_timestamp = 0; 631 bool fault = false; 632 633 FILE *p = popen("dmesg", "r"); 634 if (!p) 635 return false; 636 637 while (fgets(line, sizeof(line), p)) { 638 char *msg, len; 639 640 if (!line[0] || line[0] == '\n') 641 continue; 642 643 /* Get the timestamp. */ 644 if (sscanf(line, "[%u.%u]", &sec, &usec) != 2) { 645 static bool hit = false; 646 if (!hit) { 647 fprintf(stderr, "%s: failed to parse line '%s'\n", 648 __func__, line); 649 hit = true; 650 } 651 continue; 652 } 653 dmesg_timestamp = sec * 1000000ull + usec; 654 655 /* If just updating the timestamp. */ 656 if (!out_addr) 657 continue; 658 659 /* Process messages only if the timestamp is newer. */ 660 if (dmesg_timestamp <= *old_dmesg_timestamp) 661 continue; 662 663 /* Only process the first VM fault. */ 664 if (fault) 665 continue; 666 667 /* Remove trailing \n */ 668 len = strlen(line); 669 if (len && line[len-1] == '\n') 670 line[len-1] = 0; 671 672 /* Get the message part. */ 673 msg = strchr(line, ']'); 674 if (!msg) 675 continue; 676 msg++; 677 678 const char *header_line, *addr_line_prefix, *addr_line_format; 679 680 if (chip_class >= GFX9) { 681 /* Match this: 682 * ..: [gfxhub] VMC page fault (src_id:0 ring:158 vm_id:2 pas_id:0) 683 * ..: at page 0x0000000219f8f000 from 27 684 * ..: VM_L2_PROTECTION_FAULT_STATUS:0x0020113C 685 */ 686 header_line = "VMC page fault"; 687 addr_line_prefix = " at page"; 688 addr_line_format = "%"PRIx64; 689 } else { 690 header_line = "GPU fault detected:"; 691 addr_line_prefix = "VM_CONTEXT1_PROTECTION_FAULT_ADDR"; 692 addr_line_format = "%"PRIX64; 693 } 694 695 switch (progress) { 696 case 0: 697 if (strstr(msg, header_line)) 698 progress = 1; 699 break; 700 case 1: 701 msg = strstr(msg, addr_line_prefix); 702 if (msg) { 703 msg = strstr(msg, "0x"); 704 if (msg) { 705 msg += 2; 706 if (sscanf(msg, addr_line_format, out_addr) == 1) 707 fault = true; 708 } 709 } 710 progress = 0; 711 break; 712 default: 713 progress = 0; 714 } 715 } 716 pclose(p); 717 718 if (dmesg_timestamp > *old_dmesg_timestamp) 719 *old_dmesg_timestamp = dmesg_timestamp; 720 721 return fault; 722 } 723 724 static int compare_wave(const void *p1, const void *p2) 725 { 726 struct ac_wave_info *w1 = (struct ac_wave_info *)p1; 727 struct ac_wave_info *w2 = (struct ac_wave_info *)p2; 728 729 /* Sort waves according to PC and then SE, SH, CU, etc. */ 730 if (w1->pc < w2->pc) 731 return -1; 732 if (w1->pc > w2->pc) 733 return 1; 734 if (w1->se < w2->se) 735 return -1; 736 if (w1->se > w2->se) 737 return 1; 738 if (w1->sh < w2->sh) 739 return -1; 740 if (w1->sh > w2->sh) 741 return 1; 742 if (w1->cu < w2->cu) 743 return -1; 744 if (w1->cu > w2->cu) 745 return 1; 746 if (w1->simd < w2->simd) 747 return -1; 748 if (w1->simd > w2->simd) 749 return 1; 750 if (w1->wave < w2->wave) 751 return -1; 752 if (w1->wave > w2->wave) 753 return 1; 754 755 return 0; 756 } 757 758 /* Return wave information. "waves" should be a large enough array. */ 759 unsigned ac_get_wave_info(struct ac_wave_info waves[AC_MAX_WAVES_PER_CHIP]) 760 { 761 char line[2000]; 762 unsigned num_waves = 0; 763 764 FILE *p = popen("umr -wa", "r"); 765 if (!p) 766 return 0; 767 768 if (!fgets(line, sizeof(line), p) || 769 strncmp(line, "SE", 2) != 0) { 770 pclose(p); 771 return 0; 772 } 773 774 while (fgets(line, sizeof(line), p)) { 775 struct ac_wave_info *w; 776 uint32_t pc_hi, pc_lo, exec_hi, exec_lo; 777 778 assert(num_waves < AC_MAX_WAVES_PER_CHIP); 779 w = &waves[num_waves]; 780 781 if (sscanf(line, "%u %u %u %u %u %x %x %x %x %x %x %x", 782 &w->se, &w->sh, &w->cu, &w->simd, &w->wave, 783 &w->status, &pc_hi, &pc_lo, &w->inst_dw0, 784 &w->inst_dw1, &exec_hi, &exec_lo) == 12) { 785 w->pc = ((uint64_t)pc_hi << 32) | pc_lo; 786 w->exec = ((uint64_t)exec_hi << 32) | exec_lo; 787 w->matched = false; 788 num_waves++; 789 } 790 } 791 792 qsort(waves, num_waves, sizeof(struct ac_wave_info), compare_wave); 793 794 pclose(p); 795 return num_waves; 796 } 797