1 /* 2 * Copyright 2014 Broadcom 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 */ 23 24 #include <stdbool.h> 25 #include <stdio.h> 26 27 #include "vc4_qpu.h" 28 #include "vc4_qpu_defines.h" 29 30 static const char *qpu_add_opcodes[] = { 31 [QPU_A_NOP] = "nop", 32 [QPU_A_FADD] = "fadd", 33 [QPU_A_FSUB] = "fsub", 34 [QPU_A_FMIN] = "fmin", 35 [QPU_A_FMAX] = "fmax", 36 [QPU_A_FMINABS] = "fminabs", 37 [QPU_A_FMAXABS] = "fmaxabs", 38 [QPU_A_FTOI] = "ftoi", 39 [QPU_A_ITOF] = "itof", 40 [QPU_A_ADD] = "add", 41 [QPU_A_SUB] = "sub", 42 [QPU_A_SHR] = "shr", 43 [QPU_A_ASR] = "asr", 44 [QPU_A_ROR] = "ror", 45 [QPU_A_SHL] = "shl", 46 [QPU_A_MIN] = "min", 47 [QPU_A_MAX] = "max", 48 [QPU_A_AND] = "and", 49 [QPU_A_OR] = "or", 50 [QPU_A_XOR] = "xor", 51 [QPU_A_NOT] = "not", 52 [QPU_A_CLZ] = "clz", 53 [QPU_A_V8ADDS] = "v8adds", 54 [QPU_A_V8SUBS] = "v8subs", 55 }; 56 57 static const char *qpu_mul_opcodes[] = { 58 [QPU_M_NOP] = "nop", 59 [QPU_M_FMUL] = "fmul", 60 [QPU_M_MUL24] = "mul24", 61 [QPU_M_V8MULD] = "v8muld", 62 [QPU_M_V8MIN] = "v8min", 63 [QPU_M_V8MAX] = "v8max", 64 [QPU_M_V8ADDS] = "v8adds", 65 [QPU_M_V8SUBS] = "v8subs", 66 }; 67 68 static const char *qpu_sig[] = { 69 [QPU_SIG_SW_BREAKPOINT] = "sig_brk", 70 [QPU_SIG_NONE] = "", 71 [QPU_SIG_THREAD_SWITCH] = "sig_switch", 72 [QPU_SIG_PROG_END] = "sig_end", 73 [QPU_SIG_WAIT_FOR_SCOREBOARD] = "sig_wait_score", 74 [QPU_SIG_SCOREBOARD_UNLOCK] = "sig_unlock_score", 75 [QPU_SIG_LAST_THREAD_SWITCH] = "sig_thread_switch", 76 [QPU_SIG_COVERAGE_LOAD] = "sig_coverage_load", 77 [QPU_SIG_COLOR_LOAD] = "sig_color_load", 78 [QPU_SIG_COLOR_LOAD_END] = "sig_color_load_end", 79 [QPU_SIG_LOAD_TMU0] = "load_tmu0", 80 [QPU_SIG_LOAD_TMU1] = "load_tmu1", 81 [QPU_SIG_ALPHA_MASK_LOAD] = "sig_alpha_mask_load", 82 [QPU_SIG_SMALL_IMM] = "sig_small_imm", 83 [QPU_SIG_LOAD_IMM] = "sig_load_imm", 84 [QPU_SIG_BRANCH] = "sig_branch", 85 }; 86 87 static const char *qpu_pack_mul[] = { 88 [QPU_PACK_MUL_NOP] = "", 89 [QPU_PACK_MUL_8888] = ".8888", 90 [QPU_PACK_MUL_8A] = ".8a", 91 [QPU_PACK_MUL_8B] = ".8b", 92 [QPU_PACK_MUL_8C] = ".8c", 93 [QPU_PACK_MUL_8D] = ".8d", 94 }; 95 96 /* The QPU unpack for A and R4 files can be described the same, it's just that 97 * the R4 variants are convert-to-float only, with no int support. 98 */ 99 static const char *qpu_unpack[] = { 100 [QPU_UNPACK_NOP] = "", 101 [QPU_UNPACK_16A] = "16a", 102 [QPU_UNPACK_16B] = "16b", 103 [QPU_UNPACK_8D_REP] = "8d_rep", 104 [QPU_UNPACK_8A] = "8a", 105 [QPU_UNPACK_8B] = "8b", 106 [QPU_UNPACK_8C] = "8c", 107 [QPU_UNPACK_8D] = "8d", 108 }; 109 110 static const char *special_read_a[] = { 111 "uni", 112 NULL, 113 NULL, 114 "vary", 115 NULL, 116 NULL, 117 "elem", 118 "nop", 119 NULL, 120 "x_pix", 121 "ms_flags", 122 NULL, 123 NULL, 124 NULL, 125 NULL, 126 NULL, 127 "vpm_read", 128 "vpm_ld_busy", 129 "vpm_ld_wait", 130 "mutex_acq" 131 }; 132 133 static const char *special_read_b[] = { 134 "uni", 135 NULL, 136 NULL, 137 "vary", 138 NULL, 139 NULL, 140 "qpu", 141 "nop", 142 NULL, 143 "y_pix", 144 "rev_flag", 145 NULL, 146 NULL, 147 NULL, 148 NULL, 149 NULL, 150 "vpm_read", 151 "vpm_st_busy", 152 "vpm_st_wait", 153 "mutex_acq" 154 }; 155 156 /** 157 * This has the B-file descriptions for register writes. 158 * 159 * Since only a couple of regs are different between A and B, the A overrides 160 * are in get_special_write_desc(). 161 */ 162 static const char *special_write[] = { 163 [QPU_W_ACC0] = "r0", 164 [QPU_W_ACC1] = "r1", 165 [QPU_W_ACC2] = "r2", 166 [QPU_W_ACC3] = "r3", 167 [QPU_W_TMU_NOSWAP] = "tmu_noswap", 168 [QPU_W_ACC5] = "r5", 169 [QPU_W_HOST_INT] = "host_int", 170 [QPU_W_NOP] = "nop", 171 [QPU_W_UNIFORMS_ADDRESS] = "uniforms_addr", 172 [QPU_W_QUAD_XY] = "quad_y", 173 [QPU_W_MS_FLAGS] = "ms_flags", 174 [QPU_W_TLB_STENCIL_SETUP] = "tlb_stencil_setup", 175 [QPU_W_TLB_Z] = "tlb_z", 176 [QPU_W_TLB_COLOR_MS] = "tlb_color_ms", 177 [QPU_W_TLB_COLOR_ALL] = "tlb_color_all", 178 [QPU_W_VPM] = "vpm", 179 [QPU_W_VPMVCD_SETUP] = "vw_setup", 180 [QPU_W_VPM_ADDR] = "vw_addr", 181 [QPU_W_MUTEX_RELEASE] = "mutex_release", 182 [QPU_W_SFU_RECIP] = "sfu_recip", 183 [QPU_W_SFU_RECIPSQRT] = "sfu_recipsqrt", 184 [QPU_W_SFU_EXP] = "sfu_exp", 185 [QPU_W_SFU_LOG] = "sfu_log", 186 [QPU_W_TMU0_S] = "tmu0_s", 187 [QPU_W_TMU0_T] = "tmu0_t", 188 [QPU_W_TMU0_R] = "tmu0_r", 189 [QPU_W_TMU0_B] = "tmu0_b", 190 [QPU_W_TMU1_S] = "tmu1_s", 191 [QPU_W_TMU1_T] = "tmu1_t", 192 [QPU_W_TMU1_R] = "tmu1_r", 193 [QPU_W_TMU1_B] = "tmu1_b", 194 }; 195 196 static const char *qpu_pack_a[] = { 197 [QPU_PACK_A_NOP] = "", 198 [QPU_PACK_A_16A] = ".16a", 199 [QPU_PACK_A_16B] = ".16b", 200 [QPU_PACK_A_8888] = ".8888", 201 [QPU_PACK_A_8A] = ".8a", 202 [QPU_PACK_A_8B] = ".8b", 203 [QPU_PACK_A_8C] = ".8c", 204 [QPU_PACK_A_8D] = ".8d", 205 206 [QPU_PACK_A_32_SAT] = ".sat", 207 [QPU_PACK_A_16A_SAT] = ".16a.sat", 208 [QPU_PACK_A_16B_SAT] = ".16b.sat", 209 [QPU_PACK_A_8888_SAT] = ".8888.sat", 210 [QPU_PACK_A_8A_SAT] = ".8a.sat", 211 [QPU_PACK_A_8B_SAT] = ".8b.sat", 212 [QPU_PACK_A_8C_SAT] = ".8c.sat", 213 [QPU_PACK_A_8D_SAT] = ".8d.sat", 214 }; 215 216 static const char *qpu_cond[] = { 217 [QPU_COND_NEVER] = ".never", 218 [QPU_COND_ALWAYS] = "", 219 [QPU_COND_ZS] = ".zs", 220 [QPU_COND_ZC] = ".zc", 221 [QPU_COND_NS] = ".ns", 222 [QPU_COND_NC] = ".nc", 223 [QPU_COND_CS] = ".cs", 224 [QPU_COND_CC] = ".cc", 225 }; 226 227 static const char *qpu_cond_branch[] = { 228 [QPU_COND_BRANCH_ALL_ZS] = ".all_zs", 229 [QPU_COND_BRANCH_ALL_ZC] = ".all_zc", 230 [QPU_COND_BRANCH_ANY_ZS] = ".any_zs", 231 [QPU_COND_BRANCH_ANY_ZC] = ".any_zc", 232 [QPU_COND_BRANCH_ALL_NS] = ".all_ns", 233 [QPU_COND_BRANCH_ALL_NC] = ".all_nc", 234 [QPU_COND_BRANCH_ANY_NS] = ".any_ns", 235 [QPU_COND_BRANCH_ANY_NC] = ".any_nc", 236 [QPU_COND_BRANCH_ALL_CS] = ".all_cs", 237 [QPU_COND_BRANCH_ALL_CC] = ".all_cc", 238 [QPU_COND_BRANCH_ANY_CS] = ".any_cs", 239 [QPU_COND_BRANCH_ANY_CC] = ".any_cc", 240 [QPU_COND_BRANCH_ALWAYS] = "", 241 }; 242 243 #define DESC(array, index) \ 244 ((index >= ARRAY_SIZE(array) || !(array)[index]) ? \ 245 "???" : (array)[index]) 246 247 static const char * 248 get_special_write_desc(int reg, bool is_a) 249 { 250 if (is_a) { 251 switch (reg) { 252 case QPU_W_QUAD_XY: 253 return "quad_x"; 254 case QPU_W_VPMVCD_SETUP: 255 return "vr_setup"; 256 case QPU_W_VPM_ADDR: 257 return "vr_addr"; 258 } 259 } 260 261 return special_write[reg]; 262 } 263 264 void 265 vc4_qpu_disasm_pack_mul(FILE *out, uint32_t pack) 266 { 267 fprintf(out, "%s", DESC(qpu_pack_mul, pack)); 268 } 269 270 void 271 vc4_qpu_disasm_pack_a(FILE *out, uint32_t pack) 272 { 273 fprintf(out, "%s", DESC(qpu_pack_a, pack)); 274 } 275 276 void 277 vc4_qpu_disasm_unpack(FILE *out, uint32_t unpack) 278 { 279 if (unpack != QPU_UNPACK_NOP) 280 fprintf(out, ".%s", DESC(qpu_unpack, unpack)); 281 } 282 283 void 284 vc4_qpu_disasm_cond(FILE *out, uint32_t cond) 285 { 286 fprintf(out, "%s", DESC(qpu_cond, cond)); 287 } 288 289 void 290 vc4_qpu_disasm_cond_branch(FILE *out, uint32_t cond) 291 { 292 fprintf(out, "%s", DESC(qpu_cond_branch, cond)); 293 } 294 295 static void 296 print_alu_dst(uint64_t inst, bool is_mul) 297 { 298 bool is_a = is_mul == ((inst & QPU_WS) != 0); 299 uint32_t waddr = (is_mul ? 300 QPU_GET_FIELD(inst, QPU_WADDR_MUL) : 301 QPU_GET_FIELD(inst, QPU_WADDR_ADD)); 302 const char *file = is_a ? "a" : "b"; 303 uint32_t pack = QPU_GET_FIELD(inst, QPU_PACK); 304 305 if (waddr <= 31) 306 fprintf(stderr, "r%s%d", file, waddr); 307 else if (get_special_write_desc(waddr, is_a)) 308 fprintf(stderr, "%s", get_special_write_desc(waddr, is_a)); 309 else 310 fprintf(stderr, "%s%d?", file, waddr); 311 312 if (is_mul && (inst & QPU_PM)) { 313 vc4_qpu_disasm_pack_mul(stderr, pack); 314 } else if (is_a && !(inst & QPU_PM)) { 315 vc4_qpu_disasm_pack_a(stderr, pack); 316 } 317 } 318 319 static void 320 print_alu_src(uint64_t inst, uint32_t mux, bool is_mul) 321 { 322 bool is_a = mux != QPU_MUX_B; 323 const char *file = is_a ? "a" : "b"; 324 uint32_t raddr = (is_a ? 325 QPU_GET_FIELD(inst, QPU_RADDR_A) : 326 QPU_GET_FIELD(inst, QPU_RADDR_B)); 327 uint32_t unpack = QPU_GET_FIELD(inst, QPU_UNPACK); 328 bool has_si = QPU_GET_FIELD(inst, QPU_SIG) == QPU_SIG_SMALL_IMM; 329 uint32_t si = QPU_GET_FIELD(inst, QPU_SMALL_IMM); 330 331 if (mux <= QPU_MUX_R5) { 332 fprintf(stderr, "r%d", mux); 333 if (has_si && is_mul && si >= QPU_SMALL_IMM_MUL_ROT + 1) 334 fprintf(stderr, "+%d", si - QPU_SMALL_IMM_MUL_ROT); 335 } else if (!is_a && has_si) { 336 if (si <= 15) 337 fprintf(stderr, "%d", si); 338 else if (si <= 31) 339 fprintf(stderr, "%d", -16 + (si - 16)); 340 else if (si <= 39) 341 fprintf(stderr, "%.1f", (float)(1 << (si - 32))); 342 else if (si <= 47) 343 fprintf(stderr, "%f", 1.0f / (1 << (48 - si))); 344 else 345 fprintf(stderr, "<bad imm %d>", si); 346 } else if (raddr <= 31) 347 fprintf(stderr, "r%s%d", file, raddr); 348 else { 349 if (is_a) 350 fprintf(stderr, "%s", DESC(special_read_a, raddr - 32)); 351 else 352 fprintf(stderr, "%s", DESC(special_read_b, raddr - 32)); 353 } 354 355 if (((mux == QPU_MUX_A && !(inst & QPU_PM)) || 356 (mux == QPU_MUX_R4 && (inst & QPU_PM)))) { 357 vc4_qpu_disasm_unpack(stderr, unpack); 358 } 359 } 360 361 static void 362 print_add_op(uint64_t inst) 363 { 364 uint32_t op_add = QPU_GET_FIELD(inst, QPU_OP_ADD); 365 uint32_t cond = QPU_GET_FIELD(inst, QPU_COND_ADD); 366 bool is_mov = (op_add == QPU_A_OR && 367 QPU_GET_FIELD(inst, QPU_ADD_A) == 368 QPU_GET_FIELD(inst, QPU_ADD_B)); 369 370 if (is_mov) 371 fprintf(stderr, "mov"); 372 else 373 fprintf(stderr, "%s", DESC(qpu_add_opcodes, op_add)); 374 375 if ((inst & QPU_SF) && op_add != QPU_A_NOP) 376 fprintf(stderr, ".sf"); 377 378 if (op_add != QPU_A_NOP) 379 vc4_qpu_disasm_cond(stderr, cond); 380 381 fprintf(stderr, " "); 382 print_alu_dst(inst, false); 383 fprintf(stderr, ", "); 384 385 print_alu_src(inst, QPU_GET_FIELD(inst, QPU_ADD_A), false); 386 387 if (!is_mov) { 388 fprintf(stderr, ", "); 389 390 print_alu_src(inst, QPU_GET_FIELD(inst, QPU_ADD_B), false); 391 } 392 } 393 394 static void 395 print_mul_op(uint64_t inst) 396 { 397 uint32_t op_add = QPU_GET_FIELD(inst, QPU_OP_ADD); 398 uint32_t op_mul = QPU_GET_FIELD(inst, QPU_OP_MUL); 399 uint32_t cond = QPU_GET_FIELD(inst, QPU_COND_MUL); 400 bool is_mov = (op_mul == QPU_M_V8MIN && 401 QPU_GET_FIELD(inst, QPU_MUL_A) == 402 QPU_GET_FIELD(inst, QPU_MUL_B)); 403 404 if (is_mov) 405 fprintf(stderr, "mov"); 406 else 407 fprintf(stderr, "%s", DESC(qpu_mul_opcodes, op_mul)); 408 409 if ((inst & QPU_SF) && op_add == QPU_A_NOP) 410 fprintf(stderr, ".sf"); 411 412 if (op_mul != QPU_M_NOP) 413 vc4_qpu_disasm_cond(stderr, cond); 414 415 fprintf(stderr, " "); 416 print_alu_dst(inst, true); 417 fprintf(stderr, ", "); 418 419 print_alu_src(inst, QPU_GET_FIELD(inst, QPU_MUL_A), true); 420 421 if (!is_mov) { 422 fprintf(stderr, ", "); 423 print_alu_src(inst, QPU_GET_FIELD(inst, QPU_MUL_B), true); 424 } 425 } 426 427 static void 428 print_load_imm(uint64_t inst) 429 { 430 uint32_t imm = inst; 431 uint32_t waddr_add = QPU_GET_FIELD(inst, QPU_WADDR_ADD); 432 uint32_t waddr_mul = QPU_GET_FIELD(inst, QPU_WADDR_MUL); 433 uint32_t cond_add = QPU_GET_FIELD(inst, QPU_COND_ADD); 434 uint32_t cond_mul = QPU_GET_FIELD(inst, QPU_COND_MUL); 435 436 fprintf(stderr, "load_imm "); 437 438 print_alu_dst(inst, false); 439 if (waddr_add != QPU_W_NOP) 440 vc4_qpu_disasm_cond(stderr, cond_add); 441 fprintf(stderr, ", "); 442 443 print_alu_dst(inst, true); 444 if (waddr_mul != QPU_W_NOP) 445 vc4_qpu_disasm_cond(stderr, cond_mul); 446 fprintf(stderr, ", "); 447 448 fprintf(stderr, "0x%08x (%f)", imm, uif(imm)); 449 } 450 451 void 452 vc4_qpu_disasm(const uint64_t *instructions, int num_instructions) 453 { 454 for (int i = 0; i < num_instructions; i++) { 455 uint64_t inst = instructions[i]; 456 uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG); 457 458 switch (sig) { 459 case QPU_SIG_BRANCH: 460 fprintf(stderr, "branch"); 461 vc4_qpu_disasm_cond_branch(stderr, 462 QPU_GET_FIELD(inst, 463 QPU_BRANCH_COND)); 464 465 fprintf(stderr, " %d", (uint32_t)inst); 466 break; 467 468 case QPU_SIG_LOAD_IMM: 469 print_load_imm(inst); 470 break; 471 default: 472 if (sig != QPU_SIG_NONE) 473 fprintf(stderr, "%s ", DESC(qpu_sig, sig)); 474 print_add_op(inst); 475 fprintf(stderr, " ; "); 476 print_mul_op(inst); 477 break; 478 } 479 480 if (num_instructions != 1) 481 fprintf(stderr, "\n"); 482 } 483 } 484