Home | History | Annotate | Download | only in vc4
      1 /*
      2  * Copyright  2014 Broadcom
      3  *
      4  * Permission is hereby granted, free of charge, to any person obtaining a
      5  * copy of this software and associated documentation files (the "Software"),
      6  * to deal in the Software without restriction, including without limitation
      7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      8  * and/or sell copies of the Software, and to permit persons to whom the
      9  * Software is furnished to do so, subject to the following conditions:
     10  *
     11  * The above copyright notice and this permission notice (including the next
     12  * paragraph) shall be included in all copies or substantial portions of the
     13  * Software.
     14  *
     15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
     20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
     21  * IN THE SOFTWARE.
     22  */
     23 
     24 #include <stdbool.h>
     25 #include <stdio.h>
     26 
     27 #include "vc4_qpu.h"
     28 #include "vc4_qpu_defines.h"
     29 
     30 static const char *qpu_add_opcodes[] = {
     31         [QPU_A_NOP] = "nop",
     32         [QPU_A_FADD] = "fadd",
     33         [QPU_A_FSUB] = "fsub",
     34         [QPU_A_FMIN] = "fmin",
     35         [QPU_A_FMAX] = "fmax",
     36         [QPU_A_FMINABS] = "fminabs",
     37         [QPU_A_FMAXABS] = "fmaxabs",
     38         [QPU_A_FTOI] = "ftoi",
     39         [QPU_A_ITOF] = "itof",
     40         [QPU_A_ADD] = "add",
     41         [QPU_A_SUB] = "sub",
     42         [QPU_A_SHR] = "shr",
     43         [QPU_A_ASR] = "asr",
     44         [QPU_A_ROR] = "ror",
     45         [QPU_A_SHL] = "shl",
     46         [QPU_A_MIN] = "min",
     47         [QPU_A_MAX] = "max",
     48         [QPU_A_AND] = "and",
     49         [QPU_A_OR] = "or",
     50         [QPU_A_XOR] = "xor",
     51         [QPU_A_NOT] = "not",
     52         [QPU_A_CLZ] = "clz",
     53         [QPU_A_V8ADDS] = "v8adds",
     54         [QPU_A_V8SUBS] = "v8subs",
     55 };
     56 
     57 static const char *qpu_mul_opcodes[] = {
     58         [QPU_M_NOP] = "nop",
     59         [QPU_M_FMUL] = "fmul",
     60         [QPU_M_MUL24] = "mul24",
     61         [QPU_M_V8MULD] = "v8muld",
     62         [QPU_M_V8MIN] = "v8min",
     63         [QPU_M_V8MAX] = "v8max",
     64         [QPU_M_V8ADDS] = "v8adds",
     65         [QPU_M_V8SUBS] = "v8subs",
     66 };
     67 
     68 static const char *qpu_sig[] = {
     69         [QPU_SIG_SW_BREAKPOINT] = "sig_brk",
     70         [QPU_SIG_NONE] = "",
     71         [QPU_SIG_THREAD_SWITCH] = "sig_switch",
     72         [QPU_SIG_PROG_END] = "sig_end",
     73         [QPU_SIG_WAIT_FOR_SCOREBOARD] = "sig_wait_score",
     74         [QPU_SIG_SCOREBOARD_UNLOCK] = "sig_unlock_score",
     75         [QPU_SIG_LAST_THREAD_SWITCH] = "sig_thread_switch",
     76         [QPU_SIG_COVERAGE_LOAD] = "sig_coverage_load",
     77         [QPU_SIG_COLOR_LOAD] = "sig_color_load",
     78         [QPU_SIG_COLOR_LOAD_END] = "sig_color_load_end",
     79         [QPU_SIG_LOAD_TMU0] = "load_tmu0",
     80         [QPU_SIG_LOAD_TMU1] = "load_tmu1",
     81         [QPU_SIG_ALPHA_MASK_LOAD] = "sig_alpha_mask_load",
     82         [QPU_SIG_SMALL_IMM] = "sig_small_imm",
     83         [QPU_SIG_LOAD_IMM] = "sig_load_imm",
     84         [QPU_SIG_BRANCH] = "sig_branch",
     85 };
     86 
     87 static const char *qpu_pack_mul[] = {
     88         [QPU_PACK_MUL_NOP] = "",
     89         [QPU_PACK_MUL_8888] = ".8888",
     90         [QPU_PACK_MUL_8A] = ".8a",
     91         [QPU_PACK_MUL_8B] = ".8b",
     92         [QPU_PACK_MUL_8C] = ".8c",
     93         [QPU_PACK_MUL_8D] = ".8d",
     94 };
     95 
     96 /* The QPU unpack for A and R4 files can be described the same, it's just that
     97  * the R4 variants are convert-to-float only, with no int support.
     98  */
     99 static const char *qpu_unpack[] = {
    100         [QPU_UNPACK_NOP] = "",
    101         [QPU_UNPACK_16A] = "16a",
    102         [QPU_UNPACK_16B] = "16b",
    103         [QPU_UNPACK_8D_REP] = "8d_rep",
    104         [QPU_UNPACK_8A] = "8a",
    105         [QPU_UNPACK_8B] = "8b",
    106         [QPU_UNPACK_8C] = "8c",
    107         [QPU_UNPACK_8D] = "8d",
    108 };
    109 
    110 static const char *special_read_a[] = {
    111         "uni",
    112         NULL,
    113         NULL,
    114         "vary",
    115         NULL,
    116         NULL,
    117         "elem",
    118         "nop",
    119         NULL,
    120         "x_pix",
    121         "ms_flags",
    122         NULL,
    123         NULL,
    124         NULL,
    125         NULL,
    126         NULL,
    127         "vpm_read",
    128         "vpm_ld_busy",
    129         "vpm_ld_wait",
    130         "mutex_acq"
    131 };
    132 
    133 static const char *special_read_b[] = {
    134         "uni",
    135         NULL,
    136         NULL,
    137         "vary",
    138         NULL,
    139         NULL,
    140         "qpu",
    141         "nop",
    142         NULL,
    143         "y_pix",
    144         "rev_flag",
    145         NULL,
    146         NULL,
    147         NULL,
    148         NULL,
    149         NULL,
    150         "vpm_read",
    151         "vpm_st_busy",
    152         "vpm_st_wait",
    153         "mutex_acq"
    154 };
    155 
    156 /**
    157  * This has the B-file descriptions for register writes.
    158  *
    159  * Since only a couple of regs are different between A and B, the A overrides
    160  * are in get_special_write_desc().
    161  */
    162 static const char *special_write[] = {
    163         [QPU_W_ACC0] = "r0",
    164         [QPU_W_ACC1] = "r1",
    165         [QPU_W_ACC2] = "r2",
    166         [QPU_W_ACC3] = "r3",
    167         [QPU_W_TMU_NOSWAP] = "tmu_noswap",
    168         [QPU_W_ACC5] = "r5",
    169         [QPU_W_HOST_INT] = "host_int",
    170         [QPU_W_NOP] = "nop",
    171         [QPU_W_UNIFORMS_ADDRESS] = "uniforms_addr",
    172         [QPU_W_QUAD_XY] = "quad_y",
    173         [QPU_W_MS_FLAGS] = "ms_flags",
    174         [QPU_W_TLB_STENCIL_SETUP] = "tlb_stencil_setup",
    175         [QPU_W_TLB_Z] = "tlb_z",
    176         [QPU_W_TLB_COLOR_MS] = "tlb_color_ms",
    177         [QPU_W_TLB_COLOR_ALL] = "tlb_color_all",
    178         [QPU_W_VPM] = "vpm",
    179         [QPU_W_VPMVCD_SETUP] = "vw_setup",
    180         [QPU_W_VPM_ADDR] = "vw_addr",
    181         [QPU_W_MUTEX_RELEASE] = "mutex_release",
    182         [QPU_W_SFU_RECIP] = "sfu_recip",
    183         [QPU_W_SFU_RECIPSQRT] = "sfu_recipsqrt",
    184         [QPU_W_SFU_EXP] = "sfu_exp",
    185         [QPU_W_SFU_LOG] = "sfu_log",
    186         [QPU_W_TMU0_S] = "tmu0_s",
    187         [QPU_W_TMU0_T] = "tmu0_t",
    188         [QPU_W_TMU0_R] = "tmu0_r",
    189         [QPU_W_TMU0_B] = "tmu0_b",
    190         [QPU_W_TMU1_S] = "tmu1_s",
    191         [QPU_W_TMU1_T] = "tmu1_t",
    192         [QPU_W_TMU1_R] = "tmu1_r",
    193         [QPU_W_TMU1_B] = "tmu1_b",
    194 };
    195 
    196 static const char *qpu_pack_a[] = {
    197         [QPU_PACK_A_NOP] = "",
    198         [QPU_PACK_A_16A] = ".16a",
    199         [QPU_PACK_A_16B] = ".16b",
    200         [QPU_PACK_A_8888] = ".8888",
    201         [QPU_PACK_A_8A] = ".8a",
    202         [QPU_PACK_A_8B] = ".8b",
    203         [QPU_PACK_A_8C] = ".8c",
    204         [QPU_PACK_A_8D] = ".8d",
    205 
    206         [QPU_PACK_A_32_SAT] = ".sat",
    207         [QPU_PACK_A_16A_SAT] = ".16a.sat",
    208         [QPU_PACK_A_16B_SAT] = ".16b.sat",
    209         [QPU_PACK_A_8888_SAT] = ".8888.sat",
    210         [QPU_PACK_A_8A_SAT] = ".8a.sat",
    211         [QPU_PACK_A_8B_SAT] = ".8b.sat",
    212         [QPU_PACK_A_8C_SAT] = ".8c.sat",
    213         [QPU_PACK_A_8D_SAT] = ".8d.sat",
    214 };
    215 
    216 static const char *qpu_cond[] = {
    217         [QPU_COND_NEVER] = ".never",
    218         [QPU_COND_ALWAYS] = "",
    219         [QPU_COND_ZS] = ".zs",
    220         [QPU_COND_ZC] = ".zc",
    221         [QPU_COND_NS] = ".ns",
    222         [QPU_COND_NC] = ".nc",
    223         [QPU_COND_CS] = ".cs",
    224         [QPU_COND_CC] = ".cc",
    225 };
    226 
    227 static const char *qpu_cond_branch[] = {
    228         [QPU_COND_BRANCH_ALL_ZS] = ".all_zs",
    229         [QPU_COND_BRANCH_ALL_ZC] = ".all_zc",
    230         [QPU_COND_BRANCH_ANY_ZS] = ".any_zs",
    231         [QPU_COND_BRANCH_ANY_ZC] = ".any_zc",
    232         [QPU_COND_BRANCH_ALL_NS] = ".all_ns",
    233         [QPU_COND_BRANCH_ALL_NC] = ".all_nc",
    234         [QPU_COND_BRANCH_ANY_NS] = ".any_ns",
    235         [QPU_COND_BRANCH_ANY_NC] = ".any_nc",
    236         [QPU_COND_BRANCH_ALL_CS] = ".all_cs",
    237         [QPU_COND_BRANCH_ALL_CC] = ".all_cc",
    238         [QPU_COND_BRANCH_ANY_CS] = ".any_cs",
    239         [QPU_COND_BRANCH_ANY_CC] = ".any_cc",
    240         [QPU_COND_BRANCH_ALWAYS] = "",
    241 };
    242 
    243 #define DESC(array, index)                                        \
    244         ((index >= ARRAY_SIZE(array) || !(array)[index]) ?         \
    245          "???" : (array)[index])
    246 
    247 static const char *
    248 get_special_write_desc(int reg, bool is_a)
    249 {
    250         if (is_a) {
    251                 switch (reg) {
    252                 case QPU_W_QUAD_XY:
    253                         return "quad_x";
    254                 case QPU_W_VPMVCD_SETUP:
    255                         return "vr_setup";
    256                 case QPU_W_VPM_ADDR:
    257                         return "vr_addr";
    258                 }
    259         }
    260 
    261         return special_write[reg];
    262 }
    263 
    264 void
    265 vc4_qpu_disasm_pack_mul(FILE *out, uint32_t pack)
    266 {
    267         fprintf(out, "%s", DESC(qpu_pack_mul, pack));
    268 }
    269 
    270 void
    271 vc4_qpu_disasm_pack_a(FILE *out, uint32_t pack)
    272 {
    273         fprintf(out, "%s", DESC(qpu_pack_a, pack));
    274 }
    275 
    276 void
    277 vc4_qpu_disasm_unpack(FILE *out, uint32_t unpack)
    278 {
    279         if (unpack != QPU_UNPACK_NOP)
    280                 fprintf(out, ".%s", DESC(qpu_unpack, unpack));
    281 }
    282 
    283 void
    284 vc4_qpu_disasm_cond(FILE *out, uint32_t cond)
    285 {
    286         fprintf(out, "%s", DESC(qpu_cond, cond));
    287 }
    288 
    289 void
    290 vc4_qpu_disasm_cond_branch(FILE *out, uint32_t cond)
    291 {
    292         fprintf(out, "%s", DESC(qpu_cond_branch, cond));
    293 }
    294 
    295 static void
    296 print_alu_dst(uint64_t inst, bool is_mul)
    297 {
    298         bool is_a = is_mul == ((inst & QPU_WS) != 0);
    299         uint32_t waddr = (is_mul ?
    300                           QPU_GET_FIELD(inst, QPU_WADDR_MUL) :
    301                           QPU_GET_FIELD(inst, QPU_WADDR_ADD));
    302         const char *file = is_a ? "a" : "b";
    303         uint32_t pack = QPU_GET_FIELD(inst, QPU_PACK);
    304 
    305         if (waddr <= 31)
    306                 fprintf(stderr, "r%s%d", file, waddr);
    307         else if (get_special_write_desc(waddr, is_a))
    308                 fprintf(stderr, "%s", get_special_write_desc(waddr, is_a));
    309         else
    310                 fprintf(stderr, "%s%d?", file, waddr);
    311 
    312         if (is_mul && (inst & QPU_PM)) {
    313                 vc4_qpu_disasm_pack_mul(stderr, pack);
    314         } else if (is_a && !(inst & QPU_PM)) {
    315                 vc4_qpu_disasm_pack_a(stderr, pack);
    316         }
    317 }
    318 
    319 static void
    320 print_alu_src(uint64_t inst, uint32_t mux, bool is_mul)
    321 {
    322         bool is_a = mux != QPU_MUX_B;
    323         const char *file = is_a ? "a" : "b";
    324         uint32_t raddr = (is_a ?
    325                           QPU_GET_FIELD(inst, QPU_RADDR_A) :
    326                           QPU_GET_FIELD(inst, QPU_RADDR_B));
    327         uint32_t unpack = QPU_GET_FIELD(inst, QPU_UNPACK);
    328         bool has_si = QPU_GET_FIELD(inst, QPU_SIG) == QPU_SIG_SMALL_IMM;
    329         uint32_t si = QPU_GET_FIELD(inst, QPU_SMALL_IMM);
    330 
    331         if (mux <= QPU_MUX_R5) {
    332                 fprintf(stderr, "r%d", mux);
    333                 if (has_si && is_mul && si >= QPU_SMALL_IMM_MUL_ROT + 1)
    334                         fprintf(stderr, "+%d", si - QPU_SMALL_IMM_MUL_ROT);
    335         } else if (!is_a && has_si) {
    336                 if (si <= 15)
    337                         fprintf(stderr, "%d", si);
    338                 else if (si <= 31)
    339                         fprintf(stderr, "%d", -16 + (si - 16));
    340                 else if (si <= 39)
    341                         fprintf(stderr, "%.1f", (float)(1 << (si - 32)));
    342                 else if (si <= 47)
    343                         fprintf(stderr, "%f", 1.0f / (1 << (48 - si)));
    344                 else
    345                         fprintf(stderr, "<bad imm %d>", si);
    346         } else if (raddr <= 31)
    347                 fprintf(stderr, "r%s%d", file, raddr);
    348         else {
    349                 if (is_a)
    350                         fprintf(stderr, "%s", DESC(special_read_a, raddr - 32));
    351                 else
    352                         fprintf(stderr, "%s", DESC(special_read_b, raddr - 32));
    353         }
    354 
    355         if (((mux == QPU_MUX_A && !(inst & QPU_PM)) ||
    356              (mux == QPU_MUX_R4 && (inst & QPU_PM)))) {
    357                 vc4_qpu_disasm_unpack(stderr, unpack);
    358         }
    359 }
    360 
    361 static void
    362 print_add_op(uint64_t inst)
    363 {
    364         uint32_t op_add = QPU_GET_FIELD(inst, QPU_OP_ADD);
    365         uint32_t cond = QPU_GET_FIELD(inst, QPU_COND_ADD);
    366         bool is_mov = (op_add == QPU_A_OR &&
    367                        QPU_GET_FIELD(inst, QPU_ADD_A) ==
    368                        QPU_GET_FIELD(inst, QPU_ADD_B));
    369 
    370         if (is_mov)
    371                 fprintf(stderr, "mov");
    372         else
    373                 fprintf(stderr, "%s", DESC(qpu_add_opcodes, op_add));
    374 
    375         if ((inst & QPU_SF) && op_add != QPU_A_NOP)
    376                 fprintf(stderr, ".sf");
    377 
    378         if (op_add != QPU_A_NOP)
    379                 vc4_qpu_disasm_cond(stderr, cond);
    380 
    381         fprintf(stderr, " ");
    382         print_alu_dst(inst, false);
    383         fprintf(stderr, ", ");
    384 
    385         print_alu_src(inst, QPU_GET_FIELD(inst, QPU_ADD_A), false);
    386 
    387         if (!is_mov) {
    388                 fprintf(stderr, ", ");
    389 
    390                 print_alu_src(inst, QPU_GET_FIELD(inst, QPU_ADD_B), false);
    391         }
    392 }
    393 
    394 static void
    395 print_mul_op(uint64_t inst)
    396 {
    397         uint32_t op_add = QPU_GET_FIELD(inst, QPU_OP_ADD);
    398         uint32_t op_mul = QPU_GET_FIELD(inst, QPU_OP_MUL);
    399         uint32_t cond = QPU_GET_FIELD(inst, QPU_COND_MUL);
    400         bool is_mov = (op_mul == QPU_M_V8MIN &&
    401                        QPU_GET_FIELD(inst, QPU_MUL_A) ==
    402                        QPU_GET_FIELD(inst, QPU_MUL_B));
    403 
    404         if (is_mov)
    405                 fprintf(stderr, "mov");
    406         else
    407                 fprintf(stderr, "%s", DESC(qpu_mul_opcodes, op_mul));
    408 
    409         if ((inst & QPU_SF) && op_add == QPU_A_NOP)
    410                 fprintf(stderr, ".sf");
    411 
    412         if (op_mul != QPU_M_NOP)
    413                 vc4_qpu_disasm_cond(stderr, cond);
    414 
    415         fprintf(stderr, " ");
    416         print_alu_dst(inst, true);
    417         fprintf(stderr, ", ");
    418 
    419         print_alu_src(inst, QPU_GET_FIELD(inst, QPU_MUL_A), true);
    420 
    421         if (!is_mov) {
    422                 fprintf(stderr, ", ");
    423                 print_alu_src(inst, QPU_GET_FIELD(inst, QPU_MUL_B), true);
    424         }
    425 }
    426 
    427 static void
    428 print_load_imm(uint64_t inst)
    429 {
    430         uint32_t imm = inst;
    431         uint32_t waddr_add = QPU_GET_FIELD(inst, QPU_WADDR_ADD);
    432         uint32_t waddr_mul = QPU_GET_FIELD(inst, QPU_WADDR_MUL);
    433         uint32_t cond_add = QPU_GET_FIELD(inst, QPU_COND_ADD);
    434         uint32_t cond_mul = QPU_GET_FIELD(inst, QPU_COND_MUL);
    435 
    436         fprintf(stderr, "load_imm ");
    437 
    438         print_alu_dst(inst, false);
    439         if (waddr_add != QPU_W_NOP)
    440                 vc4_qpu_disasm_cond(stderr, cond_add);
    441         fprintf(stderr, ", ");
    442 
    443         print_alu_dst(inst, true);
    444         if (waddr_mul != QPU_W_NOP)
    445                 vc4_qpu_disasm_cond(stderr, cond_mul);
    446         fprintf(stderr, ", ");
    447 
    448         fprintf(stderr, "0x%08x (%f)", imm, uif(imm));
    449 }
    450 
    451 void
    452 vc4_qpu_disasm(const uint64_t *instructions, int num_instructions)
    453 {
    454         for (int i = 0; i < num_instructions; i++) {
    455                 uint64_t inst = instructions[i];
    456                 uint32_t sig = QPU_GET_FIELD(inst, QPU_SIG);
    457 
    458                 switch (sig) {
    459                 case QPU_SIG_BRANCH:
    460                         fprintf(stderr, "branch");
    461                         vc4_qpu_disasm_cond_branch(stderr,
    462                                                    QPU_GET_FIELD(inst,
    463                                                                  QPU_BRANCH_COND));
    464 
    465                         fprintf(stderr, " %d", (uint32_t)inst);
    466                         break;
    467 
    468                 case QPU_SIG_LOAD_IMM:
    469                         print_load_imm(inst);
    470                         break;
    471                 default:
    472                         if (sig != QPU_SIG_NONE)
    473                                 fprintf(stderr, "%s ", DESC(qpu_sig, sig));
    474                         print_add_op(inst);
    475                         fprintf(stderr, " ; ");
    476                         print_mul_op(inst);
    477                         break;
    478                 }
    479 
    480                 if (num_instructions != 1)
    481                         fprintf(stderr, "\n");
    482         }
    483 }
    484