Home | History | Annotate | Download | only in tools
      1 /*
      2  * Copyright  2017 Intel Corporation
      3  *
      4  * Permission is hereby granted, free of charge, to any person obtaining a
      5  * copy of this software and associated documentation files (the "Software"),
      6  * to deal in the Software without restriction, including without limitation
      7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      8  * and/or sell copies of the Software, and to permit persons to whom the
      9  * Software is furnished to do so, subject to the following conditions:
     10  *
     11  * The above copyright notice and this permission notice (including the next
     12  * paragraph) shall be included in all copies or substantial portions of the
     13  * Software.
     14  *
     15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
     20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
     21  * IN THE SOFTWARE.
     22  */
     23 
     24 #include "common/gen_decoder.h"
     25 #include "gen_disasm.h"
     26 
     27 #include <string.h>
     28 
     29 void
     30 gen_batch_decode_ctx_init(struct gen_batch_decode_ctx *ctx,
     31                           const struct gen_device_info *devinfo,
     32                           FILE *fp, enum gen_batch_decode_flags flags,
     33                           const char *xml_path,
     34                           struct gen_batch_decode_bo (*get_bo)(void *,
     35                                                                uint64_t),
     36                           void *user_data)
     37 {
     38    memset(ctx, 0, sizeof(*ctx));
     39 
     40    ctx->get_bo = get_bo;
     41    ctx->user_data = user_data;
     42    ctx->fp = fp;
     43    ctx->flags = flags;
     44 
     45    if (xml_path == NULL)
     46       ctx->spec = gen_spec_load(devinfo);
     47    else
     48       ctx->spec = gen_spec_load_from_path(devinfo, xml_path);
     49    ctx->disasm = gen_disasm_create(devinfo);
     50 }
     51 
/**
 * Release the resources held by a decoder context: the spec and the
 * disassembler that gen_batch_decode_ctx_init() stored in it.  The context
 * structure itself is not freed here.
 */
void
gen_batch_decode_ctx_finish(struct gen_batch_decode_ctx *ctx)
{
   gen_spec_destroy(ctx->spec);
   gen_disasm_destroy(ctx->disasm);
}
     58 
     59 #define CSI "\e["
     60 #define BLUE_HEADER  CSI "0;44m"
     61 #define GREEN_HEADER CSI "1;42m"
     62 #define NORMAL       CSI "0m"
     63 
     64 #define ARRAY_LENGTH(a) (sizeof (a) / sizeof (a)[0])
     65 
     66 static void
     67 ctx_print_group(struct gen_batch_decode_ctx *ctx,
     68                 struct gen_group *group,
     69                 uint64_t address, const void *map)
     70 {
     71    gen_print_group(ctx->fp, group, address, map, 0,
     72                    (ctx->flags & GEN_BATCH_DECODE_IN_COLOR) != 0);
     73 }
     74 
     75 static struct gen_batch_decode_bo
     76 ctx_get_bo(struct gen_batch_decode_ctx *ctx, uint64_t addr)
     77 {
     78    if (gen_spec_get_gen(ctx->spec) >= gen_make_gen(8,0)) {
     79       /* On Broadwell and above, we have 48-bit addresses which consume two
     80        * dwords.  Some packets require that these get stored in a "canonical
     81        * form" which means that bit 47 is sign-extended through the upper
     82        * bits. In order to correctly handle those aub dumps, we need to mask
     83        * off the top 16 bits.
     84        */
     85       addr &= (~0ull >> 16);
     86    }
     87 
     88    struct gen_batch_decode_bo bo = ctx->get_bo(ctx->user_data, addr);
     89 
     90    if (gen_spec_get_gen(ctx->spec) >= gen_make_gen(8,0))
     91       bo.addr &= (~0ull >> 16);
     92 
     93    /* We may actually have an offset into the bo */
     94    if (bo.map != NULL) {
     95       assert(bo.addr <= addr);
     96       uint64_t offset = addr - bo.addr;
     97       bo.map += offset;
     98       bo.addr += offset;
     99       bo.size -= offset;
    100    }
    101 
    102    return bo;
    103 }
    104 
    105 static void
    106 ctx_disassemble_program(struct gen_batch_decode_ctx *ctx,
    107                         uint32_t ksp, const char *type)
    108 {
    109    if (!ctx->instruction_base.map)
    110       return;
    111 
    112    printf("\nReferenced %s:\n", type);
    113    gen_disasm_disassemble(ctx->disasm,
    114                           (void *)ctx->instruction_base.map, ksp,
    115                           ctx->fp);
    116 }
    117 
    118 /* Heuristic to determine whether a uint32_t is probably actually a float
    119  * (http://stackoverflow.com/a/2953466)
    120  */
    121 
    122 static bool
    123 probably_float(uint32_t bits)
    124 {
    125    int exp = ((bits & 0x7f800000U) >> 23) - 127;
    126    uint32_t mant = bits & 0x007fffff;
    127 
    128    /* +- 0.0 */
    129    if (exp == -127 && mant == 0)
    130       return true;
    131 
    132    /* +- 1 billionth to 1 billion */
    133    if (-30 <= exp && exp <= 30)
    134       return true;
    135 
    136    /* some value with only a few binary digits */
    137    if ((mant & 0x0000ffff) == 0)
    138       return true;
    139 
    140    return false;
    141 }
    142 
    143 static void
    144 ctx_print_buffer(struct gen_batch_decode_ctx *ctx,
    145                  struct gen_batch_decode_bo bo,
    146                  uint32_t read_length,
    147                  uint32_t pitch)
    148 {
    149    const uint32_t *dw_end = bo.map + MIN2(bo.size, read_length);
    150 
    151    unsigned line_count = 0;
    152    for (const uint32_t *dw = bo.map; dw < dw_end; dw++) {
    153       if (line_count * 4 == pitch || line_count == 8) {
    154          fprintf(ctx->fp, "\n");
    155          line_count = 0;
    156       }
    157       fprintf(ctx->fp, line_count == 0 ? "  " : " ");
    158 
    159       if ((ctx->flags & GEN_BATCH_DECODE_FLOATS) && probably_float(*dw))
    160          fprintf(ctx->fp, "  %8.2f", *(float *) dw);
    161       else
    162          fprintf(ctx->fp, "  0x%08x", *dw);
    163 
    164       line_count++;
    165    }
    166    fprintf(ctx->fp, "\n");
    167 }
    168 
    169 static void
    170 handle_state_base_address(struct gen_batch_decode_ctx *ctx, const uint32_t *p)
    171 {
    172    struct gen_group *inst = gen_spec_find_instruction(ctx->spec, p);
    173 
    174    struct gen_field_iterator iter;
    175    gen_field_iterator_init(&iter, inst, p, 0, false);
    176 
    177    do {
    178       if (strcmp(iter.name, "Surface State Base Address") == 0) {
    179          ctx->surface_base = ctx_get_bo(ctx, iter.raw_value);
    180       } else if (strcmp(iter.name, "Dynamic State Base Address") == 0) {
    181          ctx->dynamic_base = ctx_get_bo(ctx, iter.raw_value);
    182       } else if (strcmp(iter.name, "Instruction Base Address") == 0) {
    183          ctx->instruction_base = ctx_get_bo(ctx, iter.raw_value);
    184       }
    185    } while (gen_field_iterator_next(&iter));
    186 }
    187 
    188 static void
    189 dump_binding_table(struct gen_batch_decode_ctx *ctx, uint32_t offset, int count)
    190 {
    191    struct gen_group *strct =
    192       gen_spec_find_struct(ctx->spec, "RENDER_SURFACE_STATE");
    193    if (strct == NULL) {
    194       fprintf(ctx->fp, "did not find RENDER_SURFACE_STATE info\n");
    195       return;
    196    }
    197 
    198    /* If we don't know the actual count, guess. */
    199    if (count < 0)
    200       count = 8;
    201 
    202    if (ctx->surface_base.map == NULL) {
    203       fprintf(ctx->fp, "  binding table unavailable\n");
    204       return;
    205    }
    206 
    207    if (offset % 32 != 0 || offset >= UINT16_MAX ||
    208        offset >= ctx->surface_base.size) {
    209       fprintf(ctx->fp, "  invalid binding table pointer\n");
    210       return;
    211    }
    212 
    213    const uint32_t *pointers = ctx->surface_base.map + offset;
    214    for (int i = 0; i < count; i++) {
    215       if (pointers[i] == 0)
    216          continue;
    217 
    218       if (pointers[i] % 32 != 0 ||
    219           (pointers[i] + strct->dw_length * 4) >= ctx->surface_base.size) {
    220          fprintf(ctx->fp, "pointer %u: %08x <not valid>\n", i, pointers[i]);
    221          continue;
    222       }
    223 
    224       fprintf(ctx->fp, "pointer %u: %08x\n", i, pointers[i]);
    225       ctx_print_group(ctx, strct, ctx->surface_base.addr + pointers[i],
    226                       ctx->surface_base.map + pointers[i]);
    227    }
    228 }
    229 
    230 static void
    231 dump_samplers(struct gen_batch_decode_ctx *ctx, uint32_t offset, int count)
    232 {
    233    struct gen_group *strct = gen_spec_find_struct(ctx->spec, "SAMPLER_STATE");
    234 
    235    /* If we don't know the actual count, guess. */
    236    if (count < 0)
    237       count = 4;
    238 
    239    if (ctx->dynamic_base.map == NULL) {
    240       fprintf(ctx->fp, "  samplers unavailable\n");
    241       return;
    242    }
    243 
    244    if (offset % 32 != 0 || offset >= ctx->dynamic_base.size) {
    245       fprintf(ctx->fp, "  invalid sampler state pointer\n");
    246       return;
    247    }
    248 
    249    uint64_t state_addr = ctx->dynamic_base.addr + offset;
    250    const void *state_map = ctx->dynamic_base.map + offset;
    251    for (int i = 0; i < count; i++) {
    252       fprintf(ctx->fp, "sampler state %d\n", i);
    253       ctx_print_group(ctx, strct, state_addr, state_map);
    254       state_addr += 16;
    255       state_map += 16;
    256    }
    257 }
    258 
    259 static void
    260 handle_media_interface_descriptor_load(struct gen_batch_decode_ctx *ctx,
    261                                        const uint32_t *p)
    262 {
    263    if (ctx->dynamic_base.map == NULL)
    264       return;
    265 
    266    struct gen_group *inst = gen_spec_find_instruction(ctx->spec, p);
    267    struct gen_group *desc =
    268       gen_spec_find_struct(ctx->spec, "INTERFACE_DESCRIPTOR_DATA");
    269 
    270    struct gen_field_iterator iter;
    271    gen_field_iterator_init(&iter, inst, p, 0, false);
    272    uint32_t descriptor_offset = 0;
    273    int descriptor_count = 0;
    274    do {
    275       if (strcmp(iter.name, "Interface Descriptor Data Start Address") == 0) {
    276          descriptor_offset = strtol(iter.value, NULL, 16);
    277       } else if (strcmp(iter.name, "Interface Descriptor Total Length") == 0) {
    278          descriptor_count =
    279             strtol(iter.value, NULL, 16) / (desc->dw_length * 4);
    280       }
    281    } while (gen_field_iterator_next(&iter));
    282 
    283    uint64_t desc_addr = ctx->dynamic_base.addr + descriptor_offset;
    284    const uint32_t *desc_map = ctx->dynamic_base.map + descriptor_offset;
    285    for (int i = 0; i < descriptor_count; i++) {
    286       fprintf(ctx->fp, "descriptor %d: %08x\n", i, descriptor_offset);
    287 
    288       ctx_print_group(ctx, inst, desc_addr, desc_map);
    289 
    290       gen_field_iterator_init(&iter, desc, desc_map, 0, false);
    291       uint64_t ksp;
    292       uint32_t sampler_offset, sampler_count;
    293       uint32_t binding_table_offset, binding_entry_count;
    294       do {
    295          if (strcmp(iter.name, "Kernel Start Pointer") == 0) {
    296             ksp = strtoll(iter.value, NULL, 16);
    297          } else if (strcmp(iter.name, "Sampler State Pointer") == 0) {
    298             sampler_offset = strtol(iter.value, NULL, 16);
    299          } else if (strcmp(iter.name, "Sampler Count") == 0) {
    300             sampler_count = strtol(iter.value, NULL, 10);
    301          } else if (strcmp(iter.name, "Binding Table Pointer") == 0) {
    302             binding_table_offset = strtol(iter.value, NULL, 16);
    303          } else if (strcmp(iter.name, "Binding Table Entry Count") == 0) {
    304             binding_entry_count = strtol(iter.value, NULL, 10);
    305          }
    306       } while (gen_field_iterator_next(&iter));
    307 
    308       ctx_disassemble_program(ctx, ksp, "compute shader");
    309       printf("\n");
    310 
    311       dump_samplers(ctx, sampler_offset, sampler_count);
    312       dump_binding_table(ctx, binding_table_offset, binding_entry_count);
    313 
    314       desc_map += desc->dw_length;
    315       desc_addr += desc->dw_length * 4;
    316    }
    317 }
    318 
    319 static void
    320 handle_3dstate_vertex_buffers(struct gen_batch_decode_ctx *ctx,
    321                               const uint32_t *p)
    322 {
    323    struct gen_group *inst = gen_spec_find_instruction(ctx->spec, p);
    324 
    325    struct gen_batch_decode_bo vb = {};
    326    uint32_t vb_size = 0;
    327    int index = -1;
    328    int pitch = -1;
    329    bool ready = false;
    330 
    331    struct gen_field_iterator iter;
    332    gen_field_iterator_init(&iter, inst, p, 0, false);
    333    do {
    334       if (strcmp(iter.name, "Vertex Buffer Index") == 0) {
    335          index = iter.raw_value;
    336       } else if (strcmp(iter.name, "Buffer Pitch") == 0) {
    337          pitch = iter.raw_value;
    338       } else if (strcmp(iter.name, "Buffer Starting Address") == 0) {
    339          vb = ctx_get_bo(ctx, iter.raw_value);
    340       } else if (strcmp(iter.name, "Buffer Size") == 0) {
    341          vb_size = iter.raw_value;
    342          ready = true;
    343       } else if (strcmp(iter.name, "End Address") == 0) {
    344          if (vb.map && iter.raw_value >= vb.addr)
    345             vb_size = iter.raw_value - vb.addr;
    346          else
    347             vb_size = 0;
    348          ready = true;
    349       }
    350 
    351       if (!ready)
    352          continue;
    353 
    354       fprintf(ctx->fp, "vertex buffer %d, size %d\n", index, vb_size);
    355 
    356       if (vb.map == NULL) {
    357          fprintf(ctx->fp, "  buffer contents unavailable\n");
    358          continue;
    359       }
    360 
    361       if (vb.map == 0 || vb_size == 0)
    362          continue;
    363 
    364       ctx_print_buffer(ctx, vb, vb_size, pitch);
    365 
    366       vb.map = NULL;
    367       vb_size = 0;
    368       index = -1;
    369       pitch = -1;
    370       ready = false;
    371    } while (gen_field_iterator_next(&iter));
    372 }
    373 
    374 static void
    375 handle_3dstate_index_buffer(struct gen_batch_decode_ctx *ctx,
    376                             const uint32_t *p)
    377 {
    378    struct gen_group *inst = gen_spec_find_instruction(ctx->spec, p);
    379 
    380    struct gen_batch_decode_bo ib = {};
    381    uint32_t ib_size = 0;
    382    uint32_t format = 0;
    383 
    384    struct gen_field_iterator iter;
    385    gen_field_iterator_init(&iter, inst, p, 0, false);
    386    do {
    387       if (strcmp(iter.name, "Index Format") == 0) {
    388          format = iter.raw_value;
    389       } else if (strcmp(iter.name, "Buffer Starting Address") == 0) {
    390          ib = ctx_get_bo(ctx, iter.raw_value);
    391       } else if (strcmp(iter.name, "Buffer Size") == 0) {
    392          ib_size = iter.raw_value;
    393       }
    394    } while (gen_field_iterator_next(&iter));
    395 
    396    if (ib.map == NULL) {
    397       fprintf(ctx->fp, "  buffer contents unavailable\n");
    398       return;
    399    }
    400 
    401    const void *m = ib.map;
    402    const void *ib_end = ib.map + MIN2(ib.size, ib_size);
    403    for (int i = 0; m < ib_end && i < 10; i++) {
    404       switch (format) {
    405       case 0:
    406          fprintf(ctx->fp, "%3d ", *(uint8_t *)m);
    407          m += 1;
    408          break;
    409       case 1:
    410          fprintf(ctx->fp, "%3d ", *(uint16_t *)m);
    411          m += 2;
    412          break;
    413       case 2:
    414          fprintf(ctx->fp, "%3d ", *(uint32_t *)m);
    415          m += 4;
    416          break;
    417       }
    418    }
    419 
    420    if (m < ib_end)
    421       fprintf(ctx->fp, "...");
    422    fprintf(ctx->fp, "\n");
    423 }
    424 
    425 static void
    426 decode_single_ksp(struct gen_batch_decode_ctx *ctx, const uint32_t *p)
    427 {
    428    struct gen_group *inst = gen_spec_find_instruction(ctx->spec, p);
    429 
    430    uint64_t ksp = 0;
    431    bool is_simd8 = false; /* vertex shaders on Gen8+ only */
    432    bool is_enabled = true;
    433 
    434    struct gen_field_iterator iter;
    435    gen_field_iterator_init(&iter, inst, p, 0, false);
    436    do {
    437       if (strcmp(iter.name, "Kernel Start Pointer") == 0) {
    438          ksp = iter.raw_value;
    439       } else if (strcmp(iter.name, "SIMD8 Dispatch Enable") == 0) {
    440          is_simd8 = iter.raw_value;
    441       } else if (strcmp(iter.name, "Dispatch Mode") == 0) {
    442          is_simd8 = strcmp(iter.value, "SIMD8") == 0;
    443       } else if (strcmp(iter.name, "Dispatch Enable") == 0) {
    444          is_simd8 = strcmp(iter.value, "SIMD8") == 0;
    445       } else if (strcmp(iter.name, "Enable") == 0) {
    446          is_enabled = iter.raw_value;
    447       }
    448    } while (gen_field_iterator_next(&iter));
    449 
    450    const char *type =
    451       strcmp(inst->name,   "VS_STATE") == 0 ? "vertex shader" :
    452       strcmp(inst->name,   "GS_STATE") == 0 ? "geometry shader" :
    453       strcmp(inst->name,   "SF_STATE") == 0 ? "strips and fans shader" :
    454       strcmp(inst->name, "CLIP_STATE") == 0 ? "clip shader" :
    455       strcmp(inst->name, "3DSTATE_DS") == 0 ? "tessellation control shader" :
    456       strcmp(inst->name, "3DSTATE_HS") == 0 ? "tessellation evaluation shader" :
    457       strcmp(inst->name, "3DSTATE_VS") == 0 ? (is_simd8 ? "SIMD8 vertex shader" : "vec4 vertex shader") :
    458       strcmp(inst->name, "3DSTATE_GS") == 0 ? (is_simd8 ? "SIMD8 geometry shader" : "vec4 geometry shader") :
    459       NULL;
    460 
    461    if (is_enabled) {
    462       ctx_disassemble_program(ctx, ksp, type);
    463       printf("\n");
    464    }
    465 }
    466 
    467 static void
    468 decode_ps_kernels(struct gen_batch_decode_ctx *ctx, const uint32_t *p)
    469 {
    470    struct gen_group *inst = gen_spec_find_instruction(ctx->spec, p);
    471 
    472    uint64_t ksp[3] = {0, 0, 0};
    473    bool enabled[3] = {false, false, false};
    474 
    475    struct gen_field_iterator iter;
    476    gen_field_iterator_init(&iter, inst, p, 0, false);
    477    do {
    478       if (strncmp(iter.name, "Kernel Start Pointer ",
    479                   strlen("Kernel Start Pointer ")) == 0) {
    480          int idx = iter.name[strlen("Kernel Start Pointer ")] - '0';
    481          ksp[idx] = strtol(iter.value, NULL, 16);
    482       } else if (strcmp(iter.name, "8 Pixel Dispatch Enable") == 0) {
    483          enabled[0] = strcmp(iter.value, "true") == 0;
    484       } else if (strcmp(iter.name, "16 Pixel Dispatch Enable") == 0) {
    485          enabled[1] = strcmp(iter.value, "true") == 0;
    486       } else if (strcmp(iter.name, "32 Pixel Dispatch Enable") == 0) {
    487          enabled[2] = strcmp(iter.value, "true") == 0;
    488       }
    489    } while (gen_field_iterator_next(&iter));
    490 
    491    /* Reorder KSPs to be [8, 16, 32] instead of the hardware order. */
    492    if (enabled[0] + enabled[1] + enabled[2] == 1) {
    493       if (enabled[1]) {
    494          ksp[1] = ksp[0];
    495          ksp[0] = 0;
    496       } else if (enabled[2]) {
    497          ksp[2] = ksp[0];
    498          ksp[0] = 0;
    499       }
    500    } else {
    501       uint64_t tmp = ksp[1];
    502       ksp[1] = ksp[2];
    503       ksp[2] = tmp;
    504    }
    505 
    506    if (enabled[0])
    507       ctx_disassemble_program(ctx, ksp[0], "SIMD8 fragment shader");
    508    if (enabled[1])
    509       ctx_disassemble_program(ctx, ksp[1], "SIMD16 fragment shader");
    510    if (enabled[2])
    511       ctx_disassemble_program(ctx, ksp[2], "SIMD32 fragment shader");
    512    fprintf(ctx->fp, "\n");
    513 }
    514 
    515 static void
    516 decode_3dstate_constant(struct gen_batch_decode_ctx *ctx, const uint32_t *p)
    517 {
    518    struct gen_group *inst = gen_spec_find_instruction(ctx->spec, p);
    519 
    520    uint32_t read_length[4];
    521    struct gen_batch_decode_bo buffer[4];
    522    memset(buffer, 0, sizeof(buffer));
    523 
    524    int rlidx = 0, bidx = 0;
    525 
    526    struct gen_field_iterator iter;
    527    gen_field_iterator_init(&iter, inst, p, 0, false);
    528    do {
    529       if (strcmp(iter.name, "Read Length") == 0) {
    530          read_length[rlidx++] = iter.raw_value;
    531       } else if (strcmp(iter.name, "Buffer") == 0) {
    532          buffer[bidx++] = ctx_get_bo(ctx, iter.raw_value);
    533       }
    534    } while (gen_field_iterator_next(&iter));
    535 
    536    for (int i = 0; i < 4; i++) {
    537       if (read_length[i] == 0 || buffer[i].map == NULL)
    538          continue;
    539 
    540       unsigned size = read_length[i] * 32;
    541       fprintf(ctx->fp, "constant buffer %d, size %u\n", i, size);
    542 
    543       ctx_print_buffer(ctx, buffer[i], size, 0);
    544    }
    545 }
    546 
    547 static void
    548 decode_3dstate_binding_table_pointers(struct gen_batch_decode_ctx *ctx,
    549                                       const uint32_t *p)
    550 {
    551    dump_binding_table(ctx, p[1], -1);
    552 }
    553 
    554 static void
    555 decode_3dstate_sampler_state_pointers(struct gen_batch_decode_ctx *ctx,
    556                                       const uint32_t *p)
    557 {
    558    dump_samplers(ctx, p[1], -1);
    559 }
    560 
    561 static void
    562 decode_3dstate_sampler_state_pointers_gen6(struct gen_batch_decode_ctx *ctx,
    563                                            const uint32_t *p)
    564 {
    565    dump_samplers(ctx, p[1], -1);
    566    dump_samplers(ctx, p[2], -1);
    567    dump_samplers(ctx, p[3], -1);
    568 }
    569 
    570 static bool
    571 str_ends_with(const char *str, const char *end)
    572 {
    573    int offset = strlen(str) - strlen(end);
    574    if (offset < 0)
    575       return false;
    576 
    577    return strcmp(str + offset, end) == 0;
    578 }
    579 
    580 static void
    581 decode_dynamic_state_pointers(struct gen_batch_decode_ctx *ctx,
    582                               const char *struct_type, const uint32_t *p,
    583                               int count)
    584 {
    585    if (ctx->dynamic_base.map == NULL) {
    586       fprintf(ctx->fp, "  dynamic %s state unavailable\n", struct_type);
    587       return;
    588    }
    589 
    590    struct gen_group *inst = gen_spec_find_instruction(ctx->spec, p);
    591    struct gen_group *state = gen_spec_find_struct(ctx->spec, struct_type);
    592 
    593    uint32_t state_offset;
    594 
    595    struct gen_field_iterator iter;
    596    gen_field_iterator_init(&iter, inst, p, 0, false);
    597    do {
    598       if (str_ends_with(iter.name, "Pointer")) {
    599          state_offset = iter.raw_value;
    600          break;
    601       }
    602    } while (gen_field_iterator_next(&iter));
    603 
    604    uint32_t state_addr = ctx->dynamic_base.addr + state_offset;
    605    const uint32_t *state_map = ctx->dynamic_base.map + state_offset;
    606    for (int i = 0; i < count; i++) {
    607       fprintf(ctx->fp, "%s %d\n", struct_type, i);
    608       ctx_print_group(ctx, state, state_offset, state_map);
    609 
    610       state_addr += state->dw_length * 4;
    611       state_map += state->dw_length;
    612    }
    613 }
    614 
    615 static void
    616 decode_3dstate_viewport_state_pointers_cc(struct gen_batch_decode_ctx *ctx,
    617                                           const uint32_t *p)
    618 {
    619    decode_dynamic_state_pointers(ctx, "CC_VIEWPORT", p, 4);
    620 }
    621 
    622 static void
    623 decode_3dstate_viewport_state_pointers_sf_clip(struct gen_batch_decode_ctx *ctx,
    624                                                const uint32_t *p)
    625 {
    626    decode_dynamic_state_pointers(ctx, "SF_CLIP_VIEWPORT", p, 4);
    627 }
    628 
    629 static void
    630 decode_3dstate_blend_state_pointers(struct gen_batch_decode_ctx *ctx,
    631                                     const uint32_t *p)
    632 {
    633    decode_dynamic_state_pointers(ctx, "BLEND_STATE", p, 1);
    634 }
    635 
    636 static void
    637 decode_3dstate_cc_state_pointers(struct gen_batch_decode_ctx *ctx,
    638                                  const uint32_t *p)
    639 {
    640    decode_dynamic_state_pointers(ctx, "COLOR_CALC_STATE", p, 1);
    641 }
    642 
    643 static void
    644 decode_3dstate_scissor_state_pointers(struct gen_batch_decode_ctx *ctx,
    645                                       const uint32_t *p)
    646 {
    647    decode_dynamic_state_pointers(ctx, "SCISSOR_RECT", p, 1);
    648 }
    649 
    650 static void
    651 decode_load_register_imm(struct gen_batch_decode_ctx *ctx, const uint32_t *p)
    652 {
    653    struct gen_group *reg = gen_spec_find_register(ctx->spec, p[1]);
    654 
    655    if (reg != NULL) {
    656       fprintf(ctx->fp, "register %s (0x%x): 0x%x\n",
    657               reg->name, reg->register_offset, p[2]);
    658       ctx_print_group(ctx, reg, reg->register_offset, &p[2]);
    659    }
    660 }
    661 
/* Table of commands that get extra decoding beyond the generic field dump.
 *
 * cmd_name is the instruction name as reported by the spec and decode is the
 * routine invoked with the decoder context and a pointer to the first dword
 * of the command.  gen_print_batch() compares the instruction name against
 * each cmd_name in table order and runs the first exact match.
 */
struct custom_decoder {
   const char *cmd_name;
   void (*decode)(struct gen_batch_decode_ctx *ctx, const uint32_t *p);
} custom_decoders[] = {
   /* Base addresses, descriptors, vertex/index data and shader kernels. */
   { "STATE_BASE_ADDRESS", handle_state_base_address },
   { "MEDIA_INTERFACE_DESCRIPTOR_LOAD", handle_media_interface_descriptor_load },
   { "3DSTATE_VERTEX_BUFFERS", handle_3dstate_vertex_buffers },
   { "3DSTATE_INDEX_BUFFER", handle_3dstate_index_buffer },
   { "3DSTATE_VS", decode_single_ksp },
   { "3DSTATE_GS", decode_single_ksp },
   { "3DSTATE_DS", decode_single_ksp },
   { "3DSTATE_HS", decode_single_ksp },
   { "3DSTATE_PS", decode_ps_kernels },
   { "3DSTATE_CONSTANT_VS", decode_3dstate_constant },
   { "3DSTATE_CONSTANT_GS", decode_3dstate_constant },
   { "3DSTATE_CONSTANT_PS", decode_3dstate_constant },
   { "3DSTATE_CONSTANT_HS", decode_3dstate_constant },
   { "3DSTATE_CONSTANT_DS", decode_3dstate_constant },

   /* Per-stage binding table pointers. */
   { "3DSTATE_BINDING_TABLE_POINTERS_VS", decode_3dstate_binding_table_pointers },
   { "3DSTATE_BINDING_TABLE_POINTERS_HS", decode_3dstate_binding_table_pointers },
   { "3DSTATE_BINDING_TABLE_POINTERS_DS", decode_3dstate_binding_table_pointers },
   { "3DSTATE_BINDING_TABLE_POINTERS_GS", decode_3dstate_binding_table_pointers },
   { "3DSTATE_BINDING_TABLE_POINTERS_PS", decode_3dstate_binding_table_pointers },

   /* Sampler state pointers: per-stage packets, then the combined packet. */
   { "3DSTATE_SAMPLER_STATE_POINTERS_VS", decode_3dstate_sampler_state_pointers },
   { "3DSTATE_SAMPLER_STATE_POINTERS_HS", decode_3dstate_sampler_state_pointers },
   { "3DSTATE_SAMPLER_STATE_POINTERS_DS", decode_3dstate_sampler_state_pointers },
   { "3DSTATE_SAMPLER_STATE_POINTERS_GS", decode_3dstate_sampler_state_pointers },
   { "3DSTATE_SAMPLER_STATE_POINTERS_PS", decode_3dstate_sampler_state_pointers },
   { "3DSTATE_SAMPLER_STATE_POINTERS", decode_3dstate_sampler_state_pointers_gen6 },

   /* Pointers into dynamic state, and register writes. */
   { "3DSTATE_VIEWPORT_STATE_POINTERS_CC", decode_3dstate_viewport_state_pointers_cc },
   { "3DSTATE_VIEWPORT_STATE_POINTERS_SF_CLIP", decode_3dstate_viewport_state_pointers_sf_clip },
   { "3DSTATE_BLEND_STATE_POINTERS", decode_3dstate_blend_state_pointers },
   { "3DSTATE_CC_STATE_POINTERS", decode_3dstate_cc_state_pointers },
   { "3DSTATE_SCISSOR_STATE_POINTERS", decode_3dstate_scissor_state_pointers },
   { "MI_LOAD_REGISTER_IMM", decode_load_register_imm }
};
    701 
    702 static inline uint64_t
    703 get_address(struct gen_spec *spec, const uint32_t *p)
    704 {
    705    /* Addresses are always guaranteed to be page-aligned and sometimes
    706     * hardware packets have extra stuff stuffed in the bottom 12 bits.
    707     */
    708    uint64_t addr = p[0] & ~0xfffu;
    709 
    710    if (gen_spec_get_gen(spec) >= gen_make_gen(8,0)) {
    711       /* On Broadwell and above, we have 48-bit addresses which consume two
    712        * dwords.  Some packets require that these get stored in a "canonical
    713        * form" which means that bit 47 is sign-extended through the upper
    714        * bits. In order to correctly handle those aub dumps, we need to mask
    715        * off the top 16 bits.
    716        */
    717       addr |= ((uint64_t)p[1] & 0xffff) << 32;
    718    }
    719 
    720    return addr;
    721 }
    722 
    723 void
    724 gen_print_batch(struct gen_batch_decode_ctx *ctx,
    725                 const uint32_t *batch, uint32_t batch_size,
    726                 uint64_t batch_addr)
    727 {
    728    const uint32_t *p, *end = batch + batch_size;
    729    int length;
    730    struct gen_group *inst;
    731 
    732    for (p = batch; p < end; p += length) {
    733       inst = gen_spec_find_instruction(ctx->spec, p);
    734       length = gen_group_get_length(inst, p);
    735       assert(inst == NULL || length > 0);
    736       length = MAX2(1, length);
    737       if (inst == NULL) {
    738          fprintf(ctx->fp, "unknown instruction %08x\n", p[0]);
    739          continue;
    740       }
    741 
    742       const char *color, *reset_color;
    743       uint64_t offset;
    744 
    745       const char *inst_name = gen_group_get_name(inst);
    746       if (ctx->flags & GEN_BATCH_DECODE_IN_COLOR) {
    747          reset_color = NORMAL;
    748          if (ctx->flags & GEN_BATCH_DECODE_FULL) {
    749             if (strcmp(inst_name, "MI_BATCH_BUFFER_START") == 0 ||
    750                 strcmp(inst_name, "MI_BATCH_BUFFER_END") == 0)
    751                color = GREEN_HEADER;
    752             else
    753                color = BLUE_HEADER;
    754          } else {
    755             color = NORMAL;
    756          }
    757       } else {
    758          color = "";
    759          reset_color = "";
    760       }
    761 
    762       if (ctx->flags & GEN_BATCH_DECODE_OFFSETS)
    763          offset = batch_addr + ((char *)p - (char *)batch);
    764       else
    765          offset = 0;
    766 
    767       fprintf(ctx->fp, "%s0x%08"PRIx64":  0x%08x:  %-80s%s\n",
    768               color, offset, p[0], inst_name, reset_color);
    769 
    770       if (ctx->flags & GEN_BATCH_DECODE_FULL) {
    771          ctx_print_group(ctx, inst, offset, p);
    772 
    773          for (int i = 0; i < ARRAY_LENGTH(custom_decoders); i++) {
    774             if (strcmp(inst_name, custom_decoders[i].cmd_name) == 0) {
    775                custom_decoders[i].decode(ctx, p);
    776                break;
    777             }
    778          }
    779       }
    780 
    781       if (strcmp(inst_name, "MI_BATCH_BUFFER_START") == 0) {
    782          struct gen_batch_decode_bo next_batch;
    783          bool second_level;
    784          struct gen_field_iterator iter;
    785          gen_field_iterator_init(&iter, inst, p, 0, false);
    786          do {
    787             if (strcmp(iter.name, "Batch Buffer Start Address") == 0) {
    788                next_batch = ctx_get_bo(ctx, iter.raw_value);
    789             } else if (strcmp(iter.name, "Second Level Batch Buffer") == 0) {
    790                second_level = iter.raw_value;
    791             }
    792          } while (gen_field_iterator_next(&iter));
    793 
    794          if (next_batch.map == NULL) {
    795             fprintf(ctx->fp, "Secondary batch at 0x%08"PRIx64" unavailable",
    796                     next_batch.addr);
    797          }
    798 
    799          if (second_level) {
    800             /* MI_BATCH_BUFFER_START with "2nd Level Batch Buffer" set acts
    801              * like a subroutine call.  Commands that come afterwards get
    802              * processed once the 2nd level batch buffer returns with
    803              * MI_BATCH_BUFFER_END.
    804              */
    805             if (next_batch.map) {
    806                gen_print_batch(ctx, next_batch.map, next_batch.size,
    807                                next_batch.addr);
    808             }
    809          } else {
    810             /* MI_BATCH_BUFFER_START with "2nd Level Batch Buffer" unset acts
    811              * like a goto.  Nothing after it will ever get processed.  In
    812              * order to prevent the recursion from growing, we just reset the
    813              * loop and continue;
    814              */
    815             if (next_batch.map) {
    816                p = next_batch.map;
    817                end = next_batch.map + next_batch.size;
    818                length = 0;
    819                continue;
    820             } else {
    821                /* Nothing we can do */
    822                break;
    823             }
    824          }
    825       } else if (strcmp(inst_name, "MI_BATCH_BUFFER_END") == 0) {
    826          break;
    827       }
    828    }
    829 }
    830