Home | History | Annotate | Download | only in ilo
      1 /*
      2  * Mesa 3-D graphics library
      3  *
      4  * Copyright (C) 2012-2013 LunarG, Inc.
      5  *
      6  * Permission is hereby granted, free of charge, to any person obtaining a
      7  * copy of this software and associated documentation files (the "Software"),
      8  * to deal in the Software without restriction, including without limitation
      9  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
     10  * and/or sell copies of the Software, and to permit persons to whom the
     11  * Software is furnished to do so, subject to the following conditions:
     12  *
     13  * The above copyright notice and this permission notice shall be included
     14  * in all copies or substantial portions of the Software.
     15  *
     16  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     17  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     18  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     19  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     20  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
     21  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
     22  * DEALINGS IN THE SOFTWARE.
     23  *
     24  * Authors:
     25  *    Chia-I Wu <olv (at) lunarg.com>
     26  */
     27 
     28 #include "genhw/genhw.h" /* for SBE setup */
     29 #include "core/ilo_builder.h"
     30 #include "core/intel_winsys.h"
     31 #include "shader/ilo_shader_internal.h"
     32 #include "tgsi/tgsi_parse.h"
     33 
     34 #include "ilo_state.h"
     35 #include "ilo_shader.h"
     36 
     37 struct ilo_shader_cache {
     38    struct list_head shaders;
     39    struct list_head changed;
     40 
     41    int max_vs_scratch_size;
     42    int max_gs_scratch_size;
     43    int max_fs_scratch_size;
     44 };
     45 
     46 /**
     47  * Create a shader cache.  A shader cache can manage shaders and upload them
     48  * to a bo as a whole.
     49  */
     50 struct ilo_shader_cache *
     51 ilo_shader_cache_create(void)
     52 {
     53    struct ilo_shader_cache *shc;
     54 
     55    shc = CALLOC_STRUCT(ilo_shader_cache);
     56    if (!shc)
     57       return NULL;
     58 
     59    list_inithead(&shc->shaders);
     60    list_inithead(&shc->changed);
     61 
     62    return shc;
     63 }
     64 
     65 /**
     66  * Destroy a shader cache.
     67  */
     68 void
     69 ilo_shader_cache_destroy(struct ilo_shader_cache *shc)
     70 {
     71    FREE(shc);
     72 }
     73 
     74 /**
     75  * Add a shader to the cache.
     76  */
     77 void
     78 ilo_shader_cache_add(struct ilo_shader_cache *shc,
     79                      struct ilo_shader_state *shader)
     80 {
     81    struct ilo_shader *sh;
     82 
     83    shader->cache = shc;
     84    LIST_FOR_EACH_ENTRY(sh, &shader->variants, list)
     85       sh->uploaded = false;
     86 
     87    list_add(&shader->list, &shc->changed);
     88 }
     89 
     90 /**
     91  * Remove a shader from the cache.
     92  */
     93 void
     94 ilo_shader_cache_remove(struct ilo_shader_cache *shc,
     95                         struct ilo_shader_state *shader)
     96 {
     97    list_del(&shader->list);
     98    shader->cache = NULL;
     99 }
    100 
    101 /**
    102  * Notify the cache that a managed shader has changed.
    103  */
    104 static void
    105 ilo_shader_cache_notify_change(struct ilo_shader_cache *shc,
    106                                struct ilo_shader_state *shader)
    107 {
    108    if (shader->cache == shc) {
    109       list_del(&shader->list);
    110       list_add(&shader->list, &shc->changed);
    111    }
    112 }
    113 
    114 /**
    115  * Upload managed shaders to the bo.  Only shaders that are changed or added
    116  * after the last upload are uploaded.
    117  */
    118 void
    119 ilo_shader_cache_upload(struct ilo_shader_cache *shc,
    120                         struct ilo_builder *builder)
    121 {
    122    struct ilo_shader_state *shader, *next;
    123 
    124    LIST_FOR_EACH_ENTRY_SAFE(shader, next, &shc->changed, list) {
    125       struct ilo_shader *sh;
    126 
    127       LIST_FOR_EACH_ENTRY(sh, &shader->variants, list) {
    128          int scratch_size, *cur_max;
    129 
    130          if (sh->uploaded)
    131             continue;
    132 
    133          sh->cache_offset = ilo_builder_instruction_write(builder,
    134                sh->kernel_size, sh->kernel);
    135 
    136          sh->uploaded = true;
    137 
    138          switch (shader->info.type) {
    139          case PIPE_SHADER_VERTEX:
    140             scratch_size = ilo_state_vs_get_scratch_size(&sh->cso.vs);
    141             cur_max = &shc->max_vs_scratch_size;
    142             break;
    143          case PIPE_SHADER_GEOMETRY:
    144             scratch_size = ilo_state_gs_get_scratch_size(&sh->cso.gs);
    145             cur_max = &shc->max_gs_scratch_size;
    146             break;
    147          case PIPE_SHADER_FRAGMENT:
    148             scratch_size = ilo_state_ps_get_scratch_size(&sh->cso.ps);
    149             cur_max = &shc->max_fs_scratch_size;
    150             break;
    151          default:
    152             assert(!"unknown shader type");
    153             scratch_size = 0;
    154             cur_max = &shc->max_vs_scratch_size;
    155             break;
    156          }
    157 
    158          if (*cur_max < scratch_size)
    159             *cur_max = scratch_size;
    160       }
    161 
    162       list_del(&shader->list);
    163       list_add(&shader->list, &shc->shaders);
    164    }
    165 }
    166 
    167 /**
    168  * Invalidate all shaders so that they get uploaded in next
    169  * ilo_shader_cache_upload().
    170  */
    171 void
    172 ilo_shader_cache_invalidate(struct ilo_shader_cache *shc)
    173 {
    174    struct ilo_shader_state *shader, *next;
    175 
    176    LIST_FOR_EACH_ENTRY_SAFE(shader, next, &shc->shaders, list) {
    177       list_del(&shader->list);
    178       list_add(&shader->list, &shc->changed);
    179    }
    180 
    181    LIST_FOR_EACH_ENTRY(shader, &shc->changed, list) {
    182       struct ilo_shader *sh;
    183 
    184       LIST_FOR_EACH_ENTRY(sh, &shader->variants, list)
    185          sh->uploaded = false;
    186    }
    187 
    188    shc->max_vs_scratch_size = 0;
    189    shc->max_gs_scratch_size = 0;
    190    shc->max_fs_scratch_size = 0;
    191 }
    192 
    193 void
    194 ilo_shader_cache_get_max_scratch_sizes(const struct ilo_shader_cache *shc,
    195                                        int *vs_scratch_size,
    196                                        int *gs_scratch_size,
    197                                        int *fs_scratch_size)
    198 {
    199    *vs_scratch_size = shc->max_vs_scratch_size;
    200    *gs_scratch_size = shc->max_gs_scratch_size;
    201    *fs_scratch_size = shc->max_fs_scratch_size;
    202 }
    203 
    204 /**
    205  * Initialize a shader variant.
    206  */
    207 void
    208 ilo_shader_variant_init(struct ilo_shader_variant *variant,
    209                         const struct ilo_shader_info *info,
    210                         const struct ilo_state_vector *vec)
    211 {
    212    int num_views, i;
    213 
    214    memset(variant, 0, sizeof(*variant));
    215 
    216    switch (info->type) {
    217    case PIPE_SHADER_VERTEX:
    218       variant->u.vs.rasterizer_discard =
    219          vec->rasterizer->state.rasterizer_discard;
    220       variant->u.vs.num_ucps =
    221          util_last_bit(vec->rasterizer->state.clip_plane_enable);
    222       break;
    223    case PIPE_SHADER_GEOMETRY:
    224       variant->u.gs.rasterizer_discard =
    225          vec->rasterizer->state.rasterizer_discard;
    226       variant->u.gs.num_inputs = vec->vs->shader->out.count;
    227       for (i = 0; i < vec->vs->shader->out.count; i++) {
    228          variant->u.gs.semantic_names[i] =
    229             vec->vs->shader->out.semantic_names[i];
    230          variant->u.gs.semantic_indices[i] =
    231             vec->vs->shader->out.semantic_indices[i];
    232       }
    233       break;
    234    case PIPE_SHADER_FRAGMENT:
    235       variant->u.fs.flatshade =
    236          (info->has_color_interp && vec->rasterizer->state.flatshade);
    237       variant->u.fs.fb_height = (info->has_pos) ?
    238          vec->fb.state.height : 1;
    239       variant->u.fs.num_cbufs = vec->fb.state.nr_cbufs;
    240       break;
    241    default:
    242       assert(!"unknown shader type");
    243       break;
    244    }
    245 
    246    /* use PCB unless constant buffer 0 is not in user buffer  */
    247    if ((vec->cbuf[info->type].enabled_mask & 0x1) &&
    248        !vec->cbuf[info->type].cso[0].user_buffer)
    249       variant->use_pcb = false;
    250    else
    251       variant->use_pcb = true;
    252 
    253    num_views = vec->view[info->type].count;
    254    assert(info->num_samplers <= num_views);
    255 
    256    variant->num_sampler_views = info->num_samplers;
    257    for (i = 0; i < info->num_samplers; i++) {
    258       const struct pipe_sampler_view *view = vec->view[info->type].states[i];
    259       const struct ilo_sampler_cso *sampler = vec->sampler[info->type].cso[i];
    260 
    261       if (view) {
    262          variant->sampler_view_swizzles[i].r = view->swizzle_r;
    263          variant->sampler_view_swizzles[i].g = view->swizzle_g;
    264          variant->sampler_view_swizzles[i].b = view->swizzle_b;
    265          variant->sampler_view_swizzles[i].a = view->swizzle_a;
    266       }
    267       else if (info->shadow_samplers & (1 << i)) {
    268          variant->sampler_view_swizzles[i].r = PIPE_SWIZZLE_X;
    269          variant->sampler_view_swizzles[i].g = PIPE_SWIZZLE_X;
    270          variant->sampler_view_swizzles[i].b = PIPE_SWIZZLE_X;
    271          variant->sampler_view_swizzles[i].a = PIPE_SWIZZLE_1;
    272       }
    273       else {
    274          variant->sampler_view_swizzles[i].r = PIPE_SWIZZLE_X;
    275          variant->sampler_view_swizzles[i].g = PIPE_SWIZZLE_Y;
    276          variant->sampler_view_swizzles[i].b = PIPE_SWIZZLE_Z;
    277          variant->sampler_view_swizzles[i].a = PIPE_SWIZZLE_W;
    278       }
    279 
    280       /*
    281        * When non-nearest filter and PIPE_TEX_WRAP_CLAMP wrap mode is used,
    282        * the HW wrap mode is set to GEN6_TEXCOORDMODE_CLAMP_BORDER, and we
    283        * need to manually saturate the texture coordinates.
    284        */
    285       if (sampler) {
    286          variant->saturate_tex_coords[0] |= sampler->saturate_s << i;
    287          variant->saturate_tex_coords[1] |= sampler->saturate_t << i;
    288          variant->saturate_tex_coords[2] |= sampler->saturate_r << i;
    289       }
    290    }
    291 }
    292 
    293 /**
    294  * Guess the shader variant, knowing that the context may still change.
    295  */
    296 static void
    297 ilo_shader_variant_guess(struct ilo_shader_variant *variant,
    298                          const struct ilo_shader_info *info,
    299                          const struct ilo_state_vector *vec)
    300 {
    301    int i;
    302 
    303    memset(variant, 0, sizeof(*variant));
    304 
    305    switch (info->type) {
    306    case PIPE_SHADER_VERTEX:
    307       break;
    308    case PIPE_SHADER_GEOMETRY:
    309       break;
    310    case PIPE_SHADER_FRAGMENT:
    311       variant->u.fs.flatshade = false;
    312       variant->u.fs.fb_height = (info->has_pos) ?
    313          vec->fb.state.height : 1;
    314       variant->u.fs.num_cbufs = 1;
    315       break;
    316    default:
    317       assert(!"unknown shader type");
    318       break;
    319    }
    320 
    321    variant->use_pcb = true;
    322 
    323    variant->num_sampler_views = info->num_samplers;
    324    for (i = 0; i < info->num_samplers; i++) {
    325       if (info->shadow_samplers & (1 << i)) {
    326          variant->sampler_view_swizzles[i].r = PIPE_SWIZZLE_X;
    327          variant->sampler_view_swizzles[i].g = PIPE_SWIZZLE_X;
    328          variant->sampler_view_swizzles[i].b = PIPE_SWIZZLE_X;
    329          variant->sampler_view_swizzles[i].a = PIPE_SWIZZLE_1;
    330       }
    331       else {
    332          variant->sampler_view_swizzles[i].r = PIPE_SWIZZLE_X;
    333          variant->sampler_view_swizzles[i].g = PIPE_SWIZZLE_Y;
    334          variant->sampler_view_swizzles[i].b = PIPE_SWIZZLE_Z;
    335          variant->sampler_view_swizzles[i].a = PIPE_SWIZZLE_W;
    336       }
    337    }
    338 }
    339 
    340 
    341 /**
    342  * Parse a TGSI instruction for the shader info.
    343  */
    344 static void
    345 ilo_shader_info_parse_inst(struct ilo_shader_info *info,
    346                            const struct tgsi_full_instruction *inst)
    347 {
    348    int i;
    349 
    350    /* look for edgeflag passthrough */
    351    if (info->edgeflag_out >= 0 &&
    352        inst->Instruction.Opcode == TGSI_OPCODE_MOV &&
    353        inst->Dst[0].Register.File == TGSI_FILE_OUTPUT &&
    354        inst->Dst[0].Register.Index == info->edgeflag_out) {
    355 
    356       assert(inst->Src[0].Register.File == TGSI_FILE_INPUT);
    357       info->edgeflag_in = inst->Src[0].Register.Index;
    358    }
    359 
    360    if (inst->Instruction.Texture) {
    361       bool shadow;
    362 
    363       switch (inst->Texture.Texture) {
    364       case TGSI_TEXTURE_SHADOW1D:
    365       case TGSI_TEXTURE_SHADOW2D:
    366       case TGSI_TEXTURE_SHADOWRECT:
    367       case TGSI_TEXTURE_SHADOW1D_ARRAY:
    368       case TGSI_TEXTURE_SHADOW2D_ARRAY:
    369       case TGSI_TEXTURE_SHADOWCUBE:
    370       case TGSI_TEXTURE_SHADOWCUBE_ARRAY:
    371          shadow = true;
    372          break;
    373       default:
    374          shadow = false;
    375          break;
    376       }
    377 
    378       for (i = 0; i < inst->Instruction.NumSrcRegs; i++) {
    379          const struct tgsi_full_src_register *src = &inst->Src[i];
    380 
    381          if (src->Register.File == TGSI_FILE_SAMPLER) {
    382             const int idx = src->Register.Index;
    383 
    384             if (idx >= info->num_samplers)
    385                info->num_samplers = idx + 1;
    386 
    387             if (shadow)
    388                info->shadow_samplers |= 1 << idx;
    389          }
    390       }
    391    }
    392 }
    393 
    394 /**
    395  * Parse a TGSI property for the shader info.
    396  */
    397 static void
    398 ilo_shader_info_parse_prop(struct ilo_shader_info *info,
    399                            const struct tgsi_full_property *prop)
    400 {
    401    switch (prop->Property.PropertyName) {
    402    case TGSI_PROPERTY_FS_COLOR0_WRITES_ALL_CBUFS:
    403       info->fs_color0_writes_all_cbufs = prop->u[0].Data;
    404       break;
    405    default:
    406       break;
    407    }
    408 }
    409 
    410 /**
    411  * Parse a TGSI declaration for the shader info.
    412  */
    413 static void
    414 ilo_shader_info_parse_decl(struct ilo_shader_info *info,
    415                            const struct tgsi_full_declaration *decl)
    416 {
    417    switch (decl->Declaration.File) {
    418    case TGSI_FILE_INPUT:
    419       if (decl->Declaration.Interpolate &&
    420           decl->Interp.Interpolate == TGSI_INTERPOLATE_COLOR)
    421          info->has_color_interp = true;
    422       if (decl->Declaration.Semantic &&
    423           decl->Semantic.Name == TGSI_SEMANTIC_POSITION)
    424          info->has_pos = true;
    425       break;
    426    case TGSI_FILE_OUTPUT:
    427       if (decl->Declaration.Semantic &&
    428           decl->Semantic.Name == TGSI_SEMANTIC_EDGEFLAG)
    429          info->edgeflag_out = decl->Range.First;
    430       break;
    431    case TGSI_FILE_CONSTANT:
    432       {
    433          const int idx = (decl->Declaration.Dimension) ?
    434             decl->Dim.Index2D : 0;
    435          if (info->constant_buffer_count <= idx)
    436             info->constant_buffer_count = idx + 1;
    437       }
    438       break;
    439    case TGSI_FILE_SYSTEM_VALUE:
    440       if (decl->Declaration.Semantic &&
    441           decl->Semantic.Name == TGSI_SEMANTIC_INSTANCEID)
    442          info->has_instanceid = true;
    443       if (decl->Declaration.Semantic &&
    444           decl->Semantic.Name == TGSI_SEMANTIC_VERTEXID)
    445          info->has_vertexid = true;
    446       break;
    447    default:
    448       break;
    449    }
    450 }
    451 
    452 static void
    453 ilo_shader_info_parse_tokens(struct ilo_shader_info *info)
    454 {
    455    struct tgsi_parse_context parse;
    456 
    457    info->edgeflag_in = -1;
    458    info->edgeflag_out = -1;
    459 
    460    tgsi_parse_init(&parse, info->tokens);
    461    while (!tgsi_parse_end_of_tokens(&parse)) {
    462       const union tgsi_full_token *token;
    463 
    464       tgsi_parse_token(&parse);
    465       token = &parse.FullToken;
    466 
    467       switch (token->Token.Type) {
    468       case TGSI_TOKEN_TYPE_DECLARATION:
    469          ilo_shader_info_parse_decl(info, &token->FullDeclaration);
    470          break;
    471       case TGSI_TOKEN_TYPE_INSTRUCTION:
    472          ilo_shader_info_parse_inst(info, &token->FullInstruction);
    473          break;
    474       case TGSI_TOKEN_TYPE_PROPERTY:
    475          ilo_shader_info_parse_prop(info, &token->FullProperty);
    476          break;
    477       default:
    478          break;
    479       }
    480    }
    481    tgsi_parse_free(&parse);
    482 }
    483 
    484 /**
    485  * Create a shader state.
    486  */
    487 static struct ilo_shader_state *
    488 ilo_shader_state_create(const struct ilo_dev *dev,
    489                         const struct ilo_state_vector *vec,
    490                         int type, const void *templ)
    491 {
    492    struct ilo_shader_state *state;
    493    struct ilo_shader_variant variant;
    494 
    495    state = CALLOC_STRUCT(ilo_shader_state);
    496    if (!state)
    497       return NULL;
    498 
    499    state->info.dev = dev;
    500    state->info.type = type;
    501 
    502    if (type == PIPE_SHADER_COMPUTE) {
    503       const struct pipe_compute_state *c =
    504          (const struct pipe_compute_state *) templ;
    505 
    506       state->info.tokens = tgsi_dup_tokens(c->prog);
    507       state->info.compute.req_local_mem = c->req_local_mem;
    508       state->info.compute.req_private_mem = c->req_private_mem;
    509       state->info.compute.req_input_mem = c->req_input_mem;
    510    }
    511    else {
    512       const struct pipe_shader_state *s =
    513          (const struct pipe_shader_state *) templ;
    514 
    515       state->info.tokens = tgsi_dup_tokens(s->tokens);
    516       state->info.stream_output = s->stream_output;
    517    }
    518 
    519    list_inithead(&state->variants);
    520 
    521    ilo_shader_info_parse_tokens(&state->info);
    522 
    523    /* guess and compile now */
    524    ilo_shader_variant_guess(&variant, &state->info, vec);
    525    if (!ilo_shader_state_use_variant(state, &variant)) {
    526       ilo_shader_destroy(state);
    527       return NULL;
    528    }
    529 
    530    return state;
    531 }
    532 
    533 /**
    534  * Add a compiled shader to the shader state.
    535  */
    536 static void
    537 ilo_shader_state_add_shader(struct ilo_shader_state *state,
    538                             struct ilo_shader *sh)
    539 {
    540    list_add(&sh->list, &state->variants);
    541    state->num_variants++;
    542    state->total_size += sh->kernel_size;
    543 
    544    if (state->cache)
    545       ilo_shader_cache_notify_change(state->cache, state);
    546 }
    547 
    548 /**
    549  * Remove a compiled shader from the shader state.
    550  */
    551 static void
    552 ilo_shader_state_remove_shader(struct ilo_shader_state *state,
    553                                struct ilo_shader *sh)
    554 {
    555    list_del(&sh->list);
    556    state->num_variants--;
    557    state->total_size -= sh->kernel_size;
    558 }
    559 
    560 /**
    561  * Garbage collect shader variants in the shader state.
    562  */
    563 static void
    564 ilo_shader_state_gc(struct ilo_shader_state *state)
    565 {
    566    /* activate when the variants take up more than 4KiB of space */
    567    const int limit = 4 * 1024;
    568    struct ilo_shader *sh, *next;
    569 
    570    if (state->total_size < limit)
    571       return;
    572 
    573    /* remove from the tail as the most recently ones are at the head */
    574    LIST_FOR_EACH_ENTRY_SAFE_REV(sh, next, &state->variants, list) {
    575       ilo_shader_state_remove_shader(state, sh);
    576       ilo_shader_destroy_kernel(sh);
    577 
    578       if (state->total_size <= limit / 2)
    579          break;
    580    }
    581 }
    582 
    583 /**
    584  * Search for a shader variant.
    585  */
    586 static struct ilo_shader *
    587 ilo_shader_state_search_variant(struct ilo_shader_state *state,
    588                                 const struct ilo_shader_variant *variant)
    589 {
    590    struct ilo_shader *sh = NULL, *tmp;
    591 
    592    LIST_FOR_EACH_ENTRY(tmp, &state->variants, list) {
    593       if (memcmp(&tmp->variant, variant, sizeof(*variant)) == 0) {
    594          sh = tmp;
    595          break;
    596       }
    597    }
    598 
    599    return sh;
    600 }
    601 
    602 static void
    603 init_shader_urb(const struct ilo_shader *kernel,
    604                 const struct ilo_shader_state *state,
    605                 struct ilo_state_shader_urb_info *urb)
    606 {
    607    urb->cv_input_attr_count = kernel->in.count;
    608    urb->read_base = 0;
    609    urb->read_count = kernel->in.count;
    610 
    611    urb->output_attr_count = kernel->out.count;
    612    urb->user_cull_enables = 0x0;
    613    urb->user_clip_enables = 0x0;
    614 }
    615 
    616 static void
    617 init_shader_kernel(const struct ilo_shader *kernel,
    618                    const struct ilo_shader_state *state,
    619                    struct ilo_state_shader_kernel_info *kern)
    620 {
    621    kern->offset = 0;
    622    kern->grf_start = kernel->in.start_grf;
    623    kern->pcb_attr_count =
    624       (kernel->pcb.cbuf0_size + kernel->pcb.clip_state_size + 15) / 16;
    625 }
    626 
    627 static void
    628 init_shader_resource(const struct ilo_shader *kernel,
    629                      const struct ilo_shader_state *state,
    630                      struct ilo_state_shader_resource_info *resource)
    631 {
    632    resource->sampler_count = state->info.num_samplers;
    633    resource->surface_count = 0;
    634    resource->has_uav = false;
    635 }
    636 
    637 static void
    638 init_vs(struct ilo_shader *kernel,
    639         const struct ilo_shader_state *state)
    640 {
    641    struct ilo_state_vs_info info;
    642 
    643    memset(&info, 0, sizeof(info));
    644 
    645    init_shader_urb(kernel, state, &info.urb);
    646    init_shader_kernel(kernel, state, &info.kernel);
    647    init_shader_resource(kernel, state, &info.resource);
    648    info.per_thread_scratch_size = kernel->per_thread_scratch_size;
    649    info.dispatch_enable = true;
    650    info.stats_enable = true;
    651 
    652    if (ilo_dev_gen(state->info.dev) == ILO_GEN(6) && kernel->stream_output) {
    653       struct ilo_state_gs_info gs_info;
    654 
    655       memset(&gs_info, 0, sizeof(gs_info));
    656 
    657       gs_info.urb.cv_input_attr_count = kernel->out.count;
    658       gs_info.urb.read_count = kernel->out.count;
    659       gs_info.kernel.grf_start = kernel->gs_start_grf;
    660       gs_info.sol.sol_enable = true;
    661       gs_info.sol.stats_enable = true;
    662       gs_info.sol.render_disable = kernel->variant.u.vs.rasterizer_discard;
    663       gs_info.sol.svbi_post_inc = kernel->svbi_post_inc;
    664       gs_info.sol.tristrip_reorder = GEN7_REORDER_LEADING;
    665       gs_info.dispatch_enable = true;
    666       gs_info.stats_enable = true;
    667 
    668       ilo_state_vs_init(&kernel->cso.vs_sol.vs, state->info.dev, &info);
    669       ilo_state_gs_init(&kernel->cso.vs_sol.sol, state->info.dev, &gs_info);
    670    } else {
    671       ilo_state_vs_init(&kernel->cso.vs, state->info.dev, &info);
    672    }
    673 }
    674 
    675 static void
    676 init_gs(struct ilo_shader *kernel,
    677         const struct ilo_shader_state *state)
    678 {
    679    const struct pipe_stream_output_info *so_info = &state->info.stream_output;
    680    struct ilo_state_gs_info info;
    681 
    682    memset(&info, 0, sizeof(info));
    683 
    684    init_shader_urb(kernel, state, &info.urb);
    685    init_shader_kernel(kernel, state, &info.kernel);
    686    init_shader_resource(kernel, state, &info.resource);
    687    info.per_thread_scratch_size = kernel->per_thread_scratch_size;
    688    info.dispatch_enable = true;
    689    info.stats_enable = true;
    690 
    691    if (so_info->num_outputs > 0) {
    692       info.sol.sol_enable = true;
    693       info.sol.stats_enable = true;
    694       info.sol.render_disable = kernel->variant.u.gs.rasterizer_discard;
    695       info.sol.tristrip_reorder = GEN7_REORDER_LEADING;
    696    }
    697 
    698    ilo_state_gs_init(&kernel->cso.gs, state->info.dev, &info);
    699 }
    700 
    701 static void
    702 init_ps(struct ilo_shader *kernel,
    703         const struct ilo_shader_state *state)
    704 {
    705    struct ilo_state_ps_info info;
    706 
    707    memset(&info, 0, sizeof(info));
    708 
    709    init_shader_kernel(kernel, state, &info.kernel_8);
    710    init_shader_resource(kernel, state, &info.resource);
    711 
    712    info.per_thread_scratch_size = kernel->per_thread_scratch_size;
    713    info.io.has_rt_write = true;
    714    info.io.posoffset = GEN6_POSOFFSET_NONE;
    715    info.io.attr_count = kernel->in.count;
    716    info.io.use_z = kernel->in.has_pos;
    717    info.io.use_w = kernel->in.has_pos;
    718    info.io.use_coverage_mask = false;
    719    info.io.pscdepth = (kernel->out.has_pos) ?
    720       GEN7_PSCDEPTH_ON : GEN7_PSCDEPTH_OFF;
    721    info.io.write_pixel_mask = kernel->has_kill;
    722    info.io.write_omask = false;
    723 
    724    info.params.sample_mask = 0x1;
    725    info.params.earlyz_control_psexec = false;
    726    info.params.alpha_may_kill = false;
    727    info.params.dual_source_blending = false;
    728    info.params.has_writeable_rt = true;
    729 
    730    info.valid_kernels = GEN6_PS_DISPATCH_8;
    731 
    732    /*
    733     * From the Sandy Bridge PRM, volume 2 part 1, page 284:
    734     *
    735     *     "(MSDISPMODE_PERSAMPLE) This is the high-quality multisample mode
    736     *      where (over and above PERPIXEL mode) the PS is run for each covered
    737     *      sample. This mode is also used for "normal" non-multisample
    738     *      rendering (aka 1X), given Number of Multisamples is programmed to
    739     *      NUMSAMPLES_1."
    740     */
    741    info.per_sample_dispatch = true;
    742 
    743    info.rt_clear_enable = false;
    744    info.rt_resolve_enable = false;
    745    info.cv_per_sample_interp = false;
    746    info.cv_has_earlyz_op = false;
    747    info.sample_count_one = true;
    748    info.cv_has_depth_buffer = true;
    749 
    750    ilo_state_ps_init(&kernel->cso.ps, state->info.dev, &info);
    751 
    752    /* remember current parameters */
    753    kernel->ps_params = info.params;
    754 }
    755 
    756 static void
    757 init_sol(struct ilo_shader *kernel,
    758          const struct ilo_dev *dev,
    759          const struct pipe_stream_output_info *so_info,
    760          bool rasterizer_discard)
    761 {
    762    struct ilo_state_sol_decl_info decls[4][PIPE_MAX_SO_OUTPUTS];
    763    unsigned buf_offsets[PIPE_MAX_SO_BUFFERS];
    764    struct ilo_state_sol_info info;
    765    unsigned i;
    766 
    767    if (!so_info->num_outputs) {
    768       ilo_state_sol_init_disabled(&kernel->sol, dev, rasterizer_discard);
    769       return;
    770    }
    771 
    772    memset(&info, 0, sizeof(info));
    773    info.data = kernel->sol_data;
    774    info.data_size = sizeof(kernel->sol_data);
    775    info.sol_enable = true;
    776    info.stats_enable = true;
    777    info.tristrip_reorder = GEN7_REORDER_TRAILING;
    778    info.render_disable = rasterizer_discard;
    779    info.render_stream = 0;
    780 
    781    for (i = 0; i < 4; i++) {
    782       info.buffer_strides[i] = so_info->stride[i] * 4;
    783 
    784       info.streams[i].cv_vue_attr_count = kernel->out.count;
    785       info.streams[i].decls = decls[i];
    786    }
    787 
    788    memset(decls, 0, sizeof(decls));
    789    memset(buf_offsets, 0, sizeof(buf_offsets));
    790    for (i = 0; i < so_info->num_outputs; i++) {
    791       const unsigned stream = so_info->output[i].stream;
    792       const unsigned buffer = so_info->output[i].output_buffer;
    793       struct ilo_state_sol_decl_info *decl;
    794       unsigned attr;
    795 
    796       /* figure out which attribute is sourced */
    797       for (attr = 0; attr < kernel->out.count; attr++) {
    798          const int reg_idx = kernel->out.register_indices[attr];
    799          if (reg_idx == so_info->output[i].register_index)
    800             break;
    801       }
    802       if (attr >= kernel->out.count) {
    803          assert(!"stream output an undefined register");
    804          attr = 0;
    805       }
    806 
    807       if (info.streams[stream].vue_read_count < attr + 1)
    808          info.streams[stream].vue_read_count = attr + 1;
    809 
    810       /* pad with holes first */
    811       while (buf_offsets[buffer] < so_info->output[i].dst_offset) {
    812          int num_dwords;
    813 
    814          num_dwords = so_info->output[i].dst_offset - buf_offsets[buffer];
    815          if (num_dwords > 4)
    816             num_dwords = 4;
    817 
    818          assert(info.streams[stream].decl_count < ARRAY_SIZE(decls[stream]));
    819          decl = &decls[stream][info.streams[stream].decl_count];
    820 
    821          decl->attr = 0;
    822          decl->is_hole = true;
    823          decl->component_base = 0;
    824          decl->component_count = num_dwords;
    825          decl->buffer = buffer;
    826 
    827          info.streams[stream].decl_count++;
    828          buf_offsets[buffer] += num_dwords;
    829       }
    830       assert(buf_offsets[buffer] == so_info->output[i].dst_offset);
    831 
    832       assert(info.streams[stream].decl_count < ARRAY_SIZE(decls[stream]));
    833       decl = &decls[stream][info.streams[stream].decl_count];
    834 
    835       decl->attr = attr;
    836       decl->is_hole = false;
    837       /* PSIZE is at W channel */
    838       if (kernel->out.semantic_names[attr] == TGSI_SEMANTIC_PSIZE) {
    839          assert(so_info->output[i].start_component == 0);
    840          assert(so_info->output[i].num_components == 1);
    841          decl->component_base = 3;
    842          decl->component_count = 1;
    843       } else {
    844          decl->component_base = so_info->output[i].start_component;
    845          decl->component_count = so_info->output[i].num_components;
    846       }
    847       decl->buffer = buffer;
    848 
    849       info.streams[stream].decl_count++;
    850       buf_offsets[buffer] += so_info->output[i].num_components;
    851    }
    852 
    853    ilo_state_sol_init(&kernel->sol, dev, &info);
    854 }
    855 
    856 /**
    857  * Add a shader variant to the shader state.
    858  */
    859 static struct ilo_shader *
    860 ilo_shader_state_add_variant(struct ilo_shader_state *state,
    861                              const struct ilo_shader_variant *variant)
    862 {
    863    bool rasterizer_discard = false;
    864    struct ilo_shader *sh;
    865 
    866    switch (state->info.type) {
    867    case PIPE_SHADER_VERTEX:
    868       sh = ilo_shader_compile_vs(state, variant);
    869       rasterizer_discard = variant->u.vs.rasterizer_discard;
    870       break;
    871    case PIPE_SHADER_FRAGMENT:
    872       sh = ilo_shader_compile_fs(state, variant);
    873       break;
    874    case PIPE_SHADER_GEOMETRY:
    875       sh = ilo_shader_compile_gs(state, variant);
    876       rasterizer_discard = variant->u.gs.rasterizer_discard;
    877       break;
    878    case PIPE_SHADER_COMPUTE:
    879       sh = ilo_shader_compile_cs(state, variant);
    880       break;
    881    default:
    882       sh = NULL;
    883       break;
    884    }
    885    if (!sh) {
    886       assert(!"failed to compile shader");
    887       return NULL;
    888    }
    889 
    890    sh->variant = *variant;
    891 
    892    init_sol(sh, state->info.dev, &state->info.stream_output,
    893          rasterizer_discard);
    894 
    895    ilo_shader_state_add_shader(state, sh);
    896 
    897    return sh;
    898 }
    899 
    900 /**
    901  * Update state->shader to point to a variant.  If the variant does not exist,
    902  * it will be added first.
    903  */
    904 bool
    905 ilo_shader_state_use_variant(struct ilo_shader_state *state,
    906                              const struct ilo_shader_variant *variant)
    907 {
    908    struct ilo_shader *sh;
    909    bool construct_cso = false;
    910 
    911    sh = ilo_shader_state_search_variant(state, variant);
    912    if (!sh) {
    913       ilo_shader_state_gc(state);
    914 
    915       sh = ilo_shader_state_add_variant(state, variant);
    916       if (!sh)
    917          return false;
    918 
    919       construct_cso = true;
    920    }
    921 
    922    /* move to head */
    923    if (state->variants.next != &sh->list) {
    924       list_del(&sh->list);
    925       list_add(&sh->list, &state->variants);
    926    }
    927 
    928    state->shader = sh;
    929 
    930    if (construct_cso) {
    931       switch (state->info.type) {
    932       case PIPE_SHADER_VERTEX:
    933          init_vs(sh, state);
    934          break;
    935       case PIPE_SHADER_GEOMETRY:
    936          init_gs(sh, state);
    937          break;
    938       case PIPE_SHADER_FRAGMENT:
    939          init_ps(sh, state);
    940          break;
    941       default:
    942          break;
    943       }
    944    }
    945 
    946    return true;
    947 }
    948 
    949 struct ilo_shader_state *
    950 ilo_shader_create_vs(const struct ilo_dev *dev,
    951                      const struct pipe_shader_state *state,
    952                      const struct ilo_state_vector *precompile)
    953 {
    954    struct ilo_shader_state *shader;
    955 
    956    shader = ilo_shader_state_create(dev, precompile,
    957          PIPE_SHADER_VERTEX, state);
    958 
    959    /* states used in ilo_shader_variant_init() */
    960    shader->info.non_orthogonal_states = ILO_DIRTY_VIEW_VS |
    961                                         ILO_DIRTY_RASTERIZER |
    962                                         ILO_DIRTY_CBUF;
    963 
    964    return shader;
    965 }
    966 
    967 struct ilo_shader_state *
    968 ilo_shader_create_gs(const struct ilo_dev *dev,
    969                      const struct pipe_shader_state *state,
    970                      const struct ilo_state_vector *precompile)
    971 {
    972    struct ilo_shader_state *shader;
    973 
    974    shader = ilo_shader_state_create(dev, precompile,
    975          PIPE_SHADER_GEOMETRY, state);
    976 
    977    /* states used in ilo_shader_variant_init() */
    978    shader->info.non_orthogonal_states = ILO_DIRTY_VIEW_GS |
    979                                         ILO_DIRTY_VS |
    980                                         ILO_DIRTY_RASTERIZER |
    981                                         ILO_DIRTY_CBUF;
    982 
    983    return shader;
    984 }
    985 
    986 struct ilo_shader_state *
    987 ilo_shader_create_fs(const struct ilo_dev *dev,
    988                      const struct pipe_shader_state *state,
    989                      const struct ilo_state_vector *precompile)
    990 {
    991    struct ilo_shader_state *shader;
    992 
    993    shader = ilo_shader_state_create(dev, precompile,
    994          PIPE_SHADER_FRAGMENT, state);
    995 
    996    /* states used in ilo_shader_variant_init() */
    997    shader->info.non_orthogonal_states = ILO_DIRTY_VIEW_FS |
    998                                         ILO_DIRTY_RASTERIZER |
    999                                         ILO_DIRTY_FB |
   1000                                         ILO_DIRTY_CBUF;
   1001 
   1002    return shader;
   1003 }
   1004 
   1005 struct ilo_shader_state *
   1006 ilo_shader_create_cs(const struct ilo_dev *dev,
   1007                      const struct pipe_compute_state *state,
   1008                      const struct ilo_state_vector *precompile)
   1009 {
   1010    struct ilo_shader_state *shader;
   1011 
   1012    shader = ilo_shader_state_create(dev, precompile,
   1013          PIPE_SHADER_COMPUTE, state);
   1014 
   1015    shader->info.non_orthogonal_states = 0;
   1016 
   1017    return shader;
   1018 }
   1019 
   1020 /**
   1021  * Destroy a shader state.
   1022  */
   1023 void
   1024 ilo_shader_destroy(struct ilo_shader_state *shader)
   1025 {
   1026    struct ilo_shader *sh, *next;
   1027 
   1028    LIST_FOR_EACH_ENTRY_SAFE(sh, next, &shader->variants, list)
   1029       ilo_shader_destroy_kernel(sh);
   1030 
   1031    FREE((struct tgsi_token *) shader->info.tokens);
   1032    FREE(shader);
   1033 }
   1034 
   1035 /**
   1036  * Select a kernel for the given context.  This will compile a new kernel if
   1037  * none of the existing kernels work with the context.
   1038  *
   1039  * \param ilo the context
   1040  * \param dirty states of the context that are considered changed
   1041  * \return true if a different kernel is selected
   1042  */
   1043 bool
   1044 ilo_shader_select_kernel(struct ilo_shader_state *shader,
   1045                          const struct ilo_state_vector *vec,
   1046                          uint32_t dirty)
   1047 {
   1048    struct ilo_shader_variant variant;
   1049    bool changed = false;
   1050 
   1051    if (shader->info.non_orthogonal_states & dirty) {
   1052       const struct ilo_shader * const old = shader->shader;
   1053 
   1054       ilo_shader_variant_init(&variant, &shader->info, vec);
   1055       ilo_shader_state_use_variant(shader, &variant);
   1056       changed = (shader->shader != old);
   1057    }
   1058 
   1059    if (shader->info.type == PIPE_SHADER_FRAGMENT) {
   1060       struct ilo_shader *kernel = shader->shader;
   1061 
   1062       if (kernel->ps_params.sample_mask != vec->sample_mask ||
   1063           kernel->ps_params.alpha_may_kill != vec->blend->alpha_may_kill) {
   1064          kernel->ps_params.sample_mask = vec->sample_mask;
   1065          kernel->ps_params.alpha_may_kill = vec->blend->alpha_may_kill;
   1066 
   1067          ilo_state_ps_set_params(&kernel->cso.ps, shader->info.dev,
   1068                &kernel->ps_params);
   1069 
   1070          changed = true;
   1071       }
   1072    }
   1073 
   1074    return changed;
   1075 }
   1076 
   1077 static int
   1078 route_attr(const int *semantics, const int *indices, int len,
   1079            int semantic, int index)
   1080 {
   1081    int i;
   1082 
   1083    for (i = 0; i < len; i++) {
   1084       if (semantics[i] == semantic && indices[i] == index)
   1085          return i;
   1086    }
   1087 
   1088    /* failed to match for COLOR, try BCOLOR */
   1089    if (semantic == TGSI_SEMANTIC_COLOR) {
   1090       for (i = 0; i < len; i++) {
   1091          if (semantics[i] == TGSI_SEMANTIC_BCOLOR && indices[i] == index)
   1092             return i;
   1093       }
   1094    }
   1095 
   1096    return -1;
   1097 }
   1098 
   1099 /**
   1100  * Select a routing for the given source shader and rasterizer state.
   1101  *
   1102  * \return true if a different routing is selected
   1103  */
   1104 bool
   1105 ilo_shader_select_kernel_sbe(struct ilo_shader_state *shader,
   1106                              const struct ilo_shader_state *source,
   1107                              const struct ilo_rasterizer_state *rasterizer)
   1108 {
   1109    const bool is_point = true;
   1110    const bool light_twoside = rasterizer->state.light_twoside;
   1111    const uint32_t sprite_coord_enable = rasterizer->state.sprite_coord_enable;
   1112    const int sprite_coord_mode = rasterizer->state.sprite_coord_mode;
   1113    struct ilo_shader *kernel = shader->shader;
   1114    struct ilo_kernel_routing *routing = &kernel->routing;
   1115    struct ilo_state_sbe_swizzle_info swizzles[ILO_STATE_SBE_MAX_SWIZZLE_COUNT];
   1116    struct ilo_state_sbe_info info;
   1117    const int *src_semantics, *src_indices;
   1118    int src_skip, src_len, src_slot;
   1119    int dst_len, dst_slot;
   1120 
   1121    assert(kernel);
   1122 
   1123    if (source) {
   1124       assert(source->shader);
   1125 
   1126       src_semantics = source->shader->out.semantic_names;
   1127       src_indices = source->shader->out.semantic_indices;
   1128       src_len = source->shader->out.count;
   1129       src_skip = 0;
   1130 
   1131       assert(src_len >= 2 &&
   1132              src_semantics[0] == TGSI_SEMANTIC_PSIZE &&
   1133              src_semantics[1] == TGSI_SEMANTIC_POSITION);
   1134 
   1135       /*
   1136        * skip PSIZE and POSITION (how about the optional CLIPDISTs?), unless
   1137        * they are all the source shader has and FS needs to read some
   1138        * attributes.
   1139        */
   1140       if (src_len > 2 || !kernel->in.count) {
   1141          src_semantics += 2;
   1142          src_indices += 2;
   1143          src_len -= 2;
   1144          src_skip = 2;
   1145       }
   1146    } else {
   1147       src_semantics = kernel->in.semantic_names;
   1148       src_indices = kernel->in.semantic_indices;
   1149       src_len = kernel->in.count;
   1150       src_skip = 0;
   1151    }
   1152 
   1153    /* no change */
   1154    if (routing->initialized &&
   1155        routing->is_point == is_point &&
   1156        routing->light_twoside == light_twoside &&
   1157        routing->sprite_coord_enable == sprite_coord_enable &&
   1158        routing->sprite_coord_mode == sprite_coord_mode &&
   1159        routing->src_len <= src_len &&
   1160        !memcmp(routing->src_semantics, src_semantics,
   1161           sizeof(src_semantics[0]) * routing->src_len) &&
   1162        !memcmp(routing->src_indices, src_indices,
   1163           sizeof(src_indices[0]) * routing->src_len))
   1164       return false;
   1165 
   1166    routing->is_point = is_point;
   1167    routing->light_twoside = light_twoside;
   1168    routing->sprite_coord_enable = sprite_coord_enable;
   1169    routing->sprite_coord_mode = sprite_coord_mode;
   1170 
   1171    assert(kernel->in.count <= ARRAY_SIZE(swizzles));
   1172    dst_len = MIN2(kernel->in.count, ARRAY_SIZE(swizzles));
   1173 
   1174    memset(&swizzles, 0, sizeof(swizzles));
   1175    memset(&info, 0, sizeof(info));
   1176 
   1177    info.attr_count = dst_len;
   1178    info.cv_vue_attr_count = src_skip + src_len;
   1179    info.vue_read_base = src_skip;
   1180    info.vue_read_count = 0;
   1181    info.has_min_read_count = true;
   1182    info.swizzle_enable = false;
   1183    info.swizzle_16_31 = false;
   1184    info.swizzle_count = 0;
   1185    info.swizzles = swizzles;
   1186    info.const_interp_enables = kernel->in.const_interp_enable;
   1187    info.point_sprite_enables = 0x0;
   1188    info.point_sprite_origin_lower_left =
   1189       (sprite_coord_mode == PIPE_SPRITE_COORD_LOWER_LEFT);
   1190    info.cv_is_point = is_point;
   1191 
   1192    for (dst_slot = 0; dst_slot < dst_len; dst_slot++) {
   1193       const int semantic = kernel->in.semantic_names[dst_slot];
   1194       const int index = kernel->in.semantic_indices[dst_slot];
   1195 
   1196       if (semantic == TGSI_SEMANTIC_GENERIC &&
   1197           (sprite_coord_enable & (1 << index)))
   1198          info.point_sprite_enables |= 1 << dst_slot;
   1199 
   1200       if (source) {
   1201          src_slot = route_attr(src_semantics, src_indices, src_len,
   1202                semantic, index);
   1203 
   1204          /*
   1205           * The source shader stage does not output this attribute.  The value
   1206           * is supposed to be undefined, unless the attribute goes through
   1207           * point sprite replacement or the attribute is
   1208           * TGSI_SEMANTIC_POSITION.  In all cases, we do not care which source
   1209           * attribute is picked.
   1210           *
   1211           * We should update the kernel code and omit the output of
   1212           * TGSI_SEMANTIC_POSITION here.
   1213           */
   1214          if (src_slot < 0)
   1215             src_slot = 0;
   1216       } else {
   1217          src_slot = dst_slot;
   1218       }
   1219 
   1220       /* use the following slot for two-sided lighting */
   1221       if (semantic == TGSI_SEMANTIC_COLOR && light_twoside &&
   1222           src_slot + 1 < src_len &&
   1223           src_semantics[src_slot + 1] == TGSI_SEMANTIC_BCOLOR &&
   1224           src_indices[src_slot + 1] == index) {
   1225          swizzles[dst_slot].attr_select = GEN6_INPUTATTR_FACING;
   1226          swizzles[dst_slot].attr = src_slot;
   1227          info.swizzle_enable = true;
   1228          src_slot++;
   1229       } else {
   1230          swizzles[dst_slot].attr_select = GEN6_INPUTATTR_NORMAL;
   1231          swizzles[dst_slot].attr = src_slot;
   1232          if (src_slot != dst_slot)
   1233             info.swizzle_enable = true;
   1234       }
   1235 
   1236       swizzles[dst_slot].force_zeros = false;
   1237 
   1238       if (info.vue_read_count < src_slot + 1)
   1239          info.vue_read_count = src_slot + 1;
   1240    }
   1241 
   1242    if (info.swizzle_enable)
   1243       info.swizzle_count = dst_len;
   1244 
   1245    if (routing->initialized)
   1246       ilo_state_sbe_set_info(&routing->sbe, shader->info.dev, &info);
   1247    else
   1248       ilo_state_sbe_init(&routing->sbe, shader->info.dev, &info);
   1249 
   1250    routing->src_len = info.vue_read_count;
   1251    memcpy(routing->src_semantics, src_semantics,
   1252          sizeof(src_semantics[0]) * routing->src_len);
   1253    memcpy(routing->src_indices, src_indices,
   1254          sizeof(src_indices[0]) * routing->src_len);
   1255 
   1256    routing->initialized = true;
   1257 
   1258    return true;
   1259 }
   1260 
   1261 /**
   1262  * Return the cache offset of the selected kernel.  This must be called after
   1263  * ilo_shader_select_kernel() and ilo_shader_cache_upload().
   1264  */
   1265 uint32_t
   1266 ilo_shader_get_kernel_offset(const struct ilo_shader_state *shader)
   1267 {
   1268    const struct ilo_shader *kernel = shader->shader;
   1269 
   1270    assert(kernel && kernel->uploaded);
   1271 
   1272    return kernel->cache_offset;
   1273 }
   1274 
   1275 /**
   1276  * Query a kernel parameter for the selected kernel.
   1277  */
   1278 int
   1279 ilo_shader_get_kernel_param(const struct ilo_shader_state *shader,
   1280                             enum ilo_kernel_param param)
   1281 {
   1282    const struct ilo_shader *kernel = shader->shader;
   1283    int val;
   1284 
   1285    assert(kernel);
   1286 
   1287    switch (param) {
   1288    case ILO_KERNEL_INPUT_COUNT:
   1289       val = kernel->in.count;
   1290       break;
   1291    case ILO_KERNEL_OUTPUT_COUNT:
   1292       val = kernel->out.count;
   1293       break;
   1294    case ILO_KERNEL_SAMPLER_COUNT:
   1295       val = shader->info.num_samplers;
   1296       break;
   1297    case ILO_KERNEL_SKIP_CBUF0_UPLOAD:
   1298       val = kernel->skip_cbuf0_upload;
   1299       break;
   1300    case ILO_KERNEL_PCB_CBUF0_SIZE:
   1301       val = kernel->pcb.cbuf0_size;
   1302       break;
   1303 
   1304    case ILO_KERNEL_SURFACE_TOTAL_COUNT:
   1305       val = kernel->bt.total_count;
   1306       break;
   1307    case ILO_KERNEL_SURFACE_TEX_BASE:
   1308       val = kernel->bt.tex_base;
   1309       break;
   1310    case ILO_KERNEL_SURFACE_TEX_COUNT:
   1311       val = kernel->bt.tex_count;
   1312       break;
   1313    case ILO_KERNEL_SURFACE_CONST_BASE:
   1314       val = kernel->bt.const_base;
   1315       break;
   1316    case ILO_KERNEL_SURFACE_CONST_COUNT:
   1317       val = kernel->bt.const_count;
   1318       break;
   1319    case ILO_KERNEL_SURFACE_RES_BASE:
   1320       val = kernel->bt.res_base;
   1321       break;
   1322    case ILO_KERNEL_SURFACE_RES_COUNT:
   1323       val = kernel->bt.res_count;
   1324       break;
   1325 
   1326    case ILO_KERNEL_VS_INPUT_INSTANCEID:
   1327       val = shader->info.has_instanceid;
   1328       break;
   1329    case ILO_KERNEL_VS_INPUT_VERTEXID:
   1330       val = shader->info.has_vertexid;
   1331       break;
   1332    case ILO_KERNEL_VS_INPUT_EDGEFLAG:
   1333       if (shader->info.edgeflag_in >= 0) {
   1334          /* we rely on the state tracker here */
   1335          assert(shader->info.edgeflag_in == kernel->in.count - 1);
   1336          val = true;
   1337       }
   1338       else {
   1339          val = false;
   1340       }
   1341       break;
   1342    case ILO_KERNEL_VS_PCB_UCP_SIZE:
   1343       val = kernel->pcb.clip_state_size;
   1344       break;
   1345    case ILO_KERNEL_VS_GEN6_SO:
   1346       val = kernel->stream_output;
   1347       break;
   1348    case ILO_KERNEL_VS_GEN6_SO_POINT_OFFSET:
   1349       val = kernel->gs_offsets[0];
   1350       break;
   1351    case ILO_KERNEL_VS_GEN6_SO_LINE_OFFSET:
   1352       val = kernel->gs_offsets[1];
   1353       break;
   1354    case ILO_KERNEL_VS_GEN6_SO_TRI_OFFSET:
   1355       val = kernel->gs_offsets[2];
   1356       break;
   1357    case ILO_KERNEL_VS_GEN6_SO_SURFACE_COUNT:
   1358       val = kernel->gs_bt_so_count;
   1359       break;
   1360 
   1361    case ILO_KERNEL_GS_DISCARD_ADJACENCY:
   1362       val = kernel->in.discard_adj;
   1363       break;
   1364    case ILO_KERNEL_GS_GEN6_SVBI_POST_INC:
   1365       val = kernel->svbi_post_inc;
   1366       break;
   1367    case ILO_KERNEL_GS_GEN6_SURFACE_SO_BASE:
   1368       val = kernel->bt.gen6_so_base;
   1369       break;
   1370    case ILO_KERNEL_GS_GEN6_SURFACE_SO_COUNT:
   1371       val = kernel->bt.gen6_so_count;
   1372       break;
   1373 
   1374    case ILO_KERNEL_FS_BARYCENTRIC_INTERPOLATIONS:
   1375       val = kernel->in.barycentric_interpolation_mode;
   1376       break;
   1377    case ILO_KERNEL_FS_DISPATCH_16_OFFSET:
   1378       val = 0;
   1379       break;
   1380    case ILO_KERNEL_FS_SURFACE_RT_BASE:
   1381       val = kernel->bt.rt_base;
   1382       break;
   1383    case ILO_KERNEL_FS_SURFACE_RT_COUNT:
   1384       val = kernel->bt.rt_count;
   1385       break;
   1386 
   1387    case ILO_KERNEL_CS_LOCAL_SIZE:
   1388       val = shader->info.compute.req_local_mem;
   1389       break;
   1390    case ILO_KERNEL_CS_PRIVATE_SIZE:
   1391       val = shader->info.compute.req_private_mem;
   1392       break;
   1393    case ILO_KERNEL_CS_INPUT_SIZE:
   1394       val = shader->info.compute.req_input_mem;
   1395       break;
   1396    case ILO_KERNEL_CS_SIMD_SIZE:
   1397       val = 16;
   1398       break;
   1399    case ILO_KERNEL_CS_SURFACE_GLOBAL_BASE:
   1400       val = kernel->bt.global_base;
   1401       break;
   1402    case ILO_KERNEL_CS_SURFACE_GLOBAL_COUNT:
   1403       val = kernel->bt.global_count;
   1404       break;
   1405 
   1406    default:
   1407       assert(!"unknown kernel parameter");
   1408       val = 0;
   1409       break;
   1410    }
   1411 
   1412    return val;
   1413 }
   1414 
   1415 /**
   1416  * Return the CSO of the selected kernel.
   1417  */
   1418 const union ilo_shader_cso *
   1419 ilo_shader_get_kernel_cso(const struct ilo_shader_state *shader)
   1420 {
   1421    const struct ilo_shader *kernel = shader->shader;
   1422 
   1423    assert(kernel);
   1424 
   1425    return &kernel->cso;
   1426 }
   1427 
   1428 /**
   1429  * Return the SO info of the selected kernel.
   1430  */
   1431 const struct pipe_stream_output_info *
   1432 ilo_shader_get_kernel_so_info(const struct ilo_shader_state *shader)
   1433 {
   1434    return &shader->info.stream_output;
   1435 }
   1436 
   1437 const struct ilo_state_sol *
   1438 ilo_shader_get_kernel_sol(const struct ilo_shader_state *shader)
   1439 {
   1440    const struct ilo_shader *kernel = shader->shader;
   1441 
   1442    assert(kernel);
   1443 
   1444    return &kernel->sol;
   1445 }
   1446 
   1447 /**
   1448  * Return the routing info of the selected kernel.
   1449  */
   1450 const struct ilo_state_sbe *
   1451 ilo_shader_get_kernel_sbe(const struct ilo_shader_state *shader)
   1452 {
   1453    const struct ilo_shader *kernel = shader->shader;
   1454 
   1455    assert(kernel);
   1456 
   1457    return &kernel->routing.sbe;
   1458 }
   1459