Home | History | Annotate | Download | only in i965
      1 /*
      2  * Copyright  2015 Intel Corporation
      3  *
      4  * Permission is hereby granted, free of charge, to any person obtaining a
      5  * copy of this software and associated documentation files (the "Software"),
      6  * to deal in the Software without restriction, including without limitation
      7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      8  * and/or sell copies of the Software, and to permit persons to whom the
      9  * Software is furnished to do so, subject to the following conditions:
     10  *
     11  * The above copyright notice and this permission notice (including the next
     12  * paragraph) shall be included in all copies or substantial portions of the
     13  * Software.
     14  *
     15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
     20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
     21  * IN THE SOFTWARE.
     22  */
     23 
     24 #include "brw_context.h"
     25 #include "compiler/brw_nir.h"
     26 #include "brw_program.h"
     27 #include "compiler/glsl/ir.h"
     28 #include "compiler/glsl/ir_optimization.h"
     29 #include "compiler/glsl/program.h"
     30 #include "compiler/nir/nir_serialize.h"
     31 #include "program/program.h"
     32 #include "main/mtypes.h"
     33 #include "main/shaderapi.h"
     34 #include "main/shaderobj.h"
     35 #include "main/uniforms.h"
     36 
     37 /**
     38  * Performs a compile of the shader stages even when we don't know
     39  * what non-orthogonal state will be set, in the hope that it reflects
     40  * the eventual NOS used, and thus allows us to produce link failures.
     41  */
     42 static bool
     43 brw_shader_precompile(struct gl_context *ctx,
     44                       struct gl_shader_program *sh_prog)
     45 {
     46    struct gl_linked_shader *vs = sh_prog->_LinkedShaders[MESA_SHADER_VERTEX];
     47    struct gl_linked_shader *tcs = sh_prog->_LinkedShaders[MESA_SHADER_TESS_CTRL];
     48    struct gl_linked_shader *tes = sh_prog->_LinkedShaders[MESA_SHADER_TESS_EVAL];
     49    struct gl_linked_shader *gs = sh_prog->_LinkedShaders[MESA_SHADER_GEOMETRY];
     50    struct gl_linked_shader *fs = sh_prog->_LinkedShaders[MESA_SHADER_FRAGMENT];
     51    struct gl_linked_shader *cs = sh_prog->_LinkedShaders[MESA_SHADER_COMPUTE];
     52 
     53    if (fs && !brw_fs_precompile(ctx, fs->Program))
     54       return false;
     55 
     56    if (gs && !brw_gs_precompile(ctx, gs->Program))
     57       return false;
     58 
     59    if (tes && !brw_tes_precompile(ctx, sh_prog, tes->Program))
     60       return false;
     61 
     62    if (tcs && !brw_tcs_precompile(ctx, sh_prog, tcs->Program))
     63       return false;
     64 
     65    if (vs && !brw_vs_precompile(ctx, vs->Program))
     66       return false;
     67 
     68    if (cs && !brw_cs_precompile(ctx, cs->Program))
     69       return false;
     70 
     71    return true;
     72 }
     73 
     74 static void
     75 brw_lower_packing_builtins(struct brw_context *brw,
     76                            exec_list *ir)
     77 {
     78    const struct gen_device_info *devinfo = &brw->screen->devinfo;
     79 
     80    /* Gens < 7 don't have instructions to convert to or from half-precision,
     81     * and Gens < 6 don't expose that functionality.
     82     */
     83    if (devinfo->gen != 6)
     84       return;
     85 
     86    lower_packing_builtins(ir, LOWER_PACK_HALF_2x16 | LOWER_UNPACK_HALF_2x16);
     87 }
     88 
/**
 * Run the i965-specific GLSL IR lowering passes on one linked shader
 * stage, then validate the resulting IR and optionally dump it.
 *
 * The pass ordering below is significant: several later passes consume
 * constructs that earlier passes produce (see the inline comments).
 * Any IR nodes created during lowering live in a temporary ralloc
 * context and are reparented back to the shader at the end, so junk
 * from removed instructions is freed in one shot.
 */
static void
process_glsl_ir(struct brw_context *brw,
                struct gl_shader_program *shader_prog,
                struct gl_linked_shader *shader)
{
   const struct gen_device_info *devinfo = &brw->screen->devinfo;
   struct gl_context *ctx = &brw->ctx;

   /* Temporary memory context for any new IR. */
   void *mem_ctx = ralloc_context(NULL);

   /* Move the existing IR under the temporary context so everything
    * allocated from here on shares one lifetime. */
   ralloc_adopt(mem_ctx, shader->ir);

   lower_blend_equation_advanced(shader);

   /* lower_packing_builtins() inserts arithmetic instructions, so it
    * must precede lower_instructions().
    */
   brw_lower_packing_builtins(brw, shader->ir);
   do_mat_op_to_vec(shader->ir);

   unsigned instructions_to_lower = (DIV_TO_MUL_RCP |
                                     SUB_TO_ADD_NEG |
                                     EXP_TO_EXP2 |
                                     LOG_TO_LOG2 |
                                     DFREXP_DLDEXP_TO_ARITH);
   /* Pre-Gen7 lacks the bit-manipulation instructions these map to, so
    * lower them to shifts and math as well. */
   if (devinfo->gen < 7) {
      instructions_to_lower |= BIT_COUNT_TO_MATH |
                               EXTRACT_TO_SHIFTS |
                               INSERT_TO_SHIFTS |
                               REVERSE_TO_SHIFTS;
   }

   lower_instructions(shader->ir, instructions_to_lower);

   /* Pre-gen6 HW can only nest if-statements 16 deep.  Beyond this,
    * if-statements need to be flattened.
    */
   if (devinfo->gen < 6)
      lower_if_to_cond_assign(shader->Stage, shader->ir, 16);

   do_lower_texture_projection(shader->ir);
   do_vec_index_to_cond_assign(shader->ir);
   lower_vector_insert(shader->ir, true);
   lower_offset_arrays(shader->ir);
   lower_noise(shader->ir);
   lower_quadop_vector(shader->ir, false);

   /* Sanity-check the IR before handing it to NIR translation. */
   validate_ir_tree(shader->ir);

   /* Now that we've finished altering the linked IR, reparent any live IR back
    * to the permanent memory context, and free the temporary one (discarding any
    * junk we optimized away).
    */
   reparent_ir(shader->ir, shader->ir);
   ralloc_free(mem_ctx);

   /* With MESA_GLSL=dump, print the post-lowering linked IR (or note
    * that it is absent, e.g. when loaded from the shader cache). */
   if (ctx->_Shader->Flags & GLSL_DUMP) {
      fprintf(stderr, "\n");
      if (shader->ir) {
         fprintf(stderr, "GLSL IR for linked %s program %d:\n",
                 _mesa_shader_stage_to_string(shader->Stage),
                 shader_prog->Name);
         _mesa_print_ir(stderr, shader->ir, NULL);
      } else {
         fprintf(stderr, "No GLSL IR for linked %s program %d (shader may be "
                 "from cache)\n", _mesa_shader_stage_to_string(shader->Stage),
                 shader_prog->Name);
      }
      fprintf(stderr, "\n");
   }
}
    161 
    162 static void
    163 unify_interfaces(struct shader_info **infos)
    164 {
    165    struct shader_info *prev_info = NULL;
    166 
    167    for (unsigned i = MESA_SHADER_VERTEX; i < MESA_SHADER_FRAGMENT; i++) {
    168       if (!infos[i])
    169          continue;
    170 
    171       if (prev_info) {
    172          prev_info->outputs_written |= infos[i]->inputs_read &
    173             ~(VARYING_BIT_TESS_LEVEL_INNER | VARYING_BIT_TESS_LEVEL_OUTER);
    174          infos[i]->inputs_read |= prev_info->outputs_written &
    175             ~(VARYING_BIT_TESS_LEVEL_INNER | VARYING_BIT_TESS_LEVEL_OUTER);
    176 
    177          prev_info->patch_outputs_written |= infos[i]->patch_inputs_read;
    178          infos[i]->patch_inputs_read |= prev_info->patch_outputs_written;
    179       }
    180       prev_info = infos[i];
    181    }
    182 }
    183 
    184 static void
    185 update_xfb_info(struct gl_transform_feedback_info *xfb_info,
    186                 struct shader_info *info)
    187 {
    188    if (!xfb_info)
    189       return;
    190 
    191    for (unsigned i = 0; i < xfb_info->NumOutputs; i++) {
    192       struct gl_transform_feedback_output *output = &xfb_info->Outputs[i];
    193 
    194       /* The VUE header contains three scalar fields packed together:
    195        * - gl_PointSize is stored in VARYING_SLOT_PSIZ.w
    196        * - gl_Layer is stored in VARYING_SLOT_PSIZ.y
    197        * - gl_ViewportIndex is stored in VARYING_SLOT_PSIZ.z
    198        */
    199       switch (output->OutputRegister) {
    200       case VARYING_SLOT_LAYER:
    201          assert(output->NumComponents == 1);
    202          output->OutputRegister = VARYING_SLOT_PSIZ;
    203          output->ComponentOffset = 1;
    204          break;
    205       case VARYING_SLOT_VIEWPORT:
    206          assert(output->NumComponents == 1);
    207          output->OutputRegister = VARYING_SLOT_PSIZ;
    208          output->ComponentOffset = 2;
    209          break;
    210       case VARYING_SLOT_PSIZ:
    211          assert(output->NumComponents == 1);
    212          output->ComponentOffset = 3;
    213          break;
    214       }
    215 
    216       info->outputs_written |= 1ull << output->OutputRegister;
    217    }
    218 }
    219 
/**
 * i965 backend entry point for GLSL program linking.
 *
 * For each linked stage: runs the GLSL IR lowering passes, translates
 * the stage to NIR, cross-links the NIR shaders (Gen8+), runs the
 * shared NIR lowering passes, registers built-in uniform state
 * references, optionally serializes NIR into the shader cache, and
 * finally precompiles each stage (if brw->precompile) before freeing
 * the GLSL IR.
 *
 * Returns GL_TRUE on success, GL_FALSE (via `false`) if any stage's
 * precompile fails.
 */
extern "C" GLboolean
brw_link_shader(struct gl_context *ctx, struct gl_shader_program *shProg)
{
   struct brw_context *brw = brw_context(ctx);
   const struct brw_compiler *compiler = brw->screen->compiler;
   unsigned int stage;
   struct shader_info *infos[MESA_SHADER_STAGES] = { 0, };

   /* Nothing to do if the program came out of the shader cache. */
   if (shProg->data->LinkStatus == linking_skipped)
      return GL_TRUE;

   /* Pass 1: per-stage GLSL IR lowering and GLSL-to-NIR translation. */
   for (stage = 0; stage < ARRAY_SIZE(shProg->_LinkedShaders); stage++) {
      struct gl_linked_shader *shader = shProg->_LinkedShaders[stage];
      if (!shader)
         continue;

      struct gl_program *prog = shader->Program;
      prog->Parameters = _mesa_new_parameter_list();

      process_glsl_ir(brw, shProg, shader);

      _mesa_copy_linked_program_data(shProg, shader);

      prog->ShadowSamplers = shader->shadow_samplers;
      _mesa_update_shader_textures_used(shProg, prog);

      bool debug_enabled =
         (INTEL_DEBUG & intel_debug_flag_for_shader_stage(shader->Stage));

      if (debug_enabled && shader->ir) {
         fprintf(stderr, "GLSL IR for native %s shader %d:\n",
                 _mesa_shader_stage_to_string(shader->Stage), shProg->Name);
         _mesa_print_ir(stderr, shader->ir, NULL);
         fprintf(stderr, "\n\n");
      }

      /* Translate to NIR, choosing scalar vs. vec4 per the compiler's
       * per-stage configuration. */
      prog->nir = brw_create_nir(brw, shProg, prog, (gl_shader_stage) stage,
                                 compiler->scalar_stage[stage]);
   }

   /* Determine first and last stage. */
   unsigned first = MESA_SHADER_STAGES;
   unsigned last = 0;
   for (unsigned i = 0; i < MESA_SHADER_STAGES; i++) {
      if (!shProg->_LinkedShaders[i])
         continue;
      if (first == MESA_SHADER_STAGES)
         first = i;
      last = i;
   }

   /* Linking the stages in the opposite order (from fragment to vertex)
    * ensures that inter-shader outputs written to in an earlier stage
    * are eliminated if they are (transitively) not used in a later
    * stage.
    *
    * TODO: Look into Shadow of Mordor regressions on HSW and enable this for
    * all platforms. See: https://bugs.freedesktop.org/show_bug.cgi?id=103537
    */
    if (first != last && brw->screen->devinfo.gen >= 8) {
       int next = last;
       for (int i = next - 1; i >= 0; i--) {
          if (shProg->_LinkedShaders[i] == NULL)
             continue;

          brw_nir_link_shaders(compiler,
                               &shProg->_LinkedShaders[i]->Program->nir,
                               &shProg->_LinkedShaders[next]->Program->nir);
          next = i;
       }
    }

   /* Pass 2: stage-independent NIR lowering and uniform setup. */
   for (stage = 0; stage < ARRAY_SIZE(shProg->_LinkedShaders); stage++) {
      struct gl_linked_shader *shader = shProg->_LinkedShaders[stage];
      if (!shader)
         continue;

      struct gl_program *prog = shader->Program;
      brw_shader_gather_info(prog->nir, prog);

      NIR_PASS_V(prog->nir, nir_lower_samplers, shProg);
      NIR_PASS_V(prog->nir, nir_lower_atomics, shProg);
      NIR_PASS_V(prog->nir, nir_lower_atomics_to_ssbo,
                 prog->nir->info.num_abos);

      infos[stage] = &prog->nir->info;

      /* Remap transform feedback captures of VUE-header scalars. */
      update_xfb_info(prog->sh.LinkedTransformFeedback, infos[stage]);

      /* Make a pass over the IR to add state references for any built-in
       * uniforms that are used.  This has to be done now (during linking).
       * Code generation doesn't happen until the first time this shader is
       * used for rendering.  Waiting until then to generate the parameters is
       * too late.  At that point, the values for the built-in uniforms won't
       * get sent to the shader.
       */
      nir_foreach_variable(var, &prog->nir->uniforms) {
         if (strncmp(var->name, "gl_", 3) == 0) {
            const nir_state_slot *const slots = var->state_slots;
            assert(var->state_slots != NULL);

            for (unsigned int i = 0; i < var->num_state_slots; i++) {
               _mesa_add_state_reference(prog->Parameters,
                                         (gl_state_index *)slots[i].tokens);
            }
         }
      }
   }

   /* The linker tries to dead code eliminate unused varying components,
    * and make sure interfaces match.  But it isn't able to do so in all
    * cases.  So, explicitly make the interfaces match by OR'ing together
    * the inputs_read/outputs_written bitfields of adjacent stages.
    */
   if (!shProg->SeparateShader)
      unify_interfaces(infos);

   /* With MESA_GLSL=dump, print each shader's source (skipped for the
    * fixed-function program, Name == 0). */
   if ((ctx->_Shader->Flags & GLSL_DUMP) && shProg->Name != 0) {
      for (unsigned i = 0; i < shProg->NumShaders; i++) {
         const struct gl_shader *sh = shProg->Shaders[i];
         if (!sh)
            continue;

         fprintf(stderr, "GLSL %s shader %d source for linked program %d:\n",
                 _mesa_shader_stage_to_string(sh->Stage),
                 i, shProg->Name);
         fprintf(stderr, "%s", sh->Source);
         fprintf(stderr, "\n");
      }
   }

   /* Serialize each stage's NIR into the on-disk shader cache, if one
    * is enabled. */
   if (brw->ctx.Cache) {
      for (stage = 0; stage < ARRAY_SIZE(shProg->_LinkedShaders); stage++) {
         struct gl_linked_shader *shader = shProg->_LinkedShaders[stage];
         if (!shader)
            continue;

         struct gl_program *prog = shader->Program;
         brw_program_serialize_nir(ctx, prog);
      }
   }

   if (brw->precompile && !brw_shader_precompile(ctx, shProg))
      return false;

   build_program_resource_list(ctx, shProg);

   for (stage = 0; stage < ARRAY_SIZE(shProg->_LinkedShaders); stage++) {
      struct gl_linked_shader *shader = shProg->_LinkedShaders[stage];
      if (!shader)
         continue;

      /* The GLSL IR won't be needed anymore. */
      ralloc_free(shader->ir);
      shader->ir = NULL;
   }

   return true;
}
    379