/*
 * Copyright © 2014 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "brw_nir.h"
#include "brw_shader.h"
#include "common/gen_debug.h"
#include "compiler/glsl_types.h"
#include "compiler/nir/nir_builder.h"

static bool
is_input(nir_intrinsic_instr *intrin)
{
   return intrin->intrinsic == nir_intrinsic_load_input ||
          intrin->intrinsic == nir_intrinsic_load_per_vertex_input ||
          intrin->intrinsic == nir_intrinsic_load_interpolated_input;
}

static bool
is_output(nir_intrinsic_instr *intrin)
{
   return intrin->intrinsic == nir_intrinsic_load_output ||
          intrin->intrinsic == nir_intrinsic_load_per_vertex_output ||
          intrin->intrinsic == nir_intrinsic_store_output ||
          intrin->intrinsic == nir_intrinsic_store_per_vertex_output;
}

/**
 * In many cases, we just add the base and offset together, so there's no
 * reason to keep them separate.  Sometimes, combining them is essential:
 * if a shader only accesses part of a compound variable (such as a matrix
 * or array), the variable's base may not actually exist in the VUE map.
 *
 * This pass adds constant offsets to instr->const_index[0], and resets
 * the offset source to 0.  Non-constant offsets remain unchanged - since
 * we don't know what part of a compound variable is accessed, we allocate
 * storage for the entire thing.
 */

static bool
add_const_offset_to_base_block(nir_block *block, nir_builder *b,
                               nir_variable_mode mode)
{
   nir_foreach_instr_safe(instr, block) {
      if (instr->type != nir_instr_type_intrinsic)
         continue;

      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);

      if ((mode == nir_var_shader_in && is_input(intrin)) ||
          (mode == nir_var_shader_out && is_output(intrin))) {
         nir_src *offset = nir_get_io_offset_src(intrin);
         nir_const_value *const_offset = nir_src_as_const_value(*offset);

         if (const_offset) {
            intrin->const_index[0] += const_offset->u32[0];
            b->cursor = nir_before_instr(&intrin->instr);
            nir_instr_rewrite_src(&intrin->instr, offset,
                                  nir_src_for_ssa(nir_imm_int(b, 0)));
         }
      }
   }
   return true;
}

static void
add_const_offset_to_base(nir_shader *nir, nir_variable_mode mode)
{
   nir_foreach_function(f, nir) {
      if (f->impl) {
         nir_builder b;
         nir_builder_init(&b, f->impl);
         nir_foreach_block(block, f->impl) {
            add_const_offset_to_base_block(block, &b, mode);
         }
      }
   }
}

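/**
 * Remap a load/store of gl_TessLevelInner or gl_TessLevelOuter to the DWords
 * of the patch URB header that hold the tessellation levels for the given
 * tessellation domain.  Components that don't exist in that domain are
 * replaced with undefs (for loads) or deleted (for stores).  Returns false
 * if the intrinsic doesn't touch a tessellation level at all.
 */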
static bool
remap_tess_levels(nir_builder *b, nir_intrinsic_instr *intr,
                  GLenum primitive_mode)
{
   const int location = nir_intrinsic_base(intr);
   const unsigned component = nir_intrinsic_component(intr);
   bool out_of_bounds;

   if (location == VARYING_SLOT_TESS_LEVEL_INNER) {
      switch (primitive_mode) {
      case GL_QUADS:
         /* gl_TessLevelInner[0..1] lives at DWords 3-2 (reversed). */
         nir_intrinsic_set_base(intr, 0);
         nir_intrinsic_set_component(intr, 3 - component);
         out_of_bounds = false;
         break;
      case GL_TRIANGLES:
         /* gl_TessLevelInner[0] lives at DWord 4. */
         nir_intrinsic_set_base(intr, 1);
         out_of_bounds = component > 0;
         break;
      case GL_ISOLINES:
         out_of_bounds = true;
         break;
      default:
         unreachable("Bogus tessellation domain");
      }
   } else if (location == VARYING_SLOT_TESS_LEVEL_OUTER) {
      if (primitive_mode == GL_ISOLINES) {
         /* gl_TessLevelOuter[0..1] lives at DWords 6-7 (in order). */
         nir_intrinsic_set_base(intr, 1);
         nir_intrinsic_set_component(intr, 2 + nir_intrinsic_component(intr));
         out_of_bounds = component > 1;
      } else {
         /* Triangles use DWords 7-5 (reversed); Quads use 7-4 (reversed) */
         nir_intrinsic_set_base(intr, 1);
         nir_intrinsic_set_component(intr, 3 - nir_intrinsic_component(intr));
         out_of_bounds = component == 3 && primitive_mode == GL_TRIANGLES;
      }
   } else {
      return false;
   }

   if (out_of_bounds) {
      if (nir_intrinsic_infos[intr->intrinsic].has_dest) {
         b->cursor = nir_before_instr(&intr->instr);
         nir_ssa_def *undef = nir_ssa_undef(b, 1, 32);
         nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_src_for_ssa(undef));
      }
      nir_instr_remove(&intr->instr);
   }

   return true;
}

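/**
 * Remap TCS outputs and TES inputs from gl_varying_slot numbering to actual
 * slots in the patch URB, using the VUE map.  Tessellation levels are
 * handled by remap_tess_levels(); for per-vertex accesses, a constant vertex
 * index is folded into the base while a dynamic one is folded into the
 * offset source.
 */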
static bool
remap_patch_urb_offsets(nir_block *block, nir_builder *b,
                        const struct brw_vue_map *vue_map,
                        GLenum tes_primitive_mode)
{
   const bool is_passthrough_tcs = b->shader->info.name &&
      strcmp(b->shader->info.name, "passthrough") == 0;

   nir_foreach_instr_safe(instr, block) {
      if (instr->type != nir_instr_type_intrinsic)
         continue;

      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);

      gl_shader_stage stage = b->shader->info.stage;

      if ((stage == MESA_SHADER_TESS_CTRL && is_output(intrin)) ||
          (stage == MESA_SHADER_TESS_EVAL && is_input(intrin))) {

         if (!is_passthrough_tcs &&
             remap_tess_levels(b, intrin, tes_primitive_mode))
            continue;

         int vue_slot = vue_map->varying_to_slot[intrin->const_index[0]];
         assert(vue_slot != -1);
         intrin->const_index[0] = vue_slot;

         nir_src *vertex = nir_get_io_vertex_index_src(intrin);
         if (vertex) {
            nir_const_value *const_vertex = nir_src_as_const_value(*vertex);
            if (const_vertex) {
               intrin->const_index[0] += const_vertex->u32[0] *
                                         vue_map->num_per_vertex_slots;
            } else {
               b->cursor = nir_before_instr(&intrin->instr);

               /* Multiply by the number of per-vertex slots. */
               nir_ssa_def *vertex_offset =
                  nir_imul(b,
                           nir_ssa_for_src(b, *vertex, 1),
                           nir_imm_int(b,
                                       vue_map->num_per_vertex_slots));

               /* Add it to the existing offset */
               nir_src *offset = nir_get_io_offset_src(intrin);
               nir_ssa_def *total_offset =
                  nir_iadd(b, vertex_offset,
                           nir_ssa_for_src(b, *offset, 1));

               nir_instr_rewrite_src(&intrin->instr, offset,
                                     nir_src_for_ssa(total_offset));
            }
         }
      }
   }
   return true;
}

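/**
 * Lower vertex shader inputs to the vec4 slot layout delivered by the vertex
 * fetcher: run nir_lower_io, fold constant offsets into the base, apply the
 * attribute workarounds, and remap VERT_ATTRIB_* locations (plus gl_VertexID
 * and friends) to the packed vertex element slots.
 */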
void
brw_nir_lower_vs_inputs(nir_shader *nir,
                        const uint8_t *vs_attrib_wa_flags)
{
   /* Start with the location of the variable's base. */
   foreach_list_typed(nir_variable, var, node, &nir->inputs) {
      var->data.driver_location = var->data.location;
   }

   /* Now use nir_lower_io to walk dereference chains.  Attribute arrays are
    * loaded as one vec4 or dvec4 per element (or matrix column), depending on
    * whether it is a double-precision type or not.
    */
   nir_lower_io(nir, nir_var_shader_in, type_size_vec4, 0);

   /* This pass needs actual constants */
   nir_opt_constant_folding(nir);

   add_const_offset_to_base(nir, nir_var_shader_in);

   brw_nir_apply_attribute_workarounds(nir, vs_attrib_wa_flags);

   /* The last step is to remap VERT_ATTRIB_* to actual registers */

   /* Whether or not we have any system generated values.  gl_DrawID is not
    * included here as it lives in its own vec4.
    */
   const bool has_sgvs =
      nir->info.system_values_read &
      (BITFIELD64_BIT(SYSTEM_VALUE_BASE_VERTEX) |
       BITFIELD64_BIT(SYSTEM_VALUE_BASE_INSTANCE) |
       BITFIELD64_BIT(SYSTEM_VALUE_VERTEX_ID_ZERO_BASE) |
       BITFIELD64_BIT(SYSTEM_VALUE_INSTANCE_ID));

   const unsigned num_inputs = _mesa_bitcount_64(nir->info.inputs_read);

   nir_foreach_function(function, nir) {
      if (!function->impl)
         continue;

      nir_builder b;
      nir_builder_init(&b, function->impl);

      nir_foreach_block(block, function->impl) {
         nir_foreach_instr_safe(instr, block) {
            if (instr->type != nir_instr_type_intrinsic)
               continue;

            nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);

            switch (intrin->intrinsic) {
            case nir_intrinsic_load_base_vertex:
            case nir_intrinsic_load_base_instance:
            case nir_intrinsic_load_vertex_id_zero_base:
            case nir_intrinsic_load_instance_id:
            case nir_intrinsic_load_draw_id: {
               b.cursor = nir_after_instr(&intrin->instr);

               /* gl_VertexID and friends are stored by the VF as the last
                * vertex element.  We convert them to load_input intrinsics at
                * the right location.
                */
               nir_intrinsic_instr *load =
                  nir_intrinsic_instr_create(nir, nir_intrinsic_load_input);
               load->src[0] = nir_src_for_ssa(nir_imm_int(&b, 0));

               nir_intrinsic_set_base(load, num_inputs);
               switch (intrin->intrinsic) {
               case nir_intrinsic_load_base_vertex:
                  nir_intrinsic_set_component(load, 0);
                  break;
               case nir_intrinsic_load_base_instance:
                  nir_intrinsic_set_component(load, 1);
                  break;
               case nir_intrinsic_load_vertex_id_zero_base:
                  nir_intrinsic_set_component(load, 2);
                  break;
               case nir_intrinsic_load_instance_id:
                  nir_intrinsic_set_component(load, 3);
                  break;
               case nir_intrinsic_load_draw_id:
                  /* gl_DrawID is stored right after gl_VertexID and friends
                   * if any of them exist.
                   */
                  nir_intrinsic_set_base(load, num_inputs + has_sgvs);
                  nir_intrinsic_set_component(load, 0);
                  break;
               default:
                  unreachable("Invalid system value intrinsic");
               }

               load->num_components = 1;
               nir_ssa_dest_init(&load->instr, &load->dest, 1, 32, NULL);
               nir_builder_instr_insert(&b, &load->instr);

               nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
                                        nir_src_for_ssa(&load->dest.ssa));
               nir_instr_remove(&intrin->instr);
               break;
            }

            case nir_intrinsic_load_input: {
               /* Attributes come in a contiguous block, ordered by their
                * gl_vert_attrib value.  That means we can compute the slot
                * number for an attribute by masking out the enabled attributes
                * before it and counting the bits.
                */
               int attr = nir_intrinsic_base(intrin);
               int slot = _mesa_bitcount_64(nir->info.inputs_read &
                                            BITFIELD64_MASK(attr));
               nir_intrinsic_set_base(intrin, slot);
               break;
            }

            default:
               break; /* Nothing to do */
            }
         }
      }
   }
}

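/**
 * Lower per-vertex inputs that are read out of the VUE (vertex URB entry)
 * written by the previous stage: run nir_lower_io in vec4 slots and remap
 * varying locations to VUE map slots, with gl_PointSize redirected into the
 * VUE header.
 */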
void
brw_nir_lower_vue_inputs(nir_shader *nir,
                         const struct brw_vue_map *vue_map)
{
   foreach_list_typed(nir_variable, var, node, &nir->inputs) {
      var->data.driver_location = var->data.location;
   }

   /* Inputs are stored in vec4 slots, so use type_size_vec4(). */
   nir_lower_io(nir, nir_var_shader_in, type_size_vec4, 0);

   /* This pass needs actual constants */
   nir_opt_constant_folding(nir);

   add_const_offset_to_base(nir, nir_var_shader_in);

   nir_foreach_function(function, nir) {
      if (!function->impl)
         continue;

      nir_foreach_block(block, function->impl) {
         nir_foreach_instr(instr, block) {
            if (instr->type != nir_instr_type_intrinsic)
               continue;

            nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);

            if (intrin->intrinsic == nir_intrinsic_load_input ||
                intrin->intrinsic == nir_intrinsic_load_per_vertex_input) {
               /* Offset 0 is the VUE header, which contains
                * VARYING_SLOT_LAYER [.y], VARYING_SLOT_VIEWPORT [.z], and
                * VARYING_SLOT_PSIZ [.w].
                */
               int varying = nir_intrinsic_base(intrin);
               int vue_slot;
               switch (varying) {
               case VARYING_SLOT_PSIZ:
                  nir_intrinsic_set_base(intrin, 0);
                  nir_intrinsic_set_component(intrin, 3);
                  break;

               default:
                  vue_slot = vue_map->varying_to_slot[varying];
                  assert(vue_slot != -1);
                  nir_intrinsic_set_base(intrin, vue_slot);
                  break;
               }
            }
         }
      }
   }
}

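/**
 * Lower tessellation evaluation shader inputs: run nir_lower_io in vec4
 * slots and remap the patch URB offsets (including tessellation levels)
 * through the VUE map.
 */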
void
brw_nir_lower_tes_inputs(nir_shader *nir, const struct brw_vue_map *vue_map)
{
   foreach_list_typed(nir_variable, var, node, &nir->inputs) {
      var->data.driver_location = var->data.location;
   }

   nir_lower_io(nir, nir_var_shader_in, type_size_vec4, 0);

   /* This pass needs actual constants */
   nir_opt_constant_folding(nir);

   add_const_offset_to_base(nir, nir_var_shader_in);

   nir_foreach_function(function, nir) {
      if (function->impl) {
         nir_builder b;
         nir_builder_init(&b, function->impl);
         nir_foreach_block(block, function->impl) {
            remap_patch_urb_offsets(block, &b, vue_map,
                                    nir->info.tess.primitive_mode);
         }
      }
   }
}

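/**
 * Lower fragment shader inputs: pick default interpolation modes, drop
 * centroid/sample qualifiers on hardware without multisampling, and run
 * nir_lower_io, optionally forcing per-sample interpolation.
 */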
void
brw_nir_lower_fs_inputs(nir_shader *nir,
                        const struct gen_device_info *devinfo,
                        const struct brw_wm_prog_key *key)
{
   foreach_list_typed(nir_variable, var, node, &nir->inputs) {
      var->data.driver_location = var->data.location;

      /* Apply default interpolation mode.
       *
       * Everything defaults to smooth except for the legacy GL color
       * built-in variables, which might be flat depending on API state.
       */
      if (var->data.interpolation == INTERP_MODE_NONE) {
         const bool flat = key->flat_shade &&
            (var->data.location == VARYING_SLOT_COL0 ||
             var->data.location == VARYING_SLOT_COL1);

         var->data.interpolation = flat ? INTERP_MODE_FLAT
                                        : INTERP_MODE_SMOOTH;
      }

      /* On Ironlake and below, there is only one interpolation mode.
       * Centroid interpolation doesn't mean anything on this hardware --
       * there is no multisampling.
       */
      if (devinfo->gen < 6) {
         var->data.centroid = false;
         var->data.sample = false;
      }
   }

   nir_lower_io_options lower_io_options = 0;
   if (key->persample_interp)
      lower_io_options |= nir_lower_io_force_sample_interpolation;

   nir_lower_io(nir, nir_var_shader_in, type_size_vec4, lower_io_options);

   /* This pass needs actual constants */
   nir_opt_constant_folding(nir);

   add_const_offset_to_base(nir, nir_var_shader_in);
}

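/**
 * Lower outputs that are written into a VUE; each varying occupies one vec4
 * slot.
 */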
void
brw_nir_lower_vue_outputs(nir_shader *nir,
                          bool is_scalar)
{
   nir_foreach_variable(var, &nir->outputs) {
      var->data.driver_location = var->data.location;
   }

   nir_lower_io(nir, nir_var_shader_out, type_size_vec4, 0);
}

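/**
 * Lower tessellation control shader outputs to patch URB offsets, using the
 * same VUE map and tessellation level remapping as the TES inputs.
 */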
void
brw_nir_lower_tcs_outputs(nir_shader *nir, const struct brw_vue_map *vue_map,
                          GLenum tes_primitive_mode)
{
   nir_foreach_variable(var, &nir->outputs) {
      var->data.driver_location = var->data.location;
   }

   nir_lower_io(nir, nir_var_shader_out, type_size_vec4, 0);

   /* This pass needs actual constants */
   nir_opt_constant_folding(nir);

   add_const_offset_to_base(nir, nir_var_shader_out);

   nir_foreach_function(function, nir) {
      if (function->impl) {
         nir_builder b;
         nir_builder_init(&b, function->impl);
         nir_foreach_block(block, function->impl) {
            remap_patch_urb_offsets(block, &b, vue_map, tes_primitive_mode);
         }
      }
   }
}

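/**
 * Lower fragment shader outputs, packing both the location and the
 * dual-source blend index into driver_location.
 */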
void
brw_nir_lower_fs_outputs(nir_shader *nir)
{
   nir_foreach_variable(var, &nir->outputs) {
      var->data.driver_location =
         SET_FIELD(var->data.index, BRW_NIR_FRAG_OUTPUT_INDEX) |
         SET_FIELD(var->data.location, BRW_NIR_FRAG_OUTPUT_LOCATION);
   }

   nir_lower_io(nir, nir_var_shader_out, type_size_dvec4, 0);
}

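/** Lower compute shader shared variables to explicit byte offsets. */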
void
brw_nir_lower_cs_shared(nir_shader *nir)
{
   nir_assign_var_locations(&nir->shared, &nir->num_shared,
                            type_size_scalar_bytes);
   nir_lower_io(nir, nir_var_shared, type_size_scalar_bytes, 0);
}

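/* Wrapper around NIR_PASS() that accumulates the pass's result into the
 * local 'progress' flag and also evaluates to whether this particular pass
 * made progress.
 */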
#define OPT(pass, ...) ({                                  \
   bool this_progress = false;                             \
   NIR_PASS(this_progress, nir, pass, ##__VA_ARGS__);      \
   if (this_progress)                                      \
      progress = true;                                     \
   this_progress;                                          \
})

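/* Build the mask of variable modes for which this stage's backend wants
 * indirect addressing lowered away, based on its glsl_compiler_options.
 */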
static nir_variable_mode
brw_nir_no_indirect_mask(const struct brw_compiler *compiler,
                         gl_shader_stage stage)
{
   nir_variable_mode indirect_mask = 0;

   if (compiler->glsl_compiler_options[stage].EmitNoIndirectInput)
      indirect_mask |= nir_var_shader_in;
   if (compiler->glsl_compiler_options[stage].EmitNoIndirectOutput)
      indirect_mask |= nir_var_shader_out;
   if (compiler->glsl_compiler_options[stage].EmitNoIndirectTemp)
      indirect_mask |= nir_var_local;

   return indirect_mask;
}

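/* Run the standard NIR optimization loop repeatedly until no pass makes any
 * further progress.  Scalar stages additionally get their ALU ops and phis
 * scalarized along the way.
 */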
nir_shader *
brw_nir_optimize(nir_shader *nir, const struct brw_compiler *compiler,
                 bool is_scalar)
{
   nir_variable_mode indirect_mask =
      brw_nir_no_indirect_mask(compiler, nir->info.stage);

   bool progress;
   do {
      progress = false;
      OPT(nir_lower_vars_to_ssa);
      OPT(nir_opt_copy_prop_vars);

      if (is_scalar) {
         OPT(nir_lower_alu_to_scalar);
      }

      OPT(nir_copy_prop);

      if (is_scalar) {
         OPT(nir_lower_phis_to_scalar);
      }

      OPT(nir_copy_prop);
      OPT(nir_opt_dce);
      OPT(nir_opt_cse);
      OPT(nir_opt_peephole_select, 0);
      OPT(nir_opt_intrinsics);
      OPT(nir_opt_algebraic);
      OPT(nir_opt_constant_folding);
      OPT(nir_opt_dead_cf);
      if (OPT(nir_opt_trivial_continues)) {
         /* If nir_opt_trivial_continues makes progress, then we need to clean
          * things up if we want any hope of nir_opt_if or nir_opt_loop_unroll
          * to make progress.
          */
         OPT(nir_copy_prop);
         OPT(nir_opt_dce);
      }
      OPT(nir_opt_if);
      if (nir->options->max_unroll_iterations != 0) {
         OPT(nir_opt_loop_unroll, indirect_mask);
      }
      OPT(nir_opt_remove_phis);
      OPT(nir_opt_undef);
      OPT(nir_lower_doubles, nir_lower_drcp |
                             nir_lower_dsqrt |
                             nir_lower_drsq |
                             nir_lower_dtrunc |
                             nir_lower_dfloor |
                             nir_lower_dceil |
                             nir_lower_dfract |
                             nir_lower_dround_even |
                             nir_lower_dmod);
      OPT(nir_lower_64bit_pack);
   } while (progress);

   return nir;
}

/* Does some simple lowering and runs the standard suite of optimizations
 *
 * This is intended to be called more-or-less directly after you get the
 * shader out of GLSL or some other source.  While it is geared towards i965,
 * it is not at all generator-specific except for the is_scalar flag.  Even
 * there, it is safe to call with is_scalar = false for a shader that is
 * intended for the FS backend as long as nir_optimize is called again with
 * is_scalar = true to scalarize everything prior to code gen.
 */
nir_shader *
brw_preprocess_nir(const struct brw_compiler *compiler, nir_shader *nir)
{
   const struct gen_device_info *devinfo = compiler->devinfo;
   UNUSED bool progress; /* Written by OPT */

   const bool is_scalar = compiler->scalar_stage[nir->info.stage];

   if (nir->info.stage == MESA_SHADER_GEOMETRY)
      OPT(nir_lower_gs_intrinsics);

   /* See also brw_nir_trig_workarounds.py */
   if (compiler->precise_trig &&
       !(devinfo->gen >= 10 || devinfo->is_kabylake))
      OPT(brw_nir_apply_trig_workarounds);

   static const nir_lower_tex_options tex_options = {
      .lower_txp = ~0,
      .lower_txf_offset = true,
      .lower_rect_offset = true,
      .lower_txd_cube_map = true,
   };

   OPT(nir_lower_tex, &tex_options);
   OPT(nir_normalize_cubemap_coords);

   OPT(nir_lower_global_vars_to_local);

   OPT(nir_split_var_copies);

   nir = brw_nir_optimize(nir, compiler, is_scalar);

   if (is_scalar) {
      OPT(nir_lower_load_const_to_scalar);
   }

   /* Lower a bunch of stuff */
   OPT(nir_lower_var_copies);

   OPT(nir_lower_system_values);

   const nir_lower_subgroups_options subgroups_options = {
      .subgroup_size = nir->info.stage == MESA_SHADER_COMPUTE ? 32 :
                       nir->info.stage == MESA_SHADER_FRAGMENT ? 16 : 8,
      .ballot_bit_size = 32,
      .lower_to_scalar = true,
      .lower_subgroup_masks = true,
      .lower_vote_trivial = !is_scalar,
   };
   OPT(nir_lower_subgroups, &subgroups_options);

   OPT(nir_lower_clip_cull_distance_arrays);

   nir_variable_mode indirect_mask =
      brw_nir_no_indirect_mask(compiler, nir->info.stage);
   nir_lower_indirect_derefs(nir, indirect_mask);

   nir_lower_int64(nir, nir_lower_imul64 |
                        nir_lower_isign64 |
                        nir_lower_divmod64);

   /* Get rid of split copies */
   nir = brw_nir_optimize(nir, compiler, is_scalar);

   OPT(nir_remove_dead_variables, nir_var_local);

   return nir;
}

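/* Perform cross-stage linking: remove dead interface variables and unused
 * varyings between the producer and consumer, then re-run the optimizer on
 * both shaders if anything was demoted.
 */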
void
brw_nir_link_shaders(const struct brw_compiler *compiler,
                     nir_shader **producer, nir_shader **consumer)
{
   NIR_PASS_V(*producer, nir_remove_dead_variables, nir_var_shader_out);
   NIR_PASS_V(*consumer, nir_remove_dead_variables, nir_var_shader_in);

   if (nir_remove_unused_varyings(*producer, *consumer)) {
      NIR_PASS_V(*producer, nir_lower_global_vars_to_local);
      NIR_PASS_V(*consumer, nir_lower_global_vars_to_local);

      /* The backend might not be able to handle indirects on
       * temporaries so we need to lower indirects on any of the
       * varyings we have demoted here.
       */
      NIR_PASS_V(*producer, nir_lower_indirect_derefs,
                 brw_nir_no_indirect_mask(compiler, (*producer)->info.stage));
      NIR_PASS_V(*consumer, nir_lower_indirect_derefs,
                 brw_nir_no_indirect_mask(compiler, (*consumer)->info.stage));

      const bool p_is_scalar =
         compiler->scalar_stage[(*producer)->info.stage];
      *producer = brw_nir_optimize(*producer, compiler, p_is_scalar);

      const bool c_is_scalar =
         compiler->scalar_stage[(*consumer)->info.stage];
      *consumer = brw_nir_optimize(*consumer, compiler, c_is_scalar);
   }
}

/* Prepare the given shader for codegen
 *
 * This function is intended to be called right before going into the actual
 * backend and is highly backend-specific.  Also, once this function has been
 * called on a shader, it will no longer be in SSA form so most optimizations
 * will not work.
 */
nir_shader *
brw_postprocess_nir(nir_shader *nir, const struct brw_compiler *compiler,
                    bool is_scalar)
{
   const struct gen_device_info *devinfo = compiler->devinfo;
   bool debug_enabled =
      (INTEL_DEBUG & intel_debug_flag_for_shader_stage(nir->info.stage));

   UNUSED bool progress; /* Written by OPT */

   do {
      progress = false;
      OPT(nir_opt_algebraic_before_ffma);
   } while (progress);

   nir = brw_nir_optimize(nir, compiler, is_scalar);

   if (devinfo->gen >= 6) {
      /* Try and fuse multiply-adds */
      OPT(brw_nir_opt_peephole_ffma);
   }

   OPT(nir_opt_algebraic_late);

   OPT(nir_lower_to_source_mods);
   OPT(nir_copy_prop);
   OPT(nir_opt_dce);
   OPT(nir_opt_move_comparisons);

   OPT(nir_lower_locals_to_regs);

   if (unlikely(debug_enabled)) {
      /* Re-index SSA defs so we print more sensible numbers. */
      nir_foreach_function(function, nir) {
         if (function->impl)
            nir_index_ssa_defs(function->impl);
      }

      fprintf(stderr, "NIR (SSA form) for %s shader:\n",
              _mesa_shader_stage_to_string(nir->info.stage));
      nir_print_shader(nir, stderr);
   }

   OPT(nir_convert_from_ssa, true);

   if (!is_scalar) {
      OPT(nir_move_vec_src_uses_to_dest);
      OPT(nir_lower_vec_to_movs);
   }

   /* This is the last pass we run before we start emitting stuff.  It
    * determines when we need to insert boolean resolves on Gen <= 5.  We
    * run it last because it stashes data in instr->pass_flags and we don't
    * want that to be squashed by other NIR passes.
    */
   if (devinfo->gen <= 5)
      brw_nir_analyze_boolean_resolves(nir);

   nir_sweep(nir);

   if (unlikely(debug_enabled)) {
      fprintf(stderr, "NIR (final form) for %s shader:\n",
              _mesa_shader_stage_to_string(nir->info.stage));
      nir_print_shader(nir, stderr);
   }

   return nir;
}

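/* Apply state-dependent texture lowering based on the sampler program key:
 * rectangle textures, GL_CLAMP saturation, texture swizzles, gradients on
 * shadow samplers, and YUV external images.
 */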
nir_shader *
brw_nir_apply_sampler_key(nir_shader *nir,
                          const struct brw_compiler *compiler,
                          const struct brw_sampler_prog_key_data *key_tex,
                          bool is_scalar)
{
   const struct gen_device_info *devinfo = compiler->devinfo;
   nir_lower_tex_options tex_options = { 0 };

   /* Iron Lake and prior require lowering of all rectangle textures */
   if (devinfo->gen < 6)
      tex_options.lower_rect = true;

   /* Prior to Broadwell, our hardware can't actually do GL_CLAMP */
   if (devinfo->gen < 8) {
      tex_options.saturate_s = key_tex->gl_clamp_mask[0];
      tex_options.saturate_t = key_tex->gl_clamp_mask[1];
      tex_options.saturate_r = key_tex->gl_clamp_mask[2];
   }

   /* Prior to Haswell, we have to fake texture swizzle */
   for (unsigned s = 0; s < MAX_SAMPLERS; s++) {
      if (key_tex->swizzles[s] == SWIZZLE_NOOP)
         continue;

      tex_options.swizzle_result |= (1 << s);
      for (unsigned c = 0; c < 4; c++)
         tex_options.swizzles[s][c] = GET_SWZ(key_tex->swizzles[s], c);
   }

   /* Prior to Haswell, we have to lower gradients on shadow samplers */
   tex_options.lower_txd_shadow = devinfo->gen < 8 && !devinfo->is_haswell;

   tex_options.lower_y_uv_external = key_tex->y_uv_image_mask;
   tex_options.lower_y_u_v_external = key_tex->y_u_v_image_mask;
   tex_options.lower_yx_xuxv_external = key_tex->yx_xuxv_image_mask;
   tex_options.lower_xy_uxvx_external = key_tex->xy_uxvx_image_mask;

   if (nir_lower_tex(nir, &tex_options)) {
      nir_validate_shader(nir);
      nir = brw_nir_optimize(nir, compiler, is_scalar);
   }

   return nir;
}

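/* Return the brw_reg_type corresponding to a nir_alu_type. */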
enum brw_reg_type
brw_type_for_nir_type(const struct gen_device_info *devinfo, nir_alu_type type)
{
   switch (type) {
   case nir_type_uint:
   case nir_type_uint32:
      return BRW_REGISTER_TYPE_UD;
   case nir_type_bool:
   case nir_type_int:
   case nir_type_bool32:
   case nir_type_int32:
      return BRW_REGISTER_TYPE_D;
   case nir_type_float:
   case nir_type_float32:
      return BRW_REGISTER_TYPE_F;
   case nir_type_float16:
      return BRW_REGISTER_TYPE_HF;
   case nir_type_float64:
      return BRW_REGISTER_TYPE_DF;
   case nir_type_int64:
      return devinfo->gen < 8 ? BRW_REGISTER_TYPE_DF : BRW_REGISTER_TYPE_Q;
   case nir_type_uint64:
      return devinfo->gen < 8 ? BRW_REGISTER_TYPE_DF : BRW_REGISTER_TYPE_UQ;
   case nir_type_int16:
      return BRW_REGISTER_TYPE_W;
   case nir_type_uint16:
      return BRW_REGISTER_TYPE_UW;
   default:
      unreachable("unknown type");
   }

   return BRW_REGISTER_TYPE_F;
}

/* Returns the glsl_base_type corresponding to a nir_alu_type.
 * This is used by both brw_vec4_nir and brw_fs_nir.
 */
enum glsl_base_type
brw_glsl_base_type_for_nir_type(nir_alu_type type)
{
   switch (type) {
   case nir_type_float:
   case nir_type_float32:
      return GLSL_TYPE_FLOAT;

   case nir_type_float16:
      return GLSL_TYPE_FLOAT16;

   case nir_type_float64:
      return GLSL_TYPE_DOUBLE;

   case nir_type_int:
   case nir_type_int32:
      return GLSL_TYPE_INT;

   case nir_type_uint:
   case nir_type_uint32:
      return GLSL_TYPE_UINT;

   case nir_type_int16:
      return GLSL_TYPE_INT16;

   case nir_type_uint16:
      return GLSL_TYPE_UINT16;

   default:
      unreachable("bad type");
   }
}
    898