/*
 * Copyright © 2014 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "brw_nir.h"
#include "brw_shader.h"
#include "compiler/glsl_types.h"
#include "compiler/nir/nir_builder.h"

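/* Returns true if the intrinsic loads a shader input (plain, per-vertex, or
 * interpolated).
 */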
static bool
is_input(nir_intrinsic_instr *intrin)
{
   return intrin->intrinsic == nir_intrinsic_load_input ||
          intrin->intrinsic == nir_intrinsic_load_per_vertex_input ||
          intrin->intrinsic == nir_intrinsic_load_interpolated_input;
}

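/* Returns true if the intrinsic loads from or stores to a shader output
 * (plain or per-vertex).
 */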
static bool
is_output(nir_intrinsic_instr *intrin)
{
   return intrin->intrinsic == nir_intrinsic_load_output ||
          intrin->intrinsic == nir_intrinsic_load_per_vertex_output ||
          intrin->intrinsic == nir_intrinsic_store_output ||
          intrin->intrinsic == nir_intrinsic_store_per_vertex_output;
}

/**
 * In many cases, we just add the base and offset together, so there's no
 * reason to keep them separate.  Sometimes, combining them is essential:
 * if a shader only accesses part of a compound variable (such as a matrix
 * or array), the variable's base may not actually exist in the VUE map.
 *
 * This pass adds constant offsets to instr->const_index[0], and resets
 * the offset source to 0.  Non-constant offsets remain unchanged - since
 * we don't know what part of a compound variable is accessed, we allocate
 * storage for the entire thing.
 */

static bool
add_const_offset_to_base_block(nir_block *block, nir_builder *b,
                               nir_variable_mode mode)
{
   nir_foreach_instr_safe(instr, block) {
      if (instr->type != nir_instr_type_intrinsic)
         continue;

      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);

      if ((mode == nir_var_shader_in && is_input(intrin)) ||
          (mode == nir_var_shader_out && is_output(intrin))) {
         nir_src *offset = nir_get_io_offset_src(intrin);
         nir_const_value *const_offset = nir_src_as_const_value(*offset);

         if (const_offset) {
            intrin->const_index[0] += const_offset->u32[0];
            b->cursor = nir_before_instr(&intrin->instr);
            nir_instr_rewrite_src(&intrin->instr, offset,
                                  nir_src_for_ssa(nir_imm_int(b, 0)));
         }
      }
   }
   return true;
}

static void
add_const_offset_to_base(nir_shader *nir, nir_variable_mode mode)
{
   nir_foreach_function(f, nir) {
      if (f->impl) {
         nir_builder b;
         nir_builder_init(&b, f->impl);
         nir_foreach_block(block, f->impl) {
            add_const_offset_to_base_block(block, &b, mode);
         }
      }
   }
}

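/* Translates the VERT_ATTRIB_* location of each load_input intrinsic into a
 * packed attribute slot index.  Each enabled attribute occupies one slot of
 * four components, hence the multiply by 4.
 */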
static bool
remap_vs_attrs(nir_block *block, shader_info *nir_info)
{
   nir_foreach_instr(instr, block) {
      if (instr->type != nir_instr_type_intrinsic)
         continue;

      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);

      if (intrin->intrinsic == nir_intrinsic_load_input) {
         /* Attributes come in a contiguous block, ordered by their
          * gl_vert_attrib value.  That means we can compute the slot
          * number for an attribute by masking out the enabled attributes
          * before it and counting the bits.
          */
         int attr = intrin->const_index[0];
         int slot = _mesa_bitcount_64(nir_info->inputs_read &
                                      BITFIELD64_MASK(attr));
         intrin->const_index[0] = 4 * slot;
      }
   }
   return true;
}

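/* Rewrites input intrinsic bases from varying locations to VUE map slots. */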
static bool
remap_inputs_with_vue_map(nir_block *block, const struct brw_vue_map *vue_map)
{
   nir_foreach_instr(instr, block) {
      if (instr->type != nir_instr_type_intrinsic)
         continue;

      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);

      if (intrin->intrinsic == nir_intrinsic_load_input ||
          intrin->intrinsic == nir_intrinsic_load_per_vertex_input) {
         int vue_slot = vue_map->varying_to_slot[intrin->const_index[0]];
         assert(vue_slot != -1);
         intrin->const_index[0] = vue_slot;
      }
   }
   return true;
}

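/* Remaps accesses to gl_TessLevelInner/Outer from their varying slots to the
 * patch URB header layout, which depends on the tessellation domain.
 * Components that do not exist for the given domain are removed: loads are
 * replaced with undefs and the intrinsic is deleted.  Returns false if the
 * intrinsic does not touch a tessellation level.
 */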
static bool
remap_tess_levels(nir_builder *b, nir_intrinsic_instr *intr,
                  GLenum primitive_mode)
{
   const int location = nir_intrinsic_base(intr);
   const unsigned component = nir_intrinsic_component(intr);
   bool out_of_bounds;

   if (location == VARYING_SLOT_TESS_LEVEL_INNER) {
      switch (primitive_mode) {
      case GL_QUADS:
         /* gl_TessLevelInner[0..1] lives at DWords 3-2 (reversed). */
         nir_intrinsic_set_base(intr, 0);
         nir_intrinsic_set_component(intr, 3 - component);
         out_of_bounds = false;
         break;
      case GL_TRIANGLES:
         /* gl_TessLevelInner[0] lives at DWord 4. */
         nir_intrinsic_set_base(intr, 1);
         out_of_bounds = component > 0;
         break;
      case GL_ISOLINES:
         out_of_bounds = true;
         break;
      default:
         unreachable("Bogus tessellation domain");
      }
   } else if (location == VARYING_SLOT_TESS_LEVEL_OUTER) {
      if (primitive_mode == GL_ISOLINES) {
         /* gl_TessLevelOuter[0..1] lives at DWords 6-7 (in order). */
         nir_intrinsic_set_base(intr, 1);
         nir_intrinsic_set_component(intr, 2 + nir_intrinsic_component(intr));
         out_of_bounds = component > 1;
      } else {
         /* Triangles use DWords 7-5 (reversed); Quads use 7-4 (reversed) */
         nir_intrinsic_set_base(intr, 1);
         nir_intrinsic_set_component(intr, 3 - nir_intrinsic_component(intr));
         out_of_bounds = component == 3 && primitive_mode == GL_TRIANGLES;
      }
   } else {
      return false;
   }

   if (out_of_bounds) {
      if (nir_intrinsic_infos[intr->intrinsic].has_dest) {
         b->cursor = nir_before_instr(&intr->instr);
         nir_ssa_def *undef = nir_ssa_undef(b, 1, 32);
         nir_ssa_def_rewrite_uses(&intr->dest.ssa, nir_src_for_ssa(undef));
      }
      nir_instr_remove(&intr->instr);
   }

   return true;
}

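/* Rewrites TCS output and TES input intrinsics from varying locations to VUE
 * map slots, folding any constant per-vertex index into the base and emitting
 * a multiply/add for non-constant ones.  Tessellation levels are handled by
 * remap_tess_levels() except in the passthrough TCS, which is identified by
 * its "passthrough" shader name.
 */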
static bool
remap_patch_urb_offsets(nir_block *block, nir_builder *b,
                        const struct brw_vue_map *vue_map,
                        GLenum tes_primitive_mode)
{
   const bool is_passthrough_tcs = b->shader->info->name &&
      strcmp(b->shader->info->name, "passthrough") == 0;

   nir_foreach_instr_safe(instr, block) {
      if (instr->type != nir_instr_type_intrinsic)
         continue;

      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);

      gl_shader_stage stage = b->shader->stage;

      if ((stage == MESA_SHADER_TESS_CTRL && is_output(intrin)) ||
          (stage == MESA_SHADER_TESS_EVAL && is_input(intrin))) {

         if (!is_passthrough_tcs &&
             remap_tess_levels(b, intrin, tes_primitive_mode))
            continue;

         int vue_slot = vue_map->varying_to_slot[intrin->const_index[0]];
         assert(vue_slot != -1);
         intrin->const_index[0] = vue_slot;

         nir_src *vertex = nir_get_io_vertex_index_src(intrin);
         if (vertex) {
            nir_const_value *const_vertex = nir_src_as_const_value(*vertex);
            if (const_vertex) {
               intrin->const_index[0] += const_vertex->u32[0] *
                                         vue_map->num_per_vertex_slots;
            } else {
               b->cursor = nir_before_instr(&intrin->instr);

               /* Multiply by the number of per-vertex slots. */
               nir_ssa_def *vertex_offset =
                  nir_imul(b,
                           nir_ssa_for_src(b, *vertex, 1),
                           nir_imm_int(b,
                                       vue_map->num_per_vertex_slots));

               /* Add it to the existing offset */
               nir_src *offset = nir_get_io_offset_src(intrin);
               nir_ssa_def *total_offset =
                  nir_iadd(b, vertex_offset,
                           nir_ssa_for_src(b, *offset, 1));

               nir_instr_rewrite_src(&intrin->instr, offset,
                                     nir_src_for_ssa(total_offset));
            }
         }
      }
   }
   return true;
}

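/* Lowers VS input variables to load_input intrinsics and applies the Intel
 * vertex attribute workarounds.  In the scalar backend, VERT_ATTRIB_*
 * locations are also remapped to packed register slots (see remap_vs_attrs).
 */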
void
brw_nir_lower_vs_inputs(nir_shader *nir,
                        bool is_scalar,
                        bool use_legacy_snorm_formula,
                        const uint8_t *vs_attrib_wa_flags)
{
   /* Start with the location of the variable's base. */
   foreach_list_typed(nir_variable, var, node, &nir->inputs) {
      var->data.driver_location = var->data.location;
   }

   /* Now use nir_lower_io to walk dereference chains.  Attribute arrays are
    * loaded as one vec4 or dvec4 per element (or matrix column), depending on
    * whether it is a double-precision type or not.
    */
   nir_lower_io(nir, nir_var_shader_in, type_size_vec4, 0);

   /* This pass needs actual constants */
   nir_opt_constant_folding(nir);

   add_const_offset_to_base(nir, nir_var_shader_in);

   brw_nir_apply_attribute_workarounds(nir, use_legacy_snorm_formula,
                                       vs_attrib_wa_flags);

   if (is_scalar) {
      /* Finally, translate VERT_ATTRIB_* values into the actual registers. */

      nir_foreach_function(function, nir) {
         if (function->impl) {
            nir_foreach_block(block, function->impl) {
               remap_vs_attrs(block, nir->info);
            }
         }
      }
   }
}

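/* Lowers inputs read from a previous VUE stage to load_input intrinsics.
 * Varying locations are remapped to VUE map slots except in the vec4
 * geometry shader backend.
 */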
void
brw_nir_lower_vue_inputs(nir_shader *nir, bool is_scalar,
                         const struct brw_vue_map *vue_map)
{
   foreach_list_typed(nir_variable, var, node, &nir->inputs) {
      var->data.driver_location = var->data.location;
   }

   /* Inputs are stored in vec4 slots, so use type_size_vec4(). */
   nir_lower_io(nir, nir_var_shader_in, type_size_vec4, 0);

   if (is_scalar || nir->stage != MESA_SHADER_GEOMETRY) {
      /* This pass needs actual constants */
      nir_opt_constant_folding(nir);

      add_const_offset_to_base(nir, nir_var_shader_in);

      nir_foreach_function(function, nir) {
         if (function->impl) {
            nir_foreach_block(block, function->impl) {
               remap_inputs_with_vue_map(block, vue_map);
            }
         }
      }
   }
}

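/* Lowers TES inputs to load intrinsics and remaps them to patch URB offsets
 * using the VUE map and the tessellation primitive mode.
 */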
void
brw_nir_lower_tes_inputs(nir_shader *nir, const struct brw_vue_map *vue_map)
{
   foreach_list_typed(nir_variable, var, node, &nir->inputs) {
      var->data.driver_location = var->data.location;
   }

   nir_lower_io(nir, nir_var_shader_in, type_size_vec4, 0);

   /* This pass needs actual constants */
   nir_opt_constant_folding(nir);

   add_const_offset_to_base(nir, nir_var_shader_in);

   nir_foreach_function(function, nir) {
      if (function->impl) {
         nir_builder b;
         nir_builder_init(&b, function->impl);
         nir_foreach_block(block, function->impl) {
            remap_patch_urb_offsets(block, &b, vue_map,
                                    nir->info->tess.primitive_mode);
         }
      }
   }
}

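/* Lowers FS inputs to load intrinsics, resolving default interpolation modes
 * and honoring the flat-shade and per-sample interpolation bits of the WM
 * program key.
 */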
void
brw_nir_lower_fs_inputs(nir_shader *nir,
                        const struct gen_device_info *devinfo,
                        const struct brw_wm_prog_key *key)
{
   foreach_list_typed(nir_variable, var, node, &nir->inputs) {
      var->data.driver_location = var->data.location;

      /* Apply default interpolation mode.
       *
       * Everything defaults to smooth except for the legacy GL color
       * built-in variables, which might be flat depending on API state.
       */
      if (var->data.interpolation == INTERP_MODE_NONE) {
         const bool flat = key->flat_shade &&
            (var->data.location == VARYING_SLOT_COL0 ||
             var->data.location == VARYING_SLOT_COL1);

         var->data.interpolation = flat ? INTERP_MODE_FLAT
                                        : INTERP_MODE_SMOOTH;
      }

      /* On Ironlake and below, there is only one interpolation mode.
       * Centroid interpolation doesn't mean anything on this hardware --
       * there is no multisampling.
       */
      if (devinfo->gen < 6) {
         var->data.centroid = false;
         var->data.sample = false;
      }
   }

   nir_lower_io_options lower_io_options = 0;
   if (key->persample_interp)
      lower_io_options |= nir_lower_io_force_sample_interpolation;

   nir_lower_io(nir, nir_var_shader_in, type_size_vec4, lower_io_options);

   /* This pass needs actual constants */
   nir_opt_constant_folding(nir);

   add_const_offset_to_base(nir, nir_var_shader_in);
}

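/* Lowers outputs written to the VUE to store_output intrinsics, one vec4
 * slot per location.
 */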
void
brw_nir_lower_vue_outputs(nir_shader *nir,
                          bool is_scalar)
{
   nir_foreach_variable(var, &nir->outputs) {
      var->data.driver_location = var->data.location;
   }

   nir_lower_io(nir, nir_var_shader_out, type_size_vec4, 0);
}

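/* Lowers TCS outputs to store intrinsics and remaps them to patch URB
 * offsets using the VUE map and the TES primitive mode.
 */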
void
brw_nir_lower_tcs_outputs(nir_shader *nir, const struct brw_vue_map *vue_map,
                          GLenum tes_primitive_mode)
{
   nir_foreach_variable(var, &nir->outputs) {
      var->data.driver_location = var->data.location;
   }

   nir_lower_io(nir, nir_var_shader_out, type_size_vec4, 0);

   /* This pass needs actual constants */
   nir_opt_constant_folding(nir);

   add_const_offset_to_base(nir, nir_var_shader_out);

   nir_foreach_function(function, nir) {
      if (function->impl) {
         nir_builder b;
         nir_builder_init(&b, function->impl);
         nir_foreach_block(block, function->impl) {
            remap_patch_urb_offsets(block, &b, vue_map, tes_primitive_mode);
         }
      }
   }
}

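/* Lowers FS outputs, packing both the location and the dual-source blend
 * index into driver_location so the backend can recover them.
 */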
void
brw_nir_lower_fs_outputs(nir_shader *nir)
{
   nir_foreach_variable(var, &nir->outputs) {
      var->data.driver_location =
         SET_FIELD(var->data.index, BRW_NIR_FRAG_OUTPUT_INDEX) |
         SET_FIELD(var->data.location, BRW_NIR_FRAG_OUTPUT_LOCATION);
   }

   nir_lower_io(nir, nir_var_shader_out, type_size_dvec4, 0);
}

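/* Lowers compute shader shared variables to byte-addressed shared-memory
 * load/store intrinsics.
 */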
void
brw_nir_lower_cs_shared(nir_shader *nir)
{
   nir_assign_var_locations(&nir->shared, &nir->num_shared,
                            type_size_scalar_bytes);
   nir_lower_io(nir, nir_var_shared, type_size_scalar_bytes, 0);
}

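/* Convenience wrappers around NIR_PASS for the optimization loops below:
 * OPT() runs a pass and accumulates its result into a local "progress"
 * variable (which must be in scope at the call site), while OPT_V() runs a
 * pass that does not report progress.
 */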
#define OPT(pass, ...) ({                                  \
   bool this_progress = false;                             \
   NIR_PASS(this_progress, nir, pass, ##__VA_ARGS__);      \
   if (this_progress)                                      \
      progress = true;                                     \
   this_progress;                                          \
})

#define OPT_V(pass, ...) NIR_PASS_V(nir, pass, ##__VA_ARGS__)

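/* Runs the standard suite of NIR optimizations in a loop until none of them
 * make further progress.  Scalarization passes only run for scalar backends,
 * and loop unrolling is skipped when the backend disables it by setting
 * max_unroll_iterations to 0.
 */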
static nir_shader *
nir_optimize(nir_shader *nir, const struct brw_compiler *compiler,
             bool is_scalar)
{
   nir_variable_mode indirect_mask = 0;
   if (compiler->glsl_compiler_options[nir->stage].EmitNoIndirectInput)
      indirect_mask |= nir_var_shader_in;
   if (compiler->glsl_compiler_options[nir->stage].EmitNoIndirectOutput)
      indirect_mask |= nir_var_shader_out;
   if (compiler->glsl_compiler_options[nir->stage].EmitNoIndirectTemp)
      indirect_mask |= nir_var_local;

   bool progress;
   do {
      progress = false;
      OPT_V(nir_lower_vars_to_ssa);
      OPT(nir_opt_copy_prop_vars);

      if (is_scalar) {
         OPT(nir_lower_alu_to_scalar);
      }

      OPT(nir_copy_prop);

      if (is_scalar) {
         OPT(nir_lower_phis_to_scalar);
      }

      OPT(nir_copy_prop);
      OPT(nir_opt_dce);
      OPT(nir_opt_cse);
      OPT(nir_opt_peephole_select, 0);
      OPT(nir_opt_algebraic);
      OPT(nir_opt_constant_folding);
      OPT(nir_opt_dead_cf);
      if (OPT(nir_opt_trivial_continues)) {
         /* If nir_opt_trivial_continues makes progress, then we need to clean
          * things up if we want any hope of nir_opt_if or nir_opt_loop_unroll
          * to make progress.
          */
         OPT(nir_copy_prop);
         OPT(nir_opt_dce);
      }
      OPT(nir_opt_if);
      if (nir->options->max_unroll_iterations != 0) {
         OPT(nir_opt_loop_unroll, indirect_mask);
      }
      OPT(nir_opt_remove_phis);
      OPT(nir_opt_undef);
      OPT_V(nir_lower_doubles, nir_lower_drcp |
                               nir_lower_dsqrt |
                               nir_lower_drsq |
                               nir_lower_dtrunc |
                               nir_lower_dfloor |
                               nir_lower_dceil |
                               nir_lower_dfract |
                               nir_lower_dround_even |
                               nir_lower_dmod);
      OPT_V(nir_lower_double_pack);
   } while (progress);

   return nir;
}

/* Does some simple lowering and runs the standard suite of optimizations
 *
 * This is intended to be called more-or-less directly after you get the
 * shader out of GLSL or some other source.  While it is geared towards i965,
 * it is not at all generator-specific except for the is_scalar flag.  Even
 * there, it is safe to call with is_scalar = false for a shader that is
 * intended for the FS backend as long as nir_optimize is called again with
 * is_scalar = true to scalarize everything prior to code gen.
 */
nir_shader *
brw_preprocess_nir(const struct brw_compiler *compiler, nir_shader *nir)
{
   const struct gen_device_info *devinfo = compiler->devinfo;
   bool progress; /* Written by OPT and OPT_V */
   (void)progress;

   const bool is_scalar = compiler->scalar_stage[nir->stage];

   if (nir->stage == MESA_SHADER_GEOMETRY)
      OPT(nir_lower_gs_intrinsics);

   /* See also brw_nir_trig_workarounds.py */
   if (compiler->precise_trig &&
       !(devinfo->gen >= 10 || devinfo->is_kabylake))
      OPT(brw_nir_apply_trig_workarounds);

   static const nir_lower_tex_options tex_options = {
      .lower_txp = ~0,
      .lower_txf_offset = true,
      .lower_rect_offset = true,
      .lower_txd_cube_map = true,
   };

   OPT(nir_lower_tex, &tex_options);
   OPT(nir_normalize_cubemap_coords);

   OPT(nir_lower_global_vars_to_local);

   OPT(nir_split_var_copies);

   nir = nir_optimize(nir, compiler, is_scalar);

   if (is_scalar) {
      OPT_V(nir_lower_load_const_to_scalar);
   }

   /* Lower a bunch of stuff */
   OPT_V(nir_lower_var_copies);

   OPT_V(nir_lower_clip_cull_distance_arrays);

   nir_variable_mode indirect_mask = 0;
   if (compiler->glsl_compiler_options[nir->stage].EmitNoIndirectInput)
      indirect_mask |= nir_var_shader_in;
   if (compiler->glsl_compiler_options[nir->stage].EmitNoIndirectOutput)
      indirect_mask |= nir_var_shader_out;
   if (compiler->glsl_compiler_options[nir->stage].EmitNoIndirectTemp)
      indirect_mask |= nir_var_local;

   nir_lower_indirect_derefs(nir, indirect_mask);

   /* Get rid of split copies */
   nir = nir_optimize(nir, compiler, is_scalar);

   OPT(nir_remove_dead_variables, nir_var_local);

   return nir;
}

/* Prepare the given shader for codegen
 *
 * This function is intended to be called right before going into the actual
 * backend and is highly backend-specific.  Also, once this function has been
 * called on a shader, it will no longer be in SSA form so most optimizations
 * will not work.
 */
nir_shader *
brw_postprocess_nir(nir_shader *nir, const struct brw_compiler *compiler,
                    bool is_scalar)
{
   const struct gen_device_info *devinfo = compiler->devinfo;
   bool debug_enabled =
      (INTEL_DEBUG & intel_debug_flag_for_shader_stage(nir->stage));

   bool progress; /* Written by OPT and OPT_V */
   (void)progress;

   nir = nir_optimize(nir, compiler, is_scalar);

   if (devinfo->gen >= 6) {
      /* Try to fuse multiply-adds */
      OPT(brw_nir_opt_peephole_ffma);
   }

   OPT(nir_opt_algebraic_late);

   OPT_V(nir_lower_to_source_mods);
   OPT(nir_copy_prop);
   OPT(nir_opt_dce);
   OPT(nir_opt_move_comparisons);

   OPT(nir_lower_locals_to_regs);

   if (unlikely(debug_enabled)) {
      /* Re-index SSA defs so we print more sensible numbers. */
      nir_foreach_function(function, nir) {
         if (function->impl)
            nir_index_ssa_defs(function->impl);
      }

      fprintf(stderr, "NIR (SSA form) for %s shader:\n",
              _mesa_shader_stage_to_string(nir->stage));
      nir_print_shader(nir, stderr);
   }

   OPT_V(nir_convert_from_ssa, true);

   if (!is_scalar) {
      OPT_V(nir_move_vec_src_uses_to_dest);
      OPT(nir_lower_vec_to_movs);
   }

   /* This is the last pass we run before we start emitting stuff.  It
    * determines when we need to insert boolean resolves on Gen <= 5.  We
    * run it last because it stashes data in instr->pass_flags and we don't
    * want that to be squashed by other NIR passes.
    */
   if (devinfo->gen <= 5)
      brw_nir_analyze_boolean_resolves(nir);

   nir_sweep(nir);

   if (unlikely(debug_enabled)) {
      fprintf(stderr, "NIR (final form) for %s shader:\n",
              _mesa_shader_stage_to_string(nir->stage));
      nir_print_shader(nir, stderr);
   }

   return nir;
}

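/* Applies sampler-key-driven texture lowering (rectangle textures, GL_CLAMP
 * saturation, swizzle faking, shadow txd lowering, and YUV external formats)
 * and re-optimizes if anything changed.
 */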
nir_shader *
brw_nir_apply_sampler_key(nir_shader *nir,
                          const struct brw_compiler *compiler,
                          const struct brw_sampler_prog_key_data *key_tex,
                          bool is_scalar)
{
   const struct gen_device_info *devinfo = compiler->devinfo;
   nir_lower_tex_options tex_options = { 0 };

   /* Ironlake and prior require lowering of all rectangle textures */
   if (devinfo->gen < 6)
      tex_options.lower_rect = true;

   /* Prior to Broadwell, our hardware can't actually do GL_CLAMP */
   if (devinfo->gen < 8) {
      tex_options.saturate_s = key_tex->gl_clamp_mask[0];
      tex_options.saturate_t = key_tex->gl_clamp_mask[1];
      tex_options.saturate_r = key_tex->gl_clamp_mask[2];
   }

   /* Prior to Haswell, we have to fake texture swizzle */
   for (unsigned s = 0; s < MAX_SAMPLERS; s++) {
      if (key_tex->swizzles[s] == SWIZZLE_NOOP)
         continue;

      tex_options.swizzle_result |= (1 << s);
      for (unsigned c = 0; c < 4; c++)
         tex_options.swizzles[s][c] = GET_SWZ(key_tex->swizzles[s], c);
   }

   /* Prior to Haswell, we have to lower gradients on shadow samplers */
   tex_options.lower_txd_shadow = devinfo->gen < 8 && !devinfo->is_haswell;

   tex_options.lower_y_uv_external = key_tex->y_uv_image_mask;
   tex_options.lower_y_u_v_external = key_tex->y_u_v_image_mask;
   tex_options.lower_yx_xuxv_external = key_tex->yx_xuxv_image_mask;

   if (nir_lower_tex(nir, &tex_options)) {
      nir_validate_shader(nir);
      nir = nir_optimize(nir, compiler, is_scalar);
   }

   return nir;
}

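/* Returns the brw_reg_type corresponding to a nir_alu_type. */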
enum brw_reg_type
brw_type_for_nir_type(nir_alu_type type)
{
   switch (type) {
   case nir_type_uint:
   case nir_type_uint32:
      return BRW_REGISTER_TYPE_UD;
   case nir_type_bool:
   case nir_type_int:
   case nir_type_bool32:
   case nir_type_int32:
      return BRW_REGISTER_TYPE_D;
   case nir_type_float:
   case nir_type_float32:
      return BRW_REGISTER_TYPE_F;
   case nir_type_float64:
      return BRW_REGISTER_TYPE_DF;
   case nir_type_int64:
   case nir_type_uint64:
      /* TODO we should only see these in moves, so for now it's ok, but when
       * we add actual 64-bit integer support we should fix this.
       */
      return BRW_REGISTER_TYPE_DF;
   default:
      unreachable("unknown type");
   }

   return BRW_REGISTER_TYPE_F;
}

/* Returns the glsl_base_type corresponding to a nir_alu_type.
 * This is used by both brw_vec4_nir and brw_fs_nir.
 */
enum glsl_base_type
brw_glsl_base_type_for_nir_type(nir_alu_type type)
{
   switch (type) {
   case nir_type_float:
   case nir_type_float32:
      return GLSL_TYPE_FLOAT;

   case nir_type_float64:
      return GLSL_TYPE_DOUBLE;

   case nir_type_int:
   case nir_type_int32:
      return GLSL_TYPE_INT;

   case nir_type_uint:
   case nir_type_uint32:
      return GLSL_TYPE_UINT;

   default:
      unreachable("bad type");
   }
}