/*
 * Copyright © 2014 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Connor Abbott (cwabbott0@gmail.com)
 *    Jason Ekstrand (jason@jlekstrand.net)
 *
 */

/*
 * This lowering pass converts loads and stores of input/output variables
 * into the corresponding input/output intrinsics.
 */

#include "nir.h"
#include "nir_builder.h"

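/* Per-pass state shared by the lowering helpers below: the builder used to
 * emit replacement instructions, the driver-supplied type_size() callback,
 * the set of variable modes being lowered, and the caller's option flags.
 */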
struct lower_io_state {
   nir_builder builder;
   int (*type_size)(const struct glsl_type *type);
   nir_variable_mode modes;
   nir_lower_io_options options;
};

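/**
 * Assign a packed driver_location to every variable in var_list, using
 * type_size() to measure how many slots each variable occupies.  Variables
 * backed by an interface block (UBOs/SSBOs) are skipped; the total size of
 * everything else is returned in *size.
 */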
void
nir_assign_var_locations(struct exec_list *var_list, unsigned *size,
                         int (*type_size)(const struct glsl_type *))
{
   unsigned location = 0;

   nir_foreach_variable(var, var_list) {
      /*
       * UBOs and SSBOs have their own address spaces, so don't count them
       * toward the number of global uniforms.
       */
      if ((var->data.mode == nir_var_uniform || var->data.mode == nir_var_shader_storage) &&
          var->interface_type != NULL)
         continue;

      var->data.driver_location = location;
      location += type_size(var->type);
   }

   *size = location;
}

/**
 * Return true if the given variable is a per-vertex input/output array
 * (such as a geometry shader input).
 */
bool
nir_is_per_vertex_io(nir_variable *var, gl_shader_stage stage)
{
   if (var->data.patch || !glsl_type_is_array(var->type))
      return false;

   if (var->data.mode == nir_var_shader_in)
      return stage == MESA_SHADER_GEOMETRY ||
             stage == MESA_SHADER_TESS_CTRL ||
             stage == MESA_SHADER_TESS_EVAL;

   if (var->data.mode == nir_var_shader_out)
      return stage == MESA_SHADER_TESS_CTRL;

   return false;
}

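/* Compute an SSA offset, in the units defined by type_size(), for the given
 * variable dereference.  For per-vertex I/O the outermost array index is
 * returned separately through *vertex_index; for "compact" variables the
 * sub-vec4 component is folded into *component instead.
 */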
static nir_ssa_def *
get_io_offset(nir_builder *b, nir_deref_var *deref,
              nir_ssa_def **vertex_index,
              int (*type_size)(const struct glsl_type *),
              unsigned *component)
{
   nir_deref *tail = &deref->deref;

   /* For per-vertex input arrays (i.e. geometry shader inputs), keep the
    * outermost array index separate.  Process the rest normally.
    */
   if (vertex_index != NULL) {
      tail = tail->child;
      nir_deref_array *deref_array = nir_deref_as_array(tail);

      nir_ssa_def *vtx = nir_imm_int(b, deref_array->base_offset);
      if (deref_array->deref_array_type == nir_deref_array_type_indirect) {
         vtx = nir_iadd(b, vtx, nir_ssa_for_src(b, deref_array->indirect, 1));
      }
      *vertex_index = vtx;
   }

   if (deref->var->data.compact) {
      assert(tail->child->deref_type == nir_deref_type_array);
      assert(glsl_type_is_scalar(glsl_without_array(deref->var->type)));
      nir_deref_array *deref_array = nir_deref_as_array(tail->child);
      /* We always lower indirect dereferences for "compact" array vars. */
      assert(deref_array->deref_array_type == nir_deref_array_type_direct);

      const unsigned total_offset = *component + deref_array->base_offset;
      const unsigned slot_offset = total_offset / 4;
      *component = total_offset % 4;
      return nir_imm_int(b, type_size(glsl_vec4_type()) * slot_offset);
   }

   /* Just emit code and let constant-folding go to town */
   nir_ssa_def *offset = nir_imm_int(b, 0);

   while (tail->child != NULL) {
      const struct glsl_type *parent_type = tail->type;
      tail = tail->child;

      if (tail->deref_type == nir_deref_type_array) {
         nir_deref_array *deref_array = nir_deref_as_array(tail);
         unsigned size = type_size(tail->type);

         offset = nir_iadd(b, offset,
                           nir_imm_int(b, size * deref_array->base_offset));

         if (deref_array->deref_array_type == nir_deref_array_type_indirect) {
            nir_ssa_def *mul =
               nir_imul(b, nir_imm_int(b, size),
                        nir_ssa_for_src(b, deref_array->indirect, 1));

            offset = nir_iadd(b, offset, mul);
         }
      } else if (tail->deref_type == nir_deref_type_struct) {
         nir_deref_struct *deref_struct = nir_deref_as_struct(tail);

         unsigned field_offset = 0;
         for (unsigned i = 0; i < deref_struct->index; i++) {
            field_offset += type_size(glsl_get_struct_field(parent_type, i));
         }
         offset = nir_iadd(b, offset, nir_imm_int(b, field_offset));
      }
   }

   return offset;
}

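/* Build (but do not insert) the load intrinsic that replaces a load_var on
 * an input, output, uniform, or shared variable.  Fragment shader inputs may
 * instead become load_interpolated_input with an explicit barycentric source
 * when the backend opts in via use_interpolated_input_intrinsics.
 */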
static nir_intrinsic_instr *
lower_load(nir_intrinsic_instr *intrin, struct lower_io_state *state,
           nir_ssa_def *vertex_index, nir_ssa_def *offset,
           unsigned component)
{
   const nir_shader *nir = state->builder.shader;
   nir_variable *var = intrin->variables[0]->var;
   nir_variable_mode mode = var->data.mode;
   nir_ssa_def *barycentric = NULL;

   nir_intrinsic_op op;
   switch (mode) {
   case nir_var_shader_in:
      if (nir->stage == MESA_SHADER_FRAGMENT &&
          nir->options->use_interpolated_input_intrinsics &&
          var->data.interpolation != INTERP_MODE_FLAT) {
         assert(vertex_index == NULL);

         nir_intrinsic_op bary_op;
         if (var->data.sample ||
             (state->options & nir_lower_io_force_sample_interpolation))
            bary_op = nir_intrinsic_load_barycentric_sample;
         else if (var->data.centroid)
            bary_op = nir_intrinsic_load_barycentric_centroid;
         else
            bary_op = nir_intrinsic_load_barycentric_pixel;

         barycentric = nir_load_barycentric(&state->builder, bary_op,
                                            var->data.interpolation);
         op = nir_intrinsic_load_interpolated_input;
      } else {
         op = vertex_index ? nir_intrinsic_load_per_vertex_input :
                             nir_intrinsic_load_input;
      }
      break;
   case nir_var_shader_out:
      op = vertex_index ? nir_intrinsic_load_per_vertex_output :
                          nir_intrinsic_load_output;
      break;
   case nir_var_uniform:
      op = nir_intrinsic_load_uniform;
      break;
   case nir_var_shared:
      op = nir_intrinsic_load_shared;
      break;
   default:
      unreachable("Unknown variable mode");
   }

   nir_intrinsic_instr *load =
      nir_intrinsic_instr_create(state->builder.shader, op);
   load->num_components = intrin->num_components;

   nir_intrinsic_set_base(load, var->data.driver_location);
   if (mode == nir_var_shader_in || mode == nir_var_shader_out)
      nir_intrinsic_set_component(load, component);

   if (load->intrinsic == nir_intrinsic_load_uniform)
      nir_intrinsic_set_range(load, state->type_size(var->type));

   if (vertex_index) {
      load->src[0] = nir_src_for_ssa(vertex_index);
      load->src[1] = nir_src_for_ssa(offset);
   } else if (barycentric) {
      load->src[0] = nir_src_for_ssa(barycentric);
      load->src[1] = nir_src_for_ssa(offset);
   } else {
      load->src[0] = nir_src_for_ssa(offset);
   }

   return load;
}

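/* Build (but do not insert) the store intrinsic that replaces a store_var on
 * an output or shared variable, copying the value source and write mask from
 * the original instruction.
 */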
static nir_intrinsic_instr *
lower_store(nir_intrinsic_instr *intrin, struct lower_io_state *state,
            nir_ssa_def *vertex_index, nir_ssa_def *offset,
            unsigned component)
{
   nir_variable *var = intrin->variables[0]->var;
   nir_variable_mode mode = var->data.mode;

   nir_intrinsic_op op;
   if (mode == nir_var_shared) {
      op = nir_intrinsic_store_shared;
   } else {
      assert(mode == nir_var_shader_out);
      op = vertex_index ? nir_intrinsic_store_per_vertex_output :
                          nir_intrinsic_store_output;
   }

   nir_intrinsic_instr *store =
      nir_intrinsic_instr_create(state->builder.shader, op);
   store->num_components = intrin->num_components;

   nir_src_copy(&store->src[0], &intrin->src[0], store);

   nir_intrinsic_set_base(store, var->data.driver_location);

   if (mode == nir_var_shader_out)
      nir_intrinsic_set_component(store, component);

   nir_intrinsic_set_write_mask(store, nir_intrinsic_write_mask(intrin));

   if (vertex_index)
      store->src[1] = nir_src_for_ssa(vertex_index);

   store->src[vertex_index ? 2 : 1] = nir_src_for_ssa(offset);

   return store;
}

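/* Map a var_atomic_* intrinsic on a shared variable to the corresponding
 * shared_atomic_* intrinsic, prepending the computed offset as the first
 * source.
 */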
static nir_intrinsic_instr *
lower_atomic(nir_intrinsic_instr *intrin, struct lower_io_state *state,
             nir_ssa_def *offset)
{
   nir_variable *var = intrin->variables[0]->var;

   assert(var->data.mode == nir_var_shared);

   nir_intrinsic_op op;
   switch (intrin->intrinsic) {
#define OP(O) case nir_intrinsic_var_##O: op = nir_intrinsic_shared_##O; break;
   OP(atomic_exchange)
   OP(atomic_comp_swap)
   OP(atomic_add)
   OP(atomic_imin)
   OP(atomic_umin)
   OP(atomic_imax)
   OP(atomic_umax)
   OP(atomic_and)
   OP(atomic_or)
   OP(atomic_xor)
#undef OP
   default:
      unreachable("Invalid atomic");
   }

   nir_intrinsic_instr *atomic =
      nir_intrinsic_instr_create(state->builder.shader, op);

   nir_intrinsic_set_base(atomic, var->data.driver_location);

   atomic->src[0] = nir_src_for_ssa(offset);
   for (unsigned i = 0; i < nir_intrinsic_infos[intrin->intrinsic].num_srcs; i++) {
      nir_src_copy(&atomic->src[i+1], &intrin->src[i], atomic);
   }

   return atomic;
}

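/* Lower an interp_var_at_* intrinsic into an explicit barycentric setup
 * intrinsic followed by a load_interpolated_input.  Flat inputs fall back to
 * a plain load, since interpolation qualifiers have no effect on them.
 */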
static nir_intrinsic_instr *
lower_interpolate_at(nir_intrinsic_instr *intrin, struct lower_io_state *state,
                     nir_ssa_def *offset, unsigned component)
{
   nir_variable *var = intrin->variables[0]->var;

   assert(var->data.mode == nir_var_shader_in);

   /* Ignore interpolateAt() for flat variables - flat is flat. */
   if (var->data.interpolation == INTERP_MODE_FLAT)
      return lower_load(intrin, state, NULL, offset, component);

   nir_intrinsic_op bary_op;
   switch (intrin->intrinsic) {
   case nir_intrinsic_interp_var_at_centroid:
      bary_op = (state->options & nir_lower_io_force_sample_interpolation) ?
                nir_intrinsic_load_barycentric_sample :
                nir_intrinsic_load_barycentric_centroid;
      break;
   case nir_intrinsic_interp_var_at_sample:
      bary_op = nir_intrinsic_load_barycentric_at_sample;
      break;
   case nir_intrinsic_interp_var_at_offset:
      bary_op = nir_intrinsic_load_barycentric_at_offset;
      break;
   default:
      unreachable("Bogus interpolateAt() intrinsic.");
   }

   nir_intrinsic_instr *bary_setup =
      nir_intrinsic_instr_create(state->builder.shader, bary_op);

   nir_ssa_dest_init(&bary_setup->instr, &bary_setup->dest, 2, 32, NULL);
   nir_intrinsic_set_interp_mode(bary_setup, var->data.interpolation);

   if (intrin->intrinsic != nir_intrinsic_interp_var_at_centroid)
      nir_src_copy(&bary_setup->src[0], &intrin->src[0], bary_setup);

   nir_builder_instr_insert(&state->builder, &bary_setup->instr);

   nir_intrinsic_instr *load =
      nir_intrinsic_instr_create(state->builder.shader,
                                 nir_intrinsic_load_interpolated_input);
   load->num_components = intrin->num_components;

   nir_intrinsic_set_base(load, var->data.driver_location);
   nir_intrinsic_set_component(load, component);

   load->src[0] = nir_src_for_ssa(&bary_setup->dest.ssa);
   load->src[1] = nir_src_for_ssa(offset);

   return load;
}

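/* Walk a block and replace every variable-based I/O intrinsic whose mode is
 * covered by state->modes with the offset-based intrinsic built by the
 * helpers above, rewriting any SSA uses of the original destination.
 */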
static bool
nir_lower_io_block(nir_block *block,
                   struct lower_io_state *state)
{
   nir_builder *b = &state->builder;
   const nir_shader_compiler_options *options = b->shader->options;

   nir_foreach_instr_safe(instr, block) {
      if (instr->type != nir_instr_type_intrinsic)
         continue;

      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);

      switch (intrin->intrinsic) {
      case nir_intrinsic_load_var:
      case nir_intrinsic_store_var:
      case nir_intrinsic_var_atomic_add:
      case nir_intrinsic_var_atomic_imin:
      case nir_intrinsic_var_atomic_umin:
      case nir_intrinsic_var_atomic_imax:
      case nir_intrinsic_var_atomic_umax:
      case nir_intrinsic_var_atomic_and:
      case nir_intrinsic_var_atomic_or:
      case nir_intrinsic_var_atomic_xor:
      case nir_intrinsic_var_atomic_exchange:
      case nir_intrinsic_var_atomic_comp_swap:
         /* We can lower the I/O for this NIR intrinsic */
         break;
      case nir_intrinsic_interp_var_at_centroid:
      case nir_intrinsic_interp_var_at_sample:
      case nir_intrinsic_interp_var_at_offset:
         /* We can optionally lower these to load_interpolated_input */
         if (options->use_interpolated_input_intrinsics)
            break;
      default:
         /* We can't lower the I/O for this NIR intrinsic, so skip it */
         continue;
      }

      nir_variable *var = intrin->variables[0]->var;
      nir_variable_mode mode = var->data.mode;

      if ((state->modes & mode) == 0)
         continue;

      if (mode != nir_var_shader_in &&
          mode != nir_var_shader_out &&
          mode != nir_var_shared &&
          mode != nir_var_uniform)
         continue;

      b->cursor = nir_before_instr(instr);

      const bool per_vertex = nir_is_per_vertex_io(var, b->shader->stage);

      nir_ssa_def *offset;
      nir_ssa_def *vertex_index = NULL;
      unsigned component_offset = var->data.location_frac;

      offset = get_io_offset(b, intrin->variables[0],
                             per_vertex ? &vertex_index : NULL,
                             state->type_size, &component_offset);

      nir_intrinsic_instr *replacement;

      switch (intrin->intrinsic) {
      case nir_intrinsic_load_var:
         replacement = lower_load(intrin, state, vertex_index, offset,
                                  component_offset);
         break;

      case nir_intrinsic_store_var:
         replacement = lower_store(intrin, state, vertex_index, offset,
                                   component_offset);
         break;

      case nir_intrinsic_var_atomic_add:
      case nir_intrinsic_var_atomic_imin:
      case nir_intrinsic_var_atomic_umin:
      case nir_intrinsic_var_atomic_imax:
      case nir_intrinsic_var_atomic_umax:
      case nir_intrinsic_var_atomic_and:
      case nir_intrinsic_var_atomic_or:
      case nir_intrinsic_var_atomic_xor:
      case nir_intrinsic_var_atomic_exchange:
      case nir_intrinsic_var_atomic_comp_swap:
         assert(vertex_index == NULL);
         replacement = lower_atomic(intrin, state, offset);
         break;

      case nir_intrinsic_interp_var_at_centroid:
      case nir_intrinsic_interp_var_at_sample:
      case nir_intrinsic_interp_var_at_offset:
         assert(vertex_index == NULL);
         replacement = lower_interpolate_at(intrin, state, offset,
                                            component_offset);
         break;

      default:
         continue;
      }

      if (nir_intrinsic_infos[intrin->intrinsic].has_dest) {
         if (intrin->dest.is_ssa) {
            nir_ssa_dest_init(&replacement->instr, &replacement->dest,
                              intrin->dest.ssa.num_components,
                              intrin->dest.ssa.bit_size, NULL);
            nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
                                     nir_src_for_ssa(&replacement->dest.ssa));
         } else {
            nir_dest_copy(&replacement->dest, &intrin->dest, &intrin->instr);
         }
      }

      nir_instr_insert_before(&intrin->instr, &replacement->instr);
      nir_instr_remove(&intrin->instr);
   }

   return true;
}

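/* Run the lowering over every block of one function implementation. */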
static void
nir_lower_io_impl(nir_function_impl *impl,
                  nir_variable_mode modes,
                  int (*type_size)(const struct glsl_type *),
                  nir_lower_io_options options)
{
   struct lower_io_state state;

   nir_builder_init(&state.builder, impl);
   state.modes = modes;
   state.type_size = type_size;
   state.options = options;

   nir_foreach_block(block, impl) {
      nir_lower_io_block(block, &state);
   }

   nir_metadata_preserve(impl, nir_metadata_block_index |
                               nir_metadata_dominance);
}

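/**
 * Lower load/store/atomic references to variables of the given modes into
 * offset-based I/O intrinsics across the whole shader.  type_size() tells
 * the pass how many slots a given GLSL type occupies, in whatever units the
 * driver uses for driver_location.
 *
 * A typical call from a driver might look like (illustrative only; the
 * type-size callback name here is hypothetical):
 *
 *    nir_lower_io(shader, nir_var_shader_in | nir_var_shader_out,
 *                 driver_glsl_type_size, 0);
 */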
void
nir_lower_io(nir_shader *shader, nir_variable_mode modes,
             int (*type_size)(const struct glsl_type *),
             nir_lower_io_options options)
{
   nir_foreach_function(function, shader) {
      if (function->impl) {
         nir_lower_io_impl(function->impl, modes, type_size, options);
      }
   }
}

/**
 * Return the offset source for a load/store intrinsic.
 */
nir_src *
nir_get_io_offset_src(nir_intrinsic_instr *instr)
{
   switch (instr->intrinsic) {
   case nir_intrinsic_load_input:
   case nir_intrinsic_load_output:
   case nir_intrinsic_load_uniform:
      return &instr->src[0];
   case nir_intrinsic_load_ubo:
   case nir_intrinsic_load_ssbo:
   case nir_intrinsic_load_per_vertex_input:
   case nir_intrinsic_load_per_vertex_output:
   case nir_intrinsic_load_interpolated_input:
   case nir_intrinsic_store_output:
      return &instr->src[1];
   case nir_intrinsic_store_ssbo:
   case nir_intrinsic_store_per_vertex_output:
      return &instr->src[2];
   default:
      return NULL;
   }
}

/**
 * Return the vertex index source for a load/store per_vertex intrinsic.
 */
nir_src *
nir_get_io_vertex_index_src(nir_intrinsic_instr *instr)
{
   switch (instr->intrinsic) {
   case nir_intrinsic_load_per_vertex_input:
   case nir_intrinsic_load_per_vertex_output:
      return &instr->src[0];
   case nir_intrinsic_store_per_vertex_output:
      return &instr->src[1];
   default:
      return NULL;
   }
}
    557