Home | History | Annotate | Download | only in nir
      1 /*
      2  * Copyright © 2014 Intel Corporation
      3  *
      4  * Permission is hereby granted, free of charge, to any person obtaining a
      5  * copy of this software and associated documentation files (the "Software"),
      6  * to deal in the Software without restriction, including without limitation
      7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      8  * and/or sell copies of the Software, and to permit persons to whom the
      9  * Software is furnished to do so, subject to the following conditions:
     10  *
     11  * The above copyright notice and this permission notice (including the next
     12  * paragraph) shall be included in all copies or substantial portions of the
     13  * Software.
     14  *
     15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
     20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
     21  * IN THE SOFTWARE.
     22  *
     23  * Authors:
     24  *    Jason Ekstrand (jason (at) jlekstrand.net)
     25  *
     26  */
     27 
     28 #include "nir.h"
     29 #include "nir_array.h"
     30 
     31 struct locals_to_regs_state {
     32    nir_shader *shader;
     33    nir_function_impl *impl;
     34 
     35    /* A hash table mapping derefs to registers */
     36    struct hash_table *regs_table;
     37 
     38    bool progress;
     39 };
     40 
     41 /* The following two functions implement a hash and equality check for
     42  * variable dereferences.  When the hash or equality function encounters an
     43  * array, it ignores the offset and whether it is direct or indirect
     44  * entirely.
     45  */
     46 static uint32_t
     47 hash_deref(const void *void_deref)
     48 {
     49    uint32_t hash = _mesa_fnv32_1a_offset_bias;
     50 
     51    const nir_deref_var *deref_var = void_deref;
     52    hash = _mesa_fnv32_1a_accumulate(hash, deref_var->var);
     53 
     54    for (const nir_deref *deref = deref_var->deref.child;
     55         deref; deref = deref->child) {
     56       if (deref->deref_type == nir_deref_type_struct) {
     57          const nir_deref_struct *deref_struct = nir_deref_as_struct(deref);
     58          hash = _mesa_fnv32_1a_accumulate(hash, deref_struct->index);
     59       }
     60    }
     61 
     62    return hash;
     63 }
     64 
     65 static bool
     66 derefs_equal(const void *void_a, const void *void_b)
     67 {
     68    const nir_deref_var *a_var = void_a;
     69    const nir_deref_var *b_var = void_b;
     70 
     71    if (a_var->var != b_var->var)
     72       return false;
     73 
     74    for (const nir_deref *a = a_var->deref.child, *b = b_var->deref.child;
     75         a != NULL; a = a->child, b = b->child) {
     76       if (a->deref_type != b->deref_type)
     77          return false;
     78 
     79       if (a->deref_type == nir_deref_type_struct) {
     80          if (nir_deref_as_struct(a)->index != nir_deref_as_struct(b)->index)
     81             return false;
     82       }
     83       /* Do nothing for arrays.  They're all the same. */
     84 
     85       assert((a->child == NULL) == (b->child == NULL));
     86       if((a->child == NULL) != (b->child == NULL))
     87          return false;
     88    }
     89 
     90    return true;
     91 }
     92 
     93 static nir_register *
     94 get_reg_for_deref(nir_deref_var *deref, struct locals_to_regs_state *state)
     95 {
     96    uint32_t hash = hash_deref(deref);
     97 
     98    assert(deref->var->constant_initializer == NULL);
     99 
    100    struct hash_entry *entry =
    101       _mesa_hash_table_search_pre_hashed(state->regs_table, hash, deref);
    102    if (entry)
    103       return entry->data;
    104 
    105    unsigned array_size = 1;
    106    nir_deref *tail = &deref->deref;
    107    while (tail->child) {
    108       if (tail->child->deref_type == nir_deref_type_array)
    109          array_size *= glsl_get_length(tail->type);
    110       tail = tail->child;
    111    }
    112 
    113    assert(glsl_type_is_vector(tail->type) || glsl_type_is_scalar(tail->type));
    114 
    115    nir_register *reg = nir_local_reg_create(state->impl);
    116    reg->num_components = glsl_get_vector_elements(tail->type);
    117    reg->num_array_elems = array_size > 1 ? array_size : 0;
    118    reg->bit_size = glsl_get_bit_size(tail->type);
    119 
    120    _mesa_hash_table_insert_pre_hashed(state->regs_table, hash, deref, reg);
    121 
    122    return reg;
    123 }
    124 
    125 static nir_src
    126 get_deref_reg_src(nir_deref_var *deref, nir_instr *instr,
    127                   struct locals_to_regs_state *state)
    128 {
    129    nir_src src;
    130 
    131    src.is_ssa = false;
    132    src.reg.reg = get_reg_for_deref(deref, state);
    133    src.reg.base_offset = 0;
    134    src.reg.indirect = NULL;
    135 
    136    /* It is possible for a user to create a shader that has an array with a
    137     * single element and then proceed to access it indirectly.  Indirectly
    138     * accessing a non-array register is not allowed in NIR.  In order to
    139     * handle this case we just convert it to a direct reference.
    140     */
    141    if (src.reg.reg->num_array_elems == 0)
    142       return src;
    143 
    144    nir_deref *tail = &deref->deref;
    145    while (tail->child != NULL) {
    146       const struct glsl_type *parent_type = tail->type;
    147       tail = tail->child;
    148 
    149       if (tail->deref_type != nir_deref_type_array)
    150          continue;
    151 
    152       nir_deref_array *deref_array = nir_deref_as_array(tail);
    153 
    154       src.reg.base_offset *= glsl_get_length(parent_type);
    155       src.reg.base_offset += deref_array->base_offset;
    156 
    157       if (src.reg.indirect) {
    158          nir_load_const_instr *load_const =
    159             nir_load_const_instr_create(state->shader, 1, 32);
    160          load_const->value.u32[0] = glsl_get_length(parent_type);
    161          nir_instr_insert_before(instr, &load_const->instr);
    162 
    163          nir_alu_instr *mul = nir_alu_instr_create(state->shader, nir_op_imul);
    164          mul->src[0].src = *src.reg.indirect;
    165          mul->src[1].src.is_ssa = true;
    166          mul->src[1].src.ssa = &load_const->def;
    167          mul->dest.write_mask = 1;
    168          nir_ssa_dest_init(&mul->instr, &mul->dest.dest, 1, 32, NULL);
    169          nir_instr_insert_before(instr, &mul->instr);
    170 
    171          src.reg.indirect->is_ssa = true;
    172          src.reg.indirect->ssa = &mul->dest.dest.ssa;
    173       }
    174 
    175       if (deref_array->deref_array_type == nir_deref_array_type_indirect) {
    176          if (src.reg.indirect == NULL) {
    177             src.reg.indirect = ralloc(state->shader, nir_src);
    178             nir_src_copy(src.reg.indirect, &deref_array->indirect,
    179                          state->shader);
    180          } else {
    181             nir_alu_instr *add = nir_alu_instr_create(state->shader,
    182                                                       nir_op_iadd);
    183             add->src[0].src = *src.reg.indirect;
    184             nir_src_copy(&add->src[1].src, &deref_array->indirect, add);
    185             add->dest.write_mask = 1;
    186             nir_ssa_dest_init(&add->instr, &add->dest.dest, 1, 32, NULL);
    187             nir_instr_insert_before(instr, &add->instr);
    188 
    189             src.reg.indirect->is_ssa = true;
    190             src.reg.indirect->ssa = &add->dest.dest.ssa;
    191          }
    192       }
    193    }
    194 
    195    return src;
    196 }
    197 
    198 static bool
    199 lower_locals_to_regs_block(nir_block *block,
    200                            struct locals_to_regs_state *state)
    201 {
    202    nir_foreach_instr_safe(instr, block) {
    203       if (instr->type != nir_instr_type_intrinsic)
    204          continue;
    205 
    206       nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
    207 
    208       switch (intrin->intrinsic) {
    209       case nir_intrinsic_load_var: {
    210          if (intrin->variables[0]->var->data.mode != nir_var_local)
    211             continue;
    212 
    213          nir_alu_instr *mov = nir_alu_instr_create(state->shader, nir_op_imov);
    214          mov->src[0].src = get_deref_reg_src(intrin->variables[0],
    215                                              &intrin->instr, state);
    216          mov->dest.write_mask = (1 << intrin->num_components) - 1;
    217          if (intrin->dest.is_ssa) {
    218             nir_ssa_dest_init(&mov->instr, &mov->dest.dest,
    219                               intrin->num_components,
    220                               intrin->dest.ssa.bit_size, NULL);
    221             nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
    222                                      nir_src_for_ssa(&mov->dest.dest.ssa));
    223          } else {
    224             nir_dest_copy(&mov->dest.dest, &intrin->dest, &mov->instr);
    225          }
    226          nir_instr_insert_before(&intrin->instr, &mov->instr);
    227 
    228          nir_instr_remove(&intrin->instr);
    229          state->progress = true;
    230          break;
    231       }
    232 
    233       case nir_intrinsic_store_var: {
    234          if (intrin->variables[0]->var->data.mode != nir_var_local)
    235             continue;
    236 
    237          nir_src reg_src = get_deref_reg_src(intrin->variables[0],
    238                                              &intrin->instr, state);
    239 
    240          nir_alu_instr *mov = nir_alu_instr_create(state->shader, nir_op_imov);
    241          nir_src_copy(&mov->src[0].src, &intrin->src[0], mov);
    242          mov->dest.write_mask = nir_intrinsic_write_mask(intrin);
    243          mov->dest.dest.is_ssa = false;
    244          mov->dest.dest.reg.reg = reg_src.reg.reg;
    245          mov->dest.dest.reg.base_offset = reg_src.reg.base_offset;
    246          mov->dest.dest.reg.indirect = reg_src.reg.indirect;
    247 
    248          nir_instr_insert_before(&intrin->instr, &mov->instr);
    249 
    250          nir_instr_remove(&intrin->instr);
    251          state->progress = true;
    252          break;
    253       }
    254 
    255       case nir_intrinsic_copy_var:
    256          unreachable("There should be no copies whatsoever at this point");
    257          break;
    258 
    259       default:
    260          continue;
    261       }
    262    }
    263 
    264    return true;
    265 }
    266 
    267 static bool
    268 nir_lower_locals_to_regs_impl(nir_function_impl *impl)
    269 {
    270    struct locals_to_regs_state state;
    271 
    272    state.shader = impl->function->shader;
    273    state.impl = impl;
    274    state.progress = false;
    275    state.regs_table = _mesa_hash_table_create(NULL, hash_deref, derefs_equal);
    276 
    277    nir_metadata_require(impl, nir_metadata_dominance);
    278 
    279    nir_foreach_block(block, impl) {
    280       lower_locals_to_regs_block(block, &state);
    281    }
    282 
    283    nir_metadata_preserve(impl, nir_metadata_block_index |
    284                                nir_metadata_dominance);
    285 
    286    _mesa_hash_table_destroy(state.regs_table, NULL);
    287 
    288    return state.progress;
    289 }
    290 
    291 bool
    292 nir_lower_locals_to_regs(nir_shader *shader)
    293 {
    294    bool progress = false;
    295 
    296    nir_foreach_function(function, shader) {
    297       if (function->impl)
    298          progress = nir_lower_locals_to_regs_impl(function->impl) || progress;
    299    }
    300 
    301    return progress;
    302 }
    303