/*
 * Copyright © 2016 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 */

#include "nir.h"
#include "nir_builder.h"

#include "util/bitscan.h"

/**
 * Variable-based copy propagation
 *
 * Normally, NIR trusts in SSA form for most of its copy-propagation needs.
 * However, there are cases, especially when dealing with indirects, where SSA
 * won't help you.  This pass is for those times.  Specifically, it handles
 * the following things that the rest of NIR can't:
 *
 *  1) Copy-propagation on variables that have indirect access.  This includes
 *     propagating from indirect stores into indirect loads.
 *
 *  2) Dead code elimination of store_var and copy_var intrinsics based on
 *     killed destination values.
 *
 *  3) Removal of redundant load_var intrinsics.  We can't trust regular CSE
 *     to do this because it isn't aware of variable writes that may alias the
 *     value and make the former load invalid.
 *
 * Unfortunately, properly handling all of those cases makes this pass rather
 * complex.  In order to avoid additional complexity, this pass is entirely
 * block-local.  If we tried to make it global, the data-flow analysis would
 * rapidly get out of hand.  Fortunately, for anything that is only ever
 * accessed directly, we get SSA-based copy propagation, which is extremely
 * powerful, so this isn't that great a loss.
 */
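/* As a rough illustration (GLSL-like pseudocode rather than actual NIR):
 *
 *    arr[i] = x;
 *    y = arr[i];   // the use of y can be rewritten to use x, and the store
 *                  // may later be deleted if nothing else can observe it
 *
 * SSA-based copy propagation cannot do this because variables with indirect
 * access are never turned into SSA values.
 */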

struct value {
   bool is_ssa;
   union {
      nir_ssa_def *ssa[4];
      nir_deref_var *deref;
   };
};

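/* One tracked copy: the last known value stored to "dst" within the current
 * block.  "store_instr" records, per component, the instruction that wrote
 * that component (so it can be deleted if the write turns out to be dead),
 * and "comps_may_be_read" records which components may have been read since
 * they were written.
 */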
struct copy_entry {
   struct list_head link;

   nir_instr *store_instr[4];

   unsigned comps_may_be_read;
   struct value src;

   nir_deref_var *dst;
};

struct copy_prop_var_state {
   nir_shader *shader;

   void *mem_ctx;

   struct list_head copies;

   /* We're going to be allocating and deleting a lot of copy entries so we'll
    * keep a free list to avoid thrashing malloc too badly.
    */
   struct list_head copy_free_list;

   bool progress;
};

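/* Get a fresh copy_entry for dst_deref, reusing one from the free list when
 * possible, and add it to the list of active copies.
 */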
static struct copy_entry *
copy_entry_create(struct copy_prop_var_state *state,
                  nir_deref_var *dst_deref)
{
   struct copy_entry *entry;
   if (!list_empty(&state->copy_free_list)) {
      struct list_head *item = state->copy_free_list.next;
      list_del(item);
      entry = LIST_ENTRY(struct copy_entry, item, link);
      memset(entry, 0, sizeof(*entry));
   } else {
      entry = rzalloc(state->mem_ctx, struct copy_entry);
   }

   entry->dst = dst_deref;
   list_add(&entry->link, &state->copies);

   return entry;
}

static void
copy_entry_remove(struct copy_prop_var_state *state, struct copy_entry *entry)
{
   list_del(&entry->link);
   list_add(&entry->link, &state->copy_free_list);
}

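/* compare_derefs() returns a mask of these bits.  Any nonzero result implies
 * the derefs may alias; equality implies containment in both directions.  A
 * zero result means the two derefs provably do not alias.
 */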
enum deref_compare_result {
   derefs_equal_bit = (1 << 0),
   derefs_may_alias_bit = (1 << 1),
   derefs_a_contains_b_bit = (1 << 2),
   derefs_b_contains_a_bit = (1 << 3),
};

/** Compare two deref chains
 *
 * Returns a bitmask describing how the storage referenced by a relates to
 * the storage referenced by b: whether the two are equal, whether they may
 * alias, and whether either one completely contains the other.
 *
 * NOTE: This is fairly general and could be moved to core NIR if someone else
 * ever needs it.
 */
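/* For instance, for an array variable v:  v[3] vs. v[3] compare equal,
 * v[*] contains v[3] (but not vice versa), v[i] and v[3] may alias but
 * neither contains the other, and v[2] vs. v[3] do not alias at all
 * (the result is 0).
 */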
static enum deref_compare_result
compare_derefs(nir_deref_var *a, nir_deref_var *b)
{
   if (a->var != b->var)
      return 0;

   /* Start off assuming they fully compare.  We ignore equality for now.  In
    * the end, we'll determine that by containment.
    */
   enum deref_compare_result result = derefs_may_alias_bit |
                                      derefs_a_contains_b_bit |
                                      derefs_b_contains_a_bit;

   nir_deref *a_tail = &a->deref;
   nir_deref *b_tail = &b->deref;
   while (a_tail->child && b_tail->child) {
      a_tail = a_tail->child;
      b_tail = b_tail->child;

      assert(a_tail->deref_type == b_tail->deref_type);
      switch (a_tail->deref_type) {
      case nir_deref_type_array: {
         nir_deref_array *a_arr = nir_deref_as_array(a_tail);
         nir_deref_array *b_arr = nir_deref_as_array(b_tail);

         if (a_arr->deref_array_type == nir_deref_array_type_direct &&
             b_arr->deref_array_type == nir_deref_array_type_direct) {
            /* If they're both direct and have different offsets, they
             * don't even alias, much less anything else.
             */
            if (a_arr->base_offset != b_arr->base_offset)
               return 0;
         } else if (a_arr->deref_array_type == nir_deref_array_type_wildcard) {
            if (b_arr->deref_array_type != nir_deref_array_type_wildcard)
               result &= ~derefs_b_contains_a_bit;
         } else if (b_arr->deref_array_type == nir_deref_array_type_wildcard) {
            if (a_arr->deref_array_type != nir_deref_array_type_wildcard)
               result &= ~derefs_a_contains_b_bit;
         } else if (a_arr->deref_array_type == nir_deref_array_type_indirect &&
                    b_arr->deref_array_type == nir_deref_array_type_indirect) {
            assert(a_arr->indirect.is_ssa && b_arr->indirect.is_ssa);
            if (a_arr->indirect.ssa == b_arr->indirect.ssa) {
               /* If they're different constant offsets from the same indirect
                * then they don't alias at all.
                */
               if (a_arr->base_offset != b_arr->base_offset)
                  return 0;
               /* Otherwise the indirect and base both match */
            } else {
               /* If they have different indirect offsets then we can't
                * prove anything about containment.
                */
               result &= ~(derefs_a_contains_b_bit | derefs_b_contains_a_bit);
            }
         } else {
            /* In this case, one is indirect and the other direct so we can't
             * prove anything about containment.
             */
            result &= ~(derefs_a_contains_b_bit | derefs_b_contains_a_bit);
         }
         break;
      }

      case nir_deref_type_struct: {
         nir_deref_struct *a_struct = nir_deref_as_struct(a_tail);
         nir_deref_struct *b_struct = nir_deref_as_struct(b_tail);

         /* If they're different struct members, they don't even alias */
         if (a_struct->index != b_struct->index)
            return 0;
         break;
      }

      default:
         unreachable("Invalid deref type");
      }
   }

   /* If a is longer than b, then it can't contain b */
   if (a_tail->child)
      result &= ~derefs_a_contains_b_bit;
   if (b_tail->child)
      result &= ~derefs_b_contains_a_bit;

   /* If a contains b and b contains a they must be equal. */
   if ((result & derefs_a_contains_b_bit) && (result & derefs_b_contains_a_bit))
      result |= derefs_equal_bit;

   return result;
}

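/* Called when "entry" is about to be (partially) overwritten by write_mask.
 * Any component that is being overwritten and has not been read since it was
 * last written is dead; its store instruction can be removed, provided that
 * instruction isn't still needed for some other live component.
 */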
static void
remove_dead_writes(struct copy_prop_var_state *state,
                   struct copy_entry *entry, unsigned write_mask)
{
   /* We're overwriting another entry.  Some of its components may not
    * have been read yet and, if that's the case, we may be able to delete
    * some instructions but we have to be careful.
    */
   unsigned dead_comps = write_mask & ~entry->comps_may_be_read;

   for (unsigned mask = dead_comps; mask;) {
      unsigned i = u_bit_scan(&mask);

      nir_instr *instr = entry->store_instr[i];

      /* We may have already deleted it on a previous iteration */
      if (!instr)
         continue;

      /* See if this instr is used anywhere that it's not dead */
      bool keep = false;
      for (unsigned j = 0; j < 4; j++) {
         if (entry->store_instr[j] == instr) {
            if (dead_comps & (1 << j)) {
               entry->store_instr[j] = NULL;
            } else {
               keep = true;
            }
         }
      }

      if (!keep) {
         nir_instr_remove(instr);
         state->progress = true;
      }
   }
}

static struct copy_entry *
lookup_entry_for_deref(struct copy_prop_var_state *state,
                       nir_deref_var *deref,
                       enum deref_compare_result allowed_comparisons)
{
   list_for_each_entry(struct copy_entry, iter, &state->copies, link) {
      if (compare_derefs(iter->dst, deref) & allowed_comparisons)
         return iter;
   }

   return NULL;
}

static void
mark_aliased_entries_as_read(struct copy_prop_var_state *state,
                             nir_deref_var *deref, unsigned components)
{
   list_for_each_entry(struct copy_entry, iter, &state->copies, link) {
      if (compare_derefs(iter->dst, deref) & derefs_may_alias_bit)
         iter->comps_may_be_read |= components;
   }
}

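/* Prepare for a store of write_mask components to deref: drop entries whose
 * source may alias the destination, delete component writes that this store
 * makes provably dead, invalidate entries whose destination merely may
 * alias, and return the entry (creating one if needed) whose destination is
 * exactly deref.
 */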
static struct copy_entry *
get_entry_and_kill_aliases(struct copy_prop_var_state *state,
                           nir_deref_var *deref,
                           unsigned write_mask)
{
   struct copy_entry *entry = NULL;
   list_for_each_entry_safe(struct copy_entry, iter, &state->copies, link) {
      if (!iter->src.is_ssa) {
         /* If this write aliases the source of some entry, get rid of it */
         if (compare_derefs(iter->src.deref, deref) & derefs_may_alias_bit) {
            copy_entry_remove(state, iter);
            continue;
         }
      }

      enum deref_compare_result comp = compare_derefs(iter->dst, deref);
      /* This is a store operation.  If we completely overwrite some value, we
       * want to delete any dead writes that may be present.
       */
      if (comp & derefs_b_contains_a_bit)
         remove_dead_writes(state, iter, write_mask);

      if (comp & derefs_equal_bit) {
         assert(entry == NULL);
         entry = iter;
      } else if (comp & derefs_may_alias_bit) {
         copy_entry_remove(state, iter);
      }
   }

   if (entry == NULL)
      entry = copy_entry_create(state, deref);

   return entry;
}

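/* Invalidate every tracked copy whose destination variable, or whose source
 * variable (for deref-valued entries), lives in one of the given modes.
 */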
static void
apply_barrier_for_modes(struct copy_prop_var_state *state,
                        nir_variable_mode modes)
{
   list_for_each_entry_safe(struct copy_entry, iter, &state->copies, link) {
      if ((iter->dst->var->data.mode & modes) ||
          (!iter->src.is_ssa && (iter->src.deref->var->data.mode & modes)))
         copy_entry_remove(state, iter);
   }
}

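/* Record a store of "value" to "entry" for the given write mask.  For SSA
 * values, only the written components (and their store instructions) are
 * updated; a deref-valued source is treated as writing every component.
 */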
static void
store_to_entry(struct copy_prop_var_state *state, struct copy_entry *entry,
               const struct value *value, unsigned write_mask,
               nir_instr *store_instr)
{
   entry->comps_may_be_read &= ~write_mask;
   if (value->is_ssa) {
      entry->src.is_ssa = true;
      /* Only overwrite the written components */
      for (unsigned i = 0; i < 4; i++) {
         if (write_mask & (1 << i)) {
            entry->store_instr[i] = store_instr;
            entry->src.ssa[i] = value->ssa[i];
         }
      }
   } else {
      /* Non-ssa stores always write everything */
      entry->src.is_ssa = false;
      entry->src.deref = value->deref;
      for (unsigned i = 0; i < 4; i++)
         entry->store_instr[i] = store_instr;
   }
}

/* Remove an instruction and return a cursor pointing to where it was */
static nir_cursor
instr_remove_cursor(nir_instr *instr)
{
   nir_cursor cursor;
   nir_instr *prev = nir_instr_prev(instr);
   if (prev) {
      cursor = nir_after_instr(prev);
   } else {
      cursor = nir_before_block(instr->block);
   }
   nir_instr_remove(instr);
   return cursor;
}

/* Do a "load" from an SSA-based entry and return it in "value" as a value
 * with a single SSA def.  Because an entry could reference up to 4 different
 * SSA defs, a vecN operation may be inserted to combine them into a single
 * SSA def before handing it back to the caller.  If the load instruction is
 * no longer needed, it is removed and nir_instr::block is set to NULL.  (It
 * is possible, in some cases, for the load to be used in the vecN operation
 * in which case it isn't deleted.)
 */
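/* For example, if a vec4 entry holds per-component defs { a, a, b, NULL },
 * the load becomes (roughly) vec4(a.x, a.y, b.z, old.w), where "old" is a
 * load of the destination that is kept (or newly emitted) only to supply the
 * missing component.
 */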
static bool
load_from_ssa_entry_value(struct copy_prop_var_state *state,
                          struct copy_entry *entry,
                          nir_builder *b, nir_intrinsic_instr *intrin,
                          struct value *value)
{
   *value = entry->src;
   assert(value->is_ssa);

   const struct glsl_type *type = nir_deref_tail(&entry->dst->deref)->type;
   unsigned num_components = glsl_get_vector_elements(type);

   uint8_t available = 0;
   bool all_same = true;
   for (unsigned i = 0; i < num_components; i++) {
      if (value->ssa[i])
         available |= (1 << i);

      if (value->ssa[i] != value->ssa[0])
         all_same = false;
   }

   if (all_same) {
      /* Our work here is done */
      b->cursor = instr_remove_cursor(&intrin->instr);
      intrin->instr.block = NULL;
      return true;
   }

   if (available != (1 << num_components) - 1 &&
       intrin->intrinsic == nir_intrinsic_load_var &&
       (available & nir_ssa_def_components_read(&intrin->dest.ssa)) == 0) {
      /* If none of the components read are available as SSA values, then we
       * should just bail.  Otherwise, we would end up replacing the uses of
       * the load_var with a vecN() that just gathers up its components.
       */
      return false;
   }

   b->cursor = nir_after_instr(&intrin->instr);

   nir_ssa_def *load_def =
      intrin->intrinsic == nir_intrinsic_load_var ? &intrin->dest.ssa : NULL;

   bool keep_intrin = false;
   nir_ssa_def *comps[4];
   for (unsigned i = 0; i < num_components; i++) {
      if (value->ssa[i]) {
         comps[i] = nir_channel(b, value->ssa[i], i);
      } else {
         /* We don't have anything for this component in our
          * list.  Just re-use a channel from the load.
          */
         if (load_def == NULL)
            load_def = nir_load_deref_var(b, entry->dst);

         if (load_def->parent_instr == &intrin->instr)
            keep_intrin = true;

         comps[i] = nir_channel(b, load_def, i);
      }
   }

   nir_ssa_def *vec = nir_vec(b, comps, num_components);
   for (unsigned i = 0; i < num_components; i++)
      value->ssa[i] = vec;

   if (!keep_intrin) {
      /* Removing this instruction should not touch the cursor because we
       * created the cursor after the intrinsic and have added at least one
       * instruction (the vec) since then.
       */
      assert(b->cursor.instr != &intrin->instr);
      nir_instr_remove(&intrin->instr);
      intrin->instr.block = NULL;
   }

   return true;
}

/**
 * Specialize the wildcards in a deref chain
 *
 * This function returns a deref chain identical to \param deref except that
 * some of its wildcards are replaced with indices from \param specific.  The
 * process is guided by \param guide which references the same type as \param
 * specific but has the same wildcard array lengths as \param deref.
 */
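/* As a schematic example with made-up names: if an entry records a copy from
 * color_in[*] to color_tmp[*] and we then load color_tmp[2], specializing
 * color_in[*] with guide color_tmp[*] and specific color_tmp[2] yields
 * color_in[2].
 */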
static nir_deref_var *
specialize_wildcards(nir_deref_var *deref,
                     nir_deref_var *guide,
                     nir_deref_var *specific,
                     void *mem_ctx)
{
   nir_deref_var *ret = nir_deref_var_create(mem_ctx, deref->var);

   nir_deref *deref_tail = deref->deref.child;
   nir_deref *guide_tail = guide->deref.child;
   nir_deref *spec_tail = specific->deref.child;
   nir_deref *ret_tail = &ret->deref;
   while (deref_tail) {
      switch (deref_tail->deref_type) {
      case nir_deref_type_array: {
         nir_deref_array *deref_arr = nir_deref_as_array(deref_tail);

         nir_deref_array *ret_arr = nir_deref_array_create(ret_tail);
         ret_arr->deref.type = deref_arr->deref.type;
         ret_arr->deref_array_type = deref_arr->deref_array_type;

         switch (deref_arr->deref_array_type) {
         case nir_deref_array_type_direct:
            ret_arr->base_offset = deref_arr->base_offset;
            break;
         case nir_deref_array_type_indirect:
            ret_arr->base_offset = deref_arr->base_offset;
            assert(deref_arr->indirect.is_ssa);
            ret_arr->indirect = deref_arr->indirect;
            break;
         case nir_deref_array_type_wildcard:
            /* This is where things get tricky.  We have to search through
             * the entry deref to find its corresponding wildcard and fill
             * this slot in with the value from the src.
             */
            while (guide_tail) {
               if (guide_tail->deref_type == nir_deref_type_array &&
                   nir_deref_as_array(guide_tail)->deref_array_type ==
                   nir_deref_array_type_wildcard)
                  break;

               guide_tail = guide_tail->child;
               spec_tail = spec_tail->child;
            }

            nir_deref_array *spec_arr = nir_deref_as_array(spec_tail);
            ret_arr->deref_array_type = spec_arr->deref_array_type;
            ret_arr->base_offset = spec_arr->base_offset;
            ret_arr->indirect = spec_arr->indirect;
         }

         ret_tail->child = &ret_arr->deref;
         break;
      }
      case nir_deref_type_struct: {
         nir_deref_struct *deref_struct = nir_deref_as_struct(deref_tail);

         nir_deref_struct *ret_struct =
            nir_deref_struct_create(ret_tail, deref_struct->index);
         ret_struct->deref.type = deref_struct->deref.type;

         ret_tail->child = &ret_struct->deref;
         break;
      }
      case nir_deref_type_var:
         unreachable("Invalid deref type");
      }

      deref_tail = deref_tail->child;
      ret_tail = ret_tail->child;
   }

   return ret;
}

/* Do a "load" from a deref-based entry and return it in "value" as a value.
 * The deref returned in "value" will always be a fresh copy so the caller
 * can steal it and assign it to the instruction directly without copying it
 * again.
 */
static bool
load_from_deref_entry_value(struct copy_prop_var_state *state,
                            struct copy_entry *entry,
                            nir_builder *b, nir_intrinsic_instr *intrin,
                            nir_deref_var *src, struct value *value)
{
   *value = entry->src;

   /* Walk the deref to get the two tails and also figure out if we need to
    * specialize any wildcards.
    */
   bool need_to_specialize_wildcards = false;
   nir_deref *entry_tail = &entry->dst->deref;
   nir_deref *src_tail = &src->deref;
   while (entry_tail->child && src_tail->child) {
      assert(src_tail->child->deref_type == entry_tail->child->deref_type);
      if (src_tail->child->deref_type == nir_deref_type_array) {
         nir_deref_array *entry_arr = nir_deref_as_array(entry_tail->child);
         nir_deref_array *src_arr = nir_deref_as_array(src_tail->child);

         if (src_arr->deref_array_type != nir_deref_array_type_wildcard &&
             entry_arr->deref_array_type == nir_deref_array_type_wildcard)
            need_to_specialize_wildcards = true;
      }

      entry_tail = entry_tail->child;
      src_tail = src_tail->child;
   }

   /* If the entry deref is longer than the source deref then it refers to a
    * smaller type and we can't source from it.
    */
   assert(entry_tail->child == NULL);

   if (need_to_specialize_wildcards) {
      /* The entry has some wildcards that are not in src.  This means we need
       * to construct a new deref based on the entry but using the wildcards
       * from the source and guided by the entry dst.  Oof.
       */
      value->deref = specialize_wildcards(entry->src.deref, entry->dst, src,
                                          state->mem_ctx);
   } else {
      /* We're going to need to make a copy in case we modify it below */
      value->deref = nir_deref_var_clone(value->deref, state->mem_ctx);
   }

   if (src_tail->child) {
      /* If our source deref is longer than the entry deref, that's ok because
       * it just means the entry deref needs to be extended a bit.
       */
      nir_deref *value_tail = nir_deref_tail(&value->deref->deref);
      value_tail->child = nir_deref_clone(src_tail->child, value_tail);
   }

   b->cursor = instr_remove_cursor(&intrin->instr);

   return true;
}

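/* Load from "entry" for the load/copy "intrin", dispatching to the SSA- or
 * deref-based path depending on how the entry's source value is stored.
 */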
static bool
try_load_from_entry(struct copy_prop_var_state *state, struct copy_entry *entry,
                    nir_builder *b, nir_intrinsic_instr *intrin,
                    nir_deref_var *src, struct value *value)
{
   if (entry == NULL)
      return false;

   if (entry->src.is_ssa) {
      return load_from_ssa_entry_value(state, entry, b, intrin, value);
   } else {
      return load_from_deref_entry_value(state, entry, b, intrin, src, value);
   }
}

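/* Run the copy propagation over a single block.  Loads are rewritten from
 * the tracked copies where possible, stores and copies record new entries
 * and kill aliasing ones, and barriers/vertex emits invalidate entries for
 * the affected variable modes.
 */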
static void
copy_prop_vars_block(struct copy_prop_var_state *state,
                     nir_builder *b, nir_block *block)
{
   /* Start each block with a blank slate */
   list_for_each_entry_safe(struct copy_entry, iter, &state->copies, link)
      copy_entry_remove(state, iter);

   nir_foreach_instr_safe(instr, block) {
      if (instr->type != nir_instr_type_intrinsic)
         continue;

      nir_intrinsic_instr *intrin = nir_instr_as_intrinsic(instr);
      switch (intrin->intrinsic) {
      case nir_intrinsic_barrier:
      case nir_intrinsic_memory_barrier:
         /* If we hit a barrier, we need to trash everything that may possibly
          * be accessible to another thread.  Locals, globals, and things of
          * the like are safe, however.
          */
         apply_barrier_for_modes(state, ~(nir_var_local | nir_var_global |
                                          nir_var_shader_in | nir_var_uniform));
         break;

      case nir_intrinsic_emit_vertex:
      case nir_intrinsic_emit_vertex_with_counter:
         apply_barrier_for_modes(state, nir_var_shader_out);
         break;

      case nir_intrinsic_load_var: {
         nir_deref_var *src = intrin->variables[0];

         uint8_t comps_read = nir_ssa_def_components_read(&intrin->dest.ssa);
         mark_aliased_entries_as_read(state, src, comps_read);

         struct copy_entry *src_entry =
            lookup_entry_for_deref(state, src, derefs_a_contains_b_bit);
         struct value value;
         if (try_load_from_entry(state, src_entry, b, intrin, src, &value)) {
            if (value.is_ssa) {
               /* try_load_from_entry() has already ensured that we get a
                * single SSA value that has all of the channels.  We just
                * have to do the rewrite operation.
                */
               if (intrin->instr.block) {
                  /* The lookup left our instruction in-place.  This means it
                   * must have used it to vec up a bunch of different sources.
                   * We need to be careful when rewriting uses so we don't
                   * rewrite the vecN itself.
                   */
                  nir_ssa_def_rewrite_uses_after(&intrin->dest.ssa,
                                                 nir_src_for_ssa(value.ssa[0]),
                                                 value.ssa[0]->parent_instr);
               } else {
                  nir_ssa_def_rewrite_uses(&intrin->dest.ssa,
                                           nir_src_for_ssa(value.ssa[0]));
               }
            } else {
               /* We're turning it into a load of a different variable */
               ralloc_steal(intrin, value.deref);
               intrin->variables[0] = value.deref;

               /* Put it back in again. */
               nir_builder_instr_insert(b, instr);

               value.is_ssa = true;
               for (unsigned i = 0; i < intrin->num_components; i++)
                  value.ssa[i] = &intrin->dest.ssa;
            }
            state->progress = true;
         } else {
            value.is_ssa = true;
            for (unsigned i = 0; i < intrin->num_components; i++)
               value.ssa[i] = &intrin->dest.ssa;
         }

         /* Now that we have a value, we're going to store it back so that we
          * have the right value next time we come looking for it.  In order
          * to do this, we need an exact match, not just something that
          * contains what we're looking for.
          */
         struct copy_entry *store_entry =
            lookup_entry_for_deref(state, src, derefs_equal_bit);
         if (!store_entry)
            store_entry = copy_entry_create(state, src);

         /* Set up a store to this entry with the value of the load.  This way
          * we can potentially remove subsequent loads.  However, we use a
          * NULL instruction so we don't try and delete the load on a
          * subsequent store.
          */
         store_to_entry(state, store_entry, &value,
                        ((1 << intrin->num_components) - 1), NULL);
         break;
      }

      case nir_intrinsic_store_var: {
         struct value value = {
            .is_ssa = true
         };

         for (unsigned i = 0; i < intrin->num_components; i++)
            value.ssa[i] = intrin->src[0].ssa;

         nir_deref_var *dst = intrin->variables[0];
         unsigned wrmask = nir_intrinsic_write_mask(intrin);
         struct copy_entry *entry =
            get_entry_and_kill_aliases(state, dst, wrmask);
         store_to_entry(state, entry, &value, wrmask, &intrin->instr);
         break;
      }

      case nir_intrinsic_copy_var: {
         nir_deref_var *dst = intrin->variables[0];
         nir_deref_var *src = intrin->variables[1];

         if (compare_derefs(src, dst) & derefs_equal_bit) {
            /* This is a no-op self-copy.  Get rid of it */
            nir_instr_remove(instr);
            continue;
         }

         mark_aliased_entries_as_read(state, src, 0xf);

         struct copy_entry *src_entry =
            lookup_entry_for_deref(state, src, derefs_a_contains_b_bit);
         struct value value;
         if (try_load_from_entry(state, src_entry, b, intrin, src, &value)) {
            if (value.is_ssa) {
               nir_store_deref_var(b, dst, value.ssa[0], 0xf);
               intrin = nir_instr_as_intrinsic(nir_builder_last_instr(b));
            } else {
               /* If this would be a no-op self-copy, don't bother. */
               if (compare_derefs(value.deref, dst) & derefs_equal_bit)
                  continue;

               /* Just turn it into a copy of a different deref */
               ralloc_steal(intrin, value.deref);
               intrin->variables[1] = value.deref;

               /* Put it back in again. */
               nir_builder_instr_insert(b, instr);
            }

            state->progress = true;
         } else {
            value = (struct value) {
               .is_ssa = false,
               { .deref = src },
            };
         }

         struct copy_entry *dst_entry =
            get_entry_and_kill_aliases(state, dst, 0xf);
         store_to_entry(state, dst_entry, &value, 0xf, &intrin->instr);
         break;
      }

      default:
         break;
      }
   }
}

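/* Entry point: run the block-local pass over every function implementation
 * in the shader and report whether anything changed.
 */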
bool
nir_opt_copy_prop_vars(nir_shader *shader)
{
   struct copy_prop_var_state state;

   state.shader = shader;
   state.mem_ctx = ralloc_context(NULL);
   list_inithead(&state.copies);
   list_inithead(&state.copy_free_list);

   bool global_progress = false;
   nir_foreach_function(function, shader) {
      if (!function->impl)
         continue;

      nir_builder b;
      nir_builder_init(&b, function->impl);

      state.progress = false;
      nir_foreach_block(block, function->impl)
         copy_prop_vars_block(&state, &b, block);

      if (state.progress) {
         nir_metadata_preserve(function->impl, nir_metadata_block_index |
                                               nir_metadata_dominance);
         global_progress = true;
      }
   }

   ralloc_free(state.mem_ctx);

   return global_progress;
}