Home | History | Annotate | Download | only in nir
      1 /*
      2  * Copyright © 2016 Intel Corporation
      3  *
      4  * Permission is hereby granted, free of charge, to any person obtaining a
      5  * copy of this software and associated documentation files (the "Software"),
      6  * to deal in the Software without restriction, including without limitation
      7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      8  * and/or sell copies of the Software, and to permit persons to whom the
      9  * Software is furnished to do so, subject to the following conditions:
     10  *
     11  * The above copyright notice and this permission notice (including the next
     12  * paragraph) shall be included in all copies or substantial portions of the
     13  * Software.
     14  *
     15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
     20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
     21  * IN THE SOFTWARE.
     22  */
     23 
     24 #include "nir_phi_builder.h"
     25 #include "nir/nir_vla.h"
     26 
     27 struct nir_phi_builder {
     28    nir_shader *shader;
     29    nir_function_impl *impl;
     30 
     31    /* Copied from the impl for easy access */
     32    unsigned num_blocks;
     33 
     34    /* Array of all blocks indexed by block->index. */
     35    nir_block **blocks;
     36 
     37    /* Hold on to the values so we can easily iterate over them. */
     38    struct exec_list values;
     39 
     40    /* Worklist for phi adding */
     41    unsigned iter_count;
     42    unsigned *work;
     43    nir_block **W;
     44 };
     45 
     46 #define NEEDS_PHI ((nir_ssa_def *)(intptr_t)-1)
     47 
     48 struct nir_phi_builder_value {
     49    struct exec_node node;
     50 
     51    struct nir_phi_builder *builder;
     52 
     53    /* Needed so we can create phis and undefs */
     54    unsigned num_components;
     55    unsigned bit_size;
     56 
     57    /* The list of phi nodes associated with this value.  Phi nodes are not
     58     * added directly.  Instead, they are created, the instr->block pointer
     59     * set, and then added to this list.  Later, in phi_builder_finish, we
     60     * set up their sources and add them to the top of their respective
     61     * blocks.
     62     */
     63    struct exec_list phis;
     64 
     65    /* Array of SSA defs, indexed by block.  For each block, this array has has
     66     * one of three types of values:
     67     *
     68     *  - NULL. Indicates that there is no known definition in this block.  If
     69     *    you need to find one, look at the block's immediate dominator.
     70     *
     71     *  - NEEDS_PHI. Indicates that the block may need a phi node but none has
     72     *    been created yet.  If a def is requested for a block, a phi will need
     73     *    to be created.
     74     *
     75     *  - A regular SSA def.  This will be either the result of a phi node or
     76     *    one of the defs provided by nir_phi_builder_value_set_blocK_def().
     77     */
     78    nir_ssa_def *defs[0];
     79 };
     80 
     81 struct nir_phi_builder *
     82 nir_phi_builder_create(nir_function_impl *impl)
     83 {
     84    struct nir_phi_builder *pb = rzalloc(NULL, struct nir_phi_builder);
     85 
     86    pb->shader = impl->function->shader;
     87    pb->impl = impl;
     88 
     89    assert(impl->valid_metadata & (nir_metadata_block_index |
     90                                   nir_metadata_dominance));
     91 
     92    pb->num_blocks = impl->num_blocks;
     93    pb->blocks = ralloc_array(pb, nir_block *, pb->num_blocks);
     94    nir_foreach_block(block, impl) {
     95       pb->blocks[block->index] = block;
     96    }
     97 
     98    exec_list_make_empty(&pb->values);
     99 
    100    pb->iter_count = 0;
    101    pb->work = rzalloc_array(pb, unsigned, pb->num_blocks);
    102    pb->W = ralloc_array(pb, nir_block *, pb->num_blocks);
    103 
    104    return pb;
    105 }
    106 
    107 struct nir_phi_builder_value *
    108 nir_phi_builder_add_value(struct nir_phi_builder *pb, unsigned num_components,
    109                           unsigned bit_size, const BITSET_WORD *defs)
    110 {
    111    struct nir_phi_builder_value *val;
    112    unsigned i, w_start = 0, w_end = 0;
    113 
    114    val = rzalloc_size(pb, sizeof(*val) + sizeof(val->defs[0]) * pb->num_blocks);
    115    val->builder = pb;
    116    val->num_components = num_components;
    117    val->bit_size = bit_size;
    118    exec_list_make_empty(&val->phis);
    119    exec_list_push_tail(&pb->values, &val->node);
    120 
    121    pb->iter_count++;
    122 
    123    BITSET_WORD tmp;
    124    BITSET_FOREACH_SET(i, tmp, defs, pb->num_blocks) {
    125       if (pb->work[i] < pb->iter_count)
    126          pb->W[w_end++] = pb->blocks[i];
    127       pb->work[i] = pb->iter_count;
    128    }
    129 
    130    while (w_start != w_end) {
    131       nir_block *cur = pb->W[w_start++];
    132       struct set_entry *dom_entry;
    133       set_foreach(cur->dom_frontier, dom_entry) {
    134          nir_block *next = (nir_block *) dom_entry->key;
    135 
    136          /* If there's more than one return statement, then the end block
    137           * can be a join point for some definitions. However, there are
    138           * no instructions in the end block, so nothing would use those
    139           * phi nodes. Of course, we couldn't place those phi nodes
    140           * anyways due to the restriction of having no instructions in the
    141           * end block...
    142           */
    143          if (next == pb->impl->end_block)
    144             continue;
    145 
    146          if (val->defs[next->index] == NULL) {
    147             /* Instead of creating a phi node immediately, we simply set the
    148              * value to the magic value NEEDS_PHI.  Later, we create phi nodes
    149              * on demand in nir_phi_builder_value_get_block_def().
    150              */
    151             val->defs[next->index] = NEEDS_PHI;
    152 
    153             if (pb->work[next->index] < pb->iter_count) {
    154                pb->work[next->index] = pb->iter_count;
    155                pb->W[w_end++] = next;
    156             }
    157          }
    158       }
    159    }
    160 
    161    return val;
    162 }
    163 
    164 void
    165 nir_phi_builder_value_set_block_def(struct nir_phi_builder_value *val,
    166                                     nir_block *block, nir_ssa_def *def)
    167 {
    168    val->defs[block->index] = def;
    169 }
    170 
    171 nir_ssa_def *
    172 nir_phi_builder_value_get_block_def(struct nir_phi_builder_value *val,
    173                                     nir_block *block)
    174 {
    175    /* Crawl up the dominance tree and find the closest dominator for which we
    176     * have a valid ssa_def, if any.
    177     */
    178    nir_block *dom = block;
    179    while (dom && val->defs[dom->index] == NULL)
    180       dom = dom->imm_dom;
    181 
    182    nir_ssa_def *def;
    183    if (dom == NULL) {
    184       /* No dominator means either that we crawled to the top without ever
    185        * finding a definition or that this block is unreachable.  In either
    186        * case, the value is undefined so we need an SSA undef.
    187        */
    188       nir_ssa_undef_instr *undef =
    189          nir_ssa_undef_instr_create(val->builder->shader,
    190                                     val->num_components,
    191                                     val->bit_size);
    192       nir_instr_insert(nir_before_cf_list(&val->builder->impl->body),
    193                        &undef->instr);
    194       def = &undef->def;
    195    } else if (val->defs[dom->index] == NEEDS_PHI) {
    196       /* The magic value NEEDS_PHI indicates that the block needs a phi node
    197        * but none has been created.  We need to create one now so we can
    198        * return it to the caller.
    199        *
    200        * Because a phi node may use SSA defs that it does not dominate (this
    201        * happens in loops), we do not yet have enough information to fully
    202        * fill out the phi node.  Instead, the phi nodes we create here will be
    203        * empty (have no sources) and won't actually be placed in the block's
    204        * instruction list yet.  Later, in nir_phi_builder_finish(), we walk
    205        * over all of the phi instructions, fill out the sources lists, and
    206        * place them at the top of their respective block's instruction list.
    207        *
    208        * Creating phi nodes on-demand allows us to avoid creating dead phi
    209        * nodes that will just get deleted later. While this probably isn't a
    210        * big win for a full into-SSA pass, other users may use the phi builder
    211        * to make small SSA form repairs where most of the phi nodes will never
    212        * be used.
    213        */
    214       nir_phi_instr *phi = nir_phi_instr_create(val->builder->shader);
    215       nir_ssa_dest_init(&phi->instr, &phi->dest, val->num_components,
    216                         val->bit_size, NULL);
    217       phi->instr.block = dom;
    218       exec_list_push_tail(&val->phis, &phi->instr.node);
    219       def = val->defs[dom->index] = &phi->dest.ssa;
    220    } else {
    221       /* In this case, we have an actual SSA def.  It's either the result of a
    222        * phi node created by the case above or one passed to us through
    223        * nir_phi_builder_value_set_block_def().
    224        */
    225       def = val->defs[dom->index];
    226    }
    227 
    228    /* Walk the chain and stash the def in all of the applicable blocks.  We do
    229     * this for two reasons:
    230     *
    231     *  1) To speed up lookup next time even if the next time is called from a
    232     *     block that is not dominated by this one.
    233     *  2) To avoid unneeded recreation of phi nodes and undefs.
    234     */
    235    for (dom = block; dom && val->defs[dom->index] == NULL; dom = dom->imm_dom)
    236       val->defs[dom->index] = def;
    237 
    238    return def;
    239 }
    240 
    241 static int
    242 compare_blocks(const void *_a, const void *_b)
    243 {
    244    nir_block * const * a = _a;
    245    nir_block * const * b = _b;
    246 
    247    return (*a)->index - (*b)->index;
    248 }
    249 
    250 void
    251 nir_phi_builder_finish(struct nir_phi_builder *pb)
    252 {
    253    const unsigned num_blocks = pb->num_blocks;
    254    NIR_VLA(nir_block *, preds, num_blocks);
    255 
    256    foreach_list_typed(struct nir_phi_builder_value, val, node, &pb->values) {
    257       /* We treat the linked list of phi nodes like a worklist.  The list is
    258        * pre-populated by calls to nir_phi_builder_value_get_block_def() that
    259        * create phi nodes.  As we fill in the sources of phi nodes, more may
    260        * be created and are added to the end of the list.
    261        *
    262        * Because we are adding and removing phi nodes from the list as we go,
    263        * we can't iterate over it normally.  Instead, we just iterate until
    264        * the list is empty.
    265        */
    266       while (!exec_list_is_empty(&val->phis)) {
    267          struct exec_node *head = exec_list_get_head(&val->phis);
    268          nir_phi_instr *phi = exec_node_data(nir_phi_instr, head, instr.node);
    269          assert(phi->instr.type == nir_instr_type_phi);
    270 
    271          exec_node_remove(&phi->instr.node);
    272 
    273          /* Construct an array of predecessors.  We sort it to ensure
    274           * determinism in the phi insertion algorithm.
    275           *
    276           * XXX: Calling qsort this many times seems expensive.
    277           */
    278          int num_preds = 0;
    279          struct set_entry *entry;
    280          set_foreach(phi->instr.block->predecessors, entry)
    281             preds[num_preds++] = (nir_block *)entry->key;
    282          qsort(preds, num_preds, sizeof(*preds), compare_blocks);
    283 
    284          for (unsigned i = 0; i < num_preds; i++) {
    285             nir_phi_src *src = ralloc(phi, nir_phi_src);
    286             src->pred = preds[i];
    287             src->src = nir_src_for_ssa(
    288                nir_phi_builder_value_get_block_def(val, preds[i]));
    289             exec_list_push_tail(&phi->srcs, &src->node);
    290          }
    291 
    292          nir_instr_insert(nir_before_block(phi->instr.block), &phi->instr);
    293       }
    294    }
    295 
    296    ralloc_free(pb);
    297 }
    298