Home | History | Annotate | Download | only in nir
      1 /*
      2  * Copyright  2016 Intel Corporation
      3  *
      4  * Permission is hereby granted, free of charge, to any person obtaining a
      5  * copy of this software and associated documentation files (the "Software"),
      6  * to deal in the Software without restriction, including without limitation
      7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      8  * and/or sell copies of the Software, and to permit persons to whom the
      9  * Software is furnished to do so, subject to the following conditions:
     10  *
     11  * The above copyright notice and this permission notice (including the next
     12  * paragraph) shall be included in all copies or substantial portions of the
     13  * Software.
     14  *
     15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
     20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
     21  * IN THE SOFTWARE.
     22  */
     23 
     24 #include "nir.h"
     25 #include "nir_control_flow.h"
     26 
     27 /**
     28  * This optimization detects if statements at the tops of loops where the
     29  * condition is a phi node of two constants and moves half of the if to above
     30  * the loop and the other half of the if to the end of the loop.  A simple for
     31  * loop "for (int i = 0; i < 4; i++)", when run through the SPIR-V front-end,
     32  * ends up looking something like this:
     33  *
     34  * vec1 32 ssa_0 = load_const (0x00000000)
     35  * vec1 32 ssa_1 = load_const (0xffffffff)
     36  * loop {
     37  *    block block_1:
     38  *    vec1 32 ssa_2 = phi block_0: ssa_0, block_7: ssa_5
     39  *    vec1 32 ssa_3 = phi block_0: ssa_0, block_7: ssa_1
     40  *    if ssa_2 {
     41  *       block block_2:
     42  *       vec1 32 ssa_4 = load_const (0x00000001)
     43  *       vec1 32 ssa_5 = iadd ssa_2, ssa_4
     44  *    } else {
     45  *       block block_3:
     46  *    }
     47  *    block block_4:
     48  *    vec1 32 ssa_6 = load_const (0x00000004)
     49  *    vec1 32 ssa_7 = ilt ssa_5, ssa_6
     50  *    if ssa_7 {
     51  *       block block_5:
     52  *    } else {
     53  *       block block_6:
     54  *       break
     55  *    }
     56  *    block block_7:
     57  * }
     58  *
     59  * This turns it into something like this:
     60  *
     61  * // Stuff from block 1
     62  * // Stuff from block 3
     63  * loop {
     64  *    block block_1:
     65  *    vec1 32 ssa_3 = phi block_0: ssa_0, block_7: ssa_1
     66  *    vec1 32 ssa_6 = load_const (0x00000004)
     67  *    vec1 32 ssa_7 = ilt ssa_5, ssa_6
     68  *    if ssa_7 {
     69  *       block block_5:
     70  *    } else {
     71  *       block block_6:
     72  *       break
     73  *    }
     74  *    block block_7:
     75  *    // Stuff from block 1
     76  *    // Stuff from block 2
     77  *    vec1 32 ssa_4 = load_const (0x00000001)
     78  *    vec1 32 ssa_5 = iadd ssa_2, ssa_4
     79  * }
     80  */
     81 static bool
     82 opt_peel_loop_initial_if(nir_loop *loop)
     83 {
     84    nir_block *header_block = nir_loop_first_block(loop);
     85    nir_block *prev_block =
     86       nir_cf_node_as_block(nir_cf_node_prev(&loop->cf_node));
     87 
     88    /* It would be insane if this were not true */
     89    assert(_mesa_set_search(header_block->predecessors, prev_block));
     90 
     91    /* The loop must have exactly one continue block which could be a block
     92     * ending in a continue instruction or the "natural" continue from the
     93     * last block in the loop back to the top.
     94     */
     95    if (header_block->predecessors->entries != 2)
     96       return false;
     97 
     98    nir_block *continue_block = NULL;
     99    struct set_entry *pred_entry;
    100    set_foreach(header_block->predecessors, pred_entry) {
    101       if (pred_entry->key != prev_block)
    102          continue_block = (void *)pred_entry->key;
    103    }
    104 
    105    nir_cf_node *if_node = nir_cf_node_next(&header_block->cf_node);
    106    if (!if_node || if_node->type != nir_cf_node_if)
    107       return false;
    108 
    109    nir_if *nif = nir_cf_node_as_if(if_node);
    110    assert(nif->condition.is_ssa);
    111 
    112    nir_ssa_def *cond = nif->condition.ssa;
    113    if (cond->parent_instr->type != nir_instr_type_phi)
    114       return false;
    115 
    116    nir_phi_instr *cond_phi = nir_instr_as_phi(cond->parent_instr);
    117    if (cond->parent_instr->block != header_block)
    118       return false;
    119 
    120    /* We already know we have exactly one continue */
    121    assert(exec_list_length(&cond_phi->srcs) == 2);
    122 
    123    uint32_t entry_val = 0, continue_val = 0;
    124    nir_foreach_phi_src(src, cond_phi) {
    125       assert(src->src.is_ssa);
    126       nir_const_value *const_src = nir_src_as_const_value(src->src);
    127       if (!const_src)
    128          return false;
    129 
    130       if (src->pred == continue_block) {
    131          continue_val = const_src->u32[0];
    132       } else {
    133          assert(src->pred == prev_block);
    134          entry_val = const_src->u32[0];
    135       }
    136    }
    137 
    138    /* If they both execute or both don't execute, this is a job for
    139     * nir_dead_cf, not this pass.
    140     */
    141    if ((entry_val && continue_val) || (!entry_val && !continue_val))
    142       return false;
    143 
    144    struct exec_list *continue_list, *entry_list;
    145    if (continue_val) {
    146       continue_list = &nif->then_list;
    147       entry_list = &nif->else_list;
    148    } else {
    149       continue_list = &nif->else_list;
    150       entry_list = &nif->then_list;
    151    }
    152 
    153    /* We want to be moving the contents of entry_list to above the loop so it
    154     * can't contain any break or continue instructions.
    155     */
    156    foreach_list_typed(nir_cf_node, cf_node, node, entry_list) {
    157       nir_foreach_block_in_cf_node(block, cf_node) {
    158          nir_instr *last_instr = nir_block_last_instr(block);
    159          if (last_instr && last_instr->type == nir_instr_type_jump)
    160             return false;
    161       }
    162    }
    163 
    164    /* Before we do anything, convert the loop to LCSSA.  We're about to
    165     * replace a bunch of SSA defs with registers and this will prevent any of
    166     * it from leaking outside the loop.
    167     */
    168    nir_convert_loop_to_lcssa(loop);
    169 
    170    nir_block *after_if_block =
    171       nir_cf_node_as_block(nir_cf_node_next(&nif->cf_node));
    172 
    173    /* Get rid of phis in the header block since we will be duplicating it */
    174    nir_lower_phis_to_regs_block(header_block);
    175    /* Get rid of phis after the if since dominance will change */
    176    nir_lower_phis_to_regs_block(after_if_block);
    177 
    178    /* Get rid of SSA defs in the pieces we're about to move around */
    179    nir_lower_ssa_defs_to_regs_block(header_block);
    180    nir_foreach_block_in_cf_node(block, &nif->cf_node)
    181       nir_lower_ssa_defs_to_regs_block(block);
    182 
    183    nir_cf_list header, tmp;
    184    nir_cf_extract(&header, nir_before_block(header_block),
    185                            nir_after_block(header_block));
    186 
    187    nir_cf_list_clone(&tmp, &header, &loop->cf_node, NULL);
    188    nir_cf_reinsert(&tmp, nir_before_cf_node(&loop->cf_node));
    189    nir_cf_extract(&tmp, nir_before_cf_list(entry_list),
    190                         nir_after_cf_list(entry_list));
    191    nir_cf_reinsert(&tmp, nir_before_cf_node(&loop->cf_node));
    192 
    193    nir_cf_reinsert(&header, nir_after_block_before_jump(continue_block));
    194    nir_cf_extract(&tmp, nir_before_cf_list(continue_list),
    195                         nir_after_cf_list(continue_list));
    196    nir_cf_reinsert(&tmp, nir_after_block_before_jump(continue_block));
    197 
    198    nir_cf_node_remove(&nif->cf_node);
    199 
    200    return true;
    201 }
    202 
    203 static bool
    204 opt_if_cf_list(struct exec_list *cf_list)
    205 {
    206    bool progress = false;
    207    foreach_list_typed(nir_cf_node, cf_node, node, cf_list) {
    208       switch (cf_node->type) {
    209       case nir_cf_node_block:
    210          break;
    211 
    212       case nir_cf_node_if: {
    213          nir_if *nif = nir_cf_node_as_if(cf_node);
    214          progress |= opt_if_cf_list(&nif->then_list);
    215          progress |= opt_if_cf_list(&nif->else_list);
    216          break;
    217       }
    218 
    219       case nir_cf_node_loop: {
    220          nir_loop *loop = nir_cf_node_as_loop(cf_node);
    221          progress |= opt_if_cf_list(&loop->body);
    222          progress |= opt_peel_loop_initial_if(loop);
    223          break;
    224       }
    225 
    226       case nir_cf_node_function:
    227          unreachable("Invalid cf type");
    228       }
    229    }
    230 
    231    return progress;
    232 }
    233 
    234 bool
    235 nir_opt_if(nir_shader *shader)
    236 {
    237    bool progress = false;
    238 
    239    nir_foreach_function(function, shader) {
    240       if (function->impl == NULL)
    241          continue;
    242 
    243       if (opt_if_cf_list(&function->impl->body)) {
    244          nir_metadata_preserve(function->impl, nir_metadata_none);
    245 
    246          /* If that made progress, we're no longer really in SSA form.  We
    247           * need to convert registers back into SSA defs and clean up SSA defs
    248           * that don't dominate their uses.
    249           */
    250          nir_lower_regs_to_ssa_impl(function->impl);
    251          progress = true;
    252       }
    253    }
    254 
    255    return progress;
    256 }
    257