Home | History | Annotate | Download | only in nir
      1 /*
      2  * Copyright  2016 Intel Corporation
      3  *
      4  * Permission is hereby granted, free of charge, to any person obtaining a
      5  * copy of this software and associated documentation files (the "Software"),
      6  * to deal in the Software without restriction, including without limitation
      7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      8  * and/or sell copies of the Software, and to permit persons to whom the
      9  * Software is furnished to do so, subject to the following conditions:
     10  *
     11  * The above copyright notice and this permission notice (including the next
     12  * paragraph) shall be included in all copies or substantial portions of the
     13  * Software.
     14  *
     15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
     20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
     21  * IN THE SOFTWARE.
     22  */
     23 
     24 #include "nir.h"
     25 
     26 /**
     27  * \file nir_opt_move_comparisons.c
     28  *
     29  * This pass moves ALU comparison operations just before their first use.
     30  *
     31  * It only moves instructions within a single basic block; cross-block
     32  * movement is left to global code motion.
     33  *
     34  * Many GPUs generate condition codes for comparisons, and use predication
     35  * for conditional selects and control flow.  In a sequence such as:
     36  *
     37  *     vec1 32 ssa_1 = flt a b
     38  *     <some other operations>
     39  *     vec1 32 ssa_2 = bcsel ssa_1 c d
     40  *
     41  * the backend would likely do the comparison, producing condition codes,
     42  * then save those to a boolean value.  The intervening operations might
     43  * trash the condition codes.  Then, in order to do the bcsel, it would
     44  * need to re-populate the condition code register based on the boolean.
     45  *
     46  * By moving the comparison just before the bcsel, the condition codes could
     47  * be used directly.  This eliminates the need to reload them from the boolean
     48  * (generally eliminating an instruction).  It may also eliminate the need to
     49  * create a boolean value altogether (unless it's used elsewhere), which could
     50  * lower register pressure.
     51  */
     52 
     53 static bool
     54 is_comparison(nir_op op)
     55 {
     56    switch (op) {
     57    case nir_op_flt:
     58    case nir_op_fge:
     59    case nir_op_feq:
     60    case nir_op_fne:
     61    case nir_op_ilt:
     62    case nir_op_ult:
     63    case nir_op_ige:
     64    case nir_op_uge:
     65    case nir_op_ieq:
     66    case nir_op_ine:
     67    case nir_op_i2b:
     68    case nir_op_f2b:
     69    case nir_op_inot:
     70    case nir_op_fnot:
     71       return true;
     72    default:
     73       return false;
     74    }
     75 }
     76 
     77 static bool
     78 move_comparison_source(nir_src *src, nir_block *block, nir_instr *before)
     79 {
     80    if (!src->is_ssa)
     81       return false;
     82 
     83    nir_instr *src_instr = src->ssa->parent_instr;
     84 
     85    if (src_instr->block == block &&
     86        src_instr->type == nir_instr_type_alu &&
     87        is_comparison(nir_instr_as_alu(src_instr)->op)) {
     88 
     89       exec_node_remove(&src_instr->node);
     90 
     91       if (before)
     92          exec_node_insert_node_before(&before->node, &src_instr->node);
     93       else
     94          exec_list_push_tail(&block->instr_list, &src_instr->node);
     95 
     96       return true;
     97    }
     98 
     99    return false;
    100 }
    101 
    102 static bool
    103 move_comparison_source_cb(nir_src *src, void *data)
    104 {
    105    bool *progress = data;
    106 
    107    nir_instr *instr = src->parent_instr;
    108    if (move_comparison_source(src, instr->block, instr))
    109       *progress = true;
    110 
    111    return true; /* nir_foreach_src should keep going */
    112 }
    113 
    114 static bool
    115 move_comparisons(nir_block *block)
    116 {
    117    bool progress = false;
    118 
    119    /* We use a simple approach: walk instructions backwards.
    120     *
    121     * If the instruction's source is a comparison from the same block,
    122     * simply move it here.  This may break SSA if it's used earlier in
    123     * the block as well.  However, as we walk backwards, we'll find the
    124     * earlier use and move it again, further up.  It eventually ends up
    125     * dominating all uses again, restoring SSA form.
    126     *
    127     * Before walking instructions, we consider the if-condition at the
    128     * end of the block, if one exists.  It's effectively a use at the
    129     * bottom of the block.
    130     */
    131    nir_if *iff = nir_block_get_following_if(block);
    132    if (iff) {
    133       progress |= move_comparison_source(&iff->condition, block, NULL);
    134    }
    135 
    136    nir_foreach_instr_reverse(instr, block) {
    137       /* The sources of phi instructions happen after the predecessor block
    138        * but before this block.  (Yes, that's between blocks).  This means
    139        * that we don't need to move them in order for them to be correct.
    140        * We could move them to encourage comparisons that are used in a phi to
    141        * the end of the block, doing so correctly would make the pass
    142        * substantially more complicated and wouldn't gain us anything since
    143        * the phi can't use a flag value anyway.
    144        */
    145       if (instr->type == nir_instr_type_phi) {
    146          /* We're going backwards so everything else is a phi too */
    147          break;
    148       } else if (instr->type == nir_instr_type_alu) {
    149          /* Walk ALU instruction sources backwards so that bcsel's boolean
    150           * condition is processed last.
    151           */
    152          nir_alu_instr *alu = nir_instr_as_alu(instr);
    153          for (int i = nir_op_infos[alu->op].num_inputs - 1; i >= 0; i--) {
    154             progress |= move_comparison_source(&alu->src[i].src,
    155                                                block, instr);
    156          }
    157       } else {
    158          nir_foreach_src(instr, move_comparison_source_cb, &progress);
    159       }
    160    }
    161 
    162    return progress;
    163 }
    164 
    165 bool
    166 nir_opt_move_comparisons(nir_shader *shader)
    167 {
    168    bool progress = false;
    169 
    170    nir_foreach_function(func, shader) {
    171       if (!func->impl)
    172          continue;
    173 
    174       nir_foreach_block(block, func->impl) {
    175          if (move_comparisons(block)) {
    176             nir_metadata_preserve(func->impl, nir_metadata_block_index |
    177                                               nir_metadata_dominance |
    178                                               nir_metadata_live_ssa_defs);
    179             progress = true;
    180          }
    181       }
    182    }
    183 
    184    return progress;
    185 }
    186