/*
 * Copyright © 2014 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
 * IN THE SOFTWARE.
 *
 * Authors:
 *    Jason Ekstrand (jason@jlekstrand.net)
 *
 */

#include "nir.h"

/*
 * Implements a pass that tries to move uses of vecN sources to their
 * destinations.  This is kind of like an inverse copy-propagation pass.
 * For instance, if you have
 *
 * ssa_1 = vec4(a, b, c, d)
 * ssa_2 = fadd(a, b)
 *
 * this will be turned into
 *
 * ssa_1 = vec4(a, b, c, d)
 * ssa_2 = fadd(ssa_1.x, ssa_1.y)
 *
 * While this is "worse" because it adds a bunch of otherwise unneeded
 * dependencies, it actually makes it much easier for vec4-based backends to
 * coalesce the MOVs that result from the vec4 operation because they don't
 * have to worry about quite as many reads.
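 *
 * Swizzles on the vec sources are remapped as part of the rewrite.  For
 * example, given
 *
 * ssa_1 = vec4(a.y, a.x, a.z, b.x)
 * ssa_2 = fadd(a.x, a.z)
 *
 * the fadd becomes
 *
 * ssa_2 = fadd(ssa_1.y, ssa_1.z)
 *
 * because component x of a is written to channel y of ssa_1 and component z
 * of a to channel z of ssa_1.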
 */

/* Returns true if the given SSA def dominates the instruction.  An SSA def is
 * considered to *not* dominate the instruction that defines it.
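 *
 * Ordering within a block relies on the per-instruction indices assigned by
 * nir_index_instrs() in the pass entry point below; across blocks, the
 * dominance metadata required there is used instead.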
 */
static bool
ssa_def_dominates_instr(nir_ssa_def *def, nir_instr *instr)
{
   if (instr->index <= def->parent_instr->index) {
      return false;
   } else if (def->parent_instr->block == instr->block) {
      return def->parent_instr->index < instr->index;
   } else {
      return nir_block_dominates(def->parent_instr->block, instr->block);
   }
}

static bool
move_vec_src_uses_to_dest_block(nir_block *block)
{
   nir_foreach_instr(instr, block) {
      if (instr->type != nir_instr_type_alu)
         continue;

      nir_alu_instr *vec = nir_instr_as_alu(instr);

      switch (vec->op) {
      case nir_op_vec2:
      case nir_op_vec3:
      case nir_op_vec4:
         break;
      default:
         continue; /* The loop */
      }

      /* Can't handle non-SSA vec operations */
      if (!vec->dest.dest.is_ssa)
         continue;

      /* Can't handle saturation: the saturate flag applies to the value
       * written to the vec destination, so the destination channels do not
       * hold the raw source values that other instructions use.
       */
      if (vec->dest.saturate)
         continue;

      /* First, mark all of the sources we are going to consider for rewriting
       * to the destination
       */
      int srcs_remaining = 0;
      for (unsigned i = 0; i < nir_op_infos[vec->op].num_inputs; i++) {
         /* We can't rewrite a source if it's not in SSA form */
         if (!vec->src[i].src.is_ssa)
            continue;

         /* We can't rewrite a source if it has modifiers */
         if (vec->src[i].abs || vec->src[i].negate)
            continue;

         srcs_remaining |= 1 << i;
      }
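
      /* For example, for vec4(a, b, a, c) where c carries a negate modifier,
       * srcs_remaining is now 0b0111 and only the first three sources will be
       * considered below.
       */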

      /* We can't actually do anything with this instruction */
      if (srcs_remaining == 0)
         continue;

      /* Process the sources one SSA def at a time, clearing bits from
       * srcs_remaining as sources are handled.
       */
      while (srcs_remaining) {
         unsigned i = ffs(srcs_remaining) - 1;
         int8_t swizzle[4] = { -1, -1, -1, -1 };

         for (unsigned j = i; j < nir_op_infos[vec->op].num_inputs; j++) {
            /* Only consider sources that survived the checks above; bit i
             * itself is still set at this point.
             */
            if (!(srcs_remaining & (1 << j)))
               continue;

            if (vec->src[j].src.ssa != vec->src[i].src.ssa)
               continue;

            /* Mark the given source as having been handled */
            srcs_remaining &= ~(1 << j);

            /* Mark the appropriate channel as coming from src j */
            swizzle[vec->src[j].swizzle[0]] = j;
         }
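
         /* For example, for vec4(a.y, a.x, a.z, b.x) with i == 0 (source a),
          * this builds swizzle = { 1, 0, 2, -1 }: component x of a now lives
          * in channel y of the vec, component y in channel x, component z in
          * channel z, and component w of a is not available at all.
          */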

         nir_foreach_use_safe(use, vec->src[i].src.ssa) {
            if (use->parent_instr == &vec->instr)
               continue;

            /* We need to dominate the use if we are going to rewrite it */
            if (!ssa_def_dominates_instr(&vec->dest.dest.ssa, use->parent_instr))
               continue;

            /* For now, we'll just rewrite ALU instructions */
            if (use->parent_instr->type != nir_instr_type_alu)
               continue;

            assert(use->is_ssa);

            nir_alu_instr *use_alu = nir_instr_as_alu(use->parent_instr);

            /* Figure out which source we're actually looking at */
            nir_alu_src *use_alu_src = exec_node_data(nir_alu_src, use, src);
            unsigned src_idx = use_alu_src - use_alu->src;
            assert(src_idx < nir_op_infos[use_alu->op].num_inputs);

            bool can_reswizzle = true;
            for (unsigned j = 0; j < 4; j++) {
               if (!nir_alu_instr_channel_used(use_alu, src_idx, j))
                  continue;

               if (swizzle[use_alu_src->swizzle[j]] == -1) {
                  can_reswizzle = false;
                  break;
               }
            }
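
            /* Every channel the use reads must be produced somewhere in the
             * vec.  Continuing the example above, a use of a.w cannot be
             * reswizzled because component w of a never appears in ssa_1.
             */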

            if (!can_reswizzle)
               continue;

            /* At this point, we have determined that the given use can be
             * reswizzled to actually use the destination of the vecN operation.
             * Go ahead and rewrite it as needed.
             */
            nir_instr_rewrite_src(use->parent_instr, use,
                                  nir_src_for_ssa(&vec->dest.dest.ssa));
            for (unsigned j = 0; j < 4; j++) {
               if (!nir_alu_instr_channel_used(use_alu, src_idx, j))
                  continue;

               use_alu_src->swizzle[j] = swizzle[use_alu_src->swizzle[j]];
            }
         }
      }
   }

   return true;
}

static void
nir_move_vec_src_uses_to_dest_impl(nir_shader *shader, nir_function_impl *impl)
{
   nir_metadata_require(impl, nir_metadata_dominance);

   nir_index_instrs(impl);

   nir_foreach_block(block, impl) {
      move_vec_src_uses_to_dest_block(block);
   }

   nir_metadata_preserve(impl, nir_metadata_block_index |
                               nir_metadata_dominance);
}

void
nir_move_vec_src_uses_to_dest(nir_shader *shader)
{
   nir_foreach_function(function, shader) {
      if (function->impl)
         nir_move_vec_src_uses_to_dest_impl(shader, function->impl);
   }
}
    201