Home | History | Annotate | Download | only in nir
      1 /*
      2  * Copyright © 2014 Intel Corporation
      3  *
      4  * Permission is hereby granted, free of charge, to any person obtaining a
      5  * copy of this software and associated documentation files (the "Software"),
      6  * to deal in the Software without restriction, including without limitation
      7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      8  * and/or sell copies of the Software, and to permit persons to whom the
      9  * Software is furnished to do so, subject to the following conditions:
     10  *
     11  * The above copyright notice and this permission notice (including the next
     12  * paragraph) shall be included in all copies or substantial portions of the
     13  * Software.
     14  *
     15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
     20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
     21  * IN THE SOFTWARE.
     22  *
     23  * Authors:
     24  *    Jason Ekstrand (jason@jlekstrand.net)
     25  *
     26  */
     27 
     28 #include "nir.h"
     29 
     30 /*
     31  * This pass lowers the neg and abs operations to source modifiers and the
     32  * sat operation to a destination modifier on ALU instructions, to make
     33  * things nicer for the backend.  It's just much easier to not have these
     34  * modifiers around while we're doing optimizations.
     35  */
     35 
     36 static bool
     37 nir_lower_to_source_mods_block(nir_block *block)
     38 {
     39    nir_foreach_instr(instr, block) {
     40       if (instr->type != nir_instr_type_alu)
     41          continue;
     42 
     43       nir_alu_instr *alu = nir_instr_as_alu(instr);
     44 
     45       for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) {
     46          if (!alu->src[i].src.is_ssa)
     47             continue;
     48 
     49          if (alu->src[i].src.ssa->parent_instr->type != nir_instr_type_alu)
     50             continue;
     51 
     52          nir_alu_instr *parent = nir_instr_as_alu(alu->src[i].src.ssa->parent_instr);
     53 
     54          if (parent->dest.saturate)
     55             continue;
     56 
     57          switch (nir_alu_type_get_base_type(nir_op_infos[alu->op].input_types[i])) {
     58          case nir_type_float:
     59             if (parent->op != nir_op_fmov)
     60                continue;
     61             break;
     62          case nir_type_int:
     63             if (parent->op != nir_op_imov)
     64                continue;
     65             break;
     66          default:
     67             continue;
     68          }
     69 
     70          /* We can only do a rewrite if the source we are copying is SSA.
     71           * Otherwise, moving the read might invalidly reorder reads/writes
     72           * on a register.
     73           */
     74          if (!parent->src[0].src.is_ssa)
     75             continue;
     76 
     77          nir_instr_rewrite_src(instr, &alu->src[i].src, parent->src[0].src);
     78          if (alu->src[i].abs) {
     79             /* abs trumps both neg and abs, do nothing */
     80          } else {
     81             alu->src[i].negate = (alu->src[i].negate != parent->src[0].negate);
     82             alu->src[i].abs |= parent->src[0].abs;
     83          }
     84 
     85          for (int j = 0; j < 4; ++j) {
     86             if (!nir_alu_instr_channel_used(alu, i, j))
     87                continue;
     88             alu->src[i].swizzle[j] = parent->src[0].swizzle[alu->src[i].swizzle[j]];
     89          }
     90 
     91          if (list_empty(&parent->dest.dest.ssa.uses) &&
     92              list_empty(&parent->dest.dest.ssa.if_uses))
     93             nir_instr_remove(&parent->instr);
     94       }
     95 
     96       switch (alu->op) {
     97       case nir_op_fsat:
     98          alu->op = nir_op_fmov;
     99          alu->dest.saturate = true;
    100          break;
    101       case nir_op_ineg:
    102          alu->op = nir_op_imov;
    103          alu->src[0].negate = !alu->src[0].negate;
    104          break;
    105       case nir_op_fneg:
    106          alu->op = nir_op_fmov;
    107          alu->src[0].negate = !alu->src[0].negate;
    108          break;
    109       case nir_op_iabs:
    110          alu->op = nir_op_imov;
    111          alu->src[0].abs = true;
    112          alu->src[0].negate = false;
    113          break;
    114       case nir_op_fabs:
    115          alu->op = nir_op_fmov;
    116          alu->src[0].abs = true;
    117          alu->src[0].negate = false;
    118          break;
    119       default:
    120          break;
    121       }
    122 
    123       /* We've covered sources.  Now we're going to try and saturate the
    124        * destination if we can.
    125        */
    126 
    127       if (!alu->dest.dest.is_ssa)
    128          continue;
    129 
    130       /* We can only saturate float destinations */
    131       if (nir_alu_type_get_base_type(nir_op_infos[alu->op].output_type) !=
    132           nir_type_float)
    133          continue;
    134 
    135       if (!list_empty(&alu->dest.dest.ssa.if_uses))
    136          continue;
    137 
    138       bool all_children_are_sat = true;
    139       nir_foreach_use(child_src, &alu->dest.dest.ssa) {
    140          assert(child_src->is_ssa);
    141          nir_instr *child = child_src->parent_instr;
    142          if (child->type != nir_instr_type_alu) {
    143             all_children_are_sat = false;
    144             continue;
    145          }
    146 
    147          nir_alu_instr *child_alu = nir_instr_as_alu(child);
    148          if (child_alu->src[0].negate || child_alu->src[0].abs) {
    149             all_children_are_sat = false;
    150             continue;
    151          }
    152 
    153          if (child_alu->op != nir_op_fsat &&
    154              !(child_alu->op == nir_op_fmov && child_alu->dest.saturate)) {
    155             all_children_are_sat = false;
    156             continue;
    157          }
    158       }
    159 
    160       if (!all_children_are_sat)
    161          continue;
    162 
    163       alu->dest.saturate = true;
    164 
    165       nir_foreach_use(child_src, &alu->dest.dest.ssa) {
    166          assert(child_src->is_ssa);
    167          nir_alu_instr *child_alu = nir_instr_as_alu(child_src->parent_instr);
    168 
    169          child_alu->op = nir_op_fmov;
    170          child_alu->dest.saturate = false;
    171          /* We could propagate the dest of our instruction to the
    172           * destinations of the uses here.  However, one quick round of
    173           * copy propagation will clean that all up and then we don't have
    174           * the complexity.
    175           */
    176       }
    177    }
    178 
    179    return true;
    180 }
    181 
    182 void
    183 nir_lower_to_source_mods(nir_shader *shader)
    184 {
    185    nir_foreach_function(function, shader) {
    186       if (function->impl) {
    187          nir_foreach_block(block, function->impl) {
    188             nir_lower_to_source_mods_block(block);
    189          }
    190       }
    191    }
    192 }
    193