1 /* 2 * Copyright 2014 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 * 23 * Authors: 24 * Jason Ekstrand (jason (at) jlekstrand.net) 25 * 26 */ 27 28 #include "nir.h" 29 30 /* 31 * This pass lowers the neg, abs, and sat operations to source modifiers on 32 * ALU operations to make things nicer for the backend. It's just much 33 * easier to not have them when we're doing optimizations. 34 */ 35 36 static bool 37 nir_lower_to_source_mods_block(nir_block *block) 38 { 39 nir_foreach_instr(instr, block) { 40 if (instr->type != nir_instr_type_alu) 41 continue; 42 43 nir_alu_instr *alu = nir_instr_as_alu(instr); 44 45 for (unsigned i = 0; i < nir_op_infos[alu->op].num_inputs; i++) { 46 if (!alu->src[i].src.is_ssa) 47 continue; 48 49 if (alu->src[i].src.ssa->parent_instr->type != nir_instr_type_alu) 50 continue; 51 52 nir_alu_instr *parent = nir_instr_as_alu(alu->src[i].src.ssa->parent_instr); 53 54 if (parent->dest.saturate) 55 continue; 56 57 switch (nir_alu_type_get_base_type(nir_op_infos[alu->op].input_types[i])) { 58 case nir_type_float: 59 if (parent->op != nir_op_fmov) 60 continue; 61 break; 62 case nir_type_int: 63 if (parent->op != nir_op_imov) 64 continue; 65 break; 66 default: 67 continue; 68 } 69 70 /* We can only do a rewrite if the source we are copying is SSA. 71 * Otherwise, moving the read might invalidly reorder reads/writes 72 * on a register. 73 */ 74 if (!parent->src[0].src.is_ssa) 75 continue; 76 77 nir_instr_rewrite_src(instr, &alu->src[i].src, parent->src[0].src); 78 if (alu->src[i].abs) { 79 /* abs trumps both neg and abs, do nothing */ 80 } else { 81 alu->src[i].negate = (alu->src[i].negate != parent->src[0].negate); 82 alu->src[i].abs |= parent->src[0].abs; 83 } 84 85 for (int j = 0; j < 4; ++j) { 86 if (!nir_alu_instr_channel_used(alu, i, j)) 87 continue; 88 alu->src[i].swizzle[j] = parent->src[0].swizzle[alu->src[i].swizzle[j]]; 89 } 90 91 if (list_empty(&parent->dest.dest.ssa.uses) && 92 list_empty(&parent->dest.dest.ssa.if_uses)) 93 nir_instr_remove(&parent->instr); 94 } 95 96 switch (alu->op) { 97 case nir_op_fsat: 98 alu->op = nir_op_fmov; 99 alu->dest.saturate = true; 100 break; 101 case nir_op_ineg: 102 alu->op = nir_op_imov; 103 alu->src[0].negate = !alu->src[0].negate; 104 break; 105 case nir_op_fneg: 106 alu->op = nir_op_fmov; 107 alu->src[0].negate = !alu->src[0].negate; 108 break; 109 case nir_op_iabs: 110 alu->op = nir_op_imov; 111 alu->src[0].abs = true; 112 alu->src[0].negate = false; 113 break; 114 case nir_op_fabs: 115 alu->op = nir_op_fmov; 116 alu->src[0].abs = true; 117 alu->src[0].negate = false; 118 break; 119 default: 120 break; 121 } 122 123 /* We've covered sources. Now we're going to try and saturate the 124 * destination if we can. 125 */ 126 127 if (!alu->dest.dest.is_ssa) 128 continue; 129 130 /* We can only saturate float destinations */ 131 if (nir_alu_type_get_base_type(nir_op_infos[alu->op].output_type) != 132 nir_type_float) 133 continue; 134 135 if (!list_empty(&alu->dest.dest.ssa.if_uses)) 136 continue; 137 138 bool all_children_are_sat = true; 139 nir_foreach_use(child_src, &alu->dest.dest.ssa) { 140 assert(child_src->is_ssa); 141 nir_instr *child = child_src->parent_instr; 142 if (child->type != nir_instr_type_alu) { 143 all_children_are_sat = false; 144 continue; 145 } 146 147 nir_alu_instr *child_alu = nir_instr_as_alu(child); 148 if (child_alu->src[0].negate || child_alu->src[0].abs) { 149 all_children_are_sat = false; 150 continue; 151 } 152 153 if (child_alu->op != nir_op_fsat && 154 !(child_alu->op == nir_op_fmov && child_alu->dest.saturate)) { 155 all_children_are_sat = false; 156 continue; 157 } 158 } 159 160 if (!all_children_are_sat) 161 continue; 162 163 alu->dest.saturate = true; 164 165 nir_foreach_use(child_src, &alu->dest.dest.ssa) { 166 assert(child_src->is_ssa); 167 nir_alu_instr *child_alu = nir_instr_as_alu(child_src->parent_instr); 168 169 child_alu->op = nir_op_fmov; 170 child_alu->dest.saturate = false; 171 /* We could propagate the dest of our instruction to the 172 * destinations of the uses here. However, one quick round of 173 * copy propagation will clean that all up and then we don't have 174 * the complexity. 175 */ 176 } 177 } 178 179 return true; 180 } 181 182 void 183 nir_lower_to_source_mods(nir_shader *shader) 184 { 185 nir_foreach_function(function, shader) { 186 if (function->impl) { 187 nir_foreach_block(block, function->impl) { 188 nir_lower_to_source_mods_block(block); 189 } 190 } 191 } 192 } 193