1 /* 2 * Copyright 2015 Red Hat 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS 21 * IN THE SOFTWARE. 22 * 23 * Authors: 24 * Rob Clark <robclark (at) freedesktop.org> 25 */ 26 27 #include "nir.h" 28 #include "nir_builder.h" 29 30 /* Lowers idiv/udiv/umod 31 * Based on NV50LegalizeSSA::handleDIV() 32 * 33 * Note that this is probably not enough precision for compute shaders. 34 * Perhaps we want a second higher precision (looping) version of this? 35 * Or perhaps we assume if you can do compute shaders you can also 36 * branch out to a pre-optimized shader library routine.. 37 */ 38 39 static bool 40 convert_instr(nir_builder *bld, nir_alu_instr *alu) 41 { 42 nir_ssa_def *numer, *denom, *af, *bf, *a, *b, *q, *r; 43 nir_op op = alu->op; 44 bool is_signed; 45 46 if ((op != nir_op_idiv) && 47 (op != nir_op_udiv) && 48 (op != nir_op_umod)) 49 return false; 50 51 is_signed = (op == nir_op_idiv); 52 53 bld->cursor = nir_before_instr(&alu->instr); 54 55 numer = nir_ssa_for_alu_src(bld, alu, 0); 56 denom = nir_ssa_for_alu_src(bld, alu, 1); 57 58 if (is_signed) { 59 af = nir_i2f(bld, numer); 60 bf = nir_i2f(bld, denom); 61 af = nir_fabs(bld, af); 62 bf = nir_fabs(bld, bf); 63 a = nir_iabs(bld, numer); 64 b = nir_iabs(bld, denom); 65 } else { 66 af = nir_u2f(bld, numer); 67 bf = nir_u2f(bld, denom); 68 a = numer; 69 b = denom; 70 } 71 72 /* get first result: */ 73 bf = nir_frcp(bld, bf); 74 bf = nir_isub(bld, bf, nir_imm_int(bld, 2)); /* yes, really */ 75 q = nir_fmul(bld, af, bf); 76 77 if (is_signed) { 78 q = nir_f2i(bld, q); 79 } else { 80 q = nir_f2u(bld, q); 81 } 82 83 /* get error of first result: */ 84 r = nir_imul(bld, q, b); 85 r = nir_isub(bld, a, r); 86 r = nir_u2f(bld, r); 87 r = nir_fmul(bld, r, bf); 88 r = nir_f2u(bld, r); 89 90 /* add quotients: */ 91 q = nir_iadd(bld, q, r); 92 93 /* correction: if modulus >= divisor, add 1 */ 94 r = nir_imul(bld, q, b); 95 r = nir_isub(bld, a, r); 96 97 r = nir_uge(bld, r, b); 98 r = nir_b2i(bld, r); 99 100 q = nir_iadd(bld, q, r); 101 if (is_signed) { 102 /* fix the sign: */ 103 r = nir_ixor(bld, numer, denom); 104 r = nir_ishr(bld, r, nir_imm_int(bld, 31)); 105 b = nir_ineg(bld, q); 106 q = nir_bcsel(bld, r, b, q); 107 } 108 109 if (op == nir_op_umod) { 110 /* division result in q */ 111 r = nir_imul(bld, q, b); 112 q = nir_isub(bld, a, r); 113 } 114 115 assert(alu->dest.dest.is_ssa); 116 nir_ssa_def_rewrite_uses(&alu->dest.dest.ssa, nir_src_for_ssa(q)); 117 118 return true; 119 } 120 121 static bool 122 convert_impl(nir_function_impl *impl) 123 { 124 nir_builder b; 125 nir_builder_init(&b, impl); 126 bool progress = false; 127 128 nir_foreach_block(block, impl) { 129 nir_foreach_instr_safe(instr, block) { 130 if (instr->type == nir_instr_type_alu) 131 progress |= convert_instr(&b, nir_instr_as_alu(instr)); 132 } 133 } 134 135 nir_metadata_preserve(impl, nir_metadata_block_index | 136 nir_metadata_dominance); 137 138 return progress; 139 } 140 141 bool 142 nir_lower_idiv(nir_shader *shader) 143 { 144 bool progress = false; 145 146 nir_foreach_function(function, shader) { 147 if (function->impl) 148 progress |= convert_impl(function->impl); 149 } 150 151 return progress; 152 } 153