Home | History | Annotate | Download | only in nir
      1 /*
      2  * Copyright © 2015 Red Hat
      3  *
      4  * Permission is hereby granted, free of charge, to any person obtaining a
      5  * copy of this software and associated documentation files (the "Software"),
      6  * to deal in the Software without restriction, including without limitation
      7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      8  * and/or sell copies of the Software, and to permit persons to whom the
      9  * Software is furnished to do so, subject to the following conditions:
     10  *
     11  * The above copyright notice and this permission notice (including the next
     12  * paragraph) shall be included in all copies or substantial portions of the
     13  * Software.
     14  *
     15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
     20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS
     21  * IN THE SOFTWARE.
     22  *
     23  * Authors:
     24  *    Rob Clark <robclark (at) freedesktop.org>
     25  */
     26 
     27 #include "nir.h"
     28 #include "nir_builder.h"
     29 
     30 /* Lowers idiv/udiv/umod
     31  * Based on NV50LegalizeSSA::handleDIV()
     32  *
     33  * Note that this is probably not enough precision for compute shaders.
     34  * Perhaps we want a second higher precision (looping) version of this?
     35  * Or perhaps we assume if you can do compute shaders you can also
     36  * branch out to a pre-optimized shader library routine..
     37  */
     38 
     39 static bool
     40 convert_instr(nir_builder *bld, nir_alu_instr *alu)
     41 {
     42    nir_ssa_def *numer, *denom, *af, *bf, *a, *b, *q, *r;
     43    nir_op op = alu->op;
     44    bool is_signed;
     45 
     46    if ((op != nir_op_idiv) &&
     47        (op != nir_op_udiv) &&
     48        (op != nir_op_umod))
     49       return false;
     50 
     51    is_signed = (op == nir_op_idiv);
     52 
     53    bld->cursor = nir_before_instr(&alu->instr);
     54 
     55    numer = nir_ssa_for_alu_src(bld, alu, 0);
     56    denom = nir_ssa_for_alu_src(bld, alu, 1);
     57 
     58    if (is_signed) {
     59       af = nir_i2f(bld, numer);
     60       bf = nir_i2f(bld, denom);
     61       af = nir_fabs(bld, af);
     62       bf = nir_fabs(bld, bf);
     63       a  = nir_iabs(bld, numer);
     64       b  = nir_iabs(bld, denom);
     65    } else {
     66       af = nir_u2f(bld, numer);
     67       bf = nir_u2f(bld, denom);
     68       a  = numer;
     69       b  = denom;
     70    }
     71 
     72    /* get first result: */
     73    bf = nir_frcp(bld, bf);
     74    bf = nir_isub(bld, bf, nir_imm_int(bld, 2));  /* yes, really */
     75    q  = nir_fmul(bld, af, bf);
     76 
     77    if (is_signed) {
     78       q = nir_f2i(bld, q);
     79    } else {
     80       q = nir_f2u(bld, q);
     81    }
     82 
     83    /* get error of first result: */
     84    r = nir_imul(bld, q, b);
     85    r = nir_isub(bld, a, r);
     86    r = nir_u2f(bld, r);
     87    r = nir_fmul(bld, r, bf);
     88    r = nir_f2u(bld, r);
     89 
     90    /* add quotients: */
     91    q = nir_iadd(bld, q, r);
     92 
     93    /* correction: if modulus >= divisor, add 1 */
     94    r = nir_imul(bld, q, b);
     95    r = nir_isub(bld, a, r);
     96 
     97    r = nir_uge(bld, r, b);
     98    r = nir_b2i(bld, r);
     99 
    100    q = nir_iadd(bld, q, r);
    101    if (is_signed)  {
    102       /* fix the sign: */
    103       r = nir_ixor(bld, numer, denom);
    104       r = nir_ishr(bld, r, nir_imm_int(bld, 31));
    105       b = nir_ineg(bld, q);
    106       q = nir_bcsel(bld, r, b, q);
    107    }
    108 
    109    if (op == nir_op_umod) {
    110       /* division result in q */
    111       r = nir_imul(bld, q, b);
    112       q = nir_isub(bld, a, r);
    113    }
    114 
    115    assert(alu->dest.dest.is_ssa);
    116    nir_ssa_def_rewrite_uses(&alu->dest.dest.ssa, nir_src_for_ssa(q));
    117 
    118    return true;
    119 }
    120 
    121 static bool
    122 convert_impl(nir_function_impl *impl)
    123 {
    124    nir_builder b;
    125    nir_builder_init(&b, impl);
    126    bool progress = false;
    127 
    128    nir_foreach_block(block, impl) {
    129       nir_foreach_instr_safe(instr, block) {
    130          if (instr->type == nir_instr_type_alu)
    131             progress |= convert_instr(&b, nir_instr_as_alu(instr));
    132       }
    133    }
    134 
    135    nir_metadata_preserve(impl, nir_metadata_block_index |
    136                                nir_metadata_dominance);
    137 
    138    return progress;
    139 }
    140 
    141 bool
    142 nir_lower_idiv(nir_shader *shader)
    143 {
    144    bool progress = false;
    145 
    146    nir_foreach_function(function, shader) {
    147       if (function->impl)
    148          progress |= convert_impl(function->impl);
    149    }
    150 
    151    return progress;
    152 }
    153