Home | History | Annotate | Download | only in glsl
      1 /*
      2  * Copyright  2014 Intel Corporation
      3  *
      4  * Permission is hereby granted, free of charge, to any person obtaining a
      5  * copy of this software and associated documentation files (the "Software"),
      6  * to deal in the Software without restriction, including without limitation
      7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      8  * and/or sell copies of the Software, and to permit persons to whom the
      9  * Software is furnished to do so, subject to the following conditions:
     10  *
     11  * The above copyright notice and this permission notice (including the next
     12  * paragraph) shall be included in all copies or substantial portions of the
     13  * Software.
     14  *
     15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
     20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
     21  * DEALINGS IN THE SOFTWARE.
     22  */
     23 
     24 /**
     25  * \file opt_minmax.cpp
     26  *
     27  * Drop operands from an expression tree of only min/max operations if they
     28  * can be proven to not contribute to the final result.
     29  *
     30  * The algorithm is similar to alpha-beta pruning on a minmax search.
     31  */
     32 
     33 #include "ir.h"
     34 #include "ir_visitor.h"
     35 #include "ir_rvalue_visitor.h"
     36 #include "ir_optimization.h"
     37 #include "ir_builder.h"
     38 #include "program/prog_instruction.h"
     39 #include "compiler/glsl_types.h"
     40 #include "main/macros.h"
     41 
     42 using namespace ir_builder;
     43 
     44 namespace {
     45 
     46 enum compare_components_result {
     47    LESS,
     48    LESS_OR_EQUAL,
     49    EQUAL,
     50    GREATER_OR_EQUAL,
     51    GREATER,
     52    MIXED
     53 };
     54 
     55 class minmax_range {
     56 public:
     57    minmax_range(ir_constant *low = NULL, ir_constant *high = NULL)
     58    {
     59       this->low = low;
     60       this->high = high;
     61    }
     62 
     63    /* low is the lower limit of the range, high is the higher limit. NULL on
     64     * low means negative infinity (unlimited) and on high positive infinity
     65     * (unlimited). Because of the two interpretations of the value NULL,
     66     * arbitrary comparison between ir_constants is impossible.
     67     */
     68    ir_constant *low;
     69    ir_constant *high;
     70 };
     71 
     72 class ir_minmax_visitor : public ir_rvalue_enter_visitor {
     73 public:
     74    ir_minmax_visitor()
     75       : progress(false)
     76    {
     77    }
     78 
     79    ir_rvalue *prune_expression(ir_expression *expr, minmax_range baserange);
     80 
     81    void handle_rvalue(ir_rvalue **rvalue);
     82 
     83    bool progress;
     84 };
     85 
     86 /*
     87  * Returns LESS if all vector components of `a' are strictly lower than of `b',
     88  * GREATER if all vector components of `a' are strictly greater than of `b',
     89  * MIXED if some vector components of `a' are strictly lower than of `b' while
     90  * others are strictly greater, or EQUAL otherwise.
     91  */
     92 static enum compare_components_result
     93 compare_components(ir_constant *a, ir_constant *b)
     94 {
     95    assert(a != NULL);
     96    assert(b != NULL);
     97 
     98    assert(a->type->base_type == b->type->base_type);
     99 
    100    unsigned a_inc = a->type->is_scalar() ? 0 : 1;
    101    unsigned b_inc = b->type->is_scalar() ? 0 : 1;
    102    unsigned components = MAX2(a->type->components(), b->type->components());
    103 
    104    bool foundless = false;
    105    bool foundgreater = false;
    106    bool foundequal = false;
    107 
    108    for (unsigned i = 0, c0 = 0, c1 = 0;
    109         i < components;
    110         c0 += a_inc, c1 += b_inc, ++i) {
    111       switch (a->type->base_type) {
    112       case GLSL_TYPE_UINT:
    113          if (a->value.u[c0] < b->value.u[c1])
    114             foundless = true;
    115          else if (a->value.u[c0] > b->value.u[c1])
    116             foundgreater = true;
    117          else
    118             foundequal = true;
    119          break;
    120       case GLSL_TYPE_INT:
    121          if (a->value.i[c0] < b->value.i[c1])
    122             foundless = true;
    123          else if (a->value.i[c0] > b->value.i[c1])
    124             foundgreater = true;
    125          else
    126             foundequal = true;
    127          break;
    128       case GLSL_TYPE_FLOAT:
    129          if (a->value.f[c0] < b->value.f[c1])
    130             foundless = true;
    131          else if (a->value.f[c0] > b->value.f[c1])
    132             foundgreater = true;
    133          else
    134             foundequal = true;
    135          break;
    136       case GLSL_TYPE_DOUBLE:
    137          if (a->value.d[c0] < b->value.d[c1])
    138             foundless = true;
    139          else if (a->value.d[c0] > b->value.d[c1])
    140             foundgreater = true;
    141          else
    142             foundequal = true;
    143          break;
    144       default:
    145          unreachable("not reached");
    146       }
    147    }
    148 
    149    if (foundless && foundgreater) {
    150       /* Some components are strictly lower, others are strictly greater */
    151       return MIXED;
    152    }
    153 
    154    if (foundequal) {
    155        /* It is not mixed, but it is not strictly lower or greater */
    156       if (foundless)
    157          return LESS_OR_EQUAL;
    158       if (foundgreater)
    159          return GREATER_OR_EQUAL;
    160       return EQUAL;
    161    }
    162 
    163    /* All components are strictly lower or strictly greater */
    164    return foundless ? LESS : GREATER;
    165 }
    166 
    167 static ir_constant *
    168 combine_constant(bool ismin, ir_constant *a, ir_constant *b)
    169 {
    170    void *mem_ctx = ralloc_parent(a);
    171    ir_constant *c = a->clone(mem_ctx, NULL);
    172    for (unsigned i = 0; i < c->type->components(); i++) {
    173       switch (c->type->base_type) {
    174       case GLSL_TYPE_UINT:
    175          if ((ismin && b->value.u[i] < c->value.u[i]) ||
    176              (!ismin && b->value.u[i] > c->value.u[i]))
    177             c->value.u[i] = b->value.u[i];
    178          break;
    179       case GLSL_TYPE_INT:
    180          if ((ismin && b->value.i[i] < c->value.i[i]) ||
    181              (!ismin && b->value.i[i] > c->value.i[i]))
    182             c->value.i[i] = b->value.i[i];
    183          break;
    184       case GLSL_TYPE_FLOAT:
    185          if ((ismin && b->value.f[i] < c->value.f[i]) ||
    186              (!ismin && b->value.f[i] > c->value.f[i]))
    187             c->value.f[i] = b->value.f[i];
    188          break;
    189       case GLSL_TYPE_DOUBLE:
    190          if ((ismin && b->value.d[i] < c->value.d[i]) ||
    191              (!ismin && b->value.d[i] > c->value.d[i]))
    192             c->value.d[i] = b->value.d[i];
    193          break;
    194       default:
    195          assert(!"not reached");
    196       }
    197    }
    198    return c;
    199 }
    200 
    201 static ir_constant *
    202 smaller_constant(ir_constant *a, ir_constant *b)
    203 {
    204    assert(a != NULL);
    205    assert(b != NULL);
    206 
    207    enum compare_components_result ret = compare_components(a, b);
    208    if (ret == MIXED)
    209       return combine_constant(true, a, b);
    210    else if (ret < EQUAL)
    211       return a;
    212    else
    213       return b;
    214 }
    215 
    216 static ir_constant *
    217 larger_constant(ir_constant *a, ir_constant *b)
    218 {
    219    assert(a != NULL);
    220    assert(b != NULL);
    221 
    222    enum compare_components_result ret = compare_components(a, b);
    223    if (ret == MIXED)
    224       return combine_constant(false, a, b);
    225    else if (ret < EQUAL)
    226       return b;
    227    else
    228       return a;
    229 }
    230 
    231 /* Combines two ranges by doing an element-wise min() / max() depending on the
    232  * operation.
    233  */
    234 static minmax_range
    235 combine_range(minmax_range r0, minmax_range r1, bool ismin)
    236 {
    237    minmax_range ret;
    238 
    239    if (!r0.low) {
    240       ret.low = ismin ? r0.low : r1.low;
    241    } else if (!r1.low) {
    242       ret.low = ismin ? r1.low : r0.low;
    243    } else {
    244       ret.low = ismin ? smaller_constant(r0.low, r1.low) :
    245          larger_constant(r0.low, r1.low);
    246    }
    247 
    248    if (!r0.high) {
    249       ret.high = ismin ? r1.high : r0.high;
    250    } else if (!r1.high) {
    251       ret.high = ismin ? r0.high : r1.high;
    252    } else {
    253       ret.high = ismin ? smaller_constant(r0.high, r1.high) :
    254          larger_constant(r0.high, r1.high);
    255    }
    256 
    257    return ret;
    258 }
    259 
    260 /* Returns a range so that lower limit is the larger of the two lower limits,
    261  * and higher limit is the smaller of the two higher limits.
    262  */
    263 static minmax_range
    264 range_intersection(minmax_range r0, minmax_range r1)
    265 {
    266    minmax_range ret;
    267 
    268    if (!r0.low)
    269       ret.low = r1.low;
    270    else if (!r1.low)
    271       ret.low = r0.low;
    272    else
    273       ret.low = larger_constant(r0.low, r1.low);
    274 
    275    if (!r0.high)
    276       ret.high = r1.high;
    277    else if (!r1.high)
    278       ret.high = r0.high;
    279    else
    280       ret.high = smaller_constant(r0.high, r1.high);
    281 
    282    return ret;
    283 }
    284 
    285 static minmax_range
    286 get_range(ir_rvalue *rval)
    287 {
    288    ir_expression *expr = rval->as_expression();
    289    if (expr && (expr->operation == ir_binop_min ||
    290                 expr->operation == ir_binop_max)) {
    291       minmax_range r0 = get_range(expr->operands[0]);
    292       minmax_range r1 = get_range(expr->operands[1]);
    293       return combine_range(r0, r1, expr->operation == ir_binop_min);
    294    }
    295 
    296    ir_constant *c = rval->as_constant();
    297    if (c) {
    298       return minmax_range(c, c);
    299    }
    300 
    301    return minmax_range();
    302 }
    303 
    304 /**
    305  * Prunes a min/max expression considering the base range of the parent
    306  * min/max expression.
    307  *
    308  * @param baserange the range that the parents of this min/max expression
    309  * in the min/max tree will clamp its value to.
    310  */
    311 ir_rvalue *
    312 ir_minmax_visitor::prune_expression(ir_expression *expr, minmax_range baserange)
    313 {
    314    assert(expr->operation == ir_binop_min ||
    315           expr->operation == ir_binop_max);
    316 
    317    bool ismin = expr->operation == ir_binop_min;
    318    minmax_range limits[2];
    319 
    320    /* Recurse to get the ranges for each of the subtrees of this
    321     * expression. We need to do this as a separate step because we need to
    322     * know the ranges of each of the subtrees before we prune either one.
    323     * Consider something like this:
    324     *
    325     *        max
    326     *     /       \
    327     *    max     max
    328     *   /   \   /   \
    329     *  3    a   b    2
    330     *
    331     * We would like to prune away the max on the bottom-right, but to do so
    332     * we need to know the range of the expression on the left beforehand,
    333     * and there's no guarantee that we will visit either subtree in a
    334     * particular order.
    335     */
    336    for (unsigned i = 0; i < 2; ++i)
    337       limits[i] = get_range(expr->operands[i]);
    338 
    339    for (unsigned i = 0; i < 2; ++i) {
    340       bool is_redundant = false;
    341 
    342       enum compare_components_result cr = LESS;
    343       if (ismin) {
    344          /* If this operand will always be greater than the other one, it's
    345           * redundant.
    346           */
    347          if (limits[i].low && limits[1 - i].high) {
    348                cr = compare_components(limits[i].low, limits[1 - i].high);
    349             if (cr >= EQUAL && cr != MIXED)
    350                is_redundant = true;
    351          }
    352          /* If this operand is always greater than baserange, then even if
    353           * it's smaller than the other one it'll get clamped, so it's
    354           * redundant.
    355           */
    356          if (!is_redundant && limits[i].low && baserange.high) {
    357             cr = compare_components(limits[i].low, baserange.high);
    358             if (cr > EQUAL && cr != MIXED)
    359                is_redundant = true;
    360          }
    361       } else {
    362          /* If this operand will always be lower than the other one, it's
    363           * redundant.
    364           */
    365          if (limits[i].high && limits[1 - i].low) {
    366             cr = compare_components(limits[i].high, limits[1 - i].low);
    367             if (cr <= EQUAL)
    368                is_redundant = true;
    369          }
    370          /* If this operand is always lower than baserange, then even if
    371           * it's greater than the other one it'll get clamped, so it's
    372           * redundant.
    373           */
    374          if (!is_redundant && limits[i].high && baserange.low) {
    375             cr = compare_components(limits[i].high, baserange.low);
    376             if (cr < EQUAL)
    377                is_redundant = true;
    378          }
    379       }
    380 
    381       if (is_redundant) {
    382          progress = true;
    383 
    384          /* Recurse if necessary. */
    385          ir_expression *op_expr = expr->operands[1 - i]->as_expression();
    386          if (op_expr && (op_expr->operation == ir_binop_min ||
    387                          op_expr->operation == ir_binop_max)) {
    388             return prune_expression(op_expr, baserange);
    389          }
    390 
    391          return expr->operands[1 - i];
    392       } else if (cr == MIXED) {
    393          /* If we have mixed vector operands, we can try to resolve the minmax
    394           * expression by doing a component-wise minmax:
    395           *
    396           *             min                          min
    397           *           /    \                       /    \
    398           *         min     a       ===>        [1,1]    a
    399           *       /    \
    400           *    [1,3]   [3,1]
    401           *
    402           */
    403          ir_constant *a = expr->operands[0]->as_constant();
    404          ir_constant *b = expr->operands[1]->as_constant();
    405          if (a && b)
    406             return combine_constant(ismin, a, b);
    407       }
    408    }
    409 
    410    /* Now recurse to operands giving them the proper baserange. The baserange
    411     * to pass is the intersection of our baserange and the other operand's
    412     * limit with one of the ranges unlimited. If we can't compute a valid
    413     * intersection, we use the current baserange.
    414     */
    415    for (unsigned i = 0; i < 2; ++i) {
    416       ir_expression *op_expr = expr->operands[i]->as_expression();
    417       if (op_expr && (op_expr->operation == ir_binop_min ||
    418                       op_expr->operation == ir_binop_max)) {
    419          /* We can only compute a new baserange for this operand if we managed
    420           * to compute a valid range for the other operand.
    421           */
    422          if (ismin)
    423             limits[1 - i].low = NULL;
    424          else
    425             limits[1 - i].high = NULL;
    426          minmax_range base = range_intersection(limits[1 - i], baserange);
    427          expr->operands[i] = prune_expression(op_expr, base);
    428       }
    429    }
    430 
    431    /* If we got here we could not discard any of the operands of the minmax
    432     * expression, but we can still try to resolve the expression if both
    433     * operands are constant. We do this after the loop above, to make sure
    434     * that if our operands are minmax expressions we have tried to prune them
    435     * first (hopefully reducing them to constants).
    436     */
    437    ir_constant *a = expr->operands[0]->as_constant();
    438    ir_constant *b = expr->operands[1]->as_constant();
    439    if (a && b)
    440       return combine_constant(ismin, a, b);
    441 
    442    return expr;
    443 }
    444 
    445 static ir_rvalue *
    446 swizzle_if_required(ir_expression *expr, ir_rvalue *rval)
    447 {
    448    if (expr->type->is_vector() && rval->type->is_scalar()) {
    449       return swizzle(rval, SWIZZLE_XXXX, expr->type->vector_elements);
    450    } else {
    451       return rval;
    452    }
    453 }
    454 
    455 void
    456 ir_minmax_visitor::handle_rvalue(ir_rvalue **rvalue)
    457 {
    458    if (!*rvalue)
    459       return;
    460 
    461    ir_expression *expr = (*rvalue)->as_expression();
    462    if (!expr || (expr->operation != ir_binop_min &&
    463                  expr->operation != ir_binop_max))
    464       return;
    465 
    466    ir_rvalue *new_rvalue = prune_expression(expr, minmax_range());
    467    if (new_rvalue == *rvalue)
    468       return;
    469 
    470    /* If the expression type is a vector and the optimization leaves a scalar
    471     * as the result, we need to turn it into a vector.
    472     */
    473    *rvalue = swizzle_if_required(expr, new_rvalue);
    474 
    475    progress = true;
    476 }
    477 
    478 }
    479 
    480 bool
    481 do_minmax_prune(exec_list *instructions)
    482 {
    483    ir_minmax_visitor v;
    484 
    485    visit_list_elements(&v, instructions);
    486 
    487    return v.progress;
    488 }
    489