Home | History | Annotate | Download | only in glsl
      1 /*
      2  * Copyright  2014 Intel Corporation
      3  *
      4  * Permission is hereby granted, free of charge, to any person obtaining a
      5  * copy of this software and associated documentation files (the "Software"),
      6  * to deal in the Software without restriction, including without limitation
      7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      8  * and/or sell copies of the Software, and to permit persons to whom the
      9  * Software is furnished to do so, subject to the following conditions:
     10  *
     11  * The above copyright notice and this permission notice (including the next
     12  * paragraph) shall be included in all copies or substantial portions of the
     13  * Software.
     14  *
     15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
     20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
     21  * DEALINGS IN THE SOFTWARE.
     22  */
     23 
     24 /**
     25  * \file opt_rebalance_tree.cpp
     26  *
     27  * Rebalances a reduction expression tree.
     28  *
     29  * For reduction operations (e.g., x + y + z + w) we generate an expression
     30  * tree like
     31  *
     32  *        +
     33  *       / \
     34  *      +   w
     35  *     / \
     36  *    +   z
     37  *   / \
     38  *  x   y
     39  *
     40  * which we can rebalance into
     41  *
     42  *       +
     43  *      / \
     44  *     /   \
     45  *    +     +
     46  *   / \   / \
     47  *  x   y z   w
     48  *
     49  * to get a better instruction scheduling.
     50  *
     51  * See "Tree Rebalancing in Optimal Editor Time and Space" by Quentin F. Stout
     52  * and Bette L. Warren.
     53  *
     54  * Also see http://penguin.ewu.edu/~trolfe/DSWpaper/ for a very readable
     55  * explanation of the of the tree_to_vine() (rightward rotation) and
     56  * vine_to_tree() (leftward rotation) algorithms.
     57  */
     58 
     59 #include "ir.h"
     60 #include "ir_visitor.h"
     61 #include "ir_rvalue_visitor.h"
     62 #include "ir_optimization.h"
     63 #include "main/macros.h" /* for MAX2 */
     64 
     65 /* The DSW algorithm generates a degenerate tree (really, a linked list) in
     66  * tree_to_vine(). We'd rather not leave a binary expression with only one
     67  * operand, so trivial modifications (the ternary operators below) are needed
     68  * to ensure that we only rotate around the ir_expression nodes of the tree.
     69  */
     70 static unsigned
     71 tree_to_vine(ir_expression *root)
     72 {
     73    unsigned size = 0;
     74    ir_rvalue *vine_tail = root;
     75    ir_rvalue *remainder = root->operands[1];
     76 
     77    while (remainder != NULL) {
     78       ir_expression *remainder_temp = remainder->as_expression();
     79       ir_expression *remainder_left = remainder_temp ?
     80          remainder_temp->operands[0]->as_expression() : NULL;
     81 
     82       if (remainder_left == NULL) {
     83          /* move vine_tail down one */
     84          vine_tail = remainder;
     85          remainder = remainder->as_expression() ?
     86             ((ir_expression *)remainder)->operands[1] : NULL;
     87          size++;
     88       } else {
     89          /* rotate */
     90          ir_expression *tempptr = remainder_left;
     91          ((ir_expression *)remainder)->operands[0] = tempptr->operands[1];
     92          tempptr->operands[1] = remainder;
     93          remainder = tempptr;
     94          ((ir_expression *)vine_tail)->operands[1] = tempptr;
     95       }
     96    }
     97 
     98    return size;
     99 }
    100 
    101 static void
    102 compression(ir_expression *root, unsigned count)
    103 {
    104    ir_expression *scanner = root;
    105 
    106    for (unsigned i = 0; i < count; i++) {
    107       ir_expression *child = (ir_expression *)scanner->operands[1];
    108       scanner->operands[1] = child->operands[1];
    109       scanner = (ir_expression *)scanner->operands[1];
    110       child->operands[1] = scanner->operands[0];
    111       scanner->operands[0] = child;
    112    }
    113 }
    114 
    115 static void
    116 vine_to_tree(ir_expression *root, unsigned size)
    117 {
    118    int n = size - 1;
    119    for (int m = n / 2; m > 0; m = n / 2) {
    120       compression(root, m);
    121       n -= m + 1;
    122    }
    123 }
    124 
    125 namespace {
    126 
    127 class ir_rebalance_visitor : public ir_rvalue_enter_visitor {
    128 public:
    129    ir_rebalance_visitor()
    130    {
    131       progress = false;
    132    }
    133 
    134    virtual ir_visitor_status visit_enter(ir_assignment *ir);
    135 
    136    void handle_rvalue(ir_rvalue **rvalue);
    137 
    138    bool progress;
    139 };
    140 
    141 struct is_reduction_data {
    142    ir_expression_operation operation;
    143    const glsl_type *type;
    144    unsigned num_expr;
    145    bool is_reduction;
    146    bool contains_constant;
    147 };
    148 
    149 } /* anonymous namespace */
    150 
    151 ir_visitor_status
    152 ir_rebalance_visitor::visit_enter(ir_assignment *ir)
    153 {
    154    ir_variable *var = ir->lhs->variable_referenced();
    155    if (var->data.invariant || var->data.precise) {
    156       /* If we're assigning to an invariant variable, just bail.  Tree
    157        * rebalancing (reassociation) isn't precision-safe.
    158        */
    159       return visit_continue_with_parent;
    160    } else {
    161       return visit_continue;
    162    }
    163 }
    164 
    165 static bool
    166 is_reduction_operation(ir_expression_operation operation)
    167 {
    168    switch (operation) {
    169    case ir_binop_add:
    170    case ir_binop_mul:
    171    case ir_binop_bit_and:
    172    case ir_binop_bit_xor:
    173    case ir_binop_bit_or:
    174    case ir_binop_logic_and:
    175    case ir_binop_logic_xor:
    176    case ir_binop_logic_or:
    177    case ir_binop_min:
    178    case ir_binop_max:
    179       return true;
    180    default:
    181       return false;
    182    }
    183 }
    184 
    185 /* Note that this function does not attempt to recognize that reduction trees
    186  * are already balanced.
    187  *
    188  * We return false from this function for a number of reasons other than an
    189  * expression tree not being a mathematical reduction. Namely,
    190  *
    191  *    - if the tree contains multiple constants that we may be able to combine.
    192  *    - if the tree contains matrices:
    193  *       - they might contain vec4's with many constant components that we can
    194  *         simplify after splitting.
    195  *       - applying the matrix chain ordering optimization is more than just
    196  *         balancing an expression tree.
    197  *    - if the tree contains operations on multiple types.
    198  *    - if the tree contains ir_dereference_{array,record}, since foo[a+b] + c
    199  *      would trick the visiting pass.
    200  */
    201 static void
    202 is_reduction(ir_instruction *ir, void *data)
    203 {
    204    struct is_reduction_data *ird = (struct is_reduction_data *)data;
    205    if (!ird->is_reduction)
    206       return;
    207 
    208    /* We don't want to balance a tree that contains multiple constants, since
    209     * we'll be able to constant fold them if they're not in separate subtrees.
    210     */
    211    if (ir->as_constant()) {
    212       if (ird->contains_constant) {
    213          ird->is_reduction = false;
    214       }
    215       ird->contains_constant = true;
    216       return;
    217    }
    218 
    219    /* Array/record dereferences have subtrees that are not part of the expr
    220     * tree we're balancing. Skip trees containing them.
    221     */
    222    if (ir->ir_type == ir_type_dereference_array ||
    223        ir->ir_type == ir_type_dereference_record) {
    224       ird->is_reduction = false;
    225       return;
    226    }
    227 
    228    ir_expression *expr = ir->as_expression();
    229    if (!expr)
    230       return;
    231 
    232    /* Non-constant matrices might still contain constant vec4 that we can
    233     * constant fold once split up. Handling matrices will need some more
    234     * work.
    235     */
    236    if (expr->type->is_matrix() ||
    237        expr->operands[0]->type->is_matrix() ||
    238        (expr->operands[1] && expr->operands[1]->type->is_matrix())) {
    239       ird->is_reduction = false;
    240       return;
    241    }
    242 
    243    if (ird->type != NULL && ird->type != expr->type) {
    244       ird->is_reduction = false;
    245       return;
    246    }
    247    ird->type = expr->type;
    248 
    249    ird->num_expr++;
    250    if (is_reduction_operation(expr->operation)) {
    251       if (ird->operation != 0 && ird->operation != expr->operation)
    252          ird->is_reduction = false;
    253       ird->operation = expr->operation;
    254    } else {
    255       ird->is_reduction = false;
    256    }
    257 }
    258 
    259 static ir_rvalue *
    260 handle_expression(ir_expression *expr)
    261 {
    262    struct is_reduction_data ird;
    263    ird.operation = (ir_expression_operation)0;
    264    ird.type = NULL;
    265    ird.num_expr = 0;
    266    ird.is_reduction = true;
    267    ird.contains_constant = false;
    268 
    269    visit_tree(expr, is_reduction, (void *)&ird);
    270 
    271    if (ird.is_reduction && ird.num_expr > 2) {
    272       ir_constant z = ir_constant(0.0f);
    273       ir_expression pseudo_root = ir_expression(ir_binop_add, &z, expr);
    274 
    275       unsigned size = tree_to_vine(&pseudo_root);
    276       vine_to_tree(&pseudo_root, size);
    277 
    278       expr = (ir_expression *)pseudo_root.operands[1];
    279    }
    280    return expr;
    281 }
    282 
    283 static void
    284 update_types(ir_instruction *ir, void *)
    285 {
    286    ir_expression *expr = ir->as_expression();
    287    if (!expr)
    288       return;
    289 
    290    const glsl_type *const new_type =
    291       glsl_type::get_instance(expr->type->base_type,
    292                               MAX2(expr->operands[0]->type->vector_elements,
    293                                    expr->operands[1]->type->vector_elements),
    294                               1);
    295    assert(new_type != glsl_type::error_type);
    296    expr->type = new_type;
    297 }
    298 
    299 void
    300 ir_rebalance_visitor::handle_rvalue(ir_rvalue **rvalue)
    301 {
    302    if (!*rvalue)
    303       return;
    304 
    305    ir_expression *expr = (*rvalue)->as_expression();
    306    if (!expr || !is_reduction_operation(expr->operation))
    307       return;
    308 
    309    ir_rvalue *new_rvalue = handle_expression(expr);
    310 
    311    /* If we failed to rebalance the tree (e.g., because it wasn't a reduction,
    312     * or some other set of cases) new_rvalue will point to the same root as
    313     * before.
    314     *
    315     * Similarly, if the tree rooted at *rvalue was a reduction and was already
    316     * balanced, the algorithm will rearrange the tree but will ultimately
    317     * return an identical tree, so this check will handle that as well and
    318     * will not set progress = true.
    319     */
    320    if (new_rvalue == *rvalue)
    321       return;
    322 
    323    visit_tree(new_rvalue, NULL, NULL, update_types);
    324 
    325    *rvalue = new_rvalue;
    326    this->progress = true;
    327 }
    328 
    329 bool
    330 do_rebalance_tree(exec_list *instructions)
    331 {
    332    ir_rebalance_visitor v;
    333 
    334    v.run(instructions);
    335 
    336    return v.progress;
    337 }
    338