/*
 * Copyright © 2010 Intel Corporation
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * the rights to use, copy, modify, merge, publish, distribute, sublicense,
 * and/or sell copies of the Software, and to permit persons to whom the
 * Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
 * DEALINGS IN THE SOFTWARE.
 */

/**
 * \file lower_if_to_cond_assign.cpp
 *
 * This flattens if-statements to conditional assignments if:
 *
 * - the GPU has limited or no flow control support
 *   (controlled by max_depth)
 *
 * - small conditional branches are more expensive than conditional assignments
 *   (controlled by min_branch_cost: the minimum estimated cost at which a
 *    branch is preserved rather than flattened)
 *
 * It can't handle other control flow inside the blocks it flattens, such
 * as calls or loops.  Hopefully loop unrolling and inlining will take
 * care of those.
 *
 * Drivers for GPUs with no control flow support should simply call
 *
 *    lower_if_to_cond_assign(stage, instructions, 0, 0)
 *
 * to attempt to flatten all if-statements.
 *
 * Some GPUs (such as i965 prior to gen6) do support control flow, but have a
 * maximum nesting depth N.  Drivers for such hardware can call
 *
 *    lower_if_to_cond_assign(stage, instructions, N, 0)
 *
 * to attempt to flatten any if-statements appearing at depth > N.
 */
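
/*
 * As a rough illustration (a source-level sketch, not the literal IR this
 * pass emits), lowering
 *
 *    if (cond) {
 *       color = a;
 *    } else {
 *       color = b;
 *    }
 *
 * produces something along the lines of
 *
 *    bool if_to_cond_assign_then = cond;
 *    color = a;    (assignment predicated on if_to_cond_assign_then)
 *    bool if_to_cond_assign_else = !if_to_cond_assign_then;
 *    color = b;    (assignment predicated on if_to_cond_assign_else)
 *
 * where each moved assignment carries the condition variable as its write
 * condition.  The names "cond", "color", "a" and "b" are placeholders; the
 * "if_to_cond_assign_then"/"_else" temporaries are the ones created below.
 */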

#include "compiler/glsl_types.h"
#include "ir.h"
#include "util/set.h"
#include "util/hash_table.h" /* Needed for the hashing functions */
#include "main/macros.h" /* for MAX2 */

namespace {

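/**
 * Hierarchical visitor that tracks if-statement nesting depth and, in
 * visit_leave(ir_if *), decides whether each if-statement should be
 * flattened into conditional assignments.
 */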
class ir_if_to_cond_assign_visitor : public ir_hierarchical_visitor {
public:
   ir_if_to_cond_assign_visitor(gl_shader_stage stage,
                                unsigned max_depth,
                                unsigned min_branch_cost)
   {
      this->progress = false;
      this->stage = stage;
      this->max_depth = max_depth;
      this->min_branch_cost = min_branch_cost;
      this->depth = 0;

      this->condition_variables =
         _mesa_set_create(NULL, _mesa_hash_pointer,
                          _mesa_key_pointer_equal);
   }

   ~ir_if_to_cond_assign_visitor()
   {
      _mesa_set_destroy(this->condition_variables, NULL);
   }

   ir_visitor_status visit_enter(ir_if *);
   ir_visitor_status visit_leave(ir_if *);

   bool found_unsupported_op;
   bool found_expensive_op;
   bool found_dynamic_arrayref;
   bool is_then;
   bool progress;
   gl_shader_stage stage;
   unsigned then_cost;
   unsigned else_cost;
   unsigned min_branch_cost;
   unsigned max_depth;
   unsigned depth;

   struct set *condition_variables;
};

} /* anonymous namespace */

bool
lower_if_to_cond_assign(gl_shader_stage stage, exec_list *instructions,
                        unsigned max_depth, unsigned min_branch_cost)
{
   if (max_depth == UINT_MAX)
      return false;

   ir_if_to_cond_assign_visitor v(stage, max_depth, min_branch_cost);

   visit_list_elements(&v, instructions);

   return v.progress;
}

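/**
 * visit_tree() callback run on every node of a then- or else-block.
 *
 * Flags instructions this pass can't flatten (calls, discards, loops, jumps,
 * returns, EmitVertex/EndPrimitive, barriers, and TCS output accesses),
 * flags expensive operations (textures) and non-constant array indexing,
 * and accumulates a rough cost for the block selected by v->is_then.
 */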
void
check_ir_node(ir_instruction *ir, void *data)
{
   ir_if_to_cond_assign_visitor *v = (ir_if_to_cond_assign_visitor *)data;

   switch (ir->ir_type) {
   case ir_type_call:
   case ir_type_discard:
   case ir_type_loop:
   case ir_type_loop_jump:
   case ir_type_return:
   case ir_type_emit_vertex:
   case ir_type_end_primitive:
   case ir_type_barrier:
      v->found_unsupported_op = true;
      break;

   case ir_type_dereference_variable: {
      ir_variable *var = ir->as_dereference_variable()->variable_referenced();

      /* Lowering branches with TCS output accesses breaks many piglit tests,
       * so don't touch them for now.
       */
      if (v->stage == MESA_SHADER_TESS_CTRL &&
          var->data.mode == ir_var_shader_out)
         v->found_unsupported_op = true;
      break;
   }

   /* SSBO, images, atomic counters are handled by ir_type_call */
   case ir_type_texture:
      v->found_expensive_op = true;
      break;

   case ir_type_dereference_array: {
      ir_dereference_array *deref = ir->as_dereference_array();

      if (deref->array_index->ir_type != ir_type_constant)
         v->found_dynamic_arrayref = true;
   } /* fall-through */
   case ir_type_expression:
   case ir_type_dereference_record:
      if (v->is_then)
         v->then_cost++;
      else
         v->else_cost++;
      break;

   default:
      break;
   }
}

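/**
 * Move every instruction in \p instructions (a then- or else-block) to just
 * before \p if_ir, turning each assignment into a conditional assignment on
 * \p cond_expr (ANDed with any condition the assignment already had).
 *
 * Assignments to condition variables created by earlier (nested) lowering,
 * tracked in \p set, instead get \p cond_expr ANDed into their right-hand
 * side, so the variable still reads false when the enclosing condition is
 * false.
 */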
void
move_block_to_cond_assign(void *mem_ctx,
                          ir_if *if_ir, ir_rvalue *cond_expr,
                          exec_list *instructions,
                          struct set *set)
{
   foreach_in_list_safe(ir_instruction, ir, instructions) {
      if (ir->ir_type == ir_type_assignment) {
         ir_assignment *assign = (ir_assignment *)ir;

         if (_mesa_set_search(set, assign) == NULL) {
            _mesa_set_add(set, assign);

            /* If the LHS of the assignment is a condition variable that was
             * previously added, AND the new condition into the RHS instead
             * of predicating the write, so the variable still ends up false
             * when the enclosing condition is false.
             */
            const bool assign_to_cv =
               _mesa_set_search(
                     set, assign->lhs->variable_referenced()) != NULL;

            if (!assign->condition) {
               if (assign_to_cv) {
                  assign->rhs =
                     new(mem_ctx) ir_expression(ir_binop_logic_and,
                                                glsl_type::bool_type,
                                                cond_expr->clone(mem_ctx, NULL),
                                                assign->rhs);
               } else {
                  assign->condition = cond_expr->clone(mem_ctx, NULL);
               }
            } else {
               assign->condition =
                  new(mem_ctx) ir_expression(ir_binop_logic_and,
                                             glsl_type::bool_type,
                                             cond_expr->clone(mem_ctx, NULL),
                                             assign->condition);
            }
         }
      }

      /* Now, move from the if block to the block surrounding it. */
      ir->remove();
      if_ir->insert_before(ir);
   }
}

ir_visitor_status
ir_if_to_cond_assign_visitor::visit_enter(ir_if *ir)
{
   (void) ir;
   this->depth++;

   return visit_continue;
}

ir_visitor_status
ir_if_to_cond_assign_visitor::visit_leave(ir_if *ir)
{
   bool must_lower = this->depth-- > this->max_depth;

   /* Only flatten when beyond the GPU's maximum supported nesting depth. */
   if (!must_lower && this->min_branch_cost == 0)
      return visit_continue;

   this->found_unsupported_op = false;
   this->found_expensive_op = false;
   this->found_dynamic_arrayref = false;
   this->then_cost = 0;
   this->else_cost = 0;

   ir_assignment *assign;

   /* Check that both blocks don't contain anything we can't support. */
   this->is_then = true;
   foreach_in_list(ir_instruction, then_ir, &ir->then_instructions) {
      visit_tree(then_ir, check_ir_node, this);
   }

   this->is_then = false;
   foreach_in_list(ir_instruction, else_ir, &ir->else_instructions) {
      visit_tree(else_ir, check_ir_node, this);
   }

   if (this->found_unsupported_op)
      return visit_continue; /* can't handle inner unsupported opcodes */

   /* Skip if the branch cost is high enough or if there's an expensive op.
    *
    * Also skip if non-constant array indices were encountered, since those
    * can be out-of-bounds for a not-taken branch, and so generating an
    * assignment would be incorrect. In the case of must_lower, it's up to the
    * backend to deal with any potential fall-out (perhaps by translating the
    * assignments to hardware-predicated moves).
    */
   if (!must_lower &&
       (this->found_expensive_op ||
        this->found_dynamic_arrayref ||
        MAX2(this->then_cost, this->else_cost) >= this->min_branch_cost))
      return visit_continue;

   void *mem_ctx = ralloc_parent(ir);

   /* Store the condition to a variable.  Move all of the instructions from
    * the then-clause of the if-statement.  Use the condition variable as a
    * condition for all assignments.
    */
   ir_variable *const then_var =
      new(mem_ctx) ir_variable(glsl_type::bool_type,
                               "if_to_cond_assign_then",
                               ir_var_temporary);
   ir->insert_before(then_var);

   ir_dereference_variable *then_cond =
      new(mem_ctx) ir_dereference_variable(then_var);

   assign = new(mem_ctx) ir_assignment(then_cond, ir->condition);
   ir->insert_before(assign);

   move_block_to_cond_assign(mem_ctx, ir, then_cond,
                             &ir->then_instructions,
                             this->condition_variables);

   /* Add the new condition variable to the hash table.  This allows us to
    * find this variable when lowering other (enclosing) if-statements.
    */
   _mesa_set_add(this->condition_variables, then_var);
   /* If there are instructions in the else-clause, store the inverse of the
    * condition to a variable.  Move all of the instructions from the
    * else-clause of the if-statement.  Use the (inverse) condition variable
    * as a condition for all assignments.
    */
   if (!ir->else_instructions.is_empty()) {
      ir_variable *const else_var =
         new(mem_ctx) ir_variable(glsl_type::bool_type,
                                  "if_to_cond_assign_else",
                                  ir_var_temporary);
      ir->insert_before(else_var);

      ir_dereference_variable *else_cond =
         new(mem_ctx) ir_dereference_variable(else_var);

      ir_rvalue *inverse =
         new(mem_ctx) ir_expression(ir_unop_logic_not,
                                    then_cond->clone(mem_ctx, NULL));

      assign = new(mem_ctx) ir_assignment(else_cond, inverse);
      ir->insert_before(assign);

      move_block_to_cond_assign(mem_ctx, ir, else_cond,
                                &ir->else_instructions,
                                this->condition_variables);

      /* Add the new condition variable to the hash table.  This allows us to
       * find this variable when lowering other (enclosing) if-statements.
       */
      _mesa_set_add(this->condition_variables, else_var);
   }

   ir->remove();

   this->progress = true;

   return visit_continue;
}