Home | History | Annotate | Download | only in glsl
      1 /*
      2  * Copyright  2010 Intel Corporation
      3  *
      4  * Permission is hereby granted, free of charge, to any person obtaining a
      5  * copy of this software and associated documentation files (the "Software"),
      6  * to deal in the Software without restriction, including without limitation
      7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      8  * and/or sell copies of the Software, and to permit persons to whom the
      9  * Software is furnished to do so, subject to the following conditions:
     10  *
     11  * The above copyright notice and this permission notice (including the next
     12  * paragraph) shall be included in all copies or substantial portions of the
     13  * Software.
     14  *
     15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
     20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
     21  * DEALINGS IN THE SOFTWARE.
     22  */
     23 
     24 #include "compiler/glsl_types.h"
     25 #include "loop_analysis.h"
     26 #include "ir_hierarchical_visitor.h"
     27 
     28 #include "main/mtypes.h"
     29 
     30 namespace {
     31 
     32 class loop_unroll_visitor : public ir_hierarchical_visitor {
     33 public:
     34    loop_unroll_visitor(loop_state *state,
     35                        const struct gl_shader_compiler_options *options)
     36    {
     37       this->state = state;
     38       this->progress = false;
     39       this->options = options;
     40    }
     41 
     42    virtual ir_visitor_status visit_leave(ir_loop *ir);
     43    void simple_unroll(ir_loop *ir, int iterations);
     44    void complex_unroll(ir_loop *ir, int iterations,
     45                        bool continue_from_then_branch);
     46    void splice_post_if_instructions(ir_if *ir_if, exec_list *splice_dest);
     47 
     48    loop_state *state;
     49 
     50    bool progress;
     51    const struct gl_shader_compiler_options *options;
     52 };
     53 
     54 } /* anonymous namespace */
     55 
     56 static bool
     57 is_break(ir_instruction *ir)
     58 {
     59    return ir != NULL && ir->ir_type == ir_type_loop_jump
     60 		     && ((ir_loop_jump *) ir)->is_break();
     61 }
     62 
     63 class loop_unroll_count : public ir_hierarchical_visitor {
     64 public:
     65    int nodes;
     66    bool unsupported_variable_indexing;
     67    bool array_indexed_by_induction_var_with_exact_iterations;
     68    /* If there are nested loops, the node count will be inaccurate. */
     69    bool nested_loop;
     70 
     71    loop_unroll_count(exec_list *list, loop_variable_state *ls,
     72                      const struct gl_shader_compiler_options *options)
     73       : ls(ls), options(options)
     74    {
     75       nodes = 0;
     76       nested_loop = false;
     77       unsupported_variable_indexing = false;
     78       array_indexed_by_induction_var_with_exact_iterations = false;
     79 
     80       run(list);
     81    }
     82 
     83    virtual ir_visitor_status visit_enter(ir_assignment *)
     84    {
     85       nodes++;
     86       return visit_continue;
     87    }
     88 
     89    virtual ir_visitor_status visit_enter(ir_expression *)
     90    {
     91       nodes++;
     92       return visit_continue;
     93    }
     94 
     95    virtual ir_visitor_status visit_enter(ir_loop *)
     96    {
     97       nested_loop = true;
     98       return visit_continue;
     99    }
    100 
    101    virtual ir_visitor_status visit_enter(ir_dereference_array *ir)
    102    {
    103       /* Force unroll in case of dynamic indexing with sampler arrays
    104        * when EmitNoIndirectSampler is set.
    105        */
    106       if (options->EmitNoIndirectSampler) {
    107          if ((ir->array->type->is_array() &&
    108               ir->array->type->contains_sampler()) &&
    109              !ir->array_index->constant_expression_value()) {
    110             unsupported_variable_indexing = true;
    111             return visit_continue;
    112          }
    113       }
    114 
    115       /* Check for arrays variably-indexed by a loop induction variable.
    116        * Unrolling the loop may convert that access into constant-indexing.
    117        *
    118        * Many drivers don't support particular kinds of variable indexing,
    119        * and have to resort to using lower_variable_index_to_cond_assign to
    120        * handle it.  This results in huge amounts of horrible code, so we'd
    121        * like to avoid that if possible.  Here, we just note that it will
    122        * happen.
    123        */
    124       if ((ir->array->type->is_array() || ir->array->type->is_matrix()) &&
    125           !ir->array_index->as_constant()) {
    126          ir_variable *array = ir->array->variable_referenced();
    127          loop_variable *lv = ls->get(ir->array_index->variable_referenced());
    128          if (array && lv && lv->is_induction_var()) {
    129             /* If an array is indexed by a loop induction variable, and the
    130              * array size is exactly the number of loop iterations, this is
    131              * probably a simple for-loop trying to access each element in
    132              * turn; the application may expect it to be unrolled.
    133              */
    134             if (int(array->type->length) == ls->limiting_terminator->iterations)
    135                array_indexed_by_induction_var_with_exact_iterations = true;
    136 
    137             switch (array->data.mode) {
    138             case ir_var_auto:
    139             case ir_var_temporary:
    140             case ir_var_const_in:
    141             case ir_var_function_in:
    142             case ir_var_function_out:
    143             case ir_var_function_inout:
    144                if (options->EmitNoIndirectTemp)
    145                   unsupported_variable_indexing = true;
    146                break;
    147             case ir_var_uniform:
    148             case ir_var_shader_storage:
    149                if (options->EmitNoIndirectUniform)
    150                   unsupported_variable_indexing = true;
    151                break;
    152             case ir_var_shader_in:
    153                if (options->EmitNoIndirectInput)
    154                   unsupported_variable_indexing = true;
    155                break;
    156             case ir_var_shader_out:
    157                if (options->EmitNoIndirectOutput)
    158                   unsupported_variable_indexing = true;
    159                break;
    160             }
    161          }
    162       }
    163       return visit_continue;
    164    }
    165 
    166 private:
    167    loop_variable_state *ls;
    168    const struct gl_shader_compiler_options *options;
    169 };
    170 
    171 
    172 /**
    173  * Unroll a loop which does not contain any jumps.  For example, if the input
    174  * is:
    175  *
    176  *     (loop (...) ...instrs...)
    177  *
    178  * And the iteration count is 3, the output will be:
    179  *
    180  *     ...instrs... ...instrs... ...instrs...
    181  */
    182 void
    183 loop_unroll_visitor::simple_unroll(ir_loop *ir, int iterations)
    184 {
    185    void *const mem_ctx = ralloc_parent(ir);
    186 
    187    for (int i = 0; i < iterations; i++) {
    188       exec_list copy_list;
    189 
    190       copy_list.make_empty();
    191       clone_ir_list(mem_ctx, &copy_list, &ir->body_instructions);
    192 
    193       ir->insert_before(&copy_list);
    194    }
    195 
    196    /* The loop has been replaced by the unrolled copies.  Remove the original
    197     * loop from the IR sequence.
    198     */
    199    ir->remove();
    200 
    201    this->progress = true;
    202 }
    203 
    204 
    205 /**
    206  * Unroll a loop whose last statement is an ir_if.  If \c
    207  * continue_from_then_branch is true, the loop is repeated only when the
    208  * "then" branch of the if is taken; otherwise it is repeated only when the
    209  * "else" branch of the if is taken.
    210  *
    211  * For example, if the input is:
    212  *
    213  *     (loop (...)
    214  *      ...body...
    215  *      (if (cond)
    216  *          (...then_instrs...)
    217  *        (...else_instrs...)))
    218  *
    219  * And the iteration count is 3, and \c continue_from_then_branch is true,
    220  * then the output will be:
    221  *
    222  *     ...body...
    223  *     (if (cond)
    224  *         (...then_instrs...
    225  *          ...body...
    226  *          (if (cond)
    227  *              (...then_instrs...
    228  *               ...body...
    229  *               (if (cond)
    230  *                   (...then_instrs...)
    231  *                 (...else_instrs...)))
    232  *            (...else_instrs...)))
    233  *       (...else_instrs))
    234  */
    235 void
    236 loop_unroll_visitor::complex_unroll(ir_loop *ir, int iterations,
    237                                     bool continue_from_then_branch)
    238 {
    239    void *const mem_ctx = ralloc_parent(ir);
    240    ir_instruction *ir_to_replace = ir;
    241 
    242    for (int i = 0; i < iterations; i++) {
    243       exec_list copy_list;
    244 
    245       copy_list.make_empty();
    246       clone_ir_list(mem_ctx, &copy_list, &ir->body_instructions);
    247 
    248       ir_if *ir_if = ((ir_instruction *) copy_list.get_tail())->as_if();
    249       assert(ir_if != NULL);
    250 
    251       ir_to_replace->insert_before(&copy_list);
    252       ir_to_replace->remove();
    253 
    254       /* placeholder that will be removed in the next iteration */
    255       ir_to_replace =
    256          new(mem_ctx) ir_loop_jump(ir_loop_jump::jump_continue);
    257 
    258       exec_list *const list = (continue_from_then_branch)
    259          ? &ir_if->then_instructions : &ir_if->else_instructions;
    260 
    261       list->push_tail(ir_to_replace);
    262    }
    263 
    264    ir_to_replace->remove();
    265 
    266    this->progress = true;
    267 }
    268 
    269 
    270 /**
    271  * Move all of the instructions which follow \c ir_if to the end of
    272  * \c splice_dest.
    273  *
    274  * For example, in the code snippet:
    275  *
    276  *     (if (cond)
    277  *         (...then_instructions...
    278  *          break)
    279  *       (...else_instructions...))
    280  *     ...post_if_instructions...
    281  *
    282  * If \c ir_if points to the "if" instruction, and \c splice_dest points to
    283  * (...else_instructions...), the code snippet is transformed into:
    284  *
    285  *     (if (cond)
    286  *         (...then_instructions...
    287  *          break)
    288  *       (...else_instructions...
    289  *        ...post_if_instructions...))
    290  */
    291 void
    292 loop_unroll_visitor::splice_post_if_instructions(ir_if *ir_if,
    293                                                  exec_list *splice_dest)
    294 {
    295    while (!ir_if->get_next()->is_tail_sentinel()) {
    296       ir_instruction *move_ir = (ir_instruction *) ir_if->get_next();
    297 
    298       move_ir->remove();
    299       splice_dest->push_tail(move_ir);
    300    }
    301 }
    302 
    303 
    304 ir_visitor_status
    305 loop_unroll_visitor::visit_leave(ir_loop *ir)
    306 {
    307    loop_variable_state *const ls = this->state->get(ir);
    308    int iterations;
    309 
    310    /* If we've entered a loop that hasn't been analyzed, something really,
    311     * really bad has happened.
    312     */
    313    if (ls == NULL) {
    314       assert(ls != NULL);
    315       return visit_continue;
    316    }
    317 
    318    if (ls->limiting_terminator == NULL) {
    319       ir_instruction *last_ir =
    320          (ir_instruction *) ir->body_instructions.get_tail();
    321 
    322       /* If a loop has no induction variable and the last instruction is
    323        * a break, unroll the loop with a count of 1.  This is the classic
    324        *
    325        *    do {
    326        *        // ...
    327        *    } while (false)
    328        *
    329        * that is used to wrap multi-line macros.
    330        *
    331        * If num_loop_jumps is not zero, last_ir cannot be NULL... there has to
    332        * be at least num_loop_jumps instructions in the loop.
    333        */
    334       if (ls->num_loop_jumps == 1 && is_break(last_ir)) {
    335          last_ir->remove();
    336 
    337          simple_unroll(ir, 1);
    338       }
    339 
    340       /* Don't try to unroll loops where the number of iterations is not known
    341        * at compile-time.
    342        */
    343       return visit_continue;
    344    }
    345 
    346    iterations = ls->limiting_terminator->iterations;
    347 
    348    const int max_iterations = options->MaxUnrollIterations;
    349 
    350    /* Don't try to unroll loops that have zillions of iterations either.
    351     */
    352    if (iterations > max_iterations)
    353       return visit_continue;
    354 
    355    /* Don't try to unroll nested loops and loops with a huge body.
    356     */
    357    loop_unroll_count count(&ir->body_instructions, ls, options);
    358 
    359    bool loop_too_large =
    360       count.nested_loop || count.nodes * iterations > max_iterations * 5;
    361 
    362    if (loop_too_large && !count.unsupported_variable_indexing &&
    363        !count.array_indexed_by_induction_var_with_exact_iterations)
    364       return visit_continue;
    365 
    366    /* Note: the limiting terminator contributes 1 to ls->num_loop_jumps.
    367     * We'll be removing the limiting terminator before we unroll.
    368     */
    369    assert(ls->num_loop_jumps > 0);
    370    unsigned predicted_num_loop_jumps = ls->num_loop_jumps - 1;
    371 
    372    if (predicted_num_loop_jumps > 1)
    373       return visit_continue;
    374 
    375    if (predicted_num_loop_jumps == 0) {
    376       ls->limiting_terminator->ir->remove();
    377       simple_unroll(ir, iterations);
    378       return visit_continue;
    379    }
    380 
    381    ir_instruction *last_ir = (ir_instruction *) ir->body_instructions.get_tail();
    382    assert(last_ir != NULL);
    383 
    384    if (is_break(last_ir)) {
    385       /* If the only loop-jump is a break at the end of the loop, the loop
    386        * will execute exactly once.  Remove the break and use the simple
    387        * unroller with an iteration count of 1.
    388        */
    389       last_ir->remove();
    390 
    391       ls->limiting_terminator->ir->remove();
    392       simple_unroll(ir, 1);
    393       return visit_continue;
    394    }
    395 
    396    /* recognize loops in the form produced by ir_lower_jumps */
    397    foreach_in_list(ir_instruction, cur_ir, &ir->body_instructions) {
    398       /* Skip the limiting terminator, since it will go away when we
    399        * unroll.
    400        */
    401       if (cur_ir == ls->limiting_terminator->ir)
    402          continue;
    403 
    404       ir_if *ir_if = cur_ir->as_if();
    405       if (ir_if != NULL) {
    406          /* Determine which if-statement branch, if any, ends with a
    407           * break.  The branch that did *not* have the break will get a
    408           * temporary continue inserted in each iteration of the loop
    409           * unroll.
    410           *
    411           * Note that since ls->num_loop_jumps is <= 1, it is impossible
    412           * for both branches to end with a break.
    413           */
    414          ir_instruction *ir_if_last =
    415             (ir_instruction *) ir_if->then_instructions.get_tail();
    416 
    417          if (is_break(ir_if_last)) {
    418             ls->limiting_terminator->ir->remove();
    419             splice_post_if_instructions(ir_if, &ir_if->else_instructions);
    420             ir_if_last->remove();
    421             complex_unroll(ir, iterations, false);
    422             return visit_continue;
    423          } else {
    424             ir_if_last =
    425                (ir_instruction *) ir_if->else_instructions.get_tail();
    426 
    427             if (is_break(ir_if_last)) {
    428                ls->limiting_terminator->ir->remove();
    429                splice_post_if_instructions(ir_if, &ir_if->then_instructions);
    430                ir_if_last->remove();
    431                complex_unroll(ir, iterations, true);
    432                return visit_continue;
    433             }
    434          }
    435       }
    436    }
    437 
    438    /* Did not find the break statement.  It must be in a complex if-nesting,
    439     * so don't try to unroll.
    440     */
    441    return visit_continue;
    442 }
    443 
    444 
    445 bool
    446 unroll_loops(exec_list *instructions, loop_state *ls,
    447              const struct gl_shader_compiler_options *options)
    448 {
    449    loop_unroll_visitor v(ls, options);
    450 
    451    v.run(instructions);
    452 
    453    return v.progress;
    454 }
    455