Home | History | Annotate | Download | only in glsl
      1 /*
      2  * Copyright  2010 Luca Barbieri
      3  *
      4  * Permission is hereby granted, free of charge, to any person obtaining a
      5  * copy of this software and associated documentation files (the "Software"),
      6  * to deal in the Software without restriction, including without limitation
      7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      8  * and/or sell copies of the Software, and to permit persons to whom the
      9  * Software is furnished to do so, subject to the following conditions:
     10  *
     11  * The above copyright notice and this permission notice (including the next
     12  * paragraph) shall be included in all copies or substantial portions of the
     13  * Software.
     14  *
     15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
     20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
     21  * DEALINGS IN THE SOFTWARE.
     22  */
     23 
     24 /**
     25  * \file lower_jumps.cpp
     26  *
     27  * This pass lowers jumps (break, continue, and return) to if/else structures.
     28  *
     29  * It can be asked to:
     30  * 1. Pull jumps out of ifs where possible
     31  * 2. Remove all "continue"s, replacing them with an "execute flag"
     32  * 3. Replace all "break" with a single conditional one at the end of the loop
     33  * 4. Replace all "return"s with a single return at the end of the function,
     34  *    for the main function and/or other functions
     35  *
     36  * Applying this pass gives several benefits:
     37  * 1. All functions can be inlined.
     38  * 2. nv40 and other pre-DX10 chips without "continue" can be supported
     39  * 3. nv30 and other pre-DX10 chips with no control flow at all are better
     40  *    supported
     41  *
     42  * Continues are lowered by adding a per-loop "execute flag", initialized to
     43  * true, that when cleared inhibits all execution until the end of the loop.
     44  *
     45  * Breaks are lowered to continues, plus setting a "break flag" that is checked
     46  * at the end of the loop, and trigger the unique "break".
     47  *
     48  * Returns are lowered to breaks/continues, plus adding a "return flag" that
     49  * causes loops to break again out of their enclosing loops until all the
     50  * loops are exited: then the "execute flag" logic will ignore everything
     51  * until the end of the function.
     52  *
     53  * Note that "continue" and "return" can also be implemented by adding
     54  * a dummy loop and using break.
     55  * However, this is bad for hardware with limited nesting depth, and
     56  * prevents further optimization, and thus is not currently performed.
     57  */
     58 
     59 #include "compiler/glsl_types.h"
     60 #include <string.h>
     61 #include "ir.h"
     62 
     63 /**
     64  * Enum recording the result of analyzing how control flow might exit
     65  * an IR node.
     66  *
     67  * Each possible value of jump_strength indicates a strictly stronger
     68  * guarantee on control flow than the previous value.
     69  *
     70  * The ordering of strengths roughly reflects the way jumps are
     71  * lowered: jumps with higher strength tend to be lowered to jumps of
     72  * lower strength.  Accordingly, strength is used as a heuristic to
     73  * determine which lowering to perform first.
     74  *
     75  * This enum is also used by get_jump_strength() to categorize
     76  * instructions as either break, continue, return, or other.  When
     77  * used in this fashion, strength_always_clears_execute_flag is not
     78  * used.
     79  *
     80  * The control flow analysis made by this optimization pass makes two
     81  * simplifying assumptions:
     82  *
     83  * - It ignores discard instructions, since they are lowered by a
     84  *   separate pass (lower_discard.cpp).
     85  *
     86  * - It assumes it is always possible for control to flow from a loop
     87  *   to the instruction immediately following it.  Technically, this
     88  *   is not true (since all execution paths through the loop might
     89  *   jump back to the top, or return from the function).
     90  *
     91  * Both of these simplifying assumtions are safe, since they can never
     92  * cause reachable code to be incorrectly classified as unreachable;
     93  * they can only do the opposite.
     94  */
     95 enum jump_strength
     96 {
     97    /**
     98     * Analysis has produced no guarantee on how control flow might
     99     * exit this IR node.  It might fall out the bottom (with or
    100     * without clearing the execute flag, if present), or it might
    101     * continue to the top of the innermost enclosing loop, break out
    102     * of it, or return from the function.
    103     */
    104    strength_none,
    105 
    106    /**
    107     * The only way control can fall out the bottom of this node is
    108     * through a code path that clears the execute flag.  It might also
    109     * continue to the top of the innermost enclosing loop, break out
    110     * of it, or return from the function.
    111     */
    112    strength_always_clears_execute_flag,
    113 
    114    /**
    115     * Control cannot fall out the bottom of this node.  It might
    116     * continue to the top of the innermost enclosing loop, break out
    117     * of it, or return from the function.
    118     */
    119    strength_continue,
    120 
    121    /**
    122     * Control cannot fall out the bottom of this node, or continue the
    123     * top of the innermost enclosing loop.  It can only break out of
    124     * it or return from the function.
    125     */
    126    strength_break,
    127 
    128    /**
    129     * Control cannot fall out the bottom of this node, continue to the
    130     * top of the innermost enclosing loop, or break out of it.  It can
    131     * only return from the function.
    132     */
    133    strength_return
    134 };
    135 
    136 namespace {
    137 
    138 struct block_record
    139 {
    140    /* minimum jump strength (of lowered IR, not pre-lowering IR)
    141     *
    142     * If the block ends with a jump, must be the strength of the jump.
    143     * Otherwise, the jump would be dead and have been deleted before)
    144     *
    145     * If the block doesn't end with a jump, it can be different than strength_none if all paths before it lead to some jump
    146     * (e.g. an if with a return in one branch, and a break in the other, while not lowering them)
    147     * Note that identical jumps are usually unified though.
    148     */
    149    jump_strength min_strength;
    150 
    151    /* can anything clear the execute flag? */
    152    bool may_clear_execute_flag;
    153 
    154    block_record()
    155    {
    156       this->min_strength = strength_none;
    157       this->may_clear_execute_flag = false;
    158    }
    159 };
    160 
    161 struct loop_record
    162 {
    163    ir_function_signature* signature;
    164    ir_loop* loop;
    165 
    166    /* used to avoid lowering the break used to represent lowered breaks */
    167    unsigned nesting_depth;
    168    bool in_if_at_the_end_of_the_loop;
    169 
    170    bool may_set_return_flag;
    171 
    172    ir_variable* break_flag;
    173    ir_variable* execute_flag; /* cleared to emulate continue */
    174 
    175    loop_record(ir_function_signature* p_signature = 0, ir_loop* p_loop = 0)
    176    {
    177       this->signature = p_signature;
    178       this->loop = p_loop;
    179       this->nesting_depth = 0;
    180       this->in_if_at_the_end_of_the_loop = false;
    181       this->may_set_return_flag = false;
    182       this->break_flag = 0;
    183       this->execute_flag = 0;
    184    }
    185 
    186    ir_variable* get_execute_flag()
    187    {
    188       /* also supported for the "function loop" */
    189       if(!this->execute_flag) {
    190          exec_list& list = this->loop ? this->loop->body_instructions : signature->body;
    191          this->execute_flag = new(this->signature) ir_variable(glsl_type::bool_type, "execute_flag", ir_var_temporary);
    192          list.push_head(new(this->signature) ir_assignment(new(this->signature) ir_dereference_variable(execute_flag), new(this->signature) ir_constant(true)));
    193          list.push_head(this->execute_flag);
    194       }
    195       return this->execute_flag;
    196    }
    197 
    198    ir_variable* get_break_flag()
    199    {
    200       assert(this->loop);
    201       if(!this->break_flag) {
    202          this->break_flag = new(this->signature) ir_variable(glsl_type::bool_type, "break_flag", ir_var_temporary);
    203          this->loop->insert_before(this->break_flag);
    204          this->loop->insert_before(new(this->signature) ir_assignment(new(this->signature) ir_dereference_variable(break_flag), new(this->signature) ir_constant(false)));
    205       }
    206       return this->break_flag;
    207    }
    208 };
    209 
    210 struct function_record
    211 {
    212    ir_function_signature* signature;
    213    ir_variable* return_flag; /* used to break out of all loops and then jump to the return instruction */
    214    ir_variable* return_value;
    215    bool lower_return;
    216    unsigned nesting_depth;
    217 
    218    function_record(ir_function_signature* p_signature = 0,
    219                    bool lower_return = false)
    220    {
    221       this->signature = p_signature;
    222       this->return_flag = 0;
    223       this->return_value = 0;
    224       this->nesting_depth = 0;
    225       this->lower_return = lower_return;
    226    }
    227 
    228    ir_variable* get_return_flag()
    229    {
    230       if(!this->return_flag) {
    231          this->return_flag = new(this->signature) ir_variable(glsl_type::bool_type, "return_flag", ir_var_temporary);
    232          this->signature->body.push_head(new(this->signature) ir_assignment(new(this->signature) ir_dereference_variable(return_flag), new(this->signature) ir_constant(false)));
    233          this->signature->body.push_head(this->return_flag);
    234       }
    235       return this->return_flag;
    236    }
    237 
    238    ir_variable* get_return_value()
    239    {
    240       if(!this->return_value) {
    241          assert(!this->signature->return_type->is_void());
    242          return_value = new(this->signature) ir_variable(this->signature->return_type, "return_value", ir_var_temporary);
    243          this->signature->body.push_head(this->return_value);
    244       }
    245       return this->return_value;
    246    }
    247 };
    248 
    249 struct ir_lower_jumps_visitor : public ir_control_flow_visitor {
    250    /* Postconditions: on exit of any visit() function:
    251     *
    252     * ANALYSIS: this->block.min_strength,
    253     * this->block.may_clear_execute_flag, and
    254     * this->loop.may_set_return_flag are updated to reflect the
    255     * characteristics of the visited statement.
    256     *
    257     * DEAD_CODE_ELIMINATION: If this->block.min_strength is not
    258     * strength_none, the visited node is at the end of its exec_list.
    259     * In other words, any unreachable statements that follow the
    260     * visited statement in its exec_list have been removed.
    261     *
    262     * CONTAINED_JUMPS_LOWERED: If the visited statement contains other
    263     * statements, then should_lower_jump() is false for all of the
    264     * return, break, or continue statements it contains.
    265     *
    266     * Note that visiting a jump does not lower it.  That is the
    267     * responsibility of the statement (or function signature) that
    268     * contains the jump.
    269     */
    270 
    271    bool progress;
    272 
    273    struct function_record function;
    274    struct loop_record loop;
    275    struct block_record block;
    276 
    277    bool pull_out_jumps;
    278    bool lower_continue;
    279    bool lower_break;
    280    bool lower_sub_return;
    281    bool lower_main_return;
    282 
    283    ir_lower_jumps_visitor()
    284       : progress(false),
    285         pull_out_jumps(false),
    286         lower_continue(false),
    287         lower_break(false),
    288         lower_sub_return(false),
    289         lower_main_return(false)
    290    {
    291    }
    292 
    293    void truncate_after_instruction(exec_node *ir)
    294    {
    295       if (!ir)
    296          return;
    297 
    298       while (!ir->get_next()->is_tail_sentinel()) {
    299          ((ir_instruction *)ir->get_next())->remove();
    300          this->progress = true;
    301       }
    302    }
    303 
    304    void move_outer_block_inside(ir_instruction *ir, exec_list *inner_block)
    305    {
    306       while (!ir->get_next()->is_tail_sentinel()) {
    307          ir_instruction *move_ir = (ir_instruction *)ir->get_next();
    308 
    309          move_ir->remove();
    310          inner_block->push_tail(move_ir);
    311       }
    312    }
    313 
    314    /**
    315     * Insert the instructions necessary to lower a return statement,
    316     * before the given return instruction.
    317     */
    318    void insert_lowered_return(ir_return *ir)
    319    {
    320       ir_variable* return_flag = this->function.get_return_flag();
    321       if(!this->function.signature->return_type->is_void()) {
    322          ir_variable* return_value = this->function.get_return_value();
    323          ir->insert_before(
    324             new(ir) ir_assignment(
    325                new (ir) ir_dereference_variable(return_value),
    326                ir->value));
    327       }
    328       ir->insert_before(
    329          new(ir) ir_assignment(
    330             new (ir) ir_dereference_variable(return_flag),
    331             new (ir) ir_constant(true)));
    332       this->loop.may_set_return_flag = true;
    333    }
    334 
    335    /**
    336     * If the given instruction is a return, lower it to instructions
    337     * that store the return value (if there is one), set the return
    338     * flag, and then break.
    339     *
    340     * It is safe to pass NULL to this function.
    341     */
    342    void lower_return_unconditionally(ir_instruction *ir)
    343    {
    344       if (get_jump_strength(ir) != strength_return) {
    345          return;
    346       }
    347       insert_lowered_return((ir_return*)ir);
    348       ir->replace_with(new(ir) ir_loop_jump(ir_loop_jump::jump_break));
    349    }
    350 
    351    /**
    352     * Create the necessary instruction to replace a break instruction.
    353     */
    354    ir_instruction *create_lowered_break()
    355    {
    356       void *ctx = this->function.signature;
    357       return new(ctx) ir_assignment(
    358           new(ctx) ir_dereference_variable(this->loop.get_break_flag()),
    359           new(ctx) ir_constant(true));
    360    }
    361 
    362    /**
    363     * If the given instruction is a break, lower it to an instruction
    364     * that sets the break flag, without consulting
    365     * should_lower_jump().
    366     *
    367     * It is safe to pass NULL to this function.
    368     */
    369    void lower_break_unconditionally(ir_instruction *ir)
    370    {
    371       if (get_jump_strength(ir) != strength_break) {
    372          return;
    373       }
    374       ir->replace_with(create_lowered_break());
    375    }
    376 
    377    /**
    378     * If the block ends in a conditional or unconditional break, lower
    379     * it, even though should_lower_jump() says it needn't be lowered.
    380     */
    381    void lower_final_breaks(exec_list *block)
    382    {
    383       ir_instruction *ir = (ir_instruction *) block->get_tail();
    384       lower_break_unconditionally(ir);
    385       ir_if *ir_if = ir->as_if();
    386       if (ir_if) {
    387           lower_break_unconditionally(
    388               (ir_instruction *) ir_if->then_instructions.get_tail());
    389           lower_break_unconditionally(
    390               (ir_instruction *) ir_if->else_instructions.get_tail());
    391       }
    392    }
    393 
    394    virtual void visit(class ir_loop_jump * ir)
    395    {
    396       /* Eliminate all instructions after each one, since they are
    397        * unreachable.  This satisfies the DEAD_CODE_ELIMINATION
    398        * postcondition.
    399        */
    400       truncate_after_instruction(ir);
    401 
    402       /* Set this->block.min_strength based on this instruction.  This
    403        * satisfies the ANALYSIS postcondition.  It is not necessary to
    404        * update this->block.may_clear_execute_flag or
    405        * this->loop.may_set_return_flag, because an unlowered jump
    406        * instruction can't change any flags.
    407        */
    408       this->block.min_strength = ir->is_break() ? strength_break : strength_continue;
    409 
    410       /* The CONTAINED_JUMPS_LOWERED postcondition is already
    411        * satisfied, because jump statements can't contain other
    412        * statements.
    413        */
    414    }
    415 
    416    virtual void visit(class ir_return * ir)
    417    {
    418       /* Eliminate all instructions after each one, since they are
    419        * unreachable.  This satisfies the DEAD_CODE_ELIMINATION
    420        * postcondition.
    421        */
    422       truncate_after_instruction(ir);
    423 
    424       /* Set this->block.min_strength based on this instruction.  This
    425        * satisfies the ANALYSIS postcondition.  It is not necessary to
    426        * update this->block.may_clear_execute_flag or
    427        * this->loop.may_set_return_flag, because an unlowered return
    428        * instruction can't change any flags.
    429        */
    430       this->block.min_strength = strength_return;
    431 
    432       /* The CONTAINED_JUMPS_LOWERED postcondition is already
    433        * satisfied, because jump statements can't contain other
    434        * statements.
    435        */
    436    }
    437 
    438    virtual void visit(class ir_discard * ir)
    439    {
    440       /* Nothing needs to be done.  The ANALYSIS and
    441        * DEAD_CODE_ELIMINATION postconditions are already satisfied,
    442        * because discard statements are ignored by this optimization
    443        * pass.  The CONTAINED_JUMPS_LOWERED postcondition is already
    444        * satisfied, because discard statements can't contain other
    445        * statements.
    446        */
    447       (void) ir;
    448    }
    449 
    450    enum jump_strength get_jump_strength(ir_instruction* ir)
    451    {
    452       if(!ir)
    453          return strength_none;
    454       else if(ir->ir_type == ir_type_loop_jump) {
    455          if(((ir_loop_jump*)ir)->is_break())
    456             return strength_break;
    457          else
    458             return strength_continue;
    459       } else if(ir->ir_type == ir_type_return)
    460          return strength_return;
    461       else
    462          return strength_none;
    463    }
    464 
    465    bool should_lower_jump(ir_jump* ir)
    466    {
    467       unsigned strength = get_jump_strength(ir);
    468       bool lower;
    469       switch(strength)
    470       {
    471       case strength_none:
    472          lower = false; /* don't change this, code relies on it */
    473          break;
    474       case strength_continue:
    475          lower = lower_continue;
    476          break;
    477       case strength_break:
    478          assert(this->loop.loop);
    479          /* never lower "canonical break" */
    480          if(ir->get_next()->is_tail_sentinel() && (this->loop.nesting_depth == 0
    481                || (this->loop.nesting_depth == 1 && this->loop.in_if_at_the_end_of_the_loop)))
    482             lower = false;
    483          else
    484             lower = lower_break;
    485          break;
    486       case strength_return:
    487          /* never lower return at the end of a this->function */
    488          if(this->function.nesting_depth == 0 && ir->get_next()->is_tail_sentinel())
    489             lower = false;
    490          else
    491             lower = this->function.lower_return;
    492          break;
    493       }
    494       return lower;
    495    }
    496 
    497    block_record visit_block(exec_list* list)
    498    {
    499       /* Note: since visiting a node may change that node's next
    500        * pointer, we can't use visit_exec_list(), because
    501        * visit_exec_list() caches the node's next pointer before
    502        * visiting it.  So we use foreach_in_list() instead.
    503        *
    504        * foreach_in_list() isn't safe if the node being visited gets
    505        * removed, but fortunately this visitor doesn't do that.
    506        */
    507 
    508       block_record saved_block = this->block;
    509       this->block = block_record();
    510       foreach_in_list(ir_instruction, node, list) {
    511          node->accept(this);
    512       }
    513       block_record ret = this->block;
    514       this->block = saved_block;
    515       return ret;
    516    }
    517 
    518    virtual void visit(ir_if *ir)
    519    {
    520       if(this->loop.nesting_depth == 0 && ir->get_next()->is_tail_sentinel())
    521          this->loop.in_if_at_the_end_of_the_loop = true;
    522 
    523       ++this->function.nesting_depth;
    524       ++this->loop.nesting_depth;
    525 
    526       block_record block_records[2];
    527       ir_jump* jumps[2];
    528 
    529       /* Recursively lower nested jumps.  This satisfies the
    530        * CONTAINED_JUMPS_LOWERED postcondition, except in the case of
    531        * unconditional jumps at the end of ir->then_instructions and
    532        * ir->else_instructions, which are handled below.
    533        */
    534       block_records[0] = visit_block(&ir->then_instructions);
    535       block_records[1] = visit_block(&ir->else_instructions);
    536 
    537 retry: /* we get here if we put code after the if inside a branch */
    538 
    539       /* Determine which of ir->then_instructions and
    540        * ir->else_instructions end with an unconditional jump.
    541        */
    542       for(unsigned i = 0; i < 2; ++i) {
    543          exec_list& list = i ? ir->else_instructions : ir->then_instructions;
    544          jumps[i] = 0;
    545          if(!list.is_empty() && get_jump_strength((ir_instruction*)list.get_tail()))
    546             jumps[i] = (ir_jump*)list.get_tail();
    547       }
    548 
    549       /* Loop until we have satisfied the CONTAINED_JUMPS_LOWERED
    550        * postcondition by lowering jumps in both then_instructions and
    551        * else_instructions.
    552        */
    553       for(;;) {
    554          /* Determine the types of the jumps that terminate
    555           * ir->then_instructions and ir->else_instructions.
    556           */
    557          jump_strength jump_strengths[2];
    558 
    559          for(unsigned i = 0; i < 2; ++i) {
    560             if(jumps[i]) {
    561                jump_strengths[i] = block_records[i].min_strength;
    562                assert(jump_strengths[i] == get_jump_strength(jumps[i]));
    563             } else
    564                jump_strengths[i] = strength_none;
    565          }
    566 
    567          /* If both code paths end in a jump, and the jumps are the
    568           * same, and we are pulling out jumps, replace them with a
    569           * single jump that comes after the if instruction.  The new
    570           * jump will be visited next, and it will be lowered if
    571           * necessary by the loop or conditional that encloses it.
    572           */
    573          if(pull_out_jumps && jump_strengths[0] == jump_strengths[1]) {
    574             bool unify = true;
    575             if(jump_strengths[0] == strength_continue)
    576                ir->insert_after(new(ir) ir_loop_jump(ir_loop_jump::jump_continue));
    577             else if(jump_strengths[0] == strength_break)
    578                ir->insert_after(new(ir) ir_loop_jump(ir_loop_jump::jump_break));
    579             /* FINISHME: unify returns with identical expressions */
    580             else if(jump_strengths[0] == strength_return && this->function.signature->return_type->is_void())
    581                ir->insert_after(new(ir) ir_return(NULL));
    582 	    else
    583 	       unify = false;
    584 
    585             if(unify) {
    586                jumps[0]->remove();
    587                jumps[1]->remove();
    588                this->progress = true;
    589 
    590                /* Update jumps[] to reflect the fact that the jumps
    591                 * are gone, and update block_records[] to reflect the
    592                 * fact that control can now flow to the next
    593                 * instruction.
    594                 */
    595                jumps[0] = 0;
    596                jumps[1] = 0;
    597                block_records[0].min_strength = strength_none;
    598                block_records[1].min_strength = strength_none;
    599 
    600                /* The CONTAINED_JUMPS_LOWERED postcondition is now
    601                 * satisfied, so we can break out of the loop.
    602                 */
    603                break;
    604             }
    605          }
    606 
    607          /* lower a jump: if both need to lowered, start with the strongest one, so that
    608           * we might later unify the lowered version with the other one
    609           */
    610          bool should_lower[2];
    611          for(unsigned i = 0; i < 2; ++i)
    612             should_lower[i] = should_lower_jump(jumps[i]);
    613 
    614          int lower;
    615          if(should_lower[1] && should_lower[0])
    616             lower = jump_strengths[1] > jump_strengths[0];
    617          else if(should_lower[0])
    618             lower = 0;
    619          else if(should_lower[1])
    620             lower = 1;
    621          else
    622             /* Neither code path ends in a jump that needs to be
    623              * lowered, so the CONTAINED_JUMPS_LOWERED postcondition
    624              * is satisfied and we can break out of the loop.
    625              */
    626             break;
    627 
    628          if(jump_strengths[lower] == strength_return) {
    629             /* To lower a return, we create a return flag (if the
    630              * function doesn't have one already) and add instructions
    631              * that: 1. store the return value (if this function has a
    632              * non-void return) and 2. set the return flag
    633              */
    634             insert_lowered_return((ir_return*)jumps[lower]);
    635             if(this->loop.loop) {
    636                /* If we are in a loop, replace the return instruction
    637                 * with a break instruction, and then loop so that the
    638                 * break instruction can be lowered if necessary.
    639                 */
    640                ir_loop_jump* lowered = 0;
    641                lowered = new(ir) ir_loop_jump(ir_loop_jump::jump_break);
    642                /* Note: we must update block_records and jumps to
    643                 * reflect the fact that the control path has been
    644                 * altered from a return to a break.
    645                 */
    646                block_records[lower].min_strength = strength_break;
    647                jumps[lower]->replace_with(lowered);
    648                jumps[lower] = lowered;
    649             } else {
    650                /* If we are not in a loop, we then proceed as we would
    651                 * for a continue statement (set the execute flag to
    652                 * false to prevent the rest of the function from
    653                 * executing).
    654                 */
    655                goto lower_continue;
    656             }
    657             this->progress = true;
    658          } else if(jump_strengths[lower] == strength_break) {
    659             /* To lower a break, we create a break flag (if the loop
    660              * doesn't have one already) and add an instruction that
    661              * sets it.
    662              *
    663              * Then we proceed as we would for a continue statement
    664              * (set the execute flag to false to prevent the rest of
    665              * the loop body from executing).
    666              *
    667              * The visit() function for the loop will ensure that the
    668              * break flag is checked after executing the loop body.
    669              */
    670             jumps[lower]->insert_before(create_lowered_break());
    671             goto lower_continue;
    672          } else if(jump_strengths[lower] == strength_continue) {
    673 lower_continue:
    674             /* To lower a continue, we create an execute flag (if the
    675              * loop doesn't have one already) and replace the continue
    676              * with an instruction that clears it.
    677              *
    678              * Note that this code path gets exercised when lowering
    679              * return statements that are not inside a loop, so
    680              * this->loop must be initialized even outside of loops.
    681              */
    682             ir_variable* execute_flag = this->loop.get_execute_flag();
    683             jumps[lower]->replace_with(new(ir) ir_assignment(new (ir) ir_dereference_variable(execute_flag), new (ir) ir_constant(false)));
    684             /* Note: we must update block_records and jumps to reflect
    685              * the fact that the control path has been altered to an
    686              * instruction that clears the execute flag.
    687              */
    688             jumps[lower] = 0;
    689             block_records[lower].min_strength = strength_always_clears_execute_flag;
    690             block_records[lower].may_clear_execute_flag = true;
    691             this->progress = true;
    692 
    693             /* Let the loop run again, in case the other branch of the
    694              * if needs to be lowered too.
    695              */
    696          }
    697       }
    698 
    699       /* move out a jump out if possible */
    700       if(pull_out_jumps) {
    701          /* If one of the branches ends in a jump, and control cannot
    702           * fall out the bottom of the other branch, then we can move
    703           * the jump after the if.
    704           *
    705           * Set move_out to the branch we are moving a jump out of.
    706           */
    707          int move_out = -1;
    708          if(jumps[0] && block_records[1].min_strength >= strength_continue)
    709             move_out = 0;
    710          else if(jumps[1] && block_records[0].min_strength >= strength_continue)
    711             move_out = 1;
    712 
    713          if(move_out >= 0)
    714          {
    715             jumps[move_out]->remove();
    716             ir->insert_after(jumps[move_out]);
    717             /* Note: we must update block_records and jumps to reflect
    718              * the fact that the jump has been moved out of the if.
    719              */
    720             jumps[move_out] = 0;
    721             block_records[move_out].min_strength = strength_none;
    722             this->progress = true;
    723          }
    724       }
    725 
    726       /* Now satisfy the ANALYSIS postcondition by setting
    727        * this->block.min_strength and
    728        * this->block.may_clear_execute_flag based on the
    729        * characteristics of the two branches.
    730        */
    731       if(block_records[0].min_strength < block_records[1].min_strength)
    732          this->block.min_strength = block_records[0].min_strength;
    733       else
    734          this->block.min_strength = block_records[1].min_strength;
    735       this->block.may_clear_execute_flag = this->block.may_clear_execute_flag || block_records[0].may_clear_execute_flag || block_records[1].may_clear_execute_flag;
    736 
    737       /* Now we need to clean up the instructions that follow the
    738        * if.
    739        *
    740        * If those instructions are unreachable, then satisfy the
    741        * DEAD_CODE_ELIMINATION postcondition by eliminating them.
    742        * Otherwise that postcondition is already satisfied.
    743        */
    744       if(this->block.min_strength)
    745          truncate_after_instruction(ir);
    746       else if(this->block.may_clear_execute_flag)
    747       {
    748          /* If the "if" instruction might clear the execute flag, then
    749           * we need to guard any instructions that follow so that they
    750           * are only executed if the execute flag is set.
    751           *
    752           * If one of the branches of the "if" always clears the
    753           * execute flag, and the other branch never clears it, then
    754           * this is easy: just move all the instructions following the
    755           * "if" into the branch that never clears it.
    756           */
    757          int move_into = -1;
    758          if(block_records[0].min_strength && !block_records[1].may_clear_execute_flag)
    759             move_into = 1;
    760          else if(block_records[1].min_strength && !block_records[0].may_clear_execute_flag)
    761             move_into = 0;
    762 
    763          if(move_into >= 0) {
    764             assert(!block_records[move_into].min_strength && !block_records[move_into].may_clear_execute_flag); /* otherwise, we just truncated */
    765 
    766             exec_list* list = move_into ? &ir->else_instructions : &ir->then_instructions;
    767             exec_node* next = ir->get_next();
    768             if(!next->is_tail_sentinel()) {
    769                move_outer_block_inside(ir, list);
    770 
    771                /* If any instructions moved, then we need to visit
    772                 * them (since they are now inside the "if").  Since
    773                 * block_records[move_into] is in its default state
    774                 * (see assertion above), we can safely replace
    775                 * block_records[move_into] with the result of this
    776                 * analysis.
    777                 */
    778                exec_list list;
    779                list.head_sentinel.next = next;
    780                block_records[move_into] = visit_block(&list);
    781 
    782                /*
    783                 * Then we need to re-start our jump lowering, since one
    784                 * of the instructions we moved might be a jump that
    785                 * needs to be lowered.
    786                 */
    787                this->progress = true;
    788                goto retry;
    789             }
    790          } else {
    791             /* If we get here, then the simple case didn't apply; we
    792              * need to actually guard the instructions that follow.
    793              *
    794              * To avoid creating unnecessarily-deep nesting, first
    795              * look through the instructions that follow and unwrap
    796              * any instructions that that are already wrapped in the
    797              * appropriate guard.
    798              */
    799             ir_instruction* ir_after;
    800             for(ir_after = (ir_instruction*)ir->get_next(); !ir_after->is_tail_sentinel();)
    801             {
    802                ir_if* ir_if = ir_after->as_if();
    803                if(ir_if && ir_if->else_instructions.is_empty()) {
    804                   ir_dereference_variable* ir_if_cond_deref = ir_if->condition->as_dereference_variable();
    805                   if(ir_if_cond_deref && ir_if_cond_deref->var == this->loop.execute_flag) {
    806                      ir_instruction* ir_next = (ir_instruction*)ir_after->get_next();
    807                      ir_after->insert_before(&ir_if->then_instructions);
    808                      ir_after->remove();
    809                      ir_after = ir_next;
    810                      continue;
    811                   }
    812                }
    813                ir_after = (ir_instruction*)ir_after->get_next();
    814 
    815                /* only set this if we find any unprotected instruction */
    816                this->progress = true;
    817             }
    818 
    819             /* Then, wrap all the instructions that follow in a single
    820              * guard.
    821              */
    822             if(!ir->get_next()->is_tail_sentinel()) {
    823                assert(this->loop.execute_flag);
    824                ir_if* if_execute = new(ir) ir_if(new(ir) ir_dereference_variable(this->loop.execute_flag));
    825                move_outer_block_inside(ir, &if_execute->then_instructions);
    826                ir->insert_after(if_execute);
    827             }
    828          }
    829       }
    830       --this->loop.nesting_depth;
    831       --this->function.nesting_depth;
    832    }
    833 
    834    virtual void visit(ir_loop *ir)
    835    {
    836       /* Visit the body of the loop, with a fresh data structure in
    837        * this->loop so that the analysis we do here won't bleed into
    838        * enclosing loops.
    839        *
    840        * We assume that all code after a loop is reachable from the
    841        * loop (see comments on enum jump_strength), so the
    842        * DEAD_CODE_ELIMINATION postcondition is automatically
    843        * satisfied, as is the block.min_strength portion of the
    844        * ANALYSIS postcondition.
    845        *
    846        * The block.may_clear_execute_flag portion of the ANALYSIS
    847        * postcondition is automatically satisfied because execute
    848        * flags do not propagate outside of loops.
    849        *
    850        * The loop.may_set_return_flag portion of the ANALYSIS
    851        * postcondition is handled below.
    852        */
    853       ++this->function.nesting_depth;
    854       loop_record saved_loop = this->loop;
    855       this->loop = loop_record(this->function.signature, ir);
    856 
    857       /* Recursively lower nested jumps.  This satisfies the
    858        * CONTAINED_JUMPS_LOWERED postcondition, except in the case of
    859        * an unconditional continue or return at the bottom of the
    860        * loop, which are handled below.
    861        */
    862       block_record body = visit_block(&ir->body_instructions);
    863 
    864       /* If the loop ends in an unconditional continue, eliminate it
    865        * because it is redundant.
    866        */
    867       ir_instruction *ir_last
    868          = (ir_instruction *) ir->body_instructions.get_tail();
    869       if (get_jump_strength(ir_last) == strength_continue) {
    870          ir_last->remove();
    871       }
    872 
    873       /* If the loop ends in an unconditional return, and we are
    874        * lowering returns, lower it.
    875        */
    876       if (this->function.lower_return)
    877          lower_return_unconditionally(ir_last);
    878 
    879       if(body.min_strength >= strength_break) {
    880          /* FINISHME: If the min_strength of the loop body is
    881           * strength_break or strength_return, that means that it
    882           * isn't a loop at all, since control flow always leaves the
    883           * body of the loop via break or return.  In principle the
    884           * loop could be eliminated in this case.  This optimization
    885           * is not implemented yet.
    886           */
    887       }
    888 
    889       if(this->loop.break_flag) {
    890          /* We only get here if we are lowering breaks */
    891          assert (lower_break);
    892 
    893          /* If a break flag was generated while visiting the body of
    894           * the loop, then at least one break was lowered, so we need
    895           * to generate an if statement at the end of the loop that
    896           * does a "break" if the break flag is set.  The break we
    897           * generate won't violate the CONTAINED_JUMPS_LOWERED
    898           * postcondition, because should_lower_jump() always returns
    899           * false for a break that happens at the end of a loop.
    900           *
    901           * However, if the loop already ends in a conditional or
    902           * unconditional break, then we need to lower that break,
    903           * because it won't be at the end of the loop anymore.
    904           */
    905          lower_final_breaks(&ir->body_instructions);
    906 
    907          ir_if* break_if = new(ir) ir_if(new(ir) ir_dereference_variable(this->loop.break_flag));
    908          break_if->then_instructions.push_tail(new(ir) ir_loop_jump(ir_loop_jump::jump_break));
    909          ir->body_instructions.push_tail(break_if);
    910       }
    911 
    912       /* If the body of the loop may set the return flag, then at
    913        * least one return was lowered to a break, so we need to ensure
    914        * that the return flag is checked after the body of the loop is
    915        * executed.
    916        */
    917       if(this->loop.may_set_return_flag) {
    918          assert(this->function.return_flag);
    919          /* Generate the if statement to check the return flag */
    920          ir_if* return_if = new(ir) ir_if(new(ir) ir_dereference_variable(this->function.return_flag));
    921          /* Note: we also need to propagate the knowledge that the
    922           * return flag may get set to the outer context.  This
    923           * satisfies the loop.may_set_return_flag part of the
    924           * ANALYSIS postcondition.
    925           */
    926          saved_loop.may_set_return_flag = true;
    927          if(saved_loop.loop)
    928             /* If this loop is nested inside another one, then the if
    929              * statement that we generated should break out of that
    930              * loop if the return flag is set.  Caller will lower that
    931              * break statement if necessary.
    932              */
    933             return_if->then_instructions.push_tail(new(ir) ir_loop_jump(ir_loop_jump::jump_break));
    934          else {
    935             /* Otherwise, ensure that the instructions that follow are only
    936              * executed if the return flag is clear.  We can do that by moving
    937              * those instructions into the else clause of the generated if
    938              * statement.
    939              */
    940             move_outer_block_inside(ir, &return_if->else_instructions);
    941 
    942             /* In case the loop is embedded inside an if add a new return to
    943              * the return flag then branch and let a future pass tidy it up.
    944              */
    945             if (this->function.signature->return_type->is_void())
    946                return_if->then_instructions.push_tail(new(ir) ir_return(NULL));
    947             else {
    948                assert(this->function.return_value);
    949                ir_variable* return_value = this->function.return_value;
    950                return_if->then_instructions.push_tail(
    951                   new(ir) ir_return(new(ir) ir_dereference_variable(return_value)));
    952             }
    953          }
    954 
    955          ir->insert_after(return_if);
    956       }
    957 
    958       this->loop = saved_loop;
    959       --this->function.nesting_depth;
    960    }
    961 
    962    virtual void visit(ir_function_signature *ir)
    963    {
    964       /* these are not strictly necessary */
    965       assert(!this->function.signature);
    966       assert(!this->loop.loop);
    967 
    968       bool lower_return;
    969       if (strcmp(ir->function_name(), "main") == 0)
    970          lower_return = lower_main_return;
    971       else
    972          lower_return = lower_sub_return;
    973 
    974       function_record saved_function = this->function;
    975       loop_record saved_loop = this->loop;
    976       this->function = function_record(ir, lower_return);
    977       this->loop = loop_record(ir);
    978 
    979       assert(!this->loop.loop);
    980 
    981       /* Visit the body of the function to lower any jumps that occur
    982        * in it, except possibly an unconditional return statement at
    983        * the end of it.
    984        */
    985       visit_block(&ir->body);
    986 
    987       /* If the body ended in an unconditional return of non-void,
    988        * then we don't need to lower it because it's the one canonical
    989        * return.
    990        *
    991        * If the body ended in a return of void, eliminate it because
    992        * it is redundant.
    993        */
    994       if (ir->return_type->is_void() &&
    995           get_jump_strength((ir_instruction *) ir->body.get_tail())) {
    996          ir_jump *jump = (ir_jump *) ir->body.get_tail();
    997          assert (jump->ir_type == ir_type_return);
    998          jump->remove();
    999       }
   1000 
   1001       if(this->function.return_value)
   1002          ir->body.push_tail(new(ir) ir_return(new (ir) ir_dereference_variable(this->function.return_value)));
   1003 
   1004       this->loop = saved_loop;
   1005       this->function = saved_function;
   1006    }
   1007 
   1008    virtual void visit(class ir_function * ir)
   1009    {
   1010       visit_block(&ir->signatures);
   1011    }
   1012 };
   1013 
   1014 } /* anonymous namespace */
   1015 
   1016 bool
   1017 do_lower_jumps(exec_list *instructions, bool pull_out_jumps, bool lower_sub_return, bool lower_main_return, bool lower_continue, bool lower_break)
   1018 {
   1019    ir_lower_jumps_visitor v;
   1020    v.pull_out_jumps = pull_out_jumps;
   1021    v.lower_continue = lower_continue;
   1022    v.lower_break = lower_break;
   1023    v.lower_sub_return = lower_sub_return;
   1024    v.lower_main_return = lower_main_return;
   1025 
   1026    bool progress_ever = false;
   1027    do {
   1028       v.progress = false;
   1029       visit_exec_list(instructions, &v);
   1030       progress_ever = v.progress || progress_ever;
   1031    } while (v.progress);
   1032 
   1033    return progress_ever;
   1034 }
   1035