Home | History | Annotate | Download | only in glsl
      1 /*
      2  * Copyright  2010 Luca Barbieri
      3  *
      4  * Permission is hereby granted, free of charge, to any person obtaining a
      5  * copy of this software and associated documentation files (the "Software"),
      6  * to deal in the Software without restriction, including without limitation
      7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      8  * and/or sell copies of the Software, and to permit persons to whom the
      9  * Software is furnished to do so, subject to the following conditions:
     10  *
     11  * The above copyright notice and this permission notice (including the next
     12  * paragraph) shall be included in all copies or substantial portions of the
     13  * Software.
     14  *
     15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
     20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
     21  * DEALINGS IN THE SOFTWARE.
     22  */
     23 
     24 /**
     25  * \file lower_jumps.cpp
     26  *
     27  * This pass lowers jumps (break, continue, and return) to if/else structures.
     28  *
     29  * It can be asked to:
     30  * 1. Pull jumps out of ifs where possible
     31  * 2. Remove all "continue"s, replacing them with an "execute flag"
     32  * 3. Replace all "break" with a single conditional one at the end of the loop
     33  * 4. Replace all "return"s with a single return at the end of the function,
     34  *    for the main function and/or other functions
     35  *
     36  * Applying this pass gives several benefits:
     37  * 1. All functions can be inlined.
     38  * 2. nv40 and other pre-DX10 chips without "continue" can be supported
     39  * 3. nv30 and other pre-DX10 chips with no control flow at all are better
     40  *    supported
     41  *
     42  * Continues are lowered by adding a per-loop "execute flag", initialized to
     43  * true, that when cleared inhibits all execution until the end of the loop.
     44  *
     45  * Breaks are lowered to continues, plus setting a "break flag" that is checked
     46  * at the end of the loop, and trigger the unique "break".
     47  *
     48  * Returns are lowered to breaks/continues, plus adding a "return flag" that
     49  * causes loops to break again out of their enclosing loops until all the
     50  * loops are exited: then the "execute flag" logic will ignore everything
     51  * until the end of the function.
     52  *
     53  * Note that "continue" and "return" can also be implemented by adding
     54  * a dummy loop and using break.
     55  * However, this is bad for hardware with limited nesting depth, and
     56  * prevents further optimization, and thus is not currently performed.
     57  */
     58 
     59 #include "glsl_types.h"
     60 #include <string.h>
     61 #include "ir.h"
     62 
     63 /**
     64  * Enum recording the result of analyzing how control flow might exit
     65  * an IR node.
     66  *
     67  * Each possible value of jump_strength indicates a strictly stronger
     68  * guarantee on control flow than the previous value.
     69  *
     70  * The ordering of strengths roughly reflects the way jumps are
     71  * lowered: jumps with higher strength tend to be lowered to jumps of
     72  * lower strength.  Accordingly, strength is used as a heuristic to
     73  * determine which lowering to perform first.
     74  *
     75  * This enum is also used by get_jump_strength() to categorize
     76  * instructions as either break, continue, return, or other.  When
     77  * used in this fashion, strength_always_clears_execute_flag is not
     78  * used.
     79  *
     80  * The control flow analysis made by this optimization pass makes two
     81  * simplifying assumptions:
     82  *
     83  * - It ignores discard instructions, since they are lowered by a
     84  *   separate pass (lower_discard.cpp).
     85  *
     86  * - It assumes it is always possible for control to flow from a loop
     87  *   to the instruction immediately following it.  Technically, this
     88  *   is not true (since all execution paths through the loop might
     89  *   jump back to the top, or return from the function).
     90  *
     91  * Both of these simplifying assumtions are safe, since they can never
     92  * cause reachable code to be incorrectly classified as unreachable;
     93  * they can only do the opposite.
     94  */
     95 enum jump_strength
     96 {
     97    /**
     98     * Analysis has produced no guarantee on how control flow might
     99     * exit this IR node.  It might fall out the bottom (with or
    100     * without clearing the execute flag, if present), or it might
    101     * continue to the top of the innermost enclosing loop, break out
    102     * of it, or return from the function.
    103     */
    104    strength_none,
    105 
    106    /**
    107     * The only way control can fall out the bottom of this node is
    108     * through a code path that clears the execute flag.  It might also
    109     * continue to the top of the innermost enclosing loop, break out
    110     * of it, or return from the function.
    111     */
    112    strength_always_clears_execute_flag,
    113 
    114    /**
    115     * Control cannot fall out the bottom of this node.  It might
    116     * continue to the top of the innermost enclosing loop, break out
    117     * of it, or return from the function.
    118     */
    119    strength_continue,
    120 
    121    /**
    122     * Control cannot fall out the bottom of this node, or continue the
    123     * top of the innermost enclosing loop.  It can only break out of
    124     * it or return from the function.
    125     */
    126    strength_break,
    127 
    128    /**
    129     * Control cannot fall out the bottom of this node, continue to the
    130     * top of the innermost enclosing loop, or break out of it.  It can
    131     * only return from the function.
    132     */
    133    strength_return
    134 };
    135 
    136 struct block_record
    137 {
    138    /* minimum jump strength (of lowered IR, not pre-lowering IR)
    139     *
    140     * If the block ends with a jump, must be the strength of the jump.
    141     * Otherwise, the jump would be dead and have been deleted before)
    142     *
    143     * If the block doesn't end with a jump, it can be different than strength_none if all paths before it lead to some jump
    144     * (e.g. an if with a return in one branch, and a break in the other, while not lowering them)
    145     * Note that identical jumps are usually unified though.
    146     */
    147    jump_strength min_strength;
    148 
    149    /* can anything clear the execute flag? */
    150    bool may_clear_execute_flag;
    151 
    152    block_record()
    153    {
    154       this->min_strength = strength_none;
    155       this->may_clear_execute_flag = false;
    156    }
    157 };
    158 
    159 struct loop_record
    160 {
    161    ir_function_signature* signature;
    162    ir_loop* loop;
    163 
    164    /* used to avoid lowering the break used to represent lowered breaks */
    165    unsigned nesting_depth;
    166    bool in_if_at_the_end_of_the_loop;
    167 
    168    bool may_set_return_flag;
    169 
    170    ir_variable* break_flag;
    171    ir_variable* execute_flag; /* cleared to emulate continue */
    172 
    173    loop_record(ir_function_signature* p_signature = 0, ir_loop* p_loop = 0)
    174    {
    175       this->signature = p_signature;
    176       this->loop = p_loop;
    177       this->nesting_depth = 0;
    178       this->in_if_at_the_end_of_the_loop = false;
    179       this->may_set_return_flag = false;
    180       this->break_flag = 0;
    181       this->execute_flag = 0;
    182    }
    183 
    184    ir_variable* get_execute_flag()
    185    {
    186       /* also supported for the "function loop" */
    187       if(!this->execute_flag) {
    188          exec_list& list = this->loop ? this->loop->body_instructions : signature->body;
    189          this->execute_flag = new(this->signature) ir_variable(glsl_type::bool_type, "execute_flag", ir_var_temporary);
    190          list.push_head(new(this->signature) ir_assignment(new(this->signature) ir_dereference_variable(execute_flag), new(this->signature) ir_constant(true), 0));
    191          list.push_head(this->execute_flag);
    192       }
    193       return this->execute_flag;
    194    }
    195 
    196    ir_variable* get_break_flag()
    197    {
    198       assert(this->loop);
    199       if(!this->break_flag) {
    200          this->break_flag = new(this->signature) ir_variable(glsl_type::bool_type, "break_flag", ir_var_temporary);
    201          this->loop->insert_before(this->break_flag);
    202          this->loop->insert_before(new(this->signature) ir_assignment(new(this->signature) ir_dereference_variable(break_flag), new(this->signature) ir_constant(false), 0));
    203       }
    204       return this->break_flag;
    205    }
    206 };
    207 
    208 struct function_record
    209 {
    210    ir_function_signature* signature;
    211    ir_variable* return_flag; /* used to break out of all loops and then jump to the return instruction */
    212    ir_variable* return_value;
    213    bool lower_return;
    214    unsigned nesting_depth;
    215 
    216    function_record(ir_function_signature* p_signature = 0,
    217                    bool lower_return = false)
    218    {
    219       this->signature = p_signature;
    220       this->return_flag = 0;
    221       this->return_value = 0;
    222       this->nesting_depth = 0;
    223       this->lower_return = lower_return;
    224    }
    225 
    226    ir_variable* get_return_flag()
    227    {
    228       if(!this->return_flag) {
    229          this->return_flag = new(this->signature) ir_variable(glsl_type::bool_type, "return_flag", ir_var_temporary);
    230          this->signature->body.push_head(new(this->signature) ir_assignment(new(this->signature) ir_dereference_variable(return_flag), new(this->signature) ir_constant(false), 0));
    231          this->signature->body.push_head(this->return_flag);
    232       }
    233       return this->return_flag;
    234    }
    235 
    236    ir_variable* get_return_value()
    237    {
    238       if(!this->return_value) {
    239          assert(!this->signature->return_type->is_void());
    240          return_value = new(this->signature) ir_variable(this->signature->return_type, "return_value", ir_var_temporary);
    241          this->signature->body.push_head(this->return_value);
    242       }
    243       return this->return_value;
    244    }
    245 };
    246 
    247 struct ir_lower_jumps_visitor : public ir_control_flow_visitor {
    248    /* Postconditions: on exit of any visit() function:
    249     *
    250     * ANALYSIS: this->block.min_strength,
    251     * this->block.may_clear_execute_flag, and
    252     * this->loop.may_set_return_flag are updated to reflect the
    253     * characteristics of the visited statement.
    254     *
    255     * DEAD_CODE_ELIMINATION: If this->block.min_strength is not
    256     * strength_none, the visited node is at the end of its exec_list.
    257     * In other words, any unreachable statements that follow the
    258     * visited statement in its exec_list have been removed.
    259     *
    260     * CONTAINED_JUMPS_LOWERED: If the visited statement contains other
    261     * statements, then should_lower_jump() is false for all of the
    262     * return, break, or continue statements it contains.
    263     *
    264     * Note that visiting a jump does not lower it.  That is the
    265     * responsibility of the statement (or function signature) that
    266     * contains the jump.
    267     */
    268 
    269    bool progress;
    270 
    271    struct function_record function;
    272    struct loop_record loop;
    273    struct block_record block;
    274 
    275    bool pull_out_jumps;
    276    bool lower_continue;
    277    bool lower_break;
    278    bool lower_sub_return;
    279    bool lower_main_return;
    280 
    281    ir_lower_jumps_visitor()
    282    {
    283       this->progress = false;
    284    }
    285 
    286    void truncate_after_instruction(exec_node *ir)
    287    {
    288       if (!ir)
    289          return;
    290 
    291       while (!ir->get_next()->is_tail_sentinel()) {
    292          ((ir_instruction *)ir->get_next())->remove();
    293          this->progress = true;
    294       }
    295    }
    296 
    297    void move_outer_block_inside(ir_instruction *ir, exec_list *inner_block)
    298    {
    299       while (!ir->get_next()->is_tail_sentinel()) {
    300          ir_instruction *move_ir = (ir_instruction *)ir->get_next();
    301 
    302          move_ir->remove();
    303          inner_block->push_tail(move_ir);
    304       }
    305    }
    306 
    307    /**
    308     * Insert the instructions necessary to lower a return statement,
    309     * before the given return instruction.
    310     */
    311    void insert_lowered_return(ir_return *ir)
    312    {
    313       ir_variable* return_flag = this->function.get_return_flag();
    314       if(!this->function.signature->return_type->is_void()) {
    315          ir_variable* return_value = this->function.get_return_value();
    316          ir->insert_before(
    317             new(ir) ir_assignment(
    318                new (ir) ir_dereference_variable(return_value),
    319                ir->value));
    320       }
    321       ir->insert_before(
    322          new(ir) ir_assignment(
    323             new (ir) ir_dereference_variable(return_flag),
    324             new (ir) ir_constant(true)));
    325       this->loop.may_set_return_flag = true;
    326    }
    327 
    328    /**
    329     * If the given instruction is a return, lower it to instructions
    330     * that store the return value (if there is one), set the return
    331     * flag, and then break.
    332     *
    333     * It is safe to pass NULL to this function.
    334     */
    335    void lower_return_unconditionally(ir_instruction *ir)
    336    {
    337       if (get_jump_strength(ir) != strength_return) {
    338          return;
    339       }
    340       insert_lowered_return((ir_return*)ir);
    341       ir->replace_with(new(ir) ir_loop_jump(ir_loop_jump::jump_break));
    342    }
    343 
    344    /**
    345     * Create the necessary instruction to replace a break instruction.
    346     */
    347    ir_instruction *create_lowered_break()
    348    {
    349       void *ctx = this->function.signature;
    350       return new(ctx) ir_assignment(
    351           new(ctx) ir_dereference_variable(this->loop.get_break_flag()),
    352           new(ctx) ir_constant(true),
    353           0);
    354    }
    355 
    356    /**
    357     * If the given instruction is a break, lower it to an instruction
    358     * that sets the break flag, without consulting
    359     * should_lower_jump().
    360     *
    361     * It is safe to pass NULL to this function.
    362     */
    363    void lower_break_unconditionally(ir_instruction *ir)
    364    {
    365       if (get_jump_strength(ir) != strength_break) {
    366          return;
    367       }
    368       ir->replace_with(create_lowered_break());
    369    }
    370 
    371    /**
    372     * If the block ends in a conditional or unconditional break, lower
    373     * it, even though should_lower_jump() says it needn't be lowered.
    374     */
    375    void lower_final_breaks(exec_list *block)
    376    {
    377       ir_instruction *ir = (ir_instruction *) block->get_tail();
    378       lower_break_unconditionally(ir);
    379       ir_if *ir_if = ir->as_if();
    380       if (ir_if) {
    381           lower_break_unconditionally(
    382               (ir_instruction *) ir_if->then_instructions.get_tail());
    383           lower_break_unconditionally(
    384               (ir_instruction *) ir_if->else_instructions.get_tail());
    385       }
    386    }
    387 
    388    virtual void visit(class ir_loop_jump * ir)
    389    {
    390       /* Eliminate all instructions after each one, since they are
    391        * unreachable.  This satisfies the DEAD_CODE_ELIMINATION
    392        * postcondition.
    393        */
    394       truncate_after_instruction(ir);
    395 
    396       /* Set this->block.min_strength based on this instruction.  This
    397        * satisfies the ANALYSIS postcondition.  It is not necessary to
    398        * update this->block.may_clear_execute_flag or
    399        * this->loop.may_set_return_flag, because an unlowered jump
    400        * instruction can't change any flags.
    401        */
    402       this->block.min_strength = ir->is_break() ? strength_break : strength_continue;
    403 
    404       /* The CONTAINED_JUMPS_LOWERED postcondition is already
    405        * satisfied, because jump statements can't contain other
    406        * statements.
    407        */
    408    }
    409 
    410    virtual void visit(class ir_return * ir)
    411    {
    412       /* Eliminate all instructions after each one, since they are
    413        * unreachable.  This satisfies the DEAD_CODE_ELIMINATION
    414        * postcondition.
    415        */
    416       truncate_after_instruction(ir);
    417 
    418       /* Set this->block.min_strength based on this instruction.  This
    419        * satisfies the ANALYSIS postcondition.  It is not necessary to
    420        * update this->block.may_clear_execute_flag or
    421        * this->loop.may_set_return_flag, because an unlowered return
    422        * instruction can't change any flags.
    423        */
    424       this->block.min_strength = strength_return;
    425 
    426       /* The CONTAINED_JUMPS_LOWERED postcondition is already
    427        * satisfied, because jump statements can't contain other
    428        * statements.
    429        */
    430    }
    431 
    432    virtual void visit(class ir_discard * ir)
    433    {
    434       /* Nothing needs to be done.  The ANALYSIS and
    435        * DEAD_CODE_ELIMINATION postconditions are already satisfied,
    436        * because discard statements are ignored by this optimization
    437        * pass.  The CONTAINED_JUMPS_LOWERED postcondition is already
    438        * satisfied, because discard statements can't contain other
    439        * statements.
    440        */
    441       (void) ir;
    442    }
    443 
    444    enum jump_strength get_jump_strength(ir_instruction* ir)
    445    {
    446       if(!ir)
    447          return strength_none;
    448       else if(ir->ir_type == ir_type_loop_jump) {
    449          if(((ir_loop_jump*)ir)->is_break())
    450             return strength_break;
    451          else
    452             return strength_continue;
    453       } else if(ir->ir_type == ir_type_return)
    454          return strength_return;
    455       else
    456          return strength_none;
    457    }
    458 
    459    bool should_lower_jump(ir_jump* ir)
    460    {
    461       unsigned strength = get_jump_strength(ir);
    462       bool lower;
    463       switch(strength)
    464       {
    465       case strength_none:
    466          lower = false; /* don't change this, code relies on it */
    467          break;
    468       case strength_continue:
    469          lower = lower_continue;
    470          break;
    471       case strength_break:
    472          assert(this->loop.loop);
    473          /* never lower "canonical break" */
    474          if(ir->get_next()->is_tail_sentinel() && (this->loop.nesting_depth == 0
    475                || (this->loop.nesting_depth == 1 && this->loop.in_if_at_the_end_of_the_loop)))
    476             lower = false;
    477          else
    478             lower = lower_break;
    479          break;
    480       case strength_return:
    481          /* never lower return at the end of a this->function */
    482          if(this->function.nesting_depth == 0 && ir->get_next()->is_tail_sentinel())
    483             lower = false;
    484          else
    485             lower = this->function.lower_return;
    486          break;
    487       }
    488       return lower;
    489    }
    490 
    491    block_record visit_block(exec_list* list)
    492    {
    493       /* Note: since visiting a node may change that node's next
    494        * pointer, we can't use visit_exec_list(), because
    495        * visit_exec_list() caches the node's next pointer before
    496        * visiting it.  So we use foreach_list() instead.
    497        *
    498        * foreach_list() isn't safe if the node being visited gets
    499        * removed, but fortunately this visitor doesn't do that.
    500        */
    501 
    502       block_record saved_block = this->block;
    503       this->block = block_record();
    504       foreach_list(node, list) {
    505          ((ir_instruction *) node)->accept(this);
    506       }
    507       block_record ret = this->block;
    508       this->block = saved_block;
    509       return ret;
    510    }
    511 
    512    virtual void visit(ir_if *ir)
    513    {
    514       if(this->loop.nesting_depth == 0 && ir->get_next()->is_tail_sentinel())
    515          this->loop.in_if_at_the_end_of_the_loop = true;
    516 
    517       ++this->function.nesting_depth;
    518       ++this->loop.nesting_depth;
    519 
    520       block_record block_records[2];
    521       ir_jump* jumps[2];
    522 
    523       /* Recursively lower nested jumps.  This satisfies the
    524        * CONTAINED_JUMPS_LOWERED postcondition, except in the case of
    525        * unconditional jumps at the end of ir->then_instructions and
    526        * ir->else_instructions, which are handled below.
    527        */
    528       block_records[0] = visit_block(&ir->then_instructions);
    529       block_records[1] = visit_block(&ir->else_instructions);
    530 
    531 retry: /* we get here if we put code after the if inside a branch */
    532 
    533       /* Determine which of ir->then_instructions and
    534        * ir->else_instructions end with an unconditional jump.
    535        */
    536       for(unsigned i = 0; i < 2; ++i) {
    537          exec_list& list = i ? ir->else_instructions : ir->then_instructions;
    538          jumps[i] = 0;
    539          if(!list.is_empty() && get_jump_strength((ir_instruction*)list.get_tail()))
    540             jumps[i] = (ir_jump*)list.get_tail();
    541       }
    542 
    543       /* Loop until we have satisfied the CONTAINED_JUMPS_LOWERED
    544        * postcondition by lowering jumps in both then_instructions and
    545        * else_instructions.
    546        */
    547       for(;;) {
    548          /* Determine the types of the jumps that terminate
    549           * ir->then_instructions and ir->else_instructions.
    550           */
    551          jump_strength jump_strengths[2];
    552 
    553          for(unsigned i = 0; i < 2; ++i) {
    554             if(jumps[i]) {
    555                jump_strengths[i] = block_records[i].min_strength;
    556                assert(jump_strengths[i] == get_jump_strength(jumps[i]));
    557             } else
    558                jump_strengths[i] = strength_none;
    559          }
    560 
    561          /* If both code paths end in a jump, and the jumps are the
    562           * same, and we are pulling out jumps, replace them with a
    563           * single jump that comes after the if instruction.  The new
    564           * jump will be visited next, and it will be lowered if
    565           * necessary by the loop or conditional that encloses it.
    566           */
    567          if(pull_out_jumps && jump_strengths[0] == jump_strengths[1]) {
    568             bool unify = true;
    569             if(jump_strengths[0] == strength_continue)
    570                ir->insert_after(new(ir) ir_loop_jump(ir_loop_jump::jump_continue));
    571             else if(jump_strengths[0] == strength_break)
    572                ir->insert_after(new(ir) ir_loop_jump(ir_loop_jump::jump_break));
    573             /* FINISHME: unify returns with identical expressions */
    574             else if(jump_strengths[0] == strength_return && this->function.signature->return_type->is_void())
    575                ir->insert_after(new(ir) ir_return(NULL));
    576 	    else
    577 	       unify = false;
    578 
    579             if(unify) {
    580                jumps[0]->remove();
    581                jumps[1]->remove();
    582                this->progress = true;
    583 
    584                /* Update jumps[] to reflect the fact that the jumps
    585                 * are gone, and update block_records[] to reflect the
    586                 * fact that control can now flow to the next
    587                 * instruction.
    588                 */
    589                jumps[0] = 0;
    590                jumps[1] = 0;
    591                block_records[0].min_strength = strength_none;
    592                block_records[1].min_strength = strength_none;
    593 
    594                /* The CONTAINED_JUMPS_LOWERED postcondition is now
    595                 * satisfied, so we can break out of the loop.
    596                 */
    597                break;
    598             }
    599          }
    600 
    601          /* lower a jump: if both need to lowered, start with the strongest one, so that
    602           * we might later unify the lowered version with the other one
    603           */
    604          bool should_lower[2];
    605          for(unsigned i = 0; i < 2; ++i)
    606             should_lower[i] = should_lower_jump(jumps[i]);
    607 
    608          int lower;
    609          if(should_lower[1] && should_lower[0])
    610             lower = jump_strengths[1] > jump_strengths[0];
    611          else if(should_lower[0])
    612             lower = 0;
    613          else if(should_lower[1])
    614             lower = 1;
    615          else
    616             /* Neither code path ends in a jump that needs to be
    617              * lowered, so the CONTAINED_JUMPS_LOWERED postcondition
    618              * is satisfied and we can break out of the loop.
    619              */
    620             break;
    621 
    622          if(jump_strengths[lower] == strength_return) {
    623             /* To lower a return, we create a return flag (if the
    624              * function doesn't have one already) and add instructions
    625              * that: 1. store the return value (if this function has a
    626              * non-void return) and 2. set the return flag
    627              */
    628             insert_lowered_return((ir_return*)jumps[lower]);
    629             if(this->loop.loop) {
    630                /* If we are in a loop, replace the return instruction
    631                 * with a break instruction, and then loop so that the
    632                 * break instruction can be lowered if necessary.
    633                 */
    634                ir_loop_jump* lowered = 0;
    635                lowered = new(ir) ir_loop_jump(ir_loop_jump::jump_break);
    636                /* Note: we must update block_records and jumps to
    637                 * reflect the fact that the control path has been
    638                 * altered from a return to a break.
    639                 */
    640                block_records[lower].min_strength = strength_break;
    641                jumps[lower]->replace_with(lowered);
    642                jumps[lower] = lowered;
    643             } else {
    644                /* If we are not in a loop, we then proceed as we would
    645                 * for a continue statement (set the execute flag to
    646                 * false to prevent the rest of the function from
    647                 * executing).
    648                 */
    649                goto lower_continue;
    650             }
    651             this->progress = true;
    652          } else if(jump_strengths[lower] == strength_break) {
    653             /* To lower a break, we create a break flag (if the loop
    654              * doesn't have one already) and add an instruction that
    655              * sets it.
    656              *
    657              * Then we proceed as we would for a continue statement
    658              * (set the execute flag to false to prevent the rest of
    659              * the loop body from executing).
    660              *
    661              * The visit() function for the loop will ensure that the
    662              * break flag is checked after executing the loop body.
    663              */
    664             jumps[lower]->insert_before(create_lowered_break());
    665             goto lower_continue;
    666          } else if(jump_strengths[lower] == strength_continue) {
    667 lower_continue:
    668             /* To lower a continue, we create an execute flag (if the
    669              * loop doesn't have one already) and replace the continue
    670              * with an instruction that clears it.
    671              *
    672              * Note that this code path gets exercised when lowering
    673              * return statements that are not inside a loop, so
    674              * this->loop must be initialized even outside of loops.
    675              */
    676             ir_variable* execute_flag = this->loop.get_execute_flag();
    677             jumps[lower]->replace_with(new(ir) ir_assignment(new (ir) ir_dereference_variable(execute_flag), new (ir) ir_constant(false), 0));
    678             /* Note: we must update block_records and jumps to reflect
    679              * the fact that the control path has been altered to an
    680              * instruction that clears the execute flag.
    681              */
    682             jumps[lower] = 0;
    683             block_records[lower].min_strength = strength_always_clears_execute_flag;
    684             block_records[lower].may_clear_execute_flag = true;
    685             this->progress = true;
    686 
    687             /* Let the loop run again, in case the other branch of the
    688              * if needs to be lowered too.
    689              */
    690          }
    691       }
    692 
    693       /* move out a jump out if possible */
    694       if(pull_out_jumps) {
    695          /* If one of the branches ends in a jump, and control cannot
    696           * fall out the bottom of the other branch, then we can move
    697           * the jump after the if.
    698           *
    699           * Set move_out to the branch we are moving a jump out of.
    700           */
    701          int move_out = -1;
    702          if(jumps[0] && block_records[1].min_strength >= strength_continue)
    703             move_out = 0;
    704          else if(jumps[1] && block_records[0].min_strength >= strength_continue)
    705             move_out = 1;
    706 
    707          if(move_out >= 0)
    708          {
    709             jumps[move_out]->remove();
    710             ir->insert_after(jumps[move_out]);
    711             /* Note: we must update block_records and jumps to reflect
    712              * the fact that the jump has been moved out of the if.
    713              */
    714             jumps[move_out] = 0;
    715             block_records[move_out].min_strength = strength_none;
    716             this->progress = true;
    717          }
    718       }
    719 
    720       /* Now satisfy the ANALYSIS postcondition by setting
    721        * this->block.min_strength and
    722        * this->block.may_clear_execute_flag based on the
    723        * characteristics of the two branches.
    724        */
    725       if(block_records[0].min_strength < block_records[1].min_strength)
    726          this->block.min_strength = block_records[0].min_strength;
    727       else
    728          this->block.min_strength = block_records[1].min_strength;
    729       this->block.may_clear_execute_flag = this->block.may_clear_execute_flag || block_records[0].may_clear_execute_flag || block_records[1].may_clear_execute_flag;
    730 
    731       /* Now we need to clean up the instructions that follow the
    732        * if.
    733        *
    734        * If those instructions are unreachable, then satisfy the
    735        * DEAD_CODE_ELIMINATION postcondition by eliminating them.
    736        * Otherwise that postcondition is already satisfied.
    737        */
    738       if(this->block.min_strength)
    739          truncate_after_instruction(ir);
    740       else if(this->block.may_clear_execute_flag)
    741       {
    742          /* If the "if" instruction might clear the execute flag, then
    743           * we need to guard any instructions that follow so that they
    744           * are only executed if the execute flag is set.
    745           *
    746           * If one of the branches of the "if" always clears the
    747           * execute flag, and the other branch never clears it, then
    748           * this is easy: just move all the instructions following the
    749           * "if" into the branch that never clears it.
    750           */
    751          int move_into = -1;
    752          if(block_records[0].min_strength && !block_records[1].may_clear_execute_flag)
    753             move_into = 1;
    754          else if(block_records[1].min_strength && !block_records[0].may_clear_execute_flag)
    755             move_into = 0;
    756 
    757          if(move_into >= 0) {
    758             assert(!block_records[move_into].min_strength && !block_records[move_into].may_clear_execute_flag); /* otherwise, we just truncated */
    759 
    760             exec_list* list = move_into ? &ir->else_instructions : &ir->then_instructions;
    761             exec_node* next = ir->get_next();
    762             if(!next->is_tail_sentinel()) {
    763                move_outer_block_inside(ir, list);
    764 
    765                /* If any instructions moved, then we need to visit
    766                 * them (since they are now inside the "if").  Since
    767                 * block_records[move_into] is in its default state
    768                 * (see assertion above), we can safely replace
    769                 * block_records[move_into] with the result of this
    770                 * analysis.
    771                 */
    772                exec_list list;
    773                list.head = next;
    774                block_records[move_into] = visit_block(&list);
    775 
    776                /*
    777                 * Then we need to re-start our jump lowering, since one
    778                 * of the instructions we moved might be a jump that
    779                 * needs to be lowered.
    780                 */
    781                this->progress = true;
    782                goto retry;
    783             }
    784          } else {
    785             /* If we get here, then the simple case didn't apply; we
    786              * need to actually guard the instructions that follow.
    787              *
    788              * To avoid creating unnecessarily-deep nesting, first
    789              * look through the instructions that follow and unwrap
    790              * any instructions that that are already wrapped in the
    791              * appropriate guard.
    792              */
    793             ir_instruction* ir_after;
    794             for(ir_after = (ir_instruction*)ir->get_next(); !ir_after->is_tail_sentinel();)
    795             {
    796                ir_if* ir_if = ir_after->as_if();
    797                if(ir_if && ir_if->else_instructions.is_empty()) {
    798                   ir_dereference_variable* ir_if_cond_deref = ir_if->condition->as_dereference_variable();
    799                   if(ir_if_cond_deref && ir_if_cond_deref->var == this->loop.execute_flag) {
    800                      ir_instruction* ir_next = (ir_instruction*)ir_after->get_next();
    801                      ir_after->insert_before(&ir_if->then_instructions);
    802                      ir_after->remove();
    803                      ir_after = ir_next;
    804                      continue;
    805                   }
    806                }
    807                ir_after = (ir_instruction*)ir_after->get_next();
    808 
    809                /* only set this if we find any unprotected instruction */
    810                this->progress = true;
    811             }
    812 
    813             /* Then, wrap all the instructions that follow in a single
    814              * guard.
    815              */
    816             if(!ir->get_next()->is_tail_sentinel()) {
    817                assert(this->loop.execute_flag);
    818                ir_if* if_execute = new(ir) ir_if(new(ir) ir_dereference_variable(this->loop.execute_flag));
    819                move_outer_block_inside(ir, &if_execute->then_instructions);
    820                ir->insert_after(if_execute);
    821             }
    822          }
    823       }
    824       --this->loop.nesting_depth;
    825       --this->function.nesting_depth;
    826    }
    827 
    828    virtual void visit(ir_loop *ir)
    829    {
    830       /* Visit the body of the loop, with a fresh data structure in
    831        * this->loop so that the analysis we do here won't bleed into
    832        * enclosing loops.
    833        *
    834        * We assume that all code after a loop is reachable from the
    835        * loop (see comments on enum jump_strength), so the
    836        * DEAD_CODE_ELIMINATION postcondition is automatically
    837        * satisfied, as is the block.min_strength portion of the
    838        * ANALYSIS postcondition.
    839        *
    840        * The block.may_clear_execute_flag portion of the ANALYSIS
    841        * postcondition is automatically satisfied because execute
    842        * flags do not propagate outside of loops.
    843        *
    844        * The loop.may_set_return_flag portion of the ANALYSIS
    845        * postcondition is handled below.
    846        */
    847       ++this->function.nesting_depth;
    848       loop_record saved_loop = this->loop;
    849       this->loop = loop_record(this->function.signature, ir);
    850 
    851       /* Recursively lower nested jumps.  This satisfies the
    852        * CONTAINED_JUMPS_LOWERED postcondition, except in the case of
    853        * an unconditional continue or return at the bottom of the
    854        * loop, which are handled below.
    855        */
    856       block_record body = visit_block(&ir->body_instructions);
    857 
    858       /* If the loop ends in an unconditional continue, eliminate it
    859        * because it is redundant.
    860        */
    861       ir_instruction *ir_last
    862          = (ir_instruction *) ir->body_instructions.get_tail();
    863       if (get_jump_strength(ir_last) == strength_continue) {
    864          ir_last->remove();
    865       }
    866 
    867       /* If the loop ends in an unconditional return, and we are
    868        * lowering returns, lower it.
    869        */
    870       if (this->function.lower_return)
    871          lower_return_unconditionally(ir_last);
    872 
    873       if(body.min_strength >= strength_break) {
    874          /* FINISHME: If the min_strength of the loop body is
    875           * strength_break or strength_return, that means that it
    876           * isn't a loop at all, since control flow always leaves the
    877           * body of the loop via break or return.  In principle the
    878           * loop could be eliminated in this case.  This optimization
    879           * is not implemented yet.
    880           */
    881       }
    882 
    883       if(this->loop.break_flag) {
    884          /* We only get here if we are lowering breaks */
    885          assert (lower_break);
    886 
    887          /* If a break flag was generated while visiting the body of
    888           * the loop, then at least one break was lowered, so we need
    889           * to generate an if statement at the end of the loop that
    890           * does a "break" if the break flag is set.  The break we
    891           * generate won't violate the CONTAINED_JUMPS_LOWERED
    892           * postcondition, because should_lower_jump() always returns
    893           * false for a break that happens at the end of a loop.
    894           *
    895           * However, if the loop already ends in a conditional or
    896           * unconditional break, then we need to lower that break,
    897           * because it won't be at the end of the loop anymore.
    898           */
    899          lower_final_breaks(&ir->body_instructions);
    900 
    901          ir_if* break_if = new(ir) ir_if(new(ir) ir_dereference_variable(this->loop.break_flag));
    902          break_if->then_instructions.push_tail(new(ir) ir_loop_jump(ir_loop_jump::jump_break));
    903          ir->body_instructions.push_tail(break_if);
    904       }
    905 
    906       /* If the body of the loop may set the return flag, then at
    907        * least one return was lowered to a break, so we need to ensure
    908        * that the return flag is checked after the body of the loop is
    909        * executed.
    910        */
    911       if(this->loop.may_set_return_flag) {
    912          assert(this->function.return_flag);
    913          /* Generate the if statement to check the return flag */
    914          ir_if* return_if = new(ir) ir_if(new(ir) ir_dereference_variable(this->function.return_flag));
    915          /* Note: we also need to propagate the knowledge that the
    916           * return flag may get set to the outer context.  This
    917           * satisfies the loop.may_set_return_flag part of the
    918           * ANALYSIS postcondition.
    919           */
    920          saved_loop.may_set_return_flag = true;
    921          if(saved_loop.loop)
    922             /* If this loop is nested inside another one, then the if
    923              * statement that we generated should break out of that
    924              * loop if the return flag is set.  Caller will lower that
    925              * break statement if necessary.
    926              */
    927             return_if->then_instructions.push_tail(new(ir) ir_loop_jump(ir_loop_jump::jump_break));
    928          else
    929             /* Otherwise, all we need to do is ensure that the
    930              * instructions that follow are only executed if the
    931              * return flag is clear.  We can do that by moving those
    932              * instructions into the else clause of the generated if
    933              * statement.
    934              */
    935             move_outer_block_inside(ir, &return_if->else_instructions);
    936          ir->insert_after(return_if);
    937       }
    938 
    939       this->loop = saved_loop;
    940       --this->function.nesting_depth;
    941    }
    942 
    943    virtual void visit(ir_function_signature *ir)
    944    {
    945       /* these are not strictly necessary */
    946       assert(!this->function.signature);
    947       assert(!this->loop.loop);
    948 
    949       bool lower_return;
    950       if (strcmp(ir->function_name(), "main") == 0)
    951          lower_return = lower_main_return;
    952       else
    953          lower_return = lower_sub_return;
    954 
    955       function_record saved_function = this->function;
    956       loop_record saved_loop = this->loop;
    957       this->function = function_record(ir, lower_return);
    958       this->loop = loop_record(ir);
    959 
    960       assert(!this->loop.loop);
    961 
    962       /* Visit the body of the function to lower any jumps that occur
    963        * in it, except possibly an unconditional return statement at
    964        * the end of it.
    965        */
    966       visit_block(&ir->body);
    967 
    968       /* If the body ended in an unconditional return of non-void,
    969        * then we don't need to lower it because it's the one canonical
    970        * return.
    971        *
    972        * If the body ended in a return of void, eliminate it because
    973        * it is redundant.
    974        */
    975       if (ir->return_type->is_void() &&
    976           get_jump_strength((ir_instruction *) ir->body.get_tail())) {
    977          ir_jump *jump = (ir_jump *) ir->body.get_tail();
    978          assert (jump->ir_type == ir_type_return);
    979          jump->remove();
    980       }
    981 
    982       if(this->function.return_value)
    983          ir->body.push_tail(new(ir) ir_return(new (ir) ir_dereference_variable(this->function.return_value)));
    984 
    985       this->loop = saved_loop;
    986       this->function = saved_function;
    987    }
    988 
    989    virtual void visit(class ir_function * ir)
    990    {
    991       visit_block(&ir->signatures);
    992    }
    993 };
    994 
    995 bool
    996 do_lower_jumps(exec_list *instructions, bool pull_out_jumps, bool lower_sub_return, bool lower_main_return, bool lower_continue, bool lower_break)
    997 {
    998    ir_lower_jumps_visitor v;
    999    v.pull_out_jumps = pull_out_jumps;
   1000    v.lower_continue = lower_continue;
   1001    v.lower_break = lower_break;
   1002    v.lower_sub_return = lower_sub_return;
   1003    v.lower_main_return = lower_main_return;
   1004 
   1005    do {
   1006       v.progress = false;
   1007       visit_exec_list(instructions, &v);
   1008    } while (v.progress);
   1009 
   1010    return v.progress;
   1011 }
   1012