Home | History | Annotate | Download | only in glsl
      1 /*
      2  * Copyright  2010 Intel Corporation
      3  *
      4  * Permission is hereby granted, free of charge, to any person obtaining a
      5  * copy of this software and associated documentation files (the "Software"),
      6  * to deal in the Software without restriction, including without limitation
      7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      8  * and/or sell copies of the Software, and to permit persons to whom the
      9  * Software is furnished to do so, subject to the following conditions:
     10  *
     11  * The above copyright notice and this permission notice (including the next
     12  * paragraph) shall be included in all copies or substantial portions of the
     13  * Software.
     14  *
     15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
     20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
     21  * DEALINGS IN THE SOFTWARE.
     22  */
     23 
     24 /**
     25  * \file opt_tree_grafting.cpp
     26  *
     27  * Takes assignments to variables that are dereferenced only once and
     28  * pastes the RHS expression into where the variable is dereferenced.
     29  *
     30  * In the process of various operations like function inlining and
     31  * tertiary op handling, we'll end up with our expression trees having
     32  * been chopped up into a series of assignments of short expressions
     33  * to temps.  Other passes like ir_algebraic.cpp would prefer to see
     34  * the deepest expression trees they can to try to optimize them.
     35  *
     36  * This is a lot like copy propagaton.  In comparison, copy
     37  * propagation only acts on plain copies, not arbitrary expressions on
     38  * the RHS.  Generally, we wouldn't want to go pasting some
     39  * complicated expression everywhere it got used, though, so we don't
     40  * handle expressions in that pass.
     41  *
     42  * The hard part is making sure we don't move an expression across
     43  * some other assignments that would change the value of the
     44  * expression.  So we split this into two passes: First, find the
     45  * variables in our scope which are written to once and read once, and
     46  * then go through basic blocks seeing if we find an opportunity to
     47  * move those expressions safely.
     48  */
     49 
     50 #include "ir.h"
     51 #include "ir_visitor.h"
     52 #include "ir_variable_refcount.h"
     53 #include "ir_basic_block.h"
     54 #include "ir_optimization.h"
     55 #include "compiler/glsl_types.h"
     56 
     57 namespace {
     58 
     59 static bool debug = false;
     60 
     61 class ir_tree_grafting_visitor : public ir_hierarchical_visitor {
     62 public:
     63    ir_tree_grafting_visitor(ir_assignment *graft_assign,
     64 			    ir_variable *graft_var)
     65    {
     66       this->progress = false;
     67       this->graft_assign = graft_assign;
     68       this->graft_var = graft_var;
     69    }
     70 
     71    virtual ir_visitor_status visit_leave(class ir_assignment *);
     72    virtual ir_visitor_status visit_enter(class ir_call *);
     73    virtual ir_visitor_status visit_enter(class ir_expression *);
     74    virtual ir_visitor_status visit_enter(class ir_function *);
     75    virtual ir_visitor_status visit_enter(class ir_function_signature *);
     76    virtual ir_visitor_status visit_enter(class ir_if *);
     77    virtual ir_visitor_status visit_enter(class ir_loop *);
     78    virtual ir_visitor_status visit_enter(class ir_swizzle *);
     79    virtual ir_visitor_status visit_enter(class ir_texture *);
     80 
     81    ir_visitor_status check_graft(ir_instruction *ir, ir_variable *var);
     82 
     83    bool do_graft(ir_rvalue **rvalue);
     84 
     85    bool progress;
     86    ir_variable *graft_var;
     87    ir_assignment *graft_assign;
     88 };
     89 
     90 struct find_deref_info {
     91    ir_variable *var;
     92    bool found;
     93 };
     94 
     95 void
     96 dereferences_variable_callback(ir_instruction *ir, void *data)
     97 {
     98    struct find_deref_info *info = (struct find_deref_info *)data;
     99    ir_dereference_variable *deref = ir->as_dereference_variable();
    100 
    101    if (deref && deref->var == info->var)
    102       info->found = true;
    103 }
    104 
    105 static bool
    106 dereferences_variable(ir_instruction *ir, ir_variable *var)
    107 {
    108    struct find_deref_info info;
    109 
    110    info.var = var;
    111    info.found = false;
    112 
    113    visit_tree(ir, dereferences_variable_callback, &info);
    114 
    115    return info.found;
    116 }
    117 
    118 bool
    119 ir_tree_grafting_visitor::do_graft(ir_rvalue **rvalue)
    120 {
    121    if (!*rvalue)
    122       return false;
    123 
    124    ir_dereference_variable *deref = (*rvalue)->as_dereference_variable();
    125 
    126    if (!deref || deref->var != this->graft_var)
    127       return false;
    128 
    129    if (debug) {
    130       fprintf(stderr, "GRAFTING:\n");
    131       this->graft_assign->fprint(stderr);
    132       fprintf(stderr, "\n");
    133       fprintf(stderr, "TO:\n");
    134       (*rvalue)->fprint(stderr);
    135       fprintf(stderr, "\n");
    136    }
    137 
    138    this->graft_assign->remove();
    139    *rvalue = this->graft_assign->rhs;
    140 
    141    this->progress = true;
    142    return true;
    143 }
    144 
    145 ir_visitor_status
    146 ir_tree_grafting_visitor::visit_enter(ir_loop *ir)
    147 {
    148    (void)ir;
    149    /* Do not traverse into the body of the loop since that is a
    150     * different basic block.
    151     */
    152    return visit_stop;
    153 }
    154 
    155 /**
    156  * Check if we can continue grafting after writing to a variable.  If the
    157  * expression we're trying to graft references the variable, we must stop.
    158  *
    159  * \param ir   An instruction that writes to a variable.
    160  * \param var  The variable being updated.
    161  */
    162 ir_visitor_status
    163 ir_tree_grafting_visitor::check_graft(ir_instruction *ir, ir_variable *var)
    164 {
    165    if (dereferences_variable(this->graft_assign->rhs, var)) {
    166       if (debug) {
    167 	 fprintf(stderr, "graft killed by: ");
    168 	 ir->fprint(stderr);
    169 	 fprintf(stderr, "\n");
    170       }
    171       return visit_stop;
    172    }
    173 
    174    return visit_continue;
    175 }
    176 
    177 ir_visitor_status
    178 ir_tree_grafting_visitor::visit_leave(ir_assignment *ir)
    179 {
    180    if (do_graft(&ir->rhs) ||
    181        do_graft(&ir->condition))
    182       return visit_stop;
    183 
    184    /* If this assignment updates a variable used in the assignment
    185     * we're trying to graft, then we're done.
    186     */
    187    return check_graft(ir, ir->lhs->variable_referenced());
    188 }
    189 
    190 ir_visitor_status
    191 ir_tree_grafting_visitor::visit_enter(ir_function *ir)
    192 {
    193    (void) ir;
    194    return visit_continue_with_parent;
    195 }
    196 
    197 ir_visitor_status
    198 ir_tree_grafting_visitor::visit_enter(ir_function_signature *ir)
    199 {
    200    (void)ir;
    201    return visit_continue_with_parent;
    202 }
    203 
    204 ir_visitor_status
    205 ir_tree_grafting_visitor::visit_enter(ir_call *ir)
    206 {
    207    foreach_two_lists(formal_node, &ir->callee->parameters,
    208                      actual_node, &ir->actual_parameters) {
    209       ir_variable *sig_param = (ir_variable *) formal_node;
    210       ir_rvalue *ir = (ir_rvalue *) actual_node;
    211       ir_rvalue *new_ir = ir;
    212 
    213       if (sig_param->data.mode != ir_var_function_in
    214           && sig_param->data.mode != ir_var_const_in) {
    215 	 if (check_graft(ir, sig_param) == visit_stop)
    216 	    return visit_stop;
    217 	 continue;
    218       }
    219 
    220       if (do_graft(&new_ir)) {
    221 	 ir->replace_with(new_ir);
    222 	 return visit_stop;
    223       }
    224    }
    225 
    226    if (ir->return_deref && check_graft(ir, ir->return_deref->var) == visit_stop)
    227       return visit_stop;
    228 
    229    return visit_continue;
    230 }
    231 
    232 ir_visitor_status
    233 ir_tree_grafting_visitor::visit_enter(ir_expression *ir)
    234 {
    235    for (unsigned int i = 0; i < ir->get_num_operands(); i++) {
    236       if (do_graft(&ir->operands[i]))
    237 	 return visit_stop;
    238    }
    239 
    240    return visit_continue;
    241 }
    242 
    243 ir_visitor_status
    244 ir_tree_grafting_visitor::visit_enter(ir_if *ir)
    245 {
    246    if (do_graft(&ir->condition))
    247       return visit_stop;
    248 
    249    /* Do not traverse into the body of the if-statement since that is a
    250     * different basic block.
    251     */
    252    return visit_continue_with_parent;
    253 }
    254 
    255 ir_visitor_status
    256 ir_tree_grafting_visitor::visit_enter(ir_swizzle *ir)
    257 {
    258    if (do_graft(&ir->val))
    259       return visit_stop;
    260 
    261    return visit_continue;
    262 }
    263 
    264 ir_visitor_status
    265 ir_tree_grafting_visitor::visit_enter(ir_texture *ir)
    266 {
    267    if (do_graft(&ir->coordinate) ||
    268        do_graft(&ir->projector) ||
    269        do_graft(&ir->offset) ||
    270        do_graft(&ir->shadow_comparator))
    271 	 return visit_stop;
    272 
    273    switch (ir->op) {
    274    case ir_tex:
    275    case ir_lod:
    276    case ir_query_levels:
    277    case ir_texture_samples:
    278    case ir_samples_identical:
    279       break;
    280    case ir_txb:
    281       if (do_graft(&ir->lod_info.bias))
    282 	 return visit_stop;
    283       break;
    284    case ir_txf:
    285    case ir_txl:
    286    case ir_txs:
    287       if (do_graft(&ir->lod_info.lod))
    288 	 return visit_stop;
    289       break;
    290    case ir_txf_ms:
    291       if (do_graft(&ir->lod_info.sample_index))
    292          return visit_stop;
    293       break;
    294    case ir_txd:
    295       if (do_graft(&ir->lod_info.grad.dPdx) ||
    296 	  do_graft(&ir->lod_info.grad.dPdy))
    297 	 return visit_stop;
    298       break;
    299    case ir_tg4:
    300       if (do_graft(&ir->lod_info.component))
    301          return visit_stop;
    302       break;
    303    }
    304 
    305    return visit_continue;
    306 }
    307 
    308 struct tree_grafting_info {
    309    ir_variable_refcount_visitor *refs;
    310    bool progress;
    311 };
    312 
    313 static bool
    314 try_tree_grafting(ir_assignment *start,
    315 		  ir_variable *lhs_var,
    316 		  ir_instruction *bb_last)
    317 {
    318    ir_tree_grafting_visitor v(start, lhs_var);
    319 
    320    if (debug) {
    321       fprintf(stderr, "trying to graft: ");
    322       lhs_var->fprint(stderr);
    323       fprintf(stderr, "\n");
    324    }
    325 
    326    for (ir_instruction *ir = (ir_instruction *)start->next;
    327 	ir != bb_last->next;
    328 	ir = (ir_instruction *)ir->next) {
    329 
    330       if (debug) {
    331 	 fprintf(stderr, "- ");
    332 	 ir->fprint(stderr);
    333 	 fprintf(stderr, "\n");
    334       }
    335 
    336       ir_visitor_status s = ir->accept(&v);
    337       if (s == visit_stop)
    338 	 return v.progress;
    339    }
    340 
    341    return false;
    342 }
    343 
    344 static void
    345 tree_grafting_basic_block(ir_instruction *bb_first,
    346 			  ir_instruction *bb_last,
    347 			  void *data)
    348 {
    349    struct tree_grafting_info *info = (struct tree_grafting_info *)data;
    350    ir_instruction *ir, *next;
    351 
    352    for (ir = bb_first, next = (ir_instruction *)ir->next;
    353 	ir != bb_last->next;
    354 	ir = next, next = (ir_instruction *)ir->next) {
    355       ir_assignment *assign = ir->as_assignment();
    356 
    357       if (!assign)
    358 	 continue;
    359 
    360       ir_variable *lhs_var = assign->whole_variable_written();
    361       if (!lhs_var)
    362 	 continue;
    363 
    364       if (lhs_var->data.mode == ir_var_function_out ||
    365           lhs_var->data.mode == ir_var_function_inout ||
    366           lhs_var->data.mode == ir_var_shader_out ||
    367           lhs_var->data.mode == ir_var_shader_storage ||
    368           lhs_var->data.mode == ir_var_shader_shared)
    369          continue;
    370 
    371       if (lhs_var->data.precise)
    372          continue;
    373 
    374       ir_variable_refcount_entry *entry = info->refs->get_variable_entry(lhs_var);
    375 
    376       if (!entry->declaration ||
    377 	  entry->assigned_count != 1 ||
    378 	  entry->referenced_count != 2)
    379 	 continue;
    380 
    381       /* Found a possibly graftable assignment.  Now, walk through the
    382        * rest of the BB seeing if the deref is here, and if nothing interfered with
    383        * pasting its expression's values in between.
    384        */
    385       info->progress |= try_tree_grafting(assign, lhs_var, bb_last);
    386    }
    387 }
    388 
    389 } /* unnamed namespace */
    390 
    391 /**
    392  * Does a copy propagation pass on the code present in the instruction stream.
    393  */
    394 bool
    395 do_tree_grafting(exec_list *instructions)
    396 {
    397    ir_variable_refcount_visitor refs;
    398    struct tree_grafting_info info;
    399 
    400    info.progress = false;
    401    info.refs = &refs;
    402 
    403    visit_list_elements(info.refs, instructions);
    404 
    405    call_for_basic_blocks(instructions, tree_grafting_basic_block, &info);
    406 
    407    return info.progress;
    408 }
    409