Home | History | Annotate | Download | only in glsl
      1 /*
      2  * Copyright © 2010 Intel Corporation
      3  *
      4  * Permission is hereby granted, free of charge, to any person obtaining a
      5  * copy of this software and associated documentation files (the "Software"),
      6  * to deal in the Software without restriction, including without limitation
      7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      8  * and/or sell copies of the Software, and to permit persons to whom the
      9  * Software is furnished to do so, subject to the following conditions:
     10  *
     11  * The above copyright notice and this permission notice (including the next
     12  * paragraph) shall be included in all copies or substantial portions of the
     13  * Software.
     14  *
     15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
     20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
     21  * DEALINGS IN THE SOFTWARE.
     22  */
     23 
     24 /**
     25  * \file opt_function_inlining.cpp
     26  *
     27  * Replaces calls to functions with the body of the function.
     28  */
     29 
     30 #include "ir.h"
     31 #include "ir_visitor.h"
     32 #include "ir_function_inlining.h"
     33 #include "ir_expression_flattening.h"
     34 #include "compiler/glsl_types.h"
     35 #include "util/hash_table.h"
     36 
     37 static void
     38 do_variable_replacement(exec_list *instructions,
     39                         ir_variable *orig,
     40                         ir_dereference *repl);
     41 
     42 namespace {
     43 
     44 class ir_function_inlining_visitor : public ir_hierarchical_visitor {
     45 public:
     46    ir_function_inlining_visitor()
     47    {
     48       progress = false;
     49    }
     50 
     51    virtual ~ir_function_inlining_visitor()
     52    {
     53       /* empty */
     54    }
     55 
     56    virtual ir_visitor_status visit_enter(ir_expression *);
     57    virtual ir_visitor_status visit_enter(ir_call *);
     58    virtual ir_visitor_status visit_enter(ir_return *);
     59    virtual ir_visitor_status visit_enter(ir_texture *);
     60    virtual ir_visitor_status visit_enter(ir_swizzle *);
     61 
     62    bool progress;
     63 };
     64 
     65 class ir_save_lvalue_visitor : public ir_hierarchical_visitor {
     66 public:
     67    virtual ir_visitor_status visit_enter(ir_dereference_array *);
     68 };
     69 
     70 } /* unnamed namespace */
     71 
     72 bool
     73 do_function_inlining(exec_list *instructions)
     74 {
     75    ir_function_inlining_visitor v;
     76 
     77    v.run(instructions);
     78 
     79    return v.progress;
     80 }
     81 
     82 static void
     83 replace_return_with_assignment(ir_instruction *ir, void *data)
     84 {
     85    void *ctx = ralloc_parent(ir);
     86    ir_dereference *orig_deref = (ir_dereference *) data;
     87    ir_return *ret = ir->as_return();
     88 
     89    if (ret) {
     90       if (ret->value) {
     91 	 ir_rvalue *lhs = orig_deref->clone(ctx, NULL);
     92 	 ret->replace_with(new(ctx) ir_assignment(lhs, ret->value, NULL));
     93       } else {
     94 	 /* un-valued return has to be the last return, or we shouldn't
     95 	  * have reached here. (see can_inline()).
     96 	  */
     97 	 assert(ret->next->is_tail_sentinel());
     98 	 ret->remove();
     99       }
    100    }
    101 }
    102 
    103 /* Save the given lvalue before the given instruction.
    104  *
    105  * This is done by adding temporary variables into which the current value
    106  * of any array indices are saved, and then modifying the dereference chain
    107  * in-place to point to those temporary variables.
    108  *
    109  * The hierarchical visitor is only used to traverse the left-hand-side chain
    110  * of derefs.
    111  */
    112 ir_visitor_status
    113 ir_save_lvalue_visitor::visit_enter(ir_dereference_array *deref)
    114 {
    115    if (deref->array_index->ir_type != ir_type_constant) {
    116       void *ctx = ralloc_parent(deref);
    117       ir_variable *index;
    118       ir_assignment *assignment;
    119 
    120       index = new(ctx) ir_variable(deref->array_index->type, "saved_idx", ir_var_temporary);
    121       base_ir->insert_before(index);
    122 
    123       assignment = new(ctx) ir_assignment(new(ctx) ir_dereference_variable(index),
    124                                           deref->array_index, 0);
    125       base_ir->insert_before(assignment);
    126 
    127       deref->array_index = new(ctx) ir_dereference_variable(index);
    128    }
    129 
    130    deref->array->accept(this);
    131    return visit_stop;
    132 }
    133 
    134 void
    135 ir_call::generate_inline(ir_instruction *next_ir)
    136 {
    137    void *ctx = ralloc_parent(this);
    138    ir_variable **parameters;
    139    unsigned num_parameters;
    140    int i;
    141    struct hash_table *ht;
    142 
    143    ht = _mesa_hash_table_create(NULL, _mesa_hash_pointer, _mesa_key_pointer_equal);
    144 
    145    num_parameters = this->callee->parameters.length();
    146    parameters = new ir_variable *[num_parameters];
    147 
    148    /* Generate the declarations for the parameters to our inlined code,
    149     * and set up the mapping of real function body variables to ours.
    150     */
    151    i = 0;
    152    foreach_two_lists(formal_node, &this->callee->parameters,
    153                      actual_node, &this->actual_parameters) {
    154       ir_variable *sig_param = (ir_variable *) formal_node;
    155       ir_rvalue *param = (ir_rvalue *) actual_node;
    156 
    157       /* Generate a new variable for the parameter. */
    158       if (sig_param->type->contains_opaque()) {
    159 	 /* For opaque types, we want the inlined variable references
    160 	  * referencing the passed in variable, since that will have
    161 	  * the location information, which an assignment of an opaque
    162 	  * variable wouldn't.  Fix it up below.
    163 	  */
    164 	 parameters[i] = NULL;
    165       } else {
    166 	 parameters[i] = sig_param->clone(ctx, ht);
    167 	 parameters[i]->data.mode = ir_var_temporary;
    168 
    169 	 /* Remove the read-only decoration because we're going to write
    170 	  * directly to this variable.  If the cloned variable is left
    171 	  * read-only and the inlined function is inside a loop, the loop
    172 	  * analysis code will get confused.
    173 	  */
    174 	 parameters[i]->data.read_only = false;
    175 	 next_ir->insert_before(parameters[i]);
    176       }
    177 
    178       /* Section 6.1.1 (Function Calling Conventions) of the OpenGL Shading
    179        * Language 4.5 spec says:
    180        *
    181        *    "All arguments are evaluated at call time, exactly once, in order,
    182        *     from left to right. [...] Evaluation of an out parameter results
    183        *     in an l-value that is used to copy out a value when the function
    184        *     returns."
    185        *
    186        * I.e., we have to take temporary copies of any relevant array indices
    187        * before the function body is executed.
    188        *
    189        * This ensures that
    190        * (a) if an array index expressions refers to a variable that is
    191        *     modified by the execution of the function body, we use the
    192        *     original value as intended, and
    193        * (b) if an array index expression has side effects, those side effects
    194        *     are only executed once and at the right time.
    195        */
    196       if (parameters[i]) {
    197          if (sig_param->data.mode == ir_var_function_in ||
    198              sig_param->data.mode == ir_var_const_in) {
    199             ir_assignment *assign;
    200 
    201             assign = new(ctx) ir_assignment(new(ctx) ir_dereference_variable(parameters[i]),
    202                                             param, NULL);
    203             next_ir->insert_before(assign);
    204          } else {
    205             assert(sig_param->data.mode == ir_var_function_out ||
    206                    sig_param->data.mode == ir_var_function_inout);
    207             assert(param->is_lvalue());
    208 
    209             ir_save_lvalue_visitor v;
    210             v.base_ir = next_ir;
    211 
    212             param->accept(&v);
    213 
    214             if (sig_param->data.mode == ir_var_function_inout) {
    215                ir_assignment *assign;
    216 
    217                assign = new(ctx) ir_assignment(new(ctx) ir_dereference_variable(parameters[i]),
    218                                                param->clone(ctx, NULL)->as_rvalue(), NULL);
    219                next_ir->insert_before(assign);
    220             }
    221          }
    222       }
    223 
    224       ++i;
    225    }
    226 
    227    exec_list new_instructions;
    228 
    229    /* Generate the inlined body of the function to a new list */
    230    foreach_in_list(ir_instruction, ir, &callee->body) {
    231       ir_instruction *new_ir = ir->clone(ctx, ht);
    232 
    233       new_instructions.push_tail(new_ir);
    234       visit_tree(new_ir, replace_return_with_assignment, this->return_deref);
    235    }
    236 
    237    /* If any opaque types were passed in, replace any deref of the
    238     * opaque variable with a deref of the argument.
    239     */
    240    foreach_two_lists(formal_node, &this->callee->parameters,
    241                      actual_node, &this->actual_parameters) {
    242       ir_rvalue *const param = (ir_rvalue *) actual_node;
    243       ir_variable *sig_param = (ir_variable *) formal_node;
    244 
    245       if (sig_param->type->contains_opaque()) {
    246 	 ir_dereference *deref = param->as_dereference();
    247 
    248 	 assert(deref);
    249 	 do_variable_replacement(&new_instructions, sig_param, deref);
    250       }
    251    }
    252 
    253    /* Now push those new instructions in. */
    254    next_ir->insert_before(&new_instructions);
    255 
    256    /* Copy back the value of any 'out' parameters from the function body
    257     * variables to our own.
    258     */
    259    i = 0;
    260    foreach_two_lists(formal_node, &this->callee->parameters,
    261                      actual_node, &this->actual_parameters) {
    262       ir_rvalue *const param = (ir_rvalue *) actual_node;
    263       const ir_variable *const sig_param = (ir_variable *) formal_node;
    264 
    265       /* Move our param variable into the actual param if it's an 'out' type. */
    266       if (parameters[i] && (sig_param->data.mode == ir_var_function_out ||
    267 			    sig_param->data.mode == ir_var_function_inout)) {
    268 	 ir_assignment *assign;
    269 
    270          assign = new(ctx) ir_assignment(param,
    271 					 new(ctx) ir_dereference_variable(parameters[i]),
    272 					 NULL);
    273 	 next_ir->insert_before(assign);
    274       }
    275 
    276       ++i;
    277    }
    278 
    279    delete [] parameters;
    280 
    281    _mesa_hash_table_destroy(ht, NULL);
    282 }
    283 
    284 
    285 ir_visitor_status
    286 ir_function_inlining_visitor::visit_enter(ir_expression *ir)
    287 {
    288    (void) ir;
    289    return visit_continue_with_parent;
    290 }
    291 
    292 
    293 ir_visitor_status
    294 ir_function_inlining_visitor::visit_enter(ir_return *ir)
    295 {
    296    (void) ir;
    297    return visit_continue_with_parent;
    298 }
    299 
    300 
    301 ir_visitor_status
    302 ir_function_inlining_visitor::visit_enter(ir_texture *ir)
    303 {
    304    (void) ir;
    305    return visit_continue_with_parent;
    306 }
    307 
    308 
    309 ir_visitor_status
    310 ir_function_inlining_visitor::visit_enter(ir_swizzle *ir)
    311 {
    312    (void) ir;
    313    return visit_continue_with_parent;
    314 }
    315 
    316 
    317 ir_visitor_status
    318 ir_function_inlining_visitor::visit_enter(ir_call *ir)
    319 {
    320    if (can_inline(ir)) {
    321       ir->generate_inline(ir);
    322       ir->remove();
    323       this->progress = true;
    324    }
    325 
    326    return visit_continue;
    327 }
    328 
    329 
    330 /**
    331  * Replaces references to the "orig" variable with a clone of "repl."
    332  *
    333  * From the spec, opaque types can appear in the tree as function
    334  * (non-out) parameters and as the result of array indexing and
    335  * structure field selection.  In our builtin implementation, they
    336  * also appear in the sampler field of an ir_tex instruction.
    337  */
    338 
    339 class ir_variable_replacement_visitor : public ir_hierarchical_visitor {
    340 public:
    341    ir_variable_replacement_visitor(ir_variable *orig, ir_dereference *repl)
    342    {
    343       this->orig = orig;
    344       this->repl = repl;
    345    }
    346 
    347    virtual ~ir_variable_replacement_visitor()
    348    {
    349    }
    350 
    351    virtual ir_visitor_status visit_leave(ir_call *);
    352    virtual ir_visitor_status visit_leave(ir_dereference_array *);
    353    virtual ir_visitor_status visit_leave(ir_dereference_record *);
    354    virtual ir_visitor_status visit_leave(ir_texture *);
    355 
    356    void replace_deref(ir_dereference **deref);
    357    void replace_rvalue(ir_rvalue **rvalue);
    358 
    359    ir_variable *orig;
    360    ir_dereference *repl;
    361 };
    362 
    363 void
    364 ir_variable_replacement_visitor::replace_deref(ir_dereference **deref)
    365 {
    366    ir_dereference_variable *deref_var = (*deref)->as_dereference_variable();
    367    if (deref_var && deref_var->var == this->orig) {
    368       *deref = this->repl->clone(ralloc_parent(*deref), NULL);
    369    }
    370 }
    371 
    372 void
    373 ir_variable_replacement_visitor::replace_rvalue(ir_rvalue **rvalue)
    374 {
    375    if (!*rvalue)
    376       return;
    377 
    378    ir_dereference *deref = (*rvalue)->as_dereference();
    379 
    380    if (!deref)
    381       return;
    382 
    383    replace_deref(&deref);
    384    *rvalue = deref;
    385 }
    386 
    387 ir_visitor_status
    388 ir_variable_replacement_visitor::visit_leave(ir_texture *ir)
    389 {
    390    replace_deref(&ir->sampler);
    391 
    392    return visit_continue;
    393 }
    394 
    395 ir_visitor_status
    396 ir_variable_replacement_visitor::visit_leave(ir_dereference_array *ir)
    397 {
    398    replace_rvalue(&ir->array);
    399    return visit_continue;
    400 }
    401 
    402 ir_visitor_status
    403 ir_variable_replacement_visitor::visit_leave(ir_dereference_record *ir)
    404 {
    405    replace_rvalue(&ir->record);
    406    return visit_continue;
    407 }
    408 
    409 ir_visitor_status
    410 ir_variable_replacement_visitor::visit_leave(ir_call *ir)
    411 {
    412    foreach_in_list_safe(ir_rvalue, param, &ir->actual_parameters) {
    413       ir_rvalue *new_param = param;
    414       replace_rvalue(&new_param);
    415 
    416       if (new_param != param) {
    417 	 param->replace_with(new_param);
    418       }
    419    }
    420    return visit_continue;
    421 }
    422 
    423 static void
    424 do_variable_replacement(exec_list *instructions,
    425                         ir_variable *orig,
    426                         ir_dereference *repl)
    427 {
    428    ir_variable_replacement_visitor v(orig, repl);
    429 
    430    visit_list_elements(&v, instructions);
    431 }
    432