Home | History | Annotate | Download | only in glsl
      1 /*
      2  * Copyright  2010 Intel Corporation
      3  *
      4  * Permission is hereby granted, free of charge, to any person obtaining a
      5  * copy of this software and associated documentation files (the "Software"),
      6  * to deal in the Software without restriction, including without limitation
      7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      8  * and/or sell copies of the Software, and to permit persons to whom the
      9  * Software is furnished to do so, subject to the following conditions:
     10  *
     11  * The above copyright notice and this permission notice (including the next
     12  * paragraph) shall be included in all copies or substantial portions of the
     13  * Software.
     14  *
     15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
     20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
     21  * DEALINGS IN THE SOFTWARE.
     22  */
     23 
     24 /**
     25  * \file opt_copy_propagation_elements.cpp
     26  *
     27  * Replaces usage of recently-copied components of variables with the
     28  * previous copy of the variable.
     29  *
     30  * This pass can be compared with opt_copy_propagation, which operands
     31  * on arbitrary whole-variable copies.  However, in order to handle
     32  * the copy propagation of swizzled variables or writemasked writes,
     33  * we want to track things on a channel-wise basis.  I found that
     34  * trying to mix the swizzled/writemasked support here with the
     35  * whole-variable stuff in opt_copy_propagation.cpp just made a mess,
     36  * so this is separate despite the ACP handling being somewhat
     37  * similar.
     38  *
     39  * This should reduce the number of MOV instructions in the generated
     40  * programs unless copy propagation is also done on the LIR, and may
     41  * help anyway by triggering other optimizations that live in the HIR.
     42  */
     43 
     44 #include "ir.h"
     45 #include "ir_rvalue_visitor.h"
     46 #include "ir_basic_block.h"
     47 #include "ir_optimization.h"
     48 #include "glsl_types.h"
     49 
     50 static bool debug = false;
     51 
     52 namespace {
     53 
     54 class acp_entry : public exec_node
     55 {
     56 public:
     57    acp_entry(ir_variable *lhs, ir_variable *rhs, int write_mask, int swizzle[4])
     58    {
     59       this->lhs = lhs;
     60       this->rhs = rhs;
     61       this->write_mask = write_mask;
     62       memcpy(this->swizzle, swizzle, sizeof(this->swizzle));
     63    }
     64 
     65    acp_entry(acp_entry *a)
     66    {
     67       this->lhs = a->lhs;
     68       this->rhs = a->rhs;
     69       this->write_mask = a->write_mask;
     70       memcpy(this->swizzle, a->swizzle, sizeof(this->swizzle));
     71    }
     72 
     73    ir_variable *lhs;
     74    ir_variable *rhs;
     75    unsigned int write_mask;
     76    int swizzle[4];
     77 };
     78 
     79 
     80 class kill_entry : public exec_node
     81 {
     82 public:
     83    kill_entry(ir_variable *var, int write_mask)
     84    {
     85       this->var = var;
     86       this->write_mask = write_mask;
     87    }
     88 
     89    ir_variable *var;
     90    unsigned int write_mask;
     91 };
     92 
     93 class ir_copy_propagation_elements_visitor : public ir_rvalue_visitor {
     94 public:
     95    ir_copy_propagation_elements_visitor()
     96    {
     97       this->progress = false;
     98       this->killed_all = false;
     99       this->mem_ctx = ralloc_context(NULL);
    100       this->shader_mem_ctx = NULL;
    101       this->acp = new(mem_ctx) exec_list;
    102       this->kills = new(mem_ctx) exec_list;
    103    }
    104    ~ir_copy_propagation_elements_visitor()
    105    {
    106       ralloc_free(mem_ctx);
    107    }
    108 
    109    virtual ir_visitor_status visit_enter(class ir_loop *);
    110    virtual ir_visitor_status visit_enter(class ir_function_signature *);
    111    virtual ir_visitor_status visit_leave(class ir_assignment *);
    112    virtual ir_visitor_status visit_enter(class ir_call *);
    113    virtual ir_visitor_status visit_enter(class ir_if *);
    114    virtual ir_visitor_status visit_leave(class ir_swizzle *);
    115 
    116    void handle_rvalue(ir_rvalue **rvalue);
    117 
    118    void add_copy(ir_assignment *ir);
    119    void kill(kill_entry *k);
    120    void handle_if_block(exec_list *instructions);
    121 
    122    /** List of acp_entry: The available copies to propagate */
    123    exec_list *acp;
    124    /**
    125     * List of kill_entry: The variables whose values were killed in this
    126     * block.
    127     */
    128    exec_list *kills;
    129 
    130    bool progress;
    131 
    132    bool killed_all;
    133 
    134    /* Context for our local data structures. */
    135    void *mem_ctx;
    136    /* Context for allocating new shader nodes. */
    137    void *shader_mem_ctx;
    138 };
    139 
    140 } /* unnamed namespace */
    141 
    142 ir_visitor_status
    143 ir_copy_propagation_elements_visitor::visit_enter(ir_function_signature *ir)
    144 {
    145    /* Treat entry into a function signature as a completely separate
    146     * block.  Any instructions at global scope will be shuffled into
    147     * main() at link time, so they're irrelevant to us.
    148     */
    149    exec_list *orig_acp = this->acp;
    150    exec_list *orig_kills = this->kills;
    151    bool orig_killed_all = this->killed_all;
    152 
    153    this->acp = new(mem_ctx) exec_list;
    154    this->kills = new(mem_ctx) exec_list;
    155    this->killed_all = false;
    156 
    157    visit_list_elements(this, &ir->body);
    158 
    159    this->kills = orig_kills;
    160    this->acp = orig_acp;
    161    this->killed_all = orig_killed_all;
    162 
    163    return visit_continue_with_parent;
    164 }
    165 
    166 ir_visitor_status
    167 ir_copy_propagation_elements_visitor::visit_leave(ir_assignment *ir)
    168 {
    169    ir_dereference_variable *lhs = ir->lhs->as_dereference_variable();
    170    ir_variable *var = ir->lhs->variable_referenced();
    171 
    172    if (var->type->is_scalar() || var->type->is_vector()) {
    173       kill_entry *k;
    174 
    175       if (lhs)
    176 	 k = new(mem_ctx) kill_entry(var, ir->write_mask);
    177       else
    178 	 k = new(mem_ctx) kill_entry(var, ~0);
    179 
    180       kill(k);
    181    }
    182 
    183    add_copy(ir);
    184 
    185    return visit_continue;
    186 }
    187 
    188 ir_visitor_status
    189 ir_copy_propagation_elements_visitor::visit_leave(ir_swizzle *ir)
    190 {
    191    /* Don't visit the values of swizzles since they are handled while
    192     * visiting the swizzle itself.
    193     */
    194    return visit_continue;
    195 }
    196 
    197 /**
    198  * Replaces dereferences of ACP RHS variables with ACP LHS variables.
    199  *
    200  * This is where the actual copy propagation occurs.  Note that the
    201  * rewriting of ir_dereference means that the ir_dereference instance
    202  * must not be shared by multiple IR operations!
    203  */
    204 void
    205 ir_copy_propagation_elements_visitor::handle_rvalue(ir_rvalue **ir)
    206 {
    207    int swizzle_chan[4];
    208    ir_dereference_variable *deref_var;
    209    ir_variable *source[4] = {NULL, NULL, NULL, NULL};
    210    int source_chan[4];
    211    int chans;
    212 
    213    if (!*ir)
    214       return;
    215 
    216    ir_swizzle *swizzle = (*ir)->as_swizzle();
    217    if (swizzle) {
    218       deref_var = swizzle->val->as_dereference_variable();
    219       if (!deref_var)
    220 	 return;
    221 
    222       swizzle_chan[0] = swizzle->mask.x;
    223       swizzle_chan[1] = swizzle->mask.y;
    224       swizzle_chan[2] = swizzle->mask.z;
    225       swizzle_chan[3] = swizzle->mask.w;
    226       chans = swizzle->type->vector_elements;
    227    } else {
    228       deref_var = (*ir)->as_dereference_variable();
    229       if (!deref_var)
    230 	 return;
    231 
    232       swizzle_chan[0] = 0;
    233       swizzle_chan[1] = 1;
    234       swizzle_chan[2] = 2;
    235       swizzle_chan[3] = 3;
    236       chans = deref_var->type->vector_elements;
    237    }
    238 
    239    if (this->in_assignee)
    240       return;
    241 
    242    ir_variable *var = deref_var->var;
    243 
    244    /* Try to find ACP entries covering swizzle_chan[], hoping they're
    245     * the same source variable.
    246     */
    247    foreach_iter(exec_list_iterator, iter, *this->acp) {
    248       acp_entry *entry = (acp_entry *)iter.get();
    249 
    250       if (var == entry->lhs) {
    251 	 for (int c = 0; c < chans; c++) {
    252 	    if (entry->write_mask & (1 << swizzle_chan[c])) {
    253 	       source[c] = entry->rhs;
    254 	       source_chan[c] = entry->swizzle[swizzle_chan[c]];
    255 	    }
    256 	 }
    257       }
    258    }
    259 
    260    /* Make sure all channels are copying from the same source variable. */
    261    if (!source[0])
    262       return;
    263    for (int c = 1; c < chans; c++) {
    264       if (source[c] != source[0])
    265 	 return;
    266    }
    267 
    268    if (!shader_mem_ctx)
    269       shader_mem_ctx = ralloc_parent(deref_var);
    270 
    271    if (debug) {
    272       printf("Copy propagation from:\n");
    273       (*ir)->print();
    274    }
    275 
    276    deref_var = new(shader_mem_ctx) ir_dereference_variable(source[0]);
    277    *ir = new(shader_mem_ctx) ir_swizzle(deref_var,
    278 					source_chan[0],
    279 					source_chan[1],
    280 					source_chan[2],
    281 					source_chan[3],
    282 					chans);
    283 
    284    if (debug) {
    285       printf("to:\n");
    286       (*ir)->print();
    287       printf("\n");
    288    }
    289 }
    290 
    291 
    292 ir_visitor_status
    293 ir_copy_propagation_elements_visitor::visit_enter(ir_call *ir)
    294 {
    295    /* Do copy propagation on call parameters, but skip any out params */
    296    exec_list_iterator sig_param_iter = ir->callee->parameters.iterator();
    297    foreach_iter(exec_list_iterator, iter, ir->actual_parameters) {
    298       ir_variable *sig_param = (ir_variable *)sig_param_iter.get();
    299       ir_instruction *ir = (ir_instruction *)iter.get();
    300       if (sig_param->mode != ir_var_out && sig_param->mode != ir_var_inout) {
    301          ir->accept(this);
    302       }
    303       sig_param_iter.next();
    304    }
    305 
    306    /* Since we're unlinked, we don't (necessarily) know the side effects of
    307     * this call.  So kill all copies.
    308     */
    309    acp->make_empty();
    310    this->killed_all = true;
    311 
    312    return visit_continue_with_parent;
    313 }
    314 
    315 void
    316 ir_copy_propagation_elements_visitor::handle_if_block(exec_list *instructions)
    317 {
    318    exec_list *orig_acp = this->acp;
    319    exec_list *orig_kills = this->kills;
    320    bool orig_killed_all = this->killed_all;
    321 
    322    this->acp = new(mem_ctx) exec_list;
    323    this->kills = new(mem_ctx) exec_list;
    324    this->killed_all = false;
    325 
    326    /* Populate the initial acp with a copy of the original */
    327    foreach_iter(exec_list_iterator, iter, *orig_acp) {
    328       acp_entry *a = (acp_entry *)iter.get();
    329       this->acp->push_tail(new(this->mem_ctx) acp_entry(a));
    330    }
    331 
    332    visit_list_elements(this, instructions);
    333 
    334    if (this->killed_all) {
    335       orig_acp->make_empty();
    336    }
    337 
    338    exec_list *new_kills = this->kills;
    339    this->kills = orig_kills;
    340    this->acp = orig_acp;
    341    this->killed_all = this->killed_all || orig_killed_all;
    342 
    343    /* Move the new kills into the parent block's list, removing them
    344     * from the parent's ACP list in the process.
    345     */
    346    foreach_list_safe(node, new_kills) {
    347       kill_entry *k = (kill_entry *)node;
    348       kill(k);
    349    }
    350 }
    351 
    352 ir_visitor_status
    353 ir_copy_propagation_elements_visitor::visit_enter(ir_if *ir)
    354 {
    355    ir->condition->accept(this);
    356 
    357    handle_if_block(&ir->then_instructions);
    358    handle_if_block(&ir->else_instructions);
    359 
    360    /* handle_if_block() already descended into the children. */
    361    return visit_continue_with_parent;
    362 }
    363 
    364 ir_visitor_status
    365 ir_copy_propagation_elements_visitor::visit_enter(ir_loop *ir)
    366 {
    367    exec_list *orig_acp = this->acp;
    368    exec_list *orig_kills = this->kills;
    369    bool orig_killed_all = this->killed_all;
    370 
    371    /* FINISHME: For now, the initial acp for loops is totally empty.
    372     * We could go through once, then go through again with the acp
    373     * cloned minus the killed entries after the first run through.
    374     */
    375    this->acp = new(mem_ctx) exec_list;
    376    this->kills = new(mem_ctx) exec_list;
    377    this->killed_all = false;
    378 
    379    visit_list_elements(this, &ir->body_instructions);
    380 
    381    if (this->killed_all) {
    382       orig_acp->make_empty();
    383    }
    384 
    385    exec_list *new_kills = this->kills;
    386    this->kills = orig_kills;
    387    this->acp = orig_acp;
    388    this->killed_all = this->killed_all || orig_killed_all;
    389 
    390    foreach_list_safe(node, new_kills) {
    391       kill_entry *k = (kill_entry *)node;
    392       kill(k);
    393    }
    394 
    395    /* already descended into the children. */
    396    return visit_continue_with_parent;
    397 }
    398 
    399 /* Remove any entries currently in the ACP for this kill. */
    400 void
    401 ir_copy_propagation_elements_visitor::kill(kill_entry *k)
    402 {
    403    foreach_list_safe(node, acp) {
    404       acp_entry *entry = (acp_entry *)node;
    405 
    406       if (entry->lhs == k->var) {
    407 	 entry->write_mask = entry->write_mask & ~k->write_mask;
    408 	 if (entry->write_mask == 0) {
    409 	    entry->remove();
    410 	    continue;
    411 	 }
    412       }
    413       if (entry->rhs == k->var) {
    414 	 entry->remove();
    415       }
    416    }
    417 
    418    /* If we were on a list, remove ourselves before inserting */
    419    if (k->next)
    420       k->remove();
    421 
    422    this->kills->push_tail(k);
    423 }
    424 
    425 /**
    426  * Adds directly-copied channels between vector variables to the available
    427  * copy propagation list.
    428  */
    429 void
    430 ir_copy_propagation_elements_visitor::add_copy(ir_assignment *ir)
    431 {
    432    acp_entry *entry;
    433    int orig_swizzle[4] = {0, 1, 2, 3};
    434    int swizzle[4];
    435 
    436    if (ir->condition)
    437       return;
    438 
    439    ir_dereference_variable *lhs = ir->lhs->as_dereference_variable();
    440    if (!lhs || !(lhs->type->is_scalar() || lhs->type->is_vector()))
    441       return;
    442 
    443    ir_dereference_variable *rhs = ir->rhs->as_dereference_variable();
    444    if (!rhs) {
    445       ir_swizzle *swiz = ir->rhs->as_swizzle();
    446       if (!swiz)
    447 	 return;
    448 
    449       rhs = swiz->val->as_dereference_variable();
    450       if (!rhs)
    451 	 return;
    452 
    453       orig_swizzle[0] = swiz->mask.x;
    454       orig_swizzle[1] = swiz->mask.y;
    455       orig_swizzle[2] = swiz->mask.z;
    456       orig_swizzle[3] = swiz->mask.w;
    457    }
    458 
    459    /* Move the swizzle channels out to the positions they match in the
    460     * destination.  We don't want to have to rewrite the swizzle[]
    461     * array every time we clear a bit of the write_mask.
    462     */
    463    int j = 0;
    464    for (int i = 0; i < 4; i++) {
    465       if (ir->write_mask & (1 << i))
    466 	 swizzle[i] = orig_swizzle[j++];
    467    }
    468 
    469    int write_mask = ir->write_mask;
    470    if (lhs->var == rhs->var) {
    471       /* If this is a copy from the variable to itself, then we need
    472        * to be sure not to include the updated channels from this
    473        * instruction in the set of new source channels to be
    474        * copy-propagated from.
    475        */
    476       for (int i = 0; i < 4; i++) {
    477 	 if (ir->write_mask & (1 << orig_swizzle[i]))
    478 	    write_mask &= ~(1 << i);
    479       }
    480    }
    481 
    482    entry = new(this->mem_ctx) acp_entry(lhs->var, rhs->var, write_mask,
    483 					swizzle);
    484    this->acp->push_tail(entry);
    485 }
    486 
    487 bool
    488 do_copy_propagation_elements(exec_list *instructions)
    489 {
    490    ir_copy_propagation_elements_visitor v;
    491 
    492    visit_list_elements(&v, instructions);
    493 
    494    return v.progress;
    495 }
    496