Home | History | Annotate | Download | only in glsl
      1 /*
      2  * Copyright  2010 Intel Corporation
      3  *
      4  * Permission is hereby granted, free of charge, to any person obtaining a
      5  * copy of this software and associated documentation files (the "Software"),
      6  * to deal in the Software without restriction, including without limitation
      7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      8  * and/or sell copies of the Software, and to permit persons to whom the
      9  * Software is furnished to do so, subject to the following conditions:
     10  *
     11  * The above copyright notice and this permission notice (including the next
     12  * paragraph) shall be included in all copies or substantial portions of the
     13  * Software.
     14  *
     15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
     20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
     21  * DEALINGS IN THE SOFTWARE.
     22  */
     23 
     24 /**
     25  * \file opt_copy_propagation_elements.cpp
     26  *
     27  * Replaces usage of recently-copied components of variables with the
     28  * previous copy of the variable.
     29  *
     30  * This pass can be compared with opt_copy_propagation, which operands
     31  * on arbitrary whole-variable copies.  However, in order to handle
     32  * the copy propagation of swizzled variables or writemasked writes,
     33  * we want to track things on a channel-wise basis.  I found that
     34  * trying to mix the swizzled/writemasked support here with the
     35  * whole-variable stuff in opt_copy_propagation.cpp just made a mess,
     36  * so this is separate despite the ACP handling being somewhat
     37  * similar.
     38  *
     39  * This should reduce the number of MOV instructions in the generated
     40  * programs unless copy propagation is also done on the LIR, and may
     41  * help anyway by triggering other optimizations that live in the HIR.
     42  */
     43 
     44 #include "ir.h"
     45 #include "ir_rvalue_visitor.h"
     46 #include "ir_basic_block.h"
     47 #include "ir_optimization.h"
     48 #include "compiler/glsl_types.h"
     49 #include "util/hash_table.h"
     50 
     51 static bool debug = false;
     52 
     53 namespace {
     54 
     55 class acp_entry;
     56 
     57 /* Class that refers to acp_entry in another exec_list. Used
     58  * when making removals based on rhs.
     59  */
     60 class acp_ref : public exec_node
     61 {
     62 public:
     63    acp_ref(acp_entry *e)
     64    {
     65       entry = e;
     66    }
     67    acp_entry *entry;
     68 };
     69 
     70 class acp_entry : public exec_node
     71 {
     72 public:
     73    /* override operator new from exec_node */
     74    DECLARE_LINEAR_ZALLOC_CXX_OPERATORS(acp_entry)
     75 
     76    acp_entry(ir_variable *lhs, ir_variable *rhs, int write_mask, int swizzle[4])
     77       : rhs_node(this)
     78    {
     79       this->lhs = lhs;
     80       this->rhs = rhs;
     81       this->write_mask = write_mask;
     82       memcpy(this->swizzle, swizzle, sizeof(this->swizzle));
     83    }
     84 
     85    ir_variable *lhs;
     86    ir_variable *rhs;
     87    unsigned int write_mask;
     88    int swizzle[4];
     89    acp_ref rhs_node;
     90 };
     91 
     92 
     93 class kill_entry : public exec_node
     94 {
     95 public:
     96    /* override operator new from exec_node */
     97    DECLARE_LINEAR_ZALLOC_CXX_OPERATORS(kill_entry)
     98 
     99    kill_entry(ir_variable *var, int write_mask)
    100    {
    101       this->var = var;
    102       this->write_mask = write_mask;
    103    }
    104 
    105    ir_variable *var;
    106    unsigned int write_mask;
    107 };
    108 
    109 class ir_copy_propagation_elements_visitor : public ir_rvalue_visitor {
    110 public:
    111    ir_copy_propagation_elements_visitor()
    112    {
    113       this->progress = false;
    114       this->killed_all = false;
    115       this->mem_ctx = ralloc_context(NULL);
    116       this->lin_ctx = linear_alloc_parent(this->mem_ctx, 0);
    117       this->shader_mem_ctx = NULL;
    118       this->kills = new(mem_ctx) exec_list;
    119 
    120       create_acp();
    121    }
    122    ~ir_copy_propagation_elements_visitor()
    123    {
    124       ralloc_free(mem_ctx);
    125    }
    126 
    127    void create_acp()
    128    {
    129       lhs_ht = _mesa_hash_table_create(mem_ctx, _mesa_hash_pointer,
    130                                        _mesa_key_pointer_equal);
    131       rhs_ht = _mesa_hash_table_create(mem_ctx, _mesa_hash_pointer,
    132                                        _mesa_key_pointer_equal);
    133    }
    134 
    135    void destroy_acp()
    136    {
    137       _mesa_hash_table_destroy(lhs_ht, NULL);
    138       _mesa_hash_table_destroy(rhs_ht, NULL);
    139    }
    140 
    141    void populate_acp(hash_table *lhs, hash_table *rhs)
    142    {
    143       struct hash_entry *entry;
    144 
    145       hash_table_foreach(lhs, entry) {
    146          _mesa_hash_table_insert(lhs_ht, entry->key, entry->data);
    147       }
    148 
    149       hash_table_foreach(rhs, entry) {
    150          _mesa_hash_table_insert(rhs_ht, entry->key, entry->data);
    151       }
    152    }
    153 
    154    void handle_loop(ir_loop *, bool keep_acp);
    155    virtual ir_visitor_status visit_enter(class ir_loop *);
    156    virtual ir_visitor_status visit_enter(class ir_function_signature *);
    157    virtual ir_visitor_status visit_leave(class ir_assignment *);
    158    virtual ir_visitor_status visit_enter(class ir_call *);
    159    virtual ir_visitor_status visit_enter(class ir_if *);
    160    virtual ir_visitor_status visit_leave(class ir_swizzle *);
    161 
    162    void handle_rvalue(ir_rvalue **rvalue);
    163 
    164    void add_copy(ir_assignment *ir);
    165    void kill(kill_entry *k);
    166    void handle_if_block(exec_list *instructions);
    167 
    168    /** Hash of acp_entry: The available copies to propagate */
    169    hash_table *lhs_ht;
    170    hash_table *rhs_ht;
    171 
    172    /**
    173     * List of kill_entry: The variables whose values were killed in this
    174     * block.
    175     */
    176    exec_list *kills;
    177 
    178    bool progress;
    179 
    180    bool killed_all;
    181 
    182    /* Context for our local data structures. */
    183    void *mem_ctx;
    184    void *lin_ctx;
    185    /* Context for allocating new shader nodes. */
    186    void *shader_mem_ctx;
    187 };
    188 
    189 } /* unnamed namespace */
    190 
    191 ir_visitor_status
    192 ir_copy_propagation_elements_visitor::visit_enter(ir_function_signature *ir)
    193 {
    194    /* Treat entry into a function signature as a completely separate
    195     * block.  Any instructions at global scope will be shuffled into
    196     * main() at link time, so they're irrelevant to us.
    197     */
    198    exec_list *orig_kills = this->kills;
    199    bool orig_killed_all = this->killed_all;
    200 
    201    hash_table *orig_lhs_ht = lhs_ht;
    202    hash_table *orig_rhs_ht = rhs_ht;
    203 
    204    this->kills = new(mem_ctx) exec_list;
    205    this->killed_all = false;
    206 
    207    create_acp();
    208 
    209    visit_list_elements(this, &ir->body);
    210 
    211    ralloc_free(this->kills);
    212 
    213    destroy_acp();
    214 
    215    this->kills = orig_kills;
    216    this->killed_all = orig_killed_all;
    217 
    218    lhs_ht = orig_lhs_ht;
    219    rhs_ht = orig_rhs_ht;
    220 
    221    return visit_continue_with_parent;
    222 }
    223 
    224 ir_visitor_status
    225 ir_copy_propagation_elements_visitor::visit_leave(ir_assignment *ir)
    226 {
    227    ir_dereference_variable *lhs = ir->lhs->as_dereference_variable();
    228    ir_variable *var = ir->lhs->variable_referenced();
    229 
    230    if (var->type->is_scalar() || var->type->is_vector()) {
    231       kill_entry *k;
    232 
    233       if (lhs)
    234 	 k = new(this->lin_ctx) kill_entry(var, ir->write_mask);
    235       else
    236 	 k = new(this->lin_ctx) kill_entry(var, ~0);
    237 
    238       kill(k);
    239    }
    240 
    241    add_copy(ir);
    242 
    243    return visit_continue;
    244 }
    245 
    246 ir_visitor_status
    247 ir_copy_propagation_elements_visitor::visit_leave(ir_swizzle *)
    248 {
    249    /* Don't visit the values of swizzles since they are handled while
    250     * visiting the swizzle itself.
    251     */
    252    return visit_continue;
    253 }
    254 
    255 /**
    256  * Replaces dereferences of ACP RHS variables with ACP LHS variables.
    257  *
    258  * This is where the actual copy propagation occurs.  Note that the
    259  * rewriting of ir_dereference means that the ir_dereference instance
    260  * must not be shared by multiple IR operations!
    261  */
    262 void
    263 ir_copy_propagation_elements_visitor::handle_rvalue(ir_rvalue **ir)
    264 {
    265    int swizzle_chan[4];
    266    ir_dereference_variable *deref_var;
    267    ir_variable *source[4] = {NULL, NULL, NULL, NULL};
    268    int source_chan[4] = {0, 0, 0, 0};
    269    int chans;
    270    bool noop_swizzle = true;
    271 
    272    if (!*ir)
    273       return;
    274 
    275    ir_swizzle *swizzle = (*ir)->as_swizzle();
    276    if (swizzle) {
    277       deref_var = swizzle->val->as_dereference_variable();
    278       if (!deref_var)
    279 	 return;
    280 
    281       swizzle_chan[0] = swizzle->mask.x;
    282       swizzle_chan[1] = swizzle->mask.y;
    283       swizzle_chan[2] = swizzle->mask.z;
    284       swizzle_chan[3] = swizzle->mask.w;
    285       chans = swizzle->type->vector_elements;
    286    } else {
    287       deref_var = (*ir)->as_dereference_variable();
    288       if (!deref_var)
    289 	 return;
    290 
    291       swizzle_chan[0] = 0;
    292       swizzle_chan[1] = 1;
    293       swizzle_chan[2] = 2;
    294       swizzle_chan[3] = 3;
    295       chans = deref_var->type->vector_elements;
    296    }
    297 
    298    if (this->in_assignee)
    299       return;
    300 
    301    ir_variable *var = deref_var->var;
    302 
    303    /* Try to find ACP entries covering swizzle_chan[], hoping they're
    304     * the same source variable.
    305     */
    306    hash_entry *ht_entry = _mesa_hash_table_search(lhs_ht, var);
    307    if (ht_entry) {
    308       exec_list *ht_list = (exec_list *) ht_entry->data;
    309       foreach_in_list(acp_entry, entry, ht_list) {
    310          for (int c = 0; c < chans; c++) {
    311             if (entry->write_mask & (1 << swizzle_chan[c])) {
    312                source[c] = entry->rhs;
    313                source_chan[c] = entry->swizzle[swizzle_chan[c]];
    314 
    315                if (source_chan[c] != swizzle_chan[c])
    316                   noop_swizzle = false;
    317             }
    318          }
    319       }
    320    }
    321 
    322    /* Make sure all channels are copying from the same source variable. */
    323    if (!source[0])
    324       return;
    325    for (int c = 1; c < chans; c++) {
    326       if (source[c] != source[0])
    327 	 return;
    328    }
    329 
    330    if (!shader_mem_ctx)
    331       shader_mem_ctx = ralloc_parent(deref_var);
    332 
    333    /* Don't pointlessly replace the rvalue with itself (or a noop swizzle
    334     * of itself, which would just be deleted by opt_noop_swizzle).
    335     */
    336    if (source[0] == var && noop_swizzle)
    337       return;
    338 
    339    if (debug) {
    340       printf("Copy propagation from:\n");
    341       (*ir)->print();
    342    }
    343 
    344    deref_var = new(shader_mem_ctx) ir_dereference_variable(source[0]);
    345    *ir = new(shader_mem_ctx) ir_swizzle(deref_var,
    346 					source_chan[0],
    347 					source_chan[1],
    348 					source_chan[2],
    349 					source_chan[3],
    350 					chans);
    351    progress = true;
    352 
    353    if (debug) {
    354       printf("to:\n");
    355       (*ir)->print();
    356       printf("\n");
    357    }
    358 }
    359 
    360 
    361 ir_visitor_status
    362 ir_copy_propagation_elements_visitor::visit_enter(ir_call *ir)
    363 {
    364    /* Do copy propagation on call parameters, but skip any out params */
    365    foreach_two_lists(formal_node, &ir->callee->parameters,
    366                      actual_node, &ir->actual_parameters) {
    367       ir_variable *sig_param = (ir_variable *) formal_node;
    368       ir_rvalue *ir = (ir_rvalue *) actual_node;
    369       if (sig_param->data.mode != ir_var_function_out
    370           && sig_param->data.mode != ir_var_function_inout) {
    371          ir->accept(this);
    372       }
    373    }
    374 
    375    /* Since we're unlinked, we don't (necessarily) know the side effects of
    376     * this call.  So kill all copies.
    377     */
    378    _mesa_hash_table_clear(lhs_ht, NULL);
    379    _mesa_hash_table_clear(rhs_ht, NULL);
    380 
    381    this->killed_all = true;
    382 
    383    return visit_continue_with_parent;
    384 }
    385 
    386 void
    387 ir_copy_propagation_elements_visitor::handle_if_block(exec_list *instructions)
    388 {
    389    exec_list *orig_kills = this->kills;
    390    bool orig_killed_all = this->killed_all;
    391 
    392    hash_table *orig_lhs_ht = lhs_ht;
    393    hash_table *orig_rhs_ht = rhs_ht;
    394 
    395    this->kills = new(mem_ctx) exec_list;
    396    this->killed_all = false;
    397 
    398    create_acp();
    399 
    400    /* Populate the initial acp with a copy of the original */
    401    populate_acp(orig_lhs_ht, orig_rhs_ht);
    402 
    403    visit_list_elements(this, instructions);
    404 
    405    if (this->killed_all) {
    406       _mesa_hash_table_clear(orig_lhs_ht, NULL);
    407       _mesa_hash_table_clear(orig_rhs_ht, NULL);
    408    }
    409 
    410    exec_list *new_kills = this->kills;
    411    this->kills = orig_kills;
    412    this->killed_all = this->killed_all || orig_killed_all;
    413 
    414    destroy_acp();
    415 
    416    lhs_ht = orig_lhs_ht;
    417    rhs_ht = orig_rhs_ht;
    418 
    419    /* Move the new kills into the parent block's list, removing them
    420     * from the parent's ACP list in the process.
    421     */
    422    foreach_in_list_safe(kill_entry, k, new_kills) {
    423       kill(k);
    424    }
    425 
    426    ralloc_free(new_kills);
    427 }
    428 
    429 ir_visitor_status
    430 ir_copy_propagation_elements_visitor::visit_enter(ir_if *ir)
    431 {
    432    ir->condition->accept(this);
    433 
    434    handle_if_block(&ir->then_instructions);
    435    handle_if_block(&ir->else_instructions);
    436 
    437    /* handle_if_block() already descended into the children. */
    438    return visit_continue_with_parent;
    439 }
    440 
    441 void
    442 ir_copy_propagation_elements_visitor::handle_loop(ir_loop *ir, bool keep_acp)
    443 {
    444    exec_list *orig_kills = this->kills;
    445    bool orig_killed_all = this->killed_all;
    446 
    447    hash_table *orig_lhs_ht = lhs_ht;
    448    hash_table *orig_rhs_ht = rhs_ht;
    449 
    450    /* FINISHME: For now, the initial acp for loops is totally empty.
    451     * We could go through once, then go through again with the acp
    452     * cloned minus the killed entries after the first run through.
    453     */
    454    this->kills = new(mem_ctx) exec_list;
    455    this->killed_all = false;
    456 
    457    create_acp();
    458 
    459    if (keep_acp) {
    460       /* Populate the initial acp with a copy of the original */
    461       populate_acp(orig_lhs_ht, orig_rhs_ht);
    462    }
    463 
    464    visit_list_elements(this, &ir->body_instructions);
    465 
    466    if (this->killed_all) {
    467       _mesa_hash_table_clear(orig_lhs_ht, NULL);
    468       _mesa_hash_table_clear(orig_rhs_ht, NULL);
    469    }
    470 
    471    exec_list *new_kills = this->kills;
    472    this->kills = orig_kills;
    473    this->killed_all = this->killed_all || orig_killed_all;
    474 
    475    destroy_acp();
    476 
    477    lhs_ht = orig_lhs_ht;
    478    rhs_ht = orig_rhs_ht;
    479 
    480    foreach_in_list_safe(kill_entry, k, new_kills) {
    481       kill(k);
    482    }
    483 
    484    ralloc_free(new_kills);
    485 }
    486 
    487 ir_visitor_status
    488 ir_copy_propagation_elements_visitor::visit_enter(ir_loop *ir)
    489 {
    490    handle_loop(ir, false);
    491    handle_loop(ir, true);
    492 
    493    /* already descended into the children. */
    494    return visit_continue_with_parent;
    495 }
    496 
    497 /* Remove any entries currently in the ACP for this kill. */
    498 void
    499 ir_copy_propagation_elements_visitor::kill(kill_entry *k)
    500 {
    501    /* removal of lhs entries */
    502    hash_entry *ht_entry = _mesa_hash_table_search(lhs_ht, k->var);
    503    if (ht_entry) {
    504       exec_list *lhs_list = (exec_list *) ht_entry->data;
    505       foreach_in_list_safe(acp_entry, entry, lhs_list) {
    506          entry->write_mask = entry->write_mask & ~k->write_mask;
    507          if (entry->write_mask == 0) {
    508             entry->remove();
    509             continue;
    510          }
    511       }
    512    }
    513 
    514    /* removal of rhs entries */
    515    ht_entry = _mesa_hash_table_search(rhs_ht, k->var);
    516    if (ht_entry) {
    517       exec_list *rhs_list = (exec_list *) ht_entry->data;
    518       acp_ref *ref;
    519 
    520       while ((ref = (acp_ref *) rhs_list->pop_head()) != NULL) {
    521          acp_entry *entry = ref->entry;
    522 
    523          /* If entry is still in a list (not already removed by lhs entry
    524           * removal above), remove it.
    525           */
    526          if (entry->prev || entry->next)
    527             entry->remove();
    528       }
    529    }
    530 
    531    /* If we were on a list, remove ourselves before inserting */
    532    if (k->next)
    533       k->remove();
    534 
    535    this->kills->push_tail(k);
    536 }
    537 
    538 /**
    539  * Adds directly-copied channels between vector variables to the available
    540  * copy propagation list.
    541  */
    542 void
    543 ir_copy_propagation_elements_visitor::add_copy(ir_assignment *ir)
    544 {
    545    acp_entry *entry;
    546    int orig_swizzle[4] = {0, 1, 2, 3};
    547    int swizzle[4];
    548 
    549    if (ir->condition)
    550       return;
    551 
    552    ir_dereference_variable *lhs = ir->lhs->as_dereference_variable();
    553    if (!lhs || !(lhs->type->is_scalar() || lhs->type->is_vector()))
    554       return;
    555 
    556    ir_dereference_variable *rhs = ir->rhs->as_dereference_variable();
    557    if (!rhs) {
    558       ir_swizzle *swiz = ir->rhs->as_swizzle();
    559       if (!swiz)
    560 	 return;
    561 
    562       rhs = swiz->val->as_dereference_variable();
    563       if (!rhs)
    564 	 return;
    565 
    566       orig_swizzle[0] = swiz->mask.x;
    567       orig_swizzle[1] = swiz->mask.y;
    568       orig_swizzle[2] = swiz->mask.z;
    569       orig_swizzle[3] = swiz->mask.w;
    570    }
    571 
    572    /* Move the swizzle channels out to the positions they match in the
    573     * destination.  We don't want to have to rewrite the swizzle[]
    574     * array every time we clear a bit of the write_mask.
    575     */
    576    int j = 0;
    577    for (int i = 0; i < 4; i++) {
    578       if (ir->write_mask & (1 << i))
    579 	 swizzle[i] = orig_swizzle[j++];
    580    }
    581 
    582    int write_mask = ir->write_mask;
    583    if (lhs->var == rhs->var) {
    584       /* If this is a copy from the variable to itself, then we need
    585        * to be sure not to include the updated channels from this
    586        * instruction in the set of new source channels to be
    587        * copy-propagated from.
    588        */
    589       for (int i = 0; i < 4; i++) {
    590 	 if (ir->write_mask & (1 << orig_swizzle[i]))
    591 	    write_mask &= ~(1 << i);
    592       }
    593    }
    594 
    595    if (lhs->var->data.precise != rhs->var->data.precise)
    596       return;
    597 
    598    entry = new(this->lin_ctx) acp_entry(lhs->var, rhs->var, write_mask,
    599 					swizzle);
    600 
    601    /* lhs hash, hash of lhs -> acp_entry lists */
    602    hash_entry *ht_entry = _mesa_hash_table_search(lhs_ht, lhs->var);
    603    if (ht_entry) {
    604       exec_list *lhs_list = (exec_list *) ht_entry->data;
    605       lhs_list->push_tail(entry);
    606    } else {
    607       exec_list *lhs_list = new(mem_ctx) exec_list;
    608       lhs_list->push_tail(entry);
    609       _mesa_hash_table_insert(lhs_ht, lhs->var, lhs_list);
    610    }
    611 
    612    /* rhs hash, hash of rhs -> acp_entry pointers to lhs lists */
    613    ht_entry = _mesa_hash_table_search(rhs_ht, rhs->var);
    614    if (ht_entry) {
    615       exec_list *rhs_list = (exec_list *) ht_entry->data;
    616       rhs_list->push_tail(&entry->rhs_node);
    617    } else {
    618       exec_list *rhs_list = new(mem_ctx) exec_list;
    619       rhs_list->push_tail(&entry->rhs_node);
    620       _mesa_hash_table_insert(rhs_ht, rhs->var, rhs_list);
    621    }
    622 }
    623 
    624 bool
    625 do_copy_propagation_elements(exec_list *instructions)
    626 {
    627    ir_copy_propagation_elements_visitor v;
    628 
    629    visit_list_elements(&v, instructions);
    630 
    631    return v.progress;
    632 }
    633