Home | History | Annotate | Download | only in glsl
      1 /*
      2  * Copyright  2010 Intel Corporation
      3  *
      4  * Permission is hereby granted, free of charge, to any person obtaining a
      5  * copy of this software and associated documentation files (the "Software"),
      6  * to deal in the Software without restriction, including without limitation
      7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      8  * and/or sell copies of the Software, and to permit persons to whom the
      9  * Software is furnished to do so, subject to the following conditions:
     10  *
     11  * The above copyright notice and this permission notice (including the next
     12  * paragraph) shall be included in all copies or substantial portions of the
     13  * Software.
     14  *
     15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
     20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
     21  * DEALINGS IN THE SOFTWARE.
     22  */
     23 
     24 /**
     25  * \file opt_array_splitting.cpp
     26  *
     27  * If an array is always dereferenced with a constant index, then
     28  * split it apart into its elements, making it more amenable to other
     29  * optimization passes.
     30  *
     31  * This skips uniform/varying arrays, which would need careful
     32  * handling due to their ir->location fields tying them to the GL API
     33  * and other shader stages.
     34  */
     35 
     36 #include "ir.h"
     37 #include "ir_visitor.h"
     38 #include "ir_rvalue_visitor.h"
     39 #include "compiler/glsl_types.h"
     40 
     41 static bool debug = false;
     42 
     43 namespace {
     44 
     45 namespace opt_array_splitting {
     46 
     47 class variable_entry : public exec_node
     48 {
     49 public:
     50    variable_entry(ir_variable *var)
     51    {
     52       this->var = var;
     53       this->split = true;
     54       this->declaration = false;
     55       this->components = NULL;
     56       this->mem_ctx = NULL;
     57       if (var->type->is_array())
     58          this->size = var->type->length;
     59       else
     60          this->size = var->type->matrix_columns;
     61    }
     62 
     63    ir_variable *var; /* The key: the variable's pointer. */
     64    unsigned size; /* array length or matrix columns */
     65 
     66    /** Whether this array should be split or not. */
     67    bool split;
     68 
     69    /* If the variable had a decl we can work with in the instruction
     70     * stream.  We can't do splitting on function arguments, which
     71     * don't get this variable set.
     72     */
     73    bool declaration;
     74 
     75    ir_variable **components;
     76 
     77    /** ralloc_parent(this->var) -- the shader's talloc context. */
     78    void *mem_ctx;
     79 };
     80 
     81 } /* namespace */
     82 
     83 using namespace opt_array_splitting;
     84 
     85 /**
     86  * This class does a walk over the tree, coming up with the set of
     87  * variables that could be split by looking to see if they are arrays
     88  * that are only ever constant-index dereferenced.
     89  */
     90 class ir_array_reference_visitor : public ir_hierarchical_visitor {
     91 public:
     92    ir_array_reference_visitor(void)
     93    {
     94       this->mem_ctx = ralloc_context(NULL);
     95       this->variable_list.make_empty();
     96       this->in_whole_array_copy = false;
     97    }
     98 
     99    ~ir_array_reference_visitor(void)
    100    {
    101       ralloc_free(mem_ctx);
    102    }
    103 
    104    bool get_split_list(exec_list *instructions, bool linked);
    105 
    106    virtual ir_visitor_status visit(ir_variable *);
    107    virtual ir_visitor_status visit(ir_dereference_variable *);
    108    virtual ir_visitor_status visit_enter(ir_assignment *);
    109    virtual ir_visitor_status visit_leave(ir_assignment *);
    110    virtual ir_visitor_status visit_enter(ir_dereference_array *);
    111    virtual ir_visitor_status visit_enter(ir_function_signature *);
    112 
    113    variable_entry *get_variable_entry(ir_variable *var);
    114 
    115    /* List of variable_entry */
    116    exec_list variable_list;
    117 
    118    void *mem_ctx;
    119 
    120    bool in_whole_array_copy;
    121 };
    122 
    123 } /* namespace */
    124 
    125 variable_entry *
    126 ir_array_reference_visitor::get_variable_entry(ir_variable *var)
    127 {
    128    assert(var);
    129 
    130    if (var->data.mode != ir_var_auto &&
    131        var->data.mode != ir_var_temporary)
    132       return NULL;
    133 
    134    if (!(var->type->is_array() || var->type->is_matrix()))
    135       return NULL;
    136 
    137    /* If the array hasn't been sized yet, we can't split it.  After
    138     * linking, this should be resolved.
    139     */
    140    if (var->type->is_unsized_array())
    141       return NULL;
    142 
    143    foreach_in_list(variable_entry, entry, &this->variable_list) {
    144       if (entry->var == var)
    145          return entry;
    146    }
    147 
    148    variable_entry *entry = new(mem_ctx) variable_entry(var);
    149    this->variable_list.push_tail(entry);
    150    return entry;
    151 }
    152 
    153 
    154 ir_visitor_status
    155 ir_array_reference_visitor::visit(ir_variable *ir)
    156 {
    157    variable_entry *entry = this->get_variable_entry(ir);
    158 
    159    if (entry)
    160       entry->declaration = true;
    161 
    162    return visit_continue;
    163 }
    164 
    165 ir_visitor_status
    166 ir_array_reference_visitor::visit_enter(ir_assignment *ir)
    167 {
    168    in_whole_array_copy =
    169       ir->lhs->type->is_array() && ir->whole_variable_written();
    170 
    171    return visit_continue;
    172 }
    173 
    174 ir_visitor_status
    175 ir_array_reference_visitor::visit_leave(ir_assignment *ir)
    176 {
    177    in_whole_array_copy = false;
    178 
    179    return visit_continue;
    180 }
    181 
    182 ir_visitor_status
    183 ir_array_reference_visitor::visit(ir_dereference_variable *ir)
    184 {
    185    variable_entry *entry = this->get_variable_entry(ir->var);
    186 
    187    /* Allow whole-array assignments on the LHS.  We can split those
    188     * by "unrolling" the assignment into component-wise assignments.
    189     */
    190    if (in_assignee && in_whole_array_copy)
    191       return visit_continue;
    192 
    193    /* If we made it to here without seeing an ir_dereference_array,
    194     * then the dereference of this array didn't have a constant index
    195     * (see the visit_continue_with_parent below), so we can't split
    196     * the variable.
    197     */
    198    if (entry)
    199       entry->split = false;
    200 
    201    return visit_continue;
    202 }
    203 
    204 ir_visitor_status
    205 ir_array_reference_visitor::visit_enter(ir_dereference_array *ir)
    206 {
    207    ir_dereference_variable *deref = ir->array->as_dereference_variable();
    208    if (!deref)
    209       return visit_continue;
    210 
    211    variable_entry *entry = this->get_variable_entry(deref->var);
    212 
    213    /* If the access to the array has a variable index, we wouldn't
    214     * know which split variable this dereference should go to.
    215     */
    216    if (!ir->array_index->as_constant()) {
    217       if (entry)
    218          entry->split = false;
    219       /* This variable indexing could come from a different array dereference
    220        * that also has variable indexing, that is, something like a[b[a[b[0]]]].
    221        * If we return visit_continue_with_parent here for the first appearence
    222        * of a, then we can miss that b also has indirect indexing (if this is
    223        * the only place in the program where such indirect indexing into b
    224        * happens), so keep going.
    225        */
    226       return visit_continue;
    227    }
    228 
    229    /* If the index is also array dereference, visit index. */
    230    if (ir->array_index->as_dereference_array())
    231       visit_enter(ir->array_index->as_dereference_array());
    232 
    233    return visit_continue_with_parent;
    234 }
    235 
    236 ir_visitor_status
    237 ir_array_reference_visitor::visit_enter(ir_function_signature *ir)
    238 {
    239    /* We don't have logic for array-splitting function arguments,
    240     * so just look at the body instructions and not the parameter
    241     * declarations.
    242     */
    243    visit_list_elements(this, &ir->body);
    244    return visit_continue_with_parent;
    245 }
    246 
    247 bool
    248 ir_array_reference_visitor::get_split_list(exec_list *instructions,
    249                                            bool linked)
    250 {
    251    visit_list_elements(this, instructions);
    252 
    253    /* If the shaders aren't linked yet, we can't mess with global
    254     * declarations, which need to be matched by name across shaders.
    255     */
    256    if (!linked) {
    257       foreach_in_list(ir_instruction, node, instructions) {
    258          ir_variable *var = node->as_variable();
    259          if (var) {
    260             variable_entry *entry = get_variable_entry(var);
    261             if (entry)
    262                entry->remove();
    263          }
    264       }
    265    }
    266 
    267    /* Trim out variables we found that we can't split. */
    268    foreach_in_list_safe(variable_entry, entry, &variable_list) {
    269       if (debug) {
    270          printf("array %s@%p: decl %d, split %d\n",
    271                 entry->var->name, (void *) entry->var, entry->declaration,
    272                 entry->split);
    273       }
    274 
    275       if (!(entry->declaration && entry->split)) {
    276          entry->remove();
    277       }
    278    }
    279 
    280    return !variable_list.is_empty();
    281 }
    282 
    283 /**
    284  * This class rewrites the dereferences of arrays that have been split
    285  * to use the newly created ir_variables for each component.
    286  */
    287 class ir_array_splitting_visitor : public ir_rvalue_visitor {
    288 public:
    289    ir_array_splitting_visitor(exec_list *vars)
    290    {
    291       this->variable_list = vars;
    292    }
    293 
    294    virtual ~ir_array_splitting_visitor()
    295    {
    296    }
    297 
    298    virtual ir_visitor_status visit_leave(ir_assignment *);
    299 
    300    void split_deref(ir_dereference **deref);
    301    void handle_rvalue(ir_rvalue **rvalue);
    302    variable_entry *get_splitting_entry(ir_variable *var);
    303 
    304    exec_list *variable_list;
    305 };
    306 
    307 variable_entry *
    308 ir_array_splitting_visitor::get_splitting_entry(ir_variable *var)
    309 {
    310    assert(var);
    311 
    312    foreach_in_list(variable_entry, entry, this->variable_list) {
    313       if (entry->var == var) {
    314          return entry;
    315       }
    316    }
    317 
    318    return NULL;
    319 }
    320 
    321 void
    322 ir_array_splitting_visitor::split_deref(ir_dereference **deref)
    323 {
    324    ir_dereference_array *deref_array = (*deref)->as_dereference_array();
    325    if (!deref_array)
    326       return;
    327 
    328    ir_dereference_variable *deref_var = deref_array->array->as_dereference_variable();
    329    if (!deref_var)
    330       return;
    331    ir_variable *var = deref_var->var;
    332 
    333    variable_entry *entry = get_splitting_entry(var);
    334    if (!entry)
    335       return;
    336 
    337    ir_constant *constant = deref_array->array_index->as_constant();
    338    assert(constant);
    339 
    340    if (constant->value.i[0] >= 0 && constant->value.i[0] < (int)entry->size) {
    341       *deref = new(entry->mem_ctx)
    342                ir_dereference_variable(entry->components[constant->value.i[0]]);
    343    } else {
    344       /* There was a constant array access beyond the end of the
    345        * array.  This might have happened due to constant folding
    346        * after the initial parse.  This produces an undefined value,
    347        * but shouldn't crash.  Just give them an uninitialized
    348        * variable.
    349        */
    350       ir_variable *temp = new(entry->mem_ctx) ir_variable(deref_array->type,
    351                                                           "undef",
    352                                                           ir_var_temporary);
    353       entry->components[0]->insert_before(temp);
    354       *deref = new(entry->mem_ctx) ir_dereference_variable(temp);
    355    }
    356 }
    357 
    358 void
    359 ir_array_splitting_visitor::handle_rvalue(ir_rvalue **rvalue)
    360 {
    361    if (!*rvalue)
    362       return;
    363 
    364    ir_dereference *deref = (*rvalue)->as_dereference();
    365 
    366    if (!deref)
    367       return;
    368 
    369    split_deref(&deref);
    370    *rvalue = deref;
    371 }
    372 
    373 ir_visitor_status
    374 ir_array_splitting_visitor::visit_leave(ir_assignment *ir)
    375 {
    376    /* The normal rvalue visitor skips the LHS of assignments, but we
    377     * need to process those just the same.
    378     */
    379    ir_rvalue *lhs = ir->lhs;
    380 
    381    /* "Unroll" any whole array assignments, creating assignments for
    382     * each array element.  Then, do splitting on each new assignment.
    383     */
    384    if (lhs->type->is_array() && ir->whole_variable_written() &&
    385        get_splitting_entry(ir->whole_variable_written())) {
    386       void *mem_ctx = ralloc_parent(ir);
    387 
    388       for (unsigned i = 0; i < lhs->type->length; i++) {
    389          ir_rvalue *lhs_i =
    390             new(mem_ctx) ir_dereference_array(ir->lhs->clone(mem_ctx, NULL),
    391                                               new(mem_ctx) ir_constant(i));
    392          ir_rvalue *rhs_i =
    393             new(mem_ctx) ir_dereference_array(ir->rhs->clone(mem_ctx, NULL),
    394                                               new(mem_ctx) ir_constant(i));
    395          ir_rvalue *condition_i =
    396             ir->condition ? ir->condition->clone(mem_ctx, NULL) : NULL;
    397 
    398          ir_assignment *assign_i =
    399             new(mem_ctx) ir_assignment(lhs_i, rhs_i, condition_i);
    400 
    401          ir->insert_before(assign_i);
    402          assign_i->accept(this);
    403       }
    404       ir->remove();
    405       return visit_continue;
    406    }
    407 
    408    handle_rvalue(&lhs);
    409    ir->lhs = lhs->as_dereference();
    410 
    411    ir->lhs->accept(this);
    412 
    413    handle_rvalue(&ir->rhs);
    414    ir->rhs->accept(this);
    415 
    416    if (ir->condition) {
    417       handle_rvalue(&ir->condition);
    418       ir->condition->accept(this);
    419    }
    420 
    421    return visit_continue;
    422 }
    423 
    424 bool
    425 optimize_split_arrays(exec_list *instructions, bool linked)
    426 {
    427    ir_array_reference_visitor refs;
    428    if (!refs.get_split_list(instructions, linked))
    429       return false;
    430 
    431    void *mem_ctx = ralloc_context(NULL);
    432 
    433    /* Replace the decls of the arrays to be split with their split
    434     * components.
    435     */
    436    foreach_in_list(variable_entry, entry, &refs.variable_list) {
    437       const struct glsl_type *type = entry->var->type;
    438       const struct glsl_type *subtype;
    439 
    440       if (type->is_matrix())
    441          subtype = type->column_type();
    442       else
    443          subtype = type->fields.array;
    444 
    445       entry->mem_ctx = ralloc_parent(entry->var);
    446 
    447       entry->components = ralloc_array(mem_ctx, ir_variable *, entry->size);
    448 
    449       for (unsigned int i = 0; i < entry->size; i++) {
    450          const char *name = ralloc_asprintf(mem_ctx, "%s_%d",
    451                                             entry->var->name, i);
    452 
    453          entry->components[i] =
    454             new(entry->mem_ctx) ir_variable(subtype, name, ir_var_temporary);
    455          entry->var->insert_before(entry->components[i]);
    456       }
    457 
    458       entry->var->remove();
    459    }
    460 
    461    ir_array_splitting_visitor split(&refs.variable_list);
    462    visit_list_elements(&split, instructions);
    463 
    464    if (debug)
    465       _mesa_print_ir(stdout, instructions, NULL);
    466 
    467    ralloc_free(mem_ctx);
    468 
    469    return true;
    470 
    471 }
    472