Home | History | Annotate | Download | only in glsl
      1 /*
      2  * Copyright  2010 Luca Barbieri
      3  *
      4  * Permission is hereby granted, free of charge, to any person obtaining a
      5  * copy of this software and associated documentation files (the "Software"),
      6  * to deal in the Software without restriction, including without limitation
      7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      8  * and/or sell copies of the Software, and to permit persons to whom the
      9  * Software is furnished to do so, subject to the following conditions:
     10  *
     11  * The above copyright notice and this permission notice (including the next
     12  * paragraph) shall be included in all copies or substantial portions of the
     13  * Software.
     14  *
     15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
     20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
     21  * DEALINGS IN THE SOFTWARE.
     22  */
     23 
     24 /**
     25  * \file lower_variable_index_to_cond_assign.cpp
     26  *
     27  * Turns non-constant indexing into array types to a series of
     28  * conditional moves of each element into a temporary.
     29  *
     30  * Pre-DX10 GPUs often don't have a native way to do this operation,
     31  * and this works around that.
     32  *
     33  * The lowering process proceeds as follows.  Each non-constant index
     34  * found in an r-value is converted to a canonical form \c array[i].  Each
     35  * element of the array is conditionally assigned to a temporary by comparing
     36  * \c i to a constant index.  This is done by cloning the canonical form and
     37  * replacing all occurances of \c i with a constant.  Each remaining occurance
     38  * of the canonical form in the IR is replaced with a dereference of the
     39  * temporary variable.
     40  *
     41  * L-values with non-constant indices are handled similarly.  In this case,
     42  * the RHS of the assignment is assigned to a temporary.  The non-constant
     43  * index is replace with the canonical form (just like for r-values).  The
     44  * temporary is conditionally assigned to each element of the canonical form
     45  * by comparing \c i with each index.  The same clone-and-replace scheme is
     46  * used.
     47  */
     48 
     49 #include "ir.h"
     50 #include "ir_rvalue_visitor.h"
     51 #include "ir_optimization.h"
     52 #include "compiler/glsl_types.h"
     53 #include "main/macros.h"
     54 
     55 /**
     56  * Generate a comparison value for a block of indices
     57  *
     58  * Lowering passes for non-constant indexing of arrays, matrices, or vectors
     59  * can use this to generate blocks of index comparison values.
     60  *
     61  * \param instructions  List where new instructions will be appended
     62  * \param index         \c ir_variable containing the desired index
     63  * \param base          Base value for this block of comparisons
     64  * \param components    Number of unique index values to compare.  This must
     65  *                      be on the range [1, 4].
     66  * \param mem_ctx       ralloc memory context to be used for all allocations.
     67  *
     68  * \returns
     69  * An \c ir_rvalue that \b must be cloned for each use in conditional
     70  * assignments, etc.
     71  */
     72 ir_rvalue *
     73 compare_index_block(exec_list *instructions, ir_variable *index,
     74 		    unsigned base, unsigned components, void *mem_ctx)
     75 {
     76    ir_rvalue *broadcast_index = new(mem_ctx) ir_dereference_variable(index);
     77 
     78    assert(index->type->is_scalar());
     79    assert(index->type->base_type == GLSL_TYPE_INT || index->type->base_type == GLSL_TYPE_UINT);
     80    assert(components >= 1 && components <= 4);
     81 
     82    if (components > 1) {
     83       const ir_swizzle_mask m = { 0, 0, 0, 0, components, false };
     84       broadcast_index = new(mem_ctx) ir_swizzle(broadcast_index, m);
     85    }
     86 
     87    /* Compare the desired index value with the next block of four indices.
     88     */
     89    ir_constant_data test_indices_data;
     90    memset(&test_indices_data, 0, sizeof(test_indices_data));
     91    test_indices_data.i[0] = base;
     92    test_indices_data.i[1] = base + 1;
     93    test_indices_data.i[2] = base + 2;
     94    test_indices_data.i[3] = base + 3;
     95 
     96    ir_constant *const test_indices =
     97       new(mem_ctx) ir_constant(broadcast_index->type,
     98 			       &test_indices_data);
     99 
    100    ir_rvalue *const condition_val =
    101       new(mem_ctx) ir_expression(ir_binop_equal,
    102 				 glsl_type::bvec(components),
    103 				 broadcast_index,
    104 				 test_indices);
    105 
    106    ir_variable *const condition =
    107       new(mem_ctx) ir_variable(condition_val->type,
    108 			       "dereference_condition",
    109 			       ir_var_temporary);
    110    instructions->push_tail(condition);
    111 
    112    ir_rvalue *const cond_deref =
    113       new(mem_ctx) ir_dereference_variable(condition);
    114    instructions->push_tail(new(mem_ctx) ir_assignment(cond_deref, condition_val, 0));
    115 
    116    return cond_deref;
    117 }
    118 
    119 static inline bool
    120 is_array_or_matrix(const ir_rvalue *ir)
    121 {
    122    return (ir->type->is_array() || ir->type->is_matrix());
    123 }
    124 
    125 namespace {
    126 /**
    127  * Replace a dereference of a variable with a specified r-value
    128  *
    129  * Each time a dereference of the specified value is replaced, the r-value
    130  * tree is cloned.
    131  */
    132 class deref_replacer : public ir_rvalue_visitor {
    133 public:
    134    deref_replacer(const ir_variable *variable_to_replace, ir_rvalue *value)
    135       : variable_to_replace(variable_to_replace), value(value),
    136 	progress(false)
    137    {
    138       assert(this->variable_to_replace != NULL);
    139       assert(this->value != NULL);
    140    }
    141 
    142    virtual void handle_rvalue(ir_rvalue **rvalue)
    143    {
    144       ir_dereference_variable *const dv = (*rvalue)->as_dereference_variable();
    145 
    146       if ((dv != NULL) && (dv->var == this->variable_to_replace)) {
    147 	 this->progress = true;
    148 	 *rvalue = this->value->clone(ralloc_parent(*rvalue), NULL);
    149       }
    150    }
    151 
    152    const ir_variable *variable_to_replace;
    153    ir_rvalue *value;
    154    bool progress;
    155 };
    156 
    157 /**
    158  * Find a variable index dereference of an array in an rvalue tree
    159  */
    160 class find_variable_index : public ir_hierarchical_visitor {
    161 public:
    162    find_variable_index()
    163       : deref(NULL)
    164    {
    165       /* empty */
    166    }
    167 
    168    virtual ir_visitor_status visit_enter(ir_dereference_array *ir)
    169    {
    170       if (is_array_or_matrix(ir->array)
    171 	  && (ir->array_index->as_constant() == NULL)) {
    172 	 this->deref = ir;
    173 	 return visit_stop;
    174       }
    175 
    176       return visit_continue;
    177    }
    178 
    179    /**
    180     * First array dereference found in the tree that has a non-constant index.
    181     */
    182    ir_dereference_array *deref;
    183 };
    184 
    185 struct assignment_generator
    186 {
    187    ir_instruction* base_ir;
    188    ir_dereference *rvalue;
    189    ir_variable *old_index;
    190    bool is_write;
    191    unsigned int write_mask;
    192    ir_variable* var;
    193 
    194    assignment_generator()
    195       : base_ir(NULL),
    196         rvalue(NULL),
    197         old_index(NULL),
    198         is_write(false),
    199         write_mask(0),
    200         var(NULL)
    201    {
    202    }
    203 
    204    void generate(unsigned i, ir_rvalue* condition, exec_list *list) const
    205    {
    206       /* Just clone the rest of the deref chain when trying to get at the
    207        * underlying variable.
    208        */
    209       void *mem_ctx = ralloc_parent(base_ir);
    210 
    211       /* Clone the old r-value in its entirety.  Then replace any occurances of
    212        * the old variable index with the new constant index.
    213        */
    214       ir_dereference *element = this->rvalue->clone(mem_ctx, NULL);
    215       ir_constant *const index = new(mem_ctx) ir_constant(i);
    216       deref_replacer r(this->old_index, index);
    217       element->accept(&r);
    218       assert(r.progress);
    219 
    220       /* Generate a conditional assignment to (or from) the constant indexed
    221        * array dereference.
    222        */
    223       ir_rvalue *variable = new(mem_ctx) ir_dereference_variable(this->var);
    224       ir_assignment *const assignment = (is_write)
    225 	 ? new(mem_ctx) ir_assignment(element, variable, condition, write_mask)
    226 	 : new(mem_ctx) ir_assignment(variable, element, condition);
    227 
    228       list->push_tail(assignment);
    229    }
    230 };
    231 
    232 struct switch_generator
    233 {
    234    /* make TFunction a template parameter if you need to use other generators */
    235    typedef assignment_generator TFunction;
    236    const TFunction& generator;
    237 
    238    ir_variable* index;
    239    unsigned linear_sequence_max_length;
    240    unsigned condition_components;
    241 
    242    void *mem_ctx;
    243 
    244    switch_generator(const TFunction& generator, ir_variable *index,
    245 		    unsigned linear_sequence_max_length,
    246 		    unsigned condition_components)
    247       : generator(generator), index(index),
    248 	linear_sequence_max_length(linear_sequence_max_length),
    249 	condition_components(condition_components)
    250    {
    251       this->mem_ctx = ralloc_parent(index);
    252    }
    253 
    254    void linear_sequence(unsigned begin, unsigned end, exec_list *list)
    255    {
    256       if (begin == end)
    257          return;
    258 
    259       /* If the array access is a read, read the first element of this subregion
    260        * unconditionally.  The remaining tests will possibly overwrite this
    261        * value with one of the other array elements.
    262        *
    263        * This optimization cannot be done for writes because it will cause the
    264        * first element of the subregion to be written possibly *in addition* to
    265        * one of the other elements.
    266        */
    267       unsigned first;
    268       if (!this->generator.is_write) {
    269 	 this->generator.generate(begin, 0, list);
    270 	 first = begin + 1;
    271       } else {
    272 	 first = begin;
    273       }
    274 
    275       for (unsigned i = first; i < end; i += 4) {
    276          const unsigned comps = MIN2(condition_components, end - i);
    277 
    278 	 ir_rvalue *const cond_deref =
    279 	    compare_index_block(list, index, i, comps, this->mem_ctx);
    280 
    281          if (comps == 1) {
    282             this->generator.generate(i, cond_deref->clone(this->mem_ctx, NULL),
    283 				     list);
    284          } else {
    285             for (unsigned j = 0; j < comps; j++) {
    286 	       ir_rvalue *const cond_swiz =
    287 		  new(this->mem_ctx) ir_swizzle(cond_deref->clone(this->mem_ctx, NULL),
    288 						j, 0, 0, 0, 1);
    289 
    290                this->generator.generate(i + j, cond_swiz, list);
    291             }
    292          }
    293       }
    294    }
    295 
    296    void bisect(unsigned begin, unsigned end, exec_list *list)
    297    {
    298       unsigned middle = (begin + end) >> 1;
    299 
    300       assert(index->type->is_integer());
    301 
    302       ir_constant *const middle_c = (index->type->base_type == GLSL_TYPE_UINT)
    303 	 ? new(this->mem_ctx) ir_constant((unsigned)middle)
    304          : new(this->mem_ctx) ir_constant((int)middle);
    305 
    306 
    307       ir_dereference_variable *deref =
    308 	 new(this->mem_ctx) ir_dereference_variable(this->index);
    309 
    310       ir_expression *less =
    311 	 new(this->mem_ctx) ir_expression(ir_binop_less, glsl_type::bool_type,
    312 					  deref, middle_c);
    313 
    314       ir_if *if_less = new(this->mem_ctx) ir_if(less);
    315 
    316       generate(begin, middle, &if_less->then_instructions);
    317       generate(middle, end, &if_less->else_instructions);
    318 
    319       list->push_tail(if_less);
    320    }
    321 
    322    void generate(unsigned begin, unsigned end, exec_list *list)
    323    {
    324       unsigned length = end - begin;
    325       if (length <= this->linear_sequence_max_length)
    326          return linear_sequence(begin, end, list);
    327       else
    328          return bisect(begin, end, list);
    329    }
    330 };
    331 
    332 /**
    333  * Visitor class for replacing expressions with ir_constant values.
    334  */
    335 
    336 class variable_index_to_cond_assign_visitor : public ir_rvalue_visitor {
    337 public:
    338    variable_index_to_cond_assign_visitor(gl_shader_stage stage,
    339                                          bool lower_input,
    340                                          bool lower_output,
    341                                          bool lower_temp,
    342                                          bool lower_uniform)
    343    {
    344       this->progress = false;
    345       this->stage = stage;
    346       this->lower_inputs = lower_input;
    347       this->lower_outputs = lower_output;
    348       this->lower_temps = lower_temp;
    349       this->lower_uniforms = lower_uniform;
    350    }
    351 
    352    bool progress;
    353 
    354    gl_shader_stage stage;
    355    bool lower_inputs;
    356    bool lower_outputs;
    357    bool lower_temps;
    358    bool lower_uniforms;
    359 
    360    bool storage_type_needs_lowering(ir_dereference_array *deref) const
    361    {
    362       /* If a variable isn't eventually the target of this dereference, then
    363        * it must be a constant or some sort of anonymous temporary storage.
    364        *
    365        * FINISHME: Is this correct?  Most drivers treat arrays of constants as
    366        * FINISHME: uniforms.  It seems like this should do the same.
    367        */
    368       const ir_variable *const var = deref->array->variable_referenced();
    369       if (var == NULL)
    370 	 return this->lower_temps;
    371 
    372       switch (var->data.mode) {
    373       case ir_var_auto:
    374       case ir_var_temporary:
    375 	 return this->lower_temps;
    376 
    377       case ir_var_uniform:
    378       case ir_var_shader_storage:
    379 	 return this->lower_uniforms;
    380 
    381       case ir_var_shader_shared:
    382 	 return false;
    383 
    384       case ir_var_function_in:
    385       case ir_var_const_in:
    386          return this->lower_temps;
    387 
    388       case ir_var_system_value:
    389          /* There are only a few system values that have array types:
    390           *
    391           *    gl_TessLevelInner[]
    392           *    gl_TessLevelOuter[]
    393           *    gl_SampleMaskIn[]
    394           *
    395           * The tessellation factor arrays are lowered to vec4/vec2s
    396           * by lower_tess_level() before this pass occurs, so we'll
    397           * never see them here.
    398           *
    399           * The only remaining case is gl_SampleMaskIn[], which has
    400           * a length of ceil(ctx->Const.MaxSamples / 32).  Most hardware
    401           * supports no more than 32 samples, in which case our lowering
    402           * produces a single read of gl_SampleMaskIn[0].  Even with 64x
    403           * MSAA, the array length is only 2, so the lowering is fairly
    404           * efficient.  Therefore, lower unconditionally.
    405           */
    406          return true;
    407 
    408       case ir_var_shader_in:
    409          /* The input array size is unknown at compiler time for non-patch
    410           * inputs in TCS and TES. The arrays are sized to
    411           * the implementation-dependent limit "gl_MaxPatchVertices", but
    412           * the real size is stored in the "gl_PatchVerticesIn" built-in
    413           * uniform.
    414           *
    415           * The TCS input array size is specified by
    416           * glPatchParameteri(GL_PATCH_VERTICES).
    417           *
    418           * The TES input array size is specified by the "vertices" output
    419           * layout qualifier in TCS.
    420           */
    421          if ((stage == MESA_SHADER_TESS_CTRL ||
    422               stage == MESA_SHADER_TESS_EVAL) && !var->data.patch)
    423             return false;
    424          return this->lower_inputs;
    425 
    426       case ir_var_function_out:
    427          /* TCS non-patch outputs can only be indexed with "gl_InvocationID".
    428           * Other expressions are not allowed.
    429           */
    430          if (stage == MESA_SHADER_TESS_CTRL && !var->data.patch)
    431             return false;
    432          return this->lower_temps;
    433 
    434       case ir_var_shader_out:
    435          return this->lower_outputs;
    436 
    437       case ir_var_function_inout:
    438 	 return this->lower_temps;
    439       }
    440 
    441       assert(!"Should not get here.");
    442       return false;
    443    }
    444 
    445    bool needs_lowering(ir_dereference_array *deref) const
    446    {
    447       if (deref == NULL || deref->array_index->as_constant()
    448 	  || !is_array_or_matrix(deref->array))
    449 	 return false;
    450 
    451       return this->storage_type_needs_lowering(deref);
    452    }
    453 
    454    ir_variable *convert_dereference_array(ir_dereference_array *orig_deref,
    455 					  ir_assignment* orig_assign,
    456 					  ir_dereference *orig_base)
    457    {
    458       assert(is_array_or_matrix(orig_deref->array));
    459 
    460       const unsigned length = (orig_deref->array->type->is_array())
    461          ? orig_deref->array->type->length
    462          : orig_deref->array->type->matrix_columns;
    463 
    464       void *const mem_ctx = ralloc_parent(base_ir);
    465 
    466       /* Temporary storage for either the result of the dereference of
    467        * the array, or the RHS that's being assigned into the
    468        * dereference of the array.
    469        */
    470       ir_variable *var;
    471 
    472       if (orig_assign) {
    473 	 var = new(mem_ctx) ir_variable(orig_assign->rhs->type,
    474 					"dereference_array_value",
    475 					ir_var_temporary);
    476 	 base_ir->insert_before(var);
    477 
    478 	 ir_dereference *lhs = new(mem_ctx) ir_dereference_variable(var);
    479 	 ir_assignment *assign = new(mem_ctx) ir_assignment(lhs,
    480 							    orig_assign->rhs,
    481 							    NULL);
    482 
    483          base_ir->insert_before(assign);
    484       } else {
    485 	 var = new(mem_ctx) ir_variable(orig_deref->type,
    486 					"dereference_array_value",
    487 					ir_var_temporary);
    488 	 base_ir->insert_before(var);
    489       }
    490 
    491       /* Store the index to a temporary to avoid reusing its tree. */
    492       ir_variable *index =
    493 	 new(mem_ctx) ir_variable(orig_deref->array_index->type,
    494 				  "dereference_array_index", ir_var_temporary);
    495       base_ir->insert_before(index);
    496 
    497       ir_dereference *lhs = new(mem_ctx) ir_dereference_variable(index);
    498       ir_assignment *assign =
    499 	 new(mem_ctx) ir_assignment(lhs, orig_deref->array_index, NULL);
    500       base_ir->insert_before(assign);
    501 
    502       orig_deref->array_index = lhs->clone(mem_ctx, NULL);
    503 
    504       assignment_generator ag;
    505       ag.rvalue = orig_base;
    506       ag.base_ir = base_ir;
    507       ag.old_index = index;
    508       ag.var = var;
    509       if (orig_assign) {
    510 	 ag.is_write = true;
    511 	 ag.write_mask = orig_assign->write_mask;
    512       } else {
    513 	 ag.is_write = false;
    514       }
    515 
    516       switch_generator sg(ag, index, 4, 4);
    517 
    518       /* If the original assignment has a condition, respect that original
    519        * condition!  This is acomplished by wrapping the new conditional
    520        * assignments in an if-statement that uses the original condition.
    521        */
    522       if ((orig_assign != NULL) && (orig_assign->condition != NULL)) {
    523 	 /* No need to clone the condition because the IR that it hangs on is
    524 	  * going to be removed from the instruction sequence.
    525 	  */
    526 	 ir_if *if_stmt = new(mem_ctx) ir_if(orig_assign->condition);
    527 
    528 	 sg.generate(0, length, &if_stmt->then_instructions);
    529 	 base_ir->insert_before(if_stmt);
    530       } else {
    531 	 exec_list list;
    532 
    533 	 sg.generate(0, length, &list);
    534 	 base_ir->insert_before(&list);
    535       }
    536 
    537       return var;
    538    }
    539 
    540    virtual void handle_rvalue(ir_rvalue **pir)
    541    {
    542       if (this->in_assignee)
    543 	 return;
    544 
    545       if (!*pir)
    546          return;
    547 
    548       ir_dereference_array* orig_deref = (*pir)->as_dereference_array();
    549       if (needs_lowering(orig_deref)) {
    550          ir_variable *var =
    551 	    convert_dereference_array(orig_deref, NULL, orig_deref);
    552          assert(var);
    553          *pir = new(ralloc_parent(base_ir)) ir_dereference_variable(var);
    554          this->progress = true;
    555       }
    556    }
    557 
    558    ir_visitor_status
    559    visit_leave(ir_assignment *ir)
    560    {
    561       ir_rvalue_visitor::visit_leave(ir);
    562 
    563       find_variable_index f;
    564       ir->lhs->accept(&f);
    565 
    566       if ((f.deref != NULL) && storage_type_needs_lowering(f.deref)) {
    567          convert_dereference_array(f.deref, ir, ir->lhs);
    568          ir->remove();
    569          this->progress = true;
    570       }
    571 
    572       return visit_continue;
    573    }
    574 };
    575 
    576 } /* anonymous namespace */
    577 
    578 bool
    579 lower_variable_index_to_cond_assign(gl_shader_stage stage,
    580                                     exec_list *instructions,
    581                                     bool lower_input,
    582                                     bool lower_output,
    583                                     bool lower_temp,
    584                                     bool lower_uniform)
    585 {
    586    variable_index_to_cond_assign_visitor v(stage,
    587                                            lower_input,
    588                                            lower_output,
    589                                            lower_temp,
    590                                            lower_uniform);
    591 
    592    /* Continue lowering until no progress is made.  If there are multiple
    593     * levels of indirection (e.g., non-constant indexing of array elements and
    594     * matrix columns of an array of matrix), each pass will only lower one
    595     * level of indirection.
    596     */
    597    bool progress_ever = false;
    598    do {
    599       v.progress = false;
    600       visit_list_elements(&v, instructions);
    601       progress_ever = v.progress || progress_ever;
    602    } while (v.progress);
    603 
    604    return progress_ever;
    605 }
    606