Home | History | Annotate | Download | only in glsl
      1 /*
 * Copyright © 2010 Luca Barbieri
      3  *
      4  * Permission is hereby granted, free of charge, to any person obtaining a
      5  * copy of this software and associated documentation files (the "Software"),
      6  * to deal in the Software without restriction, including without limitation
      7  * the rights to use, copy, modify, merge, publish, distribute, sublicense,
      8  * and/or sell copies of the Software, and to permit persons to whom the
      9  * Software is furnished to do so, subject to the following conditions:
     10  *
     11  * The above copyright notice and this permission notice (including the next
     12  * paragraph) shall be included in all copies or substantial portions of the
     13  * Software.
     14  *
     15  * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
     16  * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
     17  * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.  IN NO EVENT SHALL
     18  * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
     19  * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
     20  * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
     21  * DEALINGS IN THE SOFTWARE.
     22  */
     23 
     24 /**
     25  * \file lower_variable_index_to_cond_assign.cpp
     26  *
     27  * Turns non-constant indexing into array types to a series of
     28  * conditional moves of each element into a temporary.
     29  *
     30  * Pre-DX10 GPUs often don't have a native way to do this operation,
     31  * and this works around that.
     32  */
     33 
     34 #include "ir.h"
     35 #include "ir_rvalue_visitor.h"
     36 #include "ir_optimization.h"
     37 #include "glsl_types.h"
     38 #include "main/macros.h"
     39 
     40 struct assignment_generator
     41 {
     42    ir_instruction* base_ir;
     43    ir_rvalue* array;
     44    bool is_write;
     45    ir_variable* var;
     46 
     47    assignment_generator()
     48    {
     49    }
     50 
     51    void generate(unsigned i, ir_rvalue* condition, exec_list *list) const
     52    {
     53       /* Just clone the rest of the deref chain when trying to get at the
     54        * underlying variable.
     55        */
     56       void *mem_ctx = hieralloc_parent(base_ir);
     57       ir_rvalue *element =
     58 	 new(mem_ctx) ir_dereference_array(this->array->clone(mem_ctx, NULL),
     59 					   new(mem_ctx) ir_constant(i));
     60       ir_rvalue *variable = new(mem_ctx) ir_dereference_variable(this->var);
     61 
     62       ir_assignment *assignment = (is_write)
     63 	 ? new(mem_ctx) ir_assignment(element, variable, condition)
     64 	 : new(mem_ctx) ir_assignment(variable, element, condition);
     65 
     66       list->push_tail(assignment);
     67    }
     68 };
     69 
     70 struct switch_generator
     71 {
     72    /* make TFunction a template parameter if you need to use other generators */
     73    typedef assignment_generator TFunction;
     74    const TFunction& generator;
     75 
     76    ir_variable* index;
     77    unsigned linear_sequence_max_length;
     78    unsigned condition_components;
     79 
     80    void *mem_ctx;
     81 
     82    switch_generator(const TFunction& generator, ir_variable *index,
     83 		    unsigned linear_sequence_max_length,
     84 		    unsigned condition_components)
     85       : generator(generator), index(index),
     86 	linear_sequence_max_length(linear_sequence_max_length),
     87 	condition_components(condition_components)
     88    {
     89       this->mem_ctx = hieralloc_parent(index);
     90    }
     91 
     92    void linear_sequence(unsigned begin, unsigned end, exec_list *list)
     93    {
     94       if (begin == end)
     95          return;
     96 
     97       /* If the array access is a read, read the first element of this subregion
     98        * unconditionally.  The remaining tests will possibly overwrite this
     99        * value with one of the other array elements.
    100        *
    101        * This optimization cannot be done for writes because it will cause the
    102        * first element of the subregion to be written possibly *in addition* to
    103        * one of the other elements.
    104        */
    105       unsigned first;
    106       if (!this->generator.is_write) {
    107 	 this->generator.generate(begin, 0, list);
    108 	 first = begin + 1;
    109       } else {
    110 	 first = begin;
    111       }
    112 
    113       for (unsigned i = first; i < end; i += 4) {
    114          const unsigned comps = MIN2(condition_components, end - i);
    115 
    116          ir_rvalue *broadcast_index =
    117 	    new(this->mem_ctx) ir_dereference_variable(index);
    118 
    119          if (comps) {
    120 	    const ir_swizzle_mask m = { 0, 0, 0, 0, comps, false };
    121 	    broadcast_index = new(this->mem_ctx) ir_swizzle(broadcast_index, m);
    122 	 }
    123 
    124 	 /* Compare the desired index value with the next block of four indices.
    125 	  */
    126          ir_constant_data test_indices_data;
    127          memset(&test_indices_data, 0, sizeof(test_indices_data));
    128          test_indices_data.i[0] = i;
    129          test_indices_data.i[1] = i + 1;
    130          test_indices_data.i[2] = i + 2;
    131          test_indices_data.i[3] = i + 3;
    132          ir_constant *const test_indices =
    133 	    new(this->mem_ctx) ir_constant(broadcast_index->type,
    134 					   &test_indices_data);
    135 
    136          ir_rvalue *const condition_val =
    137 	    new(this->mem_ctx) ir_expression(ir_binop_equal,
    138 					     &glsl_type::bool_type[comps - 1],
    139 					     broadcast_index,
    140 					     test_indices);
    141 
    142          ir_variable *const condition =
    143 	    new(this->mem_ctx) ir_variable(condition_val->type,
    144 					   "dereference_array_condition",
    145 					   ir_var_temporary);
    146          list->push_tail(condition);
    147 
    148 	 ir_rvalue *const cond_deref =
    149 	    new(this->mem_ctx) ir_dereference_variable(condition);
    150          list->push_tail(new(this->mem_ctx) ir_assignment(cond_deref,
    151 							  condition_val, 0));
    152 
    153          if (comps == 1) {
    154 	    ir_rvalue *const cond_deref =
    155 	       new(this->mem_ctx) ir_dereference_variable(condition);
    156 
    157             this->generator.generate(i, cond_deref, list);
    158          } else {
    159             for (unsigned j = 0; j < comps; j++) {
    160 	       ir_rvalue *const cond_deref =
    161 		  new(this->mem_ctx) ir_dereference_variable(condition);
    162 	       ir_rvalue *const cond_swiz =
    163 		  new(this->mem_ctx) ir_swizzle(cond_deref, j, 0, 0, 0, 1);
    164 
    165                this->generator.generate(i + j, cond_swiz, list);
    166             }
    167          }
    168       }
    169    }
    170 
    171    void bisect(unsigned begin, unsigned end, exec_list *list)
    172    {
    173       unsigned middle = (begin + end) >> 1;
    174 
    175       assert(index->type->is_integer());
    176 
    177       ir_constant *const middle_c = (index->type->base_type == GLSL_TYPE_UINT)
    178 	 ? new(this->mem_ctx) ir_constant((unsigned)middle)
    179          : new(this->mem_ctx) ir_constant((int)middle);
    180 
    181 
    182       ir_dereference_variable *deref =
    183 	 new(this->mem_ctx) ir_dereference_variable(this->index);
    184 
    185       ir_expression *less =
    186 	 new(this->mem_ctx) ir_expression(ir_binop_less, glsl_type::bool_type,
    187 					  deref, middle_c);
    188 
    189       ir_if *if_less = new(this->mem_ctx) ir_if(less);
    190 
    191       generate(begin, middle, &if_less->then_instructions);
    192       generate(middle, end, &if_less->else_instructions);
    193 
    194       list->push_tail(if_less);
    195    }
    196 
    197    void generate(unsigned begin, unsigned end, exec_list *list)
    198    {
    199       unsigned length = end - begin;
    200       if (length <= this->linear_sequence_max_length)
    201          return linear_sequence(begin, end, list);
    202       else
    203          return bisect(begin, end, list);
    204    }
    205 };
    206 
    207 /**
    208  * Visitor class for replacing expressions with ir_constant values.
    209  */
    210 
    211 class variable_index_to_cond_assign_visitor : public ir_rvalue_visitor {
    212 public:
    213    variable_index_to_cond_assign_visitor(bool lower_input,
    214 					 bool lower_output,
    215 					 bool lower_temp,
    216 					 bool lower_uniform)
    217    {
    218       this->progress = false;
    219       this->lower_inputs = lower_input;
    220       this->lower_outputs = lower_output;
    221       this->lower_temps = lower_temp;
    222       this->lower_uniforms = lower_uniform;
    223    }
    224 
    225    bool progress;
    226    bool lower_inputs;
    227    bool lower_outputs;
    228    bool lower_temps;
    229    bool lower_uniforms;
    230 
    231    bool is_array_or_matrix(const ir_instruction *ir) const
    232    {
    233       return (ir->type->is_array() || ir->type->is_matrix());
    234    }
    235 
    236    bool needs_lowering(ir_dereference_array *deref) const
    237    {
    238       if (deref == NULL || deref->array_index->as_constant()
    239 	  || !is_array_or_matrix(deref->array))
    240 	 return false;
    241 
    242       if (deref->array->ir_type == ir_type_constant)
    243 	 return this->lower_temps;
    244 
    245       const ir_variable *const var = deref->array->variable_referenced();
    246       switch (var->mode) {
    247       case ir_var_auto:
    248       case ir_var_temporary:
    249 	 return this->lower_temps;
    250       case ir_var_uniform:
    251 	 return this->lower_uniforms;
    252       case ir_var_in:
    253 	 return (var->location == -1) ? this->lower_temps : this->lower_inputs;
    254       case ir_var_out:
    255 	 return (var->location == -1) ? this->lower_temps : this->lower_outputs;
    256       case ir_var_inout:
    257 	 return this->lower_temps;
    258       }
    259 
    260       assert(!"Should not get here.");
    261       return false;
    262    }
    263 
    264    ir_variable *convert_dereference_array(ir_dereference_array *orig_deref,
    265 					  ir_rvalue* value)
    266    {
    267       assert(is_array_or_matrix(orig_deref->array));
    268 
    269       const unsigned length = (orig_deref->array->type->is_array())
    270          ? orig_deref->array->type->length
    271          : orig_deref->array->type->matrix_columns;
    272 
    273       void *const mem_ctx = hieralloc_parent(base_ir);
    274       ir_variable *var =
    275 	 new(mem_ctx) ir_variable(orig_deref->type, "dereference_array_value",
    276 				  ir_var_temporary);
    277       base_ir->insert_before(var);
    278 
    279       if (value) {
    280 	 ir_dereference *lhs = new(mem_ctx) ir_dereference_variable(var);
    281 	 ir_assignment *assign = new(mem_ctx) ir_assignment(lhs, value, NULL);
    282 
    283          base_ir->insert_before(assign);
    284       }
    285 
    286       /* Store the index to a temporary to avoid reusing its tree. */
    287       ir_variable *index =
    288 	 new(mem_ctx) ir_variable(orig_deref->array_index->type,
    289 				  "dereference_array_index", ir_var_temporary);
    290       base_ir->insert_before(index);
    291 
    292       ir_dereference *lhs = new(mem_ctx) ir_dereference_variable(index);
    293       ir_assignment *assign =
    294 	 new(mem_ctx) ir_assignment(lhs, orig_deref->array_index, NULL);
    295       base_ir->insert_before(assign);
    296 
    297       assignment_generator ag;
    298       ag.array = orig_deref->array;
    299       ag.base_ir = base_ir;
    300       ag.var = var;
    301       ag.is_write = !!value;
    302 
    303       switch_generator sg(ag, index, 4, 4);
    304 
    305       exec_list list;
    306       sg.generate(0, length, &list);
    307       base_ir->insert_before(&list);
    308 
    309       return var;
    310    }
    311 
    312    virtual void handle_rvalue(ir_rvalue **pir)
    313    {
    314       if (!*pir)
    315          return;
    316 
    317       ir_dereference_array* orig_deref = (*pir)->as_dereference_array();
    318       if (needs_lowering(orig_deref)) {
    319          ir_variable* var = convert_dereference_array(orig_deref, 0);
    320          assert(var);
    321          *pir = new(hieralloc_parent(base_ir)) ir_dereference_variable(var);
    322          this->progress = true;
    323       }
    324    }
    325 
    326    ir_visitor_status
    327    visit_leave(ir_assignment *ir)
    328    {
    329       ir_rvalue_visitor::visit_leave(ir);
    330 
    331       ir_dereference_array *orig_deref = ir->lhs->as_dereference_array();
    332 
    333       if (needs_lowering(orig_deref)) {
    334          convert_dereference_array(orig_deref, ir->rhs);
    335          ir->remove();
    336          this->progress = true;
    337       }
    338 
    339       return visit_continue;
    340    }
    341 };
    342 
    343 bool
    344 lower_variable_index_to_cond_assign(exec_list *instructions,
    345 				    bool lower_input,
    346 				    bool lower_output,
    347 				    bool lower_temp,
    348 				    bool lower_uniform)
    349 {
    350    variable_index_to_cond_assign_visitor v(lower_input,
    351 					   lower_output,
    352 					   lower_temp,
    353 					   lower_uniform);
    354 
    355    visit_list_elements(&v, instructions);
    356 
    357    return v.progress;
    358 }
    359