1 /* 2 * Copyright 2010 Luca Barbieri 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 21 * DEALINGS IN THE SOFTWARE. 22 */ 23 24 /** 25 * \file lower_variable_index_to_cond_assign.cpp 26 * 27 * Turns non-constant indexing into array types to a series of 28 * conditional moves of each element into a temporary. 29 * 30 * Pre-DX10 GPUs often don't have a native way to do this operation, 31 * and this works around that. 32 */ 33 34 #include "ir.h" 35 #include "ir_rvalue_visitor.h" 36 #include "ir_optimization.h" 37 #include "glsl_types.h" 38 #include "main/macros.h" 39 40 struct assignment_generator 41 { 42 ir_instruction* base_ir; 43 ir_rvalue* array; 44 bool is_write; 45 ir_variable* var; 46 47 assignment_generator() 48 { 49 } 50 51 void generate(unsigned i, ir_rvalue* condition, exec_list *list) const 52 { 53 /* Just clone the rest of the deref chain when trying to get at the 54 * underlying variable. 55 */ 56 void *mem_ctx = hieralloc_parent(base_ir); 57 ir_rvalue *element = 58 new(mem_ctx) ir_dereference_array(this->array->clone(mem_ctx, NULL), 59 new(mem_ctx) ir_constant(i)); 60 ir_rvalue *variable = new(mem_ctx) ir_dereference_variable(this->var); 61 62 ir_assignment *assignment = (is_write) 63 ? new(mem_ctx) ir_assignment(element, variable, condition) 64 : new(mem_ctx) ir_assignment(variable, element, condition); 65 66 list->push_tail(assignment); 67 } 68 }; 69 70 struct switch_generator 71 { 72 /* make TFunction a template parameter if you need to use other generators */ 73 typedef assignment_generator TFunction; 74 const TFunction& generator; 75 76 ir_variable* index; 77 unsigned linear_sequence_max_length; 78 unsigned condition_components; 79 80 void *mem_ctx; 81 82 switch_generator(const TFunction& generator, ir_variable *index, 83 unsigned linear_sequence_max_length, 84 unsigned condition_components) 85 : generator(generator), index(index), 86 linear_sequence_max_length(linear_sequence_max_length), 87 condition_components(condition_components) 88 { 89 this->mem_ctx = hieralloc_parent(index); 90 } 91 92 void linear_sequence(unsigned begin, unsigned end, exec_list *list) 93 { 94 if (begin == end) 95 return; 96 97 /* If the array access is a read, read the first element of this subregion 98 * unconditionally. The remaining tests will possibly overwrite this 99 * value with one of the other array elements. 100 * 101 * This optimization cannot be done for writes because it will cause the 102 * first element of the subregion to be written possibly *in addition* to 103 * one of the other elements. 104 */ 105 unsigned first; 106 if (!this->generator.is_write) { 107 this->generator.generate(begin, 0, list); 108 first = begin + 1; 109 } else { 110 first = begin; 111 } 112 113 for (unsigned i = first; i < end; i += 4) { 114 const unsigned comps = MIN2(condition_components, end - i); 115 116 ir_rvalue *broadcast_index = 117 new(this->mem_ctx) ir_dereference_variable(index); 118 119 if (comps) { 120 const ir_swizzle_mask m = { 0, 0, 0, 0, comps, false }; 121 broadcast_index = new(this->mem_ctx) ir_swizzle(broadcast_index, m); 122 } 123 124 /* Compare the desired index value with the next block of four indices. 125 */ 126 ir_constant_data test_indices_data; 127 memset(&test_indices_data, 0, sizeof(test_indices_data)); 128 test_indices_data.i[0] = i; 129 test_indices_data.i[1] = i + 1; 130 test_indices_data.i[2] = i + 2; 131 test_indices_data.i[3] = i + 3; 132 ir_constant *const test_indices = 133 new(this->mem_ctx) ir_constant(broadcast_index->type, 134 &test_indices_data); 135 136 ir_rvalue *const condition_val = 137 new(this->mem_ctx) ir_expression(ir_binop_equal, 138 &glsl_type::bool_type[comps - 1], 139 broadcast_index, 140 test_indices); 141 142 ir_variable *const condition = 143 new(this->mem_ctx) ir_variable(condition_val->type, 144 "dereference_array_condition", 145 ir_var_temporary); 146 list->push_tail(condition); 147 148 ir_rvalue *const cond_deref = 149 new(this->mem_ctx) ir_dereference_variable(condition); 150 list->push_tail(new(this->mem_ctx) ir_assignment(cond_deref, 151 condition_val, 0)); 152 153 if (comps == 1) { 154 ir_rvalue *const cond_deref = 155 new(this->mem_ctx) ir_dereference_variable(condition); 156 157 this->generator.generate(i, cond_deref, list); 158 } else { 159 for (unsigned j = 0; j < comps; j++) { 160 ir_rvalue *const cond_deref = 161 new(this->mem_ctx) ir_dereference_variable(condition); 162 ir_rvalue *const cond_swiz = 163 new(this->mem_ctx) ir_swizzle(cond_deref, j, 0, 0, 0, 1); 164 165 this->generator.generate(i + j, cond_swiz, list); 166 } 167 } 168 } 169 } 170 171 void bisect(unsigned begin, unsigned end, exec_list *list) 172 { 173 unsigned middle = (begin + end) >> 1; 174 175 assert(index->type->is_integer()); 176 177 ir_constant *const middle_c = (index->type->base_type == GLSL_TYPE_UINT) 178 ? new(this->mem_ctx) ir_constant((unsigned)middle) 179 : new(this->mem_ctx) ir_constant((int)middle); 180 181 182 ir_dereference_variable *deref = 183 new(this->mem_ctx) ir_dereference_variable(this->index); 184 185 ir_expression *less = 186 new(this->mem_ctx) ir_expression(ir_binop_less, glsl_type::bool_type, 187 deref, middle_c); 188 189 ir_if *if_less = new(this->mem_ctx) ir_if(less); 190 191 generate(begin, middle, &if_less->then_instructions); 192 generate(middle, end, &if_less->else_instructions); 193 194 list->push_tail(if_less); 195 } 196 197 void generate(unsigned begin, unsigned end, exec_list *list) 198 { 199 unsigned length = end - begin; 200 if (length <= this->linear_sequence_max_length) 201 return linear_sequence(begin, end, list); 202 else 203 return bisect(begin, end, list); 204 } 205 }; 206 207 /** 208 * Visitor class for replacing expressions with ir_constant values. 209 */ 210 211 class variable_index_to_cond_assign_visitor : public ir_rvalue_visitor { 212 public: 213 variable_index_to_cond_assign_visitor(bool lower_input, 214 bool lower_output, 215 bool lower_temp, 216 bool lower_uniform) 217 { 218 this->progress = false; 219 this->lower_inputs = lower_input; 220 this->lower_outputs = lower_output; 221 this->lower_temps = lower_temp; 222 this->lower_uniforms = lower_uniform; 223 } 224 225 bool progress; 226 bool lower_inputs; 227 bool lower_outputs; 228 bool lower_temps; 229 bool lower_uniforms; 230 231 bool is_array_or_matrix(const ir_instruction *ir) const 232 { 233 return (ir->type->is_array() || ir->type->is_matrix()); 234 } 235 236 bool needs_lowering(ir_dereference_array *deref) const 237 { 238 if (deref == NULL || deref->array_index->as_constant() 239 || !is_array_or_matrix(deref->array)) 240 return false; 241 242 if (deref->array->ir_type == ir_type_constant) 243 return this->lower_temps; 244 245 const ir_variable *const var = deref->array->variable_referenced(); 246 switch (var->mode) { 247 case ir_var_auto: 248 case ir_var_temporary: 249 return this->lower_temps; 250 case ir_var_uniform: 251 return this->lower_uniforms; 252 case ir_var_in: 253 return (var->location == -1) ? this->lower_temps : this->lower_inputs; 254 case ir_var_out: 255 return (var->location == -1) ? this->lower_temps : this->lower_outputs; 256 case ir_var_inout: 257 return this->lower_temps; 258 } 259 260 assert(!"Should not get here."); 261 return false; 262 } 263 264 ir_variable *convert_dereference_array(ir_dereference_array *orig_deref, 265 ir_rvalue* value) 266 { 267 assert(is_array_or_matrix(orig_deref->array)); 268 269 const unsigned length = (orig_deref->array->type->is_array()) 270 ? orig_deref->array->type->length 271 : orig_deref->array->type->matrix_columns; 272 273 void *const mem_ctx = hieralloc_parent(base_ir); 274 ir_variable *var = 275 new(mem_ctx) ir_variable(orig_deref->type, "dereference_array_value", 276 ir_var_temporary); 277 base_ir->insert_before(var); 278 279 if (value) { 280 ir_dereference *lhs = new(mem_ctx) ir_dereference_variable(var); 281 ir_assignment *assign = new(mem_ctx) ir_assignment(lhs, value, NULL); 282 283 base_ir->insert_before(assign); 284 } 285 286 /* Store the index to a temporary to avoid reusing its tree. */ 287 ir_variable *index = 288 new(mem_ctx) ir_variable(orig_deref->array_index->type, 289 "dereference_array_index", ir_var_temporary); 290 base_ir->insert_before(index); 291 292 ir_dereference *lhs = new(mem_ctx) ir_dereference_variable(index); 293 ir_assignment *assign = 294 new(mem_ctx) ir_assignment(lhs, orig_deref->array_index, NULL); 295 base_ir->insert_before(assign); 296 297 assignment_generator ag; 298 ag.array = orig_deref->array; 299 ag.base_ir = base_ir; 300 ag.var = var; 301 ag.is_write = !!value; 302 303 switch_generator sg(ag, index, 4, 4); 304 305 exec_list list; 306 sg.generate(0, length, &list); 307 base_ir->insert_before(&list); 308 309 return var; 310 } 311 312 virtual void handle_rvalue(ir_rvalue **pir) 313 { 314 if (!*pir) 315 return; 316 317 ir_dereference_array* orig_deref = (*pir)->as_dereference_array(); 318 if (needs_lowering(orig_deref)) { 319 ir_variable* var = convert_dereference_array(orig_deref, 0); 320 assert(var); 321 *pir = new(hieralloc_parent(base_ir)) ir_dereference_variable(var); 322 this->progress = true; 323 } 324 } 325 326 ir_visitor_status 327 visit_leave(ir_assignment *ir) 328 { 329 ir_rvalue_visitor::visit_leave(ir); 330 331 ir_dereference_array *orig_deref = ir->lhs->as_dereference_array(); 332 333 if (needs_lowering(orig_deref)) { 334 convert_dereference_array(orig_deref, ir->rhs); 335 ir->remove(); 336 this->progress = true; 337 } 338 339 return visit_continue; 340 } 341 }; 342 343 bool 344 lower_variable_index_to_cond_assign(exec_list *instructions, 345 bool lower_input, 346 bool lower_output, 347 bool lower_temp, 348 bool lower_uniform) 349 { 350 variable_index_to_cond_assign_visitor v(lower_input, 351 lower_output, 352 lower_temp, 353 lower_uniform); 354 355 visit_list_elements(&v, instructions); 356 357 return v.progress; 358 } 359