1 /* 2 * Copyright 2010 Luca Barbieri 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 21 * DEALINGS IN THE SOFTWARE. 22 */ 23 24 /** 25 * \file lower_variable_index_to_cond_assign.cpp 26 * 27 * Turns non-constant indexing into array types to a series of 28 * conditional moves of each element into a temporary. 29 * 30 * Pre-DX10 GPUs often don't have a native way to do this operation, 31 * and this works around that. 32 * 33 * The lowering process proceeds as follows. Each non-constant index 34 * found in an r-value is converted to a canonical form \c array[i]. Each 35 * element of the array is conditionally assigned to a temporary by comparing 36 * \c i to a constant index. This is done by cloning the canonical form and 37 * replacing all occurances of \c i with a constant. Each remaining occurance 38 * of the canonical form in the IR is replaced with a dereference of the 39 * temporary variable. 40 * 41 * L-values with non-constant indices are handled similarly. In this case, 42 * the RHS of the assignment is assigned to a temporary. The non-constant 43 * index is replace with the canonical form (just like for r-values). The 44 * temporary is conditionally assigned to each element of the canonical form 45 * by comparing \c i with each index. The same clone-and-replace scheme is 46 * used. 47 */ 48 49 #include "ir.h" 50 #include "ir_rvalue_visitor.h" 51 #include "ir_optimization.h" 52 #include "glsl_types.h" 53 #include "main/macros.h" 54 55 /** 56 * Generate a comparison value for a block of indices 57 * 58 * Lowering passes for non-constant indexing of arrays, matrices, or vectors 59 * can use this to generate blocks of index comparison values. 60 * 61 * \param instructions List where new instructions will be appended 62 * \param index \c ir_variable containing the desired index 63 * \param base Base value for this block of comparisons 64 * \param components Number of unique index values to compare. This must 65 * be on the range [1, 4]. 66 * \param mem_ctx ralloc memory context to be used for all allocations. 67 * 68 * \returns 69 * An \c ir_rvalue that \b must be cloned for each use in conditional 70 * assignments, etc. 71 */ 72 ir_rvalue * 73 compare_index_block(exec_list *instructions, ir_variable *index, 74 unsigned base, unsigned components, void *mem_ctx) 75 { 76 ir_rvalue *broadcast_index = new(mem_ctx) ir_dereference_variable(index); 77 78 assert(index->type->is_scalar()); 79 assert(index->type->base_type == GLSL_TYPE_INT); 80 assert(components >= 1 && components <= 4); 81 82 if (components > 1) { 83 const ir_swizzle_mask m = { 0, 0, 0, 0, components, false }; 84 broadcast_index = new(mem_ctx) ir_swizzle(broadcast_index, m); 85 } 86 87 /* Compare the desired index value with the next block of four indices. 88 */ 89 ir_constant_data test_indices_data; 90 memset(&test_indices_data, 0, sizeof(test_indices_data)); 91 test_indices_data.i[0] = base; 92 test_indices_data.i[1] = base + 1; 93 test_indices_data.i[2] = base + 2; 94 test_indices_data.i[3] = base + 3; 95 96 ir_constant *const test_indices = 97 new(mem_ctx) ir_constant(broadcast_index->type, 98 &test_indices_data); 99 100 ir_rvalue *const condition_val = 101 new(mem_ctx) ir_expression(ir_binop_equal, 102 &glsl_type::bool_type[components - 1], 103 broadcast_index, 104 test_indices); 105 106 ir_variable *const condition = 107 new(mem_ctx) ir_variable(condition_val->type, 108 "dereference_condition", 109 ir_var_temporary); 110 instructions->push_tail(condition); 111 112 ir_rvalue *const cond_deref = 113 new(mem_ctx) ir_dereference_variable(condition); 114 instructions->push_tail(new(mem_ctx) ir_assignment(cond_deref, condition_val, 0)); 115 116 return cond_deref; 117 } 118 119 static inline bool 120 is_array_or_matrix(const ir_rvalue *ir) 121 { 122 return (ir->type->is_array() || ir->type->is_matrix()); 123 } 124 125 /** 126 * Replace a dereference of a variable with a specified r-value 127 * 128 * Each time a dereference of the specified value is replaced, the r-value 129 * tree is cloned. 130 */ 131 class deref_replacer : public ir_rvalue_visitor { 132 public: 133 deref_replacer(const ir_variable *variable_to_replace, ir_rvalue *value) 134 : variable_to_replace(variable_to_replace), value(value), 135 progress(false) 136 { 137 assert(this->variable_to_replace != NULL); 138 assert(this->value != NULL); 139 } 140 141 virtual void handle_rvalue(ir_rvalue **rvalue) 142 { 143 ir_dereference_variable *const dv = (*rvalue)->as_dereference_variable(); 144 145 if ((dv != NULL) && (dv->var == this->variable_to_replace)) { 146 this->progress = true; 147 *rvalue = this->value->clone(ralloc_parent(*rvalue), NULL); 148 } 149 } 150 151 const ir_variable *variable_to_replace; 152 ir_rvalue *value; 153 bool progress; 154 }; 155 156 /** 157 * Find a variable index dereference of an array in an rvalue tree 158 */ 159 class find_variable_index : public ir_hierarchical_visitor { 160 public: 161 find_variable_index() 162 : deref(NULL) 163 { 164 /* empty */ 165 } 166 167 virtual ir_visitor_status visit_enter(ir_dereference_array *ir) 168 { 169 if (is_array_or_matrix(ir->array) 170 && (ir->array_index->as_constant() == NULL)) { 171 this->deref = ir; 172 return visit_stop; 173 } 174 175 return visit_continue; 176 } 177 178 /** 179 * First array dereference found in the tree that has a non-constant index. 180 */ 181 ir_dereference_array *deref; 182 }; 183 184 struct assignment_generator 185 { 186 ir_instruction* base_ir; 187 ir_dereference *rvalue; 188 ir_variable *old_index; 189 bool is_write; 190 unsigned int write_mask; 191 ir_variable* var; 192 193 assignment_generator() 194 { 195 } 196 197 void generate(unsigned i, ir_rvalue* condition, exec_list *list) const 198 { 199 /* Just clone the rest of the deref chain when trying to get at the 200 * underlying variable. 201 */ 202 void *mem_ctx = ralloc_parent(base_ir); 203 204 /* Clone the old r-value in its entirety. Then replace any occurances of 205 * the old variable index with the new constant index. 206 */ 207 ir_dereference *element = this->rvalue->clone(mem_ctx, NULL); 208 ir_constant *const index = new(mem_ctx) ir_constant(i); 209 deref_replacer r(this->old_index, index); 210 element->accept(&r); 211 assert(r.progress); 212 213 /* Generate a conditional assignment to (or from) the constant indexed 214 * array dereference. 215 */ 216 ir_rvalue *variable = new(mem_ctx) ir_dereference_variable(this->var); 217 ir_assignment *const assignment = (is_write) 218 ? new(mem_ctx) ir_assignment(element, variable, condition, write_mask) 219 : new(mem_ctx) ir_assignment(variable, element, condition); 220 221 list->push_tail(assignment); 222 } 223 }; 224 225 struct switch_generator 226 { 227 /* make TFunction a template parameter if you need to use other generators */ 228 typedef assignment_generator TFunction; 229 const TFunction& generator; 230 231 ir_variable* index; 232 unsigned linear_sequence_max_length; 233 unsigned condition_components; 234 235 void *mem_ctx; 236 237 switch_generator(const TFunction& generator, ir_variable *index, 238 unsigned linear_sequence_max_length, 239 unsigned condition_components) 240 : generator(generator), index(index), 241 linear_sequence_max_length(linear_sequence_max_length), 242 condition_components(condition_components) 243 { 244 this->mem_ctx = ralloc_parent(index); 245 } 246 247 void linear_sequence(unsigned begin, unsigned end, exec_list *list) 248 { 249 if (begin == end) 250 return; 251 252 /* If the array access is a read, read the first element of this subregion 253 * unconditionally. The remaining tests will possibly overwrite this 254 * value with one of the other array elements. 255 * 256 * This optimization cannot be done for writes because it will cause the 257 * first element of the subregion to be written possibly *in addition* to 258 * one of the other elements. 259 */ 260 unsigned first; 261 if (!this->generator.is_write) { 262 this->generator.generate(begin, 0, list); 263 first = begin + 1; 264 } else { 265 first = begin; 266 } 267 268 for (unsigned i = first; i < end; i += 4) { 269 const unsigned comps = MIN2(condition_components, end - i); 270 271 ir_rvalue *const cond_deref = 272 compare_index_block(list, index, i, comps, this->mem_ctx); 273 274 if (comps == 1) { 275 this->generator.generate(i, cond_deref->clone(this->mem_ctx, NULL), 276 list); 277 } else { 278 for (unsigned j = 0; j < comps; j++) { 279 ir_rvalue *const cond_swiz = 280 new(this->mem_ctx) ir_swizzle(cond_deref->clone(this->mem_ctx, NULL), 281 j, 0, 0, 0, 1); 282 283 this->generator.generate(i + j, cond_swiz, list); 284 } 285 } 286 } 287 } 288 289 void bisect(unsigned begin, unsigned end, exec_list *list) 290 { 291 unsigned middle = (begin + end) >> 1; 292 293 assert(index->type->is_integer()); 294 295 ir_constant *const middle_c = (index->type->base_type == GLSL_TYPE_UINT) 296 ? new(this->mem_ctx) ir_constant((unsigned)middle) 297 : new(this->mem_ctx) ir_constant((int)middle); 298 299 300 ir_dereference_variable *deref = 301 new(this->mem_ctx) ir_dereference_variable(this->index); 302 303 ir_expression *less = 304 new(this->mem_ctx) ir_expression(ir_binop_less, glsl_type::bool_type, 305 deref, middle_c); 306 307 ir_if *if_less = new(this->mem_ctx) ir_if(less); 308 309 generate(begin, middle, &if_less->then_instructions); 310 generate(middle, end, &if_less->else_instructions); 311 312 list->push_tail(if_less); 313 } 314 315 void generate(unsigned begin, unsigned end, exec_list *list) 316 { 317 unsigned length = end - begin; 318 if (length <= this->linear_sequence_max_length) 319 return linear_sequence(begin, end, list); 320 else 321 return bisect(begin, end, list); 322 } 323 }; 324 325 /** 326 * Visitor class for replacing expressions with ir_constant values. 327 */ 328 329 class variable_index_to_cond_assign_visitor : public ir_rvalue_visitor { 330 public: 331 variable_index_to_cond_assign_visitor(bool lower_input, 332 bool lower_output, 333 bool lower_temp, 334 bool lower_uniform) 335 { 336 this->progress = false; 337 this->lower_inputs = lower_input; 338 this->lower_outputs = lower_output; 339 this->lower_temps = lower_temp; 340 this->lower_uniforms = lower_uniform; 341 } 342 343 bool progress; 344 bool lower_inputs; 345 bool lower_outputs; 346 bool lower_temps; 347 bool lower_uniforms; 348 349 bool storage_type_needs_lowering(ir_dereference_array *deref) const 350 { 351 /* If a variable isn't eventually the target of this dereference, then 352 * it must be a constant or some sort of anonymous temporary storage. 353 * 354 * FINISHME: Is this correct? Most drivers treat arrays of constants as 355 * FINISHME: uniforms. It seems like this should do the same. 356 */ 357 const ir_variable *const var = deref->array->variable_referenced(); 358 if (var == NULL) 359 return this->lower_temps; 360 361 switch (var->mode) { 362 case ir_var_auto: 363 case ir_var_temporary: 364 return this->lower_temps; 365 case ir_var_uniform: 366 return this->lower_uniforms; 367 case ir_var_in: 368 case ir_var_const_in: 369 return (var->location == -1) ? this->lower_temps : this->lower_inputs; 370 case ir_var_out: 371 return (var->location == -1) ? this->lower_temps : this->lower_outputs; 372 case ir_var_inout: 373 return this->lower_temps; 374 } 375 376 assert(!"Should not get here."); 377 return false; 378 } 379 380 bool needs_lowering(ir_dereference_array *deref) const 381 { 382 if (deref == NULL || deref->array_index->as_constant() 383 || !is_array_or_matrix(deref->array)) 384 return false; 385 386 return this->storage_type_needs_lowering(deref); 387 } 388 389 ir_variable *convert_dereference_array(ir_dereference_array *orig_deref, 390 ir_assignment* orig_assign, 391 ir_dereference *orig_base) 392 { 393 assert(is_array_or_matrix(orig_deref->array)); 394 395 const unsigned length = (orig_deref->array->type->is_array()) 396 ? orig_deref->array->type->length 397 : orig_deref->array->type->matrix_columns; 398 399 void *const mem_ctx = ralloc_parent(base_ir); 400 401 /* Temporary storage for either the result of the dereference of 402 * the array, or the RHS that's being assigned into the 403 * dereference of the array. 404 */ 405 ir_variable *var; 406 407 if (orig_assign) { 408 var = new(mem_ctx) ir_variable(orig_assign->rhs->type, 409 "dereference_array_value", 410 ir_var_temporary); 411 base_ir->insert_before(var); 412 413 ir_dereference *lhs = new(mem_ctx) ir_dereference_variable(var); 414 ir_assignment *assign = new(mem_ctx) ir_assignment(lhs, 415 orig_assign->rhs, 416 NULL); 417 418 base_ir->insert_before(assign); 419 } else { 420 var = new(mem_ctx) ir_variable(orig_deref->type, 421 "dereference_array_value", 422 ir_var_temporary); 423 base_ir->insert_before(var); 424 } 425 426 /* Store the index to a temporary to avoid reusing its tree. */ 427 ir_variable *index = 428 new(mem_ctx) ir_variable(orig_deref->array_index->type, 429 "dereference_array_index", ir_var_temporary); 430 base_ir->insert_before(index); 431 432 ir_dereference *lhs = new(mem_ctx) ir_dereference_variable(index); 433 ir_assignment *assign = 434 new(mem_ctx) ir_assignment(lhs, orig_deref->array_index, NULL); 435 base_ir->insert_before(assign); 436 437 orig_deref->array_index = lhs->clone(mem_ctx, NULL); 438 439 assignment_generator ag; 440 ag.rvalue = orig_base; 441 ag.base_ir = base_ir; 442 ag.old_index = index; 443 ag.var = var; 444 if (orig_assign) { 445 ag.is_write = true; 446 ag.write_mask = orig_assign->write_mask; 447 } else { 448 ag.is_write = false; 449 } 450 451 switch_generator sg(ag, index, 4, 4); 452 453 /* If the original assignment has a condition, respect that original 454 * condition! This is acomplished by wrapping the new conditional 455 * assignments in an if-statement that uses the original condition. 456 */ 457 if ((orig_assign != NULL) && (orig_assign->condition != NULL)) { 458 /* No need to clone the condition because the IR that it hangs on is 459 * going to be removed from the instruction sequence. 460 */ 461 ir_if *if_stmt = new(mem_ctx) ir_if(orig_assign->condition); 462 463 sg.generate(0, length, &if_stmt->then_instructions); 464 base_ir->insert_before(if_stmt); 465 } else { 466 exec_list list; 467 468 sg.generate(0, length, &list); 469 base_ir->insert_before(&list); 470 } 471 472 return var; 473 } 474 475 virtual void handle_rvalue(ir_rvalue **pir) 476 { 477 if (this->in_assignee) 478 return; 479 480 if (!*pir) 481 return; 482 483 ir_dereference_array* orig_deref = (*pir)->as_dereference_array(); 484 if (needs_lowering(orig_deref)) { 485 ir_variable *var = 486 convert_dereference_array(orig_deref, NULL, orig_deref); 487 assert(var); 488 *pir = new(ralloc_parent(base_ir)) ir_dereference_variable(var); 489 this->progress = true; 490 } 491 } 492 493 ir_visitor_status 494 visit_leave(ir_assignment *ir) 495 { 496 ir_rvalue_visitor::visit_leave(ir); 497 498 find_variable_index f; 499 ir->lhs->accept(&f); 500 501 if ((f.deref != NULL) && storage_type_needs_lowering(f.deref)) { 502 convert_dereference_array(f.deref, ir, ir->lhs); 503 ir->remove(); 504 this->progress = true; 505 } 506 507 return visit_continue; 508 } 509 }; 510 511 bool 512 lower_variable_index_to_cond_assign(exec_list *instructions, 513 bool lower_input, 514 bool lower_output, 515 bool lower_temp, 516 bool lower_uniform) 517 { 518 variable_index_to_cond_assign_visitor v(lower_input, 519 lower_output, 520 lower_temp, 521 lower_uniform); 522 523 /* Continue lowering until no progress is made. If there are multiple 524 * levels of indirection (e.g., non-constant indexing of array elements and 525 * matrix columns of an array of matrix), each pass will only lower one 526 * level of indirection. 527 */ 528 bool progress_ever = false; 529 do { 530 v.progress = false; 531 visit_list_elements(&v, instructions); 532 progress_ever = v.progress || progress_ever; 533 } while (v.progress); 534 535 return progress_ever; 536 } 537