1 /* 2 * Copyright 2010 Luca Barbieri 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 21 * DEALINGS IN THE SOFTWARE. 22 */ 23 24 /** 25 * \file lower_variable_index_to_cond_assign.cpp 26 * 27 * Turns non-constant indexing into array types to a series of 28 * conditional moves of each element into a temporary. 29 * 30 * Pre-DX10 GPUs often don't have a native way to do this operation, 31 * and this works around that. 32 * 33 * The lowering process proceeds as follows. Each non-constant index 34 * found in an r-value is converted to a canonical form \c array[i]. Each 35 * element of the array is conditionally assigned to a temporary by comparing 36 * \c i to a constant index. This is done by cloning the canonical form and 37 * replacing all occurances of \c i with a constant. Each remaining occurance 38 * of the canonical form in the IR is replaced with a dereference of the 39 * temporary variable. 40 * 41 * L-values with non-constant indices are handled similarly. In this case, 42 * the RHS of the assignment is assigned to a temporary. The non-constant 43 * index is replace with the canonical form (just like for r-values). The 44 * temporary is conditionally assigned to each element of the canonical form 45 * by comparing \c i with each index. The same clone-and-replace scheme is 46 * used. 47 */ 48 49 #include "ir.h" 50 #include "ir_rvalue_visitor.h" 51 #include "ir_optimization.h" 52 #include "compiler/glsl_types.h" 53 #include "main/macros.h" 54 55 /** 56 * Generate a comparison value for a block of indices 57 * 58 * Lowering passes for non-constant indexing of arrays, matrices, or vectors 59 * can use this to generate blocks of index comparison values. 60 * 61 * \param instructions List where new instructions will be appended 62 * \param index \c ir_variable containing the desired index 63 * \param base Base value for this block of comparisons 64 * \param components Number of unique index values to compare. This must 65 * be on the range [1, 4]. 66 * \param mem_ctx ralloc memory context to be used for all allocations. 67 * 68 * \returns 69 * An \c ir_rvalue that \b must be cloned for each use in conditional 70 * assignments, etc. 71 */ 72 ir_rvalue * 73 compare_index_block(exec_list *instructions, ir_variable *index, 74 unsigned base, unsigned components, void *mem_ctx) 75 { 76 ir_rvalue *broadcast_index = new(mem_ctx) ir_dereference_variable(index); 77 78 assert(index->type->is_scalar()); 79 assert(index->type->base_type == GLSL_TYPE_INT || index->type->base_type == GLSL_TYPE_UINT); 80 assert(components >= 1 && components <= 4); 81 82 if (components > 1) { 83 const ir_swizzle_mask m = { 0, 0, 0, 0, components, false }; 84 broadcast_index = new(mem_ctx) ir_swizzle(broadcast_index, m); 85 } 86 87 /* Compare the desired index value with the next block of four indices. 88 */ 89 ir_constant_data test_indices_data; 90 memset(&test_indices_data, 0, sizeof(test_indices_data)); 91 test_indices_data.i[0] = base; 92 test_indices_data.i[1] = base + 1; 93 test_indices_data.i[2] = base + 2; 94 test_indices_data.i[3] = base + 3; 95 96 ir_constant *const test_indices = 97 new(mem_ctx) ir_constant(broadcast_index->type, 98 &test_indices_data); 99 100 ir_rvalue *const condition_val = 101 new(mem_ctx) ir_expression(ir_binop_equal, 102 glsl_type::bvec(components), 103 broadcast_index, 104 test_indices); 105 106 ir_variable *const condition = 107 new(mem_ctx) ir_variable(condition_val->type, 108 "dereference_condition", 109 ir_var_temporary); 110 instructions->push_tail(condition); 111 112 ir_rvalue *const cond_deref = 113 new(mem_ctx) ir_dereference_variable(condition); 114 instructions->push_tail(new(mem_ctx) ir_assignment(cond_deref, condition_val, 0)); 115 116 return cond_deref; 117 } 118 119 static inline bool 120 is_array_or_matrix(const ir_rvalue *ir) 121 { 122 return (ir->type->is_array() || ir->type->is_matrix()); 123 } 124 125 namespace { 126 /** 127 * Replace a dereference of a variable with a specified r-value 128 * 129 * Each time a dereference of the specified value is replaced, the r-value 130 * tree is cloned. 131 */ 132 class deref_replacer : public ir_rvalue_visitor { 133 public: 134 deref_replacer(const ir_variable *variable_to_replace, ir_rvalue *value) 135 : variable_to_replace(variable_to_replace), value(value), 136 progress(false) 137 { 138 assert(this->variable_to_replace != NULL); 139 assert(this->value != NULL); 140 } 141 142 virtual void handle_rvalue(ir_rvalue **rvalue) 143 { 144 ir_dereference_variable *const dv = (*rvalue)->as_dereference_variable(); 145 146 if ((dv != NULL) && (dv->var == this->variable_to_replace)) { 147 this->progress = true; 148 *rvalue = this->value->clone(ralloc_parent(*rvalue), NULL); 149 } 150 } 151 152 const ir_variable *variable_to_replace; 153 ir_rvalue *value; 154 bool progress; 155 }; 156 157 /** 158 * Find a variable index dereference of an array in an rvalue tree 159 */ 160 class find_variable_index : public ir_hierarchical_visitor { 161 public: 162 find_variable_index() 163 : deref(NULL) 164 { 165 /* empty */ 166 } 167 168 virtual ir_visitor_status visit_enter(ir_dereference_array *ir) 169 { 170 if (is_array_or_matrix(ir->array) 171 && (ir->array_index->as_constant() == NULL)) { 172 this->deref = ir; 173 return visit_stop; 174 } 175 176 return visit_continue; 177 } 178 179 /** 180 * First array dereference found in the tree that has a non-constant index. 181 */ 182 ir_dereference_array *deref; 183 }; 184 185 struct assignment_generator 186 { 187 ir_instruction* base_ir; 188 ir_dereference *rvalue; 189 ir_variable *old_index; 190 bool is_write; 191 unsigned int write_mask; 192 ir_variable* var; 193 194 assignment_generator() 195 : base_ir(NULL), 196 rvalue(NULL), 197 old_index(NULL), 198 is_write(false), 199 write_mask(0), 200 var(NULL) 201 { 202 } 203 204 void generate(unsigned i, ir_rvalue* condition, exec_list *list) const 205 { 206 /* Just clone the rest of the deref chain when trying to get at the 207 * underlying variable. 208 */ 209 void *mem_ctx = ralloc_parent(base_ir); 210 211 /* Clone the old r-value in its entirety. Then replace any occurances of 212 * the old variable index with the new constant index. 213 */ 214 ir_dereference *element = this->rvalue->clone(mem_ctx, NULL); 215 ir_constant *const index = new(mem_ctx) ir_constant(i); 216 deref_replacer r(this->old_index, index); 217 element->accept(&r); 218 assert(r.progress); 219 220 /* Generate a conditional assignment to (or from) the constant indexed 221 * array dereference. 222 */ 223 ir_rvalue *variable = new(mem_ctx) ir_dereference_variable(this->var); 224 ir_assignment *const assignment = (is_write) 225 ? new(mem_ctx) ir_assignment(element, variable, condition, write_mask) 226 : new(mem_ctx) ir_assignment(variable, element, condition); 227 228 list->push_tail(assignment); 229 } 230 }; 231 232 struct switch_generator 233 { 234 /* make TFunction a template parameter if you need to use other generators */ 235 typedef assignment_generator TFunction; 236 const TFunction& generator; 237 238 ir_variable* index; 239 unsigned linear_sequence_max_length; 240 unsigned condition_components; 241 242 void *mem_ctx; 243 244 switch_generator(const TFunction& generator, ir_variable *index, 245 unsigned linear_sequence_max_length, 246 unsigned condition_components) 247 : generator(generator), index(index), 248 linear_sequence_max_length(linear_sequence_max_length), 249 condition_components(condition_components) 250 { 251 this->mem_ctx = ralloc_parent(index); 252 } 253 254 void linear_sequence(unsigned begin, unsigned end, exec_list *list) 255 { 256 if (begin == end) 257 return; 258 259 /* If the array access is a read, read the first element of this subregion 260 * unconditionally. The remaining tests will possibly overwrite this 261 * value with one of the other array elements. 262 * 263 * This optimization cannot be done for writes because it will cause the 264 * first element of the subregion to be written possibly *in addition* to 265 * one of the other elements. 266 */ 267 unsigned first; 268 if (!this->generator.is_write) { 269 this->generator.generate(begin, 0, list); 270 first = begin + 1; 271 } else { 272 first = begin; 273 } 274 275 for (unsigned i = first; i < end; i += 4) { 276 const unsigned comps = MIN2(condition_components, end - i); 277 278 ir_rvalue *const cond_deref = 279 compare_index_block(list, index, i, comps, this->mem_ctx); 280 281 if (comps == 1) { 282 this->generator.generate(i, cond_deref->clone(this->mem_ctx, NULL), 283 list); 284 } else { 285 for (unsigned j = 0; j < comps; j++) { 286 ir_rvalue *const cond_swiz = 287 new(this->mem_ctx) ir_swizzle(cond_deref->clone(this->mem_ctx, NULL), 288 j, 0, 0, 0, 1); 289 290 this->generator.generate(i + j, cond_swiz, list); 291 } 292 } 293 } 294 } 295 296 void bisect(unsigned begin, unsigned end, exec_list *list) 297 { 298 unsigned middle = (begin + end) >> 1; 299 300 assert(index->type->is_integer()); 301 302 ir_constant *const middle_c = (index->type->base_type == GLSL_TYPE_UINT) 303 ? new(this->mem_ctx) ir_constant((unsigned)middle) 304 : new(this->mem_ctx) ir_constant((int)middle); 305 306 307 ir_dereference_variable *deref = 308 new(this->mem_ctx) ir_dereference_variable(this->index); 309 310 ir_expression *less = 311 new(this->mem_ctx) ir_expression(ir_binop_less, glsl_type::bool_type, 312 deref, middle_c); 313 314 ir_if *if_less = new(this->mem_ctx) ir_if(less); 315 316 generate(begin, middle, &if_less->then_instructions); 317 generate(middle, end, &if_less->else_instructions); 318 319 list->push_tail(if_less); 320 } 321 322 void generate(unsigned begin, unsigned end, exec_list *list) 323 { 324 unsigned length = end - begin; 325 if (length <= this->linear_sequence_max_length) 326 return linear_sequence(begin, end, list); 327 else 328 return bisect(begin, end, list); 329 } 330 }; 331 332 /** 333 * Visitor class for replacing expressions with ir_constant values. 334 */ 335 336 class variable_index_to_cond_assign_visitor : public ir_rvalue_visitor { 337 public: 338 variable_index_to_cond_assign_visitor(gl_shader_stage stage, 339 bool lower_input, 340 bool lower_output, 341 bool lower_temp, 342 bool lower_uniform) 343 { 344 this->progress = false; 345 this->stage = stage; 346 this->lower_inputs = lower_input; 347 this->lower_outputs = lower_output; 348 this->lower_temps = lower_temp; 349 this->lower_uniforms = lower_uniform; 350 } 351 352 bool progress; 353 354 gl_shader_stage stage; 355 bool lower_inputs; 356 bool lower_outputs; 357 bool lower_temps; 358 bool lower_uniforms; 359 360 bool storage_type_needs_lowering(ir_dereference_array *deref) const 361 { 362 /* If a variable isn't eventually the target of this dereference, then 363 * it must be a constant or some sort of anonymous temporary storage. 364 * 365 * FINISHME: Is this correct? Most drivers treat arrays of constants as 366 * FINISHME: uniforms. It seems like this should do the same. 367 */ 368 const ir_variable *const var = deref->array->variable_referenced(); 369 if (var == NULL) 370 return this->lower_temps; 371 372 switch (var->data.mode) { 373 case ir_var_auto: 374 case ir_var_temporary: 375 return this->lower_temps; 376 377 case ir_var_uniform: 378 case ir_var_shader_storage: 379 return this->lower_uniforms; 380 381 case ir_var_shader_shared: 382 return false; 383 384 case ir_var_function_in: 385 case ir_var_const_in: 386 return this->lower_temps; 387 388 case ir_var_system_value: 389 /* There are only a few system values that have array types: 390 * 391 * gl_TessLevelInner[] 392 * gl_TessLevelOuter[] 393 * gl_SampleMaskIn[] 394 * 395 * The tessellation factor arrays are lowered to vec4/vec2s 396 * by lower_tess_level() before this pass occurs, so we'll 397 * never see them here. 398 * 399 * The only remaining case is gl_SampleMaskIn[], which has 400 * a length of ceil(ctx->Const.MaxSamples / 32). Most hardware 401 * supports no more than 32 samples, in which case our lowering 402 * produces a single read of gl_SampleMaskIn[0]. Even with 64x 403 * MSAA, the array length is only 2, so the lowering is fairly 404 * efficient. Therefore, lower unconditionally. 405 */ 406 return true; 407 408 case ir_var_shader_in: 409 /* The input array size is unknown at compiler time for non-patch 410 * inputs in TCS and TES. The arrays are sized to 411 * the implementation-dependent limit "gl_MaxPatchVertices", but 412 * the real size is stored in the "gl_PatchVerticesIn" built-in 413 * uniform. 414 * 415 * The TCS input array size is specified by 416 * glPatchParameteri(GL_PATCH_VERTICES). 417 * 418 * The TES input array size is specified by the "vertices" output 419 * layout qualifier in TCS. 420 */ 421 if ((stage == MESA_SHADER_TESS_CTRL || 422 stage == MESA_SHADER_TESS_EVAL) && !var->data.patch) 423 return false; 424 return this->lower_inputs; 425 426 case ir_var_function_out: 427 /* TCS non-patch outputs can only be indexed with "gl_InvocationID". 428 * Other expressions are not allowed. 429 */ 430 if (stage == MESA_SHADER_TESS_CTRL && !var->data.patch) 431 return false; 432 return this->lower_temps; 433 434 case ir_var_shader_out: 435 return this->lower_outputs; 436 437 case ir_var_function_inout: 438 return this->lower_temps; 439 } 440 441 assert(!"Should not get here."); 442 return false; 443 } 444 445 bool needs_lowering(ir_dereference_array *deref) const 446 { 447 if (deref == NULL || deref->array_index->as_constant() 448 || !is_array_or_matrix(deref->array)) 449 return false; 450 451 return this->storage_type_needs_lowering(deref); 452 } 453 454 ir_variable *convert_dereference_array(ir_dereference_array *orig_deref, 455 ir_assignment* orig_assign, 456 ir_dereference *orig_base) 457 { 458 assert(is_array_or_matrix(orig_deref->array)); 459 460 const unsigned length = (orig_deref->array->type->is_array()) 461 ? orig_deref->array->type->length 462 : orig_deref->array->type->matrix_columns; 463 464 void *const mem_ctx = ralloc_parent(base_ir); 465 466 /* Temporary storage for either the result of the dereference of 467 * the array, or the RHS that's being assigned into the 468 * dereference of the array. 469 */ 470 ir_variable *var; 471 472 if (orig_assign) { 473 var = new(mem_ctx) ir_variable(orig_assign->rhs->type, 474 "dereference_array_value", 475 ir_var_temporary); 476 base_ir->insert_before(var); 477 478 ir_dereference *lhs = new(mem_ctx) ir_dereference_variable(var); 479 ir_assignment *assign = new(mem_ctx) ir_assignment(lhs, 480 orig_assign->rhs, 481 NULL); 482 483 base_ir->insert_before(assign); 484 } else { 485 var = new(mem_ctx) ir_variable(orig_deref->type, 486 "dereference_array_value", 487 ir_var_temporary); 488 base_ir->insert_before(var); 489 } 490 491 /* Store the index to a temporary to avoid reusing its tree. */ 492 ir_variable *index = 493 new(mem_ctx) ir_variable(orig_deref->array_index->type, 494 "dereference_array_index", ir_var_temporary); 495 base_ir->insert_before(index); 496 497 ir_dereference *lhs = new(mem_ctx) ir_dereference_variable(index); 498 ir_assignment *assign = 499 new(mem_ctx) ir_assignment(lhs, orig_deref->array_index, NULL); 500 base_ir->insert_before(assign); 501 502 orig_deref->array_index = lhs->clone(mem_ctx, NULL); 503 504 assignment_generator ag; 505 ag.rvalue = orig_base; 506 ag.base_ir = base_ir; 507 ag.old_index = index; 508 ag.var = var; 509 if (orig_assign) { 510 ag.is_write = true; 511 ag.write_mask = orig_assign->write_mask; 512 } else { 513 ag.is_write = false; 514 } 515 516 switch_generator sg(ag, index, 4, 4); 517 518 /* If the original assignment has a condition, respect that original 519 * condition! This is acomplished by wrapping the new conditional 520 * assignments in an if-statement that uses the original condition. 521 */ 522 if ((orig_assign != NULL) && (orig_assign->condition != NULL)) { 523 /* No need to clone the condition because the IR that it hangs on is 524 * going to be removed from the instruction sequence. 525 */ 526 ir_if *if_stmt = new(mem_ctx) ir_if(orig_assign->condition); 527 528 sg.generate(0, length, &if_stmt->then_instructions); 529 base_ir->insert_before(if_stmt); 530 } else { 531 exec_list list; 532 533 sg.generate(0, length, &list); 534 base_ir->insert_before(&list); 535 } 536 537 return var; 538 } 539 540 virtual void handle_rvalue(ir_rvalue **pir) 541 { 542 if (this->in_assignee) 543 return; 544 545 if (!*pir) 546 return; 547 548 ir_dereference_array* orig_deref = (*pir)->as_dereference_array(); 549 if (needs_lowering(orig_deref)) { 550 ir_variable *var = 551 convert_dereference_array(orig_deref, NULL, orig_deref); 552 assert(var); 553 *pir = new(ralloc_parent(base_ir)) ir_dereference_variable(var); 554 this->progress = true; 555 } 556 } 557 558 ir_visitor_status 559 visit_leave(ir_assignment *ir) 560 { 561 ir_rvalue_visitor::visit_leave(ir); 562 563 find_variable_index f; 564 ir->lhs->accept(&f); 565 566 if ((f.deref != NULL) && storage_type_needs_lowering(f.deref)) { 567 convert_dereference_array(f.deref, ir, ir->lhs); 568 ir->remove(); 569 this->progress = true; 570 } 571 572 return visit_continue; 573 } 574 }; 575 576 } /* anonymous namespace */ 577 578 bool 579 lower_variable_index_to_cond_assign(gl_shader_stage stage, 580 exec_list *instructions, 581 bool lower_input, 582 bool lower_output, 583 bool lower_temp, 584 bool lower_uniform) 585 { 586 variable_index_to_cond_assign_visitor v(stage, 587 lower_input, 588 lower_output, 589 lower_temp, 590 lower_uniform); 591 592 /* Continue lowering until no progress is made. If there are multiple 593 * levels of indirection (e.g., non-constant indexing of array elements and 594 * matrix columns of an array of matrix), each pass will only lower one 595 * level of indirection. 596 */ 597 bool progress_ever = false; 598 do { 599 v.progress = false; 600 visit_list_elements(&v, instructions); 601 progress_ever = v.progress || progress_ever; 602 } while (v.progress); 603 604 return progress_ever; 605 } 606