1 /* 2 * Copyright 2012 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 21 * DEALINGS IN THE SOFTWARE. 22 */ 23 24 /** 25 * \file lower_ubo_reference.cpp 26 * 27 * IR lower pass to replace dereferences of variables in a uniform 28 * buffer object with usage of ir_binop_ubo_load expressions, each of 29 * which can read data up to the size of a vec4. 30 * 31 * This relieves drivers of the responsibility to deal with tricky UBO 32 * layout issues like std140 structures and row_major matrices on 33 * their own. 34 */ 35 36 #include "lower_buffer_access.h" 37 #include "ir_builder.h" 38 #include "main/macros.h" 39 #include "glsl_parser_extras.h" 40 41 using namespace ir_builder; 42 43 namespace { 44 class lower_ubo_reference_visitor : 45 public lower_buffer_access::lower_buffer_access { 46 public: 47 lower_ubo_reference_visitor(struct gl_linked_shader *shader, 48 bool clamp_block_indices, 49 bool use_std430_as_default) 50 : shader(shader), clamp_block_indices(clamp_block_indices), 51 struct_field(NULL), variable(NULL) 52 { 53 this->use_std430_as_default = use_std430_as_default; 54 } 55 56 void handle_rvalue(ir_rvalue **rvalue); 57 ir_visitor_status visit_enter(ir_assignment *ir); 58 59 void setup_for_load_or_store(void *mem_ctx, 60 ir_variable *var, 61 ir_rvalue *deref, 62 ir_rvalue **offset, 63 unsigned *const_offset, 64 bool *row_major, 65 const glsl_type **matrix_type, 66 enum glsl_interface_packing packing); 67 uint32_t ssbo_access_params(); 68 ir_expression *ubo_load(void *mem_ctx, const struct glsl_type *type, 69 ir_rvalue *offset); 70 ir_call *ssbo_load(void *mem_ctx, const struct glsl_type *type, 71 ir_rvalue *offset); 72 73 bool check_for_buffer_array_copy(ir_assignment *ir); 74 bool check_for_buffer_struct_copy(ir_assignment *ir); 75 void check_for_ssbo_store(ir_assignment *ir); 76 void write_to_memory(void *mem_ctx, ir_dereference *deref, ir_variable *var, 77 ir_variable *write_var, unsigned write_mask); 78 ir_call *ssbo_store(void *mem_ctx, ir_rvalue *deref, ir_rvalue *offset, 79 unsigned write_mask); 80 81 enum { 82 ubo_load_access, 83 ssbo_load_access, 84 ssbo_store_access, 85 ssbo_unsized_array_length_access, 86 ssbo_atomic_access, 87 } buffer_access_type; 88 89 void insert_buffer_access(void *mem_ctx, ir_dereference *deref, 90 const glsl_type *type, ir_rvalue *offset, 91 unsigned mask, int channel); 92 93 ir_visitor_status visit_enter(class ir_expression *); 94 ir_expression *calculate_ssbo_unsized_array_length(ir_expression *expr); 95 void check_ssbo_unsized_array_length_expression(class ir_expression *); 96 void check_ssbo_unsized_array_length_assignment(ir_assignment *ir); 97 98 ir_expression *process_ssbo_unsized_array_length(ir_rvalue **, 99 ir_dereference *, 100 ir_variable *); 101 ir_expression *emit_ssbo_get_buffer_size(void *mem_ctx); 102 103 unsigned calculate_unsized_array_stride(ir_dereference *deref, 104 enum glsl_interface_packing packing); 105 106 ir_call *lower_ssbo_atomic_intrinsic(ir_call *ir); 107 ir_call *check_for_ssbo_atomic_intrinsic(ir_call *ir); 108 ir_visitor_status visit_enter(ir_call *ir); 109 ir_visitor_status visit_enter(ir_texture *ir); 110 111 struct gl_linked_shader *shader; 112 bool clamp_block_indices; 113 const struct glsl_struct_field *struct_field; 114 ir_variable *variable; 115 ir_rvalue *uniform_block; 116 bool progress; 117 }; 118 119 /** 120 * Determine the name of the interface block field 121 * 122 * This is the name of the specific member as it would appear in the 123 * \c gl_uniform_buffer_variable::Name field in the shader's 124 * \c UniformBlocks array. 125 */ 126 static const char * 127 interface_field_name(void *mem_ctx, char *base_name, ir_rvalue *d, 128 ir_rvalue **nonconst_block_index) 129 { 130 *nonconst_block_index = NULL; 131 char *name_copy = NULL; 132 size_t base_length = 0; 133 134 /* Loop back through the IR until we find the uniform block */ 135 ir_rvalue *ir = d; 136 while (ir != NULL) { 137 switch (ir->ir_type) { 138 case ir_type_dereference_variable: { 139 /* Exit loop */ 140 ir = NULL; 141 break; 142 } 143 144 case ir_type_dereference_record: { 145 ir_dereference_record *r = (ir_dereference_record *) ir; 146 ir = r->record->as_dereference(); 147 148 /* If we got here it means any previous array subscripts belong to 149 * block members and not the block itself so skip over them in the 150 * next pass. 151 */ 152 d = ir; 153 break; 154 } 155 156 case ir_type_dereference_array: { 157 ir_dereference_array *a = (ir_dereference_array *) ir; 158 ir = a->array->as_dereference(); 159 break; 160 } 161 162 case ir_type_swizzle: { 163 ir_swizzle *s = (ir_swizzle *) ir; 164 ir = s->val->as_dereference(); 165 /* Skip swizzle in the next pass */ 166 d = ir; 167 break; 168 } 169 170 default: 171 assert(!"Should not get here."); 172 break; 173 } 174 } 175 176 while (d != NULL) { 177 switch (d->ir_type) { 178 case ir_type_dereference_variable: { 179 ir_dereference_variable *v = (ir_dereference_variable *) d; 180 if (name_copy != NULL && 181 v->var->is_interface_instance() && 182 v->var->type->is_array()) { 183 return name_copy; 184 } else { 185 *nonconst_block_index = NULL; 186 return base_name; 187 } 188 189 break; 190 } 191 192 case ir_type_dereference_array: { 193 ir_dereference_array *a = (ir_dereference_array *) d; 194 size_t new_length; 195 196 if (name_copy == NULL) { 197 name_copy = ralloc_strdup(mem_ctx, base_name); 198 base_length = strlen(name_copy); 199 } 200 201 /* For arrays of arrays we start at the innermost array and work our 202 * way out so we need to insert the subscript at the base of the 203 * name string rather than just attaching it to the end. 204 */ 205 new_length = base_length; 206 ir_constant *const_index = a->array_index->as_constant(); 207 char *end = ralloc_strdup(NULL, &name_copy[new_length]); 208 if (!const_index) { 209 ir_rvalue *array_index = a->array_index; 210 if (array_index->type != glsl_type::uint_type) 211 array_index = i2u(array_index); 212 213 if (a->array->type->is_array() && 214 a->array->type->fields.array->is_array()) { 215 ir_constant *base_size = new(mem_ctx) 216 ir_constant(a->array->type->fields.array->arrays_of_arrays_size()); 217 array_index = mul(array_index, base_size); 218 } 219 220 if (*nonconst_block_index) { 221 *nonconst_block_index = add(*nonconst_block_index, array_index); 222 } else { 223 *nonconst_block_index = array_index; 224 } 225 226 ralloc_asprintf_rewrite_tail(&name_copy, &new_length, "[0]%s", 227 end); 228 } else { 229 ralloc_asprintf_rewrite_tail(&name_copy, &new_length, "[%d]%s", 230 const_index->get_uint_component(0), 231 end); 232 } 233 ralloc_free(end); 234 235 d = a->array->as_dereference(); 236 237 break; 238 } 239 240 default: 241 assert(!"Should not get here."); 242 break; 243 } 244 } 245 246 assert(!"Should not get here."); 247 return NULL; 248 } 249 250 static ir_rvalue * 251 clamp_to_array_bounds(void *mem_ctx, ir_rvalue *index, const glsl_type *type) 252 { 253 assert(type->is_array()); 254 255 const unsigned array_size = type->arrays_of_arrays_size(); 256 257 ir_constant *max_index = new(mem_ctx) ir_constant(array_size - 1); 258 max_index->type = index->type; 259 260 ir_constant *zero = new(mem_ctx) ir_constant(0); 261 zero->type = index->type; 262 263 if (index->type->base_type == GLSL_TYPE_INT) 264 index = max2(index, zero); 265 index = min2(index, max_index); 266 267 return index; 268 } 269 270 void 271 lower_ubo_reference_visitor::setup_for_load_or_store(void *mem_ctx, 272 ir_variable *var, 273 ir_rvalue *deref, 274 ir_rvalue **offset, 275 unsigned *const_offset, 276 bool *row_major, 277 const glsl_type **matrix_type, 278 enum glsl_interface_packing packing) 279 { 280 /* Determine the name of the interface block */ 281 ir_rvalue *nonconst_block_index; 282 const char *const field_name = 283 interface_field_name(mem_ctx, (char *) var->get_interface_type()->name, 284 deref, &nonconst_block_index); 285 286 if (nonconst_block_index && clamp_block_indices) { 287 nonconst_block_index = 288 clamp_to_array_bounds(mem_ctx, nonconst_block_index, var->type); 289 } 290 291 /* Locate the block by interface name */ 292 unsigned num_blocks; 293 struct gl_uniform_block **blocks; 294 if (this->buffer_access_type != ubo_load_access) { 295 num_blocks = shader->Program->info.num_ssbos; 296 blocks = shader->Program->sh.ShaderStorageBlocks; 297 } else { 298 num_blocks = shader->Program->info.num_ubos; 299 blocks = shader->Program->sh.UniformBlocks; 300 } 301 this->uniform_block = NULL; 302 for (unsigned i = 0; i < num_blocks; i++) { 303 if (strcmp(field_name, blocks[i]->Name) == 0) { 304 305 ir_constant *index = new(mem_ctx) ir_constant(i); 306 307 if (nonconst_block_index) { 308 this->uniform_block = add(nonconst_block_index, index); 309 } else { 310 this->uniform_block = index; 311 } 312 313 if (var->is_interface_instance()) { 314 *const_offset = 0; 315 } else { 316 *const_offset = blocks[i]->Uniforms[var->data.location].Offset; 317 } 318 319 break; 320 } 321 } 322 323 assert(this->uniform_block); 324 325 this->struct_field = NULL; 326 setup_buffer_access(mem_ctx, deref, offset, const_offset, row_major, 327 matrix_type, &this->struct_field, packing); 328 } 329 330 void 331 lower_ubo_reference_visitor::handle_rvalue(ir_rvalue **rvalue) 332 { 333 if (!*rvalue) 334 return; 335 336 ir_dereference *deref = (*rvalue)->as_dereference(); 337 if (!deref) 338 return; 339 340 ir_variable *var = deref->variable_referenced(); 341 if (!var || !var->is_in_buffer_block()) 342 return; 343 344 void *mem_ctx = ralloc_parent(shader->ir); 345 346 ir_rvalue *offset = NULL; 347 unsigned const_offset; 348 bool row_major; 349 const glsl_type *matrix_type; 350 351 enum glsl_interface_packing packing = 352 var->get_interface_type()-> 353 get_internal_ifc_packing(use_std430_as_default); 354 355 this->buffer_access_type = 356 var->is_in_shader_storage_block() ? 357 ssbo_load_access : ubo_load_access; 358 this->variable = var; 359 360 /* Compute the offset to the start if the dereference as well as other 361 * information we need to configure the write 362 */ 363 setup_for_load_or_store(mem_ctx, var, deref, 364 &offset, &const_offset, 365 &row_major, &matrix_type, 366 packing); 367 assert(offset); 368 369 /* Now that we've calculated the offset to the start of the 370 * dereference, walk over the type and emit loads into a temporary. 371 */ 372 const glsl_type *type = (*rvalue)->type; 373 ir_variable *load_var = new(mem_ctx) ir_variable(type, 374 "ubo_load_temp", 375 ir_var_temporary); 376 base_ir->insert_before(load_var); 377 378 ir_variable *load_offset = new(mem_ctx) ir_variable(glsl_type::uint_type, 379 "ubo_load_temp_offset", 380 ir_var_temporary); 381 base_ir->insert_before(load_offset); 382 base_ir->insert_before(assign(load_offset, offset)); 383 384 deref = new(mem_ctx) ir_dereference_variable(load_var); 385 emit_access(mem_ctx, false, deref, load_offset, const_offset, 386 row_major, matrix_type, packing, 0); 387 *rvalue = deref; 388 389 progress = true; 390 } 391 392 ir_expression * 393 lower_ubo_reference_visitor::ubo_load(void *mem_ctx, 394 const glsl_type *type, 395 ir_rvalue *offset) 396 { 397 ir_rvalue *block_ref = this->uniform_block->clone(mem_ctx, NULL); 398 return new(mem_ctx) 399 ir_expression(ir_binop_ubo_load, 400 type, 401 block_ref, 402 offset); 403 404 } 405 406 static bool 407 shader_storage_buffer_object(const _mesa_glsl_parse_state *state) 408 { 409 return state->has_shader_storage_buffer_objects(); 410 } 411 412 uint32_t 413 lower_ubo_reference_visitor::ssbo_access_params() 414 { 415 assert(variable); 416 417 if (variable->is_interface_instance()) { 418 assert(struct_field); 419 420 return ((struct_field->memory_coherent ? ACCESS_COHERENT : 0) | 421 (struct_field->memory_restrict ? ACCESS_RESTRICT : 0) | 422 (struct_field->memory_volatile ? ACCESS_VOLATILE : 0)); 423 } else { 424 return ((variable->data.memory_coherent ? ACCESS_COHERENT : 0) | 425 (variable->data.memory_restrict ? ACCESS_RESTRICT : 0) | 426 (variable->data.memory_volatile ? ACCESS_VOLATILE : 0)); 427 } 428 } 429 430 ir_call * 431 lower_ubo_reference_visitor::ssbo_store(void *mem_ctx, 432 ir_rvalue *deref, 433 ir_rvalue *offset, 434 unsigned write_mask) 435 { 436 exec_list sig_params; 437 438 ir_variable *block_ref = new(mem_ctx) 439 ir_variable(glsl_type::uint_type, "block_ref" , ir_var_function_in); 440 sig_params.push_tail(block_ref); 441 442 ir_variable *offset_ref = new(mem_ctx) 443 ir_variable(glsl_type::uint_type, "offset" , ir_var_function_in); 444 sig_params.push_tail(offset_ref); 445 446 ir_variable *val_ref = new(mem_ctx) 447 ir_variable(deref->type, "value" , ir_var_function_in); 448 sig_params.push_tail(val_ref); 449 450 ir_variable *writemask_ref = new(mem_ctx) 451 ir_variable(glsl_type::uint_type, "write_mask" , ir_var_function_in); 452 sig_params.push_tail(writemask_ref); 453 454 ir_variable *access_ref = new(mem_ctx) 455 ir_variable(glsl_type::uint_type, "access" , ir_var_function_in); 456 sig_params.push_tail(access_ref); 457 458 ir_function_signature *sig = new(mem_ctx) 459 ir_function_signature(glsl_type::void_type, shader_storage_buffer_object); 460 assert(sig); 461 sig->replace_parameters(&sig_params); 462 sig->intrinsic_id = ir_intrinsic_ssbo_store; 463 464 ir_function *f = new(mem_ctx) ir_function("__intrinsic_store_ssbo"); 465 f->add_signature(sig); 466 467 exec_list call_params; 468 call_params.push_tail(this->uniform_block->clone(mem_ctx, NULL)); 469 call_params.push_tail(offset->clone(mem_ctx, NULL)); 470 call_params.push_tail(deref->clone(mem_ctx, NULL)); 471 call_params.push_tail(new(mem_ctx) ir_constant(write_mask)); 472 call_params.push_tail(new(mem_ctx) ir_constant(ssbo_access_params())); 473 return new(mem_ctx) ir_call(sig, NULL, &call_params); 474 } 475 476 ir_call * 477 lower_ubo_reference_visitor::ssbo_load(void *mem_ctx, 478 const struct glsl_type *type, 479 ir_rvalue *offset) 480 { 481 exec_list sig_params; 482 483 ir_variable *block_ref = new(mem_ctx) 484 ir_variable(glsl_type::uint_type, "block_ref" , ir_var_function_in); 485 sig_params.push_tail(block_ref); 486 487 ir_variable *offset_ref = new(mem_ctx) 488 ir_variable(glsl_type::uint_type, "offset_ref" , ir_var_function_in); 489 sig_params.push_tail(offset_ref); 490 491 ir_variable *access_ref = new(mem_ctx) 492 ir_variable(glsl_type::uint_type, "access" , ir_var_function_in); 493 sig_params.push_tail(access_ref); 494 495 ir_function_signature *sig = 496 new(mem_ctx) ir_function_signature(type, shader_storage_buffer_object); 497 assert(sig); 498 sig->replace_parameters(&sig_params); 499 sig->intrinsic_id = ir_intrinsic_ssbo_load; 500 501 ir_function *f = new(mem_ctx) ir_function("__intrinsic_load_ssbo"); 502 f->add_signature(sig); 503 504 ir_variable *result = new(mem_ctx) 505 ir_variable(type, "ssbo_load_result", ir_var_temporary); 506 base_ir->insert_before(result); 507 ir_dereference_variable *deref_result = new(mem_ctx) 508 ir_dereference_variable(result); 509 510 exec_list call_params; 511 call_params.push_tail(this->uniform_block->clone(mem_ctx, NULL)); 512 call_params.push_tail(offset->clone(mem_ctx, NULL)); 513 call_params.push_tail(new(mem_ctx) ir_constant(ssbo_access_params())); 514 515 return new(mem_ctx) ir_call(sig, deref_result, &call_params); 516 } 517 518 void 519 lower_ubo_reference_visitor::insert_buffer_access(void *mem_ctx, 520 ir_dereference *deref, 521 const glsl_type *type, 522 ir_rvalue *offset, 523 unsigned mask, 524 int channel) 525 { 526 switch (this->buffer_access_type) { 527 case ubo_load_access: 528 base_ir->insert_before(assign(deref->clone(mem_ctx, NULL), 529 ubo_load(mem_ctx, type, offset), 530 mask)); 531 break; 532 case ssbo_load_access: { 533 ir_call *load_ssbo = ssbo_load(mem_ctx, type, offset); 534 base_ir->insert_before(load_ssbo); 535 ir_rvalue *value = load_ssbo->return_deref->as_rvalue()->clone(mem_ctx, NULL); 536 ir_assignment *assignment = 537 assign(deref->clone(mem_ctx, NULL), value, mask); 538 base_ir->insert_before(assignment); 539 break; 540 } 541 case ssbo_store_access: 542 if (channel >= 0) { 543 base_ir->insert_after(ssbo_store(mem_ctx, 544 swizzle(deref, channel, 1), 545 offset, 1)); 546 } else { 547 base_ir->insert_after(ssbo_store(mem_ctx, deref, offset, mask)); 548 } 549 break; 550 default: 551 unreachable("invalid buffer_access_type in insert_buffer_access"); 552 } 553 } 554 555 void 556 lower_ubo_reference_visitor::write_to_memory(void *mem_ctx, 557 ir_dereference *deref, 558 ir_variable *var, 559 ir_variable *write_var, 560 unsigned write_mask) 561 { 562 ir_rvalue *offset = NULL; 563 unsigned const_offset; 564 bool row_major; 565 const glsl_type *matrix_type; 566 567 enum glsl_interface_packing packing = 568 var->get_interface_type()-> 569 get_internal_ifc_packing(use_std430_as_default); 570 571 this->buffer_access_type = ssbo_store_access; 572 this->variable = var; 573 574 /* Compute the offset to the start if the dereference as well as other 575 * information we need to configure the write 576 */ 577 setup_for_load_or_store(mem_ctx, var, deref, 578 &offset, &const_offset, 579 &row_major, &matrix_type, 580 packing); 581 assert(offset); 582 583 /* Now emit writes from the temporary to memory */ 584 ir_variable *write_offset = 585 new(mem_ctx) ir_variable(glsl_type::uint_type, 586 "ssbo_store_temp_offset", 587 ir_var_temporary); 588 589 base_ir->insert_before(write_offset); 590 base_ir->insert_before(assign(write_offset, offset)); 591 592 deref = new(mem_ctx) ir_dereference_variable(write_var); 593 emit_access(mem_ctx, true, deref, write_offset, const_offset, 594 row_major, matrix_type, packing, write_mask); 595 } 596 597 ir_visitor_status 598 lower_ubo_reference_visitor::visit_enter(ir_expression *ir) 599 { 600 check_ssbo_unsized_array_length_expression(ir); 601 return rvalue_visit(ir); 602 } 603 604 ir_expression * 605 lower_ubo_reference_visitor::calculate_ssbo_unsized_array_length(ir_expression *expr) 606 { 607 if (expr->operation != 608 ir_expression_operation(ir_unop_ssbo_unsized_array_length)) 609 return NULL; 610 611 ir_rvalue *rvalue = expr->operands[0]->as_rvalue(); 612 if (!rvalue || 613 !rvalue->type->is_array() || !rvalue->type->is_unsized_array()) 614 return NULL; 615 616 ir_dereference *deref = expr->operands[0]->as_dereference(); 617 if (!deref) 618 return NULL; 619 620 ir_variable *var = expr->operands[0]->variable_referenced(); 621 if (!var || !var->is_in_shader_storage_block()) 622 return NULL; 623 return process_ssbo_unsized_array_length(&rvalue, deref, var); 624 } 625 626 void 627 lower_ubo_reference_visitor::check_ssbo_unsized_array_length_expression(ir_expression *ir) 628 { 629 if (ir->operation == 630 ir_expression_operation(ir_unop_ssbo_unsized_array_length)) { 631 /* Don't replace this unop if it is found alone. It is going to be 632 * removed by the optimization passes or replaced if it is part of 633 * an ir_assignment or another ir_expression. 634 */ 635 return; 636 } 637 638 for (unsigned i = 0; i < ir->num_operands; i++) { 639 if (ir->operands[i]->ir_type != ir_type_expression) 640 continue; 641 ir_expression *expr = (ir_expression *) ir->operands[i]; 642 ir_expression *temp = calculate_ssbo_unsized_array_length(expr); 643 if (!temp) 644 continue; 645 646 delete expr; 647 ir->operands[i] = temp; 648 } 649 } 650 651 void 652 lower_ubo_reference_visitor::check_ssbo_unsized_array_length_assignment(ir_assignment *ir) 653 { 654 if (!ir->rhs || ir->rhs->ir_type != ir_type_expression) 655 return; 656 657 ir_expression *expr = (ir_expression *) ir->rhs; 658 ir_expression *temp = calculate_ssbo_unsized_array_length(expr); 659 if (!temp) 660 return; 661 662 delete expr; 663 ir->rhs = temp; 664 return; 665 } 666 667 ir_expression * 668 lower_ubo_reference_visitor::emit_ssbo_get_buffer_size(void *mem_ctx) 669 { 670 ir_rvalue *block_ref = this->uniform_block->clone(mem_ctx, NULL); 671 return new(mem_ctx) ir_expression(ir_unop_get_buffer_size, 672 glsl_type::int_type, 673 block_ref); 674 } 675 676 unsigned 677 lower_ubo_reference_visitor::calculate_unsized_array_stride(ir_dereference *deref, 678 enum glsl_interface_packing packing) 679 { 680 unsigned array_stride = 0; 681 682 switch (deref->ir_type) { 683 case ir_type_dereference_variable: 684 { 685 ir_dereference_variable *deref_var = (ir_dereference_variable *)deref; 686 const struct glsl_type *unsized_array_type = NULL; 687 /* An unsized array can be sized by other lowering passes, so pick 688 * the first field of the array which has the data type of the unsized 689 * array. 690 */ 691 unsized_array_type = deref_var->var->type->fields.array; 692 693 /* Whether or not the field is row-major (because it might be a 694 * bvec2 or something) does not affect the array itself. We need 695 * to know whether an array element in its entirety is row-major. 696 */ 697 const bool array_row_major = 698 is_dereferenced_thing_row_major(deref_var); 699 700 if (packing == GLSL_INTERFACE_PACKING_STD430) { 701 array_stride = unsized_array_type->std430_array_stride(array_row_major); 702 } else { 703 array_stride = unsized_array_type->std140_size(array_row_major); 704 array_stride = glsl_align(array_stride, 16); 705 } 706 break; 707 } 708 case ir_type_dereference_record: 709 { 710 ir_dereference_record *deref_record = (ir_dereference_record *) deref; 711 ir_dereference *interface_deref = 712 deref_record->record->as_dereference(); 713 assert(interface_deref != NULL); 714 const struct glsl_type *interface_type = interface_deref->type; 715 unsigned record_length = interface_type->length; 716 /* Unsized array is always the last element of the interface */ 717 const struct glsl_type *unsized_array_type = 718 interface_type->fields.structure[record_length - 1].type->fields.array; 719 720 const bool array_row_major = 721 is_dereferenced_thing_row_major(deref_record); 722 723 if (packing == GLSL_INTERFACE_PACKING_STD430) { 724 array_stride = unsized_array_type->std430_array_stride(array_row_major); 725 } else { 726 array_stride = unsized_array_type->std140_size(array_row_major); 727 array_stride = glsl_align(array_stride, 16); 728 } 729 break; 730 } 731 default: 732 unreachable("Unsupported dereference type"); 733 } 734 return array_stride; 735 } 736 737 ir_expression * 738 lower_ubo_reference_visitor::process_ssbo_unsized_array_length(ir_rvalue **rvalue, 739 ir_dereference *deref, 740 ir_variable *var) 741 { 742 void *mem_ctx = ralloc_parent(*rvalue); 743 744 ir_rvalue *base_offset = NULL; 745 unsigned const_offset; 746 bool row_major; 747 const glsl_type *matrix_type; 748 749 enum glsl_interface_packing packing = 750 var->get_interface_type()-> 751 get_internal_ifc_packing(use_std430_as_default); 752 int unsized_array_stride = 753 calculate_unsized_array_stride(deref, packing); 754 755 this->buffer_access_type = ssbo_unsized_array_length_access; 756 this->variable = var; 757 758 /* Compute the offset to the start if the dereference as well as other 759 * information we need to calculate the length. 760 */ 761 setup_for_load_or_store(mem_ctx, var, deref, 762 &base_offset, &const_offset, 763 &row_major, &matrix_type, 764 packing); 765 /* array.length() = 766 * max((buffer_object_size - offset_of_array) / stride_of_array, 0) 767 */ 768 ir_expression *buffer_size = emit_ssbo_get_buffer_size(mem_ctx); 769 770 ir_expression *offset_of_array = new(mem_ctx) 771 ir_expression(ir_binop_add, base_offset, 772 new(mem_ctx) ir_constant(const_offset)); 773 ir_expression *offset_of_array_int = new(mem_ctx) 774 ir_expression(ir_unop_u2i, offset_of_array); 775 776 ir_expression *sub = new(mem_ctx) 777 ir_expression(ir_binop_sub, buffer_size, offset_of_array_int); 778 ir_expression *div = new(mem_ctx) 779 ir_expression(ir_binop_div, sub, 780 new(mem_ctx) ir_constant(unsized_array_stride)); 781 ir_expression *max = new(mem_ctx) 782 ir_expression(ir_binop_max, div, new(mem_ctx) ir_constant(0)); 783 784 return max; 785 } 786 787 void 788 lower_ubo_reference_visitor::check_for_ssbo_store(ir_assignment *ir) 789 { 790 if (!ir || !ir->lhs) 791 return; 792 793 ir_rvalue *rvalue = ir->lhs->as_rvalue(); 794 if (!rvalue) 795 return; 796 797 ir_dereference *deref = ir->lhs->as_dereference(); 798 if (!deref) 799 return; 800 801 ir_variable *var = ir->lhs->variable_referenced(); 802 if (!var || !var->is_in_shader_storage_block()) 803 return; 804 805 /* We have a write to a buffer variable, so declare a temporary and rewrite 806 * the assignment so that the temporary is the LHS. 807 */ 808 void *mem_ctx = ralloc_parent(shader->ir); 809 810 const glsl_type *type = rvalue->type; 811 ir_variable *write_var = new(mem_ctx) ir_variable(type, 812 "ssbo_store_temp", 813 ir_var_temporary); 814 base_ir->insert_before(write_var); 815 ir->lhs = new(mem_ctx) ir_dereference_variable(write_var); 816 817 /* Now we have to write the value assigned to the temporary back to memory */ 818 write_to_memory(mem_ctx, deref, var, write_var, ir->write_mask); 819 progress = true; 820 } 821 822 static bool 823 is_buffer_backed_variable(ir_variable *var) 824 { 825 return var->is_in_buffer_block() || 826 var->data.mode == ir_var_shader_shared; 827 } 828 829 bool 830 lower_ubo_reference_visitor::check_for_buffer_array_copy(ir_assignment *ir) 831 { 832 if (!ir || !ir->lhs || !ir->rhs) 833 return false; 834 835 /* LHS and RHS must be arrays 836 * FIXME: arrays of arrays? 837 */ 838 if (!ir->lhs->type->is_array() || !ir->rhs->type->is_array()) 839 return false; 840 841 /* RHS must be a buffer-backed variable. This is what can cause the problem 842 * since it would lead to a series of loads that need to live until we 843 * see the writes to the LHS. 844 */ 845 ir_variable *rhs_var = ir->rhs->variable_referenced(); 846 if (!rhs_var || !is_buffer_backed_variable(rhs_var)) 847 return false; 848 849 /* Split the array copy into individual element copies to reduce 850 * register pressure 851 */ 852 ir_dereference *rhs_deref = ir->rhs->as_dereference(); 853 if (!rhs_deref) 854 return false; 855 856 ir_dereference *lhs_deref = ir->lhs->as_dereference(); 857 if (!lhs_deref) 858 return false; 859 860 assert(lhs_deref->type->length == rhs_deref->type->length); 861 void *mem_ctx = ralloc_parent(shader->ir); 862 863 for (unsigned i = 0; i < lhs_deref->type->length; i++) { 864 ir_dereference *lhs_i = 865 new(mem_ctx) ir_dereference_array(lhs_deref->clone(mem_ctx, NULL), 866 new(mem_ctx) ir_constant(i)); 867 868 ir_dereference *rhs_i = 869 new(mem_ctx) ir_dereference_array(rhs_deref->clone(mem_ctx, NULL), 870 new(mem_ctx) ir_constant(i)); 871 ir->insert_after(assign(lhs_i, rhs_i)); 872 } 873 874 ir->remove(); 875 progress = true; 876 return true; 877 } 878 879 bool 880 lower_ubo_reference_visitor::check_for_buffer_struct_copy(ir_assignment *ir) 881 { 882 if (!ir || !ir->lhs || !ir->rhs) 883 return false; 884 885 /* LHS and RHS must be records */ 886 if (!ir->lhs->type->is_record() || !ir->rhs->type->is_record()) 887 return false; 888 889 /* RHS must be a buffer-backed variable. This is what can cause the problem 890 * since it would lead to a series of loads that need to live until we 891 * see the writes to the LHS. 892 */ 893 ir_variable *rhs_var = ir->rhs->variable_referenced(); 894 if (!rhs_var || !is_buffer_backed_variable(rhs_var)) 895 return false; 896 897 /* Split the struct copy into individual element copies to reduce 898 * register pressure 899 */ 900 ir_dereference *rhs_deref = ir->rhs->as_dereference(); 901 if (!rhs_deref) 902 return false; 903 904 ir_dereference *lhs_deref = ir->lhs->as_dereference(); 905 if (!lhs_deref) 906 return false; 907 908 assert(lhs_deref->type == rhs_deref->type); 909 void *mem_ctx = ralloc_parent(shader->ir); 910 911 for (unsigned i = 0; i < lhs_deref->type->length; i++) { 912 const char *field_name = lhs_deref->type->fields.structure[i].name; 913 ir_dereference *lhs_field = 914 new(mem_ctx) ir_dereference_record(lhs_deref->clone(mem_ctx, NULL), 915 field_name); 916 ir_dereference *rhs_field = 917 new(mem_ctx) ir_dereference_record(rhs_deref->clone(mem_ctx, NULL), 918 field_name); 919 ir->insert_after(assign(lhs_field, rhs_field)); 920 } 921 922 ir->remove(); 923 progress = true; 924 return true; 925 } 926 927 ir_visitor_status 928 lower_ubo_reference_visitor::visit_enter(ir_assignment *ir) 929 { 930 /* Array and struct copies could involve large amounts of load/store 931 * operations. To improve register pressure we want to special-case 932 * these and split them into individual element copies. 933 * This way we avoid emitting all the loads for the RHS first and 934 * all the writes for the LHS second and register usage is more 935 * efficient. 936 */ 937 if (check_for_buffer_array_copy(ir)) 938 return visit_continue_with_parent; 939 940 if (check_for_buffer_struct_copy(ir)) 941 return visit_continue_with_parent; 942 943 check_ssbo_unsized_array_length_assignment(ir); 944 check_for_ssbo_store(ir); 945 return rvalue_visit(ir); 946 } 947 948 /* Lowers the intrinsic call to a new internal intrinsic that swaps the 949 * access to the buffer variable in the first parameter by an offset 950 * and block index. This involves creating the new internal intrinsic 951 * (i.e. the new function signature). 952 */ 953 ir_call * 954 lower_ubo_reference_visitor::lower_ssbo_atomic_intrinsic(ir_call *ir) 955 { 956 /* SSBO atomics usually have 2 parameters, the buffer variable and an 957 * integer argument. The exception is CompSwap, that has an additional 958 * integer parameter. 959 */ 960 int param_count = ir->actual_parameters.length(); 961 assert(param_count == 2 || param_count == 3); 962 963 /* First argument must be a scalar integer buffer variable */ 964 exec_node *param = ir->actual_parameters.get_head(); 965 ir_instruction *inst = (ir_instruction *) param; 966 assert(inst->ir_type == ir_type_dereference_variable || 967 inst->ir_type == ir_type_dereference_array || 968 inst->ir_type == ir_type_dereference_record || 969 inst->ir_type == ir_type_swizzle); 970 971 ir_rvalue *deref = (ir_rvalue *) inst; 972 assert(deref->type->is_scalar() && deref->type->is_integer()); 973 974 ir_variable *var = deref->variable_referenced(); 975 assert(var); 976 977 /* Compute the offset to the start if the dereference and the 978 * block index 979 */ 980 void *mem_ctx = ralloc_parent(shader->ir); 981 982 ir_rvalue *offset = NULL; 983 unsigned const_offset; 984 bool row_major; 985 const glsl_type *matrix_type; 986 987 enum glsl_interface_packing packing = 988 var->get_interface_type()-> 989 get_internal_ifc_packing(use_std430_as_default); 990 991 this->buffer_access_type = ssbo_atomic_access; 992 this->variable = var; 993 994 setup_for_load_or_store(mem_ctx, var, deref, 995 &offset, &const_offset, 996 &row_major, &matrix_type, 997 packing); 998 assert(offset); 999 assert(!row_major); 1000 assert(matrix_type == NULL); 1001 1002 ir_rvalue *deref_offset = 1003 add(offset, new(mem_ctx) ir_constant(const_offset)); 1004 ir_rvalue *block_index = this->uniform_block->clone(mem_ctx, NULL); 1005 1006 /* Create the new internal function signature that will take a block 1007 * index and offset instead of a buffer variable 1008 */ 1009 exec_list sig_params; 1010 ir_variable *sig_param = new(mem_ctx) 1011 ir_variable(glsl_type::uint_type, "block_ref" , ir_var_function_in); 1012 sig_params.push_tail(sig_param); 1013 1014 sig_param = new(mem_ctx) 1015 ir_variable(glsl_type::uint_type, "offset" , ir_var_function_in); 1016 sig_params.push_tail(sig_param); 1017 1018 const glsl_type *type = deref->type->base_type == GLSL_TYPE_INT ? 1019 glsl_type::int_type : glsl_type::uint_type; 1020 sig_param = new(mem_ctx) 1021 ir_variable(type, "data1", ir_var_function_in); 1022 sig_params.push_tail(sig_param); 1023 1024 if (param_count == 3) { 1025 sig_param = new(mem_ctx) 1026 ir_variable(type, "data2", ir_var_function_in); 1027 sig_params.push_tail(sig_param); 1028 } 1029 1030 ir_function_signature *sig = 1031 new(mem_ctx) ir_function_signature(deref->type, 1032 shader_storage_buffer_object); 1033 assert(sig); 1034 sig->replace_parameters(&sig_params); 1035 1036 assert(ir->callee->intrinsic_id >= ir_intrinsic_generic_load); 1037 assert(ir->callee->intrinsic_id <= ir_intrinsic_generic_atomic_comp_swap); 1038 sig->intrinsic_id = MAP_INTRINSIC_TO_TYPE(ir->callee->intrinsic_id, ssbo); 1039 1040 char func_name[64]; 1041 sprintf(func_name, "%s_ssbo", ir->callee_name()); 1042 ir_function *f = new(mem_ctx) ir_function(func_name); 1043 f->add_signature(sig); 1044 1045 /* Now, create the call to the internal intrinsic */ 1046 exec_list call_params; 1047 call_params.push_tail(block_index); 1048 call_params.push_tail(deref_offset); 1049 param = ir->actual_parameters.get_head()->get_next(); 1050 ir_rvalue *param_as_rvalue = ((ir_instruction *) param)->as_rvalue(); 1051 call_params.push_tail(param_as_rvalue->clone(mem_ctx, NULL)); 1052 if (param_count == 3) { 1053 param = param->get_next(); 1054 param_as_rvalue = ((ir_instruction *) param)->as_rvalue(); 1055 call_params.push_tail(param_as_rvalue->clone(mem_ctx, NULL)); 1056 } 1057 ir_dereference_variable *return_deref = 1058 ir->return_deref->clone(mem_ctx, NULL); 1059 return new(mem_ctx) ir_call(sig, return_deref, &call_params); 1060 } 1061 1062 ir_call * 1063 lower_ubo_reference_visitor::check_for_ssbo_atomic_intrinsic(ir_call *ir) 1064 { 1065 exec_list& params = ir->actual_parameters; 1066 1067 if (params.length() < 2 || params.length() > 3) 1068 return ir; 1069 1070 ir_rvalue *rvalue = 1071 ((ir_instruction *) params.get_head())->as_rvalue(); 1072 if (!rvalue) 1073 return ir; 1074 1075 ir_variable *var = rvalue->variable_referenced(); 1076 if (!var || !var->is_in_shader_storage_block()) 1077 return ir; 1078 1079 const enum ir_intrinsic_id id = ir->callee->intrinsic_id; 1080 if (id == ir_intrinsic_generic_atomic_add || 1081 id == ir_intrinsic_generic_atomic_min || 1082 id == ir_intrinsic_generic_atomic_max || 1083 id == ir_intrinsic_generic_atomic_and || 1084 id == ir_intrinsic_generic_atomic_or || 1085 id == ir_intrinsic_generic_atomic_xor || 1086 id == ir_intrinsic_generic_atomic_exchange || 1087 id == ir_intrinsic_generic_atomic_comp_swap) { 1088 return lower_ssbo_atomic_intrinsic(ir); 1089 } 1090 1091 return ir; 1092 } 1093 1094 1095 ir_visitor_status 1096 lower_ubo_reference_visitor::visit_enter(ir_call *ir) 1097 { 1098 ir_call *new_ir = check_for_ssbo_atomic_intrinsic(ir); 1099 if (new_ir != ir) { 1100 progress = true; 1101 base_ir->replace_with(new_ir); 1102 return visit_continue_with_parent; 1103 } 1104 1105 return rvalue_visit(ir); 1106 } 1107 1108 1109 ir_visitor_status 1110 lower_ubo_reference_visitor::visit_enter(ir_texture *ir) 1111 { 1112 ir_dereference *sampler = ir->sampler; 1113 1114 if (sampler->ir_type == ir_type_dereference_record) { 1115 handle_rvalue((ir_rvalue **)&ir->sampler); 1116 return visit_continue_with_parent; 1117 } 1118 1119 return rvalue_visit(ir); 1120 } 1121 1122 1123 } /* unnamed namespace */ 1124 1125 void 1126 lower_ubo_reference(struct gl_linked_shader *shader, 1127 bool clamp_block_indices, bool use_std430_as_default) 1128 { 1129 lower_ubo_reference_visitor v(shader, clamp_block_indices, 1130 use_std430_as_default); 1131 1132 /* Loop over the instructions lowering references, because we take 1133 * a deref of a UBO array using a UBO dereference as the index will 1134 * produce a collection of instructions all of which have cloned 1135 * UBO dereferences for that array index. 1136 */ 1137 do { 1138 v.progress = false; 1139 visit_list_elements(&v, shader->ir); 1140 } while (v.progress); 1141 } 1142