1 /* 2 * Copyright 2012 Intel Corporation 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 21 * DEALINGS IN THE SOFTWARE. 22 */ 23 24 /** 25 * \file lower_ubo_reference.cpp 26 * 27 * IR lower pass to replace dereferences of variables in a uniform 28 * buffer object with usage of ir_binop_ubo_load expressions, each of 29 * which can read data up to the size of a vec4. 30 * 31 * This relieves drivers of the responsibility to deal with tricky UBO 32 * layout issues like std140 structures and row_major matrices on 33 * their own. 
 */

#include "lower_buffer_access.h"
#include "ir_builder.h"
#include "main/macros.h"
#include "glsl_parser_extras.h"

using namespace ir_builder;

namespace {
/* Visitor that rewrites every dereference of a UBO/SSBO-backed variable
 * into explicit block-index + byte-offset accesses (ir_binop_ubo_load
 * expressions for UBOs, __intrinsic_load/store_ssbo calls for SSBOs).
 * It also lowers .length() on unsized SSBO arrays and generic atomic
 * intrinsics operating on SSBO variables.
 */
class lower_ubo_reference_visitor :
      public lower_buffer_access::lower_buffer_access {
public:
   lower_ubo_reference_visitor(struct gl_linked_shader *shader,
                               bool clamp_block_indices)
   : shader(shader), clamp_block_indices(clamp_block_indices),
     struct_field(NULL), variable(NULL)
   {
   }

   void handle_rvalue(ir_rvalue **rvalue);
   ir_visitor_status visit_enter(ir_assignment *ir);

   /* Computes the block index (this->uniform_block) and the offset
    * information needed to read or write the given dereference.
    */
   void setup_for_load_or_store(void *mem_ctx,
                                ir_variable *var,
                                ir_rvalue *deref,
                                ir_rvalue **offset,
                                unsigned *const_offset,
                                bool *row_major,
                                int *matrix_columns,
                                enum glsl_interface_packing packing);
   uint32_t ssbo_access_params();
   ir_expression *ubo_load(void *mem_ctx, const struct glsl_type *type,
                           ir_rvalue *offset);
   ir_call *ssbo_load(void *mem_ctx, const struct glsl_type *type,
                      ir_rvalue *offset);

   bool check_for_buffer_array_copy(ir_assignment *ir);
   bool check_for_buffer_struct_copy(ir_assignment *ir);
   void check_for_ssbo_store(ir_assignment *ir);
   void write_to_memory(void *mem_ctx, ir_dereference *deref, ir_variable *var,
                        ir_variable *write_var, unsigned write_mask);
   ir_call *ssbo_store(void *mem_ctx, ir_rvalue *deref, ir_rvalue *offset,
                       unsigned write_mask);

   /* What kind of buffer access is currently being lowered; consulted by
    * setup_for_load_or_store() (UBO vs. SSBO block lists) and by
    * insert_buffer_access().
    */
   enum {
      ubo_load_access,
      ssbo_load_access,
      ssbo_store_access,
      ssbo_unsized_array_length_access,
      ssbo_atomic_access,
   } buffer_access_type;

   void insert_buffer_access(void *mem_ctx, ir_dereference *deref,
                             const glsl_type *type, ir_rvalue *offset,
                             unsigned mask, int channel);

   ir_visitor_status visit_enter(class ir_expression *);
   ir_expression *calculate_ssbo_unsized_array_length(ir_expression *expr);
   void check_ssbo_unsized_array_length_expression(class ir_expression *);
   void check_ssbo_unsized_array_length_assignment(ir_assignment *ir);

   ir_expression *process_ssbo_unsized_array_length(ir_rvalue **,
                                                    ir_dereference *,
                                                    ir_variable *);
   ir_expression *emit_ssbo_get_buffer_size(void *mem_ctx);

   unsigned calculate_unsized_array_stride(ir_dereference *deref,
                                           enum glsl_interface_packing packing);

   ir_call *lower_ssbo_atomic_intrinsic(ir_call *ir);
   ir_call *check_for_ssbo_atomic_intrinsic(ir_call *ir);
   ir_visitor_status visit_enter(ir_call *ir);

   struct gl_linked_shader *shader;
   bool clamp_block_indices;
   /* Interface struct field for the variable being accessed, when the
    * access is through an interface instance; set as a side effect of
    * setup_buffer_access() and read by ssbo_access_params().
    */
   const struct glsl_struct_field *struct_field;
   /* The buffer-backed variable currently being lowered. */
   ir_variable *variable;
   /* Block index rvalue for the access currently being lowered; set by
    * setup_for_load_or_store() and cloned into each emitted load/store.
    */
   ir_rvalue *uniform_block;
   /* Set whenever a lowering was performed; the driver loop in
    * lower_ubo_reference() re-runs the pass until this stays false.
    */
   bool progress;
};

/**
 * Determine the name of the interface block field
 *
 * This is the name of the specific member as it would appear in the
 * \c gl_uniform_buffer_variable::Name field in the shader's
 * \c UniformBlocks array.
 */
static const char *
interface_field_name(void *mem_ctx, char *base_name, ir_rvalue *d,
                     ir_rvalue **nonconst_block_index)
{
   *nonconst_block_index = NULL;
   char *name_copy = NULL;
   size_t base_length = 0;

   /* Loop back through the IR until we find the uniform block */
   ir_rvalue *ir = d;
   while (ir != NULL) {
      switch (ir->ir_type) {
      case ir_type_dereference_variable: {
         /* Exit loop */
         ir = NULL;
         break;
      }

      case ir_type_dereference_record: {
         ir_dereference_record *r = (ir_dereference_record *) ir;
         ir = r->record->as_dereference();

         /* If we got here it means any previous array subscripts belong to
          * block members and not the block itself so skip over them in the
          * next pass.
          */
         d = ir;
         break;
      }

      case ir_type_dereference_array: {
         ir_dereference_array *a = (ir_dereference_array *) ir;
         ir = a->array->as_dereference();
         break;
      }

      case ir_type_swizzle: {
         ir_swizzle *s = (ir_swizzle *) ir;
         ir = s->val->as_dereference();
         /* Skip swizzle in the next pass */
         d = ir;
         break;
      }

      default:
         assert(!"Should not get here.");
         break;
      }
   }

   /* Second pass: walk from the (possibly adjusted) start back to the
    * variable, appending "[N]" or "[0]" subscripts for arrayed blocks and
    * accumulating any non-constant index into *nonconst_block_index.
    */
   while (d != NULL) {
      switch (d->ir_type) {
      case ir_type_dereference_variable: {
         ir_dereference_variable *v = (ir_dereference_variable *) d;
         if (name_copy != NULL &&
             v->var->is_interface_instance() &&
             v->var->type->is_array()) {
            return name_copy;
         } else {
            /* A subscripted non-instance access names the block itself, so
             * any index gathered so far belongs to a member, not the block.
             */
            *nonconst_block_index = NULL;
            return base_name;
         }

         break;
      }

      case ir_type_dereference_array: {
         ir_dereference_array *a = (ir_dereference_array *) d;
         size_t new_length;

         if (name_copy == NULL) {
            name_copy = ralloc_strdup(mem_ctx, base_name);
            base_length = strlen(name_copy);
         }

         /* For arrays of arrays we start at the innermost array and work our
          * way out so we need to insert the subscript at the base of the
          * name string rather than just attaching it to the end.
          */
         new_length = base_length;
         ir_constant *const_index = a->array_index->as_constant();
         char *end = ralloc_strdup(NULL, &name_copy[new_length]);
         if (!const_index) {
            ir_rvalue *array_index = a->array_index;
            if (array_index->type != glsl_type::uint_type)
               array_index = i2u(array_index);

            /* Scale an outer index by the flattened size of the inner
             * dimensions so the accumulated block index stays linear.
             */
            if (a->array->type->is_array() &&
                a->array->type->fields.array->is_array()) {
               ir_constant *base_size = new(mem_ctx)
                  ir_constant(a->array->type->fields.array->arrays_of_arrays_size());
               array_index = mul(array_index, base_size);
            }

            if (*nonconst_block_index) {
               *nonconst_block_index = add(*nonconst_block_index, array_index);
            } else {
               *nonconst_block_index = array_index;
            }

            /* Dynamic subscripts are looked up under the "[0]" name. */
            ralloc_asprintf_rewrite_tail(&name_copy, &new_length, "[0]%s",
                                         end);
         } else {
            ralloc_asprintf_rewrite_tail(&name_copy, &new_length, "[%d]%s",
                                         const_index->get_uint_component(0),
                                         end);
         }
         ralloc_free(end);

         d = a->array->as_dereference();

         break;
      }

      default:
         assert(!"Should not get here.");
         break;
      }
   }

   assert(!"Should not get here.");
   return NULL;
}

/* Clamp a dynamic block-array index into [0, array_size - 1], preserving
 * the index's original (signed or unsigned) type.  Used when robust buffer
 * access behavior (clamp_block_indices) is requested.
 */
static ir_rvalue *
clamp_to_array_bounds(void *mem_ctx, ir_rvalue *index, const glsl_type *type)
{
   assert(type->is_array());

   const unsigned array_size = type->arrays_of_arrays_size();

   ir_constant *max_index = new(mem_ctx) ir_constant(array_size - 1);
   max_index->type = index->type;

   ir_constant *zero = new(mem_ctx) ir_constant(0);
   zero->type = index->type;

   /* Only a signed index can be negative; unsigned only needs the upper
    * clamp.
    */
   if (index->type->base_type == GLSL_TYPE_INT)
      index = max2(index, zero);
   index = min2(index, max_index);

   return index;
}

void
lower_ubo_reference_visitor::setup_for_load_or_store(void *mem_ctx,
                                                     ir_variable *var,
                                                     ir_rvalue *deref,
                                                     ir_rvalue **offset,
                                                     unsigned *const_offset,
                                                     bool *row_major,
                                                     int *matrix_columns,
                                                     enum glsl_interface_packing packing)
{
   /* Determine the name of the interface block */
   ir_rvalue *nonconst_block_index;
   const char *const field_name =
      interface_field_name(mem_ctx, (char *) var->get_interface_type()->name,
                           deref, &nonconst_block_index);

   if (nonconst_block_index && clamp_block_indices) {
      nonconst_block_index =
         clamp_to_array_bounds(mem_ctx, nonconst_block_index, var->type);
   }

   /* Locate the block by interface name */
   unsigned num_blocks;
   struct gl_uniform_block **blocks;
   if (this->buffer_access_type != ubo_load_access) {
      num_blocks = shader->Program->info.num_ssbos;
      blocks = shader->Program->sh.ShaderStorageBlocks;
   } else {
      num_blocks = shader->Program->info.num_ubos;
      blocks = shader->Program->sh.UniformBlocks;
   }
   this->uniform_block = NULL;
   for (unsigned i = 0; i < num_blocks; i++) {
      if (strcmp(field_name, blocks[i]->Name) == 0) {

         ir_constant *index = new(mem_ctx) ir_constant(i);

         /* For arrayed blocks the matched name is the "[0]" element, so a
          * dynamic subscript is added on top of the base block index.
          */
         if (nonconst_block_index) {
            this->uniform_block = add(nonconst_block_index, index);
         } else {
            this->uniform_block = index;
         }

         if (var->is_interface_instance()) {
            *const_offset = 0;
         } else {
            *const_offset = blocks[i]->Uniforms[var->data.location].Offset;
         }

         break;
      }
   }

   assert(this->uniform_block);

   this->struct_field = NULL;
   setup_buffer_access(mem_ctx, deref, offset, const_offset, row_major,
                       matrix_columns, &this->struct_field, packing);
}

/* Replace a read of a buffer-backed variable with loads into a fresh
 * temporary and point the rvalue at that temporary.
 */
void
lower_ubo_reference_visitor::handle_rvalue(ir_rvalue **rvalue)
{
   if (!*rvalue)
      return;

   ir_dereference *deref = (*rvalue)->as_dereference();
   if (!deref)
      return;

   ir_variable *var = deref->variable_referenced();
   if (!var || !var->is_in_buffer_block())
      return;

   void *mem_ctx = ralloc_parent(shader->ir);

   ir_rvalue *offset = NULL;
   unsigned const_offset;
   bool row_major;
   int matrix_columns;
   enum glsl_interface_packing packing = var->get_interface_type_packing();

   this->buffer_access_type =
      var->is_in_shader_storage_block() ?
      ssbo_load_access : ubo_load_access;
   this->variable = var;

   /* Compute the offset to the start of the dereference as well as other
    * information we need to configure the write
    */
   setup_for_load_or_store(mem_ctx, var, deref,
                           &offset, &const_offset,
                           &row_major, &matrix_columns,
                           packing);
   assert(offset);

   /* Now that we've calculated the offset to the start of the
    * dereference, walk over the type and emit loads into a temporary.
    */
   const glsl_type *type = (*rvalue)->type;
   ir_variable *load_var = new(mem_ctx) ir_variable(type,
                                                    "ubo_load_temp",
                                                    ir_var_temporary);
   base_ir->insert_before(load_var);

   /* Snapshot the offset into a temporary so the (possibly complex) offset
    * expression is evaluated once, not per emitted load.
    */
   ir_variable *load_offset = new(mem_ctx) ir_variable(glsl_type::uint_type,
                                                       "ubo_load_temp_offset",
                                                       ir_var_temporary);
   base_ir->insert_before(load_offset);
   base_ir->insert_before(assign(load_offset, offset));

   deref = new(mem_ctx) ir_dereference_variable(load_var);
   emit_access(mem_ctx, false, deref, load_offset, const_offset,
               row_major, matrix_columns, packing, 0);
   *rvalue = deref;

   progress = true;
}

/* Build an ir_binop_ubo_load expression reading `type` bytes-worth of data
 * at `offset` in the current block (this->uniform_block).
 */
ir_expression *
lower_ubo_reference_visitor::ubo_load(void *mem_ctx,
                                      const glsl_type *type,
                                      ir_rvalue *offset)
{
   ir_rvalue *block_ref = this->uniform_block->clone(mem_ctx, NULL);
   return new(mem_ctx)
      ir_expression(ir_binop_ubo_load,
                    type,
                    block_ref,
                    offset);

}

/* Builtin-availability predicate for the internal SSBO intrinsics'
 * function signatures.
 */
static bool
shader_storage_buffer_object(const _mesa_glsl_parse_state *state)
{
   return state->has_shader_storage_buffer_objects();
}

/* Translate the coherent/restrict/volatile qualifiers on the variable (or
 * its interface struct field) into ACCESS_* flags for the SSBO intrinsics.
 */
uint32_t
lower_ubo_reference_visitor::ssbo_access_params()
{
   assert(variable);

   if (variable->is_interface_instance()) {
      assert(struct_field);

      return ((struct_field->image_coherent ? ACCESS_COHERENT : 0) |
              (struct_field->image_restrict ? ACCESS_RESTRICT : 0) |
              (struct_field->image_volatile ? ACCESS_VOLATILE : 0));
   } else {
      return ((variable->data.image_coherent ? ACCESS_COHERENT : 0) |
              (variable->data.image_restrict ? ACCESS_RESTRICT : 0) |
              (variable->data.image_volatile ? ACCESS_VOLATILE : 0));
   }
}

/* Build a call to __intrinsic_store_ssbo(block_ref, offset, value,
 * write_mask, access), creating the intrinsic signature on the fly.
 */
ir_call *
lower_ubo_reference_visitor::ssbo_store(void *mem_ctx,
                                        ir_rvalue *deref,
                                        ir_rvalue *offset,
                                        unsigned write_mask)
{
   exec_list sig_params;

   ir_variable *block_ref = new(mem_ctx)
      ir_variable(glsl_type::uint_type, "block_ref" , ir_var_function_in);
   sig_params.push_tail(block_ref);

   ir_variable *offset_ref = new(mem_ctx)
      ir_variable(glsl_type::uint_type, "offset" , ir_var_function_in);
   sig_params.push_tail(offset_ref);

   ir_variable *val_ref = new(mem_ctx)
      ir_variable(deref->type, "value" , ir_var_function_in);
   sig_params.push_tail(val_ref);

   ir_variable *writemask_ref = new(mem_ctx)
      ir_variable(glsl_type::uint_type, "write_mask" , ir_var_function_in);
   sig_params.push_tail(writemask_ref);

   ir_variable *access_ref = new(mem_ctx)
      ir_variable(glsl_type::uint_type, "access" , ir_var_function_in);
   sig_params.push_tail(access_ref);

   ir_function_signature *sig = new(mem_ctx)
      ir_function_signature(glsl_type::void_type, shader_storage_buffer_object);
   assert(sig);
   sig->replace_parameters(&sig_params);
   sig->intrinsic_id = ir_intrinsic_ssbo_store;

   ir_function *f = new(mem_ctx) ir_function("__intrinsic_store_ssbo");
   f->add_signature(sig);

   exec_list call_params;
   call_params.push_tail(this->uniform_block->clone(mem_ctx, NULL));
   call_params.push_tail(offset->clone(mem_ctx, NULL));
   call_params.push_tail(deref->clone(mem_ctx, NULL));
   call_params.push_tail(new(mem_ctx) ir_constant(write_mask));
   call_params.push_tail(new(mem_ctx) ir_constant(ssbo_access_params()));
   return new(mem_ctx) ir_call(sig, NULL, &call_params);
}

/* Build a call to __intrinsic_load_ssbo(block_ref, offset, access) whose
 * result lands in a fresh "ssbo_load_result" temporary.
 */
ir_call *
lower_ubo_reference_visitor::ssbo_load(void *mem_ctx,
                                       const struct glsl_type *type,
                                       ir_rvalue *offset)
{
   exec_list sig_params;

   ir_variable *block_ref = new(mem_ctx)
      ir_variable(glsl_type::uint_type, "block_ref" , ir_var_function_in);
   sig_params.push_tail(block_ref);

   ir_variable *offset_ref = new(mem_ctx)
      ir_variable(glsl_type::uint_type, "offset_ref" , ir_var_function_in);
   sig_params.push_tail(offset_ref);

   ir_variable *access_ref = new(mem_ctx)
      ir_variable(glsl_type::uint_type, "access" , ir_var_function_in);
   sig_params.push_tail(access_ref);

   ir_function_signature *sig =
      new(mem_ctx) ir_function_signature(type, shader_storage_buffer_object);
   assert(sig);
   sig->replace_parameters(&sig_params);
   sig->intrinsic_id = ir_intrinsic_ssbo_load;

   ir_function *f = new(mem_ctx) ir_function("__intrinsic_load_ssbo");
   f->add_signature(sig);

   ir_variable *result = new(mem_ctx)
      ir_variable(type, "ssbo_load_result", ir_var_temporary);
   base_ir->insert_before(result);
   ir_dereference_variable *deref_result = new(mem_ctx)
      ir_dereference_variable(result);

   exec_list call_params;
   call_params.push_tail(this->uniform_block->clone(mem_ctx, NULL));
   call_params.push_tail(offset->clone(mem_ctx, NULL));
   call_params.push_tail(new(mem_ctx) ir_constant(ssbo_access_params()));

   return new(mem_ctx) ir_call(sig, deref_result, &call_params);
}

/* Callback from emit_access() in the lower_buffer_access base class: emit
 * one load or store of up to a vec4 at the given offset.
 */
void
lower_ubo_reference_visitor::insert_buffer_access(void *mem_ctx,
                                                  ir_dereference *deref,
                                                  const glsl_type *type,
                                                  ir_rvalue *offset,
                                                  unsigned mask,
                                                  int channel)
{
   switch (this->buffer_access_type) {
   case ubo_load_access:
      base_ir->insert_before(assign(deref->clone(mem_ctx, NULL),
                                    ubo_load(mem_ctx, type, offset),
                                    mask));
      break;
   case ssbo_load_access: {
      ir_call *load_ssbo = ssbo_load(mem_ctx, type, offset);
      base_ir->insert_before(load_ssbo);
      ir_rvalue *value = load_ssbo->return_deref->as_rvalue()->clone(mem_ctx, NULL);
      ir_assignment *assignment =
         assign(deref->clone(mem_ctx, NULL), value, mask);
      base_ir->insert_before(assignment);
      break;
   }
   case ssbo_store_access:
      /* Stores go after base_ir: the temporary being written must be
       * assigned (by the original, rewritten assignment) first.
       */
      if (channel >= 0) {
         base_ir->insert_after(ssbo_store(mem_ctx,
                                          swizzle(deref, channel, 1),
                                          offset, 1));
      } else {
         base_ir->insert_after(ssbo_store(mem_ctx, deref, offset, mask));
      }
      break;
   default:
      unreachable("invalid buffer_access_type in insert_buffer_access");
   }
}

/* Emit the SSBO stores that flush `write_var` (the temporary now holding
 * the assigned value) back to the buffer location named by `deref`.
 */
void
lower_ubo_reference_visitor::write_to_memory(void *mem_ctx,
                                             ir_dereference *deref,
                                             ir_variable *var,
                                             ir_variable *write_var,
                                             unsigned write_mask)
{
   ir_rvalue *offset = NULL;
   unsigned const_offset;
   bool row_major;
   int matrix_columns;
   enum glsl_interface_packing packing = var->get_interface_type_packing();

   this->buffer_access_type = ssbo_store_access;
   this->variable = var;

   /* Compute the offset to the start of the dereference as well as other
    * information we need to configure the write
    */
   setup_for_load_or_store(mem_ctx, var, deref,
                           &offset, &const_offset,
                           &row_major, &matrix_columns,
                           packing);
   assert(offset);

   /* Now emit writes from the temporary to memory */
   ir_variable *write_offset =
      new(mem_ctx) ir_variable(glsl_type::uint_type,
                               "ssbo_store_temp_offset",
                               ir_var_temporary);

   base_ir->insert_before(write_offset);
   base_ir->insert_before(assign(write_offset, offset));

   deref = new(mem_ctx) ir_dereference_variable(write_var);
   emit_access(mem_ctx, true, deref, write_offset, const_offset,
               row_major, matrix_columns, packing, write_mask);
}

ir_visitor_status
lower_ubo_reference_visitor::visit_enter(ir_expression *ir)
{
   check_ssbo_unsized_array_length_expression(ir);
   return rvalue_visit(ir);
}

/* If `expr` is an ir_unop_ssbo_unsized_array_length on an unsized array in
 * a shader storage block, return its lowered replacement; otherwise NULL.
 */
ir_expression *
lower_ubo_reference_visitor::calculate_ssbo_unsized_array_length(ir_expression *expr)
{
   if (expr->operation !=
       ir_expression_operation(ir_unop_ssbo_unsized_array_length))
      return NULL;

   ir_rvalue *rvalue = expr->operands[0]->as_rvalue();
   if (!rvalue ||
       !rvalue->type->is_array() || !rvalue->type->is_unsized_array())
      return NULL;

   ir_dereference *deref = expr->operands[0]->as_dereference();
   if (!deref)
      return NULL;

   ir_variable *var = expr->operands[0]->variable_referenced();
   if (!var || !var->is_in_shader_storage_block())
      return NULL;
   return process_ssbo_unsized_array_length(&rvalue, deref, var);
}

/* Replace any .length() sub-expressions among `ir`'s operands with their
 * lowered form.
 */
void
lower_ubo_reference_visitor::check_ssbo_unsized_array_length_expression(ir_expression *ir)
{
   if (ir->operation ==
       ir_expression_operation(ir_unop_ssbo_unsized_array_length)) {
      /* Don't replace this unop if it is found alone. It is going to be
       * removed by the optimization passes or replaced if it is part of
       * an ir_assignment or another ir_expression.
       */
      return;
   }

   for (unsigned i = 0; i < ir->get_num_operands(); i++) {
      if (ir->operands[i]->ir_type != ir_type_expression)
         continue;
      ir_expression *expr = (ir_expression *) ir->operands[i];
      ir_expression *temp = calculate_ssbo_unsized_array_length(expr);
      if (!temp)
         continue;

      delete expr;
      ir->operands[i] = temp;
   }
}

/* Replace a .length() expression appearing directly as an assignment RHS. */
void
lower_ubo_reference_visitor::check_ssbo_unsized_array_length_assignment(ir_assignment *ir)
{
   if (!ir->rhs || ir->rhs->ir_type != ir_type_expression)
      return;

   ir_expression *expr = (ir_expression *) ir->rhs;
   ir_expression *temp = calculate_ssbo_unsized_array_length(expr);
   if (!temp)
      return;

   delete expr;
   ir->rhs = temp;
   return;
}

/* Build an ir_unop_get_buffer_size expression for the current block. */
ir_expression *
lower_ubo_reference_visitor::emit_ssbo_get_buffer_size(void *mem_ctx)
{
   ir_rvalue *block_ref = this->uniform_block->clone(mem_ctx, NULL);
   return new(mem_ctx) ir_expression(ir_unop_get_buffer_size,
                                     glsl_type::int_type,
                                     block_ref);
}

/* Compute the per-element stride (in bytes) of the unsized array being
 * dereferenced, according to the block's std140/std430 packing.
 */
unsigned
lower_ubo_reference_visitor::calculate_unsized_array_stride(ir_dereference *deref,
                                                            enum glsl_interface_packing packing)
{
   unsigned array_stride = 0;

   switch (deref->ir_type) {
   case ir_type_dereference_variable:
   {
      ir_dereference_variable *deref_var = (ir_dereference_variable *)deref;
      const struct glsl_type *unsized_array_type = NULL;
      /* An unsized array can be sized by other lowering passes, so pick
       * the first field of the array which has the data type of the unsized
       * array.
       */
      unsized_array_type = deref_var->var->type->fields.array;

      /* Whether or not the field is row-major (because it might be a
       * bvec2 or something) does not affect the array itself. We need
       * to know whether an array element in its entirety is row-major.
       */
      const bool array_row_major =
         is_dereferenced_thing_row_major(deref_var);

      if (packing == GLSL_INTERFACE_PACKING_STD430) {
         array_stride = unsized_array_type->std430_array_stride(array_row_major);
      } else {
         /* std140: element size rounded up to a vec4 boundary. */
         array_stride = unsized_array_type->std140_size(array_row_major);
         array_stride = glsl_align(array_stride, 16);
      }
      break;
   }
   case ir_type_dereference_record:
   {
      ir_dereference_record *deref_record = (ir_dereference_record *) deref;
      ir_dereference *interface_deref =
         deref_record->record->as_dereference();
      assert(interface_deref != NULL);
      const struct glsl_type *interface_type = interface_deref->type;
      unsigned record_length = interface_type->length;
      /* Unsized array is always the last element of the interface */
      const struct glsl_type *unsized_array_type =
         interface_type->fields.structure[record_length - 1].type->fields.array;

      const bool array_row_major =
         is_dereferenced_thing_row_major(deref_record);

      if (packing == GLSL_INTERFACE_PACKING_STD430) {
         array_stride = unsized_array_type->std430_array_stride(array_row_major);
      } else {
         array_stride = unsized_array_type->std140_size(array_row_major);
         array_stride = glsl_align(array_stride, 16);
      }
      break;
   }
   default:
      unreachable("Unsupported dereference type");
   }
   return array_stride;
}

/* Lower array.length() on an unsized SSBO array into arithmetic on the
 * buffer size returned by ir_unop_get_buffer_size.
 */
ir_expression *
lower_ubo_reference_visitor::process_ssbo_unsized_array_length(ir_rvalue **rvalue,
                                                               ir_dereference *deref,
                                                               ir_variable *var)
{
   void *mem_ctx = ralloc_parent(*rvalue);

   ir_rvalue *base_offset = NULL;
   unsigned const_offset;
   bool row_major;
   int matrix_columns;
   enum glsl_interface_packing packing = var->get_interface_type_packing();
   int unsized_array_stride = calculate_unsized_array_stride(deref, packing);

   this->buffer_access_type = ssbo_unsized_array_length_access;
   this->variable = var;

   /* Compute the offset to the start of the dereference as well as other
    * information we need to calculate the length.
    */
   setup_for_load_or_store(mem_ctx, var, deref,
                           &base_offset, &const_offset,
                           &row_major, &matrix_columns,
                           packing);
   /* array.length() =
    *  max((buffer_object_size - offset_of_array) / stride_of_array, 0)
    */
   ir_expression *buffer_size = emit_ssbo_get_buffer_size(mem_ctx);

   ir_expression *offset_of_array = new(mem_ctx)
      ir_expression(ir_binop_add, base_offset,
                    new(mem_ctx) ir_constant(const_offset));
   ir_expression *offset_of_array_int = new(mem_ctx)
      ir_expression(ir_unop_u2i, offset_of_array);

   ir_expression *sub = new(mem_ctx)
      ir_expression(ir_binop_sub, buffer_size, offset_of_array_int);
   ir_expression *div = new(mem_ctx)
      ir_expression(ir_binop_div, sub,
                    new(mem_ctx) ir_constant(unsized_array_stride));
   ir_expression *max = new(mem_ctx)
      ir_expression(ir_binop_max, div, new(mem_ctx) ir_constant(0));

   return max;
}

/* If the assignment writes to an SSBO variable, redirect the LHS to a
 * temporary and emit the stores that flush it to memory.
 */
void
lower_ubo_reference_visitor::check_for_ssbo_store(ir_assignment *ir)
{
   if (!ir || !ir->lhs)
      return;

   ir_rvalue *rvalue = ir->lhs->as_rvalue();
   if (!rvalue)
      return;

   ir_dereference *deref = ir->lhs->as_dereference();
   if (!deref)
      return;

   ir_variable *var = ir->lhs->variable_referenced();
   if (!var || !var->is_in_shader_storage_block())
      return;

   /* We have a write to a buffer variable, so declare a temporary and rewrite
    * the assignment so that the temporary is the LHS.
    */
   void *mem_ctx = ralloc_parent(shader->ir);

   const glsl_type *type = rvalue->type;
   ir_variable *write_var = new(mem_ctx) ir_variable(type,
                                                     "ssbo_store_temp",
                                                     ir_var_temporary);
   base_ir->insert_before(write_var);
   ir->lhs = new(mem_ctx) ir_dereference_variable(write_var);

   /* Now we have to write the value assigned to the temporary back to memory */
   write_to_memory(mem_ctx, deref, var, write_var, ir->write_mask);
   progress = true;
}

/* True for variables whose storage lives in a buffer: UBO/SSBO members and
 * compute-shader `shared` variables.
 */
static bool
is_buffer_backed_variable(ir_variable *var)
{
   return var->is_in_buffer_block() ||
          var->data.mode == ir_var_shader_shared;
}

bool
lower_ubo_reference_visitor::check_for_buffer_array_copy(ir_assignment *ir)
{
   if (!ir || !ir->lhs || !ir->rhs)
      return false;

   /* LHS and RHS must be arrays
    * FIXME: arrays of arrays?
    */
   if (!ir->lhs->type->is_array() || !ir->rhs->type->is_array())
      return false;

   /* RHS must be a buffer-backed variable. This is what can cause the problem
    * since it would lead to a series of loads that need to live until we
    * see the writes to the LHS.
    */
   ir_variable *rhs_var = ir->rhs->variable_referenced();
   if (!rhs_var || !is_buffer_backed_variable(rhs_var))
      return false;

   /* Split the array copy into individual element copies to reduce
    * register pressure
    */
   ir_dereference *rhs_deref = ir->rhs->as_dereference();
   if (!rhs_deref)
      return false;

   ir_dereference *lhs_deref = ir->lhs->as_dereference();
   if (!lhs_deref)
      return false;

   assert(lhs_deref->type->length == rhs_deref->type->length);
   void *mem_ctx = ralloc_parent(shader->ir);

   for (unsigned i = 0; i < lhs_deref->type->length; i++) {
      ir_dereference *lhs_i =
         new(mem_ctx) ir_dereference_array(lhs_deref->clone(mem_ctx, NULL),
                                           new(mem_ctx) ir_constant(i));

      ir_dereference *rhs_i =
         new(mem_ctx) ir_dereference_array(rhs_deref->clone(mem_ctx, NULL),
                                           new(mem_ctx) ir_constant(i));
      ir->insert_after(assign(lhs_i, rhs_i));
   }

   /* The element copies inserted after `ir` will be lowered when the
    * visitor reaches them; the original whole-array copy goes away.
    */
   ir->remove();
   progress = true;
   return true;
}

bool
lower_ubo_reference_visitor::check_for_buffer_struct_copy(ir_assignment *ir)
{
   if (!ir || !ir->lhs || !ir->rhs)
      return false;

   /* LHS and RHS must be records */
   if (!ir->lhs->type->is_record() || !ir->rhs->type->is_record())
      return false;

   /* RHS must be a buffer-backed variable. This is what can cause the problem
    * since it would lead to a series of loads that need to live until we
    * see the writes to the LHS.
    */
   ir_variable *rhs_var = ir->rhs->variable_referenced();
   if (!rhs_var || !is_buffer_backed_variable(rhs_var))
      return false;

   /* Split the struct copy into individual element copies to reduce
    * register pressure
    */
   ir_dereference *rhs_deref = ir->rhs->as_dereference();
   if (!rhs_deref)
      return false;

   ir_dereference *lhs_deref = ir->lhs->as_dereference();
   if (!lhs_deref)
      return false;

   assert(lhs_deref->type->record_compare(rhs_deref->type));
   void *mem_ctx = ralloc_parent(shader->ir);

   for (unsigned i = 0; i < lhs_deref->type->length; i++) {
      const char *field_name = lhs_deref->type->fields.structure[i].name;
      ir_dereference *lhs_field =
         new(mem_ctx) ir_dereference_record(lhs_deref->clone(mem_ctx, NULL),
                                            field_name);
      ir_dereference *rhs_field =
         new(mem_ctx) ir_dereference_record(rhs_deref->clone(mem_ctx, NULL),
                                            field_name);
      ir->insert_after(assign(lhs_field, rhs_field));
   }

   ir->remove();
   progress = true;
   return true;
}

ir_visitor_status
lower_ubo_reference_visitor::visit_enter(ir_assignment *ir)
{
   /* Array and struct copies could involve large amounts of load/store
    * operations. To improve register pressure we want to special-case
    * these and split them into individual element copies.
    * This way we avoid emitting all the loads for the RHS first and
    * all the writes for the LHS second and register usage is more
    * efficient.
    */
   if (check_for_buffer_array_copy(ir))
      return visit_continue_with_parent;

   if (check_for_buffer_struct_copy(ir))
      return visit_continue_with_parent;

   check_ssbo_unsized_array_length_assignment(ir);
   check_for_ssbo_store(ir);
   return rvalue_visit(ir);
}

/* Lowers the intrinsic call to a new internal intrinsic that swaps the
 * access to the buffer variable in the first parameter by an offset
 * and block index. This involves creating the new internal intrinsic
 * (i.e. the new function signature).
 */
ir_call *
lower_ubo_reference_visitor::lower_ssbo_atomic_intrinsic(ir_call *ir)
{
   /* SSBO atomics usually have 2 parameters, the buffer variable and an
    * integer argument. The exception is CompSwap, that has an additional
    * integer parameter.
    */
   int param_count = ir->actual_parameters.length();
   assert(param_count == 2 || param_count == 3);

   /* First argument must be a scalar integer buffer variable */
   exec_node *param = ir->actual_parameters.get_head();
   ir_instruction *inst = (ir_instruction *) param;
   assert(inst->ir_type == ir_type_dereference_variable ||
          inst->ir_type == ir_type_dereference_array ||
          inst->ir_type == ir_type_dereference_record ||
          inst->ir_type == ir_type_swizzle);

   ir_rvalue *deref = (ir_rvalue *) inst;
   assert(deref->type->is_scalar() && deref->type->is_integer());

   ir_variable *var = deref->variable_referenced();
   assert(var);

   /* Compute the offset to the start of the dereference and the
    * block index
    */
   void *mem_ctx = ralloc_parent(shader->ir);

   ir_rvalue *offset = NULL;
   unsigned const_offset;
   bool row_major;
   int matrix_columns;
   enum glsl_interface_packing packing = var->get_interface_type_packing();

   this->buffer_access_type = ssbo_atomic_access;
   this->variable = var;

   setup_for_load_or_store(mem_ctx, var, deref,
                           &offset, &const_offset,
                           &row_major, &matrix_columns,
                           packing);
   assert(offset);
   /* Atomics operate on scalar integers, which can never be row-major
    * matrix data.
    */
   assert(!row_major);
   assert(matrix_columns == 1);

   ir_rvalue *deref_offset =
      add(offset, new(mem_ctx) ir_constant(const_offset));
   ir_rvalue *block_index = this->uniform_block->clone(mem_ctx, NULL);

   /* Create the new internal function signature that will take a block
    * index and offset instead of a buffer variable
    */
   exec_list sig_params;
   ir_variable *sig_param = new(mem_ctx)
      ir_variable(glsl_type::uint_type, "block_ref" , ir_var_function_in);
   sig_params.push_tail(sig_param);

   sig_param = new(mem_ctx)
      ir_variable(glsl_type::uint_type, "offset" , ir_var_function_in);
   sig_params.push_tail(sig_param);

   const glsl_type *type = deref->type->base_type == GLSL_TYPE_INT ?
      glsl_type::int_type : glsl_type::uint_type;
   sig_param = new(mem_ctx)
      ir_variable(type, "data1", ir_var_function_in);
   sig_params.push_tail(sig_param);

   if (param_count == 3) {
      sig_param = new(mem_ctx)
         ir_variable(type, "data2", ir_var_function_in);
      sig_params.push_tail(sig_param);
   }

   ir_function_signature *sig =
      new(mem_ctx) ir_function_signature(deref->type,
                                         shader_storage_buffer_object);
   assert(sig);
   sig->replace_parameters(&sig_params);

   /* Map the generic atomic intrinsic id to its SSBO-specific variant. */
   assert(ir->callee->intrinsic_id >= ir_intrinsic_generic_load);
   assert(ir->callee->intrinsic_id <= ir_intrinsic_generic_atomic_comp_swap);
   sig->intrinsic_id = MAP_INTRINSIC_TO_TYPE(ir->callee->intrinsic_id, ssbo);

   char func_name[64];
   sprintf(func_name, "%s_ssbo", ir->callee_name());
   ir_function *f = new(mem_ctx) ir_function(func_name);
   f->add_signature(sig);

   /* Now, create the call to the internal intrinsic */
   exec_list call_params;
   call_params.push_tail(block_index);
   call_params.push_tail(deref_offset);
   param = ir->actual_parameters.get_head()->get_next();
   ir_rvalue *param_as_rvalue = ((ir_instruction *) param)->as_rvalue();
   call_params.push_tail(param_as_rvalue->clone(mem_ctx, NULL));
   if (param_count == 3) {
      param = param->get_next();
      param_as_rvalue = ((ir_instruction *) param)->as_rvalue();
      call_params.push_tail(param_as_rvalue->clone(mem_ctx, NULL));
   }
   ir_dereference_variable *return_deref =
      ir->return_deref->clone(mem_ctx, NULL);
   return new(mem_ctx) ir_call(sig, return_deref, &call_params);
}

/* Return the lowered replacement for `ir` if it is a generic atomic
 * intrinsic whose first argument is an SSBO variable; otherwise return
 * `ir` unchanged.
 */
ir_call *
lower_ubo_reference_visitor::check_for_ssbo_atomic_intrinsic(ir_call *ir)
{
   exec_list& params = ir->actual_parameters;

   if (params.length() < 2 || params.length() > 3)
      return ir;

   ir_rvalue *rvalue =
      ((ir_instruction *) params.get_head())->as_rvalue();
   if (!rvalue)
      return ir;

   ir_variable *var = rvalue->variable_referenced();
   if (!var || !var->is_in_shader_storage_block())
      return ir;

   const enum ir_intrinsic_id id = ir->callee->intrinsic_id;
   if (id == ir_intrinsic_generic_atomic_add ||
       id == ir_intrinsic_generic_atomic_min ||
       id == ir_intrinsic_generic_atomic_max ||
       id == ir_intrinsic_generic_atomic_and ||
       id == ir_intrinsic_generic_atomic_or ||
       id == ir_intrinsic_generic_atomic_xor ||
       id == ir_intrinsic_generic_atomic_exchange ||
       id == ir_intrinsic_generic_atomic_comp_swap) {
      return lower_ssbo_atomic_intrinsic(ir);
   }

   return ir;
}


ir_visitor_status
lower_ubo_reference_visitor::visit_enter(ir_call *ir)
{
   ir_call *new_ir = check_for_ssbo_atomic_intrinsic(ir);
   if (new_ir != ir) {
      progress = true;
      base_ir->replace_with(new_ir);
      /* The replacement call's arguments were fully lowered already; do
       * not descend into them again.
       */
      return visit_continue_with_parent;
   }

   return rvalue_visit(ir);
}


} /* unnamed namespace */

/* Entry point: run the lowering visitor over `shader`'s IR until it
 * reaches a fixed point.
 */
void
lower_ubo_reference(struct gl_linked_shader *shader, bool clamp_block_indices)
{
   lower_ubo_reference_visitor v(shader, clamp_block_indices);

   /* Loop over the instructions lowering references, because we take
    * a deref of a UBO array using a UBO dereference as the index will
    * produce a collection of instructions all of which have cloned
    * UBO dereferences for that array index.
    */
   do {
      v.progress = false;
      visit_list_elements(&v, shader->ir);
   } while (v.progress);
}