1 /* 2 * Copyright 2017 Gert Wollny 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER 19 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING 20 * FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER 21 * DEALINGS IN THE SOFTWARE. 22 */ 23 24 #include "st_glsl_to_tgsi_temprename.h" 25 #include "tgsi/tgsi_info.h" 26 #include "tgsi/tgsi_strings.h" 27 #include "program/prog_instruction.h" 28 #include "util/bitscan.h" 29 #include <limits> 30 #include <cstdlib> 31 32 /* std::sort is significantly faster than qsort */ 33 #define USE_STL_SORT 34 #ifdef USE_STL_SORT 35 #include <algorithm> 36 #endif 37 38 #ifndef NDEBUG 39 #include <iostream> 40 #include <iomanip> 41 #include "program/prog_print.h" 42 #include "util/debug.h" 43 using std::cerr; 44 using std::setw; 45 #endif 46 47 /* If <windows.h> is included this is defined and clashes with 48 * std::numeric_limits<>::max() 49 */ 50 #ifdef max 51 #undef max 52 #endif 53 54 using std::numeric_limits; 55 56 /* Without c++11 define the nullptr for forward-compatibility 57 * and better readibility */ 58 #if __cplusplus < 201103L 59 #define nullptr 0 60 #endif 61 62 #ifndef NDEBUG 63 /* Helper function to check whether we want to seen debugging output */ 64 static inline bool is_debug_enabled () 65 { 66 static int debug_enabled = -1; 67 if (debug_enabled < 0) 68 debug_enabled = env_var_as_boolean("GLSL_TO_TGSI_RENAME_DEBUG", false); 69 return debug_enabled > 0; 70 } 71 #define RENAME_DEBUG(X) if (is_debug_enabled()) do { X; } while (false); 72 #else 73 #define RENAME_DEBUG(X) 74 #endif 75 76 namespace { 77 78 enum prog_scope_type { 79 outer_scope, /* Outer program scope */ 80 loop_body, /* Inside a loop */ 81 if_branch, /* Inside if branch */ 82 else_branch, /* Inside else branch */ 83 switch_body, /* Inside switch statmenet */ 84 switch_case_branch, /* Inside switch case statmenet */ 85 switch_default_branch, /* Inside switch default statmenet */ 86 undefined_scope 87 }; 88 89 class prog_scope { 90 public: 91 prog_scope(prog_scope *parent, prog_scope_type type, int id, 92 int depth, int begin); 93 94 prog_scope_type type() const; 95 prog_scope *parent() const; 96 int nesting_depth() const; 97 int id() const; 98 int end() const; 99 int begin() const; 100 int loop_break_line() const; 101 102 const prog_scope *in_ifelse_scope() const; 103 const prog_scope *in_switchcase_scope() const; 104 const prog_scope *innermost_loop() const; 105 const prog_scope *outermost_loop() const; 106 const prog_scope *enclosing_conditional() const; 107 108 bool is_loop() const; 109 bool is_in_loop() const; 110 bool is_conditional() const; 111 112 bool break_is_for_switchcase() const; 113 bool contains_range_of(const prog_scope& other) const; 114 115 void set_end(int end); 116 void set_loop_break_line(int line); 117 118 private: 119 prog_scope_type scope_type; 120 int scope_id; 121 int scope_nesting_depth; 122 int scope_begin; 123 int scope_end; 124 int break_loop_line; 125 prog_scope *parent_scope; 126 }; 127 128 /* Some storage class to encapsulate the prog_scope (de-)allocations */ 129 class prog_scope_storage { 130 public: 131 prog_scope_storage(void *mem_ctx, int n); 132 ~prog_scope_storage(); 133 prog_scope * create(prog_scope *p, prog_scope_type type, int id, 134 int lvl, int s_begin); 135 private: 136 void *mem_ctx; 137 int current_slot; 138 prog_scope *storage; 139 }; 140 141 class temp_comp_access { 142 public: 143 temp_comp_access(); 144 void record_read(int line, prog_scope *scope); 145 void record_write(int line, prog_scope *scope); 146 lifetime get_required_lifetime(); 147 private: 148 void propagate_lifetime_to_dominant_write_scope(); 149 150 prog_scope *last_read_scope; 151 prog_scope *first_read_scope; 152 prog_scope *first_write_scope; 153 int first_write; 154 int last_read; 155 int last_write; 156 int first_read; 157 bool keep_for_full_loop; 158 }; 159 160 class temp_access { 161 public: 162 temp_access(); 163 void record_read(int line, prog_scope *scope, int swizzle); 164 void record_write(int line, prog_scope *scope, int writemask); 165 lifetime get_required_lifetime(); 166 private: 167 void update_access_mask(int mask); 168 169 temp_comp_access comp[4]; 170 int access_mask; 171 bool needs_component_tracking; 172 }; 173 174 prog_scope_storage::prog_scope_storage(void *mc, int n): 175 mem_ctx(mc), 176 current_slot(0) 177 { 178 storage = ralloc_array(mem_ctx, prog_scope, n); 179 } 180 181 prog_scope_storage::~prog_scope_storage() 182 { 183 ralloc_free(storage); 184 } 185 186 prog_scope* 187 prog_scope_storage::create(prog_scope *p, prog_scope_type type, int id, 188 int lvl, int s_begin) 189 { 190 storage[current_slot] = prog_scope(p, type, id, lvl, s_begin); 191 return &storage[current_slot++]; 192 } 193 194 prog_scope::prog_scope(prog_scope *parent, prog_scope_type type, int id, 195 int depth, int scope_begin): 196 scope_type(type), 197 scope_id(id), 198 scope_nesting_depth(depth), 199 scope_begin(scope_begin), 200 scope_end(-1), 201 break_loop_line(numeric_limits<int>::max()), 202 parent_scope(parent) 203 { 204 } 205 206 prog_scope_type prog_scope::type() const 207 { 208 return scope_type; 209 } 210 211 prog_scope *prog_scope::parent() const 212 { 213 return parent_scope; 214 } 215 216 int prog_scope::nesting_depth() const 217 { 218 return scope_nesting_depth; 219 } 220 221 bool prog_scope::is_loop() const 222 { 223 return (scope_type == loop_body); 224 } 225 226 bool prog_scope::is_in_loop() const 227 { 228 if (scope_type == loop_body) 229 return true; 230 231 if (parent_scope) 232 return parent_scope->is_in_loop(); 233 234 return false; 235 } 236 237 const prog_scope *prog_scope::innermost_loop() const 238 { 239 if (scope_type == loop_body) 240 return this; 241 242 if (parent_scope) 243 return parent_scope->innermost_loop(); 244 245 return nullptr; 246 } 247 248 const prog_scope *prog_scope::outermost_loop() const 249 { 250 const prog_scope *loop = nullptr; 251 const prog_scope *p = this; 252 253 do { 254 if (p->type() == loop_body) 255 loop = p; 256 p = p->parent(); 257 } while (p); 258 259 return loop; 260 } 261 262 const prog_scope *prog_scope::enclosing_conditional() const 263 { 264 if (is_conditional()) 265 return this; 266 267 if (parent_scope) 268 return parent_scope->enclosing_conditional(); 269 270 return nullptr; 271 } 272 273 bool prog_scope::contains_range_of(const prog_scope& other) const 274 { 275 return (begin() <= other.begin()) && (end() >= other.end()); 276 } 277 278 bool prog_scope::is_conditional() const 279 { 280 return scope_type == if_branch || 281 scope_type == else_branch || 282 scope_type == switch_case_branch || 283 scope_type == switch_default_branch; 284 } 285 286 const prog_scope *prog_scope::in_ifelse_scope() const 287 { 288 if (scope_type == if_branch || 289 scope_type == else_branch) 290 return this; 291 292 if (parent_scope) 293 return parent_scope->in_ifelse_scope(); 294 295 return nullptr; 296 } 297 298 const prog_scope *prog_scope::in_switchcase_scope() const 299 { 300 if (scope_type == switch_case_branch || 301 scope_type == switch_default_branch) 302 return this; 303 304 if (parent_scope) 305 return parent_scope->in_switchcase_scope(); 306 307 return nullptr; 308 } 309 310 bool prog_scope::break_is_for_switchcase() const 311 { 312 if (scope_type == loop_body) 313 return false; 314 315 if (scope_type == switch_case_branch || 316 scope_type == switch_default_branch || 317 scope_type == switch_body) 318 return true; 319 320 if (parent_scope) 321 return parent_scope->break_is_for_switchcase(); 322 323 return false; 324 } 325 326 int prog_scope::id() const 327 { 328 return scope_id; 329 } 330 331 int prog_scope::begin() const 332 { 333 return scope_begin; 334 } 335 336 int prog_scope::end() const 337 { 338 return scope_end; 339 } 340 341 void prog_scope::set_end(int end) 342 { 343 if (scope_end == -1) 344 scope_end = end; 345 } 346 347 void prog_scope::set_loop_break_line(int line) 348 { 349 if (scope_type == loop_body) { 350 break_loop_line = MIN2(break_loop_line, line); 351 } else { 352 if (parent_scope) 353 parent()->set_loop_break_line(line); 354 } 355 } 356 357 int prog_scope::loop_break_line() const 358 { 359 return break_loop_line; 360 } 361 362 temp_access::temp_access(): 363 access_mask(0), 364 needs_component_tracking(false) 365 { 366 } 367 368 void temp_access::update_access_mask(int mask) 369 { 370 if (access_mask && access_mask != mask) 371 needs_component_tracking = true; 372 access_mask |= mask; 373 } 374 375 void temp_access::record_write(int line, prog_scope *scope, int writemask) 376 { 377 update_access_mask(writemask); 378 379 if (writemask & WRITEMASK_X) 380 comp[0].record_write(line, scope); 381 if (writemask & WRITEMASK_Y) 382 comp[1].record_write(line, scope); 383 if (writemask & WRITEMASK_Z) 384 comp[2].record_write(line, scope); 385 if (writemask & WRITEMASK_W) 386 comp[3].record_write(line, scope); 387 } 388 389 void temp_access::record_read(int line, prog_scope *scope, int swizzle) 390 { 391 int readmask = 0; 392 for (int idx = 0; idx < 4; ++idx) { 393 int swz = GET_SWZ(swizzle, idx); 394 readmask |= (1 << swz) & 0xF; 395 } 396 update_access_mask(readmask); 397 398 if (readmask & WRITEMASK_X) 399 comp[0].record_read(line, scope); 400 if (readmask & WRITEMASK_Y) 401 comp[1].record_read(line, scope); 402 if (readmask & WRITEMASK_Z) 403 comp[2].record_read(line, scope); 404 if (readmask & WRITEMASK_W) 405 comp[3].record_read(line, scope); 406 } 407 408 inline static lifetime make_lifetime(int b, int e) 409 { 410 lifetime lt; 411 lt.begin = b; 412 lt.end = e; 413 return lt; 414 } 415 416 lifetime temp_access::get_required_lifetime() 417 { 418 lifetime result = make_lifetime(-1, -1); 419 420 unsigned mask = access_mask; 421 while (mask) { 422 unsigned chan = u_bit_scan(&mask); 423 lifetime lt = comp[chan].get_required_lifetime(); 424 425 if (lt.begin >= 0) { 426 if ((result.begin < 0) || (result.begin > lt.begin)) 427 result.begin = lt.begin; 428 } 429 430 if (lt.end > result.end) 431 result.end = lt.end; 432 433 if (!needs_component_tracking) 434 break; 435 } 436 return result; 437 } 438 439 temp_comp_access::temp_comp_access(): 440 last_read_scope(nullptr), 441 first_read_scope(nullptr), 442 first_write_scope(nullptr), 443 first_write(-1), 444 last_read(-1), 445 last_write(-1), 446 first_read(numeric_limits<int>::max()) 447 { 448 } 449 450 void temp_comp_access::record_read(int line, prog_scope *scope) 451 { 452 last_read_scope = scope; 453 last_read = line; 454 455 if (first_read > line) { 456 first_read = line; 457 first_read_scope = scope; 458 } 459 } 460 461 void temp_comp_access::record_write(int line, prog_scope *scope) 462 { 463 last_write = line; 464 465 if (first_write < 0) { 466 first_write = line; 467 first_write_scope = scope; 468 } 469 } 470 471 void temp_comp_access::propagate_lifetime_to_dominant_write_scope() 472 { 473 first_write = first_write_scope->begin(); 474 int lr = first_write_scope->end(); 475 476 if (last_read < lr) 477 last_read = lr; 478 } 479 480 lifetime temp_comp_access::get_required_lifetime() 481 { 482 bool keep_for_full_loop = false; 483 484 /* This register component is not used at all, or only read, 485 * mark it as unused and ignore it when renaming. 486 * glsl_to_tgsi_visitor::renumber_registers will take care of 487 * eliminating registers that are not written to. 488 */ 489 if (last_write < 0) 490 return make_lifetime(-1, -1); 491 492 assert(first_write_scope); 493 494 /* Only written to, just make sure the register component is not 495 * reused in the range it is used to write to 496 */ 497 if (!last_read_scope) 498 return make_lifetime(first_write, last_write + 1); 499 500 const prog_scope *enclosing_scope_first_read = first_read_scope; 501 const prog_scope *enclosing_scope_first_write = first_write_scope; 502 503 /* We read before writing in a loop 504 * hence the value must survive the loops 505 */ 506 if ((first_read <= first_write) && 507 first_read_scope->is_in_loop()) { 508 keep_for_full_loop = true; 509 enclosing_scope_first_read = first_read_scope->outermost_loop(); 510 } 511 512 /* A conditional write within a nested loop must survive 513 * the outermost loop, but only if it is read outside 514 * the condition scope where we write. 515 */ 516 const prog_scope *conditional = enclosing_scope_first_write->enclosing_conditional(); 517 if (conditional && conditional->is_in_loop() && 518 !conditional->contains_range_of(*last_read_scope)) { 519 keep_for_full_loop = true; 520 enclosing_scope_first_write = conditional->outermost_loop(); 521 } 522 523 /* Evaluate the scope that is shared by all: required first write scope, 524 * required first read before write scope, and last read scope. 525 */ 526 const prog_scope *enclosing_scope = enclosing_scope_first_read; 527 if (enclosing_scope_first_write->contains_range_of(*enclosing_scope)) 528 enclosing_scope = enclosing_scope_first_write; 529 530 if (last_read_scope->contains_range_of(*enclosing_scope)) 531 enclosing_scope = last_read_scope; 532 533 while (!enclosing_scope->contains_range_of(*enclosing_scope_first_write) || 534 !enclosing_scope->contains_range_of(*last_read_scope)) { 535 enclosing_scope = enclosing_scope->parent(); 536 assert(enclosing_scope); 537 } 538 539 /* Propagate the last read scope to the target scope */ 540 while (enclosing_scope->nesting_depth() < last_read_scope->nesting_depth()) { 541 /* If the read is in a loop and we have to move up the scope we need to 542 * extend the life time to the end of this current loop because at this 543 * point we don't know whether the component was written before 544 * un-conditionally in the same loop. 545 */ 546 if (last_read_scope->is_loop()) 547 last_read = last_read_scope->end(); 548 549 last_read_scope = last_read_scope->parent(); 550 } 551 552 /* If the variable has to be kept for the whole loop, and we 553 * are currently in a loop, then propagate the life time. 554 */ 555 if (keep_for_full_loop && first_write_scope->is_loop()) 556 propagate_lifetime_to_dominant_write_scope(); 557 558 /* Propagate the first_dominant_write scope to the target scope */ 559 while (enclosing_scope->nesting_depth() < first_write_scope->nesting_depth()) { 560 /* Propagate lifetime if there was a break in a loop and the write was 561 * after the break inside that loop. Note, that this is only needed if 562 * we move up in the scopes. 563 */ 564 if (first_write_scope->loop_break_line() < first_write) { 565 keep_for_full_loop = true; 566 propagate_lifetime_to_dominant_write_scope(); 567 } 568 569 first_write_scope = first_write_scope->parent(); 570 571 /* Propagte lifetime if we are now in a loop */ 572 if (keep_for_full_loop && first_write_scope->is_loop()) 573 propagate_lifetime_to_dominant_write_scope(); 574 } 575 576 /* The last write past the last read is dead code, but we have to 577 * ensure that the component is not reused too early, hence extend the 578 * lifetime past the last write. 579 */ 580 if (last_write >= last_read) 581 last_read = last_write + 1; 582 583 /* Here we are at the same scope, all is resolved */ 584 return make_lifetime(first_write, last_read); 585 } 586 587 /* Helper class for sorting and searching the registers based 588 * on life times. */ 589 class access_record { 590 public: 591 int begin; 592 int end; 593 int reg; 594 bool erase; 595 596 bool operator < (const access_record& rhs) const { 597 return begin < rhs.begin; 598 } 599 }; 600 601 } 602 603 #ifndef NDEBUG 604 /* Function used for debugging. */ 605 static void dump_instruction(int line, prog_scope *scope, 606 const glsl_to_tgsi_instruction& inst); 607 #endif 608 609 /* Scan the program and estimate the required register life times. 610 * The array lifetimes must be pre-allocated 611 */ 612 bool 613 get_temp_registers_required_lifetimes(void *mem_ctx, exec_list *instructions, 614 int ntemps, struct lifetime *lifetimes) 615 { 616 int line = 0; 617 int loop_id = 0; 618 int if_id = 0; 619 int switch_id = 0; 620 bool is_at_end = false; 621 bool ok = true; 622 int n_scopes = 1; 623 624 /* Count scopes to allocate the needed space without the need for 625 * re-allocation 626 */ 627 foreach_in_list(glsl_to_tgsi_instruction, inst, instructions) { 628 if (inst->op == TGSI_OPCODE_BGNLOOP || 629 inst->op == TGSI_OPCODE_SWITCH || 630 inst->op == TGSI_OPCODE_CASE || 631 inst->op == TGSI_OPCODE_IF || 632 inst->op == TGSI_OPCODE_UIF || 633 inst->op == TGSI_OPCODE_ELSE || 634 inst->op == TGSI_OPCODE_DEFAULT) 635 ++n_scopes; 636 } 637 638 prog_scope_storage scopes(mem_ctx, n_scopes); 639 temp_access *acc = new temp_access[ntemps]; 640 641 prog_scope *cur_scope = scopes.create(nullptr, outer_scope, 0, 0, line); 642 643 RENAME_DEBUG(cerr << "========= Begin shader ============\n"); 644 645 foreach_in_list(glsl_to_tgsi_instruction, inst, instructions) { 646 if (is_at_end) { 647 assert(!"GLSL_TO_TGSI: shader has instructions past end marker"); 648 break; 649 } 650 651 RENAME_DEBUG(dump_instruction(line, cur_scope, *inst)); 652 653 switch (inst->op) { 654 case TGSI_OPCODE_BGNLOOP: { 655 cur_scope = scopes.create(cur_scope, loop_body, loop_id++, 656 cur_scope->nesting_depth() + 1, line); 657 break; 658 } 659 case TGSI_OPCODE_ENDLOOP: { 660 cur_scope->set_end(line); 661 cur_scope = cur_scope->parent(); 662 assert(cur_scope); 663 break; 664 } 665 case TGSI_OPCODE_IF: 666 case TGSI_OPCODE_UIF: { 667 assert(num_inst_src_regs(inst) == 1); 668 const st_src_reg& src = inst->src[0]; 669 if (src.file == PROGRAM_TEMPORARY) 670 acc[src.index].record_read(line, cur_scope, src.swizzle); 671 cur_scope = scopes.create(cur_scope, if_branch, if_id++, 672 cur_scope->nesting_depth() + 1, line + 1); 673 break; 674 } 675 case TGSI_OPCODE_ELSE: { 676 assert(cur_scope->type() == if_branch); 677 cur_scope->set_end(line - 1); 678 cur_scope = scopes.create(cur_scope->parent(), else_branch, 679 cur_scope->id(), cur_scope->nesting_depth(), 680 line + 1); 681 break; 682 } 683 case TGSI_OPCODE_END: { 684 cur_scope->set_end(line); 685 is_at_end = true; 686 break; 687 } 688 case TGSI_OPCODE_ENDIF: { 689 cur_scope->set_end(line - 1); 690 cur_scope = cur_scope->parent(); 691 assert(cur_scope); 692 break; 693 } 694 case TGSI_OPCODE_SWITCH: { 695 assert(num_inst_src_regs(inst) == 1); 696 const st_src_reg& src = inst->src[0]; 697 prog_scope *scope = scopes.create(cur_scope, switch_body, switch_id++, 698 cur_scope->nesting_depth() + 1, line); 699 /* We record the read only for the SWITCH statement itself, like it 700 * is used by the only consumer of TGSI_OPCODE_SWITCH in tgsi_exec.c. 701 */ 702 if (src.file == PROGRAM_TEMPORARY) 703 acc[src.index].record_read(line, cur_scope, src.swizzle); 704 cur_scope = scope; 705 break; 706 } 707 case TGSI_OPCODE_ENDSWITCH: { 708 cur_scope->set_end(line - 1); 709 /* Remove the case level, it might not have been 710 * closed with a break. 711 */ 712 if (cur_scope->type() != switch_body) 713 cur_scope = cur_scope->parent(); 714 715 cur_scope = cur_scope->parent(); 716 assert(cur_scope); 717 break; 718 } 719 case TGSI_OPCODE_CASE: { 720 /* Take care of tracking the registers. */ 721 prog_scope *switch_scope = cur_scope->type() == switch_body ? 722 cur_scope : cur_scope->parent(); 723 724 assert(num_inst_src_regs(inst) == 1); 725 const st_src_reg& src = inst->src[0]; 726 if (src.file == PROGRAM_TEMPORARY) 727 acc[src.index].record_read(line, switch_scope, src.swizzle); 728 729 /* Fall through to allocate the scope. */ 730 } 731 case TGSI_OPCODE_DEFAULT: { 732 prog_scope_type t = inst->op == TGSI_OPCODE_CASE ? switch_case_branch 733 : switch_default_branch; 734 prog_scope *switch_scope = (cur_scope->type() == switch_body) ? 735 cur_scope : cur_scope->parent(); 736 assert(switch_scope->type() == switch_body); 737 prog_scope *scope = scopes.create(switch_scope, t, 738 switch_scope->id(), 739 switch_scope->nesting_depth() + 1, 740 line); 741 /* Previous case falls through, so scope was not yet closed. */ 742 if ((cur_scope != switch_scope) && (cur_scope->end() == -1)) 743 cur_scope->set_end(line - 1); 744 cur_scope = scope; 745 break; 746 } 747 case TGSI_OPCODE_BRK: { 748 if (cur_scope->break_is_for_switchcase()) { 749 cur_scope->set_end(line - 1); 750 } else { 751 cur_scope->set_loop_break_line(line); 752 } 753 break; 754 } 755 case TGSI_OPCODE_CAL: 756 case TGSI_OPCODE_RET: 757 /* These opcodes are not supported and if a subroutine would 758 * be called in a shader, then the lifetime tracking would have 759 * to follow that call to see which registers are used there. 760 * Since this is not done, we have to bail out here and signal 761 * that no register merge will take place. 762 */ 763 ok = false; 764 goto out; 765 default: { 766 for (unsigned j = 0; j < num_inst_src_regs(inst); j++) { 767 const st_src_reg& src = inst->src[j]; 768 if (src.file == PROGRAM_TEMPORARY) 769 acc[src.index].record_read(line, cur_scope, src.swizzle); 770 } 771 for (unsigned j = 0; j < inst->tex_offset_num_offset; j++) { 772 const st_src_reg& src = inst->tex_offsets[j]; 773 if (src.file == PROGRAM_TEMPORARY) 774 acc[src.index].record_read(line, cur_scope, src.swizzle); 775 } 776 for (unsigned j = 0; j < num_inst_dst_regs(inst); j++) { 777 const st_dst_reg& dst = inst->dst[j]; 778 if (dst.file == PROGRAM_TEMPORARY) 779 acc[dst.index].record_write(line, cur_scope, dst.writemask); 780 } 781 } 782 } 783 ++line; 784 } 785 786 RENAME_DEBUG(cerr << "==================================\n\n"); 787 788 /* Make sure last scope is closed, even though no 789 * TGSI_OPCODE_END was given. 790 */ 791 if (cur_scope->end() < 0) 792 cur_scope->set_end(line - 1); 793 794 RENAME_DEBUG(cerr << "========= lifetimes ==============\n"); 795 for(int i = 0; i < ntemps; ++i) { 796 RENAME_DEBUG(cerr << setw(4) << i); 797 lifetimes[i] = acc[i].get_required_lifetime(); 798 RENAME_DEBUG(cerr << ": [" << lifetimes[i].begin << ", " 799 << lifetimes[i].end << "]\n"); 800 } 801 RENAME_DEBUG(cerr << "==================================\n\n"); 802 803 out: 804 delete[] acc; 805 return ok; 806 } 807 808 /* Find the next register between [start, end) that has a life time starting 809 * at or after bound by using a binary search. 810 * start points at the beginning of the search range, 811 * end points at the element past the end of the search range, and 812 * the array comprising [start, end) must be sorted in ascending order. 813 */ 814 static access_record* 815 find_next_rename(access_record* start, access_record* end, int bound) 816 { 817 int delta = (end - start); 818 819 while (delta > 0) { 820 int half = delta >> 1; 821 access_record* middle = start + half; 822 823 if (bound <= middle->begin) { 824 delta = half; 825 } else { 826 start = middle; 827 ++start; 828 delta -= half + 1; 829 } 830 } 831 832 return start; 833 } 834 835 #ifndef USE_STL_SORT 836 static int access_record_compare (const void *a, const void *b) { 837 const access_record *aa = static_cast<const access_record*>(a); 838 const access_record *bb = static_cast<const access_record*>(b); 839 return aa->begin < bb->begin ? -1 : (aa->begin > bb->begin ? 1 : 0); 840 } 841 #endif 842 843 /* This functions evaluates the register merges by using a binary 844 * search to find suitable merge candidates. */ 845 void get_temp_registers_remapping(void *mem_ctx, int ntemps, 846 const struct lifetime* lifetimes, 847 struct rename_reg_pair *result) 848 { 849 access_record *reg_access = ralloc_array(mem_ctx, access_record, ntemps); 850 851 int used_temps = 0; 852 for (int i = 0; i < ntemps; ++i) { 853 if (lifetimes[i].begin >= 0) { 854 reg_access[used_temps].begin = lifetimes[i].begin; 855 reg_access[used_temps].end = lifetimes[i].end; 856 reg_access[used_temps].reg = i; 857 reg_access[used_temps].erase = false; 858 ++used_temps; 859 } 860 } 861 862 #ifdef USE_STL_SORT 863 std::sort(reg_access, reg_access + used_temps); 864 #else 865 std::qsort(reg_access, used_temps, sizeof(access_record), access_record_compare); 866 #endif 867 868 access_record *trgt = reg_access; 869 access_record *reg_access_end = reg_access + used_temps; 870 access_record *first_erase = reg_access_end; 871 access_record *search_start = trgt + 1; 872 873 while (trgt != reg_access_end) { 874 access_record *src = find_next_rename(search_start, reg_access_end, 875 trgt->end); 876 if (src != reg_access_end) { 877 result[src->reg].new_reg = trgt->reg; 878 result[src->reg].valid = true; 879 trgt->end = src->end; 880 881 /* Since we only search forward, don't remove the renamed 882 * register just now, only mark it. */ 883 src->erase = true; 884 885 if (first_erase == reg_access_end) 886 first_erase = src; 887 888 search_start = src + 1; 889 } else { 890 /* Moving to the next target register it is time to remove 891 * the already merged registers from the search range */ 892 if (first_erase != reg_access_end) { 893 access_record *outp = first_erase; 894 access_record *inp = first_erase + 1; 895 896 while (inp != reg_access_end) { 897 if (!inp->erase) 898 *outp++ = *inp; 899 ++inp; 900 } 901 902 reg_access_end = outp; 903 first_erase = reg_access_end; 904 } 905 ++trgt; 906 search_start = trgt + 1; 907 } 908 } 909 ralloc_free(reg_access); 910 } 911 912 /* Code below used for debugging */ 913 #ifndef NDEBUG 914 static const char swizzle_txt[] = "xyzw"; 915 916 static const char *tgsi_file_names[PROGRAM_FILE_MAX] = { 917 "TEMP", "ARRAY", "IN", "OUT", "STATE", "CONST", 918 "UNIFORM", "WO", "ADDR", "SAMPLER", "SV", "UNDEF", 919 "IMM", "BUF", "MEM", "IMAGE" 920 }; 921 922 static 923 void dump_instruction(int line, prog_scope *scope, 924 const glsl_to_tgsi_instruction& inst) 925 { 926 const struct tgsi_opcode_info *info = tgsi_get_opcode_info(inst.op); 927 928 int indent = scope->nesting_depth(); 929 if ((scope->type() == switch_case_branch || 930 scope->type() == switch_default_branch) && 931 (info->opcode == TGSI_OPCODE_CASE || 932 info->opcode == TGSI_OPCODE_DEFAULT)) 933 --indent; 934 935 if (info->opcode == TGSI_OPCODE_ENDIF || 936 info->opcode == TGSI_OPCODE_ELSE || 937 info->opcode == TGSI_OPCODE_ENDLOOP || 938 info->opcode == TGSI_OPCODE_ENDSWITCH) 939 --indent; 940 941 cerr << setw(4) << line << ": "; 942 for (int i = 0; i < indent; ++i) 943 cerr << " "; 944 cerr << tgsi_get_opcode_name(info->opcode) << " "; 945 946 bool has_operators = false; 947 for (unsigned j = 0; j < num_inst_dst_regs(&inst); j++) { 948 has_operators = true; 949 if (j > 0) 950 cerr << ", "; 951 952 const st_dst_reg& dst = inst.dst[j]; 953 cerr << tgsi_file_names[dst.file]; 954 955 if (dst.file == PROGRAM_ARRAY) 956 cerr << "(" << dst.array_id << ")"; 957 958 cerr << "[" << dst.index << "]"; 959 960 if (dst.writemask != TGSI_WRITEMASK_XYZW) { 961 cerr << "."; 962 if (dst.writemask & TGSI_WRITEMASK_X) cerr << "x"; 963 if (dst.writemask & TGSI_WRITEMASK_Y) cerr << "y"; 964 if (dst.writemask & TGSI_WRITEMASK_Z) cerr << "z"; 965 if (dst.writemask & TGSI_WRITEMASK_W) cerr << "w"; 966 } 967 } 968 if (has_operators) 969 cerr << " := "; 970 971 for (unsigned j = 0; j < num_inst_src_regs(&inst); j++) { 972 if (j > 0) 973 cerr << ", "; 974 975 const st_src_reg& src = inst.src[j]; 976 cerr << tgsi_file_names[src.file] 977 << "[" << src.index << "]"; 978 if (src.swizzle != SWIZZLE_XYZW) { 979 cerr << "."; 980 for (int idx = 0; idx < 4; ++idx) { 981 int swz = GET_SWZ(src.swizzle, idx); 982 if (swz < 4) { 983 cerr << swizzle_txt[swz]; 984 } 985 } 986 } 987 } 988 989 if (inst.tex_offset_num_offset > 0) { 990 cerr << ", TEXOFS: "; 991 for (unsigned j = 0; j < inst.tex_offset_num_offset; j++) { 992 if (j > 0) 993 cerr << ", "; 994 995 const st_src_reg& src = inst.tex_offsets[j]; 996 cerr << tgsi_file_names[src.file] 997 << "[" << src.index << "]"; 998 if (src.swizzle != SWIZZLE_XYZW) { 999 cerr << "."; 1000 for (int idx = 0; idx < 4; ++idx) { 1001 int swz = GET_SWZ(src.swizzle, idx); 1002 if (swz < 4) { 1003 cerr << swizzle_txt[swz]; 1004 } 1005 } 1006 } 1007 } 1008 } 1009 cerr << "\n"; 1010 } 1011 #endif 1012