/*
 * Copyright 2013 Vadim Girlin <vadimgirlin (at) gmail.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *      Vadim Girlin
 */

#define FBC_DEBUG 0

#if FBC_DEBUG
#define FBC_DUMP(q) do { q } while (0)
#else
#define FBC_DUMP(q)
#endif

#include "sb_bc.h"
#include "sb_shader.h"
#include "sb_pass.h"

namespace r600_sb {

void bc_finalizer::insert_rv6xx_load_ar_workaround(alu_group_node *b4) {

	alu_group_node *g = sh.create_alu_group();
	alu_node *a = sh.create_alu();

	a->bc.set_op(ALU_OP0_NOP);
	a->bc.last = 1;

	g->push_back(a);
	b4->insert_before(g);
}

int bc_finalizer::run() {

	run_on(sh.root);

	regions_vec &rv = sh.get_regions();
	for (regions_vec::reverse_iterator I = rv.rbegin(), E = rv.rend(); I != E;
			++I) {
		region_node *r = *I;

		assert(r);

		bool loop = r->is_loop();

		if (loop)
			finalize_loop(r);
		else
			finalize_if(r);

		r->expand();
	}

	cf_peephole();

	// workaround for some problems on r6xx/7xx
	// add ALU NOP to each vertex shader
	if (!ctx.is_egcm() && (sh.target == TARGET_VS || sh.target == TARGET_ES)) {
		cf_node *c = sh.create_clause(NST_ALU_CLAUSE);

		alu_group_node *g = sh.create_alu_group();

		alu_node *a = sh.create_alu();
		a->bc.set_op(ALU_OP0_NOP);
		a->bc.last = 1;

		g->push_back(a);
		c->push_back(g);

		sh.root->push_back(c);

		c = sh.create_cf(CF_OP_NOP);
		sh.root->push_back(c);

		last_cf = c;
	}

	if (!ctx.is_cayman() && last_cf->bc.op_ptr->flags & CF_ALU) {
		last_cf = sh.create_cf(CF_OP_NOP);
		sh.root->push_back(last_cf);
	}

	if (ctx.is_cayman()) {
		if (!last_cf) {
			cf_node *c = sh.create_cf(CF_OP_CF_END);
			sh.root->push_back(c);
		} else
			last_cf->insert_after(sh.create_cf(CF_OP_CF_END));
	} else
		last_cf->bc.end_of_program = 1;

	for (unsigned t = EXP_PIXEL; t < EXP_TYPE_COUNT; ++t) {
		cf_node *le = last_export[t];
		if (le)
			le->bc.set_op(CF_OP_EXPORT_DONE);
	}

	sh.ngpr = ngpr;
	sh.nstack = nstack;
	return 0;
}

void bc_finalizer::finalize_loop(region_node* r) {

	update_nstack(r);

	cf_node *loop_start = sh.create_cf(CF_OP_LOOP_START_DX10);
	cf_node *loop_end = sh.create_cf(CF_OP_LOOP_END);

	// Update last_cf, but don't overwrite it if it's outside the current loop
	// nest, since it may point to a cf that is later in program order.
	// The single parent level check is sufficient since finalize_loop() is
	// processed in reverse order from innermost to outermost loop nest level.
	if (!last_cf || last_cf->get_parent_region() == r) {
		last_cf = loop_end;
	}

	loop_start->jump_after(loop_end);
	loop_end->jump_after(loop_start);

	for (depart_vec::iterator I = r->departs.begin(), E = r->departs.end();
			I != E; ++I) {
		depart_node *dep = *I;
		cf_node *loop_break = sh.create_cf(CF_OP_LOOP_BREAK);
		loop_break->jump(loop_end);
		dep->push_back(loop_break);
		dep->expand();
	}

	// FIXME produces unnecessary LOOP_CONTINUE
	for (repeat_vec::iterator I = r->repeats.begin(), E = r->repeats.end();
			I != E; ++I) {
		repeat_node *rep = *I;
		if (!(rep->parent == r && rep->prev == NULL)) {
			cf_node *loop_cont = sh.create_cf(CF_OP_LOOP_CONTINUE);
			loop_cont->jump(loop_end);
			rep->push_back(loop_cont);
		}
		rep->expand();
	}

	r->push_front(loop_start);
	r->push_back(loop_end);
}

void bc_finalizer::finalize_if(region_node* r) {

	update_nstack(r);

	// expecting the following control flow structure here:
	//   - region
	//     {
	//       - depart/repeat 1 (it may be depart/repeat for some outer region)
	//         {
	//           - if
	//             {
	//               - depart/repeat 2 (possibly for outer region)
	//                 {
	//                   - some optional code
	//                 }
	//             }
	//           - optional <else> code ...
	//         }
	//     }

	container_node *repdep1 = static_cast<container_node*>(r->first);
	assert(repdep1->is_depart() || repdep1->is_repeat());

	if_node *n_if = static_cast<if_node*>(repdep1->first);

	if (n_if) {

		assert(n_if->is_if());

		container_node *repdep2 = static_cast<container_node*>(n_if->first);
		assert(repdep2->is_depart() || repdep2->is_repeat());

		cf_node *if_jump = sh.create_cf(CF_OP_JUMP);
		cf_node *if_pop = sh.create_cf(CF_OP_POP);

		if (!last_cf || last_cf->get_parent_region() == r) {
			last_cf = if_pop;
		}
		if_pop->bc.pop_count = 1;
		if_pop->jump_after(if_pop);

		r->push_front(if_jump);
		r->push_back(if_pop);

		bool has_else = n_if->next;

		if (has_else) {
			cf_node *nelse = sh.create_cf(CF_OP_ELSE);
			n_if->insert_after(nelse);
			if_jump->jump(nelse);
			nelse->jump_after(if_pop);
			nelse->bc.pop_count = 1;

		} else {
			if_jump->jump_after(if_pop);
			if_jump->bc.pop_count = 1;
		}

		n_if->expand();
	}

	for (depart_vec::iterator I = r->departs.begin(), E = r->departs.end();
			I != E; ++I) {
		(*I)->expand();
	}
	r->departs.clear();
	assert(r->repeats.empty());
}

void bc_finalizer::run_on(container_node* c) {
	node *prev_node = NULL;
	for (node_iterator I = c->begin(), E = c->end(); I != E; ++I) {
		node *n = *I;

		if (n->is_alu_group()) {
			finalize_alu_group(static_cast<alu_group_node*>(n), prev_node);
		} else {
			if (n->is_alu_clause()) {
				cf_node *c = static_cast<cf_node*>(n);

				if (c->bc.op == CF_OP_ALU_PUSH_BEFORE && ctx.is_egcm()) {
					if (ctx.stack_workaround_8xx) {
						region_node *r = c->get_parent_region();
						if (r) {
							unsigned ifs, loops;
							unsigned elems = get_stack_depth(r, loops, ifs);
							unsigned dmod1 = elems % ctx.stack_entry_size;
							unsigned dmod2 = (elems + 1) % ctx.stack_entry_size;

							if (elems && (!dmod1 || !dmod2))
								c->flags |= NF_ALU_STACK_WORKAROUND;
						}
					} else if (ctx.stack_workaround_9xx) {
						region_node *r = c->get_parent_region();
						if (r) {
							unsigned ifs, loops;
							get_stack_depth(r, loops, ifs);
							if (loops >= 2)
								c->flags |= NF_ALU_STACK_WORKAROUND;
						}
					}
				}
			} else if (n->is_fetch_inst()) {
				finalize_fetch(static_cast<fetch_node*>(n));
			} else if (n->is_cf_inst()) {
				finalize_cf(static_cast<cf_node*>(n));
			}
			if (n->is_container())
				run_on(static_cast<container_node*>(n));
		}
		prev_node = n;
	}
}

void bc_finalizer::finalize_alu_group(alu_group_node* g, node *prev_node) {

	alu_node *last = NULL;
	alu_group_node *prev_g = NULL;
	bool add_nop = false;
	if (prev_node && prev_node->is_alu_group()) {
		prev_g = static_cast<alu_group_node*>(prev_node);
	}

	for (node_iterator I = g->begin(), E = g->end(); I != E; ++I) {
		alu_node *n = static_cast<alu_node*>(*I);
		unsigned slot = n->bc.slot;
		value *d = n->dst.empty() ? NULL : n->dst[0];

		if (d && d->is_special_reg()) {
			assert((n->bc.op_ptr->flags & AF_MOVA) || d->is_geometry_emit());
			d = NULL;
		}

		sel_chan fdst = d ? d->get_final_gpr() : sel_chan(0, 0);

		if (d) {
			assert(fdst.chan() == slot || slot == SLOT_TRANS);
		}

		if (!(n->bc.op_ptr->flags & AF_MOVA && ctx.is_cayman()))
			n->bc.dst_gpr = fdst.sel();
		n->bc.dst_chan = d ? fdst.chan() : slot < SLOT_TRANS ? slot : 0;

		if (d && d->is_rel() && d->rel && !d->rel->is_const()) {
			n->bc.dst_rel = 1;
			update_ngpr(d->array->gpr.sel() + d->array->array_size - 1);
		} else {
			n->bc.dst_rel = 0;
		}

		n->bc.write_mask = d != NULL;
		n->bc.last = 0;

		if (n->bc.op_ptr->flags & AF_PRED) {
			n->bc.update_pred = (n->dst[1] != NULL);
			n->bc.update_exec_mask = (n->dst[2] != NULL);
		}

		// FIXME handle predication here
		n->bc.pred_sel = PRED_SEL_OFF;

		update_ngpr(n->bc.dst_gpr);

		add_nop |= finalize_alu_src(g, n, prev_g);

		last = n;
	}

	if (add_nop) {
		if (sh.get_ctx().r6xx_gpr_index_workaround) {
			insert_rv6xx_load_ar_workaround(g);
		}
	}
	last->bc.last = 1;
}

bool bc_finalizer::finalize_alu_src(alu_group_node* g, alu_node* a,
		alu_group_node *prev) {
	vvec &sv = a->src;
	bool add_nop = false;
	FBC_DUMP(
		sblog << "finalize_alu_src: ";
		dump::dump_op(a);
		sblog << "\n";
	);

	unsigned si = 0;

	for (vvec::iterator I = sv.begin(), E = sv.end(); I != E; ++I, ++si) {
		value *v = *I;
		assert(v);

		bc_alu_src &src = a->bc.src[si];
		sel_chan sc;
		src.rel = 0;

		sel_chan gpr;

		switch (v->kind) {
		case VLK_REL_REG:
			sc = v->get_final_gpr();
			src.sel = sc.sel();
			src.chan = sc.chan();
			if (!v->rel->is_const()) {
				src.rel = 1;
				update_ngpr(v->array->gpr.sel() + v->array->array_size - 1);
				if (prev && !add_nop) {
					for (node_iterator pI = prev->begin(), pE = prev->end();
							pI != pE; ++pI) {
						alu_node *pn = static_cast<alu_node*>(*pI);
						if (pn->bc.dst_gpr == src.sel) {
							add_nop = true;
							break;
						}
					}
				}
			} else
				src.rel = 0;

			break;
		case VLK_REG:
			gpr = v->get_final_gpr();
			src.sel = gpr.sel();
			src.chan = gpr.chan();
			update_ngpr(src.sel);
			break;
		case VLK_TEMP:
			src.sel = v->gpr.sel();
			src.chan = v->gpr.chan();
			update_ngpr(src.sel);
			break;
		case VLK_UNDEF:
		case VLK_CONST: {
			literal lv = v->literal_value;
			src.chan = 0;

			if (lv == literal(0))
				src.sel = ALU_SRC_0;
			else if (lv == literal(0.5f))
				src.sel = ALU_SRC_0_5;
			else if (lv == literal(1.0f))
				src.sel = ALU_SRC_1;
			else if (lv == literal(1))
				src.sel = ALU_SRC_1_INT;
			else if (lv == literal(-1))
				src.sel = ALU_SRC_M_1_INT;
			else {
				src.sel = ALU_SRC_LITERAL;
				src.chan = g->literal_chan(lv);
				src.value = lv;
			}
			break;
		}
		case VLK_KCACHE: {
			cf_node *clause = static_cast<cf_node*>(g->parent);
			assert(clause->is_alu_clause());
			sel_chan k = translate_kcache(clause, v);

			assert(k && "kcache translation failed");

			src.sel = k.sel();
			src.chan = k.chan();
			break;
		}
		case VLK_PARAM:
		case VLK_SPECIAL_CONST:
			src.sel = v->select.sel();
			src.chan = v->select.chan();
			break;
		default:
			assert(!"unknown value kind");
			break;
		}
		if (prev && !add_nop) {
			for (node_iterator pI = prev->begin(), pE = prev->end();
					pI != pE; ++pI) {
				alu_node *pn = static_cast<alu_node*>(*pI);
				if (pn->bc.dst_rel) {
					if (pn->bc.dst_gpr == src.sel) {
						add_nop = true;
						break;
					}
				}
			}
		}
	}

	while (si < 3) {
		a->bc.src[si++].sel = 0;
	}
	return add_nop;
}

void bc_finalizer::copy_fetch_src(fetch_node &dst, fetch_node &src, unsigned arg_start)
{
	int reg = -1;

	for (unsigned chan = 0; chan < 4; ++chan) {

		dst.bc.dst_sel[chan] = SEL_MASK;

		unsigned sel = SEL_MASK;

		value *v = src.src[arg_start + chan];

		if (!v || v->is_undef()) {
			sel = SEL_MASK;
		} else if (v->is_const()) {
			literal l = v->literal_value;
			if (l == literal(0))
				sel = SEL_0;
			else if (l == literal(1.0f))
				sel = SEL_1;
			else {
				sblog << "invalid fetch constant operand " << chan << " ";
				dump::dump_op(&src);
				sblog << "\n";
				abort();
			}

		} else if (v->is_any_gpr()) {
			unsigned vreg = v->gpr.sel();
			unsigned vchan = v->gpr.chan();

			if (reg == -1)
				reg = vreg;
			else if ((unsigned)reg != vreg) {
				sblog << "invalid fetch source operand " << chan << " ";
				dump::dump_op(&src);
				sblog << "\n";
				abort();
			}

			sel = vchan;

		} else {
			sblog << "invalid fetch source operand " << chan << " ";
			dump::dump_op(&src);
			sblog << "\n";
			abort();
		}

		dst.bc.src_sel[chan] = sel;
	}

	if (reg >= 0)
		update_ngpr(reg);

	dst.bc.src_gpr = reg >= 0 ? reg : 0;
}

void bc_finalizer::emit_set_grad(fetch_node* f) {

	assert(f->src.size() == 12 || f->src.size() == 13);
	unsigned ops[2] = { FETCH_OP_SET_GRADIENTS_V, FETCH_OP_SET_GRADIENTS_H };

	unsigned arg_start = 0;

	for (unsigned op = 0; op < 2; ++op) {
		fetch_node *n = sh.create_fetch();
		n->bc.set_op(ops[op]);

		arg_start += 4;

		copy_fetch_src(*n, *f, arg_start);

		f->insert_before(n);
	}

}

void bc_finalizer::emit_set_texture_offsets(fetch_node &f) {
	assert(f.src.size() == 8);

	fetch_node *n = sh.create_fetch();

	n->bc.set_op(FETCH_OP_SET_TEXTURE_OFFSETS);

	copy_fetch_src(*n, f, 4);

	f.insert_before(n);
}

void bc_finalizer::finalize_fetch(fetch_node* f) {

	int reg = -1;

	// src

	unsigned src_count = 4;

	unsigned flags = f->bc.op_ptr->flags;

	if (flags & FF_VTX) {
		src_count = 1;
	} else if (flags & FF_USEGRAD) {
		emit_set_grad(f);
	} else if (flags & FF_USE_TEXTURE_OFFSETS) {
		emit_set_texture_offsets(*f);
	}

	for (unsigned chan = 0; chan < src_count; ++chan) {

		unsigned sel = f->bc.src_sel[chan];

		if (sel > SEL_W)
			continue;

		value *v = f->src[chan];

		if (v->is_undef()) {
			sel = SEL_MASK;
		} else if (v->is_const()) {
			literal l = v->literal_value;
			if (l == literal(0))
				sel = SEL_0;
			else if (l == literal(1.0f))
				sel = SEL_1;
			else {
				sblog << "invalid fetch constant operand " << chan << " ";
				dump::dump_op(f);
				sblog << "\n";
				abort();
			}

		} else if (v->is_any_gpr()) {
			unsigned vreg = v->gpr.sel();
			unsigned vchan = v->gpr.chan();

			if (reg == -1)
				reg = vreg;
			else if ((unsigned)reg != vreg) {
				sblog << "invalid fetch source operand " << chan << " ";
				dump::dump_op(f);
				sblog << "\n";
				abort();
			}

			sel = vchan;

		} else {
			sblog << "invalid fetch source operand " << chan << " ";
			dump::dump_op(f);
			sblog << "\n";
			abort();
		}

		f->bc.src_sel[chan] = sel;
	}

	if (reg >= 0)
		update_ngpr(reg);

	f->bc.src_gpr = reg >= 0 ? reg : 0;

	// dst

	reg = -1;

	unsigned dst_swz[4] = {SEL_MASK, SEL_MASK, SEL_MASK, SEL_MASK};

	for (unsigned chan = 0; chan < 4; ++chan) {

		unsigned sel = f->bc.dst_sel[chan];

		if (sel == SEL_MASK)
			continue;

		value *v = f->dst[chan];
		if (!v)
			continue;

		if (v->is_any_gpr()) {
			unsigned vreg = v->gpr.sel();
			unsigned vchan = v->gpr.chan();

			if (reg == -1)
				reg = vreg;
			else if ((unsigned)reg != vreg) {
				sblog << "invalid fetch dst operand " << chan << " ";
				dump::dump_op(f);
				sblog << "\n";
				abort();
			}

			dst_swz[vchan] = sel;

		} else {
			sblog << "invalid fetch dst operand " << chan << " ";
			dump::dump_op(f);
			sblog << "\n";
			abort();
		}

	}

	for (unsigned i = 0; i < 4; ++i)
		f->bc.dst_sel[i] = dst_swz[i];

	assert(reg >= 0);

	if (reg >= 0)
		update_ngpr(reg);

	f->bc.dst_gpr = reg >= 0 ? reg : 0;
}

void bc_finalizer::finalize_cf(cf_node* c) {

	unsigned flags = c->bc.op_ptr->flags;

	c->bc.end_of_program = 0;
	last_cf = c;

	if (flags & CF_EXP) {
		c->bc.set_op(CF_OP_EXPORT);
		last_export[c->bc.type] = c;

		int reg = -1;

		for (unsigned chan = 0; chan < 4; ++chan) {

			unsigned sel = c->bc.sel[chan];

			if (sel > SEL_W)
				continue;

			value *v = c->src[chan];

			if (v->is_undef()) {
				sel = SEL_MASK;
			} else if (v->is_const()) {
				literal l = v->literal_value;
				if (l == literal(0))
					sel = SEL_0;
				else if (l == literal(1.0f))
					sel = SEL_1;
				else {
					sblog << "invalid export constant operand " << chan << " ";
					dump::dump_op(c);
					sblog << "\n";
					abort();
				}

			} else if (v->is_any_gpr()) {
				unsigned vreg = v->gpr.sel();
				unsigned vchan = v->gpr.chan();

				if (reg == -1)
					reg = vreg;
				else if ((unsigned)reg != vreg) {
					sblog << "invalid export source operand " << chan << " ";
					dump::dump_op(c);
					sblog << "\n";
					abort();
				}

				sel = vchan;

			} else {
				sblog << "invalid export source operand " << chan << " ";
				dump::dump_op(c);
				sblog << "\n";
				abort();
			}

			c->bc.sel[chan] = sel;
		}

		if (reg >= 0)
			update_ngpr(reg);

		c->bc.rw_gpr = reg >= 0 ? reg : 0;

	} else if (flags & CF_MEM) {

		int reg = -1;
		unsigned mask = 0;

		for (unsigned chan = 0; chan < 4; ++chan) {
			value *v = c->src[chan];
			if (!v || v->is_undef())
				continue;

			if (!v->is_any_gpr() || v->gpr.chan() != chan) {
				sblog << "invalid source operand " << chan << " ";
				dump::dump_op(c);
				sblog << "\n";
				abort();
			}
			unsigned vreg = v->gpr.sel();
			if (reg == -1)
				reg = vreg;
			else if ((unsigned)reg != vreg) {
				sblog << "invalid source operand " << chan << " ";
				dump::dump_op(c);
				sblog << "\n";
				abort();
			}

			mask |= (1 << chan);
		}

		if (reg >= 0)
			update_ngpr(reg);

		c->bc.rw_gpr = reg >= 0 ? reg : 0;
		c->bc.comp_mask = mask;

		if (((flags & CF_RAT) || (!(flags & CF_STRM))) && (c->bc.type & 1)) {

			reg = -1;

			for (unsigned chan = 0; chan < 4; ++chan) {
				value *v = c->src[4 + chan];
				if (!v || v->is_undef())
					continue;

				if (!v->is_any_gpr() || v->gpr.chan() != chan) {
					sblog << "invalid source operand " << chan << " ";
					dump::dump_op(c);
					sblog << "\n";
					abort();
				}
				unsigned vreg = v->gpr.sel();
				if (reg == -1)
					reg = vreg;
				else if ((unsigned)reg != vreg) {
					sblog << "invalid source operand " << chan << " ";
					dump::dump_op(c);
					sblog << "\n";
					abort();
				}
			}

			assert(reg >= 0);

			if (reg >= 0)
				update_ngpr(reg);

			c->bc.index_gpr = reg >= 0 ? reg : 0;
		}
	} else if (flags & CF_CALL) {
		update_nstack(c->get_parent_region(), ctx.wavefront_size == 16 ? 2 : 1);
	}
}

sel_chan bc_finalizer::translate_kcache(cf_node* alu, value* v) {
	unsigned sel = v->select.kcache_sel();
	unsigned bank = v->select.kcache_bank();
	unsigned chan = v->select.chan();
	static const unsigned kc_base[] = {128, 160, 256, 288};

	sel &= 4095;

	unsigned line = sel >> 4;

	for (unsigned k = 0; k < 4; ++k) {
		bc_kcache &kc = alu->bc.kc[k];

		if (kc.mode == KC_LOCK_NONE)
			break;

		if (kc.bank == bank && (kc.addr == line ||
				(kc.mode == KC_LOCK_2 && kc.addr + 1 == line))) {

			sel = kc_base[k] + (sel - (kc.addr << 4));

			return sel_chan(sel, chan);
		}
	}

	assert(!"kcache translation error");
	return 0;
}

void bc_finalizer::update_ngpr(unsigned gpr) {
	if (gpr < MAX_GPR - ctx.alu_temp_gprs && gpr >= ngpr)
		ngpr = gpr + 1;
}

unsigned bc_finalizer::get_stack_depth(node *n, unsigned &loops,
		unsigned &ifs, unsigned add) {
	unsigned stack_elements = add;
	bool has_non_wqm_push = (add != 0);
	region_node *r = n->is_region() ?
			static_cast<region_node*>(n) : n->get_parent_region();

	loops = 0;
	ifs = 0;

	while (r) {
		if (r->is_loop()) {
			++loops;
		} else {
			++ifs;
			has_non_wqm_push = true;
		}
		r = r->get_parent_region();
	}
	stack_elements += (loops * ctx.stack_entry_size) + ifs;

	// reserve additional elements in some cases
	switch (ctx.hw_class) {
	case HW_CLASS_R600:
	case HW_CLASS_R700:
		// If any non-WQM push is invoked, 2 elements should be reserved.
		if (has_non_wqm_push)
			stack_elements += 2;
		break;
	case HW_CLASS_CAYMAN:
		// If any stack operation is invoked, 2 elements should be reserved.
		if (stack_elements)
			stack_elements += 2;
		break;
	case HW_CLASS_EVERGREEN:
		// According to the docs we need to reserve 1 element for each of the
		// following cases:
		//   1) non-WQM push is used with WQM/LOOP frames on stack
		//   2) ALU_ELSE_AFTER is used at the point of max stack usage
		// NOTE:
		// It was found that the conditions above are not sufficient, there are
		// other cases where we also need to reserve stack space, that's why
		// we always reserve 1 stack element if we have non-WQM push on stack.
		// Condition 2 is ignored for now because we don't use this instruction.
		if (has_non_wqm_push)
			++stack_elements;
		break;
	case HW_CLASS_UNKNOWN:
		assert(0);
	}
	return stack_elements;
}

void bc_finalizer::update_nstack(region_node* r, unsigned add) {
	unsigned loops = 0;
	unsigned ifs = 0;
	unsigned elems = r ? get_stack_depth(r, loops, ifs, add) : add;

	// XXX all chips expect this value to be computed using 4 as entry size,
	// not the real entry size
	unsigned stack_entries = (elems + 3) >> 2;

	if (nstack < stack_entries)
		nstack = stack_entries;
}

void bc_finalizer::cf_peephole() {
	if (ctx.stack_workaround_8xx || ctx.stack_workaround_9xx) {
		for (node_iterator N, I = sh.root->begin(), E = sh.root->end(); I != E;
				I = N) {
			N = I; ++N;
			cf_node *c = static_cast<cf_node*>(*I);

			if (c->bc.op == CF_OP_ALU_PUSH_BEFORE &&
					(c->flags & NF_ALU_STACK_WORKAROUND)) {
				cf_node *push = sh.create_cf(CF_OP_PUSH);
				c->insert_before(push);
				push->jump(c);
				c->bc.set_op(CF_OP_ALU);
			}
		}
	}

	for (node_iterator N, I = sh.root->begin(), E = sh.root->end(); I != E;
			I = N) {
		N = I; ++N;

		cf_node *c = static_cast<cf_node*>(*I);

		if (c->jump_after_target) {
			c->jump_target = static_cast<cf_node*>(c->jump_target->next);
			c->jump_after_target = false;
		}

		if (c->is_cf_op(CF_OP_POP)) {
			node *p = c->prev;
			if (p->is_alu_clause()) {
				cf_node *a = static_cast<cf_node*>(p);

				if (a->bc.op == CF_OP_ALU) {
					a->bc.set_op(CF_OP_ALU_POP_AFTER);
					c->remove();
				}
			}
		} else if (c->is_cf_op(CF_OP_JUMP) && c->jump_target == c->next) {
			// if JUMP is immediately followed by its jump target,
			// then JUMP is useless and we can eliminate it
			c->remove();
		}
	}
}

} // namespace r600_sb