1 /* 2 * Copyright 2013 Vadim Girlin <vadimgirlin (at) gmail.com> 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * on the rights to use, copy, modify, merge, publish, distribute, sub 8 * license, and/or sell copies of the Software, and to permit persons to whom 9 * the Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 21 * USE OR OTHER DEALINGS IN THE SOFTWARE. 22 * 23 * Authors: 24 * Vadim Girlin 25 */ 26 27 #define FBC_DEBUG 0 28 29 #if FBC_DEBUG 30 #define FBC_DUMP(q) do { q } while (0) 31 #else 32 #define FBC_DUMP(q) 33 #endif 34 35 #include "sb_bc.h" 36 #include "sb_shader.h" 37 #include "sb_pass.h" 38 39 namespace r600_sb { 40 41 void bc_finalizer::insert_rv6xx_load_ar_workaround(alu_group_node *b4) { 42 43 alu_group_node *g = sh.create_alu_group(); 44 alu_node *a = sh.create_alu(); 45 46 a->bc.set_op(ALU_OP0_NOP); 47 a->bc.last = 1; 48 49 g->push_back(a); 50 b4->insert_before(g); 51 } 52 53 int bc_finalizer::run() { 54 55 run_on(sh.root); 56 57 regions_vec &rv = sh.get_regions(); 58 for (regions_vec::reverse_iterator I = rv.rbegin(), E = rv.rend(); I != E; 59 ++I) { 60 region_node *r = *I; 61 62 assert(r); 63 64 bool loop = r->is_loop(); 65 66 if (loop) 67 finalize_loop(r); 68 else 69 finalize_if(r); 70 71 r->expand(); 72 } 73 74 cf_peephole(); 75 76 // workaround for some problems on r6xx/7xx 77 // add ALU NOP to each vertex shader 78 if (!ctx.is_egcm() && (sh.target == TARGET_VS || sh.target == TARGET_ES)) { 79 cf_node *c = sh.create_clause(NST_ALU_CLAUSE); 80 81 alu_group_node *g = sh.create_alu_group(); 82 83 alu_node *a = sh.create_alu(); 84 a->bc.set_op(ALU_OP0_NOP); 85 a->bc.last = 1; 86 87 g->push_back(a); 88 c->push_back(g); 89 90 sh.root->push_back(c); 91 92 c = sh.create_cf(CF_OP_NOP); 93 sh.root->push_back(c); 94 95 last_cf = c; 96 } 97 98 if (!ctx.is_cayman() && last_cf->bc.op_ptr->flags & CF_ALU) { 99 last_cf = sh.create_cf(CF_OP_NOP); 100 sh.root->push_back(last_cf); 101 } 102 103 if (ctx.is_cayman()) { 104 if (!last_cf) { 105 cf_node *c = sh.create_cf(CF_OP_CF_END); 106 sh.root->push_back(c); 107 } else 108 last_cf->insert_after(sh.create_cf(CF_OP_CF_END)); 109 } else 110 last_cf->bc.end_of_program = 1; 111 112 for (unsigned t = EXP_PIXEL; t < EXP_TYPE_COUNT; ++t) { 113 cf_node *le = last_export[t]; 114 if (le) 115 le->bc.set_op(CF_OP_EXPORT_DONE); 116 } 117 118 sh.ngpr = ngpr; 119 sh.nstack = nstack; 120 return 0; 121 } 122 123 void bc_finalizer::finalize_loop(region_node* r) { 124 125 update_nstack(r); 126 127 cf_node *loop_start = sh.create_cf(CF_OP_LOOP_START_DX10); 128 cf_node *loop_end = sh.create_cf(CF_OP_LOOP_END); 129 130 // Update last_cf, but don't overwrite it if it's outside the current loop nest since 131 // it may point to a cf that is later in program order. 132 // The single parent level check is sufficient since finalize_loop() is processed in 133 // reverse order from innermost to outermost loop nest level. 134 if (!last_cf || last_cf->get_parent_region() == r) { 135 last_cf = loop_end; 136 } 137 138 loop_start->jump_after(loop_end); 139 loop_end->jump_after(loop_start); 140 141 for (depart_vec::iterator I = r->departs.begin(), E = r->departs.end(); 142 I != E; ++I) { 143 depart_node *dep = *I; 144 cf_node *loop_break = sh.create_cf(CF_OP_LOOP_BREAK); 145 loop_break->jump(loop_end); 146 dep->push_back(loop_break); 147 dep->expand(); 148 } 149 150 // FIXME produces unnecessary LOOP_CONTINUE 151 for (repeat_vec::iterator I = r->repeats.begin(), E = r->repeats.end(); 152 I != E; ++I) { 153 repeat_node *rep = *I; 154 if (!(rep->parent == r && rep->prev == NULL)) { 155 cf_node *loop_cont = sh.create_cf(CF_OP_LOOP_CONTINUE); 156 loop_cont->jump(loop_end); 157 rep->push_back(loop_cont); 158 } 159 rep->expand(); 160 } 161 162 r->push_front(loop_start); 163 r->push_back(loop_end); 164 } 165 166 void bc_finalizer::finalize_if(region_node* r) { 167 168 update_nstack(r); 169 170 // expecting the following control flow structure here: 171 // - region 172 // { 173 // - depart/repeat 1 (it may be depart/repeat for some outer region) 174 // { 175 // - if 176 // { 177 // - depart/repeat 2 (possibly for outer region) 178 // { 179 // - some optional code 180 // } 181 // } 182 // - optional <else> code> ... 183 // } 184 // } 185 186 container_node *repdep1 = static_cast<container_node*>(r->first); 187 assert(repdep1->is_depart() || repdep1->is_repeat()); 188 189 if_node *n_if = static_cast<if_node*>(repdep1->first); 190 191 if (n_if) { 192 193 194 assert(n_if->is_if()); 195 196 container_node *repdep2 = static_cast<container_node*>(n_if->first); 197 assert(repdep2->is_depart() || repdep2->is_repeat()); 198 199 cf_node *if_jump = sh.create_cf(CF_OP_JUMP); 200 cf_node *if_pop = sh.create_cf(CF_OP_POP); 201 202 if (!last_cf || last_cf->get_parent_region() == r) { 203 last_cf = if_pop; 204 } 205 if_pop->bc.pop_count = 1; 206 if_pop->jump_after(if_pop); 207 208 r->push_front(if_jump); 209 r->push_back(if_pop); 210 211 /* the depart/repeat 1 is actually part of the "else" code. 212 * if it's a depart for an outer loop region it will want to 213 * insert a LOOP_BREAK or LOOP_CONTINUE in here, so we need 214 * to emit the else clause. 215 */ 216 bool has_else = n_if->next; 217 218 if (repdep1->is_depart()) { 219 depart_node *dep1 = static_cast<depart_node*>(repdep1); 220 if (dep1->target != r && dep1->target->is_loop()) 221 has_else = true; 222 } 223 224 if (repdep1->is_repeat()) { 225 repeat_node *rep1 = static_cast<repeat_node*>(repdep1); 226 if (rep1->target != r && rep1->target->is_loop()) 227 has_else = true; 228 } 229 230 if (has_else) { 231 cf_node *nelse = sh.create_cf(CF_OP_ELSE); 232 n_if->insert_after(nelse); 233 if_jump->jump(nelse); 234 nelse->jump_after(if_pop); 235 nelse->bc.pop_count = 1; 236 237 } else { 238 if_jump->jump_after(if_pop); 239 if_jump->bc.pop_count = 1; 240 } 241 242 n_if->expand(); 243 } 244 245 for (depart_vec::iterator I = r->departs.begin(), E = r->departs.end(); 246 I != E; ++I) { 247 (*I)->expand(); 248 } 249 r->departs.clear(); 250 assert(r->repeats.empty()); 251 } 252 253 void bc_finalizer::run_on(container_node* c) { 254 node *prev_node = NULL; 255 for (node_iterator I = c->begin(), E = c->end(); I != E; ++I) { 256 node *n = *I; 257 258 if (n->is_alu_group()) { 259 finalize_alu_group(static_cast<alu_group_node*>(n), prev_node); 260 } else { 261 if (n->is_alu_clause()) { 262 cf_node *c = static_cast<cf_node*>(n); 263 264 if (c->bc.op == CF_OP_ALU_PUSH_BEFORE && ctx.is_egcm()) { 265 if (ctx.stack_workaround_8xx) { 266 region_node *r = c->get_parent_region(); 267 if (r) { 268 unsigned ifs, loops; 269 unsigned elems = get_stack_depth(r, loops, ifs); 270 unsigned dmod1 = elems % ctx.stack_entry_size; 271 unsigned dmod2 = (elems + 1) % ctx.stack_entry_size; 272 273 if (elems && (!dmod1 || !dmod2)) 274 c->flags |= NF_ALU_STACK_WORKAROUND; 275 } 276 } else if (ctx.stack_workaround_9xx) { 277 region_node *r = c->get_parent_region(); 278 if (r) { 279 unsigned ifs, loops; 280 get_stack_depth(r, loops, ifs); 281 if (loops >= 2) 282 c->flags |= NF_ALU_STACK_WORKAROUND; 283 } 284 } 285 } 286 last_cf = c; 287 } else if (n->is_fetch_inst()) { 288 finalize_fetch(static_cast<fetch_node*>(n)); 289 } else if (n->is_cf_inst()) { 290 finalize_cf(static_cast<cf_node*>(n)); 291 } 292 if (n->is_container()) 293 run_on(static_cast<container_node*>(n)); 294 } 295 prev_node = n; 296 } 297 } 298 299 void bc_finalizer::finalize_alu_group(alu_group_node* g, node *prev_node) { 300 301 alu_node *last = NULL; 302 alu_group_node *prev_g = NULL; 303 bool add_nop = false; 304 if (prev_node && prev_node->is_alu_group()) { 305 prev_g = static_cast<alu_group_node*>(prev_node); 306 } 307 308 for (node_iterator I = g->begin(), E = g->end(); I != E; ++I) { 309 alu_node *n = static_cast<alu_node*>(*I); 310 unsigned slot = n->bc.slot; 311 value *d = n->dst.empty() ? NULL : n->dst[0]; 312 313 if (d && d->is_special_reg()) { 314 assert((n->bc.op_ptr->flags & AF_MOVA) || d->is_geometry_emit() || d->is_lds_oq() || d->is_lds_access()); 315 d = NULL; 316 } 317 318 sel_chan fdst = d ? d->get_final_gpr() : sel_chan(0, 0); 319 320 if (d) { 321 assert(fdst.chan() == slot || slot == SLOT_TRANS); 322 } 323 324 if (!(n->bc.op_ptr->flags & AF_MOVA && ctx.is_cayman())) 325 n->bc.dst_gpr = fdst.sel(); 326 n->bc.dst_chan = d ? fdst.chan() : slot < SLOT_TRANS ? slot : 0; 327 328 329 if (d && d->is_rel() && d->rel && !d->rel->is_const()) { 330 n->bc.dst_rel = 1; 331 update_ngpr(d->array->gpr.sel() + d->array->array_size -1); 332 } else { 333 n->bc.dst_rel = 0; 334 } 335 336 n->bc.write_mask = d != NULL; 337 n->bc.last = 0; 338 339 if (n->bc.op_ptr->flags & AF_PRED) { 340 n->bc.update_pred = (n->dst[1] != NULL); 341 n->bc.update_exec_mask = (n->dst[2] != NULL); 342 } 343 344 // FIXME handle predication here 345 n->bc.pred_sel = PRED_SEL_OFF; 346 347 update_ngpr(n->bc.dst_gpr); 348 349 add_nop |= finalize_alu_src(g, n, prev_g); 350 351 last = n; 352 } 353 354 if (add_nop) { 355 if (sh.get_ctx().r6xx_gpr_index_workaround) { 356 insert_rv6xx_load_ar_workaround(g); 357 } 358 } 359 last->bc.last = 1; 360 } 361 362 bool bc_finalizer::finalize_alu_src(alu_group_node* g, alu_node* a, alu_group_node *prev) { 363 vvec &sv = a->src; 364 bool add_nop = false; 365 FBC_DUMP( 366 sblog << "finalize_alu_src: "; 367 dump::dump_op(a); 368 sblog << "\n"; 369 ); 370 371 unsigned si = 0; 372 373 for (vvec::iterator I = sv.begin(), E = sv.end(); I != E; ++I, ++si) { 374 value *v = *I; 375 assert(v); 376 377 bc_alu_src &src = a->bc.src[si]; 378 sel_chan sc; 379 src.rel = 0; 380 381 sel_chan gpr; 382 383 switch (v->kind) { 384 case VLK_REL_REG: 385 sc = v->get_final_gpr(); 386 src.sel = sc.sel(); 387 src.chan = sc.chan(); 388 if (!v->rel->is_const()) { 389 src.rel = 1; 390 update_ngpr(v->array->gpr.sel() + v->array->array_size -1); 391 if (prev && !add_nop) { 392 for (node_iterator pI = prev->begin(), pE = prev->end(); pI != pE; ++pI) { 393 alu_node *pn = static_cast<alu_node*>(*pI); 394 if (pn->bc.dst_gpr == src.sel) { 395 add_nop = true; 396 break; 397 } 398 } 399 } 400 } else 401 src.rel = 0; 402 403 break; 404 case VLK_REG: 405 gpr = v->get_final_gpr(); 406 src.sel = gpr.sel(); 407 src.chan = gpr.chan(); 408 update_ngpr(src.sel); 409 break; 410 case VLK_TEMP: 411 src.sel = v->gpr.sel(); 412 src.chan = v->gpr.chan(); 413 update_ngpr(src.sel); 414 break; 415 case VLK_UNDEF: 416 case VLK_CONST: { 417 literal lv = v->literal_value; 418 src.chan = 0; 419 420 if (lv == literal(0)) 421 src.sel = ALU_SRC_0; 422 else if (lv == literal(0.5f)) 423 src.sel = ALU_SRC_0_5; 424 else if (lv == literal(1.0f)) 425 src.sel = ALU_SRC_1; 426 else if (lv == literal(1)) 427 src.sel = ALU_SRC_1_INT; 428 else if (lv == literal(-1)) 429 src.sel = ALU_SRC_M_1_INT; 430 else { 431 src.sel = ALU_SRC_LITERAL; 432 src.chan = g->literal_chan(lv); 433 src.value = lv; 434 } 435 break; 436 } 437 case VLK_KCACHE: { 438 cf_node *clause = static_cast<cf_node*>(g->parent); 439 assert(clause->is_alu_clause()); 440 sel_chan k = translate_kcache(clause, v); 441 442 assert(k && "kcache translation failed"); 443 444 src.sel = k.sel(); 445 src.chan = k.chan(); 446 break; 447 } 448 case VLK_SPECIAL_REG: 449 if (v->select.sel() == SV_LDS_OQA) { 450 src.sel = ALU_SRC_LDS_OQ_A_POP; 451 src.chan = 0; 452 } else if (v->select.sel() == SV_LDS_OQB) { 453 src.sel = ALU_SRC_LDS_OQ_B_POP; 454 src.chan = 0; 455 } else { 456 src.sel = ALU_SRC_0; 457 src.chan = 0; 458 } 459 break; 460 case VLK_PARAM: 461 case VLK_SPECIAL_CONST: 462 src.sel = v->select.sel(); 463 src.chan = v->select.chan(); 464 break; 465 default: 466 assert(!"unknown value kind"); 467 break; 468 } 469 if (prev && !add_nop) { 470 for (node_iterator pI = prev->begin(), pE = prev->end(); pI != pE; ++pI) { 471 alu_node *pn = static_cast<alu_node*>(*pI); 472 if (pn->bc.dst_rel) { 473 if (pn->bc.dst_gpr == src.sel) { 474 add_nop = true; 475 break; 476 } 477 } 478 } 479 } 480 } 481 482 while (si < 3) { 483 a->bc.src[si++].sel = 0; 484 } 485 return add_nop; 486 } 487 488 void bc_finalizer::copy_fetch_src(fetch_node &dst, fetch_node &src, unsigned arg_start) 489 { 490 int reg = -1; 491 492 for (unsigned chan = 0; chan < 4; ++chan) { 493 494 dst.bc.dst_sel[chan] = SEL_MASK; 495 496 unsigned sel = SEL_MASK; 497 498 value *v = src.src[arg_start + chan]; 499 500 if (!v || v->is_undef()) { 501 sel = SEL_MASK; 502 } else if (v->is_const()) { 503 literal l = v->literal_value; 504 if (l == literal(0)) 505 sel = SEL_0; 506 else if (l == literal(1.0f)) 507 sel = SEL_1; 508 else { 509 sblog << "invalid fetch constant operand " << chan << " "; 510 dump::dump_op(&src); 511 sblog << "\n"; 512 abort(); 513 } 514 515 } else if (v->is_any_gpr()) { 516 unsigned vreg = v->gpr.sel(); 517 unsigned vchan = v->gpr.chan(); 518 519 if (reg == -1) 520 reg = vreg; 521 else if ((unsigned)reg != vreg) { 522 sblog << "invalid fetch source operand " << chan << " "; 523 dump::dump_op(&src); 524 sblog << "\n"; 525 abort(); 526 } 527 528 sel = vchan; 529 530 } else { 531 sblog << "invalid fetch source operand " << chan << " "; 532 dump::dump_op(&src); 533 sblog << "\n"; 534 abort(); 535 } 536 537 dst.bc.src_sel[chan] = sel; 538 } 539 540 if (reg >= 0) 541 update_ngpr(reg); 542 543 dst.bc.src_gpr = reg >= 0 ? reg : 0; 544 } 545 546 void bc_finalizer::emit_set_grad(fetch_node* f) { 547 548 assert(f->src.size() == 12 || f->src.size() == 13); 549 unsigned ops[2] = { FETCH_OP_SET_GRADIENTS_V, FETCH_OP_SET_GRADIENTS_H }; 550 551 unsigned arg_start = 0; 552 553 for (unsigned op = 0; op < 2; ++op) { 554 fetch_node *n = sh.create_fetch(); 555 n->bc.set_op(ops[op]); 556 557 arg_start += 4; 558 559 copy_fetch_src(*n, *f, arg_start); 560 561 f->insert_before(n); 562 } 563 564 } 565 566 void bc_finalizer::emit_set_texture_offsets(fetch_node &f) { 567 assert(f.src.size() == 8); 568 569 fetch_node *n = sh.create_fetch(); 570 571 n->bc.set_op(FETCH_OP_SET_TEXTURE_OFFSETS); 572 573 copy_fetch_src(*n, f, 4); 574 575 f.insert_before(n); 576 } 577 578 void bc_finalizer::finalize_fetch(fetch_node* f) { 579 580 int reg = -1; 581 582 // src 583 584 unsigned src_count = 4; 585 586 unsigned flags = f->bc.op_ptr->flags; 587 588 if (flags & FF_VTX) { 589 src_count = 1; 590 } else if (flags & FF_GDS) { 591 src_count = 2; 592 } else if (flags & FF_USEGRAD) { 593 emit_set_grad(f); 594 } else if (flags & FF_USE_TEXTURE_OFFSETS) { 595 emit_set_texture_offsets(*f); 596 } 597 598 for (unsigned chan = 0; chan < src_count; ++chan) { 599 600 unsigned sel = f->bc.src_sel[chan]; 601 602 if (sel > SEL_W) 603 continue; 604 605 value *v = f->src[chan]; 606 607 if (v->is_undef()) { 608 sel = SEL_MASK; 609 } else if (v->is_const()) { 610 literal l = v->literal_value; 611 if (l == literal(0)) 612 sel = SEL_0; 613 else if (l == literal(1.0f)) 614 sel = SEL_1; 615 else { 616 sblog << "invalid fetch constant operand " << chan << " "; 617 dump::dump_op(f); 618 sblog << "\n"; 619 abort(); 620 } 621 622 } else if (v->is_any_gpr()) { 623 unsigned vreg = v->gpr.sel(); 624 unsigned vchan = v->gpr.chan(); 625 626 if (reg == -1) 627 reg = vreg; 628 else if ((unsigned)reg != vreg) { 629 sblog << "invalid fetch source operand " << chan << " "; 630 dump::dump_op(f); 631 sblog << "\n"; 632 abort(); 633 } 634 635 sel = vchan; 636 637 } else { 638 sblog << "invalid fetch source operand " << chan << " "; 639 dump::dump_op(f); 640 sblog << "\n"; 641 abort(); 642 } 643 644 f->bc.src_sel[chan] = sel; 645 } 646 647 if (reg >= 0) 648 update_ngpr(reg); 649 650 f->bc.src_gpr = reg >= 0 ? reg : 0; 651 652 // dst 653 654 reg = -1; 655 656 unsigned dst_swz[4] = {SEL_MASK, SEL_MASK, SEL_MASK, SEL_MASK}; 657 658 for (unsigned chan = 0; chan < 4; ++chan) { 659 660 unsigned sel = f->bc.dst_sel[chan]; 661 662 if (sel == SEL_MASK) 663 continue; 664 665 value *v = f->dst[chan]; 666 if (!v) 667 continue; 668 669 if (v->is_any_gpr()) { 670 unsigned vreg = v->gpr.sel(); 671 unsigned vchan = v->gpr.chan(); 672 673 if (reg == -1) 674 reg = vreg; 675 else if ((unsigned)reg != vreg) { 676 sblog << "invalid fetch dst operand " << chan << " "; 677 dump::dump_op(f); 678 sblog << "\n"; 679 abort(); 680 } 681 682 dst_swz[vchan] = sel; 683 684 } else { 685 sblog << "invalid fetch dst operand " << chan << " "; 686 dump::dump_op(f); 687 sblog << "\n"; 688 abort(); 689 } 690 691 } 692 693 for (unsigned i = 0; i < 4; ++i) 694 f->bc.dst_sel[i] = dst_swz[i]; 695 696 if ((flags & FF_GDS) && reg == -1) { 697 f->bc.dst_sel[0] = SEL_MASK; 698 f->bc.dst_gpr = 0; 699 return ; 700 } 701 assert(reg >= 0); 702 703 if (reg >= 0) 704 update_ngpr(reg); 705 706 f->bc.dst_gpr = reg >= 0 ? reg : 0; 707 } 708 709 void bc_finalizer::finalize_cf(cf_node* c) { 710 711 unsigned flags = c->bc.op_ptr->flags; 712 713 c->bc.end_of_program = 0; 714 last_cf = c; 715 716 if (flags & CF_EXP) { 717 c->bc.set_op(CF_OP_EXPORT); 718 last_export[c->bc.type] = c; 719 720 int reg = -1; 721 722 for (unsigned chan = 0; chan < 4; ++chan) { 723 724 unsigned sel = c->bc.sel[chan]; 725 726 if (sel > SEL_W) 727 continue; 728 729 value *v = c->src[chan]; 730 731 if (v->is_undef()) { 732 sel = SEL_MASK; 733 } else if (v->is_const()) { 734 literal l = v->literal_value; 735 if (l == literal(0)) 736 sel = SEL_0; 737 else if (l == literal(1.0f)) 738 sel = SEL_1; 739 else { 740 sblog << "invalid export constant operand " << chan << " "; 741 dump::dump_op(c); 742 sblog << "\n"; 743 abort(); 744 } 745 746 } else if (v->is_any_gpr()) { 747 unsigned vreg = v->gpr.sel(); 748 unsigned vchan = v->gpr.chan(); 749 750 if (reg == -1) 751 reg = vreg; 752 else if ((unsigned)reg != vreg) { 753 sblog << "invalid export source operand " << chan << " "; 754 dump::dump_op(c); 755 sblog << "\n"; 756 abort(); 757 } 758 759 sel = vchan; 760 761 } else { 762 sblog << "invalid export source operand " << chan << " "; 763 dump::dump_op(c); 764 sblog << "\n"; 765 abort(); 766 } 767 768 c->bc.sel[chan] = sel; 769 } 770 771 if (reg >= 0) 772 update_ngpr(reg); 773 774 c->bc.rw_gpr = reg >= 0 ? reg : 0; 775 776 } else if (flags & CF_MEM) { 777 778 int reg = -1; 779 unsigned mask = 0; 780 781 for (unsigned chan = 0; chan < 4; ++chan) { 782 value *v = c->src[chan]; 783 if (!v || v->is_undef()) 784 continue; 785 786 if (!v->is_any_gpr() || v->gpr.chan() != chan) { 787 sblog << "invalid source operand " << chan << " "; 788 dump::dump_op(c); 789 sblog << "\n"; 790 abort(); 791 } 792 unsigned vreg = v->gpr.sel(); 793 if (reg == -1) 794 reg = vreg; 795 else if ((unsigned)reg != vreg) { 796 sblog << "invalid source operand " << chan << " "; 797 dump::dump_op(c); 798 sblog << "\n"; 799 abort(); 800 } 801 802 mask |= (1 << chan); 803 } 804 805 if (reg >= 0) 806 update_ngpr(reg); 807 808 c->bc.rw_gpr = reg >= 0 ? reg : 0; 809 c->bc.comp_mask = mask; 810 811 if (((flags & CF_RAT) || (!(flags & CF_STRM))) && (c->bc.type & 1)) { 812 813 reg = -1; 814 815 for (unsigned chan = 0; chan < 4; ++chan) { 816 value *v = c->src[4 + chan]; 817 if (!v || v->is_undef()) 818 continue; 819 820 if (!v->is_any_gpr() || v->gpr.chan() != chan) { 821 sblog << "invalid source operand " << chan << " "; 822 dump::dump_op(c); 823 sblog << "\n"; 824 abort(); 825 } 826 unsigned vreg = v->gpr.sel(); 827 if (reg == -1) 828 reg = vreg; 829 else if ((unsigned)reg != vreg) { 830 sblog << "invalid source operand " << chan << " "; 831 dump::dump_op(c); 832 sblog << "\n"; 833 abort(); 834 } 835 } 836 837 assert(reg >= 0); 838 839 if (reg >= 0) 840 update_ngpr(reg); 841 842 c->bc.index_gpr = reg >= 0 ? reg : 0; 843 } 844 } else if (flags & CF_CALL) { 845 update_nstack(c->get_parent_region(), ctx.wavefront_size == 16 ? 2 : 1); 846 } 847 } 848 849 sel_chan bc_finalizer::translate_kcache(cf_node* alu, value* v) { 850 unsigned sel = v->select.kcache_sel(); 851 unsigned bank = v->select.kcache_bank(); 852 unsigned chan = v->select.chan(); 853 static const unsigned kc_base[] = {128, 160, 256, 288}; 854 855 sel &= 4095; 856 857 unsigned line = sel >> 4; 858 859 for (unsigned k = 0; k < 4; ++k) { 860 bc_kcache &kc = alu->bc.kc[k]; 861 862 if (kc.mode == KC_LOCK_NONE) 863 break; 864 865 if (kc.bank == bank && (kc.addr == line || 866 (kc.mode == KC_LOCK_2 && kc.addr + 1 == line))) { 867 868 sel = kc_base[k] + (sel - (kc.addr << 4)); 869 870 return sel_chan(sel, chan); 871 } 872 } 873 874 assert(!"kcache translation error"); 875 return 0; 876 } 877 878 void bc_finalizer::update_ngpr(unsigned gpr) { 879 if (gpr < MAX_GPR - ctx.alu_temp_gprs && gpr >= ngpr) 880 ngpr = gpr + 1; 881 } 882 883 unsigned bc_finalizer::get_stack_depth(node *n, unsigned &loops, 884 unsigned &ifs, unsigned add) { 885 unsigned stack_elements = add; 886 bool has_non_wqm_push = (add != 0); 887 region_node *r = n->is_region() ? 888 static_cast<region_node*>(n) : n->get_parent_region(); 889 890 loops = 0; 891 ifs = 0; 892 893 while (r) { 894 if (r->is_loop()) { 895 ++loops; 896 } else { 897 ++ifs; 898 has_non_wqm_push = true; 899 } 900 r = r->get_parent_region(); 901 } 902 stack_elements += (loops * ctx.stack_entry_size) + ifs; 903 904 // reserve additional elements in some cases 905 switch (ctx.hw_class) { 906 case HW_CLASS_R600: 907 case HW_CLASS_R700: 908 // If any non-WQM push is invoked, 2 elements should be reserved. 909 if (has_non_wqm_push) 910 stack_elements += 2; 911 break; 912 case HW_CLASS_CAYMAN: 913 // If any stack operation is invoked, 2 elements should be reserved 914 if (stack_elements) 915 stack_elements += 2; 916 break; 917 case HW_CLASS_EVERGREEN: 918 // According to the docs we need to reserve 1 element for each of the 919 // following cases: 920 // 1) non-WQM push is used with WQM/LOOP frames on stack 921 // 2) ALU_ELSE_AFTER is used at the point of max stack usage 922 // NOTE: 923 // It was found that the conditions above are not sufficient, there are 924 // other cases where we also need to reserve stack space, that's why 925 // we always reserve 1 stack element if we have non-WQM push on stack. 926 // Condition 2 is ignored for now because we don't use this instruction. 927 if (has_non_wqm_push) 928 ++stack_elements; 929 break; 930 case HW_CLASS_UNKNOWN: 931 assert(0); 932 } 933 return stack_elements; 934 } 935 936 void bc_finalizer::update_nstack(region_node* r, unsigned add) { 937 unsigned loops = 0; 938 unsigned ifs = 0; 939 unsigned elems = r ? get_stack_depth(r, loops, ifs, add) : add; 940 941 // XXX all chips expect this value to be computed using 4 as entry size, 942 // not the real entry size 943 unsigned stack_entries = (elems + 3) >> 2; 944 945 if (nstack < stack_entries) 946 nstack = stack_entries; 947 } 948 949 void bc_finalizer::cf_peephole() { 950 if (ctx.stack_workaround_8xx || ctx.stack_workaround_9xx) { 951 for (node_iterator N, I = sh.root->begin(), E = sh.root->end(); I != E; 952 I = N) { 953 N = I; ++N; 954 cf_node *c = static_cast<cf_node*>(*I); 955 956 if (c->bc.op == CF_OP_ALU_PUSH_BEFORE && 957 (c->flags & NF_ALU_STACK_WORKAROUND)) { 958 cf_node *push = sh.create_cf(CF_OP_PUSH); 959 c->insert_before(push); 960 push->jump(c); 961 c->bc.set_op(CF_OP_ALU); 962 } 963 } 964 } 965 966 for (node_iterator N, I = sh.root->begin(), E = sh.root->end(); I != E; 967 I = N) { 968 N = I; ++N; 969 970 cf_node *c = static_cast<cf_node*>(*I); 971 972 if (c->jump_after_target) { 973 if (c->jump_target->next == NULL) { 974 c->jump_target->insert_after(sh.create_cf(CF_OP_NOP)); 975 if (last_cf == c->jump_target) 976 last_cf = static_cast<cf_node*>(c->jump_target->next); 977 } 978 c->jump_target = static_cast<cf_node*>(c->jump_target->next); 979 c->jump_after_target = false; 980 } 981 982 if (c->is_cf_op(CF_OP_POP)) { 983 node *p = c->prev; 984 if (p->is_alu_clause()) { 985 cf_node *a = static_cast<cf_node*>(p); 986 987 if (a->bc.op == CF_OP_ALU) { 988 a->bc.set_op(CF_OP_ALU_POP_AFTER); 989 c->remove(); 990 } 991 } 992 } else if (c->is_cf_op(CF_OP_JUMP) && c->jump_target == c->next) { 993 // if JUMP is immediately followed by its jump target, 994 // then JUMP is useless and we can eliminate it 995 c->remove(); 996 } 997 } 998 } 999 1000 } // namespace r600_sb 1001