1 /* 2 * Copyright 2013 Vadim Girlin <vadimgirlin (at) gmail.com> 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * on the rights to use, copy, modify, merge, publish, distribute, sub 8 * license, and/or sell copies of the Software, and to permit persons to whom 9 * the Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 21 * USE OR OTHER DEALINGS IN THE SOFTWARE. 
 *
 * Authors:
 *      Vadim Girlin
 */

#include "sb_bc.h"
#include "sb_shader.h"
#include "sb_pass.h"

namespace r600_sb {

// Value-initialize ("()") every scalar member. The shader owns all IR nodes
// (placement-new'ed in 'pool', see the create_* factories below) and all
// values ('val_pool').
// NOTE(review): vt(ex) passes a reference to 'ex' before 'ex' itself is
// constructed -- member init follows declaration order, not this list's
// order. Safe only if 'vt' merely stores the reference; confirm in
// sb_shader.h.
shader::shader(sb_context &sctx, shader_target t, unsigned id)
	: ctx(sctx), next_temp_value_index(temp_regid_offset),
	  prep_regs_count(), pred_sels(),
	  regions(), inputs(), undef(), val_pool(sizeof(value)),
	  pool(), all_nodes(), src_stats(), opt_stats(), errors(),
	  optimized(), id(id),
	  coal(*this), bbs(),
	  target(t), vt(ex), ex(*this), root(),
	  compute_interferences(),
	  has_alu_predication(),
	  uses_gradients(), safe_math(), ngpr(), nstack(), dce_flags() {}

// Try to place 'n' into one of the 5 ALU slots (x/y/z/w/trans) of a group.
// A vector-capable op goes to the slot matching its destination channel;
// if that slot is taken (or the op is not vector-capable) and the op
// supports the scalar unit, fall back to the trans slot. Cayman has no
// trans slot, hence the is_cayman() check.
// Returns false if the required slot is already occupied.
bool shader::assign_slot(alu_node* n, alu_node *slots[5]) {

	unsigned slot_flags = ctx.alu_slots(n->bc.op);
	unsigned slot = n->bc.dst_chan;

	if (!ctx.is_cayman() && (!(slot_flags & AF_V) || slots[slot]) &&
			(slot_flags & AF_S))
		slot = SLOT_TRANS;

	if (slots[slot])
		return false;

	n->bc.slot = slot;
	slots[slot] = n;
	return true;
}

// Append a value for each set bit of 'comp_mask' (components of register
// 'gpr') to 'vec', pinning each value to its exact register and channel so
// register allocation cannot move it.
void shader::add_pinned_gpr_values(vvec& vec, unsigned gpr, unsigned comp_mask,
                                   bool src) {
	unsigned chan = 0;
	while (comp_mask) {
		if (comp_mask & 1) {
			value *v = get_gpr_value(src, gpr, chan, false);
			v->flags |= (VLF_PIN_REG | VLF_PIN_CHAN);
			if (!v->is_rel()) {
				v->gpr = v->pin_gpr = v->select;
				v->fix();
			}
			if (v->array && !v->array->gpr) {
				// if pinned value can be accessed with indirect addressing
				// pin the entire array to its original location
				v->array->gpr = v->array->base_gpr;
			}
			vec.push_back(v);
		}
		comp_mask >>= 1;
		++chan;
	}
}

// Create a CF node acting as a clause container of the given subtype and
// select the matching CF opcode.
cf_node* shader::create_clause(node_subtype nst) {
	cf_node *n = create_cf();

	n->subtype = nst;

	switch (nst) {
	case NST_ALU_CLAUSE: n->bc.set_op(CF_OP_ALU); break;
	case NST_TEX_CLAUSE: n->bc.set_op(CF_OP_TEX); break;
	case NST_VTX_CLAUSE: n->bc.set_op(CF_OP_VTX); break;
	case NST_GDS_CLAUSE: n->bc.set_op(CF_OP_GDS); break;
	default: assert(!"invalid clause type"); break;
	}

	n->bc.barrier = 1;
	return n;
}

// Thin wrappers: build the flat list of basic blocks for the whole program
// (see the recursive overload below) and expand them back afterwards.
void shader::create_bbs() {
	create_bbs(root, bbs);
}

void shader::expand_bbs() {
	expand_bbs(bbs);
}

// Create "MOV dst, src"; 'dst' is recorded as defined by the new node.
alu_node* shader::create_mov(value* dst, value* src) {
	alu_node *n = create_alu();
	n->bc.set_op(ALU_OP1_MOV);
	n->dst.push_back(dst);
	n->src.push_back(src);
	dst->def = n;

	return n;
}

// Copy-mov used by coalescing-related passes: marks the node as a copy that
// must not be hoisted and, for sgpr-to-sgpr copies with a nonzero affinity
// cost, records an affinity edge so the coalescer may merge both values.
alu_node* shader::create_copy_mov(value* dst, value* src, unsigned affcost) {
	alu_node *n = create_mov(dst, src);

	dst->assign_source(src);
	n->flags |= NF_COPY_MOV | NF_DONT_HOIST;

	if (affcost && dst->is_sgpr() && src->is_sgpr())
		coal.add_edge(src, dst, affcost);

	return n;
}

// Return the unique value object for (kind, id, version), creating and
// caching it in reg_values on first request. Prepared registers
// (version 0, sel below prep_regs_count) are looked up directly in
// val_pool by their 1-based sel_chan index.
// NOTE(review): the packed key assumes 'id' fits in 16 bits and 'version'
// in 12 -- wider ids would alias the version/kind fields; verify the
// ranges against sel_chan in sb_ir.h.
value* shader::get_value(value_kind kind, sel_chan id,
                         unsigned version) {
	if (version == 0 && kind == VLK_REG && id.sel() < prep_regs_count)
		return val_pool[id - 1];


	unsigned key = (kind << 28) | (version << 16) | id;
	value_map::iterator i = reg_values.find(key);
	if (i != reg_values.end()) {
		return i->second;
	}
	value *v = create_value(kind, id, version);
	reg_values.insert(std::make_pair(key, v));
	return v;
}

// Value for a special (non-gpr) register, e.g. AR index or the valid mask.
value* shader::get_special_value(unsigned sv_id, unsigned version) {
	sel_chan id(sv_id, 0);
	return get_value(VLK_SPECIAL_REG, id, version);
}

// Fill 'vv' with the current values of all elements of array 'a'
// (same channel, consecutive gprs starting at base_gpr).
void shader::fill_array_values(gpr_array *a, vvec &vv) {
	unsigned sz = a->array_size;
	vv.resize(sz);
	for (unsigned i = 0; i < a->array_size; ++i) {
		vv[i] = get_gpr_value(true, a->base_gpr.sel() + i, a->base_gpr.chan(),
		                      false);
	}
}

// Return the value for gpr[reg].chan. For relative (indirect) access a
// fresh VLK_REL_REG value is created each time, with the AR index special
// value as its 'rel' and the potentially-read array elements recorded in
// 'muse' (plus 'mdef' when used as a destination).
value* shader::get_gpr_value(bool src, unsigned reg, unsigned chan, bool rel,
                             unsigned version) {
	sel_chan id(reg, chan);
	value *v;
	gpr_array *a = get_gpr_array(reg, chan);
	if (rel) {
		assert(a);
		v = create_value(VLK_REL_REG, id, 0);
		v->rel = get_special_value(SV_AR_INDEX);
		fill_array_values(a, v->muse);
		if (!src)
			fill_array_values(a, v->mdef);
	} else {
		// prepared registers bypass the cache, same as in get_value()
		if (version == 0 && reg < prep_regs_count)
			return (val_pool[id - 1]);

		v = get_value(VLK_REG, id, version);
	}

	v->array = a;
	v->pin_gpr = v->select;

	return v;
}

// Allocate a fresh temporary (VLK_TEMP) with a unique index.
value* shader::create_temp_value() {
	sel_chan id(++next_temp_value_index, 0);
	return get_value(VLK_TEMP, id, 0);
}

// Cached read-only constant-cache (kcache) value, keyed by
// bank/line/chan/index mode packed into a sel_chan.
value* shader::get_kcache_value(unsigned bank, unsigned index, unsigned chan, alu_kcache_index_mode index_mode) {
	return get_ro_value(kcache_values, VLK_KCACHE,
	                    sel_chan(bank, index, chan, index_mode));
}

// Register a shader input living in 'gpr' with the given component mask.
// Preloaded inputs are added (pinned) to the root node's dst vector so
// they are considered defined on entry.
void shader::add_input(unsigned gpr, bool preloaded, unsigned comp_mask) {
	if (inputs.size() <= gpr)
		inputs.resize(gpr+1);

	shader_input &i = inputs[gpr];
	i.preloaded = preloaded;
	i.comp_mask = comp_mask;

	if (preloaded) {
		add_pinned_gpr_values(root->dst, gpr, comp_mask, true);
	}

}

// Create the root container; must be called exactly once before building IR.
void shader::init() {
	assert(!root);
	root = create_container();
}

// Wire the inputs of a vertex-type shader (LS/VS/ES) to the given CALL_FS
// cf node: inputs produced by the fetch shader become dests of the call,
// already-preloaded ones become its srcs.
void shader::init_call_fs(cf_node* cf) {
	unsigned gpr = 0;

	assert(target == TARGET_LS || target == TARGET_VS || target == TARGET_ES);

	for(inputs_vec::const_iterator I = inputs.begin(),
			E = inputs.end(); I != E; ++I, ++gpr) {
		if (!I->preloaded)
			add_pinned_gpr_values(cf->dst, gpr, I->comp_mask, false);
		else
			add_pinned_gpr_values(cf->src, gpr, I->comp_mask, true);
	}
}

// Make every value in 's' share the gvn_source of the canonical undef
// value, so value numbering treats them all as undefined.
void shader::set_undef(val_set& s) {
	value *undefined = get_undef_value();
	if (!undefined->gvn_source)
		vt.add_value(undefined);

	val_set &vs = s;

	for (val_set::iterator I = vs.begin(*this), E = vs.end(*this); I != E; ++I) {
		value *v = *I;

		assert(!v->is_readonly() && !v->is_rel());

		v->gvn_source = undefined->gvn_source;
	}
}

// Raw value construction in the shader-owned pool (no caching -- callers
// that want uniquing go through get_value / get_ro_value).
value* shader::create_value(value_kind k, sel_chan regid, unsigned ver) {
	value *v = val_pool.create(k, regid, ver);
	return v;
}

// Lazily created singleton representing an undefined value.
value* shader::get_undef_value() {
	if (!undef)
		undef = create_value(VLK_UNDEF, 0, 0);
	return undef;
}

// Node factories: every IR node is placement-new'ed in the shader's memory
// pool and remembered in all_nodes so ~shader can run its destructor.

node* shader::create_node(node_type nt, node_subtype nst, node_flags flags) {
	node *n = new (pool.allocate(sizeof(node))) node(nt, nst, flags);
	all_nodes.push_back(n);
	return n;
}

alu_node* shader::create_alu() {
	alu_node* n = new (pool.allocate(sizeof(alu_node))) alu_node();
	all_nodes.push_back(n);
	return n;
}

alu_group_node* shader::create_alu_group() {
	alu_group_node* n =
			new (pool.allocate(sizeof(alu_group_node))) alu_group_node();
	all_nodes.push_back(n);
	return n;
}

alu_packed_node* shader::create_alu_packed() {
	alu_packed_node* n =
			new (pool.allocate(sizeof(alu_packed_node))) alu_packed_node();
	all_nodes.push_back(n);
	return n;
}

cf_node* shader::create_cf() {
	cf_node* n = new (pool.allocate(sizeof(cf_node))) cf_node();
	n->bc.barrier = 1;
	all_nodes.push_back(n);
	return n;
}

fetch_node* shader::create_fetch() {
	fetch_node* n = new (pool.allocate(sizeof(fetch_node))) fetch_node();
	all_nodes.push_back(n);
	return n;
}

// Regions are additionally tracked in 'regions'; the region id equals its
// index in that vector.
region_node* shader::create_region() {
	region_node *n = new (pool.allocate(sizeof(region_node)))
			region_node(regions.size());
	regions.push_back(n);
	all_nodes.push_back(n);
	return n;
}

// Departs and repeats register themselves with their target region.
// NOTE(review): repeat ids start at 1 (size() + 1) while depart ids start
// at 0 -- presumably intentional; verify against the users of these ids.
depart_node* shader::create_depart(region_node* target) {
	depart_node* n = new (pool.allocate(sizeof(depart_node)))
			depart_node(target, target->departs.size());
	target->departs.push_back(n);
	all_nodes.push_back(n);
	return n;
}

repeat_node* shader::create_repeat(region_node* target) {
	repeat_node* n = new (pool.allocate(sizeof(repeat_node)))
			repeat_node(target, target->repeats.size() + 1);
	target->repeats.push_back(n);
	all_nodes.push_back(n);
	return n;
}

container_node* shader::create_container(node_type nt, node_subtype nst,
                                         node_flags flags) {
	container_node *n = new (pool.allocate(sizeof(container_node)))
			container_node(nt, nst, flags);
	all_nodes.push_back(n);
	return n;
}

if_node* shader::create_if() {
	if_node* n = new (pool.allocate(sizeof(if_node))) if_node();
	all_nodes.push_back(n);
	return n;
}

bb_node* shader::create_bb(unsigned id, unsigned loop_level) {
	bb_node* n = new (pool.allocate(sizeof(bb_node))) bb_node(id, loop_level);
	all_nodes.push_back(n);
	return n;
}

// Cached read-only special value (VLK_PARAM), keyed by sel.
value* shader::get_special_ro_value(unsigned sel) {
	return get_ro_value(special_ro_values, VLK_PARAM, sel);
}

// Cached literal constant; the literal itself is stored on the value.
value* shader::get_const_value(const literal &v) {
	value *val = get_ro_value(const_values, VLK_CONST, v);
	val->literal_value = v;
	return val;
}

// Nodes live in the memory pool, so their destructors must be invoked
// manually; gpr arrays are the only plain-'new' allocations owned here.
shader::~shader() {
	for (node_vec::iterator I = all_nodes.begin(), E = all_nodes.end();
			I != E; ++I)
		(*I)->~node();

	for (gpr_array_vec::iterator I = gpr_arrays.begin(), E = gpr_arrays.end();
			I != E; ++I) {
		delete *I;
	}
}

// Print the IR if pass dumping was requested via the context.
void shader::dump_ir() {
	if (ctx.dump_pass)
		dump(*this).run();
}

// Return (creating if needed) the same register/temp as 'v' but with a
// different version; the array binding is propagated to the new value.
value* shader::get_value_version(value* v, unsigned ver) {
	assert(!v->is_readonly() && !v->is_rel());
	value *vv = get_value(v->kind, v->select, ver);
	assert(vv);

	if (v->array) {
		vv->array = v->array;
	}

	return vv;
}

// Linear search for the gpr array (indexable range) covering gpr[reg].chan;
// returns NULL if the location is not part of any declared array.
gpr_array* shader::get_gpr_array(unsigned reg, unsigned chan) {

	for (regarray_vec::iterator I = gpr_arrays.begin(),
			E = gpr_arrays.end(); I != E; ++I) {
		gpr_array* a = *I;
		unsigned achan = a->base_gpr.chan();
		unsigned areg = a->base_gpr.sel();
		if (achan == chan && (reg >= areg && reg < areg+a->array_size))
			return a;
	}
	return NULL;
}

// Declare an indexable gpr range: one gpr_array per component selected in
// comp_mask (arrays are tracked per-channel in this backend).
void shader::add_gpr_array(unsigned gpr_start, unsigned gpr_count,
                           unsigned comp_mask) {
	unsigned chan = 0;
	while (comp_mask) {
		if (comp_mask & 1) {
			gpr_array *a = new gpr_array(
					sel_chan(gpr_start, chan), gpr_count);

			SB_DUMP_PASS( sblog << "add_gpr_array: @" << a->base_gpr
			             << " [" << a->array_size << "]\n";
			);

			gpr_arrays.push_back(a);
		}
		comp_mask >>= 1;
		++chan;
	}
}

// Constant values 0/1 used as predicate select sources, cached per shader.
value* shader::get_pred_sel(int sel) {
	assert(sel == 0 || sel == 1);
	if (!pred_sels[sel])
		pred_sels[sel] = get_const_value(sel);

	return pred_sels[sel];
}

// Create a CF node with the given opcode (barrier set by default).
cf_node* shader::create_cf(unsigned op) {
	cf_node *c = create_cf();
	c->bc.set_op(op);
	c->bc.barrier = 1;
	return c;
}

// "TARGET/CHIP/CLASS" string for diagnostics.
std::string shader::get_full_target_name() {
	std::string s = get_shader_target_name();
	s += "/";
	s += ctx.get_hw_chip_name();
	s += "/";
	s += ctx.get_hw_class_name();
	return s;
}

const char* shader::get_shader_target_name() {
	switch (target) {
	case TARGET_VS: return "VS";
	case TARGET_ES: return "ES";
	case TARGET_PS: return "PS";
	case TARGET_GS: return "GS";
	case TARGET_HS: return "HS";
	case TARGET_LS: return "LS";
	case TARGET_COMPUTE: return "COMPUTE";
	case TARGET_FETCH: return "FETCH";
	default:
		return "INVALID_TARGET";
	}
}

// If 'dr' sits directly in a depart/repeat node, fold that depart/repeat
// back into its target region, then cut everything after 'dr' in its
// parent (unreachable once 'dr' transfers control).
void shader::simplify_dep_rep(node* dr) {
	container_node *p = dr->parent;
	if (p->is_repeat()) {
		repeat_node *r = static_cast<repeat_node*>(p);
		r->target->expand_repeat(r);
	} else if (p->is_depart()) {
		depart_node *d = static_cast<depart_node*>(p);
		d->target->expand_depart(d);
	}
	if (dr->next)
		dr->parent->cut(dr->next, NULL);
}


// FIXME this is used in some places as the max non-temp gpr,
// (MAX_GPR - 2 * ctx.alu_temp_gprs) should be used for that instead.
// First gpr reserved for ALU clause temporaries (the last alu_temp_gprs
// registers of the hw register file).
unsigned shader::first_temp_gpr() {
	return MAX_GPR - ctx.alu_temp_gprs;
}

// Number of gprs usable by the program: twice the clause-temp count is
// subtracted from the register file size.
unsigned shader::num_nontemp_gpr() {
	return MAX_GPR - 2 * ctx.alu_temp_gprs;
}

// Mark the shader as using KILL: make the valid-mask special value a
// source of the root node so it is considered live from the very start.
void shader::set_uses_kill() {
	if (root->src.empty())
		root->src.resize(1);

	if (!root->src[0])
		root->src[0] = get_special_value(SV_VALID_MASK);
}

// Shallow-copy an ALU instruction: duplicates the src/dst value vectors
// (sharing the value pointers), the bytecode fields, and the predicate.
alu_node* shader::clone(alu_node* n) {
	alu_node *c = create_alu();

	// FIXME: this may be wrong with indirect operands
	c->src = n->src;
	c->dst = n->dst;

	c->bc = n->bc;
	c->pred = n->pred;

	return c;
}

// Gather per-shader statistics (pre-optimization when opt == false, post
// when true) and accumulate them into the context-wide totals. No-op
// unless stats dumping was requested.
void shader::collect_stats(bool opt) {
	if (!sb_context::dump_stat)
		return;

	shader_stats &s = opt ? opt_stats : src_stats;

	s.shaders = 1;
	s.ngpr = ngpr;
	s.nstack = nstack;
	s.collect(root);

	if (opt)
		ctx.opt_stats.accumulate(s);
	else
		ctx.src_stats.accumulate(s);
}

// Return the cached read-only value for 'key' from map 'vm', creating it
// (flagged VLF_READONLY) on first use.
value* shader::get_ro_value(value_map& vm, value_kind vk, unsigned key) {
	value_map::iterator I = vm.find(key);
	if (I != vm.end())
		return I->second;
	value *v = create_value(vk, key, 0);
	v->flags = VLF_READONLY;
	vm.insert(std::make_pair(key, v));
	return v;
}

// Recursively partition the children of 'n' into basic blocks: maximal
// runs of plain instruction nodes (NT_OP) are wrapped into a bb_node,
// container children are recursed into (bumping loop_level when entering
// a loop region), and traversal of this container stops after an
// NT_DEPART child -- anything following it is unreachable. New bbs are
// appended to 'bbs' (their id is their index in that vector).
void shader::create_bbs(container_node* n, bbs_vec &bbs, int loop_level) {

	bool inside_bb = false;
	// starts true so that a leading non-op child closes an (empty-range)
	// run correctly via the checks below
	bool last_inside_bb = true;
	node_iterator bb_start(n->begin()), I(bb_start), E(n->end());

	for (; I != E; ++I) {
		node *k = *I;
		inside_bb = k->type == NT_OP;

		if (inside_bb && !last_inside_bb)
			bb_start = I;
		else if (!inside_bb) {
			// a run of instructions just ended -- wrap [bb_start, I) into
			// a bb, unless the terminator is a repeat/depart/if (those are
			// handled by the enclosing region structure)
			if (last_inside_bb
					&& I->type != NT_REPEAT
					&& I->type != NT_DEPART
					&& I->type != NT_IF) {
				bb_node *bb = create_bb(bbs.size(), loop_level);
				bbs.push_back(bb);
				n->insert_node_before(*bb_start, bb);
				if (bb_start != I)
					bb->move(bb_start, I);
			}

			if (k->is_container()) {

				// entering a loop region increases the nesting level
				bool loop = false;
				if (k->type == NT_REGION) {
					loop = static_cast<region_node*>(k)->is_loop();
				}

				create_bbs(static_cast<container_node*>(k), bbs,
				           loop_level + loop);
			}
		}

		// nodes after a depart are unreachable in this container
		if (k->type == NT_DEPART)
			return;

		last_inside_bb = inside_bb;
	}

	if (last_inside_bb) {
		// trailing run of instructions (or an empty container -- an empty
		// bb is still created so every path ends in a block)
		bb_node *bb = create_bb(bbs.size(), loop_level);
		bbs.push_back(bb);
		if (n->empty())
			n->push_back(bb);
		else {
			n->insert_node_before(*bb_start, bb);
			if (bb_start != n->end())
				bb->move(bb_start, n->end());
		}
	} else {
		// container ends with an 'if' -- append an empty bb after it
		if (n->last && n->last->type == NT_IF) {
			bb_node *bb = create_bb(bbs.size(), loop_level);
			bbs.push_back(bb);
			n->push_back(bb);
		}
	}
}

// Expand every collected basic block back into its parent container.
void shader::expand_bbs(bbs_vec &bbs) {

	for (bbs_vec::iterator I = bbs.begin(), E = bbs.end(); I != E; ++I) {
		bb_node *b = *I;
		b->expand();
	}
}

// Map an instruction node to the scheduler queue it belongs to.
sched_queue_id shader::get_queue_id(node* n) {
	switch (n->subtype) {
	case NST_ALU_INST:
	case NST_ALU_PACKED_INST:
	case NST_COPY:
	case NST_PSI:
		return SQ_ALU;
	case NST_FETCH_INST: {
		fetch_node *f = static_cast<fetch_node*>(n);
		// r600 has a separate vertex-fetch queue; later chips fetch
		// vertices through the tex path
		if (ctx.is_r600() && (f->bc.op_ptr->flags & FF_VTX))
			return SQ_VTX;
		if (f->bc.op_ptr->flags & FF_GDS)
			return SQ_GDS;
		return SQ_TEX;
	}
	case NST_CF_INST:
		return SQ_CF;
	default:
		assert(0);
		return SQ_NUM;
	}
}

// Recursively count instructions, groups and clauses in the subtree
// rooted at 'n'.
void shader_stats::collect(node *n) {
	if (n->is_alu_inst())
		++alu;
	else if (n->is_fetch_inst())
		++fetch;
	else if (n->is_container()) {
		container_node *c = static_cast<container_node*>(n);

		if (n->is_alu_group())
			++alu_groups;
		else if (n->is_alu_clause())
			++alu_clauses;
		else if (n->is_fetch_clause())
			++fetch_clauses;
		else if (n->is_cf_inst())
			++cf;

		if (!c->empty()) {
			for (node_iterator I = c->begin(), E = c->end(); I != E; ++I) {
				collect(*I);
			}
		}
	}
}

// Add the counters of 's' to this accumulator, counting one more shader.
void shader_stats::accumulate(shader_stats& s) {
	++shaders;
	ndw += s.ndw;
	ngpr += s.ngpr;
	nstack += s.nstack;

	alu += s.alu;
	alu_groups += s.alu_groups;
	alu_clauses += s.alu_clauses;
	fetch += s.fetch;
	fetch_clauses += s.fetch_clauses;
	cf += s.cf;
}

// Print the collected counters on one line (shader count only shown when
// this is an accumulated multi-shader total).
void shader_stats::dump() {
	sblog << "dw:" << ndw << ", gpr:" << ngpr << ", stk:" << nstack
			<< ", alu groups:" << alu_groups << ", alu clauses: " << alu_clauses
			<< ", alu:" << alu << ", fetch:" << fetch
			<< ", fetch clauses:" << fetch_clauses
			<< ", cf:" << cf;

	if (shaders > 1)
		sblog << ", shaders:" << shaders;

	sblog << "\n";
}

// Print the relative change d1 -> d2 in percent; "N/A" when the base is
// zero but the new value isn't, "0%" when both are zero.
static void print_diff(unsigned d1, unsigned d2) {
	if (d1)
		sblog << ((int)d2 - (int)d1) * 100 / (int)d1 << "%";
	else if (d2)
		sblog << "N/A";
	else
		sblog << "0%";
}

// Print the per-counter relative difference between these (pre-opt) stats
// and 's' (post-opt).
void shader_stats::dump_diff(shader_stats& s) {
	sblog << "dw:"; print_diff(ndw, s.ndw);
	sblog << ", gpr:" ; print_diff(ngpr, s.ngpr);
	sblog << ", stk:" ; print_diff(nstack, s.nstack);
	sblog << ", alu groups:" ; print_diff(alu_groups, s.alu_groups);
	sblog << ", alu clauses: " ; print_diff(alu_clauses, s.alu_clauses);
	sblog << ", alu:" ; print_diff(alu, s.alu);
	sblog << ", fetch:" ; print_diff(fetch, s.fetch);
	sblog << ", fetch clauses:" ; print_diff(fetch_clauses, s.fetch_clauses);
	sblog << ", cf:" ; print_diff(cf, s.cf);
	sblog << "\n";
}

} // namespace r600_sb