1 /* 2 * Copyright 2013 Vadim Girlin <vadimgirlin (at) gmail.com> 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * on the rights to use, copy, modify, merge, publish, distribute, sub 8 * license, and/or sell copies of the Software, and to permit persons to whom 9 * the Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice (including the next 12 * paragraph) shall be included in all copies or substantial portions of the 13 * Software. 14 * 15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 16 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 17 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL 18 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM, 19 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR 20 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE 21 * USE OR OTHER DEALINGS IN THE SOFTWARE. 22 * 23 * Authors: 24 * Vadim Girlin 25 */ 26 27 #include "sb_bc.h" 28 #include "sb_shader.h" 29 #include "sb_pass.h" 30 31 namespace r600_sb { 32 33 shader::shader(sb_context &sctx, shader_target t, unsigned id) 34 : ctx(sctx), next_temp_value_index(temp_regid_offset), 35 prep_regs_count(), pred_sels(), 36 regions(), inputs(), undef(), val_pool(sizeof(value)), 37 pool(), all_nodes(), src_stats(), opt_stats(), errors(), 38 optimized(), id(id), 39 coal(*this), bbs(), 40 target(t), vt(ex), ex(*this), root(), 41 compute_interferences(), 42 has_alu_predication(), 43 uses_gradients(), safe_math(), ngpr(), nstack(), dce_flags() {} 44 45 bool shader::assign_slot(alu_node* n, alu_node *slots[5]) { 46 47 unsigned slot_flags = ctx.alu_slots(n->bc.op); 48 unsigned slot = n->bc.dst_chan; 49 50 if (!ctx.is_cayman() && (!(slot_flags & AF_V) || slots[slot]) && 51 (slot_flags & AF_S)) 52 slot = SLOT_TRANS; 53 54 if (slots[slot]) 55 return false; 56 57 n->bc.slot = slot; 58 slots[slot] = n; 59 return true; 60 } 61 62 void shader::add_pinned_gpr_values(vvec& vec, unsigned gpr, unsigned comp_mask, 63 bool src) { 64 unsigned chan = 0; 65 while (comp_mask) { 66 if (comp_mask & 1) { 67 value *v = get_gpr_value(src, gpr, chan, false); 68 v->flags |= (VLF_PIN_REG | VLF_PIN_CHAN); 69 if (!v->is_rel()) { 70 v->gpr = v->pin_gpr = v->select; 71 v->fix(); 72 } 73 if (v->array && !v->array->gpr) { 74 // if pinned value can be accessed with indirect addressing 75 // pin the entire array to its original location 76 v->array->gpr = v->array->base_gpr; 77 } 78 vec.push_back(v); 79 } 80 comp_mask >>= 1; 81 ++chan; 82 } 83 } 84 85 cf_node* shader::create_clause(node_subtype nst) { 86 cf_node *n = create_cf(); 87 88 n->subtype = nst; 89 90 switch (nst) { 91 case NST_ALU_CLAUSE: n->bc.set_op(CF_OP_ALU); break; 92 case NST_TEX_CLAUSE: n->bc.set_op(CF_OP_TEX); break; 93 case NST_VTX_CLAUSE: n->bc.set_op(CF_OP_VTX); break; 94 default: assert(!"invalid clause type"); break; 95 } 96 97 n->bc.barrier = 1; 98 return n; 99 } 100 101 void shader::create_bbs() { 102 create_bbs(root, bbs); 103 } 104 105 void shader::expand_bbs() { 106 expand_bbs(bbs); 107 } 108 109 alu_node* shader::create_mov(value* dst, value* src) { 110 alu_node *n = create_alu(); 111 n->bc.set_op(ALU_OP1_MOV); 112 n->dst.push_back(dst); 113 n->src.push_back(src); 114 dst->def = n; 115 116 return n; 117 } 118 119 alu_node* shader::create_copy_mov(value* dst, value* src, unsigned affcost) { 120 alu_node *n = create_mov(dst, src); 121 122 dst->assign_source(src); 123 n->flags |= NF_COPY_MOV | NF_DONT_HOIST; 124 125 if (affcost && dst->is_sgpr() && src->is_sgpr()) 126 coal.add_edge(src, dst, affcost); 127 128 return n; 129 } 130 131 value* shader::get_value(value_kind kind, sel_chan id, 132 unsigned version) { 133 if (version == 0 && kind == VLK_REG && id.sel() < prep_regs_count) 134 return val_pool[id - 1]; 135 136 137 unsigned key = (kind << 28) | (version << 16) | id; 138 value_map::iterator i = reg_values.find(key); 139 if (i != reg_values.end()) { 140 return i->second; 141 } 142 value *v = create_value(kind, id, version); 143 reg_values.insert(std::make_pair(key, v)); 144 return v; 145 } 146 147 value* shader::get_special_value(unsigned sv_id, unsigned version) { 148 sel_chan id(sv_id, 0); 149 return get_value(VLK_SPECIAL_REG, id, version); 150 } 151 152 void shader::fill_array_values(gpr_array *a, vvec &vv) { 153 unsigned sz = a->array_size; 154 vv.resize(sz); 155 for (unsigned i = 0; i < a->array_size; ++i) { 156 vv[i] = get_gpr_value(true, a->base_gpr.sel() + i, a->base_gpr.chan(), 157 false); 158 } 159 } 160 161 value* shader::get_gpr_value(bool src, unsigned reg, unsigned chan, bool rel, 162 unsigned version) { 163 sel_chan id(reg, chan); 164 value *v; 165 gpr_array *a = get_gpr_array(reg, chan); 166 if (rel) { 167 assert(a); 168 v = create_value(VLK_REL_REG, id, 0); 169 v->rel = get_special_value(SV_AR_INDEX); 170 fill_array_values(a, v->muse); 171 if (!src) 172 fill_array_values(a, v->mdef); 173 } else { 174 if (version == 0 && reg < prep_regs_count) 175 return (val_pool[id - 1]); 176 177 v = get_value(VLK_REG, id, version); 178 } 179 180 v->array = a; 181 v->pin_gpr = v->select; 182 183 return v; 184 } 185 186 value* shader::create_temp_value() { 187 sel_chan id(++next_temp_value_index, 0); 188 return get_value(VLK_TEMP, id, 0); 189 } 190 191 value* shader::get_kcache_value(unsigned bank, unsigned index, unsigned chan, alu_kcache_index_mode index_mode) { 192 return get_ro_value(kcache_values, VLK_KCACHE, 193 sel_chan(bank, index, chan, index_mode)); 194 } 195 196 void shader::add_input(unsigned gpr, bool preloaded, unsigned comp_mask) { 197 if (inputs.size() <= gpr) 198 inputs.resize(gpr+1); 199 200 shader_input &i = inputs[gpr]; 201 i.preloaded = preloaded; 202 i.comp_mask = comp_mask; 203 204 if (preloaded) { 205 add_pinned_gpr_values(root->dst, gpr, comp_mask, true); 206 } 207 208 } 209 210 void shader::init() { 211 assert(!root); 212 root = create_container(); 213 } 214 215 void shader::init_call_fs(cf_node* cf) { 216 unsigned gpr = 0; 217 218 assert(target == TARGET_LS || target == TARGET_VS || target == TARGET_ES); 219 220 for(inputs_vec::const_iterator I = inputs.begin(), 221 E = inputs.end(); I != E; ++I, ++gpr) { 222 if (!I->preloaded) 223 add_pinned_gpr_values(cf->dst, gpr, I->comp_mask, false); 224 else 225 add_pinned_gpr_values(cf->src, gpr, I->comp_mask, true); 226 } 227 } 228 229 void shader::set_undef(val_set& s) { 230 value *undefined = get_undef_value(); 231 if (!undefined->gvn_source) 232 vt.add_value(undefined); 233 234 val_set &vs = s; 235 236 for (val_set::iterator I = vs.begin(*this), E = vs.end(*this); I != E; ++I) { 237 value *v = *I; 238 239 assert(!v->is_readonly() && !v->is_rel()); 240 241 v->gvn_source = undefined->gvn_source; 242 } 243 } 244 245 value* shader::create_value(value_kind k, sel_chan regid, unsigned ver) { 246 value *v = val_pool.create(k, regid, ver); 247 return v; 248 } 249 250 value* shader::get_undef_value() { 251 if (!undef) 252 undef = create_value(VLK_UNDEF, 0, 0); 253 return undef; 254 } 255 256 node* shader::create_node(node_type nt, node_subtype nst, node_flags flags) { 257 node *n = new (pool.allocate(sizeof(node))) node(nt, nst, flags); 258 all_nodes.push_back(n); 259 return n; 260 } 261 262 alu_node* shader::create_alu() { 263 alu_node* n = new (pool.allocate(sizeof(alu_node))) alu_node(); 264 all_nodes.push_back(n); 265 return n; 266 } 267 268 alu_group_node* shader::create_alu_group() { 269 alu_group_node* n = 270 new (pool.allocate(sizeof(alu_group_node))) alu_group_node(); 271 all_nodes.push_back(n); 272 return n; 273 } 274 275 alu_packed_node* shader::create_alu_packed() { 276 alu_packed_node* n = 277 new (pool.allocate(sizeof(alu_packed_node))) alu_packed_node(); 278 all_nodes.push_back(n); 279 return n; 280 } 281 282 cf_node* shader::create_cf() { 283 cf_node* n = new (pool.allocate(sizeof(cf_node))) cf_node(); 284 n->bc.barrier = 1; 285 all_nodes.push_back(n); 286 return n; 287 } 288 289 fetch_node* shader::create_fetch() { 290 fetch_node* n = new (pool.allocate(sizeof(fetch_node))) fetch_node(); 291 all_nodes.push_back(n); 292 return n; 293 } 294 295 region_node* shader::create_region() { 296 region_node *n = new (pool.allocate(sizeof(region_node))) 297 region_node(regions.size()); 298 regions.push_back(n); 299 all_nodes.push_back(n); 300 return n; 301 } 302 303 depart_node* shader::create_depart(region_node* target) { 304 depart_node* n = new (pool.allocate(sizeof(depart_node))) 305 depart_node(target, target->departs.size()); 306 target->departs.push_back(n); 307 all_nodes.push_back(n); 308 return n; 309 } 310 311 repeat_node* shader::create_repeat(region_node* target) { 312 repeat_node* n = new (pool.allocate(sizeof(repeat_node))) 313 repeat_node(target, target->repeats.size() + 1); 314 target->repeats.push_back(n); 315 all_nodes.push_back(n); 316 return n; 317 } 318 319 container_node* shader::create_container(node_type nt, node_subtype nst, 320 node_flags flags) { 321 container_node *n = new (pool.allocate(sizeof(container_node))) 322 container_node(nt, nst, flags); 323 all_nodes.push_back(n); 324 return n; 325 } 326 327 if_node* shader::create_if() { 328 if_node* n = new (pool.allocate(sizeof(if_node))) if_node(); 329 all_nodes.push_back(n); 330 return n; 331 } 332 333 bb_node* shader::create_bb(unsigned id, unsigned loop_level) { 334 bb_node* n = new (pool.allocate(sizeof(bb_node))) bb_node(id, loop_level); 335 all_nodes.push_back(n); 336 return n; 337 } 338 339 value* shader::get_special_ro_value(unsigned sel) { 340 return get_ro_value(special_ro_values, VLK_PARAM, sel); 341 } 342 343 value* shader::get_const_value(const literal &v) { 344 value *val = get_ro_value(const_values, VLK_CONST, v); 345 val->literal_value = v; 346 return val; 347 } 348 349 shader::~shader() { 350 for (node_vec::iterator I = all_nodes.begin(), E = all_nodes.end(); 351 I != E; ++I) 352 (*I)->~node(); 353 354 for (gpr_array_vec::iterator I = gpr_arrays.begin(), E = gpr_arrays.end(); 355 I != E; ++I) { 356 delete *I; 357 } 358 } 359 360 void shader::dump_ir() { 361 if (ctx.dump_pass) 362 dump(*this).run(); 363 } 364 365 value* shader::get_value_version(value* v, unsigned ver) { 366 assert(!v->is_readonly() && !v->is_rel()); 367 value *vv = get_value(v->kind, v->select, ver); 368 assert(vv); 369 370 if (v->array) { 371 vv->array = v->array; 372 } 373 374 return vv; 375 } 376 377 gpr_array* shader::get_gpr_array(unsigned reg, unsigned chan) { 378 379 for (regarray_vec::iterator I = gpr_arrays.begin(), 380 E = gpr_arrays.end(); I != E; ++I) { 381 gpr_array* a = *I; 382 unsigned achan = a->base_gpr.chan(); 383 unsigned areg = a->base_gpr.sel(); 384 if (achan == chan && (reg >= areg && reg < areg+a->array_size)) 385 return a; 386 } 387 return NULL; 388 } 389 390 void shader::add_gpr_array(unsigned gpr_start, unsigned gpr_count, 391 unsigned comp_mask) { 392 unsigned chan = 0; 393 while (comp_mask) { 394 if (comp_mask & 1) { 395 gpr_array *a = new gpr_array( 396 sel_chan(gpr_start, chan), gpr_count); 397 398 SB_DUMP_PASS( sblog << "add_gpr_array: @" << a->base_gpr 399 << " [" << a->array_size << "]\n"; 400 ); 401 402 gpr_arrays.push_back(a); 403 } 404 comp_mask >>= 1; 405 ++chan; 406 } 407 } 408 409 value* shader::get_pred_sel(int sel) { 410 assert(sel == 0 || sel == 1); 411 if (!pred_sels[sel]) 412 pred_sels[sel] = get_const_value(sel); 413 414 return pred_sels[sel]; 415 } 416 417 cf_node* shader::create_cf(unsigned op) { 418 cf_node *c = create_cf(); 419 c->bc.set_op(op); 420 c->bc.barrier = 1; 421 return c; 422 } 423 424 std::string shader::get_full_target_name() { 425 std::string s = get_shader_target_name(); 426 s += "/"; 427 s += ctx.get_hw_chip_name(); 428 s += "/"; 429 s += ctx.get_hw_class_name(); 430 return s; 431 } 432 433 const char* shader::get_shader_target_name() { 434 switch (target) { 435 case TARGET_VS: return "VS"; 436 case TARGET_ES: return "ES"; 437 case TARGET_PS: return "PS"; 438 case TARGET_GS: return "GS"; 439 case TARGET_HS: return "HS"; 440 case TARGET_LS: return "LS"; 441 case TARGET_COMPUTE: return "COMPUTE"; 442 case TARGET_FETCH: return "FETCH"; 443 default: 444 return "INVALID_TARGET"; 445 } 446 } 447 448 void shader::simplify_dep_rep(node* dr) { 449 container_node *p = dr->parent; 450 if (p->is_repeat()) { 451 repeat_node *r = static_cast<repeat_node*>(p); 452 r->target->expand_repeat(r); 453 } else if (p->is_depart()) { 454 depart_node *d = static_cast<depart_node*>(p); 455 d->target->expand_depart(d); 456 } 457 if (dr->next) 458 dr->parent->cut(dr->next, NULL); 459 } 460 461 462 // FIXME this is used in some places as the max non-temp gpr, 463 // (MAX_GPR - 2 * ctx.alu_temp_gprs) should be used for that instead. 464 unsigned shader::first_temp_gpr() { 465 return MAX_GPR - ctx.alu_temp_gprs; 466 } 467 468 unsigned shader::num_nontemp_gpr() { 469 return MAX_GPR - 2 * ctx.alu_temp_gprs; 470 } 471 472 void shader::set_uses_kill() { 473 if (root->src.empty()) 474 root->src.resize(1); 475 476 if (!root->src[0]) 477 root->src[0] = get_special_value(SV_VALID_MASK); 478 } 479 480 alu_node* shader::clone(alu_node* n) { 481 alu_node *c = create_alu(); 482 483 // FIXME: this may be wrong with indirect operands 484 c->src = n->src; 485 c->dst = n->dst; 486 487 c->bc = n->bc; 488 c->pred = n->pred; 489 490 return c; 491 } 492 493 void shader::collect_stats(bool opt) { 494 if (!sb_context::dump_stat) 495 return; 496 497 shader_stats &s = opt ? opt_stats : src_stats; 498 499 s.shaders = 1; 500 s.ngpr = ngpr; 501 s.nstack = nstack; 502 s.collect(root); 503 504 if (opt) 505 ctx.opt_stats.accumulate(s); 506 else 507 ctx.src_stats.accumulate(s); 508 } 509 510 value* shader::get_ro_value(value_map& vm, value_kind vk, unsigned key) { 511 value_map::iterator I = vm.find(key); 512 if (I != vm.end()) 513 return I->second; 514 value *v = create_value(vk, key, 0); 515 v->flags = VLF_READONLY; 516 vm.insert(std::make_pair(key, v)); 517 return v; 518 } 519 520 void shader::create_bbs(container_node* n, bbs_vec &bbs, int loop_level) { 521 522 bool inside_bb = false; 523 bool last_inside_bb = true; 524 node_iterator bb_start(n->begin()), I(bb_start), E(n->end()); 525 526 for (; I != E; ++I) { 527 node *k = *I; 528 inside_bb = k->type == NT_OP; 529 530 if (inside_bb && !last_inside_bb) 531 bb_start = I; 532 else if (!inside_bb) { 533 if (last_inside_bb 534 && I->type != NT_REPEAT 535 && I->type != NT_DEPART 536 && I->type != NT_IF) { 537 bb_node *bb = create_bb(bbs.size(), loop_level); 538 bbs.push_back(bb); 539 n->insert_node_before(*bb_start, bb); 540 if (bb_start != I) 541 bb->move(bb_start, I); 542 } 543 544 if (k->is_container()) { 545 546 bool loop = false; 547 if (k->type == NT_REGION) { 548 loop = static_cast<region_node*>(k)->is_loop(); 549 } 550 551 create_bbs(static_cast<container_node*>(k), bbs, 552 loop_level + loop); 553 } 554 } 555 556 if (k->type == NT_DEPART) 557 return; 558 559 last_inside_bb = inside_bb; 560 } 561 562 if (last_inside_bb) { 563 bb_node *bb = create_bb(bbs.size(), loop_level); 564 bbs.push_back(bb); 565 if (n->empty()) 566 n->push_back(bb); 567 else { 568 n->insert_node_before(*bb_start, bb); 569 if (bb_start != n->end()) 570 bb->move(bb_start, n->end()); 571 } 572 } else { 573 if (n->last && n->last->type == NT_IF) { 574 bb_node *bb = create_bb(bbs.size(), loop_level); 575 bbs.push_back(bb); 576 n->push_back(bb); 577 } 578 } 579 } 580 581 void shader::expand_bbs(bbs_vec &bbs) { 582 583 for (bbs_vec::iterator I = bbs.begin(), E = bbs.end(); I != E; ++I) { 584 bb_node *b = *I; 585 b->expand(); 586 } 587 } 588 589 sched_queue_id shader::get_queue_id(node* n) { 590 switch (n->subtype) { 591 case NST_ALU_INST: 592 case NST_ALU_PACKED_INST: 593 case NST_COPY: 594 case NST_PSI: 595 return SQ_ALU; 596 case NST_FETCH_INST: { 597 fetch_node *f = static_cast<fetch_node*>(n); 598 if (ctx.is_r600() && (f->bc.op_ptr->flags & FF_VTX)) 599 return SQ_VTX; 600 return SQ_TEX; 601 } 602 case NST_CF_INST: 603 return SQ_CF; 604 default: 605 assert(0); 606 return SQ_NUM; 607 } 608 } 609 610 void shader_stats::collect(node *n) { 611 if (n->is_alu_inst()) 612 ++alu; 613 else if (n->is_fetch_inst()) 614 ++fetch; 615 else if (n->is_container()) { 616 container_node *c = static_cast<container_node*>(n); 617 618 if (n->is_alu_group()) 619 ++alu_groups; 620 else if (n->is_alu_clause()) 621 ++alu_clauses; 622 else if (n->is_fetch_clause()) 623 ++fetch_clauses; 624 else if (n->is_cf_inst()) 625 ++cf; 626 627 if (!c->empty()) { 628 for (node_iterator I = c->begin(), E = c->end(); I != E; ++I) { 629 collect(*I); 630 } 631 } 632 } 633 } 634 635 void shader_stats::accumulate(shader_stats& s) { 636 ++shaders; 637 ndw += s.ndw; 638 ngpr += s.ngpr; 639 nstack += s.nstack; 640 641 alu += s.alu; 642 alu_groups += s.alu_groups; 643 alu_clauses += s.alu_clauses; 644 fetch += s.fetch; 645 fetch_clauses += s.fetch_clauses; 646 cf += s.cf; 647 } 648 649 void shader_stats::dump() { 650 sblog << "dw:" << ndw << ", gpr:" << ngpr << ", stk:" << nstack 651 << ", alu groups:" << alu_groups << ", alu clauses: " << alu_clauses 652 << ", alu:" << alu << ", fetch:" << fetch 653 << ", fetch clauses:" << fetch_clauses 654 << ", cf:" << cf; 655 656 if (shaders > 1) 657 sblog << ", shaders:" << shaders; 658 659 sblog << "\n"; 660 } 661 662 static void print_diff(unsigned d1, unsigned d2) { 663 if (d1) 664 sblog << ((int)d2 - (int)d1) * 100 / (int)d1 << "%"; 665 else if (d2) 666 sblog << "N/A"; 667 else 668 sblog << "0%"; 669 } 670 671 void shader_stats::dump_diff(shader_stats& s) { 672 sblog << "dw:"; print_diff(ndw, s.ndw); 673 sblog << ", gpr:" ; print_diff(ngpr, s.ngpr); 674 sblog << ", stk:" ; print_diff(nstack, s.nstack); 675 sblog << ", alu groups:" ; print_diff(alu_groups, s.alu_groups); 676 sblog << ", alu clauses: " ; print_diff(alu_clauses, s.alu_clauses); 677 sblog << ", alu:" ; print_diff(alu, s.alu); 678 sblog << ", fetch:" ; print_diff(fetch, s.fetch); 679 sblog << ", fetch clauses:" ; print_diff(fetch_clauses, s.fetch_clauses); 680 sblog << ", cf:" ; print_diff(cf, s.cf); 681 sblog << "\n"; 682 } 683 684 } // namespace r600_sb 685