/*
 * Copyright 2013 Vadim Girlin <vadimgirlin (at) gmail.com>
 *
 * Permission is hereby granted, free of charge, to any person obtaining a
 * copy of this software and associated documentation files (the "Software"),
 * to deal in the Software without restriction, including without limitation
 * on the rights to use, copy, modify, merge, publish, distribute, sub
 * license, and/or sell copies of the Software, and to permit persons to whom
 * the Software is furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice (including the next
 * paragraph) shall be included in all copies or substantial portions of the
 * Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NON-INFRINGEMENT. IN NO EVENT SHALL
 * THE AUTHOR(S) AND/OR THEIR SUPPLIERS BE LIABLE FOR ANY CLAIM,
 * DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR
 * OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE
 * USE OR OTHER DEALINGS IN THE SOFTWARE.
 *
 * Authors:
 *      Vadim Girlin
 */

#define BCP_DEBUG 0

#if BCP_DEBUG
#define BCP_DUMP(q) do { q } while (0)
#else
#define BCP_DUMP(q)
#endif

#include "r600_pipe.h"
#include "r600_shader.h"
#include "eg_sq.h" // CM_V_SQ_MOVA_DST_CF_IDX0/1

#include <stack>

#include "sb_bc.h"
#include "sb_shader.h"
#include "sb_pass.h"
#include "util/macros.h"

namespace r600_sb {

int bc_parser::decode() {

   dw = bc->bytecode;
   bc_ndw = bc->ndw;
   max_cf = 0;

   dec = new bc_decoder(ctx, dw, bc_ndw);

   shader_target t = TARGET_UNKNOWN;

   if (pshader) {
      switch (bc->type) {
      case PIPE_SHADER_FRAGMENT: t = TARGET_PS; break;
      case PIPE_SHADER_VERTEX:
         t = pshader->vs_as_ls ? TARGET_LS : (pshader->vs_as_es ? TARGET_ES : TARGET_VS);
         break;
      case PIPE_SHADER_GEOMETRY: t = TARGET_GS; break;
      case PIPE_SHADER_COMPUTE: t = TARGET_COMPUTE; break;
      case PIPE_SHADER_TESS_CTRL: t = TARGET_HS; break;
      case PIPE_SHADER_TESS_EVAL: t = pshader->tes_as_es ? TARGET_ES : TARGET_VS; break;
      default: assert(!"unknown shader target"); return -1; break;
      }
   } else {
      if (bc->type == PIPE_SHADER_COMPUTE)
         t = TARGET_COMPUTE;
      else
         t = TARGET_FETCH;
   }

   sh = new shader(ctx, t, bc->debug_id);
   sh->safe_math = sb_context::safe_math || (t == TARGET_COMPUTE);

   int r = decode_shader();

   delete dec;

   sh->ngpr = bc->ngpr;
   sh->nstack = bc->nstack;

   return r;
}

int bc_parser::decode_shader() {
   int r = 0;
   unsigned i = 0;
   bool eop = false;

   sh->init();

   do {
      eop = false;
      if ((r = decode_cf(i, eop)))
         return r;

   } while (!eop || (i >> 1) < max_cf);

   return 0;
}

int bc_parser::prepare() {
   int r = 0;
   if ((r = parse_decls()))
      return r;
   if ((r = prepare_ir()))
      return r;
   return 0;
}

int bc_parser::parse_decls() {

   if (!pshader) {
      if (gpr_reladdr)
         sh->add_gpr_array(0, bc->ngpr, 0x0F);

      // compute shaders have some values preloaded in R0, R1
      sh->add_input(0 /* GPR */, true /* preloaded */, 0x0F /* mask */);
      sh->add_input(1 /* GPR */, true /* preloaded */, 0x0F /* mask */);
      return 0;
   }

   if (pshader->indirect_files & ~((1 << TGSI_FILE_CONSTANT) | (1 << TGSI_FILE_SAMPLER))) {

      assert(pshader->num_arrays);

      if (pshader->num_arrays) {
         for (unsigned i = 0; i < pshader->num_arrays; ++i) {
            r600_shader_array &a = pshader->arrays[i];
            sh->add_gpr_array(a.gpr_start, a.gpr_count, a.comp_mask);
         }
      } else {
         sh->add_gpr_array(0, pshader->bc.ngpr, 0x0F);
      }
   }

   // GS inputs can add indirect addressing
   if (sh->target == TARGET_GS) {
      if (pshader->num_arrays) {
         for (unsigned i = 0; i < pshader->num_arrays; ++i) {
            r600_shader_array &a = pshader->arrays[i];
            sh->add_gpr_array(a.gpr_start, a.gpr_count, a.comp_mask);
         }
      }
   }

   if (sh->target == TARGET_VS || sh->target == TARGET_ES || sh->target == TARGET_HS)
      sh->add_input(0, 1, 0x0F);
   else if (sh->target == TARGET_GS) {
      sh->add_input(0, 1, 0x0F);
      sh->add_input(1, 1, 0x0F);
   }

   bool ps_interp = ctx.hw_class >= HW_CLASS_EVERGREEN
         && sh->target == TARGET_PS;

   bool ij_interpolators[6];
   memset(ij_interpolators, 0, sizeof(ij_interpolators));

   for (unsigned i = 0; i < pshader->ninput; ++i) {
      r600_shader_io & in = pshader->input[i];
      bool preloaded = sh->target == TARGET_PS && !(ps_interp && in.spi_sid);
      sh->add_input(in.gpr, preloaded, /*in.write_mask*/ 0x0F);
      if (ps_interp && in.spi_sid) {
         int k = eg_get_interpolator_index(in.interpolate, in.interpolate_location);
         if (k >= 0)
            ij_interpolators[k] |= true;
      }
   }

   if (ps_interp) {
      /* add the egcm ij interpolators to live inputs */
      unsigned num_ij = 0;
      for (unsigned i = 0; i < ARRAY_SIZE(ij_interpolators); i++) {
         num_ij += ij_interpolators[i];
      }

      unsigned mask = (1 << (2 * num_ij)) - 1;
      unsigned gpr = 0;

      while (mask) {
         sh->add_input(gpr, true, mask & 0x0F);
         ++gpr;
         mask >>= 4;
      }
   }

   return 0;
}

int bc_parser::decode_cf(unsigned &i, bool &eop) {

   int r;

   cf_node *cf = sh->create_cf();
   sh->root->push_back(cf);

   unsigned id = i >> 1;

   cf->bc.id = id;

   if (cf_map.size() < id + 1)
      cf_map.resize(id + 1);

   cf_map[id] = cf;

   if ((r = dec->decode_cf(i, cf->bc)))
      return r;

   cf_op_flags flags = (cf_op_flags)cf->bc.op_ptr->flags;
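   // The CF opcode class determines what follows: ALU and fetch clauses are
   // decoded into their instructions, EXP/MEM ops are checked for relative
   // addressing, and branch targets extend max_cf so decoding continues past
   // an early end_of_program marker.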
   if (flags & CF_ALU) {
      if ((r = decode_alu_clause(cf)))
         return r;
   } else if (flags & CF_FETCH) {
      if ((r = decode_fetch_clause(cf)))
         return r;
   } else if (flags & CF_EXP) {
      if (cf->bc.rw_rel)
         gpr_reladdr = true;
      assert(!cf->bc.rw_rel);
   } else if (flags & CF_MEM) {
      if (cf->bc.rw_rel)
         gpr_reladdr = true;
      assert(!cf->bc.rw_rel);
   } else if (flags & CF_BRANCH) {
      if (cf->bc.addr > max_cf)
         max_cf = cf->bc.addr;
   }

   eop = cf->bc.end_of_program || cf->bc.op == CF_OP_CF_END ||
         cf->bc.op == CF_OP_RET;
   return 0;
}

int bc_parser::decode_alu_clause(cf_node* cf) {
   unsigned i = cf->bc.addr << 1, cnt = cf->bc.count + 1, gcnt;

   cf->subtype = NST_ALU_CLAUSE;

   cgroup = 0;
   memset(slots[0], 0, 5*sizeof(slots[0][0]));

   unsigned ng = 0;

   do {
      decode_alu_group(cf, i, gcnt);
      assert(gcnt <= cnt);
      cnt -= gcnt;
      ng++;
   } while (cnt);

   return 0;
}

int bc_parser::decode_alu_group(cf_node* cf, unsigned &i, unsigned &gcnt) {
   int r;
   alu_node *n;
   alu_group_node *g = sh->create_alu_group();

   cgroup = !cgroup;
   memset(slots[cgroup], 0, 5*sizeof(slots[0][0]));
   gcnt = 0;

   unsigned literal_mask = 0;

   do {
      n = sh->create_alu();
      g->push_back(n);

      if ((r = dec->decode_alu(i, n->bc)))
         return r;

      if (!sh->assign_slot(n, slots[cgroup])) {
         assert(!"alu slot assignment failed");
         return -1;
      }

      gcnt++;

   } while (gcnt <= 5 && !n->bc.last);

   assert(n->bc.last);

   for (node_iterator I = g->begin(), E = g->end(); I != E; ++I) {
      n = static_cast<alu_node*>(*I);

      if (n->bc.dst_rel)
         gpr_reladdr = true;

      for (int k = 0; k < n->bc.op_ptr->src_count; ++k) {
         bc_alu_src &src = n->bc.src[k];
         if (src.rel)
            gpr_reladdr = true;
         if (src.sel == ALU_SRC_LITERAL) {
            literal_mask |= (1 << src.chan);
            src.value.u = dw[i + src.chan];
         }
      }
   }

   unsigned literal_ndw = 0;
   while (literal_mask) {
      g->literals.push_back(dw[i + literal_ndw]);
      literal_ndw += 1;
      literal_mask >>= 1;
   }

   literal_ndw = (literal_ndw + 1) & ~1u;

   i += literal_ndw;
   gcnt += literal_ndw >> 1;

   cf->push_back(g);
   return 0;
}

int bc_parser::prepare_alu_clause(cf_node* cf) {

   // loop over alu groups
   for (node_iterator I = cf->begin(), E = cf->end(); I != E; ++I) {
      assert(I->subtype == NST_ALU_GROUP);
      alu_group_node *g = static_cast<alu_group_node*>(*I);
      prepare_alu_group(cf, g);
   }

   return 0;
}

void bc_parser::save_set_cf_index(value *val, unsigned idx)
{
   assert(idx <= 1);
   assert(val);
   cf_index_value[idx] = val;
}
value *bc_parser::get_cf_index_value(unsigned idx)
{
   assert(idx <= 1);
   assert(cf_index_value[idx]);
   return cf_index_value[idx];
}
void bc_parser::save_mova(alu_node *mova)
{
   assert(mova);
   this->mova = mova;
}
alu_node *bc_parser::get_mova()
{
   assert(mova);
   return mova;
}

int bc_parser::prepare_alu_group(cf_node* cf, alu_group_node *g) {

   alu_node *n;

   cgroup = !cgroup;
   memset(slots[cgroup], 0, 5*sizeof(slots[0][0]));

   for (node_iterator I = g->begin(), E = g->end();
         I != E; ++I) {
      n = static_cast<alu_node*>(*I);
      bool ubo_indexing[2] = {};

      if (!sh->assign_slot(n, slots[cgroup])) {
         assert(!"alu slot assignment failed");
         return -1;
      }
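      // The bytecode-level operands are translated into IR values below:
      // special destinations (predicate, exec/valid masks, AR index) are
      // attached first, then each source is resolved to a GPR, kcache,
      // literal, or special value.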
      unsigned src_count = n->bc.op_ptr->src_count;

      if (ctx.alu_slots(n->bc.op) & AF_4SLOT)
         n->flags |= NF_ALU_4SLOT;

      n->src.resize(src_count);

      unsigned flags = n->bc.op_ptr->flags;

      if (flags & AF_PRED) {
         n->dst.resize(3);
         if (n->bc.update_pred)
            n->dst[1] = sh->get_special_value(SV_ALU_PRED);
         if (n->bc.update_exec_mask)
            n->dst[2] = sh->get_special_value(SV_EXEC_MASK);

         n->flags |= NF_DONT_HOIST;

      } else if (flags & AF_KILL) {

         n->dst.resize(2);
         n->dst[1] = sh->get_special_value(SV_VALID_MASK);
         sh->set_uses_kill();

         n->flags |= NF_DONT_HOIST | NF_DONT_MOVE |
               NF_DONT_KILL | NF_SCHEDULE_EARLY;

      } else {
         n->dst.resize(1);
      }

      if (n->bc.op == ALU_OP0_SET_CF_IDX0 || n->bc.op == ALU_OP0_SET_CF_IDX1) {
         // Move the CF_IDX value into the tex instruction operands; the
         // scheduler will later re-emit the setting of CF_IDX and DCE will
         // kill this op.
         save_set_cf_index(get_mova()->src[0], n->bc.op == ALU_OP0_SET_CF_IDX1);
      } else if (flags & AF_MOVA) {

         n->dst[0] = sh->get_special_value(SV_AR_INDEX);
         save_mova(n);

         n->flags |= NF_DONT_HOIST;

      } else if (n->bc.op_ptr->src_count == 3 || n->bc.write_mask) {
         assert(!n->bc.dst_rel || n->bc.index_mode == INDEX_AR_X);

         value *v = sh->get_gpr_value(false, n->bc.dst_gpr, n->bc.dst_chan,
               n->bc.dst_rel);

         n->dst[0] = v;
      }

      if (n->bc.pred_sel) {
         sh->has_alu_predication = true;
         n->pred = sh->get_special_value(SV_ALU_PRED);
      }

      for (unsigned s = 0; s < src_count; ++s) {
         bc_alu_src &src = n->bc.src[s];

         if (src.sel == ALU_SRC_LITERAL) {
            n->src[s] = sh->get_const_value(src.value);
         } else if (src.sel == ALU_SRC_PS || src.sel == ALU_SRC_PV) {
            unsigned pgroup = !cgroup, prev_slot = src.sel == ALU_SRC_PS ?
                  SLOT_TRANS : src.chan;

            // XXX shouldn't happen but llvm backend uses PS on cayman
            if (prev_slot == SLOT_TRANS && ctx.is_cayman())
               prev_slot = SLOT_X;

            alu_node *prev_alu = slots[pgroup][prev_slot];

            assert(prev_alu);

            if (!prev_alu->dst[0]) {
               value * t = sh->create_temp_value();
               prev_alu->dst[0] = t;
            }

            value *d = prev_alu->dst[0];

            if (d->is_rel()) {
               d = sh->get_gpr_value(true, prev_alu->bc.dst_gpr,
                     prev_alu->bc.dst_chan,
                     prev_alu->bc.dst_rel);
            }

            n->src[s] = d;
         } else if (ctx.is_kcache_sel(src.sel)) {
            unsigned sel = src.sel, kc_addr;
            unsigned kc_set = ((sel >> 7) & 2) + ((sel >> 5) & 1);

            bc_kcache &kc = cf->bc.kc[kc_set];
            kc_addr = (kc.addr << 4) + (sel & 0x1F);
            n->src[s] = sh->get_kcache_value(kc.bank, kc_addr, src.chan, (alu_kcache_index_mode)kc.index_mode);

            if (kc.index_mode != KC_INDEX_NONE) {
               assert(kc.index_mode != KC_LOCK_LOOP);
               ubo_indexing[kc.index_mode - KC_INDEX_0] = true;
            }
         } else if (src.sel < MAX_GPR) {
            value *v = sh->get_gpr_value(true, src.sel, src.chan, src.rel);

            n->src[s] = v;

         } else if (src.sel >= ALU_SRC_PARAM_OFFSET) {
            // using slot for value channel because in fact the slot
            // determines the channel that is loaded by INTERP_LOAD_P0
            // (and maybe some others).
            // otherwise GVN will consider INTERP_LOAD_P0s with the same
            // param index as equal instructions and leave only one of them
            n->src[s] = sh->get_special_ro_value(sel_chan(src.sel,
                  n->bc.slot));
         } else {
            switch (src.sel) {
            case ALU_SRC_0:
               n->src[s] = sh->get_const_value(0);
               break;
            case ALU_SRC_0_5:
               n->src[s] = sh->get_const_value(0.5f);
               break;
            case ALU_SRC_1:
               n->src[s] = sh->get_const_value(1.0f);
               break;
            case ALU_SRC_1_INT:
               n->src[s] = sh->get_const_value(1);
               break;
            case ALU_SRC_M_1_INT:
               n->src[s] = sh->get_const_value(-1);
               break;
            default:
               n->src[s] = sh->get_special_ro_value(src.sel);
               break;
            }
         }
      }

      // add UBO index values if any as dependencies
      if (ubo_indexing[0]) {
         n->src.push_back(get_cf_index_value(0));
      }
      if (ubo_indexing[1]) {
         n->src.push_back(get_cf_index_value(1));
      }

      if ((n->bc.dst_gpr == CM_V_SQ_MOVA_DST_CF_IDX0 || n->bc.dst_gpr == CM_V_SQ_MOVA_DST_CF_IDX1) &&
            ctx.is_cayman())
         // Move the CF_IDX value into the tex instruction operands; the
         // scheduler will later re-emit the setting of CF_IDX.
         save_set_cf_index(n->src[0], n->bc.dst_gpr == CM_V_SQ_MOVA_DST_CF_IDX1);
   }

   // pack multislot instructions into alu_packed_node

   alu_packed_node *p = NULL;
   for (node_iterator N, I = g->begin(), E = g->end(); I != E; I = N) {
      N = I + 1;
      alu_node *a = static_cast<alu_node*>(*I);
      unsigned sflags = a->bc.slot_flags;

      if (sflags == AF_4V || (ctx.is_cayman() && sflags == AF_S)) {
         if (!p)
            p = sh->create_alu_packed();

         a->remove();
         p->push_back(a);
      }
   }

   if (p) {
      g->push_front(p);

      if (p->count() == 3 && ctx.is_cayman()) {
         // cayman's scalar instruction that can use 3 or 4 slots

         // FIXME for simplicity we'll always add 4th slot,
         // but probably we might want to always remove 4th slot and make
         // sure that regalloc won't choose 'w' component for dst

         alu_node *f = static_cast<alu_node*>(p->first);
         alu_node *a = sh->create_alu();
         a->src = f->src;
         a->dst.resize(f->dst.size());
         a->bc = f->bc;
         a->bc.slot = SLOT_W;
         p->push_back(a);
      }
   }

   return 0;
}

int bc_parser::decode_fetch_clause(cf_node* cf) {
   int r;
   unsigned i = cf->bc.addr << 1, cnt = cf->bc.count + 1;

   cf->subtype = NST_TEX_CLAUSE;

   while (cnt--) {
      fetch_node *n = sh->create_fetch();
      cf->push_back(n);
      if ((r = dec->decode_fetch(i, n->bc)))
         return r;
      if (n->bc.src_rel || n->bc.dst_rel)
         gpr_reladdr = true;

   }
   return 0;
}

int bc_parser::prepare_fetch_clause(cf_node *cf) {

   vvec grad_v, grad_h, texture_offsets;

   for (node_iterator I = cf->begin(), E = cf->end(); I != E; ++I) {

      fetch_node *n = static_cast<fetch_node*>(*I);
      assert(n->is_valid());

      unsigned flags = n->bc.op_ptr->flags;

      unsigned vtx = flags & FF_VTX;
      unsigned num_src = vtx ? ctx.vtx_src_num : 4;
      n->dst.resize(4);

      if (flags & (FF_SETGRAD | FF_USEGRAD | FF_GETGRAD)) {
         sh->uses_gradients = true;
      }

      if (flags & (FF_SETGRAD | FF_SET_TEXTURE_OFFSETS)) {

         vvec *grad = NULL;

         switch (n->bc.op) {
         case FETCH_OP_SET_GRADIENTS_V:
            grad = &grad_v;
            break;
         case FETCH_OP_SET_GRADIENTS_H:
            grad = &grad_h;
            break;
         case FETCH_OP_SET_TEXTURE_OFFSETS:
            grad = &texture_offsets;
            break;
         default:
            assert(!"unexpected SET_GRAD instruction");
            return -1;
         }

         if (grad->empty())
            grad->resize(4);

         for (unsigned s = 0; s < 4; ++s) {
            unsigned sw = n->bc.src_sel[s];
            if (sw <= SEL_W)
               (*grad)[s] = sh->get_gpr_value(true, n->bc.src_gpr,
                     sw, false);
            else if (sw == SEL_0)
               (*grad)[s] = sh->get_const_value(0.0f);
            else if (sw == SEL_1)
               (*grad)[s] = sh->get_const_value(1.0f);
         }
      } else {
         // Fold source values for instructions with hidden target values
         // into the instructions using them. The set instructions are later
         // re-emitted by bc_finalizer.
         if (flags & FF_USEGRAD) {
            n->src.resize(12);
            std::copy(grad_v.begin(), grad_v.end(), n->src.begin() + 4);
            std::copy(grad_h.begin(), grad_h.end(), n->src.begin() + 8);
         } else if (flags & FF_USE_TEXTURE_OFFSETS) {
            n->src.resize(8);
            std::copy(texture_offsets.begin(), texture_offsets.end(), n->src.begin() + 4);
         } else {
            n->src.resize(4);
         }

         for (int s = 0; s < 4; ++s) {
            if (n->bc.dst_sel[s] != SEL_MASK)
               n->dst[s] = sh->get_gpr_value(false, n->bc.dst_gpr, s, false);
            // NOTE: it doesn't matter here which components of the result we
            // are using, but original n->bc.dst_sel should be taken into
            // account when building the bytecode
         }
         for (unsigned s = 0; s < num_src; ++s) {
            if (n->bc.src_sel[s] <= SEL_W)
               n->src[s] = sh->get_gpr_value(true, n->bc.src_gpr,
                     n->bc.src_sel[s], false);
         }

         // Scheduler will emit the appropriate instructions to set CF_IDX0/1
         if (n->bc.sampler_index_mode != V_SQ_CF_INDEX_NONE) {
            n->src.push_back(get_cf_index_value(n->bc.sampler_index_mode == V_SQ_CF_INDEX_1));
         }
         if (n->bc.resource_index_mode != V_SQ_CF_INDEX_NONE) {
            n->src.push_back(get_cf_index_value(n->bc.resource_index_mode == V_SQ_CF_INDEX_1));
         }
      }
   }

   return 0;
}

int bc_parser::prepare_ir() {

   for (id_cf_map::iterator I = cf_map.begin(), E = cf_map.end(); I != E; ++I) {
      cf_node *c = *I;

      if (!c)
         continue;

      unsigned flags = c->bc.op_ptr->flags;

      if (flags & CF_ALU) {
         prepare_alu_clause(c);
      } else if (flags & CF_FETCH) {
         prepare_fetch_clause(c);
      } else if (c->bc.op == CF_OP_CALL_FS) {
         sh->init_call_fs(c);
         c->flags |= NF_SCHEDULE_EARLY | NF_DONT_MOVE;
      } else if (flags & CF_LOOP_START) {
         prepare_loop(c);
      } else if (c->bc.op == CF_OP_JUMP) {
         prepare_if(c);
      } else if (c->bc.op == CF_OP_LOOP_END) {
         loop_stack.pop();
      } else if (c->bc.op == CF_OP_LOOP_CONTINUE) {
         assert(!loop_stack.empty());
         repeat_node *rep = sh->create_repeat(loop_stack.top());
         if (c->parent->first != c)
            rep->move(c->parent->first, c);
         c->replace_with(rep);
         sh->simplify_dep_rep(rep);
      } else if (c->bc.op == CF_OP_LOOP_BREAK) {
         assert(!loop_stack.empty());
         depart_node *dep = sh->create_depart(loop_stack.top());
         if (c->parent->first != c)
            dep->move(c->parent->first, c);
         c->replace_with(dep);
         sh->simplify_dep_rep(dep);
      } else if (flags & CF_EXP) {

         // unroll burst exports

         assert(c->bc.op == CF_OP_EXPORT || c->bc.op == CF_OP_EXPORT_DONE);

         c->bc.set_op(CF_OP_EXPORT);

         unsigned burst_count = c->bc.burst_count;
         unsigned eop = c->bc.end_of_program;

         c->bc.end_of_program = 0;
         c->bc.burst_count = 0;

         do {
            c->src.resize(4);

            for (int s = 0; s < 4; ++s) {
               switch (c->bc.sel[s]) {
               case SEL_0:
                  c->src[s] = sh->get_const_value(0.0f);
                  break;
               case SEL_1:
                  c->src[s] = sh->get_const_value(1.0f);
                  break;
               case SEL_MASK:
                  break;
               default:
                  if (c->bc.sel[s] <= SEL_W)
                     c->src[s] = sh->get_gpr_value(true, c->bc.rw_gpr,
                           c->bc.sel[s], false);
                  else
                     assert(!"invalid src_sel for export");
               }
            }

            if (!burst_count--)
               break;

            cf_node *cf_next = sh->create_cf();
            cf_next->bc = c->bc;
            ++cf_next->bc.rw_gpr;
            ++cf_next->bc.array_base;

            c->insert_after(cf_next);
            c = cf_next;

         } while (1);

         c->bc.end_of_program = eop;
      } else if (flags & CF_MEM) {

         unsigned burst_count = c->bc.burst_count;
         unsigned eop = c->bc.end_of_program;

         c->bc.end_of_program = 0;
         c->bc.burst_count = 0;

         do {

            c->src.resize(4);

            for (int s = 0; s < 4; ++s) {
               if (c->bc.comp_mask & (1 << s))
                  c->src[s] =
                        sh->get_gpr_value(true, c->bc.rw_gpr, s, false);
            }

            if (((flags & CF_RAT) || (!(flags & CF_STRM))) && (c->bc.type & 1)) { // indexed write
               c->src.resize(8);
               for (int s = 0; s < 3; ++s) {
                  c->src[4 + s] =
                        sh->get_gpr_value(true, c->bc.index_gpr, s, false);
               }

               // FIXME probably we can relax it a bit
               c->flags |= NF_DONT_HOIST | NF_DONT_MOVE;
            }

            if (flags & CF_EMIT) {
               // Instruction implicitly depends on prior [EMIT_][CUT]_VERTEX
               c->src.push_back(sh->get_special_value(SV_GEOMETRY_EMIT));
               c->dst.push_back(sh->get_special_value(SV_GEOMETRY_EMIT));
               if (sh->target == TARGET_ES) {
                  // For ES shaders this is an export
                  c->flags |= NF_DONT_KILL;
               }
            }

            if (!burst_count--)
               break;

            cf_node *cf_next = sh->create_cf();
            cf_next->bc = c->bc;
            ++cf_next->bc.rw_gpr;

            // FIXME is it correct?
            cf_next->bc.array_base += cf_next->bc.elem_size + 1;

            c->insert_after(cf_next);
            c = cf_next;
         } while (1);

         c->bc.end_of_program = eop;

      } else if (flags & CF_EMIT) {
         /* quick peephole */
         cf_node *prev = static_cast<cf_node *>(c->prev);
         if (c->bc.op == CF_OP_CUT_VERTEX &&
               prev && prev->is_valid() &&
               prev->bc.op == CF_OP_EMIT_VERTEX &&
               c->bc.count == prev->bc.count) {
            prev->bc.set_op(CF_OP_EMIT_CUT_VERTEX);
            prev->bc.end_of_program = c->bc.end_of_program;
            c->remove();
         }
         else {
            c->flags |= NF_DONT_KILL | NF_DONT_HOIST | NF_DONT_MOVE;

            c->src.push_back(sh->get_special_value(SV_GEOMETRY_EMIT));
            c->dst.push_back(sh->get_special_value(SV_GEOMETRY_EMIT));
         }
      }
   }

   assert(loop_stack.empty());
   return 0;
}

int bc_parser::prepare_loop(cf_node* c) {
   assert(c->bc.addr-1 < cf_map.size());

   cf_node *end = cf_map[c->bc.addr - 1];
   assert(end->bc.op == CF_OP_LOOP_END);
   assert(c->parent == end->parent);

   region_node *reg = sh->create_region();
   repeat_node *rep = sh->create_repeat(reg);

   reg->push_back(rep);
   c->insert_before(reg);
   rep->move(c, end->next);

   reg->src_loop = true;

   loop_stack.push(reg);
   return 0;
}

int bc_parser::prepare_if(cf_node* c) {
   assert(c->bc.addr-1 < cf_map.size());
   cf_node *c_else = NULL, *end = cf_map[c->bc.addr];

   if (!end)
      return 0; // not quite sure how this happens, malformed input?

   BCP_DUMP(
      sblog << "parsing JUMP @" << c->bc.id;
      sblog << "\n";
   );

   if (end->bc.op == CF_OP_ELSE) {
      BCP_DUMP(
         sblog << " found ELSE : ";
         dump::dump_op(end);
         sblog << "\n";
      );

      c_else = end;
      end = cf_map[c_else->bc.addr];
   } else {
      BCP_DUMP(
         sblog << " no else\n";
      );

      c_else = end;
   }

   if (c_else->parent != c->parent)
      c_else = NULL;

   if (end && end->parent != c->parent)
      end = NULL;

   region_node *reg = sh->create_region();

   depart_node *dep2 = sh->create_depart(reg);
   depart_node *dep = sh->create_depart(reg);
   if_node *n_if = sh->create_if();

   c->insert_before(reg);

   if (c_else != end)
      dep->move(c_else, end);
   dep2->move(c, end);

   reg->push_back(dep);
   dep->push_front(n_if);
   n_if->push_back(dep2);

   n_if->cond = sh->get_special_value(SV_EXEC_MASK);

   return 0;
}


} // namespace r600_sb