1 /* 2 * Copyright 2011 Christoph Bumiller 3 * 4 * Permission is hereby granted, free of charge, to any person obtaining a 5 * copy of this software and associated documentation files (the "Software"), 6 * to deal in the Software without restriction, including without limitation 7 * the rights to use, copy, modify, merge, publish, distribute, sublicense, 8 * and/or sell copies of the Software, and to permit persons to whom the 9 * Software is furnished to do so, subject to the following conditions: 10 * 11 * The above copyright notice and this permission notice shall be included in 12 * all copies or substantial portions of the Software. 13 * 14 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR 15 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, 16 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL 17 * THE AUTHORS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, 18 * WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF 19 * OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE 20 * SOFTWARE. 21 */ 22 23 #include "nv50/codegen/nv50_ir.h" 24 #include "nv50/codegen/nv50_ir_build_util.h" 25 26 #include "nv50_ir_target_nvc0.h" 27 28 #include <limits> 29 30 namespace nv50_ir { 31 32 #define QOP_ADD 0 33 #define QOP_SUBR 1 34 #define QOP_SUB 2 35 #define QOP_MOV2 3 36 37 // UL UR LL LR 38 #define QUADOP(q, r, s, t) \ 39 ((QOP_##q << 6) | (QOP_##r << 4) | \ 40 (QOP_##s << 2) | (QOP_##t << 0)) 41 42 class NVC0LegalizeSSA : public Pass 43 { 44 private: 45 virtual bool visit(BasicBlock *); 46 virtual bool visit(Function *); 47 48 // we want to insert calls to the builtin library only after optimization 49 void handleDIV(Instruction *); // integer division, modulus 50 void handleRCPRSQ(Instruction *); // double precision float recip/rsqrt 51 52 private: 53 BuildUtil bld; 54 }; 55 56 void 57 NVC0LegalizeSSA::handleDIV(Instruction *i) 58 { 59 FlowInstruction *call; 60 int builtin; 61 Value *def[2]; 62 63 bld.setPosition(i, false); 64 def[0] = bld.mkMovToReg(0, i->getSrc(0))->getDef(0); 65 def[1] = bld.mkMovToReg(1, i->getSrc(1))->getDef(0); 66 switch (i->dType) { 67 case TYPE_U32: builtin = NVC0_BUILTIN_DIV_U32; break; 68 case TYPE_S32: builtin = NVC0_BUILTIN_DIV_S32; break; 69 default: 70 return; 71 } 72 call = bld.mkFlow(OP_CALL, NULL, CC_ALWAYS, NULL); 73 bld.mkMov(i->getDef(0), def[(i->op == OP_DIV) ? 0 : 1]); 74 bld.mkClobber(FILE_GPR, (i->op == OP_DIV) ? 0xe : 0xd, 2); 75 bld.mkClobber(FILE_PREDICATE, (i->dType == TYPE_S32) ? 0xf : 0x3, 0); 76 77 call->fixed = 1; 78 call->absolute = call->builtin = 1; 79 call->target.builtin = builtin; 80 delete_Instruction(prog, i); 81 } 82 83 void 84 NVC0LegalizeSSA::handleRCPRSQ(Instruction *i) 85 { 86 // TODO 87 } 88 89 bool 90 NVC0LegalizeSSA::visit(Function *fn) 91 { 92 bld.setProgram(fn->getProgram()); 93 return true; 94 } 95 96 bool 97 NVC0LegalizeSSA::visit(BasicBlock *bb) 98 { 99 Instruction *next; 100 for (Instruction *i = bb->getEntry(); i; i = next) { 101 next = i->next; 102 if (i->dType == TYPE_F32) 103 continue; 104 switch (i->op) { 105 case OP_DIV: 106 case OP_MOD: 107 handleDIV(i); 108 break; 109 case OP_RCP: 110 case OP_RSQ: 111 if (i->dType == TYPE_F64) 112 handleRCPRSQ(i); 113 break; 114 default: 115 break; 116 } 117 } 118 return true; 119 } 120 121 class NVC0LegalizePostRA : public Pass 122 { 123 public: 124 NVC0LegalizePostRA(const Program *); 125 126 private: 127 virtual bool visit(Function *); 128 virtual bool visit(BasicBlock *); 129 130 void replaceZero(Instruction *); 131 void split64BitOp(Instruction *); 132 bool tryReplaceContWithBra(BasicBlock *); 133 void propagateJoin(BasicBlock *); 134 135 struct TexUse 136 { 137 TexUse(Instruction *use, const Instruction *tex) 138 : insn(use), tex(tex), level(-1) { } 139 Instruction *insn; 140 const Instruction *tex; // or split / mov 141 int level; 142 }; 143 struct Limits 144 { 145 Limits() { } 146 Limits(int min, int max) : min(min), max(max) { } 147 int min, max; 148 }; 149 bool insertTextureBarriers(Function *); 150 inline bool insnDominatedBy(const Instruction *, const Instruction *) const; 151 void findFirstUses(const Instruction *tex, const Instruction *def, 152 std::list<TexUse>&); 153 void findOverwritingDefs(const Instruction *tex, Instruction *insn, 154 const BasicBlock *term, 155 std::list<TexUse>&); 156 void addTexUse(std::list<TexUse>&, Instruction *, const Instruction *); 157 const Instruction *recurseDef(const Instruction *); 158 159 private: 160 LValue *r63; 161 const bool needTexBar; 162 }; 163 164 NVC0LegalizePostRA::NVC0LegalizePostRA(const Program *prog) 165 : needTexBar(prog->getTarget()->getChipset() >= 0xe0) 166 { 167 } 168 169 bool 170 NVC0LegalizePostRA::insnDominatedBy(const Instruction *later, 171 const Instruction *early) const 172 { 173 if (early->bb == later->bb) 174 return early->serial < later->serial; 175 return later->bb->dominatedBy(early->bb); 176 } 177 178 void 179 NVC0LegalizePostRA::addTexUse(std::list<TexUse> &uses, 180 Instruction *usei, const Instruction *insn) 181 { 182 bool add = true; 183 for (std::list<TexUse>::iterator it = uses.begin(); 184 it != uses.end();) { 185 if (insnDominatedBy(usei, it->insn)) { 186 add = false; 187 break; 188 } 189 if (insnDominatedBy(it->insn, usei)) 190 it = uses.erase(it); 191 else 192 ++it; 193 } 194 if (add) 195 uses.push_back(TexUse(usei, insn)); 196 } 197 198 void 199 NVC0LegalizePostRA::findOverwritingDefs(const Instruction *texi, 200 Instruction *insn, 201 const BasicBlock *term, 202 std::list<TexUse> &uses) 203 { 204 while (insn->op == OP_MOV && insn->getDef(0)->equals(insn->getSrc(0))) 205 insn = insn->getSrc(0)->getUniqueInsn(); 206 207 if (!insn || !insn->bb->reachableBy(texi->bb, term)) 208 return; 209 210 switch (insn->op) { 211 /* Values not connected to the tex's definition through any of these should 212 * not be conflicting. 213 */ 214 case OP_SPLIT: 215 case OP_MERGE: 216 case OP_PHI: 217 case OP_UNION: 218 /* recurse again */ 219 for (int s = 0; insn->srcExists(s); ++s) 220 findOverwritingDefs(texi, insn->getSrc(s)->getUniqueInsn(), term, 221 uses); 222 break; 223 default: 224 // if (!isTextureOp(insn->op)) // TODO: are TEXes always ordered ? 225 addTexUse(uses, insn, texi); 226 break; 227 } 228 } 229 230 void 231 NVC0LegalizePostRA::findFirstUses(const Instruction *texi, 232 const Instruction *insn, 233 std::list<TexUse> &uses) 234 { 235 for (int d = 0; insn->defExists(d); ++d) { 236 Value *v = insn->getDef(d); 237 for (Value::UseIterator u = v->uses.begin(); u != v->uses.end(); ++u) { 238 Instruction *usei = (*u)->getInsn(); 239 240 if (usei->op == OP_PHI || usei->op == OP_UNION) { 241 // need a barrier before WAW cases 242 for (int s = 0; usei->srcExists(s); ++s) { 243 Instruction *defi = usei->getSrc(s)->getUniqueInsn(); 244 if (defi && &usei->src(s) != *u) 245 findOverwritingDefs(texi, defi, usei->bb, uses); 246 } 247 } 248 249 if (usei->op == OP_SPLIT || 250 usei->op == OP_MERGE || 251 usei->op == OP_PHI || 252 usei->op == OP_UNION) { 253 // these uses don't manifest in the machine code 254 findFirstUses(texi, usei, uses); 255 } else 256 if (usei->op == OP_MOV && usei->getDef(0)->equals(usei->getSrc(0)) && 257 usei->subOp != NV50_IR_SUBOP_MOV_FINAL) { 258 findFirstUses(texi, usei, uses); 259 } else { 260 addTexUse(uses, usei, insn); 261 } 262 } 263 } 264 } 265 266 // Texture barriers: 267 // This pass is a bit long and ugly and can probably be optimized. 268 // 269 // 1. obtain a list of TEXes and their outputs' first use(s) 270 // 2. calculate the barrier level of each first use (minimal number of TEXes, 271 // over all paths, between the TEX and the use in question) 272 // 3. for each barrier, if all paths from the source TEX to that barrier 273 // contain a barrier of lesser level, it can be culled 274 bool 275 NVC0LegalizePostRA::insertTextureBarriers(Function *fn) 276 { 277 std::list<TexUse> *uses; 278 std::vector<Instruction *> texes; 279 std::vector<int> bbFirstTex; 280 std::vector<int> bbFirstUse; 281 std::vector<int> texCounts; 282 std::vector<TexUse> useVec; 283 ArrayList insns; 284 285 fn->orderInstructions(insns); 286 287 texCounts.resize(fn->allBBlocks.getSize(), 0); 288 bbFirstTex.resize(fn->allBBlocks.getSize(), insns.getSize()); 289 bbFirstUse.resize(fn->allBBlocks.getSize(), insns.getSize()); 290 291 // tag BB CFG nodes by their id for later 292 for (ArrayList::Iterator i = fn->allBBlocks.iterator(); !i.end(); i.next()) { 293 BasicBlock *bb = reinterpret_cast<BasicBlock *>(i.get()); 294 if (bb) 295 bb->cfg.tag = bb->getId(); 296 } 297 298 // gather the first uses for each TEX 299 for (int i = 0; i < insns.getSize(); ++i) { 300 Instruction *tex = reinterpret_cast<Instruction *>(insns.get(i)); 301 if (isTextureOp(tex->op)) { 302 texes.push_back(tex); 303 if (!texCounts.at(tex->bb->getId())) 304 bbFirstTex[tex->bb->getId()] = texes.size() - 1; 305 texCounts[tex->bb->getId()]++; 306 } 307 } 308 insns.clear(); 309 if (texes.empty()) 310 return false; 311 uses = new std::list<TexUse>[texes.size()]; 312 if (!uses) 313 return false; 314 for (size_t i = 0; i < texes.size(); ++i) 315 findFirstUses(texes[i], texes[i], uses[i]); 316 317 // determine the barrier level at each use 318 for (size_t i = 0; i < texes.size(); ++i) { 319 for (std::list<TexUse>::iterator u = uses[i].begin(); u != uses[i].end(); 320 ++u) { 321 BasicBlock *tb = texes[i]->bb; 322 BasicBlock *ub = u->insn->bb; 323 if (tb == ub) { 324 u->level = 0; 325 for (size_t j = i + 1; j < texes.size() && 326 texes[j]->bb == tb && texes[j]->serial < u->insn->serial; 327 ++j) 328 u->level++; 329 } else { 330 u->level = fn->cfg.findLightestPathWeight(&tb->cfg, 331 &ub->cfg, texCounts); 332 if (u->level < 0) { 333 WARN("Failed to find path TEX -> TEXBAR\n"); 334 u->level = 0; 335 continue; 336 } 337 // this counted all TEXes in the origin block, correct that 338 u->level -= i - bbFirstTex.at(tb->getId()) + 1 /* this TEX */; 339 // and did not count the TEXes in the destination block, add those 340 for (size_t j = bbFirstTex.at(ub->getId()); j < texes.size() && 341 texes[j]->bb == ub && texes[j]->serial < u->insn->serial; 342 ++j) 343 u->level++; 344 } 345 assert(u->level >= 0); 346 useVec.push_back(*u); 347 } 348 } 349 delete[] uses; 350 uses = NULL; 351 352 // insert the barriers 353 for (size_t i = 0; i < useVec.size(); ++i) { 354 Instruction *prev = useVec[i].insn->prev; 355 if (useVec[i].level < 0) 356 continue; 357 if (prev && prev->op == OP_TEXBAR) { 358 if (prev->subOp > useVec[i].level) 359 prev->subOp = useVec[i].level; 360 prev->setSrc(prev->srcCount(), useVec[i].tex->getDef(0)); 361 } else { 362 Instruction *bar = new_Instruction(func, OP_TEXBAR, TYPE_NONE); 363 bar->fixed = 1; 364 bar->subOp = useVec[i].level; 365 // make use explicit to ease latency calculation 366 bar->setSrc(bar->srcCount(), useVec[i].tex->getDef(0)); 367 useVec[i].insn->bb->insertBefore(useVec[i].insn, bar); 368 } 369 } 370 371 if (fn->getProgram()->optLevel < 3) { 372 if (uses) 373 delete[] uses; 374 return true; 375 } 376 377 std::vector<Limits> limitT, limitB, limitS; // entry, exit, single 378 379 limitT.resize(fn->allBBlocks.getSize(), Limits(0, 0)); 380 limitB.resize(fn->allBBlocks.getSize(), Limits(0, 0)); 381 limitS.resize(fn->allBBlocks.getSize()); 382 383 // cull unneeded barriers (should do that earlier, but for simplicity) 384 IteratorRef bi = fn->cfg.iteratorCFG(); 385 // first calculate min/max outstanding TEXes for each BB 386 for (bi->reset(); !bi->end(); bi->next()) { 387 Graph::Node *n = reinterpret_cast<Graph::Node *>(bi->get()); 388 BasicBlock *bb = BasicBlock::get(n); 389 int min = 0; 390 int max = std::numeric_limits<int>::max(); 391 for (Instruction *i = bb->getFirst(); i; i = i->next) { 392 if (isTextureOp(i->op)) { 393 min++; 394 if (max < std::numeric_limits<int>::max()) 395 max++; 396 } else 397 if (i->op == OP_TEXBAR) { 398 min = MIN2(min, i->subOp); 399 max = MIN2(max, i->subOp); 400 } 401 } 402 // limits when looking at an isolated block 403 limitS[bb->getId()].min = min; 404 limitS[bb->getId()].max = max; 405 } 406 // propagate the min/max values 407 for (unsigned int l = 0; l <= fn->loopNestingBound; ++l) { 408 for (bi->reset(); !bi->end(); bi->next()) { 409 Graph::Node *n = reinterpret_cast<Graph::Node *>(bi->get()); 410 BasicBlock *bb = BasicBlock::get(n); 411 const int bbId = bb->getId(); 412 for (Graph::EdgeIterator ei = n->incident(); !ei.end(); ei.next()) { 413 BasicBlock *in = BasicBlock::get(ei.getNode()); 414 const int inId = in->getId(); 415 limitT[bbId].min = MAX2(limitT[bbId].min, limitB[inId].min); 416 limitT[bbId].max = MAX2(limitT[bbId].max, limitB[inId].max); 417 } 418 // I just hope this is correct ... 419 if (limitS[bbId].max == std::numeric_limits<int>::max()) { 420 // no barrier 421 limitB[bbId].min = limitT[bbId].min + limitS[bbId].min; 422 limitB[bbId].max = limitT[bbId].max + limitS[bbId].min; 423 } else { 424 // block contained a barrier 425 limitB[bbId].min = MIN2(limitS[bbId].max, 426 limitT[bbId].min + limitS[bbId].min); 427 limitB[bbId].max = MIN2(limitS[bbId].max, 428 limitT[bbId].max + limitS[bbId].min); 429 } 430 } 431 } 432 // finally delete unnecessary barriers 433 for (bi->reset(); !bi->end(); bi->next()) { 434 Graph::Node *n = reinterpret_cast<Graph::Node *>(bi->get()); 435 BasicBlock *bb = BasicBlock::get(n); 436 Instruction *prev = NULL; 437 Instruction *next; 438 int max = limitT[bb->getId()].max; 439 for (Instruction *i = bb->getFirst(); i; i = next) { 440 next = i->next; 441 if (i->op == OP_TEXBAR) { 442 if (i->subOp >= max) { 443 delete_Instruction(prog, i); 444 } else { 445 max = i->subOp; 446 if (prev && prev->op == OP_TEXBAR && prev->subOp >= max) { 447 delete_Instruction(prog, prev); 448 prev = NULL; 449 } 450 } 451 } else 452 if (isTextureOp(i->op)) { 453 max++; 454 } 455 if (!i->isNop()) 456 prev = i; 457 } 458 } 459 if (uses) 460 delete[] uses; 461 return true; 462 } 463 464 bool 465 NVC0LegalizePostRA::visit(Function *fn) 466 { 467 if (needTexBar) 468 insertTextureBarriers(fn); 469 470 r63 = new_LValue(fn, FILE_GPR); 471 r63->reg.data.id = 63; 472 return true; 473 } 474 475 void 476 NVC0LegalizePostRA::replaceZero(Instruction *i) 477 { 478 for (int s = 0; i->srcExists(s); ++s) { 479 ImmediateValue *imm = i->getSrc(s)->asImm(); 480 if (imm && imm->reg.data.u64 == 0) 481 i->setSrc(s, r63); 482 } 483 } 484 485 void 486 NVC0LegalizePostRA::split64BitOp(Instruction *i) 487 { 488 if (i->dType == TYPE_F64) { 489 if (i->op == OP_MAD) 490 i->op = OP_FMA; 491 if (i->op == OP_ADD || i->op == OP_MUL || i->op == OP_FMA || 492 i->op == OP_CVT || i->op == OP_MIN || i->op == OP_MAX || 493 i->op == OP_SET) 494 return; 495 i->dType = i->sType = TYPE_U32; 496 497 i->bb->insertAfter(i, cloneForward(func, i)); 498 } 499 } 500 501 // replace CONT with BRA for single unconditional continue 502 bool 503 NVC0LegalizePostRA::tryReplaceContWithBra(BasicBlock *bb) 504 { 505 if (bb->cfg.incidentCount() != 2 || bb->getEntry()->op != OP_PRECONT) 506 return false; 507 Graph::EdgeIterator ei = bb->cfg.incident(); 508 if (ei.getType() != Graph::Edge::BACK) 509 ei.next(); 510 if (ei.getType() != Graph::Edge::BACK) 511 return false; 512 BasicBlock *contBB = BasicBlock::get(ei.getNode()); 513 514 if (!contBB->getExit() || contBB->getExit()->op != OP_CONT || 515 contBB->getExit()->getPredicate()) 516 return false; 517 contBB->getExit()->op = OP_BRA; 518 bb->remove(bb->getEntry()); // delete PRECONT 519 520 ei.next(); 521 assert(ei.end() || ei.getType() != Graph::Edge::BACK); 522 return true; 523 } 524 525 // replace branches to join blocks with join ops 526 void 527 NVC0LegalizePostRA::propagateJoin(BasicBlock *bb) 528 { 529 if (bb->getEntry()->op != OP_JOIN || bb->getEntry()->asFlow()->limit) 530 return; 531 for (Graph::EdgeIterator ei = bb->cfg.incident(); !ei.end(); ei.next()) { 532 BasicBlock *in = BasicBlock::get(ei.getNode()); 533 Instruction *exit = in->getExit(); 534 if (!exit) { 535 in->insertTail(new FlowInstruction(func, OP_JOIN, bb)); 536 // there should always be a terminator instruction 537 WARN("inserted missing terminator in BB:%i\n", in->getId()); 538 } else 539 if (exit->op == OP_BRA) { 540 exit->op = OP_JOIN; 541 exit->asFlow()->limit = 1; // must-not-propagate marker 542 } 543 } 544 bb->remove(bb->getEntry()); 545 } 546 547 bool 548 NVC0LegalizePostRA::visit(BasicBlock *bb) 549 { 550 Instruction *i, *next; 551 552 // remove pseudo operations and non-fixed no-ops, split 64 bit operations 553 for (i = bb->getFirst(); i; i = next) { 554 next = i->next; 555 if (i->op == OP_EMIT || i->op == OP_RESTART) { 556 if (!i->getDef(0)->refCount()) 557 i->setDef(0, NULL); 558 if (i->src(0).getFile() == FILE_IMMEDIATE) 559 i->setSrc(0, r63); // initial value must be 0 560 } else 561 if (i->isNop()) { 562 bb->remove(i); 563 } else { 564 if (i->op != OP_MOV && i->op != OP_PFETCH) 565 replaceZero(i); 566 if (typeSizeof(i->dType) == 8) 567 split64BitOp(i); 568 } 569 } 570 if (!bb->getEntry()) 571 return true; 572 573 if (!tryReplaceContWithBra(bb)) 574 propagateJoin(bb); 575 576 return true; 577 } 578 579 class NVC0LoweringPass : public Pass 580 { 581 public: 582 NVC0LoweringPass(Program *); 583 584 private: 585 virtual bool visit(Function *); 586 virtual bool visit(BasicBlock *); 587 virtual bool visit(Instruction *); 588 589 bool handleRDSV(Instruction *); 590 bool handleWRSV(Instruction *); 591 bool handleEXPORT(Instruction *); 592 bool handleOUT(Instruction *); 593 bool handleDIV(Instruction *); 594 bool handleMOD(Instruction *); 595 bool handleSQRT(Instruction *); 596 bool handlePOW(Instruction *); 597 bool handleTEX(TexInstruction *); 598 bool handleTXD(TexInstruction *); 599 bool handleTXQ(TexInstruction *); 600 bool handleManualTXD(TexInstruction *); 601 602 void checkPredicate(Instruction *); 603 604 void readTessCoord(LValue *dst, int c); 605 606 private: 607 const Target *const targ; 608 609 BuildUtil bld; 610 611 LValue *gpEmitAddress; 612 }; 613 614 NVC0LoweringPass::NVC0LoweringPass(Program *prog) : targ(prog->getTarget()) 615 { 616 bld.setProgram(prog); 617 } 618 619 bool 620 NVC0LoweringPass::visit(Function *fn) 621 { 622 if (prog->getType() == Program::TYPE_GEOMETRY) { 623 assert(!strncmp(fn->getName(), "MAIN", 4)); 624 // TODO: when we generate actual functions pass this value along somehow 625 bld.setPosition(BasicBlock::get(fn->cfg.getRoot()), false); 626 gpEmitAddress = bld.loadImm(NULL, 0)->asLValue(); 627 if (fn->cfgExit) { 628 bld.setPosition(BasicBlock::get(fn->cfgExit)->getExit(), false); 629 bld.mkMovToReg(0, gpEmitAddress); 630 } 631 } 632 return true; 633 } 634 635 bool 636 NVC0LoweringPass::visit(BasicBlock *bb) 637 { 638 return true; 639 } 640 641 // move array source to first slot, convert to u16, add indirections 642 bool 643 NVC0LoweringPass::handleTEX(TexInstruction *i) 644 { 645 const int dim = i->tex.target.getDim() + i->tex.target.isCube(); 646 const int arg = i->tex.target.getArgCount(); 647 648 if (prog->getTarget()->getChipset() >= 0xe0) { 649 if (i->tex.r == i->tex.s) { 650 i->tex.r += 8; // NOTE: offset should probably be a driver option 651 i->tex.s = 0; // only a single cX[] value possible here 652 } else { 653 // TODO: extract handles and use register to select TIC/TSC entries 654 } 655 if (i->tex.target.isArray()) { 656 LValue *layer = new_LValue(func, FILE_GPR); 657 Value *src = i->getSrc(arg - 1); 658 const int sat = (i->op == OP_TXF) ? 1 : 0; 659 DataType sTy = (i->op == OP_TXF) ? TYPE_U32 : TYPE_F32; 660 bld.mkCvt(OP_CVT, TYPE_U16, layer, sTy, src)->saturate = sat; 661 for (int s = dim; s >= 1; --s) 662 i->setSrc(s, i->getSrc(s - 1)); 663 i->setSrc(0, layer); 664 } 665 if (i->tex.rIndirectSrc >= 0 || i->tex.sIndirectSrc >= 0) { 666 Value *tmp[2]; 667 Symbol *bind; 668 Value *rRel = i->getIndirectR(); 669 Value *sRel = i->getIndirectS(); 670 Value *shCnt = bld.loadImm(NULL, 2); 671 672 if (rRel) { 673 tmp[0] = bld.getScratch(); 674 bind = bld.mkSymbol(FILE_MEMORY_CONST, 15, TYPE_U32, i->tex.r * 4); 675 bld.mkOp2(OP_SHL, TYPE_U32, tmp[0], rRel, shCnt); 676 tmp[1] = bld.mkLoad(TYPE_U32, bind, tmp[0]); 677 bld.mkOp2(OP_AND, TYPE_U32, tmp[0], tmp[1], 678 bld.loadImm(tmp[0], 0x00ffffffu)); 679 rRel = tmp[0]; 680 i->setSrc(i->tex.rIndirectSrc, NULL); 681 } 682 if (sRel) { 683 tmp[0] = bld.getScratch(); 684 bind = bld.mkSymbol(FILE_MEMORY_CONST, 15, TYPE_U32, i->tex.s * 4); 685 bld.mkOp2(OP_SHL, TYPE_U32, tmp[0], sRel, shCnt); 686 tmp[1] = bld.mkLoad(TYPE_U32, bind, tmp[0]); 687 bld.mkOp2(OP_AND, TYPE_U32, tmp[0], tmp[1], 688 bld.loadImm(tmp[0], 0xff000000u)); 689 sRel = tmp[0]; 690 i->setSrc(i->tex.sIndirectSrc, NULL); 691 } 692 bld.mkOp2(OP_OR, TYPE_U32, rRel, rRel, sRel); 693 694 int min = i->tex.rIndirectSrc; 695 if (min < 0 || min > i->tex.sIndirectSrc) 696 min = i->tex.sIndirectSrc; 697 for (int s = min; s >= 1; --s) 698 i->setSrc(s, i->getSrc(s - 1)); 699 i->setSrc(0, rRel); 700 } 701 } else 702 // (nvc0) generate and move the tsc/tic/array source to the front 703 if (dim != arg || i->tex.rIndirectSrc >= 0 || i->tex.sIndirectSrc >= 0) { 704 LValue *src = new_LValue(func, FILE_GPR); // 0xttxsaaaa 705 706 Value *arrayIndex = i->tex.target.isArray() ? i->getSrc(arg - 1) : NULL; 707 for (int s = dim; s >= 1; --s) 708 i->setSrc(s, i->getSrc(s - 1)); 709 i->setSrc(0, arrayIndex); 710 711 Value *ticRel = i->getIndirectR(); 712 Value *tscRel = i->getIndirectS(); 713 714 if (arrayIndex) { 715 int sat = (i->op == OP_TXF) ? 1 : 0; 716 DataType sTy = (i->op == OP_TXF) ? TYPE_U32 : TYPE_F32; 717 bld.mkCvt(OP_CVT, TYPE_U16, src, sTy, arrayIndex)->saturate = sat; 718 } else { 719 bld.loadImm(src, 0); 720 } 721 722 if (ticRel) { 723 i->setSrc(i->tex.rIndirectSrc, NULL); 724 bld.mkOp3(OP_INSBF, TYPE_U32, src, ticRel, bld.mkImm(0x0917), src); 725 } 726 if (tscRel) { 727 i->setSrc(i->tex.sIndirectSrc, NULL); 728 bld.mkOp3(OP_INSBF, TYPE_U32, src, tscRel, bld.mkImm(0x0710), src); 729 } 730 731 i->setSrc(0, src); 732 } 733 734 // offset is last source (lod 1st, dc 2nd) 735 if (i->tex.useOffsets) { 736 uint32_t value = 0; 737 int n, c; 738 int s = i->srcCount(0xff); 739 for (n = 0; n < i->tex.useOffsets; ++n) 740 for (c = 0; c < 3; ++c) 741 value |= (i->tex.offset[n][c] & 0xf) << (n * 12 + c * 4); 742 i->setSrc(s, bld.loadImm(NULL, value)); 743 } 744 745 return true; 746 } 747 748 bool 749 NVC0LoweringPass::handleManualTXD(TexInstruction *i) 750 { 751 static const uint8_t qOps[4][2] = 752 { 753 { QUADOP(MOV2, ADD, MOV2, ADD), QUADOP(MOV2, MOV2, ADD, ADD) }, // l0 754 { QUADOP(SUBR, MOV2, SUBR, MOV2), QUADOP(MOV2, MOV2, ADD, ADD) }, // l1 755 { QUADOP(MOV2, ADD, MOV2, ADD), QUADOP(SUBR, SUBR, MOV2, MOV2) }, // l2 756 { QUADOP(SUBR, MOV2, SUBR, MOV2), QUADOP(SUBR, SUBR, MOV2, MOV2) }, // l3 757 }; 758 Value *def[4][4]; 759 Value *crd[3]; 760 Instruction *tex; 761 Value *zero = bld.loadImm(bld.getSSA(), 0); 762 int l, c; 763 const int dim = i->tex.target.getDim(); 764 765 i->op = OP_TEX; // no need to clone dPdx/dPdy later 766 767 for (c = 0; c < dim; ++c) 768 crd[c] = bld.getScratch(); 769 770 bld.mkOp(OP_QUADON, TYPE_NONE, NULL); 771 for (l = 0; l < 4; ++l) { 772 // mov coordinates from lane l to all lanes 773 for (c = 0; c < dim; ++c) 774 bld.mkQuadop(0x00, crd[c], l, i->getSrc(c), zero); 775 // add dPdx from lane l to lanes dx 776 for (c = 0; c < dim; ++c) 777 bld.mkQuadop(qOps[l][0], crd[c], l, i->dPdx[c].get(), crd[c]); 778 // add dPdy from lane l to lanes dy 779 for (c = 0; c < dim; ++c) 780 bld.mkQuadop(qOps[l][1], crd[c], l, i->dPdy[c].get(), crd[c]); 781 // texture 782 bld.insert(tex = cloneForward(func, i)); 783 for (c = 0; c < dim; ++c) 784 tex->setSrc(c, crd[c]); 785 // save results 786 for (c = 0; i->defExists(c); ++c) { 787 Instruction *mov; 788 def[c][l] = bld.getSSA(); 789 mov = bld.mkMov(def[c][l], tex->getDef(c)); 790 mov->fixed = 1; 791 mov->lanes = 1 << l; 792 } 793 } 794 bld.mkOp(OP_QUADPOP, TYPE_NONE, NULL); 795 796 for (c = 0; i->defExists(c); ++c) { 797 Instruction *u = bld.mkOp(OP_UNION, TYPE_U32, i->getDef(c)); 798 for (l = 0; l < 4; ++l) 799 u->setSrc(l, def[c][l]); 800 } 801 802 i->bb->remove(i); 803 return true; 804 } 805 806 bool 807 NVC0LoweringPass::handleTXD(TexInstruction *txd) 808 { 809 int dim = txd->tex.target.getDim(); 810 int arg = txd->tex.target.getArgCount(); 811 812 handleTEX(txd); 813 while (txd->srcExists(arg)) 814 ++arg; 815 816 txd->tex.derivAll = true; 817 if (dim > 2 || 818 txd->tex.target.isCube() || 819 arg > 4 || 820 txd->tex.target.isShadow()) 821 return handleManualTXD(txd); 822 823 for (int c = 0; c < dim; ++c) { 824 txd->setSrc(arg + c * 2 + 0, txd->dPdx[c]); 825 txd->setSrc(arg + c * 2 + 1, txd->dPdy[c]); 826 txd->dPdx[c].set(NULL); 827 txd->dPdy[c].set(NULL); 828 } 829 return true; 830 } 831 832 bool 833 NVC0LoweringPass::handleTXQ(TexInstruction *txq) 834 { 835 // TODO: indirect resource/sampler index 836 return true; 837 } 838 839 bool 840 NVC0LoweringPass::handleWRSV(Instruction *i) 841 { 842 Instruction *st; 843 Symbol *sym; 844 uint32_t addr; 845 846 // must replace, $sreg are not writeable 847 addr = targ->getSVAddress(FILE_SHADER_OUTPUT, i->getSrc(0)->asSym()); 848 if (addr >= 0x400) 849 return false; 850 sym = bld.mkSymbol(FILE_SHADER_OUTPUT, 0, i->sType, addr); 851 852 st = bld.mkStore(OP_EXPORT, i->dType, sym, i->getIndirect(0, 0), 853 i->getSrc(1)); 854 st->perPatch = i->perPatch; 855 856 bld.getBB()->remove(i); 857 return true; 858 } 859 860 void 861 NVC0LoweringPass::readTessCoord(LValue *dst, int c) 862 { 863 Value *laneid = bld.getSSA(); 864 Value *x, *y; 865 866 bld.mkOp1(OP_RDSV, TYPE_U32, laneid, bld.mkSysVal(SV_LANEID, 0)); 867 868 if (c == 0) { 869 x = dst; 870 y = NULL; 871 } else 872 if (c == 1) { 873 x = NULL; 874 y = dst; 875 } else { 876 assert(c == 2); 877 x = bld.getSSA(); 878 y = bld.getSSA(); 879 } 880 if (x) 881 bld.mkFetch(x, TYPE_F32, FILE_SHADER_OUTPUT, 0x2f0, NULL, laneid); 882 if (y) 883 bld.mkFetch(y, TYPE_F32, FILE_SHADER_OUTPUT, 0x2f4, NULL, laneid); 884 885 if (c == 2) { 886 bld.mkOp2(OP_ADD, TYPE_F32, dst, x, y); 887 bld.mkOp2(OP_SUB, TYPE_F32, dst, bld.loadImm(NULL, 1.0f), dst); 888 } 889 } 890 891 bool 892 NVC0LoweringPass::handleRDSV(Instruction *i) 893 { 894 Symbol *sym = i->getSrc(0)->asSym(); 895 Value *vtx = NULL; 896 Instruction *ld; 897 uint32_t addr = targ->getSVAddress(FILE_SHADER_INPUT, sym); 898 899 if (addr >= 0x400) // mov $sreg 900 return true; 901 902 switch (i->getSrc(0)->reg.data.sv.sv) { 903 case SV_POSITION: 904 assert(prog->getType() == Program::TYPE_FRAGMENT); 905 bld.mkInterp(NV50_IR_INTERP_LINEAR, i->getDef(0), addr, NULL); 906 break; 907 case SV_FACE: 908 { 909 Value *face = i->getDef(0); 910 bld.mkInterp(NV50_IR_INTERP_FLAT, face, addr, NULL); 911 if (i->dType == TYPE_F32) { 912 bld.mkOp2(OP_AND, TYPE_U32, face, face, bld.mkImm(0x80000000)); 913 bld.mkOp2(OP_XOR, TYPE_U32, face, face, bld.mkImm(0xbf800000)); 914 } 915 } 916 break; 917 case SV_TESS_COORD: 918 assert(prog->getType() == Program::TYPE_TESSELLATION_EVAL); 919 readTessCoord(i->getDef(0)->asLValue(), i->getSrc(0)->reg.data.sv.index); 920 break; 921 default: 922 if (prog->getType() == Program::TYPE_TESSELLATION_EVAL) 923 vtx = bld.mkOp1v(OP_PFETCH, TYPE_U32, bld.getSSA(), bld.mkImm(0)); 924 ld = bld.mkFetch(i->getDef(0), i->dType, 925 FILE_SHADER_INPUT, addr, i->getIndirect(0, 0), vtx); 926 ld->perPatch = i->perPatch; 927 break; 928 } 929 bld.getBB()->remove(i); 930 return true; 931 } 932 933 bool 934 NVC0LoweringPass::handleDIV(Instruction *i) 935 { 936 if (!isFloatType(i->dType)) 937 return true; 938 bld.setPosition(i, false); 939 Instruction *rcp = bld.mkOp1(OP_RCP, i->dType, bld.getSSA(), i->getSrc(1)); 940 i->op = OP_MUL; 941 i->setSrc(1, rcp->getDef(0)); 942 return true; 943 } 944 945 bool 946 NVC0LoweringPass::handleMOD(Instruction *i) 947 { 948 if (i->dType != TYPE_F32) 949 return true; 950 LValue *value = bld.getScratch(); 951 bld.mkOp1(OP_RCP, TYPE_F32, value, i->getSrc(1)); 952 bld.mkOp2(OP_MUL, TYPE_F32, value, i->getSrc(0), value); 953 bld.mkOp1(OP_TRUNC, TYPE_F32, value, value); 954 bld.mkOp2(OP_MUL, TYPE_F32, value, i->getSrc(1), value); 955 i->op = OP_SUB; 956 i->setSrc(1, value); 957 return true; 958 } 959 960 bool 961 NVC0LoweringPass::handleSQRT(Instruction *i) 962 { 963 Instruction *rsq = bld.mkOp1(OP_RSQ, TYPE_F32, 964 bld.getSSA(), i->getSrc(0)); 965 i->op = OP_MUL; 966 i->setSrc(1, rsq->getDef(0)); 967 968 return true; 969 } 970 971 bool 972 NVC0LoweringPass::handlePOW(Instruction *i) 973 { 974 LValue *val = bld.getScratch(); 975 976 bld.mkOp1(OP_LG2, TYPE_F32, val, i->getSrc(0)); 977 bld.mkOp2(OP_MUL, TYPE_F32, val, i->getSrc(1), val)->dnz = 1; 978 bld.mkOp1(OP_PREEX2, TYPE_F32, val, val); 979 980 i->op = OP_EX2; 981 i->setSrc(0, val); 982 i->setSrc(1, NULL); 983 984 return true; 985 } 986 987 bool 988 NVC0LoweringPass::handleEXPORT(Instruction *i) 989 { 990 if (prog->getType() == Program::TYPE_FRAGMENT) { 991 int id = i->getSrc(0)->reg.data.offset / 4; 992 993 if (i->src(0).isIndirect(0)) // TODO, ugly 994 return false; 995 i->op = OP_MOV; 996 i->subOp = NV50_IR_SUBOP_MOV_FINAL; 997 i->src(0).set(i->src(1)); 998 i->setSrc(1, NULL); 999 i->setDef(0, new_LValue(func, FILE_GPR)); 1000 i->getDef(0)->reg.data.id = id; 1001 1002 prog->maxGPR = MAX2(prog->maxGPR, id); 1003 } else 1004 if (prog->getType() == Program::TYPE_GEOMETRY) { 1005 i->setIndirect(0, 1, gpEmitAddress); 1006 } 1007 return true; 1008 } 1009 1010 bool 1011 NVC0LoweringPass::handleOUT(Instruction *i) 1012 { 1013 if (i->op == OP_RESTART && i->prev && i->prev->op == OP_EMIT) { 1014 i->prev->subOp = NV50_IR_SUBOP_EMIT_RESTART; 1015 delete_Instruction(prog, i); 1016 } else { 1017 assert(gpEmitAddress); 1018 i->setDef(0, gpEmitAddress); 1019 if (i->srcExists(0)) 1020 i->setSrc(1, i->getSrc(0)); 1021 i->setSrc(0, gpEmitAddress); 1022 } 1023 return true; 1024 } 1025 1026 // Generate a binary predicate if an instruction is predicated by 1027 // e.g. an f32 value. 1028 void 1029 NVC0LoweringPass::checkPredicate(Instruction *insn) 1030 { 1031 Value *pred = insn->getPredicate(); 1032 Value *pdst; 1033 1034 if (!pred || pred->reg.file == FILE_PREDICATE) 1035 return; 1036 pdst = new_LValue(func, FILE_PREDICATE); 1037 1038 // CAUTION: don't use pdst->getInsn, the definition might not be unique, 1039 // delay turning PSET(FSET(x,y),0) into PSET(x,y) to a later pass 1040 1041 bld.mkCmp(OP_SET, CC_NEU, TYPE_U32, pdst, bld.mkImm(0), pred); 1042 1043 insn->setPredicate(insn->cc, pdst); 1044 } 1045 1046 // 1047 // - add quadop dance for texturing 1048 // - put FP outputs in GPRs 1049 // - convert instruction sequences 1050 // 1051 bool 1052 NVC0LoweringPass::visit(Instruction *i) 1053 { 1054 bld.setPosition(i, false); 1055 1056 if (i->cc != CC_ALWAYS) 1057 checkPredicate(i); 1058 1059 switch (i->op) { 1060 case OP_TEX: 1061 case OP_TXB: 1062 case OP_TXL: 1063 case OP_TXF: 1064 case OP_TXG: 1065 return handleTEX(i->asTex()); 1066 case OP_TXD: 1067 return handleTXD(i->asTex()); 1068 case OP_TXQ: 1069 return handleTXQ(i->asTex()); 1070 case OP_EX2: 1071 bld.mkOp1(OP_PREEX2, TYPE_F32, i->getDef(0), i->getSrc(0)); 1072 i->setSrc(0, i->getDef(0)); 1073 break; 1074 case OP_POW: 1075 return handlePOW(i); 1076 case OP_DIV: 1077 return handleDIV(i); 1078 case OP_MOD: 1079 return handleMOD(i); 1080 case OP_SQRT: 1081 return handleSQRT(i); 1082 case OP_EXPORT: 1083 return handleEXPORT(i); 1084 case OP_EMIT: 1085 case OP_RESTART: 1086 return handleOUT(i); 1087 case OP_RDSV: 1088 return handleRDSV(i); 1089 case OP_WRSV: 1090 return handleWRSV(i); 1091 case OP_LOAD: 1092 if (i->src(0).getFile() == FILE_SHADER_INPUT) { 1093 i->op = OP_VFETCH; 1094 assert(prog->getType() != Program::TYPE_FRAGMENT); 1095 } 1096 break; 1097 default: 1098 break; 1099 } 1100 return true; 1101 } 1102 1103 bool 1104 TargetNVC0::runLegalizePass(Program *prog, CGStage stage) const 1105 { 1106 if (stage == CG_STAGE_PRE_SSA) { 1107 NVC0LoweringPass pass(prog); 1108 return pass.run(prog, false, true); 1109 } else 1110 if (stage == CG_STAGE_POST_RA) { 1111 NVC0LegalizePostRA pass(prog); 1112 return pass.run(prog, false, true); 1113 } else 1114 if (stage == CG_STAGE_SSA) { 1115 NVC0LegalizeSSA pass; 1116 return pass.run(prog, false, true); 1117 } 1118 return false; 1119 } 1120 1121 } // namespace nv50_ir 1122