1 //===- R600ControlFlowFinalizer.cpp - Finalize Control Flow Inst ----------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 /// \file 11 /// This pass compute turns all control flow pseudo instructions into native one 12 /// computing their address on the fly; it also sets STACK_SIZE info. 13 // 14 //===----------------------------------------------------------------------===// 15 16 #include "AMDGPU.h" 17 #include "AMDGPUSubtarget.h" 18 #include "R600Defines.h" 19 #include "R600InstrInfo.h" 20 #include "R600MachineFunctionInfo.h" 21 #include "R600RegisterInfo.h" 22 #include "MCTargetDesc/AMDGPUMCTargetDesc.h" 23 #include "llvm/ADT/STLExtras.h" 24 #include "llvm/ADT/SmallVector.h" 25 #include "llvm/ADT/StringRef.h" 26 #include "llvm/CodeGen/MachineBasicBlock.h" 27 #include "llvm/CodeGen/MachineFunction.h" 28 #include "llvm/CodeGen/MachineFunctionPass.h" 29 #include "llvm/CodeGen/MachineInstr.h" 30 #include "llvm/CodeGen/MachineInstrBuilder.h" 31 #include "llvm/CodeGen/MachineOperand.h" 32 #include "llvm/IR/CallingConv.h" 33 #include "llvm/IR/DebugLoc.h" 34 #include "llvm/IR/Function.h" 35 #include "llvm/Pass.h" 36 #include "llvm/Support/Compiler.h" 37 #include "llvm/Support/Debug.h" 38 #include "llvm/Support/MathExtras.h" 39 #include "llvm/Support/raw_ostream.h" 40 #include <algorithm> 41 #include <cassert> 42 #include <cstdint> 43 #include <set> 44 #include <utility> 45 #include <vector> 46 47 using namespace llvm; 48 49 #define DEBUG_TYPE "r600cf" 50 51 namespace { 52 53 struct CFStack { 54 enum StackItem { 55 ENTRY = 0, 56 SUB_ENTRY = 1, 57 FIRST_NON_WQM_PUSH = 2, 58 FIRST_NON_WQM_PUSH_W_FULL_ENTRY = 3 59 }; 60 61 const R600Subtarget *ST; 62 std::vector<StackItem> BranchStack; 63 std::vector<StackItem> LoopStack; 64 unsigned MaxStackSize; 65 unsigned CurrentEntries = 0; 66 unsigned CurrentSubEntries = 0; 67 68 CFStack(const R600Subtarget *st, CallingConv::ID cc) : ST(st), 69 // We need to reserve a stack entry for CALL_FS in vertex shaders. 70 MaxStackSize(cc == CallingConv::AMDGPU_VS ? 1 : 0) {} 71 72 unsigned getLoopDepth(); 73 bool branchStackContains(CFStack::StackItem); 74 bool requiresWorkAroundForInst(unsigned Opcode); 75 unsigned getSubEntrySize(CFStack::StackItem Item); 76 void updateMaxStackSize(); 77 void pushBranch(unsigned Opcode, bool isWQM = false); 78 void pushLoop(); 79 void popBranch(); 80 void popLoop(); 81 }; 82 83 unsigned CFStack::getLoopDepth() { 84 return LoopStack.size(); 85 } 86 87 bool CFStack::branchStackContains(CFStack::StackItem Item) { 88 for (std::vector<CFStack::StackItem>::const_iterator I = BranchStack.begin(), 89 E = BranchStack.end(); I != E; ++I) { 90 if (*I == Item) 91 return true; 92 } 93 return false; 94 } 95 96 bool CFStack::requiresWorkAroundForInst(unsigned Opcode) { 97 if (Opcode == R600::CF_ALU_PUSH_BEFORE && ST->hasCaymanISA() && 98 getLoopDepth() > 1) 99 return true; 100 101 if (!ST->hasCFAluBug()) 102 return false; 103 104 switch(Opcode) { 105 default: return false; 106 case R600::CF_ALU_PUSH_BEFORE: 107 case R600::CF_ALU_ELSE_AFTER: 108 case R600::CF_ALU_BREAK: 109 case R600::CF_ALU_CONTINUE: 110 if (CurrentSubEntries == 0) 111 return false; 112 if (ST->getWavefrontSize() == 64) { 113 // We are being conservative here. We only require this work-around if 114 // CurrentSubEntries > 3 && 115 // (CurrentSubEntries % 4 == 3 || CurrentSubEntries % 4 == 0) 116 // 117 // We have to be conservative, because we don't know for certain that 118 // our stack allocation algorithm for Evergreen/NI is correct. Applying this 119 // work-around when CurrentSubEntries > 3 allows us to over-allocate stack 120 // resources without any problems. 121 return CurrentSubEntries > 3; 122 } else { 123 assert(ST->getWavefrontSize() == 32); 124 // We are being conservative here. We only require the work-around if 125 // CurrentSubEntries > 7 && 126 // (CurrentSubEntries % 8 == 7 || CurrentSubEntries % 8 == 0) 127 // See the comment on the wavefront size == 64 case for why we are 128 // being conservative. 129 return CurrentSubEntries > 7; 130 } 131 } 132 } 133 134 unsigned CFStack::getSubEntrySize(CFStack::StackItem Item) { 135 switch(Item) { 136 default: 137 return 0; 138 case CFStack::FIRST_NON_WQM_PUSH: 139 assert(!ST->hasCaymanISA()); 140 if (ST->getGeneration() <= AMDGPUSubtarget::R700) { 141 // +1 For the push operation. 142 // +2 Extra space required. 143 return 3; 144 } else { 145 // Some documentation says that this is not necessary on Evergreen, 146 // but experimentation has show that we need to allocate 1 extra 147 // sub-entry for the first non-WQM push. 148 // +1 For the push operation. 149 // +1 Extra space required. 150 return 2; 151 } 152 case CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY: 153 assert(ST->getGeneration() >= AMDGPUSubtarget::EVERGREEN); 154 // +1 For the push operation. 155 // +1 Extra space required. 156 return 2; 157 case CFStack::SUB_ENTRY: 158 return 1; 159 } 160 } 161 162 void CFStack::updateMaxStackSize() { 163 unsigned CurrentStackSize = 164 CurrentEntries + (alignTo(CurrentSubEntries, 4) / 4); 165 MaxStackSize = std::max(CurrentStackSize, MaxStackSize); 166 } 167 168 void CFStack::pushBranch(unsigned Opcode, bool isWQM) { 169 CFStack::StackItem Item = CFStack::ENTRY; 170 switch(Opcode) { 171 case R600::CF_PUSH_EG: 172 case R600::CF_ALU_PUSH_BEFORE: 173 if (!isWQM) { 174 if (!ST->hasCaymanISA() && 175 !branchStackContains(CFStack::FIRST_NON_WQM_PUSH)) 176 Item = CFStack::FIRST_NON_WQM_PUSH; // May not be required on Evergreen/NI 177 // See comment in 178 // CFStack::getSubEntrySize() 179 else if (CurrentEntries > 0 && 180 ST->getGeneration() > AMDGPUSubtarget::EVERGREEN && 181 !ST->hasCaymanISA() && 182 !branchStackContains(CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY)) 183 Item = CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY; 184 else 185 Item = CFStack::SUB_ENTRY; 186 } else 187 Item = CFStack::ENTRY; 188 break; 189 } 190 BranchStack.push_back(Item); 191 if (Item == CFStack::ENTRY) 192 CurrentEntries++; 193 else 194 CurrentSubEntries += getSubEntrySize(Item); 195 updateMaxStackSize(); 196 } 197 198 void CFStack::pushLoop() { 199 LoopStack.push_back(CFStack::ENTRY); 200 CurrentEntries++; 201 updateMaxStackSize(); 202 } 203 204 void CFStack::popBranch() { 205 CFStack::StackItem Top = BranchStack.back(); 206 if (Top == CFStack::ENTRY) 207 CurrentEntries--; 208 else 209 CurrentSubEntries-= getSubEntrySize(Top); 210 BranchStack.pop_back(); 211 } 212 213 void CFStack::popLoop() { 214 CurrentEntries--; 215 LoopStack.pop_back(); 216 } 217 218 class R600ControlFlowFinalizer : public MachineFunctionPass { 219 private: 220 using ClauseFile = std::pair<MachineInstr *, std::vector<MachineInstr *>>; 221 222 enum ControlFlowInstruction { 223 CF_TC, 224 CF_VC, 225 CF_CALL_FS, 226 CF_WHILE_LOOP, 227 CF_END_LOOP, 228 CF_LOOP_BREAK, 229 CF_LOOP_CONTINUE, 230 CF_JUMP, 231 CF_ELSE, 232 CF_POP, 233 CF_END 234 }; 235 236 const R600InstrInfo *TII = nullptr; 237 const R600RegisterInfo *TRI = nullptr; 238 unsigned MaxFetchInst; 239 const R600Subtarget *ST = nullptr; 240 241 bool IsTrivialInst(MachineInstr &MI) const { 242 switch (MI.getOpcode()) { 243 case R600::KILL: 244 case R600::RETURN: 245 return true; 246 default: 247 return false; 248 } 249 } 250 251 const MCInstrDesc &getHWInstrDesc(ControlFlowInstruction CFI) const { 252 unsigned Opcode = 0; 253 bool isEg = (ST->getGeneration() >= AMDGPUSubtarget::EVERGREEN); 254 switch (CFI) { 255 case CF_TC: 256 Opcode = isEg ? R600::CF_TC_EG : R600::CF_TC_R600; 257 break; 258 case CF_VC: 259 Opcode = isEg ? R600::CF_VC_EG : R600::CF_VC_R600; 260 break; 261 case CF_CALL_FS: 262 Opcode = isEg ? R600::CF_CALL_FS_EG : R600::CF_CALL_FS_R600; 263 break; 264 case CF_WHILE_LOOP: 265 Opcode = isEg ? R600::WHILE_LOOP_EG : R600::WHILE_LOOP_R600; 266 break; 267 case CF_END_LOOP: 268 Opcode = isEg ? R600::END_LOOP_EG : R600::END_LOOP_R600; 269 break; 270 case CF_LOOP_BREAK: 271 Opcode = isEg ? R600::LOOP_BREAK_EG : R600::LOOP_BREAK_R600; 272 break; 273 case CF_LOOP_CONTINUE: 274 Opcode = isEg ? R600::CF_CONTINUE_EG : R600::CF_CONTINUE_R600; 275 break; 276 case CF_JUMP: 277 Opcode = isEg ? R600::CF_JUMP_EG : R600::CF_JUMP_R600; 278 break; 279 case CF_ELSE: 280 Opcode = isEg ? R600::CF_ELSE_EG : R600::CF_ELSE_R600; 281 break; 282 case CF_POP: 283 Opcode = isEg ? R600::POP_EG : R600::POP_R600; 284 break; 285 case CF_END: 286 if (ST->hasCaymanISA()) { 287 Opcode = R600::CF_END_CM; 288 break; 289 } 290 Opcode = isEg ? R600::CF_END_EG : R600::CF_END_R600; 291 break; 292 } 293 assert (Opcode && "No opcode selected"); 294 return TII->get(Opcode); 295 } 296 297 bool isCompatibleWithClause(const MachineInstr &MI, 298 std::set<unsigned> &DstRegs) const { 299 unsigned DstMI, SrcMI; 300 for (MachineInstr::const_mop_iterator I = MI.operands_begin(), 301 E = MI.operands_end(); 302 I != E; ++I) { 303 const MachineOperand &MO = *I; 304 if (!MO.isReg()) 305 continue; 306 if (MO.isDef()) { 307 unsigned Reg = MO.getReg(); 308 if (R600::R600_Reg128RegClass.contains(Reg)) 309 DstMI = Reg; 310 else 311 DstMI = TRI->getMatchingSuperReg(Reg, 312 AMDGPURegisterInfo::getSubRegFromChannel(TRI->getHWRegChan(Reg)), 313 &R600::R600_Reg128RegClass); 314 } 315 if (MO.isUse()) { 316 unsigned Reg = MO.getReg(); 317 if (R600::R600_Reg128RegClass.contains(Reg)) 318 SrcMI = Reg; 319 else 320 SrcMI = TRI->getMatchingSuperReg(Reg, 321 AMDGPURegisterInfo::getSubRegFromChannel(TRI->getHWRegChan(Reg)), 322 &R600::R600_Reg128RegClass); 323 } 324 } 325 if ((DstRegs.find(SrcMI) == DstRegs.end())) { 326 DstRegs.insert(DstMI); 327 return true; 328 } else 329 return false; 330 } 331 332 ClauseFile 333 MakeFetchClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator &I) 334 const { 335 MachineBasicBlock::iterator ClauseHead = I; 336 std::vector<MachineInstr *> ClauseContent; 337 unsigned AluInstCount = 0; 338 bool IsTex = TII->usesTextureCache(*ClauseHead); 339 std::set<unsigned> DstRegs; 340 for (MachineBasicBlock::iterator E = MBB.end(); I != E; ++I) { 341 if (IsTrivialInst(*I)) 342 continue; 343 if (AluInstCount >= MaxFetchInst) 344 break; 345 if ((IsTex && !TII->usesTextureCache(*I)) || 346 (!IsTex && !TII->usesVertexCache(*I))) 347 break; 348 if (!isCompatibleWithClause(*I, DstRegs)) 349 break; 350 AluInstCount ++; 351 ClauseContent.push_back(&*I); 352 } 353 MachineInstr *MIb = BuildMI(MBB, ClauseHead, MBB.findDebugLoc(ClauseHead), 354 getHWInstrDesc(IsTex?CF_TC:CF_VC)) 355 .addImm(0) // ADDR 356 .addImm(AluInstCount - 1); // COUNT 357 return ClauseFile(MIb, std::move(ClauseContent)); 358 } 359 360 void getLiteral(MachineInstr &MI, std::vector<MachineOperand *> &Lits) const { 361 static const unsigned LiteralRegs[] = { 362 R600::ALU_LITERAL_X, 363 R600::ALU_LITERAL_Y, 364 R600::ALU_LITERAL_Z, 365 R600::ALU_LITERAL_W 366 }; 367 const SmallVector<std::pair<MachineOperand *, int64_t>, 3> Srcs = 368 TII->getSrcs(MI); 369 for (const auto &Src:Srcs) { 370 if (Src.first->getReg() != R600::ALU_LITERAL_X) 371 continue; 372 int64_t Imm = Src.second; 373 std::vector<MachineOperand *>::iterator It = 374 llvm::find_if(Lits, [&](MachineOperand *val) { 375 return val->isImm() && (val->getImm() == Imm); 376 }); 377 378 // Get corresponding Operand 379 MachineOperand &Operand = MI.getOperand( 380 TII->getOperandIdx(MI.getOpcode(), R600::OpName::literal)); 381 382 if (It != Lits.end()) { 383 // Reuse existing literal reg 384 unsigned Index = It - Lits.begin(); 385 Src.first->setReg(LiteralRegs[Index]); 386 } else { 387 // Allocate new literal reg 388 assert(Lits.size() < 4 && "Too many literals in Instruction Group"); 389 Src.first->setReg(LiteralRegs[Lits.size()]); 390 Lits.push_back(&Operand); 391 } 392 } 393 } 394 395 MachineBasicBlock::iterator insertLiterals( 396 MachineBasicBlock::iterator InsertPos, 397 const std::vector<unsigned> &Literals) const { 398 MachineBasicBlock *MBB = InsertPos->getParent(); 399 for (unsigned i = 0, e = Literals.size(); i < e; i+=2) { 400 unsigned LiteralPair0 = Literals[i]; 401 unsigned LiteralPair1 = (i + 1 < e)?Literals[i + 1]:0; 402 InsertPos = BuildMI(MBB, InsertPos->getDebugLoc(), 403 TII->get(R600::LITERALS)) 404 .addImm(LiteralPair0) 405 .addImm(LiteralPair1); 406 } 407 return InsertPos; 408 } 409 410 ClauseFile 411 MakeALUClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator &I) 412 const { 413 MachineInstr &ClauseHead = *I; 414 std::vector<MachineInstr *> ClauseContent; 415 I++; 416 for (MachineBasicBlock::instr_iterator E = MBB.instr_end(); I != E;) { 417 if (IsTrivialInst(*I)) { 418 ++I; 419 continue; 420 } 421 if (!I->isBundle() && !TII->isALUInstr(I->getOpcode())) 422 break; 423 std::vector<MachineOperand *>Literals; 424 if (I->isBundle()) { 425 MachineInstr &DeleteMI = *I; 426 MachineBasicBlock::instr_iterator BI = I.getInstrIterator(); 427 while (++BI != E && BI->isBundledWithPred()) { 428 BI->unbundleFromPred(); 429 for (MachineOperand &MO : BI->operands()) { 430 if (MO.isReg() && MO.isInternalRead()) 431 MO.setIsInternalRead(false); 432 } 433 getLiteral(*BI, Literals); 434 ClauseContent.push_back(&*BI); 435 } 436 I = BI; 437 DeleteMI.eraseFromParent(); 438 } else { 439 getLiteral(*I, Literals); 440 ClauseContent.push_back(&*I); 441 I++; 442 } 443 for (unsigned i = 0, e = Literals.size(); i < e; i += 2) { 444 MachineInstrBuilder MILit = BuildMI(MBB, I, I->getDebugLoc(), 445 TII->get(R600::LITERALS)); 446 if (Literals[i]->isImm()) { 447 MILit.addImm(Literals[i]->getImm()); 448 } else { 449 MILit.addGlobalAddress(Literals[i]->getGlobal(), 450 Literals[i]->getOffset()); 451 } 452 if (i + 1 < e) { 453 if (Literals[i + 1]->isImm()) { 454 MILit.addImm(Literals[i + 1]->getImm()); 455 } else { 456 MILit.addGlobalAddress(Literals[i + 1]->getGlobal(), 457 Literals[i + 1]->getOffset()); 458 } 459 } else 460 MILit.addImm(0); 461 ClauseContent.push_back(MILit); 462 } 463 } 464 assert(ClauseContent.size() < 128 && "ALU clause is too big"); 465 ClauseHead.getOperand(7).setImm(ClauseContent.size() - 1); 466 return ClauseFile(&ClauseHead, std::move(ClauseContent)); 467 } 468 469 void EmitFetchClause(MachineBasicBlock::iterator InsertPos, 470 const DebugLoc &DL, ClauseFile &Clause, 471 unsigned &CfCount) { 472 CounterPropagateAddr(*Clause.first, CfCount); 473 MachineBasicBlock *BB = Clause.first->getParent(); 474 BuildMI(BB, DL, TII->get(R600::FETCH_CLAUSE)).addImm(CfCount); 475 for (unsigned i = 0, e = Clause.second.size(); i < e; ++i) { 476 BB->splice(InsertPos, BB, Clause.second[i]); 477 } 478 CfCount += 2 * Clause.second.size(); 479 } 480 481 void EmitALUClause(MachineBasicBlock::iterator InsertPos, const DebugLoc &DL, 482 ClauseFile &Clause, unsigned &CfCount) { 483 Clause.first->getOperand(0).setImm(0); 484 CounterPropagateAddr(*Clause.first, CfCount); 485 MachineBasicBlock *BB = Clause.first->getParent(); 486 BuildMI(BB, DL, TII->get(R600::ALU_CLAUSE)).addImm(CfCount); 487 for (unsigned i = 0, e = Clause.second.size(); i < e; ++i) { 488 BB->splice(InsertPos, BB, Clause.second[i]); 489 } 490 CfCount += Clause.second.size(); 491 } 492 493 void CounterPropagateAddr(MachineInstr &MI, unsigned Addr) const { 494 MI.getOperand(0).setImm(Addr + MI.getOperand(0).getImm()); 495 } 496 void CounterPropagateAddr(const std::set<MachineInstr *> &MIs, 497 unsigned Addr) const { 498 for (MachineInstr *MI : MIs) { 499 CounterPropagateAddr(*MI, Addr); 500 } 501 } 502 503 public: 504 static char ID; 505 506 R600ControlFlowFinalizer() : MachineFunctionPass(ID) {} 507 508 bool runOnMachineFunction(MachineFunction &MF) override { 509 ST = &MF.getSubtarget<R600Subtarget>(); 510 MaxFetchInst = ST->getTexVTXClauseSize(); 511 TII = ST->getInstrInfo(); 512 TRI = ST->getRegisterInfo(); 513 514 R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>(); 515 516 CFStack CFStack(ST, MF.getFunction().getCallingConv()); 517 for (MachineFunction::iterator MB = MF.begin(), ME = MF.end(); MB != ME; 518 ++MB) { 519 MachineBasicBlock &MBB = *MB; 520 unsigned CfCount = 0; 521 std::vector<std::pair<unsigned, std::set<MachineInstr *>>> LoopStack; 522 std::vector<MachineInstr * > IfThenElseStack; 523 if (MF.getFunction().getCallingConv() == CallingConv::AMDGPU_VS) { 524 BuildMI(MBB, MBB.begin(), MBB.findDebugLoc(MBB.begin()), 525 getHWInstrDesc(CF_CALL_FS)); 526 CfCount++; 527 } 528 std::vector<ClauseFile> FetchClauses, AluClauses; 529 std::vector<MachineInstr *> LastAlu(1); 530 std::vector<MachineInstr *> ToPopAfter; 531 532 for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end(); 533 I != E;) { 534 if (TII->usesTextureCache(*I) || TII->usesVertexCache(*I)) { 535 LLVM_DEBUG(dbgs() << CfCount << ":"; I->dump();); 536 FetchClauses.push_back(MakeFetchClause(MBB, I)); 537 CfCount++; 538 LastAlu.back() = nullptr; 539 continue; 540 } 541 542 MachineBasicBlock::iterator MI = I; 543 if (MI->getOpcode() != R600::ENDIF) 544 LastAlu.back() = nullptr; 545 if (MI->getOpcode() == R600::CF_ALU) 546 LastAlu.back() = &*MI; 547 I++; 548 bool RequiresWorkAround = 549 CFStack.requiresWorkAroundForInst(MI->getOpcode()); 550 switch (MI->getOpcode()) { 551 case R600::CF_ALU_PUSH_BEFORE: 552 if (RequiresWorkAround) { 553 LLVM_DEBUG(dbgs() 554 << "Applying bug work-around for ALU_PUSH_BEFORE\n"); 555 BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(R600::CF_PUSH_EG)) 556 .addImm(CfCount + 1) 557 .addImm(1); 558 MI->setDesc(TII->get(R600::CF_ALU)); 559 CfCount++; 560 CFStack.pushBranch(R600::CF_PUSH_EG); 561 } else 562 CFStack.pushBranch(R600::CF_ALU_PUSH_BEFORE); 563 LLVM_FALLTHROUGH; 564 case R600::CF_ALU: 565 I = MI; 566 AluClauses.push_back(MakeALUClause(MBB, I)); 567 LLVM_DEBUG(dbgs() << CfCount << ":"; MI->dump();); 568 CfCount++; 569 break; 570 case R600::WHILELOOP: { 571 CFStack.pushLoop(); 572 MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI), 573 getHWInstrDesc(CF_WHILE_LOOP)) 574 .addImm(1); 575 std::pair<unsigned, std::set<MachineInstr *>> Pair(CfCount, 576 std::set<MachineInstr *>()); 577 Pair.second.insert(MIb); 578 LoopStack.push_back(std::move(Pair)); 579 MI->eraseFromParent(); 580 CfCount++; 581 break; 582 } 583 case R600::ENDLOOP: { 584 CFStack.popLoop(); 585 std::pair<unsigned, std::set<MachineInstr *>> Pair = 586 std::move(LoopStack.back()); 587 LoopStack.pop_back(); 588 CounterPropagateAddr(Pair.second, CfCount); 589 BuildMI(MBB, MI, MBB.findDebugLoc(MI), getHWInstrDesc(CF_END_LOOP)) 590 .addImm(Pair.first + 1); 591 MI->eraseFromParent(); 592 CfCount++; 593 break; 594 } 595 case R600::IF_PREDICATE_SET: { 596 LastAlu.push_back(nullptr); 597 MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI), 598 getHWInstrDesc(CF_JUMP)) 599 .addImm(0) 600 .addImm(0); 601 IfThenElseStack.push_back(MIb); 602 LLVM_DEBUG(dbgs() << CfCount << ":"; MIb->dump();); 603 MI->eraseFromParent(); 604 CfCount++; 605 break; 606 } 607 case R600::ELSE: { 608 MachineInstr * JumpInst = IfThenElseStack.back(); 609 IfThenElseStack.pop_back(); 610 CounterPropagateAddr(*JumpInst, CfCount); 611 MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI), 612 getHWInstrDesc(CF_ELSE)) 613 .addImm(0) 614 .addImm(0); 615 LLVM_DEBUG(dbgs() << CfCount << ":"; MIb->dump();); 616 IfThenElseStack.push_back(MIb); 617 MI->eraseFromParent(); 618 CfCount++; 619 break; 620 } 621 case R600::ENDIF: { 622 CFStack.popBranch(); 623 if (LastAlu.back()) { 624 ToPopAfter.push_back(LastAlu.back()); 625 } else { 626 MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI), 627 getHWInstrDesc(CF_POP)) 628 .addImm(CfCount + 1) 629 .addImm(1); 630 (void)MIb; 631 LLVM_DEBUG(dbgs() << CfCount << ":"; MIb->dump();); 632 CfCount++; 633 } 634 635 MachineInstr *IfOrElseInst = IfThenElseStack.back(); 636 IfThenElseStack.pop_back(); 637 CounterPropagateAddr(*IfOrElseInst, CfCount); 638 IfOrElseInst->getOperand(1).setImm(1); 639 LastAlu.pop_back(); 640 MI->eraseFromParent(); 641 break; 642 } 643 case R600::BREAK: { 644 CfCount ++; 645 MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI), 646 getHWInstrDesc(CF_LOOP_BREAK)) 647 .addImm(0); 648 LoopStack.back().second.insert(MIb); 649 MI->eraseFromParent(); 650 break; 651 } 652 case R600::CONTINUE: { 653 MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI), 654 getHWInstrDesc(CF_LOOP_CONTINUE)) 655 .addImm(0); 656 LoopStack.back().second.insert(MIb); 657 MI->eraseFromParent(); 658 CfCount++; 659 break; 660 } 661 case R600::RETURN: { 662 DebugLoc DL = MBB.findDebugLoc(MI); 663 BuildMI(MBB, MI, DL, getHWInstrDesc(CF_END)); 664 CfCount++; 665 if (CfCount % 2) { 666 BuildMI(MBB, I, DL, TII->get(R600::PAD)); 667 CfCount++; 668 } 669 MI->eraseFromParent(); 670 for (unsigned i = 0, e = FetchClauses.size(); i < e; i++) 671 EmitFetchClause(I, DL, FetchClauses[i], CfCount); 672 for (unsigned i = 0, e = AluClauses.size(); i < e; i++) 673 EmitALUClause(I, DL, AluClauses[i], CfCount); 674 break; 675 } 676 default: 677 if (TII->isExport(MI->getOpcode())) { 678 LLVM_DEBUG(dbgs() << CfCount << ":"; MI->dump();); 679 CfCount++; 680 } 681 break; 682 } 683 } 684 for (unsigned i = 0, e = ToPopAfter.size(); i < e; ++i) { 685 MachineInstr *Alu = ToPopAfter[i]; 686 BuildMI(MBB, Alu, MBB.findDebugLoc((MachineBasicBlock::iterator)Alu), 687 TII->get(R600::CF_ALU_POP_AFTER)) 688 .addImm(Alu->getOperand(0).getImm()) 689 .addImm(Alu->getOperand(1).getImm()) 690 .addImm(Alu->getOperand(2).getImm()) 691 .addImm(Alu->getOperand(3).getImm()) 692 .addImm(Alu->getOperand(4).getImm()) 693 .addImm(Alu->getOperand(5).getImm()) 694 .addImm(Alu->getOperand(6).getImm()) 695 .addImm(Alu->getOperand(7).getImm()) 696 .addImm(Alu->getOperand(8).getImm()); 697 Alu->eraseFromParent(); 698 } 699 MFI->CFStackSize = CFStack.MaxStackSize; 700 } 701 702 return false; 703 } 704 705 StringRef getPassName() const override { 706 return "R600 Control Flow Finalizer Pass"; 707 } 708 }; 709 710 } // end anonymous namespace 711 712 INITIALIZE_PASS_BEGIN(R600ControlFlowFinalizer, DEBUG_TYPE, 713 "R600 Control Flow Finalizer", false, false) 714 INITIALIZE_PASS_END(R600ControlFlowFinalizer, DEBUG_TYPE, 715 "R600 Control Flow Finalizer", false, false) 716 717 char R600ControlFlowFinalizer::ID = 0; 718 719 char &llvm::R600ControlFlowFinalizerID = R600ControlFlowFinalizer::ID; 720 721 FunctionPass *llvm::createR600ControlFlowFinalizer() { 722 return new R600ControlFlowFinalizer(); 723 } 724