Home | History | Annotate | Download | only in AMDGPU
      1 //===- R600ControlFlowFinalizer.cpp - Finalize Control Flow Inst ----------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 /// \file
     11 /// This pass turns all control flow pseudo instructions into native ones,
     12 /// computing their addresses on the fly; it also sets STACK_SIZE info.
     13 //
     14 //===----------------------------------------------------------------------===//
     15 
     16 #include "AMDGPU.h"
     17 #include "AMDGPUSubtarget.h"
     18 #include "R600Defines.h"
     19 #include "R600InstrInfo.h"
     20 #include "R600MachineFunctionInfo.h"
     21 #include "R600RegisterInfo.h"
     22 #include "MCTargetDesc/AMDGPUMCTargetDesc.h"
     23 #include "llvm/ADT/STLExtras.h"
     24 #include "llvm/ADT/SmallVector.h"
     25 #include "llvm/ADT/StringRef.h"
     26 #include "llvm/CodeGen/MachineBasicBlock.h"
     27 #include "llvm/CodeGen/MachineFunction.h"
     28 #include "llvm/CodeGen/MachineFunctionPass.h"
     29 #include "llvm/CodeGen/MachineInstr.h"
     30 #include "llvm/CodeGen/MachineInstrBuilder.h"
     31 #include "llvm/CodeGen/MachineOperand.h"
     32 #include "llvm/IR/CallingConv.h"
     33 #include "llvm/IR/DebugLoc.h"
     34 #include "llvm/IR/Function.h"
     35 #include "llvm/Pass.h"
     36 #include "llvm/Support/Compiler.h"
     37 #include "llvm/Support/Debug.h"
     38 #include "llvm/Support/MathExtras.h"
     39 #include "llvm/Support/raw_ostream.h"
     40 #include <algorithm>
     41 #include <cassert>
     42 #include <cstdint>
     43 #include <set>
     44 #include <utility>
     45 #include <vector>
     46 
     47 using namespace llvm;
     48 
     49 #define DEBUG_TYPE "r600cf"
     50 
     51 namespace {
     52 
     53 struct CFStack {
     54   enum StackItem {
     55     ENTRY = 0,
     56     SUB_ENTRY = 1,
     57     FIRST_NON_WQM_PUSH = 2,
     58     FIRST_NON_WQM_PUSH_W_FULL_ENTRY = 3
     59   };
     60 
     61   const R600Subtarget *ST;
     62   std::vector<StackItem> BranchStack;
     63   std::vector<StackItem> LoopStack;
     64   unsigned MaxStackSize;
     65   unsigned CurrentEntries = 0;
     66   unsigned CurrentSubEntries = 0;
     67 
     68   CFStack(const R600Subtarget *st, CallingConv::ID cc) : ST(st),
     69       // We need to reserve a stack entry for CALL_FS in vertex shaders.
     70       MaxStackSize(cc == CallingConv::AMDGPU_VS ? 1 : 0) {}
     71 
     72   unsigned getLoopDepth();
     73   bool branchStackContains(CFStack::StackItem);
     74   bool requiresWorkAroundForInst(unsigned Opcode);
     75   unsigned getSubEntrySize(CFStack::StackItem Item);
     76   void updateMaxStackSize();
     77   void pushBranch(unsigned Opcode, bool isWQM = false);
     78   void pushLoop();
     79   void popBranch();
     80   void popLoop();
     81 };
     82 
     83 unsigned CFStack::getLoopDepth() {
     84   return LoopStack.size();
     85 }
     86 
     87 bool CFStack::branchStackContains(CFStack::StackItem Item) {
     88   for (std::vector<CFStack::StackItem>::const_iterator I = BranchStack.begin(),
     89        E = BranchStack.end(); I != E; ++I) {
     90     if (*I == Item)
     91       return true;
     92   }
     93   return false;
     94 }
     95 
     96 bool CFStack::requiresWorkAroundForInst(unsigned Opcode) {
     97   if (Opcode == R600::CF_ALU_PUSH_BEFORE && ST->hasCaymanISA() &&
     98       getLoopDepth() > 1)
     99     return true;
    100 
    101   if (!ST->hasCFAluBug())
    102     return false;
    103 
    104   switch(Opcode) {
    105   default: return false;
    106   case R600::CF_ALU_PUSH_BEFORE:
    107   case R600::CF_ALU_ELSE_AFTER:
    108   case R600::CF_ALU_BREAK:
    109   case R600::CF_ALU_CONTINUE:
    110     if (CurrentSubEntries == 0)
    111       return false;
    112     if (ST->getWavefrontSize() == 64) {
    113       // We are being conservative here.  We only require this work-around if
    114       // CurrentSubEntries > 3 &&
    115       // (CurrentSubEntries % 4 == 3 || CurrentSubEntries % 4 == 0)
    116       //
    117       // We have to be conservative, because we don't know for certain that
    118       // our stack allocation algorithm for Evergreen/NI is correct.  Applying this
    119       // work-around when CurrentSubEntries > 3 allows us to over-allocate stack
    120       // resources without any problems.
    121       return CurrentSubEntries > 3;
    122     } else {
    123       assert(ST->getWavefrontSize() == 32);
    124       // We are being conservative here.  We only require the work-around if
    125       // CurrentSubEntries > 7 &&
    126       // (CurrentSubEntries % 8 == 7 || CurrentSubEntries % 8 == 0)
    127       // See the comment on the wavefront size == 64 case for why we are
    128       // being conservative.
    129       return CurrentSubEntries > 7;
    130     }
    131   }
    132 }
    133 
    134 unsigned CFStack::getSubEntrySize(CFStack::StackItem Item) {
    135   switch(Item) {
    136   default:
    137     return 0;
    138   case CFStack::FIRST_NON_WQM_PUSH:
    139   assert(!ST->hasCaymanISA());
    140   if (ST->getGeneration() <= AMDGPUSubtarget::R700) {
    141     // +1 For the push operation.
    142     // +2 Extra space required.
    143     return 3;
    144   } else {
    145     // Some documentation says that this is not necessary on Evergreen,
    146     // but experimentation has show that we need to allocate 1 extra
    147     // sub-entry for the first non-WQM push.
    148     // +1 For the push operation.
    149     // +1 Extra space required.
    150     return 2;
    151   }
    152   case CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY:
    153     assert(ST->getGeneration() >= AMDGPUSubtarget::EVERGREEN);
    154     // +1 For the push operation.
    155     // +1 Extra space required.
    156     return 2;
    157   case CFStack::SUB_ENTRY:
    158     return 1;
    159   }
    160 }
    161 
    162 void CFStack::updateMaxStackSize() {
    163   unsigned CurrentStackSize =
    164       CurrentEntries + (alignTo(CurrentSubEntries, 4) / 4);
    165   MaxStackSize = std::max(CurrentStackSize, MaxStackSize);
    166 }
    167 
    168 void CFStack::pushBranch(unsigned Opcode, bool isWQM) {
    169   CFStack::StackItem Item = CFStack::ENTRY;
    170   switch(Opcode) {
    171   case R600::CF_PUSH_EG:
    172   case R600::CF_ALU_PUSH_BEFORE:
    173     if (!isWQM) {
    174       if (!ST->hasCaymanISA() &&
    175           !branchStackContains(CFStack::FIRST_NON_WQM_PUSH))
    176         Item = CFStack::FIRST_NON_WQM_PUSH;  // May not be required on Evergreen/NI
    177                                              // See comment in
    178                                              // CFStack::getSubEntrySize()
    179       else if (CurrentEntries > 0 &&
    180                ST->getGeneration() > AMDGPUSubtarget::EVERGREEN &&
    181                !ST->hasCaymanISA() &&
    182                !branchStackContains(CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY))
    183         Item = CFStack::FIRST_NON_WQM_PUSH_W_FULL_ENTRY;
    184       else
    185         Item = CFStack::SUB_ENTRY;
    186     } else
    187       Item = CFStack::ENTRY;
    188     break;
    189   }
    190   BranchStack.push_back(Item);
    191   if (Item == CFStack::ENTRY)
    192     CurrentEntries++;
    193   else
    194     CurrentSubEntries += getSubEntrySize(Item);
    195   updateMaxStackSize();
    196 }
    197 
    198 void CFStack::pushLoop() {
    199   LoopStack.push_back(CFStack::ENTRY);
    200   CurrentEntries++;
    201   updateMaxStackSize();
    202 }
    203 
    204 void CFStack::popBranch() {
    205   CFStack::StackItem Top = BranchStack.back();
    206   if (Top == CFStack::ENTRY)
    207     CurrentEntries--;
    208   else
    209     CurrentSubEntries-= getSubEntrySize(Top);
    210   BranchStack.pop_back();
    211 }
    212 
    213 void CFStack::popLoop() {
    214   CurrentEntries--;
    215   LoopStack.pop_back();
    216 }
    217 
    218 class R600ControlFlowFinalizer : public MachineFunctionPass {
    219 private:
    220   using ClauseFile = std::pair<MachineInstr *, std::vector<MachineInstr *>>;
    221 
    222   enum ControlFlowInstruction {
    223     CF_TC,
    224     CF_VC,
    225     CF_CALL_FS,
    226     CF_WHILE_LOOP,
    227     CF_END_LOOP,
    228     CF_LOOP_BREAK,
    229     CF_LOOP_CONTINUE,
    230     CF_JUMP,
    231     CF_ELSE,
    232     CF_POP,
    233     CF_END
    234   };
    235 
    236   const R600InstrInfo *TII = nullptr;
    237   const R600RegisterInfo *TRI = nullptr;
    238   unsigned MaxFetchInst;
    239   const R600Subtarget *ST = nullptr;
    240 
    241   bool IsTrivialInst(MachineInstr &MI) const {
    242     switch (MI.getOpcode()) {
    243     case R600::KILL:
    244     case R600::RETURN:
    245       return true;
    246     default:
    247       return false;
    248     }
    249   }
    250 
    251   const MCInstrDesc &getHWInstrDesc(ControlFlowInstruction CFI) const {
    252     unsigned Opcode = 0;
    253     bool isEg = (ST->getGeneration() >= AMDGPUSubtarget::EVERGREEN);
    254     switch (CFI) {
    255     case CF_TC:
    256       Opcode = isEg ? R600::CF_TC_EG : R600::CF_TC_R600;
    257       break;
    258     case CF_VC:
    259       Opcode = isEg ? R600::CF_VC_EG : R600::CF_VC_R600;
    260       break;
    261     case CF_CALL_FS:
    262       Opcode = isEg ? R600::CF_CALL_FS_EG : R600::CF_CALL_FS_R600;
    263       break;
    264     case CF_WHILE_LOOP:
    265       Opcode = isEg ? R600::WHILE_LOOP_EG : R600::WHILE_LOOP_R600;
    266       break;
    267     case CF_END_LOOP:
    268       Opcode = isEg ? R600::END_LOOP_EG : R600::END_LOOP_R600;
    269       break;
    270     case CF_LOOP_BREAK:
    271       Opcode = isEg ? R600::LOOP_BREAK_EG : R600::LOOP_BREAK_R600;
    272       break;
    273     case CF_LOOP_CONTINUE:
    274       Opcode = isEg ? R600::CF_CONTINUE_EG : R600::CF_CONTINUE_R600;
    275       break;
    276     case CF_JUMP:
    277       Opcode = isEg ? R600::CF_JUMP_EG : R600::CF_JUMP_R600;
    278       break;
    279     case CF_ELSE:
    280       Opcode = isEg ? R600::CF_ELSE_EG : R600::CF_ELSE_R600;
    281       break;
    282     case CF_POP:
    283       Opcode = isEg ? R600::POP_EG : R600::POP_R600;
    284       break;
    285     case CF_END:
    286       if (ST->hasCaymanISA()) {
    287         Opcode = R600::CF_END_CM;
    288         break;
    289       }
    290       Opcode = isEg ? R600::CF_END_EG : R600::CF_END_R600;
    291       break;
    292     }
    293     assert (Opcode && "No opcode selected");
    294     return TII->get(Opcode);
    295   }
    296 
    297   bool isCompatibleWithClause(const MachineInstr &MI,
    298                               std::set<unsigned> &DstRegs) const {
    299     unsigned DstMI, SrcMI;
    300     for (MachineInstr::const_mop_iterator I = MI.operands_begin(),
    301                                           E = MI.operands_end();
    302          I != E; ++I) {
    303       const MachineOperand &MO = *I;
    304       if (!MO.isReg())
    305         continue;
    306       if (MO.isDef()) {
    307         unsigned Reg = MO.getReg();
    308         if (R600::R600_Reg128RegClass.contains(Reg))
    309           DstMI = Reg;
    310         else
    311           DstMI = TRI->getMatchingSuperReg(Reg,
    312               AMDGPURegisterInfo::getSubRegFromChannel(TRI->getHWRegChan(Reg)),
    313               &R600::R600_Reg128RegClass);
    314       }
    315       if (MO.isUse()) {
    316         unsigned Reg = MO.getReg();
    317         if (R600::R600_Reg128RegClass.contains(Reg))
    318           SrcMI = Reg;
    319         else
    320           SrcMI = TRI->getMatchingSuperReg(Reg,
    321               AMDGPURegisterInfo::getSubRegFromChannel(TRI->getHWRegChan(Reg)),
    322               &R600::R600_Reg128RegClass);
    323       }
    324     }
    325     if ((DstRegs.find(SrcMI) == DstRegs.end())) {
    326       DstRegs.insert(DstMI);
    327       return true;
    328     } else
    329       return false;
    330   }
    331 
    332   ClauseFile
    333   MakeFetchClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator &I)
    334       const {
    335     MachineBasicBlock::iterator ClauseHead = I;
    336     std::vector<MachineInstr *> ClauseContent;
    337     unsigned AluInstCount = 0;
    338     bool IsTex = TII->usesTextureCache(*ClauseHead);
    339     std::set<unsigned> DstRegs;
    340     for (MachineBasicBlock::iterator E = MBB.end(); I != E; ++I) {
    341       if (IsTrivialInst(*I))
    342         continue;
    343       if (AluInstCount >= MaxFetchInst)
    344         break;
    345       if ((IsTex && !TII->usesTextureCache(*I)) ||
    346           (!IsTex && !TII->usesVertexCache(*I)))
    347         break;
    348       if (!isCompatibleWithClause(*I, DstRegs))
    349         break;
    350       AluInstCount ++;
    351       ClauseContent.push_back(&*I);
    352     }
    353     MachineInstr *MIb = BuildMI(MBB, ClauseHead, MBB.findDebugLoc(ClauseHead),
    354         getHWInstrDesc(IsTex?CF_TC:CF_VC))
    355         .addImm(0) // ADDR
    356         .addImm(AluInstCount - 1); // COUNT
    357     return ClauseFile(MIb, std::move(ClauseContent));
    358   }
    359 
    360   void getLiteral(MachineInstr &MI, std::vector<MachineOperand *> &Lits) const {
    361     static const unsigned LiteralRegs[] = {
    362       R600::ALU_LITERAL_X,
    363       R600::ALU_LITERAL_Y,
    364       R600::ALU_LITERAL_Z,
    365       R600::ALU_LITERAL_W
    366     };
    367     const SmallVector<std::pair<MachineOperand *, int64_t>, 3> Srcs =
    368         TII->getSrcs(MI);
    369     for (const auto &Src:Srcs) {
    370       if (Src.first->getReg() != R600::ALU_LITERAL_X)
    371         continue;
    372       int64_t Imm = Src.second;
    373       std::vector<MachineOperand *>::iterator It =
    374           llvm::find_if(Lits, [&](MachineOperand *val) {
    375             return val->isImm() && (val->getImm() == Imm);
    376           });
    377 
    378       // Get corresponding Operand
    379       MachineOperand &Operand = MI.getOperand(
    380           TII->getOperandIdx(MI.getOpcode(), R600::OpName::literal));
    381 
    382       if (It != Lits.end()) {
    383         // Reuse existing literal reg
    384         unsigned Index = It - Lits.begin();
    385         Src.first->setReg(LiteralRegs[Index]);
    386       } else {
    387         // Allocate new literal reg
    388         assert(Lits.size() < 4 && "Too many literals in Instruction Group");
    389         Src.first->setReg(LiteralRegs[Lits.size()]);
    390         Lits.push_back(&Operand);
    391       }
    392     }
    393   }
    394 
    395   MachineBasicBlock::iterator insertLiterals(
    396       MachineBasicBlock::iterator InsertPos,
    397       const std::vector<unsigned> &Literals) const {
    398     MachineBasicBlock *MBB = InsertPos->getParent();
    399     for (unsigned i = 0, e = Literals.size(); i < e; i+=2) {
    400       unsigned LiteralPair0 = Literals[i];
    401       unsigned LiteralPair1 = (i + 1 < e)?Literals[i + 1]:0;
    402       InsertPos = BuildMI(MBB, InsertPos->getDebugLoc(),
    403           TII->get(R600::LITERALS))
    404           .addImm(LiteralPair0)
    405           .addImm(LiteralPair1);
    406     }
    407     return InsertPos;
    408   }
    409 
  /// Builds the ALU clause whose head instruction is at \p I.
  ///
  /// Starting after the head, consecutive ALU instructions and bundles are
  /// collected (trivial instructions are skipped). Bundles are dissolved: the
  /// bundle header is erased and its members are unbundled and collected
  /// individually. After each instruction or bundle, the literals it uses are
  /// materialized as LITERALS instructions (two values per instruction) and
  /// collected as well.
  ///
  /// On return \p I points at the first instruction not consumed by the
  /// clause. Operand 7 of the head is set to the clause size minus one.
  ///
  /// \returns the clause head together with the collected instructions.
  ClauseFile
  MakeALUClause(MachineBasicBlock &MBB, MachineBasicBlock::iterator &I)
      const {
    MachineInstr &ClauseHead = *I;
    std::vector<MachineInstr *> ClauseContent;
    // Step past the clause head itself.
    I++;
    for (MachineBasicBlock::instr_iterator E = MBB.instr_end(); I != E;) {
      if (IsTrivialInst(*I)) {
        ++I;
        continue;
      }
      // The clause ends at the first instruction that is neither a bundle nor
      // an ALU instruction.
      if (!I->isBundle() && !TII->isALUInstr(I->getOpcode()))
        break;
      // Literals are gathered per instruction / per bundle.
      std::vector<MachineOperand *>Literals;
      if (I->isBundle()) {
        // Remember the bundle header so it can be erased once its members
        // have been detached.
        MachineInstr &DeleteMI = *I;
        MachineBasicBlock::instr_iterator BI = I.getInstrIterator();
        while (++BI != E && BI->isBundledWithPred()) {
          BI->unbundleFromPred();
          // Clear the internal-read flag on register operands now that the
          // instruction is no longer inside a bundle.
          for (MachineOperand &MO : BI->operands()) {
            if (MO.isReg() && MO.isInternalRead())
              MO.setIsInternalRead(false);
          }
          getLiteral(*BI, Literals);
          ClauseContent.push_back(&*BI);
        }
        // Continue after the last bundle member, then drop the header.
        I = BI;
        DeleteMI.eraseFromParent();
      } else {
        getLiteral(*I, Literals);
        ClauseContent.push_back(&*I);
        I++;
      }
      // Emit the gathered literals in pairs, right before the next
      // instruction. Each literal is either an immediate or a global address;
      // a missing second half of a pair is filled with 0.
      // NOTE(review): I->getDebugLoc() is evaluated even when I has reached
      // the end of the block — confirm that cannot happen here.
      for (unsigned i = 0, e = Literals.size(); i < e; i += 2) {
        MachineInstrBuilder MILit = BuildMI(MBB, I, I->getDebugLoc(),
            TII->get(R600::LITERALS));
        if (Literals[i]->isImm()) {
            MILit.addImm(Literals[i]->getImm());
        } else {
            MILit.addGlobalAddress(Literals[i]->getGlobal(),
                                   Literals[i]->getOffset());
        }
        if (i + 1 < e) {
          if (Literals[i + 1]->isImm()) {
            MILit.addImm(Literals[i + 1]->getImm());
          } else {
            MILit.addGlobalAddress(Literals[i + 1]->getGlobal(),
                                   Literals[i + 1]->getOffset());
          }
        } else
          MILit.addImm(0);
        ClauseContent.push_back(MILit);
      }
    }
    assert(ClauseContent.size() < 128 && "ALU clause is too big");
    // Store (instruction count - 1) in operand 7 of the clause head.
    ClauseHead.getOperand(7).setImm(ClauseContent.size() - 1);
    return ClauseFile(&ClauseHead, std::move(ClauseContent));
  }
    468 
    469   void EmitFetchClause(MachineBasicBlock::iterator InsertPos,
    470                        const DebugLoc &DL, ClauseFile &Clause,
    471                        unsigned &CfCount) {
    472     CounterPropagateAddr(*Clause.first, CfCount);
    473     MachineBasicBlock *BB = Clause.first->getParent();
    474     BuildMI(BB, DL, TII->get(R600::FETCH_CLAUSE)).addImm(CfCount);
    475     for (unsigned i = 0, e = Clause.second.size(); i < e; ++i) {
    476       BB->splice(InsertPos, BB, Clause.second[i]);
    477     }
    478     CfCount += 2 * Clause.second.size();
    479   }
    480 
    481   void EmitALUClause(MachineBasicBlock::iterator InsertPos, const DebugLoc &DL,
    482                      ClauseFile &Clause, unsigned &CfCount) {
    483     Clause.first->getOperand(0).setImm(0);
    484     CounterPropagateAddr(*Clause.first, CfCount);
    485     MachineBasicBlock *BB = Clause.first->getParent();
    486     BuildMI(BB, DL, TII->get(R600::ALU_CLAUSE)).addImm(CfCount);
    487     for (unsigned i = 0, e = Clause.second.size(); i < e; ++i) {
    488       BB->splice(InsertPos, BB, Clause.second[i]);
    489     }
    490     CfCount += Clause.second.size();
    491   }
    492 
    493   void CounterPropagateAddr(MachineInstr &MI, unsigned Addr) const {
    494     MI.getOperand(0).setImm(Addr + MI.getOperand(0).getImm());
    495   }
    496   void CounterPropagateAddr(const std::set<MachineInstr *> &MIs,
    497                             unsigned Addr) const {
    498     for (MachineInstr *MI : MIs) {
    499       CounterPropagateAddr(*MI, Addr);
    500     }
    501   }
    502 
    503 public:
    504   static char ID;
    505 
  /// Constructs the pass, forwarding the static ID to the MachineFunctionPass
  /// base. TII, TRI, ST and MaxFetchInst are (re)assigned at the start of
  /// runOnMachineFunction().
  R600ControlFlowFinalizer() : MachineFunctionPass(ID) {}
    507 
  /// Rewrites the control-flow pseudo instructions of \p MF into native CF
  /// instructions and records the required stack size.
  ///
  /// Each basic block is walked once while CfCount tracks the address of the
  /// next CF instruction. Fetch and ALU clauses are collected as they are
  /// met and their bodies are emitted when RETURN is reached. Jump, else and
  /// loop instructions get their target addresses patched once the matching
  /// ELSE / ENDIF / ENDLOOP is seen. A CFStack instance shared by all blocks
  /// tracks stack usage; its maximum is stored in the function info.
  ///
  /// NOTE(review): this always returns false although it inserts and erases
  /// instructions — confirm whether it should report the function as changed.
  bool runOnMachineFunction(MachineFunction &MF) override {
    ST = &MF.getSubtarget<R600Subtarget>();
    MaxFetchInst = ST->getTexVTXClauseSize();
    TII = ST->getInstrInfo();
    TRI = ST->getRegisterInfo();

    R600MachineFunctionInfo *MFI = MF.getInfo<R600MachineFunctionInfo>();

    CFStack CFStack(ST, MF.getFunction().getCallingConv());
    for (MachineFunction::iterator MB = MF.begin(), ME = MF.end(); MB != ME;
        ++MB) {
      MachineBasicBlock &MBB = *MB;
      // Address of the next CF instruction; restarted for every block.
      unsigned CfCount = 0;
      // Per open loop: the CfCount at its WHILELOOP plus the instructions
      // whose address operand must be patched at the matching ENDLOOP.
      std::vector<std::pair<unsigned, std::set<MachineInstr *>>> LoopStack;
      // Pending JUMP / ELSE instructions awaiting their target address.
      std::vector<MachineInstr * > IfThenElseStack;
      // Vertex shaders start with a CALL_FS instruction.
      if (MF.getFunction().getCallingConv() == CallingConv::AMDGPU_VS) {
        BuildMI(MBB, MBB.begin(), MBB.findDebugLoc(MBB.begin()),
            getHWInstrDesc(CF_CALL_FS));
        CfCount++;
      }
      std::vector<ClauseFile> FetchClauses, AluClauses;
      // One slot per open IF nesting level: the CF_ALU seen immediately
      // before the current instruction at that level, or nullptr.
      std::vector<MachineInstr *> LastAlu(1);
      // CF_ALU instructions to be replaced by CF_ALU_POP_AFTER afterwards.
      std::vector<MachineInstr *> ToPopAfter;

      for (MachineBasicBlock::iterator I = MBB.begin(), E = MBB.end();
          I != E;) {
        // Fetch instructions start a fetch clause; MakeFetchClause advances I
        // past the instructions it consumed.
        if (TII->usesTextureCache(*I) || TII->usesVertexCache(*I)) {
          LLVM_DEBUG(dbgs() << CfCount << ":"; I->dump(););
          FetchClauses.push_back(MakeFetchClause(MBB, I));
          CfCount++;
          LastAlu.back() = nullptr;
          continue;
        }

        MachineBasicBlock::iterator MI = I;
        // Anything other than ENDIF invalidates the remembered CF_ALU; a
        // CF_ALU itself becomes the new remembered one.
        if (MI->getOpcode() != R600::ENDIF)
          LastAlu.back() = nullptr;
        if (MI->getOpcode() == R600::CF_ALU)
          LastAlu.back() = &*MI;
        I++;
        bool RequiresWorkAround =
            CFStack.requiresWorkAroundForInst(MI->getOpcode());
        switch (MI->getOpcode()) {
        case R600::CF_ALU_PUSH_BEFORE:
          if (RequiresWorkAround) {
            // Split into an explicit CF_PUSH_EG followed by a plain CF_ALU.
            LLVM_DEBUG(dbgs()
                       << "Applying bug work-around for ALU_PUSH_BEFORE\n");
            BuildMI(MBB, MI, MBB.findDebugLoc(MI), TII->get(R600::CF_PUSH_EG))
                .addImm(CfCount + 1)
                .addImm(1);
            MI->setDesc(TII->get(R600::CF_ALU));
            CfCount++;
            CFStack.pushBranch(R600::CF_PUSH_EG);
          } else
            CFStack.pushBranch(R600::CF_ALU_PUSH_BEFORE);
          LLVM_FALLTHROUGH;
        case R600::CF_ALU:
          // Rewind to the clause head; MakeALUClause advances I past the
          // clause.
          I = MI;
          AluClauses.push_back(MakeALUClause(MBB, I));
          LLVM_DEBUG(dbgs() << CfCount << ":"; MI->dump(););
          CfCount++;
          break;
        case R600::WHILELOOP: {
          CFStack.pushLoop();
          MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
              getHWInstrDesc(CF_WHILE_LOOP))
              .addImm(1);
          // Remember where the loop starts and that the loop instruction
          // itself needs patching at ENDLOOP.
          std::pair<unsigned, std::set<MachineInstr *>> Pair(CfCount,
              std::set<MachineInstr *>());
          Pair.second.insert(MIb);
          LoopStack.push_back(std::move(Pair));
          MI->eraseFromParent();
          CfCount++;
          break;
        }
        case R600::ENDLOOP: {
          CFStack.popLoop();
          std::pair<unsigned, std::set<MachineInstr *>> Pair =
              std::move(LoopStack.back());
          LoopStack.pop_back();
          // Patch the loop start / break / continue instructions with the
          // address of this END_LOOP, which itself points just after the
          // loop start.
          CounterPropagateAddr(Pair.second, CfCount);
          BuildMI(MBB, MI, MBB.findDebugLoc(MI), getHWInstrDesc(CF_END_LOOP))
              .addImm(Pair.first + 1);
          MI->eraseFromParent();
          CfCount++;
          break;
        }
        case R600::IF_PREDICATE_SET: {
          // Entering an IF opens a new LastAlu nesting level.
          LastAlu.push_back(nullptr);
          MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
              getHWInstrDesc(CF_JUMP))
              .addImm(0)
              .addImm(0);
          IfThenElseStack.push_back(MIb);
          LLVM_DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
          MI->eraseFromParent();
          CfCount++;
          break;
        }
        case R600::ELSE: {
          // The pending jump targets this ELSE; the ELSE takes its place on
          // the stack until ENDIF.
          MachineInstr * JumpInst = IfThenElseStack.back();
          IfThenElseStack.pop_back();
          CounterPropagateAddr(*JumpInst, CfCount);
          MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
              getHWInstrDesc(CF_ELSE))
              .addImm(0)
              .addImm(0);
          LLVM_DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
          IfThenElseStack.push_back(MIb);
          MI->eraseFromParent();
          CfCount++;
          break;
        }
        case R600::ENDIF: {
          CFStack.popBranch();
          // If a CF_ALU immediately precedes, it is later turned into
          // CF_ALU_POP_AFTER; otherwise an explicit POP is emitted.
          if (LastAlu.back()) {
            ToPopAfter.push_back(LastAlu.back());
          } else {
            MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
                getHWInstrDesc(CF_POP))
                .addImm(CfCount + 1)
                .addImm(1);
            (void)MIb;
            LLVM_DEBUG(dbgs() << CfCount << ":"; MIb->dump(););
            CfCount++;
          }

          // Patch the pending JUMP or ELSE with the current address and set
          // its second operand to 1.
          MachineInstr *IfOrElseInst = IfThenElseStack.back();
          IfThenElseStack.pop_back();
          CounterPropagateAddr(*IfOrElseInst, CfCount);
          IfOrElseInst->getOperand(1).setImm(1);
          LastAlu.pop_back();
          MI->eraseFromParent();
          break;
        }
        case R600::BREAK: {
          CfCount ++;
          // Address is patched when the enclosing loop's ENDLOOP is reached.
          MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
              getHWInstrDesc(CF_LOOP_BREAK))
              .addImm(0);
          LoopStack.back().second.insert(MIb);
          MI->eraseFromParent();
          break;
        }
        case R600::CONTINUE: {
          // Address is patched when the enclosing loop's ENDLOOP is reached.
          MachineInstr *MIb = BuildMI(MBB, MI, MBB.findDebugLoc(MI),
              getHWInstrDesc(CF_LOOP_CONTINUE))
              .addImm(0);
          LoopStack.back().second.insert(MIb);
          MI->eraseFromParent();
          CfCount++;
          break;
        }
        case R600::RETURN: {
          DebugLoc DL = MBB.findDebugLoc(MI);
          BuildMI(MBB, MI, DL, getHWInstrDesc(CF_END));
          CfCount++;
          // Pad to an even CF count before the clause bodies are emitted.
          if (CfCount % 2) {
            BuildMI(MBB, I, DL, TII->get(R600::PAD));
            CfCount++;
          }
          MI->eraseFromParent();
          // Emit all collected clause bodies: fetch clauses first, then ALU
          // clauses.
          for (unsigned i = 0, e = FetchClauses.size(); i < e; i++)
            EmitFetchClause(I, DL, FetchClauses[i], CfCount);
          for (unsigned i = 0, e = AluClauses.size(); i < e; i++)
            EmitALUClause(I, DL, AluClauses[i], CfCount);
          break;
        }
        default:
          // Export instructions count as one CF instruction and are left in
          // place.
          if (TII->isExport(MI->getOpcode())) {
            LLVM_DEBUG(dbgs() << CfCount << ":"; MI->dump(););
            CfCount++;
          }
          break;
        }
      }
      // Replace every queued CF_ALU by a CF_ALU_POP_AFTER carrying the same
      // nine immediate operands.
      for (unsigned i = 0, e = ToPopAfter.size(); i < e; ++i) {
        MachineInstr *Alu = ToPopAfter[i];
        BuildMI(MBB, Alu, MBB.findDebugLoc((MachineBasicBlock::iterator)Alu),
            TII->get(R600::CF_ALU_POP_AFTER))
            .addImm(Alu->getOperand(0).getImm())
            .addImm(Alu->getOperand(1).getImm())
            .addImm(Alu->getOperand(2).getImm())
            .addImm(Alu->getOperand(3).getImm())
            .addImm(Alu->getOperand(4).getImm())
            .addImm(Alu->getOperand(5).getImm())
            .addImm(Alu->getOperand(6).getImm())
            .addImm(Alu->getOperand(7).getImm())
            .addImm(Alu->getOperand(8).getImm());
        Alu->eraseFromParent();
      }
      // Publish the stack high-water mark seen so far.
      MFI->CFStackSize = CFStack.MaxStackSize;
    }

    return false;
  }
    704 
    705   StringRef getPassName() const override {
    706     return "R600 Control Flow Finalizer Pass";
    707   }
    708 };
    709 
    710 } // end anonymous namespace
    711 
// Pass registration under DEBUG_TYPE ("r600cf"). No INITIALIZE_PASS_DEPENDENCY
// entries appear between BEGIN and END.
INITIALIZE_PASS_BEGIN(R600ControlFlowFinalizer, DEBUG_TYPE,
                     "R600 Control Flow Finalizer", false, false)
INITIALIZE_PASS_END(R600ControlFlowFinalizer, DEBUG_TYPE,
                    "R600 Control Flow Finalizer", false, false)

// Storage for the pass ID passed to the MachineFunctionPass base.
char R600ControlFlowFinalizer::ID = 0;

// Exported reference to the ID above, usable outside this file's anonymous
// namespace.
char &llvm::R600ControlFlowFinalizerID = R600ControlFlowFinalizer::ID;
    720 
    721 FunctionPass *llvm::createR600ControlFlowFinalizer() {
    722   return new R600ControlFlowFinalizer();
    723 }
    724