Home | History | Annotate | Download | only in R600
      1 //===-- AMDILCFGStructurizer.cpp - CFG Structurizer -----------------------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 /// \file
      9 //==-----------------------------------------------------------------------===//
     10 
     11 #define DEBUG_TYPE "structcfg"
     12 
     13 #include "AMDGPU.h"
     14 #include "AMDGPUInstrInfo.h"
     15 #include "R600InstrInfo.h"
     16 #include "llvm/Support/Debug.h"
     17 #include "llvm/Support/raw_ostream.h"
     18 #include "llvm/ADT/SCCIterator.h"
     19 #include "llvm/ADT/SmallVector.h"
     20 #include "llvm/ADT/Statistic.h"
     21 #include "llvm/ADT/DepthFirstIterator.h"
     22 #include "llvm/Analysis/DominatorInternals.h"
     23 #include "llvm/Analysis/Dominators.h"
     24 #include "llvm/CodeGen/MachineDominators.h"
     25 #include "llvm/CodeGen/MachineFunction.h"
     26 #include "llvm/CodeGen/MachineFunctionAnalysis.h"
     27 #include "llvm/CodeGen/MachineFunctionPass.h"
     28 #include "llvm/CodeGen/MachineInstrBuilder.h"
     29 #include "llvm/CodeGen/MachineJumpTableInfo.h"
     30 #include "llvm/CodeGen/MachineLoopInfo.h"
     31 #include "llvm/CodeGen/MachinePostDominators.h"
     32 #include "llvm/CodeGen/MachineRegisterInfo.h"
     33 #include "llvm/Target/TargetInstrInfo.h"
     34 #include "llvm/Target/TargetMachine.h"
     35 
     36 using namespace llvm;
     37 
     38 #define DEFAULT_VEC_SLOTS 8
     39 
     40 // TODO: move-begin.
     41 
     42 //===----------------------------------------------------------------------===//
     43 //
     44 // Statistics for CFGStructurizer.
     45 //
     46 //===----------------------------------------------------------------------===//
     47 
     48 STATISTIC(numSerialPatternMatch,    "CFGStructurizer number of serial pattern "
     49     "matched");
     50 STATISTIC(numIfPatternMatch,        "CFGStructurizer number of if pattern "
     51     "matched");
     52 STATISTIC(numLoopcontPatternMatch,  "CFGStructurizer number of loop-continue "
     53     "pattern matched");
     54 STATISTIC(numClonedBlock,           "CFGStructurizer cloned blocks");
     55 STATISTIC(numClonedInstr,           "CFGStructurizer cloned instructions");
     56 
     57 //===----------------------------------------------------------------------===//
     58 //
     59 // Miscellaneous utility for CFGStructurizer.
     60 //
     61 //===----------------------------------------------------------------------===//
     62 namespace {
     63 #define SHOWNEWINSTR(i) \
     64   DEBUG(dbgs() << "New instr: " << *i << "\n");
     65 
     66 #define SHOWNEWBLK(b, msg) \
     67 DEBUG( \
     68   dbgs() << msg << "BB" << b->getNumber() << "size " << b->size(); \
     69   dbgs() << "\n"; \
     70 );
     71 
     72 #define SHOWBLK_DETAIL(b, msg) \
     73 DEBUG( \
     74   if (b) { \
     75   dbgs() << msg << "BB" << b->getNumber() << "size " << b->size(); \
     76   b->print(dbgs()); \
     77   dbgs() << "\n"; \
     78   } \
     79 );
     80 
     81 #define INVALIDSCCNUM -1
     82 
     83 template<class NodeT>
     84 void ReverseVector(SmallVectorImpl<NodeT *> &Src) {
     85   size_t sz = Src.size();
     86   for (size_t i = 0; i < sz/2; ++i) {
     87     NodeT *t = Src[i];
     88     Src[i] = Src[sz - i - 1];
     89     Src[sz - i - 1] = t;
     90   }
     91 }
     92 
     93 } // end anonymous namespace
     94 
     95 //===----------------------------------------------------------------------===//
     96 //
     97 // supporting data structure for CFGStructurizer
     98 //
     99 //===----------------------------------------------------------------------===//
    100 
    101 
    102 namespace {
    103 
    104 class BlockInformation {
    105 public:
    106   bool IsRetired;
    107   int  SccNum;
    108   BlockInformation() : IsRetired(false), SccNum(INVALIDSCCNUM) {}
    109 };
    110 
    111 } // end anonymous namespace
    112 
    113 //===----------------------------------------------------------------------===//
    114 //
    115 // CFGStructurizer
    116 //
    117 //===----------------------------------------------------------------------===//
    118 
    119 namespace {
    120 class AMDGPUCFGStructurizer : public MachineFunctionPass {
    121 public:
    122   typedef SmallVector<MachineBasicBlock *, 32> MBBVector;
    123   typedef std::map<MachineBasicBlock *, BlockInformation *> MBBInfoMap;
    124   typedef std::map<MachineLoop *, MachineBasicBlock *> LoopLandInfoMap;
    125 
    126   enum PathToKind {
    127     Not_SinglePath = 0,
    128     SinglePath_InPath = 1,
    129     SinglePath_NotInPath = 2
    130   };
    131 
    132   static char ID;
    133 
    134   AMDGPUCFGStructurizer(TargetMachine &tm) :
    135       MachineFunctionPass(ID), TM(tm),
    136       TII(static_cast<const R600InstrInfo *>(tm.getInstrInfo())),
    137       TRI(&TII->getRegisterInfo()) { }
    138 
    139    const char *getPassName() const {
    140     return "AMD IL Control Flow Graph structurizer Pass";
    141   }
    142 
    143   void getAnalysisUsage(AnalysisUsage &AU) const {
    144     AU.addPreserved<MachineFunctionAnalysis>();
    145     AU.addRequired<MachineFunctionAnalysis>();
    146     AU.addRequired<MachineDominatorTree>();
    147     AU.addRequired<MachinePostDominatorTree>();
    148     AU.addRequired<MachineLoopInfo>();
    149   }
    150 
    151   /// Perform the CFG structurization
    152   bool run();
    153 
    154   /// Perform the CFG preparation
    155   /// This step will remove every unconditionnal/dead jump instructions and make
    156   /// sure all loops have an exit block
    157   bool prepare();
    158 
    159   bool runOnMachineFunction(MachineFunction &MF) {
    160     DEBUG(MF.dump(););
    161     OrderedBlks.clear();
    162     FuncRep = &MF;
    163     MLI = &getAnalysis<MachineLoopInfo>();
    164     DEBUG(dbgs() << "LoopInfo:\n"; PrintLoopinfo(*MLI););
    165     MDT = &getAnalysis<MachineDominatorTree>();
    166     DEBUG(MDT->print(dbgs(), (const llvm::Module*)0););
    167     PDT = &getAnalysis<MachinePostDominatorTree>();
    168     DEBUG(PDT->print(dbgs()););
    169     prepare();
    170     run();
    171     DEBUG(MF.dump(););
    172     return true;
    173   }
    174 
    175 protected:
    176   TargetMachine &TM;
    177   MachineDominatorTree *MDT;
    178   MachinePostDominatorTree *PDT;
    179   MachineLoopInfo *MLI;
    180   const R600InstrInfo *TII;
    181   const AMDGPURegisterInfo *TRI;
    182 
    183   // PRINT FUNCTIONS
    184   /// Print the ordered Blocks.
    185   void printOrderedBlocks() const {
    186     size_t i = 0;
    187     for (MBBVector::const_iterator iterBlk = OrderedBlks.begin(),
    188         iterBlkEnd = OrderedBlks.end(); iterBlk != iterBlkEnd; ++iterBlk, ++i) {
    189       dbgs() << "BB" << (*iterBlk)->getNumber();
    190       dbgs() << "(" << getSCCNum(*iterBlk) << "," << (*iterBlk)->size() << ")";
    191       if (i != 0 && i % 10 == 0) {
    192         dbgs() << "\n";
    193       } else {
    194         dbgs() << " ";
    195       }
    196     }
    197   }
    198   static void PrintLoopinfo(const MachineLoopInfo &LoopInfo) {
    199     for (MachineLoop::iterator iter = LoopInfo.begin(),
    200          iterEnd = LoopInfo.end(); iter != iterEnd; ++iter) {
    201       (*iter)->print(dbgs(), 0);
    202     }
    203   }
    204 
    205   // UTILITY FUNCTIONS
    206   int getSCCNum(MachineBasicBlock *MBB) const;
    207   MachineBasicBlock *getLoopLandInfo(MachineLoop *LoopRep) const;
    208   bool hasBackEdge(MachineBasicBlock *MBB) const;
    209   static unsigned getLoopDepth(MachineLoop *LoopRep);
    210   bool isRetiredBlock(MachineBasicBlock *MBB) const;
    211   bool isActiveLoophead(MachineBasicBlock *MBB) const;
    212   PathToKind singlePathTo(MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB,
    213       bool AllowSideEntry = true) const;
    214   int countActiveBlock(MBBVector::const_iterator It,
    215       MBBVector::const_iterator E) const;
    216   bool needMigrateBlock(MachineBasicBlock *MBB) const;
    217 
    218   // Utility Functions
    219   void reversePredicateSetter(MachineBasicBlock::iterator I);
    220   /// Compute the reversed DFS post order of Blocks
    221   void orderBlocks(MachineFunction *MF);
    222 
    223   // Function originaly from CFGStructTraits
    224   void insertInstrEnd(MachineBasicBlock *MBB, int NewOpcode,
    225       DebugLoc DL = DebugLoc());
    226   MachineInstr *insertInstrBefore(MachineBasicBlock *MBB, int NewOpcode,
    227     DebugLoc DL = DebugLoc());
    228   MachineInstr *insertInstrBefore(MachineBasicBlock::iterator I, int NewOpcode);
    229   void insertCondBranchBefore(MachineBasicBlock::iterator I, int NewOpcode,
    230       DebugLoc DL);
    231   void insertCondBranchBefore(MachineBasicBlock *MBB,
    232       MachineBasicBlock::iterator I, int NewOpcode, int RegNum,
    233       DebugLoc DL);
    234   void insertCondBranchEnd(MachineBasicBlock *MBB, int NewOpcode, int RegNum);
    235   static int getBranchNzeroOpcode(int OldOpcode);
    236   static int getBranchZeroOpcode(int OldOpcode);
    237   static int getContinueNzeroOpcode(int OldOpcode);
    238   static int getContinueZeroOpcode(int OldOpcode);
    239   static MachineBasicBlock *getTrueBranch(MachineInstr *MI);
    240   static void setTrueBranch(MachineInstr *MI, MachineBasicBlock *MBB);
    241   static MachineBasicBlock *getFalseBranch(MachineBasicBlock *MBB,
    242       MachineInstr *MI);
    243   static bool isCondBranch(MachineInstr *MI);
    244   static bool isUncondBranch(MachineInstr *MI);
    245   static DebugLoc getLastDebugLocInBB(MachineBasicBlock *MBB);
    246   static MachineInstr *getNormalBlockBranchInstr(MachineBasicBlock *MBB);
    247   /// The correct naming for this is getPossibleLoopendBlockBranchInstr.
    248   ///
    249   /// BB with backward-edge could have move instructions after the branch
    250   /// instruction.  Such move instruction "belong to" the loop backward-edge.
    251   MachineInstr *getLoopendBlockBranchInstr(MachineBasicBlock *MBB);
    252   static MachineInstr *getReturnInstr(MachineBasicBlock *MBB);
    253   static MachineInstr *getContinueInstr(MachineBasicBlock *MBB);
    254   static bool isReturnBlock(MachineBasicBlock *MBB);
    255   static void cloneSuccessorList(MachineBasicBlock *DstMBB,
    256       MachineBasicBlock *SrcMBB) ;
    257   static MachineBasicBlock *clone(MachineBasicBlock *MBB);
    258   /// MachineBasicBlock::ReplaceUsesOfBlockWith doesn't serve the purpose
    259   /// because the AMDGPU instruction is not recognized as terminator fix this
    260   /// and retire this routine
    261   void replaceInstrUseOfBlockWith(MachineBasicBlock *SrcMBB,
    262       MachineBasicBlock *OldMBB, MachineBasicBlock *NewBlk);
    263   static void wrapup(MachineBasicBlock *MBB);
    264 
    265 
    266   int patternMatch(MachineBasicBlock *MBB);
    267   int patternMatchGroup(MachineBasicBlock *MBB);
    268   int serialPatternMatch(MachineBasicBlock *MBB);
    269   int ifPatternMatch(MachineBasicBlock *MBB);
    270   int loopendPatternMatch();
    271   int mergeLoop(MachineLoop *LoopRep);
    272   int loopcontPatternMatch(MachineLoop *LoopRep, MachineBasicBlock *LoopHeader);
    273 
    274   void handleLoopcontBlock(MachineBasicBlock *ContingMBB,
    275       MachineLoop *ContingLoop, MachineBasicBlock *ContMBB,
    276       MachineLoop *ContLoop);
    277   /// return true iff src1Blk->succ_size() == 0 && src1Blk and src2Blk are in
    278   /// the same loop with LoopLandInfo without explicitly keeping track of
    279   /// loopContBlks and loopBreakBlks, this is a method to get the information.
    280   bool isSameloopDetachedContbreak(MachineBasicBlock *Src1MBB,
    281       MachineBasicBlock *Src2MBB);
    282   int handleJumpintoIf(MachineBasicBlock *HeadMBB,
    283       MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB);
    284   int handleJumpintoIfImp(MachineBasicBlock *HeadMBB,
    285       MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB);
    286   int improveSimpleJumpintoIf(MachineBasicBlock *HeadMBB,
    287       MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB,
    288       MachineBasicBlock **LandMBBPtr);
    289   void showImproveSimpleJumpintoIf(MachineBasicBlock *HeadMBB,
    290       MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB,
    291       MachineBasicBlock *LandMBB, bool Detail = false);
    292   int cloneOnSideEntryTo(MachineBasicBlock *PreMBB,
    293       MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB);
    294   void mergeSerialBlock(MachineBasicBlock *DstMBB,
    295       MachineBasicBlock *SrcMBB);
    296 
    297   void mergeIfthenelseBlock(MachineInstr *BranchMI,
    298       MachineBasicBlock *MBB, MachineBasicBlock *TrueMBB,
    299       MachineBasicBlock *FalseMBB, MachineBasicBlock *LandMBB);
    300   void mergeLooplandBlock(MachineBasicBlock *DstMBB,
    301       MachineBasicBlock *LandMBB);
    302   void mergeLoopbreakBlock(MachineBasicBlock *ExitingMBB,
    303       MachineBasicBlock *LandMBB);
    304   void settleLoopcontBlock(MachineBasicBlock *ContingMBB,
    305       MachineBasicBlock *ContMBB);
    306   /// normalizeInfiniteLoopExit change
    307   ///   B1:
    308   ///        uncond_br LoopHeader
    309   ///
    310   /// to
    311   ///   B1:
    312   ///        cond_br 1 LoopHeader dummyExit
    313   /// and return the newly added dummy exit block
    314   MachineBasicBlock *normalizeInfiniteLoopExit(MachineLoop *LoopRep);
    315   void removeUnconditionalBranch(MachineBasicBlock *MBB);
    316   /// Remove duplicate branches instructions in a block.
    317   /// For instance
    318   /// B0:
    319   ///    cond_br X B1 B2
    320   ///    cond_br X B1 B2
    321   /// is transformed to
    322   /// B0:
    323   ///    cond_br X B1 B2
    324   void removeRedundantConditionalBranch(MachineBasicBlock *MBB);
    325   void addDummyExitBlock(SmallVectorImpl<MachineBasicBlock *> &RetMBB);
    326   void removeSuccessor(MachineBasicBlock *MBB);
    327   MachineBasicBlock *cloneBlockForPredecessor(MachineBasicBlock *MBB,
    328       MachineBasicBlock *PredMBB);
    329   void migrateInstruction(MachineBasicBlock *SrcMBB,
    330       MachineBasicBlock *DstMBB, MachineBasicBlock::iterator I);
    331   void recordSccnum(MachineBasicBlock *MBB, int SCCNum);
    332   void retireBlock(MachineBasicBlock *MBB);
    333   void setLoopLandBlock(MachineLoop *LoopRep, MachineBasicBlock *MBB = NULL);
    334 
    335   MachineBasicBlock *findNearestCommonPostDom(std::set<MachineBasicBlock *>&);
    336   /// This is work around solution for findNearestCommonDominator not avaiable
    337   /// to post dom a proper fix should go to Dominators.h.
    338   MachineBasicBlock *findNearestCommonPostDom(MachineBasicBlock *MBB1,
    339       MachineBasicBlock *MBB2);
    340 
    341 private:
    342   MBBInfoMap BlockInfoMap;
    343   LoopLandInfoMap LLInfoMap;
    344   std::map<MachineLoop *, bool> Visited;
    345   MachineFunction *FuncRep;
    346   SmallVector<MachineBasicBlock *, DEFAULT_VEC_SLOTS> OrderedBlks;
    347 };
    348 
    349 int AMDGPUCFGStructurizer::getSCCNum(MachineBasicBlock *MBB) const {
    350   MBBInfoMap::const_iterator It = BlockInfoMap.find(MBB);
    351   if (It == BlockInfoMap.end())
    352     return INVALIDSCCNUM;
    353   return (*It).second->SccNum;
    354 }
    355 
    356 MachineBasicBlock *AMDGPUCFGStructurizer::getLoopLandInfo(MachineLoop *LoopRep)
    357     const {
    358   LoopLandInfoMap::const_iterator It = LLInfoMap.find(LoopRep);
    359   if (It == LLInfoMap.end())
    360     return NULL;
    361   return (*It).second;
    362 }
    363 
    364 bool AMDGPUCFGStructurizer::hasBackEdge(MachineBasicBlock *MBB) const {
    365   MachineLoop *LoopRep = MLI->getLoopFor(MBB);
    366   if (!LoopRep)
    367     return false;
    368   MachineBasicBlock *LoopHeader = LoopRep->getHeader();
    369   return MBB->isSuccessor(LoopHeader);
    370 }
    371 
    372 unsigned AMDGPUCFGStructurizer::getLoopDepth(MachineLoop *LoopRep) {
    373   return LoopRep ? LoopRep->getLoopDepth() : 0;
    374 }
    375 
    376 bool AMDGPUCFGStructurizer::isRetiredBlock(MachineBasicBlock *MBB) const {
    377   MBBInfoMap::const_iterator It = BlockInfoMap.find(MBB);
    378   if (It == BlockInfoMap.end())
    379     return false;
    380   return (*It).second->IsRetired;
    381 }
    382 
    383 bool AMDGPUCFGStructurizer::isActiveLoophead(MachineBasicBlock *MBB) const {
    384   MachineLoop *LoopRep = MLI->getLoopFor(MBB);
    385   while (LoopRep && LoopRep->getHeader() == MBB) {
    386     MachineBasicBlock *LoopLand = getLoopLandInfo(LoopRep);
    387     if(!LoopLand)
    388       return true;
    389     if (!isRetiredBlock(LoopLand))
    390       return true;
    391     LoopRep = LoopRep->getParentLoop();
    392   }
    393   return false;
    394 }
    395 AMDGPUCFGStructurizer::PathToKind AMDGPUCFGStructurizer::singlePathTo(
    396     MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB,
    397     bool AllowSideEntry) const {
    398   assert(DstMBB);
    399   if (SrcMBB == DstMBB)
    400     return SinglePath_InPath;
    401   while (SrcMBB && SrcMBB->succ_size() == 1) {
    402     SrcMBB = *SrcMBB->succ_begin();
    403     if (SrcMBB == DstMBB)
    404       return SinglePath_InPath;
    405     if (!AllowSideEntry && SrcMBB->pred_size() > 1)
    406       return Not_SinglePath;
    407   }
    408   if (SrcMBB && SrcMBB->succ_size()==0)
    409     return SinglePath_NotInPath;
    410   return Not_SinglePath;
    411 }
    412 
    413 int AMDGPUCFGStructurizer::countActiveBlock(MBBVector::const_iterator It,
    414     MBBVector::const_iterator E) const {
    415   int Count = 0;
    416   while (It != E) {
    417     if (!isRetiredBlock(*It))
    418       ++Count;
    419     ++It;
    420   }
    421   return Count;
    422 }
    423 
    424 bool AMDGPUCFGStructurizer::needMigrateBlock(MachineBasicBlock *MBB) const {
    425   unsigned BlockSizeThreshold = 30;
    426   unsigned CloneInstrThreshold = 100;
    427   bool MultiplePreds = MBB && (MBB->pred_size() > 1);
    428 
    429   if(!MultiplePreds)
    430     return false;
    431   unsigned BlkSize = MBB->size();
    432   return ((BlkSize > BlockSizeThreshold) &&
    433       (BlkSize * (MBB->pred_size() - 1) > CloneInstrThreshold));
    434 }
    435 
    436 void AMDGPUCFGStructurizer::reversePredicateSetter(
    437     MachineBasicBlock::iterator I) {
    438   while (I--) {
    439     if (I->getOpcode() == AMDGPU::PRED_X) {
    440       switch (static_cast<MachineInstr *>(I)->getOperand(2).getImm()) {
    441       case OPCODE_IS_ZERO_INT:
    442         static_cast<MachineInstr *>(I)->getOperand(2)
    443             .setImm(OPCODE_IS_NOT_ZERO_INT);
    444         return;
    445       case OPCODE_IS_NOT_ZERO_INT:
    446         static_cast<MachineInstr *>(I)->getOperand(2)
    447             .setImm(OPCODE_IS_ZERO_INT);
    448         return;
    449       case OPCODE_IS_ZERO:
    450         static_cast<MachineInstr *>(I)->getOperand(2)
    451             .setImm(OPCODE_IS_NOT_ZERO);
    452         return;
    453       case OPCODE_IS_NOT_ZERO:
    454         static_cast<MachineInstr *>(I)->getOperand(2)
    455             .setImm(OPCODE_IS_ZERO);
    456         return;
    457       default:
    458         llvm_unreachable("PRED_X Opcode invalid!");
    459       }
    460     }
    461   }
    462 }
    463 
    464 void AMDGPUCFGStructurizer::insertInstrEnd(MachineBasicBlock *MBB,
    465     int NewOpcode, DebugLoc DL) {
    466  MachineInstr *MI = MBB->getParent()
    467     ->CreateMachineInstr(TII->get(NewOpcode), DL);
    468   MBB->push_back(MI);
    469   //assume the instruction doesn't take any reg operand ...
    470   SHOWNEWINSTR(MI);
    471 }
    472 
    473 MachineInstr *AMDGPUCFGStructurizer::insertInstrBefore(MachineBasicBlock *MBB,
    474     int NewOpcode, DebugLoc DL) {
    475   MachineInstr *MI =
    476       MBB->getParent()->CreateMachineInstr(TII->get(NewOpcode), DL);
    477   if (MBB->begin() != MBB->end())
    478     MBB->insert(MBB->begin(), MI);
    479   else
    480     MBB->push_back(MI);
    481   SHOWNEWINSTR(MI);
    482   return MI;
    483 }
    484 
    485 MachineInstr *AMDGPUCFGStructurizer::insertInstrBefore(
    486     MachineBasicBlock::iterator I, int NewOpcode) {
    487   MachineInstr *OldMI = &(*I);
    488   MachineBasicBlock *MBB = OldMI->getParent();
    489   MachineInstr *NewMBB =
    490       MBB->getParent()->CreateMachineInstr(TII->get(NewOpcode), DebugLoc());
    491   MBB->insert(I, NewMBB);
    492   //assume the instruction doesn't take any reg operand ...
    493   SHOWNEWINSTR(NewMBB);
    494   return NewMBB;
    495 }
    496 
    497 void AMDGPUCFGStructurizer::insertCondBranchBefore(
    498     MachineBasicBlock::iterator I, int NewOpcode, DebugLoc DL) {
    499   MachineInstr *OldMI = &(*I);
    500   MachineBasicBlock *MBB = OldMI->getParent();
    501   MachineFunction *MF = MBB->getParent();
    502   MachineInstr *NewMI = MF->CreateMachineInstr(TII->get(NewOpcode), DL);
    503   MBB->insert(I, NewMI);
    504   MachineInstrBuilder MIB(*MF, NewMI);
    505   MIB.addReg(OldMI->getOperand(1).getReg(), false);
    506   SHOWNEWINSTR(NewMI);
    507   //erase later oldInstr->eraseFromParent();
    508 }
    509 
    510 void AMDGPUCFGStructurizer::insertCondBranchBefore(MachineBasicBlock *blk,
    511     MachineBasicBlock::iterator I, int NewOpcode, int RegNum,
    512     DebugLoc DL) {
    513   MachineFunction *MF = blk->getParent();
    514   MachineInstr *NewInstr = MF->CreateMachineInstr(TII->get(NewOpcode), DL);
    515   //insert before
    516   blk->insert(I, NewInstr);
    517   MachineInstrBuilder(*MF, NewInstr).addReg(RegNum, false);
    518   SHOWNEWINSTR(NewInstr);
    519 }
    520 
    521 void AMDGPUCFGStructurizer::insertCondBranchEnd(MachineBasicBlock *MBB,
    522     int NewOpcode, int RegNum) {
    523   MachineFunction *MF = MBB->getParent();
    524   MachineInstr *NewInstr =
    525     MF->CreateMachineInstr(TII->get(NewOpcode), DebugLoc());
    526   MBB->push_back(NewInstr);
    527   MachineInstrBuilder(*MF, NewInstr).addReg(RegNum, false);
    528   SHOWNEWINSTR(NewInstr);
    529 }
    530 
    531 int AMDGPUCFGStructurizer::getBranchNzeroOpcode(int OldOpcode) {
    532   switch(OldOpcode) {
    533   case AMDGPU::JUMP_COND:
    534   case AMDGPU::JUMP: return AMDGPU::IF_PREDICATE_SET;
    535   case AMDGPU::BRANCH_COND_i32:
    536   case AMDGPU::BRANCH_COND_f32: return AMDGPU::IF_LOGICALNZ_f32;
    537   default: llvm_unreachable("internal error");
    538   }
    539   return -1;
    540 }
    541 
    542 int AMDGPUCFGStructurizer::getBranchZeroOpcode(int OldOpcode) {
    543   switch(OldOpcode) {
    544   case AMDGPU::JUMP_COND:
    545   case AMDGPU::JUMP: return AMDGPU::IF_PREDICATE_SET;
    546   case AMDGPU::BRANCH_COND_i32:
    547   case AMDGPU::BRANCH_COND_f32: return AMDGPU::IF_LOGICALZ_f32;
    548   default: llvm_unreachable("internal error");
    549   }
    550   return -1;
    551 }
    552 
    553 int AMDGPUCFGStructurizer::getContinueNzeroOpcode(int OldOpcode) {
    554   switch(OldOpcode) {
    555   case AMDGPU::JUMP_COND:
    556   case AMDGPU::JUMP: return AMDGPU::CONTINUE_LOGICALNZ_i32;
    557   default: llvm_unreachable("internal error");
    558   };
    559   return -1;
    560 }
    561 
    562 int AMDGPUCFGStructurizer::getContinueZeroOpcode(int OldOpcode) {
    563   switch(OldOpcode) {
    564   case AMDGPU::JUMP_COND:
    565   case AMDGPU::JUMP: return AMDGPU::CONTINUE_LOGICALZ_i32;
    566   default: llvm_unreachable("internal error");
    567   }
    568   return -1;
    569 }
    570 
    571 MachineBasicBlock *AMDGPUCFGStructurizer::getTrueBranch(MachineInstr *MI) {
    572   return MI->getOperand(0).getMBB();
    573 }
    574 
    575 void AMDGPUCFGStructurizer::setTrueBranch(MachineInstr *MI,
    576     MachineBasicBlock *MBB) {
    577   MI->getOperand(0).setMBB(MBB);
    578 }
    579 
    580 MachineBasicBlock *
    581 AMDGPUCFGStructurizer::getFalseBranch(MachineBasicBlock *MBB,
    582     MachineInstr *MI) {
    583   assert(MBB->succ_size() == 2);
    584   MachineBasicBlock *TrueBranch = getTrueBranch(MI);
    585   MachineBasicBlock::succ_iterator It = MBB->succ_begin();
    586   MachineBasicBlock::succ_iterator Next = It;
    587   ++Next;
    588   return (*It == TrueBranch) ? *Next : *It;
    589 }
    590 
    591 bool AMDGPUCFGStructurizer::isCondBranch(MachineInstr *MI) {
    592   switch (MI->getOpcode()) {
    593     case AMDGPU::JUMP_COND:
    594     case AMDGPU::BRANCH_COND_i32:
    595     case AMDGPU::BRANCH_COND_f32: return true;
    596   default:
    597     return false;
    598   }
    599   return false;
    600 }
    601 
    602 bool AMDGPUCFGStructurizer::isUncondBranch(MachineInstr *MI) {
    603   switch (MI->getOpcode()) {
    604   case AMDGPU::JUMP:
    605   case AMDGPU::BRANCH:
    606     return true;
    607   default:
    608     return false;
    609   }
    610   return false;
    611 }
    612 
    613 DebugLoc AMDGPUCFGStructurizer::getLastDebugLocInBB(MachineBasicBlock *MBB) {
    614   //get DebugLoc from the first MachineBasicBlock instruction with debug info
    615   DebugLoc DL;
    616   for (MachineBasicBlock::iterator It = MBB->begin(); It != MBB->end();
    617       ++It) {
    618     MachineInstr *instr = &(*It);
    619     if (instr->getDebugLoc().isUnknown() == false)
    620       DL = instr->getDebugLoc();
    621   }
    622   return DL;
    623 }
    624 
    625 MachineInstr *AMDGPUCFGStructurizer::getNormalBlockBranchInstr(
    626     MachineBasicBlock *MBB) {
    627   MachineBasicBlock::reverse_iterator It = MBB->rbegin();
    628   MachineInstr *MI = &*It;
    629   if (MI && (isCondBranch(MI) || isUncondBranch(MI)))
    630     return MI;
    631   return NULL;
    632 }
    633 
    634 MachineInstr *AMDGPUCFGStructurizer::getLoopendBlockBranchInstr(
    635     MachineBasicBlock *MBB) {
    636   for (MachineBasicBlock::reverse_iterator It = MBB->rbegin(), E = MBB->rend();
    637       It != E; ++It) {
    638     // FIXME: Simplify
    639     MachineInstr *MI = &*It;
    640     if (MI) {
    641       if (isCondBranch(MI) || isUncondBranch(MI))
    642         return MI;
    643       else if (!TII->isMov(MI->getOpcode()))
    644         break;
    645     }
    646   }
    647   return NULL;
    648 }
    649 
    650 MachineInstr *AMDGPUCFGStructurizer::getReturnInstr(MachineBasicBlock *MBB) {
    651   MachineBasicBlock::reverse_iterator It = MBB->rbegin();
    652   if (It != MBB->rend()) {
    653     MachineInstr *instr = &(*It);
    654     if (instr->getOpcode() == AMDGPU::RETURN)
    655       return instr;
    656   }
    657   return NULL;
    658 }
    659 
    660 MachineInstr *AMDGPUCFGStructurizer::getContinueInstr(MachineBasicBlock *MBB) {
    661   MachineBasicBlock::reverse_iterator It = MBB->rbegin();
    662   if (It != MBB->rend()) {
    663     MachineInstr *MI = &(*It);
    664     if (MI->getOpcode() == AMDGPU::CONTINUE)
    665       return MI;
    666   }
    667   return NULL;
    668 }
    669 
    670 bool AMDGPUCFGStructurizer::isReturnBlock(MachineBasicBlock *MBB) {
    671   MachineInstr *MI = getReturnInstr(MBB);
    672   bool IsReturn = (MBB->succ_size() == 0);
    673   if (MI)
    674     assert(IsReturn);
    675   else if (IsReturn)
    676     DEBUG(
    677       dbgs() << "BB" << MBB->getNumber()
    678              <<" is return block without RETURN instr\n";);
    679   return  IsReturn;
    680 }
    681 
    682 void AMDGPUCFGStructurizer::cloneSuccessorList(MachineBasicBlock *DstMBB,
    683     MachineBasicBlock *SrcMBB) {
    684   for (MachineBasicBlock::succ_iterator It = SrcMBB->succ_begin(),
    685        iterEnd = SrcMBB->succ_end(); It != iterEnd; ++It)
    686     DstMBB->addSuccessor(*It);  // *iter's predecessor is also taken care of
    687 }
    688 
    689 MachineBasicBlock *AMDGPUCFGStructurizer::clone(MachineBasicBlock *MBB) {
    690   MachineFunction *Func = MBB->getParent();
    691   MachineBasicBlock *NewMBB = Func->CreateMachineBasicBlock();
    692   Func->push_back(NewMBB);  //insert to function
    693   for (MachineBasicBlock::iterator It = MBB->begin(), E = MBB->end();
    694       It != E; ++It) {
    695     MachineInstr *MI = Func->CloneMachineInstr(It);
    696     NewMBB->push_back(MI);
    697   }
    698   return NewMBB;
    699 }
    700 
    701 void AMDGPUCFGStructurizer::replaceInstrUseOfBlockWith(
    702     MachineBasicBlock *SrcMBB, MachineBasicBlock *OldMBB,
    703     MachineBasicBlock *NewBlk) {
    704   MachineInstr *BranchMI = getLoopendBlockBranchInstr(SrcMBB);
    705   if (BranchMI && isCondBranch(BranchMI) &&
    706       getTrueBranch(BranchMI) == OldMBB)
    707     setTrueBranch(BranchMI, NewBlk);
    708 }
    709 
    710 void AMDGPUCFGStructurizer::wrapup(MachineBasicBlock *MBB) {
    711   assert((!MBB->getParent()->getJumpTableInfo()
    712           || MBB->getParent()->getJumpTableInfo()->isEmpty())
    713          && "found a jump table");
    714 
    715    //collect continue right before endloop
    716    SmallVector<MachineInstr *, DEFAULT_VEC_SLOTS> ContInstr;
    717    MachineBasicBlock::iterator Pre = MBB->begin();
    718    MachineBasicBlock::iterator E = MBB->end();
    719    MachineBasicBlock::iterator It = Pre;
    720    while (It != E) {
    721      if (Pre->getOpcode() == AMDGPU::CONTINUE
    722          && It->getOpcode() == AMDGPU::ENDLOOP)
    723        ContInstr.push_back(Pre);
    724      Pre = It;
    725      ++It;
    726    }
    727 
    728    //delete continue right before endloop
    729    for (unsigned i = 0; i < ContInstr.size(); ++i)
    730       ContInstr[i]->eraseFromParent();
    731 
    732    // TODO to fix up jump table so later phase won't be confused.  if
    733    // (jumpTableInfo->isEmpty() == false) { need to clean the jump table, but
    734    // there isn't such an interface yet.  alternatively, replace all the other
    735    // blocks in the jump table with the entryBlk //}
    736 
    737 }
    738 
    739 
    740 bool AMDGPUCFGStructurizer::prepare() {
    741   bool Changed = false;
    742 
    743   //FIXME: if not reducible flow graph, make it so ???
    744 
    745   DEBUG(dbgs() << "AMDGPUCFGStructurizer::prepare\n";);
    746 
    747   orderBlocks(FuncRep);
    748 
    749   SmallVector<MachineBasicBlock *, DEFAULT_VEC_SLOTS> RetBlks;
    750 
    751   // Add an ExitBlk to loop that don't have one
    752   for (MachineLoopInfo::iterator It = MLI->begin(),
    753        E = MLI->end(); It != E; ++It) {
    754     MachineLoop *LoopRep = (*It);
    755     MBBVector ExitingMBBs;
    756     LoopRep->getExitingBlocks(ExitingMBBs);
    757 
    758     if (ExitingMBBs.size() == 0) {
    759       MachineBasicBlock* DummyExitBlk = normalizeInfiniteLoopExit(LoopRep);
    760       if (DummyExitBlk)
    761         RetBlks.push_back(DummyExitBlk);
    762     }
    763   }
    764 
    765   // Remove unconditional branch instr.
    766   // Add dummy exit block iff there are multiple returns.
    767   for (SmallVectorImpl<MachineBasicBlock *>::const_iterator
    768        It = OrderedBlks.begin(), E = OrderedBlks.end(); It != E; ++It) {
    769     MachineBasicBlock *MBB = *It;
    770     removeUnconditionalBranch(MBB);
    771     removeRedundantConditionalBranch(MBB);
    772     if (isReturnBlock(MBB)) {
    773       RetBlks.push_back(MBB);
    774     }
    775     assert(MBB->succ_size() <= 2);
    776   }
    777 
    778   if (RetBlks.size() >= 2) {
    779     addDummyExitBlock(RetBlks);
    780     Changed = true;
    781   }
    782 
    783   return Changed;
    784 }
    785 
    786 bool AMDGPUCFGStructurizer::run() {
    787 
    788   //Assume reducible CFG...
    789   DEBUG(dbgs() << "AMDGPUCFGStructurizer::run\n";FuncRep->viewCFG(););
    790 
    791 #ifdef STRESSTEST
    792   //Use the worse block ordering to test the algorithm.
    793   ReverseVector(orderedBlks);
    794 #endif
    795 
    796   DEBUG(dbgs() << "Ordered blocks:\n"; printOrderedBlocks(););
    797   int NumIter = 0;
    798   bool Finish = false;
    799   MachineBasicBlock *MBB;
    800   bool MakeProgress = false;
    801   int NumRemainedBlk = countActiveBlock(OrderedBlks.begin(),
    802                                         OrderedBlks.end());
    803 
    804   do {
    805     ++NumIter;
    806     DEBUG(
    807       dbgs() << "numIter = " << NumIter
    808              << ", numRemaintedBlk = " << NumRemainedBlk << "\n";
    809     );
    810 
    811     SmallVectorImpl<MachineBasicBlock *>::const_iterator It =
    812         OrderedBlks.begin();
    813     SmallVectorImpl<MachineBasicBlock *>::const_iterator E =
    814         OrderedBlks.end();
    815 
    816     SmallVectorImpl<MachineBasicBlock *>::const_iterator SccBeginIter =
    817         It;
    818     MachineBasicBlock *SccBeginMBB = NULL;
    819     int SccNumBlk = 0;  // The number of active blocks, init to a
    820                         // maximum possible number.
    821     int SccNumIter;     // Number of iteration in this SCC.
    822 
    823     while (It != E) {
    824       MBB = *It;
    825 
    826       if (!SccBeginMBB) {
    827         SccBeginIter = It;
    828         SccBeginMBB = MBB;
    829         SccNumIter = 0;
    830         SccNumBlk = NumRemainedBlk; // Init to maximum possible number.
    831         DEBUG(
    832               dbgs() << "start processing SCC" << getSCCNum(SccBeginMBB);
    833               dbgs() << "\n";
    834         );
    835       }
    836 
    837       if (!isRetiredBlock(MBB))
    838         patternMatch(MBB);
    839 
    840       ++It;
    841 
    842       bool ContNextScc = true;
    843       if (It == E
    844           || getSCCNum(SccBeginMBB) != getSCCNum(*It)) {
    845         // Just finish one scc.
    846         ++SccNumIter;
    847         int sccRemainedNumBlk = countActiveBlock(SccBeginIter, It);
    848         if (sccRemainedNumBlk != 1 && sccRemainedNumBlk >= SccNumBlk) {
    849           DEBUG(
    850             dbgs() << "Can't reduce SCC " << getSCCNum(MBB)
    851                    << ", sccNumIter = " << SccNumIter;
    852             dbgs() << "doesn't make any progress\n";
    853           );
    854           ContNextScc = true;
    855         } else if (sccRemainedNumBlk != 1 && sccRemainedNumBlk < SccNumBlk) {
    856           SccNumBlk = sccRemainedNumBlk;
    857           It = SccBeginIter;
    858           ContNextScc = false;
    859           DEBUG(
    860             dbgs() << "repeat processing SCC" << getSCCNum(MBB)
    861                    << "sccNumIter = " << SccNumIter << "\n";
    862             FuncRep->viewCFG();
    863           );
    864         } else {
    865           // Finish the current scc.
    866           ContNextScc = true;
    867         }
    868       } else {
    869         // Continue on next component in the current scc.
    870         ContNextScc = false;
    871       }
    872 
    873       if (ContNextScc)
    874         SccBeginMBB = NULL;
    875     } //while, "one iteration" over the function.
    876 
    877     MachineBasicBlock *EntryMBB =
    878         GraphTraits<MachineFunction *>::nodes_begin(FuncRep);
    879     if (EntryMBB->succ_size() == 0) {
    880       Finish = true;
    881       DEBUG(
    882         dbgs() << "Reduce to one block\n";
    883       );
    884     } else {
    885       int NewnumRemainedBlk
    886         = countActiveBlock(OrderedBlks.begin(), OrderedBlks.end());
    887       // consider cloned blocks ??
    888       if (NewnumRemainedBlk == 1 || NewnumRemainedBlk < NumRemainedBlk) {
    889         MakeProgress = true;
    890         NumRemainedBlk = NewnumRemainedBlk;
    891       } else {
    892         MakeProgress = false;
    893         DEBUG(
    894           dbgs() << "No progress\n";
    895         );
    896       }
    897     }
    898   } while (!Finish && MakeProgress);
    899 
    900   // Misc wrap up to maintain the consistency of the Function representation.
    901   wrapup(GraphTraits<MachineFunction *>::nodes_begin(FuncRep));
    902 
    903   // Detach retired Block, release memory.
    904   for (MBBInfoMap::iterator It = BlockInfoMap.begin(), E = BlockInfoMap.end();
    905       It != E; ++It) {
    906     if ((*It).second && (*It).second->IsRetired) {
    907       assert(((*It).first)->getNumber() != -1);
    908       DEBUG(
    909         dbgs() << "Erase BB" << ((*It).first)->getNumber() << "\n";
    910       );
    911       (*It).first->eraseFromParent();  //Remove from the parent Function.
    912     }
    913     delete (*It).second;
    914   }
    915   BlockInfoMap.clear();
    916   LLInfoMap.clear();
    917 
    918   DEBUG(
    919     FuncRep->viewCFG();
    920   );
    921 
    922   if (!Finish)
    923     llvm_unreachable("IRREDUCIBL_CF");
    924 
    925   return true;
    926 }
    927 
    928 
    929 
    930 void AMDGPUCFGStructurizer::orderBlocks(MachineFunction *MF) {
    931   int SccNum = 0;
    932   MachineBasicBlock *MBB;
    933   for (scc_iterator<MachineFunction *> It = scc_begin(MF), E = scc_end(MF);
    934       It != E; ++It, ++SccNum) {
    935     std::vector<MachineBasicBlock *> &SccNext = *It;
    936     for (std::vector<MachineBasicBlock *>::const_iterator
    937          blockIter = SccNext.begin(), blockEnd = SccNext.end();
    938          blockIter != blockEnd; ++blockIter) {
    939       MBB = *blockIter;
    940       OrderedBlks.push_back(MBB);
    941       recordSccnum(MBB, SccNum);
    942     }
    943   }
    944 
    945   //walk through all the block in func to check for unreachable
    946   typedef GraphTraits<MachineFunction *> GTM;
    947   MachineFunction::iterator It = GTM::nodes_begin(MF), E = GTM::nodes_end(MF);
    948   for (; It != E; ++It) {
    949     MachineBasicBlock *MBB = &(*It);
    950     SccNum = getSCCNum(MBB);
    951     if (SccNum == INVALIDSCCNUM)
    952       dbgs() << "unreachable block BB" << MBB->getNumber() << "\n";
    953   }
    954 }
    955 
    956 int AMDGPUCFGStructurizer::patternMatch(MachineBasicBlock *MBB) {
    957   int NumMatch = 0;
    958   int CurMatch;
    959 
    960   DEBUG(
    961         dbgs() << "Begin patternMatch BB" << MBB->getNumber() << "\n";
    962   );
    963 
    964   while ((CurMatch = patternMatchGroup(MBB)) > 0)
    965     NumMatch += CurMatch;
    966 
    967   DEBUG(
    968         dbgs() << "End patternMatch BB" << MBB->getNumber()
    969       << ", numMatch = " << NumMatch << "\n";
    970   );
    971 
    972   return NumMatch;
    973 }
    974 
    975 int AMDGPUCFGStructurizer::patternMatchGroup(MachineBasicBlock *MBB) {
    976   int NumMatch = 0;
    977   NumMatch += loopendPatternMatch();
    978   NumMatch += serialPatternMatch(MBB);
    979   NumMatch += ifPatternMatch(MBB);
    980   return NumMatch;
    981 }
    982 
    983 
    984 int AMDGPUCFGStructurizer::serialPatternMatch(MachineBasicBlock *MBB) {
    985   if (MBB->succ_size() != 1)
    986     return 0;
    987 
    988   MachineBasicBlock *childBlk = *MBB->succ_begin();
    989   if (childBlk->pred_size() != 1 || isActiveLoophead(childBlk))
    990     return 0;
    991 
    992   mergeSerialBlock(MBB, childBlk);
    993   ++numSerialPatternMatch;
    994   return 1;
    995 }
    996 
    997 int AMDGPUCFGStructurizer::ifPatternMatch(MachineBasicBlock *MBB) {
    998   //two edges
    999   if (MBB->succ_size() != 2)
   1000     return 0;
   1001   if (hasBackEdge(MBB))
   1002     return 0;
   1003   MachineInstr *BranchMI = getNormalBlockBranchInstr(MBB);
   1004   if (!BranchMI)
   1005     return 0;
   1006 
   1007   assert(isCondBranch(BranchMI));
   1008 
   1009   MachineBasicBlock *TrueMBB = getTrueBranch(BranchMI);
   1010   serialPatternMatch(TrueMBB);
   1011   ifPatternMatch(TrueMBB);
   1012   MachineBasicBlock *FalseMBB = getFalseBranch(MBB, BranchMI);
   1013   serialPatternMatch(FalseMBB);
   1014   ifPatternMatch(FalseMBB);
   1015   MachineBasicBlock *LandBlk;
   1016   int Cloned = 0;
   1017 
   1018   assert (!TrueMBB->succ_empty() || !FalseMBB->succ_empty());
   1019   // TODO: Simplify
   1020   if (TrueMBB->succ_size() == 1 && FalseMBB->succ_size() == 1
   1021     && *TrueMBB->succ_begin() == *FalseMBB->succ_begin()) {
   1022     // Diamond pattern
   1023     LandBlk = *TrueMBB->succ_begin();
   1024   } else if (TrueMBB->succ_size() == 1 && *TrueMBB->succ_begin() == FalseMBB) {
   1025     // Triangle pattern, false is empty
   1026     LandBlk = FalseMBB;
   1027     FalseMBB = NULL;
   1028   } else if (FalseMBB->succ_size() == 1
   1029              && *FalseMBB->succ_begin() == TrueMBB) {
   1030     // Triangle pattern, true is empty
   1031     // We reverse the predicate to make a triangle, empty false pattern;
   1032     std::swap(TrueMBB, FalseMBB);
   1033     reversePredicateSetter(MBB->end());
   1034     LandBlk = FalseMBB;
   1035     FalseMBB = NULL;
   1036   } else if (FalseMBB->succ_size() == 1
   1037              && isSameloopDetachedContbreak(TrueMBB, FalseMBB)) {
   1038     LandBlk = *FalseMBB->succ_begin();
   1039   } else if (TrueMBB->succ_size() == 1
   1040     && isSameloopDetachedContbreak(FalseMBB, TrueMBB)) {
   1041     LandBlk = *TrueMBB->succ_begin();
   1042   } else {
   1043     return handleJumpintoIf(MBB, TrueMBB, FalseMBB);
   1044   }
   1045 
   1046   // improveSimpleJumpinfoIf can handle the case where landBlk == NULL but the
   1047   // new BB created for landBlk==NULL may introduce new challenge to the
   1048   // reduction process.
   1049   if (LandBlk &&
   1050       ((TrueMBB && TrueMBB->pred_size() > 1)
   1051       || (FalseMBB && FalseMBB->pred_size() > 1))) {
   1052      Cloned += improveSimpleJumpintoIf(MBB, TrueMBB, FalseMBB, &LandBlk);
   1053   }
   1054 
   1055   if (TrueMBB && TrueMBB->pred_size() > 1) {
   1056     TrueMBB = cloneBlockForPredecessor(TrueMBB, MBB);
   1057     ++Cloned;
   1058   }
   1059 
   1060   if (FalseMBB && FalseMBB->pred_size() > 1) {
   1061     FalseMBB = cloneBlockForPredecessor(FalseMBB, MBB);
   1062     ++Cloned;
   1063   }
   1064 
   1065   mergeIfthenelseBlock(BranchMI, MBB, TrueMBB, FalseMBB, LandBlk);
   1066 
   1067   ++numIfPatternMatch;
   1068 
   1069   numClonedBlock += Cloned;
   1070 
   1071   return 1 + Cloned;
   1072 }
   1073 
   1074 int AMDGPUCFGStructurizer::loopendPatternMatch() {
   1075   std::vector<MachineLoop *> NestedLoops;
   1076   for (MachineLoopInfo::iterator It = MLI->begin(), E = MLI->end();
   1077       It != E; ++It) {
   1078     df_iterator<MachineLoop *> LpIt = df_begin(*It),
   1079         LpE = df_end(*It);
   1080     for (; LpIt != LpE; ++LpIt)
   1081       NestedLoops.push_back(*LpIt);
   1082   }
   1083   if (NestedLoops.size() == 0)
   1084     return 0;
   1085 
   1086   // Process nested loop outside->inside, so "continue" to a outside loop won't
   1087   // be mistaken as "break" of the current loop.
   1088   int Num = 0;
   1089   for (std::vector<MachineLoop *>::reverse_iterator It = NestedLoops.rbegin(),
   1090       E = NestedLoops.rend(); It != E; ++It) {
   1091     MachineLoop *ExaminedLoop = *It;
   1092     if (ExaminedLoop->getNumBlocks() == 0 || Visited[ExaminedLoop])
   1093       continue;
   1094     DEBUG(dbgs() << "Processing:\n"; ExaminedLoop->dump(););
   1095     int NumBreak = mergeLoop(ExaminedLoop);
   1096     if (NumBreak == -1)
   1097       break;
   1098     Num += NumBreak;
   1099   }
   1100   return Num;
   1101 }
   1102 
   1103 int AMDGPUCFGStructurizer::mergeLoop(MachineLoop *LoopRep) {
   1104   MachineBasicBlock *LoopHeader = LoopRep->getHeader();
   1105   MBBVector ExitingMBBs;
   1106   LoopRep->getExitingBlocks(ExitingMBBs);
   1107   assert(!ExitingMBBs.empty() && "Infinite Loop not supported");
   1108   DEBUG(dbgs() << "Loop has " << ExitingMBBs.size() << " exiting blocks\n";);
   1109   // We assume a single ExitBlk
   1110   MBBVector ExitBlks;
   1111   LoopRep->getExitBlocks(ExitBlks);
   1112   SmallPtrSet<MachineBasicBlock *, 2> ExitBlkSet;
   1113   for (unsigned i = 0, e = ExitBlks.size(); i < e; ++i)
   1114     ExitBlkSet.insert(ExitBlks[i]);
   1115   assert(ExitBlkSet.size() == 1);
   1116   MachineBasicBlock *ExitBlk = *ExitBlks.begin();
   1117   assert(ExitBlk && "Loop has several exit block");
   1118   MBBVector LatchBlks;
   1119   typedef GraphTraits<Inverse<MachineBasicBlock*> > InvMBBTraits;
   1120   InvMBBTraits::ChildIteratorType PI = InvMBBTraits::child_begin(LoopHeader),
   1121       PE = InvMBBTraits::child_end(LoopHeader);
   1122   for (; PI != PE; PI++) {
   1123     if (LoopRep->contains(*PI))
   1124       LatchBlks.push_back(*PI);
   1125   }
   1126 
   1127   for (unsigned i = 0, e = ExitingMBBs.size(); i < e; ++i)
   1128     mergeLoopbreakBlock(ExitingMBBs[i], ExitBlk);
   1129   for (unsigned i = 0, e = LatchBlks.size(); i < e; ++i)
   1130     settleLoopcontBlock(LatchBlks[i], LoopHeader);
   1131   int Match = 0;
   1132   do {
   1133     Match = 0;
   1134     Match += serialPatternMatch(LoopHeader);
   1135     Match += ifPatternMatch(LoopHeader);
   1136   } while (Match > 0);
   1137   mergeLooplandBlock(LoopHeader, ExitBlk);
   1138   MachineLoop *ParentLoop = LoopRep->getParentLoop();
   1139   if (ParentLoop)
   1140     MLI->changeLoopFor(LoopHeader, ParentLoop);
   1141   else
   1142     MLI->removeBlock(LoopHeader);
   1143   Visited[LoopRep] = true;
   1144   return 1;
   1145 }
   1146 
   1147 int AMDGPUCFGStructurizer::loopcontPatternMatch(MachineLoop *LoopRep,
   1148     MachineBasicBlock *LoopHeader) {
   1149   int NumCont = 0;
   1150   SmallVector<MachineBasicBlock *, DEFAULT_VEC_SLOTS> ContMBB;
   1151   typedef GraphTraits<Inverse<MachineBasicBlock *> > GTIM;
   1152   GTIM::ChildIteratorType It = GTIM::child_begin(LoopHeader),
   1153       E = GTIM::child_end(LoopHeader);
   1154   for (; It != E; ++It) {
   1155     MachineBasicBlock *MBB = *It;
   1156     if (LoopRep->contains(MBB)) {
   1157       handleLoopcontBlock(MBB, MLI->getLoopFor(MBB),
   1158                           LoopHeader, LoopRep);
   1159       ContMBB.push_back(MBB);
   1160       ++NumCont;
   1161     }
   1162   }
   1163 
   1164   for (SmallVectorImpl<MachineBasicBlock *>::iterator It = ContMBB.begin(),
   1165       E = ContMBB.end(); It != E; ++It) {
   1166     (*It)->removeSuccessor(LoopHeader);
   1167   }
   1168 
   1169   numLoopcontPatternMatch += NumCont;
   1170 
   1171   return NumCont;
   1172 }
   1173 
   1174 
   1175 bool AMDGPUCFGStructurizer::isSameloopDetachedContbreak(
   1176     MachineBasicBlock *Src1MBB, MachineBasicBlock *Src2MBB) {
   1177   if (Src1MBB->succ_size() == 0) {
   1178     MachineLoop *LoopRep = MLI->getLoopFor(Src1MBB);
   1179     if (LoopRep&& LoopRep == MLI->getLoopFor(Src2MBB)) {
   1180       MachineBasicBlock *&TheEntry = LLInfoMap[LoopRep];
   1181       if (TheEntry) {
   1182         DEBUG(
   1183           dbgs() << "isLoopContBreakBlock yes src1 = BB"
   1184                  << Src1MBB->getNumber()
   1185                  << " src2 = BB" << Src2MBB->getNumber() << "\n";
   1186         );
   1187         return true;
   1188       }
   1189     }
   1190   }
   1191   return false;
   1192 }
   1193 
   1194 int AMDGPUCFGStructurizer::handleJumpintoIf(MachineBasicBlock *HeadMBB,
   1195     MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB) {
   1196   int Num = handleJumpintoIfImp(HeadMBB, TrueMBB, FalseMBB);
   1197   if (Num == 0) {
   1198     DEBUG(
   1199       dbgs() << "handleJumpintoIf swap trueBlk and FalseBlk" << "\n";
   1200     );
   1201     Num = handleJumpintoIfImp(HeadMBB, FalseMBB, TrueMBB);
   1202   }
   1203   return Num;
   1204 }
   1205 
   1206 int AMDGPUCFGStructurizer::handleJumpintoIfImp(MachineBasicBlock *HeadMBB,
   1207     MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB) {
   1208   int Num = 0;
   1209   MachineBasicBlock *DownBlk;
   1210 
   1211   //trueBlk could be the common post dominator
   1212   DownBlk = TrueMBB;
   1213 
   1214   DEBUG(
   1215     dbgs() << "handleJumpintoIfImp head = BB" << HeadMBB->getNumber()
   1216            << " true = BB" << TrueMBB->getNumber()
   1217            << ", numSucc=" << TrueMBB->succ_size()
   1218            << " false = BB" << FalseMBB->getNumber() << "\n";
   1219   );
   1220 
   1221   while (DownBlk) {
   1222     DEBUG(
   1223       dbgs() << "check down = BB" << DownBlk->getNumber();
   1224     );
   1225 
   1226     if (singlePathTo(FalseMBB, DownBlk) == SinglePath_InPath) {
   1227       DEBUG(
   1228         dbgs() << " working\n";
   1229       );
   1230 
   1231       Num += cloneOnSideEntryTo(HeadMBB, TrueMBB, DownBlk);
   1232       Num += cloneOnSideEntryTo(HeadMBB, FalseMBB, DownBlk);
   1233 
   1234       numClonedBlock += Num;
   1235       Num += serialPatternMatch(*HeadMBB->succ_begin());
   1236       Num += serialPatternMatch(*(++HeadMBB->succ_begin()));
   1237       Num += ifPatternMatch(HeadMBB);
   1238       assert(Num > 0);
   1239 
   1240       break;
   1241     }
   1242     DEBUG(
   1243       dbgs() << " not working\n";
   1244     );
   1245     DownBlk = (DownBlk->succ_size() == 1) ? (*DownBlk->succ_begin()) : NULL;
   1246   } // walk down the postDomTree
   1247 
   1248   return Num;
   1249 }
   1250 
   1251 void AMDGPUCFGStructurizer::showImproveSimpleJumpintoIf(
   1252     MachineBasicBlock *HeadMBB, MachineBasicBlock *TrueMBB,
   1253     MachineBasicBlock *FalseMBB, MachineBasicBlock *LandMBB, bool Detail) {
   1254   dbgs() << "head = BB" << HeadMBB->getNumber()
   1255          << " size = " << HeadMBB->size();
   1256   if (Detail) {
   1257     dbgs() << "\n";
   1258     HeadMBB->print(dbgs());
   1259     dbgs() << "\n";
   1260   }
   1261 
   1262   if (TrueMBB) {
   1263     dbgs() << ", true = BB" << TrueMBB->getNumber() << " size = "
   1264            << TrueMBB->size() << " numPred = " << TrueMBB->pred_size();
   1265     if (Detail) {
   1266       dbgs() << "\n";
   1267       TrueMBB->print(dbgs());
   1268       dbgs() << "\n";
   1269     }
   1270   }
   1271   if (FalseMBB) {
   1272     dbgs() << ", false = BB" << FalseMBB->getNumber() << " size = "
   1273            << FalseMBB->size() << " numPred = " << FalseMBB->pred_size();
   1274     if (Detail) {
   1275       dbgs() << "\n";
   1276       FalseMBB->print(dbgs());
   1277       dbgs() << "\n";
   1278     }
   1279   }
   1280   if (LandMBB) {
   1281     dbgs() << ", land = BB" << LandMBB->getNumber() << " size = "
   1282            << LandMBB->size() << " numPred = " << LandMBB->pred_size();
   1283     if (Detail) {
   1284       dbgs() << "\n";
   1285       LandMBB->print(dbgs());
   1286       dbgs() << "\n";
   1287     }
   1288   }
   1289 
   1290     dbgs() << "\n";
   1291 }
   1292 
   1293 int AMDGPUCFGStructurizer::improveSimpleJumpintoIf(MachineBasicBlock *HeadMBB,
   1294     MachineBasicBlock *TrueMBB, MachineBasicBlock *FalseMBB,
   1295     MachineBasicBlock **LandMBBPtr) {
   1296   bool MigrateTrue = false;
   1297   bool MigrateFalse = false;
   1298 
   1299   MachineBasicBlock *LandBlk = *LandMBBPtr;
   1300 
   1301   assert((!TrueMBB || TrueMBB->succ_size() <= 1)
   1302          && (!FalseMBB || FalseMBB->succ_size() <= 1));
   1303 
   1304   if (TrueMBB == FalseMBB)
   1305     return 0;
   1306 
   1307   MigrateTrue = needMigrateBlock(TrueMBB);
   1308   MigrateFalse = needMigrateBlock(FalseMBB);
   1309 
   1310   if (!MigrateTrue && !MigrateFalse)
   1311     return 0;
   1312 
   1313   // If we need to migrate either trueBlk and falseBlk, migrate the rest that
   1314   // have more than one predecessors.  without doing this, its predecessor
   1315   // rather than headBlk will have undefined value in initReg.
   1316   if (!MigrateTrue && TrueMBB && TrueMBB->pred_size() > 1)
   1317     MigrateTrue = true;
   1318   if (!MigrateFalse && FalseMBB && FalseMBB->pred_size() > 1)
   1319     MigrateFalse = true;
   1320 
   1321   DEBUG(
   1322     dbgs() << "before improveSimpleJumpintoIf: ";
   1323     showImproveSimpleJumpintoIf(HeadMBB, TrueMBB, FalseMBB, LandBlk, 0);
   1324   );
   1325 
   1326   // org: headBlk => if () {trueBlk} else {falseBlk} => landBlk
   1327   //
   1328   // new: headBlk => if () {initReg = 1; org trueBlk branch} else
   1329   //      {initReg = 0; org falseBlk branch }
   1330   //      => landBlk => if (initReg) {org trueBlk} else {org falseBlk}
   1331   //      => org landBlk
   1332   //      if landBlk->pred_size() > 2, put the about if-else inside
   1333   //      if (initReg !=2) {...}
   1334   //
   1335   // add initReg = initVal to headBlk
   1336 
   1337   const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
   1338   unsigned InitReg =
   1339     HeadMBB->getParent()->getRegInfo().createVirtualRegister(I32RC);
   1340   if (!MigrateTrue || !MigrateFalse)
   1341     llvm_unreachable("Extra register needed to handle CFG");
   1342 
   1343   int NumNewBlk = 0;
   1344 
   1345   if (!LandBlk) {
   1346     LandBlk = HeadMBB->getParent()->CreateMachineBasicBlock();
   1347     HeadMBB->getParent()->push_back(LandBlk);  //insert to function
   1348 
   1349     if (TrueMBB) {
   1350       TrueMBB->addSuccessor(LandBlk);
   1351     } else {
   1352       HeadMBB->addSuccessor(LandBlk);
   1353     }
   1354 
   1355     if (FalseMBB) {
   1356       FalseMBB->addSuccessor(LandBlk);
   1357     } else {
   1358       HeadMBB->addSuccessor(LandBlk);
   1359     }
   1360 
   1361     NumNewBlk ++;
   1362   }
   1363 
   1364   bool LandBlkHasOtherPred = (LandBlk->pred_size() > 2);
   1365 
   1366   //insert AMDGPU::ENDIF to avoid special case "input landBlk == NULL"
   1367   MachineBasicBlock::iterator I = insertInstrBefore(LandBlk, AMDGPU::ENDIF);
   1368 
   1369   if (LandBlkHasOtherPred) {
   1370     llvm_unreachable("Extra register needed to handle CFG");
   1371     unsigned CmpResReg =
   1372       HeadMBB->getParent()->getRegInfo().createVirtualRegister(I32RC);
   1373     llvm_unreachable("Extra compare instruction needed to handle CFG");
   1374     insertCondBranchBefore(LandBlk, I, AMDGPU::IF_PREDICATE_SET,
   1375         CmpResReg, DebugLoc());
   1376   }
   1377 
   1378   insertCondBranchBefore(LandBlk, I, AMDGPU::IF_PREDICATE_SET, InitReg,
   1379       DebugLoc());
   1380 
   1381   if (MigrateTrue) {
   1382     migrateInstruction(TrueMBB, LandBlk, I);
   1383     // need to uncondionally insert the assignment to ensure a path from its
   1384     // predecessor rather than headBlk has valid value in initReg if
   1385     // (initVal != 1).
   1386     llvm_unreachable("Extra register needed to handle CFG");
   1387   }
   1388   insertInstrBefore(I, AMDGPU::ELSE);
   1389 
   1390   if (MigrateFalse) {
   1391     migrateInstruction(FalseMBB, LandBlk, I);
   1392     // need to uncondionally insert the assignment to ensure a path from its
   1393     // predecessor rather than headBlk has valid value in initReg if
   1394     // (initVal != 0)
   1395     llvm_unreachable("Extra register needed to handle CFG");
   1396   }
   1397 
   1398   if (LandBlkHasOtherPred) {
   1399     // add endif
   1400     insertInstrBefore(I, AMDGPU::ENDIF);
   1401 
   1402     // put initReg = 2 to other predecessors of landBlk
   1403     for (MachineBasicBlock::pred_iterator PI = LandBlk->pred_begin(),
   1404          PE = LandBlk->pred_end(); PI != PE; ++PI) {
   1405       MachineBasicBlock *MBB = *PI;
   1406       if (MBB != TrueMBB && MBB != FalseMBB)
   1407         llvm_unreachable("Extra register needed to handle CFG");
   1408     }
   1409   }
   1410   DEBUG(
   1411     dbgs() << "result from improveSimpleJumpintoIf: ";
   1412     showImproveSimpleJumpintoIf(HeadMBB, TrueMBB, FalseMBB, LandBlk, 0);
   1413   );
   1414 
   1415   // update landBlk
   1416   *LandMBBPtr = LandBlk;
   1417 
   1418   return NumNewBlk;
   1419 }
   1420 
   1421 void AMDGPUCFGStructurizer::handleLoopcontBlock(MachineBasicBlock *ContingMBB,
   1422     MachineLoop *ContingLoop, MachineBasicBlock *ContMBB,
   1423     MachineLoop *ContLoop) {
   1424   DEBUG(dbgs() << "loopcontPattern cont = BB" << ContingMBB->getNumber()
   1425                << " header = BB" << ContMBB->getNumber() << "\n";
   1426         dbgs() << "Trying to continue loop-depth = "
   1427                << getLoopDepth(ContLoop)
   1428                << " from loop-depth = " << getLoopDepth(ContingLoop) << "\n";);
   1429   settleLoopcontBlock(ContingMBB, ContMBB);
   1430 }
   1431 
   1432 void AMDGPUCFGStructurizer::mergeSerialBlock(MachineBasicBlock *DstMBB,
   1433     MachineBasicBlock *SrcMBB) {
   1434   DEBUG(
   1435     dbgs() << "serialPattern BB" << DstMBB->getNumber()
   1436            << " <= BB" << SrcMBB->getNumber() << "\n";
   1437   );
   1438   DstMBB->splice(DstMBB->end(), SrcMBB, SrcMBB->begin(), SrcMBB->end());
   1439 
   1440   DstMBB->removeSuccessor(SrcMBB);
   1441   cloneSuccessorList(DstMBB, SrcMBB);
   1442 
   1443   removeSuccessor(SrcMBB);
   1444   MLI->removeBlock(SrcMBB);
   1445   retireBlock(SrcMBB);
   1446 }
   1447 
   1448 void AMDGPUCFGStructurizer::mergeIfthenelseBlock(MachineInstr *BranchMI,
   1449     MachineBasicBlock *MBB, MachineBasicBlock *TrueMBB,
   1450     MachineBasicBlock *FalseMBB, MachineBasicBlock *LandMBB) {
   1451   assert (TrueMBB);
   1452   DEBUG(
   1453     dbgs() << "ifPattern BB" << MBB->getNumber();
   1454     dbgs() << "{  ";
   1455     if (TrueMBB) {
   1456       dbgs() << "BB" << TrueMBB->getNumber();
   1457     }
   1458     dbgs() << "  } else ";
   1459     dbgs() << "{  ";
   1460     if (FalseMBB) {
   1461       dbgs() << "BB" << FalseMBB->getNumber();
   1462     }
   1463     dbgs() << "  }\n ";
   1464     dbgs() << "landBlock: ";
   1465     if (!LandMBB) {
   1466       dbgs() << "NULL";
   1467     } else {
   1468       dbgs() << "BB" << LandMBB->getNumber();
   1469     }
   1470     dbgs() << "\n";
   1471   );
   1472 
   1473   int OldOpcode = BranchMI->getOpcode();
   1474   DebugLoc BranchDL = BranchMI->getDebugLoc();
   1475 
   1476 //    transform to
   1477 //    if cond
   1478 //       trueBlk
   1479 //    else
   1480 //       falseBlk
   1481 //    endif
   1482 //    landBlk
   1483 
   1484   MachineBasicBlock::iterator I = BranchMI;
   1485   insertCondBranchBefore(I, getBranchNzeroOpcode(OldOpcode),
   1486       BranchDL);
   1487 
   1488   if (TrueMBB) {
   1489     MBB->splice(I, TrueMBB, TrueMBB->begin(), TrueMBB->end());
   1490     MBB->removeSuccessor(TrueMBB);
   1491     if (LandMBB && TrueMBB->succ_size()!=0)
   1492       TrueMBB->removeSuccessor(LandMBB);
   1493     retireBlock(TrueMBB);
   1494     MLI->removeBlock(TrueMBB);
   1495   }
   1496 
   1497   if (FalseMBB) {
   1498     insertInstrBefore(I, AMDGPU::ELSE);
   1499     MBB->splice(I, FalseMBB, FalseMBB->begin(),
   1500                    FalseMBB->end());
   1501     MBB->removeSuccessor(FalseMBB);
   1502     if (LandMBB && FalseMBB->succ_size() != 0)
   1503       FalseMBB->removeSuccessor(LandMBB);
   1504     retireBlock(FalseMBB);
   1505     MLI->removeBlock(FalseMBB);
   1506   }
   1507   insertInstrBefore(I, AMDGPU::ENDIF);
   1508 
   1509   BranchMI->eraseFromParent();
   1510 
   1511   if (LandMBB && TrueMBB && FalseMBB)
   1512     MBB->addSuccessor(LandMBB);
   1513 
   1514 }
   1515 
   1516 void AMDGPUCFGStructurizer::mergeLooplandBlock(MachineBasicBlock *DstBlk,
   1517     MachineBasicBlock *LandMBB) {
   1518   DEBUG(dbgs() << "loopPattern header = BB" << DstBlk->getNumber()
   1519                << " land = BB" << LandMBB->getNumber() << "\n";);
   1520 
   1521   insertInstrBefore(DstBlk, AMDGPU::WHILELOOP, DebugLoc());
   1522   insertInstrEnd(DstBlk, AMDGPU::ENDLOOP, DebugLoc());
   1523   DstBlk->addSuccessor(LandMBB);
   1524   DstBlk->removeSuccessor(DstBlk);
   1525 }
   1526 
   1527 
   1528 void AMDGPUCFGStructurizer::mergeLoopbreakBlock(MachineBasicBlock *ExitingMBB,
   1529     MachineBasicBlock *LandMBB) {
   1530   DEBUG(dbgs() << "loopbreakPattern exiting = BB" << ExitingMBB->getNumber()
   1531                << " land = BB" << LandMBB->getNumber() << "\n";);
   1532   MachineInstr *BranchMI = getLoopendBlockBranchInstr(ExitingMBB);
   1533   assert(BranchMI && isCondBranch(BranchMI));
   1534   DebugLoc DL = BranchMI->getDebugLoc();
   1535   MachineBasicBlock *TrueBranch = getTrueBranch(BranchMI);
   1536   MachineBasicBlock::iterator I = BranchMI;
   1537   if (TrueBranch != LandMBB)
   1538     reversePredicateSetter(I);
   1539   insertCondBranchBefore(ExitingMBB, I, AMDGPU::IF_PREDICATE_SET, AMDGPU::PREDICATE_BIT, DL);
   1540   insertInstrBefore(I, AMDGPU::BREAK);
   1541   insertInstrBefore(I, AMDGPU::ENDIF);
   1542   //now branchInst can be erase safely
   1543   BranchMI->eraseFromParent();
   1544   //now take care of successors, retire blocks
   1545   ExitingMBB->removeSuccessor(LandMBB);
   1546 }
   1547 
   1548 void AMDGPUCFGStructurizer::settleLoopcontBlock(MachineBasicBlock *ContingMBB,
   1549     MachineBasicBlock *ContMBB) {
   1550   DEBUG(dbgs() << "settleLoopcontBlock conting = BB"
   1551                << ContingMBB->getNumber()
   1552                << ", cont = BB" << ContMBB->getNumber() << "\n";);
   1553 
   1554   MachineInstr *MI = getLoopendBlockBranchInstr(ContingMBB);
   1555   if (MI) {
   1556     assert(isCondBranch(MI));
   1557     MachineBasicBlock::iterator I = MI;
   1558     MachineBasicBlock *TrueBranch = getTrueBranch(MI);
   1559     int OldOpcode = MI->getOpcode();
   1560     DebugLoc DL = MI->getDebugLoc();
   1561 
   1562     bool UseContinueLogical = ((&*ContingMBB->rbegin()) == MI);
   1563 
   1564     if (UseContinueLogical == false) {
   1565       int BranchOpcode =
   1566           TrueBranch == ContMBB ? getBranchNzeroOpcode(OldOpcode) :
   1567           getBranchZeroOpcode(OldOpcode);
   1568       insertCondBranchBefore(I, BranchOpcode, DL);
   1569       // insertEnd to ensure phi-moves, if exist, go before the continue-instr.
   1570       insertInstrEnd(ContingMBB, AMDGPU::CONTINUE, DL);
   1571       insertInstrEnd(ContingMBB, AMDGPU::ENDIF, DL);
   1572     } else {
   1573       int BranchOpcode =
   1574           TrueBranch == ContMBB ? getContinueNzeroOpcode(OldOpcode) :
   1575           getContinueZeroOpcode(OldOpcode);
   1576       insertCondBranchBefore(I, BranchOpcode, DL);
   1577     }
   1578 
   1579     MI->eraseFromParent();
   1580   } else {
   1581     // if we've arrived here then we've already erased the branch instruction
   1582     // travel back up the basic block to see the last reference of our debug
   1583     // location we've just inserted that reference here so it should be
   1584     // representative insertEnd to ensure phi-moves, if exist, go before the
   1585     // continue-instr.
   1586     insertInstrEnd(ContingMBB, AMDGPU::CONTINUE,
   1587         getLastDebugLocInBB(ContingMBB));
   1588   }
   1589 }
   1590 
   1591 int AMDGPUCFGStructurizer::cloneOnSideEntryTo(MachineBasicBlock *PreMBB,
   1592     MachineBasicBlock *SrcMBB, MachineBasicBlock *DstMBB) {
   1593   int Cloned = 0;
   1594   assert(PreMBB->isSuccessor(SrcMBB));
   1595   while (SrcMBB && SrcMBB != DstMBB) {
   1596     assert(SrcMBB->succ_size() == 1);
   1597     if (SrcMBB->pred_size() > 1) {
   1598       SrcMBB = cloneBlockForPredecessor(SrcMBB, PreMBB);
   1599       ++Cloned;
   1600     }
   1601 
   1602     PreMBB = SrcMBB;
   1603     SrcMBB = *SrcMBB->succ_begin();
   1604   }
   1605 
   1606   return Cloned;
   1607 }
   1608 
   1609 MachineBasicBlock *
   1610 AMDGPUCFGStructurizer::cloneBlockForPredecessor(MachineBasicBlock *MBB,
   1611     MachineBasicBlock *PredMBB) {
   1612   assert(PredMBB->isSuccessor(MBB) &&
   1613          "succBlk is not a prececessor of curBlk");
   1614 
   1615   MachineBasicBlock *CloneMBB = clone(MBB);  //clone instructions
   1616   replaceInstrUseOfBlockWith(PredMBB, MBB, CloneMBB);
   1617   //srcBlk, oldBlk, newBlk
   1618 
   1619   PredMBB->removeSuccessor(MBB);
   1620   PredMBB->addSuccessor(CloneMBB);
   1621 
   1622   // add all successor to cloneBlk
   1623   cloneSuccessorList(CloneMBB, MBB);
   1624 
   1625   numClonedInstr += MBB->size();
   1626 
   1627   DEBUG(
   1628     dbgs() << "Cloned block: " << "BB"
   1629            << MBB->getNumber() << "size " << MBB->size() << "\n";
   1630   );
   1631 
   1632   SHOWNEWBLK(CloneMBB, "result of Cloned block: ");
   1633 
   1634   return CloneMBB;
   1635 }
   1636 
   1637 void AMDGPUCFGStructurizer::migrateInstruction(MachineBasicBlock *SrcMBB,
   1638     MachineBasicBlock *DstMBB, MachineBasicBlock::iterator I) {
   1639   MachineBasicBlock::iterator SpliceEnd;
   1640   //look for the input branchinstr, not the AMDGPU branchinstr
   1641   MachineInstr *BranchMI = getNormalBlockBranchInstr(SrcMBB);
   1642   if (!BranchMI) {
   1643     DEBUG(
   1644       dbgs() << "migrateInstruction don't see branch instr\n" ;
   1645     );
   1646     SpliceEnd = SrcMBB->end();
   1647   } else {
   1648     DEBUG(
   1649       dbgs() << "migrateInstruction see branch instr\n" ;
   1650       BranchMI->dump();
   1651     );
   1652     SpliceEnd = BranchMI;
   1653   }
   1654   DEBUG(
   1655     dbgs() << "migrateInstruction before splice dstSize = " << DstMBB->size()
   1656       << "srcSize = " << SrcMBB->size() << "\n";
   1657   );
   1658 
   1659   //splice insert before insertPos
   1660   DstMBB->splice(I, SrcMBB, SrcMBB->begin(), SpliceEnd);
   1661 
   1662   DEBUG(
   1663     dbgs() << "migrateInstruction after splice dstSize = " << DstMBB->size()
   1664       << "srcSize = " << SrcMBB->size() << "\n";
   1665   );
   1666 }
   1667 
   1668 MachineBasicBlock *
   1669 AMDGPUCFGStructurizer::normalizeInfiniteLoopExit(MachineLoop* LoopRep) {
   1670   MachineBasicBlock *LoopHeader = LoopRep->getHeader();
   1671   MachineBasicBlock *LoopLatch = LoopRep->getLoopLatch();
   1672   const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
   1673 
   1674   if (!LoopHeader || !LoopLatch)
   1675     return NULL;
   1676   MachineInstr *BranchMI = getLoopendBlockBranchInstr(LoopLatch);
   1677   // Is LoopRep an infinite loop ?
   1678   if (!BranchMI || !isUncondBranch(BranchMI))
   1679     return NULL;
   1680 
   1681   MachineBasicBlock *DummyExitBlk = FuncRep->CreateMachineBasicBlock();
   1682   FuncRep->push_back(DummyExitBlk);  //insert to function
   1683   SHOWNEWBLK(DummyExitBlk, "DummyExitBlock to normalize infiniteLoop: ");
   1684   DEBUG(dbgs() << "Old branch instr: " << *BranchMI << "\n";);
   1685   MachineBasicBlock::iterator I = BranchMI;
   1686   unsigned ImmReg = FuncRep->getRegInfo().createVirtualRegister(I32RC);
   1687   llvm_unreachable("Extra register needed to handle CFG");
   1688   MachineInstr *NewMI = insertInstrBefore(I, AMDGPU::BRANCH_COND_i32);
   1689   MachineInstrBuilder MIB(*FuncRep, NewMI);
   1690   MIB.addMBB(LoopHeader);
   1691   MIB.addReg(ImmReg, false);
   1692   SHOWNEWINSTR(NewMI);
   1693   BranchMI->eraseFromParent();
   1694   LoopLatch->addSuccessor(DummyExitBlk);
   1695 
   1696   return DummyExitBlk;
   1697 }
   1698 
   1699 void AMDGPUCFGStructurizer::removeUnconditionalBranch(MachineBasicBlock *MBB) {
   1700   MachineInstr *BranchMI;
   1701 
   1702   // I saw two unconditional branch in one basic block in example
   1703   // test_fc_do_while_or.c need to fix the upstream on this to remove the loop.
   1704   while ((BranchMI = getLoopendBlockBranchInstr(MBB))
   1705           && isUncondBranch(BranchMI)) {
   1706     DEBUG(dbgs() << "Removing uncond branch instr"; BranchMI->dump(););
   1707     BranchMI->eraseFromParent();
   1708   }
   1709 }
   1710 
   1711 void AMDGPUCFGStructurizer::removeRedundantConditionalBranch(
   1712     MachineBasicBlock *MBB) {
   1713   if (MBB->succ_size() != 2)
   1714     return;
   1715   MachineBasicBlock *MBB1 = *MBB->succ_begin();
   1716   MachineBasicBlock *MBB2 = *(++MBB->succ_begin());
   1717   if (MBB1 != MBB2)
   1718     return;
   1719 
   1720   MachineInstr *BranchMI = getNormalBlockBranchInstr(MBB);
   1721   assert(BranchMI && isCondBranch(BranchMI));
   1722   DEBUG(dbgs() << "Removing unneeded cond branch instr"; BranchMI->dump(););
   1723   BranchMI->eraseFromParent();
   1724   SHOWNEWBLK(MBB1, "Removing redundant successor");
   1725   MBB->removeSuccessor(MBB1);
   1726 }
   1727 
   1728 void AMDGPUCFGStructurizer::addDummyExitBlock(
   1729     SmallVectorImpl<MachineBasicBlock*> &RetMBB) {
   1730   MachineBasicBlock *DummyExitBlk = FuncRep->CreateMachineBasicBlock();
   1731   FuncRep->push_back(DummyExitBlk);  //insert to function
   1732   insertInstrEnd(DummyExitBlk, AMDGPU::RETURN);
   1733 
   1734   for (SmallVectorImpl<MachineBasicBlock *>::iterator It = RetMBB.begin(),
   1735        E = RetMBB.end(); It != E; ++It) {
   1736     MachineBasicBlock *MBB = *It;
   1737     MachineInstr *MI = getReturnInstr(MBB);
   1738     if (MI)
   1739       MI->eraseFromParent();
   1740     MBB->addSuccessor(DummyExitBlk);
   1741     DEBUG(
   1742       dbgs() << "Add dummyExitBlock to BB" << MBB->getNumber()
   1743              << " successors\n";
   1744     );
   1745   }
   1746   SHOWNEWBLK(DummyExitBlk, "DummyExitBlock: ");
   1747 }
   1748 
   1749 void AMDGPUCFGStructurizer::removeSuccessor(MachineBasicBlock *MBB) {
   1750   while (MBB->succ_size())
   1751     MBB->removeSuccessor(*MBB->succ_begin());
   1752 }
   1753 
   1754 void AMDGPUCFGStructurizer::recordSccnum(MachineBasicBlock *MBB,
   1755     int SccNum) {
   1756   BlockInformation *&srcBlkInfo = BlockInfoMap[MBB];
   1757   if (!srcBlkInfo)
   1758     srcBlkInfo = new BlockInformation();
   1759   srcBlkInfo->SccNum = SccNum;
   1760 }
   1761 
   1762 void AMDGPUCFGStructurizer::retireBlock(MachineBasicBlock *MBB) {
   1763   DEBUG(
   1764         dbgs() << "Retiring BB" << MBB->getNumber() << "\n";
   1765   );
   1766 
   1767   BlockInformation *&SrcBlkInfo = BlockInfoMap[MBB];
   1768 
   1769   if (!SrcBlkInfo)
   1770     SrcBlkInfo = new BlockInformation();
   1771 
   1772   SrcBlkInfo->IsRetired = true;
   1773   assert(MBB->succ_size() == 0 && MBB->pred_size() == 0
   1774          && "can't retire block yet");
   1775 }
   1776 
   1777 void AMDGPUCFGStructurizer::setLoopLandBlock(MachineLoop *loopRep,
   1778     MachineBasicBlock *MBB) {
   1779   MachineBasicBlock *&TheEntry = LLInfoMap[loopRep];
   1780   if (!MBB) {
   1781     MBB = FuncRep->CreateMachineBasicBlock();
   1782     FuncRep->push_back(MBB);  //insert to function
   1783     SHOWNEWBLK(MBB, "DummyLandingBlock for loop without break: ");
   1784   }
   1785   TheEntry = MBB;
   1786   DEBUG(
   1787     dbgs() << "setLoopLandBlock loop-header = BB"
   1788            << loopRep->getHeader()->getNumber()
   1789            << "  landing-block = BB" << MBB->getNumber() << "\n";
   1790   );
   1791 }
   1792 
   1793 MachineBasicBlock *
   1794 AMDGPUCFGStructurizer::findNearestCommonPostDom(MachineBasicBlock *MBB1,
   1795     MachineBasicBlock *MBB2) {
   1796 
   1797   if (PDT->dominates(MBB1, MBB2))
   1798     return MBB1;
   1799   if (PDT->dominates(MBB2, MBB1))
   1800     return MBB2;
   1801 
   1802   MachineDomTreeNode *Node1 = PDT->getNode(MBB1);
   1803   MachineDomTreeNode *Node2 = PDT->getNode(MBB2);
   1804 
   1805   // Handle newly cloned node.
   1806   if (!Node1 && MBB1->succ_size() == 1)
   1807     return findNearestCommonPostDom(*MBB1->succ_begin(), MBB2);
   1808   if (!Node2 && MBB2->succ_size() == 1)
   1809     return findNearestCommonPostDom(MBB1, *MBB2->succ_begin());
   1810 
   1811   if (!Node1 || !Node2)
   1812     return NULL;
   1813 
   1814   Node1 = Node1->getIDom();
   1815   while (Node1) {
   1816     if (PDT->dominates(Node1, Node2))
   1817       return Node1->getBlock();
   1818     Node1 = Node1->getIDom();
   1819   }
   1820 
   1821   return NULL;
   1822 }
   1823 
   1824 MachineBasicBlock *
   1825 AMDGPUCFGStructurizer::findNearestCommonPostDom(
   1826     std::set<MachineBasicBlock *> &MBBs) {
   1827   MachineBasicBlock *CommonDom;
   1828   std::set<MachineBasicBlock *>::const_iterator It = MBBs.begin();
   1829   std::set<MachineBasicBlock *>::const_iterator E = MBBs.end();
   1830   for (CommonDom = *It; It != E && CommonDom; ++It) {
   1831     MachineBasicBlock *MBB = *It;
   1832     if (MBB != CommonDom)
   1833       CommonDom = findNearestCommonPostDom(MBB, CommonDom);
   1834   }
   1835 
   1836   DEBUG(
   1837     dbgs() << "Common post dominator for exit blocks is ";
   1838     if (CommonDom)
   1839           dbgs() << "BB" << CommonDom->getNumber() << "\n";
   1840     else
   1841       dbgs() << "NULL\n";
   1842   );
   1843 
   1844   return CommonDom;
   1845 }
   1846 
   1847 char AMDGPUCFGStructurizer::ID = 0;
   1848 
   1849 } // end anonymous namespace
   1850 
   1851 
   1852 FunctionPass *llvm::createAMDGPUCFGStructurizerPass(TargetMachine &tm) {
   1853   return new AMDGPUCFGStructurizer(tm);
   1854 }
   1855