Home | History | Annotate | Download | only in radeon
      1 //===-- AMDILCFGStructurizer.cpp - CFG Structurizer -----------------------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //==-----------------------------------------------------------------------===//
      9 
     10 #define DEBUGME 0
     11 #define DEBUG_TYPE "structcfg"
     12 
     13 #include "AMDGPUInstrInfo.h"
     14 #include "AMDIL.h"
     15 #include "AMDILUtilityFunctions.h"
     16 #include "llvm/ADT/SCCIterator.h"
     17 #include "llvm/ADT/SmallVector.h"
     18 #include "llvm/ADT/Statistic.h"
     19 #include "llvm/Analysis/DominatorInternals.h"
     20 #include "llvm/Analysis/Dominators.h"
     21 #include "llvm/CodeGen/MachineDominators.h"
     22 #include "llvm/CodeGen/MachineDominators.h"
     23 #include "llvm/CodeGen/MachineFunction.h"
     24 #include "llvm/CodeGen/MachineFunctionAnalysis.h"
     25 #include "llvm/CodeGen/MachineFunctionPass.h"
     26 #include "llvm/CodeGen/MachineFunctionPass.h"
     27 #include "llvm/CodeGen/MachineInstrBuilder.h"
     28 #include "llvm/CodeGen/MachineJumpTableInfo.h"
     29 #include "llvm/CodeGen/MachineLoopInfo.h"
     30 #include "llvm/CodeGen/MachineRegisterInfo.h"
     31 #include "llvm/Target/TargetInstrInfo.h"
     32 
     33 #define FirstNonDebugInstr(A) A->begin()
     34 using namespace llvm;
     35 
     36 // TODO: move-begin.
     37 
     38 //===----------------------------------------------------------------------===//
     39 //
     40 // Statistics for CFGStructurizer.
     41 //
     42 //===----------------------------------------------------------------------===//
     43 
     44 STATISTIC(numSerialPatternMatch,    "CFGStructurizer number of serial pattern "
     45     "matched");
     46 STATISTIC(numIfPatternMatch,        "CFGStructurizer number of if pattern "
     47     "matched");
     48 STATISTIC(numLoopbreakPatternMatch, "CFGStructurizer number of loop-break "
     49     "pattern matched");
     50 STATISTIC(numLoopcontPatternMatch,  "CFGStructurizer number of loop-continue "
     51     "pattern matched");
     52 STATISTIC(numLoopPatternMatch,      "CFGStructurizer number of loop pattern "
     53     "matched");
     54 STATISTIC(numClonedBlock,           "CFGStructurizer cloned blocks");
     55 STATISTIC(numClonedInstr,           "CFGStructurizer cloned instructions");
     56 
     57 //===----------------------------------------------------------------------===//
     58 //
     59 // Miscellaneous utility for CFGStructurizer.
     60 //
     61 //===----------------------------------------------------------------------===//
     62 namespace llvmCFGStruct
     63 {
     64 #define SHOWNEWINSTR(i) \
     65   if (DEBUGME) errs() << "New instr: " << *i << "\n"
     66 
     67 #define SHOWNEWBLK(b, msg) \
     68 if (DEBUGME) { \
     69   errs() << msg << "BB" << b->getNumber() << "size " << b->size(); \
     70   errs() << "\n"; \
     71 }
     72 
     73 #define SHOWBLK_DETAIL(b, msg) \
     74 if (DEBUGME) { \
     75   if (b) { \
     76   errs() << msg << "BB" << b->getNumber() << "size " << b->size(); \
     77   b->print(errs()); \
     78   errs() << "\n"; \
     79   } \
     80 }
     81 
     82 #define INVALIDSCCNUM -1
     83 #define INVALIDREGNUM 0
     84 
     85 template<class LoopinfoT>
     86 void PrintLoopinfo(const LoopinfoT &LoopInfo, llvm::raw_ostream &OS) {
     87   for (typename LoopinfoT::iterator iter = LoopInfo.begin(),
     88        iterEnd = LoopInfo.end();
     89        iter != iterEnd; ++iter) {
     90     (*iter)->print(OS, 0);
     91   }
     92 }
     93 
     94 template<class NodeT>
     95 void ReverseVector(SmallVector<NodeT *, DEFAULT_VEC_SLOTS> &Src) {
     96   size_t sz = Src.size();
     97   for (size_t i = 0; i < sz/2; ++i) {
     98     NodeT *t = Src[i];
     99     Src[i] = Src[sz - i - 1];
    100     Src[sz - i - 1] = t;
    101   }
    102 }
    103 
    104 } //end namespace llvmCFGStruct
    105 
    106 
    107 //===----------------------------------------------------------------------===//
    108 //
    109 // MachinePostDominatorTree
    110 //
    111 //===----------------------------------------------------------------------===//
    112 
    113 namespace llvm {
    114 
    115 /// PostDominatorTree Class - Concrete subclass of DominatorTree that is used
    116 /// to compute the a post-dominator tree.
    117 ///
    118 struct MachinePostDominatorTree : public MachineFunctionPass {
    119   static char ID; // Pass identification, replacement for typeid
    120   DominatorTreeBase<MachineBasicBlock> *DT;
    121   MachinePostDominatorTree() : MachineFunctionPass(ID)
    122   {
    123     DT = new DominatorTreeBase<MachineBasicBlock>(true); //true indicate
    124     // postdominator
    125   }
    126 
    127   ~MachinePostDominatorTree();
    128 
    129   virtual bool runOnMachineFunction(MachineFunction &MF);
    130 
    131   virtual void getAnalysisUsage(AnalysisUsage &AU) const {
    132     AU.setPreservesAll();
    133     MachineFunctionPass::getAnalysisUsage(AU);
    134   }
    135 
    136   inline const std::vector<MachineBasicBlock *> &getRoots() const {
    137     return DT->getRoots();
    138   }
    139 
    140   inline MachineDomTreeNode *getRootNode() const {
    141     return DT->getRootNode();
    142   }
    143 
    144   inline MachineDomTreeNode *operator[](MachineBasicBlock *BB) const {
    145     return DT->getNode(BB);
    146   }
    147 
    148   inline MachineDomTreeNode *getNode(MachineBasicBlock *BB) const {
    149     return DT->getNode(BB);
    150   }
    151 
    152   inline bool dominates(MachineDomTreeNode *A, MachineDomTreeNode *B) const {
    153     return DT->dominates(A, B);
    154   }
    155 
    156   inline bool dominates(MachineBasicBlock *A, MachineBasicBlock *B) const {
    157     return DT->dominates(A, B);
    158   }
    159 
    160   inline bool
    161   properlyDominates(const MachineDomTreeNode *A, MachineDomTreeNode *B) const {
    162     return DT->properlyDominates(A, B);
    163   }
    164 
    165   inline bool
    166   properlyDominates(MachineBasicBlock *A, MachineBasicBlock *B) const {
    167     return DT->properlyDominates(A, B);
    168   }
    169 
    170   inline MachineBasicBlock *
    171   findNearestCommonDominator(MachineBasicBlock *A, MachineBasicBlock *B) {
    172     return DT->findNearestCommonDominator(A, B);
    173   }
    174 
    175   virtual void print(llvm::raw_ostream &OS, const Module *M = 0) const {
    176     DT->print(OS);
    177   }
    178 };
    179 } //end of namespace llvm
    180 
    181 char MachinePostDominatorTree::ID = 0;
    182 static RegisterPass<MachinePostDominatorTree>
    183 machinePostDominatorTreePass("machinepostdomtree",
    184                              "MachinePostDominator Tree Construction",
    185                              true, true);
    186 
    187 //const PassInfo *const llvm::MachinePostDominatorsID
    188 //= &machinePostDominatorTreePass;
    189 
    190 bool MachinePostDominatorTree::runOnMachineFunction(MachineFunction &F) {
    191   DT->recalculate(F);
    192   //DEBUG(DT->dump());
    193   return false;
    194 }
    195 
    196 MachinePostDominatorTree::~MachinePostDominatorTree() {
    197   delete DT;
    198 }
    199 
    200 //===----------------------------------------------------------------------===//
    201 //
    202 // supporting data structure for CFGStructurizer
    203 //
    204 //===----------------------------------------------------------------------===//
    205 
    206 namespace llvmCFGStruct
    207 {
    208 template<class PassT>
    209 struct CFGStructTraits {
    210 };
    211 
    212 template <class InstrT>
    213 class BlockInformation {
    214 public:
    215   bool isRetired;
    216   int  sccNum;
    217   //SmallVector<InstrT*, DEFAULT_VEC_SLOTS> succInstr;
    218   //Instructions defining the corresponding successor.
    219   BlockInformation() : isRetired(false), sccNum(INVALIDSCCNUM) {}
    220 };
    221 
    222 template <class BlockT, class InstrT, class RegiT>
    223 class LandInformation {
    224 public:
    225   BlockT *landBlk;
    226   std::set<RegiT> breakInitRegs;  //Registers that need to "reg = 0", before
    227                                   //WHILELOOP(thisloop) init before entering
    228                                   //thisloop.
    229   std::set<RegiT> contInitRegs;   //Registers that need to "reg = 0", after
    230                                   //WHILELOOP(thisloop) init after entering
    231                                   //thisloop.
    232   std::set<RegiT> endbranchInitRegs; //Init before entering this loop, at loop
    233                                      //land block, branch cond on this reg.
    234   std::set<RegiT> breakOnRegs;       //registers that need to "if (reg) break
    235                                      //endif" after ENDLOOP(thisloop) break
    236                                      //outerLoopOf(thisLoop).
    237   std::set<RegiT> contOnRegs;       //registers that need to "if (reg) continue
    238                                     //endif" after ENDLOOP(thisloop) continue on
    239                                     //outerLoopOf(thisLoop).
    240   LandInformation() : landBlk(NULL) {}
    241 };
    242 
    243 } //end of namespace llvmCFGStruct
    244 
    245 //===----------------------------------------------------------------------===//
    246 //
    247 // CFGStructurizer
    248 //
    249 //===----------------------------------------------------------------------===//
    250 
    251 namespace llvmCFGStruct
    252 {
    253 // bixia TODO: port it to BasicBlock, not just MachineBasicBlock.
    254 template<class PassT>
    255 class  CFGStructurizer
    256 {
    257 public:
    258   typedef enum {
    259     Not_SinglePath = 0,
    260     SinglePath_InPath = 1,
    261     SinglePath_NotInPath = 2
    262   } PathToKind;
    263 
    264 public:
    265   typedef typename PassT::InstructionType         InstrT;
    266   typedef typename PassT::FunctionType            FuncT;
    267   typedef typename PassT::DominatortreeType       DomTreeT;
    268   typedef typename PassT::PostDominatortreeType   PostDomTreeT;
    269   typedef typename PassT::DomTreeNodeType         DomTreeNodeT;
    270   typedef typename PassT::LoopinfoType            LoopInfoT;
    271 
    272   typedef GraphTraits<FuncT *>                    FuncGTraits;
    273   //typedef FuncGTraits::nodes_iterator BlockIterator;
    274   typedef typename FuncT::iterator                BlockIterator;
    275 
    276   typedef typename FuncGTraits::NodeType          BlockT;
    277   typedef GraphTraits<BlockT *>                   BlockGTraits;
    278   typedef GraphTraits<Inverse<BlockT *> >         InvBlockGTraits;
    279   //typedef BlockGTraits::succ_iterator InstructionIterator;
    280   typedef typename BlockT::iterator               InstrIterator;
    281 
    282   typedef CFGStructTraits<PassT>                  CFGTraits;
    283   typedef BlockInformation<InstrT>                BlockInfo;
    284   typedef std::map<BlockT *, BlockInfo *>         BlockInfoMap;
    285 
    286   typedef int                                     RegiT;
    287   typedef typename PassT::LoopType                LoopT;
    288   typedef LandInformation<BlockT, InstrT, RegiT>  LoopLandInfo;
    289         typedef std::map<LoopT *, LoopLandInfo *> LoopLandInfoMap;
    290         //landing info for loop break
    291   typedef SmallVector<BlockT *, 32>               BlockTSmallerVector;
    292 
    293 public:
    294   CFGStructurizer();
    295   ~CFGStructurizer();
    296 
    297   /// Perform the CFG structurization
    298   bool run(FuncT &Func, PassT &Pass, const AMDGPURegisterInfo *tri);
    299 
    300   /// Perform the CFG preparation
    301   bool prepare(FuncT &Func, PassT &Pass, const AMDGPURegisterInfo *tri);
    302 
    303 private:
    304   void reversePredicateSetter(typename BlockT::iterator);
    305   void   orderBlocks();
    306   void   printOrderedBlocks(llvm::raw_ostream &OS);
    307   int patternMatch(BlockT *CurBlock);
    308   int patternMatchGroup(BlockT *CurBlock);
    309 
    310   int serialPatternMatch(BlockT *CurBlock);
    311   int ifPatternMatch(BlockT *CurBlock);
    312   int switchPatternMatch(BlockT *CurBlock);
    313   int loopendPatternMatch(BlockT *CurBlock);
    314   int loopPatternMatch(BlockT *CurBlock);
    315 
    316   int loopbreakPatternMatch(LoopT *LoopRep, BlockT *LoopHeader);
    317   int loopcontPatternMatch(LoopT *LoopRep, BlockT *LoopHeader);
    318   //int loopWithoutBreak(BlockT *);
    319 
    320   void handleLoopbreak (BlockT *ExitingBlock, LoopT *ExitingLoop,
    321                         BlockT *ExitBlock, LoopT *exitLoop, BlockT *landBlock);
    322   void handleLoopcontBlock(BlockT *ContingBlock, LoopT *contingLoop,
    323                            BlockT *ContBlock, LoopT *contLoop);
    324   bool isSameloopDetachedContbreak(BlockT *Src1Block, BlockT *Src2Block);
    325   int handleJumpintoIf(BlockT *HeadBlock, BlockT *TrueBlock,
    326                        BlockT *FalseBlock);
    327   int handleJumpintoIfImp(BlockT *HeadBlock, BlockT *TrueBlock,
    328                           BlockT *FalseBlock);
    329   int improveSimpleJumpintoIf(BlockT *HeadBlock, BlockT *TrueBlock,
    330                               BlockT *FalseBlock, BlockT **LandBlockPtr);
    331   void showImproveSimpleJumpintoIf(BlockT *HeadBlock, BlockT *TrueBlock,
    332                                    BlockT *FalseBlock, BlockT *LandBlock,
    333                                    bool Detail = false);
    334   PathToKind singlePathTo(BlockT *SrcBlock, BlockT *DstBlock,
    335                           bool AllowSideEntry = true);
    336   BlockT *singlePathEnd(BlockT *srcBlock, BlockT *DstBlock,
    337                         bool AllowSideEntry = true);
    338   int cloneOnSideEntryTo(BlockT *PreBlock, BlockT *SrcBlock, BlockT *DstBlock);
    339   void mergeSerialBlock(BlockT *DstBlock, BlockT *srcBlock);
    340 
    341   void mergeIfthenelseBlock(InstrT *BranchInstr, BlockT *CurBlock,
    342                             BlockT *TrueBlock, BlockT *FalseBlock,
    343                             BlockT *LandBlock);
    344   void mergeLooplandBlock(BlockT *DstBlock, LoopLandInfo *LoopLand);
    345   void mergeLoopbreakBlock(BlockT *ExitingBlock, BlockT *ExitBlock,
    346                            BlockT *ExitLandBlock, RegiT SetReg);
    347   void settleLoopcontBlock(BlockT *ContingBlock, BlockT *ContBlock,
    348                            RegiT SetReg);
    349   BlockT *relocateLoopcontBlock(LoopT *ParentLoopRep, LoopT *LoopRep,
    350                                 std::set<BlockT*> &ExitBlockSet,
    351                                 BlockT *ExitLandBlk);
    352   BlockT *addLoopEndbranchBlock(LoopT *LoopRep,
    353                                 BlockTSmallerVector &ExitingBlocks,
    354                                 BlockTSmallerVector &ExitBlocks);
    355   BlockT *normalizeInfiniteLoopExit(LoopT *LoopRep);
    356   void removeUnconditionalBranch(BlockT *SrcBlock);
    357   void removeRedundantConditionalBranch(BlockT *SrcBlock);
    358   void addDummyExitBlock(SmallVector<BlockT *, DEFAULT_VEC_SLOTS> &RetBlocks);
    359 
    360   void removeSuccessor(BlockT *SrcBlock);
    361   BlockT *cloneBlockForPredecessor(BlockT *CurBlock, BlockT *PredBlock);
    362   BlockT *exitingBlock2ExitBlock (LoopT *LoopRep, BlockT *exitingBlock);
    363 
    364   void migrateInstruction(BlockT *SrcBlock, BlockT *DstBlock,
    365                           InstrIterator InsertPos);
    366 
    367   void recordSccnum(BlockT *SrcBlock, int SCCNum);
    368   int getSCCNum(BlockT *srcBlk);
    369 
    370   void retireBlock(BlockT *DstBlock, BlockT *SrcBlock);
    371   bool isRetiredBlock(BlockT *SrcBlock);
    372   bool isActiveLoophead(BlockT *CurBlock);
    373   bool needMigrateBlock(BlockT *Block);
    374 
    375   BlockT *recordLoopLandBlock(LoopT *LoopRep, BlockT *LandBlock,
    376                               BlockTSmallerVector &exitBlocks,
    377                               std::set<BlockT*> &ExitBlockSet);
    378   void setLoopLandBlock(LoopT *LoopRep, BlockT *Block = NULL);
    379   BlockT *getLoopLandBlock(LoopT *LoopRep);
    380   LoopLandInfo *getLoopLandInfo(LoopT *LoopRep);
    381 
    382   void addLoopBreakOnReg(LoopT *LoopRep, RegiT RegNum);
    383   void addLoopContOnReg(LoopT *LoopRep, RegiT RegNum);
    384   void addLoopBreakInitReg(LoopT *LoopRep, RegiT RegNum);
    385   void addLoopContInitReg(LoopT *LoopRep, RegiT RegNum);
    386   void addLoopEndbranchInitReg(LoopT *LoopRep, RegiT RegNum);
    387 
    388   bool hasBackEdge(BlockT *curBlock);
    389   unsigned getLoopDepth  (LoopT *LoopRep);
    390   int countActiveBlock(
    391     typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator IterStart,
    392     typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator IterEnd);
    393     BlockT *findNearestCommonPostDom(std::set<BlockT *>&);
    394   BlockT *findNearestCommonPostDom(BlockT *Block1, BlockT *Block2);
    395 
    396 private:
    397   DomTreeT *domTree;
    398   PostDomTreeT *postDomTree;
    399   LoopInfoT *loopInfo;
    400   PassT *passRep;
    401   FuncT *funcRep;
    402 
    403   BlockInfoMap blockInfoMap;
    404   LoopLandInfoMap loopLandInfoMap;
    405   SmallVector<BlockT *, DEFAULT_VEC_SLOTS> orderedBlks;
    406   const AMDGPURegisterInfo *TRI;
    407 
    408 };  //template class CFGStructurizer
    409 
    410 template<class PassT> CFGStructurizer<PassT>::CFGStructurizer()
    411   : domTree(NULL), postDomTree(NULL), loopInfo(NULL) {
    412 }
    413 
    414 template<class PassT> CFGStructurizer<PassT>::~CFGStructurizer() {
    415   for (typename BlockInfoMap::iterator I = blockInfoMap.begin(),
    416        E = blockInfoMap.end(); I != E; ++I) {
    417     delete I->second;
    418   }
    419 }
    420 
    421 template<class PassT>
    422 bool CFGStructurizer<PassT>::prepare(FuncT &func, PassT &pass,
    423                                      const AMDGPURegisterInfo * tri) {
    424   passRep = &pass;
    425   funcRep = &func;
    426   TRI = tri;
    427 
    428   bool changed = false;
    429   //func.RenumberBlocks();
    430 
    431   //to do, if not reducible flow graph, make it so ???
    432 
    433   if (DEBUGME) {
    434         errs() << "AMDGPUCFGStructurizer::prepare\n";
    435     //func.viewCFG();
    436     //func.viewCFGOnly();
    437     //func.dump();
    438   }
    439 
    440   //FIXME: gcc complains on this.
    441   //domTree = &pass.getAnalysis<DomTreeT>();
    442       //domTree = CFGTraits::getDominatorTree(pass);
    443       //if (DEBUGME) {
    444       //    domTree->print(errs());
    445     //}
    446 
    447   //FIXME: gcc complains on this.
    448   //domTree = &pass.getAnalysis<DomTreeT>();
    449       //postDomTree = CFGTraits::getPostDominatorTree(pass);
    450       //if (DEBUGME) {
    451       //   postDomTree->print(errs());
    452     //}
    453 
    454   //FIXME: gcc complains on this.
    455   //loopInfo = &pass.getAnalysis<LoopInfoT>();
    456   loopInfo = CFGTraits::getLoopInfo(pass);
    457   if (DEBUGME) {
    458     errs() << "LoopInfo:\n";
    459     PrintLoopinfo(*loopInfo, errs());
    460   }
    461 
    462   orderBlocks();
    463   if (DEBUGME) {
    464     errs() << "Ordered blocks:\n";
    465     printOrderedBlocks(errs());
    466   }
    467 
    468   SmallVector<BlockT *, DEFAULT_VEC_SLOTS> retBlks;
    469 
    470   for (typename LoopInfoT::iterator iter = loopInfo->begin(),
    471        iterEnd = loopInfo->end();
    472        iter != iterEnd; ++iter) {
    473     LoopT* loopRep = (*iter);
    474     BlockTSmallerVector exitingBlks;
    475     loopRep->getExitingBlocks(exitingBlks);
    476 
    477     if (exitingBlks.size() == 0) {
    478       BlockT* dummyExitBlk = normalizeInfiniteLoopExit(loopRep);
    479       if (dummyExitBlk != NULL)
    480         retBlks.push_back(dummyExitBlk);
    481     }
    482   }
    483 
    484   // Remove unconditional branch instr.
    485   // Add dummy exit block iff there are multiple returns.
    486 
    487   for (typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator
    488        iterBlk = orderedBlks.begin(), iterEndBlk = orderedBlks.end();
    489        iterBlk != iterEndBlk;
    490        ++iterBlk) {
    491     BlockT *curBlk = *iterBlk;
    492     removeUnconditionalBranch(curBlk);
    493     removeRedundantConditionalBranch(curBlk);
    494     if (CFGTraits::isReturnBlock(curBlk)) {
    495       retBlks.push_back(curBlk);
    496     }
    497     assert(curBlk->succ_size() <= 2);
    498     //assert(curBlk->size() > 0);
    499     //removeEmptyBlock(curBlk) ??
    500   } //for
    501 
    502   if (retBlks.size() >= 2) {
    503     addDummyExitBlock(retBlks);
    504     changed = true;
    505   }
    506 
    507   return changed;
    508 } //CFGStructurizer::prepare
    509 
    510 template<class PassT>
    511 bool CFGStructurizer<PassT>::run(FuncT &func, PassT &pass,
    512     const AMDGPURegisterInfo * tri) {
    513   passRep = &pass;
    514   funcRep = &func;
    515   TRI = tri;
    516 
    517   //func.RenumberBlocks();
    518 
    519   //Assume reducible CFG...
    520   if (DEBUGME) {
    521     errs() << "AMDGPUCFGStructurizer::run\n";
    522     //errs() << func.getFunction()->getNameStr() << "\n";
    523     func.viewCFG();
    524     //func.viewCFGOnly();
    525     //func.dump();
    526   }
    527 
    528 #if 1
    529   //FIXME: gcc complains on this.
    530   //domTree = &pass.getAnalysis<DomTreeT>();
    531   domTree = CFGTraits::getDominatorTree(pass);
    532   if (DEBUGME) {
    533     domTree->print(errs(), (const llvm::Module*)0);
    534   }
    535 #endif
    536 
    537   //FIXME: gcc complains on this.
    538   //domTree = &pass.getAnalysis<DomTreeT>();
    539   postDomTree = CFGTraits::getPostDominatorTree(pass);
    540   if (DEBUGME) {
    541     postDomTree->print(errs());
    542   }
    543 
    544   //FIXME: gcc complains on this.
    545   //loopInfo = &pass.getAnalysis<LoopInfoT>();
    546   loopInfo = CFGTraits::getLoopInfo(pass);
    547   if (DEBUGME) {
    548     errs() << "LoopInfo:\n";
    549     PrintLoopinfo(*loopInfo, errs());
    550   }
    551 
    552   orderBlocks();
    553 //#define STRESSTEST
    554 #ifdef STRESSTEST
    555   //Use the worse block ordering to test the algorithm.
    556   ReverseVector(orderedBlks);
    557 #endif
    558 
    559   if (DEBUGME) {
    560     errs() << "Ordered blocks:\n";
    561     printOrderedBlocks(errs());
    562   }
    563   int numIter = 0;
    564   bool finish = false;
    565   BlockT *curBlk;
    566   bool makeProgress = false;
    567   int numRemainedBlk = countActiveBlock(orderedBlks.begin(),
    568                                         orderedBlks.end());
    569 
    570   do {
    571     ++numIter;
    572     if (DEBUGME) {
    573       errs() << "numIter = " << numIter
    574              << ", numRemaintedBlk = " << numRemainedBlk << "\n";
    575     }
    576 
    577     typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator
    578       iterBlk = orderedBlks.begin();
    579     typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator
    580       iterBlkEnd = orderedBlks.end();
    581 
    582     typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator
    583       sccBeginIter = iterBlk;
    584     BlockT *sccBeginBlk = NULL;
    585     int sccNumBlk = 0;  // The number of active blocks, init to a
    586                         // maximum possible number.
    587     int sccNumIter;     // Number of iteration in this SCC.
    588 
    589     while (iterBlk != iterBlkEnd) {
    590       curBlk = *iterBlk;
    591 
    592       if (sccBeginBlk == NULL) {
    593         sccBeginIter = iterBlk;
    594         sccBeginBlk = curBlk;
    595         sccNumIter = 0;
    596         sccNumBlk = numRemainedBlk; // Init to maximum possible number.
    597         if (DEBUGME) {
    598               errs() << "start processing SCC" << getSCCNum(sccBeginBlk);
    599               errs() << "\n";
    600         }
    601       }
    602 
    603       if (!isRetiredBlock(curBlk)) {
    604         patternMatch(curBlk);
    605       }
    606 
    607       ++iterBlk;
    608 
    609       bool contNextScc = true;
    610       if (iterBlk == iterBlkEnd
    611           || getSCCNum(sccBeginBlk) != getSCCNum(*iterBlk)) {
    612         // Just finish one scc.
    613         ++sccNumIter;
    614         int sccRemainedNumBlk = countActiveBlock(sccBeginIter, iterBlk);
    615         if (sccRemainedNumBlk != 1 && sccRemainedNumBlk >= sccNumBlk) {
    616           if (DEBUGME) {
    617             errs() << "Can't reduce SCC " << getSCCNum(curBlk)
    618                    << ", sccNumIter = " << sccNumIter;
    619             errs() << "doesn't make any progress\n";
    620           }
    621           contNextScc = true;
    622         } else if (sccRemainedNumBlk != 1 && sccRemainedNumBlk < sccNumBlk) {
    623           sccNumBlk = sccRemainedNumBlk;
    624           iterBlk = sccBeginIter;
    625           contNextScc = false;
    626           if (DEBUGME) {
    627             errs() << "repeat processing SCC" << getSCCNum(curBlk)
    628                    << "sccNumIter = " << sccNumIter << "\n";
    629             func.viewCFG();
    630             //func.viewCFGOnly();
    631           }
    632         } else {
    633           // Finish the current scc.
    634           contNextScc = true;
    635         }
    636       } else {
    637         // Continue on next component in the current scc.
    638         contNextScc = false;
    639       }
    640 
    641       if (contNextScc) {
    642         sccBeginBlk = NULL;
    643       }
    644     } //while, "one iteration" over the function.
    645 
    646     BlockT *entryBlk = FuncGTraits::nodes_begin(&func);
    647     if (entryBlk->succ_size() == 0) {
    648       finish = true;
    649       if (DEBUGME) {
    650         errs() << "Reduce to one block\n";
    651       }
    652     } else {
    653       int newnumRemainedBlk
    654         = countActiveBlock(orderedBlks.begin(), orderedBlks.end());
    655       // consider cloned blocks ??
    656       if (newnumRemainedBlk == 1 || newnumRemainedBlk < numRemainedBlk) {
    657         makeProgress = true;
    658         numRemainedBlk = newnumRemainedBlk;
    659       } else {
    660         makeProgress = false;
    661         if (DEBUGME) {
    662           errs() << "No progress\n";
    663         }
    664       }
    665     }
    666   } while (!finish && makeProgress);
    667 
    668   // Misc wrap up to maintain the consistency of the Function representation.
    669   CFGTraits::wrapup(FuncGTraits::nodes_begin(&func));
    670 
    671   // Detach retired Block, release memory.
    672   for (typename BlockInfoMap::iterator iterMap = blockInfoMap.begin(),
    673        iterEndMap = blockInfoMap.end(); iterMap != iterEndMap; ++iterMap) {
    674     if ((*iterMap).second && (*iterMap).second->isRetired) {
    675       assert(((*iterMap).first)->getNumber() != -1);
    676       if (DEBUGME) {
    677         errs() << "Erase BB" << ((*iterMap).first)->getNumber() << "\n";
    678       }
    679       (*iterMap).first->eraseFromParent();  //Remove from the parent Function.
    680     }
    681     delete (*iterMap).second;
    682   }
    683   blockInfoMap.clear();
    684 
    685   // clear loopLandInfoMap
    686   for (typename LoopLandInfoMap::iterator iterMap = loopLandInfoMap.begin(),
    687        iterEndMap = loopLandInfoMap.end(); iterMap != iterEndMap; ++iterMap) {
    688     delete (*iterMap).second;
    689   }
    690   loopLandInfoMap.clear();
    691 
    692   if (DEBUGME) {
    693     func.viewCFG();
    694     //func.dump();
    695   }
    696 
    697   if (!finish) {
    698     assert(!"IRREDUCIBL_CF");
    699   }
    700 
    701   return true;
    702 } //CFGStructurizer::run
    703 
    704 /// Print the ordered Blocks.
    705 ///
    706 template<class PassT>
    707 void CFGStructurizer<PassT>::printOrderedBlocks(llvm::raw_ostream &os) {
    708   size_t i = 0;
    709   for (typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator
    710       iterBlk = orderedBlks.begin(), iterBlkEnd = orderedBlks.end();
    711        iterBlk != iterBlkEnd;
    712        ++iterBlk, ++i) {
    713     os << "BB" << (*iterBlk)->getNumber();
    714     os << "(" << getSCCNum(*iterBlk) << "," << (*iterBlk)->size() << ")";
    715     if (i != 0 && i % 10 == 0) {
    716       os << "\n";
    717     } else {
    718       os << " ";
    719     }
    720   }
    721 } //printOrderedBlocks
    722 
    723 /// Compute the reversed DFS post order of Blocks
    724 ///
    725 template<class PassT> void CFGStructurizer<PassT>::orderBlocks() {
    726   int sccNum = 0;
    727   BlockT *bb;
    728   for (scc_iterator<FuncT *> sccIter = scc_begin(funcRep),
    729        sccEnd = scc_end(funcRep); sccIter != sccEnd; ++sccIter, ++sccNum) {
    730     std::vector<BlockT *> &sccNext = *sccIter;
    731     for (typename std::vector<BlockT *>::const_iterator
    732          blockIter = sccNext.begin(), blockEnd = sccNext.end();
    733          blockIter != blockEnd; ++blockIter) {
    734       bb = *blockIter;
    735       orderedBlks.push_back(bb);
    736       recordSccnum(bb, sccNum);
    737     }
    738   }
    739 
    740   //walk through all the block in func to check for unreachable
    741   for (BlockIterator blockIter1 = FuncGTraits::nodes_begin(funcRep),
    742        blockEnd1 = FuncGTraits::nodes_end(funcRep);
    743        blockIter1 != blockEnd1; ++blockIter1) {
    744     BlockT *bb = &(*blockIter1);
    745     sccNum = getSCCNum(bb);
    746     if (sccNum == INVALIDSCCNUM) {
    747       errs() << "unreachable block BB" << bb->getNumber() << "\n";
    748     }
    749   } //end of for
    750 } //orderBlocks
    751 
    752 template<class PassT> int CFGStructurizer<PassT>::patternMatch(BlockT *curBlk) {
    753   int numMatch = 0;
    754   int curMatch;
    755 
    756   if (DEBUGME) {
    757         errs() << "Begin patternMatch BB" << curBlk->getNumber() << "\n";
    758   }
    759 
    760   while ((curMatch = patternMatchGroup(curBlk)) > 0) {
    761     numMatch += curMatch;
    762   }
    763 
    764   if (DEBUGME) {
    765         errs() << "End patternMatch BB" << curBlk->getNumber()
    766       << ", numMatch = " << numMatch << "\n";
    767   }
    768 
    769   return numMatch;
    770 } //patternMatch
    771 
    772 template<class PassT>
    773 int CFGStructurizer<PassT>::patternMatchGroup(BlockT *curBlk) {
    774   int numMatch = 0;
    775   numMatch += serialPatternMatch(curBlk);
    776   numMatch += ifPatternMatch(curBlk);
    777   //numMatch += switchPatternMatch(curBlk);
    778   numMatch += loopendPatternMatch(curBlk);
    779   numMatch += loopPatternMatch(curBlk);
    780   return numMatch;
    781 }//patternMatchGroup
    782 
    783 template<class PassT>
    784 int CFGStructurizer<PassT>::serialPatternMatch(BlockT *curBlk) {
    785   if (curBlk->succ_size() != 1) {
    786     return 0;
    787   }
    788 
    789   BlockT *childBlk = *curBlk->succ_begin();
    790   if (childBlk->pred_size() != 1 || isActiveLoophead(childBlk)) {
    791     return 0;
    792   }
    793 
    794   mergeSerialBlock(curBlk, childBlk);
    795   ++numSerialPatternMatch;
    796   return 1;
    797 } //serialPatternMatch
    798 
    799 template<class PassT>
    800 int CFGStructurizer<PassT>::ifPatternMatch(BlockT *curBlk) {
    801   //two edges
    802   if (curBlk->succ_size() != 2) {
    803     return 0;
    804   }
    805 
    806   if (hasBackEdge(curBlk)) {
    807     return 0;
    808   }
    809 
    810   InstrT *branchInstr = CFGTraits::getNormalBlockBranchInstr(curBlk);
    811   if (branchInstr == NULL) {
    812     return 0;
    813   }
    814 
    815   assert(CFGTraits::isCondBranch(branchInstr));
    816 
    817   BlockT *trueBlk = CFGTraits::getTrueBranch(branchInstr);
    818   BlockT *falseBlk = CFGTraits::getFalseBranch(curBlk, branchInstr);
    819   BlockT *landBlk;
    820   int cloned = 0;
    821 
    822   // TODO: Simplify
    823   if (trueBlk->succ_size() == 1 && falseBlk->succ_size() == 1
    824     && *trueBlk->succ_begin() == *falseBlk->succ_begin()) {
    825     landBlk = *trueBlk->succ_begin();
    826   } else if (trueBlk->succ_size() == 0 && falseBlk->succ_size() == 0) {
    827     landBlk = NULL;
    828   } else if (trueBlk->succ_size() == 1 && *trueBlk->succ_begin() == falseBlk) {
    829     landBlk = falseBlk;
    830     falseBlk = NULL;
    831   } else if (falseBlk->succ_size() == 1
    832              && *falseBlk->succ_begin() == trueBlk) {
    833     landBlk = trueBlk;
    834     trueBlk = NULL;
    835   } else if (falseBlk->succ_size() == 1
    836              && isSameloopDetachedContbreak(trueBlk, falseBlk)) {
    837     landBlk = *falseBlk->succ_begin();
    838   } else if (trueBlk->succ_size() == 1
    839     && isSameloopDetachedContbreak(falseBlk, trueBlk)) {
    840     landBlk = *trueBlk->succ_begin();
    841   } else {
    842     return handleJumpintoIf(curBlk, trueBlk, falseBlk);
    843   }
    844 
    845   // improveSimpleJumpinfoIf can handle the case where landBlk == NULL but the
    846   // new BB created for landBlk==NULL may introduce new challenge to the
    847   // reduction process.
    848   if (landBlk != NULL &&
    849       ((trueBlk && trueBlk->pred_size() > 1)
    850       || (falseBlk && falseBlk->pred_size() > 1))) {
    851      cloned += improveSimpleJumpintoIf(curBlk, trueBlk, falseBlk, &landBlk);
    852   }
    853 
    854   if (trueBlk && trueBlk->pred_size() > 1) {
    855     trueBlk = cloneBlockForPredecessor(trueBlk, curBlk);
    856     ++cloned;
    857   }
    858 
    859   if (falseBlk && falseBlk->pred_size() > 1) {
    860     falseBlk = cloneBlockForPredecessor(falseBlk, curBlk);
    861     ++cloned;
    862   }
    863 
    864   mergeIfthenelseBlock(branchInstr, curBlk, trueBlk, falseBlk, landBlk);
    865 
    866   ++numIfPatternMatch;
    867 
    868   numClonedBlock += cloned;
    869 
    870   return 1 + cloned;
    871 } //ifPatternMatch
    872 
    873 template<class PassT>
    874 int CFGStructurizer<PassT>::switchPatternMatch(BlockT *curBlk) {
    875   return 0;
    876 } //switchPatternMatch
    877 
    878 template<class PassT>
    879 int CFGStructurizer<PassT>::loopendPatternMatch(BlockT *curBlk) {
    880   LoopT *loopRep = loopInfo->getLoopFor(curBlk);
    881   typename std::vector<LoopT *> nestedLoops;
    882   while (loopRep) {
    883     nestedLoops.push_back(loopRep);
    884     loopRep = loopRep->getParentLoop();
    885   }
    886 
    887   if (nestedLoops.size() == 0) {
    888     return 0;
    889   }
    890 
    891   // Process nested loop outside->inside, so "continue" to a outside loop won't
    892   // be mistaken as "break" of the current loop.
    893   int num = 0;
    894   for (typename std::vector<LoopT *>::reverse_iterator
    895        iter = nestedLoops.rbegin(), iterEnd = nestedLoops.rend();
    896        iter != iterEnd; ++iter) {
    897     loopRep = *iter;
    898 
    899     if (getLoopLandBlock(loopRep) != NULL) {
    900       continue;
    901     }
    902 
    903     BlockT *loopHeader = loopRep->getHeader();
    904 
    905     int numBreak = loopbreakPatternMatch(loopRep, loopHeader);
    906 
    907     if (numBreak == -1) {
    908       break;
    909     }
    910 
    911     int numCont = loopcontPatternMatch(loopRep, loopHeader);
    912     num += numBreak + numCont;
    913   }
    914 
    915   return num;
    916 } //loopendPatternMatch
    917 
    918 template<class PassT>
    919 int CFGStructurizer<PassT>::loopPatternMatch(BlockT *curBlk) {
    920   if (curBlk->succ_size() != 0) {
    921     return 0;
    922   }
    923 
    924   int numLoop = 0;
    925   LoopT *loopRep = loopInfo->getLoopFor(curBlk);
    926   while (loopRep && loopRep->getHeader() == curBlk) {
    927     LoopLandInfo *loopLand = getLoopLandInfo(loopRep);
    928     if (loopLand) {
    929       BlockT *landBlk = loopLand->landBlk;
    930       assert(landBlk);
    931       if (!isRetiredBlock(landBlk)) {
    932         mergeLooplandBlock(curBlk, loopLand);
    933         ++numLoop;
    934       }
    935     }
    936     loopRep = loopRep->getParentLoop();
    937   }
    938 
    939   numLoopPatternMatch += numLoop;
    940 
    941   return numLoop;
    942 } //loopPatternMatch
    943 
    944 template<class PassT>
    945 int CFGStructurizer<PassT>::loopbreakPatternMatch(LoopT *loopRep,
    946                                                   BlockT *loopHeader) {
    947   BlockTSmallerVector exitingBlks;
    948   loopRep->getExitingBlocks(exitingBlks);
    949 
    950   if (DEBUGME) {
    951     errs() << "Loop has " << exitingBlks.size() << " exiting blocks\n";
    952   }
    953 
    954   if (exitingBlks.size() == 0) {
    955     setLoopLandBlock(loopRep);
    956     return 0;
    957   }
    958 
    959   // Compute the corresponding exitBlks and exit block set.
    960   BlockTSmallerVector exitBlks;
    961   std::set<BlockT *> exitBlkSet;
    962   for (typename BlockTSmallerVector::const_iterator iter = exitingBlks.begin(),
    963        iterEnd = exitingBlks.end(); iter != iterEnd; ++iter) {
    964     BlockT *exitingBlk = *iter;
    965     BlockT *exitBlk = exitingBlock2ExitBlock(loopRep, exitingBlk);
    966     exitBlks.push_back(exitBlk);
    967     exitBlkSet.insert(exitBlk);  //non-duplicate insert
    968   }
    969 
    970   assert(exitBlkSet.size() > 0);
    971   assert(exitBlks.size() == exitingBlks.size());
    972 
    973   if (DEBUGME) {
    974     errs() << "Loop has " << exitBlkSet.size() << " exit blocks\n";
    975   }
    976 
    977   // Find exitLandBlk.
    978   BlockT *exitLandBlk = NULL;
    979   int numCloned = 0;
    980   int numSerial = 0;
    981 
    982   if (exitBlkSet.size() == 1)
    983   {
    984     exitLandBlk = *exitBlkSet.begin();
    985   } else {
    986     exitLandBlk = findNearestCommonPostDom(exitBlkSet);
    987 
    988     if (exitLandBlk == NULL) {
    989       return -1;
    990     }
    991 
    992     bool allInPath = true;
    993     bool allNotInPath = true;
    994     for (typename std::set<BlockT*>::const_iterator
    995          iter = exitBlkSet.begin(),
    996          iterEnd = exitBlkSet.end();
    997          iter != iterEnd; ++iter) {
    998       BlockT *exitBlk = *iter;
    999 
   1000       PathToKind pathKind = singlePathTo(exitBlk, exitLandBlk, true);
   1001       if (DEBUGME) {
   1002         errs() << "BB" << exitBlk->getNumber()
   1003                << " to BB" << exitLandBlk->getNumber() << " PathToKind="
   1004                << pathKind << "\n";
   1005       }
   1006 
   1007       allInPath = allInPath && (pathKind == SinglePath_InPath);
   1008       allNotInPath = allNotInPath && (pathKind == SinglePath_NotInPath);
   1009 
   1010       if (!allInPath && !allNotInPath) {
   1011         if (DEBUGME) {
   1012               errs() << "singlePath check fail\n";
   1013         }
   1014         return -1;
   1015       }
   1016     } // check all exit blocks
   1017 
   1018     if (allNotInPath) {
   1019 #if 1
   1020 
   1021       // TODO: Simplify, maybe separate function?
   1022       //funcRep->viewCFG();
   1023       LoopT *parentLoopRep = loopRep->getParentLoop();
   1024       BlockT *parentLoopHeader = NULL;
   1025       if (parentLoopRep)
   1026         parentLoopHeader = parentLoopRep->getHeader();
   1027 
   1028       if (exitLandBlk == parentLoopHeader &&
   1029           (exitLandBlk = relocateLoopcontBlock(parentLoopRep,
   1030                                                loopRep,
   1031                                                exitBlkSet,
   1032                                                exitLandBlk)) != NULL) {
   1033         if (DEBUGME) {
   1034           errs() << "relocateLoopcontBlock success\n";
   1035         }
   1036       } else if ((exitLandBlk = addLoopEndbranchBlock(loopRep,
   1037                                                       exitingBlks,
   1038                                                       exitBlks)) != NULL) {
   1039         if (DEBUGME) {
   1040           errs() << "insertEndbranchBlock success\n";
   1041         }
   1042       } else {
   1043         if (DEBUGME) {
   1044           errs() << "loop exit fail\n";
   1045         }
   1046         return -1;
   1047       }
   1048 #else
   1049       return -1;
   1050 #endif
   1051     }
   1052 
   1053     // Handle side entry to exit path.
   1054     exitBlks.clear();
   1055     exitBlkSet.clear();
   1056     for (typename BlockTSmallerVector::iterator iterExiting =
   1057            exitingBlks.begin(),
   1058          iterExitingEnd = exitingBlks.end();
   1059          iterExiting != iterExitingEnd; ++iterExiting) {
   1060       BlockT *exitingBlk = *iterExiting;
   1061       BlockT *exitBlk = exitingBlock2ExitBlock(loopRep, exitingBlk);
   1062       BlockT *newExitBlk = exitBlk;
   1063 
   1064       if (exitBlk != exitLandBlk && exitBlk->pred_size() > 1) {
   1065         newExitBlk = cloneBlockForPredecessor(exitBlk, exitingBlk);
   1066         ++numCloned;
   1067       }
   1068 
   1069       numCloned += cloneOnSideEntryTo(exitingBlk, newExitBlk, exitLandBlk);
   1070 
   1071       exitBlks.push_back(newExitBlk);
   1072       exitBlkSet.insert(newExitBlk);
   1073     }
   1074 
   1075     for (typename BlockTSmallerVector::iterator iterExit = exitBlks.begin(),
   1076          iterExitEnd = exitBlks.end();
   1077          iterExit != iterExitEnd; ++iterExit) {
   1078       BlockT *exitBlk = *iterExit;
   1079       numSerial += serialPatternMatch(exitBlk);
   1080     }
   1081 
   1082     for (typename BlockTSmallerVector::iterator iterExit = exitBlks.begin(),
   1083          iterExitEnd = exitBlks.end();
   1084          iterExit != iterExitEnd; ++iterExit) {
   1085       BlockT *exitBlk = *iterExit;
   1086       if (exitBlk->pred_size() > 1) {
   1087         if (exitBlk != exitLandBlk) {
   1088           return -1;
   1089         }
   1090       } else {
   1091         if (exitBlk != exitLandBlk &&
   1092             (exitBlk->succ_size() != 1 ||
   1093             *exitBlk->succ_begin() != exitLandBlk)) {
   1094           return -1;
   1095         }
   1096       }
   1097     }
   1098   } // else
   1099 
   1100   // LoopT *exitLandLoop = loopInfo->getLoopFor(exitLandBlk);
   1101   exitLandBlk = recordLoopLandBlock(loopRep, exitLandBlk, exitBlks, exitBlkSet);
   1102 
   1103   // Fold break into the breaking block. Leverage across level breaks.
   1104   assert(exitingBlks.size() == exitBlks.size());
   1105   for (typename BlockTSmallerVector::const_iterator iterExit = exitBlks.begin(),
   1106        iterExiting = exitingBlks.begin(), iterExitEnd = exitBlks.end();
   1107        iterExit != iterExitEnd; ++iterExit, ++iterExiting) {
   1108     BlockT *exitBlk = *iterExit;
   1109     BlockT *exitingBlk = *iterExiting;
   1110     assert(exitBlk->pred_size() == 1 || exitBlk == exitLandBlk);
   1111     LoopT *exitingLoop = loopInfo->getLoopFor(exitingBlk);
   1112     handleLoopbreak(exitingBlk, exitingLoop, exitBlk, loopRep, exitLandBlk);
   1113   }
   1114 
   1115   int numBreak = static_cast<int>(exitingBlks.size());
   1116   numLoopbreakPatternMatch += numBreak;
   1117   numClonedBlock += numCloned;
   1118   return numBreak + numSerial + numCloned;
   1119 } //loopbreakPatternMatch
   1120 
   1121 template<class PassT>
   1122 int CFGStructurizer<PassT>::loopcontPatternMatch(LoopT *loopRep,
   1123                                                  BlockT *loopHeader) {
   1124   int numCont = 0;
   1125   SmallVector<BlockT *, DEFAULT_VEC_SLOTS> contBlk;
   1126   for (typename InvBlockGTraits::ChildIteratorType iter =
   1127        InvBlockGTraits::child_begin(loopHeader),
   1128        iterEnd = InvBlockGTraits::child_end(loopHeader);
   1129        iter != iterEnd; ++iter) {
   1130     BlockT *curBlk = *iter;
   1131     if (loopRep->contains(curBlk)) {
   1132       handleLoopcontBlock(curBlk, loopInfo->getLoopFor(curBlk),
   1133                           loopHeader, loopRep);
   1134       contBlk.push_back(curBlk);
   1135       ++numCont;
   1136     }
   1137   }
   1138 
   1139   for (typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::iterator
   1140        iter = contBlk.begin(), iterEnd = contBlk.end();
   1141        iter != iterEnd; ++iter) {
   1142     (*iter)->removeSuccessor(loopHeader);
   1143   }
   1144 
   1145   numLoopcontPatternMatch += numCont;
   1146 
   1147   return numCont;
   1148 } //loopcontPatternMatch
   1149 
   1150 
   1151 template<class PassT>
   1152 bool CFGStructurizer<PassT>::isSameloopDetachedContbreak(BlockT *src1Blk,
   1153                                                          BlockT *src2Blk) {
   1154   // return true iff src1Blk->succ_size() == 0 && src1Blk and src2Blk are in the
   1155   // same loop with LoopLandInfo without explicitly keeping track of
   1156   // loopContBlks and loopBreakBlks, this is a method to get the information.
   1157   //
   1158   if (src1Blk->succ_size() == 0) {
   1159     LoopT *loopRep = loopInfo->getLoopFor(src1Blk);
   1160     if (loopRep != NULL && loopRep == loopInfo->getLoopFor(src2Blk)) {
   1161       LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
   1162       if (theEntry != NULL) {
   1163         if (DEBUGME) {
   1164           errs() << "isLoopContBreakBlock yes src1 = BB"
   1165                  << src1Blk->getNumber()
   1166                  << " src2 = BB" << src2Blk->getNumber() << "\n";
   1167         }
   1168         return true;
   1169       }
   1170     }
   1171   }
   1172   return false;
   1173 }  //isSameloopDetachedContbreak
   1174 
   1175 template<class PassT>
   1176 int CFGStructurizer<PassT>::handleJumpintoIf(BlockT *headBlk,
   1177                                              BlockT *trueBlk,
   1178                                              BlockT *falseBlk) {
   1179   int num = handleJumpintoIfImp(headBlk, trueBlk, falseBlk);
   1180   if (num == 0) {
   1181     if (DEBUGME) {
   1182       errs() << "handleJumpintoIf swap trueBlk and FalseBlk" << "\n";
   1183     }
   1184     num = handleJumpintoIfImp(headBlk, falseBlk, trueBlk);
   1185   }
   1186   return num;
   1187 }
   1188 
   1189 template<class PassT>
   1190 int CFGStructurizer<PassT>::handleJumpintoIfImp(BlockT *headBlk,
   1191                                                 BlockT *trueBlk,
   1192                                                 BlockT *falseBlk) {
   1193   int num = 0;
   1194   BlockT *downBlk;
   1195 
   1196   //trueBlk could be the common post dominator
   1197   downBlk = trueBlk;
   1198 
   1199   if (DEBUGME) {
   1200     errs() << "handleJumpintoIfImp head = BB" << headBlk->getNumber()
   1201            << " true = BB" << trueBlk->getNumber()
   1202            << ", numSucc=" << trueBlk->succ_size()
   1203            << " false = BB" << falseBlk->getNumber() << "\n";
   1204   }
   1205 
   1206   while (downBlk) {
   1207     if (DEBUGME) {
   1208       errs() << "check down = BB" << downBlk->getNumber();
   1209     }
   1210 
   1211     if (//postDomTree->dominates(downBlk, falseBlk) &&
   1212         singlePathTo(falseBlk, downBlk) == SinglePath_InPath) {
   1213       if (DEBUGME) {
   1214         errs() << " working\n";
   1215       }
   1216 
   1217       num += cloneOnSideEntryTo(headBlk, trueBlk, downBlk);
   1218       num += cloneOnSideEntryTo(headBlk, falseBlk, downBlk);
   1219 
   1220       numClonedBlock += num;
   1221       num += serialPatternMatch(*headBlk->succ_begin());
   1222       num += serialPatternMatch(*(++headBlk->succ_begin()));
   1223       num += ifPatternMatch(headBlk);
   1224       assert(num > 0); //
   1225 
   1226       break;
   1227     }
   1228     if (DEBUGME) {
   1229       errs() << " not working\n";
   1230     }
   1231     downBlk = (downBlk->succ_size() == 1) ? (*downBlk->succ_begin()) : NULL;
   1232   } // walk down the postDomTree
   1233 
   1234   return num;
   1235 } //handleJumpintoIf
   1236 
   1237 template<class PassT>
   1238 void CFGStructurizer<PassT>::showImproveSimpleJumpintoIf(BlockT *headBlk,
   1239                                                          BlockT *trueBlk,
   1240                                                          BlockT *falseBlk,
   1241                                                          BlockT *landBlk,
   1242                                                          bool detail) {
   1243   errs() << "head = BB" << headBlk->getNumber()
   1244          << " size = " << headBlk->size();
   1245   if (detail) {
   1246     errs() << "\n";
   1247     headBlk->print(errs());
   1248     errs() << "\n";
   1249   }
   1250 
   1251   if (trueBlk) {
   1252     errs() << ", true = BB" << trueBlk->getNumber() << " size = "
   1253            << trueBlk->size() << " numPred = " << trueBlk->pred_size();
   1254     if (detail) {
   1255       errs() << "\n";
   1256       trueBlk->print(errs());
   1257       errs() << "\n";
   1258     }
   1259   }
   1260   if (falseBlk) {
   1261     errs() << ", false = BB" << falseBlk->getNumber() << " size = "
   1262            << falseBlk->size() << " numPred = " << falseBlk->pred_size();
   1263     if (detail) {
   1264       errs() << "\n";
   1265       falseBlk->print(errs());
   1266       errs() << "\n";
   1267     }
   1268   }
   1269   if (landBlk) {
   1270     errs() << ", land = BB" << landBlk->getNumber() << " size = "
   1271            << landBlk->size() << " numPred = " << landBlk->pred_size();
   1272     if (detail) {
   1273       errs() << "\n";
   1274       landBlk->print(errs());
   1275       errs() << "\n";
   1276     }
   1277   }
   1278 
   1279     errs() << "\n";
   1280 } //showImproveSimpleJumpintoIf
   1281 
   1282 template<class PassT>
   1283 int CFGStructurizer<PassT>::improveSimpleJumpintoIf(BlockT *headBlk,
   1284                                                     BlockT *trueBlk,
   1285                                                     BlockT *falseBlk,
   1286                                                     BlockT **plandBlk) {
   1287   bool migrateTrue = false;
   1288   bool migrateFalse = false;
   1289 
   1290   BlockT *landBlk = *plandBlk;
   1291 
   1292   assert((trueBlk == NULL || trueBlk->succ_size() <= 1)
   1293          && (falseBlk == NULL || falseBlk->succ_size() <= 1));
   1294 
   1295   if (trueBlk == falseBlk) {
   1296     return 0;
   1297   }
   1298 
   1299 #if 0
   1300   if (DEBUGME) {
   1301     errs() << "improveSimpleJumpintoIf: ";
   1302     showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 0);
   1303   }
   1304 #endif
   1305 
   1306   // unsigned landPredSize = landBlk ? landBlk->pred_size() : 0;
   1307   // May consider the # landBlk->pred_size() as it represents the number of
   1308   // assignment initReg = .. needed to insert.
   1309   migrateTrue = needMigrateBlock(trueBlk);
   1310   migrateFalse = needMigrateBlock(falseBlk);
   1311 
   1312   if (!migrateTrue && !migrateFalse) {
   1313     return 0;
   1314   }
   1315 
   1316   // If we need to migrate either trueBlk and falseBlk, migrate the rest that
   1317   // have more than one predecessors.  without doing this, its predecessor
   1318   // rather than headBlk will have undefined value in initReg.
   1319   if (!migrateTrue && trueBlk && trueBlk->pred_size() > 1) {
   1320     migrateTrue = true;
   1321   }
   1322   if (!migrateFalse && falseBlk && falseBlk->pred_size() > 1) {
   1323     migrateFalse = true;
   1324   }
   1325 
   1326   if (DEBUGME) {
   1327     errs() << "before improveSimpleJumpintoIf: ";
   1328     showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 0);
   1329     //showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 1);
   1330   }
   1331 
   1332   // org: headBlk => if () {trueBlk} else {falseBlk} => landBlk
   1333   //
   1334   // new: headBlk => if () {initReg = 1; org trueBlk branch} else
   1335   //      {initReg = 0; org falseBlk branch }
   1336   //      => landBlk => if (initReg) {org trueBlk} else {org falseBlk}
   1337   //      => org landBlk
   1338   //      if landBlk->pred_size() > 2, put the about if-else inside
   1339   //      if (initReg !=2) {...}
   1340   //
   1341   // add initReg = initVal to headBlk
   1342 
   1343   const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
   1344   unsigned initReg =
   1345     funcRep->getRegInfo().createVirtualRegister(I32RC);
   1346   if (!migrateTrue || !migrateFalse) {
   1347     int initVal = migrateTrue ? 0 : 1;
   1348     CFGTraits::insertAssignInstrBefore(headBlk, passRep, initReg, initVal);
   1349   }
   1350 
   1351   int numNewBlk = 0;
   1352 
   1353   if (landBlk == NULL) {
   1354     landBlk = funcRep->CreateMachineBasicBlock();
   1355     funcRep->push_back(landBlk);  //insert to function
   1356 
   1357     if (trueBlk) {
   1358       trueBlk->addSuccessor(landBlk);
   1359     } else {
   1360       headBlk->addSuccessor(landBlk);
   1361     }
   1362 
   1363     if (falseBlk) {
   1364       falseBlk->addSuccessor(landBlk);
   1365     } else {
   1366       headBlk->addSuccessor(landBlk);
   1367     }
   1368 
   1369     numNewBlk ++;
   1370   }
   1371 
   1372   bool landBlkHasOtherPred = (landBlk->pred_size() > 2);
   1373 
   1374   //insert AMDGPU::ENDIF to avoid special case "input landBlk == NULL"
   1375   typename BlockT::iterator insertPos =
   1376     CFGTraits::getInstrPos
   1377     (landBlk, CFGTraits::insertInstrBefore(landBlk, AMDGPU::ENDIF, passRep));
   1378 
   1379   if (landBlkHasOtherPred) {
   1380     unsigned immReg =
   1381       funcRep->getRegInfo().createVirtualRegister(I32RC);
   1382     CFGTraits::insertAssignInstrBefore(insertPos, passRep, immReg, 2);
   1383     unsigned cmpResReg =
   1384       funcRep->getRegInfo().createVirtualRegister(I32RC);
   1385 
   1386     CFGTraits::insertCompareInstrBefore(landBlk, insertPos, passRep, cmpResReg,
   1387                                         initReg, immReg);
   1388     CFGTraits::insertCondBranchBefore(landBlk, insertPos,
   1389                                       AMDGPU::IF_LOGICALZ_i32, passRep,
   1390                                       cmpResReg, DebugLoc());
   1391   }
   1392 
   1393   CFGTraits::insertCondBranchBefore(landBlk, insertPos, AMDGPU::IF_LOGICALNZ_i32,
   1394                                     passRep, initReg, DebugLoc());
   1395 
   1396   if (migrateTrue) {
   1397     migrateInstruction(trueBlk, landBlk, insertPos);
   1398     // need to uncondionally insert the assignment to ensure a path from its
   1399     // predecessor rather than headBlk has valid value in initReg if
   1400     // (initVal != 1).
   1401     CFGTraits::insertAssignInstrBefore(trueBlk, passRep, initReg, 1);
   1402   }
   1403   CFGTraits::insertInstrBefore(insertPos, AMDGPU::ELSE, passRep);
   1404 
   1405   if (migrateFalse) {
   1406     migrateInstruction(falseBlk, landBlk, insertPos);
   1407     // need to uncondionally insert the assignment to ensure a path from its
   1408     // predecessor rather than headBlk has valid value in initReg if
   1409     // (initVal != 0)
   1410     CFGTraits::insertAssignInstrBefore(falseBlk, passRep, initReg, 0);
   1411   }
   1412   //CFGTraits::insertInstrBefore(insertPos, AMDGPU::ENDIF, passRep);
   1413 
   1414   if (landBlkHasOtherPred) {
   1415     // add endif
   1416     CFGTraits::insertInstrBefore(insertPos, AMDGPU::ENDIF, passRep);
   1417 
   1418     // put initReg = 2 to other predecessors of landBlk
   1419     for (typename BlockT::pred_iterator predIter = landBlk->pred_begin(),
   1420          predIterEnd = landBlk->pred_end(); predIter != predIterEnd;
   1421          ++predIter) {
   1422       BlockT *curBlk = *predIter;
   1423       if (curBlk != trueBlk && curBlk != falseBlk) {
   1424         CFGTraits::insertAssignInstrBefore(curBlk, passRep, initReg, 2);
   1425       }
   1426     } //for
   1427   }
   1428   if (DEBUGME) {
   1429     errs() << "result from improveSimpleJumpintoIf: ";
   1430     showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 0);
   1431     //showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 1);
   1432   }
   1433 
   1434   // update landBlk
   1435   *plandBlk = landBlk;
   1436 
   1437   return numNewBlk;
   1438 } //improveSimpleJumpintoIf
   1439 
   1440 template<class PassT>
   1441 void CFGStructurizer<PassT>::handleLoopbreak(BlockT *exitingBlk,
   1442                                               LoopT *exitingLoop,
   1443                                              BlockT *exitBlk,
   1444                                               LoopT *exitLoop,
   1445                                              BlockT *landBlk) {
   1446   if (DEBUGME) {
   1447     errs() << "Trying to break loop-depth = " << getLoopDepth(exitLoop)
   1448            << " from loop-depth = " << getLoopDepth(exitingLoop) << "\n";
   1449   }
   1450   const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
   1451 
   1452   RegiT initReg = INVALIDREGNUM;
   1453   if (exitingLoop != exitLoop) {
   1454     initReg = static_cast<int>
   1455       (funcRep->getRegInfo().createVirtualRegister(I32RC));
   1456     assert(initReg != INVALIDREGNUM);
   1457     addLoopBreakInitReg(exitLoop, initReg);
   1458     while (exitingLoop != exitLoop && exitingLoop) {
   1459       addLoopBreakOnReg(exitingLoop, initReg);
   1460       exitingLoop = exitingLoop->getParentLoop();
   1461     }
   1462     assert(exitingLoop == exitLoop);
   1463   }
   1464 
   1465   mergeLoopbreakBlock(exitingBlk, exitBlk, landBlk, initReg);
   1466 
   1467 } //handleLoopbreak
   1468 
   1469 template<class PassT>
   1470 void CFGStructurizer<PassT>::handleLoopcontBlock(BlockT *contingBlk,
   1471                                                   LoopT *contingLoop,
   1472                                                  BlockT *contBlk,
   1473                                                   LoopT *contLoop) {
   1474   if (DEBUGME) {
   1475     errs() << "loopcontPattern cont = BB" << contingBlk->getNumber()
   1476            << " header = BB" << contBlk->getNumber() << "\n";
   1477 
   1478     errs() << "Trying to continue loop-depth = "
   1479            << getLoopDepth(contLoop)
   1480            << " from loop-depth = " << getLoopDepth(contingLoop) << "\n";
   1481   }
   1482 
   1483   RegiT initReg = INVALIDREGNUM;
   1484   const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
   1485   if (contingLoop != contLoop) {
   1486     initReg = static_cast<int>
   1487       (funcRep->getRegInfo().createVirtualRegister(I32RC));
   1488     assert(initReg != INVALIDREGNUM);
   1489     addLoopContInitReg(contLoop, initReg);
   1490     while (contingLoop && contingLoop->getParentLoop() != contLoop) {
   1491       addLoopBreakOnReg(contingLoop, initReg);  //not addLoopContOnReg
   1492       contingLoop = contingLoop->getParentLoop();
   1493     }
   1494     assert(contingLoop && contingLoop->getParentLoop() == contLoop);
   1495     addLoopContOnReg(contingLoop, initReg);
   1496   }
   1497 
   1498   settleLoopcontBlock(contingBlk, contBlk, initReg);
   1499   //contingBlk->removeSuccessor(loopHeader);
   1500 } //handleLoopcontBlock
   1501 
   1502 template<class PassT>
   1503 void CFGStructurizer<PassT>::mergeSerialBlock(BlockT *dstBlk, BlockT *srcBlk) {
   1504   if (DEBUGME) {
   1505     errs() << "serialPattern BB" << dstBlk->getNumber()
   1506            << " <= BB" << srcBlk->getNumber() << "\n";
   1507   }
   1508   //removeUnconditionalBranch(dstBlk);
   1509   dstBlk->splice(dstBlk->end(), srcBlk, FirstNonDebugInstr(srcBlk), srcBlk->end());
   1510 
   1511   dstBlk->removeSuccessor(srcBlk);
   1512   CFGTraits::cloneSuccessorList(dstBlk, srcBlk);
   1513 
   1514   removeSuccessor(srcBlk);
   1515   retireBlock(dstBlk, srcBlk);
   1516 } //mergeSerialBlock
   1517 
   1518 template<class PassT>
   1519 void CFGStructurizer<PassT>::mergeIfthenelseBlock(InstrT *branchInstr,
   1520                                                   BlockT *curBlk,
   1521                                                   BlockT *trueBlk,
   1522                                                   BlockT *falseBlk,
   1523                                                   BlockT *landBlk) {
   1524   if (DEBUGME) {
   1525     errs() << "ifPattern BB" << curBlk->getNumber();
   1526     errs() << "{  ";
   1527     if (trueBlk) {
   1528       errs() << "BB" << trueBlk->getNumber();
   1529     }
   1530     errs() << "  } else ";
   1531     errs() << "{  ";
   1532     if (falseBlk) {
   1533       errs() << "BB" << falseBlk->getNumber();
   1534     }
   1535     errs() << "  }\n ";
   1536     errs() << "landBlock: ";
   1537     if (landBlk == NULL) {
   1538       errs() << "NULL";
   1539     } else {
   1540       errs() << "BB" << landBlk->getNumber();
   1541     }
   1542     errs() << "\n";
   1543   }
   1544 
   1545   int oldOpcode = branchInstr->getOpcode();
   1546   DebugLoc branchDL = branchInstr->getDebugLoc();
   1547 
   1548 //    transform to
   1549 //    if cond
   1550 //       trueBlk
   1551 //    else
   1552 //       falseBlk
   1553 //    endif
   1554 //    landBlk
   1555 
   1556   typename BlockT::iterator branchInstrPos =
   1557     CFGTraits::getInstrPos(curBlk, branchInstr);
   1558   CFGTraits::insertCondBranchBefore(branchInstrPos,
   1559                                     CFGTraits::getBranchNzeroOpcode(oldOpcode),
   1560                                     passRep,
   1561 									branchDL);
   1562 
   1563   if (trueBlk) {
   1564     curBlk->splice(branchInstrPos, trueBlk, FirstNonDebugInstr(trueBlk), trueBlk->end());
   1565     curBlk->removeSuccessor(trueBlk);
   1566     if (landBlk && trueBlk->succ_size()!=0) {
   1567       trueBlk->removeSuccessor(landBlk);
   1568     }
   1569     retireBlock(curBlk, trueBlk);
   1570   }
   1571   CFGTraits::insertInstrBefore(branchInstrPos, AMDGPU::ELSE, passRep);
   1572 
   1573   if (falseBlk) {
   1574     curBlk->splice(branchInstrPos, falseBlk, FirstNonDebugInstr(falseBlk),
   1575                    falseBlk->end());
   1576     curBlk->removeSuccessor(falseBlk);
   1577     if (landBlk && falseBlk->succ_size() != 0) {
   1578       falseBlk->removeSuccessor(landBlk);
   1579     }
   1580     retireBlock(curBlk, falseBlk);
   1581   }
   1582   CFGTraits::insertInstrBefore(branchInstrPos, AMDGPU::ENDIF, passRep);
   1583 
   1584   //curBlk->remove(branchInstrPos);
   1585   branchInstr->eraseFromParent();
   1586 
   1587   if (landBlk && trueBlk && falseBlk) {
   1588     curBlk->addSuccessor(landBlk);
   1589   }
   1590 
   1591 } //mergeIfthenelseBlock
   1592 
   1593 template<class PassT>
   1594 void CFGStructurizer<PassT>::mergeLooplandBlock(BlockT *dstBlk,
   1595                                                 LoopLandInfo *loopLand) {
   1596   BlockT *landBlk = loopLand->landBlk;
   1597 
   1598   if (DEBUGME) {
   1599     errs() << "loopPattern header = BB" << dstBlk->getNumber()
   1600            << " land = BB" << landBlk->getNumber() << "\n";
   1601   }
   1602 
   1603   // Loop contInitRegs are init at the beginning of the loop.
   1604   for (typename std::set<RegiT>::const_iterator iter =
   1605          loopLand->contInitRegs.begin(),
   1606        iterEnd = loopLand->contInitRegs.end(); iter != iterEnd; ++iter) {
   1607     CFGTraits::insertAssignInstrBefore(dstBlk, passRep, *iter, 0);
   1608   }
   1609 
   1610   /* we last inserterd the DebugLoc in the
   1611    * BREAK_LOGICALZ_i32 or AMDGPU::BREAK_LOGICALNZ statement in the current dstBlk.
   1612    * search for the DebugLoc in the that statement.
   1613    * if not found, we have to insert the empty/default DebugLoc */
   1614   InstrT *loopBreakInstr = CFGTraits::getLoopBreakInstr(dstBlk);
   1615   DebugLoc DLBreak = (loopBreakInstr) ? loopBreakInstr->getDebugLoc() : DebugLoc();
   1616 
   1617   CFGTraits::insertInstrBefore(dstBlk, AMDGPU::WHILELOOP, passRep, DLBreak);
   1618   // Loop breakInitRegs are init before entering the loop.
   1619   for (typename std::set<RegiT>::const_iterator iter =
   1620          loopLand->breakInitRegs.begin(),
   1621        iterEnd = loopLand->breakInitRegs.end(); iter != iterEnd; ++iter)
   1622   {
   1623     CFGTraits::insertAssignInstrBefore(dstBlk, passRep, *iter, 0);
   1624   }
   1625   // Loop endbranchInitRegs are init before entering the loop.
   1626   for (typename std::set<RegiT>::const_iterator iter =
   1627          loopLand->endbranchInitRegs.begin(),
   1628        iterEnd = loopLand->endbranchInitRegs.end(); iter != iterEnd; ++iter) {
   1629     CFGTraits::insertAssignInstrBefore(dstBlk, passRep, *iter, 0);
   1630   }
   1631 
   1632   /* we last inserterd the DebugLoc in the continue statement in the current dstBlk
   1633    * search for the DebugLoc in the continue statement.
   1634    * if not found, we have to insert the empty/default DebugLoc */
   1635   InstrT *continueInstr = CFGTraits::getContinueInstr(dstBlk);
   1636   DebugLoc DLContinue = (continueInstr) ? continueInstr->getDebugLoc() : DebugLoc();
   1637 
   1638   CFGTraits::insertInstrEnd(dstBlk, AMDGPU::ENDLOOP, passRep, DLContinue);
   1639   // Loop breakOnRegs are check after the ENDLOOP: break the loop outside this
   1640   // loop.
   1641   for (typename std::set<RegiT>::const_iterator iter =
   1642          loopLand->breakOnRegs.begin(),
   1643        iterEnd = loopLand->breakOnRegs.end(); iter != iterEnd; ++iter) {
   1644     CFGTraits::insertCondBranchEnd(dstBlk, AMDGPU::BREAK_LOGICALNZ_i32, passRep,
   1645                                    *iter);
   1646   }
   1647 
   1648   // Loop contOnRegs are check after the ENDLOOP: cont the loop outside this
   1649   // loop.
   1650   for (std::set<RegiT>::const_iterator iter = loopLand->contOnRegs.begin(),
   1651        iterEnd = loopLand->contOnRegs.end(); iter != iterEnd; ++iter) {
   1652     CFGTraits::insertCondBranchEnd(dstBlk, AMDGPU::CONTINUE_LOGICALNZ_i32,
   1653                                    passRep, *iter);
   1654   }
   1655 
   1656   dstBlk->splice(dstBlk->end(), landBlk, landBlk->begin(), landBlk->end());
   1657 
   1658   for (typename BlockT::succ_iterator iter = landBlk->succ_begin(),
   1659        iterEnd = landBlk->succ_end(); iter != iterEnd; ++iter) {
   1660     dstBlk->addSuccessor(*iter);  // *iter's predecessor is also taken care of.
   1661   }
   1662 
   1663   removeSuccessor(landBlk);
   1664   retireBlock(dstBlk, landBlk);
   1665 } //mergeLooplandBlock
   1666 
   1667 template<class PassT>
   1668 void CFGStructurizer<PassT>::reversePredicateSetter(typename BlockT::iterator I)
   1669 {
   1670   while (I--) {
   1671     if (I->getOpcode() == AMDGPU::PRED_X) {
   1672       switch (static_cast<MachineInstr *>(I)->getOperand(2).getImm()) {
   1673       case OPCODE_IS_ZERO_INT:
   1674         static_cast<MachineInstr *>(I)->getOperand(2).setImm(OPCODE_IS_NOT_ZERO_INT);
   1675         return;
   1676       case OPCODE_IS_NOT_ZERO_INT:
   1677         static_cast<MachineInstr *>(I)->getOperand(2).setImm(OPCODE_IS_ZERO_INT);
   1678         return;
   1679       case OPCODE_IS_ZERO:
   1680         static_cast<MachineInstr *>(I)->getOperand(2).setImm(OPCODE_IS_NOT_ZERO);
   1681         return;
   1682       case OPCODE_IS_NOT_ZERO:
   1683         static_cast<MachineInstr *>(I)->getOperand(2).setImm(OPCODE_IS_ZERO);
   1684         return;
   1685       default:
   1686         assert(0 && "PRED_X Opcode invalid!");
   1687       }
   1688     }
   1689   }
   1690 }
   1691 
   1692 template<class PassT>
   1693 void CFGStructurizer<PassT>::mergeLoopbreakBlock(BlockT *exitingBlk,
   1694                                                  BlockT *exitBlk,
   1695                                                  BlockT *exitLandBlk,
   1696                                                  RegiT  setReg) {
   1697   if (DEBUGME) {
   1698     errs() << "loopbreakPattern exiting = BB" << exitingBlk->getNumber()
   1699            << " exit = BB" << exitBlk->getNumber()
   1700            << " land = BB" << exitLandBlk->getNumber() << "\n";
   1701   }
   1702 
   1703   InstrT *branchInstr = CFGTraits::getLoopendBlockBranchInstr(exitingBlk);
   1704   assert(branchInstr && CFGTraits::isCondBranch(branchInstr));
   1705 
   1706   DebugLoc DL = branchInstr->getDebugLoc();
   1707 
   1708   BlockT *trueBranch = CFGTraits::getTrueBranch(branchInstr);
   1709   int oldOpcode = branchInstr->getOpcode();
   1710 
   1711   //    transform exitingBlk to
   1712   //    if ( ) {
   1713   //       exitBlk (if exitBlk != exitLandBlk)
   1714   //       setReg = 1
   1715   //       break
   1716   //    }endif
   1717   //    successor = {orgSuccessor(exitingBlk) - exitBlk}
   1718 
   1719   typename BlockT::iterator branchInstrPos =
   1720     CFGTraits::getInstrPos(exitingBlk, branchInstr);
   1721 
   1722   if (exitBlk == exitLandBlk && setReg == INVALIDREGNUM) {
   1723     //break_logical
   1724 
   1725     if (trueBranch != exitBlk) {
   1726       reversePredicateSetter(branchInstrPos);
   1727     }
   1728     int newOpcode = CFGTraits::getBreakZeroOpcode(oldOpcode);
   1729     CFGTraits::insertCondBranchBefore(branchInstrPos, newOpcode, passRep, DL);
   1730   } else {
   1731     if (trueBranch != exitBlk) {
   1732       reversePredicateSetter(branchInstr);
   1733     }
   1734     int newOpcode = CFGTraits::getBreakZeroOpcode(oldOpcode);
   1735     CFGTraits::insertCondBranchBefore(branchInstrPos, newOpcode, passRep, DL);
   1736     if (exitBlk != exitLandBlk) {
   1737       //splice is insert-before ...
   1738       exitingBlk->splice(branchInstrPos, exitBlk, exitBlk->begin(),
   1739                          exitBlk->end());
   1740     }
   1741     if (setReg != INVALIDREGNUM) {
   1742       CFGTraits::insertAssignInstrBefore(branchInstrPos, passRep, setReg, 1);
   1743     }
   1744     CFGTraits::insertInstrBefore(branchInstrPos, AMDGPU::BREAK, passRep);
   1745     CFGTraits::insertInstrBefore(branchInstrPos, AMDGPU::ENDIF, passRep);
   1746   } //if_logical
   1747 
   1748   //now branchInst can be erase safely
   1749   //exitingBlk->eraseFromParent(branchInstr);
   1750   branchInstr->eraseFromParent();
   1751 
   1752   //now take care of successors, retire blocks
   1753   exitingBlk->removeSuccessor(exitBlk);
   1754   if (exitBlk != exitLandBlk) {
   1755     //splice is insert-before ...
   1756     exitBlk->removeSuccessor(exitLandBlk);
   1757     retireBlock(exitingBlk, exitBlk);
   1758   }
   1759 
   1760 } //mergeLoopbreakBlock
   1761 
   1762 template<class PassT>
   1763 void CFGStructurizer<PassT>::settleLoopcontBlock(BlockT *contingBlk,
   1764                                                  BlockT *contBlk,
   1765                                                  RegiT   setReg) {
   1766   if (DEBUGME) {
   1767     errs() << "settleLoopcontBlock conting = BB"
   1768            << contingBlk->getNumber()
   1769            << ", cont = BB" << contBlk->getNumber() << "\n";
   1770   }
   1771 
   1772   InstrT *branchInstr = CFGTraits::getLoopendBlockBranchInstr(contingBlk);
   1773   if (branchInstr) {
   1774     assert(CFGTraits::isCondBranch(branchInstr));
   1775     typename BlockT::iterator branchInstrPos =
   1776       CFGTraits::getInstrPos(contingBlk, branchInstr);
   1777     BlockT *trueBranch = CFGTraits::getTrueBranch(branchInstr);
   1778     int oldOpcode = branchInstr->getOpcode();
   1779 	DebugLoc DL = branchInstr->getDebugLoc();
   1780 
   1781     //    transform contingBlk to
   1782     //     if () {
   1783     //          move instr after branchInstr
   1784     //          continue
   1785     //        or
   1786     //          setReg = 1
   1787     //          break
   1788     //     }endif
   1789     //     successor = {orgSuccessor(contingBlk) - loopHeader}
   1790 
   1791     bool useContinueLogical =
   1792       (setReg == INVALIDREGNUM && (&*contingBlk->rbegin()) == branchInstr);
   1793 
   1794     if (useContinueLogical == false)
   1795     {
   1796       int branchOpcode =
   1797         trueBranch == contBlk ? CFGTraits::getBranchNzeroOpcode(oldOpcode)
   1798                               : CFGTraits::getBranchZeroOpcode(oldOpcode);
   1799 
   1800       CFGTraits::insertCondBranchBefore(branchInstrPos, branchOpcode, passRep, DL);
   1801 
   1802       if (setReg != INVALIDREGNUM) {
   1803         CFGTraits::insertAssignInstrBefore(branchInstrPos, passRep, setReg, 1);
   1804         // insertEnd to ensure phi-moves, if exist, go before the continue-instr.
   1805         CFGTraits::insertInstrEnd(contingBlk, AMDGPU::BREAK, passRep, DL);
   1806       } else {
   1807         // insertEnd to ensure phi-moves, if exist, go before the continue-instr.
   1808         CFGTraits::insertInstrEnd(contingBlk, AMDGPU::CONTINUE, passRep, DL);
   1809       }
   1810 
   1811       CFGTraits::insertInstrEnd(contingBlk, AMDGPU::ENDIF, passRep, DL);
   1812     } else {
   1813       int branchOpcode =
   1814         trueBranch == contBlk ? CFGTraits::getContinueNzeroOpcode(oldOpcode)
   1815                               : CFGTraits::getContinueZeroOpcode(oldOpcode);
   1816 
   1817       CFGTraits::insertCondBranchBefore(branchInstrPos, branchOpcode, passRep, DL);
   1818     }
   1819 
   1820     //contingBlk->eraseFromParent(branchInstr);
   1821     branchInstr->eraseFromParent();
   1822   } else {
   1823     /* if we've arrived here then we've already erased the branch instruction
   1824 	 * travel back up the basic block to see the last reference of our debug location
   1825 	 * we've just inserted that reference here so it should be representative */
   1826     if (setReg != INVALIDREGNUM) {
   1827       CFGTraits::insertAssignInstrBefore(contingBlk, passRep, setReg, 1);
   1828       // insertEnd to ensure phi-moves, if exist, go before the continue-instr.
   1829       CFGTraits::insertInstrEnd(contingBlk, AMDGPU::BREAK, passRep, CFGTraits::getLastDebugLocInBB(contingBlk));
   1830     } else {
   1831       // insertEnd to ensure phi-moves, if exist, go before the continue-instr.
   1832       CFGTraits::insertInstrEnd(contingBlk, AMDGPU::CONTINUE, passRep, CFGTraits::getLastDebugLocInBB(contingBlk));
   1833     }
   1834   } //else
   1835 
   1836 } //settleLoopcontBlock
   1837 
   1838 // BBs in exitBlkSet are determined as in break-path for loopRep,
   1839 // before we can put code for BBs as inside loop-body for loopRep
   1840 // check whether those BBs are determined as cont-BB for parentLoopRep
   1841 // earlier.
   1842 // If so, generate a new BB newBlk
   1843 //    (1) set newBlk common successor of BBs in exitBlkSet
   1844 //    (2) change the continue-instr in BBs in exitBlkSet to break-instr
   1845 //    (3) generate continue-instr in newBlk
   1846 //
   1847 template<class PassT>
   1848 typename CFGStructurizer<PassT>::BlockT *
   1849 CFGStructurizer<PassT>::relocateLoopcontBlock(LoopT *parentLoopRep,
   1850                                               LoopT *loopRep,
   1851                                               std::set<BlockT *> &exitBlkSet,
   1852                                               BlockT *exitLandBlk) {
   1853   std::set<BlockT *> endBlkSet;
   1854 
   1855 //  BlockT *parentLoopHead = parentLoopRep->getHeader();
   1856 
   1857 
   1858   for (typename std::set<BlockT *>::const_iterator iter = exitBlkSet.begin(),
   1859        iterEnd = exitBlkSet.end();
   1860        iter != iterEnd; ++iter) {
   1861     BlockT *exitBlk = *iter;
   1862     BlockT *endBlk = singlePathEnd(exitBlk, exitLandBlk);
   1863 
   1864     if (endBlk == NULL || CFGTraits::getContinueInstr(endBlk) == NULL)
   1865       return NULL;
   1866 
   1867     endBlkSet.insert(endBlk);
   1868   }
   1869 
   1870   BlockT *newBlk = funcRep->CreateMachineBasicBlock();
   1871   funcRep->push_back(newBlk);  //insert to function
   1872   CFGTraits::insertInstrEnd(newBlk, AMDGPU::CONTINUE, passRep);
   1873   SHOWNEWBLK(newBlk, "New continue block: ");
   1874 
   1875   for (typename std::set<BlockT*>::const_iterator iter = endBlkSet.begin(),
   1876        iterEnd = endBlkSet.end();
   1877        iter != iterEnd; ++iter) {
   1878       BlockT *endBlk = *iter;
   1879       InstrT *contInstr = CFGTraits::getContinueInstr(endBlk);
   1880       if (contInstr) {
   1881         contInstr->eraseFromParent();
   1882       }
   1883       endBlk->addSuccessor(newBlk);
   1884       if (DEBUGME) {
   1885         errs() << "Add new continue Block to BB"
   1886                << endBlk->getNumber() << " successors\n";
   1887       }
   1888   }
   1889 
   1890   return newBlk;
   1891 } //relocateLoopcontBlock
   1892 
   1893 
   1894 // LoopEndbranchBlock is a BB created by the CFGStructurizer to use as
   1895 // LoopLandBlock. This BB branch on the loop endBranchInit register to the
   1896 // pathes corresponding to the loop exiting branches.
   1897 
   1898 template<class PassT>
   1899 typename CFGStructurizer<PassT>::BlockT *
   1900 CFGStructurizer<PassT>::addLoopEndbranchBlock(LoopT *loopRep,
   1901                                               BlockTSmallerVector &exitingBlks,
   1902                                               BlockTSmallerVector &exitBlks) {
   1903   const AMDGPUInstrInfo *tii =
   1904              static_cast<const AMDGPUInstrInfo *>(passRep->getTargetInstrInfo());
   1905   const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
   1906 
   1907   RegiT endBranchReg = static_cast<int>
   1908     (funcRep->getRegInfo().createVirtualRegister(I32RC));
   1909   assert(endBranchReg >= 0);
   1910 
   1911   // reg = 0 before entering the loop
   1912   addLoopEndbranchInitReg(loopRep, endBranchReg);
   1913 
   1914   uint32_t numBlks = static_cast<uint32_t>(exitingBlks.size());
   1915   assert(numBlks >=2 && numBlks == exitBlks.size());
   1916 
   1917   BlockT *preExitingBlk = exitingBlks[0];
   1918   BlockT *preExitBlk = exitBlks[0];
   1919   BlockT *preBranchBlk = funcRep->CreateMachineBasicBlock();
   1920   funcRep->push_back(preBranchBlk);  //insert to function
   1921   SHOWNEWBLK(preBranchBlk, "New loopEndbranch block: ");
   1922 
   1923   BlockT *newLandBlk = preBranchBlk;
   1924 
   1925       CFGTraits::replaceInstrUseOfBlockWith(preExitingBlk, preExitBlk,
   1926         newLandBlk);
   1927   preExitingBlk->removeSuccessor(preExitBlk);
   1928   preExitingBlk->addSuccessor(newLandBlk);
   1929 
   1930   //it is redundant to add reg = 0 to exitingBlks[0]
   1931 
   1932   // For 1..n th exiting path (the last iteration handles two pathes) create the
   1933   // branch to the previous path and the current path.
   1934   for (uint32_t i = 1; i < numBlks; ++i) {
   1935     BlockT *curExitingBlk = exitingBlks[i];
   1936     BlockT *curExitBlk = exitBlks[i];
   1937     BlockT *curBranchBlk;
   1938 
   1939     if (i == numBlks - 1) {
   1940       curBranchBlk = curExitBlk;
   1941     } else {
   1942       curBranchBlk = funcRep->CreateMachineBasicBlock();
   1943       funcRep->push_back(curBranchBlk);  //insert to function
   1944       SHOWNEWBLK(curBranchBlk, "New loopEndbranch block: ");
   1945     }
   1946 
   1947     // Add reg = i to exitingBlks[i].
   1948     CFGTraits::insertAssignInstrBefore(curExitingBlk, passRep,
   1949                                        endBranchReg, i);
   1950 
   1951     // Remove the edge (exitingBlks[i] exitBlks[i]) add new edge
   1952     // (exitingBlks[i], newLandBlk).
   1953     CFGTraits::replaceInstrUseOfBlockWith(curExitingBlk, curExitBlk,
   1954                                           newLandBlk);
   1955     curExitingBlk->removeSuccessor(curExitBlk);
   1956     curExitingBlk->addSuccessor(newLandBlk);
   1957 
   1958     // add to preBranchBlk the branch instruction:
   1959     // if (endBranchReg == preVal)
   1960     //    preExitBlk
   1961     // else
   1962     //    curBranchBlk
   1963     //
   1964     // preValReg = i - 1
   1965 
   1966   DebugLoc DL;
   1967   RegiT preValReg = static_cast<int>
   1968     (funcRep->getRegInfo().createVirtualRegister(I32RC));
   1969 
   1970   preBranchBlk->insert(preBranchBlk->begin(),
   1971                        tii->getMovImmInstr(preBranchBlk->getParent(), preValReg,
   1972                        i - 1));
   1973 
   1974   // condResReg = (endBranchReg == preValReg)
   1975     RegiT condResReg = static_cast<int>
   1976       (funcRep->getRegInfo().createVirtualRegister(I32RC));
   1977     BuildMI(preBranchBlk, DL, tii->get(tii->getIEQOpcode()), condResReg)
   1978       .addReg(endBranchReg).addReg(preValReg);
   1979 
   1980     BuildMI(preBranchBlk, DL, tii->get(AMDGPU::BRANCH_COND_i32))
   1981       .addMBB(preExitBlk).addReg(condResReg);
   1982 
   1983     preBranchBlk->addSuccessor(preExitBlk);
   1984     preBranchBlk->addSuccessor(curBranchBlk);
   1985 
   1986     // Update preExitingBlk, preExitBlk, preBranchBlk.
   1987     preExitingBlk = curExitingBlk;
   1988     preExitBlk = curExitBlk;
   1989     preBranchBlk = curBranchBlk;
   1990 
   1991   }  //end for 1 .. n blocks
   1992 
   1993   return newLandBlk;
   1994 } //addLoopEndbranchBlock
   1995 
   1996 template<class PassT>
   1997 typename CFGStructurizer<PassT>::PathToKind
   1998 CFGStructurizer<PassT>::singlePathTo(BlockT *srcBlk, BlockT *dstBlk,
   1999                                      bool allowSideEntry) {
   2000   assert(dstBlk);
   2001 
   2002   if (srcBlk == dstBlk) {
   2003     return SinglePath_InPath;
   2004   }
   2005 
   2006   while (srcBlk && srcBlk->succ_size() == 1) {
   2007     srcBlk = *srcBlk->succ_begin();
   2008     if (srcBlk == dstBlk) {
   2009       return SinglePath_InPath;
   2010     }
   2011 
   2012     if (!allowSideEntry && srcBlk->pred_size() > 1) {
   2013       return Not_SinglePath;
   2014     }
   2015   }
   2016 
   2017   if (srcBlk && srcBlk->succ_size()==0) {
   2018     return SinglePath_NotInPath;
   2019   }
   2020 
   2021   return Not_SinglePath;
   2022 } //singlePathTo
   2023 
   2024 // If there is a single path from srcBlk to dstBlk, return the last block before
   2025 // dstBlk If there is a single path from srcBlk->end without dstBlk, return the
   2026 // last block in the path Otherwise, return NULL
   2027 template<class PassT>
   2028 typename CFGStructurizer<PassT>::BlockT *
   2029 CFGStructurizer<PassT>::singlePathEnd(BlockT *srcBlk, BlockT *dstBlk,
   2030                                       bool allowSideEntry) {
   2031   assert(dstBlk);
   2032 
   2033   if (srcBlk == dstBlk) {
   2034     return srcBlk;
   2035   }
   2036 
   2037   if (srcBlk->succ_size() == 0) {
   2038     return srcBlk;
   2039   }
   2040 
   2041   while (srcBlk && srcBlk->succ_size() == 1) {
   2042     BlockT *preBlk = srcBlk;
   2043 
   2044     srcBlk = *srcBlk->succ_begin();
   2045     if (srcBlk == NULL) {
   2046       return preBlk;
   2047     }
   2048 
   2049     if (!allowSideEntry && srcBlk->pred_size() > 1) {
   2050       return NULL;
   2051     }
   2052   }
   2053 
   2054   if (srcBlk && srcBlk->succ_size()==0) {
   2055     return srcBlk;
   2056   }
   2057 
   2058   return NULL;
   2059 
   2060 } //singlePathEnd
   2061 
   2062 template<class PassT>
   2063 int CFGStructurizer<PassT>::cloneOnSideEntryTo(BlockT *preBlk, BlockT *srcBlk,
   2064                                                BlockT *dstBlk) {
   2065   int cloned = 0;
   2066   assert(preBlk->isSuccessor(srcBlk));
   2067   while (srcBlk && srcBlk != dstBlk) {
   2068     assert(srcBlk->succ_size() == 1);
   2069     if (srcBlk->pred_size() > 1) {
   2070       srcBlk = cloneBlockForPredecessor(srcBlk, preBlk);
   2071       ++cloned;
   2072     }
   2073 
   2074     preBlk = srcBlk;
   2075     srcBlk = *srcBlk->succ_begin();
   2076   }
   2077 
   2078   return cloned;
   2079 } //cloneOnSideEntryTo
   2080 
   2081 template<class PassT>
   2082 typename CFGStructurizer<PassT>::BlockT *
   2083 CFGStructurizer<PassT>::cloneBlockForPredecessor(BlockT *curBlk,
   2084                                                  BlockT *predBlk) {
   2085   assert(predBlk->isSuccessor(curBlk) &&
   2086          "succBlk is not a prececessor of curBlk");
   2087 
   2088   BlockT *cloneBlk = CFGTraits::clone(curBlk);  //clone instructions
   2089   CFGTraits::replaceInstrUseOfBlockWith(predBlk, curBlk, cloneBlk);
   2090   //srcBlk, oldBlk, newBlk
   2091 
   2092   predBlk->removeSuccessor(curBlk);
   2093   predBlk->addSuccessor(cloneBlk);
   2094 
   2095   // add all successor to cloneBlk
   2096   CFGTraits::cloneSuccessorList(cloneBlk, curBlk);
   2097 
   2098   numClonedInstr += curBlk->size();
   2099 
   2100   if (DEBUGME) {
   2101     errs() << "Cloned block: " << "BB"
   2102            << curBlk->getNumber() << "size " << curBlk->size() << "\n";
   2103   }
   2104 
   2105   SHOWNEWBLK(cloneBlk, "result of Cloned block: ");
   2106 
   2107   return cloneBlk;
   2108 } //cloneBlockForPredecessor
   2109 
   2110 template<class PassT>
   2111 typename CFGStructurizer<PassT>::BlockT *
   2112 CFGStructurizer<PassT>::exitingBlock2ExitBlock(LoopT *loopRep,
   2113                                                BlockT *exitingBlk) {
   2114   BlockT *exitBlk = NULL;
   2115 
   2116   for (typename BlockT::succ_iterator iterSucc = exitingBlk->succ_begin(),
   2117        iterSuccEnd = exitingBlk->succ_end();
   2118        iterSucc != iterSuccEnd; ++iterSucc) {
   2119     BlockT *curBlk = *iterSucc;
   2120     if (!loopRep->contains(curBlk)) {
   2121       assert(exitBlk == NULL);
   2122       exitBlk = curBlk;
   2123     }
   2124   }
   2125 
   2126   assert(exitBlk != NULL);
   2127 
   2128   return exitBlk;
   2129 } //exitingBlock2ExitBlock
   2130 
   2131 template<class PassT>
   2132 void CFGStructurizer<PassT>::migrateInstruction(BlockT *srcBlk,
   2133                                                 BlockT *dstBlk,
   2134                                                 InstrIterator insertPos) {
   2135   InstrIterator spliceEnd;
   2136   //look for the input branchinstr, not the AMDGPU branchinstr
   2137   InstrT *branchInstr = CFGTraits::getNormalBlockBranchInstr(srcBlk);
   2138   if (branchInstr == NULL) {
   2139     if (DEBUGME) {
   2140       errs() << "migrateInstruction don't see branch instr\n" ;
   2141     }
   2142     spliceEnd = srcBlk->end();
   2143   } else {
   2144     if (DEBUGME) {
   2145       errs() << "migrateInstruction see branch instr\n" ;
   2146       branchInstr->dump();
   2147     }
   2148     spliceEnd = CFGTraits::getInstrPos(srcBlk, branchInstr);
   2149   }
   2150   if (DEBUGME) {
   2151     errs() << "migrateInstruction before splice dstSize = " << dstBlk->size()
   2152       << "srcSize = " << srcBlk->size() << "\n";
   2153   }
   2154 
   2155   //splice insert before insertPos
   2156   dstBlk->splice(insertPos, srcBlk, srcBlk->begin(), spliceEnd);
   2157 
   2158   if (DEBUGME) {
   2159     errs() << "migrateInstruction after splice dstSize = " << dstBlk->size()
   2160       << "srcSize = " << srcBlk->size() << "\n";
   2161   }
   2162 } //migrateInstruction
   2163 
   2164 // normalizeInfiniteLoopExit change
   2165 //   B1:
   2166 //        uncond_br LoopHeader
   2167 //
   2168 // to
   2169 //   B1:
   2170 //        cond_br 1 LoopHeader dummyExit
   2171 // and return the newly added dummy exit block
   2172 //
   2173 template<class PassT>
   2174 typename CFGStructurizer<PassT>::BlockT *
   2175 CFGStructurizer<PassT>::normalizeInfiniteLoopExit(LoopT* LoopRep) {
   2176   BlockT *loopHeader;
   2177   BlockT *loopLatch;
   2178   loopHeader = LoopRep->getHeader();
   2179   loopLatch = LoopRep->getLoopLatch();
   2180   BlockT *dummyExitBlk = NULL;
   2181   const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
   2182   if (loopHeader!=NULL && loopLatch!=NULL) {
   2183     InstrT *branchInstr = CFGTraits::getLoopendBlockBranchInstr(loopLatch);
   2184     if (branchInstr!=NULL && CFGTraits::isUncondBranch(branchInstr)) {
   2185       dummyExitBlk = funcRep->CreateMachineBasicBlock();
   2186       funcRep->push_back(dummyExitBlk);  //insert to function
   2187       SHOWNEWBLK(dummyExitBlk, "DummyExitBlock to normalize infiniteLoop: ");
   2188 
   2189       if (DEBUGME) errs() << "Old branch instr: " << *branchInstr << "\n";
   2190 
   2191       typename BlockT::iterator insertPos =
   2192         CFGTraits::getInstrPos(loopLatch, branchInstr);
   2193       unsigned immReg =
   2194         funcRep->getRegInfo().createVirtualRegister(I32RC);
   2195       CFGTraits::insertAssignInstrBefore(insertPos, passRep, immReg, 1);
   2196       InstrT *newInstr =
   2197         CFGTraits::insertInstrBefore(insertPos, AMDGPU::BRANCH_COND_i32, passRep);
   2198       MachineInstrBuilder(newInstr).addMBB(loopHeader).addReg(immReg, false);
   2199 
   2200       SHOWNEWINSTR(newInstr);
   2201 
   2202       branchInstr->eraseFromParent();
   2203       loopLatch->addSuccessor(dummyExitBlk);
   2204     }
   2205   }
   2206 
   2207   return dummyExitBlk;
   2208 } //normalizeInfiniteLoopExit
   2209 
   2210 template<class PassT>
   2211 void CFGStructurizer<PassT>::removeUnconditionalBranch(BlockT *srcBlk) {
   2212   InstrT *branchInstr;
   2213 
   2214   // I saw two unconditional branch in one basic block in example
   2215   // test_fc_do_while_or.c need to fix the upstream on this to remove the loop.
   2216   while ((branchInstr = CFGTraits::getLoopendBlockBranchInstr(srcBlk))
   2217           && CFGTraits::isUncondBranch(branchInstr)) {
   2218     if (DEBUGME) {
   2219           errs() << "Removing unconditional branch instruction" ;
   2220       branchInstr->dump();
   2221     }
   2222     branchInstr->eraseFromParent();
   2223   }
   2224 } //removeUnconditionalBranch
   2225 
   2226 template<class PassT>
   2227 void CFGStructurizer<PassT>::removeRedundantConditionalBranch(BlockT *srcBlk) {
   2228   if (srcBlk->succ_size() == 2) {
   2229     BlockT *blk1 = *srcBlk->succ_begin();
   2230     BlockT *blk2 = *(++srcBlk->succ_begin());
   2231 
   2232     if (blk1 == blk2) {
   2233       InstrT *branchInstr = CFGTraits::getNormalBlockBranchInstr(srcBlk);
   2234       assert(branchInstr && CFGTraits::isCondBranch(branchInstr));
   2235       if (DEBUGME) {
   2236         errs() << "Removing unneeded conditional branch instruction" ;
   2237         branchInstr->dump();
   2238       }
   2239       branchInstr->eraseFromParent();
   2240       SHOWNEWBLK(blk1, "Removing redundant successor");
   2241       srcBlk->removeSuccessor(blk1);
   2242     }
   2243   }
   2244 } //removeRedundantConditionalBranch
   2245 
   2246 template<class PassT>
   2247 void CFGStructurizer<PassT>::addDummyExitBlock(SmallVector<BlockT*,
   2248                                                DEFAULT_VEC_SLOTS> &retBlks) {
   2249   BlockT *dummyExitBlk = funcRep->CreateMachineBasicBlock();
   2250   funcRep->push_back(dummyExitBlk);  //insert to function
   2251   CFGTraits::insertInstrEnd(dummyExitBlk, AMDGPU::RETURN, passRep);
   2252 
   2253   for (typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::iterator iter =
   2254          retBlks.begin(),
   2255        iterEnd = retBlks.end(); iter != iterEnd; ++iter) {
   2256     BlockT *curBlk = *iter;
   2257     InstrT *curInstr = CFGTraits::getReturnInstr(curBlk);
   2258     if (curInstr) {
   2259       curInstr->eraseFromParent();
   2260     }
   2261 #if 0
   2262     if (curBlk->size()==0 && curBlk->pred_size() == 1) {
   2263       if (DEBUGME) {
   2264         errs() << "Replace empty block BB" <<  curBlk->getNumber()
   2265           << " with dummyExitBlock\n";
   2266       }
   2267       BlockT *predb = *curBlk->pred_begin();
   2268       predb->removeSuccessor(curBlk);
   2269       curBlk = predb;
   2270     } //handle empty curBlk
   2271 #endif
   2272     curBlk->addSuccessor(dummyExitBlk);
   2273     if (DEBUGME) {
   2274       errs() << "Add dummyExitBlock to BB" << curBlk->getNumber()
   2275              << " successors\n";
   2276     }
   2277   } //for
   2278 
   2279   SHOWNEWBLK(dummyExitBlk, "DummyExitBlock: ");
   2280 } //addDummyExitBlock
   2281 
   2282 template<class PassT>
   2283 void CFGStructurizer<PassT>::removeSuccessor(BlockT *srcBlk) {
   2284   while (srcBlk->succ_size()) {
   2285     srcBlk->removeSuccessor(*srcBlk->succ_begin());
   2286   }
   2287 }
   2288 
   2289 template<class PassT>
   2290 void CFGStructurizer<PassT>::recordSccnum(BlockT *srcBlk, int sccNum) {
   2291   BlockInfo *&srcBlkInfo = blockInfoMap[srcBlk];
   2292 
   2293   if (srcBlkInfo == NULL) {
   2294     srcBlkInfo = new BlockInfo();
   2295   }
   2296 
   2297   srcBlkInfo->sccNum = sccNum;
   2298 }
   2299 
   2300 template<class PassT>
   2301 int CFGStructurizer<PassT>::getSCCNum(BlockT *srcBlk) {
   2302   BlockInfo *srcBlkInfo = blockInfoMap[srcBlk];
   2303   return srcBlkInfo ? srcBlkInfo->sccNum : INVALIDSCCNUM;
   2304 }
   2305 
   2306 template<class PassT>
   2307 void CFGStructurizer<PassT>::retireBlock(BlockT *dstBlk, BlockT *srcBlk) {
   2308   if (DEBUGME) {
   2309         errs() << "Retiring BB" << srcBlk->getNumber() << "\n";
   2310   }
   2311 
   2312   BlockInfo *&srcBlkInfo = blockInfoMap[srcBlk];
   2313 
   2314   if (srcBlkInfo == NULL) {
   2315     srcBlkInfo = new BlockInfo();
   2316   }
   2317 
   2318   srcBlkInfo->isRetired = true;
   2319   //int i = srcBlk->succ_size();
   2320   //int j = srcBlk->pred_size();
   2321   assert(srcBlk->succ_size() == 0 && srcBlk->pred_size() == 0
   2322          && "can't retire block yet");
   2323 }
   2324 
   2325 template<class PassT>
   2326 bool CFGStructurizer<PassT>::isRetiredBlock(BlockT *srcBlk) {
   2327   BlockInfo *srcBlkInfo = blockInfoMap[srcBlk];
   2328   return (srcBlkInfo && srcBlkInfo->isRetired);
   2329 }
   2330 
   2331 template<class PassT>
   2332 bool CFGStructurizer<PassT>::isActiveLoophead(BlockT *curBlk) {
   2333   LoopT *loopRep = loopInfo->getLoopFor(curBlk);
   2334   while (loopRep && loopRep->getHeader() == curBlk) {
   2335     LoopLandInfo *loopLand = getLoopLandInfo(loopRep);
   2336 
   2337     if(loopLand == NULL)
   2338       return true;
   2339 
   2340     BlockT *landBlk = loopLand->landBlk;
   2341     assert(landBlk);
   2342     if (!isRetiredBlock(landBlk)) {
   2343       return true;
   2344     }
   2345 
   2346     loopRep = loopRep->getParentLoop();
   2347   }
   2348 
   2349   return false;
   2350 } //isActiveLoophead
   2351 
   2352 template<class PassT>
   2353 bool CFGStructurizer<PassT>::needMigrateBlock(BlockT *blk) {
   2354   const unsigned blockSizeThreshold = 30;
   2355   const unsigned cloneInstrThreshold = 100;
   2356 
   2357   bool multiplePreds = blk && (blk->pred_size() > 1);
   2358 
   2359   if(!multiplePreds)
   2360     return false;
   2361 
   2362   unsigned blkSize = blk->size();
   2363   return ((blkSize > blockSizeThreshold)
   2364           && (blkSize * (blk->pred_size() - 1) > cloneInstrThreshold));
   2365 } //needMigrateBlock
   2366 
   2367 template<class PassT>
   2368 typename CFGStructurizer<PassT>::BlockT *
   2369 CFGStructurizer<PassT>::recordLoopLandBlock(LoopT *loopRep, BlockT *landBlk,
   2370                                             BlockTSmallerVector &exitBlks,
   2371                                             std::set<BlockT *> &exitBlkSet) {
   2372   SmallVector<BlockT *, DEFAULT_VEC_SLOTS> inpathBlks;  //in exit path blocks
   2373 
   2374   for (typename BlockT::pred_iterator predIter = landBlk->pred_begin(),
   2375        predIterEnd = landBlk->pred_end();
   2376        predIter != predIterEnd; ++predIter) {
   2377     BlockT *curBlk = *predIter;
   2378     if (loopRep->contains(curBlk) || exitBlkSet.count(curBlk)) {
   2379       inpathBlks.push_back(curBlk);
   2380     }
   2381   } //for
   2382 
   2383   //if landBlk has predecessors that are not in the given loop,
   2384   //create a new block
   2385   BlockT *newLandBlk = landBlk;
   2386   if (inpathBlks.size() != landBlk->pred_size()) {
   2387     newLandBlk = funcRep->CreateMachineBasicBlock();
   2388     funcRep->push_back(newLandBlk);  //insert to function
   2389     newLandBlk->addSuccessor(landBlk);
   2390     for (typename SmallVector<BlockT*, DEFAULT_VEC_SLOTS>::iterator iter =
   2391          inpathBlks.begin(),
   2392          iterEnd = inpathBlks.end(); iter != iterEnd; ++iter) {
   2393       BlockT *curBlk = *iter;
   2394       CFGTraits::replaceInstrUseOfBlockWith(curBlk, landBlk, newLandBlk);
   2395       //srcBlk, oldBlk, newBlk
   2396       curBlk->removeSuccessor(landBlk);
   2397       curBlk->addSuccessor(newLandBlk);
   2398     }
   2399     for (size_t i = 0, tot = exitBlks.size(); i < tot; ++i) {
   2400       if (exitBlks[i] == landBlk) {
   2401         exitBlks[i] = newLandBlk;
   2402       }
   2403     }
   2404     SHOWNEWBLK(newLandBlk, "NewLandingBlock: ");
   2405   }
   2406 
   2407   setLoopLandBlock(loopRep, newLandBlk);
   2408 
   2409   return newLandBlk;
   2410 } // recordLoopbreakLand
   2411 
   2412 template<class PassT>
   2413 void CFGStructurizer<PassT>::setLoopLandBlock(LoopT *loopRep, BlockT *blk) {
   2414   LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
   2415 
   2416   if (theEntry == NULL) {
   2417     theEntry = new LoopLandInfo();
   2418   }
   2419   assert(theEntry->landBlk == NULL);
   2420 
   2421   if (blk == NULL) {
   2422     blk = funcRep->CreateMachineBasicBlock();
   2423     funcRep->push_back(blk);  //insert to function
   2424     SHOWNEWBLK(blk, "DummyLandingBlock for loop without break: ");
   2425   }
   2426 
   2427   theEntry->landBlk = blk;
   2428 
   2429   if (DEBUGME) {
   2430     errs() << "setLoopLandBlock loop-header = BB"
   2431            << loopRep->getHeader()->getNumber()
   2432            << "  landing-block = BB" << blk->getNumber() << "\n";
   2433   }
   2434 } // setLoopLandBlock
   2435 
   2436 template<class PassT>
   2437 void CFGStructurizer<PassT>::addLoopBreakOnReg(LoopT *loopRep, RegiT regNum) {
   2438   LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
   2439 
   2440   if (theEntry == NULL) {
   2441     theEntry = new LoopLandInfo();
   2442   }
   2443 
   2444   theEntry->breakOnRegs.insert(regNum);
   2445 
   2446   if (DEBUGME) {
   2447     errs() << "addLoopBreakOnReg loop-header = BB"
   2448            << loopRep->getHeader()->getNumber()
   2449            << "  regNum = " << regNum << "\n";
   2450   }
   2451 } // addLoopBreakOnReg
   2452 
   2453 template<class PassT>
   2454 void CFGStructurizer<PassT>::addLoopContOnReg(LoopT *loopRep, RegiT regNum) {
   2455   LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
   2456 
   2457   if (theEntry == NULL) {
   2458     theEntry = new LoopLandInfo();
   2459   }
   2460   theEntry->contOnRegs.insert(regNum);
   2461 
   2462   if (DEBUGME) {
   2463     errs() << "addLoopContOnReg loop-header = BB"
   2464            << loopRep->getHeader()->getNumber()
   2465            << "  regNum = " << regNum << "\n";
   2466   }
   2467 } // addLoopContOnReg
   2468 
   2469 template<class PassT>
   2470 void CFGStructurizer<PassT>::addLoopBreakInitReg(LoopT *loopRep, RegiT regNum) {
   2471   LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
   2472 
   2473   if (theEntry == NULL) {
   2474     theEntry = new LoopLandInfo();
   2475   }
   2476   theEntry->breakInitRegs.insert(regNum);
   2477 
   2478   if (DEBUGME) {
   2479     errs() << "addLoopBreakInitReg loop-header = BB"
   2480            << loopRep->getHeader()->getNumber()
   2481            << "  regNum = " << regNum << "\n";
   2482   }
   2483 } // addLoopBreakInitReg
   2484 
   2485 template<class PassT>
   2486 void CFGStructurizer<PassT>::addLoopContInitReg(LoopT *loopRep, RegiT regNum) {
   2487   LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
   2488 
   2489   if (theEntry == NULL) {
   2490     theEntry = new LoopLandInfo();
   2491   }
   2492   theEntry->contInitRegs.insert(regNum);
   2493 
   2494   if (DEBUGME) {
   2495     errs() << "addLoopContInitReg loop-header = BB"
   2496            << loopRep->getHeader()->getNumber()
   2497            << "  regNum = " << regNum << "\n";
   2498   }
   2499 } // addLoopContInitReg
   2500 
   2501 template<class PassT>
   2502 void CFGStructurizer<PassT>::addLoopEndbranchInitReg(LoopT *loopRep,
   2503                                                      RegiT regNum) {
   2504   LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
   2505 
   2506   if (theEntry == NULL) {
   2507     theEntry = new LoopLandInfo();
   2508   }
   2509   theEntry->endbranchInitRegs.insert(regNum);
   2510 
   2511   if (DEBUGME)
   2512   {
   2513         errs() << "addLoopEndbranchInitReg loop-header = BB"
   2514       << loopRep->getHeader()->getNumber()
   2515       << "  regNum = " << regNum << "\n";
   2516   }
   2517 } // addLoopEndbranchInitReg
   2518 
   2519 template<class PassT>
   2520 typename CFGStructurizer<PassT>::LoopLandInfo *
   2521 CFGStructurizer<PassT>::getLoopLandInfo(LoopT *loopRep) {
   2522   LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
   2523 
   2524   return theEntry;
   2525 } // getLoopLandInfo
   2526 
   2527 template<class PassT>
   2528 typename CFGStructurizer<PassT>::BlockT *
   2529 CFGStructurizer<PassT>::getLoopLandBlock(LoopT *loopRep) {
   2530   LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
   2531 
   2532   return theEntry ? theEntry->landBlk : NULL;
   2533 } // getLoopLandBlock
   2534 
   2535 
   2536 template<class PassT>
   2537 bool CFGStructurizer<PassT>::hasBackEdge(BlockT *curBlk) {
   2538   LoopT *loopRep = loopInfo->getLoopFor(curBlk);
   2539   if (loopRep == NULL)
   2540     return false;
   2541 
   2542   BlockT *loopHeader = loopRep->getHeader();
   2543 
   2544   return curBlk->isSuccessor(loopHeader);
   2545 
   2546 } //hasBackEdge
   2547 
   2548 template<class PassT>
   2549 unsigned CFGStructurizer<PassT>::getLoopDepth(LoopT *loopRep) {
   2550   return loopRep ? loopRep->getLoopDepth() : 0;
   2551 } //getLoopDepth
   2552 
   2553 template<class PassT>
   2554 int CFGStructurizer<PassT>::countActiveBlock
   2555 (typename SmallVector<BlockT*, DEFAULT_VEC_SLOTS>::const_iterator iterStart,
   2556  typename SmallVector<BlockT*, DEFAULT_VEC_SLOTS>::const_iterator iterEnd) {
   2557   int count = 0;
   2558   while (iterStart != iterEnd) {
   2559     if (!isRetiredBlock(*iterStart)) {
   2560       ++count;
   2561     }
   2562     ++iterStart;
   2563   }
   2564 
   2565   return count;
   2566 } //countActiveBlock
   2567 
   2568 // This is work around solution for findNearestCommonDominator not avaiable to
   2569 // post dom a proper fix should go to Dominators.h.
   2570 
   2571 template<class PassT>
   2572 typename CFGStructurizer<PassT>::BlockT*
   2573 CFGStructurizer<PassT>::findNearestCommonPostDom(BlockT *blk1, BlockT *blk2) {
   2574 
   2575   if (postDomTree->dominates(blk1, blk2)) {
   2576     return blk1;
   2577   }
   2578   if (postDomTree->dominates(blk2, blk1)) {
   2579     return blk2;
   2580   }
   2581 
   2582   DomTreeNodeT *node1 = postDomTree->getNode(blk1);
   2583   DomTreeNodeT *node2 = postDomTree->getNode(blk2);
   2584 
   2585   // Handle newly cloned node.
   2586   if (node1 == NULL && blk1->succ_size() == 1) {
   2587     return findNearestCommonPostDom(*blk1->succ_begin(), blk2);
   2588   }
   2589   if (node2 == NULL && blk2->succ_size() == 1) {
   2590     return findNearestCommonPostDom(blk1, *blk2->succ_begin());
   2591   }
   2592 
   2593   if (node1 == NULL || node2 == NULL) {
   2594     return NULL;
   2595   }
   2596 
   2597   node1 = node1->getIDom();
   2598   while (node1) {
   2599     if (postDomTree->dominates(node1, node2)) {
   2600       return node1->getBlock();
   2601     }
   2602     node1 = node1->getIDom();
   2603   }
   2604 
   2605   return NULL;
   2606 }
   2607 
   2608 template<class PassT>
   2609 typename CFGStructurizer<PassT>::BlockT *
   2610 CFGStructurizer<PassT>::findNearestCommonPostDom
   2611 (typename std::set<BlockT *> &blks) {
   2612   BlockT *commonDom;
   2613   typename std::set<BlockT *>::const_iterator iter = blks.begin();
   2614   typename std::set<BlockT *>::const_iterator iterEnd = blks.end();
   2615   for (commonDom = *iter; iter != iterEnd && commonDom != NULL; ++iter) {
   2616     BlockT *curBlk = *iter;
   2617     if (curBlk != commonDom) {
   2618       commonDom = findNearestCommonPostDom(curBlk, commonDom);
   2619     }
   2620   }
   2621 
   2622   if (DEBUGME) {
   2623     errs() << "Common post dominator for exit blocks is ";
   2624     if (commonDom) {
   2625           errs() << "BB" << commonDom->getNumber() << "\n";
   2626     } else {
   2627       errs() << "NULL\n";
   2628     }
   2629   }
   2630 
   2631   return commonDom;
   2632 } //findNearestCommonPostDom
   2633 
   2634 } //end namespace llvm
   2635 
   2636 //todo: move-end
   2637 
   2638 
   2639 //===----------------------------------------------------------------------===//
   2640 //
   2641 // CFGStructurizer for AMDGPU
   2642 //
   2643 //===----------------------------------------------------------------------===//
   2644 
   2645 
   2646 using namespace llvmCFGStruct;
   2647 
   2648 namespace llvm
   2649 {
   2650 class AMDGPUCFGStructurizer : public MachineFunctionPass
   2651 {
   2652 public:
   2653   typedef MachineInstr              InstructionType;
   2654   typedef MachineFunction           FunctionType;
   2655   typedef MachineBasicBlock         BlockType;
   2656   typedef MachineLoopInfo           LoopinfoType;
   2657   typedef MachineDominatorTree      DominatortreeType;
   2658   typedef MachinePostDominatorTree  PostDominatortreeType;
   2659   typedef MachineDomTreeNode        DomTreeNodeType;
   2660   typedef MachineLoop               LoopType;
   2661 
   2662 protected:
   2663   TargetMachine &TM;
   2664   const TargetInstrInfo *TII;
   2665   const AMDGPURegisterInfo *TRI;
   2666 
   2667 public:
   2668   AMDGPUCFGStructurizer(char &pid, TargetMachine &tm);
   2669   const TargetInstrInfo *getTargetInstrInfo() const;
   2670   //bool runOnMachineFunction(MachineFunction &F);
   2671 
   2672 private:
   2673 
   2674 };   //end of class AMDGPUCFGStructurizer
   2675 
   2676 //char AMDGPUCFGStructurizer::ID = 0;
   2677 } //end of namespace llvm
   2678 AMDGPUCFGStructurizer::AMDGPUCFGStructurizer(char &pid, TargetMachine &tm
   2679                                           )
   2680 : MachineFunctionPass(pid), TM(tm), TII(tm.getInstrInfo()),
   2681   TRI(static_cast<const AMDGPURegisterInfo *>(tm.getRegisterInfo())
   2682   ) {
   2683 }
   2684 
   2685 const TargetInstrInfo *AMDGPUCFGStructurizer::getTargetInstrInfo() const {
   2686   return TII;
   2687 }
   2688 //===----------------------------------------------------------------------===//
   2689 //
   2690 // CFGPrepare
   2691 //
   2692 //===----------------------------------------------------------------------===//
   2693 
   2694 
   2695 using namespace llvmCFGStruct;
   2696 
   2697 namespace llvm
   2698 {
   2699 class AMDGPUCFGPrepare : public AMDGPUCFGStructurizer
   2700 {
   2701 public:
   2702   static char ID;
   2703 
   2704 public:
   2705   AMDGPUCFGPrepare(TargetMachine &tm);
   2706 
   2707   virtual const char *getPassName() const;
   2708   virtual void getAnalysisUsage(AnalysisUsage &AU) const;
   2709 
   2710   bool runOnMachineFunction(MachineFunction &F);
   2711 
   2712 private:
   2713 
   2714 };   //end of class AMDGPUCFGPrepare
   2715 
   2716 char AMDGPUCFGPrepare::ID = 0;
   2717 } //end of namespace llvm
   2718 
   2719 AMDGPUCFGPrepare::AMDGPUCFGPrepare(TargetMachine &tm)
   2720   : AMDGPUCFGStructurizer(ID, tm )
   2721 {
   2722 }
   2723 const char *AMDGPUCFGPrepare::getPassName() const {
   2724   return "AMD IL Control Flow Graph Preparation Pass";
   2725 }
   2726 
   2727 void AMDGPUCFGPrepare::getAnalysisUsage(AnalysisUsage &AU) const {
   2728   AU.addPreserved<MachineFunctionAnalysis>();
   2729   AU.addRequired<MachineFunctionAnalysis>();
   2730   AU.addRequired<MachineDominatorTree>();
   2731   AU.addRequired<MachinePostDominatorTree>();
   2732   AU.addRequired<MachineLoopInfo>();
   2733 }
   2734 
   2735 //===----------------------------------------------------------------------===//
   2736 //
   2737 // CFGPerform
   2738 //
   2739 //===----------------------------------------------------------------------===//
   2740 
   2741 
   2742 using namespace llvmCFGStruct;
   2743 
   2744 namespace llvm
   2745 {
   2746 class AMDGPUCFGPerform : public AMDGPUCFGStructurizer
   2747 {
   2748 public:
   2749   static char ID;
   2750 
   2751 public:
   2752   AMDGPUCFGPerform(TargetMachine &tm);
   2753   virtual const char *getPassName() const;
   2754   virtual void getAnalysisUsage(AnalysisUsage &AU) const;
   2755   bool runOnMachineFunction(MachineFunction &F);
   2756 
   2757 private:
   2758 
   2759 };   //end of class AMDGPUCFGPerform
   2760 
   2761 char AMDGPUCFGPerform::ID = 0;
   2762 } //end of namespace llvm
   2763 
   2764   AMDGPUCFGPerform::AMDGPUCFGPerform(TargetMachine &tm)
   2765 : AMDGPUCFGStructurizer(ID, tm)
   2766 {
   2767 }
   2768 
   2769 const char *AMDGPUCFGPerform::getPassName() const {
   2770   return "AMD IL Control Flow Graph structurizer Pass";
   2771 }
   2772 
   2773 void AMDGPUCFGPerform::getAnalysisUsage(AnalysisUsage &AU) const {
   2774   AU.addPreserved<MachineFunctionAnalysis>();
   2775   AU.addRequired<MachineFunctionAnalysis>();
   2776   AU.addRequired<MachineDominatorTree>();
   2777   AU.addRequired<MachinePostDominatorTree>();
   2778   AU.addRequired<MachineLoopInfo>();
   2779 }
   2780 
   2781 //===----------------------------------------------------------------------===//
   2782 //
   2783 // CFGStructTraits<AMDGPUCFGStructurizer>
   2784 //
   2785 //===----------------------------------------------------------------------===//
   2786 
   2787 namespace llvmCFGStruct
   2788 {
   2789 // this class is tailor to the AMDGPU backend
   2790 template<>
   2791 struct CFGStructTraits<AMDGPUCFGStructurizer>
   2792 {
   2793   typedef int RegiT;
   2794 
   2795   static int getBreakNzeroOpcode(int oldOpcode) {
   2796     switch(oldOpcode) {
   2797       case AMDGPU::JUMP: return AMDGPU::BREAK_LOGICALNZ_i32;
   2798     default:
   2799       assert(0 && "internal error");
   2800     };
   2801     return -1;
   2802   }
   2803 
   2804   static int getBreakZeroOpcode(int oldOpcode) {
   2805     switch(oldOpcode) {
   2806       case AMDGPU::JUMP: return AMDGPU::BREAK_LOGICALZ_i32;
   2807     default:
   2808       assert(0 && "internal error");
   2809     };
   2810     return -1;
   2811   }
   2812 
   2813   static int getBranchNzeroOpcode(int oldOpcode) {
   2814     switch(oldOpcode) {
   2815     case AMDGPU::JUMP: return AMDGPU::IF_LOGICALNZ_i32;
   2816       ExpandCaseToAllScalarReturn(AMDGPU::BRANCH_COND, AMDGPU::IF_LOGICALNZ);
   2817       case AMDGPU::SI_IF_NZ: return AMDGPU::SI_IF_NZ;
   2818     default:
   2819       assert(0 && "internal error");
   2820     };
   2821     return -1;
   2822   }
   2823 
   2824   static int getBranchZeroOpcode(int oldOpcode) {
   2825     switch(oldOpcode) {
   2826     case AMDGPU::JUMP: return AMDGPU::IF_LOGICALZ_i32;
   2827       ExpandCaseToAllScalarReturn(AMDGPU::BRANCH_COND, AMDGPU::IF_LOGICALZ);
   2828       case AMDGPU::SI_IF_Z: return AMDGPU::SI_IF_Z;
   2829     default:
   2830       assert(0 && "internal error");
   2831     };
   2832     return -1;
   2833   }
   2834 
   2835   static int getContinueNzeroOpcode(int oldOpcode)
   2836   {
   2837     switch(oldOpcode) {
   2838       case AMDGPU::JUMP: return AMDGPU::CONTINUE_LOGICALNZ_i32;
   2839       default:
   2840         assert(0 && "internal error");
   2841     };
   2842     return -1;
   2843   }
   2844 
   2845   static int getContinueZeroOpcode(int oldOpcode) {
   2846     switch(oldOpcode) {
   2847       case AMDGPU::JUMP: return AMDGPU::CONTINUE_LOGICALZ_i32;
   2848     default:
   2849       assert(0 && "internal error");
   2850     };
   2851     return -1;
   2852   }
   2853 
   2854 // the explicitly represented branch target is the true branch target
   2855 #define getExplicitBranch getTrueBranch
   2856 #define setExplicitBranch setTrueBranch
   2857 
   2858   static MachineBasicBlock *getTrueBranch(MachineInstr *instr) {
   2859     return instr->getOperand(0).getMBB();
   2860   }
   2861 
   2862   static void setTrueBranch(MachineInstr *instr, MachineBasicBlock *blk) {
   2863     instr->getOperand(0).setMBB(blk);
   2864   }
   2865 
   2866   static MachineBasicBlock *
   2867   getFalseBranch(MachineBasicBlock *blk, MachineInstr *instr) {
   2868     assert(blk->succ_size() == 2);
   2869     MachineBasicBlock *trueBranch = getTrueBranch(instr);
   2870     MachineBasicBlock::succ_iterator iter = blk->succ_begin();
   2871     MachineBasicBlock::succ_iterator iterNext = iter;
   2872     ++iterNext;
   2873 
   2874     return (*iter == trueBranch) ? *iterNext : *iter;
   2875   }
   2876 
   2877   static bool isCondBranch(MachineInstr *instr) {
   2878     switch (instr->getOpcode()) {
   2879       case AMDGPU::JUMP:
   2880         return instr->getOperand(instr->findFirstPredOperandIdx()).getReg() != 0;
   2881       ExpandCaseToAllScalarTypes(AMDGPU::BRANCH_COND);
   2882       case AMDGPU::SI_IF_NZ:
   2883       case AMDGPU::SI_IF_Z:
   2884       break;
   2885     default:
   2886       return false;
   2887     }
   2888     return true;
   2889   }
   2890 
   2891   static bool isUncondBranch(MachineInstr *instr) {
   2892     switch (instr->getOpcode()) {
   2893     case AMDGPU::JUMP:
   2894       return instr->getOperand(instr->findFirstPredOperandIdx()).getReg() == 0;
   2895     default:
   2896       return false;
   2897     }
   2898     return true;
   2899   }
   2900 
   2901   static DebugLoc getLastDebugLocInBB(MachineBasicBlock *blk) {
   2902     //get DebugLoc from the first MachineBasicBlock instruction with debug info
   2903     DebugLoc DL;
   2904 	for (MachineBasicBlock::iterator iter = blk->begin(); iter != blk->end(); ++iter) {
   2905 	  MachineInstr *instr = &(*iter);
   2906 	  if (instr->getDebugLoc().isUnknown() == false) {
   2907 	    DL = instr->getDebugLoc();
   2908 	  }
   2909     }
   2910     return DL;
   2911   }
   2912 
   2913   static MachineInstr *getNormalBlockBranchInstr(MachineBasicBlock *blk) {
   2914     MachineBasicBlock::reverse_iterator iter = blk->rbegin();
   2915     MachineInstr *instr = &*iter;
   2916     if (instr && (isCondBranch(instr) || isUncondBranch(instr))) {
   2917       return instr;
   2918     }
   2919     return NULL;
   2920   }
   2921 
   2922   // The correct naming for this is getPossibleLoopendBlockBranchInstr.
   2923   //
   2924   // BB with backward-edge could have move instructions after the branch
   2925   // instruction.  Such move instruction "belong to" the loop backward-edge.
   2926   //
   2927   static MachineInstr *getLoopendBlockBranchInstr(MachineBasicBlock *blk) {
   2928     const AMDGPUInstrInfo * TII = static_cast<const AMDGPUInstrInfo *>(
   2929                                   blk->getParent()->getTarget().getInstrInfo());
   2930 
   2931     for (MachineBasicBlock::reverse_iterator iter = blk->rbegin(),
   2932          iterEnd = blk->rend(); iter != iterEnd; ++iter) {
   2933       // FIXME: Simplify
   2934       MachineInstr *instr = &*iter;
   2935       if (instr) {
   2936         if (isCondBranch(instr) || isUncondBranch(instr)) {
   2937           return instr;
   2938         } else if (!TII->isMov(instr->getOpcode())) {
   2939           break;
   2940         }
   2941       }
   2942     }
   2943     return NULL;
   2944   }
   2945 
   2946   static MachineInstr *getReturnInstr(MachineBasicBlock *blk) {
   2947     MachineBasicBlock::reverse_iterator iter = blk->rbegin();
   2948     if (iter != blk->rend()) {
   2949       MachineInstr *instr = &(*iter);
   2950       if (instr->getOpcode() == AMDGPU::RETURN) {
   2951         return instr;
   2952       }
   2953     }
   2954     return NULL;
   2955   }
   2956 
   2957   static MachineInstr *getContinueInstr(MachineBasicBlock *blk) {
   2958     MachineBasicBlock::reverse_iterator iter = blk->rbegin();
   2959     if (iter != blk->rend()) {
   2960       MachineInstr *instr = &(*iter);
   2961       if (instr->getOpcode() == AMDGPU::CONTINUE) {
   2962         return instr;
   2963       }
   2964     }
   2965     return NULL;
   2966   }
   2967 
   2968   static MachineInstr *getLoopBreakInstr(MachineBasicBlock *blk) {
   2969     for (MachineBasicBlock::iterator iter = blk->begin(); (iter != blk->end()); ++iter) {
   2970       MachineInstr *instr = &(*iter);
   2971       if ((instr->getOpcode() == AMDGPU::BREAK_LOGICALNZ_i32) || (instr->getOpcode() == AMDGPU::BREAK_LOGICALZ_i32)) {
   2972         return instr;
   2973       }
   2974     }
   2975     return NULL;
   2976   }
   2977 
   2978   static bool isReturnBlock(MachineBasicBlock *blk) {
   2979     MachineInstr *instr = getReturnInstr(blk);
   2980     bool isReturn = (blk->succ_size() == 0);
   2981     if (instr) {
   2982       assert(isReturn);
   2983     } else if (isReturn) {
   2984       if (DEBUGME) {
   2985         errs() << "BB" << blk->getNumber()
   2986                <<" is return block without RETURN instr\n";
   2987       }
   2988     }
   2989 
   2990     return  isReturn;
   2991   }
   2992 
   2993   static MachineBasicBlock::iterator
   2994   getInstrPos(MachineBasicBlock *blk, MachineInstr *instr) {
   2995     assert(instr->getParent() == blk && "instruction doesn't belong to block");
   2996     MachineBasicBlock::iterator iter = blk->begin();
   2997     MachineBasicBlock::iterator iterEnd = blk->end();
   2998     while (&(*iter) != instr && iter != iterEnd) {
   2999       ++iter;
   3000     }
   3001 
   3002     assert(iter != iterEnd);
   3003     return iter;
   3004   }//getInstrPos
   3005 
   3006   static MachineInstr *insertInstrBefore(MachineBasicBlock *blk, int newOpcode,
   3007                                          AMDGPUCFGStructurizer *passRep) {
   3008     return insertInstrBefore(blk,newOpcode,passRep,DebugLoc());
   3009   } //insertInstrBefore
   3010 
   3011   static MachineInstr *insertInstrBefore(MachineBasicBlock *blk, int newOpcode,
   3012                                          AMDGPUCFGStructurizer *passRep, DebugLoc DL) {
   3013     const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
   3014     MachineInstr *newInstr =
   3015       blk->getParent()->CreateMachineInstr(tii->get(newOpcode), DL);
   3016 
   3017     MachineBasicBlock::iterator res;
   3018     if (blk->begin() != blk->end()) {
   3019       blk->insert(blk->begin(), newInstr);
   3020     } else {
   3021       blk->push_back(newInstr);
   3022     }
   3023 
   3024     SHOWNEWINSTR(newInstr);
   3025 
   3026     return newInstr;
   3027   } //insertInstrBefore
   3028 
   3029   static void insertInstrEnd(MachineBasicBlock *blk, int newOpcode,
   3030                              AMDGPUCFGStructurizer *passRep) {
   3031     insertInstrEnd(blk,newOpcode,passRep,DebugLoc());
   3032   } //insertInstrEnd
   3033 
   3034   static void insertInstrEnd(MachineBasicBlock *blk, int newOpcode,
   3035                              AMDGPUCFGStructurizer *passRep, DebugLoc DL) {
   3036     const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
   3037    MachineInstr *newInstr = blk->getParent()
   3038       ->CreateMachineInstr(tii->get(newOpcode), DL);
   3039 
   3040     blk->push_back(newInstr);
   3041     //assume the instruction doesn't take any reg operand ...
   3042 
   3043     SHOWNEWINSTR(newInstr);
   3044   } //insertInstrEnd
   3045 
   3046   static MachineInstr *insertInstrBefore(MachineBasicBlock::iterator instrPos,
   3047                                          int newOpcode,
   3048                                          AMDGPUCFGStructurizer *passRep) {
   3049     MachineInstr *oldInstr = &(*instrPos);
   3050     const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
   3051     MachineBasicBlock *blk = oldInstr->getParent();
   3052     MachineInstr *newInstr =
   3053       blk->getParent()->CreateMachineInstr(tii->get(newOpcode),
   3054                                            DebugLoc());
   3055 
   3056     blk->insert(instrPos, newInstr);
   3057     //assume the instruction doesn't take any reg operand ...
   3058 
   3059     SHOWNEWINSTR(newInstr);
   3060     return newInstr;
   3061   } //insertInstrBefore
   3062 
   3063   static void insertCondBranchBefore(MachineBasicBlock::iterator instrPos,
   3064                                      int newOpcode,
   3065                                      AMDGPUCFGStructurizer *passRep,
   3066 									 DebugLoc DL) {
   3067     MachineInstr *oldInstr = &(*instrPos);
   3068     const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
   3069     MachineBasicBlock *blk = oldInstr->getParent();
   3070     MachineInstr *newInstr =
   3071       blk->getParent()->CreateMachineInstr(tii->get(newOpcode),
   3072                                            DL);
   3073 
   3074     blk->insert(instrPos, newInstr);
   3075     MachineInstrBuilder(newInstr).addReg(oldInstr->getOperand(1).getReg(),
   3076                                          false);
   3077 
   3078     SHOWNEWINSTR(newInstr);
   3079     //erase later oldInstr->eraseFromParent();
   3080   } //insertCondBranchBefore
   3081 
   3082   static void insertCondBranchBefore(MachineBasicBlock *blk,
   3083                                      MachineBasicBlock::iterator insertPos,
   3084                                      int newOpcode,
   3085                                      AMDGPUCFGStructurizer *passRep,
   3086                                      RegiT regNum,
   3087 									 DebugLoc DL) {
   3088     const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
   3089 
   3090     MachineInstr *newInstr =
   3091       blk->getParent()->CreateMachineInstr(tii->get(newOpcode), DL);
   3092 
   3093     //insert before
   3094     blk->insert(insertPos, newInstr);
   3095     MachineInstrBuilder(newInstr).addReg(regNum, false);
   3096 
   3097     SHOWNEWINSTR(newInstr);
   3098   } //insertCondBranchBefore
   3099 
   3100   static void insertCondBranchEnd(MachineBasicBlock *blk,
   3101                                   int newOpcode,
   3102                                   AMDGPUCFGStructurizer *passRep,
   3103                                   RegiT regNum) {
   3104     const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
   3105     MachineInstr *newInstr =
   3106       blk->getParent()->CreateMachineInstr(tii->get(newOpcode), DebugLoc());
   3107 
   3108     blk->push_back(newInstr);
   3109     MachineInstrBuilder(newInstr).addReg(regNum, false);
   3110 
   3111     SHOWNEWINSTR(newInstr);
   3112   } //insertCondBranchEnd
   3113 
   3114 
   3115   static void insertAssignInstrBefore(MachineBasicBlock::iterator instrPos,
   3116                                       AMDGPUCFGStructurizer *passRep,
   3117                                       RegiT regNum, int regVal) {
   3118     MachineInstr *oldInstr = &(*instrPos);
   3119     const AMDGPUInstrInfo *tii =
   3120              static_cast<const AMDGPUInstrInfo *>(passRep->getTargetInstrInfo());
   3121     MachineBasicBlock *blk = oldInstr->getParent();
   3122     MachineInstr *newInstr = tii->getMovImmInstr(blk->getParent(), regNum,
   3123                                                  regVal);
   3124     blk->insert(instrPos, newInstr);
   3125 
   3126     SHOWNEWINSTR(newInstr);
   3127   } //insertAssignInstrBefore
   3128 
   3129   static void insertAssignInstrBefore(MachineBasicBlock *blk,
   3130                                       AMDGPUCFGStructurizer *passRep,
   3131                                       RegiT regNum, int regVal) {
   3132     const AMDGPUInstrInfo *tii =
   3133              static_cast<const AMDGPUInstrInfo *>(passRep->getTargetInstrInfo());
   3134 
   3135     MachineInstr *newInstr = tii->getMovImmInstr(blk->getParent(), regNum,
   3136                                                  regVal);
   3137     if (blk->begin() != blk->end()) {
   3138       blk->insert(blk->begin(), newInstr);
   3139     } else {
   3140       blk->push_back(newInstr);
   3141     }
   3142 
   3143     SHOWNEWINSTR(newInstr);
   3144 
   3145   } //insertInstrBefore
   3146 
   3147   static void insertCompareInstrBefore(MachineBasicBlock *blk,
   3148                                        MachineBasicBlock::iterator instrPos,
   3149                                        AMDGPUCFGStructurizer *passRep,
   3150                                        RegiT dstReg, RegiT src1Reg,
   3151                                        RegiT src2Reg) {
   3152     const AMDGPUInstrInfo *tii =
   3153              static_cast<const AMDGPUInstrInfo *>(passRep->getTargetInstrInfo());
   3154     MachineInstr *newInstr =
   3155       blk->getParent()->CreateMachineInstr(tii->get(tii->getIEQOpcode()), DebugLoc());
   3156 
   3157     MachineInstrBuilder(newInstr).addReg(dstReg, RegState::Define); //set target
   3158     MachineInstrBuilder(newInstr).addReg(src1Reg); //set src value
   3159     MachineInstrBuilder(newInstr).addReg(src2Reg); //set src value
   3160 
   3161     blk->insert(instrPos, newInstr);
   3162     SHOWNEWINSTR(newInstr);
   3163 
   3164   } //insertCompareInstrBefore
   3165 
   3166   static void cloneSuccessorList(MachineBasicBlock *dstBlk,
   3167                                  MachineBasicBlock *srcBlk) {
   3168     for (MachineBasicBlock::succ_iterator iter = srcBlk->succ_begin(),
   3169          iterEnd = srcBlk->succ_end(); iter != iterEnd; ++iter) {
   3170       dstBlk->addSuccessor(*iter);  // *iter's predecessor is also taken care of
   3171     }
   3172   } //cloneSuccessorList
   3173 
   3174   static MachineBasicBlock *clone(MachineBasicBlock *srcBlk) {
   3175     MachineFunction *func = srcBlk->getParent();
   3176     MachineBasicBlock *newBlk = func->CreateMachineBasicBlock();
   3177     func->push_back(newBlk);  //insert to function
   3178     //newBlk->setNumber(srcBlk->getNumber());
   3179     for (MachineBasicBlock::iterator iter = srcBlk->begin(),
   3180          iterEnd = srcBlk->end();
   3181          iter != iterEnd; ++iter) {
   3182       MachineInstr *instr = func->CloneMachineInstr(iter);
   3183       newBlk->push_back(instr);
   3184     }
   3185     return newBlk;
   3186   }
   3187 
   3188   //MachineBasicBlock::ReplaceUsesOfBlockWith doesn't serve the purpose because
   3189   //the AMDGPU instruction is not recognized as terminator fix this and retire
   3190   //this routine
   3191   static void replaceInstrUseOfBlockWith(MachineBasicBlock *srcBlk,
   3192                                          MachineBasicBlock *oldBlk,
   3193                                          MachineBasicBlock *newBlk) {
   3194     MachineInstr *branchInstr = getLoopendBlockBranchInstr(srcBlk);
   3195     if (branchInstr && isCondBranch(branchInstr) &&
   3196         getExplicitBranch(branchInstr) == oldBlk) {
   3197       setExplicitBranch(branchInstr, newBlk);
   3198     }
   3199   }
   3200 
   3201   static void wrapup(MachineBasicBlock *entryBlk) {
   3202     assert((!entryBlk->getParent()->getJumpTableInfo()
   3203             || entryBlk->getParent()->getJumpTableInfo()->isEmpty())
   3204            && "found a jump table");
   3205 
   3206      //collect continue right before endloop
   3207      SmallVector<MachineInstr *, DEFAULT_VEC_SLOTS> contInstr;
   3208      MachineBasicBlock::iterator pre = entryBlk->begin();
   3209      MachineBasicBlock::iterator iterEnd = entryBlk->end();
   3210      MachineBasicBlock::iterator iter = pre;
   3211      while (iter != iterEnd) {
   3212        if (pre->getOpcode() == AMDGPU::CONTINUE
   3213            && iter->getOpcode() == AMDGPU::ENDLOOP) {
   3214          contInstr.push_back(pre);
   3215        }
   3216        pre = iter;
   3217        ++iter;
   3218      } //end while
   3219 
   3220      //delete continue right before endloop
   3221      for (unsigned i = 0; i < contInstr.size(); ++i) {
   3222         contInstr[i]->eraseFromParent();
   3223      }
   3224 
   3225      // TODO to fix up jump table so later phase won't be confused.  if
   3226      // (jumpTableInfo->isEmpty() == false) { need to clean the jump table, but
   3227      // there isn't such an interface yet.  alternatively, replace all the other
   3228      // blocks in the jump table with the entryBlk //}
   3229 
   3230   } //wrapup
   3231 
   3232   static MachineDominatorTree *getDominatorTree(AMDGPUCFGStructurizer &pass) {
   3233     return &pass.getAnalysis<MachineDominatorTree>();
   3234   }
   3235 
   3236   static MachinePostDominatorTree*
   3237   getPostDominatorTree(AMDGPUCFGStructurizer &pass) {
   3238     return &pass.getAnalysis<MachinePostDominatorTree>();
   3239   }
   3240 
   3241   static MachineLoopInfo *getLoopInfo(AMDGPUCFGStructurizer &pass) {
   3242     return &pass.getAnalysis<MachineLoopInfo>();
   3243   }
   3244 }; // template class CFGStructTraits
   3245 } //end of namespace llvm
   3246 
   3247 // createAMDGPUCFGPreparationPass- Returns a pass
   3248 FunctionPass *llvm::createAMDGPUCFGPreparationPass(TargetMachine &tm
   3249                                                  ) {
   3250   return new AMDGPUCFGPrepare(tm );
   3251 }
   3252 
   3253 bool AMDGPUCFGPrepare::runOnMachineFunction(MachineFunction &func) {
   3254   return llvmCFGStruct::CFGStructurizer<AMDGPUCFGStructurizer>().prepare(func,
   3255                                                                         *this,
   3256                                                                         TRI);
   3257 }
   3258 
   3259 // createAMDGPUCFGStructurizerPass- Returns a pass
   3260 FunctionPass *llvm::createAMDGPUCFGStructurizerPass(TargetMachine &tm
   3261                                                   ) {
   3262   return new AMDGPUCFGPerform(tm );
   3263 }
   3264 
   3265 bool AMDGPUCFGPerform::runOnMachineFunction(MachineFunction &func) {
   3266   return llvmCFGStruct::CFGStructurizer<AMDGPUCFGStructurizer>().run(func,
   3267                                                                     *this,
   3268                                                                     TRI);
   3269 }
   3270 
   3271 //end of file newline goes below
   3272 
   3273