Home | History | Annotate | Download | only in R600
      1 //===-- AMDILCFGStructurizer.cpp - CFG Structurizer -----------------------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 /// \file
      9 //==-----------------------------------------------------------------------===//
     10 
     11 #define DEBUGME 0
     12 #define DEBUG_TYPE "structcfg"
     13 
     14 #include "AMDGPUInstrInfo.h"
     15 #include "AMDIL.h"
     16 #include "llvm/ADT/SCCIterator.h"
     17 #include "llvm/ADT/SmallVector.h"
     18 #include "llvm/ADT/Statistic.h"
     19 #include "llvm/Analysis/DominatorInternals.h"
     20 #include "llvm/Analysis/Dominators.h"
     21 #include "llvm/CodeGen/MachineDominators.h"
     22 #include "llvm/CodeGen/MachineFunction.h"
     23 #include "llvm/CodeGen/MachineFunctionAnalysis.h"
     24 #include "llvm/CodeGen/MachineFunctionPass.h"
     25 #include "llvm/CodeGen/MachineInstrBuilder.h"
     26 #include "llvm/CodeGen/MachineJumpTableInfo.h"
     27 #include "llvm/CodeGen/MachineLoopInfo.h"
     28 #include "llvm/CodeGen/MachinePostDominators.h"
     29 #include "llvm/CodeGen/MachineRegisterInfo.h"
     30 #include "llvm/Target/TargetInstrInfo.h"
     31 
     32 using namespace llvm;
     33 
     34 // TODO: move-begin.
     35 
     36 //===----------------------------------------------------------------------===//
     37 //
     38 // Statistics for CFGStructurizer.
     39 //
     40 //===----------------------------------------------------------------------===//
     41 
     42 STATISTIC(numSerialPatternMatch,    "CFGStructurizer number of serial pattern "
     43     "matched");
     44 STATISTIC(numIfPatternMatch,        "CFGStructurizer number of if pattern "
     45     "matched");
     46 STATISTIC(numLoopbreakPatternMatch, "CFGStructurizer number of loop-break "
     47     "pattern matched");
     48 STATISTIC(numLoopcontPatternMatch,  "CFGStructurizer number of loop-continue "
     49     "pattern matched");
     50 STATISTIC(numLoopPatternMatch,      "CFGStructurizer number of loop pattern "
     51     "matched");
     52 STATISTIC(numClonedBlock,           "CFGStructurizer cloned blocks");
     53 STATISTIC(numClonedInstr,           "CFGStructurizer cloned instructions");
     54 
     55 //===----------------------------------------------------------------------===//
     56 //
     57 // Miscellaneous utility for CFGStructurizer.
     58 //
     59 //===----------------------------------------------------------------------===//
     60 namespace llvmCFGStruct {
     61 #define SHOWNEWINSTR(i) \
     62   if (DEBUGME) errs() << "New instr: " << *i << "\n"
     63 
     64 #define SHOWNEWBLK(b, msg) \
     65 if (DEBUGME) { \
     66   errs() << msg << "BB" << b->getNumber() << "size " << b->size(); \
     67   errs() << "\n"; \
     68 }
     69 
     70 #define SHOWBLK_DETAIL(b, msg) \
     71 if (DEBUGME) { \
     72   if (b) { \
     73   errs() << msg << "BB" << b->getNumber() << "size " << b->size(); \
     74   b->print(errs()); \
     75   errs() << "\n"; \
     76   } \
     77 }
     78 
     79 #define INVALIDSCCNUM -1
     80 #define INVALIDREGNUM 0
     81 
     82 template<class LoopinfoT>
     83 void PrintLoopinfo(const LoopinfoT &LoopInfo, llvm::raw_ostream &OS) {
     84   for (typename LoopinfoT::iterator iter = LoopInfo.begin(),
     85        iterEnd = LoopInfo.end();
     86        iter != iterEnd; ++iter) {
     87     (*iter)->print(OS, 0);
     88   }
     89 }
     90 
     91 template<class NodeT>
     92 void ReverseVector(SmallVector<NodeT *, DEFAULT_VEC_SLOTS> &Src) {
     93   size_t sz = Src.size();
     94   for (size_t i = 0; i < sz/2; ++i) {
     95     NodeT *t = Src[i];
     96     Src[i] = Src[sz - i - 1];
     97     Src[sz - i - 1] = t;
     98   }
     99 }
    100 
    101 } //end namespace llvmCFGStruct
    102 
    103 //===----------------------------------------------------------------------===//
    104 //
    105 // supporting data structure for CFGStructurizer
    106 //
    107 //===----------------------------------------------------------------------===//
    108 
    109 namespace llvmCFGStruct {
    110 template<class PassT>
    111 struct CFGStructTraits {
    112 };
    113 
    114 template <class InstrT>
    115 class BlockInformation {
    116 public:
    117   bool isRetired;
    118   int  sccNum;
    119   //SmallVector<InstrT*, DEFAULT_VEC_SLOTS> succInstr;
    120   //Instructions defining the corresponding successor.
    121   BlockInformation() : isRetired(false), sccNum(INVALIDSCCNUM) {}
    122 };
    123 
    124 template <class BlockT, class InstrT, class RegiT>
    125 class LandInformation {
    126 public:
    127   BlockT *landBlk;
    128   std::set<RegiT> breakInitRegs;  //Registers that need to "reg = 0", before
    129                                   //WHILELOOP(thisloop) init before entering
    130                                   //thisloop.
    131   std::set<RegiT> contInitRegs;   //Registers that need to "reg = 0", after
    132                                   //WHILELOOP(thisloop) init after entering
    133                                   //thisloop.
    134   std::set<RegiT> endbranchInitRegs; //Init before entering this loop, at loop
    135                                      //land block, branch cond on this reg.
    136   std::set<RegiT> breakOnRegs;       //registers that need to "if (reg) break
    137                                      //endif" after ENDLOOP(thisloop) break
    138                                      //outerLoopOf(thisLoop).
    139   std::set<RegiT> contOnRegs;       //registers that need to "if (reg) continue
    140                                     //endif" after ENDLOOP(thisloop) continue on
    141                                     //outerLoopOf(thisLoop).
    142   LandInformation() : landBlk(NULL) {}
    143 };
    144 
    145 } //end of namespace llvmCFGStruct
    146 
    147 //===----------------------------------------------------------------------===//
    148 //
    149 // CFGStructurizer
    150 //
    151 //===----------------------------------------------------------------------===//
    152 
    153 namespace llvmCFGStruct {
    154 // bixia TODO: port it to BasicBlock, not just MachineBasicBlock.
    155 template<class PassT>
    156 class  CFGStructurizer {
    157 public:
    158   typedef enum {
    159     Not_SinglePath = 0,
    160     SinglePath_InPath = 1,
    161     SinglePath_NotInPath = 2
    162   } PathToKind;
    163 
    164 public:
    165   typedef typename PassT::InstructionType         InstrT;
    166   typedef typename PassT::FunctionType            FuncT;
    167   typedef typename PassT::DominatortreeType       DomTreeT;
    168   typedef typename PassT::PostDominatortreeType   PostDomTreeT;
    169   typedef typename PassT::DomTreeNodeType         DomTreeNodeT;
    170   typedef typename PassT::LoopinfoType            LoopInfoT;
    171 
    172   typedef GraphTraits<FuncT *>                    FuncGTraits;
    173   //typedef FuncGTraits::nodes_iterator BlockIterator;
    174   typedef typename FuncT::iterator                BlockIterator;
    175 
    176   typedef typename FuncGTraits::NodeType          BlockT;
    177   typedef GraphTraits<BlockT *>                   BlockGTraits;
    178   typedef GraphTraits<Inverse<BlockT *> >         InvBlockGTraits;
    179   //typedef BlockGTraits::succ_iterator InstructionIterator;
    180   typedef typename BlockT::iterator               InstrIterator;
    181 
    182   typedef CFGStructTraits<PassT>                  CFGTraits;
    183   typedef BlockInformation<InstrT>                BlockInfo;
    184   typedef std::map<BlockT *, BlockInfo *>         BlockInfoMap;
    185 
    186   typedef int                                     RegiT;
    187   typedef typename PassT::LoopType                LoopT;
    188   typedef LandInformation<BlockT, InstrT, RegiT>  LoopLandInfo;
    189         typedef std::map<LoopT *, LoopLandInfo *> LoopLandInfoMap;
    190         //landing info for loop break
    191   typedef SmallVector<BlockT *, 32>               BlockTSmallerVector;
    192 
    193 public:
    194   CFGStructurizer();
    195   ~CFGStructurizer();
    196 
    197   /// Perform the CFG structurization
    198   bool run(FuncT &Func, PassT &Pass, const AMDGPURegisterInfo *tri);
    199 
    200   /// Perform the CFG preparation
    201   bool prepare(FuncT &Func, PassT &Pass, const AMDGPURegisterInfo *tri);
    202 
    203 private:
    204   void reversePredicateSetter(typename BlockT::iterator);
    205   void   orderBlocks();
    206   void   printOrderedBlocks(llvm::raw_ostream &OS);
    207   int patternMatch(BlockT *CurBlock);
    208   int patternMatchGroup(BlockT *CurBlock);
    209 
    210   int serialPatternMatch(BlockT *CurBlock);
    211   int ifPatternMatch(BlockT *CurBlock);
    212   int switchPatternMatch(BlockT *CurBlock);
    213   int loopendPatternMatch(BlockT *CurBlock);
    214   int loopPatternMatch(BlockT *CurBlock);
    215 
    216   int loopbreakPatternMatch(LoopT *LoopRep, BlockT *LoopHeader);
    217   int loopcontPatternMatch(LoopT *LoopRep, BlockT *LoopHeader);
    218   //int loopWithoutBreak(BlockT *);
    219 
    220   void handleLoopbreak (BlockT *ExitingBlock, LoopT *ExitingLoop,
    221                         BlockT *ExitBlock, LoopT *exitLoop, BlockT *landBlock);
    222   void handleLoopcontBlock(BlockT *ContingBlock, LoopT *contingLoop,
    223                            BlockT *ContBlock, LoopT *contLoop);
    224   bool isSameloopDetachedContbreak(BlockT *Src1Block, BlockT *Src2Block);
    225   int handleJumpintoIf(BlockT *HeadBlock, BlockT *TrueBlock,
    226                        BlockT *FalseBlock);
    227   int handleJumpintoIfImp(BlockT *HeadBlock, BlockT *TrueBlock,
    228                           BlockT *FalseBlock);
    229   int improveSimpleJumpintoIf(BlockT *HeadBlock, BlockT *TrueBlock,
    230                               BlockT *FalseBlock, BlockT **LandBlockPtr);
    231   void showImproveSimpleJumpintoIf(BlockT *HeadBlock, BlockT *TrueBlock,
    232                                    BlockT *FalseBlock, BlockT *LandBlock,
    233                                    bool Detail = false);
    234   PathToKind singlePathTo(BlockT *SrcBlock, BlockT *DstBlock,
    235                           bool AllowSideEntry = true);
    236   BlockT *singlePathEnd(BlockT *srcBlock, BlockT *DstBlock,
    237                         bool AllowSideEntry = true);
    238   int cloneOnSideEntryTo(BlockT *PreBlock, BlockT *SrcBlock, BlockT *DstBlock);
    239   void mergeSerialBlock(BlockT *DstBlock, BlockT *srcBlock);
    240 
    241   void mergeIfthenelseBlock(InstrT *BranchInstr, BlockT *CurBlock,
    242                             BlockT *TrueBlock, BlockT *FalseBlock,
    243                             BlockT *LandBlock);
    244   void mergeLooplandBlock(BlockT *DstBlock, LoopLandInfo *LoopLand);
    245   void mergeLoopbreakBlock(BlockT *ExitingBlock, BlockT *ExitBlock,
    246                            BlockT *ExitLandBlock, RegiT SetReg);
    247   void settleLoopcontBlock(BlockT *ContingBlock, BlockT *ContBlock,
    248                            RegiT SetReg);
    249   BlockT *relocateLoopcontBlock(LoopT *ParentLoopRep, LoopT *LoopRep,
    250                                 std::set<BlockT*> &ExitBlockSet,
    251                                 BlockT *ExitLandBlk);
    252   BlockT *addLoopEndbranchBlock(LoopT *LoopRep,
    253                                 BlockTSmallerVector &ExitingBlocks,
    254                                 BlockTSmallerVector &ExitBlocks);
    255   BlockT *normalizeInfiniteLoopExit(LoopT *LoopRep);
    256   void removeUnconditionalBranch(BlockT *SrcBlock);
    257   void removeRedundantConditionalBranch(BlockT *SrcBlock);
    258   void addDummyExitBlock(SmallVector<BlockT *, DEFAULT_VEC_SLOTS> &RetBlocks);
    259 
    260   void removeSuccessor(BlockT *SrcBlock);
    261   BlockT *cloneBlockForPredecessor(BlockT *CurBlock, BlockT *PredBlock);
    262   BlockT *exitingBlock2ExitBlock (LoopT *LoopRep, BlockT *exitingBlock);
    263 
    264   void migrateInstruction(BlockT *SrcBlock, BlockT *DstBlock,
    265                           InstrIterator InsertPos);
    266 
    267   void recordSccnum(BlockT *SrcBlock, int SCCNum);
    268   int getSCCNum(BlockT *srcBlk);
    269 
    270   void retireBlock(BlockT *DstBlock, BlockT *SrcBlock);
    271   bool isRetiredBlock(BlockT *SrcBlock);
    272   bool isActiveLoophead(BlockT *CurBlock);
    273   bool needMigrateBlock(BlockT *Block);
    274 
    275   BlockT *recordLoopLandBlock(LoopT *LoopRep, BlockT *LandBlock,
    276                               BlockTSmallerVector &exitBlocks,
    277                               std::set<BlockT*> &ExitBlockSet);
    278   void setLoopLandBlock(LoopT *LoopRep, BlockT *Block = NULL);
    279   BlockT *getLoopLandBlock(LoopT *LoopRep);
    280   LoopLandInfo *getLoopLandInfo(LoopT *LoopRep);
    281 
    282   void addLoopBreakOnReg(LoopT *LoopRep, RegiT RegNum);
    283   void addLoopContOnReg(LoopT *LoopRep, RegiT RegNum);
    284   void addLoopBreakInitReg(LoopT *LoopRep, RegiT RegNum);
    285   void addLoopContInitReg(LoopT *LoopRep, RegiT RegNum);
    286   void addLoopEndbranchInitReg(LoopT *LoopRep, RegiT RegNum);
    287 
    288   bool hasBackEdge(BlockT *curBlock);
    289   unsigned getLoopDepth  (LoopT *LoopRep);
    290   int countActiveBlock(
    291     typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator IterStart,
    292     typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator IterEnd);
    293     BlockT *findNearestCommonPostDom(std::set<BlockT *>&);
    294   BlockT *findNearestCommonPostDom(BlockT *Block1, BlockT *Block2);
    295 
    296 private:
    297   DomTreeT *domTree;
    298   PostDomTreeT *postDomTree;
    299   LoopInfoT *loopInfo;
    300   PassT *passRep;
    301   FuncT *funcRep;
    302 
    303   BlockInfoMap blockInfoMap;
    304   LoopLandInfoMap loopLandInfoMap;
    305   SmallVector<BlockT *, DEFAULT_VEC_SLOTS> orderedBlks;
    306   const AMDGPURegisterInfo *TRI;
    307 
    308 };  //template class CFGStructurizer
    309 
    310 template<class PassT> CFGStructurizer<PassT>::CFGStructurizer()
    311   : domTree(NULL), postDomTree(NULL), loopInfo(NULL) {
    312 }
    313 
    314 template<class PassT> CFGStructurizer<PassT>::~CFGStructurizer() {
    315   for (typename BlockInfoMap::iterator I = blockInfoMap.begin(),
    316        E = blockInfoMap.end(); I != E; ++I) {
    317     delete I->second;
    318   }
    319 }
    320 
    321 template<class PassT>
    322 bool CFGStructurizer<PassT>::prepare(FuncT &func, PassT &pass,
    323                                      const AMDGPURegisterInfo * tri) {
    324   passRep = &pass;
    325   funcRep = &func;
    326   TRI = tri;
    327 
    328   bool changed = false;
    329 
    330   //FIXME: if not reducible flow graph, make it so ???
    331 
    332   if (DEBUGME) {
    333         errs() << "AMDGPUCFGStructurizer::prepare\n";
    334   }
    335 
    336   loopInfo = CFGTraits::getLoopInfo(pass);
    337   if (DEBUGME) {
    338     errs() << "LoopInfo:\n";
    339     PrintLoopinfo(*loopInfo, errs());
    340   }
    341 
    342   orderBlocks();
    343   if (DEBUGME) {
    344     errs() << "Ordered blocks:\n";
    345     printOrderedBlocks(errs());
    346   }
    347 
    348   SmallVector<BlockT *, DEFAULT_VEC_SLOTS> retBlks;
    349 
    350   for (typename LoopInfoT::iterator iter = loopInfo->begin(),
    351        iterEnd = loopInfo->end();
    352        iter != iterEnd; ++iter) {
    353     LoopT* loopRep = (*iter);
    354     BlockTSmallerVector exitingBlks;
    355     loopRep->getExitingBlocks(exitingBlks);
    356 
    357     if (exitingBlks.size() == 0) {
    358       BlockT* dummyExitBlk = normalizeInfiniteLoopExit(loopRep);
    359       if (dummyExitBlk != NULL)
    360         retBlks.push_back(dummyExitBlk);
    361     }
    362   }
    363 
    364   // Remove unconditional branch instr.
    365   // Add dummy exit block iff there are multiple returns.
    366 
    367   for (typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator
    368        iterBlk = orderedBlks.begin(), iterEndBlk = orderedBlks.end();
    369        iterBlk != iterEndBlk;
    370        ++iterBlk) {
    371     BlockT *curBlk = *iterBlk;
    372     removeUnconditionalBranch(curBlk);
    373     removeRedundantConditionalBranch(curBlk);
    374     if (CFGTraits::isReturnBlock(curBlk)) {
    375       retBlks.push_back(curBlk);
    376     }
    377     assert(curBlk->succ_size() <= 2);
    378   } //for
    379 
    380   if (retBlks.size() >= 2) {
    381     addDummyExitBlock(retBlks);
    382     changed = true;
    383   }
    384 
    385   return changed;
    386 } //CFGStructurizer::prepare
    387 
    388 template<class PassT>
    389 bool CFGStructurizer<PassT>::run(FuncT &func, PassT &pass,
    390     const AMDGPURegisterInfo * tri) {
    391   passRep = &pass;
    392   funcRep = &func;
    393   TRI = tri;
    394 
    395   //Assume reducible CFG...
    396   if (DEBUGME) {
    397     errs() << "AMDGPUCFGStructurizer::run\n";
    398     func.viewCFG();
    399   }
    400 
    401   domTree = CFGTraits::getDominatorTree(pass);
    402   if (DEBUGME) {
    403     domTree->print(errs(), (const llvm::Module*)0);
    404   }
    405 
    406   postDomTree = CFGTraits::getPostDominatorTree(pass);
    407   if (DEBUGME) {
    408     postDomTree->print(errs());
    409   }
    410 
    411   loopInfo = CFGTraits::getLoopInfo(pass);
    412   if (DEBUGME) {
    413     errs() << "LoopInfo:\n";
    414     PrintLoopinfo(*loopInfo, errs());
    415   }
    416 
    417   orderBlocks();
    418 #ifdef STRESSTEST
    419   //Use the worse block ordering to test the algorithm.
    420   ReverseVector(orderedBlks);
    421 #endif
    422 
    423   if (DEBUGME) {
    424     errs() << "Ordered blocks:\n";
    425     printOrderedBlocks(errs());
    426   }
    427   int numIter = 0;
    428   bool finish = false;
    429   BlockT *curBlk;
    430   bool makeProgress = false;
    431   int numRemainedBlk = countActiveBlock(orderedBlks.begin(),
    432                                         orderedBlks.end());
    433 
    434   do {
    435     ++numIter;
    436     if (DEBUGME) {
    437       errs() << "numIter = " << numIter
    438              << ", numRemaintedBlk = " << numRemainedBlk << "\n";
    439     }
    440 
    441     typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator
    442       iterBlk = orderedBlks.begin();
    443     typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator
    444       iterBlkEnd = orderedBlks.end();
    445 
    446     typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator
    447       sccBeginIter = iterBlk;
    448     BlockT *sccBeginBlk = NULL;
    449     int sccNumBlk = 0;  // The number of active blocks, init to a
    450                         // maximum possible number.
    451     int sccNumIter;     // Number of iteration in this SCC.
    452 
    453     while (iterBlk != iterBlkEnd) {
    454       curBlk = *iterBlk;
    455 
    456       if (sccBeginBlk == NULL) {
    457         sccBeginIter = iterBlk;
    458         sccBeginBlk = curBlk;
    459         sccNumIter = 0;
    460         sccNumBlk = numRemainedBlk; // Init to maximum possible number.
    461         if (DEBUGME) {
    462               errs() << "start processing SCC" << getSCCNum(sccBeginBlk);
    463               errs() << "\n";
    464         }
    465       }
    466 
    467       if (!isRetiredBlock(curBlk)) {
    468         patternMatch(curBlk);
    469       }
    470 
    471       ++iterBlk;
    472 
    473       bool contNextScc = true;
    474       if (iterBlk == iterBlkEnd
    475           || getSCCNum(sccBeginBlk) != getSCCNum(*iterBlk)) {
    476         // Just finish one scc.
    477         ++sccNumIter;
    478         int sccRemainedNumBlk = countActiveBlock(sccBeginIter, iterBlk);
    479         if (sccRemainedNumBlk != 1 && sccRemainedNumBlk >= sccNumBlk) {
    480           if (DEBUGME) {
    481             errs() << "Can't reduce SCC " << getSCCNum(curBlk)
    482                    << ", sccNumIter = " << sccNumIter;
    483             errs() << "doesn't make any progress\n";
    484           }
    485           contNextScc = true;
    486         } else if (sccRemainedNumBlk != 1 && sccRemainedNumBlk < sccNumBlk) {
    487           sccNumBlk = sccRemainedNumBlk;
    488           iterBlk = sccBeginIter;
    489           contNextScc = false;
    490           if (DEBUGME) {
    491             errs() << "repeat processing SCC" << getSCCNum(curBlk)
    492                    << "sccNumIter = " << sccNumIter << "\n";
    493             func.viewCFG();
    494           }
    495         } else {
    496           // Finish the current scc.
    497           contNextScc = true;
    498         }
    499       } else {
    500         // Continue on next component in the current scc.
    501         contNextScc = false;
    502       }
    503 
    504       if (contNextScc) {
    505         sccBeginBlk = NULL;
    506       }
    507     } //while, "one iteration" over the function.
    508 
    509     BlockT *entryBlk = FuncGTraits::nodes_begin(&func);
    510     if (entryBlk->succ_size() == 0) {
    511       finish = true;
    512       if (DEBUGME) {
    513         errs() << "Reduce to one block\n";
    514       }
    515     } else {
    516       int newnumRemainedBlk
    517         = countActiveBlock(orderedBlks.begin(), orderedBlks.end());
    518       // consider cloned blocks ??
    519       if (newnumRemainedBlk == 1 || newnumRemainedBlk < numRemainedBlk) {
    520         makeProgress = true;
    521         numRemainedBlk = newnumRemainedBlk;
    522       } else {
    523         makeProgress = false;
    524         if (DEBUGME) {
    525           errs() << "No progress\n";
    526         }
    527       }
    528     }
    529   } while (!finish && makeProgress);
    530 
    531   // Misc wrap up to maintain the consistency of the Function representation.
    532   CFGTraits::wrapup(FuncGTraits::nodes_begin(&func));
    533 
    534   // Detach retired Block, release memory.
    535   for (typename BlockInfoMap::iterator iterMap = blockInfoMap.begin(),
    536        iterEndMap = blockInfoMap.end(); iterMap != iterEndMap; ++iterMap) {
    537     if ((*iterMap).second && (*iterMap).second->isRetired) {
    538       assert(((*iterMap).first)->getNumber() != -1);
    539       if (DEBUGME) {
    540         errs() << "Erase BB" << ((*iterMap).first)->getNumber() << "\n";
    541       }
    542       (*iterMap).first->eraseFromParent();  //Remove from the parent Function.
    543     }
    544     delete (*iterMap).second;
    545   }
    546   blockInfoMap.clear();
    547 
    548   // clear loopLandInfoMap
    549   for (typename LoopLandInfoMap::iterator iterMap = loopLandInfoMap.begin(),
    550        iterEndMap = loopLandInfoMap.end(); iterMap != iterEndMap; ++iterMap) {
    551     delete (*iterMap).second;
    552   }
    553   loopLandInfoMap.clear();
    554 
    555   if (DEBUGME) {
    556     func.viewCFG();
    557   }
    558 
    559   if (!finish) {
    560     assert(!"IRREDUCIBL_CF");
    561   }
    562 
    563   return true;
    564 } //CFGStructurizer::run
    565 
    566 /// Print the ordered Blocks.
    567 ///
    568 template<class PassT>
    569 void CFGStructurizer<PassT>::printOrderedBlocks(llvm::raw_ostream &os) {
    570   size_t i = 0;
    571   for (typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator
    572       iterBlk = orderedBlks.begin(), iterBlkEnd = orderedBlks.end();
    573        iterBlk != iterBlkEnd;
    574        ++iterBlk, ++i) {
    575     os << "BB" << (*iterBlk)->getNumber();
    576     os << "(" << getSCCNum(*iterBlk) << "," << (*iterBlk)->size() << ")";
    577     if (i != 0 && i % 10 == 0) {
    578       os << "\n";
    579     } else {
    580       os << " ";
    581     }
    582   }
    583 } //printOrderedBlocks
    584 
    585 /// Compute the reversed DFS post order of Blocks
    586 ///
    587 template<class PassT> void CFGStructurizer<PassT>::orderBlocks() {
    588   int sccNum = 0;
    589   BlockT *bb;
    590   for (scc_iterator<FuncT *> sccIter = scc_begin(funcRep),
    591        sccEnd = scc_end(funcRep); sccIter != sccEnd; ++sccIter, ++sccNum) {
    592     std::vector<BlockT *> &sccNext = *sccIter;
    593     for (typename std::vector<BlockT *>::const_iterator
    594          blockIter = sccNext.begin(), blockEnd = sccNext.end();
    595          blockIter != blockEnd; ++blockIter) {
    596       bb = *blockIter;
    597       orderedBlks.push_back(bb);
    598       recordSccnum(bb, sccNum);
    599     }
    600   }
    601 
    602   //walk through all the block in func to check for unreachable
    603   for (BlockIterator blockIter1 = FuncGTraits::nodes_begin(funcRep),
    604        blockEnd1 = FuncGTraits::nodes_end(funcRep);
    605        blockIter1 != blockEnd1; ++blockIter1) {
    606     BlockT *bb = &(*blockIter1);
    607     sccNum = getSCCNum(bb);
    608     if (sccNum == INVALIDSCCNUM) {
    609       errs() << "unreachable block BB" << bb->getNumber() << "\n";
    610     }
    611   }
    612 } //orderBlocks
    613 
    614 template<class PassT> int CFGStructurizer<PassT>::patternMatch(BlockT *curBlk) {
    615   int numMatch = 0;
    616   int curMatch;
    617 
    618   if (DEBUGME) {
    619         errs() << "Begin patternMatch BB" << curBlk->getNumber() << "\n";
    620   }
    621 
    622   while ((curMatch = patternMatchGroup(curBlk)) > 0) {
    623     numMatch += curMatch;
    624   }
    625 
    626   if (DEBUGME) {
    627         errs() << "End patternMatch BB" << curBlk->getNumber()
    628       << ", numMatch = " << numMatch << "\n";
    629   }
    630 
    631   return numMatch;
    632 } //patternMatch
    633 
    634 template<class PassT>
    635 int CFGStructurizer<PassT>::patternMatchGroup(BlockT *curBlk) {
    636   int numMatch = 0;
    637   numMatch += serialPatternMatch(curBlk);
    638   numMatch += ifPatternMatch(curBlk);
    639   numMatch += loopendPatternMatch(curBlk);
    640   numMatch += loopPatternMatch(curBlk);
    641   return numMatch;
    642 }//patternMatchGroup
    643 
    644 template<class PassT>
    645 int CFGStructurizer<PassT>::serialPatternMatch(BlockT *curBlk) {
    646   if (curBlk->succ_size() != 1) {
    647     return 0;
    648   }
    649 
    650   BlockT *childBlk = *curBlk->succ_begin();
    651   if (childBlk->pred_size() != 1 || isActiveLoophead(childBlk)) {
    652     return 0;
    653   }
    654 
    655   mergeSerialBlock(curBlk, childBlk);
    656   ++numSerialPatternMatch;
    657   return 1;
    658 } //serialPatternMatch
    659 
    660 template<class PassT>
    661 int CFGStructurizer<PassT>::ifPatternMatch(BlockT *curBlk) {
    662   //two edges
    663   if (curBlk->succ_size() != 2) {
    664     return 0;
    665   }
    666 
    667   if (hasBackEdge(curBlk)) {
    668     return 0;
    669   }
    670 
    671   InstrT *branchInstr = CFGTraits::getNormalBlockBranchInstr(curBlk);
    672   if (branchInstr == NULL) {
    673     return 0;
    674   }
    675 
    676   assert(CFGTraits::isCondBranch(branchInstr));
    677 
    678   BlockT *trueBlk = CFGTraits::getTrueBranch(branchInstr);
    679   BlockT *falseBlk = CFGTraits::getFalseBranch(curBlk, branchInstr);
    680   BlockT *landBlk;
    681   int cloned = 0;
    682 
    683   // TODO: Simplify
    684   if (trueBlk->succ_size() == 1 && falseBlk->succ_size() == 1
    685     && *trueBlk->succ_begin() == *falseBlk->succ_begin()) {
    686     landBlk = *trueBlk->succ_begin();
    687   } else if (trueBlk->succ_size() == 0 && falseBlk->succ_size() == 0) {
    688     landBlk = NULL;
    689   } else if (trueBlk->succ_size() == 1 && *trueBlk->succ_begin() == falseBlk) {
    690     landBlk = falseBlk;
    691     falseBlk = NULL;
    692   } else if (falseBlk->succ_size() == 1
    693              && *falseBlk->succ_begin() == trueBlk) {
    694     landBlk = trueBlk;
    695     trueBlk = NULL;
    696   } else if (falseBlk->succ_size() == 1
    697              && isSameloopDetachedContbreak(trueBlk, falseBlk)) {
    698     landBlk = *falseBlk->succ_begin();
    699   } else if (trueBlk->succ_size() == 1
    700     && isSameloopDetachedContbreak(falseBlk, trueBlk)) {
    701     landBlk = *trueBlk->succ_begin();
    702   } else {
    703     return handleJumpintoIf(curBlk, trueBlk, falseBlk);
    704   }
    705 
    706   // improveSimpleJumpinfoIf can handle the case where landBlk == NULL but the
    707   // new BB created for landBlk==NULL may introduce new challenge to the
    708   // reduction process.
    709   if (landBlk != NULL &&
    710       ((trueBlk && trueBlk->pred_size() > 1)
    711       || (falseBlk && falseBlk->pred_size() > 1))) {
    712      cloned += improveSimpleJumpintoIf(curBlk, trueBlk, falseBlk, &landBlk);
    713   }
    714 
    715   if (trueBlk && trueBlk->pred_size() > 1) {
    716     trueBlk = cloneBlockForPredecessor(trueBlk, curBlk);
    717     ++cloned;
    718   }
    719 
    720   if (falseBlk && falseBlk->pred_size() > 1) {
    721     falseBlk = cloneBlockForPredecessor(falseBlk, curBlk);
    722     ++cloned;
    723   }
    724 
    725   mergeIfthenelseBlock(branchInstr, curBlk, trueBlk, falseBlk, landBlk);
    726 
    727   ++numIfPatternMatch;
    728 
    729   numClonedBlock += cloned;
    730 
    731   return 1 + cloned;
    732 } //ifPatternMatch
    733 
    734 template<class PassT>
    735 int CFGStructurizer<PassT>::switchPatternMatch(BlockT *curBlk) {
    736   return 0;
    737 } //switchPatternMatch
    738 
    739 template<class PassT>
    740 int CFGStructurizer<PassT>::loopendPatternMatch(BlockT *curBlk) {
    741   LoopT *loopRep = loopInfo->getLoopFor(curBlk);
    742   typename std::vector<LoopT *> nestedLoops;
    743   while (loopRep) {
    744     nestedLoops.push_back(loopRep);
    745     loopRep = loopRep->getParentLoop();
    746   }
    747 
    748   if (nestedLoops.size() == 0) {
    749     return 0;
    750   }
    751 
    752   // Process nested loop outside->inside, so "continue" to a outside loop won't
    753   // be mistaken as "break" of the current loop.
    754   int num = 0;
    755   for (typename std::vector<LoopT *>::reverse_iterator
    756        iter = nestedLoops.rbegin(), iterEnd = nestedLoops.rend();
    757        iter != iterEnd; ++iter) {
    758     loopRep = *iter;
    759 
    760     if (getLoopLandBlock(loopRep) != NULL) {
    761       continue;
    762     }
    763 
    764     BlockT *loopHeader = loopRep->getHeader();
    765 
    766     int numBreak = loopbreakPatternMatch(loopRep, loopHeader);
    767 
    768     if (numBreak == -1) {
    769       break;
    770     }
    771 
    772     int numCont = loopcontPatternMatch(loopRep, loopHeader);
    773     num += numBreak + numCont;
    774   }
    775 
    776   return num;
    777 } //loopendPatternMatch
    778 
    779 template<class PassT>
    780 int CFGStructurizer<PassT>::loopPatternMatch(BlockT *curBlk) {
    781   if (curBlk->succ_size() != 0) {
    782     return 0;
    783   }
    784 
    785   int numLoop = 0;
    786   LoopT *loopRep = loopInfo->getLoopFor(curBlk);
    787   while (loopRep && loopRep->getHeader() == curBlk) {
    788     LoopLandInfo *loopLand = getLoopLandInfo(loopRep);
    789     if (loopLand) {
    790       BlockT *landBlk = loopLand->landBlk;
    791       assert(landBlk);
    792       if (!isRetiredBlock(landBlk)) {
    793         mergeLooplandBlock(curBlk, loopLand);
    794         ++numLoop;
    795       }
    796     }
    797     loopRep = loopRep->getParentLoop();
    798   }
    799 
    800   numLoopPatternMatch += numLoop;
    801 
    802   return numLoop;
    803 } //loopPatternMatch
    804 
    805 template<class PassT>
    806 int CFGStructurizer<PassT>::loopbreakPatternMatch(LoopT *loopRep,
    807                                                   BlockT *loopHeader) {
    808   BlockTSmallerVector exitingBlks;
    809   loopRep->getExitingBlocks(exitingBlks);
    810 
    811   if (DEBUGME) {
    812     errs() << "Loop has " << exitingBlks.size() << " exiting blocks\n";
    813   }
    814 
    815   if (exitingBlks.size() == 0) {
    816     setLoopLandBlock(loopRep);
    817     return 0;
    818   }
    819 
    820   // Compute the corresponding exitBlks and exit block set.
    821   BlockTSmallerVector exitBlks;
    822   std::set<BlockT *> exitBlkSet;
    823   for (typename BlockTSmallerVector::const_iterator iter = exitingBlks.begin(),
    824        iterEnd = exitingBlks.end(); iter != iterEnd; ++iter) {
    825     BlockT *exitingBlk = *iter;
    826     BlockT *exitBlk = exitingBlock2ExitBlock(loopRep, exitingBlk);
    827     exitBlks.push_back(exitBlk);
    828     exitBlkSet.insert(exitBlk);  //non-duplicate insert
    829   }
    830 
    831   assert(exitBlkSet.size() > 0);
    832   assert(exitBlks.size() == exitingBlks.size());
    833 
    834   if (DEBUGME) {
    835     errs() << "Loop has " << exitBlkSet.size() << " exit blocks\n";
    836   }
    837 
    838   // Find exitLandBlk.
    839   BlockT *exitLandBlk = NULL;
    840   int numCloned = 0;
    841   int numSerial = 0;
    842 
    843   if (exitBlkSet.size() == 1) {
    844     exitLandBlk = *exitBlkSet.begin();
    845   } else {
    846     exitLandBlk = findNearestCommonPostDom(exitBlkSet);
    847 
    848     if (exitLandBlk == NULL) {
    849       return -1;
    850     }
    851 
    852     bool allInPath = true;
    853     bool allNotInPath = true;
    854     for (typename std::set<BlockT*>::const_iterator
    855          iter = exitBlkSet.begin(),
    856          iterEnd = exitBlkSet.end();
    857          iter != iterEnd; ++iter) {
    858       BlockT *exitBlk = *iter;
    859 
    860       PathToKind pathKind = singlePathTo(exitBlk, exitLandBlk, true);
    861       if (DEBUGME) {
    862         errs() << "BB" << exitBlk->getNumber()
    863                << " to BB" << exitLandBlk->getNumber() << " PathToKind="
    864                << pathKind << "\n";
    865       }
    866 
    867       allInPath = allInPath && (pathKind == SinglePath_InPath);
    868       allNotInPath = allNotInPath && (pathKind == SinglePath_NotInPath);
    869 
    870       if (!allInPath && !allNotInPath) {
    871         if (DEBUGME) {
    872               errs() << "singlePath check fail\n";
    873         }
    874         return -1;
    875       }
    876     } // check all exit blocks
    877 
    878     if (allNotInPath) {
    879 
    880       // TODO: Simplify, maybe separate function?
    881       LoopT *parentLoopRep = loopRep->getParentLoop();
    882       BlockT *parentLoopHeader = NULL;
    883       if (parentLoopRep)
    884         parentLoopHeader = parentLoopRep->getHeader();
    885 
    886       if (exitLandBlk == parentLoopHeader &&
    887           (exitLandBlk = relocateLoopcontBlock(parentLoopRep,
    888                                                loopRep,
    889                                                exitBlkSet,
    890                                                exitLandBlk)) != NULL) {
    891         if (DEBUGME) {
    892           errs() << "relocateLoopcontBlock success\n";
    893         }
    894       } else if ((exitLandBlk = addLoopEndbranchBlock(loopRep,
    895                                                       exitingBlks,
    896                                                       exitBlks)) != NULL) {
    897         if (DEBUGME) {
    898           errs() << "insertEndbranchBlock success\n";
    899         }
    900       } else {
    901         if (DEBUGME) {
    902           errs() << "loop exit fail\n";
    903         }
    904         return -1;
    905       }
    906     }
    907 
    908     // Handle side entry to exit path.
    909     exitBlks.clear();
    910     exitBlkSet.clear();
    911     for (typename BlockTSmallerVector::iterator iterExiting =
    912            exitingBlks.begin(),
    913          iterExitingEnd = exitingBlks.end();
    914          iterExiting != iterExitingEnd; ++iterExiting) {
    915       BlockT *exitingBlk = *iterExiting;
    916       BlockT *exitBlk = exitingBlock2ExitBlock(loopRep, exitingBlk);
    917       BlockT *newExitBlk = exitBlk;
    918 
    919       if (exitBlk != exitLandBlk && exitBlk->pred_size() > 1) {
    920         newExitBlk = cloneBlockForPredecessor(exitBlk, exitingBlk);
    921         ++numCloned;
    922       }
    923 
    924       numCloned += cloneOnSideEntryTo(exitingBlk, newExitBlk, exitLandBlk);
    925 
    926       exitBlks.push_back(newExitBlk);
    927       exitBlkSet.insert(newExitBlk);
    928     }
    929 
    930     for (typename BlockTSmallerVector::iterator iterExit = exitBlks.begin(),
    931          iterExitEnd = exitBlks.end();
    932          iterExit != iterExitEnd; ++iterExit) {
    933       BlockT *exitBlk = *iterExit;
    934       numSerial += serialPatternMatch(exitBlk);
    935     }
    936 
    937     for (typename BlockTSmallerVector::iterator iterExit = exitBlks.begin(),
    938          iterExitEnd = exitBlks.end();
    939          iterExit != iterExitEnd; ++iterExit) {
    940       BlockT *exitBlk = *iterExit;
    941       if (exitBlk->pred_size() > 1) {
    942         if (exitBlk != exitLandBlk) {
    943           return -1;
    944         }
    945       } else {
    946         if (exitBlk != exitLandBlk &&
    947             (exitBlk->succ_size() != 1 ||
    948             *exitBlk->succ_begin() != exitLandBlk)) {
    949           return -1;
    950         }
    951       }
    952     }
    953   } // else
    954 
    955   exitLandBlk = recordLoopLandBlock(loopRep, exitLandBlk, exitBlks, exitBlkSet);
    956 
    957   // Fold break into the breaking block. Leverage across level breaks.
    958   assert(exitingBlks.size() == exitBlks.size());
    959   for (typename BlockTSmallerVector::const_iterator iterExit = exitBlks.begin(),
    960        iterExiting = exitingBlks.begin(), iterExitEnd = exitBlks.end();
    961        iterExit != iterExitEnd; ++iterExit, ++iterExiting) {
    962     BlockT *exitBlk = *iterExit;
    963     BlockT *exitingBlk = *iterExiting;
    964     assert(exitBlk->pred_size() == 1 || exitBlk == exitLandBlk);
    965     LoopT *exitingLoop = loopInfo->getLoopFor(exitingBlk);
    966     handleLoopbreak(exitingBlk, exitingLoop, exitBlk, loopRep, exitLandBlk);
    967   }
    968 
    969   int numBreak = static_cast<int>(exitingBlks.size());
    970   numLoopbreakPatternMatch += numBreak;
    971   numClonedBlock += numCloned;
    972   return numBreak + numSerial + numCloned;
    973 } //loopbreakPatternMatch
    974 
    975 template<class PassT>
    976 int CFGStructurizer<PassT>::loopcontPatternMatch(LoopT *loopRep,
    977                                                  BlockT *loopHeader) {
    978   int numCont = 0;
    979   SmallVector<BlockT *, DEFAULT_VEC_SLOTS> contBlk;
    980   for (typename InvBlockGTraits::ChildIteratorType iter =
    981        InvBlockGTraits::child_begin(loopHeader),
    982        iterEnd = InvBlockGTraits::child_end(loopHeader);
    983        iter != iterEnd; ++iter) {
    984     BlockT *curBlk = *iter;
    985     if (loopRep->contains(curBlk)) {
    986       handleLoopcontBlock(curBlk, loopInfo->getLoopFor(curBlk),
    987                           loopHeader, loopRep);
    988       contBlk.push_back(curBlk);
    989       ++numCont;
    990     }
    991   }
    992 
    993   for (typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::iterator
    994        iter = contBlk.begin(), iterEnd = contBlk.end();
    995        iter != iterEnd; ++iter) {
    996     (*iter)->removeSuccessor(loopHeader);
    997   }
    998 
    999   numLoopcontPatternMatch += numCont;
   1000 
   1001   return numCont;
   1002 } //loopcontPatternMatch
   1003 
   1004 
   1005 template<class PassT>
   1006 bool CFGStructurizer<PassT>::isSameloopDetachedContbreak(BlockT *src1Blk,
   1007                                                          BlockT *src2Blk) {
   1008   // return true iff src1Blk->succ_size() == 0 && src1Blk and src2Blk are in the
   1009   // same loop with LoopLandInfo without explicitly keeping track of
   1010   // loopContBlks and loopBreakBlks, this is a method to get the information.
   1011   //
   1012   if (src1Blk->succ_size() == 0) {
   1013     LoopT *loopRep = loopInfo->getLoopFor(src1Blk);
   1014     if (loopRep != NULL && loopRep == loopInfo->getLoopFor(src2Blk)) {
   1015       LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
   1016       if (theEntry != NULL) {
   1017         if (DEBUGME) {
   1018           errs() << "isLoopContBreakBlock yes src1 = BB"
   1019                  << src1Blk->getNumber()
   1020                  << " src2 = BB" << src2Blk->getNumber() << "\n";
   1021         }
   1022         return true;
   1023       }
   1024     }
   1025   }
   1026   return false;
   1027 }  //isSameloopDetachedContbreak
   1028 
   1029 template<class PassT>
   1030 int CFGStructurizer<PassT>::handleJumpintoIf(BlockT *headBlk,
   1031                                              BlockT *trueBlk,
   1032                                              BlockT *falseBlk) {
   1033   int num = handleJumpintoIfImp(headBlk, trueBlk, falseBlk);
   1034   if (num == 0) {
   1035     if (DEBUGME) {
   1036       errs() << "handleJumpintoIf swap trueBlk and FalseBlk" << "\n";
   1037     }
   1038     num = handleJumpintoIfImp(headBlk, falseBlk, trueBlk);
   1039   }
   1040   return num;
   1041 }
   1042 
   1043 template<class PassT>
   1044 int CFGStructurizer<PassT>::handleJumpintoIfImp(BlockT *headBlk,
   1045                                                 BlockT *trueBlk,
   1046                                                 BlockT *falseBlk) {
   1047   int num = 0;
   1048   BlockT *downBlk;
   1049 
   1050   //trueBlk could be the common post dominator
   1051   downBlk = trueBlk;
   1052 
   1053   if (DEBUGME) {
   1054     errs() << "handleJumpintoIfImp head = BB" << headBlk->getNumber()
   1055            << " true = BB" << trueBlk->getNumber()
   1056            << ", numSucc=" << trueBlk->succ_size()
   1057            << " false = BB" << falseBlk->getNumber() << "\n";
   1058   }
   1059 
   1060   while (downBlk) {
   1061     if (DEBUGME) {
   1062       errs() << "check down = BB" << downBlk->getNumber();
   1063     }
   1064 
   1065     if (singlePathTo(falseBlk, downBlk) == SinglePath_InPath) {
   1066       if (DEBUGME) {
   1067         errs() << " working\n";
   1068       }
   1069 
   1070       num += cloneOnSideEntryTo(headBlk, trueBlk, downBlk);
   1071       num += cloneOnSideEntryTo(headBlk, falseBlk, downBlk);
   1072 
   1073       numClonedBlock += num;
   1074       num += serialPatternMatch(*headBlk->succ_begin());
   1075       num += serialPatternMatch(*(++headBlk->succ_begin()));
   1076       num += ifPatternMatch(headBlk);
   1077       assert(num > 0);
   1078 
   1079       break;
   1080     }
   1081     if (DEBUGME) {
   1082       errs() << " not working\n";
   1083     }
   1084     downBlk = (downBlk->succ_size() == 1) ? (*downBlk->succ_begin()) : NULL;
   1085   } // walk down the postDomTree
   1086 
   1087   return num;
   1088 } //handleJumpintoIf
   1089 
   1090 template<class PassT>
   1091 void CFGStructurizer<PassT>::showImproveSimpleJumpintoIf(BlockT *headBlk,
   1092                                                          BlockT *trueBlk,
   1093                                                          BlockT *falseBlk,
   1094                                                          BlockT *landBlk,
   1095                                                          bool detail) {
   1096   errs() << "head = BB" << headBlk->getNumber()
   1097          << " size = " << headBlk->size();
   1098   if (detail) {
   1099     errs() << "\n";
   1100     headBlk->print(errs());
   1101     errs() << "\n";
   1102   }
   1103 
   1104   if (trueBlk) {
   1105     errs() << ", true = BB" << trueBlk->getNumber() << " size = "
   1106            << trueBlk->size() << " numPred = " << trueBlk->pred_size();
   1107     if (detail) {
   1108       errs() << "\n";
   1109       trueBlk->print(errs());
   1110       errs() << "\n";
   1111     }
   1112   }
   1113   if (falseBlk) {
   1114     errs() << ", false = BB" << falseBlk->getNumber() << " size = "
   1115            << falseBlk->size() << " numPred = " << falseBlk->pred_size();
   1116     if (detail) {
   1117       errs() << "\n";
   1118       falseBlk->print(errs());
   1119       errs() << "\n";
   1120     }
   1121   }
   1122   if (landBlk) {
   1123     errs() << ", land = BB" << landBlk->getNumber() << " size = "
   1124            << landBlk->size() << " numPred = " << landBlk->pred_size();
   1125     if (detail) {
   1126       errs() << "\n";
   1127       landBlk->print(errs());
   1128       errs() << "\n";
   1129     }
   1130   }
   1131 
   1132     errs() << "\n";
   1133 } //showImproveSimpleJumpintoIf
   1134 
   1135 template<class PassT>
   1136 int CFGStructurizer<PassT>::improveSimpleJumpintoIf(BlockT *headBlk,
   1137                                                     BlockT *trueBlk,
   1138                                                     BlockT *falseBlk,
   1139                                                     BlockT **plandBlk) {
   1140   bool migrateTrue = false;
   1141   bool migrateFalse = false;
   1142 
   1143   BlockT *landBlk = *plandBlk;
   1144 
   1145   assert((trueBlk == NULL || trueBlk->succ_size() <= 1)
   1146          && (falseBlk == NULL || falseBlk->succ_size() <= 1));
   1147 
   1148   if (trueBlk == falseBlk) {
   1149     return 0;
   1150   }
   1151 
   1152   migrateTrue = needMigrateBlock(trueBlk);
   1153   migrateFalse = needMigrateBlock(falseBlk);
   1154 
   1155   if (!migrateTrue && !migrateFalse) {
   1156     return 0;
   1157   }
   1158 
   1159   // If we need to migrate either trueBlk and falseBlk, migrate the rest that
   1160   // have more than one predecessors.  without doing this, its predecessor
   1161   // rather than headBlk will have undefined value in initReg.
   1162   if (!migrateTrue && trueBlk && trueBlk->pred_size() > 1) {
   1163     migrateTrue = true;
   1164   }
   1165   if (!migrateFalse && falseBlk && falseBlk->pred_size() > 1) {
   1166     migrateFalse = true;
   1167   }
   1168 
   1169   if (DEBUGME) {
   1170     errs() << "before improveSimpleJumpintoIf: ";
   1171     showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 0);
   1172   }
   1173 
   1174   // org: headBlk => if () {trueBlk} else {falseBlk} => landBlk
   1175   //
   1176   // new: headBlk => if () {initReg = 1; org trueBlk branch} else
   1177   //      {initReg = 0; org falseBlk branch }
   1178   //      => landBlk => if (initReg) {org trueBlk} else {org falseBlk}
   1179   //      => org landBlk
   1180   //      if landBlk->pred_size() > 2, put the about if-else inside
   1181   //      if (initReg !=2) {...}
   1182   //
   1183   // add initReg = initVal to headBlk
   1184 
   1185   const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
   1186   unsigned initReg =
   1187     funcRep->getRegInfo().createVirtualRegister(I32RC);
   1188   if (!migrateTrue || !migrateFalse) {
   1189     int initVal = migrateTrue ? 0 : 1;
   1190     CFGTraits::insertAssignInstrBefore(headBlk, passRep, initReg, initVal);
   1191   }
   1192 
   1193   int numNewBlk = 0;
   1194 
   1195   if (landBlk == NULL) {
   1196     landBlk = funcRep->CreateMachineBasicBlock();
   1197     funcRep->push_back(landBlk);  //insert to function
   1198 
   1199     if (trueBlk) {
   1200       trueBlk->addSuccessor(landBlk);
   1201     } else {
   1202       headBlk->addSuccessor(landBlk);
   1203     }
   1204 
   1205     if (falseBlk) {
   1206       falseBlk->addSuccessor(landBlk);
   1207     } else {
   1208       headBlk->addSuccessor(landBlk);
   1209     }
   1210 
   1211     numNewBlk ++;
   1212   }
   1213 
   1214   bool landBlkHasOtherPred = (landBlk->pred_size() > 2);
   1215 
   1216   //insert AMDGPU::ENDIF to avoid special case "input landBlk == NULL"
   1217   typename BlockT::iterator insertPos =
   1218     CFGTraits::getInstrPos
   1219     (landBlk, CFGTraits::insertInstrBefore(landBlk, AMDGPU::ENDIF, passRep));
   1220 
   1221   if (landBlkHasOtherPred) {
   1222     unsigned immReg =
   1223       funcRep->getRegInfo().createVirtualRegister(I32RC);
   1224     CFGTraits::insertAssignInstrBefore(insertPos, passRep, immReg, 2);
   1225     unsigned cmpResReg =
   1226       funcRep->getRegInfo().createVirtualRegister(I32RC);
   1227 
   1228     CFGTraits::insertCompareInstrBefore(landBlk, insertPos, passRep, cmpResReg,
   1229                                         initReg, immReg);
   1230     CFGTraits::insertCondBranchBefore(landBlk, insertPos,
   1231                                       AMDGPU::IF_PREDICATE_SET, passRep,
   1232                                       cmpResReg, DebugLoc());
   1233   }
   1234 
   1235   CFGTraits::insertCondBranchBefore(landBlk, insertPos, AMDGPU::IF_PREDICATE_SET,
   1236                                     passRep, initReg, DebugLoc());
   1237 
   1238   if (migrateTrue) {
   1239     migrateInstruction(trueBlk, landBlk, insertPos);
   1240     // need to uncondionally insert the assignment to ensure a path from its
   1241     // predecessor rather than headBlk has valid value in initReg if
   1242     // (initVal != 1).
   1243     CFGTraits::insertAssignInstrBefore(trueBlk, passRep, initReg, 1);
   1244   }
   1245   CFGTraits::insertInstrBefore(insertPos, AMDGPU::ELSE, passRep);
   1246 
   1247   if (migrateFalse) {
   1248     migrateInstruction(falseBlk, landBlk, insertPos);
   1249     // need to uncondionally insert the assignment to ensure a path from its
   1250     // predecessor rather than headBlk has valid value in initReg if
   1251     // (initVal != 0)
   1252     CFGTraits::insertAssignInstrBefore(falseBlk, passRep, initReg, 0);
   1253   }
   1254 
   1255   if (landBlkHasOtherPred) {
   1256     // add endif
   1257     CFGTraits::insertInstrBefore(insertPos, AMDGPU::ENDIF, passRep);
   1258 
   1259     // put initReg = 2 to other predecessors of landBlk
   1260     for (typename BlockT::pred_iterator predIter = landBlk->pred_begin(),
   1261          predIterEnd = landBlk->pred_end(); predIter != predIterEnd;
   1262          ++predIter) {
   1263       BlockT *curBlk = *predIter;
   1264       if (curBlk != trueBlk && curBlk != falseBlk) {
   1265         CFGTraits::insertAssignInstrBefore(curBlk, passRep, initReg, 2);
   1266       }
   1267     } //for
   1268   }
   1269   if (DEBUGME) {
   1270     errs() << "result from improveSimpleJumpintoIf: ";
   1271     showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 0);
   1272   }
   1273 
   1274   // update landBlk
   1275   *plandBlk = landBlk;
   1276 
   1277   return numNewBlk;
   1278 } //improveSimpleJumpintoIf
   1279 
   1280 template<class PassT>
   1281 void CFGStructurizer<PassT>::handleLoopbreak(BlockT *exitingBlk,
   1282                                               LoopT *exitingLoop,
   1283                                              BlockT *exitBlk,
   1284                                               LoopT *exitLoop,
   1285                                              BlockT *landBlk) {
   1286   if (DEBUGME) {
   1287     errs() << "Trying to break loop-depth = " << getLoopDepth(exitLoop)
   1288            << " from loop-depth = " << getLoopDepth(exitingLoop) << "\n";
   1289   }
   1290   const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
   1291 
   1292   RegiT initReg = INVALIDREGNUM;
   1293   if (exitingLoop != exitLoop) {
   1294     initReg = static_cast<int>
   1295       (funcRep->getRegInfo().createVirtualRegister(I32RC));
   1296     assert(initReg != INVALIDREGNUM);
   1297     addLoopBreakInitReg(exitLoop, initReg);
   1298     while (exitingLoop != exitLoop && exitingLoop) {
   1299       addLoopBreakOnReg(exitingLoop, initReg);
   1300       exitingLoop = exitingLoop->getParentLoop();
   1301     }
   1302     assert(exitingLoop == exitLoop);
   1303   }
   1304 
   1305   mergeLoopbreakBlock(exitingBlk, exitBlk, landBlk, initReg);
   1306 
   1307 } //handleLoopbreak
   1308 
   1309 template<class PassT>
   1310 void CFGStructurizer<PassT>::handleLoopcontBlock(BlockT *contingBlk,
   1311                                                   LoopT *contingLoop,
   1312                                                  BlockT *contBlk,
   1313                                                   LoopT *contLoop) {
   1314   if (DEBUGME) {
   1315     errs() << "loopcontPattern cont = BB" << contingBlk->getNumber()
   1316            << " header = BB" << contBlk->getNumber() << "\n";
   1317 
   1318     errs() << "Trying to continue loop-depth = "
   1319            << getLoopDepth(contLoop)
   1320            << " from loop-depth = " << getLoopDepth(contingLoop) << "\n";
   1321   }
   1322 
   1323   RegiT initReg = INVALIDREGNUM;
   1324   const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
   1325   if (contingLoop != contLoop) {
   1326     initReg = static_cast<int>
   1327       (funcRep->getRegInfo().createVirtualRegister(I32RC));
   1328     assert(initReg != INVALIDREGNUM);
   1329     addLoopContInitReg(contLoop, initReg);
   1330     while (contingLoop && contingLoop->getParentLoop() != contLoop) {
   1331       addLoopBreakOnReg(contingLoop, initReg);  //not addLoopContOnReg
   1332       contingLoop = contingLoop->getParentLoop();
   1333     }
   1334     assert(contingLoop && contingLoop->getParentLoop() == contLoop);
   1335     addLoopContOnReg(contingLoop, initReg);
   1336   }
   1337 
   1338   settleLoopcontBlock(contingBlk, contBlk, initReg);
   1339 } //handleLoopcontBlock
   1340 
   1341 template<class PassT>
   1342 void CFGStructurizer<PassT>::mergeSerialBlock(BlockT *dstBlk, BlockT *srcBlk) {
   1343   if (DEBUGME) {
   1344     errs() << "serialPattern BB" << dstBlk->getNumber()
   1345            << " <= BB" << srcBlk->getNumber() << "\n";
   1346   }
   1347   dstBlk->splice(dstBlk->end(), srcBlk, srcBlk->begin(), srcBlk->end());
   1348 
   1349   dstBlk->removeSuccessor(srcBlk);
   1350   CFGTraits::cloneSuccessorList(dstBlk, srcBlk);
   1351 
   1352   removeSuccessor(srcBlk);
   1353   retireBlock(dstBlk, srcBlk);
   1354 } //mergeSerialBlock
   1355 
   1356 template<class PassT>
   1357 void CFGStructurizer<PassT>::mergeIfthenelseBlock(InstrT *branchInstr,
   1358                                                   BlockT *curBlk,
   1359                                                   BlockT *trueBlk,
   1360                                                   BlockT *falseBlk,
   1361                                                   BlockT *landBlk) {
   1362   if (DEBUGME) {
   1363     errs() << "ifPattern BB" << curBlk->getNumber();
   1364     errs() << "{  ";
   1365     if (trueBlk) {
   1366       errs() << "BB" << trueBlk->getNumber();
   1367     }
   1368     errs() << "  } else ";
   1369     errs() << "{  ";
   1370     if (falseBlk) {
   1371       errs() << "BB" << falseBlk->getNumber();
   1372     }
   1373     errs() << "  }\n ";
   1374     errs() << "landBlock: ";
   1375     if (landBlk == NULL) {
   1376       errs() << "NULL";
   1377     } else {
   1378       errs() << "BB" << landBlk->getNumber();
   1379     }
   1380     errs() << "\n";
   1381   }
   1382 
   1383   int oldOpcode = branchInstr->getOpcode();
   1384   DebugLoc branchDL = branchInstr->getDebugLoc();
   1385 
   1386 //    transform to
   1387 //    if cond
   1388 //       trueBlk
   1389 //    else
   1390 //       falseBlk
   1391 //    endif
   1392 //    landBlk
   1393 
   1394   typename BlockT::iterator branchInstrPos =
   1395     CFGTraits::getInstrPos(curBlk, branchInstr);
   1396   CFGTraits::insertCondBranchBefore(branchInstrPos,
   1397                                     CFGTraits::getBranchNzeroOpcode(oldOpcode),
   1398                                     passRep,
   1399                                     branchDL);
   1400 
   1401   if (trueBlk) {
   1402     curBlk->splice(branchInstrPos, trueBlk, trueBlk->begin(), trueBlk->end());
   1403     curBlk->removeSuccessor(trueBlk);
   1404     if (landBlk && trueBlk->succ_size()!=0) {
   1405       trueBlk->removeSuccessor(landBlk);
   1406     }
   1407     retireBlock(curBlk, trueBlk);
   1408   }
   1409   CFGTraits::insertInstrBefore(branchInstrPos, AMDGPU::ELSE, passRep);
   1410 
   1411   if (falseBlk) {
   1412     curBlk->splice(branchInstrPos, falseBlk, falseBlk->begin(),
   1413                    falseBlk->end());
   1414     curBlk->removeSuccessor(falseBlk);
   1415     if (landBlk && falseBlk->succ_size() != 0) {
   1416       falseBlk->removeSuccessor(landBlk);
   1417     }
   1418     retireBlock(curBlk, falseBlk);
   1419   }
   1420   CFGTraits::insertInstrBefore(branchInstrPos, AMDGPU::ENDIF, passRep);
   1421 
   1422   branchInstr->eraseFromParent();
   1423 
   1424   if (landBlk && trueBlk && falseBlk) {
   1425     curBlk->addSuccessor(landBlk);
   1426   }
   1427 
   1428 } //mergeIfthenelseBlock
   1429 
   1430 template<class PassT>
   1431 void CFGStructurizer<PassT>::mergeLooplandBlock(BlockT *dstBlk,
   1432                                                 LoopLandInfo *loopLand) {
   1433   BlockT *landBlk = loopLand->landBlk;
   1434 
   1435   if (DEBUGME) {
   1436     errs() << "loopPattern header = BB" << dstBlk->getNumber()
   1437            << " land = BB" << landBlk->getNumber() << "\n";
   1438   }
   1439 
   1440   // Loop contInitRegs are init at the beginning of the loop.
   1441   for (typename std::set<RegiT>::const_iterator iter =
   1442          loopLand->contInitRegs.begin(),
   1443        iterEnd = loopLand->contInitRegs.end(); iter != iterEnd; ++iter) {
   1444     CFGTraits::insertAssignInstrBefore(dstBlk, passRep, *iter, 0);
   1445   }
   1446 
   1447   /* we last inserterd the DebugLoc in the
   1448    * BREAK_LOGICALZ_i32 or AMDGPU::BREAK_LOGICALNZ statement in the current dstBlk.
   1449    * search for the DebugLoc in the that statement.
   1450    * if not found, we have to insert the empty/default DebugLoc */
   1451   InstrT *loopBreakInstr = CFGTraits::getLoopBreakInstr(dstBlk);
   1452   DebugLoc DLBreak = (loopBreakInstr) ? loopBreakInstr->getDebugLoc() : DebugLoc();
   1453 
   1454   CFGTraits::insertInstrBefore(dstBlk, AMDGPU::WHILELOOP, passRep, DLBreak);
   1455   // Loop breakInitRegs are init before entering the loop.
   1456   for (typename std::set<RegiT>::const_iterator iter =
   1457          loopLand->breakInitRegs.begin(),
   1458        iterEnd = loopLand->breakInitRegs.end(); iter != iterEnd; ++iter) {
   1459     CFGTraits::insertAssignInstrBefore(dstBlk, passRep, *iter, 0);
   1460   }
   1461   // Loop endbranchInitRegs are init before entering the loop.
   1462   for (typename std::set<RegiT>::const_iterator iter =
   1463          loopLand->endbranchInitRegs.begin(),
   1464        iterEnd = loopLand->endbranchInitRegs.end(); iter != iterEnd; ++iter) {
   1465     CFGTraits::insertAssignInstrBefore(dstBlk, passRep, *iter, 0);
   1466   }
   1467 
   1468   /* we last inserterd the DebugLoc in the continue statement in the current dstBlk
   1469    * search for the DebugLoc in the continue statement.
   1470    * if not found, we have to insert the empty/default DebugLoc */
   1471   InstrT *continueInstr = CFGTraits::getContinueInstr(dstBlk);
   1472   DebugLoc DLContinue = (continueInstr) ? continueInstr->getDebugLoc() : DebugLoc();
   1473 
   1474   CFGTraits::insertInstrEnd(dstBlk, AMDGPU::ENDLOOP, passRep, DLContinue);
   1475   // Loop breakOnRegs are check after the ENDLOOP: break the loop outside this
   1476   // loop.
   1477   for (typename std::set<RegiT>::const_iterator iter =
   1478          loopLand->breakOnRegs.begin(),
   1479        iterEnd = loopLand->breakOnRegs.end(); iter != iterEnd; ++iter) {
   1480     CFGTraits::insertCondBranchEnd(dstBlk, AMDGPU::PREDICATED_BREAK, passRep,
   1481                                    *iter);
   1482   }
   1483 
   1484   // Loop contOnRegs are check after the ENDLOOP: cont the loop outside this
   1485   // loop.
   1486   for (std::set<RegiT>::const_iterator iter = loopLand->contOnRegs.begin(),
   1487        iterEnd = loopLand->contOnRegs.end(); iter != iterEnd; ++iter) {
   1488     CFGTraits::insertCondBranchEnd(dstBlk, AMDGPU::CONTINUE_LOGICALNZ_i32,
   1489                                    passRep, *iter);
   1490   }
   1491 
   1492   dstBlk->splice(dstBlk->end(), landBlk, landBlk->begin(), landBlk->end());
   1493 
   1494   for (typename BlockT::succ_iterator iter = landBlk->succ_begin(),
   1495        iterEnd = landBlk->succ_end(); iter != iterEnd; ++iter) {
   1496     dstBlk->addSuccessor(*iter);  // *iter's predecessor is also taken care of.
   1497   }
   1498 
   1499   removeSuccessor(landBlk);
   1500   retireBlock(dstBlk, landBlk);
   1501 } //mergeLooplandBlock
   1502 
   1503 template<class PassT>
   1504 void CFGStructurizer<PassT>::reversePredicateSetter(typename BlockT::iterator I) {
   1505   while (I--) {
   1506     if (I->getOpcode() == AMDGPU::PRED_X) {
   1507       switch (static_cast<MachineInstr *>(I)->getOperand(2).getImm()) {
   1508       case OPCODE_IS_ZERO_INT:
   1509         static_cast<MachineInstr *>(I)->getOperand(2).setImm(OPCODE_IS_NOT_ZERO_INT);
   1510         return;
   1511       case OPCODE_IS_NOT_ZERO_INT:
   1512         static_cast<MachineInstr *>(I)->getOperand(2).setImm(OPCODE_IS_ZERO_INT);
   1513         return;
   1514       case OPCODE_IS_ZERO:
   1515         static_cast<MachineInstr *>(I)->getOperand(2).setImm(OPCODE_IS_NOT_ZERO);
   1516         return;
   1517       case OPCODE_IS_NOT_ZERO:
   1518         static_cast<MachineInstr *>(I)->getOperand(2).setImm(OPCODE_IS_ZERO);
   1519         return;
   1520       default:
   1521         assert(0 && "PRED_X Opcode invalid!");
   1522       }
   1523     }
   1524   }
   1525 }
   1526 
   1527 template<class PassT>
   1528 void CFGStructurizer<PassT>::mergeLoopbreakBlock(BlockT *exitingBlk,
   1529                                                  BlockT *exitBlk,
   1530                                                  BlockT *exitLandBlk,
   1531                                                  RegiT  setReg) {
   1532   if (DEBUGME) {
   1533     errs() << "loopbreakPattern exiting = BB" << exitingBlk->getNumber()
   1534            << " exit = BB" << exitBlk->getNumber()
   1535            << " land = BB" << exitLandBlk->getNumber() << "\n";
   1536   }
   1537 
   1538   InstrT *branchInstr = CFGTraits::getLoopendBlockBranchInstr(exitingBlk);
   1539   assert(branchInstr && CFGTraits::isCondBranch(branchInstr));
   1540 
   1541   DebugLoc DL = branchInstr->getDebugLoc();
   1542 
   1543   BlockT *trueBranch = CFGTraits::getTrueBranch(branchInstr);
   1544 
   1545   //    transform exitingBlk to
   1546   //    if ( ) {
   1547   //       exitBlk (if exitBlk != exitLandBlk)
   1548   //       setReg = 1
   1549   //       break
   1550   //    }endif
   1551   //    successor = {orgSuccessor(exitingBlk) - exitBlk}
   1552 
   1553   typename BlockT::iterator branchInstrPos =
   1554     CFGTraits::getInstrPos(exitingBlk, branchInstr);
   1555 
   1556   if (exitBlk == exitLandBlk && setReg == INVALIDREGNUM) {
   1557     //break_logical
   1558 
   1559     if (trueBranch != exitBlk) {
   1560       reversePredicateSetter(branchInstrPos);
   1561     }
   1562     CFGTraits::insertCondBranchBefore(branchInstrPos, AMDGPU::PREDICATED_BREAK, passRep, DL);
   1563   } else {
   1564     if (trueBranch != exitBlk) {
   1565       reversePredicateSetter(branchInstr);
   1566     }
   1567     CFGTraits::insertCondBranchBefore(branchInstrPos, AMDGPU::PREDICATED_BREAK, passRep, DL);
   1568     if (exitBlk != exitLandBlk) {
   1569       //splice is insert-before ...
   1570       exitingBlk->splice(branchInstrPos, exitBlk, exitBlk->begin(),
   1571                          exitBlk->end());
   1572     }
   1573     if (setReg != INVALIDREGNUM) {
   1574       CFGTraits::insertAssignInstrBefore(branchInstrPos, passRep, setReg, 1);
   1575     }
   1576     CFGTraits::insertInstrBefore(branchInstrPos, AMDGPU::BREAK, passRep);
   1577   } //if_logical
   1578 
   1579   //now branchInst can be erase safely
   1580   branchInstr->eraseFromParent();
   1581 
   1582   //now take care of successors, retire blocks
   1583   exitingBlk->removeSuccessor(exitBlk);
   1584   if (exitBlk != exitLandBlk) {
   1585     //splice is insert-before ...
   1586     exitBlk->removeSuccessor(exitLandBlk);
   1587     retireBlock(exitingBlk, exitBlk);
   1588   }
   1589 
   1590 } //mergeLoopbreakBlock
   1591 
   1592 template<class PassT>
   1593 void CFGStructurizer<PassT>::settleLoopcontBlock(BlockT *contingBlk,
   1594                                                  BlockT *contBlk,
   1595                                                  RegiT   setReg) {
   1596   if (DEBUGME) {
   1597     errs() << "settleLoopcontBlock conting = BB"
   1598            << contingBlk->getNumber()
   1599            << ", cont = BB" << contBlk->getNumber() << "\n";
   1600   }
   1601 
   1602   InstrT *branchInstr = CFGTraits::getLoopendBlockBranchInstr(contingBlk);
   1603   if (branchInstr) {
   1604     assert(CFGTraits::isCondBranch(branchInstr));
   1605     typename BlockT::iterator branchInstrPos =
   1606       CFGTraits::getInstrPos(contingBlk, branchInstr);
   1607     BlockT *trueBranch = CFGTraits::getTrueBranch(branchInstr);
   1608     int oldOpcode = branchInstr->getOpcode();
   1609     DebugLoc DL = branchInstr->getDebugLoc();
   1610 
   1611     //    transform contingBlk to
   1612     //     if () {
   1613     //          move instr after branchInstr
   1614     //          continue
   1615     //        or
   1616     //          setReg = 1
   1617     //          break
   1618     //     }endif
   1619     //     successor = {orgSuccessor(contingBlk) - loopHeader}
   1620 
   1621     bool useContinueLogical =
   1622       (setReg == INVALIDREGNUM && (&*contingBlk->rbegin()) == branchInstr);
   1623 
   1624     if (useContinueLogical == false) {
   1625       int branchOpcode =
   1626         trueBranch == contBlk ? CFGTraits::getBranchNzeroOpcode(oldOpcode)
   1627                               : CFGTraits::getBranchZeroOpcode(oldOpcode);
   1628 
   1629       CFGTraits::insertCondBranchBefore(branchInstrPos, branchOpcode, passRep, DL);
   1630 
   1631       if (setReg != INVALIDREGNUM) {
   1632         CFGTraits::insertAssignInstrBefore(branchInstrPos, passRep, setReg, 1);
   1633         // insertEnd to ensure phi-moves, if exist, go before the continue-instr.
   1634         CFGTraits::insertInstrEnd(contingBlk, AMDGPU::BREAK, passRep, DL);
   1635       } else {
   1636         // insertEnd to ensure phi-moves, if exist, go before the continue-instr.
   1637         CFGTraits::insertInstrEnd(contingBlk, AMDGPU::CONTINUE, passRep, DL);
   1638       }
   1639 
   1640       CFGTraits::insertInstrEnd(contingBlk, AMDGPU::ENDIF, passRep, DL);
   1641     } else {
   1642       int branchOpcode =
   1643         trueBranch == contBlk ? CFGTraits::getContinueNzeroOpcode(oldOpcode)
   1644                               : CFGTraits::getContinueZeroOpcode(oldOpcode);
   1645 
   1646       CFGTraits::insertCondBranchBefore(branchInstrPos, branchOpcode, passRep, DL);
   1647     }
   1648 
   1649     branchInstr->eraseFromParent();
   1650   } else {
   1651     // if we've arrived here then we've already erased the branch instruction
   1652     // travel back up the basic block to see the last reference of our debug location
   1653     // we've just inserted that reference here so it should be representative
   1654     if (setReg != INVALIDREGNUM) {
   1655       CFGTraits::insertAssignInstrBefore(contingBlk, passRep, setReg, 1);
   1656       // insertEnd to ensure phi-moves, if exist, go before the continue-instr.
   1657       CFGTraits::insertInstrEnd(contingBlk, AMDGPU::BREAK, passRep, CFGTraits::getLastDebugLocInBB(contingBlk));
   1658     } else {
   1659       // insertEnd to ensure phi-moves, if exist, go before the continue-instr.
   1660       CFGTraits::insertInstrEnd(contingBlk, AMDGPU::CONTINUE, passRep, CFGTraits::getLastDebugLocInBB(contingBlk));
   1661     }
   1662   } //else
   1663 
   1664 } //settleLoopcontBlock
   1665 
   1666 // BBs in exitBlkSet are determined as in break-path for loopRep,
   1667 // before we can put code for BBs as inside loop-body for loopRep
   1668 // check whether those BBs are determined as cont-BB for parentLoopRep
   1669 // earlier.
   1670 // If so, generate a new BB newBlk
   1671 //    (1) set newBlk common successor of BBs in exitBlkSet
   1672 //    (2) change the continue-instr in BBs in exitBlkSet to break-instr
   1673 //    (3) generate continue-instr in newBlk
   1674 //
   1675 template<class PassT>
   1676 typename CFGStructurizer<PassT>::BlockT *
   1677 CFGStructurizer<PassT>::relocateLoopcontBlock(LoopT *parentLoopRep,
   1678                                               LoopT *loopRep,
   1679                                               std::set<BlockT *> &exitBlkSet,
   1680                                               BlockT *exitLandBlk) {
   1681   std::set<BlockT *> endBlkSet;
   1682 
   1683 
   1684 
   1685   for (typename std::set<BlockT *>::const_iterator iter = exitBlkSet.begin(),
   1686        iterEnd = exitBlkSet.end();
   1687        iter != iterEnd; ++iter) {
   1688     BlockT *exitBlk = *iter;
   1689     BlockT *endBlk = singlePathEnd(exitBlk, exitLandBlk);
   1690 
   1691     if (endBlk == NULL || CFGTraits::getContinueInstr(endBlk) == NULL)
   1692       return NULL;
   1693 
   1694     endBlkSet.insert(endBlk);
   1695   }
   1696 
   1697   BlockT *newBlk = funcRep->CreateMachineBasicBlock();
   1698   funcRep->push_back(newBlk);  //insert to function
   1699   CFGTraits::insertInstrEnd(newBlk, AMDGPU::CONTINUE, passRep);
   1700   SHOWNEWBLK(newBlk, "New continue block: ");
   1701 
   1702   for (typename std::set<BlockT*>::const_iterator iter = endBlkSet.begin(),
   1703        iterEnd = endBlkSet.end();
   1704        iter != iterEnd; ++iter) {
   1705       BlockT *endBlk = *iter;
   1706       InstrT *contInstr = CFGTraits::getContinueInstr(endBlk);
   1707       if (contInstr) {
   1708         contInstr->eraseFromParent();
   1709       }
   1710       endBlk->addSuccessor(newBlk);
   1711       if (DEBUGME) {
   1712         errs() << "Add new continue Block to BB"
   1713                << endBlk->getNumber() << " successors\n";
   1714       }
   1715   }
   1716 
   1717   return newBlk;
   1718 } //relocateLoopcontBlock
   1719 
   1720 
   1721 // LoopEndbranchBlock is a BB created by the CFGStructurizer to use as
   1722 // LoopLandBlock. This BB branch on the loop endBranchInit register to the
   1723 // pathes corresponding to the loop exiting branches.
   1724 
   1725 template<class PassT>
   1726 typename CFGStructurizer<PassT>::BlockT *
   1727 CFGStructurizer<PassT>::addLoopEndbranchBlock(LoopT *loopRep,
   1728                                               BlockTSmallerVector &exitingBlks,
   1729                                               BlockTSmallerVector &exitBlks) {
   1730   const AMDGPUInstrInfo *tii =
   1731              static_cast<const AMDGPUInstrInfo *>(passRep->getTargetInstrInfo());
   1732   const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
   1733 
   1734   RegiT endBranchReg = static_cast<int>
   1735     (funcRep->getRegInfo().createVirtualRegister(I32RC));
   1736   assert(endBranchReg >= 0);
   1737 
   1738   // reg = 0 before entering the loop
   1739   addLoopEndbranchInitReg(loopRep, endBranchReg);
   1740 
   1741   uint32_t numBlks = static_cast<uint32_t>(exitingBlks.size());
   1742   assert(numBlks >=2 && numBlks == exitBlks.size());
   1743 
   1744   BlockT *preExitingBlk = exitingBlks[0];
   1745   BlockT *preExitBlk = exitBlks[0];
   1746   BlockT *preBranchBlk = funcRep->CreateMachineBasicBlock();
   1747   funcRep->push_back(preBranchBlk);  //insert to function
   1748   SHOWNEWBLK(preBranchBlk, "New loopEndbranch block: ");
   1749 
   1750   BlockT *newLandBlk = preBranchBlk;
   1751 
   1752       CFGTraits::replaceInstrUseOfBlockWith(preExitingBlk, preExitBlk,
   1753         newLandBlk);
   1754   preExitingBlk->removeSuccessor(preExitBlk);
   1755   preExitingBlk->addSuccessor(newLandBlk);
   1756 
   1757   //it is redundant to add reg = 0 to exitingBlks[0]
   1758 
   1759   // For 1..n th exiting path (the last iteration handles two pathes) create the
   1760   // branch to the previous path and the current path.
   1761   for (uint32_t i = 1; i < numBlks; ++i) {
   1762     BlockT *curExitingBlk = exitingBlks[i];
   1763     BlockT *curExitBlk = exitBlks[i];
   1764     BlockT *curBranchBlk;
   1765 
   1766     if (i == numBlks - 1) {
   1767       curBranchBlk = curExitBlk;
   1768     } else {
   1769       curBranchBlk = funcRep->CreateMachineBasicBlock();
   1770       funcRep->push_back(curBranchBlk);  //insert to function
   1771       SHOWNEWBLK(curBranchBlk, "New loopEndbranch block: ");
   1772     }
   1773 
   1774     // Add reg = i to exitingBlks[i].
   1775     CFGTraits::insertAssignInstrBefore(curExitingBlk, passRep,
   1776                                        endBranchReg, i);
   1777 
   1778     // Remove the edge (exitingBlks[i] exitBlks[i]) add new edge
   1779     // (exitingBlks[i], newLandBlk).
   1780     CFGTraits::replaceInstrUseOfBlockWith(curExitingBlk, curExitBlk,
   1781                                           newLandBlk);
   1782     curExitingBlk->removeSuccessor(curExitBlk);
   1783     curExitingBlk->addSuccessor(newLandBlk);
   1784 
   1785     // add to preBranchBlk the branch instruction:
   1786     // if (endBranchReg == preVal)
   1787     //    preExitBlk
   1788     // else
   1789     //    curBranchBlk
   1790     //
   1791     // preValReg = i - 1
   1792 
   1793   DebugLoc DL;
   1794   RegiT preValReg = static_cast<int>
   1795     (funcRep->getRegInfo().createVirtualRegister(I32RC));
   1796 
   1797   preBranchBlk->insert(preBranchBlk->begin(),
   1798                        tii->getMovImmInstr(preBranchBlk->getParent(), preValReg,
   1799                        i - 1));
   1800 
   1801   // condResReg = (endBranchReg == preValReg)
   1802     RegiT condResReg = static_cast<int>
   1803       (funcRep->getRegInfo().createVirtualRegister(I32RC));
   1804     BuildMI(preBranchBlk, DL, tii->get(tii->getIEQOpcode()), condResReg)
   1805       .addReg(endBranchReg).addReg(preValReg);
   1806 
   1807     BuildMI(preBranchBlk, DL, tii->get(AMDGPU::BRANCH_COND_i32))
   1808       .addMBB(preExitBlk).addReg(condResReg);
   1809 
   1810     preBranchBlk->addSuccessor(preExitBlk);
   1811     preBranchBlk->addSuccessor(curBranchBlk);
   1812 
   1813     // Update preExitingBlk, preExitBlk, preBranchBlk.
   1814     preExitingBlk = curExitingBlk;
   1815     preExitBlk = curExitBlk;
   1816     preBranchBlk = curBranchBlk;
   1817 
   1818   }  //end for 1 .. n blocks
   1819 
   1820   return newLandBlk;
   1821 } //addLoopEndbranchBlock
   1822 
   1823 template<class PassT>
   1824 typename CFGStructurizer<PassT>::PathToKind
   1825 CFGStructurizer<PassT>::singlePathTo(BlockT *srcBlk, BlockT *dstBlk,
   1826                                      bool allowSideEntry) {
   1827   assert(dstBlk);
   1828 
   1829   if (srcBlk == dstBlk) {
   1830     return SinglePath_InPath;
   1831   }
   1832 
   1833   while (srcBlk && srcBlk->succ_size() == 1) {
   1834     srcBlk = *srcBlk->succ_begin();
   1835     if (srcBlk == dstBlk) {
   1836       return SinglePath_InPath;
   1837     }
   1838 
   1839     if (!allowSideEntry && srcBlk->pred_size() > 1) {
   1840       return Not_SinglePath;
   1841     }
   1842   }
   1843 
   1844   if (srcBlk && srcBlk->succ_size()==0) {
   1845     return SinglePath_NotInPath;
   1846   }
   1847 
   1848   return Not_SinglePath;
   1849 } //singlePathTo
   1850 
   1851 // If there is a single path from srcBlk to dstBlk, return the last block before
   1852 // dstBlk If there is a single path from srcBlk->end without dstBlk, return the
   1853 // last block in the path Otherwise, return NULL
   1854 template<class PassT>
   1855 typename CFGStructurizer<PassT>::BlockT *
   1856 CFGStructurizer<PassT>::singlePathEnd(BlockT *srcBlk, BlockT *dstBlk,
   1857                                       bool allowSideEntry) {
   1858   assert(dstBlk);
   1859 
   1860   if (srcBlk == dstBlk) {
   1861     return srcBlk;
   1862   }
   1863 
   1864   if (srcBlk->succ_size() == 0) {
   1865     return srcBlk;
   1866   }
   1867 
   1868   while (srcBlk && srcBlk->succ_size() == 1) {
   1869     BlockT *preBlk = srcBlk;
   1870 
   1871     srcBlk = *srcBlk->succ_begin();
   1872     if (srcBlk == NULL) {
   1873       return preBlk;
   1874     }
   1875 
   1876     if (!allowSideEntry && srcBlk->pred_size() > 1) {
   1877       return NULL;
   1878     }
   1879   }
   1880 
   1881   if (srcBlk && srcBlk->succ_size()==0) {
   1882     return srcBlk;
   1883   }
   1884 
   1885   return NULL;
   1886 
   1887 } //singlePathEnd
   1888 
   1889 template<class PassT>
   1890 int CFGStructurizer<PassT>::cloneOnSideEntryTo(BlockT *preBlk, BlockT *srcBlk,
   1891                                                BlockT *dstBlk) {
   1892   int cloned = 0;
   1893   assert(preBlk->isSuccessor(srcBlk));
   1894   while (srcBlk && srcBlk != dstBlk) {
   1895     assert(srcBlk->succ_size() == 1);
   1896     if (srcBlk->pred_size() > 1) {
   1897       srcBlk = cloneBlockForPredecessor(srcBlk, preBlk);
   1898       ++cloned;
   1899     }
   1900 
   1901     preBlk = srcBlk;
   1902     srcBlk = *srcBlk->succ_begin();
   1903   }
   1904 
   1905   return cloned;
   1906 } //cloneOnSideEntryTo
   1907 
   1908 template<class PassT>
   1909 typename CFGStructurizer<PassT>::BlockT *
   1910 CFGStructurizer<PassT>::cloneBlockForPredecessor(BlockT *curBlk,
   1911                                                  BlockT *predBlk) {
   1912   assert(predBlk->isSuccessor(curBlk) &&
   1913          "succBlk is not a prececessor of curBlk");
   1914 
   1915   BlockT *cloneBlk = CFGTraits::clone(curBlk);  //clone instructions
   1916   CFGTraits::replaceInstrUseOfBlockWith(predBlk, curBlk, cloneBlk);
   1917   //srcBlk, oldBlk, newBlk
   1918 
   1919   predBlk->removeSuccessor(curBlk);
   1920   predBlk->addSuccessor(cloneBlk);
   1921 
   1922   // add all successor to cloneBlk
   1923   CFGTraits::cloneSuccessorList(cloneBlk, curBlk);
   1924 
   1925   numClonedInstr += curBlk->size();
   1926 
   1927   if (DEBUGME) {
   1928     errs() << "Cloned block: " << "BB"
   1929            << curBlk->getNumber() << "size " << curBlk->size() << "\n";
   1930   }
   1931 
   1932   SHOWNEWBLK(cloneBlk, "result of Cloned block: ");
   1933 
   1934   return cloneBlk;
   1935 } //cloneBlockForPredecessor
   1936 
   1937 template<class PassT>
   1938 typename CFGStructurizer<PassT>::BlockT *
   1939 CFGStructurizer<PassT>::exitingBlock2ExitBlock(LoopT *loopRep,
   1940                                                BlockT *exitingBlk) {
   1941   BlockT *exitBlk = NULL;
   1942 
   1943   for (typename BlockT::succ_iterator iterSucc = exitingBlk->succ_begin(),
   1944        iterSuccEnd = exitingBlk->succ_end();
   1945        iterSucc != iterSuccEnd; ++iterSucc) {
   1946     BlockT *curBlk = *iterSucc;
   1947     if (!loopRep->contains(curBlk)) {
   1948       assert(exitBlk == NULL);
   1949       exitBlk = curBlk;
   1950     }
   1951   }
   1952 
   1953   assert(exitBlk != NULL);
   1954 
   1955   return exitBlk;
   1956 } //exitingBlock2ExitBlock
   1957 
   1958 template<class PassT>
   1959 void CFGStructurizer<PassT>::migrateInstruction(BlockT *srcBlk,
   1960                                                 BlockT *dstBlk,
   1961                                                 InstrIterator insertPos) {
   1962   InstrIterator spliceEnd;
   1963   //look for the input branchinstr, not the AMDGPU branchinstr
   1964   InstrT *branchInstr = CFGTraits::getNormalBlockBranchInstr(srcBlk);
   1965   if (branchInstr == NULL) {
   1966     if (DEBUGME) {
   1967       errs() << "migrateInstruction don't see branch instr\n" ;
   1968     }
   1969     spliceEnd = srcBlk->end();
   1970   } else {
   1971     if (DEBUGME) {
   1972       errs() << "migrateInstruction see branch instr\n" ;
   1973       branchInstr->dump();
   1974     }
   1975     spliceEnd = CFGTraits::getInstrPos(srcBlk, branchInstr);
   1976   }
   1977   if (DEBUGME) {
   1978     errs() << "migrateInstruction before splice dstSize = " << dstBlk->size()
   1979       << "srcSize = " << srcBlk->size() << "\n";
   1980   }
   1981 
   1982   //splice insert before insertPos
   1983   dstBlk->splice(insertPos, srcBlk, srcBlk->begin(), spliceEnd);
   1984 
   1985   if (DEBUGME) {
   1986     errs() << "migrateInstruction after splice dstSize = " << dstBlk->size()
   1987       << "srcSize = " << srcBlk->size() << "\n";
   1988   }
   1989 } //migrateInstruction
   1990 
   1991 // normalizeInfiniteLoopExit change
   1992 //   B1:
   1993 //        uncond_br LoopHeader
   1994 //
   1995 // to
   1996 //   B1:
   1997 //        cond_br 1 LoopHeader dummyExit
   1998 // and return the newly added dummy exit block
   1999 //
   2000 template<class PassT>
   2001 typename CFGStructurizer<PassT>::BlockT *
   2002 CFGStructurizer<PassT>::normalizeInfiniteLoopExit(LoopT* LoopRep) {
   2003   BlockT *loopHeader;
   2004   BlockT *loopLatch;
   2005   loopHeader = LoopRep->getHeader();
   2006   loopLatch = LoopRep->getLoopLatch();
   2007   BlockT *dummyExitBlk = NULL;
   2008   const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32);
   2009   if (loopHeader!=NULL && loopLatch!=NULL) {
   2010     InstrT *branchInstr = CFGTraits::getLoopendBlockBranchInstr(loopLatch);
   2011     if (branchInstr!=NULL && CFGTraits::isUncondBranch(branchInstr)) {
   2012       dummyExitBlk = funcRep->CreateMachineBasicBlock();
   2013       funcRep->push_back(dummyExitBlk);  //insert to function
   2014       SHOWNEWBLK(dummyExitBlk, "DummyExitBlock to normalize infiniteLoop: ");
   2015 
   2016       if (DEBUGME) errs() << "Old branch instr: " << *branchInstr << "\n";
   2017 
   2018       typename BlockT::iterator insertPos =
   2019         CFGTraits::getInstrPos(loopLatch, branchInstr);
   2020       unsigned immReg =
   2021         funcRep->getRegInfo().createVirtualRegister(I32RC);
   2022       CFGTraits::insertAssignInstrBefore(insertPos, passRep, immReg, 1);
   2023       InstrT *newInstr =
   2024         CFGTraits::insertInstrBefore(insertPos, AMDGPU::BRANCH_COND_i32, passRep);
   2025       MachineInstrBuilder MIB(*funcRep, newInstr);
   2026       MIB.addMBB(loopHeader);
   2027       MIB.addReg(immReg, false);
   2028 
   2029       SHOWNEWINSTR(newInstr);
   2030 
   2031       branchInstr->eraseFromParent();
   2032       loopLatch->addSuccessor(dummyExitBlk);
   2033     }
   2034   }
   2035 
   2036   return dummyExitBlk;
   2037 } //normalizeInfiniteLoopExit
   2038 
   2039 template<class PassT>
   2040 void CFGStructurizer<PassT>::removeUnconditionalBranch(BlockT *srcBlk) {
   2041   InstrT *branchInstr;
   2042 
   2043   // I saw two unconditional branch in one basic block in example
   2044   // test_fc_do_while_or.c need to fix the upstream on this to remove the loop.
   2045   while ((branchInstr = CFGTraits::getLoopendBlockBranchInstr(srcBlk))
   2046           && CFGTraits::isUncondBranch(branchInstr)) {
   2047     if (DEBUGME) {
   2048           errs() << "Removing unconditional branch instruction" ;
   2049       branchInstr->dump();
   2050     }
   2051     branchInstr->eraseFromParent();
   2052   }
   2053 } //removeUnconditionalBranch
   2054 
   2055 template<class PassT>
   2056 void CFGStructurizer<PassT>::removeRedundantConditionalBranch(BlockT *srcBlk) {
   2057   if (srcBlk->succ_size() == 2) {
   2058     BlockT *blk1 = *srcBlk->succ_begin();
   2059     BlockT *blk2 = *(++srcBlk->succ_begin());
   2060 
   2061     if (blk1 == blk2) {
   2062       InstrT *branchInstr = CFGTraits::getNormalBlockBranchInstr(srcBlk);
   2063       assert(branchInstr && CFGTraits::isCondBranch(branchInstr));
   2064       if (DEBUGME) {
   2065         errs() << "Removing unneeded conditional branch instruction" ;
   2066         branchInstr->dump();
   2067       }
   2068       branchInstr->eraseFromParent();
   2069       SHOWNEWBLK(blk1, "Removing redundant successor");
   2070       srcBlk->removeSuccessor(blk1);
   2071     }
   2072   }
   2073 } //removeRedundantConditionalBranch
   2074 
   2075 template<class PassT>
   2076 void CFGStructurizer<PassT>::addDummyExitBlock(SmallVector<BlockT*,
   2077                                                DEFAULT_VEC_SLOTS> &retBlks) {
   2078   BlockT *dummyExitBlk = funcRep->CreateMachineBasicBlock();
   2079   funcRep->push_back(dummyExitBlk);  //insert to function
   2080   CFGTraits::insertInstrEnd(dummyExitBlk, AMDGPU::RETURN, passRep);
   2081 
   2082   for (typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::iterator iter =
   2083          retBlks.begin(),
   2084        iterEnd = retBlks.end(); iter != iterEnd; ++iter) {
   2085     BlockT *curBlk = *iter;
   2086     InstrT *curInstr = CFGTraits::getReturnInstr(curBlk);
   2087     if (curInstr) {
   2088       curInstr->eraseFromParent();
   2089     }
   2090     curBlk->addSuccessor(dummyExitBlk);
   2091     if (DEBUGME) {
   2092       errs() << "Add dummyExitBlock to BB" << curBlk->getNumber()
   2093              << " successors\n";
   2094     }
   2095   } //for
   2096 
   2097   SHOWNEWBLK(dummyExitBlk, "DummyExitBlock: ");
   2098 } //addDummyExitBlock
   2099 
   2100 template<class PassT>
   2101 void CFGStructurizer<PassT>::removeSuccessor(BlockT *srcBlk) {
   2102   while (srcBlk->succ_size()) {
   2103     srcBlk->removeSuccessor(*srcBlk->succ_begin());
   2104   }
   2105 }
   2106 
   2107 template<class PassT>
   2108 void CFGStructurizer<PassT>::recordSccnum(BlockT *srcBlk, int sccNum) {
   2109   BlockInfo *&srcBlkInfo = blockInfoMap[srcBlk];
   2110 
   2111   if (srcBlkInfo == NULL) {
   2112     srcBlkInfo = new BlockInfo();
   2113   }
   2114 
   2115   srcBlkInfo->sccNum = sccNum;
   2116 }
   2117 
   2118 template<class PassT>
   2119 int CFGStructurizer<PassT>::getSCCNum(BlockT *srcBlk) {
   2120   BlockInfo *srcBlkInfo = blockInfoMap[srcBlk];
   2121   return srcBlkInfo ? srcBlkInfo->sccNum : INVALIDSCCNUM;
   2122 }
   2123 
   2124 template<class PassT>
   2125 void CFGStructurizer<PassT>::retireBlock(BlockT *dstBlk, BlockT *srcBlk) {
   2126   if (DEBUGME) {
   2127         errs() << "Retiring BB" << srcBlk->getNumber() << "\n";
   2128   }
   2129 
   2130   BlockInfo *&srcBlkInfo = blockInfoMap[srcBlk];
   2131 
   2132   if (srcBlkInfo == NULL) {
   2133     srcBlkInfo = new BlockInfo();
   2134   }
   2135 
   2136   srcBlkInfo->isRetired = true;
   2137   assert(srcBlk->succ_size() == 0 && srcBlk->pred_size() == 0
   2138          && "can't retire block yet");
   2139 }
   2140 
   2141 template<class PassT>
   2142 bool CFGStructurizer<PassT>::isRetiredBlock(BlockT *srcBlk) {
   2143   BlockInfo *srcBlkInfo = blockInfoMap[srcBlk];
   2144   return (srcBlkInfo && srcBlkInfo->isRetired);
   2145 }
   2146 
   2147 template<class PassT>
   2148 bool CFGStructurizer<PassT>::isActiveLoophead(BlockT *curBlk) {
   2149   LoopT *loopRep = loopInfo->getLoopFor(curBlk);
   2150   while (loopRep && loopRep->getHeader() == curBlk) {
   2151     LoopLandInfo *loopLand = getLoopLandInfo(loopRep);
   2152 
   2153     if(loopLand == NULL)
   2154       return true;
   2155 
   2156     BlockT *landBlk = loopLand->landBlk;
   2157     assert(landBlk);
   2158     if (!isRetiredBlock(landBlk)) {
   2159       return true;
   2160     }
   2161 
   2162     loopRep = loopRep->getParentLoop();
   2163   }
   2164 
   2165   return false;
   2166 } //isActiveLoophead
   2167 
   2168 template<class PassT>
   2169 bool CFGStructurizer<PassT>::needMigrateBlock(BlockT *blk) {
   2170   const unsigned blockSizeThreshold = 30;
   2171   const unsigned cloneInstrThreshold = 100;
   2172 
   2173   bool multiplePreds = blk && (blk->pred_size() > 1);
   2174 
   2175   if(!multiplePreds)
   2176     return false;
   2177 
   2178   unsigned blkSize = blk->size();
   2179   return ((blkSize > blockSizeThreshold)
   2180           && (blkSize * (blk->pred_size() - 1) > cloneInstrThreshold));
   2181 } //needMigrateBlock
   2182 
   2183 template<class PassT>
   2184 typename CFGStructurizer<PassT>::BlockT *
   2185 CFGStructurizer<PassT>::recordLoopLandBlock(LoopT *loopRep, BlockT *landBlk,
   2186                                             BlockTSmallerVector &exitBlks,
   2187                                             std::set<BlockT *> &exitBlkSet) {
   2188   SmallVector<BlockT *, DEFAULT_VEC_SLOTS> inpathBlks;  //in exit path blocks
   2189 
   2190   for (typename BlockT::pred_iterator predIter = landBlk->pred_begin(),
   2191        predIterEnd = landBlk->pred_end();
   2192        predIter != predIterEnd; ++predIter) {
   2193     BlockT *curBlk = *predIter;
   2194     if (loopRep->contains(curBlk) || exitBlkSet.count(curBlk)) {
   2195       inpathBlks.push_back(curBlk);
   2196     }
   2197   } //for
   2198 
   2199   //if landBlk has predecessors that are not in the given loop,
   2200   //create a new block
   2201   BlockT *newLandBlk = landBlk;
   2202   if (inpathBlks.size() != landBlk->pred_size()) {
   2203     newLandBlk = funcRep->CreateMachineBasicBlock();
   2204     funcRep->push_back(newLandBlk);  //insert to function
   2205     newLandBlk->addSuccessor(landBlk);
   2206     for (typename SmallVector<BlockT*, DEFAULT_VEC_SLOTS>::iterator iter =
   2207          inpathBlks.begin(),
   2208          iterEnd = inpathBlks.end(); iter != iterEnd; ++iter) {
   2209       BlockT *curBlk = *iter;
   2210       CFGTraits::replaceInstrUseOfBlockWith(curBlk, landBlk, newLandBlk);
   2211       //srcBlk, oldBlk, newBlk
   2212       curBlk->removeSuccessor(landBlk);
   2213       curBlk->addSuccessor(newLandBlk);
   2214     }
   2215     for (size_t i = 0, tot = exitBlks.size(); i < tot; ++i) {
   2216       if (exitBlks[i] == landBlk) {
   2217         exitBlks[i] = newLandBlk;
   2218       }
   2219     }
   2220     SHOWNEWBLK(newLandBlk, "NewLandingBlock: ");
   2221   }
   2222 
   2223   setLoopLandBlock(loopRep, newLandBlk);
   2224 
   2225   return newLandBlk;
   2226 } // recordLoopbreakLand
   2227 
   2228 template<class PassT>
   2229 void CFGStructurizer<PassT>::setLoopLandBlock(LoopT *loopRep, BlockT *blk) {
   2230   LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
   2231 
   2232   if (theEntry == NULL) {
   2233     theEntry = new LoopLandInfo();
   2234   }
   2235   assert(theEntry->landBlk == NULL);
   2236 
   2237   if (blk == NULL) {
   2238     blk = funcRep->CreateMachineBasicBlock();
   2239     funcRep->push_back(blk);  //insert to function
   2240     SHOWNEWBLK(blk, "DummyLandingBlock for loop without break: ");
   2241   }
   2242 
   2243   theEntry->landBlk = blk;
   2244 
   2245   if (DEBUGME) {
   2246     errs() << "setLoopLandBlock loop-header = BB"
   2247            << loopRep->getHeader()->getNumber()
   2248            << "  landing-block = BB" << blk->getNumber() << "\n";
   2249   }
   2250 } // setLoopLandBlock
   2251 
   2252 template<class PassT>
   2253 void CFGStructurizer<PassT>::addLoopBreakOnReg(LoopT *loopRep, RegiT regNum) {
   2254   LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
   2255 
   2256   if (theEntry == NULL) {
   2257     theEntry = new LoopLandInfo();
   2258   }
   2259 
   2260   theEntry->breakOnRegs.insert(regNum);
   2261 
   2262   if (DEBUGME) {
   2263     errs() << "addLoopBreakOnReg loop-header = BB"
   2264            << loopRep->getHeader()->getNumber()
   2265            << "  regNum = " << regNum << "\n";
   2266   }
   2267 } // addLoopBreakOnReg
   2268 
   2269 template<class PassT>
   2270 void CFGStructurizer<PassT>::addLoopContOnReg(LoopT *loopRep, RegiT regNum) {
   2271   LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
   2272 
   2273   if (theEntry == NULL) {
   2274     theEntry = new LoopLandInfo();
   2275   }
   2276   theEntry->contOnRegs.insert(regNum);
   2277 
   2278   if (DEBUGME) {
   2279     errs() << "addLoopContOnReg loop-header = BB"
   2280            << loopRep->getHeader()->getNumber()
   2281            << "  regNum = " << regNum << "\n";
   2282   }
   2283 } // addLoopContOnReg
   2284 
   2285 template<class PassT>
   2286 void CFGStructurizer<PassT>::addLoopBreakInitReg(LoopT *loopRep, RegiT regNum) {
   2287   LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
   2288 
   2289   if (theEntry == NULL) {
   2290     theEntry = new LoopLandInfo();
   2291   }
   2292   theEntry->breakInitRegs.insert(regNum);
   2293 
   2294   if (DEBUGME) {
   2295     errs() << "addLoopBreakInitReg loop-header = BB"
   2296            << loopRep->getHeader()->getNumber()
   2297            << "  regNum = " << regNum << "\n";
   2298   }
   2299 } // addLoopBreakInitReg
   2300 
   2301 template<class PassT>
   2302 void CFGStructurizer<PassT>::addLoopContInitReg(LoopT *loopRep, RegiT regNum) {
   2303   LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
   2304 
   2305   if (theEntry == NULL) {
   2306     theEntry = new LoopLandInfo();
   2307   }
   2308   theEntry->contInitRegs.insert(regNum);
   2309 
   2310   if (DEBUGME) {
   2311     errs() << "addLoopContInitReg loop-header = BB"
   2312            << loopRep->getHeader()->getNumber()
   2313            << "  regNum = " << regNum << "\n";
   2314   }
   2315 } // addLoopContInitReg
   2316 
   2317 template<class PassT>
   2318 void CFGStructurizer<PassT>::addLoopEndbranchInitReg(LoopT *loopRep,
   2319                                                      RegiT regNum) {
   2320   LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
   2321 
   2322   if (theEntry == NULL) {
   2323     theEntry = new LoopLandInfo();
   2324   }
   2325   theEntry->endbranchInitRegs.insert(regNum);
   2326 
   2327   if (DEBUGME) {
   2328         errs() << "addLoopEndbranchInitReg loop-header = BB"
   2329       << loopRep->getHeader()->getNumber()
   2330       << "  regNum = " << regNum << "\n";
   2331   }
   2332 } // addLoopEndbranchInitReg
   2333 
   2334 template<class PassT>
   2335 typename CFGStructurizer<PassT>::LoopLandInfo *
   2336 CFGStructurizer<PassT>::getLoopLandInfo(LoopT *loopRep) {
   2337   LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
   2338 
   2339   return theEntry;
   2340 } // getLoopLandInfo
   2341 
   2342 template<class PassT>
   2343 typename CFGStructurizer<PassT>::BlockT *
   2344 CFGStructurizer<PassT>::getLoopLandBlock(LoopT *loopRep) {
   2345   LoopLandInfo *&theEntry = loopLandInfoMap[loopRep];
   2346 
   2347   return theEntry ? theEntry->landBlk : NULL;
   2348 } // getLoopLandBlock
   2349 
   2350 
   2351 template<class PassT>
   2352 bool CFGStructurizer<PassT>::hasBackEdge(BlockT *curBlk) {
   2353   LoopT *loopRep = loopInfo->getLoopFor(curBlk);
   2354   if (loopRep == NULL)
   2355     return false;
   2356 
   2357   BlockT *loopHeader = loopRep->getHeader();
   2358 
   2359   return curBlk->isSuccessor(loopHeader);
   2360 
   2361 } //hasBackEdge
   2362 
   2363 template<class PassT>
   2364 unsigned CFGStructurizer<PassT>::getLoopDepth(LoopT *loopRep) {
   2365   return loopRep ? loopRep->getLoopDepth() : 0;
   2366 } //getLoopDepth
   2367 
   2368 template<class PassT>
   2369 int CFGStructurizer<PassT>::countActiveBlock
   2370 (typename SmallVector<BlockT*, DEFAULT_VEC_SLOTS>::const_iterator iterStart,
   2371  typename SmallVector<BlockT*, DEFAULT_VEC_SLOTS>::const_iterator iterEnd) {
   2372   int count = 0;
   2373   while (iterStart != iterEnd) {
   2374     if (!isRetiredBlock(*iterStart)) {
   2375       ++count;
   2376     }
   2377     ++iterStart;
   2378   }
   2379 
   2380   return count;
   2381 } //countActiveBlock
   2382 
   2383 // This is work around solution for findNearestCommonDominator not avaiable to
   2384 // post dom a proper fix should go to Dominators.h.
   2385 
   2386 template<class PassT>
   2387 typename CFGStructurizer<PassT>::BlockT*
   2388 CFGStructurizer<PassT>::findNearestCommonPostDom(BlockT *blk1, BlockT *blk2) {
   2389 
   2390   if (postDomTree->dominates(blk1, blk2)) {
   2391     return blk1;
   2392   }
   2393   if (postDomTree->dominates(blk2, blk1)) {
   2394     return blk2;
   2395   }
   2396 
   2397   DomTreeNodeT *node1 = postDomTree->getNode(blk1);
   2398   DomTreeNodeT *node2 = postDomTree->getNode(blk2);
   2399 
   2400   // Handle newly cloned node.
   2401   if (node1 == NULL && blk1->succ_size() == 1) {
   2402     return findNearestCommonPostDom(*blk1->succ_begin(), blk2);
   2403   }
   2404   if (node2 == NULL && blk2->succ_size() == 1) {
   2405     return findNearestCommonPostDom(blk1, *blk2->succ_begin());
   2406   }
   2407 
   2408   if (node1 == NULL || node2 == NULL) {
   2409     return NULL;
   2410   }
   2411 
   2412   node1 = node1->getIDom();
   2413   while (node1) {
   2414     if (postDomTree->dominates(node1, node2)) {
   2415       return node1->getBlock();
   2416     }
   2417     node1 = node1->getIDom();
   2418   }
   2419 
   2420   return NULL;
   2421 }
   2422 
   2423 template<class PassT>
   2424 typename CFGStructurizer<PassT>::BlockT *
   2425 CFGStructurizer<PassT>::findNearestCommonPostDom
   2426 (typename std::set<BlockT *> &blks) {
   2427   BlockT *commonDom;
   2428   typename std::set<BlockT *>::const_iterator iter = blks.begin();
   2429   typename std::set<BlockT *>::const_iterator iterEnd = blks.end();
   2430   for (commonDom = *iter; iter != iterEnd && commonDom != NULL; ++iter) {
   2431     BlockT *curBlk = *iter;
   2432     if (curBlk != commonDom) {
   2433       commonDom = findNearestCommonPostDom(curBlk, commonDom);
   2434     }
   2435   }
   2436 
   2437   if (DEBUGME) {
   2438     errs() << "Common post dominator for exit blocks is ";
   2439     if (commonDom) {
   2440           errs() << "BB" << commonDom->getNumber() << "\n";
   2441     } else {
   2442       errs() << "NULL\n";
   2443     }
   2444   }
   2445 
   2446   return commonDom;
   2447 } //findNearestCommonPostDom
   2448 
   2449 } //end namespace llvm
   2450 
   2451 //todo: move-end
   2452 
   2453 
   2454 //===----------------------------------------------------------------------===//
   2455 //
   2456 // CFGStructurizer for AMDGPU
   2457 //
   2458 //===----------------------------------------------------------------------===//
   2459 
   2460 
   2461 using namespace llvmCFGStruct;
   2462 
   2463 namespace llvm {
   2464 class AMDGPUCFGStructurizer : public MachineFunctionPass {
   2465 public:
   2466   typedef MachineInstr              InstructionType;
   2467   typedef MachineFunction           FunctionType;
   2468   typedef MachineBasicBlock         BlockType;
   2469   typedef MachineLoopInfo           LoopinfoType;
   2470   typedef MachineDominatorTree      DominatortreeType;
   2471   typedef MachinePostDominatorTree  PostDominatortreeType;
   2472   typedef MachineDomTreeNode        DomTreeNodeType;
   2473   typedef MachineLoop               LoopType;
   2474 
   2475 protected:
   2476   TargetMachine &TM;
   2477   const TargetInstrInfo *TII;
   2478   const AMDGPURegisterInfo *TRI;
   2479 
   2480 public:
   2481   AMDGPUCFGStructurizer(char &pid, TargetMachine &tm);
   2482   const TargetInstrInfo *getTargetInstrInfo() const;
   2483 
   2484 private:
   2485 
   2486 };
   2487 
   2488 } //end of namespace llvm
   2489 AMDGPUCFGStructurizer::AMDGPUCFGStructurizer(char &pid, TargetMachine &tm)
   2490 : MachineFunctionPass(pid), TM(tm), TII(tm.getInstrInfo()),
   2491   TRI(static_cast<const AMDGPURegisterInfo *>(tm.getRegisterInfo())) {
   2492 }
   2493 
   2494 const TargetInstrInfo *AMDGPUCFGStructurizer::getTargetInstrInfo() const {
   2495   return TII;
   2496 }
   2497 //===----------------------------------------------------------------------===//
   2498 //
   2499 // CFGPrepare
   2500 //
   2501 //===----------------------------------------------------------------------===//
   2502 
   2503 
   2504 using namespace llvmCFGStruct;
   2505 
   2506 namespace llvm {
   2507 class AMDGPUCFGPrepare : public AMDGPUCFGStructurizer {
   2508 public:
   2509   static char ID;
   2510 
   2511 public:
   2512   AMDGPUCFGPrepare(TargetMachine &tm);
   2513 
   2514   virtual const char *getPassName() const;
   2515   virtual void getAnalysisUsage(AnalysisUsage &AU) const;
   2516 
   2517   bool runOnMachineFunction(MachineFunction &F);
   2518 
   2519 private:
   2520 
   2521 };
   2522 
   2523 char AMDGPUCFGPrepare::ID = 0;
   2524 } //end of namespace llvm
   2525 
   2526 AMDGPUCFGPrepare::AMDGPUCFGPrepare(TargetMachine &tm)
   2527   : AMDGPUCFGStructurizer(ID, tm )  {
   2528 }
   2529 const char *AMDGPUCFGPrepare::getPassName() const {
   2530   return "AMD IL Control Flow Graph Preparation Pass";
   2531 }
   2532 
   2533 void AMDGPUCFGPrepare::getAnalysisUsage(AnalysisUsage &AU) const {
   2534   AU.addPreserved<MachineFunctionAnalysis>();
   2535   AU.addRequired<MachineFunctionAnalysis>();
   2536   AU.addRequired<MachineDominatorTree>();
   2537   AU.addRequired<MachinePostDominatorTree>();
   2538   AU.addRequired<MachineLoopInfo>();
   2539 }
   2540 
   2541 //===----------------------------------------------------------------------===//
   2542 //
   2543 // CFGPerform
   2544 //
   2545 //===----------------------------------------------------------------------===//
   2546 
   2547 
   2548 using namespace llvmCFGStruct;
   2549 
   2550 namespace llvm {
   2551 class AMDGPUCFGPerform : public AMDGPUCFGStructurizer {
   2552 public:
   2553   static char ID;
   2554 
   2555 public:
   2556   AMDGPUCFGPerform(TargetMachine &tm);
   2557   virtual const char *getPassName() const;
   2558   virtual void getAnalysisUsage(AnalysisUsage &AU) const;
   2559   bool runOnMachineFunction(MachineFunction &F);
   2560 
   2561 private:
   2562 
   2563 };
   2564 
   2565 char AMDGPUCFGPerform::ID = 0;
   2566 } //end of namespace llvm
   2567 
   2568   AMDGPUCFGPerform::AMDGPUCFGPerform(TargetMachine &tm)
   2569 : AMDGPUCFGStructurizer(ID, tm) {
   2570 }
   2571 
   2572 const char *AMDGPUCFGPerform::getPassName() const {
   2573   return "AMD IL Control Flow Graph structurizer Pass";
   2574 }
   2575 
   2576 void AMDGPUCFGPerform::getAnalysisUsage(AnalysisUsage &AU) const {
   2577   AU.addPreserved<MachineFunctionAnalysis>();
   2578   AU.addRequired<MachineFunctionAnalysis>();
   2579   AU.addRequired<MachineDominatorTree>();
   2580   AU.addRequired<MachinePostDominatorTree>();
   2581   AU.addRequired<MachineLoopInfo>();
   2582 }
   2583 
   2584 //===----------------------------------------------------------------------===//
   2585 //
   2586 // CFGStructTraits<AMDGPUCFGStructurizer>
   2587 //
   2588 //===----------------------------------------------------------------------===//
   2589 
   2590 namespace llvmCFGStruct {
   2591 // this class is tailor to the AMDGPU backend
   2592 template<>
   2593 struct CFGStructTraits<AMDGPUCFGStructurizer> {
   2594   typedef int RegiT;
   2595 
   2596   static int getBranchNzeroOpcode(int oldOpcode) {
   2597     switch(oldOpcode) {
   2598     case AMDGPU::JUMP_COND:
   2599     case AMDGPU::JUMP: return AMDGPU::IF_PREDICATE_SET;
   2600     case AMDGPU::BRANCH_COND_i32:
   2601     case AMDGPU::BRANCH_COND_f32: return AMDGPU::IF_LOGICALNZ_f32;
   2602     default:
   2603       assert(0 && "internal error");
   2604     }
   2605     return -1;
   2606   }
   2607 
   2608   static int getBranchZeroOpcode(int oldOpcode) {
   2609     switch(oldOpcode) {
   2610     case AMDGPU::JUMP_COND:
   2611     case AMDGPU::JUMP: return AMDGPU::IF_PREDICATE_SET;
   2612     case AMDGPU::BRANCH_COND_i32:
   2613     case AMDGPU::BRANCH_COND_f32: return AMDGPU::IF_LOGICALZ_f32;
   2614     default:
   2615       assert(0 && "internal error");
   2616     }
   2617     return -1;
   2618   }
   2619 
   2620   static int getContinueNzeroOpcode(int oldOpcode) {
   2621     switch(oldOpcode) {
   2622     case AMDGPU::JUMP_COND:
   2623     case AMDGPU::JUMP: return AMDGPU::CONTINUE_LOGICALNZ_i32;
   2624     default:
   2625       assert(0 && "internal error");
   2626     };
   2627     return -1;
   2628   }
   2629 
   2630   static int getContinueZeroOpcode(int oldOpcode) {
   2631     switch(oldOpcode) {
   2632     case AMDGPU::JUMP_COND:
   2633     case AMDGPU::JUMP: return AMDGPU::CONTINUE_LOGICALZ_i32;
   2634     default:
   2635       assert(0 && "internal error");
   2636     }
   2637     return -1;
   2638   }
   2639 
   2640   static MachineBasicBlock *getTrueBranch(MachineInstr *instr) {
   2641     return instr->getOperand(0).getMBB();
   2642   }
   2643 
   2644   static void setTrueBranch(MachineInstr *instr, MachineBasicBlock *blk) {
   2645     instr->getOperand(0).setMBB(blk);
   2646   }
   2647 
   2648   static MachineBasicBlock *
   2649   getFalseBranch(MachineBasicBlock *blk, MachineInstr *instr) {
   2650     assert(blk->succ_size() == 2);
   2651     MachineBasicBlock *trueBranch = getTrueBranch(instr);
   2652     MachineBasicBlock::succ_iterator iter = blk->succ_begin();
   2653     MachineBasicBlock::succ_iterator iterNext = iter;
   2654     ++iterNext;
   2655 
   2656     return (*iter == trueBranch) ? *iterNext : *iter;
   2657   }
   2658 
   2659   static bool isCondBranch(MachineInstr *instr) {
   2660     switch (instr->getOpcode()) {
   2661       case AMDGPU::JUMP_COND:
   2662       case AMDGPU::BRANCH_COND_i32:
   2663       case AMDGPU::BRANCH_COND_f32:
   2664       break;
   2665     default:
   2666       return false;
   2667     }
   2668     return true;
   2669   }
   2670 
   2671   static bool isUncondBranch(MachineInstr *instr) {
   2672     switch (instr->getOpcode()) {
   2673     case AMDGPU::JUMP:
   2674     case AMDGPU::BRANCH:
   2675       return true;
   2676     default:
   2677       return false;
   2678     }
   2679     return true;
   2680   }
   2681 
   2682   static DebugLoc getLastDebugLocInBB(MachineBasicBlock *blk) {
   2683     //get DebugLoc from the first MachineBasicBlock instruction with debug info
   2684     DebugLoc DL;
   2685     for (MachineBasicBlock::iterator iter = blk->begin(); iter != blk->end(); ++iter) {
   2686       MachineInstr *instr = &(*iter);
   2687       if (instr->getDebugLoc().isUnknown() == false) {
   2688         DL = instr->getDebugLoc();
   2689       }
   2690     }
   2691     return DL;
   2692   }
   2693 
   2694   static MachineInstr *getNormalBlockBranchInstr(MachineBasicBlock *blk) {
   2695     MachineBasicBlock::reverse_iterator iter = blk->rbegin();
   2696     MachineInstr *instr = &*iter;
   2697     if (instr && (isCondBranch(instr) || isUncondBranch(instr))) {
   2698       return instr;
   2699     }
   2700     return NULL;
   2701   }
   2702 
   2703   // The correct naming for this is getPossibleLoopendBlockBranchInstr.
   2704   //
   2705   // BB with backward-edge could have move instructions after the branch
   2706   // instruction.  Such move instruction "belong to" the loop backward-edge.
   2707   //
   2708   static MachineInstr *getLoopendBlockBranchInstr(MachineBasicBlock *blk) {
   2709     const AMDGPUInstrInfo * TII = static_cast<const AMDGPUInstrInfo *>(
   2710                                   blk->getParent()->getTarget().getInstrInfo());
   2711 
   2712     for (MachineBasicBlock::reverse_iterator iter = blk->rbegin(),
   2713          iterEnd = blk->rend(); iter != iterEnd; ++iter) {
   2714       // FIXME: Simplify
   2715       MachineInstr *instr = &*iter;
   2716       if (instr) {
   2717         if (isCondBranch(instr) || isUncondBranch(instr)) {
   2718           return instr;
   2719         } else if (!TII->isMov(instr->getOpcode())) {
   2720           break;
   2721         }
   2722       }
   2723     }
   2724     return NULL;
   2725   }
   2726 
   2727   static MachineInstr *getReturnInstr(MachineBasicBlock *blk) {
   2728     MachineBasicBlock::reverse_iterator iter = blk->rbegin();
   2729     if (iter != blk->rend()) {
   2730       MachineInstr *instr = &(*iter);
   2731       if (instr->getOpcode() == AMDGPU::RETURN) {
   2732         return instr;
   2733       }
   2734     }
   2735     return NULL;
   2736   }
   2737 
   2738   static MachineInstr *getContinueInstr(MachineBasicBlock *blk) {
   2739     MachineBasicBlock::reverse_iterator iter = blk->rbegin();
   2740     if (iter != blk->rend()) {
   2741       MachineInstr *instr = &(*iter);
   2742       if (instr->getOpcode() == AMDGPU::CONTINUE) {
   2743         return instr;
   2744       }
   2745     }
   2746     return NULL;
   2747   }
   2748 
   2749   static MachineInstr *getLoopBreakInstr(MachineBasicBlock *blk) {
   2750     for (MachineBasicBlock::iterator iter = blk->begin(); (iter != blk->end()); ++iter) {
   2751       MachineInstr *instr = &(*iter);
   2752       if (instr->getOpcode() == AMDGPU::PREDICATED_BREAK) {
   2753         return instr;
   2754       }
   2755     }
   2756     return NULL;
   2757   }
   2758 
   2759   static bool isReturnBlock(MachineBasicBlock *blk) {
   2760     MachineInstr *instr = getReturnInstr(blk);
   2761     bool isReturn = (blk->succ_size() == 0);
   2762     if (instr) {
   2763       assert(isReturn);
   2764     } else if (isReturn) {
   2765       if (DEBUGME) {
   2766         errs() << "BB" << blk->getNumber()
   2767                <<" is return block without RETURN instr\n";
   2768       }
   2769     }
   2770 
   2771     return  isReturn;
   2772   }
   2773 
   2774   static MachineBasicBlock::iterator
   2775   getInstrPos(MachineBasicBlock *blk, MachineInstr *instr) {
   2776     assert(instr->getParent() == blk && "instruction doesn't belong to block");
   2777     MachineBasicBlock::iterator iter = blk->begin();
   2778     MachineBasicBlock::iterator iterEnd = blk->end();
   2779     while (&(*iter) != instr && iter != iterEnd) {
   2780       ++iter;
   2781     }
   2782 
   2783     assert(iter != iterEnd);
   2784     return iter;
   2785   }//getInstrPos
   2786 
   2787   static MachineInstr *insertInstrBefore(MachineBasicBlock *blk, int newOpcode,
   2788                                          AMDGPUCFGStructurizer *passRep) {
   2789     return insertInstrBefore(blk,newOpcode,passRep,DebugLoc());
   2790   } //insertInstrBefore
   2791 
   2792   static MachineInstr *insertInstrBefore(MachineBasicBlock *blk, int newOpcode,
   2793                                          AMDGPUCFGStructurizer *passRep, DebugLoc DL) {
   2794     const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
   2795     MachineInstr *newInstr =
   2796       blk->getParent()->CreateMachineInstr(tii->get(newOpcode), DL);
   2797 
   2798     MachineBasicBlock::iterator res;
   2799     if (blk->begin() != blk->end()) {
   2800       blk->insert(blk->begin(), newInstr);
   2801     } else {
   2802       blk->push_back(newInstr);
   2803     }
   2804 
   2805     SHOWNEWINSTR(newInstr);
   2806 
   2807     return newInstr;
   2808   } //insertInstrBefore
   2809 
   2810   static void insertInstrEnd(MachineBasicBlock *blk, int newOpcode,
   2811                              AMDGPUCFGStructurizer *passRep) {
   2812     insertInstrEnd(blk,newOpcode,passRep,DebugLoc());
   2813   } //insertInstrEnd
   2814 
   2815   static void insertInstrEnd(MachineBasicBlock *blk, int newOpcode,
   2816                              AMDGPUCFGStructurizer *passRep, DebugLoc DL) {
   2817     const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
   2818    MachineInstr *newInstr = blk->getParent()
   2819       ->CreateMachineInstr(tii->get(newOpcode), DL);
   2820 
   2821     blk->push_back(newInstr);
   2822     //assume the instruction doesn't take any reg operand ...
   2823 
   2824     SHOWNEWINSTR(newInstr);
   2825   } //insertInstrEnd
   2826 
   2827   static MachineInstr *insertInstrBefore(MachineBasicBlock::iterator instrPos,
   2828                                          int newOpcode,
   2829                                          AMDGPUCFGStructurizer *passRep) {
   2830     MachineInstr *oldInstr = &(*instrPos);
   2831     const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
   2832     MachineBasicBlock *blk = oldInstr->getParent();
   2833     MachineInstr *newInstr =
   2834       blk->getParent()->CreateMachineInstr(tii->get(newOpcode),
   2835                                            DebugLoc());
   2836 
   2837     blk->insert(instrPos, newInstr);
   2838     //assume the instruction doesn't take any reg operand ...
   2839 
   2840     SHOWNEWINSTR(newInstr);
   2841     return newInstr;
   2842   } //insertInstrBefore
   2843 
   2844   static void insertCondBranchBefore(MachineBasicBlock::iterator instrPos,
   2845                                      int newOpcode,
   2846                                      AMDGPUCFGStructurizer *passRep,
   2847                                      DebugLoc DL) {
   2848     MachineInstr *oldInstr = &(*instrPos);
   2849     const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
   2850     MachineBasicBlock *blk = oldInstr->getParent();
   2851     MachineFunction *MF = blk->getParent();
   2852     MachineInstr *newInstr = MF->CreateMachineInstr(tii->get(newOpcode), DL);
   2853 
   2854     blk->insert(instrPos, newInstr);
   2855     MachineInstrBuilder MIB(*MF, newInstr);
   2856     MIB.addReg(oldInstr->getOperand(1).getReg(), false);
   2857 
   2858     SHOWNEWINSTR(newInstr);
   2859     //erase later oldInstr->eraseFromParent();
   2860   } //insertCondBranchBefore
   2861 
   2862   static void insertCondBranchBefore(MachineBasicBlock *blk,
   2863                                      MachineBasicBlock::iterator insertPos,
   2864                                      int newOpcode,
   2865                                      AMDGPUCFGStructurizer *passRep,
   2866                                      RegiT regNum,
   2867                                      DebugLoc DL) {
   2868     const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
   2869     MachineFunction *MF = blk->getParent();
   2870 
   2871     MachineInstr *newInstr = MF->CreateMachineInstr(tii->get(newOpcode), DL);
   2872 
   2873     //insert before
   2874     blk->insert(insertPos, newInstr);
   2875     MachineInstrBuilder(*MF, newInstr).addReg(regNum, false);
   2876 
   2877     SHOWNEWINSTR(newInstr);
   2878   } //insertCondBranchBefore
   2879 
   2880   static void insertCondBranchEnd(MachineBasicBlock *blk,
   2881                                   int newOpcode,
   2882                                   AMDGPUCFGStructurizer *passRep,
   2883                                   RegiT regNum) {
   2884     const TargetInstrInfo *tii = passRep->getTargetInstrInfo();
   2885     MachineFunction *MF = blk->getParent();
   2886     MachineInstr *newInstr =
   2887       MF->CreateMachineInstr(tii->get(newOpcode), DebugLoc());
   2888 
   2889     blk->push_back(newInstr);
   2890     MachineInstrBuilder(*MF, newInstr).addReg(regNum, false);
   2891 
   2892     SHOWNEWINSTR(newInstr);
   2893   } //insertCondBranchEnd
   2894 
   2895 
   2896   static void insertAssignInstrBefore(MachineBasicBlock::iterator instrPos,
   2897                                       AMDGPUCFGStructurizer *passRep,
   2898                                       RegiT regNum, int regVal) {
   2899     MachineInstr *oldInstr = &(*instrPos);
   2900     const AMDGPUInstrInfo *tii =
   2901              static_cast<const AMDGPUInstrInfo *>(passRep->getTargetInstrInfo());
   2902     MachineBasicBlock *blk = oldInstr->getParent();
   2903     MachineInstr *newInstr = tii->getMovImmInstr(blk->getParent(), regNum,
   2904                                                  regVal);
   2905     blk->insert(instrPos, newInstr);
   2906 
   2907     SHOWNEWINSTR(newInstr);
   2908   } //insertAssignInstrBefore
   2909 
   2910   static void insertAssignInstrBefore(MachineBasicBlock *blk,
   2911                                       AMDGPUCFGStructurizer *passRep,
   2912                                       RegiT regNum, int regVal) {
   2913     const AMDGPUInstrInfo *tii =
   2914              static_cast<const AMDGPUInstrInfo *>(passRep->getTargetInstrInfo());
   2915 
   2916     MachineInstr *newInstr = tii->getMovImmInstr(blk->getParent(), regNum,
   2917                                                  regVal);
   2918     if (blk->begin() != blk->end()) {
   2919       blk->insert(blk->begin(), newInstr);
   2920     } else {
   2921       blk->push_back(newInstr);
   2922     }
   2923 
   2924     SHOWNEWINSTR(newInstr);
   2925 
   2926   } //insertInstrBefore
   2927 
   2928   static void insertCompareInstrBefore(MachineBasicBlock *blk,
   2929                                        MachineBasicBlock::iterator instrPos,
   2930                                        AMDGPUCFGStructurizer *passRep,
   2931                                        RegiT dstReg, RegiT src1Reg,
   2932                                        RegiT src2Reg) {
   2933     const AMDGPUInstrInfo *tii =
   2934              static_cast<const AMDGPUInstrInfo *>(passRep->getTargetInstrInfo());
   2935     MachineFunction *MF = blk->getParent();
   2936     MachineInstr *newInstr =
   2937       MF->CreateMachineInstr(tii->get(tii->getIEQOpcode()), DebugLoc());
   2938 
   2939     MachineInstrBuilder MIB(*MF, newInstr);
   2940     MIB.addReg(dstReg, RegState::Define); //set target
   2941     MIB.addReg(src1Reg); //set src value
   2942     MIB.addReg(src2Reg); //set src value
   2943 
   2944     blk->insert(instrPos, newInstr);
   2945     SHOWNEWINSTR(newInstr);
   2946 
   2947   } //insertCompareInstrBefore
   2948 
   2949   static void cloneSuccessorList(MachineBasicBlock *dstBlk,
   2950                                  MachineBasicBlock *srcBlk) {
   2951     for (MachineBasicBlock::succ_iterator iter = srcBlk->succ_begin(),
   2952          iterEnd = srcBlk->succ_end(); iter != iterEnd; ++iter) {
   2953       dstBlk->addSuccessor(*iter);  // *iter's predecessor is also taken care of
   2954     }
   2955   } //cloneSuccessorList
   2956 
   2957   static MachineBasicBlock *clone(MachineBasicBlock *srcBlk) {
   2958     MachineFunction *func = srcBlk->getParent();
   2959     MachineBasicBlock *newBlk = func->CreateMachineBasicBlock();
   2960     func->push_back(newBlk);  //insert to function
   2961     for (MachineBasicBlock::iterator iter = srcBlk->begin(),
   2962          iterEnd = srcBlk->end();
   2963          iter != iterEnd; ++iter) {
   2964       MachineInstr *instr = func->CloneMachineInstr(iter);
   2965       newBlk->push_back(instr);
   2966     }
   2967     return newBlk;
   2968   }
   2969 
   2970   //MachineBasicBlock::ReplaceUsesOfBlockWith doesn't serve the purpose because
   2971   //the AMDGPU instruction is not recognized as terminator fix this and retire
   2972   //this routine
   2973   static void replaceInstrUseOfBlockWith(MachineBasicBlock *srcBlk,
   2974                                          MachineBasicBlock *oldBlk,
   2975                                          MachineBasicBlock *newBlk) {
   2976     MachineInstr *branchInstr = getLoopendBlockBranchInstr(srcBlk);
   2977     if (branchInstr && isCondBranch(branchInstr) &&
   2978         getTrueBranch(branchInstr) == oldBlk) {
   2979       setTrueBranch(branchInstr, newBlk);
   2980     }
   2981   }
   2982 
   2983   static void wrapup(MachineBasicBlock *entryBlk) {
   2984     assert((!entryBlk->getParent()->getJumpTableInfo()
   2985             || entryBlk->getParent()->getJumpTableInfo()->isEmpty())
   2986            && "found a jump table");
   2987 
   2988      //collect continue right before endloop
   2989      SmallVector<MachineInstr *, DEFAULT_VEC_SLOTS> contInstr;
   2990      MachineBasicBlock::iterator pre = entryBlk->begin();
   2991      MachineBasicBlock::iterator iterEnd = entryBlk->end();
   2992      MachineBasicBlock::iterator iter = pre;
   2993      while (iter != iterEnd) {
   2994        if (pre->getOpcode() == AMDGPU::CONTINUE
   2995            && iter->getOpcode() == AMDGPU::ENDLOOP) {
   2996          contInstr.push_back(pre);
   2997        }
   2998        pre = iter;
   2999        ++iter;
   3000      } //end while
   3001 
   3002      //delete continue right before endloop
   3003      for (unsigned i = 0; i < contInstr.size(); ++i) {
   3004         contInstr[i]->eraseFromParent();
   3005      }
   3006 
   3007      // TODO to fix up jump table so later phase won't be confused.  if
   3008      // (jumpTableInfo->isEmpty() == false) { need to clean the jump table, but
   3009      // there isn't such an interface yet.  alternatively, replace all the other
   3010      // blocks in the jump table with the entryBlk //}
   3011 
   3012   } //wrapup
   3013 
   3014   static MachineDominatorTree *getDominatorTree(AMDGPUCFGStructurizer &pass) {
   3015     return &pass.getAnalysis<MachineDominatorTree>();
   3016   }
   3017 
   3018   static MachinePostDominatorTree*
   3019   getPostDominatorTree(AMDGPUCFGStructurizer &pass) {
   3020     return &pass.getAnalysis<MachinePostDominatorTree>();
   3021   }
   3022 
   3023   static MachineLoopInfo *getLoopInfo(AMDGPUCFGStructurizer &pass) {
   3024     return &pass.getAnalysis<MachineLoopInfo>();
   3025   }
   3026 }; // template class CFGStructTraits
   3027 } //end of namespace llvm
   3028 
   3029 // createAMDGPUCFGPreparationPass- Returns a pass
   3030 FunctionPass *llvm::createAMDGPUCFGPreparationPass(TargetMachine &tm
   3031                                                  ) {
   3032   return new AMDGPUCFGPrepare(tm );
   3033 }
   3034 
   3035 bool AMDGPUCFGPrepare::runOnMachineFunction(MachineFunction &func) {
   3036   return llvmCFGStruct::CFGStructurizer<AMDGPUCFGStructurizer>().prepare(func,
   3037                                                                         *this,
   3038                                                                         TRI);
   3039 }
   3040 
   3041 // createAMDGPUCFGStructurizerPass- Returns a pass
   3042 FunctionPass *llvm::createAMDGPUCFGStructurizerPass(TargetMachine &tm
   3043                                                   ) {
   3044   return new AMDGPUCFGPerform(tm );
   3045 }
   3046 
   3047 bool AMDGPUCFGPerform::runOnMachineFunction(MachineFunction &func) {
   3048   return llvmCFGStruct::CFGStructurizer<AMDGPUCFGStructurizer>().run(func,
   3049                                                                     *this,
   3050                                                                     TRI);
   3051 }
   3052