Home | History | Annotate | Download | only in R600
      1 //===-- AMDGPUStructurizeCFG.cpp -  ------------------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 /// \file
     11 /// The pass implemented in this file transforms the programs control flow
     12 /// graph into a form that's suitable for code generation on hardware that
     13 /// implements control flow by execution masking. This currently includes all
     14 /// AMD GPUs but may as well be useful for other types of hardware.
     15 //
     16 //===----------------------------------------------------------------------===//
     17 
     18 #include "AMDGPU.h"
     19 #include "llvm/ADT/SCCIterator.h"
     20 #include "llvm/Analysis/RegionInfo.h"
     21 #include "llvm/Analysis/RegionIterator.h"
     22 #include "llvm/Analysis/RegionPass.h"
     23 #include "llvm/IR/Module.h"
     24 #include "llvm/Transforms/Utils/SSAUpdater.h"
     25 #include "llvm/Support/PatternMatch.h"
     26 
     27 using namespace llvm;
     28 using namespace llvm::PatternMatch;
     29 
     30 namespace {
     31 
     32 // Definition of the complex types used in this pass.
     33 
     34 typedef std::pair<BasicBlock *, Value *> BBValuePair;
     35 
     36 typedef SmallVector<RegionNode*, 8> RNVector;
     37 typedef SmallVector<BasicBlock*, 8> BBVector;
     38 typedef SmallVector<BranchInst*, 8> BranchVector;
     39 typedef SmallVector<BBValuePair, 2> BBValueVector;
     40 
     41 typedef SmallPtrSet<BasicBlock *, 8> BBSet;
     42 
     43 typedef DenseMap<PHINode *, BBValueVector> PhiMap;
     44 typedef DenseMap<DomTreeNode *, unsigned> DTN2UnsignedMap;
     45 typedef DenseMap<BasicBlock *, PhiMap> BBPhiMap;
     46 typedef DenseMap<BasicBlock *, Value *> BBPredicates;
     47 typedef DenseMap<BasicBlock *, BBPredicates> PredMap;
     48 typedef DenseMap<BasicBlock *, BasicBlock*> BB2BBMap;
     49 typedef DenseMap<BasicBlock *, BBVector> BB2BBVecMap;
     50 
     51 // The name for newly created blocks.
     52 
     53 static const char *FlowBlockName = "Flow";
     54 
     55 /// @brief Find the nearest common dominator for multiple BasicBlocks
     56 ///
     57 /// Helper class for AMDGPUStructurizeCFG
     58 /// TODO: Maybe move into common code
     59 class NearestCommonDominator {
     60 
     61   DominatorTree *DT;
     62 
     63   DTN2UnsignedMap IndexMap;
     64 
     65   BasicBlock *Result;
     66   unsigned ResultIndex;
     67   bool ExplicitMentioned;
     68 
     69 public:
     70   /// \brief Start a new query
     71   NearestCommonDominator(DominatorTree *DomTree) {
     72     DT = DomTree;
     73     Result = 0;
     74   }
     75 
     76   /// \brief Add BB to the resulting dominator
     77   void addBlock(BasicBlock *BB, bool Remember = true) {
     78 
     79     DomTreeNode *Node = DT->getNode(BB);
     80 
     81     if (Result == 0) {
     82       unsigned Numbering = 0;
     83       for (;Node;Node = Node->getIDom())
     84         IndexMap[Node] = ++Numbering;
     85       Result = BB;
     86       ResultIndex = 1;
     87       ExplicitMentioned = Remember;
     88       return;
     89     }
     90 
     91     for (;Node;Node = Node->getIDom())
     92       if (IndexMap.count(Node))
     93         break;
     94       else
     95         IndexMap[Node] = 0;
     96 
     97     assert(Node && "Dominator tree invalid!");
     98 
     99     unsigned Numbering = IndexMap[Node];
    100     if (Numbering > ResultIndex) {
    101       Result = Node->getBlock();
    102       ResultIndex = Numbering;
    103       ExplicitMentioned = Remember && (Result == BB);
    104     } else if (Numbering == ResultIndex) {
    105       ExplicitMentioned |= Remember;
    106     }
    107   }
    108 
    109   /// \brief Is "Result" one of the BBs added with "Remember" = True?
    110   bool wasResultExplicitMentioned() {
    111     return ExplicitMentioned;
    112   }
    113 
    114   /// \brief Get the query result
    115   BasicBlock *getResult() {
    116     return Result;
    117   }
    118 };
    119 
    120 /// @brief Transforms the control flow graph on one single entry/exit region
    121 /// at a time.
    122 ///
    123 /// After the transform all "If"/"Then"/"Else" style control flow looks like
    124 /// this:
    125 ///
    126 /// \verbatim
    127 /// 1
    128 /// ||
    129 /// | |
    130 /// 2 |
    131 /// | /
    132 /// |/
    133 /// 3
    134 /// ||   Where:
    135 /// | |  1 = "If" block, calculates the condition
    136 /// 4 |  2 = "Then" subregion, runs if the condition is true
    137 /// | /  3 = "Flow" blocks, newly inserted flow blocks, rejoins the flow
    138 /// |/   4 = "Else" optional subregion, runs if the condition is false
    139 /// 5    5 = "End" block, also rejoins the control flow
    140 /// \endverbatim
    141 ///
    142 /// Control flow is expressed as a branch where the true exit goes into the
    143 /// "Then"/"Else" region, while the false exit skips the region
    144 /// The condition for the optional "Else" region is expressed as a PHI node.
    145 /// The incomming values of the PHI node are true for the "If" edge and false
    146 /// for the "Then" edge.
    147 ///
    148 /// Additionally to that even complicated loops look like this:
    149 ///
    150 /// \verbatim
    151 /// 1
    152 /// ||
    153 /// | |
    154 /// 2 ^  Where:
    155 /// | /  1 = "Entry" block
    156 /// |/   2 = "Loop" optional subregion, with all exits at "Flow" block
    157 /// 3    3 = "Flow" block, with back edge to entry block
    158 /// |
    159 /// \endverbatim
    160 ///
    161 /// The back edge of the "Flow" block is always on the false side of the branch
    162 /// while the true side continues the general flow. So the loop condition
    163 /// consist of a network of PHI nodes where the true incoming values expresses
    164 /// breaks and the false values expresses continue states.
    165 class AMDGPUStructurizeCFG : public RegionPass {
    166 
    167   static char ID;
    168 
    169   Type *Boolean;
    170   ConstantInt *BoolTrue;
    171   ConstantInt *BoolFalse;
    172   UndefValue *BoolUndef;
    173 
    174   Function *Func;
    175   Region *ParentRegion;
    176 
    177   DominatorTree *DT;
    178 
    179   RNVector Order;
    180   BBSet Visited;
    181 
    182   BBPhiMap DeletedPhis;
    183   BB2BBVecMap AddedPhis;
    184 
    185   PredMap Predicates;
    186   BranchVector Conditions;
    187 
    188   BB2BBMap Loops;
    189   PredMap LoopPreds;
    190   BranchVector LoopConds;
    191 
    192   RegionNode *PrevNode;
    193 
    194   void orderNodes();
    195 
    196   void analyzeLoops(RegionNode *N);
    197 
    198   Value *invert(Value *Condition);
    199 
    200   Value *buildCondition(BranchInst *Term, unsigned Idx, bool Invert);
    201 
    202   void gatherPredicates(RegionNode *N);
    203 
    204   void collectInfos();
    205 
    206   void insertConditions(bool Loops);
    207 
    208   void delPhiValues(BasicBlock *From, BasicBlock *To);
    209 
    210   void addPhiValues(BasicBlock *From, BasicBlock *To);
    211 
    212   void setPhiValues();
    213 
    214   void killTerminator(BasicBlock *BB);
    215 
    216   void changeExit(RegionNode *Node, BasicBlock *NewExit,
    217                   bool IncludeDominator);
    218 
    219   BasicBlock *getNextFlow(BasicBlock *Dominator);
    220 
    221   BasicBlock *needPrefix(bool NeedEmpty);
    222 
    223   BasicBlock *needPostfix(BasicBlock *Flow, bool ExitUseAllowed);
    224 
    225   void setPrevNode(BasicBlock *BB);
    226 
    227   bool dominatesPredicates(BasicBlock *BB, RegionNode *Node);
    228 
    229   bool isPredictableTrue(RegionNode *Node);
    230 
    231   void wireFlow(bool ExitUseAllowed, BasicBlock *LoopEnd);
    232 
    233   void handleLoops(bool ExitUseAllowed, BasicBlock *LoopEnd);
    234 
    235   void createFlow();
    236 
    237   void rebuildSSA();
    238 
    239 public:
    240   AMDGPUStructurizeCFG():
    241     RegionPass(ID) {
    242 
    243     initializeRegionInfoPass(*PassRegistry::getPassRegistry());
    244   }
    245 
    246   using Pass::doInitialization;
    247   virtual bool doInitialization(Region *R, RGPassManager &RGM);
    248 
    249   virtual bool runOnRegion(Region *R, RGPassManager &RGM);
    250 
    251   virtual const char *getPassName() const {
    252     return "AMDGPU simplify control flow";
    253   }
    254 
    255   void getAnalysisUsage(AnalysisUsage &AU) const {
    256 
    257     AU.addRequired<DominatorTree>();
    258     AU.addPreserved<DominatorTree>();
    259     RegionPass::getAnalysisUsage(AU);
    260   }
    261 
    262 };
    263 
    264 } // end anonymous namespace
    265 
    266 char AMDGPUStructurizeCFG::ID = 0;
    267 
    268 /// \brief Initialize the types and constants used in the pass
    269 bool AMDGPUStructurizeCFG::doInitialization(Region *R, RGPassManager &RGM) {
    270   LLVMContext &Context = R->getEntry()->getContext();
    271 
    272   Boolean = Type::getInt1Ty(Context);
    273   BoolTrue = ConstantInt::getTrue(Context);
    274   BoolFalse = ConstantInt::getFalse(Context);
    275   BoolUndef = UndefValue::get(Boolean);
    276 
    277   return false;
    278 }
    279 
    280 /// \brief Build up the general order of nodes
    281 void AMDGPUStructurizeCFG::orderNodes() {
    282   scc_iterator<Region *> I = scc_begin(ParentRegion),
    283                          E = scc_end(ParentRegion);
    284   for (Order.clear(); I != E; ++I) {
    285     std::vector<RegionNode *> &Nodes = *I;
    286     Order.append(Nodes.begin(), Nodes.end());
    287   }
    288 }
    289 
    290 /// \brief Determine the end of the loops
    291 void AMDGPUStructurizeCFG::analyzeLoops(RegionNode *N) {
    292 
    293   if (N->isSubRegion()) {
    294     // Test for exit as back edge
    295     BasicBlock *Exit = N->getNodeAs<Region>()->getExit();
    296     if (Visited.count(Exit))
    297       Loops[Exit] = N->getEntry();
    298 
    299   } else {
    300     // Test for sucessors as back edge
    301     BasicBlock *BB = N->getNodeAs<BasicBlock>();
    302     BranchInst *Term = cast<BranchInst>(BB->getTerminator());
    303 
    304     for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) {
    305       BasicBlock *Succ = Term->getSuccessor(i);
    306 
    307       if (Visited.count(Succ))
    308         Loops[Succ] = BB;
    309     }
    310   }
    311 }
    312 
    313 /// \brief Invert the given condition
    314 Value *AMDGPUStructurizeCFG::invert(Value *Condition) {
    315 
    316   // First: Check if it's a constant
    317   if (Condition == BoolTrue)
    318     return BoolFalse;
    319 
    320   if (Condition == BoolFalse)
    321     return BoolTrue;
    322 
    323   if (Condition == BoolUndef)
    324     return BoolUndef;
    325 
    326   // Second: If the condition is already inverted, return the original value
    327   if (match(Condition, m_Not(m_Value(Condition))))
    328     return Condition;
    329 
    330   // Third: Check all the users for an invert
    331   BasicBlock *Parent = cast<Instruction>(Condition)->getParent();
    332   for (Value::use_iterator I = Condition->use_begin(),
    333        E = Condition->use_end(); I != E; ++I) {
    334 
    335     Instruction *User = dyn_cast<Instruction>(*I);
    336     if (!User || User->getParent() != Parent)
    337       continue;
    338 
    339     if (match(*I, m_Not(m_Specific(Condition))))
    340       return *I;
    341   }
    342 
    343   // Last option: Create a new instruction
    344   return BinaryOperator::CreateNot(Condition, "", Parent->getTerminator());
    345 }
    346 
    347 /// \brief Build the condition for one edge
    348 Value *AMDGPUStructurizeCFG::buildCondition(BranchInst *Term, unsigned Idx,
    349                                             bool Invert) {
    350   Value *Cond = Invert ? BoolFalse : BoolTrue;
    351   if (Term->isConditional()) {
    352     Cond = Term->getCondition();
    353 
    354     if (Idx != Invert)
    355       Cond = invert(Cond);
    356   }
    357   return Cond;
    358 }
    359 
    360 /// \brief Analyze the predecessors of each block and build up predicates
    361 void AMDGPUStructurizeCFG::gatherPredicates(RegionNode *N) {
    362 
    363   RegionInfo *RI = ParentRegion->getRegionInfo();
    364   BasicBlock *BB = N->getEntry();
    365   BBPredicates &Pred = Predicates[BB];
    366   BBPredicates &LPred = LoopPreds[BB];
    367 
    368   for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB);
    369        PI != PE; ++PI) {
    370 
    371     // Ignore it if it's a branch from outside into our region entry
    372     if (!ParentRegion->contains(*PI))
    373       continue;
    374 
    375     Region *R = RI->getRegionFor(*PI);
    376     if (R == ParentRegion) {
    377 
    378       // It's a top level block in our region
    379       BranchInst *Term = cast<BranchInst>((*PI)->getTerminator());
    380       for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) {
    381         BasicBlock *Succ = Term->getSuccessor(i);
    382         if (Succ != BB)
    383           continue;
    384 
    385         if (Visited.count(*PI)) {
    386           // Normal forward edge
    387           if (Term->isConditional()) {
    388             // Try to treat it like an ELSE block
    389             BasicBlock *Other = Term->getSuccessor(!i);
    390             if (Visited.count(Other) && !Loops.count(Other) &&
    391                 !Pred.count(Other) && !Pred.count(*PI)) {
    392 
    393               Pred[Other] = BoolFalse;
    394               Pred[*PI] = BoolTrue;
    395               continue;
    396             }
    397           }
    398           Pred[*PI] = buildCondition(Term, i, false);
    399 
    400         } else {
    401           // Back edge
    402           LPred[*PI] = buildCondition(Term, i, true);
    403         }
    404       }
    405 
    406     } else {
    407 
    408       // It's an exit from a sub region
    409       while(R->getParent() != ParentRegion)
    410         R = R->getParent();
    411 
    412       // Edge from inside a subregion to its entry, ignore it
    413       if (R == N)
    414         continue;
    415 
    416       BasicBlock *Entry = R->getEntry();
    417       if (Visited.count(Entry))
    418         Pred[Entry] = BoolTrue;
    419       else
    420         LPred[Entry] = BoolFalse;
    421     }
    422   }
    423 }
    424 
    425 /// \brief Collect various loop and predicate infos
    426 void AMDGPUStructurizeCFG::collectInfos() {
    427 
    428   // Reset predicate
    429   Predicates.clear();
    430 
    431   // and loop infos
    432   Loops.clear();
    433   LoopPreds.clear();
    434 
    435   // Reset the visited nodes
    436   Visited.clear();
    437 
    438   for (RNVector::reverse_iterator OI = Order.rbegin(), OE = Order.rend();
    439        OI != OE; ++OI) {
    440 
    441     // Analyze all the conditions leading to a node
    442     gatherPredicates(*OI);
    443 
    444     // Remember that we've seen this node
    445     Visited.insert((*OI)->getEntry());
    446 
    447     // Find the last back edges
    448     analyzeLoops(*OI);
    449   }
    450 }
    451 
    452 /// \brief Insert the missing branch conditions
    453 void AMDGPUStructurizeCFG::insertConditions(bool Loops) {
    454   BranchVector &Conds = Loops ? LoopConds : Conditions;
    455   Value *Default = Loops ? BoolTrue : BoolFalse;
    456   SSAUpdater PhiInserter;
    457 
    458   for (BranchVector::iterator I = Conds.begin(),
    459        E = Conds.end(); I != E; ++I) {
    460 
    461     BranchInst *Term = *I;
    462     assert(Term->isConditional());
    463 
    464     BasicBlock *Parent = Term->getParent();
    465     BasicBlock *SuccTrue = Term->getSuccessor(0);
    466     BasicBlock *SuccFalse = Term->getSuccessor(1);
    467 
    468     PhiInserter.Initialize(Boolean, "");
    469     PhiInserter.AddAvailableValue(&Func->getEntryBlock(), Default);
    470     PhiInserter.AddAvailableValue(Loops ? SuccFalse : Parent, Default);
    471 
    472     BBPredicates &Preds = Loops ? LoopPreds[SuccFalse] : Predicates[SuccTrue];
    473 
    474     NearestCommonDominator Dominator(DT);
    475     Dominator.addBlock(Parent, false);
    476 
    477     Value *ParentValue = 0;
    478     for (BBPredicates::iterator PI = Preds.begin(), PE = Preds.end();
    479          PI != PE; ++PI) {
    480 
    481       if (PI->first == Parent) {
    482         ParentValue = PI->second;
    483         break;
    484       }
    485       PhiInserter.AddAvailableValue(PI->first, PI->second);
    486       Dominator.addBlock(PI->first);
    487     }
    488 
    489     if (ParentValue) {
    490       Term->setCondition(ParentValue);
    491     } else {
    492       if (!Dominator.wasResultExplicitMentioned())
    493         PhiInserter.AddAvailableValue(Dominator.getResult(), Default);
    494 
    495       Term->setCondition(PhiInserter.GetValueInMiddleOfBlock(Parent));
    496     }
    497   }
    498 }
    499 
    500 /// \brief Remove all PHI values coming from "From" into "To" and remember
    501 /// them in DeletedPhis
    502 void AMDGPUStructurizeCFG::delPhiValues(BasicBlock *From, BasicBlock *To) {
    503   PhiMap &Map = DeletedPhis[To];
    504   for (BasicBlock::iterator I = To->begin(), E = To->end();
    505        I != E && isa<PHINode>(*I);) {
    506 
    507     PHINode &Phi = cast<PHINode>(*I++);
    508     while (Phi.getBasicBlockIndex(From) != -1) {
    509       Value *Deleted = Phi.removeIncomingValue(From, false);
    510       Map[&Phi].push_back(std::make_pair(From, Deleted));
    511     }
    512   }
    513 }
    514 
    515 /// \brief Add a dummy PHI value as soon as we knew the new predecessor
    516 void AMDGPUStructurizeCFG::addPhiValues(BasicBlock *From, BasicBlock *To) {
    517   for (BasicBlock::iterator I = To->begin(), E = To->end();
    518        I != E && isa<PHINode>(*I);) {
    519 
    520     PHINode &Phi = cast<PHINode>(*I++);
    521     Value *Undef = UndefValue::get(Phi.getType());
    522     Phi.addIncoming(Undef, From);
    523   }
    524   AddedPhis[To].push_back(From);
    525 }
    526 
    527 /// \brief Add the real PHI value as soon as everything is set up
    528 void AMDGPUStructurizeCFG::setPhiValues() {
    529 
    530   SSAUpdater Updater;
    531   for (BB2BBVecMap::iterator AI = AddedPhis.begin(), AE = AddedPhis.end();
    532        AI != AE; ++AI) {
    533 
    534     BasicBlock *To = AI->first;
    535     BBVector &From = AI->second;
    536 
    537     if (!DeletedPhis.count(To))
    538       continue;
    539 
    540     PhiMap &Map = DeletedPhis[To];
    541     for (PhiMap::iterator PI = Map.begin(), PE = Map.end();
    542          PI != PE; ++PI) {
    543 
    544       PHINode *Phi = PI->first;
    545       Value *Undef = UndefValue::get(Phi->getType());
    546       Updater.Initialize(Phi->getType(), "");
    547       Updater.AddAvailableValue(&Func->getEntryBlock(), Undef);
    548       Updater.AddAvailableValue(To, Undef);
    549 
    550       NearestCommonDominator Dominator(DT);
    551       Dominator.addBlock(To, false);
    552       for (BBValueVector::iterator VI = PI->second.begin(),
    553            VE = PI->second.end(); VI != VE; ++VI) {
    554 
    555         Updater.AddAvailableValue(VI->first, VI->second);
    556         Dominator.addBlock(VI->first);
    557       }
    558 
    559       if (!Dominator.wasResultExplicitMentioned())
    560         Updater.AddAvailableValue(Dominator.getResult(), Undef);
    561 
    562       for (BBVector::iterator FI = From.begin(), FE = From.end();
    563            FI != FE; ++FI) {
    564 
    565         int Idx = Phi->getBasicBlockIndex(*FI);
    566         assert(Idx != -1);
    567         Phi->setIncomingValue(Idx, Updater.GetValueAtEndOfBlock(*FI));
    568       }
    569     }
    570 
    571     DeletedPhis.erase(To);
    572   }
    573   assert(DeletedPhis.empty());
    574 }
    575 
    576 /// \brief Remove phi values from all successors and then remove the terminator.
    577 void AMDGPUStructurizeCFG::killTerminator(BasicBlock *BB) {
    578   TerminatorInst *Term = BB->getTerminator();
    579   if (!Term)
    580     return;
    581 
    582   for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB);
    583        SI != SE; ++SI) {
    584 
    585     delPhiValues(BB, *SI);
    586   }
    587 
    588   Term->eraseFromParent();
    589 }
    590 
    591 /// \brief Let node exit(s) point to NewExit
    592 void AMDGPUStructurizeCFG::changeExit(RegionNode *Node, BasicBlock *NewExit,
    593                                       bool IncludeDominator) {
    594 
    595   if (Node->isSubRegion()) {
    596     Region *SubRegion = Node->getNodeAs<Region>();
    597     BasicBlock *OldExit = SubRegion->getExit();
    598     BasicBlock *Dominator = 0;
    599 
    600     // Find all the edges from the sub region to the exit
    601     for (pred_iterator I = pred_begin(OldExit), E = pred_end(OldExit);
    602          I != E;) {
    603 
    604       BasicBlock *BB = *I++;
    605       if (!SubRegion->contains(BB))
    606         continue;
    607 
    608       // Modify the edges to point to the new exit
    609       delPhiValues(BB, OldExit);
    610       BB->getTerminator()->replaceUsesOfWith(OldExit, NewExit);
    611       addPhiValues(BB, NewExit);
    612 
    613       // Find the new dominator (if requested)
    614       if (IncludeDominator) {
    615         if (!Dominator)
    616           Dominator = BB;
    617         else
    618           Dominator = DT->findNearestCommonDominator(Dominator, BB);
    619       }
    620     }
    621 
    622     // Change the dominator (if requested)
    623     if (Dominator)
    624       DT->changeImmediateDominator(NewExit, Dominator);
    625 
    626     // Update the region info
    627     SubRegion->replaceExit(NewExit);
    628 
    629   } else {
    630     BasicBlock *BB = Node->getNodeAs<BasicBlock>();
    631     killTerminator(BB);
    632     BranchInst::Create(NewExit, BB);
    633     addPhiValues(BB, NewExit);
    634     if (IncludeDominator)
    635       DT->changeImmediateDominator(NewExit, BB);
    636   }
    637 }
    638 
    639 /// \brief Create a new flow node and update dominator tree and region info
    640 BasicBlock *AMDGPUStructurizeCFG::getNextFlow(BasicBlock *Dominator) {
    641   LLVMContext &Context = Func->getContext();
    642   BasicBlock *Insert = Order.empty() ? ParentRegion->getExit() :
    643                        Order.back()->getEntry();
    644   BasicBlock *Flow = BasicBlock::Create(Context, FlowBlockName,
    645                                         Func, Insert);
    646   DT->addNewBlock(Flow, Dominator);
    647   ParentRegion->getRegionInfo()->setRegionFor(Flow, ParentRegion);
    648   return Flow;
    649 }
    650 
    651 /// \brief Create a new or reuse the previous node as flow node
    652 BasicBlock *AMDGPUStructurizeCFG::needPrefix(bool NeedEmpty) {
    653 
    654   BasicBlock *Entry = PrevNode->getEntry();
    655 
    656   if (!PrevNode->isSubRegion()) {
    657     killTerminator(Entry);
    658     if (!NeedEmpty || Entry->getFirstInsertionPt() == Entry->end())
    659       return Entry;
    660 
    661   }
    662 
    663   // create a new flow node
    664   BasicBlock *Flow = getNextFlow(Entry);
    665 
    666   // and wire it up
    667   changeExit(PrevNode, Flow, true);
    668   PrevNode = ParentRegion->getBBNode(Flow);
    669   return Flow;
    670 }
    671 
    672 /// \brief Returns the region exit if possible, otherwise just a new flow node
    673 BasicBlock *AMDGPUStructurizeCFG::needPostfix(BasicBlock *Flow,
    674                                               bool ExitUseAllowed) {
    675 
    676   if (Order.empty() && ExitUseAllowed) {
    677     BasicBlock *Exit = ParentRegion->getExit();
    678     DT->changeImmediateDominator(Exit, Flow);
    679     addPhiValues(Flow, Exit);
    680     return Exit;
    681   }
    682   return getNextFlow(Flow);
    683 }
    684 
    685 /// \brief Set the previous node
    686 void AMDGPUStructurizeCFG::setPrevNode(BasicBlock *BB) {
    687   PrevNode =  ParentRegion->contains(BB) ? ParentRegion->getBBNode(BB) : 0;
    688 }
    689 
    690 /// \brief Does BB dominate all the predicates of Node ?
    691 bool AMDGPUStructurizeCFG::dominatesPredicates(BasicBlock *BB, RegionNode *Node) {
    692   BBPredicates &Preds = Predicates[Node->getEntry()];
    693   for (BBPredicates::iterator PI = Preds.begin(), PE = Preds.end();
    694        PI != PE; ++PI) {
    695 
    696     if (!DT->dominates(BB, PI->first))
    697       return false;
    698   }
    699   return true;
    700 }
    701 
    702 /// \brief Can we predict that this node will always be called?
    703 bool AMDGPUStructurizeCFG::isPredictableTrue(RegionNode *Node) {
    704 
    705   BBPredicates &Preds = Predicates[Node->getEntry()];
    706   bool Dominated = false;
    707 
    708   // Regionentry is always true
    709   if (PrevNode == 0)
    710     return true;
    711 
    712   for (BBPredicates::iterator I = Preds.begin(), E = Preds.end();
    713        I != E; ++I) {
    714 
    715     if (I->second != BoolTrue)
    716       return false;
    717 
    718     if (!Dominated && DT->dominates(I->first, PrevNode->getEntry()))
    719       Dominated = true;
    720   }
    721 
    722   // TODO: The dominator check is too strict
    723   return Dominated;
    724 }
    725 
    726 /// Take one node from the order vector and wire it up
    727 void AMDGPUStructurizeCFG::wireFlow(bool ExitUseAllowed,
    728                                     BasicBlock *LoopEnd) {
    729 
    730   RegionNode *Node = Order.pop_back_val();
    731   Visited.insert(Node->getEntry());
    732 
    733   if (isPredictableTrue(Node)) {
    734     // Just a linear flow
    735     if (PrevNode) {
    736       changeExit(PrevNode, Node->getEntry(), true);
    737     }
    738     PrevNode = Node;
    739 
    740   } else {
    741     // Insert extra prefix node (or reuse last one)
    742     BasicBlock *Flow = needPrefix(false);
    743 
    744     // Insert extra postfix node (or use exit instead)
    745     BasicBlock *Entry = Node->getEntry();
    746     BasicBlock *Next = needPostfix(Flow, ExitUseAllowed);
    747 
    748     // let it point to entry and next block
    749     Conditions.push_back(BranchInst::Create(Entry, Next, BoolUndef, Flow));
    750     addPhiValues(Flow, Entry);
    751     DT->changeImmediateDominator(Entry, Flow);
    752 
    753     PrevNode = Node;
    754     while (!Order.empty() && !Visited.count(LoopEnd) &&
    755            dominatesPredicates(Entry, Order.back())) {
    756       handleLoops(false, LoopEnd);
    757     }
    758 
    759     changeExit(PrevNode, Next, false);
    760     setPrevNode(Next);
    761   }
    762 }
    763 
    764 void AMDGPUStructurizeCFG::handleLoops(bool ExitUseAllowed,
    765                                        BasicBlock *LoopEnd) {
    766   RegionNode *Node = Order.back();
    767   BasicBlock *LoopStart = Node->getEntry();
    768 
    769   if (!Loops.count(LoopStart)) {
    770     wireFlow(ExitUseAllowed, LoopEnd);
    771     return;
    772   }
    773 
    774   if (!isPredictableTrue(Node))
    775     LoopStart = needPrefix(true);
    776 
    777   LoopEnd = Loops[Node->getEntry()];
    778   wireFlow(false, LoopEnd);
    779   while (!Visited.count(LoopEnd)) {
    780     handleLoops(false, LoopEnd);
    781   }
    782 
    783   // Create an extra loop end node
    784   LoopEnd = needPrefix(false);
    785   BasicBlock *Next = needPostfix(LoopEnd, ExitUseAllowed);
    786   LoopConds.push_back(BranchInst::Create(Next, LoopStart,
    787                                          BoolUndef, LoopEnd));
    788   addPhiValues(LoopEnd, LoopStart);
    789   setPrevNode(Next);
    790 }
    791 
    792 /// After this function control flow looks like it should be, but
    793 /// branches and PHI nodes only have undefined conditions.
    794 void AMDGPUStructurizeCFG::createFlow() {
    795 
    796   BasicBlock *Exit = ParentRegion->getExit();
    797   bool EntryDominatesExit = DT->dominates(ParentRegion->getEntry(), Exit);
    798 
    799   DeletedPhis.clear();
    800   AddedPhis.clear();
    801   Conditions.clear();
    802   LoopConds.clear();
    803 
    804   PrevNode = 0;
    805   Visited.clear();
    806 
    807   while (!Order.empty()) {
    808     handleLoops(EntryDominatesExit, 0);
    809   }
    810 
    811   if (PrevNode)
    812     changeExit(PrevNode, Exit, EntryDominatesExit);
    813   else
    814     assert(EntryDominatesExit);
    815 }
    816 
    817 /// Handle a rare case where the disintegrated nodes instructions
    818 /// no longer dominate all their uses. Not sure if this is really nessasary
    819 void AMDGPUStructurizeCFG::rebuildSSA() {
    820   SSAUpdater Updater;
    821   for (Region::block_iterator I = ParentRegion->block_begin(),
    822                               E = ParentRegion->block_end();
    823        I != E; ++I) {
    824 
    825     BasicBlock *BB = *I;
    826     for (BasicBlock::iterator II = BB->begin(), IE = BB->end();
    827          II != IE; ++II) {
    828 
    829       bool Initialized = false;
    830       for (Use *I = &II->use_begin().getUse(), *Next; I; I = Next) {
    831 
    832         Next = I->getNext();
    833 
    834         Instruction *User = cast<Instruction>(I->getUser());
    835         if (User->getParent() == BB) {
    836           continue;
    837 
    838         } else if (PHINode *UserPN = dyn_cast<PHINode>(User)) {
    839           if (UserPN->getIncomingBlock(*I) == BB)
    840             continue;
    841         }
    842 
    843         if (DT->dominates(II, User))
    844           continue;
    845 
    846         if (!Initialized) {
    847           Value *Undef = UndefValue::get(II->getType());
    848           Updater.Initialize(II->getType(), "");
    849           Updater.AddAvailableValue(&Func->getEntryBlock(), Undef);
    850           Updater.AddAvailableValue(BB, II);
    851           Initialized = true;
    852         }
    853         Updater.RewriteUseAfterInsertions(*I);
    854       }
    855     }
    856   }
    857 }
    858 
    859 /// \brief Run the transformation for each region found
    860 bool AMDGPUStructurizeCFG::runOnRegion(Region *R, RGPassManager &RGM) {
    861   if (R->isTopLevelRegion())
    862     return false;
    863 
    864   Func = R->getEntry()->getParent();
    865   ParentRegion = R;
    866 
    867   DT = &getAnalysis<DominatorTree>();
    868 
    869   orderNodes();
    870   collectInfos();
    871   createFlow();
    872   insertConditions(false);
    873   insertConditions(true);
    874   setPhiValues();
    875   rebuildSSA();
    876 
    877   // Cleanup
    878   Order.clear();
    879   Visited.clear();
    880   DeletedPhis.clear();
    881   AddedPhis.clear();
    882   Predicates.clear();
    883   Conditions.clear();
    884   Loops.clear();
    885   LoopPreds.clear();
    886   LoopConds.clear();
    887 
    888   return true;
    889 }
    890 
    891 /// \brief Create the pass
    892 Pass *llvm::createAMDGPUStructurizeCFGPass() {
    893   return new AMDGPUStructurizeCFG();
    894 }
    895