1 //===-- AMDGPUStructurizeCFG.cpp - ------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 /// \file 11 /// The pass implemented in this file transforms the programs control flow 12 /// graph into a form that's suitable for code generation on hardware that 13 /// implements control flow by execution masking. This currently includes all 14 /// AMD GPUs but may as well be useful for other types of hardware. 15 // 16 //===----------------------------------------------------------------------===// 17 18 #include "AMDGPU.h" 19 #include "llvm/ADT/SCCIterator.h" 20 #include "llvm/Analysis/RegionInfo.h" 21 #include "llvm/Analysis/RegionIterator.h" 22 #include "llvm/Analysis/RegionPass.h" 23 #include "llvm/IR/Module.h" 24 #include "llvm/Transforms/Utils/SSAUpdater.h" 25 #include "llvm/Support/PatternMatch.h" 26 27 using namespace llvm; 28 using namespace llvm::PatternMatch; 29 30 namespace { 31 32 // Definition of the complex types used in this pass. 33 34 typedef std::pair<BasicBlock *, Value *> BBValuePair; 35 36 typedef SmallVector<RegionNode*, 8> RNVector; 37 typedef SmallVector<BasicBlock*, 8> BBVector; 38 typedef SmallVector<BranchInst*, 8> BranchVector; 39 typedef SmallVector<BBValuePair, 2> BBValueVector; 40 41 typedef SmallPtrSet<BasicBlock *, 8> BBSet; 42 43 typedef DenseMap<PHINode *, BBValueVector> PhiMap; 44 typedef DenseMap<DomTreeNode *, unsigned> DTN2UnsignedMap; 45 typedef DenseMap<BasicBlock *, PhiMap> BBPhiMap; 46 typedef DenseMap<BasicBlock *, Value *> BBPredicates; 47 typedef DenseMap<BasicBlock *, BBPredicates> PredMap; 48 typedef DenseMap<BasicBlock *, BasicBlock*> BB2BBMap; 49 typedef DenseMap<BasicBlock *, BBVector> BB2BBVecMap; 50 51 // The name for newly created blocks. 52 53 static const char *FlowBlockName = "Flow"; 54 55 /// @brief Find the nearest common dominator for multiple BasicBlocks 56 /// 57 /// Helper class for AMDGPUStructurizeCFG 58 /// TODO: Maybe move into common code 59 class NearestCommonDominator { 60 61 DominatorTree *DT; 62 63 DTN2UnsignedMap IndexMap; 64 65 BasicBlock *Result; 66 unsigned ResultIndex; 67 bool ExplicitMentioned; 68 69 public: 70 /// \brief Start a new query 71 NearestCommonDominator(DominatorTree *DomTree) { 72 DT = DomTree; 73 Result = 0; 74 } 75 76 /// \brief Add BB to the resulting dominator 77 void addBlock(BasicBlock *BB, bool Remember = true) { 78 79 DomTreeNode *Node = DT->getNode(BB); 80 81 if (Result == 0) { 82 unsigned Numbering = 0; 83 for (;Node;Node = Node->getIDom()) 84 IndexMap[Node] = ++Numbering; 85 Result = BB; 86 ResultIndex = 1; 87 ExplicitMentioned = Remember; 88 return; 89 } 90 91 for (;Node;Node = Node->getIDom()) 92 if (IndexMap.count(Node)) 93 break; 94 else 95 IndexMap[Node] = 0; 96 97 assert(Node && "Dominator tree invalid!"); 98 99 unsigned Numbering = IndexMap[Node]; 100 if (Numbering > ResultIndex) { 101 Result = Node->getBlock(); 102 ResultIndex = Numbering; 103 ExplicitMentioned = Remember && (Result == BB); 104 } else if (Numbering == ResultIndex) { 105 ExplicitMentioned |= Remember; 106 } 107 } 108 109 /// \brief Is "Result" one of the BBs added with "Remember" = True? 110 bool wasResultExplicitMentioned() { 111 return ExplicitMentioned; 112 } 113 114 /// \brief Get the query result 115 BasicBlock *getResult() { 116 return Result; 117 } 118 }; 119 120 /// @brief Transforms the control flow graph on one single entry/exit region 121 /// at a time. 122 /// 123 /// After the transform all "If"/"Then"/"Else" style control flow looks like 124 /// this: 125 /// 126 /// \verbatim 127 /// 1 128 /// || 129 /// | | 130 /// 2 | 131 /// | / 132 /// |/ 133 /// 3 134 /// || Where: 135 /// | | 1 = "If" block, calculates the condition 136 /// 4 | 2 = "Then" subregion, runs if the condition is true 137 /// | / 3 = "Flow" blocks, newly inserted flow blocks, rejoins the flow 138 /// |/ 4 = "Else" optional subregion, runs if the condition is false 139 /// 5 5 = "End" block, also rejoins the control flow 140 /// \endverbatim 141 /// 142 /// Control flow is expressed as a branch where the true exit goes into the 143 /// "Then"/"Else" region, while the false exit skips the region 144 /// The condition for the optional "Else" region is expressed as a PHI node. 145 /// The incomming values of the PHI node are true for the "If" edge and false 146 /// for the "Then" edge. 147 /// 148 /// Additionally to that even complicated loops look like this: 149 /// 150 /// \verbatim 151 /// 1 152 /// || 153 /// | | 154 /// 2 ^ Where: 155 /// | / 1 = "Entry" block 156 /// |/ 2 = "Loop" optional subregion, with all exits at "Flow" block 157 /// 3 3 = "Flow" block, with back edge to entry block 158 /// | 159 /// \endverbatim 160 /// 161 /// The back edge of the "Flow" block is always on the false side of the branch 162 /// while the true side continues the general flow. So the loop condition 163 /// consist of a network of PHI nodes where the true incoming values expresses 164 /// breaks and the false values expresses continue states. 165 class AMDGPUStructurizeCFG : public RegionPass { 166 167 static char ID; 168 169 Type *Boolean; 170 ConstantInt *BoolTrue; 171 ConstantInt *BoolFalse; 172 UndefValue *BoolUndef; 173 174 Function *Func; 175 Region *ParentRegion; 176 177 DominatorTree *DT; 178 179 RNVector Order; 180 BBSet Visited; 181 182 BBPhiMap DeletedPhis; 183 BB2BBVecMap AddedPhis; 184 185 PredMap Predicates; 186 BranchVector Conditions; 187 188 BB2BBMap Loops; 189 PredMap LoopPreds; 190 BranchVector LoopConds; 191 192 RegionNode *PrevNode; 193 194 void orderNodes(); 195 196 void analyzeLoops(RegionNode *N); 197 198 Value *invert(Value *Condition); 199 200 Value *buildCondition(BranchInst *Term, unsigned Idx, bool Invert); 201 202 void gatherPredicates(RegionNode *N); 203 204 void collectInfos(); 205 206 void insertConditions(bool Loops); 207 208 void delPhiValues(BasicBlock *From, BasicBlock *To); 209 210 void addPhiValues(BasicBlock *From, BasicBlock *To); 211 212 void setPhiValues(); 213 214 void killTerminator(BasicBlock *BB); 215 216 void changeExit(RegionNode *Node, BasicBlock *NewExit, 217 bool IncludeDominator); 218 219 BasicBlock *getNextFlow(BasicBlock *Dominator); 220 221 BasicBlock *needPrefix(bool NeedEmpty); 222 223 BasicBlock *needPostfix(BasicBlock *Flow, bool ExitUseAllowed); 224 225 void setPrevNode(BasicBlock *BB); 226 227 bool dominatesPredicates(BasicBlock *BB, RegionNode *Node); 228 229 bool isPredictableTrue(RegionNode *Node); 230 231 void wireFlow(bool ExitUseAllowed, BasicBlock *LoopEnd); 232 233 void handleLoops(bool ExitUseAllowed, BasicBlock *LoopEnd); 234 235 void createFlow(); 236 237 void rebuildSSA(); 238 239 public: 240 AMDGPUStructurizeCFG(): 241 RegionPass(ID) { 242 243 initializeRegionInfoPass(*PassRegistry::getPassRegistry()); 244 } 245 246 using Pass::doInitialization; 247 virtual bool doInitialization(Region *R, RGPassManager &RGM); 248 249 virtual bool runOnRegion(Region *R, RGPassManager &RGM); 250 251 virtual const char *getPassName() const { 252 return "AMDGPU simplify control flow"; 253 } 254 255 void getAnalysisUsage(AnalysisUsage &AU) const { 256 257 AU.addRequired<DominatorTree>(); 258 AU.addPreserved<DominatorTree>(); 259 RegionPass::getAnalysisUsage(AU); 260 } 261 262 }; 263 264 } // end anonymous namespace 265 266 char AMDGPUStructurizeCFG::ID = 0; 267 268 /// \brief Initialize the types and constants used in the pass 269 bool AMDGPUStructurizeCFG::doInitialization(Region *R, RGPassManager &RGM) { 270 LLVMContext &Context = R->getEntry()->getContext(); 271 272 Boolean = Type::getInt1Ty(Context); 273 BoolTrue = ConstantInt::getTrue(Context); 274 BoolFalse = ConstantInt::getFalse(Context); 275 BoolUndef = UndefValue::get(Boolean); 276 277 return false; 278 } 279 280 /// \brief Build up the general order of nodes 281 void AMDGPUStructurizeCFG::orderNodes() { 282 scc_iterator<Region *> I = scc_begin(ParentRegion), 283 E = scc_end(ParentRegion); 284 for (Order.clear(); I != E; ++I) { 285 std::vector<RegionNode *> &Nodes = *I; 286 Order.append(Nodes.begin(), Nodes.end()); 287 } 288 } 289 290 /// \brief Determine the end of the loops 291 void AMDGPUStructurizeCFG::analyzeLoops(RegionNode *N) { 292 293 if (N->isSubRegion()) { 294 // Test for exit as back edge 295 BasicBlock *Exit = N->getNodeAs<Region>()->getExit(); 296 if (Visited.count(Exit)) 297 Loops[Exit] = N->getEntry(); 298 299 } else { 300 // Test for sucessors as back edge 301 BasicBlock *BB = N->getNodeAs<BasicBlock>(); 302 BranchInst *Term = cast<BranchInst>(BB->getTerminator()); 303 304 for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) { 305 BasicBlock *Succ = Term->getSuccessor(i); 306 307 if (Visited.count(Succ)) 308 Loops[Succ] = BB; 309 } 310 } 311 } 312 313 /// \brief Invert the given condition 314 Value *AMDGPUStructurizeCFG::invert(Value *Condition) { 315 316 // First: Check if it's a constant 317 if (Condition == BoolTrue) 318 return BoolFalse; 319 320 if (Condition == BoolFalse) 321 return BoolTrue; 322 323 if (Condition == BoolUndef) 324 return BoolUndef; 325 326 // Second: If the condition is already inverted, return the original value 327 if (match(Condition, m_Not(m_Value(Condition)))) 328 return Condition; 329 330 // Third: Check all the users for an invert 331 BasicBlock *Parent = cast<Instruction>(Condition)->getParent(); 332 for (Value::use_iterator I = Condition->use_begin(), 333 E = Condition->use_end(); I != E; ++I) { 334 335 Instruction *User = dyn_cast<Instruction>(*I); 336 if (!User || User->getParent() != Parent) 337 continue; 338 339 if (match(*I, m_Not(m_Specific(Condition)))) 340 return *I; 341 } 342 343 // Last option: Create a new instruction 344 return BinaryOperator::CreateNot(Condition, "", Parent->getTerminator()); 345 } 346 347 /// \brief Build the condition for one edge 348 Value *AMDGPUStructurizeCFG::buildCondition(BranchInst *Term, unsigned Idx, 349 bool Invert) { 350 Value *Cond = Invert ? BoolFalse : BoolTrue; 351 if (Term->isConditional()) { 352 Cond = Term->getCondition(); 353 354 if (Idx != Invert) 355 Cond = invert(Cond); 356 } 357 return Cond; 358 } 359 360 /// \brief Analyze the predecessors of each block and build up predicates 361 void AMDGPUStructurizeCFG::gatherPredicates(RegionNode *N) { 362 363 RegionInfo *RI = ParentRegion->getRegionInfo(); 364 BasicBlock *BB = N->getEntry(); 365 BBPredicates &Pred = Predicates[BB]; 366 BBPredicates &LPred = LoopPreds[BB]; 367 368 for (pred_iterator PI = pred_begin(BB), PE = pred_end(BB); 369 PI != PE; ++PI) { 370 371 // Ignore it if it's a branch from outside into our region entry 372 if (!ParentRegion->contains(*PI)) 373 continue; 374 375 Region *R = RI->getRegionFor(*PI); 376 if (R == ParentRegion) { 377 378 // It's a top level block in our region 379 BranchInst *Term = cast<BranchInst>((*PI)->getTerminator()); 380 for (unsigned i = 0, e = Term->getNumSuccessors(); i != e; ++i) { 381 BasicBlock *Succ = Term->getSuccessor(i); 382 if (Succ != BB) 383 continue; 384 385 if (Visited.count(*PI)) { 386 // Normal forward edge 387 if (Term->isConditional()) { 388 // Try to treat it like an ELSE block 389 BasicBlock *Other = Term->getSuccessor(!i); 390 if (Visited.count(Other) && !Loops.count(Other) && 391 !Pred.count(Other) && !Pred.count(*PI)) { 392 393 Pred[Other] = BoolFalse; 394 Pred[*PI] = BoolTrue; 395 continue; 396 } 397 } 398 Pred[*PI] = buildCondition(Term, i, false); 399 400 } else { 401 // Back edge 402 LPred[*PI] = buildCondition(Term, i, true); 403 } 404 } 405 406 } else { 407 408 // It's an exit from a sub region 409 while(R->getParent() != ParentRegion) 410 R = R->getParent(); 411 412 // Edge from inside a subregion to its entry, ignore it 413 if (R == N) 414 continue; 415 416 BasicBlock *Entry = R->getEntry(); 417 if (Visited.count(Entry)) 418 Pred[Entry] = BoolTrue; 419 else 420 LPred[Entry] = BoolFalse; 421 } 422 } 423 } 424 425 /// \brief Collect various loop and predicate infos 426 void AMDGPUStructurizeCFG::collectInfos() { 427 428 // Reset predicate 429 Predicates.clear(); 430 431 // and loop infos 432 Loops.clear(); 433 LoopPreds.clear(); 434 435 // Reset the visited nodes 436 Visited.clear(); 437 438 for (RNVector::reverse_iterator OI = Order.rbegin(), OE = Order.rend(); 439 OI != OE; ++OI) { 440 441 // Analyze all the conditions leading to a node 442 gatherPredicates(*OI); 443 444 // Remember that we've seen this node 445 Visited.insert((*OI)->getEntry()); 446 447 // Find the last back edges 448 analyzeLoops(*OI); 449 } 450 } 451 452 /// \brief Insert the missing branch conditions 453 void AMDGPUStructurizeCFG::insertConditions(bool Loops) { 454 BranchVector &Conds = Loops ? LoopConds : Conditions; 455 Value *Default = Loops ? BoolTrue : BoolFalse; 456 SSAUpdater PhiInserter; 457 458 for (BranchVector::iterator I = Conds.begin(), 459 E = Conds.end(); I != E; ++I) { 460 461 BranchInst *Term = *I; 462 assert(Term->isConditional()); 463 464 BasicBlock *Parent = Term->getParent(); 465 BasicBlock *SuccTrue = Term->getSuccessor(0); 466 BasicBlock *SuccFalse = Term->getSuccessor(1); 467 468 PhiInserter.Initialize(Boolean, ""); 469 PhiInserter.AddAvailableValue(&Func->getEntryBlock(), Default); 470 PhiInserter.AddAvailableValue(Loops ? SuccFalse : Parent, Default); 471 472 BBPredicates &Preds = Loops ? LoopPreds[SuccFalse] : Predicates[SuccTrue]; 473 474 NearestCommonDominator Dominator(DT); 475 Dominator.addBlock(Parent, false); 476 477 Value *ParentValue = 0; 478 for (BBPredicates::iterator PI = Preds.begin(), PE = Preds.end(); 479 PI != PE; ++PI) { 480 481 if (PI->first == Parent) { 482 ParentValue = PI->second; 483 break; 484 } 485 PhiInserter.AddAvailableValue(PI->first, PI->second); 486 Dominator.addBlock(PI->first); 487 } 488 489 if (ParentValue) { 490 Term->setCondition(ParentValue); 491 } else { 492 if (!Dominator.wasResultExplicitMentioned()) 493 PhiInserter.AddAvailableValue(Dominator.getResult(), Default); 494 495 Term->setCondition(PhiInserter.GetValueInMiddleOfBlock(Parent)); 496 } 497 } 498 } 499 500 /// \brief Remove all PHI values coming from "From" into "To" and remember 501 /// them in DeletedPhis 502 void AMDGPUStructurizeCFG::delPhiValues(BasicBlock *From, BasicBlock *To) { 503 PhiMap &Map = DeletedPhis[To]; 504 for (BasicBlock::iterator I = To->begin(), E = To->end(); 505 I != E && isa<PHINode>(*I);) { 506 507 PHINode &Phi = cast<PHINode>(*I++); 508 while (Phi.getBasicBlockIndex(From) != -1) { 509 Value *Deleted = Phi.removeIncomingValue(From, false); 510 Map[&Phi].push_back(std::make_pair(From, Deleted)); 511 } 512 } 513 } 514 515 /// \brief Add a dummy PHI value as soon as we knew the new predecessor 516 void AMDGPUStructurizeCFG::addPhiValues(BasicBlock *From, BasicBlock *To) { 517 for (BasicBlock::iterator I = To->begin(), E = To->end(); 518 I != E && isa<PHINode>(*I);) { 519 520 PHINode &Phi = cast<PHINode>(*I++); 521 Value *Undef = UndefValue::get(Phi.getType()); 522 Phi.addIncoming(Undef, From); 523 } 524 AddedPhis[To].push_back(From); 525 } 526 527 /// \brief Add the real PHI value as soon as everything is set up 528 void AMDGPUStructurizeCFG::setPhiValues() { 529 530 SSAUpdater Updater; 531 for (BB2BBVecMap::iterator AI = AddedPhis.begin(), AE = AddedPhis.end(); 532 AI != AE; ++AI) { 533 534 BasicBlock *To = AI->first; 535 BBVector &From = AI->second; 536 537 if (!DeletedPhis.count(To)) 538 continue; 539 540 PhiMap &Map = DeletedPhis[To]; 541 for (PhiMap::iterator PI = Map.begin(), PE = Map.end(); 542 PI != PE; ++PI) { 543 544 PHINode *Phi = PI->first; 545 Value *Undef = UndefValue::get(Phi->getType()); 546 Updater.Initialize(Phi->getType(), ""); 547 Updater.AddAvailableValue(&Func->getEntryBlock(), Undef); 548 Updater.AddAvailableValue(To, Undef); 549 550 NearestCommonDominator Dominator(DT); 551 Dominator.addBlock(To, false); 552 for (BBValueVector::iterator VI = PI->second.begin(), 553 VE = PI->second.end(); VI != VE; ++VI) { 554 555 Updater.AddAvailableValue(VI->first, VI->second); 556 Dominator.addBlock(VI->first); 557 } 558 559 if (!Dominator.wasResultExplicitMentioned()) 560 Updater.AddAvailableValue(Dominator.getResult(), Undef); 561 562 for (BBVector::iterator FI = From.begin(), FE = From.end(); 563 FI != FE; ++FI) { 564 565 int Idx = Phi->getBasicBlockIndex(*FI); 566 assert(Idx != -1); 567 Phi->setIncomingValue(Idx, Updater.GetValueAtEndOfBlock(*FI)); 568 } 569 } 570 571 DeletedPhis.erase(To); 572 } 573 assert(DeletedPhis.empty()); 574 } 575 576 /// \brief Remove phi values from all successors and then remove the terminator. 577 void AMDGPUStructurizeCFG::killTerminator(BasicBlock *BB) { 578 TerminatorInst *Term = BB->getTerminator(); 579 if (!Term) 580 return; 581 582 for (succ_iterator SI = succ_begin(BB), SE = succ_end(BB); 583 SI != SE; ++SI) { 584 585 delPhiValues(BB, *SI); 586 } 587 588 Term->eraseFromParent(); 589 } 590 591 /// \brief Let node exit(s) point to NewExit 592 void AMDGPUStructurizeCFG::changeExit(RegionNode *Node, BasicBlock *NewExit, 593 bool IncludeDominator) { 594 595 if (Node->isSubRegion()) { 596 Region *SubRegion = Node->getNodeAs<Region>(); 597 BasicBlock *OldExit = SubRegion->getExit(); 598 BasicBlock *Dominator = 0; 599 600 // Find all the edges from the sub region to the exit 601 for (pred_iterator I = pred_begin(OldExit), E = pred_end(OldExit); 602 I != E;) { 603 604 BasicBlock *BB = *I++; 605 if (!SubRegion->contains(BB)) 606 continue; 607 608 // Modify the edges to point to the new exit 609 delPhiValues(BB, OldExit); 610 BB->getTerminator()->replaceUsesOfWith(OldExit, NewExit); 611 addPhiValues(BB, NewExit); 612 613 // Find the new dominator (if requested) 614 if (IncludeDominator) { 615 if (!Dominator) 616 Dominator = BB; 617 else 618 Dominator = DT->findNearestCommonDominator(Dominator, BB); 619 } 620 } 621 622 // Change the dominator (if requested) 623 if (Dominator) 624 DT->changeImmediateDominator(NewExit, Dominator); 625 626 // Update the region info 627 SubRegion->replaceExit(NewExit); 628 629 } else { 630 BasicBlock *BB = Node->getNodeAs<BasicBlock>(); 631 killTerminator(BB); 632 BranchInst::Create(NewExit, BB); 633 addPhiValues(BB, NewExit); 634 if (IncludeDominator) 635 DT->changeImmediateDominator(NewExit, BB); 636 } 637 } 638 639 /// \brief Create a new flow node and update dominator tree and region info 640 BasicBlock *AMDGPUStructurizeCFG::getNextFlow(BasicBlock *Dominator) { 641 LLVMContext &Context = Func->getContext(); 642 BasicBlock *Insert = Order.empty() ? ParentRegion->getExit() : 643 Order.back()->getEntry(); 644 BasicBlock *Flow = BasicBlock::Create(Context, FlowBlockName, 645 Func, Insert); 646 DT->addNewBlock(Flow, Dominator); 647 ParentRegion->getRegionInfo()->setRegionFor(Flow, ParentRegion); 648 return Flow; 649 } 650 651 /// \brief Create a new or reuse the previous node as flow node 652 BasicBlock *AMDGPUStructurizeCFG::needPrefix(bool NeedEmpty) { 653 654 BasicBlock *Entry = PrevNode->getEntry(); 655 656 if (!PrevNode->isSubRegion()) { 657 killTerminator(Entry); 658 if (!NeedEmpty || Entry->getFirstInsertionPt() == Entry->end()) 659 return Entry; 660 661 } 662 663 // create a new flow node 664 BasicBlock *Flow = getNextFlow(Entry); 665 666 // and wire it up 667 changeExit(PrevNode, Flow, true); 668 PrevNode = ParentRegion->getBBNode(Flow); 669 return Flow; 670 } 671 672 /// \brief Returns the region exit if possible, otherwise just a new flow node 673 BasicBlock *AMDGPUStructurizeCFG::needPostfix(BasicBlock *Flow, 674 bool ExitUseAllowed) { 675 676 if (Order.empty() && ExitUseAllowed) { 677 BasicBlock *Exit = ParentRegion->getExit(); 678 DT->changeImmediateDominator(Exit, Flow); 679 addPhiValues(Flow, Exit); 680 return Exit; 681 } 682 return getNextFlow(Flow); 683 } 684 685 /// \brief Set the previous node 686 void AMDGPUStructurizeCFG::setPrevNode(BasicBlock *BB) { 687 PrevNode = ParentRegion->contains(BB) ? ParentRegion->getBBNode(BB) : 0; 688 } 689 690 /// \brief Does BB dominate all the predicates of Node ? 691 bool AMDGPUStructurizeCFG::dominatesPredicates(BasicBlock *BB, RegionNode *Node) { 692 BBPredicates &Preds = Predicates[Node->getEntry()]; 693 for (BBPredicates::iterator PI = Preds.begin(), PE = Preds.end(); 694 PI != PE; ++PI) { 695 696 if (!DT->dominates(BB, PI->first)) 697 return false; 698 } 699 return true; 700 } 701 702 /// \brief Can we predict that this node will always be called? 703 bool AMDGPUStructurizeCFG::isPredictableTrue(RegionNode *Node) { 704 705 BBPredicates &Preds = Predicates[Node->getEntry()]; 706 bool Dominated = false; 707 708 // Regionentry is always true 709 if (PrevNode == 0) 710 return true; 711 712 for (BBPredicates::iterator I = Preds.begin(), E = Preds.end(); 713 I != E; ++I) { 714 715 if (I->second != BoolTrue) 716 return false; 717 718 if (!Dominated && DT->dominates(I->first, PrevNode->getEntry())) 719 Dominated = true; 720 } 721 722 // TODO: The dominator check is too strict 723 return Dominated; 724 } 725 726 /// Take one node from the order vector and wire it up 727 void AMDGPUStructurizeCFG::wireFlow(bool ExitUseAllowed, 728 BasicBlock *LoopEnd) { 729 730 RegionNode *Node = Order.pop_back_val(); 731 Visited.insert(Node->getEntry()); 732 733 if (isPredictableTrue(Node)) { 734 // Just a linear flow 735 if (PrevNode) { 736 changeExit(PrevNode, Node->getEntry(), true); 737 } 738 PrevNode = Node; 739 740 } else { 741 // Insert extra prefix node (or reuse last one) 742 BasicBlock *Flow = needPrefix(false); 743 744 // Insert extra postfix node (or use exit instead) 745 BasicBlock *Entry = Node->getEntry(); 746 BasicBlock *Next = needPostfix(Flow, ExitUseAllowed); 747 748 // let it point to entry and next block 749 Conditions.push_back(BranchInst::Create(Entry, Next, BoolUndef, Flow)); 750 addPhiValues(Flow, Entry); 751 DT->changeImmediateDominator(Entry, Flow); 752 753 PrevNode = Node; 754 while (!Order.empty() && !Visited.count(LoopEnd) && 755 dominatesPredicates(Entry, Order.back())) { 756 handleLoops(false, LoopEnd); 757 } 758 759 changeExit(PrevNode, Next, false); 760 setPrevNode(Next); 761 } 762 } 763 764 void AMDGPUStructurizeCFG::handleLoops(bool ExitUseAllowed, 765 BasicBlock *LoopEnd) { 766 RegionNode *Node = Order.back(); 767 BasicBlock *LoopStart = Node->getEntry(); 768 769 if (!Loops.count(LoopStart)) { 770 wireFlow(ExitUseAllowed, LoopEnd); 771 return; 772 } 773 774 if (!isPredictableTrue(Node)) 775 LoopStart = needPrefix(true); 776 777 LoopEnd = Loops[Node->getEntry()]; 778 wireFlow(false, LoopEnd); 779 while (!Visited.count(LoopEnd)) { 780 handleLoops(false, LoopEnd); 781 } 782 783 // Create an extra loop end node 784 LoopEnd = needPrefix(false); 785 BasicBlock *Next = needPostfix(LoopEnd, ExitUseAllowed); 786 LoopConds.push_back(BranchInst::Create(Next, LoopStart, 787 BoolUndef, LoopEnd)); 788 addPhiValues(LoopEnd, LoopStart); 789 setPrevNode(Next); 790 } 791 792 /// After this function control flow looks like it should be, but 793 /// branches and PHI nodes only have undefined conditions. 794 void AMDGPUStructurizeCFG::createFlow() { 795 796 BasicBlock *Exit = ParentRegion->getExit(); 797 bool EntryDominatesExit = DT->dominates(ParentRegion->getEntry(), Exit); 798 799 DeletedPhis.clear(); 800 AddedPhis.clear(); 801 Conditions.clear(); 802 LoopConds.clear(); 803 804 PrevNode = 0; 805 Visited.clear(); 806 807 while (!Order.empty()) { 808 handleLoops(EntryDominatesExit, 0); 809 } 810 811 if (PrevNode) 812 changeExit(PrevNode, Exit, EntryDominatesExit); 813 else 814 assert(EntryDominatesExit); 815 } 816 817 /// Handle a rare case where the disintegrated nodes instructions 818 /// no longer dominate all their uses. Not sure if this is really nessasary 819 void AMDGPUStructurizeCFG::rebuildSSA() { 820 SSAUpdater Updater; 821 for (Region::block_iterator I = ParentRegion->block_begin(), 822 E = ParentRegion->block_end(); 823 I != E; ++I) { 824 825 BasicBlock *BB = *I; 826 for (BasicBlock::iterator II = BB->begin(), IE = BB->end(); 827 II != IE; ++II) { 828 829 bool Initialized = false; 830 for (Use *I = &II->use_begin().getUse(), *Next; I; I = Next) { 831 832 Next = I->getNext(); 833 834 Instruction *User = cast<Instruction>(I->getUser()); 835 if (User->getParent() == BB) { 836 continue; 837 838 } else if (PHINode *UserPN = dyn_cast<PHINode>(User)) { 839 if (UserPN->getIncomingBlock(*I) == BB) 840 continue; 841 } 842 843 if (DT->dominates(II, User)) 844 continue; 845 846 if (!Initialized) { 847 Value *Undef = UndefValue::get(II->getType()); 848 Updater.Initialize(II->getType(), ""); 849 Updater.AddAvailableValue(&Func->getEntryBlock(), Undef); 850 Updater.AddAvailableValue(BB, II); 851 Initialized = true; 852 } 853 Updater.RewriteUseAfterInsertions(*I); 854 } 855 } 856 } 857 } 858 859 /// \brief Run the transformation for each region found 860 bool AMDGPUStructurizeCFG::runOnRegion(Region *R, RGPassManager &RGM) { 861 if (R->isTopLevelRegion()) 862 return false; 863 864 Func = R->getEntry()->getParent(); 865 ParentRegion = R; 866 867 DT = &getAnalysis<DominatorTree>(); 868 869 orderNodes(); 870 collectInfos(); 871 createFlow(); 872 insertConditions(false); 873 insertConditions(true); 874 setPhiValues(); 875 rebuildSSA(); 876 877 // Cleanup 878 Order.clear(); 879 Visited.clear(); 880 DeletedPhis.clear(); 881 AddedPhis.clear(); 882 Predicates.clear(); 883 Conditions.clear(); 884 Loops.clear(); 885 LoopPreds.clear(); 886 LoopConds.clear(); 887 888 return true; 889 } 890 891 /// \brief Create the pass 892 Pass *llvm::createAMDGPUStructurizeCFGPass() { 893 return new AMDGPUStructurizeCFG(); 894 } 895