1 //===-- AMDILCFGStructurizer.cpp - CFG Structurizer -----------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //==-----------------------------------------------------------------------===// 9 10 #define DEBUGME 0 11 #define DEBUG_TYPE "structcfg" 12 13 #include "AMDGPUInstrInfo.h" 14 #include "AMDIL.h" 15 #include "AMDILUtilityFunctions.h" 16 #include "llvm/ADT/SCCIterator.h" 17 #include "llvm/ADT/SmallVector.h" 18 #include "llvm/ADT/Statistic.h" 19 #include "llvm/Analysis/DominatorInternals.h" 20 #include "llvm/Analysis/Dominators.h" 21 #include "llvm/CodeGen/MachineDominators.h" 22 #include "llvm/CodeGen/MachineDominators.h" 23 #include "llvm/CodeGen/MachineFunction.h" 24 #include "llvm/CodeGen/MachineFunctionAnalysis.h" 25 #include "llvm/CodeGen/MachineFunctionPass.h" 26 #include "llvm/CodeGen/MachineFunctionPass.h" 27 #include "llvm/CodeGen/MachineInstrBuilder.h" 28 #include "llvm/CodeGen/MachineJumpTableInfo.h" 29 #include "llvm/CodeGen/MachineLoopInfo.h" 30 #include "llvm/CodeGen/MachineRegisterInfo.h" 31 #include "llvm/Target/TargetInstrInfo.h" 32 33 #define FirstNonDebugInstr(A) A->begin() 34 using namespace llvm; 35 36 // TODO: move-begin. 37 38 //===----------------------------------------------------------------------===// 39 // 40 // Statistics for CFGStructurizer. 41 // 42 //===----------------------------------------------------------------------===// 43 44 STATISTIC(numSerialPatternMatch, "CFGStructurizer number of serial pattern " 45 "matched"); 46 STATISTIC(numIfPatternMatch, "CFGStructurizer number of if pattern " 47 "matched"); 48 STATISTIC(numLoopbreakPatternMatch, "CFGStructurizer number of loop-break " 49 "pattern matched"); 50 STATISTIC(numLoopcontPatternMatch, "CFGStructurizer number of loop-continue " 51 "pattern matched"); 52 STATISTIC(numLoopPatternMatch, "CFGStructurizer number of loop pattern " 53 "matched"); 54 STATISTIC(numClonedBlock, "CFGStructurizer cloned blocks"); 55 STATISTIC(numClonedInstr, "CFGStructurizer cloned instructions"); 56 57 //===----------------------------------------------------------------------===// 58 // 59 // Miscellaneous utility for CFGStructurizer. 60 // 61 //===----------------------------------------------------------------------===// 62 namespace llvmCFGStruct 63 { 64 #define SHOWNEWINSTR(i) \ 65 if (DEBUGME) errs() << "New instr: " << *i << "\n" 66 67 #define SHOWNEWBLK(b, msg) \ 68 if (DEBUGME) { \ 69 errs() << msg << "BB" << b->getNumber() << "size " << b->size(); \ 70 errs() << "\n"; \ 71 } 72 73 #define SHOWBLK_DETAIL(b, msg) \ 74 if (DEBUGME) { \ 75 if (b) { \ 76 errs() << msg << "BB" << b->getNumber() << "size " << b->size(); \ 77 b->print(errs()); \ 78 errs() << "\n"; \ 79 } \ 80 } 81 82 #define INVALIDSCCNUM -1 83 #define INVALIDREGNUM 0 84 85 template<class LoopinfoT> 86 void PrintLoopinfo(const LoopinfoT &LoopInfo, llvm::raw_ostream &OS) { 87 for (typename LoopinfoT::iterator iter = LoopInfo.begin(), 88 iterEnd = LoopInfo.end(); 89 iter != iterEnd; ++iter) { 90 (*iter)->print(OS, 0); 91 } 92 } 93 94 template<class NodeT> 95 void ReverseVector(SmallVector<NodeT *, DEFAULT_VEC_SLOTS> &Src) { 96 size_t sz = Src.size(); 97 for (size_t i = 0; i < sz/2; ++i) { 98 NodeT *t = Src[i]; 99 Src[i] = Src[sz - i - 1]; 100 Src[sz - i - 1] = t; 101 } 102 } 103 104 } //end namespace llvmCFGStruct 105 106 107 //===----------------------------------------------------------------------===// 108 // 109 // MachinePostDominatorTree 110 // 111 //===----------------------------------------------------------------------===// 112 113 namespace llvm { 114 115 /// PostDominatorTree Class - Concrete subclass of DominatorTree that is used 116 /// to compute the a post-dominator tree. 117 /// 118 struct MachinePostDominatorTree : public MachineFunctionPass { 119 static char ID; // Pass identification, replacement for typeid 120 DominatorTreeBase<MachineBasicBlock> *DT; 121 MachinePostDominatorTree() : MachineFunctionPass(ID) 122 { 123 DT = new DominatorTreeBase<MachineBasicBlock>(true); //true indicate 124 // postdominator 125 } 126 127 ~MachinePostDominatorTree(); 128 129 virtual bool runOnMachineFunction(MachineFunction &MF); 130 131 virtual void getAnalysisUsage(AnalysisUsage &AU) const { 132 AU.setPreservesAll(); 133 MachineFunctionPass::getAnalysisUsage(AU); 134 } 135 136 inline const std::vector<MachineBasicBlock *> &getRoots() const { 137 return DT->getRoots(); 138 } 139 140 inline MachineDomTreeNode *getRootNode() const { 141 return DT->getRootNode(); 142 } 143 144 inline MachineDomTreeNode *operator[](MachineBasicBlock *BB) const { 145 return DT->getNode(BB); 146 } 147 148 inline MachineDomTreeNode *getNode(MachineBasicBlock *BB) const { 149 return DT->getNode(BB); 150 } 151 152 inline bool dominates(MachineDomTreeNode *A, MachineDomTreeNode *B) const { 153 return DT->dominates(A, B); 154 } 155 156 inline bool dominates(MachineBasicBlock *A, MachineBasicBlock *B) const { 157 return DT->dominates(A, B); 158 } 159 160 inline bool 161 properlyDominates(const MachineDomTreeNode *A, MachineDomTreeNode *B) const { 162 return DT->properlyDominates(A, B); 163 } 164 165 inline bool 166 properlyDominates(MachineBasicBlock *A, MachineBasicBlock *B) const { 167 return DT->properlyDominates(A, B); 168 } 169 170 inline MachineBasicBlock * 171 findNearestCommonDominator(MachineBasicBlock *A, MachineBasicBlock *B) { 172 return DT->findNearestCommonDominator(A, B); 173 } 174 175 virtual void print(llvm::raw_ostream &OS, const Module *M = 0) const { 176 DT->print(OS); 177 } 178 }; 179 } //end of namespace llvm 180 181 char MachinePostDominatorTree::ID = 0; 182 static RegisterPass<MachinePostDominatorTree> 183 machinePostDominatorTreePass("machinepostdomtree", 184 "MachinePostDominator Tree Construction", 185 true, true); 186 187 //const PassInfo *const llvm::MachinePostDominatorsID 188 //= &machinePostDominatorTreePass; 189 190 bool MachinePostDominatorTree::runOnMachineFunction(MachineFunction &F) { 191 DT->recalculate(F); 192 //DEBUG(DT->dump()); 193 return false; 194 } 195 196 MachinePostDominatorTree::~MachinePostDominatorTree() { 197 delete DT; 198 } 199 200 //===----------------------------------------------------------------------===// 201 // 202 // supporting data structure for CFGStructurizer 203 // 204 //===----------------------------------------------------------------------===// 205 206 namespace llvmCFGStruct 207 { 208 template<class PassT> 209 struct CFGStructTraits { 210 }; 211 212 template <class InstrT> 213 class BlockInformation { 214 public: 215 bool isRetired; 216 int sccNum; 217 //SmallVector<InstrT*, DEFAULT_VEC_SLOTS> succInstr; 218 //Instructions defining the corresponding successor. 219 BlockInformation() : isRetired(false), sccNum(INVALIDSCCNUM) {} 220 }; 221 222 template <class BlockT, class InstrT, class RegiT> 223 class LandInformation { 224 public: 225 BlockT *landBlk; 226 std::set<RegiT> breakInitRegs; //Registers that need to "reg = 0", before 227 //WHILELOOP(thisloop) init before entering 228 //thisloop. 229 std::set<RegiT> contInitRegs; //Registers that need to "reg = 0", after 230 //WHILELOOP(thisloop) init after entering 231 //thisloop. 232 std::set<RegiT> endbranchInitRegs; //Init before entering this loop, at loop 233 //land block, branch cond on this reg. 234 std::set<RegiT> breakOnRegs; //registers that need to "if (reg) break 235 //endif" after ENDLOOP(thisloop) break 236 //outerLoopOf(thisLoop). 237 std::set<RegiT> contOnRegs; //registers that need to "if (reg) continue 238 //endif" after ENDLOOP(thisloop) continue on 239 //outerLoopOf(thisLoop). 240 LandInformation() : landBlk(NULL) {} 241 }; 242 243 } //end of namespace llvmCFGStruct 244 245 //===----------------------------------------------------------------------===// 246 // 247 // CFGStructurizer 248 // 249 //===----------------------------------------------------------------------===// 250 251 namespace llvmCFGStruct 252 { 253 // bixia TODO: port it to BasicBlock, not just MachineBasicBlock. 254 template<class PassT> 255 class CFGStructurizer 256 { 257 public: 258 typedef enum { 259 Not_SinglePath = 0, 260 SinglePath_InPath = 1, 261 SinglePath_NotInPath = 2 262 } PathToKind; 263 264 public: 265 typedef typename PassT::InstructionType InstrT; 266 typedef typename PassT::FunctionType FuncT; 267 typedef typename PassT::DominatortreeType DomTreeT; 268 typedef typename PassT::PostDominatortreeType PostDomTreeT; 269 typedef typename PassT::DomTreeNodeType DomTreeNodeT; 270 typedef typename PassT::LoopinfoType LoopInfoT; 271 272 typedef GraphTraits<FuncT *> FuncGTraits; 273 //typedef FuncGTraits::nodes_iterator BlockIterator; 274 typedef typename FuncT::iterator BlockIterator; 275 276 typedef typename FuncGTraits::NodeType BlockT; 277 typedef GraphTraits<BlockT *> BlockGTraits; 278 typedef GraphTraits<Inverse<BlockT *> > InvBlockGTraits; 279 //typedef BlockGTraits::succ_iterator InstructionIterator; 280 typedef typename BlockT::iterator InstrIterator; 281 282 typedef CFGStructTraits<PassT> CFGTraits; 283 typedef BlockInformation<InstrT> BlockInfo; 284 typedef std::map<BlockT *, BlockInfo *> BlockInfoMap; 285 286 typedef int RegiT; 287 typedef typename PassT::LoopType LoopT; 288 typedef LandInformation<BlockT, InstrT, RegiT> LoopLandInfo; 289 typedef std::map<LoopT *, LoopLandInfo *> LoopLandInfoMap; 290 //landing info for loop break 291 typedef SmallVector<BlockT *, 32> BlockTSmallerVector; 292 293 public: 294 CFGStructurizer(); 295 ~CFGStructurizer(); 296 297 /// Perform the CFG structurization 298 bool run(FuncT &Func, PassT &Pass, const AMDGPURegisterInfo *tri); 299 300 /// Perform the CFG preparation 301 bool prepare(FuncT &Func, PassT &Pass, const AMDGPURegisterInfo *tri); 302 303 private: 304 void reversePredicateSetter(typename BlockT::iterator); 305 void orderBlocks(); 306 void printOrderedBlocks(llvm::raw_ostream &OS); 307 int patternMatch(BlockT *CurBlock); 308 int patternMatchGroup(BlockT *CurBlock); 309 310 int serialPatternMatch(BlockT *CurBlock); 311 int ifPatternMatch(BlockT *CurBlock); 312 int switchPatternMatch(BlockT *CurBlock); 313 int loopendPatternMatch(BlockT *CurBlock); 314 int loopPatternMatch(BlockT *CurBlock); 315 316 int loopbreakPatternMatch(LoopT *LoopRep, BlockT *LoopHeader); 317 int loopcontPatternMatch(LoopT *LoopRep, BlockT *LoopHeader); 318 //int loopWithoutBreak(BlockT *); 319 320 void handleLoopbreak (BlockT *ExitingBlock, LoopT *ExitingLoop, 321 BlockT *ExitBlock, LoopT *exitLoop, BlockT *landBlock); 322 void handleLoopcontBlock(BlockT *ContingBlock, LoopT *contingLoop, 323 BlockT *ContBlock, LoopT *contLoop); 324 bool isSameloopDetachedContbreak(BlockT *Src1Block, BlockT *Src2Block); 325 int handleJumpintoIf(BlockT *HeadBlock, BlockT *TrueBlock, 326 BlockT *FalseBlock); 327 int handleJumpintoIfImp(BlockT *HeadBlock, BlockT *TrueBlock, 328 BlockT *FalseBlock); 329 int improveSimpleJumpintoIf(BlockT *HeadBlock, BlockT *TrueBlock, 330 BlockT *FalseBlock, BlockT **LandBlockPtr); 331 void showImproveSimpleJumpintoIf(BlockT *HeadBlock, BlockT *TrueBlock, 332 BlockT *FalseBlock, BlockT *LandBlock, 333 bool Detail = false); 334 PathToKind singlePathTo(BlockT *SrcBlock, BlockT *DstBlock, 335 bool AllowSideEntry = true); 336 BlockT *singlePathEnd(BlockT *srcBlock, BlockT *DstBlock, 337 bool AllowSideEntry = true); 338 int cloneOnSideEntryTo(BlockT *PreBlock, BlockT *SrcBlock, BlockT *DstBlock); 339 void mergeSerialBlock(BlockT *DstBlock, BlockT *srcBlock); 340 341 void mergeIfthenelseBlock(InstrT *BranchInstr, BlockT *CurBlock, 342 BlockT *TrueBlock, BlockT *FalseBlock, 343 BlockT *LandBlock); 344 void mergeLooplandBlock(BlockT *DstBlock, LoopLandInfo *LoopLand); 345 void mergeLoopbreakBlock(BlockT *ExitingBlock, BlockT *ExitBlock, 346 BlockT *ExitLandBlock, RegiT SetReg); 347 void settleLoopcontBlock(BlockT *ContingBlock, BlockT *ContBlock, 348 RegiT SetReg); 349 BlockT *relocateLoopcontBlock(LoopT *ParentLoopRep, LoopT *LoopRep, 350 std::set<BlockT*> &ExitBlockSet, 351 BlockT *ExitLandBlk); 352 BlockT *addLoopEndbranchBlock(LoopT *LoopRep, 353 BlockTSmallerVector &ExitingBlocks, 354 BlockTSmallerVector &ExitBlocks); 355 BlockT *normalizeInfiniteLoopExit(LoopT *LoopRep); 356 void removeUnconditionalBranch(BlockT *SrcBlock); 357 void removeRedundantConditionalBranch(BlockT *SrcBlock); 358 void addDummyExitBlock(SmallVector<BlockT *, DEFAULT_VEC_SLOTS> &RetBlocks); 359 360 void removeSuccessor(BlockT *SrcBlock); 361 BlockT *cloneBlockForPredecessor(BlockT *CurBlock, BlockT *PredBlock); 362 BlockT *exitingBlock2ExitBlock (LoopT *LoopRep, BlockT *exitingBlock); 363 364 void migrateInstruction(BlockT *SrcBlock, BlockT *DstBlock, 365 InstrIterator InsertPos); 366 367 void recordSccnum(BlockT *SrcBlock, int SCCNum); 368 int getSCCNum(BlockT *srcBlk); 369 370 void retireBlock(BlockT *DstBlock, BlockT *SrcBlock); 371 bool isRetiredBlock(BlockT *SrcBlock); 372 bool isActiveLoophead(BlockT *CurBlock); 373 bool needMigrateBlock(BlockT *Block); 374 375 BlockT *recordLoopLandBlock(LoopT *LoopRep, BlockT *LandBlock, 376 BlockTSmallerVector &exitBlocks, 377 std::set<BlockT*> &ExitBlockSet); 378 void setLoopLandBlock(LoopT *LoopRep, BlockT *Block = NULL); 379 BlockT *getLoopLandBlock(LoopT *LoopRep); 380 LoopLandInfo *getLoopLandInfo(LoopT *LoopRep); 381 382 void addLoopBreakOnReg(LoopT *LoopRep, RegiT RegNum); 383 void addLoopContOnReg(LoopT *LoopRep, RegiT RegNum); 384 void addLoopBreakInitReg(LoopT *LoopRep, RegiT RegNum); 385 void addLoopContInitReg(LoopT *LoopRep, RegiT RegNum); 386 void addLoopEndbranchInitReg(LoopT *LoopRep, RegiT RegNum); 387 388 bool hasBackEdge(BlockT *curBlock); 389 unsigned getLoopDepth (LoopT *LoopRep); 390 int countActiveBlock( 391 typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator IterStart, 392 typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator IterEnd); 393 BlockT *findNearestCommonPostDom(std::set<BlockT *>&); 394 BlockT *findNearestCommonPostDom(BlockT *Block1, BlockT *Block2); 395 396 private: 397 DomTreeT *domTree; 398 PostDomTreeT *postDomTree; 399 LoopInfoT *loopInfo; 400 PassT *passRep; 401 FuncT *funcRep; 402 403 BlockInfoMap blockInfoMap; 404 LoopLandInfoMap loopLandInfoMap; 405 SmallVector<BlockT *, DEFAULT_VEC_SLOTS> orderedBlks; 406 const AMDGPURegisterInfo *TRI; 407 408 }; //template class CFGStructurizer 409 410 template<class PassT> CFGStructurizer<PassT>::CFGStructurizer() 411 : domTree(NULL), postDomTree(NULL), loopInfo(NULL) { 412 } 413 414 template<class PassT> CFGStructurizer<PassT>::~CFGStructurizer() { 415 for (typename BlockInfoMap::iterator I = blockInfoMap.begin(), 416 E = blockInfoMap.end(); I != E; ++I) { 417 delete I->second; 418 } 419 } 420 421 template<class PassT> 422 bool CFGStructurizer<PassT>::prepare(FuncT &func, PassT &pass, 423 const AMDGPURegisterInfo * tri) { 424 passRep = &pass; 425 funcRep = &func; 426 TRI = tri; 427 428 bool changed = false; 429 //func.RenumberBlocks(); 430 431 //to do, if not reducible flow graph, make it so ??? 432 433 if (DEBUGME) { 434 errs() << "AMDGPUCFGStructurizer::prepare\n"; 435 //func.viewCFG(); 436 //func.viewCFGOnly(); 437 //func.dump(); 438 } 439 440 //FIXME: gcc complains on this. 441 //domTree = &pass.getAnalysis<DomTreeT>(); 442 //domTree = CFGTraits::getDominatorTree(pass); 443 //if (DEBUGME) { 444 // domTree->print(errs()); 445 //} 446 447 //FIXME: gcc complains on this. 448 //domTree = &pass.getAnalysis<DomTreeT>(); 449 //postDomTree = CFGTraits::getPostDominatorTree(pass); 450 //if (DEBUGME) { 451 // postDomTree->print(errs()); 452 //} 453 454 //FIXME: gcc complains on this. 455 //loopInfo = &pass.getAnalysis<LoopInfoT>(); 456 loopInfo = CFGTraits::getLoopInfo(pass); 457 if (DEBUGME) { 458 errs() << "LoopInfo:\n"; 459 PrintLoopinfo(*loopInfo, errs()); 460 } 461 462 orderBlocks(); 463 if (DEBUGME) { 464 errs() << "Ordered blocks:\n"; 465 printOrderedBlocks(errs()); 466 } 467 468 SmallVector<BlockT *, DEFAULT_VEC_SLOTS> retBlks; 469 470 for (typename LoopInfoT::iterator iter = loopInfo->begin(), 471 iterEnd = loopInfo->end(); 472 iter != iterEnd; ++iter) { 473 LoopT* loopRep = (*iter); 474 BlockTSmallerVector exitingBlks; 475 loopRep->getExitingBlocks(exitingBlks); 476 477 if (exitingBlks.size() == 0) { 478 BlockT* dummyExitBlk = normalizeInfiniteLoopExit(loopRep); 479 if (dummyExitBlk != NULL) 480 retBlks.push_back(dummyExitBlk); 481 } 482 } 483 484 // Remove unconditional branch instr. 485 // Add dummy exit block iff there are multiple returns. 486 487 for (typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator 488 iterBlk = orderedBlks.begin(), iterEndBlk = orderedBlks.end(); 489 iterBlk != iterEndBlk; 490 ++iterBlk) { 491 BlockT *curBlk = *iterBlk; 492 removeUnconditionalBranch(curBlk); 493 removeRedundantConditionalBranch(curBlk); 494 if (CFGTraits::isReturnBlock(curBlk)) { 495 retBlks.push_back(curBlk); 496 } 497 assert(curBlk->succ_size() <= 2); 498 //assert(curBlk->size() > 0); 499 //removeEmptyBlock(curBlk) ?? 500 } //for 501 502 if (retBlks.size() >= 2) { 503 addDummyExitBlock(retBlks); 504 changed = true; 505 } 506 507 return changed; 508 } //CFGStructurizer::prepare 509 510 template<class PassT> 511 bool CFGStructurizer<PassT>::run(FuncT &func, PassT &pass, 512 const AMDGPURegisterInfo * tri) { 513 passRep = &pass; 514 funcRep = &func; 515 TRI = tri; 516 517 //func.RenumberBlocks(); 518 519 //Assume reducible CFG... 520 if (DEBUGME) { 521 errs() << "AMDGPUCFGStructurizer::run\n"; 522 //errs() << func.getFunction()->getNameStr() << "\n"; 523 func.viewCFG(); 524 //func.viewCFGOnly(); 525 //func.dump(); 526 } 527 528 #if 1 529 //FIXME: gcc complains on this. 530 //domTree = &pass.getAnalysis<DomTreeT>(); 531 domTree = CFGTraits::getDominatorTree(pass); 532 if (DEBUGME) { 533 domTree->print(errs(), (const llvm::Module*)0); 534 } 535 #endif 536 537 //FIXME: gcc complains on this. 538 //domTree = &pass.getAnalysis<DomTreeT>(); 539 postDomTree = CFGTraits::getPostDominatorTree(pass); 540 if (DEBUGME) { 541 postDomTree->print(errs()); 542 } 543 544 //FIXME: gcc complains on this. 545 //loopInfo = &pass.getAnalysis<LoopInfoT>(); 546 loopInfo = CFGTraits::getLoopInfo(pass); 547 if (DEBUGME) { 548 errs() << "LoopInfo:\n"; 549 PrintLoopinfo(*loopInfo, errs()); 550 } 551 552 orderBlocks(); 553 //#define STRESSTEST 554 #ifdef STRESSTEST 555 //Use the worse block ordering to test the algorithm. 556 ReverseVector(orderedBlks); 557 #endif 558 559 if (DEBUGME) { 560 errs() << "Ordered blocks:\n"; 561 printOrderedBlocks(errs()); 562 } 563 int numIter = 0; 564 bool finish = false; 565 BlockT *curBlk; 566 bool makeProgress = false; 567 int numRemainedBlk = countActiveBlock(orderedBlks.begin(), 568 orderedBlks.end()); 569 570 do { 571 ++numIter; 572 if (DEBUGME) { 573 errs() << "numIter = " << numIter 574 << ", numRemaintedBlk = " << numRemainedBlk << "\n"; 575 } 576 577 typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator 578 iterBlk = orderedBlks.begin(); 579 typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator 580 iterBlkEnd = orderedBlks.end(); 581 582 typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator 583 sccBeginIter = iterBlk; 584 BlockT *sccBeginBlk = NULL; 585 int sccNumBlk = 0; // The number of active blocks, init to a 586 // maximum possible number. 587 int sccNumIter; // Number of iteration in this SCC. 588 589 while (iterBlk != iterBlkEnd) { 590 curBlk = *iterBlk; 591 592 if (sccBeginBlk == NULL) { 593 sccBeginIter = iterBlk; 594 sccBeginBlk = curBlk; 595 sccNumIter = 0; 596 sccNumBlk = numRemainedBlk; // Init to maximum possible number. 597 if (DEBUGME) { 598 errs() << "start processing SCC" << getSCCNum(sccBeginBlk); 599 errs() << "\n"; 600 } 601 } 602 603 if (!isRetiredBlock(curBlk)) { 604 patternMatch(curBlk); 605 } 606 607 ++iterBlk; 608 609 bool contNextScc = true; 610 if (iterBlk == iterBlkEnd 611 || getSCCNum(sccBeginBlk) != getSCCNum(*iterBlk)) { 612 // Just finish one scc. 613 ++sccNumIter; 614 int sccRemainedNumBlk = countActiveBlock(sccBeginIter, iterBlk); 615 if (sccRemainedNumBlk != 1 && sccRemainedNumBlk >= sccNumBlk) { 616 if (DEBUGME) { 617 errs() << "Can't reduce SCC " << getSCCNum(curBlk) 618 << ", sccNumIter = " << sccNumIter; 619 errs() << "doesn't make any progress\n"; 620 } 621 contNextScc = true; 622 } else if (sccRemainedNumBlk != 1 && sccRemainedNumBlk < sccNumBlk) { 623 sccNumBlk = sccRemainedNumBlk; 624 iterBlk = sccBeginIter; 625 contNextScc = false; 626 if (DEBUGME) { 627 errs() << "repeat processing SCC" << getSCCNum(curBlk) 628 << "sccNumIter = " << sccNumIter << "\n"; 629 func.viewCFG(); 630 //func.viewCFGOnly(); 631 } 632 } else { 633 // Finish the current scc. 634 contNextScc = true; 635 } 636 } else { 637 // Continue on next component in the current scc. 638 contNextScc = false; 639 } 640 641 if (contNextScc) { 642 sccBeginBlk = NULL; 643 } 644 } //while, "one iteration" over the function. 645 646 BlockT *entryBlk = FuncGTraits::nodes_begin(&func); 647 if (entryBlk->succ_size() == 0) { 648 finish = true; 649 if (DEBUGME) { 650 errs() << "Reduce to one block\n"; 651 } 652 } else { 653 int newnumRemainedBlk 654 = countActiveBlock(orderedBlks.begin(), orderedBlks.end()); 655 // consider cloned blocks ?? 656 if (newnumRemainedBlk == 1 || newnumRemainedBlk < numRemainedBlk) { 657 makeProgress = true; 658 numRemainedBlk = newnumRemainedBlk; 659 } else { 660 makeProgress = false; 661 if (DEBUGME) { 662 errs() << "No progress\n"; 663 } 664 } 665 } 666 } while (!finish && makeProgress); 667 668 // Misc wrap up to maintain the consistency of the Function representation. 669 CFGTraits::wrapup(FuncGTraits::nodes_begin(&func)); 670 671 // Detach retired Block, release memory. 672 for (typename BlockInfoMap::iterator iterMap = blockInfoMap.begin(), 673 iterEndMap = blockInfoMap.end(); iterMap != iterEndMap; ++iterMap) { 674 if ((*iterMap).second && (*iterMap).second->isRetired) { 675 assert(((*iterMap).first)->getNumber() != -1); 676 if (DEBUGME) { 677 errs() << "Erase BB" << ((*iterMap).first)->getNumber() << "\n"; 678 } 679 (*iterMap).first->eraseFromParent(); //Remove from the parent Function. 680 } 681 delete (*iterMap).second; 682 } 683 blockInfoMap.clear(); 684 685 // clear loopLandInfoMap 686 for (typename LoopLandInfoMap::iterator iterMap = loopLandInfoMap.begin(), 687 iterEndMap = loopLandInfoMap.end(); iterMap != iterEndMap; ++iterMap) { 688 delete (*iterMap).second; 689 } 690 loopLandInfoMap.clear(); 691 692 if (DEBUGME) { 693 func.viewCFG(); 694 //func.dump(); 695 } 696 697 if (!finish) { 698 assert(!"IRREDUCIBL_CF"); 699 } 700 701 return true; 702 } //CFGStructurizer::run 703 704 /// Print the ordered Blocks. 705 /// 706 template<class PassT> 707 void CFGStructurizer<PassT>::printOrderedBlocks(llvm::raw_ostream &os) { 708 size_t i = 0; 709 for (typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator 710 iterBlk = orderedBlks.begin(), iterBlkEnd = orderedBlks.end(); 711 iterBlk != iterBlkEnd; 712 ++iterBlk, ++i) { 713 os << "BB" << (*iterBlk)->getNumber(); 714 os << "(" << getSCCNum(*iterBlk) << "," << (*iterBlk)->size() << ")"; 715 if (i != 0 && i % 10 == 0) { 716 os << "\n"; 717 } else { 718 os << " "; 719 } 720 } 721 } //printOrderedBlocks 722 723 /// Compute the reversed DFS post order of Blocks 724 /// 725 template<class PassT> void CFGStructurizer<PassT>::orderBlocks() { 726 int sccNum = 0; 727 BlockT *bb; 728 for (scc_iterator<FuncT *> sccIter = scc_begin(funcRep), 729 sccEnd = scc_end(funcRep); sccIter != sccEnd; ++sccIter, ++sccNum) { 730 std::vector<BlockT *> &sccNext = *sccIter; 731 for (typename std::vector<BlockT *>::const_iterator 732 blockIter = sccNext.begin(), blockEnd = sccNext.end(); 733 blockIter != blockEnd; ++blockIter) { 734 bb = *blockIter; 735 orderedBlks.push_back(bb); 736 recordSccnum(bb, sccNum); 737 } 738 } 739 740 //walk through all the block in func to check for unreachable 741 for (BlockIterator blockIter1 = FuncGTraits::nodes_begin(funcRep), 742 blockEnd1 = FuncGTraits::nodes_end(funcRep); 743 blockIter1 != blockEnd1; ++blockIter1) { 744 BlockT *bb = &(*blockIter1); 745 sccNum = getSCCNum(bb); 746 if (sccNum == INVALIDSCCNUM) { 747 errs() << "unreachable block BB" << bb->getNumber() << "\n"; 748 } 749 } //end of for 750 } //orderBlocks 751 752 template<class PassT> int CFGStructurizer<PassT>::patternMatch(BlockT *curBlk) { 753 int numMatch = 0; 754 int curMatch; 755 756 if (DEBUGME) { 757 errs() << "Begin patternMatch BB" << curBlk->getNumber() << "\n"; 758 } 759 760 while ((curMatch = patternMatchGroup(curBlk)) > 0) { 761 numMatch += curMatch; 762 } 763 764 if (DEBUGME) { 765 errs() << "End patternMatch BB" << curBlk->getNumber() 766 << ", numMatch = " << numMatch << "\n"; 767 } 768 769 return numMatch; 770 } //patternMatch 771 772 template<class PassT> 773 int CFGStructurizer<PassT>::patternMatchGroup(BlockT *curBlk) { 774 int numMatch = 0; 775 numMatch += serialPatternMatch(curBlk); 776 numMatch += ifPatternMatch(curBlk); 777 //numMatch += switchPatternMatch(curBlk); 778 numMatch += loopendPatternMatch(curBlk); 779 numMatch += loopPatternMatch(curBlk); 780 return numMatch; 781 }//patternMatchGroup 782 783 template<class PassT> 784 int CFGStructurizer<PassT>::serialPatternMatch(BlockT *curBlk) { 785 if (curBlk->succ_size() != 1) { 786 return 0; 787 } 788 789 BlockT *childBlk = *curBlk->succ_begin(); 790 if (childBlk->pred_size() != 1 || isActiveLoophead(childBlk)) { 791 return 0; 792 } 793 794 mergeSerialBlock(curBlk, childBlk); 795 ++numSerialPatternMatch; 796 return 1; 797 } //serialPatternMatch 798 799 template<class PassT> 800 int CFGStructurizer<PassT>::ifPatternMatch(BlockT *curBlk) { 801 //two edges 802 if (curBlk->succ_size() != 2) { 803 return 0; 804 } 805 806 if (hasBackEdge(curBlk)) { 807 return 0; 808 } 809 810 InstrT *branchInstr = CFGTraits::getNormalBlockBranchInstr(curBlk); 811 if (branchInstr == NULL) { 812 return 0; 813 } 814 815 assert(CFGTraits::isCondBranch(branchInstr)); 816 817 BlockT *trueBlk = CFGTraits::getTrueBranch(branchInstr); 818 BlockT *falseBlk = CFGTraits::getFalseBranch(curBlk, branchInstr); 819 BlockT *landBlk; 820 int cloned = 0; 821 822 // TODO: Simplify 823 if (trueBlk->succ_size() == 1 && falseBlk->succ_size() == 1 824 && *trueBlk->succ_begin() == *falseBlk->succ_begin()) { 825 landBlk = *trueBlk->succ_begin(); 826 } else if (trueBlk->succ_size() == 0 && falseBlk->succ_size() == 0) { 827 landBlk = NULL; 828 } else if (trueBlk->succ_size() == 1 && *trueBlk->succ_begin() == falseBlk) { 829 landBlk = falseBlk; 830 falseBlk = NULL; 831 } else if (falseBlk->succ_size() == 1 832 && *falseBlk->succ_begin() == trueBlk) { 833 landBlk = trueBlk; 834 trueBlk = NULL; 835 } else if (falseBlk->succ_size() == 1 836 && isSameloopDetachedContbreak(trueBlk, falseBlk)) { 837 landBlk = *falseBlk->succ_begin(); 838 } else if (trueBlk->succ_size() == 1 839 && isSameloopDetachedContbreak(falseBlk, trueBlk)) { 840 landBlk = *trueBlk->succ_begin(); 841 } else { 842 return handleJumpintoIf(curBlk, trueBlk, falseBlk); 843 } 844 845 // improveSimpleJumpinfoIf can handle the case where landBlk == NULL but the 846 // new BB created for landBlk==NULL may introduce new challenge to the 847 // reduction process. 848 if (landBlk != NULL && 849 ((trueBlk && trueBlk->pred_size() > 1) 850 || (falseBlk && falseBlk->pred_size() > 1))) { 851 cloned += improveSimpleJumpintoIf(curBlk, trueBlk, falseBlk, &landBlk); 852 } 853 854 if (trueBlk && trueBlk->pred_size() > 1) { 855 trueBlk = cloneBlockForPredecessor(trueBlk, curBlk); 856 ++cloned; 857 } 858 859 if (falseBlk && falseBlk->pred_size() > 1) { 860 falseBlk = cloneBlockForPredecessor(falseBlk, curBlk); 861 ++cloned; 862 } 863 864 mergeIfthenelseBlock(branchInstr, curBlk, trueBlk, falseBlk, landBlk); 865 866 ++numIfPatternMatch; 867 868 numClonedBlock += cloned; 869 870 return 1 + cloned; 871 } //ifPatternMatch 872 873 template<class PassT> 874 int CFGStructurizer<PassT>::switchPatternMatch(BlockT *curBlk) { 875 return 0; 876 } //switchPatternMatch 877 878 template<class PassT> 879 int CFGStructurizer<PassT>::loopendPatternMatch(BlockT *curBlk) { 880 LoopT *loopRep = loopInfo->getLoopFor(curBlk); 881 typename std::vector<LoopT *> nestedLoops; 882 while (loopRep) { 883 nestedLoops.push_back(loopRep); 884 loopRep = loopRep->getParentLoop(); 885 } 886 887 if (nestedLoops.size() == 0) { 888 return 0; 889 } 890 891 // Process nested loop outside->inside, so "continue" to a outside loop won't 892 // be mistaken as "break" of the current loop. 893 int num = 0; 894 for (typename std::vector<LoopT *>::reverse_iterator 895 iter = nestedLoops.rbegin(), iterEnd = nestedLoops.rend(); 896 iter != iterEnd; ++iter) { 897 loopRep = *iter; 898 899 if (getLoopLandBlock(loopRep) != NULL) { 900 continue; 901 } 902 903 BlockT *loopHeader = loopRep->getHeader(); 904 905 int numBreak = loopbreakPatternMatch(loopRep, loopHeader); 906 907 if (numBreak == -1) { 908 break; 909 } 910 911 int numCont = loopcontPatternMatch(loopRep, loopHeader); 912 num += numBreak + numCont; 913 } 914 915 return num; 916 } //loopendPatternMatch 917 918 template<class PassT> 919 int CFGStructurizer<PassT>::loopPatternMatch(BlockT *curBlk) { 920 if (curBlk->succ_size() != 0) { 921 return 0; 922 } 923 924 int numLoop = 0; 925 LoopT *loopRep = loopInfo->getLoopFor(curBlk); 926 while (loopRep && loopRep->getHeader() == curBlk) { 927 LoopLandInfo *loopLand = getLoopLandInfo(loopRep); 928 if (loopLand) { 929 BlockT *landBlk = loopLand->landBlk; 930 assert(landBlk); 931 if (!isRetiredBlock(landBlk)) { 932 mergeLooplandBlock(curBlk, loopLand); 933 ++numLoop; 934 } 935 } 936 loopRep = loopRep->getParentLoop(); 937 } 938 939 numLoopPatternMatch += numLoop; 940 941 return numLoop; 942 } //loopPatternMatch 943 944 template<class PassT> 945 int CFGStructurizer<PassT>::loopbreakPatternMatch(LoopT *loopRep, 946 BlockT *loopHeader) { 947 BlockTSmallerVector exitingBlks; 948 loopRep->getExitingBlocks(exitingBlks); 949 950 if (DEBUGME) { 951 errs() << "Loop has " << exitingBlks.size() << " exiting blocks\n"; 952 } 953 954 if (exitingBlks.size() == 0) { 955 setLoopLandBlock(loopRep); 956 return 0; 957 } 958 959 // Compute the corresponding exitBlks and exit block set. 960 BlockTSmallerVector exitBlks; 961 std::set<BlockT *> exitBlkSet; 962 for (typename BlockTSmallerVector::const_iterator iter = exitingBlks.begin(), 963 iterEnd = exitingBlks.end(); iter != iterEnd; ++iter) { 964 BlockT *exitingBlk = *iter; 965 BlockT *exitBlk = exitingBlock2ExitBlock(loopRep, exitingBlk); 966 exitBlks.push_back(exitBlk); 967 exitBlkSet.insert(exitBlk); //non-duplicate insert 968 } 969 970 assert(exitBlkSet.size() > 0); 971 assert(exitBlks.size() == exitingBlks.size()); 972 973 if (DEBUGME) { 974 errs() << "Loop has " << exitBlkSet.size() << " exit blocks\n"; 975 } 976 977 // Find exitLandBlk. 978 BlockT *exitLandBlk = NULL; 979 int numCloned = 0; 980 int numSerial = 0; 981 982 if (exitBlkSet.size() == 1) 983 { 984 exitLandBlk = *exitBlkSet.begin(); 985 } else { 986 exitLandBlk = findNearestCommonPostDom(exitBlkSet); 987 988 if (exitLandBlk == NULL) { 989 return -1; 990 } 991 992 bool allInPath = true; 993 bool allNotInPath = true; 994 for (typename std::set<BlockT*>::const_iterator 995 iter = exitBlkSet.begin(), 996 iterEnd = exitBlkSet.end(); 997 iter != iterEnd; ++iter) { 998 BlockT *exitBlk = *iter; 999 1000 PathToKind pathKind = singlePathTo(exitBlk, exitLandBlk, true); 1001 if (DEBUGME) { 1002 errs() << "BB" << exitBlk->getNumber() 1003 << " to BB" << exitLandBlk->getNumber() << " PathToKind=" 1004 << pathKind << "\n"; 1005 } 1006 1007 allInPath = allInPath && (pathKind == SinglePath_InPath); 1008 allNotInPath = allNotInPath && (pathKind == SinglePath_NotInPath); 1009 1010 if (!allInPath && !allNotInPath) { 1011 if (DEBUGME) { 1012 errs() << "singlePath check fail\n"; 1013 } 1014 return -1; 1015 } 1016 } // check all exit blocks 1017 1018 if (allNotInPath) { 1019 #if 1 1020 1021 // TODO: Simplify, maybe separate function? 1022 //funcRep->viewCFG(); 1023 LoopT *parentLoopRep = loopRep->getParentLoop(); 1024 BlockT *parentLoopHeader = NULL; 1025 if (parentLoopRep) 1026 parentLoopHeader = parentLoopRep->getHeader(); 1027 1028 if (exitLandBlk == parentLoopHeader && 1029 (exitLandBlk = relocateLoopcontBlock(parentLoopRep, 1030 loopRep, 1031 exitBlkSet, 1032 exitLandBlk)) != NULL) { 1033 if (DEBUGME) { 1034 errs() << "relocateLoopcontBlock success\n"; 1035 } 1036 } else if ((exitLandBlk = addLoopEndbranchBlock(loopRep, 1037 exitingBlks, 1038 exitBlks)) != NULL) { 1039 if (DEBUGME) { 1040 errs() << "insertEndbranchBlock success\n"; 1041 } 1042 } else { 1043 if (DEBUGME) { 1044 errs() << "loop exit fail\n"; 1045 } 1046 return -1; 1047 } 1048 #else 1049 return -1; 1050 #endif 1051 } 1052 1053 // Handle side entry to exit path. 1054 exitBlks.clear(); 1055 exitBlkSet.clear(); 1056 for (typename BlockTSmallerVector::iterator iterExiting = 1057 exitingBlks.begin(), 1058 iterExitingEnd = exitingBlks.end(); 1059 iterExiting != iterExitingEnd; ++iterExiting) { 1060 BlockT *exitingBlk = *iterExiting; 1061 BlockT *exitBlk = exitingBlock2ExitBlock(loopRep, exitingBlk); 1062 BlockT *newExitBlk = exitBlk; 1063 1064 if (exitBlk != exitLandBlk && exitBlk->pred_size() > 1) { 1065 newExitBlk = cloneBlockForPredecessor(exitBlk, exitingBlk); 1066 ++numCloned; 1067 } 1068 1069 numCloned += cloneOnSideEntryTo(exitingBlk, newExitBlk, exitLandBlk); 1070 1071 exitBlks.push_back(newExitBlk); 1072 exitBlkSet.insert(newExitBlk); 1073 } 1074 1075 for (typename BlockTSmallerVector::iterator iterExit = exitBlks.begin(), 1076 iterExitEnd = exitBlks.end(); 1077 iterExit != iterExitEnd; ++iterExit) { 1078 BlockT *exitBlk = *iterExit; 1079 numSerial += serialPatternMatch(exitBlk); 1080 } 1081 1082 for (typename BlockTSmallerVector::iterator iterExit = exitBlks.begin(), 1083 iterExitEnd = exitBlks.end(); 1084 iterExit != iterExitEnd; ++iterExit) { 1085 BlockT *exitBlk = *iterExit; 1086 if (exitBlk->pred_size() > 1) { 1087 if (exitBlk != exitLandBlk) { 1088 return -1; 1089 } 1090 } else { 1091 if (exitBlk != exitLandBlk && 1092 (exitBlk->succ_size() != 1 || 1093 *exitBlk->succ_begin() != exitLandBlk)) { 1094 return -1; 1095 } 1096 } 1097 } 1098 } // else 1099 1100 // LoopT *exitLandLoop = loopInfo->getLoopFor(exitLandBlk); 1101 exitLandBlk = recordLoopLandBlock(loopRep, exitLandBlk, exitBlks, exitBlkSet); 1102 1103 // Fold break into the breaking block. Leverage across level breaks. 1104 assert(exitingBlks.size() == exitBlks.size()); 1105 for (typename BlockTSmallerVector::const_iterator iterExit = exitBlks.begin(), 1106 iterExiting = exitingBlks.begin(), iterExitEnd = exitBlks.end(); 1107 iterExit != iterExitEnd; ++iterExit, ++iterExiting) { 1108 BlockT *exitBlk = *iterExit; 1109 BlockT *exitingBlk = *iterExiting; 1110 assert(exitBlk->pred_size() == 1 || exitBlk == exitLandBlk); 1111 LoopT *exitingLoop = loopInfo->getLoopFor(exitingBlk); 1112 handleLoopbreak(exitingBlk, exitingLoop, exitBlk, loopRep, exitLandBlk); 1113 } 1114 1115 int numBreak = static_cast<int>(exitingBlks.size()); 1116 numLoopbreakPatternMatch += numBreak; 1117 numClonedBlock += numCloned; 1118 return numBreak + numSerial + numCloned; 1119 } //loopbreakPatternMatch 1120 1121 template<class PassT> 1122 int CFGStructurizer<PassT>::loopcontPatternMatch(LoopT *loopRep, 1123 BlockT *loopHeader) { 1124 int numCont = 0; 1125 SmallVector<BlockT *, DEFAULT_VEC_SLOTS> contBlk; 1126 for (typename InvBlockGTraits::ChildIteratorType iter = 1127 InvBlockGTraits::child_begin(loopHeader), 1128 iterEnd = InvBlockGTraits::child_end(loopHeader); 1129 iter != iterEnd; ++iter) { 1130 BlockT *curBlk = *iter; 1131 if (loopRep->contains(curBlk)) { 1132 handleLoopcontBlock(curBlk, loopInfo->getLoopFor(curBlk), 1133 loopHeader, loopRep); 1134 contBlk.push_back(curBlk); 1135 ++numCont; 1136 } 1137 } 1138 1139 for (typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::iterator 1140 iter = contBlk.begin(), iterEnd = contBlk.end(); 1141 iter != iterEnd; ++iter) { 1142 (*iter)->removeSuccessor(loopHeader); 1143 } 1144 1145 numLoopcontPatternMatch += numCont; 1146 1147 return numCont; 1148 } //loopcontPatternMatch 1149 1150 1151 template<class PassT> 1152 bool CFGStructurizer<PassT>::isSameloopDetachedContbreak(BlockT *src1Blk, 1153 BlockT *src2Blk) { 1154 // return true iff src1Blk->succ_size() == 0 && src1Blk and src2Blk are in the 1155 // same loop with LoopLandInfo without explicitly keeping track of 1156 // loopContBlks and loopBreakBlks, this is a method to get the information. 1157 // 1158 if (src1Blk->succ_size() == 0) { 1159 LoopT *loopRep = loopInfo->getLoopFor(src1Blk); 1160 if (loopRep != NULL && loopRep == loopInfo->getLoopFor(src2Blk)) { 1161 LoopLandInfo *&theEntry = loopLandInfoMap[loopRep]; 1162 if (theEntry != NULL) { 1163 if (DEBUGME) { 1164 errs() << "isLoopContBreakBlock yes src1 = BB" 1165 << src1Blk->getNumber() 1166 << " src2 = BB" << src2Blk->getNumber() << "\n"; 1167 } 1168 return true; 1169 } 1170 } 1171 } 1172 return false; 1173 } //isSameloopDetachedContbreak 1174 1175 template<class PassT> 1176 int CFGStructurizer<PassT>::handleJumpintoIf(BlockT *headBlk, 1177 BlockT *trueBlk, 1178 BlockT *falseBlk) { 1179 int num = handleJumpintoIfImp(headBlk, trueBlk, falseBlk); 1180 if (num == 0) { 1181 if (DEBUGME) { 1182 errs() << "handleJumpintoIf swap trueBlk and FalseBlk" << "\n"; 1183 } 1184 num = handleJumpintoIfImp(headBlk, falseBlk, trueBlk); 1185 } 1186 return num; 1187 } 1188 1189 template<class PassT> 1190 int CFGStructurizer<PassT>::handleJumpintoIfImp(BlockT *headBlk, 1191 BlockT *trueBlk, 1192 BlockT *falseBlk) { 1193 int num = 0; 1194 BlockT *downBlk; 1195 1196 //trueBlk could be the common post dominator 1197 downBlk = trueBlk; 1198 1199 if (DEBUGME) { 1200 errs() << "handleJumpintoIfImp head = BB" << headBlk->getNumber() 1201 << " true = BB" << trueBlk->getNumber() 1202 << ", numSucc=" << trueBlk->succ_size() 1203 << " false = BB" << falseBlk->getNumber() << "\n"; 1204 } 1205 1206 while (downBlk) { 1207 if (DEBUGME) { 1208 errs() << "check down = BB" << downBlk->getNumber(); 1209 } 1210 1211 if (//postDomTree->dominates(downBlk, falseBlk) && 1212 singlePathTo(falseBlk, downBlk) == SinglePath_InPath) { 1213 if (DEBUGME) { 1214 errs() << " working\n"; 1215 } 1216 1217 num += cloneOnSideEntryTo(headBlk, trueBlk, downBlk); 1218 num += cloneOnSideEntryTo(headBlk, falseBlk, downBlk); 1219 1220 numClonedBlock += num; 1221 num += serialPatternMatch(*headBlk->succ_begin()); 1222 num += serialPatternMatch(*(++headBlk->succ_begin())); 1223 num += ifPatternMatch(headBlk); 1224 assert(num > 0); // 1225 1226 break; 1227 } 1228 if (DEBUGME) { 1229 errs() << " not working\n"; 1230 } 1231 downBlk = (downBlk->succ_size() == 1) ? (*downBlk->succ_begin()) : NULL; 1232 } // walk down the postDomTree 1233 1234 return num; 1235 } //handleJumpintoIf 1236 1237 template<class PassT> 1238 void CFGStructurizer<PassT>::showImproveSimpleJumpintoIf(BlockT *headBlk, 1239 BlockT *trueBlk, 1240 BlockT *falseBlk, 1241 BlockT *landBlk, 1242 bool detail) { 1243 errs() << "head = BB" << headBlk->getNumber() 1244 << " size = " << headBlk->size(); 1245 if (detail) { 1246 errs() << "\n"; 1247 headBlk->print(errs()); 1248 errs() << "\n"; 1249 } 1250 1251 if (trueBlk) { 1252 errs() << ", true = BB" << trueBlk->getNumber() << " size = " 1253 << trueBlk->size() << " numPred = " << trueBlk->pred_size(); 1254 if (detail) { 1255 errs() << "\n"; 1256 trueBlk->print(errs()); 1257 errs() << "\n"; 1258 } 1259 } 1260 if (falseBlk) { 1261 errs() << ", false = BB" << falseBlk->getNumber() << " size = " 1262 << falseBlk->size() << " numPred = " << falseBlk->pred_size(); 1263 if (detail) { 1264 errs() << "\n"; 1265 falseBlk->print(errs()); 1266 errs() << "\n"; 1267 } 1268 } 1269 if (landBlk) { 1270 errs() << ", land = BB" << landBlk->getNumber() << " size = " 1271 << landBlk->size() << " numPred = " << landBlk->pred_size(); 1272 if (detail) { 1273 errs() << "\n"; 1274 landBlk->print(errs()); 1275 errs() << "\n"; 1276 } 1277 } 1278 1279 errs() << "\n"; 1280 } //showImproveSimpleJumpintoIf 1281 1282 template<class PassT> 1283 int CFGStructurizer<PassT>::improveSimpleJumpintoIf(BlockT *headBlk, 1284 BlockT *trueBlk, 1285 BlockT *falseBlk, 1286 BlockT **plandBlk) { 1287 bool migrateTrue = false; 1288 bool migrateFalse = false; 1289 1290 BlockT *landBlk = *plandBlk; 1291 1292 assert((trueBlk == NULL || trueBlk->succ_size() <= 1) 1293 && (falseBlk == NULL || falseBlk->succ_size() <= 1)); 1294 1295 if (trueBlk == falseBlk) { 1296 return 0; 1297 } 1298 1299 #if 0 1300 if (DEBUGME) { 1301 errs() << "improveSimpleJumpintoIf: "; 1302 showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 0); 1303 } 1304 #endif 1305 1306 // unsigned landPredSize = landBlk ? landBlk->pred_size() : 0; 1307 // May consider the # landBlk->pred_size() as it represents the number of 1308 // assignment initReg = .. needed to insert. 1309 migrateTrue = needMigrateBlock(trueBlk); 1310 migrateFalse = needMigrateBlock(falseBlk); 1311 1312 if (!migrateTrue && !migrateFalse) { 1313 return 0; 1314 } 1315 1316 // If we need to migrate either trueBlk and falseBlk, migrate the rest that 1317 // have more than one predecessors. without doing this, its predecessor 1318 // rather than headBlk will have undefined value in initReg. 1319 if (!migrateTrue && trueBlk && trueBlk->pred_size() > 1) { 1320 migrateTrue = true; 1321 } 1322 if (!migrateFalse && falseBlk && falseBlk->pred_size() > 1) { 1323 migrateFalse = true; 1324 } 1325 1326 if (DEBUGME) { 1327 errs() << "before improveSimpleJumpintoIf: "; 1328 showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 0); 1329 //showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 1); 1330 } 1331 1332 // org: headBlk => if () {trueBlk} else {falseBlk} => landBlk 1333 // 1334 // new: headBlk => if () {initReg = 1; org trueBlk branch} else 1335 // {initReg = 0; org falseBlk branch } 1336 // => landBlk => if (initReg) {org trueBlk} else {org falseBlk} 1337 // => org landBlk 1338 // if landBlk->pred_size() > 2, put the about if-else inside 1339 // if (initReg !=2) {...} 1340 // 1341 // add initReg = initVal to headBlk 1342 1343 const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32); 1344 unsigned initReg = 1345 funcRep->getRegInfo().createVirtualRegister(I32RC); 1346 if (!migrateTrue || !migrateFalse) { 1347 int initVal = migrateTrue ? 0 : 1; 1348 CFGTraits::insertAssignInstrBefore(headBlk, passRep, initReg, initVal); 1349 } 1350 1351 int numNewBlk = 0; 1352 1353 if (landBlk == NULL) { 1354 landBlk = funcRep->CreateMachineBasicBlock(); 1355 funcRep->push_back(landBlk); //insert to function 1356 1357 if (trueBlk) { 1358 trueBlk->addSuccessor(landBlk); 1359 } else { 1360 headBlk->addSuccessor(landBlk); 1361 } 1362 1363 if (falseBlk) { 1364 falseBlk->addSuccessor(landBlk); 1365 } else { 1366 headBlk->addSuccessor(landBlk); 1367 } 1368 1369 numNewBlk ++; 1370 } 1371 1372 bool landBlkHasOtherPred = (landBlk->pred_size() > 2); 1373 1374 //insert AMDGPU::ENDIF to avoid special case "input landBlk == NULL" 1375 typename BlockT::iterator insertPos = 1376 CFGTraits::getInstrPos 1377 (landBlk, CFGTraits::insertInstrBefore(landBlk, AMDGPU::ENDIF, passRep)); 1378 1379 if (landBlkHasOtherPred) { 1380 unsigned immReg = 1381 funcRep->getRegInfo().createVirtualRegister(I32RC); 1382 CFGTraits::insertAssignInstrBefore(insertPos, passRep, immReg, 2); 1383 unsigned cmpResReg = 1384 funcRep->getRegInfo().createVirtualRegister(I32RC); 1385 1386 CFGTraits::insertCompareInstrBefore(landBlk, insertPos, passRep, cmpResReg, 1387 initReg, immReg); 1388 CFGTraits::insertCondBranchBefore(landBlk, insertPos, 1389 AMDGPU::IF_LOGICALZ_i32, passRep, 1390 cmpResReg, DebugLoc()); 1391 } 1392 1393 CFGTraits::insertCondBranchBefore(landBlk, insertPos, AMDGPU::IF_LOGICALNZ_i32, 1394 passRep, initReg, DebugLoc()); 1395 1396 if (migrateTrue) { 1397 migrateInstruction(trueBlk, landBlk, insertPos); 1398 // need to uncondionally insert the assignment to ensure a path from its 1399 // predecessor rather than headBlk has valid value in initReg if 1400 // (initVal != 1). 1401 CFGTraits::insertAssignInstrBefore(trueBlk, passRep, initReg, 1); 1402 } 1403 CFGTraits::insertInstrBefore(insertPos, AMDGPU::ELSE, passRep); 1404 1405 if (migrateFalse) { 1406 migrateInstruction(falseBlk, landBlk, insertPos); 1407 // need to uncondionally insert the assignment to ensure a path from its 1408 // predecessor rather than headBlk has valid value in initReg if 1409 // (initVal != 0) 1410 CFGTraits::insertAssignInstrBefore(falseBlk, passRep, initReg, 0); 1411 } 1412 //CFGTraits::insertInstrBefore(insertPos, AMDGPU::ENDIF, passRep); 1413 1414 if (landBlkHasOtherPred) { 1415 // add endif 1416 CFGTraits::insertInstrBefore(insertPos, AMDGPU::ENDIF, passRep); 1417 1418 // put initReg = 2 to other predecessors of landBlk 1419 for (typename BlockT::pred_iterator predIter = landBlk->pred_begin(), 1420 predIterEnd = landBlk->pred_end(); predIter != predIterEnd; 1421 ++predIter) { 1422 BlockT *curBlk = *predIter; 1423 if (curBlk != trueBlk && curBlk != falseBlk) { 1424 CFGTraits::insertAssignInstrBefore(curBlk, passRep, initReg, 2); 1425 } 1426 } //for 1427 } 1428 if (DEBUGME) { 1429 errs() << "result from improveSimpleJumpintoIf: "; 1430 showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 0); 1431 //showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 1); 1432 } 1433 1434 // update landBlk 1435 *plandBlk = landBlk; 1436 1437 return numNewBlk; 1438 } //improveSimpleJumpintoIf 1439 1440 template<class PassT> 1441 void CFGStructurizer<PassT>::handleLoopbreak(BlockT *exitingBlk, 1442 LoopT *exitingLoop, 1443 BlockT *exitBlk, 1444 LoopT *exitLoop, 1445 BlockT *landBlk) { 1446 if (DEBUGME) { 1447 errs() << "Trying to break loop-depth = " << getLoopDepth(exitLoop) 1448 << " from loop-depth = " << getLoopDepth(exitingLoop) << "\n"; 1449 } 1450 const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32); 1451 1452 RegiT initReg = INVALIDREGNUM; 1453 if (exitingLoop != exitLoop) { 1454 initReg = static_cast<int> 1455 (funcRep->getRegInfo().createVirtualRegister(I32RC)); 1456 assert(initReg != INVALIDREGNUM); 1457 addLoopBreakInitReg(exitLoop, initReg); 1458 while (exitingLoop != exitLoop && exitingLoop) { 1459 addLoopBreakOnReg(exitingLoop, initReg); 1460 exitingLoop = exitingLoop->getParentLoop(); 1461 } 1462 assert(exitingLoop == exitLoop); 1463 } 1464 1465 mergeLoopbreakBlock(exitingBlk, exitBlk, landBlk, initReg); 1466 1467 } //handleLoopbreak 1468 1469 template<class PassT> 1470 void CFGStructurizer<PassT>::handleLoopcontBlock(BlockT *contingBlk, 1471 LoopT *contingLoop, 1472 BlockT *contBlk, 1473 LoopT *contLoop) { 1474 if (DEBUGME) { 1475 errs() << "loopcontPattern cont = BB" << contingBlk->getNumber() 1476 << " header = BB" << contBlk->getNumber() << "\n"; 1477 1478 errs() << "Trying to continue loop-depth = " 1479 << getLoopDepth(contLoop) 1480 << " from loop-depth = " << getLoopDepth(contingLoop) << "\n"; 1481 } 1482 1483 RegiT initReg = INVALIDREGNUM; 1484 const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32); 1485 if (contingLoop != contLoop) { 1486 initReg = static_cast<int> 1487 (funcRep->getRegInfo().createVirtualRegister(I32RC)); 1488 assert(initReg != INVALIDREGNUM); 1489 addLoopContInitReg(contLoop, initReg); 1490 while (contingLoop && contingLoop->getParentLoop() != contLoop) { 1491 addLoopBreakOnReg(contingLoop, initReg); //not addLoopContOnReg 1492 contingLoop = contingLoop->getParentLoop(); 1493 } 1494 assert(contingLoop && contingLoop->getParentLoop() == contLoop); 1495 addLoopContOnReg(contingLoop, initReg); 1496 } 1497 1498 settleLoopcontBlock(contingBlk, contBlk, initReg); 1499 //contingBlk->removeSuccessor(loopHeader); 1500 } //handleLoopcontBlock 1501 1502 template<class PassT> 1503 void CFGStructurizer<PassT>::mergeSerialBlock(BlockT *dstBlk, BlockT *srcBlk) { 1504 if (DEBUGME) { 1505 errs() << "serialPattern BB" << dstBlk->getNumber() 1506 << " <= BB" << srcBlk->getNumber() << "\n"; 1507 } 1508 //removeUnconditionalBranch(dstBlk); 1509 dstBlk->splice(dstBlk->end(), srcBlk, FirstNonDebugInstr(srcBlk), srcBlk->end()); 1510 1511 dstBlk->removeSuccessor(srcBlk); 1512 CFGTraits::cloneSuccessorList(dstBlk, srcBlk); 1513 1514 removeSuccessor(srcBlk); 1515 retireBlock(dstBlk, srcBlk); 1516 } //mergeSerialBlock 1517 1518 template<class PassT> 1519 void CFGStructurizer<PassT>::mergeIfthenelseBlock(InstrT *branchInstr, 1520 BlockT *curBlk, 1521 BlockT *trueBlk, 1522 BlockT *falseBlk, 1523 BlockT *landBlk) { 1524 if (DEBUGME) { 1525 errs() << "ifPattern BB" << curBlk->getNumber(); 1526 errs() << "{ "; 1527 if (trueBlk) { 1528 errs() << "BB" << trueBlk->getNumber(); 1529 } 1530 errs() << " } else "; 1531 errs() << "{ "; 1532 if (falseBlk) { 1533 errs() << "BB" << falseBlk->getNumber(); 1534 } 1535 errs() << " }\n "; 1536 errs() << "landBlock: "; 1537 if (landBlk == NULL) { 1538 errs() << "NULL"; 1539 } else { 1540 errs() << "BB" << landBlk->getNumber(); 1541 } 1542 errs() << "\n"; 1543 } 1544 1545 int oldOpcode = branchInstr->getOpcode(); 1546 DebugLoc branchDL = branchInstr->getDebugLoc(); 1547 1548 // transform to 1549 // if cond 1550 // trueBlk 1551 // else 1552 // falseBlk 1553 // endif 1554 // landBlk 1555 1556 typename BlockT::iterator branchInstrPos = 1557 CFGTraits::getInstrPos(curBlk, branchInstr); 1558 CFGTraits::insertCondBranchBefore(branchInstrPos, 1559 CFGTraits::getBranchNzeroOpcode(oldOpcode), 1560 passRep, 1561 branchDL); 1562 1563 if (trueBlk) { 1564 curBlk->splice(branchInstrPos, trueBlk, FirstNonDebugInstr(trueBlk), trueBlk->end()); 1565 curBlk->removeSuccessor(trueBlk); 1566 if (landBlk && trueBlk->succ_size()!=0) { 1567 trueBlk->removeSuccessor(landBlk); 1568 } 1569 retireBlock(curBlk, trueBlk); 1570 } 1571 CFGTraits::insertInstrBefore(branchInstrPos, AMDGPU::ELSE, passRep); 1572 1573 if (falseBlk) { 1574 curBlk->splice(branchInstrPos, falseBlk, FirstNonDebugInstr(falseBlk), 1575 falseBlk->end()); 1576 curBlk->removeSuccessor(falseBlk); 1577 if (landBlk && falseBlk->succ_size() != 0) { 1578 falseBlk->removeSuccessor(landBlk); 1579 } 1580 retireBlock(curBlk, falseBlk); 1581 } 1582 CFGTraits::insertInstrBefore(branchInstrPos, AMDGPU::ENDIF, passRep); 1583 1584 //curBlk->remove(branchInstrPos); 1585 branchInstr->eraseFromParent(); 1586 1587 if (landBlk && trueBlk && falseBlk) { 1588 curBlk->addSuccessor(landBlk); 1589 } 1590 1591 } //mergeIfthenelseBlock 1592 1593 template<class PassT> 1594 void CFGStructurizer<PassT>::mergeLooplandBlock(BlockT *dstBlk, 1595 LoopLandInfo *loopLand) { 1596 BlockT *landBlk = loopLand->landBlk; 1597 1598 if (DEBUGME) { 1599 errs() << "loopPattern header = BB" << dstBlk->getNumber() 1600 << " land = BB" << landBlk->getNumber() << "\n"; 1601 } 1602 1603 // Loop contInitRegs are init at the beginning of the loop. 1604 for (typename std::set<RegiT>::const_iterator iter = 1605 loopLand->contInitRegs.begin(), 1606 iterEnd = loopLand->contInitRegs.end(); iter != iterEnd; ++iter) { 1607 CFGTraits::insertAssignInstrBefore(dstBlk, passRep, *iter, 0); 1608 } 1609 1610 /* we last inserterd the DebugLoc in the 1611 * BREAK_LOGICALZ_i32 or AMDGPU::BREAK_LOGICALNZ statement in the current dstBlk. 1612 * search for the DebugLoc in the that statement. 1613 * if not found, we have to insert the empty/default DebugLoc */ 1614 InstrT *loopBreakInstr = CFGTraits::getLoopBreakInstr(dstBlk); 1615 DebugLoc DLBreak = (loopBreakInstr) ? loopBreakInstr->getDebugLoc() : DebugLoc(); 1616 1617 CFGTraits::insertInstrBefore(dstBlk, AMDGPU::WHILELOOP, passRep, DLBreak); 1618 // Loop breakInitRegs are init before entering the loop. 1619 for (typename std::set<RegiT>::const_iterator iter = 1620 loopLand->breakInitRegs.begin(), 1621 iterEnd = loopLand->breakInitRegs.end(); iter != iterEnd; ++iter) 1622 { 1623 CFGTraits::insertAssignInstrBefore(dstBlk, passRep, *iter, 0); 1624 } 1625 // Loop endbranchInitRegs are init before entering the loop. 1626 for (typename std::set<RegiT>::const_iterator iter = 1627 loopLand->endbranchInitRegs.begin(), 1628 iterEnd = loopLand->endbranchInitRegs.end(); iter != iterEnd; ++iter) { 1629 CFGTraits::insertAssignInstrBefore(dstBlk, passRep, *iter, 0); 1630 } 1631 1632 /* we last inserterd the DebugLoc in the continue statement in the current dstBlk 1633 * search for the DebugLoc in the continue statement. 1634 * if not found, we have to insert the empty/default DebugLoc */ 1635 InstrT *continueInstr = CFGTraits::getContinueInstr(dstBlk); 1636 DebugLoc DLContinue = (continueInstr) ? continueInstr->getDebugLoc() : DebugLoc(); 1637 1638 CFGTraits::insertInstrEnd(dstBlk, AMDGPU::ENDLOOP, passRep, DLContinue); 1639 // Loop breakOnRegs are check after the ENDLOOP: break the loop outside this 1640 // loop. 1641 for (typename std::set<RegiT>::const_iterator iter = 1642 loopLand->breakOnRegs.begin(), 1643 iterEnd = loopLand->breakOnRegs.end(); iter != iterEnd; ++iter) { 1644 CFGTraits::insertCondBranchEnd(dstBlk, AMDGPU::BREAK_LOGICALNZ_i32, passRep, 1645 *iter); 1646 } 1647 1648 // Loop contOnRegs are check after the ENDLOOP: cont the loop outside this 1649 // loop. 1650 for (std::set<RegiT>::const_iterator iter = loopLand->contOnRegs.begin(), 1651 iterEnd = loopLand->contOnRegs.end(); iter != iterEnd; ++iter) { 1652 CFGTraits::insertCondBranchEnd(dstBlk, AMDGPU::CONTINUE_LOGICALNZ_i32, 1653 passRep, *iter); 1654 } 1655 1656 dstBlk->splice(dstBlk->end(), landBlk, landBlk->begin(), landBlk->end()); 1657 1658 for (typename BlockT::succ_iterator iter = landBlk->succ_begin(), 1659 iterEnd = landBlk->succ_end(); iter != iterEnd; ++iter) { 1660 dstBlk->addSuccessor(*iter); // *iter's predecessor is also taken care of. 1661 } 1662 1663 removeSuccessor(landBlk); 1664 retireBlock(dstBlk, landBlk); 1665 } //mergeLooplandBlock 1666 1667 template<class PassT> 1668 void CFGStructurizer<PassT>::reversePredicateSetter(typename BlockT::iterator I) 1669 { 1670 while (I--) { 1671 if (I->getOpcode() == AMDGPU::PRED_X) { 1672 switch (static_cast<MachineInstr *>(I)->getOperand(2).getImm()) { 1673 case OPCODE_IS_ZERO_INT: 1674 static_cast<MachineInstr *>(I)->getOperand(2).setImm(OPCODE_IS_NOT_ZERO_INT); 1675 return; 1676 case OPCODE_IS_NOT_ZERO_INT: 1677 static_cast<MachineInstr *>(I)->getOperand(2).setImm(OPCODE_IS_ZERO_INT); 1678 return; 1679 case OPCODE_IS_ZERO: 1680 static_cast<MachineInstr *>(I)->getOperand(2).setImm(OPCODE_IS_NOT_ZERO); 1681 return; 1682 case OPCODE_IS_NOT_ZERO: 1683 static_cast<MachineInstr *>(I)->getOperand(2).setImm(OPCODE_IS_ZERO); 1684 return; 1685 default: 1686 assert(0 && "PRED_X Opcode invalid!"); 1687 } 1688 } 1689 } 1690 } 1691 1692 template<class PassT> 1693 void CFGStructurizer<PassT>::mergeLoopbreakBlock(BlockT *exitingBlk, 1694 BlockT *exitBlk, 1695 BlockT *exitLandBlk, 1696 RegiT setReg) { 1697 if (DEBUGME) { 1698 errs() << "loopbreakPattern exiting = BB" << exitingBlk->getNumber() 1699 << " exit = BB" << exitBlk->getNumber() 1700 << " land = BB" << exitLandBlk->getNumber() << "\n"; 1701 } 1702 1703 InstrT *branchInstr = CFGTraits::getLoopendBlockBranchInstr(exitingBlk); 1704 assert(branchInstr && CFGTraits::isCondBranch(branchInstr)); 1705 1706 DebugLoc DL = branchInstr->getDebugLoc(); 1707 1708 BlockT *trueBranch = CFGTraits::getTrueBranch(branchInstr); 1709 int oldOpcode = branchInstr->getOpcode(); 1710 1711 // transform exitingBlk to 1712 // if ( ) { 1713 // exitBlk (if exitBlk != exitLandBlk) 1714 // setReg = 1 1715 // break 1716 // }endif 1717 // successor = {orgSuccessor(exitingBlk) - exitBlk} 1718 1719 typename BlockT::iterator branchInstrPos = 1720 CFGTraits::getInstrPos(exitingBlk, branchInstr); 1721 1722 if (exitBlk == exitLandBlk && setReg == INVALIDREGNUM) { 1723 //break_logical 1724 1725 if (trueBranch != exitBlk) { 1726 reversePredicateSetter(branchInstrPos); 1727 } 1728 int newOpcode = CFGTraits::getBreakZeroOpcode(oldOpcode); 1729 CFGTraits::insertCondBranchBefore(branchInstrPos, newOpcode, passRep, DL); 1730 } else { 1731 if (trueBranch != exitBlk) { 1732 reversePredicateSetter(branchInstr); 1733 } 1734 int newOpcode = CFGTraits::getBreakZeroOpcode(oldOpcode); 1735 CFGTraits::insertCondBranchBefore(branchInstrPos, newOpcode, passRep, DL); 1736 if (exitBlk != exitLandBlk) { 1737 //splice is insert-before ... 1738 exitingBlk->splice(branchInstrPos, exitBlk, exitBlk->begin(), 1739 exitBlk->end()); 1740 } 1741 if (setReg != INVALIDREGNUM) { 1742 CFGTraits::insertAssignInstrBefore(branchInstrPos, passRep, setReg, 1); 1743 } 1744 CFGTraits::insertInstrBefore(branchInstrPos, AMDGPU::BREAK, passRep); 1745 CFGTraits::insertInstrBefore(branchInstrPos, AMDGPU::ENDIF, passRep); 1746 } //if_logical 1747 1748 //now branchInst can be erase safely 1749 //exitingBlk->eraseFromParent(branchInstr); 1750 branchInstr->eraseFromParent(); 1751 1752 //now take care of successors, retire blocks 1753 exitingBlk->removeSuccessor(exitBlk); 1754 if (exitBlk != exitLandBlk) { 1755 //splice is insert-before ... 1756 exitBlk->removeSuccessor(exitLandBlk); 1757 retireBlock(exitingBlk, exitBlk); 1758 } 1759 1760 } //mergeLoopbreakBlock 1761 1762 template<class PassT> 1763 void CFGStructurizer<PassT>::settleLoopcontBlock(BlockT *contingBlk, 1764 BlockT *contBlk, 1765 RegiT setReg) { 1766 if (DEBUGME) { 1767 errs() << "settleLoopcontBlock conting = BB" 1768 << contingBlk->getNumber() 1769 << ", cont = BB" << contBlk->getNumber() << "\n"; 1770 } 1771 1772 InstrT *branchInstr = CFGTraits::getLoopendBlockBranchInstr(contingBlk); 1773 if (branchInstr) { 1774 assert(CFGTraits::isCondBranch(branchInstr)); 1775 typename BlockT::iterator branchInstrPos = 1776 CFGTraits::getInstrPos(contingBlk, branchInstr); 1777 BlockT *trueBranch = CFGTraits::getTrueBranch(branchInstr); 1778 int oldOpcode = branchInstr->getOpcode(); 1779 DebugLoc DL = branchInstr->getDebugLoc(); 1780 1781 // transform contingBlk to 1782 // if () { 1783 // move instr after branchInstr 1784 // continue 1785 // or 1786 // setReg = 1 1787 // break 1788 // }endif 1789 // successor = {orgSuccessor(contingBlk) - loopHeader} 1790 1791 bool useContinueLogical = 1792 (setReg == INVALIDREGNUM && (&*contingBlk->rbegin()) == branchInstr); 1793 1794 if (useContinueLogical == false) 1795 { 1796 int branchOpcode = 1797 trueBranch == contBlk ? CFGTraits::getBranchNzeroOpcode(oldOpcode) 1798 : CFGTraits::getBranchZeroOpcode(oldOpcode); 1799 1800 CFGTraits::insertCondBranchBefore(branchInstrPos, branchOpcode, passRep, DL); 1801 1802 if (setReg != INVALIDREGNUM) { 1803 CFGTraits::insertAssignInstrBefore(branchInstrPos, passRep, setReg, 1); 1804 // insertEnd to ensure phi-moves, if exist, go before the continue-instr. 1805 CFGTraits::insertInstrEnd(contingBlk, AMDGPU::BREAK, passRep, DL); 1806 } else { 1807 // insertEnd to ensure phi-moves, if exist, go before the continue-instr. 1808 CFGTraits::insertInstrEnd(contingBlk, AMDGPU::CONTINUE, passRep, DL); 1809 } 1810 1811 CFGTraits::insertInstrEnd(contingBlk, AMDGPU::ENDIF, passRep, DL); 1812 } else { 1813 int branchOpcode = 1814 trueBranch == contBlk ? CFGTraits::getContinueNzeroOpcode(oldOpcode) 1815 : CFGTraits::getContinueZeroOpcode(oldOpcode); 1816 1817 CFGTraits::insertCondBranchBefore(branchInstrPos, branchOpcode, passRep, DL); 1818 } 1819 1820 //contingBlk->eraseFromParent(branchInstr); 1821 branchInstr->eraseFromParent(); 1822 } else { 1823 /* if we've arrived here then we've already erased the branch instruction 1824 * travel back up the basic block to see the last reference of our debug location 1825 * we've just inserted that reference here so it should be representative */ 1826 if (setReg != INVALIDREGNUM) { 1827 CFGTraits::insertAssignInstrBefore(contingBlk, passRep, setReg, 1); 1828 // insertEnd to ensure phi-moves, if exist, go before the continue-instr. 1829 CFGTraits::insertInstrEnd(contingBlk, AMDGPU::BREAK, passRep, CFGTraits::getLastDebugLocInBB(contingBlk)); 1830 } else { 1831 // insertEnd to ensure phi-moves, if exist, go before the continue-instr. 1832 CFGTraits::insertInstrEnd(contingBlk, AMDGPU::CONTINUE, passRep, CFGTraits::getLastDebugLocInBB(contingBlk)); 1833 } 1834 } //else 1835 1836 } //settleLoopcontBlock 1837 1838 // BBs in exitBlkSet are determined as in break-path for loopRep, 1839 // before we can put code for BBs as inside loop-body for loopRep 1840 // check whether those BBs are determined as cont-BB for parentLoopRep 1841 // earlier. 1842 // If so, generate a new BB newBlk 1843 // (1) set newBlk common successor of BBs in exitBlkSet 1844 // (2) change the continue-instr in BBs in exitBlkSet to break-instr 1845 // (3) generate continue-instr in newBlk 1846 // 1847 template<class PassT> 1848 typename CFGStructurizer<PassT>::BlockT * 1849 CFGStructurizer<PassT>::relocateLoopcontBlock(LoopT *parentLoopRep, 1850 LoopT *loopRep, 1851 std::set<BlockT *> &exitBlkSet, 1852 BlockT *exitLandBlk) { 1853 std::set<BlockT *> endBlkSet; 1854 1855 // BlockT *parentLoopHead = parentLoopRep->getHeader(); 1856 1857 1858 for (typename std::set<BlockT *>::const_iterator iter = exitBlkSet.begin(), 1859 iterEnd = exitBlkSet.end(); 1860 iter != iterEnd; ++iter) { 1861 BlockT *exitBlk = *iter; 1862 BlockT *endBlk = singlePathEnd(exitBlk, exitLandBlk); 1863 1864 if (endBlk == NULL || CFGTraits::getContinueInstr(endBlk) == NULL) 1865 return NULL; 1866 1867 endBlkSet.insert(endBlk); 1868 } 1869 1870 BlockT *newBlk = funcRep->CreateMachineBasicBlock(); 1871 funcRep->push_back(newBlk); //insert to function 1872 CFGTraits::insertInstrEnd(newBlk, AMDGPU::CONTINUE, passRep); 1873 SHOWNEWBLK(newBlk, "New continue block: "); 1874 1875 for (typename std::set<BlockT*>::const_iterator iter = endBlkSet.begin(), 1876 iterEnd = endBlkSet.end(); 1877 iter != iterEnd; ++iter) { 1878 BlockT *endBlk = *iter; 1879 InstrT *contInstr = CFGTraits::getContinueInstr(endBlk); 1880 if (contInstr) { 1881 contInstr->eraseFromParent(); 1882 } 1883 endBlk->addSuccessor(newBlk); 1884 if (DEBUGME) { 1885 errs() << "Add new continue Block to BB" 1886 << endBlk->getNumber() << " successors\n"; 1887 } 1888 } 1889 1890 return newBlk; 1891 } //relocateLoopcontBlock 1892 1893 1894 // LoopEndbranchBlock is a BB created by the CFGStructurizer to use as 1895 // LoopLandBlock. This BB branch on the loop endBranchInit register to the 1896 // pathes corresponding to the loop exiting branches. 1897 1898 template<class PassT> 1899 typename CFGStructurizer<PassT>::BlockT * 1900 CFGStructurizer<PassT>::addLoopEndbranchBlock(LoopT *loopRep, 1901 BlockTSmallerVector &exitingBlks, 1902 BlockTSmallerVector &exitBlks) { 1903 const AMDGPUInstrInfo *tii = 1904 static_cast<const AMDGPUInstrInfo *>(passRep->getTargetInstrInfo()); 1905 const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32); 1906 1907 RegiT endBranchReg = static_cast<int> 1908 (funcRep->getRegInfo().createVirtualRegister(I32RC)); 1909 assert(endBranchReg >= 0); 1910 1911 // reg = 0 before entering the loop 1912 addLoopEndbranchInitReg(loopRep, endBranchReg); 1913 1914 uint32_t numBlks = static_cast<uint32_t>(exitingBlks.size()); 1915 assert(numBlks >=2 && numBlks == exitBlks.size()); 1916 1917 BlockT *preExitingBlk = exitingBlks[0]; 1918 BlockT *preExitBlk = exitBlks[0]; 1919 BlockT *preBranchBlk = funcRep->CreateMachineBasicBlock(); 1920 funcRep->push_back(preBranchBlk); //insert to function 1921 SHOWNEWBLK(preBranchBlk, "New loopEndbranch block: "); 1922 1923 BlockT *newLandBlk = preBranchBlk; 1924 1925 CFGTraits::replaceInstrUseOfBlockWith(preExitingBlk, preExitBlk, 1926 newLandBlk); 1927 preExitingBlk->removeSuccessor(preExitBlk); 1928 preExitingBlk->addSuccessor(newLandBlk); 1929 1930 //it is redundant to add reg = 0 to exitingBlks[0] 1931 1932 // For 1..n th exiting path (the last iteration handles two pathes) create the 1933 // branch to the previous path and the current path. 1934 for (uint32_t i = 1; i < numBlks; ++i) { 1935 BlockT *curExitingBlk = exitingBlks[i]; 1936 BlockT *curExitBlk = exitBlks[i]; 1937 BlockT *curBranchBlk; 1938 1939 if (i == numBlks - 1) { 1940 curBranchBlk = curExitBlk; 1941 } else { 1942 curBranchBlk = funcRep->CreateMachineBasicBlock(); 1943 funcRep->push_back(curBranchBlk); //insert to function 1944 SHOWNEWBLK(curBranchBlk, "New loopEndbranch block: "); 1945 } 1946 1947 // Add reg = i to exitingBlks[i]. 1948 CFGTraits::insertAssignInstrBefore(curExitingBlk, passRep, 1949 endBranchReg, i); 1950 1951 // Remove the edge (exitingBlks[i] exitBlks[i]) add new edge 1952 // (exitingBlks[i], newLandBlk). 1953 CFGTraits::replaceInstrUseOfBlockWith(curExitingBlk, curExitBlk, 1954 newLandBlk); 1955 curExitingBlk->removeSuccessor(curExitBlk); 1956 curExitingBlk->addSuccessor(newLandBlk); 1957 1958 // add to preBranchBlk the branch instruction: 1959 // if (endBranchReg == preVal) 1960 // preExitBlk 1961 // else 1962 // curBranchBlk 1963 // 1964 // preValReg = i - 1 1965 1966 DebugLoc DL; 1967 RegiT preValReg = static_cast<int> 1968 (funcRep->getRegInfo().createVirtualRegister(I32RC)); 1969 1970 preBranchBlk->insert(preBranchBlk->begin(), 1971 tii->getMovImmInstr(preBranchBlk->getParent(), preValReg, 1972 i - 1)); 1973 1974 // condResReg = (endBranchReg == preValReg) 1975 RegiT condResReg = static_cast<int> 1976 (funcRep->getRegInfo().createVirtualRegister(I32RC)); 1977 BuildMI(preBranchBlk, DL, tii->get(tii->getIEQOpcode()), condResReg) 1978 .addReg(endBranchReg).addReg(preValReg); 1979 1980 BuildMI(preBranchBlk, DL, tii->get(AMDGPU::BRANCH_COND_i32)) 1981 .addMBB(preExitBlk).addReg(condResReg); 1982 1983 preBranchBlk->addSuccessor(preExitBlk); 1984 preBranchBlk->addSuccessor(curBranchBlk); 1985 1986 // Update preExitingBlk, preExitBlk, preBranchBlk. 1987 preExitingBlk = curExitingBlk; 1988 preExitBlk = curExitBlk; 1989 preBranchBlk = curBranchBlk; 1990 1991 } //end for 1 .. n blocks 1992 1993 return newLandBlk; 1994 } //addLoopEndbranchBlock 1995 1996 template<class PassT> 1997 typename CFGStructurizer<PassT>::PathToKind 1998 CFGStructurizer<PassT>::singlePathTo(BlockT *srcBlk, BlockT *dstBlk, 1999 bool allowSideEntry) { 2000 assert(dstBlk); 2001 2002 if (srcBlk == dstBlk) { 2003 return SinglePath_InPath; 2004 } 2005 2006 while (srcBlk && srcBlk->succ_size() == 1) { 2007 srcBlk = *srcBlk->succ_begin(); 2008 if (srcBlk == dstBlk) { 2009 return SinglePath_InPath; 2010 } 2011 2012 if (!allowSideEntry && srcBlk->pred_size() > 1) { 2013 return Not_SinglePath; 2014 } 2015 } 2016 2017 if (srcBlk && srcBlk->succ_size()==0) { 2018 return SinglePath_NotInPath; 2019 } 2020 2021 return Not_SinglePath; 2022 } //singlePathTo 2023 2024 // If there is a single path from srcBlk to dstBlk, return the last block before 2025 // dstBlk If there is a single path from srcBlk->end without dstBlk, return the 2026 // last block in the path Otherwise, return NULL 2027 template<class PassT> 2028 typename CFGStructurizer<PassT>::BlockT * 2029 CFGStructurizer<PassT>::singlePathEnd(BlockT *srcBlk, BlockT *dstBlk, 2030 bool allowSideEntry) { 2031 assert(dstBlk); 2032 2033 if (srcBlk == dstBlk) { 2034 return srcBlk; 2035 } 2036 2037 if (srcBlk->succ_size() == 0) { 2038 return srcBlk; 2039 } 2040 2041 while (srcBlk && srcBlk->succ_size() == 1) { 2042 BlockT *preBlk = srcBlk; 2043 2044 srcBlk = *srcBlk->succ_begin(); 2045 if (srcBlk == NULL) { 2046 return preBlk; 2047 } 2048 2049 if (!allowSideEntry && srcBlk->pred_size() > 1) { 2050 return NULL; 2051 } 2052 } 2053 2054 if (srcBlk && srcBlk->succ_size()==0) { 2055 return srcBlk; 2056 } 2057 2058 return NULL; 2059 2060 } //singlePathEnd 2061 2062 template<class PassT> 2063 int CFGStructurizer<PassT>::cloneOnSideEntryTo(BlockT *preBlk, BlockT *srcBlk, 2064 BlockT *dstBlk) { 2065 int cloned = 0; 2066 assert(preBlk->isSuccessor(srcBlk)); 2067 while (srcBlk && srcBlk != dstBlk) { 2068 assert(srcBlk->succ_size() == 1); 2069 if (srcBlk->pred_size() > 1) { 2070 srcBlk = cloneBlockForPredecessor(srcBlk, preBlk); 2071 ++cloned; 2072 } 2073 2074 preBlk = srcBlk; 2075 srcBlk = *srcBlk->succ_begin(); 2076 } 2077 2078 return cloned; 2079 } //cloneOnSideEntryTo 2080 2081 template<class PassT> 2082 typename CFGStructurizer<PassT>::BlockT * 2083 CFGStructurizer<PassT>::cloneBlockForPredecessor(BlockT *curBlk, 2084 BlockT *predBlk) { 2085 assert(predBlk->isSuccessor(curBlk) && 2086 "succBlk is not a prececessor of curBlk"); 2087 2088 BlockT *cloneBlk = CFGTraits::clone(curBlk); //clone instructions 2089 CFGTraits::replaceInstrUseOfBlockWith(predBlk, curBlk, cloneBlk); 2090 //srcBlk, oldBlk, newBlk 2091 2092 predBlk->removeSuccessor(curBlk); 2093 predBlk->addSuccessor(cloneBlk); 2094 2095 // add all successor to cloneBlk 2096 CFGTraits::cloneSuccessorList(cloneBlk, curBlk); 2097 2098 numClonedInstr += curBlk->size(); 2099 2100 if (DEBUGME) { 2101 errs() << "Cloned block: " << "BB" 2102 << curBlk->getNumber() << "size " << curBlk->size() << "\n"; 2103 } 2104 2105 SHOWNEWBLK(cloneBlk, "result of Cloned block: "); 2106 2107 return cloneBlk; 2108 } //cloneBlockForPredecessor 2109 2110 template<class PassT> 2111 typename CFGStructurizer<PassT>::BlockT * 2112 CFGStructurizer<PassT>::exitingBlock2ExitBlock(LoopT *loopRep, 2113 BlockT *exitingBlk) { 2114 BlockT *exitBlk = NULL; 2115 2116 for (typename BlockT::succ_iterator iterSucc = exitingBlk->succ_begin(), 2117 iterSuccEnd = exitingBlk->succ_end(); 2118 iterSucc != iterSuccEnd; ++iterSucc) { 2119 BlockT *curBlk = *iterSucc; 2120 if (!loopRep->contains(curBlk)) { 2121 assert(exitBlk == NULL); 2122 exitBlk = curBlk; 2123 } 2124 } 2125 2126 assert(exitBlk != NULL); 2127 2128 return exitBlk; 2129 } //exitingBlock2ExitBlock 2130 2131 template<class PassT> 2132 void CFGStructurizer<PassT>::migrateInstruction(BlockT *srcBlk, 2133 BlockT *dstBlk, 2134 InstrIterator insertPos) { 2135 InstrIterator spliceEnd; 2136 //look for the input branchinstr, not the AMDGPU branchinstr 2137 InstrT *branchInstr = CFGTraits::getNormalBlockBranchInstr(srcBlk); 2138 if (branchInstr == NULL) { 2139 if (DEBUGME) { 2140 errs() << "migrateInstruction don't see branch instr\n" ; 2141 } 2142 spliceEnd = srcBlk->end(); 2143 } else { 2144 if (DEBUGME) { 2145 errs() << "migrateInstruction see branch instr\n" ; 2146 branchInstr->dump(); 2147 } 2148 spliceEnd = CFGTraits::getInstrPos(srcBlk, branchInstr); 2149 } 2150 if (DEBUGME) { 2151 errs() << "migrateInstruction before splice dstSize = " << dstBlk->size() 2152 << "srcSize = " << srcBlk->size() << "\n"; 2153 } 2154 2155 //splice insert before insertPos 2156 dstBlk->splice(insertPos, srcBlk, srcBlk->begin(), spliceEnd); 2157 2158 if (DEBUGME) { 2159 errs() << "migrateInstruction after splice dstSize = " << dstBlk->size() 2160 << "srcSize = " << srcBlk->size() << "\n"; 2161 } 2162 } //migrateInstruction 2163 2164 // normalizeInfiniteLoopExit change 2165 // B1: 2166 // uncond_br LoopHeader 2167 // 2168 // to 2169 // B1: 2170 // cond_br 1 LoopHeader dummyExit 2171 // and return the newly added dummy exit block 2172 // 2173 template<class PassT> 2174 typename CFGStructurizer<PassT>::BlockT * 2175 CFGStructurizer<PassT>::normalizeInfiniteLoopExit(LoopT* LoopRep) { 2176 BlockT *loopHeader; 2177 BlockT *loopLatch; 2178 loopHeader = LoopRep->getHeader(); 2179 loopLatch = LoopRep->getLoopLatch(); 2180 BlockT *dummyExitBlk = NULL; 2181 const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32); 2182 if (loopHeader!=NULL && loopLatch!=NULL) { 2183 InstrT *branchInstr = CFGTraits::getLoopendBlockBranchInstr(loopLatch); 2184 if (branchInstr!=NULL && CFGTraits::isUncondBranch(branchInstr)) { 2185 dummyExitBlk = funcRep->CreateMachineBasicBlock(); 2186 funcRep->push_back(dummyExitBlk); //insert to function 2187 SHOWNEWBLK(dummyExitBlk, "DummyExitBlock to normalize infiniteLoop: "); 2188 2189 if (DEBUGME) errs() << "Old branch instr: " << *branchInstr << "\n"; 2190 2191 typename BlockT::iterator insertPos = 2192 CFGTraits::getInstrPos(loopLatch, branchInstr); 2193 unsigned immReg = 2194 funcRep->getRegInfo().createVirtualRegister(I32RC); 2195 CFGTraits::insertAssignInstrBefore(insertPos, passRep, immReg, 1); 2196 InstrT *newInstr = 2197 CFGTraits::insertInstrBefore(insertPos, AMDGPU::BRANCH_COND_i32, passRep); 2198 MachineInstrBuilder(newInstr).addMBB(loopHeader).addReg(immReg, false); 2199 2200 SHOWNEWINSTR(newInstr); 2201 2202 branchInstr->eraseFromParent(); 2203 loopLatch->addSuccessor(dummyExitBlk); 2204 } 2205 } 2206 2207 return dummyExitBlk; 2208 } //normalizeInfiniteLoopExit 2209 2210 template<class PassT> 2211 void CFGStructurizer<PassT>::removeUnconditionalBranch(BlockT *srcBlk) { 2212 InstrT *branchInstr; 2213 2214 // I saw two unconditional branch in one basic block in example 2215 // test_fc_do_while_or.c need to fix the upstream on this to remove the loop. 2216 while ((branchInstr = CFGTraits::getLoopendBlockBranchInstr(srcBlk)) 2217 && CFGTraits::isUncondBranch(branchInstr)) { 2218 if (DEBUGME) { 2219 errs() << "Removing unconditional branch instruction" ; 2220 branchInstr->dump(); 2221 } 2222 branchInstr->eraseFromParent(); 2223 } 2224 } //removeUnconditionalBranch 2225 2226 template<class PassT> 2227 void CFGStructurizer<PassT>::removeRedundantConditionalBranch(BlockT *srcBlk) { 2228 if (srcBlk->succ_size() == 2) { 2229 BlockT *blk1 = *srcBlk->succ_begin(); 2230 BlockT *blk2 = *(++srcBlk->succ_begin()); 2231 2232 if (blk1 == blk2) { 2233 InstrT *branchInstr = CFGTraits::getNormalBlockBranchInstr(srcBlk); 2234 assert(branchInstr && CFGTraits::isCondBranch(branchInstr)); 2235 if (DEBUGME) { 2236 errs() << "Removing unneeded conditional branch instruction" ; 2237 branchInstr->dump(); 2238 } 2239 branchInstr->eraseFromParent(); 2240 SHOWNEWBLK(blk1, "Removing redundant successor"); 2241 srcBlk->removeSuccessor(blk1); 2242 } 2243 } 2244 } //removeRedundantConditionalBranch 2245 2246 template<class PassT> 2247 void CFGStructurizer<PassT>::addDummyExitBlock(SmallVector<BlockT*, 2248 DEFAULT_VEC_SLOTS> &retBlks) { 2249 BlockT *dummyExitBlk = funcRep->CreateMachineBasicBlock(); 2250 funcRep->push_back(dummyExitBlk); //insert to function 2251 CFGTraits::insertInstrEnd(dummyExitBlk, AMDGPU::RETURN, passRep); 2252 2253 for (typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::iterator iter = 2254 retBlks.begin(), 2255 iterEnd = retBlks.end(); iter != iterEnd; ++iter) { 2256 BlockT *curBlk = *iter; 2257 InstrT *curInstr = CFGTraits::getReturnInstr(curBlk); 2258 if (curInstr) { 2259 curInstr->eraseFromParent(); 2260 } 2261 #if 0 2262 if (curBlk->size()==0 && curBlk->pred_size() == 1) { 2263 if (DEBUGME) { 2264 errs() << "Replace empty block BB" << curBlk->getNumber() 2265 << " with dummyExitBlock\n"; 2266 } 2267 BlockT *predb = *curBlk->pred_begin(); 2268 predb->removeSuccessor(curBlk); 2269 curBlk = predb; 2270 } //handle empty curBlk 2271 #endif 2272 curBlk->addSuccessor(dummyExitBlk); 2273 if (DEBUGME) { 2274 errs() << "Add dummyExitBlock to BB" << curBlk->getNumber() 2275 << " successors\n"; 2276 } 2277 } //for 2278 2279 SHOWNEWBLK(dummyExitBlk, "DummyExitBlock: "); 2280 } //addDummyExitBlock 2281 2282 template<class PassT> 2283 void CFGStructurizer<PassT>::removeSuccessor(BlockT *srcBlk) { 2284 while (srcBlk->succ_size()) { 2285 srcBlk->removeSuccessor(*srcBlk->succ_begin()); 2286 } 2287 } 2288 2289 template<class PassT> 2290 void CFGStructurizer<PassT>::recordSccnum(BlockT *srcBlk, int sccNum) { 2291 BlockInfo *&srcBlkInfo = blockInfoMap[srcBlk]; 2292 2293 if (srcBlkInfo == NULL) { 2294 srcBlkInfo = new BlockInfo(); 2295 } 2296 2297 srcBlkInfo->sccNum = sccNum; 2298 } 2299 2300 template<class PassT> 2301 int CFGStructurizer<PassT>::getSCCNum(BlockT *srcBlk) { 2302 BlockInfo *srcBlkInfo = blockInfoMap[srcBlk]; 2303 return srcBlkInfo ? srcBlkInfo->sccNum : INVALIDSCCNUM; 2304 } 2305 2306 template<class PassT> 2307 void CFGStructurizer<PassT>::retireBlock(BlockT *dstBlk, BlockT *srcBlk) { 2308 if (DEBUGME) { 2309 errs() << "Retiring BB" << srcBlk->getNumber() << "\n"; 2310 } 2311 2312 BlockInfo *&srcBlkInfo = blockInfoMap[srcBlk]; 2313 2314 if (srcBlkInfo == NULL) { 2315 srcBlkInfo = new BlockInfo(); 2316 } 2317 2318 srcBlkInfo->isRetired = true; 2319 //int i = srcBlk->succ_size(); 2320 //int j = srcBlk->pred_size(); 2321 assert(srcBlk->succ_size() == 0 && srcBlk->pred_size() == 0 2322 && "can't retire block yet"); 2323 } 2324 2325 template<class PassT> 2326 bool CFGStructurizer<PassT>::isRetiredBlock(BlockT *srcBlk) { 2327 BlockInfo *srcBlkInfo = blockInfoMap[srcBlk]; 2328 return (srcBlkInfo && srcBlkInfo->isRetired); 2329 } 2330 2331 template<class PassT> 2332 bool CFGStructurizer<PassT>::isActiveLoophead(BlockT *curBlk) { 2333 LoopT *loopRep = loopInfo->getLoopFor(curBlk); 2334 while (loopRep && loopRep->getHeader() == curBlk) { 2335 LoopLandInfo *loopLand = getLoopLandInfo(loopRep); 2336 2337 if(loopLand == NULL) 2338 return true; 2339 2340 BlockT *landBlk = loopLand->landBlk; 2341 assert(landBlk); 2342 if (!isRetiredBlock(landBlk)) { 2343 return true; 2344 } 2345 2346 loopRep = loopRep->getParentLoop(); 2347 } 2348 2349 return false; 2350 } //isActiveLoophead 2351 2352 template<class PassT> 2353 bool CFGStructurizer<PassT>::needMigrateBlock(BlockT *blk) { 2354 const unsigned blockSizeThreshold = 30; 2355 const unsigned cloneInstrThreshold = 100; 2356 2357 bool multiplePreds = blk && (blk->pred_size() > 1); 2358 2359 if(!multiplePreds) 2360 return false; 2361 2362 unsigned blkSize = blk->size(); 2363 return ((blkSize > blockSizeThreshold) 2364 && (blkSize * (blk->pred_size() - 1) > cloneInstrThreshold)); 2365 } //needMigrateBlock 2366 2367 template<class PassT> 2368 typename CFGStructurizer<PassT>::BlockT * 2369 CFGStructurizer<PassT>::recordLoopLandBlock(LoopT *loopRep, BlockT *landBlk, 2370 BlockTSmallerVector &exitBlks, 2371 std::set<BlockT *> &exitBlkSet) { 2372 SmallVector<BlockT *, DEFAULT_VEC_SLOTS> inpathBlks; //in exit path blocks 2373 2374 for (typename BlockT::pred_iterator predIter = landBlk->pred_begin(), 2375 predIterEnd = landBlk->pred_end(); 2376 predIter != predIterEnd; ++predIter) { 2377 BlockT *curBlk = *predIter; 2378 if (loopRep->contains(curBlk) || exitBlkSet.count(curBlk)) { 2379 inpathBlks.push_back(curBlk); 2380 } 2381 } //for 2382 2383 //if landBlk has predecessors that are not in the given loop, 2384 //create a new block 2385 BlockT *newLandBlk = landBlk; 2386 if (inpathBlks.size() != landBlk->pred_size()) { 2387 newLandBlk = funcRep->CreateMachineBasicBlock(); 2388 funcRep->push_back(newLandBlk); //insert to function 2389 newLandBlk->addSuccessor(landBlk); 2390 for (typename SmallVector<BlockT*, DEFAULT_VEC_SLOTS>::iterator iter = 2391 inpathBlks.begin(), 2392 iterEnd = inpathBlks.end(); iter != iterEnd; ++iter) { 2393 BlockT *curBlk = *iter; 2394 CFGTraits::replaceInstrUseOfBlockWith(curBlk, landBlk, newLandBlk); 2395 //srcBlk, oldBlk, newBlk 2396 curBlk->removeSuccessor(landBlk); 2397 curBlk->addSuccessor(newLandBlk); 2398 } 2399 for (size_t i = 0, tot = exitBlks.size(); i < tot; ++i) { 2400 if (exitBlks[i] == landBlk) { 2401 exitBlks[i] = newLandBlk; 2402 } 2403 } 2404 SHOWNEWBLK(newLandBlk, "NewLandingBlock: "); 2405 } 2406 2407 setLoopLandBlock(loopRep, newLandBlk); 2408 2409 return newLandBlk; 2410 } // recordLoopbreakLand 2411 2412 template<class PassT> 2413 void CFGStructurizer<PassT>::setLoopLandBlock(LoopT *loopRep, BlockT *blk) { 2414 LoopLandInfo *&theEntry = loopLandInfoMap[loopRep]; 2415 2416 if (theEntry == NULL) { 2417 theEntry = new LoopLandInfo(); 2418 } 2419 assert(theEntry->landBlk == NULL); 2420 2421 if (blk == NULL) { 2422 blk = funcRep->CreateMachineBasicBlock(); 2423 funcRep->push_back(blk); //insert to function 2424 SHOWNEWBLK(blk, "DummyLandingBlock for loop without break: "); 2425 } 2426 2427 theEntry->landBlk = blk; 2428 2429 if (DEBUGME) { 2430 errs() << "setLoopLandBlock loop-header = BB" 2431 << loopRep->getHeader()->getNumber() 2432 << " landing-block = BB" << blk->getNumber() << "\n"; 2433 } 2434 } // setLoopLandBlock 2435 2436 template<class PassT> 2437 void CFGStructurizer<PassT>::addLoopBreakOnReg(LoopT *loopRep, RegiT regNum) { 2438 LoopLandInfo *&theEntry = loopLandInfoMap[loopRep]; 2439 2440 if (theEntry == NULL) { 2441 theEntry = new LoopLandInfo(); 2442 } 2443 2444 theEntry->breakOnRegs.insert(regNum); 2445 2446 if (DEBUGME) { 2447 errs() << "addLoopBreakOnReg loop-header = BB" 2448 << loopRep->getHeader()->getNumber() 2449 << " regNum = " << regNum << "\n"; 2450 } 2451 } // addLoopBreakOnReg 2452 2453 template<class PassT> 2454 void CFGStructurizer<PassT>::addLoopContOnReg(LoopT *loopRep, RegiT regNum) { 2455 LoopLandInfo *&theEntry = loopLandInfoMap[loopRep]; 2456 2457 if (theEntry == NULL) { 2458 theEntry = new LoopLandInfo(); 2459 } 2460 theEntry->contOnRegs.insert(regNum); 2461 2462 if (DEBUGME) { 2463 errs() << "addLoopContOnReg loop-header = BB" 2464 << loopRep->getHeader()->getNumber() 2465 << " regNum = " << regNum << "\n"; 2466 } 2467 } // addLoopContOnReg 2468 2469 template<class PassT> 2470 void CFGStructurizer<PassT>::addLoopBreakInitReg(LoopT *loopRep, RegiT regNum) { 2471 LoopLandInfo *&theEntry = loopLandInfoMap[loopRep]; 2472 2473 if (theEntry == NULL) { 2474 theEntry = new LoopLandInfo(); 2475 } 2476 theEntry->breakInitRegs.insert(regNum); 2477 2478 if (DEBUGME) { 2479 errs() << "addLoopBreakInitReg loop-header = BB" 2480 << loopRep->getHeader()->getNumber() 2481 << " regNum = " << regNum << "\n"; 2482 } 2483 } // addLoopBreakInitReg 2484 2485 template<class PassT> 2486 void CFGStructurizer<PassT>::addLoopContInitReg(LoopT *loopRep, RegiT regNum) { 2487 LoopLandInfo *&theEntry = loopLandInfoMap[loopRep]; 2488 2489 if (theEntry == NULL) { 2490 theEntry = new LoopLandInfo(); 2491 } 2492 theEntry->contInitRegs.insert(regNum); 2493 2494 if (DEBUGME) { 2495 errs() << "addLoopContInitReg loop-header = BB" 2496 << loopRep->getHeader()->getNumber() 2497 << " regNum = " << regNum << "\n"; 2498 } 2499 } // addLoopContInitReg 2500 2501 template<class PassT> 2502 void CFGStructurizer<PassT>::addLoopEndbranchInitReg(LoopT *loopRep, 2503 RegiT regNum) { 2504 LoopLandInfo *&theEntry = loopLandInfoMap[loopRep]; 2505 2506 if (theEntry == NULL) { 2507 theEntry = new LoopLandInfo(); 2508 } 2509 theEntry->endbranchInitRegs.insert(regNum); 2510 2511 if (DEBUGME) 2512 { 2513 errs() << "addLoopEndbranchInitReg loop-header = BB" 2514 << loopRep->getHeader()->getNumber() 2515 << " regNum = " << regNum << "\n"; 2516 } 2517 } // addLoopEndbranchInitReg 2518 2519 template<class PassT> 2520 typename CFGStructurizer<PassT>::LoopLandInfo * 2521 CFGStructurizer<PassT>::getLoopLandInfo(LoopT *loopRep) { 2522 LoopLandInfo *&theEntry = loopLandInfoMap[loopRep]; 2523 2524 return theEntry; 2525 } // getLoopLandInfo 2526 2527 template<class PassT> 2528 typename CFGStructurizer<PassT>::BlockT * 2529 CFGStructurizer<PassT>::getLoopLandBlock(LoopT *loopRep) { 2530 LoopLandInfo *&theEntry = loopLandInfoMap[loopRep]; 2531 2532 return theEntry ? theEntry->landBlk : NULL; 2533 } // getLoopLandBlock 2534 2535 2536 template<class PassT> 2537 bool CFGStructurizer<PassT>::hasBackEdge(BlockT *curBlk) { 2538 LoopT *loopRep = loopInfo->getLoopFor(curBlk); 2539 if (loopRep == NULL) 2540 return false; 2541 2542 BlockT *loopHeader = loopRep->getHeader(); 2543 2544 return curBlk->isSuccessor(loopHeader); 2545 2546 } //hasBackEdge 2547 2548 template<class PassT> 2549 unsigned CFGStructurizer<PassT>::getLoopDepth(LoopT *loopRep) { 2550 return loopRep ? loopRep->getLoopDepth() : 0; 2551 } //getLoopDepth 2552 2553 template<class PassT> 2554 int CFGStructurizer<PassT>::countActiveBlock 2555 (typename SmallVector<BlockT*, DEFAULT_VEC_SLOTS>::const_iterator iterStart, 2556 typename SmallVector<BlockT*, DEFAULT_VEC_SLOTS>::const_iterator iterEnd) { 2557 int count = 0; 2558 while (iterStart != iterEnd) { 2559 if (!isRetiredBlock(*iterStart)) { 2560 ++count; 2561 } 2562 ++iterStart; 2563 } 2564 2565 return count; 2566 } //countActiveBlock 2567 2568 // This is work around solution for findNearestCommonDominator not avaiable to 2569 // post dom a proper fix should go to Dominators.h. 2570 2571 template<class PassT> 2572 typename CFGStructurizer<PassT>::BlockT* 2573 CFGStructurizer<PassT>::findNearestCommonPostDom(BlockT *blk1, BlockT *blk2) { 2574 2575 if (postDomTree->dominates(blk1, blk2)) { 2576 return blk1; 2577 } 2578 if (postDomTree->dominates(blk2, blk1)) { 2579 return blk2; 2580 } 2581 2582 DomTreeNodeT *node1 = postDomTree->getNode(blk1); 2583 DomTreeNodeT *node2 = postDomTree->getNode(blk2); 2584 2585 // Handle newly cloned node. 2586 if (node1 == NULL && blk1->succ_size() == 1) { 2587 return findNearestCommonPostDom(*blk1->succ_begin(), blk2); 2588 } 2589 if (node2 == NULL && blk2->succ_size() == 1) { 2590 return findNearestCommonPostDom(blk1, *blk2->succ_begin()); 2591 } 2592 2593 if (node1 == NULL || node2 == NULL) { 2594 return NULL; 2595 } 2596 2597 node1 = node1->getIDom(); 2598 while (node1) { 2599 if (postDomTree->dominates(node1, node2)) { 2600 return node1->getBlock(); 2601 } 2602 node1 = node1->getIDom(); 2603 } 2604 2605 return NULL; 2606 } 2607 2608 template<class PassT> 2609 typename CFGStructurizer<PassT>::BlockT * 2610 CFGStructurizer<PassT>::findNearestCommonPostDom 2611 (typename std::set<BlockT *> &blks) { 2612 BlockT *commonDom; 2613 typename std::set<BlockT *>::const_iterator iter = blks.begin(); 2614 typename std::set<BlockT *>::const_iterator iterEnd = blks.end(); 2615 for (commonDom = *iter; iter != iterEnd && commonDom != NULL; ++iter) { 2616 BlockT *curBlk = *iter; 2617 if (curBlk != commonDom) { 2618 commonDom = findNearestCommonPostDom(curBlk, commonDom); 2619 } 2620 } 2621 2622 if (DEBUGME) { 2623 errs() << "Common post dominator for exit blocks is "; 2624 if (commonDom) { 2625 errs() << "BB" << commonDom->getNumber() << "\n"; 2626 } else { 2627 errs() << "NULL\n"; 2628 } 2629 } 2630 2631 return commonDom; 2632 } //findNearestCommonPostDom 2633 2634 } //end namespace llvm 2635 2636 //todo: move-end 2637 2638 2639 //===----------------------------------------------------------------------===// 2640 // 2641 // CFGStructurizer for AMDGPU 2642 // 2643 //===----------------------------------------------------------------------===// 2644 2645 2646 using namespace llvmCFGStruct; 2647 2648 namespace llvm 2649 { 2650 class AMDGPUCFGStructurizer : public MachineFunctionPass 2651 { 2652 public: 2653 typedef MachineInstr InstructionType; 2654 typedef MachineFunction FunctionType; 2655 typedef MachineBasicBlock BlockType; 2656 typedef MachineLoopInfo LoopinfoType; 2657 typedef MachineDominatorTree DominatortreeType; 2658 typedef MachinePostDominatorTree PostDominatortreeType; 2659 typedef MachineDomTreeNode DomTreeNodeType; 2660 typedef MachineLoop LoopType; 2661 2662 protected: 2663 TargetMachine &TM; 2664 const TargetInstrInfo *TII; 2665 const AMDGPURegisterInfo *TRI; 2666 2667 public: 2668 AMDGPUCFGStructurizer(char &pid, TargetMachine &tm); 2669 const TargetInstrInfo *getTargetInstrInfo() const; 2670 //bool runOnMachineFunction(MachineFunction &F); 2671 2672 private: 2673 2674 }; //end of class AMDGPUCFGStructurizer 2675 2676 //char AMDGPUCFGStructurizer::ID = 0; 2677 } //end of namespace llvm 2678 AMDGPUCFGStructurizer::AMDGPUCFGStructurizer(char &pid, TargetMachine &tm 2679 ) 2680 : MachineFunctionPass(pid), TM(tm), TII(tm.getInstrInfo()), 2681 TRI(static_cast<const AMDGPURegisterInfo *>(tm.getRegisterInfo()) 2682 ) { 2683 } 2684 2685 const TargetInstrInfo *AMDGPUCFGStructurizer::getTargetInstrInfo() const { 2686 return TII; 2687 } 2688 //===----------------------------------------------------------------------===// 2689 // 2690 // CFGPrepare 2691 // 2692 //===----------------------------------------------------------------------===// 2693 2694 2695 using namespace llvmCFGStruct; 2696 2697 namespace llvm 2698 { 2699 class AMDGPUCFGPrepare : public AMDGPUCFGStructurizer 2700 { 2701 public: 2702 static char ID; 2703 2704 public: 2705 AMDGPUCFGPrepare(TargetMachine &tm); 2706 2707 virtual const char *getPassName() const; 2708 virtual void getAnalysisUsage(AnalysisUsage &AU) const; 2709 2710 bool runOnMachineFunction(MachineFunction &F); 2711 2712 private: 2713 2714 }; //end of class AMDGPUCFGPrepare 2715 2716 char AMDGPUCFGPrepare::ID = 0; 2717 } //end of namespace llvm 2718 2719 AMDGPUCFGPrepare::AMDGPUCFGPrepare(TargetMachine &tm) 2720 : AMDGPUCFGStructurizer(ID, tm ) 2721 { 2722 } 2723 const char *AMDGPUCFGPrepare::getPassName() const { 2724 return "AMD IL Control Flow Graph Preparation Pass"; 2725 } 2726 2727 void AMDGPUCFGPrepare::getAnalysisUsage(AnalysisUsage &AU) const { 2728 AU.addPreserved<MachineFunctionAnalysis>(); 2729 AU.addRequired<MachineFunctionAnalysis>(); 2730 AU.addRequired<MachineDominatorTree>(); 2731 AU.addRequired<MachinePostDominatorTree>(); 2732 AU.addRequired<MachineLoopInfo>(); 2733 } 2734 2735 //===----------------------------------------------------------------------===// 2736 // 2737 // CFGPerform 2738 // 2739 //===----------------------------------------------------------------------===// 2740 2741 2742 using namespace llvmCFGStruct; 2743 2744 namespace llvm 2745 { 2746 class AMDGPUCFGPerform : public AMDGPUCFGStructurizer 2747 { 2748 public: 2749 static char ID; 2750 2751 public: 2752 AMDGPUCFGPerform(TargetMachine &tm); 2753 virtual const char *getPassName() const; 2754 virtual void getAnalysisUsage(AnalysisUsage &AU) const; 2755 bool runOnMachineFunction(MachineFunction &F); 2756 2757 private: 2758 2759 }; //end of class AMDGPUCFGPerform 2760 2761 char AMDGPUCFGPerform::ID = 0; 2762 } //end of namespace llvm 2763 2764 AMDGPUCFGPerform::AMDGPUCFGPerform(TargetMachine &tm) 2765 : AMDGPUCFGStructurizer(ID, tm) 2766 { 2767 } 2768 2769 const char *AMDGPUCFGPerform::getPassName() const { 2770 return "AMD IL Control Flow Graph structurizer Pass"; 2771 } 2772 2773 void AMDGPUCFGPerform::getAnalysisUsage(AnalysisUsage &AU) const { 2774 AU.addPreserved<MachineFunctionAnalysis>(); 2775 AU.addRequired<MachineFunctionAnalysis>(); 2776 AU.addRequired<MachineDominatorTree>(); 2777 AU.addRequired<MachinePostDominatorTree>(); 2778 AU.addRequired<MachineLoopInfo>(); 2779 } 2780 2781 //===----------------------------------------------------------------------===// 2782 // 2783 // CFGStructTraits<AMDGPUCFGStructurizer> 2784 // 2785 //===----------------------------------------------------------------------===// 2786 2787 namespace llvmCFGStruct 2788 { 2789 // this class is tailor to the AMDGPU backend 2790 template<> 2791 struct CFGStructTraits<AMDGPUCFGStructurizer> 2792 { 2793 typedef int RegiT; 2794 2795 static int getBreakNzeroOpcode(int oldOpcode) { 2796 switch(oldOpcode) { 2797 case AMDGPU::JUMP: return AMDGPU::BREAK_LOGICALNZ_i32; 2798 default: 2799 assert(0 && "internal error"); 2800 }; 2801 return -1; 2802 } 2803 2804 static int getBreakZeroOpcode(int oldOpcode) { 2805 switch(oldOpcode) { 2806 case AMDGPU::JUMP: return AMDGPU::BREAK_LOGICALZ_i32; 2807 default: 2808 assert(0 && "internal error"); 2809 }; 2810 return -1; 2811 } 2812 2813 static int getBranchNzeroOpcode(int oldOpcode) { 2814 switch(oldOpcode) { 2815 case AMDGPU::JUMP: return AMDGPU::IF_LOGICALNZ_i32; 2816 ExpandCaseToAllScalarReturn(AMDGPU::BRANCH_COND, AMDGPU::IF_LOGICALNZ); 2817 case AMDGPU::SI_IF_NZ: return AMDGPU::SI_IF_NZ; 2818 default: 2819 assert(0 && "internal error"); 2820 }; 2821 return -1; 2822 } 2823 2824 static int getBranchZeroOpcode(int oldOpcode) { 2825 switch(oldOpcode) { 2826 case AMDGPU::JUMP: return AMDGPU::IF_LOGICALZ_i32; 2827 ExpandCaseToAllScalarReturn(AMDGPU::BRANCH_COND, AMDGPU::IF_LOGICALZ); 2828 case AMDGPU::SI_IF_Z: return AMDGPU::SI_IF_Z; 2829 default: 2830 assert(0 && "internal error"); 2831 }; 2832 return -1; 2833 } 2834 2835 static int getContinueNzeroOpcode(int oldOpcode) 2836 { 2837 switch(oldOpcode) { 2838 case AMDGPU::JUMP: return AMDGPU::CONTINUE_LOGICALNZ_i32; 2839 default: 2840 assert(0 && "internal error"); 2841 }; 2842 return -1; 2843 } 2844 2845 static int getContinueZeroOpcode(int oldOpcode) { 2846 switch(oldOpcode) { 2847 case AMDGPU::JUMP: return AMDGPU::CONTINUE_LOGICALZ_i32; 2848 default: 2849 assert(0 && "internal error"); 2850 }; 2851 return -1; 2852 } 2853 2854 // the explicitly represented branch target is the true branch target 2855 #define getExplicitBranch getTrueBranch 2856 #define setExplicitBranch setTrueBranch 2857 2858 static MachineBasicBlock *getTrueBranch(MachineInstr *instr) { 2859 return instr->getOperand(0).getMBB(); 2860 } 2861 2862 static void setTrueBranch(MachineInstr *instr, MachineBasicBlock *blk) { 2863 instr->getOperand(0).setMBB(blk); 2864 } 2865 2866 static MachineBasicBlock * 2867 getFalseBranch(MachineBasicBlock *blk, MachineInstr *instr) { 2868 assert(blk->succ_size() == 2); 2869 MachineBasicBlock *trueBranch = getTrueBranch(instr); 2870 MachineBasicBlock::succ_iterator iter = blk->succ_begin(); 2871 MachineBasicBlock::succ_iterator iterNext = iter; 2872 ++iterNext; 2873 2874 return (*iter == trueBranch) ? *iterNext : *iter; 2875 } 2876 2877 static bool isCondBranch(MachineInstr *instr) { 2878 switch (instr->getOpcode()) { 2879 case AMDGPU::JUMP: 2880 return instr->getOperand(instr->findFirstPredOperandIdx()).getReg() != 0; 2881 ExpandCaseToAllScalarTypes(AMDGPU::BRANCH_COND); 2882 case AMDGPU::SI_IF_NZ: 2883 case AMDGPU::SI_IF_Z: 2884 break; 2885 default: 2886 return false; 2887 } 2888 return true; 2889 } 2890 2891 static bool isUncondBranch(MachineInstr *instr) { 2892 switch (instr->getOpcode()) { 2893 case AMDGPU::JUMP: 2894 return instr->getOperand(instr->findFirstPredOperandIdx()).getReg() == 0; 2895 default: 2896 return false; 2897 } 2898 return true; 2899 } 2900 2901 static DebugLoc getLastDebugLocInBB(MachineBasicBlock *blk) { 2902 //get DebugLoc from the first MachineBasicBlock instruction with debug info 2903 DebugLoc DL; 2904 for (MachineBasicBlock::iterator iter = blk->begin(); iter != blk->end(); ++iter) { 2905 MachineInstr *instr = &(*iter); 2906 if (instr->getDebugLoc().isUnknown() == false) { 2907 DL = instr->getDebugLoc(); 2908 } 2909 } 2910 return DL; 2911 } 2912 2913 static MachineInstr *getNormalBlockBranchInstr(MachineBasicBlock *blk) { 2914 MachineBasicBlock::reverse_iterator iter = blk->rbegin(); 2915 MachineInstr *instr = &*iter; 2916 if (instr && (isCondBranch(instr) || isUncondBranch(instr))) { 2917 return instr; 2918 } 2919 return NULL; 2920 } 2921 2922 // The correct naming for this is getPossibleLoopendBlockBranchInstr. 2923 // 2924 // BB with backward-edge could have move instructions after the branch 2925 // instruction. Such move instruction "belong to" the loop backward-edge. 2926 // 2927 static MachineInstr *getLoopendBlockBranchInstr(MachineBasicBlock *blk) { 2928 const AMDGPUInstrInfo * TII = static_cast<const AMDGPUInstrInfo *>( 2929 blk->getParent()->getTarget().getInstrInfo()); 2930 2931 for (MachineBasicBlock::reverse_iterator iter = blk->rbegin(), 2932 iterEnd = blk->rend(); iter != iterEnd; ++iter) { 2933 // FIXME: Simplify 2934 MachineInstr *instr = &*iter; 2935 if (instr) { 2936 if (isCondBranch(instr) || isUncondBranch(instr)) { 2937 return instr; 2938 } else if (!TII->isMov(instr->getOpcode())) { 2939 break; 2940 } 2941 } 2942 } 2943 return NULL; 2944 } 2945 2946 static MachineInstr *getReturnInstr(MachineBasicBlock *blk) { 2947 MachineBasicBlock::reverse_iterator iter = blk->rbegin(); 2948 if (iter != blk->rend()) { 2949 MachineInstr *instr = &(*iter); 2950 if (instr->getOpcode() == AMDGPU::RETURN) { 2951 return instr; 2952 } 2953 } 2954 return NULL; 2955 } 2956 2957 static MachineInstr *getContinueInstr(MachineBasicBlock *blk) { 2958 MachineBasicBlock::reverse_iterator iter = blk->rbegin(); 2959 if (iter != blk->rend()) { 2960 MachineInstr *instr = &(*iter); 2961 if (instr->getOpcode() == AMDGPU::CONTINUE) { 2962 return instr; 2963 } 2964 } 2965 return NULL; 2966 } 2967 2968 static MachineInstr *getLoopBreakInstr(MachineBasicBlock *blk) { 2969 for (MachineBasicBlock::iterator iter = blk->begin(); (iter != blk->end()); ++iter) { 2970 MachineInstr *instr = &(*iter); 2971 if ((instr->getOpcode() == AMDGPU::BREAK_LOGICALNZ_i32) || (instr->getOpcode() == AMDGPU::BREAK_LOGICALZ_i32)) { 2972 return instr; 2973 } 2974 } 2975 return NULL; 2976 } 2977 2978 static bool isReturnBlock(MachineBasicBlock *blk) { 2979 MachineInstr *instr = getReturnInstr(blk); 2980 bool isReturn = (blk->succ_size() == 0); 2981 if (instr) { 2982 assert(isReturn); 2983 } else if (isReturn) { 2984 if (DEBUGME) { 2985 errs() << "BB" << blk->getNumber() 2986 <<" is return block without RETURN instr\n"; 2987 } 2988 } 2989 2990 return isReturn; 2991 } 2992 2993 static MachineBasicBlock::iterator 2994 getInstrPos(MachineBasicBlock *blk, MachineInstr *instr) { 2995 assert(instr->getParent() == blk && "instruction doesn't belong to block"); 2996 MachineBasicBlock::iterator iter = blk->begin(); 2997 MachineBasicBlock::iterator iterEnd = blk->end(); 2998 while (&(*iter) != instr && iter != iterEnd) { 2999 ++iter; 3000 } 3001 3002 assert(iter != iterEnd); 3003 return iter; 3004 }//getInstrPos 3005 3006 static MachineInstr *insertInstrBefore(MachineBasicBlock *blk, int newOpcode, 3007 AMDGPUCFGStructurizer *passRep) { 3008 return insertInstrBefore(blk,newOpcode,passRep,DebugLoc()); 3009 } //insertInstrBefore 3010 3011 static MachineInstr *insertInstrBefore(MachineBasicBlock *blk, int newOpcode, 3012 AMDGPUCFGStructurizer *passRep, DebugLoc DL) { 3013 const TargetInstrInfo *tii = passRep->getTargetInstrInfo(); 3014 MachineInstr *newInstr = 3015 blk->getParent()->CreateMachineInstr(tii->get(newOpcode), DL); 3016 3017 MachineBasicBlock::iterator res; 3018 if (blk->begin() != blk->end()) { 3019 blk->insert(blk->begin(), newInstr); 3020 } else { 3021 blk->push_back(newInstr); 3022 } 3023 3024 SHOWNEWINSTR(newInstr); 3025 3026 return newInstr; 3027 } //insertInstrBefore 3028 3029 static void insertInstrEnd(MachineBasicBlock *blk, int newOpcode, 3030 AMDGPUCFGStructurizer *passRep) { 3031 insertInstrEnd(blk,newOpcode,passRep,DebugLoc()); 3032 } //insertInstrEnd 3033 3034 static void insertInstrEnd(MachineBasicBlock *blk, int newOpcode, 3035 AMDGPUCFGStructurizer *passRep, DebugLoc DL) { 3036 const TargetInstrInfo *tii = passRep->getTargetInstrInfo(); 3037 MachineInstr *newInstr = blk->getParent() 3038 ->CreateMachineInstr(tii->get(newOpcode), DL); 3039 3040 blk->push_back(newInstr); 3041 //assume the instruction doesn't take any reg operand ... 3042 3043 SHOWNEWINSTR(newInstr); 3044 } //insertInstrEnd 3045 3046 static MachineInstr *insertInstrBefore(MachineBasicBlock::iterator instrPos, 3047 int newOpcode, 3048 AMDGPUCFGStructurizer *passRep) { 3049 MachineInstr *oldInstr = &(*instrPos); 3050 const TargetInstrInfo *tii = passRep->getTargetInstrInfo(); 3051 MachineBasicBlock *blk = oldInstr->getParent(); 3052 MachineInstr *newInstr = 3053 blk->getParent()->CreateMachineInstr(tii->get(newOpcode), 3054 DebugLoc()); 3055 3056 blk->insert(instrPos, newInstr); 3057 //assume the instruction doesn't take any reg operand ... 3058 3059 SHOWNEWINSTR(newInstr); 3060 return newInstr; 3061 } //insertInstrBefore 3062 3063 static void insertCondBranchBefore(MachineBasicBlock::iterator instrPos, 3064 int newOpcode, 3065 AMDGPUCFGStructurizer *passRep, 3066 DebugLoc DL) { 3067 MachineInstr *oldInstr = &(*instrPos); 3068 const TargetInstrInfo *tii = passRep->getTargetInstrInfo(); 3069 MachineBasicBlock *blk = oldInstr->getParent(); 3070 MachineInstr *newInstr = 3071 blk->getParent()->CreateMachineInstr(tii->get(newOpcode), 3072 DL); 3073 3074 blk->insert(instrPos, newInstr); 3075 MachineInstrBuilder(newInstr).addReg(oldInstr->getOperand(1).getReg(), 3076 false); 3077 3078 SHOWNEWINSTR(newInstr); 3079 //erase later oldInstr->eraseFromParent(); 3080 } //insertCondBranchBefore 3081 3082 static void insertCondBranchBefore(MachineBasicBlock *blk, 3083 MachineBasicBlock::iterator insertPos, 3084 int newOpcode, 3085 AMDGPUCFGStructurizer *passRep, 3086 RegiT regNum, 3087 DebugLoc DL) { 3088 const TargetInstrInfo *tii = passRep->getTargetInstrInfo(); 3089 3090 MachineInstr *newInstr = 3091 blk->getParent()->CreateMachineInstr(tii->get(newOpcode), DL); 3092 3093 //insert before 3094 blk->insert(insertPos, newInstr); 3095 MachineInstrBuilder(newInstr).addReg(regNum, false); 3096 3097 SHOWNEWINSTR(newInstr); 3098 } //insertCondBranchBefore 3099 3100 static void insertCondBranchEnd(MachineBasicBlock *blk, 3101 int newOpcode, 3102 AMDGPUCFGStructurizer *passRep, 3103 RegiT regNum) { 3104 const TargetInstrInfo *tii = passRep->getTargetInstrInfo(); 3105 MachineInstr *newInstr = 3106 blk->getParent()->CreateMachineInstr(tii->get(newOpcode), DebugLoc()); 3107 3108 blk->push_back(newInstr); 3109 MachineInstrBuilder(newInstr).addReg(regNum, false); 3110 3111 SHOWNEWINSTR(newInstr); 3112 } //insertCondBranchEnd 3113 3114 3115 static void insertAssignInstrBefore(MachineBasicBlock::iterator instrPos, 3116 AMDGPUCFGStructurizer *passRep, 3117 RegiT regNum, int regVal) { 3118 MachineInstr *oldInstr = &(*instrPos); 3119 const AMDGPUInstrInfo *tii = 3120 static_cast<const AMDGPUInstrInfo *>(passRep->getTargetInstrInfo()); 3121 MachineBasicBlock *blk = oldInstr->getParent(); 3122 MachineInstr *newInstr = tii->getMovImmInstr(blk->getParent(), regNum, 3123 regVal); 3124 blk->insert(instrPos, newInstr); 3125 3126 SHOWNEWINSTR(newInstr); 3127 } //insertAssignInstrBefore 3128 3129 static void insertAssignInstrBefore(MachineBasicBlock *blk, 3130 AMDGPUCFGStructurizer *passRep, 3131 RegiT regNum, int regVal) { 3132 const AMDGPUInstrInfo *tii = 3133 static_cast<const AMDGPUInstrInfo *>(passRep->getTargetInstrInfo()); 3134 3135 MachineInstr *newInstr = tii->getMovImmInstr(blk->getParent(), regNum, 3136 regVal); 3137 if (blk->begin() != blk->end()) { 3138 blk->insert(blk->begin(), newInstr); 3139 } else { 3140 blk->push_back(newInstr); 3141 } 3142 3143 SHOWNEWINSTR(newInstr); 3144 3145 } //insertInstrBefore 3146 3147 static void insertCompareInstrBefore(MachineBasicBlock *blk, 3148 MachineBasicBlock::iterator instrPos, 3149 AMDGPUCFGStructurizer *passRep, 3150 RegiT dstReg, RegiT src1Reg, 3151 RegiT src2Reg) { 3152 const AMDGPUInstrInfo *tii = 3153 static_cast<const AMDGPUInstrInfo *>(passRep->getTargetInstrInfo()); 3154 MachineInstr *newInstr = 3155 blk->getParent()->CreateMachineInstr(tii->get(tii->getIEQOpcode()), DebugLoc()); 3156 3157 MachineInstrBuilder(newInstr).addReg(dstReg, RegState::Define); //set target 3158 MachineInstrBuilder(newInstr).addReg(src1Reg); //set src value 3159 MachineInstrBuilder(newInstr).addReg(src2Reg); //set src value 3160 3161 blk->insert(instrPos, newInstr); 3162 SHOWNEWINSTR(newInstr); 3163 3164 } //insertCompareInstrBefore 3165 3166 static void cloneSuccessorList(MachineBasicBlock *dstBlk, 3167 MachineBasicBlock *srcBlk) { 3168 for (MachineBasicBlock::succ_iterator iter = srcBlk->succ_begin(), 3169 iterEnd = srcBlk->succ_end(); iter != iterEnd; ++iter) { 3170 dstBlk->addSuccessor(*iter); // *iter's predecessor is also taken care of 3171 } 3172 } //cloneSuccessorList 3173 3174 static MachineBasicBlock *clone(MachineBasicBlock *srcBlk) { 3175 MachineFunction *func = srcBlk->getParent(); 3176 MachineBasicBlock *newBlk = func->CreateMachineBasicBlock(); 3177 func->push_back(newBlk); //insert to function 3178 //newBlk->setNumber(srcBlk->getNumber()); 3179 for (MachineBasicBlock::iterator iter = srcBlk->begin(), 3180 iterEnd = srcBlk->end(); 3181 iter != iterEnd; ++iter) { 3182 MachineInstr *instr = func->CloneMachineInstr(iter); 3183 newBlk->push_back(instr); 3184 } 3185 return newBlk; 3186 } 3187 3188 //MachineBasicBlock::ReplaceUsesOfBlockWith doesn't serve the purpose because 3189 //the AMDGPU instruction is not recognized as terminator fix this and retire 3190 //this routine 3191 static void replaceInstrUseOfBlockWith(MachineBasicBlock *srcBlk, 3192 MachineBasicBlock *oldBlk, 3193 MachineBasicBlock *newBlk) { 3194 MachineInstr *branchInstr = getLoopendBlockBranchInstr(srcBlk); 3195 if (branchInstr && isCondBranch(branchInstr) && 3196 getExplicitBranch(branchInstr) == oldBlk) { 3197 setExplicitBranch(branchInstr, newBlk); 3198 } 3199 } 3200 3201 static void wrapup(MachineBasicBlock *entryBlk) { 3202 assert((!entryBlk->getParent()->getJumpTableInfo() 3203 || entryBlk->getParent()->getJumpTableInfo()->isEmpty()) 3204 && "found a jump table"); 3205 3206 //collect continue right before endloop 3207 SmallVector<MachineInstr *, DEFAULT_VEC_SLOTS> contInstr; 3208 MachineBasicBlock::iterator pre = entryBlk->begin(); 3209 MachineBasicBlock::iterator iterEnd = entryBlk->end(); 3210 MachineBasicBlock::iterator iter = pre; 3211 while (iter != iterEnd) { 3212 if (pre->getOpcode() == AMDGPU::CONTINUE 3213 && iter->getOpcode() == AMDGPU::ENDLOOP) { 3214 contInstr.push_back(pre); 3215 } 3216 pre = iter; 3217 ++iter; 3218 } //end while 3219 3220 //delete continue right before endloop 3221 for (unsigned i = 0; i < contInstr.size(); ++i) { 3222 contInstr[i]->eraseFromParent(); 3223 } 3224 3225 // TODO to fix up jump table so later phase won't be confused. if 3226 // (jumpTableInfo->isEmpty() == false) { need to clean the jump table, but 3227 // there isn't such an interface yet. alternatively, replace all the other 3228 // blocks in the jump table with the entryBlk //} 3229 3230 } //wrapup 3231 3232 static MachineDominatorTree *getDominatorTree(AMDGPUCFGStructurizer &pass) { 3233 return &pass.getAnalysis<MachineDominatorTree>(); 3234 } 3235 3236 static MachinePostDominatorTree* 3237 getPostDominatorTree(AMDGPUCFGStructurizer &pass) { 3238 return &pass.getAnalysis<MachinePostDominatorTree>(); 3239 } 3240 3241 static MachineLoopInfo *getLoopInfo(AMDGPUCFGStructurizer &pass) { 3242 return &pass.getAnalysis<MachineLoopInfo>(); 3243 } 3244 }; // template class CFGStructTraits 3245 } //end of namespace llvm 3246 3247 // createAMDGPUCFGPreparationPass- Returns a pass 3248 FunctionPass *llvm::createAMDGPUCFGPreparationPass(TargetMachine &tm 3249 ) { 3250 return new AMDGPUCFGPrepare(tm ); 3251 } 3252 3253 bool AMDGPUCFGPrepare::runOnMachineFunction(MachineFunction &func) { 3254 return llvmCFGStruct::CFGStructurizer<AMDGPUCFGStructurizer>().prepare(func, 3255 *this, 3256 TRI); 3257 } 3258 3259 // createAMDGPUCFGStructurizerPass- Returns a pass 3260 FunctionPass *llvm::createAMDGPUCFGStructurizerPass(TargetMachine &tm 3261 ) { 3262 return new AMDGPUCFGPerform(tm ); 3263 } 3264 3265 bool AMDGPUCFGPerform::runOnMachineFunction(MachineFunction &func) { 3266 return llvmCFGStruct::CFGStructurizer<AMDGPUCFGStructurizer>().run(func, 3267 *this, 3268 TRI); 3269 } 3270 3271 //end of file newline goes below 3272 3273