1 //===-- AMDILCFGStructurizer.cpp - CFG Structurizer -----------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 /// \file 9 //==-----------------------------------------------------------------------===// 10 11 #define DEBUGME 0 12 #define DEBUG_TYPE "structcfg" 13 14 #include "AMDGPUInstrInfo.h" 15 #include "AMDIL.h" 16 #include "llvm/ADT/SCCIterator.h" 17 #include "llvm/ADT/SmallVector.h" 18 #include "llvm/ADT/Statistic.h" 19 #include "llvm/Analysis/DominatorInternals.h" 20 #include "llvm/Analysis/Dominators.h" 21 #include "llvm/CodeGen/MachineDominators.h" 22 #include "llvm/CodeGen/MachineFunction.h" 23 #include "llvm/CodeGen/MachineFunctionAnalysis.h" 24 #include "llvm/CodeGen/MachineFunctionPass.h" 25 #include "llvm/CodeGen/MachineInstrBuilder.h" 26 #include "llvm/CodeGen/MachineJumpTableInfo.h" 27 #include "llvm/CodeGen/MachineLoopInfo.h" 28 #include "llvm/CodeGen/MachinePostDominators.h" 29 #include "llvm/CodeGen/MachineRegisterInfo.h" 30 #include "llvm/Target/TargetInstrInfo.h" 31 32 using namespace llvm; 33 34 // TODO: move-begin. 35 36 //===----------------------------------------------------------------------===// 37 // 38 // Statistics for CFGStructurizer. 39 // 40 //===----------------------------------------------------------------------===// 41 42 STATISTIC(numSerialPatternMatch, "CFGStructurizer number of serial pattern " 43 "matched"); 44 STATISTIC(numIfPatternMatch, "CFGStructurizer number of if pattern " 45 "matched"); 46 STATISTIC(numLoopbreakPatternMatch, "CFGStructurizer number of loop-break " 47 "pattern matched"); 48 STATISTIC(numLoopcontPatternMatch, "CFGStructurizer number of loop-continue " 49 "pattern matched"); 50 STATISTIC(numLoopPatternMatch, "CFGStructurizer number of loop pattern " 51 "matched"); 52 STATISTIC(numClonedBlock, "CFGStructurizer cloned blocks"); 53 STATISTIC(numClonedInstr, "CFGStructurizer cloned instructions"); 54 55 //===----------------------------------------------------------------------===// 56 // 57 // Miscellaneous utility for CFGStructurizer. 58 // 59 //===----------------------------------------------------------------------===// 60 namespace llvmCFGStruct { 61 #define SHOWNEWINSTR(i) \ 62 if (DEBUGME) errs() << "New instr: " << *i << "\n" 63 64 #define SHOWNEWBLK(b, msg) \ 65 if (DEBUGME) { \ 66 errs() << msg << "BB" << b->getNumber() << "size " << b->size(); \ 67 errs() << "\n"; \ 68 } 69 70 #define SHOWBLK_DETAIL(b, msg) \ 71 if (DEBUGME) { \ 72 if (b) { \ 73 errs() << msg << "BB" << b->getNumber() << "size " << b->size(); \ 74 b->print(errs()); \ 75 errs() << "\n"; \ 76 } \ 77 } 78 79 #define INVALIDSCCNUM -1 80 #define INVALIDREGNUM 0 81 82 template<class LoopinfoT> 83 void PrintLoopinfo(const LoopinfoT &LoopInfo, llvm::raw_ostream &OS) { 84 for (typename LoopinfoT::iterator iter = LoopInfo.begin(), 85 iterEnd = LoopInfo.end(); 86 iter != iterEnd; ++iter) { 87 (*iter)->print(OS, 0); 88 } 89 } 90 91 template<class NodeT> 92 void ReverseVector(SmallVector<NodeT *, DEFAULT_VEC_SLOTS> &Src) { 93 size_t sz = Src.size(); 94 for (size_t i = 0; i < sz/2; ++i) { 95 NodeT *t = Src[i]; 96 Src[i] = Src[sz - i - 1]; 97 Src[sz - i - 1] = t; 98 } 99 } 100 101 } //end namespace llvmCFGStruct 102 103 //===----------------------------------------------------------------------===// 104 // 105 // supporting data structure for CFGStructurizer 106 // 107 //===----------------------------------------------------------------------===// 108 109 namespace llvmCFGStruct { 110 template<class PassT> 111 struct CFGStructTraits { 112 }; 113 114 template <class InstrT> 115 class BlockInformation { 116 public: 117 bool isRetired; 118 int sccNum; 119 //SmallVector<InstrT*, DEFAULT_VEC_SLOTS> succInstr; 120 //Instructions defining the corresponding successor. 121 BlockInformation() : isRetired(false), sccNum(INVALIDSCCNUM) {} 122 }; 123 124 template <class BlockT, class InstrT, class RegiT> 125 class LandInformation { 126 public: 127 BlockT *landBlk; 128 std::set<RegiT> breakInitRegs; //Registers that need to "reg = 0", before 129 //WHILELOOP(thisloop) init before entering 130 //thisloop. 131 std::set<RegiT> contInitRegs; //Registers that need to "reg = 0", after 132 //WHILELOOP(thisloop) init after entering 133 //thisloop. 134 std::set<RegiT> endbranchInitRegs; //Init before entering this loop, at loop 135 //land block, branch cond on this reg. 136 std::set<RegiT> breakOnRegs; //registers that need to "if (reg) break 137 //endif" after ENDLOOP(thisloop) break 138 //outerLoopOf(thisLoop). 139 std::set<RegiT> contOnRegs; //registers that need to "if (reg) continue 140 //endif" after ENDLOOP(thisloop) continue on 141 //outerLoopOf(thisLoop). 142 LandInformation() : landBlk(NULL) {} 143 }; 144 145 } //end of namespace llvmCFGStruct 146 147 //===----------------------------------------------------------------------===// 148 // 149 // CFGStructurizer 150 // 151 //===----------------------------------------------------------------------===// 152 153 namespace llvmCFGStruct { 154 // bixia TODO: port it to BasicBlock, not just MachineBasicBlock. 155 template<class PassT> 156 class CFGStructurizer { 157 public: 158 typedef enum { 159 Not_SinglePath = 0, 160 SinglePath_InPath = 1, 161 SinglePath_NotInPath = 2 162 } PathToKind; 163 164 public: 165 typedef typename PassT::InstructionType InstrT; 166 typedef typename PassT::FunctionType FuncT; 167 typedef typename PassT::DominatortreeType DomTreeT; 168 typedef typename PassT::PostDominatortreeType PostDomTreeT; 169 typedef typename PassT::DomTreeNodeType DomTreeNodeT; 170 typedef typename PassT::LoopinfoType LoopInfoT; 171 172 typedef GraphTraits<FuncT *> FuncGTraits; 173 //typedef FuncGTraits::nodes_iterator BlockIterator; 174 typedef typename FuncT::iterator BlockIterator; 175 176 typedef typename FuncGTraits::NodeType BlockT; 177 typedef GraphTraits<BlockT *> BlockGTraits; 178 typedef GraphTraits<Inverse<BlockT *> > InvBlockGTraits; 179 //typedef BlockGTraits::succ_iterator InstructionIterator; 180 typedef typename BlockT::iterator InstrIterator; 181 182 typedef CFGStructTraits<PassT> CFGTraits; 183 typedef BlockInformation<InstrT> BlockInfo; 184 typedef std::map<BlockT *, BlockInfo *> BlockInfoMap; 185 186 typedef int RegiT; 187 typedef typename PassT::LoopType LoopT; 188 typedef LandInformation<BlockT, InstrT, RegiT> LoopLandInfo; 189 typedef std::map<LoopT *, LoopLandInfo *> LoopLandInfoMap; 190 //landing info for loop break 191 typedef SmallVector<BlockT *, 32> BlockTSmallerVector; 192 193 public: 194 CFGStructurizer(); 195 ~CFGStructurizer(); 196 197 /// Perform the CFG structurization 198 bool run(FuncT &Func, PassT &Pass, const AMDGPURegisterInfo *tri); 199 200 /// Perform the CFG preparation 201 bool prepare(FuncT &Func, PassT &Pass, const AMDGPURegisterInfo *tri); 202 203 private: 204 void reversePredicateSetter(typename BlockT::iterator); 205 void orderBlocks(); 206 void printOrderedBlocks(llvm::raw_ostream &OS); 207 int patternMatch(BlockT *CurBlock); 208 int patternMatchGroup(BlockT *CurBlock); 209 210 int serialPatternMatch(BlockT *CurBlock); 211 int ifPatternMatch(BlockT *CurBlock); 212 int switchPatternMatch(BlockT *CurBlock); 213 int loopendPatternMatch(BlockT *CurBlock); 214 int loopPatternMatch(BlockT *CurBlock); 215 216 int loopbreakPatternMatch(LoopT *LoopRep, BlockT *LoopHeader); 217 int loopcontPatternMatch(LoopT *LoopRep, BlockT *LoopHeader); 218 //int loopWithoutBreak(BlockT *); 219 220 void handleLoopbreak (BlockT *ExitingBlock, LoopT *ExitingLoop, 221 BlockT *ExitBlock, LoopT *exitLoop, BlockT *landBlock); 222 void handleLoopcontBlock(BlockT *ContingBlock, LoopT *contingLoop, 223 BlockT *ContBlock, LoopT *contLoop); 224 bool isSameloopDetachedContbreak(BlockT *Src1Block, BlockT *Src2Block); 225 int handleJumpintoIf(BlockT *HeadBlock, BlockT *TrueBlock, 226 BlockT *FalseBlock); 227 int handleJumpintoIfImp(BlockT *HeadBlock, BlockT *TrueBlock, 228 BlockT *FalseBlock); 229 int improveSimpleJumpintoIf(BlockT *HeadBlock, BlockT *TrueBlock, 230 BlockT *FalseBlock, BlockT **LandBlockPtr); 231 void showImproveSimpleJumpintoIf(BlockT *HeadBlock, BlockT *TrueBlock, 232 BlockT *FalseBlock, BlockT *LandBlock, 233 bool Detail = false); 234 PathToKind singlePathTo(BlockT *SrcBlock, BlockT *DstBlock, 235 bool AllowSideEntry = true); 236 BlockT *singlePathEnd(BlockT *srcBlock, BlockT *DstBlock, 237 bool AllowSideEntry = true); 238 int cloneOnSideEntryTo(BlockT *PreBlock, BlockT *SrcBlock, BlockT *DstBlock); 239 void mergeSerialBlock(BlockT *DstBlock, BlockT *srcBlock); 240 241 void mergeIfthenelseBlock(InstrT *BranchInstr, BlockT *CurBlock, 242 BlockT *TrueBlock, BlockT *FalseBlock, 243 BlockT *LandBlock); 244 void mergeLooplandBlock(BlockT *DstBlock, LoopLandInfo *LoopLand); 245 void mergeLoopbreakBlock(BlockT *ExitingBlock, BlockT *ExitBlock, 246 BlockT *ExitLandBlock, RegiT SetReg); 247 void settleLoopcontBlock(BlockT *ContingBlock, BlockT *ContBlock, 248 RegiT SetReg); 249 BlockT *relocateLoopcontBlock(LoopT *ParentLoopRep, LoopT *LoopRep, 250 std::set<BlockT*> &ExitBlockSet, 251 BlockT *ExitLandBlk); 252 BlockT *addLoopEndbranchBlock(LoopT *LoopRep, 253 BlockTSmallerVector &ExitingBlocks, 254 BlockTSmallerVector &ExitBlocks); 255 BlockT *normalizeInfiniteLoopExit(LoopT *LoopRep); 256 void removeUnconditionalBranch(BlockT *SrcBlock); 257 void removeRedundantConditionalBranch(BlockT *SrcBlock); 258 void addDummyExitBlock(SmallVector<BlockT *, DEFAULT_VEC_SLOTS> &RetBlocks); 259 260 void removeSuccessor(BlockT *SrcBlock); 261 BlockT *cloneBlockForPredecessor(BlockT *CurBlock, BlockT *PredBlock); 262 BlockT *exitingBlock2ExitBlock (LoopT *LoopRep, BlockT *exitingBlock); 263 264 void migrateInstruction(BlockT *SrcBlock, BlockT *DstBlock, 265 InstrIterator InsertPos); 266 267 void recordSccnum(BlockT *SrcBlock, int SCCNum); 268 int getSCCNum(BlockT *srcBlk); 269 270 void retireBlock(BlockT *DstBlock, BlockT *SrcBlock); 271 bool isRetiredBlock(BlockT *SrcBlock); 272 bool isActiveLoophead(BlockT *CurBlock); 273 bool needMigrateBlock(BlockT *Block); 274 275 BlockT *recordLoopLandBlock(LoopT *LoopRep, BlockT *LandBlock, 276 BlockTSmallerVector &exitBlocks, 277 std::set<BlockT*> &ExitBlockSet); 278 void setLoopLandBlock(LoopT *LoopRep, BlockT *Block = NULL); 279 BlockT *getLoopLandBlock(LoopT *LoopRep); 280 LoopLandInfo *getLoopLandInfo(LoopT *LoopRep); 281 282 void addLoopBreakOnReg(LoopT *LoopRep, RegiT RegNum); 283 void addLoopContOnReg(LoopT *LoopRep, RegiT RegNum); 284 void addLoopBreakInitReg(LoopT *LoopRep, RegiT RegNum); 285 void addLoopContInitReg(LoopT *LoopRep, RegiT RegNum); 286 void addLoopEndbranchInitReg(LoopT *LoopRep, RegiT RegNum); 287 288 bool hasBackEdge(BlockT *curBlock); 289 unsigned getLoopDepth (LoopT *LoopRep); 290 int countActiveBlock( 291 typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator IterStart, 292 typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator IterEnd); 293 BlockT *findNearestCommonPostDom(std::set<BlockT *>&); 294 BlockT *findNearestCommonPostDom(BlockT *Block1, BlockT *Block2); 295 296 private: 297 DomTreeT *domTree; 298 PostDomTreeT *postDomTree; 299 LoopInfoT *loopInfo; 300 PassT *passRep; 301 FuncT *funcRep; 302 303 BlockInfoMap blockInfoMap; 304 LoopLandInfoMap loopLandInfoMap; 305 SmallVector<BlockT *, DEFAULT_VEC_SLOTS> orderedBlks; 306 const AMDGPURegisterInfo *TRI; 307 308 }; //template class CFGStructurizer 309 310 template<class PassT> CFGStructurizer<PassT>::CFGStructurizer() 311 : domTree(NULL), postDomTree(NULL), loopInfo(NULL) { 312 } 313 314 template<class PassT> CFGStructurizer<PassT>::~CFGStructurizer() { 315 for (typename BlockInfoMap::iterator I = blockInfoMap.begin(), 316 E = blockInfoMap.end(); I != E; ++I) { 317 delete I->second; 318 } 319 } 320 321 template<class PassT> 322 bool CFGStructurizer<PassT>::prepare(FuncT &func, PassT &pass, 323 const AMDGPURegisterInfo * tri) { 324 passRep = &pass; 325 funcRep = &func; 326 TRI = tri; 327 328 bool changed = false; 329 330 //FIXME: if not reducible flow graph, make it so ??? 331 332 if (DEBUGME) { 333 errs() << "AMDGPUCFGStructurizer::prepare\n"; 334 } 335 336 loopInfo = CFGTraits::getLoopInfo(pass); 337 if (DEBUGME) { 338 errs() << "LoopInfo:\n"; 339 PrintLoopinfo(*loopInfo, errs()); 340 } 341 342 orderBlocks(); 343 if (DEBUGME) { 344 errs() << "Ordered blocks:\n"; 345 printOrderedBlocks(errs()); 346 } 347 348 SmallVector<BlockT *, DEFAULT_VEC_SLOTS> retBlks; 349 350 for (typename LoopInfoT::iterator iter = loopInfo->begin(), 351 iterEnd = loopInfo->end(); 352 iter != iterEnd; ++iter) { 353 LoopT* loopRep = (*iter); 354 BlockTSmallerVector exitingBlks; 355 loopRep->getExitingBlocks(exitingBlks); 356 357 if (exitingBlks.size() == 0) { 358 BlockT* dummyExitBlk = normalizeInfiniteLoopExit(loopRep); 359 if (dummyExitBlk != NULL) 360 retBlks.push_back(dummyExitBlk); 361 } 362 } 363 364 // Remove unconditional branch instr. 365 // Add dummy exit block iff there are multiple returns. 366 367 for (typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator 368 iterBlk = orderedBlks.begin(), iterEndBlk = orderedBlks.end(); 369 iterBlk != iterEndBlk; 370 ++iterBlk) { 371 BlockT *curBlk = *iterBlk; 372 removeUnconditionalBranch(curBlk); 373 removeRedundantConditionalBranch(curBlk); 374 if (CFGTraits::isReturnBlock(curBlk)) { 375 retBlks.push_back(curBlk); 376 } 377 assert(curBlk->succ_size() <= 2); 378 } //for 379 380 if (retBlks.size() >= 2) { 381 addDummyExitBlock(retBlks); 382 changed = true; 383 } 384 385 return changed; 386 } //CFGStructurizer::prepare 387 388 template<class PassT> 389 bool CFGStructurizer<PassT>::run(FuncT &func, PassT &pass, 390 const AMDGPURegisterInfo * tri) { 391 passRep = &pass; 392 funcRep = &func; 393 TRI = tri; 394 395 //Assume reducible CFG... 396 if (DEBUGME) { 397 errs() << "AMDGPUCFGStructurizer::run\n"; 398 func.viewCFG(); 399 } 400 401 domTree = CFGTraits::getDominatorTree(pass); 402 if (DEBUGME) { 403 domTree->print(errs(), (const llvm::Module*)0); 404 } 405 406 postDomTree = CFGTraits::getPostDominatorTree(pass); 407 if (DEBUGME) { 408 postDomTree->print(errs()); 409 } 410 411 loopInfo = CFGTraits::getLoopInfo(pass); 412 if (DEBUGME) { 413 errs() << "LoopInfo:\n"; 414 PrintLoopinfo(*loopInfo, errs()); 415 } 416 417 orderBlocks(); 418 #ifdef STRESSTEST 419 //Use the worse block ordering to test the algorithm. 420 ReverseVector(orderedBlks); 421 #endif 422 423 if (DEBUGME) { 424 errs() << "Ordered blocks:\n"; 425 printOrderedBlocks(errs()); 426 } 427 int numIter = 0; 428 bool finish = false; 429 BlockT *curBlk; 430 bool makeProgress = false; 431 int numRemainedBlk = countActiveBlock(orderedBlks.begin(), 432 orderedBlks.end()); 433 434 do { 435 ++numIter; 436 if (DEBUGME) { 437 errs() << "numIter = " << numIter 438 << ", numRemaintedBlk = " << numRemainedBlk << "\n"; 439 } 440 441 typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator 442 iterBlk = orderedBlks.begin(); 443 typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator 444 iterBlkEnd = orderedBlks.end(); 445 446 typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator 447 sccBeginIter = iterBlk; 448 BlockT *sccBeginBlk = NULL; 449 int sccNumBlk = 0; // The number of active blocks, init to a 450 // maximum possible number. 451 int sccNumIter; // Number of iteration in this SCC. 452 453 while (iterBlk != iterBlkEnd) { 454 curBlk = *iterBlk; 455 456 if (sccBeginBlk == NULL) { 457 sccBeginIter = iterBlk; 458 sccBeginBlk = curBlk; 459 sccNumIter = 0; 460 sccNumBlk = numRemainedBlk; // Init to maximum possible number. 461 if (DEBUGME) { 462 errs() << "start processing SCC" << getSCCNum(sccBeginBlk); 463 errs() << "\n"; 464 } 465 } 466 467 if (!isRetiredBlock(curBlk)) { 468 patternMatch(curBlk); 469 } 470 471 ++iterBlk; 472 473 bool contNextScc = true; 474 if (iterBlk == iterBlkEnd 475 || getSCCNum(sccBeginBlk) != getSCCNum(*iterBlk)) { 476 // Just finish one scc. 477 ++sccNumIter; 478 int sccRemainedNumBlk = countActiveBlock(sccBeginIter, iterBlk); 479 if (sccRemainedNumBlk != 1 && sccRemainedNumBlk >= sccNumBlk) { 480 if (DEBUGME) { 481 errs() << "Can't reduce SCC " << getSCCNum(curBlk) 482 << ", sccNumIter = " << sccNumIter; 483 errs() << "doesn't make any progress\n"; 484 } 485 contNextScc = true; 486 } else if (sccRemainedNumBlk != 1 && sccRemainedNumBlk < sccNumBlk) { 487 sccNumBlk = sccRemainedNumBlk; 488 iterBlk = sccBeginIter; 489 contNextScc = false; 490 if (DEBUGME) { 491 errs() << "repeat processing SCC" << getSCCNum(curBlk) 492 << "sccNumIter = " << sccNumIter << "\n"; 493 func.viewCFG(); 494 } 495 } else { 496 // Finish the current scc. 497 contNextScc = true; 498 } 499 } else { 500 // Continue on next component in the current scc. 501 contNextScc = false; 502 } 503 504 if (contNextScc) { 505 sccBeginBlk = NULL; 506 } 507 } //while, "one iteration" over the function. 508 509 BlockT *entryBlk = FuncGTraits::nodes_begin(&func); 510 if (entryBlk->succ_size() == 0) { 511 finish = true; 512 if (DEBUGME) { 513 errs() << "Reduce to one block\n"; 514 } 515 } else { 516 int newnumRemainedBlk 517 = countActiveBlock(orderedBlks.begin(), orderedBlks.end()); 518 // consider cloned blocks ?? 519 if (newnumRemainedBlk == 1 || newnumRemainedBlk < numRemainedBlk) { 520 makeProgress = true; 521 numRemainedBlk = newnumRemainedBlk; 522 } else { 523 makeProgress = false; 524 if (DEBUGME) { 525 errs() << "No progress\n"; 526 } 527 } 528 } 529 } while (!finish && makeProgress); 530 531 // Misc wrap up to maintain the consistency of the Function representation. 532 CFGTraits::wrapup(FuncGTraits::nodes_begin(&func)); 533 534 // Detach retired Block, release memory. 535 for (typename BlockInfoMap::iterator iterMap = blockInfoMap.begin(), 536 iterEndMap = blockInfoMap.end(); iterMap != iterEndMap; ++iterMap) { 537 if ((*iterMap).second && (*iterMap).second->isRetired) { 538 assert(((*iterMap).first)->getNumber() != -1); 539 if (DEBUGME) { 540 errs() << "Erase BB" << ((*iterMap).first)->getNumber() << "\n"; 541 } 542 (*iterMap).first->eraseFromParent(); //Remove from the parent Function. 543 } 544 delete (*iterMap).second; 545 } 546 blockInfoMap.clear(); 547 548 // clear loopLandInfoMap 549 for (typename LoopLandInfoMap::iterator iterMap = loopLandInfoMap.begin(), 550 iterEndMap = loopLandInfoMap.end(); iterMap != iterEndMap; ++iterMap) { 551 delete (*iterMap).second; 552 } 553 loopLandInfoMap.clear(); 554 555 if (DEBUGME) { 556 func.viewCFG(); 557 } 558 559 if (!finish) { 560 assert(!"IRREDUCIBL_CF"); 561 } 562 563 return true; 564 } //CFGStructurizer::run 565 566 /// Print the ordered Blocks. 567 /// 568 template<class PassT> 569 void CFGStructurizer<PassT>::printOrderedBlocks(llvm::raw_ostream &os) { 570 size_t i = 0; 571 for (typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::const_iterator 572 iterBlk = orderedBlks.begin(), iterBlkEnd = orderedBlks.end(); 573 iterBlk != iterBlkEnd; 574 ++iterBlk, ++i) { 575 os << "BB" << (*iterBlk)->getNumber(); 576 os << "(" << getSCCNum(*iterBlk) << "," << (*iterBlk)->size() << ")"; 577 if (i != 0 && i % 10 == 0) { 578 os << "\n"; 579 } else { 580 os << " "; 581 } 582 } 583 } //printOrderedBlocks 584 585 /// Compute the reversed DFS post order of Blocks 586 /// 587 template<class PassT> void CFGStructurizer<PassT>::orderBlocks() { 588 int sccNum = 0; 589 BlockT *bb; 590 for (scc_iterator<FuncT *> sccIter = scc_begin(funcRep), 591 sccEnd = scc_end(funcRep); sccIter != sccEnd; ++sccIter, ++sccNum) { 592 std::vector<BlockT *> &sccNext = *sccIter; 593 for (typename std::vector<BlockT *>::const_iterator 594 blockIter = sccNext.begin(), blockEnd = sccNext.end(); 595 blockIter != blockEnd; ++blockIter) { 596 bb = *blockIter; 597 orderedBlks.push_back(bb); 598 recordSccnum(bb, sccNum); 599 } 600 } 601 602 //walk through all the block in func to check for unreachable 603 for (BlockIterator blockIter1 = FuncGTraits::nodes_begin(funcRep), 604 blockEnd1 = FuncGTraits::nodes_end(funcRep); 605 blockIter1 != blockEnd1; ++blockIter1) { 606 BlockT *bb = &(*blockIter1); 607 sccNum = getSCCNum(bb); 608 if (sccNum == INVALIDSCCNUM) { 609 errs() << "unreachable block BB" << bb->getNumber() << "\n"; 610 } 611 } 612 } //orderBlocks 613 614 template<class PassT> int CFGStructurizer<PassT>::patternMatch(BlockT *curBlk) { 615 int numMatch = 0; 616 int curMatch; 617 618 if (DEBUGME) { 619 errs() << "Begin patternMatch BB" << curBlk->getNumber() << "\n"; 620 } 621 622 while ((curMatch = patternMatchGroup(curBlk)) > 0) { 623 numMatch += curMatch; 624 } 625 626 if (DEBUGME) { 627 errs() << "End patternMatch BB" << curBlk->getNumber() 628 << ", numMatch = " << numMatch << "\n"; 629 } 630 631 return numMatch; 632 } //patternMatch 633 634 template<class PassT> 635 int CFGStructurizer<PassT>::patternMatchGroup(BlockT *curBlk) { 636 int numMatch = 0; 637 numMatch += serialPatternMatch(curBlk); 638 numMatch += ifPatternMatch(curBlk); 639 numMatch += loopendPatternMatch(curBlk); 640 numMatch += loopPatternMatch(curBlk); 641 return numMatch; 642 }//patternMatchGroup 643 644 template<class PassT> 645 int CFGStructurizer<PassT>::serialPatternMatch(BlockT *curBlk) { 646 if (curBlk->succ_size() != 1) { 647 return 0; 648 } 649 650 BlockT *childBlk = *curBlk->succ_begin(); 651 if (childBlk->pred_size() != 1 || isActiveLoophead(childBlk)) { 652 return 0; 653 } 654 655 mergeSerialBlock(curBlk, childBlk); 656 ++numSerialPatternMatch; 657 return 1; 658 } //serialPatternMatch 659 660 template<class PassT> 661 int CFGStructurizer<PassT>::ifPatternMatch(BlockT *curBlk) { 662 //two edges 663 if (curBlk->succ_size() != 2) { 664 return 0; 665 } 666 667 if (hasBackEdge(curBlk)) { 668 return 0; 669 } 670 671 InstrT *branchInstr = CFGTraits::getNormalBlockBranchInstr(curBlk); 672 if (branchInstr == NULL) { 673 return 0; 674 } 675 676 assert(CFGTraits::isCondBranch(branchInstr)); 677 678 BlockT *trueBlk = CFGTraits::getTrueBranch(branchInstr); 679 BlockT *falseBlk = CFGTraits::getFalseBranch(curBlk, branchInstr); 680 BlockT *landBlk; 681 int cloned = 0; 682 683 // TODO: Simplify 684 if (trueBlk->succ_size() == 1 && falseBlk->succ_size() == 1 685 && *trueBlk->succ_begin() == *falseBlk->succ_begin()) { 686 landBlk = *trueBlk->succ_begin(); 687 } else if (trueBlk->succ_size() == 0 && falseBlk->succ_size() == 0) { 688 landBlk = NULL; 689 } else if (trueBlk->succ_size() == 1 && *trueBlk->succ_begin() == falseBlk) { 690 landBlk = falseBlk; 691 falseBlk = NULL; 692 } else if (falseBlk->succ_size() == 1 693 && *falseBlk->succ_begin() == trueBlk) { 694 landBlk = trueBlk; 695 trueBlk = NULL; 696 } else if (falseBlk->succ_size() == 1 697 && isSameloopDetachedContbreak(trueBlk, falseBlk)) { 698 landBlk = *falseBlk->succ_begin(); 699 } else if (trueBlk->succ_size() == 1 700 && isSameloopDetachedContbreak(falseBlk, trueBlk)) { 701 landBlk = *trueBlk->succ_begin(); 702 } else { 703 return handleJumpintoIf(curBlk, trueBlk, falseBlk); 704 } 705 706 // improveSimpleJumpinfoIf can handle the case where landBlk == NULL but the 707 // new BB created for landBlk==NULL may introduce new challenge to the 708 // reduction process. 709 if (landBlk != NULL && 710 ((trueBlk && trueBlk->pred_size() > 1) 711 || (falseBlk && falseBlk->pred_size() > 1))) { 712 cloned += improveSimpleJumpintoIf(curBlk, trueBlk, falseBlk, &landBlk); 713 } 714 715 if (trueBlk && trueBlk->pred_size() > 1) { 716 trueBlk = cloneBlockForPredecessor(trueBlk, curBlk); 717 ++cloned; 718 } 719 720 if (falseBlk && falseBlk->pred_size() > 1) { 721 falseBlk = cloneBlockForPredecessor(falseBlk, curBlk); 722 ++cloned; 723 } 724 725 mergeIfthenelseBlock(branchInstr, curBlk, trueBlk, falseBlk, landBlk); 726 727 ++numIfPatternMatch; 728 729 numClonedBlock += cloned; 730 731 return 1 + cloned; 732 } //ifPatternMatch 733 734 template<class PassT> 735 int CFGStructurizer<PassT>::switchPatternMatch(BlockT *curBlk) { 736 return 0; 737 } //switchPatternMatch 738 739 template<class PassT> 740 int CFGStructurizer<PassT>::loopendPatternMatch(BlockT *curBlk) { 741 LoopT *loopRep = loopInfo->getLoopFor(curBlk); 742 typename std::vector<LoopT *> nestedLoops; 743 while (loopRep) { 744 nestedLoops.push_back(loopRep); 745 loopRep = loopRep->getParentLoop(); 746 } 747 748 if (nestedLoops.size() == 0) { 749 return 0; 750 } 751 752 // Process nested loop outside->inside, so "continue" to a outside loop won't 753 // be mistaken as "break" of the current loop. 754 int num = 0; 755 for (typename std::vector<LoopT *>::reverse_iterator 756 iter = nestedLoops.rbegin(), iterEnd = nestedLoops.rend(); 757 iter != iterEnd; ++iter) { 758 loopRep = *iter; 759 760 if (getLoopLandBlock(loopRep) != NULL) { 761 continue; 762 } 763 764 BlockT *loopHeader = loopRep->getHeader(); 765 766 int numBreak = loopbreakPatternMatch(loopRep, loopHeader); 767 768 if (numBreak == -1) { 769 break; 770 } 771 772 int numCont = loopcontPatternMatch(loopRep, loopHeader); 773 num += numBreak + numCont; 774 } 775 776 return num; 777 } //loopendPatternMatch 778 779 template<class PassT> 780 int CFGStructurizer<PassT>::loopPatternMatch(BlockT *curBlk) { 781 if (curBlk->succ_size() != 0) { 782 return 0; 783 } 784 785 int numLoop = 0; 786 LoopT *loopRep = loopInfo->getLoopFor(curBlk); 787 while (loopRep && loopRep->getHeader() == curBlk) { 788 LoopLandInfo *loopLand = getLoopLandInfo(loopRep); 789 if (loopLand) { 790 BlockT *landBlk = loopLand->landBlk; 791 assert(landBlk); 792 if (!isRetiredBlock(landBlk)) { 793 mergeLooplandBlock(curBlk, loopLand); 794 ++numLoop; 795 } 796 } 797 loopRep = loopRep->getParentLoop(); 798 } 799 800 numLoopPatternMatch += numLoop; 801 802 return numLoop; 803 } //loopPatternMatch 804 805 template<class PassT> 806 int CFGStructurizer<PassT>::loopbreakPatternMatch(LoopT *loopRep, 807 BlockT *loopHeader) { 808 BlockTSmallerVector exitingBlks; 809 loopRep->getExitingBlocks(exitingBlks); 810 811 if (DEBUGME) { 812 errs() << "Loop has " << exitingBlks.size() << " exiting blocks\n"; 813 } 814 815 if (exitingBlks.size() == 0) { 816 setLoopLandBlock(loopRep); 817 return 0; 818 } 819 820 // Compute the corresponding exitBlks and exit block set. 821 BlockTSmallerVector exitBlks; 822 std::set<BlockT *> exitBlkSet; 823 for (typename BlockTSmallerVector::const_iterator iter = exitingBlks.begin(), 824 iterEnd = exitingBlks.end(); iter != iterEnd; ++iter) { 825 BlockT *exitingBlk = *iter; 826 BlockT *exitBlk = exitingBlock2ExitBlock(loopRep, exitingBlk); 827 exitBlks.push_back(exitBlk); 828 exitBlkSet.insert(exitBlk); //non-duplicate insert 829 } 830 831 assert(exitBlkSet.size() > 0); 832 assert(exitBlks.size() == exitingBlks.size()); 833 834 if (DEBUGME) { 835 errs() << "Loop has " << exitBlkSet.size() << " exit blocks\n"; 836 } 837 838 // Find exitLandBlk. 839 BlockT *exitLandBlk = NULL; 840 int numCloned = 0; 841 int numSerial = 0; 842 843 if (exitBlkSet.size() == 1) { 844 exitLandBlk = *exitBlkSet.begin(); 845 } else { 846 exitLandBlk = findNearestCommonPostDom(exitBlkSet); 847 848 if (exitLandBlk == NULL) { 849 return -1; 850 } 851 852 bool allInPath = true; 853 bool allNotInPath = true; 854 for (typename std::set<BlockT*>::const_iterator 855 iter = exitBlkSet.begin(), 856 iterEnd = exitBlkSet.end(); 857 iter != iterEnd; ++iter) { 858 BlockT *exitBlk = *iter; 859 860 PathToKind pathKind = singlePathTo(exitBlk, exitLandBlk, true); 861 if (DEBUGME) { 862 errs() << "BB" << exitBlk->getNumber() 863 << " to BB" << exitLandBlk->getNumber() << " PathToKind=" 864 << pathKind << "\n"; 865 } 866 867 allInPath = allInPath && (pathKind == SinglePath_InPath); 868 allNotInPath = allNotInPath && (pathKind == SinglePath_NotInPath); 869 870 if (!allInPath && !allNotInPath) { 871 if (DEBUGME) { 872 errs() << "singlePath check fail\n"; 873 } 874 return -1; 875 } 876 } // check all exit blocks 877 878 if (allNotInPath) { 879 880 // TODO: Simplify, maybe separate function? 881 LoopT *parentLoopRep = loopRep->getParentLoop(); 882 BlockT *parentLoopHeader = NULL; 883 if (parentLoopRep) 884 parentLoopHeader = parentLoopRep->getHeader(); 885 886 if (exitLandBlk == parentLoopHeader && 887 (exitLandBlk = relocateLoopcontBlock(parentLoopRep, 888 loopRep, 889 exitBlkSet, 890 exitLandBlk)) != NULL) { 891 if (DEBUGME) { 892 errs() << "relocateLoopcontBlock success\n"; 893 } 894 } else if ((exitLandBlk = addLoopEndbranchBlock(loopRep, 895 exitingBlks, 896 exitBlks)) != NULL) { 897 if (DEBUGME) { 898 errs() << "insertEndbranchBlock success\n"; 899 } 900 } else { 901 if (DEBUGME) { 902 errs() << "loop exit fail\n"; 903 } 904 return -1; 905 } 906 } 907 908 // Handle side entry to exit path. 909 exitBlks.clear(); 910 exitBlkSet.clear(); 911 for (typename BlockTSmallerVector::iterator iterExiting = 912 exitingBlks.begin(), 913 iterExitingEnd = exitingBlks.end(); 914 iterExiting != iterExitingEnd; ++iterExiting) { 915 BlockT *exitingBlk = *iterExiting; 916 BlockT *exitBlk = exitingBlock2ExitBlock(loopRep, exitingBlk); 917 BlockT *newExitBlk = exitBlk; 918 919 if (exitBlk != exitLandBlk && exitBlk->pred_size() > 1) { 920 newExitBlk = cloneBlockForPredecessor(exitBlk, exitingBlk); 921 ++numCloned; 922 } 923 924 numCloned += cloneOnSideEntryTo(exitingBlk, newExitBlk, exitLandBlk); 925 926 exitBlks.push_back(newExitBlk); 927 exitBlkSet.insert(newExitBlk); 928 } 929 930 for (typename BlockTSmallerVector::iterator iterExit = exitBlks.begin(), 931 iterExitEnd = exitBlks.end(); 932 iterExit != iterExitEnd; ++iterExit) { 933 BlockT *exitBlk = *iterExit; 934 numSerial += serialPatternMatch(exitBlk); 935 } 936 937 for (typename BlockTSmallerVector::iterator iterExit = exitBlks.begin(), 938 iterExitEnd = exitBlks.end(); 939 iterExit != iterExitEnd; ++iterExit) { 940 BlockT *exitBlk = *iterExit; 941 if (exitBlk->pred_size() > 1) { 942 if (exitBlk != exitLandBlk) { 943 return -1; 944 } 945 } else { 946 if (exitBlk != exitLandBlk && 947 (exitBlk->succ_size() != 1 || 948 *exitBlk->succ_begin() != exitLandBlk)) { 949 return -1; 950 } 951 } 952 } 953 } // else 954 955 exitLandBlk = recordLoopLandBlock(loopRep, exitLandBlk, exitBlks, exitBlkSet); 956 957 // Fold break into the breaking block. Leverage across level breaks. 958 assert(exitingBlks.size() == exitBlks.size()); 959 for (typename BlockTSmallerVector::const_iterator iterExit = exitBlks.begin(), 960 iterExiting = exitingBlks.begin(), iterExitEnd = exitBlks.end(); 961 iterExit != iterExitEnd; ++iterExit, ++iterExiting) { 962 BlockT *exitBlk = *iterExit; 963 BlockT *exitingBlk = *iterExiting; 964 assert(exitBlk->pred_size() == 1 || exitBlk == exitLandBlk); 965 LoopT *exitingLoop = loopInfo->getLoopFor(exitingBlk); 966 handleLoopbreak(exitingBlk, exitingLoop, exitBlk, loopRep, exitLandBlk); 967 } 968 969 int numBreak = static_cast<int>(exitingBlks.size()); 970 numLoopbreakPatternMatch += numBreak; 971 numClonedBlock += numCloned; 972 return numBreak + numSerial + numCloned; 973 } //loopbreakPatternMatch 974 975 template<class PassT> 976 int CFGStructurizer<PassT>::loopcontPatternMatch(LoopT *loopRep, 977 BlockT *loopHeader) { 978 int numCont = 0; 979 SmallVector<BlockT *, DEFAULT_VEC_SLOTS> contBlk; 980 for (typename InvBlockGTraits::ChildIteratorType iter = 981 InvBlockGTraits::child_begin(loopHeader), 982 iterEnd = InvBlockGTraits::child_end(loopHeader); 983 iter != iterEnd; ++iter) { 984 BlockT *curBlk = *iter; 985 if (loopRep->contains(curBlk)) { 986 handleLoopcontBlock(curBlk, loopInfo->getLoopFor(curBlk), 987 loopHeader, loopRep); 988 contBlk.push_back(curBlk); 989 ++numCont; 990 } 991 } 992 993 for (typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::iterator 994 iter = contBlk.begin(), iterEnd = contBlk.end(); 995 iter != iterEnd; ++iter) { 996 (*iter)->removeSuccessor(loopHeader); 997 } 998 999 numLoopcontPatternMatch += numCont; 1000 1001 return numCont; 1002 } //loopcontPatternMatch 1003 1004 1005 template<class PassT> 1006 bool CFGStructurizer<PassT>::isSameloopDetachedContbreak(BlockT *src1Blk, 1007 BlockT *src2Blk) { 1008 // return true iff src1Blk->succ_size() == 0 && src1Blk and src2Blk are in the 1009 // same loop with LoopLandInfo without explicitly keeping track of 1010 // loopContBlks and loopBreakBlks, this is a method to get the information. 1011 // 1012 if (src1Blk->succ_size() == 0) { 1013 LoopT *loopRep = loopInfo->getLoopFor(src1Blk); 1014 if (loopRep != NULL && loopRep == loopInfo->getLoopFor(src2Blk)) { 1015 LoopLandInfo *&theEntry = loopLandInfoMap[loopRep]; 1016 if (theEntry != NULL) { 1017 if (DEBUGME) { 1018 errs() << "isLoopContBreakBlock yes src1 = BB" 1019 << src1Blk->getNumber() 1020 << " src2 = BB" << src2Blk->getNumber() << "\n"; 1021 } 1022 return true; 1023 } 1024 } 1025 } 1026 return false; 1027 } //isSameloopDetachedContbreak 1028 1029 template<class PassT> 1030 int CFGStructurizer<PassT>::handleJumpintoIf(BlockT *headBlk, 1031 BlockT *trueBlk, 1032 BlockT *falseBlk) { 1033 int num = handleJumpintoIfImp(headBlk, trueBlk, falseBlk); 1034 if (num == 0) { 1035 if (DEBUGME) { 1036 errs() << "handleJumpintoIf swap trueBlk and FalseBlk" << "\n"; 1037 } 1038 num = handleJumpintoIfImp(headBlk, falseBlk, trueBlk); 1039 } 1040 return num; 1041 } 1042 1043 template<class PassT> 1044 int CFGStructurizer<PassT>::handleJumpintoIfImp(BlockT *headBlk, 1045 BlockT *trueBlk, 1046 BlockT *falseBlk) { 1047 int num = 0; 1048 BlockT *downBlk; 1049 1050 //trueBlk could be the common post dominator 1051 downBlk = trueBlk; 1052 1053 if (DEBUGME) { 1054 errs() << "handleJumpintoIfImp head = BB" << headBlk->getNumber() 1055 << " true = BB" << trueBlk->getNumber() 1056 << ", numSucc=" << trueBlk->succ_size() 1057 << " false = BB" << falseBlk->getNumber() << "\n"; 1058 } 1059 1060 while (downBlk) { 1061 if (DEBUGME) { 1062 errs() << "check down = BB" << downBlk->getNumber(); 1063 } 1064 1065 if (singlePathTo(falseBlk, downBlk) == SinglePath_InPath) { 1066 if (DEBUGME) { 1067 errs() << " working\n"; 1068 } 1069 1070 num += cloneOnSideEntryTo(headBlk, trueBlk, downBlk); 1071 num += cloneOnSideEntryTo(headBlk, falseBlk, downBlk); 1072 1073 numClonedBlock += num; 1074 num += serialPatternMatch(*headBlk->succ_begin()); 1075 num += serialPatternMatch(*(++headBlk->succ_begin())); 1076 num += ifPatternMatch(headBlk); 1077 assert(num > 0); 1078 1079 break; 1080 } 1081 if (DEBUGME) { 1082 errs() << " not working\n"; 1083 } 1084 downBlk = (downBlk->succ_size() == 1) ? (*downBlk->succ_begin()) : NULL; 1085 } // walk down the postDomTree 1086 1087 return num; 1088 } //handleJumpintoIf 1089 1090 template<class PassT> 1091 void CFGStructurizer<PassT>::showImproveSimpleJumpintoIf(BlockT *headBlk, 1092 BlockT *trueBlk, 1093 BlockT *falseBlk, 1094 BlockT *landBlk, 1095 bool detail) { 1096 errs() << "head = BB" << headBlk->getNumber() 1097 << " size = " << headBlk->size(); 1098 if (detail) { 1099 errs() << "\n"; 1100 headBlk->print(errs()); 1101 errs() << "\n"; 1102 } 1103 1104 if (trueBlk) { 1105 errs() << ", true = BB" << trueBlk->getNumber() << " size = " 1106 << trueBlk->size() << " numPred = " << trueBlk->pred_size(); 1107 if (detail) { 1108 errs() << "\n"; 1109 trueBlk->print(errs()); 1110 errs() << "\n"; 1111 } 1112 } 1113 if (falseBlk) { 1114 errs() << ", false = BB" << falseBlk->getNumber() << " size = " 1115 << falseBlk->size() << " numPred = " << falseBlk->pred_size(); 1116 if (detail) { 1117 errs() << "\n"; 1118 falseBlk->print(errs()); 1119 errs() << "\n"; 1120 } 1121 } 1122 if (landBlk) { 1123 errs() << ", land = BB" << landBlk->getNumber() << " size = " 1124 << landBlk->size() << " numPred = " << landBlk->pred_size(); 1125 if (detail) { 1126 errs() << "\n"; 1127 landBlk->print(errs()); 1128 errs() << "\n"; 1129 } 1130 } 1131 1132 errs() << "\n"; 1133 } //showImproveSimpleJumpintoIf 1134 1135 template<class PassT> 1136 int CFGStructurizer<PassT>::improveSimpleJumpintoIf(BlockT *headBlk, 1137 BlockT *trueBlk, 1138 BlockT *falseBlk, 1139 BlockT **plandBlk) { 1140 bool migrateTrue = false; 1141 bool migrateFalse = false; 1142 1143 BlockT *landBlk = *plandBlk; 1144 1145 assert((trueBlk == NULL || trueBlk->succ_size() <= 1) 1146 && (falseBlk == NULL || falseBlk->succ_size() <= 1)); 1147 1148 if (trueBlk == falseBlk) { 1149 return 0; 1150 } 1151 1152 migrateTrue = needMigrateBlock(trueBlk); 1153 migrateFalse = needMigrateBlock(falseBlk); 1154 1155 if (!migrateTrue && !migrateFalse) { 1156 return 0; 1157 } 1158 1159 // If we need to migrate either trueBlk and falseBlk, migrate the rest that 1160 // have more than one predecessors. without doing this, its predecessor 1161 // rather than headBlk will have undefined value in initReg. 1162 if (!migrateTrue && trueBlk && trueBlk->pred_size() > 1) { 1163 migrateTrue = true; 1164 } 1165 if (!migrateFalse && falseBlk && falseBlk->pred_size() > 1) { 1166 migrateFalse = true; 1167 } 1168 1169 if (DEBUGME) { 1170 errs() << "before improveSimpleJumpintoIf: "; 1171 showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 0); 1172 } 1173 1174 // org: headBlk => if () {trueBlk} else {falseBlk} => landBlk 1175 // 1176 // new: headBlk => if () {initReg = 1; org trueBlk branch} else 1177 // {initReg = 0; org falseBlk branch } 1178 // => landBlk => if (initReg) {org trueBlk} else {org falseBlk} 1179 // => org landBlk 1180 // if landBlk->pred_size() > 2, put the about if-else inside 1181 // if (initReg !=2) {...} 1182 // 1183 // add initReg = initVal to headBlk 1184 1185 const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32); 1186 unsigned initReg = 1187 funcRep->getRegInfo().createVirtualRegister(I32RC); 1188 if (!migrateTrue || !migrateFalse) { 1189 int initVal = migrateTrue ? 0 : 1; 1190 CFGTraits::insertAssignInstrBefore(headBlk, passRep, initReg, initVal); 1191 } 1192 1193 int numNewBlk = 0; 1194 1195 if (landBlk == NULL) { 1196 landBlk = funcRep->CreateMachineBasicBlock(); 1197 funcRep->push_back(landBlk); //insert to function 1198 1199 if (trueBlk) { 1200 trueBlk->addSuccessor(landBlk); 1201 } else { 1202 headBlk->addSuccessor(landBlk); 1203 } 1204 1205 if (falseBlk) { 1206 falseBlk->addSuccessor(landBlk); 1207 } else { 1208 headBlk->addSuccessor(landBlk); 1209 } 1210 1211 numNewBlk ++; 1212 } 1213 1214 bool landBlkHasOtherPred = (landBlk->pred_size() > 2); 1215 1216 //insert AMDGPU::ENDIF to avoid special case "input landBlk == NULL" 1217 typename BlockT::iterator insertPos = 1218 CFGTraits::getInstrPos 1219 (landBlk, CFGTraits::insertInstrBefore(landBlk, AMDGPU::ENDIF, passRep)); 1220 1221 if (landBlkHasOtherPred) { 1222 unsigned immReg = 1223 funcRep->getRegInfo().createVirtualRegister(I32RC); 1224 CFGTraits::insertAssignInstrBefore(insertPos, passRep, immReg, 2); 1225 unsigned cmpResReg = 1226 funcRep->getRegInfo().createVirtualRegister(I32RC); 1227 1228 CFGTraits::insertCompareInstrBefore(landBlk, insertPos, passRep, cmpResReg, 1229 initReg, immReg); 1230 CFGTraits::insertCondBranchBefore(landBlk, insertPos, 1231 AMDGPU::IF_PREDICATE_SET, passRep, 1232 cmpResReg, DebugLoc()); 1233 } 1234 1235 CFGTraits::insertCondBranchBefore(landBlk, insertPos, AMDGPU::IF_PREDICATE_SET, 1236 passRep, initReg, DebugLoc()); 1237 1238 if (migrateTrue) { 1239 migrateInstruction(trueBlk, landBlk, insertPos); 1240 // need to uncondionally insert the assignment to ensure a path from its 1241 // predecessor rather than headBlk has valid value in initReg if 1242 // (initVal != 1). 1243 CFGTraits::insertAssignInstrBefore(trueBlk, passRep, initReg, 1); 1244 } 1245 CFGTraits::insertInstrBefore(insertPos, AMDGPU::ELSE, passRep); 1246 1247 if (migrateFalse) { 1248 migrateInstruction(falseBlk, landBlk, insertPos); 1249 // need to uncondionally insert the assignment to ensure a path from its 1250 // predecessor rather than headBlk has valid value in initReg if 1251 // (initVal != 0) 1252 CFGTraits::insertAssignInstrBefore(falseBlk, passRep, initReg, 0); 1253 } 1254 1255 if (landBlkHasOtherPred) { 1256 // add endif 1257 CFGTraits::insertInstrBefore(insertPos, AMDGPU::ENDIF, passRep); 1258 1259 // put initReg = 2 to other predecessors of landBlk 1260 for (typename BlockT::pred_iterator predIter = landBlk->pred_begin(), 1261 predIterEnd = landBlk->pred_end(); predIter != predIterEnd; 1262 ++predIter) { 1263 BlockT *curBlk = *predIter; 1264 if (curBlk != trueBlk && curBlk != falseBlk) { 1265 CFGTraits::insertAssignInstrBefore(curBlk, passRep, initReg, 2); 1266 } 1267 } //for 1268 } 1269 if (DEBUGME) { 1270 errs() << "result from improveSimpleJumpintoIf: "; 1271 showImproveSimpleJumpintoIf(headBlk, trueBlk, falseBlk, landBlk, 0); 1272 } 1273 1274 // update landBlk 1275 *plandBlk = landBlk; 1276 1277 return numNewBlk; 1278 } //improveSimpleJumpintoIf 1279 1280 template<class PassT> 1281 void CFGStructurizer<PassT>::handleLoopbreak(BlockT *exitingBlk, 1282 LoopT *exitingLoop, 1283 BlockT *exitBlk, 1284 LoopT *exitLoop, 1285 BlockT *landBlk) { 1286 if (DEBUGME) { 1287 errs() << "Trying to break loop-depth = " << getLoopDepth(exitLoop) 1288 << " from loop-depth = " << getLoopDepth(exitingLoop) << "\n"; 1289 } 1290 const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32); 1291 1292 RegiT initReg = INVALIDREGNUM; 1293 if (exitingLoop != exitLoop) { 1294 initReg = static_cast<int> 1295 (funcRep->getRegInfo().createVirtualRegister(I32RC)); 1296 assert(initReg != INVALIDREGNUM); 1297 addLoopBreakInitReg(exitLoop, initReg); 1298 while (exitingLoop != exitLoop && exitingLoop) { 1299 addLoopBreakOnReg(exitingLoop, initReg); 1300 exitingLoop = exitingLoop->getParentLoop(); 1301 } 1302 assert(exitingLoop == exitLoop); 1303 } 1304 1305 mergeLoopbreakBlock(exitingBlk, exitBlk, landBlk, initReg); 1306 1307 } //handleLoopbreak 1308 1309 template<class PassT> 1310 void CFGStructurizer<PassT>::handleLoopcontBlock(BlockT *contingBlk, 1311 LoopT *contingLoop, 1312 BlockT *contBlk, 1313 LoopT *contLoop) { 1314 if (DEBUGME) { 1315 errs() << "loopcontPattern cont = BB" << contingBlk->getNumber() 1316 << " header = BB" << contBlk->getNumber() << "\n"; 1317 1318 errs() << "Trying to continue loop-depth = " 1319 << getLoopDepth(contLoop) 1320 << " from loop-depth = " << getLoopDepth(contingLoop) << "\n"; 1321 } 1322 1323 RegiT initReg = INVALIDREGNUM; 1324 const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32); 1325 if (contingLoop != contLoop) { 1326 initReg = static_cast<int> 1327 (funcRep->getRegInfo().createVirtualRegister(I32RC)); 1328 assert(initReg != INVALIDREGNUM); 1329 addLoopContInitReg(contLoop, initReg); 1330 while (contingLoop && contingLoop->getParentLoop() != contLoop) { 1331 addLoopBreakOnReg(contingLoop, initReg); //not addLoopContOnReg 1332 contingLoop = contingLoop->getParentLoop(); 1333 } 1334 assert(contingLoop && contingLoop->getParentLoop() == contLoop); 1335 addLoopContOnReg(contingLoop, initReg); 1336 } 1337 1338 settleLoopcontBlock(contingBlk, contBlk, initReg); 1339 } //handleLoopcontBlock 1340 1341 template<class PassT> 1342 void CFGStructurizer<PassT>::mergeSerialBlock(BlockT *dstBlk, BlockT *srcBlk) { 1343 if (DEBUGME) { 1344 errs() << "serialPattern BB" << dstBlk->getNumber() 1345 << " <= BB" << srcBlk->getNumber() << "\n"; 1346 } 1347 dstBlk->splice(dstBlk->end(), srcBlk, srcBlk->begin(), srcBlk->end()); 1348 1349 dstBlk->removeSuccessor(srcBlk); 1350 CFGTraits::cloneSuccessorList(dstBlk, srcBlk); 1351 1352 removeSuccessor(srcBlk); 1353 retireBlock(dstBlk, srcBlk); 1354 } //mergeSerialBlock 1355 1356 template<class PassT> 1357 void CFGStructurizer<PassT>::mergeIfthenelseBlock(InstrT *branchInstr, 1358 BlockT *curBlk, 1359 BlockT *trueBlk, 1360 BlockT *falseBlk, 1361 BlockT *landBlk) { 1362 if (DEBUGME) { 1363 errs() << "ifPattern BB" << curBlk->getNumber(); 1364 errs() << "{ "; 1365 if (trueBlk) { 1366 errs() << "BB" << trueBlk->getNumber(); 1367 } 1368 errs() << " } else "; 1369 errs() << "{ "; 1370 if (falseBlk) { 1371 errs() << "BB" << falseBlk->getNumber(); 1372 } 1373 errs() << " }\n "; 1374 errs() << "landBlock: "; 1375 if (landBlk == NULL) { 1376 errs() << "NULL"; 1377 } else { 1378 errs() << "BB" << landBlk->getNumber(); 1379 } 1380 errs() << "\n"; 1381 } 1382 1383 int oldOpcode = branchInstr->getOpcode(); 1384 DebugLoc branchDL = branchInstr->getDebugLoc(); 1385 1386 // transform to 1387 // if cond 1388 // trueBlk 1389 // else 1390 // falseBlk 1391 // endif 1392 // landBlk 1393 1394 typename BlockT::iterator branchInstrPos = 1395 CFGTraits::getInstrPos(curBlk, branchInstr); 1396 CFGTraits::insertCondBranchBefore(branchInstrPos, 1397 CFGTraits::getBranchNzeroOpcode(oldOpcode), 1398 passRep, 1399 branchDL); 1400 1401 if (trueBlk) { 1402 curBlk->splice(branchInstrPos, trueBlk, trueBlk->begin(), trueBlk->end()); 1403 curBlk->removeSuccessor(trueBlk); 1404 if (landBlk && trueBlk->succ_size()!=0) { 1405 trueBlk->removeSuccessor(landBlk); 1406 } 1407 retireBlock(curBlk, trueBlk); 1408 } 1409 CFGTraits::insertInstrBefore(branchInstrPos, AMDGPU::ELSE, passRep); 1410 1411 if (falseBlk) { 1412 curBlk->splice(branchInstrPos, falseBlk, falseBlk->begin(), 1413 falseBlk->end()); 1414 curBlk->removeSuccessor(falseBlk); 1415 if (landBlk && falseBlk->succ_size() != 0) { 1416 falseBlk->removeSuccessor(landBlk); 1417 } 1418 retireBlock(curBlk, falseBlk); 1419 } 1420 CFGTraits::insertInstrBefore(branchInstrPos, AMDGPU::ENDIF, passRep); 1421 1422 branchInstr->eraseFromParent(); 1423 1424 if (landBlk && trueBlk && falseBlk) { 1425 curBlk->addSuccessor(landBlk); 1426 } 1427 1428 } //mergeIfthenelseBlock 1429 1430 template<class PassT> 1431 void CFGStructurizer<PassT>::mergeLooplandBlock(BlockT *dstBlk, 1432 LoopLandInfo *loopLand) { 1433 BlockT *landBlk = loopLand->landBlk; 1434 1435 if (DEBUGME) { 1436 errs() << "loopPattern header = BB" << dstBlk->getNumber() 1437 << " land = BB" << landBlk->getNumber() << "\n"; 1438 } 1439 1440 // Loop contInitRegs are init at the beginning of the loop. 1441 for (typename std::set<RegiT>::const_iterator iter = 1442 loopLand->contInitRegs.begin(), 1443 iterEnd = loopLand->contInitRegs.end(); iter != iterEnd; ++iter) { 1444 CFGTraits::insertAssignInstrBefore(dstBlk, passRep, *iter, 0); 1445 } 1446 1447 /* we last inserterd the DebugLoc in the 1448 * BREAK_LOGICALZ_i32 or AMDGPU::BREAK_LOGICALNZ statement in the current dstBlk. 1449 * search for the DebugLoc in the that statement. 1450 * if not found, we have to insert the empty/default DebugLoc */ 1451 InstrT *loopBreakInstr = CFGTraits::getLoopBreakInstr(dstBlk); 1452 DebugLoc DLBreak = (loopBreakInstr) ? loopBreakInstr->getDebugLoc() : DebugLoc(); 1453 1454 CFGTraits::insertInstrBefore(dstBlk, AMDGPU::WHILELOOP, passRep, DLBreak); 1455 // Loop breakInitRegs are init before entering the loop. 1456 for (typename std::set<RegiT>::const_iterator iter = 1457 loopLand->breakInitRegs.begin(), 1458 iterEnd = loopLand->breakInitRegs.end(); iter != iterEnd; ++iter) { 1459 CFGTraits::insertAssignInstrBefore(dstBlk, passRep, *iter, 0); 1460 } 1461 // Loop endbranchInitRegs are init before entering the loop. 1462 for (typename std::set<RegiT>::const_iterator iter = 1463 loopLand->endbranchInitRegs.begin(), 1464 iterEnd = loopLand->endbranchInitRegs.end(); iter != iterEnd; ++iter) { 1465 CFGTraits::insertAssignInstrBefore(dstBlk, passRep, *iter, 0); 1466 } 1467 1468 /* we last inserterd the DebugLoc in the continue statement in the current dstBlk 1469 * search for the DebugLoc in the continue statement. 1470 * if not found, we have to insert the empty/default DebugLoc */ 1471 InstrT *continueInstr = CFGTraits::getContinueInstr(dstBlk); 1472 DebugLoc DLContinue = (continueInstr) ? continueInstr->getDebugLoc() : DebugLoc(); 1473 1474 CFGTraits::insertInstrEnd(dstBlk, AMDGPU::ENDLOOP, passRep, DLContinue); 1475 // Loop breakOnRegs are check after the ENDLOOP: break the loop outside this 1476 // loop. 1477 for (typename std::set<RegiT>::const_iterator iter = 1478 loopLand->breakOnRegs.begin(), 1479 iterEnd = loopLand->breakOnRegs.end(); iter != iterEnd; ++iter) { 1480 CFGTraits::insertCondBranchEnd(dstBlk, AMDGPU::PREDICATED_BREAK, passRep, 1481 *iter); 1482 } 1483 1484 // Loop contOnRegs are check after the ENDLOOP: cont the loop outside this 1485 // loop. 1486 for (std::set<RegiT>::const_iterator iter = loopLand->contOnRegs.begin(), 1487 iterEnd = loopLand->contOnRegs.end(); iter != iterEnd; ++iter) { 1488 CFGTraits::insertCondBranchEnd(dstBlk, AMDGPU::CONTINUE_LOGICALNZ_i32, 1489 passRep, *iter); 1490 } 1491 1492 dstBlk->splice(dstBlk->end(), landBlk, landBlk->begin(), landBlk->end()); 1493 1494 for (typename BlockT::succ_iterator iter = landBlk->succ_begin(), 1495 iterEnd = landBlk->succ_end(); iter != iterEnd; ++iter) { 1496 dstBlk->addSuccessor(*iter); // *iter's predecessor is also taken care of. 1497 } 1498 1499 removeSuccessor(landBlk); 1500 retireBlock(dstBlk, landBlk); 1501 } //mergeLooplandBlock 1502 1503 template<class PassT> 1504 void CFGStructurizer<PassT>::reversePredicateSetter(typename BlockT::iterator I) { 1505 while (I--) { 1506 if (I->getOpcode() == AMDGPU::PRED_X) { 1507 switch (static_cast<MachineInstr *>(I)->getOperand(2).getImm()) { 1508 case OPCODE_IS_ZERO_INT: 1509 static_cast<MachineInstr *>(I)->getOperand(2).setImm(OPCODE_IS_NOT_ZERO_INT); 1510 return; 1511 case OPCODE_IS_NOT_ZERO_INT: 1512 static_cast<MachineInstr *>(I)->getOperand(2).setImm(OPCODE_IS_ZERO_INT); 1513 return; 1514 case OPCODE_IS_ZERO: 1515 static_cast<MachineInstr *>(I)->getOperand(2).setImm(OPCODE_IS_NOT_ZERO); 1516 return; 1517 case OPCODE_IS_NOT_ZERO: 1518 static_cast<MachineInstr *>(I)->getOperand(2).setImm(OPCODE_IS_ZERO); 1519 return; 1520 default: 1521 assert(0 && "PRED_X Opcode invalid!"); 1522 } 1523 } 1524 } 1525 } 1526 1527 template<class PassT> 1528 void CFGStructurizer<PassT>::mergeLoopbreakBlock(BlockT *exitingBlk, 1529 BlockT *exitBlk, 1530 BlockT *exitLandBlk, 1531 RegiT setReg) { 1532 if (DEBUGME) { 1533 errs() << "loopbreakPattern exiting = BB" << exitingBlk->getNumber() 1534 << " exit = BB" << exitBlk->getNumber() 1535 << " land = BB" << exitLandBlk->getNumber() << "\n"; 1536 } 1537 1538 InstrT *branchInstr = CFGTraits::getLoopendBlockBranchInstr(exitingBlk); 1539 assert(branchInstr && CFGTraits::isCondBranch(branchInstr)); 1540 1541 DebugLoc DL = branchInstr->getDebugLoc(); 1542 1543 BlockT *trueBranch = CFGTraits::getTrueBranch(branchInstr); 1544 1545 // transform exitingBlk to 1546 // if ( ) { 1547 // exitBlk (if exitBlk != exitLandBlk) 1548 // setReg = 1 1549 // break 1550 // }endif 1551 // successor = {orgSuccessor(exitingBlk) - exitBlk} 1552 1553 typename BlockT::iterator branchInstrPos = 1554 CFGTraits::getInstrPos(exitingBlk, branchInstr); 1555 1556 if (exitBlk == exitLandBlk && setReg == INVALIDREGNUM) { 1557 //break_logical 1558 1559 if (trueBranch != exitBlk) { 1560 reversePredicateSetter(branchInstrPos); 1561 } 1562 CFGTraits::insertCondBranchBefore(branchInstrPos, AMDGPU::PREDICATED_BREAK, passRep, DL); 1563 } else { 1564 if (trueBranch != exitBlk) { 1565 reversePredicateSetter(branchInstr); 1566 } 1567 CFGTraits::insertCondBranchBefore(branchInstrPos, AMDGPU::PREDICATED_BREAK, passRep, DL); 1568 if (exitBlk != exitLandBlk) { 1569 //splice is insert-before ... 1570 exitingBlk->splice(branchInstrPos, exitBlk, exitBlk->begin(), 1571 exitBlk->end()); 1572 } 1573 if (setReg != INVALIDREGNUM) { 1574 CFGTraits::insertAssignInstrBefore(branchInstrPos, passRep, setReg, 1); 1575 } 1576 CFGTraits::insertInstrBefore(branchInstrPos, AMDGPU::BREAK, passRep); 1577 } //if_logical 1578 1579 //now branchInst can be erase safely 1580 branchInstr->eraseFromParent(); 1581 1582 //now take care of successors, retire blocks 1583 exitingBlk->removeSuccessor(exitBlk); 1584 if (exitBlk != exitLandBlk) { 1585 //splice is insert-before ... 1586 exitBlk->removeSuccessor(exitLandBlk); 1587 retireBlock(exitingBlk, exitBlk); 1588 } 1589 1590 } //mergeLoopbreakBlock 1591 1592 template<class PassT> 1593 void CFGStructurizer<PassT>::settleLoopcontBlock(BlockT *contingBlk, 1594 BlockT *contBlk, 1595 RegiT setReg) { 1596 if (DEBUGME) { 1597 errs() << "settleLoopcontBlock conting = BB" 1598 << contingBlk->getNumber() 1599 << ", cont = BB" << contBlk->getNumber() << "\n"; 1600 } 1601 1602 InstrT *branchInstr = CFGTraits::getLoopendBlockBranchInstr(contingBlk); 1603 if (branchInstr) { 1604 assert(CFGTraits::isCondBranch(branchInstr)); 1605 typename BlockT::iterator branchInstrPos = 1606 CFGTraits::getInstrPos(contingBlk, branchInstr); 1607 BlockT *trueBranch = CFGTraits::getTrueBranch(branchInstr); 1608 int oldOpcode = branchInstr->getOpcode(); 1609 DebugLoc DL = branchInstr->getDebugLoc(); 1610 1611 // transform contingBlk to 1612 // if () { 1613 // move instr after branchInstr 1614 // continue 1615 // or 1616 // setReg = 1 1617 // break 1618 // }endif 1619 // successor = {orgSuccessor(contingBlk) - loopHeader} 1620 1621 bool useContinueLogical = 1622 (setReg == INVALIDREGNUM && (&*contingBlk->rbegin()) == branchInstr); 1623 1624 if (useContinueLogical == false) { 1625 int branchOpcode = 1626 trueBranch == contBlk ? CFGTraits::getBranchNzeroOpcode(oldOpcode) 1627 : CFGTraits::getBranchZeroOpcode(oldOpcode); 1628 1629 CFGTraits::insertCondBranchBefore(branchInstrPos, branchOpcode, passRep, DL); 1630 1631 if (setReg != INVALIDREGNUM) { 1632 CFGTraits::insertAssignInstrBefore(branchInstrPos, passRep, setReg, 1); 1633 // insertEnd to ensure phi-moves, if exist, go before the continue-instr. 1634 CFGTraits::insertInstrEnd(contingBlk, AMDGPU::BREAK, passRep, DL); 1635 } else { 1636 // insertEnd to ensure phi-moves, if exist, go before the continue-instr. 1637 CFGTraits::insertInstrEnd(contingBlk, AMDGPU::CONTINUE, passRep, DL); 1638 } 1639 1640 CFGTraits::insertInstrEnd(contingBlk, AMDGPU::ENDIF, passRep, DL); 1641 } else { 1642 int branchOpcode = 1643 trueBranch == contBlk ? CFGTraits::getContinueNzeroOpcode(oldOpcode) 1644 : CFGTraits::getContinueZeroOpcode(oldOpcode); 1645 1646 CFGTraits::insertCondBranchBefore(branchInstrPos, branchOpcode, passRep, DL); 1647 } 1648 1649 branchInstr->eraseFromParent(); 1650 } else { 1651 // if we've arrived here then we've already erased the branch instruction 1652 // travel back up the basic block to see the last reference of our debug location 1653 // we've just inserted that reference here so it should be representative 1654 if (setReg != INVALIDREGNUM) { 1655 CFGTraits::insertAssignInstrBefore(contingBlk, passRep, setReg, 1); 1656 // insertEnd to ensure phi-moves, if exist, go before the continue-instr. 1657 CFGTraits::insertInstrEnd(contingBlk, AMDGPU::BREAK, passRep, CFGTraits::getLastDebugLocInBB(contingBlk)); 1658 } else { 1659 // insertEnd to ensure phi-moves, if exist, go before the continue-instr. 1660 CFGTraits::insertInstrEnd(contingBlk, AMDGPU::CONTINUE, passRep, CFGTraits::getLastDebugLocInBB(contingBlk)); 1661 } 1662 } //else 1663 1664 } //settleLoopcontBlock 1665 1666 // BBs in exitBlkSet are determined as in break-path for loopRep, 1667 // before we can put code for BBs as inside loop-body for loopRep 1668 // check whether those BBs are determined as cont-BB for parentLoopRep 1669 // earlier. 1670 // If so, generate a new BB newBlk 1671 // (1) set newBlk common successor of BBs in exitBlkSet 1672 // (2) change the continue-instr in BBs in exitBlkSet to break-instr 1673 // (3) generate continue-instr in newBlk 1674 // 1675 template<class PassT> 1676 typename CFGStructurizer<PassT>::BlockT * 1677 CFGStructurizer<PassT>::relocateLoopcontBlock(LoopT *parentLoopRep, 1678 LoopT *loopRep, 1679 std::set<BlockT *> &exitBlkSet, 1680 BlockT *exitLandBlk) { 1681 std::set<BlockT *> endBlkSet; 1682 1683 1684 1685 for (typename std::set<BlockT *>::const_iterator iter = exitBlkSet.begin(), 1686 iterEnd = exitBlkSet.end(); 1687 iter != iterEnd; ++iter) { 1688 BlockT *exitBlk = *iter; 1689 BlockT *endBlk = singlePathEnd(exitBlk, exitLandBlk); 1690 1691 if (endBlk == NULL || CFGTraits::getContinueInstr(endBlk) == NULL) 1692 return NULL; 1693 1694 endBlkSet.insert(endBlk); 1695 } 1696 1697 BlockT *newBlk = funcRep->CreateMachineBasicBlock(); 1698 funcRep->push_back(newBlk); //insert to function 1699 CFGTraits::insertInstrEnd(newBlk, AMDGPU::CONTINUE, passRep); 1700 SHOWNEWBLK(newBlk, "New continue block: "); 1701 1702 for (typename std::set<BlockT*>::const_iterator iter = endBlkSet.begin(), 1703 iterEnd = endBlkSet.end(); 1704 iter != iterEnd; ++iter) { 1705 BlockT *endBlk = *iter; 1706 InstrT *contInstr = CFGTraits::getContinueInstr(endBlk); 1707 if (contInstr) { 1708 contInstr->eraseFromParent(); 1709 } 1710 endBlk->addSuccessor(newBlk); 1711 if (DEBUGME) { 1712 errs() << "Add new continue Block to BB" 1713 << endBlk->getNumber() << " successors\n"; 1714 } 1715 } 1716 1717 return newBlk; 1718 } //relocateLoopcontBlock 1719 1720 1721 // LoopEndbranchBlock is a BB created by the CFGStructurizer to use as 1722 // LoopLandBlock. This BB branch on the loop endBranchInit register to the 1723 // pathes corresponding to the loop exiting branches. 1724 1725 template<class PassT> 1726 typename CFGStructurizer<PassT>::BlockT * 1727 CFGStructurizer<PassT>::addLoopEndbranchBlock(LoopT *loopRep, 1728 BlockTSmallerVector &exitingBlks, 1729 BlockTSmallerVector &exitBlks) { 1730 const AMDGPUInstrInfo *tii = 1731 static_cast<const AMDGPUInstrInfo *>(passRep->getTargetInstrInfo()); 1732 const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32); 1733 1734 RegiT endBranchReg = static_cast<int> 1735 (funcRep->getRegInfo().createVirtualRegister(I32RC)); 1736 assert(endBranchReg >= 0); 1737 1738 // reg = 0 before entering the loop 1739 addLoopEndbranchInitReg(loopRep, endBranchReg); 1740 1741 uint32_t numBlks = static_cast<uint32_t>(exitingBlks.size()); 1742 assert(numBlks >=2 && numBlks == exitBlks.size()); 1743 1744 BlockT *preExitingBlk = exitingBlks[0]; 1745 BlockT *preExitBlk = exitBlks[0]; 1746 BlockT *preBranchBlk = funcRep->CreateMachineBasicBlock(); 1747 funcRep->push_back(preBranchBlk); //insert to function 1748 SHOWNEWBLK(preBranchBlk, "New loopEndbranch block: "); 1749 1750 BlockT *newLandBlk = preBranchBlk; 1751 1752 CFGTraits::replaceInstrUseOfBlockWith(preExitingBlk, preExitBlk, 1753 newLandBlk); 1754 preExitingBlk->removeSuccessor(preExitBlk); 1755 preExitingBlk->addSuccessor(newLandBlk); 1756 1757 //it is redundant to add reg = 0 to exitingBlks[0] 1758 1759 // For 1..n th exiting path (the last iteration handles two pathes) create the 1760 // branch to the previous path and the current path. 1761 for (uint32_t i = 1; i < numBlks; ++i) { 1762 BlockT *curExitingBlk = exitingBlks[i]; 1763 BlockT *curExitBlk = exitBlks[i]; 1764 BlockT *curBranchBlk; 1765 1766 if (i == numBlks - 1) { 1767 curBranchBlk = curExitBlk; 1768 } else { 1769 curBranchBlk = funcRep->CreateMachineBasicBlock(); 1770 funcRep->push_back(curBranchBlk); //insert to function 1771 SHOWNEWBLK(curBranchBlk, "New loopEndbranch block: "); 1772 } 1773 1774 // Add reg = i to exitingBlks[i]. 1775 CFGTraits::insertAssignInstrBefore(curExitingBlk, passRep, 1776 endBranchReg, i); 1777 1778 // Remove the edge (exitingBlks[i] exitBlks[i]) add new edge 1779 // (exitingBlks[i], newLandBlk). 1780 CFGTraits::replaceInstrUseOfBlockWith(curExitingBlk, curExitBlk, 1781 newLandBlk); 1782 curExitingBlk->removeSuccessor(curExitBlk); 1783 curExitingBlk->addSuccessor(newLandBlk); 1784 1785 // add to preBranchBlk the branch instruction: 1786 // if (endBranchReg == preVal) 1787 // preExitBlk 1788 // else 1789 // curBranchBlk 1790 // 1791 // preValReg = i - 1 1792 1793 DebugLoc DL; 1794 RegiT preValReg = static_cast<int> 1795 (funcRep->getRegInfo().createVirtualRegister(I32RC)); 1796 1797 preBranchBlk->insert(preBranchBlk->begin(), 1798 tii->getMovImmInstr(preBranchBlk->getParent(), preValReg, 1799 i - 1)); 1800 1801 // condResReg = (endBranchReg == preValReg) 1802 RegiT condResReg = static_cast<int> 1803 (funcRep->getRegInfo().createVirtualRegister(I32RC)); 1804 BuildMI(preBranchBlk, DL, tii->get(tii->getIEQOpcode()), condResReg) 1805 .addReg(endBranchReg).addReg(preValReg); 1806 1807 BuildMI(preBranchBlk, DL, tii->get(AMDGPU::BRANCH_COND_i32)) 1808 .addMBB(preExitBlk).addReg(condResReg); 1809 1810 preBranchBlk->addSuccessor(preExitBlk); 1811 preBranchBlk->addSuccessor(curBranchBlk); 1812 1813 // Update preExitingBlk, preExitBlk, preBranchBlk. 1814 preExitingBlk = curExitingBlk; 1815 preExitBlk = curExitBlk; 1816 preBranchBlk = curBranchBlk; 1817 1818 } //end for 1 .. n blocks 1819 1820 return newLandBlk; 1821 } //addLoopEndbranchBlock 1822 1823 template<class PassT> 1824 typename CFGStructurizer<PassT>::PathToKind 1825 CFGStructurizer<PassT>::singlePathTo(BlockT *srcBlk, BlockT *dstBlk, 1826 bool allowSideEntry) { 1827 assert(dstBlk); 1828 1829 if (srcBlk == dstBlk) { 1830 return SinglePath_InPath; 1831 } 1832 1833 while (srcBlk && srcBlk->succ_size() == 1) { 1834 srcBlk = *srcBlk->succ_begin(); 1835 if (srcBlk == dstBlk) { 1836 return SinglePath_InPath; 1837 } 1838 1839 if (!allowSideEntry && srcBlk->pred_size() > 1) { 1840 return Not_SinglePath; 1841 } 1842 } 1843 1844 if (srcBlk && srcBlk->succ_size()==0) { 1845 return SinglePath_NotInPath; 1846 } 1847 1848 return Not_SinglePath; 1849 } //singlePathTo 1850 1851 // If there is a single path from srcBlk to dstBlk, return the last block before 1852 // dstBlk If there is a single path from srcBlk->end without dstBlk, return the 1853 // last block in the path Otherwise, return NULL 1854 template<class PassT> 1855 typename CFGStructurizer<PassT>::BlockT * 1856 CFGStructurizer<PassT>::singlePathEnd(BlockT *srcBlk, BlockT *dstBlk, 1857 bool allowSideEntry) { 1858 assert(dstBlk); 1859 1860 if (srcBlk == dstBlk) { 1861 return srcBlk; 1862 } 1863 1864 if (srcBlk->succ_size() == 0) { 1865 return srcBlk; 1866 } 1867 1868 while (srcBlk && srcBlk->succ_size() == 1) { 1869 BlockT *preBlk = srcBlk; 1870 1871 srcBlk = *srcBlk->succ_begin(); 1872 if (srcBlk == NULL) { 1873 return preBlk; 1874 } 1875 1876 if (!allowSideEntry && srcBlk->pred_size() > 1) { 1877 return NULL; 1878 } 1879 } 1880 1881 if (srcBlk && srcBlk->succ_size()==0) { 1882 return srcBlk; 1883 } 1884 1885 return NULL; 1886 1887 } //singlePathEnd 1888 1889 template<class PassT> 1890 int CFGStructurizer<PassT>::cloneOnSideEntryTo(BlockT *preBlk, BlockT *srcBlk, 1891 BlockT *dstBlk) { 1892 int cloned = 0; 1893 assert(preBlk->isSuccessor(srcBlk)); 1894 while (srcBlk && srcBlk != dstBlk) { 1895 assert(srcBlk->succ_size() == 1); 1896 if (srcBlk->pred_size() > 1) { 1897 srcBlk = cloneBlockForPredecessor(srcBlk, preBlk); 1898 ++cloned; 1899 } 1900 1901 preBlk = srcBlk; 1902 srcBlk = *srcBlk->succ_begin(); 1903 } 1904 1905 return cloned; 1906 } //cloneOnSideEntryTo 1907 1908 template<class PassT> 1909 typename CFGStructurizer<PassT>::BlockT * 1910 CFGStructurizer<PassT>::cloneBlockForPredecessor(BlockT *curBlk, 1911 BlockT *predBlk) { 1912 assert(predBlk->isSuccessor(curBlk) && 1913 "succBlk is not a prececessor of curBlk"); 1914 1915 BlockT *cloneBlk = CFGTraits::clone(curBlk); //clone instructions 1916 CFGTraits::replaceInstrUseOfBlockWith(predBlk, curBlk, cloneBlk); 1917 //srcBlk, oldBlk, newBlk 1918 1919 predBlk->removeSuccessor(curBlk); 1920 predBlk->addSuccessor(cloneBlk); 1921 1922 // add all successor to cloneBlk 1923 CFGTraits::cloneSuccessorList(cloneBlk, curBlk); 1924 1925 numClonedInstr += curBlk->size(); 1926 1927 if (DEBUGME) { 1928 errs() << "Cloned block: " << "BB" 1929 << curBlk->getNumber() << "size " << curBlk->size() << "\n"; 1930 } 1931 1932 SHOWNEWBLK(cloneBlk, "result of Cloned block: "); 1933 1934 return cloneBlk; 1935 } //cloneBlockForPredecessor 1936 1937 template<class PassT> 1938 typename CFGStructurizer<PassT>::BlockT * 1939 CFGStructurizer<PassT>::exitingBlock2ExitBlock(LoopT *loopRep, 1940 BlockT *exitingBlk) { 1941 BlockT *exitBlk = NULL; 1942 1943 for (typename BlockT::succ_iterator iterSucc = exitingBlk->succ_begin(), 1944 iterSuccEnd = exitingBlk->succ_end(); 1945 iterSucc != iterSuccEnd; ++iterSucc) { 1946 BlockT *curBlk = *iterSucc; 1947 if (!loopRep->contains(curBlk)) { 1948 assert(exitBlk == NULL); 1949 exitBlk = curBlk; 1950 } 1951 } 1952 1953 assert(exitBlk != NULL); 1954 1955 return exitBlk; 1956 } //exitingBlock2ExitBlock 1957 1958 template<class PassT> 1959 void CFGStructurizer<PassT>::migrateInstruction(BlockT *srcBlk, 1960 BlockT *dstBlk, 1961 InstrIterator insertPos) { 1962 InstrIterator spliceEnd; 1963 //look for the input branchinstr, not the AMDGPU branchinstr 1964 InstrT *branchInstr = CFGTraits::getNormalBlockBranchInstr(srcBlk); 1965 if (branchInstr == NULL) { 1966 if (DEBUGME) { 1967 errs() << "migrateInstruction don't see branch instr\n" ; 1968 } 1969 spliceEnd = srcBlk->end(); 1970 } else { 1971 if (DEBUGME) { 1972 errs() << "migrateInstruction see branch instr\n" ; 1973 branchInstr->dump(); 1974 } 1975 spliceEnd = CFGTraits::getInstrPos(srcBlk, branchInstr); 1976 } 1977 if (DEBUGME) { 1978 errs() << "migrateInstruction before splice dstSize = " << dstBlk->size() 1979 << "srcSize = " << srcBlk->size() << "\n"; 1980 } 1981 1982 //splice insert before insertPos 1983 dstBlk->splice(insertPos, srcBlk, srcBlk->begin(), spliceEnd); 1984 1985 if (DEBUGME) { 1986 errs() << "migrateInstruction after splice dstSize = " << dstBlk->size() 1987 << "srcSize = " << srcBlk->size() << "\n"; 1988 } 1989 } //migrateInstruction 1990 1991 // normalizeInfiniteLoopExit change 1992 // B1: 1993 // uncond_br LoopHeader 1994 // 1995 // to 1996 // B1: 1997 // cond_br 1 LoopHeader dummyExit 1998 // and return the newly added dummy exit block 1999 // 2000 template<class PassT> 2001 typename CFGStructurizer<PassT>::BlockT * 2002 CFGStructurizer<PassT>::normalizeInfiniteLoopExit(LoopT* LoopRep) { 2003 BlockT *loopHeader; 2004 BlockT *loopLatch; 2005 loopHeader = LoopRep->getHeader(); 2006 loopLatch = LoopRep->getLoopLatch(); 2007 BlockT *dummyExitBlk = NULL; 2008 const TargetRegisterClass * I32RC = TRI->getCFGStructurizerRegClass(MVT::i32); 2009 if (loopHeader!=NULL && loopLatch!=NULL) { 2010 InstrT *branchInstr = CFGTraits::getLoopendBlockBranchInstr(loopLatch); 2011 if (branchInstr!=NULL && CFGTraits::isUncondBranch(branchInstr)) { 2012 dummyExitBlk = funcRep->CreateMachineBasicBlock(); 2013 funcRep->push_back(dummyExitBlk); //insert to function 2014 SHOWNEWBLK(dummyExitBlk, "DummyExitBlock to normalize infiniteLoop: "); 2015 2016 if (DEBUGME) errs() << "Old branch instr: " << *branchInstr << "\n"; 2017 2018 typename BlockT::iterator insertPos = 2019 CFGTraits::getInstrPos(loopLatch, branchInstr); 2020 unsigned immReg = 2021 funcRep->getRegInfo().createVirtualRegister(I32RC); 2022 CFGTraits::insertAssignInstrBefore(insertPos, passRep, immReg, 1); 2023 InstrT *newInstr = 2024 CFGTraits::insertInstrBefore(insertPos, AMDGPU::BRANCH_COND_i32, passRep); 2025 MachineInstrBuilder MIB(*funcRep, newInstr); 2026 MIB.addMBB(loopHeader); 2027 MIB.addReg(immReg, false); 2028 2029 SHOWNEWINSTR(newInstr); 2030 2031 branchInstr->eraseFromParent(); 2032 loopLatch->addSuccessor(dummyExitBlk); 2033 } 2034 } 2035 2036 return dummyExitBlk; 2037 } //normalizeInfiniteLoopExit 2038 2039 template<class PassT> 2040 void CFGStructurizer<PassT>::removeUnconditionalBranch(BlockT *srcBlk) { 2041 InstrT *branchInstr; 2042 2043 // I saw two unconditional branch in one basic block in example 2044 // test_fc_do_while_or.c need to fix the upstream on this to remove the loop. 2045 while ((branchInstr = CFGTraits::getLoopendBlockBranchInstr(srcBlk)) 2046 && CFGTraits::isUncondBranch(branchInstr)) { 2047 if (DEBUGME) { 2048 errs() << "Removing unconditional branch instruction" ; 2049 branchInstr->dump(); 2050 } 2051 branchInstr->eraseFromParent(); 2052 } 2053 } //removeUnconditionalBranch 2054 2055 template<class PassT> 2056 void CFGStructurizer<PassT>::removeRedundantConditionalBranch(BlockT *srcBlk) { 2057 if (srcBlk->succ_size() == 2) { 2058 BlockT *blk1 = *srcBlk->succ_begin(); 2059 BlockT *blk2 = *(++srcBlk->succ_begin()); 2060 2061 if (blk1 == blk2) { 2062 InstrT *branchInstr = CFGTraits::getNormalBlockBranchInstr(srcBlk); 2063 assert(branchInstr && CFGTraits::isCondBranch(branchInstr)); 2064 if (DEBUGME) { 2065 errs() << "Removing unneeded conditional branch instruction" ; 2066 branchInstr->dump(); 2067 } 2068 branchInstr->eraseFromParent(); 2069 SHOWNEWBLK(blk1, "Removing redundant successor"); 2070 srcBlk->removeSuccessor(blk1); 2071 } 2072 } 2073 } //removeRedundantConditionalBranch 2074 2075 template<class PassT> 2076 void CFGStructurizer<PassT>::addDummyExitBlock(SmallVector<BlockT*, 2077 DEFAULT_VEC_SLOTS> &retBlks) { 2078 BlockT *dummyExitBlk = funcRep->CreateMachineBasicBlock(); 2079 funcRep->push_back(dummyExitBlk); //insert to function 2080 CFGTraits::insertInstrEnd(dummyExitBlk, AMDGPU::RETURN, passRep); 2081 2082 for (typename SmallVector<BlockT *, DEFAULT_VEC_SLOTS>::iterator iter = 2083 retBlks.begin(), 2084 iterEnd = retBlks.end(); iter != iterEnd; ++iter) { 2085 BlockT *curBlk = *iter; 2086 InstrT *curInstr = CFGTraits::getReturnInstr(curBlk); 2087 if (curInstr) { 2088 curInstr->eraseFromParent(); 2089 } 2090 curBlk->addSuccessor(dummyExitBlk); 2091 if (DEBUGME) { 2092 errs() << "Add dummyExitBlock to BB" << curBlk->getNumber() 2093 << " successors\n"; 2094 } 2095 } //for 2096 2097 SHOWNEWBLK(dummyExitBlk, "DummyExitBlock: "); 2098 } //addDummyExitBlock 2099 2100 template<class PassT> 2101 void CFGStructurizer<PassT>::removeSuccessor(BlockT *srcBlk) { 2102 while (srcBlk->succ_size()) { 2103 srcBlk->removeSuccessor(*srcBlk->succ_begin()); 2104 } 2105 } 2106 2107 template<class PassT> 2108 void CFGStructurizer<PassT>::recordSccnum(BlockT *srcBlk, int sccNum) { 2109 BlockInfo *&srcBlkInfo = blockInfoMap[srcBlk]; 2110 2111 if (srcBlkInfo == NULL) { 2112 srcBlkInfo = new BlockInfo(); 2113 } 2114 2115 srcBlkInfo->sccNum = sccNum; 2116 } 2117 2118 template<class PassT> 2119 int CFGStructurizer<PassT>::getSCCNum(BlockT *srcBlk) { 2120 BlockInfo *srcBlkInfo = blockInfoMap[srcBlk]; 2121 return srcBlkInfo ? srcBlkInfo->sccNum : INVALIDSCCNUM; 2122 } 2123 2124 template<class PassT> 2125 void CFGStructurizer<PassT>::retireBlock(BlockT *dstBlk, BlockT *srcBlk) { 2126 if (DEBUGME) { 2127 errs() << "Retiring BB" << srcBlk->getNumber() << "\n"; 2128 } 2129 2130 BlockInfo *&srcBlkInfo = blockInfoMap[srcBlk]; 2131 2132 if (srcBlkInfo == NULL) { 2133 srcBlkInfo = new BlockInfo(); 2134 } 2135 2136 srcBlkInfo->isRetired = true; 2137 assert(srcBlk->succ_size() == 0 && srcBlk->pred_size() == 0 2138 && "can't retire block yet"); 2139 } 2140 2141 template<class PassT> 2142 bool CFGStructurizer<PassT>::isRetiredBlock(BlockT *srcBlk) { 2143 BlockInfo *srcBlkInfo = blockInfoMap[srcBlk]; 2144 return (srcBlkInfo && srcBlkInfo->isRetired); 2145 } 2146 2147 template<class PassT> 2148 bool CFGStructurizer<PassT>::isActiveLoophead(BlockT *curBlk) { 2149 LoopT *loopRep = loopInfo->getLoopFor(curBlk); 2150 while (loopRep && loopRep->getHeader() == curBlk) { 2151 LoopLandInfo *loopLand = getLoopLandInfo(loopRep); 2152 2153 if(loopLand == NULL) 2154 return true; 2155 2156 BlockT *landBlk = loopLand->landBlk; 2157 assert(landBlk); 2158 if (!isRetiredBlock(landBlk)) { 2159 return true; 2160 } 2161 2162 loopRep = loopRep->getParentLoop(); 2163 } 2164 2165 return false; 2166 } //isActiveLoophead 2167 2168 template<class PassT> 2169 bool CFGStructurizer<PassT>::needMigrateBlock(BlockT *blk) { 2170 const unsigned blockSizeThreshold = 30; 2171 const unsigned cloneInstrThreshold = 100; 2172 2173 bool multiplePreds = blk && (blk->pred_size() > 1); 2174 2175 if(!multiplePreds) 2176 return false; 2177 2178 unsigned blkSize = blk->size(); 2179 return ((blkSize > blockSizeThreshold) 2180 && (blkSize * (blk->pred_size() - 1) > cloneInstrThreshold)); 2181 } //needMigrateBlock 2182 2183 template<class PassT> 2184 typename CFGStructurizer<PassT>::BlockT * 2185 CFGStructurizer<PassT>::recordLoopLandBlock(LoopT *loopRep, BlockT *landBlk, 2186 BlockTSmallerVector &exitBlks, 2187 std::set<BlockT *> &exitBlkSet) { 2188 SmallVector<BlockT *, DEFAULT_VEC_SLOTS> inpathBlks; //in exit path blocks 2189 2190 for (typename BlockT::pred_iterator predIter = landBlk->pred_begin(), 2191 predIterEnd = landBlk->pred_end(); 2192 predIter != predIterEnd; ++predIter) { 2193 BlockT *curBlk = *predIter; 2194 if (loopRep->contains(curBlk) || exitBlkSet.count(curBlk)) { 2195 inpathBlks.push_back(curBlk); 2196 } 2197 } //for 2198 2199 //if landBlk has predecessors that are not in the given loop, 2200 //create a new block 2201 BlockT *newLandBlk = landBlk; 2202 if (inpathBlks.size() != landBlk->pred_size()) { 2203 newLandBlk = funcRep->CreateMachineBasicBlock(); 2204 funcRep->push_back(newLandBlk); //insert to function 2205 newLandBlk->addSuccessor(landBlk); 2206 for (typename SmallVector<BlockT*, DEFAULT_VEC_SLOTS>::iterator iter = 2207 inpathBlks.begin(), 2208 iterEnd = inpathBlks.end(); iter != iterEnd; ++iter) { 2209 BlockT *curBlk = *iter; 2210 CFGTraits::replaceInstrUseOfBlockWith(curBlk, landBlk, newLandBlk); 2211 //srcBlk, oldBlk, newBlk 2212 curBlk->removeSuccessor(landBlk); 2213 curBlk->addSuccessor(newLandBlk); 2214 } 2215 for (size_t i = 0, tot = exitBlks.size(); i < tot; ++i) { 2216 if (exitBlks[i] == landBlk) { 2217 exitBlks[i] = newLandBlk; 2218 } 2219 } 2220 SHOWNEWBLK(newLandBlk, "NewLandingBlock: "); 2221 } 2222 2223 setLoopLandBlock(loopRep, newLandBlk); 2224 2225 return newLandBlk; 2226 } // recordLoopbreakLand 2227 2228 template<class PassT> 2229 void CFGStructurizer<PassT>::setLoopLandBlock(LoopT *loopRep, BlockT *blk) { 2230 LoopLandInfo *&theEntry = loopLandInfoMap[loopRep]; 2231 2232 if (theEntry == NULL) { 2233 theEntry = new LoopLandInfo(); 2234 } 2235 assert(theEntry->landBlk == NULL); 2236 2237 if (blk == NULL) { 2238 blk = funcRep->CreateMachineBasicBlock(); 2239 funcRep->push_back(blk); //insert to function 2240 SHOWNEWBLK(blk, "DummyLandingBlock for loop without break: "); 2241 } 2242 2243 theEntry->landBlk = blk; 2244 2245 if (DEBUGME) { 2246 errs() << "setLoopLandBlock loop-header = BB" 2247 << loopRep->getHeader()->getNumber() 2248 << " landing-block = BB" << blk->getNumber() << "\n"; 2249 } 2250 } // setLoopLandBlock 2251 2252 template<class PassT> 2253 void CFGStructurizer<PassT>::addLoopBreakOnReg(LoopT *loopRep, RegiT regNum) { 2254 LoopLandInfo *&theEntry = loopLandInfoMap[loopRep]; 2255 2256 if (theEntry == NULL) { 2257 theEntry = new LoopLandInfo(); 2258 } 2259 2260 theEntry->breakOnRegs.insert(regNum); 2261 2262 if (DEBUGME) { 2263 errs() << "addLoopBreakOnReg loop-header = BB" 2264 << loopRep->getHeader()->getNumber() 2265 << " regNum = " << regNum << "\n"; 2266 } 2267 } // addLoopBreakOnReg 2268 2269 template<class PassT> 2270 void CFGStructurizer<PassT>::addLoopContOnReg(LoopT *loopRep, RegiT regNum) { 2271 LoopLandInfo *&theEntry = loopLandInfoMap[loopRep]; 2272 2273 if (theEntry == NULL) { 2274 theEntry = new LoopLandInfo(); 2275 } 2276 theEntry->contOnRegs.insert(regNum); 2277 2278 if (DEBUGME) { 2279 errs() << "addLoopContOnReg loop-header = BB" 2280 << loopRep->getHeader()->getNumber() 2281 << " regNum = " << regNum << "\n"; 2282 } 2283 } // addLoopContOnReg 2284 2285 template<class PassT> 2286 void CFGStructurizer<PassT>::addLoopBreakInitReg(LoopT *loopRep, RegiT regNum) { 2287 LoopLandInfo *&theEntry = loopLandInfoMap[loopRep]; 2288 2289 if (theEntry == NULL) { 2290 theEntry = new LoopLandInfo(); 2291 } 2292 theEntry->breakInitRegs.insert(regNum); 2293 2294 if (DEBUGME) { 2295 errs() << "addLoopBreakInitReg loop-header = BB" 2296 << loopRep->getHeader()->getNumber() 2297 << " regNum = " << regNum << "\n"; 2298 } 2299 } // addLoopBreakInitReg 2300 2301 template<class PassT> 2302 void CFGStructurizer<PassT>::addLoopContInitReg(LoopT *loopRep, RegiT regNum) { 2303 LoopLandInfo *&theEntry = loopLandInfoMap[loopRep]; 2304 2305 if (theEntry == NULL) { 2306 theEntry = new LoopLandInfo(); 2307 } 2308 theEntry->contInitRegs.insert(regNum); 2309 2310 if (DEBUGME) { 2311 errs() << "addLoopContInitReg loop-header = BB" 2312 << loopRep->getHeader()->getNumber() 2313 << " regNum = " << regNum << "\n"; 2314 } 2315 } // addLoopContInitReg 2316 2317 template<class PassT> 2318 void CFGStructurizer<PassT>::addLoopEndbranchInitReg(LoopT *loopRep, 2319 RegiT regNum) { 2320 LoopLandInfo *&theEntry = loopLandInfoMap[loopRep]; 2321 2322 if (theEntry == NULL) { 2323 theEntry = new LoopLandInfo(); 2324 } 2325 theEntry->endbranchInitRegs.insert(regNum); 2326 2327 if (DEBUGME) { 2328 errs() << "addLoopEndbranchInitReg loop-header = BB" 2329 << loopRep->getHeader()->getNumber() 2330 << " regNum = " << regNum << "\n"; 2331 } 2332 } // addLoopEndbranchInitReg 2333 2334 template<class PassT> 2335 typename CFGStructurizer<PassT>::LoopLandInfo * 2336 CFGStructurizer<PassT>::getLoopLandInfo(LoopT *loopRep) { 2337 LoopLandInfo *&theEntry = loopLandInfoMap[loopRep]; 2338 2339 return theEntry; 2340 } // getLoopLandInfo 2341 2342 template<class PassT> 2343 typename CFGStructurizer<PassT>::BlockT * 2344 CFGStructurizer<PassT>::getLoopLandBlock(LoopT *loopRep) { 2345 LoopLandInfo *&theEntry = loopLandInfoMap[loopRep]; 2346 2347 return theEntry ? theEntry->landBlk : NULL; 2348 } // getLoopLandBlock 2349 2350 2351 template<class PassT> 2352 bool CFGStructurizer<PassT>::hasBackEdge(BlockT *curBlk) { 2353 LoopT *loopRep = loopInfo->getLoopFor(curBlk); 2354 if (loopRep == NULL) 2355 return false; 2356 2357 BlockT *loopHeader = loopRep->getHeader(); 2358 2359 return curBlk->isSuccessor(loopHeader); 2360 2361 } //hasBackEdge 2362 2363 template<class PassT> 2364 unsigned CFGStructurizer<PassT>::getLoopDepth(LoopT *loopRep) { 2365 return loopRep ? loopRep->getLoopDepth() : 0; 2366 } //getLoopDepth 2367 2368 template<class PassT> 2369 int CFGStructurizer<PassT>::countActiveBlock 2370 (typename SmallVector<BlockT*, DEFAULT_VEC_SLOTS>::const_iterator iterStart, 2371 typename SmallVector<BlockT*, DEFAULT_VEC_SLOTS>::const_iterator iterEnd) { 2372 int count = 0; 2373 while (iterStart != iterEnd) { 2374 if (!isRetiredBlock(*iterStart)) { 2375 ++count; 2376 } 2377 ++iterStart; 2378 } 2379 2380 return count; 2381 } //countActiveBlock 2382 2383 // This is work around solution for findNearestCommonDominator not avaiable to 2384 // post dom a proper fix should go to Dominators.h. 2385 2386 template<class PassT> 2387 typename CFGStructurizer<PassT>::BlockT* 2388 CFGStructurizer<PassT>::findNearestCommonPostDom(BlockT *blk1, BlockT *blk2) { 2389 2390 if (postDomTree->dominates(blk1, blk2)) { 2391 return blk1; 2392 } 2393 if (postDomTree->dominates(blk2, blk1)) { 2394 return blk2; 2395 } 2396 2397 DomTreeNodeT *node1 = postDomTree->getNode(blk1); 2398 DomTreeNodeT *node2 = postDomTree->getNode(blk2); 2399 2400 // Handle newly cloned node. 2401 if (node1 == NULL && blk1->succ_size() == 1) { 2402 return findNearestCommonPostDom(*blk1->succ_begin(), blk2); 2403 } 2404 if (node2 == NULL && blk2->succ_size() == 1) { 2405 return findNearestCommonPostDom(blk1, *blk2->succ_begin()); 2406 } 2407 2408 if (node1 == NULL || node2 == NULL) { 2409 return NULL; 2410 } 2411 2412 node1 = node1->getIDom(); 2413 while (node1) { 2414 if (postDomTree->dominates(node1, node2)) { 2415 return node1->getBlock(); 2416 } 2417 node1 = node1->getIDom(); 2418 } 2419 2420 return NULL; 2421 } 2422 2423 template<class PassT> 2424 typename CFGStructurizer<PassT>::BlockT * 2425 CFGStructurizer<PassT>::findNearestCommonPostDom 2426 (typename std::set<BlockT *> &blks) { 2427 BlockT *commonDom; 2428 typename std::set<BlockT *>::const_iterator iter = blks.begin(); 2429 typename std::set<BlockT *>::const_iterator iterEnd = blks.end(); 2430 for (commonDom = *iter; iter != iterEnd && commonDom != NULL; ++iter) { 2431 BlockT *curBlk = *iter; 2432 if (curBlk != commonDom) { 2433 commonDom = findNearestCommonPostDom(curBlk, commonDom); 2434 } 2435 } 2436 2437 if (DEBUGME) { 2438 errs() << "Common post dominator for exit blocks is "; 2439 if (commonDom) { 2440 errs() << "BB" << commonDom->getNumber() << "\n"; 2441 } else { 2442 errs() << "NULL\n"; 2443 } 2444 } 2445 2446 return commonDom; 2447 } //findNearestCommonPostDom 2448 2449 } //end namespace llvm 2450 2451 //todo: move-end 2452 2453 2454 //===----------------------------------------------------------------------===// 2455 // 2456 // CFGStructurizer for AMDGPU 2457 // 2458 //===----------------------------------------------------------------------===// 2459 2460 2461 using namespace llvmCFGStruct; 2462 2463 namespace llvm { 2464 class AMDGPUCFGStructurizer : public MachineFunctionPass { 2465 public: 2466 typedef MachineInstr InstructionType; 2467 typedef MachineFunction FunctionType; 2468 typedef MachineBasicBlock BlockType; 2469 typedef MachineLoopInfo LoopinfoType; 2470 typedef MachineDominatorTree DominatortreeType; 2471 typedef MachinePostDominatorTree PostDominatortreeType; 2472 typedef MachineDomTreeNode DomTreeNodeType; 2473 typedef MachineLoop LoopType; 2474 2475 protected: 2476 TargetMachine &TM; 2477 const TargetInstrInfo *TII; 2478 const AMDGPURegisterInfo *TRI; 2479 2480 public: 2481 AMDGPUCFGStructurizer(char &pid, TargetMachine &tm); 2482 const TargetInstrInfo *getTargetInstrInfo() const; 2483 2484 private: 2485 2486 }; 2487 2488 } //end of namespace llvm 2489 AMDGPUCFGStructurizer::AMDGPUCFGStructurizer(char &pid, TargetMachine &tm) 2490 : MachineFunctionPass(pid), TM(tm), TII(tm.getInstrInfo()), 2491 TRI(static_cast<const AMDGPURegisterInfo *>(tm.getRegisterInfo())) { 2492 } 2493 2494 const TargetInstrInfo *AMDGPUCFGStructurizer::getTargetInstrInfo() const { 2495 return TII; 2496 } 2497 //===----------------------------------------------------------------------===// 2498 // 2499 // CFGPrepare 2500 // 2501 //===----------------------------------------------------------------------===// 2502 2503 2504 using namespace llvmCFGStruct; 2505 2506 namespace llvm { 2507 class AMDGPUCFGPrepare : public AMDGPUCFGStructurizer { 2508 public: 2509 static char ID; 2510 2511 public: 2512 AMDGPUCFGPrepare(TargetMachine &tm); 2513 2514 virtual const char *getPassName() const; 2515 virtual void getAnalysisUsage(AnalysisUsage &AU) const; 2516 2517 bool runOnMachineFunction(MachineFunction &F); 2518 2519 private: 2520 2521 }; 2522 2523 char AMDGPUCFGPrepare::ID = 0; 2524 } //end of namespace llvm 2525 2526 AMDGPUCFGPrepare::AMDGPUCFGPrepare(TargetMachine &tm) 2527 : AMDGPUCFGStructurizer(ID, tm ) { 2528 } 2529 const char *AMDGPUCFGPrepare::getPassName() const { 2530 return "AMD IL Control Flow Graph Preparation Pass"; 2531 } 2532 2533 void AMDGPUCFGPrepare::getAnalysisUsage(AnalysisUsage &AU) const { 2534 AU.addPreserved<MachineFunctionAnalysis>(); 2535 AU.addRequired<MachineFunctionAnalysis>(); 2536 AU.addRequired<MachineDominatorTree>(); 2537 AU.addRequired<MachinePostDominatorTree>(); 2538 AU.addRequired<MachineLoopInfo>(); 2539 } 2540 2541 //===----------------------------------------------------------------------===// 2542 // 2543 // CFGPerform 2544 // 2545 //===----------------------------------------------------------------------===// 2546 2547 2548 using namespace llvmCFGStruct; 2549 2550 namespace llvm { 2551 class AMDGPUCFGPerform : public AMDGPUCFGStructurizer { 2552 public: 2553 static char ID; 2554 2555 public: 2556 AMDGPUCFGPerform(TargetMachine &tm); 2557 virtual const char *getPassName() const; 2558 virtual void getAnalysisUsage(AnalysisUsage &AU) const; 2559 bool runOnMachineFunction(MachineFunction &F); 2560 2561 private: 2562 2563 }; 2564 2565 char AMDGPUCFGPerform::ID = 0; 2566 } //end of namespace llvm 2567 2568 AMDGPUCFGPerform::AMDGPUCFGPerform(TargetMachine &tm) 2569 : AMDGPUCFGStructurizer(ID, tm) { 2570 } 2571 2572 const char *AMDGPUCFGPerform::getPassName() const { 2573 return "AMD IL Control Flow Graph structurizer Pass"; 2574 } 2575 2576 void AMDGPUCFGPerform::getAnalysisUsage(AnalysisUsage &AU) const { 2577 AU.addPreserved<MachineFunctionAnalysis>(); 2578 AU.addRequired<MachineFunctionAnalysis>(); 2579 AU.addRequired<MachineDominatorTree>(); 2580 AU.addRequired<MachinePostDominatorTree>(); 2581 AU.addRequired<MachineLoopInfo>(); 2582 } 2583 2584 //===----------------------------------------------------------------------===// 2585 // 2586 // CFGStructTraits<AMDGPUCFGStructurizer> 2587 // 2588 //===----------------------------------------------------------------------===// 2589 2590 namespace llvmCFGStruct { 2591 // this class is tailor to the AMDGPU backend 2592 template<> 2593 struct CFGStructTraits<AMDGPUCFGStructurizer> { 2594 typedef int RegiT; 2595 2596 static int getBranchNzeroOpcode(int oldOpcode) { 2597 switch(oldOpcode) { 2598 case AMDGPU::JUMP_COND: 2599 case AMDGPU::JUMP: return AMDGPU::IF_PREDICATE_SET; 2600 case AMDGPU::BRANCH_COND_i32: 2601 case AMDGPU::BRANCH_COND_f32: return AMDGPU::IF_LOGICALNZ_f32; 2602 default: 2603 assert(0 && "internal error"); 2604 } 2605 return -1; 2606 } 2607 2608 static int getBranchZeroOpcode(int oldOpcode) { 2609 switch(oldOpcode) { 2610 case AMDGPU::JUMP_COND: 2611 case AMDGPU::JUMP: return AMDGPU::IF_PREDICATE_SET; 2612 case AMDGPU::BRANCH_COND_i32: 2613 case AMDGPU::BRANCH_COND_f32: return AMDGPU::IF_LOGICALZ_f32; 2614 default: 2615 assert(0 && "internal error"); 2616 } 2617 return -1; 2618 } 2619 2620 static int getContinueNzeroOpcode(int oldOpcode) { 2621 switch(oldOpcode) { 2622 case AMDGPU::JUMP_COND: 2623 case AMDGPU::JUMP: return AMDGPU::CONTINUE_LOGICALNZ_i32; 2624 default: 2625 assert(0 && "internal error"); 2626 }; 2627 return -1; 2628 } 2629 2630 static int getContinueZeroOpcode(int oldOpcode) { 2631 switch(oldOpcode) { 2632 case AMDGPU::JUMP_COND: 2633 case AMDGPU::JUMP: return AMDGPU::CONTINUE_LOGICALZ_i32; 2634 default: 2635 assert(0 && "internal error"); 2636 } 2637 return -1; 2638 } 2639 2640 static MachineBasicBlock *getTrueBranch(MachineInstr *instr) { 2641 return instr->getOperand(0).getMBB(); 2642 } 2643 2644 static void setTrueBranch(MachineInstr *instr, MachineBasicBlock *blk) { 2645 instr->getOperand(0).setMBB(blk); 2646 } 2647 2648 static MachineBasicBlock * 2649 getFalseBranch(MachineBasicBlock *blk, MachineInstr *instr) { 2650 assert(blk->succ_size() == 2); 2651 MachineBasicBlock *trueBranch = getTrueBranch(instr); 2652 MachineBasicBlock::succ_iterator iter = blk->succ_begin(); 2653 MachineBasicBlock::succ_iterator iterNext = iter; 2654 ++iterNext; 2655 2656 return (*iter == trueBranch) ? *iterNext : *iter; 2657 } 2658 2659 static bool isCondBranch(MachineInstr *instr) { 2660 switch (instr->getOpcode()) { 2661 case AMDGPU::JUMP_COND: 2662 case AMDGPU::BRANCH_COND_i32: 2663 case AMDGPU::BRANCH_COND_f32: 2664 break; 2665 default: 2666 return false; 2667 } 2668 return true; 2669 } 2670 2671 static bool isUncondBranch(MachineInstr *instr) { 2672 switch (instr->getOpcode()) { 2673 case AMDGPU::JUMP: 2674 case AMDGPU::BRANCH: 2675 return true; 2676 default: 2677 return false; 2678 } 2679 return true; 2680 } 2681 2682 static DebugLoc getLastDebugLocInBB(MachineBasicBlock *blk) { 2683 //get DebugLoc from the first MachineBasicBlock instruction with debug info 2684 DebugLoc DL; 2685 for (MachineBasicBlock::iterator iter = blk->begin(); iter != blk->end(); ++iter) { 2686 MachineInstr *instr = &(*iter); 2687 if (instr->getDebugLoc().isUnknown() == false) { 2688 DL = instr->getDebugLoc(); 2689 } 2690 } 2691 return DL; 2692 } 2693 2694 static MachineInstr *getNormalBlockBranchInstr(MachineBasicBlock *blk) { 2695 MachineBasicBlock::reverse_iterator iter = blk->rbegin(); 2696 MachineInstr *instr = &*iter; 2697 if (instr && (isCondBranch(instr) || isUncondBranch(instr))) { 2698 return instr; 2699 } 2700 return NULL; 2701 } 2702 2703 // The correct naming for this is getPossibleLoopendBlockBranchInstr. 2704 // 2705 // BB with backward-edge could have move instructions after the branch 2706 // instruction. Such move instruction "belong to" the loop backward-edge. 2707 // 2708 static MachineInstr *getLoopendBlockBranchInstr(MachineBasicBlock *blk) { 2709 const AMDGPUInstrInfo * TII = static_cast<const AMDGPUInstrInfo *>( 2710 blk->getParent()->getTarget().getInstrInfo()); 2711 2712 for (MachineBasicBlock::reverse_iterator iter = blk->rbegin(), 2713 iterEnd = blk->rend(); iter != iterEnd; ++iter) { 2714 // FIXME: Simplify 2715 MachineInstr *instr = &*iter; 2716 if (instr) { 2717 if (isCondBranch(instr) || isUncondBranch(instr)) { 2718 return instr; 2719 } else if (!TII->isMov(instr->getOpcode())) { 2720 break; 2721 } 2722 } 2723 } 2724 return NULL; 2725 } 2726 2727 static MachineInstr *getReturnInstr(MachineBasicBlock *blk) { 2728 MachineBasicBlock::reverse_iterator iter = blk->rbegin(); 2729 if (iter != blk->rend()) { 2730 MachineInstr *instr = &(*iter); 2731 if (instr->getOpcode() == AMDGPU::RETURN) { 2732 return instr; 2733 } 2734 } 2735 return NULL; 2736 } 2737 2738 static MachineInstr *getContinueInstr(MachineBasicBlock *blk) { 2739 MachineBasicBlock::reverse_iterator iter = blk->rbegin(); 2740 if (iter != blk->rend()) { 2741 MachineInstr *instr = &(*iter); 2742 if (instr->getOpcode() == AMDGPU::CONTINUE) { 2743 return instr; 2744 } 2745 } 2746 return NULL; 2747 } 2748 2749 static MachineInstr *getLoopBreakInstr(MachineBasicBlock *blk) { 2750 for (MachineBasicBlock::iterator iter = blk->begin(); (iter != blk->end()); ++iter) { 2751 MachineInstr *instr = &(*iter); 2752 if (instr->getOpcode() == AMDGPU::PREDICATED_BREAK) { 2753 return instr; 2754 } 2755 } 2756 return NULL; 2757 } 2758 2759 static bool isReturnBlock(MachineBasicBlock *blk) { 2760 MachineInstr *instr = getReturnInstr(blk); 2761 bool isReturn = (blk->succ_size() == 0); 2762 if (instr) { 2763 assert(isReturn); 2764 } else if (isReturn) { 2765 if (DEBUGME) { 2766 errs() << "BB" << blk->getNumber() 2767 <<" is return block without RETURN instr\n"; 2768 } 2769 } 2770 2771 return isReturn; 2772 } 2773 2774 static MachineBasicBlock::iterator 2775 getInstrPos(MachineBasicBlock *blk, MachineInstr *instr) { 2776 assert(instr->getParent() == blk && "instruction doesn't belong to block"); 2777 MachineBasicBlock::iterator iter = blk->begin(); 2778 MachineBasicBlock::iterator iterEnd = blk->end(); 2779 while (&(*iter) != instr && iter != iterEnd) { 2780 ++iter; 2781 } 2782 2783 assert(iter != iterEnd); 2784 return iter; 2785 }//getInstrPos 2786 2787 static MachineInstr *insertInstrBefore(MachineBasicBlock *blk, int newOpcode, 2788 AMDGPUCFGStructurizer *passRep) { 2789 return insertInstrBefore(blk,newOpcode,passRep,DebugLoc()); 2790 } //insertInstrBefore 2791 2792 static MachineInstr *insertInstrBefore(MachineBasicBlock *blk, int newOpcode, 2793 AMDGPUCFGStructurizer *passRep, DebugLoc DL) { 2794 const TargetInstrInfo *tii = passRep->getTargetInstrInfo(); 2795 MachineInstr *newInstr = 2796 blk->getParent()->CreateMachineInstr(tii->get(newOpcode), DL); 2797 2798 MachineBasicBlock::iterator res; 2799 if (blk->begin() != blk->end()) { 2800 blk->insert(blk->begin(), newInstr); 2801 } else { 2802 blk->push_back(newInstr); 2803 } 2804 2805 SHOWNEWINSTR(newInstr); 2806 2807 return newInstr; 2808 } //insertInstrBefore 2809 2810 static void insertInstrEnd(MachineBasicBlock *blk, int newOpcode, 2811 AMDGPUCFGStructurizer *passRep) { 2812 insertInstrEnd(blk,newOpcode,passRep,DebugLoc()); 2813 } //insertInstrEnd 2814 2815 static void insertInstrEnd(MachineBasicBlock *blk, int newOpcode, 2816 AMDGPUCFGStructurizer *passRep, DebugLoc DL) { 2817 const TargetInstrInfo *tii = passRep->getTargetInstrInfo(); 2818 MachineInstr *newInstr = blk->getParent() 2819 ->CreateMachineInstr(tii->get(newOpcode), DL); 2820 2821 blk->push_back(newInstr); 2822 //assume the instruction doesn't take any reg operand ... 2823 2824 SHOWNEWINSTR(newInstr); 2825 } //insertInstrEnd 2826 2827 static MachineInstr *insertInstrBefore(MachineBasicBlock::iterator instrPos, 2828 int newOpcode, 2829 AMDGPUCFGStructurizer *passRep) { 2830 MachineInstr *oldInstr = &(*instrPos); 2831 const TargetInstrInfo *tii = passRep->getTargetInstrInfo(); 2832 MachineBasicBlock *blk = oldInstr->getParent(); 2833 MachineInstr *newInstr = 2834 blk->getParent()->CreateMachineInstr(tii->get(newOpcode), 2835 DebugLoc()); 2836 2837 blk->insert(instrPos, newInstr); 2838 //assume the instruction doesn't take any reg operand ... 2839 2840 SHOWNEWINSTR(newInstr); 2841 return newInstr; 2842 } //insertInstrBefore 2843 2844 static void insertCondBranchBefore(MachineBasicBlock::iterator instrPos, 2845 int newOpcode, 2846 AMDGPUCFGStructurizer *passRep, 2847 DebugLoc DL) { 2848 MachineInstr *oldInstr = &(*instrPos); 2849 const TargetInstrInfo *tii = passRep->getTargetInstrInfo(); 2850 MachineBasicBlock *blk = oldInstr->getParent(); 2851 MachineFunction *MF = blk->getParent(); 2852 MachineInstr *newInstr = MF->CreateMachineInstr(tii->get(newOpcode), DL); 2853 2854 blk->insert(instrPos, newInstr); 2855 MachineInstrBuilder MIB(*MF, newInstr); 2856 MIB.addReg(oldInstr->getOperand(1).getReg(), false); 2857 2858 SHOWNEWINSTR(newInstr); 2859 //erase later oldInstr->eraseFromParent(); 2860 } //insertCondBranchBefore 2861 2862 static void insertCondBranchBefore(MachineBasicBlock *blk, 2863 MachineBasicBlock::iterator insertPos, 2864 int newOpcode, 2865 AMDGPUCFGStructurizer *passRep, 2866 RegiT regNum, 2867 DebugLoc DL) { 2868 const TargetInstrInfo *tii = passRep->getTargetInstrInfo(); 2869 MachineFunction *MF = blk->getParent(); 2870 2871 MachineInstr *newInstr = MF->CreateMachineInstr(tii->get(newOpcode), DL); 2872 2873 //insert before 2874 blk->insert(insertPos, newInstr); 2875 MachineInstrBuilder(*MF, newInstr).addReg(regNum, false); 2876 2877 SHOWNEWINSTR(newInstr); 2878 } //insertCondBranchBefore 2879 2880 static void insertCondBranchEnd(MachineBasicBlock *blk, 2881 int newOpcode, 2882 AMDGPUCFGStructurizer *passRep, 2883 RegiT regNum) { 2884 const TargetInstrInfo *tii = passRep->getTargetInstrInfo(); 2885 MachineFunction *MF = blk->getParent(); 2886 MachineInstr *newInstr = 2887 MF->CreateMachineInstr(tii->get(newOpcode), DebugLoc()); 2888 2889 blk->push_back(newInstr); 2890 MachineInstrBuilder(*MF, newInstr).addReg(regNum, false); 2891 2892 SHOWNEWINSTR(newInstr); 2893 } //insertCondBranchEnd 2894 2895 2896 static void insertAssignInstrBefore(MachineBasicBlock::iterator instrPos, 2897 AMDGPUCFGStructurizer *passRep, 2898 RegiT regNum, int regVal) { 2899 MachineInstr *oldInstr = &(*instrPos); 2900 const AMDGPUInstrInfo *tii = 2901 static_cast<const AMDGPUInstrInfo *>(passRep->getTargetInstrInfo()); 2902 MachineBasicBlock *blk = oldInstr->getParent(); 2903 MachineInstr *newInstr = tii->getMovImmInstr(blk->getParent(), regNum, 2904 regVal); 2905 blk->insert(instrPos, newInstr); 2906 2907 SHOWNEWINSTR(newInstr); 2908 } //insertAssignInstrBefore 2909 2910 static void insertAssignInstrBefore(MachineBasicBlock *blk, 2911 AMDGPUCFGStructurizer *passRep, 2912 RegiT regNum, int regVal) { 2913 const AMDGPUInstrInfo *tii = 2914 static_cast<const AMDGPUInstrInfo *>(passRep->getTargetInstrInfo()); 2915 2916 MachineInstr *newInstr = tii->getMovImmInstr(blk->getParent(), regNum, 2917 regVal); 2918 if (blk->begin() != blk->end()) { 2919 blk->insert(blk->begin(), newInstr); 2920 } else { 2921 blk->push_back(newInstr); 2922 } 2923 2924 SHOWNEWINSTR(newInstr); 2925 2926 } //insertInstrBefore 2927 2928 static void insertCompareInstrBefore(MachineBasicBlock *blk, 2929 MachineBasicBlock::iterator instrPos, 2930 AMDGPUCFGStructurizer *passRep, 2931 RegiT dstReg, RegiT src1Reg, 2932 RegiT src2Reg) { 2933 const AMDGPUInstrInfo *tii = 2934 static_cast<const AMDGPUInstrInfo *>(passRep->getTargetInstrInfo()); 2935 MachineFunction *MF = blk->getParent(); 2936 MachineInstr *newInstr = 2937 MF->CreateMachineInstr(tii->get(tii->getIEQOpcode()), DebugLoc()); 2938 2939 MachineInstrBuilder MIB(*MF, newInstr); 2940 MIB.addReg(dstReg, RegState::Define); //set target 2941 MIB.addReg(src1Reg); //set src value 2942 MIB.addReg(src2Reg); //set src value 2943 2944 blk->insert(instrPos, newInstr); 2945 SHOWNEWINSTR(newInstr); 2946 2947 } //insertCompareInstrBefore 2948 2949 static void cloneSuccessorList(MachineBasicBlock *dstBlk, 2950 MachineBasicBlock *srcBlk) { 2951 for (MachineBasicBlock::succ_iterator iter = srcBlk->succ_begin(), 2952 iterEnd = srcBlk->succ_end(); iter != iterEnd; ++iter) { 2953 dstBlk->addSuccessor(*iter); // *iter's predecessor is also taken care of 2954 } 2955 } //cloneSuccessorList 2956 2957 static MachineBasicBlock *clone(MachineBasicBlock *srcBlk) { 2958 MachineFunction *func = srcBlk->getParent(); 2959 MachineBasicBlock *newBlk = func->CreateMachineBasicBlock(); 2960 func->push_back(newBlk); //insert to function 2961 for (MachineBasicBlock::iterator iter = srcBlk->begin(), 2962 iterEnd = srcBlk->end(); 2963 iter != iterEnd; ++iter) { 2964 MachineInstr *instr = func->CloneMachineInstr(iter); 2965 newBlk->push_back(instr); 2966 } 2967 return newBlk; 2968 } 2969 2970 //MachineBasicBlock::ReplaceUsesOfBlockWith doesn't serve the purpose because 2971 //the AMDGPU instruction is not recognized as terminator fix this and retire 2972 //this routine 2973 static void replaceInstrUseOfBlockWith(MachineBasicBlock *srcBlk, 2974 MachineBasicBlock *oldBlk, 2975 MachineBasicBlock *newBlk) { 2976 MachineInstr *branchInstr = getLoopendBlockBranchInstr(srcBlk); 2977 if (branchInstr && isCondBranch(branchInstr) && 2978 getTrueBranch(branchInstr) == oldBlk) { 2979 setTrueBranch(branchInstr, newBlk); 2980 } 2981 } 2982 2983 static void wrapup(MachineBasicBlock *entryBlk) { 2984 assert((!entryBlk->getParent()->getJumpTableInfo() 2985 || entryBlk->getParent()->getJumpTableInfo()->isEmpty()) 2986 && "found a jump table"); 2987 2988 //collect continue right before endloop 2989 SmallVector<MachineInstr *, DEFAULT_VEC_SLOTS> contInstr; 2990 MachineBasicBlock::iterator pre = entryBlk->begin(); 2991 MachineBasicBlock::iterator iterEnd = entryBlk->end(); 2992 MachineBasicBlock::iterator iter = pre; 2993 while (iter != iterEnd) { 2994 if (pre->getOpcode() == AMDGPU::CONTINUE 2995 && iter->getOpcode() == AMDGPU::ENDLOOP) { 2996 contInstr.push_back(pre); 2997 } 2998 pre = iter; 2999 ++iter; 3000 } //end while 3001 3002 //delete continue right before endloop 3003 for (unsigned i = 0; i < contInstr.size(); ++i) { 3004 contInstr[i]->eraseFromParent(); 3005 } 3006 3007 // TODO to fix up jump table so later phase won't be confused. if 3008 // (jumpTableInfo->isEmpty() == false) { need to clean the jump table, but 3009 // there isn't such an interface yet. alternatively, replace all the other 3010 // blocks in the jump table with the entryBlk //} 3011 3012 } //wrapup 3013 3014 static MachineDominatorTree *getDominatorTree(AMDGPUCFGStructurizer &pass) { 3015 return &pass.getAnalysis<MachineDominatorTree>(); 3016 } 3017 3018 static MachinePostDominatorTree* 3019 getPostDominatorTree(AMDGPUCFGStructurizer &pass) { 3020 return &pass.getAnalysis<MachinePostDominatorTree>(); 3021 } 3022 3023 static MachineLoopInfo *getLoopInfo(AMDGPUCFGStructurizer &pass) { 3024 return &pass.getAnalysis<MachineLoopInfo>(); 3025 } 3026 }; // template class CFGStructTraits 3027 } //end of namespace llvm 3028 3029 // createAMDGPUCFGPreparationPass- Returns a pass 3030 FunctionPass *llvm::createAMDGPUCFGPreparationPass(TargetMachine &tm 3031 ) { 3032 return new AMDGPUCFGPrepare(tm ); 3033 } 3034 3035 bool AMDGPUCFGPrepare::runOnMachineFunction(MachineFunction &func) { 3036 return llvmCFGStruct::CFGStructurizer<AMDGPUCFGStructurizer>().prepare(func, 3037 *this, 3038 TRI); 3039 } 3040 3041 // createAMDGPUCFGStructurizerPass- Returns a pass 3042 FunctionPass *llvm::createAMDGPUCFGStructurizerPass(TargetMachine &tm 3043 ) { 3044 return new AMDGPUCFGPerform(tm ); 3045 } 3046 3047 bool AMDGPUCFGPerform::runOnMachineFunction(MachineFunction &func) { 3048 return llvmCFGStruct::CFGStructurizer<AMDGPUCFGStructurizer>().run(func, 3049 *this, 3050 TRI); 3051 } 3052