//===-- MachineLICM.cpp - Machine Loop Invariant Code Motion Pass ---------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This pass performs loop invariant code motion on machine instructions. We
// attempt to remove as much code from the body of a loop as possible.
//
// This pass is not intended to be a replacement or a complete alternative
// for the LLVM-IR-level LICM pass. It is only designed to hoist simple
// constructs that are not exposed before lowering and instruction selection.
//
//===----------------------------------------------------------------------===//

#include "llvm/CodeGen/Passes.h"
#include "llvm/ADT/DenseMap.h"
#include "llvm/ADT/SmallSet.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/MachineDominators.h"
#include "llvm/CodeGen/MachineFrameInfo.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineMemOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/PseudoSourceValue.h"
#include "llvm/CodeGen/TargetSchedule.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetInstrInfo.h"
#include "llvm/Target/TargetLowering.h"
#include "llvm/Target/TargetMachine.h"
#include "llvm/Target/TargetRegisterInfo.h"
#include "llvm/Target/TargetSubtargetInfo.h"
using namespace llvm;

#define DEBUG_TYPE "machine-licm"

static cl::opt<bool>
AvoidSpeculation("avoid-speculation",
                 cl::desc("MachineLICM should avoid speculation"),
                 cl::init(true), cl::Hidden);

static cl::opt<bool>
HoistCheapInsts("hoist-cheap-insts",
                cl::desc("MachineLICM should hoist even cheap instructions"),
                cl::init(false), cl::Hidden);

static cl::opt<bool>
SinkInstsToAvoidSpills("sink-insts-to-avoid-spills",
                       cl::desc("MachineLICM should sink instructions into "
                                "loops to avoid register spills"),
                       cl::init(false), cl::Hidden);

STATISTIC(NumHoisted,
          "Number of machine instructions hoisted out of loops");
STATISTIC(NumLowRP,
          "Number of instructions hoisted in low reg pressure situation");
STATISTIC(NumHighLatency,
          "Number of high latency instructions hoisted");
STATISTIC(NumCSEed,
          "Number of hoisted machine instructions CSEed");
STATISTIC(NumPostRAHoisted,
          "Number of machine instructions hoisted out of loops post regalloc");

namespace {
  class MachineLICM : public MachineFunctionPass {
    const TargetInstrInfo *TII;
    const TargetLoweringBase *TLI;
    const TargetRegisterInfo *TRI;
    const MachineFrameInfo *MFI;
    MachineRegisterInfo *MRI;
    TargetSchedModel SchedModel;
    bool PreRegAlloc;                // True when running before register
                                     // allocation (machine code is in SSA).

    // Various analyses that we use...
    AliasAnalysis        *AA;        // Alias analysis info.
    MachineLoopInfo      *MLI;       // Current MachineLoopInfo
    MachineDominatorTree *DT;        // Machine dominator tree for the cur loop

    // State that is updated as we process loops
    bool         Changed;            // True if a loop is changed.
    bool         FirstInLoop;        // True if it's the first LICM in the loop.
    MachineLoop *CurLoop;            // The current loop we are working on.
    MachineBasicBlock *CurPreheader; // The preheader for CurLoop.

    // Exit blocks for CurLoop.
    SmallVector<MachineBasicBlock*, 8> ExitBlocks;

    bool isExitBlock(const MachineBasicBlock *MBB) const {
      return std::find(ExitBlocks.begin(), ExitBlocks.end(), MBB) !=
        ExitBlocks.end();
    }

    // Track 'estimated' register pressure.
    SmallSet<unsigned, 32> RegSeen;
    SmallVector<unsigned, 8> RegPressure;

    // Register pressure "limit" per register pressure set. If the pressure
    // is higher than the limit, then it's considered high.
    SmallVector<unsigned, 8> RegLimit;

    // Register pressure on path leading from loop preheader to current BB.
    SmallVector<SmallVector<unsigned, 8>, 16> BackTrace;

    // For each opcode, keep a list of potential CSE instructions.
    DenseMap<unsigned, std::vector<const MachineInstr*> > CSEMap;

    enum {
      SpeculateFalse   = 0,
      SpeculateTrue    = 1,
      SpeculateUnknown = 2
    };

    // If a MBB does not dominate loop exiting blocks then it may not be safe
    // to hoist loads from this block.
    // Tri-state: 0 - false, 1 - true, 2 - unknown
    unsigned SpeculationState;

  public:
    static char ID; // Pass identification, replacement for typeid
    MachineLICM() :
      MachineFunctionPass(ID), PreRegAlloc(true) {
        initializeMachineLICMPass(*PassRegistry::getPassRegistry());
      }

    explicit MachineLICM(bool PreRA) :
      MachineFunctionPass(ID), PreRegAlloc(PreRA) {
        initializeMachineLICMPass(*PassRegistry::getPassRegistry());
      }

    bool runOnMachineFunction(MachineFunction &MF) override;

    void getAnalysisUsage(AnalysisUsage &AU) const override {
      AU.addRequired<MachineLoopInfo>();
      AU.addRequired<MachineDominatorTree>();
      AU.addRequired<AAResultsWrapperPass>();
      AU.addPreserved<MachineLoopInfo>();
      AU.addPreserved<MachineDominatorTree>();
      MachineFunctionPass::getAnalysisUsage(AU);
    }

    void releaseMemory() override {
      RegSeen.clear();
      RegPressure.clear();
      RegLimit.clear();
      BackTrace.clear();
      CSEMap.clear();
    }

  private:
    /// Keep track of information about hoisting candidates.
    struct CandidateInfo {
      MachineInstr *MI;
      unsigned      Def;  // The single (explicit, non-dead) register defined.
      int           FI;   // Frame index loaded from, or INT_MIN if none.
      CandidateInfo(MachineInstr *mi, unsigned def, int fi)
        : MI(mi), Def(def), FI(fi) {}
    };

    void HoistRegionPostRA();

    void HoistPostRA(MachineInstr *MI, unsigned Def);

    void ProcessMI(MachineInstr *MI, BitVector &PhysRegDefs,
                   BitVector &PhysRegClobbers, SmallSet<int, 32> &StoredFIs,
                   SmallVectorImpl<CandidateInfo> &Candidates);

    void AddToLiveIns(unsigned Reg);

    bool IsLICMCandidate(MachineInstr &I);

    bool IsLoopInvariantInst(MachineInstr &I);

    bool HasLoopPHIUse(const MachineInstr *MI) const;

    bool HasHighOperandLatency(MachineInstr &MI, unsigned DefIdx,
                               unsigned Reg) const;

    bool IsCheapInstruction(MachineInstr &MI) const;

    bool CanCauseHighRegPressure(const DenseMap<unsigned, int> &Cost,
                                 bool Cheap);

    void UpdateBackTraceRegPressure(const MachineInstr *MI);

    bool IsProfitableToHoist(MachineInstr &MI);

    bool IsGuaranteedToExecute(MachineBasicBlock *BB);

    void EnterScope(MachineBasicBlock *MBB);

    void ExitScope(MachineBasicBlock *MBB);

    void ExitScopeIfDone(
        MachineDomTreeNode *Node,
        DenseMap<MachineDomTreeNode *, unsigned> &OpenChildren,
        DenseMap<MachineDomTreeNode *, MachineDomTreeNode *> &ParentMap);

    void HoistOutOfLoop(MachineDomTreeNode *LoopHeaderNode);

    void HoistRegion(MachineDomTreeNode *N, bool IsHeader);

    void SinkIntoLoop();

    void InitRegPressure(MachineBasicBlock *BB);

    DenseMap<unsigned, int> calcRegisterCost(const MachineInstr *MI,
                                             bool ConsiderSeen,
                                             bool ConsiderUnseenAsDef);

    void UpdateRegPressure(const MachineInstr *MI,
                           bool ConsiderUnseenAsDef = false);

    MachineInstr *ExtractHoistableLoad(MachineInstr *MI);

    const MachineInstr *
    LookForDuplicate(const MachineInstr *MI,
                     std::vector<const MachineInstr *> &PrevMIs);

    bool EliminateCSE(
        MachineInstr *MI,
        DenseMap<unsigned, std::vector<const MachineInstr *>>::iterator &CI);

    bool MayCSE(MachineInstr *MI);

    bool Hoist(MachineInstr *MI, MachineBasicBlock *Preheader);

    void InitCSEMap(MachineBasicBlock *BB);

    MachineBasicBlock *getCurPreheader();
  };
} // end anonymous namespace

char MachineLICM::ID = 0;
char &llvm::MachineLICMID = MachineLICM::ID;
INITIALIZE_PASS_BEGIN(MachineLICM, "machinelicm",
                      "Machine Loop Invariant Code Motion", false, false)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_END(MachineLICM, "machinelicm",
                    "Machine Loop Invariant Code Motion", false, false)

/// Test if the given loop is the outer-most loop that has a unique predecessor.
static bool LoopIsOuterMostWithPredecessor(MachineLoop *CurLoop) {
  // Check whether this loop even has a unique predecessor.
  if (!CurLoop->getLoopPredecessor())
    return false;
  // Ok, now check to see if any of its outer loops do.
  for (MachineLoop *L = CurLoop->getParentLoop(); L; L = L->getParentLoop())
    if (L->getLoopPredecessor())
      return false;
  // None of them did, so this is the outermost with a unique predecessor.
  return true;
}

bool MachineLICM::runOnMachineFunction(MachineFunction &MF) {
  if (skipOptnoneFunction(*MF.getFunction()))
    return false;

  Changed = FirstInLoop = false;
  const TargetSubtargetInfo &ST = MF.getSubtarget();
  TII = ST.getInstrInfo();
  TLI = ST.getTargetLowering();
  TRI = ST.getRegisterInfo();
  MFI = MF.getFrameInfo();
  MRI = &MF.getRegInfo();
  SchedModel.init(ST.getSchedModel(), &ST, TII);

  // SSA form implies we are running before register allocation.
  PreRegAlloc = MRI->isSSA();

  if (PreRegAlloc)
    DEBUG(dbgs() << "******** Pre-regalloc Machine LICM: ");
  else
    DEBUG(dbgs() << "******** Post-regalloc Machine LICM: ");
  DEBUG(dbgs() << MF.getName() << " ********\n");

  if (PreRegAlloc) {
    // Estimate register pressure during pre-regalloc pass.
    unsigned NumRPS = TRI->getNumRegPressureSets();
    RegPressure.resize(NumRPS);
    std::fill(RegPressure.begin(), RegPressure.end(), 0);
    RegLimit.resize(NumRPS);
    for (unsigned i = 0, e = NumRPS; i != e; ++i)
      RegLimit[i] = TRI->getRegPressureSetLimit(MF, i);
  }

  // Get our Loop information...
  MLI = &getAnalysis<MachineLoopInfo>();
  DT  = &getAnalysis<MachineDominatorTree>();
  AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();

  SmallVector<MachineLoop *, 8> Worklist(MLI->begin(), MLI->end());
  while (!Worklist.empty()) {
    CurLoop = Worklist.pop_back_val();
    CurPreheader = nullptr;
    ExitBlocks.clear();

    // If this is done before regalloc, only visit outer-most preheader-sporting
    // loops.
    if (PreRegAlloc && !LoopIsOuterMostWithPredecessor(CurLoop)) {
      Worklist.append(CurLoop->begin(), CurLoop->end());
      continue;
    }

    CurLoop->getExitBlocks(ExitBlocks);

    if (!PreRegAlloc)
      HoistRegionPostRA();
    else {
      // CSEMap is initialized for loop header when the first instruction is
      // being hoisted.
      MachineDomTreeNode *N = DT->getNode(CurLoop->getHeader());
      FirstInLoop = true;
      HoistOutOfLoop(N);
      CSEMap.clear();

      if (SinkInstsToAvoidSpills)
        SinkIntoLoop();
    }
  }

  return Changed;
}

/// Return true if instruction stores to the specified frame.
static bool InstructionStoresToFI(const MachineInstr *MI, int FI) {
  for (MachineInstr::mmo_iterator o = MI->memoperands_begin(),
         oe = MI->memoperands_end(); o != oe; ++o) {
    if (!(*o)->isStore() || !(*o)->getPseudoValue())
      continue;
    if (const FixedStackPseudoSourceValue *Value =
        dyn_cast<FixedStackPseudoSourceValue>((*o)->getPseudoValue())) {
      if (Value->getFrameIndex() == FI)
        return true;
    }
  }
  return false;
}

/// Examine the instruction for potential LICM candidate. Also
/// gather register def and frame object update information.
void MachineLICM::ProcessMI(MachineInstr *MI,
                            BitVector &PhysRegDefs,
                            BitVector &PhysRegClobbers,
                            SmallSet<int, 32> &StoredFIs,
                            SmallVectorImpl<CandidateInfo> &Candidates) {
  bool RuledOut = false;
  bool HasNonInvariantUse = false;
  unsigned Def = 0;
  for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
    const MachineOperand &MO = MI->getOperand(i);
    if (MO.isFI()) {
      // Remember if the instruction stores to the frame index.
      int FI = MO.getIndex();
      if (!StoredFIs.count(FI) &&
          MFI->isSpillSlotObjectIndex(FI) &&
          InstructionStoresToFI(MI, FI))
        StoredFIs.insert(FI);
      HasNonInvariantUse = true;
      continue;
    }

    // We can't hoist an instruction defining a physreg that is clobbered in
    // the loop.
    if (MO.isRegMask()) {
      PhysRegClobbers.setBitsNotInMask(MO.getRegMask());
      continue;
    }

    if (!MO.isReg())
      continue;
    unsigned Reg = MO.getReg();
    if (!Reg)
      continue;
    assert(TargetRegisterInfo::isPhysicalRegister(Reg) &&
           "Not expecting virtual register!");

    if (!MO.isDef()) {
      if (Reg && (PhysRegDefs.test(Reg) || PhysRegClobbers.test(Reg)))
        // If it's using a non-loop-invariant register, then it's obviously not
        // safe to hoist.
        HasNonInvariantUse = true;
      continue;
    }

    if (MO.isImplicit()) {
      // Conservatively treat an implicit def (and every alias) as clobbered.
      for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
        PhysRegClobbers.set(*AI);
      if (!MO.isDead())
        // Non-dead implicit def? This cannot be hoisted.
        RuledOut = true;
      // No need to check if a dead implicit def is also defined by
      // another instruction.
      continue;
    }

    // FIXME: For now, avoid instructions with multiple defs, unless
    // it's a dead implicit def.
    if (Def)
      RuledOut = true;
    else
      Def = Reg;

    // If we have already seen another instruction that defines the same
    // register, then this is not safe. Two defs is indicated by setting a
    // PhysRegClobbers bit.
    for (MCRegAliasIterator AS(Reg, TRI, true); AS.isValid(); ++AS) {
      if (PhysRegDefs.test(*AS))
        PhysRegClobbers.set(*AS);
      PhysRegDefs.set(*AS);
    }
    if (PhysRegClobbers.test(Reg))
      // MI defined register is seen defined by another instruction in
      // the loop, it cannot be a LICM candidate.
      RuledOut = true;
  }

  // Only consider reloads for now and remats which do not have register
  // operands. FIXME: Consider unfold load folding instructions.
  if (Def && !RuledOut) {
    int FI = INT_MIN;
    if ((!HasNonInvariantUse && IsLICMCandidate(*MI)) ||
        (TII->isLoadFromStackSlot(MI, FI) && MFI->isSpillSlotObjectIndex(FI)))
      Candidates.push_back(CandidateInfo(MI, Def, FI));
  }
}

/// Walk the specified region of the CFG and hoist loop invariants out to the
/// preheader.
void MachineLICM::HoistRegionPostRA() {
  MachineBasicBlock *Preheader = getCurPreheader();
  if (!Preheader)
    return;

  unsigned NumRegs = TRI->getNumRegs();
  BitVector PhysRegDefs(NumRegs); // Regs defined once in the loop.
  BitVector PhysRegClobbers(NumRegs); // Regs defined more than once.

  SmallVector<CandidateInfo, 32> Candidates;
  SmallSet<int, 32> StoredFIs;

  // Walk the entire region, count number of defs for each register, and
  // collect potential LICM candidates.
  const std::vector<MachineBasicBlock *> &Blocks = CurLoop->getBlocks();
  for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
    MachineBasicBlock *BB = Blocks[i];

    // If the header of the loop containing this basic block is a landing pad,
    // then don't try to hoist instructions out of this loop.
    const MachineLoop *ML = MLI->getLoopFor(BB);
    if (ML && ML->getHeader()->isEHPad()) continue;

    // Conservatively treat live-in's as an external def.
    // FIXME: That means a reload that is reused in successor block(s) will not
    // be LICM'ed.
    for (const auto &LI : BB->liveins()) {
      for (MCRegAliasIterator AI(LI.PhysReg, TRI, true); AI.isValid(); ++AI)
        PhysRegDefs.set(*AI);
    }

    SpeculationState = SpeculateUnknown;
    for (MachineBasicBlock::iterator
         MII = BB->begin(), E = BB->end(); MII != E; ++MII) {
      MachineInstr *MI = &*MII;
      ProcessMI(MI, PhysRegDefs, PhysRegClobbers, StoredFIs, Candidates);
    }
  }

  // Gather the registers read / clobbered by the terminator.
  BitVector TermRegs(NumRegs);
  MachineBasicBlock::iterator TI = Preheader->getFirstTerminator();
  if (TI != Preheader->end()) {
    for (unsigned i = 0, e = TI->getNumOperands(); i != e; ++i) {
      const MachineOperand &MO = TI->getOperand(i);
      if (!MO.isReg())
        continue;
      unsigned Reg = MO.getReg();
      if (!Reg)
        continue;
      for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
        TermRegs.set(*AI);
    }
  }

  // Now evaluate whether the potential candidates qualify.
  // 1. Check if the candidate defined register is defined by another
  //    instruction in the loop.
  // 2. If the candidate is a load from stack slot (always true for now),
  //    check if the slot is stored anywhere in the loop.
  // 3. Make sure candidate def should not clobber
  //    registers read by the terminator. Similarly its def should not be
  //    clobbered by the terminator.
  for (unsigned i = 0, e = Candidates.size(); i != e; ++i) {
    if (Candidates[i].FI != INT_MIN &&
        StoredFIs.count(Candidates[i].FI))
      continue;

    unsigned Def = Candidates[i].Def;
    if (!PhysRegClobbers.test(Def) && !TermRegs.test(Def)) {
      bool Safe = true;
      MachineInstr *MI = Candidates[i].MI;
      for (unsigned j = 0, ee = MI->getNumOperands(); j != ee; ++j) {
        const MachineOperand &MO = MI->getOperand(j);
        if (!MO.isReg() || MO.isDef() || !MO.getReg())
          continue;
        unsigned Reg = MO.getReg();
        if (PhysRegDefs.test(Reg) ||
            PhysRegClobbers.test(Reg)) {
          // If it's using a non-loop-invariant register, then it's obviously
          // not safe to hoist.
          Safe = false;
          break;
        }
      }
      if (Safe)
        HoistPostRA(MI, Candidates[i].Def);
    }
  }
}

/// Add register 'Reg' to the livein sets of BBs in the current loop, and make
/// sure it is not killed by any instructions in the loop.
529 void MachineLICM::AddToLiveIns(unsigned Reg) { 530 const std::vector<MachineBasicBlock *> &Blocks = CurLoop->getBlocks(); 531 for (unsigned i = 0, e = Blocks.size(); i != e; ++i) { 532 MachineBasicBlock *BB = Blocks[i]; 533 if (!BB->isLiveIn(Reg)) 534 BB->addLiveIn(Reg); 535 for (MachineBasicBlock::iterator 536 MII = BB->begin(), E = BB->end(); MII != E; ++MII) { 537 MachineInstr *MI = &*MII; 538 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 539 MachineOperand &MO = MI->getOperand(i); 540 if (!MO.isReg() || !MO.getReg() || MO.isDef()) continue; 541 if (MO.getReg() == Reg || TRI->isSuperRegister(Reg, MO.getReg())) 542 MO.setIsKill(false); 543 } 544 } 545 } 546 } 547 548 /// When an instruction is found to only use loop invariant operands that is 549 /// safe to hoist, this instruction is called to do the dirty work. 550 void MachineLICM::HoistPostRA(MachineInstr *MI, unsigned Def) { 551 MachineBasicBlock *Preheader = getCurPreheader(); 552 553 // Now move the instructions to the predecessor, inserting it before any 554 // terminator instructions. 555 DEBUG(dbgs() << "Hoisting to BB#" << Preheader->getNumber() << " from BB#" 556 << MI->getParent()->getNumber() << ": " << *MI); 557 558 // Splice the instruction to the preheader. 559 MachineBasicBlock *MBB = MI->getParent(); 560 Preheader->splice(Preheader->getFirstTerminator(), MBB, MI); 561 562 // Add register to livein list to all the BBs in the current loop since a 563 // loop invariant must be kept live throughout the whole loop. This is 564 // important to ensure later passes do not scavenge the def register. 565 AddToLiveIns(Def); 566 567 ++NumPostRAHoisted; 568 Changed = true; 569 } 570 571 /// Check if this mbb is guaranteed to execute. If not then a load from this mbb 572 /// may not be safe to hoist. 
bool MachineLICM::IsGuaranteedToExecute(MachineBasicBlock *BB) {
  // Reuse the cached answer for this loop if we already computed one.
  if (SpeculationState != SpeculateUnknown)
    return SpeculationState == SpeculateFalse;

  if (BB != CurLoop->getHeader()) {
    // Check loop exiting blocks: BB is only guaranteed to execute if it
    // dominates every block that can leave the loop.
    SmallVector<MachineBasicBlock*, 8> CurrentLoopExitingBlocks;
    CurLoop->getExitingBlocks(CurrentLoopExitingBlocks);
    for (unsigned i = 0, e = CurrentLoopExitingBlocks.size(); i != e; ++i)
      if (!DT->dominates(BB, CurrentLoopExitingBlocks[i])) {
        SpeculationState = SpeculateTrue;
        return false;
      }
  }

  SpeculationState = SpeculateFalse;
  return true;
}

/// Open a new dominator-tree scope: remember the register pressure at entry so
/// it can be restored when the scope is exited.
void MachineLICM::EnterScope(MachineBasicBlock *MBB) {
  DEBUG(dbgs() << "Entering: " << MBB->getName() << '\n');

  // Remember livein register pressure.
  BackTrace.push_back(RegPressure);
}

/// Close the scope for MBB, dropping its recorded register pressure.
void MachineLICM::ExitScope(MachineBasicBlock *MBB) {
  DEBUG(dbgs() << "Exiting: " << MBB->getName() << '\n');
  BackTrace.pop_back();
}

/// Destroy scope for the MBB that corresponds to the given dominator tree node
/// if it's a leaf or all of its children are done. Walk up the dominator tree
/// to destroy ancestors which are now done.
void MachineLICM::ExitScopeIfDone(MachineDomTreeNode *Node,
    DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren,
    DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> &ParentMap) {
  if (OpenChildren[Node])
    return;

  // Pop scope.
  ExitScope(Node->getBlock());

  // Now traverse upwards to pop ancestors whose offsprings are all done.
  while (MachineDomTreeNode *Parent = ParentMap[Node]) {
    unsigned Left = --OpenChildren[Parent];
    if (Left != 0)
      break;
    ExitScope(Parent->getBlock());
    Node = Parent;
  }
}

/// Walk the specified loop in the CFG (defined by all blocks dominated by the
/// specified header block, and that are in the current loop) in depth first
/// order w.r.t the DominatorTree. This allows us to visit definitions before
/// uses, allowing us to hoist a loop body in one pass without iteration.
///
void MachineLICM::HoistOutOfLoop(MachineDomTreeNode *HeaderN) {
  MachineBasicBlock *Preheader = getCurPreheader();
  if (!Preheader)
    return;

  SmallVector<MachineDomTreeNode*, 32> Scopes;
  SmallVector<MachineDomTreeNode*, 8> WorkList;
  DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> ParentMap;
  DenseMap<MachineDomTreeNode*, unsigned> OpenChildren;

  // Perform a DFS walk to determine the order of visit.
  WorkList.push_back(HeaderN);
  while (!WorkList.empty()) {
    MachineDomTreeNode *Node = WorkList.pop_back_val();
    assert(Node && "Null dominator tree node?");
    MachineBasicBlock *BB = Node->getBlock();

    // If the header of the loop containing this basic block is a landing pad,
    // then don't try to hoist instructions out of this loop.
    const MachineLoop *ML = MLI->getLoopFor(BB);
    if (ML && ML->getHeader()->isEHPad())
      continue;

    // If this subregion is not in the top level loop at all, exit.
    if (!CurLoop->contains(BB))
      continue;

    Scopes.push_back(Node);
    const std::vector<MachineDomTreeNode*> &Children = Node->getChildren();
    unsigned NumChildren = Children.size();

    // Don't hoist things out of a large switch statement. This often causes
    // code to be hoisted that wasn't going to be executed, and increases
    // register pressure in a situation where it's likely to matter.
    if (BB->succ_size() >= 25)
      NumChildren = 0;

    OpenChildren[Node] = NumChildren;
    // Add children in reverse order as then the next popped worklist node is
    // the first child of this node. This means we ultimately traverse the
    // DOM tree in exactly the same order as if we'd recursed.
    for (int i = (int)NumChildren-1; i >= 0; --i) {
      MachineDomTreeNode *Child = Children[i];
      ParentMap[Child] = Node;
      WorkList.push_back(Child);
    }
  }

  if (Scopes.size() == 0)
    return;

  // Compute registers which are livein into the loop headers.
  RegSeen.clear();
  BackTrace.clear();
  InitRegPressure(Preheader);

  // Now perform LICM.
  for (unsigned i = 0, e = Scopes.size(); i != e; ++i) {
    MachineDomTreeNode *Node = Scopes[i];
    MachineBasicBlock *MBB = Node->getBlock();

    EnterScope(MBB);

    // Process the block
    SpeculationState = SpeculateUnknown;
    for (MachineBasicBlock::iterator
         MII = MBB->begin(), E = MBB->end(); MII != E; ) {
      MachineBasicBlock::iterator NextMII = MII; ++NextMII;
      MachineInstr *MI = &*MII;
      // Only instructions that were not hoisted contribute to the register
      // pressure estimate inside the loop.
      if (!Hoist(MI, Preheader))
        UpdateRegPressure(MI);
      MII = NextMII;
    }

    // If it's a leaf node, it's done. Traverse upwards to pop ancestors.
    ExitScopeIfDone(Node, OpenChildren, ParentMap);
  }
}

/// Sink instructions into loops if profitable. This especially tries to prevent
/// register spills caused by register pressure if there is little to no
/// overhead moving instructions into loops.
void MachineLICM::SinkIntoLoop() {
  MachineBasicBlock *Preheader = getCurPreheader();
  if (!Preheader)
    return;

  SmallVector<MachineInstr *, 8> Candidates;
  for (MachineBasicBlock::instr_iterator I = Preheader->instr_begin();
       I != Preheader->instr_end(); ++I) {
    // We need to ensure that we can safely move this instruction into the loop.
    // As such, it must not have side-effects, e.g. such as a call has.
    if (IsLoopInvariantInst(*I) && !HasLoopPHIUse(&*I))
      Candidates.push_back(&*I);
  }

  for (MachineInstr *I : Candidates) {
    const MachineOperand &MO = I->getOperand(0);
    if (!MO.isDef() || !MO.isReg() || !MO.getReg())
      continue;
    if (!MRI->hasOneDef(MO.getReg()))
      continue;
    bool CanSink = true;
    MachineBasicBlock *B = nullptr;
    for (MachineInstr &MI : MRI->use_instructions(MO.getReg())) {
      // FIXME: Come up with a proper cost model that estimates whether sinking
      // the instruction (and thus possibly executing it on every loop
      // iteration) is more expensive than a register.
      // For now assumes that copies are cheap and thus almost always worth it.
      if (!MI.isCopy()) {
        CanSink = false;
        break;
      }
      if (!B) {
        B = MI.getParent();
        continue;
      }
      // Sink to the nearest common dominator of all the (copy) uses.
      B = DT->findNearestCommonDominator(B, MI.getParent());
      if (!B) {
        CanSink = false;
        break;
      }
    }
    if (!CanSink || !B || B == Preheader)
      continue;
    B->splice(B->getFirstNonPHI(), Preheader, I);
  }
}

/// Return true if this operand's register value dies here: either it carries a
/// kill flag or this is its only non-debug use.
static bool isOperandKill(const MachineOperand &MO, MachineRegisterInfo *MRI) {
  return MO.isKill() || MRI->hasOneNonDBGUse(MO.getReg());
}

/// Find all virtual register references that are liveout of the preheader to
/// initialize the starting "register pressure". Note this does not count live
/// through (livein but not used) registers.
void MachineLICM::InitRegPressure(MachineBasicBlock *BB) {
  std::fill(RegPressure.begin(), RegPressure.end(), 0);

  // If the preheader has only a single predecessor and it ends with a
  // fallthrough or an unconditional branch, then scan its predecessor for live
  // defs as well. This happens whenever the preheader is created by splitting
  // the critical edge from the loop predecessor to the loop header.
  if (BB->pred_size() == 1) {
    MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
    SmallVector<MachineOperand, 4> Cond;
    if (!TII->AnalyzeBranch(*BB, TBB, FBB, Cond, false) && Cond.empty())
      InitRegPressure(*BB->pred_begin());
  }

  for (const MachineInstr &MI : *BB)
    UpdateRegPressure(&MI, /*ConsiderUnseenAsDef=*/true);
}

/// Update estimate of register pressure after the specified instruction.
void MachineLICM::UpdateRegPressure(const MachineInstr *MI,
                                    bool ConsiderUnseenAsDef) {
  auto Cost = calcRegisterCost(MI, /*ConsiderSeen=*/true, ConsiderUnseenAsDef);
  for (const auto &RPIdAndCost : Cost) {
    unsigned Class = RPIdAndCost.first;
    // Clamp at zero: pressure cannot go negative.
    if (static_cast<int>(RegPressure[Class]) < -RPIdAndCost.second)
      RegPressure[Class] = 0;
    else
      RegPressure[Class] += RPIdAndCost.second;
  }
}

/// Calculate the additional register pressure that the registers used in MI
/// cause.
///
/// If 'ConsiderSeen' is true, updates 'RegSeen' and uses the information to
/// figure out which usages are live-ins.
/// FIXME: Figure out a way to consider 'RegSeen' from all code paths.
DenseMap<unsigned, int>
MachineLICM::calcRegisterCost(const MachineInstr *MI, bool ConsiderSeen,
                              bool ConsiderUnseenAsDef) {
  DenseMap<unsigned, int> Cost;
  if (MI->isImplicitDef())
    return Cost;
  for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) {
    const MachineOperand &MO = MI->getOperand(i);
    if (!MO.isReg() || MO.isImplicit())
      continue;
    unsigned Reg = MO.getReg();
    if (!TargetRegisterInfo::isVirtualRegister(Reg))
      continue;

    // FIXME: It seems bad to use RegSeen only for some of these calculations.
    bool isNew = ConsiderSeen ? RegSeen.insert(Reg).second : false;
    const TargetRegisterClass *RC = MRI->getRegClass(Reg);

    RegClassWeight W = TRI->getRegClassWeight(RC);
    int RCCost = 0;
    if (MO.isDef())
      RCCost = W.RegWeight;
    else {
      bool isKill = isOperandKill(MO, MRI);
      if (isNew && !isKill && ConsiderUnseenAsDef)
        // Haven't seen this, it must be a livein.
        RCCost = W.RegWeight;
      else if (!isNew && isKill)
        // A killed use releases the register, reducing pressure.
        RCCost = -W.RegWeight;
    }
    if (RCCost == 0)
      continue;
    // Apply the cost to every pressure set this register class belongs to.
    const int *PS = TRI->getRegClassPressureSets(RC);
    for (; *PS != -1; ++PS) {
      if (Cost.find(*PS) == Cost.end())
        Cost[*PS] = RCCost;
      else
        Cost[*PS] += RCCost;
    }
  }
  return Cost;
}

/// Return true if this machine instruction loads from global offset table or
/// constant pool.
static bool isLoadFromGOTOrConstantPool(MachineInstr &MI) {
  assert (MI.mayLoad() && "Expected MI that loads!");
  for (MachineInstr::mmo_iterator I = MI.memoperands_begin(),
         E = MI.memoperands_end(); I != E; ++I) {
    if (const PseudoSourceValue *PSV = (*I)->getPseudoValue()) {
      if (PSV->isGOT() || PSV->isConstantPool())
        return true;
    }
  }
  return false;
}

/// Returns true if the instruction may be a suitable candidate for LICM.
/// e.g. If the instruction is a call, then it's obviously not safe to hoist it.
bool MachineLICM::IsLICMCandidate(MachineInstr &I) {
  // Check if it's safe to move the instruction.
  bool DontMoveAcrossStore = true;
  if (!I.isSafeToMove(AA, DontMoveAcrossStore))
    return false;

  // If it is load then check if it is guaranteed to execute by making sure that
  // it dominates all exiting blocks. If it doesn't, then there is a path out of
  // the loop which does not execute this load, so we can't hoist it. Loads
  // from constant memory are not safe to speculate all the time, for example
  // indexed load from a jump table.
  // Stores and side effects are already checked by isSafeToMove.
  if (I.mayLoad() && !isLoadFromGOTOrConstantPool(I) &&
      !IsGuaranteedToExecute(I.getParent()))
    return false;

  return true;
}

/// Returns true if the instruction is loop invariant.
/// I.e., all virtual register operands are defined outside of the loop,
/// physical registers aren't accessed explicitly, and there are no side
/// effects that aren't captured by the operands or other flags.
///
bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) {
  if (!IsLICMCandidate(I))
    return false;

  // The instruction is loop invariant if all of its operands are.
  for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) {
    const MachineOperand &MO = I.getOperand(i);

    if (!MO.isReg())
      continue;

    unsigned Reg = MO.getReg();
    if (Reg == 0) continue;

    // Don't hoist an instruction that uses or defines a physical register.
    if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
      if (MO.isUse()) {
        // If the physreg has no defs anywhere, it's just an ambient register
        // and we can freely move its uses. Alternatively, if it's allocatable,
        // it could get allocated to something with a def during allocation.
        if (!MRI->isConstantPhysReg(Reg, *I.getParent()->getParent()))
          return false;
        // Otherwise it's safe to move.
        continue;
      } else if (!MO.isDead()) {
        // A def that isn't dead. We can't move it.
        return false;
      } else if (CurLoop->getHeader()->isLiveIn(Reg)) {
        // If the reg is live into the loop, we can't hoist an instruction
        // which would clobber it.
        return false;
      }
    }

    if (!MO.isUse())
      continue;

    assert(MRI->getVRegDef(Reg) &&
           "Machine instr not mapped for this vreg?!");

    // If the loop contains the definition of an operand, then the instruction
    // isn't loop invariant.
    if (CurLoop->contains(MRI->getVRegDef(Reg)))
      return false;
  }

  // If we got this far, the instruction is loop invariant!
  return true;
}


/// Return true if the specified instruction is used by a phi node and hoisting
/// it could cause a copy to be inserted.
bool MachineLICM::HasLoopPHIUse(const MachineInstr *MI) const {
  // Worklist-based transitive walk through copies of MI's defs.
  SmallVector<const MachineInstr*, 8> Work(1, MI);
  do {
    MI = Work.pop_back_val();
    for (const MachineOperand &MO : MI->operands()) {
      if (!MO.isReg() || !MO.isDef())
        continue;
      unsigned Reg = MO.getReg();
      if (!TargetRegisterInfo::isVirtualRegister(Reg))
        continue;
      for (MachineInstr &UseMI : MRI->use_instructions(Reg)) {
        // A PHI may cause a copy to be inserted.
        if (UseMI.isPHI()) {
          // A PHI inside the loop causes a copy because the live range of Reg is
          // extended across the PHI.
          if (CurLoop->contains(&UseMI))
            return true;
          // A PHI in an exit block can cause a copy to be inserted if the PHI
          // has multiple predecessors in the loop with different values.
          // For now, approximate by rejecting all exit blocks.
          if (isExitBlock(UseMI.getParent()))
            return true;
          continue;
        }
        // Look past copies as well.
        if (UseMI.isCopy() && CurLoop->contains(&UseMI))
          Work.push_back(&UseMI);
      }
    }
  } while (!Work.empty());
  return false;
}

/// Compute operand latency between a def of 'Reg' and a use in the current
/// loop, return true if the target considered it high.
975 bool MachineLICM::HasHighOperandLatency(MachineInstr &MI, 976 unsigned DefIdx, unsigned Reg) const { 977 if (MRI->use_nodbg_empty(Reg)) 978 return false; 979 980 for (MachineInstr &UseMI : MRI->use_nodbg_instructions(Reg)) { 981 if (UseMI.isCopyLike()) 982 continue; 983 if (!CurLoop->contains(UseMI.getParent())) 984 continue; 985 for (unsigned i = 0, e = UseMI.getNumOperands(); i != e; ++i) { 986 const MachineOperand &MO = UseMI.getOperand(i); 987 if (!MO.isReg() || !MO.isUse()) 988 continue; 989 unsigned MOReg = MO.getReg(); 990 if (MOReg != Reg) 991 continue; 992 993 if (TII->hasHighOperandLatency(SchedModel, MRI, &MI, DefIdx, &UseMI, i)) 994 return true; 995 } 996 997 // Only look at the first in loop use. 998 break; 999 } 1000 1001 return false; 1002 } 1003 1004 /// Return true if the instruction is marked "cheap" or the operand latency 1005 /// between its def and a use is one or less. 1006 bool MachineLICM::IsCheapInstruction(MachineInstr &MI) const { 1007 if (TII->isAsCheapAsAMove(&MI) || MI.isCopyLike()) 1008 return true; 1009 1010 bool isCheap = false; 1011 unsigned NumDefs = MI.getDesc().getNumDefs(); 1012 for (unsigned i = 0, e = MI.getNumOperands(); NumDefs && i != e; ++i) { 1013 MachineOperand &DefMO = MI.getOperand(i); 1014 if (!DefMO.isReg() || !DefMO.isDef()) 1015 continue; 1016 --NumDefs; 1017 unsigned Reg = DefMO.getReg(); 1018 if (TargetRegisterInfo::isPhysicalRegister(Reg)) 1019 continue; 1020 1021 if (!TII->hasLowDefLatency(SchedModel, &MI, i)) 1022 return false; 1023 isCheap = true; 1024 } 1025 1026 return isCheap; 1027 } 1028 1029 /// Visit BBs from header to current BB, check if hoisting an instruction of the 1030 /// given cost matrix can cause high register pressure. 
bool MachineLICM::CanCauseHighRegPressure(const DenseMap<unsigned, int>& Cost,
                                          bool CheapInstr) {
  for (const auto &RPIdAndCost : Cost) {
    // Only pressure-set entries with a net increase can push us over a limit.
    if (RPIdAndCost.second <= 0)
      continue;

    unsigned Class = RPIdAndCost.first;
    int Limit = RegLimit[Class];

    // Don't hoist cheap instructions if they would increase register pressure,
    // even if we're under the limit.
    if (CheapInstr && !HoistCheapInsts)
      return true;

    // Check the estimated pressure of every block on the path from the loop
    // header to the current block.
    for (const auto &RP : BackTrace)
      if (static_cast<int>(RP[Class]) + RPIdAndCost.second >= Limit)
        return true;
  }

  return false;
}

/// Traverse the back trace from header to the current block and update their
/// register pressures to reflect the effect of hoisting MI from the current
/// block to the preheader.
void MachineLICM::UpdateBackTraceRegPressure(const MachineInstr *MI) {
  // First compute the 'cost' of the instruction, i.e. its contribution
  // to register pressure.
  auto Cost = calcRegisterCost(MI, /*ConsiderSeen=*/false,
                               /*ConsiderUnseenAsDef=*/false);

  // Update register pressure of blocks from loop header to current block.
  for (auto &RP : BackTrace)
    for (const auto &RPIdAndCost : Cost)
      RP[RPIdAndCost.first] += RPIdAndCost.second;
}

/// Return true if it is potentially profitable to hoist the given loop
/// invariant.
bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) {
  // IMPLICIT_DEFs are free to hoist; they define no real value.
  if (MI.isImplicitDef())
    return true;

  // Besides removing computation from the loop, hoisting an instruction has
  // these effects:
  //
  // - The value defined by the instruction becomes live across the entire
  //   loop. This increases register pressure in the loop.
  //
  // - If the value is used by a PHI in the loop, a copy will be required for
  //   lowering the PHI after extending the live range.
  //
  // - When hoisting the last use of a value in the loop, that value no longer
  //   needs to be live in the loop. This lowers register pressure in the loop.

  bool CheapInstr = IsCheapInstruction(MI);
  bool CreatesCopy = HasLoopPHIUse(&MI);

  // Don't hoist a cheap instruction if it would create a copy in the loop.
  if (CheapInstr && CreatesCopy) {
    DEBUG(dbgs() << "Won't hoist cheap instr with loop PHI use: " << MI);
    return false;
  }

  // Rematerializable instructions should always be hoisted since the register
  // allocator can just pull them down again when needed.
  if (TII->isTriviallyReMaterializable(&MI, AA))
    return true;

  // FIXME: If there are long latency loop-invariant instructions inside the
  // loop at this point, why didn't the optimizer's LICM hoist them?
  for (unsigned i = 0, e = MI.getDesc().getNumOperands(); i != e; ++i) {
    const MachineOperand &MO = MI.getOperand(i);
    if (!MO.isReg() || MO.isImplicit())
      continue;
    unsigned Reg = MO.getReg();
    if (!TargetRegisterInfo::isVirtualRegister(Reg))
      continue;
    // Hoisting a high-latency def is worthwhile even under pressure.
    if (MO.isDef() && HasHighOperandLatency(MI, i, Reg)) {
      DEBUG(dbgs() << "Hoist High Latency: " << MI);
      ++NumHighLatency;
      return true;
    }
  }

  // Estimate register pressure to determine whether to LICM the instruction.
  // In low register pressure situation, we can be more aggressive about
  // hoisting. Also, favors hoisting long latency instructions even in
  // moderately high pressure situation.
  // Cheap instructions will only be hoisted if they don't increase register
  // pressure at all.
  auto Cost = calcRegisterCost(&MI, /*ConsiderSeen=*/false,
                               /*ConsiderUnseenAsDef=*/false);

  // Visit BBs from header to current BB, if hoisting this doesn't cause
  // high register pressure, then it's safe to proceed.
  if (!CanCauseHighRegPressure(Cost, CheapInstr)) {
    DEBUG(dbgs() << "Hoist non-reg-pressure: " << MI);
    ++NumLowRP;
    return true;
  }

  // Don't risk increasing register pressure if it would create copies.
  if (CreatesCopy) {
    DEBUG(dbgs() << "Won't hoist instr with loop PHI use: " << MI);
    return false;
  }

  // Do not "speculate" in high register pressure situation. If an
  // instruction is not guaranteed to be executed in the loop, it's best to be
  // conservative.
  if (AvoidSpeculation &&
      (!IsGuaranteedToExecute(MI.getParent()) && !MayCSE(&MI))) {
    DEBUG(dbgs() << "Won't speculate: " << MI);
    return false;
  }

  // High register pressure situation, only hoist if the instruction is going
  // to be remat'ed.
  if (!TII->isTriviallyReMaterializable(&MI, AA) &&
      !MI.isInvariantLoad(AA)) {
    DEBUG(dbgs() << "Can't remat / high reg-pressure: " << MI);
    return false;
  }

  return true;
}

/// Unfold a load from the given machineinstr if the load itself could be
/// hoisted. Return the unfolded and hoistable load, or null if the load
/// couldn't be unfolded or if it wouldn't be hoistable.
MachineInstr *MachineLICM::ExtractHoistableLoad(MachineInstr *MI) {
  // Don't unfold simple loads.
  if (MI->canFoldAsLoad())
    return nullptr;

  // If not, we may be able to unfold a load and hoist that.
  // First test whether the instruction is loading from an amenable
  // memory location.
  if (!MI->isInvariantLoad(AA))
    return nullptr;

  // Next determine the register class for a temporary register.
  unsigned LoadRegIndex;
  unsigned NewOpc =
    TII->getOpcodeAfterMemoryUnfold(MI->getOpcode(),
                                    /*UnfoldLoad=*/true,
                                    /*UnfoldStore=*/false,
                                    &LoadRegIndex);
  // NewOpc == 0 means the target can't unfold this instruction's load.
  if (NewOpc == 0) return nullptr;
  const MCInstrDesc &MID = TII->get(NewOpc);
  // Only handle the single-def case.
  if (MID.getNumDefs() != 1) return nullptr;
  MachineFunction &MF = *MI->getParent()->getParent();
  const TargetRegisterClass *RC = TII->getRegClass(MID, LoadRegIndex, TRI, MF);
  // Ok, we're unfolding. Create a temporary register and do the unfold.
  unsigned Reg = MRI->createVirtualRegister(RC);

  SmallVector<MachineInstr *, 2> NewMIs;
  bool Success =
    TII->unfoldMemoryOperand(MF, MI, Reg,
                             /*UnfoldLoad=*/true, /*UnfoldStore=*/false,
                             NewMIs);
  (void)Success;
  assert(Success &&
         "unfoldMemoryOperand failed when getOpcodeAfterMemoryUnfold "
         "succeeded!");
  assert(NewMIs.size() == 2 &&
         "Unfolded a load into multiple instructions!");
  // Insert the two new instructions (load, then remainder) before MI so they
  // can be evaluated for hoisting in place.
  MachineBasicBlock *MBB = MI->getParent();
  MachineBasicBlock::iterator Pos = MI;
  MBB->insert(Pos, NewMIs[0]);
  MBB->insert(Pos, NewMIs[1]);
  // If unfolding produced a load that wasn't loop-invariant or profitable to
  // hoist, discard the new instructions and bail.
  if (!IsLoopInvariantInst(*NewMIs[0]) || !IsProfitableToHoist(*NewMIs[0])) {
    NewMIs[0]->eraseFromParent();
    NewMIs[1]->eraseFromParent();
    return nullptr;
  }

  // Update register pressure for the unfolded instruction.
  UpdateRegPressure(NewMIs[1]);

  // Otherwise we successfully unfolded a load that we can hoist.
  MI->eraseFromParent();
  return NewMIs[0];
}

/// Initialize the CSE map with instructions that are in the current loop
/// preheader that may become duplicates of instructions that are hoisted
/// out of the loop.
void MachineLICM::InitCSEMap(MachineBasicBlock *BB) {
  // Bucket every preheader instruction by opcode so later hoists can look for
  // duplicates cheaply.
  for (MachineBasicBlock::iterator I = BB->begin(),E = BB->end(); I != E; ++I) {
    const MachineInstr *MI = &*I;
    unsigned Opcode = MI->getOpcode();
    CSEMap[Opcode].push_back(MI);
  }
}

/// Find an instruction among PrevMIs that is a duplicate of MI.
/// Return this instruction if it's found.
const MachineInstr*
MachineLICM::LookForDuplicate(const MachineInstr *MI,
                              std::vector<const MachineInstr*> &PrevMIs) {
  for (unsigned i = 0, e = PrevMIs.size(); i != e; ++i) {
    const MachineInstr *PrevMI = PrevMIs[i];
    if (TII->produceSameValue(MI, PrevMI, (PreRegAlloc ? MRI : nullptr)))
      return PrevMI;
  }
  return nullptr;
}

/// Given a LICM'ed instruction, look for an instruction in the preheader that
/// computes the same value. If it's found, replace all uses of MI's definition
/// with the definition of the existing instruction rather than hoisting the
/// instruction to the preheader.
bool MachineLICM::EliminateCSE(MachineInstr *MI,
          DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator &CI) {
  // Do not CSE implicit_def so ProcessImplicitDefs can properly propagate
  // the undef property onto uses.
  if (CI == CSEMap.end() || MI->isImplicitDef())
    return false;

  if (const MachineInstr *Dup = LookForDuplicate(MI, CI->second)) {
    DEBUG(dbgs() << "CSEing " << *MI << " with " << *Dup);

    // Replace virtual registers defined by MI by their counterparts defined
    // by Dup.
    SmallVector<unsigned, 2> Defs;
    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
      const MachineOperand &MO = MI->getOperand(i);

      // Physical registers may not differ here.
      assert((!MO.isReg() || MO.getReg() == 0 ||
              !TargetRegisterInfo::isPhysicalRegister(MO.getReg()) ||
              MO.getReg() == Dup->getOperand(i).getReg()) &&
             "Instructions with different phys regs are not identical!");

      if (MO.isReg() && MO.isDef() &&
          !TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
        Defs.push_back(i);
    }

    // Constrain Dup's def register classes to MI's. If any constraint fails,
    // roll back the classes already changed and give up on the CSE.
    SmallVector<const TargetRegisterClass*, 2> OrigRCs;
    for (unsigned i = 0, e = Defs.size(); i != e; ++i) {
      unsigned Idx = Defs[i];
      unsigned Reg = MI->getOperand(Idx).getReg();
      unsigned DupReg = Dup->getOperand(Idx).getReg();
      OrigRCs.push_back(MRI->getRegClass(DupReg));

      if (!MRI->constrainRegClass(DupReg, MRI->getRegClass(Reg))) {
        // Restore old RCs if more than one defs.
        for (unsigned j = 0; j != i; ++j)
          MRI->setRegClass(Dup->getOperand(Defs[j]).getReg(), OrigRCs[j]);
        return false;
      }
    }

    // All constraints succeeded: rewrite MI's defs to Dup's and drop MI.
    for (unsigned i = 0, e = Defs.size(); i != e; ++i) {
      unsigned Idx = Defs[i];
      unsigned Reg = MI->getOperand(Idx).getReg();
      unsigned DupReg = Dup->getOperand(Idx).getReg();
      MRI->replaceRegWith(Reg, DupReg);
      // DupReg's live range now covers MI's former uses; stale kill flags
      // would be wrong.
      MRI->clearKillFlags(DupReg);
    }

    MI->eraseFromParent();
    ++NumCSEed;
    return true;
  }
  return false;
}

/// Return true if the given instruction will be CSE'd if it's hoisted out of
/// the loop.
bool MachineLICM::MayCSE(MachineInstr *MI) {
  unsigned Opcode = MI->getOpcode();
  DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator
    CI = CSEMap.find(Opcode);
  // Do not CSE implicit_def so ProcessImplicitDefs can properly propagate
  // the undef property onto uses.
  if (CI == CSEMap.end() || MI->isImplicitDef())
    return false;

  return LookForDuplicate(MI, CI->second) != nullptr;
}

/// When an instruction is found to use only loop invariant operands
/// that are safe to hoist, this instruction is called to do the dirty work.
/// It returns true if the instruction is hoisted.
bool MachineLICM::Hoist(MachineInstr *MI, MachineBasicBlock *Preheader) {
  // First check whether we should hoist this instruction.
  if (!IsLoopInvariantInst(*MI) || !IsProfitableToHoist(*MI)) {
    // If not, try unfolding a hoistable load.
    MI = ExtractHoistableLoad(MI);
    if (!MI) return false;
  }

  // Now move the instructions to the predecessor, inserting it before any
  // terminator instructions.
  DEBUG({
      dbgs() << "Hoisting " << *MI;
      if (Preheader->getBasicBlock())
        dbgs() << " to MachineBasicBlock "
               << Preheader->getName();
      if (MI->getParent()->getBasicBlock())
        dbgs() << " from MachineBasicBlock "
               << MI->getParent()->getName();
      dbgs() << "\n";
    });

  // If this is the first instruction being hoisted to the preheader,
  // initialize the CSE map with potential common expressions.
  if (FirstInLoop) {
    InitCSEMap(Preheader);
    FirstInLoop = false;
  }

  // Look for opportunity to CSE the hoisted instruction.
  unsigned Opcode = MI->getOpcode();
  DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator
    CI = CSEMap.find(Opcode);
  if (!EliminateCSE(MI, CI)) {
    // Otherwise, splice the instruction to the preheader.
    Preheader->splice(Preheader->getFirstTerminator(),MI->getParent(),MI);

    // Update register pressure for BBs from header to this block.
    UpdateBackTraceRegPressure(MI);

    // Clear the kill flags of any register this instruction defines,
    // since they may need to be live throughout the entire loop
    // rather than just live for part of it.
    for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
      MachineOperand &MO = MI->getOperand(i);
      if (MO.isReg() && MO.isDef() && !MO.isDead())
        MRI->clearKillFlags(MO.getReg());
    }

    // Add to the CSE map.
    if (CI != CSEMap.end())
      CI->second.push_back(MI);
    else
      CSEMap[Opcode].push_back(MI);
  }

  ++NumHoisted;
  Changed = true;

  return true;
}

/// Get the preheader for the current loop, splitting a critical edge if needed.
MachineBasicBlock *MachineLICM::getCurPreheader() {
  // Determine the block to which to hoist instructions. If we can't find a
  // suitable loop predecessor, we can't do any hoisting.

  // If we've tried to get a preheader and failed, don't try again.
  // CurPreheader == -1 is the "already failed" sentinel.
  if (CurPreheader == reinterpret_cast<MachineBasicBlock *>(-1))
    return nullptr;

  if (!CurPreheader) {
    CurPreheader = CurLoop->getLoopPreheader();
    if (!CurPreheader) {
      // No preheader; fall back to the unique loop predecessor and split its
      // edge into the header to create one.
      MachineBasicBlock *Pred = CurLoop->getLoopPredecessor();
      if (!Pred) {
        CurPreheader = reinterpret_cast<MachineBasicBlock *>(-1);
        return nullptr;
      }

      CurPreheader = Pred->SplitCriticalEdge(CurLoop->getHeader(), this);
      if (!CurPreheader) {
        CurPreheader = reinterpret_cast<MachineBasicBlock *>(-1);
        return nullptr;
      }
    }
  }
  return CurPreheader;
}