1 //===-- RegAllocGreedy.cpp - greedy register allocator --------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file defines the RAGreedy function pass for register allocation in 11 // optimized builds. 12 // 13 //===----------------------------------------------------------------------===// 14 15 #include "AllocationOrder.h" 16 #include "InterferenceCache.h" 17 #include "LiveDebugVariables.h" 18 #include "RegAllocBase.h" 19 #include "SpillPlacement.h" 20 #include "Spiller.h" 21 #include "SplitKit.h" 22 #include "llvm/ADT/Statistic.h" 23 #include "llvm/Analysis/AliasAnalysis.h" 24 #include "llvm/CodeGen/CalcSpillWeights.h" 25 #include "llvm/CodeGen/EdgeBundles.h" 26 #include "llvm/CodeGen/LiveIntervalAnalysis.h" 27 #include "llvm/CodeGen/LiveRangeEdit.h" 28 #include "llvm/CodeGen/LiveRegMatrix.h" 29 #include "llvm/CodeGen/LiveStackAnalysis.h" 30 #include "llvm/CodeGen/MachineBlockFrequencyInfo.h" 31 #include "llvm/CodeGen/MachineDominators.h" 32 #include "llvm/CodeGen/MachineFunctionPass.h" 33 #include "llvm/CodeGen/MachineLoopInfo.h" 34 #include "llvm/CodeGen/MachineRegisterInfo.h" 35 #include "llvm/CodeGen/Passes.h" 36 #include "llvm/CodeGen/RegAllocRegistry.h" 37 #include "llvm/CodeGen/RegisterClassInfo.h" 38 #include "llvm/CodeGen/VirtRegMap.h" 39 #include "llvm/IR/LLVMContext.h" 40 #include "llvm/PassAnalysisSupport.h" 41 #include "llvm/Support/BranchProbability.h" 42 #include "llvm/Support/CommandLine.h" 43 #include "llvm/Support/Debug.h" 44 #include "llvm/Support/ErrorHandling.h" 45 #include "llvm/Support/Timer.h" 46 #include "llvm/Support/raw_ostream.h" 47 #include "llvm/Target/TargetInstrInfo.h" 48 #include "llvm/Target/TargetSubtargetInfo.h" 49 #include <queue> 50 51 using namespace llvm; 52 53 #define DEBUG_TYPE "regalloc" 54 55 STATISTIC(NumGlobalSplits, "Number of split global live ranges"); 56 STATISTIC(NumLocalSplits, "Number of split local live ranges"); 57 STATISTIC(NumEvicted, "Number of interferences evicted"); 58 59 static cl::opt<SplitEditor::ComplementSpillMode> SplitSpillMode( 60 "split-spill-mode", cl::Hidden, 61 cl::desc("Spill mode for splitting live ranges"), 62 cl::values(clEnumValN(SplitEditor::SM_Partition, "default", "Default"), 63 clEnumValN(SplitEditor::SM_Size, "size", "Optimize for size"), 64 clEnumValN(SplitEditor::SM_Speed, "speed", "Optimize for speed"), 65 clEnumValEnd), 66 cl::init(SplitEditor::SM_Speed)); 67 68 static cl::opt<unsigned> 69 LastChanceRecoloringMaxDepth("lcr-max-depth", cl::Hidden, 70 cl::desc("Last chance recoloring max depth"), 71 cl::init(5)); 72 73 static cl::opt<unsigned> LastChanceRecoloringMaxInterference( 74 "lcr-max-interf", cl::Hidden, 75 cl::desc("Last chance recoloring maximum number of considered" 76 " interference at a time"), 77 cl::init(8)); 78 79 static cl::opt<bool> 80 ExhaustiveSearch("exhaustive-register-search", cl::NotHidden, 81 cl::desc("Exhaustive Search for registers bypassing the depth " 82 "and interference cutoffs of last chance recoloring")); 83 84 static cl::opt<bool> EnableLocalReassignment( 85 "enable-local-reassign", cl::Hidden, 86 cl::desc("Local reassignment can yield better allocation decisions, but " 87 "may be compile time intensive"), 88 cl::init(false)); 89 90 static cl::opt<bool> EnableDeferredSpilling( 91 "enable-deferred-spilling", cl::Hidden, 92 cl::desc("Instead of spilling a variable right away, defer the actual " 93 "code insertion to the end of the allocation. That way the " 94 "allocator might still find a suitable coloring for this " 95 "variable because of other evicted variables."), 96 cl::init(false)); 97 98 // FIXME: Find a good default for this flag and remove the flag. 99 static cl::opt<unsigned> 100 CSRFirstTimeCost("regalloc-csr-first-time-cost", 101 cl::desc("Cost for first time use of callee-saved register."), 102 cl::init(0), cl::Hidden); 103 104 static RegisterRegAlloc greedyRegAlloc("greedy", "greedy register allocator", 105 createGreedyRegisterAllocator); 106 107 namespace { 108 class RAGreedy : public MachineFunctionPass, 109 public RegAllocBase, 110 private LiveRangeEdit::Delegate { 111 // Convenient shortcuts. 112 typedef std::priority_queue<std::pair<unsigned, unsigned> > PQueue; 113 typedef SmallPtrSet<LiveInterval *, 4> SmallLISet; 114 typedef SmallSet<unsigned, 16> SmallVirtRegSet; 115 116 // context 117 MachineFunction *MF; 118 119 // Shortcuts to some useful interface. 120 const TargetInstrInfo *TII; 121 const TargetRegisterInfo *TRI; 122 RegisterClassInfo RCI; 123 124 // analyses 125 SlotIndexes *Indexes; 126 MachineBlockFrequencyInfo *MBFI; 127 MachineDominatorTree *DomTree; 128 MachineLoopInfo *Loops; 129 EdgeBundles *Bundles; 130 SpillPlacement *SpillPlacer; 131 LiveDebugVariables *DebugVars; 132 AliasAnalysis *AA; 133 134 // state 135 std::unique_ptr<Spiller> SpillerInstance; 136 PQueue Queue; 137 unsigned NextCascade; 138 139 // Live ranges pass through a number of stages as we try to allocate them. 140 // Some of the stages may also create new live ranges: 141 // 142 // - Region splitting. 143 // - Per-block splitting. 144 // - Local splitting. 145 // - Spilling. 146 // 147 // Ranges produced by one of the stages skip the previous stages when they are 148 // dequeued. This improves performance because we can skip interference checks 149 // that are unlikely to give any results. It also guarantees that the live 150 // range splitting algorithm terminates, something that is otherwise hard to 151 // ensure. 152 enum LiveRangeStage { 153 /// Newly created live range that has never been queued. 154 RS_New, 155 156 /// Only attempt assignment and eviction. Then requeue as RS_Split. 157 RS_Assign, 158 159 /// Attempt live range splitting if assignment is impossible. 160 RS_Split, 161 162 /// Attempt more aggressive live range splitting that is guaranteed to make 163 /// progress. This is used for split products that may not be making 164 /// progress. 165 RS_Split2, 166 167 /// Live range will be spilled. No more splitting will be attempted. 168 RS_Spill, 169 170 171 /// Live range is in memory. Because of other evictions, it might get moved 172 /// in a register in the end. 173 RS_Memory, 174 175 /// There is nothing more we can do to this live range. Abort compilation 176 /// if it can't be assigned. 177 RS_Done 178 }; 179 180 // Enum CutOffStage to keep a track whether the register allocation failed 181 // because of the cutoffs encountered in last chance recoloring. 182 // Note: This is used as bitmask. New value should be next power of 2. 183 enum CutOffStage { 184 // No cutoffs encountered 185 CO_None = 0, 186 187 // lcr-max-depth cutoff encountered 188 CO_Depth = 1, 189 190 // lcr-max-interf cutoff encountered 191 CO_Interf = 2 192 }; 193 194 uint8_t CutOffInfo; 195 196 #ifndef NDEBUG 197 static const char *const StageName[]; 198 #endif 199 200 // RegInfo - Keep additional information about each live range. 201 struct RegInfo { 202 LiveRangeStage Stage; 203 204 // Cascade - Eviction loop prevention. See canEvictInterference(). 205 unsigned Cascade; 206 207 RegInfo() : Stage(RS_New), Cascade(0) {} 208 }; 209 210 IndexedMap<RegInfo, VirtReg2IndexFunctor> ExtraRegInfo; 211 212 LiveRangeStage getStage(const LiveInterval &VirtReg) const { 213 return ExtraRegInfo[VirtReg.reg].Stage; 214 } 215 216 void setStage(const LiveInterval &VirtReg, LiveRangeStage Stage) { 217 ExtraRegInfo.resize(MRI->getNumVirtRegs()); 218 ExtraRegInfo[VirtReg.reg].Stage = Stage; 219 } 220 221 template<typename Iterator> 222 void setStage(Iterator Begin, Iterator End, LiveRangeStage NewStage) { 223 ExtraRegInfo.resize(MRI->getNumVirtRegs()); 224 for (;Begin != End; ++Begin) { 225 unsigned Reg = *Begin; 226 if (ExtraRegInfo[Reg].Stage == RS_New) 227 ExtraRegInfo[Reg].Stage = NewStage; 228 } 229 } 230 231 /// Cost of evicting interference. 232 struct EvictionCost { 233 unsigned BrokenHints; ///< Total number of broken hints. 234 float MaxWeight; ///< Maximum spill weight evicted. 235 236 EvictionCost(): BrokenHints(0), MaxWeight(0) {} 237 238 bool isMax() const { return BrokenHints == ~0u; } 239 240 void setMax() { BrokenHints = ~0u; } 241 242 void setBrokenHints(unsigned NHints) { BrokenHints = NHints; } 243 244 bool operator<(const EvictionCost &O) const { 245 return std::tie(BrokenHints, MaxWeight) < 246 std::tie(O.BrokenHints, O.MaxWeight); 247 } 248 }; 249 250 // splitting state. 251 std::unique_ptr<SplitAnalysis> SA; 252 std::unique_ptr<SplitEditor> SE; 253 254 /// Cached per-block interference maps 255 InterferenceCache IntfCache; 256 257 /// All basic blocks where the current register has uses. 258 SmallVector<SpillPlacement::BlockConstraint, 8> SplitConstraints; 259 260 /// Global live range splitting candidate info. 261 struct GlobalSplitCandidate { 262 // Register intended for assignment, or 0. 263 unsigned PhysReg; 264 265 // SplitKit interval index for this candidate. 266 unsigned IntvIdx; 267 268 // Interference for PhysReg. 269 InterferenceCache::Cursor Intf; 270 271 // Bundles where this candidate should be live. 272 BitVector LiveBundles; 273 SmallVector<unsigned, 8> ActiveBlocks; 274 275 void reset(InterferenceCache &Cache, unsigned Reg) { 276 PhysReg = Reg; 277 IntvIdx = 0; 278 Intf.setPhysReg(Cache, Reg); 279 LiveBundles.clear(); 280 ActiveBlocks.clear(); 281 } 282 283 // Set B[i] = C for every live bundle where B[i] was NoCand. 284 unsigned getBundles(SmallVectorImpl<unsigned> &B, unsigned C) { 285 unsigned Count = 0; 286 for (int i = LiveBundles.find_first(); i >= 0; 287 i = LiveBundles.find_next(i)) 288 if (B[i] == NoCand) { 289 B[i] = C; 290 Count++; 291 } 292 return Count; 293 } 294 }; 295 296 /// Candidate info for each PhysReg in AllocationOrder. 297 /// This vector never shrinks, but grows to the size of the largest register 298 /// class. 299 SmallVector<GlobalSplitCandidate, 32> GlobalCand; 300 301 enum : unsigned { NoCand = ~0u }; 302 303 /// Candidate map. Each edge bundle is assigned to a GlobalCand entry, or to 304 /// NoCand which indicates the stack interval. 305 SmallVector<unsigned, 32> BundleCand; 306 307 /// Callee-save register cost, calculated once per machine function. 308 BlockFrequency CSRCost; 309 310 /// Run or not the local reassignment heuristic. This information is 311 /// obtained from the TargetSubtargetInfo. 312 bool EnableLocalReassign; 313 314 /// Set of broken hints that may be reconciled later because of eviction. 315 SmallSetVector<LiveInterval *, 8> SetOfBrokenHints; 316 317 public: 318 RAGreedy(); 319 320 /// Return the pass name. 321 const char* getPassName() const override { 322 return "Greedy Register Allocator"; 323 } 324 325 /// RAGreedy analysis usage. 326 void getAnalysisUsage(AnalysisUsage &AU) const override; 327 void releaseMemory() override; 328 Spiller &spiller() override { return *SpillerInstance; } 329 void enqueue(LiveInterval *LI) override; 330 LiveInterval *dequeue() override; 331 unsigned selectOrSplit(LiveInterval&, SmallVectorImpl<unsigned>&) override; 332 void aboutToRemoveInterval(LiveInterval &) override; 333 334 /// Perform register allocation. 335 bool runOnMachineFunction(MachineFunction &mf) override; 336 337 static char ID; 338 339 private: 340 unsigned selectOrSplitImpl(LiveInterval &, SmallVectorImpl<unsigned> &, 341 SmallVirtRegSet &, unsigned = 0); 342 343 bool LRE_CanEraseVirtReg(unsigned) override; 344 void LRE_WillShrinkVirtReg(unsigned) override; 345 void LRE_DidCloneVirtReg(unsigned, unsigned) override; 346 void enqueue(PQueue &CurQueue, LiveInterval *LI); 347 LiveInterval *dequeue(PQueue &CurQueue); 348 349 BlockFrequency calcSpillCost(); 350 bool addSplitConstraints(InterferenceCache::Cursor, BlockFrequency&); 351 void addThroughConstraints(InterferenceCache::Cursor, ArrayRef<unsigned>); 352 void growRegion(GlobalSplitCandidate &Cand); 353 BlockFrequency calcGlobalSplitCost(GlobalSplitCandidate&); 354 bool calcCompactRegion(GlobalSplitCandidate&); 355 void splitAroundRegion(LiveRangeEdit&, ArrayRef<unsigned>); 356 void calcGapWeights(unsigned, SmallVectorImpl<float>&); 357 unsigned canReassign(LiveInterval &VirtReg, unsigned PhysReg); 358 bool shouldEvict(LiveInterval &A, bool, LiveInterval &B, bool); 359 bool canEvictInterference(LiveInterval&, unsigned, bool, EvictionCost&); 360 void evictInterference(LiveInterval&, unsigned, 361 SmallVectorImpl<unsigned>&); 362 bool mayRecolorAllInterferences(unsigned PhysReg, LiveInterval &VirtReg, 363 SmallLISet &RecoloringCandidates, 364 const SmallVirtRegSet &FixedRegisters); 365 366 unsigned tryAssign(LiveInterval&, AllocationOrder&, 367 SmallVectorImpl<unsigned>&); 368 unsigned tryEvict(LiveInterval&, AllocationOrder&, 369 SmallVectorImpl<unsigned>&, unsigned = ~0u); 370 unsigned tryRegionSplit(LiveInterval&, AllocationOrder&, 371 SmallVectorImpl<unsigned>&); 372 /// Calculate cost of region splitting. 373 unsigned calculateRegionSplitCost(LiveInterval &VirtReg, 374 AllocationOrder &Order, 375 BlockFrequency &BestCost, 376 unsigned &NumCands, bool IgnoreCSR); 377 /// Perform region splitting. 378 unsigned doRegionSplit(LiveInterval &VirtReg, unsigned BestCand, 379 bool HasCompact, 380 SmallVectorImpl<unsigned> &NewVRegs); 381 /// Check other options before using a callee-saved register for the first 382 /// time. 383 unsigned tryAssignCSRFirstTime(LiveInterval &VirtReg, AllocationOrder &Order, 384 unsigned PhysReg, unsigned &CostPerUseLimit, 385 SmallVectorImpl<unsigned> &NewVRegs); 386 void initializeCSRCost(); 387 unsigned tryBlockSplit(LiveInterval&, AllocationOrder&, 388 SmallVectorImpl<unsigned>&); 389 unsigned tryInstructionSplit(LiveInterval&, AllocationOrder&, 390 SmallVectorImpl<unsigned>&); 391 unsigned tryLocalSplit(LiveInterval&, AllocationOrder&, 392 SmallVectorImpl<unsigned>&); 393 unsigned trySplit(LiveInterval&, AllocationOrder&, 394 SmallVectorImpl<unsigned>&); 395 unsigned tryLastChanceRecoloring(LiveInterval &, AllocationOrder &, 396 SmallVectorImpl<unsigned> &, 397 SmallVirtRegSet &, unsigned); 398 bool tryRecoloringCandidates(PQueue &, SmallVectorImpl<unsigned> &, 399 SmallVirtRegSet &, unsigned); 400 void tryHintRecoloring(LiveInterval &); 401 void tryHintsRecoloring(); 402 403 /// Model the information carried by one end of a copy. 404 struct HintInfo { 405 /// The frequency of the copy. 406 BlockFrequency Freq; 407 /// The virtual register or physical register. 408 unsigned Reg; 409 /// Its currently assigned register. 410 /// In case of a physical register Reg == PhysReg. 411 unsigned PhysReg; 412 HintInfo(BlockFrequency Freq, unsigned Reg, unsigned PhysReg) 413 : Freq(Freq), Reg(Reg), PhysReg(PhysReg) {} 414 }; 415 typedef SmallVector<HintInfo, 4> HintsInfo; 416 BlockFrequency getBrokenHintFreq(const HintsInfo &, unsigned); 417 void collectHintInfo(unsigned, HintsInfo &); 418 419 bool isUnusedCalleeSavedReg(unsigned PhysReg) const; 420 }; 421 } // end anonymous namespace 422 423 char RAGreedy::ID = 0; 424 425 #ifndef NDEBUG 426 const char *const RAGreedy::StageName[] = { 427 "RS_New", 428 "RS_Assign", 429 "RS_Split", 430 "RS_Split2", 431 "RS_Spill", 432 "RS_Memory", 433 "RS_Done" 434 }; 435 #endif 436 437 // Hysteresis to use when comparing floats. 438 // This helps stabilize decisions based on float comparisons. 439 const float Hysteresis = (2007 / 2048.0f); // 0.97998046875 440 441 442 FunctionPass* llvm::createGreedyRegisterAllocator() { 443 return new RAGreedy(); 444 } 445 446 RAGreedy::RAGreedy(): MachineFunctionPass(ID) { 447 initializeLiveDebugVariablesPass(*PassRegistry::getPassRegistry()); 448 initializeSlotIndexesPass(*PassRegistry::getPassRegistry()); 449 initializeLiveIntervalsPass(*PassRegistry::getPassRegistry()); 450 initializeSlotIndexesPass(*PassRegistry::getPassRegistry()); 451 initializeRegisterCoalescerPass(*PassRegistry::getPassRegistry()); 452 initializeMachineSchedulerPass(*PassRegistry::getPassRegistry()); 453 initializeLiveStacksPass(*PassRegistry::getPassRegistry()); 454 initializeMachineDominatorTreePass(*PassRegistry::getPassRegistry()); 455 initializeMachineLoopInfoPass(*PassRegistry::getPassRegistry()); 456 initializeVirtRegMapPass(*PassRegistry::getPassRegistry()); 457 initializeLiveRegMatrixPass(*PassRegistry::getPassRegistry()); 458 initializeEdgeBundlesPass(*PassRegistry::getPassRegistry()); 459 initializeSpillPlacementPass(*PassRegistry::getPassRegistry()); 460 } 461 462 void RAGreedy::getAnalysisUsage(AnalysisUsage &AU) const { 463 AU.setPreservesCFG(); 464 AU.addRequired<MachineBlockFrequencyInfo>(); 465 AU.addPreserved<MachineBlockFrequencyInfo>(); 466 AU.addRequired<AAResultsWrapperPass>(); 467 AU.addPreserved<AAResultsWrapperPass>(); 468 AU.addRequired<LiveIntervals>(); 469 AU.addPreserved<LiveIntervals>(); 470 AU.addRequired<SlotIndexes>(); 471 AU.addPreserved<SlotIndexes>(); 472 AU.addRequired<LiveDebugVariables>(); 473 AU.addPreserved<LiveDebugVariables>(); 474 AU.addRequired<LiveStacks>(); 475 AU.addPreserved<LiveStacks>(); 476 AU.addRequired<MachineDominatorTree>(); 477 AU.addPreserved<MachineDominatorTree>(); 478 AU.addRequired<MachineLoopInfo>(); 479 AU.addPreserved<MachineLoopInfo>(); 480 AU.addRequired<VirtRegMap>(); 481 AU.addPreserved<VirtRegMap>(); 482 AU.addRequired<LiveRegMatrix>(); 483 AU.addPreserved<LiveRegMatrix>(); 484 AU.addRequired<EdgeBundles>(); 485 AU.addRequired<SpillPlacement>(); 486 MachineFunctionPass::getAnalysisUsage(AU); 487 } 488 489 490 //===----------------------------------------------------------------------===// 491 // LiveRangeEdit delegate methods 492 //===----------------------------------------------------------------------===// 493 494 bool RAGreedy::LRE_CanEraseVirtReg(unsigned VirtReg) { 495 if (VRM->hasPhys(VirtReg)) { 496 LiveInterval &LI = LIS->getInterval(VirtReg); 497 Matrix->unassign(LI); 498 aboutToRemoveInterval(LI); 499 return true; 500 } 501 // Unassigned virtreg is probably in the priority queue. 502 // RegAllocBase will erase it after dequeueing. 503 return false; 504 } 505 506 void RAGreedy::LRE_WillShrinkVirtReg(unsigned VirtReg) { 507 if (!VRM->hasPhys(VirtReg)) 508 return; 509 510 // Register is assigned, put it back on the queue for reassignment. 511 LiveInterval &LI = LIS->getInterval(VirtReg); 512 Matrix->unassign(LI); 513 enqueue(&LI); 514 } 515 516 void RAGreedy::LRE_DidCloneVirtReg(unsigned New, unsigned Old) { 517 // Cloning a register we haven't even heard about yet? Just ignore it. 518 if (!ExtraRegInfo.inBounds(Old)) 519 return; 520 521 // LRE may clone a virtual register because dead code elimination causes it to 522 // be split into connected components. The new components are much smaller 523 // than the original, so they should get a new chance at being assigned. 524 // same stage as the parent. 525 ExtraRegInfo[Old].Stage = RS_Assign; 526 ExtraRegInfo.grow(New); 527 ExtraRegInfo[New] = ExtraRegInfo[Old]; 528 } 529 530 void RAGreedy::releaseMemory() { 531 SpillerInstance.reset(); 532 ExtraRegInfo.clear(); 533 GlobalCand.clear(); 534 } 535 536 void RAGreedy::enqueue(LiveInterval *LI) { enqueue(Queue, LI); } 537 538 void RAGreedy::enqueue(PQueue &CurQueue, LiveInterval *LI) { 539 // Prioritize live ranges by size, assigning larger ranges first. 540 // The queue holds (size, reg) pairs. 541 const unsigned Size = LI->getSize(); 542 const unsigned Reg = LI->reg; 543 assert(TargetRegisterInfo::isVirtualRegister(Reg) && 544 "Can only enqueue virtual registers"); 545 unsigned Prio; 546 547 ExtraRegInfo.grow(Reg); 548 if (ExtraRegInfo[Reg].Stage == RS_New) 549 ExtraRegInfo[Reg].Stage = RS_Assign; 550 551 if (ExtraRegInfo[Reg].Stage == RS_Split) { 552 // Unsplit ranges that couldn't be allocated immediately are deferred until 553 // everything else has been allocated. 554 Prio = Size; 555 } else if (ExtraRegInfo[Reg].Stage == RS_Memory) { 556 // Memory operand should be considered last. 557 // Change the priority such that Memory operand are assigned in 558 // the reverse order that they came in. 559 // TODO: Make this a member variable and probably do something about hints. 560 static unsigned MemOp = 0; 561 Prio = MemOp++; 562 } else { 563 // Giant live ranges fall back to the global assignment heuristic, which 564 // prevents excessive spilling in pathological cases. 565 bool ReverseLocal = TRI->reverseLocalAssignment(); 566 const TargetRegisterClass &RC = *MRI->getRegClass(Reg); 567 bool ForceGlobal = !ReverseLocal && 568 (Size / SlotIndex::InstrDist) > (2 * RC.getNumRegs()); 569 570 if (ExtraRegInfo[Reg].Stage == RS_Assign && !ForceGlobal && !LI->empty() && 571 LIS->intervalIsInOneMBB(*LI)) { 572 // Allocate original local ranges in linear instruction order. Since they 573 // are singly defined, this produces optimal coloring in the absence of 574 // global interference and other constraints. 575 if (!ReverseLocal) 576 Prio = LI->beginIndex().getInstrDistance(Indexes->getLastIndex()); 577 else { 578 // Allocating bottom up may allow many short LRGs to be assigned first 579 // to one of the cheap registers. This could be much faster for very 580 // large blocks on targets with many physical registers. 581 Prio = Indexes->getZeroIndex().getInstrDistance(LI->endIndex()); 582 } 583 Prio |= RC.AllocationPriority << 24; 584 } else { 585 // Allocate global and split ranges in long->short order. Long ranges that 586 // don't fit should be spilled (or split) ASAP so they don't create 587 // interference. Mark a bit to prioritize global above local ranges. 588 Prio = (1u << 29) + Size; 589 } 590 // Mark a higher bit to prioritize global and local above RS_Split. 591 Prio |= (1u << 31); 592 593 // Boost ranges that have a physical register hint. 594 if (VRM->hasKnownPreference(Reg)) 595 Prio |= (1u << 30); 596 } 597 // The virtual register number is a tie breaker for same-sized ranges. 598 // Give lower vreg numbers higher priority to assign them first. 599 CurQueue.push(std::make_pair(Prio, ~Reg)); 600 } 601 602 LiveInterval *RAGreedy::dequeue() { return dequeue(Queue); } 603 604 LiveInterval *RAGreedy::dequeue(PQueue &CurQueue) { 605 if (CurQueue.empty()) 606 return nullptr; 607 LiveInterval *LI = &LIS->getInterval(~CurQueue.top().second); 608 CurQueue.pop(); 609 return LI; 610 } 611 612 613 //===----------------------------------------------------------------------===// 614 // Direct Assignment 615 //===----------------------------------------------------------------------===// 616 617 /// tryAssign - Try to assign VirtReg to an available register. 618 unsigned RAGreedy::tryAssign(LiveInterval &VirtReg, 619 AllocationOrder &Order, 620 SmallVectorImpl<unsigned> &NewVRegs) { 621 Order.rewind(); 622 unsigned PhysReg; 623 while ((PhysReg = Order.next())) 624 if (!Matrix->checkInterference(VirtReg, PhysReg)) 625 break; 626 if (!PhysReg || Order.isHint()) 627 return PhysReg; 628 629 // PhysReg is available, but there may be a better choice. 630 631 // If we missed a simple hint, try to cheaply evict interference from the 632 // preferred register. 633 if (unsigned Hint = MRI->getSimpleHint(VirtReg.reg)) 634 if (Order.isHint(Hint)) { 635 DEBUG(dbgs() << "missed hint " << PrintReg(Hint, TRI) << '\n'); 636 EvictionCost MaxCost; 637 MaxCost.setBrokenHints(1); 638 if (canEvictInterference(VirtReg, Hint, true, MaxCost)) { 639 evictInterference(VirtReg, Hint, NewVRegs); 640 return Hint; 641 } 642 } 643 644 // Try to evict interference from a cheaper alternative. 645 unsigned Cost = TRI->getCostPerUse(PhysReg); 646 647 // Most registers have 0 additional cost. 648 if (!Cost) 649 return PhysReg; 650 651 DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " is available at cost " << Cost 652 << '\n'); 653 unsigned CheapReg = tryEvict(VirtReg, Order, NewVRegs, Cost); 654 return CheapReg ? CheapReg : PhysReg; 655 } 656 657 658 //===----------------------------------------------------------------------===// 659 // Interference eviction 660 //===----------------------------------------------------------------------===// 661 662 unsigned RAGreedy::canReassign(LiveInterval &VirtReg, unsigned PrevReg) { 663 AllocationOrder Order(VirtReg.reg, *VRM, RegClassInfo, Matrix); 664 unsigned PhysReg; 665 while ((PhysReg = Order.next())) { 666 if (PhysReg == PrevReg) 667 continue; 668 669 MCRegUnitIterator Units(PhysReg, TRI); 670 for (; Units.isValid(); ++Units) { 671 // Instantiate a "subquery", not to be confused with the Queries array. 672 LiveIntervalUnion::Query subQ(&VirtReg, &Matrix->getLiveUnions()[*Units]); 673 if (subQ.checkInterference()) 674 break; 675 } 676 // If no units have interference, break out with the current PhysReg. 677 if (!Units.isValid()) 678 break; 679 } 680 if (PhysReg) 681 DEBUG(dbgs() << "can reassign: " << VirtReg << " from " 682 << PrintReg(PrevReg, TRI) << " to " << PrintReg(PhysReg, TRI) 683 << '\n'); 684 return PhysReg; 685 } 686 687 /// shouldEvict - determine if A should evict the assigned live range B. The 688 /// eviction policy defined by this function together with the allocation order 689 /// defined by enqueue() decides which registers ultimately end up being split 690 /// and spilled. 691 /// 692 /// Cascade numbers are used to prevent infinite loops if this function is a 693 /// cyclic relation. 694 /// 695 /// @param A The live range to be assigned. 696 /// @param IsHint True when A is about to be assigned to its preferred 697 /// register. 698 /// @param B The live range to be evicted. 699 /// @param BreaksHint True when B is already assigned to its preferred register. 700 bool RAGreedy::shouldEvict(LiveInterval &A, bool IsHint, 701 LiveInterval &B, bool BreaksHint) { 702 bool CanSplit = getStage(B) < RS_Spill; 703 704 // Be fairly aggressive about following hints as long as the evictee can be 705 // split. 706 if (CanSplit && IsHint && !BreaksHint) 707 return true; 708 709 if (A.weight > B.weight) { 710 DEBUG(dbgs() << "should evict: " << B << " w= " << B.weight << '\n'); 711 return true; 712 } 713 return false; 714 } 715 716 /// canEvictInterference - Return true if all interferences between VirtReg and 717 /// PhysReg can be evicted. 718 /// 719 /// @param VirtReg Live range that is about to be assigned. 720 /// @param PhysReg Desired register for assignment. 721 /// @param IsHint True when PhysReg is VirtReg's preferred register. 722 /// @param MaxCost Only look for cheaper candidates and update with new cost 723 /// when returning true. 724 /// @returns True when interference can be evicted cheaper than MaxCost. 725 bool RAGreedy::canEvictInterference(LiveInterval &VirtReg, unsigned PhysReg, 726 bool IsHint, EvictionCost &MaxCost) { 727 // It is only possible to evict virtual register interference. 728 if (Matrix->checkInterference(VirtReg, PhysReg) > LiveRegMatrix::IK_VirtReg) 729 return false; 730 731 bool IsLocal = LIS->intervalIsInOneMBB(VirtReg); 732 733 // Find VirtReg's cascade number. This will be unassigned if VirtReg was never 734 // involved in an eviction before. If a cascade number was assigned, deny 735 // evicting anything with the same or a newer cascade number. This prevents 736 // infinite eviction loops. 737 // 738 // This works out so a register without a cascade number is allowed to evict 739 // anything, and it can be evicted by anything. 740 unsigned Cascade = ExtraRegInfo[VirtReg.reg].Cascade; 741 if (!Cascade) 742 Cascade = NextCascade; 743 744 EvictionCost Cost; 745 for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) { 746 LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units); 747 // If there is 10 or more interferences, chances are one is heavier. 748 if (Q.collectInterferingVRegs(10) >= 10) 749 return false; 750 751 // Check if any interfering live range is heavier than MaxWeight. 752 for (unsigned i = Q.interferingVRegs().size(); i; --i) { 753 LiveInterval *Intf = Q.interferingVRegs()[i - 1]; 754 assert(TargetRegisterInfo::isVirtualRegister(Intf->reg) && 755 "Only expecting virtual register interference from query"); 756 // Never evict spill products. They cannot split or spill. 757 if (getStage(*Intf) == RS_Done) 758 return false; 759 // Once a live range becomes small enough, it is urgent that we find a 760 // register for it. This is indicated by an infinite spill weight. These 761 // urgent live ranges get to evict almost anything. 762 // 763 // Also allow urgent evictions of unspillable ranges from a strictly 764 // larger allocation order. 765 bool Urgent = !VirtReg.isSpillable() && 766 (Intf->isSpillable() || 767 RegClassInfo.getNumAllocatableRegs(MRI->getRegClass(VirtReg.reg)) < 768 RegClassInfo.getNumAllocatableRegs(MRI->getRegClass(Intf->reg))); 769 // Only evict older cascades or live ranges without a cascade. 770 unsigned IntfCascade = ExtraRegInfo[Intf->reg].Cascade; 771 if (Cascade <= IntfCascade) { 772 if (!Urgent) 773 return false; 774 // We permit breaking cascades for urgent evictions. It should be the 775 // last resort, though, so make it really expensive. 776 Cost.BrokenHints += 10; 777 } 778 // Would this break a satisfied hint? 779 bool BreaksHint = VRM->hasPreferredPhys(Intf->reg); 780 // Update eviction cost. 781 Cost.BrokenHints += BreaksHint; 782 Cost.MaxWeight = std::max(Cost.MaxWeight, Intf->weight); 783 // Abort if this would be too expensive. 784 if (!(Cost < MaxCost)) 785 return false; 786 if (Urgent) 787 continue; 788 // Apply the eviction policy for non-urgent evictions. 789 if (!shouldEvict(VirtReg, IsHint, *Intf, BreaksHint)) 790 return false; 791 // If !MaxCost.isMax(), then we're just looking for a cheap register. 792 // Evicting another local live range in this case could lead to suboptimal 793 // coloring. 794 if (!MaxCost.isMax() && IsLocal && LIS->intervalIsInOneMBB(*Intf) && 795 (!EnableLocalReassign || !canReassign(*Intf, PhysReg))) { 796 return false; 797 } 798 } 799 } 800 MaxCost = Cost; 801 return true; 802 } 803 804 /// evictInterference - Evict any interferring registers that prevent VirtReg 805 /// from being assigned to Physreg. This assumes that canEvictInterference 806 /// returned true. 807 void RAGreedy::evictInterference(LiveInterval &VirtReg, unsigned PhysReg, 808 SmallVectorImpl<unsigned> &NewVRegs) { 809 // Make sure that VirtReg has a cascade number, and assign that cascade 810 // number to every evicted register. These live ranges than then only be 811 // evicted by a newer cascade, preventing infinite loops. 812 unsigned Cascade = ExtraRegInfo[VirtReg.reg].Cascade; 813 if (!Cascade) 814 Cascade = ExtraRegInfo[VirtReg.reg].Cascade = NextCascade++; 815 816 DEBUG(dbgs() << "evicting " << PrintReg(PhysReg, TRI) 817 << " interference: Cascade " << Cascade << '\n'); 818 819 // Collect all interfering virtregs first. 820 SmallVector<LiveInterval*, 8> Intfs; 821 for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) { 822 LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units); 823 assert(Q.seenAllInterferences() && "Didn't check all interfererences."); 824 ArrayRef<LiveInterval*> IVR = Q.interferingVRegs(); 825 Intfs.append(IVR.begin(), IVR.end()); 826 } 827 828 // Evict them second. This will invalidate the queries. 829 for (unsigned i = 0, e = Intfs.size(); i != e; ++i) { 830 LiveInterval *Intf = Intfs[i]; 831 // The same VirtReg may be present in multiple RegUnits. Skip duplicates. 832 if (!VRM->hasPhys(Intf->reg)) 833 continue; 834 Matrix->unassign(*Intf); 835 assert((ExtraRegInfo[Intf->reg].Cascade < Cascade || 836 VirtReg.isSpillable() < Intf->isSpillable()) && 837 "Cannot decrease cascade number, illegal eviction"); 838 ExtraRegInfo[Intf->reg].Cascade = Cascade; 839 ++NumEvicted; 840 NewVRegs.push_back(Intf->reg); 841 } 842 } 843 844 /// Returns true if the given \p PhysReg is a callee saved register and has not 845 /// been used for allocation yet. 846 bool RAGreedy::isUnusedCalleeSavedReg(unsigned PhysReg) const { 847 unsigned CSR = RegClassInfo.getLastCalleeSavedAlias(PhysReg); 848 if (CSR == 0) 849 return false; 850 851 return !Matrix->isPhysRegUsed(PhysReg); 852 } 853 854 /// tryEvict - Try to evict all interferences for a physreg. 855 /// @param VirtReg Currently unassigned virtual register. 856 /// @param Order Physregs to try. 857 /// @return Physreg to assign VirtReg, or 0. 858 unsigned RAGreedy::tryEvict(LiveInterval &VirtReg, 859 AllocationOrder &Order, 860 SmallVectorImpl<unsigned> &NewVRegs, 861 unsigned CostPerUseLimit) { 862 NamedRegionTimer T("Evict", TimerGroupName, TimePassesIsEnabled); 863 864 // Keep track of the cheapest interference seen so far. 865 EvictionCost BestCost; 866 BestCost.setMax(); 867 unsigned BestPhys = 0; 868 unsigned OrderLimit = Order.getOrder().size(); 869 870 // When we are just looking for a reduced cost per use, don't break any 871 // hints, and only evict smaller spill weights. 872 if (CostPerUseLimit < ~0u) { 873 BestCost.BrokenHints = 0; 874 BestCost.MaxWeight = VirtReg.weight; 875 876 // Check of any registers in RC are below CostPerUseLimit. 877 const TargetRegisterClass *RC = MRI->getRegClass(VirtReg.reg); 878 unsigned MinCost = RegClassInfo.getMinCost(RC); 879 if (MinCost >= CostPerUseLimit) { 880 DEBUG(dbgs() << TRI->getRegClassName(RC) << " minimum cost = " << MinCost 881 << ", no cheaper registers to be found.\n"); 882 return 0; 883 } 884 885 // It is normal for register classes to have a long tail of registers with 886 // the same cost. We don't need to look at them if they're too expensive. 887 if (TRI->getCostPerUse(Order.getOrder().back()) >= CostPerUseLimit) { 888 OrderLimit = RegClassInfo.getLastCostChange(RC); 889 DEBUG(dbgs() << "Only trying the first " << OrderLimit << " regs.\n"); 890 } 891 } 892 893 Order.rewind(); 894 while (unsigned PhysReg = Order.next(OrderLimit)) { 895 if (TRI->getCostPerUse(PhysReg) >= CostPerUseLimit) 896 continue; 897 // The first use of a callee-saved register in a function has cost 1. 898 // Don't start using a CSR when the CostPerUseLimit is low. 899 if (CostPerUseLimit == 1 && isUnusedCalleeSavedReg(PhysReg)) { 900 DEBUG(dbgs() << PrintReg(PhysReg, TRI) << " would clobber CSR " 901 << PrintReg(RegClassInfo.getLastCalleeSavedAlias(PhysReg), TRI) 902 << '\n'); 903 continue; 904 } 905 906 if (!canEvictInterference(VirtReg, PhysReg, false, BestCost)) 907 continue; 908 909 // Best so far. 910 BestPhys = PhysReg; 911 912 // Stop if the hint can be used. 913 if (Order.isHint()) 914 break; 915 } 916 917 if (!BestPhys) 918 return 0; 919 920 evictInterference(VirtReg, BestPhys, NewVRegs); 921 return BestPhys; 922 } 923 924 925 //===----------------------------------------------------------------------===// 926 // Region Splitting 927 //===----------------------------------------------------------------------===// 928 929 /// addSplitConstraints - Fill out the SplitConstraints vector based on the 930 /// interference pattern in Physreg and its aliases. Add the constraints to 931 /// SpillPlacement and return the static cost of this split in Cost, assuming 932 /// that all preferences in SplitConstraints are met. 933 /// Return false if there are no bundles with positive bias. 934 bool RAGreedy::addSplitConstraints(InterferenceCache::Cursor Intf, 935 BlockFrequency &Cost) { 936 ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks(); 937 938 // Reset interference dependent info. 939 SplitConstraints.resize(UseBlocks.size()); 940 BlockFrequency StaticCost = 0; 941 for (unsigned i = 0; i != UseBlocks.size(); ++i) { 942 const SplitAnalysis::BlockInfo &BI = UseBlocks[i]; 943 SpillPlacement::BlockConstraint &BC = SplitConstraints[i]; 944 945 BC.Number = BI.MBB->getNumber(); 946 Intf.moveToBlock(BC.Number); 947 BC.Entry = BI.LiveIn ? SpillPlacement::PrefReg : SpillPlacement::DontCare; 948 BC.Exit = BI.LiveOut ? SpillPlacement::PrefReg : SpillPlacement::DontCare; 949 BC.ChangesValue = BI.FirstDef.isValid(); 950 951 if (!Intf.hasInterference()) 952 continue; 953 954 // Number of spill code instructions to insert. 955 unsigned Ins = 0; 956 957 // Interference for the live-in value. 958 if (BI.LiveIn) { 959 if (Intf.first() <= Indexes->getMBBStartIdx(BC.Number)) { 960 BC.Entry = SpillPlacement::MustSpill; 961 ++Ins; 962 } else if (Intf.first() < BI.FirstInstr) { 963 BC.Entry = SpillPlacement::PrefSpill; 964 ++Ins; 965 } else if (Intf.first() < BI.LastInstr) { 966 ++Ins; 967 } 968 } 969 970 // Interference for the live-out value. 971 if (BI.LiveOut) { 972 if (Intf.last() >= SA->getLastSplitPoint(BC.Number)) { 973 BC.Exit = SpillPlacement::MustSpill; 974 ++Ins; 975 } else if (Intf.last() > BI.LastInstr) { 976 BC.Exit = SpillPlacement::PrefSpill; 977 ++Ins; 978 } else if (Intf.last() > BI.FirstInstr) { 979 ++Ins; 980 } 981 } 982 983 // Accumulate the total frequency of inserted spill code. 984 while (Ins--) 985 StaticCost += SpillPlacer->getBlockFrequency(BC.Number); 986 } 987 Cost = StaticCost; 988 989 // Add constraints for use-blocks. Note that these are the only constraints 990 // that may add a positive bias, it is downhill from here. 991 SpillPlacer->addConstraints(SplitConstraints); 992 return SpillPlacer->scanActiveBundles(); 993 } 994 995 996 /// addThroughConstraints - Add constraints and links to SpillPlacer from the 997 /// live-through blocks in Blocks. 998 void RAGreedy::addThroughConstraints(InterferenceCache::Cursor Intf, 999 ArrayRef<unsigned> Blocks) { 1000 const unsigned GroupSize = 8; 1001 SpillPlacement::BlockConstraint BCS[GroupSize]; 1002 unsigned TBS[GroupSize]; 1003 unsigned B = 0, T = 0; 1004 1005 for (unsigned i = 0; i != Blocks.size(); ++i) { 1006 unsigned Number = Blocks[i]; 1007 Intf.moveToBlock(Number); 1008 1009 if (!Intf.hasInterference()) { 1010 assert(T < GroupSize && "Array overflow"); 1011 TBS[T] = Number; 1012 if (++T == GroupSize) { 1013 SpillPlacer->addLinks(makeArrayRef(TBS, T)); 1014 T = 0; 1015 } 1016 continue; 1017 } 1018 1019 assert(B < GroupSize && "Array overflow"); 1020 BCS[B].Number = Number; 1021 1022 // Interference for the live-in value. 1023 if (Intf.first() <= Indexes->getMBBStartIdx(Number)) 1024 BCS[B].Entry = SpillPlacement::MustSpill; 1025 else 1026 BCS[B].Entry = SpillPlacement::PrefSpill; 1027 1028 // Interference for the live-out value. 1029 if (Intf.last() >= SA->getLastSplitPoint(Number)) 1030 BCS[B].Exit = SpillPlacement::MustSpill; 1031 else 1032 BCS[B].Exit = SpillPlacement::PrefSpill; 1033 1034 if (++B == GroupSize) { 1035 SpillPlacer->addConstraints(makeArrayRef(BCS, B)); 1036 B = 0; 1037 } 1038 } 1039 1040 SpillPlacer->addConstraints(makeArrayRef(BCS, B)); 1041 SpillPlacer->addLinks(makeArrayRef(TBS, T)); 1042 } 1043 1044 void RAGreedy::growRegion(GlobalSplitCandidate &Cand) { 1045 // Keep track of through blocks that have not been added to SpillPlacer. 1046 BitVector Todo = SA->getThroughBlocks(); 1047 SmallVectorImpl<unsigned> &ActiveBlocks = Cand.ActiveBlocks; 1048 unsigned AddedTo = 0; 1049 #ifndef NDEBUG 1050 unsigned Visited = 0; 1051 #endif 1052 1053 for (;;) { 1054 ArrayRef<unsigned> NewBundles = SpillPlacer->getRecentPositive(); 1055 // Find new through blocks in the periphery of PrefRegBundles. 1056 for (int i = 0, e = NewBundles.size(); i != e; ++i) { 1057 unsigned Bundle = NewBundles[i]; 1058 // Look at all blocks connected to Bundle in the full graph. 1059 ArrayRef<unsigned> Blocks = Bundles->getBlocks(Bundle); 1060 for (ArrayRef<unsigned>::iterator I = Blocks.begin(), E = Blocks.end(); 1061 I != E; ++I) { 1062 unsigned Block = *I; 1063 if (!Todo.test(Block)) 1064 continue; 1065 Todo.reset(Block); 1066 // This is a new through block. Add it to SpillPlacer later. 1067 ActiveBlocks.push_back(Block); 1068 #ifndef NDEBUG 1069 ++Visited; 1070 #endif 1071 } 1072 } 1073 // Any new blocks to add? 1074 if (ActiveBlocks.size() == AddedTo) 1075 break; 1076 1077 // Compute through constraints from the interference, or assume that all 1078 // through blocks prefer spilling when forming compact regions. 1079 auto NewBlocks = makeArrayRef(ActiveBlocks).slice(AddedTo); 1080 if (Cand.PhysReg) 1081 addThroughConstraints(Cand.Intf, NewBlocks); 1082 else 1083 // Provide a strong negative bias on through blocks to prevent unwanted 1084 // liveness on loop backedges. 1085 SpillPlacer->addPrefSpill(NewBlocks, /* Strong= */ true); 1086 AddedTo = ActiveBlocks.size(); 1087 1088 // Perhaps iterating can enable more bundles? 1089 SpillPlacer->iterate(); 1090 } 1091 DEBUG(dbgs() << ", v=" << Visited); 1092 } 1093 1094 /// calcCompactRegion - Compute the set of edge bundles that should be live 1095 /// when splitting the current live range into compact regions. Compact 1096 /// regions can be computed without looking at interference. They are the 1097 /// regions formed by removing all the live-through blocks from the live range. 1098 /// 1099 /// Returns false if the current live range is already compact, or if the 1100 /// compact regions would form single block regions anyway. 1101 bool RAGreedy::calcCompactRegion(GlobalSplitCandidate &Cand) { 1102 // Without any through blocks, the live range is already compact. 1103 if (!SA->getNumThroughBlocks()) 1104 return false; 1105 1106 // Compact regions don't correspond to any physreg. 1107 Cand.reset(IntfCache, 0); 1108 1109 DEBUG(dbgs() << "Compact region bundles"); 1110 1111 // Use the spill placer to determine the live bundles. GrowRegion pretends 1112 // that all the through blocks have interference when PhysReg is unset. 1113 SpillPlacer->prepare(Cand.LiveBundles); 1114 1115 // The static split cost will be zero since Cand.Intf reports no interference. 1116 BlockFrequency Cost; 1117 if (!addSplitConstraints(Cand.Intf, Cost)) { 1118 DEBUG(dbgs() << ", none.\n"); 1119 return false; 1120 } 1121 1122 growRegion(Cand); 1123 SpillPlacer->finish(); 1124 1125 if (!Cand.LiveBundles.any()) { 1126 DEBUG(dbgs() << ", none.\n"); 1127 return false; 1128 } 1129 1130 DEBUG({ 1131 for (int i = Cand.LiveBundles.find_first(); i>=0; 1132 i = Cand.LiveBundles.find_next(i)) 1133 dbgs() << " EB#" << i; 1134 dbgs() << ".\n"; 1135 }); 1136 return true; 1137 } 1138 1139 /// calcSpillCost - Compute how expensive it would be to split the live range in 1140 /// SA around all use blocks instead of forming bundle regions. 1141 BlockFrequency RAGreedy::calcSpillCost() { 1142 BlockFrequency Cost = 0; 1143 ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks(); 1144 for (unsigned i = 0; i != UseBlocks.size(); ++i) { 1145 const SplitAnalysis::BlockInfo &BI = UseBlocks[i]; 1146 unsigned Number = BI.MBB->getNumber(); 1147 // We normally only need one spill instruction - a load or a store. 1148 Cost += SpillPlacer->getBlockFrequency(Number); 1149 1150 // Unless the value is redefined in the block. 1151 if (BI.LiveIn && BI.LiveOut && BI.FirstDef) 1152 Cost += SpillPlacer->getBlockFrequency(Number); 1153 } 1154 return Cost; 1155 } 1156 1157 /// calcGlobalSplitCost - Return the global split cost of following the split 1158 /// pattern in LiveBundles. This cost should be added to the local cost of the 1159 /// interference pattern in SplitConstraints. 1160 /// 1161 BlockFrequency RAGreedy::calcGlobalSplitCost(GlobalSplitCandidate &Cand) { 1162 BlockFrequency GlobalCost = 0; 1163 const BitVector &LiveBundles = Cand.LiveBundles; 1164 ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks(); 1165 for (unsigned i = 0; i != UseBlocks.size(); ++i) { 1166 const SplitAnalysis::BlockInfo &BI = UseBlocks[i]; 1167 SpillPlacement::BlockConstraint &BC = SplitConstraints[i]; 1168 bool RegIn = LiveBundles[Bundles->getBundle(BC.Number, 0)]; 1169 bool RegOut = LiveBundles[Bundles->getBundle(BC.Number, 1)]; 1170 unsigned Ins = 0; 1171 1172 if (BI.LiveIn) 1173 Ins += RegIn != (BC.Entry == SpillPlacement::PrefReg); 1174 if (BI.LiveOut) 1175 Ins += RegOut != (BC.Exit == SpillPlacement::PrefReg); 1176 while (Ins--) 1177 GlobalCost += SpillPlacer->getBlockFrequency(BC.Number); 1178 } 1179 1180 for (unsigned i = 0, e = Cand.ActiveBlocks.size(); i != e; ++i) { 1181 unsigned Number = Cand.ActiveBlocks[i]; 1182 bool RegIn = LiveBundles[Bundles->getBundle(Number, 0)]; 1183 bool RegOut = LiveBundles[Bundles->getBundle(Number, 1)]; 1184 if (!RegIn && !RegOut) 1185 continue; 1186 if (RegIn && RegOut) { 1187 // We need double spill code if this block has interference. 1188 Cand.Intf.moveToBlock(Number); 1189 if (Cand.Intf.hasInterference()) { 1190 GlobalCost += SpillPlacer->getBlockFrequency(Number); 1191 GlobalCost += SpillPlacer->getBlockFrequency(Number); 1192 } 1193 continue; 1194 } 1195 // live-in / stack-out or stack-in live-out. 1196 GlobalCost += SpillPlacer->getBlockFrequency(Number); 1197 } 1198 return GlobalCost; 1199 } 1200 1201 /// splitAroundRegion - Split the current live range around the regions 1202 /// determined by BundleCand and GlobalCand. 1203 /// 1204 /// Before calling this function, GlobalCand and BundleCand must be initialized 1205 /// so each bundle is assigned to a valid candidate, or NoCand for the 1206 /// stack-bound bundles. The shared SA/SE SplitAnalysis and SplitEditor 1207 /// objects must be initialized for the current live range, and intervals 1208 /// created for the used candidates. 1209 /// 1210 /// @param LREdit The LiveRangeEdit object handling the current split. 1211 /// @param UsedCands List of used GlobalCand entries. Every BundleCand value 1212 /// must appear in this list. 1213 void RAGreedy::splitAroundRegion(LiveRangeEdit &LREdit, 1214 ArrayRef<unsigned> UsedCands) { 1215 // These are the intervals created for new global ranges. We may create more 1216 // intervals for local ranges. 1217 const unsigned NumGlobalIntvs = LREdit.size(); 1218 DEBUG(dbgs() << "splitAroundRegion with " << NumGlobalIntvs << " globals.\n"); 1219 assert(NumGlobalIntvs && "No global intervals configured"); 1220 1221 // Isolate even single instructions when dealing with a proper sub-class. 1222 // That guarantees register class inflation for the stack interval because it 1223 // is all copies. 1224 unsigned Reg = SA->getParent().reg; 1225 bool SingleInstrs = RegClassInfo.isProperSubClass(MRI->getRegClass(Reg)); 1226 1227 // First handle all the blocks with uses. 1228 ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks(); 1229 for (unsigned i = 0; i != UseBlocks.size(); ++i) { 1230 const SplitAnalysis::BlockInfo &BI = UseBlocks[i]; 1231 unsigned Number = BI.MBB->getNumber(); 1232 unsigned IntvIn = 0, IntvOut = 0; 1233 SlotIndex IntfIn, IntfOut; 1234 if (BI.LiveIn) { 1235 unsigned CandIn = BundleCand[Bundles->getBundle(Number, 0)]; 1236 if (CandIn != NoCand) { 1237 GlobalSplitCandidate &Cand = GlobalCand[CandIn]; 1238 IntvIn = Cand.IntvIdx; 1239 Cand.Intf.moveToBlock(Number); 1240 IntfIn = Cand.Intf.first(); 1241 } 1242 } 1243 if (BI.LiveOut) { 1244 unsigned CandOut = BundleCand[Bundles->getBundle(Number, 1)]; 1245 if (CandOut != NoCand) { 1246 GlobalSplitCandidate &Cand = GlobalCand[CandOut]; 1247 IntvOut = Cand.IntvIdx; 1248 Cand.Intf.moveToBlock(Number); 1249 IntfOut = Cand.Intf.last(); 1250 } 1251 } 1252 1253 // Create separate intervals for isolated blocks with multiple uses. 1254 if (!IntvIn && !IntvOut) { 1255 DEBUG(dbgs() << "BB#" << BI.MBB->getNumber() << " isolated.\n"); 1256 if (SA->shouldSplitSingleBlock(BI, SingleInstrs)) 1257 SE->splitSingleBlock(BI); 1258 continue; 1259 } 1260 1261 if (IntvIn && IntvOut) 1262 SE->splitLiveThroughBlock(Number, IntvIn, IntfIn, IntvOut, IntfOut); 1263 else if (IntvIn) 1264 SE->splitRegInBlock(BI, IntvIn, IntfIn); 1265 else 1266 SE->splitRegOutBlock(BI, IntvOut, IntfOut); 1267 } 1268 1269 // Handle live-through blocks. The relevant live-through blocks are stored in 1270 // the ActiveBlocks list with each candidate. We need to filter out 1271 // duplicates. 1272 BitVector Todo = SA->getThroughBlocks(); 1273 for (unsigned c = 0; c != UsedCands.size(); ++c) { 1274 ArrayRef<unsigned> Blocks = GlobalCand[UsedCands[c]].ActiveBlocks; 1275 for (unsigned i = 0, e = Blocks.size(); i != e; ++i) { 1276 unsigned Number = Blocks[i]; 1277 if (!Todo.test(Number)) 1278 continue; 1279 Todo.reset(Number); 1280 1281 unsigned IntvIn = 0, IntvOut = 0; 1282 SlotIndex IntfIn, IntfOut; 1283 1284 unsigned CandIn = BundleCand[Bundles->getBundle(Number, 0)]; 1285 if (CandIn != NoCand) { 1286 GlobalSplitCandidate &Cand = GlobalCand[CandIn]; 1287 IntvIn = Cand.IntvIdx; 1288 Cand.Intf.moveToBlock(Number); 1289 IntfIn = Cand.Intf.first(); 1290 } 1291 1292 unsigned CandOut = BundleCand[Bundles->getBundle(Number, 1)]; 1293 if (CandOut != NoCand) { 1294 GlobalSplitCandidate &Cand = GlobalCand[CandOut]; 1295 IntvOut = Cand.IntvIdx; 1296 Cand.Intf.moveToBlock(Number); 1297 IntfOut = Cand.Intf.last(); 1298 } 1299 if (!IntvIn && !IntvOut) 1300 continue; 1301 SE->splitLiveThroughBlock(Number, IntvIn, IntfIn, IntvOut, IntfOut); 1302 } 1303 } 1304 1305 ++NumGlobalSplits; 1306 1307 SmallVector<unsigned, 8> IntvMap; 1308 SE->finish(&IntvMap); 1309 DebugVars->splitRegister(Reg, LREdit.regs(), *LIS); 1310 1311 ExtraRegInfo.resize(MRI->getNumVirtRegs()); 1312 unsigned OrigBlocks = SA->getNumLiveBlocks(); 1313 1314 // Sort out the new intervals created by splitting. We get four kinds: 1315 // - Remainder intervals should not be split again. 1316 // - Candidate intervals can be assigned to Cand.PhysReg. 1317 // - Block-local splits are candidates for local splitting. 1318 // - DCE leftovers should go back on the queue. 1319 for (unsigned i = 0, e = LREdit.size(); i != e; ++i) { 1320 LiveInterval &Reg = LIS->getInterval(LREdit.get(i)); 1321 1322 // Ignore old intervals from DCE. 1323 if (getStage(Reg) != RS_New) 1324 continue; 1325 1326 // Remainder interval. Don't try splitting again, spill if it doesn't 1327 // allocate. 1328 if (IntvMap[i] == 0) { 1329 setStage(Reg, RS_Spill); 1330 continue; 1331 } 1332 1333 // Global intervals. Allow repeated splitting as long as the number of live 1334 // blocks is strictly decreasing. 1335 if (IntvMap[i] < NumGlobalIntvs) { 1336 if (SA->countLiveBlocks(&Reg) >= OrigBlocks) { 1337 DEBUG(dbgs() << "Main interval covers the same " << OrigBlocks 1338 << " blocks as original.\n"); 1339 // Don't allow repeated splitting as a safe guard against looping. 1340 setStage(Reg, RS_Split2); 1341 } 1342 continue; 1343 } 1344 1345 // Other intervals are treated as new. This includes local intervals created 1346 // for blocks with multiple uses, and anything created by DCE. 1347 } 1348 1349 if (VerifyEnabled) 1350 MF->verify(this, "After splitting live range around region"); 1351 } 1352 1353 unsigned RAGreedy::tryRegionSplit(LiveInterval &VirtReg, AllocationOrder &Order, 1354 SmallVectorImpl<unsigned> &NewVRegs) { 1355 unsigned NumCands = 0; 1356 BlockFrequency BestCost; 1357 1358 // Check if we can split this live range around a compact region. 1359 bool HasCompact = calcCompactRegion(GlobalCand.front()); 1360 if (HasCompact) { 1361 // Yes, keep GlobalCand[0] as the compact region candidate. 1362 NumCands = 1; 1363 BestCost = BlockFrequency::getMaxFrequency(); 1364 } else { 1365 // No benefit from the compact region, our fallback will be per-block 1366 // splitting. Make sure we find a solution that is cheaper than spilling. 1367 BestCost = calcSpillCost(); 1368 DEBUG(dbgs() << "Cost of isolating all blocks = "; 1369 MBFI->printBlockFreq(dbgs(), BestCost) << '\n'); 1370 } 1371 1372 unsigned BestCand = 1373 calculateRegionSplitCost(VirtReg, Order, BestCost, NumCands, 1374 false/*IgnoreCSR*/); 1375 1376 // No solutions found, fall back to single block splitting. 1377 if (!HasCompact && BestCand == NoCand) 1378 return 0; 1379 1380 return doRegionSplit(VirtReg, BestCand, HasCompact, NewVRegs); 1381 } 1382 1383 unsigned RAGreedy::calculateRegionSplitCost(LiveInterval &VirtReg, 1384 AllocationOrder &Order, 1385 BlockFrequency &BestCost, 1386 unsigned &NumCands, 1387 bool IgnoreCSR) { 1388 unsigned BestCand = NoCand; 1389 Order.rewind(); 1390 while (unsigned PhysReg = Order.next()) { 1391 if (IgnoreCSR && isUnusedCalleeSavedReg(PhysReg)) 1392 continue; 1393 1394 // Discard bad candidates before we run out of interference cache cursors. 1395 // This will only affect register classes with a lot of registers (>32). 1396 if (NumCands == IntfCache.getMaxCursors()) { 1397 unsigned WorstCount = ~0u; 1398 unsigned Worst = 0; 1399 for (unsigned i = 0; i != NumCands; ++i) { 1400 if (i == BestCand || !GlobalCand[i].PhysReg) 1401 continue; 1402 unsigned Count = GlobalCand[i].LiveBundles.count(); 1403 if (Count < WorstCount) { 1404 Worst = i; 1405 WorstCount = Count; 1406 } 1407 } 1408 --NumCands; 1409 GlobalCand[Worst] = GlobalCand[NumCands]; 1410 if (BestCand == NumCands) 1411 BestCand = Worst; 1412 } 1413 1414 if (GlobalCand.size() <= NumCands) 1415 GlobalCand.resize(NumCands+1); 1416 GlobalSplitCandidate &Cand = GlobalCand[NumCands]; 1417 Cand.reset(IntfCache, PhysReg); 1418 1419 SpillPlacer->prepare(Cand.LiveBundles); 1420 BlockFrequency Cost; 1421 if (!addSplitConstraints(Cand.Intf, Cost)) { 1422 DEBUG(dbgs() << PrintReg(PhysReg, TRI) << "\tno positive bundles\n"); 1423 continue; 1424 } 1425 DEBUG(dbgs() << PrintReg(PhysReg, TRI) << "\tstatic = "; 1426 MBFI->printBlockFreq(dbgs(), Cost)); 1427 if (Cost >= BestCost) { 1428 DEBUG({ 1429 if (BestCand == NoCand) 1430 dbgs() << " worse than no bundles\n"; 1431 else 1432 dbgs() << " worse than " 1433 << PrintReg(GlobalCand[BestCand].PhysReg, TRI) << '\n'; 1434 }); 1435 continue; 1436 } 1437 growRegion(Cand); 1438 1439 SpillPlacer->finish(); 1440 1441 // No live bundles, defer to splitSingleBlocks(). 1442 if (!Cand.LiveBundles.any()) { 1443 DEBUG(dbgs() << " no bundles.\n"); 1444 continue; 1445 } 1446 1447 Cost += calcGlobalSplitCost(Cand); 1448 DEBUG({ 1449 dbgs() << ", total = "; MBFI->printBlockFreq(dbgs(), Cost) 1450 << " with bundles"; 1451 for (int i = Cand.LiveBundles.find_first(); i>=0; 1452 i = Cand.LiveBundles.find_next(i)) 1453 dbgs() << " EB#" << i; 1454 dbgs() << ".\n"; 1455 }); 1456 if (Cost < BestCost) { 1457 BestCand = NumCands; 1458 BestCost = Cost; 1459 } 1460 ++NumCands; 1461 } 1462 return BestCand; 1463 } 1464 1465 unsigned RAGreedy::doRegionSplit(LiveInterval &VirtReg, unsigned BestCand, 1466 bool HasCompact, 1467 SmallVectorImpl<unsigned> &NewVRegs) { 1468 SmallVector<unsigned, 8> UsedCands; 1469 // Prepare split editor. 1470 LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats); 1471 SE->reset(LREdit, SplitSpillMode); 1472 1473 // Assign all edge bundles to the preferred candidate, or NoCand. 1474 BundleCand.assign(Bundles->getNumBundles(), NoCand); 1475 1476 // Assign bundles for the best candidate region. 1477 if (BestCand != NoCand) { 1478 GlobalSplitCandidate &Cand = GlobalCand[BestCand]; 1479 if (unsigned B = Cand.getBundles(BundleCand, BestCand)) { 1480 UsedCands.push_back(BestCand); 1481 Cand.IntvIdx = SE->openIntv(); 1482 DEBUG(dbgs() << "Split for " << PrintReg(Cand.PhysReg, TRI) << " in " 1483 << B << " bundles, intv " << Cand.IntvIdx << ".\n"); 1484 (void)B; 1485 } 1486 } 1487 1488 // Assign bundles for the compact region. 1489 if (HasCompact) { 1490 GlobalSplitCandidate &Cand = GlobalCand.front(); 1491 assert(!Cand.PhysReg && "Compact region has no physreg"); 1492 if (unsigned B = Cand.getBundles(BundleCand, 0)) { 1493 UsedCands.push_back(0); 1494 Cand.IntvIdx = SE->openIntv(); 1495 DEBUG(dbgs() << "Split for compact region in " << B << " bundles, intv " 1496 << Cand.IntvIdx << ".\n"); 1497 (void)B; 1498 } 1499 } 1500 1501 splitAroundRegion(LREdit, UsedCands); 1502 return 0; 1503 } 1504 1505 1506 //===----------------------------------------------------------------------===// 1507 // Per-Block Splitting 1508 //===----------------------------------------------------------------------===// 1509 1510 /// tryBlockSplit - Split a global live range around every block with uses. This 1511 /// creates a lot of local live ranges, that will be split by tryLocalSplit if 1512 /// they don't allocate. 1513 unsigned RAGreedy::tryBlockSplit(LiveInterval &VirtReg, AllocationOrder &Order, 1514 SmallVectorImpl<unsigned> &NewVRegs) { 1515 assert(&SA->getParent() == &VirtReg && "Live range wasn't analyzed"); 1516 unsigned Reg = VirtReg.reg; 1517 bool SingleInstrs = RegClassInfo.isProperSubClass(MRI->getRegClass(Reg)); 1518 LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats); 1519 SE->reset(LREdit, SplitSpillMode); 1520 ArrayRef<SplitAnalysis::BlockInfo> UseBlocks = SA->getUseBlocks(); 1521 for (unsigned i = 0; i != UseBlocks.size(); ++i) { 1522 const SplitAnalysis::BlockInfo &BI = UseBlocks[i]; 1523 if (SA->shouldSplitSingleBlock(BI, SingleInstrs)) 1524 SE->splitSingleBlock(BI); 1525 } 1526 // No blocks were split. 1527 if (LREdit.empty()) 1528 return 0; 1529 1530 // We did split for some blocks. 1531 SmallVector<unsigned, 8> IntvMap; 1532 SE->finish(&IntvMap); 1533 1534 // Tell LiveDebugVariables about the new ranges. 1535 DebugVars->splitRegister(Reg, LREdit.regs(), *LIS); 1536 1537 ExtraRegInfo.resize(MRI->getNumVirtRegs()); 1538 1539 // Sort out the new intervals created by splitting. The remainder interval 1540 // goes straight to spilling, the new local ranges get to stay RS_New. 1541 for (unsigned i = 0, e = LREdit.size(); i != e; ++i) { 1542 LiveInterval &LI = LIS->getInterval(LREdit.get(i)); 1543 if (getStage(LI) == RS_New && IntvMap[i] == 0) 1544 setStage(LI, RS_Spill); 1545 } 1546 1547 if (VerifyEnabled) 1548 MF->verify(this, "After splitting live range around basic blocks"); 1549 return 0; 1550 } 1551 1552 1553 //===----------------------------------------------------------------------===// 1554 // Per-Instruction Splitting 1555 //===----------------------------------------------------------------------===// 1556 1557 /// Get the number of allocatable registers that match the constraints of \p Reg 1558 /// on \p MI and that are also in \p SuperRC. 1559 static unsigned getNumAllocatableRegsForConstraints( 1560 const MachineInstr *MI, unsigned Reg, const TargetRegisterClass *SuperRC, 1561 const TargetInstrInfo *TII, const TargetRegisterInfo *TRI, 1562 const RegisterClassInfo &RCI) { 1563 assert(SuperRC && "Invalid register class"); 1564 1565 const TargetRegisterClass *ConstrainedRC = 1566 MI->getRegClassConstraintEffectForVReg(Reg, SuperRC, TII, TRI, 1567 /* ExploreBundle */ true); 1568 if (!ConstrainedRC) 1569 return 0; 1570 return RCI.getNumAllocatableRegs(ConstrainedRC); 1571 } 1572 1573 /// tryInstructionSplit - Split a live range around individual instructions. 1574 /// This is normally not worthwhile since the spiller is doing essentially the 1575 /// same thing. However, when the live range is in a constrained register 1576 /// class, it may help to insert copies such that parts of the live range can 1577 /// be moved to a larger register class. 1578 /// 1579 /// This is similar to spilling to a larger register class. 1580 unsigned 1581 RAGreedy::tryInstructionSplit(LiveInterval &VirtReg, AllocationOrder &Order, 1582 SmallVectorImpl<unsigned> &NewVRegs) { 1583 const TargetRegisterClass *CurRC = MRI->getRegClass(VirtReg.reg); 1584 // There is no point to this if there are no larger sub-classes. 1585 if (!RegClassInfo.isProperSubClass(CurRC)) 1586 return 0; 1587 1588 // Always enable split spill mode, since we're effectively spilling to a 1589 // register. 1590 LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats); 1591 SE->reset(LREdit, SplitEditor::SM_Size); 1592 1593 ArrayRef<SlotIndex> Uses = SA->getUseSlots(); 1594 if (Uses.size() <= 1) 1595 return 0; 1596 1597 DEBUG(dbgs() << "Split around " << Uses.size() << " individual instrs.\n"); 1598 1599 const TargetRegisterClass *SuperRC = 1600 TRI->getLargestLegalSuperClass(CurRC, *MF); 1601 unsigned SuperRCNumAllocatableRegs = RCI.getNumAllocatableRegs(SuperRC); 1602 // Split around every non-copy instruction if this split will relax 1603 // the constraints on the virtual register. 1604 // Otherwise, splitting just inserts uncoalescable copies that do not help 1605 // the allocation. 1606 for (unsigned i = 0; i != Uses.size(); ++i) { 1607 if (const MachineInstr *MI = Indexes->getInstructionFromIndex(Uses[i])) 1608 if (MI->isFullCopy() || 1609 SuperRCNumAllocatableRegs == 1610 getNumAllocatableRegsForConstraints(MI, VirtReg.reg, SuperRC, TII, 1611 TRI, RCI)) { 1612 DEBUG(dbgs() << " skip:\t" << Uses[i] << '\t' << *MI); 1613 continue; 1614 } 1615 SE->openIntv(); 1616 SlotIndex SegStart = SE->enterIntvBefore(Uses[i]); 1617 SlotIndex SegStop = SE->leaveIntvAfter(Uses[i]); 1618 SE->useIntv(SegStart, SegStop); 1619 } 1620 1621 if (LREdit.empty()) { 1622 DEBUG(dbgs() << "All uses were copies.\n"); 1623 return 0; 1624 } 1625 1626 SmallVector<unsigned, 8> IntvMap; 1627 SE->finish(&IntvMap); 1628 DebugVars->splitRegister(VirtReg.reg, LREdit.regs(), *LIS); 1629 ExtraRegInfo.resize(MRI->getNumVirtRegs()); 1630 1631 // Assign all new registers to RS_Spill. This was the last chance. 1632 setStage(LREdit.begin(), LREdit.end(), RS_Spill); 1633 return 0; 1634 } 1635 1636 1637 //===----------------------------------------------------------------------===// 1638 // Local Splitting 1639 //===----------------------------------------------------------------------===// 1640 1641 1642 /// calcGapWeights - Compute the maximum spill weight that needs to be evicted 1643 /// in order to use PhysReg between two entries in SA->UseSlots. 1644 /// 1645 /// GapWeight[i] represents the gap between UseSlots[i] and UseSlots[i+1]. 1646 /// 1647 void RAGreedy::calcGapWeights(unsigned PhysReg, 1648 SmallVectorImpl<float> &GapWeight) { 1649 assert(SA->getUseBlocks().size() == 1 && "Not a local interval"); 1650 const SplitAnalysis::BlockInfo &BI = SA->getUseBlocks().front(); 1651 ArrayRef<SlotIndex> Uses = SA->getUseSlots(); 1652 const unsigned NumGaps = Uses.size()-1; 1653 1654 // Start and end points for the interference check. 1655 SlotIndex StartIdx = 1656 BI.LiveIn ? BI.FirstInstr.getBaseIndex() : BI.FirstInstr; 1657 SlotIndex StopIdx = 1658 BI.LiveOut ? BI.LastInstr.getBoundaryIndex() : BI.LastInstr; 1659 1660 GapWeight.assign(NumGaps, 0.0f); 1661 1662 // Add interference from each overlapping register. 1663 for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) { 1664 if (!Matrix->query(const_cast<LiveInterval&>(SA->getParent()), *Units) 1665 .checkInterference()) 1666 continue; 1667 1668 // We know that VirtReg is a continuous interval from FirstInstr to 1669 // LastInstr, so we don't need InterferenceQuery. 1670 // 1671 // Interference that overlaps an instruction is counted in both gaps 1672 // surrounding the instruction. The exception is interference before 1673 // StartIdx and after StopIdx. 1674 // 1675 LiveIntervalUnion::SegmentIter IntI = 1676 Matrix->getLiveUnions()[*Units] .find(StartIdx); 1677 for (unsigned Gap = 0; IntI.valid() && IntI.start() < StopIdx; ++IntI) { 1678 // Skip the gaps before IntI. 1679 while (Uses[Gap+1].getBoundaryIndex() < IntI.start()) 1680 if (++Gap == NumGaps) 1681 break; 1682 if (Gap == NumGaps) 1683 break; 1684 1685 // Update the gaps covered by IntI. 1686 const float weight = IntI.value()->weight; 1687 for (; Gap != NumGaps; ++Gap) { 1688 GapWeight[Gap] = std::max(GapWeight[Gap], weight); 1689 if (Uses[Gap+1].getBaseIndex() >= IntI.stop()) 1690 break; 1691 } 1692 if (Gap == NumGaps) 1693 break; 1694 } 1695 } 1696 1697 // Add fixed interference. 1698 for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) { 1699 const LiveRange &LR = LIS->getRegUnit(*Units); 1700 LiveRange::const_iterator I = LR.find(StartIdx); 1701 LiveRange::const_iterator E = LR.end(); 1702 1703 // Same loop as above. Mark any overlapped gaps as HUGE_VALF. 1704 for (unsigned Gap = 0; I != E && I->start < StopIdx; ++I) { 1705 while (Uses[Gap+1].getBoundaryIndex() < I->start) 1706 if (++Gap == NumGaps) 1707 break; 1708 if (Gap == NumGaps) 1709 break; 1710 1711 for (; Gap != NumGaps; ++Gap) { 1712 GapWeight[Gap] = llvm::huge_valf; 1713 if (Uses[Gap+1].getBaseIndex() >= I->end) 1714 break; 1715 } 1716 if (Gap == NumGaps) 1717 break; 1718 } 1719 } 1720 } 1721 1722 /// tryLocalSplit - Try to split VirtReg into smaller intervals inside its only 1723 /// basic block. 1724 /// 1725 unsigned RAGreedy::tryLocalSplit(LiveInterval &VirtReg, AllocationOrder &Order, 1726 SmallVectorImpl<unsigned> &NewVRegs) { 1727 assert(SA->getUseBlocks().size() == 1 && "Not a local interval"); 1728 const SplitAnalysis::BlockInfo &BI = SA->getUseBlocks().front(); 1729 1730 // Note that it is possible to have an interval that is live-in or live-out 1731 // while only covering a single block - A phi-def can use undef values from 1732 // predecessors, and the block could be a single-block loop. 1733 // We don't bother doing anything clever about such a case, we simply assume 1734 // that the interval is continuous from FirstInstr to LastInstr. We should 1735 // make sure that we don't do anything illegal to such an interval, though. 1736 1737 ArrayRef<SlotIndex> Uses = SA->getUseSlots(); 1738 if (Uses.size() <= 2) 1739 return 0; 1740 const unsigned NumGaps = Uses.size()-1; 1741 1742 DEBUG({ 1743 dbgs() << "tryLocalSplit: "; 1744 for (unsigned i = 0, e = Uses.size(); i != e; ++i) 1745 dbgs() << ' ' << Uses[i]; 1746 dbgs() << '\n'; 1747 }); 1748 1749 // If VirtReg is live across any register mask operands, compute a list of 1750 // gaps with register masks. 1751 SmallVector<unsigned, 8> RegMaskGaps; 1752 if (Matrix->checkRegMaskInterference(VirtReg)) { 1753 // Get regmask slots for the whole block. 1754 ArrayRef<SlotIndex> RMS = LIS->getRegMaskSlotsInBlock(BI.MBB->getNumber()); 1755 DEBUG(dbgs() << RMS.size() << " regmasks in block:"); 1756 // Constrain to VirtReg's live range. 1757 unsigned ri = std::lower_bound(RMS.begin(), RMS.end(), 1758 Uses.front().getRegSlot()) - RMS.begin(); 1759 unsigned re = RMS.size(); 1760 for (unsigned i = 0; i != NumGaps && ri != re; ++i) { 1761 // Look for Uses[i] <= RMS <= Uses[i+1]. 1762 assert(!SlotIndex::isEarlierInstr(RMS[ri], Uses[i])); 1763 if (SlotIndex::isEarlierInstr(Uses[i+1], RMS[ri])) 1764 continue; 1765 // Skip a regmask on the same instruction as the last use. It doesn't 1766 // overlap the live range. 1767 if (SlotIndex::isSameInstr(Uses[i+1], RMS[ri]) && i+1 == NumGaps) 1768 break; 1769 DEBUG(dbgs() << ' ' << RMS[ri] << ':' << Uses[i] << '-' << Uses[i+1]); 1770 RegMaskGaps.push_back(i); 1771 // Advance ri to the next gap. A regmask on one of the uses counts in 1772 // both gaps. 1773 while (ri != re && SlotIndex::isEarlierInstr(RMS[ri], Uses[i+1])) 1774 ++ri; 1775 } 1776 DEBUG(dbgs() << '\n'); 1777 } 1778 1779 // Since we allow local split results to be split again, there is a risk of 1780 // creating infinite loops. It is tempting to require that the new live 1781 // ranges have less instructions than the original. That would guarantee 1782 // convergence, but it is too strict. A live range with 3 instructions can be 1783 // split 2+3 (including the COPY), and we want to allow that. 1784 // 1785 // Instead we use these rules: 1786 // 1787 // 1. Allow any split for ranges with getStage() < RS_Split2. (Except for the 1788 // noop split, of course). 1789 // 2. Require progress be made for ranges with getStage() == RS_Split2. All 1790 // the new ranges must have fewer instructions than before the split. 1791 // 3. New ranges with the same number of instructions are marked RS_Split2, 1792 // smaller ranges are marked RS_New. 1793 // 1794 // These rules allow a 3 -> 2+3 split once, which we need. They also prevent 1795 // excessive splitting and infinite loops. 1796 // 1797 bool ProgressRequired = getStage(VirtReg) >= RS_Split2; 1798 1799 // Best split candidate. 1800 unsigned BestBefore = NumGaps; 1801 unsigned BestAfter = 0; 1802 float BestDiff = 0; 1803 1804 const float blockFreq = 1805 SpillPlacer->getBlockFrequency(BI.MBB->getNumber()).getFrequency() * 1806 (1.0f / MBFI->getEntryFreq()); 1807 SmallVector<float, 8> GapWeight; 1808 1809 Order.rewind(); 1810 while (unsigned PhysReg = Order.next()) { 1811 // Keep track of the largest spill weight that would need to be evicted in 1812 // order to make use of PhysReg between UseSlots[i] and UseSlots[i+1]. 1813 calcGapWeights(PhysReg, GapWeight); 1814 1815 // Remove any gaps with regmask clobbers. 1816 if (Matrix->checkRegMaskInterference(VirtReg, PhysReg)) 1817 for (unsigned i = 0, e = RegMaskGaps.size(); i != e; ++i) 1818 GapWeight[RegMaskGaps[i]] = llvm::huge_valf; 1819 1820 // Try to find the best sequence of gaps to close. 1821 // The new spill weight must be larger than any gap interference. 1822 1823 // We will split before Uses[SplitBefore] and after Uses[SplitAfter]. 1824 unsigned SplitBefore = 0, SplitAfter = 1; 1825 1826 // MaxGap should always be max(GapWeight[SplitBefore..SplitAfter-1]). 1827 // It is the spill weight that needs to be evicted. 1828 float MaxGap = GapWeight[0]; 1829 1830 for (;;) { 1831 // Live before/after split? 1832 const bool LiveBefore = SplitBefore != 0 || BI.LiveIn; 1833 const bool LiveAfter = SplitAfter != NumGaps || BI.LiveOut; 1834 1835 DEBUG(dbgs() << PrintReg(PhysReg, TRI) << ' ' 1836 << Uses[SplitBefore] << '-' << Uses[SplitAfter] 1837 << " i=" << MaxGap); 1838 1839 // Stop before the interval gets so big we wouldn't be making progress. 1840 if (!LiveBefore && !LiveAfter) { 1841 DEBUG(dbgs() << " all\n"); 1842 break; 1843 } 1844 // Should the interval be extended or shrunk? 1845 bool Shrink = true; 1846 1847 // How many gaps would the new range have? 1848 unsigned NewGaps = LiveBefore + SplitAfter - SplitBefore + LiveAfter; 1849 1850 // Legally, without causing looping? 1851 bool Legal = !ProgressRequired || NewGaps < NumGaps; 1852 1853 if (Legal && MaxGap < llvm::huge_valf) { 1854 // Estimate the new spill weight. Each instruction reads or writes the 1855 // register. Conservatively assume there are no read-modify-write 1856 // instructions. 1857 // 1858 // Try to guess the size of the new interval. 1859 const float EstWeight = normalizeSpillWeight( 1860 blockFreq * (NewGaps + 1), 1861 Uses[SplitBefore].distance(Uses[SplitAfter]) + 1862 (LiveBefore + LiveAfter) * SlotIndex::InstrDist, 1863 1); 1864 // Would this split be possible to allocate? 1865 // Never allocate all gaps, we wouldn't be making progress. 1866 DEBUG(dbgs() << " w=" << EstWeight); 1867 if (EstWeight * Hysteresis >= MaxGap) { 1868 Shrink = false; 1869 float Diff = EstWeight - MaxGap; 1870 if (Diff > BestDiff) { 1871 DEBUG(dbgs() << " (best)"); 1872 BestDiff = Hysteresis * Diff; 1873 BestBefore = SplitBefore; 1874 BestAfter = SplitAfter; 1875 } 1876 } 1877 } 1878 1879 // Try to shrink. 1880 if (Shrink) { 1881 if (++SplitBefore < SplitAfter) { 1882 DEBUG(dbgs() << " shrink\n"); 1883 // Recompute the max when necessary. 1884 if (GapWeight[SplitBefore - 1] >= MaxGap) { 1885 MaxGap = GapWeight[SplitBefore]; 1886 for (unsigned i = SplitBefore + 1; i != SplitAfter; ++i) 1887 MaxGap = std::max(MaxGap, GapWeight[i]); 1888 } 1889 continue; 1890 } 1891 MaxGap = 0; 1892 } 1893 1894 // Try to extend the interval. 1895 if (SplitAfter >= NumGaps) { 1896 DEBUG(dbgs() << " end\n"); 1897 break; 1898 } 1899 1900 DEBUG(dbgs() << " extend\n"); 1901 MaxGap = std::max(MaxGap, GapWeight[SplitAfter++]); 1902 } 1903 } 1904 1905 // Didn't find any candidates? 1906 if (BestBefore == NumGaps) 1907 return 0; 1908 1909 DEBUG(dbgs() << "Best local split range: " << Uses[BestBefore] 1910 << '-' << Uses[BestAfter] << ", " << BestDiff 1911 << ", " << (BestAfter - BestBefore + 1) << " instrs\n"); 1912 1913 LiveRangeEdit LREdit(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats); 1914 SE->reset(LREdit); 1915 1916 SE->openIntv(); 1917 SlotIndex SegStart = SE->enterIntvBefore(Uses[BestBefore]); 1918 SlotIndex SegStop = SE->leaveIntvAfter(Uses[BestAfter]); 1919 SE->useIntv(SegStart, SegStop); 1920 SmallVector<unsigned, 8> IntvMap; 1921 SE->finish(&IntvMap); 1922 DebugVars->splitRegister(VirtReg.reg, LREdit.regs(), *LIS); 1923 1924 // If the new range has the same number of instructions as before, mark it as 1925 // RS_Split2 so the next split will be forced to make progress. Otherwise, 1926 // leave the new intervals as RS_New so they can compete. 1927 bool LiveBefore = BestBefore != 0 || BI.LiveIn; 1928 bool LiveAfter = BestAfter != NumGaps || BI.LiveOut; 1929 unsigned NewGaps = LiveBefore + BestAfter - BestBefore + LiveAfter; 1930 if (NewGaps >= NumGaps) { 1931 DEBUG(dbgs() << "Tagging non-progress ranges: "); 1932 assert(!ProgressRequired && "Didn't make progress when it was required."); 1933 for (unsigned i = 0, e = IntvMap.size(); i != e; ++i) 1934 if (IntvMap[i] == 1) { 1935 setStage(LIS->getInterval(LREdit.get(i)), RS_Split2); 1936 DEBUG(dbgs() << PrintReg(LREdit.get(i))); 1937 } 1938 DEBUG(dbgs() << '\n'); 1939 } 1940 ++NumLocalSplits; 1941 1942 return 0; 1943 } 1944 1945 //===----------------------------------------------------------------------===// 1946 // Live Range Splitting 1947 //===----------------------------------------------------------------------===// 1948 1949 /// trySplit - Try to split VirtReg or one of its interferences, making it 1950 /// assignable. 1951 /// @return Physreg when VirtReg may be assigned and/or new NewVRegs. 1952 unsigned RAGreedy::trySplit(LiveInterval &VirtReg, AllocationOrder &Order, 1953 SmallVectorImpl<unsigned>&NewVRegs) { 1954 // Ranges must be Split2 or less. 1955 if (getStage(VirtReg) >= RS_Spill) 1956 return 0; 1957 1958 // Local intervals are handled separately. 1959 if (LIS->intervalIsInOneMBB(VirtReg)) { 1960 NamedRegionTimer T("Local Splitting", TimerGroupName, TimePassesIsEnabled); 1961 SA->analyze(&VirtReg); 1962 unsigned PhysReg = tryLocalSplit(VirtReg, Order, NewVRegs); 1963 if (PhysReg || !NewVRegs.empty()) 1964 return PhysReg; 1965 return tryInstructionSplit(VirtReg, Order, NewVRegs); 1966 } 1967 1968 NamedRegionTimer T("Global Splitting", TimerGroupName, TimePassesIsEnabled); 1969 1970 SA->analyze(&VirtReg); 1971 1972 // FIXME: SplitAnalysis may repair broken live ranges coming from the 1973 // coalescer. That may cause the range to become allocatable which means that 1974 // tryRegionSplit won't be making progress. This check should be replaced with 1975 // an assertion when the coalescer is fixed. 1976 if (SA->didRepairRange()) { 1977 // VirtReg has changed, so all cached queries are invalid. 1978 Matrix->invalidateVirtRegs(); 1979 if (unsigned PhysReg = tryAssign(VirtReg, Order, NewVRegs)) 1980 return PhysReg; 1981 } 1982 1983 // First try to split around a region spanning multiple blocks. RS_Split2 1984 // ranges already made dubious progress with region splitting, so they go 1985 // straight to single block splitting. 1986 if (getStage(VirtReg) < RS_Split2) { 1987 unsigned PhysReg = tryRegionSplit(VirtReg, Order, NewVRegs); 1988 if (PhysReg || !NewVRegs.empty()) 1989 return PhysReg; 1990 } 1991 1992 // Then isolate blocks. 1993 return tryBlockSplit(VirtReg, Order, NewVRegs); 1994 } 1995 1996 //===----------------------------------------------------------------------===// 1997 // Last Chance Recoloring 1998 //===----------------------------------------------------------------------===// 1999 2000 /// mayRecolorAllInterferences - Check if the virtual registers that 2001 /// interfere with \p VirtReg on \p PhysReg (or one of its aliases) may be 2002 /// recolored to free \p PhysReg. 2003 /// When true is returned, \p RecoloringCandidates has been augmented with all 2004 /// the live intervals that need to be recolored in order to free \p PhysReg 2005 /// for \p VirtReg. 2006 /// \p FixedRegisters contains all the virtual registers that cannot be 2007 /// recolored. 2008 bool 2009 RAGreedy::mayRecolorAllInterferences(unsigned PhysReg, LiveInterval &VirtReg, 2010 SmallLISet &RecoloringCandidates, 2011 const SmallVirtRegSet &FixedRegisters) { 2012 const TargetRegisterClass *CurRC = MRI->getRegClass(VirtReg.reg); 2013 2014 for (MCRegUnitIterator Units(PhysReg, TRI); Units.isValid(); ++Units) { 2015 LiveIntervalUnion::Query &Q = Matrix->query(VirtReg, *Units); 2016 // If there is LastChanceRecoloringMaxInterference or more interferences, 2017 // chances are one would not be recolorable. 2018 if (Q.collectInterferingVRegs(LastChanceRecoloringMaxInterference) >= 2019 LastChanceRecoloringMaxInterference && !ExhaustiveSearch) { 2020 DEBUG(dbgs() << "Early abort: too many interferences.\n"); 2021 CutOffInfo |= CO_Interf; 2022 return false; 2023 } 2024 for (unsigned i = Q.interferingVRegs().size(); i; --i) { 2025 LiveInterval *Intf = Q.interferingVRegs()[i - 1]; 2026 // If Intf is done and sit on the same register class as VirtReg, 2027 // it would not be recolorable as it is in the same state as VirtReg. 2028 if ((getStage(*Intf) == RS_Done && 2029 MRI->getRegClass(Intf->reg) == CurRC) || 2030 FixedRegisters.count(Intf->reg)) { 2031 DEBUG(dbgs() << "Early abort: the inteference is not recolorable.\n"); 2032 return false; 2033 } 2034 RecoloringCandidates.insert(Intf); 2035 } 2036 } 2037 return true; 2038 } 2039 2040 /// tryLastChanceRecoloring - Try to assign a color to \p VirtReg by recoloring 2041 /// its interferences. 2042 /// Last chance recoloring chooses a color for \p VirtReg and recolors every 2043 /// virtual register that was using it. The recoloring process may recursively 2044 /// use the last chance recoloring. Therefore, when a virtual register has been 2045 /// assigned a color by this mechanism, it is marked as Fixed, i.e., it cannot 2046 /// be last-chance-recolored again during this recoloring "session". 2047 /// E.g., 2048 /// Let 2049 /// vA can use {R1, R2 } 2050 /// vB can use { R2, R3} 2051 /// vC can use {R1 } 2052 /// Where vA, vB, and vC cannot be split anymore (they are reloads for 2053 /// instance) and they all interfere. 2054 /// 2055 /// vA is assigned R1 2056 /// vB is assigned R2 2057 /// vC tries to evict vA but vA is already done. 2058 /// Regular register allocation fails. 2059 /// 2060 /// Last chance recoloring kicks in: 2061 /// vC does as if vA was evicted => vC uses R1. 2062 /// vC is marked as fixed. 2063 /// vA needs to find a color. 2064 /// None are available. 2065 /// vA cannot evict vC: vC is a fixed virtual register now. 2066 /// vA does as if vB was evicted => vA uses R2. 2067 /// vB needs to find a color. 2068 /// R3 is available. 2069 /// Recoloring => vC = R1, vA = R2, vB = R3 2070 /// 2071 /// \p Order defines the preferred allocation order for \p VirtReg. 2072 /// \p NewRegs will contain any new virtual register that have been created 2073 /// (split, spill) during the process and that must be assigned. 2074 /// \p FixedRegisters contains all the virtual registers that cannot be 2075 /// recolored. 2076 /// \p Depth gives the current depth of the last chance recoloring. 2077 /// \return a physical register that can be used for VirtReg or ~0u if none 2078 /// exists. 2079 unsigned RAGreedy::tryLastChanceRecoloring(LiveInterval &VirtReg, 2080 AllocationOrder &Order, 2081 SmallVectorImpl<unsigned> &NewVRegs, 2082 SmallVirtRegSet &FixedRegisters, 2083 unsigned Depth) { 2084 DEBUG(dbgs() << "Try last chance recoloring for " << VirtReg << '\n'); 2085 // Ranges must be Done. 2086 assert((getStage(VirtReg) >= RS_Done || !VirtReg.isSpillable()) && 2087 "Last chance recoloring should really be last chance"); 2088 // Set the max depth to LastChanceRecoloringMaxDepth. 2089 // We may want to reconsider that if we end up with a too large search space 2090 // for target with hundreds of registers. 2091 // Indeed, in that case we may want to cut the search space earlier. 2092 if (Depth >= LastChanceRecoloringMaxDepth && !ExhaustiveSearch) { 2093 DEBUG(dbgs() << "Abort because max depth has been reached.\n"); 2094 CutOffInfo |= CO_Depth; 2095 return ~0u; 2096 } 2097 2098 // Set of Live intervals that will need to be recolored. 2099 SmallLISet RecoloringCandidates; 2100 // Record the original mapping virtual register to physical register in case 2101 // the recoloring fails. 2102 DenseMap<unsigned, unsigned> VirtRegToPhysReg; 2103 // Mark VirtReg as fixed, i.e., it will not be recolored pass this point in 2104 // this recoloring "session". 2105 FixedRegisters.insert(VirtReg.reg); 2106 2107 Order.rewind(); 2108 while (unsigned PhysReg = Order.next()) { 2109 DEBUG(dbgs() << "Try to assign: " << VirtReg << " to " 2110 << PrintReg(PhysReg, TRI) << '\n'); 2111 RecoloringCandidates.clear(); 2112 VirtRegToPhysReg.clear(); 2113 2114 // It is only possible to recolor virtual register interference. 2115 if (Matrix->checkInterference(VirtReg, PhysReg) > 2116 LiveRegMatrix::IK_VirtReg) { 2117 DEBUG(dbgs() << "Some inteferences are not with virtual registers.\n"); 2118 2119 continue; 2120 } 2121 2122 // Early give up on this PhysReg if it is obvious we cannot recolor all 2123 // the interferences. 2124 if (!mayRecolorAllInterferences(PhysReg, VirtReg, RecoloringCandidates, 2125 FixedRegisters)) { 2126 DEBUG(dbgs() << "Some inteferences cannot be recolored.\n"); 2127 continue; 2128 } 2129 2130 // RecoloringCandidates contains all the virtual registers that interfer 2131 // with VirtReg on PhysReg (or one of its aliases). 2132 // Enqueue them for recoloring and perform the actual recoloring. 2133 PQueue RecoloringQueue; 2134 for (SmallLISet::iterator It = RecoloringCandidates.begin(), 2135 EndIt = RecoloringCandidates.end(); 2136 It != EndIt; ++It) { 2137 unsigned ItVirtReg = (*It)->reg; 2138 enqueue(RecoloringQueue, *It); 2139 assert(VRM->hasPhys(ItVirtReg) && 2140 "Interferences are supposed to be with allocated vairables"); 2141 2142 // Record the current allocation. 2143 VirtRegToPhysReg[ItVirtReg] = VRM->getPhys(ItVirtReg); 2144 // unset the related struct. 2145 Matrix->unassign(**It); 2146 } 2147 2148 // Do as if VirtReg was assigned to PhysReg so that the underlying 2149 // recoloring has the right information about the interferes and 2150 // available colors. 2151 Matrix->assign(VirtReg, PhysReg); 2152 2153 // Save the current recoloring state. 2154 // If we cannot recolor all the interferences, we will have to start again 2155 // at this point for the next physical register. 2156 SmallVirtRegSet SaveFixedRegisters(FixedRegisters); 2157 if (tryRecoloringCandidates(RecoloringQueue, NewVRegs, FixedRegisters, 2158 Depth)) { 2159 // Do not mess up with the global assignment process. 2160 // I.e., VirtReg must be unassigned. 2161 Matrix->unassign(VirtReg); 2162 return PhysReg; 2163 } 2164 2165 DEBUG(dbgs() << "Fail to assign: " << VirtReg << " to " 2166 << PrintReg(PhysReg, TRI) << '\n'); 2167 2168 // The recoloring attempt failed, undo the changes. 2169 FixedRegisters = SaveFixedRegisters; 2170 Matrix->unassign(VirtReg); 2171 2172 for (SmallLISet::iterator It = RecoloringCandidates.begin(), 2173 EndIt = RecoloringCandidates.end(); 2174 It != EndIt; ++It) { 2175 unsigned ItVirtReg = (*It)->reg; 2176 if (VRM->hasPhys(ItVirtReg)) 2177 Matrix->unassign(**It); 2178 unsigned ItPhysReg = VirtRegToPhysReg[ItVirtReg]; 2179 Matrix->assign(**It, ItPhysReg); 2180 } 2181 } 2182 2183 // Last chance recoloring did not worked either, give up. 2184 return ~0u; 2185 } 2186 2187 /// tryRecoloringCandidates - Try to assign a new color to every register 2188 /// in \RecoloringQueue. 2189 /// \p NewRegs will contain any new virtual register created during the 2190 /// recoloring process. 2191 /// \p FixedRegisters[in/out] contains all the registers that have been 2192 /// recolored. 2193 /// \return true if all virtual registers in RecoloringQueue were successfully 2194 /// recolored, false otherwise. 2195 bool RAGreedy::tryRecoloringCandidates(PQueue &RecoloringQueue, 2196 SmallVectorImpl<unsigned> &NewVRegs, 2197 SmallVirtRegSet &FixedRegisters, 2198 unsigned Depth) { 2199 while (!RecoloringQueue.empty()) { 2200 LiveInterval *LI = dequeue(RecoloringQueue); 2201 DEBUG(dbgs() << "Try to recolor: " << *LI << '\n'); 2202 unsigned PhysReg; 2203 PhysReg = selectOrSplitImpl(*LI, NewVRegs, FixedRegisters, Depth + 1); 2204 if (PhysReg == ~0u || !PhysReg) 2205 return false; 2206 DEBUG(dbgs() << "Recoloring of " << *LI 2207 << " succeeded with: " << PrintReg(PhysReg, TRI) << '\n'); 2208 Matrix->assign(*LI, PhysReg); 2209 FixedRegisters.insert(LI->reg); 2210 } 2211 return true; 2212 } 2213 2214 //===----------------------------------------------------------------------===// 2215 // Main Entry Point 2216 //===----------------------------------------------------------------------===// 2217 2218 unsigned RAGreedy::selectOrSplit(LiveInterval &VirtReg, 2219 SmallVectorImpl<unsigned> &NewVRegs) { 2220 CutOffInfo = CO_None; 2221 LLVMContext &Ctx = MF->getFunction()->getContext(); 2222 SmallVirtRegSet FixedRegisters; 2223 unsigned Reg = selectOrSplitImpl(VirtReg, NewVRegs, FixedRegisters); 2224 if (Reg == ~0U && (CutOffInfo != CO_None)) { 2225 uint8_t CutOffEncountered = CutOffInfo & (CO_Depth | CO_Interf); 2226 if (CutOffEncountered == CO_Depth) 2227 Ctx.emitError("register allocation failed: maximum depth for recoloring " 2228 "reached. Use -fexhaustive-register-search to skip " 2229 "cutoffs"); 2230 else if (CutOffEncountered == CO_Interf) 2231 Ctx.emitError("register allocation failed: maximum interference for " 2232 "recoloring reached. Use -fexhaustive-register-search " 2233 "to skip cutoffs"); 2234 else if (CutOffEncountered == (CO_Depth | CO_Interf)) 2235 Ctx.emitError("register allocation failed: maximum interference and " 2236 "depth for recoloring reached. Use " 2237 "-fexhaustive-register-search to skip cutoffs"); 2238 } 2239 return Reg; 2240 } 2241 2242 /// Using a CSR for the first time has a cost because it causes push|pop 2243 /// to be added to prologue|epilogue. Splitting a cold section of the live 2244 /// range can have lower cost than using the CSR for the first time; 2245 /// Spilling a live range in the cold path can have lower cost than using 2246 /// the CSR for the first time. Returns the physical register if we decide 2247 /// to use the CSR; otherwise return 0. 2248 unsigned RAGreedy::tryAssignCSRFirstTime(LiveInterval &VirtReg, 2249 AllocationOrder &Order, 2250 unsigned PhysReg, 2251 unsigned &CostPerUseLimit, 2252 SmallVectorImpl<unsigned> &NewVRegs) { 2253 if (getStage(VirtReg) == RS_Spill && VirtReg.isSpillable()) { 2254 // We choose spill over using the CSR for the first time if the spill cost 2255 // is lower than CSRCost. 2256 SA->analyze(&VirtReg); 2257 if (calcSpillCost() >= CSRCost) 2258 return PhysReg; 2259 2260 // We are going to spill, set CostPerUseLimit to 1 to make sure that 2261 // we will not use a callee-saved register in tryEvict. 2262 CostPerUseLimit = 1; 2263 return 0; 2264 } 2265 if (getStage(VirtReg) < RS_Split) { 2266 // We choose pre-splitting over using the CSR for the first time if 2267 // the cost of splitting is lower than CSRCost. 2268 SA->analyze(&VirtReg); 2269 unsigned NumCands = 0; 2270 BlockFrequency BestCost = CSRCost; // Don't modify CSRCost. 2271 unsigned BestCand = calculateRegionSplitCost(VirtReg, Order, BestCost, 2272 NumCands, true /*IgnoreCSR*/); 2273 if (BestCand == NoCand) 2274 // Use the CSR if we can't find a region split below CSRCost. 2275 return PhysReg; 2276 2277 // Perform the actual pre-splitting. 2278 doRegionSplit(VirtReg, BestCand, false/*HasCompact*/, NewVRegs); 2279 return 0; 2280 } 2281 return PhysReg; 2282 } 2283 2284 void RAGreedy::aboutToRemoveInterval(LiveInterval &LI) { 2285 // Do not keep invalid information around. 2286 SetOfBrokenHints.remove(&LI); 2287 } 2288 2289 void RAGreedy::initializeCSRCost() { 2290 // We use the larger one out of the command-line option and the value report 2291 // by TRI. 2292 CSRCost = BlockFrequency( 2293 std::max((unsigned)CSRFirstTimeCost, TRI->getCSRFirstUseCost())); 2294 if (!CSRCost.getFrequency()) 2295 return; 2296 2297 // Raw cost is relative to Entry == 2^14; scale it appropriately. 2298 uint64_t ActualEntry = MBFI->getEntryFreq(); 2299 if (!ActualEntry) { 2300 CSRCost = 0; 2301 return; 2302 } 2303 uint64_t FixedEntry = 1 << 14; 2304 if (ActualEntry < FixedEntry) 2305 CSRCost *= BranchProbability(ActualEntry, FixedEntry); 2306 else if (ActualEntry <= UINT32_MAX) 2307 // Invert the fraction and divide. 2308 CSRCost /= BranchProbability(FixedEntry, ActualEntry); 2309 else 2310 // Can't use BranchProbability in general, since it takes 32-bit numbers. 2311 CSRCost = CSRCost.getFrequency() * (ActualEntry / FixedEntry); 2312 } 2313 2314 /// \brief Collect the hint info for \p Reg. 2315 /// The results are stored into \p Out. 2316 /// \p Out is not cleared before being populated. 2317 void RAGreedy::collectHintInfo(unsigned Reg, HintsInfo &Out) { 2318 for (const MachineInstr &Instr : MRI->reg_nodbg_instructions(Reg)) { 2319 if (!Instr.isFullCopy()) 2320 continue; 2321 // Look for the other end of the copy. 2322 unsigned OtherReg = Instr.getOperand(0).getReg(); 2323 if (OtherReg == Reg) { 2324 OtherReg = Instr.getOperand(1).getReg(); 2325 if (OtherReg == Reg) 2326 continue; 2327 } 2328 // Get the current assignment. 2329 unsigned OtherPhysReg = TargetRegisterInfo::isPhysicalRegister(OtherReg) 2330 ? OtherReg 2331 : VRM->getPhys(OtherReg); 2332 // Push the collected information. 2333 Out.push_back(HintInfo(MBFI->getBlockFreq(Instr.getParent()), OtherReg, 2334 OtherPhysReg)); 2335 } 2336 } 2337 2338 /// \brief Using the given \p List, compute the cost of the broken hints if 2339 /// \p PhysReg was used. 2340 /// \return The cost of \p List for \p PhysReg. 2341 BlockFrequency RAGreedy::getBrokenHintFreq(const HintsInfo &List, 2342 unsigned PhysReg) { 2343 BlockFrequency Cost = 0; 2344 for (const HintInfo &Info : List) { 2345 if (Info.PhysReg != PhysReg) 2346 Cost += Info.Freq; 2347 } 2348 return Cost; 2349 } 2350 2351 /// \brief Using the register assigned to \p VirtReg, try to recolor 2352 /// all the live ranges that are copy-related with \p VirtReg. 2353 /// The recoloring is then propagated to all the live-ranges that have 2354 /// been recolored and so on, until no more copies can be coalesced or 2355 /// it is not profitable. 2356 /// For a given live range, profitability is determined by the sum of the 2357 /// frequencies of the non-identity copies it would introduce with the old 2358 /// and new register. 2359 void RAGreedy::tryHintRecoloring(LiveInterval &VirtReg) { 2360 // We have a broken hint, check if it is possible to fix it by 2361 // reusing PhysReg for the copy-related live-ranges. Indeed, we evicted 2362 // some register and PhysReg may be available for the other live-ranges. 2363 SmallSet<unsigned, 4> Visited; 2364 SmallVector<unsigned, 2> RecoloringCandidates; 2365 HintsInfo Info; 2366 unsigned Reg = VirtReg.reg; 2367 unsigned PhysReg = VRM->getPhys(Reg); 2368 // Start the recoloring algorithm from the input live-interval, then 2369 // it will propagate to the ones that are copy-related with it. 2370 Visited.insert(Reg); 2371 RecoloringCandidates.push_back(Reg); 2372 2373 DEBUG(dbgs() << "Trying to reconcile hints for: " << PrintReg(Reg, TRI) << '(' 2374 << PrintReg(PhysReg, TRI) << ")\n"); 2375 2376 do { 2377 Reg = RecoloringCandidates.pop_back_val(); 2378 2379 // We cannot recolor physcal register. 2380 if (TargetRegisterInfo::isPhysicalRegister(Reg)) 2381 continue; 2382 2383 assert(VRM->hasPhys(Reg) && "We have unallocated variable!!"); 2384 2385 // Get the live interval mapped with this virtual register to be able 2386 // to check for the interference with the new color. 2387 LiveInterval &LI = LIS->getInterval(Reg); 2388 unsigned CurrPhys = VRM->getPhys(Reg); 2389 // Check that the new color matches the register class constraints and 2390 // that it is free for this live range. 2391 if (CurrPhys != PhysReg && (!MRI->getRegClass(Reg)->contains(PhysReg) || 2392 Matrix->checkInterference(LI, PhysReg))) 2393 continue; 2394 2395 DEBUG(dbgs() << PrintReg(Reg, TRI) << '(' << PrintReg(CurrPhys, TRI) 2396 << ") is recolorable.\n"); 2397 2398 // Gather the hint info. 2399 Info.clear(); 2400 collectHintInfo(Reg, Info); 2401 // Check if recoloring the live-range will increase the cost of the 2402 // non-identity copies. 2403 if (CurrPhys != PhysReg) { 2404 DEBUG(dbgs() << "Checking profitability:\n"); 2405 BlockFrequency OldCopiesCost = getBrokenHintFreq(Info, CurrPhys); 2406 BlockFrequency NewCopiesCost = getBrokenHintFreq(Info, PhysReg); 2407 DEBUG(dbgs() << "Old Cost: " << OldCopiesCost.getFrequency() 2408 << "\nNew Cost: " << NewCopiesCost.getFrequency() << '\n'); 2409 if (OldCopiesCost < NewCopiesCost) { 2410 DEBUG(dbgs() << "=> Not profitable.\n"); 2411 continue; 2412 } 2413 // At this point, the cost is either cheaper or equal. If it is 2414 // equal, we consider this is profitable because it may expose 2415 // more recoloring opportunities. 2416 DEBUG(dbgs() << "=> Profitable.\n"); 2417 // Recolor the live-range. 2418 Matrix->unassign(LI); 2419 Matrix->assign(LI, PhysReg); 2420 } 2421 // Push all copy-related live-ranges to keep reconciling the broken 2422 // hints. 2423 for (const HintInfo &HI : Info) { 2424 if (Visited.insert(HI.Reg).second) 2425 RecoloringCandidates.push_back(HI.Reg); 2426 } 2427 } while (!RecoloringCandidates.empty()); 2428 } 2429 2430 /// \brief Try to recolor broken hints. 2431 /// Broken hints may be repaired by recoloring when an evicted variable 2432 /// freed up a register for a larger live-range. 2433 /// Consider the following example: 2434 /// BB1: 2435 /// a = 2436 /// b = 2437 /// BB2: 2438 /// ... 2439 /// = b 2440 /// = a 2441 /// Let us assume b gets split: 2442 /// BB1: 2443 /// a = 2444 /// b = 2445 /// BB2: 2446 /// c = b 2447 /// ... 2448 /// d = c 2449 /// = d 2450 /// = a 2451 /// Because of how the allocation work, b, c, and d may be assigned different 2452 /// colors. Now, if a gets evicted later: 2453 /// BB1: 2454 /// a = 2455 /// st a, SpillSlot 2456 /// b = 2457 /// BB2: 2458 /// c = b 2459 /// ... 2460 /// d = c 2461 /// = d 2462 /// e = ld SpillSlot 2463 /// = e 2464 /// This is likely that we can assign the same register for b, c, and d, 2465 /// getting rid of 2 copies. 2466 void RAGreedy::tryHintsRecoloring() { 2467 for (LiveInterval *LI : SetOfBrokenHints) { 2468 assert(TargetRegisterInfo::isVirtualRegister(LI->reg) && 2469 "Recoloring is possible only for virtual registers"); 2470 // Some dead defs may be around (e.g., because of debug uses). 2471 // Ignore those. 2472 if (!VRM->hasPhys(LI->reg)) 2473 continue; 2474 tryHintRecoloring(*LI); 2475 } 2476 } 2477 2478 unsigned RAGreedy::selectOrSplitImpl(LiveInterval &VirtReg, 2479 SmallVectorImpl<unsigned> &NewVRegs, 2480 SmallVirtRegSet &FixedRegisters, 2481 unsigned Depth) { 2482 unsigned CostPerUseLimit = ~0u; 2483 // First try assigning a free register. 2484 AllocationOrder Order(VirtReg.reg, *VRM, RegClassInfo, Matrix); 2485 if (unsigned PhysReg = tryAssign(VirtReg, Order, NewVRegs)) { 2486 // When NewVRegs is not empty, we may have made decisions such as evicting 2487 // a virtual register, go with the earlier decisions and use the physical 2488 // register. 2489 if (CSRCost.getFrequency() && isUnusedCalleeSavedReg(PhysReg) && 2490 NewVRegs.empty()) { 2491 unsigned CSRReg = tryAssignCSRFirstTime(VirtReg, Order, PhysReg, 2492 CostPerUseLimit, NewVRegs); 2493 if (CSRReg || !NewVRegs.empty()) 2494 // Return now if we decide to use a CSR or create new vregs due to 2495 // pre-splitting. 2496 return CSRReg; 2497 } else 2498 return PhysReg; 2499 } 2500 2501 LiveRangeStage Stage = getStage(VirtReg); 2502 DEBUG(dbgs() << StageName[Stage] 2503 << " Cascade " << ExtraRegInfo[VirtReg.reg].Cascade << '\n'); 2504 2505 // Try to evict a less worthy live range, but only for ranges from the primary 2506 // queue. The RS_Split ranges already failed to do this, and they should not 2507 // get a second chance until they have been split. 2508 if (Stage != RS_Split) 2509 if (unsigned PhysReg = 2510 tryEvict(VirtReg, Order, NewVRegs, CostPerUseLimit)) { 2511 unsigned Hint = MRI->getSimpleHint(VirtReg.reg); 2512 // If VirtReg has a hint and that hint is broken record this 2513 // virtual register as a recoloring candidate for broken hint. 2514 // Indeed, since we evicted a variable in its neighborhood it is 2515 // likely we can at least partially recolor some of the 2516 // copy-related live-ranges. 2517 if (Hint && Hint != PhysReg) 2518 SetOfBrokenHints.insert(&VirtReg); 2519 return PhysReg; 2520 } 2521 2522 assert(NewVRegs.empty() && "Cannot append to existing NewVRegs"); 2523 2524 // The first time we see a live range, don't try to split or spill. 2525 // Wait until the second time, when all smaller ranges have been allocated. 2526 // This gives a better picture of the interference to split around. 2527 if (Stage < RS_Split) { 2528 setStage(VirtReg, RS_Split); 2529 DEBUG(dbgs() << "wait for second round\n"); 2530 NewVRegs.push_back(VirtReg.reg); 2531 return 0; 2532 } 2533 2534 // If we couldn't allocate a register from spilling, there is probably some 2535 // invalid inline assembly. The base class wil report it. 2536 if (Stage >= RS_Done || !VirtReg.isSpillable()) 2537 return tryLastChanceRecoloring(VirtReg, Order, NewVRegs, FixedRegisters, 2538 Depth); 2539 2540 // Try splitting VirtReg or interferences. 2541 unsigned PhysReg = trySplit(VirtReg, Order, NewVRegs); 2542 if (PhysReg || !NewVRegs.empty()) 2543 return PhysReg; 2544 2545 // Finally spill VirtReg itself. 2546 if (EnableDeferredSpilling && getStage(VirtReg) < RS_Memory) { 2547 // TODO: This is experimental and in particular, we do not model 2548 // the live range splitting done by spilling correctly. 2549 // We would need a deep integration with the spiller to do the 2550 // right thing here. Anyway, that is still good for early testing. 2551 setStage(VirtReg, RS_Memory); 2552 DEBUG(dbgs() << "Do as if this register is in memory\n"); 2553 NewVRegs.push_back(VirtReg.reg); 2554 } else { 2555 NamedRegionTimer T("Spiller", TimerGroupName, TimePassesIsEnabled); 2556 LiveRangeEdit LRE(&VirtReg, NewVRegs, *MF, *LIS, VRM, this, &DeadRemats); 2557 spiller().spill(LRE); 2558 setStage(NewVRegs.begin(), NewVRegs.end(), RS_Done); 2559 2560 if (VerifyEnabled) 2561 MF->verify(this, "After spilling"); 2562 } 2563 2564 // The live virtual register requesting allocation was spilled, so tell 2565 // the caller not to allocate anything during this round. 2566 return 0; 2567 } 2568 2569 bool RAGreedy::runOnMachineFunction(MachineFunction &mf) { 2570 DEBUG(dbgs() << "********** GREEDY REGISTER ALLOCATION **********\n" 2571 << "********** Function: " << mf.getName() << '\n'); 2572 2573 MF = &mf; 2574 TRI = MF->getSubtarget().getRegisterInfo(); 2575 TII = MF->getSubtarget().getInstrInfo(); 2576 RCI.runOnMachineFunction(mf); 2577 2578 EnableLocalReassign = EnableLocalReassignment || 2579 MF->getSubtarget().enableRALocalReassignment( 2580 MF->getTarget().getOptLevel()); 2581 2582 if (VerifyEnabled) 2583 MF->verify(this, "Before greedy register allocator"); 2584 2585 RegAllocBase::init(getAnalysis<VirtRegMap>(), 2586 getAnalysis<LiveIntervals>(), 2587 getAnalysis<LiveRegMatrix>()); 2588 Indexes = &getAnalysis<SlotIndexes>(); 2589 MBFI = &getAnalysis<MachineBlockFrequencyInfo>(); 2590 DomTree = &getAnalysis<MachineDominatorTree>(); 2591 SpillerInstance.reset(createInlineSpiller(*this, *MF, *VRM)); 2592 Loops = &getAnalysis<MachineLoopInfo>(); 2593 Bundles = &getAnalysis<EdgeBundles>(); 2594 SpillPlacer = &getAnalysis<SpillPlacement>(); 2595 DebugVars = &getAnalysis<LiveDebugVariables>(); 2596 AA = &getAnalysis<AAResultsWrapperPass>().getAAResults(); 2597 2598 initializeCSRCost(); 2599 2600 calculateSpillWeightsAndHints(*LIS, mf, VRM, *Loops, *MBFI); 2601 2602 DEBUG(LIS->dump()); 2603 2604 SA.reset(new SplitAnalysis(*VRM, *LIS, *Loops)); 2605 SE.reset(new SplitEditor(*SA, *AA, *LIS, *VRM, *DomTree, *MBFI)); 2606 ExtraRegInfo.clear(); 2607 ExtraRegInfo.resize(MRI->getNumVirtRegs()); 2608 NextCascade = 1; 2609 IntfCache.init(MF, Matrix->getLiveUnions(), Indexes, LIS, TRI); 2610 GlobalCand.resize(32); // This will grow as needed. 2611 SetOfBrokenHints.clear(); 2612 2613 allocatePhysRegs(); 2614 tryHintsRecoloring(); 2615 postOptimization(); 2616 2617 releaseMemory(); 2618 return true; 2619 } 2620