1 //===- RegisterCoalescer.cpp - Generic Register Coalescing Interface -------==// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // This file implements the generic RegisterCoalescer interface which 11 // is used as the common interface used by all clients and 12 // implementations of register coalescing. 13 // 14 //===----------------------------------------------------------------------===// 15 16 #define DEBUG_TYPE "regalloc" 17 #include "RegisterCoalescer.h" 18 #include "LiveDebugVariables.h" 19 #include "RegisterClassInfo.h" 20 #include "VirtRegMap.h" 21 22 #include "llvm/Pass.h" 23 #include "llvm/Value.h" 24 #include "llvm/CodeGen/LiveIntervalAnalysis.h" 25 #include "llvm/CodeGen/MachineInstr.h" 26 #include "llvm/CodeGen/MachineRegisterInfo.h" 27 #include "llvm/Target/TargetInstrInfo.h" 28 #include "llvm/Target/TargetRegisterInfo.h" 29 #include "llvm/CodeGen/LiveIntervalAnalysis.h" 30 #include "llvm/Analysis/AliasAnalysis.h" 31 #include "llvm/CodeGen/MachineFrameInfo.h" 32 #include "llvm/CodeGen/MachineInstr.h" 33 #include "llvm/CodeGen/MachineLoopInfo.h" 34 #include "llvm/CodeGen/MachineRegisterInfo.h" 35 #include "llvm/CodeGen/Passes.h" 36 #include "llvm/Target/TargetInstrInfo.h" 37 #include "llvm/Target/TargetMachine.h" 38 #include "llvm/Target/TargetOptions.h" 39 #include "llvm/Support/CommandLine.h" 40 #include "llvm/Support/Debug.h" 41 #include "llvm/Support/ErrorHandling.h" 42 #include "llvm/Support/raw_ostream.h" 43 #include "llvm/ADT/OwningPtr.h" 44 #include "llvm/ADT/SmallSet.h" 45 #include "llvm/ADT/Statistic.h" 46 #include "llvm/ADT/STLExtras.h" 47 #include <algorithm> 48 #include <cmath> 49 using namespace llvm; 50 51 STATISTIC(numJoins , "Number of interval joins performed"); 52 STATISTIC(numCrossRCs , "Number of cross class joins performed"); 53 STATISTIC(numCommutes , "Number of instruction commuting performed"); 54 STATISTIC(numExtends , "Number of copies extended"); 55 STATISTIC(NumReMats , "Number of instructions re-materialized"); 56 STATISTIC(numPeep , "Number of identity moves eliminated after coalescing"); 57 STATISTIC(numAborts , "Number of times interval joining aborted"); 58 STATISTIC(NumInflated , "Number of register classes inflated"); 59 60 static cl::opt<bool> 61 EnableJoining("join-liveintervals", 62 cl::desc("Coalesce copies (default=true)"), 63 cl::init(true)); 64 65 static cl::opt<bool> 66 DisableCrossClassJoin("disable-cross-class-join", 67 cl::desc("Avoid coalescing cross register class copies"), 68 cl::init(false), cl::Hidden); 69 70 static cl::opt<bool> 71 EnablePhysicalJoin("join-physregs", 72 cl::desc("Join physical register copies"), 73 cl::init(false), cl::Hidden); 74 75 static cl::opt<bool> 76 VerifyCoalescing("verify-coalescing", 77 cl::desc("Verify machine instrs before and after register coalescing"), 78 cl::Hidden); 79 80 namespace { 81 class RegisterCoalescer : public MachineFunctionPass { 82 MachineFunction* MF; 83 MachineRegisterInfo* MRI; 84 const TargetMachine* TM; 85 const TargetRegisterInfo* TRI; 86 const TargetInstrInfo* TII; 87 LiveIntervals *LIS; 88 LiveDebugVariables *LDV; 89 const MachineLoopInfo* Loops; 90 AliasAnalysis *AA; 91 RegisterClassInfo RegClassInfo; 92 93 /// JoinedCopies - Keep track of copies eliminated due to coalescing. 94 /// 95 SmallPtrSet<MachineInstr*, 32> JoinedCopies; 96 97 /// ReMatCopies - Keep track of copies eliminated due to remat. 98 /// 99 SmallPtrSet<MachineInstr*, 32> ReMatCopies; 100 101 /// ReMatDefs - Keep track of definition instructions which have 102 /// been remat'ed. 103 SmallPtrSet<MachineInstr*, 8> ReMatDefs; 104 105 /// joinIntervals - join compatible live intervals 106 void joinIntervals(); 107 108 /// CopyCoalesceInMBB - Coalesce copies in the specified MBB, putting 109 /// copies that cannot yet be coalesced into the "TryAgain" list. 110 void CopyCoalesceInMBB(MachineBasicBlock *MBB, 111 std::vector<MachineInstr*> &TryAgain); 112 113 /// JoinCopy - Attempt to join intervals corresponding to SrcReg/DstReg, 114 /// which are the src/dst of the copy instruction CopyMI. This returns 115 /// true if the copy was successfully coalesced away. If it is not 116 /// currently possible to coalesce this interval, but it may be possible if 117 /// other things get coalesced, then it returns true by reference in 118 /// 'Again'. 119 bool JoinCopy(MachineInstr *TheCopy, bool &Again); 120 121 /// JoinIntervals - Attempt to join these two intervals. On failure, this 122 /// returns false. The output "SrcInt" will not have been modified, so we 123 /// can use this information below to update aliases. 124 bool JoinIntervals(CoalescerPair &CP); 125 126 /// AdjustCopiesBackFrom - We found a non-trivially-coalescable copy. If 127 /// the source value number is defined by a copy from the destination reg 128 /// see if we can merge these two destination reg valno# into a single 129 /// value number, eliminating a copy. 130 bool AdjustCopiesBackFrom(const CoalescerPair &CP, MachineInstr *CopyMI); 131 132 /// HasOtherReachingDefs - Return true if there are definitions of IntB 133 /// other than BValNo val# that can reach uses of AValno val# of IntA. 134 bool HasOtherReachingDefs(LiveInterval &IntA, LiveInterval &IntB, 135 VNInfo *AValNo, VNInfo *BValNo); 136 137 /// RemoveCopyByCommutingDef - We found a non-trivially-coalescable copy. 138 /// If the source value number is defined by a commutable instruction and 139 /// its other operand is coalesced to the copy dest register, see if we 140 /// can transform the copy into a noop by commuting the definition. 141 bool RemoveCopyByCommutingDef(const CoalescerPair &CP,MachineInstr *CopyMI); 142 143 /// ReMaterializeTrivialDef - If the source of a copy is defined by a 144 /// trivial computation, replace the copy by rematerialize the definition. 145 /// If PreserveSrcInt is true, make sure SrcInt is valid after the call. 146 bool ReMaterializeTrivialDef(LiveInterval &SrcInt, bool PreserveSrcInt, 147 unsigned DstReg, MachineInstr *CopyMI); 148 149 /// shouldJoinPhys - Return true if a physreg copy should be joined. 150 bool shouldJoinPhys(CoalescerPair &CP); 151 152 /// isWinToJoinCrossClass - Return true if it's profitable to coalesce 153 /// two virtual registers from different register classes. 154 bool isWinToJoinCrossClass(unsigned SrcReg, 155 unsigned DstReg, 156 const TargetRegisterClass *SrcRC, 157 const TargetRegisterClass *DstRC, 158 const TargetRegisterClass *NewRC); 159 160 /// UpdateRegDefsUses - Replace all defs and uses of SrcReg to DstReg and 161 /// update the subregister number if it is not zero. If DstReg is a 162 /// physical register and the existing subregister number of the def / use 163 /// being updated is not zero, make sure to set it to the correct physical 164 /// subregister. 165 void UpdateRegDefsUses(const CoalescerPair &CP); 166 167 /// RemoveDeadDef - If a def of a live interval is now determined dead, 168 /// remove the val# it defines. If the live interval becomes empty, remove 169 /// it as well. 170 bool RemoveDeadDef(LiveInterval &li, MachineInstr *DefMI); 171 172 /// markAsJoined - Remember that CopyMI has already been joined. 173 void markAsJoined(MachineInstr *CopyMI); 174 175 /// eliminateUndefCopy - Handle copies of undef values. 176 bool eliminateUndefCopy(MachineInstr *CopyMI, const CoalescerPair &CP); 177 178 public: 179 static char ID; // Class identification, replacement for typeinfo 180 RegisterCoalescer() : MachineFunctionPass(ID) { 181 initializeRegisterCoalescerPass(*PassRegistry::getPassRegistry()); 182 } 183 184 virtual void getAnalysisUsage(AnalysisUsage &AU) const; 185 186 virtual void releaseMemory(); 187 188 /// runOnMachineFunction - pass entry point 189 virtual bool runOnMachineFunction(MachineFunction&); 190 191 /// print - Implement the dump method. 192 virtual void print(raw_ostream &O, const Module* = 0) const; 193 }; 194 } /// end anonymous namespace 195 196 char &llvm::RegisterCoalescerID = RegisterCoalescer::ID; 197 198 INITIALIZE_PASS_BEGIN(RegisterCoalescer, "simple-register-coalescing", 199 "Simple Register Coalescing", false, false) 200 INITIALIZE_PASS_DEPENDENCY(LiveIntervals) 201 INITIALIZE_PASS_DEPENDENCY(LiveDebugVariables) 202 INITIALIZE_PASS_DEPENDENCY(SlotIndexes) 203 INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo) 204 INITIALIZE_AG_DEPENDENCY(AliasAnalysis) 205 INITIALIZE_PASS_END(RegisterCoalescer, "simple-register-coalescing", 206 "Simple Register Coalescing", false, false) 207 208 char RegisterCoalescer::ID = 0; 209 210 static unsigned compose(const TargetRegisterInfo &tri, unsigned a, unsigned b) { 211 if (!a) return b; 212 if (!b) return a; 213 return tri.composeSubRegIndices(a, b); 214 } 215 216 static bool isMoveInstr(const TargetRegisterInfo &tri, const MachineInstr *MI, 217 unsigned &Src, unsigned &Dst, 218 unsigned &SrcSub, unsigned &DstSub) { 219 if (MI->isCopy()) { 220 Dst = MI->getOperand(0).getReg(); 221 DstSub = MI->getOperand(0).getSubReg(); 222 Src = MI->getOperand(1).getReg(); 223 SrcSub = MI->getOperand(1).getSubReg(); 224 } else if (MI->isSubregToReg()) { 225 Dst = MI->getOperand(0).getReg(); 226 DstSub = compose(tri, MI->getOperand(0).getSubReg(), 227 MI->getOperand(3).getImm()); 228 Src = MI->getOperand(2).getReg(); 229 SrcSub = MI->getOperand(2).getSubReg(); 230 } else 231 return false; 232 return true; 233 } 234 235 bool CoalescerPair::setRegisters(const MachineInstr *MI) { 236 SrcReg = DstReg = SubIdx = 0; 237 NewRC = 0; 238 Flipped = CrossClass = false; 239 240 unsigned Src, Dst, SrcSub, DstSub; 241 if (!isMoveInstr(TRI, MI, Src, Dst, SrcSub, DstSub)) 242 return false; 243 Partial = SrcSub || DstSub; 244 245 // If one register is a physreg, it must be Dst. 246 if (TargetRegisterInfo::isPhysicalRegister(Src)) { 247 if (TargetRegisterInfo::isPhysicalRegister(Dst)) 248 return false; 249 std::swap(Src, Dst); 250 std::swap(SrcSub, DstSub); 251 Flipped = true; 252 } 253 254 const MachineRegisterInfo &MRI = MI->getParent()->getParent()->getRegInfo(); 255 256 if (TargetRegisterInfo::isPhysicalRegister(Dst)) { 257 // Eliminate DstSub on a physreg. 258 if (DstSub) { 259 Dst = TRI.getSubReg(Dst, DstSub); 260 if (!Dst) return false; 261 DstSub = 0; 262 } 263 264 // Eliminate SrcSub by picking a corresponding Dst superregister. 265 if (SrcSub) { 266 Dst = TRI.getMatchingSuperReg(Dst, SrcSub, MRI.getRegClass(Src)); 267 if (!Dst) return false; 268 SrcSub = 0; 269 } else if (!MRI.getRegClass(Src)->contains(Dst)) { 270 return false; 271 } 272 } else { 273 // Both registers are virtual. 274 275 // Both registers have subreg indices. 276 if (SrcSub && DstSub) { 277 // For now we only handle the case of identical indices in commensurate 278 // registers: Dreg:ssub_1 + Dreg:ssub_1 -> Dreg 279 // FIXME: Handle Qreg:ssub_3 + Dreg:ssub_1 as QReg:dsub_1 + Dreg. 280 if (SrcSub != DstSub) 281 return false; 282 const TargetRegisterClass *SrcRC = MRI.getRegClass(Src); 283 const TargetRegisterClass *DstRC = MRI.getRegClass(Dst); 284 if (!TRI.getCommonSubClass(DstRC, SrcRC)) 285 return false; 286 SrcSub = DstSub = 0; 287 } 288 289 // There can be no SrcSub. 290 if (SrcSub) { 291 std::swap(Src, Dst); 292 DstSub = SrcSub; 293 SrcSub = 0; 294 assert(!Flipped && "Unexpected flip"); 295 Flipped = true; 296 } 297 298 // Find the new register class. 299 const TargetRegisterClass *SrcRC = MRI.getRegClass(Src); 300 const TargetRegisterClass *DstRC = MRI.getRegClass(Dst); 301 if (DstSub) 302 NewRC = TRI.getMatchingSuperRegClass(DstRC, SrcRC, DstSub); 303 else 304 NewRC = TRI.getCommonSubClass(DstRC, SrcRC); 305 if (!NewRC) 306 return false; 307 CrossClass = NewRC != DstRC || NewRC != SrcRC; 308 } 309 // Check our invariants 310 assert(TargetRegisterInfo::isVirtualRegister(Src) && "Src must be virtual"); 311 assert(!(TargetRegisterInfo::isPhysicalRegister(Dst) && DstSub) && 312 "Cannot have a physical SubIdx"); 313 SrcReg = Src; 314 DstReg = Dst; 315 SubIdx = DstSub; 316 return true; 317 } 318 319 bool CoalescerPair::flip() { 320 if (SubIdx || TargetRegisterInfo::isPhysicalRegister(DstReg)) 321 return false; 322 std::swap(SrcReg, DstReg); 323 Flipped = !Flipped; 324 return true; 325 } 326 327 bool CoalescerPair::isCoalescable(const MachineInstr *MI) const { 328 if (!MI) 329 return false; 330 unsigned Src, Dst, SrcSub, DstSub; 331 if (!isMoveInstr(TRI, MI, Src, Dst, SrcSub, DstSub)) 332 return false; 333 334 // Find the virtual register that is SrcReg. 335 if (Dst == SrcReg) { 336 std::swap(Src, Dst); 337 std::swap(SrcSub, DstSub); 338 } else if (Src != SrcReg) { 339 return false; 340 } 341 342 // Now check that Dst matches DstReg. 343 if (TargetRegisterInfo::isPhysicalRegister(DstReg)) { 344 if (!TargetRegisterInfo::isPhysicalRegister(Dst)) 345 return false; 346 assert(!SubIdx && "Inconsistent CoalescerPair state."); 347 // DstSub could be set for a physreg from INSERT_SUBREG. 348 if (DstSub) 349 Dst = TRI.getSubReg(Dst, DstSub); 350 // Full copy of Src. 351 if (!SrcSub) 352 return DstReg == Dst; 353 // This is a partial register copy. Check that the parts match. 354 return TRI.getSubReg(DstReg, SrcSub) == Dst; 355 } else { 356 // DstReg is virtual. 357 if (DstReg != Dst) 358 return false; 359 // Registers match, do the subregisters line up? 360 return compose(TRI, SubIdx, SrcSub) == DstSub; 361 } 362 } 363 364 void RegisterCoalescer::getAnalysisUsage(AnalysisUsage &AU) const { 365 AU.setPreservesCFG(); 366 AU.addRequired<AliasAnalysis>(); 367 AU.addRequired<LiveIntervals>(); 368 AU.addPreserved<LiveIntervals>(); 369 AU.addRequired<LiveDebugVariables>(); 370 AU.addPreserved<LiveDebugVariables>(); 371 AU.addPreserved<SlotIndexes>(); 372 AU.addRequired<MachineLoopInfo>(); 373 AU.addPreserved<MachineLoopInfo>(); 374 AU.addPreservedID(MachineDominatorsID); 375 MachineFunctionPass::getAnalysisUsage(AU); 376 } 377 378 void RegisterCoalescer::markAsJoined(MachineInstr *CopyMI) { 379 /// Joined copies are not deleted immediately, but kept in JoinedCopies. 380 JoinedCopies.insert(CopyMI); 381 382 /// Mark all register operands of CopyMI as <undef> so they won't affect dead 383 /// code elimination. 384 for (MachineInstr::mop_iterator I = CopyMI->operands_begin(), 385 E = CopyMI->operands_end(); I != E; ++I) 386 if (I->isReg()) 387 I->setIsUndef(true); 388 } 389 390 /// AdjustCopiesBackFrom - We found a non-trivially-coalescable copy with IntA 391 /// being the source and IntB being the dest, thus this defines a value number 392 /// in IntB. If the source value number (in IntA) is defined by a copy from B, 393 /// see if we can merge these two pieces of B into a single value number, 394 /// eliminating a copy. For example: 395 /// 396 /// A3 = B0 397 /// ... 398 /// B1 = A3 <- this copy 399 /// 400 /// In this case, B0 can be extended to where the B1 copy lives, allowing the B1 401 /// value number to be replaced with B0 (which simplifies the B liveinterval). 402 /// 403 /// This returns true if an interval was modified. 404 /// 405 bool RegisterCoalescer::AdjustCopiesBackFrom(const CoalescerPair &CP, 406 MachineInstr *CopyMI) { 407 // Bail if there is no dst interval - can happen when merging physical subreg 408 // operations. 409 if (!LIS->hasInterval(CP.getDstReg())) 410 return false; 411 412 LiveInterval &IntA = 413 LIS->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg()); 414 LiveInterval &IntB = 415 LIS->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg()); 416 SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getRegSlot(); 417 418 // BValNo is a value number in B that is defined by a copy from A. 'B3' in 419 // the example above. 420 LiveInterval::iterator BLR = IntB.FindLiveRangeContaining(CopyIdx); 421 if (BLR == IntB.end()) return false; 422 VNInfo *BValNo = BLR->valno; 423 424 // Get the location that B is defined at. Two options: either this value has 425 // an unknown definition point or it is defined at CopyIdx. If unknown, we 426 // can't process it. 427 if (BValNo->def != CopyIdx) return false; 428 429 // AValNo is the value number in A that defines the copy, A3 in the example. 430 SlotIndex CopyUseIdx = CopyIdx.getRegSlot(true); 431 LiveInterval::iterator ALR = IntA.FindLiveRangeContaining(CopyUseIdx); 432 // The live range might not exist after fun with physreg coalescing. 433 if (ALR == IntA.end()) return false; 434 VNInfo *AValNo = ALR->valno; 435 436 // If AValNo is defined as a copy from IntB, we can potentially process this. 437 // Get the instruction that defines this value number. 438 MachineInstr *ACopyMI = LIS->getInstructionFromIndex(AValNo->def); 439 if (!CP.isCoalescable(ACopyMI)) 440 return false; 441 442 // Get the LiveRange in IntB that this value number starts with. 443 LiveInterval::iterator ValLR = 444 IntB.FindLiveRangeContaining(AValNo->def.getPrevSlot()); 445 if (ValLR == IntB.end()) 446 return false; 447 448 // Make sure that the end of the live range is inside the same block as 449 // CopyMI. 450 MachineInstr *ValLREndInst = 451 LIS->getInstructionFromIndex(ValLR->end.getPrevSlot()); 452 if (!ValLREndInst || ValLREndInst->getParent() != CopyMI->getParent()) 453 return false; 454 455 // Okay, we now know that ValLR ends in the same block that the CopyMI 456 // live-range starts. If there are no intervening live ranges between them in 457 // IntB, we can merge them. 458 if (ValLR+1 != BLR) return false; 459 460 // If a live interval is a physical register, conservatively check if any 461 // of its aliases is overlapping the live interval of the virtual register. 462 // If so, do not coalesce. 463 if (TargetRegisterInfo::isPhysicalRegister(IntB.reg)) { 464 for (const uint16_t *AS = TRI->getAliasSet(IntB.reg); *AS; ++AS) 465 if (LIS->hasInterval(*AS) && IntA.overlaps(LIS->getInterval(*AS))) { 466 DEBUG({ 467 dbgs() << "\t\tInterfere with alias "; 468 LIS->getInterval(*AS).print(dbgs(), TRI); 469 }); 470 return false; 471 } 472 } 473 474 DEBUG({ 475 dbgs() << "Extending: "; 476 IntB.print(dbgs(), TRI); 477 }); 478 479 SlotIndex FillerStart = ValLR->end, FillerEnd = BLR->start; 480 // We are about to delete CopyMI, so need to remove it as the 'instruction 481 // that defines this value #'. Update the valnum with the new defining 482 // instruction #. 483 BValNo->def = FillerStart; 484 485 // Okay, we can merge them. We need to insert a new liverange: 486 // [ValLR.end, BLR.begin) of either value number, then we merge the 487 // two value numbers. 488 IntB.addRange(LiveRange(FillerStart, FillerEnd, BValNo)); 489 490 // If the IntB live range is assigned to a physical register, and if that 491 // physreg has sub-registers, update their live intervals as well. 492 if (TargetRegisterInfo::isPhysicalRegister(IntB.reg)) { 493 for (const uint16_t *SR = TRI->getSubRegisters(IntB.reg); *SR; ++SR) { 494 if (!LIS->hasInterval(*SR)) 495 continue; 496 LiveInterval &SRLI = LIS->getInterval(*SR); 497 SRLI.addRange(LiveRange(FillerStart, FillerEnd, 498 SRLI.getNextValue(FillerStart, 499 LIS->getVNInfoAllocator()))); 500 } 501 } 502 503 // Okay, merge "B1" into the same value number as "B0". 504 if (BValNo != ValLR->valno) { 505 // If B1 is killed by a PHI, then the merged live range must also be killed 506 // by the same PHI, as B0 and B1 can not overlap. 507 bool HasPHIKill = BValNo->hasPHIKill(); 508 IntB.MergeValueNumberInto(BValNo, ValLR->valno); 509 if (HasPHIKill) 510 ValLR->valno->setHasPHIKill(true); 511 } 512 DEBUG({ 513 dbgs() << " result = "; 514 IntB.print(dbgs(), TRI); 515 dbgs() << "\n"; 516 }); 517 518 // If the source instruction was killing the source register before the 519 // merge, unset the isKill marker given the live range has been extended. 520 int UIdx = ValLREndInst->findRegisterUseOperandIdx(IntB.reg, true); 521 if (UIdx != -1) { 522 ValLREndInst->getOperand(UIdx).setIsKill(false); 523 } 524 525 // Rewrite the copy. If the copy instruction was killing the destination 526 // register before the merge, find the last use and trim the live range. That 527 // will also add the isKill marker. 528 CopyMI->substituteRegister(IntA.reg, IntB.reg, CP.getSubIdx(), 529 *TRI); 530 if (ALR->end == CopyIdx) 531 LIS->shrinkToUses(&IntA); 532 533 ++numExtends; 534 return true; 535 } 536 537 /// HasOtherReachingDefs - Return true if there are definitions of IntB 538 /// other than BValNo val# that can reach uses of AValno val# of IntA. 539 bool RegisterCoalescer::HasOtherReachingDefs(LiveInterval &IntA, 540 LiveInterval &IntB, 541 VNInfo *AValNo, 542 VNInfo *BValNo) { 543 for (LiveInterval::iterator AI = IntA.begin(), AE = IntA.end(); 544 AI != AE; ++AI) { 545 if (AI->valno != AValNo) continue; 546 LiveInterval::Ranges::iterator BI = 547 std::upper_bound(IntB.ranges.begin(), IntB.ranges.end(), AI->start); 548 if (BI != IntB.ranges.begin()) 549 --BI; 550 for (; BI != IntB.ranges.end() && AI->end >= BI->start; ++BI) { 551 if (BI->valno == BValNo) 552 continue; 553 if (BI->start <= AI->start && BI->end > AI->start) 554 return true; 555 if (BI->start > AI->start && BI->start < AI->end) 556 return true; 557 } 558 } 559 return false; 560 } 561 562 /// RemoveCopyByCommutingDef - We found a non-trivially-coalescable copy with 563 /// IntA being the source and IntB being the dest, thus this defines a value 564 /// number in IntB. If the source value number (in IntA) is defined by a 565 /// commutable instruction and its other operand is coalesced to the copy dest 566 /// register, see if we can transform the copy into a noop by commuting the 567 /// definition. For example, 568 /// 569 /// A3 = op A2 B0<kill> 570 /// ... 571 /// B1 = A3 <- this copy 572 /// ... 573 /// = op A3 <- more uses 574 /// 575 /// ==> 576 /// 577 /// B2 = op B0 A2<kill> 578 /// ... 579 /// B1 = B2 <- now an identify copy 580 /// ... 581 /// = op B2 <- more uses 582 /// 583 /// This returns true if an interval was modified. 584 /// 585 bool RegisterCoalescer::RemoveCopyByCommutingDef(const CoalescerPair &CP, 586 MachineInstr *CopyMI) { 587 // FIXME: For now, only eliminate the copy by commuting its def when the 588 // source register is a virtual register. We want to guard against cases 589 // where the copy is a back edge copy and commuting the def lengthen the 590 // live interval of the source register to the entire loop. 591 if (CP.isPhys() && CP.isFlipped()) 592 return false; 593 594 // Bail if there is no dst interval. 595 if (!LIS->hasInterval(CP.getDstReg())) 596 return false; 597 598 SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getRegSlot(); 599 600 LiveInterval &IntA = 601 LIS->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg()); 602 LiveInterval &IntB = 603 LIS->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg()); 604 605 // BValNo is a value number in B that is defined by a copy from A. 'B3' in 606 // the example above. 607 VNInfo *BValNo = IntB.getVNInfoAt(CopyIdx); 608 if (!BValNo || BValNo->def != CopyIdx) 609 return false; 610 611 assert(BValNo->def == CopyIdx && "Copy doesn't define the value?"); 612 613 // AValNo is the value number in A that defines the copy, A3 in the example. 614 VNInfo *AValNo = IntA.getVNInfoAt(CopyIdx.getRegSlot(true)); 615 assert(AValNo && "COPY source not live"); 616 617 // If other defs can reach uses of this def, then it's not safe to perform 618 // the optimization. 619 if (AValNo->isPHIDef() || AValNo->isUnused() || AValNo->hasPHIKill()) 620 return false; 621 MachineInstr *DefMI = LIS->getInstructionFromIndex(AValNo->def); 622 if (!DefMI) 623 return false; 624 if (!DefMI->isCommutable()) 625 return false; 626 // If DefMI is a two-address instruction then commuting it will change the 627 // destination register. 628 int DefIdx = DefMI->findRegisterDefOperandIdx(IntA.reg); 629 assert(DefIdx != -1); 630 unsigned UseOpIdx; 631 if (!DefMI->isRegTiedToUseOperand(DefIdx, &UseOpIdx)) 632 return false; 633 unsigned Op1, Op2, NewDstIdx; 634 if (!TII->findCommutedOpIndices(DefMI, Op1, Op2)) 635 return false; 636 if (Op1 == UseOpIdx) 637 NewDstIdx = Op2; 638 else if (Op2 == UseOpIdx) 639 NewDstIdx = Op1; 640 else 641 return false; 642 643 MachineOperand &NewDstMO = DefMI->getOperand(NewDstIdx); 644 unsigned NewReg = NewDstMO.getReg(); 645 if (NewReg != IntB.reg || !NewDstMO.isKill()) 646 return false; 647 648 // Make sure there are no other definitions of IntB that would reach the 649 // uses which the new definition can reach. 650 if (HasOtherReachingDefs(IntA, IntB, AValNo, BValNo)) 651 return false; 652 653 // Abort if the aliases of IntB.reg have values that are not simply the 654 // clobbers from the superreg. 655 if (TargetRegisterInfo::isPhysicalRegister(IntB.reg)) 656 for (const uint16_t *AS = TRI->getAliasSet(IntB.reg); *AS; ++AS) 657 if (LIS->hasInterval(*AS) && 658 HasOtherReachingDefs(IntA, LIS->getInterval(*AS), AValNo, 0)) 659 return false; 660 661 // If some of the uses of IntA.reg is already coalesced away, return false. 662 // It's not possible to determine whether it's safe to perform the coalescing. 663 for (MachineRegisterInfo::use_nodbg_iterator UI = 664 MRI->use_nodbg_begin(IntA.reg), 665 UE = MRI->use_nodbg_end(); UI != UE; ++UI) { 666 MachineInstr *UseMI = &*UI; 667 SlotIndex UseIdx = LIS->getInstructionIndex(UseMI); 668 LiveInterval::iterator ULR = IntA.FindLiveRangeContaining(UseIdx); 669 if (ULR == IntA.end()) 670 continue; 671 if (ULR->valno == AValNo && JoinedCopies.count(UseMI)) 672 return false; 673 } 674 675 DEBUG(dbgs() << "\tRemoveCopyByCommutingDef: " << AValNo->def << '\t' 676 << *DefMI); 677 678 // At this point we have decided that it is legal to do this 679 // transformation. Start by commuting the instruction. 680 MachineBasicBlock *MBB = DefMI->getParent(); 681 MachineInstr *NewMI = TII->commuteInstruction(DefMI); 682 if (!NewMI) 683 return false; 684 if (TargetRegisterInfo::isVirtualRegister(IntA.reg) && 685 TargetRegisterInfo::isVirtualRegister(IntB.reg) && 686 !MRI->constrainRegClass(IntB.reg, MRI->getRegClass(IntA.reg))) 687 return false; 688 if (NewMI != DefMI) { 689 LIS->ReplaceMachineInstrInMaps(DefMI, NewMI); 690 MachineBasicBlock::iterator Pos = DefMI; 691 MBB->insert(Pos, NewMI); 692 MBB->erase(DefMI); 693 } 694 unsigned OpIdx = NewMI->findRegisterUseOperandIdx(IntA.reg, false); 695 NewMI->getOperand(OpIdx).setIsKill(); 696 697 // If ALR and BLR overlaps and end of BLR extends beyond end of ALR, e.g. 698 // A = or A, B 699 // ... 700 // B = A 701 // ... 702 // C = A<kill> 703 // ... 704 // = B 705 706 // Update uses of IntA of the specific Val# with IntB. 707 for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(IntA.reg), 708 UE = MRI->use_end(); UI != UE;) { 709 MachineOperand &UseMO = UI.getOperand(); 710 MachineInstr *UseMI = &*UI; 711 ++UI; 712 if (JoinedCopies.count(UseMI)) 713 continue; 714 if (UseMI->isDebugValue()) { 715 // FIXME These don't have an instruction index. Not clear we have enough 716 // info to decide whether to do this replacement or not. For now do it. 717 UseMO.setReg(NewReg); 718 continue; 719 } 720 SlotIndex UseIdx = LIS->getInstructionIndex(UseMI).getRegSlot(true); 721 LiveInterval::iterator ULR = IntA.FindLiveRangeContaining(UseIdx); 722 if (ULR == IntA.end() || ULR->valno != AValNo) 723 continue; 724 if (TargetRegisterInfo::isPhysicalRegister(NewReg)) 725 UseMO.substPhysReg(NewReg, *TRI); 726 else 727 UseMO.setReg(NewReg); 728 if (UseMI == CopyMI) 729 continue; 730 if (!UseMI->isCopy()) 731 continue; 732 if (UseMI->getOperand(0).getReg() != IntB.reg || 733 UseMI->getOperand(0).getSubReg()) 734 continue; 735 736 // This copy will become a noop. If it's defining a new val#, merge it into 737 // BValNo. 738 SlotIndex DefIdx = UseIdx.getRegSlot(); 739 VNInfo *DVNI = IntB.getVNInfoAt(DefIdx); 740 if (!DVNI) 741 continue; 742 DEBUG(dbgs() << "\t\tnoop: " << DefIdx << '\t' << *UseMI); 743 assert(DVNI->def == DefIdx); 744 BValNo = IntB.MergeValueNumberInto(BValNo, DVNI); 745 markAsJoined(UseMI); 746 } 747 748 // Extend BValNo by merging in IntA live ranges of AValNo. Val# definition 749 // is updated. 750 VNInfo *ValNo = BValNo; 751 ValNo->def = AValNo->def; 752 for (LiveInterval::iterator AI = IntA.begin(), AE = IntA.end(); 753 AI != AE; ++AI) { 754 if (AI->valno != AValNo) continue; 755 IntB.addRange(LiveRange(AI->start, AI->end, ValNo)); 756 } 757 DEBUG(dbgs() << "\t\textended: " << IntB << '\n'); 758 759 IntA.removeValNo(AValNo); 760 DEBUG(dbgs() << "\t\ttrimmed: " << IntA << '\n'); 761 ++numCommutes; 762 return true; 763 } 764 765 /// ReMaterializeTrivialDef - If the source of a copy is defined by a trivial 766 /// computation, replace the copy by rematerialize the definition. 767 bool RegisterCoalescer::ReMaterializeTrivialDef(LiveInterval &SrcInt, 768 bool preserveSrcInt, 769 unsigned DstReg, 770 MachineInstr *CopyMI) { 771 SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getRegSlot(true); 772 LiveInterval::iterator SrcLR = SrcInt.FindLiveRangeContaining(CopyIdx); 773 assert(SrcLR != SrcInt.end() && "Live range not found!"); 774 VNInfo *ValNo = SrcLR->valno; 775 if (ValNo->isPHIDef() || ValNo->isUnused()) 776 return false; 777 MachineInstr *DefMI = LIS->getInstructionFromIndex(ValNo->def); 778 if (!DefMI) 779 return false; 780 assert(DefMI && "Defining instruction disappeared"); 781 if (!DefMI->isAsCheapAsAMove()) 782 return false; 783 if (!TII->isTriviallyReMaterializable(DefMI, AA)) 784 return false; 785 bool SawStore = false; 786 if (!DefMI->isSafeToMove(TII, AA, SawStore)) 787 return false; 788 const MCInstrDesc &MCID = DefMI->getDesc(); 789 if (MCID.getNumDefs() != 1) 790 return false; 791 if (!DefMI->isImplicitDef()) { 792 // Make sure the copy destination register class fits the instruction 793 // definition register class. The mismatch can happen as a result of earlier 794 // extract_subreg, insert_subreg, subreg_to_reg coalescing. 795 const TargetRegisterClass *RC = TII->getRegClass(MCID, 0, TRI); 796 if (TargetRegisterInfo::isVirtualRegister(DstReg)) { 797 if (MRI->getRegClass(DstReg) != RC) 798 return false; 799 } else if (!RC->contains(DstReg)) 800 return false; 801 } 802 803 MachineBasicBlock *MBB = CopyMI->getParent(); 804 MachineBasicBlock::iterator MII = 805 llvm::next(MachineBasicBlock::iterator(CopyMI)); 806 TII->reMaterialize(*MBB, MII, DstReg, 0, DefMI, *TRI); 807 MachineInstr *NewMI = prior(MII); 808 809 // NewMI may have dead implicit defs (E.g. EFLAGS for MOV<bits>r0 on X86). 810 // We need to remember these so we can add intervals once we insert 811 // NewMI into SlotIndexes. 812 SmallVector<unsigned, 4> NewMIImplDefs; 813 for (unsigned i = NewMI->getDesc().getNumOperands(), 814 e = NewMI->getNumOperands(); i != e; ++i) { 815 MachineOperand &MO = NewMI->getOperand(i); 816 if (MO.isReg()) { 817 assert(MO.isDef() && MO.isImplicit() && MO.isDead() && 818 TargetRegisterInfo::isPhysicalRegister(MO.getReg())); 819 NewMIImplDefs.push_back(MO.getReg()); 820 } 821 } 822 823 // CopyMI may have implicit operands, transfer them over to the newly 824 // rematerialized instruction. And update implicit def interval valnos. 825 for (unsigned i = CopyMI->getDesc().getNumOperands(), 826 e = CopyMI->getNumOperands(); i != e; ++i) { 827 MachineOperand &MO = CopyMI->getOperand(i); 828 if (MO.isReg()) { 829 assert(MO.isImplicit() && "No explicit operands after implict operands."); 830 // Discard VReg implicit defs. 831 if (TargetRegisterInfo::isPhysicalRegister(MO.getReg())) { 832 NewMI->addOperand(MO); 833 } 834 } 835 } 836 837 LIS->ReplaceMachineInstrInMaps(CopyMI, NewMI); 838 839 SlotIndex NewMIIdx = LIS->getInstructionIndex(NewMI); 840 for (unsigned i = 0, e = NewMIImplDefs.size(); i != e; ++i) { 841 unsigned reg = NewMIImplDefs[i]; 842 LiveInterval &li = LIS->getInterval(reg); 843 VNInfo *DeadDefVN = li.getNextValue(NewMIIdx.getRegSlot(), 844 LIS->getVNInfoAllocator()); 845 LiveRange lr(NewMIIdx.getRegSlot(), NewMIIdx.getDeadSlot(), DeadDefVN); 846 li.addRange(lr); 847 } 848 849 CopyMI->eraseFromParent(); 850 ReMatCopies.insert(CopyMI); 851 ReMatDefs.insert(DefMI); 852 DEBUG(dbgs() << "Remat: " << *NewMI); 853 ++NumReMats; 854 855 // The source interval can become smaller because we removed a use. 856 if (preserveSrcInt) 857 LIS->shrinkToUses(&SrcInt); 858 859 return true; 860 } 861 862 /// eliminateUndefCopy - ProcessImpicitDefs may leave some copies of <undef> 863 /// values, it only removes local variables. When we have a copy like: 864 /// 865 /// %vreg1 = COPY %vreg2<undef> 866 /// 867 /// We delete the copy and remove the corresponding value number from %vreg1. 868 /// Any uses of that value number are marked as <undef>. 869 bool RegisterCoalescer::eliminateUndefCopy(MachineInstr *CopyMI, 870 const CoalescerPair &CP) { 871 SlotIndex Idx = LIS->getInstructionIndex(CopyMI); 872 LiveInterval *SrcInt = &LIS->getInterval(CP.getSrcReg()); 873 if (SrcInt->liveAt(Idx)) 874 return false; 875 LiveInterval *DstInt = &LIS->getInterval(CP.getDstReg()); 876 if (DstInt->liveAt(Idx)) 877 return false; 878 879 // No intervals are live-in to CopyMI - it is undef. 880 if (CP.isFlipped()) 881 DstInt = SrcInt; 882 SrcInt = 0; 883 884 VNInfo *DeadVNI = DstInt->getVNInfoAt(Idx.getRegSlot()); 885 assert(DeadVNI && "No value defined in DstInt"); 886 DstInt->removeValNo(DeadVNI); 887 888 // Find new undef uses. 889 for (MachineRegisterInfo::reg_nodbg_iterator 890 I = MRI->reg_nodbg_begin(DstInt->reg), E = MRI->reg_nodbg_end(); 891 I != E; ++I) { 892 MachineOperand &MO = I.getOperand(); 893 if (MO.isDef() || MO.isUndef()) 894 continue; 895 MachineInstr *MI = MO.getParent(); 896 SlotIndex Idx = LIS->getInstructionIndex(MI); 897 if (DstInt->liveAt(Idx)) 898 continue; 899 MO.setIsUndef(true); 900 DEBUG(dbgs() << "\tnew undef: " << Idx << '\t' << *MI); 901 } 902 return true; 903 } 904 905 /// UpdateRegDefsUses - Replace all defs and uses of SrcReg to DstReg and 906 /// update the subregister number if it is not zero. If DstReg is a 907 /// physical register and the existing subregister number of the def / use 908 /// being updated is not zero, make sure to set it to the correct physical 909 /// subregister. 910 void 911 RegisterCoalescer::UpdateRegDefsUses(const CoalescerPair &CP) { 912 bool DstIsPhys = CP.isPhys(); 913 unsigned SrcReg = CP.getSrcReg(); 914 unsigned DstReg = CP.getDstReg(); 915 unsigned SubIdx = CP.getSubIdx(); 916 917 // Update LiveDebugVariables. 918 LDV->renameRegister(SrcReg, DstReg, SubIdx); 919 920 for (MachineRegisterInfo::reg_iterator I = MRI->reg_begin(SrcReg); 921 MachineInstr *UseMI = I.skipInstruction();) { 922 // A PhysReg copy that won't be coalesced can perhaps be rematerialized 923 // instead. 924 if (DstIsPhys) { 925 if (UseMI->isFullCopy() && 926 UseMI->getOperand(1).getReg() == SrcReg && 927 UseMI->getOperand(0).getReg() != SrcReg && 928 UseMI->getOperand(0).getReg() != DstReg && 929 !JoinedCopies.count(UseMI) && 930 ReMaterializeTrivialDef(LIS->getInterval(SrcReg), false, 931 UseMI->getOperand(0).getReg(), UseMI)) 932 continue; 933 } 934 935 SmallVector<unsigned,8> Ops; 936 bool Reads, Writes; 937 tie(Reads, Writes) = UseMI->readsWritesVirtualRegister(SrcReg, &Ops); 938 939 // Replace SrcReg with DstReg in all UseMI operands. 940 for (unsigned i = 0, e = Ops.size(); i != e; ++i) { 941 MachineOperand &MO = UseMI->getOperand(Ops[i]); 942 943 // Make sure we don't create read-modify-write defs accidentally. We 944 // assume here that a SrcReg def cannot be joined into a live DstReg. If 945 // RegisterCoalescer starts tracking partially live registers, we will 946 // need to check the actual LiveInterval to determine if DstReg is live 947 // here. 948 if (SubIdx && !Reads) 949 MO.setIsUndef(); 950 951 if (DstIsPhys) 952 MO.substPhysReg(DstReg, *TRI); 953 else 954 MO.substVirtReg(DstReg, SubIdx, *TRI); 955 } 956 957 // This instruction is a copy that will be removed. 958 if (JoinedCopies.count(UseMI)) 959 continue; 960 961 DEBUG({ 962 dbgs() << "\t\tupdated: "; 963 if (!UseMI->isDebugValue()) 964 dbgs() << LIS->getInstructionIndex(UseMI) << "\t"; 965 dbgs() << *UseMI; 966 }); 967 } 968 } 969 970 /// removeIntervalIfEmpty - Check if the live interval of a physical register 971 /// is empty, if so remove it and also remove the empty intervals of its 972 /// sub-registers. Return true if live interval is removed. 973 static bool removeIntervalIfEmpty(LiveInterval &li, LiveIntervals *LIS, 974 const TargetRegisterInfo *TRI) { 975 if (li.empty()) { 976 if (TargetRegisterInfo::isPhysicalRegister(li.reg)) 977 for (const uint16_t* SR = TRI->getSubRegisters(li.reg); *SR; ++SR) { 978 if (!LIS->hasInterval(*SR)) 979 continue; 980 LiveInterval &sli = LIS->getInterval(*SR); 981 if (sli.empty()) 982 LIS->removeInterval(*SR); 983 } 984 LIS->removeInterval(li.reg); 985 return true; 986 } 987 return false; 988 } 989 990 /// RemoveDeadDef - If a def of a live interval is now determined dead, remove 991 /// the val# it defines. If the live interval becomes empty, remove it as well. 992 bool RegisterCoalescer::RemoveDeadDef(LiveInterval &li, 993 MachineInstr *DefMI) { 994 SlotIndex DefIdx = LIS->getInstructionIndex(DefMI).getRegSlot(); 995 LiveInterval::iterator MLR = li.FindLiveRangeContaining(DefIdx); 996 if (DefIdx != MLR->valno->def) 997 return false; 998 li.removeValNo(MLR->valno); 999 return removeIntervalIfEmpty(li, LIS, TRI); 1000 } 1001 1002 /// shouldJoinPhys - Return true if a copy involving a physreg should be joined. 1003 /// We need to be careful about coalescing a source physical register with a 1004 /// virtual register. Once the coalescing is done, it cannot be broken and these 1005 /// are not spillable! If the destination interval uses are far away, think 1006 /// twice about coalescing them! 1007 bool RegisterCoalescer::shouldJoinPhys(CoalescerPair &CP) { 1008 bool Allocatable = LIS->isAllocatable(CP.getDstReg()); 1009 LiveInterval &JoinVInt = LIS->getInterval(CP.getSrcReg()); 1010 1011 /// Always join simple intervals that are defined by a single copy from a 1012 /// reserved register. This doesn't increase register pressure, so it is 1013 /// always beneficial. 1014 if (!Allocatable && CP.isFlipped() && JoinVInt.containsOneValue()) 1015 return true; 1016 1017 if (!EnablePhysicalJoin) { 1018 DEBUG(dbgs() << "\tPhysreg joins disabled.\n"); 1019 return false; 1020 } 1021 1022 // Only coalesce to allocatable physreg, we don't want to risk modifying 1023 // reserved registers. 1024 if (!Allocatable) { 1025 DEBUG(dbgs() << "\tRegister is an unallocatable physreg.\n"); 1026 return false; // Not coalescable. 1027 } 1028 1029 // Don't join with physregs that have a ridiculous number of live 1030 // ranges. The data structure performance is really bad when that 1031 // happens. 1032 if (LIS->hasInterval(CP.getDstReg()) && 1033 LIS->getInterval(CP.getDstReg()).ranges.size() > 1000) { 1034 ++numAborts; 1035 DEBUG(dbgs() 1036 << "\tPhysical register live interval too complicated, abort!\n"); 1037 return false; 1038 } 1039 1040 // FIXME: Why are we skipping this test for partial copies? 1041 // CodeGen/X86/phys_subreg_coalesce-3.ll needs it. 1042 if (!CP.isPartial()) { 1043 const TargetRegisterClass *RC = MRI->getRegClass(CP.getSrcReg()); 1044 unsigned Threshold = RegClassInfo.getNumAllocatableRegs(RC) * 2; 1045 unsigned Length = LIS->getApproximateInstructionCount(JoinVInt); 1046 if (Length > Threshold) { 1047 ++numAborts; 1048 DEBUG(dbgs() << "\tMay tie down a physical register, abort!\n"); 1049 return false; 1050 } 1051 } 1052 return true; 1053 } 1054 1055 /// isWinToJoinCrossClass - Return true if it's profitable to coalesce 1056 /// two virtual registers from different register classes. 1057 bool 1058 RegisterCoalescer::isWinToJoinCrossClass(unsigned SrcReg, 1059 unsigned DstReg, 1060 const TargetRegisterClass *SrcRC, 1061 const TargetRegisterClass *DstRC, 1062 const TargetRegisterClass *NewRC) { 1063 unsigned NewRCCount = RegClassInfo.getNumAllocatableRegs(NewRC); 1064 // This heuristics is good enough in practice, but it's obviously not *right*. 1065 // 4 is a magic number that works well enough for x86, ARM, etc. It filter 1066 // out all but the most restrictive register classes. 1067 if (NewRCCount > 4 || 1068 // Early exit if the function is fairly small, coalesce aggressively if 1069 // that's the case. For really special register classes with 3 or 1070 // fewer registers, be a bit more careful. 1071 (LIS->getFuncInstructionCount() / NewRCCount) < 8) 1072 return true; 1073 LiveInterval &SrcInt = LIS->getInterval(SrcReg); 1074 LiveInterval &DstInt = LIS->getInterval(DstReg); 1075 unsigned SrcSize = LIS->getApproximateInstructionCount(SrcInt); 1076 unsigned DstSize = LIS->getApproximateInstructionCount(DstInt); 1077 1078 // Coalesce aggressively if the intervals are small compared to the number of 1079 // registers in the new class. The number 4 is fairly arbitrary, chosen to be 1080 // less aggressive than the 8 used for the whole function size. 1081 const unsigned ThresSize = 4 * NewRCCount; 1082 if (SrcSize <= ThresSize && DstSize <= ThresSize) 1083 return true; 1084 1085 // Estimate *register use density*. If it doubles or more, abort. 1086 unsigned SrcUses = std::distance(MRI->use_nodbg_begin(SrcReg), 1087 MRI->use_nodbg_end()); 1088 unsigned DstUses = std::distance(MRI->use_nodbg_begin(DstReg), 1089 MRI->use_nodbg_end()); 1090 unsigned NewUses = SrcUses + DstUses; 1091 unsigned NewSize = SrcSize + DstSize; 1092 if (SrcRC != NewRC && SrcSize > ThresSize) { 1093 unsigned SrcRCCount = RegClassInfo.getNumAllocatableRegs(SrcRC); 1094 if (NewUses*SrcSize*SrcRCCount > 2*SrcUses*NewSize*NewRCCount) 1095 return false; 1096 } 1097 if (DstRC != NewRC && DstSize > ThresSize) { 1098 unsigned DstRCCount = RegClassInfo.getNumAllocatableRegs(DstRC); 1099 if (NewUses*DstSize*DstRCCount > 2*DstUses*NewSize*NewRCCount) 1100 return false; 1101 } 1102 return true; 1103 } 1104 1105 1106 /// JoinCopy - Attempt to join intervals corresponding to SrcReg/DstReg, 1107 /// which are the src/dst of the copy instruction CopyMI. This returns true 1108 /// if the copy was successfully coalesced away. If it is not currently 1109 /// possible to coalesce this interval, but it may be possible if other 1110 /// things get coalesced, then it returns true by reference in 'Again'. 1111 bool RegisterCoalescer::JoinCopy(MachineInstr *CopyMI, bool &Again) { 1112 1113 Again = false; 1114 if (JoinedCopies.count(CopyMI) || ReMatCopies.count(CopyMI)) 1115 return false; // Already done. 1116 1117 DEBUG(dbgs() << LIS->getInstructionIndex(CopyMI) << '\t' << *CopyMI); 1118 1119 CoalescerPair CP(*TII, *TRI); 1120 if (!CP.setRegisters(CopyMI)) { 1121 DEBUG(dbgs() << "\tNot coalescable.\n"); 1122 return false; 1123 } 1124 1125 // If they are already joined we continue. 1126 if (CP.getSrcReg() == CP.getDstReg()) { 1127 markAsJoined(CopyMI); 1128 DEBUG(dbgs() << "\tCopy already coalesced.\n"); 1129 return false; // Not coalescable. 1130 } 1131 1132 // Eliminate undefs. 1133 if (!CP.isPhys() && eliminateUndefCopy(CopyMI, CP)) { 1134 markAsJoined(CopyMI); 1135 DEBUG(dbgs() << "\tEliminated copy of <undef> value.\n"); 1136 return false; // Not coalescable. 1137 } 1138 1139 DEBUG(dbgs() << "\tConsidering merging " << PrintReg(CP.getSrcReg(), TRI) 1140 << " with " << PrintReg(CP.getDstReg(), TRI, CP.getSubIdx()) 1141 << "\n"); 1142 1143 // Enforce policies. 1144 if (CP.isPhys()) { 1145 if (!shouldJoinPhys(CP)) { 1146 // Before giving up coalescing, if definition of source is defined by 1147 // trivial computation, try rematerializing it. 1148 if (!CP.isFlipped() && 1149 ReMaterializeTrivialDef(LIS->getInterval(CP.getSrcReg()), true, 1150 CP.getDstReg(), CopyMI)) 1151 return true; 1152 return false; 1153 } 1154 } else { 1155 // Avoid constraining virtual register regclass too much. 1156 if (CP.isCrossClass()) { 1157 DEBUG(dbgs() << "\tCross-class to " << CP.getNewRC()->getName() << ".\n"); 1158 if (DisableCrossClassJoin) { 1159 DEBUG(dbgs() << "\tCross-class joins disabled.\n"); 1160 return false; 1161 } 1162 if (!isWinToJoinCrossClass(CP.getSrcReg(), CP.getDstReg(), 1163 MRI->getRegClass(CP.getSrcReg()), 1164 MRI->getRegClass(CP.getDstReg()), 1165 CP.getNewRC())) { 1166 DEBUG(dbgs() << "\tAvoid coalescing to constrained register class.\n"); 1167 Again = true; // May be possible to coalesce later. 1168 return false; 1169 } 1170 } 1171 1172 // When possible, let DstReg be the larger interval. 1173 if (!CP.getSubIdx() && LIS->getInterval(CP.getSrcReg()).ranges.size() > 1174 LIS->getInterval(CP.getDstReg()).ranges.size()) 1175 CP.flip(); 1176 } 1177 1178 // Okay, attempt to join these two intervals. On failure, this returns false. 1179 // Otherwise, if one of the intervals being joined is a physreg, this method 1180 // always canonicalizes DstInt to be it. The output "SrcInt" will not have 1181 // been modified, so we can use this information below to update aliases. 1182 if (!JoinIntervals(CP)) { 1183 // Coalescing failed. 1184 1185 // If definition of source is defined by trivial computation, try 1186 // rematerializing it. 1187 if (!CP.isFlipped() && 1188 ReMaterializeTrivialDef(LIS->getInterval(CP.getSrcReg()), true, 1189 CP.getDstReg(), CopyMI)) 1190 return true; 1191 1192 // If we can eliminate the copy without merging the live ranges, do so now. 1193 if (!CP.isPartial()) { 1194 if (AdjustCopiesBackFrom(CP, CopyMI) || 1195 RemoveCopyByCommutingDef(CP, CopyMI)) { 1196 markAsJoined(CopyMI); 1197 DEBUG(dbgs() << "\tTrivial!\n"); 1198 return true; 1199 } 1200 } 1201 1202 // Otherwise, we are unable to join the intervals. 1203 DEBUG(dbgs() << "\tInterference!\n"); 1204 Again = true; // May be possible to coalesce later. 1205 return false; 1206 } 1207 1208 // Coalescing to a virtual register that is of a sub-register class of the 1209 // other. Make sure the resulting register is set to the right register class. 1210 if (CP.isCrossClass()) { 1211 ++numCrossRCs; 1212 MRI->setRegClass(CP.getDstReg(), CP.getNewRC()); 1213 } 1214 1215 // Remember to delete the copy instruction. 1216 markAsJoined(CopyMI); 1217 1218 UpdateRegDefsUses(CP); 1219 1220 // If we have extended the live range of a physical register, make sure we 1221 // update live-in lists as well. 1222 if (CP.isPhys()) { 1223 SmallVector<MachineBasicBlock*, 16> BlockSeq; 1224 // JoinIntervals invalidates the VNInfos in SrcInt, but we only need the 1225 // ranges for this, and they are preserved. 1226 LiveInterval &SrcInt = LIS->getInterval(CP.getSrcReg()); 1227 for (LiveInterval::const_iterator I = SrcInt.begin(), E = SrcInt.end(); 1228 I != E; ++I ) { 1229 LIS->findLiveInMBBs(I->start, I->end, BlockSeq); 1230 for (unsigned idx = 0, size = BlockSeq.size(); idx != size; ++idx) { 1231 MachineBasicBlock &block = *BlockSeq[idx]; 1232 if (!block.isLiveIn(CP.getDstReg())) 1233 block.addLiveIn(CP.getDstReg()); 1234 } 1235 BlockSeq.clear(); 1236 } 1237 } 1238 1239 // SrcReg is guaranteed to be the register whose live interval that is 1240 // being merged. 1241 LIS->removeInterval(CP.getSrcReg()); 1242 1243 // Update regalloc hint. 1244 TRI->UpdateRegAllocHint(CP.getSrcReg(), CP.getDstReg(), *MF); 1245 1246 DEBUG({ 1247 LiveInterval &DstInt = LIS->getInterval(CP.getDstReg()); 1248 dbgs() << "\tJoined. Result = "; 1249 DstInt.print(dbgs(), TRI); 1250 dbgs() << "\n"; 1251 }); 1252 1253 ++numJoins; 1254 return true; 1255 } 1256 1257 /// ComputeUltimateVN - Assuming we are going to join two live intervals, 1258 /// compute what the resultant value numbers for each value in the input two 1259 /// ranges will be. This is complicated by copies between the two which can 1260 /// and will commonly cause multiple value numbers to be merged into one. 1261 /// 1262 /// VN is the value number that we're trying to resolve. InstDefiningValue 1263 /// keeps track of the new InstDefiningValue assignment for the result 1264 /// LiveInterval. ThisFromOther/OtherFromThis are sets that keep track of 1265 /// whether a value in this or other is a copy from the opposite set. 1266 /// ThisValNoAssignments/OtherValNoAssignments keep track of value #'s that have 1267 /// already been assigned. 1268 /// 1269 /// ThisFromOther[x] - If x is defined as a copy from the other interval, this 1270 /// contains the value number the copy is from. 1271 /// 1272 static unsigned ComputeUltimateVN(VNInfo *VNI, 1273 SmallVector<VNInfo*, 16> &NewVNInfo, 1274 DenseMap<VNInfo*, VNInfo*> &ThisFromOther, 1275 DenseMap<VNInfo*, VNInfo*> &OtherFromThis, 1276 SmallVector<int, 16> &ThisValNoAssignments, 1277 SmallVector<int, 16> &OtherValNoAssignments) { 1278 unsigned VN = VNI->id; 1279 1280 // If the VN has already been computed, just return it. 1281 if (ThisValNoAssignments[VN] >= 0) 1282 return ThisValNoAssignments[VN]; 1283 assert(ThisValNoAssignments[VN] != -2 && "Cyclic value numbers"); 1284 1285 // If this val is not a copy from the other val, then it must be a new value 1286 // number in the destination. 1287 DenseMap<VNInfo*, VNInfo*>::iterator I = ThisFromOther.find(VNI); 1288 if (I == ThisFromOther.end()) { 1289 NewVNInfo.push_back(VNI); 1290 return ThisValNoAssignments[VN] = NewVNInfo.size()-1; 1291 } 1292 VNInfo *OtherValNo = I->second; 1293 1294 // Otherwise, this *is* a copy from the RHS. If the other side has already 1295 // been computed, return it. 1296 if (OtherValNoAssignments[OtherValNo->id] >= 0) 1297 return ThisValNoAssignments[VN] = OtherValNoAssignments[OtherValNo->id]; 1298 1299 // Mark this value number as currently being computed, then ask what the 1300 // ultimate value # of the other value is. 1301 ThisValNoAssignments[VN] = -2; 1302 unsigned UltimateVN = 1303 ComputeUltimateVN(OtherValNo, NewVNInfo, OtherFromThis, ThisFromOther, 1304 OtherValNoAssignments, ThisValNoAssignments); 1305 return ThisValNoAssignments[VN] = UltimateVN; 1306 } 1307 1308 1309 // Find out if we have something like 1310 // A = X 1311 // B = X 1312 // if so, we can pretend this is actually 1313 // A = X 1314 // B = A 1315 // which allows us to coalesce A and B. 1316 // VNI is the definition of B. LR is the life range of A that includes 1317 // the slot just before B. If we return true, we add "B = X" to DupCopies. 1318 // This implies that A dominates B. 1319 static bool RegistersDefinedFromSameValue(LiveIntervals &li, 1320 const TargetRegisterInfo &tri, 1321 CoalescerPair &CP, 1322 VNInfo *VNI, 1323 LiveRange *LR, 1324 SmallVector<MachineInstr*, 8> &DupCopies) { 1325 // FIXME: This is very conservative. For example, we don't handle 1326 // physical registers. 1327 1328 MachineInstr *MI = li.getInstructionFromIndex(VNI->def); 1329 1330 if (!MI || !MI->isFullCopy() || CP.isPartial() || CP.isPhys()) 1331 return false; 1332 1333 unsigned Dst = MI->getOperand(0).getReg(); 1334 unsigned Src = MI->getOperand(1).getReg(); 1335 1336 if (!TargetRegisterInfo::isVirtualRegister(Src) || 1337 !TargetRegisterInfo::isVirtualRegister(Dst)) 1338 return false; 1339 1340 unsigned A = CP.getDstReg(); 1341 unsigned B = CP.getSrcReg(); 1342 1343 if (B == Dst) 1344 std::swap(A, B); 1345 assert(Dst == A); 1346 1347 VNInfo *Other = LR->valno; 1348 const MachineInstr *OtherMI = li.getInstructionFromIndex(Other->def); 1349 1350 if (!OtherMI || !OtherMI->isFullCopy()) 1351 return false; 1352 1353 unsigned OtherDst = OtherMI->getOperand(0).getReg(); 1354 unsigned OtherSrc = OtherMI->getOperand(1).getReg(); 1355 1356 if (!TargetRegisterInfo::isVirtualRegister(OtherSrc) || 1357 !TargetRegisterInfo::isVirtualRegister(OtherDst)) 1358 return false; 1359 1360 assert(OtherDst == B); 1361 1362 if (Src != OtherSrc) 1363 return false; 1364 1365 // If the copies use two different value numbers of X, we cannot merge 1366 // A and B. 1367 LiveInterval &SrcInt = li.getInterval(Src); 1368 // getVNInfoBefore returns NULL for undef copies. In this case, the 1369 // optimization is still safe. 1370 if (SrcInt.getVNInfoBefore(Other->def) != SrcInt.getVNInfoBefore(VNI->def)) 1371 return false; 1372 1373 DupCopies.push_back(MI); 1374 1375 return true; 1376 } 1377 1378 /// JoinIntervals - Attempt to join these two intervals. On failure, this 1379 /// returns false. 1380 bool RegisterCoalescer::JoinIntervals(CoalescerPair &CP) { 1381 LiveInterval &RHS = LIS->getInterval(CP.getSrcReg()); 1382 DEBUG({ dbgs() << "\t\tRHS = "; RHS.print(dbgs(), TRI); dbgs() << "\n"; }); 1383 1384 // If a live interval is a physical register, check for interference with any 1385 // aliases. The interference check implemented here is a bit more conservative 1386 // than the full interfeence check below. We allow overlapping live ranges 1387 // only when one is a copy of the other. 1388 if (CP.isPhys()) { 1389 // Optimization for reserved registers like ESP. 1390 // We can only merge with a reserved physreg if RHS has a single value that 1391 // is a copy of CP.DstReg(). The live range of the reserved register will 1392 // look like a set of dead defs - we don't properly track the live range of 1393 // reserved registers. 1394 if (RegClassInfo.isReserved(CP.getDstReg())) { 1395 assert(CP.isFlipped() && RHS.containsOneValue() && 1396 "Invalid join with reserved register"); 1397 // Deny any overlapping intervals. This depends on all the reserved 1398 // register live ranges to look like dead defs. 1399 for (const uint16_t *AS = TRI->getOverlaps(CP.getDstReg()); *AS; ++AS) { 1400 if (!LIS->hasInterval(*AS)) { 1401 // Make sure at least DstReg itself exists before attempting a join. 1402 if (*AS == CP.getDstReg()) 1403 LIS->getOrCreateInterval(CP.getDstReg()); 1404 continue; 1405 } 1406 if (RHS.overlaps(LIS->getInterval(*AS))) { 1407 DEBUG(dbgs() << "\t\tInterference: " << PrintReg(*AS, TRI) << '\n'); 1408 return false; 1409 } 1410 } 1411 // Skip any value computations, we are not adding new values to the 1412 // reserved register. Also skip merging the live ranges, the reserved 1413 // register live range doesn't need to be accurate as long as all the 1414 // defs are there. 1415 return true; 1416 } 1417 1418 // Check if a register mask clobbers DstReg. 1419 BitVector UsableRegs; 1420 if (LIS->checkRegMaskInterference(RHS, UsableRegs) && 1421 !UsableRegs.test(CP.getDstReg())) { 1422 DEBUG(dbgs() << "\t\tRegister mask interference.\n"); 1423 return false; 1424 } 1425 1426 for (const uint16_t *AS = TRI->getAliasSet(CP.getDstReg()); *AS; ++AS){ 1427 if (!LIS->hasInterval(*AS)) 1428 continue; 1429 const LiveInterval &LHS = LIS->getInterval(*AS); 1430 LiveInterval::const_iterator LI = LHS.begin(); 1431 for (LiveInterval::const_iterator RI = RHS.begin(), RE = RHS.end(); 1432 RI != RE; ++RI) { 1433 LI = std::lower_bound(LI, LHS.end(), RI->start); 1434 // Does LHS have an overlapping live range starting before RI? 1435 if ((LI != LHS.begin() && LI[-1].end > RI->start) && 1436 (RI->start != RI->valno->def || 1437 !CP.isCoalescable(LIS->getInstructionFromIndex(RI->start)))) { 1438 DEBUG({ 1439 dbgs() << "\t\tInterference from alias: "; 1440 LHS.print(dbgs(), TRI); 1441 dbgs() << "\n\t\tOverlap at " << RI->start << " and no copy.\n"; 1442 }); 1443 return false; 1444 } 1445 1446 // Check that LHS ranges beginning in this range are copies. 1447 for (; LI != LHS.end() && LI->start < RI->end; ++LI) { 1448 if (LI->start != LI->valno->def || 1449 !CP.isCoalescable(LIS->getInstructionFromIndex(LI->start))) { 1450 DEBUG({ 1451 dbgs() << "\t\tInterference from alias: "; 1452 LHS.print(dbgs(), TRI); 1453 dbgs() << "\n\t\tDef at " << LI->start << " is not a copy.\n"; 1454 }); 1455 return false; 1456 } 1457 } 1458 } 1459 } 1460 } 1461 1462 // Compute the final value assignment, assuming that the live ranges can be 1463 // coalesced. 1464 SmallVector<int, 16> LHSValNoAssignments; 1465 SmallVector<int, 16> RHSValNoAssignments; 1466 DenseMap<VNInfo*, VNInfo*> LHSValsDefinedFromRHS; 1467 DenseMap<VNInfo*, VNInfo*> RHSValsDefinedFromLHS; 1468 SmallVector<VNInfo*, 16> NewVNInfo; 1469 1470 SmallVector<MachineInstr*, 8> DupCopies; 1471 1472 LiveInterval &LHS = LIS->getOrCreateInterval(CP.getDstReg()); 1473 DEBUG({ dbgs() << "\t\tLHS = "; LHS.print(dbgs(), TRI); dbgs() << "\n"; }); 1474 1475 // Loop over the value numbers of the LHS, seeing if any are defined from 1476 // the RHS. 1477 for (LiveInterval::vni_iterator i = LHS.vni_begin(), e = LHS.vni_end(); 1478 i != e; ++i) { 1479 VNInfo *VNI = *i; 1480 if (VNI->isUnused() || VNI->isPHIDef()) 1481 continue; 1482 MachineInstr *MI = LIS->getInstructionFromIndex(VNI->def); 1483 assert(MI && "Missing def"); 1484 if (!MI->isCopyLike()) // Src not defined by a copy? 1485 continue; 1486 1487 // Figure out the value # from the RHS. 1488 LiveRange *lr = RHS.getLiveRangeContaining(VNI->def.getPrevSlot()); 1489 // The copy could be to an aliased physreg. 1490 if (!lr) continue; 1491 1492 // DstReg is known to be a register in the LHS interval. If the src is 1493 // from the RHS interval, we can use its value #. 1494 if (!CP.isCoalescable(MI) && 1495 !RegistersDefinedFromSameValue(*LIS, *TRI, CP, VNI, lr, DupCopies)) 1496 continue; 1497 1498 LHSValsDefinedFromRHS[VNI] = lr->valno; 1499 } 1500 1501 // Loop over the value numbers of the RHS, seeing if any are defined from 1502 // the LHS. 1503 for (LiveInterval::vni_iterator i = RHS.vni_begin(), e = RHS.vni_end(); 1504 i != e; ++i) { 1505 VNInfo *VNI = *i; 1506 if (VNI->isUnused() || VNI->isPHIDef()) 1507 continue; 1508 MachineInstr *MI = LIS->getInstructionFromIndex(VNI->def); 1509 assert(MI && "Missing def"); 1510 if (!MI->isCopyLike()) // Src not defined by a copy? 1511 continue; 1512 1513 // Figure out the value # from the LHS. 1514 LiveRange *lr = LHS.getLiveRangeContaining(VNI->def.getPrevSlot()); 1515 // The copy could be to an aliased physreg. 1516 if (!lr) continue; 1517 1518 // DstReg is known to be a register in the RHS interval. If the src is 1519 // from the LHS interval, we can use its value #. 1520 if (!CP.isCoalescable(MI) && 1521 !RegistersDefinedFromSameValue(*LIS, *TRI, CP, VNI, lr, DupCopies)) 1522 continue; 1523 1524 RHSValsDefinedFromLHS[VNI] = lr->valno; 1525 } 1526 1527 LHSValNoAssignments.resize(LHS.getNumValNums(), -1); 1528 RHSValNoAssignments.resize(RHS.getNumValNums(), -1); 1529 NewVNInfo.reserve(LHS.getNumValNums() + RHS.getNumValNums()); 1530 1531 for (LiveInterval::vni_iterator i = LHS.vni_begin(), e = LHS.vni_end(); 1532 i != e; ++i) { 1533 VNInfo *VNI = *i; 1534 unsigned VN = VNI->id; 1535 if (LHSValNoAssignments[VN] >= 0 || VNI->isUnused()) 1536 continue; 1537 ComputeUltimateVN(VNI, NewVNInfo, 1538 LHSValsDefinedFromRHS, RHSValsDefinedFromLHS, 1539 LHSValNoAssignments, RHSValNoAssignments); 1540 } 1541 for (LiveInterval::vni_iterator i = RHS.vni_begin(), e = RHS.vni_end(); 1542 i != e; ++i) { 1543 VNInfo *VNI = *i; 1544 unsigned VN = VNI->id; 1545 if (RHSValNoAssignments[VN] >= 0 || VNI->isUnused()) 1546 continue; 1547 // If this value number isn't a copy from the LHS, it's a new number. 1548 if (RHSValsDefinedFromLHS.find(VNI) == RHSValsDefinedFromLHS.end()) { 1549 NewVNInfo.push_back(VNI); 1550 RHSValNoAssignments[VN] = NewVNInfo.size()-1; 1551 continue; 1552 } 1553 1554 ComputeUltimateVN(VNI, NewVNInfo, 1555 RHSValsDefinedFromLHS, LHSValsDefinedFromRHS, 1556 RHSValNoAssignments, LHSValNoAssignments); 1557 } 1558 1559 // Armed with the mappings of LHS/RHS values to ultimate values, walk the 1560 // interval lists to see if these intervals are coalescable. 1561 LiveInterval::const_iterator I = LHS.begin(); 1562 LiveInterval::const_iterator IE = LHS.end(); 1563 LiveInterval::const_iterator J = RHS.begin(); 1564 LiveInterval::const_iterator JE = RHS.end(); 1565 1566 // Skip ahead until the first place of potential sharing. 1567 if (I != IE && J != JE) { 1568 if (I->start < J->start) { 1569 I = std::upper_bound(I, IE, J->start); 1570 if (I != LHS.begin()) --I; 1571 } else if (J->start < I->start) { 1572 J = std::upper_bound(J, JE, I->start); 1573 if (J != RHS.begin()) --J; 1574 } 1575 } 1576 1577 while (I != IE && J != JE) { 1578 // Determine if these two live ranges overlap. 1579 bool Overlaps; 1580 if (I->start < J->start) { 1581 Overlaps = I->end > J->start; 1582 } else { 1583 Overlaps = J->end > I->start; 1584 } 1585 1586 // If so, check value # info to determine if they are really different. 1587 if (Overlaps) { 1588 // If the live range overlap will map to the same value number in the 1589 // result liverange, we can still coalesce them. If not, we can't. 1590 if (LHSValNoAssignments[I->valno->id] != 1591 RHSValNoAssignments[J->valno->id]) 1592 return false; 1593 } 1594 1595 if (I->end < J->end) 1596 ++I; 1597 else 1598 ++J; 1599 } 1600 1601 // Update kill info. Some live ranges are extended due to copy coalescing. 1602 for (DenseMap<VNInfo*, VNInfo*>::iterator I = LHSValsDefinedFromRHS.begin(), 1603 E = LHSValsDefinedFromRHS.end(); I != E; ++I) { 1604 VNInfo *VNI = I->first; 1605 unsigned LHSValID = LHSValNoAssignments[VNI->id]; 1606 if (VNI->hasPHIKill()) 1607 NewVNInfo[LHSValID]->setHasPHIKill(true); 1608 } 1609 1610 // Update kill info. Some live ranges are extended due to copy coalescing. 1611 for (DenseMap<VNInfo*, VNInfo*>::iterator I = RHSValsDefinedFromLHS.begin(), 1612 E = RHSValsDefinedFromLHS.end(); I != E; ++I) { 1613 VNInfo *VNI = I->first; 1614 unsigned RHSValID = RHSValNoAssignments[VNI->id]; 1615 if (VNI->hasPHIKill()) 1616 NewVNInfo[RHSValID]->setHasPHIKill(true); 1617 } 1618 1619 if (LHSValNoAssignments.empty()) 1620 LHSValNoAssignments.push_back(-1); 1621 if (RHSValNoAssignments.empty()) 1622 RHSValNoAssignments.push_back(-1); 1623 1624 SmallVector<unsigned, 8> SourceRegisters; 1625 for (SmallVector<MachineInstr*, 8>::iterator I = DupCopies.begin(), 1626 E = DupCopies.end(); I != E; ++I) { 1627 MachineInstr *MI = *I; 1628 1629 // We have pretended that the assignment to B in 1630 // A = X 1631 // B = X 1632 // was actually a copy from A. Now that we decided to coalesce A and B, 1633 // transform the code into 1634 // A = X 1635 // X = X 1636 // and mark the X as coalesced to keep the illusion. 1637 unsigned Src = MI->getOperand(1).getReg(); 1638 SourceRegisters.push_back(Src); 1639 MI->getOperand(0).substVirtReg(Src, 0, *TRI); 1640 1641 markAsJoined(MI); 1642 } 1643 1644 // If B = X was the last use of X in a liverange, we have to shrink it now 1645 // that B = X is gone. 1646 for (SmallVector<unsigned, 8>::iterator I = SourceRegisters.begin(), 1647 E = SourceRegisters.end(); I != E; ++I) { 1648 LIS->shrinkToUses(&LIS->getInterval(*I)); 1649 } 1650 1651 // If we get here, we know that we can coalesce the live ranges. Ask the 1652 // intervals to coalesce themselves now. 1653 LHS.join(RHS, &LHSValNoAssignments[0], &RHSValNoAssignments[0], NewVNInfo, 1654 MRI); 1655 return true; 1656 } 1657 1658 namespace { 1659 // DepthMBBCompare - Comparison predicate that sort first based on the loop 1660 // depth of the basic block (the unsigned), and then on the MBB number. 1661 struct DepthMBBCompare { 1662 typedef std::pair<unsigned, MachineBasicBlock*> DepthMBBPair; 1663 bool operator()(const DepthMBBPair &LHS, const DepthMBBPair &RHS) const { 1664 // Deeper loops first 1665 if (LHS.first != RHS.first) 1666 return LHS.first > RHS.first; 1667 1668 // Prefer blocks that are more connected in the CFG. This takes care of 1669 // the most difficult copies first while intervals are short. 1670 unsigned cl = LHS.second->pred_size() + LHS.second->succ_size(); 1671 unsigned cr = RHS.second->pred_size() + RHS.second->succ_size(); 1672 if (cl != cr) 1673 return cl > cr; 1674 1675 // As a last resort, sort by block number. 1676 return LHS.second->getNumber() < RHS.second->getNumber(); 1677 } 1678 }; 1679 } 1680 1681 void RegisterCoalescer::CopyCoalesceInMBB(MachineBasicBlock *MBB, 1682 std::vector<MachineInstr*> &TryAgain) { 1683 DEBUG(dbgs() << MBB->getName() << ":\n"); 1684 1685 SmallVector<MachineInstr*, 8> VirtCopies; 1686 SmallVector<MachineInstr*, 8> PhysCopies; 1687 SmallVector<MachineInstr*, 8> ImpDefCopies; 1688 for (MachineBasicBlock::iterator MII = MBB->begin(), E = MBB->end(); 1689 MII != E;) { 1690 MachineInstr *Inst = MII++; 1691 1692 // If this isn't a copy nor a extract_subreg, we can't join intervals. 1693 unsigned SrcReg, DstReg; 1694 if (Inst->isCopy()) { 1695 DstReg = Inst->getOperand(0).getReg(); 1696 SrcReg = Inst->getOperand(1).getReg(); 1697 } else if (Inst->isSubregToReg()) { 1698 DstReg = Inst->getOperand(0).getReg(); 1699 SrcReg = Inst->getOperand(2).getReg(); 1700 } else 1701 continue; 1702 1703 bool SrcIsPhys = TargetRegisterInfo::isPhysicalRegister(SrcReg); 1704 bool DstIsPhys = TargetRegisterInfo::isPhysicalRegister(DstReg); 1705 if (LIS->hasInterval(SrcReg) && LIS->getInterval(SrcReg).empty()) 1706 ImpDefCopies.push_back(Inst); 1707 else if (SrcIsPhys || DstIsPhys) 1708 PhysCopies.push_back(Inst); 1709 else 1710 VirtCopies.push_back(Inst); 1711 } 1712 1713 // Try coalescing implicit copies and insert_subreg <undef> first, 1714 // followed by copies to / from physical registers, then finally copies 1715 // from virtual registers to virtual registers. 1716 for (unsigned i = 0, e = ImpDefCopies.size(); i != e; ++i) { 1717 MachineInstr *TheCopy = ImpDefCopies[i]; 1718 bool Again = false; 1719 if (!JoinCopy(TheCopy, Again)) 1720 if (Again) 1721 TryAgain.push_back(TheCopy); 1722 } 1723 for (unsigned i = 0, e = PhysCopies.size(); i != e; ++i) { 1724 MachineInstr *TheCopy = PhysCopies[i]; 1725 bool Again = false; 1726 if (!JoinCopy(TheCopy, Again)) 1727 if (Again) 1728 TryAgain.push_back(TheCopy); 1729 } 1730 for (unsigned i = 0, e = VirtCopies.size(); i != e; ++i) { 1731 MachineInstr *TheCopy = VirtCopies[i]; 1732 bool Again = false; 1733 if (!JoinCopy(TheCopy, Again)) 1734 if (Again) 1735 TryAgain.push_back(TheCopy); 1736 } 1737 } 1738 1739 void RegisterCoalescer::joinIntervals() { 1740 DEBUG(dbgs() << "********** JOINING INTERVALS ***********\n"); 1741 1742 std::vector<MachineInstr*> TryAgainList; 1743 if (Loops->empty()) { 1744 // If there are no loops in the function, join intervals in function order. 1745 for (MachineFunction::iterator I = MF->begin(), E = MF->end(); 1746 I != E; ++I) 1747 CopyCoalesceInMBB(I, TryAgainList); 1748 } else { 1749 // Otherwise, join intervals in inner loops before other intervals. 1750 // Unfortunately we can't just iterate over loop hierarchy here because 1751 // there may be more MBB's than BB's. Collect MBB's for sorting. 1752 1753 // Join intervals in the function prolog first. We want to join physical 1754 // registers with virtual registers before the intervals got too long. 1755 std::vector<std::pair<unsigned, MachineBasicBlock*> > MBBs; 1756 for (MachineFunction::iterator I = MF->begin(), E = MF->end();I != E;++I){ 1757 MachineBasicBlock *MBB = I; 1758 MBBs.push_back(std::make_pair(Loops->getLoopDepth(MBB), I)); 1759 } 1760 1761 // Sort by loop depth. 1762 std::sort(MBBs.begin(), MBBs.end(), DepthMBBCompare()); 1763 1764 // Finally, join intervals in loop nest order. 1765 for (unsigned i = 0, e = MBBs.size(); i != e; ++i) 1766 CopyCoalesceInMBB(MBBs[i].second, TryAgainList); 1767 } 1768 1769 // Joining intervals can allow other intervals to be joined. Iteratively join 1770 // until we make no progress. 1771 bool ProgressMade = true; 1772 while (ProgressMade) { 1773 ProgressMade = false; 1774 1775 for (unsigned i = 0, e = TryAgainList.size(); i != e; ++i) { 1776 MachineInstr *&TheCopy = TryAgainList[i]; 1777 if (!TheCopy) 1778 continue; 1779 1780 bool Again = false; 1781 bool Success = JoinCopy(TheCopy, Again); 1782 if (Success || !Again) { 1783 TheCopy= 0; // Mark this one as done. 1784 ProgressMade = true; 1785 } 1786 } 1787 } 1788 } 1789 1790 void RegisterCoalescer::releaseMemory() { 1791 JoinedCopies.clear(); 1792 ReMatCopies.clear(); 1793 ReMatDefs.clear(); 1794 } 1795 1796 bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) { 1797 MF = &fn; 1798 MRI = &fn.getRegInfo(); 1799 TM = &fn.getTarget(); 1800 TRI = TM->getRegisterInfo(); 1801 TII = TM->getInstrInfo(); 1802 LIS = &getAnalysis<LiveIntervals>(); 1803 LDV = &getAnalysis<LiveDebugVariables>(); 1804 AA = &getAnalysis<AliasAnalysis>(); 1805 Loops = &getAnalysis<MachineLoopInfo>(); 1806 1807 DEBUG(dbgs() << "********** SIMPLE REGISTER COALESCING **********\n" 1808 << "********** Function: " 1809 << ((Value*)MF->getFunction())->getName() << '\n'); 1810 1811 if (VerifyCoalescing) 1812 MF->verify(this, "Before register coalescing"); 1813 1814 RegClassInfo.runOnMachineFunction(fn); 1815 1816 // Join (coalesce) intervals if requested. 1817 if (EnableJoining) { 1818 joinIntervals(); 1819 DEBUG({ 1820 dbgs() << "********** INTERVALS POST JOINING **********\n"; 1821 for (LiveIntervals::iterator I = LIS->begin(), E = LIS->end(); 1822 I != E; ++I){ 1823 I->second->print(dbgs(), TRI); 1824 dbgs() << "\n"; 1825 } 1826 }); 1827 } 1828 1829 // Perform a final pass over the instructions and compute spill weights 1830 // and remove identity moves. 1831 SmallVector<unsigned, 4> DeadDefs, InflateRegs; 1832 for (MachineFunction::iterator mbbi = MF->begin(), mbbe = MF->end(); 1833 mbbi != mbbe; ++mbbi) { 1834 MachineBasicBlock* mbb = mbbi; 1835 for (MachineBasicBlock::iterator mii = mbb->begin(), mie = mbb->end(); 1836 mii != mie; ) { 1837 MachineInstr *MI = mii; 1838 if (JoinedCopies.count(MI)) { 1839 // Delete all coalesced copies. 1840 bool DoDelete = true; 1841 assert(MI->isCopyLike() && "Unrecognized copy instruction"); 1842 unsigned SrcReg = MI->getOperand(MI->isSubregToReg() ? 2 : 1).getReg(); 1843 unsigned DstReg = MI->getOperand(0).getReg(); 1844 1845 // Collect candidates for register class inflation. 1846 if (TargetRegisterInfo::isVirtualRegister(SrcReg) && 1847 RegClassInfo.isProperSubClass(MRI->getRegClass(SrcReg))) 1848 InflateRegs.push_back(SrcReg); 1849 if (TargetRegisterInfo::isVirtualRegister(DstReg) && 1850 RegClassInfo.isProperSubClass(MRI->getRegClass(DstReg))) 1851 InflateRegs.push_back(DstReg); 1852 1853 if (TargetRegisterInfo::isPhysicalRegister(SrcReg) && 1854 MI->getNumOperands() > 2) 1855 // Do not delete extract_subreg, insert_subreg of physical 1856 // registers unless the definition is dead. e.g. 1857 // %DO<def> = INSERT_SUBREG %D0<undef>, %S0<kill>, 1 1858 // or else the scavenger may complain. LowerSubregs will 1859 // delete them later. 1860 DoDelete = false; 1861 1862 if (MI->allDefsAreDead()) { 1863 if (TargetRegisterInfo::isVirtualRegister(SrcReg) && 1864 LIS->hasInterval(SrcReg)) 1865 LIS->shrinkToUses(&LIS->getInterval(SrcReg)); 1866 DoDelete = true; 1867 } 1868 if (!DoDelete) { 1869 // We need the instruction to adjust liveness, so make it a KILL. 1870 if (MI->isSubregToReg()) { 1871 MI->RemoveOperand(3); 1872 MI->RemoveOperand(1); 1873 } 1874 MI->setDesc(TII->get(TargetOpcode::KILL)); 1875 mii = llvm::next(mii); 1876 } else { 1877 LIS->RemoveMachineInstrFromMaps(MI); 1878 mii = mbbi->erase(mii); 1879 ++numPeep; 1880 } 1881 continue; 1882 } 1883 1884 // Now check if this is a remat'ed def instruction which is now dead. 1885 if (ReMatDefs.count(MI)) { 1886 bool isDead = true; 1887 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 1888 const MachineOperand &MO = MI->getOperand(i); 1889 if (!MO.isReg()) 1890 continue; 1891 unsigned Reg = MO.getReg(); 1892 if (!Reg) 1893 continue; 1894 DeadDefs.push_back(Reg); 1895 if (TargetRegisterInfo::isVirtualRegister(Reg)) { 1896 // Remat may also enable register class inflation. 1897 if (RegClassInfo.isProperSubClass(MRI->getRegClass(Reg))) 1898 InflateRegs.push_back(Reg); 1899 } 1900 if (MO.isDead()) 1901 continue; 1902 if (TargetRegisterInfo::isPhysicalRegister(Reg) || 1903 !MRI->use_nodbg_empty(Reg)) { 1904 isDead = false; 1905 break; 1906 } 1907 } 1908 if (isDead) { 1909 while (!DeadDefs.empty()) { 1910 unsigned DeadDef = DeadDefs.back(); 1911 DeadDefs.pop_back(); 1912 RemoveDeadDef(LIS->getInterval(DeadDef), MI); 1913 } 1914 LIS->RemoveMachineInstrFromMaps(mii); 1915 mii = mbbi->erase(mii); 1916 continue; 1917 } else 1918 DeadDefs.clear(); 1919 } 1920 1921 ++mii; 1922 1923 // Check for now unnecessary kill flags. 1924 if (LIS->isNotInMIMap(MI)) continue; 1925 SlotIndex DefIdx = LIS->getInstructionIndex(MI).getRegSlot(); 1926 for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) { 1927 MachineOperand &MO = MI->getOperand(i); 1928 if (!MO.isReg() || !MO.isKill()) continue; 1929 unsigned reg = MO.getReg(); 1930 if (!reg || !LIS->hasInterval(reg)) continue; 1931 if (!LIS->getInterval(reg).killedAt(DefIdx)) { 1932 MO.setIsKill(false); 1933 continue; 1934 } 1935 // When leaving a kill flag on a physreg, check if any subregs should 1936 // remain alive. 1937 if (!TargetRegisterInfo::isPhysicalRegister(reg)) 1938 continue; 1939 for (const uint16_t *SR = TRI->getSubRegisters(reg); 1940 unsigned S = *SR; ++SR) 1941 if (LIS->hasInterval(S) && LIS->getInterval(S).liveAt(DefIdx)) 1942 MI->addRegisterDefined(S, TRI); 1943 } 1944 } 1945 } 1946 1947 // After deleting a lot of copies, register classes may be less constrained. 1948 // Removing sub-register opreands may alow GR32_ABCD -> GR32 and DPR_VFP2 -> 1949 // DPR inflation. 1950 array_pod_sort(InflateRegs.begin(), InflateRegs.end()); 1951 InflateRegs.erase(std::unique(InflateRegs.begin(), InflateRegs.end()), 1952 InflateRegs.end()); 1953 DEBUG(dbgs() << "Trying to inflate " << InflateRegs.size() << " regs.\n"); 1954 for (unsigned i = 0, e = InflateRegs.size(); i != e; ++i) { 1955 unsigned Reg = InflateRegs[i]; 1956 if (MRI->reg_nodbg_empty(Reg)) 1957 continue; 1958 if (MRI->recomputeRegClass(Reg, *TM)) { 1959 DEBUG(dbgs() << PrintReg(Reg) << " inflated to " 1960 << MRI->getRegClass(Reg)->getName() << '\n'); 1961 ++NumInflated; 1962 } 1963 } 1964 1965 DEBUG(dump()); 1966 DEBUG(LDV->dump()); 1967 if (VerifyCoalescing) 1968 MF->verify(this, "After register coalescing"); 1969 return true; 1970 } 1971 1972 /// print - Implement the dump method. 1973 void RegisterCoalescer::print(raw_ostream &O, const Module* m) const { 1974 LIS->print(O, m); 1975 } 1976