1 //===-- PeepholeOptimizer.cpp - Peephole Optimizations --------------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // Perform peephole optimizations on the machine code: 11 // 12 // - Optimize Extensions 13 // 14 // Optimization of sign / zero extension instructions. It may be extended to 15 // handle other instructions with similar properties. 16 // 17 // On some targets, some instructions, e.g. X86 sign / zero extension, may 18 // leave the source value in the lower part of the result. This optimization 19 // will replace some uses of the pre-extension value with uses of the 20 // sub-register of the results. 21 // 22 // - Optimize Comparisons 23 // 24 // Optimization of comparison instructions. For instance, in this code: 25 // 26 // sub r1, 1 27 // cmp r1, 0 28 // bz L1 29 // 30 // If the "sub" instruction all ready sets (or could be modified to set) the 31 // same flag that the "cmp" instruction sets and that "bz" uses, then we can 32 // eliminate the "cmp" instruction. 33 // 34 // Another instance, in this code: 35 // 36 // sub r1, r3 | sub r1, imm 37 // cmp r3, r1 or cmp r1, r3 | cmp r1, imm 38 // bge L1 39 // 40 // If the branch instruction can use flag from "sub", then we can replace 41 // "sub" with "subs" and eliminate the "cmp" instruction. 42 // 43 // - Optimize Loads: 44 // 45 // Loads that can be folded into a later instruction. A load is foldable 46 // if it loads to virtual registers and the virtual register defined has 47 // a single use. 48 // 49 // - Optimize Copies and Bitcast (more generally, target specific copies): 50 // 51 // Rewrite copies and bitcasts to avoid cross register bank copies 52 // when possible. 53 // E.g., Consider the following example, where capital and lower 54 // letters denote different register file: 55 // b = copy A <-- cross-bank copy 56 // C = copy b <-- cross-bank copy 57 // => 58 // b = copy A <-- cross-bank copy 59 // C = copy A <-- same-bank copy 60 // 61 // E.g., for bitcast: 62 // b = bitcast A <-- cross-bank copy 63 // C = bitcast b <-- cross-bank copy 64 // => 65 // b = bitcast A <-- cross-bank copy 66 // C = copy A <-- same-bank copy 67 //===----------------------------------------------------------------------===// 68 69 #include "llvm/CodeGen/Passes.h" 70 #include "llvm/ADT/DenseMap.h" 71 #include "llvm/ADT/SmallPtrSet.h" 72 #include "llvm/ADT/SmallSet.h" 73 #include "llvm/ADT/Statistic.h" 74 #include "llvm/CodeGen/MachineDominators.h" 75 #include "llvm/CodeGen/MachineInstrBuilder.h" 76 #include "llvm/CodeGen/MachineRegisterInfo.h" 77 #include "llvm/Support/CommandLine.h" 78 #include "llvm/Support/Debug.h" 79 #include "llvm/Support/raw_ostream.h" 80 #include "llvm/Target/TargetInstrInfo.h" 81 #include "llvm/Target/TargetRegisterInfo.h" 82 #include "llvm/Target/TargetSubtargetInfo.h" 83 #include <utility> 84 using namespace llvm; 85 86 #define DEBUG_TYPE "peephole-opt" 87 88 // Optimize Extensions 89 static cl::opt<bool> 90 Aggressive("aggressive-ext-opt", cl::Hidden, 91 cl::desc("Aggressive extension optimization")); 92 93 static cl::opt<bool> 94 DisablePeephole("disable-peephole", cl::Hidden, cl::init(false), 95 cl::desc("Disable the peephole optimizer")); 96 97 static cl::opt<bool> 98 DisableAdvCopyOpt("disable-adv-copy-opt", cl::Hidden, cl::init(false), 99 cl::desc("Disable advanced copy optimization")); 100 101 STATISTIC(NumReuse, "Number of extension results reused"); 102 STATISTIC(NumCmps, "Number of compares eliminated"); 103 STATISTIC(NumImmFold, "Number of move immediate folded"); 104 STATISTIC(NumLoadFold, "Number of loads folded"); 105 STATISTIC(NumSelects, "Number of selects optimized"); 106 STATISTIC(NumUncoalescableCopies, "Number of uncoalescable copies optimized"); 107 STATISTIC(NumRewrittenCopies, "Number of copies rewritten"); 108 109 namespace { 110 class PeepholeOptimizer : public MachineFunctionPass { 111 const TargetInstrInfo *TII; 112 const TargetRegisterInfo *TRI; 113 MachineRegisterInfo *MRI; 114 MachineDominatorTree *DT; // Machine dominator tree 115 116 public: 117 static char ID; // Pass identification 118 PeepholeOptimizer() : MachineFunctionPass(ID) { 119 initializePeepholeOptimizerPass(*PassRegistry::getPassRegistry()); 120 } 121 122 bool runOnMachineFunction(MachineFunction &MF) override; 123 124 void getAnalysisUsage(AnalysisUsage &AU) const override { 125 AU.setPreservesCFG(); 126 MachineFunctionPass::getAnalysisUsage(AU); 127 if (Aggressive) { 128 AU.addRequired<MachineDominatorTree>(); 129 AU.addPreserved<MachineDominatorTree>(); 130 } 131 } 132 133 private: 134 bool optimizeCmpInstr(MachineInstr *MI, MachineBasicBlock *MBB); 135 bool optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB, 136 SmallPtrSetImpl<MachineInstr*> &LocalMIs); 137 bool optimizeSelect(MachineInstr *MI, 138 SmallPtrSetImpl<MachineInstr *> &LocalMIs); 139 bool optimizeCondBranch(MachineInstr *MI); 140 bool optimizeCopyOrBitcast(MachineInstr *MI); 141 bool optimizeCoalescableCopy(MachineInstr *MI); 142 bool optimizeUncoalescableCopy(MachineInstr *MI, 143 SmallPtrSetImpl<MachineInstr *> &LocalMIs); 144 bool findNextSource(unsigned &Reg, unsigned &SubReg); 145 bool isMoveImmediate(MachineInstr *MI, 146 SmallSet<unsigned, 4> &ImmDefRegs, 147 DenseMap<unsigned, MachineInstr*> &ImmDefMIs); 148 bool foldImmediate(MachineInstr *MI, MachineBasicBlock *MBB, 149 SmallSet<unsigned, 4> &ImmDefRegs, 150 DenseMap<unsigned, MachineInstr*> &ImmDefMIs); 151 bool isLoadFoldable(MachineInstr *MI, 152 SmallSet<unsigned, 16> &FoldAsLoadDefCandidates); 153 154 /// \brief Check whether \p MI is understood by the register coalescer 155 /// but may require some rewriting. 156 bool isCoalescableCopy(const MachineInstr &MI) { 157 // SubregToRegs are not interesting, because they are already register 158 // coalescer friendly. 159 return MI.isCopy() || (!DisableAdvCopyOpt && 160 (MI.isRegSequence() || MI.isInsertSubreg() || 161 MI.isExtractSubreg())); 162 } 163 164 /// \brief Check whether \p MI is a copy like instruction that is 165 /// not recognized by the register coalescer. 166 bool isUncoalescableCopy(const MachineInstr &MI) { 167 return MI.isBitcast() || 168 (!DisableAdvCopyOpt && 169 (MI.isRegSequenceLike() || MI.isInsertSubregLike() || 170 MI.isExtractSubregLike())); 171 } 172 }; 173 174 /// \brief Helper class to track the possible sources of a value defined by 175 /// a (chain of) copy related instructions. 176 /// Given a definition (instruction and definition index), this class 177 /// follows the use-def chain to find successive suitable sources. 178 /// The given source can be used to rewrite the definition into 179 /// def = COPY src. 180 /// 181 /// For instance, let us consider the following snippet: 182 /// v0 = 183 /// v2 = INSERT_SUBREG v1, v0, sub0 184 /// def = COPY v2.sub0 185 /// 186 /// Using a ValueTracker for def = COPY v2.sub0 will give the following 187 /// suitable sources: 188 /// v2.sub0 and v0. 189 /// Then, def can be rewritten into def = COPY v0. 190 class ValueTracker { 191 private: 192 /// The current point into the use-def chain. 193 const MachineInstr *Def; 194 /// The index of the definition in Def. 195 unsigned DefIdx; 196 /// The sub register index of the definition. 197 unsigned DefSubReg; 198 /// The register where the value can be found. 199 unsigned Reg; 200 /// Specifiy whether or not the value tracking looks through 201 /// complex instructions. When this is false, the value tracker 202 /// bails on everything that is not a copy or a bitcast. 203 /// 204 /// Note: This could have been implemented as a specialized version of 205 /// the ValueTracker class but that would have complicated the code of 206 /// the users of this class. 207 bool UseAdvancedTracking; 208 /// MachineRegisterInfo used to perform tracking. 209 const MachineRegisterInfo &MRI; 210 /// Optional TargetInstrInfo used to perform some complex 211 /// tracking. 212 const TargetInstrInfo *TII; 213 214 /// \brief Dispatcher to the right underlying implementation of 215 /// getNextSource. 216 bool getNextSourceImpl(unsigned &SrcReg, unsigned &SrcSubReg); 217 /// \brief Specialized version of getNextSource for Copy instructions. 218 bool getNextSourceFromCopy(unsigned &SrcReg, unsigned &SrcSubReg); 219 /// \brief Specialized version of getNextSource for Bitcast instructions. 220 bool getNextSourceFromBitcast(unsigned &SrcReg, unsigned &SrcSubReg); 221 /// \brief Specialized version of getNextSource for RegSequence 222 /// instructions. 223 bool getNextSourceFromRegSequence(unsigned &SrcReg, unsigned &SrcSubReg); 224 /// \brief Specialized version of getNextSource for InsertSubreg 225 /// instructions. 226 bool getNextSourceFromInsertSubreg(unsigned &SrcReg, unsigned &SrcSubReg); 227 /// \brief Specialized version of getNextSource for ExtractSubreg 228 /// instructions. 229 bool getNextSourceFromExtractSubreg(unsigned &SrcReg, unsigned &SrcSubReg); 230 /// \brief Specialized version of getNextSource for SubregToReg 231 /// instructions. 232 bool getNextSourceFromSubregToReg(unsigned &SrcReg, unsigned &SrcSubReg); 233 234 public: 235 /// \brief Create a ValueTracker instance for the value defined by \p Reg. 236 /// \p DefSubReg represents the sub register index the value tracker will 237 /// track. It does not need to match the sub register index used in the 238 /// definition of \p Reg. 239 /// \p UseAdvancedTracking specifies whether or not the value tracker looks 240 /// through complex instructions. By default (false), it handles only copy 241 /// and bitcast instructions. 242 /// If \p Reg is a physical register, a value tracker constructed with 243 /// this constructor will not find any alternative source. 244 /// Indeed, when \p Reg is a physical register that constructor does not 245 /// know which definition of \p Reg it should track. 246 /// Use the next constructor to track a physical register. 247 ValueTracker(unsigned Reg, unsigned DefSubReg, 248 const MachineRegisterInfo &MRI, 249 bool UseAdvancedTracking = false, 250 const TargetInstrInfo *TII = nullptr) 251 : Def(nullptr), DefIdx(0), DefSubReg(DefSubReg), Reg(Reg), 252 UseAdvancedTracking(UseAdvancedTracking), MRI(MRI), TII(TII) { 253 if (!TargetRegisterInfo::isPhysicalRegister(Reg)) { 254 Def = MRI.getVRegDef(Reg); 255 DefIdx = MRI.def_begin(Reg).getOperandNo(); 256 } 257 } 258 259 /// \brief Create a ValueTracker instance for the value defined by 260 /// the pair \p MI, \p DefIdx. 261 /// Unlike the other constructor, the value tracker produced by this one 262 /// may be able to find a new source when the definition is a physical 263 /// register. 264 /// This could be useful to rewrite target specific instructions into 265 /// generic copy instructions. 266 ValueTracker(const MachineInstr &MI, unsigned DefIdx, unsigned DefSubReg, 267 const MachineRegisterInfo &MRI, 268 bool UseAdvancedTracking = false, 269 const TargetInstrInfo *TII = nullptr) 270 : Def(&MI), DefIdx(DefIdx), DefSubReg(DefSubReg), 271 UseAdvancedTracking(UseAdvancedTracking), MRI(MRI), TII(TII) { 272 assert(DefIdx < Def->getDesc().getNumDefs() && 273 Def->getOperand(DefIdx).isReg() && "Invalid definition"); 274 Reg = Def->getOperand(DefIdx).getReg(); 275 } 276 277 /// \brief Following the use-def chain, get the next available source 278 /// for the tracked value. 279 /// When the returned value is not nullptr, \p SrcReg gives the register 280 /// that contain the tracked value. 281 /// \note The sub register index returned in \p SrcSubReg must be used 282 /// on \p SrcReg to access the actual value. 283 /// \return Unless the returned value is nullptr (i.e., no source found), 284 /// \p SrcReg gives the register of the next source used in the returned 285 /// instruction and \p SrcSubReg the sub-register index to be used on that 286 /// source to get the tracked value. When nullptr is returned, no 287 /// alternative source has been found. 288 const MachineInstr *getNextSource(unsigned &SrcReg, unsigned &SrcSubReg); 289 290 /// \brief Get the last register where the initial value can be found. 291 /// Initially this is the register of the definition. 292 /// Then, after each successful call to getNextSource, this is the 293 /// register of the last source. 294 unsigned getReg() const { return Reg; } 295 }; 296 } 297 298 char PeepholeOptimizer::ID = 0; 299 char &llvm::PeepholeOptimizerID = PeepholeOptimizer::ID; 300 INITIALIZE_PASS_BEGIN(PeepholeOptimizer, "peephole-opts", 301 "Peephole Optimizations", false, false) 302 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree) 303 INITIALIZE_PASS_END(PeepholeOptimizer, "peephole-opts", 304 "Peephole Optimizations", false, false) 305 306 /// optimizeExtInstr - If instruction is a copy-like instruction, i.e. it reads 307 /// a single register and writes a single register and it does not modify the 308 /// source, and if the source value is preserved as a sub-register of the 309 /// result, then replace all reachable uses of the source with the subreg of the 310 /// result. 311 /// 312 /// Do not generate an EXTRACT that is used only in a debug use, as this changes 313 /// the code. Since this code does not currently share EXTRACTs, just ignore all 314 /// debug uses. 315 bool PeepholeOptimizer:: 316 optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB, 317 SmallPtrSetImpl<MachineInstr*> &LocalMIs) { 318 unsigned SrcReg, DstReg, SubIdx; 319 if (!TII->isCoalescableExtInstr(*MI, SrcReg, DstReg, SubIdx)) 320 return false; 321 322 if (TargetRegisterInfo::isPhysicalRegister(DstReg) || 323 TargetRegisterInfo::isPhysicalRegister(SrcReg)) 324 return false; 325 326 if (MRI->hasOneNonDBGUse(SrcReg)) 327 // No other uses. 328 return false; 329 330 // Ensure DstReg can get a register class that actually supports 331 // sub-registers. Don't change the class until we commit. 332 const TargetRegisterClass *DstRC = MRI->getRegClass(DstReg); 333 DstRC = TRI->getSubClassWithSubReg(DstRC, SubIdx); 334 if (!DstRC) 335 return false; 336 337 // The ext instr may be operating on a sub-register of SrcReg as well. 338 // PPC::EXTSW is a 32 -> 64-bit sign extension, but it reads a 64-bit 339 // register. 340 // If UseSrcSubIdx is Set, SubIdx also applies to SrcReg, and only uses of 341 // SrcReg:SubIdx should be replaced. 342 bool UseSrcSubIdx = 343 TRI->getSubClassWithSubReg(MRI->getRegClass(SrcReg), SubIdx) != nullptr; 344 345 // The source has other uses. See if we can replace the other uses with use of 346 // the result of the extension. 347 SmallPtrSet<MachineBasicBlock*, 4> ReachedBBs; 348 for (MachineInstr &UI : MRI->use_nodbg_instructions(DstReg)) 349 ReachedBBs.insert(UI.getParent()); 350 351 // Uses that are in the same BB of uses of the result of the instruction. 352 SmallVector<MachineOperand*, 8> Uses; 353 354 // Uses that the result of the instruction can reach. 355 SmallVector<MachineOperand*, 8> ExtendedUses; 356 357 bool ExtendLife = true; 358 for (MachineOperand &UseMO : MRI->use_nodbg_operands(SrcReg)) { 359 MachineInstr *UseMI = UseMO.getParent(); 360 if (UseMI == MI) 361 continue; 362 363 if (UseMI->isPHI()) { 364 ExtendLife = false; 365 continue; 366 } 367 368 // Only accept uses of SrcReg:SubIdx. 369 if (UseSrcSubIdx && UseMO.getSubReg() != SubIdx) 370 continue; 371 372 // It's an error to translate this: 373 // 374 // %reg1025 = <sext> %reg1024 375 // ... 376 // %reg1026 = SUBREG_TO_REG 0, %reg1024, 4 377 // 378 // into this: 379 // 380 // %reg1025 = <sext> %reg1024 381 // ... 382 // %reg1027 = COPY %reg1025:4 383 // %reg1026 = SUBREG_TO_REG 0, %reg1027, 4 384 // 385 // The problem here is that SUBREG_TO_REG is there to assert that an 386 // implicit zext occurs. It doesn't insert a zext instruction. If we allow 387 // the COPY here, it will give us the value after the <sext>, not the 388 // original value of %reg1024 before <sext>. 389 if (UseMI->getOpcode() == TargetOpcode::SUBREG_TO_REG) 390 continue; 391 392 MachineBasicBlock *UseMBB = UseMI->getParent(); 393 if (UseMBB == MBB) { 394 // Local uses that come after the extension. 395 if (!LocalMIs.count(UseMI)) 396 Uses.push_back(&UseMO); 397 } else if (ReachedBBs.count(UseMBB)) { 398 // Non-local uses where the result of the extension is used. Always 399 // replace these unless it's a PHI. 400 Uses.push_back(&UseMO); 401 } else if (Aggressive && DT->dominates(MBB, UseMBB)) { 402 // We may want to extend the live range of the extension result in order 403 // to replace these uses. 404 ExtendedUses.push_back(&UseMO); 405 } else { 406 // Both will be live out of the def MBB anyway. Don't extend live range of 407 // the extension result. 408 ExtendLife = false; 409 break; 410 } 411 } 412 413 if (ExtendLife && !ExtendedUses.empty()) 414 // Extend the liveness of the extension result. 415 Uses.append(ExtendedUses.begin(), ExtendedUses.end()); 416 417 // Now replace all uses. 418 bool Changed = false; 419 if (!Uses.empty()) { 420 SmallPtrSet<MachineBasicBlock*, 4> PHIBBs; 421 422 // Look for PHI uses of the extended result, we don't want to extend the 423 // liveness of a PHI input. It breaks all kinds of assumptions down 424 // stream. A PHI use is expected to be the kill of its source values. 425 for (MachineInstr &UI : MRI->use_nodbg_instructions(DstReg)) 426 if (UI.isPHI()) 427 PHIBBs.insert(UI.getParent()); 428 429 const TargetRegisterClass *RC = MRI->getRegClass(SrcReg); 430 for (unsigned i = 0, e = Uses.size(); i != e; ++i) { 431 MachineOperand *UseMO = Uses[i]; 432 MachineInstr *UseMI = UseMO->getParent(); 433 MachineBasicBlock *UseMBB = UseMI->getParent(); 434 if (PHIBBs.count(UseMBB)) 435 continue; 436 437 // About to add uses of DstReg, clear DstReg's kill flags. 438 if (!Changed) { 439 MRI->clearKillFlags(DstReg); 440 MRI->constrainRegClass(DstReg, DstRC); 441 } 442 443 unsigned NewVR = MRI->createVirtualRegister(RC); 444 MachineInstr *Copy = BuildMI(*UseMBB, UseMI, UseMI->getDebugLoc(), 445 TII->get(TargetOpcode::COPY), NewVR) 446 .addReg(DstReg, 0, SubIdx); 447 // SubIdx applies to both SrcReg and DstReg when UseSrcSubIdx is set. 448 if (UseSrcSubIdx) { 449 Copy->getOperand(0).setSubReg(SubIdx); 450 Copy->getOperand(0).setIsUndef(); 451 } 452 UseMO->setReg(NewVR); 453 ++NumReuse; 454 Changed = true; 455 } 456 } 457 458 return Changed; 459 } 460 461 /// optimizeCmpInstr - If the instruction is a compare and the previous 462 /// instruction it's comparing against all ready sets (or could be modified to 463 /// set) the same flag as the compare, then we can remove the comparison and use 464 /// the flag from the previous instruction. 465 bool PeepholeOptimizer::optimizeCmpInstr(MachineInstr *MI, 466 MachineBasicBlock *MBB) { 467 // If this instruction is a comparison against zero and isn't comparing a 468 // physical register, we can try to optimize it. 469 unsigned SrcReg, SrcReg2; 470 int CmpMask, CmpValue; 471 if (!TII->analyzeCompare(MI, SrcReg, SrcReg2, CmpMask, CmpValue) || 472 TargetRegisterInfo::isPhysicalRegister(SrcReg) || 473 (SrcReg2 != 0 && TargetRegisterInfo::isPhysicalRegister(SrcReg2))) 474 return false; 475 476 // Attempt to optimize the comparison instruction. 477 if (TII->optimizeCompareInstr(MI, SrcReg, SrcReg2, CmpMask, CmpValue, MRI)) { 478 ++NumCmps; 479 return true; 480 } 481 482 return false; 483 } 484 485 /// Optimize a select instruction. 486 bool PeepholeOptimizer::optimizeSelect(MachineInstr *MI, 487 SmallPtrSetImpl<MachineInstr *> &LocalMIs) { 488 unsigned TrueOp = 0; 489 unsigned FalseOp = 0; 490 bool Optimizable = false; 491 SmallVector<MachineOperand, 4> Cond; 492 if (TII->analyzeSelect(MI, Cond, TrueOp, FalseOp, Optimizable)) 493 return false; 494 if (!Optimizable) 495 return false; 496 if (!TII->optimizeSelect(MI, LocalMIs)) 497 return false; 498 MI->eraseFromParent(); 499 ++NumSelects; 500 return true; 501 } 502 503 /// \brief Check if a simpler conditional branch can be 504 // generated 505 bool PeepholeOptimizer::optimizeCondBranch(MachineInstr *MI) { 506 return TII->optimizeCondBranch(MI); 507 } 508 509 /// \brief Check if the registers defined by the pair (RegisterClass, SubReg) 510 /// share the same register file. 511 static bool shareSameRegisterFile(const TargetRegisterInfo &TRI, 512 const TargetRegisterClass *DefRC, 513 unsigned DefSubReg, 514 const TargetRegisterClass *SrcRC, 515 unsigned SrcSubReg) { 516 // Same register class. 517 if (DefRC == SrcRC) 518 return true; 519 520 // Both operands are sub registers. Check if they share a register class. 521 unsigned SrcIdx, DefIdx; 522 if (SrcSubReg && DefSubReg) 523 return TRI.getCommonSuperRegClass(SrcRC, SrcSubReg, DefRC, DefSubReg, 524 SrcIdx, DefIdx) != nullptr; 525 // At most one of the register is a sub register, make it Src to avoid 526 // duplicating the test. 527 if (!SrcSubReg) { 528 std::swap(DefSubReg, SrcSubReg); 529 std::swap(DefRC, SrcRC); 530 } 531 532 // One of the register is a sub register, check if we can get a superclass. 533 if (SrcSubReg) 534 return TRI.getMatchingSuperRegClass(SrcRC, DefRC, SrcSubReg) != nullptr; 535 // Plain copy. 536 return TRI.getCommonSubClass(DefRC, SrcRC) != nullptr; 537 } 538 539 /// \brief Try to find the next source that share the same register file 540 /// for the value defined by \p Reg and \p SubReg. 541 /// When true is returned, \p Reg and \p SubReg are updated with the 542 /// register number and sub-register index of the new source. 543 /// \return False if no alternative sources are available. True otherwise. 544 bool PeepholeOptimizer::findNextSource(unsigned &Reg, unsigned &SubReg) { 545 // Do not try to find a new source for a physical register. 546 // So far we do not have any motivating example for doing that. 547 // Thus, instead of maintaining untested code, we will revisit that if 548 // that changes at some point. 549 if (TargetRegisterInfo::isPhysicalRegister(Reg)) 550 return false; 551 552 const TargetRegisterClass *DefRC = MRI->getRegClass(Reg); 553 unsigned DefSubReg = SubReg; 554 555 unsigned Src; 556 unsigned SrcSubReg; 557 bool ShouldRewrite = false; 558 559 // Follow the chain of copies until we reach the top of the use-def chain 560 // or find a more suitable source. 561 ValueTracker ValTracker(Reg, DefSubReg, *MRI, !DisableAdvCopyOpt, TII); 562 do { 563 unsigned CopySrcReg, CopySrcSubReg; 564 if (!ValTracker.getNextSource(CopySrcReg, CopySrcSubReg)) 565 break; 566 Src = CopySrcReg; 567 SrcSubReg = CopySrcSubReg; 568 569 // Do not extend the live-ranges of physical registers as they add 570 // constraints to the register allocator. 571 // Moreover, if we want to extend the live-range of a physical register, 572 // unlike SSA virtual register, we will have to check that they are not 573 // redefine before the related use. 574 if (TargetRegisterInfo::isPhysicalRegister(Src)) 575 break; 576 577 const TargetRegisterClass *SrcRC = MRI->getRegClass(Src); 578 579 // If this source does not incur a cross register bank copy, use it. 580 ShouldRewrite = shareSameRegisterFile(*TRI, DefRC, DefSubReg, SrcRC, 581 SrcSubReg); 582 } while (!ShouldRewrite); 583 584 // If we did not find a more suitable source, there is nothing to optimize. 585 if (!ShouldRewrite || Src == Reg) 586 return false; 587 588 Reg = Src; 589 SubReg = SrcSubReg; 590 return true; 591 } 592 593 namespace { 594 /// \brief Helper class to rewrite the arguments of a copy-like instruction. 595 class CopyRewriter { 596 protected: 597 /// The copy-like instruction. 598 MachineInstr &CopyLike; 599 /// The index of the source being rewritten. 600 unsigned CurrentSrcIdx; 601 602 public: 603 CopyRewriter(MachineInstr &MI) : CopyLike(MI), CurrentSrcIdx(0) {} 604 605 virtual ~CopyRewriter() {} 606 607 /// \brief Get the next rewritable source (SrcReg, SrcSubReg) and 608 /// the related value that it affects (TrackReg, TrackSubReg). 609 /// A source is considered rewritable if its register class and the 610 /// register class of the related TrackReg may not be register 611 /// coalescer friendly. In other words, given a copy-like instruction 612 /// not all the arguments may be returned at rewritable source, since 613 /// some arguments are none to be register coalescer friendly. 614 /// 615 /// Each call of this method moves the current source to the next 616 /// rewritable source. 617 /// For instance, let CopyLike be the instruction to rewrite. 618 /// CopyLike has one definition and one source: 619 /// dst.dstSubIdx = CopyLike src.srcSubIdx. 620 /// 621 /// The first call will give the first rewritable source, i.e., 622 /// the only source this instruction has: 623 /// (SrcReg, SrcSubReg) = (src, srcSubIdx). 624 /// This source defines the whole definition, i.e., 625 /// (TrackReg, TrackSubReg) = (dst, dstSubIdx). 626 /// 627 /// The second and subsequent calls will return false, has there is only one 628 /// rewritable source. 629 /// 630 /// \return True if a rewritable source has been found, false otherwise. 631 /// The output arguments are valid if and only if true is returned. 632 virtual bool getNextRewritableSource(unsigned &SrcReg, unsigned &SrcSubReg, 633 unsigned &TrackReg, 634 unsigned &TrackSubReg) { 635 // If CurrentSrcIdx == 1, this means this function has already been 636 // called once. CopyLike has one defintiion and one argument, thus, 637 // there is nothing else to rewrite. 638 if (!CopyLike.isCopy() || CurrentSrcIdx == 1) 639 return false; 640 // This is the first call to getNextRewritableSource. 641 // Move the CurrentSrcIdx to remember that we made that call. 642 CurrentSrcIdx = 1; 643 // The rewritable source is the argument. 644 const MachineOperand &MOSrc = CopyLike.getOperand(1); 645 SrcReg = MOSrc.getReg(); 646 SrcSubReg = MOSrc.getSubReg(); 647 // What we track are the alternative sources of the definition. 648 const MachineOperand &MODef = CopyLike.getOperand(0); 649 TrackReg = MODef.getReg(); 650 TrackSubReg = MODef.getSubReg(); 651 return true; 652 } 653 654 /// \brief Rewrite the current source with \p NewReg and \p NewSubReg 655 /// if possible. 656 /// \return True if the rewritting was possible, false otherwise. 657 virtual bool RewriteCurrentSource(unsigned NewReg, unsigned NewSubReg) { 658 if (!CopyLike.isCopy() || CurrentSrcIdx != 1) 659 return false; 660 MachineOperand &MOSrc = CopyLike.getOperand(CurrentSrcIdx); 661 MOSrc.setReg(NewReg); 662 MOSrc.setSubReg(NewSubReg); 663 return true; 664 } 665 }; 666 667 /// \brief Specialized rewriter for INSERT_SUBREG instruction. 668 class InsertSubregRewriter : public CopyRewriter { 669 public: 670 InsertSubregRewriter(MachineInstr &MI) : CopyRewriter(MI) { 671 assert(MI.isInsertSubreg() && "Invalid instruction"); 672 } 673 674 /// \brief See CopyRewriter::getNextRewritableSource. 675 /// Here CopyLike has the following form: 676 /// dst = INSERT_SUBREG Src1, Src2.src2SubIdx, subIdx. 677 /// Src1 has the same register class has dst, hence, there is 678 /// nothing to rewrite. 679 /// Src2.src2SubIdx, may not be register coalescer friendly. 680 /// Therefore, the first call to this method returns: 681 /// (SrcReg, SrcSubReg) = (Src2, src2SubIdx). 682 /// (TrackReg, TrackSubReg) = (dst, subIdx). 683 /// 684 /// Subsequence calls will return false. 685 bool getNextRewritableSource(unsigned &SrcReg, unsigned &SrcSubReg, 686 unsigned &TrackReg, 687 unsigned &TrackSubReg) override { 688 // If we already get the only source we can rewrite, return false. 689 if (CurrentSrcIdx == 2) 690 return false; 691 // We are looking at v2 = INSERT_SUBREG v0, v1, sub0. 692 CurrentSrcIdx = 2; 693 const MachineOperand &MOInsertedReg = CopyLike.getOperand(2); 694 SrcReg = MOInsertedReg.getReg(); 695 SrcSubReg = MOInsertedReg.getSubReg(); 696 const MachineOperand &MODef = CopyLike.getOperand(0); 697 698 // We want to track something that is compatible with the 699 // partial definition. 700 TrackReg = MODef.getReg(); 701 if (MODef.getSubReg()) 702 // Bails if we have to compose sub-register indices. 703 return false; 704 TrackSubReg = (unsigned)CopyLike.getOperand(3).getImm(); 705 return true; 706 } 707 bool RewriteCurrentSource(unsigned NewReg, unsigned NewSubReg) override { 708 if (CurrentSrcIdx != 2) 709 return false; 710 // We are rewriting the inserted reg. 711 MachineOperand &MO = CopyLike.getOperand(CurrentSrcIdx); 712 MO.setReg(NewReg); 713 MO.setSubReg(NewSubReg); 714 return true; 715 } 716 }; 717 718 /// \brief Specialized rewriter for EXTRACT_SUBREG instruction. 719 class ExtractSubregRewriter : public CopyRewriter { 720 const TargetInstrInfo &TII; 721 722 public: 723 ExtractSubregRewriter(MachineInstr &MI, const TargetInstrInfo &TII) 724 : CopyRewriter(MI), TII(TII) { 725 assert(MI.isExtractSubreg() && "Invalid instruction"); 726 } 727 728 /// \brief See CopyRewriter::getNextRewritableSource. 729 /// Here CopyLike has the following form: 730 /// dst.dstSubIdx = EXTRACT_SUBREG Src, subIdx. 731 /// There is only one rewritable source: Src.subIdx, 732 /// which defines dst.dstSubIdx. 733 bool getNextRewritableSource(unsigned &SrcReg, unsigned &SrcSubReg, 734 unsigned &TrackReg, 735 unsigned &TrackSubReg) override { 736 // If we already get the only source we can rewrite, return false. 737 if (CurrentSrcIdx == 1) 738 return false; 739 // We are looking at v1 = EXTRACT_SUBREG v0, sub0. 740 CurrentSrcIdx = 1; 741 const MachineOperand &MOExtractedReg = CopyLike.getOperand(1); 742 SrcReg = MOExtractedReg.getReg(); 743 // If we have to compose sub-register indices, bails out. 744 if (MOExtractedReg.getSubReg()) 745 return false; 746 747 SrcSubReg = CopyLike.getOperand(2).getImm(); 748 749 // We want to track something that is compatible with the definition. 750 const MachineOperand &MODef = CopyLike.getOperand(0); 751 TrackReg = MODef.getReg(); 752 TrackSubReg = MODef.getSubReg(); 753 return true; 754 } 755 756 bool RewriteCurrentSource(unsigned NewReg, unsigned NewSubReg) override { 757 // The only source we can rewrite is the input register. 758 if (CurrentSrcIdx != 1) 759 return false; 760 761 CopyLike.getOperand(CurrentSrcIdx).setReg(NewReg); 762 763 // If we find a source that does not require to extract something, 764 // rewrite the operation with a copy. 765 if (!NewSubReg) { 766 // Move the current index to an invalid position. 767 // We do not want another call to this method to be able 768 // to do any change. 769 CurrentSrcIdx = -1; 770 // Rewrite the operation as a COPY. 771 // Get rid of the sub-register index. 772 CopyLike.RemoveOperand(2); 773 // Morph the operation into a COPY. 774 CopyLike.setDesc(TII.get(TargetOpcode::COPY)); 775 return true; 776 } 777 CopyLike.getOperand(CurrentSrcIdx + 1).setImm(NewSubReg); 778 return true; 779 } 780 }; 781 782 /// \brief Specialized rewriter for REG_SEQUENCE instruction. 783 class RegSequenceRewriter : public CopyRewriter { 784 public: 785 RegSequenceRewriter(MachineInstr &MI) : CopyRewriter(MI) { 786 assert(MI.isRegSequence() && "Invalid instruction"); 787 } 788 789 /// \brief See CopyRewriter::getNextRewritableSource. 790 /// Here CopyLike has the following form: 791 /// dst = REG_SEQUENCE Src1.src1SubIdx, subIdx1, Src2.src2SubIdx, subIdx2. 792 /// Each call will return a different source, walking all the available 793 /// source. 794 /// 795 /// The first call returns: 796 /// (SrcReg, SrcSubReg) = (Src1, src1SubIdx). 797 /// (TrackReg, TrackSubReg) = (dst, subIdx1). 798 /// 799 /// The second call returns: 800 /// (SrcReg, SrcSubReg) = (Src2, src2SubIdx). 801 /// (TrackReg, TrackSubReg) = (dst, subIdx2). 802 /// 803 /// And so on, until all the sources have been traversed, then 804 /// it returns false. 805 bool getNextRewritableSource(unsigned &SrcReg, unsigned &SrcSubReg, 806 unsigned &TrackReg, 807 unsigned &TrackSubReg) override { 808 // We are looking at v0 = REG_SEQUENCE v1, sub1, v2, sub2, etc. 809 810 // If this is the first call, move to the first argument. 811 if (CurrentSrcIdx == 0) { 812 CurrentSrcIdx = 1; 813 } else { 814 // Otherwise, move to the next argument and check that it is valid. 815 CurrentSrcIdx += 2; 816 if (CurrentSrcIdx >= CopyLike.getNumOperands()) 817 return false; 818 } 819 const MachineOperand &MOInsertedReg = CopyLike.getOperand(CurrentSrcIdx); 820 SrcReg = MOInsertedReg.getReg(); 821 // If we have to compose sub-register indices, bails out. 822 if ((SrcSubReg = MOInsertedReg.getSubReg())) 823 return false; 824 825 // We want to track something that is compatible with the related 826 // partial definition. 827 TrackSubReg = CopyLike.getOperand(CurrentSrcIdx + 1).getImm(); 828 829 const MachineOperand &MODef = CopyLike.getOperand(0); 830 TrackReg = MODef.getReg(); 831 // If we have to compose sub-registers, bails. 832 return MODef.getSubReg() == 0; 833 } 834 835 bool RewriteCurrentSource(unsigned NewReg, unsigned NewSubReg) override { 836 // We cannot rewrite out of bound operands. 837 // Moreover, rewritable sources are at odd positions. 838 if ((CurrentSrcIdx & 1) != 1 || CurrentSrcIdx > CopyLike.getNumOperands()) 839 return false; 840 841 MachineOperand &MO = CopyLike.getOperand(CurrentSrcIdx); 842 MO.setReg(NewReg); 843 MO.setSubReg(NewSubReg); 844 return true; 845 } 846 }; 847 } // End namespace. 848 849 /// \brief Get the appropriated CopyRewriter for \p MI. 850 /// \return A pointer to a dynamically allocated CopyRewriter or nullptr 851 /// if no rewriter works for \p MI. 852 static CopyRewriter *getCopyRewriter(MachineInstr &MI, 853 const TargetInstrInfo &TII) { 854 switch (MI.getOpcode()) { 855 default: 856 return nullptr; 857 case TargetOpcode::COPY: 858 return new CopyRewriter(MI); 859 case TargetOpcode::INSERT_SUBREG: 860 return new InsertSubregRewriter(MI); 861 case TargetOpcode::EXTRACT_SUBREG: 862 return new ExtractSubregRewriter(MI, TII); 863 case TargetOpcode::REG_SEQUENCE: 864 return new RegSequenceRewriter(MI); 865 } 866 llvm_unreachable(nullptr); 867 } 868 869 /// \brief Optimize generic copy instructions to avoid cross 870 /// register bank copy. The optimization looks through a chain of 871 /// copies and tries to find a source that has a compatible register 872 /// class. 873 /// Two register classes are considered to be compatible if they share 874 /// the same register bank. 875 /// New copies issued by this optimization are register allocator 876 /// friendly. This optimization does not remove any copy as it may 877 /// overconstraint the register allocator, but replaces some operands 878 /// when possible. 879 /// \pre isCoalescableCopy(*MI) is true. 880 /// \return True, when \p MI has been rewritten. False otherwise. 881 bool PeepholeOptimizer::optimizeCoalescableCopy(MachineInstr *MI) { 882 assert(MI && isCoalescableCopy(*MI) && "Invalid argument"); 883 assert(MI->getDesc().getNumDefs() == 1 && 884 "Coalescer can understand multiple defs?!"); 885 const MachineOperand &MODef = MI->getOperand(0); 886 // Do not rewrite physical definitions. 887 if (TargetRegisterInfo::isPhysicalRegister(MODef.getReg())) 888 return false; 889 890 bool Changed = false; 891 // Get the right rewriter for the current copy. 892 std::unique_ptr<CopyRewriter> CpyRewriter(getCopyRewriter(*MI, *TII)); 893 // If none exists, bails out. 894 if (!CpyRewriter) 895 return false; 896 // Rewrite each rewritable source. 897 unsigned SrcReg, SrcSubReg, TrackReg, TrackSubReg; 898 while (CpyRewriter->getNextRewritableSource(SrcReg, SrcSubReg, TrackReg, 899 TrackSubReg)) { 900 unsigned NewSrc = TrackReg; 901 unsigned NewSubReg = TrackSubReg; 902 // Try to find a more suitable source. 903 // If we failed to do so, or get the actual source, 904 // move to the next source. 905 if (!findNextSource(NewSrc, NewSubReg) || SrcReg == NewSrc) 906 continue; 907 // Rewrite source. 908 if (CpyRewriter->RewriteCurrentSource(NewSrc, NewSubReg)) { 909 // We may have extended the live-range of NewSrc, account for that. 910 MRI->clearKillFlags(NewSrc); 911 Changed = true; 912 } 913 } 914 // TODO: We could have a clean-up method to tidy the instruction. 915 // E.g., v0 = INSERT_SUBREG v1, v1.sub0, sub0 916 // => v0 = COPY v1 917 // Currently we haven't seen motivating example for that and we 918 // want to avoid untested code. 919 NumRewrittenCopies += Changed; 920 return Changed; 921 } 922 923 /// \brief Optimize copy-like instructions to create 924 /// register coalescer friendly instruction. 925 /// The optimization tries to kill-off the \p MI by looking 926 /// through a chain of copies to find a source that has a compatible 927 /// register class. 928 /// If such a source is found, it replace \p MI by a generic COPY 929 /// operation. 930 /// \pre isUncoalescableCopy(*MI) is true. 931 /// \return True, when \p MI has been optimized. In that case, \p MI has 932 /// been removed from its parent. 933 /// All COPY instructions created, are inserted in \p LocalMIs. 934 bool PeepholeOptimizer::optimizeUncoalescableCopy( 935 MachineInstr *MI, SmallPtrSetImpl<MachineInstr *> &LocalMIs) { 936 assert(MI && isUncoalescableCopy(*MI) && "Invalid argument"); 937 938 // Check if we can rewrite all the values defined by this instruction. 939 SmallVector< 940 std::pair<TargetInstrInfo::RegSubRegPair, TargetInstrInfo::RegSubRegPair>, 941 4> RewritePairs; 942 for (const MachineOperand &MODef : MI->defs()) { 943 if (MODef.isDead()) 944 // We can ignore those. 945 continue; 946 947 // If a physical register is here, this is probably for a good reason. 948 // Do not rewrite that. 949 if (TargetRegisterInfo::isPhysicalRegister(MODef.getReg())) 950 return false; 951 952 // If we do not know how to rewrite this definition, there is no point 953 // in trying to kill this instruction. 954 TargetInstrInfo::RegSubRegPair Def(MODef.getReg(), MODef.getSubReg()); 955 TargetInstrInfo::RegSubRegPair Src = Def; 956 if (!findNextSource(Src.Reg, Src.SubReg)) 957 return false; 958 RewritePairs.push_back(std::make_pair(Def, Src)); 959 } 960 // The change is possible for all defs, do it. 961 for (const auto &PairDefSrc : RewritePairs) { 962 const auto &Def = PairDefSrc.first; 963 const auto &Src = PairDefSrc.second; 964 // Rewrite the "copy" in a way the register coalescer understands. 965 assert(!TargetRegisterInfo::isPhysicalRegister(Def.Reg) && 966 "We do not rewrite physical registers"); 967 const TargetRegisterClass *DefRC = MRI->getRegClass(Def.Reg); 968 unsigned NewVR = MRI->createVirtualRegister(DefRC); 969 MachineInstr *NewCopy = BuildMI(*MI->getParent(), MI, MI->getDebugLoc(), 970 TII->get(TargetOpcode::COPY), 971 NewVR).addReg(Src.Reg, 0, Src.SubReg); 972 NewCopy->getOperand(0).setSubReg(Def.SubReg); 973 if (Def.SubReg) 974 NewCopy->getOperand(0).setIsUndef(); 975 LocalMIs.insert(NewCopy); 976 MRI->replaceRegWith(Def.Reg, NewVR); 977 MRI->clearKillFlags(NewVR); 978 // We extended the lifetime of Src. 979 // Clear the kill flags to account for that. 980 MRI->clearKillFlags(Src.Reg); 981 } 982 // MI is now dead. 983 MI->eraseFromParent(); 984 ++NumUncoalescableCopies; 985 return true; 986 } 987 988 /// isLoadFoldable - Check whether MI is a candidate for folding into a later 989 /// instruction. We only fold loads to virtual registers and the virtual 990 /// register defined has a single use. 991 bool PeepholeOptimizer::isLoadFoldable( 992 MachineInstr *MI, 993 SmallSet<unsigned, 16> &FoldAsLoadDefCandidates) { 994 if (!MI->canFoldAsLoad() || !MI->mayLoad()) 995 return false; 996 const MCInstrDesc &MCID = MI->getDesc(); 997 if (MCID.getNumDefs() != 1) 998 return false; 999 1000 unsigned Reg = MI->getOperand(0).getReg(); 1001 // To reduce compilation time, we check MRI->hasOneNonDBGUse when inserting 1002 // loads. It should be checked when processing uses of the load, since 1003 // uses can be removed during peephole. 1004 if (!MI->getOperand(0).getSubReg() && 1005 TargetRegisterInfo::isVirtualRegister(Reg) && 1006 MRI->hasOneNonDBGUse(Reg)) { 1007 FoldAsLoadDefCandidates.insert(Reg); 1008 return true; 1009 } 1010 return false; 1011 } 1012 1013 bool PeepholeOptimizer::isMoveImmediate(MachineInstr *MI, 1014 SmallSet<unsigned, 4> &ImmDefRegs, 1015 DenseMap<unsigned, MachineInstr*> &ImmDefMIs) { 1016 const MCInstrDesc &MCID = MI->getDesc(); 1017 if (!MI->isMoveImmediate()) 1018 return false; 1019 if (MCID.getNumDefs() != 1) 1020 return false; 1021 unsigned Reg = MI->getOperand(0).getReg(); 1022 if (TargetRegisterInfo::isVirtualRegister(Reg)) { 1023 ImmDefMIs.insert(std::make_pair(Reg, MI)); 1024 ImmDefRegs.insert(Reg); 1025 return true; 1026 } 1027 1028 return false; 1029 } 1030 1031 /// foldImmediate - Try folding register operands that are defined by move 1032 /// immediate instructions, i.e. a trivial constant folding optimization, if 1033 /// and only if the def and use are in the same BB. 1034 bool PeepholeOptimizer::foldImmediate(MachineInstr *MI, MachineBasicBlock *MBB, 1035 SmallSet<unsigned, 4> &ImmDefRegs, 1036 DenseMap<unsigned, MachineInstr*> &ImmDefMIs) { 1037 for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) { 1038 MachineOperand &MO = MI->getOperand(i); 1039 if (!MO.isReg() || MO.isDef()) 1040 continue; 1041 unsigned Reg = MO.getReg(); 1042 if (!TargetRegisterInfo::isVirtualRegister(Reg)) 1043 continue; 1044 if (ImmDefRegs.count(Reg) == 0) 1045 continue; 1046 DenseMap<unsigned, MachineInstr*>::iterator II = ImmDefMIs.find(Reg); 1047 assert(II != ImmDefMIs.end()); 1048 if (TII->FoldImmediate(MI, II->second, Reg, MRI)) { 1049 ++NumImmFold; 1050 return true; 1051 } 1052 } 1053 return false; 1054 } 1055 1056 bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { 1057 if (skipOptnoneFunction(*MF.getFunction())) 1058 return false; 1059 1060 DEBUG(dbgs() << "********** PEEPHOLE OPTIMIZER **********\n"); 1061 DEBUG(dbgs() << "********** Function: " << MF.getName() << '\n'); 1062 1063 if (DisablePeephole) 1064 return false; 1065 1066 TII = MF.getSubtarget().getInstrInfo(); 1067 TRI = MF.getSubtarget().getRegisterInfo(); 1068 MRI = &MF.getRegInfo(); 1069 DT = Aggressive ? &getAnalysis<MachineDominatorTree>() : nullptr; 1070 1071 bool Changed = false; 1072 1073 for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) { 1074 MachineBasicBlock *MBB = &*I; 1075 1076 bool SeenMoveImm = false; 1077 1078 // During this forward scan, at some point it needs to answer the question 1079 // "given a pointer to an MI in the current BB, is it located before or 1080 // after the current instruction". 1081 // To perform this, the following set keeps track of the MIs already seen 1082 // during the scan, if a MI is not in the set, it is assumed to be located 1083 // after. Newly created MIs have to be inserted in the set as well. 1084 SmallPtrSet<MachineInstr*, 16> LocalMIs; 1085 SmallSet<unsigned, 4> ImmDefRegs; 1086 DenseMap<unsigned, MachineInstr*> ImmDefMIs; 1087 SmallSet<unsigned, 16> FoldAsLoadDefCandidates; 1088 1089 for (MachineBasicBlock::iterator 1090 MII = I->begin(), MIE = I->end(); MII != MIE; ) { 1091 MachineInstr *MI = &*MII; 1092 // We may be erasing MI below, increment MII now. 1093 ++MII; 1094 LocalMIs.insert(MI); 1095 1096 // Skip debug values. They should not affect this peephole optimization. 1097 if (MI->isDebugValue()) 1098 continue; 1099 1100 // If there exists an instruction which belongs to the following 1101 // categories, we will discard the load candidates. 1102 if (MI->isPosition() || MI->isPHI() || MI->isImplicitDef() || 1103 MI->isKill() || MI->isInlineAsm() || 1104 MI->hasUnmodeledSideEffects()) { 1105 FoldAsLoadDefCandidates.clear(); 1106 continue; 1107 } 1108 if (MI->mayStore() || MI->isCall()) 1109 FoldAsLoadDefCandidates.clear(); 1110 1111 if ((isUncoalescableCopy(*MI) && 1112 optimizeUncoalescableCopy(MI, LocalMIs)) || 1113 (MI->isCompare() && optimizeCmpInstr(MI, MBB)) || 1114 (MI->isSelect() && optimizeSelect(MI, LocalMIs))) { 1115 // MI is deleted. 1116 LocalMIs.erase(MI); 1117 Changed = true; 1118 continue; 1119 } 1120 1121 if (MI->isConditionalBranch() && optimizeCondBranch(MI)) { 1122 Changed = true; 1123 continue; 1124 } 1125 1126 if (isCoalescableCopy(*MI) && optimizeCoalescableCopy(MI)) { 1127 // MI is just rewritten. 1128 Changed = true; 1129 continue; 1130 } 1131 1132 if (isMoveImmediate(MI, ImmDefRegs, ImmDefMIs)) { 1133 SeenMoveImm = true; 1134 } else { 1135 Changed |= optimizeExtInstr(MI, MBB, LocalMIs); 1136 // optimizeExtInstr might have created new instructions after MI 1137 // and before the already incremented MII. Adjust MII so that the 1138 // next iteration sees the new instructions. 1139 MII = MI; 1140 ++MII; 1141 if (SeenMoveImm) 1142 Changed |= foldImmediate(MI, MBB, ImmDefRegs, ImmDefMIs); 1143 } 1144 1145 // Check whether MI is a load candidate for folding into a later 1146 // instruction. If MI is not a candidate, check whether we can fold an 1147 // earlier load into MI. 1148 if (!isLoadFoldable(MI, FoldAsLoadDefCandidates) && 1149 !FoldAsLoadDefCandidates.empty()) { 1150 const MCInstrDesc &MIDesc = MI->getDesc(); 1151 for (unsigned i = MIDesc.getNumDefs(); i != MIDesc.getNumOperands(); 1152 ++i) { 1153 const MachineOperand &MOp = MI->getOperand(i); 1154 if (!MOp.isReg()) 1155 continue; 1156 unsigned FoldAsLoadDefReg = MOp.getReg(); 1157 if (FoldAsLoadDefCandidates.count(FoldAsLoadDefReg)) { 1158 // We need to fold load after optimizeCmpInstr, since 1159 // optimizeCmpInstr can enable folding by converting SUB to CMP. 1160 // Save FoldAsLoadDefReg because optimizeLoadInstr() resets it and 1161 // we need it for markUsesInDebugValueAsUndef(). 1162 unsigned FoldedReg = FoldAsLoadDefReg; 1163 MachineInstr *DefMI = nullptr; 1164 MachineInstr *FoldMI = TII->optimizeLoadInstr(MI, MRI, 1165 FoldAsLoadDefReg, 1166 DefMI); 1167 if (FoldMI) { 1168 // Update LocalMIs since we replaced MI with FoldMI and deleted 1169 // DefMI. 1170 DEBUG(dbgs() << "Replacing: " << *MI); 1171 DEBUG(dbgs() << " With: " << *FoldMI); 1172 LocalMIs.erase(MI); 1173 LocalMIs.erase(DefMI); 1174 LocalMIs.insert(FoldMI); 1175 MI->eraseFromParent(); 1176 DefMI->eraseFromParent(); 1177 MRI->markUsesInDebugValueAsUndef(FoldedReg); 1178 FoldAsLoadDefCandidates.erase(FoldedReg); 1179 ++NumLoadFold; 1180 // MI is replaced with FoldMI. 1181 Changed = true; 1182 break; 1183 } 1184 } 1185 } 1186 } 1187 } 1188 } 1189 1190 return Changed; 1191 } 1192 1193 bool ValueTracker::getNextSourceFromCopy(unsigned &SrcReg, 1194 unsigned &SrcSubReg) { 1195 assert(Def->isCopy() && "Invalid definition"); 1196 // Copy instruction are supposed to be: Def = Src. 1197 // If someone breaks this assumption, bad things will happen everywhere. 1198 assert(Def->getNumOperands() == 2 && "Invalid number of operands"); 1199 1200 if (Def->getOperand(DefIdx).getSubReg() != DefSubReg) 1201 // If we look for a different subreg, it means we want a subreg of src. 1202 // Bails as we do not support composing subreg yet. 1203 return false; 1204 // Otherwise, we want the whole source. 1205 const MachineOperand &Src = Def->getOperand(1); 1206 SrcReg = Src.getReg(); 1207 SrcSubReg = Src.getSubReg(); 1208 return true; 1209 } 1210 1211 bool ValueTracker::getNextSourceFromBitcast(unsigned &SrcReg, 1212 unsigned &SrcSubReg) { 1213 assert(Def->isBitcast() && "Invalid definition"); 1214 1215 // Bail if there are effects that a plain copy will not expose. 1216 if (Def->hasUnmodeledSideEffects()) 1217 return false; 1218 1219 // Bitcasts with more than one def are not supported. 1220 if (Def->getDesc().getNumDefs() != 1) 1221 return false; 1222 if (Def->getOperand(DefIdx).getSubReg() != DefSubReg) 1223 // If we look for a different subreg, it means we want a subreg of the src. 1224 // Bails as we do not support composing subreg yet. 1225 return false; 1226 1227 unsigned SrcIdx = Def->getNumOperands(); 1228 for (unsigned OpIdx = DefIdx + 1, EndOpIdx = SrcIdx; OpIdx != EndOpIdx; 1229 ++OpIdx) { 1230 const MachineOperand &MO = Def->getOperand(OpIdx); 1231 if (!MO.isReg() || !MO.getReg()) 1232 continue; 1233 assert(!MO.isDef() && "We should have skipped all the definitions by now"); 1234 if (SrcIdx != EndOpIdx) 1235 // Multiple sources? 1236 return false; 1237 SrcIdx = OpIdx; 1238 } 1239 const MachineOperand &Src = Def->getOperand(SrcIdx); 1240 SrcReg = Src.getReg(); 1241 SrcSubReg = Src.getSubReg(); 1242 return true; 1243 } 1244 1245 bool ValueTracker::getNextSourceFromRegSequence(unsigned &SrcReg, 1246 unsigned &SrcSubReg) { 1247 assert((Def->isRegSequence() || Def->isRegSequenceLike()) && 1248 "Invalid definition"); 1249 1250 if (Def->getOperand(DefIdx).getSubReg()) 1251 // If we are composing subreg, bails out. 1252 // The case we are checking is Def.<subreg> = REG_SEQUENCE. 1253 // This should almost never happen as the SSA property is tracked at 1254 // the register level (as opposed to the subreg level). 1255 // I.e., 1256 // Def.sub0 = 1257 // Def.sub1 = 1258 // is a valid SSA representation for Def.sub0 and Def.sub1, but not for 1259 // Def. Thus, it must not be generated. 1260 // However, some code could theoretically generates a single 1261 // Def.sub0 (i.e, not defining the other subregs) and we would 1262 // have this case. 1263 // If we can ascertain (or force) that this never happens, we could 1264 // turn that into an assertion. 1265 return false; 1266 1267 if (!TII) 1268 // We could handle the REG_SEQUENCE here, but we do not want to 1269 // duplicate the code from the generic TII. 1270 return false; 1271 1272 SmallVector<TargetInstrInfo::RegSubRegPairAndIdx, 8> RegSeqInputRegs; 1273 if (!TII->getRegSequenceInputs(*Def, DefIdx, RegSeqInputRegs)) 1274 return false; 1275 1276 // We are looking at: 1277 // Def = REG_SEQUENCE v0, sub0, v1, sub1, ... 1278 // Check if one of the operand defines the subreg we are interested in. 1279 for (auto &RegSeqInput : RegSeqInputRegs) { 1280 if (RegSeqInput.SubIdx == DefSubReg) { 1281 if (RegSeqInput.SubReg) 1282 // Bails if we have to compose sub registers. 1283 return false; 1284 1285 SrcReg = RegSeqInput.Reg; 1286 SrcSubReg = RegSeqInput.SubReg; 1287 return true; 1288 } 1289 } 1290 1291 // If the subreg we are tracking is super-defined by another subreg, 1292 // we could follow this value. However, this would require to compose 1293 // the subreg and we do not do that for now. 1294 return false; 1295 } 1296 1297 bool ValueTracker::getNextSourceFromInsertSubreg(unsigned &SrcReg, 1298 unsigned &SrcSubReg) { 1299 assert((Def->isInsertSubreg() || Def->isInsertSubregLike()) && 1300 "Invalid definition"); 1301 1302 if (Def->getOperand(DefIdx).getSubReg()) 1303 // If we are composing subreg, bails out. 1304 // Same remark as getNextSourceFromRegSequence. 1305 // I.e., this may be turned into an assert. 1306 return false; 1307 1308 if (!TII) 1309 // We could handle the REG_SEQUENCE here, but we do not want to 1310 // duplicate the code from the generic TII. 1311 return false; 1312 1313 TargetInstrInfo::RegSubRegPair BaseReg; 1314 TargetInstrInfo::RegSubRegPairAndIdx InsertedReg; 1315 if (!TII->getInsertSubregInputs(*Def, DefIdx, BaseReg, InsertedReg)) 1316 return false; 1317 1318 // We are looking at: 1319 // Def = INSERT_SUBREG v0, v1, sub1 1320 // There are two cases: 1321 // 1. DefSubReg == sub1, get v1. 1322 // 2. DefSubReg != sub1, the value may be available through v0. 1323 1324 // #1 Check if the inserted register matches the required sub index. 1325 if (InsertedReg.SubIdx == DefSubReg) { 1326 SrcReg = InsertedReg.Reg; 1327 SrcSubReg = InsertedReg.SubReg; 1328 return true; 1329 } 1330 // #2 Otherwise, if the sub register we are looking for is not partial 1331 // defined by the inserted element, we can look through the main 1332 // register (v0). 1333 const MachineOperand &MODef = Def->getOperand(DefIdx); 1334 // If the result register (Def) and the base register (v0) do not 1335 // have the same register class or if we have to compose 1336 // subregisters, bails out. 1337 if (MRI.getRegClass(MODef.getReg()) != MRI.getRegClass(BaseReg.Reg) || 1338 BaseReg.SubReg) 1339 return false; 1340 1341 // Get the TRI and check if the inserted sub-register overlaps with the 1342 // sub-register we are tracking. 1343 const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo(); 1344 if (!TRI || 1345 (TRI->getSubRegIndexLaneMask(DefSubReg) & 1346 TRI->getSubRegIndexLaneMask(InsertedReg.SubIdx)) != 0) 1347 return false; 1348 // At this point, the value is available in v0 via the same subreg 1349 // we used for Def. 1350 SrcReg = BaseReg.Reg; 1351 SrcSubReg = DefSubReg; 1352 return true; 1353 } 1354 1355 bool ValueTracker::getNextSourceFromExtractSubreg(unsigned &SrcReg, 1356 unsigned &SrcSubReg) { 1357 assert((Def->isExtractSubreg() || 1358 Def->isExtractSubregLike()) && "Invalid definition"); 1359 // We are looking at: 1360 // Def = EXTRACT_SUBREG v0, sub0 1361 1362 // Bails if we have to compose sub registers. 1363 // Indeed, if DefSubReg != 0, we would have to compose it with sub0. 1364 if (DefSubReg) 1365 return false; 1366 1367 if (!TII) 1368 // We could handle the EXTRACT_SUBREG here, but we do not want to 1369 // duplicate the code from the generic TII. 1370 return false; 1371 1372 TargetInstrInfo::RegSubRegPairAndIdx ExtractSubregInputReg; 1373 if (!TII->getExtractSubregInputs(*Def, DefIdx, ExtractSubregInputReg)) 1374 return false; 1375 1376 // Bails if we have to compose sub registers. 1377 // Likewise, if v0.subreg != 0, we would have to compose v0.subreg with sub0. 1378 if (ExtractSubregInputReg.SubReg) 1379 return false; 1380 // Otherwise, the value is available in the v0.sub0. 1381 SrcReg = ExtractSubregInputReg.Reg; 1382 SrcSubReg = ExtractSubregInputReg.SubIdx; 1383 return true; 1384 } 1385 1386 bool ValueTracker::getNextSourceFromSubregToReg(unsigned &SrcReg, 1387 unsigned &SrcSubReg) { 1388 assert(Def->isSubregToReg() && "Invalid definition"); 1389 // We are looking at: 1390 // Def = SUBREG_TO_REG Imm, v0, sub0 1391 1392 // Bails if we have to compose sub registers. 1393 // If DefSubReg != sub0, we would have to check that all the bits 1394 // we track are included in sub0 and if yes, we would have to 1395 // determine the right subreg in v0. 1396 if (DefSubReg != Def->getOperand(3).getImm()) 1397 return false; 1398 // Bails if we have to compose sub registers. 1399 // Likewise, if v0.subreg != 0, we would have to compose it with sub0. 1400 if (Def->getOperand(2).getSubReg()) 1401 return false; 1402 1403 SrcReg = Def->getOperand(2).getReg(); 1404 SrcSubReg = Def->getOperand(3).getImm(); 1405 return true; 1406 } 1407 1408 bool ValueTracker::getNextSourceImpl(unsigned &SrcReg, unsigned &SrcSubReg) { 1409 assert(Def && "This method needs a valid definition"); 1410 1411 assert( 1412 (DefIdx < Def->getDesc().getNumDefs() || Def->getDesc().isVariadic()) && 1413 Def->getOperand(DefIdx).isDef() && "Invalid DefIdx"); 1414 if (Def->isCopy()) 1415 return getNextSourceFromCopy(SrcReg, SrcSubReg); 1416 if (Def->isBitcast()) 1417 return getNextSourceFromBitcast(SrcReg, SrcSubReg); 1418 // All the remaining cases involve "complex" instructions. 1419 // Bails if we did not ask for the advanced tracking. 1420 if (!UseAdvancedTracking) 1421 return false; 1422 if (Def->isRegSequence() || Def->isRegSequenceLike()) 1423 return getNextSourceFromRegSequence(SrcReg, SrcSubReg); 1424 if (Def->isInsertSubreg() || Def->isInsertSubregLike()) 1425 return getNextSourceFromInsertSubreg(SrcReg, SrcSubReg); 1426 if (Def->isExtractSubreg() || Def->isExtractSubregLike()) 1427 return getNextSourceFromExtractSubreg(SrcReg, SrcSubReg); 1428 if (Def->isSubregToReg()) 1429 return getNextSourceFromSubregToReg(SrcReg, SrcSubReg); 1430 return false; 1431 } 1432 1433 const MachineInstr *ValueTracker::getNextSource(unsigned &SrcReg, 1434 unsigned &SrcSubReg) { 1435 // If we reach a point where we cannot move up in the use-def chain, 1436 // there is nothing we can get. 1437 if (!Def) 1438 return nullptr; 1439 1440 const MachineInstr *PrevDef = nullptr; 1441 // Try to find the next source. 1442 if (getNextSourceImpl(SrcReg, SrcSubReg)) { 1443 // Update definition, definition index, and subregister for the 1444 // next call of getNextSource. 1445 // Update the current register. 1446 Reg = SrcReg; 1447 // Update the return value before moving up in the use-def chain. 1448 PrevDef = Def; 1449 // If we can still move up in the use-def chain, move to the next 1450 // defintion. 1451 if (!TargetRegisterInfo::isPhysicalRegister(Reg)) { 1452 Def = MRI.getVRegDef(Reg); 1453 DefIdx = MRI.def_begin(Reg).getOperandNo(); 1454 DefSubReg = SrcSubReg; 1455 return PrevDef; 1456 } 1457 } 1458 // If we end up here, this means we will not be able to find another source 1459 // for the next iteration. 1460 // Make sure any new call to getNextSource bails out early by cutting the 1461 // use-def chain. 1462 Def = nullptr; 1463 return PrevDef; 1464 } 1465