Home | History | Annotate | Download | only in CodeGen
      1 //===- PeepholeOptimizer.cpp - Peephole Optimizations ---------------------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // Perform peephole optimizations on the machine code:
     11 //
     12 // - Optimize Extensions
     13 //
     14 //     Optimization of sign / zero extension instructions. It may be extended to
     15 //     handle other instructions with similar properties.
     16 //
     17 //     On some targets, some instructions, e.g. X86 sign / zero extension, may
     18 //     leave the source value in the lower part of the result. This optimization
     19 //     will replace some uses of the pre-extension value with uses of the
     20 //     sub-register of the results.
     21 //
     22 // - Optimize Comparisons
     23 //
     24 //     Optimization of comparison instructions. For instance, in this code:
     25 //
     26 //       sub r1, 1
     27 //       cmp r1, 0
     28 //       bz  L1
     29 //
     30 //     If the "sub" instruction all ready sets (or could be modified to set) the
     31 //     same flag that the "cmp" instruction sets and that "bz" uses, then we can
     32 //     eliminate the "cmp" instruction.
     33 //
     34 //     Another instance, in this code:
     35 //
     36 //       sub r1, r3 | sub r1, imm
     37 //       cmp r3, r1 or cmp r1, r3 | cmp r1, imm
     38 //       bge L1
     39 //
     40 //     If the branch instruction can use flag from "sub", then we can replace
     41 //     "sub" with "subs" and eliminate the "cmp" instruction.
     42 //
     43 // - Optimize Loads:
     44 //
     45 //     Loads that can be folded into a later instruction. A load is foldable
     46 //     if it loads to virtual registers and the virtual register defined has
     47 //     a single use.
     48 //
     49 // - Optimize Copies and Bitcast (more generally, target specific copies):
     50 //
     51 //     Rewrite copies and bitcasts to avoid cross register bank copies
     52 //     when possible.
     53 //     E.g., Consider the following example, where capital and lower
     54 //     letters denote different register file:
     55 //     b = copy A <-- cross-bank copy
     56 //     C = copy b <-- cross-bank copy
     57 //   =>
     58 //     b = copy A <-- cross-bank copy
     59 //     C = copy A <-- same-bank copy
     60 //
     61 //     E.g., for bitcast:
     62 //     b = bitcast A <-- cross-bank copy
     63 //     C = bitcast b <-- cross-bank copy
     64 //   =>
     65 //     b = bitcast A <-- cross-bank copy
     66 //     C = copy A    <-- same-bank copy
     67 //===----------------------------------------------------------------------===//
     68 
     69 #include "llvm/ADT/DenseMap.h"
     70 #include "llvm/ADT/Optional.h"
     71 #include "llvm/ADT/SmallPtrSet.h"
     72 #include "llvm/ADT/SmallSet.h"
     73 #include "llvm/ADT/SmallVector.h"
     74 #include "llvm/ADT/Statistic.h"
     75 #include "llvm/CodeGen/MachineBasicBlock.h"
     76 #include "llvm/CodeGen/MachineDominators.h"
     77 #include "llvm/CodeGen/MachineFunction.h"
     78 #include "llvm/CodeGen/MachineFunctionPass.h"
     79 #include "llvm/CodeGen/MachineInstr.h"
     80 #include "llvm/CodeGen/MachineInstrBuilder.h"
     81 #include "llvm/CodeGen/MachineLoopInfo.h"
     82 #include "llvm/CodeGen/MachineOperand.h"
     83 #include "llvm/CodeGen/MachineRegisterInfo.h"
     84 #include "llvm/CodeGen/TargetInstrInfo.h"
     85 #include "llvm/CodeGen/TargetOpcodes.h"
     86 #include "llvm/CodeGen/TargetRegisterInfo.h"
     87 #include "llvm/CodeGen/TargetSubtargetInfo.h"
     88 #include "llvm/MC/LaneBitmask.h"
     89 #include "llvm/MC/MCInstrDesc.h"
     90 #include "llvm/Pass.h"
     91 #include "llvm/Support/CommandLine.h"
     92 #include "llvm/Support/Debug.h"
     93 #include "llvm/Support/ErrorHandling.h"
     94 #include "llvm/Support/raw_ostream.h"
     95 #include <cassert>
     96 #include <cstdint>
     97 #include <memory>
     98 #include <utility>
     99 
    100 using namespace llvm;
    101 using RegSubRegPair = TargetInstrInfo::RegSubRegPair;
    102 using RegSubRegPairAndIdx = TargetInstrInfo::RegSubRegPairAndIdx;
    103 
    104 #define DEBUG_TYPE "peephole-opt"
    105 
    106 // Optimize Extensions
    107 static cl::opt<bool>
    108 Aggressive("aggressive-ext-opt", cl::Hidden,
    109            cl::desc("Aggressive extension optimization"));
    110 
    111 static cl::opt<bool>
    112 DisablePeephole("disable-peephole", cl::Hidden, cl::init(false),
    113                 cl::desc("Disable the peephole optimizer"));
    114 
    115 /// Specifiy whether or not the value tracking looks through
    116 /// complex instructions. When this is true, the value tracker
    117 /// bails on everything that is not a copy or a bitcast.
    118 static cl::opt<bool>
    119 DisableAdvCopyOpt("disable-adv-copy-opt", cl::Hidden, cl::init(false),
    120                   cl::desc("Disable advanced copy optimization"));
    121 
    122 static cl::opt<bool> DisableNAPhysCopyOpt(
    123     "disable-non-allocatable-phys-copy-opt", cl::Hidden, cl::init(false),
    124     cl::desc("Disable non-allocatable physical register copy optimization"));
    125 
    126 // Limit the number of PHI instructions to process
    127 // in PeepholeOptimizer::getNextSource.
    128 static cl::opt<unsigned> RewritePHILimit(
    129     "rewrite-phi-limit", cl::Hidden, cl::init(10),
    130     cl::desc("Limit the length of PHI chains to lookup"));
    131 
    132 // Limit the length of recurrence chain when evaluating the benefit of
    133 // commuting operands.
    134 static cl::opt<unsigned> MaxRecurrenceChain(
    135     "recurrence-chain-limit", cl::Hidden, cl::init(3),
    136     cl::desc("Maximum length of recurrence chain when evaluating the benefit "
    137              "of commuting operands"));
    138 
    139 
    140 STATISTIC(NumReuse, "Number of extension results reused");
    141 STATISTIC(NumCmps, "Number of compares eliminated");
    142 STATISTIC(NumImmFold, "Number of move immediate folded");
    143 STATISTIC(NumLoadFold, "Number of loads folded");
    144 STATISTIC(NumSelects, "Number of selects optimized");
    145 STATISTIC(NumUncoalescableCopies, "Number of uncoalescable copies optimized");
    146 STATISTIC(NumRewrittenCopies, "Number of copies rewritten");
    147 STATISTIC(NumNAPhysCopies, "Number of non-allocatable physical copies removed");
    148 
    149 namespace {
    150 
    151   class ValueTrackerResult;
    152   class RecurrenceInstr;
    153 
    154   class PeepholeOptimizer : public MachineFunctionPass {
    155     const TargetInstrInfo *TII;
    156     const TargetRegisterInfo *TRI;
    157     MachineRegisterInfo *MRI;
    158     MachineDominatorTree *DT;  // Machine dominator tree
    159     MachineLoopInfo *MLI;
    160 
    161   public:
    162     static char ID; // Pass identification
    163 
    164     PeepholeOptimizer() : MachineFunctionPass(ID) {
    165       initializePeepholeOptimizerPass(*PassRegistry::getPassRegistry());
    166     }
    167 
    168     bool runOnMachineFunction(MachineFunction &MF) override;
    169 
    170     void getAnalysisUsage(AnalysisUsage &AU) const override {
    171       AU.setPreservesCFG();
    172       MachineFunctionPass::getAnalysisUsage(AU);
    173       AU.addRequired<MachineLoopInfo>();
    174       AU.addPreserved<MachineLoopInfo>();
    175       if (Aggressive) {
    176         AU.addRequired<MachineDominatorTree>();
    177         AU.addPreserved<MachineDominatorTree>();
    178       }
    179     }
    180 
    181     /// Track Def -> Use info used for rewriting copies.
    182     using RewriteMapTy = SmallDenseMap<RegSubRegPair, ValueTrackerResult>;
    183 
    184     /// Sequence of instructions that formulate recurrence cycle.
    185     using RecurrenceCycle = SmallVector<RecurrenceInstr, 4>;
    186 
    187   private:
    188     bool optimizeCmpInstr(MachineInstr &MI);
    189     bool optimizeExtInstr(MachineInstr &MI, MachineBasicBlock &MBB,
    190                           SmallPtrSetImpl<MachineInstr*> &LocalMIs);
    191     bool optimizeSelect(MachineInstr &MI,
    192                         SmallPtrSetImpl<MachineInstr *> &LocalMIs);
    193     bool optimizeCondBranch(MachineInstr &MI);
    194     bool optimizeCoalescableCopy(MachineInstr &MI);
    195     bool optimizeUncoalescableCopy(MachineInstr &MI,
    196                                    SmallPtrSetImpl<MachineInstr *> &LocalMIs);
    197     bool optimizeRecurrence(MachineInstr &PHI);
    198     bool findNextSource(RegSubRegPair RegSubReg, RewriteMapTy &RewriteMap);
    199     bool isMoveImmediate(MachineInstr &MI,
    200                          SmallSet<unsigned, 4> &ImmDefRegs,
    201                          DenseMap<unsigned, MachineInstr*> &ImmDefMIs);
    202     bool foldImmediate(MachineInstr &MI, SmallSet<unsigned, 4> &ImmDefRegs,
    203                        DenseMap<unsigned, MachineInstr*> &ImmDefMIs);
    204 
    205     /// Finds recurrence cycles, but only ones that formulated around
    206     /// a def operand and a use operand that are tied. If there is a use
    207     /// operand commutable with the tied use operand, find recurrence cycle
    208     /// along that operand as well.
    209     bool findTargetRecurrence(unsigned Reg,
    210                               const SmallSet<unsigned, 2> &TargetReg,
    211                               RecurrenceCycle &RC);
    212 
    213     /// If copy instruction \p MI is a virtual register copy, track it in
    214     /// the set \p CopySrcRegs and \p CopyMIs. If this virtual register was
    215     /// previously seen as a copy, replace the uses of this copy with the
    216     /// previously seen copy's destination register.
    217     bool foldRedundantCopy(MachineInstr &MI,
    218                            SmallSet<unsigned, 4> &CopySrcRegs,
    219                            DenseMap<unsigned, MachineInstr *> &CopyMIs);
    220 
    221     /// Is the register \p Reg a non-allocatable physical register?
    222     bool isNAPhysCopy(unsigned Reg);
    223 
    224     /// If copy instruction \p MI is a non-allocatable virtual<->physical
    225     /// register copy, track it in the \p NAPhysToVirtMIs map. If this
    226     /// non-allocatable physical register was previously copied to a virtual
    227     /// registered and hasn't been clobbered, the virt->phys copy can be
    228     /// deleted.
    229     bool foldRedundantNAPhysCopy(MachineInstr &MI,
    230         DenseMap<unsigned, MachineInstr *> &NAPhysToVirtMIs);
    231 
    232     bool isLoadFoldable(MachineInstr &MI,
    233                         SmallSet<unsigned, 16> &FoldAsLoadDefCandidates);
    234 
    235     /// Check whether \p MI is understood by the register coalescer
    236     /// but may require some rewriting.
    237     bool isCoalescableCopy(const MachineInstr &MI) {
    238       // SubregToRegs are not interesting, because they are already register
    239       // coalescer friendly.
    240       return MI.isCopy() || (!DisableAdvCopyOpt &&
    241                              (MI.isRegSequence() || MI.isInsertSubreg() ||
    242                               MI.isExtractSubreg()));
    243     }
    244 
    245     /// Check whether \p MI is a copy like instruction that is
    246     /// not recognized by the register coalescer.
    247     bool isUncoalescableCopy(const MachineInstr &MI) {
    248       return MI.isBitcast() ||
    249              (!DisableAdvCopyOpt &&
    250               (MI.isRegSequenceLike() || MI.isInsertSubregLike() ||
    251                MI.isExtractSubregLike()));
    252     }
    253 
    254     MachineInstr &rewriteSource(MachineInstr &CopyLike,
    255                                 RegSubRegPair Def, RewriteMapTy &RewriteMap);
    256   };
    257 
    258   /// Helper class to hold instructions that are inside recurrence cycles.
    259   /// The recurrence cycle is formulated around 1) a def operand and its
    260   /// tied use operand, or 2) a def operand and a use operand that is commutable
    261   /// with another use operand which is tied to the def operand. In the latter
    262   /// case, index of the tied use operand and the commutable use operand are
    263   /// maintained with CommutePair.
    264   class RecurrenceInstr {
    265   public:
    266     using IndexPair = std::pair<unsigned, unsigned>;
    267 
    268     RecurrenceInstr(MachineInstr *MI) : MI(MI) {}
    269     RecurrenceInstr(MachineInstr *MI, unsigned Idx1, unsigned Idx2)
    270       : MI(MI), CommutePair(std::make_pair(Idx1, Idx2)) {}
    271 
    272     MachineInstr *getMI() const { return MI; }
    273     Optional<IndexPair> getCommutePair() const { return CommutePair; }
    274 
    275   private:
    276     MachineInstr *MI;
    277     Optional<IndexPair> CommutePair;
    278   };
    279 
    280   /// Helper class to hold a reply for ValueTracker queries.
    281   /// Contains the returned sources for a given search and the instructions
    282   /// where the sources were tracked from.
    283   class ValueTrackerResult {
    284   private:
    285     /// Track all sources found by one ValueTracker query.
    286     SmallVector<RegSubRegPair, 2> RegSrcs;
    287 
    288     /// Instruction using the sources in 'RegSrcs'.
    289     const MachineInstr *Inst = nullptr;
    290 
    291   public:
    292     ValueTrackerResult() = default;
    293 
    294     ValueTrackerResult(unsigned Reg, unsigned SubReg) {
    295       addSource(Reg, SubReg);
    296     }
    297 
    298     bool isValid() const { return getNumSources() > 0; }
    299 
    300     void setInst(const MachineInstr *I) { Inst = I; }
    301     const MachineInstr *getInst() const { return Inst; }
    302 
    303     void clear() {
    304       RegSrcs.clear();
    305       Inst = nullptr;
    306     }
    307 
    308     void addSource(unsigned SrcReg, unsigned SrcSubReg) {
    309       RegSrcs.push_back(RegSubRegPair(SrcReg, SrcSubReg));
    310     }
    311 
    312     void setSource(int Idx, unsigned SrcReg, unsigned SrcSubReg) {
    313       assert(Idx < getNumSources() && "Reg pair source out of index");
    314       RegSrcs[Idx] = RegSubRegPair(SrcReg, SrcSubReg);
    315     }
    316 
    317     int getNumSources() const { return RegSrcs.size(); }
    318 
    319     RegSubRegPair getSrc(int Idx) const {
    320       return RegSrcs[Idx];
    321     }
    322 
    323     unsigned getSrcReg(int Idx) const {
    324       assert(Idx < getNumSources() && "Reg source out of index");
    325       return RegSrcs[Idx].Reg;
    326     }
    327 
    328     unsigned getSrcSubReg(int Idx) const {
    329       assert(Idx < getNumSources() && "SubReg source out of index");
    330       return RegSrcs[Idx].SubReg;
    331     }
    332 
    333     bool operator==(const ValueTrackerResult &Other) {
    334       if (Other.getInst() != getInst())
    335         return false;
    336 
    337       if (Other.getNumSources() != getNumSources())
    338         return false;
    339 
    340       for (int i = 0, e = Other.getNumSources(); i != e; ++i)
    341         if (Other.getSrcReg(i) != getSrcReg(i) ||
    342             Other.getSrcSubReg(i) != getSrcSubReg(i))
    343           return false;
    344       return true;
    345     }
    346   };
    347 
    348   /// Helper class to track the possible sources of a value defined by
    349   /// a (chain of) copy related instructions.
    350   /// Given a definition (instruction and definition index), this class
    351   /// follows the use-def chain to find successive suitable sources.
    352   /// The given source can be used to rewrite the definition into
    353   /// def = COPY src.
    354   ///
    355   /// For instance, let us consider the following snippet:
    356   /// v0 =
    357   /// v2 = INSERT_SUBREG v1, v0, sub0
    358   /// def = COPY v2.sub0
    359   ///
    360   /// Using a ValueTracker for def = COPY v2.sub0 will give the following
    361   /// suitable sources:
    362   /// v2.sub0 and v0.
    363   /// Then, def can be rewritten into def = COPY v0.
    364   class ValueTracker {
    365   private:
    366     /// The current point into the use-def chain.
    367     const MachineInstr *Def = nullptr;
    368 
    369     /// The index of the definition in Def.
    370     unsigned DefIdx = 0;
    371 
    372     /// The sub register index of the definition.
    373     unsigned DefSubReg;
    374 
    375     /// The register where the value can be found.
    376     unsigned Reg;
    377 
    378     /// MachineRegisterInfo used to perform tracking.
    379     const MachineRegisterInfo &MRI;
    380 
    381     /// Optional TargetInstrInfo used to perform some complex tracking.
    382     const TargetInstrInfo *TII;
    383 
    384     /// Dispatcher to the right underlying implementation of getNextSource.
    385     ValueTrackerResult getNextSourceImpl();
    386 
    387     /// Specialized version of getNextSource for Copy instructions.
    388     ValueTrackerResult getNextSourceFromCopy();
    389 
    390     /// Specialized version of getNextSource for Bitcast instructions.
    391     ValueTrackerResult getNextSourceFromBitcast();
    392 
    393     /// Specialized version of getNextSource for RegSequence instructions.
    394     ValueTrackerResult getNextSourceFromRegSequence();
    395 
    396     /// Specialized version of getNextSource for InsertSubreg instructions.
    397     ValueTrackerResult getNextSourceFromInsertSubreg();
    398 
    399     /// Specialized version of getNextSource for ExtractSubreg instructions.
    400     ValueTrackerResult getNextSourceFromExtractSubreg();
    401 
    402     /// Specialized version of getNextSource for SubregToReg instructions.
    403     ValueTrackerResult getNextSourceFromSubregToReg();
    404 
    405     /// Specialized version of getNextSource for PHI instructions.
    406     ValueTrackerResult getNextSourceFromPHI();
    407 
    408   public:
    409     /// Create a ValueTracker instance for the value defined by \p Reg.
    410     /// \p DefSubReg represents the sub register index the value tracker will
    411     /// track. It does not need to match the sub register index used in the
    412     /// definition of \p Reg.
    413     /// If \p Reg is a physical register, a value tracker constructed with
    414     /// this constructor will not find any alternative source.
    415     /// Indeed, when \p Reg is a physical register that constructor does not
    416     /// know which definition of \p Reg it should track.
    417     /// Use the next constructor to track a physical register.
    418     ValueTracker(unsigned Reg, unsigned DefSubReg,
    419                  const MachineRegisterInfo &MRI,
    420                  const TargetInstrInfo *TII = nullptr)
    421         : DefSubReg(DefSubReg), Reg(Reg), MRI(MRI), TII(TII) {
    422       if (!TargetRegisterInfo::isPhysicalRegister(Reg)) {
    423         Def = MRI.getVRegDef(Reg);
    424         DefIdx = MRI.def_begin(Reg).getOperandNo();
    425       }
    426     }
    427 
    428     /// Following the use-def chain, get the next available source
    429     /// for the tracked value.
    430     /// \return A ValueTrackerResult containing a set of registers
    431     /// and sub registers with tracked values. A ValueTrackerResult with
    432     /// an empty set of registers means no source was found.
    433     ValueTrackerResult getNextSource();
    434   };
    435 
    436 } // end anonymous namespace
    437 
    438 char PeepholeOptimizer::ID = 0;
    439 
    440 char &llvm::PeepholeOptimizerID = PeepholeOptimizer::ID;
    441 
    442 INITIALIZE_PASS_BEGIN(PeepholeOptimizer, DEBUG_TYPE,
    443                       "Peephole Optimizations", false, false)
    444 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
    445 INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
    446 INITIALIZE_PASS_END(PeepholeOptimizer, DEBUG_TYPE,
    447                     "Peephole Optimizations", false, false)
    448 
    449 /// If instruction is a copy-like instruction, i.e. it reads a single register
    450 /// and writes a single register and it does not modify the source, and if the
    451 /// source value is preserved as a sub-register of the result, then replace all
    452 /// reachable uses of the source with the subreg of the result.
    453 ///
    454 /// Do not generate an EXTRACT that is used only in a debug use, as this changes
    455 /// the code. Since this code does not currently share EXTRACTs, just ignore all
    456 /// debug uses.
    457 bool PeepholeOptimizer::
    458 optimizeExtInstr(MachineInstr &MI, MachineBasicBlock &MBB,
    459                  SmallPtrSetImpl<MachineInstr*> &LocalMIs) {
    460   unsigned SrcReg, DstReg, SubIdx;
    461   if (!TII->isCoalescableExtInstr(MI, SrcReg, DstReg, SubIdx))
    462     return false;
    463 
    464   if (TargetRegisterInfo::isPhysicalRegister(DstReg) ||
    465       TargetRegisterInfo::isPhysicalRegister(SrcReg))
    466     return false;
    467 
    468   if (MRI->hasOneNonDBGUse(SrcReg))
    469     // No other uses.
    470     return false;
    471 
    472   // Ensure DstReg can get a register class that actually supports
    473   // sub-registers. Don't change the class until we commit.
    474   const TargetRegisterClass *DstRC = MRI->getRegClass(DstReg);
    475   DstRC = TRI->getSubClassWithSubReg(DstRC, SubIdx);
    476   if (!DstRC)
    477     return false;
    478 
    479   // The ext instr may be operating on a sub-register of SrcReg as well.
    480   // PPC::EXTSW is a 32 -> 64-bit sign extension, but it reads a 64-bit
    481   // register.
    482   // If UseSrcSubIdx is Set, SubIdx also applies to SrcReg, and only uses of
    483   // SrcReg:SubIdx should be replaced.
    484   bool UseSrcSubIdx =
    485       TRI->getSubClassWithSubReg(MRI->getRegClass(SrcReg), SubIdx) != nullptr;
    486 
    487   // The source has other uses. See if we can replace the other uses with use of
    488   // the result of the extension.
    489   SmallPtrSet<MachineBasicBlock*, 4> ReachedBBs;
    490   for (MachineInstr &UI : MRI->use_nodbg_instructions(DstReg))
    491     ReachedBBs.insert(UI.getParent());
    492 
    493   // Uses that are in the same BB of uses of the result of the instruction.
    494   SmallVector<MachineOperand*, 8> Uses;
    495 
    496   // Uses that the result of the instruction can reach.
    497   SmallVector<MachineOperand*, 8> ExtendedUses;
    498 
    499   bool ExtendLife = true;
    500   for (MachineOperand &UseMO : MRI->use_nodbg_operands(SrcReg)) {
    501     MachineInstr *UseMI = UseMO.getParent();
    502     if (UseMI == &MI)
    503       continue;
    504 
    505     if (UseMI->isPHI()) {
    506       ExtendLife = false;
    507       continue;
    508     }
    509 
    510     // Only accept uses of SrcReg:SubIdx.
    511     if (UseSrcSubIdx && UseMO.getSubReg() != SubIdx)
    512       continue;
    513 
    514     // It's an error to translate this:
    515     //
    516     //    %reg1025 = <sext> %reg1024
    517     //     ...
    518     //    %reg1026 = SUBREG_TO_REG 0, %reg1024, 4
    519     //
    520     // into this:
    521     //
    522     //    %reg1025 = <sext> %reg1024
    523     //     ...
    524     //    %reg1027 = COPY %reg1025:4
    525     //    %reg1026 = SUBREG_TO_REG 0, %reg1027, 4
    526     //
    527     // The problem here is that SUBREG_TO_REG is there to assert that an
    528     // implicit zext occurs. It doesn't insert a zext instruction. If we allow
    529     // the COPY here, it will give us the value after the <sext>, not the
    530     // original value of %reg1024 before <sext>.
    531     if (UseMI->getOpcode() == TargetOpcode::SUBREG_TO_REG)
    532       continue;
    533 
    534     MachineBasicBlock *UseMBB = UseMI->getParent();
    535     if (UseMBB == &MBB) {
    536       // Local uses that come after the extension.
    537       if (!LocalMIs.count(UseMI))
    538         Uses.push_back(&UseMO);
    539     } else if (ReachedBBs.count(UseMBB)) {
    540       // Non-local uses where the result of the extension is used. Always
    541       // replace these unless it's a PHI.
    542       Uses.push_back(&UseMO);
    543     } else if (Aggressive && DT->dominates(&MBB, UseMBB)) {
    544       // We may want to extend the live range of the extension result in order
    545       // to replace these uses.
    546       ExtendedUses.push_back(&UseMO);
    547     } else {
    548       // Both will be live out of the def MBB anyway. Don't extend live range of
    549       // the extension result.
    550       ExtendLife = false;
    551       break;
    552     }
    553   }
    554 
    555   if (ExtendLife && !ExtendedUses.empty())
    556     // Extend the liveness of the extension result.
    557     Uses.append(ExtendedUses.begin(), ExtendedUses.end());
    558 
    559   // Now replace all uses.
    560   bool Changed = false;
    561   if (!Uses.empty()) {
    562     SmallPtrSet<MachineBasicBlock*, 4> PHIBBs;
    563 
    564     // Look for PHI uses of the extended result, we don't want to extend the
    565     // liveness of a PHI input. It breaks all kinds of assumptions down
    566     // stream. A PHI use is expected to be the kill of its source values.
    567     for (MachineInstr &UI : MRI->use_nodbg_instructions(DstReg))
    568       if (UI.isPHI())
    569         PHIBBs.insert(UI.getParent());
    570 
    571     const TargetRegisterClass *RC = MRI->getRegClass(SrcReg);
    572     for (unsigned i = 0, e = Uses.size(); i != e; ++i) {
    573       MachineOperand *UseMO = Uses[i];
    574       MachineInstr *UseMI = UseMO->getParent();
    575       MachineBasicBlock *UseMBB = UseMI->getParent();
    576       if (PHIBBs.count(UseMBB))
    577         continue;
    578 
    579       // About to add uses of DstReg, clear DstReg's kill flags.
    580       if (!Changed) {
    581         MRI->clearKillFlags(DstReg);
    582         MRI->constrainRegClass(DstReg, DstRC);
    583       }
    584 
    585       unsigned NewVR = MRI->createVirtualRegister(RC);
    586       MachineInstr *Copy = BuildMI(*UseMBB, UseMI, UseMI->getDebugLoc(),
    587                                    TII->get(TargetOpcode::COPY), NewVR)
    588         .addReg(DstReg, 0, SubIdx);
    589       // SubIdx applies to both SrcReg and DstReg when UseSrcSubIdx is set.
    590       if (UseSrcSubIdx) {
    591         Copy->getOperand(0).setSubReg(SubIdx);
    592         Copy->getOperand(0).setIsUndef();
    593       }
    594       UseMO->setReg(NewVR);
    595       ++NumReuse;
    596       Changed = true;
    597     }
    598   }
    599 
    600   return Changed;
    601 }
    602 
    603 /// If the instruction is a compare and the previous instruction it's comparing
    604 /// against already sets (or could be modified to set) the same flag as the
    605 /// compare, then we can remove the comparison and use the flag from the
    606 /// previous instruction.
    607 bool PeepholeOptimizer::optimizeCmpInstr(MachineInstr &MI) {
    608   // If this instruction is a comparison against zero and isn't comparing a
    609   // physical register, we can try to optimize it.
    610   unsigned SrcReg, SrcReg2;
    611   int CmpMask, CmpValue;
    612   if (!TII->analyzeCompare(MI, SrcReg, SrcReg2, CmpMask, CmpValue) ||
    613       TargetRegisterInfo::isPhysicalRegister(SrcReg) ||
    614       (SrcReg2 != 0 && TargetRegisterInfo::isPhysicalRegister(SrcReg2)))
    615     return false;
    616 
    617   // Attempt to optimize the comparison instruction.
    618   if (TII->optimizeCompareInstr(MI, SrcReg, SrcReg2, CmpMask, CmpValue, MRI)) {
    619     ++NumCmps;
    620     return true;
    621   }
    622 
    623   return false;
    624 }
    625 
    626 /// Optimize a select instruction.
    627 bool PeepholeOptimizer::optimizeSelect(MachineInstr &MI,
    628                             SmallPtrSetImpl<MachineInstr *> &LocalMIs) {
    629   unsigned TrueOp = 0;
    630   unsigned FalseOp = 0;
    631   bool Optimizable = false;
    632   SmallVector<MachineOperand, 4> Cond;
    633   if (TII->analyzeSelect(MI, Cond, TrueOp, FalseOp, Optimizable))
    634     return false;
    635   if (!Optimizable)
    636     return false;
    637   if (!TII->optimizeSelect(MI, LocalMIs))
    638     return false;
    639   MI.eraseFromParent();
    640   ++NumSelects;
    641   return true;
    642 }
    643 
    644 /// Check if a simpler conditional branch can be generated.
    645 bool PeepholeOptimizer::optimizeCondBranch(MachineInstr &MI) {
    646   return TII->optimizeCondBranch(MI);
    647 }
    648 
    649 /// Try to find the next source that share the same register file
    650 /// for the value defined by \p Reg and \p SubReg.
    651 /// When true is returned, the \p RewriteMap can be used by the client to
    652 /// retrieve all Def -> Use along the way up to the next source. Any found
    653 /// Use that is not itself a key for another entry, is the next source to
    654 /// use. During the search for the next source, multiple sources can be found
    655 /// given multiple incoming sources of a PHI instruction. In this case, we
    656 /// look in each PHI source for the next source; all found next sources must
    657 /// share the same register file as \p Reg and \p SubReg. The client should
    658 /// then be capable to rewrite all intermediate PHIs to get the next source.
    659 /// \return False if no alternative sources are available. True otherwise.
    660 bool PeepholeOptimizer::findNextSource(RegSubRegPair RegSubReg,
    661                                        RewriteMapTy &RewriteMap) {
    662   // Do not try to find a new source for a physical register.
    663   // So far we do not have any motivating example for doing that.
    664   // Thus, instead of maintaining untested code, we will revisit that if
    665   // that changes at some point.
    666   unsigned Reg = RegSubReg.Reg;
    667   if (TargetRegisterInfo::isPhysicalRegister(Reg))
    668     return false;
    669   const TargetRegisterClass *DefRC = MRI->getRegClass(Reg);
    670 
    671   SmallVector<RegSubRegPair, 4> SrcToLook;
    672   RegSubRegPair CurSrcPair = RegSubReg;
    673   SrcToLook.push_back(CurSrcPair);
    674 
    675   unsigned PHICount = 0;
    676   do {
    677     CurSrcPair = SrcToLook.pop_back_val();
    678     // As explained above, do not handle physical registers
    679     if (TargetRegisterInfo::isPhysicalRegister(CurSrcPair.Reg))
    680       return false;
    681 
    682     ValueTracker ValTracker(CurSrcPair.Reg, CurSrcPair.SubReg, *MRI, TII);
    683 
    684     // Follow the chain of copies until we find a more suitable source, a phi
    685     // or have to abort.
    686     while (true) {
    687       ValueTrackerResult Res = ValTracker.getNextSource();
    688       // Abort at the end of a chain (without finding a suitable source).
    689       if (!Res.isValid())
    690         return false;
    691 
    692       // Insert the Def -> Use entry for the recently found source.
    693       ValueTrackerResult CurSrcRes = RewriteMap.lookup(CurSrcPair);
    694       if (CurSrcRes.isValid()) {
    695         assert(CurSrcRes == Res && "ValueTrackerResult found must match");
    696         // An existent entry with multiple sources is a PHI cycle we must avoid.
    697         // Otherwise it's an entry with a valid next source we already found.
    698         if (CurSrcRes.getNumSources() > 1) {
    699           LLVM_DEBUG(dbgs()
    700                      << "findNextSource: found PHI cycle, aborting...\n");
    701           return false;
    702         }
    703         break;
    704       }
    705       RewriteMap.insert(std::make_pair(CurSrcPair, Res));
    706 
    707       // ValueTrackerResult usually have one source unless it's the result from
    708       // a PHI instruction. Add the found PHI edges to be looked up further.
    709       unsigned NumSrcs = Res.getNumSources();
    710       if (NumSrcs > 1) {
    711         PHICount++;
    712         if (PHICount >= RewritePHILimit) {
    713           LLVM_DEBUG(dbgs() << "findNextSource: PHI limit reached\n");
    714           return false;
    715         }
    716 
    717         for (unsigned i = 0; i < NumSrcs; ++i)
    718           SrcToLook.push_back(Res.getSrc(i));
    719         break;
    720       }
    721 
    722       CurSrcPair = Res.getSrc(0);
    723       // Do not extend the live-ranges of physical registers as they add
    724       // constraints to the register allocator. Moreover, if we want to extend
    725       // the live-range of a physical register, unlike SSA virtual register,
    726       // we will have to check that they aren't redefine before the related use.
    727       if (TargetRegisterInfo::isPhysicalRegister(CurSrcPair.Reg))
    728         return false;
    729 
    730       // Keep following the chain if the value isn't any better yet.
    731       const TargetRegisterClass *SrcRC = MRI->getRegClass(CurSrcPair.Reg);
    732       if (!TRI->shouldRewriteCopySrc(DefRC, RegSubReg.SubReg, SrcRC,
    733                                      CurSrcPair.SubReg))
    734         continue;
    735 
    736       // We currently cannot deal with subreg operands on PHI instructions
    737       // (see insertPHI()).
    738       if (PHICount > 0 && CurSrcPair.SubReg != 0)
    739         continue;
    740 
    741       // We found a suitable source, and are done with this chain.
    742       break;
    743     }
    744   } while (!SrcToLook.empty());
    745 
    746   // If we did not find a more suitable source, there is nothing to optimize.
    747   return CurSrcPair.Reg != Reg;
    748 }
    749 
    750 /// Insert a PHI instruction with incoming edges \p SrcRegs that are
    751 /// guaranteed to have the same register class. This is necessary whenever we
    752 /// successfully traverse a PHI instruction and find suitable sources coming
    753 /// from its edges. By inserting a new PHI, we provide a rewritten PHI def
    754 /// suitable to be used in a new COPY instruction.
    755 static MachineInstr &
    756 insertPHI(MachineRegisterInfo &MRI, const TargetInstrInfo &TII,
    757           const SmallVectorImpl<RegSubRegPair> &SrcRegs,
    758           MachineInstr &OrigPHI) {
    759   assert(!SrcRegs.empty() && "No sources to create a PHI instruction?");
    760 
    761   const TargetRegisterClass *NewRC = MRI.getRegClass(SrcRegs[0].Reg);
    762   // NewRC is only correct if no subregisters are involved. findNextSource()
    763   // should have rejected those cases already.
    764   assert(SrcRegs[0].SubReg == 0 && "should not have subreg operand");
    765   unsigned NewVR = MRI.createVirtualRegister(NewRC);
    766   MachineBasicBlock *MBB = OrigPHI.getParent();
    767   MachineInstrBuilder MIB = BuildMI(*MBB, &OrigPHI, OrigPHI.getDebugLoc(),
    768                                     TII.get(TargetOpcode::PHI), NewVR);
    769 
    770   unsigned MBBOpIdx = 2;
    771   for (const RegSubRegPair &RegPair : SrcRegs) {
    772     MIB.addReg(RegPair.Reg, 0, RegPair.SubReg);
    773     MIB.addMBB(OrigPHI.getOperand(MBBOpIdx).getMBB());
    774     // Since we're extended the lifetime of RegPair.Reg, clear the
    775     // kill flags to account for that and make RegPair.Reg reaches
    776     // the new PHI.
    777     MRI.clearKillFlags(RegPair.Reg);
    778     MBBOpIdx += 2;
    779   }
    780 
    781   return *MIB;
    782 }
    783 
    784 namespace {
    785 
    786 /// Interface to query instructions amenable to copy rewriting.
    787 class Rewriter {
    788 protected:
    789   MachineInstr &CopyLike;
    790   unsigned CurrentSrcIdx = 0;   ///< The index of the source being rewritten.
    791 public:
    792   Rewriter(MachineInstr &CopyLike) : CopyLike(CopyLike) {}
    793   virtual ~Rewriter() {}
    794 
    795   /// Get the next rewritable source (SrcReg, SrcSubReg) and
    796   /// the related value that it affects (DstReg, DstSubReg).
    797   /// A source is considered rewritable if its register class and the
    798   /// register class of the related DstReg may not be register
    799   /// coalescer friendly. In other words, given a copy-like instruction
    800   /// not all the arguments may be returned at rewritable source, since
    801   /// some arguments are none to be register coalescer friendly.
    802   ///
    803   /// Each call of this method moves the current source to the next
    804   /// rewritable source.
    805   /// For instance, let CopyLike be the instruction to rewrite.
    806   /// CopyLike has one definition and one source:
    807   /// dst.dstSubIdx = CopyLike src.srcSubIdx.
    808   ///
    809   /// The first call will give the first rewritable source, i.e.,
    810   /// the only source this instruction has:
    811   /// (SrcReg, SrcSubReg) = (src, srcSubIdx).
    812   /// This source defines the whole definition, i.e.,
    813   /// (DstReg, DstSubReg) = (dst, dstSubIdx).
    814   ///
    815   /// The second and subsequent calls will return false, as there is only one
    816   /// rewritable source.
    817   ///
    818   /// \return True if a rewritable source has been found, false otherwise.
    819   /// The output arguments are valid if and only if true is returned.
    820   virtual bool getNextRewritableSource(RegSubRegPair &Src,
    821                                        RegSubRegPair &Dst) = 0;
    822 
    823   /// Rewrite the current source with \p NewReg and \p NewSubReg if possible.
    824   /// \return True if the rewriting was possible, false otherwise.
    825   virtual bool RewriteCurrentSource(unsigned NewReg, unsigned NewSubReg) = 0;
    826 };
    827 
    828 /// Rewriter for COPY instructions.
    829 class CopyRewriter : public Rewriter {
    830 public:
    831   CopyRewriter(MachineInstr &MI) : Rewriter(MI) {
    832     assert(MI.isCopy() && "Expected copy instruction");
    833   }
    834   virtual ~CopyRewriter() = default;
    835 
    836   bool getNextRewritableSource(RegSubRegPair &Src,
    837                                RegSubRegPair &Dst) override {
    838     // CurrentSrcIdx > 0 means this function has already been called.
    839     if (CurrentSrcIdx > 0)
    840       return false;
    841     // This is the first call to getNextRewritableSource.
    842     // Move the CurrentSrcIdx to remember that we made that call.
    843     CurrentSrcIdx = 1;
    844     // The rewritable source is the argument.
    845     const MachineOperand &MOSrc = CopyLike.getOperand(1);
    846     Src = RegSubRegPair(MOSrc.getReg(), MOSrc.getSubReg());
    847     // What we track are the alternative sources of the definition.
    848     const MachineOperand &MODef = CopyLike.getOperand(0);
    849     Dst = RegSubRegPair(MODef.getReg(), MODef.getSubReg());
    850     return true;
    851   }
    852 
    853   bool RewriteCurrentSource(unsigned NewReg, unsigned NewSubReg) override {
    854     if (CurrentSrcIdx != 1)
    855       return false;
    856     MachineOperand &MOSrc = CopyLike.getOperand(CurrentSrcIdx);
    857     MOSrc.setReg(NewReg);
    858     MOSrc.setSubReg(NewSubReg);
    859     return true;
    860   }
    861 };
    862 
    863 /// Helper class to rewrite uncoalescable copy like instructions
    864 /// into new COPY (coalescable friendly) instructions.
    865 class UncoalescableRewriter : public Rewriter {
    866   unsigned NumDefs;  ///< Number of defs in the bitcast.
    867 
    868 public:
    869   UncoalescableRewriter(MachineInstr &MI) : Rewriter(MI) {
    870     NumDefs = MI.getDesc().getNumDefs();
    871   }
    872 
    873   /// \see See Rewriter::getNextRewritableSource()
    874   /// All such sources need to be considered rewritable in order to
    875   /// rewrite a uncoalescable copy-like instruction. This method return
    876   /// each definition that must be checked if rewritable.
    877   bool getNextRewritableSource(RegSubRegPair &Src,
    878                                RegSubRegPair &Dst) override {
    879     // Find the next non-dead definition and continue from there.
    880     if (CurrentSrcIdx == NumDefs)
    881       return false;
    882 
    883     while (CopyLike.getOperand(CurrentSrcIdx).isDead()) {
    884       ++CurrentSrcIdx;
    885       if (CurrentSrcIdx == NumDefs)
    886         return false;
    887     }
    888 
    889     // What we track are the alternative sources of the definition.
    890     Src = RegSubRegPair(0, 0);
    891     const MachineOperand &MODef = CopyLike.getOperand(CurrentSrcIdx);
    892     Dst = RegSubRegPair(MODef.getReg(), MODef.getSubReg());
    893 
    894     CurrentSrcIdx++;
    895     return true;
    896   }
    897 
    898   bool RewriteCurrentSource(unsigned NewReg, unsigned NewSubReg) override {
    899     return false;
    900   }
    901 };
    902 
    903 /// Specialized rewriter for INSERT_SUBREG instruction.
    904 class InsertSubregRewriter : public Rewriter {
    905 public:
    906   InsertSubregRewriter(MachineInstr &MI) : Rewriter(MI) {
    907     assert(MI.isInsertSubreg() && "Invalid instruction");
    908   }
    909 
    910   /// \see See Rewriter::getNextRewritableSource()
    911   /// Here CopyLike has the following form:
    912   /// dst = INSERT_SUBREG Src1, Src2.src2SubIdx, subIdx.
    913   /// Src1 has the same register class has dst, hence, there is
    914   /// nothing to rewrite.
    915   /// Src2.src2SubIdx, may not be register coalescer friendly.
    916   /// Therefore, the first call to this method returns:
    917   /// (SrcReg, SrcSubReg) = (Src2, src2SubIdx).
    918   /// (DstReg, DstSubReg) = (dst, subIdx).
    919   ///
    920   /// Subsequence calls will return false.
    921   bool getNextRewritableSource(RegSubRegPair &Src,
    922                                RegSubRegPair &Dst) override {
    923     // If we already get the only source we can rewrite, return false.
    924     if (CurrentSrcIdx == 2)
    925       return false;
    926     // We are looking at v2 = INSERT_SUBREG v0, v1, sub0.
    927     CurrentSrcIdx = 2;
    928     const MachineOperand &MOInsertedReg = CopyLike.getOperand(2);
    929     Src = RegSubRegPair(MOInsertedReg.getReg(), MOInsertedReg.getSubReg());
    930     const MachineOperand &MODef = CopyLike.getOperand(0);
    931 
    932     // We want to track something that is compatible with the
    933     // partial definition.
    934     if (MODef.getSubReg())
    935       // Bail if we have to compose sub-register indices.
    936       return false;
    937     Dst = RegSubRegPair(MODef.getReg(),
    938                         (unsigned)CopyLike.getOperand(3).getImm());
    939     return true;
    940   }
    941 
    942   bool RewriteCurrentSource(unsigned NewReg, unsigned NewSubReg) override {
    943     if (CurrentSrcIdx != 2)
    944       return false;
    945     // We are rewriting the inserted reg.
    946     MachineOperand &MO = CopyLike.getOperand(CurrentSrcIdx);
    947     MO.setReg(NewReg);
    948     MO.setSubReg(NewSubReg);
    949     return true;
    950   }
    951 };
    952 
    953 /// Specialized rewriter for EXTRACT_SUBREG instruction.
    954 class ExtractSubregRewriter : public Rewriter {
    955   const TargetInstrInfo &TII;
    956 
    957 public:
    958   ExtractSubregRewriter(MachineInstr &MI, const TargetInstrInfo &TII)
    959       : Rewriter(MI), TII(TII) {
    960     assert(MI.isExtractSubreg() && "Invalid instruction");
    961   }
    962 
    963   /// \see Rewriter::getNextRewritableSource()
    964   /// Here CopyLike has the following form:
    965   /// dst.dstSubIdx = EXTRACT_SUBREG Src, subIdx.
    966   /// There is only one rewritable source: Src.subIdx,
    967   /// which defines dst.dstSubIdx.
    968   bool getNextRewritableSource(RegSubRegPair &Src,
    969                                RegSubRegPair &Dst) override {
    970     // If we already get the only source we can rewrite, return false.
    971     if (CurrentSrcIdx == 1)
    972       return false;
    973     // We are looking at v1 = EXTRACT_SUBREG v0, sub0.
    974     CurrentSrcIdx = 1;
    975     const MachineOperand &MOExtractedReg = CopyLike.getOperand(1);
    976     // If we have to compose sub-register indices, bail out.
    977     if (MOExtractedReg.getSubReg())
    978       return false;
    979 
    980     Src = RegSubRegPair(MOExtractedReg.getReg(),
    981                         CopyLike.getOperand(2).getImm());
    982 
    983     // We want to track something that is compatible with the definition.
    984     const MachineOperand &MODef = CopyLike.getOperand(0);
    985     Dst = RegSubRegPair(MODef.getReg(), MODef.getSubReg());
    986     return true;
    987   }
    988 
    989   bool RewriteCurrentSource(unsigned NewReg, unsigned NewSubReg) override {
    990     // The only source we can rewrite is the input register.
    991     if (CurrentSrcIdx != 1)
    992       return false;
    993 
    994     CopyLike.getOperand(CurrentSrcIdx).setReg(NewReg);
    995 
    996     // If we find a source that does not require to extract something,
    997     // rewrite the operation with a copy.
    998     if (!NewSubReg) {
    999       // Move the current index to an invalid position.
   1000       // We do not want another call to this method to be able
   1001       // to do any change.
   1002       CurrentSrcIdx = -1;
   1003       // Rewrite the operation as a COPY.
   1004       // Get rid of the sub-register index.
   1005       CopyLike.RemoveOperand(2);
   1006       // Morph the operation into a COPY.
   1007       CopyLike.setDesc(TII.get(TargetOpcode::COPY));
   1008       return true;
   1009     }
   1010     CopyLike.getOperand(CurrentSrcIdx + 1).setImm(NewSubReg);
   1011     return true;
   1012   }
   1013 };
   1014 
   1015 /// Specialized rewriter for REG_SEQUENCE instruction.
   1016 class RegSequenceRewriter : public Rewriter {
   1017 public:
   1018   RegSequenceRewriter(MachineInstr &MI) : Rewriter(MI) {
   1019     assert(MI.isRegSequence() && "Invalid instruction");
   1020   }
   1021 
   1022   /// \see Rewriter::getNextRewritableSource()
   1023   /// Here CopyLike has the following form:
   1024   /// dst = REG_SEQUENCE Src1.src1SubIdx, subIdx1, Src2.src2SubIdx, subIdx2.
   1025   /// Each call will return a different source, walking all the available
   1026   /// source.
   1027   ///
   1028   /// The first call returns:
   1029   /// (SrcReg, SrcSubReg) = (Src1, src1SubIdx).
   1030   /// (DstReg, DstSubReg) = (dst, subIdx1).
   1031   ///
   1032   /// The second call returns:
   1033   /// (SrcReg, SrcSubReg) = (Src2, src2SubIdx).
   1034   /// (DstReg, DstSubReg) = (dst, subIdx2).
   1035   ///
   1036   /// And so on, until all the sources have been traversed, then
   1037   /// it returns false.
   1038   bool getNextRewritableSource(RegSubRegPair &Src,
   1039                                RegSubRegPair &Dst) override {
   1040     // We are looking at v0 = REG_SEQUENCE v1, sub1, v2, sub2, etc.
   1041 
   1042     // If this is the first call, move to the first argument.
   1043     if (CurrentSrcIdx == 0) {
   1044       CurrentSrcIdx = 1;
   1045     } else {
   1046       // Otherwise, move to the next argument and check that it is valid.
   1047       CurrentSrcIdx += 2;
   1048       if (CurrentSrcIdx >= CopyLike.getNumOperands())
   1049         return false;
   1050     }
   1051     const MachineOperand &MOInsertedReg = CopyLike.getOperand(CurrentSrcIdx);
   1052     Src.Reg = MOInsertedReg.getReg();
   1053     // If we have to compose sub-register indices, bail out.
   1054     if ((Src.SubReg = MOInsertedReg.getSubReg()))
   1055       return false;
   1056 
   1057     // We want to track something that is compatible with the related
   1058     // partial definition.
   1059     Dst.SubReg = CopyLike.getOperand(CurrentSrcIdx + 1).getImm();
   1060 
   1061     const MachineOperand &MODef = CopyLike.getOperand(0);
   1062     Dst.Reg = MODef.getReg();
   1063     // If we have to compose sub-registers, bail.
   1064     return MODef.getSubReg() == 0;
   1065   }
   1066 
   1067   bool RewriteCurrentSource(unsigned NewReg, unsigned NewSubReg) override {
   1068     // We cannot rewrite out of bound operands.
   1069     // Moreover, rewritable sources are at odd positions.
   1070     if ((CurrentSrcIdx & 1) != 1 || CurrentSrcIdx > CopyLike.getNumOperands())
   1071       return false;
   1072 
   1073     MachineOperand &MO = CopyLike.getOperand(CurrentSrcIdx);
   1074     MO.setReg(NewReg);
   1075     MO.setSubReg(NewSubReg);
   1076     return true;
   1077   }
   1078 };
   1079 
   1080 } // end anonymous namespace
   1081 
   1082 /// Get the appropriated Rewriter for \p MI.
   1083 /// \return A pointer to a dynamically allocated Rewriter or nullptr if no
   1084 /// rewriter works for \p MI.
   1085 static Rewriter *getCopyRewriter(MachineInstr &MI, const TargetInstrInfo &TII) {
   1086   // Handle uncoalescable copy-like instructions.
   1087   if (MI.isBitcast() || MI.isRegSequenceLike() || MI.isInsertSubregLike() ||
   1088       MI.isExtractSubregLike())
   1089     return new UncoalescableRewriter(MI);
   1090 
   1091   switch (MI.getOpcode()) {
   1092   default:
   1093     return nullptr;
   1094   case TargetOpcode::COPY:
   1095     return new CopyRewriter(MI);
   1096   case TargetOpcode::INSERT_SUBREG:
   1097     return new InsertSubregRewriter(MI);
   1098   case TargetOpcode::EXTRACT_SUBREG:
   1099     return new ExtractSubregRewriter(MI, TII);
   1100   case TargetOpcode::REG_SEQUENCE:
   1101     return new RegSequenceRewriter(MI);
   1102   }
   1103 }
   1104 
   1105 /// Given a \p Def.Reg and Def.SubReg  pair, use \p RewriteMap to find
   1106 /// the new source to use for rewrite. If \p HandleMultipleSources is true and
   1107 /// multiple sources for a given \p Def are found along the way, we found a
   1108 /// PHI instructions that needs to be rewritten.
   1109 /// TODO: HandleMultipleSources should be removed once we test PHI handling
   1110 /// with coalescable copies.
   1111 static RegSubRegPair
   1112 getNewSource(MachineRegisterInfo *MRI, const TargetInstrInfo *TII,
   1113              RegSubRegPair Def,
   1114              const PeepholeOptimizer::RewriteMapTy &RewriteMap,
   1115              bool HandleMultipleSources = true) {
   1116   RegSubRegPair LookupSrc(Def.Reg, Def.SubReg);
   1117   while (true) {
   1118     ValueTrackerResult Res = RewriteMap.lookup(LookupSrc);
   1119     // If there are no entries on the map, LookupSrc is the new source.
   1120     if (!Res.isValid())
   1121       return LookupSrc;
   1122 
   1123     // There's only one source for this definition, keep searching...
   1124     unsigned NumSrcs = Res.getNumSources();
   1125     if (NumSrcs == 1) {
   1126       LookupSrc.Reg = Res.getSrcReg(0);
   1127       LookupSrc.SubReg = Res.getSrcSubReg(0);
   1128       continue;
   1129     }
   1130 
   1131     // TODO: Remove once multiple srcs w/ coalescable copies are supported.
   1132     if (!HandleMultipleSources)
   1133       break;
   1134 
   1135     // Multiple sources, recurse into each source to find a new source
   1136     // for it. Then, rewrite the PHI accordingly to its new edges.
   1137     SmallVector<RegSubRegPair, 4> NewPHISrcs;
   1138     for (unsigned i = 0; i < NumSrcs; ++i) {
   1139       RegSubRegPair PHISrc(Res.getSrcReg(i), Res.getSrcSubReg(i));
   1140       NewPHISrcs.push_back(
   1141           getNewSource(MRI, TII, PHISrc, RewriteMap, HandleMultipleSources));
   1142     }
   1143 
   1144     // Build the new PHI node and return its def register as the new source.
   1145     MachineInstr &OrigPHI = const_cast<MachineInstr &>(*Res.getInst());
   1146     MachineInstr &NewPHI = insertPHI(*MRI, *TII, NewPHISrcs, OrigPHI);
   1147     LLVM_DEBUG(dbgs() << "-- getNewSource\n");
   1148     LLVM_DEBUG(dbgs() << "   Replacing: " << OrigPHI);
   1149     LLVM_DEBUG(dbgs() << "        With: " << NewPHI);
   1150     const MachineOperand &MODef = NewPHI.getOperand(0);
   1151     return RegSubRegPair(MODef.getReg(), MODef.getSubReg());
   1152   }
   1153 
   1154   return RegSubRegPair(0, 0);
   1155 }
   1156 
   1157 /// Optimize generic copy instructions to avoid cross register bank copy.
   1158 /// The optimization looks through a chain of copies and tries to find a source
   1159 /// that has a compatible register class.
   1160 /// Two register classes are considered to be compatible if they share the same
   1161 /// register bank.
   1162 /// New copies issued by this optimization are register allocator
   1163 /// friendly. This optimization does not remove any copy as it may
   1164 /// overconstrain the register allocator, but replaces some operands
   1165 /// when possible.
   1166 /// \pre isCoalescableCopy(*MI) is true.
   1167 /// \return True, when \p MI has been rewritten. False otherwise.
   1168 bool PeepholeOptimizer::optimizeCoalescableCopy(MachineInstr &MI) {
   1169   assert(isCoalescableCopy(MI) && "Invalid argument");
   1170   assert(MI.getDesc().getNumDefs() == 1 &&
   1171          "Coalescer can understand multiple defs?!");
   1172   const MachineOperand &MODef = MI.getOperand(0);
   1173   // Do not rewrite physical definitions.
   1174   if (TargetRegisterInfo::isPhysicalRegister(MODef.getReg()))
   1175     return false;
   1176 
   1177   bool Changed = false;
   1178   // Get the right rewriter for the current copy.
   1179   std::unique_ptr<Rewriter> CpyRewriter(getCopyRewriter(MI, *TII));
   1180   // If none exists, bail out.
   1181   if (!CpyRewriter)
   1182     return false;
   1183   // Rewrite each rewritable source.
   1184   RegSubRegPair Src;
   1185   RegSubRegPair TrackPair;
   1186   while (CpyRewriter->getNextRewritableSource(Src, TrackPair)) {
   1187     // Keep track of PHI nodes and its incoming edges when looking for sources.
   1188     RewriteMapTy RewriteMap;
   1189     // Try to find a more suitable source. If we failed to do so, or get the
   1190     // actual source, move to the next source.
   1191     if (!findNextSource(TrackPair, RewriteMap))
   1192       continue;
   1193 
   1194     // Get the new source to rewrite. TODO: Only enable handling of multiple
   1195     // sources (PHIs) once we have a motivating example and testcases for it.
   1196     RegSubRegPair NewSrc = getNewSource(MRI, TII, TrackPair, RewriteMap,
   1197                                         /*HandleMultipleSources=*/false);
   1198     if (Src.Reg == NewSrc.Reg || NewSrc.Reg == 0)
   1199       continue;
   1200 
   1201     // Rewrite source.
   1202     if (CpyRewriter->RewriteCurrentSource(NewSrc.Reg, NewSrc.SubReg)) {
   1203       // We may have extended the live-range of NewSrc, account for that.
   1204       MRI->clearKillFlags(NewSrc.Reg);
   1205       Changed = true;
   1206     }
   1207   }
   1208   // TODO: We could have a clean-up method to tidy the instruction.
   1209   // E.g., v0 = INSERT_SUBREG v1, v1.sub0, sub0
   1210   // => v0 = COPY v1
   1211   // Currently we haven't seen motivating example for that and we
   1212   // want to avoid untested code.
   1213   NumRewrittenCopies += Changed;
   1214   return Changed;
   1215 }
   1216 
   1217 /// Rewrite the source found through \p Def, by using the \p RewriteMap
   1218 /// and create a new COPY instruction. More info about RewriteMap in
   1219 /// PeepholeOptimizer::findNextSource. Right now this is only used to handle
   1220 /// Uncoalescable copies, since they are copy like instructions that aren't
   1221 /// recognized by the register allocator.
   1222 MachineInstr &
   1223 PeepholeOptimizer::rewriteSource(MachineInstr &CopyLike,
   1224                                  RegSubRegPair Def, RewriteMapTy &RewriteMap) {
   1225   assert(!TargetRegisterInfo::isPhysicalRegister(Def.Reg) &&
   1226          "We do not rewrite physical registers");
   1227 
   1228   // Find the new source to use in the COPY rewrite.
   1229   RegSubRegPair NewSrc = getNewSource(MRI, TII, Def, RewriteMap);
   1230 
   1231   // Insert the COPY.
   1232   const TargetRegisterClass *DefRC = MRI->getRegClass(Def.Reg);
   1233   unsigned NewVReg = MRI->createVirtualRegister(DefRC);
   1234 
   1235   MachineInstr *NewCopy =
   1236       BuildMI(*CopyLike.getParent(), &CopyLike, CopyLike.getDebugLoc(),
   1237               TII->get(TargetOpcode::COPY), NewVReg)
   1238           .addReg(NewSrc.Reg, 0, NewSrc.SubReg);
   1239 
   1240   if (Def.SubReg) {
   1241     NewCopy->getOperand(0).setSubReg(Def.SubReg);
   1242     NewCopy->getOperand(0).setIsUndef();
   1243   }
   1244 
   1245   LLVM_DEBUG(dbgs() << "-- RewriteSource\n");
   1246   LLVM_DEBUG(dbgs() << "   Replacing: " << CopyLike);
   1247   LLVM_DEBUG(dbgs() << "        With: " << *NewCopy);
   1248   MRI->replaceRegWith(Def.Reg, NewVReg);
   1249   MRI->clearKillFlags(NewVReg);
   1250 
   1251   // We extended the lifetime of NewSrc.Reg, clear the kill flags to
   1252   // account for that.
   1253   MRI->clearKillFlags(NewSrc.Reg);
   1254 
   1255   return *NewCopy;
   1256 }
   1257 
   1258 /// Optimize copy-like instructions to create
   1259 /// register coalescer friendly instruction.
   1260 /// The optimization tries to kill-off the \p MI by looking
   1261 /// through a chain of copies to find a source that has a compatible
   1262 /// register class.
   1263 /// If such a source is found, it replace \p MI by a generic COPY
   1264 /// operation.
   1265 /// \pre isUncoalescableCopy(*MI) is true.
   1266 /// \return True, when \p MI has been optimized. In that case, \p MI has
   1267 /// been removed from its parent.
   1268 /// All COPY instructions created, are inserted in \p LocalMIs.
   1269 bool PeepholeOptimizer::optimizeUncoalescableCopy(
   1270     MachineInstr &MI, SmallPtrSetImpl<MachineInstr *> &LocalMIs) {
   1271   assert(isUncoalescableCopy(MI) && "Invalid argument");
   1272   UncoalescableRewriter CpyRewriter(MI);
   1273 
   1274   // Rewrite each rewritable source by generating new COPYs. This works
   1275   // differently from optimizeCoalescableCopy since it first makes sure that all
   1276   // definitions can be rewritten.
   1277   RewriteMapTy RewriteMap;
   1278   RegSubRegPair Src;
   1279   RegSubRegPair Def;
   1280   SmallVector<RegSubRegPair, 4> RewritePairs;
   1281   while (CpyRewriter.getNextRewritableSource(Src, Def)) {
   1282     // If a physical register is here, this is probably for a good reason.
   1283     // Do not rewrite that.
   1284     if (TargetRegisterInfo::isPhysicalRegister(Def.Reg))
   1285       return false;
   1286 
   1287     // If we do not know how to rewrite this definition, there is no point
   1288     // in trying to kill this instruction.
   1289     if (!findNextSource(Def, RewriteMap))
   1290       return false;
   1291 
   1292     RewritePairs.push_back(Def);
   1293   }
   1294 
   1295   // The change is possible for all defs, do it.
   1296   for (const RegSubRegPair &Def : RewritePairs) {
   1297     // Rewrite the "copy" in a way the register coalescer understands.
   1298     MachineInstr &NewCopy = rewriteSource(MI, Def, RewriteMap);
   1299     LocalMIs.insert(&NewCopy);
   1300   }
   1301 
   1302   // MI is now dead.
   1303   MI.eraseFromParent();
   1304   ++NumUncoalescableCopies;
   1305   return true;
   1306 }
   1307 
   1308 /// Check whether MI is a candidate for folding into a later instruction.
   1309 /// We only fold loads to virtual registers and the virtual register defined
   1310 /// has a single use.
   1311 bool PeepholeOptimizer::isLoadFoldable(
   1312     MachineInstr &MI, SmallSet<unsigned, 16> &FoldAsLoadDefCandidates) {
   1313   if (!MI.canFoldAsLoad() || !MI.mayLoad())
   1314     return false;
   1315   const MCInstrDesc &MCID = MI.getDesc();
   1316   if (MCID.getNumDefs() != 1)
   1317     return false;
   1318 
   1319   unsigned Reg = MI.getOperand(0).getReg();
   1320   // To reduce compilation time, we check MRI->hasOneNonDBGUse when inserting
   1321   // loads. It should be checked when processing uses of the load, since
   1322   // uses can be removed during peephole.
   1323   if (!MI.getOperand(0).getSubReg() &&
   1324       TargetRegisterInfo::isVirtualRegister(Reg) &&
   1325       MRI->hasOneNonDBGUse(Reg)) {
   1326     FoldAsLoadDefCandidates.insert(Reg);
   1327     return true;
   1328   }
   1329   return false;
   1330 }
   1331 
   1332 bool PeepholeOptimizer::isMoveImmediate(
   1333     MachineInstr &MI, SmallSet<unsigned, 4> &ImmDefRegs,
   1334     DenseMap<unsigned, MachineInstr *> &ImmDefMIs) {
   1335   const MCInstrDesc &MCID = MI.getDesc();
   1336   if (!MI.isMoveImmediate())
   1337     return false;
   1338   if (MCID.getNumDefs() != 1)
   1339     return false;
   1340   unsigned Reg = MI.getOperand(0).getReg();
   1341   if (TargetRegisterInfo::isVirtualRegister(Reg)) {
   1342     ImmDefMIs.insert(std::make_pair(Reg, &MI));
   1343     ImmDefRegs.insert(Reg);
   1344     return true;
   1345   }
   1346 
   1347   return false;
   1348 }
   1349 
   1350 /// Try folding register operands that are defined by move immediate
   1351 /// instructions, i.e. a trivial constant folding optimization, if
   1352 /// and only if the def and use are in the same BB.
   1353 bool PeepholeOptimizer::foldImmediate(MachineInstr &MI,
   1354     SmallSet<unsigned, 4> &ImmDefRegs,
   1355     DenseMap<unsigned, MachineInstr *> &ImmDefMIs) {
   1356   for (unsigned i = 0, e = MI.getDesc().getNumOperands(); i != e; ++i) {
   1357     MachineOperand &MO = MI.getOperand(i);
   1358     if (!MO.isReg() || MO.isDef())
   1359       continue;
   1360     // Ignore dead implicit defs.
   1361     if (MO.isImplicit() && MO.isDead())
   1362       continue;
   1363     unsigned Reg = MO.getReg();
   1364     if (!TargetRegisterInfo::isVirtualRegister(Reg))
   1365       continue;
   1366     if (ImmDefRegs.count(Reg) == 0)
   1367       continue;
   1368     DenseMap<unsigned, MachineInstr*>::iterator II = ImmDefMIs.find(Reg);
   1369     assert(II != ImmDefMIs.end() && "couldn't find immediate definition");
   1370     if (TII->FoldImmediate(MI, *II->second, Reg, MRI)) {
   1371       ++NumImmFold;
   1372       return true;
   1373     }
   1374   }
   1375   return false;
   1376 }
   1377 
   1378 // FIXME: This is very simple and misses some cases which should be handled when
   1379 // motivating examples are found.
   1380 //
   1381 // The copy rewriting logic should look at uses as well as defs and be able to
   1382 // eliminate copies across blocks.
   1383 //
   1384 // Later copies that are subregister extracts will also not be eliminated since
   1385 // only the first copy is considered.
   1386 //
   1387 // e.g.
   1388 // %1 = COPY %0
   1389 // %2 = COPY %0:sub1
   1390 //
   1391 // Should replace %2 uses with %1:sub1
   1392 bool PeepholeOptimizer::foldRedundantCopy(MachineInstr &MI,
   1393     SmallSet<unsigned, 4> &CopySrcRegs,
   1394     DenseMap<unsigned, MachineInstr *> &CopyMIs) {
   1395   assert(MI.isCopy() && "expected a COPY machine instruction");
   1396 
   1397   unsigned SrcReg = MI.getOperand(1).getReg();
   1398   if (!TargetRegisterInfo::isVirtualRegister(SrcReg))
   1399     return false;
   1400 
   1401   unsigned DstReg = MI.getOperand(0).getReg();
   1402   if (!TargetRegisterInfo::isVirtualRegister(DstReg))
   1403     return false;
   1404 
   1405   if (CopySrcRegs.insert(SrcReg).second) {
   1406     // First copy of this reg seen.
   1407     CopyMIs.insert(std::make_pair(SrcReg, &MI));
   1408     return false;
   1409   }
   1410 
   1411   MachineInstr *PrevCopy = CopyMIs.find(SrcReg)->second;
   1412 
   1413   unsigned SrcSubReg = MI.getOperand(1).getSubReg();
   1414   unsigned PrevSrcSubReg = PrevCopy->getOperand(1).getSubReg();
   1415 
   1416   // Can't replace different subregister extracts.
   1417   if (SrcSubReg != PrevSrcSubReg)
   1418     return false;
   1419 
   1420   unsigned PrevDstReg = PrevCopy->getOperand(0).getReg();
   1421 
   1422   // Only replace if the copy register class is the same.
   1423   //
   1424   // TODO: If we have multiple copies to different register classes, we may want
   1425   // to track multiple copies of the same source register.
   1426   if (MRI->getRegClass(DstReg) != MRI->getRegClass(PrevDstReg))
   1427     return false;
   1428 
   1429   MRI->replaceRegWith(DstReg, PrevDstReg);
   1430 
   1431   // Lifetime of the previous copy has been extended.
   1432   MRI->clearKillFlags(PrevDstReg);
   1433   return true;
   1434 }
   1435 
   1436 bool PeepholeOptimizer::isNAPhysCopy(unsigned Reg) {
   1437   return TargetRegisterInfo::isPhysicalRegister(Reg) &&
   1438          !MRI->isAllocatable(Reg);
   1439 }
   1440 
   1441 bool PeepholeOptimizer::foldRedundantNAPhysCopy(
   1442     MachineInstr &MI, DenseMap<unsigned, MachineInstr *> &NAPhysToVirtMIs) {
   1443   assert(MI.isCopy() && "expected a COPY machine instruction");
   1444 
   1445   if (DisableNAPhysCopyOpt)
   1446     return false;
   1447 
   1448   unsigned DstReg = MI.getOperand(0).getReg();
   1449   unsigned SrcReg = MI.getOperand(1).getReg();
   1450   if (isNAPhysCopy(SrcReg) && TargetRegisterInfo::isVirtualRegister(DstReg)) {
   1451     // %vreg = COPY %physreg
   1452     // Avoid using a datastructure which can track multiple live non-allocatable
   1453     // phys->virt copies since LLVM doesn't seem to do this.
   1454     NAPhysToVirtMIs.insert({SrcReg, &MI});
   1455     return false;
   1456   }
   1457 
   1458   if (!(TargetRegisterInfo::isVirtualRegister(SrcReg) && isNAPhysCopy(DstReg)))
   1459     return false;
   1460 
   1461   // %physreg = COPY %vreg
   1462   auto PrevCopy = NAPhysToVirtMIs.find(DstReg);
   1463   if (PrevCopy == NAPhysToVirtMIs.end()) {
   1464     // We can't remove the copy: there was an intervening clobber of the
   1465     // non-allocatable physical register after the copy to virtual.
   1466     LLVM_DEBUG(dbgs() << "NAPhysCopy: intervening clobber forbids erasing "
   1467                       << MI);
   1468     return false;
   1469   }
   1470 
   1471   unsigned PrevDstReg = PrevCopy->second->getOperand(0).getReg();
   1472   if (PrevDstReg == SrcReg) {
   1473     // Remove the virt->phys copy: we saw the virtual register definition, and
   1474     // the non-allocatable physical register's state hasn't changed since then.
   1475     LLVM_DEBUG(dbgs() << "NAPhysCopy: erasing " << MI);
   1476     ++NumNAPhysCopies;
   1477     return true;
   1478   }
   1479 
   1480   // Potential missed optimization opportunity: we saw a different virtual
   1481   // register get a copy of the non-allocatable physical register, and we only
   1482   // track one such copy. Avoid getting confused by this new non-allocatable
   1483   // physical register definition, and remove it from the tracked copies.
   1484   LLVM_DEBUG(dbgs() << "NAPhysCopy: missed opportunity " << MI);
   1485   NAPhysToVirtMIs.erase(PrevCopy);
   1486   return false;
   1487 }
   1488 
   1489 /// \bried Returns true if \p MO is a virtual register operand.
   1490 static bool isVirtualRegisterOperand(MachineOperand &MO) {
   1491   if (!MO.isReg())
   1492     return false;
   1493   return TargetRegisterInfo::isVirtualRegister(MO.getReg());
   1494 }
   1495 
   1496 bool PeepholeOptimizer::findTargetRecurrence(
   1497     unsigned Reg, const SmallSet<unsigned, 2> &TargetRegs,
   1498     RecurrenceCycle &RC) {
   1499   // Recurrence found if Reg is in TargetRegs.
   1500   if (TargetRegs.count(Reg))
   1501     return true;
   1502 
   1503   // TODO: Curerntly, we only allow the last instruction of the recurrence
   1504   // cycle (the instruction that feeds the PHI instruction) to have more than
   1505   // one uses to guarantee that commuting operands does not tie registers
   1506   // with overlapping live range. Once we have actual live range info of
   1507   // each register, this constraint can be relaxed.
   1508   if (!MRI->hasOneNonDBGUse(Reg))
   1509     return false;
   1510 
   1511   // Give up if the reccurrence chain length is longer than the limit.
   1512   if (RC.size() >= MaxRecurrenceChain)
   1513     return false;
   1514 
   1515   MachineInstr &MI = *(MRI->use_instr_nodbg_begin(Reg));
   1516   unsigned Idx = MI.findRegisterUseOperandIdx(Reg);
   1517 
   1518   // Only interested in recurrences whose instructions have only one def, which
   1519   // is a virtual register.
   1520   if (MI.getDesc().getNumDefs() != 1)
   1521     return false;
   1522 
   1523   MachineOperand &DefOp = MI.getOperand(0);
   1524   if (!isVirtualRegisterOperand(DefOp))
   1525     return false;
   1526 
   1527   // Check if def operand of MI is tied to any use operand. We are only
   1528   // interested in the case that all the instructions in the recurrence chain
   1529   // have there def operand tied with one of the use operand.
   1530   unsigned TiedUseIdx;
   1531   if (!MI.isRegTiedToUseOperand(0, &TiedUseIdx))
   1532     return false;
   1533 
   1534   if (Idx == TiedUseIdx) {
   1535     RC.push_back(RecurrenceInstr(&MI));
   1536     return findTargetRecurrence(DefOp.getReg(), TargetRegs, RC);
   1537   } else {
   1538     // If Idx is not TiedUseIdx, check if Idx is commutable with TiedUseIdx.
   1539     unsigned CommIdx = TargetInstrInfo::CommuteAnyOperandIndex;
   1540     if (TII->findCommutedOpIndices(MI, Idx, CommIdx) && CommIdx == TiedUseIdx) {
   1541       RC.push_back(RecurrenceInstr(&MI, Idx, CommIdx));
   1542       return findTargetRecurrence(DefOp.getReg(), TargetRegs, RC);
   1543     }
   1544   }
   1545 
   1546   return false;
   1547 }
   1548 
   1549 /// Phi instructions will eventually be lowered to copy instructions.
   1550 /// If phi is in a loop header, a recurrence may formulated around the source
   1551 /// and destination of the phi. For such case commuting operands of the
   1552 /// instructions in the recurrence may enable coalescing of the copy instruction
   1553 /// generated from the phi. For example, if there is a recurrence of
   1554 ///
   1555 /// LoopHeader:
   1556 ///   %1 = phi(%0, %100)
   1557 /// LoopLatch:
   1558 ///   %0<def, tied1> = ADD %2<def, tied0>, %1
   1559 ///
   1560 /// , the fact that %0 and %2 are in the same tied operands set makes
   1561 /// the coalescing of copy instruction generated from the phi in
   1562 /// LoopHeader(i.e. %1 = COPY %0) impossible, because %1 and
   1563 /// %2 have overlapping live range. This introduces additional move
   1564 /// instruction to the final assembly. However, if we commute %2 and
   1565 /// %1 of ADD instruction, the redundant move instruction can be
   1566 /// avoided.
   1567 bool PeepholeOptimizer::optimizeRecurrence(MachineInstr &PHI) {
   1568   SmallSet<unsigned, 2> TargetRegs;
   1569   for (unsigned Idx = 1; Idx < PHI.getNumOperands(); Idx += 2) {
   1570     MachineOperand &MO = PHI.getOperand(Idx);
   1571     assert(isVirtualRegisterOperand(MO) && "Invalid PHI instruction");
   1572     TargetRegs.insert(MO.getReg());
   1573   }
   1574 
   1575   bool Changed = false;
   1576   RecurrenceCycle RC;
   1577   if (findTargetRecurrence(PHI.getOperand(0).getReg(), TargetRegs, RC)) {
   1578     // Commutes operands of instructions in RC if necessary so that the copy to
   1579     // be generated from PHI can be coalesced.
   1580     LLVM_DEBUG(dbgs() << "Optimize recurrence chain from " << PHI);
   1581     for (auto &RI : RC) {
   1582       LLVM_DEBUG(dbgs() << "\tInst: " << *(RI.getMI()));
   1583       auto CP = RI.getCommutePair();
   1584       if (CP) {
   1585         Changed = true;
   1586         TII->commuteInstruction(*(RI.getMI()), false, (*CP).first,
   1587                                 (*CP).second);
   1588         LLVM_DEBUG(dbgs() << "\t\tCommuted: " << *(RI.getMI()));
   1589       }
   1590     }
   1591   }
   1592 
   1593   return Changed;
   1594 }
   1595 
   1596 bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
   1597   if (skipFunction(MF.getFunction()))
   1598     return false;
   1599 
   1600   LLVM_DEBUG(dbgs() << "********** PEEPHOLE OPTIMIZER **********\n");
   1601   LLVM_DEBUG(dbgs() << "********** Function: " << MF.getName() << '\n');
   1602 
   1603   if (DisablePeephole)
   1604     return false;
   1605 
   1606   TII = MF.getSubtarget().getInstrInfo();
   1607   TRI = MF.getSubtarget().getRegisterInfo();
   1608   MRI = &MF.getRegInfo();
   1609   DT  = Aggressive ? &getAnalysis<MachineDominatorTree>() : nullptr;
   1610   MLI = &getAnalysis<MachineLoopInfo>();
   1611 
   1612   bool Changed = false;
   1613 
   1614   for (MachineBasicBlock &MBB : MF) {
   1615     bool SeenMoveImm = false;
   1616 
   1617     // During this forward scan, at some point it needs to answer the question
   1618     // "given a pointer to an MI in the current BB, is it located before or
   1619     // after the current instruction".
   1620     // To perform this, the following set keeps track of the MIs already seen
   1621     // during the scan, if a MI is not in the set, it is assumed to be located
   1622     // after. Newly created MIs have to be inserted in the set as well.
   1623     SmallPtrSet<MachineInstr*, 16> LocalMIs;
   1624     SmallSet<unsigned, 4> ImmDefRegs;
   1625     DenseMap<unsigned, MachineInstr*> ImmDefMIs;
   1626     SmallSet<unsigned, 16> FoldAsLoadDefCandidates;
   1627 
   1628     // Track when a non-allocatable physical register is copied to a virtual
   1629     // register so that useless moves can be removed.
   1630     //
   1631     // %physreg is the map index; MI is the last valid `%vreg = COPY %physreg`
   1632     // without any intervening re-definition of %physreg.
   1633     DenseMap<unsigned, MachineInstr *> NAPhysToVirtMIs;
   1634 
   1635     // Set of virtual registers that are copied from.
   1636     SmallSet<unsigned, 4> CopySrcRegs;
   1637     DenseMap<unsigned, MachineInstr *> CopySrcMIs;
   1638 
   1639     bool IsLoopHeader = MLI->isLoopHeader(&MBB);
   1640 
   1641     for (MachineBasicBlock::iterator MII = MBB.begin(), MIE = MBB.end();
   1642          MII != MIE; ) {
   1643       MachineInstr *MI = &*MII;
   1644       // We may be erasing MI below, increment MII now.
   1645       ++MII;
   1646       LocalMIs.insert(MI);
   1647 
   1648       // Skip debug instructions. They should not affect this peephole optimization.
   1649       if (MI->isDebugInstr())
   1650           continue;
   1651 
   1652       if (MI->isPosition())
   1653         continue;
   1654 
   1655       if (IsLoopHeader && MI->isPHI()) {
   1656         if (optimizeRecurrence(*MI)) {
   1657           Changed = true;
   1658           continue;
   1659         }
   1660       }
   1661 
   1662       if (!MI->isCopy()) {
   1663         for (const MachineOperand &MO : MI->operands()) {
   1664           // Visit all operands: definitions can be implicit or explicit.
   1665           if (MO.isReg()) {
   1666             unsigned Reg = MO.getReg();
   1667             if (MO.isDef() && isNAPhysCopy(Reg)) {
   1668               const auto &Def = NAPhysToVirtMIs.find(Reg);
   1669               if (Def != NAPhysToVirtMIs.end()) {
   1670                 // A new definition of the non-allocatable physical register
   1671                 // invalidates previous copies.
   1672                 LLVM_DEBUG(dbgs()
   1673                            << "NAPhysCopy: invalidating because of " << *MI);
   1674                 NAPhysToVirtMIs.erase(Def);
   1675               }
   1676             }
   1677           } else if (MO.isRegMask()) {
   1678             const uint32_t *RegMask = MO.getRegMask();
   1679             for (auto &RegMI : NAPhysToVirtMIs) {
   1680               unsigned Def = RegMI.first;
   1681               if (MachineOperand::clobbersPhysReg(RegMask, Def)) {
   1682                 LLVM_DEBUG(dbgs()
   1683                            << "NAPhysCopy: invalidating because of " << *MI);
   1684                 NAPhysToVirtMIs.erase(Def);
   1685               }
   1686             }
   1687           }
   1688         }
   1689       }
   1690 
   1691       if (MI->isImplicitDef() || MI->isKill())
   1692         continue;
   1693 
   1694       if (MI->isInlineAsm() || MI->hasUnmodeledSideEffects()) {
   1695         // Blow away all non-allocatable physical registers knowledge since we
   1696         // don't know what's correct anymore.
   1697         //
   1698         // FIXME: handle explicit asm clobbers.
   1699         LLVM_DEBUG(dbgs() << "NAPhysCopy: blowing away all info due to "
   1700                           << *MI);
   1701         NAPhysToVirtMIs.clear();
   1702       }
   1703 
   1704       if ((isUncoalescableCopy(*MI) &&
   1705            optimizeUncoalescableCopy(*MI, LocalMIs)) ||
   1706           (MI->isCompare() && optimizeCmpInstr(*MI)) ||
   1707           (MI->isSelect() && optimizeSelect(*MI, LocalMIs))) {
   1708         // MI is deleted.
   1709         LocalMIs.erase(MI);
   1710         Changed = true;
   1711         continue;
   1712       }
   1713 
   1714       if (MI->isConditionalBranch() && optimizeCondBranch(*MI)) {
   1715         Changed = true;
   1716         continue;
   1717       }
   1718 
   1719       if (isCoalescableCopy(*MI) && optimizeCoalescableCopy(*MI)) {
   1720         // MI is just rewritten.
   1721         Changed = true;
   1722         continue;
   1723       }
   1724 
   1725       if (MI->isCopy() &&
   1726           (foldRedundantCopy(*MI, CopySrcRegs, CopySrcMIs) ||
   1727            foldRedundantNAPhysCopy(*MI, NAPhysToVirtMIs))) {
   1728         LocalMIs.erase(MI);
   1729         MI->eraseFromParent();
   1730         Changed = true;
   1731         continue;
   1732       }
   1733 
   1734       if (isMoveImmediate(*MI, ImmDefRegs, ImmDefMIs)) {
   1735         SeenMoveImm = true;
   1736       } else {
   1737         Changed |= optimizeExtInstr(*MI, MBB, LocalMIs);
   1738         // optimizeExtInstr might have created new instructions after MI
   1739         // and before the already incremented MII. Adjust MII so that the
   1740         // next iteration sees the new instructions.
   1741         MII = MI;
   1742         ++MII;
   1743         if (SeenMoveImm)
   1744           Changed |= foldImmediate(*MI, ImmDefRegs, ImmDefMIs);
   1745       }
   1746 
   1747       // Check whether MI is a load candidate for folding into a later
   1748       // instruction. If MI is not a candidate, check whether we can fold an
   1749       // earlier load into MI.
   1750       if (!isLoadFoldable(*MI, FoldAsLoadDefCandidates) &&
   1751           !FoldAsLoadDefCandidates.empty()) {
   1752 
   1753         // We visit each operand even after successfully folding a previous
   1754         // one.  This allows us to fold multiple loads into a single
   1755         // instruction.  We do assume that optimizeLoadInstr doesn't insert
   1756         // foldable uses earlier in the argument list.  Since we don't restart
   1757         // iteration, we'd miss such cases.
   1758         const MCInstrDesc &MIDesc = MI->getDesc();
   1759         for (unsigned i = MIDesc.getNumDefs(); i != MI->getNumOperands();
   1760              ++i) {
   1761           const MachineOperand &MOp = MI->getOperand(i);
   1762           if (!MOp.isReg())
   1763             continue;
   1764           unsigned FoldAsLoadDefReg = MOp.getReg();
   1765           if (FoldAsLoadDefCandidates.count(FoldAsLoadDefReg)) {
   1766             // We need to fold load after optimizeCmpInstr, since
   1767             // optimizeCmpInstr can enable folding by converting SUB to CMP.
   1768             // Save FoldAsLoadDefReg because optimizeLoadInstr() resets it and
   1769             // we need it for markUsesInDebugValueAsUndef().
   1770             unsigned FoldedReg = FoldAsLoadDefReg;
   1771             MachineInstr *DefMI = nullptr;
   1772             if (MachineInstr *FoldMI =
   1773                     TII->optimizeLoadInstr(*MI, MRI, FoldAsLoadDefReg, DefMI)) {
   1774               // Update LocalMIs since we replaced MI with FoldMI and deleted
   1775               // DefMI.
   1776               LLVM_DEBUG(dbgs() << "Replacing: " << *MI);
   1777               LLVM_DEBUG(dbgs() << "     With: " << *FoldMI);
   1778               LocalMIs.erase(MI);
   1779               LocalMIs.erase(DefMI);
   1780               LocalMIs.insert(FoldMI);
   1781               MI->eraseFromParent();
   1782               DefMI->eraseFromParent();
   1783               MRI->markUsesInDebugValueAsUndef(FoldedReg);
   1784               FoldAsLoadDefCandidates.erase(FoldedReg);
   1785               ++NumLoadFold;
   1786 
   1787               // MI is replaced with FoldMI so we can continue trying to fold
   1788               Changed = true;
   1789               MI = FoldMI;
   1790             }
   1791           }
   1792         }
   1793       }
   1794 
   1795       // If we run into an instruction we can't fold across, discard
   1796       // the load candidates.  Note: We might be able to fold *into* this
   1797       // instruction, so this needs to be after the folding logic.
   1798       if (MI->isLoadFoldBarrier()) {
   1799         LLVM_DEBUG(dbgs() << "Encountered load fold barrier on " << *MI);
   1800         FoldAsLoadDefCandidates.clear();
   1801       }
   1802     }
   1803   }
   1804 
   1805   return Changed;
   1806 }
   1807 
   1808 ValueTrackerResult ValueTracker::getNextSourceFromCopy() {
   1809   assert(Def->isCopy() && "Invalid definition");
   1810   // Copy instruction are supposed to be: Def = Src.
   1811   // If someone breaks this assumption, bad things will happen everywhere.
   1812   assert(Def->getNumOperands() == 2 && "Invalid number of operands");
   1813 
   1814   if (Def->getOperand(DefIdx).getSubReg() != DefSubReg)
   1815     // If we look for a different subreg, it means we want a subreg of src.
   1816     // Bails as we do not support composing subregs yet.
   1817     return ValueTrackerResult();
   1818   // Otherwise, we want the whole source.
   1819   const MachineOperand &Src = Def->getOperand(1);
   1820   if (Src.isUndef())
   1821     return ValueTrackerResult();
   1822   return ValueTrackerResult(Src.getReg(), Src.getSubReg());
   1823 }
   1824 
   1825 ValueTrackerResult ValueTracker::getNextSourceFromBitcast() {
   1826   assert(Def->isBitcast() && "Invalid definition");
   1827 
   1828   // Bail if there are effects that a plain copy will not expose.
   1829   if (Def->hasUnmodeledSideEffects())
   1830     return ValueTrackerResult();
   1831 
   1832   // Bitcasts with more than one def are not supported.
   1833   if (Def->getDesc().getNumDefs() != 1)
   1834     return ValueTrackerResult();
   1835   const MachineOperand DefOp = Def->getOperand(DefIdx);
   1836   if (DefOp.getSubReg() != DefSubReg)
   1837     // If we look for a different subreg, it means we want a subreg of the src.
   1838     // Bails as we do not support composing subregs yet.
   1839     return ValueTrackerResult();
   1840 
   1841   unsigned SrcIdx = Def->getNumOperands();
   1842   for (unsigned OpIdx = DefIdx + 1, EndOpIdx = SrcIdx; OpIdx != EndOpIdx;
   1843        ++OpIdx) {
   1844     const MachineOperand &MO = Def->getOperand(OpIdx);
   1845     if (!MO.isReg() || !MO.getReg())
   1846       continue;
   1847     // Ignore dead implicit defs.
   1848     if (MO.isImplicit() && MO.isDead())
   1849       continue;
   1850     assert(!MO.isDef() && "We should have skipped all the definitions by now");
   1851     if (SrcIdx != EndOpIdx)
   1852       // Multiple sources?
   1853       return ValueTrackerResult();
   1854     SrcIdx = OpIdx;
   1855   }
   1856 
   1857   // Stop when any user of the bitcast is a SUBREG_TO_REG, replacing with a COPY
   1858   // will break the assumed guarantees for the upper bits.
   1859   for (const MachineInstr &UseMI : MRI.use_nodbg_instructions(DefOp.getReg())) {
   1860     if (UseMI.isSubregToReg())
   1861       return ValueTrackerResult();
   1862   }
   1863 
   1864   const MachineOperand &Src = Def->getOperand(SrcIdx);
   1865   if (Src.isUndef())
   1866     return ValueTrackerResult();
   1867   return ValueTrackerResult(Src.getReg(), Src.getSubReg());
   1868 }
   1869 
   1870 ValueTrackerResult ValueTracker::getNextSourceFromRegSequence() {
   1871   assert((Def->isRegSequence() || Def->isRegSequenceLike()) &&
   1872          "Invalid definition");
   1873 
   1874   if (Def->getOperand(DefIdx).getSubReg())
   1875     // If we are composing subregs, bail out.
   1876     // The case we are checking is Def.<subreg> = REG_SEQUENCE.
   1877     // This should almost never happen as the SSA property is tracked at
   1878     // the register level (as opposed to the subreg level).
   1879     // I.e.,
   1880     // Def.sub0 =
   1881     // Def.sub1 =
   1882     // is a valid SSA representation for Def.sub0 and Def.sub1, but not for
   1883     // Def. Thus, it must not be generated.
   1884     // However, some code could theoretically generates a single
   1885     // Def.sub0 (i.e, not defining the other subregs) and we would
   1886     // have this case.
   1887     // If we can ascertain (or force) that this never happens, we could
   1888     // turn that into an assertion.
   1889     return ValueTrackerResult();
   1890 
   1891   if (!TII)
   1892     // We could handle the REG_SEQUENCE here, but we do not want to
   1893     // duplicate the code from the generic TII.
   1894     return ValueTrackerResult();
   1895 
   1896   SmallVector<RegSubRegPairAndIdx, 8> RegSeqInputRegs;
   1897   if (!TII->getRegSequenceInputs(*Def, DefIdx, RegSeqInputRegs))
   1898     return ValueTrackerResult();
   1899 
   1900   // We are looking at:
   1901   // Def = REG_SEQUENCE v0, sub0, v1, sub1, ...
   1902   // Check if one of the operand defines the subreg we are interested in.
   1903   for (const RegSubRegPairAndIdx &RegSeqInput : RegSeqInputRegs) {
   1904     if (RegSeqInput.SubIdx == DefSubReg) {
   1905       if (RegSeqInput.SubReg)
   1906         // Bail if we have to compose sub registers.
   1907         return ValueTrackerResult();
   1908 
   1909       return ValueTrackerResult(RegSeqInput.Reg, RegSeqInput.SubReg);
   1910     }
   1911   }
   1912 
   1913   // If the subreg we are tracking is super-defined by another subreg,
   1914   // we could follow this value. However, this would require to compose
   1915   // the subreg and we do not do that for now.
   1916   return ValueTrackerResult();
   1917 }
   1918 
   1919 ValueTrackerResult ValueTracker::getNextSourceFromInsertSubreg() {
   1920   assert((Def->isInsertSubreg() || Def->isInsertSubregLike()) &&
   1921          "Invalid definition");
   1922 
   1923   if (Def->getOperand(DefIdx).getSubReg())
   1924     // If we are composing subreg, bail out.
   1925     // Same remark as getNextSourceFromRegSequence.
   1926     // I.e., this may be turned into an assert.
   1927     return ValueTrackerResult();
   1928 
   1929   if (!TII)
   1930     // We could handle the REG_SEQUENCE here, but we do not want to
   1931     // duplicate the code from the generic TII.
   1932     return ValueTrackerResult();
   1933 
   1934   RegSubRegPair BaseReg;
   1935   RegSubRegPairAndIdx InsertedReg;
   1936   if (!TII->getInsertSubregInputs(*Def, DefIdx, BaseReg, InsertedReg))
   1937     return ValueTrackerResult();
   1938 
   1939   // We are looking at:
   1940   // Def = INSERT_SUBREG v0, v1, sub1
   1941   // There are two cases:
   1942   // 1. DefSubReg == sub1, get v1.
   1943   // 2. DefSubReg != sub1, the value may be available through v0.
   1944 
   1945   // #1 Check if the inserted register matches the required sub index.
   1946   if (InsertedReg.SubIdx == DefSubReg) {
   1947     return ValueTrackerResult(InsertedReg.Reg, InsertedReg.SubReg);
   1948   }
   1949   // #2 Otherwise, if the sub register we are looking for is not partial
   1950   // defined by the inserted element, we can look through the main
   1951   // register (v0).
   1952   const MachineOperand &MODef = Def->getOperand(DefIdx);
   1953   // If the result register (Def) and the base register (v0) do not
   1954   // have the same register class or if we have to compose
   1955   // subregisters, bail out.
   1956   if (MRI.getRegClass(MODef.getReg()) != MRI.getRegClass(BaseReg.Reg) ||
   1957       BaseReg.SubReg)
   1958     return ValueTrackerResult();
   1959 
   1960   // Get the TRI and check if the inserted sub-register overlaps with the
   1961   // sub-register we are tracking.
   1962   const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo();
   1963   if (!TRI ||
   1964       !(TRI->getSubRegIndexLaneMask(DefSubReg) &
   1965         TRI->getSubRegIndexLaneMask(InsertedReg.SubIdx)).none())
   1966     return ValueTrackerResult();
   1967   // At this point, the value is available in v0 via the same subreg
   1968   // we used for Def.
   1969   return ValueTrackerResult(BaseReg.Reg, DefSubReg);
   1970 }
   1971 
   1972 ValueTrackerResult ValueTracker::getNextSourceFromExtractSubreg() {
   1973   assert((Def->isExtractSubreg() ||
   1974           Def->isExtractSubregLike()) && "Invalid definition");
   1975   // We are looking at:
   1976   // Def = EXTRACT_SUBREG v0, sub0
   1977 
   1978   // Bail if we have to compose sub registers.
   1979   // Indeed, if DefSubReg != 0, we would have to compose it with sub0.
   1980   if (DefSubReg)
   1981     return ValueTrackerResult();
   1982 
   1983   if (!TII)
   1984     // We could handle the EXTRACT_SUBREG here, but we do not want to
   1985     // duplicate the code from the generic TII.
   1986     return ValueTrackerResult();
   1987 
   1988   RegSubRegPairAndIdx ExtractSubregInputReg;
   1989   if (!TII->getExtractSubregInputs(*Def, DefIdx, ExtractSubregInputReg))
   1990     return ValueTrackerResult();
   1991 
   1992   // Bail if we have to compose sub registers.
   1993   // Likewise, if v0.subreg != 0, we would have to compose v0.subreg with sub0.
   1994   if (ExtractSubregInputReg.SubReg)
   1995     return ValueTrackerResult();
   1996   // Otherwise, the value is available in the v0.sub0.
   1997   return ValueTrackerResult(ExtractSubregInputReg.Reg,
   1998                             ExtractSubregInputReg.SubIdx);
   1999 }
   2000 
   2001 ValueTrackerResult ValueTracker::getNextSourceFromSubregToReg() {
   2002   assert(Def->isSubregToReg() && "Invalid definition");
   2003   // We are looking at:
   2004   // Def = SUBREG_TO_REG Imm, v0, sub0
   2005 
   2006   // Bail if we have to compose sub registers.
   2007   // If DefSubReg != sub0, we would have to check that all the bits
   2008   // we track are included in sub0 and if yes, we would have to
   2009   // determine the right subreg in v0.
   2010   if (DefSubReg != Def->getOperand(3).getImm())
   2011     return ValueTrackerResult();
   2012   // Bail if we have to compose sub registers.
   2013   // Likewise, if v0.subreg != 0, we would have to compose it with sub0.
   2014   if (Def->getOperand(2).getSubReg())
   2015     return ValueTrackerResult();
   2016 
   2017   return ValueTrackerResult(Def->getOperand(2).getReg(),
   2018                             Def->getOperand(3).getImm());
   2019 }
   2020 
   2021 /// Explore each PHI incoming operand and return its sources.
   2022 ValueTrackerResult ValueTracker::getNextSourceFromPHI() {
   2023   assert(Def->isPHI() && "Invalid definition");
   2024   ValueTrackerResult Res;
   2025 
   2026   // If we look for a different subreg, bail as we do not support composing
   2027   // subregs yet.
   2028   if (Def->getOperand(0).getSubReg() != DefSubReg)
   2029     return ValueTrackerResult();
   2030 
   2031   // Return all register sources for PHI instructions.
   2032   for (unsigned i = 1, e = Def->getNumOperands(); i < e; i += 2) {
   2033     const MachineOperand &MO = Def->getOperand(i);
   2034     assert(MO.isReg() && "Invalid PHI instruction");
   2035     // We have no code to deal with undef operands. They shouldn't happen in
   2036     // normal programs anyway.
   2037     if (MO.isUndef())
   2038       return ValueTrackerResult();
   2039     Res.addSource(MO.getReg(), MO.getSubReg());
   2040   }
   2041 
   2042   return Res;
   2043 }
   2044 
   2045 ValueTrackerResult ValueTracker::getNextSourceImpl() {
   2046   assert(Def && "This method needs a valid definition");
   2047 
   2048   assert(((Def->getOperand(DefIdx).isDef() &&
   2049            (DefIdx < Def->getDesc().getNumDefs() ||
   2050             Def->getDesc().isVariadic())) ||
   2051           Def->getOperand(DefIdx).isImplicit()) &&
   2052          "Invalid DefIdx");
   2053   if (Def->isCopy())
   2054     return getNextSourceFromCopy();
   2055   if (Def->isBitcast())
   2056     return getNextSourceFromBitcast();
   2057   // All the remaining cases involve "complex" instructions.
   2058   // Bail if we did not ask for the advanced tracking.
   2059   if (DisableAdvCopyOpt)
   2060     return ValueTrackerResult();
   2061   if (Def->isRegSequence() || Def->isRegSequenceLike())
   2062     return getNextSourceFromRegSequence();
   2063   if (Def->isInsertSubreg() || Def->isInsertSubregLike())
   2064     return getNextSourceFromInsertSubreg();
   2065   if (Def->isExtractSubreg() || Def->isExtractSubregLike())
   2066     return getNextSourceFromExtractSubreg();
   2067   if (Def->isSubregToReg())
   2068     return getNextSourceFromSubregToReg();
   2069   if (Def->isPHI())
   2070     return getNextSourceFromPHI();
   2071   return ValueTrackerResult();
   2072 }
   2073 
   2074 ValueTrackerResult ValueTracker::getNextSource() {
   2075   // If we reach a point where we cannot move up in the use-def chain,
   2076   // there is nothing we can get.
   2077   if (!Def)
   2078     return ValueTrackerResult();
   2079 
   2080   ValueTrackerResult Res = getNextSourceImpl();
   2081   if (Res.isValid()) {
   2082     // Update definition, definition index, and subregister for the
   2083     // next call of getNextSource.
   2084     // Update the current register.
   2085     bool OneRegSrc = Res.getNumSources() == 1;
   2086     if (OneRegSrc)
   2087       Reg = Res.getSrcReg(0);
   2088     // Update the result before moving up in the use-def chain
   2089     // with the instruction containing the last found sources.
   2090     Res.setInst(Def);
   2091 
   2092     // If we can still move up in the use-def chain, move to the next
   2093     // definition.
   2094     if (!TargetRegisterInfo::isPhysicalRegister(Reg) && OneRegSrc) {
   2095       MachineRegisterInfo::def_iterator DI = MRI.def_begin(Reg);
   2096       if (DI != MRI.def_end()) {
   2097         Def = DI->getParent();
   2098         DefIdx = DI.getOperandNo();
   2099         DefSubReg = Res.getSrcSubReg(0);
   2100       } else {
   2101         Def = nullptr;
   2102       }
   2103       return Res;
   2104     }
   2105   }
   2106   // If we end up here, this means we will not be able to find another source
   2107   // for the next iteration. Make sure any new call to getNextSource bails out
   2108   // early by cutting the use-def chain.
   2109   Def = nullptr;
   2110   return Res;
   2111 }
   2112