Home | History | Annotate | Download | only in CodeGen
      1 //===-- PeepholeOptimizer.cpp - Peephole Optimizations --------------------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // Perform peephole optimizations on the machine code:
     11 //
     12 // - Optimize Extensions
     13 //
     14 //     Optimization of sign / zero extension instructions. It may be extended to
     15 //     handle other instructions with similar properties.
     16 //
     17 //     On some targets, some instructions, e.g. X86 sign / zero extension, may
     18 //     leave the source value in the lower part of the result. This optimization
     19 //     will replace some uses of the pre-extension value with uses of the
     20 //     sub-register of the results.
     21 //
     22 // - Optimize Comparisons
     23 //
     24 //     Optimization of comparison instructions. For instance, in this code:
     25 //
     26 //       sub r1, 1
     27 //       cmp r1, 0
     28 //       bz  L1
     29 //
     30 //     If the "sub" instruction all ready sets (or could be modified to set) the
     31 //     same flag that the "cmp" instruction sets and that "bz" uses, then we can
     32 //     eliminate the "cmp" instruction.
     33 //
     34 //     Another instance, in this code:
     35 //
     36 //       sub r1, r3 | sub r1, imm
     37 //       cmp r3, r1 or cmp r1, r3 | cmp r1, imm
     38 //       bge L1
     39 //
     40 //     If the branch instruction can use flag from "sub", then we can replace
     41 //     "sub" with "subs" and eliminate the "cmp" instruction.
     42 //
     43 // - Optimize Loads:
     44 //
     45 //     Loads that can be folded into a later instruction. A load is foldable
     46 //     if it loads to virtual registers and the virtual register defined has
     47 //     a single use.
     48 //
     49 // - Optimize Copies and Bitcast (more generally, target specific copies):
     50 //
     51 //     Rewrite copies and bitcasts to avoid cross register bank copies
     52 //     when possible.
     53 //     E.g., Consider the following example, where capital and lower
     54 //     letters denote different register file:
     55 //     b = copy A <-- cross-bank copy
     56 //     C = copy b <-- cross-bank copy
     57 //   =>
     58 //     b = copy A <-- cross-bank copy
     59 //     C = copy A <-- same-bank copy
     60 //
     61 //     E.g., for bitcast:
     62 //     b = bitcast A <-- cross-bank copy
     63 //     C = bitcast b <-- cross-bank copy
     64 //   =>
     65 //     b = bitcast A <-- cross-bank copy
     66 //     C = copy A    <-- same-bank copy
     67 //===----------------------------------------------------------------------===//
     68 
     69 #include "llvm/CodeGen/Passes.h"
     70 #include "llvm/ADT/DenseMap.h"
     71 #include "llvm/ADT/SmallPtrSet.h"
     72 #include "llvm/ADT/SmallSet.h"
     73 #include "llvm/ADT/Statistic.h"
     74 #include "llvm/CodeGen/MachineDominators.h"
     75 #include "llvm/CodeGen/MachineInstrBuilder.h"
     76 #include "llvm/CodeGen/MachineRegisterInfo.h"
     77 #include "llvm/Support/CommandLine.h"
     78 #include "llvm/Support/Debug.h"
     79 #include "llvm/Support/raw_ostream.h"
     80 #include "llvm/Target/TargetInstrInfo.h"
     81 #include "llvm/Target/TargetRegisterInfo.h"
     82 #include "llvm/Target/TargetSubtargetInfo.h"
     83 #include <utility>
     84 using namespace llvm;
     85 
     86 #define DEBUG_TYPE "peephole-opt"
     87 
     88 // Optimize Extensions
     89 static cl::opt<bool>
     90 Aggressive("aggressive-ext-opt", cl::Hidden,
     91            cl::desc("Aggressive extension optimization"));
     92 
     93 static cl::opt<bool>
     94 DisablePeephole("disable-peephole", cl::Hidden, cl::init(false),
     95                 cl::desc("Disable the peephole optimizer"));
     96 
     97 static cl::opt<bool>
     98 DisableAdvCopyOpt("disable-adv-copy-opt", cl::Hidden, cl::init(false),
     99                   cl::desc("Disable advanced copy optimization"));
    100 
    101 STATISTIC(NumReuse,      "Number of extension results reused");
    102 STATISTIC(NumCmps,       "Number of compares eliminated");
    103 STATISTIC(NumImmFold,    "Number of move immediate folded");
    104 STATISTIC(NumLoadFold,   "Number of loads folded");
    105 STATISTIC(NumSelects,    "Number of selects optimized");
    106 STATISTIC(NumUncoalescableCopies, "Number of uncoalescable copies optimized");
    107 STATISTIC(NumRewrittenCopies, "Number of copies rewritten");
    108 
    109 namespace {
    110   class PeepholeOptimizer : public MachineFunctionPass {
    111     const TargetInstrInfo *TII;
    112     const TargetRegisterInfo *TRI;
    113     MachineRegisterInfo   *MRI;
    114     MachineDominatorTree  *DT;  // Machine dominator tree
    115 
    116   public:
    117     static char ID; // Pass identification
    118     PeepholeOptimizer() : MachineFunctionPass(ID) {
    119       initializePeepholeOptimizerPass(*PassRegistry::getPassRegistry());
    120     }
    121 
    122     bool runOnMachineFunction(MachineFunction &MF) override;
    123 
    124     void getAnalysisUsage(AnalysisUsage &AU) const override {
    125       AU.setPreservesCFG();
    126       MachineFunctionPass::getAnalysisUsage(AU);
    127       if (Aggressive) {
    128         AU.addRequired<MachineDominatorTree>();
    129         AU.addPreserved<MachineDominatorTree>();
    130       }
    131     }
    132 
    133   private:
    134     bool optimizeCmpInstr(MachineInstr *MI, MachineBasicBlock *MBB);
    135     bool optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB,
    136                           SmallPtrSetImpl<MachineInstr*> &LocalMIs);
    137     bool optimizeSelect(MachineInstr *MI,
    138                         SmallPtrSetImpl<MachineInstr *> &LocalMIs);
    139     bool optimizeCondBranch(MachineInstr *MI);
    140     bool optimizeCopyOrBitcast(MachineInstr *MI);
    141     bool optimizeCoalescableCopy(MachineInstr *MI);
    142     bool optimizeUncoalescableCopy(MachineInstr *MI,
    143                                    SmallPtrSetImpl<MachineInstr *> &LocalMIs);
    144     bool findNextSource(unsigned &Reg, unsigned &SubReg);
    145     bool isMoveImmediate(MachineInstr *MI,
    146                          SmallSet<unsigned, 4> &ImmDefRegs,
    147                          DenseMap<unsigned, MachineInstr*> &ImmDefMIs);
    148     bool foldImmediate(MachineInstr *MI, MachineBasicBlock *MBB,
    149                        SmallSet<unsigned, 4> &ImmDefRegs,
    150                        DenseMap<unsigned, MachineInstr*> &ImmDefMIs);
    151     bool isLoadFoldable(MachineInstr *MI,
    152                         SmallSet<unsigned, 16> &FoldAsLoadDefCandidates);
    153 
    154     /// \brief Check whether \p MI is understood by the register coalescer
    155     /// but may require some rewriting.
    156     bool isCoalescableCopy(const MachineInstr &MI) {
    157       // SubregToRegs are not interesting, because they are already register
    158       // coalescer friendly.
    159       return MI.isCopy() || (!DisableAdvCopyOpt &&
    160                              (MI.isRegSequence() || MI.isInsertSubreg() ||
    161                               MI.isExtractSubreg()));
    162     }
    163 
    164     /// \brief Check whether \p MI is a copy like instruction that is
    165     /// not recognized by the register coalescer.
    166     bool isUncoalescableCopy(const MachineInstr &MI) {
    167       return MI.isBitcast() ||
    168              (!DisableAdvCopyOpt &&
    169               (MI.isRegSequenceLike() || MI.isInsertSubregLike() ||
    170                MI.isExtractSubregLike()));
    171     }
    172   };
    173 
    174   /// \brief Helper class to track the possible sources of a value defined by
    175   /// a (chain of) copy related instructions.
    176   /// Given a definition (instruction and definition index), this class
    177   /// follows the use-def chain to find successive suitable sources.
    178   /// The given source can be used to rewrite the definition into
    179   /// def = COPY src.
    180   ///
    181   /// For instance, let us consider the following snippet:
    182   /// v0 =
    183   /// v2 = INSERT_SUBREG v1, v0, sub0
    184   /// def = COPY v2.sub0
    185   ///
    186   /// Using a ValueTracker for def = COPY v2.sub0 will give the following
    187   /// suitable sources:
    188   /// v2.sub0 and v0.
    189   /// Then, def can be rewritten into def = COPY v0.
    190   class ValueTracker {
    191   private:
    192     /// The current point into the use-def chain.
    193     const MachineInstr *Def;
    194     /// The index of the definition in Def.
    195     unsigned DefIdx;
    196     /// The sub register index of the definition.
    197     unsigned DefSubReg;
    198     /// The register where the value can be found.
    199     unsigned Reg;
    200     /// Specifiy whether or not the value tracking looks through
    201     /// complex instructions. When this is false, the value tracker
    202     /// bails on everything that is not a copy or a bitcast.
    203     ///
    204     /// Note: This could have been implemented as a specialized version of
    205     /// the ValueTracker class but that would have complicated the code of
    206     /// the users of this class.
    207     bool UseAdvancedTracking;
    208     /// MachineRegisterInfo used to perform tracking.
    209     const MachineRegisterInfo &MRI;
    210     /// Optional TargetInstrInfo used to perform some complex
    211     /// tracking.
    212     const TargetInstrInfo *TII;
    213 
    214     /// \brief Dispatcher to the right underlying implementation of
    215     /// getNextSource.
    216     bool getNextSourceImpl(unsigned &SrcReg, unsigned &SrcSubReg);
    217     /// \brief Specialized version of getNextSource for Copy instructions.
    218     bool getNextSourceFromCopy(unsigned &SrcReg, unsigned &SrcSubReg);
    219     /// \brief Specialized version of getNextSource for Bitcast instructions.
    220     bool getNextSourceFromBitcast(unsigned &SrcReg, unsigned &SrcSubReg);
    221     /// \brief Specialized version of getNextSource for RegSequence
    222     /// instructions.
    223     bool getNextSourceFromRegSequence(unsigned &SrcReg, unsigned &SrcSubReg);
    224     /// \brief Specialized version of getNextSource for InsertSubreg
    225     /// instructions.
    226     bool getNextSourceFromInsertSubreg(unsigned &SrcReg, unsigned &SrcSubReg);
    227     /// \brief Specialized version of getNextSource for ExtractSubreg
    228     /// instructions.
    229     bool getNextSourceFromExtractSubreg(unsigned &SrcReg, unsigned &SrcSubReg);
    230     /// \brief Specialized version of getNextSource for SubregToReg
    231     /// instructions.
    232     bool getNextSourceFromSubregToReg(unsigned &SrcReg, unsigned &SrcSubReg);
    233 
    234   public:
    235     /// \brief Create a ValueTracker instance for the value defined by \p Reg.
    236     /// \p DefSubReg represents the sub register index the value tracker will
    237     /// track. It does not need to match the sub register index used in the
    238     /// definition of \p Reg.
    239     /// \p UseAdvancedTracking specifies whether or not the value tracker looks
    240     /// through complex instructions. By default (false), it handles only copy
    241     /// and bitcast instructions.
    242     /// If \p Reg is a physical register, a value tracker constructed with
    243     /// this constructor will not find any alternative source.
    244     /// Indeed, when \p Reg is a physical register that constructor does not
    245     /// know which definition of \p Reg it should track.
    246     /// Use the next constructor to track a physical register.
    247     ValueTracker(unsigned Reg, unsigned DefSubReg,
    248                  const MachineRegisterInfo &MRI,
    249                  bool UseAdvancedTracking = false,
    250                  const TargetInstrInfo *TII = nullptr)
    251         : Def(nullptr), DefIdx(0), DefSubReg(DefSubReg), Reg(Reg),
    252           UseAdvancedTracking(UseAdvancedTracking), MRI(MRI), TII(TII) {
    253       if (!TargetRegisterInfo::isPhysicalRegister(Reg)) {
    254         Def = MRI.getVRegDef(Reg);
    255         DefIdx = MRI.def_begin(Reg).getOperandNo();
    256       }
    257     }
    258 
    259     /// \brief Create a ValueTracker instance for the value defined by
    260     /// the pair \p MI, \p DefIdx.
    261     /// Unlike the other constructor, the value tracker produced by this one
    262     /// may be able to find a new source when the definition is a physical
    263     /// register.
    264     /// This could be useful to rewrite target specific instructions into
    265     /// generic copy instructions.
    266     ValueTracker(const MachineInstr &MI, unsigned DefIdx, unsigned DefSubReg,
    267                  const MachineRegisterInfo &MRI,
    268                  bool UseAdvancedTracking = false,
    269                  const TargetInstrInfo *TII = nullptr)
    270         : Def(&MI), DefIdx(DefIdx), DefSubReg(DefSubReg),
    271           UseAdvancedTracking(UseAdvancedTracking), MRI(MRI), TII(TII) {
    272       assert(DefIdx < Def->getDesc().getNumDefs() &&
    273              Def->getOperand(DefIdx).isReg() && "Invalid definition");
    274       Reg = Def->getOperand(DefIdx).getReg();
    275     }
    276 
    277     /// \brief Following the use-def chain, get the next available source
    278     /// for the tracked value.
    279     /// When the returned value is not nullptr, \p SrcReg gives the register
    280     /// that contain the tracked value.
    281     /// \note The sub register index returned in \p SrcSubReg must be used
    282     /// on \p SrcReg to access the actual value.
    283     /// \return Unless the returned value is nullptr (i.e., no source found),
    284     /// \p SrcReg gives the register of the next source used in the returned
    285     /// instruction and \p SrcSubReg the sub-register index to be used on that
    286     /// source to get the tracked value. When nullptr is returned, no
    287     /// alternative source has been found.
    288     const MachineInstr *getNextSource(unsigned &SrcReg, unsigned &SrcSubReg);
    289 
    290     /// \brief Get the last register where the initial value can be found.
    291     /// Initially this is the register of the definition.
    292     /// Then, after each successful call to getNextSource, this is the
    293     /// register of the last source.
    294     unsigned getReg() const { return Reg; }
    295   };
    296 }
    297 
    298 char PeepholeOptimizer::ID = 0;
    299 char &llvm::PeepholeOptimizerID = PeepholeOptimizer::ID;
    300 INITIALIZE_PASS_BEGIN(PeepholeOptimizer, "peephole-opts",
    301                 "Peephole Optimizations", false, false)
    302 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
    303 INITIALIZE_PASS_END(PeepholeOptimizer, "peephole-opts",
    304                 "Peephole Optimizations", false, false)
    305 
    306 /// optimizeExtInstr - If instruction is a copy-like instruction, i.e. it reads
    307 /// a single register and writes a single register and it does not modify the
    308 /// source, and if the source value is preserved as a sub-register of the
    309 /// result, then replace all reachable uses of the source with the subreg of the
    310 /// result.
    311 ///
    312 /// Do not generate an EXTRACT that is used only in a debug use, as this changes
    313 /// the code. Since this code does not currently share EXTRACTs, just ignore all
    314 /// debug uses.
    315 bool PeepholeOptimizer::
    316 optimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB,
    317                  SmallPtrSetImpl<MachineInstr*> &LocalMIs) {
    318   unsigned SrcReg, DstReg, SubIdx;
    319   if (!TII->isCoalescableExtInstr(*MI, SrcReg, DstReg, SubIdx))
    320     return false;
    321 
    322   if (TargetRegisterInfo::isPhysicalRegister(DstReg) ||
    323       TargetRegisterInfo::isPhysicalRegister(SrcReg))
    324     return false;
    325 
    326   if (MRI->hasOneNonDBGUse(SrcReg))
    327     // No other uses.
    328     return false;
    329 
    330   // Ensure DstReg can get a register class that actually supports
    331   // sub-registers. Don't change the class until we commit.
    332   const TargetRegisterClass *DstRC = MRI->getRegClass(DstReg);
    333   DstRC = TRI->getSubClassWithSubReg(DstRC, SubIdx);
    334   if (!DstRC)
    335     return false;
    336 
    337   // The ext instr may be operating on a sub-register of SrcReg as well.
    338   // PPC::EXTSW is a 32 -> 64-bit sign extension, but it reads a 64-bit
    339   // register.
    340   // If UseSrcSubIdx is Set, SubIdx also applies to SrcReg, and only uses of
    341   // SrcReg:SubIdx should be replaced.
    342   bool UseSrcSubIdx =
    343       TRI->getSubClassWithSubReg(MRI->getRegClass(SrcReg), SubIdx) != nullptr;
    344 
    345   // The source has other uses. See if we can replace the other uses with use of
    346   // the result of the extension.
    347   SmallPtrSet<MachineBasicBlock*, 4> ReachedBBs;
    348   for (MachineInstr &UI : MRI->use_nodbg_instructions(DstReg))
    349     ReachedBBs.insert(UI.getParent());
    350 
    351   // Uses that are in the same BB of uses of the result of the instruction.
    352   SmallVector<MachineOperand*, 8> Uses;
    353 
    354   // Uses that the result of the instruction can reach.
    355   SmallVector<MachineOperand*, 8> ExtendedUses;
    356 
    357   bool ExtendLife = true;
    358   for (MachineOperand &UseMO : MRI->use_nodbg_operands(SrcReg)) {
    359     MachineInstr *UseMI = UseMO.getParent();
    360     if (UseMI == MI)
    361       continue;
    362 
    363     if (UseMI->isPHI()) {
    364       ExtendLife = false;
    365       continue;
    366     }
    367 
    368     // Only accept uses of SrcReg:SubIdx.
    369     if (UseSrcSubIdx && UseMO.getSubReg() != SubIdx)
    370       continue;
    371 
    372     // It's an error to translate this:
    373     //
    374     //    %reg1025 = <sext> %reg1024
    375     //     ...
    376     //    %reg1026 = SUBREG_TO_REG 0, %reg1024, 4
    377     //
    378     // into this:
    379     //
    380     //    %reg1025 = <sext> %reg1024
    381     //     ...
    382     //    %reg1027 = COPY %reg1025:4
    383     //    %reg1026 = SUBREG_TO_REG 0, %reg1027, 4
    384     //
    385     // The problem here is that SUBREG_TO_REG is there to assert that an
    386     // implicit zext occurs. It doesn't insert a zext instruction. If we allow
    387     // the COPY here, it will give us the value after the <sext>, not the
    388     // original value of %reg1024 before <sext>.
    389     if (UseMI->getOpcode() == TargetOpcode::SUBREG_TO_REG)
    390       continue;
    391 
    392     MachineBasicBlock *UseMBB = UseMI->getParent();
    393     if (UseMBB == MBB) {
    394       // Local uses that come after the extension.
    395       if (!LocalMIs.count(UseMI))
    396         Uses.push_back(&UseMO);
    397     } else if (ReachedBBs.count(UseMBB)) {
    398       // Non-local uses where the result of the extension is used. Always
    399       // replace these unless it's a PHI.
    400       Uses.push_back(&UseMO);
    401     } else if (Aggressive && DT->dominates(MBB, UseMBB)) {
    402       // We may want to extend the live range of the extension result in order
    403       // to replace these uses.
    404       ExtendedUses.push_back(&UseMO);
    405     } else {
    406       // Both will be live out of the def MBB anyway. Don't extend live range of
    407       // the extension result.
    408       ExtendLife = false;
    409       break;
    410     }
    411   }
    412 
    413   if (ExtendLife && !ExtendedUses.empty())
    414     // Extend the liveness of the extension result.
    415     Uses.append(ExtendedUses.begin(), ExtendedUses.end());
    416 
    417   // Now replace all uses.
    418   bool Changed = false;
    419   if (!Uses.empty()) {
    420     SmallPtrSet<MachineBasicBlock*, 4> PHIBBs;
    421 
    422     // Look for PHI uses of the extended result, we don't want to extend the
    423     // liveness of a PHI input. It breaks all kinds of assumptions down
    424     // stream. A PHI use is expected to be the kill of its source values.
    425     for (MachineInstr &UI : MRI->use_nodbg_instructions(DstReg))
    426       if (UI.isPHI())
    427         PHIBBs.insert(UI.getParent());
    428 
    429     const TargetRegisterClass *RC = MRI->getRegClass(SrcReg);
    430     for (unsigned i = 0, e = Uses.size(); i != e; ++i) {
    431       MachineOperand *UseMO = Uses[i];
    432       MachineInstr *UseMI = UseMO->getParent();
    433       MachineBasicBlock *UseMBB = UseMI->getParent();
    434       if (PHIBBs.count(UseMBB))
    435         continue;
    436 
    437       // About to add uses of DstReg, clear DstReg's kill flags.
    438       if (!Changed) {
    439         MRI->clearKillFlags(DstReg);
    440         MRI->constrainRegClass(DstReg, DstRC);
    441       }
    442 
    443       unsigned NewVR = MRI->createVirtualRegister(RC);
    444       MachineInstr *Copy = BuildMI(*UseMBB, UseMI, UseMI->getDebugLoc(),
    445                                    TII->get(TargetOpcode::COPY), NewVR)
    446         .addReg(DstReg, 0, SubIdx);
    447       // SubIdx applies to both SrcReg and DstReg when UseSrcSubIdx is set.
    448       if (UseSrcSubIdx) {
    449         Copy->getOperand(0).setSubReg(SubIdx);
    450         Copy->getOperand(0).setIsUndef();
    451       }
    452       UseMO->setReg(NewVR);
    453       ++NumReuse;
    454       Changed = true;
    455     }
    456   }
    457 
    458   return Changed;
    459 }
    460 
    461 /// optimizeCmpInstr - If the instruction is a compare and the previous
    462 /// instruction it's comparing against all ready sets (or could be modified to
    463 /// set) the same flag as the compare, then we can remove the comparison and use
    464 /// the flag from the previous instruction.
    465 bool PeepholeOptimizer::optimizeCmpInstr(MachineInstr *MI,
    466                                          MachineBasicBlock *MBB) {
    467   // If this instruction is a comparison against zero and isn't comparing a
    468   // physical register, we can try to optimize it.
    469   unsigned SrcReg, SrcReg2;
    470   int CmpMask, CmpValue;
    471   if (!TII->analyzeCompare(MI, SrcReg, SrcReg2, CmpMask, CmpValue) ||
    472       TargetRegisterInfo::isPhysicalRegister(SrcReg) ||
    473       (SrcReg2 != 0 && TargetRegisterInfo::isPhysicalRegister(SrcReg2)))
    474     return false;
    475 
    476   // Attempt to optimize the comparison instruction.
    477   if (TII->optimizeCompareInstr(MI, SrcReg, SrcReg2, CmpMask, CmpValue, MRI)) {
    478     ++NumCmps;
    479     return true;
    480   }
    481 
    482   return false;
    483 }
    484 
    485 /// Optimize a select instruction.
    486 bool PeepholeOptimizer::optimizeSelect(MachineInstr *MI,
    487                             SmallPtrSetImpl<MachineInstr *> &LocalMIs) {
    488   unsigned TrueOp = 0;
    489   unsigned FalseOp = 0;
    490   bool Optimizable = false;
    491   SmallVector<MachineOperand, 4> Cond;
    492   if (TII->analyzeSelect(MI, Cond, TrueOp, FalseOp, Optimizable))
    493     return false;
    494   if (!Optimizable)
    495     return false;
    496   if (!TII->optimizeSelect(MI, LocalMIs))
    497     return false;
    498   MI->eraseFromParent();
    499   ++NumSelects;
    500   return true;
    501 }
    502 
    503 /// \brief Check if a simpler conditional branch can be
    504 // generated
    505 bool PeepholeOptimizer::optimizeCondBranch(MachineInstr *MI) {
    506   return TII->optimizeCondBranch(MI);
    507 }
    508 
    509 /// \brief Check if the registers defined by the pair (RegisterClass, SubReg)
    510 /// share the same register file.
    511 static bool shareSameRegisterFile(const TargetRegisterInfo &TRI,
    512                                   const TargetRegisterClass *DefRC,
    513                                   unsigned DefSubReg,
    514                                   const TargetRegisterClass *SrcRC,
    515                                   unsigned SrcSubReg) {
    516   // Same register class.
    517   if (DefRC == SrcRC)
    518     return true;
    519 
    520   // Both operands are sub registers. Check if they share a register class.
    521   unsigned SrcIdx, DefIdx;
    522   if (SrcSubReg && DefSubReg)
    523     return TRI.getCommonSuperRegClass(SrcRC, SrcSubReg, DefRC, DefSubReg,
    524                                       SrcIdx, DefIdx) != nullptr;
    525   // At most one of the register is a sub register, make it Src to avoid
    526   // duplicating the test.
    527   if (!SrcSubReg) {
    528     std::swap(DefSubReg, SrcSubReg);
    529     std::swap(DefRC, SrcRC);
    530   }
    531 
    532   // One of the register is a sub register, check if we can get a superclass.
    533   if (SrcSubReg)
    534     return TRI.getMatchingSuperRegClass(SrcRC, DefRC, SrcSubReg) != nullptr;
    535   // Plain copy.
    536   return TRI.getCommonSubClass(DefRC, SrcRC) != nullptr;
    537 }
    538 
    539 /// \brief Try to find the next source that share the same register file
    540 /// for the value defined by \p Reg and \p SubReg.
    541 /// When true is returned, \p Reg and \p SubReg are updated with the
    542 /// register number and sub-register index of the new source.
    543 /// \return False if no alternative sources are available. True otherwise.
    544 bool PeepholeOptimizer::findNextSource(unsigned &Reg, unsigned &SubReg) {
    545   // Do not try to find a new source for a physical register.
    546   // So far we do not have any motivating example for doing that.
    547   // Thus, instead of maintaining untested code, we will revisit that if
    548   // that changes at some point.
    549   if (TargetRegisterInfo::isPhysicalRegister(Reg))
    550     return false;
    551 
    552   const TargetRegisterClass *DefRC = MRI->getRegClass(Reg);
    553   unsigned DefSubReg = SubReg;
    554 
    555   unsigned Src;
    556   unsigned SrcSubReg;
    557   bool ShouldRewrite = false;
    558 
    559   // Follow the chain of copies until we reach the top of the use-def chain
    560   // or find a more suitable source.
    561   ValueTracker ValTracker(Reg, DefSubReg, *MRI, !DisableAdvCopyOpt, TII);
    562   do {
    563     unsigned CopySrcReg, CopySrcSubReg;
    564     if (!ValTracker.getNextSource(CopySrcReg, CopySrcSubReg))
    565       break;
    566     Src = CopySrcReg;
    567     SrcSubReg = CopySrcSubReg;
    568 
    569     // Do not extend the live-ranges of physical registers as they add
    570     // constraints to the register allocator.
    571     // Moreover, if we want to extend the live-range of a physical register,
    572     // unlike SSA virtual register, we will have to check that they are not
    573     // redefine before the related use.
    574     if (TargetRegisterInfo::isPhysicalRegister(Src))
    575       break;
    576 
    577     const TargetRegisterClass *SrcRC = MRI->getRegClass(Src);
    578 
    579     // If this source does not incur a cross register bank copy, use it.
    580     ShouldRewrite = shareSameRegisterFile(*TRI, DefRC, DefSubReg, SrcRC,
    581                                           SrcSubReg);
    582   } while (!ShouldRewrite);
    583 
    584   // If we did not find a more suitable source, there is nothing to optimize.
    585   if (!ShouldRewrite || Src == Reg)
    586     return false;
    587 
    588   Reg = Src;
    589   SubReg = SrcSubReg;
    590   return true;
    591 }
    592 
    593 namespace {
    594 /// \brief Helper class to rewrite the arguments of a copy-like instruction.
    595 class CopyRewriter {
    596 protected:
    597   /// The copy-like instruction.
    598   MachineInstr &CopyLike;
    599   /// The index of the source being rewritten.
    600   unsigned CurrentSrcIdx;
    601 
    602 public:
    603   CopyRewriter(MachineInstr &MI) : CopyLike(MI), CurrentSrcIdx(0) {}
    604 
    605   virtual ~CopyRewriter() {}
    606 
    607   /// \brief Get the next rewritable source (SrcReg, SrcSubReg) and
    608   /// the related value that it affects (TrackReg, TrackSubReg).
    609   /// A source is considered rewritable if its register class and the
    610   /// register class of the related TrackReg may not be register
    611   /// coalescer friendly. In other words, given a copy-like instruction
    612   /// not all the arguments may be returned at rewritable source, since
    613   /// some arguments are none to be register coalescer friendly.
    614   ///
    615   /// Each call of this method moves the current source to the next
    616   /// rewritable source.
    617   /// For instance, let CopyLike be the instruction to rewrite.
    618   /// CopyLike has one definition and one source:
    619   /// dst.dstSubIdx = CopyLike src.srcSubIdx.
    620   ///
    621   /// The first call will give the first rewritable source, i.e.,
    622   /// the only source this instruction has:
    623   /// (SrcReg, SrcSubReg) = (src, srcSubIdx).
    624   /// This source defines the whole definition, i.e.,
    625   /// (TrackReg, TrackSubReg) = (dst, dstSubIdx).
    626   ///
    627   /// The second and subsequent calls will return false, has there is only one
    628   /// rewritable source.
    629   ///
    630   /// \return True if a rewritable source has been found, false otherwise.
    631   /// The output arguments are valid if and only if true is returned.
    632   virtual bool getNextRewritableSource(unsigned &SrcReg, unsigned &SrcSubReg,
    633                                        unsigned &TrackReg,
    634                                        unsigned &TrackSubReg) {
    635     // If CurrentSrcIdx == 1, this means this function has already been
    636     // called once. CopyLike has one defintiion and one argument, thus,
    637     // there is nothing else to rewrite.
    638     if (!CopyLike.isCopy() || CurrentSrcIdx == 1)
    639       return false;
    640     // This is the first call to getNextRewritableSource.
    641     // Move the CurrentSrcIdx to remember that we made that call.
    642     CurrentSrcIdx = 1;
    643     // The rewritable source is the argument.
    644     const MachineOperand &MOSrc = CopyLike.getOperand(1);
    645     SrcReg = MOSrc.getReg();
    646     SrcSubReg = MOSrc.getSubReg();
    647     // What we track are the alternative sources of the definition.
    648     const MachineOperand &MODef = CopyLike.getOperand(0);
    649     TrackReg = MODef.getReg();
    650     TrackSubReg = MODef.getSubReg();
    651     return true;
    652   }
    653 
    654   /// \brief Rewrite the current source with \p NewReg and \p NewSubReg
    655   /// if possible.
    656   /// \return True if the rewritting was possible, false otherwise.
    657   virtual bool RewriteCurrentSource(unsigned NewReg, unsigned NewSubReg) {
    658     if (!CopyLike.isCopy() || CurrentSrcIdx != 1)
    659       return false;
    660     MachineOperand &MOSrc = CopyLike.getOperand(CurrentSrcIdx);
    661     MOSrc.setReg(NewReg);
    662     MOSrc.setSubReg(NewSubReg);
    663     return true;
    664   }
    665 };
    666 
    667 /// \brief Specialized rewriter for INSERT_SUBREG instruction.
    668 class InsertSubregRewriter : public CopyRewriter {
    669 public:
    670   InsertSubregRewriter(MachineInstr &MI) : CopyRewriter(MI) {
    671     assert(MI.isInsertSubreg() && "Invalid instruction");
    672   }
    673 
    674   /// \brief See CopyRewriter::getNextRewritableSource.
    675   /// Here CopyLike has the following form:
    676   /// dst = INSERT_SUBREG Src1, Src2.src2SubIdx, subIdx.
    677   /// Src1 has the same register class has dst, hence, there is
    678   /// nothing to rewrite.
    679   /// Src2.src2SubIdx, may not be register coalescer friendly.
    680   /// Therefore, the first call to this method returns:
    681   /// (SrcReg, SrcSubReg) = (Src2, src2SubIdx).
    682   /// (TrackReg, TrackSubReg) = (dst, subIdx).
    683   ///
    684   /// Subsequence calls will return false.
    685   bool getNextRewritableSource(unsigned &SrcReg, unsigned &SrcSubReg,
    686                                unsigned &TrackReg,
    687                                unsigned &TrackSubReg) override {
    688     // If we already get the only source we can rewrite, return false.
    689     if (CurrentSrcIdx == 2)
    690       return false;
    691     // We are looking at v2 = INSERT_SUBREG v0, v1, sub0.
    692     CurrentSrcIdx = 2;
    693     const MachineOperand &MOInsertedReg = CopyLike.getOperand(2);
    694     SrcReg = MOInsertedReg.getReg();
    695     SrcSubReg = MOInsertedReg.getSubReg();
    696     const MachineOperand &MODef = CopyLike.getOperand(0);
    697 
    698     // We want to track something that is compatible with the
    699     // partial definition.
    700     TrackReg = MODef.getReg();
    701     if (MODef.getSubReg())
    702       // Bails if we have to compose sub-register indices.
    703       return false;
    704     TrackSubReg = (unsigned)CopyLike.getOperand(3).getImm();
    705     return true;
    706   }
    707   bool RewriteCurrentSource(unsigned NewReg, unsigned NewSubReg) override {
    708     if (CurrentSrcIdx != 2)
    709       return false;
    710     // We are rewriting the inserted reg.
    711     MachineOperand &MO = CopyLike.getOperand(CurrentSrcIdx);
    712     MO.setReg(NewReg);
    713     MO.setSubReg(NewSubReg);
    714     return true;
    715   }
    716 };
    717 
    718 /// \brief Specialized rewriter for EXTRACT_SUBREG instruction.
    719 class ExtractSubregRewriter : public CopyRewriter {
    720   const TargetInstrInfo &TII;
    721 
    722 public:
    723   ExtractSubregRewriter(MachineInstr &MI, const TargetInstrInfo &TII)
    724       : CopyRewriter(MI), TII(TII) {
    725     assert(MI.isExtractSubreg() && "Invalid instruction");
    726   }
    727 
    728   /// \brief See CopyRewriter::getNextRewritableSource.
    729   /// Here CopyLike has the following form:
    730   /// dst.dstSubIdx = EXTRACT_SUBREG Src, subIdx.
    731   /// There is only one rewritable source: Src.subIdx,
    732   /// which defines dst.dstSubIdx.
    733   bool getNextRewritableSource(unsigned &SrcReg, unsigned &SrcSubReg,
    734                                unsigned &TrackReg,
    735                                unsigned &TrackSubReg) override {
    736     // If we already get the only source we can rewrite, return false.
    737     if (CurrentSrcIdx == 1)
    738       return false;
    739     // We are looking at v1 = EXTRACT_SUBREG v0, sub0.
    740     CurrentSrcIdx = 1;
    741     const MachineOperand &MOExtractedReg = CopyLike.getOperand(1);
    742     SrcReg = MOExtractedReg.getReg();
    743     // If we have to compose sub-register indices, bails out.
    744     if (MOExtractedReg.getSubReg())
    745       return false;
    746 
    747     SrcSubReg = CopyLike.getOperand(2).getImm();
    748 
    749     // We want to track something that is compatible with the definition.
    750     const MachineOperand &MODef = CopyLike.getOperand(0);
    751     TrackReg = MODef.getReg();
    752     TrackSubReg = MODef.getSubReg();
    753     return true;
    754   }
    755 
    756   bool RewriteCurrentSource(unsigned NewReg, unsigned NewSubReg) override {
    757     // The only source we can rewrite is the input register.
    758     if (CurrentSrcIdx != 1)
    759       return false;
    760 
    761     CopyLike.getOperand(CurrentSrcIdx).setReg(NewReg);
    762 
    763     // If we find a source that does not require to extract something,
    764     // rewrite the operation with a copy.
    765     if (!NewSubReg) {
    766       // Move the current index to an invalid position.
    767       // We do not want another call to this method to be able
    768       // to do any change.
    769       CurrentSrcIdx = -1;
    770       // Rewrite the operation as a COPY.
    771       // Get rid of the sub-register index.
    772       CopyLike.RemoveOperand(2);
    773       // Morph the operation into a COPY.
    774       CopyLike.setDesc(TII.get(TargetOpcode::COPY));
    775       return true;
    776     }
    777     CopyLike.getOperand(CurrentSrcIdx + 1).setImm(NewSubReg);
    778     return true;
    779   }
    780 };
    781 
    782 /// \brief Specialized rewriter for REG_SEQUENCE instruction.
    783 class RegSequenceRewriter : public CopyRewriter {
    784 public:
    785   RegSequenceRewriter(MachineInstr &MI) : CopyRewriter(MI) {
    786     assert(MI.isRegSequence() && "Invalid instruction");
    787   }
    788 
    789   /// \brief See CopyRewriter::getNextRewritableSource.
    790   /// Here CopyLike has the following form:
    791   /// dst = REG_SEQUENCE Src1.src1SubIdx, subIdx1, Src2.src2SubIdx, subIdx2.
    792   /// Each call will return a different source, walking all the available
    793   /// source.
    794   ///
    795   /// The first call returns:
    796   /// (SrcReg, SrcSubReg) = (Src1, src1SubIdx).
    797   /// (TrackReg, TrackSubReg) = (dst, subIdx1).
    798   ///
    799   /// The second call returns:
    800   /// (SrcReg, SrcSubReg) = (Src2, src2SubIdx).
    801   /// (TrackReg, TrackSubReg) = (dst, subIdx2).
    802   ///
    803   /// And so on, until all the sources have been traversed, then
    804   /// it returns false.
    805   bool getNextRewritableSource(unsigned &SrcReg, unsigned &SrcSubReg,
    806                                unsigned &TrackReg,
    807                                unsigned &TrackSubReg) override {
    808     // We are looking at v0 = REG_SEQUENCE v1, sub1, v2, sub2, etc.
    809 
    810     // If this is the first call, move to the first argument.
    811     if (CurrentSrcIdx == 0) {
    812       CurrentSrcIdx = 1;
    813     } else {
    814       // Otherwise, move to the next argument and check that it is valid.
    815       CurrentSrcIdx += 2;
    816       if (CurrentSrcIdx >= CopyLike.getNumOperands())
    817         return false;
    818     }
    819     const MachineOperand &MOInsertedReg = CopyLike.getOperand(CurrentSrcIdx);
    820     SrcReg = MOInsertedReg.getReg();
    821     // If we have to compose sub-register indices, bails out.
    822     if ((SrcSubReg = MOInsertedReg.getSubReg()))
    823       return false;
    824 
    825     // We want to track something that is compatible with the related
    826     // partial definition.
    827     TrackSubReg = CopyLike.getOperand(CurrentSrcIdx + 1).getImm();
    828 
    829     const MachineOperand &MODef = CopyLike.getOperand(0);
    830     TrackReg = MODef.getReg();
    831     // If we have to compose sub-registers, bails.
    832     return MODef.getSubReg() == 0;
    833   }
    834 
    835   bool RewriteCurrentSource(unsigned NewReg, unsigned NewSubReg) override {
    836     // We cannot rewrite out of bound operands.
    837     // Moreover, rewritable sources are at odd positions.
    838     if ((CurrentSrcIdx & 1) != 1 || CurrentSrcIdx > CopyLike.getNumOperands())
    839       return false;
    840 
    841     MachineOperand &MO = CopyLike.getOperand(CurrentSrcIdx);
    842     MO.setReg(NewReg);
    843     MO.setSubReg(NewSubReg);
    844     return true;
    845   }
    846 };
    847 } // End namespace.
    848 
    849 /// \brief Get the appropriated CopyRewriter for \p MI.
    850 /// \return A pointer to a dynamically allocated CopyRewriter or nullptr
    851 /// if no rewriter works for \p MI.
    852 static CopyRewriter *getCopyRewriter(MachineInstr &MI,
    853                                      const TargetInstrInfo &TII) {
    854   switch (MI.getOpcode()) {
    855   default:
    856     return nullptr;
    857   case TargetOpcode::COPY:
    858     return new CopyRewriter(MI);
    859   case TargetOpcode::INSERT_SUBREG:
    860     return new InsertSubregRewriter(MI);
    861   case TargetOpcode::EXTRACT_SUBREG:
    862     return new ExtractSubregRewriter(MI, TII);
    863   case TargetOpcode::REG_SEQUENCE:
    864     return new RegSequenceRewriter(MI);
    865   }
    866   llvm_unreachable(nullptr);
    867 }
    868 
    869 /// \brief Optimize generic copy instructions to avoid cross
    870 /// register bank copy. The optimization looks through a chain of
    871 /// copies and tries to find a source that has a compatible register
    872 /// class.
    873 /// Two register classes are considered to be compatible if they share
    874 /// the same register bank.
    875 /// New copies issued by this optimization are register allocator
    876 /// friendly. This optimization does not remove any copy as it may
    877 /// overconstraint the register allocator, but replaces some operands
    878 /// when possible.
    879 /// \pre isCoalescableCopy(*MI) is true.
    880 /// \return True, when \p MI has been rewritten. False otherwise.
    881 bool PeepholeOptimizer::optimizeCoalescableCopy(MachineInstr *MI) {
    882   assert(MI && isCoalescableCopy(*MI) && "Invalid argument");
    883   assert(MI->getDesc().getNumDefs() == 1 &&
    884          "Coalescer can understand multiple defs?!");
    885   const MachineOperand &MODef = MI->getOperand(0);
    886   // Do not rewrite physical definitions.
    887   if (TargetRegisterInfo::isPhysicalRegister(MODef.getReg()))
    888     return false;
    889 
    890   bool Changed = false;
    891   // Get the right rewriter for the current copy.
    892   std::unique_ptr<CopyRewriter> CpyRewriter(getCopyRewriter(*MI, *TII));
    893   // If none exists, bails out.
    894   if (!CpyRewriter)
    895     return false;
    896   // Rewrite each rewritable source.
    897   unsigned SrcReg, SrcSubReg, TrackReg, TrackSubReg;
    898   while (CpyRewriter->getNextRewritableSource(SrcReg, SrcSubReg, TrackReg,
    899                                               TrackSubReg)) {
    900     unsigned NewSrc = TrackReg;
    901     unsigned NewSubReg = TrackSubReg;
    902     // Try to find a more suitable source.
    903     // If we failed to do so, or get the actual source,
    904     // move to the next source.
    905     if (!findNextSource(NewSrc, NewSubReg) || SrcReg == NewSrc)
    906       continue;
    907     // Rewrite source.
    908     if (CpyRewriter->RewriteCurrentSource(NewSrc, NewSubReg)) {
    909       // We may have extended the live-range of NewSrc, account for that.
    910       MRI->clearKillFlags(NewSrc);
    911       Changed = true;
    912     }
    913   }
    914   // TODO: We could have a clean-up method to tidy the instruction.
    915   // E.g., v0 = INSERT_SUBREG v1, v1.sub0, sub0
    916   // => v0 = COPY v1
    917   // Currently we haven't seen motivating example for that and we
    918   // want to avoid untested code.
    919   NumRewrittenCopies += Changed;
    920   return Changed;
    921 }
    922 
    923 /// \brief Optimize copy-like instructions to create
    924 /// register coalescer friendly instruction.
    925 /// The optimization tries to kill-off the \p MI by looking
    926 /// through a chain of copies to find a source that has a compatible
    927 /// register class.
    928 /// If such a source is found, it replace \p MI by a generic COPY
    929 /// operation.
    930 /// \pre isUncoalescableCopy(*MI) is true.
    931 /// \return True, when \p MI has been optimized. In that case, \p MI has
    932 /// been removed from its parent.
    933 /// All COPY instructions created, are inserted in \p LocalMIs.
    934 bool PeepholeOptimizer::optimizeUncoalescableCopy(
    935     MachineInstr *MI, SmallPtrSetImpl<MachineInstr *> &LocalMIs) {
    936   assert(MI && isUncoalescableCopy(*MI) && "Invalid argument");
    937 
    938   // Check if we can rewrite all the values defined by this instruction.
    939   SmallVector<
    940       std::pair<TargetInstrInfo::RegSubRegPair, TargetInstrInfo::RegSubRegPair>,
    941       4> RewritePairs;
    942   for (const MachineOperand &MODef : MI->defs()) {
    943     if (MODef.isDead())
    944       // We can ignore those.
    945       continue;
    946 
    947     // If a physical register is here, this is probably for a good reason.
    948     // Do not rewrite that.
    949     if (TargetRegisterInfo::isPhysicalRegister(MODef.getReg()))
    950       return false;
    951 
    952     // If we do not know how to rewrite this definition, there is no point
    953     // in trying to kill this instruction.
    954     TargetInstrInfo::RegSubRegPair Def(MODef.getReg(), MODef.getSubReg());
    955     TargetInstrInfo::RegSubRegPair Src = Def;
    956     if (!findNextSource(Src.Reg, Src.SubReg))
    957       return false;
    958     RewritePairs.push_back(std::make_pair(Def, Src));
    959   }
    960   // The change is possible for all defs, do it.
    961   for (const auto &PairDefSrc : RewritePairs) {
    962     const auto &Def = PairDefSrc.first;
    963     const auto &Src = PairDefSrc.second;
    964     // Rewrite the "copy" in a way the register coalescer understands.
    965     assert(!TargetRegisterInfo::isPhysicalRegister(Def.Reg) &&
    966            "We do not rewrite physical registers");
    967     const TargetRegisterClass *DefRC = MRI->getRegClass(Def.Reg);
    968     unsigned NewVR = MRI->createVirtualRegister(DefRC);
    969     MachineInstr *NewCopy = BuildMI(*MI->getParent(), MI, MI->getDebugLoc(),
    970                                     TII->get(TargetOpcode::COPY),
    971                                     NewVR).addReg(Src.Reg, 0, Src.SubReg);
    972     NewCopy->getOperand(0).setSubReg(Def.SubReg);
    973     if (Def.SubReg)
    974       NewCopy->getOperand(0).setIsUndef();
    975     LocalMIs.insert(NewCopy);
    976     MRI->replaceRegWith(Def.Reg, NewVR);
    977     MRI->clearKillFlags(NewVR);
    978     // We extended the lifetime of Src.
    979     // Clear the kill flags to account for that.
    980     MRI->clearKillFlags(Src.Reg);
    981   }
    982   // MI is now dead.
    983   MI->eraseFromParent();
    984   ++NumUncoalescableCopies;
    985   return true;
    986 }
    987 
    988 /// isLoadFoldable - Check whether MI is a candidate for folding into a later
    989 /// instruction. We only fold loads to virtual registers and the virtual
    990 /// register defined has a single use.
    991 bool PeepholeOptimizer::isLoadFoldable(
    992                               MachineInstr *MI,
    993                               SmallSet<unsigned, 16> &FoldAsLoadDefCandidates) {
    994   if (!MI->canFoldAsLoad() || !MI->mayLoad())
    995     return false;
    996   const MCInstrDesc &MCID = MI->getDesc();
    997   if (MCID.getNumDefs() != 1)
    998     return false;
    999 
   1000   unsigned Reg = MI->getOperand(0).getReg();
   1001   // To reduce compilation time, we check MRI->hasOneNonDBGUse when inserting
   1002   // loads. It should be checked when processing uses of the load, since
   1003   // uses can be removed during peephole.
   1004   if (!MI->getOperand(0).getSubReg() &&
   1005       TargetRegisterInfo::isVirtualRegister(Reg) &&
   1006       MRI->hasOneNonDBGUse(Reg)) {
   1007     FoldAsLoadDefCandidates.insert(Reg);
   1008     return true;
   1009   }
   1010   return false;
   1011 }
   1012 
   1013 bool PeepholeOptimizer::isMoveImmediate(MachineInstr *MI,
   1014                                         SmallSet<unsigned, 4> &ImmDefRegs,
   1015                                  DenseMap<unsigned, MachineInstr*> &ImmDefMIs) {
   1016   const MCInstrDesc &MCID = MI->getDesc();
   1017   if (!MI->isMoveImmediate())
   1018     return false;
   1019   if (MCID.getNumDefs() != 1)
   1020     return false;
   1021   unsigned Reg = MI->getOperand(0).getReg();
   1022   if (TargetRegisterInfo::isVirtualRegister(Reg)) {
   1023     ImmDefMIs.insert(std::make_pair(Reg, MI));
   1024     ImmDefRegs.insert(Reg);
   1025     return true;
   1026   }
   1027 
   1028   return false;
   1029 }
   1030 
   1031 /// foldImmediate - Try folding register operands that are defined by move
   1032 /// immediate instructions, i.e. a trivial constant folding optimization, if
   1033 /// and only if the def and use are in the same BB.
   1034 bool PeepholeOptimizer::foldImmediate(MachineInstr *MI, MachineBasicBlock *MBB,
   1035                                       SmallSet<unsigned, 4> &ImmDefRegs,
   1036                                  DenseMap<unsigned, MachineInstr*> &ImmDefMIs) {
   1037   for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) {
   1038     MachineOperand &MO = MI->getOperand(i);
   1039     if (!MO.isReg() || MO.isDef())
   1040       continue;
   1041     unsigned Reg = MO.getReg();
   1042     if (!TargetRegisterInfo::isVirtualRegister(Reg))
   1043       continue;
   1044     if (ImmDefRegs.count(Reg) == 0)
   1045       continue;
   1046     DenseMap<unsigned, MachineInstr*>::iterator II = ImmDefMIs.find(Reg);
   1047     assert(II != ImmDefMIs.end());
   1048     if (TII->FoldImmediate(MI, II->second, Reg, MRI)) {
   1049       ++NumImmFold;
   1050       return true;
   1051     }
   1052   }
   1053   return false;
   1054 }
   1055 
   1056 bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
   1057   if (skipOptnoneFunction(*MF.getFunction()))
   1058     return false;
   1059 
   1060   DEBUG(dbgs() << "********** PEEPHOLE OPTIMIZER **********\n");
   1061   DEBUG(dbgs() << "********** Function: " << MF.getName() << '\n');
   1062 
   1063   if (DisablePeephole)
   1064     return false;
   1065 
   1066   TII = MF.getSubtarget().getInstrInfo();
   1067   TRI = MF.getSubtarget().getRegisterInfo();
   1068   MRI = &MF.getRegInfo();
   1069   DT  = Aggressive ? &getAnalysis<MachineDominatorTree>() : nullptr;
   1070 
   1071   bool Changed = false;
   1072 
   1073   for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
   1074     MachineBasicBlock *MBB = &*I;
   1075 
   1076     bool SeenMoveImm = false;
   1077 
   1078     // During this forward scan, at some point it needs to answer the question
   1079     // "given a pointer to an MI in the current BB, is it located before or
   1080     // after the current instruction".
   1081     // To perform this, the following set keeps track of the MIs already seen
   1082     // during the scan, if a MI is not in the set, it is assumed to be located
   1083     // after. Newly created MIs have to be inserted in the set as well.
   1084     SmallPtrSet<MachineInstr*, 16> LocalMIs;
   1085     SmallSet<unsigned, 4> ImmDefRegs;
   1086     DenseMap<unsigned, MachineInstr*> ImmDefMIs;
   1087     SmallSet<unsigned, 16> FoldAsLoadDefCandidates;
   1088 
   1089     for (MachineBasicBlock::iterator
   1090            MII = I->begin(), MIE = I->end(); MII != MIE; ) {
   1091       MachineInstr *MI = &*MII;
   1092       // We may be erasing MI below, increment MII now.
   1093       ++MII;
   1094       LocalMIs.insert(MI);
   1095 
   1096       // Skip debug values. They should not affect this peephole optimization.
   1097       if (MI->isDebugValue())
   1098           continue;
   1099 
   1100       // If there exists an instruction which belongs to the following
   1101       // categories, we will discard the load candidates.
   1102       if (MI->isPosition() || MI->isPHI() || MI->isImplicitDef() ||
   1103           MI->isKill() || MI->isInlineAsm() ||
   1104           MI->hasUnmodeledSideEffects()) {
   1105         FoldAsLoadDefCandidates.clear();
   1106         continue;
   1107       }
   1108       if (MI->mayStore() || MI->isCall())
   1109         FoldAsLoadDefCandidates.clear();
   1110 
   1111       if ((isUncoalescableCopy(*MI) &&
   1112            optimizeUncoalescableCopy(MI, LocalMIs)) ||
   1113           (MI->isCompare() && optimizeCmpInstr(MI, MBB)) ||
   1114           (MI->isSelect() && optimizeSelect(MI, LocalMIs))) {
   1115         // MI is deleted.
   1116         LocalMIs.erase(MI);
   1117         Changed = true;
   1118         continue;
   1119       }
   1120 
   1121       if (MI->isConditionalBranch() && optimizeCondBranch(MI)) {
   1122         Changed = true;
   1123         continue;
   1124       }
   1125 
   1126       if (isCoalescableCopy(*MI) && optimizeCoalescableCopy(MI)) {
   1127         // MI is just rewritten.
   1128         Changed = true;
   1129         continue;
   1130       }
   1131 
   1132       if (isMoveImmediate(MI, ImmDefRegs, ImmDefMIs)) {
   1133         SeenMoveImm = true;
   1134       } else {
   1135         Changed |= optimizeExtInstr(MI, MBB, LocalMIs);
   1136         // optimizeExtInstr might have created new instructions after MI
   1137         // and before the already incremented MII. Adjust MII so that the
   1138         // next iteration sees the new instructions.
   1139         MII = MI;
   1140         ++MII;
   1141         if (SeenMoveImm)
   1142           Changed |= foldImmediate(MI, MBB, ImmDefRegs, ImmDefMIs);
   1143       }
   1144 
   1145       // Check whether MI is a load candidate for folding into a later
   1146       // instruction. If MI is not a candidate, check whether we can fold an
   1147       // earlier load into MI.
   1148       if (!isLoadFoldable(MI, FoldAsLoadDefCandidates) &&
   1149           !FoldAsLoadDefCandidates.empty()) {
   1150         const MCInstrDesc &MIDesc = MI->getDesc();
   1151         for (unsigned i = MIDesc.getNumDefs(); i != MIDesc.getNumOperands();
   1152              ++i) {
   1153           const MachineOperand &MOp = MI->getOperand(i);
   1154           if (!MOp.isReg())
   1155             continue;
   1156           unsigned FoldAsLoadDefReg = MOp.getReg();
   1157           if (FoldAsLoadDefCandidates.count(FoldAsLoadDefReg)) {
   1158             // We need to fold load after optimizeCmpInstr, since
   1159             // optimizeCmpInstr can enable folding by converting SUB to CMP.
   1160             // Save FoldAsLoadDefReg because optimizeLoadInstr() resets it and
   1161             // we need it for markUsesInDebugValueAsUndef().
   1162             unsigned FoldedReg = FoldAsLoadDefReg;
   1163             MachineInstr *DefMI = nullptr;
   1164             MachineInstr *FoldMI = TII->optimizeLoadInstr(MI, MRI,
   1165                                                           FoldAsLoadDefReg,
   1166                                                           DefMI);
   1167             if (FoldMI) {
   1168               // Update LocalMIs since we replaced MI with FoldMI and deleted
   1169               // DefMI.
   1170               DEBUG(dbgs() << "Replacing: " << *MI);
   1171               DEBUG(dbgs() << "     With: " << *FoldMI);
   1172               LocalMIs.erase(MI);
   1173               LocalMIs.erase(DefMI);
   1174               LocalMIs.insert(FoldMI);
   1175               MI->eraseFromParent();
   1176               DefMI->eraseFromParent();
   1177               MRI->markUsesInDebugValueAsUndef(FoldedReg);
   1178               FoldAsLoadDefCandidates.erase(FoldedReg);
   1179               ++NumLoadFold;
   1180               // MI is replaced with FoldMI.
   1181               Changed = true;
   1182               break;
   1183             }
   1184           }
   1185         }
   1186       }
   1187     }
   1188   }
   1189 
   1190   return Changed;
   1191 }
   1192 
   1193 bool ValueTracker::getNextSourceFromCopy(unsigned &SrcReg,
   1194                                          unsigned &SrcSubReg) {
   1195   assert(Def->isCopy() && "Invalid definition");
   1196   // Copy instruction are supposed to be: Def = Src.
   1197   // If someone breaks this assumption, bad things will happen everywhere.
   1198   assert(Def->getNumOperands() == 2 && "Invalid number of operands");
   1199 
   1200   if (Def->getOperand(DefIdx).getSubReg() != DefSubReg)
   1201     // If we look for a different subreg, it means we want a subreg of src.
   1202     // Bails as we do not support composing subreg yet.
   1203     return false;
   1204   // Otherwise, we want the whole source.
   1205   const MachineOperand &Src = Def->getOperand(1);
   1206   SrcReg = Src.getReg();
   1207   SrcSubReg = Src.getSubReg();
   1208   return true;
   1209 }
   1210 
   1211 bool ValueTracker::getNextSourceFromBitcast(unsigned &SrcReg,
   1212                                             unsigned &SrcSubReg) {
   1213   assert(Def->isBitcast() && "Invalid definition");
   1214 
   1215   // Bail if there are effects that a plain copy will not expose.
   1216   if (Def->hasUnmodeledSideEffects())
   1217     return false;
   1218 
   1219   // Bitcasts with more than one def are not supported.
   1220   if (Def->getDesc().getNumDefs() != 1)
   1221     return false;
   1222   if (Def->getOperand(DefIdx).getSubReg() != DefSubReg)
   1223     // If we look for a different subreg, it means we want a subreg of the src.
   1224     // Bails as we do not support composing subreg yet.
   1225     return false;
   1226 
   1227   unsigned SrcIdx = Def->getNumOperands();
   1228   for (unsigned OpIdx = DefIdx + 1, EndOpIdx = SrcIdx; OpIdx != EndOpIdx;
   1229        ++OpIdx) {
   1230     const MachineOperand &MO = Def->getOperand(OpIdx);
   1231     if (!MO.isReg() || !MO.getReg())
   1232       continue;
   1233     assert(!MO.isDef() && "We should have skipped all the definitions by now");
   1234     if (SrcIdx != EndOpIdx)
   1235       // Multiple sources?
   1236       return false;
   1237     SrcIdx = OpIdx;
   1238   }
   1239   const MachineOperand &Src = Def->getOperand(SrcIdx);
   1240   SrcReg = Src.getReg();
   1241   SrcSubReg = Src.getSubReg();
   1242   return true;
   1243 }
   1244 
   1245 bool ValueTracker::getNextSourceFromRegSequence(unsigned &SrcReg,
   1246                                                 unsigned &SrcSubReg) {
   1247   assert((Def->isRegSequence() || Def->isRegSequenceLike()) &&
   1248          "Invalid definition");
   1249 
   1250   if (Def->getOperand(DefIdx).getSubReg())
   1251     // If we are composing subreg, bails out.
   1252     // The case we are checking is Def.<subreg> = REG_SEQUENCE.
   1253     // This should almost never happen as the SSA property is tracked at
   1254     // the register level (as opposed to the subreg level).
   1255     // I.e.,
   1256     // Def.sub0 =
   1257     // Def.sub1 =
   1258     // is a valid SSA representation for Def.sub0 and Def.sub1, but not for
   1259     // Def. Thus, it must not be generated.
   1260     // However, some code could theoretically generates a single
   1261     // Def.sub0 (i.e, not defining the other subregs) and we would
   1262     // have this case.
   1263     // If we can ascertain (or force) that this never happens, we could
   1264     // turn that into an assertion.
   1265     return false;
   1266 
   1267   if (!TII)
   1268     // We could handle the REG_SEQUENCE here, but we do not want to
   1269     // duplicate the code from the generic TII.
   1270     return false;
   1271 
   1272   SmallVector<TargetInstrInfo::RegSubRegPairAndIdx, 8> RegSeqInputRegs;
   1273   if (!TII->getRegSequenceInputs(*Def, DefIdx, RegSeqInputRegs))
   1274     return false;
   1275 
   1276   // We are looking at:
   1277   // Def = REG_SEQUENCE v0, sub0, v1, sub1, ...
   1278   // Check if one of the operand defines the subreg we are interested in.
   1279   for (auto &RegSeqInput : RegSeqInputRegs) {
   1280     if (RegSeqInput.SubIdx == DefSubReg) {
   1281       if (RegSeqInput.SubReg)
   1282         // Bails if we have to compose sub registers.
   1283         return false;
   1284 
   1285       SrcReg = RegSeqInput.Reg;
   1286       SrcSubReg = RegSeqInput.SubReg;
   1287       return true;
   1288     }
   1289   }
   1290 
   1291   // If the subreg we are tracking is super-defined by another subreg,
   1292   // we could follow this value. However, this would require to compose
   1293   // the subreg and we do not do that for now.
   1294   return false;
   1295 }
   1296 
   1297 bool ValueTracker::getNextSourceFromInsertSubreg(unsigned &SrcReg,
   1298                                                  unsigned &SrcSubReg) {
   1299   assert((Def->isInsertSubreg() || Def->isInsertSubregLike()) &&
   1300          "Invalid definition");
   1301 
   1302   if (Def->getOperand(DefIdx).getSubReg())
   1303     // If we are composing subreg, bails out.
   1304     // Same remark as getNextSourceFromRegSequence.
   1305     // I.e., this may be turned into an assert.
   1306     return false;
   1307 
   1308   if (!TII)
   1309     // We could handle the REG_SEQUENCE here, but we do not want to
   1310     // duplicate the code from the generic TII.
   1311     return false;
   1312 
   1313   TargetInstrInfo::RegSubRegPair BaseReg;
   1314   TargetInstrInfo::RegSubRegPairAndIdx InsertedReg;
   1315   if (!TII->getInsertSubregInputs(*Def, DefIdx, BaseReg, InsertedReg))
   1316     return false;
   1317 
   1318   // We are looking at:
   1319   // Def = INSERT_SUBREG v0, v1, sub1
   1320   // There are two cases:
   1321   // 1. DefSubReg == sub1, get v1.
   1322   // 2. DefSubReg != sub1, the value may be available through v0.
   1323 
   1324   // #1 Check if the inserted register matches the required sub index.
   1325   if (InsertedReg.SubIdx == DefSubReg) {
   1326     SrcReg = InsertedReg.Reg;
   1327     SrcSubReg = InsertedReg.SubReg;
   1328     return true;
   1329   }
   1330   // #2 Otherwise, if the sub register we are looking for is not partial
   1331   // defined by the inserted element, we can look through the main
   1332   // register (v0).
   1333   const MachineOperand &MODef = Def->getOperand(DefIdx);
   1334   // If the result register (Def) and the base register (v0) do not
   1335   // have the same register class or if we have to compose
   1336   // subregisters, bails out.
   1337   if (MRI.getRegClass(MODef.getReg()) != MRI.getRegClass(BaseReg.Reg) ||
   1338       BaseReg.SubReg)
   1339     return false;
   1340 
   1341   // Get the TRI and check if the inserted sub-register overlaps with the
   1342   // sub-register we are tracking.
   1343   const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo();
   1344   if (!TRI ||
   1345       (TRI->getSubRegIndexLaneMask(DefSubReg) &
   1346        TRI->getSubRegIndexLaneMask(InsertedReg.SubIdx)) != 0)
   1347     return false;
   1348   // At this point, the value is available in v0 via the same subreg
   1349   // we used for Def.
   1350   SrcReg = BaseReg.Reg;
   1351   SrcSubReg = DefSubReg;
   1352   return true;
   1353 }
   1354 
   1355 bool ValueTracker::getNextSourceFromExtractSubreg(unsigned &SrcReg,
   1356                                                   unsigned &SrcSubReg) {
   1357   assert((Def->isExtractSubreg() ||
   1358           Def->isExtractSubregLike()) && "Invalid definition");
   1359   // We are looking at:
   1360   // Def = EXTRACT_SUBREG v0, sub0
   1361 
   1362   // Bails if we have to compose sub registers.
   1363   // Indeed, if DefSubReg != 0, we would have to compose it with sub0.
   1364   if (DefSubReg)
   1365     return false;
   1366 
   1367   if (!TII)
   1368     // We could handle the EXTRACT_SUBREG here, but we do not want to
   1369     // duplicate the code from the generic TII.
   1370     return false;
   1371 
   1372   TargetInstrInfo::RegSubRegPairAndIdx ExtractSubregInputReg;
   1373   if (!TII->getExtractSubregInputs(*Def, DefIdx, ExtractSubregInputReg))
   1374     return false;
   1375 
   1376   // Bails if we have to compose sub registers.
   1377   // Likewise, if v0.subreg != 0, we would have to compose v0.subreg with sub0.
   1378   if (ExtractSubregInputReg.SubReg)
   1379     return false;
   1380   // Otherwise, the value is available in the v0.sub0.
   1381   SrcReg = ExtractSubregInputReg.Reg;
   1382   SrcSubReg = ExtractSubregInputReg.SubIdx;
   1383   return true;
   1384 }
   1385 
   1386 bool ValueTracker::getNextSourceFromSubregToReg(unsigned &SrcReg,
   1387                                                 unsigned &SrcSubReg) {
   1388   assert(Def->isSubregToReg() && "Invalid definition");
   1389   // We are looking at:
   1390   // Def = SUBREG_TO_REG Imm, v0, sub0
   1391 
   1392   // Bails if we have to compose sub registers.
   1393   // If DefSubReg != sub0, we would have to check that all the bits
   1394   // we track are included in sub0 and if yes, we would have to
   1395   // determine the right subreg in v0.
   1396   if (DefSubReg != Def->getOperand(3).getImm())
   1397     return false;
   1398   // Bails if we have to compose sub registers.
   1399   // Likewise, if v0.subreg != 0, we would have to compose it with sub0.
   1400   if (Def->getOperand(2).getSubReg())
   1401     return false;
   1402 
   1403   SrcReg = Def->getOperand(2).getReg();
   1404   SrcSubReg = Def->getOperand(3).getImm();
   1405   return true;
   1406 }
   1407 
   1408 bool ValueTracker::getNextSourceImpl(unsigned &SrcReg, unsigned &SrcSubReg) {
   1409   assert(Def && "This method needs a valid definition");
   1410 
   1411   assert(
   1412       (DefIdx < Def->getDesc().getNumDefs() || Def->getDesc().isVariadic()) &&
   1413       Def->getOperand(DefIdx).isDef() && "Invalid DefIdx");
   1414   if (Def->isCopy())
   1415     return getNextSourceFromCopy(SrcReg, SrcSubReg);
   1416   if (Def->isBitcast())
   1417     return getNextSourceFromBitcast(SrcReg, SrcSubReg);
   1418   // All the remaining cases involve "complex" instructions.
   1419   // Bails if we did not ask for the advanced tracking.
   1420   if (!UseAdvancedTracking)
   1421     return false;
   1422   if (Def->isRegSequence() || Def->isRegSequenceLike())
   1423     return getNextSourceFromRegSequence(SrcReg, SrcSubReg);
   1424   if (Def->isInsertSubreg() || Def->isInsertSubregLike())
   1425     return getNextSourceFromInsertSubreg(SrcReg, SrcSubReg);
   1426   if (Def->isExtractSubreg() || Def->isExtractSubregLike())
   1427     return getNextSourceFromExtractSubreg(SrcReg, SrcSubReg);
   1428   if (Def->isSubregToReg())
   1429     return getNextSourceFromSubregToReg(SrcReg, SrcSubReg);
   1430   return false;
   1431 }
   1432 
   1433 const MachineInstr *ValueTracker::getNextSource(unsigned &SrcReg,
   1434                                                 unsigned &SrcSubReg) {
   1435   // If we reach a point where we cannot move up in the use-def chain,
   1436   // there is nothing we can get.
   1437   if (!Def)
   1438     return nullptr;
   1439 
   1440   const MachineInstr *PrevDef = nullptr;
   1441   // Try to find the next source.
   1442   if (getNextSourceImpl(SrcReg, SrcSubReg)) {
   1443     // Update definition, definition index, and subregister for the
   1444     // next call of getNextSource.
   1445     // Update the current register.
   1446     Reg = SrcReg;
   1447     // Update the return value before moving up in the use-def chain.
   1448     PrevDef = Def;
   1449     // If we can still move up in the use-def chain, move to the next
   1450     // defintion.
   1451     if (!TargetRegisterInfo::isPhysicalRegister(Reg)) {
   1452       Def = MRI.getVRegDef(Reg);
   1453       DefIdx = MRI.def_begin(Reg).getOperandNo();
   1454       DefSubReg = SrcSubReg;
   1455       return PrevDef;
   1456     }
   1457   }
   1458   // If we end up here, this means we will not be able to find another source
   1459   // for the next iteration.
   1460   // Make sure any new call to getNextSource bails out early by cutting the
   1461   // use-def chain.
   1462   Def = nullptr;
   1463   return PrevDef;
   1464 }
   1465