//===- RegisterCoalescer.cpp - Generic Register Coalescing Interface ------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// This file implements the generic RegisterCoalescer interface, which
// serves as the common interface for all clients and implementations
// of register coalescing.
//
//===----------------------------------------------------------------------===//

#include "RegisterCoalescer.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/BitVector.h"
#include "llvm/ADT/STLExtras.h"
#include "llvm/ADT/SmallPtrSet.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/Analysis/AliasAnalysis.h"
#include "llvm/CodeGen/LiveInterval.h"
#include "llvm/CodeGen/LiveIntervals.h"
#include "llvm/CodeGen/LiveRangeEdit.h"
#include "llvm/CodeGen/MachineBasicBlock.h"
#include "llvm/CodeGen/MachineFunction.h"
#include "llvm/CodeGen/MachineFunctionPass.h"
#include "llvm/CodeGen/MachineInstr.h"
#include "llvm/CodeGen/MachineInstrBuilder.h"
#include "llvm/CodeGen/MachineLoopInfo.h"
#include "llvm/CodeGen/MachineOperand.h"
#include "llvm/CodeGen/MachineRegisterInfo.h"
#include "llvm/CodeGen/Passes.h"
#include "llvm/CodeGen/RegisterClassInfo.h"
#include "llvm/CodeGen/SlotIndexes.h"
#include "llvm/CodeGen/TargetInstrInfo.h"
#include "llvm/CodeGen/TargetOpcodes.h"
#include "llvm/CodeGen/TargetRegisterInfo.h"
#include "llvm/CodeGen/TargetSubtargetInfo.h"
#include "llvm/IR/DebugLoc.h"
#include "llvm/MC/LaneBitmask.h"
#include "llvm/MC/MCInstrDesc.h"
#include "llvm/MC/MCRegisterInfo.h"
#include "llvm/Pass.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/raw_ostream.h"
#include <algorithm>
#include <cassert>
#include <iterator>
#include <limits>
#include <tuple>
#include <utility>
#include <vector>

using namespace llvm;

#define DEBUG_TYPE "regalloc"

STATISTIC(numJoins    , "Number of interval joins performed");
STATISTIC(numCrossRCs , "Number of cross class joins performed");
STATISTIC(numCommutes , "Number of instructions commuted");
STATISTIC(numExtends  , "Number of copies extended");
STATISTIC(NumReMats   , "Number of instructions re-materialized");
STATISTIC(NumInflated , "Number of register classes inflated");
STATISTIC(NumLaneConflicts, "Number of dead lane conflicts tested");
STATISTIC(NumLaneResolves,  "Number of dead lane conflicts resolved");

static cl::opt<bool> EnableJoining("join-liveintervals",
                                   cl::desc("Coalesce copies (default=true)"),
                                   cl::init(true), cl::Hidden);

static cl::opt<bool> UseTerminalRule("terminal-rule",
                                     cl::desc("Apply the terminal rule"),
                                     cl::init(false), cl::Hidden);

/// Temporary flag to test critical edge unsplitting.
static cl::opt<bool>
EnableJoinSplits("join-splitedges",
  cl::desc("Coalesce copies on split edges (default=subtarget)"), cl::Hidden);

/// Temporary flag to test global copy optimization.
static cl::opt<cl::boolOrDefault>
EnableGlobalCopies("join-globalcopies",
  cl::desc("Coalesce copies that span blocks (default=subtarget)"),
  cl::init(cl::BOU_UNSET), cl::Hidden);

static cl::opt<bool>
VerifyCoalescing("verify-coalescing",
         cl::desc("Verify machine instrs before and after register coalescing"),
         cl::Hidden);

namespace {

  class RegisterCoalescer : public MachineFunctionPass,
                            private LiveRangeEdit::Delegate {
    MachineFunction* MF;
    MachineRegisterInfo* MRI;
    const TargetRegisterInfo* TRI;
    const TargetInstrInfo* TII;
    LiveIntervals *LIS;
    const MachineLoopInfo* Loops;
    AliasAnalysis *AA;
    RegisterClassInfo RegClassInfo;

    /// A LaneMask to remember on which subregister live ranges we need to call
    /// shrinkToUses() later.
    LaneBitmask ShrinkMask;

    /// True if the main range of the currently coalesced intervals should be
    /// checked for smaller live intervals.
    bool ShrinkMainRange;

    /// True if the coalescer should aggressively coalesce global copies
    /// in favor of keeping local copies.
    bool JoinGlobalCopies;

    /// True if the coalescer should aggressively coalesce fall-thru
    /// blocks exclusively containing copies.
    bool JoinSplitEdges;

    /// Copy instructions yet to be coalesced.
    SmallVector<MachineInstr*, 8> WorkList;
    SmallVector<MachineInstr*, 8> LocalWorkList;

    /// Set of instruction pointers that have been erased, and
    /// that may be present in WorkList.
    SmallPtrSet<MachineInstr*, 8> ErasedInstrs;

    /// Dead instructions that are about to be deleted.
    SmallVector<MachineInstr*, 8> DeadDefs;

    /// Virtual registers to be considered for register class inflation.
    SmallVector<unsigned, 8> InflateRegs;

    /// Recursively eliminate dead defs in DeadDefs.
    void eliminateDeadDefs();

    /// LiveRangeEdit callback for eliminateDeadDefs().
    void LRE_WillEraseInstruction(MachineInstr *MI) override;

    /// Coalesce the LocalWorkList.
    void coalesceLocals();

    /// Join compatible live intervals.
    void joinAllIntervals();

    /// Coalesce copies in the specified MBB, putting
    /// copies that cannot yet be coalesced into WorkList.
    void copyCoalesceInMBB(MachineBasicBlock *MBB);

    /// Tries to coalesce all copies in CurrList. Returns true if any progress
    /// was made.
    bool copyCoalesceWorkList(MutableArrayRef<MachineInstr*> CurrList);

    /// Attempt to join intervals corresponding to SrcReg/DstReg, which are the
    /// src/dst of the copy instruction CopyMI.  This returns true if the copy
    /// was successfully coalesced away. If it is not currently possible to
    /// coalesce this interval, but it may be possible if other things get
    /// coalesced, then it returns true by reference in 'Again'.
    bool joinCopy(MachineInstr *CopyMI, bool &Again);

    /// Attempt to join these two intervals.  On failure, this
    /// returns false.  The output "SrcInt" will not have been modified, so we
    /// can use this information below to update aliases.
    bool joinIntervals(CoalescerPair &CP);

    /// Attempt joining two virtual registers. Return true on success.
    bool joinVirtRegs(CoalescerPair &CP);

    /// Attempt joining with a reserved physreg.
    bool joinReservedPhysReg(CoalescerPair &CP);

    /// Add the LiveRange @p ToMerge as a subregister liverange of @p LI.
    /// Subranges in @p LI which only partially interfere with the desired
    /// LaneMask are split as necessary. @p LaneMask is the set of lanes that
    /// @p ToMerge will occupy in the coalesced register. @p LI has its subrange
    /// lanemasks already adjusted to the coalesced register.
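    /// For example (illustrative lane masks, not from any particular target):
    /// if @p LI has subranges with masks 0001 and 0110 and @p LaneMask is
    /// 0011, the 0110 subrange is split into 0010 and 0100 parts so that
    /// every subrange either lies fully inside or fully outside @p LaneMask.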
    void mergeSubRangeInto(LiveInterval &LI, const LiveRange &ToMerge,
                           LaneBitmask LaneMask, CoalescerPair &CP);

    /// Join the liveranges of two subregisters. Joins @p RRange into
    /// @p LRange; @p RRange may be invalid afterwards.
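    /// For example, when joining the sub_lo ranges of two virtual registers
    /// (a hypothetical subregister index), @p RRange's segments are folded
    /// into @p LRange under the value-number mapping computed for the join.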
    void joinSubRegRanges(LiveRange &LRange, LiveRange &RRange,
                          LaneBitmask LaneMask, const CoalescerPair &CP);

    /// We found a non-trivially-coalescable copy. If the source value number is
    /// defined by a copy from the destination reg, see if we can merge these
    /// two destination reg value numbers into a single value number,
    /// eliminating a copy. This returns true if an interval was modified.
    bool adjustCopiesBackFrom(const CoalescerPair &CP, MachineInstr *CopyMI);

    /// Return true if there are definitions of IntB
    /// other than BValNo val# that can reach uses of AValNo val# of IntA.
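    /// For example (hypothetical slot indices): if AValNo covers [40r,120r)
    /// in IntA and IntB has a segment [80r,200r) belonging to some other
    /// value number, that def overlaps AValNo's range and this returns true.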
    bool hasOtherReachingDefs(LiveInterval &IntA, LiveInterval &IntB,
                              VNInfo *AValNo, VNInfo *BValNo);

    /// We found a non-trivially-coalescable copy.
    /// If the source value number is defined by a commutable instruction and
    /// its other operand is coalesced to the copy dest register, see if we
    /// can transform the copy into a noop by commuting the definition.
    /// This returns true if an interval was modified.
    bool removeCopyByCommutingDef(const CoalescerPair &CP,
                                  MachineInstr *CopyMI);

    /// We found a copy which can be moved to its less frequent predecessor.
    bool removePartialRedundancy(const CoalescerPair &CP, MachineInstr &CopyMI);

    /// If the source of a copy is defined by a trivial computation, replace
    /// the copy by rematerializing the definition.
    bool reMaterializeTrivialDef(const CoalescerPair &CP, MachineInstr *CopyMI,
                                 bool &IsDefCopy);

    /// Return true if a copy involving a physreg should be joined.
    bool canJoinPhys(const CoalescerPair &CP);

    /// Replace all defs and uses of SrcReg with DstReg and update the
    /// subregister number if it is not zero. If DstReg is a physical register
    /// and the existing subregister number of the def / use being updated is
    /// not zero, make sure to set it to the correct physical subregister.
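    /// For example (hypothetical vregs), rewriting SrcReg %1 to DstReg %2 with
    /// SubIdx sub_lo turns "= use %1" into "= use %2:sub_lo"; if DstReg were a
    /// physical register, the matching physical subregister would be used
    /// instead.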
    void updateRegDefsUses(unsigned SrcReg, unsigned DstReg, unsigned SubIdx);

    /// If the given machine operand reads only undefined lanes, add an undef
    /// flag.
    /// This can happen when undef uses were previously concealed by a copy
    /// which we coalesced. Example:
    ///    %0:sub0<def,read-undef> = ...
    ///    %1 = COPY %0           <-- Coalescing COPY reveals undef
    ///       = use %1:sub1       <-- hidden undef use
    void addUndefFlag(const LiveInterval &Int, SlotIndex UseIdx,
                      MachineOperand &MO, unsigned SubRegIdx);

    /// Handle copies of undef values. If the undef value is an incoming
    /// PHI value, it will convert @p CopyMI to an IMPLICIT_DEF.
    /// Returns nullptr if @p CopyMI was not in any way eliminable. Otherwise,
    /// it returns @p CopyMI (which could be an IMPLICIT_DEF at this point).
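    /// For example, if the source of "%1 = COPY %0" is undef at the copy and
    /// the undef value arrives through a PHI, the copy is rewritten to
    /// "%1 = IMPLICIT_DEF" rather than simply erased.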
    MachineInstr *eliminateUndefCopy(MachineInstr *CopyMI);

    /// Check whether or not we should apply the terminal rule on the
    /// destination (Dst) of \p Copy.
    /// When the terminal rule applies, Copy is not profitable to
    /// coalesce.
    /// Dst is terminal if it has exactly one affinity (Dst, Src) and
    /// at least one interference (Dst, Dst2). If Dst is terminal, the
    /// terminal rule consists of checking that at least one of the
    /// interfering nodes, say Dst2, has an affinity of equal or greater
    /// weight with Src.
    /// In that case, Dst2 and Dst cannot both be coalesced with Src.
    /// Since Dst2 exposes more coalescing opportunities than Dst, we can
    /// drop \p Copy.
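    /// For example (hypothetical vregs): if %dst's only copy affinity is with
    /// %src, %dst interferes with %dst2, and %dst2 also has an affinity with
    /// %src of equal or greater weight, then coalescing %dst2 with %src is
    /// preferable and this copy is dropped.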
    bool applyTerminalRule(const MachineInstr &Copy) const;

    /// Wrapper method for \see LiveIntervals::shrinkToUses.
    /// This method does the proper fixing of the live-ranges when the
    /// aforementioned method returns true.
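    /// For example, if shrinking removes the segment that connected two uses
    /// of \p LI, the remaining segments form two disconnected components;
    /// splitSeparateComponents then moves one of them to a new virtual
    /// register.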
    void shrinkToUses(LiveInterval *LI,
                      SmallVectorImpl<MachineInstr *> *Dead = nullptr) {
      if (LIS->shrinkToUses(LI, Dead)) {
        // Check whether or not \p LI is composed of multiple connected
        // components and if that is the case, fix that.
        SmallVector<LiveInterval*, 8> SplitLIs;
        LIS->splitSeparateComponents(*LI, SplitLIs);
      }
    }

    /// Wrapper method to do all the necessary work when an instruction is
    /// deleted.
    /// Optimizations should use this to make sure that deleted instructions
    /// are always accounted for.
    void deleteInstr(MachineInstr* MI) {
      ErasedInstrs.insert(MI);
      LIS->RemoveMachineInstrFromMaps(*MI);
      MI->eraseFromParent();
    }

  public:
    static char ID; ///< Class identification, replacement for typeinfo

    RegisterCoalescer() : MachineFunctionPass(ID) {
      initializeRegisterCoalescerPass(*PassRegistry::getPassRegistry());
    }

    void getAnalysisUsage(AnalysisUsage &AU) const override;

    void releaseMemory() override;

    /// This is the pass entry point.
    bool runOnMachineFunction(MachineFunction&) override;

    /// Implement the dump method.
    void print(raw_ostream &O, const Module* = nullptr) const override;
  };

} // end anonymous namespace

char RegisterCoalescer::ID = 0;

char &llvm::RegisterCoalescerID = RegisterCoalescer::ID;

INITIALIZE_PASS_BEGIN(RegisterCoalescer, "simple-register-coalescing",
                      "Simple Register Coalescing", false, false)
INITIALIZE_PASS_DEPENDENCY(LiveIntervals)
INITIALIZE_PASS_DEPENDENCY(SlotIndexes)
INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass)
INITIALIZE_PASS_END(RegisterCoalescer, "simple-register-coalescing",
                    "Simple Register Coalescing", false, false)

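/// Decompose a copy-like instruction into (Src, Dst) registers plus
/// subregister indices. For example (hypothetical subregister indices),
/// "%0 = SUBREG_TO_REG 0, %1, %subreg.sub_32" is handled like the copy
/// "%0:sub_32 = COPY %1", composing the destination's own subregister index
/// with the inserted one.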
static bool isMoveInstr(const TargetRegisterInfo &tri, const MachineInstr *MI,
                        unsigned &Src, unsigned &Dst,
                        unsigned &SrcSub, unsigned &DstSub) {
  if (MI->isCopy()) {
    Dst = MI->getOperand(0).getReg();
    DstSub = MI->getOperand(0).getSubReg();
    Src = MI->getOperand(1).getReg();
    SrcSub = MI->getOperand(1).getSubReg();
  } else if (MI->isSubregToReg()) {
    Dst = MI->getOperand(0).getReg();
    DstSub = tri.composeSubRegIndices(MI->getOperand(0).getSubReg(),
                                      MI->getOperand(3).getImm());
    Src = MI->getOperand(2).getReg();
    SrcSub = MI->getOperand(2).getSubReg();
  } else
    return false;
  return true;
}

/// Return true if this block should be vacated by the coalescer to eliminate
/// branches. The important cases to handle in the coalescer are critical edges
/// split during phi elimination which contain only copies. Simple blocks that
/// contain non-branches should also be vacated, but this can be handled by an
/// earlier pass similar to early if-conversion.
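/// For example, a block with a single predecessor and single successor that
/// contains only "%0 = COPY %1" followed by an unconditional branch qualifies.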
static bool isSplitEdge(const MachineBasicBlock *MBB) {
  if (MBB->pred_size() != 1 || MBB->succ_size() != 1)
    return false;

  for (const auto &MI : *MBB) {
    if (!MI.isCopyLike() && !MI.isUnconditionalBranch())
      return false;
  }
  return true;
}

bool CoalescerPair::setRegisters(const MachineInstr *MI) {
  SrcReg = DstReg = 0;
  SrcIdx = DstIdx = 0;
  NewRC = nullptr;
  Flipped = CrossClass = false;

  unsigned Src, Dst, SrcSub, DstSub;
  if (!isMoveInstr(TRI, MI, Src, Dst, SrcSub, DstSub))
    return false;
  Partial = SrcSub || DstSub;

  // If one register is a physreg, it must be Dst.
  if (TargetRegisterInfo::isPhysicalRegister(Src)) {
    if (TargetRegisterInfo::isPhysicalRegister(Dst))
      return false;
    std::swap(Src, Dst);
    std::swap(SrcSub, DstSub);
    Flipped = true;
  }

  const MachineRegisterInfo &MRI = MI->getMF()->getRegInfo();

  if (TargetRegisterInfo::isPhysicalRegister(Dst)) {
    // Eliminate DstSub on a physreg.
    if (DstSub) {
      Dst = TRI.getSubReg(Dst, DstSub);
      if (!Dst) return false;
      DstSub = 0;
    }

    // Eliminate SrcSub by picking a corresponding Dst superregister.
    if (SrcSub) {
      Dst = TRI.getMatchingSuperReg(Dst, SrcSub, MRI.getRegClass(Src));
      if (!Dst) return false;
    } else if (!MRI.getRegClass(Src)->contains(Dst)) {
      return false;
    }
  } else {
    // Both registers are virtual.
    const TargetRegisterClass *SrcRC = MRI.getRegClass(Src);
    const TargetRegisterClass *DstRC = MRI.getRegClass(Dst);

    // Both registers have subreg indices.
    if (SrcSub && DstSub) {
      // Copies between different sub-registers are never coalescable.
      if (Src == Dst && SrcSub != DstSub)
        return false;

      NewRC = TRI.getCommonSuperRegClass(SrcRC, SrcSub, DstRC, DstSub,
                                         SrcIdx, DstIdx);
      if (!NewRC)
        return false;
    } else if (DstSub) {
      // SrcReg will be merged with a sub-register of DstReg.
      SrcIdx = DstSub;
      NewRC = TRI.getMatchingSuperRegClass(DstRC, SrcRC, DstSub);
    } else if (SrcSub) {
      // DstReg will be merged with a sub-register of SrcReg.
      DstIdx = SrcSub;
      NewRC = TRI.getMatchingSuperRegClass(SrcRC, DstRC, SrcSub);
    } else {
      // This is a straight copy without sub-registers.
      NewRC = TRI.getCommonSubClass(DstRC, SrcRC);
    }

    // The combined constraint may be impossible to satisfy.
    if (!NewRC)
      return false;

    // Prefer SrcReg to be a sub-register of DstReg.
    // FIXME: Coalescer should support subregs symmetrically.
    if (DstIdx && !SrcIdx) {
      std::swap(Src, Dst);
      std::swap(SrcIdx, DstIdx);
      Flipped = !Flipped;
    }

    CrossClass = NewRC != DstRC || NewRC != SrcRC;
  }
  // Check our invariants
  assert(TargetRegisterInfo::isVirtualRegister(Src) && "Src must be virtual");
  assert(!(TargetRegisterInfo::isPhysicalRegister(Dst) && DstSub) &&
         "Cannot have a physical SubIdx");
  SrcReg = Src;
  DstReg = Dst;
  return true;
}

bool CoalescerPair::flip() {
  if (TargetRegisterInfo::isPhysicalRegister(DstReg))
    return false;
  std::swap(SrcReg, DstReg);
  std::swap(SrcIdx, DstIdx);
  Flipped = !Flipped;
  return true;
}

bool CoalescerPair::isCoalescable(const MachineInstr *MI) const {
  if (!MI)
    return false;
  unsigned Src, Dst, SrcSub, DstSub;
  if (!isMoveInstr(TRI, MI, Src, Dst, SrcSub, DstSub))
    return false;

  // Find the virtual register that is SrcReg.
  if (Dst == SrcReg) {
    std::swap(Src, Dst);
    std::swap(SrcSub, DstSub);
  } else if (Src != SrcReg) {
    return false;
  }

  // Now check that Dst matches DstReg.
  if (TargetRegisterInfo::isPhysicalRegister(DstReg)) {
    if (!TargetRegisterInfo::isPhysicalRegister(Dst))
      return false;
    assert(!DstIdx && !SrcIdx && "Inconsistent CoalescerPair state.");
    // DstSub could be set for a physreg from INSERT_SUBREG.
    if (DstSub)
      Dst = TRI.getSubReg(Dst, DstSub);
    // Full copy of Src.
    if (!SrcSub)
      return DstReg == Dst;
    // This is a partial register copy. Check that the parts match.
    return TRI.getSubReg(DstReg, SrcSub) == Dst;
  } else {
    // DstReg is virtual.
    if (DstReg != Dst)
      return false;
    // Registers match, do the subregisters line up?
    return TRI.composeSubRegIndices(SrcIdx, SrcSub) ==
           TRI.composeSubRegIndices(DstIdx, DstSub);
  }
}

void RegisterCoalescer::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.setPreservesCFG();
  AU.addRequired<AAResultsWrapperPass>();
  AU.addRequired<LiveIntervals>();
  AU.addPreserved<LiveIntervals>();
  AU.addPreserved<SlotIndexes>();
  AU.addRequired<MachineLoopInfo>();
  AU.addPreserved<MachineLoopInfo>();
  AU.addPreservedID(MachineDominatorsID);
  MachineFunctionPass::getAnalysisUsage(AU);
}

void RegisterCoalescer::eliminateDeadDefs() {
  SmallVector<unsigned, 8> NewRegs;
  LiveRangeEdit(nullptr, NewRegs, *MF, *LIS,
                nullptr, this).eliminateDeadDefs(DeadDefs);
}

void RegisterCoalescer::LRE_WillEraseInstruction(MachineInstr *MI) {
  // MI may be in WorkList. Make sure we don't visit it.
  ErasedInstrs.insert(MI);
}

bool RegisterCoalescer::adjustCopiesBackFrom(const CoalescerPair &CP,
                                             MachineInstr *CopyMI) {
  assert(!CP.isPartial() && "This doesn't work for partial copies.");
  assert(!CP.isPhys() && "This doesn't work for physreg copies.");

  LiveInterval &IntA =
    LIS->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg());
  LiveInterval &IntB =
    LIS->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg());
  SlotIndex CopyIdx = LIS->getInstructionIndex(*CopyMI).getRegSlot();

  // We have a non-trivially-coalescable copy with IntA being the source and
  // IntB being the dest, thus this defines a value number in IntB.  If the
  // source value number (in IntA) is defined by a copy from B, see if we can
  // merge these two pieces of B into a single value number, eliminating a copy.
  // For example:
  //
  //  A3 = B0
  //    ...
  //  B1 = A3      <- this copy
  //
  // In this case, B0 can be extended to where the B1 copy lives, allowing the
  // B1 value number to be replaced with B0 (which simplifies the B
  // liveinterval).

  // BValNo is a value number in B that is defined by a copy from A.  'B1' in
  // the example above.
  LiveInterval::iterator BS = IntB.FindSegmentContaining(CopyIdx);
  if (BS == IntB.end()) return false;
  VNInfo *BValNo = BS->valno;

  // Get the location that B is defined at.  Two options: either this value has
  // an unknown definition point or it is defined at CopyIdx.  If unknown, we
  // can't process it.
  if (BValNo->def != CopyIdx) return false;

  // AValNo is the value number in A that defines the copy, A3 in the example.
  SlotIndex CopyUseIdx = CopyIdx.getRegSlot(true);
  LiveInterval::iterator AS = IntA.FindSegmentContaining(CopyUseIdx);
  // The live segment might not exist after fun with physreg coalescing.
  if (AS == IntA.end()) return false;
  VNInfo *AValNo = AS->valno;

  // If AValNo is defined as a copy from IntB, we can potentially process this.
  // Get the instruction that defines this value number.
  MachineInstr *ACopyMI = LIS->getInstructionFromIndex(AValNo->def);
  // Don't allow any partial copies, even if isCoalescable() allows them.
  if (!CP.isCoalescable(ACopyMI) || !ACopyMI->isFullCopy())
    return false;

  // Get the Segment in IntB that this value number starts with.
  LiveInterval::iterator ValS =
    IntB.FindSegmentContaining(AValNo->def.getPrevSlot());
  if (ValS == IntB.end())
    return false;

  // Make sure that the end of the live segment is inside the same block as
  // CopyMI.
  MachineInstr *ValSEndInst =
    LIS->getInstructionFromIndex(ValS->end.getPrevSlot());
  if (!ValSEndInst || ValSEndInst->getParent() != CopyMI->getParent())
    return false;

  // Okay, we now know that ValS ends in the same block that the CopyMI
  // live-range starts.  If there are no intervening live segments between them
  // in IntB, we can merge them.
  if (ValS+1 != BS) return false;

  LLVM_DEBUG(dbgs() << "Extending: " << printReg(IntB.reg, TRI));

  SlotIndex FillerStart = ValS->end, FillerEnd = BS->start;
  // We are about to delete CopyMI, so need to remove it as the 'instruction
  // that defines this value #'. Update the valnum with the new defining
  // instruction #.
  BValNo->def = FillerStart;

  // Okay, we can merge them.  We need to insert a new liverange:
  // [ValS.end, BS.begin) of either value number, then we merge the
  // two value numbers.
  IntB.addSegment(LiveInterval::Segment(FillerStart, FillerEnd, BValNo));

  // Okay, merge "B1" into the same value number as "B0".
  if (BValNo != ValS->valno)
    IntB.MergeValueNumberInto(BValNo, ValS->valno);

  // Do the same for the subregister segments.
  for (LiveInterval::SubRange &S : IntB.subranges()) {
    // Check for SubRange Segments of the form [1234r,1234d:0) which can be
    // removed to prevent creating bogus SubRange Segments.
    LiveInterval::iterator SS = S.FindSegmentContaining(CopyIdx);
    if (SS != S.end() && SlotIndex::isSameInstr(SS->start, SS->end)) {
      S.removeSegment(*SS, true);
      continue;
    }
    VNInfo *SubBValNo = S.getVNInfoAt(CopyIdx);
    S.addSegment(LiveInterval::Segment(FillerStart, FillerEnd, SubBValNo));
    VNInfo *SubValSNo = S.getVNInfoAt(AValNo->def.getPrevSlot());
    if (SubBValNo != SubValSNo)
      S.MergeValueNumberInto(SubBValNo, SubValSNo);
  }

  LLVM_DEBUG(dbgs() << "   result = " << IntB << '\n');

  // If the source instruction was killing the source register before the
  // merge, unset the isKill marker given that the live range has been
  // extended.
  int UIdx = ValSEndInst->findRegisterUseOperandIdx(IntB.reg, true);
  if (UIdx != -1) {
    ValSEndInst->getOperand(UIdx).setIsKill(false);
  }

  // Rewrite the copy.
  CopyMI->substituteRegister(IntA.reg, IntB.reg, 0, *TRI);
  // If the copy instruction was killing the destination register or any
  // subrange before the merge, trim the live range.
  bool RecomputeLiveRange = AS->end == CopyIdx;
  if (!RecomputeLiveRange) {
    for (LiveInterval::SubRange &S : IntA.subranges()) {
      LiveInterval::iterator SS = S.FindSegmentContaining(CopyUseIdx);
      if (SS != S.end() && SS->end == CopyIdx) {
        RecomputeLiveRange = true;
        break;
      }
    }
  }
  if (RecomputeLiveRange)
    shrinkToUses(&IntA);

  ++numExtends;
  return true;
}

bool RegisterCoalescer::hasOtherReachingDefs(LiveInterval &IntA,
                                             LiveInterval &IntB,
                                             VNInfo *AValNo,
                                             VNInfo *BValNo) {
  // If AValNo has PHI kills, conservatively assume that IntB defs can reach
  // the PHI values.
  if (LIS->hasPHIKill(IntA, AValNo))
    return true;

  for (LiveRange::Segment &ASeg : IntA.segments) {
    if (ASeg.valno != AValNo) continue;
    LiveInterval::iterator BI =
      std::upper_bound(IntB.begin(), IntB.end(), ASeg.start);
    if (BI != IntB.begin())
      --BI;
    for (; BI != IntB.end() && ASeg.end >= BI->start; ++BI) {
      if (BI->valno == BValNo)
        continue;
      if (BI->start <= ASeg.start && BI->end > ASeg.start)
        return true;
      if (BI->start > ASeg.start && BI->start < ASeg.end)
        return true;
    }
  }
  return false;
}

/// Copy segments with value number @p SrcValNo from liverange @p Src to live
/// range @p Dst and use value number @p DstValNo there.
static void addSegmentsWithValNo(LiveRange &Dst, VNInfo *DstValNo,
                                 const LiveRange &Src, const VNInfo *SrcValNo) {
  for (const LiveRange::Segment &S : Src.segments) {
    if (S.valno != SrcValNo)
      continue;
    Dst.addSegment(LiveRange::Segment(S.start, S.end, DstValNo));
  }
}

bool RegisterCoalescer::removeCopyByCommutingDef(const CoalescerPair &CP,
                                                 MachineInstr *CopyMI) {
  assert(!CP.isPhys());

  LiveInterval &IntA =
      LIS->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg());
  LiveInterval &IntB =
      LIS->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg());

  // We found a non-trivially-coalescable copy with IntA being the source and
  // IntB being the dest, thus this defines a value number in IntB.  If the
  // source value number (in IntA) is defined by a commutable instruction and
  // its other operand is coalesced to the copy dest register, see if we can
  // transform the copy into a noop by commuting the definition. For example,
  //
  //  A3 = op A2 killed B0
  //    ...
  //  B1 = A3      <- this copy
  //    ...
  //     = op A3   <- more uses
  //
  // ==>
  //
  //  B2 = op B0 killed A2
  //    ...
  //  B1 = B2      <- now an identity copy
  //    ...
  //     = op B2   <- more uses

  // BValNo is a value number in B that is defined by a copy from A. 'B1' in
  // the example above.
  SlotIndex CopyIdx = LIS->getInstructionIndex(*CopyMI).getRegSlot();
  VNInfo *BValNo = IntB.getVNInfoAt(CopyIdx);
  assert(BValNo != nullptr && BValNo->def == CopyIdx);

  // AValNo is the value number in A that defines the copy, A3 in the example.
  VNInfo *AValNo = IntA.getVNInfoAt(CopyIdx.getRegSlot(true));
  assert(AValNo && !AValNo->isUnused() && "COPY source not live");
  if (AValNo->isPHIDef())
    return false;
  MachineInstr *DefMI = LIS->getInstructionFromIndex(AValNo->def);
  if (!DefMI)
    return false;
  if (!DefMI->isCommutable())
    return false;
  // If DefMI is a two-address instruction then commuting it will change the
  // destination register.
  int DefIdx = DefMI->findRegisterDefOperandIdx(IntA.reg);
  assert(DefIdx != -1);
  unsigned UseOpIdx;
  if (!DefMI->isRegTiedToUseOperand(DefIdx, &UseOpIdx))
    return false;

  // FIXME: The code below tries to commute 'UseOpIdx' operand with some other
  // commutable operand which is expressed by 'CommuteAnyOperandIndex' value
  // passed to the method. That _other_ operand is chosen by
  // the findCommutedOpIndices() method.
  //
  // That is obviously an area for improvement in case of instructions having
  // more than 2 operands. For example, if some instruction has 3 commutable
  // operands then all possible variants (i.e. op#1<->op#2, op#1<->op#3,
  // op#2<->op#3) of commute transformation should be considered/tried here.
  unsigned NewDstIdx = TargetInstrInfo::CommuteAnyOperandIndex;
  if (!TII->findCommutedOpIndices(*DefMI, UseOpIdx, NewDstIdx))
    return false;

  MachineOperand &NewDstMO = DefMI->getOperand(NewDstIdx);
  unsigned NewReg = NewDstMO.getReg();
  if (NewReg != IntB.reg || !IntB.Query(AValNo->def).isKill())
    return false;

  // Make sure there are no other definitions of IntB that would reach the
  // uses which the new definition can reach.
  if (hasOtherReachingDefs(IntA, IntB, AValNo, BValNo))
    return false;

  // If some of the uses of IntA.reg are already coalesced away, return false.
  // It's not possible to determine whether it's safe to perform the coalescing.
  for (MachineOperand &MO : MRI->use_nodbg_operands(IntA.reg)) {
    MachineInstr *UseMI = MO.getParent();
    unsigned OpNo = &MO - &UseMI->getOperand(0);
    SlotIndex UseIdx = LIS->getInstructionIndex(*UseMI);
    LiveInterval::iterator US = IntA.FindSegmentContaining(UseIdx);
    if (US == IntA.end() || US->valno != AValNo)
      continue;
    // If this use is tied to a def, we can't rewrite the register.
    if (UseMI->isRegTiedToDefOperand(OpNo))
      return false;
  }

  LLVM_DEBUG(dbgs() << "\tremoveCopyByCommutingDef: " << AValNo->def << '\t'
                    << *DefMI);

  // At this point we have decided that it is legal to do this
  // transformation.  Start by commuting the instruction.
  MachineBasicBlock *MBB = DefMI->getParent();
  MachineInstr *NewMI =
      TII->commuteInstruction(*DefMI, false, UseOpIdx, NewDstIdx);
  if (!NewMI)
    return false;
  if (TargetRegisterInfo::isVirtualRegister(IntA.reg) &&
      TargetRegisterInfo::isVirtualRegister(IntB.reg) &&
      !MRI->constrainRegClass(IntB.reg, MRI->getRegClass(IntA.reg)))
    return false;
  if (NewMI != DefMI) {
    LIS->ReplaceMachineInstrInMaps(*DefMI, *NewMI);
    MachineBasicBlock::iterator Pos = DefMI;
    MBB->insert(Pos, NewMI);
    MBB->erase(DefMI);
  }

  // If ALR and BLR overlap and the end of BLR extends beyond the end of ALR,
  // e.g.
  // A = or A, B
  // ...
  // B = A
  // ...
  // C = killed A
  // ...
  //   = B

  // Update uses of IntA of the specific Val# with IntB.
  for (MachineRegisterInfo::use_iterator UI = MRI->use_begin(IntA.reg),
                                         UE = MRI->use_end();
       UI != UE; /* ++UI is below because of possible MI removal */) {
    MachineOperand &UseMO = *UI;
    ++UI;
    if (UseMO.isUndef())
      continue;
    MachineInstr *UseMI = UseMO.getParent();
    if (UseMI->isDebugValue()) {
      // FIXME These don't have an instruction index.  Not clear we have enough
      // info to decide whether to do this replacement or not.  For now do it.
      UseMO.setReg(NewReg);
      continue;
    }
    SlotIndex UseIdx = LIS->getInstructionIndex(*UseMI).getRegSlot(true);
    LiveInterval::iterator US = IntA.FindSegmentContaining(UseIdx);
    assert(US != IntA.end() && "Use must be live");
    if (US->valno != AValNo)
      continue;
    // Kill flags are no longer accurate. They are recomputed after RA.
    UseMO.setIsKill(false);
    if (TargetRegisterInfo::isPhysicalRegister(NewReg))
      UseMO.substPhysReg(NewReg, *TRI);
    else
      UseMO.setReg(NewReg);
    if (UseMI == CopyMI)
      continue;
    if (!UseMI->isCopy())
      continue;
    if (UseMI->getOperand(0).getReg() != IntB.reg ||
        UseMI->getOperand(0).getSubReg())
      continue;

    // This copy will become a noop. If it's defining a new val#, merge it into
    // BValNo.
    SlotIndex DefIdx = UseIdx.getRegSlot();
    VNInfo *DVNI = IntB.getVNInfoAt(DefIdx);
    if (!DVNI)
      continue;
    LLVM_DEBUG(dbgs() << "\t\tnoop: " << DefIdx << '\t' << *UseMI);
    assert(DVNI->def == DefIdx);
    BValNo = IntB.MergeValueNumberInto(DVNI, BValNo);
    for (LiveInterval::SubRange &S : IntB.subranges()) {
      VNInfo *SubDVNI = S.getVNInfoAt(DefIdx);
      if (!SubDVNI)
        continue;
      VNInfo *SubBValNo = S.getVNInfoAt(CopyIdx);
      assert(SubBValNo->def == CopyIdx);
      S.MergeValueNumberInto(SubDVNI, SubBValNo);
    }

    deleteInstr(UseMI);
  }

  // Extend BValNo by merging in IntA live segments of AValNo. Val# definition
  // is updated.
  BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator();
  if (IntB.hasSubRanges()) {
    if (!IntA.hasSubRanges()) {
      LaneBitmask Mask = MRI->getMaxLaneMaskForVReg(IntA.reg);
      IntA.createSubRangeFrom(Allocator, Mask, IntA);
    }
    SlotIndex AIdx = CopyIdx.getRegSlot(true);
    for (LiveInterval::SubRange &SA : IntA.subranges()) {
      VNInfo *ASubValNo = SA.getVNInfoAt(AIdx);
      assert(ASubValNo != nullptr);

      IntB.refineSubRanges(Allocator, SA.LaneMask,
          [&Allocator,&SA,CopyIdx,ASubValNo](LiveInterval::SubRange &SR) {
        VNInfo *BSubValNo = SR.empty()
          ? SR.getNextValue(CopyIdx, Allocator)
          : SR.getVNInfoAt(CopyIdx);
        assert(BSubValNo != nullptr);
        addSegmentsWithValNo(SR, BSubValNo, SA, ASubValNo);
      });
    }
  }

  BValNo->def = AValNo->def;
  addSegmentsWithValNo(IntB, BValNo, IntA, AValNo);
  LLVM_DEBUG(dbgs() << "\t\textended: " << IntB << '\n');

  LIS->removeVRegDefAt(IntA, AValNo->def);

  LLVM_DEBUG(dbgs() << "\t\ttrimmed:  " << IntA << '\n');
  ++numCommutes;
  return true;
}

/// For copy B = A in BB2, if A is defined by A = B in BB0 which is a
/// predecessor of BB2, and if B is not redefined on the way from A = B
/// in BB0 to B = A in BB2, B = A in BB2 is partially redundant if the
/// execution goes through the path from BB0 to BB2. We may move B = A
/// to the predecessor without such reversed copy.
/// So we will transform the program from:
///   BB0:
///      A = B;    BB1:
///       ...         ...
///     /     \      /
///             BB2:
///               ...
///               B = A;
///
/// to:
///
///   BB0:         BB1:
///      A = B;        ...
///       ...          B = A;
///     /     \       /
///             BB2:
///               ...
///
/// A special case is when BB0 and BB2 are the same BB which is the only
/// BB in a loop:
///   BB1:
///        ...
///   BB0/BB2:  ----
///        B = A;   |
///        ...      |
///        A = B;   |
///          |-------
///          |
/// We may hoist B = A from BB0/BB2 to BB1.
///
/// The major preconditions for correctness to remove such partial
/// redundancy include:
/// 1. A in B = A in BB2 is defined by a PHI in BB2, and one operand of
///    the PHI is defined by the reversed copy A = B in BB0.
/// 2. No B is referenced from the start of BB2 to B = A.
/// 3. No B is defined from A = B to the end of BB0.
/// 4. BB1 has only one successor.
///
/// 2 and 4 implicitly ensure B is not live at the end of BB1.
/// 4 guarantees BB2 is hotter than BB1, so we can only move a copy to a
/// colder place, which not only prevents endless loops, but also makes
/// sure the movement of the copy is beneficial.
bool RegisterCoalescer::removePartialRedundancy(const CoalescerPair &CP,
                                                MachineInstr &CopyMI) {
  assert(!CP.isPhys());
  if (!CopyMI.isFullCopy())
    return false;

  MachineBasicBlock &MBB = *CopyMI.getParent();
  if (MBB.isEHPad())
    return false;

  if (MBB.pred_size() != 2)
    return false;

  LiveInterval &IntA =
      LIS->getInterval(CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg());
  LiveInterval &IntB =
      LIS->getInterval(CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg());

  // A is defined by PHI at the entry of MBB.
  SlotIndex CopyIdx = LIS->getInstructionIndex(CopyMI).getRegSlot(true);
  VNInfo *AValNo = IntA.getVNInfoAt(CopyIdx);
  assert(AValNo && !AValNo->isUnused() && "COPY source not live");
  if (!AValNo->isPHIDef())
    return false;

  // No B is referenced before CopyMI in MBB.
  if (IntB.overlaps(LIS->getMBBStartIdx(&MBB), CopyIdx))
    return false;

  // MBB has two predecessors: one contains A = B so no copy will be inserted
  // for it. The other one will have a copy moved from MBB.
  bool FoundReverseCopy = false;
  MachineBasicBlock *CopyLeftBB = nullptr;
  for (MachineBasicBlock *Pred : MBB.predecessors()) {
    VNInfo *PVal = IntA.getVNInfoBefore(LIS->getMBBEndIdx(Pred));
    MachineInstr *DefMI = LIS->getInstructionFromIndex(PVal->def);
    if (!DefMI || !DefMI->isFullCopy()) {
      CopyLeftBB = Pred;
      continue;
    }
    // Check DefMI is a reverse copy and it is in BB Pred.
    if (DefMI->getOperand(0).getReg() != IntA.reg ||
        DefMI->getOperand(1).getReg() != IntB.reg ||
        DefMI->getParent() != Pred) {
      CopyLeftBB = Pred;
      continue;
    }
    // If there is any other def of B after DefMI and before the end of Pred,
    // we need to keep the copy of B = A at the end of Pred if we remove
    // B = A from MBB.
    bool ValB_Changed = false;
    for (auto VNI : IntB.valnos) {
      if (VNI->isUnused())
        continue;
      if (PVal->def < VNI->def && VNI->def < LIS->getMBBEndIdx(Pred)) {
        ValB_Changed = true;
        break;
      }
    }
    if (ValB_Changed) {
      CopyLeftBB = Pred;
      continue;
    }
    FoundReverseCopy = true;
  }

  // If no reverse copy is found in predecessors, nothing to do.
  if (!FoundReverseCopy)
    return false;

  // If CopyLeftBB is nullptr, it means every predecessor of MBB contains a
  // reverse copy, and CopyMI can be removed trivially once IntA/IntB are
  // updated.
  // If CopyLeftBB is not nullptr, move CopyMI from MBB to CopyLeftBB and
  // update IntA/IntB.
  //
  // If CopyLeftBB is not nullptr, ensure CopyLeftBB has a single succ so
  // MBB is hotter than CopyLeftBB.
  if (CopyLeftBB && CopyLeftBB->succ_size() > 1)
    return false;

  // Now it is (almost surely) OK to move the copy.
  if (CopyLeftBB) {
    // Position in CopyLeftBB where we should insert new copy.
    auto InsPos = CopyLeftBB->getFirstTerminator();

    // Make sure that B isn't referenced in the terminators (if any) at the end
    // of the predecessor since we're about to insert a new definition of B
    // before them.
    if (InsPos != CopyLeftBB->end()) {
      SlotIndex InsPosIdx = LIS->getInstructionIndex(*InsPos).getRegSlot(true);
      if (IntB.overlaps(InsPosIdx, LIS->getMBBEndIdx(CopyLeftBB)))
        return false;
    }

    LLVM_DEBUG(dbgs() << "\tremovePartialRedundancy: Move the copy to "
                      << printMBBReference(*CopyLeftBB) << '\t' << CopyMI);

    // Insert new copy to CopyLeftBB.
    MachineInstr *NewCopyMI = BuildMI(*CopyLeftBB, InsPos, CopyMI.getDebugLoc(),
                                      TII->get(TargetOpcode::COPY), IntB.reg)
                                  .addReg(IntA.reg);
    SlotIndex NewCopyIdx =
        LIS->InsertMachineInstrInMaps(*NewCopyMI).getRegSlot();
    IntB.createDeadDef(NewCopyIdx, LIS->getVNInfoAllocator());
    for (LiveInterval::SubRange &SR : IntB.subranges())
      SR.createDeadDef(NewCopyIdx, LIS->getVNInfoAllocator());

    // If the newly created instruction reuses the address of an instruction
    // that was deleted before (the object was recycled by the allocator), it
    // needs to be removed from the deleted list.
    ErasedInstrs.erase(NewCopyMI);
  } else {
    LLVM_DEBUG(dbgs() << "\tremovePartialRedundancy: Remove the copy from "
                      << printMBBReference(MBB) << '\t' << CopyMI);
  }

  // Remove CopyMI.
  // Note: It is fine to remove the copy before updating the live-ranges.
  // While updating the live-ranges, we only look at slot indices and
  // never go back to the instruction.
  // Mark instructions as deleted.
  deleteInstr(&CopyMI);

  // Update the liveness.
  SmallVector<SlotIndex, 8> EndPoints;
  VNInfo *BValNo = IntB.Query(CopyIdx).valueOutOrDead();
  LIS->pruneValue(*static_cast<LiveRange *>(&IntB), CopyIdx.getRegSlot(),
                  &EndPoints);
  BValNo->markUnused();
  // Extend IntB to the EndPoints of its original live interval.
  LIS->extendToIndices(IntB, EndPoints);

  // Now, do the same for its subranges.
  for (LiveInterval::SubRange &SR : IntB.subranges()) {
    EndPoints.clear();
    VNInfo *BValNo = SR.Query(CopyIdx).valueOutOrDead();
    assert(BValNo && "All sublanes should be live");
    LIS->pruneValue(SR, CopyIdx.getRegSlot(), &EndPoints);
    BValNo->markUnused();
    LIS->extendToIndices(SR, EndPoints);
  }
  // If any dead defs were extended, truncate them.
  shrinkToUses(&IntB);

  // Finally, update the live-range of IntA.
  shrinkToUses(&IntA);
  return true;
}

/// Returns true if @p MI defines the full vreg @p Reg, as opposed to just
/// defining a subregister.
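/// For example, "%0 = ..." and "%0:sub8<def,read-undef> = ..." count as full
/// definitions (the undef lanes carry no value), while "%0:sub8 = ..." alone
/// does not.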
static bool definesFullReg(const MachineInstr &MI, unsigned Reg) {
  assert(!TargetRegisterInfo::isPhysicalRegister(Reg) &&
         "This code cannot handle physreg aliasing");
  for (const MachineOperand &Op : MI.operands()) {
    if (!Op.isReg() || !Op.isDef() || Op.getReg() != Reg)
      continue;
    // Return true if we define the full register or don't care about the value
    // inside other subregisters.
    if (Op.getSubReg() == 0 || Op.isUndef())
      return true;
  }
  return false;
}

bool RegisterCoalescer::reMaterializeTrivialDef(const CoalescerPair &CP,
                                                MachineInstr *CopyMI,
                                                bool &IsDefCopy) {
  IsDefCopy = false;
  unsigned SrcReg = CP.isFlipped() ? CP.getDstReg() : CP.getSrcReg();
  unsigned SrcIdx = CP.isFlipped() ? CP.getDstIdx() : CP.getSrcIdx();
  unsigned DstReg = CP.isFlipped() ? CP.getSrcReg() : CP.getDstReg();
  unsigned DstIdx = CP.isFlipped() ? CP.getSrcIdx() : CP.getDstIdx();
  if (TargetRegisterInfo::isPhysicalRegister(SrcReg))
    return false;

  LiveInterval &SrcInt = LIS->getInterval(SrcReg);
  SlotIndex CopyIdx = LIS->getInstructionIndex(*CopyMI);
  VNInfo *ValNo = SrcInt.Query(CopyIdx).valueIn();
  assert(ValNo && "CopyMI input register not live");
  if (ValNo->isPHIDef() || ValNo->isUnused())
    return false;
  MachineInstr *DefMI = LIS->getInstructionFromIndex(ValNo->def);
  if (!DefMI)
    return false;
  if (DefMI->isCopyLike()) {
    IsDefCopy = true;
    return false;
  }
  if (!TII->isAsCheapAsAMove(*DefMI))
    return false;
  if (!TII->isTriviallyReMaterializable(*DefMI, AA))
    return false;
  if (!definesFullReg(*DefMI, SrcReg))
    return false;
  bool SawStore = false;
  if (!DefMI->isSafeToMove(AA, SawStore))
    return false;
  const MCInstrDesc &MCID = DefMI->getDesc();
  if (MCID.getNumDefs() != 1)
    return false;
  // Only support subregister destinations when the def is read-undef.
  MachineOperand &DstOperand = CopyMI->getOperand(0);
  unsigned CopyDstReg = DstOperand.getReg();
  if (DstOperand.getSubReg() && !DstOperand.isUndef())
    return false;

  // If both SrcIdx and DstIdx are set, correct rematerialization would widen
  // the register substantially (beyond both source and dest size). This is bad
  // for performance since it can cascade through a function, introducing many
  // extra spills and fills (e.g. ARM can easily end up copying QQQQPR registers
  // around after a few subreg copies).
  if (SrcIdx && DstIdx)
    return false;

  const TargetRegisterClass *DefRC = TII->getRegClass(MCID, 0, TRI, *MF);
  if (!DefMI->isImplicitDef()) {
    if (TargetRegisterInfo::isPhysicalRegister(DstReg)) {
      unsigned NewDstReg = DstReg;

      unsigned NewDstIdx = TRI->composeSubRegIndices(CP.getSrcIdx(),
                                              DefMI->getOperand(0).getSubReg());
      if (NewDstIdx)
        NewDstReg = TRI->getSubReg(DstReg, NewDstIdx);

      // Finally, make sure that the physical subregister that will be
      // constructed later is permitted for the instruction.
      if (!DefRC->contains(NewDstReg))
        return false;
    } else {
      // Theoretically, some stack frame reference could exist. Just make sure
      // it hasn't actually happened.
      assert(TargetRegisterInfo::isVirtualRegister(DstReg) &&
             "Only expect to deal with virtual or physical registers");
    }
  }

  DebugLoc DL = CopyMI->getDebugLoc();
  MachineBasicBlock *MBB = CopyMI->getParent();
  MachineBasicBlock::iterator MII =
    std::next(MachineBasicBlock::iterator(CopyMI));
  TII->reMaterialize(*MBB, MII, DstReg, SrcIdx, *DefMI, *TRI);
  MachineInstr &NewMI = *std::prev(MII);
  NewMI.setDebugLoc(DL);

  // In a situation like the following:
  //     %0:subreg = instr              ; DefMI, subreg = DstIdx
  //     %1        = copy %0:subreg ; CopyMI, SrcIdx = 0
  // instead of widening %1 to the register class of %0 simply do:
  //     %1 = instr
  const TargetRegisterClass *NewRC = CP.getNewRC();
  if (DstIdx != 0) {
    MachineOperand &DefMO = NewMI.getOperand(0);
    if (DefMO.getSubReg() == DstIdx) {
      assert(SrcIdx == 0 && CP.isFlipped()
             && "Shouldn't have SrcIdx+DstIdx at this point");
      const TargetRegisterClass *DstRC = MRI->getRegClass(DstReg);
      const TargetRegisterClass *CommonRC =
        TRI->getCommonSubClass(DefRC, DstRC);
      if (CommonRC != nullptr) {
        NewRC = CommonRC;
        DstIdx = 0;
        DefMO.setSubReg(0);
        DefMO.setIsUndef(false); // Only subregs can have def+undef.
      }
    }
  }

  // CopyMI may have implicit operands, save them so that we can transfer them
  // over to the newly materialized instruction after CopyMI is removed.
  SmallVector<MachineOperand, 4> ImplicitOps;
  ImplicitOps.reserve(CopyMI->getNumOperands() -
                      CopyMI->getDesc().getNumOperands());
  for (unsigned I = CopyMI->getDesc().getNumOperands(),
                E = CopyMI->getNumOperands();
       I != E; ++I) {
    MachineOperand &MO = CopyMI->getOperand(I);
    if (MO.isReg()) {
      assert(MO.isImplicit() && "No explicit operands after implicit operands.");
      // Discard VReg implicit defs.
      if (TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
        ImplicitOps.push_back(MO);
    }
  }

  LIS->ReplaceMachineInstrInMaps(*CopyMI, NewMI);
  CopyMI->eraseFromParent();
  ErasedInstrs.insert(CopyMI);

  // NewMI may have dead implicit defs (E.g. EFLAGS for MOV<bits>r0 on X86).
  // We need to remember these so we can add intervals once we insert
  // NewMI into SlotIndexes.
  SmallVector<unsigned, 4> NewMIImplDefs;
  for (unsigned i = NewMI.getDesc().getNumOperands(),
                e = NewMI.getNumOperands();
       i != e; ++i) {
    MachineOperand &MO = NewMI.getOperand(i);
    if (MO.isReg() && MO.isDef()) {
      assert(MO.isImplicit() && MO.isDead() &&
             TargetRegisterInfo::isPhysicalRegister(MO.getReg()));
      NewMIImplDefs.push_back(MO.getReg());
    }
  }

  if (TargetRegisterInfo::isVirtualRegister(DstReg)) {
    unsigned NewIdx = NewMI.getOperand(0).getSubReg();

    if (DefRC != nullptr) {
      if (NewIdx)
        NewRC = TRI->getMatchingSuperRegClass(NewRC, DefRC, NewIdx);
      else
        NewRC = TRI->getCommonSubClass(NewRC, DefRC);
      assert(NewRC && "subreg chosen for remat incompatible with instruction");
    }
    // Remap subranges to new lanemask and change register class.
    LiveInterval &DstInt = LIS->getInterval(DstReg);
    for (LiveInterval::SubRange &SR : DstInt.subranges()) {
      SR.LaneMask = TRI->composeSubRegIndexLaneMask(DstIdx, SR.LaneMask);
    }
    MRI->setRegClass(DstReg, NewRC);

    // Update machine operands and add flags.
    updateRegDefsUses(DstReg, DstReg, DstIdx);
    NewMI.getOperand(0).setSubReg(NewIdx);
    1255     // updateRegDefsUses can add an "undef" flag to the definition, since
   1256     // it will replace DstReg with DstReg.DstIdx. If NewIdx is 0, make
   1257     // sure that "undef" is not set.
   1258     if (NewIdx == 0)
   1259       NewMI.getOperand(0).setIsUndef(false);
   1260     // Add dead subregister definitions if we are defining the whole register
   1261     // but only part of it is live.
    1262     // This can happen if the rematerialized instruction defines more of
    1263     // the register than is actually used.
   1264     // An example would be:
    1265     // %1 = LOAD_CONSTANTS 5, 8 ; Loading both 5 and 8 in different subregs
   1266     // ; Copying only part of the register here, but the rest is undef.
   1267     // %2:sub_16bit<def, read-undef> = COPY %1:sub_16bit
   1268     // ==>
   1269     // ; Materialize all the constants but only using one
   1270     // %2 = LOAD_CONSTANTS 5, 8
   1271     //
    1272     // At this point, the lanes that weren't defined before may be missing
    1273     // subrange definitions.
   1274     if (NewIdx == 0 && DstInt.hasSubRanges()) {
   1275       SlotIndex CurrIdx = LIS->getInstructionIndex(NewMI);
   1276       SlotIndex DefIndex =
   1277           CurrIdx.getRegSlot(NewMI.getOperand(0).isEarlyClobber());
   1278       LaneBitmask MaxMask = MRI->getMaxLaneMaskForVReg(DstReg);
   1279       VNInfo::Allocator& Alloc = LIS->getVNInfoAllocator();
   1280       for (LiveInterval::SubRange &SR : DstInt.subranges()) {
   1281         if (!SR.liveAt(DefIndex))
   1282           SR.createDeadDef(DefIndex, Alloc);
   1283         MaxMask &= ~SR.LaneMask;
   1284       }
   1285       if (MaxMask.any()) {
   1286         LiveInterval::SubRange *SR = DstInt.createSubRange(Alloc, MaxMask);
   1287         SR->createDeadDef(DefIndex, Alloc);
   1288       }
   1289     }
   1290 
    1291     // Make sure that the subrange for the resulting undef lanes is removed.
   1292     // For example:
   1293     //   %1:sub1<def,read-undef> = LOAD CONSTANT 1
   1294     //   %2 = COPY %1
   1295     // ==>
   1296     //   %2:sub1<def, read-undef> = LOAD CONSTANT 1
   1297     //     ; Correct but need to remove the subrange for %2:sub0
   1298     //     ; as it is now undef
   1299     if (NewIdx != 0 && DstInt.hasSubRanges()) {
   1300       // The affected subregister segments can be removed.
   1301       SlotIndex CurrIdx = LIS->getInstructionIndex(NewMI);
   1302       LaneBitmask DstMask = TRI->getSubRegIndexLaneMask(NewIdx);
   1303       bool UpdatedSubRanges = false;
   1304       for (LiveInterval::SubRange &SR : DstInt.subranges()) {
   1305         if ((SR.LaneMask & DstMask).none()) {
   1306           LLVM_DEBUG(dbgs()
   1307                      << "Removing undefined SubRange "
   1308                      << PrintLaneMask(SR.LaneMask) << " : " << SR << "\n");
    1309           // Remove the value defined here and all segments using it.
   1310           if (VNInfo *RmValNo = SR.getVNInfoAt(CurrIdx.getRegSlot())) {
   1311             SR.removeValNo(RmValNo);
   1312             UpdatedSubRanges = true;
   1313           }
   1314         }
   1315       }
   1316       if (UpdatedSubRanges)
   1317         DstInt.removeEmptySubRanges();
   1318     }
   1319   } else if (NewMI.getOperand(0).getReg() != CopyDstReg) {
    1320     // The new instruction may be defining a sub-register of what was actually
    1321     // asked for. If so, it must implicitly define the whole thing.
   1322     assert(TargetRegisterInfo::isPhysicalRegister(DstReg) &&
   1323            "Only expect virtual or physical registers in remat");
   1324     NewMI.getOperand(0).setIsDead(true);
   1325     NewMI.addOperand(MachineOperand::CreateReg(
   1326         CopyDstReg, true /*IsDef*/, true /*IsImp*/, false /*IsKill*/));
   1327     // Record small dead def live-ranges for all the subregisters
   1328     // of the destination register.
   1329     // Otherwise, variables that live through may miss some
    1330     // interferences, thus creating an invalid allocation.
   1331     // E.g., i386 code:
   1332     // %1 = somedef ; %1 GR8
   1333     // %2 = remat ; %2 GR32
   1334     // CL = COPY %2.sub_8bit
   1335     // = somedef %1 ; %1 GR8
   1336     // =>
   1337     // %1 = somedef ; %1 GR8
   1338     // dead ECX = remat ; implicit-def CL
   1339     // = somedef %1 ; %1 GR8
   1340     // %1 will see the interferences with CL but not with CH since
   1341     // no live-ranges would have been created for ECX.
   1342     // Fix that!
   1343     SlotIndex NewMIIdx = LIS->getInstructionIndex(NewMI);
   1344     for (MCRegUnitIterator Units(NewMI.getOperand(0).getReg(), TRI);
   1345          Units.isValid(); ++Units)
   1346       if (LiveRange *LR = LIS->getCachedRegUnit(*Units))
   1347         LR->createDeadDef(NewMIIdx.getRegSlot(), LIS->getVNInfoAllocator());
   1348   }
   1349 
   1350   if (NewMI.getOperand(0).getSubReg())
   1351     NewMI.getOperand(0).setIsUndef();
   1352 
   1353   // Transfer over implicit operands to the rematerialized instruction.
   1354   for (MachineOperand &MO : ImplicitOps)
   1355     NewMI.addOperand(MO);
   1356 
   1357   SlotIndex NewMIIdx = LIS->getInstructionIndex(NewMI);
   1358   for (unsigned i = 0, e = NewMIImplDefs.size(); i != e; ++i) {
   1359     unsigned Reg = NewMIImplDefs[i];
   1360     for (MCRegUnitIterator Units(Reg, TRI); Units.isValid(); ++Units)
   1361       if (LiveRange *LR = LIS->getCachedRegUnit(*Units))
   1362         LR->createDeadDef(NewMIIdx.getRegSlot(), LIS->getVNInfoAllocator());
   1363   }
   1364 
   1365   LLVM_DEBUG(dbgs() << "Remat: " << NewMI);
   1366   ++NumReMats;
   1367 
   1368   // The source interval can become smaller because we removed a use.
   1369   shrinkToUses(&SrcInt, &DeadDefs);
   1370   if (!DeadDefs.empty()) {
   1371     // If the virtual SrcReg is completely eliminated, update all DBG_VALUEs
   1372     // to describe DstReg instead.
   1373     for (MachineOperand &UseMO : MRI->use_operands(SrcReg)) {
   1374       MachineInstr *UseMI = UseMO.getParent();
   1375       if (UseMI->isDebugValue()) {
   1376         UseMO.setReg(DstReg);
   1377         // Move the debug value directly after the def of the rematerialized
   1378         // value in DstReg.
   1379         MBB->splice(std::next(NewMI.getIterator()), UseMI->getParent(), UseMI);
   1380         LLVM_DEBUG(dbgs() << "\t\tupdated: " << *UseMI);
   1381       }
   1382     }
   1383     eliminateDeadDefs();
   1384   }
   1385 
   1386   return true;
   1387 }
   1388 
   1389 MachineInstr *RegisterCoalescer::eliminateUndefCopy(MachineInstr *CopyMI) {
    1390   // ProcessImplicitDefs may leave some copies of <undef> values; it only
   1391   // removes local variables. When we have a copy like:
   1392   //
   1393   //   %1 = COPY undef %2
   1394   //
   1395   // We delete the copy and remove the corresponding value number from %1.
   1396   // Any uses of that value number are marked as <undef>.
   1397 
   1398   // Note that we do not query CoalescerPair here but redo isMoveInstr as the
   1399   // CoalescerPair may have a new register class with adjusted subreg indices
   1400   // at this point.
   1401   unsigned SrcReg, DstReg, SrcSubIdx, DstSubIdx;
   1402   isMoveInstr(*TRI, CopyMI, SrcReg, DstReg, SrcSubIdx, DstSubIdx);
   1403 
   1404   SlotIndex Idx = LIS->getInstructionIndex(*CopyMI);
   1405   const LiveInterval &SrcLI = LIS->getInterval(SrcReg);
   1406   // CopyMI is undef iff SrcReg is not live before the instruction.
   1407   if (SrcSubIdx != 0 && SrcLI.hasSubRanges()) {
   1408     LaneBitmask SrcMask = TRI->getSubRegIndexLaneMask(SrcSubIdx);
   1409     for (const LiveInterval::SubRange &SR : SrcLI.subranges()) {
   1410       if ((SR.LaneMask & SrcMask).none())
   1411         continue;
   1412       if (SR.liveAt(Idx))
   1413         return nullptr;
   1414     }
   1415   } else if (SrcLI.liveAt(Idx))
   1416     return nullptr;
   1417 
   1418   // If the undef copy defines a live-out value (i.e. an input to a PHI def),
   1419   // then replace it with an IMPLICIT_DEF.
   1420   LiveInterval &DstLI = LIS->getInterval(DstReg);
   1421   SlotIndex RegIndex = Idx.getRegSlot();
   1422   LiveRange::Segment *Seg = DstLI.getSegmentContaining(RegIndex);
   1423   assert(Seg != nullptr && "No segment for defining instruction");
   1424   if (VNInfo *V = DstLI.getVNInfoAt(Seg->end)) {
   1425     if (V->isPHIDef()) {
   1426       CopyMI->setDesc(TII->get(TargetOpcode::IMPLICIT_DEF));
   1427       for (unsigned i = CopyMI->getNumOperands(); i != 0; --i) {
   1428         MachineOperand &MO = CopyMI->getOperand(i-1);
   1429         if (MO.isReg() && MO.isUse())
   1430           CopyMI->RemoveOperand(i-1);
   1431       }
   1432       LLVM_DEBUG(dbgs() << "\tReplaced copy of <undef> value with an "
   1433                            "implicit def\n");
   1434       return CopyMI;
   1435     }
   1436   }
   1437 
   1438   // Remove any DstReg segments starting at the instruction.
   1439   LLVM_DEBUG(dbgs() << "\tEliminating copy of <undef> value\n");
   1440 
   1441   // Remove value or merge with previous one in case of a subregister def.
   1442   if (VNInfo *PrevVNI = DstLI.getVNInfoAt(Idx)) {
   1443     VNInfo *VNI = DstLI.getVNInfoAt(RegIndex);
   1444     DstLI.MergeValueNumberInto(VNI, PrevVNI);
   1445 
   1446     // The affected subregister segments can be removed.
   1447     LaneBitmask DstMask = TRI->getSubRegIndexLaneMask(DstSubIdx);
   1448     for (LiveInterval::SubRange &SR : DstLI.subranges()) {
   1449       if ((SR.LaneMask & DstMask).none())
   1450         continue;
   1451 
   1452       VNInfo *SVNI = SR.getVNInfoAt(RegIndex);
   1453       assert(SVNI != nullptr && SlotIndex::isSameInstr(SVNI->def, RegIndex));
   1454       SR.removeValNo(SVNI);
   1455     }
   1456     DstLI.removeEmptySubRanges();
   1457   } else
   1458     LIS->removeVRegDefAt(DstLI, RegIndex);
   1459 
   1460   // Mark uses as undef.
   1461   for (MachineOperand &MO : MRI->reg_nodbg_operands(DstReg)) {
   1462     if (MO.isDef() /*|| MO.isUndef()*/)
   1463       continue;
   1464     const MachineInstr &MI = *MO.getParent();
   1465     SlotIndex UseIdx = LIS->getInstructionIndex(MI);
   1466     LaneBitmask UseMask = TRI->getSubRegIndexLaneMask(MO.getSubReg());
   1467     bool isLive;
   1468     if (!UseMask.all() && DstLI.hasSubRanges()) {
   1469       isLive = false;
   1470       for (const LiveInterval::SubRange &SR : DstLI.subranges()) {
   1471         if ((SR.LaneMask & UseMask).none())
   1472           continue;
   1473         if (SR.liveAt(UseIdx)) {
   1474           isLive = true;
   1475           break;
   1476         }
   1477       }
   1478     } else
   1479       isLive = DstLI.liveAt(UseIdx);
   1480     if (isLive)
   1481       continue;
   1482     MO.setIsUndef(true);
   1483     LLVM_DEBUG(dbgs() << "\tnew undef: " << UseIdx << '\t' << MI);
   1484   }
   1485 
   1486   // A def of a subregister may be a use of the other subregisters, so
   1487   // deleting a def of a subregister may also remove uses. Since CopyMI
   1488   // is still part of the function (but about to be erased), mark all
   1489   // defs of DstReg in it as <undef>, so that shrinkToUses would
   1490   // ignore them.
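           // E.g. (hypothetical) a sub-register def without an undef flag,
           //   %0.sub0 = COPY %1
           // is a read-modify-write def of %0 and also reads %0.sub1.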
   1491   for (MachineOperand &MO : CopyMI->operands())
   1492     if (MO.isReg() && MO.isDef() && MO.getReg() == DstReg)
   1493       MO.setIsUndef(true);
   1494   LIS->shrinkToUses(&DstLI);
   1495 
   1496   return CopyMI;
   1497 }
   1498 
   1499 void RegisterCoalescer::addUndefFlag(const LiveInterval &Int, SlotIndex UseIdx,
   1500                                      MachineOperand &MO, unsigned SubRegIdx) {
   1501   LaneBitmask Mask = TRI->getSubRegIndexLaneMask(SubRegIdx);
   1502   if (MO.isDef())
   1503     Mask = ~Mask;
   1504   bool IsUndef = true;
   1505   for (const LiveInterval::SubRange &S : Int.subranges()) {
   1506     if ((S.LaneMask & Mask).none())
   1507       continue;
   1508     if (S.liveAt(UseIdx)) {
   1509       IsUndef = false;
   1510       break;
   1511     }
   1512   }
   1513   if (IsUndef) {
   1514     MO.setIsUndef(true);
    1515     // We found that some subregister use is actually reading an undefined
    1516     // value. In some cases the whole vreg has become undefined at this
    1517     // point, so we may have to shrink the main range if the use was
    1518     // ending a live segment there.
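             // E.g. (hypothetical), if only %0.sub1 was ever defined, a use
             //   ... = BAR %0.sub0
             // ending the last live segment reads nothing but undef, and the
             // main range has to be shrunk back to the previous real use.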
   1519     LiveQueryResult Q = Int.Query(UseIdx);
   1520     if (Q.valueOut() == nullptr)
   1521       ShrinkMainRange = true;
   1522   }
   1523 }
   1524 
   1525 void RegisterCoalescer::updateRegDefsUses(unsigned SrcReg,
   1526                                           unsigned DstReg,
   1527                                           unsigned SubIdx) {
   1528   bool DstIsPhys = TargetRegisterInfo::isPhysicalRegister(DstReg);
   1529   LiveInterval *DstInt = DstIsPhys ? nullptr : &LIS->getInterval(DstReg);
   1530 
   1531   if (DstInt && DstInt->hasSubRanges() && DstReg != SrcReg) {
   1532     for (MachineOperand &MO : MRI->reg_operands(DstReg)) {
   1533       unsigned SubReg = MO.getSubReg();
   1534       if (SubReg == 0 || MO.isUndef())
   1535         continue;
   1536       MachineInstr &MI = *MO.getParent();
   1537       if (MI.isDebugValue())
   1538         continue;
   1539       SlotIndex UseIdx = LIS->getInstructionIndex(MI).getRegSlot(true);
   1540       addUndefFlag(*DstInt, UseIdx, MO, SubReg);
   1541     }
   1542   }
   1543 
   1544   SmallPtrSet<MachineInstr*, 8> Visited;
   1545   for (MachineRegisterInfo::reg_instr_iterator
   1546        I = MRI->reg_instr_begin(SrcReg), E = MRI->reg_instr_end();
   1547        I != E; ) {
   1548     MachineInstr *UseMI = &*(I++);
   1549 
   1550     // Each instruction can only be rewritten once because sub-register
   1551     // composition is not always idempotent. When SrcReg != DstReg, rewriting
   1552     // the UseMI operands removes them from the SrcReg use-def chain, but when
   1553     // SrcReg is DstReg we could encounter UseMI twice if it has multiple
   1554     // operands mentioning the virtual register.
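             // E.g. (hypothetical), when SrcReg == DstReg == %0, an instruction
             //   %0.sub0 = FOO %0.sub1
             // appears twice in the use-def chain; rewriting it a second time
             // would compose SubIdx onto its operands again.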
   1555     if (SrcReg == DstReg && !Visited.insert(UseMI).second)
   1556       continue;
   1557 
   1558     SmallVector<unsigned,8> Ops;
   1559     bool Reads, Writes;
   1560     std::tie(Reads, Writes) = UseMI->readsWritesVirtualRegister(SrcReg, &Ops);
   1561 
   1562     // If SrcReg wasn't read, it may still be the case that DstReg is live-in
   1563     // because SrcReg is a sub-register.
   1564     if (DstInt && !Reads && SubIdx && !UseMI->isDebugValue())
   1565       Reads = DstInt->liveAt(LIS->getInstructionIndex(*UseMI));
   1566 
   1567     // Replace SrcReg with DstReg in all UseMI operands.
   1568     for (unsigned i = 0, e = Ops.size(); i != e; ++i) {
   1569       MachineOperand &MO = UseMI->getOperand(Ops[i]);
   1570 
   1571       // Adjust <undef> flags in case of sub-register joins. We don't want to
   1572       // turn a full def into a read-modify-write sub-register def and vice
   1573       // versa.
   1574       if (SubIdx && MO.isDef())
   1575         MO.setIsUndef(!Reads);
   1576 
    1577       // A subreg use of a partially undef (super) register may now be a
    1578       // completely undef use, and then it has to be marked that way.
   1579       if (SubIdx != 0 && MO.isUse() && MRI->shouldTrackSubRegLiveness(DstReg)) {
   1580         if (!DstInt->hasSubRanges()) {
   1581           BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator();
   1582           LaneBitmask Mask = MRI->getMaxLaneMaskForVReg(DstInt->reg);
   1583           DstInt->createSubRangeFrom(Allocator, Mask, *DstInt);
   1584         }
   1585         SlotIndex MIIdx = UseMI->isDebugValue()
   1586                               ? LIS->getSlotIndexes()->getIndexBefore(*UseMI)
   1587                               : LIS->getInstructionIndex(*UseMI);
   1588         SlotIndex UseIdx = MIIdx.getRegSlot(true);
   1589         addUndefFlag(*DstInt, UseIdx, MO, SubIdx);
   1590       }
   1591 
   1592       if (DstIsPhys)
   1593         MO.substPhysReg(DstReg, *TRI);
   1594       else
   1595         MO.substVirtReg(DstReg, SubIdx, *TRI);
   1596     }
   1597 
   1598     LLVM_DEBUG({
   1599       dbgs() << "\t\tupdated: ";
   1600       if (!UseMI->isDebugValue())
   1601         dbgs() << LIS->getInstructionIndex(*UseMI) << "\t";
   1602       dbgs() << *UseMI;
   1603     });
   1604   }
   1605 }
   1606 
   1607 bool RegisterCoalescer::canJoinPhys(const CoalescerPair &CP) {
   1608   // Always join simple intervals that are defined by a single copy from a
   1609   // reserved register. This doesn't increase register pressure, so it is
   1610   // always beneficial.
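           // E.g. (hypothetical):
           //   %0 = COPY $sp
           // %0 has a single value defined by the copy, so merging it into the
           // reserved register $sp is safe.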
   1611   if (!MRI->isReserved(CP.getDstReg())) {
   1612     LLVM_DEBUG(dbgs() << "\tCan only merge into reserved registers.\n");
   1613     return false;
   1614   }
   1615 
   1616   LiveInterval &JoinVInt = LIS->getInterval(CP.getSrcReg());
   1617   if (JoinVInt.containsOneValue())
   1618     return true;
   1619 
   1620   LLVM_DEBUG(
   1621       dbgs() << "\tCannot join complex intervals into reserved register.\n");
   1622   return false;
   1623 }
   1624 
   1625 bool RegisterCoalescer::joinCopy(MachineInstr *CopyMI, bool &Again) {
   1626   Again = false;
   1627   LLVM_DEBUG(dbgs() << LIS->getInstructionIndex(*CopyMI) << '\t' << *CopyMI);
   1628 
   1629   CoalescerPair CP(*TRI);
   1630   if (!CP.setRegisters(CopyMI)) {
   1631     LLVM_DEBUG(dbgs() << "\tNot coalescable.\n");
   1632     return false;
   1633   }
   1634 
   1635   if (CP.getNewRC()) {
   1636     auto SrcRC = MRI->getRegClass(CP.getSrcReg());
   1637     auto DstRC = MRI->getRegClass(CP.getDstReg());
   1638     unsigned SrcIdx = CP.getSrcIdx();
   1639     unsigned DstIdx = CP.getDstIdx();
   1640     if (CP.isFlipped()) {
   1641       std::swap(SrcIdx, DstIdx);
   1642       std::swap(SrcRC, DstRC);
   1643     }
   1644     if (!TRI->shouldCoalesce(CopyMI, SrcRC, SrcIdx, DstRC, DstIdx,
   1645                              CP.getNewRC(), *LIS)) {
   1646       LLVM_DEBUG(dbgs() << "\tSubtarget bailed on coalescing.\n");
   1647       return false;
   1648     }
   1649   }
   1650 
   1651   // Dead code elimination. This really should be handled by MachineDCE, but
    1652   // sometimes dead copies slip through, and we must not generate invalid live
    1653   // ranges.
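           // E.g. a copy whose only def is never read:
           //   dead %0 = COPY %1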
   1654   if (!CP.isPhys() && CopyMI->allDefsAreDead()) {
   1655     LLVM_DEBUG(dbgs() << "\tCopy is dead.\n");
   1656     DeadDefs.push_back(CopyMI);
   1657     eliminateDeadDefs();
   1658     return true;
   1659   }
   1660 
   1661   // Eliminate undefs.
   1662   if (!CP.isPhys()) {
   1663     // If this is an IMPLICIT_DEF, leave it alone, but don't try to coalesce.
   1664     if (MachineInstr *UndefMI = eliminateUndefCopy(CopyMI)) {
   1665       if (UndefMI->isImplicitDef())
   1666         return false;
   1667       deleteInstr(CopyMI);
   1668       return false;  // Not coalescable.
   1669     }
   1670   }
   1671 
   1672   // Coalesced copies are normally removed immediately, but transformations
   1673   // like removeCopyByCommutingDef() can inadvertently create identity copies.
   1674   // When that happens, just join the values and remove the copy.
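           // E.g. (hypothetical) a commuted def may leave behind:
           //   %5 = COPY %5
           // where the value read and the value defined are still distinct.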
   1675   if (CP.getSrcReg() == CP.getDstReg()) {
   1676     LiveInterval &LI = LIS->getInterval(CP.getSrcReg());
   1677     LLVM_DEBUG(dbgs() << "\tCopy already coalesced: " << LI << '\n');
   1678     const SlotIndex CopyIdx = LIS->getInstructionIndex(*CopyMI);
   1679     LiveQueryResult LRQ = LI.Query(CopyIdx);
   1680     if (VNInfo *DefVNI = LRQ.valueDefined()) {
   1681       VNInfo *ReadVNI = LRQ.valueIn();
   1682       assert(ReadVNI && "No value before copy and no <undef> flag.");
   1683       assert(ReadVNI != DefVNI && "Cannot read and define the same value.");
   1684       LI.MergeValueNumberInto(DefVNI, ReadVNI);
   1685 
   1686       // Process subregister liveranges.
   1687       for (LiveInterval::SubRange &S : LI.subranges()) {
   1688         LiveQueryResult SLRQ = S.Query(CopyIdx);
   1689         if (VNInfo *SDefVNI = SLRQ.valueDefined()) {
   1690           VNInfo *SReadVNI = SLRQ.valueIn();
   1691           S.MergeValueNumberInto(SDefVNI, SReadVNI);
   1692         }
   1693       }
   1694       LLVM_DEBUG(dbgs() << "\tMerged values:          " << LI << '\n');
   1695     }
   1696     deleteInstr(CopyMI);
   1697     return true;
   1698   }
   1699 
   1700   // Enforce policies.
   1701   if (CP.isPhys()) {
   1702     LLVM_DEBUG(dbgs() << "\tConsidering merging "
   1703                       << printReg(CP.getSrcReg(), TRI) << " with "
   1704                       << printReg(CP.getDstReg(), TRI, CP.getSrcIdx()) << '\n');
   1705     if (!canJoinPhys(CP)) {
    1706       // Before giving up coalescing, if the definition of the source is a
    1707       // trivial computation, try rematerializing it.
   1708       bool IsDefCopy;
   1709       if (reMaterializeTrivialDef(CP, CopyMI, IsDefCopy))
   1710         return true;
   1711       if (IsDefCopy)
   1712         Again = true;  // May be possible to coalesce later.
   1713       return false;
   1714     }
   1715   } else {
   1716     // When possible, let DstReg be the larger interval.
   1717     if (!CP.isPartial() && LIS->getInterval(CP.getSrcReg()).size() >
   1718                            LIS->getInterval(CP.getDstReg()).size())
   1719       CP.flip();
   1720 
   1721     LLVM_DEBUG({
   1722       dbgs() << "\tConsidering merging to "
   1723              << TRI->getRegClassName(CP.getNewRC()) << " with ";
   1724       if (CP.getDstIdx() && CP.getSrcIdx())
   1725         dbgs() << printReg(CP.getDstReg()) << " in "
   1726                << TRI->getSubRegIndexName(CP.getDstIdx()) << " and "
   1727                << printReg(CP.getSrcReg()) << " in "
   1728                << TRI->getSubRegIndexName(CP.getSrcIdx()) << '\n';
   1729       else
   1730         dbgs() << printReg(CP.getSrcReg(), TRI) << " in "
   1731                << printReg(CP.getDstReg(), TRI, CP.getSrcIdx()) << '\n';
   1732     });
   1733   }
   1734 
   1735   ShrinkMask = LaneBitmask::getNone();
   1736   ShrinkMainRange = false;
   1737 
   1738   // Okay, attempt to join these two intervals.  On failure, this returns false.
   1739   // Otherwise, if one of the intervals being joined is a physreg, this method
   1740   // always canonicalizes DstInt to be it.  The output "SrcInt" will not have
   1741   // been modified, so we can use this information below to update aliases.
   1742   if (!joinIntervals(CP)) {
   1743     // Coalescing failed.
   1744 
    1745     // If the definition of the source is a trivial computation, try
    1746     // rematerializing it.
   1747     bool IsDefCopy;
   1748     if (reMaterializeTrivialDef(CP, CopyMI, IsDefCopy))
   1749       return true;
   1750 
   1751     // If we can eliminate the copy without merging the live segments, do so
   1752     // now.
   1753     if (!CP.isPartial() && !CP.isPhys()) {
   1754       if (adjustCopiesBackFrom(CP, CopyMI) ||
   1755           removeCopyByCommutingDef(CP, CopyMI)) {
   1756         deleteInstr(CopyMI);
   1757         LLVM_DEBUG(dbgs() << "\tTrivial!\n");
   1758         return true;
   1759       }
   1760     }
   1761 
    1762     // Try to see if we can partially eliminate the copy by moving it to
    1763     // its predecessor block.
   1764     if (!CP.isPartial() && !CP.isPhys())
   1765       if (removePartialRedundancy(CP, *CopyMI))
   1766         return true;
   1767 
   1768     // Otherwise, we are unable to join the intervals.
   1769     LLVM_DEBUG(dbgs() << "\tInterference!\n");
   1770     Again = true;  // May be possible to coalesce later.
   1771     return false;
   1772   }
   1773 
    1774   // We may be coalescing to a virtual register that is in a sub-register class
    1775   // of the other. Make sure the resulting register is set to the right class.
   1776   if (CP.isCrossClass()) {
   1777     ++numCrossRCs;
   1778     MRI->setRegClass(CP.getDstReg(), CP.getNewRC());
   1779   }
   1780 
   1781   // Removing sub-register copies can ease the register class constraints.
   1782   // Make sure we attempt to inflate the register class of DstReg.
   1783   if (!CP.isPhys() && RegClassInfo.isProperSubClass(CP.getNewRC()))
   1784     InflateRegs.push_back(CP.getDstReg());
   1785 
   1786   // CopyMI has been erased by joinIntervals at this point. Remove it from
   1787   // ErasedInstrs since copyCoalesceWorkList() won't add a successful join back
   1788   // to the work list. This keeps ErasedInstrs from growing needlessly.
   1789   ErasedInstrs.erase(CopyMI);
   1790 
   1791   // Rewrite all SrcReg operands to DstReg.
   1792   // Also update DstReg operands to include DstIdx if it is set.
   1793   if (CP.getDstIdx())
   1794     updateRegDefsUses(CP.getDstReg(), CP.getDstReg(), CP.getDstIdx());
   1795   updateRegDefsUses(CP.getSrcReg(), CP.getDstReg(), CP.getSrcIdx());
   1796 
   1797   // Shrink subregister ranges if necessary.
   1798   if (ShrinkMask.any()) {
   1799     LiveInterval &LI = LIS->getInterval(CP.getDstReg());
   1800     for (LiveInterval::SubRange &S : LI.subranges()) {
   1801       if ((S.LaneMask & ShrinkMask).none())
   1802         continue;
   1803       LLVM_DEBUG(dbgs() << "Shrink LaneUses (Lane " << PrintLaneMask(S.LaneMask)
   1804                         << ")\n");
   1805       LIS->shrinkToUses(S, LI.reg);
   1806     }
   1807     LI.removeEmptySubRanges();
   1808   }
   1809   if (ShrinkMainRange) {
   1810     LiveInterval &LI = LIS->getInterval(CP.getDstReg());
   1811     shrinkToUses(&LI);
   1812   }
   1813 
    1814   // SrcReg is guaranteed to be the register whose live interval is being
    1815   // merged.
   1816   LIS->removeInterval(CP.getSrcReg());
   1817 
   1818   // Update regalloc hint.
   1819   TRI->updateRegAllocHint(CP.getSrcReg(), CP.getDstReg(), *MF);
   1820 
   1821   LLVM_DEBUG({
   1822     dbgs() << "\tSuccess: " << printReg(CP.getSrcReg(), TRI, CP.getSrcIdx())
   1823            << " -> " << printReg(CP.getDstReg(), TRI, CP.getDstIdx()) << '\n';
   1824     dbgs() << "\tResult = ";
   1825     if (CP.isPhys())
   1826       dbgs() << printReg(CP.getDstReg(), TRI);
   1827     else
   1828       dbgs() << LIS->getInterval(CP.getDstReg());
   1829     dbgs() << '\n';
   1830   });
   1831 
   1832   ++numJoins;
   1833   return true;
   1834 }
   1835 
   1836 bool RegisterCoalescer::joinReservedPhysReg(CoalescerPair &CP) {
   1837   unsigned DstReg = CP.getDstReg();
   1838   unsigned SrcReg = CP.getSrcReg();
   1839   assert(CP.isPhys() && "Must be a physreg copy");
   1840   assert(MRI->isReserved(DstReg) && "Not a reserved register");
   1841   LiveInterval &RHS = LIS->getInterval(SrcReg);
   1842   LLVM_DEBUG(dbgs() << "\t\tRHS = " << RHS << '\n');
   1843 
   1844   assert(RHS.containsOneValue() && "Invalid join with reserved register");
   1845 
   1846   // Optimization for reserved registers like ESP. We can only merge with a
   1847   // reserved physreg if RHS has a single value that is a copy of DstReg.
   1848   // The live range of the reserved register will look like a set of dead defs
   1849   // - we don't properly track the live range of reserved registers.
   1850 
    1851   // Deny any overlapping intervals. This depends on all the reserved
    1852   // register live ranges looking like dead defs.
   1853   if (!MRI->isConstantPhysReg(DstReg)) {
   1854     for (MCRegUnitIterator UI(DstReg, TRI); UI.isValid(); ++UI) {
   1855       // Abort if not all the regunits are reserved.
   1856       for (MCRegUnitRootIterator RI(*UI, TRI); RI.isValid(); ++RI) {
   1857         if (!MRI->isReserved(*RI))
   1858           return false;
   1859       }
   1860       if (RHS.overlaps(LIS->getRegUnit(*UI))) {
   1861         LLVM_DEBUG(dbgs() << "\t\tInterference: " << printRegUnit(*UI, TRI)
   1862                           << '\n');
   1863         return false;
   1864       }
   1865     }
   1866 
   1867     // We must also check for overlaps with regmask clobbers.
   1868     BitVector RegMaskUsable;
   1869     if (LIS->checkRegMaskInterference(RHS, RegMaskUsable) &&
   1870         !RegMaskUsable.test(DstReg)) {
   1871       LLVM_DEBUG(dbgs() << "\t\tRegMask interference\n");
   1872       return false;
   1873     }
   1874   }
   1875 
    1876   // Skip any value computations; we are not adding new values to the
    1877   // reserved register. Also skip merging the live ranges; the reserved
    1878   // register live range doesn't need to be accurate as long as all the
    1879   // defs are there.
   1880 
   1881   // Delete the identity copy.
   1882   MachineInstr *CopyMI;
   1883   if (CP.isFlipped()) {
   1884     // Physreg is copied into vreg
   1885     //   %y = COPY %physreg_x
   1886     //   ...  //< no other def of %x here
   1887     //   use %y
   1888     // =>
   1889     //   ...
   1890     //   use %x
   1891     CopyMI = MRI->getVRegDef(SrcReg);
   1892   } else {
   1893     // VReg is copied into physreg:
   1894     //   %y = def
   1895     //   ... //< no other def or use of %y here
   1896     //   %y = COPY %physreg_x
   1897     // =>
   1898     //   %y = def
   1899     //   ...
   1900     if (!MRI->hasOneNonDBGUse(SrcReg)) {
   1901       LLVM_DEBUG(dbgs() << "\t\tMultiple vreg uses!\n");
   1902       return false;
   1903     }
   1904 
   1905     if (!LIS->intervalIsInOneMBB(RHS)) {
   1906       LLVM_DEBUG(dbgs() << "\t\tComplex control flow!\n");
   1907       return false;
   1908     }
   1909 
   1910     MachineInstr &DestMI = *MRI->getVRegDef(SrcReg);
   1911     CopyMI = &*MRI->use_instr_nodbg_begin(SrcReg);
   1912     SlotIndex CopyRegIdx = LIS->getInstructionIndex(*CopyMI).getRegSlot();
   1913     SlotIndex DestRegIdx = LIS->getInstructionIndex(DestMI).getRegSlot();
   1914 
   1915     if (!MRI->isConstantPhysReg(DstReg)) {
   1916       // We checked above that there are no interfering defs of the physical
   1917       // register. However, for this case, where we intend to move up the def of
   1918       // the physical register, we also need to check for interfering uses.
   1919       SlotIndexes *Indexes = LIS->getSlotIndexes();
   1920       for (SlotIndex SI = Indexes->getNextNonNullIndex(DestRegIdx);
   1921            SI != CopyRegIdx; SI = Indexes->getNextNonNullIndex(SI)) {
   1922         MachineInstr *MI = LIS->getInstructionFromIndex(SI);
   1923         if (MI->readsRegister(DstReg, TRI)) {
   1924           LLVM_DEBUG(dbgs() << "\t\tInterference (read): " << *MI);
   1925           return false;
   1926         }
   1927       }
   1928     }
   1929 
   1930     // We're going to remove the copy which defines a physical reserved
   1931     // register, so remove its valno, etc.
   1932     LLVM_DEBUG(dbgs() << "\t\tRemoving phys reg def of "
   1933                       << printReg(DstReg, TRI) << " at " << CopyRegIdx << "\n");
   1934 
   1935     LIS->removePhysRegDefAt(DstReg, CopyRegIdx);
   1936     // Create a new dead def at the new def location.
   1937     for (MCRegUnitIterator UI(DstReg, TRI); UI.isValid(); ++UI) {
   1938       LiveRange &LR = LIS->getRegUnit(*UI);
   1939       LR.createDeadDef(DestRegIdx, LIS->getVNInfoAllocator());
   1940     }
   1941   }
   1942 
   1943   deleteInstr(CopyMI);
   1944 
   1945   // We don't track kills for reserved registers.
   1946   MRI->clearKillFlags(CP.getSrcReg());
   1947 
   1948   return true;
   1949 }
   1950 
   1951 //===----------------------------------------------------------------------===//
   1952 //                 Interference checking and interval joining
   1953 //===----------------------------------------------------------------------===//
   1954 //
   1955 // In the easiest case, the two live ranges being joined are disjoint, and
   1956 // there is no interference to consider. It is quite common, though, to have
   1957 // overlapping live ranges, and we need to check if the interference can be
   1958 // resolved.
   1959 //
   1960 // The live range of a single SSA value forms a sub-tree of the dominator tree.
   1961 // This means that two SSA values overlap if and only if the def of one value
   1962 // is contained in the live range of the other value. As a special case, the
   1963 // overlapping values can be defined at the same index.
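         //
         // For example (hypothetical), when joining %0 and %1:
         //
         //   %0 = FOO
         //   %1 = BAR          <-- %1 defined inside %0's live range: overlap
         //   use %0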
   1964 //
   1965 // The interference from an overlapping def can be resolved in these cases:
   1966 //
   1967 // 1. Coalescable copies. The value is defined by a copy that would become an
   1968 //    identity copy after joining SrcReg and DstReg. The copy instruction will
   1969 //    be removed, and the value will be merged with the source value.
   1970 //
   1971 //    There can be several copies back and forth, causing many values to be
   1972 //    merged into one. We compute a list of ultimate values in the joined live
   1973 //    range as well as a mappings from the old value numbers.
   1974 //
   1975 // 2. IMPLICIT_DEF. This instruction is only inserted to ensure all PHI
   1976 //    predecessors have a live out value. It doesn't cause real interference,
   1977 //    and can be merged into the value it overlaps. Like a coalescable copy, it
   1978 //    can be erased after joining.
   1979 //
   1980 // 3. Copy of external value. The overlapping def may be a copy of a value that
   1981 //    is already in the other register. This is like a coalescable copy, but
   1982 //    the live range of the source register must be trimmed after erasing the
   1983 //    copy instruction:
   1984 //
   1985 //      %src = COPY %ext
   1986 //      %dst = COPY %ext  <-- Remove this COPY, trim the live range of %ext.
   1987 //
   1988 // 4. Clobbering undefined lanes. Vector registers are sometimes built by
   1989 //    defining one lane at a time:
   1990 //
   1991 //      %dst:ssub0<def,read-undef> = FOO
   1992 //      %src = BAR
   1993 //      %dst:ssub1 = COPY %src
   1994 //
   1995 //    The live range of %src overlaps the %dst value defined by FOO, but
   1996 //    merging %src into %dst:ssub1 is only going to clobber the ssub1 lane
   1997 //    which was undef anyway.
   1998 //
   1999 //    The value mapping is more complicated in this case. The final live range
   2000 //    will have different value numbers for both FOO and BAR, but there is no
   2001 //    simple mapping from old to new values. It may even be necessary to add
   2002 //    new PHI values.
   2003 //
   2004 // 5. Clobbering dead lanes. A def may clobber a lane of a vector register that
   2005 //    is live, but never read. This can happen because we don't compute
   2006 //    individual live ranges per lane.
   2007 //
   2008 //      %dst = FOO
   2009 //      %src = BAR
   2010 //      %dst:ssub1 = COPY %src
   2011 //
   2012 //    This kind of interference is only resolved locally. If the clobbered
   2013 //    lane value escapes the block, the join is aborted.
   2014 
   2015 namespace {
   2016 
   2017 /// Track information about values in a single virtual register about to be
   2018 /// joined. Objects of this class are always created in pairs - one for each
   2019 /// side of the CoalescerPair (or one for each lane of a side of the coalescer
    2020 /// pair).
   2021 class JoinVals {
   2022   /// Live range we work on.
   2023   LiveRange &LR;
   2024 
   2025   /// (Main) register we work on.
   2026   const unsigned Reg;
   2027 
   2028   /// Reg (and therefore the values in this liverange) will end up as
   2029   /// subregister SubIdx in the coalesced register. Either CP.DstIdx or
   2030   /// CP.SrcIdx.
   2031   const unsigned SubIdx;
   2032 
    2033   /// The LaneMask that this liverange will occupy in the coalesced register.
    2034   /// May be smaller than the lanemask produced by SubIdx when merging subranges.
   2035   const LaneBitmask LaneMask;
   2036 
   2037   /// This is true when joining sub register ranges, false when joining main
   2038   /// ranges.
   2039   const bool SubRangeJoin;
   2040 
   2041   /// Whether the current LiveInterval tracks subregister liveness.
   2042   const bool TrackSubRegLiveness;
   2043 
   2044   /// Values that will be present in the final live range.
   2045   SmallVectorImpl<VNInfo*> &NewVNInfo;
   2046 
   2047   const CoalescerPair &CP;
   2048   LiveIntervals *LIS;
   2049   SlotIndexes *Indexes;
   2050   const TargetRegisterInfo *TRI;
   2051 
   2052   /// Value number assignments. Maps value numbers in LI to entries in
   2053   /// NewVNInfo. This is suitable for passing to LiveInterval::join().
   2054   SmallVector<int, 8> Assignments;
   2055 
   2056   /// Conflict resolution for overlapping values.
   2057   enum ConflictResolution {
   2058     /// No overlap, simply keep this value.
   2059     CR_Keep,
   2060 
   2061     /// Merge this value into OtherVNI and erase the defining instruction.
   2062     /// Used for IMPLICIT_DEF, coalescable copies, and copies from external
   2063     /// values.
   2064     CR_Erase,
   2065 
   2066     /// Merge this value into OtherVNI but keep the defining instruction.
   2067     /// This is for the special case where OtherVNI is defined by the same
   2068     /// instruction.
   2069     CR_Merge,
   2070 
   2071     /// Keep this value, and have it replace OtherVNI where possible. This
   2072     /// complicates value mapping since OtherVNI maps to two different values
   2073     /// before and after this def.
   2074     /// Used when clobbering undefined or dead lanes.
   2075     CR_Replace,
   2076 
   2077     /// Unresolved conflict. Visit later when all values have been mapped.
   2078     CR_Unresolved,
   2079 
   2080     /// Unresolvable conflict. Abort the join.
   2081     CR_Impossible
   2082   };
   2083 
   2084   /// Per-value info for LI. The lane bit masks are all relative to the final
   2085   /// joined register, so they can be compared directly between SrcReg and
   2086   /// DstReg.
   2087   struct Val {
   2088     ConflictResolution Resolution = CR_Keep;
   2089 
   2090     /// Lanes written by this def, 0 for unanalyzed values.
   2091     LaneBitmask WriteLanes;
   2092 
   2093     /// Lanes with defined values in this register. Other lanes are undef and
   2094     /// safe to clobber.
   2095     LaneBitmask ValidLanes;
   2096 
   2097     /// Value in LI being redefined by this def.
   2098     VNInfo *RedefVNI = nullptr;
   2099 
   2100     /// Value in the other live range that overlaps this def, if any.
   2101     VNInfo *OtherVNI = nullptr;
   2102 
   2103     /// Is this value an IMPLICIT_DEF that can be erased?
   2104     ///
   2105     /// IMPLICIT_DEF values should only exist at the end of a basic block that
   2106     /// is a predecessor to a phi-value. These IMPLICIT_DEF instructions can be
   2107     /// safely erased if they are overlapping a live value in the other live
   2108     /// interval.
   2109     ///
   2110     /// Weird control flow graphs and incomplete PHI handling in
   2111     /// ProcessImplicitDefs can very rarely create IMPLICIT_DEF values with
   2112     /// longer live ranges. Such IMPLICIT_DEF values should be treated like
   2113     /// normal values.
   2114     bool ErasableImplicitDef = false;
   2115 
   2116     /// True when the live range of this value will be pruned because of an
   2117     /// overlapping CR_Replace value in the other live range.
   2118     bool Pruned = false;
   2119 
   2120     /// True once Pruned above has been computed.
   2121     bool PrunedComputed = false;
   2122 
   2123     /// True if this value is determined to be identical to OtherVNI
   2124     /// (in valuesIdentical). This is used with CR_Erase where the erased
   2125     /// copy is redundant, i.e. the source value is already the same as
   2126     /// the destination. In such cases the subranges need to be updated
   2127     /// properly. See comment at pruneSubRegValues for more info.
   2128     bool Identical = false;
   2129 
   2130     Val() = default;
   2131 
   2132     bool isAnalyzed() const { return WriteLanes.any(); }
   2133   };
   2134 
   2135   /// One entry per value number in LI.
   2136   SmallVector<Val, 8> Vals;
   2137 
   2138   /// Compute the bitmask of lanes actually written by DefMI.
   2139   /// Set Redef if there are any partial register definitions that depend on the
   2140   /// previous value of the register.
   2141   LaneBitmask computeWriteLanes(const MachineInstr *DefMI, bool &Redef) const;
   2142 
   2143   /// Find the ultimate value that VNI was copied from.
   2144   std::pair<const VNInfo*,unsigned> followCopyChain(const VNInfo *VNI) const;
   2145 
   2146   bool valuesIdentical(VNInfo *Value0, VNInfo *Value1, const JoinVals &Other) const;
   2147 
   2148   /// Analyze ValNo in this live range, and set all fields of Vals[ValNo].
   2149   /// Return a conflict resolution when possible, but leave the hard cases as
   2150   /// CR_Unresolved.
   2151   /// Recursively calls computeAssignment() on this and Other, guaranteeing that
   2152   /// both OtherVNI and RedefVNI have been analyzed and mapped before returning.
   2153   /// The recursion always goes upwards in the dominator tree, making loops
   2154   /// impossible.
   2155   ConflictResolution analyzeValue(unsigned ValNo, JoinVals &Other);
   2156 
    2157   /// Compute the value assignment for ValNo in LR.
   2158   /// This may be called recursively by analyzeValue(), but never for a ValNo on
   2159   /// the stack.
   2160   void computeAssignment(unsigned ValNo, JoinVals &Other);
   2161 
   2162   /// Assuming ValNo is going to clobber some valid lanes in Other.LR, compute
   2163   /// the extent of the tainted lanes in the block.
   2164   ///
   2165   /// Multiple values in Other.LR can be affected since partial redefinitions
   2166   /// can preserve previously tainted lanes.
   2167   ///
   2168   ///   1 %dst = VLOAD           <-- Define all lanes in %dst
   2169   ///   2 %src = FOO             <-- ValNo to be joined with %dst:ssub0
   2170   ///   3 %dst:ssub1 = BAR       <-- Partial redef doesn't clear taint in ssub0
   2171   ///   4 %dst:ssub0 = COPY %src <-- Conflict resolved, ssub0 wasn't read
   2172   ///
   2173   /// For each ValNo in Other that is affected, add an (EndIndex, TaintedLanes)
   2174   /// entry to TaintedVals.
   2175   ///
   2176   /// Returns false if the tainted lanes extend beyond the basic block.
   2177   bool
   2178   taintExtent(unsigned ValNo, LaneBitmask TaintedLanes, JoinVals &Other,
   2179               SmallVectorImpl<std::pair<SlotIndex, LaneBitmask>> &TaintExtent);
   2180 
   2181   /// Return true if MI uses any of the given Lanes from Reg.
   2182   /// This does not include partial redefinitions of Reg.
   2183   bool usesLanes(const MachineInstr &MI, unsigned, unsigned, LaneBitmask) const;
   2184 
   2185   /// Determine if ValNo is a copy of a value number in LR or Other.LR that will
   2186   /// be pruned:
   2187   ///
   2188   ///   %dst = COPY %src
   2189   ///   %src = COPY %dst  <-- This value to be pruned.
   2190   ///   %dst = COPY %src  <-- This value is a copy of a pruned value.
   2191   bool isPrunedValue(unsigned ValNo, JoinVals &Other);
   2192 
   2193 public:
   2194   JoinVals(LiveRange &LR, unsigned Reg, unsigned SubIdx, LaneBitmask LaneMask,
   2195            SmallVectorImpl<VNInfo*> &newVNInfo, const CoalescerPair &cp,
   2196            LiveIntervals *lis, const TargetRegisterInfo *TRI, bool SubRangeJoin,
   2197            bool TrackSubRegLiveness)
   2198     : LR(LR), Reg(Reg), SubIdx(SubIdx), LaneMask(LaneMask),
   2199       SubRangeJoin(SubRangeJoin), TrackSubRegLiveness(TrackSubRegLiveness),
   2200       NewVNInfo(newVNInfo), CP(cp), LIS(lis), Indexes(LIS->getSlotIndexes()),
   2201       TRI(TRI), Assignments(LR.getNumValNums(), -1), Vals(LR.getNumValNums()) {}
   2202 
   2203   /// Analyze defs in LR and compute a value mapping in NewVNInfo.
   2204   /// Returns false if any conflicts were impossible to resolve.
   2205   bool mapValues(JoinVals &Other);
   2206 
   2207   /// Try to resolve conflicts that require all values to be mapped.
   2208   /// Returns false if any conflicts were impossible to resolve.
   2209   bool resolveConflicts(JoinVals &Other);
   2210 
   2211   /// Prune the live range of values in Other.LR where they would conflict with
   2212   /// CR_Replace values in LR. Collect end points for restoring the live range
   2213   /// after joining.
   2214   void pruneValues(JoinVals &Other, SmallVectorImpl<SlotIndex> &EndPoints,
   2215                    bool changeInstrs);
   2216 
   2217   /// Removes subranges starting at copies that get removed. This sometimes
   2218   /// happens when undefined subranges are copied around. These ranges contain
   2219   /// no useful information and can be removed.
   2220   void pruneSubRegValues(LiveInterval &LI, LaneBitmask &ShrinkMask);
   2221 
   2222   /// Pruning values in subranges can lead to removing segments in these
   2223   /// subranges started by IMPLICIT_DEFs. The corresponding segments in
   2224   /// the main range also need to be removed. This function will mark
   2225   /// the corresponding values in the main range as pruned, so that
   2226   /// eraseInstrs can do the final cleanup.
   2227   /// The parameter @p LI must be the interval whose main range is the
   2228   /// live range LR.
   2229   void pruneMainSegments(LiveInterval &LI, bool &ShrinkMainRange);
   2230 
   2231   /// Erase any machine instructions that have been coalesced away.
   2232   /// Add erased instructions to ErasedInstrs.
   2233   /// Add foreign virtual registers to ShrinkRegs if their live range ended at
   2234   /// the erased instrs.
   2235   void eraseInstrs(SmallPtrSetImpl<MachineInstr*> &ErasedInstrs,
   2236                    SmallVectorImpl<unsigned> &ShrinkRegs,
   2237                    LiveInterval *LI = nullptr);
   2238 
   2239   /// Remove liverange defs at places where implicit defs will be removed.
   2240   void removeImplicitDefs();
   2241 
   2242   /// Get the value assignments suitable for passing to LiveInterval::join.
   2243   const int *getAssignments() const { return Assignments.data(); }
   2244 };
   2245 
   2246 } // end anonymous namespace
   2247 
   2248 LaneBitmask JoinVals::computeWriteLanes(const MachineInstr *DefMI, bool &Redef)
   2249   const {
   2250   LaneBitmask L;
   2251   for (const MachineOperand &MO : DefMI->operands()) {
   2252     if (!MO.isReg() || MO.getReg() != Reg || !MO.isDef())
   2253       continue;
   2254     L |= TRI->getSubRegIndexLaneMask(
   2255            TRI->composeSubRegIndices(SubIdx, MO.getSubReg()));
   2256     if (MO.readsReg())
   2257       Redef = true;
   2258   }
   2259   return L;
   2260 }
   2261 
   2262 std::pair<const VNInfo*, unsigned> JoinVals::followCopyChain(
   2263     const VNInfo *VNI) const {
   2264   unsigned TrackReg = Reg;
   2265 
   2266   while (!VNI->isPHIDef()) {
   2267     SlotIndex Def = VNI->def;
   2268     MachineInstr *MI = Indexes->getInstructionFromIndex(Def);
   2269     assert(MI && "No defining instruction");
   2270     if (!MI->isFullCopy())
   2271       return std::make_pair(VNI, TrackReg);
   2272     unsigned SrcReg = MI->getOperand(1).getReg();
   2273     if (!TargetRegisterInfo::isVirtualRegister(SrcReg))
   2274       return std::make_pair(VNI, TrackReg);
   2275 
   2276     const LiveInterval &LI = LIS->getInterval(SrcReg);
   2277     const VNInfo *ValueIn;
   2278     // No subrange involved.
   2279     if (!SubRangeJoin || !LI.hasSubRanges()) {
   2280       LiveQueryResult LRQ = LI.Query(Def);
   2281       ValueIn = LRQ.valueIn();
   2282     } else {
   2283       // Query subranges. Ensure that all matching ones take us to the same def
   2284       // (allowing some of them to be undef).
   2285       ValueIn = nullptr;
   2286       for (const LiveInterval::SubRange &S : LI.subranges()) {
   2287         // Transform lanemask to a mask in the joined live interval.
   2288         LaneBitmask SMask = TRI->composeSubRegIndexLaneMask(SubIdx, S.LaneMask);
   2289         if ((SMask & LaneMask).none())
   2290           continue;
   2291         LiveQueryResult LRQ = S.Query(Def);
   2292         if (!ValueIn) {
   2293           ValueIn = LRQ.valueIn();
   2294           continue;
   2295         }
   2296         if (LRQ.valueIn() && ValueIn != LRQ.valueIn())
   2297           return std::make_pair(VNI, TrackReg);
   2298       }
   2299     }
   2300     if (ValueIn == nullptr) {
   2301       // Reaching an undefined value is legitimate, for example:
   2302       //
   2303       // 1   undef %0.sub1 = ...  ;; %0.sub0 == undef
   2304       // 2   %1 = COPY %0         ;; %1 is defined here.
   2305       // 3   %0 = COPY %1         ;; Now %0.sub0 has a definition,
   2306       //                          ;; but it's equivalent to "undef".
   2307       return std::make_pair(nullptr, SrcReg);
   2308     }
   2309     VNI = ValueIn;
   2310     TrackReg = SrcReg;
   2311   }
   2312   return std::make_pair(VNI, TrackReg);
   2313 }
   2314 
   2315 bool JoinVals::valuesIdentical(VNInfo *Value0, VNInfo *Value1,
   2316                                const JoinVals &Other) const {
   2317   const VNInfo *Orig0;
   2318   unsigned Reg0;
   2319   std::tie(Orig0, Reg0) = followCopyChain(Value0);
   2320   if (Orig0 == Value1 && Reg0 == Other.Reg)
   2321     return true;
   2322 
   2323   const VNInfo *Orig1;
   2324   unsigned Reg1;
   2325   std::tie(Orig1, Reg1) = Other.followCopyChain(Value1);
   2326   // If both values are undefined, and the source registers are the same
   2327   // register, the values are identical. Filter out cases where only one
   2328   // value is defined.
   2329   if (Orig0 == nullptr || Orig1 == nullptr)
   2330     return Orig0 == Orig1 && Reg0 == Reg1;
   2331 
   2332   // The values are equal if they are defined at the same place and use the
   2333   // same register. Note that we cannot compare VNInfos directly as some of
    2334 //    them might be from a copy created in mergeSubRangeInto() while the other
   2335   // is from the original LiveInterval.
   2336   return Orig0->def == Orig1->def && Reg0 == Reg1;
   2337 }
   2338 
   2339 JoinVals::ConflictResolution
   2340 JoinVals::analyzeValue(unsigned ValNo, JoinVals &Other) {
   2341   Val &V = Vals[ValNo];
   2342   assert(!V.isAnalyzed() && "Value has already been analyzed!");
   2343   VNInfo *VNI = LR.getValNumInfo(ValNo);
   2344   if (VNI->isUnused()) {
   2345     V.WriteLanes = LaneBitmask::getAll();
   2346     return CR_Keep;
   2347   }
   2348 
   2349   // Get the instruction defining this value, compute the lanes written.
   2350   const MachineInstr *DefMI = nullptr;
   2351   if (VNI->isPHIDef()) {
   2352     // Conservatively assume that all lanes in a PHI are valid.
   2353     LaneBitmask Lanes = SubRangeJoin ? LaneBitmask::getLane(0)
   2354                                      : TRI->getSubRegIndexLaneMask(SubIdx);
   2355     V.ValidLanes = V.WriteLanes = Lanes;
   2356   } else {
   2357     DefMI = Indexes->getInstructionFromIndex(VNI->def);
   2358     assert(DefMI != nullptr);
   2359     if (SubRangeJoin) {
   2360       // We don't care about the lanes when joining subregister ranges.
   2361       V.WriteLanes = V.ValidLanes = LaneBitmask::getLane(0);
   2362       if (DefMI->isImplicitDef()) {
   2363         V.ValidLanes = LaneBitmask::getNone();
   2364         V.ErasableImplicitDef = true;
   2365       }
   2366     } else {
   2367       bool Redef = false;
   2368       V.ValidLanes = V.WriteLanes = computeWriteLanes(DefMI, Redef);
   2369 
   2370       // If this is a read-modify-write instruction, there may be more valid
   2371       // lanes than the ones written by this instruction.
   2372       // This only covers partial redef operands. DefMI may have normal use
   2373       // operands reading the register. They don't contribute valid lanes.
   2374       //
   2375       // This adds ssub1 to the set of valid lanes in %src:
   2376       //
   2377       //   %src:ssub1 = FOO
   2378       //
   2379       // This leaves only ssub1 valid, making any other lanes undef:
   2380       //
   2381       //   %src:ssub1<def,read-undef> = FOO %src:ssub2
   2382       //
   2383       // The <read-undef> flag on the def operand means that old lane values are
   2384       // not important.
   2385       if (Redef) {
   2386         V.RedefVNI = LR.Query(VNI->def).valueIn();
   2387         assert((TrackSubRegLiveness || V.RedefVNI) &&
   2388                "Instruction is reading nonexistent value");
   2389         if (V.RedefVNI != nullptr) {
   2390           computeAssignment(V.RedefVNI->id, Other);
   2391           V.ValidLanes |= Vals[V.RedefVNI->id].ValidLanes;
   2392         }
   2393       }
   2394 
   2395       // An IMPLICIT_DEF writes undef values.
   2396       if (DefMI->isImplicitDef()) {
   2397         // We normally expect IMPLICIT_DEF values to be live only until the end
   2398         // of their block. If the value is really live longer and gets pruned in
   2399         // another block, this flag is cleared again.
   2400         V.ErasableImplicitDef = true;
   2401         V.ValidLanes &= ~V.WriteLanes;
   2402       }
   2403     }
   2404   }
   2405 
   2406   // Find the value in Other that overlaps VNI->def, if any.
   2407   LiveQueryResult OtherLRQ = Other.LR.Query(VNI->def);
   2408 
   2409   // It is possible that both values are defined by the same instruction, or
   2410   // the values are PHIs defined in the same block. When that happens, the two
   2411   // values should be merged into one, but not into any preceding value.
   2412   // The first value defined or visited gets CR_Keep, the other gets CR_Merge.
   2413   if (VNInfo *OtherVNI = OtherLRQ.valueDefined()) {
   2414     assert(SlotIndex::isSameInstr(VNI->def, OtherVNI->def) && "Broken LRQ");
   2415 
   2416     // One value stays, the other is merged. Keep the earlier one, or the first
   2417     // one we see.
   2418     if (OtherVNI->def < VNI->def)
   2419       Other.computeAssignment(OtherVNI->id, *this);
   2420     else if (VNI->def < OtherVNI->def && OtherLRQ.valueIn()) {
   2421       // This is an early-clobber def overlapping a live-in value in the other
   2422       // register. Not mergeable.
   2423       V.OtherVNI = OtherLRQ.valueIn();
   2424       return CR_Impossible;
   2425     }
   2426     V.OtherVNI = OtherVNI;
   2427     Val &OtherV = Other.Vals[OtherVNI->id];
   2428     // Keep this value, check for conflicts when analyzing OtherVNI.
   2429     if (!OtherV.isAnalyzed())
   2430       return CR_Keep;
   2431     // Both sides have been analyzed now.
   2432     // Allow overlapping PHI values. Any real interference would show up in a
   2433     // predecessor, the PHI itself can't introduce any conflicts.
   2434     if (VNI->isPHIDef())
   2435       return CR_Merge;
   2436     if ((V.ValidLanes & OtherV.ValidLanes).any())
   2437       // Overlapping lanes can't be resolved.
   2438       return CR_Impossible;
   2439     else
   2440       return CR_Merge;
   2441   }
   2442 
   2443   // No simultaneous def. Is Other live at the def?
   2444   V.OtherVNI = OtherLRQ.valueIn();
   2445   if (!V.OtherVNI)
   2446     // No overlap, no conflict.
   2447     return CR_Keep;
   2448 
   2449   assert(!SlotIndex::isSameInstr(VNI->def, V.OtherVNI->def) && "Broken LRQ");
   2450 
   2451   // We have overlapping values, or possibly a kill of Other.
   2452   // Recursively compute assignments up the dominator tree.
   2453   Other.computeAssignment(V.OtherVNI->id, *this);
   2454   Val &OtherV = Other.Vals[V.OtherVNI->id];
   2455 
   2456   // Check if OtherV is an IMPLICIT_DEF that extends beyond its basic block.
   2457   // This shouldn't normally happen, but ProcessImplicitDefs can leave such
   2458   // IMPLICIT_DEF instructions behind, and there is nothing wrong with it
   2459   // technically.
   2460   //
   2461   // When it happens, treat that IMPLICIT_DEF as a normal value, and don't try
   2462   // to erase the IMPLICIT_DEF instruction.
   2463   if (OtherV.ErasableImplicitDef && DefMI &&
   2464       DefMI->getParent() != Indexes->getMBBFromIndex(V.OtherVNI->def)) {
   2465     LLVM_DEBUG(dbgs() << "IMPLICIT_DEF defined at " << V.OtherVNI->def
   2466                       << " extends into "
   2467                       << printMBBReference(*DefMI->getParent())
   2468                       << ", keeping it.\n");
   2469     OtherV.ErasableImplicitDef = false;
   2470   }
   2471 
   2472   // Allow overlapping PHI values. Any real interference would show up in a
   2473   // predecessor, the PHI itself can't introduce any conflicts.
   2474   if (VNI->isPHIDef())
   2475     return CR_Replace;
   2476 
   2477   // Check for simple erasable conflicts.
   2478   if (DefMI->isImplicitDef()) {
   2479     // We need the def for the subregister if there is nothing else live in
   2480     // the subrange at this point.
   2481     if (TrackSubRegLiveness &&
   2482         (V.WriteLanes & (OtherV.ValidLanes | OtherV.WriteLanes)).none())
   2483       return CR_Replace;
   2484     return CR_Erase;
   2485   }
   2486 
   2487   // Include the non-conflict where DefMI is a coalescable copy that kills
   2488   // OtherVNI. We still want the copy erased and value numbers merged.
   2489   if (CP.isCoalescable(DefMI)) {
   2490     // Some of the lanes copied from OtherVNI may be undef, making them undef
   2491     // here too.
   2492     V.ValidLanes &= ~V.WriteLanes | OtherV.ValidLanes;
   2493     return CR_Erase;
   2494   }
   2495 
   2496   // This may not be a real conflict if DefMI simply kills Other and defines
   2497   // VNI.
   2498   if (OtherLRQ.isKill() && OtherLRQ.endPoint() <= VNI->def)
   2499     return CR_Keep;
   2500 
   2501   // Handle the case where VNI and OtherVNI can be proven to be identical:
   2502   //
   2503   //   %other = COPY %ext
   2504   //   %this  = COPY %ext <-- Erase this copy
   2505   //
   2506   if (DefMI->isFullCopy() && !CP.isPartial() &&
   2507       valuesIdentical(VNI, V.OtherVNI, Other)) {
   2508     V.Identical = true;
   2509     return CR_Erase;
   2510   }
   2511 
   2512   // If the lanes written by this instruction were all undef in OtherVNI, it is
   2513   // still safe to join the live ranges. This can't be done with a simple value
   2514   // mapping, though - OtherVNI will map to multiple values:
   2515   //
   2516   //   1 %dst:ssub0 = FOO                <-- OtherVNI
   2517   //   2 %src = BAR                      <-- VNI
   2518   //   3 %dst:ssub1 = COPY killed %src    <-- Eliminate this copy.
   2519   //   4 BAZ killed %dst
   2520   //   5 QUUX killed %src
   2521   //
   2522   // Here OtherVNI will map to itself in [1;2), but to VNI in [2;5). CR_Replace
   2523   // handles this complex value mapping.
   2524   if ((V.WriteLanes & OtherV.ValidLanes).none())
   2525     return CR_Replace;
   2526 
   2527   // If the other live range is killed by DefMI and the live ranges are still
   2528   // overlapping, it must be because we're looking at an early clobber def:
   2529   //
   2530   //   %dst<def,early-clobber> = ASM killed %src
   2531   //
   2532   // In this case, it is illegal to merge the two live ranges since the early
   2533   // clobber def would clobber %src before it was read.
   2534   if (OtherLRQ.isKill()) {
   2535     // The case where the def doesn't overlap the kill is handled above.
   2536     assert(VNI->def.isEarlyClobber() &&
   2537            "Only early clobber defs can overlap a kill");
   2538     return CR_Impossible;
   2539   }
   2540 
   2541   // VNI is clobbering live lanes in OtherVNI, but there is still the
   2542   // possibility that no instructions actually read the clobbered lanes.
   2543   // If we're clobbering all the lanes in OtherVNI, at least one must be read.
   2544   // Otherwise Other.LR wouldn't be live here.
   2545   if ((TRI->getSubRegIndexLaneMask(Other.SubIdx) & ~V.WriteLanes).none())
   2546     return CR_Impossible;
   2547 
   2548   // We need to verify that no instructions are reading the clobbered lanes. To
   2549   // save compile time, we'll only check that locally. Don't allow the tainted
   2550   // value to escape the basic block.
   2551   MachineBasicBlock *MBB = Indexes->getMBBFromIndex(VNI->def);
   2552   if (OtherLRQ.endPoint() >= Indexes->getMBBEndIdx(MBB))
   2553     return CR_Impossible;
   2554 
   2555   // There are still some things that could go wrong besides clobbered lanes
   2556   // being read, for example OtherVNI may be only partially redefined in MBB,
   2557   // and some clobbered lanes could escape the block. Save this analysis for
   2558   // resolveConflicts() when all values have been mapped. We need to know
   2559   // RedefVNI and WriteLanes for any later defs in MBB, and we can't compute
   2560   // that now - the recursive analyzeValue() calls must go upwards in the
   2561   // dominator tree.
   2562   return CR_Unresolved;
   2563 }
   2564 
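        /// Compute the resolution for value number ValNo and record it in
        /// Vals[ValNo]. CR_Erase and CR_Merge values are mapped onto the value
        /// they join with in Other; every other value gets a fresh entry in
        /// NewVNInfo. May recurse into Other through analyzeValue().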
   2565 void JoinVals::computeAssignment(unsigned ValNo, JoinVals &Other) {
   2566   Val &V = Vals[ValNo];
   2567   if (V.isAnalyzed()) {
   2568     // Recursion should always move up the dominator tree, so ValNo is not
   2569     // supposed to reappear before it has been assigned.
   2570     assert(Assignments[ValNo] != -1 && "Bad recursion?");
   2571     return;
   2572   }
   2573   switch ((V.Resolution = analyzeValue(ValNo, Other))) {
   2574   case CR_Erase:
   2575   case CR_Merge:
   2576     // Merge this ValNo into OtherVNI.
   2577     assert(V.OtherVNI && "OtherVNI not assigned, can't merge.");
   2578     assert(Other.Vals[V.OtherVNI->id].isAnalyzed() && "Missing recursion");
   2579     Assignments[ValNo] = Other.Assignments[V.OtherVNI->id];
   2580     LLVM_DEBUG(dbgs() << "\t\tmerge " << printReg(Reg) << ':' << ValNo << '@'
   2581                       << LR.getValNumInfo(ValNo)->def << " into "
   2582                       << printReg(Other.Reg) << ':' << V.OtherVNI->id << '@'
   2583                       << V.OtherVNI->def << " --> @"
   2584                       << NewVNInfo[Assignments[ValNo]]->def << '\n');
   2585     break;
   2586   case CR_Replace:
   2587   case CR_Unresolved: {
   2588     // The other value is going to be pruned if this join is successful.
   2589     assert(V.OtherVNI && "OtherVNI not assigned, can't prune");
   2590     Val &OtherV = Other.Vals[V.OtherVNI->id];
   2591     // We cannot erase an IMPLICIT_DEF if we don't have valid values for all
   2592     // its lanes.
   2593     if ((OtherV.WriteLanes & ~V.ValidLanes).any() && TrackSubRegLiveness)
   2594       OtherV.ErasableImplicitDef = false;
   2595     OtherV.Pruned = true;
   2596     LLVM_FALLTHROUGH;
   2597   }
   2598   default:
   2599     // This value number needs to go in the final joined live range.
   2600     Assignments[ValNo] = NewVNInfo.size();
   2601     NewVNInfo.push_back(LR.getValNumInfo(ValNo));
   2602     break;
   2603   }
   2604 }
   2605 
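        /// Compute an assignment for every value number in this live range.
        /// Returns false if any value resolves to CR_Impossible, i.e. there is
        /// real interference and the registers cannot be joined.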
   2606 bool JoinVals::mapValues(JoinVals &Other) {
   2607   for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) {
   2608     computeAssignment(i, Other);
   2609     if (Vals[i].Resolution == CR_Impossible) {
   2610       LLVM_DEBUG(dbgs() << "\t\tinterference at " << printReg(Reg) << ':' << i
   2611                         << '@' << LR.getValNumInfo(i)->def << '\n');
   2612       return false;
   2613     }
   2614   }
   2615   return true;
   2616 }
   2617 
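        /// Starting at VNI's def, record how far each set of TaintedLanes in
        /// Other.LR would carry a wrong value if the join went ahead, as
        /// (end, lanes) pairs in TaintExtent. Returns false if tainted lanes
        /// would be live out of VNI's basic block, since the conflict can then
        /// no longer be verified locally.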
   2618 bool JoinVals::
   2619 taintExtent(unsigned ValNo, LaneBitmask TaintedLanes, JoinVals &Other,
   2620             SmallVectorImpl<std::pair<SlotIndex, LaneBitmask>> &TaintExtent) {
   2621   VNInfo *VNI = LR.getValNumInfo(ValNo);
   2622   MachineBasicBlock *MBB = Indexes->getMBBFromIndex(VNI->def);
   2623   SlotIndex MBBEnd = Indexes->getMBBEndIdx(MBB);
   2624 
   2625   // Scan Other.LR from VNI.def to MBBEnd.
   2626   LiveInterval::iterator OtherI = Other.LR.find(VNI->def);
   2627   assert(OtherI != Other.LR.end() && "No conflict?");
   2628   do {
   2629     // OtherI is pointing to a tainted value. Abort the join if the tainted
   2630     // lanes escape the block.
   2631     SlotIndex End = OtherI->end;
   2632     if (End >= MBBEnd) {
   2633       LLVM_DEBUG(dbgs() << "\t\ttaints global " << printReg(Other.Reg) << ':'
   2634                         << OtherI->valno->id << '@' << OtherI->start << '\n');
   2635       return false;
   2636     }
   2637     LLVM_DEBUG(dbgs() << "\t\ttaints local " << printReg(Other.Reg) << ':'
   2638                       << OtherI->valno->id << '@' << OtherI->start << " to "
   2639                       << End << '\n');
   2640     // A dead def is not a problem.
   2641     if (End.isDead())
   2642       break;
   2643     TaintExtent.push_back(std::make_pair(End, TaintedLanes));
   2644 
   2645     // Check for another def in the MBB.
   2646     if (++OtherI == Other.LR.end() || OtherI->start >= MBBEnd)
   2647       break;
   2648 
   2649     // Lanes written by the new def are no longer tainted.
   2650     const Val &OV = Other.Vals[OtherI->valno->id];
   2651     TaintedLanes &= ~OV.WriteLanes;
   2652     if (!OV.RedefVNI)
   2653       break;
   2654   } while (TaintedLanes.any());
   2655   return true;
   2656 }
   2657 
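        /// Return true if MI uses any of the given lanes when reading Reg.
        /// SubIdx is composed with each use operand's own subregister index
        /// before lane masks are compared. Debug instructions never count as
        /// readers.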
   2658 bool JoinVals::usesLanes(const MachineInstr &MI, unsigned Reg, unsigned SubIdx,
   2659                          LaneBitmask Lanes) const {
   2660   if (MI.isDebugInstr())
   2661     return false;
   2662   for (const MachineOperand &MO : MI.operands()) {
   2663     if (!MO.isReg() || MO.isDef() || MO.getReg() != Reg)
   2664       continue;
   2665     if (!MO.readsReg())
   2666       continue;
   2667     unsigned S = TRI->composeSubRegIndices(SubIdx, MO.getSubReg());
   2668     if ((Lanes & TRI->getSubRegIndexLaneMask(S)).any())
   2669       return true;
   2670   }
   2671   return false;
   2672 }
   2673 
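        /// Try to downgrade CR_Unresolved values to CR_Replace by checking,
        /// instruction by instruction, that nothing in this basic block reads
        /// the lanes the join would taint. Returns false if a conflict cannot
        /// be resolved this way, or for any conflict during a subrange join.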
   2674 bool JoinVals::resolveConflicts(JoinVals &Other) {
   2675   for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) {
   2676     Val &V = Vals[i];
   2677     assert(V.Resolution != CR_Impossible && "Unresolvable conflict");
   2678     if (V.Resolution != CR_Unresolved)
   2679       continue;
   2680     LLVM_DEBUG(dbgs() << "\t\tconflict at " << printReg(Reg) << ':' << i << '@'
   2681                       << LR.getValNumInfo(i)->def << '\n');
   2682     if (SubRangeJoin)
   2683       return false;
   2684 
   2685     ++NumLaneConflicts;
   2686     assert(V.OtherVNI && "Inconsistent conflict resolution.");
   2687     VNInfo *VNI = LR.getValNumInfo(i);
   2688     const Val &OtherV = Other.Vals[V.OtherVNI->id];
   2689 
   2690     // VNI is known to clobber some lanes in OtherVNI. If we go ahead with the
   2691     // join, those lanes will be tainted with a wrong value. Get the extent of
   2692     // the tainted lanes.
   2693     LaneBitmask TaintedLanes = V.WriteLanes & OtherV.ValidLanes;
   2694     SmallVector<std::pair<SlotIndex, LaneBitmask>, 8> TaintExtent;
   2695     if (!taintExtent(i, TaintedLanes, Other, TaintExtent))
   2696       // Tainted lanes would extend beyond the basic block.
   2697       return false;
   2698 
   2699     assert(!TaintExtent.empty() && "There should be at least one conflict.");
   2700 
   2701     // Now look at the instructions from VNI->def to TaintExtent (inclusive).
   2702     MachineBasicBlock *MBB = Indexes->getMBBFromIndex(VNI->def);
   2703     MachineBasicBlock::iterator MI = MBB->begin();
   2704     if (!VNI->isPHIDef()) {
   2705       MI = Indexes->getInstructionFromIndex(VNI->def);
   2706       // No need to check the instruction defining VNI for reads.
   2707       ++MI;
   2708     }
   2709     assert(!SlotIndex::isSameInstr(VNI->def, TaintExtent.front().first) &&
   2710            "Interference ends on VNI->def. Should have been handled earlier");
   2711     MachineInstr *LastMI =
   2712       Indexes->getInstructionFromIndex(TaintExtent.front().first);
   2713     assert(LastMI && "Range must end at a proper instruction");
   2714     unsigned TaintNum = 0;
   2715     while (true) {
   2716       assert(MI != MBB->end() && "Bad LastMI");
   2717       if (usesLanes(*MI, Other.Reg, Other.SubIdx, TaintedLanes)) {
   2718         LLVM_DEBUG(dbgs() << "\t\ttainted lanes used by: " << *MI);
   2719         return false;
   2720       }
   2721       // LastMI is the last instruction to use the current value.
   2722       if (&*MI == LastMI) {
   2723         if (++TaintNum == TaintExtent.size())
   2724           break;
   2725         LastMI = Indexes->getInstructionFromIndex(TaintExtent[TaintNum].first);
   2726         assert(LastMI && "Range must end at a proper instruction");
   2727         TaintedLanes = TaintExtent[TaintNum].second;
   2728       }
   2729       ++MI;
   2730     }
   2731 
   2732     // The tainted lanes are unused.
   2733     V.Resolution = CR_Replace;
   2734     ++NumLaneResolves;
   2735   }
   2736   return true;
   2737 }
   2738 
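        /// Determine whether ValNo is ultimately a copy of a value that was
        /// pruned on the other side, following chains of CR_Erase and CR_Merge
        /// values up the dominator tree. PrunedComputed guards against
        /// revisiting a value during the mutual recursion with Other.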
   2739 bool JoinVals::isPrunedValue(unsigned ValNo, JoinVals &Other) {
   2740   Val &V = Vals[ValNo];
   2741   if (V.Pruned || V.PrunedComputed)
   2742     return V.Pruned;
   2743 
   2744   if (V.Resolution != CR_Erase && V.Resolution != CR_Merge)
   2745     return V.Pruned;
   2746 
   2747   // Follow copies up the dominator tree and check if any intermediate value
   2748   // has been pruned.
   2749   V.PrunedComputed = true;
   2750   V.Pruned = Other.isPrunedValue(V.OtherVNI->id, *this);
   2751   return V.Pruned;
   2752 }
   2753 
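        /// Prune the live ranges so LiveInterval::join() never sees two live
        /// values at the same index: a CR_Replace value truncates the
        /// conflicting value in Other.LR, and erased or merged values that are
        /// copies of a pruned value are pruned from LR as well. EndPoints
        /// collects the indices where liveness must be restored after joining;
        /// operand flags are only rewritten when changeInstrs is set.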
   2754 void JoinVals::pruneValues(JoinVals &Other,
   2755                            SmallVectorImpl<SlotIndex> &EndPoints,
   2756                            bool changeInstrs) {
   2757   for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) {
   2758     SlotIndex Def = LR.getValNumInfo(i)->def;
   2759     switch (Vals[i].Resolution) {
   2760     case CR_Keep:
   2761       break;
   2762     case CR_Replace: {
   2763       // This value takes precedence over the value in Other.LR.
   2764       LIS->pruneValue(Other.LR, Def, &EndPoints);
   2765       // Check if we're replacing an IMPLICIT_DEF value. The IMPLICIT_DEF
   2766       // instructions are only inserted to provide a live-out value for PHI
   2767       // predecessors, so the instruction should simply go away once its value
   2768       // has been replaced.
   2769       Val &OtherV = Other.Vals[Vals[i].OtherVNI->id];
   2770       bool EraseImpDef = OtherV.ErasableImplicitDef &&
   2771                          OtherV.Resolution == CR_Keep;
   2772       if (!Def.isBlock()) {
   2773         if (changeInstrs) {
   2774           // Remove <def,read-undef> flags. This def is now a partial redef.
   2775           // Also remove dead flags since the joined live range will
   2776           // continue past this instruction.
   2777           for (MachineOperand &MO :
   2778                Indexes->getInstructionFromIndex(Def)->operands()) {
   2779             if (MO.isReg() && MO.isDef() && MO.getReg() == Reg) {
   2780               if (MO.getSubReg() != 0 && MO.isUndef() && !EraseImpDef)
   2781                 MO.setIsUndef(false);
   2782               MO.setIsDead(false);
   2783             }
   2784           }
   2785         }
   2786         // This value will reach instructions below, but we need to make sure
   2787         // the live range also reaches the instruction at Def.
   2788         if (!EraseImpDef)
   2789           EndPoints.push_back(Def);
   2790       }
   2791       LLVM_DEBUG(dbgs() << "\t\tpruned " << printReg(Other.Reg) << " at " << Def
   2792                         << ": " << Other.LR << '\n');
   2793       break;
   2794     }
   2795     case CR_Erase:
   2796     case CR_Merge:
   2797       if (isPrunedValue(i, Other)) {
   2798         // This value is ultimately a copy of a pruned value in LR or Other.LR.
   2799         // We can no longer trust the value mapping computed by
   2800         // computeAssignment(); the value that was originally copied could have
   2801         // been replaced.
   2802         LIS->pruneValue(LR, Def, &EndPoints);
   2803         LLVM_DEBUG(dbgs() << "\t\tpruned all of " << printReg(Reg) << " at "
   2804                           << Def << ": " << LR << '\n');
   2805       }
   2806       break;
   2807     case CR_Unresolved:
   2808     case CR_Impossible:
   2809       llvm_unreachable("Unresolved conflicts");
   2810     }
   2811   }
   2812 }
   2813 
   2814 /// Consider the following situation when coalescing the copy between
   2815 /// %31 and %45 at 800. (The vertical lines represent live range segments.)
   2816 ///
   2817 ///                              Main range         Subrange 0004 (sub2)
   2818 ///                              %31    %45           %31    %45
   2819 ///  544    %45 = COPY %28               +                    +
   2820 ///                                      | v1                 | v1
   2821 ///  560B bb.1:                          +                    +
   2822 ///  624        = %45.sub2               | v2                 | v2
   2823 ///  800    %31 = COPY %45        +      +             +      +
   2824 ///                               | v0                 | v0
   2825 ///  816    %31.sub1 = ...        +                    |
   2826 ///  880    %30 = COPY %31        | v1                 +
   2827 ///  928    %45 = COPY %30        |      +                    +
   2828 ///                               |      | v0                 | v0  <--+
   2829 ///  992B   ; backedge -> bb.1    |      +                    +        |
   2830 /// 1040        = %31.sub0        +                                    |
   2831 ///                                                 This value must remain
   2832 ///                                                 live-out!
   2833 ///
   2834 /// Assuming that %31 is coalesced into %45, the copy at 928 becomes
   2835 /// redundant, since it copies the value from %45 back into it. The
   2836 /// conflict resolution for the main range determines that %45.v0 is
   2837 /// to be erased, which is ok since %31.v1 is identical to it.
   2838 /// The problem happens with the subrange for sub2: it has to be live
   2839 /// on exit from the block, but since 928 was actually a point of
   2840 /// definition of %45.sub2, %45.sub2 was not live immediately prior
   2841 /// to that definition. As a result, when 928 was erased, the value v0
   2842 /// for %45.sub2 was pruned in pruneSubRegValues. Consequently, an
   2843 /// IMPLICIT_DEF was inserted as a "backedge" definition for %45.sub2,
   2844 /// providing an incorrect value to the use at 624.
   2845 ///
   2846 /// Since the main-range values %31.v1 and %45.v0 were proved to be
   2847 /// identical, the corresponding values in subranges must also be the
   2848 /// same. A redundant copy is removed because it's not needed, and not
   2849 /// because it copied an undefined value, so any liveness that originated
   2850 /// from that copy cannot disappear. When pruning a value that started
   2851 /// at the removed copy, the corresponding identical value must be
   2852 /// extended to replace it.
   2853 void JoinVals::pruneSubRegValues(LiveInterval &LI, LaneBitmask &ShrinkMask) {
   2854   // Look for values being erased.
   2855   bool DidPrune = false;
   2856   for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) {
   2857     Val &V = Vals[i];
   2858     // We should trigger in all cases in which eraseInstrs() does something;
   2859     // the condition below must stay in sync with what eraseInstrs() checks.
   2860     if (V.Resolution != CR_Erase &&
   2861         (V.Resolution != CR_Keep || !V.ErasableImplicitDef || !V.Pruned))
   2862       continue;
   2863 
   2864     // Check subranges at the point where the copy will be removed.
   2865     SlotIndex Def = LR.getValNumInfo(i)->def;
   2866     SlotIndex OtherDef;
   2867     if (V.Identical)
   2868       OtherDef = V.OtherVNI->def;
   2869 
   2870     // Print message so mismatches with eraseInstrs() can be diagnosed.
   2871     LLVM_DEBUG(dbgs() << "\t\tExpecting instruction removal at " << Def
   2872                       << '\n');
   2873     for (LiveInterval::SubRange &S : LI.subranges()) {
   2874       LiveQueryResult Q = S.Query(Def);
   2875 
   2876       // If a subrange starts at the copy then an undefined value has been
   2877       // copied and we must remove that subrange value as well.
   2878       VNInfo *ValueOut = Q.valueOutOrDead();
   2879       if (ValueOut != nullptr && Q.valueIn() == nullptr) {
   2880         LLVM_DEBUG(dbgs() << "\t\tPrune sublane " << PrintLaneMask(S.LaneMask)
   2881                           << " at " << Def << "\n");
   2882         SmallVector<SlotIndex,8> EndPoints;
   2883         LIS->pruneValue(S, Def, &EndPoints);
   2884         DidPrune = true;
   2885         // Mark value number as unused.
   2886         ValueOut->markUnused();
   2887 
   2888         if (V.Identical && S.Query(OtherDef).valueOut()) {
   2889           // If V is identical to V.OtherVNI (and S was live at OtherDef),
   2890           // then we can't simply prune V from S. V needs to be replaced
   2891           // with V.OtherVNI.
   2892           LIS->extendToIndices(S, EndPoints);
   2893         }
   2894         continue;
   2895       }
   2896       // If a subrange ends at the copy, then a value was copied but only
   2897       // partially used later. Shrink the subregister range appropriately.
   2898       if (Q.valueIn() != nullptr && Q.valueOut() == nullptr) {
   2899         LLVM_DEBUG(dbgs() << "\t\tDead uses at sublane "
   2900                           << PrintLaneMask(S.LaneMask) << " at " << Def
   2901                           << "\n");
   2902         ShrinkMask |= S.LaneMask;
   2903       }
   2904     }
   2905   }
   2906   if (DidPrune)
   2907     LI.removeEmptySubRanges();
   2908 }
   2909 
   2910 /// Check if any of the subranges of @p LI contain a definition at @p Def.
   2911 static bool isDefInSubRange(LiveInterval &LI, SlotIndex Def) {
   2912   for (LiveInterval::SubRange &SR : LI.subranges()) {
   2913     if (VNInfo *VNI = SR.Query(Def).valueOutOrDead())
   2914       if (VNI->def == Def)
   2915         return true;
   2916   }
   2917   return false;
   2918 }
   2919 
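        /// Mark main-range CR_Keep values as pruned when no subrange defines
        /// the register at the same index; such segments are stale and the
        /// caller must shrink the main range afterwards.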
   2920 void JoinVals::pruneMainSegments(LiveInterval &LI, bool &ShrinkMainRange) {
   2921   assert(&static_cast<LiveRange&>(LI) == &LR);
   2922 
   2923   for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) {
   2924     if (Vals[i].Resolution != CR_Keep)
   2925       continue;
   2926     VNInfo *VNI = LR.getValNumInfo(i);
   2927     if (VNI->isUnused() || VNI->isPHIDef() || isDefInSubRange(LI, VNI->def))
   2928       continue;
   2929     Vals[i].Pruned = true;
   2930     ShrinkMainRange = true;
   2931   }
   2932 }
   2933 
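        /// Remove value numbers for IMPLICIT_DEFs that were kept (CR_Keep) but
        /// later pruned; they no longer contribute a useful value.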
   2934 void JoinVals::removeImplicitDefs() {
   2935   for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) {
   2936     Val &V = Vals[i];
   2937     if (V.Resolution != CR_Keep || !V.ErasableImplicitDef || !V.Pruned)
   2938       continue;
   2939 
   2940     VNInfo *VNI = LR.getValNumInfo(i);
   2941     VNI->markUnused();
   2942     LR.removeValNo(VNI);
   2943   }
   2944 }
   2945 
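        /// Erase the instructions made redundant by the join. A pruned,
        /// erasable IMPLICIT_DEF (CR_Keep) first has its value number removed
        /// from LR, with the previous main-range segment extended when a
        /// subrange keeps the register live across the def, and then falls
        /// through to be erased like the CR_Erase instructions. Source
        /// registers of erased copies that are not part of this coalescing are
        /// collected in ShrinkRegs so the caller can shrink them to their uses.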
   2946 void JoinVals::eraseInstrs(SmallPtrSetImpl<MachineInstr*> &ErasedInstrs,
   2947                            SmallVectorImpl<unsigned> &ShrinkRegs,
   2948                            LiveInterval *LI) {
   2949   for (unsigned i = 0, e = LR.getNumValNums(); i != e; ++i) {
   2950     // Get the def location before markUnused() below invalidates it.
   2951     SlotIndex Def = LR.getValNumInfo(i)->def;
   2952     switch (Vals[i].Resolution) {
   2953     case CR_Keep: {
   2954       // If an IMPLICIT_DEF value is pruned, it doesn't serve a purpose any
   2955       // longer. The IMPLICIT_DEF instructions are only inserted by
   2956       // PHIElimination to guarantee that all PHI predecessors have a value.
   2957       if (!Vals[i].ErasableImplicitDef || !Vals[i].Pruned)
   2958         break;
   2959       // Remove value number i from LR.
   2960       // For intervals with subranges, removing a segment from the main range
   2961       // may require extending the previous segment: for each definition of
   2962       // a subregister, there will be a corresponding def in the main range.
   2963       // That def may fall in the middle of a segment from another subrange.
   2964       // In such cases, removing this def from the main range must be
   2965       // complemented by extending the main range to account for the liveness
   2966       // of the other subrange.
   2967       VNInfo *VNI = LR.getValNumInfo(i);
   2969       // The new end point of the main range segment to be extended.
   2970       SlotIndex NewEnd;
   2971       if (LI != nullptr) {
   2972         LiveRange::iterator I = LR.FindSegmentContaining(Def);
   2973         assert(I != LR.end());
   2974         // Do not extend beyond the end of the segment being removed.
   2975         // The segment may have been pruned in preparation for joining
   2976         // live ranges.
   2977         NewEnd = I->end;
   2978       }
   2979 
   2980       LR.removeValNo(VNI);
   2981       // Note that this VNInfo is reused and still referenced in NewVNInfo;
   2982       // make it appear like an unused value number.
   2983       VNI->markUnused();
   2984 
   2985       if (LI != nullptr && LI->hasSubRanges()) {
   2986         assert(static_cast<LiveRange*>(LI) == &LR);
   2987         // Determine the end point based on the subrange information:
   2988         // minimum of (earliest def of next segment,
   2989         //             latest end point of containing segment)
   2990         SlotIndex ED, LE;
   2991         for (LiveInterval::SubRange &SR : LI->subranges()) {
   2992           LiveRange::iterator I = SR.find(Def);
   2993           if (I == SR.end())
   2994             continue;
   2995           if (I->start > Def)
   2996             ED = ED.isValid() ? std::min(ED, I->start) : I->start;
   2997           else
   2998             LE = LE.isValid() ? std::max(LE, I->end) : I->end;
   2999         }
   3000         if (LE.isValid())
   3001           NewEnd = std::min(NewEnd, LE);
   3002         if (ED.isValid())
   3003           NewEnd = std::min(NewEnd, ED);
   3004 
   3005         // We only want to do the extension if there was a subrange that
   3006         // was live across Def.
   3007         if (LE.isValid()) {
   3008           LiveRange::iterator S = LR.find(Def);
   3009           if (S != LR.begin())
   3010             std::prev(S)->end = NewEnd;
   3011         }
   3012       }
   3013       LLVM_DEBUG({
   3014         dbgs() << "\t\tremoved " << i << '@' << Def << ": " << LR << '\n';
   3015         if (LI != nullptr)
   3016           dbgs() << "\t\t  LHS = " << *LI << '\n';
   3017       });
   3018       LLVM_FALLTHROUGH;
   3019     }
   3020 
   3021     case CR_Erase: {
   3022       MachineInstr *MI = Indexes->getInstructionFromIndex(Def);
   3023       assert(MI && "No instruction to erase");
   3024       if (MI->isCopy()) {
   3025         unsigned Reg = MI->getOperand(1).getReg();
   3026         if (TargetRegisterInfo::isVirtualRegister(Reg) &&
   3027             Reg != CP.getSrcReg() && Reg != CP.getDstReg())
   3028           ShrinkRegs.push_back(Reg);
   3029       }
   3030       ErasedInstrs.insert(MI);
   3031       LLVM_DEBUG(dbgs() << "\t\terased:\t" << Def << '\t' << *MI);
   3032       LIS->RemoveMachineInstrFromMaps(*MI);
   3033       MI->eraseFromParent();
   3034       break;
   3035     }
   3036     default:
   3037       break;
   3038     }
   3039   }
   3040 }
   3041 
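        /// Join the subregister live ranges LRange and RRange covering the
        /// lanes in LaneMask. The main-range join has already proven the
        /// intervals mergeable, so any mapping or conflict failure here is
        /// fatal.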
   3042 void RegisterCoalescer::joinSubRegRanges(LiveRange &LRange, LiveRange &RRange,
   3043                                          LaneBitmask LaneMask,
   3044                                          const CoalescerPair &CP) {
   3045   SmallVector<VNInfo*, 16> NewVNInfo;
   3046   JoinVals RHSVals(RRange, CP.getSrcReg(), CP.getSrcIdx(), LaneMask,
   3047                    NewVNInfo, CP, LIS, TRI, true, true);
   3048   JoinVals LHSVals(LRange, CP.getDstReg(), CP.getDstIdx(), LaneMask,
   3049                    NewVNInfo, CP, LIS, TRI, true, true);
   3050 
   3051   // Compute NewVNInfo and resolve conflicts (see also joinVirtRegs()).
   3052   // We should be able to resolve all conflicts here as we could successfully
   3053   // do it on the main range already. However, there is a problem when multiple
   3054   // ranges get mapped to the "overflow" lane mask bit, which creates
   3055   // unexpected interference.
   3056   if (!LHSVals.mapValues(RHSVals) || !RHSVals.mapValues(LHSVals)) {
   3057     // We already determined that it is legal to merge the intervals, so this
   3058     // should never fail.
   3059     llvm_unreachable("*** Couldn't join subrange!\n");
   3060   }
   3061   if (!LHSVals.resolveConflicts(RHSVals) ||
   3062       !RHSVals.resolveConflicts(LHSVals)) {
   3063     // We already determined that it is legal to merge the intervals, so this
   3064     // should never fail.
   3065     llvm_unreachable("*** Couldn't join subrange!\n");
   3066   }
   3067 
   3068   // The merging algorithm in LiveInterval::join() can't handle conflicting
   3069   // value mappings, so we need to remove any live ranges that overlap a
   3070   // CR_Replace resolution. Collect a set of end points that can be used to
   3071   // restore the live range after joining.
   3072   SmallVector<SlotIndex, 8> EndPoints;
   3073   LHSVals.pruneValues(RHSVals, EndPoints, false);
   3074   RHSVals.pruneValues(LHSVals, EndPoints, false);
   3075 
   3076   LHSVals.removeImplicitDefs();
   3077   RHSVals.removeImplicitDefs();
   3078 
   3079   LRange.verify();
   3080   RRange.verify();
   3081 
   3082   // Join RRange into LHS.
   3083   LRange.join(RRange, LHSVals.getAssignments(), RHSVals.getAssignments(),
   3084               NewVNInfo);
   3085 
   3086   LLVM_DEBUG(dbgs() << "\t\tjoined lanes: " << PrintLaneMask(LaneMask)
   3087                     << ' ' << LRange << "\n");
   3088   if (EndPoints.empty())
   3089     return;
   3090 
   3091   // Recompute the parts of the live range we had to remove because of
   3092   // CR_Replace conflicts.
   3093   LLVM_DEBUG({
   3094     dbgs() << "\t\trestoring liveness to " << EndPoints.size() << " points: ";
   3095     for (unsigned i = 0, n = EndPoints.size(); i != n; ++i) {
   3096       dbgs() << EndPoints[i];
   3097       if (i != n-1)
   3098         dbgs() << ',';
   3099     }
   3100     dbgs() << ":  " << LRange << '\n';
   3101   });
   3102   LIS->extendToIndices(LRange, EndPoints);
   3103 }
   3104 
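        /// Merge ToMerge into the lanes of LI selected by LaneMask. LI's
        /// subranges are first refined so those lanes have subranges of their
        /// own; each affected subrange is then either assigned a copy of
        /// ToMerge (if it was empty) or joined with it via joinSubRegRanges().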
   3105 void RegisterCoalescer::mergeSubRangeInto(LiveInterval &LI,
   3106                                           const LiveRange &ToMerge,
   3107                                           LaneBitmask LaneMask,
   3108                                           CoalescerPair &CP) {
   3109   BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator();
   3110   LI.refineSubRanges(Allocator, LaneMask,
   3111       [this,&Allocator,&ToMerge,&CP](LiveInterval::SubRange &SR) {
   3112     if (SR.empty()) {
   3113       SR.assign(ToMerge, Allocator);
   3114     } else {
   3115       // joinSubRegRange() destroys the merged range, so we need a copy.
   3116       LiveRange RangeCopy(ToMerge, Allocator);
   3117       joinSubRegRanges(SR, RangeCopy, SR.LaneMask, CP);
   3118     }
   3119   });
   3120 }
   3121 
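        /// Join the live intervals of the two virtual registers in CP. Value
        /// numbers are mapped and conflicts resolved symmetrically on both
        /// sides, subranges are merged lane by lane when subregister liveness
        /// is tracked, redundant instructions are erased, and finally RHS is
        /// joined into LHS. Returns false if the values cannot be mapped
        /// without real interference.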
   3122 bool RegisterCoalescer::joinVirtRegs(CoalescerPair &CP) {
   3123   SmallVector<VNInfo*, 16> NewVNInfo;
   3124   LiveInterval &RHS = LIS->getInterval(CP.getSrcReg());
   3125   LiveInterval &LHS = LIS->getInterval(CP.getDstReg());
   3126   bool TrackSubRegLiveness = MRI->shouldTrackSubRegLiveness(*CP.getNewRC());
   3127   JoinVals RHSVals(RHS, CP.getSrcReg(), CP.getSrcIdx(), LaneBitmask::getNone(),
   3128                    NewVNInfo, CP, LIS, TRI, false, TrackSubRegLiveness);
   3129   JoinVals LHSVals(LHS, CP.getDstReg(), CP.getDstIdx(), LaneBitmask::getNone(),
   3130                    NewVNInfo, CP, LIS, TRI, false, TrackSubRegLiveness);
   3131 
   3132   LLVM_DEBUG(dbgs() << "\t\tRHS = " << RHS << "\n\t\tLHS = " << LHS << '\n');
   3133 
   3134   // First compute NewVNInfo and the simple value mappings.
   3135   // Detect impossible conflicts early.
   3136   if (!LHSVals.mapValues(RHSVals) || !RHSVals.mapValues(LHSVals))
   3137     return false;
   3138 
   3139   // Some conflicts can only be resolved after all values have been mapped.
   3140   if (!LHSVals.resolveConflicts(RHSVals) || !RHSVals.resolveConflicts(LHSVals))
   3141     return false;
   3142 
   3143   // All clear, the live ranges can be merged.
   3144   if (RHS.hasSubRanges() || LHS.hasSubRanges()) {
   3145     BumpPtrAllocator &Allocator = LIS->getVNInfoAllocator();
   3146 
   3147     // Transform lanemasks from the LHS to masks in the coalesced register and
   3148     // create initial subranges if necessary.
   3149     unsigned DstIdx = CP.getDstIdx();
   3150     if (!LHS.hasSubRanges()) {
   3151       LaneBitmask Mask = DstIdx == 0 ? CP.getNewRC()->getLaneMask()
   3152                                      : TRI->getSubRegIndexLaneMask(DstIdx);
   3153       // LHS must support subregs or we wouldn't be in this codepath.
   3154       assert(Mask.any());
   3155       LHS.createSubRangeFrom(Allocator, Mask, LHS);
   3156     } else if (DstIdx != 0) {
   3157       // Transform LHS lanemasks to new register class if necessary.
   3158       for (LiveInterval::SubRange &R : LHS.subranges()) {
   3159         LaneBitmask Mask = TRI->composeSubRegIndexLaneMask(DstIdx, R.LaneMask);
   3160         R.LaneMask = Mask;
   3161       }
   3162     }
   3163     LLVM_DEBUG(dbgs() << "\t\tLHST = " << printReg(CP.getDstReg()) << ' ' << LHS
   3164                       << '\n');
   3165 
   3166     // Determine lanemasks of RHS in the coalesced register and merge subranges.
   3167     unsigned SrcIdx = CP.getSrcIdx();
   3168     if (!RHS.hasSubRanges()) {
   3169       LaneBitmask Mask = SrcIdx == 0 ? CP.getNewRC()->getLaneMask()
   3170                                      : TRI->getSubRegIndexLaneMask(SrcIdx);
   3171       mergeSubRangeInto(LHS, RHS, Mask, CP);
   3172     } else {
   3173       // Pair up subranges and merge.
   3174       for (LiveInterval::SubRange &R : RHS.subranges()) {
   3175         LaneBitmask Mask = TRI->composeSubRegIndexLaneMask(SrcIdx, R.LaneMask);
   3176         mergeSubRangeInto(LHS, R, Mask, CP);
   3177       }
   3178     }
   3179     LLVM_DEBUG(dbgs() << "\tJoined SubRanges " << LHS << "\n");
   3180 
   3181     // Pruning implicit defs from subranges may result in the main range
   3182     // having stale segments.
   3183     LHSVals.pruneMainSegments(LHS, ShrinkMainRange);
   3184 
   3185     LHSVals.pruneSubRegValues(LHS, ShrinkMask);
   3186     RHSVals.pruneSubRegValues(LHS, ShrinkMask);
   3187   }
   3188 
   3189   // The merging algorithm in LiveInterval::join() can't handle conflicting
   3190   // value mappings, so we need to remove any live ranges that overlap a
   3191   // CR_Replace resolution. Collect a set of end points that can be used to
   3192   // restore the live range after joining.
   3193   SmallVector<SlotIndex, 8> EndPoints;
   3194   LHSVals.pruneValues(RHSVals, EndPoints, true);
   3195   RHSVals.pruneValues(LHSVals, EndPoints, true);
   3196 
   3197   // Erase COPY and IMPLICIT_DEF instructions. This may cause some external
   3198   // registers to require trimming.
   3199   SmallVector<unsigned, 8> ShrinkRegs;
   3200   LHSVals.eraseInstrs(ErasedInstrs, ShrinkRegs, &LHS);
   3201   RHSVals.eraseInstrs(ErasedInstrs, ShrinkRegs);
   3202   while (!ShrinkRegs.empty())
   3203     shrinkToUses(&LIS->getInterval(ShrinkRegs.pop_back_val()));
   3204 
   3205   // Join RHS into LHS.
   3206   LHS.join(RHS, LHSVals.getAssignments(), RHSVals.getAssignments(), NewVNInfo);
   3207 
   3208   // Kill flags are going to be wrong if the live ranges were overlapping.
   3209   // Eventually, we should simply clear all kill flags when computing live
   3210   // ranges. They are reinserted after register allocation.
   3211   MRI->clearKillFlags(LHS.reg);
   3212   MRI->clearKillFlags(RHS.reg);
   3213 
   3214   if (!EndPoints.empty()) {
   3215     // Recompute the parts of the live range we had to remove because of
   3216     // CR_Replace conflicts.
   3217     LLVM_DEBUG({
   3218       dbgs() << "\t\trestoring liveness to " << EndPoints.size() << " points: ";
   3219       for (unsigned i = 0, n = EndPoints.size(); i != n; ++i) {
   3220         dbgs() << EndPoints[i];
   3221         if (i != n-1)
   3222           dbgs() << ',';
   3223       }
   3224       dbgs() << ":  " << LHS << '\n';
   3225     });
   3226     LIS->extendToIndices((LiveRange&)LHS, EndPoints);
   3227   }
   3228 
   3229   return true;
   3230 }
   3231 
   3232 bool RegisterCoalescer::joinIntervals(CoalescerPair &CP) {
   3233   return CP.isPhys() ? joinReservedPhysReg(CP) : joinVirtRegs(CP);
   3234 }
   3235 
   3236 namespace {
   3237 
   3238 /// Information concerning MBB coalescing priority.
   3239 struct MBBPriorityInfo {
   3240   MachineBasicBlock *MBB;
   3241   unsigned Depth;
   3242   bool IsSplit;
   3243 
   3244   MBBPriorityInfo(MachineBasicBlock *mbb, unsigned depth, bool issplit)
   3245     : MBB(mbb), Depth(depth), IsSplit(issplit) {}
   3246 };
   3247 
   3248 } // end anonymous namespace
   3249 
   3250 /// C-style comparator that sorts first based on the loop depth of the basic
   3251 /// block (the unsigned), and then on the MBB number.
   3252 ///
   3253 /// EnableGlobalCopies assumes that the primary sort key is loop depth.
   3254 static int compareMBBPriority(const MBBPriorityInfo *LHS,
   3255                               const MBBPriorityInfo *RHS) {
   3256   // Deeper loops first
   3257   if (LHS->Depth != RHS->Depth)
   3258     return LHS->Depth > RHS->Depth ? -1 : 1;
   3259 
   3260   // Try to unsplit critical edges next.
   3261   if (LHS->IsSplit != RHS->IsSplit)
   3262     return LHS->IsSplit ? -1 : 1;
   3263 
   3264   // Prefer blocks that are more connected in the CFG. This takes care of
   3265   // the most difficult copies first while intervals are short.
   3266   unsigned cl = LHS->MBB->pred_size() + LHS->MBB->succ_size();
   3267   unsigned cr = RHS->MBB->pred_size() + RHS->MBB->succ_size();
   3268   if (cl != cr)
   3269     return cl > cr ? -1 : 1;
   3270 
   3271   // As a last resort, sort by block number.
   3272   return LHS->MBB->getNumber() < RHS->MBB->getNumber() ? -1 : 1;
   3273 }
   3274 
   3275 /// \returns true if the given copy uses or defines a local live range.
   3276 static bool isLocalCopy(MachineInstr *Copy, const LiveIntervals *LIS) {
   3277   if (!Copy->isCopy())
   3278     return false;
   3279 
   3280   if (Copy->getOperand(1).isUndef())
   3281     return false;
   3282 
   3283   unsigned SrcReg = Copy->getOperand(1).getReg();
   3284   unsigned DstReg = Copy->getOperand(0).getReg();
   3285   if (TargetRegisterInfo::isPhysicalRegister(SrcReg)
   3286       || TargetRegisterInfo::isPhysicalRegister(DstReg))
   3287     return false;
   3288 
   3289   return LIS->intervalIsInOneMBB(LIS->getInterval(SrcReg))
   3290     || LIS->intervalIsInOneMBB(LIS->getInterval(DstReg));
   3291 }
   3292 
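        /// Try to join every copy in CurrList. Entries that were coalesced or
        /// that failed permanently are nulled out; copies worth another
        /// attempt ("Again") stay on the list. Returns true if any progress
        /// was made.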
   3293 bool RegisterCoalescer::
   3294 copyCoalesceWorkList(MutableArrayRef<MachineInstr*> CurrList) {
   3295   bool Progress = false;
   3296   for (unsigned i = 0, e = CurrList.size(); i != e; ++i) {
   3297     if (!CurrList[i])
   3298       continue;
   3299     // Skip instruction pointers that have already been erased, for example by
   3300     // dead code elimination.
   3301     if (ErasedInstrs.count(CurrList[i])) {
   3302       CurrList[i] = nullptr;
   3303       continue;
   3304     }
   3305     bool Again = false;
   3306     bool Success = joinCopy(CurrList[i], Again);
   3307     Progress |= Success;
   3308     if (Success || !Again)
   3309       CurrList[i] = nullptr;
   3310   }
   3311   return Progress;
   3312 }
   3313 
   3314 /// Check if DstReg is a terminal node.
   3315 /// I.e., it does not have any affinity other than \p Copy.
   3316 static bool isTerminalReg(unsigned DstReg, const MachineInstr &Copy,
   3317                           const MachineRegisterInfo *MRI) {
   3318   assert(Copy.isCopyLike());
   3319   // Check if the destination of this copy has any other affinity.
   3320   for (const MachineInstr &MI : MRI->reg_nodbg_instructions(DstReg))
   3321     if (&MI != &Copy && MI.isCopyLike())
   3322       return false;
   3323   return true;
   3324 }
   3325 
   3326 bool RegisterCoalescer::applyTerminalRule(const MachineInstr &Copy) const {
   3327   assert(Copy.isCopyLike());
   3328   if (!UseTerminalRule)
   3329     return false;
   3330   unsigned DstReg, DstSubReg, SrcReg, SrcSubReg;
   3331   isMoveInstr(*TRI, &Copy, SrcReg, DstReg, SrcSubReg, DstSubReg);
   3332   // Check if the destination of this copy has any other affinity.
   3333   if (TargetRegisterInfo::isPhysicalRegister(DstReg) ||
   3334       // If SrcReg is a physical register, the copy won't be coalesced.
   3335       // Ignoring it may have other side effects (like missing
   3336       // rematerialization). So keep it.
   3337       TargetRegisterInfo::isPhysicalRegister(SrcReg) ||
   3338       !isTerminalReg(DstReg, Copy, MRI))
   3339     return false;
   3340 
   3341   // DstReg is a terminal node. Check if it interferes with any other
   3342   // copy involving SrcReg.
   3343   const MachineBasicBlock *OrigBB = Copy.getParent();
   3344   const LiveInterval &DstLI = LIS->getInterval(DstReg);
   3345   for (const MachineInstr &MI : MRI->reg_nodbg_instructions(SrcReg)) {
   3346     // Technically we should check if the weight of the new copy is
   3347     // interesting compared to the other one and update the weight
   3348     // of the copies accordingly. However, this would only work if
   3349     // we would gather all the copies first then coalesce, whereas
   3350     // right now we interleave both actions.
   3351     // For now, just consider the copies that are in the same block.
   3352     if (&MI == &Copy || !MI.isCopyLike() || MI.getParent() != OrigBB)
   3353       continue;
   3354     unsigned OtherReg, OtherSubReg, OtherSrcReg, OtherSrcSubReg;
   3355     isMoveInstr(*TRI, &MI, OtherSrcReg, OtherReg, OtherSrcSubReg,
   3356                 OtherSubReg);
   3357     if (OtherReg == SrcReg)
   3358       OtherReg = OtherSrcReg;
   3359     // Check if OtherReg is a non-terminal.
   3360     if (TargetRegisterInfo::isPhysicalRegister(OtherReg) ||
   3361         isTerminalReg(OtherReg, MI, MRI))
   3362       continue;
   3363     // Check whether OtherReg interferes with DstReg.
   3364     if (LIS->getInterval(OtherReg).overlaps(DstLI)) {
   3365       LLVM_DEBUG(dbgs() << "Apply terminal rule for: " << printReg(DstReg)
   3366                         << '\n');
   3367       return true;
   3368     }
   3369   }
   3370   return false;
   3371 }
   3372 
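        /// Collect the copy-like instructions in MBB and try to coalesce them.
        /// With JoinGlobalCopies enabled, local copies are queued separately
        /// from global ones, and copies deferred by the terminal rule go to
        /// the back of their respective list so they are attempted last.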
   3373 void
   3374 RegisterCoalescer::copyCoalesceInMBB(MachineBasicBlock *MBB) {
   3375   LLVM_DEBUG(dbgs() << MBB->getName() << ":\n");
   3376 
   3377   // Collect all copy-like instructions in MBB. Don't start coalescing anything
   3378   // yet, it might invalidate the iterator.
   3379   const unsigned PrevSize = WorkList.size();
   3380   if (JoinGlobalCopies) {
   3381     SmallVector<MachineInstr*, 2> LocalTerminals;
   3382     SmallVector<MachineInstr*, 2> GlobalTerminals;
   3383     // Coalesce copies bottom-up to coalesce local defs before local uses. They
   3384     // are not inherently easier to resolve, but slightly preferable until we
   3385     // have local live range splitting. In particular this is required by
   3386     // cmp+jmp macro fusion.
   3387     for (MachineBasicBlock::iterator MII = MBB->begin(), E = MBB->end();
   3388          MII != E; ++MII) {
   3389       if (!MII->isCopyLike())
   3390         continue;
   3391       bool ApplyTerminalRule = applyTerminalRule(*MII);
   3392       if (isLocalCopy(&(*MII), LIS)) {
   3393         if (ApplyTerminalRule)
   3394           LocalTerminals.push_back(&(*MII));
   3395         else
   3396           LocalWorkList.push_back(&(*MII));
   3397       } else {
   3398         if (ApplyTerminalRule)
   3399           GlobalTerminals.push_back(&(*MII));
   3400         else
   3401           WorkList.push_back(&(*MII));
   3402       }
   3403     }
   3404     // Append the copies evicted by the terminal rule at the end of the list.
   3405     LocalWorkList.append(LocalTerminals.begin(), LocalTerminals.end());
   3406     WorkList.append(GlobalTerminals.begin(), GlobalTerminals.end());
   3407   } else {
   3409     SmallVector<MachineInstr*, 2> Terminals;
   3410     for (MachineInstr &MII : *MBB)
   3411       if (MII.isCopyLike()) {
   3412         if (applyTerminalRule(MII))
   3413           Terminals.push_back(&MII);
   3414         else
   3415           WorkList.push_back(&MII);
   3416       }
   3417     // Append the copies evicted by the terminal rule at the end of the list.
   3418     WorkList.append(Terminals.begin(), Terminals.end());
   3419   }
   3420   // Try coalescing the collected copies immediately, and remove the nulls.
   3421   // This prevents the WorkList from getting too large since most copies are
   3422   // joinable on the first attempt.
   3423   MutableArrayRef<MachineInstr*>
   3424     CurrList(WorkList.begin() + PrevSize, WorkList.end());
   3425   if (copyCoalesceWorkList(CurrList))
   3426     WorkList.erase(std::remove(WorkList.begin() + PrevSize, WorkList.end(),
   3427                                nullptr), WorkList.end());
   3428 }
   3429 
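        /// Coalesce the queued local copies, then move whatever could not be
        /// joined yet onto the global work list for another attempt.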
   3430 void RegisterCoalescer::coalesceLocals() {
   3431   copyCoalesceWorkList(LocalWorkList);
   3432   for (unsigned j = 0, je = LocalWorkList.size(); j != je; ++j) {
   3433     if (LocalWorkList[j])
   3434       WorkList.push_back(LocalWorkList[j]);
   3435   }
   3436   LocalWorkList.clear();
   3437 }
   3438 
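        /// Join intervals for the whole function, visiting basic blocks in
        /// priority order (deepest loops first), then iterating on the work
        /// list until no further copies can be coalesced.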
   3439 void RegisterCoalescer::joinAllIntervals() {
   3440   LLVM_DEBUG(dbgs() << "********** JOINING INTERVALS ***********\n");
   3441   assert(WorkList.empty() && LocalWorkList.empty() && "Old data still around.");
   3442 
   3443   std::vector<MBBPriorityInfo> MBBs;
   3444   MBBs.reserve(MF->size());
   3445   for (MachineFunction::iterator I = MF->begin(), E = MF->end(); I != E; ++I) {
   3446     MachineBasicBlock *MBB = &*I;
   3447     MBBs.push_back(MBBPriorityInfo(MBB, Loops->getLoopDepth(MBB),
   3448                                    JoinSplitEdges && isSplitEdge(MBB)));
   3449   }
   3450   array_pod_sort(MBBs.begin(), MBBs.end(), compareMBBPriority);
   3451 
   3452   // Coalesce intervals in MBB priority order.
   3453   unsigned CurrDepth = std::numeric_limits<unsigned>::max();
   3454   for (unsigned i = 0, e = MBBs.size(); i != e; ++i) {
   3455     // Try coalescing the collected local copies for deeper loops.
   3456     if (JoinGlobalCopies && MBBs[i].Depth < CurrDepth) {
   3457       coalesceLocals();
   3458       CurrDepth = MBBs[i].Depth;
   3459     }
   3460     copyCoalesceInMBB(MBBs[i].MBB);
   3461   }
   3462   coalesceLocals();
   3463 
   3464   // Joining intervals can allow other intervals to be joined.  Iteratively join
   3465   // until we make no progress.
   3466   while (copyCoalesceWorkList(WorkList))
   3467     /* empty */ ;
   3468 }
   3469 
   3470 void RegisterCoalescer::releaseMemory() {
   3471   ErasedInstrs.clear();
   3472   WorkList.clear();
   3473   DeadDefs.clear();
   3474   InflateRegs.clear();
   3475 }
   3476 
   3477 bool RegisterCoalescer::runOnMachineFunction(MachineFunction &fn) {
   3478   MF = &fn;
   3479   MRI = &fn.getRegInfo();
   3480   const TargetSubtargetInfo &STI = fn.getSubtarget();
   3481   TRI = STI.getRegisterInfo();
   3482   TII = STI.getInstrInfo();
   3483   LIS = &getAnalysis<LiveIntervals>();
   3484   AA = &getAnalysis<AAResultsWrapperPass>().getAAResults();
   3485   Loops = &getAnalysis<MachineLoopInfo>();
   3486   if (EnableGlobalCopies == cl::BOU_UNSET)
   3487     JoinGlobalCopies = STI.enableJoinGlobalCopies();
   3488   else
   3489     JoinGlobalCopies = (EnableGlobalCopies == cl::BOU_TRUE);
   3490 
   3491   // The MachineScheduler does not currently require JoinSplitEdges. This will
   3492   // either be enabled unconditionally or replaced by a more general live range
   3493   // splitting optimization.
   3494   JoinSplitEdges = EnableJoinSplits;
   3495 
   3496   LLVM_DEBUG(dbgs() << "********** SIMPLE REGISTER COALESCING **********\n"
   3497                     << "********** Function: " << MF->getName() << '\n');
   3498 
   3499   if (VerifyCoalescing)
   3500     MF->verify(this, "Before register coalescing");
   3501 
   3502   RegClassInfo.runOnMachineFunction(fn);
   3503 
   3504   // Join (coalesce) intervals if requested.
   3505   if (EnableJoining)
   3506     joinAllIntervals();
   3507 
   3508   // After deleting a lot of copies, register classes may be less constrained.
   3509   // Removing sub-register operands may allow GR32_ABCD -> GR32 and DPR_VFP2 ->
   3510   // DPR inflation.
   3511   array_pod_sort(InflateRegs.begin(), InflateRegs.end());
   3512   InflateRegs.erase(std::unique(InflateRegs.begin(), InflateRegs.end()),
   3513                     InflateRegs.end());
   3514   LLVM_DEBUG(dbgs() << "Trying to inflate " << InflateRegs.size()
   3515                     << " regs.\n");
   3516   for (unsigned i = 0, e = InflateRegs.size(); i != e; ++i) {
   3517     unsigned Reg = InflateRegs[i];
   3518     if (MRI->reg_nodbg_empty(Reg))
   3519       continue;
   3520     if (MRI->recomputeRegClass(Reg)) {
   3521       LLVM_DEBUG(dbgs() << printReg(Reg) << " inflated to "
   3522                         << TRI->getRegClassName(MRI->getRegClass(Reg)) << '\n');
   3523       ++NumInflated;
   3524 
   3525       LiveInterval &LI = LIS->getInterval(Reg);
   3526       if (LI.hasSubRanges()) {
   3527         // If the inflated register class no longer supports subregisters,
   3528         // remove the subranges.
   3529         if (!MRI->shouldTrackSubRegLiveness(Reg)) {
   3530           LI.clearSubRanges();
   3531         } else {
   3532 #ifndef NDEBUG
   3533           LaneBitmask MaxMask = MRI->getMaxLaneMaskForVReg(Reg);
   3534           // If subranges are still supported, then the same subregs
   3535           // should still be supported.
   3536           for (LiveInterval::SubRange &S : LI.subranges()) {
   3537             assert((S.LaneMask & ~MaxMask).none());
   3538           }
   3539 #endif
   3540         }
   3541       }
   3542     }
   3543   }
   3544 
   3545   LLVM_DEBUG(dump());
   3546   if (VerifyCoalescing)
   3547     MF->verify(this, "After register coalescing");
   3548   return true;
   3549 }
   3550 
   3551 void RegisterCoalescer::print(raw_ostream &O, const Module* m) const {
   3552   LIS->print(O, m);
   3553 }
   3554