Home | History | Annotate | Download | only in CodeGen
      1 //===-- MachineLICM.cpp - Machine Loop Invariant Code Motion Pass ---------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This pass performs loop invariant code motion on machine instructions. We
     11 // attempt to remove as much code from the body of a loop as possible.
     12 //
     13 // This pass does not attempt to throttle itself to limit register pressure.
     14 // The register allocation phases are expected to perform rematerialization
     15 // to recover when register pressure is high.
     16 //
     17 // This pass is not intended to be a replacement or a complete alternative
     18 // for the LLVM-IR-level LICM pass. It is only designed to hoist simple
     19 // constructs that are not exposed before lowering and instruction selection.
     20 //
     21 //===----------------------------------------------------------------------===//
     22 
     23 #include "llvm/CodeGen/Passes.h"
     24 #include "llvm/ADT/DenseMap.h"
     25 #include "llvm/ADT/SmallSet.h"
     26 #include "llvm/ADT/Statistic.h"
     27 #include "llvm/Analysis/AliasAnalysis.h"
     28 #include "llvm/CodeGen/MachineDominators.h"
     29 #include "llvm/CodeGen/MachineFrameInfo.h"
     30 #include "llvm/CodeGen/MachineLoopInfo.h"
     31 #include "llvm/CodeGen/MachineMemOperand.h"
     32 #include "llvm/CodeGen/MachineRegisterInfo.h"
     33 #include "llvm/CodeGen/PseudoSourceValue.h"
     34 #include "llvm/MC/MCInstrItineraries.h"
     35 #include "llvm/Support/CommandLine.h"
     36 #include "llvm/Support/Debug.h"
     37 #include "llvm/Support/raw_ostream.h"
     38 #include "llvm/Target/TargetInstrInfo.h"
     39 #include "llvm/Target/TargetLowering.h"
     40 #include "llvm/Target/TargetMachine.h"
     41 #include "llvm/Target/TargetRegisterInfo.h"
     42 using namespace llvm;
     43 
     44 #define DEBUG_TYPE "machine-licm"
     45 
     46 static cl::opt<bool>
     47 AvoidSpeculation("avoid-speculation",
     48                  cl::desc("MachineLICM should avoid speculation"),
     49                  cl::init(true), cl::Hidden);
     50 
     51 STATISTIC(NumHoisted,
     52           "Number of machine instructions hoisted out of loops");
     53 STATISTIC(NumLowRP,
     54           "Number of instructions hoisted in low reg pressure situation");
     55 STATISTIC(NumHighLatency,
     56           "Number of high latency instructions hoisted");
     57 STATISTIC(NumCSEed,
     58           "Number of hoisted machine instructions CSEed");
     59 STATISTIC(NumPostRAHoisted,
     60           "Number of machine instructions hoisted out of loops post regalloc");
     61 
     62 namespace {
     63   class MachineLICM : public MachineFunctionPass {
     64     const TargetMachine   *TM;
     65     const TargetInstrInfo *TII;
     66     const TargetLoweringBase *TLI;
     67     const TargetRegisterInfo *TRI;
     68     const MachineFrameInfo *MFI;
     69     MachineRegisterInfo *MRI;
     70     const InstrItineraryData *InstrItins;
     71     bool PreRegAlloc;
     72 
     73     // Various analyses that we use...
     74     AliasAnalysis        *AA;      // Alias analysis info.
     75     MachineLoopInfo      *MLI;     // Current MachineLoopInfo
     76     MachineDominatorTree *DT;      // Machine dominator tree for the cur loop
     77 
     78     // State that is updated as we process loops
     79     bool         Changed;          // True if a loop is changed.
     80     bool         FirstInLoop;      // True if it's the first LICM in the loop.
     81     MachineLoop *CurLoop;          // The current loop we are working on.
     82     MachineBasicBlock *CurPreheader; // The preheader for CurLoop.
     83 
     84     // Exit blocks for CurLoop.
     85     SmallVector<MachineBasicBlock*, 8> ExitBlocks;
     86 
     87     bool isExitBlock(const MachineBasicBlock *MBB) const {
     88       return std::find(ExitBlocks.begin(), ExitBlocks.end(), MBB) !=
     89         ExitBlocks.end();
     90     }
     91 
     92     // Track 'estimated' register pressure.
     93     SmallSet<unsigned, 32> RegSeen;
     94     SmallVector<unsigned, 8> RegPressure;
     95 
     96     // Register pressure "limit" per register class. If the pressure
     97     // is higher than the limit, then it's considered high.
     98     SmallVector<unsigned, 8> RegLimit;
     99 
    100     // Register pressure on path leading from loop preheader to current BB.
    101     SmallVector<SmallVector<unsigned, 8>, 16> BackTrace;
    102 
    103     // For each opcode, keep a list of potential CSE instructions.
    104     DenseMap<unsigned, std::vector<const MachineInstr*> > CSEMap;
    105 
    106     enum {
    107       SpeculateFalse   = 0,
    108       SpeculateTrue    = 1,
    109       SpeculateUnknown = 2
    110     };
    111 
    112     // If a MBB does not dominate loop exiting blocks then it may not safe
    113     // to hoist loads from this block.
    114     // Tri-state: 0 - false, 1 - true, 2 - unknown
    115     unsigned SpeculationState;
    116 
    117   public:
    118     static char ID; // Pass identification, replacement for typeid
    119     MachineLICM() :
    120       MachineFunctionPass(ID), PreRegAlloc(true) {
    121         initializeMachineLICMPass(*PassRegistry::getPassRegistry());
    122       }
    123 
    124     explicit MachineLICM(bool PreRA) :
    125       MachineFunctionPass(ID), PreRegAlloc(PreRA) {
    126         initializeMachineLICMPass(*PassRegistry::getPassRegistry());
    127       }
    128 
    129     bool runOnMachineFunction(MachineFunction &MF) override;
    130 
    131     void getAnalysisUsage(AnalysisUsage &AU) const override {
    132       AU.addRequired<MachineLoopInfo>();
    133       AU.addRequired<MachineDominatorTree>();
    134       AU.addRequired<AliasAnalysis>();
    135       AU.addPreserved<MachineLoopInfo>();
    136       AU.addPreserved<MachineDominatorTree>();
    137       MachineFunctionPass::getAnalysisUsage(AU);
    138     }
    139 
    140     void releaseMemory() override {
    141       RegSeen.clear();
    142       RegPressure.clear();
    143       RegLimit.clear();
    144       BackTrace.clear();
    145       for (DenseMap<unsigned,std::vector<const MachineInstr*> >::iterator
    146              CI = CSEMap.begin(), CE = CSEMap.end(); CI != CE; ++CI)
    147         CI->second.clear();
    148       CSEMap.clear();
    149     }
    150 
    151   private:
    152     /// CandidateInfo - Keep track of information about hoisting candidates.
    153     struct CandidateInfo {
    154       MachineInstr *MI;
    155       unsigned      Def;
    156       int           FI;
    157       CandidateInfo(MachineInstr *mi, unsigned def, int fi)
    158         : MI(mi), Def(def), FI(fi) {}
    159     };
    160 
    161     /// HoistRegionPostRA - Walk the specified region of the CFG and hoist loop
    162     /// invariants out to the preheader.
    163     void HoistRegionPostRA();
    164 
    165     /// HoistPostRA - When an instruction is found to only use loop invariant
    166     /// operands that is safe to hoist, this instruction is called to do the
    167     /// dirty work.
    168     void HoistPostRA(MachineInstr *MI, unsigned Def);
    169 
    170     /// ProcessMI - Examine the instruction for potentai LICM candidate. Also
    171     /// gather register def and frame object update information.
    172     void ProcessMI(MachineInstr *MI,
    173                    BitVector &PhysRegDefs,
    174                    BitVector &PhysRegClobbers,
    175                    SmallSet<int, 32> &StoredFIs,
    176                    SmallVectorImpl<CandidateInfo> &Candidates);
    177 
    178     /// AddToLiveIns - Add register 'Reg' to the livein sets of BBs in the
    179     /// current loop.
    180     void AddToLiveIns(unsigned Reg);
    181 
    182     /// IsLICMCandidate - Returns true if the instruction may be a suitable
    183     /// candidate for LICM. e.g. If the instruction is a call, then it's
    184     /// obviously not safe to hoist it.
    185     bool IsLICMCandidate(MachineInstr &I);
    186 
    187     /// IsLoopInvariantInst - Returns true if the instruction is loop
    188     /// invariant. I.e., all virtual register operands are defined outside of
    189     /// the loop, physical registers aren't accessed (explicitly or implicitly),
    190     /// and the instruction is hoistable.
    191     ///
    192     bool IsLoopInvariantInst(MachineInstr &I);
    193 
    194     /// HasLoopPHIUse - Return true if the specified instruction is used by any
    195     /// phi node in the current loop.
    196     bool HasLoopPHIUse(const MachineInstr *MI) const;
    197 
    198     /// HasHighOperandLatency - Compute operand latency between a def of 'Reg'
    199     /// and an use in the current loop, return true if the target considered
    200     /// it 'high'.
    201     bool HasHighOperandLatency(MachineInstr &MI, unsigned DefIdx,
    202                                unsigned Reg) const;
    203 
    204     bool IsCheapInstruction(MachineInstr &MI) const;
    205 
    206     /// CanCauseHighRegPressure - Visit BBs from header to current BB,
    207     /// check if hoisting an instruction of the given cost matrix can cause high
    208     /// register pressure.
    209     bool CanCauseHighRegPressure(DenseMap<unsigned, int> &Cost, bool Cheap);
    210 
    211     /// UpdateBackTraceRegPressure - Traverse the back trace from header to
    212     /// the current block and update their register pressures to reflect the
    213     /// effect of hoisting MI from the current block to the preheader.
    214     void UpdateBackTraceRegPressure(const MachineInstr *MI);
    215 
    216     /// IsProfitableToHoist - Return true if it is potentially profitable to
    217     /// hoist the given loop invariant.
    218     bool IsProfitableToHoist(MachineInstr &MI);
    219 
    220     /// IsGuaranteedToExecute - Check if this mbb is guaranteed to execute.
    221     /// If not then a load from this mbb may not be safe to hoist.
    222     bool IsGuaranteedToExecute(MachineBasicBlock *BB);
    223 
    224     void EnterScope(MachineBasicBlock *MBB);
    225 
    226     void ExitScope(MachineBasicBlock *MBB);
    227 
    228     /// ExitScopeIfDone - Destroy scope for the MBB that corresponds to given
    229     /// dominator tree node if its a leaf or all of its children are done. Walk
    230     /// up the dominator tree to destroy ancestors which are now done.
    231     void ExitScopeIfDone(MachineDomTreeNode *Node,
    232                 DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren,
    233                 DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> &ParentMap);
    234 
    235     /// HoistOutOfLoop - Walk the specified loop in the CFG (defined by all
    236     /// blocks dominated by the specified header block, and that are in the
    237     /// current loop) in depth first order w.r.t the DominatorTree. This allows
    238     /// us to visit definitions before uses, allowing us to hoist a loop body in
    239     /// one pass without iteration.
    240     ///
    241     void HoistOutOfLoop(MachineDomTreeNode *LoopHeaderNode);
    242     void HoistRegion(MachineDomTreeNode *N, bool IsHeader);
    243 
    244     /// getRegisterClassIDAndCost - For a given MI, register, and the operand
    245     /// index, return the ID and cost of its representative register class by
    246     /// reference.
    247     void getRegisterClassIDAndCost(const MachineInstr *MI,
    248                                    unsigned Reg, unsigned OpIdx,
    249                                    unsigned &RCId, unsigned &RCCost) const;
    250 
    251     /// InitRegPressure - Find all virtual register references that are liveout
    252     /// of the preheader to initialize the starting "register pressure". Note
    253     /// this does not count live through (livein but not used) registers.
    254     void InitRegPressure(MachineBasicBlock *BB);
    255 
    256     /// UpdateRegPressure - Update estimate of register pressure after the
    257     /// specified instruction.
    258     void UpdateRegPressure(const MachineInstr *MI);
    259 
    260     /// ExtractHoistableLoad - Unfold a load from the given machineinstr if
    261     /// the load itself could be hoisted. Return the unfolded and hoistable
    262     /// load, or null if the load couldn't be unfolded or if it wouldn't
    263     /// be hoistable.
    264     MachineInstr *ExtractHoistableLoad(MachineInstr *MI);
    265 
    266     /// LookForDuplicate - Find an instruction amount PrevMIs that is a
    267     /// duplicate of MI. Return this instruction if it's found.
    268     const MachineInstr *LookForDuplicate(const MachineInstr *MI,
    269                                      std::vector<const MachineInstr*> &PrevMIs);
    270 
    271     /// EliminateCSE - Given a LICM'ed instruction, look for an instruction on
    272     /// the preheader that compute the same value. If it's found, do a RAU on
    273     /// with the definition of the existing instruction rather than hoisting
    274     /// the instruction to the preheader.
    275     bool EliminateCSE(MachineInstr *MI,
    276            DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator &CI);
    277 
    278     /// MayCSE - Return true if the given instruction will be CSE'd if it's
    279     /// hoisted out of the loop.
    280     bool MayCSE(MachineInstr *MI);
    281 
    282     /// Hoist - When an instruction is found to only use loop invariant operands
    283     /// that is safe to hoist, this instruction is called to do the dirty work.
    284     /// It returns true if the instruction is hoisted.
    285     bool Hoist(MachineInstr *MI, MachineBasicBlock *Preheader);
    286 
    287     /// InitCSEMap - Initialize the CSE map with instructions that are in the
    288     /// current loop preheader that may become duplicates of instructions that
    289     /// are hoisted out of the loop.
    290     void InitCSEMap(MachineBasicBlock *BB);
    291 
    292     /// getCurPreheader - Get the preheader for the current loop, splitting
    293     /// a critical edge if needed.
    294     MachineBasicBlock *getCurPreheader();
    295   };
    296 } // end anonymous namespace
    297 
    298 char MachineLICM::ID = 0;
    299 char &llvm::MachineLICMID = MachineLICM::ID;
    300 INITIALIZE_PASS_BEGIN(MachineLICM, "machinelicm",
    301                 "Machine Loop Invariant Code Motion", false, false)
    302 INITIALIZE_PASS_DEPENDENCY(MachineLoopInfo)
    303 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
    304 INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
    305 INITIALIZE_PASS_END(MachineLICM, "machinelicm",
    306                 "Machine Loop Invariant Code Motion", false, false)
    307 
    308 /// LoopIsOuterMostWithPredecessor - Test if the given loop is the outer-most
    309 /// loop that has a unique predecessor.
    310 static bool LoopIsOuterMostWithPredecessor(MachineLoop *CurLoop) {
    311   // Check whether this loop even has a unique predecessor.
    312   if (!CurLoop->getLoopPredecessor())
    313     return false;
    314   // Ok, now check to see if any of its outer loops do.
    315   for (MachineLoop *L = CurLoop->getParentLoop(); L; L = L->getParentLoop())
    316     if (L->getLoopPredecessor())
    317       return false;
    318   // None of them did, so this is the outermost with a unique predecessor.
    319   return true;
    320 }
    321 
    322 bool MachineLICM::runOnMachineFunction(MachineFunction &MF) {
    323   if (skipOptnoneFunction(*MF.getFunction()))
    324     return false;
    325 
    326   Changed = FirstInLoop = false;
    327   TM = &MF.getTarget();
    328   TII = TM->getInstrInfo();
    329   TLI = TM->getTargetLowering();
    330   TRI = TM->getRegisterInfo();
    331   MFI = MF.getFrameInfo();
    332   MRI = &MF.getRegInfo();
    333   InstrItins = TM->getInstrItineraryData();
    334 
    335   PreRegAlloc = MRI->isSSA();
    336 
    337   if (PreRegAlloc)
    338     DEBUG(dbgs() << "******** Pre-regalloc Machine LICM: ");
    339   else
    340     DEBUG(dbgs() << "******** Post-regalloc Machine LICM: ");
    341   DEBUG(dbgs() << MF.getName() << " ********\n");
    342 
    343   if (PreRegAlloc) {
    344     // Estimate register pressure during pre-regalloc pass.
    345     unsigned NumRC = TRI->getNumRegClasses();
    346     RegPressure.resize(NumRC);
    347     std::fill(RegPressure.begin(), RegPressure.end(), 0);
    348     RegLimit.resize(NumRC);
    349     for (TargetRegisterInfo::regclass_iterator I = TRI->regclass_begin(),
    350            E = TRI->regclass_end(); I != E; ++I)
    351       RegLimit[(*I)->getID()] = TRI->getRegPressureLimit(*I, MF);
    352   }
    353 
    354   // Get our Loop information...
    355   MLI = &getAnalysis<MachineLoopInfo>();
    356   DT  = &getAnalysis<MachineDominatorTree>();
    357   AA  = &getAnalysis<AliasAnalysis>();
    358 
    359   SmallVector<MachineLoop *, 8> Worklist(MLI->begin(), MLI->end());
    360   while (!Worklist.empty()) {
    361     CurLoop = Worklist.pop_back_val();
    362     CurPreheader = nullptr;
    363     ExitBlocks.clear();
    364 
    365     // If this is done before regalloc, only visit outer-most preheader-sporting
    366     // loops.
    367     if (PreRegAlloc && !LoopIsOuterMostWithPredecessor(CurLoop)) {
    368       Worklist.append(CurLoop->begin(), CurLoop->end());
    369       continue;
    370     }
    371 
    372     CurLoop->getExitBlocks(ExitBlocks);
    373 
    374     if (!PreRegAlloc)
    375       HoistRegionPostRA();
    376     else {
    377       // CSEMap is initialized for loop header when the first instruction is
    378       // being hoisted.
    379       MachineDomTreeNode *N = DT->getNode(CurLoop->getHeader());
    380       FirstInLoop = true;
    381       HoistOutOfLoop(N);
    382       CSEMap.clear();
    383     }
    384   }
    385 
    386   return Changed;
    387 }
    388 
    389 /// InstructionStoresToFI - Return true if instruction stores to the
    390 /// specified frame.
    391 static bool InstructionStoresToFI(const MachineInstr *MI, int FI) {
    392   for (MachineInstr::mmo_iterator o = MI->memoperands_begin(),
    393          oe = MI->memoperands_end(); o != oe; ++o) {
    394     if (!(*o)->isStore() || !(*o)->getPseudoValue())
    395       continue;
    396     if (const FixedStackPseudoSourceValue *Value =
    397         dyn_cast<FixedStackPseudoSourceValue>((*o)->getPseudoValue())) {
    398       if (Value->getFrameIndex() == FI)
    399         return true;
    400     }
    401   }
    402   return false;
    403 }
    404 
    405 /// ProcessMI - Examine the instruction for potentai LICM candidate. Also
    406 /// gather register def and frame object update information.
    407 void MachineLICM::ProcessMI(MachineInstr *MI,
    408                             BitVector &PhysRegDefs,
    409                             BitVector &PhysRegClobbers,
    410                             SmallSet<int, 32> &StoredFIs,
    411                             SmallVectorImpl<CandidateInfo> &Candidates) {
    412   bool RuledOut = false;
    413   bool HasNonInvariantUse = false;
    414   unsigned Def = 0;
    415   for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
    416     const MachineOperand &MO = MI->getOperand(i);
    417     if (MO.isFI()) {
    418       // Remember if the instruction stores to the frame index.
    419       int FI = MO.getIndex();
    420       if (!StoredFIs.count(FI) &&
    421           MFI->isSpillSlotObjectIndex(FI) &&
    422           InstructionStoresToFI(MI, FI))
    423         StoredFIs.insert(FI);
    424       HasNonInvariantUse = true;
    425       continue;
    426     }
    427 
    428     // We can't hoist an instruction defining a physreg that is clobbered in
    429     // the loop.
    430     if (MO.isRegMask()) {
    431       PhysRegClobbers.setBitsNotInMask(MO.getRegMask());
    432       continue;
    433     }
    434 
    435     if (!MO.isReg())
    436       continue;
    437     unsigned Reg = MO.getReg();
    438     if (!Reg)
    439       continue;
    440     assert(TargetRegisterInfo::isPhysicalRegister(Reg) &&
    441            "Not expecting virtual register!");
    442 
    443     if (!MO.isDef()) {
    444       if (Reg && (PhysRegDefs.test(Reg) || PhysRegClobbers.test(Reg)))
    445         // If it's using a non-loop-invariant register, then it's obviously not
    446         // safe to hoist.
    447         HasNonInvariantUse = true;
    448       continue;
    449     }
    450 
    451     if (MO.isImplicit()) {
    452       for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
    453         PhysRegClobbers.set(*AI);
    454       if (!MO.isDead())
    455         // Non-dead implicit def? This cannot be hoisted.
    456         RuledOut = true;
    457       // No need to check if a dead implicit def is also defined by
    458       // another instruction.
    459       continue;
    460     }
    461 
    462     // FIXME: For now, avoid instructions with multiple defs, unless
    463     // it's a dead implicit def.
    464     if (Def)
    465       RuledOut = true;
    466     else
    467       Def = Reg;
    468 
    469     // If we have already seen another instruction that defines the same
    470     // register, then this is not safe.  Two defs is indicated by setting a
    471     // PhysRegClobbers bit.
    472     for (MCRegAliasIterator AS(Reg, TRI, true); AS.isValid(); ++AS) {
    473       if (PhysRegDefs.test(*AS))
    474         PhysRegClobbers.set(*AS);
    475       PhysRegDefs.set(*AS);
    476     }
    477     if (PhysRegClobbers.test(Reg))
    478       // MI defined register is seen defined by another instruction in
    479       // the loop, it cannot be a LICM candidate.
    480       RuledOut = true;
    481   }
    482 
    483   // Only consider reloads for now and remats which do not have register
    484   // operands. FIXME: Consider unfold load folding instructions.
    485   if (Def && !RuledOut) {
    486     int FI = INT_MIN;
    487     if ((!HasNonInvariantUse && IsLICMCandidate(*MI)) ||
    488         (TII->isLoadFromStackSlot(MI, FI) && MFI->isSpillSlotObjectIndex(FI)))
    489       Candidates.push_back(CandidateInfo(MI, Def, FI));
    490   }
    491 }
    492 
    493 /// HoistRegionPostRA - Walk the specified region of the CFG and hoist loop
    494 /// invariants out to the preheader.
    495 void MachineLICM::HoistRegionPostRA() {
    496   MachineBasicBlock *Preheader = getCurPreheader();
    497   if (!Preheader)
    498     return;
    499 
    500   unsigned NumRegs = TRI->getNumRegs();
    501   BitVector PhysRegDefs(NumRegs); // Regs defined once in the loop.
    502   BitVector PhysRegClobbers(NumRegs); // Regs defined more than once.
    503 
    504   SmallVector<CandidateInfo, 32> Candidates;
    505   SmallSet<int, 32> StoredFIs;
    506 
    507   // Walk the entire region, count number of defs for each register, and
    508   // collect potential LICM candidates.
    509   const std::vector<MachineBasicBlock *> &Blocks = CurLoop->getBlocks();
    510   for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
    511     MachineBasicBlock *BB = Blocks[i];
    512 
    513     // If the header of the loop containing this basic block is a landing pad,
    514     // then don't try to hoist instructions out of this loop.
    515     const MachineLoop *ML = MLI->getLoopFor(BB);
    516     if (ML && ML->getHeader()->isLandingPad()) continue;
    517 
    518     // Conservatively treat live-in's as an external def.
    519     // FIXME: That means a reload that're reused in successor block(s) will not
    520     // be LICM'ed.
    521     for (MachineBasicBlock::livein_iterator I = BB->livein_begin(),
    522            E = BB->livein_end(); I != E; ++I) {
    523       unsigned Reg = *I;
    524       for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
    525         PhysRegDefs.set(*AI);
    526     }
    527 
    528     SpeculationState = SpeculateUnknown;
    529     for (MachineBasicBlock::iterator
    530            MII = BB->begin(), E = BB->end(); MII != E; ++MII) {
    531       MachineInstr *MI = &*MII;
    532       ProcessMI(MI, PhysRegDefs, PhysRegClobbers, StoredFIs, Candidates);
    533     }
    534   }
    535 
    536   // Gather the registers read / clobbered by the terminator.
    537   BitVector TermRegs(NumRegs);
    538   MachineBasicBlock::iterator TI = Preheader->getFirstTerminator();
    539   if (TI != Preheader->end()) {
    540     for (unsigned i = 0, e = TI->getNumOperands(); i != e; ++i) {
    541       const MachineOperand &MO = TI->getOperand(i);
    542       if (!MO.isReg())
    543         continue;
    544       unsigned Reg = MO.getReg();
    545       if (!Reg)
    546         continue;
    547       for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
    548         TermRegs.set(*AI);
    549     }
    550   }
    551 
    552   // Now evaluate whether the potential candidates qualify.
    553   // 1. Check if the candidate defined register is defined by another
    554   //    instruction in the loop.
    555   // 2. If the candidate is a load from stack slot (always true for now),
    556   //    check if the slot is stored anywhere in the loop.
    557   // 3. Make sure candidate def should not clobber
    558   //    registers read by the terminator. Similarly its def should not be
    559   //    clobbered by the terminator.
    560   for (unsigned i = 0, e = Candidates.size(); i != e; ++i) {
    561     if (Candidates[i].FI != INT_MIN &&
    562         StoredFIs.count(Candidates[i].FI))
    563       continue;
    564 
    565     unsigned Def = Candidates[i].Def;
    566     if (!PhysRegClobbers.test(Def) && !TermRegs.test(Def)) {
    567       bool Safe = true;
    568       MachineInstr *MI = Candidates[i].MI;
    569       for (unsigned j = 0, ee = MI->getNumOperands(); j != ee; ++j) {
    570         const MachineOperand &MO = MI->getOperand(j);
    571         if (!MO.isReg() || MO.isDef() || !MO.getReg())
    572           continue;
    573         unsigned Reg = MO.getReg();
    574         if (PhysRegDefs.test(Reg) ||
    575             PhysRegClobbers.test(Reg)) {
    576           // If it's using a non-loop-invariant register, then it's obviously
    577           // not safe to hoist.
    578           Safe = false;
    579           break;
    580         }
    581       }
    582       if (Safe)
    583         HoistPostRA(MI, Candidates[i].Def);
    584     }
    585   }
    586 }
    587 
    588 /// AddToLiveIns - Add register 'Reg' to the livein sets of BBs in the current
    589 /// loop, and make sure it is not killed by any instructions in the loop.
    590 void MachineLICM::AddToLiveIns(unsigned Reg) {
    591   const std::vector<MachineBasicBlock *> &Blocks = CurLoop->getBlocks();
    592   for (unsigned i = 0, e = Blocks.size(); i != e; ++i) {
    593     MachineBasicBlock *BB = Blocks[i];
    594     if (!BB->isLiveIn(Reg))
    595       BB->addLiveIn(Reg);
    596     for (MachineBasicBlock::iterator
    597            MII = BB->begin(), E = BB->end(); MII != E; ++MII) {
    598       MachineInstr *MI = &*MII;
    599       for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
    600         MachineOperand &MO = MI->getOperand(i);
    601         if (!MO.isReg() || !MO.getReg() || MO.isDef()) continue;
    602         if (MO.getReg() == Reg || TRI->isSuperRegister(Reg, MO.getReg()))
    603           MO.setIsKill(false);
    604       }
    605     }
    606   }
    607 }
    608 
    609 /// HoistPostRA - When an instruction is found to only use loop invariant
    610 /// operands that is safe to hoist, this instruction is called to do the
    611 /// dirty work.
    612 void MachineLICM::HoistPostRA(MachineInstr *MI, unsigned Def) {
    613   MachineBasicBlock *Preheader = getCurPreheader();
    614 
    615   // Now move the instructions to the predecessor, inserting it before any
    616   // terminator instructions.
    617   DEBUG(dbgs() << "Hoisting to BB#" << Preheader->getNumber() << " from BB#"
    618                << MI->getParent()->getNumber() << ": " << *MI);
    619 
    620   // Splice the instruction to the preheader.
    621   MachineBasicBlock *MBB = MI->getParent();
    622   Preheader->splice(Preheader->getFirstTerminator(), MBB, MI);
    623 
    624   // Add register to livein list to all the BBs in the current loop since a
    625   // loop invariant must be kept live throughout the whole loop. This is
    626   // important to ensure later passes do not scavenge the def register.
    627   AddToLiveIns(Def);
    628 
    629   ++NumPostRAHoisted;
    630   Changed = true;
    631 }
    632 
    633 // IsGuaranteedToExecute - Check if this mbb is guaranteed to execute.
    634 // If not then a load from this mbb may not be safe to hoist.
    635 bool MachineLICM::IsGuaranteedToExecute(MachineBasicBlock *BB) {
    636   if (SpeculationState != SpeculateUnknown)
    637     return SpeculationState == SpeculateFalse;
    638 
    639   if (BB != CurLoop->getHeader()) {
    640     // Check loop exiting blocks.
    641     SmallVector<MachineBasicBlock*, 8> CurrentLoopExitingBlocks;
    642     CurLoop->getExitingBlocks(CurrentLoopExitingBlocks);
    643     for (unsigned i = 0, e = CurrentLoopExitingBlocks.size(); i != e; ++i)
    644       if (!DT->dominates(BB, CurrentLoopExitingBlocks[i])) {
    645         SpeculationState = SpeculateTrue;
    646         return false;
    647       }
    648   }
    649 
    650   SpeculationState = SpeculateFalse;
    651   return true;
    652 }
    653 
    654 void MachineLICM::EnterScope(MachineBasicBlock *MBB) {
    655   DEBUG(dbgs() << "Entering: " << MBB->getName() << '\n');
    656 
    657   // Remember livein register pressure.
    658   BackTrace.push_back(RegPressure);
    659 }
    660 
    661 void MachineLICM::ExitScope(MachineBasicBlock *MBB) {
    662   DEBUG(dbgs() << "Exiting: " << MBB->getName() << '\n');
    663   BackTrace.pop_back();
    664 }
    665 
    666 /// ExitScopeIfDone - Destroy scope for the MBB that corresponds to the given
    667 /// dominator tree node if its a leaf or all of its children are done. Walk
    668 /// up the dominator tree to destroy ancestors which are now done.
    669 void MachineLICM::ExitScopeIfDone(MachineDomTreeNode *Node,
    670                 DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren,
    671                 DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> &ParentMap) {
    672   if (OpenChildren[Node])
    673     return;
    674 
    675   // Pop scope.
    676   ExitScope(Node->getBlock());
    677 
    678   // Now traverse upwards to pop ancestors whose offsprings are all done.
    679   while (MachineDomTreeNode *Parent = ParentMap[Node]) {
    680     unsigned Left = --OpenChildren[Parent];
    681     if (Left != 0)
    682       break;
    683     ExitScope(Parent->getBlock());
    684     Node = Parent;
    685   }
    686 }
    687 
    688 /// HoistOutOfLoop - Walk the specified loop in the CFG (defined by all
    689 /// blocks dominated by the specified header block, and that are in the
    690 /// current loop) in depth first order w.r.t the DominatorTree. This allows
    691 /// us to visit definitions before uses, allowing us to hoist a loop body in
    692 /// one pass without iteration.
    693 ///
    694 void MachineLICM::HoistOutOfLoop(MachineDomTreeNode *HeaderN) {
    695   SmallVector<MachineDomTreeNode*, 32> Scopes;
    696   SmallVector<MachineDomTreeNode*, 8> WorkList;
    697   DenseMap<MachineDomTreeNode*, MachineDomTreeNode*> ParentMap;
    698   DenseMap<MachineDomTreeNode*, unsigned> OpenChildren;
    699 
    700   // Perform a DFS walk to determine the order of visit.
    701   WorkList.push_back(HeaderN);
    702   do {
    703     MachineDomTreeNode *Node = WorkList.pop_back_val();
    704     assert(Node && "Null dominator tree node?");
    705     MachineBasicBlock *BB = Node->getBlock();
    706 
    707     // If the header of the loop containing this basic block is a landing pad,
    708     // then don't try to hoist instructions out of this loop.
    709     const MachineLoop *ML = MLI->getLoopFor(BB);
    710     if (ML && ML->getHeader()->isLandingPad())
    711       continue;
    712 
    713     // If this subregion is not in the top level loop at all, exit.
    714     if (!CurLoop->contains(BB))
    715       continue;
    716 
    717     Scopes.push_back(Node);
    718     const std::vector<MachineDomTreeNode*> &Children = Node->getChildren();
    719     unsigned NumChildren = Children.size();
    720 
    721     // Don't hoist things out of a large switch statement.  This often causes
    722     // code to be hoisted that wasn't going to be executed, and increases
    723     // register pressure in a situation where it's likely to matter.
    724     if (BB->succ_size() >= 25)
    725       NumChildren = 0;
    726 
    727     OpenChildren[Node] = NumChildren;
    728     // Add children in reverse order as then the next popped worklist node is
    729     // the first child of this node.  This means we ultimately traverse the
    730     // DOM tree in exactly the same order as if we'd recursed.
    731     for (int i = (int)NumChildren-1; i >= 0; --i) {
    732       MachineDomTreeNode *Child = Children[i];
    733       ParentMap[Child] = Node;
    734       WorkList.push_back(Child);
    735     }
    736   } while (!WorkList.empty());
    737 
    738   if (Scopes.size() != 0) {
    739     MachineBasicBlock *Preheader = getCurPreheader();
    740     if (!Preheader)
    741       return;
    742 
    743     // Compute registers which are livein into the loop headers.
    744     RegSeen.clear();
    745     BackTrace.clear();
    746     InitRegPressure(Preheader);
    747   }
    748 
    749   // Now perform LICM.
    750   for (unsigned i = 0, e = Scopes.size(); i != e; ++i) {
    751     MachineDomTreeNode *Node = Scopes[i];
    752     MachineBasicBlock *MBB = Node->getBlock();
    753 
    754     MachineBasicBlock *Preheader = getCurPreheader();
    755     if (!Preheader)
    756       continue;
    757 
    758     EnterScope(MBB);
    759 
    760     // Process the block
    761     SpeculationState = SpeculateUnknown;
    762     for (MachineBasicBlock::iterator
    763          MII = MBB->begin(), E = MBB->end(); MII != E; ) {
    764       MachineBasicBlock::iterator NextMII = MII; ++NextMII;
    765       MachineInstr *MI = &*MII;
    766       if (!Hoist(MI, Preheader))
    767         UpdateRegPressure(MI);
    768       MII = NextMII;
    769     }
    770 
    771     // If it's a leaf node, it's done. Traverse upwards to pop ancestors.
    772     ExitScopeIfDone(Node, OpenChildren, ParentMap);
    773   }
    774 }
    775 
    776 static bool isOperandKill(const MachineOperand &MO, MachineRegisterInfo *MRI) {
    777   return MO.isKill() || MRI->hasOneNonDBGUse(MO.getReg());
    778 }
    779 
    780 /// getRegisterClassIDAndCost - For a given MI, register, and the operand
    781 /// index, return the ID and cost of its representative register class.
    782 void
    783 MachineLICM::getRegisterClassIDAndCost(const MachineInstr *MI,
    784                                        unsigned Reg, unsigned OpIdx,
    785                                        unsigned &RCId, unsigned &RCCost) const {
    786   const TargetRegisterClass *RC = MRI->getRegClass(Reg);
    787   MVT VT = *RC->vt_begin();
    788   if (VT == MVT::Untyped) {
    789     RCId = RC->getID();
    790     RCCost = 1;
    791   } else {
    792     RCId = TLI->getRepRegClassFor(VT)->getID();
    793     RCCost = TLI->getRepRegClassCostFor(VT);
    794   }
    795 }
    796 
    797 /// InitRegPressure - Find all virtual register references that are liveout of
    798 /// the preheader to initialize the starting "register pressure". Note this
    799 /// does not count live through (livein but not used) registers.
    800 void MachineLICM::InitRegPressure(MachineBasicBlock *BB) {
    801   std::fill(RegPressure.begin(), RegPressure.end(), 0);
    802 
    803   // If the preheader has only a single predecessor and it ends with a
    804   // fallthrough or an unconditional branch, then scan its predecessor for live
    805   // defs as well. This happens whenever the preheader is created by splitting
    806   // the critical edge from the loop predecessor to the loop header.
    807   if (BB->pred_size() == 1) {
    808     MachineBasicBlock *TBB = nullptr, *FBB = nullptr;
    809     SmallVector<MachineOperand, 4> Cond;
    810     if (!TII->AnalyzeBranch(*BB, TBB, FBB, Cond, false) && Cond.empty())
    811       InitRegPressure(*BB->pred_begin());
    812   }
    813 
    814   for (MachineBasicBlock::iterator MII = BB->begin(), E = BB->end();
    815        MII != E; ++MII) {
    816     MachineInstr *MI = &*MII;
    817     for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) {
    818       const MachineOperand &MO = MI->getOperand(i);
    819       if (!MO.isReg() || MO.isImplicit())
    820         continue;
    821       unsigned Reg = MO.getReg();
    822       if (!TargetRegisterInfo::isVirtualRegister(Reg))
    823         continue;
    824 
    825       bool isNew = RegSeen.insert(Reg);
    826       unsigned RCId, RCCost;
    827       getRegisterClassIDAndCost(MI, Reg, i, RCId, RCCost);
    828       if (MO.isDef())
    829         RegPressure[RCId] += RCCost;
    830       else {
    831         bool isKill = isOperandKill(MO, MRI);
    832         if (isNew && !isKill)
    833           // Haven't seen this, it must be a livein.
    834           RegPressure[RCId] += RCCost;
    835         else if (!isNew && isKill)
    836           RegPressure[RCId] -= RCCost;
    837       }
    838     }
    839   }
    840 }
    841 
    842 /// UpdateRegPressure - Update estimate of register pressure after the
    843 /// specified instruction.
    844 void MachineLICM::UpdateRegPressure(const MachineInstr *MI) {
    845   if (MI->isImplicitDef())
    846     return;
    847 
    848   SmallVector<unsigned, 4> Defs;
    849   for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) {
    850     const MachineOperand &MO = MI->getOperand(i);
    851     if (!MO.isReg() || MO.isImplicit())
    852       continue;
    853     unsigned Reg = MO.getReg();
    854     if (!TargetRegisterInfo::isVirtualRegister(Reg))
    855       continue;
    856 
    857     bool isNew = RegSeen.insert(Reg);
    858     if (MO.isDef())
    859       Defs.push_back(Reg);
    860     else if (!isNew && isOperandKill(MO, MRI)) {
    861       unsigned RCId, RCCost;
    862       getRegisterClassIDAndCost(MI, Reg, i, RCId, RCCost);
    863       if (RCCost > RegPressure[RCId])
    864         RegPressure[RCId] = 0;
    865       else
    866         RegPressure[RCId] -= RCCost;
    867     }
    868   }
    869 
    870   unsigned Idx = 0;
    871   while (!Defs.empty()) {
    872     unsigned Reg = Defs.pop_back_val();
    873     unsigned RCId, RCCost;
    874     getRegisterClassIDAndCost(MI, Reg, Idx, RCId, RCCost);
    875     RegPressure[RCId] += RCCost;
    876     ++Idx;
    877   }
    878 }
    879 
    880 /// isLoadFromGOTOrConstantPool - Return true if this machine instruction
    881 /// loads from global offset table or constant pool.
    882 static bool isLoadFromGOTOrConstantPool(MachineInstr &MI) {
    883   assert (MI.mayLoad() && "Expected MI that loads!");
    884   for (MachineInstr::mmo_iterator I = MI.memoperands_begin(),
    885          E = MI.memoperands_end(); I != E; ++I) {
    886     if (const PseudoSourceValue *PSV = (*I)->getPseudoValue()) {
    887       if (PSV == PSV->getGOT() || PSV == PSV->getConstantPool())
    888         return true;
    889     }
    890   }
    891   return false;
    892 }
    893 
    894 /// IsLICMCandidate - Returns true if the instruction may be a suitable
    895 /// candidate for LICM. e.g. If the instruction is a call, then it's obviously
    896 /// not safe to hoist it.
    897 bool MachineLICM::IsLICMCandidate(MachineInstr &I) {
    898   // Check if it's safe to move the instruction.
    899   bool DontMoveAcrossStore = true;
    900   if (!I.isSafeToMove(TII, AA, DontMoveAcrossStore))
    901     return false;
    902 
    903   // If it is load then check if it is guaranteed to execute by making sure that
    904   // it dominates all exiting blocks. If it doesn't, then there is a path out of
    905   // the loop which does not execute this load, so we can't hoist it. Loads
    906   // from constant memory are not safe to speculate all the time, for example
    907   // indexed load from a jump table.
    908   // Stores and side effects are already checked by isSafeToMove.
    909   if (I.mayLoad() && !isLoadFromGOTOrConstantPool(I) &&
    910       !IsGuaranteedToExecute(I.getParent()))
    911     return false;
    912 
    913   return true;
    914 }
    915 
    916 /// IsLoopInvariantInst - Returns true if the instruction is loop
    917 /// invariant. I.e., all virtual register operands are defined outside of the
    918 /// loop, physical registers aren't accessed explicitly, and there are no side
    919 /// effects that aren't captured by the operands or other flags.
    920 ///
    921 bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) {
    922   if (!IsLICMCandidate(I))
    923     return false;
    924 
    925   // The instruction is loop invariant if all of its operands are.
    926   for (unsigned i = 0, e = I.getNumOperands(); i != e; ++i) {
    927     const MachineOperand &MO = I.getOperand(i);
    928 
    929     if (!MO.isReg())
    930       continue;
    931 
    932     unsigned Reg = MO.getReg();
    933     if (Reg == 0) continue;
    934 
    935     // Don't hoist an instruction that uses or defines a physical register.
    936     if (TargetRegisterInfo::isPhysicalRegister(Reg)) {
    937       if (MO.isUse()) {
    938         // If the physreg has no defs anywhere, it's just an ambient register
    939         // and we can freely move its uses. Alternatively, if it's allocatable,
    940         // it could get allocated to something with a def during allocation.
    941         if (!MRI->isConstantPhysReg(Reg, *I.getParent()->getParent()))
    942           return false;
    943         // Otherwise it's safe to move.
    944         continue;
    945       } else if (!MO.isDead()) {
    946         // A def that isn't dead. We can't move it.
    947         return false;
    948       } else if (CurLoop->getHeader()->isLiveIn(Reg)) {
    949         // If the reg is live into the loop, we can't hoist an instruction
    950         // which would clobber it.
    951         return false;
    952       }
    953     }
    954 
    955     if (!MO.isUse())
    956       continue;
    957 
    958     assert(MRI->getVRegDef(Reg) &&
    959            "Machine instr not mapped for this vreg?!");
    960 
    961     // If the loop contains the definition of an operand, then the instruction
    962     // isn't loop invariant.
    963     if (CurLoop->contains(MRI->getVRegDef(Reg)))
    964       return false;
    965   }
    966 
    967   // If we got this far, the instruction is loop invariant!
    968   return true;
    969 }
    970 
    971 
    972 /// HasLoopPHIUse - Return true if the specified instruction is used by a
    973 /// phi node and hoisting it could cause a copy to be inserted.
    974 bool MachineLICM::HasLoopPHIUse(const MachineInstr *MI) const {
    975   SmallVector<const MachineInstr*, 8> Work(1, MI);
    976   do {
    977     MI = Work.pop_back_val();
    978     for (ConstMIOperands MO(MI); MO.isValid(); ++MO) {
    979       if (!MO->isReg() || !MO->isDef())
    980         continue;
    981       unsigned Reg = MO->getReg();
    982       if (!TargetRegisterInfo::isVirtualRegister(Reg))
    983         continue;
    984       for (MachineInstr &UseMI : MRI->use_instructions(Reg)) {
    985         // A PHI may cause a copy to be inserted.
    986         if (UseMI.isPHI()) {
    987           // A PHI inside the loop causes a copy because the live range of Reg is
    988           // extended across the PHI.
    989           if (CurLoop->contains(&UseMI))
    990             return true;
    991           // A PHI in an exit block can cause a copy to be inserted if the PHI
    992           // has multiple predecessors in the loop with different values.
    993           // For now, approximate by rejecting all exit blocks.
    994           if (isExitBlock(UseMI.getParent()))
    995             return true;
    996           continue;
    997         }
    998         // Look past copies as well.
    999         if (UseMI.isCopy() && CurLoop->contains(&UseMI))
   1000           Work.push_back(&UseMI);
   1001       }
   1002     }
   1003   } while (!Work.empty());
   1004   return false;
   1005 }
   1006 
   1007 /// HasHighOperandLatency - Compute operand latency between a def of 'Reg'
   1008 /// and an use in the current loop, return true if the target considered
   1009 /// it 'high'.
   1010 bool MachineLICM::HasHighOperandLatency(MachineInstr &MI,
   1011                                         unsigned DefIdx, unsigned Reg) const {
   1012   if (!InstrItins || InstrItins->isEmpty() || MRI->use_nodbg_empty(Reg))
   1013     return false;
   1014 
   1015   for (MachineInstr &UseMI : MRI->use_nodbg_instructions(Reg)) {
   1016     if (UseMI.isCopyLike())
   1017       continue;
   1018     if (!CurLoop->contains(UseMI.getParent()))
   1019       continue;
   1020     for (unsigned i = 0, e = UseMI.getNumOperands(); i != e; ++i) {
   1021       const MachineOperand &MO = UseMI.getOperand(i);
   1022       if (!MO.isReg() || !MO.isUse())
   1023         continue;
   1024       unsigned MOReg = MO.getReg();
   1025       if (MOReg != Reg)
   1026         continue;
   1027 
   1028       if (TII->hasHighOperandLatency(InstrItins, MRI, &MI, DefIdx, &UseMI, i))
   1029         return true;
   1030     }
   1031 
   1032     // Only look at the first in loop use.
   1033     break;
   1034   }
   1035 
   1036   return false;
   1037 }
   1038 
   1039 /// IsCheapInstruction - Return true if the instruction is marked "cheap" or
   1040 /// the operand latency between its def and a use is one or less.
   1041 bool MachineLICM::IsCheapInstruction(MachineInstr &MI) const {
   1042   if (MI.isAsCheapAsAMove() || MI.isCopyLike())
   1043     return true;
   1044   if (!InstrItins || InstrItins->isEmpty())
   1045     return false;
   1046 
   1047   bool isCheap = false;
   1048   unsigned NumDefs = MI.getDesc().getNumDefs();
   1049   for (unsigned i = 0, e = MI.getNumOperands(); NumDefs && i != e; ++i) {
   1050     MachineOperand &DefMO = MI.getOperand(i);
   1051     if (!DefMO.isReg() || !DefMO.isDef())
   1052       continue;
   1053     --NumDefs;
   1054     unsigned Reg = DefMO.getReg();
   1055     if (TargetRegisterInfo::isPhysicalRegister(Reg))
   1056       continue;
   1057 
   1058     if (!TII->hasLowDefLatency(InstrItins, &MI, i))
   1059       return false;
   1060     isCheap = true;
   1061   }
   1062 
   1063   return isCheap;
   1064 }
   1065 
   1066 /// CanCauseHighRegPressure - Visit BBs from header to current BB, check
   1067 /// if hoisting an instruction of the given cost matrix can cause high
   1068 /// register pressure.
   1069 bool MachineLICM::CanCauseHighRegPressure(DenseMap<unsigned, int> &Cost,
   1070                                           bool CheapInstr) {
   1071   for (DenseMap<unsigned, int>::iterator CI = Cost.begin(), CE = Cost.end();
   1072        CI != CE; ++CI) {
   1073     if (CI->second <= 0)
   1074       continue;
   1075 
   1076     unsigned RCId = CI->first;
   1077     unsigned Limit = RegLimit[RCId];
   1078     int Cost = CI->second;
   1079 
   1080     // Don't hoist cheap instructions if they would increase register pressure,
   1081     // even if we're under the limit.
   1082     if (CheapInstr)
   1083       return true;
   1084 
   1085     for (unsigned i = BackTrace.size(); i != 0; --i) {
   1086       SmallVectorImpl<unsigned> &RP = BackTrace[i-1];
   1087       if (RP[RCId] + Cost >= Limit)
   1088         return true;
   1089     }
   1090   }
   1091 
   1092   return false;
   1093 }
   1094 
   1095 /// UpdateBackTraceRegPressure - Traverse the back trace from header to the
   1096 /// current block and update their register pressures to reflect the effect
   1097 /// of hoisting MI from the current block to the preheader.
   1098 void MachineLICM::UpdateBackTraceRegPressure(const MachineInstr *MI) {
   1099   if (MI->isImplicitDef())
   1100     return;
   1101 
   1102   // First compute the 'cost' of the instruction, i.e. its contribution
   1103   // to register pressure.
   1104   DenseMap<unsigned, int> Cost;
   1105   for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) {
   1106     const MachineOperand &MO = MI->getOperand(i);
   1107     if (!MO.isReg() || MO.isImplicit())
   1108       continue;
   1109     unsigned Reg = MO.getReg();
   1110     if (!TargetRegisterInfo::isVirtualRegister(Reg))
   1111       continue;
   1112 
   1113     unsigned RCId, RCCost;
   1114     getRegisterClassIDAndCost(MI, Reg, i, RCId, RCCost);
   1115     if (MO.isDef()) {
   1116       DenseMap<unsigned, int>::iterator CI = Cost.find(RCId);
   1117       if (CI != Cost.end())
   1118         CI->second += RCCost;
   1119       else
   1120         Cost.insert(std::make_pair(RCId, RCCost));
   1121     } else if (isOperandKill(MO, MRI)) {
   1122       DenseMap<unsigned, int>::iterator CI = Cost.find(RCId);
   1123       if (CI != Cost.end())
   1124         CI->second -= RCCost;
   1125       else
   1126         Cost.insert(std::make_pair(RCId, -RCCost));
   1127     }
   1128   }
   1129 
   1130   // Update register pressure of blocks from loop header to current block.
   1131   for (unsigned i = 0, e = BackTrace.size(); i != e; ++i) {
   1132     SmallVectorImpl<unsigned> &RP = BackTrace[i];
   1133     for (DenseMap<unsigned, int>::iterator CI = Cost.begin(), CE = Cost.end();
   1134          CI != CE; ++CI) {
   1135       unsigned RCId = CI->first;
   1136       RP[RCId] += CI->second;
   1137     }
   1138   }
   1139 }
   1140 
   1141 /// IsProfitableToHoist - Return true if it is potentially profitable to hoist
   1142 /// the given loop invariant.
   1143 bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) {
   1144   if (MI.isImplicitDef())
   1145     return true;
   1146 
   1147   // Besides removing computation from the loop, hoisting an instruction has
   1148   // these effects:
   1149   //
   1150   // - The value defined by the instruction becomes live across the entire
   1151   //   loop. This increases register pressure in the loop.
   1152   //
   1153   // - If the value is used by a PHI in the loop, a copy will be required for
   1154   //   lowering the PHI after extending the live range.
   1155   //
   1156   // - When hoisting the last use of a value in the loop, that value no longer
   1157   //   needs to be live in the loop. This lowers register pressure in the loop.
   1158 
   1159   bool CheapInstr = IsCheapInstruction(MI);
   1160   bool CreatesCopy = HasLoopPHIUse(&MI);
   1161 
   1162   // Don't hoist a cheap instruction if it would create a copy in the loop.
   1163   if (CheapInstr && CreatesCopy) {
   1164     DEBUG(dbgs() << "Won't hoist cheap instr with loop PHI use: " << MI);
   1165     return false;
   1166   }
   1167 
   1168   // Rematerializable instructions should always be hoisted since the register
   1169   // allocator can just pull them down again when needed.
   1170   if (TII->isTriviallyReMaterializable(&MI, AA))
   1171     return true;
   1172 
   1173   // Estimate register pressure to determine whether to LICM the instruction.
   1174   // In low register pressure situation, we can be more aggressive about
   1175   // hoisting. Also, favors hoisting long latency instructions even in
   1176   // moderately high pressure situation.
   1177   // Cheap instructions will only be hoisted if they don't increase register
   1178   // pressure at all.
   1179   // FIXME: If there are long latency loop-invariant instructions inside the
   1180   // loop at this point, why didn't the optimizer's LICM hoist them?
   1181   DenseMap<unsigned, int> Cost;
   1182   for (unsigned i = 0, e = MI.getDesc().getNumOperands(); i != e; ++i) {
   1183     const MachineOperand &MO = MI.getOperand(i);
   1184     if (!MO.isReg() || MO.isImplicit())
   1185       continue;
   1186     unsigned Reg = MO.getReg();
   1187     if (!TargetRegisterInfo::isVirtualRegister(Reg))
   1188       continue;
   1189 
   1190     unsigned RCId, RCCost;
   1191     getRegisterClassIDAndCost(&MI, Reg, i, RCId, RCCost);
   1192     if (MO.isDef()) {
   1193       if (HasHighOperandLatency(MI, i, Reg)) {
   1194         DEBUG(dbgs() << "Hoist High Latency: " << MI);
   1195         ++NumHighLatency;
   1196         return true;
   1197       }
   1198       Cost[RCId] += RCCost;
   1199     } else if (isOperandKill(MO, MRI)) {
   1200       // Is a virtual register use is a kill, hoisting it out of the loop
   1201       // may actually reduce register pressure or be register pressure
   1202       // neutral.
   1203       Cost[RCId] -= RCCost;
   1204     }
   1205   }
   1206 
   1207   // Visit BBs from header to current BB, if hoisting this doesn't cause
   1208   // high register pressure, then it's safe to proceed.
   1209   if (!CanCauseHighRegPressure(Cost, CheapInstr)) {
   1210     DEBUG(dbgs() << "Hoist non-reg-pressure: " << MI);
   1211     ++NumLowRP;
   1212     return true;
   1213   }
   1214 
   1215   // Don't risk increasing register pressure if it would create copies.
   1216   if (CreatesCopy) {
   1217     DEBUG(dbgs() << "Won't hoist instr with loop PHI use: " << MI);
   1218     return false;
   1219   }
   1220 
   1221   // Do not "speculate" in high register pressure situation. If an
   1222   // instruction is not guaranteed to be executed in the loop, it's best to be
   1223   // conservative.
   1224   if (AvoidSpeculation &&
   1225       (!IsGuaranteedToExecute(MI.getParent()) && !MayCSE(&MI))) {
   1226     DEBUG(dbgs() << "Won't speculate: " << MI);
   1227     return false;
   1228   }
   1229 
   1230   // High register pressure situation, only hoist if the instruction is going
   1231   // to be remat'ed.
   1232   if (!TII->isTriviallyReMaterializable(&MI, AA) &&
   1233       !MI.isInvariantLoad(AA)) {
   1234     DEBUG(dbgs() << "Can't remat / high reg-pressure: " << MI);
   1235     return false;
   1236   }
   1237 
   1238   return true;
   1239 }
   1240 
   1241 MachineInstr *MachineLICM::ExtractHoistableLoad(MachineInstr *MI) {
   1242   // Don't unfold simple loads.
   1243   if (MI->canFoldAsLoad())
   1244     return nullptr;
   1245 
   1246   // If not, we may be able to unfold a load and hoist that.
   1247   // First test whether the instruction is loading from an amenable
   1248   // memory location.
   1249   if (!MI->isInvariantLoad(AA))
   1250     return nullptr;
   1251 
   1252   // Next determine the register class for a temporary register.
   1253   unsigned LoadRegIndex;
   1254   unsigned NewOpc =
   1255     TII->getOpcodeAfterMemoryUnfold(MI->getOpcode(),
   1256                                     /*UnfoldLoad=*/true,
   1257                                     /*UnfoldStore=*/false,
   1258                                     &LoadRegIndex);
   1259   if (NewOpc == 0) return nullptr;
   1260   const MCInstrDesc &MID = TII->get(NewOpc);
   1261   if (MID.getNumDefs() != 1) return nullptr;
   1262   MachineFunction &MF = *MI->getParent()->getParent();
   1263   const TargetRegisterClass *RC = TII->getRegClass(MID, LoadRegIndex, TRI, MF);
   1264   // Ok, we're unfolding. Create a temporary register and do the unfold.
   1265   unsigned Reg = MRI->createVirtualRegister(RC);
   1266 
   1267   SmallVector<MachineInstr *, 2> NewMIs;
   1268   bool Success =
   1269     TII->unfoldMemoryOperand(MF, MI, Reg,
   1270                              /*UnfoldLoad=*/true, /*UnfoldStore=*/false,
   1271                              NewMIs);
   1272   (void)Success;
   1273   assert(Success &&
   1274          "unfoldMemoryOperand failed when getOpcodeAfterMemoryUnfold "
   1275          "succeeded!");
   1276   assert(NewMIs.size() == 2 &&
   1277          "Unfolded a load into multiple instructions!");
   1278   MachineBasicBlock *MBB = MI->getParent();
   1279   MachineBasicBlock::iterator Pos = MI;
   1280   MBB->insert(Pos, NewMIs[0]);
   1281   MBB->insert(Pos, NewMIs[1]);
   1282   // If unfolding produced a load that wasn't loop-invariant or profitable to
   1283   // hoist, discard the new instructions and bail.
   1284   if (!IsLoopInvariantInst(*NewMIs[0]) || !IsProfitableToHoist(*NewMIs[0])) {
   1285     NewMIs[0]->eraseFromParent();
   1286     NewMIs[1]->eraseFromParent();
   1287     return nullptr;
   1288   }
   1289 
   1290   // Update register pressure for the unfolded instruction.
   1291   UpdateRegPressure(NewMIs[1]);
   1292 
   1293   // Otherwise we successfully unfolded a load that we can hoist.
   1294   MI->eraseFromParent();
   1295   return NewMIs[0];
   1296 }
   1297 
   1298 void MachineLICM::InitCSEMap(MachineBasicBlock *BB) {
   1299   for (MachineBasicBlock::iterator I = BB->begin(),E = BB->end(); I != E; ++I) {
   1300     const MachineInstr *MI = &*I;
   1301     unsigned Opcode = MI->getOpcode();
   1302     DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator
   1303       CI = CSEMap.find(Opcode);
   1304     if (CI != CSEMap.end())
   1305       CI->second.push_back(MI);
   1306     else {
   1307       std::vector<const MachineInstr*> CSEMIs;
   1308       CSEMIs.push_back(MI);
   1309       CSEMap.insert(std::make_pair(Opcode, CSEMIs));
   1310     }
   1311   }
   1312 }
   1313 
   1314 const MachineInstr*
   1315 MachineLICM::LookForDuplicate(const MachineInstr *MI,
   1316                               std::vector<const MachineInstr*> &PrevMIs) {
   1317   for (unsigned i = 0, e = PrevMIs.size(); i != e; ++i) {
   1318     const MachineInstr *PrevMI = PrevMIs[i];
   1319     if (TII->produceSameValue(MI, PrevMI, (PreRegAlloc ? MRI : nullptr)))
   1320       return PrevMI;
   1321   }
   1322   return nullptr;
   1323 }
   1324 
   1325 bool MachineLICM::EliminateCSE(MachineInstr *MI,
   1326           DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator &CI) {
   1327   // Do not CSE implicit_def so ProcessImplicitDefs can properly propagate
   1328   // the undef property onto uses.
   1329   if (CI == CSEMap.end() || MI->isImplicitDef())
   1330     return false;
   1331 
   1332   if (const MachineInstr *Dup = LookForDuplicate(MI, CI->second)) {
   1333     DEBUG(dbgs() << "CSEing " << *MI << " with " << *Dup);
   1334 
   1335     // Replace virtual registers defined by MI by their counterparts defined
   1336     // by Dup.
   1337     SmallVector<unsigned, 2> Defs;
   1338     for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
   1339       const MachineOperand &MO = MI->getOperand(i);
   1340 
   1341       // Physical registers may not differ here.
   1342       assert((!MO.isReg() || MO.getReg() == 0 ||
   1343               !TargetRegisterInfo::isPhysicalRegister(MO.getReg()) ||
   1344               MO.getReg() == Dup->getOperand(i).getReg()) &&
   1345              "Instructions with different phys regs are not identical!");
   1346 
   1347       if (MO.isReg() && MO.isDef() &&
   1348           !TargetRegisterInfo::isPhysicalRegister(MO.getReg()))
   1349         Defs.push_back(i);
   1350     }
   1351 
   1352     SmallVector<const TargetRegisterClass*, 2> OrigRCs;
   1353     for (unsigned i = 0, e = Defs.size(); i != e; ++i) {
   1354       unsigned Idx = Defs[i];
   1355       unsigned Reg = MI->getOperand(Idx).getReg();
   1356       unsigned DupReg = Dup->getOperand(Idx).getReg();
   1357       OrigRCs.push_back(MRI->getRegClass(DupReg));
   1358 
   1359       if (!MRI->constrainRegClass(DupReg, MRI->getRegClass(Reg))) {
   1360         // Restore old RCs if more than one defs.
   1361         for (unsigned j = 0; j != i; ++j)
   1362           MRI->setRegClass(Dup->getOperand(Defs[j]).getReg(), OrigRCs[j]);
   1363         return false;
   1364       }
   1365     }
   1366 
   1367     for (unsigned i = 0, e = Defs.size(); i != e; ++i) {
   1368       unsigned Idx = Defs[i];
   1369       unsigned Reg = MI->getOperand(Idx).getReg();
   1370       unsigned DupReg = Dup->getOperand(Idx).getReg();
   1371       MRI->replaceRegWith(Reg, DupReg);
   1372       MRI->clearKillFlags(DupReg);
   1373     }
   1374 
   1375     MI->eraseFromParent();
   1376     ++NumCSEed;
   1377     return true;
   1378   }
   1379   return false;
   1380 }
   1381 
   1382 /// MayCSE - Return true if the given instruction will be CSE'd if it's
   1383 /// hoisted out of the loop.
   1384 bool MachineLICM::MayCSE(MachineInstr *MI) {
   1385   unsigned Opcode = MI->getOpcode();
   1386   DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator
   1387     CI = CSEMap.find(Opcode);
   1388   // Do not CSE implicit_def so ProcessImplicitDefs can properly propagate
   1389   // the undef property onto uses.
   1390   if (CI == CSEMap.end() || MI->isImplicitDef())
   1391     return false;
   1392 
   1393   return LookForDuplicate(MI, CI->second) != nullptr;
   1394 }
   1395 
   1396 /// Hoist - When an instruction is found to use only loop invariant operands
   1397 /// that are safe to hoist, this instruction is called to do the dirty work.
   1398 ///
   1399 bool MachineLICM::Hoist(MachineInstr *MI, MachineBasicBlock *Preheader) {
   1400   // First check whether we should hoist this instruction.
   1401   if (!IsLoopInvariantInst(*MI) || !IsProfitableToHoist(*MI)) {
   1402     // If not, try unfolding a hoistable load.
   1403     MI = ExtractHoistableLoad(MI);
   1404     if (!MI) return false;
   1405   }
   1406 
   1407   // Now move the instructions to the predecessor, inserting it before any
   1408   // terminator instructions.
   1409   DEBUG({
   1410       dbgs() << "Hoisting " << *MI;
   1411       if (Preheader->getBasicBlock())
   1412         dbgs() << " to MachineBasicBlock "
   1413                << Preheader->getName();
   1414       if (MI->getParent()->getBasicBlock())
   1415         dbgs() << " from MachineBasicBlock "
   1416                << MI->getParent()->getName();
   1417       dbgs() << "\n";
   1418     });
   1419 
   1420   // If this is the first instruction being hoisted to the preheader,
   1421   // initialize the CSE map with potential common expressions.
   1422   if (FirstInLoop) {
   1423     InitCSEMap(Preheader);
   1424     FirstInLoop = false;
   1425   }
   1426 
   1427   // Look for opportunity to CSE the hoisted instruction.
   1428   unsigned Opcode = MI->getOpcode();
   1429   DenseMap<unsigned, std::vector<const MachineInstr*> >::iterator
   1430     CI = CSEMap.find(Opcode);
   1431   if (!EliminateCSE(MI, CI)) {
   1432     // Otherwise, splice the instruction to the preheader.
   1433     Preheader->splice(Preheader->getFirstTerminator(),MI->getParent(),MI);
   1434 
   1435     // Update register pressure for BBs from header to this block.
   1436     UpdateBackTraceRegPressure(MI);
   1437 
   1438     // Clear the kill flags of any register this instruction defines,
   1439     // since they may need to be live throughout the entire loop
   1440     // rather than just live for part of it.
   1441     for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
   1442       MachineOperand &MO = MI->getOperand(i);
   1443       if (MO.isReg() && MO.isDef() && !MO.isDead())
   1444         MRI->clearKillFlags(MO.getReg());
   1445     }
   1446 
   1447     // Add to the CSE map.
   1448     if (CI != CSEMap.end())
   1449       CI->second.push_back(MI);
   1450     else {
   1451       std::vector<const MachineInstr*> CSEMIs;
   1452       CSEMIs.push_back(MI);
   1453       CSEMap.insert(std::make_pair(Opcode, CSEMIs));
   1454     }
   1455   }
   1456 
   1457   ++NumHoisted;
   1458   Changed = true;
   1459 
   1460   return true;
   1461 }
   1462 
   1463 MachineBasicBlock *MachineLICM::getCurPreheader() {
   1464   // Determine the block to which to hoist instructions. If we can't find a
   1465   // suitable loop predecessor, we can't do any hoisting.
   1466 
   1467   // If we've tried to get a preheader and failed, don't try again.
   1468   if (CurPreheader == reinterpret_cast<MachineBasicBlock *>(-1))
   1469     return nullptr;
   1470 
   1471   if (!CurPreheader) {
   1472     CurPreheader = CurLoop->getLoopPreheader();
   1473     if (!CurPreheader) {
   1474       MachineBasicBlock *Pred = CurLoop->getLoopPredecessor();
   1475       if (!Pred) {
   1476         CurPreheader = reinterpret_cast<MachineBasicBlock *>(-1);
   1477         return nullptr;
   1478       }
   1479 
   1480       CurPreheader = Pred->SplitCriticalEdge(CurLoop->getHeader(), this);
   1481       if (!CurPreheader) {
   1482         CurPreheader = reinterpret_cast<MachineBasicBlock *>(-1);
   1483         return nullptr;
   1484       }
   1485     }
   1486   }
   1487   return CurPreheader;
   1488 }
   1489