Home | History | Annotate | Download | only in CodeGen
      1 //===-- MachineCSE.cpp - Machine Common Subexpression Elimination Pass ----===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // This pass performs global common subexpression elimination on machine
     11 // instructions using a scoped hash table based value numbering scheme. It
     12 // must be run while the machine function is still in SSA form.
     13 //
     14 //===----------------------------------------------------------------------===//
     15 
     16 #include "llvm/CodeGen/Passes.h"
     17 #include "llvm/ADT/DenseMap.h"
     18 #include "llvm/ADT/ScopedHashTable.h"
     19 #include "llvm/ADT/SmallSet.h"
     20 #include "llvm/ADT/Statistic.h"
     21 #include "llvm/Analysis/AliasAnalysis.h"
     22 #include "llvm/CodeGen/MachineDominators.h"
     23 #include "llvm/CodeGen/MachineInstr.h"
     24 #include "llvm/CodeGen/MachineRegisterInfo.h"
     25 #include "llvm/Support/Debug.h"
     26 #include "llvm/Support/RecyclingAllocator.h"
     27 #include "llvm/Target/TargetInstrInfo.h"
     28 using namespace llvm;
     29 
     30 #define DEBUG_TYPE "machine-cse"
     31 
     32 STATISTIC(NumCoalesces, "Number of copies coalesced");
     33 STATISTIC(NumCSEs,      "Number of common subexpression eliminated");
     34 STATISTIC(NumPhysCSEs,
     35           "Number of physreg referencing common subexpr eliminated");
     36 STATISTIC(NumCrossBBCSEs,
     37           "Number of cross-MBB physreg referencing CS eliminated");
     38 STATISTIC(NumCommutes,  "Number of copies coalesced after commuting");
     39 
     40 namespace {
     41   class MachineCSE : public MachineFunctionPass {
     42     const TargetInstrInfo *TII;
     43     const TargetRegisterInfo *TRI;
     44     AliasAnalysis *AA;
     45     MachineDominatorTree *DT;
     46     MachineRegisterInfo *MRI;
     47   public:
     48     static char ID; // Pass identification
     49     MachineCSE() : MachineFunctionPass(ID), LookAheadLimit(5), CurrVN(0) {
     50       initializeMachineCSEPass(*PassRegistry::getPassRegistry());
     51     }
     52 
     53     bool runOnMachineFunction(MachineFunction &MF) override;
     54 
     55     void getAnalysisUsage(AnalysisUsage &AU) const override {
     56       AU.setPreservesCFG();
     57       MachineFunctionPass::getAnalysisUsage(AU);
     58       AU.addRequired<AliasAnalysis>();
     59       AU.addPreservedID(MachineLoopInfoID);
     60       AU.addRequired<MachineDominatorTree>();
     61       AU.addPreserved<MachineDominatorTree>();
     62     }
     63 
     64     void releaseMemory() override {
     65       ScopeMap.clear();
     66       Exps.clear();
     67     }
     68 
     69   private:
     70     const unsigned LookAheadLimit;
     71     typedef RecyclingAllocator<BumpPtrAllocator,
     72         ScopedHashTableVal<MachineInstr*, unsigned> > AllocatorTy;
     73     typedef ScopedHashTable<MachineInstr*, unsigned,
     74         MachineInstrExpressionTrait, AllocatorTy> ScopedHTType;
     75     typedef ScopedHTType::ScopeTy ScopeType;
     76     DenseMap<MachineBasicBlock*, ScopeType*> ScopeMap;
     77     ScopedHTType VNT;
     78     SmallVector<MachineInstr*, 64> Exps;
     79     unsigned CurrVN;
     80 
     81     bool PerformTrivialCoalescing(MachineInstr *MI, MachineBasicBlock *MBB);
     82     bool isPhysDefTriviallyDead(unsigned Reg,
     83                                 MachineBasicBlock::const_iterator I,
     84                                 MachineBasicBlock::const_iterator E) const;
     85     bool hasLivePhysRegDefUses(const MachineInstr *MI,
     86                                const MachineBasicBlock *MBB,
     87                                SmallSet<unsigned,8> &PhysRefs,
     88                                SmallVectorImpl<unsigned> &PhysDefs,
     89                                bool &PhysUseDef) const;
     90     bool PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI,
     91                           SmallSet<unsigned,8> &PhysRefs,
     92                           SmallVectorImpl<unsigned> &PhysDefs,
     93                           bool &NonLocal) const;
     94     bool isCSECandidate(MachineInstr *MI);
     95     bool isProfitableToCSE(unsigned CSReg, unsigned Reg,
     96                            MachineInstr *CSMI, MachineInstr *MI);
     97     void EnterScope(MachineBasicBlock *MBB);
     98     void ExitScope(MachineBasicBlock *MBB);
     99     bool ProcessBlock(MachineBasicBlock *MBB);
    100     void ExitScopeIfDone(MachineDomTreeNode *Node,
    101                          DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren);
    102     bool PerformCSE(MachineDomTreeNode *Node);
    103   };
    104 } // end anonymous namespace
    105 
    106 char MachineCSE::ID = 0;
    107 char &llvm::MachineCSEID = MachineCSE::ID;
    108 INITIALIZE_PASS_BEGIN(MachineCSE, "machine-cse",
    109                 "Machine Common Subexpression Elimination", false, false)
    110 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
    111 INITIALIZE_AG_DEPENDENCY(AliasAnalysis)
    112 INITIALIZE_PASS_END(MachineCSE, "machine-cse",
    113                 "Machine Common Subexpression Elimination", false, false)
    114 
    115 bool MachineCSE::PerformTrivialCoalescing(MachineInstr *MI,
    116                                           MachineBasicBlock *MBB) {
    117   bool Changed = false;
    118   for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
    119     MachineOperand &MO = MI->getOperand(i);
    120     if (!MO.isReg() || !MO.isUse())
    121       continue;
    122     unsigned Reg = MO.getReg();
    123     if (!TargetRegisterInfo::isVirtualRegister(Reg))
    124       continue;
    125     if (!MRI->hasOneNonDBGUse(Reg))
    126       // Only coalesce single use copies. This ensure the copy will be
    127       // deleted.
    128       continue;
    129     MachineInstr *DefMI = MRI->getVRegDef(Reg);
    130     if (!DefMI->isCopy())
    131       continue;
    132     unsigned SrcReg = DefMI->getOperand(1).getReg();
    133     if (!TargetRegisterInfo::isVirtualRegister(SrcReg))
    134       continue;
    135     if (DefMI->getOperand(0).getSubReg())
    136       continue;
    137     // FIXME: We should trivially coalesce subregister copies to expose CSE
    138     // opportunities on instructions with truncated operands (see
    139     // cse-add-with-overflow.ll). This can be done here as follows:
    140     // if (SrcSubReg)
    141     //  RC = TRI->getMatchingSuperRegClass(MRI->getRegClass(SrcReg), RC,
    142     //                                     SrcSubReg);
    143     // MO.substVirtReg(SrcReg, SrcSubReg, *TRI);
    144     //
    145     // The 2-addr pass has been updated to handle coalesced subregs. However,
    146     // some machine-specific code still can't handle it.
    147     // To handle it properly we also need a way find a constrained subregister
    148     // class given a super-reg class and subreg index.
    149     if (DefMI->getOperand(1).getSubReg())
    150       continue;
    151     const TargetRegisterClass *RC = MRI->getRegClass(Reg);
    152     if (!MRI->constrainRegClass(SrcReg, RC))
    153       continue;
    154     DEBUG(dbgs() << "Coalescing: " << *DefMI);
    155     DEBUG(dbgs() << "***     to: " << *MI);
    156     MO.setReg(SrcReg);
    157     MRI->clearKillFlags(SrcReg);
    158     DefMI->eraseFromParent();
    159     ++NumCoalesces;
    160     Changed = true;
    161   }
    162 
    163   return Changed;
    164 }
    165 
    166 bool
    167 MachineCSE::isPhysDefTriviallyDead(unsigned Reg,
    168                                    MachineBasicBlock::const_iterator I,
    169                                    MachineBasicBlock::const_iterator E) const {
    170   unsigned LookAheadLeft = LookAheadLimit;
    171   while (LookAheadLeft) {
    172     // Skip over dbg_value's.
    173     while (I != E && I->isDebugValue())
    174       ++I;
    175 
    176     if (I == E)
    177       // Reached end of block, register is obviously dead.
    178       return true;
    179 
    180     bool SeenDef = false;
    181     for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
    182       const MachineOperand &MO = I->getOperand(i);
    183       if (MO.isRegMask() && MO.clobbersPhysReg(Reg))
    184         SeenDef = true;
    185       if (!MO.isReg() || !MO.getReg())
    186         continue;
    187       if (!TRI->regsOverlap(MO.getReg(), Reg))
    188         continue;
    189       if (MO.isUse())
    190         // Found a use!
    191         return false;
    192       SeenDef = true;
    193     }
    194     if (SeenDef)
    195       // See a def of Reg (or an alias) before encountering any use, it's
    196       // trivially dead.
    197       return true;
    198 
    199     --LookAheadLeft;
    200     ++I;
    201   }
    202   return false;
    203 }
    204 
    205 /// hasLivePhysRegDefUses - Return true if the specified instruction read/write
    206 /// physical registers (except for dead defs of physical registers). It also
    207 /// returns the physical register def by reference if it's the only one and the
    208 /// instruction does not uses a physical register.
    209 bool MachineCSE::hasLivePhysRegDefUses(const MachineInstr *MI,
    210                                        const MachineBasicBlock *MBB,
    211                                        SmallSet<unsigned,8> &PhysRefs,
    212                                        SmallVectorImpl<unsigned> &PhysDefs,
    213                                        bool &PhysUseDef) const{
    214   // First, add all uses to PhysRefs.
    215   for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
    216     const MachineOperand &MO = MI->getOperand(i);
    217     if (!MO.isReg() || MO.isDef())
    218       continue;
    219     unsigned Reg = MO.getReg();
    220     if (!Reg)
    221       continue;
    222     if (TargetRegisterInfo::isVirtualRegister(Reg))
    223       continue;
    224     // Reading constant physregs is ok.
    225     if (!MRI->isConstantPhysReg(Reg, *MBB->getParent()))
    226       for (MCRegAliasIterator AI(Reg, TRI, true); AI.isValid(); ++AI)
    227         PhysRefs.insert(*AI);
    228   }
    229 
    230   // Next, collect all defs into PhysDefs.  If any is already in PhysRefs
    231   // (which currently contains only uses), set the PhysUseDef flag.
    232   PhysUseDef = false;
    233   MachineBasicBlock::const_iterator I = MI; I = std::next(I);
    234   for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
    235     const MachineOperand &MO = MI->getOperand(i);
    236     if (!MO.isReg() || !MO.isDef())
    237       continue;
    238     unsigned Reg = MO.getReg();
    239     if (!Reg)
    240       continue;
    241     if (TargetRegisterInfo::isVirtualRegister(Reg))
    242       continue;
    243     // Check against PhysRefs even if the def is "dead".
    244     if (PhysRefs.count(Reg))
    245       PhysUseDef = true;
    246     // If the def is dead, it's ok. But the def may not marked "dead". That's
    247     // common since this pass is run before livevariables. We can scan
    248     // forward a few instructions and check if it is obviously dead.
    249     if (!MO.isDead() && !isPhysDefTriviallyDead(Reg, I, MBB->end()))
    250       PhysDefs.push_back(Reg);
    251   }
    252 
    253   // Finally, add all defs to PhysRefs as well.
    254   for (unsigned i = 0, e = PhysDefs.size(); i != e; ++i)
    255     for (MCRegAliasIterator AI(PhysDefs[i], TRI, true); AI.isValid(); ++AI)
    256       PhysRefs.insert(*AI);
    257 
    258   return !PhysRefs.empty();
    259 }
    260 
    261 bool MachineCSE::PhysRegDefsReach(MachineInstr *CSMI, MachineInstr *MI,
    262                                   SmallSet<unsigned,8> &PhysRefs,
    263                                   SmallVectorImpl<unsigned> &PhysDefs,
    264                                   bool &NonLocal) const {
    265   // For now conservatively returns false if the common subexpression is
    266   // not in the same basic block as the given instruction. The only exception
    267   // is if the common subexpression is in the sole predecessor block.
    268   const MachineBasicBlock *MBB = MI->getParent();
    269   const MachineBasicBlock *CSMBB = CSMI->getParent();
    270 
    271   bool CrossMBB = false;
    272   if (CSMBB != MBB) {
    273     if (MBB->pred_size() != 1 || *MBB->pred_begin() != CSMBB)
    274       return false;
    275 
    276     for (unsigned i = 0, e = PhysDefs.size(); i != e; ++i) {
    277       if (MRI->isAllocatable(PhysDefs[i]) || MRI->isReserved(PhysDefs[i]))
    278         // Avoid extending live range of physical registers if they are
    279         //allocatable or reserved.
    280         return false;
    281     }
    282     CrossMBB = true;
    283   }
    284   MachineBasicBlock::const_iterator I = CSMI; I = std::next(I);
    285   MachineBasicBlock::const_iterator E = MI;
    286   MachineBasicBlock::const_iterator EE = CSMBB->end();
    287   unsigned LookAheadLeft = LookAheadLimit;
    288   while (LookAheadLeft) {
    289     // Skip over dbg_value's.
    290     while (I != E && I != EE && I->isDebugValue())
    291       ++I;
    292 
    293     if (I == EE) {
    294       assert(CrossMBB && "Reaching end-of-MBB without finding MI?");
    295       (void)CrossMBB;
    296       CrossMBB = false;
    297       NonLocal = true;
    298       I = MBB->begin();
    299       EE = MBB->end();
    300       continue;
    301     }
    302 
    303     if (I == E)
    304       return true;
    305 
    306     for (unsigned i = 0, e = I->getNumOperands(); i != e; ++i) {
    307       const MachineOperand &MO = I->getOperand(i);
    308       // RegMasks go on instructions like calls that clobber lots of physregs.
    309       // Don't attempt to CSE across such an instruction.
    310       if (MO.isRegMask())
    311         return false;
    312       if (!MO.isReg() || !MO.isDef())
    313         continue;
    314       unsigned MOReg = MO.getReg();
    315       if (TargetRegisterInfo::isVirtualRegister(MOReg))
    316         continue;
    317       if (PhysRefs.count(MOReg))
    318         return false;
    319     }
    320 
    321     --LookAheadLeft;
    322     ++I;
    323   }
    324 
    325   return false;
    326 }
    327 
    328 bool MachineCSE::isCSECandidate(MachineInstr *MI) {
    329   if (MI->isPosition() || MI->isPHI() || MI->isImplicitDef() || MI->isKill() ||
    330       MI->isInlineAsm() || MI->isDebugValue())
    331     return false;
    332 
    333   // Ignore copies.
    334   if (MI->isCopyLike())
    335     return false;
    336 
    337   // Ignore stuff that we obviously can't move.
    338   if (MI->mayStore() || MI->isCall() || MI->isTerminator() ||
    339       MI->hasUnmodeledSideEffects())
    340     return false;
    341 
    342   if (MI->mayLoad()) {
    343     // Okay, this instruction does a load. As a refinement, we allow the target
    344     // to decide whether the loaded value is actually a constant. If so, we can
    345     // actually use it as a load.
    346     if (!MI->isInvariantLoad(AA))
    347       // FIXME: we should be able to hoist loads with no other side effects if
    348       // there are no other instructions which can change memory in this loop.
    349       // This is a trivial form of alias analysis.
    350       return false;
    351   }
    352   return true;
    353 }
    354 
    355 /// isProfitableToCSE - Return true if it's profitable to eliminate MI with a
    356 /// common expression that defines Reg.
    357 bool MachineCSE::isProfitableToCSE(unsigned CSReg, unsigned Reg,
    358                                    MachineInstr *CSMI, MachineInstr *MI) {
    359   // FIXME: Heuristics that works around the lack the live range splitting.
    360 
    361   // If CSReg is used at all uses of Reg, CSE should not increase register
    362   // pressure of CSReg.
    363   bool MayIncreasePressure = true;
    364   if (TargetRegisterInfo::isVirtualRegister(CSReg) &&
    365       TargetRegisterInfo::isVirtualRegister(Reg)) {
    366     MayIncreasePressure = false;
    367     SmallPtrSet<MachineInstr*, 8> CSUses;
    368     for (MachineInstr &MI : MRI->use_nodbg_instructions(CSReg)) {
    369       CSUses.insert(&MI);
    370     }
    371     for (MachineInstr &MI : MRI->use_nodbg_instructions(Reg)) {
    372       if (!CSUses.count(&MI)) {
    373         MayIncreasePressure = true;
    374         break;
    375       }
    376     }
    377   }
    378   if (!MayIncreasePressure) return true;
    379 
    380   // Heuristics #1: Don't CSE "cheap" computation if the def is not local or in
    381   // an immediate predecessor. We don't want to increase register pressure and
    382   // end up causing other computation to be spilled.
    383   if (MI->isAsCheapAsAMove()) {
    384     MachineBasicBlock *CSBB = CSMI->getParent();
    385     MachineBasicBlock *BB = MI->getParent();
    386     if (CSBB != BB && !CSBB->isSuccessor(BB))
    387       return false;
    388   }
    389 
    390   // Heuristics #2: If the expression doesn't not use a vr and the only use
    391   // of the redundant computation are copies, do not cse.
    392   bool HasVRegUse = false;
    393   for (unsigned i = 0, e = MI->getNumOperands(); i != e; ++i) {
    394     const MachineOperand &MO = MI->getOperand(i);
    395     if (MO.isReg() && MO.isUse() &&
    396         TargetRegisterInfo::isVirtualRegister(MO.getReg())) {
    397       HasVRegUse = true;
    398       break;
    399     }
    400   }
    401   if (!HasVRegUse) {
    402     bool HasNonCopyUse = false;
    403     for (MachineInstr &MI : MRI->use_nodbg_instructions(Reg)) {
    404       // Ignore copies.
    405       if (!MI.isCopyLike()) {
    406         HasNonCopyUse = true;
    407         break;
    408       }
    409     }
    410     if (!HasNonCopyUse)
    411       return false;
    412   }
    413 
    414   // Heuristics #3: If the common subexpression is used by PHIs, do not reuse
    415   // it unless the defined value is already used in the BB of the new use.
    416   bool HasPHI = false;
    417   SmallPtrSet<MachineBasicBlock*, 4> CSBBs;
    418   for (MachineInstr &MI : MRI->use_nodbg_instructions(CSReg)) {
    419     HasPHI |= MI.isPHI();
    420     CSBBs.insert(MI.getParent());
    421   }
    422 
    423   if (!HasPHI)
    424     return true;
    425   return CSBBs.count(MI->getParent());
    426 }
    427 
    428 void MachineCSE::EnterScope(MachineBasicBlock *MBB) {
    429   DEBUG(dbgs() << "Entering: " << MBB->getName() << '\n');
    430   ScopeType *Scope = new ScopeType(VNT);
    431   ScopeMap[MBB] = Scope;
    432 }
    433 
    434 void MachineCSE::ExitScope(MachineBasicBlock *MBB) {
    435   DEBUG(dbgs() << "Exiting: " << MBB->getName() << '\n');
    436   DenseMap<MachineBasicBlock*, ScopeType*>::iterator SI = ScopeMap.find(MBB);
    437   assert(SI != ScopeMap.end());
    438   delete SI->second;
    439   ScopeMap.erase(SI);
    440 }
    441 
    442 bool MachineCSE::ProcessBlock(MachineBasicBlock *MBB) {
    443   bool Changed = false;
    444 
    445   SmallVector<std::pair<unsigned, unsigned>, 8> CSEPairs;
    446   SmallVector<unsigned, 2> ImplicitDefsToUpdate;
    447   for (MachineBasicBlock::iterator I = MBB->begin(), E = MBB->end(); I != E; ) {
    448     MachineInstr *MI = &*I;
    449     ++I;
    450 
    451     if (!isCSECandidate(MI))
    452       continue;
    453 
    454     bool FoundCSE = VNT.count(MI);
    455     if (!FoundCSE) {
    456       // Look for trivial copy coalescing opportunities.
    457       if (PerformTrivialCoalescing(MI, MBB)) {
    458         Changed = true;
    459 
    460         // After coalescing MI itself may become a copy.
    461         if (MI->isCopyLike())
    462           continue;
    463         FoundCSE = VNT.count(MI);
    464       }
    465     }
    466 
    467     // Commute commutable instructions.
    468     bool Commuted = false;
    469     if (!FoundCSE && MI->isCommutable()) {
    470       MachineInstr *NewMI = TII->commuteInstruction(MI);
    471       if (NewMI) {
    472         Commuted = true;
    473         FoundCSE = VNT.count(NewMI);
    474         if (NewMI != MI) {
    475           // New instruction. It doesn't need to be kept.
    476           NewMI->eraseFromParent();
    477           Changed = true;
    478         } else if (!FoundCSE)
    479           // MI was changed but it didn't help, commute it back!
    480           (void)TII->commuteInstruction(MI);
    481       }
    482     }
    483 
    484     // If the instruction defines physical registers and the values *may* be
    485     // used, then it's not safe to replace it with a common subexpression.
    486     // It's also not safe if the instruction uses physical registers.
    487     bool CrossMBBPhysDef = false;
    488     SmallSet<unsigned, 8> PhysRefs;
    489     SmallVector<unsigned, 2> PhysDefs;
    490     bool PhysUseDef = false;
    491     if (FoundCSE && hasLivePhysRegDefUses(MI, MBB, PhysRefs,
    492                                           PhysDefs, PhysUseDef)) {
    493       FoundCSE = false;
    494 
    495       // ... Unless the CS is local or is in the sole predecessor block
    496       // and it also defines the physical register which is not clobbered
    497       // in between and the physical register uses were not clobbered.
    498       // This can never be the case if the instruction both uses and
    499       // defines the same physical register, which was detected above.
    500       if (!PhysUseDef) {
    501         unsigned CSVN = VNT.lookup(MI);
    502         MachineInstr *CSMI = Exps[CSVN];
    503         if (PhysRegDefsReach(CSMI, MI, PhysRefs, PhysDefs, CrossMBBPhysDef))
    504           FoundCSE = true;
    505       }
    506     }
    507 
    508     if (!FoundCSE) {
    509       VNT.insert(MI, CurrVN++);
    510       Exps.push_back(MI);
    511       continue;
    512     }
    513 
    514     // Found a common subexpression, eliminate it.
    515     unsigned CSVN = VNT.lookup(MI);
    516     MachineInstr *CSMI = Exps[CSVN];
    517     DEBUG(dbgs() << "Examining: " << *MI);
    518     DEBUG(dbgs() << "*** Found a common subexpression: " << *CSMI);
    519 
    520     // Check if it's profitable to perform this CSE.
    521     bool DoCSE = true;
    522     unsigned NumDefs = MI->getDesc().getNumDefs() +
    523                        MI->getDesc().getNumImplicitDefs();
    524 
    525     for (unsigned i = 0, e = MI->getNumOperands(); NumDefs && i != e; ++i) {
    526       MachineOperand &MO = MI->getOperand(i);
    527       if (!MO.isReg() || !MO.isDef())
    528         continue;
    529       unsigned OldReg = MO.getReg();
    530       unsigned NewReg = CSMI->getOperand(i).getReg();
    531 
    532       // Go through implicit defs of CSMI and MI, if a def is not dead at MI,
    533       // we should make sure it is not dead at CSMI.
    534       if (MO.isImplicit() && !MO.isDead() && CSMI->getOperand(i).isDead())
    535         ImplicitDefsToUpdate.push_back(i);
    536       if (OldReg == NewReg) {
    537         --NumDefs;
    538         continue;
    539       }
    540 
    541       assert(TargetRegisterInfo::isVirtualRegister(OldReg) &&
    542              TargetRegisterInfo::isVirtualRegister(NewReg) &&
    543              "Do not CSE physical register defs!");
    544 
    545       if (!isProfitableToCSE(NewReg, OldReg, CSMI, MI)) {
    546         DEBUG(dbgs() << "*** Not profitable, avoid CSE!\n");
    547         DoCSE = false;
    548         break;
    549       }
    550 
    551       // Don't perform CSE if the result of the old instruction cannot exist
    552       // within the register class of the new instruction.
    553       const TargetRegisterClass *OldRC = MRI->getRegClass(OldReg);
    554       if (!MRI->constrainRegClass(NewReg, OldRC)) {
    555         DEBUG(dbgs() << "*** Not the same register class, avoid CSE!\n");
    556         DoCSE = false;
    557         break;
    558       }
    559 
    560       CSEPairs.push_back(std::make_pair(OldReg, NewReg));
    561       --NumDefs;
    562     }
    563 
    564     // Actually perform the elimination.
    565     if (DoCSE) {
    566       for (unsigned i = 0, e = CSEPairs.size(); i != e; ++i) {
    567         MRI->replaceRegWith(CSEPairs[i].first, CSEPairs[i].second);
    568         MRI->clearKillFlags(CSEPairs[i].second);
    569       }
    570 
    571       // Go through implicit defs of CSMI and MI, if a def is not dead at MI,
    572       // we should make sure it is not dead at CSMI.
    573       for (unsigned i = 0, e = ImplicitDefsToUpdate.size(); i != e; ++i)
    574         CSMI->getOperand(ImplicitDefsToUpdate[i]).setIsDead(false);
    575 
    576       if (CrossMBBPhysDef) {
    577         // Add physical register defs now coming in from a predecessor to MBB
    578         // livein list.
    579         while (!PhysDefs.empty()) {
    580           unsigned LiveIn = PhysDefs.pop_back_val();
    581           if (!MBB->isLiveIn(LiveIn))
    582             MBB->addLiveIn(LiveIn);
    583         }
    584         ++NumCrossBBCSEs;
    585       }
    586 
    587       MI->eraseFromParent();
    588       ++NumCSEs;
    589       if (!PhysRefs.empty())
    590         ++NumPhysCSEs;
    591       if (Commuted)
    592         ++NumCommutes;
    593       Changed = true;
    594     } else {
    595       VNT.insert(MI, CurrVN++);
    596       Exps.push_back(MI);
    597     }
    598     CSEPairs.clear();
    599     ImplicitDefsToUpdate.clear();
    600   }
    601 
    602   return Changed;
    603 }
    604 
    605 /// ExitScopeIfDone - Destroy scope for the MBB that corresponds to the given
    606 /// dominator tree node if its a leaf or all of its children are done. Walk
    607 /// up the dominator tree to destroy ancestors which are now done.
    608 void
    609 MachineCSE::ExitScopeIfDone(MachineDomTreeNode *Node,
    610                         DenseMap<MachineDomTreeNode*, unsigned> &OpenChildren) {
    611   if (OpenChildren[Node])
    612     return;
    613 
    614   // Pop scope.
    615   ExitScope(Node->getBlock());
    616 
    617   // Now traverse upwards to pop ancestors whose offsprings are all done.
    618   while (MachineDomTreeNode *Parent = Node->getIDom()) {
    619     unsigned Left = --OpenChildren[Parent];
    620     if (Left != 0)
    621       break;
    622     ExitScope(Parent->getBlock());
    623     Node = Parent;
    624   }
    625 }
    626 
    627 bool MachineCSE::PerformCSE(MachineDomTreeNode *Node) {
    628   SmallVector<MachineDomTreeNode*, 32> Scopes;
    629   SmallVector<MachineDomTreeNode*, 8> WorkList;
    630   DenseMap<MachineDomTreeNode*, unsigned> OpenChildren;
    631 
    632   CurrVN = 0;
    633 
    634   // Perform a DFS walk to determine the order of visit.
    635   WorkList.push_back(Node);
    636   do {
    637     Node = WorkList.pop_back_val();
    638     Scopes.push_back(Node);
    639     const std::vector<MachineDomTreeNode*> &Children = Node->getChildren();
    640     unsigned NumChildren = Children.size();
    641     OpenChildren[Node] = NumChildren;
    642     for (unsigned i = 0; i != NumChildren; ++i) {
    643       MachineDomTreeNode *Child = Children[i];
    644       WorkList.push_back(Child);
    645     }
    646   } while (!WorkList.empty());
    647 
    648   // Now perform CSE.
    649   bool Changed = false;
    650   for (unsigned i = 0, e = Scopes.size(); i != e; ++i) {
    651     MachineDomTreeNode *Node = Scopes[i];
    652     MachineBasicBlock *MBB = Node->getBlock();
    653     EnterScope(MBB);
    654     Changed |= ProcessBlock(MBB);
    655     // If it's a leaf node, it's done. Traverse upwards to pop ancestors.
    656     ExitScopeIfDone(Node, OpenChildren);
    657   }
    658 
    659   return Changed;
    660 }
    661 
    662 bool MachineCSE::runOnMachineFunction(MachineFunction &MF) {
    663   if (skipOptnoneFunction(*MF.getFunction()))
    664     return false;
    665 
    666   TII = MF.getTarget().getInstrInfo();
    667   TRI = MF.getTarget().getRegisterInfo();
    668   MRI = &MF.getRegInfo();
    669   AA = &getAnalysis<AliasAnalysis>();
    670   DT = &getAnalysis<MachineDominatorTree>();
    671   return PerformCSE(DT->getRootNode());
    672 }
    673