Home | History | Annotate | Download | only in CodeGen
      1 //===-- PeepholeOptimizer.cpp - Peephole Optimizations --------------------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // Perform peephole optimizations on the machine code:
     11 //
     12 // - Optimize Extensions
     13 //
     14 //     Optimization of sign / zero extension instructions. It may be extended to
     15 //     handle other instructions with similar properties.
     16 //
     17 //     On some targets, some instructions, e.g. X86 sign / zero extension, may
     18 //     leave the source value in the lower part of the result. This optimization
     19 //     will replace some uses of the pre-extension value with uses of the
     20 //     sub-register of the results.
     21 //
     22 // - Optimize Comparisons
     23 //
     24 //     Optimization of comparison instructions. For instance, in this code:
     25 //
     26 //       sub r1, 1
     27 //       cmp r1, 0
     28 //       bz  L1
     29 //
     30 //     If the "sub" instruction all ready sets (or could be modified to set) the
     31 //     same flag that the "cmp" instruction sets and that "bz" uses, then we can
     32 //     eliminate the "cmp" instruction.
     33 //
     34 // - Optimize Bitcast pairs:
     35 //
     36 //     v1 = bitcast v0
     37 //     v2 = bitcast v1
     38 //        = v2
     39 //   =>
     40 //     v1 = bitcast v0
     41 //        = v0
     42 //
     43 //===----------------------------------------------------------------------===//
     44 
     45 #define DEBUG_TYPE "peephole-opt"
     46 #include "llvm/CodeGen/Passes.h"
     47 #include "llvm/CodeGen/MachineDominators.h"
     48 #include "llvm/CodeGen/MachineInstrBuilder.h"
     49 #include "llvm/CodeGen/MachineRegisterInfo.h"
     50 #include "llvm/Target/TargetInstrInfo.h"
     51 #include "llvm/Target/TargetRegisterInfo.h"
     52 #include "llvm/Support/CommandLine.h"
     53 #include "llvm/ADT/DenseMap.h"
     54 #include "llvm/ADT/SmallPtrSet.h"
     55 #include "llvm/ADT/SmallSet.h"
     56 #include "llvm/ADT/Statistic.h"
     57 using namespace llvm;
     58 
     59 // Optimize Extensions
     60 static cl::opt<bool>
     61 Aggressive("aggressive-ext-opt", cl::Hidden,
     62            cl::desc("Aggressive extension optimization"));
     63 
     64 static cl::opt<bool>
     65 DisablePeephole("disable-peephole", cl::Hidden, cl::init(false),
     66                 cl::desc("Disable the peephole optimizer"));
     67 
     68 STATISTIC(NumReuse,      "Number of extension results reused");
     69 STATISTIC(NumBitcasts,   "Number of bitcasts eliminated");
     70 STATISTIC(NumCmps,       "Number of compares eliminated");
     71 STATISTIC(NumImmFold,    "Number of move immediate folded");
     72 
     73 namespace {
     74   class PeepholeOptimizer : public MachineFunctionPass {
     75     const TargetMachine   *TM;
     76     const TargetInstrInfo *TII;
     77     MachineRegisterInfo   *MRI;
     78     MachineDominatorTree  *DT;  // Machine dominator tree
     79 
     80   public:
     81     static char ID; // Pass identification
     82     PeepholeOptimizer() : MachineFunctionPass(ID) {
     83       initializePeepholeOptimizerPass(*PassRegistry::getPassRegistry());
     84     }
     85 
     86     virtual bool runOnMachineFunction(MachineFunction &MF);
     87 
     88     virtual void getAnalysisUsage(AnalysisUsage &AU) const {
     89       AU.setPreservesCFG();
     90       MachineFunctionPass::getAnalysisUsage(AU);
     91       if (Aggressive) {
     92         AU.addRequired<MachineDominatorTree>();
     93         AU.addPreserved<MachineDominatorTree>();
     94       }
     95     }
     96 
     97   private:
     98     bool OptimizeBitcastInstr(MachineInstr *MI, MachineBasicBlock *MBB);
     99     bool OptimizeCmpInstr(MachineInstr *MI, MachineBasicBlock *MBB);
    100     bool OptimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB,
    101                           SmallPtrSet<MachineInstr*, 8> &LocalMIs);
    102     bool isMoveImmediate(MachineInstr *MI,
    103                          SmallSet<unsigned, 4> &ImmDefRegs,
    104                          DenseMap<unsigned, MachineInstr*> &ImmDefMIs);
    105     bool FoldImmediate(MachineInstr *MI, MachineBasicBlock *MBB,
    106                        SmallSet<unsigned, 4> &ImmDefRegs,
    107                        DenseMap<unsigned, MachineInstr*> &ImmDefMIs);
    108   };
    109 }
    110 
    111 char PeepholeOptimizer::ID = 0;
    112 char &llvm::PeepholeOptimizerID = PeepholeOptimizer::ID;
    113 INITIALIZE_PASS_BEGIN(PeepholeOptimizer, "peephole-opts",
    114                 "Peephole Optimizations", false, false)
    115 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
    116 INITIALIZE_PASS_END(PeepholeOptimizer, "peephole-opts",
    117                 "Peephole Optimizations", false, false)
    118 
    119 /// OptimizeExtInstr - If instruction is a copy-like instruction, i.e. it reads
    120 /// a single register and writes a single register and it does not modify the
    121 /// source, and if the source value is preserved as a sub-register of the
    122 /// result, then replace all reachable uses of the source with the subreg of the
    123 /// result.
    124 ///
    125 /// Do not generate an EXTRACT that is used only in a debug use, as this changes
    126 /// the code. Since this code does not currently share EXTRACTs, just ignore all
    127 /// debug uses.
    128 bool PeepholeOptimizer::
    129 OptimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB,
    130                  SmallPtrSet<MachineInstr*, 8> &LocalMIs) {
    131   unsigned SrcReg, DstReg, SubIdx;
    132   if (!TII->isCoalescableExtInstr(*MI, SrcReg, DstReg, SubIdx))
    133     return false;
    134 
    135   if (TargetRegisterInfo::isPhysicalRegister(DstReg) ||
    136       TargetRegisterInfo::isPhysicalRegister(SrcReg))
    137     return false;
    138 
    139   MachineRegisterInfo::use_nodbg_iterator UI = MRI->use_nodbg_begin(SrcReg);
    140   if (++UI == MRI->use_nodbg_end())
    141     // No other uses.
    142     return false;
    143 
    144   // The source has other uses. See if we can replace the other uses with use of
    145   // the result of the extension.
    146   SmallPtrSet<MachineBasicBlock*, 4> ReachedBBs;
    147   UI = MRI->use_nodbg_begin(DstReg);
    148   for (MachineRegisterInfo::use_nodbg_iterator UE = MRI->use_nodbg_end();
    149        UI != UE; ++UI)
    150     ReachedBBs.insert(UI->getParent());
    151 
    152   // Uses that are in the same BB of uses of the result of the instruction.
    153   SmallVector<MachineOperand*, 8> Uses;
    154 
    155   // Uses that the result of the instruction can reach.
    156   SmallVector<MachineOperand*, 8> ExtendedUses;
    157 
    158   bool ExtendLife = true;
    159   UI = MRI->use_nodbg_begin(SrcReg);
    160   for (MachineRegisterInfo::use_nodbg_iterator UE = MRI->use_nodbg_end();
    161        UI != UE; ++UI) {
    162     MachineOperand &UseMO = UI.getOperand();
    163     MachineInstr *UseMI = &*UI;
    164     if (UseMI == MI)
    165       continue;
    166 
    167     if (UseMI->isPHI()) {
    168       ExtendLife = false;
    169       continue;
    170     }
    171 
    172     // It's an error to translate this:
    173     //
    174     //    %reg1025 = <sext> %reg1024
    175     //     ...
    176     //    %reg1026 = SUBREG_TO_REG 0, %reg1024, 4
    177     //
    178     // into this:
    179     //
    180     //    %reg1025 = <sext> %reg1024
    181     //     ...
    182     //    %reg1027 = COPY %reg1025:4
    183     //    %reg1026 = SUBREG_TO_REG 0, %reg1027, 4
    184     //
    185     // The problem here is that SUBREG_TO_REG is there to assert that an
    186     // implicit zext occurs. It doesn't insert a zext instruction. If we allow
    187     // the COPY here, it will give us the value after the <sext>, not the
    188     // original value of %reg1024 before <sext>.
    189     if (UseMI->getOpcode() == TargetOpcode::SUBREG_TO_REG)
    190       continue;
    191 
    192     MachineBasicBlock *UseMBB = UseMI->getParent();
    193     if (UseMBB == MBB) {
    194       // Local uses that come after the extension.
    195       if (!LocalMIs.count(UseMI))
    196         Uses.push_back(&UseMO);
    197     } else if (ReachedBBs.count(UseMBB)) {
    198       // Non-local uses where the result of the extension is used. Always
    199       // replace these unless it's a PHI.
    200       Uses.push_back(&UseMO);
    201     } else if (Aggressive && DT->dominates(MBB, UseMBB)) {
    202       // We may want to extend the live range of the extension result in order
    203       // to replace these uses.
    204       ExtendedUses.push_back(&UseMO);
    205     } else {
    206       // Both will be live out of the def MBB anyway. Don't extend live range of
    207       // the extension result.
    208       ExtendLife = false;
    209       break;
    210     }
    211   }
    212 
    213   if (ExtendLife && !ExtendedUses.empty())
    214     // Extend the liveness of the extension result.
    215     std::copy(ExtendedUses.begin(), ExtendedUses.end(),
    216               std::back_inserter(Uses));
    217 
    218   // Now replace all uses.
    219   bool Changed = false;
    220   if (!Uses.empty()) {
    221     SmallPtrSet<MachineBasicBlock*, 4> PHIBBs;
    222 
    223     // Look for PHI uses of the extended result, we don't want to extend the
    224     // liveness of a PHI input. It breaks all kinds of assumptions down
    225     // stream. A PHI use is expected to be the kill of its source values.
    226     UI = MRI->use_nodbg_begin(DstReg);
    227     for (MachineRegisterInfo::use_nodbg_iterator
    228            UE = MRI->use_nodbg_end(); UI != UE; ++UI)
    229       if (UI->isPHI())
    230         PHIBBs.insert(UI->getParent());
    231 
    232     const TargetRegisterClass *RC = MRI->getRegClass(SrcReg);
    233     for (unsigned i = 0, e = Uses.size(); i != e; ++i) {
    234       MachineOperand *UseMO = Uses[i];
    235       MachineInstr *UseMI = UseMO->getParent();
    236       MachineBasicBlock *UseMBB = UseMI->getParent();
    237       if (PHIBBs.count(UseMBB))
    238         continue;
    239 
    240       // About to add uses of DstReg, clear DstReg's kill flags.
    241       if (!Changed)
    242         MRI->clearKillFlags(DstReg);
    243 
    244       unsigned NewVR = MRI->createVirtualRegister(RC);
    245       BuildMI(*UseMBB, UseMI, UseMI->getDebugLoc(),
    246               TII->get(TargetOpcode::COPY), NewVR)
    247         .addReg(DstReg, 0, SubIdx);
    248 
    249       UseMO->setReg(NewVR);
    250       ++NumReuse;
    251       Changed = true;
    252     }
    253   }
    254 
    255   return Changed;
    256 }
    257 
    258 /// OptimizeBitcastInstr - If the instruction is a bitcast instruction A that
    259 /// cannot be optimized away during isel (e.g. ARM::VMOVSR, which bitcast
    260 /// a value cross register classes), and the source is defined by another
    261 /// bitcast instruction B. And if the register class of source of B matches
    262 /// the register class of instruction A, then it is legal to replace all uses
    263 /// of the def of A with source of B. e.g.
    264 ///   %vreg0<def> = VMOVSR %vreg1
    265 ///   %vreg3<def> = VMOVRS %vreg0
    266 ///   Replace all uses of vreg3 with vreg1.
    267 
    268 bool PeepholeOptimizer::OptimizeBitcastInstr(MachineInstr *MI,
    269                                              MachineBasicBlock *MBB) {
    270   unsigned NumDefs = MI->getDesc().getNumDefs();
    271   unsigned NumSrcs = MI->getDesc().getNumOperands() - NumDefs;
    272   if (NumDefs != 1)
    273     return false;
    274 
    275   unsigned Def = 0;
    276   unsigned Src = 0;
    277   for (unsigned i = 0, e = NumDefs + NumSrcs; i != e; ++i) {
    278     const MachineOperand &MO = MI->getOperand(i);
    279     if (!MO.isReg())
    280       continue;
    281     unsigned Reg = MO.getReg();
    282     if (!Reg)
    283       continue;
    284     if (MO.isDef())
    285       Def = Reg;
    286     else if (Src)
    287       // Multiple sources?
    288       return false;
    289     else
    290       Src = Reg;
    291   }
    292 
    293   assert(Def && Src && "Malformed bitcast instruction!");
    294 
    295   MachineInstr *DefMI = MRI->getVRegDef(Src);
    296   if (!DefMI || !DefMI->isBitcast())
    297     return false;
    298 
    299   unsigned SrcSrc = 0;
    300   NumDefs = DefMI->getDesc().getNumDefs();
    301   NumSrcs = DefMI->getDesc().getNumOperands() - NumDefs;
    302   if (NumDefs != 1)
    303     return false;
    304   for (unsigned i = 0, e = NumDefs + NumSrcs; i != e; ++i) {
    305     const MachineOperand &MO = DefMI->getOperand(i);
    306     if (!MO.isReg() || MO.isDef())
    307       continue;
    308     unsigned Reg = MO.getReg();
    309     if (!Reg)
    310       continue;
    311     if (!MO.isDef()) {
    312       if (SrcSrc)
    313         // Multiple sources?
    314         return false;
    315       else
    316         SrcSrc = Reg;
    317     }
    318   }
    319 
    320   if (MRI->getRegClass(SrcSrc) != MRI->getRegClass(Def))
    321     return false;
    322 
    323   MRI->replaceRegWith(Def, SrcSrc);
    324   MRI->clearKillFlags(SrcSrc);
    325   MI->eraseFromParent();
    326   ++NumBitcasts;
    327   return true;
    328 }
    329 
    330 /// OptimizeCmpInstr - If the instruction is a compare and the previous
    331 /// instruction it's comparing against all ready sets (or could be modified to
    332 /// set) the same flag as the compare, then we can remove the comparison and use
    333 /// the flag from the previous instruction.
    334 bool PeepholeOptimizer::OptimizeCmpInstr(MachineInstr *MI,
    335                                          MachineBasicBlock *MBB) {
    336   // If this instruction is a comparison against zero and isn't comparing a
    337   // physical register, we can try to optimize it.
    338   unsigned SrcReg;
    339   int CmpMask, CmpValue;
    340   if (!TII->AnalyzeCompare(MI, SrcReg, CmpMask, CmpValue) ||
    341       TargetRegisterInfo::isPhysicalRegister(SrcReg))
    342     return false;
    343 
    344   // Attempt to optimize the comparison instruction.
    345   if (TII->OptimizeCompareInstr(MI, SrcReg, CmpMask, CmpValue, MRI)) {
    346     ++NumCmps;
    347     return true;
    348   }
    349 
    350   return false;
    351 }
    352 
    353 bool PeepholeOptimizer::isMoveImmediate(MachineInstr *MI,
    354                                         SmallSet<unsigned, 4> &ImmDefRegs,
    355                                  DenseMap<unsigned, MachineInstr*> &ImmDefMIs) {
    356   const MCInstrDesc &MCID = MI->getDesc();
    357   if (!MI->isMoveImmediate())
    358     return false;
    359   if (MCID.getNumDefs() != 1)
    360     return false;
    361   unsigned Reg = MI->getOperand(0).getReg();
    362   if (TargetRegisterInfo::isVirtualRegister(Reg)) {
    363     ImmDefMIs.insert(std::make_pair(Reg, MI));
    364     ImmDefRegs.insert(Reg);
    365     return true;
    366   }
    367 
    368   return false;
    369 }
    370 
    371 /// FoldImmediate - Try folding register operands that are defined by move
    372 /// immediate instructions, i.e. a trivial constant folding optimization, if
    373 /// and only if the def and use are in the same BB.
    374 bool PeepholeOptimizer::FoldImmediate(MachineInstr *MI, MachineBasicBlock *MBB,
    375                                       SmallSet<unsigned, 4> &ImmDefRegs,
    376                                  DenseMap<unsigned, MachineInstr*> &ImmDefMIs) {
    377   for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) {
    378     MachineOperand &MO = MI->getOperand(i);
    379     if (!MO.isReg() || MO.isDef())
    380       continue;
    381     unsigned Reg = MO.getReg();
    382     if (!TargetRegisterInfo::isVirtualRegister(Reg))
    383       continue;
    384     if (ImmDefRegs.count(Reg) == 0)
    385       continue;
    386     DenseMap<unsigned, MachineInstr*>::iterator II = ImmDefMIs.find(Reg);
    387     assert(II != ImmDefMIs.end());
    388     if (TII->FoldImmediate(MI, II->second, Reg, MRI)) {
    389       ++NumImmFold;
    390       return true;
    391     }
    392   }
    393   return false;
    394 }
    395 
    396 bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
    397   if (DisablePeephole)
    398     return false;
    399 
    400   TM  = &MF.getTarget();
    401   TII = TM->getInstrInfo();
    402   MRI = &MF.getRegInfo();
    403   DT  = Aggressive ? &getAnalysis<MachineDominatorTree>() : 0;
    404 
    405   bool Changed = false;
    406 
    407   SmallPtrSet<MachineInstr*, 8> LocalMIs;
    408   SmallSet<unsigned, 4> ImmDefRegs;
    409   DenseMap<unsigned, MachineInstr*> ImmDefMIs;
    410   for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
    411     MachineBasicBlock *MBB = &*I;
    412 
    413     bool SeenMoveImm = false;
    414     LocalMIs.clear();
    415     ImmDefRegs.clear();
    416     ImmDefMIs.clear();
    417 
    418     bool First = true;
    419     MachineBasicBlock::iterator PMII;
    420     for (MachineBasicBlock::iterator
    421            MII = I->begin(), MIE = I->end(); MII != MIE; ) {
    422       MachineInstr *MI = &*MII;
    423       LocalMIs.insert(MI);
    424 
    425       if (MI->isLabel() || MI->isPHI() || MI->isImplicitDef() ||
    426           MI->isKill() || MI->isInlineAsm() || MI->isDebugValue() ||
    427           MI->hasUnmodeledSideEffects()) {
    428         ++MII;
    429         continue;
    430       }
    431 
    432       if (MI->isBitcast()) {
    433         if (OptimizeBitcastInstr(MI, MBB)) {
    434           // MI is deleted.
    435           LocalMIs.erase(MI);
    436           Changed = true;
    437           MII = First ? I->begin() : llvm::next(PMII);
    438           continue;
    439         }
    440       } else if (MI->isCompare()) {
    441         if (OptimizeCmpInstr(MI, MBB)) {
    442           // MI is deleted.
    443           LocalMIs.erase(MI);
    444           Changed = true;
    445           MII = First ? I->begin() : llvm::next(PMII);
    446           continue;
    447         }
    448       }
    449 
    450       if (isMoveImmediate(MI, ImmDefRegs, ImmDefMIs)) {
    451         SeenMoveImm = true;
    452       } else {
    453         Changed |= OptimizeExtInstr(MI, MBB, LocalMIs);
    454         if (SeenMoveImm)
    455           Changed |= FoldImmediate(MI, MBB, ImmDefRegs, ImmDefMIs);
    456       }
    457 
    458       First = false;
    459       PMII = MII;
    460       ++MII;
    461     }
    462   }
    463 
    464   return Changed;
    465 }
    466