Home | History | Annotate | Download | only in CodeGen
      1 //===-- PeepholeOptimizer.cpp - Peephole Optimizations --------------------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // Perform peephole optimizations on the machine code:
     11 //
     12 // - Optimize Extensions
     13 //
     14 //     Optimization of sign / zero extension instructions. It may be extended to
     15 //     handle other instructions with similar properties.
     16 //
     17 //     On some targets, some instructions, e.g. X86 sign / zero extension, may
     18 //     leave the source value in the lower part of the result. This optimization
     19 //     will replace some uses of the pre-extension value with uses of the
     20 //     sub-register of the results.
     21 //
     22 // - Optimize Comparisons
     23 //
     24 //     Optimization of comparison instructions. For instance, in this code:
     25 //
     26 //       sub r1, 1
     27 //       cmp r1, 0
     28 //       bz  L1
     29 //
     30 //     If the "sub" instruction all ready sets (or could be modified to set) the
     31 //     same flag that the "cmp" instruction sets and that "bz" uses, then we can
     32 //     eliminate the "cmp" instruction.
     33 //
     34 // - Optimize Bitcast pairs:
     35 //
     36 //     v1 = bitcast v0
     37 //     v2 = bitcast v1
     38 //        = v2
     39 //   =>
     40 //     v1 = bitcast v0
     41 //        = v0
     42 //
     43 //===----------------------------------------------------------------------===//
     44 
     45 #define DEBUG_TYPE "peephole-opt"
     46 #include "llvm/CodeGen/Passes.h"
     47 #include "llvm/CodeGen/MachineDominators.h"
     48 #include "llvm/CodeGen/MachineInstrBuilder.h"
     49 #include "llvm/CodeGen/MachineRegisterInfo.h"
     50 #include "llvm/Target/TargetInstrInfo.h"
     51 #include "llvm/Target/TargetRegisterInfo.h"
     52 #include "llvm/Support/CommandLine.h"
     53 #include "llvm/ADT/DenseMap.h"
     54 #include "llvm/ADT/SmallPtrSet.h"
     55 #include "llvm/ADT/SmallSet.h"
     56 #include "llvm/ADT/Statistic.h"
     57 using namespace llvm;
     58 
     59 // Optimize Extensions
     60 static cl::opt<bool>
     61 Aggressive("aggressive-ext-opt", cl::Hidden,
     62            cl::desc("Aggressive extension optimization"));
     63 
     64 static cl::opt<bool>
     65 DisablePeephole("disable-peephole", cl::Hidden, cl::init(false),
     66                 cl::desc("Disable the peephole optimizer"));
     67 
     68 STATISTIC(NumReuse,      "Number of extension results reused");
     69 STATISTIC(NumBitcasts,   "Number of bitcasts eliminated");
     70 STATISTIC(NumCmps,       "Number of compares eliminated");
     71 STATISTIC(NumImmFold,    "Number of move immediate foled");
     72 
     73 namespace {
     74   class PeepholeOptimizer : public MachineFunctionPass {
     75     const TargetMachine   *TM;
     76     const TargetInstrInfo *TII;
     77     MachineRegisterInfo   *MRI;
     78     MachineDominatorTree  *DT;  // Machine dominator tree
     79 
     80   public:
     81     static char ID; // Pass identification
     82     PeepholeOptimizer() : MachineFunctionPass(ID) {
     83       initializePeepholeOptimizerPass(*PassRegistry::getPassRegistry());
     84     }
     85 
     86     virtual bool runOnMachineFunction(MachineFunction &MF);
     87 
     88     virtual void getAnalysisUsage(AnalysisUsage &AU) const {
     89       AU.setPreservesCFG();
     90       MachineFunctionPass::getAnalysisUsage(AU);
     91       if (Aggressive) {
     92         AU.addRequired<MachineDominatorTree>();
     93         AU.addPreserved<MachineDominatorTree>();
     94       }
     95     }
     96 
     97   private:
     98     bool OptimizeBitcastInstr(MachineInstr *MI, MachineBasicBlock *MBB);
     99     bool OptimizeCmpInstr(MachineInstr *MI, MachineBasicBlock *MBB);
    100     bool OptimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB,
    101                           SmallPtrSet<MachineInstr*, 8> &LocalMIs);
    102     bool isMoveImmediate(MachineInstr *MI,
    103                          SmallSet<unsigned, 4> &ImmDefRegs,
    104                          DenseMap<unsigned, MachineInstr*> &ImmDefMIs);
    105     bool FoldImmediate(MachineInstr *MI, MachineBasicBlock *MBB,
    106                        SmallSet<unsigned, 4> &ImmDefRegs,
    107                        DenseMap<unsigned, MachineInstr*> &ImmDefMIs);
    108   };
    109 }
    110 
    111 char PeepholeOptimizer::ID = 0;
    112 INITIALIZE_PASS_BEGIN(PeepholeOptimizer, "peephole-opts",
    113                 "Peephole Optimizations", false, false)
    114 INITIALIZE_PASS_DEPENDENCY(MachineDominatorTree)
    115 INITIALIZE_PASS_END(PeepholeOptimizer, "peephole-opts",
    116                 "Peephole Optimizations", false, false)
    117 
    118 FunctionPass *llvm::createPeepholeOptimizerPass() {
    119   return new PeepholeOptimizer();
    120 }
    121 
    122 /// OptimizeExtInstr - If instruction is a copy-like instruction, i.e. it reads
    123 /// a single register and writes a single register and it does not modify the
    124 /// source, and if the source value is preserved as a sub-register of the
    125 /// result, then replace all reachable uses of the source with the subreg of the
    126 /// result.
    127 ///
    128 /// Do not generate an EXTRACT that is used only in a debug use, as this changes
    129 /// the code. Since this code does not currently share EXTRACTs, just ignore all
    130 /// debug uses.
    131 bool PeepholeOptimizer::
    132 OptimizeExtInstr(MachineInstr *MI, MachineBasicBlock *MBB,
    133                  SmallPtrSet<MachineInstr*, 8> &LocalMIs) {
    134   unsigned SrcReg, DstReg, SubIdx;
    135   if (!TII->isCoalescableExtInstr(*MI, SrcReg, DstReg, SubIdx))
    136     return false;
    137 
    138   if (TargetRegisterInfo::isPhysicalRegister(DstReg) ||
    139       TargetRegisterInfo::isPhysicalRegister(SrcReg))
    140     return false;
    141 
    142   MachineRegisterInfo::use_nodbg_iterator UI = MRI->use_nodbg_begin(SrcReg);
    143   if (++UI == MRI->use_nodbg_end())
    144     // No other uses.
    145     return false;
    146 
    147   // The source has other uses. See if we can replace the other uses with use of
    148   // the result of the extension.
    149   SmallPtrSet<MachineBasicBlock*, 4> ReachedBBs;
    150   UI = MRI->use_nodbg_begin(DstReg);
    151   for (MachineRegisterInfo::use_nodbg_iterator UE = MRI->use_nodbg_end();
    152        UI != UE; ++UI)
    153     ReachedBBs.insert(UI->getParent());
    154 
    155   // Uses that are in the same BB of uses of the result of the instruction.
    156   SmallVector<MachineOperand*, 8> Uses;
    157 
    158   // Uses that the result of the instruction can reach.
    159   SmallVector<MachineOperand*, 8> ExtendedUses;
    160 
    161   bool ExtendLife = true;
    162   UI = MRI->use_nodbg_begin(SrcReg);
    163   for (MachineRegisterInfo::use_nodbg_iterator UE = MRI->use_nodbg_end();
    164        UI != UE; ++UI) {
    165     MachineOperand &UseMO = UI.getOperand();
    166     MachineInstr *UseMI = &*UI;
    167     if (UseMI == MI)
    168       continue;
    169 
    170     if (UseMI->isPHI()) {
    171       ExtendLife = false;
    172       continue;
    173     }
    174 
    175     // It's an error to translate this:
    176     //
    177     //    %reg1025 = <sext> %reg1024
    178     //     ...
    179     //    %reg1026 = SUBREG_TO_REG 0, %reg1024, 4
    180     //
    181     // into this:
    182     //
    183     //    %reg1025 = <sext> %reg1024
    184     //     ...
    185     //    %reg1027 = COPY %reg1025:4
    186     //    %reg1026 = SUBREG_TO_REG 0, %reg1027, 4
    187     //
    188     // The problem here is that SUBREG_TO_REG is there to assert that an
    189     // implicit zext occurs. It doesn't insert a zext instruction. If we allow
    190     // the COPY here, it will give us the value after the <sext>, not the
    191     // original value of %reg1024 before <sext>.
    192     if (UseMI->getOpcode() == TargetOpcode::SUBREG_TO_REG)
    193       continue;
    194 
    195     MachineBasicBlock *UseMBB = UseMI->getParent();
    196     if (UseMBB == MBB) {
    197       // Local uses that come after the extension.
    198       if (!LocalMIs.count(UseMI))
    199         Uses.push_back(&UseMO);
    200     } else if (ReachedBBs.count(UseMBB)) {
    201       // Non-local uses where the result of the extension is used. Always
    202       // replace these unless it's a PHI.
    203       Uses.push_back(&UseMO);
    204     } else if (Aggressive && DT->dominates(MBB, UseMBB)) {
    205       // We may want to extend the live range of the extension result in order
    206       // to replace these uses.
    207       ExtendedUses.push_back(&UseMO);
    208     } else {
    209       // Both will be live out of the def MBB anyway. Don't extend live range of
    210       // the extension result.
    211       ExtendLife = false;
    212       break;
    213     }
    214   }
    215 
    216   if (ExtendLife && !ExtendedUses.empty())
    217     // Extend the liveness of the extension result.
    218     std::copy(ExtendedUses.begin(), ExtendedUses.end(),
    219               std::back_inserter(Uses));
    220 
    221   // Now replace all uses.
    222   bool Changed = false;
    223   if (!Uses.empty()) {
    224     SmallPtrSet<MachineBasicBlock*, 4> PHIBBs;
    225 
    226     // Look for PHI uses of the extended result, we don't want to extend the
    227     // liveness of a PHI input. It breaks all kinds of assumptions down
    228     // stream. A PHI use is expected to be the kill of its source values.
    229     UI = MRI->use_nodbg_begin(DstReg);
    230     for (MachineRegisterInfo::use_nodbg_iterator
    231            UE = MRI->use_nodbg_end(); UI != UE; ++UI)
    232       if (UI->isPHI())
    233         PHIBBs.insert(UI->getParent());
    234 
    235     const TargetRegisterClass *RC = MRI->getRegClass(SrcReg);
    236     for (unsigned i = 0, e = Uses.size(); i != e; ++i) {
    237       MachineOperand *UseMO = Uses[i];
    238       MachineInstr *UseMI = UseMO->getParent();
    239       MachineBasicBlock *UseMBB = UseMI->getParent();
    240       if (PHIBBs.count(UseMBB))
    241         continue;
    242 
    243       unsigned NewVR = MRI->createVirtualRegister(RC);
    244       BuildMI(*UseMBB, UseMI, UseMI->getDebugLoc(),
    245               TII->get(TargetOpcode::COPY), NewVR)
    246         .addReg(DstReg, 0, SubIdx);
    247 
    248       UseMO->setReg(NewVR);
    249       ++NumReuse;
    250       Changed = true;
    251     }
    252   }
    253 
    254   return Changed;
    255 }
    256 
    257 /// OptimizeBitcastInstr - If the instruction is a bitcast instruction A that
    258 /// cannot be optimized away during isel (e.g. ARM::VMOVSR, which bitcast
    259 /// a value cross register classes), and the source is defined by another
    260 /// bitcast instruction B. And if the register class of source of B matches
    261 /// the register class of instruction A, then it is legal to replace all uses
    262 /// of the def of A with source of B. e.g.
    263 ///   %vreg0<def> = VMOVSR %vreg1
    264 ///   %vreg3<def> = VMOVRS %vreg0
    265 ///   Replace all uses of vreg3 with vreg1.
    266 
    267 bool PeepholeOptimizer::OptimizeBitcastInstr(MachineInstr *MI,
    268                                              MachineBasicBlock *MBB) {
    269   unsigned NumDefs = MI->getDesc().getNumDefs();
    270   unsigned NumSrcs = MI->getDesc().getNumOperands() - NumDefs;
    271   if (NumDefs != 1)
    272     return false;
    273 
    274   unsigned Def = 0;
    275   unsigned Src = 0;
    276   for (unsigned i = 0, e = NumDefs + NumSrcs; i != e; ++i) {
    277     const MachineOperand &MO = MI->getOperand(i);
    278     if (!MO.isReg())
    279       continue;
    280     unsigned Reg = MO.getReg();
    281     if (!Reg)
    282       continue;
    283     if (MO.isDef())
    284       Def = Reg;
    285     else if (Src)
    286       // Multiple sources?
    287       return false;
    288     else
    289       Src = Reg;
    290   }
    291 
    292   assert(Def && Src && "Malformed bitcast instruction!");
    293 
    294   MachineInstr *DefMI = MRI->getVRegDef(Src);
    295   if (!DefMI || !DefMI->getDesc().isBitcast())
    296     return false;
    297 
    298   unsigned SrcSrc = 0;
    299   NumDefs = DefMI->getDesc().getNumDefs();
    300   NumSrcs = DefMI->getDesc().getNumOperands() - NumDefs;
    301   if (NumDefs != 1)
    302     return false;
    303   for (unsigned i = 0, e = NumDefs + NumSrcs; i != e; ++i) {
    304     const MachineOperand &MO = DefMI->getOperand(i);
    305     if (!MO.isReg() || MO.isDef())
    306       continue;
    307     unsigned Reg = MO.getReg();
    308     if (!Reg)
    309       continue;
    310     if (!MO.isDef()) {
    311       if (SrcSrc)
    312         // Multiple sources?
    313         return false;
    314       else
    315         SrcSrc = Reg;
    316     }
    317   }
    318 
    319   if (MRI->getRegClass(SrcSrc) != MRI->getRegClass(Def))
    320     return false;
    321 
    322   MRI->replaceRegWith(Def, SrcSrc);
    323   MRI->clearKillFlags(SrcSrc);
    324   MI->eraseFromParent();
    325   ++NumBitcasts;
    326   return true;
    327 }
    328 
    329 /// OptimizeCmpInstr - If the instruction is a compare and the previous
    330 /// instruction it's comparing against all ready sets (or could be modified to
    331 /// set) the same flag as the compare, then we can remove the comparison and use
    332 /// the flag from the previous instruction.
    333 bool PeepholeOptimizer::OptimizeCmpInstr(MachineInstr *MI,
    334                                          MachineBasicBlock *MBB) {
    335   // If this instruction is a comparison against zero and isn't comparing a
    336   // physical register, we can try to optimize it.
    337   unsigned SrcReg;
    338   int CmpMask, CmpValue;
    339   if (!TII->AnalyzeCompare(MI, SrcReg, CmpMask, CmpValue) ||
    340       TargetRegisterInfo::isPhysicalRegister(SrcReg))
    341     return false;
    342 
    343   // Attempt to optimize the comparison instruction.
    344   if (TII->OptimizeCompareInstr(MI, SrcReg, CmpMask, CmpValue, MRI)) {
    345     ++NumCmps;
    346     return true;
    347   }
    348 
    349   return false;
    350 }
    351 
    352 bool PeepholeOptimizer::isMoveImmediate(MachineInstr *MI,
    353                                         SmallSet<unsigned, 4> &ImmDefRegs,
    354                                  DenseMap<unsigned, MachineInstr*> &ImmDefMIs) {
    355   const MCInstrDesc &MCID = MI->getDesc();
    356   if (!MCID.isMoveImmediate())
    357     return false;
    358   if (MCID.getNumDefs() != 1)
    359     return false;
    360   unsigned Reg = MI->getOperand(0).getReg();
    361   if (TargetRegisterInfo::isVirtualRegister(Reg)) {
    362     ImmDefMIs.insert(std::make_pair(Reg, MI));
    363     ImmDefRegs.insert(Reg);
    364     return true;
    365   }
    366 
    367   return false;
    368 }
    369 
    370 /// FoldImmediate - Try folding register operands that are defined by move
    371 /// immediate instructions, i.e. a trivial constant folding optimization, if
    372 /// and only if the def and use are in the same BB.
    373 bool PeepholeOptimizer::FoldImmediate(MachineInstr *MI, MachineBasicBlock *MBB,
    374                                       SmallSet<unsigned, 4> &ImmDefRegs,
    375                                  DenseMap<unsigned, MachineInstr*> &ImmDefMIs) {
    376   for (unsigned i = 0, e = MI->getDesc().getNumOperands(); i != e; ++i) {
    377     MachineOperand &MO = MI->getOperand(i);
    378     if (!MO.isReg() || MO.isDef())
    379       continue;
    380     unsigned Reg = MO.getReg();
    381     if (!TargetRegisterInfo::isVirtualRegister(Reg))
    382       continue;
    383     if (ImmDefRegs.count(Reg) == 0)
    384       continue;
    385     DenseMap<unsigned, MachineInstr*>::iterator II = ImmDefMIs.find(Reg);
    386     assert(II != ImmDefMIs.end());
    387     if (TII->FoldImmediate(MI, II->second, Reg, MRI)) {
    388       ++NumImmFold;
    389       return true;
    390     }
    391   }
    392   return false;
    393 }
    394 
    395 bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) {
    396   if (DisablePeephole)
    397     return false;
    398 
    399   TM  = &MF.getTarget();
    400   TII = TM->getInstrInfo();
    401   MRI = &MF.getRegInfo();
    402   DT  = Aggressive ? &getAnalysis<MachineDominatorTree>() : 0;
    403 
    404   bool Changed = false;
    405 
    406   SmallPtrSet<MachineInstr*, 8> LocalMIs;
    407   SmallSet<unsigned, 4> ImmDefRegs;
    408   DenseMap<unsigned, MachineInstr*> ImmDefMIs;
    409   for (MachineFunction::iterator I = MF.begin(), E = MF.end(); I != E; ++I) {
    410     MachineBasicBlock *MBB = &*I;
    411 
    412     bool SeenMoveImm = false;
    413     LocalMIs.clear();
    414     ImmDefRegs.clear();
    415     ImmDefMIs.clear();
    416 
    417     bool First = true;
    418     MachineBasicBlock::iterator PMII;
    419     for (MachineBasicBlock::iterator
    420            MII = I->begin(), MIE = I->end(); MII != MIE; ) {
    421       MachineInstr *MI = &*MII;
    422       LocalMIs.insert(MI);
    423 
    424       if (MI->isLabel() || MI->isPHI() || MI->isImplicitDef() ||
    425           MI->isKill() || MI->isInlineAsm() || MI->isDebugValue() ||
    426           MI->hasUnmodeledSideEffects()) {
    427         ++MII;
    428         continue;
    429       }
    430 
    431       const MCInstrDesc &MCID = MI->getDesc();
    432 
    433       if (MCID.isBitcast()) {
    434         if (OptimizeBitcastInstr(MI, MBB)) {
    435           // MI is deleted.
    436           LocalMIs.erase(MI);
    437           Changed = true;
    438           MII = First ? I->begin() : llvm::next(PMII);
    439           continue;
    440         }
    441       } else if (MCID.isCompare()) {
    442         if (OptimizeCmpInstr(MI, MBB)) {
    443           // MI is deleted.
    444           LocalMIs.erase(MI);
    445           Changed = true;
    446           MII = First ? I->begin() : llvm::next(PMII);
    447           continue;
    448         }
    449       }
    450 
    451       if (isMoveImmediate(MI, ImmDefRegs, ImmDefMIs)) {
    452         SeenMoveImm = true;
    453       } else {
    454         Changed |= OptimizeExtInstr(MI, MBB, LocalMIs);
    455         if (SeenMoveImm)
    456           Changed |= FoldImmediate(MI, MBB, ImmDefRegs, ImmDefMIs);
    457       }
    458 
    459       First = false;
    460       PMII = MII;
    461       ++MII;
    462     }
    463   }
    464 
    465   return Changed;
    466 }
    467