Home | History | Annotate | Download | only in NVPTX
      1 //===-- NVPTXPeephole.cpp - NVPTX Peephole Optimizations ------------------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // In NVPTX, NVPTXFrameLowering emits the following instructions at the
     11 // beginning of a MachineFunction.
     12 //
     13 //   mov %SPL, %depot
     14 //   cvta.local %SP, %SPL
     15 //
     16 // Because a Frame Index is a generic address and alloca can only return a
     17 // generic pointer, without this pass the instructions producing an alloca'ed
     18 // address will be based on %SP. NVPTXLowerAlloca tends to help replace stores
     19 // and loads on this address with their .local versions, but this may introduce
     20 // a lot of cvta.to.local instructions. Performance can be improved if we avoid
     21 // casting the address back and forth and directly calculate the local address
     22 // based on %SPL. This peephole pass optimizes these cases.
     23 //
     24 // For example, it will transform the following pattern
     25 //    %vreg0<def> = LEA_ADDRi64 %VRFrame, 4
     26 //    %vreg1<def> = cvta_to_local_yes_64 %vreg0
     27 //
     28 // into
     29 //    %vreg1<def> = LEA_ADDRi64 %VRFrameLocal, 4
     30 //
     31 // %VRFrameLocal is the virtual register name of %SPL
     32 //
     33 //===----------------------------------------------------------------------===//
     34 
     35 #include "NVPTX.h"
     36 #include "llvm/CodeGen/MachineFunctionPass.h"
     37 #include "llvm/CodeGen/MachineInstrBuilder.h"
     38 #include "llvm/CodeGen/MachineRegisterInfo.h"
     39 #include "llvm/Target/TargetRegisterInfo.h"
     40 #include "llvm/Target/TargetInstrInfo.h"
     41 
     42 using namespace llvm;
     43 
     44 #define DEBUG_TYPE "nvptx-peephole"
     45 
     46 namespace llvm {
     47 void initializeNVPTXPeepholePass(PassRegistry &);
     48 }
     49 
     50 namespace {
     51 struct NVPTXPeephole : public MachineFunctionPass {
     52  public:
     53   static char ID;
     54   NVPTXPeephole() : MachineFunctionPass(ID) {
     55     initializeNVPTXPeepholePass(*PassRegistry::getPassRegistry());
     56   }
     57 
     58   bool runOnMachineFunction(MachineFunction &MF) override;
     59 
     60   const char *getPassName() const override {
     61     return "NVPTX optimize redundant cvta.to.local instruction";
     62   }
     63 
     64   void getAnalysisUsage(AnalysisUsage &AU) const override {
     65     MachineFunctionPass::getAnalysisUsage(AU);
     66   }
     67 };
     68 }
     69 
     70 char NVPTXPeephole::ID = 0;
     71 
     72 INITIALIZE_PASS(NVPTXPeephole, "nvptx-peephole", "NVPTX Peephole", false, false)
     73 
     74 static bool isCVTAToLocalCombinationCandidate(MachineInstr &Root) {
     75   auto &MBB = *Root.getParent();
     76   auto &MF = *MBB.getParent();
     77   // Check current instruction is cvta.to.local
     78   if (Root.getOpcode() != NVPTX::cvta_to_local_yes_64 &&
     79       Root.getOpcode() != NVPTX::cvta_to_local_yes)
     80     return false;
     81 
     82   auto &Op = Root.getOperand(1);
     83   const auto &MRI = MF.getRegInfo();
     84   MachineInstr *GenericAddrDef = nullptr;
     85   if (Op.isReg() && TargetRegisterInfo::isVirtualRegister(Op.getReg())) {
     86     GenericAddrDef = MRI.getUniqueVRegDef(Op.getReg());
     87   }
     88 
     89   // Check the register operand is uniquely defined by LEA_ADDRi instruction
     90   if (!GenericAddrDef || GenericAddrDef->getParent() != &MBB ||
     91       (GenericAddrDef->getOpcode() != NVPTX::LEA_ADDRi64 &&
     92        GenericAddrDef->getOpcode() != NVPTX::LEA_ADDRi)) {
     93     return false;
     94   }
     95 
     96   // Check the LEA_ADDRi operand is Frame index
     97   auto &BaseAddrOp = GenericAddrDef->getOperand(1);
     98   if (BaseAddrOp.isReg() && BaseAddrOp.getReg() == NVPTX::VRFrame) {
     99     return true;
    100   }
    101 
    102   return false;
    103 }
    104 
    105 static void CombineCVTAToLocal(MachineInstr &Root) {
    106   auto &MBB = *Root.getParent();
    107   auto &MF = *MBB.getParent();
    108   const auto &MRI = MF.getRegInfo();
    109   const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo();
    110   auto &Prev = *MRI.getUniqueVRegDef(Root.getOperand(1).getReg());
    111 
    112   MachineInstrBuilder MIB =
    113       BuildMI(MF, Root.getDebugLoc(), TII->get(Prev.getOpcode()),
    114               Root.getOperand(0).getReg())
    115           .addReg(NVPTX::VRFrameLocal)
    116           .addOperand(Prev.getOperand(2));
    117 
    118   MBB.insert((MachineBasicBlock::iterator)&Root, MIB);
    119 
    120   // Check if MRI has only one non dbg use, which is Root
    121   if (MRI.hasOneNonDBGUse(Prev.getOperand(0).getReg())) {
    122     Prev.eraseFromParentAndMarkDBGValuesForRemoval();
    123   }
    124   Root.eraseFromParentAndMarkDBGValuesForRemoval();
    125 }
    126 
    127 bool NVPTXPeephole::runOnMachineFunction(MachineFunction &MF) {
    128   bool Changed = false;
    129   // Loop over all of the basic blocks.
    130   for (auto &MBB : MF) {
    131     // Traverse the basic block.
    132     auto BlockIter = MBB.begin();
    133 
    134     while (BlockIter != MBB.end()) {
    135       auto &MI = *BlockIter++;
    136       if (isCVTAToLocalCombinationCandidate(MI)) {
    137         CombineCVTAToLocal(MI);
    138         Changed = true;
    139       }
    140     }  // Instruction
    141   }    // Basic Block
    142 
    143   // Remove unnecessary %VRFrame = cvta.local %VRFrameLocal
    144   const auto &MRI = MF.getRegInfo();
    145   if (MRI.use_empty(NVPTX::VRFrame)) {
    146     if (auto MI = MRI.getUniqueVRegDef(NVPTX::VRFrame)) {
    147       MI->eraseFromParentAndMarkDBGValuesForRemoval();
    148     }
    149   }
    150 
    151   return Changed;
    152 }
    153 
    154 MachineFunctionPass *llvm::createNVPTXPeephole() { return new NVPTXPeephole(); }
    155