//===-- NVPTXPeephole.cpp - NVPTX Peephole Optimizations ------------------===//
//
//                     The LLVM Compiler Infrastructure
//
// This file is distributed under the University of Illinois Open Source
// License. See LICENSE.TXT for details.
//
//===----------------------------------------------------------------------===//
//
// In NVPTX, NVPTXFrameLowering emits the following instructions at the
// beginning of a MachineFunction.
//
//   mov %SPL, %depot
//   cvta.local %SP, %SPL
//
// Because Frame Index is a generic address and alloca can only return generic
// pointer, without this pass the instructions producing alloca'ed address will
// be based on %SP. NVPTXLowerAlloca tends to help replace store and load on
// this address with their .local versions, but this may introduce a lot of
// cvta.to.local instructions. Performance can be improved if we avoid casting
// address back and forth and directly calculate local address based on %SPL.
22 // This peephole pass optimizes these cases, for example 23 // 24 // It will transform the following pattern 25 // %vreg0<def> = LEA_ADDRi64 %VRFrame, 4 26 // %vreg1<def> = cvta_to_local_yes_64 %vreg0 27 // 28 // into 29 // %vreg1<def> = LEA_ADDRi64 %VRFrameLocal, 4 30 // 31 // %VRFrameLocal is the virtual register name of %SPL 32 // 33 //===----------------------------------------------------------------------===// 34 35 #include "NVPTX.h" 36 #include "llvm/CodeGen/MachineFunctionPass.h" 37 #include "llvm/CodeGen/MachineInstrBuilder.h" 38 #include "llvm/CodeGen/MachineRegisterInfo.h" 39 #include "llvm/Target/TargetRegisterInfo.h" 40 #include "llvm/Target/TargetInstrInfo.h" 41 42 using namespace llvm; 43 44 #define DEBUG_TYPE "nvptx-peephole" 45 46 namespace llvm { 47 void initializeNVPTXPeepholePass(PassRegistry &); 48 } 49 50 namespace { 51 struct NVPTXPeephole : public MachineFunctionPass { 52 public: 53 static char ID; 54 NVPTXPeephole() : MachineFunctionPass(ID) { 55 initializeNVPTXPeepholePass(*PassRegistry::getPassRegistry()); 56 } 57 58 bool runOnMachineFunction(MachineFunction &MF) override; 59 60 const char *getPassName() const override { 61 return "NVPTX optimize redundant cvta.to.local instruction"; 62 } 63 64 void getAnalysisUsage(AnalysisUsage &AU) const override { 65 MachineFunctionPass::getAnalysisUsage(AU); 66 } 67 }; 68 } 69 70 char NVPTXPeephole::ID = 0; 71 72 INITIALIZE_PASS(NVPTXPeephole, "nvptx-peephole", "NVPTX Peephole", false, false) 73 74 static bool isCVTAToLocalCombinationCandidate(MachineInstr &Root) { 75 auto &MBB = *Root.getParent(); 76 auto &MF = *MBB.getParent(); 77 // Check current instruction is cvta.to.local 78 if (Root.getOpcode() != NVPTX::cvta_to_local_yes_64 && 79 Root.getOpcode() != NVPTX::cvta_to_local_yes) 80 return false; 81 82 auto &Op = Root.getOperand(1); 83 const auto &MRI = MF.getRegInfo(); 84 MachineInstr *GenericAddrDef = nullptr; 85 if (Op.isReg() && TargetRegisterInfo::isVirtualRegister(Op.getReg())) { 86 
GenericAddrDef = MRI.getUniqueVRegDef(Op.getReg()); 87 } 88 89 // Check the register operand is uniquely defined by LEA_ADDRi instruction 90 if (!GenericAddrDef || GenericAddrDef->getParent() != &MBB || 91 (GenericAddrDef->getOpcode() != NVPTX::LEA_ADDRi64 && 92 GenericAddrDef->getOpcode() != NVPTX::LEA_ADDRi)) { 93 return false; 94 } 95 96 // Check the LEA_ADDRi operand is Frame index 97 auto &BaseAddrOp = GenericAddrDef->getOperand(1); 98 if (BaseAddrOp.isReg() && BaseAddrOp.getReg() == NVPTX::VRFrame) { 99 return true; 100 } 101 102 return false; 103 } 104 105 static void CombineCVTAToLocal(MachineInstr &Root) { 106 auto &MBB = *Root.getParent(); 107 auto &MF = *MBB.getParent(); 108 const auto &MRI = MF.getRegInfo(); 109 const TargetInstrInfo *TII = MF.getSubtarget().getInstrInfo(); 110 auto &Prev = *MRI.getUniqueVRegDef(Root.getOperand(1).getReg()); 111 112 MachineInstrBuilder MIB = 113 BuildMI(MF, Root.getDebugLoc(), TII->get(Prev.getOpcode()), 114 Root.getOperand(0).getReg()) 115 .addReg(NVPTX::VRFrameLocal) 116 .addOperand(Prev.getOperand(2)); 117 118 MBB.insert((MachineBasicBlock::iterator)&Root, MIB); 119 120 // Check if MRI has only one non dbg use, which is Root 121 if (MRI.hasOneNonDBGUse(Prev.getOperand(0).getReg())) { 122 Prev.eraseFromParentAndMarkDBGValuesForRemoval(); 123 } 124 Root.eraseFromParentAndMarkDBGValuesForRemoval(); 125 } 126 127 bool NVPTXPeephole::runOnMachineFunction(MachineFunction &MF) { 128 if (skipFunction(*MF.getFunction())) 129 return false; 130 131 bool Changed = false; 132 // Loop over all of the basic blocks. 133 for (auto &MBB : MF) { 134 // Traverse the basic block. 
135 auto BlockIter = MBB.begin(); 136 137 while (BlockIter != MBB.end()) { 138 auto &MI = *BlockIter++; 139 if (isCVTAToLocalCombinationCandidate(MI)) { 140 CombineCVTAToLocal(MI); 141 Changed = true; 142 } 143 } // Instruction 144 } // Basic Block 145 146 // Remove unnecessary %VRFrame = cvta.local %VRFrameLocal 147 const auto &MRI = MF.getRegInfo(); 148 if (MRI.use_empty(NVPTX::VRFrame)) { 149 if (auto MI = MRI.getUniqueVRegDef(NVPTX::VRFrame)) { 150 MI->eraseFromParentAndMarkDBGValuesForRemoval(); 151 } 152 } 153 154 return Changed; 155 } 156 157 MachineFunctionPass *llvm::createNVPTXPeephole() { return new NVPTXPeephole(); } 158