1 //===-- NVPTXTargetMachine.cpp - Define TargetMachine for NVPTX -----------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // Top-level implementation for the NVPTX target. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "NVPTXTargetMachine.h" 15 #include "MCTargetDesc/NVPTXMCAsmInfo.h" 16 #include "NVPTX.h" 17 #include "NVPTXAllocaHoisting.h" 18 #include "NVPTXLowerAggrCopies.h" 19 #include "llvm/Analysis/Passes.h" 20 #include "llvm/CodeGen/AsmPrinter.h" 21 #include "llvm/CodeGen/MachineFunctionAnalysis.h" 22 #include "llvm/CodeGen/MachineModuleInfo.h" 23 #include "llvm/CodeGen/Passes.h" 24 #include "llvm/IR/DataLayout.h" 25 #include "llvm/IR/IRPrintingPasses.h" 26 #include "llvm/IR/Verifier.h" 27 #include "llvm/MC/MCAsmInfo.h" 28 #include "llvm/MC/MCInstrInfo.h" 29 #include "llvm/MC/MCStreamer.h" 30 #include "llvm/MC/MCSubtargetInfo.h" 31 #include "llvm/PassManager.h" 32 #include "llvm/Support/CommandLine.h" 33 #include "llvm/Support/Debug.h" 34 #include "llvm/Support/FormattedStream.h" 35 #include "llvm/Support/TargetRegistry.h" 36 #include "llvm/Support/raw_ostream.h" 37 #include "llvm/Target/TargetInstrInfo.h" 38 #include "llvm/Target/TargetLowering.h" 39 #include "llvm/Target/TargetLoweringObjectFile.h" 40 #include "llvm/Target/TargetMachine.h" 41 #include "llvm/Target/TargetOptions.h" 42 #include "llvm/Target/TargetRegisterInfo.h" 43 #include "llvm/Target/TargetSubtargetInfo.h" 44 #include "llvm/Transforms/Scalar.h" 45 46 using namespace llvm; 47 48 namespace llvm { 49 void initializeNVVMReflectPass(PassRegistry&); 50 void initializeGenericToNVVMPass(PassRegistry&); 51 void initializeNVPTXAssignValidGlobalNamesPass(PassRegistry&); 52 void initializeNVPTXFavorNonGenericAddrSpacesPass(PassRegistry &); 53 } 54 55 extern "C" void LLVMInitializeNVPTXTarget() { 56 // Register the target. 57 RegisterTargetMachine<NVPTXTargetMachine32> X(TheNVPTXTarget32); 58 RegisterTargetMachine<NVPTXTargetMachine64> Y(TheNVPTXTarget64); 59 60 // FIXME: This pass is really intended to be invoked during IR optimization, 61 // but it's very NVPTX-specific. 62 initializeNVVMReflectPass(*PassRegistry::getPassRegistry()); 63 initializeGenericToNVVMPass(*PassRegistry::getPassRegistry()); 64 initializeNVPTXAssignValidGlobalNamesPass(*PassRegistry::getPassRegistry()); 65 initializeNVPTXFavorNonGenericAddrSpacesPass( 66 *PassRegistry::getPassRegistry()); 67 } 68 69 NVPTXTargetMachine::NVPTXTargetMachine(const Target &T, StringRef TT, 70 StringRef CPU, StringRef FS, 71 const TargetOptions &Options, 72 Reloc::Model RM, CodeModel::Model CM, 73 CodeGenOpt::Level OL, bool is64bit) 74 : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL), 75 Subtarget(TT, CPU, FS, *this, is64bit) { 76 initAsmInfo(); 77 } 78 79 void NVPTXTargetMachine32::anchor() {} 80 81 NVPTXTargetMachine32::NVPTXTargetMachine32( 82 const Target &T, StringRef TT, StringRef CPU, StringRef FS, 83 const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, 84 CodeGenOpt::Level OL) 85 : NVPTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {} 86 87 void NVPTXTargetMachine64::anchor() {} 88 89 NVPTXTargetMachine64::NVPTXTargetMachine64( 90 const Target &T, StringRef TT, StringRef CPU, StringRef FS, 91 const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM, 92 CodeGenOpt::Level OL) 93 : NVPTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {} 94 95 namespace { 96 class NVPTXPassConfig : public TargetPassConfig { 97 public: 98 NVPTXPassConfig(NVPTXTargetMachine *TM, PassManagerBase &PM) 99 : TargetPassConfig(TM, PM) {} 100 101 NVPTXTargetMachine &getNVPTXTargetMachine() const { 102 return getTM<NVPTXTargetMachine>(); 103 } 104 105 void addIRPasses() override; 106 bool addInstSelector() override; 107 bool addPreRegAlloc() override; 108 bool addPostRegAlloc() override; 109 void addMachineSSAOptimization() override; 110 111 FunctionPass *createTargetRegisterAllocator(bool) override; 112 void addFastRegAlloc(FunctionPass *RegAllocPass) override; 113 void addOptimizedRegAlloc(FunctionPass *RegAllocPass) override; 114 }; 115 } // end anonymous namespace 116 117 TargetPassConfig *NVPTXTargetMachine::createPassConfig(PassManagerBase &PM) { 118 NVPTXPassConfig *PassConfig = new NVPTXPassConfig(this, PM); 119 return PassConfig; 120 } 121 122 void NVPTXPassConfig::addIRPasses() { 123 // The following passes are known to not play well with virtual regs hanging 124 // around after register allocation (which in our case, is *all* registers). 125 // We explicitly disable them here. We do, however, need some functionality 126 // of the PrologEpilogCodeInserter pass, so we emulate that behavior in the 127 // NVPTXPrologEpilog pass (see NVPTXPrologEpilogPass.cpp). 128 disablePass(&PrologEpilogCodeInserterID); 129 disablePass(&MachineCopyPropagationID); 130 disablePass(&BranchFolderPassID); 131 disablePass(&TailDuplicateID); 132 133 addPass(createNVPTXImageOptimizerPass()); 134 TargetPassConfig::addIRPasses(); 135 addPass(createNVPTXAssignValidGlobalNamesPass()); 136 addPass(createGenericToNVVMPass()); 137 addPass(createNVPTXFavorNonGenericAddrSpacesPass()); 138 addPass(createSeparateConstOffsetFromGEPPass()); 139 // The SeparateConstOffsetFromGEP pass creates variadic bases that can be used 140 // by multiple GEPs. Run GVN or EarlyCSE to really reuse them. GVN generates 141 // significantly better code than EarlyCSE for some of our benchmarks. 142 if (getOptLevel() == CodeGenOpt::Aggressive) 143 addPass(createGVNPass()); 144 else 145 addPass(createEarlyCSEPass()); 146 // Both FavorNonGenericAddrSpaces and SeparateConstOffsetFromGEP may leave 147 // some dead code. We could remove dead code in an ad-hoc manner, but that 148 // requires manual work and might be error-prone. 149 // 150 // The FavorNonGenericAddrSpaces pass shortcuts unnecessary addrspacecasts, 151 // and leave them unused. 152 // 153 // SeparateConstOffsetFromGEP rebuilds a new index from the old index, and the 154 // old index and some of its intermediate results may become unused. 155 addPass(createDeadCodeEliminationPass()); 156 } 157 158 bool NVPTXPassConfig::addInstSelector() { 159 const NVPTXSubtarget &ST = 160 getTM<NVPTXTargetMachine>().getSubtarget<NVPTXSubtarget>(); 161 162 addPass(createLowerAggrCopies()); 163 addPass(createAllocaHoisting()); 164 addPass(createNVPTXISelDag(getNVPTXTargetMachine(), getOptLevel())); 165 166 if (!ST.hasImageHandles()) 167 addPass(createNVPTXReplaceImageHandlesPass()); 168 169 return false; 170 } 171 172 bool NVPTXPassConfig::addPreRegAlloc() { return false; } 173 bool NVPTXPassConfig::addPostRegAlloc() { 174 addPass(createNVPTXPrologEpilogPass()); 175 return false; 176 } 177 178 FunctionPass *NVPTXPassConfig::createTargetRegisterAllocator(bool) { 179 return nullptr; // No reg alloc 180 } 181 182 void NVPTXPassConfig::addFastRegAlloc(FunctionPass *RegAllocPass) { 183 assert(!RegAllocPass && "NVPTX uses no regalloc!"); 184 addPass(&PHIEliminationID); 185 addPass(&TwoAddressInstructionPassID); 186 } 187 188 void NVPTXPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) { 189 assert(!RegAllocPass && "NVPTX uses no regalloc!"); 190 191 addPass(&ProcessImplicitDefsID); 192 addPass(&LiveVariablesID); 193 addPass(&MachineLoopInfoID); 194 addPass(&PHIEliminationID); 195 196 addPass(&TwoAddressInstructionPassID); 197 addPass(&RegisterCoalescerID); 198 199 // PreRA instruction scheduling. 200 if (addPass(&MachineSchedulerID)) 201 printAndVerify("After Machine Scheduling"); 202 203 204 addPass(&StackSlotColoringID); 205 206 // FIXME: Needs physical registers 207 //addPass(&PostRAMachineLICMID); 208 209 printAndVerify("After StackSlotColoring"); 210 } 211 212 void NVPTXPassConfig::addMachineSSAOptimization() { 213 // Pre-ra tail duplication. 214 if (addPass(&EarlyTailDuplicateID)) 215 printAndVerify("After Pre-RegAlloc TailDuplicate"); 216 217 // Optimize PHIs before DCE: removing dead PHI cycles may make more 218 // instructions dead. 219 addPass(&OptimizePHIsID); 220 221 // This pass merges large allocas. StackSlotColoring is a different pass 222 // which merges spill slots. 223 addPass(&StackColoringID); 224 225 // If the target requests it, assign local variables to stack slots relative 226 // to one another and simplify frame index references where possible. 227 addPass(&LocalStackSlotAllocationID); 228 229 // With optimization, dead code should already be eliminated. However 230 // there is one known exception: lowered code for arguments that are only 231 // used by tail calls, where the tail calls reuse the incoming stack 232 // arguments directly (see t11 in test/CodeGen/X86/sibcall.ll). 233 addPass(&DeadMachineInstructionElimID); 234 printAndVerify("After codegen DCE pass"); 235 236 // Allow targets to insert passes that improve instruction level parallelism, 237 // like if-conversion. Such passes will typically need dominator trees and 238 // loop info, just like LICM and CSE below. 239 if (addILPOpts()) 240 printAndVerify("After ILP optimizations"); 241 242 addPass(&MachineLICMID); 243 addPass(&MachineCSEID); 244 245 addPass(&MachineSinkingID); 246 printAndVerify("After Machine LICM, CSE and Sinking passes"); 247 248 addPass(&PeepholeOptimizerID); 249 printAndVerify("After codegen peephole optimization pass"); 250 } 251