Home | History | Annotate | Download | only in NVPTX
      1 //===-- NVPTXTargetMachine.cpp - Define TargetMachine for NVPTX -----------===//
      2 //
      3 //                     The LLVM Compiler Infrastructure
      4 //
      5 // This file is distributed under the University of Illinois Open Source
      6 // License. See LICENSE.TXT for details.
      7 //
      8 //===----------------------------------------------------------------------===//
      9 //
     10 // Top-level implementation for the NVPTX target.
     11 //
     12 //===----------------------------------------------------------------------===//
     13 
     14 #include "NVPTXTargetMachine.h"
     15 #include "MCTargetDesc/NVPTXMCAsmInfo.h"
     16 #include "NVPTX.h"
     17 #include "NVPTXAllocaHoisting.h"
     18 #include "NVPTXLowerAggrCopies.h"
     19 #include "llvm/Analysis/Passes.h"
     20 #include "llvm/CodeGen/AsmPrinter.h"
     21 #include "llvm/CodeGen/MachineFunctionAnalysis.h"
     22 #include "llvm/CodeGen/MachineModuleInfo.h"
     23 #include "llvm/CodeGen/Passes.h"
     24 #include "llvm/IR/DataLayout.h"
     25 #include "llvm/IR/IRPrintingPasses.h"
     26 #include "llvm/IR/Verifier.h"
     27 #include "llvm/MC/MCAsmInfo.h"
     28 #include "llvm/MC/MCInstrInfo.h"
     29 #include "llvm/MC/MCStreamer.h"
     30 #include "llvm/MC/MCSubtargetInfo.h"
     31 #include "llvm/PassManager.h"
     32 #include "llvm/Support/CommandLine.h"
     33 #include "llvm/Support/Debug.h"
     34 #include "llvm/Support/FormattedStream.h"
     35 #include "llvm/Support/TargetRegistry.h"
     36 #include "llvm/Support/raw_ostream.h"
     37 #include "llvm/Target/TargetInstrInfo.h"
     38 #include "llvm/Target/TargetLowering.h"
     39 #include "llvm/Target/TargetLoweringObjectFile.h"
     40 #include "llvm/Target/TargetMachine.h"
     41 #include "llvm/Target/TargetOptions.h"
     42 #include "llvm/Target/TargetRegisterInfo.h"
     43 #include "llvm/Target/TargetSubtargetInfo.h"
     44 #include "llvm/Transforms/Scalar.h"
     45 
     46 using namespace llvm;
     47 
     48 namespace llvm {
     49 void initializeNVVMReflectPass(PassRegistry&);
     50 void initializeGenericToNVVMPass(PassRegistry&);
     51 void initializeNVPTXAssignValidGlobalNamesPass(PassRegistry&);
     52 void initializeNVPTXFavorNonGenericAddrSpacesPass(PassRegistry &);
     53 }
     54 
     55 extern "C" void LLVMInitializeNVPTXTarget() {
     56   // Register the target.
     57   RegisterTargetMachine<NVPTXTargetMachine32> X(TheNVPTXTarget32);
     58   RegisterTargetMachine<NVPTXTargetMachine64> Y(TheNVPTXTarget64);
     59 
     60   // FIXME: This pass is really intended to be invoked during IR optimization,
     61   // but it's very NVPTX-specific.
     62   initializeNVVMReflectPass(*PassRegistry::getPassRegistry());
     63   initializeGenericToNVVMPass(*PassRegistry::getPassRegistry());
     64   initializeNVPTXAssignValidGlobalNamesPass(*PassRegistry::getPassRegistry());
     65   initializeNVPTXFavorNonGenericAddrSpacesPass(
     66     *PassRegistry::getPassRegistry());
     67 }
     68 
     69 NVPTXTargetMachine::NVPTXTargetMachine(const Target &T, StringRef TT,
     70                                        StringRef CPU, StringRef FS,
     71                                        const TargetOptions &Options,
     72                                        Reloc::Model RM, CodeModel::Model CM,
     73                                        CodeGenOpt::Level OL, bool is64bit)
     74     : LLVMTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL),
     75       Subtarget(TT, CPU, FS, *this, is64bit) {
     76   initAsmInfo();
     77 }
     78 
     79 void NVPTXTargetMachine32::anchor() {}
     80 
     81 NVPTXTargetMachine32::NVPTXTargetMachine32(
     82     const Target &T, StringRef TT, StringRef CPU, StringRef FS,
     83     const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM,
     84     CodeGenOpt::Level OL)
     85     : NVPTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, false) {}
     86 
     87 void NVPTXTargetMachine64::anchor() {}
     88 
     89 NVPTXTargetMachine64::NVPTXTargetMachine64(
     90     const Target &T, StringRef TT, StringRef CPU, StringRef FS,
     91     const TargetOptions &Options, Reloc::Model RM, CodeModel::Model CM,
     92     CodeGenOpt::Level OL)
     93     : NVPTXTargetMachine(T, TT, CPU, FS, Options, RM, CM, OL, true) {}
     94 
     95 namespace {
     96 class NVPTXPassConfig : public TargetPassConfig {
     97 public:
     98   NVPTXPassConfig(NVPTXTargetMachine *TM, PassManagerBase &PM)
     99       : TargetPassConfig(TM, PM) {}
    100 
    101   NVPTXTargetMachine &getNVPTXTargetMachine() const {
    102     return getTM<NVPTXTargetMachine>();
    103   }
    104 
    105   void addIRPasses() override;
    106   bool addInstSelector() override;
    107   bool addPreRegAlloc() override;
    108   bool addPostRegAlloc() override;
    109   void addMachineSSAOptimization() override;
    110 
    111   FunctionPass *createTargetRegisterAllocator(bool) override;
    112   void addFastRegAlloc(FunctionPass *RegAllocPass) override;
    113   void addOptimizedRegAlloc(FunctionPass *RegAllocPass) override;
    114 };
    115 } // end anonymous namespace
    116 
    117 TargetPassConfig *NVPTXTargetMachine::createPassConfig(PassManagerBase &PM) {
    118   NVPTXPassConfig *PassConfig = new NVPTXPassConfig(this, PM);
    119   return PassConfig;
    120 }
    121 
    122 void NVPTXPassConfig::addIRPasses() {
    123   // The following passes are known to not play well with virtual regs hanging
    124   // around after register allocation (which in our case, is *all* registers).
    125   // We explicitly disable them here.  We do, however, need some functionality
    126   // of the PrologEpilogCodeInserter pass, so we emulate that behavior in the
    127   // NVPTXPrologEpilog pass (see NVPTXPrologEpilogPass.cpp).
    128   disablePass(&PrologEpilogCodeInserterID);
    129   disablePass(&MachineCopyPropagationID);
    130   disablePass(&BranchFolderPassID);
    131   disablePass(&TailDuplicateID);
    132 
    133   addPass(createNVPTXImageOptimizerPass());
    134   TargetPassConfig::addIRPasses();
    135   addPass(createNVPTXAssignValidGlobalNamesPass());
    136   addPass(createGenericToNVVMPass());
    137   addPass(createNVPTXFavorNonGenericAddrSpacesPass());
    138   addPass(createSeparateConstOffsetFromGEPPass());
    139   // The SeparateConstOffsetFromGEP pass creates variadic bases that can be used
    140   // by multiple GEPs. Run GVN or EarlyCSE to really reuse them. GVN generates
    141   // significantly better code than EarlyCSE for some of our benchmarks.
    142   if (getOptLevel() == CodeGenOpt::Aggressive)
    143     addPass(createGVNPass());
    144   else
    145     addPass(createEarlyCSEPass());
    146   // Both FavorNonGenericAddrSpaces and SeparateConstOffsetFromGEP may leave
    147   // some dead code.  We could remove dead code in an ad-hoc manner, but that
    148   // requires manual work and might be error-prone.
    149   //
    150   // The FavorNonGenericAddrSpaces pass shortcuts unnecessary addrspacecasts,
    151   // and leave them unused.
    152   //
    153   // SeparateConstOffsetFromGEP rebuilds a new index from the old index, and the
    154   // old index and some of its intermediate results may become unused.
    155   addPass(createDeadCodeEliminationPass());
    156 }
    157 
    158 bool NVPTXPassConfig::addInstSelector() {
    159   const NVPTXSubtarget &ST =
    160     getTM<NVPTXTargetMachine>().getSubtarget<NVPTXSubtarget>();
    161 
    162   addPass(createLowerAggrCopies());
    163   addPass(createAllocaHoisting());
    164   addPass(createNVPTXISelDag(getNVPTXTargetMachine(), getOptLevel()));
    165 
    166   if (!ST.hasImageHandles())
    167     addPass(createNVPTXReplaceImageHandlesPass());
    168 
    169   return false;
    170 }
    171 
    172 bool NVPTXPassConfig::addPreRegAlloc() { return false; }
    173 bool NVPTXPassConfig::addPostRegAlloc() {
    174   addPass(createNVPTXPrologEpilogPass());
    175   return false;
    176 }
    177 
    178 FunctionPass *NVPTXPassConfig::createTargetRegisterAllocator(bool) {
    179   return nullptr; // No reg alloc
    180 }
    181 
    182 void NVPTXPassConfig::addFastRegAlloc(FunctionPass *RegAllocPass) {
    183   assert(!RegAllocPass && "NVPTX uses no regalloc!");
    184   addPass(&PHIEliminationID);
    185   addPass(&TwoAddressInstructionPassID);
    186 }
    187 
    188 void NVPTXPassConfig::addOptimizedRegAlloc(FunctionPass *RegAllocPass) {
    189   assert(!RegAllocPass && "NVPTX uses no regalloc!");
    190 
    191   addPass(&ProcessImplicitDefsID);
    192   addPass(&LiveVariablesID);
    193   addPass(&MachineLoopInfoID);
    194   addPass(&PHIEliminationID);
    195 
    196   addPass(&TwoAddressInstructionPassID);
    197   addPass(&RegisterCoalescerID);
    198 
    199   // PreRA instruction scheduling.
    200   if (addPass(&MachineSchedulerID))
    201     printAndVerify("After Machine Scheduling");
    202 
    203 
    204   addPass(&StackSlotColoringID);
    205 
    206   // FIXME: Needs physical registers
    207   //addPass(&PostRAMachineLICMID);
    208 
    209   printAndVerify("After StackSlotColoring");
    210 }
    211 
    212 void NVPTXPassConfig::addMachineSSAOptimization() {
    213   // Pre-ra tail duplication.
    214   if (addPass(&EarlyTailDuplicateID))
    215     printAndVerify("After Pre-RegAlloc TailDuplicate");
    216 
    217   // Optimize PHIs before DCE: removing dead PHI cycles may make more
    218   // instructions dead.
    219   addPass(&OptimizePHIsID);
    220 
    221   // This pass merges large allocas. StackSlotColoring is a different pass
    222   // which merges spill slots.
    223   addPass(&StackColoringID);
    224 
    225   // If the target requests it, assign local variables to stack slots relative
    226   // to one another and simplify frame index references where possible.
    227   addPass(&LocalStackSlotAllocationID);
    228 
    229   // With optimization, dead code should already be eliminated. However
    230   // there is one known exception: lowered code for arguments that are only
    231   // used by tail calls, where the tail calls reuse the incoming stack
    232   // arguments directly (see t11 in test/CodeGen/X86/sibcall.ll).
    233   addPass(&DeadMachineInstructionElimID);
    234   printAndVerify("After codegen DCE pass");
    235 
    236   // Allow targets to insert passes that improve instruction level parallelism,
    237   // like if-conversion. Such passes will typically need dominator trees and
    238   // loop info, just like LICM and CSE below.
    239   if (addILPOpts())
    240     printAndVerify("After ILP optimizations");
    241 
    242   addPass(&MachineLICMID);
    243   addPass(&MachineCSEID);
    244 
    245   addPass(&MachineSinkingID);
    246   printAndVerify("After Machine LICM, CSE and Sinking passes");
    247 
    248   addPass(&PeepholeOptimizerID);
    249   printAndVerify("After codegen peephole optimization pass");
    250 }
    251