1 //===-- PTXTargetMachine.cpp - Define TargetMachine for PTX ---------------===// 2 // 3 // The LLVM Compiler Infrastructure 4 // 5 // This file is distributed under the University of Illinois Open Source 6 // License. See LICENSE.TXT for details. 7 // 8 //===----------------------------------------------------------------------===// 9 // 10 // Top-level implementation for the PTX target. 11 // 12 //===----------------------------------------------------------------------===// 13 14 #include "PTX.h" 15 #include "PTXTargetMachine.h" 16 #include "llvm/PassManager.h" 17 #include "llvm/Analysis/Passes.h" 18 #include "llvm/Analysis/Verifier.h" 19 #include "llvm/Assembly/PrintModulePass.h" 20 #include "llvm/ADT/OwningPtr.h" 21 #include "llvm/CodeGen/AsmPrinter.h" 22 #include "llvm/CodeGen/MachineFunctionAnalysis.h" 23 #include "llvm/CodeGen/MachineModuleInfo.h" 24 #include "llvm/CodeGen/Passes.h" 25 #include "llvm/MC/MCAsmInfo.h" 26 #include "llvm/MC/MCInstrInfo.h" 27 #include "llvm/MC/MCStreamer.h" 28 #include "llvm/MC/MCSubtargetInfo.h" 29 #include "llvm/Support/TargetRegistry.h" 30 #include "llvm/Support/raw_ostream.h" 31 #include "llvm/Target/TargetData.h" 32 #include "llvm/Target/TargetInstrInfo.h" 33 #include "llvm/Target/TargetLowering.h" 34 #include "llvm/Target/TargetLoweringObjectFile.h" 35 #include "llvm/Target/TargetMachine.h" 36 #include "llvm/Target/TargetOptions.h" 37 #include "llvm/Target/TargetRegisterInfo.h" 38 #include "llvm/Target/TargetSubtargetInfo.h" 39 #include "llvm/Transforms/Scalar.h" 40 #include "llvm/Support/Debug.h" 41 #include "llvm/Support/TargetRegistry.h" 42 43 44 using namespace llvm; 45 46 namespace llvm { 47 MCStreamer *createPTXAsmStreamer(MCContext &Ctx, formatted_raw_ostream &OS, 48 bool isVerboseAsm, bool useLoc, 49 bool useCFI, 50 MCInstPrinter *InstPrint, 51 MCCodeEmitter *CE, 52 MCAsmBackend *MAB, 53 bool ShowInst); 54 } 55 56 extern "C" void LLVMInitializePTXTarget() { 57 58 RegisterTargetMachine<PTX32TargetMachine> X(ThePTX32Target); 59 RegisterTargetMachine<PTX64TargetMachine> Y(ThePTX64Target); 60 61 TargetRegistry::RegisterAsmStreamer(ThePTX32Target, createPTXAsmStreamer); 62 TargetRegistry::RegisterAsmStreamer(ThePTX64Target, createPTXAsmStreamer); 63 } 64 65 namespace { 66 const char* DataLayout32 = 67 "e-p:32:32-i64:32:32-f64:32:32-v128:32:128-v64:32:64-n32:64"; 68 const char* DataLayout64 = 69 "e-p:64:64-i64:32:32-f64:32:32-v128:32:128-v64:32:64-n32:64"; 70 71 // Copied from LLVMTargetMachine.cpp 72 void printNoVerify(PassManagerBase &PM, const char *Banner) { 73 if (PrintMachineCode) 74 PM.add(createMachineFunctionPrinterPass(dbgs(), Banner)); 75 } 76 77 void printAndVerify(PassManagerBase &PM, 78 const char *Banner) { 79 if (PrintMachineCode) 80 PM.add(createMachineFunctionPrinterPass(dbgs(), Banner)); 81 82 //if (VerifyMachineCode) 83 // PM.add(createMachineVerifierPass(Banner)); 84 } 85 } 86 87 // DataLayout and FrameLowering are filled with dummy data 88 PTXTargetMachine::PTXTargetMachine(const Target &T, 89 StringRef TT, StringRef CPU, StringRef FS, 90 Reloc::Model RM, CodeModel::Model CM, 91 bool is64Bit) 92 : LLVMTargetMachine(T, TT, CPU, FS, RM, CM), 93 DataLayout(is64Bit ? DataLayout64 : DataLayout32), 94 Subtarget(TT, CPU, FS, is64Bit), 95 FrameLowering(Subtarget), 96 InstrInfo(*this), 97 TSInfo(*this), 98 TLInfo(*this) { 99 } 100 101 PTX32TargetMachine::PTX32TargetMachine(const Target &T, StringRef TT, 102 StringRef CPU, StringRef FS, 103 Reloc::Model RM, CodeModel::Model CM) 104 : PTXTargetMachine(T, TT, CPU, FS, RM, CM, false) { 105 } 106 107 PTX64TargetMachine::PTX64TargetMachine(const Target &T, StringRef TT, 108 StringRef CPU, StringRef FS, 109 Reloc::Model RM, CodeModel::Model CM) 110 : PTXTargetMachine(T, TT, CPU, FS, RM, CM, true) { 111 } 112 113 bool PTXTargetMachine::addInstSelector(PassManagerBase &PM, 114 CodeGenOpt::Level OptLevel) { 115 PM.add(createPTXISelDag(*this, OptLevel)); 116 return false; 117 } 118 119 bool PTXTargetMachine::addPostRegAlloc(PassManagerBase &PM, 120 CodeGenOpt::Level OptLevel) { 121 // PTXMFInfoExtract must after register allocation! 122 //PM.add(createPTXMFInfoExtract(*this, OptLevel)); 123 return false; 124 } 125 126 bool PTXTargetMachine::addPassesToEmitFile(PassManagerBase &PM, 127 formatted_raw_ostream &Out, 128 CodeGenFileType FileType, 129 CodeGenOpt::Level OptLevel, 130 bool DisableVerify) { 131 // This is mostly based on LLVMTargetMachine::addPassesToEmitFile 132 133 // Add common CodeGen passes. 134 MCContext *Context = 0; 135 if (addCommonCodeGenPasses(PM, OptLevel, DisableVerify, Context)) 136 return true; 137 assert(Context != 0 && "Failed to get MCContext"); 138 139 if (hasMCSaveTempLabels()) 140 Context->setAllowTemporaryLabels(false); 141 142 const MCAsmInfo &MAI = *getMCAsmInfo(); 143 const MCSubtargetInfo &STI = getSubtarget<MCSubtargetInfo>(); 144 OwningPtr<MCStreamer> AsmStreamer; 145 146 switch (FileType) { 147 default: return true; 148 case CGFT_AssemblyFile: { 149 MCInstPrinter *InstPrinter = 150 getTarget().createMCInstPrinter(MAI.getAssemblerDialect(), MAI, STI); 151 152 // Create a code emitter if asked to show the encoding. 153 MCCodeEmitter *MCE = 0; 154 MCAsmBackend *MAB = 0; 155 156 MCStreamer *S = getTarget().createAsmStreamer(*Context, Out, 157 true, /* verbose asm */ 158 hasMCUseLoc(), 159 hasMCUseCFI(), 160 InstPrinter, 161 MCE, MAB, 162 false /* show MC encoding */); 163 AsmStreamer.reset(S); 164 break; 165 } 166 case CGFT_ObjectFile: { 167 llvm_unreachable("Object file emission is not supported with PTX"); 168 } 169 case CGFT_Null: 170 // The Null output is intended for use for performance analysis and testing, 171 // not real users. 172 AsmStreamer.reset(createNullStreamer(*Context)); 173 break; 174 } 175 176 // MC Logging 177 //AsmStreamer.reset(createLoggingStreamer(AsmStreamer.take(), errs())); 178 179 // Create the AsmPrinter, which takes ownership of AsmStreamer if successful. 180 FunctionPass *Printer = getTarget().createAsmPrinter(*this, *AsmStreamer); 181 if (Printer == 0) 182 return true; 183 184 // If successful, createAsmPrinter took ownership of AsmStreamer. 185 AsmStreamer.take(); 186 187 PM.add(Printer); 188 189 PM.add(createGCInfoDeleter()); 190 return false; 191 } 192 193 bool PTXTargetMachine::addCommonCodeGenPasses(PassManagerBase &PM, 194 CodeGenOpt::Level OptLevel, 195 bool DisableVerify, 196 MCContext *&OutContext) { 197 // Add standard LLVM codegen passes. 198 // This is derived from LLVMTargetMachine::addCommonCodeGenPasses, with some 199 // modifications for the PTX target. 200 201 // Standard LLVM-Level Passes. 202 203 // Basic AliasAnalysis support. 204 // Add TypeBasedAliasAnalysis before BasicAliasAnalysis so that 205 // BasicAliasAnalysis wins if they disagree. This is intended to help 206 // support "obvious" type-punning idioms. 207 PM.add(createTypeBasedAliasAnalysisPass()); 208 PM.add(createBasicAliasAnalysisPass()); 209 210 // Before running any passes, run the verifier to determine if the input 211 // coming from the front-end and/or optimizer is valid. 212 if (!DisableVerify) 213 PM.add(createVerifierPass()); 214 215 // Run loop strength reduction before anything else. 216 if (OptLevel != CodeGenOpt::None) { 217 PM.add(createLoopStrengthReducePass(getTargetLowering())); 218 //PM.add(createPrintFunctionPass("\n\n*** Code after LSR ***\n", &dbgs())); 219 } 220 221 PM.add(createGCLoweringPass()); 222 223 // Make sure that no unreachable blocks are instruction selected. 224 PM.add(createUnreachableBlockEliminationPass()); 225 226 PM.add(createLowerInvokePass(getTargetLowering())); 227 // The lower invoke pass may create unreachable code. Remove it. 228 PM.add(createUnreachableBlockEliminationPass()); 229 230 if (OptLevel != CodeGenOpt::None) 231 PM.add(createCodeGenPreparePass(getTargetLowering())); 232 233 PM.add(createStackProtectorPass(getTargetLowering())); 234 235 addPreISel(PM, OptLevel); 236 237 //PM.add(createPrintFunctionPass("\n\n" 238 // "*** Final LLVM Code input to ISel ***\n", 239 // &dbgs())); 240 241 // All passes which modify the LLVM IR are now complete; run the verifier 242 // to ensure that the IR is valid. 243 if (!DisableVerify) 244 PM.add(createVerifierPass()); 245 246 // Standard Lower-Level Passes. 247 248 // Install a MachineModuleInfo class, which is an immutable pass that holds 249 // all the per-module stuff we're generating, including MCContext. 250 MachineModuleInfo *MMI = new MachineModuleInfo(*getMCAsmInfo(), 251 *getRegisterInfo(), 252 &getTargetLowering()->getObjFileLowering()); 253 PM.add(MMI); 254 OutContext = &MMI->getContext(); // Return the MCContext specifically by-ref. 255 256 // Set up a MachineFunction for the rest of CodeGen to work on. 257 PM.add(new MachineFunctionAnalysis(*this, OptLevel)); 258 259 // Ask the target for an isel. 260 if (addInstSelector(PM, OptLevel)) 261 return true; 262 263 // Print the instruction selected machine code... 264 printAndVerify(PM, "After Instruction Selection"); 265 266 // Expand pseudo-instructions emitted by ISel. 267 PM.add(createExpandISelPseudosPass()); 268 269 // Pre-ra tail duplication. 270 if (OptLevel != CodeGenOpt::None) { 271 PM.add(createTailDuplicatePass(true)); 272 printAndVerify(PM, "After Pre-RegAlloc TailDuplicate"); 273 } 274 275 // Optimize PHIs before DCE: removing dead PHI cycles may make more 276 // instructions dead. 277 if (OptLevel != CodeGenOpt::None) 278 PM.add(createOptimizePHIsPass()); 279 280 // If the target requests it, assign local variables to stack slots relative 281 // to one another and simplify frame index references where possible. 282 PM.add(createLocalStackSlotAllocationPass()); 283 284 if (OptLevel != CodeGenOpt::None) { 285 // With optimization, dead code should already be eliminated. However 286 // there is one known exception: lowered code for arguments that are only 287 // used by tail calls, where the tail calls reuse the incoming stack 288 // arguments directly (see t11 in test/CodeGen/X86/sibcall.ll). 289 PM.add(createDeadMachineInstructionElimPass()); 290 printAndVerify(PM, "After codegen DCE pass"); 291 292 PM.add(createMachineLICMPass()); 293 PM.add(createMachineCSEPass()); 294 PM.add(createMachineSinkingPass()); 295 printAndVerify(PM, "After Machine LICM, CSE and Sinking passes"); 296 297 PM.add(createPeepholeOptimizerPass()); 298 printAndVerify(PM, "After codegen peephole optimization pass"); 299 } 300 301 // Run pre-ra passes. 302 if (addPreRegAlloc(PM, OptLevel)) 303 printAndVerify(PM, "After PreRegAlloc passes"); 304 305 // Perform register allocation. 306 PM.add(createPTXRegisterAllocator()); 307 printAndVerify(PM, "After Register Allocation"); 308 309 // Perform stack slot coloring and post-ra machine LICM. 310 if (OptLevel != CodeGenOpt::None) { 311 // FIXME: Re-enable coloring with register when it's capable of adding 312 // kill markers. 313 PM.add(createStackSlotColoringPass(false)); 314 315 // FIXME: Post-RA LICM has asserts that fire on virtual registers. 316 // Run post-ra machine LICM to hoist reloads / remats. 317 //if (!DisablePostRAMachineLICM) 318 // PM.add(createMachineLICMPass(false)); 319 320 printAndVerify(PM, "After StackSlotColoring and postra Machine LICM"); 321 } 322 323 // Run post-ra passes. 324 if (addPostRegAlloc(PM, OptLevel)) 325 printAndVerify(PM, "After PostRegAlloc passes"); 326 327 PM.add(createExpandPostRAPseudosPass()); 328 printAndVerify(PM, "After ExpandPostRAPseudos"); 329 330 // Insert prolog/epilog code. Eliminate abstract frame index references... 331 PM.add(createPrologEpilogCodeInserter()); 332 printAndVerify(PM, "After PrologEpilogCodeInserter"); 333 334 // Run pre-sched2 passes. 335 if (addPreSched2(PM, OptLevel)) 336 printAndVerify(PM, "After PreSched2 passes"); 337 338 // Second pass scheduler. 339 if (OptLevel != CodeGenOpt::None) { 340 PM.add(createPostRAScheduler(OptLevel)); 341 printAndVerify(PM, "After PostRAScheduler"); 342 } 343 344 // Branch folding must be run after regalloc and prolog/epilog insertion. 345 if (OptLevel != CodeGenOpt::None) { 346 PM.add(createBranchFoldingPass(getEnableTailMergeDefault())); 347 printNoVerify(PM, "After BranchFolding"); 348 } 349 350 // Tail duplication. 351 if (OptLevel != CodeGenOpt::None) { 352 PM.add(createTailDuplicatePass(false)); 353 printNoVerify(PM, "After TailDuplicate"); 354 } 355 356 PM.add(createGCMachineCodeAnalysisPass()); 357 358 //if (PrintGCInfo) 359 // PM.add(createGCInfoPrinter(dbgs())); 360 361 if (OptLevel != CodeGenOpt::None) { 362 PM.add(createCodePlacementOptPass()); 363 printNoVerify(PM, "After CodePlacementOpt"); 364 } 365 366 if (addPreEmitPass(PM, OptLevel)) 367 printNoVerify(PM, "After PreEmit passes"); 368 369 PM.add(createPTXMFInfoExtract(*this, OptLevel)); 370 PM.add(createPTXFPRoundingModePass(*this, OptLevel)); 371 372 return false; 373 } 374